12 files changed, 293 insertions, 131 deletions
diff --git a/Documentation/ABI/testing/sysfs-class-net-qmi b/Documentation/ABI/testing/sysfs-class-net-qmi
index 7122d6264c49..c310db4ccbc2 100644
--- a/Documentation/ABI/testing/sysfs-class-net-qmi
+++ b/Documentation/ABI/testing/sysfs-class-net-qmi
@@ -29,7 +29,7 @@ Contact:	Bjørn Mork <bjorn@mork.no>
 Description:
 		Unsigned integer.
 
-		Write a number ranging from 1 to 127 to add a qmap mux
+		Write a number ranging from 1 to 254 to add a qmap mux
 		based network device, supported by recent Qualcomm based
 		modems.
 
@@ -46,5 +46,5 @@ Contact:	Bjørn Mork <bjorn@mork.no>
 Description:
 		Unsigned integer.
 
-		Write a number ranging from 1 to 127 to delete a previously
+		Write a number ranging from 1 to 254 to delete a previously
 		created qmap mux based network device.
diff --git a/Documentation/arm64/sve.txt b/Documentation/arm64/sve.txt
index 9940e924a47e..5689fc9a976a 100644
--- a/Documentation/arm64/sve.txt
+++ b/Documentation/arm64/sve.txt
@@ -56,6 +56,18 @@ model features for SVE is included in Appendix A.
   is to connect to a target process first and then attempt a
   ptrace(PTRACE_GETREGSET, pid, NT_ARM_SVE, &iov).
 
+* Whenever SVE scalable register values (Zn, Pn, FFR) are exchanged in memory
+  between userspace and the kernel, the register value is encoded in memory in
+  an endianness-invariant layout, with bits [(8 * i + 7) : (8 * i)] encoded at
+  byte offset i from the start of the memory representation.  This affects, for
+  example, the signal frame (struct sve_context) and ptrace interface
+  (struct user_sve_header) and associated data.
+
+  Beware that on big-endian systems this results in a different byte order than
+  for the FPSIMD V-registers, which are stored as single host-endian 128-bit
+  values, with bits [(127 - 8 * i) : (120 - 8 * i)] of the register encoded at
+  byte offset i.  (struct fpsimd_context, struct user_fpsimd_state).
+
 
 2.  Vector length terminology
 -----------------------------
@@ -124,6 +136,10 @@ the SVE instruction set architecture.
   size and layout.  Macros SVE_SIG_* are defined [1] to facilitate access to
   the members.
 
+* Each scalable register (Zn, Pn, FFR) is stored in an endianness-invariant
+  layout, with bits [(8 * i + 7) : (8 * i)] stored at byte offset i from the
+  start of the register's representation in memory.
+
 * If the SVE context is too big to fit in sigcontext.__reserved[], then extra
   space is allocated on the stack, an extra_context record is written in
   __reserved[] referencing this space.  sve_context is then written in the
diff --git a/Documentation/block/switching-sched.txt b/Documentation/block/switching-sched.txt
index 3b2612e342f1..7977f6fb8b20 100644
--- a/Documentation/block/switching-sched.txt
+++ b/Documentation/block/switching-sched.txt
@@ -13,11 +13,9 @@ you can do so by typing:
 
 # mount none /sys -t sysfs
 
-As of the Linux 2.6.10 kernel, it is now possible to change the
-IO scheduler for a given block device on the fly (thus making it possible,
-for instance, to set the CFQ scheduler for the system default, but
-set a specific device to use the deadline or noop schedulers - which
-can improve that device's throughput).
+It is possible to change the IO scheduler for a given block device on
+the fly to select one of mq-deadline, none, bfq, or kyber schedulers -
+which can improve that device's throughput.
 
 To set a specific scheduler, simply do this:
 
@@ -30,8 +28,8 @@ The list of defined schedulers can be found by simply doing
 a "cat /sys/block/DEV/queue/scheduler" - the list of valid names
 will be displayed, with the currently selected scheduler in brackets:
 
-# cat /sys/block/hda/queue/scheduler
-noop deadline [cfq]
-# echo deadline > /sys/block/hda/queue/scheduler
-# cat /sys/block/hda/queue/scheduler
-noop [deadline] cfq
+# cat /sys/block/sda/queue/scheduler
+[mq-deadline] kyber bfq none
+# echo none >/sys/block/sda/queue/scheduler
+# cat /sys/block/sda/queue/scheduler
+[none] mq-deadline kyber bfq
diff --git a/Documentation/cgroup-v1/blkio-controller.txt b/Documentation/cgroup-v1/blkio-controller.txt
index 673dc34d3f78..d1a1b7bdd03a 100644
--- a/Documentation/cgroup-v1/blkio-controller.txt
+++ b/Documentation/cgroup-v1/blkio-controller.txt
@@ -8,61 +8,13 @@ both at leaf nodes as well as at intermediate nodes in a storage hierarchy.
 Plan is to use the same cgroup based management interface for blkio controller
 and based on user options switch IO policies in the background.
 
-Currently two IO control policies are implemented. First one is proportional
-weight time based division of disk policy. It is implemented in CFQ. Hence
-this policy takes effect only on leaf nodes when CFQ is being used. The second
-one is throttling policy which can be used to specify upper IO rate limits
-on devices. This policy is implemented in generic block layer and can be
-used on leaf nodes as well as higher level logical devices like device mapper.
+One IO control policy is the throttling policy, which can be used to
+specify upper IO rate limits on devices. This policy is implemented in
+generic block layer and can be used on leaf nodes as well as higher
+level logical devices like device mapper.
 
 HOWTO
 =====
-Proportional Weight division of bandwidth
------------------------------------------
-You can do a very simple testing of running two dd threads in two different
-cgroups. Here is what you can do.
-
-- Enable Block IO controller
-	CONFIG_BLK_CGROUP=y
-
-- Enable group scheduling in CFQ
-	CONFIG_CFQ_GROUP_IOSCHED=y
-
-- Compile and boot into kernel and mount IO controller (blkio); see
-  cgroups.txt, Why are cgroups needed?.
-
-	mount -t tmpfs cgroup_root /sys/fs/cgroup
-	mkdir /sys/fs/cgroup/blkio
-	mount -t cgroup -o blkio none /sys/fs/cgroup/blkio
-
-- Create two cgroups
-	mkdir -p /sys/fs/cgroup/blkio/test1/ /sys/fs/cgroup/blkio/test2
-
-- Set weights of group test1 and test2
-	echo 1000 > /sys/fs/cgroup/blkio/test1/blkio.weight
-	echo 500 > /sys/fs/cgroup/blkio/test2/blkio.weight
-
-- Create two same size files (say 512MB each) on same disk (file1, file2) and
-  launch two dd threads in different cgroup to read those files.
-
-	sync
-	echo 3 > /proc/sys/vm/drop_caches
-
-	dd if=/mnt/sdb/zerofile1 of=/dev/null &
-	echo $! > /sys/fs/cgroup/blkio/test1/tasks
-	cat /sys/fs/cgroup/blkio/test1/tasks
-
-	dd if=/mnt/sdb/zerofile2 of=/dev/null &
-	echo $! > /sys/fs/cgroup/blkio/test2/tasks
-	cat /sys/fs/cgroup/blkio/test2/tasks
-
-- At macro level, first dd should finish first. To get more precise data, keep
-  on looking at (with the help of script), at blkio.disk_time and
-  blkio.disk_sectors files of both test1 and test2 groups. This will tell how
-  much disk time (in milliseconds), each group got and how many sectors each
-  group dispatched to the disk. We provide fairness in terms of disk time, so
-  ideally io.disk_time of cgroups should be in proportion to the weight.
-
 Throttling/Upper Limit policy
 -----------------------------
 - Enable Block IO controller
@@ -94,7 +46,7 @@ Throttling/Upper Limit policy
 Hierarchical Cgroups
 ====================
 
-Both CFQ and throttling implement hierarchy support; however,
+Throttling implements hierarchy support; however,
 throttling's hierarchy support is enabled iff "sane_behavior" is
 enabled from cgroup side, which currently is a development option and
 not publicly available.
@@ -107,9 +59,8 @@ If somebody created a hierarchy like as follows.
 			|
 		     test3
 
-CFQ by default and throttling with "sane_behavior" will handle the
-hierarchy correctly.  For details on CFQ hierarchy support, refer to
-Documentation/block/cfq-iosched.txt.  For throttling, all limits apply
+Throttling with "sane_behavior" will handle the
+hierarchy correctly. For throttling, all limits apply
 to the whole subtree while all statistics are local to the IOs
 directly generated by tasks in that cgroup.
 
@@ -130,10 +81,6 @@ CONFIG_DEBUG_BLK_CGROUP
 	- Debug help. Right now some additional stats file show up in cgroup
 	  if this option is enabled.
 
-CONFIG_CFQ_GROUP_IOSCHED
-	- Enables group scheduling in CFQ. Currently only 1 level of group
-	  creation is allowed.
-
 CONFIG_BLK_DEV_THROTTLING
 	- Enable block device throttling support in block layer.
 
@@ -344,32 +291,3 @@ Common files among various policies
 - blkio.reset_stats
 	- Writing an int to this file will result in resetting all the stats
 	  for that cgroup.
-
-CFQ sysfs tunable
-=================
-/sys/block/<disk>/queue/iosched/slice_idle
-------------------------------------------
-On a faster hardware CFQ can be slow, especially with sequential workload.
-This happens because CFQ idles on a single queue and single queue might not
-drive deeper request queue depths to keep the storage busy. In such scenarios
-one can try setting slice_idle=0 and that would switch CFQ to IOPS
-(IO operations per second) mode on NCQ supporting hardware.
-
-That means CFQ will not idle between cfq queues of a cfq group and hence be
-able to driver higher queue depth and achieve better throughput. That also
-means that cfq provides fairness among groups in terms of IOPS and not in
-terms of disk time.
-
-/sys/block/<disk>/queue/iosched/group_idle
-------------------------------------------
-If one disables idling on individual cfq queues and cfq service trees by
-setting slice_idle=0, group_idle kicks in. That means CFQ will still idle
-on the group in an attempt to provide fairness among groups.
-
-By default group_idle is same as slice_idle and does not do anything if
-slice_idle is enabled.
-
-One can experience an overall throughput drop if you have created multiple
-groups and put applications in that group which are not driving enough
-IO to keep disk busy. In that case set group_idle=0, and CFQ will not idle
-on individual groups and throughput should improve.
diff --git a/Documentation/cgroup-v1/hugetlb.txt b/Documentation/cgroup-v1/hugetlb.txt
index 106245c3aecc..1260e5369b9b 100644
--- a/Documentation/cgroup-v1/hugetlb.txt
+++ b/Documentation/cgroup-v1/hugetlb.txt
@@ -32,14 +32,18 @@ Brief summary of control files
  hugetlb.<hugepagesize>.usage_in_bytes     # show current usage for "hugepagesize" hugetlb
  hugetlb.<hugepagesize>.failcnt		   # show the number of allocation failure due to HugeTLB limit
 
-For a system supporting two hugepage size (16M and 16G) the control
+For a system supporting three hugepage sizes (64k, 32M and 1G), the control
 files include:
 
-hugetlb.16GB.limit_in_bytes
-hugetlb.16GB.max_usage_in_bytes
-hugetlb.16GB.usage_in_bytes
-hugetlb.16GB.failcnt
-hugetlb.16MB.limit_in_bytes
-hugetlb.16MB.max_usage_in_bytes
-hugetlb.16MB.usage_in_bytes
-hugetlb.16MB.failcnt
+hugetlb.1GB.limit_in_bytes
+hugetlb.1GB.max_usage_in_bytes
+hugetlb.1GB.usage_in_bytes
+hugetlb.1GB.failcnt
+hugetlb.64KB.limit_in_bytes
+hugetlb.64KB.max_usage_in_bytes
+hugetlb.64KB.usage_in_bytes
+hugetlb.64KB.failcnt
+hugetlb.32MB.limit_in_bytes
+hugetlb.32MB.max_usage_in_bytes
+hugetlb.32MB.usage_in_bytes
+hugetlb.32MB.failcnt
diff --git a/Documentation/devicetree/bindings/net/can/microchip,mcp251x.txt b/Documentation/devicetree/bindings/net/can/microchip,mcp251x.txt
index 188c8bd4eb67..5a0111d4de58 100644
--- a/Documentation/devicetree/bindings/net/can/microchip,mcp251x.txt
+++ b/Documentation/devicetree/bindings/net/can/microchip,mcp251x.txt
@@ -4,6 +4,7 @@ Required properties:
  - compatible: Should be one of the following:
    - "microchip,mcp2510" for MCP2510.
    - "microchip,mcp2515" for MCP2515.
+   - "microchip,mcp25625" for MCP25625.
  - reg: SPI chip select.
  - clocks: The clock feeding the CAN controller.
  - interrupts: Should contain IRQ line for the CAN controller.
diff --git a/Documentation/devicetree/bindings/riscv/cpus.yaml b/Documentation/devicetree/bindings/riscv/cpus.yaml
new file mode 100644
index 000000000000..27f02ec4bb45
--- /dev/null
+++ b/Documentation/devicetree/bindings/riscv/cpus.yaml
@@ -0,0 +1,168 @@
+# SPDX-License-Identifier: (GPL-2.0 OR MIT)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/riscv/cpus.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: RISC-V bindings for 'cpus' DT nodes
+
+maintainers:
+  - Paul Walmsley <paul.walmsley@sifive.com>
+  - Palmer Dabbelt <palmer@sifive.com>
+
+allOf:
+  - $ref: /schemas/cpus.yaml#
+
+properties:
+  $nodename:
+    const: cpus
+    description: Container of cpu nodes
+
+  '#address-cells':
+    const: 1
+    description: |
+      A single unsigned 32-bit integer uniquely identifies each RISC-V
+      hart in a system.  (See the "reg" property under the "cpu" node,
+      below).
+
+  '#size-cells':
+    const: 0
+
+patternProperties:
+  '^cpu@[0-9a-f]+$':
+    properties:
+      compatible:
+        type: array
+        items:
+          - enum:
+              - sifive,rocket0
+              - sifive,e5
+              - sifive,e51
+              - sifive,u54-mc
+              - sifive,u54
+              - sifive,u5
+          - const: riscv
+        description:
+          Identifies that the hart uses the RISC-V instruction set
+          and identifies the type of the hart.
+
+      mmu-type:
+        allOf:
+          - $ref: "/schemas/types.yaml#/definitions/string"
+          - enum:
+              - riscv,sv32
+              - riscv,sv39
+              - riscv,sv48
+        description:
+          Identifies the MMU address translation mode used on this
+          hart.  These values originate from the RISC-V Privileged
+          Specification document, available from
+          https://riscv.org/specifications/
+
+      riscv,isa:
+        allOf:
+          - $ref: "/schemas/types.yaml#/definitions/string"
+          - enum:
+              - rv64imac
+              - rv64imafdc
+        description:
+          Identifies the specific RISC-V instruction set architecture
+          supported by the hart.  These are documented in the RISC-V
+          User-Level ISA document, available from
+          https://riscv.org/specifications/
+
+      timebase-frequency:
+        type: integer
+        minimum: 1
+        description:
+          Specifies the clock frequency of the system timer in Hz.
+          This value is common to all harts on a single system image.
+
+      interrupt-controller:
+        type: object
+        description: Describes the CPU's local interrupt controller
+
+        properties:
+          '#interrupt-cells':
+            const: 1
+
+          compatible:
+            const: riscv,cpu-intc
+
+          interrupt-controller: true
+
+        required:
+          - '#interrupt-cells'
+          - compatible
+          - interrupt-controller
+
+    required:
+      - riscv,isa
+      - timebase-frequency
+      - interrupt-controller
+
+examples:
+  - |
+    // Example 1: SiFive Freedom U540G Development Kit
+    cpus {
+        #address-cells = <1>;
+        #size-cells = <0>;
+        timebase-frequency = <1000000>;
+        cpu@0 {
+                clock-frequency = <0>;
+                compatible = "sifive,rocket0", "riscv";
+                device_type = "cpu";
+                i-cache-block-size = <64>;
+                i-cache-sets = <128>;
+                i-cache-size = <16384>;
+                reg = <0>;
+                riscv,isa = "rv64imac";
+                cpu_intc0: interrupt-controller {
+                        #interrupt-cells = <1>;
+                        compatible = "riscv,cpu-intc";
+                        interrupt-controller;
+                };
+        };
+        cpu@1 {
+                clock-frequency = <0>;
+                compatible = "sifive,rocket0", "riscv";
+                d-cache-block-size = <64>;
+                d-cache-sets = <64>;
+                d-cache-size = <32768>;
+                d-tlb-sets = <1>;
+                d-tlb-size = <32>;
+                device_type = "cpu";
+                i-cache-block-size = <64>;
+                i-cache-sets = <64>;
+                i-cache-size = <32768>;
+                i-tlb-sets = <1>;
+                i-tlb-size = <32>;
+                mmu-type = "riscv,sv39";
+                reg = <1>;
+                riscv,isa = "rv64imafdc";
+                tlb-split;
+                cpu_intc1: interrupt-controller {
+                        #interrupt-cells = <1>;
+                        compatible = "riscv,cpu-intc";
+                        interrupt-controller;
+                };
+        };
+    };
+
+  - |
+    // Example 2: Spike ISA Simulator with 1 Hart
+    cpus {
+            cpu@0 {
+                    device_type = "cpu";
+                    reg = <0>;
+                    compatible = "riscv";
+                    riscv,isa = "rv64imafdc";
+                    mmu-type = "riscv,sv48";
+                    interrupt-controller {
+                            #interrupt-cells = <1>;
+                            interrupt-controller;
+                            compatible = "riscv,cpu-intc";
+                    };
+            };
+    };
+...
diff --git a/Documentation/devicetree/bindings/riscv/sifive.yaml b/Documentation/devicetree/bindings/riscv/sifive.yaml
new file mode 100644
index 000000000000..9d17dc2f3f84
--- /dev/null
+++ b/Documentation/devicetree/bindings/riscv/sifive.yaml
@@ -0,0 +1,25 @@
+# SPDX-License-Identifier: (GPL-2.0 OR MIT)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/riscv/sifive.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: SiFive SoC-based boards
+
+maintainers:
+  - Paul Walmsley <paul.walmsley@sifive.com>
+  - Palmer Dabbelt <palmer@sifive.com>
+
+description:
+  SiFive SoC-based boards
+
+properties:
+  $nodename:
+    const: '/'
+  compatible:
+    items:
+      - enum:
+          - sifive,freedom-unleashed-a00
+      - const: sifive,fu540-c000
+      - const: sifive,fu540
+...
diff --git a/Documentation/networking/af_xdp.rst b/Documentation/networking/af_xdp.rst
index e14d7d40fc75..50bccbf68308 100644
--- a/Documentation/networking/af_xdp.rst
+++ b/Documentation/networking/af_xdp.rst
@@ -316,16 +316,16 @@ A: When a netdev of a physical NIC is initialized, Linux usually
    all the traffic, you can force the netdev to only have 1 queue, queue
    id 0, and then bind to queue 0. You can use ethtool to do this::
 
-   sudo ethtool -L <interface> combined 1
+     sudo ethtool -L <interface> combined 1
 
    If you want to only see part of the traffic, you can program the
    NIC through ethtool to filter out your traffic to a single queue id
    that you can bind your XDP socket to. Here is one example in which
    UDP traffic to and from port 4242 are sent to queue 2::
 
-   sudo ethtool -N <interface> rx-flow-hash udp4 fn
-   sudo ethtool -N <interface> flow-type udp4 src-port 4242 dst-port \
-   4242 action 2
+     sudo ethtool -N <interface> rx-flow-hash udp4 fn
+     sudo ethtool -N <interface> flow-type udp4 src-port 4242 dst-port \
+     4242 action 2
 
    A number of other ways are possible all up to the capabilitites of
    the NIC you have.
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 14fe93049d28..22f6b8b1110a 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -255,6 +255,14 @@ tcp_base_mss - INTEGER
 	Path MTU discovery (MTU probing).  If MTU probing is enabled,
 	this is the initial MSS used by the connection.
 
+tcp_min_snd_mss - INTEGER
+	TCP SYN and SYNACK messages usually advertise an ADVMSS option,
+	as described in RFC 1122 and RFC 6691.
+	If this ADVMSS option is smaller than tcp_min_snd_mss,
+	it is silently raised to tcp_min_snd_mss.
+
+	Default: 48 (at least 8 bytes of payload per segment)
+
 tcp_congestion_control - STRING
 	Set the congestion control algorithm to be used for new
 	connections. The algorithm "reno" is always available, but
@@ -772,6 +780,14 @@ tcp_challenge_ack_limit - INTEGER
 	in RFC 5961 (Improving TCP's Robustness to Blind In-Window Attacks)
 	Default: 100
 
+tcp_rx_skb_cache - BOOLEAN
+	Controls a per-TCP-socket cache of one skb, which might help
+	performance of some workloads. This might be dangerous
+	on systems with a lot of TCP sockets, since it increases
+	memory usage.
+
+	Default: 0 (disabled)
+
 UDP variables:
 
 udp_l3mdev_accept - BOOLEAN
diff --git a/Documentation/networking/rds.txt b/Documentation/networking/rds.txt
index 0235ae69af2a..f2a0147c933d 100644
--- a/Documentation/networking/rds.txt
+++ b/Documentation/networking/rds.txt
@@ -389,7 +389,7 @@ Multipath RDS (mprds)
   a common (to all paths) part, and a per-path struct rds_conn_path. All
   I/O workqs and reconnect threads are driven from the rds_conn_path.
   Transports such as TCP that are multipath capable may then set up a
-  TPC socket per rds_conn_path, and this is managed by the transport via
+  TCP socket per rds_conn_path, and this is managed by the transport via
   the transport privatee cp_transport_data pointer.
 
   Transports announce themselves as multipath capable by setting the
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index ba6c42c576dd..2a4531bb06bd 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1079,7 +1079,7 @@ yet and must be cleared on entry.
 
 4.35 KVM_SET_USER_MEMORY_REGION
 
-Capability: KVM_CAP_USER_MEM
+Capability: KVM_CAP_USER_MEMORY
 Architectures: all
 Type: vm ioctl
 Parameters: struct kvm_userspace_memory_region (in)
@@ -3857,43 +3857,59 @@ Type: vcpu ioctl
 Parameters: struct kvm_nested_state (in/out)
 Returns: 0 on success, -1 on error
 Errors:
-  E2BIG:     the total state size (including the fixed-size part of struct
-             kvm_nested_state) exceeds the value of 'size' specified by
+  E2BIG:     the total state size exceeds the value of 'size' specified by
              the user; the size required will be written into size.
 
 struct kvm_nested_state {
 	__u16 flags;
 	__u16 format;
 	__u32 size;
+
 	union {
-		struct kvm_vmx_nested_state vmx;
-		struct kvm_svm_nested_state svm;
+		struct kvm_vmx_nested_state_hdr vmx;
+		struct kvm_svm_nested_state_hdr svm;
+
+		/* Pad the header to 128 bytes.  */
 		__u8 pad[120];
-	};
-	__u8 data[0];
+	} hdr;
+
+	union {
+		struct kvm_vmx_nested_state_data vmx[0];
+		struct kvm_svm_nested_state_data svm[0];
+	} data;
 };
 
 #define KVM_STATE_NESTED_GUEST_MODE	0x00000001
 #define KVM_STATE_NESTED_RUN_PENDING	0x00000002
+#define KVM_STATE_NESTED_EVMCS		0x00000004
 
-#define KVM_STATE_NESTED_SMM_GUEST_MODE	0x00000001
-#define KVM_STATE_NESTED_SMM_VMXON	0x00000002
+#define KVM_STATE_NESTED_FORMAT_VMX		0
+#define KVM_STATE_NESTED_FORMAT_SVM		1
 
-struct kvm_vmx_nested_state {
+#define KVM_STATE_NESTED_VMX_VMCS_SIZE		0x1000
+
+#define KVM_STATE_NESTED_VMX_SMM_GUEST_MODE	0x00000001
+#define KVM_STATE_NESTED_VMX_SMM_VMXON		0x00000002
+
+struct kvm_vmx_nested_state_hdr {
 	__u64 vmxon_pa;
-	__u64 vmcs_pa;
+	__u64 vmcs12_pa;
 
 	struct {
 		__u16 flags;
 	} smm;
 };
 
+struct kvm_vmx_nested_state_data {
+	__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
+	__u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
+};
+
 This ioctl copies the vcpu's nested virtualization state from the kernel to
 userspace.
 
-The maximum size of the state, including the fixed-size part of struct
-kvm_nested_state, can be retrieved by passing KVM_CAP_NESTED_STATE to
-the KVM_CHECK_EXTENSION ioctl().
+The maximum size of the state can be retrieved by passing KVM_CAP_NESTED_STATE
+to the KVM_CHECK_EXTENSION ioctl().
 
 4.115 KVM_SET_NESTED_STATE
 
@@ -3903,8 +3919,8 @@ Type: vcpu ioctl
 Parameters: struct kvm_nested_state (in)
 Returns: 0 on success, -1 on error
 
-This copies the vcpu's kvm_nested_state struct from userspace to the kernel.  For
-the definition of struct kvm_nested_state, see KVM_GET_NESTED_STATE.
+This copies the vcpu's kvm_nested_state struct from userspace to the kernel.
+For the definition of struct kvm_nested_state, see KVM_GET_NESTED_STATE.
 
 4.116 KVM_(UN)REGISTER_COALESCED_MMIO