2159 files changed, 87061 insertions, 35698 deletions
diff --git a/.clang-format b/.clang-format
new file mode 100644
index 000000000000..faffc0d5af4e
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,428 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# clang-format configuration file. Intended for clang-format >= 4.
+#
+# For more information, see:
+#
+#   Documentation/process/clang-format.rst
+#   https://clang.llvm.org/docs/ClangFormat.html
+#   https://clang.llvm.org/docs/ClangFormatStyleOptions.html
+#
+---
+AccessModifierOffset: -4
+AlignAfterOpenBracket: Align
+AlignConsecutiveAssignments: false
+AlignConsecutiveDeclarations: false
+#AlignEscapedNewlines: Left # Unknown to clang-format-4.0
+AlignOperands: true
+AlignTrailingComments: false
+AllowAllParametersOfDeclarationOnNextLine: false
+AllowShortBlocksOnASingleLine: false
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: None
+AllowShortIfStatementsOnASingleLine: false
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakAfterReturnType: None
+AlwaysBreakBeforeMultilineStrings: false
+AlwaysBreakTemplateDeclarations: false
+BinPackArguments: true
+BinPackParameters: true
+BraceWrapping:
+  AfterClass: false
+  AfterControlStatement: false
+  AfterEnum: false
+  AfterFunction: true
+  AfterNamespace: true
+  AfterObjCDeclaration: false
+  AfterStruct: false
+  AfterUnion: false
+  #AfterExternBlock: false # Unknown to clang-format-5.0
+  BeforeCatch: false
+  BeforeElse: false
+  IndentBraces: false
+  #SplitEmptyFunction: true # Unknown to clang-format-4.0
+  #SplitEmptyRecord: true # Unknown to clang-format-4.0
+  #SplitEmptyNamespace: true # Unknown to clang-format-4.0
+BreakBeforeBinaryOperators: None
+BreakBeforeBraces: Custom
+#BreakBeforeInheritanceComma: false # Unknown to clang-format-4.0
+BreakBeforeTernaryOperators: false
+BreakConstructorInitializersBeforeComma: false
+#BreakConstructorInitializers: BeforeComma # Unknown to clang-format-4.0
+BreakAfterJavaFieldAnnotations: false
+BreakStringLiterals: false
+ColumnLimit: 80
+CommentPragmas: '^ IWYU pragma:'
+#CompactNamespaces: false # Unknown to clang-format-4.0
+ConstructorInitializerAllOnOneLineOrOnePerLine: false
+ConstructorInitializerIndentWidth: 8
+ContinuationIndentWidth: 8
+Cpp11BracedListStyle: false
+DerivePointerAlignment: false
+DisableFormat: false
+ExperimentalAutoDetectBinPacking: false
+#FixNamespaceComments: false # Unknown to clang-format-4.0
+
+# Taken from:
+#   git grep -h '^#define [^[:space:]]*for_each[^[:space:]]*(' include/ \
+#   | sed "s,^#define \([^[:space:]]*for_each[^[:space:]]*\)(.*$,  - '\1'," \
+#   | sort | uniq
+ForEachMacros:
+  - 'apei_estatus_for_each_section'
+  - 'ata_for_each_dev'
+  - 'ata_for_each_link'
+  - 'ax25_for_each'
+  - 'ax25_uid_for_each'
+  - 'bio_for_each_integrity_vec'
+  - '__bio_for_each_segment'
+  - 'bio_for_each_segment'
+  - 'bio_for_each_segment_all'
+  - 'bio_list_for_each'
+  - 'bip_for_each_vec'
+  - 'blkg_for_each_descendant_post'
+  - 'blkg_for_each_descendant_pre'
+  - 'blk_queue_for_each_rl'
+  - 'bond_for_each_slave'
+  - 'bond_for_each_slave_rcu'
+  - 'btree_for_each_safe128'
+  - 'btree_for_each_safe32'
+  - 'btree_for_each_safe64'
+  - 'btree_for_each_safel'
+  - 'card_for_each_dev'
+  - 'cgroup_taskset_for_each'
+  - 'cgroup_taskset_for_each_leader'
+  - 'cpufreq_for_each_entry'
+  - 'cpufreq_for_each_entry_idx'
+  - 'cpufreq_for_each_valid_entry'
+  - 'cpufreq_for_each_valid_entry_idx'
+  - 'css_for_each_child'
+  - 'css_for_each_descendant_post'
+  - 'css_for_each_descendant_pre'
+  - 'device_for_each_child_node'
+  - 'drm_atomic_crtc_for_each_plane'
+  - 'drm_atomic_crtc_state_for_each_plane'
+  - 'drm_atomic_crtc_state_for_each_plane_state'
+  - 'drm_for_each_connector_iter'
+  - 'drm_for_each_crtc'
+  - 'drm_for_each_encoder'
+  - 'drm_for_each_encoder_mask'
+  - 'drm_for_each_fb'
+  - 'drm_for_each_legacy_plane'
+  - 'drm_for_each_plane'
+  - 'drm_for_each_plane_mask'
+  - 'drm_mm_for_each_hole'
+  - 'drm_mm_for_each_node'
+  - 'drm_mm_for_each_node_in_range'
+  - 'drm_mm_for_each_node_safe'
+  - 'for_each_active_drhd_unit'
+  - 'for_each_active_iommu'
+  - 'for_each_available_child_of_node'
+  - 'for_each_bio'
+  - 'for_each_board_func_rsrc'
+  - 'for_each_bvec'
+  - 'for_each_child_of_node'
+  - 'for_each_clear_bit'
+  - 'for_each_clear_bit_from'
+  - 'for_each_cmsghdr'
+  - 'for_each_compatible_node'
+  - 'for_each_console'
+  - 'for_each_cpu'
+  - 'for_each_cpu_and'
+  - 'for_each_cpu_not'
+  - 'for_each_cpu_wrap'
+  - 'for_each_dev_addr'
+  - 'for_each_dma_cap_mask'
+  - 'for_each_drhd_unit'
+  - 'for_each_dss_dev'
+  - 'for_each_efi_memory_desc'
+  - 'for_each_efi_memory_desc_in_map'
+  - 'for_each_endpoint_of_node'
+  - 'for_each_evictable_lru'
+  - 'for_each_fib6_node_rt_rcu'
+  - 'for_each_fib6_walker_rt'
+  - 'for_each_free_mem_range'
+  - 'for_each_free_mem_range_reverse'
+  - 'for_each_func_rsrc'
+  - 'for_each_hstate'
+  - 'for_each_if'
+  - 'for_each_iommu'
+  - 'for_each_ip_tunnel_rcu'
+  - 'for_each_irq_nr'
+  - 'for_each_lru'
+  - 'for_each_matching_node'
+  - 'for_each_matching_node_and_match'
+  - 'for_each_memblock'
+  - 'for_each_memblock_type'
+  - 'for_each_memcg_cache_index'
+  - 'for_each_mem_pfn_range'
+  - 'for_each_mem_range'
+  - 'for_each_mem_range_rev'
+  - 'for_each_migratetype_order'
+  - 'for_each_msi_entry'
+  - 'for_each_net'
+  - 'for_each_netdev'
+  - 'for_each_netdev_continue'
+  - 'for_each_netdev_continue_rcu'
+  - 'for_each_netdev_feature'
+  - 'for_each_netdev_in_bond_rcu'
+  - 'for_each_netdev_rcu'
+  - 'for_each_netdev_reverse'
+  - 'for_each_netdev_safe'
+  - 'for_each_net_rcu'
+  - 'for_each_new_connector_in_state'
+  - 'for_each_new_crtc_in_state'
+  - 'for_each_new_plane_in_state'
+  - 'for_each_new_private_obj_in_state'
+  - 'for_each_node'
+  - 'for_each_node_by_name'
+  - 'for_each_node_by_type'
+  - 'for_each_node_mask'
+  - 'for_each_node_state'
+  - 'for_each_node_with_cpus'
+  - 'for_each_node_with_property'
+  - 'for_each_of_allnodes'
+  - 'for_each_of_allnodes_from'
+  - 'for_each_of_pci_range'
+  - 'for_each_old_connector_in_state'
+  - 'for_each_old_crtc_in_state'
+  - 'for_each_oldnew_connector_in_state'
+  - 'for_each_oldnew_crtc_in_state'
+  - 'for_each_oldnew_plane_in_state'
+  - 'for_each_oldnew_private_obj_in_state'
+  - 'for_each_old_plane_in_state'
+  - 'for_each_old_private_obj_in_state'
+  - 'for_each_online_cpu'
+  - 'for_each_online_node'
+  - 'for_each_online_pgdat'
+  - 'for_each_pci_bridge'
+  - 'for_each_pci_dev'
+  - 'for_each_pci_msi_entry'
+  - 'for_each_populated_zone'
+  - 'for_each_possible_cpu'
+  - 'for_each_present_cpu'
+  - 'for_each_prime_number'
+  - 'for_each_prime_number_from'
+  - 'for_each_process'
+  - 'for_each_process_thread'
+  - 'for_each_property_of_node'
+  - 'for_each_reserved_mem_region'
+  - 'for_each_resv_unavail_range'
+  - 'for_each_rtdcom'
+  - 'for_each_rtdcom_safe'
+  - 'for_each_set_bit'
+  - 'for_each_set_bit_from'
+  - 'for_each_sg'
+  - 'for_each_sg_page'
+  - '__for_each_thread'
+  - 'for_each_thread'
+  - 'for_each_zone'
+  - 'for_each_zone_zonelist'
+  - 'for_each_zone_zonelist_nodemask'
+  - 'fwnode_for_each_available_child_node'
+  - 'fwnode_for_each_child_node'
+  - 'fwnode_graph_for_each_endpoint'
+  - 'gadget_for_each_ep'
+  - 'hash_for_each'
+  - 'hash_for_each_possible'
+  - 'hash_for_each_possible_rcu'
+  - 'hash_for_each_possible_rcu_notrace'
+  - 'hash_for_each_possible_safe'
+  - 'hash_for_each_rcu'
+  - 'hash_for_each_safe'
+  - 'hctx_for_each_ctx'
+  - 'hlist_bl_for_each_entry'
+  - 'hlist_bl_for_each_entry_rcu'
+  - 'hlist_bl_for_each_entry_safe'
+  - 'hlist_for_each'
+  - 'hlist_for_each_entry'
+  - 'hlist_for_each_entry_continue'
+  - 'hlist_for_each_entry_continue_rcu'
+  - 'hlist_for_each_entry_continue_rcu_bh'
+  - 'hlist_for_each_entry_from'
+  - 'hlist_for_each_entry_from_rcu'
+  - 'hlist_for_each_entry_rcu'
+  - 'hlist_for_each_entry_rcu_bh'
+  - 'hlist_for_each_entry_rcu_notrace'
+  - 'hlist_for_each_entry_safe'
+  - '__hlist_for_each_rcu'
+  - 'hlist_for_each_safe'
+  - 'hlist_nulls_for_each_entry'
+  - 'hlist_nulls_for_each_entry_from'
+  - 'hlist_nulls_for_each_entry_rcu'
+  - 'hlist_nulls_for_each_entry_safe'
+  - 'ide_host_for_each_port'
+  - 'ide_port_for_each_dev'
+  - 'ide_port_for_each_present_dev'
+  - 'idr_for_each_entry'
+  - 'idr_for_each_entry_continue'
+  - 'idr_for_each_entry_ul'
+  - 'inet_bind_bucket_for_each'
+  - 'inet_lhash2_for_each_icsk_rcu'
+  - 'iov_for_each'
+  - 'key_for_each'
+  - 'key_for_each_safe'
+  - 'klp_for_each_func'
+  - 'klp_for_each_object'
+  - 'kvm_for_each_memslot'
+  - 'kvm_for_each_vcpu'
+  - 'list_for_each'
+  - 'list_for_each_entry'
+  - 'list_for_each_entry_continue'
+  - 'list_for_each_entry_continue_rcu'
+  - 'list_for_each_entry_continue_reverse'
+  - 'list_for_each_entry_from'
+  - 'list_for_each_entry_from_reverse'
+  - 'list_for_each_entry_lockless'
+  - 'list_for_each_entry_rcu'
+  - 'list_for_each_entry_reverse'
+  - 'list_for_each_entry_safe'
+  - 'list_for_each_entry_safe_continue'
+  - 'list_for_each_entry_safe_from'
+  - 'list_for_each_entry_safe_reverse'
+  - 'list_for_each_prev'
+  - 'list_for_each_prev_safe'
+  - 'list_for_each_safe'
+  - 'llist_for_each'
+  - 'llist_for_each_entry'
+  - 'llist_for_each_entry_safe'
+  - 'llist_for_each_safe'
+  - 'media_device_for_each_entity'
+  - 'media_device_for_each_intf'
+  - 'media_device_for_each_link'
+  - 'media_device_for_each_pad'
+  - 'netdev_for_each_lower_dev'
+  - 'netdev_for_each_lower_private'
+  - 'netdev_for_each_lower_private_rcu'
+  - 'netdev_for_each_mc_addr'
+  - 'netdev_for_each_uc_addr'
+  - 'netdev_for_each_upper_dev_rcu'
+  - 'netdev_hw_addr_list_for_each'
+  - 'nft_rule_for_each_expr'
+  - 'nla_for_each_attr'
+  - 'nla_for_each_nested'
+  - 'nlmsg_for_each_attr'
+  - 'nlmsg_for_each_msg'
+  - 'nr_neigh_for_each'
+  - 'nr_neigh_for_each_safe'
+  - 'nr_node_for_each'
+  - 'nr_node_for_each_safe'
+  - 'of_for_each_phandle'
+  - 'of_property_for_each_string'
+  - 'of_property_for_each_u32'
+  - 'pci_bus_for_each_resource'
+  - 'ping_portaddr_for_each_entry'
+  - 'plist_for_each'
+  - 'plist_for_each_continue'
+  - 'plist_for_each_entry'
+  - 'plist_for_each_entry_continue'
+  - 'plist_for_each_entry_safe'
+  - 'plist_for_each_safe'
+  - 'pnp_for_each_card'
+  - 'pnp_for_each_dev'
+  - 'protocol_for_each_card'
+  - 'protocol_for_each_dev'
+  - 'queue_for_each_hw_ctx'
+  - 'radix_tree_for_each_contig'
+  - 'radix_tree_for_each_slot'
+  - 'radix_tree_for_each_tagged'
+  - 'rbtree_postorder_for_each_entry_safe'
+  - 'resource_list_for_each_entry'
+  - 'resource_list_for_each_entry_safe'
+  - 'rhl_for_each_entry_rcu'
+  - 'rhl_for_each_rcu'
+  - 'rht_for_each'
+  - 'rht_for_each_continue'
+  - 'rht_for_each_entry'
+  - 'rht_for_each_entry_continue'
+  - 'rht_for_each_entry_rcu'
+  - 'rht_for_each_entry_rcu_continue'
+  - 'rht_for_each_entry_safe'
+  - 'rht_for_each_rcu'
+  - 'rht_for_each_rcu_continue'
+  - '__rq_for_each_bio'
+  - 'rq_for_each_segment'
+  - 'scsi_for_each_prot_sg'
+  - 'scsi_for_each_sg'
+  - 'sctp_for_each_hentry'
+  - 'sctp_skb_for_each'
+  - 'shdma_for_each_chan'
+  - '__shost_for_each_device'
+  - 'shost_for_each_device'
+  - 'sk_for_each'
+  - 'sk_for_each_bound'
+  - 'sk_for_each_entry_offset_rcu'
+  - 'sk_for_each_from'
+  - 'sk_for_each_rcu'
+  - 'sk_for_each_safe'
+  - 'sk_nulls_for_each'
+  - 'sk_nulls_for_each_from'
+  - 'sk_nulls_for_each_rcu'
+  - 'snd_pcm_group_for_each_entry'
+  - 'snd_soc_dapm_widget_for_each_path'
+  - 'snd_soc_dapm_widget_for_each_path_safe'
+  - 'snd_soc_dapm_widget_for_each_sink_path'
+  - 'snd_soc_dapm_widget_for_each_source_path'
+  - 'tb_property_for_each'
+  - 'udp_portaddr_for_each_entry'
+  - 'udp_portaddr_for_each_entry_rcu'
+  - 'usb_hub_for_each_child'
+  - 'v4l2_device_for_each_subdev'
+  - 'v4l2_m2m_for_each_dst_buf'
+  - 'v4l2_m2m_for_each_dst_buf_safe'
+  - 'v4l2_m2m_for_each_src_buf'
+  - 'v4l2_m2m_for_each_src_buf_safe'
+  - 'zorro_for_each_dev'
+
+#IncludeBlocks: Preserve # Unknown to clang-format-5.0
+IncludeCategories:
+  - Regex: '.*'
+    Priority: 1
+IncludeIsMainRegex: '(Test)?$'
+IndentCaseLabels: false
+#IndentPPDirectives: None # Unknown to clang-format-5.0
+IndentWidth: 8
+IndentWrappedFunctionNames: true
+JavaScriptQuotes: Leave
+JavaScriptWrapImports: true
+KeepEmptyLinesAtTheStartOfBlocks: false
+MacroBlockBegin: ''
+MacroBlockEnd: ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: Inner
+#ObjCBinPackProtocolList: Auto # Unknown to clang-format-5.0
+ObjCBlockIndentWidth: 8
+ObjCSpaceAfterProperty: true
+ObjCSpaceBeforeProtocolList: true
+
+# Taken from git's rules
+#PenaltyBreakAssignment: 10 # Unknown to clang-format-4.0
+PenaltyBreakBeforeFirstCallParameter: 30
+PenaltyBreakComment: 10
+PenaltyBreakFirstLessLess: 0
+PenaltyBreakString: 10
+PenaltyExcessCharacter: 100
+PenaltyReturnTypeOnItsOwnLine: 60
+
+PointerAlignment: Right
+ReflowComments: false
+SortIncludes: false
+#SortUsingDeclarations: false # Unknown to clang-format-4.0
+SpaceAfterCStyleCast: false
+SpaceAfterTemplateKeyword: true
+SpaceBeforeAssignmentOperators: true
+#SpaceBeforeCtorInitializerColon: true # Unknown to clang-format-5.0
+#SpaceBeforeInheritanceColon: true # Unknown to clang-format-5.0
+SpaceBeforeParens: ControlStatements
+#SpaceBeforeRangeBasedForLoopColon: true # Unknown to clang-format-5.0
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 1
+SpacesInAngles: false
+SpacesInContainerLiterals: false
+SpacesInCStyleCastParentheses: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+Standard: Cpp03
+TabWidth: 8
+UseTab: Always
+...
diff --git a/.gitignore b/.gitignore
index 1c8189e67467..97ba6b79834c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -84,6 +84,7 @@ modules.builtin
 !.gitignore
 !.mailmap
 !.cocciconfig
+!.clang-format
 
 #
 # Generated include files
diff --git a/Documentation/ABI/testing/ima_policy b/Documentation/ABI/testing/ima_policy
index 2028f2d093b2..b8465e00ba5f 100644
--- a/Documentation/ABI/testing/ima_policy
+++ b/Documentation/ABI/testing/ima_policy
@@ -26,7 +26,7 @@ Description:
 				 [obj_user=] [obj_role=] [obj_type=]]
 			option:	[[appraise_type=]] [permit_directio]
 
-		base: 	func:= [BPRM_CHECK][MMAP_CHECK][FILE_CHECK][MODULE_CHECK]
+		base: 	func:= [BPRM_CHECK][MMAP_CHECK][CREDS_CHECK][FILE_CHECK][MODULE_CHECK]
 				[FIRMWARE_CHECK]
 				[KEXEC_KERNEL_CHECK] [KEXEC_INITRAMFS_CHECK]
 			mask:= [[^]MAY_READ] [[^]MAY_WRITE] [[^]MAY_APPEND]
diff --git a/Documentation/ABI/testing/sysfs-class-rtc b/Documentation/ABI/testing/sysfs-class-rtc
index cf60412882f0..95984289a4ee 100644
--- a/Documentation/ABI/testing/sysfs-class-rtc
+++ b/Documentation/ABI/testing/sysfs-class-rtc
@@ -43,6 +43,14 @@ Contact:	linux-rtc@vger.kernel.org
 Description:
 		(RO) The name of the RTC corresponding to this sysfs directory
 
+What:		/sys/class/rtc/rtcX/range
+Date:		January 2018
+KernelVersion:	4.16
+Contact:	linux-rtc@vger.kernel.org
+Description:
+		Valid time range for the RTC, as seconds from epoch, formatted
+		as [min, max]
+
 What:		/sys/class/rtc/rtcX/since_epoch
 Date:		March 2006
 KernelVersion:	2.6.17
@@ -57,14 +65,6 @@ Contact:	linux-rtc@vger.kernel.org
 Description:
 		(RO) RTC-provided time in 24-hour notation (hh:mm:ss)
 
-What:		/sys/class/rtc/rtcX/*/nvmem
-Date:		February 2016
-KernelVersion:	4.6
-Contact:	linux-rtc@vger.kernel.org
-Description:
-		(RW) The non volatile storage exported as a raw file, as
-		described in Documentation/nvmem/nvmem.txt
-
 What:		/sys/class/rtc/rtcX/offset
 Date:		February 2016
 KernelVersion:	4.6
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 683145d7b054..11fc28ecdb6d 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1521,7 +1521,8 @@
 
 	ima_policy=	[IMA]
 			The builtin policies to load during IMA setup.
-			Format: "tcb | appraise_tcb | secure_boot"
+			Format: "tcb | appraise_tcb | secure_boot |
+				 fail_securely"
 
 			The "tcb" policy measures all programs exec'd, files
 			mmap'd for exec, and all files opened with the read
@@ -1536,6 +1537,11 @@
 			of files (eg. kexec kernel image, kernel modules,
 			firmware, policy, etc) based on file signatures.
 
+			The "fail_securely" policy forces file signature
+			verification failure also on privileged mounted
+			filesystems with the SB_I_UNVERIFIABLE_SIGNATURE
+			flag.
+
 	ima_tcb		[IMA] Deprecated.  Use ima_policy= instead.
 			Load a policy which meets the needs of the Trusted
 			Computing Base.  This means IMA will measure all
@@ -1901,6 +1907,9 @@
 	kvm.ignore_msrs=[KVM] Ignore guest accesses to unhandled MSRs.
 			Default is 0 (don't ignore, but inject #GP)
 
+	kvm.enable_vmware_backdoor=[KVM] Support VMware backdoor PV interface.
+				   Default is false (don't support).
+
 	kvm.mmu_audit=	[KVM] This is a R/W parameter which allows audit
 			KVM MMU at runtime.
 			Default is 0 (off)
diff --git a/Documentation/arm64/memory.txt b/Documentation/arm64/memory.txt
index 671bc0639262..c5dab30d3389 100644
--- a/Documentation/arm64/memory.txt
+++ b/Documentation/arm64/memory.txt
@@ -86,9 +86,12 @@ Translation table lookup with 64KB pages:
  +-------------------------------------------------> [63] TTBR0/1
 
 
-When using KVM without the Virtualization Host Extensions, the hypervisor
-maps kernel pages in EL2 at a fixed offset from the kernel VA. See the
-kern_hyp_va macro for more details.
+When using KVM without the Virtualization Host Extensions, the
+hypervisor maps kernel pages in EL2 at a fixed (and potentially
+random) offset from the linear mapping. See the kern_hyp_va macro and
+kvm_update_va_mask function for more details. MMIO devices such as
+GICv2 get mapped next to the HYP idmap page, as do vectors when
+ARM64_HARDEN_EL2_VECTORS is selected for particular CPUs.
 
 When using KVM with the Virtualization Host Extensions, no additional
 mappings are created, since the host kernel runs directly in EL2.
diff --git a/Documentation/cgroup-v1/memory.txt b/Documentation/cgroup-v1/memory.txt
index a4af2e124e24..3682e99234c2 100644
--- a/Documentation/cgroup-v1/memory.txt
+++ b/Documentation/cgroup-v1/memory.txt
@@ -262,7 +262,7 @@ When oom event notifier is registered, event will be delivered.
 2.6 Locking
 
    lock_page_cgroup()/unlock_page_cgroup() should not be called under
-   mapping->tree_lock.
+   the i_pages lock.
 
    Other lock order is following:
    PG_locked.
diff --git a/Documentation/clk.txt b/Documentation/clk.txt
index be909ed45970..511628bb3d3a 100644
--- a/Documentation/clk.txt
+++ b/Documentation/clk.txt
@@ -268,9 +268,19 @@ The common clock framework uses two global locks, the prepare lock and the
 enable lock.
 
 The enable lock is a spinlock and is held across calls to the .enable,
-.disable and .is_enabled operations. Those operations are thus not allowed to
-sleep, and calls to the clk_enable(), clk_disable() and clk_is_enabled() API
-functions are allowed in atomic context.
+.disable operations. Those operations are thus not allowed to sleep,
+and calls to the clk_enable(), clk_disable() API functions are allowed in
+atomic context.
+
+For clk_is_enabled() API, it is also designed to be allowed to be used in
+atomic context. However, it doesn't really make any sense to hold the enable
+lock in core, unless you want to do something else with the information of
+the enable state with that lock held. Otherwise, seeing if a clk is enabled is
+a one-shot read of the enabled state, which could just as easily change after
+the function returns because the lock is released. Thus the user of this API
+needs to handle synchronizing the read of the state with whatever they're
+using it for to make sure that the enable state doesn't change during that
+time.
 
 The prepare lock is a mutex and is held across calls to all other operations.
 All those operations are allowed to sleep, and calls to the corresponding API
diff --git a/Documentation/cpu-freq/core.txt b/Documentation/cpu-freq/core.txt
index 978463a7c81e..073f128af5a7 100644
--- a/Documentation/cpu-freq/core.txt
+++ b/Documentation/cpu-freq/core.txt
@@ -97,12 +97,10 @@ flags	- flags of the cpufreq driver
 ==================================================================
 For details about OPP, see Documentation/power/opp.txt
 
-dev_pm_opp_init_cpufreq_table - cpufreq framework typically is initialized with
-	cpufreq_table_validate_and_show() which is provided with the list of
-	frequencies that are available for operation. This function provides
-	a ready to use conversion routine to translate the OPP layer's internal
-	information about the available frequencies into a format readily
-	providable to cpufreq.
+dev_pm_opp_init_cpufreq_table -
+	This function provides a ready to use conversion routine to translate
+	the OPP layer's internal information about the available frequencies
+	into a format readily providable to cpufreq.
 
 	WARNING: Do not use this function in interrupt context.
 
@@ -112,7 +110,7 @@ dev_pm_opp_init_cpufreq_table - cpufreq framework typically is initialized with
 		/* Do things */
 		r = dev_pm_opp_init_cpufreq_table(dev, &freq_table);
 		if (!r)
-			cpufreq_table_validate_and_show(policy, freq_table);
+			policy->freq_table = freq_table;
 		/* Do other things */
 	 }
 
diff --git a/Documentation/cpu-freq/cpu-drivers.txt b/Documentation/cpu-freq/cpu-drivers.txt
index 61546ac578d6..6e353d00cdc6 100644
--- a/Documentation/cpu-freq/cpu-drivers.txt
+++ b/Documentation/cpu-freq/cpu-drivers.txt
@@ -259,10 +259,8 @@ CPUFREQ_ENTRY_INVALID. The entries don't need to be in sorted in any
 particular order, but if they are cpufreq core will do DVFS a bit
 quickly for them as search for best match is faster.
 
-By calling cpufreq_table_validate_and_show(), the cpuinfo.min_freq and
-cpuinfo.max_freq values are detected, and policy->min and policy->max
-are set to the same values. This is helpful for the per-CPU
-initialization stage.
+The cpufreq table is verified automatically by the core if the policy contains a
+valid pointer in its policy->freq_table field.
 
 cpufreq_frequency_table_verify() assures that at least one valid
 frequency is within policy->min and policy->max, and all other criteria
diff --git a/Documentation/cpuidle/sysfs.txt b/Documentation/cpuidle/sysfs.txt
index b6f44f490ed7..d1587f434e7b 100644
--- a/Documentation/cpuidle/sysfs.txt
+++ b/Documentation/cpuidle/sysfs.txt
@@ -40,6 +40,7 @@ total 0
 -r--r--r-- 1 root root 4096 Feb  8 10:42 latency
 -r--r--r-- 1 root root 4096 Feb  8 10:42 name
 -r--r--r-- 1 root root 4096 Feb  8 10:42 power
+-r--r--r-- 1 root root 4096 Feb  8 10:42 residency
 -r--r--r-- 1 root root 4096 Feb  8 10:42 time
 -r--r--r-- 1 root root 4096 Feb  8 10:42 usage
 
@@ -50,6 +51,7 @@ total 0
 -r--r--r-- 1 root root 4096 Feb  8 10:42 latency
 -r--r--r-- 1 root root 4096 Feb  8 10:42 name
 -r--r--r-- 1 root root 4096 Feb  8 10:42 power
+-r--r--r-- 1 root root 4096 Feb  8 10:42 residency
 -r--r--r-- 1 root root 4096 Feb  8 10:42 time
 -r--r--r-- 1 root root 4096 Feb  8 10:42 usage
 
@@ -60,6 +62,7 @@ total 0
 -r--r--r-- 1 root root 4096 Feb  8 10:42 latency
 -r--r--r-- 1 root root 4096 Feb  8 10:42 name
 -r--r--r-- 1 root root 4096 Feb  8 10:42 power
+-r--r--r-- 1 root root 4096 Feb  8 10:42 residency
 -r--r--r-- 1 root root 4096 Feb  8 10:42 time
 -r--r--r-- 1 root root 4096 Feb  8 10:42 usage
 
@@ -70,6 +73,7 @@ total 0
 -r--r--r-- 1 root root 4096 Feb  8 10:42 latency
 -r--r--r-- 1 root root 4096 Feb  8 10:42 name
 -r--r--r-- 1 root root 4096 Feb  8 10:42 power
+-r--r--r-- 1 root root 4096 Feb  8 10:42 residency
 -r--r--r-- 1 root root 4096 Feb  8 10:42 time
 -r--r--r-- 1 root root 4096 Feb  8 10:42 usage
 --------------------------------------------------------------------------------
@@ -78,6 +82,8 @@ total 0
 * desc : Small description about the idle state (string)
 * disable : Option to disable this idle state (bool) -> see note below
 * latency : Latency to exit out of this idle state (in microseconds)
+* residency : Time after which a state becomes more efficient than any
+  shallower state (in microseconds)
 * name : Name of the idle state (string)
 * power : Power consumed while in this idle state (in milliwatts)
 * time : Total time spent in this idle state (in microseconds)
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,audsys.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,audsys.txt
index 9b8f578d5e19..34a69ba67f13 100644
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,audsys.txt
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,audsys.txt
@@ -6,6 +6,7 @@ The MediaTek AUDSYS controller provides various clocks to the system.
 Required Properties:
 
 - compatible: Should be one of:
+	- "mediatek,mt2701-audsys", "syscon"
 	- "mediatek,mt7622-audsys", "syscon"
 - #clock-cells: Must be 1
 
@@ -13,10 +14,19 @@ The AUDSYS controller uses the common clk binding from
 Documentation/devicetree/bindings/clock/clock-bindings.txt
 The available clocks are defined in dt-bindings/clock/mt*-clk.h.
 
+Required sub-nodes:
+-------
+For common binding part and usage, refer to
+../sound/mt2701-afe-pcm.txt.
+
 Example:
 
-audsys: audsys@11220000 {
-	compatible = "mediatek,mt7622-audsys", "syscon";
-	reg = <0 0x11220000 0 0x1000>;
-	#clock-cells = <1>;
-};
+	audsys: clock-controller@11220000 {
+		compatible = "mediatek,mt7622-audsys", "syscon";
+		reg = <0 0x11220000 0 0x2000>;
+		#clock-cells = <1>;
+
+		afe: audio-controller {
+			...
+		};
+	};
diff --git a/Documentation/devicetree/bindings/clock/imx6sll-clock.txt b/Documentation/devicetree/bindings/clock/imx6sll-clock.txt
new file mode 100644
index 000000000000..fee849d5fdd1
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/imx6sll-clock.txt
@@ -0,0 +1,36 @@
+* Clock bindings for Freescale i.MX6 SLL
+
+Required properties:
+- compatible: Should be "fsl,imx6sll-ccm"
+- reg: Address and length of the register set
+- #clock-cells: Should be <1>
+- clocks: list of clock specifiers, must contain an entry for each required
+  entry in clock-names
+- clock-names: should include entries "ckil", "osc", "ipp_di0" and "ipp_di1"
+
+The clock consumer should specify the desired clock by having the clock
+ID in its "clocks" phandle cell.  See include/dt-bindings/clock/imx6sll-clock.h
+for the full list of i.MX6 SLL clock IDs.
+
+Examples:
+
+#include <dt-bindings/clock/imx6sll-clock.h>
+
+clks: clock-controller@20c4000 {
+		compatible = "fsl,imx6sll-ccm";
+		reg = <0x020c4000 0x4000>;
+		interrupts = <GIC_SPI 87 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 88 IRQ_TYPE_LEVEL_HIGH>;
+		#clock-cells = <1>;
+		clocks = <&ckil>, <&osc>, <&ipp_di0>, <&ipp_di1>;
+		clock-names = "ckil", "osc", "ipp_di0", "ipp_di1";
+};
+
+uart1: serial@2020000 {
+		compatible = "fsl,imx6sl-uart", "fsl,imx6q-uart", "fsl,imx21-uart";
+		reg = <0x02020000 0x4000>;
+		interrupts = <GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&clks IMX6SLL_CLK_UART1_IPG>,
+			 <&clks IMX6SLL_CLK_UART1_SERIAL>;
+		clock-names = "ipg", "per";
+};
diff --git a/Documentation/devicetree/bindings/clock/intc_stratix10.txt b/Documentation/devicetree/bindings/clock/intc_stratix10.txt
new file mode 100644
index 000000000000..9f4ec5cb5c6b
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/intc_stratix10.txt
@@ -0,0 +1,20 @@
+Device Tree Clock bindings for Intel's SoCFPGA Stratix10 platform
+
+This binding uses the common clock binding[1].
+
+[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+Required properties:
+- compatible : shall be
+	"intel,stratix10-clkmgr"
+
+- reg : shall be the control register offset from CLOCK_MANAGER's base for the clock.
+
+- #clock-cells : from common clock binding, shall be set to 1.
+
+Example:
+	clkmgr: clock-controller@ffd10000 {
+		compatible = "intel,stratix10-clkmgr";
+		reg = <0xffd10000 0x1000>;
+		#clock-cells = <1>;
+	};
diff --git a/Documentation/devicetree/bindings/clock/renesas,cpg-mssr.txt b/Documentation/devicetree/bindings/clock/renesas,cpg-mssr.txt
index f1890d0777a6..773a5226342f 100644
--- a/Documentation/devicetree/bindings/clock/renesas,cpg-mssr.txt
+++ b/Documentation/devicetree/bindings/clock/renesas,cpg-mssr.txt
@@ -22,7 +22,9 @@ Required Properties:
       - "renesas,r8a7794-cpg-mssr" for the r8a7794 SoC (R-Car E2)
       - "renesas,r8a7795-cpg-mssr" for the r8a7795 SoC (R-Car H3)
       - "renesas,r8a7796-cpg-mssr" for the r8a7796 SoC (R-Car M3-W)
+      - "renesas,r8a77965-cpg-mssr" for the r8a77965 SoC (R-Car M3-N)
       - "renesas,r8a77970-cpg-mssr" for the r8a77970 SoC (R-Car V3M)
+      - "renesas,r8a77980-cpg-mssr" for the r8a77980 SoC (R-Car V3H)
       - "renesas,r8a77995-cpg-mssr" for the r8a77995 SoC (R-Car D3)
 
   - reg: Base address and length of the memory resource used by the CPG/MSSR
@@ -32,8 +34,8 @@ Required Properties:
     clock-names
   - clock-names: List of external parent clock names. Valid names are:
       - "extal" (r8a7743, r8a7745, r8a7790, r8a7791, r8a7792, r8a7793, r8a7794,
-		 r8a7795, r8a7796, r8a77970, r8a77995)
-      - "extalr" (r8a7795, r8a7796, r8a77970)
+		 r8a7795, r8a7796, r8a77965, r8a77970, r8a77980, r8a77995)
+      - "extalr" (r8a7795, r8a7796, r8a77965, r8a77970, r8a77980)
       - "usb_extal" (r8a7743, r8a7745, r8a7790, r8a7791, r8a7793, r8a7794)
 
   - #clock-cells: Must be 2
diff --git a/Documentation/devicetree/bindings/clock/rockchip,rk3328-cru.txt b/Documentation/devicetree/bindings/clock/rockchip,rk3328-cru.txt
index e71c675ba5da..904ae682ea90 100644
--- a/Documentation/devicetree/bindings/clock/rockchip,rk3328-cru.txt
+++ b/Documentation/devicetree/bindings/clock/rockchip,rk3328-cru.txt
@@ -32,6 +32,7 @@ clock-output-names:
  - "clkin_i2s" - external I2S clock - optional,
  - "gmac_clkin" - external GMAC clock - optional
  - "phy_50m_out" - output clock of the pll in the mac phy
+ - "hdmi_phy" - output clock of the hdmi phy pll - optional
 
 Example: Clock controller node:
 
diff --git a/Documentation/devicetree/bindings/clock/silabs,si544.txt b/Documentation/devicetree/bindings/clock/silabs,si544.txt
new file mode 100644
index 000000000000..b86535b80920
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/silabs,si544.txt
@@ -0,0 +1,25 @@
+Binding for Silicon Labs 544 programmable I2C clock generator.
+
+Reference
+This binding uses the common clock binding[1]. Details about the device can be
+found in the datasheet[2].
+
+[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+[2] Si544 datasheet
+    https://www.silabs.com/documents/public/data-sheets/si544-datasheet.pdf
+
+Required properties:
+ - compatible: One of "silabs,si544a", "silabs,si544b", "silabs,si544c" according
+               to the speed grade of the chip.
+ - reg: I2C device address.
+ - #clock-cells: From common clock bindings: Shall be 0.
+
+Optional properties:
+ - clock-output-names: From common clock bindings. Recommended to be "si544".
+
+Example:
+	si544: clock-controller@55 {
+		reg = <0x55>;
+		#clock-cells = <0>;
+		compatible = "silabs,si544b";
+	};
diff --git a/Documentation/devicetree/bindings/clock/st,stm32mp1-rcc.txt b/Documentation/devicetree/bindings/clock/st,stm32mp1-rcc.txt
new file mode 100644
index 000000000000..fb9495ea582c
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/st,stm32mp1-rcc.txt
@@ -0,0 +1,60 @@
+STMicroelectronics STM32 Peripheral Reset Clock Controller
+==========================================================
+
+The RCC IP is both a reset and a clock controller.
+
+RCC also handles power management (resume/suspend and wakeup interrupt).
+
+Please also refer to reset.txt for common reset controller binding usage.
+
+Please also refer to clock-bindings.txt for common clock controller
+binding usage.
+
+
+Required properties:
+- compatible: "st,stm32mp1-rcc", "syscon"
+- reg: should be register base and length as documented in the datasheet
+- #clock-cells: 1, device nodes should specify the clock in their
+  "clocks" property, containing a phandle to the clock device node,
+  an index specifying the clock to use.
+- #reset-cells: Shall be 1
+- interrupts: Should contain a general interrupt line and an interrupt line
+  for the wake-up of the processor (CSTOP).
+
+Example:
+	rcc: rcc@50000000 {
+		compatible = "st,stm32mp1-rcc", "syscon";
+		reg = <0x50000000 0x1000>;
+		#clock-cells = <1>;
+		#reset-cells = <1>;
+		interrupts = <GIC_SPI 5 IRQ_TYPE_NONE>,
+			     <GIC_SPI 145 IRQ_TYPE_NONE>;
+	};
+
+Specifying clocks
+=================
+
+All available clocks are defined as preprocessor macros in
+dt-bindings/clock/stm32mp1-clks.h header and can be used in device
+tree sources.
+
+Specifying softreset control of devices
+=======================================
+
+Device nodes should specify the reset channel required in their "resets"
+property, containing a phandle to the reset device node and an index specifying
+which channel to use.
+The index is the bit number within the RCC registers bank, starting from RCC
+base address.
+It is calculated as: index = register_offset / 4 * 32 + bit_offset.
+Where bit_offset is the bit offset within the register.
+
+For example on STM32MP1, for LTDC reset:
+ ltdc = APB4_RSTSETR_offset / 4 * 32 + LTDC_bit_offset
+      = 0x180 / 4 * 32 + 0 = 3072
+
+The list of valid indices for STM32MP1 is available in:
+include/dt-bindings/reset-controller/stm32mp1-resets.h
+
+This file implements defines like:
+#define LTDC_R	3072
diff --git a/Documentation/devicetree/bindings/clock/sunxi-ccu.txt b/Documentation/devicetree/bindings/clock/sunxi-ccu.txt
index 4ca21c3a6fc9..460ef27b1008 100644
--- a/Documentation/devicetree/bindings/clock/sunxi-ccu.txt
+++ b/Documentation/devicetree/bindings/clock/sunxi-ccu.txt
@@ -20,6 +20,7 @@ Required properties :
 		- "allwinner,sun50i-a64-ccu"
 		- "allwinner,sun50i-a64-r-ccu"
 		- "allwinner,sun50i-h5-ccu"
+		- "allwinner,sun50i-h6-ccu"
 		- "nextthing,gr8-ccu"
 
 - reg: Must contain the registers base address and length
@@ -31,6 +32,9 @@ Required properties :
 - #clock-cells : must contain 1
 - #reset-cells : must contain 1
 
+For the main CCU on H6, one more clock is needed:
+- "iosc": the SoC's internal frequency oscillator
+
 For the PRCM CCUs on A83T/H3/A64, two more clocks are needed:
 - "pll-periph": the SoC's peripheral PLL from the main CCU
 - "iosc": the SoC's internal frequency oscillator
diff --git a/Documentation/devicetree/bindings/clock/ti/davinci/da8xx-cfgchip.txt b/Documentation/devicetree/bindings/clock/ti/davinci/da8xx-cfgchip.txt
new file mode 100644
index 000000000000..1e03dce99a8f
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/ti/davinci/da8xx-cfgchip.txt
@@ -0,0 +1,93 @@
+Binding for TI DA8XX/OMAP-L13X/AM17XX/AM18XX CFGCHIP clocks
+
+TI DA8XX/OMAP-L13X/AM17XX/AM18XX SoCs contain a general purpose set of
+registers called CFGCHIPn. Some of these registers function as clock
+gates. This document describes the bindings for those clocks.
+
+All of the clock nodes described below must be child nodes of a CFGCHIP node
+(compatible = "ti,da830-cfgchip").
+
+USB PHY clocks
+--------------
+Required properties:
+- compatible: shall be "ti,da830-usb-phy-clocks".
+- #clock-cells: from common clock binding; shall be set to 1.
+- clocks: phandles to the parent clocks corresponding to clock-names
+- clock-names: shall be "fck", "usb_refclkin", "auxclk"
+
+This node provides two clocks. The clock at index 0 is the USB 2.0 PHY 48MHz
+clock and the clock at index 1 is the USB 1.1 PHY 48MHz clock.
+
+eHRPWM Time Base Clock (TBCLK)
+------------------------------
+Required properties:
+- compatible: shall be "ti,da830-tbclksync".
+- #clock-cells: from common clock binding; shall be set to 0.
+- clocks: phandle to the parent clock
+- clock-names: shall be "fck"
+
+PLL DIV4.5 divider
+------------------
+Required properties:
+- compatible: shall be "ti,da830-div4p5ena".
+- #clock-cells: from common clock binding; shall be set to 0.
+- clocks: phandle to the parent clock
+- clock-names: shall be "pll0_pllout"
+
+EMIFA clock source (ASYNC1)
+---------------------------
+Required properties:
+- compatible: shall be "ti,da850-async1-clksrc".
+- #clock-cells: from common clock binding; shall be set to 0.
+- clocks: phandles to the parent clocks corresponding to clock-names
+- clock-names: shall be "pll0_sysclk3", "div4.5"
+
+ASYNC3 clock source
+-------------------
+Required properties:
+- compatible: shall be "ti,da850-async3-clksrc".
+- #clock-cells: from common clock binding; shall be set to 0.
+- clocks: phandles to the parent clocks corresponding to clock-names
+- clock-names: shall be "pll0_sysclk2", "pll1_sysclk2"
+
+Examples:
+
+	cfgchip: syscon@1417c {
+		compatible = "ti,da830-cfgchip", "syscon", "simple-mfd";
+		reg = <0x1417c 0x14>;
+
+		usb_phy_clk: usb-phy-clocks {
+			compatible = "ti,da830-usb-phy-clocks";
+			#clock-cells = <1>;
+			clocks = <&psc1 1>, <&usb_refclkin>, <&pll0_auxclk>;
+			clock-names = "fck", "usb_refclkin", "auxclk";
+		};
+		ehrpwm_tbclk: ehrpwm_tbclk {
+			compatible = "ti,da830-tbclksync";
+			#clock-cells = <0>;
+			clocks = <&psc1 17>;
+			clock-names = "fck";
+		};
+		div4p5_clk: div4.5 {
+			compatible = "ti,da830-div4p5ena";
+			#clock-cells = <0>;
+			clocks = <&pll0_pllout>;
+			clock-names = "pll0_pllout";
+		};
+		async1_clk: async1 {
+			compatible = "ti,da850-async1-clksrc";
+			#clock-cells = <0>;
+			clocks = <&pll0_sysclk 3>, <&div4p5_clk>;
+			clock-names = "pll0_sysclk3", "div4.5";
+		};
+		async3_clk: async3 {
+			compatible = "ti,da850-async3-clksrc";
+			#clock-cells = <0>;
+			clocks = <&pll0_sysclk 2>, <&pll1_sysclk 2>;
+			clock-names = "pll0_sysclk2", "pll1_sysclk2";
+		};
+	};
+
+Also see:
+- Documentation/devicetree/bindings/clock/clock-bindings.txt
+
diff --git a/Documentation/devicetree/bindings/clock/ti/davinci/pll.txt b/Documentation/devicetree/bindings/clock/ti/davinci/pll.txt
new file mode 100644
index 000000000000..36998e184821
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/ti/davinci/pll.txt
@@ -0,0 +1,96 @@
+Binding for TI DaVinci PLL Controllers
+
+The PLL provides clocks to most of the components on the SoC. In addition
+to the PLL itself, this controller also contains bypasses, gates, dividers,
+and multiplexers for various clock signals.
+
+Required properties:
+- compatible: shall be one of:
+	- "ti,da850-pll0" for PLL0 on DA850/OMAP-L138/AM18XX
+	- "ti,da850-pll1" for PLL1 on DA850/OMAP-L138/AM18XX
+- reg: physical base address and size of the controller's register area.
+- clocks: phandles corresponding to the clock names
+- clock-names: names of the clock sources - depends on compatible string
+	- for "ti,da850-pll0", shall be "clksrc", "extclksrc"
+	- for "ti,da850-pll1", shall be "clksrc"
+
+Optional properties:
+- ti,clkmode-square-wave: Indicates that the board is supplying a square
+	wave input on the OSCIN pin instead of using a crystal oscillator.
+	This property is only valid when compatible = "ti,da850-pll0".
+
+
+Optional child nodes:
+
+pllout
+	Describes the main PLL clock output (before POSTDIV). The node name must
+	be "pllout".
+
+	Required properties:
+	- #clock-cells: shall be 0
+
+sysclk
+	Describes the PLLDIVn divider clocks that provide the SYSCLKn clock
+	domains. The node name must be "sysclk". Consumers of this node should
+	use "n" in "SYSCLKn" as the index parameter for the clock cell.
+
+	Required properties:
+	- #clock-cells: shall be 1
+
+auxclk
+	Describes the AUXCLK output of the PLL. The node name must be "auxclk".
+	This child node is only valid when compatible = "ti,da850-pll0".
+
+	Required properties:
+	- #clock-cells: shall be 0
+
+obsclk
+	Describes the OBSCLK output of the PLL. The node name must be "obsclk".
+
+	Required properties:
+	- #clock-cells: shall be 0
+
+
+Examples:
+
+	pll0: clock-controller@11000 {
+		compatible = "ti,da850-pll0";
+		reg = <0x11000 0x1000>;
+		clocks = <&ref_clk>, <&pll1_sysclk 3>;
+		clock-names = "clksrc", "extclksrc";
+		ti,clkmode-square-wave;
+
+		pll0_pllout: pllout {
+			#clock-cells = <0>;
+		};
+
+		pll0_sysclk: sysclk {
+			#clock-cells = <1>;
+		};
+
+		pll0_auxclk: auxclk {
+			#clock-cells = <0>;
+		};
+
+		pll0_obsclk: obsclk {
+			#clock-cells = <0>;
+		};
+	};
+
+	pll1: clock-controller@21a000 {
+		compatible = "ti,da850-pll1";
+		reg = <0x21a000 0x1000>;
+		clocks = <&ref_clk>;
+		clock-names = "clksrc";
+
+		pll1_sysclk: sysclk {
+			#clock-cells = <1>;
+		};
+
+		pll1_obsclk: obsclk {
+			#clock-cells = <0>;
+		};
+	};
+
+Also see:
+- Documentation/devicetree/bindings/clock/clock-bindings.txt
diff --git a/Documentation/devicetree/bindings/clock/ti/davinci/psc.txt b/Documentation/devicetree/bindings/clock/ti/davinci/psc.txt
new file mode 100644
index 000000000000..dae4ad8e198c
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/ti/davinci/psc.txt
@@ -0,0 +1,71 @@
+Binding for TI DaVinci Power Sleep Controller (PSC)
+
+The PSC provides power management, clock gating and reset functionality. It is
+primarily used for clocking.
+
+Required properties:
+- compatible: shall be one of:
+	- "ti,da850-psc0" for PSC0 on DA850/OMAP-L138/AM18XX
+	- "ti,da850-psc1" for PSC1 on DA850/OMAP-L138/AM18XX
+- reg: physical base address and size of the controller's register area
+- #clock-cells: from common clock binding; shall be set to 1
+- #power-domain-cells: from generic power domain binding; shall be set to 1.
+- clocks: phandles to clocks corresponding to the clock-names property
+- clock-names: list of parent clock names - depends on compatible value
+	- for "ti,da850-psc0", shall be "pll0_sysclk1", "pll0_sysclk2",
+	  "pll0_sysclk4", "pll0_sysclk6", "async1"
+	- for "ti,da850-psc1", shall be "pll0_sysclk2", "pll0_sysclk4", "async3"
+
+Optional properties:
+- #reset-cells: from reset binding; shall be set to 1 - only applicable when
+  at least one local domain provides a local reset.
+
+Consumers:
+
+	Clock, power domain and reset consumers shall use the local power domain
+	module ID (LPSC) as the index corresponding to the clock cell. Refer to
+	the device-specific datasheet to find these numbers. NB: Most local
+	domains only provide a clock/power domain and not a reset.
+
+Examples:
+
+	psc0: clock-controller@10000 {
+		compatible = "ti,da850-psc0";
+		reg = <0x10000 0x1000>;
+		#clock-cells = <1>;
+		#power-domain-cells = <1>;
+		#reset-cells = <1>;
+		clocks = <&pll0_sysclk 1>, <&pll0_sysclk 2>,
+			 <&pll0_sysclk 4>, <&pll0_sysclk 6>, <&async1_clk>;
+		clock-names = "pll0_sysclk1", "pll0_sysclk2",
+			      "pll0_sysclk4", "pll0_sysclk6", "async1";
+	};
+	psc1: clock-controller@227000 {
+		compatible = "ti,da850-psc1";
+		reg = <0x227000 0x1000>;
+		#clock-cells = <1>;
+		#power-domain-cells = <1>;
+		clocks = <&pll0_sysclk 2>, <&pll0_sysclk 4>, <&async3_clk>;
+		clock-names = "pll0_sysclk2", "pll0_sysclk4", "async3";
+	};
+
+	/* consumer */
+	dsp: dsp@11800000 {
+		compatible = "ti,da850-dsp";
+		reg = <0x11800000 0x40000>,
+		      <0x11e00000 0x8000>,
+		      <0x11f00000 0x8000>,
+		      <0x01c14044 0x4>,
+		      <0x01c14174 0x8>;
+		reg-names = "l2sram", "l1pram", "l1dram", "host1cfg", "chipsig";
+		interrupt-parent = <&intc>;
+		interrupts = <28>;
+		clocks = <&psc0 15>;
+		power-domains = <&psc0 15>;
+		resets = <&psc0 15>;
+	};
+
+Also see:
+- Documentation/devicetree/bindings/clock/clock-bindings.txt
+- Documentation/devicetree/bindings/power/power_domain.txt
+- Documentation/devicetree/bindings/reset/reset.txt
diff --git a/Documentation/devicetree/bindings/clock/ti/divider.txt b/Documentation/devicetree/bindings/clock/ti/divider.txt
index 35a6f5c7e5c2..9b13b32974f9 100644
--- a/Documentation/devicetree/bindings/clock/ti/divider.txt
+++ b/Documentation/devicetree/bindings/clock/ti/divider.txt
@@ -75,6 +75,9 @@ Optional properties:
 - ti,invert-autoidle-bit : autoidle is enabled by setting the bit to 0,
   see [2]
 - ti,set-rate-parent : clk_set_rate is propagated to parent
+- ti,latch-bit : latch the divider value to HW, only needed if the register
+  access requires this. As an example dra76x DPLL_GMAC H14 divider implements
+  such behavior.
 
 Examples:
 dpll_usb_m2_ck: dpll_usb_m2_ck@4a008190 {
diff --git a/Documentation/devicetree/bindings/clock/ti/mux.txt b/Documentation/devicetree/bindings/clock/ti/mux.txt
index 2d0d170f8001..eec8994b9be8 100644
--- a/Documentation/devicetree/bindings/clock/ti/mux.txt
+++ b/Documentation/devicetree/bindings/clock/ti/mux.txt
@@ -48,6 +48,9 @@ Optional properties:
   zero
 - ti,set-rate-parent : clk_set_rate is propagated to parent clock,
   not supported by the composite-mux-clock subtype
+- ti,latch-bit : latch the mux value to HW, only needed if the register
+  access requires this. As an example, dra7x DPLL_GMAC H14 muxing
+  implements such behavior.
 
 Examples:
 
diff --git a/Documentation/devicetree/bindings/dma/mtk-hsdma.txt b/Documentation/devicetree/bindings/dma/mtk-hsdma.txt
new file mode 100644
index 000000000000..4bb317359dc6
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/mtk-hsdma.txt
@@ -0,0 +1,33 @@
+MediaTek High-Speed DMA Controller
+==================================
+
+This device follows the generic DMA bindings defined in dma/dma.txt.
+
+Required properties:
+
+- compatible:	Must be one of
+		  "mediatek,mt7622-hsdma": for MT7622 SoC
+		  "mediatek,mt7623-hsdma": for MT7623 SoC
+- reg:		Should contain the register's base address and length.
+- interrupts:	Should contain a reference to the interrupt used by this
+		device.
+- clocks:	Should be the clock specifiers corresponding to the entry in
+		clock-names property.
+- clock-names:	Should contain "hsdma" entries.
+- power-domains: Phandle to the power domain that the device is part of
+- #dma-cells: 	The length of the DMA specifier, must be <1>. This one cell
+		in dmas property of a client device represents the channel
+		number.
+Example:
+
+        hsdma: dma-controller@1b007000 {
+		compatible = "mediatek,mt7623-hsdma";
+		reg = <0 0x1b007000 0 0x1000>;
+		interrupts = <GIC_SPI 98 IRQ_TYPE_LEVEL_LOW>;
+		clocks = <&ethsys CLK_ETHSYS_HSDMA>;
+		clock-names = "hsdma";
+		power-domains = <&scpsys MT2701_POWER_DOMAIN_ETH>;
+		#dma-cells = <1>;
+	};
+
+DMA clients must use the format described in dma/dma.txt file.
diff --git a/Documentation/devicetree/bindings/dma/qcom_bam_dma.txt b/Documentation/devicetree/bindings/dma/qcom_bam_dma.txt
index 9cbf5d9df8fd..cf5b9e44432c 100644
--- a/Documentation/devicetree/bindings/dma/qcom_bam_dma.txt
+++ b/Documentation/devicetree/bindings/dma/qcom_bam_dma.txt
@@ -15,6 +15,10 @@ Required properties:
   the secure world.
 - qcom,controlled-remotely : optional, indicates that the bam is controlled by
   remote proccessor i.e. execution environment.
+- num-channels : optional, indicates supported number of DMA channels in a
+  remotely controlled bam.
+- qcom,num-ees : optional, indicates supported number of Execution Environments
+  in a remotely controlled bam.
 
 Example:
 
diff --git a/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.txt b/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.txt
index 891db41e9420..aadfb236d53a 100644
--- a/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.txt
+++ b/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.txt
@@ -18,6 +18,7 @@ Required Properties:
 	      Examples with soctypes are:
 		- "renesas,dmac-r8a7743" (RZ/G1M)
 		- "renesas,dmac-r8a7745" (RZ/G1E)
+		- "renesas,dmac-r8a77470" (RZ/G1C)
 		- "renesas,dmac-r8a7790" (R-Car H2)
 		- "renesas,dmac-r8a7791" (R-Car M2-W)
 		- "renesas,dmac-r8a7792" (R-Car V2H)
@@ -26,6 +27,7 @@ Required Properties:
 		- "renesas,dmac-r8a7795" (R-Car H3)
 		- "renesas,dmac-r8a7796" (R-Car M3-W)
 		- "renesas,dmac-r8a77970" (R-Car V3M)
+		- "renesas,dmac-r8a77980" (R-Car V3H)
 
 - reg: base address and length of the registers block for the DMAC
 
diff --git a/Documentation/devicetree/bindings/dma/renesas,usb-dmac.txt b/Documentation/devicetree/bindings/dma/renesas,usb-dmac.txt
index f3d1f151ba80..9dc935e24e55 100644
--- a/Documentation/devicetree/bindings/dma/renesas,usb-dmac.txt
+++ b/Documentation/devicetree/bindings/dma/renesas,usb-dmac.txt
@@ -11,6 +11,7 @@ Required Properties:
 	  - "renesas,r8a7794-usb-dmac" (R-Car E2)
 	  - "renesas,r8a7795-usb-dmac" (R-Car H3)
 	  - "renesas,r8a7796-usb-dmac" (R-Car M3-W)
+	  - "renesas,r8a77965-usb-dmac" (R-Car M3-N)
 - reg: base address and length of the registers block for the DMAC
 - interrupts: interrupt specifiers for the DMAC, one for each entry in
   interrupt-names.
diff --git a/Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.txt b/Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.txt
new file mode 100644
index 000000000000..f237b7928283
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.txt
@@ -0,0 +1,41 @@
+Synopsys DesignWare AXI DMA Controller
+
+Required properties:
+- compatible: "snps,axi-dma-1.01a"
+- reg: Address range of the DMAC registers. This should include
+  all of the per-channel registers.
+- interrupts: Should contain the DMAC interrupt number.
+- interrupt-parent: Should be the phandle for the interrupt controller
+  that services interrupts for this device.
+- dma-channels: Number of channels supported by hardware.
+- snps,dma-masters: Number of AXI masters supported by the hardware.
+- snps,data-width: Maximum AXI data width supported by hardware.
+  (0 - 8bits, 1 - 16bits, 2 - 32bits, ..., 6 - 512bits)
+- snps,priority: Priority of channel. Array size is equal to the number of
+  dma-channels. Priority value must be programmed within [0:dma-channels-1]
+  range. (0 - minimum priority)
+- snps,block-size: Maximum block size supported by the controller channel.
+  Array size is equal to the number of dma-channels.
+
+Optional properties:
+- snps,axi-max-burst-len: Restrict master AXI burst length by value specified
+  in this property. If this property is missing the maximum AXI burst length
+  supported by DMAC is used. [1:256]
+
+Example:
+
+dmac: dma-controller@80000 {
+	compatible = "snps,axi-dma-1.01a";
+	reg = <0x80000 0x400>;
+	clocks = <&core_clk>, <&cfgr_clk>;
+	clock-names = "core-clk", "cfgr-clk";
+	interrupt-parent = <&intc>;
+	interrupts = <27>;
+
+	dma-channels = <4>;
+	snps,dma-masters = <2>;
+	snps,data-width = <3>;
+	snps,block-size = <4096 4096 4096 4096>;
+	snps,priority = <0 1 2 3>;
+	snps,axi-max-burst-len = <16>;
+};
diff --git a/Documentation/devicetree/bindings/dma/stm32-dma.txt b/Documentation/devicetree/bindings/dma/stm32-dma.txt
index 0b55718bf889..c5f519097204 100644
--- a/Documentation/devicetree/bindings/dma/stm32-dma.txt
+++ b/Documentation/devicetree/bindings/dma/stm32-dma.txt
@@ -62,14 +62,14 @@ channel: a phandle to the DMA controller plus the following four integer cells:
 	0x1: medium
 	0x2: high
 	0x3: very high
-4. A 32bit mask specifying the DMA FIFO threshold configuration which are device
-   dependent:
- -bit 0-1: Fifo threshold
+4. A 32bit bitfield value specifying DMA features which are device dependent:
+ -bit 0-1: DMA FIFO threshold selection
 	0x0: 1/4 full FIFO
 	0x1: 1/2 full FIFO
 	0x2: 3/4 full FIFO
 	0x3: full FIFO
 
+
 Example:
 
 	usart1: serial@40011000 {
diff --git a/Documentation/devicetree/bindings/eeprom/at24.txt b/Documentation/devicetree/bindings/eeprom/at24.txt
index abfae1beca2b..61d833abafbf 100644
--- a/Documentation/devicetree/bindings/eeprom/at24.txt
+++ b/Documentation/devicetree/bindings/eeprom/at24.txt
@@ -41,12 +41,16 @@ Required properties:
                 "nxp",
                 "ramtron",
                 "renesas",
+                "rohm",
                 "st",
 
                 Some vendors use different model names for chips which are just
                 variants of the above. Known such exceptions are listed below:
 
+                "nxp,se97b" - the fallback is "atmel,24c02",
                 "renesas,r1ex24002" - the fallback is "atmel,24c02"
+                "renesas,r1ex24128" - the fallback is "atmel,24c128"
+                "rohm,br24t01" - the fallback is "atmel,24c01"
 
   - reg: The I2C address of the EEPROM.
 
diff --git a/Documentation/devicetree/bindings/i2c/i2c-rcar.txt b/Documentation/devicetree/bindings/i2c/i2c-rcar.txt
index a777477e4547..4a7811ecd954 100644
--- a/Documentation/devicetree/bindings/i2c/i2c-rcar.txt
+++ b/Documentation/devicetree/bindings/i2c/i2c-rcar.txt
@@ -13,7 +13,9 @@ Required properties:
 	"renesas,i2c-r8a7794" if the device is a part of a R8A7794 SoC.
 	"renesas,i2c-r8a7795" if the device is a part of a R8A7795 SoC.
 	"renesas,i2c-r8a7796" if the device is a part of a R8A7796 SoC.
+	"renesas,i2c-r8a77965" if the device is a part of a R8A77965 SoC.
 	"renesas,i2c-r8a77970" if the device is a part of a R8A77970 SoC.
+	"renesas,i2c-r8a77995" if the device is a part of a R8A77995 SoC.
 	"renesas,rcar-gen1-i2c" for a generic R-Car Gen1 compatible device.
 	"renesas,rcar-gen2-i2c" for a generic R-Car Gen2 or RZ/G1 compatible
 				device.
diff --git a/Documentation/devicetree/bindings/i2c/i2c-sh_mobile.txt b/Documentation/devicetree/bindings/i2c/i2c-sh_mobile.txt
index 224390999e81..fc7e17802746 100644
--- a/Documentation/devicetree/bindings/i2c/i2c-sh_mobile.txt
+++ b/Documentation/devicetree/bindings/i2c/i2c-sh_mobile.txt
@@ -13,6 +13,7 @@ Required properties:
 			- "renesas,iic-r8a7794" (R-Car E2)
 			- "renesas,iic-r8a7795" (R-Car H3)
 			- "renesas,iic-r8a7796" (R-Car M3-W)
+			- "renesas,iic-r8a77965" (R-Car M3-N)
 			- "renesas,iic-sh73a0" (SH-Mobile AG5)
 			- "renesas,rcar-gen2-iic" (generic R-Car Gen2 or RZ/G1
 							compatible device)
diff --git a/Documentation/devicetree/bindings/i2c/i2c-synquacer.txt b/Documentation/devicetree/bindings/i2c/i2c-synquacer.txt
new file mode 100644
index 000000000000..72f4a2f0fedc
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-synquacer.txt
@@ -0,0 +1,29 @@
+Socionext SynQuacer I2C
+
+Required properties:
+- compatible      : Must be "socionext,synquacer-i2c"
+- reg             : Offset and length of the register set for the device
+- interrupts      : A single interrupt specifier
+- #address-cells  : Must be <1>;
+- #size-cells     : Must be <0>;
+- clock-names     : Must contain "pclk".
+- clocks          : Must contain an entry for each name in clock-names.
+                    (See the common clock bindings.)
+
+Optional properties:
+- clock-frequency : Desired I2C bus clock frequency in Hz. As only Normal and
+                    Fast modes are supported, possible values are 100000 and
+                    400000.
+
+Example :
+
+    i2c@51210000 {
+        compatible = "socionext,synquacer-i2c";
+        reg = <0x51210000 0x1000>;
+        interrupts = <GIC_SPI 165 IRQ_TYPE_LEVEL_HIGH>;
+        #address-cells = <1>;
+        #size-cells = <0>;
+        clock-names = "pclk";
+        clocks = <&clk_i2c>;
+        clock-frequency = <400000>;
+    };
diff --git a/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt b/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt
index 1fd5d69647ca..ffadb7c6f1f3 100644
--- a/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt
+++ b/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt
@@ -11,6 +11,8 @@ Required Properties:
     the device is compatible with the R-Car Gen2 VMSA-compatible IPMMU.
 
     - "renesas,ipmmu-r8a73a4" for the R8A73A4 (R-Mobile APE6) IPMMU.
+    - "renesas,ipmmu-r8a7743" for the R8A7743 (RZ/G1M) IPMMU.
+    - "renesas,ipmmu-r8a7745" for the R8A7745 (RZ/G1E) IPMMU.
     - "renesas,ipmmu-r8a7790" for the R8A7790 (R-Car H2) IPMMU.
     - "renesas,ipmmu-r8a7791" for the R8A7791 (R-Car M2-W) IPMMU.
     - "renesas,ipmmu-r8a7793" for the R8A7793 (R-Car M2-N) IPMMU.
@@ -19,7 +21,8 @@ Required Properties:
     - "renesas,ipmmu-r8a7796" for the R8A7796 (R-Car M3-W) IPMMU.
     - "renesas,ipmmu-r8a77970" for the R8A77970 (R-Car V3M) IPMMU.
     - "renesas,ipmmu-r8a77995" for the R8A77995 (R-Car D3) IPMMU.
-    - "renesas,ipmmu-vmsa" for generic R-Car Gen2 VMSA-compatible IPMMU.
+    - "renesas,ipmmu-vmsa" for generic R-Car Gen2 or RZ/G1 VMSA-compatible
+			   IPMMU.
 
   - reg: Base address and size of the IPMMU registers.
   - interrupts: Specifiers for the MMU fault interrupts. For instances that
diff --git a/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt b/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt
index 2098f7732264..6ecefea1c6f9 100644
--- a/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt
+++ b/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt
@@ -14,6 +14,11 @@ Required properties:
                     "single-master" device, and needs no additional information
                     to associate with its master device.  See:
                     Documentation/devicetree/bindings/iommu/iommu.txt
+- clocks          : A list of clocks required for the IOMMU to be accessible by
+                    the host CPU.
+- clock-names     : Should contain the following:
+	"iface" - Main peripheral bus clock (PCLK/HCLK) (required)
+	"aclk"  - AXI bus clock (required)
 
 Optional properties:
 - rockchip,disable-mmu-reset : Don't use the mmu reset operation.
@@ -27,5 +32,7 @@ Example:
 		reg = <0xff940300 0x100>;
 		interrupts = <GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>;
 		interrupt-names = "vopl_mmu";
+		clocks = <&cru ACLK_VOP1>, <&cru HCLK_VOP1>;
+		clock-names = "aclk", "iface";
 		#iommu-cells = <0>;
 	};
diff --git a/Documentation/devicetree/bindings/mips/mscc.txt b/Documentation/devicetree/bindings/mips/mscc.txt
new file mode 100644
index 000000000000..ae15ec333542
--- /dev/null
+++ b/Documentation/devicetree/bindings/mips/mscc.txt
@@ -0,0 +1,43 @@
+* Microsemi MIPS CPUs
+
+Boards with a SoC of the Microsemi MIPS family shall have the following
+properties:
+
+Required properties:
+- compatible: "mscc,ocelot"
+
+
+* Other peripherals:
+
+o CPU chip regs:
+
+The SoC has a few registers (DEVCPU_GCB:CHIP_REGS) handling miscellaneous
+functionalities: chip ID, general purpose register for software use, reset
+controller, hardware status and configuration, efuses.
+
+Required properties:
+- compatible: Should be "mscc,ocelot-chip-regs", "simple-mfd", "syscon"
+- reg : Should contain registers location and length
+
+Example:
+	syscon@71070000 {
+		compatible = "mscc,ocelot-chip-regs", "simple-mfd", "syscon";
+		reg = <0x71070000 0x1c>;
+	};
+
+
+o CPU system control:
+
+The SoC has a few registers (ICPU_CFG:CPU_SYSTEM_CTRL) handling configuration of
+the CPU: 8 general purpose registers, reset control, CPU en/disabling, CPU
+endianness, CPU bus control, CPU status.
+
+Required properties:
+- compatible: Should be "mscc,ocelot-cpu-syscon", "syscon"
+- reg : Should contain registers location and length
+
+Example:
+	syscon@70000000 {
+		compatible = "mscc,ocelot-cpu-syscon", "syscon";
+		reg = <0x70000000 0x2c>;
+	};
diff --git a/Documentation/devicetree/bindings/net/fsl-tsec-phy.txt b/Documentation/devicetree/bindings/net/fsl-tsec-phy.txt
index 594982c6b9f9..79bf352e659c 100644
--- a/Documentation/devicetree/bindings/net/fsl-tsec-phy.txt
+++ b/Documentation/devicetree/bindings/net/fsl-tsec-phy.txt
@@ -6,7 +6,11 @@ the definition of the PHY node in booting-without-of.txt for an example
 of how to define a PHY.
 
 Required properties:
-  - reg : Offset and length of the register set for the device
+  - reg : Offset and length of the register set for the device, and optionally
+          the offset and length of the TBIPA register (TBI PHY address
+	  register).  If TBIPA register is not specified, the driver will
+	  attempt to infer it from the register set specified (your mileage may
+	  vary).
   - compatible : Should define the compatible device type for the
     mdio. Currently supported strings/devices are:
 	- "fsl,gianfar-tbi"
diff --git a/Documentation/devicetree/bindings/pmem/pmem-region.txt b/Documentation/devicetree/bindings/pmem/pmem-region.txt
new file mode 100644
index 000000000000..5cfa4f016a00
--- /dev/null
+++ b/Documentation/devicetree/bindings/pmem/pmem-region.txt
@@ -0,0 +1,65 @@
+Device-tree bindings for persistent memory regions
+-----------------------------------------------------
+
+Persistent memory refers to a class of memory devices that are:
+
+	a) Usable as main system memory (i.e. cacheable), and
+	b) Retain their contents across power failure.
+
+Given b) it is best to think of persistent memory as a kind of memory mapped
+storage device. To ensure data integrity the operating system needs to manage
+persistent regions separately to the normal memory pool. To aid with that this
+binding provides a standardised interface for discovering where persistent
+memory regions exist inside the physical address space.
+
+Bindings for the region nodes:
+-----------------------------
+
+Required properties:
+	- compatible = "pmem-region"
+
+	- reg = <base, size>;
+		The reg property should specify an address range that is
+		translatable to a system physical address range. This address
+		range should be mappable as normal system memory would be
+		(i.e. cacheable).
+
+		If the reg property contains multiple address ranges
+		each address range will be treated as though it was specified
+		in a separate device node. Having multiple address ranges in a
+		node implies no special relationship between the two ranges.
+
+Optional properties:
+	- Any relevant NUMA associativity properties for the target platform.
+
+	- volatile; This property indicates that this region is actually
+	  backed by non-persistent memory. This lets the OS know that it
+	  may skip the cache flushes required to ensure data is made
+	  persistent after a write.
+
+	  If this property is absent then the OS must assume that the region
+	  is backed by non-volatile memory.
+
+Examples:
+--------------------
+
+	/*
+	 * This node specifies one 4KB region spanning from
+	 * 0x5000 to 0x5fff that is backed by non-volatile memory.
+	 */
+	pmem@5000 {
+		compatible = "pmem-region";
+		reg = <0x00005000 0x00001000>;
+	};
+
+	/*
+	 * This node specifies two 4KB regions that are backed by
+	 * volatile (normal) memory.
+	 */
+	pmem@6000 {
+		compatible = "pmem-region";
+		reg = < 0x00006000 0x00001000
+			0x00008000 0x00001000 >;
+		volatile;
+	};
+
diff --git a/Documentation/devicetree/bindings/pwm/ingenic,jz47xx-pwm.txt b/Documentation/devicetree/bindings/pwm/ingenic,jz47xx-pwm.txt
new file mode 100644
index 000000000000..7d9d3f90641b
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/ingenic,jz47xx-pwm.txt
@@ -0,0 +1,25 @@
+Ingenic JZ47xx PWM Controller
+=============================
+
+Required properties:
+- compatible: One of:
+  * "ingenic,jz4740-pwm"
+  * "ingenic,jz4770-pwm"
+  * "ingenic,jz4780-pwm"
+- #pwm-cells: Should be 3. See pwm.txt in this directory for a description
+  of the cells format.
+- clocks : phandle to the external clock.
+- clock-names : Should be "ext".
+
+
+Example:
+
+	pwm: pwm@10002000 {
+		compatible = "ingenic,jz4740-pwm";
+		reg = <0x10002000 0x1000>;
+
+		#pwm-cells = <3>;
+
+		clocks = <&ext>;
+		clock-names = "ext";
+	};
diff --git a/Documentation/devicetree/bindings/pwm/pwm-stm32-lp.txt b/Documentation/devicetree/bindings/pwm/pwm-stm32-lp.txt
index f8338d11fd2b..bd23302e84be 100644
--- a/Documentation/devicetree/bindings/pwm/pwm-stm32-lp.txt
+++ b/Documentation/devicetree/bindings/pwm/pwm-stm32-lp.txt
@@ -7,6 +7,8 @@ See ../mfd/stm32-lptimer.txt for details about the parent node.
 
 Required parameters:
 - compatible:		Must be "st,stm32-pwm-lp".
+- #pwm-cells:		Should be set to 3. This PWM chip uses the default 3 cells
+			bindings defined in pwm.txt.
 
 Optional properties:
 - pinctrl-names: 	Set to "default".
@@ -18,6 +20,7 @@ Example:
 		...
 		pwm {
 			compatible = "st,stm32-pwm-lp";
+			#pwm-cells = <3>;
 			pinctrl-names = "default";
 			pinctrl-0 = <&lppwm1_pins>;
 		};
diff --git a/Documentation/devicetree/bindings/pwm/pwm-sun4i.txt b/Documentation/devicetree/bindings/pwm/pwm-sun4i.txt
index 51ff54c8b8ef..2a1affbff45e 100644
--- a/Documentation/devicetree/bindings/pwm/pwm-sun4i.txt
+++ b/Documentation/devicetree/bindings/pwm/pwm-sun4i.txt
@@ -7,6 +7,8 @@ Required properties:
     - "allwinner,sun5i-a13-pwm"
     - "allwinner,sun7i-a20-pwm"
     - "allwinner,sun8i-h3-pwm"
+    - "allwinner,sun50i-a64-pwm", "allwinner,sun5i-a13-pwm"
+    - "allwinner,sun50i-h5-pwm", "allwinner,sun5i-a13-pwm"
   - reg: physical base address and length of the controller's registers
   - #pwm-cells: should be 3. See pwm.txt in this directory for a description of
     the cells format.
diff --git a/Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.txt b/Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.txt
index 74c118015980..35a3b9761ee5 100644
--- a/Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.txt
+++ b/Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.txt
@@ -2,6 +2,8 @@
 
 Required Properties:
 - compatible: should be "renesas,pwm-rcar" and one of the following.
+ - "renesas,pwm-r8a7743": for RZ/G1M
+ - "renesas,pwm-r8a7745": for RZ/G1E
  - "renesas,pwm-r8a7778": for R-Car M1A
  - "renesas,pwm-r8a7779": for R-Car H1
  - "renesas,pwm-r8a7790": for R-Car H2
@@ -9,6 +11,7 @@ Required Properties:
  - "renesas,pwm-r8a7794": for R-Car E2
  - "renesas,pwm-r8a7795": for R-Car H3
  - "renesas,pwm-r8a7796": for R-Car M3-W
+ - "renesas,pwm-r8a77965": for R-Car M3-N
  - "renesas,pwm-r8a77995": for R-Car D3
 - reg: base address and length of the registers block for the PWM.
 - #pwm-cells: should be 2. See pwm.txt in this directory for a description of
@@ -17,13 +20,15 @@ Required Properties:
 - pinctrl-0: phandle, referring to a default pin configuration node.
 - pinctrl-names: Set to "default".
 
-Example: R8A7790 (R-Car H2) PWM Timer node
+Example: R8A7743 (RZ/G1M) PWM Timer node
 
 	pwm0: pwm@e6e30000 {
-		compatible = "renesas,pwm-r8a7790", "renesas,pwm-rcar";
+		compatible = "renesas,pwm-r8a7743", "renesas,pwm-rcar";
 		reg = <0 0xe6e30000 0 0x8>;
+		clocks = <&cpg CPG_MOD 523>;
+		power-domains = <&sysc R8A7743_PD_ALWAYS_ON>;
+		resets = <&cpg 523>;
 		#pwm-cells = <2>;
-		clocks = <&mstp5_clks R8A7790_CLK_PWM>;
 		pinctrl-0 = <&pwm0_pins>;
 		pinctrl-names = "default";
 	};
diff --git a/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.txt b/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.txt
index 1aadc804dae4..d53a16715da6 100644
--- a/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.txt
+++ b/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.txt
@@ -3,10 +3,12 @@
 Required Properties:
 
   - compatible: should be one of the following.
-    - "renesas,tpu-r8a73a4": for R8A77A4 (R-Mobile APE6) compatible PWM controller.
+    - "renesas,tpu-r8a73a4": for R8A73A4 (R-Mobile APE6) compatible PWM controller.
     - "renesas,tpu-r8a7740": for R8A7740 (R-Mobile A1) compatible PWM controller.
+    - "renesas,tpu-r8a7743": for R8A7743 (RZ/G1M) compatible PWM controller.
+    - "renesas,tpu-r8a7745": for R8A7745 (RZ/G1E) compatible PWM controller.
     - "renesas,tpu-r8a7790": for R8A7790 (R-Car H2) compatible PWM controller.
-    - "renesas,tpu": for generic R-Car TPU PWM controller.
+    - "renesas,tpu": for generic R-Car and RZ/G1 TPU PWM controller.
 
   - reg: Base address and length of each memory resource used by the PWM
     controller hardware module.
@@ -18,10 +20,10 @@ Required Properties:
 Please refer to pwm.txt in this directory for details of the common PWM bindings
 used by client devices.
 
-Example: R8A7740 (R-Car A1) TPU controller node
+Example: R8A7740 (R-Mobile A1) TPU controller node
 
 	tpu: pwm@e6600000 {
 		compatible = "renesas,tpu-r8a7740", "renesas,tpu";
-		reg = <0xe6600000 0x100>;
+		reg = <0xe6600000 0x148>;
 		#pwm-cells = <3>;
 	};
diff --git a/Documentation/devicetree/bindings/rtc/isil,isl12026.txt b/Documentation/devicetree/bindings/rtc/isil,isl12026.txt
new file mode 100644
index 000000000000..2e0be45193bb
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/isil,isl12026.txt
@@ -0,0 +1,28 @@
+ISL12026 I2C RTC/EEPROM
+
+ISL12026 is an I2C RTC/EEPROM combination device.  The RTC and control
+registers respond at bus address 0x6f, and the EEPROM array responds
+at bus address 0x57.  The canonical "reg" value will be for the RTC portion.
+
+Required properties supported by the device:
+
+ - "compatible": must be "isil,isl12026"
+ - "reg": I2C bus address of the device (always 0x6f)
+
+Optional properties:
+
+ - "isil,pwr-bsw": If present PWR.BSW bit must be set to the specified
+                   value for proper operation.
+
+ - "isil,pwr-sbib": If present PWR.SBIB bit must be set to the specified
+                    value for proper operation.
+
+
+Example:
+
+	rtc@6f {
+		compatible = "isil,isl12026";
+		reg = <0x6f>;
+		isil,pwr-bsw = <0>;
+		isil,pwr-sbib = <1>;
+	};
diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt
index 12e8b3e576b0..b5f978a4cac6 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.txt
+++ b/Documentation/devicetree/bindings/vendor-prefixes.txt
@@ -225,6 +225,7 @@ motorola	Motorola, Inc.
 moxa	Moxa Inc.
 mpl	MPL AG
 mqmaker	mqmaker Inc.
+mscc	Microsemi Corporation
 msi	Micro-Star International Co. Ltd.
 mti	Imagination Technologies Ltd. (formerly MIPS Technologies Inc.)
 multi-inno	Multi-Inno Technology Co.,Ltd
diff --git a/Documentation/devicetree/bindings/watchdog/fsl-imx-wdt.txt b/Documentation/devicetree/bindings/watchdog/fsl-imx-wdt.txt
index 107280ef0025..adc6b76fcb3a 100644
--- a/Documentation/devicetree/bindings/watchdog/fsl-imx-wdt.txt
+++ b/Documentation/devicetree/bindings/watchdog/fsl-imx-wdt.txt
@@ -11,6 +11,7 @@ Optional properties:
   detail please see: Documentation/devicetree/bindings/regmap/regmap.txt.
 - fsl,ext-reset-output: If present the watchdog device is configured to
   assert its external reset (WDOG_B) instead of issuing a software reset.
+- timeout-sec : Contains the watchdog timeout in seconds
 
 Examples:
 
@@ -19,4 +20,5 @@ wdt@73f98000 {
 	reg = <0x73f98000 0x4000>;
 	interrupts = <58>;
 	big-endian;
+	timeout-sec = <20>;
 };
diff --git a/Documentation/devicetree/bindings/watchdog/meson-wdt.txt b/Documentation/devicetree/bindings/watchdog/meson-wdt.txt
index 8a6d84cb36c9..7588cc3971bf 100644
--- a/Documentation/devicetree/bindings/watchdog/meson-wdt.txt
+++ b/Documentation/devicetree/bindings/watchdog/meson-wdt.txt
@@ -9,9 +9,13 @@ Required properties:
 	"amlogic,meson8m2-wdt" and "amlogic,meson8b-wdt" on Meson8m2 SoCs
 - reg : Specifies base physical address and size of the registers.
 
+Optional properties:
+- timeout-sec: contains the watchdog timeout in seconds.
+
 Example:
 
 wdt: watchdog@c1109900 {
 	compatible = "amlogic,meson6-wdt";
 	reg = <0xc1109900 0x8>;
+	timeout-sec = <10>;
 };
diff --git a/Documentation/devicetree/bindings/watchdog/mtk-wdt.txt b/Documentation/devicetree/bindings/watchdog/mtk-wdt.txt
index 5b38a30e608c..859dee167b91 100644
--- a/Documentation/devicetree/bindings/watchdog/mtk-wdt.txt
+++ b/Documentation/devicetree/bindings/watchdog/mtk-wdt.txt
@@ -11,9 +11,13 @@ Required properties:
 
 - reg : Specifies base physical address and size of the registers.
 
+Optional properties:
+- timeout-sec: contains the watchdog timeout in seconds.
+
 Example:
 
 wdt: watchdog@10000000 {
 	compatible = "mediatek,mt6589-wdt";
 	reg = <0x10000000 0x18>;
+	timeout-sec = <10>;
 };
diff --git a/Documentation/devicetree/bindings/watchdog/nuvoton,npcm-wdt.txt b/Documentation/devicetree/bindings/watchdog/nuvoton,npcm-wdt.txt
new file mode 100644
index 000000000000..6d593003c933
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/nuvoton,npcm-wdt.txt
@@ -0,0 +1,28 @@
+Nuvoton NPCM Watchdog
+
+Nuvoton NPCM timer module provides five 24-bit timer counters, and a watchdog.
+The watchdog supports a pre-timeout interrupt that fires 10ms before the
+expiry.
+
+Required properties:
+- compatible      : "nuvoton,npcm750-wdt" for NPCM750 (Poleg).
+- reg             : Offset and length of the register set for the device.
+- interrupts      : Contain the timer interrupt with flags for
+                    falling edge.
+
+Required clocking property, must be one of:
+- clocks          : phandle of timer reference clock.
+- clock-frequency : The frequency in Hz of the clock that drives the NPCM7xx
+                    timer (usually 25000000).
+
+Optional properties:
+- timeout-sec : Contains the watchdog timeout in seconds
+
+Example:
+
+timer@f000801c {
+    compatible = "nuvoton,npcm750-wdt";
+    interrupts = <GIC_SPI 47 IRQ_TYPE_LEVEL_HIGH>;
+    reg = <0xf000801c 0x4>;
+    clocks = <&clk NPCM7XX_CLK_TIMER>;
+};
diff --git a/Documentation/devicetree/bindings/watchdog/sirfsoc_wdt.txt b/Documentation/devicetree/bindings/watchdog/sirfsoc_wdt.txt
index 9cbc76c89b2b..0dce5e3100b4 100644
--- a/Documentation/devicetree/bindings/watchdog/sirfsoc_wdt.txt
+++ b/Documentation/devicetree/bindings/watchdog/sirfsoc_wdt.txt
@@ -5,10 +5,14 @@ Required properties:
 - reg: Address range of tick timer/WDT register set
 - interrupts: interrupt number to the cpu
 
+Optional properties:
+- timeout-sec : Contains the watchdog timeout in seconds
+
 Example:
 
 timer@b0020000 {
 	compatible = "sirf,prima2-tick";
 	reg = <0xb0020000 0x1000>;
 	interrupts = <0>;
+	timeout-sec = <30>;
 };
diff --git a/Documentation/devicetree/bindings/watchdog/sunxi-wdt.txt b/Documentation/devicetree/bindings/watchdog/sunxi-wdt.txt
index 04fc368d828f..ed11ce0ac836 100644
--- a/Documentation/devicetree/bindings/watchdog/sunxi-wdt.txt
+++ b/Documentation/devicetree/bindings/watchdog/sunxi-wdt.txt
@@ -8,9 +8,13 @@ Required properties:
 	"allwinner,sun50i-a64-wdt","allwinner,sun6i-a31-wdt"
 - reg : Specifies base physical address and size of the registers.
 
+Optional properties:
+- timeout-sec : Contains the watchdog timeout in seconds
+
 Example:
 
 wdt: watchdog@1c20c90 {
 	compatible = "allwinner,sun4i-a10-wdt";
 	reg = <0x01c20c90 0x10>;
+	timeout-sec = <10>;
 };
diff --git a/Documentation/filesystems/afs.txt b/Documentation/filesystems/afs.txt
index c5254f6d234d..8c6ea7b41048 100644
--- a/Documentation/filesystems/afs.txt
+++ b/Documentation/filesystems/afs.txt
@@ -11,7 +11,7 @@ Contents:
  - Proc filesystem.
  - The cell database.
  - Security.
- - Examples.
+ - The @sys substitution.
 
 
 ========
@@ -230,3 +230,29 @@ If a file is opened with a particular key and then the file descriptor is
 passed to a process that doesn't have that key (perhaps over an AF_UNIX
 socket), then the operations on the file will be made with key that was used to
 open the file.
+
+
+=====================
+THE @SYS SUBSTITUTION
+=====================
+
+The list of up to 16 @sys substitutions for the current network namespace can
+be configured by writing a list to /proc/fs/afs/sysname:
+
+	[root@andromeda ~]# echo foo amd64_linux_26 >/proc/fs/afs/sysname
+
+or cleared entirely by writing an empty list:
+
+	[root@andromeda ~]# echo >/proc/fs/afs/sysname
+
+The current list for the current network namespace can be retrieved by:
+
+	[root@andromeda ~]# cat /proc/fs/afs/sysname
+	foo
+	amd64_linux_26
+
+When @sys is being substituted for, each element of the list is tried in the
+order given.
+
+By default, the list will contain one item that conforms to the pattern
+"<arch>_linux_26", amd64 being the name for x86_64.
diff --git a/Documentation/filesystems/caching/netfs-api.txt b/Documentation/filesystems/caching/netfs-api.txt
index 0eb31de3a2c1..2a6f7399c1f3 100644
--- a/Documentation/filesystems/caching/netfs-api.txt
+++ b/Documentation/filesystems/caching/netfs-api.txt
@@ -129,20 +129,10 @@ To define an object, a structure of the following type should be filled out:
 			const void *parent_netfs_data,
 			const void *cookie_netfs_data);
 
-		uint16_t (*get_key)(const void *cookie_netfs_data,
-				    void *buffer,
-				    uint16_t bufmax);
-
-		void (*get_attr)(const void *cookie_netfs_data,
-				 uint64_t *size);
-
-		uint16_t (*get_aux)(const void *cookie_netfs_data,
-				    void *buffer,
-				    uint16_t bufmax);
-
 		enum fscache_checkaux (*check_aux)(void *cookie_netfs_data,
 						   const void *data,
-						   uint16_t datalen);
+						   uint16_t datalen,
+						   loff_t object_size);
 
 		void (*get_context)(void *cookie_netfs_data, void *context);
 
@@ -187,36 +177,7 @@ This has the following fields:
      cache in the parent's list will be chosen, or failing that, the first
      cache in the master list.
 
- (4) A function to retrieve an object's key from the netfs [mandatory].
-
-     This function will be called with the netfs data that was passed to the
-     cookie acquisition function and the maximum length of key data that it may
-     provide.  It should write the required key data into the given buffer and
-     return the quantity it wrote.
-
- (5) A function to retrieve attribute data from the netfs [optional].
-
-     This function will be called with the netfs data that was passed to the
-     cookie acquisition function.  It should return the size of the file if
-     this is a data file.  The size may be used to govern how much cache must
-     be reserved for this file in the cache.
-
-     If the function is absent, a file size of 0 is assumed.
-
- (6) A function to retrieve auxiliary data from the netfs [optional].
-
-     This function will be called with the netfs data that was passed to the
-     cookie acquisition function and the maximum length of auxiliary data that
-     it may provide.  It should write the auxiliary data into the given buffer
-     and return the quantity it wrote.
-
-     If this function is absent, the auxiliary data length will be set to 0.
-
-     The length of the auxiliary data buffer may be dependent on the key
-     length.  A netfs mustn't rely on being able to provide more than 400 bytes
-     for both.
-
- (7) A function to check the auxiliary data [optional].
+ (4) A function to check the auxiliary data [optional].
 
      This function will be called to check that a match found in the cache for
      this object is valid.  For instance with AFS it could check the auxiliary
@@ -226,6 +187,9 @@ This has the following fields:
      If this function is absent, it will be assumed that matching objects in a
      cache are always valid.
 
+     The function is also passed the cache's idea of the object size and may
+     use this to manage coherency also.
+
      If present, the function should return one of the following values:
 
 	(*) FSCACHE_CHECKAUX_OKAY		- the entry is okay as is
@@ -235,7 +199,7 @@ This has the following fields:
      This function can also be used to extract data from the auxiliary data in
      the cache and copy it into the netfs's structures.
 
- (8) A pair of functions to manage contexts for the completion callback
+ (5) A pair of functions to manage contexts for the completion callback
      [optional].
 
      The cache read/write functions are passed a context which is then passed
@@ -249,7 +213,7 @@ This has the following fields:
      required for indices as indices may not contain data.  These functions may
      be called in interrupt context and so may not sleep.
 
- (9) A function to mark a page as retaining cache metadata [optional].
+ (6) A function to mark a page as retaining cache metadata [optional].
 
      This is called by the cache to indicate that it is retaining in-memory
      information for this page and that the netfs should uncache the page when
@@ -261,7 +225,7 @@ This has the following fields:
 
      This function is not required for indices as they're not permitted data.
 
-(10) A function to unmark all the pages retaining cache metadata [mandatory].
+ (7) A function to unmark all the pages retaining cache metadata [mandatory].
 
      This is called by FS-Cache to indicate that a backing store is being
      unbound from a cookie and that all the marks on the pages should be
@@ -333,12 +297,32 @@ the path to the file:
 	struct fscache_cookie *
 	fscache_acquire_cookie(struct fscache_cookie *parent,
 			       const struct fscache_object_def *def,
+			       const void *index_key,
+			       size_t index_key_len,
+			       const void *aux_data,
+			       size_t aux_data_len,
 			       void *netfs_data,
+			       loff_t object_size,
 			       bool enable);
 
 This function creates an index entry in the index represented by parent,
 filling in the index entry by calling the operations pointed to by def.
 
+A unique key that represents the object within the parent must be pointed to by
+index_key and is of length index_key_len.
+
+An optional blob of auxiliary data that is to be stored within the cache can be
+pointed to with aux_data and should be of length aux_data_len.  This would
+typically be used for storing coherency data.
+
+The netfs may pass an arbitrary value in netfs_data and this will be presented
+to it in the event of any calling back.  This may also be used in tracing or
+logging of messages.
+
+The cache tracks the size of the data attached to an object and this is set to
+object_size.  For indices, this should be 0.  This value will be passed to the
+->check_aux() callback.
+
 Note that this function never returns an error - all errors are handled
 internally.  It may, however, return NULL to indicate no cookie.  It is quite
 acceptable to pass this token back to this function as the parent to another
@@ -355,30 +339,24 @@ must be enabled to do anything with it.  A disabled cookie can be enabled by
 calling fscache_enable_cookie() (see below).
 
 For example, with AFS, a cell would be added to the primary index.  This index
-entry would have a dependent inode containing a volume location index for the
-volume mappings within this cell:
+entry would have a dependent inode containing volume mappings within this cell:
 
 	cell->cache =
 		fscache_acquire_cookie(afs_cache_netfs.primary_index,
 				       &afs_cell_cache_index_def,
-				       cell, true);
-
-Then when a volume location was accessed, it would be entered into the cell's
-index and an inode would be allocated that acts as a volume type and hash chain
-combination:
+				       cell->name, strlen(cell->name),
+				       NULL, 0,
+				       cell, 0, true);
 
-	vlocation->cache =
-		fscache_acquire_cookie(cell->cache,
-				       &afs_vlocation_cache_index_def,
-				       vlocation, true);
-
-And then a particular flavour of volume (R/O for example) could be added to
-that index, creating another index for vnodes (AFS inode equivalents):
+And then a particular volume could be added to that index by ID, creating
+another index for vnodes (AFS inode equivalents):
 
 	volume->cache =
-		fscache_acquire_cookie(vlocation->cache,
+		fscache_acquire_cookie(volume->cell->cache,
 				       &afs_volume_cache_index_def,
-				       volume, true);
+				       &volume->vid, sizeof(volume->vid),
+				       NULL, 0,
+				       volume, 0, true);
 
 
 ======================
@@ -392,7 +370,9 @@ the object definition should be something other than index type.
 	vnode->cache =
 		fscache_acquire_cookie(volume->cache,
 				       &afs_vnode_cache_object_def,
-				       vnode, true);
+				       &key, sizeof(key),
+				       &aux, sizeof(aux),
+				       vnode, vnode->status.size, true);
 
 
 =================================
@@ -408,7 +388,9 @@ it would be some other type of object such as a data file.
 	xattr->cache =
 		fscache_acquire_cookie(vnode->cache,
 				       &afs_xattr_cache_object_def,
-				       xattr, true);
+				       &xattr->name, strlen(xattr->name),
+				       NULL, 0,
+				       xattr, strlen(xattr->val), true);
 
 Miscellaneous objects might be used to store extended attributes or directory
 entries for example.
@@ -425,8 +407,7 @@ cache to adjust its metadata for data tracking appropriately:
 	int fscache_attr_changed(struct fscache_cookie *cookie);
 
 The cache will return -ENOBUFS if there is no backing cache or if there is no
-space to allocate any extra metadata required in the cache.  The attributes
-will be accessed with the get_attr() cookie definition operation.
+space to allocate any extra metadata required in the cache.
 
 Note that attempts to read or write data pages in the cache over this size may
 be rebuffed with -ENOBUFS.
@@ -551,12 +532,13 @@ written back to the cache:
 
 	int fscache_write_page(struct fscache_cookie *cookie,
 			       struct page *page,
+			       loff_t object_size,
 			       gfp_t gfp);
 
 The cookie argument must specify a data file cookie, the page specified should
 contain the data to be written (and is also used to specify the page number),
-and the gfp argument is used to control how any memory allocations made are
-satisfied.
+object_size is the revised size of the object and the gfp argument is used to
+control how any memory allocations made are satisfied.
 
 The page must have first been read or allocated successfully and must not have
 been uncached before writing is performed.
@@ -717,21 +699,23 @@ INDEX AND DATA FILE CONSISTENCY
 To find out whether auxiliary data for an object is up to data within the
 cache, the following function can be called:
 
-	int fscache_check_consistency(struct fscache_cookie *cookie)
+	int fscache_check_consistency(struct fscache_cookie *cookie,
+				      const void *aux_data);
 
 This will call back to the netfs to check whether the auxiliary data associated
-with a cookie is correct.  It returns 0 if it is and -ESTALE if it isn't; it
-may also return -ENOMEM and -ERESTARTSYS.
+with a cookie is correct; if aux_data is non-NULL, it will update the auxiliary
+data buffer first.  It returns 0 if it is and -ESTALE if it isn't; it may also
+return -ENOMEM and -ERESTARTSYS.
 
 To request an update of the index data for an index or other object, the
 following function should be called:
 
-	void fscache_update_cookie(struct fscache_cookie *cookie);
+	void fscache_update_cookie(struct fscache_cookie *cookie,
+				   const void *aux_data);
 
-This function will refer back to the netfs_data pointer stored in the cookie by
-the acquisition function to obtain the data to write into each revised index
-entry.  The update method in the parent index definition will be called to
-transfer the data.
+This function will update the cookie's auxiliary data buffer from aux_data if
+that is non-NULL and then schedule this to be stored on disk.  The update
+method in the parent index definition will be called to transfer the data.
 
 Note that partial updates may happen automatically at other times, such as when
 data blocks are added to a data file object.
@@ -748,10 +732,11 @@ still possible to uncache pages and relinquish the cookie.
 
 The initial enablement state is set by fscache_acquire_cookie(), but the cookie
 can be enabled or disabled later.  To disable a cookie, call:
-    
+
 	void fscache_disable_cookie(struct fscache_cookie *cookie,
+				    const void *aux_data,
     				    bool invalidate);
-    
+
 If the cookie is not already disabled, this locks the cookie against other
 enable and disable ops, marks the cookie as being disabled, discards or
 invalidates any backing objects and waits for cessation of activity on any
@@ -760,13 +745,15 @@ associated object before unlocking the cookie.
 All possible failures are handled internally.  The caller should consider
 calling fscache_uncache_all_inode_pages() afterwards to make sure all page
 markings are cleared up.
-    
+
 Cookies can be enabled or reenabled with:
-    
+
     	void fscache_enable_cookie(struct fscache_cookie *cookie,
+				   const void *aux_data,
+				   loff_t object_size,
     				   bool (*can_enable)(void *data),
     				   void *data)
-    
+
 If the cookie is not already enabled, this locks the cookie against other
 enable and disable ops, invokes can_enable() and, if the cookie is not an index
 cookie, will begin the procedure of acquiring backing objects.
@@ -777,6 +764,12 @@ ruling as to whether or not enablement should actually be permitted to begin.
 All possible failures are handled internally.  The cookie will only be marked
 as enabled if provisional backing objects are allocated.
 
+The object's data size is updated from object_size and is passed to the
+->check_aux() function.
+
+In both cases, the cookie's auxiliary data buffer is updated from aux_data if
+that is non-NULL inside the enablement lock before proceeding.
+
 
 ===============================
 MISCELLANEOUS COOKIE OPERATIONS
@@ -823,6 +816,7 @@ COOKIE UNREGISTRATION
 To get rid of a cookie, this function should be called.
 
 	void fscache_relinquish_cookie(struct fscache_cookie *cookie,
+				       const void *aux_data,
 				       bool retire);
 
 If retire is non-zero, then the object will be marked for recycling, and all
@@ -833,6 +827,9 @@ If retire is zero, then the object may be available again when next the
 acquisition function is called.  Retirement here will overrule the pinning on a
 cookie.
 
+The cookie's auxiliary data will be updated from aux_data if that is non-NULL
+so that the cache can lazily update it on disk.
+
 One very important note - relinquish must NOT be called for a cookie unless all
 the cookies for "child" indices, objects and pages have been relinquished
 first.
diff --git a/Documentation/filesystems/ceph.txt b/Documentation/filesystems/ceph.txt
index 0b302a11718a..d7f011ddc150 100644
--- a/Documentation/filesystems/ceph.txt
+++ b/Documentation/filesystems/ceph.txt
@@ -62,6 +62,18 @@ subdirectories, and a summation of all nested file sizes.  This makes
 the identification of large disk space consumers relatively quick, as
 no 'du' or similar recursive scan of the file system is required.
 
+Finally, Ceph also allows quotas to be set on any directory in the system.
+The quota can restrict the number of bytes or the number of files stored
+beneath that point in the directory hierarchy.  Quotas can be set using
+extended attributes 'ceph.quota.max_files' and 'ceph.quota.max_bytes', eg:
+
+ setfattr -n ceph.quota.max_bytes -v 100000000 /some/dir
+ getfattr -n ceph.quota.max_bytes /some/dir
+
+A limitation of the current quotas implementation is that it relies on the
+cooperation of the client mounting the file system to stop writers when a
+limit is reached.  A modified or adversarial client cannot be prevented
+from writing as much data as it needs.
 
 Mount Syntax
 ============
@@ -137,6 +149,10 @@ Mount Options
   noasyncreaddir
 	Do not use the dcache as above for readdir.
 
+  noquotadf
+        Report overall filesystem usage in statfs instead of using the root
+        directory quota.
+
 More Information
 ================
 
diff --git a/Documentation/filesystems/gfs2-glocks.txt b/Documentation/filesystems/gfs2-glocks.txt
index 1fb12f9dfe48..7059623635b2 100644
--- a/Documentation/filesystems/gfs2-glocks.txt
+++ b/Documentation/filesystems/gfs2-glocks.txt
@@ -100,14 +100,15 @@ indicates that it is caching uptodate data.
 
 Glock locking order within GFS2:
 
- 1. i_mutex (if required)
+ 1. i_rwsem (if required)
  2. Rename glock (for rename only)
  3. Inode glock(s)
     (Parents before children, inodes at "same level" with same parent in
      lock number order)
  4. Rgrp glock(s) (for (de)allocation operations)
  5. Transaction glock (via gfs2_trans_begin) for non-read operations
- 6. Page lock  (always last, very important!)
+ 6. i_rw_mutex (if required)
+ 7. Page lock  (always last, very important!)
 
 There are two glocks per inode. One deals with access to the inode
 itself (locking order as above), and the other, known as the iopen
diff --git a/Documentation/filesystems/orangefs.txt b/Documentation/filesystems/orangefs.txt
index e2818b60a5c2..f4ba94950e3f 100644
--- a/Documentation/filesystems/orangefs.txt
+++ b/Documentation/filesystems/orangefs.txt
@@ -21,10 +21,16 @@ Orangefs features include:
   * Stateless
 
 
-MAILING LIST
-============
+MAILING LIST ARCHIVES
+=====================
 
-http://beowulf-underground.org/mailman/listinfo/pvfs2-users
+http://lists.orangefs.org/pipermail/devel_lists.orangefs.org/
+
+
+MAILING LIST SUBMISSIONS
+========================
+
+devel@lists.orangefs.org
 
 
 DOCUMENTATION
@@ -42,12 +48,59 @@ Orangefs versions prior to 2.9.3 would not be compatible with the
 upstream version of the kernel client.
 
 
-BUILDING THE USERSPACE FILESYSTEM ON A SINGLE SERVER
-====================================================
+RUNNING ORANGEFS ON A SINGLE SERVER
+===================================
+
+OrangeFS is usually run in large installations with multiple servers and
+clients, but a complete filesystem can be run on a single machine for
+development and testing.
+
+On Fedora, install orangefs and orangefs-server.
+
+dnf -y install orangefs orangefs-server
+
+There is an example server configuration file in
+/etc/orangefs/orangefs.conf.  Change localhost to your hostname if
+necessary.
+
+To generate a filesystem to run xfstests against, see below.
+
+There is an example client configuration file in /etc/pvfs2tab.  It is a
+single line.  Uncomment it and change the hostname if necessary.  This
+controls clients which use libpvfs2.  This does not control the
+pvfs2-client-core.
+
+Create the filesystem.
+
+pvfs2-server -f /etc/orangefs/orangefs.conf
+
+Start the server.
+
+systemctl start orangefs-server
+
+Test the server.
+
+pvfs2-ping -m /pvfsmnt
+
+Start the client.  The module must be compiled in or loaded before this
+point.
+
+systemctl start orangefs-client
+
+Mount the filesystem.
+
+mount -t pvfs2 tcp://localhost:3334/orangefs /pvfsmnt
+
 
-You can omit --prefix if you don't care that things are sprinkled around in
-/usr/local. As of version 2.9.6, Orangefs uses Berkeley DB by default, we
-will probably be changing the default to lmdb soon.
+BUILDING ORANGEFS ON A SINGLE SERVER
+====================================
+
+Where OrangeFS cannot be installed from distribution packages, it may be
+built from source.
+
+You can omit --prefix if you don't care that things are sprinkled around
+in /usr/local.  As of version 2.9.6, OrangeFS uses Berkeley DB by
+default; we will probably be changing the default to LMDB soon.
 
 ./configure --prefix=/opt/ofs --with-db-backend=lmdb
 
@@ -55,35 +108,69 @@ make
 
 make install
 
-Create an orangefs config file:
+Create an orangefs config file.
+
 /opt/ofs/bin/pvfs2-genconfig /etc/pvfs2.conf
 
-  for "Enter hostnames", use the hostname, don't let it default to
-  localhost.
+Create an /etc/pvfs2tab file.
+
+echo tcp://localhost:3334/orangefs /pvfsmnt pvfs2 defaults,noauto 0 0 > \
+    /etc/pvfs2tab
+
+Create the mount point you specified in the tab file if needed.
 
-create a pvfs2tab file in /etc:
-cat /etc/pvfs2tab
-tcp://myhostname:3334/orangefs /mymountpoint pvfs2 defaults,noauto 0 0
+mkdir /pvfsmnt
 
-create the mount point you specified in the tab file if needed:
-mkdir /mymountpoint
+Bootstrap the server.
 
-bootstrap the server:
-/opt/ofs/sbin/pvfs2-server /etc/pvfs2.conf -f
+/opt/ofs/sbin/pvfs2-server -f /etc/pvfs2.conf
+
+Start the server.
 
-start the server:
 /opt/osf/sbin/pvfs2-server /etc/pvfs2.conf
 
-Now the server is running. At this point you might like to
-prove things are working with:
+Now the server should be running. Pvfs2-ls is a simple
+test to verify that the server is running.
+
+/opt/ofs/bin/pvfs2-ls /pvfsmnt
 
-/opt/osf/bin/pvfs2-ls /mymountpoint
+If stuff seems to be working, load the kernel module and
+turn on the client core.
 
-If stuff seems to be working, turn on the client core:
-/opt/osf/sbin/pvfs2-client -p /opt/osf/sbin/pvfs2-client-core
+/opt/ofs/sbin/pvfs2-client -p /opt/ofs/sbin/pvfs2-client-core
 
 Mount your filesystem.
-mount -t pvfs2 tcp://myhostname:3334/orangefs /mymountpoint
+
+mount -t pvfs2 tcp://localhost:3334/orangefs /pvfsmnt
+
+
+RUNNING XFSTESTS
+================
+
+It is useful to use a scratch filesystem with xfstests.  This can be
+done with only one server.
+
+Make a second copy of the FileSystem section in the server configuration
+file, which is /etc/orangefs/orangefs.conf.  Change the Name to scratch.
+Change the ID to something other than the ID of the first FileSystem
+section (2 is usually a good choice).
+
+Then there are two FileSystem sections: orangefs and scratch.
+
+This change should be made before creating the filesystem.
+
+pvfs2-server -f /etc/orangefs/orangefs.conf
+
+To run xfstests, create /etc/xfsqa.config.
+
+TEST_DIR=/orangefs
+TEST_DEV=tcp://localhost:3334/orangefs
+SCRATCH_MNT=/scratch
+SCRATCH_DEV=tcp://localhost:3334/scratch
+
+Then xfstests can be run.
+
+./check -pvfs2
 
 
 OPTIONS
diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt
index 6ea1e64d1464..961b287ef323 100644
--- a/Documentation/filesystems/overlayfs.txt
+++ b/Documentation/filesystems/overlayfs.txt
@@ -14,9 +14,13 @@ The result will inevitably fail to look exactly like a normal
 filesystem for various technical reasons.  The expectation is that
 many use cases will be able to ignore these differences.
 
-This approach is 'hybrid' because the objects that appear in the
-filesystem do not all appear to belong to that filesystem.  In many
-cases an object accessed in the union will be indistinguishable
+
+Overlay objects
+---------------
+
+The overlay filesystem approach is 'hybrid', because the objects that
+appear in the filesystem do not always appear to belong to that filesystem.
+In many cases, an object accessed in the union will be indistinguishable
 from accessing the corresponding object from the original filesystem.
 This is most obvious from the 'st_dev' field returned by stat(2).
 
@@ -34,6 +38,19 @@ make the overlay mount more compliant with filesystem scanners and
 overlay objects will be distinguishable from the corresponding
 objects in the original filesystem.
 
+On 64bit systems, even if not all overlay layers are on the same
+underlying filesystem, the same compliant behavior could be achieved
+with the "xino" feature.  The "xino" feature composes a unique object
+identifier from the real object st_ino and an underlying fsid index.
+If all underlying filesystems support NFS file handles and export file
+handles with 32bit inode number encoding (e.g. ext4), overlay filesystem
+will use the high inode number bits for fsid.  Even when the underlying
+filesystem uses 64bit inode numbers, users can still enable the "xino"
+feature with the "-o xino=on" overlay mount option.  That is useful for the
+case of underlying filesystems like xfs and tmpfs, which use 64bit inode
+numbers, but are very unlikely to use the high inode number bits.
+
+
 Upper and Lower
 ---------------
 
@@ -290,10 +307,19 @@ Non-standard behavior
 ---------------------
 
 The copy_up operation essentially creates a new, identical file and
-moves it over to the old name.  The new file may be on a different
-filesystem, so both st_dev and st_ino of the file may change.
+moves it over to the old name.  Any open files referring to this inode
+will access the old data.
+
+The new file may be on a different filesystem, so both st_dev and st_ino
+of the real file may change.  The values of st_dev and st_ino returned by
+stat(2) on an overlay object are often not the same as the real file
+stat(2) values to prevent the values from changing on copy_up.
 
-Any open files referring to this inode will access the old data.
+Unless "xino" feature is enabled, when overlay layers are not all on the
+same underlying filesystem, the value of st_dev may be different for two
+non-directory objects in the same overlay filesystem and the value of
+st_ino for directory objects may be non-persistent and could change even
+while the overlay filesystem is still mounted.
 
 Unless "inode index" feature is enabled, if a file with multiple hard
 links is copied up, then this will "break" the link.  Changes will not be
@@ -302,6 +328,7 @@ propagated to other names referring to the same inode.
 Unless "redirect_dir" feature is enabled, rename(2) on a lower or merged
 directory will fail with EXDEV.
 
+
 Changes to underlying filesystems
 ---------------------------------
 
diff --git a/Documentation/hwmon/adm1275 b/Documentation/hwmon/adm1275
index 791bc0bd91e6..39033538eb03 100644
--- a/Documentation/hwmon/adm1275
+++ b/Documentation/hwmon/adm1275
@@ -6,6 +6,10 @@ Supported chips:
     Prefix: 'adm1075'
     Addresses scanned: -
     Datasheet: www.analog.com/static/imported-files/data_sheets/ADM1075.pdf
+  * Analog Devices ADM1272
+    Prefix: 'adm1272'
+    Addresses scanned: -
+    Datasheet: www.analog.com/static/imported-files/data_sheets/ADM1272.pdf
   * Analog Devices ADM1275
     Prefix: 'adm1275'
     Addresses scanned: -
@@ -29,11 +33,11 @@ Author: Guenter Roeck <linux@roeck-us.net>
 Description
 -----------
 
-This driver supports hardware monitoring for Analog Devices ADM1075, ADM1275,
-ADM1276, ADM1278, ADM1293, and ADM1294 Hot-Swap Controller and Digital
-Power Monitors.
+This driver supports hardware monitoring for Analog Devices ADM1075, ADM1272,
+ADM1275, ADM1276, ADM1278, ADM1293, and ADM1294 Hot-Swap Controller and
+Digital Power Monitors.
 
-ADM1075, ADM1275, ADM1276, ADM1278, ADM1293, and ADM1294 are hot-swap
+ADM1075, ADM1272, ADM1275, ADM1276, ADM1278, ADM1293, and ADM1294 are hot-swap
 controllers that allow a circuit board to be removed from or inserted into
 a live backplane. They also feature current and voltage readback via an
 integrated 12 bit analog-to-digital converter (ADC), accessed using a
@@ -100,11 +104,10 @@ power1_input_lowest	Lowest observed input power. ADM1293 and ADM1294 only.
 power1_input_highest	Highest observed input power.
 power1_reset_history	Write any value to reset history.
 
-			Power attributes are supported on ADM1075, ADM1276,
-			ADM1293, and ADM1294.
+			Power attributes are supported on ADM1075, ADM1272,
+			ADM1276, ADM1293, and ADM1294.
 
 temp1_input		Chip temperature.
-			Temperature attributes are only available on ADM1278.
 temp1_max		Maximum chip temperature.
 temp1_max_alarm		Temperature alarm.
 temp1_crit		Critical chip temperature.
@@ -112,4 +115,5 @@ temp1_crit_alarm	Critical temperature high alarm.
 temp1_highest		Highest observed temperature.
 temp1_reset_history	Write any value to reset history.
 
-			Temperature attributes are supported on ADM1278.
+			Temperature attributes are supported on ADM1272 and
+			ADM1278.
diff --git a/Documentation/hwmon/lm92 b/Documentation/hwmon/lm92
index 22f68ad032cf..cfa99a353b8c 100644
--- a/Documentation/hwmon/lm92
+++ b/Documentation/hwmon/lm92
@@ -11,10 +11,8 @@ Supported chips:
     Addresses scanned: none, force parameter needed
     Datasheet: http://www.national.com/pf/LM/LM76.html
   * Maxim MAX6633/MAX6634/MAX6635
-    Prefix: 'lm92'
-    Addresses scanned: I2C 0x48 - 0x4b
-    MAX6633 with address in 0x40 - 0x47, 0x4c - 0x4f needs force parameter
-    and MAX6634 with address in 0x4c - 0x4f needs force parameter
+    Prefix: 'max6635'
+    Addresses scanned: none, force parameter needed
     Datasheet: http://www.maxim-ic.com/quick_view2.cfm/qv_pk/3074
 
 Authors:
diff --git a/Documentation/hwmon/nct6775 b/Documentation/hwmon/nct6775
index 76add4c9cd68..bd59834d310f 100644
--- a/Documentation/hwmon/nct6775
+++ b/Documentation/hwmon/nct6775
@@ -36,6 +36,14 @@ Supported chips:
     Prefix: 'nct6793'
     Addresses scanned: ISA address retrieved from Super I/O registers
     Datasheet: Available from Nuvoton upon request
+  * Nuvoton NCT6795D
+    Prefix: 'nct6795'
+    Addresses scanned: ISA address retrieved from Super I/O registers
+    Datasheet: Available from Nuvoton upon request
+  * Nuvoton NCT6796D
+    Prefix: 'nct6796'
+    Addresses scanned: ISA address retrieved from Super I/O registers
+    Datasheet: Available from Nuvoton upon request
 
 Authors:
         Guenter Roeck <linux@roeck-us.net>
@@ -88,10 +96,10 @@ The mode works for fan1-fan5.
 sysfs attributes
 ----------------
 
-pwm[1-5] - this file stores PWM duty cycle or DC value (fan speed) in range:
+pwm[1-7] - this file stores PWM duty cycle or DC value (fan speed) in range:
 	   0 (lowest speed) to 255 (full)
 
-pwm[1-5]_enable - this file controls mode of fan/temperature control:
+pwm[1-7]_enable - this file controls mode of fan/temperature control:
 	* 0 Fan control disabled (fans set to maximum speed)
 	* 1 Manual mode, write to pwm[0-5] any value 0-255
 	* 2 "Thermal Cruise" mode
@@ -99,16 +107,16 @@ pwm[1-5]_enable - this file controls mode of fan/temperature control:
 	* 4 "Smart Fan III" mode (NCT6775F only)
 	* 5 "Smart Fan IV" mode
 
-pwm[1-5]_mode - controls if output is PWM or DC level
+pwm[1-7]_mode - controls if output is PWM or DC level
         * 0 DC output
         * 1 PWM output
 
 Common fan control attributes
 -----------------------------
 
-pwm[1-5]_temp_sel	Temperature source. Value is temperature sensor index.
+pwm[1-7]_temp_sel	Temperature source. Value is temperature sensor index.
 			For example, select '1' for temp1_input.
-pwm[1-5]_weight_temp_sel
+pwm[1-7]_weight_temp_sel
 			Secondary temperature source. Value is temperature
 			sensor index. For example, select '1' for temp1_input.
 			Set to 0 to disable secondary temperature control.
@@ -116,16 +124,16 @@ pwm[1-5]_weight_temp_sel
 If secondary temperature functionality is enabled, it is controlled with the
 following attributes.
 
-pwm[1-5]_weight_duty_step
+pwm[1-7]_weight_duty_step
 			Duty step size.
-pwm[1-5]_weight_temp_step
+pwm[1-7]_weight_temp_step
 			Temperature step size. With each step over
 			temp_step_base, the value of weight_duty_step is added
 			to the current pwm value.
-pwm[1-5]_weight_temp_step_base
+pwm[1-7]_weight_temp_step_base
 			Temperature at which secondary temperature control kicks
 			in.
-pwm[1-5]_weight_temp_step_tol
+pwm[1-7]_weight_temp_step_tol
 			Temperature step tolerance.
 
 Thermal Cruise mode (2)
@@ -133,9 +141,9 @@ Thermal Cruise mode (2)
 
 If the temperature is in the range defined by:
 
-pwm[1-5]_target_temp	Target temperature, unit millidegree Celsius
+pwm[1-7]_target_temp	Target temperature, unit millidegree Celsius
 			(range 0 - 127000)
-pwm[1-5]_temp_tolerance
+pwm[1-7]_temp_tolerance
 			Target temperature tolerance, unit millidegree Celsius
 
 there are no changes to fan speed. Once the temperature leaves the interval, fan
@@ -143,14 +151,14 @@ speed increases (if temperature is higher that desired) or decreases (if
 temperature is lower than desired), using the following limits and time
 intervals.
 
-pwm[1-5]_start		fan pwm start value (range 1 - 255), to start fan
+pwm[1-7]_start		fan pwm start value (range 1 - 255), to start fan
 			when the temperature is above defined range.
-pwm[1-5]_floor		lowest fan pwm (range 0 - 255) if temperature is below
+pwm[1-7]_floor		lowest fan pwm (range 0 - 255) if temperature is below
 			the defined range. If set to 0, the fan is expected to
 			stop if the temperature is below the defined range.
-pwm[1-5]_step_up_time	milliseconds before fan speed is increased
-pwm[1-5]_step_down_time	milliseconds before fan speed is decreased
-pwm[1-5]_stop_time	how many milliseconds must elapse to switch
+pwm[1-7]_step_up_time	milliseconds before fan speed is increased
+pwm[1-7]_step_down_time	milliseconds before fan speed is decreased
+pwm[1-7]_stop_time	how many milliseconds must elapse to switch
 			corresponding fan off (when the temperature was below
 			defined range).
 
@@ -159,8 +167,8 @@ Speed Cruise mode (3)
 
 This modes tries to keep the fan speed constant.
 
-fan[1-5]_target		Target fan speed
-fan[1-5]_tolerance
+fan[1-7]_target		Target fan speed
+fan[1-7]_tolerance
 			Target speed tolerance
 
 
@@ -177,19 +185,19 @@ points should be set to higher temperatures and higher pwm values to achieve
 higher fan speeds with increasing temperature. The last data point reflects
 critical temperature mode, in which the fans should run at full speed.
 
-pwm[1-5]_auto_point[1-7]_pwm
+pwm[1-7]_auto_point[1-7]_pwm
 			pwm value to be set if temperature reaches matching
 			temperature range.
-pwm[1-5]_auto_point[1-7]_temp
+pwm[1-7]_auto_point[1-7]_temp
 			Temperature over which the matching pwm is enabled.
-pwm[1-5]_temp_tolerance
+pwm[1-7]_temp_tolerance
 			Temperature tolerance, unit millidegree Celsius
-pwm[1-5]_crit_temp_tolerance
+pwm[1-7]_crit_temp_tolerance
 			Temperature tolerance for critical temperature,
 			unit millidegree Celsius
 
-pwm[1-5]_step_up_time	milliseconds before fan speed is increased
-pwm[1-5]_step_down_time	milliseconds before fan speed is decreased
+pwm[1-7]_step_up_time	milliseconds before fan speed is increased
+pwm[1-7]_step_down_time	milliseconds before fan speed is decreased
 
 Usage Notes
 -----------
diff --git a/Documentation/hwmon/sht21 b/Documentation/hwmon/sht21
index 47f4765db256..8b3cdda541c1 100644
--- a/Documentation/hwmon/sht21
+++ b/Documentation/hwmon/sht21
@@ -6,13 +6,13 @@ Supported chips:
     Prefix: 'sht21'
     Addresses scanned: none
     Datasheet: Publicly available at the Sensirion website
-    http://www.sensirion.com/en/pdf/product_information/Datasheet-humidity-sensor-SHT21.pdf
+    http://www.sensirion.com/file/datasheet_sht21
 
   * Sensirion SHT25
-    Prefix: 'sht21'
+    Prefix: 'sht25'
     Addresses scanned: none
     Datasheet: Publicly available at the Sensirion website
-    http://www.sensirion.com/en/pdf/product_information/Datasheet-humidity-sensor-SHT25.pdf
+    http://www.sensirion.com/file/datasheet_sht25
 
 Author:
   Urs Fleisch <urs.fleisch@sensirion.com>
diff --git a/Documentation/hwmon/sht3x b/Documentation/hwmon/sht3x
index b0d88184f48e..d9daa6ab1e8e 100644
--- a/Documentation/hwmon/sht3x
+++ b/Documentation/hwmon/sht3x
@@ -5,7 +5,7 @@ Supported chips:
   * Sensirion SHT3x-DIS
     Prefix: 'sht3x'
     Addresses scanned: none
-    Datasheet: http://www.sensirion.com/fileadmin/user_upload/customers/sensirion/Dokumente/Humidity/Sensirion_Humidity_Datasheet_SHT3x_DIS.pdf
+    Datasheet: https://www.sensirion.com/file/datasheet_sht3x_digital
 
 Author:
   David Frey <david.frey@sensirion.com>
diff --git a/Documentation/media/kapi/v4l2-dev.rst b/Documentation/media/kapi/v4l2-dev.rst
index 7bb0505b60f1..eb03ccc41c41 100644
--- a/Documentation/media/kapi/v4l2-dev.rst
+++ b/Documentation/media/kapi/v4l2-dev.rst
@@ -31,7 +31,7 @@ of the video device exits.
 The default :c:func:`video_device_release` callback currently
 just calls ``kfree`` to free the allocated memory.
 
-There is also a ::c:func:`video_device_release_empty` function that does
+There is also a :c:func:`video_device_release_empty` function that does
 nothing (is empty) and should be used if the struct is embedded and there
 is nothing to do when it is released.
 
diff --git a/Documentation/media/uapi/mediactl/media-ioc-enum-entities.rst b/Documentation/media/uapi/mediactl/media-ioc-enum-entities.rst
index 45e76e5bc1ea..582fda488810 100644
--- a/Documentation/media/uapi/mediactl/media-ioc-enum-entities.rst
+++ b/Documentation/media/uapi/mediactl/media-ioc-enum-entities.rst
@@ -89,7 +89,7 @@ id's until they get an error.
 
        -
        -
-       -  Entity type, see :ref:`media-entity-type` for details.
+       -  Entity type, see :ref:`media-entity-functions` for details.
 
     -  .. row 4
 
diff --git a/Documentation/media/uapi/mediactl/media-ioc-g-topology.rst b/Documentation/media/uapi/mediactl/media-ioc-g-topology.rst
index c8f9ea37db2d..c4055ddf070a 100644
--- a/Documentation/media/uapi/mediactl/media-ioc-g-topology.rst
+++ b/Documentation/media/uapi/mediactl/media-ioc-g-topology.rst
@@ -205,13 +205,13 @@ desired arrays with the media graph elements.
 
        -  ``function``
 
-       -  Entity main function, see :ref:`media-entity-type` for details.
+       -  Entity main function, see :ref:`media-entity-functions` for details.
 
     -  .. row 4
 
        -  __u32
 
-       -  ``reserved``\ [12]
+       -  ``reserved``\ [6]
 
        -  Reserved for future extensions. Drivers and applications must set
 	  this array to zero.
@@ -334,7 +334,7 @@ desired arrays with the media graph elements.
 
        -  __u32
 
-       -  ``reserved``\ [9]
+       -  ``reserved``\ [5]
 
        -  Reserved for future extensions. Drivers and applications must set
 	  this array to zero.
@@ -390,7 +390,7 @@ desired arrays with the media graph elements.
 
        -  __u32
 
-       -  ``reserved``\ [5]
+       -  ``reserved``\ [6]
 
        -  Reserved for future extensions. Drivers and applications must set
 	  this array to zero.
diff --git a/Documentation/media/uapi/mediactl/media-types.rst b/Documentation/media/uapi/mediactl/media-types.rst
index f92f10b7ffbd..2dda14bd89b7 100644
--- a/Documentation/media/uapi/mediactl/media-types.rst
+++ b/Documentation/media/uapi/mediactl/media-types.rst
@@ -7,11 +7,11 @@ Types and flags used to represent the media graph elements
 
 ..  tabularcolumns:: |p{8.2cm}|p{10.3cm}|
 
-.. _media-entity-type:
+.. _media-entity-functions:
 
 .. cssclass:: longtable
 
-.. flat-table:: Media entity types
+.. flat-table:: Media entity functions
     :header-rows:  0
     :stub-columns: 0
 
diff --git a/Documentation/media/uapi/v4l/extended-controls.rst b/Documentation/media/uapi/v4l/extended-controls.rst
index d5f3eb6e674a..03931f9b1285 100644
--- a/Documentation/media/uapi/v4l/extended-controls.rst
+++ b/Documentation/media/uapi/v4l/extended-controls.rst
@@ -3565,7 +3565,7 @@ enum v4l2_dv_it_content_type -
     HDMI carries 5V on one of the pins). This is often used to power an
     eeprom which contains EDID information, such that the source can
     read the EDID even if the sink is in standby/power off. Each bit
-    corresponds to an input pad on the transmitter. If an input pad
+    corresponds to an input pad on the receiver. If an input pad
     cannot detect whether power is present, then the bit for that pad
     will be 0. This read-only control is applicable to DVI-D, HDMI and
     DisplayPort connectors.
diff --git a/Documentation/media/uapi/v4l/pixfmt-v4l2-mplane.rst b/Documentation/media/uapi/v4l/pixfmt-v4l2-mplane.rst
index 337e8188caf1..ef52f637d8e9 100644
--- a/Documentation/media/uapi/v4l/pixfmt-v4l2-mplane.rst
+++ b/Documentation/media/uapi/v4l/pixfmt-v4l2-mplane.rst
@@ -55,12 +55,14 @@ describing all planes of that format.
       - ``pixelformat``
       - The pixel format. Both single- and multi-planar four character
 	codes can be used.
-    * - enum :c:type:`v4l2_field`
+    * - __u32
       - ``field``
-      - See struct :c:type:`v4l2_pix_format`.
-    * - enum :c:type:`v4l2_colorspace`
+      - Field order, from enum :c:type:`v4l2_field`.
+        See struct :c:type:`v4l2_pix_format`.
+    * - __u32
       - ``colorspace``
-      - See struct :c:type:`v4l2_pix_format`.
+      - Colorspace encoding, from enum :c:type:`v4l2_colorspace`.
+        See struct :c:type:`v4l2_pix_format`.
     * - struct :c:type:`v4l2_plane_pix_format`
       - ``plane_fmt[VIDEO_MAX_PLANES]``
       - An array of structures describing format of each plane this pixel
@@ -73,24 +75,34 @@ describing all planes of that format.
     * - __u8
       - ``flags``
       - Flags set by the application or driver, see :ref:`format-flags`.
-    * - enum :c:type:`v4l2_ycbcr_encoding`
+    * - union {
+      - (anonymous)
+      -
+    * - __u8
       - ``ycbcr_enc``
-      - This information supplements the ``colorspace`` and must be set by
+      - Y'CbCr encoding, from enum :c:type:`v4l2_ycbcr_encoding`.
+        This information supplements the ``colorspace`` and must be set by
 	the driver for capture streams and by the application for output
 	streams, see :ref:`colorspaces`.
-    * - enum :c:type:`v4l2_hsv_encoding`
+    * - __u8
       - ``hsv_enc``
-      - This information supplements the ``colorspace`` and must be set by
+      - HSV encoding, from enum :c:type:`v4l2_hsv_encoding`.
+        This information supplements the ``colorspace`` and must be set by
 	the driver for capture streams and by the application for output
 	streams, see :ref:`colorspaces`.
-    * - enum :c:type:`v4l2_quantization`
+    * - }
+      -
+      -
+    * - __u8
       - ``quantization``
-      - This information supplements the ``colorspace`` and must be set by
+      - Quantization range, from enum :c:type:`v4l2_quantization`.
+        This information supplements the ``colorspace`` and must be set by
 	the driver for capture streams and by the application for output
 	streams, see :ref:`colorspaces`.
-    * - enum :c:type:`v4l2_xfer_func`
+    * - __u8
       - ``xfer_func``
-      - This information supplements the ``colorspace`` and must be set by
+      - Transfer function, from enum :c:type:`v4l2_xfer_func`.
+        This information supplements the ``colorspace`` and must be set by
 	the driver for capture streams and by the application for output
 	streams, see :ref:`colorspaces`.
     * - __u8
diff --git a/Documentation/media/uapi/v4l/pixfmt-v4l2.rst b/Documentation/media/uapi/v4l/pixfmt-v4l2.rst
index 6622938c1b41..826f2305da01 100644
--- a/Documentation/media/uapi/v4l/pixfmt-v4l2.rst
+++ b/Documentation/media/uapi/v4l/pixfmt-v4l2.rst
@@ -40,9 +40,10 @@ Single-planar format structure
 	RGB formats in :ref:`rgb-formats`, YUV formats in
 	:ref:`yuv-formats`, and reserved codes in
 	:ref:`reserved-formats`
-    * - enum :c:type:`v4l2_field`
+    * - __u32
       - ``field``
-      - Video images are typically interlaced. Applications can request to
+      - Field order, from enum :c:type:`v4l2_field`.
+        Video images are typically interlaced. Applications can request to
 	capture or output only the top or bottom field, or both fields
 	interlaced or sequentially stored in one buffer or alternating in
 	separate buffers. Drivers return the actual field order selected.
@@ -82,9 +83,10 @@ Single-planar format structure
 	driver. Usually this is ``bytesperline`` times ``height``. When
 	the image consists of variable length compressed data this is the
 	maximum number of bytes required to hold an image.
-    * - enum :c:type:`v4l2_colorspace`
+    * - __u32
       - ``colorspace``
-      - This information supplements the ``pixelformat`` and must be set
+      - Image colorspace, from enum :c:type:`v4l2_colorspace`.
+        This information supplements the ``pixelformat`` and must be set
 	by the driver for capture streams and by the application for
 	output streams, see :ref:`colorspaces`.
     * - __u32
@@ -116,23 +118,33 @@ Single-planar format structure
     * - __u32
       - ``flags``
       - Flags set by the application or driver, see :ref:`format-flags`.
-    * - enum :c:type:`v4l2_ycbcr_encoding`
+    * - union {
+      - (anonymous)
+      -
+    * - __u32
       - ``ycbcr_enc``
-      - This information supplements the ``colorspace`` and must be set by
+      - Y'CbCr encoding, from enum :c:type:`v4l2_ycbcr_encoding`.
+        This information supplements the ``colorspace`` and must be set by
 	the driver for capture streams and by the application for output
 	streams, see :ref:`colorspaces`.
-    * - enum :c:type:`v4l2_hsv_encoding`
+    * - __u32
       - ``hsv_enc``
-      - This information supplements the ``colorspace`` and must be set by
+      - HSV encoding, from enum :c:type:`v4l2_hsv_encoding`.
+        This information supplements the ``colorspace`` and must be set by
 	the driver for capture streams and by the application for output
 	streams, see :ref:`colorspaces`.
-    * - enum :c:type:`v4l2_quantization`
+    * - }
+      -
+      -
+    * - __u32
       - ``quantization``
-      - This information supplements the ``colorspace`` and must be set by
+      - Quantization range, from enum :c:type:`v4l2_quantization`.
+        This information supplements the ``colorspace`` and must be set by
 	the driver for capture streams and by the application for output
 	streams, see :ref:`colorspaces`.
-    * - enum :c:type:`v4l2_xfer_func`
+    * - __u32
       - ``xfer_func``
-      - This information supplements the ``colorspace`` and must be set by
+      - Transfer function, from enum :c:type:`v4l2_xfer_func`.
+        This information supplements the ``colorspace`` and must be set by
 	the driver for capture streams and by the application for output
 	streams, see :ref:`colorspaces`.
diff --git a/Documentation/process/4.Coding.rst b/Documentation/process/4.Coding.rst
index 26b106071364..eb4b185d168c 100644
--- a/Documentation/process/4.Coding.rst
+++ b/Documentation/process/4.Coding.rst
@@ -58,6 +58,14 @@ can never be transgressed.  If there is a good reason to go against the
 style (a line which becomes far less readable if split to fit within the
 80-column limit, for example), just do it.
 
+Note that you can also use the ``clang-format`` tool to help you with
+these rules, to quickly re-format parts of your code automatically,
+and to review full files in order to spot coding style mistakes,
+typos and possible improvements. It is also handy for sorting ``#includes``,
+for aligning variables/macros, for reflowing text and other similar tasks.
+See the file :ref:`Documentation/process/clang-format.rst <clangformat>`
+for more details.
+
 
 Abstraction layers
 ******************
diff --git a/Documentation/process/adding-syscalls.rst b/Documentation/process/adding-syscalls.rst
index 314c8bf6f2a2..0d4f29bc798b 100644
--- a/Documentation/process/adding-syscalls.rst
+++ b/Documentation/process/adding-syscalls.rst
@@ -360,7 +360,7 @@ First, the entry in ``arch/x86/entry/syscalls/syscall_32.tbl`` gets an extra
 column to indicate that a 32-bit userspace program running on a 64-bit kernel
 should hit the compat entry point::
 
-    380   i386     xyzzy     sys_xyzzy    compat_sys_xyzzy
+    380   i386     xyzzy     sys_xyzzy    __ia32_compat_sys_xyzzy
 
 Second, you need to figure out what should happen for the x32 ABI version of
 the new system call.  There's a choice here: the layout of the arguments
@@ -373,7 +373,7 @@ the compatibility wrapper::
 
     333   64       xyzzy     sys_xyzzy
     ...
-    555   x32      xyzzy     compat_sys_xyzzy
+    555   x32      xyzzy     __x32_compat_sys_xyzzy
 
 If no pointers are involved, then it is preferable to re-use the 64-bit system
 call for the x32 ABI (and consequently the entry in
diff --git a/Documentation/process/clang-format.rst b/Documentation/process/clang-format.rst
new file mode 100644
index 000000000000..6710c0707721
--- /dev/null
+++ b/Documentation/process/clang-format.rst
@@ -0,0 +1,184 @@
+.. _clangformat:
+
+clang-format
+============
+
+``clang-format`` is a tool to format C/C++/... code according to
+a set of rules and heuristics. Like most tools, it is not perfect,
+nor does it cover every single case, but it is good enough to be helpful.
+
+``clang-format`` can be used for several purposes:
+
+  - Quickly reformat a block of code to the kernel style. Especially useful
+    when moving code around and aligning/sorting. See clangformatreformat_.
+
+  - Spot style mistakes, typos and possible improvements in files
+    you maintain, patches you review, diffs, etc. See clangformatreview_.
+
+  - Help you follow the coding style rules, especially useful for those
+    new to kernel development or working at the same time in several
+    projects with different coding styles.
+
+Its configuration file is ``.clang-format`` in the root of the kernel tree.
+The rules contained there try to approximate the most common kernel
+coding style. They also try to follow :ref:`Documentation/process/coding-style.rst <codingstyle>`
+as much as possible. Since not all the kernel follows the same style,
+it is possible that you may want to tweak the defaults for a particular
+subsystem or folder. To do so, you can override the defaults by writing
+another ``.clang-format`` file in a subfolder.
+
+The tool itself has already been included in the repositories of popular
+Linux distributions for a long time. Search for ``clang-format`` in
+your repositories. Otherwise, you can either download pre-built
+LLVM/clang binaries or build the source code from:
+
+    http://releases.llvm.org/download.html
+
+See more information about the tool at:
+
+    https://clang.llvm.org/docs/ClangFormat.html
+
+    https://clang.llvm.org/docs/ClangFormatStyleOptions.html
+
+
+.. _clangformatreview:
+
+Review files and patches for coding style
+-----------------------------------------
+
+By running the tool in its in-place mode, you can review full subsystems,
+folders or individual files for code style mistakes, typos or improvements.
+
+To do so, you can run something like::
+
+    # Make sure your working directory is clean!
+    clang-format -i kernel/*.[ch]
+
+And then take a look at the git diff.
+
+Counting the lines of such a diff is also useful for improving/tweaking
+the style options in the configuration file, as well as for testing new
+``clang-format`` features/versions.
+
+``clang-format`` also supports reading unified diffs, so you can review
+patches and git diffs easily. See the documentation at:
+
+    https://clang.llvm.org/docs/ClangFormat.html#script-for-patch-reformatting
+
+To avoid ``clang-format`` formatting some portion of a file, you can do::
+
+    int formatted_code;
+    // clang-format off
+        void    unformatted_code  ;
+    // clang-format on
+    void formatted_code_again;
+
+While it might be tempting to use this to keep a file always in sync with
+``clang-format``, especially if you are writing new files or if you are
+a maintainer, please note that people might be running different
+``clang-format`` versions or not have it available at all. Therefore,
+you should probably refrain from using this in kernel sources;
+at least until we see if ``clang-format`` becomes commonplace.
+
+
+.. _clangformatreformat:
+
+Reformatting blocks of code
+---------------------------
+
+By using an integration with your text editor, you can reformat arbitrary
+blocks (selections) of code with a single keystroke. This is especially
+useful when moving code around, for complex code that is deeply indented,
+for multi-line macros (and aligning their backslashes), etc.
+
+Remember that you can always tweak the changes afterwards in those cases
+where the tool did not do an optimal job. But as a first approximation,
+it can be very useful.
+
+There are integrations for many popular text editors. For some of them,
+like vim, emacs, BBEdit and Visual Studio you can find support built-in.
+For instructions, read the appropriate section at:
+
+    https://clang.llvm.org/docs/ClangFormat.html
+
+For Atom, Eclipse, Sublime Text, Visual Studio Code, Xcode and other
+editors and IDEs you should be able to find ready-to-use plugins.
+
+For this use case, consider using a secondary ``.clang-format``
+so that you can tweak a few options. See clangformatextra_.
+
+
+.. _clangformatmissing:
+
+Missing support
+---------------
+
+``clang-format`` is missing support for some things that are common
+in kernel code. They are easy to remember, so if you use the tool
+regularly, you will quickly learn to avoid/ignore those.
+
+In particular, some very common ones you will notice are:
+
+  - Aligned blocks of one-line ``#defines``, e.g.::
+
+        #define TRACING_MAP_BITS_DEFAULT       11
+        #define TRACING_MAP_BITS_MAX           17
+        #define TRACING_MAP_BITS_MIN           7
+
+    vs.::
+
+        #define TRACING_MAP_BITS_DEFAULT 11
+        #define TRACING_MAP_BITS_MAX 17
+        #define TRACING_MAP_BITS_MIN 7
+
+  - Aligned designated initializers, e.g.::
+
+        static const struct file_operations uprobe_events_ops = {
+                .owner          = THIS_MODULE,
+                .open           = probes_open,
+                .read           = seq_read,
+                .llseek         = seq_lseek,
+                .release        = seq_release,
+                .write          = probes_write,
+        };
+
+    vs.::
+
+        static const struct file_operations uprobe_events_ops = {
+                .owner = THIS_MODULE,
+                .open = probes_open,
+                .read = seq_read,
+                .llseek = seq_lseek,
+                .release = seq_release,
+                .write = probes_write,
+        };
+
+
+.. _clangformatextra:
+
+Extra features/options
+----------------------
+
+Some features/style options are not enabled by default in the configuration
+file in order to minimize the differences between the output and the current
+code. In other words, to make the difference as small as possible,
+which makes reviewing full-file style, as well as diffs and patches, as easy
+as possible.
+
+In other cases (e.g. particular subsystems/folders/files), the kernel style
+might be different and enabling some of these options may better
+approximate the style there.
+
+For instance:
+
+  - Aligning assignments (``AlignConsecutiveAssignments``).
+
+  - Aligning declarations (``AlignConsecutiveDeclarations``).
+
+  - Reflowing text in comments (``ReflowComments``).
+
+  - Sorting ``#includes`` (``SortIncludes``).
+
+They are typically useful for block re-formatting, rather than full-file.
+You might want to create another ``.clang-format`` file and use that one
+from your editor/IDE instead.
diff --git a/Documentation/process/coding-style.rst b/Documentation/process/coding-style.rst
index d98deb62c400..4e7c0a1c427a 100644
--- a/Documentation/process/coding-style.rst
+++ b/Documentation/process/coding-style.rst
@@ -631,6 +631,14 @@ options ``-kr -i8`` (stands for ``K&R, 8 character indents``), or use
 re-formatting you may want to take a look at the man page.  But
 remember: ``indent`` is not a fix for bad programming.
 
+Note that you can also use the ``clang-format`` tool to help you with
+these rules, to quickly re-format parts of your code automatically,
+and to review full files in order to spot coding style mistakes,
+typos and possible improvements. It is also handy for sorting ``#includes``,
+for aligning variables/macros, for reflowing text and other similar tasks.
+See the file :ref:`Documentation/process/clang-format.rst <clangformat>`
+for more details.
+
 
 10) Kconfig configuration files
 -------------------------------
diff --git a/Documentation/s390/vfio-ccw.txt b/Documentation/s390/vfio-ccw.txt
index 90b3dfead81b..2be11ad864ff 100644
--- a/Documentation/s390/vfio-ccw.txt
+++ b/Documentation/s390/vfio-ccw.txt
@@ -28,7 +28,7 @@ every detail. More information/reference could be found here:
   https://en.wikipedia.org/wiki/Channel_I/O
 - s390 architecture:
   s390 Principles of Operation manual (IBM Form. No. SA22-7832)
-- The existing Qemu code which implements a simple emulated channel
+- The existing QEMU code which implements a simple emulated channel
   subsystem could also be a good reference. It makes it easier to follow
   the flow.
   qemu/hw/s390x/css.c
@@ -39,22 +39,22 @@ For vfio mediated device framework:
 Motivation of vfio-ccw
 ----------------------
 
-Currently, a guest virtualized via qemu/kvm on s390 only sees
+Typically, a guest virtualized via QEMU/KVM on s390 only sees
 paravirtualized virtio devices via the "Virtio Over Channel I/O
 (virtio-ccw)" transport. This makes virtio devices discoverable via
 standard operating system algorithms for handling channel devices.
 
 However this is not enough. On s390 for the majority of devices, which
 use the standard Channel I/O based mechanism, we also need to provide
-the functionality of passing through them to a Qemu virtual machine.
+the functionality of passing through them to a QEMU virtual machine.
 This includes devices that don't have a virtio counterpart (e.g. tape
 drives) or that have specific characteristics which guests want to
 exploit.
 
 For passing a device to a guest, we want to use the same interface as
-everybody else, namely vfio. Thus, we would like to introduce vfio
-support for channel devices. And we would like to name this new vfio
-device "vfio-ccw".
+everybody else, namely vfio. We implement this vfio support for channel
+devices via the vfio mediated device framework and the subchannel device
+driver "vfio_ccw".
 
 Access patterns of CCW devices
 ------------------------------
@@ -99,7 +99,7 @@ As mentioned above, we realize vfio-ccw with a mdev implementation.
 Channel I/O does not have IOMMU hardware support, so the physical
 vfio-ccw device does not have an IOMMU level translation or isolation.
 
-Sub-channel I/O instructions are all privileged instructions, When
+Subchannel I/O instructions are all privileged instructions. When
 handling the I/O instruction interception, vfio-ccw has the software
 policing and translation how the channel program is programmed before
 it gets sent to hardware.
@@ -121,7 +121,7 @@ devices:
 - The vfio_mdev driver for the mediated vfio ccw device.
   This is provided by the mdev framework. It is a vfio device driver for
   the mdev that created by vfio_ccw.
-  It realize a group of vfio device driver callbacks, adds itself to a
+  It realizes a group of vfio device driver callbacks, adds itself to a
   vfio group, and registers itself to the mdev framework as a mdev
   driver.
   It uses a vfio iommu backend that uses the existing map and unmap
@@ -178,7 +178,7 @@ vfio-ccw I/O region
 
 An I/O region is used to accept channel program request from user
 space and store I/O interrupt result for user space to retrieve. The
-defination of the region is:
+definition of the region is:
 
 struct ccw_io_region {
 #define ORB_AREA_SIZE 12
@@ -198,30 +198,23 @@ irb_area stores the I/O result.
 
 ret_code stores a return code for each access of the region.
 
-vfio-ccw patches overview
--------------------------
+vfio-ccw operation details
+--------------------------
 
-For now, our patches are rebased on the latest mdev implementation.
-vfio-ccw follows what vfio-pci did on the s390 paltform and uses
-vfio-iommu-type1 as the vfio iommu backend. It's a good start to launch
-the code review for vfio-ccw. Note that the implementation is far from
-complete yet; but we'd like to get feedback for the general
-architecture.
+vfio-ccw follows what vfio-pci did on the s390 platform and uses
+vfio-iommu-type1 as the vfio iommu backend.
 
 * CCW translation APIs
-- Description:
-  These introduce a group of APIs (start with 'cp_') to do CCW
-  translation. The CCWs passed in by a user space program are
-  organized with their guest physical memory addresses. These APIs
-  will copy the CCWs into the kernel space, and assemble a runnable
-  kernel channel program by updating the guest physical addresses with
-  their corresponding host physical addresses.
-- Patches:
-  vfio: ccw: introduce channel program interfaces
+  A group of APIs (starting with 'cp_') to do CCW translation. The CCWs
+  passed in by a user space program are organized with their guest
+  physical memory addresses. These APIs will copy the CCWs into kernel
+  space, and assemble a runnable kernel channel program by updating the
+  guest physical addresses with their corresponding host physical addresses.
+  Note that we have to use IDALs even for direct-access CCWs, as the
+  referenced memory can be located anywhere, including above 2G.
 
 * vfio_ccw device driver
-- Description:
-  The following patches utilizes the CCW translation APIs and introduce
+  This driver utilizes the CCW translation APIs and introduces
   vfio_ccw, which is the driver for the I/O subchannel devices you want
   to pass through.
   vfio_ccw implements the following vfio ioctls:
@@ -236,20 +229,14 @@ architecture.
   This also provides the SET_IRQ ioctl to setup an event notifier to
   notify the user space program the I/O completion in an asynchronous
   way.
-- Patches:
-  vfio: ccw: basic implementation for vfio_ccw driver
-  vfio: ccw: introduce ccw_io_region
-  vfio: ccw: realize VFIO_DEVICE_GET_REGION_INFO ioctl
-  vfio: ccw: realize VFIO_DEVICE_RESET ioctl
-  vfio: ccw: realize VFIO_DEVICE_G(S)ET_IRQ_INFO ioctls
-
-The user of vfio-ccw is not limited to Qemu, while Qemu is definitely a
+
+The use of vfio-ccw is not limited to QEMU, while QEMU is definitely a
 good example to get understand how these patches work. Here is a little
-bit more detail how an I/O request triggered by the Qemu guest will be
+bit more detail how an I/O request triggered by the QEMU guest will be
 handled (without error handling).
 
 Explanation:
-Q1-Q7: Qemu side process.
+Q1-Q7: QEMU side process.
 K1-K5: Kernel side process.
 
 Q1. Get I/O region info during initialization.
@@ -263,7 +250,7 @@ Q4. Write the guest channel program and ORB to the I/O region.
     K2. Translate the guest channel program to a host kernel space
         channel program, which becomes runnable for a real device.
     K3. With the necessary information contained in the orb passed in
-        by Qemu, issue the ccwchain to the device.
+        by QEMU, issue the ccwchain to the device.
     K4. Return the ssch CC code.
 Q5. Return the CC code to the guest.
 
@@ -271,7 +258,7 @@ Q5. Return the CC code to the guest.
 
     K5. Interrupt handler gets the I/O result and write the result to
         the I/O region.
-    K6. Signal Qemu to retrieve the result.
+    K6. Signal QEMU to retrieve the result.
 Q6. Get the signal and event handler reads out the result from the I/O
     region.
 Q7. Update the irb for the guest.
@@ -289,10 +276,20 @@ More information for DASD and ECKD could be found here:
 https://en.wikipedia.org/wiki/Direct-access_storage_device
 https://en.wikipedia.org/wiki/Count_key_data
 
-Together with the corresponding work in Qemu, we can bring the passed
+Together with the corresponding work in QEMU, we can bring the passed
 through DASD/ECKD device online in a guest now and use it as a block
 device.
 
+While the current code allows the guest to start channel programs via
+START SUBCHANNEL, support for HALT SUBCHANNEL or CLEAR SUBCHANNEL is
+not yet implemented.
+
+vfio-ccw supports classic (command mode) channel I/O only. Transport
+mode (HPF) is not supported.
+
+QDIO subchannels are currently not supported. Classic devices other than
+DASD/ECKD might work, but have not been tested.
+
 Reference
 ---------
 1. ESA/s390 Principles of Operation manual (IBM Form. No. SA22-7832)
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 412314eebda6..eded671d55eb 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -964,32 +964,34 @@ detect a hard lockup condition.
 
 tainted:
 
-Non-zero if the kernel has been tainted.  Numeric values, which
-can be ORed together:
-
-   1 - A module with a non-GPL license has been loaded, this
-       includes modules with no license.
-       Set by modutils >= 2.4.9 and module-init-tools.
-   2 - A module was force loaded by insmod -f.
-       Set by modutils >= 2.4.9 and module-init-tools.
-   4 - Unsafe SMP processors: SMP with CPUs not designed for SMP.
-   8 - A module was forcibly unloaded from the system by rmmod -f.
-  16 - A hardware machine check error occurred on the system.
-  32 - A bad page was discovered on the system.
-  64 - The user has asked that the system be marked "tainted".  This
-       could be because they are running software that directly modifies
-       the hardware, or for other reasons.
- 128 - The system has died.
- 256 - The ACPI DSDT has been overridden with one supplied by the user
-        instead of using the one provided by the hardware.
- 512 - A kernel warning has occurred.
-1024 - A module from drivers/staging was loaded.
-2048 - The system is working around a severe firmware bug.
-4096 - An out-of-tree module has been loaded.
-8192 - An unsigned module has been loaded in a kernel supporting module
-       signature.
-16384 - A soft lockup has previously occurred on the system.
-32768 - The kernel has been live patched.
+Non-zero if the kernel has been tainted. The numeric values below can be
+ORed together. The letters are seen in the "Tainted" line of Oops reports.
+
+     1 (P): A module with a non-GPL license has been loaded, this
+            includes modules with no license.
+            Set by modutils >= 2.4.9 and module-init-tools.
+     2 (F): A module was force loaded by insmod -f.
+            Set by modutils >= 2.4.9 and module-init-tools.
+     4 (S): Unsafe SMP processors: SMP with CPUs not designed for SMP.
+     8 (R): A module was forcibly unloaded from the system by rmmod -f.
+    16 (M): A hardware machine check error occurred on the system.
+    32 (B): A bad page was discovered on the system.
+    64 (U): The user has asked that the system be marked "tainted". This
+            could be because they are running software that directly modifies
+            the hardware, or for other reasons.
+   128 (D): The system has died.
+   256 (A): The ACPI DSDT has been overridden with one supplied by the user
+            instead of using the one provided by the hardware.
+   512 (W): A kernel warning has occurred.
+  1024 (C): A module from drivers/staging was loaded.
+  2048 (I): The system is working around a severe firmware bug.
+  4096 (O): An out-of-tree module has been loaded.
+  8192 (E): An unsigned module has been loaded in a kernel supporting module
+            signature.
+ 16384 (L): A soft lockup has previously occurred on the system.
+ 32768 (K): The kernel has been live patched.
+ 65536 (X): Auxiliary taint, defined for and used by distros.
+131072 (T): The kernel was built with the struct randomization plugin.
 
 ==============================================================
 
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index ff234d229cbb..17256f2ad919 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -312,8 +312,6 @@ The lowmem_reserve_ratio is an array. You can see them by reading this file.
 % cat /proc/sys/vm/lowmem_reserve_ratio
 256     256     32
 -
-Note: # of this elements is one fewer than number of zones. Because the highest
-      zone's value is not necessary for following calculation.
 
 But, these values are not used directly. The kernel calculates # of protection
 pages for each zones from them. These are shown as array of protection pages
@@ -364,7 +362,8 @@ As above expression, they are reciprocal number of ratio.
 pages of higher zones on the node.
 
 If you would like to protect more pages, smaller values are effective.
-The minimum value is 1 (1/1 -> 100%).
+The minimum value is 1 (1/1 -> 100%). A value less than 1 completely
+disables protection of the pages.
 
 ==============================================================
 
diff --git a/Documentation/thermal/sysfs-api.txt b/Documentation/thermal/sysfs-api.txt
index bb9a0a53e76b..911399730c1c 100644
--- a/Documentation/thermal/sysfs-api.txt
+++ b/Documentation/thermal/sysfs-api.txt
@@ -255,6 +255,7 @@ temperature) and throttle appropriate devices.
 2. sysfs attributes structure
 
 RO	read only value
+WO	write only value
 RW	read/write value
 
 Thermal sysfs attributes will be represented under /sys/class/thermal.
@@ -286,6 +287,11 @@ Thermal cooling device sys I/F, created once it's registered:
     |---type:			Type of the cooling device(processor/fan/...)
     |---max_state:		Maximum cooling state of the cooling device
     |---cur_state:		Current cooling state of the cooling device
+    |---stats:			Directory containing cooling device's statistics
+    |---stats/reset:		Writing any value resets the statistics
+    |---stats/time_in_state_ms:	Time (msec) spent in various cooling states
+    |---stats/total_trans:	Total number of times cooling state is changed
+    |---stats/trans_table:	Cooling state transition table
 
 
 Then next two dynamic attributes are created/removed in pairs. They represent
@@ -490,6 +496,31 @@ cur_state
 	- cur_state == max_state means the maximum cooling.
 	RW, Required
 
+stats/reset
+	Writing any value resets the cooling device's statistics.
+	WO, Required
+
+stats/time_in_state_ms:
+	The amount of time spent by the cooling device in various cooling
+	states. The output will have "<state> <time>" pair in each line, which
+	will mean this cooling device spent <time> msec of time at <state>.
+	Output will have one line for each of the supported states. Times are
+	reported in msec, as the attribute name indicates.
+	RO, Required
+
+stats/total_trans:
+	A single positive value showing the total number of times the state of a
+	cooling device is changed.
+	RO, Required
+
+stats/trans_table:
+	This gives fine grained information about all the cooling state
+	transitions. The cat output here is a two dimensional matrix, where an
+	entry <i,j> (row i, column j) represents the number of transitions from
+	State_i to State_j. If the transition table is bigger than PAGE_SIZE,
+	reading this will return an -EFBIG error.
+	RO, Required
+
 3. A simple implementation
 
 ACPI thermal zone may support multiple trip points like critical, hot,
diff --git a/Documentation/trace/events.rst b/Documentation/trace/events.rst
index bdf1963ba6ba..a5ea2cb0082b 100644
--- a/Documentation/trace/events.rst
+++ b/Documentation/trace/events.rst
@@ -520,1550 +520,4 @@ The following commands are supported:
   totals derived from one or more trace event format fields and/or
   event counts (hitcount).
 
-  The format of a hist trigger is as follows::
-
-        hist:keys=<field1[,field2,...]>[:values=<field1[,field2,...]>]
-          [:sort=<field1[,field2,...]>][:size=#entries][:pause][:continue]
-          [:clear][:name=histname1] [if <filter>]
-
-  When a matching event is hit, an entry is added to a hash table
-  using the key(s) and value(s) named.  Keys and values correspond to
-  fields in the event's format description.  Values must correspond to
-  numeric fields - on an event hit, the value(s) will be added to a
-  sum kept for that field.  The special string 'hitcount' can be used
-  in place of an explicit value field - this is simply a count of
-  event hits.  If 'values' isn't specified, an implicit 'hitcount'
-  value will be automatically created and used as the only value.
-  Keys can be any field, or the special string 'stacktrace', which
-  will use the event's kernel stacktrace as the key.  The keywords
-  'keys' or 'key' can be used to specify keys, and the keywords
-  'values', 'vals', or 'val' can be used to specify values.  Compound
-  keys consisting of up to two fields can be specified by the 'keys'
-  keyword.  Hashing a compound key produces a unique entry in the
-  table for each unique combination of component keys, and can be
-  useful for providing more fine-grained summaries of event data.
-  Additionally, sort keys consisting of up to two fields can be
-  specified by the 'sort' keyword.  If more than one field is
-  specified, the result will be a 'sort within a sort': the first key
-  is taken to be the primary sort key and the second the secondary
-  key.  If a hist trigger is given a name using the 'name' parameter,
-  its histogram data will be shared with other triggers of the same
-  name, and trigger hits will update this common data.  Only triggers
-  with 'compatible' fields can be combined in this way; triggers are
-  'compatible' if the fields named in the trigger share the same
-  number and type of fields and those fields also have the same names.
-  Note that any two events always share the compatible 'hitcount' and
-  'stacktrace' fields and can therefore be combined using those
-  fields, however pointless that may be.
-
-  'hist' triggers add a 'hist' file to each event's subdirectory.
-  Reading the 'hist' file for the event will dump the hash table in
-  its entirety to stdout.  If there are multiple hist triggers
-  attached to an event, there will be a table for each trigger in the
-  output.  The table displayed for a named trigger will be the same as
-  any other instance having the same name. Each printed hash table
-  entry is a simple list of the keys and values comprising the entry;
-  keys are printed first and are delineated by curly braces, and are
-  followed by the set of value fields for the entry.  By default,
-  numeric fields are displayed as base-10 integers.  This can be
-  modified by appending any of the following modifiers to the field
-  name:
-
-        - .hex        display a number as a hex value
-	- .sym        display an address as a symbol
-	- .sym-offset display an address as a symbol and offset
-	- .syscall    display a syscall id as a system call name
-	- .execname   display a common_pid as a program name
-
-  Note that in general the semantics of a given field aren't
-  interpreted when applying a modifier to it, but there are some
-  restrictions to be aware of in this regard:
-
-    - only the 'hex' modifier can be used for values (because values
-      are essentially sums, and the other modifiers don't make sense
-      in that context).
-    - the 'execname' modifier can only be used on a 'common_pid'.  The
-      reason for this is that the execname is simply the 'comm' value
-      saved for the 'current' process when an event was triggered,
-      which is the same as the common_pid value saved by the event
-      tracing code.  Trying to apply that comm value to other pid
-      values wouldn't be correct, and typically events that care save
-      pid-specific comm fields in the event itself.
-
-  A typical usage scenario would be the following to enable a hist
-  trigger, read its current contents, and then turn it off::
-
-	  # echo 'hist:keys=skbaddr.hex:vals=len' > \
-	    /sys/kernel/debug/tracing/events/net/netif_rx/trigger
-
-	  # cat /sys/kernel/debug/tracing/events/net/netif_rx/hist
-
-	  # echo '!hist:keys=skbaddr.hex:vals=len' > \
-	    /sys/kernel/debug/tracing/events/net/netif_rx/trigger
-
-  The trigger file itself can be read to show the details of the
-  currently attached hist trigger.  This information is also displayed
-  at the top of the 'hist' file when read.
-
-  By default, the size of the hash table is 2048 entries.  The 'size'
-  parameter can be used to specify more or fewer than that.  The units
-  are in terms of hashtable entries - if a run uses more entries than
-  specified, the results will show the number of 'drops', the number
-  of hits that were ignored.  The size should be a power of 2 between
-  128 and 131072 (any non- power-of-2 number specified will be rounded
-  up).
-
-  The 'sort' parameter can be used to specify a value field to sort
-  on.  The default if unspecified is 'hitcount' and the default sort
-  order is 'ascending'.  To sort in the opposite direction, append
-  .descending' to the sort key.
-
-  The 'pause' parameter can be used to pause an existing hist trigger
-  or to start a hist trigger but not log any events until told to do
-  so.  'continue' or 'cont' can be used to start or restart a paused
-  hist trigger.
-
-  The 'clear' parameter will clear the contents of a running hist
-  trigger and leave its current paused/active state.
-
-  Note that the 'pause', 'cont', and 'clear' parameters should be
-  applied using 'append' shell operator ('>>') if applied to an
-  existing trigger, rather than via the '>' operator, which will cause
-  the trigger to be removed through truncation.
-
-- enable_hist/disable_hist
-
-  The enable_hist and disable_hist triggers can be used to have one
-  event conditionally start and stop another event's already-attached
-  hist trigger.  Any number of enable_hist and disable_hist triggers
-  can be attached to a given event, allowing that event to kick off
-  and stop aggregations on a host of other events.
-
-  The format is very similar to the enable/disable_event triggers::
-
-      enable_hist:<system>:<event>[:count]
-      disable_hist:<system>:<event>[:count]
-
-  Instead of enabling or disabling the tracing of the target event
-  into the trace buffer as the enable/disable_event triggers do, the
-  enable/disable_hist triggers enable or disable the aggregation of
-  the target event into a hash table.
-
-  A typical usage scenario for the enable_hist/disable_hist triggers
-  would be to first set up a paused hist trigger on some event,
-  followed by an enable_hist/disable_hist pair that turns the hist
-  aggregation on and off when conditions of interest are hit::
-
-	  # echo 'hist:keys=skbaddr.hex:vals=len:pause' > \
-	    /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
-
-	  # echo 'enable_hist:net:netif_receive_skb if filename==/usr/bin/wget' > \
-	    /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger
-
-	  # echo 'disable_hist:net:netif_receive_skb if comm==wget' > \
-	    /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger
-
-  The above sets up an initially paused hist trigger which is unpaused
-  and starts aggregating events when a given program is executed, and
-  which stops aggregating when the process exits and the hist trigger
-  is paused again.
-
-  The examples below provide a more concrete illustration of the
-  concepts and typical usage patterns discussed above.
-
-
-6.2 'hist' trigger examples
----------------------------
-
-  The first set of examples creates aggregations using the kmalloc
-  event.  The fields that can be used for the hist trigger are listed
-  in the kmalloc event's format file::
-
-    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/format
-    name: kmalloc
-    ID: 374
-    format:
-	field:unsigned short common_type;	offset:0;	size:2;	signed:0;
-	field:unsigned char common_flags;	offset:2;	size:1;	signed:0;
-	field:unsigned char common_preempt_count;		offset:3;	size:1;	signed:0;
-	field:int common_pid;					offset:4;	size:4;	signed:1;
-
-	field:unsigned long call_site;				offset:8;	size:8;	signed:0;
-	field:const void * ptr;					offset:16;	size:8;	signed:0;
-	field:size_t bytes_req;					offset:24;	size:8;	signed:0;
-	field:size_t bytes_alloc;				offset:32;	size:8;	signed:0;
-	field:gfp_t gfp_flags;					offset:40;	size:4;	signed:0;
-
-  We'll start by creating a hist trigger that generates a simple table
-  that lists the total number of bytes requested for each function in
-  the kernel that made one or more calls to kmalloc::
-
-    # echo 'hist:key=call_site:val=bytes_req' > \
-            /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
-
-  This tells the tracing system to create a 'hist' trigger using the
-  call_site field of the kmalloc event as the key for the table, which
-  just means that each unique call_site address will have an entry
-  created for it in the table.  The 'val=bytes_req' parameter tells
-  the hist trigger that for each unique entry (call_site) in the
-  table, it should keep a running total of the number of bytes
-  requested by that call_site.
-
-  We'll let it run for awhile and then dump the contents of the 'hist'
-  file in the kmalloc event's subdirectory (for readability, a number
-  of entries have been omitted)::
-
-    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
-    # trigger info: hist:keys=call_site:vals=bytes_req:sort=hitcount:size=2048 [active]
-
-    { call_site: 18446744072106379007 } hitcount:          1  bytes_req:        176
-    { call_site: 18446744071579557049 } hitcount:          1  bytes_req:       1024
-    { call_site: 18446744071580608289 } hitcount:          1  bytes_req:      16384
-    { call_site: 18446744071581827654 } hitcount:          1  bytes_req:         24
-    { call_site: 18446744071580700980 } hitcount:          1  bytes_req:          8
-    { call_site: 18446744071579359876 } hitcount:          1  bytes_req:        152
-    { call_site: 18446744071580795365 } hitcount:          3  bytes_req:        144
-    { call_site: 18446744071581303129 } hitcount:          3  bytes_req:        144
-    { call_site: 18446744071580713234 } hitcount:          4  bytes_req:       2560
-    { call_site: 18446744071580933750 } hitcount:          4  bytes_req:        736
-    .
-    .
-    .
-    { call_site: 18446744072106047046 } hitcount:         69  bytes_req:       5576
-    { call_site: 18446744071582116407 } hitcount:         73  bytes_req:       2336
-    { call_site: 18446744072106054684 } hitcount:        136  bytes_req:     140504
-    { call_site: 18446744072106224230 } hitcount:        136  bytes_req:      19584
-    { call_site: 18446744072106078074 } hitcount:        153  bytes_req:       2448
-    { call_site: 18446744072106062406 } hitcount:        153  bytes_req:      36720
-    { call_site: 18446744071582507929 } hitcount:        153  bytes_req:      37088
-    { call_site: 18446744072102520590 } hitcount:        273  bytes_req:      10920
-    { call_site: 18446744071582143559 } hitcount:        358  bytes_req:        716
-    { call_site: 18446744072106465852 } hitcount:        417  bytes_req:      56712
-    { call_site: 18446744072102523378 } hitcount:        485  bytes_req:      27160
-    { call_site: 18446744072099568646 } hitcount:       1676  bytes_req:      33520
-
-    Totals:
-        Hits: 4610
-        Entries: 45
-        Dropped: 0
-
-  The output displays a line for each entry, beginning with the key
-  specified in the trigger, followed by the value(s) also specified in
-  the trigger.  At the beginning of the output is a line that displays
-  the trigger info, which can also be displayed by reading the
-  'trigger' file::
-
-    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
-    hist:keys=call_site:vals=bytes_req:sort=hitcount:size=2048 [active]
-
-  At the end of the output are a few lines that display the overall
-  totals for the run.  The 'Hits' field shows the total number of
-  times the event trigger was hit, the 'Entries' field shows the total
-  number of used entries in the hash table, and the 'Dropped' field
-  shows the number of hits that were dropped because the number of
-  used entries for the run exceeded the maximum number of entries
-  allowed for the table (normally 0, but if not a hint that you may
-  want to increase the size of the table using the 'size' parameter).
-
-  Notice in the above output that there's an extra field, 'hitcount',
-  which wasn't specified in the trigger.  Also notice that in the
-  trigger info output, there's a parameter, 'sort=hitcount', which
-  wasn't specified in the trigger either.  The reason for that is that
-  every trigger implicitly keeps a count of the total number of hits
-  attributed to a given entry, called the 'hitcount'.  That hitcount
-  information is explicitly displayed in the output, and in the
-  absence of a user-specified sort parameter, is used as the default
-  sort field.
-
-  The value 'hitcount' can be used in place of an explicit value in
-  the 'values' parameter if you don't really need to have any
-  particular field summed and are mainly interested in hit
-  frequencies.
-
-  To turn the hist trigger off, simply call up the trigger in the
-  command history and re-execute it with a '!' prepended::
-
-    # echo '!hist:key=call_site:val=bytes_req' > \
-           /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
-
-  Finally, notice that the call_site as displayed in the output above
-  isn't really very useful.  It's an address, but normally addresses
-  are displayed in hex.  To have a numeric field displayed as a hex
-  value, simply append '.hex' to the field name in the trigger::
-
-    # echo 'hist:key=call_site.hex:val=bytes_req' > \
-           /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
-
-    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
-    # trigger info: hist:keys=call_site.hex:vals=bytes_req:sort=hitcount:size=2048 [active]
-
-    { call_site: ffffffffa026b291 } hitcount:          1  bytes_req:        433
-    { call_site: ffffffffa07186ff } hitcount:          1  bytes_req:        176
-    { call_site: ffffffff811ae721 } hitcount:          1  bytes_req:      16384
-    { call_site: ffffffff811c5134 } hitcount:          1  bytes_req:          8
-    { call_site: ffffffffa04a9ebb } hitcount:          1  bytes_req:        511
-    { call_site: ffffffff8122e0a6 } hitcount:          1  bytes_req:         12
-    { call_site: ffffffff8107da84 } hitcount:          1  bytes_req:        152
-    { call_site: ffffffff812d8246 } hitcount:          1  bytes_req:         24
-    { call_site: ffffffff811dc1e5 } hitcount:          3  bytes_req:        144
-    { call_site: ffffffffa02515e8 } hitcount:          3  bytes_req:        648
-    { call_site: ffffffff81258159 } hitcount:          3  bytes_req:        144
-    { call_site: ffffffff811c80f4 } hitcount:          4  bytes_req:        544
-    .
-    .
-    .
-    { call_site: ffffffffa06c7646 } hitcount:        106  bytes_req:       8024
-    { call_site: ffffffffa06cb246 } hitcount:        132  bytes_req:      31680
-    { call_site: ffffffffa06cef7a } hitcount:        132  bytes_req:       2112
-    { call_site: ffffffff8137e399 } hitcount:        132  bytes_req:      23232
-    { call_site: ffffffffa06c941c } hitcount:        185  bytes_req:     171360
-    { call_site: ffffffffa06f2a66 } hitcount:        185  bytes_req:      26640
-    { call_site: ffffffffa036a70e } hitcount:        265  bytes_req:      10600
-    { call_site: ffffffff81325447 } hitcount:        292  bytes_req:        584
-    { call_site: ffffffffa072da3c } hitcount:        446  bytes_req:      60656
-    { call_site: ffffffffa036b1f2 } hitcount:        526  bytes_req:      29456
-    { call_site: ffffffffa0099c06 } hitcount:       1780  bytes_req:      35600
-
-    Totals:
-        Hits: 4775
-        Entries: 46
-        Dropped: 0
-
-  Even that's only marginally more useful - while hex values do look
-  more like addresses, what users are typically more interested in
-  when looking at text addresses are the corresponding symbols
-  instead.  To have an address displayed as a symbolic value instead,
-  simply append '.sym' or '.sym-offset' to the field name in the
-  trigger::
-
-    # echo 'hist:key=call_site.sym:val=bytes_req' > \
-           /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
-
-    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
-    # trigger info: hist:keys=call_site.sym:vals=bytes_req:sort=hitcount:size=2048 [active]
-
-    { call_site: [ffffffff810adcb9] syslog_print_all                              } hitcount:          1  bytes_req:       1024
-    { call_site: [ffffffff8154bc62] usb_control_msg                               } hitcount:          1  bytes_req:          8
-    { call_site: [ffffffffa00bf6fe] hidraw_send_report [hid]                      } hitcount:          1  bytes_req:          7
-    { call_site: [ffffffff8154acbe] usb_alloc_urb                                 } hitcount:          1  bytes_req:        192
-    { call_site: [ffffffffa00bf1ca] hidraw_report_event [hid]                     } hitcount:          1  bytes_req:          7
-    { call_site: [ffffffff811e3a25] __seq_open_private                            } hitcount:          1  bytes_req:         40
-    { call_site: [ffffffff8109524a] alloc_fair_sched_group                        } hitcount:          2  bytes_req:        128
-    { call_site: [ffffffff811febd5] fsnotify_alloc_group                          } hitcount:          2  bytes_req:        528
-    { call_site: [ffffffff81440f58] __tty_buffer_request_room                     } hitcount:          2  bytes_req:       2624
-    { call_site: [ffffffff81200ba6] inotify_new_group                             } hitcount:          2  bytes_req:         96
-    { call_site: [ffffffffa05e19af] ieee80211_start_tx_ba_session [mac80211]      } hitcount:          2  bytes_req:        464
-    { call_site: [ffffffff81672406] tcp_get_metrics                               } hitcount:          2  bytes_req:        304
-    { call_site: [ffffffff81097ec2] alloc_rt_sched_group                          } hitcount:          2  bytes_req:        128
-    { call_site: [ffffffff81089b05] sched_create_group                            } hitcount:          2  bytes_req:       1424
-    .
-    .
-    .
-    { call_site: [ffffffffa04a580c] intel_crtc_page_flip [i915]                   } hitcount:       1185  bytes_req:     123240
-    { call_site: [ffffffffa0287592] drm_mode_page_flip_ioctl [drm]                } hitcount:       1185  bytes_req:     104280
-    { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state [i915]            } hitcount:       1402  bytes_req:     190672
-    { call_site: [ffffffff812891ca] ext4_find_extent                              } hitcount:       1518  bytes_req:     146208
-    { call_site: [ffffffffa029070e] drm_vma_node_allow [drm]                      } hitcount:       1746  bytes_req:      69840
-    { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23 [i915]         } hitcount:       2021  bytes_req:     792312
-    { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc [drm]                   } hitcount:       2592  bytes_req:     145152
-    { call_site: [ffffffffa0489a66] intel_ring_begin [i915]                       } hitcount:       2629  bytes_req:     378576
-    { call_site: [ffffffffa046041c] i915_gem_execbuffer2 [i915]                   } hitcount:       2629  bytes_req:    3783248
-    { call_site: [ffffffff81325607] apparmor_file_alloc_security                  } hitcount:       5192  bytes_req:      10384
-    { call_site: [ffffffffa00b7c06] hid_report_raw_event [hid]                    } hitcount:       5529  bytes_req:     110584
-    { call_site: [ffffffff8131ebf7] aa_alloc_task_context                         } hitcount:      21943  bytes_req:     702176
-    { call_site: [ffffffff8125847d] ext4_htree_store_dirent                       } hitcount:      55759  bytes_req:    5074265
-
-    Totals:
-        Hits: 109928
-        Entries: 71
-        Dropped: 0
-
-  Because the default sort key above is 'hitcount', the above shows
-  the list of call_sites by increasing hitcount, so that at the bottom
-  we see the functions that made the most kmalloc calls during the
-  run.  If instead we wanted to see the top kmalloc callers in
-  terms of the number of bytes requested rather than the number of
-  calls, and we wanted the top caller to appear at the top, we can use
-  the 'sort' parameter, along with the 'descending' modifier::
-
-    # echo 'hist:key=call_site.sym:val=bytes_req:sort=bytes_req.descending' > \
-           /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
-
-    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
-    # trigger info: hist:keys=call_site.sym:vals=bytes_req:sort=bytes_req.descending:size=2048 [active]
-
-    { call_site: [ffffffffa046041c] i915_gem_execbuffer2 [i915]                   } hitcount:       2186  bytes_req:    3397464
-    { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23 [i915]         } hitcount:       1790  bytes_req:     712176
-    { call_site: [ffffffff8125847d] ext4_htree_store_dirent                       } hitcount:       8132  bytes_req:     513135
-    { call_site: [ffffffff811e2a1b] seq_buf_alloc                                 } hitcount:        106  bytes_req:     440128
-    { call_site: [ffffffffa0489a66] intel_ring_begin [i915]                       } hitcount:       2186  bytes_req:     314784
-    { call_site: [ffffffff812891ca] ext4_find_extent                              } hitcount:       2174  bytes_req:     208992
-    { call_site: [ffffffff811ae8e1] __kmalloc                                     } hitcount:          8  bytes_req:     131072
-    { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state [i915]            } hitcount:        859  bytes_req:     116824
-    { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc [drm]                   } hitcount:       1834  bytes_req:     102704
-    { call_site: [ffffffffa04a580c] intel_crtc_page_flip [i915]                   } hitcount:        972  bytes_req:     101088
-    { call_site: [ffffffffa0287592] drm_mode_page_flip_ioctl [drm]                } hitcount:        972  bytes_req:      85536
-    { call_site: [ffffffffa00b7c06] hid_report_raw_event [hid]                    } hitcount:       3333  bytes_req:      66664
-    { call_site: [ffffffff8137e559] sg_kmalloc                                    } hitcount:        209  bytes_req:      61632
-    .
-    .
-    .
-    { call_site: [ffffffff81095225] alloc_fair_sched_group                        } hitcount:          2  bytes_req:        128
-    { call_site: [ffffffff81097ec2] alloc_rt_sched_group                          } hitcount:          2  bytes_req:        128
-    { call_site: [ffffffff812d8406] copy_semundo                                  } hitcount:          2  bytes_req:         48
-    { call_site: [ffffffff81200ba6] inotify_new_group                             } hitcount:          1  bytes_req:         48
-    { call_site: [ffffffffa027121a] drm_getmagic [drm]                            } hitcount:          1  bytes_req:         48
-    { call_site: [ffffffff811e3a25] __seq_open_private                            } hitcount:          1  bytes_req:         40
-    { call_site: [ffffffff811c52f4] bprm_change_interp                            } hitcount:          2  bytes_req:         16
-    { call_site: [ffffffff8154bc62] usb_control_msg                               } hitcount:          1  bytes_req:          8
-    { call_site: [ffffffffa00bf1ca] hidraw_report_event [hid]                     } hitcount:          1  bytes_req:          7
-    { call_site: [ffffffffa00bf6fe] hidraw_send_report [hid]                      } hitcount:          1  bytes_req:          7
-
-    Totals:
-        Hits: 32133
-        Entries: 81
-        Dropped: 0
-
-  To display the offset and size information in addition to the symbol
-  name, just use 'sym-offset' instead::
-
-    # echo 'hist:key=call_site.sym-offset:val=bytes_req:sort=bytes_req.descending' > \
-           /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
-
-    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
-    # trigger info: hist:keys=call_site.sym-offset:vals=bytes_req:sort=bytes_req.descending:size=2048 [active]
-
-    { call_site: [ffffffffa046041c] i915_gem_execbuffer2+0x6c/0x2c0 [i915]                  } hitcount:       4569  bytes_req:    3163720
-    { call_site: [ffffffffa0489a66] intel_ring_begin+0xc6/0x1f0 [i915]                      } hitcount:       4569  bytes_req:     657936
-    { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23+0x694/0x1020 [i915]      } hitcount:       1519  bytes_req:     472936
-    { call_site: [ffffffffa045e646] i915_gem_do_execbuffer.isra.23+0x516/0x1020 [i915]      } hitcount:       3050  bytes_req:     211832
-    { call_site: [ffffffff811e2a1b] seq_buf_alloc+0x1b/0x50                                 } hitcount:         34  bytes_req:     148384
-    { call_site: [ffffffffa04a580c] intel_crtc_page_flip+0xbc/0x870 [i915]                  } hitcount:       1385  bytes_req:     144040
-    { call_site: [ffffffff811ae8e1] __kmalloc+0x191/0x1b0                                   } hitcount:          8  bytes_req:     131072
-    { call_site: [ffffffffa0287592] drm_mode_page_flip_ioctl+0x282/0x360 [drm]              } hitcount:       1385  bytes_req:     121880
-    { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc+0x32/0x100 [drm]                  } hitcount:       1848  bytes_req:     103488
-    { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state+0x2c/0xa0 [i915]            } hitcount:        461  bytes_req:      62696
-    { call_site: [ffffffffa029070e] drm_vma_node_allow+0x2e/0xd0 [drm]                      } hitcount:       1541  bytes_req:      61640
-    { call_site: [ffffffff815f8d7b] sk_prot_alloc+0xcb/0x1b0                                } hitcount:         57  bytes_req:      57456
-    .
-    .
-    .
-    { call_site: [ffffffff8109524a] alloc_fair_sched_group+0x5a/0x1a0                       } hitcount:          2  bytes_req:        128
-    { call_site: [ffffffffa027b921] drm_vm_open_locked+0x31/0xa0 [drm]                      } hitcount:          3  bytes_req:         96
-    { call_site: [ffffffff8122e266] proc_self_follow_link+0x76/0xb0                         } hitcount:          8  bytes_req:         96
-    { call_site: [ffffffff81213e80] load_elf_binary+0x240/0x1650                            } hitcount:          3  bytes_req:         84
-    { call_site: [ffffffff8154bc62] usb_control_msg+0x42/0x110                              } hitcount:          1  bytes_req:          8
-    { call_site: [ffffffffa00bf6fe] hidraw_send_report+0x7e/0x1a0 [hid]                     } hitcount:          1  bytes_req:          7
-    { call_site: [ffffffffa00bf1ca] hidraw_report_event+0x8a/0x120 [hid]                    } hitcount:          1  bytes_req:          7
-
-    Totals:
-        Hits: 26098
-        Entries: 64
-        Dropped: 0
-
-  We can also add multiple fields to the 'values' parameter.  For
-  example, we might want to see the total number of bytes allocated
-  alongside bytes requested, and display the result sorted by bytes
-  allocated in a descending order::
-
-    # echo 'hist:keys=call_site.sym:values=bytes_req,bytes_alloc:sort=bytes_alloc.descending' > \
-           /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
-
-    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
-    # trigger info: hist:keys=call_site.sym:vals=bytes_req,bytes_alloc:sort=bytes_alloc.descending:size=2048 [active]
-
-    { call_site: [ffffffffa046041c] i915_gem_execbuffer2 [i915]                   } hitcount:       7403  bytes_req:    4084360  bytes_alloc:    5958016
-    { call_site: [ffffffff811e2a1b] seq_buf_alloc                                 } hitcount:        541  bytes_req:    2213968  bytes_alloc:    2228224
-    { call_site: [ffffffffa0489a66] intel_ring_begin [i915]                       } hitcount:       7404  bytes_req:    1066176  bytes_alloc:    1421568
-    { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23 [i915]         } hitcount:       1565  bytes_req:     557368  bytes_alloc:    1037760
-    { call_site: [ffffffff8125847d] ext4_htree_store_dirent                       } hitcount:       9557  bytes_req:     595778  bytes_alloc:     695744
-    { call_site: [ffffffffa045e646] i915_gem_do_execbuffer.isra.23 [i915]         } hitcount:       5839  bytes_req:     430680  bytes_alloc:     470400
-    { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state [i915]            } hitcount:       2388  bytes_req:     324768  bytes_alloc:     458496
-    { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc [drm]                   } hitcount:       3911  bytes_req:     219016  bytes_alloc:     250304
-    { call_site: [ffffffff815f8d7b] sk_prot_alloc                                 } hitcount:        235  bytes_req:     236880  bytes_alloc:     240640
-    { call_site: [ffffffff8137e559] sg_kmalloc                                    } hitcount:        557  bytes_req:     169024  bytes_alloc:     221760
-    { call_site: [ffffffffa00b7c06] hid_report_raw_event [hid]                    } hitcount:       9378  bytes_req:     187548  bytes_alloc:     206312
-    { call_site: [ffffffffa04a580c] intel_crtc_page_flip [i915]                   } hitcount:       1519  bytes_req:     157976  bytes_alloc:     194432
-    .
-    .
-    .
-    { call_site: [ffffffff8109bd3b] sched_autogroup_create_attach                 } hitcount:          2  bytes_req:        144  bytes_alloc:        192
-    { call_site: [ffffffff81097ee8] alloc_rt_sched_group                          } hitcount:          2  bytes_req:        128  bytes_alloc:        128
-    { call_site: [ffffffff8109524a] alloc_fair_sched_group                        } hitcount:          2  bytes_req:        128  bytes_alloc:        128
-    { call_site: [ffffffff81095225] alloc_fair_sched_group                        } hitcount:          2  bytes_req:        128  bytes_alloc:        128
-    { call_site: [ffffffff81097ec2] alloc_rt_sched_group                          } hitcount:          2  bytes_req:        128  bytes_alloc:        128
-    { call_site: [ffffffff81213e80] load_elf_binary                               } hitcount:          3  bytes_req:         84  bytes_alloc:         96
-    { call_site: [ffffffff81079a2e] kthread_create_on_node                        } hitcount:          1  bytes_req:         56  bytes_alloc:         64
-    { call_site: [ffffffffa00bf6fe] hidraw_send_report [hid]                      } hitcount:          1  bytes_req:          7  bytes_alloc:          8
-    { call_site: [ffffffff8154bc62] usb_control_msg                               } hitcount:          1  bytes_req:          8  bytes_alloc:          8
-    { call_site: [ffffffffa00bf1ca] hidraw_report_event [hid]                     } hitcount:          1  bytes_req:          7  bytes_alloc:          8
-
-    Totals:
-        Hits: 66598
-        Entries: 65
-        Dropped: 0
-
-  Finally, to finish off our kmalloc example, instead of simply having
-  the hist trigger display symbolic call_sites, we can have the hist
-  trigger additionally display the complete set of kernel stack traces
-  that led to each call_site.  To do that, we simply use the special
-  value 'stacktrace' for the key parameter::
-
-    # echo 'hist:keys=stacktrace:values=bytes_req,bytes_alloc:sort=bytes_alloc' > \
-           /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
-
-  The above trigger will use the kernel stack trace in effect when an
-  event is triggered as the key for the hash table.  This allows the
-  enumeration of every kernel callpath that led up to a particular
-  event, along with a running total of any of the event fields for
-  that event.  Here we tally bytes requested and bytes allocated for
-  every callpath in the system that led up to a kmalloc (in this case
-  every callpath to a kmalloc for a kernel compile)::
-
-    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
-    # trigger info: hist:keys=stacktrace:vals=bytes_req,bytes_alloc:sort=bytes_alloc:size=2048 [active]
-
-    { stacktrace:
-         __kmalloc_track_caller+0x10b/0x1a0
-         kmemdup+0x20/0x50
-         hidraw_report_event+0x8a/0x120 [hid]
-         hid_report_raw_event+0x3ea/0x440 [hid]
-         hid_input_report+0x112/0x190 [hid]
-         hid_irq_in+0xc2/0x260 [usbhid]
-         __usb_hcd_giveback_urb+0x72/0x120
-         usb_giveback_urb_bh+0x9e/0xe0
-         tasklet_hi_action+0xf8/0x100
-         __do_softirq+0x114/0x2c0
-         irq_exit+0xa5/0xb0
-         do_IRQ+0x5a/0xf0
-         ret_from_intr+0x0/0x30
-         cpuidle_enter+0x17/0x20
-         cpu_startup_entry+0x315/0x3e0
-         rest_init+0x7c/0x80
-    } hitcount:          3  bytes_req:         21  bytes_alloc:         24
-    { stacktrace:
-         __kmalloc_track_caller+0x10b/0x1a0
-         kmemdup+0x20/0x50
-         hidraw_report_event+0x8a/0x120 [hid]
-         hid_report_raw_event+0x3ea/0x440 [hid]
-         hid_input_report+0x112/0x190 [hid]
-         hid_irq_in+0xc2/0x260 [usbhid]
-         __usb_hcd_giveback_urb+0x72/0x120
-         usb_giveback_urb_bh+0x9e/0xe0
-         tasklet_hi_action+0xf8/0x100
-         __do_softirq+0x114/0x2c0
-         irq_exit+0xa5/0xb0
-         do_IRQ+0x5a/0xf0
-         ret_from_intr+0x0/0x30
-    } hitcount:          3  bytes_req:         21  bytes_alloc:         24
-    { stacktrace:
-         kmem_cache_alloc_trace+0xeb/0x150
-         aa_alloc_task_context+0x27/0x40
-         apparmor_cred_prepare+0x1f/0x50
-         security_prepare_creds+0x16/0x20
-         prepare_creds+0xdf/0x1a0
-         SyS_capset+0xb5/0x200
-         system_call_fastpath+0x12/0x6a
-    } hitcount:          1  bytes_req:         32  bytes_alloc:         32
-    .
-    .
-    .
-    { stacktrace:
-         __kmalloc+0x11b/0x1b0
-         i915_gem_execbuffer2+0x6c/0x2c0 [i915]
-         drm_ioctl+0x349/0x670 [drm]
-         do_vfs_ioctl+0x2f0/0x4f0
-         SyS_ioctl+0x81/0xa0
-         system_call_fastpath+0x12/0x6a
-    } hitcount:      17726  bytes_req:   13944120  bytes_alloc:   19593808
-    { stacktrace:
-         __kmalloc+0x11b/0x1b0
-         load_elf_phdrs+0x76/0xa0
-         load_elf_binary+0x102/0x1650
-         search_binary_handler+0x97/0x1d0
-         do_execveat_common.isra.34+0x551/0x6e0
-         SyS_execve+0x3a/0x50
-         return_from_execve+0x0/0x23
-    } hitcount:      33348  bytes_req:   17152128  bytes_alloc:   20226048
-    { stacktrace:
-         kmem_cache_alloc_trace+0xeb/0x150
-         apparmor_file_alloc_security+0x27/0x40
-         security_file_alloc+0x16/0x20
-         get_empty_filp+0x93/0x1c0
-         path_openat+0x31/0x5f0
-         do_filp_open+0x3a/0x90
-         do_sys_open+0x128/0x220
-         SyS_open+0x1e/0x20
-         system_call_fastpath+0x12/0x6a
-    } hitcount:    4766422  bytes_req:    9532844  bytes_alloc:   38131376
-    { stacktrace:
-         __kmalloc+0x11b/0x1b0
-         seq_buf_alloc+0x1b/0x50
-         seq_read+0x2cc/0x370
-         proc_reg_read+0x3d/0x80
-         __vfs_read+0x28/0xe0
-         vfs_read+0x86/0x140
-         SyS_read+0x46/0xb0
-         system_call_fastpath+0x12/0x6a
-    } hitcount:      19133  bytes_req:   78368768  bytes_alloc:   78368768
-
-    Totals:
-        Hits: 6085872
-        Entries: 253
-        Dropped: 0
-
-  If you key a hist trigger on common_pid, in order for example to
-  gather and display sorted totals for each process, you can use the
-  special .execname modifier to display the executable names for the
-  processes in the table rather than raw pids.  The example below
-  keeps a per-process sum of total bytes read::
-
-    # echo 'hist:key=common_pid.execname:val=count:sort=count.descending' > \
-           /sys/kernel/debug/tracing/events/syscalls/sys_enter_read/trigger
-
-    # cat /sys/kernel/debug/tracing/events/syscalls/sys_enter_read/hist
-    # trigger info: hist:keys=common_pid.execname:vals=count:sort=count.descending:size=2048 [active]
-
-    { common_pid: gnome-terminal  [      3196] } hitcount:        280  count:    1093512
-    { common_pid: Xorg            [      1309] } hitcount:        525  count:     256640
-    { common_pid: compiz          [      2889] } hitcount:         59  count:     254400
-    { common_pid: bash            [      8710] } hitcount:          3  count:      66369
-    { common_pid: dbus-daemon-lau [      8703] } hitcount:         49  count:      47739
-    { common_pid: irqbalance      [      1252] } hitcount:         27  count:      27648
-    { common_pid: 01ifupdown      [      8705] } hitcount:          3  count:      17216
-    { common_pid: dbus-daemon     [       772] } hitcount:         10  count:      12396
-    { common_pid: Socket Thread   [      8342] } hitcount:         11  count:      11264
-    { common_pid: nm-dhcp-client. [      8701] } hitcount:          6  count:       7424
-    { common_pid: gmain           [      1315] } hitcount:         18  count:       6336
-    .
-    .
-    .
-    { common_pid: postgres        [      1892] } hitcount:          2  count:         32
-    { common_pid: postgres        [      1891] } hitcount:          2  count:         32
-    { common_pid: gmain           [      8704] } hitcount:          2  count:         32
-    { common_pid: upstart-dbus-br [      2740] } hitcount:         21  count:         21
-    { common_pid: nm-dispatcher.a [      8696] } hitcount:          1  count:         16
-    { common_pid: indicator-datet [      2904] } hitcount:          1  count:         16
-    { common_pid: gdbus           [      2998] } hitcount:          1  count:         16
-    { common_pid: rtkit-daemon    [      2052] } hitcount:          1  count:          8
-    { common_pid: init            [         1] } hitcount:          2  count:          2
-
-    Totals:
-        Hits: 2116
-        Entries: 51
-        Dropped: 0
-
-  Similarly, if you key a hist trigger on syscall id, for example to
-  gather and display a list of systemwide syscall hits, you can use
-  the special .syscall modifier to display the syscall names rather
-  than raw ids.  The example below keeps a running total of syscall
-  counts for the system during the run::
-
-    # echo 'hist:key=id.syscall:val=hitcount' > \
-           /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger
-
-    # cat /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/hist
-    # trigger info: hist:keys=id.syscall:vals=hitcount:sort=hitcount:size=2048 [active]
-
-    { id: sys_fsync                     [ 74] } hitcount:          1
-    { id: sys_newuname                  [ 63] } hitcount:          1
-    { id: sys_prctl                     [157] } hitcount:          1
-    { id: sys_statfs                    [137] } hitcount:          1
-    { id: sys_symlink                   [ 88] } hitcount:          1
-    { id: sys_sendmmsg                  [307] } hitcount:          1
-    { id: sys_semctl                    [ 66] } hitcount:          1
-    { id: sys_readlink                  [ 89] } hitcount:          3
-    { id: sys_bind                      [ 49] } hitcount:          3
-    { id: sys_getsockname               [ 51] } hitcount:          3
-    { id: sys_unlink                    [ 87] } hitcount:          3
-    { id: sys_rename                    [ 82] } hitcount:          4
-    { id: unknown_syscall               [ 58] } hitcount:          4
-    { id: sys_connect                   [ 42] } hitcount:          4
-    { id: sys_getpid                    [ 39] } hitcount:          4
-    .
-    .
-    .
-    { id: sys_rt_sigprocmask            [ 14] } hitcount:        952
-    { id: sys_futex                     [202] } hitcount:       1534
-    { id: sys_write                     [  1] } hitcount:       2689
-    { id: sys_setitimer                 [ 38] } hitcount:       2797
-    { id: sys_read                      [  0] } hitcount:       3202
-    { id: sys_select                    [ 23] } hitcount:       3773
-    { id: sys_writev                    [ 20] } hitcount:       4531
-    { id: sys_poll                      [  7] } hitcount:       8314
-    { id: sys_recvmsg                   [ 47] } hitcount:      13738
-    { id: sys_ioctl                     [ 16] } hitcount:      21843
-
-    Totals:
-        Hits: 67612
-        Entries: 72
-        Dropped: 0
-
-  The syscall counts above provide a rough overall picture of system
-  call activity on the system; we can see for example that the most
-  popular system call on this system was the 'sys_ioctl' system call.
-
-  We can use 'compound' keys to refine that number and provide some
-  further insight as to which processes exactly contribute to the
-  overall ioctl count.
-
-  The command below keeps a hitcount for every unique combination of
-  system call id and pid - the end result is essentially a table
-  that keeps a per-pid sum of system call hits.  The results are
-  sorted using the system call id as the primary key, and the
-  hitcount sum as the secondary key::
-
-      # echo 'hist:key=id.syscall,common_pid.execname:val=hitcount:sort=id,hitcount' > \
-             /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger
-
-      # cat /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/hist
-      # trigger info: hist:keys=id.syscall,common_pid.execname:vals=hitcount:sort=id.syscall,hitcount:size=2048 [active]
-
-      { id: sys_read                      [  0], common_pid: rtkit-daemon    [      1877] } hitcount:          1
-      { id: sys_read                      [  0], common_pid: gdbus           [      2976] } hitcount:          1
-      { id: sys_read                      [  0], common_pid: console-kit-dae [      3400] } hitcount:          1
-      { id: sys_read                      [  0], common_pid: postgres        [      1865] } hitcount:          1
-      { id: sys_read                      [  0], common_pid: deja-dup-monito [      3543] } hitcount:          2
-      { id: sys_read                      [  0], common_pid: NetworkManager  [       890] } hitcount:          2
-      { id: sys_read                      [  0], common_pid: evolution-calen [      3048] } hitcount:          2
-      { id: sys_read                      [  0], common_pid: postgres        [      1864] } hitcount:          2
-      { id: sys_read                      [  0], common_pid: nm-applet       [      3022] } hitcount:          2
-      { id: sys_read                      [  0], common_pid: whoopsie        [      1212] } hitcount:          2
-      .
-      .
-      .
-      { id: sys_ioctl                     [ 16], common_pid: bash            [      8479] } hitcount:          1
-      { id: sys_ioctl                     [ 16], common_pid: bash            [      3472] } hitcount:         12
-      { id: sys_ioctl                     [ 16], common_pid: gnome-terminal  [      3199] } hitcount:         16
-      { id: sys_ioctl                     [ 16], common_pid: Xorg            [      1267] } hitcount:       1808
-      { id: sys_ioctl                     [ 16], common_pid: compiz          [      2994] } hitcount:       5580
-      .
-      .
-      .
-      { id: sys_waitid                    [247], common_pid: upstart-dbus-br [      2690] } hitcount:          3
-      { id: sys_waitid                    [247], common_pid: upstart-dbus-br [      2688] } hitcount:         16
-      { id: sys_inotify_add_watch         [254], common_pid: gmain           [       975] } hitcount:          2
-      { id: sys_inotify_add_watch         [254], common_pid: gmain           [      3204] } hitcount:          4
-      { id: sys_inotify_add_watch         [254], common_pid: gmain           [      2888] } hitcount:          4
-      { id: sys_inotify_add_watch         [254], common_pid: gmain           [      3003] } hitcount:          4
-      { id: sys_inotify_add_watch         [254], common_pid: gmain           [      2873] } hitcount:          4
-      { id: sys_inotify_add_watch         [254], common_pid: gmain           [      3196] } hitcount:          6
-      { id: sys_openat                    [257], common_pid: java            [      2623] } hitcount:          2
-      { id: sys_eventfd2                  [290], common_pid: ibus-ui-gtk3    [      2760] } hitcount:          4
-      { id: sys_eventfd2                  [290], common_pid: compiz          [      2994] } hitcount:          6
-
-      Totals:
-          Hits: 31536
-          Entries: 323
-          Dropped: 0
-
-  The above list does give us a breakdown of the ioctl syscall by
-  pid, but it also gives us quite a bit more than that, which we
-  don't really care about at the moment.  Since we know the syscall
-  id for sys_ioctl (16, displayed next to the sys_ioctl name), we
-  can use that to filter out all the other syscalls::
-
-      # echo 'hist:key=id.syscall,common_pid.execname:val=hitcount:sort=id,hitcount if id == 16' > \
-             /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger
-
-      # cat /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/hist
-      # trigger info: hist:keys=id.syscall,common_pid.execname:vals=hitcount:sort=id.syscall,hitcount:size=2048 if id == 16 [active]
-
-      { id: sys_ioctl                     [ 16], common_pid: gmain           [      2769] } hitcount:          1
-      { id: sys_ioctl                     [ 16], common_pid: evolution-addre [      8571] } hitcount:          1
-      { id: sys_ioctl                     [ 16], common_pid: gmain           [      3003] } hitcount:          1
-      { id: sys_ioctl                     [ 16], common_pid: gmain           [      2781] } hitcount:          1
-      { id: sys_ioctl                     [ 16], common_pid: gmain           [      2829] } hitcount:          1
-      { id: sys_ioctl                     [ 16], common_pid: bash            [      8726] } hitcount:          1
-      { id: sys_ioctl                     [ 16], common_pid: bash            [      8508] } hitcount:          1
-      { id: sys_ioctl                     [ 16], common_pid: gmain           [      2970] } hitcount:          1
-      { id: sys_ioctl                     [ 16], common_pid: gmain           [      2768] } hitcount:          1
-      .
-      .
-      .
-      { id: sys_ioctl                     [ 16], common_pid: pool            [      8559] } hitcount:         45
-      { id: sys_ioctl                     [ 16], common_pid: pool            [      8555] } hitcount:         48
-      { id: sys_ioctl                     [ 16], common_pid: pool            [      8551] } hitcount:         48
-      { id: sys_ioctl                     [ 16], common_pid: avahi-daemon    [       896] } hitcount:         66
-      { id: sys_ioctl                     [ 16], common_pid: Xorg            [      1267] } hitcount:      26674
-      { id: sys_ioctl                     [ 16], common_pid: compiz          [      2994] } hitcount:      73443
-
-      Totals:
-          Hits: 101162
-          Entries: 103
-          Dropped: 0
-
-  The above output shows that 'compiz' and 'Xorg' are far and away
-  the heaviest ioctl callers (which might lead to questions about
-  whether they really need to be making all those calls and to
-  possible avenues for further investigation.)
-
-  The compound key examples used a key and a sum value (hitcount) to
-  sort the output, but we can just as easily use two keys instead.
-  Here's an example where we use a compound key composed of the
-  common_pid and size event fields.  Sorting with pid as the primary
-  key and 'size' as the secondary key allows us to display an
-  ordered summary of the recvfrom sizes, with counts, received by
-  each process::
-
-      # echo 'hist:key=common_pid.execname,size:val=hitcount:sort=common_pid,size' > \
-             /sys/kernel/debug/tracing/events/syscalls/sys_enter_recvfrom/trigger
-
-      # cat /sys/kernel/debug/tracing/events/syscalls/sys_enter_recvfrom/hist
-      # trigger info: hist:keys=common_pid.execname,size:vals=hitcount:sort=common_pid.execname,size:size=2048 [active]
-
-      { common_pid: smbd            [       784], size:          4 } hitcount:          1
-      { common_pid: dnsmasq         [      1412], size:       4096 } hitcount:        672
-      { common_pid: postgres        [      1796], size:       1000 } hitcount:          6
-      { common_pid: postgres        [      1867], size:       1000 } hitcount:         10
-      { common_pid: bamfdaemon      [      2787], size:         28 } hitcount:          2
-      { common_pid: bamfdaemon      [      2787], size:      14360 } hitcount:          1
-      { common_pid: compiz          [      2994], size:          8 } hitcount:          1
-      { common_pid: compiz          [      2994], size:         20 } hitcount:         11
-      { common_pid: gnome-terminal  [      3199], size:          4 } hitcount:          2
-      { common_pid: firefox         [      8817], size:          4 } hitcount:          1
-      { common_pid: firefox         [      8817], size:          8 } hitcount:          5
-      { common_pid: firefox         [      8817], size:        588 } hitcount:          2
-      { common_pid: firefox         [      8817], size:        628 } hitcount:          1
-      { common_pid: firefox         [      8817], size:       6944 } hitcount:          1
-      { common_pid: firefox         [      8817], size:     408880 } hitcount:          2
-      { common_pid: firefox         [      8822], size:          8 } hitcount:          2
-      { common_pid: firefox         [      8822], size:        160 } hitcount:          2
-      { common_pid: firefox         [      8822], size:        320 } hitcount:          2
-      { common_pid: firefox         [      8822], size:        352 } hitcount:          1
-      .
-      .
-      .
-      { common_pid: pool            [      8923], size:       1960 } hitcount:         10
-      { common_pid: pool            [      8923], size:       2048 } hitcount:         10
-      { common_pid: pool            [      8924], size:       1960 } hitcount:         10
-      { common_pid: pool            [      8924], size:       2048 } hitcount:         10
-      { common_pid: pool            [      8928], size:       1964 } hitcount:          4
-      { common_pid: pool            [      8928], size:       1965 } hitcount:          2
-      { common_pid: pool            [      8928], size:       2048 } hitcount:          6
-      { common_pid: pool            [      8929], size:       1982 } hitcount:          1
-      { common_pid: pool            [      8929], size:       2048 } hitcount:          1
-
-      Totals:
-          Hits: 2016
-          Entries: 224
-          Dropped: 0
-
-  The above example also illustrates the fact that although a compound
-  key is treated as a single entity for hashing purposes, the sub-keys
-  it's composed of can be accessed independently.
-
-  The next example uses a string field as the hash key and
-  demonstrates how you can manually pause and continue a hist trigger.
-  In this example, we'll aggregate fork counts and don't expect a
-  large number of entries in the hash table, so we'll drop it to a
-  much smaller number, say 256::
-
-    # echo 'hist:key=child_comm:val=hitcount:size=256' > \
-           /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger
-
-    # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist
-    # trigger info: hist:keys=child_comm:vals=hitcount:sort=hitcount:size=256 [active]
-
-    { child_comm: dconf worker                        } hitcount:          1
-    { child_comm: ibus-daemon                         } hitcount:          1
-    { child_comm: whoopsie                            } hitcount:          1
-    { child_comm: smbd                                } hitcount:          1
-    { child_comm: gdbus                               } hitcount:          1
-    { child_comm: kthreadd                            } hitcount:          1
-    { child_comm: dconf worker                        } hitcount:          1
-    { child_comm: evolution-alarm                     } hitcount:          2
-    { child_comm: Socket Thread                       } hitcount:          2
-    { child_comm: postgres                            } hitcount:          2
-    { child_comm: bash                                } hitcount:          3
-    { child_comm: compiz                              } hitcount:          3
-    { child_comm: evolution-sourc                     } hitcount:          4
-    { child_comm: dhclient                            } hitcount:          4
-    { child_comm: pool                                } hitcount:          5
-    { child_comm: nm-dispatcher.a                     } hitcount:          8
-    { child_comm: firefox                             } hitcount:          8
-    { child_comm: dbus-daemon                         } hitcount:          8
-    { child_comm: glib-pacrunner                      } hitcount:         10
-    { child_comm: evolution                           } hitcount:         23
-
-    Totals:
-        Hits: 89
-        Entries: 20
-        Dropped: 0
-
-  If we want to pause the hist trigger, we can simply append :pause to
-  the command that started the trigger.  Notice that the trigger info
-  displays as [paused]::
-
-    # echo 'hist:key=child_comm:val=hitcount:size=256:pause' >> \
-           /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger
-
-    # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist
-    # trigger info: hist:keys=child_comm:vals=hitcount:sort=hitcount:size=256 [paused]
-
-    { child_comm: dconf worker                        } hitcount:          1
-    { child_comm: kthreadd                            } hitcount:          1
-    { child_comm: dconf worker                        } hitcount:          1
-    { child_comm: gdbus                               } hitcount:          1
-    { child_comm: ibus-daemon                         } hitcount:          1
-    { child_comm: Socket Thread                       } hitcount:          2
-    { child_comm: evolution-alarm                     } hitcount:          2
-    { child_comm: smbd                                } hitcount:          2
-    { child_comm: bash                                } hitcount:          3
-    { child_comm: whoopsie                            } hitcount:          3
-    { child_comm: compiz                              } hitcount:          3
-    { child_comm: evolution-sourc                     } hitcount:          4
-    { child_comm: pool                                } hitcount:          5
-    { child_comm: postgres                            } hitcount:          6
-    { child_comm: firefox                             } hitcount:          8
-    { child_comm: dhclient                            } hitcount:         10
-    { child_comm: emacs                               } hitcount:         12
-    { child_comm: dbus-daemon                         } hitcount:         20
-    { child_comm: nm-dispatcher.a                     } hitcount:         20
-    { child_comm: evolution                           } hitcount:         35
-    { child_comm: glib-pacrunner                      } hitcount:         59
-
-    Totals:
-        Hits: 199
-        Entries: 21
-        Dropped: 0
-
-  To manually continue having the trigger aggregate events, append
-  :cont instead.  Notice that the trigger info displays as [active]
-  again, and the data has changed::
-
-    # echo 'hist:key=child_comm:val=hitcount:size=256:cont' >> \
-           /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger
-
-    # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist
-    # trigger info: hist:keys=child_comm:vals=hitcount:sort=hitcount:size=256 [active]
-
-    { child_comm: dconf worker                        } hitcount:          1
-    { child_comm: dconf worker                        } hitcount:          1
-    { child_comm: kthreadd                            } hitcount:          1
-    { child_comm: gdbus                               } hitcount:          1
-    { child_comm: ibus-daemon                         } hitcount:          1
-    { child_comm: Socket Thread                       } hitcount:          2
-    { child_comm: evolution-alarm                     } hitcount:          2
-    { child_comm: smbd                                } hitcount:          2
-    { child_comm: whoopsie                            } hitcount:          3
-    { child_comm: compiz                              } hitcount:          3
-    { child_comm: evolution-sourc                     } hitcount:          4
-    { child_comm: bash                                } hitcount:          5
-    { child_comm: pool                                } hitcount:          5
-    { child_comm: postgres                            } hitcount:          6
-    { child_comm: firefox                             } hitcount:          8
-    { child_comm: dhclient                            } hitcount:         11
-    { child_comm: emacs                               } hitcount:         12
-    { child_comm: dbus-daemon                         } hitcount:         22
-    { child_comm: nm-dispatcher.a                     } hitcount:         22
-    { child_comm: evolution                           } hitcount:         35
-    { child_comm: glib-pacrunner                      } hitcount:         59
-
-    Totals:
-        Hits: 206
-        Entries: 21
-        Dropped: 0
-
-  The previous example showed how to start and stop a hist trigger by
-  appending 'pause' and 'continue' to the hist trigger command.  A
-  hist trigger can also be started in a paused state by initially
-  starting the trigger with ':pause' appended.  This allows you to
-  start the trigger only when you're ready to start collecting data
-  and not before.  For example, you could start the trigger in a
-  paused state, then unpause it and do something you want to measure,
-  then pause the trigger again when done.
-
-  Of course, doing this manually can be difficult and error-prone, but
-  it is possible to automatically start and stop a hist trigger based
-  on some condition, via the enable_hist and disable_hist triggers.
-
-  For example, suppose we wanted to take a look at the relative
-  weights in terms of skb length for each callpath that leads to a
-  netif_receive_skb event when downloading a decent-sized file using
-  wget.
-
-  First we set up an initially paused stacktrace trigger on the
-  netif_receive_skb event::
-
-    # echo 'hist:key=stacktrace:vals=len:pause' > \
-           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
-
-  Next, we set up an 'enable_hist' trigger on the sched_process_exec
-  event, with an 'if filename==/usr/bin/wget' filter.  The effect of
-  this new trigger is that it will 'unpause' the hist trigger we just
-  set up on netif_receive_skb if and only if it sees a
-  sched_process_exec event with a filename of '/usr/bin/wget'.  When
-  that happens, all netif_receive_skb events are aggregated into a
-  hash table keyed on stacktrace::
-
-    # echo 'enable_hist:net:netif_receive_skb if filename==/usr/bin/wget' > \
-           /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger
-
-  The aggregation continues until the netif_receive_skb hist trigger is
-  paused again, which is what the following disable_hist trigger does by
-  creating a similar setup on the sched_process_exit event, using the
-  filter 'comm==wget'::
-
-    # echo 'disable_hist:net:netif_receive_skb if comm==wget' > \
-           /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger
-
-  Whenever a process exits and the comm field of the disable_hist
-  trigger filter matches 'comm==wget', the netif_receive_skb hist
-  trigger is disabled.
-
-  The overall effect is that netif_receive_skb events are aggregated
-  into the hash table for only the duration of the wget.  Executing a
-  wget command and then listing the 'hist' file will display the
-  output generated by the wget command::
-
-    $ wget https://www.kernel.org/pub/linux/kernel/v3.x/patch-3.19.xz
-
-    # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist
-    # trigger info: hist:keys=stacktrace:vals=len:sort=hitcount:size=2048 [paused]
-
-    { stacktrace:
-         __netif_receive_skb_core+0x46d/0x990
-         __netif_receive_skb+0x18/0x60
-         netif_receive_skb_internal+0x23/0x90
-         napi_gro_receive+0xc8/0x100
-         ieee80211_deliver_skb+0xd6/0x270 [mac80211]
-         ieee80211_rx_handlers+0xccf/0x22f0 [mac80211]
-         ieee80211_prepare_and_rx_handle+0x4e7/0xc40 [mac80211]
-         ieee80211_rx+0x31d/0x900 [mac80211]
-         iwlagn_rx_reply_rx+0x3db/0x6f0 [iwldvm]
-         iwl_rx_dispatch+0x8e/0xf0 [iwldvm]
-         iwl_pcie_irq_handler+0xe3c/0x12f0 [iwlwifi]
-         irq_thread_fn+0x20/0x50
-         irq_thread+0x11f/0x150
-         kthread+0xd2/0xf0
-         ret_from_fork+0x42/0x70
-    } hitcount:         85  len:      28884
-    { stacktrace:
-         __netif_receive_skb_core+0x46d/0x990
-         __netif_receive_skb+0x18/0x60
-         netif_receive_skb_internal+0x23/0x90
-         napi_gro_complete+0xa4/0xe0
-         dev_gro_receive+0x23a/0x360
-         napi_gro_receive+0x30/0x100
-         ieee80211_deliver_skb+0xd6/0x270 [mac80211]
-         ieee80211_rx_handlers+0xccf/0x22f0 [mac80211]
-         ieee80211_prepare_and_rx_handle+0x4e7/0xc40 [mac80211]
-         ieee80211_rx+0x31d/0x900 [mac80211]
-         iwlagn_rx_reply_rx+0x3db/0x6f0 [iwldvm]
-         iwl_rx_dispatch+0x8e/0xf0 [iwldvm]
-         iwl_pcie_irq_handler+0xe3c/0x12f0 [iwlwifi]
-         irq_thread_fn+0x20/0x50
-         irq_thread+0x11f/0x150
-         kthread+0xd2/0xf0
-    } hitcount:         98  len:     664329
-    { stacktrace:
-         __netif_receive_skb_core+0x46d/0x990
-         __netif_receive_skb+0x18/0x60
-         process_backlog+0xa8/0x150
-         net_rx_action+0x15d/0x340
-         __do_softirq+0x114/0x2c0
-         do_softirq_own_stack+0x1c/0x30
-         do_softirq+0x65/0x70
-         __local_bh_enable_ip+0xb5/0xc0
-         ip_finish_output+0x1f4/0x840
-         ip_output+0x6b/0xc0
-         ip_local_out_sk+0x31/0x40
-         ip_send_skb+0x1a/0x50
-         udp_send_skb+0x173/0x2a0
-         udp_sendmsg+0x2bf/0x9f0
-         inet_sendmsg+0x64/0xa0
-         sock_sendmsg+0x3d/0x50
-    } hitcount:        115  len:      13030
-    { stacktrace:
-         __netif_receive_skb_core+0x46d/0x990
-         __netif_receive_skb+0x18/0x60
-         netif_receive_skb_internal+0x23/0x90
-         napi_gro_complete+0xa4/0xe0
-         napi_gro_flush+0x6d/0x90
-         iwl_pcie_irq_handler+0x92a/0x12f0 [iwlwifi]
-         irq_thread_fn+0x20/0x50
-         irq_thread+0x11f/0x150
-         kthread+0xd2/0xf0
-         ret_from_fork+0x42/0x70
-    } hitcount:        934  len:    5512212
-
-    Totals:
-        Hits: 1232
-        Entries: 4
-        Dropped: 0
-
-  The above shows all the netif_receive_skb callpaths and their total
-  lengths for the duration of the wget command.
-
-  The 'clear' hist trigger param can be used to clear the hash table.
-  Suppose we wanted to try another run of the previous example but
-  this time also wanted to see the complete list of events that went
-  into the histogram.  In order to avoid having to set everything up
-  again, we can just clear the histogram first::
-
-    # echo 'hist:key=stacktrace:vals=len:clear' >> \
-           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
-
-  Just to verify that it is in fact cleared, here's what we now see in
-  the hist file::
-
-    # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist
-    # trigger info: hist:keys=stacktrace:vals=len:sort=hitcount:size=2048 [paused]
-
-    Totals:
-        Hits: 0
-        Entries: 0
-        Dropped: 0
-
-  Since we want to see the detailed list of every netif_receive_skb
-  event occurring during the new run, which are in fact the same
-  events being aggregated into the hash table, we add an additional
-  'enable_event' trigger to the sched_process_exec event and an
-  additional 'disable_event' trigger to the sched_process_exit event
-  as such::
-
-    # echo 'enable_event:net:netif_receive_skb if filename==/usr/bin/wget' > \
-           /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger
-
-    # echo 'disable_event:net:netif_receive_skb if comm==wget' > \
-           /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger
-
-  If you read the trigger files for the sched_process_exec and
-  sched_process_exit triggers, you should see two triggers for each:
-  one enabling/disabling the hist aggregation and the other
-  enabling/disabling the logging of events::
-
-    # cat /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger
-    enable_event:net:netif_receive_skb:unlimited if filename==/usr/bin/wget
-    enable_hist:net:netif_receive_skb:unlimited if filename==/usr/bin/wget
-
-    # cat /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger
-    disable_event:net:netif_receive_skb:unlimited if comm==wget
-    disable_hist:net:netif_receive_skb:unlimited if comm==wget
-
-  In other words, whenever either of the sched_process_exec or
-  sched_process_exit events is hit and matches 'wget', it enables or
-  disables both the histogram and the event log, and what you end up
-  with is a hash table and set of events just covering the specified
-  duration.  Run the wget command again::
-
-    $ wget https://www.kernel.org/pub/linux/kernel/v3.x/patch-3.19.xz
-
-  Displaying the 'hist' file should show something similar to what you
-  saw in the last run, but this time you should also see the
-  individual events in the trace file::
-
-    # cat /sys/kernel/debug/tracing/trace
-
-    # tracer: nop
-    #
-    # entries-in-buffer/entries-written: 183/1426   #P:4
-    #
-    #                              _-----=> irqs-off
-    #                             / _----=> need-resched
-    #                            | / _---=> hardirq/softirq
-    #                            || / _--=> preempt-depth
-    #                            ||| /     delay
-    #           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION
-    #              | |       |   ||||       |         |
-                wget-15108 [000] ..s1 31769.606929: netif_receive_skb: dev=lo skbaddr=ffff88009c353100 len=60
-                wget-15108 [000] ..s1 31769.606999: netif_receive_skb: dev=lo skbaddr=ffff88009c353200 len=60
-             dnsmasq-1382  [000] ..s1 31769.677652: netif_receive_skb: dev=lo skbaddr=ffff88009c352b00 len=130
-             dnsmasq-1382  [000] ..s1 31769.685917: netif_receive_skb: dev=lo skbaddr=ffff88009c352200 len=138
-    ##### CPU 2 buffer started ####
-      irq/29-iwlwifi-559   [002] ..s. 31772.031529: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d433d00 len=2948
-      irq/29-iwlwifi-559   [002] ..s. 31772.031572: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d432200 len=1500
-      irq/29-iwlwifi-559   [002] ..s. 31772.032196: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d433100 len=2948
-      irq/29-iwlwifi-559   [002] ..s. 31772.032761: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d433000 len=2948
-      irq/29-iwlwifi-559   [002] ..s. 31772.033220: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d432e00 len=1500
-    ....
-
-
-  The following example demonstrates how multiple hist triggers can be
-  attached to a given event.  This capability can be useful for
-  creating a set of different summaries derived from the same set of
-  events, or for comparing the effects of different filters, among
-  other things.
-  ::
-
-    # echo 'hist:keys=skbaddr.hex:vals=len if len < 0' >> \
-           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
-    # echo 'hist:keys=skbaddr.hex:vals=len if len > 4096' >> \
-           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
-    # echo 'hist:keys=skbaddr.hex:vals=len if len == 256' >> \
-           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
-    # echo 'hist:keys=skbaddr.hex:vals=len' >> \
-           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
-    # echo 'hist:keys=len:vals=common_preempt_count' >> \
-           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
-
-  The above set of commands creates four triggers differing only in
-  their filters, along with a completely different though fairly
-  nonsensical trigger.  Note that in order to append multiple hist
-  triggers to the same file, you should use the '>>' operator to
-  append them ('>' will also add the new hist trigger, but will remove
-  any existing hist triggers beforehand).
-
-  Displaying the contents of the 'hist' file for the event shows the
-  contents of all five histograms::
-
-    # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist
-
-    # event histogram
-    #
-    # trigger info: hist:keys=len:vals=hitcount,common_preempt_count:sort=hitcount:size=2048 [active]
-    #
-
-    { len:        176 } hitcount:          1  common_preempt_count:          0
-    { len:        223 } hitcount:          1  common_preempt_count:          0
-    { len:       4854 } hitcount:          1  common_preempt_count:          0
-    { len:        395 } hitcount:          1  common_preempt_count:          0
-    { len:        177 } hitcount:          1  common_preempt_count:          0
-    { len:        446 } hitcount:          1  common_preempt_count:          0
-    { len:       1601 } hitcount:          1  common_preempt_count:          0
-    .
-    .
-    .
-    { len:       1280 } hitcount:         66  common_preempt_count:          0
-    { len:        116 } hitcount:         81  common_preempt_count:         40
-    { len:        708 } hitcount:        112  common_preempt_count:          0
-    { len:         46 } hitcount:        221  common_preempt_count:          0
-    { len:       1264 } hitcount:        458  common_preempt_count:          0
-
-    Totals:
-        Hits: 1428
-        Entries: 147
-        Dropped: 0
-
-
-    # event histogram
-    #
-    # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 [active]
-    #
-
-    { skbaddr: ffff8800baee5e00 } hitcount:          1  len:        130
-    { skbaddr: ffff88005f3d5600 } hitcount:          1  len:       1280
-    { skbaddr: ffff88005f3d4900 } hitcount:          1  len:       1280
-    { skbaddr: ffff88009fed6300 } hitcount:          1  len:        115
-    { skbaddr: ffff88009fe0ad00 } hitcount:          1  len:        115
-    { skbaddr: ffff88008cdb1900 } hitcount:          1  len:         46
-    { skbaddr: ffff880064b5ef00 } hitcount:          1  len:        118
-    { skbaddr: ffff880044e3c700 } hitcount:          1  len:         60
-    { skbaddr: ffff880100065900 } hitcount:          1  len:         46
-    { skbaddr: ffff8800d46bd500 } hitcount:          1  len:        116
-    { skbaddr: ffff88005f3d5f00 } hitcount:          1  len:       1280
-    { skbaddr: ffff880100064700 } hitcount:          1  len:        365
-    { skbaddr: ffff8800badb6f00 } hitcount:          1  len:         60
-    .
-    .
-    .
-    { skbaddr: ffff88009fe0be00 } hitcount:         27  len:      24677
-    { skbaddr: ffff88009fe0a400 } hitcount:         27  len:      23052
-    { skbaddr: ffff88009fe0b700 } hitcount:         31  len:      25589
-    { skbaddr: ffff88009fe0b600 } hitcount:         32  len:      27326
-    { skbaddr: ffff88006a462800 } hitcount:         68  len:      71678
-    { skbaddr: ffff88006a463700 } hitcount:         70  len:      72678
-    { skbaddr: ffff88006a462b00 } hitcount:         71  len:      77589
-    { skbaddr: ffff88006a463600 } hitcount:         73  len:      71307
-    { skbaddr: ffff88006a462200 } hitcount:         81  len:      81032
-
-    Totals:
-        Hits: 1451
-        Entries: 318
-        Dropped: 0
-
-
-    # event histogram
-    #
-    # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 if len == 256 [active]
-    #
-
-
-    Totals:
-        Hits: 0
-        Entries: 0
-        Dropped: 0
-
-
-    # event histogram
-    #
-    # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 if len > 4096 [active]
-    #
-
-    { skbaddr: ffff88009fd2c300 } hitcount:          1  len:       7212
-    { skbaddr: ffff8800d2bcce00 } hitcount:          1  len:       7212
-    { skbaddr: ffff8800d2bcd700 } hitcount:          1  len:       7212
-    { skbaddr: ffff8800d2bcda00 } hitcount:          1  len:      21492
-    { skbaddr: ffff8800ae2e2d00 } hitcount:          1  len:       7212
-    { skbaddr: ffff8800d2bcdb00 } hitcount:          1  len:       7212
-    { skbaddr: ffff88006a4df500 } hitcount:          1  len:       4854
-    { skbaddr: ffff88008ce47b00 } hitcount:          1  len:      18636
-    { skbaddr: ffff8800ae2e2200 } hitcount:          1  len:      12924
-    { skbaddr: ffff88005f3e1000 } hitcount:          1  len:       4356
-    { skbaddr: ffff8800d2bcdc00 } hitcount:          2  len:      24420
-    { skbaddr: ffff8800d2bcc200 } hitcount:          2  len:      12996
-
-    Totals:
-        Hits: 14
-        Entries: 12
-        Dropped: 0
-
-
-    # event histogram
-    #
-    # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 if len < 0 [active]
-    #
-
-
-    Totals:
-        Hits: 0
-        Entries: 0
-        Dropped: 0
-
-  Named triggers can be used to have triggers share a common set of
-  histogram data.  This capability is mostly useful for combining the
-  output of events generated by tracepoints contained inside inline
-  functions, but names can be used in a hist trigger on any event.
-  For example, these two triggers when hit will update the same 'len'
-  field in the shared 'foo' histogram data::
-
-    # echo 'hist:name=foo:keys=skbaddr.hex:vals=len' > \
-           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
-    # echo 'hist:name=foo:keys=skbaddr.hex:vals=len' > \
-           /sys/kernel/debug/tracing/events/net/netif_rx/trigger
-
-  You can see that they're updating common histogram data by reading
-  each event's hist files at the same time::
-
-    # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist;
-      cat /sys/kernel/debug/tracing/events/net/netif_rx/hist
-
-    # event histogram
-    #
-    # trigger info: hist:name=foo:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 [active]
-    #
-
-    { skbaddr: ffff88000ad53500 } hitcount:          1  len:         46
-    { skbaddr: ffff8800af5a1500 } hitcount:          1  len:         76
-    { skbaddr: ffff8800d62a1900 } hitcount:          1  len:         46
-    { skbaddr: ffff8800d2bccb00 } hitcount:          1  len:        468
-    { skbaddr: ffff8800d3c69900 } hitcount:          1  len:         46
-    { skbaddr: ffff88009ff09100 } hitcount:          1  len:         52
-    { skbaddr: ffff88010f13ab00 } hitcount:          1  len:        168
-    { skbaddr: ffff88006a54f400 } hitcount:          1  len:         46
-    { skbaddr: ffff8800d2bcc500 } hitcount:          1  len:        260
-    { skbaddr: ffff880064505000 } hitcount:          1  len:         46
-    { skbaddr: ffff8800baf24e00 } hitcount:          1  len:         32
-    { skbaddr: ffff88009fe0ad00 } hitcount:          1  len:         46
-    { skbaddr: ffff8800d3edff00 } hitcount:          1  len:         44
-    { skbaddr: ffff88009fe0b400 } hitcount:          1  len:        168
-    { skbaddr: ffff8800a1c55a00 } hitcount:          1  len:         40
-    { skbaddr: ffff8800d2bcd100 } hitcount:          1  len:         40
-    { skbaddr: ffff880064505f00 } hitcount:          1  len:        174
-    { skbaddr: ffff8800a8bff200 } hitcount:          1  len:        160
-    { skbaddr: ffff880044e3cc00 } hitcount:          1  len:         76
-    { skbaddr: ffff8800a8bfe700 } hitcount:          1  len:         46
-    { skbaddr: ffff8800d2bcdc00 } hitcount:          1  len:         32
-    { skbaddr: ffff8800a1f64800 } hitcount:          1  len:         46
-    { skbaddr: ffff8800d2bcde00 } hitcount:          1  len:        988
-    { skbaddr: ffff88006a5dea00 } hitcount:          1  len:         46
-    { skbaddr: ffff88002e37a200 } hitcount:          1  len:         44
-    { skbaddr: ffff8800a1f32c00 } hitcount:          2  len:        676
-    { skbaddr: ffff88000ad52600 } hitcount:          2  len:        107
-    { skbaddr: ffff8800a1f91e00 } hitcount:          2  len:         92
-    { skbaddr: ffff8800af5a0200 } hitcount:          2  len:        142
-    { skbaddr: ffff8800d2bcc600 } hitcount:          2  len:        220
-    { skbaddr: ffff8800ba36f500 } hitcount:          2  len:         92
-    { skbaddr: ffff8800d021f800 } hitcount:          2  len:         92
-    { skbaddr: ffff8800a1f33600 } hitcount:          2  len:        675
-    { skbaddr: ffff8800a8bfff00 } hitcount:          3  len:        138
-    { skbaddr: ffff8800d62a1300 } hitcount:          3  len:        138
-    { skbaddr: ffff88002e37a100 } hitcount:          4  len:        184
-    { skbaddr: ffff880064504400 } hitcount:          4  len:        184
-    { skbaddr: ffff8800a8bfec00 } hitcount:          4  len:        184
-    { skbaddr: ffff88000ad53700 } hitcount:          5  len:        230
-    { skbaddr: ffff8800d2bcdb00 } hitcount:          5  len:        196
-    { skbaddr: ffff8800a1f90000 } hitcount:          6  len:        276
-    { skbaddr: ffff88006a54f900 } hitcount:          6  len:        276
-
-    Totals:
-        Hits: 81
-        Entries: 42
-        Dropped: 0
-    # event histogram
-    #
-    # trigger info: hist:name=foo:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 [active]
-    #
-
-    { skbaddr: ffff88000ad53500 } hitcount:          1  len:         46
-    { skbaddr: ffff8800af5a1500 } hitcount:          1  len:         76
-    { skbaddr: ffff8800d62a1900 } hitcount:          1  len:         46
-    { skbaddr: ffff8800d2bccb00 } hitcount:          1  len:        468
-    { skbaddr: ffff8800d3c69900 } hitcount:          1  len:         46
-    { skbaddr: ffff88009ff09100 } hitcount:          1  len:         52
-    { skbaddr: ffff88010f13ab00 } hitcount:          1  len:        168
-    { skbaddr: ffff88006a54f400 } hitcount:          1  len:         46
-    { skbaddr: ffff8800d2bcc500 } hitcount:          1  len:        260
-    { skbaddr: ffff880064505000 } hitcount:          1  len:         46
-    { skbaddr: ffff8800baf24e00 } hitcount:          1  len:         32
-    { skbaddr: ffff88009fe0ad00 } hitcount:          1  len:         46
-    { skbaddr: ffff8800d3edff00 } hitcount:          1  len:         44
-    { skbaddr: ffff88009fe0b400 } hitcount:          1  len:        168
-    { skbaddr: ffff8800a1c55a00 } hitcount:          1  len:         40
-    { skbaddr: ffff8800d2bcd100 } hitcount:          1  len:         40
-    { skbaddr: ffff880064505f00 } hitcount:          1  len:        174
-    { skbaddr: ffff8800a8bff200 } hitcount:          1  len:        160
-    { skbaddr: ffff880044e3cc00 } hitcount:          1  len:         76
-    { skbaddr: ffff8800a8bfe700 } hitcount:          1  len:         46
-    { skbaddr: ffff8800d2bcdc00 } hitcount:          1  len:         32
-    { skbaddr: ffff8800a1f64800 } hitcount:          1  len:         46
-    { skbaddr: ffff8800d2bcde00 } hitcount:          1  len:        988
-    { skbaddr: ffff88006a5dea00 } hitcount:          1  len:         46
-    { skbaddr: ffff88002e37a200 } hitcount:          1  len:         44
-    { skbaddr: ffff8800a1f32c00 } hitcount:          2  len:        676
-    { skbaddr: ffff88000ad52600 } hitcount:          2  len:        107
-    { skbaddr: ffff8800a1f91e00 } hitcount:          2  len:         92
-    { skbaddr: ffff8800af5a0200 } hitcount:          2  len:        142
-    { skbaddr: ffff8800d2bcc600 } hitcount:          2  len:        220
-    { skbaddr: ffff8800ba36f500 } hitcount:          2  len:         92
-    { skbaddr: ffff8800d021f800 } hitcount:          2  len:         92
-    { skbaddr: ffff8800a1f33600 } hitcount:          2  len:        675
-    { skbaddr: ffff8800a8bfff00 } hitcount:          3  len:        138
-    { skbaddr: ffff8800d62a1300 } hitcount:          3  len:        138
-    { skbaddr: ffff88002e37a100 } hitcount:          4  len:        184
-    { skbaddr: ffff880064504400 } hitcount:          4  len:        184
-    { skbaddr: ffff8800a8bfec00 } hitcount:          4  len:        184
-    { skbaddr: ffff88000ad53700 } hitcount:          5  len:        230
-    { skbaddr: ffff8800d2bcdb00 } hitcount:          5  len:        196
-    { skbaddr: ffff8800a1f90000 } hitcount:          6  len:        276
-    { skbaddr: ffff88006a54f900 } hitcount:          6  len:        276
-
-    Totals:
-        Hits: 81
-        Entries: 42
-        Dropped: 0
-
-  And here's an example that shows how to combine histogram data from
-  any two events even if they don't share any 'compatible' fields
-  other than 'hitcount' and 'stacktrace'.  These commands create a
-  couple of triggers named 'bar' using those fields::
-
-    # echo 'hist:name=bar:key=stacktrace:val=hitcount' > \
-           /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger
-    # echo 'hist:name=bar:key=stacktrace:val=hitcount' > \
-          /sys/kernel/debug/tracing/events/net/netif_rx/trigger
-
-  And displaying the output of either shows some interesting if
-  somewhat confusing output::
-
-    # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist
-    # cat /sys/kernel/debug/tracing/events/net/netif_rx/hist
-
-    # event histogram
-    #
-    # trigger info: hist:name=bar:keys=stacktrace:vals=hitcount:sort=hitcount:size=2048 [active]
-    #
-
-    { stacktrace:
-             _do_fork+0x18e/0x330
-             kernel_thread+0x29/0x30
-             kthreadd+0x154/0x1b0
-             ret_from_fork+0x3f/0x70
-    } hitcount:          1
-    { stacktrace:
-             netif_rx_internal+0xb2/0xd0
-             netif_rx_ni+0x20/0x70
-             dev_loopback_xmit+0xaa/0xd0
-             ip_mc_output+0x126/0x240
-             ip_local_out_sk+0x31/0x40
-             igmp_send_report+0x1e9/0x230
-             igmp_timer_expire+0xe9/0x120
-             call_timer_fn+0x39/0xf0
-             run_timer_softirq+0x1e1/0x290
-             __do_softirq+0xfd/0x290
-             irq_exit+0x98/0xb0
-             smp_apic_timer_interrupt+0x4a/0x60
-             apic_timer_interrupt+0x6d/0x80
-             cpuidle_enter+0x17/0x20
-             call_cpuidle+0x3b/0x60
-             cpu_startup_entry+0x22d/0x310
-    } hitcount:          1
-    { stacktrace:
-             netif_rx_internal+0xb2/0xd0
-             netif_rx_ni+0x20/0x70
-             dev_loopback_xmit+0xaa/0xd0
-             ip_mc_output+0x17f/0x240
-             ip_local_out_sk+0x31/0x40
-             ip_send_skb+0x1a/0x50
-             udp_send_skb+0x13e/0x270
-             udp_sendmsg+0x2bf/0x980
-             inet_sendmsg+0x67/0xa0
-             sock_sendmsg+0x38/0x50
-             SYSC_sendto+0xef/0x170
-             SyS_sendto+0xe/0x10
-             entry_SYSCALL_64_fastpath+0x12/0x6a
-    } hitcount:          2
-    { stacktrace:
-             netif_rx_internal+0xb2/0xd0
-             netif_rx+0x1c/0x60
-             loopback_xmit+0x6c/0xb0
-             dev_hard_start_xmit+0x219/0x3a0
-             __dev_queue_xmit+0x415/0x4f0
-             dev_queue_xmit_sk+0x13/0x20
-             ip_finish_output2+0x237/0x340
-             ip_finish_output+0x113/0x1d0
-             ip_output+0x66/0xc0
-             ip_local_out_sk+0x31/0x40
-             ip_send_skb+0x1a/0x50
-             udp_send_skb+0x16d/0x270
-             udp_sendmsg+0x2bf/0x980
-             inet_sendmsg+0x67/0xa0
-             sock_sendmsg+0x38/0x50
-             ___sys_sendmsg+0x14e/0x270
-    } hitcount:         76
-    { stacktrace:
-             netif_rx_internal+0xb2/0xd0
-             netif_rx+0x1c/0x60
-             loopback_xmit+0x6c/0xb0
-             dev_hard_start_xmit+0x219/0x3a0
-             __dev_queue_xmit+0x415/0x4f0
-             dev_queue_xmit_sk+0x13/0x20
-             ip_finish_output2+0x237/0x340
-             ip_finish_output+0x113/0x1d0
-             ip_output+0x66/0xc0
-             ip_local_out_sk+0x31/0x40
-             ip_send_skb+0x1a/0x50
-             udp_send_skb+0x16d/0x270
-             udp_sendmsg+0x2bf/0x980
-             inet_sendmsg+0x67/0xa0
-             sock_sendmsg+0x38/0x50
-             ___sys_sendmsg+0x269/0x270
-    } hitcount:         77
-    { stacktrace:
-             netif_rx_internal+0xb2/0xd0
-             netif_rx+0x1c/0x60
-             loopback_xmit+0x6c/0xb0
-             dev_hard_start_xmit+0x219/0x3a0
-             __dev_queue_xmit+0x415/0x4f0
-             dev_queue_xmit_sk+0x13/0x20
-             ip_finish_output2+0x237/0x340
-             ip_finish_output+0x113/0x1d0
-             ip_output+0x66/0xc0
-             ip_local_out_sk+0x31/0x40
-             ip_send_skb+0x1a/0x50
-             udp_send_skb+0x16d/0x270
-             udp_sendmsg+0x2bf/0x980
-             inet_sendmsg+0x67/0xa0
-             sock_sendmsg+0x38/0x50
-             SYSC_sendto+0xef/0x170
-    } hitcount:         88
-    { stacktrace:
-             _do_fork+0x18e/0x330
-             SyS_clone+0x19/0x20
-             entry_SYSCALL_64_fastpath+0x12/0x6a
-    } hitcount:        244
-
-    Totals:
-        Hits: 489
-        Entries: 7
-        Dropped: 0
+  See Documentation/trace/histogram.txt for details and examples.
diff --git a/Documentation/trace/ftrace.rst b/Documentation/trace/ftrace.rst
index fdf5fb54a04c..e45f0786f3f9 100644
--- a/Documentation/trace/ftrace.rst
+++ b/Documentation/trace/ftrace.rst
@@ -543,6 +543,30 @@ of ftrace. Here is a list of some of the key files:
 
 	See events.txt for more information.
 
+  timestamp_mode:
+
+	Certain tracers may change the timestamp mode used when
+	logging trace events into the event buffer.  Events with
+	different modes can coexist within a buffer but the mode in
+	effect when an event is logged determines which timestamp mode
+	is used for that event.  The default timestamp mode is
+	'delta'.
+
+	Usual timestamp modes for tracing:
+
+	  # cat timestamp_mode
+	  [delta] absolute
+
+	  The timestamp mode with the square brackets around it is the
+	  one in effect.
+
+	  delta: Default timestamp mode - timestamp is a delta against
+	         a per-buffer timestamp.
+
+	  absolute: The timestamp is a full timestamp, not a delta
+                 against some other value.  As such it takes up more
+                 space and is less efficient.
+
   hwlat_detector:
 
 	Directory for the Hardware Latency Detector.
diff --git a/Documentation/trace/histogram.txt b/Documentation/trace/histogram.txt
new file mode 100644
index 000000000000..6e05510afc28
--- /dev/null
+++ b/Documentation/trace/histogram.txt
@@ -0,0 +1,1995 @@
+			     Event Histograms
+
+		    Documentation written by Tom Zanussi
+
+1. Introduction
+===============
+
+  Histogram triggers are special event triggers that can be used to
+  aggregate trace event data into histograms.  For information on
+  trace events and event triggers, see Documentation/trace/events.txt.
+
+
+2. Histogram Trigger Command
+============================
+
+  A histogram trigger command is an event trigger command that
+  aggregates event hits into a hash table keyed on one or more trace
+  event format fields (or stacktrace) and a set of running totals
+  derived from one or more trace event format fields and/or event
+  counts (hitcount).
+
+  The format of a hist trigger is as follows:
+
+        hist:keys=<field1[,field2,...]>[:values=<field1[,field2,...]>]
+          [:sort=<field1[,field2,...]>][:size=#entries][:pause][:continue]
+          [:clear][:name=histname1] [if <filter>]
+
+  When a matching event is hit, an entry is added to a hash table
+  using the key(s) and value(s) named.  Keys and values correspond to
+  fields in the event's format description.  Values must correspond to
+  numeric fields - on an event hit, the value(s) will be added to a
+  sum kept for that field.  The special string 'hitcount' can be used
+  in place of an explicit value field - this is simply a count of
+  event hits.  If 'values' isn't specified, an implicit 'hitcount'
+  value will be automatically created and used as the only value.
+  Keys can be any field, or the special string 'stacktrace', which
+  will use the event's kernel stacktrace as the key.  The keywords
+  'keys' or 'key' can be used to specify keys, and the keywords
+  'values', 'vals', or 'val' can be used to specify values.  Compound
+  keys consisting of up to two fields can be specified by the 'keys'
+  keyword.  Hashing a compound key produces a unique entry in the
+  table for each unique combination of component keys, and can be
+  useful for providing more fine-grained summaries of event data.
+  Additionally, sort keys consisting of up to two fields can be
+  specified by the 'sort' keyword.  If more than one field is
+  specified, the result will be a 'sort within a sort': the first key
+  is taken to be the primary sort key and the second the secondary
+  key.  If a hist trigger is given a name using the 'name' parameter,
+  its histogram data will be shared with other triggers of the same
+  name, and trigger hits will update this common data.  Only triggers
+  with 'compatible' fields can be combined in this way; triggers are
+  'compatible' if the fields named in the trigger share the same
+  number and type of fields and those fields also have the same names.
+  Note that any two events always share the compatible 'hitcount' and
+  'stacktrace' fields and can therefore be combined using those
+  fields, however pointless that may be.
+
+  'hist' triggers add a 'hist' file to each event's subdirectory.
+  Reading the 'hist' file for the event will dump the hash table in
+  its entirety to stdout.  If there are multiple hist triggers
+  attached to an event, there will be a table for each trigger in the
+  output.  The table displayed for a named trigger will be the same as
+  any other instance having the same name. Each printed hash table
+  entry is a simple list of the keys and values comprising the entry;
+  keys are printed first and are delineated by curly braces, and are
+  followed by the set of value fields for the entry.  By default,
+  numeric fields are displayed as base-10 integers.  This can be
+  modified by appending any of the following modifiers to the field
+  name:
+
+        .hex        display a number as a hex value
+	.sym        display an address as a symbol
+	.sym-offset display an address as a symbol and offset
+	.syscall    display a syscall id as a system call name
+	.execname   display a common_pid as a program name
+	.log2       display log2 value rather than raw number
+	.usecs      display a common_timestamp in microseconds
+
+  Note that in general the semantics of a given field aren't
+  interpreted when applying a modifier to it, but there are some
+  restrictions to be aware of in this regard:
+
+    - only the 'hex' modifier can be used for values (because values
+      are essentially sums, and the other modifiers don't make sense
+      in that context).
+    - the 'execname' modifier can only be used on a 'common_pid'.  The
+      reason for this is that the execname is simply the 'comm' value
+      saved for the 'current' process when an event was triggered,
+      which is the same as the common_pid value saved by the event
+      tracing code.  Trying to apply that comm value to other pid
+      values wouldn't be correct, and typically events that care save
+      pid-specific comm fields in the event itself.
+
+  A typical usage scenario would be the following to enable a hist
+  trigger, read its current contents, and then turn it off:
+
+  # echo 'hist:keys=skbaddr.hex:vals=len' > \
+    /sys/kernel/debug/tracing/events/net/netif_rx/trigger
+
+  # cat /sys/kernel/debug/tracing/events/net/netif_rx/hist
+
+  # echo '!hist:keys=skbaddr.hex:vals=len' > \
+    /sys/kernel/debug/tracing/events/net/netif_rx/trigger
+
+  The trigger file itself can be read to show the details of the
+  currently attached hist trigger.  This information is also displayed
+  at the top of the 'hist' file when read.
+
+  By default, the size of the hash table is 2048 entries.  The 'size'
+  parameter can be used to specify more or fewer than that.  The units
+  are in terms of hashtable entries - if a run uses more entries than
+  specified, the results will show the number of 'drops', the number
+  of hits that were ignored.  The size should be a power of 2 between
+  128 and 131072 (any non-power-of-2 number specified will be rounded
+  up).
+
+  The 'sort' parameter can be used to specify a value field to sort
+  on.  The default if unspecified is 'hitcount' and the default sort
+  order is 'ascending'.  To sort in the opposite direction, append
+  '.descending' to the sort key.
+
+  The 'pause' parameter can be used to pause an existing hist trigger
+  or to start a hist trigger but not log any events until told to do
+  so.  'continue' or 'cont' can be used to start or restart a paused
+  hist trigger.
+
+  The 'clear' parameter will clear the contents of a running hist
+  trigger and leave its current paused/active state unchanged.
+
+  Note that the 'pause', 'cont', and 'clear' parameters should be
+  applied using the 'append' shell operator ('>>') if applied to an
+  existing trigger, rather than via the '>' operator, which will cause
+  the trigger to be removed through truncation.
+
+- enable_hist/disable_hist
+
+  The enable_hist and disable_hist triggers can be used to have one
+  event conditionally start and stop another event's already-attached
+  hist trigger.  Any number of enable_hist and disable_hist triggers
+  can be attached to a given event, allowing that event to kick off
+  and stop aggregations on a host of other events.
+
+  The format is very similar to the enable/disable_event triggers:
+
+      enable_hist:<system>:<event>[:count]
+      disable_hist:<system>:<event>[:count]
+
+  Instead of enabling or disabling the tracing of the target event
+  into the trace buffer as the enable/disable_event triggers do, the
+  enable/disable_hist triggers enable or disable the aggregation of
+  the target event into a hash table.
+
+  A typical usage scenario for the enable_hist/disable_hist triggers
+  would be to first set up a paused hist trigger on some event,
+  followed by an enable_hist/disable_hist pair that turns the hist
+  aggregation on and off when conditions of interest are hit:
+
+  # echo 'hist:keys=skbaddr.hex:vals=len:pause' > \
+    /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
+
+  # echo 'enable_hist:net:netif_receive_skb if filename==/usr/bin/wget' > \
+    /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger
+
+  # echo 'disable_hist:net:netif_receive_skb if comm==wget' > \
+    /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger
+
+  The above sets up an initially paused hist trigger which is unpaused
+  and starts aggregating events when a given program is executed, and
+  which stops aggregating when the process exits and the hist trigger
+  is paused again.
+
+  The examples below provide a more concrete illustration of the
+  concepts and typical usage patterns discussed above.
+
+  'special' event fields
+  ------------------------
+
+  There are a number of 'special event fields' available for use as
+  keys or values in a hist trigger.  These look like and behave as if
+  they were actual event fields, but aren't really part of the event's
+  field definition or format file.  They are however available for any
+  event, and can be used anywhere an actual event field could be.
+  They are:
+
+    common_timestamp       u64 - timestamp (from ring buffer) associated
+                                 with the event, in nanoseconds.  May be
+				 modified by .usecs to have timestamps
+				 interpreted as microseconds.
+    cpu                    int - the cpu on which the event occurred.
+
+  Extended error information
+  --------------------------
+
+  For some error conditions encountered when invoking a hist trigger
+  command, extended error information is available via the
+  corresponding event's 'hist' file.  Reading the hist file after an
+  error will display more detailed information about what went wrong,
+  if information is available.  This extended error information will
+  be available until the next hist trigger command for that event.
+
+  If available for a given error condition, the extended error
+  information and usage takes the following form:
+
+    # echo xxx > /sys/kernel/debug/tracing/events/sched/sched_wakeup/trigger
+    echo: write error: Invalid argument
+
+    # cat /sys/kernel/debug/tracing/events/sched/sched_wakeup/hist
+    ERROR: Couldn't yyy: zzz
+      Last command: xxx
+
+6.2 'hist' trigger examples
+---------------------------
+
+  The first set of examples creates aggregations using the kmalloc
+  event.  The fields that can be used for the hist trigger are listed
+  in the kmalloc event's format file:
+
+    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/format
+    name: kmalloc
+    ID: 374
+    format:
+	field:unsigned short common_type;	offset:0;	size:2;	signed:0;
+	field:unsigned char common_flags;	offset:2;	size:1;	signed:0;
+	field:unsigned char common_preempt_count;		offset:3;	size:1;	signed:0;
+	field:int common_pid;					offset:4;	size:4;	signed:1;
+
+	field:unsigned long call_site;				offset:8;	size:8;	signed:0;
+	field:const void * ptr;					offset:16;	size:8;	signed:0;
+	field:size_t bytes_req;					offset:24;	size:8;	signed:0;
+	field:size_t bytes_alloc;				offset:32;	size:8;	signed:0;
+	field:gfp_t gfp_flags;					offset:40;	size:4;	signed:0;
+
+  We'll start by creating a hist trigger that generates a simple table
+  that lists the total number of bytes requested for each function in
+  the kernel that made one or more calls to kmalloc:
+
+    # echo 'hist:key=call_site:val=bytes_req' > \
+            /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+
+  This tells the tracing system to create a 'hist' trigger using the
+  call_site field of the kmalloc event as the key for the table, which
+  just means that each unique call_site address will have an entry
+  created for it in the table.  The 'val=bytes_req' parameter tells
+  the hist trigger that for each unique entry (call_site) in the
+  table, it should keep a running total of the number of bytes
+  requested by that call_site.
+
+  We'll let it run for a while and then dump the contents of the 'hist'
+  file in the kmalloc event's subdirectory (for readability, a number
+  of entries have been omitted):
+
+    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
+    # trigger info: hist:keys=call_site:vals=bytes_req:sort=hitcount:size=2048 [active]
+
+    { call_site: 18446744072106379007 } hitcount:          1  bytes_req:        176
+    { call_site: 18446744071579557049 } hitcount:          1  bytes_req:       1024
+    { call_site: 18446744071580608289 } hitcount:          1  bytes_req:      16384
+    { call_site: 18446744071581827654 } hitcount:          1  bytes_req:         24
+    { call_site: 18446744071580700980 } hitcount:          1  bytes_req:          8
+    { call_site: 18446744071579359876 } hitcount:          1  bytes_req:        152
+    { call_site: 18446744071580795365 } hitcount:          3  bytes_req:        144
+    { call_site: 18446744071581303129 } hitcount:          3  bytes_req:        144
+    { call_site: 18446744071580713234 } hitcount:          4  bytes_req:       2560
+    { call_site: 18446744071580933750 } hitcount:          4  bytes_req:        736
+    .
+    .
+    .
+    { call_site: 18446744072106047046 } hitcount:         69  bytes_req:       5576
+    { call_site: 18446744071582116407 } hitcount:         73  bytes_req:       2336
+    { call_site: 18446744072106054684 } hitcount:        136  bytes_req:     140504
+    { call_site: 18446744072106224230 } hitcount:        136  bytes_req:      19584
+    { call_site: 18446744072106078074 } hitcount:        153  bytes_req:       2448
+    { call_site: 18446744072106062406 } hitcount:        153  bytes_req:      36720
+    { call_site: 18446744071582507929 } hitcount:        153  bytes_req:      37088
+    { call_site: 18446744072102520590 } hitcount:        273  bytes_req:      10920
+    { call_site: 18446744071582143559 } hitcount:        358  bytes_req:        716
+    { call_site: 18446744072106465852 } hitcount:        417  bytes_req:      56712
+    { call_site: 18446744072102523378 } hitcount:        485  bytes_req:      27160
+    { call_site: 18446744072099568646 } hitcount:       1676  bytes_req:      33520
+
+    Totals:
+        Hits: 4610
+        Entries: 45
+        Dropped: 0
+
+  The output displays a line for each entry, beginning with the key
+  specified in the trigger, followed by the value(s) also specified in
+  the trigger.  At the beginning of the output is a line that displays
+  the trigger info, which can also be displayed by reading the
+  'trigger' file:
+
+    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+    hist:keys=call_site:vals=bytes_req:sort=hitcount:size=2048 [active]
+
+  At the end of the output are a few lines that display the overall
+  totals for the run.  The 'Hits' field shows the total number of
+  times the event trigger was hit, the 'Entries' field shows the total
+  number of used entries in the hash table, and the 'Dropped' field
+  shows the number of hits that were dropped because the number of
+  used entries for the run exceeded the maximum number of entries
+  allowed for the table (normally 0, but if not, a hint that you may
+  want to increase the size of the table using the 'size' parameter).
+
+  Notice in the above output that there's an extra field, 'hitcount',
+  which wasn't specified in the trigger.  Also notice that in the
+  trigger info output, there's a parameter, 'sort=hitcount', which
+  wasn't specified in the trigger either.  The reason for that is that
+  every trigger implicitly keeps a count of the total number of hits
+  attributed to a given entry, called the 'hitcount'.  That hitcount
+  information is explicitly displayed in the output, and in the
+  absence of a user-specified sort parameter, is used as the default
+  sort field.
+
+  The value 'hitcount' can be used in place of an explicit value in
+  the 'values' parameter if you don't really need to have any
+  particular field summed and are mainly interested in hit
+  frequencies.
+
+  To turn the hist trigger off, simply call up the trigger in the
+  command history and re-execute it with a '!' prepended:
+
+    # echo '!hist:key=call_site:val=bytes_req' > \
+           /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+
+  Finally, notice that the call_site as displayed in the output above
+  isn't really very useful.  It's an address, but normally addresses
+  are displayed in hex.  To have a numeric field displayed as a hex
+  value, simply append '.hex' to the field name in the trigger:
+
+    # echo 'hist:key=call_site.hex:val=bytes_req' > \
+           /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+
+    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
+    # trigger info: hist:keys=call_site.hex:vals=bytes_req:sort=hitcount:size=2048 [active]
+
+    { call_site: ffffffffa026b291 } hitcount:          1  bytes_req:        433
+    { call_site: ffffffffa07186ff } hitcount:          1  bytes_req:        176
+    { call_site: ffffffff811ae721 } hitcount:          1  bytes_req:      16384
+    { call_site: ffffffff811c5134 } hitcount:          1  bytes_req:          8
+    { call_site: ffffffffa04a9ebb } hitcount:          1  bytes_req:        511
+    { call_site: ffffffff8122e0a6 } hitcount:          1  bytes_req:         12
+    { call_site: ffffffff8107da84 } hitcount:          1  bytes_req:        152
+    { call_site: ffffffff812d8246 } hitcount:          1  bytes_req:         24
+    { call_site: ffffffff811dc1e5 } hitcount:          3  bytes_req:        144
+    { call_site: ffffffffa02515e8 } hitcount:          3  bytes_req:        648
+    { call_site: ffffffff81258159 } hitcount:          3  bytes_req:        144
+    { call_site: ffffffff811c80f4 } hitcount:          4  bytes_req:        544
+    .
+    .
+    .
+    { call_site: ffffffffa06c7646 } hitcount:        106  bytes_req:       8024
+    { call_site: ffffffffa06cb246 } hitcount:        132  bytes_req:      31680
+    { call_site: ffffffffa06cef7a } hitcount:        132  bytes_req:       2112
+    { call_site: ffffffff8137e399 } hitcount:        132  bytes_req:      23232
+    { call_site: ffffffffa06c941c } hitcount:        185  bytes_req:     171360
+    { call_site: ffffffffa06f2a66 } hitcount:        185  bytes_req:      26640
+    { call_site: ffffffffa036a70e } hitcount:        265  bytes_req:      10600
+    { call_site: ffffffff81325447 } hitcount:        292  bytes_req:        584
+    { call_site: ffffffffa072da3c } hitcount:        446  bytes_req:      60656
+    { call_site: ffffffffa036b1f2 } hitcount:        526  bytes_req:      29456
+    { call_site: ffffffffa0099c06 } hitcount:       1780  bytes_req:      35600
+
+    Totals:
+        Hits: 4775
+        Entries: 46
+        Dropped: 0
+
+  Even that's only marginally more useful - while hex values do look
+  more like addresses, what users are typically more interested in
+  when looking at text addresses are the corresponding symbols
+  instead.  To have an address displayed as a symbolic value instead,
+  simply append '.sym' or '.sym-offset' to the field name in the
+  trigger:
+
+    # echo 'hist:key=call_site.sym:val=bytes_req' > \
+           /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+
+    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
+    # trigger info: hist:keys=call_site.sym:vals=bytes_req:sort=hitcount:size=2048 [active]
+
+    { call_site: [ffffffff810adcb9] syslog_print_all                              } hitcount:          1  bytes_req:       1024
+    { call_site: [ffffffff8154bc62] usb_control_msg                               } hitcount:          1  bytes_req:          8
+    { call_site: [ffffffffa00bf6fe] hidraw_send_report [hid]                      } hitcount:          1  bytes_req:          7
+    { call_site: [ffffffff8154acbe] usb_alloc_urb                                 } hitcount:          1  bytes_req:        192
+    { call_site: [ffffffffa00bf1ca] hidraw_report_event [hid]                     } hitcount:          1  bytes_req:          7
+    { call_site: [ffffffff811e3a25] __seq_open_private                            } hitcount:          1  bytes_req:         40
+    { call_site: [ffffffff8109524a] alloc_fair_sched_group                        } hitcount:          2  bytes_req:        128
+    { call_site: [ffffffff811febd5] fsnotify_alloc_group                          } hitcount:          2  bytes_req:        528
+    { call_site: [ffffffff81440f58] __tty_buffer_request_room                     } hitcount:          2  bytes_req:       2624
+    { call_site: [ffffffff81200ba6] inotify_new_group                             } hitcount:          2  bytes_req:         96
+    { call_site: [ffffffffa05e19af] ieee80211_start_tx_ba_session [mac80211]      } hitcount:          2  bytes_req:        464
+    { call_site: [ffffffff81672406] tcp_get_metrics                               } hitcount:          2  bytes_req:        304
+    { call_site: [ffffffff81097ec2] alloc_rt_sched_group                          } hitcount:          2  bytes_req:        128
+    { call_site: [ffffffff81089b05] sched_create_group                            } hitcount:          2  bytes_req:       1424
+    .
+    .
+    .
+    { call_site: [ffffffffa04a580c] intel_crtc_page_flip [i915]                   } hitcount:       1185  bytes_req:     123240
+    { call_site: [ffffffffa0287592] drm_mode_page_flip_ioctl [drm]                } hitcount:       1185  bytes_req:     104280
+    { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state [i915]            } hitcount:       1402  bytes_req:     190672
+    { call_site: [ffffffff812891ca] ext4_find_extent                              } hitcount:       1518  bytes_req:     146208
+    { call_site: [ffffffffa029070e] drm_vma_node_allow [drm]                      } hitcount:       1746  bytes_req:      69840
+    { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23 [i915]         } hitcount:       2021  bytes_req:     792312
+    { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc [drm]                   } hitcount:       2592  bytes_req:     145152
+    { call_site: [ffffffffa0489a66] intel_ring_begin [i915]                       } hitcount:       2629  bytes_req:     378576
+    { call_site: [ffffffffa046041c] i915_gem_execbuffer2 [i915]                   } hitcount:       2629  bytes_req:    3783248
+    { call_site: [ffffffff81325607] apparmor_file_alloc_security                  } hitcount:       5192  bytes_req:      10384
+    { call_site: [ffffffffa00b7c06] hid_report_raw_event [hid]                    } hitcount:       5529  bytes_req:     110584
+    { call_site: [ffffffff8131ebf7] aa_alloc_task_context                         } hitcount:      21943  bytes_req:     702176
+    { call_site: [ffffffff8125847d] ext4_htree_store_dirent                       } hitcount:      55759  bytes_req:    5074265
+
+    Totals:
+        Hits: 109928
+        Entries: 71
+        Dropped: 0
+
+  Because the default sort key above is 'hitcount', the above shows
+  the list of call_sites by increasing hitcount, so that at the bottom
+  we see the functions that made the most kmalloc calls during the
+  run.  If instead we wanted to see the top kmalloc callers in
+  terms of the number of bytes requested rather than the number of
+  calls, and we wanted the top caller to appear at the top, we can use
+  the 'sort' parameter, along with the 'descending' modifier:
+
+    # echo 'hist:key=call_site.sym:val=bytes_req:sort=bytes_req.descending' > \
+           /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+
+    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
+    # trigger info: hist:keys=call_site.sym:vals=bytes_req:sort=bytes_req.descending:size=2048 [active]
+
+    { call_site: [ffffffffa046041c] i915_gem_execbuffer2 [i915]                   } hitcount:       2186  bytes_req:    3397464
+    { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23 [i915]         } hitcount:       1790  bytes_req:     712176
+    { call_site: [ffffffff8125847d] ext4_htree_store_dirent                       } hitcount:       8132  bytes_req:     513135
+    { call_site: [ffffffff811e2a1b] seq_buf_alloc                                 } hitcount:        106  bytes_req:     440128
+    { call_site: [ffffffffa0489a66] intel_ring_begin [i915]                       } hitcount:       2186  bytes_req:     314784
+    { call_site: [ffffffff812891ca] ext4_find_extent                              } hitcount:       2174  bytes_req:     208992
+    { call_site: [ffffffff811ae8e1] __kmalloc                                     } hitcount:          8  bytes_req:     131072
+    { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state [i915]            } hitcount:        859  bytes_req:     116824
+    { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc [drm]                   } hitcount:       1834  bytes_req:     102704
+    { call_site: [ffffffffa04a580c] intel_crtc_page_flip [i915]                   } hitcount:        972  bytes_req:     101088
+    { call_site: [ffffffffa0287592] drm_mode_page_flip_ioctl [drm]                } hitcount:        972  bytes_req:      85536
+    { call_site: [ffffffffa00b7c06] hid_report_raw_event [hid]                    } hitcount:       3333  bytes_req:      66664
+    { call_site: [ffffffff8137e559] sg_kmalloc                                    } hitcount:        209  bytes_req:      61632
+    .
+    .
+    .
+    { call_site: [ffffffff81095225] alloc_fair_sched_group                        } hitcount:          2  bytes_req:        128
+    { call_site: [ffffffff81097ec2] alloc_rt_sched_group                          } hitcount:          2  bytes_req:        128
+    { call_site: [ffffffff812d8406] copy_semundo                                  } hitcount:          2  bytes_req:         48
+    { call_site: [ffffffff81200ba6] inotify_new_group                             } hitcount:          1  bytes_req:         48
+    { call_site: [ffffffffa027121a] drm_getmagic [drm]                            } hitcount:          1  bytes_req:         48
+    { call_site: [ffffffff811e3a25] __seq_open_private                            } hitcount:          1  bytes_req:         40
+    { call_site: [ffffffff811c52f4] bprm_change_interp                            } hitcount:          2  bytes_req:         16
+    { call_site: [ffffffff8154bc62] usb_control_msg                               } hitcount:          1  bytes_req:          8
+    { call_site: [ffffffffa00bf1ca] hidraw_report_event [hid]                     } hitcount:          1  bytes_req:          7
+    { call_site: [ffffffffa00bf6fe] hidraw_send_report [hid]                      } hitcount:          1  bytes_req:          7
+
+    Totals:
+        Hits: 32133
+        Entries: 81
+        Dropped: 0
+
+  To display the offset and size information in addition to the symbol
+  name, just use 'sym-offset' instead:
+
+    # echo 'hist:key=call_site.sym-offset:val=bytes_req:sort=bytes_req.descending' > \
+           /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+
+    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
+    # trigger info: hist:keys=call_site.sym-offset:vals=bytes_req:sort=bytes_req.descending:size=2048 [active]
+
+    { call_site: [ffffffffa046041c] i915_gem_execbuffer2+0x6c/0x2c0 [i915]                  } hitcount:       4569  bytes_req:    3163720
+    { call_site: [ffffffffa0489a66] intel_ring_begin+0xc6/0x1f0 [i915]                      } hitcount:       4569  bytes_req:     657936
+    { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23+0x694/0x1020 [i915]      } hitcount:       1519  bytes_req:     472936
+    { call_site: [ffffffffa045e646] i915_gem_do_execbuffer.isra.23+0x516/0x1020 [i915]      } hitcount:       3050  bytes_req:     211832
+    { call_site: [ffffffff811e2a1b] seq_buf_alloc+0x1b/0x50                                 } hitcount:         34  bytes_req:     148384
+    { call_site: [ffffffffa04a580c] intel_crtc_page_flip+0xbc/0x870 [i915]                  } hitcount:       1385  bytes_req:     144040
+    { call_site: [ffffffff811ae8e1] __kmalloc+0x191/0x1b0                                   } hitcount:          8  bytes_req:     131072
+    { call_site: [ffffffffa0287592] drm_mode_page_flip_ioctl+0x282/0x360 [drm]              } hitcount:       1385  bytes_req:     121880
+    { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc+0x32/0x100 [drm]                  } hitcount:       1848  bytes_req:     103488
+    { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state+0x2c/0xa0 [i915]            } hitcount:        461  bytes_req:      62696
+    { call_site: [ffffffffa029070e] drm_vma_node_allow+0x2e/0xd0 [drm]                      } hitcount:       1541  bytes_req:      61640
+    { call_site: [ffffffff815f8d7b] sk_prot_alloc+0xcb/0x1b0                                } hitcount:         57  bytes_req:      57456
+    .
+    .
+    .
+    { call_site: [ffffffff8109524a] alloc_fair_sched_group+0x5a/0x1a0                       } hitcount:          2  bytes_req:        128
+    { call_site: [ffffffffa027b921] drm_vm_open_locked+0x31/0xa0 [drm]                      } hitcount:          3  bytes_req:         96
+    { call_site: [ffffffff8122e266] proc_self_follow_link+0x76/0xb0                         } hitcount:          8  bytes_req:         96
+    { call_site: [ffffffff81213e80] load_elf_binary+0x240/0x1650                            } hitcount:          3  bytes_req:         84
+    { call_site: [ffffffff8154bc62] usb_control_msg+0x42/0x110                              } hitcount:          1  bytes_req:          8
+    { call_site: [ffffffffa00bf6fe] hidraw_send_report+0x7e/0x1a0 [hid]                     } hitcount:          1  bytes_req:          7
+    { call_site: [ffffffffa00bf1ca] hidraw_report_event+0x8a/0x120 [hid]                    } hitcount:          1  bytes_req:          7
+
+    Totals:
+        Hits: 26098
+        Entries: 64
+        Dropped: 0
+
+  We can also add multiple fields to the 'values' parameter.  For
+  example, we might want to see the total number of bytes allocated
+  alongside bytes requested, and display the result sorted by bytes
+  allocated in a descending order:
+
+    # echo 'hist:keys=call_site.sym:values=bytes_req,bytes_alloc:sort=bytes_alloc.descending' > \
+           /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+
+    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
+    # trigger info: hist:keys=call_site.sym:vals=bytes_req,bytes_alloc:sort=bytes_alloc.descending:size=2048 [active]
+
+    { call_site: [ffffffffa046041c] i915_gem_execbuffer2 [i915]                   } hitcount:       7403  bytes_req:    4084360  bytes_alloc:    5958016
+    { call_site: [ffffffff811e2a1b] seq_buf_alloc                                 } hitcount:        541  bytes_req:    2213968  bytes_alloc:    2228224
+    { call_site: [ffffffffa0489a66] intel_ring_begin [i915]                       } hitcount:       7404  bytes_req:    1066176  bytes_alloc:    1421568
+    { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23 [i915]         } hitcount:       1565  bytes_req:     557368  bytes_alloc:    1037760
+    { call_site: [ffffffff8125847d] ext4_htree_store_dirent                       } hitcount:       9557  bytes_req:     595778  bytes_alloc:     695744
+    { call_site: [ffffffffa045e646] i915_gem_do_execbuffer.isra.23 [i915]         } hitcount:       5839  bytes_req:     430680  bytes_alloc:     470400
+    { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state [i915]            } hitcount:       2388  bytes_req:     324768  bytes_alloc:     458496
+    { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc [drm]                   } hitcount:       3911  bytes_req:     219016  bytes_alloc:     250304
+    { call_site: [ffffffff815f8d7b] sk_prot_alloc                                 } hitcount:        235  bytes_req:     236880  bytes_alloc:     240640
+    { call_site: [ffffffff8137e559] sg_kmalloc                                    } hitcount:        557  bytes_req:     169024  bytes_alloc:     221760
+    { call_site: [ffffffffa00b7c06] hid_report_raw_event [hid]                    } hitcount:       9378  bytes_req:     187548  bytes_alloc:     206312
+    { call_site: [ffffffffa04a580c] intel_crtc_page_flip [i915]                   } hitcount:       1519  bytes_req:     157976  bytes_alloc:     194432
+    .
+    .
+    .
+    { call_site: [ffffffff8109bd3b] sched_autogroup_create_attach                 } hitcount:          2  bytes_req:        144  bytes_alloc:        192
+    { call_site: [ffffffff81097ee8] alloc_rt_sched_group                          } hitcount:          2  bytes_req:        128  bytes_alloc:        128
+    { call_site: [ffffffff8109524a] alloc_fair_sched_group                        } hitcount:          2  bytes_req:        128  bytes_alloc:        128
+    { call_site: [ffffffff81095225] alloc_fair_sched_group                        } hitcount:          2  bytes_req:        128  bytes_alloc:        128
+    { call_site: [ffffffff81097ec2] alloc_rt_sched_group                          } hitcount:          2  bytes_req:        128  bytes_alloc:        128
+    { call_site: [ffffffff81213e80] load_elf_binary                               } hitcount:          3  bytes_req:         84  bytes_alloc:         96
+    { call_site: [ffffffff81079a2e] kthread_create_on_node                        } hitcount:          1  bytes_req:         56  bytes_alloc:         64
+    { call_site: [ffffffffa00bf6fe] hidraw_send_report [hid]                      } hitcount:          1  bytes_req:          7  bytes_alloc:          8
+    { call_site: [ffffffff8154bc62] usb_control_msg                               } hitcount:          1  bytes_req:          8  bytes_alloc:          8
+    { call_site: [ffffffffa00bf1ca] hidraw_report_event [hid]                     } hitcount:          1  bytes_req:          7  bytes_alloc:          8
+
+    Totals:
+        Hits: 66598
+        Entries: 65
+        Dropped: 0
+
+  Finally, to finish off our kmalloc example, instead of simply having
+  the hist trigger display symbolic call_sites, we can have the hist
+  trigger additionally display the complete set of kernel stack traces
+  that led to each call_site.  To do that, we simply use the special
+  value 'stacktrace' for the key parameter:
+
+    # echo 'hist:keys=stacktrace:values=bytes_req,bytes_alloc:sort=bytes_alloc' > \
+           /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+
+  The above trigger will use the kernel stack trace in effect when an
+  event is triggered as the key for the hash table.  This allows the
+  enumeration of every kernel callpath that led up to a particular
+  event, along with a running total of any of the event fields for
+  that event.  Here we tally bytes requested and bytes allocated for
+  every callpath in the system that led up to a kmalloc (in this case
+  every callpath to a kmalloc for a kernel compile):
+
+    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
+    # trigger info: hist:keys=stacktrace:vals=bytes_req,bytes_alloc:sort=bytes_alloc:size=2048 [active]
+
+    { stacktrace:
+         __kmalloc_track_caller+0x10b/0x1a0
+         kmemdup+0x20/0x50
+         hidraw_report_event+0x8a/0x120 [hid]
+         hid_report_raw_event+0x3ea/0x440 [hid]
+         hid_input_report+0x112/0x190 [hid]
+         hid_irq_in+0xc2/0x260 [usbhid]
+         __usb_hcd_giveback_urb+0x72/0x120
+         usb_giveback_urb_bh+0x9e/0xe0
+         tasklet_hi_action+0xf8/0x100
+         __do_softirq+0x114/0x2c0
+         irq_exit+0xa5/0xb0
+         do_IRQ+0x5a/0xf0
+         ret_from_intr+0x0/0x30
+         cpuidle_enter+0x17/0x20
+         cpu_startup_entry+0x315/0x3e0
+         rest_init+0x7c/0x80
+    } hitcount:          3  bytes_req:         21  bytes_alloc:         24
+    { stacktrace:
+         __kmalloc_track_caller+0x10b/0x1a0
+         kmemdup+0x20/0x50
+         hidraw_report_event+0x8a/0x120 [hid]
+         hid_report_raw_event+0x3ea/0x440 [hid]
+         hid_input_report+0x112/0x190 [hid]
+         hid_irq_in+0xc2/0x260 [usbhid]
+         __usb_hcd_giveback_urb+0x72/0x120
+         usb_giveback_urb_bh+0x9e/0xe0
+         tasklet_hi_action+0xf8/0x100
+         __do_softirq+0x114/0x2c0
+         irq_exit+0xa5/0xb0
+         do_IRQ+0x5a/0xf0
+         ret_from_intr+0x0/0x30
+    } hitcount:          3  bytes_req:         21  bytes_alloc:         24
+    { stacktrace:
+         kmem_cache_alloc_trace+0xeb/0x150
+         aa_alloc_task_context+0x27/0x40
+         apparmor_cred_prepare+0x1f/0x50
+         security_prepare_creds+0x16/0x20
+         prepare_creds+0xdf/0x1a0
+         SyS_capset+0xb5/0x200
+         system_call_fastpath+0x12/0x6a
+    } hitcount:          1  bytes_req:         32  bytes_alloc:         32
+    .
+    .
+    .
+    { stacktrace:
+         __kmalloc+0x11b/0x1b0
+         i915_gem_execbuffer2+0x6c/0x2c0 [i915]
+         drm_ioctl+0x349/0x670 [drm]
+         do_vfs_ioctl+0x2f0/0x4f0
+         SyS_ioctl+0x81/0xa0
+         system_call_fastpath+0x12/0x6a
+    } hitcount:      17726  bytes_req:   13944120  bytes_alloc:   19593808
+    { stacktrace:
+         __kmalloc+0x11b/0x1b0
+         load_elf_phdrs+0x76/0xa0
+         load_elf_binary+0x102/0x1650
+         search_binary_handler+0x97/0x1d0
+         do_execveat_common.isra.34+0x551/0x6e0
+         SyS_execve+0x3a/0x50
+         return_from_execve+0x0/0x23
+    } hitcount:      33348  bytes_req:   17152128  bytes_alloc:   20226048
+    { stacktrace:
+         kmem_cache_alloc_trace+0xeb/0x150
+         apparmor_file_alloc_security+0x27/0x40
+         security_file_alloc+0x16/0x20
+         get_empty_filp+0x93/0x1c0
+         path_openat+0x31/0x5f0
+         do_filp_open+0x3a/0x90
+         do_sys_open+0x128/0x220
+         SyS_open+0x1e/0x20
+         system_call_fastpath+0x12/0x6a
+    } hitcount:    4766422  bytes_req:    9532844  bytes_alloc:   38131376
+    { stacktrace:
+         __kmalloc+0x11b/0x1b0
+         seq_buf_alloc+0x1b/0x50
+         seq_read+0x2cc/0x370
+         proc_reg_read+0x3d/0x80
+         __vfs_read+0x28/0xe0
+         vfs_read+0x86/0x140
+         SyS_read+0x46/0xb0
+         system_call_fastpath+0x12/0x6a
+    } hitcount:      19133  bytes_req:   78368768  bytes_alloc:   78368768
+
+    Totals:
+        Hits: 6085872
+        Entries: 253
+        Dropped: 0
+
+  If you key a hist trigger on common_pid, in order for example to
+  gather and display sorted totals for each process, you can use the
+  special .execname modifier to display the executable names for the
+  processes in the table rather than raw pids.  The example below
+  keeps a per-process sum of total bytes read:
+
+    # echo 'hist:key=common_pid.execname:val=count:sort=count.descending' > \
+           /sys/kernel/debug/tracing/events/syscalls/sys_enter_read/trigger
+
+    # cat /sys/kernel/debug/tracing/events/syscalls/sys_enter_read/hist
+    # trigger info: hist:keys=common_pid.execname:vals=count:sort=count.descending:size=2048 [active]
+
+    { common_pid: gnome-terminal  [      3196] } hitcount:        280  count:    1093512
+    { common_pid: Xorg            [      1309] } hitcount:        525  count:     256640
+    { common_pid: compiz          [      2889] } hitcount:         59  count:     254400
+    { common_pid: bash            [      8710] } hitcount:          3  count:      66369
+    { common_pid: dbus-daemon-lau [      8703] } hitcount:         49  count:      47739
+    { common_pid: irqbalance      [      1252] } hitcount:         27  count:      27648
+    { common_pid: 01ifupdown      [      8705] } hitcount:          3  count:      17216
+    { common_pid: dbus-daemon     [       772] } hitcount:         10  count:      12396
+    { common_pid: Socket Thread   [      8342] } hitcount:         11  count:      11264
+    { common_pid: nm-dhcp-client. [      8701] } hitcount:          6  count:       7424
+    { common_pid: gmain           [      1315] } hitcount:         18  count:       6336
+    .
+    .
+    .
+    { common_pid: postgres        [      1892] } hitcount:          2  count:         32
+    { common_pid: postgres        [      1891] } hitcount:          2  count:         32
+    { common_pid: gmain           [      8704] } hitcount:          2  count:         32
+    { common_pid: upstart-dbus-br [      2740] } hitcount:         21  count:         21
+    { common_pid: nm-dispatcher.a [      8696] } hitcount:          1  count:         16
+    { common_pid: indicator-datet [      2904] } hitcount:          1  count:         16
+    { common_pid: gdbus           [      2998] } hitcount:          1  count:         16
+    { common_pid: rtkit-daemon    [      2052] } hitcount:          1  count:          8
+    { common_pid: init            [         1] } hitcount:          2  count:          2
+
+    Totals:
+        Hits: 2116
+        Entries: 51
+        Dropped: 0
+
+  Similarly, if you key a hist trigger on syscall id, for example to
+  gather and display a list of systemwide syscall hits, you can use
+  the special .syscall modifier to display the syscall names rather
+  than raw ids.  The example below keeps a running total of syscall
+  counts for the system during the run:
+
+    # echo 'hist:key=id.syscall:val=hitcount' > \
+           /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger
+
+    # cat /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/hist
+    # trigger info: hist:keys=id.syscall:vals=hitcount:sort=hitcount:size=2048 [active]
+
+    { id: sys_fsync                     [ 74] } hitcount:          1
+    { id: sys_newuname                  [ 63] } hitcount:          1
+    { id: sys_prctl                     [157] } hitcount:          1
+    { id: sys_statfs                    [137] } hitcount:          1
+    { id: sys_symlink                   [ 88] } hitcount:          1
+    { id: sys_sendmmsg                  [307] } hitcount:          1
+    { id: sys_semctl                    [ 66] } hitcount:          1
+    { id: sys_readlink                  [ 89] } hitcount:          3
+    { id: sys_bind                      [ 49] } hitcount:          3
+    { id: sys_getsockname               [ 51] } hitcount:          3
+    { id: sys_unlink                    [ 87] } hitcount:          3
+    { id: sys_rename                    [ 82] } hitcount:          4
+    { id: unknown_syscall               [ 58] } hitcount:          4
+    { id: sys_connect                   [ 42] } hitcount:          4
+    { id: sys_getpid                    [ 39] } hitcount:          4
+    .
+    .
+    .
+    { id: sys_rt_sigprocmask            [ 14] } hitcount:        952
+    { id: sys_futex                     [202] } hitcount:       1534
+    { id: sys_write                     [  1] } hitcount:       2689
+    { id: sys_setitimer                 [ 38] } hitcount:       2797
+    { id: sys_read                      [  0] } hitcount:       3202
+    { id: sys_select                    [ 23] } hitcount:       3773
+    { id: sys_writev                    [ 20] } hitcount:       4531
+    { id: sys_poll                      [  7] } hitcount:       8314
+    { id: sys_recvmsg                   [ 47] } hitcount:      13738
+    { id: sys_ioctl                     [ 16] } hitcount:      21843
+
+    Totals:
+        Hits: 67612
+        Entries: 72
+        Dropped: 0
+
+    The syscall counts above provide a rough overall picture of system
+    call activity on the system; we can see for example that the most
+    popular system call on this system was the 'sys_ioctl' system call.
+
+    We can use 'compound' keys to refine that number and provide some
+    further insight as to which processes exactly contribute to the
+    overall ioctl count.
+
+    The command below keeps a hitcount for every unique combination of
+    system call id and pid - the end result is essentially a table
+    that keeps a per-pid sum of system call hits.  The results are
+    sorted using the system call id as the primary key, and the
+    hitcount sum as the secondary key:
+
+    # echo 'hist:key=id.syscall,common_pid.execname:val=hitcount:sort=id,hitcount' > \
+           /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger
+
+    # cat /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/hist
+    # trigger info: hist:keys=id.syscall,common_pid.execname:vals=hitcount:sort=id.syscall,hitcount:size=2048 [active]
+
+    { id: sys_read                      [  0], common_pid: rtkit-daemon    [      1877] } hitcount:          1
+    { id: sys_read                      [  0], common_pid: gdbus           [      2976] } hitcount:          1
+    { id: sys_read                      [  0], common_pid: console-kit-dae [      3400] } hitcount:          1
+    { id: sys_read                      [  0], common_pid: postgres        [      1865] } hitcount:          1
+    { id: sys_read                      [  0], common_pid: deja-dup-monito [      3543] } hitcount:          2
+    { id: sys_read                      [  0], common_pid: NetworkManager  [       890] } hitcount:          2
+    { id: sys_read                      [  0], common_pid: evolution-calen [      3048] } hitcount:          2
+    { id: sys_read                      [  0], common_pid: postgres        [      1864] } hitcount:          2
+    { id: sys_read                      [  0], common_pid: nm-applet       [      3022] } hitcount:          2
+    { id: sys_read                      [  0], common_pid: whoopsie        [      1212] } hitcount:          2
+    .
+    .
+    .
+    { id: sys_ioctl                     [ 16], common_pid: bash            [      8479] } hitcount:          1
+    { id: sys_ioctl                     [ 16], common_pid: bash            [      3472] } hitcount:         12
+    { id: sys_ioctl                     [ 16], common_pid: gnome-terminal  [      3199] } hitcount:         16
+    { id: sys_ioctl                     [ 16], common_pid: Xorg            [      1267] } hitcount:       1808
+    { id: sys_ioctl                     [ 16], common_pid: compiz          [      2994] } hitcount:       5580
+    .
+    .
+    .
+    { id: sys_waitid                    [247], common_pid: upstart-dbus-br [      2690] } hitcount:          3
+    { id: sys_waitid                    [247], common_pid: upstart-dbus-br [      2688] } hitcount:         16
+    { id: sys_inotify_add_watch         [254], common_pid: gmain           [       975] } hitcount:          2
+    { id: sys_inotify_add_watch         [254], common_pid: gmain           [      3204] } hitcount:          4
+    { id: sys_inotify_add_watch         [254], common_pid: gmain           [      2888] } hitcount:          4
+    { id: sys_inotify_add_watch         [254], common_pid: gmain           [      3003] } hitcount:          4
+    { id: sys_inotify_add_watch         [254], common_pid: gmain           [      2873] } hitcount:          4
+    { id: sys_inotify_add_watch         [254], common_pid: gmain           [      3196] } hitcount:          6
+    { id: sys_openat                    [257], common_pid: java            [      2623] } hitcount:          2
+    { id: sys_eventfd2                  [290], common_pid: ibus-ui-gtk3    [      2760] } hitcount:          4
+    { id: sys_eventfd2                  [290], common_pid: compiz          [      2994] } hitcount:          6
+
+    Totals:
+        Hits: 31536
+        Entries: 323
+        Dropped: 0
+
+    The above list does give us a breakdown of the ioctl syscall by
+    pid, but it also gives us quite a bit more than that, which we
+    don't really care about at the moment.  Since we know the syscall
+    id for sys_ioctl (16, displayed next to the sys_ioctl name), we
+    can use that to filter out all the other syscalls:
+
+    # echo 'hist:key=id.syscall,common_pid.execname:val=hitcount:sort=id,hitcount if id == 16' > \
+           /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger
+
+    # cat /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/hist
+    # trigger info: hist:keys=id.syscall,common_pid.execname:vals=hitcount:sort=id.syscall,hitcount:size=2048 if id == 16 [active]
+
+    { id: sys_ioctl                     [ 16], common_pid: gmain           [      2769] } hitcount:          1
+    { id: sys_ioctl                     [ 16], common_pid: evolution-addre [      8571] } hitcount:          1
+    { id: sys_ioctl                     [ 16], common_pid: gmain           [      3003] } hitcount:          1
+    { id: sys_ioctl                     [ 16], common_pid: gmain           [      2781] } hitcount:          1
+    { id: sys_ioctl                     [ 16], common_pid: gmain           [      2829] } hitcount:          1
+    { id: sys_ioctl                     [ 16], common_pid: bash            [      8726] } hitcount:          1
+    { id: sys_ioctl                     [ 16], common_pid: bash            [      8508] } hitcount:          1
+    { id: sys_ioctl                     [ 16], common_pid: gmain           [      2970] } hitcount:          1
+    { id: sys_ioctl                     [ 16], common_pid: gmain           [      2768] } hitcount:          1
+    .
+    .
+    .
+    { id: sys_ioctl                     [ 16], common_pid: pool            [      8559] } hitcount:         45
+    { id: sys_ioctl                     [ 16], common_pid: pool            [      8555] } hitcount:         48
+    { id: sys_ioctl                     [ 16], common_pid: pool            [      8551] } hitcount:         48
+    { id: sys_ioctl                     [ 16], common_pid: avahi-daemon    [       896] } hitcount:         66
+    { id: sys_ioctl                     [ 16], common_pid: Xorg            [      1267] } hitcount:      26674
+    { id: sys_ioctl                     [ 16], common_pid: compiz          [      2994] } hitcount:      73443
+
+    Totals:
+        Hits: 101162
+        Entries: 103
+        Dropped: 0
+
+    The above output shows that 'compiz' and 'Xorg' are far and away
+    the heaviest ioctl callers (which might lead to questions about
+    whether they really need to be making all those calls and to
+    possible avenues for further investigation.)
+
+    The compound key examples used a key and a sum value (hitcount) to
+    sort the output, but we can just as easily use two keys instead.
+    Here's an example where we use a compound key composed of the
+    common_pid and size event fields.  Sorting with pid as the primary
+    key and 'size' as the secondary key allows us to display an
+    ordered summary of the recvfrom sizes, with counts, received by
+    each process:
+
+    # echo 'hist:key=common_pid.execname,size:val=hitcount:sort=common_pid,size' > \
+           /sys/kernel/debug/tracing/events/syscalls/sys_enter_recvfrom/trigger
+
+    # cat /sys/kernel/debug/tracing/events/syscalls/sys_enter_recvfrom/hist
+    # trigger info: hist:keys=common_pid.execname,size:vals=hitcount:sort=common_pid.execname,size:size=2048 [active]
+
+    { common_pid: smbd            [       784], size:          4 } hitcount:          1
+    { common_pid: dnsmasq         [      1412], size:       4096 } hitcount:        672
+    { common_pid: postgres        [      1796], size:       1000 } hitcount:          6
+    { common_pid: postgres        [      1867], size:       1000 } hitcount:         10
+    { common_pid: bamfdaemon      [      2787], size:         28 } hitcount:          2
+    { common_pid: bamfdaemon      [      2787], size:      14360 } hitcount:          1
+    { common_pid: compiz          [      2994], size:          8 } hitcount:          1
+    { common_pid: compiz          [      2994], size:         20 } hitcount:         11
+    { common_pid: gnome-terminal  [      3199], size:          4 } hitcount:          2
+    { common_pid: firefox         [      8817], size:          4 } hitcount:          1
+    { common_pid: firefox         [      8817], size:          8 } hitcount:          5
+    { common_pid: firefox         [      8817], size:        588 } hitcount:          2
+    { common_pid: firefox         [      8817], size:        628 } hitcount:          1
+    { common_pid: firefox         [      8817], size:       6944 } hitcount:          1
+    { common_pid: firefox         [      8817], size:     408880 } hitcount:          2
+    { common_pid: firefox         [      8822], size:          8 } hitcount:          2
+    { common_pid: firefox         [      8822], size:        160 } hitcount:          2
+    { common_pid: firefox         [      8822], size:        320 } hitcount:          2
+    { common_pid: firefox         [      8822], size:        352 } hitcount:          1
+    .
+    .
+    .
+    { common_pid: pool            [      8923], size:       1960 } hitcount:         10
+    { common_pid: pool            [      8923], size:       2048 } hitcount:         10
+    { common_pid: pool            [      8924], size:       1960 } hitcount:         10
+    { common_pid: pool            [      8924], size:       2048 } hitcount:         10
+    { common_pid: pool            [      8928], size:       1964 } hitcount:          4
+    { common_pid: pool            [      8928], size:       1965 } hitcount:          2
+    { common_pid: pool            [      8928], size:       2048 } hitcount:          6
+    { common_pid: pool            [      8929], size:       1982 } hitcount:          1
+    { common_pid: pool            [      8929], size:       2048 } hitcount:          1
+
+    Totals:
+        Hits: 2016
+        Entries: 224
+        Dropped: 0
+
+  The above example also illustrates the fact that although a compound
+  key is treated as a single entity for hashing purposes, the sub-keys
+  it's composed of can be accessed independently.
+
+  The next example uses a string field as the hash key and
+  demonstrates how you can manually pause and continue a hist trigger.
+  In this example, we'll aggregate fork counts and don't expect a
+  large number of entries in the hash table, so we'll drop the table
+  size to a much smaller number, say 256:
+
+    # echo 'hist:key=child_comm:val=hitcount:size=256' > \
+           /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger
+
+    # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist
+    # trigger info: hist:keys=child_comm:vals=hitcount:sort=hitcount:size=256 [active]
+
+    { child_comm: dconf worker                        } hitcount:          1
+    { child_comm: ibus-daemon                         } hitcount:          1
+    { child_comm: whoopsie                            } hitcount:          1
+    { child_comm: smbd                                } hitcount:          1
+    { child_comm: gdbus                               } hitcount:          1
+    { child_comm: kthreadd                            } hitcount:          1
+    { child_comm: dconf worker                        } hitcount:          1
+    { child_comm: evolution-alarm                     } hitcount:          2
+    { child_comm: Socket Thread                       } hitcount:          2
+    { child_comm: postgres                            } hitcount:          2
+    { child_comm: bash                                } hitcount:          3
+    { child_comm: compiz                              } hitcount:          3
+    { child_comm: evolution-sourc                     } hitcount:          4
+    { child_comm: dhclient                            } hitcount:          4
+    { child_comm: pool                                } hitcount:          5
+    { child_comm: nm-dispatcher.a                     } hitcount:          8
+    { child_comm: firefox                             } hitcount:          8
+    { child_comm: dbus-daemon                         } hitcount:          8
+    { child_comm: glib-pacrunner                      } hitcount:         10
+    { child_comm: evolution                           } hitcount:         23
+
+    Totals:
+        Hits: 89
+        Entries: 20
+        Dropped: 0
+
+  If we want to pause the hist trigger, we can simply append :pause to
+  the command that started the trigger.  Notice that the trigger info
+  displays as [paused]:
+
+    # echo 'hist:key=child_comm:val=hitcount:size=256:pause' >> \
+           /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger
+
+    # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist
+    # trigger info: hist:keys=child_comm:vals=hitcount:sort=hitcount:size=256 [paused]
+
+    { child_comm: dconf worker                        } hitcount:          1
+    { child_comm: kthreadd                            } hitcount:          1
+    { child_comm: dconf worker                        } hitcount:          1
+    { child_comm: gdbus                               } hitcount:          1
+    { child_comm: ibus-daemon                         } hitcount:          1
+    { child_comm: Socket Thread                       } hitcount:          2
+    { child_comm: evolution-alarm                     } hitcount:          2
+    { child_comm: smbd                                } hitcount:          2
+    { child_comm: bash                                } hitcount:          3
+    { child_comm: whoopsie                            } hitcount:          3
+    { child_comm: compiz                              } hitcount:          3
+    { child_comm: evolution-sourc                     } hitcount:          4
+    { child_comm: pool                                } hitcount:          5
+    { child_comm: postgres                            } hitcount:          6
+    { child_comm: firefox                             } hitcount:          8
+    { child_comm: dhclient                            } hitcount:         10
+    { child_comm: emacs                               } hitcount:         12
+    { child_comm: dbus-daemon                         } hitcount:         20
+    { child_comm: nm-dispatcher.a                     } hitcount:         20
+    { child_comm: evolution                           } hitcount:         35
+    { child_comm: glib-pacrunner                      } hitcount:         59
+
+    Totals:
+        Hits: 199
+        Entries: 21
+        Dropped: 0
+
+  To manually continue having the trigger aggregate events, append
+  :cont instead.  Notice that the trigger info displays as [active]
+  again, and the data has changed:
+
+    # echo 'hist:key=child_comm:val=hitcount:size=256:cont' >> \
+           /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger
+
+    # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist
+    # trigger info: hist:keys=child_comm:vals=hitcount:sort=hitcount:size=256 [active]
+
+    { child_comm: dconf worker                        } hitcount:          1
+    { child_comm: dconf worker                        } hitcount:          1
+    { child_comm: kthreadd                            } hitcount:          1
+    { child_comm: gdbus                               } hitcount:          1
+    { child_comm: ibus-daemon                         } hitcount:          1
+    { child_comm: Socket Thread                       } hitcount:          2
+    { child_comm: evolution-alarm                     } hitcount:          2
+    { child_comm: smbd                                } hitcount:          2
+    { child_comm: whoopsie                            } hitcount:          3
+    { child_comm: compiz                              } hitcount:          3
+    { child_comm: evolution-sourc                     } hitcount:          4
+    { child_comm: bash                                } hitcount:          5
+    { child_comm: pool                                } hitcount:          5
+    { child_comm: postgres                            } hitcount:          6
+    { child_comm: firefox                             } hitcount:          8
+    { child_comm: dhclient                            } hitcount:         11
+    { child_comm: emacs                               } hitcount:         12
+    { child_comm: dbus-daemon                         } hitcount:         22
+    { child_comm: nm-dispatcher.a                     } hitcount:         22
+    { child_comm: evolution                           } hitcount:         35
+    { child_comm: glib-pacrunner                      } hitcount:         59
+
+    Totals:
+        Hits: 206
+        Entries: 21
+        Dropped: 0
+
+  The previous example showed how to start and stop a hist trigger by
+  appending ':pause' and ':cont' to the hist trigger command.  A
+  hist trigger can also be started in a paused state by initially
+  starting the trigger with ':pause' appended.  This allows you to
+  start the trigger only when you're ready to start collecting data
+  and not before.  For example, you could start the trigger in a
+  paused state, then unpause it and do something you want to measure,
+  then pause the trigger again when done.
+
+  Of course, doing this manually can be difficult and error-prone, but
+  it is possible to automatically start and stop a hist trigger based
+  on some condition, via the enable_hist and disable_hist triggers.
+
+  For example, suppose we wanted to take a look at the relative
+  weights in terms of skb length for each callpath that leads to a
+  netif_receive_skb event when downloading a decent-sized file using
+  wget.
+
+  First we set up an initially paused stacktrace trigger on the
+  netif_receive_skb event:
+
+    # echo 'hist:key=stacktrace:vals=len:pause' > \
+           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
+
+  Next, we set up an 'enable_hist' trigger on the sched_process_exec
+  event, with an 'if filename==/usr/bin/wget' filter.  The effect of
+  this new trigger is that it will 'unpause' the hist trigger we just
+  set up on netif_receive_skb if and only if it sees a
+  sched_process_exec event with a filename of '/usr/bin/wget'.  When
+  that happens, all netif_receive_skb events are aggregated into a
+  hash table keyed on stacktrace:
+
+    # echo 'enable_hist:net:netif_receive_skb if filename==/usr/bin/wget' > \
+           /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger
+
+  The aggregation continues until the netif_receive_skb hist trigger
+  is paused again, which is what the following disable_hist trigger
+  does by creating a similar setup on the sched_process_exit event,
+  using the filter 'comm==wget':
+
+    # echo 'disable_hist:net:netif_receive_skb if comm==wget' > \
+           /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger
+
+  Whenever a process exits and the comm field of the disable_hist
+  trigger filter matches 'comm==wget', the netif_receive_skb hist
+  trigger is disabled.
+
+  The overall effect is that netif_receive_skb events are aggregated
+  into the hash table for only the duration of the wget.  Executing a
+  wget command and then listing the 'hist' file will display the
+  output generated by the wget command:
+
+    $ wget https://www.kernel.org/pub/linux/kernel/v3.x/patch-3.19.xz
+
+    # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist
+    # trigger info: hist:keys=stacktrace:vals=len:sort=hitcount:size=2048 [paused]
+
+    { stacktrace:
+         __netif_receive_skb_core+0x46d/0x990
+         __netif_receive_skb+0x18/0x60
+         netif_receive_skb_internal+0x23/0x90
+         napi_gro_receive+0xc8/0x100
+         ieee80211_deliver_skb+0xd6/0x270 [mac80211]
+         ieee80211_rx_handlers+0xccf/0x22f0 [mac80211]
+         ieee80211_prepare_and_rx_handle+0x4e7/0xc40 [mac80211]
+         ieee80211_rx+0x31d/0x900 [mac80211]
+         iwlagn_rx_reply_rx+0x3db/0x6f0 [iwldvm]
+         iwl_rx_dispatch+0x8e/0xf0 [iwldvm]
+         iwl_pcie_irq_handler+0xe3c/0x12f0 [iwlwifi]
+         irq_thread_fn+0x20/0x50
+         irq_thread+0x11f/0x150
+         kthread+0xd2/0xf0
+         ret_from_fork+0x42/0x70
+    } hitcount:         85  len:      28884
+    { stacktrace:
+         __netif_receive_skb_core+0x46d/0x990
+         __netif_receive_skb+0x18/0x60
+         netif_receive_skb_internal+0x23/0x90
+         napi_gro_complete+0xa4/0xe0
+         dev_gro_receive+0x23a/0x360
+         napi_gro_receive+0x30/0x100
+         ieee80211_deliver_skb+0xd6/0x270 [mac80211]
+         ieee80211_rx_handlers+0xccf/0x22f0 [mac80211]
+         ieee80211_prepare_and_rx_handle+0x4e7/0xc40 [mac80211]
+         ieee80211_rx+0x31d/0x900 [mac80211]
+         iwlagn_rx_reply_rx+0x3db/0x6f0 [iwldvm]
+         iwl_rx_dispatch+0x8e/0xf0 [iwldvm]
+         iwl_pcie_irq_handler+0xe3c/0x12f0 [iwlwifi]
+         irq_thread_fn+0x20/0x50
+         irq_thread+0x11f/0x150
+         kthread+0xd2/0xf0
+    } hitcount:         98  len:     664329
+    { stacktrace:
+         __netif_receive_skb_core+0x46d/0x990
+         __netif_receive_skb+0x18/0x60
+         process_backlog+0xa8/0x150
+         net_rx_action+0x15d/0x340
+         __do_softirq+0x114/0x2c0
+         do_softirq_own_stack+0x1c/0x30
+         do_softirq+0x65/0x70
+         __local_bh_enable_ip+0xb5/0xc0
+         ip_finish_output+0x1f4/0x840
+         ip_output+0x6b/0xc0
+         ip_local_out_sk+0x31/0x40
+         ip_send_skb+0x1a/0x50
+         udp_send_skb+0x173/0x2a0
+         udp_sendmsg+0x2bf/0x9f0
+         inet_sendmsg+0x64/0xa0
+         sock_sendmsg+0x3d/0x50
+    } hitcount:        115  len:      13030
+    { stacktrace:
+         __netif_receive_skb_core+0x46d/0x990
+         __netif_receive_skb+0x18/0x60
+         netif_receive_skb_internal+0x23/0x90
+         napi_gro_complete+0xa4/0xe0
+         napi_gro_flush+0x6d/0x90
+         iwl_pcie_irq_handler+0x92a/0x12f0 [iwlwifi]
+         irq_thread_fn+0x20/0x50
+         irq_thread+0x11f/0x150
+         kthread+0xd2/0xf0
+         ret_from_fork+0x42/0x70
+    } hitcount:        934  len:    5512212
+
+    Totals:
+        Hits: 1232
+        Entries: 4
+        Dropped: 0
+
+  The above shows all the netif_receive_skb callpaths and their total
+  lengths for the duration of the wget command.
+
+  The 'clear' hist trigger param can be used to clear the hash table.
+  Suppose we wanted to try another run of the previous example but
+  this time also wanted to see the complete list of events that went
+  into the histogram.  In order to avoid having to set everything up
+  again, we can just clear the histogram first:
+
+    # echo 'hist:key=stacktrace:vals=len:clear' >> \
+           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
+
+  Just to verify that it is in fact cleared, here's what we now see in
+  the hist file:
+
+    # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist
+    # trigger info: hist:keys=stacktrace:vals=len:sort=hitcount:size=2048 [paused]
+
+    Totals:
+        Hits: 0
+        Entries: 0
+        Dropped: 0
+
+  Since we want to see the detailed list of every netif_receive_skb
+  event occurring during the new run, which are in fact the same
+  events being aggregated into the hash table, we add 'enable_event'
+  and 'disable_event' triggers to the triggering sched_process_exec
+  and sched_process_exit events as such:
+
+    # echo 'enable_event:net:netif_receive_skb if filename==/usr/bin/wget' > \
+           /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger
+
+    # echo 'disable_event:net:netif_receive_skb if comm==wget' > \
+           /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger
+
+  If you read the trigger files for the sched_process_exec and
+  sched_process_exit triggers, you should see two triggers for each:
+  one enabling/disabling the hist aggregation and the other
+  enabling/disabling the logging of events:
+
+    # cat /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger
+    enable_event:net:netif_receive_skb:unlimited if filename==/usr/bin/wget
+    enable_hist:net:netif_receive_skb:unlimited if filename==/usr/bin/wget
+
+    # cat /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger
+    disable_event:net:netif_receive_skb:unlimited if comm==wget
+    disable_hist:net:netif_receive_skb:unlimited if comm==wget
+
+  In other words, whenever either of the sched_process_exec or
+  sched_process_exit events is hit and matches 'wget', it enables or
+  disables both the histogram and the event log, and what you end up
+  with is a hash table and set of events just covering the specified
+  duration.  Run the wget command again:
+
+    $ wget https://www.kernel.org/pub/linux/kernel/v3.x/patch-3.19.xz
+
+  Displaying the 'hist' file should show something similar to what you
+  saw in the last run, but this time you should also see the
+  individual events in the trace file:
+
+    # cat /sys/kernel/debug/tracing/trace
+
+    # tracer: nop
+    #
+    # entries-in-buffer/entries-written: 183/1426   #P:4
+    #
+    #                              _-----=> irqs-off
+    #                             / _----=> need-resched
+    #                            | / _---=> hardirq/softirq
+    #                            || / _--=> preempt-depth
+    #                            ||| /     delay
+    #           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION
+    #              | |       |   ||||       |         |
+                wget-15108 [000] ..s1 31769.606929: netif_receive_skb: dev=lo skbaddr=ffff88009c353100 len=60
+                wget-15108 [000] ..s1 31769.606999: netif_receive_skb: dev=lo skbaddr=ffff88009c353200 len=60
+             dnsmasq-1382  [000] ..s1 31769.677652: netif_receive_skb: dev=lo skbaddr=ffff88009c352b00 len=130
+             dnsmasq-1382  [000] ..s1 31769.685917: netif_receive_skb: dev=lo skbaddr=ffff88009c352200 len=138
+    ##### CPU 2 buffer started ####
+      irq/29-iwlwifi-559   [002] ..s. 31772.031529: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d433d00 len=2948
+      irq/29-iwlwifi-559   [002] ..s. 31772.031572: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d432200 len=1500
+      irq/29-iwlwifi-559   [002] ..s. 31772.032196: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d433100 len=2948
+      irq/29-iwlwifi-559   [002] ..s. 31772.032761: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d433000 len=2948
+      irq/29-iwlwifi-559   [002] ..s. 31772.033220: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d432e00 len=1500
+    .
+    .
+    .
+
+  The following example demonstrates how multiple hist triggers can be
+  attached to a given event.  This capability can be useful for
+  creating a set of different summaries derived from the same set of
+  events, or for comparing the effects of different filters, among
+  other things.
+
+    # echo 'hist:keys=skbaddr.hex:vals=len if len < 0' >> \
+           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
+    # echo 'hist:keys=skbaddr.hex:vals=len if len > 4096' >> \
+           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
+    # echo 'hist:keys=skbaddr.hex:vals=len if len == 256' >> \
+           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
+    # echo 'hist:keys=skbaddr.hex:vals=len' >> \
+           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
+    # echo 'hist:keys=len:vals=common_preempt_count' >> \
+           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
+
+  The above set of commands creates four triggers differing only in
+  their filters, along with a completely different though fairly
+  nonsensical trigger.  Note that in order to append multiple hist
+  triggers to the same file, you should use the '>>' operator to
+  append them ('>' will also add the new hist trigger, but will remove
+  any existing hist triggers beforehand).
+
+  Displaying the contents of the 'hist' file for the event shows the
+  contents of all five histograms:
+
+    # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist
+
+    # event histogram
+    #
+    # trigger info: hist:keys=len:vals=hitcount,common_preempt_count:sort=hitcount:size=2048 [active]
+    #
+
+    { len:        176 } hitcount:          1  common_preempt_count:          0
+    { len:        223 } hitcount:          1  common_preempt_count:          0
+    { len:       4854 } hitcount:          1  common_preempt_count:          0
+    { len:        395 } hitcount:          1  common_preempt_count:          0
+    { len:        177 } hitcount:          1  common_preempt_count:          0
+    { len:        446 } hitcount:          1  common_preempt_count:          0
+    { len:       1601 } hitcount:          1  common_preempt_count:          0
+    .
+    .
+    .
+    { len:       1280 } hitcount:         66  common_preempt_count:          0
+    { len:        116 } hitcount:         81  common_preempt_count:         40
+    { len:        708 } hitcount:        112  common_preempt_count:          0
+    { len:         46 } hitcount:        221  common_preempt_count:          0
+    { len:       1264 } hitcount:        458  common_preempt_count:          0
+
+    Totals:
+        Hits: 1428
+        Entries: 147
+        Dropped: 0
+
+
+    # event histogram
+    #
+    # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 [active]
+    #
+
+    { skbaddr: ffff8800baee5e00 } hitcount:          1  len:        130
+    { skbaddr: ffff88005f3d5600 } hitcount:          1  len:       1280
+    { skbaddr: ffff88005f3d4900 } hitcount:          1  len:       1280
+    { skbaddr: ffff88009fed6300 } hitcount:          1  len:        115
+    { skbaddr: ffff88009fe0ad00 } hitcount:          1  len:        115
+    { skbaddr: ffff88008cdb1900 } hitcount:          1  len:         46
+    { skbaddr: ffff880064b5ef00 } hitcount:          1  len:        118
+    { skbaddr: ffff880044e3c700 } hitcount:          1  len:         60
+    { skbaddr: ffff880100065900 } hitcount:          1  len:         46
+    { skbaddr: ffff8800d46bd500 } hitcount:          1  len:        116
+    { skbaddr: ffff88005f3d5f00 } hitcount:          1  len:       1280
+    { skbaddr: ffff880100064700 } hitcount:          1  len:        365
+    { skbaddr: ffff8800badb6f00 } hitcount:          1  len:         60
+    .
+    .
+    .
+    { skbaddr: ffff88009fe0be00 } hitcount:         27  len:      24677
+    { skbaddr: ffff88009fe0a400 } hitcount:         27  len:      23052
+    { skbaddr: ffff88009fe0b700 } hitcount:         31  len:      25589
+    { skbaddr: ffff88009fe0b600 } hitcount:         32  len:      27326
+    { skbaddr: ffff88006a462800 } hitcount:         68  len:      71678
+    { skbaddr: ffff88006a463700 } hitcount:         70  len:      72678
+    { skbaddr: ffff88006a462b00 } hitcount:         71  len:      77589
+    { skbaddr: ffff88006a463600 } hitcount:         73  len:      71307
+    { skbaddr: ffff88006a462200 } hitcount:         81  len:      81032
+
+    Totals:
+        Hits: 1451
+        Entries: 318
+        Dropped: 0
+
+
+    # event histogram
+    #
+    # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 if len == 256 [active]
+    #
+
+
+    Totals:
+        Hits: 0
+        Entries: 0
+        Dropped: 0
+
+
+    # event histogram
+    #
+    # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 if len > 4096 [active]
+    #
+
+    { skbaddr: ffff88009fd2c300 } hitcount:          1  len:       7212
+    { skbaddr: ffff8800d2bcce00 } hitcount:          1  len:       7212
+    { skbaddr: ffff8800d2bcd700 } hitcount:          1  len:       7212
+    { skbaddr: ffff8800d2bcda00 } hitcount:          1  len:      21492
+    { skbaddr: ffff8800ae2e2d00 } hitcount:          1  len:       7212
+    { skbaddr: ffff8800d2bcdb00 } hitcount:          1  len:       7212
+    { skbaddr: ffff88006a4df500 } hitcount:          1  len:       4854
+    { skbaddr: ffff88008ce47b00 } hitcount:          1  len:      18636
+    { skbaddr: ffff8800ae2e2200 } hitcount:          1  len:      12924
+    { skbaddr: ffff88005f3e1000 } hitcount:          1  len:       4356
+    { skbaddr: ffff8800d2bcdc00 } hitcount:          2  len:      24420
+    { skbaddr: ffff8800d2bcc200 } hitcount:          2  len:      12996
+
+    Totals:
+        Hits: 14
+        Entries: 12
+        Dropped: 0
+
+
+    # event histogram
+    #
+    # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 if len < 0 [active]
+    #
+
+
+    Totals:
+        Hits: 0
+        Entries: 0
+        Dropped: 0
+
+  Named triggers can be used to have triggers share a common set of
+  histogram data.  This capability is mostly useful for combining the
+  output of events generated by tracepoints contained inside inline
+  functions, but names can be used in a hist trigger on any event.
+  For example, these two triggers when hit will update the same 'len'
+  field in the shared 'foo' histogram data:
+
+    # echo 'hist:name=foo:keys=skbaddr.hex:vals=len' > \
+           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
+    # echo 'hist:name=foo:keys=skbaddr.hex:vals=len' > \
+           /sys/kernel/debug/tracing/events/net/netif_rx/trigger
+
+  You can see that they're updating common histogram data by reading
+  each event's hist files at the same time:
+
+    # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist;
+      cat /sys/kernel/debug/tracing/events/net/netif_rx/hist
+
+    # event histogram
+    #
+    # trigger info: hist:name=foo:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 [active]
+    #
+
+    { skbaddr: ffff88000ad53500 } hitcount:          1  len:         46
+    { skbaddr: ffff8800af5a1500 } hitcount:          1  len:         76
+    { skbaddr: ffff8800d62a1900 } hitcount:          1  len:         46
+    { skbaddr: ffff8800d2bccb00 } hitcount:          1  len:        468
+    { skbaddr: ffff8800d3c69900 } hitcount:          1  len:         46
+    { skbaddr: ffff88009ff09100 } hitcount:          1  len:         52
+    { skbaddr: ffff88010f13ab00 } hitcount:          1  len:        168
+    { skbaddr: ffff88006a54f400 } hitcount:          1  len:         46
+    { skbaddr: ffff8800d2bcc500 } hitcount:          1  len:        260
+    { skbaddr: ffff880064505000 } hitcount:          1  len:         46
+    { skbaddr: ffff8800baf24e00 } hitcount:          1  len:         32
+    { skbaddr: ffff88009fe0ad00 } hitcount:          1  len:         46
+    { skbaddr: ffff8800d3edff00 } hitcount:          1  len:         44
+    { skbaddr: ffff88009fe0b400 } hitcount:          1  len:        168
+    { skbaddr: ffff8800a1c55a00 } hitcount:          1  len:         40
+    { skbaddr: ffff8800d2bcd100 } hitcount:          1  len:         40
+    { skbaddr: ffff880064505f00 } hitcount:          1  len:        174
+    { skbaddr: ffff8800a8bff200 } hitcount:          1  len:        160
+    { skbaddr: ffff880044e3cc00 } hitcount:          1  len:         76
+    { skbaddr: ffff8800a8bfe700 } hitcount:          1  len:         46
+    { skbaddr: ffff8800d2bcdc00 } hitcount:          1  len:         32
+    { skbaddr: ffff8800a1f64800 } hitcount:          1  len:         46
+    { skbaddr: ffff8800d2bcde00 } hitcount:          1  len:        988
+    { skbaddr: ffff88006a5dea00 } hitcount:          1  len:         46
+    { skbaddr: ffff88002e37a200 } hitcount:          1  len:         44
+    { skbaddr: ffff8800a1f32c00 } hitcount:          2  len:        676
+    { skbaddr: ffff88000ad52600 } hitcount:          2  len:        107
+    { skbaddr: ffff8800a1f91e00 } hitcount:          2  len:         92
+    { skbaddr: ffff8800af5a0200 } hitcount:          2  len:        142
+    { skbaddr: ffff8800d2bcc600 } hitcount:          2  len:        220
+    { skbaddr: ffff8800ba36f500 } hitcount:          2  len:         92
+    { skbaddr: ffff8800d021f800 } hitcount:          2  len:         92
+    { skbaddr: ffff8800a1f33600 } hitcount:          2  len:        675
+    { skbaddr: ffff8800a8bfff00 } hitcount:          3  len:        138
+    { skbaddr: ffff8800d62a1300 } hitcount:          3  len:        138
+    { skbaddr: ffff88002e37a100 } hitcount:          4  len:        184
+    { skbaddr: ffff880064504400 } hitcount:          4  len:        184
+    { skbaddr: ffff8800a8bfec00 } hitcount:          4  len:        184
+    { skbaddr: ffff88000ad53700 } hitcount:          5  len:        230
+    { skbaddr: ffff8800d2bcdb00 } hitcount:          5  len:        196
+    { skbaddr: ffff8800a1f90000 } hitcount:          6  len:        276
+    { skbaddr: ffff88006a54f900 } hitcount:          6  len:        276
+
+    Totals:
+        Hits: 81
+        Entries: 42
+        Dropped: 0
+    # event histogram
+    #
+    # trigger info: hist:name=foo:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 [active]
+    #
+
+    { skbaddr: ffff88000ad53500 } hitcount:          1  len:         46
+    { skbaddr: ffff8800af5a1500 } hitcount:          1  len:         76
+    { skbaddr: ffff8800d62a1900 } hitcount:          1  len:         46
+    { skbaddr: ffff8800d2bccb00 } hitcount:          1  len:        468
+    { skbaddr: ffff8800d3c69900 } hitcount:          1  len:         46
+    { skbaddr: ffff88009ff09100 } hitcount:          1  len:         52
+    { skbaddr: ffff88010f13ab00 } hitcount:          1  len:        168
+    { skbaddr: ffff88006a54f400 } hitcount:          1  len:         46
+    { skbaddr: ffff8800d2bcc500 } hitcount:          1  len:        260
+    { skbaddr: ffff880064505000 } hitcount:          1  len:         46
+    { skbaddr: ffff8800baf24e00 } hitcount:          1  len:         32
+    { skbaddr: ffff88009fe0ad00 } hitcount:          1  len:         46
+    { skbaddr: ffff8800d3edff00 } hitcount:          1  len:         44
+    { skbaddr: ffff88009fe0b400 } hitcount:          1  len:        168
+    { skbaddr: ffff8800a1c55a00 } hitcount:          1  len:         40
+    { skbaddr: ffff8800d2bcd100 } hitcount:          1  len:         40
+    { skbaddr: ffff880064505f00 } hitcount:          1  len:        174
+    { skbaddr: ffff8800a8bff200 } hitcount:          1  len:        160
+    { skbaddr: ffff880044e3cc00 } hitcount:          1  len:         76
+    { skbaddr: ffff8800a8bfe700 } hitcount:          1  len:         46
+    { skbaddr: ffff8800d2bcdc00 } hitcount:          1  len:         32
+    { skbaddr: ffff8800a1f64800 } hitcount:          1  len:         46
+    { skbaddr: ffff8800d2bcde00 } hitcount:          1  len:        988
+    { skbaddr: ffff88006a5dea00 } hitcount:          1  len:         46
+    { skbaddr: ffff88002e37a200 } hitcount:          1  len:         44
+    { skbaddr: ffff8800a1f32c00 } hitcount:          2  len:        676
+    { skbaddr: ffff88000ad52600 } hitcount:          2  len:        107
+    { skbaddr: ffff8800a1f91e00 } hitcount:          2  len:         92
+    { skbaddr: ffff8800af5a0200 } hitcount:          2  len:        142
+    { skbaddr: ffff8800d2bcc600 } hitcount:          2  len:        220
+    { skbaddr: ffff8800ba36f500 } hitcount:          2  len:         92
+    { skbaddr: ffff8800d021f800 } hitcount:          2  len:         92
+    { skbaddr: ffff8800a1f33600 } hitcount:          2  len:        675
+    { skbaddr: ffff8800a8bfff00 } hitcount:          3  len:        138
+    { skbaddr: ffff8800d62a1300 } hitcount:          3  len:        138
+    { skbaddr: ffff88002e37a100 } hitcount:          4  len:        184
+    { skbaddr: ffff880064504400 } hitcount:          4  len:        184
+    { skbaddr: ffff8800a8bfec00 } hitcount:          4  len:        184
+    { skbaddr: ffff88000ad53700 } hitcount:          5  len:        230
+    { skbaddr: ffff8800d2bcdb00 } hitcount:          5  len:        196
+    { skbaddr: ffff8800a1f90000 } hitcount:          6  len:        276
+    { skbaddr: ffff88006a54f900 } hitcount:          6  len:        276
+
+    Totals:
+        Hits: 81
+        Entries: 42
+        Dropped: 0
+
+  And here's an example that shows how to combine histogram data from
+  any two events even if they don't share any 'compatible' fields
+  other than 'hitcount' and 'stacktrace'.  These commands create a
+  couple of triggers named 'bar' using those fields:
+
+    # echo 'hist:name=bar:key=stacktrace:val=hitcount' > \
+           /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger
+    # echo 'hist:name=bar:key=stacktrace:val=hitcount' > \
+          /sys/kernel/debug/tracing/events/net/netif_rx/trigger
+
+  And displaying the output of either shows some interesting if
+  somewhat confusing output:
+
+    # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist
+    # cat /sys/kernel/debug/tracing/events/net/netif_rx/hist
+
+    # event histogram
+    #
+    # trigger info: hist:name=bar:keys=stacktrace:vals=hitcount:sort=hitcount:size=2048 [active]
+    #
+
+    { stacktrace:
+             _do_fork+0x18e/0x330
+             kernel_thread+0x29/0x30
+             kthreadd+0x154/0x1b0
+             ret_from_fork+0x3f/0x70
+    } hitcount:          1
+    { stacktrace:
+             netif_rx_internal+0xb2/0xd0
+             netif_rx_ni+0x20/0x70
+             dev_loopback_xmit+0xaa/0xd0
+             ip_mc_output+0x126/0x240
+             ip_local_out_sk+0x31/0x40
+             igmp_send_report+0x1e9/0x230
+             igmp_timer_expire+0xe9/0x120
+             call_timer_fn+0x39/0xf0
+             run_timer_softirq+0x1e1/0x290
+             __do_softirq+0xfd/0x290
+             irq_exit+0x98/0xb0
+             smp_apic_timer_interrupt+0x4a/0x60
+             apic_timer_interrupt+0x6d/0x80
+             cpuidle_enter+0x17/0x20
+             call_cpuidle+0x3b/0x60
+             cpu_startup_entry+0x22d/0x310
+    } hitcount:          1
+    { stacktrace:
+             netif_rx_internal+0xb2/0xd0
+             netif_rx_ni+0x20/0x70
+             dev_loopback_xmit+0xaa/0xd0
+             ip_mc_output+0x17f/0x240
+             ip_local_out_sk+0x31/0x40
+             ip_send_skb+0x1a/0x50
+             udp_send_skb+0x13e/0x270
+             udp_sendmsg+0x2bf/0x980
+             inet_sendmsg+0x67/0xa0
+             sock_sendmsg+0x38/0x50
+             SYSC_sendto+0xef/0x170
+             SyS_sendto+0xe/0x10
+             entry_SYSCALL_64_fastpath+0x12/0x6a
+    } hitcount:          2
+    { stacktrace:
+             netif_rx_internal+0xb2/0xd0
+             netif_rx+0x1c/0x60
+             loopback_xmit+0x6c/0xb0
+             dev_hard_start_xmit+0x219/0x3a0
+             __dev_queue_xmit+0x415/0x4f0
+             dev_queue_xmit_sk+0x13/0x20
+             ip_finish_output2+0x237/0x340
+             ip_finish_output+0x113/0x1d0
+             ip_output+0x66/0xc0
+             ip_local_out_sk+0x31/0x40
+             ip_send_skb+0x1a/0x50
+             udp_send_skb+0x16d/0x270
+             udp_sendmsg+0x2bf/0x980
+             inet_sendmsg+0x67/0xa0
+             sock_sendmsg+0x38/0x50
+             ___sys_sendmsg+0x14e/0x270
+    } hitcount:         76
+    { stacktrace:
+             netif_rx_internal+0xb2/0xd0
+             netif_rx+0x1c/0x60
+             loopback_xmit+0x6c/0xb0
+             dev_hard_start_xmit+0x219/0x3a0
+             __dev_queue_xmit+0x415/0x4f0
+             dev_queue_xmit_sk+0x13/0x20
+             ip_finish_output2+0x237/0x340
+             ip_finish_output+0x113/0x1d0
+             ip_output+0x66/0xc0
+             ip_local_out_sk+0x31/0x40
+             ip_send_skb+0x1a/0x50
+             udp_send_skb+0x16d/0x270
+             udp_sendmsg+0x2bf/0x980
+             inet_sendmsg+0x67/0xa0
+             sock_sendmsg+0x38/0x50
+             ___sys_sendmsg+0x269/0x270
+    } hitcount:         77
+    { stacktrace:
+             netif_rx_internal+0xb2/0xd0
+             netif_rx+0x1c/0x60
+             loopback_xmit+0x6c/0xb0
+             dev_hard_start_xmit+0x219/0x3a0
+             __dev_queue_xmit+0x415/0x4f0
+             dev_queue_xmit_sk+0x13/0x20
+             ip_finish_output2+0x237/0x340
+             ip_finish_output+0x113/0x1d0
+             ip_output+0x66/0xc0
+             ip_local_out_sk+0x31/0x40
+             ip_send_skb+0x1a/0x50
+             udp_send_skb+0x16d/0x270
+             udp_sendmsg+0x2bf/0x980
+             inet_sendmsg+0x67/0xa0
+             sock_sendmsg+0x38/0x50
+             SYSC_sendto+0xef/0x170
+    } hitcount:         88
+    { stacktrace:
+             _do_fork+0x18e/0x330
+             SyS_clone+0x19/0x20
+             entry_SYSCALL_64_fastpath+0x12/0x6a
+    } hitcount:        244
+
+    Totals:
+        Hits: 489
+        Entries: 7
+        Dropped: 0
+
+
+2.2 Inter-event hist triggers
+-----------------------------
+
+Inter-event hist triggers are hist triggers that combine values from
+one or more other events and create a histogram using that data.  Data
+from an inter-event histogram can in turn become the source for
+further combined histograms, thus providing a chain of related
+histograms, which is important for some applications.
+
+The most important example of an inter-event quantity that can be used
+in this manner is latency, which is simply a difference in timestamps
+between two events.  Although latency is the most important
+inter-event quantity, note that because the support is completely
+general across the trace event subsystem, any event field can be used
+in an inter-event quantity.
+
+An example of a histogram that combines data from other histograms
+into a useful chain would be a 'wakeupswitch latency' histogram that
+combines a 'wakeup latency' histogram and a 'switch latency'
+histogram.
+
+Normally, a hist trigger specification consists of a (possibly
+compound) key along with one or more numeric values, which are
+continually updated sums associated with that key.  A histogram
+specification in this case consists of individual key and value
+specifications that refer to trace event fields associated with a
+single event type.
+
+The inter-event hist trigger extension allows fields from multiple
+events to be referenced and combined into a multi-event histogram
+specification.  In support of this overall goal, a few enabling
+features have been added to the hist trigger support:
+
+  - In order to compute an inter-event quantity, a value from one
+    event needs to be saved and then referenced from another event.  This
+    requires the introduction of support for histogram 'variables'.
+
+  - The computation of inter-event quantities and their combination
+    require some minimal amount of support for applying simple
+    expressions to variables (+ and -).
+
+  - A histogram consisting of inter-event quantities isn't logically a
+    histogram on either event (so having the 'hist' file for either
+    event host the histogram output doesn't really make sense).  To
+    address the idea that the histogram is associated with a
+    combination of events, support is added allowing the creation of
+    'synthetic' events that are events derived from other events.
+    These synthetic events are full-fledged events just like any other
+    and can be used as such, as for instance to create the
+    'combination' histograms mentioned previously.
+
+  - A set of 'actions' can be associated with histogram entries -
+    these can be used to generate the previously mentioned synthetic
+    events, but can also be used for other purposes, such as for
+    example saving context when a 'max' latency has been hit.
+
+  - Trace events don't have a 'timestamp' associated with them, but
+    there is an implicit timestamp saved along with an event in the
+    underlying ftrace ring buffer.  This timestamp is now exposed as a
+    synthetic field named 'common_timestamp' which can be used in
+    histograms as if it were any other event field; it isn't an actual
+    field in the trace format but rather is a synthesized value that
+    nonetheless can be used as if it were an actual field.  By default
+    it is in units of nanoseconds; appending '.usecs' to a
+    common_timestamp field changes the units to microseconds.
+
+A note on inter-event timestamps: If common_timestamp is used in a
+histogram, the trace buffer is automatically switched over to using
+absolute timestamps and the "global" trace clock, in order to avoid
+bogus timestamp differences with other clocks that aren't coherent
+across CPUs.  This can be overridden by specifying one of the other
+trace clocks instead, using the "clock=XXX" hist trigger attribute,
+where XXX is any of the clocks listed in the tracing/trace_clock
+pseudo-file.
+
+These features are described in more detail in the following sections.
+
+2.2.1 Histogram Variables
+-------------------------
+
+Variables are simply named locations used for saving and retrieving
+values between matching events.  A 'matching' event is defined as an
+event that has a matching key - if a variable is saved for a histogram
+entry corresponding to that key, any subsequent event with a matching
+key can access that variable.
+
+A variable's value is normally available to any subsequent event until
+it is set to something else by a subsequent event.  The one exception
+to that rule is that any variable used in an expression is essentially
+'read-once' - once it's used by an expression in a subsequent event,
+it's reset to its 'unset' state, which means it can't be used again
+unless it's set again.  This ensures not only that an event doesn't
+use an uninitialized variable in a calculation, but that that variable
+is used only once and not for any unrelated subsequent match.
+
+The basic syntax for saving a variable is to simply prefix a unique
+variable name not corresponding to any keyword along with an '=' sign
+to any event field.
+
+Either keys or values can be saved and retrieved in this way.  This
+creates a variable named 'ts0' for a histogram entry with the key
+'next_pid':
+
+  # echo 'hist:keys=next_pid:vals=$ts0:ts0=common_timestamp ...' >> \
+	event/trigger
+
+The ts0 variable can be accessed by any subsequent event having the
+same pid as 'next_pid'.
+
+Variable references are formed by prepending the variable name with
+the '$' sign.  Thus for example, the ts0 variable above would be
+referenced as '$ts0' in expressions.
+
+Because 'vals=' is used, the common_timestamp variable value above
+will also be summed as a normal histogram value would (though for a
+timestamp it makes little sense).
+
+The below shows that a key value can also be saved in the same way:
+
+  # echo 'hist:timer_pid=common_pid:key=timer_pid ...' >> event/trigger
+
+If a variable isn't a key variable or prefixed with 'vals=', the
+associated event field will be saved in a variable but won't be summed
+as a value:
+
+  # echo 'hist:keys=next_pid:ts1=common_timestamp ...' >> event/trigger
+
+Multiple variables can be assigned at the same time.  The below would
+result in both ts0 and b being created as variables, with both
+common_timestamp and field1 additionally being summed as values:
+
+  # echo 'hist:keys=pid:vals=$ts0,$b:ts0=common_timestamp,b=field1 ...' >> \
+	event/trigger
+
+Note that variable assignments can appear either preceding or
+following their use.  The command below behaves identically to the
+command above:
+
+  # echo 'hist:keys=pid:ts0=common_timestamp,b=field1:vals=$ts0,$b ...' >> \
+	event/trigger
+
+Any number of variables not bound to a 'vals=' prefix can also be
+assigned by simply separating them with colons.  Below is the same
+thing but without the values being summed in the histogram:
+
+  # echo 'hist:keys=pid:ts0=common_timestamp:b=field1 ...' >> event/trigger
+
+Variables set as above can be referenced and used in expressions on
+another event.
+
+For example, here's how a latency can be calculated:
+
+  # echo 'hist:keys=pid,prio:ts0=common_timestamp ...' >> event1/trigger
+  # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp-$ts0 ...' >> event2/trigger
+
+In the first line above, the event's timestamp is saved into the
+variable ts0.  In the next line, ts0 is subtracted from the second
+event's timestamp to produce the latency, which is then assigned into
+yet another variable, 'wakeup_lat'.  The hist trigger below in turn
+makes use of the wakeup_lat variable to compute a combined latency
+using the same key and variable from yet another event:
+
+  # echo 'hist:key=pid:wakeupswitch_lat=$wakeup_lat+$switchtime_lat ...' >> event3/trigger
+
+2.2.2 Synthetic Events
+----------------------
+
+Synthetic events are user-defined events generated from hist trigger
+variables or fields associated with one or more other events.  Their
+purpose is to provide a mechanism for displaying data spanning
+multiple events consistent with the existing and already familiar
+usage for normal events.
+
+To define a synthetic event, the user writes a simple specification
+consisting of the name of the new event along with one or more
+variables and their types, which can be any valid field type,
+separated by semicolons, to the tracing/synthetic_events file.
+
+For instance, the following creates a new event named 'wakeup_latency'
+with 3 fields: lat, pid, and prio.  Each of those fields is simply a
+variable reference to a variable on another event:
+
+  # echo 'wakeup_latency \
+          u64 lat; \
+          pid_t pid; \
+	  int prio' >> \
+	  /sys/kernel/debug/tracing/synthetic_events
+
+Reading the tracing/synthetic_events file lists all the currently
+defined synthetic events, in this case the event defined above:
+
+  # cat /sys/kernel/debug/tracing/synthetic_events
+    wakeup_latency u64 lat; pid_t pid; int prio
+
+An existing synthetic event definition can be removed by prepending
+the command that defined it with a '!':
+
+  # echo '!wakeup_latency u64 lat pid_t pid int prio' >> \
+    /sys/kernel/debug/tracing/synthetic_events
+
+At this point, there isn't yet an actual 'wakeup_latency' event
+instantiated in the event subsystem - for this to happen, a 'hist
+trigger action' needs to be instantiated and bound to actual fields
+and variables defined on other events (see Section 2.2.3 below).
+
+Once that is done, an event instance is created, and a histogram can
+be defined using it:
+
+  # echo 'hist:keys=pid,prio,lat.log2:sort=pid,lat' >> \
+        /sys/kernel/debug/tracing/events/synthetic/wakeup_latency/trigger
+
+The new event is created under the tracing/events/synthetic/ directory
+and looks and behaves just like any other event:
+
+  # ls /sys/kernel/debug/tracing/events/synthetic/wakeup_latency
+        enable  filter  format  hist  id  trigger
+
+Like any other event, once a histogram is enabled for the event, the
+output can be displayed by reading the event's 'hist' file.
+
+2.2.3 Hist trigger 'actions'
+----------------------------
+
+A hist trigger 'action' is a function that's executed whenever a
+histogram entry is added or updated.
+
+The default 'action' if no special function is explicitly specified is
+as it always has been, to simply update the set of values associated
+with an entry.  Some applications, however, may want to perform
+additional actions at that point, such as generate another event, or
+compare and save a maximum.
+
+The following additional actions are available.  To specify an action
+for a given event, simply specify the action between colons in the
+hist trigger specification.
+
+  - onmatch(matching.event).<synthetic_event_name>(param list)
+
+    The 'onmatch(matching.event).<synthetic_event_name>(params)' hist
+    trigger action is invoked whenever an event matches and the
+    histogram entry would be added or updated.  It causes the named
+    synthetic event to be generated with the values given in the
+    'param list'.  The result is the generation of a synthetic event
+    that consists of the values contained in those variables at the
+    time the invoking event was hit.
+
+    The 'param list' consists of one or more parameters which may be
+    either variables or fields defined on either the 'matching.event'
+    or the target event.  The variables or fields specified in the
+    param list may be either fully-qualified or unqualified.  If a
+    variable is specified as unqualified, it must be unique between
+    the two events.  A field name used as a param can be unqualified
+    if it refers to the target event, but must be fully qualified if
+    it refers to the matching event.  A fully-qualified name is of the
+    form 'system.event_name.$var_name' or 'system.event_name.field'.
+
+    The 'matching.event' specification is simply the fully qualified
+    event name of the event that matches the target event for the
+    onmatch() functionality, in the form 'system.event_name'.
+
+    Finally, the number and type of variables/fields in the 'param
+    list' must match the number and types of the fields in the
+    synthetic event being generated.
+
+    As an example the below defines a simple synthetic event and uses
+    a variable defined on the sched_wakeup_new event as a parameter
+    when invoking the synthetic event.  Here we define the synthetic
+    event:
+
+    # echo 'wakeup_new_test pid_t pid' >> \
+           /sys/kernel/debug/tracing/synthetic_events
+
+    # cat /sys/kernel/debug/tracing/synthetic_events
+          wakeup_new_test pid_t pid
+
+    The following hist trigger both defines the missing testpid
+    variable and specifies an onmatch() action that generates a
+    wakeup_new_test synthetic event whenever a sched_wakeup_new event
+    occurs, which because of the 'if comm == "cyclictest"' filter only
+    happens when the executable is cyclictest:
+
+    # echo 'hist:keys=$testpid:testpid=pid:onmatch(sched.sched_wakeup_new).\
+            wakeup_new_test($testpid) if comm=="cyclictest"' >> \
+            /sys/kernel/debug/tracing/events/sched/sched_wakeup_new/trigger
+
+    Creating and displaying a histogram based on those events is now
+    just a matter of using the fields and new synthetic event in the
+    tracing/events/synthetic directory, as usual:
+
+    # echo 'hist:keys=pid:sort=pid' >> \
+           /sys/kernel/debug/tracing/events/synthetic/wakeup_new_test/trigger
+
+    Running 'cyclictest' should cause wakeup_new events to generate
+    wakeup_new_test synthetic events which should result in histogram
+    output in the wakeup_new_test event's hist file:
+
+    # cat /sys/kernel/debug/tracing/events/synthetic/wakeup_new_test/hist
+
+    A more typical usage would be to use two events to calculate a
+    latency.  The following example uses a set of hist triggers to
+    produce a 'wakeup_latency' histogram:
+
+    First, we define a 'wakeup_latency' synthetic event:
+
+    # echo 'wakeup_latency u64 lat; pid_t pid; int prio' >> \
+            /sys/kernel/debug/tracing/synthetic_events
+
+    Next, we specify that whenever we see a sched_waking event for a
+    cyclictest thread, save the timestamp in a 'ts0' variable:
+
+    # echo 'hist:keys=$saved_pid:saved_pid=pid:ts0=common_timestamp.usecs \
+            if comm=="cyclictest"' >> \
+	    /sys/kernel/debug/tracing/events/sched/sched_waking/trigger
+
+    Then, when the corresponding thread is actually scheduled onto the
+    CPU by a sched_switch event, calculate the latency and use that
+    along with another variable and an event field to generate a
+    wakeup_latency synthetic event:
+
+    # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:\
+            onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,\
+	            $saved_pid,next_prio) if next_comm=="cyclictest"' >> \
+	    /sys/kernel/debug/tracing/events/sched/sched_switch/trigger
+
+    We also need to create a histogram on the wakeup_latency synthetic
+    event in order to aggregate the generated synthetic event data:
+
+    # echo 'hist:keys=pid,prio,lat:sort=pid,lat' >> \
+            /sys/kernel/debug/tracing/events/synthetic/wakeup_latency/trigger
+
+    Finally, once we've run cyclictest to actually generate some
+    events, we can see the output by looking at the wakeup_latency
+    synthetic event's hist file:
+
+    # cat /sys/kernel/debug/tracing/events/synthetic/wakeup_latency/hist
+
+  - onmax(var).save(field,...)
+
+    The 'onmax(var).save(field,...)' hist trigger action is invoked
+    whenever the value of 'var' associated with a histogram entry
+    exceeds the current maximum contained in that variable.
+
+    The end result is that the trace event fields specified as the
+    onmax.save() params will be saved if 'var' exceeds the current
+    maximum for that hist trigger entry.  This allows context from the
+    event that exhibited the new maximum to be saved for later
+    reference.  When the histogram is displayed, additional fields
+    displaying the saved values will be printed.
+
+    As an example the below defines a couple of hist triggers, one for
+    sched_waking and another for sched_switch, keyed on pid.  Whenever
+    a sched_waking occurs, the timestamp is saved in the entry
+    corresponding to the current pid, and when the scheduler switches
+    back to that pid, the timestamp difference is calculated.  If the
+    resulting latency, stored in wakeup_lat, exceeds the current
+    maximum latency, the values specified in the save() fields are
+    recorded:
+
+    # echo 'hist:keys=pid:ts0=common_timestamp.usecs \
+            if comm=="cyclictest"' >> \
+            /sys/kernel/debug/tracing/events/sched/sched_waking/trigger
+
+    # echo 'hist:keys=next_pid:\
+            wakeup_lat=common_timestamp.usecs-$ts0:\
+            onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm) \
+            if next_comm=="cyclictest"' >> \
+            /sys/kernel/debug/tracing/events/sched/sched_switch/trigger
+
+    When the histogram is displayed, the max value and the saved
+    values corresponding to the max are displayed following the rest
+    of the fields:
+
+    # cat /sys/kernel/debug/tracing/events/sched/sched_switch/hist
+      { next_pid:       2255 } hitcount:        239
+        common_timestamp-ts0:          0
+        max:         27
+	next_comm: cyclictest
+        prev_pid:          0  prev_prio:        120  prev_comm: swapper/1
+
+      { next_pid:       2256 } hitcount:       2355
+        common_timestamp-ts0: 0
+        max:         49  next_comm: cyclictest
+        prev_pid:          0  prev_prio:        120  prev_comm: swapper/0
+
+      Totals:
+          Hits: 12970
+          Entries: 2
+          Dropped: 0
diff --git a/Documentation/virtual/kvm/00-INDEX b/Documentation/virtual/kvm/00-INDEX
index 3da73aabff5a..3492458a4ae8 100644
--- a/Documentation/virtual/kvm/00-INDEX
+++ b/Documentation/virtual/kvm/00-INDEX
@@ -1,7 +1,12 @@
 00-INDEX
 	- this file.
+amd-memory-encryption.rst
+	- notes on AMD Secure Encrypted Virtualization feature and SEV firmware
+	  command description
 api.txt
 	- KVM userspace API.
+arm
+	- internal ABI between the kernel and HYP (for arm/arm64)
 cpuid.txt
 	- KVM-specific cpuid leaves (x86).
 devices/
@@ -26,6 +31,5 @@ s390-diag.txt
 	- Diagnose hypercall description (for IBM S/390)
 timekeeping.txt
 	- timekeeping virtualization for x86-based architectures.
-amd-memory-encryption.txt
-	- notes on AMD Secure Encrypted Virtualization feature and SEV firmware
-	  command description
+vcpu-requests.rst
+	- internal VCPU request API
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index d6b3ff51a14f..1c7958b57fe9 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3480,7 +3480,7 @@ encrypted VMs.
 
 Currently, this ioctl is used for issuing Secure Encrypted Virtualization
 (SEV) commands on AMD Processors. The SEV commands are defined in
-Documentation/virtual/kvm/amd-memory-encryption.txt.
+Documentation/virtual/kvm/amd-memory-encryption.rst.
 
 4.111 KVM_MEMORY_ENCRYPT_REG_REGION
 
@@ -3516,6 +3516,38 @@ Returns: 0 on success; -1 on error
 This ioctl can be used to unregister the guest memory region registered
 with KVM_MEMORY_ENCRYPT_REG_REGION ioctl above.
 
+4.113 KVM_HYPERV_EVENTFD
+
+Capability: KVM_CAP_HYPERV_EVENTFD
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_hyperv_eventfd (in)
+
+This ioctl (un)registers an eventfd to receive notifications from the guest on
+the specified Hyper-V connection id through the SIGNAL_EVENT hypercall, without
+causing a user exit.  SIGNAL_EVENT hypercall with non-zero event flag number
+(bits 24-31) still triggers a KVM_EXIT_HYPERV_HCALL user exit.
+
+struct kvm_hyperv_eventfd {
+	__u32 conn_id;
+	__s32 fd;
+	__u32 flags;
+	__u32 padding[3];
+};
+
+The conn_id field should fit within 24 bits:
+
+#define KVM_HYPERV_CONN_ID_MASK		0x00ffffff
+
+The acceptable values for the flags field are:
+
+#define KVM_HYPERV_EVENTFD_DEASSIGN	(1 << 0)
+
+Returns: 0 on success,
+	-EINVAL if conn_id or flags is outside the allowed range
+	-ENOENT on deassign if the conn_id isn't registered
+	-EEXIST on assign if the conn_id is already registered
+
 
 5. The kvm_run structure
 ------------------------
@@ -3873,7 +3905,7 @@ in userspace.
 	__u64 kvm_dirty_regs;
 	union {
 		struct kvm_sync_regs regs;
-		char padding[1024];
+		char padding[SYNC_REGS_SIZE_BYTES];
 	} s;
 
 If KVM_CAP_SYNC_REGS is defined, these fields allow userspace to access
@@ -4078,6 +4110,46 @@ Once this is done the KVM_REG_MIPS_VEC_* and KVM_REG_MIPS_MSA_* registers can be
 accessed, and the Config5.MSAEn bit is accessible via the KVM API and also from
 the guest.
 
+6.74 KVM_CAP_SYNC_REGS
+Architectures: s390, x86
+Target: s390: always enabled, x86: vcpu
+Parameters: none
+Returns: x86: KVM_CHECK_EXTENSION returns a bit-array indicating which register
+sets are supported (bitfields defined in arch/x86/include/uapi/asm/kvm.h).
+
+As described above in the kvm_sync_regs struct info in section 5 (kvm_run):
+KVM_CAP_SYNC_REGS "allow[s] userspace to access certain guest registers
+without having to call SET/GET_*REGS". This reduces overhead by eliminating
+repeated ioctl calls for setting and/or getting register values. This is
+particularly important when userspace is making synchronous guest state
+modifications, e.g. when emulating and/or intercepting instructions in
+userspace.
+
+For s390 specifics, please refer to the source code.
+
+For x86:
+- the register sets to be copied out to kvm_run are selectable
+  by userspace (rather than all sets being copied out for every exit).
+- vcpu_events are available in addition to regs and sregs.
+
+For x86, the 'kvm_valid_regs' field of struct kvm_run is overloaded to
+function as an input bit-array field set by userspace to indicate the
+specific register sets to be copied out on the next exit.
+
+To indicate when userspace has modified values that should be copied into
+the vCPU, the all-architecture bitarray field 'kvm_dirty_regs' must be set.
+This is done using the same bitflags as for the 'kvm_valid_regs' field.
+If the dirty bit is not set, then the register set values will not be copied
+into the vCPU even if they've been modified.
+
+Unused bitfields in the bitarrays must be set to zero.
+
+struct kvm_sync_regs {
+        struct kvm_regs regs;
+        struct kvm_sregs sregs;
+        struct kvm_vcpu_events events;
+};
+
 7. Capabilities that can be enabled on VMs
 ------------------------------------------
 
@@ -4286,6 +4358,26 @@ enables QEMU to build error log and branch to guest kernel registered
 machine check handling routine. Without this capability KVM will
 branch to guests' 0x200 interrupt vector.
 
+7.13 KVM_CAP_X86_DISABLE_EXITS
+
+Architectures: x86
+Parameters: args[0] defines which exits are disabled
+Returns: 0 on success, -EINVAL when args[0] contains invalid exits
+
+Valid bits in args[0] are
+
+#define KVM_X86_DISABLE_EXITS_MWAIT            (1 << 0)
+#define KVM_X86_DISABLE_EXITS_HLT              (1 << 1)
+
+Enabling this capability on a VM provides userspace with a way to no
+longer intercept some instructions for improved latency in some
+workloads, and is suggested when vCPUs are associated to dedicated
+physical CPUs.  More bits can be added in the future; userspace can
+just pass the KVM_CHECK_EXTENSION result to KVM_ENABLE_CAP to disable
+all such vmexits.
+
+Do not enable KVM_FEATURE_PV_UNHALT if you disable HLT exits.
+
 8. Other capabilities.
 ----------------------
 
@@ -4398,15 +4490,6 @@ reserved.
     Both registers and addresses are 64-bits wide.
     It will be possible to run 64-bit or 32-bit guest code.
 
-8.8 KVM_CAP_X86_GUEST_MWAIT
-
-Architectures: x86
-
-This capability indicates that guest using memory monotoring instructions
-(MWAIT/MWAITX) to stop the virtual CPU will not cause a VM exit.  As such time
-spent while virtual CPU is halted in this way will then be accounted for as
-guest running time on the host (as opposed to e.g. HLT).
-
 8.9 KVM_CAP_ARM_USER_IRQ
 
 Architectures: arm, arm64
@@ -4483,3 +4566,33 @@ Parameters: none
 This capability indicates if the flic device will be able to get/set the
 AIS states for migration via the KVM_DEV_FLIC_AISM_ALL attribute and allows
 to discover this without having to create a flic device.
+
+8.14 KVM_CAP_S390_PSW
+
+Architectures: s390
+
+This capability indicates that the PSW is exposed via the kvm_run structure.
+
+8.15 KVM_CAP_S390_GMAP
+
+Architectures: s390
+
+This capability indicates that the user space memory used as guest mapping can
+be anywhere in the user memory address space, as long as the memory slots are
+aligned and sized to a segment (1MB) boundary.
+
+8.16 KVM_CAP_S390_COW
+
+Architectures: s390
+
+This capability indicates that the user space memory used as guest mapping can
+use copy-on-write semantics as well as dirty pages tracking via read-only page
+tables.
+
+8.17 KVM_CAP_S390_BPB
+
+Architectures: s390
+
+This capability indicates that kvm will implement the interfaces to handle
+reset, migration and nested KVM for branch prediction blocking. The stfle
+facility 82 should not be provided to the guest without this capability.
diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt
index 87a7506f31c2..d4f33eb805dd 100644
--- a/Documentation/virtual/kvm/cpuid.txt
+++ b/Documentation/virtual/kvm/cpuid.txt
@@ -23,8 +23,8 @@ This function queries the presence of KVM cpuid leafs.
 
 
 function: define KVM_CPUID_FEATURES (0x40000001)
-returns : ebx, ecx, edx = 0
-          eax = and OR'ed group of (1 << flag), where each flags is:
+returns : ebx, ecx
+          eax = an OR'ed group of (1 << flag), where each flag is:
 
 
 flag                               || value || meaning
@@ -66,3 +66,14 @@ KVM_FEATURE_CLOCKSOURCE_STABLE_BIT ||    24 || host will warn if no guest-side
                                    ||       || per-cpu warps are expected in
                                    ||       || kvmclock.
 ------------------------------------------------------------------------------
+
+          edx = an OR'ed group of (1 << flag), where each flag is:
+
+
+flag                               || value || meaning
+==================================================================================
+KVM_HINTS_DEDICATED                ||     0 || guest checks this feature bit to
+                                   ||       || determine if there is vCPU pinning
+                                   ||       || and there is no vCPU over-commitment,
+                                   ||       || allowing optimizations
+----------------------------------------------------------------------------------
diff --git a/Documentation/vm/hmm.txt b/Documentation/vm/hmm.txt
index 4d3aac9f4a5d..2d1d6f69e91b 100644
--- a/Documentation/vm/hmm.txt
+++ b/Documentation/vm/hmm.txt
@@ -1,152 +1,160 @@
 Heterogeneous Memory Management (HMM)
 
-Transparently allow any component of a program to use any memory region of said
-program with a device without using device specific memory allocator. This is
-becoming a requirement to simplify the use of advance heterogeneous computing
-where GPU, DSP or FPGA are use to perform various computations.
-
-This document is divided as follow, in the first section i expose the problems
-related to the use of a device specific allocator. The second section i expose
-the hardware limitations that are inherent to many platforms. The third section
-gives an overview of HMM designs. The fourth section explains how CPU page-
-table mirroring works and what is HMM purpose in this context. Fifth section
-deals with how device memory is represented inside the kernel. Finaly the last
-section present the new migration helper that allow to leverage the device DMA
-engine.
-
-
-1) Problems of using device specific memory allocator:
-2) System bus, device memory characteristics
-3) Share address space and migration
+Provide infrastructure and helpers to integrate non-conventional memory (device
+memory like GPU on board memory) into regular kernel path, with the cornerstone
+of this being specialized struct page for such memory (see sections 5 to 7 of
+this document).
+
+HMM also provides optional helpers for SVM (Shared Virtual Memory), i.e.,
+allowing a device to transparently access program addresses coherently with the
+CPU meaning that any valid pointer on the CPU is also a valid pointer for the
+device. This is becoming mandatory to simplify the use of advanced hetero-
+geneous computing where GPU, DSP, or FPGA are used to perform various
+computations on behalf of a process.
+
+This document is divided as follows: in the first section I expose the problems
+related to using device specific memory allocators. In the second section, I
+expose the hardware limitations that are inherent to many platforms. The third
+section gives an overview of the HMM design. The fourth section explains how
+CPU page-table mirroring works and the purpose of HMM in this context. The
+fifth section deals with how device memory is represented inside the kernel.
+Finally, the last section presents a new migration helper that allows lever-
+aging the device DMA engine.
+
+
+1) Problems of using a device specific memory allocator:
+2) I/O bus, device memory characteristics
+3) Shared address space and migration
 4) Address space mirroring implementation and API
 5) Represent and manage device memory from core kernel point of view
-6) Migrate to and from device memory
+6) Migration to and from device memory
 7) Memory cgroup (memcg) and rss accounting
 
 
 -------------------------------------------------------------------------------
 
-1) Problems of using device specific memory allocator:
-
-Device with large amount of on board memory (several giga bytes) like GPU have
-historically manage their memory through dedicated driver specific API. This
-creates a disconnect between memory allocated and managed by device driver and
-regular application memory (private anonymous, share memory or regular file
-back memory). From here on i will refer to this aspect as split address space.
-I use share address space to refer to the opposite situation ie one in which
-any memory region can be use by device transparently.
-
-Split address space because device can only access memory allocated through the
-device specific API. This imply that all memory object in a program are not
-equal from device point of view which complicate large program that rely on a
-wide set of libraries.
-
-Concretly this means that code that wants to leverage device like GPU need to
-copy object between genericly allocated memory (malloc, mmap private/share/)
-and memory allocated through the device driver API (this still end up with an
-mmap but of the device file).
-
-For flat dataset (array, grid, image, ...) this isn't too hard to achieve but
-complex data-set (list, tree, ...) are hard to get right. Duplicating a complex
-data-set need to re-map all the pointer relations between each of its elements.
-This is error prone and program gets harder to debug because of the duplicate
-data-set.
-
-Split address space also means that library can not transparently use data they
-are getting from core program or other library and thus each library might have
-to duplicate its input data-set using specific memory allocator. Large project
-suffer from this and waste resources because of the various memory copy.
-
-Duplicating each library API to accept as input or output memory allocted by
+1) Problems of using a device specific memory allocator:
+
+Devices with a large amount of on board memory (several gigabytes) like GPUs
+have historically managed their memory through dedicated driver specific APIs.
+This creates a disconnect between memory allocated and managed by a device
+driver and regular application memory (private anonymous, shared memory, or
+regular file backed memory). From here on I will refer to this aspect as split
+address space. I use shared address space to refer to the opposite situation:
+i.e., one in which any application memory region can be used by a device
+transparently.
+
+Split address space happens because a device can only access memory allocated
+through a device specific API. This implies that all memory objects in a program
+are not equal from the device point of view which complicates large programs
+that rely on a wide set of libraries.
+
+Concretely this means that code that wants to leverage devices like GPUs needs
+to copy objects between generically allocated memory (malloc, mmap private, mmap
+shared) and memory allocated through the device driver API (this still ends up
+with an mmap but of the device file).
+
+For flat data sets (array, grid, image, ...) this isn't too hard to achieve but
+complex data sets (list, tree, ...) are hard to get right. Duplicating a
+complex data set needs to re-map all the pointer relations between each of its
+elements. This is error prone and programs get harder to debug because of the
+duplicate data set and addresses.
+
+Split address space also means that libraries cannot transparently use data
+they are getting from the core program or another library and thus each library
+might have to duplicate its input data set using the device specific memory
+allocator. Large projects suffer from this and waste resources because of the
+various memory copies.
+
+Duplicating each library API to accept as input or output memory allocated by
 each device specific allocator is not a viable option. It would lead to a
-combinatorial explosions in the library entry points.
+combinatorial explosion in the library entry points.
 
-Finaly with the advance of high level language constructs (in C++ but in other
-language too) it is now possible for compiler to leverage GPU or other devices
-without even the programmer knowledge. Some of compiler identified patterns are
-only do-able with a share address. It is as well more reasonable to use a share
-address space for all the other patterns.
+Finally, with the advance of high level language constructs (in C++ but in
+other languages too) it is now possible for the compiler to leverage GPUs and
+other devices without programmer knowledge. Some compiler identified patterns
+are only do-able with a shared address space. It is also more reasonable to use
+a shared address space for all other patterns.
 
 
 -------------------------------------------------------------------------------
 
-2) System bus, device memory characteristics
+2) I/O bus, device memory characteristics
 
-System bus cripple share address due to few limitations. Most system bus only
-allow basic memory access from device to main memory, even cache coherency is
-often optional. Access to device memory from CPU is even more limited, most
-often than not it is not cache coherent.
+I/O buses cripple shared address spaces due to a few limitations. Most I/O
+buses only allow basic memory access from device to main memory; even cache
+coherency is often optional. Access to device memory from CPU is even more
+limited. More often than not, it is not cache coherent.
 
-If we only consider the PCIE bus than device can access main memory (often
-through an IOMMU) and be cache coherent with the CPUs. However it only allows
-a limited set of atomic operation from device on main memory. This is worse
-in the other direction the CPUs can only access a limited range of the device
-memory and can not perform atomic operations on it. Thus device memory can not
-be consider like regular memory from kernel point of view.
+If we only consider the PCIE bus, then a device can access main memory (often
+through an IOMMU) and be cache coherent with the CPUs. However, it only allows
+a limited set of atomic operations from device on main memory. This is worse
+in the other direction: the CPU can only access a limited range of the device
+memory and cannot perform atomic operations on it. Thus device memory cannot
+be considered the same as regular memory from the kernel point of view.
 
 Another crippling factor is the limited bandwidth (~32GBytes/s with PCIE 4.0
-and 16 lanes). This is 33 times less that fastest GPU memory (1 TBytes/s).
-The final limitation is latency, access to main memory from the device has an
-order of magnitude higher latency than when the device access its own memory.
+and 16 lanes). This is 33 times less than the fastest GPU memory (1 TBytes/s).
+The final limitation is latency. Access to main memory from the device has an
+order of magnitude higher latency than when the device accesses its own memory.
 
-Some platform are developing new system bus or additions/modifications to PCIE
-to address some of those limitations (OpenCAPI, CCIX). They mainly allow two
+Some platforms are developing new I/O buses or additions/modifications to PCIE
+to address some of these limitations (OpenCAPI, CCIX). They mainly allow two-
 way cache coherency between CPU and device and allow all atomic operations the
-architecture supports. Saddly not all platform are following this trends and
-some major architecture are left without hardware solutions to those problems.
+architecture supports. Sadly, not all platforms are following this trend and
+some major architectures are left without hardware solutions to these problems.
 
-So for share address space to make sense not only we must allow device to
-access any memory memory but we must also permit any memory to be migrated to
-device memory while device is using it (blocking CPU access while it happens).
+So for shared address space to make sense, not only must we allow devices to
+access any memory but we must also permit any memory to be migrated to device
+memory while device is using it (blocking CPU access while it happens).
 
 
 -------------------------------------------------------------------------------
 
-3) Share address space and migration
+3) Shared address space and migration
 
 HMM intends to provide two main features. First one is to share the address
-space by duplication the CPU page table into the device page table so same
-address point to same memory and this for any valid main memory address in
+space by duplicating the CPU page table in the device page table so the same
+address points to the same physical memory for any valid main memory address in
 the process address space.
 
-To achieve this, HMM offer a set of helpers to populate the device page table
+To achieve this, HMM offers a set of helpers to populate the device page table
 while keeping track of CPU page table updates. Device page table updates are
-not as easy as CPU page table updates. To update the device page table you must
-allow a buffer (or use a pool of pre-allocated buffer) and write GPU specifics
-commands in it to perform the update (unmap, cache invalidations and flush,
-...). This can not be done through common code for all device. Hence why HMM
-provides helpers to factor out everything that can be while leaving the gory
-details to the device driver.
-
-The second mechanism HMM provide is a new kind of ZONE_DEVICE memory that does
-allow to allocate a struct page for each page of the device memory. Those page
-are special because the CPU can not map them. They however allow to migrate
-main memory to device memory using exhisting migration mechanism and everything
-looks like if page was swap out to disk from CPU point of view. Using a struct
-page gives the easiest and cleanest integration with existing mm mechanisms.
-Again here HMM only provide helpers, first to hotplug new ZONE_DEVICE memory
-for the device memory and second to perform migration. Policy decision of what
-and when to migrate things is left to the device driver.
-
-Note that any CPU access to a device page trigger a page fault and a migration
-back to main memory ie when a page backing an given address A is migrated from
-a main memory page to a device page then any CPU access to address A trigger a
-page fault and initiate a migration back to main memory.
-
-
-With this two features, HMM not only allow a device to mirror a process address
-space and keeps both CPU and device page table synchronize, but also allow to
-leverage device memory by migrating part of data-set that is actively use by a
-device.
+not as easy as CPU page table updates. To update the device page table, you must
+allocate a buffer (or use a pool of pre-allocated buffers) and write GPU
+specific commands in it to perform the update (unmap, cache invalidations, and
+flush, ...). This cannot be done through common code for all devices. Hence
+why HMM provides helpers to factor out everything that can be while leaving the
+hardware specific details to the device driver.
+
+The second mechanism HMM provides is a new kind of ZONE_DEVICE memory that
+allows allocating a struct page for each page of the device memory. Those pages
+are special because the CPU cannot map them. However, they allow migrating
+main memory to device memory using existing migration mechanisms and everything
+looks like a page is swapped out to disk from the CPU point of view. Using a
+struct page gives the easiest and cleanest integration with existing mm mech-
+anisms. Here again, HMM only provides helpers, first to hotplug new ZONE_DEVICE
+memory for the device memory and second to perform migration. Policy decisions
+of what and when to migrate things are left to the device driver.
+
+Note that any CPU access to a device page triggers a page fault and a migration
+back to main memory. For example, when a page backing a given CPU address A is
+migrated from a main memory page to a device page, then any CPU access to
+address A triggers a page fault and initiates a migration back to main memory.
+
+With these two features, HMM not only allows a device to mirror process address
+space and keeps both CPU and device page tables synchronized, but also lever-
+ages device memory by migrating the part of the data set that is actively being
+used by the device.
 
 
 -------------------------------------------------------------------------------
 
 4) Address space mirroring implementation and API
 
-Address space mirroring main objective is to allow to duplicate range of CPU
-page table into a device page table and HMM helps keeping both synchronize. A
-device driver that want to mirror a process address space must start with the
+Address space mirroring's main objective is to allow duplication of a range of
+CPU page table into a device page table; HMM helps keep both synchronized. A
+device driver that wants to mirror a process address space must start with the
 registration of an hmm_mirror struct:
 
  int hmm_mirror_register(struct hmm_mirror *mirror,
@@ -154,9 +162,9 @@ registration of an hmm_mirror struct:
  int hmm_mirror_register_locked(struct hmm_mirror *mirror,
                                 struct mm_struct *mm);
 
-The locked variant is to be use when the driver is already holding the mmap_sem
-of the mm in write mode. The mirror struct has a set of callback that are use
-to propagate CPU page table:
+The locked variant is to be used when the driver is already holding mmap_sem
+of the mm in write mode. The mirror struct has a set of callbacks that are used
+to propagate CPU page tables:
 
  struct hmm_mirror_ops {
      /* sync_cpu_device_pagetables() - synchronize page tables
@@ -181,13 +189,13 @@ to propagate CPU page table:
                      unsigned long end);
  };
 
-Device driver must perform update to the range following action (turn range
-read only, or fully unmap, ...). Once driver callback returns the device must
-be done with the update.
+The device driver must perform the update action to the range (mark range
+read only, or fully unmap, ...). The device must be done with the update before
+the driver callback returns.
 
 
-When device driver wants to populate a range of virtual address it can use
-either:
+When the device driver wants to populate a range of virtual addresses, it can
+use either:
  int hmm_vma_get_pfns(struct vm_area_struct *vma,
                       struct hmm_range *range,
                       unsigned long start,
@@ -201,17 +209,19 @@ either:
                    bool write,
                    bool block);
 
-First one (hmm_vma_get_pfns()) will only fetch present CPU page table entry and
-will not trigger a page fault on missing or non present entry. The second one
-do trigger page fault on missing or read only entry if write parameter is true.
-Page fault use the generic mm page fault code path just like a CPU page fault.
+The first one (hmm_vma_get_pfns()) will only fetch present CPU page table
+entries and will not trigger a page fault on missing or non-present entries.
+The second one does trigger a page fault on missing or read-only entries if the
+write parameter is true. Page faults use the generic mm page fault code path
+just like a CPU page fault.
 
-Both function copy CPU page table into their pfns array argument. Each entry in
-that array correspond to an address in the virtual range. HMM provide a set of
-flags to help driver identify special CPU page table entries.
+Both functions copy CPU page table entries into their pfns array argument. Each
+entry in that array corresponds to an address in the virtual range. HMM
+provides a set of flags to help the driver identify special CPU page table
+entries.
 
 Locking with the update() callback is the most important aspect the driver must
-respect in order to keep things properly synchronize. The usage pattern is :
+respect in order to keep things properly synchronized. The usage pattern is:
 
  int driver_populate_range(...)
  {
@@ -233,43 +243,44 @@ respect in order to keep things properly synchronize. The usage pattern is :
       return 0;
  }
 
-The driver->update lock is the same lock that driver takes inside its update()
-callback. That lock must be call before hmm_vma_range_done() to avoid any race
-with a concurrent CPU page table update.
+The driver->update lock is the same lock that the driver takes inside its
+update() callback. That lock must be held before hmm_vma_range_done() to avoid
+any race with a concurrent CPU page table update.
 
-HMM implements all this on top of the mmu_notifier API because we wanted to a
-simpler API and also to be able to perform optimization latter own like doing
-concurrent device update in multi-devices scenario.
+HMM implements all this on top of the mmu_notifier API because we wanted a
+simpler API and also to be able to perform optimizations later on like doing
+concurrent device updates in multi-device scenarios.
 
-HMM also serve as an impedence missmatch between how CPU page table update are
-done (by CPU write to the page table and TLB flushes) from how device update
-their own page table. Device update is a multi-step process, first appropriate
-commands are write to a buffer, then this buffer is schedule for execution on
-the device. It is only once the device has executed commands in the buffer that
-the update is done. Creating and scheduling update command buffer can happen
-concurrently for multiple devices. Waiting for each device to report commands
-as executed is serialize (there is no point in doing this concurrently).
+HMM also serves as an impedance mismatch between how CPU page table updates
+are done (by CPU write to the page table and TLB flushes) and how devices
+update their own page table. Device updates are a multi-step process. First,
+appropriate commands are written to a buffer, then this buffer is scheduled for
+execution on the device. It is only once the device has executed commands in
+the buffer that the update is done. Creating and scheduling the update command
+buffer can happen concurrently for multiple devices. Waiting for each device to
+report commands as executed is serialized (there is no point in doing this
+concurrently).
 
 
 -------------------------------------------------------------------------------
 
 5) Represent and manage device memory from core kernel point of view
 
-Several differents design were try to support device memory. First one use
-device specific data structure to keep information about migrated memory and
-HMM hooked itself in various place of mm code to handle any access to address
-that were back by device memory. It turns out that this ended up replicating
-most of the fields of struct page and also needed many kernel code path to be
-updated to understand this new kind of memory.
+Several different designs were tried to support device memory. First one used
+a device specific data structure to keep information about migrated memory and
+HMM hooked itself in various places of mm code to handle any access to
+addresses that were backed by device memory. It turns out that this ended up
+replicating most of the fields of struct page and also needed many kernel code
+paths to be updated to understand this new kind of memory.
 
-Thing is most kernel code path never try to access the memory behind a page
-but only care about struct page contents. Because of this HMM switchted to
-directly using struct page for device memory which left most kernel code path
-un-aware of the difference. We only need to make sure that no one ever try to
-map those page from the CPU side.
+Most kernel code paths never try to access the memory behind a page
+but only care about struct page contents. Because of this, HMM switched to
+directly using struct page for device memory which left most kernel code paths
+unaware of the difference. We only need to make sure that no one ever tries to
+map those pages from the CPU side.
 
-HMM provide a set of helpers to register and hotplug device memory as a new
-region needing struct page. This is offer through a very simple API:
+HMM provides a set of helpers to register and hotplug device memory as a new
+region needing a struct page. This is offered through a very simple API:
 
  struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
                                    struct device *device,
@@ -289,18 +300,19 @@ The hmm_devmem_ops is where most of the important things are:
  };
 
 The first callback (free()) happens when the last reference on a device page is
-drop. This means the device page is now free and no longer use by anyone. The
-second callback happens whenever CPU try to access a device page which it can
-not do. This second callback must trigger a migration back to system memory.
+dropped. This means the device page is now free and no longer used by anyone.
+The second callback happens whenever the CPU tries to access a device page
+which it cannot do. This second callback must trigger a migration back to
+system memory.
 
 
 -------------------------------------------------------------------------------
 
-6) Migrate to and from device memory
+6) Migration to and from device memory
 
-Because CPU can not access device memory, migration must use device DMA engine
-to perform copy from and to device memory. For this we need a new migration
-helper:
+Because the CPU cannot access device memory, migration must use the device DMA
+engine to perform copy from and to device memory. For this we need a new
+migration helper:
 
  int migrate_vma(const struct migrate_vma_ops *ops,
                  struct vm_area_struct *vma,
@@ -311,15 +323,15 @@ helper:
                  unsigned long *dst,
                  void *private);
 
-Unlike other migration function it works on a range of virtual address, there
-is two reasons for that. First device DMA copy has a high setup overhead cost
+Unlike other migration functions it works on a range of virtual addresses; there
+are two reasons for that. First, device DMA copy has a high setup overhead cost
 and thus batching multiple pages is needed as otherwise the migration overhead
-make the whole excersie pointless. The second reason is because driver trigger
-such migration base on range of address the device is actively accessing.
+makes the whole exercise pointless. The second reason is because the
+migration might be for a range of addresses the device is actively accessing.
 
-The migrate_vma_ops struct define two callbacks. First one (alloc_and_copy())
-control destination memory allocation and copy operation. Second one is there
-to allow device driver to perform cleanup operation after migration.
+The migrate_vma_ops struct defines two callbacks. First one (alloc_and_copy())
+controls destination memory allocation and copy operation. Second one is there
+to allow the device driver to perform cleanup operations after migration.
 
  struct migrate_vma_ops {
      void (*alloc_and_copy)(struct vm_area_struct *vma,
@@ -336,19 +348,19 @@ to allow device driver to perform cleanup operation after migration.
                               void *private);
  };
 
-It is important to stress that this migration helpers allow for hole in the
+It is important to stress that these migration helpers allow for holes in the
 virtual address range. Some pages in the range might not be migrated for all
-the usual reasons (page is pin, page is lock, ...). This helper does not fail
-but just skip over those pages.
+the usual reasons (page is pinned, page is locked, ...). This helper does not
+fail but just skips over those pages.
 
-The alloc_and_copy() might as well decide to not migrate all pages in the
-range (for reasons under the callback control). For those the callback just
-have to leave the corresponding dst entry empty.
+The alloc_and_copy() might decide to not migrate all pages in the
+range (for reasons under the callback control). For those, the callback just
+has to leave the corresponding dst entry empty.
 
-Finaly the migration of the struct page might fails (for file back page) for
+Finally, the migration of the struct page might fail (for file backed page) for
 various reasons (failure to freeze reference, or update page cache, ...). If
-that happens then the finalize_and_map() can catch any pages that was not
-migrated. Note those page were still copied to new page and thus we wasted
+that happens, then the finalize_and_map() can catch any pages that were not
+migrated. Note those pages were still copied to a new page and thus we wasted
 bandwidth but this is considered as a rare event and a price that we are
 willing to pay to keep all the code simpler.
 
@@ -358,27 +370,27 @@ willing to pay to keep all the code simpler.
 7) Memory cgroup (memcg) and rss accounting
 
 For now device memory is accounted as any regular page in rss counters (either
-anonymous if device page is use for anonymous, file if device page is use for
-file back page or shmem if device page is use for share memory). This is a
-deliberate choice to keep existing application that might start using device
-memory without knowing about it to keep runing unimpacted.
-
-Drawbacks is that OOM killer might kill an application using a lot of device
-memory and not a lot of regular system memory and thus not freeing much system
-memory. We want to gather more real world experience on how application and
-system react under memory pressure in the presence of device memory before
+anonymous if device page is used for anonymous, file if device page is used for
+file backed page or shmem if device page is used for shared memory). This is a
+deliberate choice to keep existing applications, that might start using device
+memory without knowing about it, running unimpacted.
+
+A drawback is that the OOM killer might kill an application using a lot of
+device memory and not a lot of regular system memory and thus not freeing much
+system memory. We want to gather more real world experience on how applications
+and system react under memory pressure in the presence of device memory before
 deciding to account device memory differently.
 
 
-Same decision was made for memory cgroup. Device memory page are accounted
+Same decision was made for memory cgroup. Device memory pages are accounted
 against same memory cgroup a regular page would be accounted to. This does
 simplify migration to and from device memory. This also means that migration
-back from device memory to regular memory can not fail because it would
+back from device memory to regular memory cannot fail because it would
 go above memory cgroup limit. We might revisit this choice latter on once we
-get more experience in how device memory is use and its impact on memory
+get more experience in how device memory is used and its impact on memory
 resource control.
 
 
-Note that device memory can never be pin nor by device driver nor through GUP
+Note that device memory can never be pinned by a device driver or through GUP
 and thus such memory is always free upon process exit. Or when last reference
-is drop in case of share memory or file back memory.
+is dropped in case of shared memory or file backed memory.
diff --git a/Documentation/vm/page_migration b/Documentation/vm/page_migration
index 0478ae2ad44a..496868072e24 100644
--- a/Documentation/vm/page_migration
+++ b/Documentation/vm/page_migration
@@ -90,7 +90,7 @@ Steps:
 
 1. Lock the page to be migrated
 
-2. Insure that writeback is complete.
+2. Ensure that writeback is complete.
 
 3. Lock the new page that we want to move to. It is locked so that accesses to
    this (not yet uptodate) page immediately lock while the move is in progress.
@@ -100,8 +100,8 @@ Steps:
    mapcount is not zero then we do not migrate the page. All user space
    processes that attempt to access the page will now wait on the page lock.
 
-5. The radix tree lock is taken. This will cause all processes trying
-   to access the page via the mapping to block on the radix tree spinlock.
+5. The i_pages lock is taken. This will cause all processes trying
+   to access the page via the mapping to block on the spinlock.
 
 6. The refcount of the page is examined and we back out if references remain
    otherwise we know that we are the only one referencing this page.
@@ -114,12 +114,12 @@ Steps:
 
 9. The radix tree is changed to point to the new page.
 
-10. The reference count of the old page is dropped because the radix tree
+10. The reference count of the old page is dropped because the address space
     reference is gone. A reference to the new page is established because
-    the new page is referenced to by the radix tree.
+    the new page is referenced by the address space.
 
-11. The radix tree lock is dropped. With that lookups in the mapping
-    become possible again. Processes will move from spinning on the tree_lock
+11. The i_pages lock is dropped. With that lookups in the mapping
+    become possible again. Processes will move from spinning on the lock
     to sleeping on the locked new page.
 
 12. The page contents are copied to the new page.
diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
index ea91cb61a602..5432a96d31ff 100644
--- a/Documentation/x86/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -20,7 +20,7 @@ ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
 ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
 ... unused hole ...
 ffffffff80000000 - ffffffff9fffffff (=512 MB)  kernel text mapping, from phys 0
-ffffffffa0000000 - [fixmap start]   (~1526 MB) module mapping space (variable)
+ffffffffa0000000 - fffffffffeffffff (1520 MB) module mapping space
 [fixmap start]   - ffffffffff5fffff kernel-internal fixmap range
 ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
 ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
diff --git a/MAINTAINERS b/MAINTAINERS
index 7e48624f4f9f..0a1410d5a621 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -934,8 +934,8 @@ F:	drivers/char/apm-emulation.c
 APPARMOR SECURITY MODULE
 M:	John Johansen <john.johansen@canonical.com>
 L:	apparmor@lists.ubuntu.com (subscribers-only, general discussion)
-W:	apparmor.wiki.kernel.org
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/jj/apparmor-dev.git
+W:	wiki.apparmor.net
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/jj/linux-apparmor
 S:	Supported
 F:	security/apparmor/
 F:	Documentation/admin-guide/LSM/apparmor.rst
@@ -1232,10 +1232,15 @@ F:	Documentation/devicetree/bindings/i2c/i2c-aspeed.txt
 
 ARM/ASPEED MACHINE SUPPORT
 M:	Joel Stanley <joel@jms.id.au>
-S:	Maintained
+R:	Andrew Jeffery <andrew@aj.id.au>
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+L:	linux-aspeed@lists.ozlabs.org (moderated for non-subscribers)
+Q:	https://patchwork.ozlabs.org/project/linux-aspeed/list/
+S:	Supported
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/joel/aspeed.git
 F:	arch/arm/mach-aspeed/
 F:	arch/arm/boot/dts/aspeed-*
-F:	drivers/*/*aspeed*
+N:	aspeed
 
 ARM/ATMEL AT91 Clock Support
 M:	Boris Brezillon <boris.brezillon@bootlin.com>
@@ -1743,7 +1748,7 @@ F:	arch/arm/mach-orion5x/ts78xx-*
 ARM/OXNAS platform support
 M:	Neil Armstrong <narmstrong@baylibre.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-L:	linux-oxnas@lists.tuxfamily.org (moderated for non-subscribers)
+L:	linux-oxnas@groups.io (moderated for non-subscribers)
 S:	Maintained
 F:	arch/arm/mach-oxnas/
 F:	arch/arm/boot/dts/ox8*.dts*
@@ -2421,7 +2426,6 @@ T:	git git://github.com/ndyer/linux.git
 S:	Maintained
 F:	Documentation/devicetree/bindings/input/atmel,maxtouch.txt
 F:	drivers/input/touchscreen/atmel_mxt_ts.c
-F:	include/linux/platform_data/atmel_mxt_ts.h
 
 ATMEL SAMA5D2 ADC DRIVER
 M:	Ludovic Desroches <ludovic.desroches@microchip.com>
@@ -4392,7 +4396,7 @@ S:	Maintained
 F:	drivers/staging/fsl-dpaa2/ethsw
 
 DPT_I2O SCSI RAID DRIVER
-M:	Adaptec OEM Raid Solutions <aacraid@adaptec.com>
+M:	Adaptec OEM Raid Solutions <aacraid@microsemi.com>
 L:	linux-scsi@vger.kernel.org
 W:	http://www.adaptec.com/
 S:	Maintained
@@ -5838,7 +5842,7 @@ F:	scripts/Makefile.gcc-plugins
 F:	Documentation/gcc-plugins.txt
 
 GCOV BASED KERNEL PROFILING
-M:	Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
+M:	Peter Oberparleiter <oberpar@linux.ibm.com>
 S:	Maintained
 F:	kernel/gcov/
 F:	Documentation/dev-tools/gcov.rst
@@ -5906,6 +5910,11 @@ S:	Supported
 F:	drivers/phy/
 F:	include/linux/phy/
 
+GENERIC PINCTRL I2C DEMULTIPLEXER DRIVER
+M:	Wolfram Sang <wsa+renesas@sang-engineering.com>
+S:	Supported
+F:	drivers/i2c/muxes/i2c-demux-pinctrl.c
+
 GENERIC PM DOMAINS
 M:	"Rafael J. Wysocki" <rjw@rjwysocki.net>
 M:	Kevin Hilman <khilman@kernel.org>
@@ -6410,6 +6419,7 @@ L:	linux-mm@kvack.org
 S:	Maintained
 F:	mm/hmm*
 F:	include/linux/hmm*
+F:	Documentation/vm/hmm.txt
 
 HOST AP DRIVER
 M:	Jouni Malinen <j@w1.fi>
@@ -6516,7 +6526,7 @@ S:	Maintained
 F:	Documentation/networking/netvsc.txt
 F:	arch/x86/include/asm/mshyperv.h
 F:	arch/x86/include/asm/trace/hyperv.h
-F:	arch/x86/include/uapi/asm/hyperv.h
+F:	arch/x86/include/asm/hyperv-tlfs.h
 F:	arch/x86/kernel/cpu/mshyperv.c
 F:	arch/x86/hyperv
 F:	drivers/hid/hid-hyperv.c
@@ -6559,7 +6569,7 @@ F:	drivers/i2c/muxes/
 F:	include/linux/i2c-mux.h
 
 I2C MV64XXX MARVELL AND ALLWINNER DRIVER
-M:	Gregory CLEMENT <gregory.clement@free-electrons.com>
+M:	Gregory CLEMENT <gregory.clement@bootlin.com>
 L:	linux-i2c@vger.kernel.org
 S:	Maintained
 F:	drivers/i2c/busses/i2c-mv64xxx.c
@@ -6580,15 +6590,25 @@ W:	https://i2c.wiki.kernel.org/
 Q:	https://patchwork.ozlabs.org/project/linux-i2c/list/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux.git
 S:	Maintained
-F:	Documentation/devicetree/bindings/i2c/
+F:	Documentation/devicetree/bindings/i2c/i2c.txt
 F:	Documentation/i2c/
-F:	drivers/i2c/
-F:	drivers/i2c/*/
+F:	drivers/i2c/*
 F:	include/linux/i2c.h
-F:	include/linux/i2c-*.h
+F:	include/linux/i2c-dev.h
+F:	include/linux/i2c-smbus.h
 F:	include/uapi/linux/i2c.h
 F:	include/uapi/linux/i2c-*.h
 
+I2C SUBSYSTEM HOST DRIVERS
+L:	linux-i2c@vger.kernel.org
+W:	https://i2c.wiki.kernel.org/
+Q:	https://patchwork.ozlabs.org/project/linux-i2c/list/
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/wsa/linux.git
+S:	Odd Fixes
+F:	Documentation/devicetree/bindings/i2c/
+F:	drivers/i2c/algos/
+F:	drivers/i2c/busses/
+
 I2C-TAOS-EVM DRIVER
 M:	Jean Delvare <jdelvare@suse.com>
 L:	linux-i2c@vger.kernel.org
@@ -7344,7 +7364,7 @@ F:	include/linux/ipmi*
 F:	include/uapi/linux/ipmi*
 
 IPS SCSI RAID DRIVER
-M:	Adaptec OEM Raid Solutions <aacraid@adaptec.com>
+M:	Adaptec OEM Raid Solutions <aacraid@microsemi.com>
 L:	linux-scsi@vger.kernel.org
 W:	http://www.adaptec.com/
 S:	Maintained
@@ -7762,7 +7782,7 @@ F:	arch/powerpc/kernel/kvm*
 
 KERNEL VIRTUAL MACHINE for s390 (KVM/s390)
 M:	Christian Borntraeger <borntraeger@de.ibm.com>
-M:	Janosch Frank <frankja@linux.vnet.ibm.com>
+M:	Janosch Frank <frankja@linux.ibm.com>
 R:	David Hildenbrand <david@redhat.com>
 R:	Cornelia Huck <cohuck@redhat.com>
 L:	linux-s390@vger.kernel.org
@@ -7919,7 +7939,10 @@ F:	drivers/scsi/53c700*
 
 LEAKING_ADDRESSES
 M:	Tobin C. Harding <me@tobin.cc>
+M:	Tycho Andersen <tycho@tycho.ws>
+L:	kernel-hardening@lists.openwall.com
 S:	Maintained
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tobin/leaks.git
 F:	scripts/leaking_addresses.pl
 
 LED SUBSYSTEM
@@ -8045,6 +8068,14 @@ Q:	https://patchwork.kernel.org/project/linux-nvdimm/list/
 S:	Supported
 F:	drivers/nvdimm/pmem*
 
+LIBNVDIMM: DEVICETREE BINDINGS
+M:	Oliver O'Halloran <oohall@gmail.com>
+L:	linux-nvdimm@lists.01.org
+Q:	https://patchwork.kernel.org/project/linux-nvdimm/list/
+S:	Supported
+F:	drivers/nvdimm/of_pmem.c
+F:	Documentation/devicetree/bindings/pmem/pmem-region.txt
+
 LIBNVDIMM: NON-VOLATILE MEMORY DEVICE SUBSYSTEM
 M:	Dan Williams <dan.j.williams@intel.com>
 L:	linux-nvdimm@lists.01.org
@@ -8848,6 +8879,15 @@ M:	Sean Wang <sean.wang@mediatek.com>
 S:	Maintained
 F:	drivers/media/rc/mtk-cir.c
 
+MEDIATEK DMA DRIVER
+M:	Sean Wang <sean.wang@mediatek.com>
+L:	dmaengine@vger.kernel.org
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+L:	linux-mediatek@lists.infradead.org (moderated for non-subscribers)
+S:	Maintained
+F:	Documentation/devicetree/bindings/dma/mtk-*
+F:	drivers/dma/mediatek/
+
 MEDIATEK PMIC LED DRIVER
 M:	Sean Wang <sean.wang@mediatek.com>
 S:	Maintained
@@ -9219,6 +9259,15 @@ S:	Maintained
 F:	drivers/usb/misc/usb251xb.c
 F:	Documentation/devicetree/bindings/usb/usb251xb.txt
 
+MICROSEMI MIPS SOCS
+M:	Alexandre Belloni <alexandre.belloni@bootlin.com>
+L:	linux-mips@linux-mips.org
+S:	Maintained
+F:	arch/mips/generic/board-ocelot.c
+F:	arch/mips/configs/generic/board-ocelot.config
+F:	arch/mips/boot/dts/mscc/
+F:	Documentation/devicetree/bindings/mips/mscc.txt
+
 MICROSEMI SMART ARRAY SMARTPQI DRIVER (smartpqi)
 M:	Don Brace <don.brace@microsemi.com>
 L:	esc.storagedev@microsemi.com
@@ -11733,7 +11782,7 @@ F:	drivers/char/random.c
 
 RAPIDIO SUBSYSTEM
 M:	Matt Porter <mporter@kernel.crashing.org>
-M:	Alexandre Bounine <alexandre.bounine@idt.com>
+M:	Alexandre Bounine <alex.bou9@gmail.com>
 S:	Maintained
 F:	drivers/rapidio/
 
@@ -11807,7 +11856,7 @@ X:	kernel/torture.c
 
 REAL TIME CLOCK (RTC) SUBSYSTEM
 M:	Alessandro Zummo <a.zummo@towertech.it>
-M:	Alexandre Belloni <alexandre.belloni@free-electrons.com>
+M:	Alexandre Belloni <alexandre.belloni@bootlin.com>
 L:	linux-rtc@vger.kernel.org
 Q:	http://patchwork.ozlabs.org/project/rtc-linux/list/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/abelloni/linux.git
@@ -11871,6 +11920,11 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/geert/renesas-drivers.git c
 S:	Supported
 F:	drivers/clk/renesas/
 
+RENESAS EMEV2 I2C DRIVER
+M:	Wolfram Sang <wsa+renesas@sang-engineering.com>
+S:	Supported
+F:	drivers/i2c/busses/i2c-emev2.c
+
 RENESAS ETHERNET DRIVERS
 R:	Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
 L:	netdev@vger.kernel.org
@@ -11886,6 +11940,12 @@ L:	linux-iio@vger.kernel.org
 S:	Supported
 F:	drivers/iio/adc/rcar_gyro_adc.c
 
+RENESAS R-CAR I2C DRIVERS
+M:	Wolfram Sang <wsa+renesas@sang-engineering.com>
+S:	Supported
+F:	drivers/i2c/busses/i2c-rcar.c
+F:	drivers/i2c/busses/i2c-sh_mobile.c
+
 RENESAS USB PHY DRIVER
 M:	Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
 L:	linux-renesas-soc@vger.kernel.org
@@ -12089,16 +12149,16 @@ F:	Documentation/s390/
 F:	Documentation/driver-api/s390-drivers.rst
 
 S390 COMMON I/O LAYER
-M:	Sebastian Ott <sebott@linux.vnet.ibm.com>
-M:	Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
+M:	Sebastian Ott <sebott@linux.ibm.com>
+M:	Peter Oberparleiter <oberpar@linux.ibm.com>
 L:	linux-s390@vger.kernel.org
 W:	http://www.ibm.com/developerworks/linux/linux390/
 S:	Supported
 F:	drivers/s390/cio/
 
 S390 DASD DRIVER
-M:	Stefan Haberland <sth@linux.vnet.ibm.com>
-M:	Jan Hoeppner <hoeppner@linux.vnet.ibm.com>
+M:	Stefan Haberland <sth@linux.ibm.com>
+M:	Jan Hoeppner <hoeppner@linux.ibm.com>
 L:	linux-s390@vger.kernel.org
 W:	http://www.ibm.com/developerworks/linux/linux390/
 S:	Supported
@@ -12113,8 +12173,8 @@ S:	Supported
 F:	drivers/iommu/s390-iommu.c
 
 S390 IUCV NETWORK LAYER
-M:	Julian Wiedmann <jwi@linux.vnet.ibm.com>
-M:	Ursula Braun <ubraun@linux.vnet.ibm.com>
+M:	Julian Wiedmann <jwi@linux.ibm.com>
+M:	Ursula Braun <ubraun@linux.ibm.com>
 L:	linux-s390@vger.kernel.org
 W:	http://www.ibm.com/developerworks/linux/linux390/
 S:	Supported
@@ -12123,15 +12183,15 @@ F:	include/net/iucv/
 F:	net/iucv/
 
 S390 NETWORK DRIVERS
-M:	Julian Wiedmann <jwi@linux.vnet.ibm.com>
-M:	Ursula Braun <ubraun@linux.vnet.ibm.com>
+M:	Julian Wiedmann <jwi@linux.ibm.com>
+M:	Ursula Braun <ubraun@linux.ibm.com>
 L:	linux-s390@vger.kernel.org
 W:	http://www.ibm.com/developerworks/linux/linux390/
 S:	Supported
 F:	drivers/s390/net/
 
 S390 PCI SUBSYSTEM
-M:	Sebastian Ott <sebott@linux.vnet.ibm.com>
+M:	Sebastian Ott <sebott@linux.ibm.com>
 M:	Gerald Schaefer <gerald.schaefer@de.ibm.com>
 L:	linux-s390@vger.kernel.org
 W:	http://www.ibm.com/developerworks/linux/linux390/
@@ -12141,8 +12201,8 @@ F:	drivers/pci/hotplug/s390_pci_hpc.c
 
 S390 VFIO-CCW DRIVER
 M:	Cornelia Huck <cohuck@redhat.com>
-M:	Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
-M:	Halil Pasic <pasic@linux.vnet.ibm.com>
+M:	Dong Jia Shi <bjsdjshi@linux.ibm.com>
+M:	Halil Pasic <pasic@linux.ibm.com>
 L:	linux-s390@vger.kernel.org
 L:	kvm@vger.kernel.org
 S:	Supported
@@ -12158,8 +12218,8 @@ S:	Supported
 F:	drivers/s390/crypto/
 
 S390 ZFCP DRIVER
-M:	Steffen Maier <maier@linux.vnet.ibm.com>
-M:	Benjamin Block <bblock@linux.vnet.ibm.com>
+M:	Steffen Maier <maier@linux.ibm.com>
+M:	Benjamin Block <bblock@linux.ibm.com>
 L:	linux-s390@vger.kernel.org
 W:	http://www.ibm.com/developerworks/linux/linux390/
 S:	Supported
@@ -12304,6 +12364,7 @@ M:	Tomasz Figa <tomasz.figa@gmail.com>
 M:	Chanwoo Choi <cw00.choi@samsung.com>
 S:	Supported
 L:	linux-samsung-soc@vger.kernel.org (moderated for non-subscribers)
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/snawrocki/clk.git
 F:	drivers/clk/samsung/
 F:	include/dt-bindings/clock/exynos*.h
 F:	Documentation/devicetree/bindings/clock/exynos*.txt
@@ -12595,7 +12656,7 @@ S:	Maintained
 F:	drivers/misc/sgi-xp/
 
 SHARED MEMORY COMMUNICATIONS (SMC) SOCKETS
-M:	Ursula Braun <ubraun@linux.vnet.ibm.com>
+M:	Ursula Braun <ubraun@linux.ibm.com>
 L:	linux-s390@vger.kernel.org
 W:	http://www.ibm.com/developerworks/linux/linux390/
 S:	Supported
@@ -12899,6 +12960,13 @@ F:	include/media/soc*
 F:	drivers/media/i2c/soc_camera/
 F:	drivers/media/platform/soc_camera/
 
+SOCIONEXT SYNQUACER I2C DRIVER
+M:	Ard Biesheuvel <ard.biesheuvel@linaro.org>
+L:	linux-i2c@vger.kernel.org
+S:	Maintained
+F:	drivers/i2c/busses/i2c-synquacer.c
+F:	Documentation/devicetree/bindings/i2c/i2c-synquacer.txt
+
 SOCIONEXT UNIPHIER SOUND DRIVER
 M:	Katsuhiro Suzuki <suzuki.katsuhiro@socionext.com>
 L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
@@ -13455,6 +13523,12 @@ S:	Maintained
 F:	drivers/gpio/gpio-dwapb.c
 F:	Documentation/devicetree/bindings/gpio/snps-dwapb-gpio.txt
 
+SYNOPSYS DESIGNWARE AXI DMAC DRIVER
+M:	Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
+S:	Maintained
+F:	drivers/dma/dw-axi-dmac/
+F:	Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.txt
+
 SYNOPSYS DESIGNWARE DMAC DRIVER
 M:	Viresh Kumar <vireshk@kernel.org>
 R:	Andy Shevchenko <andriy.shevchenko@linux.intel.com>
@@ -13927,6 +14001,13 @@ F:	arch/arm/mach-davinci/
 F:	drivers/i2c/busses/i2c-davinci.c
 F:	arch/arm/boot/dts/da850*
 
+TI DAVINCI SERIES CLOCK DRIVER
+M:	David Lechner <david@lechnology.com>
+R:	Sekhar Nori <nsekhar@ti.com>
+S:	Maintained
+F:	Documentation/devicetree/bindings/clock/ti/davinci/
+F:	drivers/clk/davinci/
+
 TI DAVINCI SERIES GPIO DRIVER
 M:	Keerthy <j-keerthy@ti.com>
 L:	linux-gpio@vger.kernel.org
@@ -14917,7 +14998,7 @@ F:	include/uapi/linux/virtio_crypto.h
 
 VIRTIO DRIVERS FOR S390
 M:	Cornelia Huck <cohuck@redhat.com>
-M:	Halil Pasic <pasic@linux.vnet.ibm.com>
+M:	Halil Pasic <pasic@linux.ibm.com>
 L:	linux-s390@vger.kernel.org
 L:	virtualization@lists.linux-foundation.org
 L:	kvm@vger.kernel.org
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index e96adcbcab41..b2022885ced8 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -18,6 +18,7 @@ config ALPHA
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select AUDIT_ARCH
 	select GENERIC_CLOCKEVENTS
+	select GENERIC_CPU_VULNERABILITIES
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
diff --git a/arch/alpha/include/asm/io.h b/arch/alpha/include/asm/io.h
index d123ff90f7a8..4c533fc94d62 100644
--- a/arch/alpha/include/asm/io.h
+++ b/arch/alpha/include/asm/io.h
@@ -341,14 +341,14 @@ extern inline unsigned int ioread16(void __iomem *addr)
 
 extern inline void iowrite8(u8 b, void __iomem *addr)
 {
-	IO_CONCAT(__IO_PREFIX,iowrite8)(b, addr);
 	mb();
+	IO_CONCAT(__IO_PREFIX, iowrite8)(b, addr);
 }
 
 extern inline void iowrite16(u16 b, void __iomem *addr)
 {
-	IO_CONCAT(__IO_PREFIX,iowrite16)(b, addr);
 	mb();
+	IO_CONCAT(__IO_PREFIX, iowrite16)(b, addr);
 }
 
 extern inline u8 inb(unsigned long port)
@@ -382,8 +382,8 @@ extern inline unsigned int ioread32(void __iomem *addr)
 
 extern inline void iowrite32(u32 b, void __iomem *addr)
 {
-	IO_CONCAT(__IO_PREFIX,iowrite32)(b, addr);
 	mb();
+	IO_CONCAT(__IO_PREFIX, iowrite32)(b, addr);
 }
 
 extern inline u32 inl(unsigned long port)
@@ -434,14 +434,14 @@ extern inline u16 readw(const volatile void __iomem *addr)
 
 extern inline void writeb(u8 b, volatile void __iomem *addr)
 {
-	__raw_writeb(b, addr);
 	mb();
+	__raw_writeb(b, addr);
 }
 
 extern inline void writew(u16 b, volatile void __iomem *addr)
 {
-	__raw_writew(b, addr);
 	mb();
+	__raw_writew(b, addr);
 }
 #endif
 
@@ -482,14 +482,14 @@ extern inline u64 readq(const volatile void __iomem *addr)
 
 extern inline void writel(u32 b, volatile void __iomem *addr)
 {
-	__raw_writel(b, addr);
 	mb();
+	__raw_writel(b, addr);
 }
 
 extern inline void writeq(u64 b, volatile void __iomem *addr)
 {
-	__raw_writeq(b, addr);
 	mb();
+	__raw_writeq(b, addr);
 }
 #endif
 
diff --git a/arch/alpha/include/uapi/asm/mman.h b/arch/alpha/include/uapi/asm/mman.h
index 2dbdf59258d9..f9d4e6b6d4bd 100644
--- a/arch/alpha/include/uapi/asm/mman.h
+++ b/arch/alpha/include/uapi/asm/mman.h
@@ -32,6 +32,7 @@
 #define MAP_NONBLOCK	0x40000		/* do not block on IO */
 #define MAP_STACK	0x80000		/* give out an address that is best suited for process/thread stacks */
 #define MAP_HUGETLB	0x100000	/* create a huge page mapping */
+#define MAP_FIXED_NOREPLACE	0x200000/* MAP_FIXED which doesn't unmap underlying mapping */
 
 #define MS_ASYNC	1		/* sync memory asynchronously */
 #define MS_SYNC		2		/* synchronous memory sync */
diff --git a/arch/alpha/kernel/Makefile b/arch/alpha/kernel/Makefile
index bf7b41fa7b01..5a74581bf0ee 100644
--- a/arch/alpha/kernel/Makefile
+++ b/arch/alpha/kernel/Makefile
@@ -9,7 +9,7 @@ ccflags-y	:= -Wno-sign-compare
 
 obj-y    := entry.o traps.o process.o osf_sys.o irq.o \
 	    irq_alpha.o signal.o setup.o ptrace.o time.o \
-	    systbls.o err_common.o io.o
+	    systbls.o err_common.o io.o bugs.o
 
 obj-$(CONFIG_VGA_HOSE)	+= console.o
 obj-$(CONFIG_SMP)	+= smp.o
diff --git a/arch/alpha/kernel/bugs.c b/arch/alpha/kernel/bugs.c
new file mode 100644
index 000000000000..08cc10d7fa17
--- /dev/null
+++ b/arch/alpha/kernel/bugs.c
@@ -0,0 +1,45 @@
+
+#include <asm/hwrpb.h>
+#include <linux/device.h>
+
+
+#ifdef CONFIG_SYSFS
+
+static int cpu_is_ev6_or_later(void)
+{
+	struct percpu_struct *cpu;
+        unsigned long cputype;
+
+        cpu = (struct percpu_struct *)((char *)hwrpb + hwrpb->processor_offset);
+        cputype = cpu->type & 0xffffffff;
+        /* Include all of EV6, EV67, EV68, EV7, EV79 and EV69. */
+        return (cputype == EV6_CPU) || ((cputype >= EV67_CPU) && (cputype <= EV69_CPU));
+}
+
+ssize_t cpu_show_meltdown(struct device *dev,
+			  struct device_attribute *attr, char *buf)
+{
+	if (cpu_is_ev6_or_later())
+		return sprintf(buf, "Vulnerable\n");
+	else
+		return sprintf(buf, "Not affected\n");
+}
+
+ssize_t cpu_show_spectre_v1(struct device *dev,
+                            struct device_attribute *attr, char *buf)
+{
+	if (cpu_is_ev6_or_later())
+		return sprintf(buf, "Vulnerable\n");
+	else
+		return sprintf(buf, "Not affected\n");
+}
+
+ssize_t cpu_show_spectre_v2(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	if (cpu_is_ev6_or_later())
+		return sprintf(buf, "Vulnerable\n");
+	else
+		return sprintf(buf, "Not affected\n");
+}
+#endif
diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S
index d92abb01c249..c64806a2daf5 100644
--- a/arch/alpha/kernel/entry.S
+++ b/arch/alpha/kernel/entry.S
@@ -785,7 +785,6 @@ ret_from_kernel_thread:
 	mov	$9, $27
 	mov	$10, $16
 	jsr	$26, ($9)
-	mov	$31, $19		/* to disable syscall restarts */
 	br	$31, ret_to_user
 .end ret_from_kernel_thread
 
diff --git a/arch/alpha/kernel/pci-noop.c b/arch/alpha/kernel/pci-noop.c
index b995987b1557..b6ebb65127a8 100644
--- a/arch/alpha/kernel/pci-noop.c
+++ b/arch/alpha/kernel/pci-noop.c
@@ -15,6 +15,7 @@
 #include <linux/sched.h>
 #include <linux/dma-mapping.h>
 #include <linux/scatterlist.h>
+#include <linux/syscalls.h>
 
 #include "proto.h"
 
@@ -46,8 +47,8 @@ alloc_resource(void)
 	return alloc_bootmem(sizeof(struct resource));
 }
 
-asmlinkage long
-sys_pciconfig_iobase(long which, unsigned long bus, unsigned long dfn)
+SYSCALL_DEFINE3(pciconfig_iobase, long, which, unsigned long, bus,
+		unsigned long, dfn)
 {
 	struct pci_controller *hose;
 
@@ -84,9 +85,8 @@ sys_pciconfig_iobase(long which, unsigned long bus, unsigned long dfn)
 	return -EOPNOTSUPP;
 }
 
-asmlinkage long
-sys_pciconfig_read(unsigned long bus, unsigned long dfn,
-		   unsigned long off, unsigned long len, void *buf)
+SYSCALL_DEFINE5(pciconfig_read, unsigned long, bus, unsigned long, dfn,
+		unsigned long, off, unsigned long, len, void __user *, buf)
 {
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
@@ -94,9 +94,8 @@ sys_pciconfig_read(unsigned long bus, unsigned long dfn,
 		return -ENODEV;
 }
 
-asmlinkage long
-sys_pciconfig_write(unsigned long bus, unsigned long dfn,
-		    unsigned long off, unsigned long len, void *buf)
+SYSCALL_DEFINE5(pciconfig_write, unsigned long, bus, unsigned long, dfn,
+		unsigned long, off, unsigned long, len, void __user *, buf)
 {
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c
index 2e86ebb680ae..c668c3b7a167 100644
--- a/arch/alpha/kernel/pci.c
+++ b/arch/alpha/kernel/pci.c
@@ -22,6 +22,7 @@
 #include <linux/module.h>
 #include <linux/cache.h>
 #include <linux/slab.h>
+#include <linux/syscalls.h>
 #include <asm/machvec.h>
 
 #include "proto.h"
@@ -409,8 +410,8 @@ alloc_resource(void)
 /* Provide information on locations of various I/O regions in physical
    memory.  Do this on a per-card basis so that we choose the right hose.  */
 
-asmlinkage long
-sys_pciconfig_iobase(long which, unsigned long bus, unsigned long dfn)
+SYSCALL_DEFINE3(pciconfig_iobase, long, which, unsigned long, bus,
+		unsigned long, dfn)
 {
 	struct pci_controller *hose;
 	struct pci_dev *dev;
diff --git a/arch/alpha/kernel/rtc.c b/arch/alpha/kernel/rtc.c
index b3da0dcda47d..1376a2867048 100644
--- a/arch/alpha/kernel/rtc.c
+++ b/arch/alpha/kernel/rtc.c
@@ -97,7 +97,7 @@ alpha_rtc_read_time(struct device *dev, struct rtc_time *tm)
 		tm->tm_year = year;
 	}
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int
@@ -115,83 +115,6 @@ alpha_rtc_set_time(struct device *dev, struct rtc_time *tm)
 }
 
 static int
-alpha_rtc_set_mmss(struct device *dev, time64_t nowtime)
-{
-	int retval = 0;
-	int real_seconds, real_minutes, cmos_minutes;
-	unsigned char save_control, save_freq_select;
-
-	/* Note: This code only updates minutes and seconds.  Comments
-	   indicate this was to avoid messing with unknown time zones,
-	   and with the epoch nonsense described above.  In order for
-	   this to work, the existing clock cannot be off by more than
-	   15 minutes.
-
-	   ??? This choice is may be out of date.  The x86 port does
-	   not have problems with timezones, and the epoch processing has
-	   now been fixed in alpha_set_rtc_time.
-
-	   In either case, one can always force a full rtc update with
-	   the userland hwclock program, so surely 15 minute accuracy
-	   is no real burden.  */
-
-	/* In order to set the CMOS clock precisely, we have to be called
-	   500 ms after the second nowtime has started, because when
-	   nowtime is written into the registers of the CMOS clock, it will
-	   jump to the next second precisely 500 ms later. Check the Motorola
-	   MC146818A or Dallas DS12887 data sheet for details.  */
-
-	/* irq are locally disabled here */
-	spin_lock(&rtc_lock);
-	/* Tell the clock it's being set */
-	save_control = CMOS_READ(RTC_CONTROL);
-	CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL);
-
-	/* Stop and reset prescaler */
-	save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
-	CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT);
-
-	cmos_minutes = CMOS_READ(RTC_MINUTES);
-	if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
-		cmos_minutes = bcd2bin(cmos_minutes);
-
-	real_seconds = nowtime % 60;
-	real_minutes = nowtime / 60;
-	if (((abs(real_minutes - cmos_minutes) + 15) / 30) & 1) {
-		/* correct for half hour time zone */
-		real_minutes += 30;
-	}
-	real_minutes %= 60;
-
-	if (abs(real_minutes - cmos_minutes) < 30) {
-		if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
-			real_seconds = bin2bcd(real_seconds);
-			real_minutes = bin2bcd(real_minutes);
-		}
-		CMOS_WRITE(real_seconds,RTC_SECONDS);
-		CMOS_WRITE(real_minutes,RTC_MINUTES);
-	} else {
-		printk_once(KERN_NOTICE
-			    "set_rtc_mmss: can't update from %d to %d\n",
-			    cmos_minutes, real_minutes);
-		retval = -1;
-	}
-
-	/* The following flags have to be released exactly in this order,
-	 * otherwise the DS12887 (popular MC146818A clone with integrated
-	 * battery and quartz) will not reset the oscillator and will not
-	 * update precisely 500 ms later. You won't find this mentioned in
-	 * the Dallas Semiconductor data sheets, but who believes data
-	 * sheets anyway ...                           -- Markus Kuhn
-	 */
-	CMOS_WRITE(save_control, RTC_CONTROL);
-	CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
-	spin_unlock(&rtc_lock);
-
-	return retval;
-}
-
-static int
 alpha_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
 {
 	switch (cmd) {
@@ -210,7 +133,6 @@ alpha_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
 static const struct rtc_class_ops alpha_rtc_ops = {
 	.read_time = alpha_rtc_read_time,
 	.set_time = alpha_rtc_set_time,
-	.set_mmss64 = alpha_rtc_set_mmss,
 	.ioctl = alpha_rtc_ioctl,
 };
 
@@ -225,7 +147,6 @@ static const struct rtc_class_ops alpha_rtc_ops = {
 
 union remote_data {
 	struct rtc_time *tm;
-	unsigned long now;
 	long retval;
 };
 
@@ -267,29 +188,9 @@ remote_set_time(struct device *dev, struct rtc_time *tm)
 	return alpha_rtc_set_time(NULL, tm);
 }
 
-static void
-do_remote_mmss(void *data)
-{
-	union remote_data *x = data;
-	x->retval = alpha_rtc_set_mmss(NULL, x->now);
-}
-
-static int
-remote_set_mmss(struct device *dev, time64_t now)
-{
-	union remote_data x;
-	if (smp_processor_id() != boot_cpuid) {
-		x.now = now;
-		smp_call_function_single(boot_cpuid, do_remote_mmss, &x, 1);
-		return x.retval;
-	}
-	return alpha_rtc_set_mmss(NULL, now);
-}
-
 static const struct rtc_class_ops remote_rtc_ops = {
 	.read_time = remote_read_time,
 	.set_time = remote_set_time,
-	.set_mmss64 = remote_set_mmss,
 	.ioctl = alpha_rtc_ioctl,
 };
 #endif
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 1878083771af..a7f8e7f4b88f 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -7,6 +7,7 @@ config ARM
 	select ARCH_HAS_DEBUG_VIRTUAL if MMU
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_HAS_ELF_RANDOMIZE
+	select ARCH_HAS_FORTIFY_SOURCE
 	select ARCH_HAS_SET_MEMORY
 	select ARCH_HAS_PHYS_TO_DMA
 	select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL
diff --git a/arch/arm/boot/compressed/decompress.c b/arch/arm/boot/compressed/decompress.c
index a2ac3fe7dbf8..c16c1829a5e4 100644
--- a/arch/arm/boot/compressed/decompress.c
+++ b/arch/arm/boot/compressed/decompress.c
@@ -6,10 +6,7 @@
 #include <linux/stddef.h>	/* for NULL */
 #include <linux/linkage.h>
 #include <asm/string.h>
-
-extern unsigned long free_mem_ptr;
-extern unsigned long free_mem_end_ptr;
-extern void error(char *);
+#include "misc.h"
 
 #define STATIC static
 #define STATIC_RW_DATA	/* non-static please */
diff --git a/arch/arm/boot/compressed/misc.c b/arch/arm/boot/compressed/misc.c
index e8fe51f4e97a..e1e9a5dde853 100644
--- a/arch/arm/boot/compressed/misc.c
+++ b/arch/arm/boot/compressed/misc.c
@@ -22,9 +22,9 @@ unsigned int __machine_arch_type;
 #include <linux/compiler.h>	/* for inline */
 #include <linux/types.h>
 #include <linux/linkage.h>
+#include "misc.h"
 
 static void putstr(const char *ptr);
-extern void error(char *x);
 
 #include CONFIG_UNCOMPRESS_INCLUDE
 
@@ -160,3 +160,8 @@ decompress_kernel(unsigned long output_start, unsigned long free_mem_ptr_p,
 	else
 		putstr(" done, booting the kernel.\n");
 }
+
+void fortify_panic(const char *name)
+{
+	error("detected buffer overflow");
+}
diff --git a/arch/arm/boot/compressed/misc.h b/arch/arm/boot/compressed/misc.h
new file mode 100644
index 000000000000..c958dccd1d97
--- /dev/null
+++ b/arch/arm/boot/compressed/misc.h
@@ -0,0 +1,10 @@
+#ifndef MISC_H
+#define MISC_H
+
+#include <linux/compiler.h>
+
+void error(char *x) __noreturn;
+extern unsigned long free_mem_ptr;
+extern unsigned long free_mem_end_ptr;
+
+#endif
diff --git a/arch/arm/boot/dts/ls1021a.dtsi b/arch/arm/boot/dts/ls1021a.dtsi
index fbd2897566c3..c55d479971cc 100644
--- a/arch/arm/boot/dts/ls1021a.dtsi
+++ b/arch/arm/boot/dts/ls1021a.dtsi
@@ -587,7 +587,8 @@
 			device_type = "mdio";
 			#address-cells = <1>;
 			#size-cells = <0>;
-			reg = <0x0 0x2d24000 0x0 0x4000>;
+			reg = <0x0 0x2d24000 0x0 0x4000>,
+			      <0x0 0x2d10030 0x0 0x4>;
 		};
 
 		ptp_clock@2d10e00 {
diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index 74504b154256..869080bedb89 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -318,10 +318,8 @@ static inline void flush_anon_page(struct vm_area_struct *vma,
 #define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
 extern void flush_kernel_dcache_page(struct page *);
 
-#define flush_dcache_mmap_lock(mapping) \
-	spin_lock_irq(&(mapping)->tree_lock)
-#define flush_dcache_mmap_unlock(mapping) \
-	spin_unlock_irq(&(mapping)->tree_lock)
+#define flush_dcache_mmap_lock(mapping)		xa_lock_irq(&mapping->i_pages)
+#define flush_dcache_mmap_unlock(mapping)	xa_unlock_irq(&mapping->i_pages)
 
 #define flush_icache_user_range(vma,page,addr,len) \
 	flush_dcache_page(page)
diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index 36dd2962a42d..5a953ecb0d78 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -70,7 +70,10 @@ extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu);
 
 extern void __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high);
 
-extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
+/* no VHE on 32-bit :( */
+static inline int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) { BUG(); return 0; }
+
+extern int __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu);
 
 extern void __init_stage2_translation(void);
 
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 9003bd19cb70..6493bd479ddc 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -41,7 +41,17 @@ static inline unsigned long *vcpu_reg32(struct kvm_vcpu *vcpu, u8 reg_num)
 	return vcpu_reg(vcpu, reg_num);
 }
 
-unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu);
+unsigned long *__vcpu_spsr(struct kvm_vcpu *vcpu);
+
+static inline unsigned long vpcu_read_spsr(struct kvm_vcpu *vcpu)
+{
+	return *__vcpu_spsr(vcpu);
+}
+
+static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v)
+{
+	*__vcpu_spsr(vcpu) = v;
+}
 
 static inline unsigned long vcpu_get_reg(struct kvm_vcpu *vcpu,
 					 u8 reg_num)
@@ -92,14 +102,9 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
 	vcpu->arch.hcr = HCR_GUEST_MASK;
 }
 
-static inline unsigned long vcpu_get_hcr(const struct kvm_vcpu *vcpu)
-{
-	return vcpu->arch.hcr;
-}
-
-static inline void vcpu_set_hcr(struct kvm_vcpu *vcpu, unsigned long hcr)
+static inline unsigned long *vcpu_hcr(const struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.hcr = hcr;
+	return (unsigned long *)&vcpu->arch.hcr;
 }
 
 static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu)
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 248b930563e5..c6a749568dd6 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -155,9 +155,6 @@ struct kvm_vcpu_arch {
 	/* HYP trapping configuration */
 	u32 hcr;
 
-	/* Interrupt related fields */
-	u32 irq_lines;		/* IRQ and FIQ levels */
-
 	/* Exception Information */
 	struct kvm_vcpu_fault_info fault;
 
@@ -315,4 +312,7 @@ static inline bool kvm_arm_harden_branch_predictor(void)
 	return false;
 }
 
+static inline void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu) {}
+static inline void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) {}
+
 #endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/include/asm/kvm_hyp.h b/arch/arm/include/asm/kvm_hyp.h
index 1ab8329e9ff7..e93a0cac9add 100644
--- a/arch/arm/include/asm/kvm_hyp.h
+++ b/arch/arm/include/asm/kvm_hyp.h
@@ -110,6 +110,10 @@ void __sysreg_restore_state(struct kvm_cpu_context *ctxt);
 
 void __vgic_v3_save_state(struct kvm_vcpu *vcpu);
 void __vgic_v3_restore_state(struct kvm_vcpu *vcpu);
+void __vgic_v3_activate_traps(struct kvm_vcpu *vcpu);
+void __vgic_v3_deactivate_traps(struct kvm_vcpu *vcpu);
+void __vgic_v3_save_aprs(struct kvm_vcpu *vcpu);
+void __vgic_v3_restore_aprs(struct kvm_vcpu *vcpu);
 
 asmlinkage void __vfp_save_state(struct vfp_hard_struct *vfp);
 asmlinkage void __vfp_restore_state(struct vfp_hard_struct *vfp);
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index de1b919404e4..707a1f06dc5d 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -28,6 +28,13 @@
  */
 #define kern_hyp_va(kva)	(kva)
 
+/* Contrary to arm64, there is no need to generate a PC-relative address */
+#define hyp_symbol_addr(s)						\
+	({								\
+		typeof(s) *addr = &(s);					\
+		addr;							\
+	})
+
 /*
  * KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation levels.
  */
@@ -42,8 +49,15 @@
 #include <asm/pgalloc.h>
 #include <asm/stage2_pgtable.h>
 
+/* Ensure compatibility with arm64 */
+#define VA_BITS			32
+
 int create_hyp_mappings(void *from, void *to, pgprot_t prot);
-int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
+int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
+			   void __iomem **kaddr,
+			   void __iomem **haddr);
+int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
+			     void **haddr);
 void free_hyp_pgds(void);
 
 void stage2_unmap_vm(struct kvm *kvm);
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 496667703693..ed8fd0d19a3e 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -22,12 +22,6 @@
 #include <mach/memory.h>
 #endif
 
-/*
- * Allow for constants defined here to be used from assembly code
- * by prepending the UL suffix only with actual C code compilation.
- */
-#define UL(x) _AC(x, UL)
-
 /* PAGE_OFFSET - the virtual address of the start of the kernel image */
 #define PAGE_OFFSET		UL(CONFIG_PAGE_OFFSET)
 
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index 6edd177bb1c7..2ba95d6fe852 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -135,6 +135,15 @@ struct kvm_arch_memory_slot {
 #define KVM_REG_ARM_CRM_SHIFT		7
 #define KVM_REG_ARM_32_CRN_MASK		0x0000000000007800
 #define KVM_REG_ARM_32_CRN_SHIFT	11
+/*
+ * For KVM currently all guest registers are nonsecure, but we reserve a bit
+ * in the encoding to distinguish secure from nonsecure for AArch32 system
+ * registers that are banked by security. This is 1 for the secure banked
+ * register, and 0 for the nonsecure banked register or if the register is
+ * not banked by security.
+ */
+#define KVM_REG_ARM_SECURE_MASK	0x0000000010000000
+#define KVM_REG_ARM_SECURE_SHIFT	28
 
 #define ARM_CP15_REG_SHIFT_MASK(x,n) \
 	(((x) << KVM_REG_ARM_ ## n ## _SHIFT) & KVM_REG_ARM_ ## n ## _MASK)
diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S
index 12b87591eb7c..d32f5d35f602 100644
--- a/arch/arm/kernel/vmlinux-xip.lds.S
+++ b/arch/arm/kernel/vmlinux-xip.lds.S
@@ -15,38 +15,7 @@
 #include <asm/memory.h>
 #include <asm/page.h>
 
-#define PROC_INFO							\
-	. = ALIGN(4);							\
-	VMLINUX_SYMBOL(__proc_info_begin) = .;				\
-	*(.proc.info.init)						\
-	VMLINUX_SYMBOL(__proc_info_end) = .;
-
-#define IDMAP_TEXT							\
-	ALIGN_FUNCTION();						\
-	VMLINUX_SYMBOL(__idmap_text_start) = .;				\
-	*(.idmap.text)							\
-	VMLINUX_SYMBOL(__idmap_text_end) = .;				\
-	. = ALIGN(PAGE_SIZE);						\
-	VMLINUX_SYMBOL(__hyp_idmap_text_start) = .;			\
-	*(.hyp.idmap.text)						\
-	VMLINUX_SYMBOL(__hyp_idmap_text_end) = .;
-
-#ifdef CONFIG_HOTPLUG_CPU
-#define ARM_CPU_DISCARD(x)
-#define ARM_CPU_KEEP(x)		x
-#else
-#define ARM_CPU_DISCARD(x)	x
-#define ARM_CPU_KEEP(x)
-#endif
-
-#if (defined(CONFIG_SMP_ON_UP) && !defined(CONFIG_DEBUG_SPINLOCK)) || \
-	defined(CONFIG_GENERIC_BUG)
-#define ARM_EXIT_KEEP(x)	x
-#define ARM_EXIT_DISCARD(x)
-#else
-#define ARM_EXIT_KEEP(x)
-#define ARM_EXIT_DISCARD(x)	x
-#endif
+#include "vmlinux.lds.h"
 
 OUTPUT_ARCH(arm)
 ENTRY(stext)
@@ -69,20 +38,9 @@ SECTIONS
 	 * unwind sections get included.
 	 */
 	/DISCARD/ : {
-		*(.ARM.exidx.exit.text)
-		*(.ARM.extab.exit.text)
-		ARM_CPU_DISCARD(*(.ARM.exidx.cpuexit.text))
-		ARM_CPU_DISCARD(*(.ARM.extab.cpuexit.text))
-		ARM_EXIT_DISCARD(EXIT_TEXT)
-		ARM_EXIT_DISCARD(EXIT_DATA)
-		EXIT_CALL
-#ifndef CONFIG_MMU
-		*(.text.fixup)
-		*(__ex_table)
-#endif
+		ARM_DISCARD
 		*(.alt.smp.init)
-		*(.discard)
-		*(.discard.*)
+		*(.pv_table)
 	}
 
 	. = XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR);
@@ -95,22 +53,7 @@ SECTIONS
 
 	.text : {			/* Real text segment		*/
 		_stext = .;		/* Text and read-only data	*/
-			IDMAP_TEXT
-			__entry_text_start = .;
-			*(.entry.text)
-			__entry_text_end = .;
-			IRQENTRY_TEXT
-			TEXT_TEXT
-			SCHED_TEXT
-			CPUIDLE_TEXT
-			LOCK_TEXT
-			KPROBES_TEXT
-			*(.gnu.warning)
-			*(.glue_7)
-			*(.glue_7t)
-		. = ALIGN(4);
-		*(.got)			/* Global offset table		*/
-			ARM_CPU_KEEP(PROC_INFO)
+		ARM_TEXT
 	}
 
 	RO_DATA(PAGE_SIZE)
@@ -118,53 +61,19 @@ SECTIONS
 	. = ALIGN(4);
 	__ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
 		__start___ex_table = .;
-#ifdef CONFIG_MMU
-		*(__ex_table)
-#endif
+		ARM_MMU_KEEP(*(__ex_table))
 		__stop___ex_table = .;
 	}
 
 #ifdef CONFIG_ARM_UNWIND
-	/*
-	 * Stack unwinding tables
-	 */
-	. = ALIGN(8);
-	.ARM.unwind_idx : {
-		__start_unwind_idx = .;
-		*(.ARM.exidx*)
-		__stop_unwind_idx = .;
-	}
-	.ARM.unwind_tab : {
-		__start_unwind_tab = .;
-		*(.ARM.extab*)
-		__stop_unwind_tab = .;
-	}
+	ARM_UNWIND_SECTIONS
 #endif
 
 	NOTES
 
 	_etext = .;			/* End of text and rodata section */
 
-	/*
-	 * The vectors and stubs are relocatable code, and the
-	 * only thing that matters is their relative offsets
-	 */
-	__vectors_start = .;
-	.vectors 0xffff0000 : AT(__vectors_start) {
-		*(.vectors)
-	}
-	. = __vectors_start + SIZEOF(.vectors);
-	__vectors_end = .;
-
-	__stubs_start = .;
-	.stubs ADDR(.vectors) + 0x1000 : AT(__stubs_start) {
-		*(.stubs)
-	}
-	. = __stubs_start + SIZEOF(.stubs);
-	__stubs_end = .;
-
-	PROVIDE(vector_fiq_offset = vector_fiq - ADDR(.vectors));
-
+	ARM_VECTORS
 	INIT_TEXT_SECTION(8)
 	.exit.text : {
 		ARM_EXIT_KEEP(EXIT_TEXT)
@@ -223,6 +132,10 @@ SECTIONS
 	PERCPU_SECTION(L1_CACHE_BYTES)
 #endif
 
+#ifdef CONFIG_HAVE_TCM
+	ARM_TCM
+#endif
+
 	/*
 	 * End of copied data. We need a dummy section to get its LMA.
 	 * Also located before final ALIGN() as trailing padding is not stored
@@ -234,63 +147,6 @@ SECTIONS
 	. = ALIGN(PAGE_SIZE);
 	__init_end = .;
 
-#ifdef CONFIG_HAVE_TCM
-        /*
-	 * We align everything to a page boundary so we can
-	 * free it after init has commenced and TCM contents have
-	 * been copied to its destination.
-	 */
-	.tcm_start : {
-		. = ALIGN(PAGE_SIZE);
-		__tcm_start = .;
-		__itcm_start = .;
-	}
-
-	/*
-	 * Link these to the ITCM RAM
-	 * Put VMA to the TCM address and LMA to the common RAM
-	 * and we'll upload the contents from RAM to TCM and free
-	 * the used RAM after that.
-	 */
-	.text_itcm ITCM_OFFSET : AT(__itcm_start)
-	{
-		__sitcm_text = .;
-		*(.tcm.text)
-		*(.tcm.rodata)
-		. = ALIGN(4);
-		__eitcm_text = .;
-	}
-
-	/*
-	 * Reset the dot pointer, this is needed to create the
-	 * relative __dtcm_start below (to be used as extern in code).
-	 */
-	. = ADDR(.tcm_start) + SIZEOF(.tcm_start) + SIZEOF(.text_itcm);
-
-	.dtcm_start : {
-		__dtcm_start = .;
-	}
-
-	/* TODO: add remainder of ITCM as well, that can be used for data! */
-	.data_dtcm DTCM_OFFSET : AT(__dtcm_start)
-	{
-		. = ALIGN(4);
-		__sdtcm_data = .;
-		*(.tcm.data)
-		. = ALIGN(4);
-		__edtcm_data = .;
-	}
-
-	/* Reset the dot pointer or the linker gets confused */
-	. = ADDR(.dtcm_start) + SIZEOF(.data_dtcm);
-
-	/* End marker for freeing TCM copy in linked object */
-	.tcm_end : AT(ADDR(.dtcm_start) + SIZEOF(.data_dtcm)){
-		. = ALIGN(PAGE_SIZE);
-		__tcm_end = .;
-	}
-#endif
-
 	BSS_SECTION(0, 0, 8)
 	_end = .;
 
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index 84a1ae3ce46e..b77dc675ae55 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -15,43 +15,7 @@
 #include <asm/page.h>
 #include <asm/pgtable.h>
 
-#define PROC_INFO							\
-	. = ALIGN(4);							\
-	VMLINUX_SYMBOL(__proc_info_begin) = .;				\
-	*(.proc.info.init)						\
-	VMLINUX_SYMBOL(__proc_info_end) = .;
-
-#define HYPERVISOR_TEXT							\
-	VMLINUX_SYMBOL(__hyp_text_start) = .;				\
-	*(.hyp.text)							\
-	VMLINUX_SYMBOL(__hyp_text_end) = .;
-
-#define IDMAP_TEXT							\
-	ALIGN_FUNCTION();						\
-	VMLINUX_SYMBOL(__idmap_text_start) = .;				\
-	*(.idmap.text)							\
-	VMLINUX_SYMBOL(__idmap_text_end) = .;				\
-	. = ALIGN(PAGE_SIZE);						\
-	VMLINUX_SYMBOL(__hyp_idmap_text_start) = .;			\
-	*(.hyp.idmap.text)						\
-	VMLINUX_SYMBOL(__hyp_idmap_text_end) = .;
-
-#ifdef CONFIG_HOTPLUG_CPU
-#define ARM_CPU_DISCARD(x)
-#define ARM_CPU_KEEP(x)		x
-#else
-#define ARM_CPU_DISCARD(x)	x
-#define ARM_CPU_KEEP(x)
-#endif
-
-#if (defined(CONFIG_SMP_ON_UP) && !defined(CONFIG_DEBUG_SPINLOCK)) || \
-	defined(CONFIG_GENERIC_BUG) || defined(CONFIG_JUMP_LABEL)
-#define ARM_EXIT_KEEP(x)	x
-#define ARM_EXIT_DISCARD(x)
-#else
-#define ARM_EXIT_KEEP(x)
-#define ARM_EXIT_DISCARD(x)	x
-#endif
+#include "vmlinux.lds.h"
 
 OUTPUT_ARCH(arm)
 ENTRY(stext)
@@ -74,22 +38,10 @@ SECTIONS
 	 * unwind sections get included.
 	 */
 	/DISCARD/ : {
-		*(.ARM.exidx.exit.text)
-		*(.ARM.extab.exit.text)
-		ARM_CPU_DISCARD(*(.ARM.exidx.cpuexit.text))
-		ARM_CPU_DISCARD(*(.ARM.extab.cpuexit.text))
-		ARM_EXIT_DISCARD(EXIT_TEXT)
-		ARM_EXIT_DISCARD(EXIT_DATA)
-		EXIT_CALL
-#ifndef CONFIG_MMU
-		*(.text.fixup)
-		*(__ex_table)
-#endif
+		ARM_DISCARD
 #ifndef CONFIG_SMP_ON_UP
 		*(.alt.smp.init)
 #endif
-		*(.discard)
-		*(.discard.*)
 	}
 
 	. = PAGE_OFFSET + TEXT_OFFSET;
@@ -104,24 +56,7 @@ SECTIONS
 
 	.text : {			/* Real text segment		*/
 		_stext = .;		/* Text and read-only data	*/
-			IDMAP_TEXT
-			__entry_text_start = .;
-			*(.entry.text)
-			__entry_text_end = .;
-			IRQENTRY_TEXT
-			SOFTIRQENTRY_TEXT
-			TEXT_TEXT
-			SCHED_TEXT
-			CPUIDLE_TEXT
-			LOCK_TEXT
-			HYPERVISOR_TEXT
-			KPROBES_TEXT
-			*(.gnu.warning)
-			*(.glue_7)
-			*(.glue_7t)
-		. = ALIGN(4);
-		*(.got)			/* Global offset table		*/
-			ARM_CPU_KEEP(PROC_INFO)
+		ARM_TEXT
 	}
 
 #ifdef CONFIG_DEBUG_ALIGN_RODATA
@@ -134,27 +69,12 @@ SECTIONS
 	. = ALIGN(4);
 	__ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
 		__start___ex_table = .;
-#ifdef CONFIG_MMU
-		*(__ex_table)
-#endif
+		ARM_MMU_KEEP(*(__ex_table))
 		__stop___ex_table = .;
 	}
 
 #ifdef CONFIG_ARM_UNWIND
-	/*
-	 * Stack unwinding tables
-	 */
-	. = ALIGN(8);
-	.ARM.unwind_idx : {
-		__start_unwind_idx = .;
-		*(.ARM.exidx*)
-		__stop_unwind_idx = .;
-	}
-	.ARM.unwind_tab : {
-		__start_unwind_tab = .;
-		*(.ARM.extab*)
-		__stop_unwind_tab = .;
-	}
+	ARM_UNWIND_SECTIONS
 #endif
 
 	NOTES
@@ -166,26 +86,7 @@ SECTIONS
 #endif
 	__init_begin = .;
 
-	/*
-	 * The vectors and stubs are relocatable code, and the
-	 * only thing that matters is their relative offsets
-	 */
-	__vectors_start = .;
-	.vectors 0xffff0000 : AT(__vectors_start) {
-		*(.vectors)
-	}
-	. = __vectors_start + SIZEOF(.vectors);
-	__vectors_end = .;
-
-	__stubs_start = .;
-	.stubs ADDR(.vectors) + 0x1000 : AT(__stubs_start) {
-		*(.stubs)
-	}
-	. = __stubs_start + SIZEOF(.stubs);
-	__stubs_end = .;
-
-	PROVIDE(vector_fiq_offset = vector_fiq - ADDR(.vectors));
-
+	ARM_VECTORS
 	INIT_TEXT_SECTION(8)
 	.exit.text : {
 		ARM_EXIT_KEEP(EXIT_TEXT)
@@ -226,6 +127,10 @@ SECTIONS
 	PERCPU_SECTION(L1_CACHE_BYTES)
 #endif
 
+#ifdef CONFIG_HAVE_TCM
+	ARM_TCM
+#endif
+
 #ifdef CONFIG_STRICT_KERNEL_RWX
 	. = ALIGN(1<<SECTION_SHIFT);
 #else
@@ -237,63 +142,6 @@ SECTIONS
 	RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
 	_edata = .;
 
-#ifdef CONFIG_HAVE_TCM
-        /*
-	 * We align everything to a page boundary so we can
-	 * free it after init has commenced and TCM contents have
-	 * been copied to its destination.
-	 */
-	.tcm_start : {
-		. = ALIGN(PAGE_SIZE);
-		__tcm_start = .;
-		__itcm_start = .;
-	}
-
-	/*
-	 * Link these to the ITCM RAM
-	 * Put VMA to the TCM address and LMA to the common RAM
-	 * and we'll upload the contents from RAM to TCM and free
-	 * the used RAM after that.
-	 */
-	.text_itcm ITCM_OFFSET : AT(__itcm_start)
-	{
-		__sitcm_text = .;
-		*(.tcm.text)
-		*(.tcm.rodata)
-		. = ALIGN(4);
-		__eitcm_text = .;
-	}
-
-	/*
-	 * Reset the dot pointer, this is needed to create the
-	 * relative __dtcm_start below (to be used as extern in code).
-	 */
-	. = ADDR(.tcm_start) + SIZEOF(.tcm_start) + SIZEOF(.text_itcm);
-
-	.dtcm_start : {
-		__dtcm_start = .;
-	}
-
-	/* TODO: add remainder of ITCM as well, that can be used for data! */
-	.data_dtcm DTCM_OFFSET : AT(__dtcm_start)
-	{
-		. = ALIGN(4);
-		__sdtcm_data = .;
-		*(.tcm.data)
-		. = ALIGN(4);
-		__edtcm_data = .;
-	}
-
-	/* Reset the dot pointer or the linker gets confused */
-	. = ADDR(.dtcm_start) + SIZEOF(.data_dtcm);
-
-	/* End marker for freeing TCM copy in linked object */
-	.tcm_end : AT(ADDR(.dtcm_start) + SIZEOF(.data_dtcm)){
-		. = ALIGN(PAGE_SIZE);
-		__tcm_end = .;
-	}
-#endif
-
 	BSS_SECTION(0, 0, 0)
 	_end = .;
 
diff --git a/arch/arm/kernel/vmlinux.lds.h b/arch/arm/kernel/vmlinux.lds.h
new file mode 100644
index 000000000000..71281e08e1d4
--- /dev/null
+++ b/arch/arm/kernel/vmlinux.lds.h
@@ -0,0 +1,135 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifdef CONFIG_HOTPLUG_CPU
+#define ARM_CPU_DISCARD(x)
+#define ARM_CPU_KEEP(x)		x
+#else
+#define ARM_CPU_DISCARD(x)	x
+#define ARM_CPU_KEEP(x)
+#endif
+
+#if (defined(CONFIG_SMP_ON_UP) && !defined(CONFIG_DEBUG_SPINLOCK)) || \
+	defined(CONFIG_GENERIC_BUG) || defined(CONFIG_JUMP_LABEL)
+#define ARM_EXIT_KEEP(x)	x
+#define ARM_EXIT_DISCARD(x)
+#else
+#define ARM_EXIT_KEEP(x)
+#define ARM_EXIT_DISCARD(x)	x
+#endif
+
+#ifdef CONFIG_MMU
+#define ARM_MMU_KEEP(x)		x
+#define ARM_MMU_DISCARD(x)
+#else
+#define ARM_MMU_KEEP(x)
+#define ARM_MMU_DISCARD(x)	x
+#endif
+
+#define PROC_INFO							\
+		. = ALIGN(4);						\
+		VMLINUX_SYMBOL(__proc_info_begin) = .;			\
+		*(.proc.info.init)					\
+		VMLINUX_SYMBOL(__proc_info_end) = .;
+
+#define HYPERVISOR_TEXT							\
+		VMLINUX_SYMBOL(__hyp_text_start) = .;			\
+		*(.hyp.text)						\
+		VMLINUX_SYMBOL(__hyp_text_end) = .;
+
+#define IDMAP_TEXT							\
+		ALIGN_FUNCTION();					\
+		VMLINUX_SYMBOL(__idmap_text_start) = .;			\
+		*(.idmap.text)						\
+		VMLINUX_SYMBOL(__idmap_text_end) = .;			\
+		. = ALIGN(PAGE_SIZE);					\
+		VMLINUX_SYMBOL(__hyp_idmap_text_start) = .;		\
+		*(.hyp.idmap.text)					\
+		VMLINUX_SYMBOL(__hyp_idmap_text_end) = .;
+
+#define ARM_DISCARD							\
+		*(.ARM.exidx.exit.text)					\
+		*(.ARM.extab.exit.text)					\
+		ARM_CPU_DISCARD(*(.ARM.exidx.cpuexit.text))		\
+		ARM_CPU_DISCARD(*(.ARM.extab.cpuexit.text))		\
+		ARM_EXIT_DISCARD(EXIT_TEXT)				\
+		ARM_EXIT_DISCARD(EXIT_DATA)				\
+		EXIT_CALL						\
+		ARM_MMU_DISCARD(*(.text.fixup))				\
+		ARM_MMU_DISCARD(*(__ex_table))				\
+		*(.discard)						\
+		*(.discard.*)
+
+#define ARM_TEXT							\
+		IDMAP_TEXT						\
+		__entry_text_start = .;					\
+		*(.entry.text)						\
+		__entry_text_end = .;					\
+		IRQENTRY_TEXT						\
+		SOFTIRQENTRY_TEXT					\
+		TEXT_TEXT						\
+		SCHED_TEXT						\
+		CPUIDLE_TEXT						\
+		LOCK_TEXT						\
+		HYPERVISOR_TEXT						\
+		KPROBES_TEXT						\
+		*(.gnu.warning)						\
+		*(.glue_7)						\
+		*(.glue_7t)						\
+		. = ALIGN(4);						\
+		*(.got)			/* Global offset table */	\
+		ARM_CPU_KEEP(PROC_INFO)
+
+/* Stack unwinding tables */
+#define ARM_UNWIND_SECTIONS						\
+	. = ALIGN(8);							\
+	.ARM.unwind_idx : {						\
+		__start_unwind_idx = .;					\
+		*(.ARM.exidx*)						\
+		__stop_unwind_idx = .;					\
+	}								\
+	.ARM.unwind_tab : {						\
+		__start_unwind_tab = .;					\
+		*(.ARM.extab*)						\
+		__stop_unwind_tab = .;					\
+	}
+
+/*
+ * The vectors and stubs are relocatable code, and the
+ * only thing that matters is their relative offsets
+ */
+#define ARM_VECTORS							\
+	__vectors_start = .;						\
+	.vectors 0xffff0000 : AT(__vectors_start) {			\
+		*(.vectors)						\
+	}								\
+	. = __vectors_start + SIZEOF(.vectors);				\
+	__vectors_end = .;						\
+									\
+	__stubs_start = .;						\
+	.stubs ADDR(.vectors) + 0x1000 : AT(__stubs_start) {		\
+		*(.stubs)						\
+	}								\
+	. = __stubs_start + SIZEOF(.stubs);				\
+	__stubs_end = .;						\
+									\
+	PROVIDE(vector_fiq_offset = vector_fiq - ADDR(.vectors));
+
+#define ARM_TCM								\
+	__itcm_start = ALIGN(4);					\
+	.text_itcm ITCM_OFFSET : AT(__itcm_start - LOAD_OFFSET) {	\
+		__sitcm_text = .;					\
+		*(.tcm.text)						\
+		*(.tcm.rodata)						\
+		. = ALIGN(4);						\
+		__eitcm_text = .;					\
+	}								\
+	. = __itcm_start + SIZEOF(.text_itcm);				\
+									\
+	__dtcm_start = .;						\
+	.data_dtcm DTCM_OFFSET : AT(__dtcm_start - LOAD_OFFSET) {	\
+		__sdtcm_data = .;					\
+		*(.tcm.data)						\
+		. = ALIGN(4);						\
+		__edtcm_data = .;					\
+	}								\
+	. = __dtcm_start + SIZEOF(.data_dtcm);
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index 6d1d2e26dfe5..3a02e76699a6 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -270,6 +270,60 @@ static bool access_gic_sre(struct kvm_vcpu *vcpu,
 	return true;
 }
 
+static bool access_cntp_tval(struct kvm_vcpu *vcpu,
+			     const struct coproc_params *p,
+			     const struct coproc_reg *r)
+{
+	u64 now = kvm_phys_timer_read();
+	u64 val;
+
+	if (p->is_write) {	/* CVAL = now + sign-extended TVAL */
+		val = (s32)*vcpu_reg(vcpu, p->Rt1); /* signed 32-bit TVAL */
+		kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL, val + now);
+	} else {		/* TVAL = CVAL - now, narrowed on store */
+		val = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL);
+		*vcpu_reg(vcpu, p->Rt1) = val - now;
+	}
+
+	return true;
+}
+
+static bool access_cntp_ctl(struct kvm_vcpu *vcpu,
+			    const struct coproc_params *p,
+			    const struct coproc_reg *r)
+{
+	u32 val;
+
+	if (p->is_write) {	/* write: Rt1 -> KVM_REG_ARM_PTIMER_CTL */
+		val = *vcpu_reg(vcpu, p->Rt1);
+		kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CTL, val);
+	} else {		/* read: KVM_REG_ARM_PTIMER_CTL -> Rt1 */
+		val = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CTL);
+		*vcpu_reg(vcpu, p->Rt1) = val;
+	}
+
+	return true;
+}
+
+static bool access_cntp_cval(struct kvm_vcpu *vcpu,
+			     const struct coproc_params *p,
+			     const struct coproc_reg *r)
+{
+	u64 val;
+
+	if (p->is_write) {	/* 64-bit value arrives split over Rt2:Rt1 */
+		val = (u64)*vcpu_reg(vcpu, p->Rt2) << 32;	/* high word */
+		val |= *vcpu_reg(vcpu, p->Rt1);			/* low word */
+		kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL, val);
+	} else {
+		val = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL);
+		*vcpu_reg(vcpu, p->Rt1) = val;		/* low word */
+		*vcpu_reg(vcpu, p->Rt2) = val >> 32;	/* high word */
+	}
+
+	return true;
+}
+
 /*
  * We could trap ID_DFR0 and tell the guest we don't support performance
  * monitoring.  Unfortunately the patch to make the kernel check ID_DFR0 was
@@ -423,10 +477,17 @@ static const struct coproc_reg cp15_regs[] = {
 	{ CRn(13), CRm( 0), Op1( 0), Op2( 4), is32,
 			NULL, reset_unknown, c13_TID_PRIV },
 
+	/* CNTP_CVAL: physical timer compare value (64-bit access) */
+	{ CRm64(14), Op1( 2), is64, access_cntp_cval},
+
 	/* CNTKCTL: swapped by interrupt.S. */
 	{ CRn(14), CRm( 1), Op1( 0), Op2( 0), is32,
 			NULL, reset_val, c14_CNTKCTL, 0x00000000 },
 
+	/* CNTP_TVAL and CNTP_CTL: physical timer value and control */
+	{ CRn(14), CRm( 2), Op1( 0), Op2( 0), is32, access_cntp_tval },
+	{ CRn(14), CRm( 2), Op1( 0), Op2( 1), is32, access_cntp_ctl },
+
 	/* The Configuration Base Address Register. */
 	{ CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar},
 };
diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c
index cdff963f133a..9046b53d87c1 100644
--- a/arch/arm/kvm/emulate.c
+++ b/arch/arm/kvm/emulate.c
@@ -142,7 +142,7 @@ unsigned long *vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num)
 /*
  * Return the SPSR for the current mode of the virtual CPU.
  */
-unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu)
+unsigned long *__vcpu_spsr(struct kvm_vcpu *vcpu)
 {
 	unsigned long mode = *vcpu_cpsr(vcpu) & MODE_MASK;
 	switch (mode) {
@@ -174,5 +174,5 @@ unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu)
  */
 void kvm_inject_vabt(struct kvm_vcpu *vcpu)
 {
-	vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) | HCR_VA);
+	*vcpu_hcr(vcpu) |= HCR_VA;
 }
diff --git a/arch/arm/kvm/hyp/Makefile b/arch/arm/kvm/hyp/Makefile
index 63d6b404d88e..7fc0638f263a 100644
--- a/arch/arm/kvm/hyp/Makefile
+++ b/arch/arm/kvm/hyp/Makefile
@@ -9,7 +9,6 @@ KVM=../../../../virt/kvm
 
 CFLAGS_ARMV7VE		   :=$(call cc-option, -march=armv7ve)
 
-obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o
 obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o
 obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o
 
diff --git a/arch/arm/kvm/hyp/switch.c b/arch/arm/kvm/hyp/switch.c
index ae45ae96aac2..acf1c37fa49c 100644
--- a/arch/arm/kvm/hyp/switch.c
+++ b/arch/arm/kvm/hyp/switch.c
@@ -44,7 +44,7 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu, u32 *fpexc_host)
 		isb();
 	}
 
-	write_sysreg(vcpu->arch.hcr | vcpu->arch.irq_lines, HCR);
+	write_sysreg(vcpu->arch.hcr, HCR);
 	/* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */
 	write_sysreg(HSTR_T(15), HSTR);
 	write_sysreg(HCPTR_TTA | HCPTR_TCP(10) | HCPTR_TCP(11), HCPTR);
@@ -90,18 +90,18 @@ static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu)
 
 static void __hyp_text __vgic_save_state(struct kvm_vcpu *vcpu)
 {
-	if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
+	if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
 		__vgic_v3_save_state(vcpu);
-	else
-		__vgic_v2_save_state(vcpu);
+		__vgic_v3_deactivate_traps(vcpu);
+	}
 }
 
 static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu)
 {
-	if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
+	if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
+		__vgic_v3_activate_traps(vcpu);
 		__vgic_v3_restore_state(vcpu);
-	else
-		__vgic_v2_restore_state(vcpu);
+	}
 }
 
 static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu)
@@ -154,7 +154,7 @@ static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu)
 	return true;
 }
 
-int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
+int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
 {
 	struct kvm_cpu_context *host_ctxt;
 	struct kvm_cpu_context *guest_ctxt;
diff --git a/arch/arm/mach-npcm/npcm7xx.c b/arch/arm/mach-npcm/npcm7xx.c
index 5f7cd88103ef..c5f77d854c4f 100644
--- a/arch/arm/mach-npcm/npcm7xx.c
+++ b/arch/arm/mach-npcm/npcm7xx.c
@@ -17,4 +17,6 @@ static const char *const npcm7xx_dt_match[] = {
 DT_MACHINE_START(NPCM7XX_DT, "NPCM7XX Chip family")
 	.atag_offset	= 0x100,
 	.dt_compat	= npcm7xx_dt_match,
+	.l2c_aux_val	= 0x0,
+	.l2c_aux_mask	= ~0x0,
 MACHINE_END
diff --git a/arch/arm/mach-sa1100/Kconfig b/arch/arm/mach-sa1100/Kconfig
index 07df3a59b13f..fde7ef1ab192 100644
--- a/arch/arm/mach-sa1100/Kconfig
+++ b/arch/arm/mach-sa1100/Kconfig
@@ -6,6 +6,8 @@ config SA1100_ASSABET
 	bool "Assabet"
 	select ARM_SA1110_CPUFREQ
 	select GPIO_REG
+	select REGULATOR
+	select REGULATOR_FIXED_VOLTAGE
 	help
 	  Say Y here if you are using the Intel(R) StrongARM(R) SA-1110
 	  Microprocessor Development Board (also known as the Assabet).
@@ -137,6 +139,8 @@ config SA1100_PLEB
 config SA1100_SHANNON
 	bool "Shannon"
 	select ARM_SA1100_CPUFREQ
+	select REGULATOR
+	select REGULATOR_FIXED_VOLTAGE
 	help
 	  The Shannon (also known as a Tuxscreen, and also as a IS2630) was a
 	  limited edition webphone produced by Philips. The Shannon is a SA1100
diff --git a/arch/arm/mach-sa1100/assabet.c b/arch/arm/mach-sa1100/assabet.c
index f68241d995f2..575ec085cffa 100644
--- a/arch/arm/mach-sa1100/assabet.c
+++ b/arch/arm/mach-sa1100/assabet.c
@@ -14,8 +14,11 @@
 #include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/gpio/gpio-reg.h>
+#include <linux/gpio/machine.h>
 #include <linux/ioport.h>
 #include <linux/platform_data/sa11x0-serial.h>
+#include <linux/regulator/fixed.h>
+#include <linux/regulator/machine.h>
 #include <linux/serial_core.h>
 #include <linux/platform_device.h>
 #include <linux/mfd/ucb1x00.h>
@@ -445,6 +448,29 @@ static struct resource neponset_resources[] = {
 };
 #endif
 
+static struct gpiod_lookup_table assabet_cf_gpio_table = {
+	.dev_id = "sa11x0-pcmcia.1",
+	.table = {
+		GPIO_LOOKUP("gpio", 21, "ready", GPIO_ACTIVE_HIGH),
+		GPIO_LOOKUP("gpio", 22, "detect", GPIO_ACTIVE_LOW),
+		GPIO_LOOKUP("gpio", 24, "bvd2", GPIO_ACTIVE_HIGH),
+		GPIO_LOOKUP("gpio", 25, "bvd1", GPIO_ACTIVE_HIGH),
+		GPIO_LOOKUP("assabet", 1, "reset", GPIO_ACTIVE_HIGH),
+		GPIO_LOOKUP("assabet", 7, "bus-enable", GPIO_ACTIVE_LOW),
+		{ },
+	},
+};
+
+static struct regulator_consumer_supply assabet_cf_vcc_consumers[] = {
+	REGULATOR_SUPPLY("vcc", "sa11x0-pcmcia.1"),
+};
+
+static struct fixed_voltage_config assabet_cf_vcc_pdata __initdata = {
+	.supply_name = "cf-power",
+	.microvolts = 3300000,	/* 3.3V */
+	.enable_high = 1,	/* .gpio is filled in by assabet_init_irq() */
+};
+
 static void __init assabet_init(void)
 {
 	/*
@@ -490,6 +516,11 @@ static void __init assabet_init(void)
 		platform_device_register_simple("neponset", 0,
 			neponset_resources, ARRAY_SIZE(neponset_resources));
 #endif
+	} else {
+		sa11x0_register_fixed_regulator(0, &assabet_cf_vcc_pdata,
+					 assabet_cf_vcc_consumers,
+					 ARRAY_SIZE(assabet_cf_vcc_consumers));
+
 	}
 
 #ifndef ASSABET_PAL_VIDEO
@@ -501,6 +532,9 @@ static void __init assabet_init(void)
 			    ARRAY_SIZE(assabet_flash_resources));
 	sa11x0_register_irda(&assabet_irda_data);
 	sa11x0_register_mcp(&assabet_mcp_data);
+
+	if (!machine_has_neponset())
+		sa11x0_register_pcmcia(1, &assabet_cf_gpio_table);
 }
 
 /*
@@ -768,6 +802,7 @@ fs_initcall(assabet_leds_init);
 
 void __init assabet_init_irq(void)
 {
+	unsigned int assabet_gpio_base;
 	u32 def_val;
 
 	sa1100_init_irq();
@@ -782,7 +817,9 @@ void __init assabet_init_irq(void)
 	 *
 	 * This must precede any driver calls to BCR_set() or BCR_clear().
 	 */
-	assabet_init_gpio((void *)&ASSABET_BCR, def_val);
+	assabet_gpio_base = assabet_init_gpio((void *)&ASSABET_BCR, def_val);
+
+	assabet_cf_vcc_pdata.gpio = assabet_gpio_base + 0;
 }
 
 MACHINE_START(ASSABET, "Intel-Assabet")
diff --git a/arch/arm/mach-sa1100/cerf.c b/arch/arm/mach-sa1100/cerf.c
index 2d25ececb415..b2a4b41626ef 100644
--- a/arch/arm/mach-sa1100/cerf.c
+++ b/arch/arm/mach-sa1100/cerf.c
@@ -11,6 +11,7 @@
  */
 
 #include <linux/init.h>
+#include <linux/gpio/machine.h>
 #include <linux/kernel.h>
 #include <linux/tty.h>
 #include <linux/platform_data/sa11x0-serial.h>
@@ -45,6 +46,19 @@ static struct platform_device cerfuart2_device = {
 	.resource	= cerfuart2_resources,
 };
 
+/* Compact Flash */
+static struct gpiod_lookup_table cerf_cf_gpio_table = {
+	.dev_id = "sa11x0-pcmcia.1",
+	.table = {
+		GPIO_LOOKUP("gpio", 19, "bvd2", GPIO_ACTIVE_HIGH),
+		GPIO_LOOKUP("gpio", 20, "bvd1", GPIO_ACTIVE_HIGH),
+		GPIO_LOOKUP("gpio", 21, "reset", GPIO_ACTIVE_HIGH),
+		GPIO_LOOKUP("gpio", 22, "ready", GPIO_ACTIVE_HIGH),
+		GPIO_LOOKUP("gpio", 23, "detect", GPIO_ACTIVE_LOW),
+		{ },
+	},
+};
+
 /* LEDs */
 struct gpio_led cerf_gpio_leds[] = {
 	{
@@ -151,9 +165,6 @@ static void __init cerf_map_io(void)
 	sa1100_register_uart(0, 3);
 	sa1100_register_uart(1, 2); /* disable this and the uart2 device for sa1100_fir */
 	sa1100_register_uart(2, 1);
-
-	/* set some GPDR bits here while it's safe */
-	GPDR |= CERF_GPIO_CF_RESET;
 }
 
 static struct mcp_plat_data cerf_mcp_data = {
@@ -167,6 +178,7 @@ static void __init cerf_init(void)
 	platform_add_devices(cerf_devices, ARRAY_SIZE(cerf_devices));
 	sa11x0_register_mtd(&cerf_flash_data, &cerf_flash_resource, 1);
 	sa11x0_register_mcp(&cerf_mcp_data);
+	sa11x0_register_pcmcia(1, &cerf_cf_gpio_table);
 }
 
 MACHINE_START(CERF, "Intrinsyc CerfBoard/CerfCube")
diff --git a/arch/arm/mach-sa1100/clock.c b/arch/arm/mach-sa1100/clock.c
index b2eb3d232e39..6199e87447ca 100644
--- a/arch/arm/mach-sa1100/clock.c
+++ b/arch/arm/mach-sa1100/clock.c
@@ -163,6 +163,8 @@ static struct clk_lookup sa11xx_clkregs[] = {
 	CLKDEV_INIT("sa1100-rtc", NULL, NULL),
 	CLKDEV_INIT("sa11x0-fb", NULL, &clk_cpu),
 	CLKDEV_INIT("sa11x0-pcmcia", NULL, &clk_cpu),
+	CLKDEV_INIT("sa11x0-pcmcia.0", NULL, &clk_cpu),
+	CLKDEV_INIT("sa11x0-pcmcia.1", NULL, &clk_cpu),
 	/* sa1111 names devices using internal offsets, PCMCIA is at 0x1800 */
 	CLKDEV_INIT("1800", NULL, &clk_cpu),
 	CLKDEV_INIT(NULL, "OSTIMER0", &clk_36864),
diff --git a/arch/arm/mach-sa1100/generic.c b/arch/arm/mach-sa1100/generic.c
index 2eb00691b07d..7167ddf84a0e 100644
--- a/arch/arm/mach-sa1100/generic.c
+++ b/arch/arm/mach-sa1100/generic.c
@@ -10,6 +10,7 @@
  * published by the Free Software Foundation.
  */
 #include <linux/gpio.h>
+#include <linux/gpio/machine.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
@@ -20,6 +21,8 @@
 #include <linux/ioport.h>
 #include <linux/platform_device.h>
 #include <linux/reboot.h>
+#include <linux/regulator/fixed.h>
+#include <linux/regulator/machine.h>
 #include <linux/irqchip/irq-sa11x0.h>
 
 #include <video/sa1100fb.h>
@@ -232,11 +235,20 @@ void sa11x0_register_lcd(struct sa1100fb_mach_info *inf)
 	sa11x0_register_device(&sa11x0fb_device, inf);
 }
 
+static bool sa11x0pcmcia_legacy = true;
 static struct platform_device sa11x0pcmcia_device = {
 	.name		= "sa11x0-pcmcia",
 	.id		= -1,
 };
 
+void sa11x0_register_pcmcia(int socket, struct gpiod_lookup_table *table)
+{
+	if (table)	/* a GPIO lookup table is optional */
+		gpiod_add_lookup_table(table);
+	platform_device_register_simple("sa11x0-pcmcia", socket, NULL, 0);
+	sa11x0pcmcia_legacy = false;	/* sa1100_init() checks this flag */
+}
+
 static struct platform_device sa11x0mtd_device = {
 	.name		= "sa1100-mtd",
 	.id		= -1,
@@ -311,7 +323,6 @@ static struct platform_device *sa11x0_devices[] __initdata = {
 	&sa11x0uart1_device,
 	&sa11x0uart3_device,
 	&sa11x0ssp_device,
-	&sa11x0pcmcia_device,
 	&sa11x0rtc_device,
 	&sa11x0dma_device,
 };
@@ -319,6 +330,12 @@ static struct platform_device *sa11x0_devices[] __initdata = {
 static int __init sa1100_init(void)
 {
 	pm_power_off = sa1100_power_off;
+
+	if (sa11x0pcmcia_legacy)
+		platform_device_register(&sa11x0pcmcia_device);
+
+	regulator_has_full_constraints();
+
 	return platform_add_devices(sa11x0_devices, ARRAY_SIZE(sa11x0_devices));
 }
 
@@ -329,6 +346,50 @@ void __init sa11x0_init_late(void)
 	sa11x0_pm_init();
 }
 
+/*
+ * Register a "reg-fixed-voltage" platform device with id @n, described
+ * by @cfg and feeding the @num_supplies consumers listed in @supplies.
+ *
+ * The regulator init data is allocated here and attached to @cfg; the
+ * regulator is marked always-on when cfg->gpio is negative.
+ * (cfg->gpio < 0).
+ *
+ * Returns 0 on success, -ENOMEM if the init data cannot be allocated,
+ * or the error reported by the platform device registration.
+ */
+int __init sa11x0_register_fixed_regulator(int n,
+	struct fixed_voltage_config *cfg,
+	struct regulator_consumer_supply *supplies, unsigned num_supplies)
+{
+	struct regulator_init_data *id;
+	struct platform_device *pdev;
+
+	cfg->init_data = id = kzalloc(sizeof(*cfg->init_data), GFP_KERNEL);
+	if (!cfg->init_data)
+		return -ENOMEM;
+
+	if (cfg->gpio < 0)
+		id->constraints.always_on = 1;
+	id->constraints.name = cfg->supply_name;
+	id->constraints.min_uV = cfg->microvolts;
+	id->constraints.max_uV = cfg->microvolts;
+	id->constraints.valid_modes_mask = REGULATOR_MODE_NORMAL;
+	id->constraints.valid_ops_mask = REGULATOR_CHANGE_STATUS;
+	id->consumer_supplies = supplies;
+	id->num_consumer_supplies = num_supplies;
+
+	pdev = platform_device_register_resndata(NULL, "reg-fixed-voltage", n,
+						 NULL, 0, cfg, sizeof(*cfg));
+	if (IS_ERR(pdev)) {
+		/* Registration failed: release the unused init data */
+		cfg->init_data = NULL;
+		kfree(id);
+		return PTR_ERR(pdev);
+	}
+
+	return 0;
+}
+
 /*
  * Common I/O mapping:
  *
diff --git a/arch/arm/mach-sa1100/generic.h b/arch/arm/mach-sa1100/generic.h
index 97502922a15d..5f3cb52fa6ab 100644
--- a/arch/arm/mach-sa1100/generic.h
+++ b/arch/arm/mach-sa1100/generic.h
@@ -47,3 +47,11 @@ static inline int sa11x0_pm_init(void) { return 0; }
 #endif
 
 int sa11xx_clk_init(void);
+
+struct gpiod_lookup_table;
+void sa11x0_register_pcmcia(int socket, struct gpiod_lookup_table *);
+
+struct fixed_voltage_config;
+struct regulator_consumer_supply;
+int sa11x0_register_fixed_regulator(int n, struct fixed_voltage_config *cfg,
+	struct regulator_consumer_supply *supplies, unsigned num_supplies);
diff --git a/arch/arm/mach-sa1100/h3xxx.c b/arch/arm/mach-sa1100/h3xxx.c
index b69e76614d5b..36a78b0c106f 100644
--- a/arch/arm/mach-sa1100/h3xxx.c
+++ b/arch/arm/mach-sa1100/h3xxx.c
@@ -11,6 +11,7 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/gpio/machine.h>
 #include <linux/gpio.h>
 #include <linux/gpio_keys.h>
 #include <linux/input.h>
@@ -264,8 +265,24 @@ static struct platform_device *h3xxx_devices[] = {
 	&h3xxx_micro_asic,
 };
 
+static struct gpiod_lookup_table h3xxx_pcmcia_gpio_table = {
+	.dev_id = "sa11x0-pcmcia",
+	.table = {
+		GPIO_LOOKUP("gpio", H3XXX_GPIO_PCMCIA_CD0,
+			    "pcmcia0-detect", GPIO_ACTIVE_LOW),
+		GPIO_LOOKUP("gpio", H3XXX_GPIO_PCMCIA_IRQ0,
+			    "pcmcia0-ready", GPIO_ACTIVE_HIGH),
+		GPIO_LOOKUP("gpio", H3XXX_GPIO_PCMCIA_CD1,
+			    "pcmcia1-detect", GPIO_ACTIVE_LOW),
+		GPIO_LOOKUP("gpio", H3XXX_GPIO_PCMCIA_IRQ1,
+			    "pcmcia1-ready", GPIO_ACTIVE_HIGH),
+		{ },
+	},
+};
+
 void __init h3xxx_mach_init(void)
 {
+	gpiod_add_lookup_table(&h3xxx_pcmcia_gpio_table);
 	sa1100_register_uart_fns(&h3xxx_port_fns);
 	sa11x0_register_mtd(&h3xxx_flash_data, &h3xxx_flash_resource, 1);
 	platform_add_devices(h3xxx_devices, ARRAY_SIZE(h3xxx_devices));
diff --git a/arch/arm/mach-sa1100/include/mach/assabet.h b/arch/arm/mach-sa1100/include/mach/assabet.h
index 558b45323a2d..641a961653af 100644
--- a/arch/arm/mach-sa1100/include/mach/assabet.h
+++ b/arch/arm/mach-sa1100/include/mach/assabet.h
@@ -96,10 +96,4 @@ extern void assabet_uda1341_reset(int set);
 #define ASSABET_GPIO_BATT_LOW		GPIO_GPIO (26)	/* Low battery */
 #define ASSABET_GPIO_RCLK		GPIO_GPIO (26)	/* CCLK/2  */
 
-/* These are gpiolib GPIO numbers, not bitmasks */
-#define ASSABET_GPIO_CF_IRQ		21	/* CF IRQ */
-#define ASSABET_GPIO_CF_CD		22	/* CF CD  */
-#define ASSABET_GPIO_CF_BVD2		24	/* CF BVD / IOSPKR */
-#define ASSABET_GPIO_CF_BVD1		25	/* CF BVD / IOSTSCHG */
-
 #endif
diff --git a/arch/arm/mach-sa1100/nanoengine.c b/arch/arm/mach-sa1100/nanoengine.c
index f1cb3784d525..4d35258a7b32 100644
--- a/arch/arm/mach-sa1100/nanoengine.c
+++ b/arch/arm/mach-sa1100/nanoengine.c
@@ -12,6 +12,7 @@
  */
 
 #include <linux/init.h>
+#include <linux/gpio/machine.h>
 #include <linux/kernel.h>
 #include <linux/platform_data/sa11x0-serial.h>
 #include <linux/mtd/mtd.h>
@@ -99,8 +100,30 @@ static void __init nanoengine_map_io(void)
 	Ser2HSCR0 = 0;
 }
 
+static struct gpiod_lookup_table nanoengine_pcmcia0_gpio_table = {
+	.dev_id = "sa11x0-pcmcia.0",
+	.table = {
+		GPIO_LOOKUP("gpio", 11, "ready", GPIO_ACTIVE_HIGH),
+		GPIO_LOOKUP("gpio", 13, "detect", GPIO_ACTIVE_LOW),
+		GPIO_LOOKUP("gpio", 15, "reset", GPIO_ACTIVE_HIGH),
+		{ },
+	},
+};
+
+static struct gpiod_lookup_table nanoengine_pcmcia1_gpio_table = {
+	.dev_id = "sa11x0-pcmcia.1",
+	.table = {
+		GPIO_LOOKUP("gpio", 12, "ready", GPIO_ACTIVE_HIGH),
+		GPIO_LOOKUP("gpio", 14, "detect", GPIO_ACTIVE_LOW),
+		GPIO_LOOKUP("gpio", 16, "reset", GPIO_ACTIVE_HIGH),
+		{ },
+	},
+};
+
 static void __init nanoengine_init(void)
 {
+	sa11x0_register_pcmcia(0, &nanoengine_pcmcia0_gpio_table);
+	sa11x0_register_pcmcia(1, &nanoengine_pcmcia1_gpio_table);
 	sa11x0_register_mtd(&nanoengine_flash_data, nanoengine_flash_resources,
 		ARRAY_SIZE(nanoengine_flash_resources));
 }
diff --git a/arch/arm/mach-sa1100/shannon.c b/arch/arm/mach-sa1100/shannon.c
index 856664c783d9..22f7fe0b809f 100644
--- a/arch/arm/mach-sa1100/shannon.c
+++ b/arch/arm/mach-sa1100/shannon.c
@@ -5,11 +5,14 @@
 
 #include <linux/init.h>
 #include <linux/device.h>
+#include <linux/gpio/machine.h>
 #include <linux/kernel.h>
 #include <linux/platform_data/sa11x0-serial.h>
 #include <linux/tty.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
+#include <linux/regulator/fixed.h>
+#include <linux/regulator/machine.h>
 
 #include <video/sa1100fb.h>
 
@@ -72,8 +75,43 @@ static struct sa1100fb_mach_info shannon_lcd_info = {
 	.lccr3		= LCCR3_ACBsDiv(512),
 };
 
+static struct gpiod_lookup_table shannon_pcmcia0_gpio_table = {
+	.dev_id = "sa11x0-pcmcia.0",
+	.table = {
+		GPIO_LOOKUP("gpio", 24, "detect", GPIO_ACTIVE_LOW),
+		GPIO_LOOKUP("gpio", 26, "ready", GPIO_ACTIVE_HIGH),
+		{ },
+	},
+};
+
+static struct gpiod_lookup_table shannon_pcmcia1_gpio_table = {
+	.dev_id = "sa11x0-pcmcia.1",
+	.table = {
+		GPIO_LOOKUP("gpio", 25, "detect", GPIO_ACTIVE_LOW),
+		GPIO_LOOKUP("gpio", 27, "ready", GPIO_ACTIVE_HIGH),
+		{ },
+	},
+};
+
+static struct regulator_consumer_supply shannon_cf_vcc_consumers[] = {
+	REGULATOR_SUPPLY("vcc", "sa11x0-pcmcia.0"),
+	REGULATOR_SUPPLY("vcc", "sa11x0-pcmcia.1"),
+};
+
+static struct fixed_voltage_config shannon_cf_vcc_pdata __initdata = {
+	.supply_name = "cf-power",
+	.microvolts = 3300000,
+	.enabled_at_boot = 1,
+	.gpio = -EINVAL,
+};
+
 static void __init shannon_init(void)
 {
+	sa11x0_register_fixed_regulator(0, &shannon_cf_vcc_pdata,
+					shannon_cf_vcc_consumers,
+					ARRAY_SIZE(shannon_cf_vcc_consumers));
+	sa11x0_register_pcmcia(0, &shannon_pcmcia0_gpio_table);
+	sa11x0_register_pcmcia(1, &shannon_pcmcia1_gpio_table);
 	sa11x0_ppc_configure_mcp();
 	sa11x0_register_lcd(&shannon_lcd_info);
 	sa11x0_register_mtd(&shannon_flash_data, &shannon_flash_resource, 1);
diff --git a/arch/arm/mach-sa1100/simpad.c b/arch/arm/mach-sa1100/simpad.c
index 7d4feb8a49ac..ace010479eb6 100644
--- a/arch/arm/mach-sa1100/simpad.c
+++ b/arch/arm/mach-sa1100/simpad.c
@@ -4,6 +4,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/gpio/machine.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/tty.h>
@@ -364,6 +365,15 @@ static struct platform_device *devices[] __initdata = {
 	&simpad_i2c,
 };
 
+/* Compact Flash */
+static struct gpiod_lookup_table simpad_cf_gpio_table = {
+	.dev_id = "sa11x0-pcmcia",
+	.table = {
+		GPIO_LOOKUP("gpio", GPIO_CF_IRQ, "cf-ready", GPIO_ACTIVE_HIGH),
+		GPIO_LOOKUP("gpio", GPIO_CF_CD, "cf-detect", GPIO_ACTIVE_HIGH),
+		{ },
+	},
+};
 
 
 static int __init simpad_init(void)
@@ -385,6 +395,7 @@ static int __init simpad_init(void)
 
 	pm_power_off = simpad_power_off;
 
+	sa11x0_register_pcmcia(-1, &simpad_cf_gpio_table);
 	sa11x0_ppc_configure_mcp();
 	sa11x0_register_mtd(&simpad_flash_data, simpad_flash_resources,
 			      ARRAY_SIZE(simpad_flash_resources));
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index ada8eb206a90..8c398fedbbb6 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -466,6 +466,12 @@ void __init dma_contiguous_early_fixup(phys_addr_t base, unsigned long size)
 void __init dma_contiguous_remap(void)
 {
 	int i;
+
+	if (!dma_mmu_remap_num)
+		return;
+
+	/* call flush_cache_all() since CMA area would be large enough */
+	flush_cache_all();
 	for (i = 0; i < dma_mmu_remap_num; i++) {
 		phys_addr_t start = dma_mmu_remap[i].base;
 		phys_addr_t end = start + dma_mmu_remap[i].size;
@@ -498,7 +504,15 @@ void __init dma_contiguous_remap(void)
 		flush_tlb_kernel_range(__phys_to_virt(start),
 				       __phys_to_virt(end));
 
-		iotable_init(&map, 1);
+		/*
+		 * All the memory in CMA region will be on ZONE_MOVABLE.
+		 * If that zone is considered as highmem, the memory in CMA
+		 * region is also considered as highmem even if it's
+		 * physical address belong to lowmem. In this case,
+		 * re-mapping isn't required.
+		 */
+		if (!is_highmem_idx(ZONE_MOVABLE))
+			iotable_init(&map, 1);
 	}
 }
 
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index bd6f4513539a..c186474422f3 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -758,20 +758,9 @@ void set_kernel_text_ro(void)
 static inline void fix_kernmem_perms(void) { }
 #endif /* CONFIG_STRICT_KERNEL_RWX */
 
-void free_tcmmem(void)
-{
-#ifdef CONFIG_HAVE_TCM
-	extern char __tcm_start, __tcm_end;
-
-	poison_init_mem(&__tcm_start, &__tcm_end - &__tcm_start);
-	free_reserved_area(&__tcm_start, &__tcm_end, -1, "TCM link");
-#endif
-}
-
 void free_initmem(void)
 {
 	fix_kernmem_perms();
-	free_tcmmem();
 
 	poison_init_mem(__init_begin, __init_end - __init_begin);
 	if (!machine_is_integrator() && !machine_is_cintegrator())
diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c
index eb1de66517d5..f866870db749 100644
--- a/arch/arm/mm/mmap.c
+++ b/arch/arm/mm/mmap.c
@@ -21,20 +21,20 @@
 #define MIN_GAP (128*1024*1024UL)
 #define MAX_GAP ((TASK_SIZE)/6*5)
 
-static int mmap_is_legacy(void)
+static int mmap_is_legacy(struct rlimit *rlim_stack)
 {
 	if (current->personality & ADDR_COMPAT_LAYOUT)
 		return 1;
 
-	if (rlimit(RLIMIT_STACK) == RLIM_INFINITY)
+	if (rlim_stack->rlim_cur == RLIM_INFINITY)
 		return 1;
 
 	return sysctl_legacy_va_layout;
 }
 
-static unsigned long mmap_base(unsigned long rnd)
+static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
 {
-	unsigned long gap = rlimit(RLIMIT_STACK);
+	unsigned long gap = rlim_stack->rlim_cur;
 
 	if (gap < MIN_GAP)
 		gap = MIN_GAP;
@@ -180,18 +180,18 @@ unsigned long arch_mmap_rnd(void)
 	return rnd << PAGE_SHIFT;
 }
 
-void arch_pick_mmap_layout(struct mm_struct *mm)
+void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
 {
 	unsigned long random_factor = 0UL;
 
 	if (current->flags & PF_RANDOMIZE)
 		random_factor = arch_mmap_rnd();
 
-	if (mmap_is_legacy()) {
+	if (mmap_is_legacy(rlim_stack)) {
 		mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
 		mm->get_unmapped_area = arch_get_unmapped_area;
 	} else {
-		mm->mmap_base = mmap_base(random_factor);
+		mm->mmap_base = mmap_base(random_factor, rlim_stack);
 		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
 	}
 }
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index d55d493f9a1e..b528a15f460d 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -272,6 +272,7 @@ ENDPROC(cpu_pj4b_do_resume)
 __v7_ca5mp_setup:
 __v7_ca9mp_setup:
 __v7_cr7mp_setup:
+__v7_cr8mp_setup:
 	mov	r10, #(1 << 0)			@ Cache/TLB ops broadcasting
 	b	1f
 __v7_ca7mp_setup:
@@ -642,6 +643,16 @@ __v7_cr7mp_proc_info:
 	.size	__v7_cr7mp_proc_info, . - __v7_cr7mp_proc_info
 
 	/*
+	 * ARM Ltd. Cortex R8 processor.
+	 */
+	.type	__v7_cr8mp_proc_info, #object
+__v7_cr8mp_proc_info:
+	.long	0x410fc180
+	.long	0xff0ffff0
+	__v7_proc __v7_cr8mp_proc_info, __v7_cr8mp_setup
+	.size	__v7_cr8mp_proc_info, . - __v7_cr8mp_proc_info
+
+	/*
 	 * ARM Ltd. Cortex A7 processor.
 	 */
 	.type	__v7_ca7mp_proc_info, #object
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 177be0d1d090..eb2cf4938f6d 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -922,6 +922,22 @@ config HARDEN_BRANCH_PREDICTOR
 
 	  If unsure, say Y.
 
+config HARDEN_EL2_VECTORS
+	bool "Harden EL2 vector mapping against system register leak" if EXPERT
+	default y
+	help
+	  Speculation attacks against some high-performance processors can
+	  be used to leak privileged information such as the vector base
+	  register, resulting in a potential defeat of the EL2 layout
+	  randomization.
+
+	  This config option will map the vectors to a fixed location,
+	  independent of the EL2 code mapping, so that revealing VBAR_EL2
+	  to an attacker does not give away any extra information. This
+	  only gets enabled on affected CPUs.
+
+	  If unsure, say Y.
+
 menuconfig ARMV8_DEPRECATED
 	bool "Emulate deprecated/obsolete ARMv8 instructions"
 	depends on COMPAT
diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
index 669028172fd6..a91933b1e2e6 100644
--- a/arch/arm64/include/asm/alternative.h
+++ b/arch/arm64/include/asm/alternative.h
@@ -5,6 +5,8 @@
 #include <asm/cpucaps.h>
 #include <asm/insn.h>
 
+#define ARM64_CB_PATCH ARM64_NCAPS
+
 #ifndef __ASSEMBLY__
 
 #include <linux/init.h>
@@ -22,12 +24,19 @@ struct alt_instr {
 	u8  alt_len;		/* size of new instruction(s), <= orig_len */
 };
 
+typedef void (*alternative_cb_t)(struct alt_instr *alt,
+				 __le32 *origptr, __le32 *updptr, int nr_inst);
+
 void __init apply_alternatives_all(void);
 void apply_alternatives(void *start, size_t length);
 
-#define ALTINSTR_ENTRY(feature)						      \
+#define ALTINSTR_ENTRY(feature,cb)					      \
 	" .word 661b - .\n"				/* label           */ \
+	" .if " __stringify(cb) " == 0\n"				      \
 	" .word 663f - .\n"				/* new instruction */ \
+	" .else\n"							      \
+	" .word " __stringify(cb) "- .\n"		/* callback */	      \
+	" .endif\n"							      \
 	" .hword " __stringify(feature) "\n"		/* feature bit     */ \
 	" .byte 662b-661b\n"				/* source len      */ \
 	" .byte 664f-663f\n"				/* replacement len */
@@ -45,15 +54,18 @@ void apply_alternatives(void *start, size_t length);
  * but most assemblers die if insn1 or insn2 have a .inst. This should
  * be fixed in a binutils release posterior to 2.25.51.0.2 (anything
  * containing commit 4e4d08cf7399b606 or c1baaddf8861).
+ *
+ * Alternatives with callbacks do not generate replacement instructions.
  */
-#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled)	\
+#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled, cb)	\
 	".if "__stringify(cfg_enabled)" == 1\n"				\
 	"661:\n\t"							\
 	oldinstr "\n"							\
 	"662:\n"							\
 	".pushsection .altinstructions,\"a\"\n"				\
-	ALTINSTR_ENTRY(feature)						\
+	ALTINSTR_ENTRY(feature,cb)					\
 	".popsection\n"							\
+	" .if " __stringify(cb) " == 0\n"				\
 	".pushsection .altinstr_replacement, \"a\"\n"			\
 	"663:\n\t"							\
 	newinstr "\n"							\
@@ -61,11 +73,17 @@ void apply_alternatives(void *start, size_t length);
 	".popsection\n\t"						\
 	".org	. - (664b-663b) + (662b-661b)\n\t"			\
 	".org	. - (662b-661b) + (664b-663b)\n"			\
+	".else\n\t"							\
+	"663:\n\t"							\
+	"664:\n\t"							\
+	".endif\n"							\
 	".endif\n"
 
 #define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...)	\
-	__ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg))
+	__ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg), 0)
 
+#define ALTERNATIVE_CB(oldinstr, cb) \
+	__ALTERNATIVE_CFG(oldinstr, "NOT_AN_INSTRUCTION", ARM64_CB_PATCH, 1, cb)
 #else
 
 #include <asm/assembler.h>
@@ -132,6 +150,14 @@ void apply_alternatives(void *start, size_t length);
 661:
 .endm
 
+.macro alternative_cb cb
+	.set .Lasm_alt_mode, 0
+	.pushsection .altinstructions, "a"
+	altinstruction_entry 661f, \cb, ARM64_CB_PATCH, 662f-661f, 0
+	.popsection
+661:
+.endm
+
 /*
  * Provide the other half of the alternative code sequence.
  */
@@ -158,6 +184,13 @@ void apply_alternatives(void *start, size_t length);
 .endm
 
 /*
+ * Callback-based alternative epilogue
+ */
+.macro alternative_cb_end
+662:
+.endm
+
+/*
  * Provides a trivial alternative or default sequence consisting solely
  * of NOPs. The number of NOPs is chosen automatically to match the
  * previous case.
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 053d83e8db6f..0bcc98dbba56 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -565,4 +565,140 @@ USER(\label, ic	ivau, \tmp2)			// invalidate I line PoU
 #endif
 	.endm
 
+	/*
+	 * frame_push - Push @regcount callee saved registers to the stack,
+	 *              starting at x19, as well as x29/x30, and set x29 to
+	 *              the new value of sp. Add @extra bytes of stack space
+	 *              for locals.
+	 */
+	.macro		frame_push, regcount:req, extra
+	__frame		st, \regcount, \extra
+	.endm
+
+	/*
+	 * frame_pop  - Pop the callee saved registers from the stack that were
+	 *              pushed in the most recent call to frame_push, as well
+	 *              as x29/x30 and any extra stack space that may have been
+	 *              allocated.
+	 */
+	.macro		frame_pop
+	__frame		ld
+	.endm
+
+	.macro		__frame_regs, reg1, reg2, op, num
+	.if		.Lframe_regcount == \num
+	\op\()r		\reg1, [sp, #(\num + 1) * 8]
+	.elseif		.Lframe_regcount > \num
+	\op\()p		\reg1, \reg2, [sp, #(\num + 1) * 8]
+	.endif
+	.endm
+
+	.macro		__frame, op, regcount, extra=0
+	.ifc		\op, st
+	.if		(\regcount) < 0 || (\regcount) > 10
+	.error		"regcount should be in the range [0 ... 10]"
+	.endif
+	.if		((\extra) % 16) != 0
+	.error		"extra should be a multiple of 16 bytes"
+	.endif
+	.ifdef		.Lframe_regcount
+	.if		.Lframe_regcount != -1
+	.error		"frame_push/frame_pop may not be nested"
+	.endif
+	.endif
+	.set		.Lframe_regcount, \regcount
+	.set		.Lframe_extra, \extra
+	.set		.Lframe_local_offset, ((\regcount + 3) / 2) * 16
+	stp		x29, x30, [sp, #-.Lframe_local_offset - .Lframe_extra]!
+	mov		x29, sp
+	.endif
+
+	__frame_regs	x19, x20, \op, 1
+	__frame_regs	x21, x22, \op, 3
+	__frame_regs	x23, x24, \op, 5
+	__frame_regs	x25, x26, \op, 7
+	__frame_regs	x27, x28, \op, 9
+
+	.ifc		\op, ld
+	.if		.Lframe_regcount == -1
+	.error		"frame_push/frame_pop may not be nested"
+	.endif
+	ldp		x29, x30, [sp], #.Lframe_local_offset + .Lframe_extra
+	.set		.Lframe_regcount, -1
+	.endif
+	.endm
+
+/*
+ * Check whether to yield to another runnable task from kernel mode NEON code
+ * (which runs with preemption disabled).
+ *
+ * if_will_cond_yield_neon
+ *        // pre-yield patchup code
+ * do_cond_yield_neon
+ *        // post-yield patchup code
+ * endif_yield_neon    <label>
+ *
+ * where <label> is optional, and marks the point where execution will resume
+ * after a yield has been performed. If omitted, execution resumes right after
+ * the endif_yield_neon invocation. Note that the entire sequence, including
+ * the provided patchup code, will be omitted from the image if CONFIG_PREEMPT
+ * is not defined.
+ *
+ * As a convenience, in the case where no patchup code is required, the above
+ * sequence may be abbreviated to
+ *
+ * cond_yield_neon <label>
+ *
+ * Note that the patchup code does not support assembler directives that change
+ * the output section, any use of such directives is undefined.
+ *
+ * The yield itself consists of the following:
+ * - Check whether the preempt count is exactly 1, in which case disabling
+ *   preemption once will make the task preemptible. If this is not the case,
+ *   yielding is pointless.
+ * - Check whether TIF_NEED_RESCHED is set, and if so, disable and re-enable
+ *   kernel mode NEON (which will trigger a reschedule), and branch to the
+ *   yield fixup code.
+ *
+ * This macro sequence may clobber all CPU state that is not guaranteed by the
+ * AAPCS to be preserved across an ordinary function call.
+ */
+
+	.macro		cond_yield_neon, lbl
+	if_will_cond_yield_neon
+	do_cond_yield_neon
+	endif_yield_neon	\lbl
+	.endm
+
+	.macro		if_will_cond_yield_neon
+#ifdef CONFIG_PREEMPT
+	get_thread_info	x0
+	ldr		w1, [x0, #TSK_TI_PREEMPT]
+	ldr		x0, [x0, #TSK_TI_FLAGS]
+	cmp		w1, #PREEMPT_DISABLE_OFFSET
+	csel		x0, x0, xzr, eq
+	tbnz		x0, #TIF_NEED_RESCHED, .Lyield_\@	// needs rescheduling?
+	/* fall through to endif_yield_neon */
+	.subsection	1
+.Lyield_\@ :
+#else
+	.section	".discard.cond_yield_neon", "ax"
+#endif
+	.endm
+
+	.macro		do_cond_yield_neon
+	bl		kernel_neon_end
+	bl		kernel_neon_begin
+	.endm
+
+	.macro		endif_yield_neon, lbl
+	.ifnb		\lbl
+	b		\lbl
+	.else
+	b		.Lyield_out_\@
+	.endif
+	.previous
+.Lyield_out_\@ :
+	.endm
+
 #endif	/* __ASM_ASSEMBLER_H */
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 7dfcec4700fe..0094c6653b06 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -140,10 +140,8 @@ static inline void __flush_icache_all(void)
 	dsb(ish);
 }
 
-#define flush_dcache_mmap_lock(mapping) \
-	spin_lock_irq(&(mapping)->tree_lock)
-#define flush_dcache_mmap_unlock(mapping) \
-	spin_unlock_irq(&(mapping)->tree_lock)
+#define flush_dcache_mmap_lock(mapping)		do { } while (0)
+#define flush_dcache_mmap_unlock(mapping)	do { } while (0)
 
 /*
  * We don't appear to need to do anything here.  In fact, if we did, we'd
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index 21bb624e0a7a..bc51b72fafd4 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -32,7 +32,7 @@
 #define ARM64_HAS_VIRT_HOST_EXTN		11
 #define ARM64_WORKAROUND_CAVIUM_27456		12
 #define ARM64_HAS_32BIT_EL0			13
-#define ARM64_HYP_OFFSET_LOW			14
+#define ARM64_HARDEN_EL2_VECTORS		14
 #define ARM64_MISMATCHED_CACHE_LINE_SIZE	15
 #define ARM64_HAS_NO_FPSIMD			16
 #define ARM64_WORKAROUND_REPEAT_TLBI		17
@@ -43,13 +43,12 @@
 #define ARM64_SVE				22
 #define ARM64_UNMAP_KERNEL_AT_EL0		23
 #define ARM64_HARDEN_BRANCH_PREDICTOR		24
-#define ARM64_HARDEN_BP_POST_GUEST_EXIT		25
-#define ARM64_HAS_RAS_EXTN			26
-#define ARM64_WORKAROUND_843419			27
-#define ARM64_HAS_CACHE_IDC			28
-#define ARM64_HAS_CACHE_DIC			29
-#define ARM64_HW_DBM				30
+#define ARM64_HAS_RAS_EXTN			25
+#define ARM64_WORKAROUND_843419			26
+#define ARM64_HAS_CACHE_IDC			27
+#define ARM64_HAS_CACHE_DIC			28
+#define ARM64_HW_DBM				29
 
-#define ARM64_NCAPS				31
+#define ARM64_NCAPS				30
 
 #endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index 4214c38d016b..f62c56b1793f 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -70,6 +70,7 @@ enum aarch64_insn_imm_type {
 	AARCH64_INSN_IMM_6,
 	AARCH64_INSN_IMM_S,
 	AARCH64_INSN_IMM_R,
+	AARCH64_INSN_IMM_N,
 	AARCH64_INSN_IMM_MAX
 };
 
@@ -314,6 +315,11 @@ __AARCH64_INSN_FUNCS(eor,	0x7F200000, 0x4A000000)
 __AARCH64_INSN_FUNCS(eon,	0x7F200000, 0x4A200000)
 __AARCH64_INSN_FUNCS(ands,	0x7F200000, 0x6A000000)
 __AARCH64_INSN_FUNCS(bics,	0x7F200000, 0x6A200000)
+__AARCH64_INSN_FUNCS(and_imm,	0x7F800000, 0x12000000)
+__AARCH64_INSN_FUNCS(orr_imm,	0x7F800000, 0x32000000)
+__AARCH64_INSN_FUNCS(eor_imm,	0x7F800000, 0x52000000)
+__AARCH64_INSN_FUNCS(ands_imm,	0x7F800000, 0x72000000)
+__AARCH64_INSN_FUNCS(extr,	0x7FA00000, 0x13800000)
 __AARCH64_INSN_FUNCS(b,		0xFC000000, 0x14000000)
 __AARCH64_INSN_FUNCS(bl,	0xFC000000, 0x94000000)
 __AARCH64_INSN_FUNCS(cbz,	0x7F000000, 0x34000000)
@@ -423,6 +429,16 @@ u32 aarch64_insn_gen_logical_shifted_reg(enum aarch64_insn_register dst,
 					 int shift,
 					 enum aarch64_insn_variant variant,
 					 enum aarch64_insn_logic_type type);
+u32 aarch64_insn_gen_logical_immediate(enum aarch64_insn_logic_type type,
+				       enum aarch64_insn_variant variant,
+				       enum aarch64_insn_register Rn,
+				       enum aarch64_insn_register Rd,
+				       u64 imm);
+u32 aarch64_insn_gen_extr(enum aarch64_insn_variant variant,
+			  enum aarch64_insn_register Rm,
+			  enum aarch64_insn_register Rn,
+			  enum aarch64_insn_register Rd,
+			  u8 lsb);
 u32 aarch64_insn_gen_prefetch(enum aarch64_insn_register base,
 			      enum aarch64_insn_prfm_type type,
 			      enum aarch64_insn_prfm_target target,
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index b0c84171e6a3..6dd285e979c9 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -25,6 +25,7 @@
 /* Hyp Configuration Register (HCR) bits */
 #define HCR_TEA		(UL(1) << 37)
 #define HCR_TERR	(UL(1) << 36)
+#define HCR_TLOR	(UL(1) << 35)
 #define HCR_E2H		(UL(1) << 34)
 #define HCR_ID		(UL(1) << 33)
 #define HCR_CD		(UL(1) << 32)
@@ -64,6 +65,7 @@
 
 /*
  * The bits we set in HCR:
+ * TLOR:	Trap LORegion register accesses
  * RW:		64bit by default, can be overridden for 32bit VMs
  * TAC:		Trap ACTLR
  * TSC:		Trap SMC
@@ -81,9 +83,9 @@
  */
 #define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \
 			 HCR_TVM | HCR_BSU_IS | HCR_FB | HCR_TAC | \
-			 HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW)
+			 HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \
+			 HCR_FMO | HCR_IMO)
 #define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF)
-#define HCR_INT_OVERRIDE   (HCR_FMO | HCR_IMO)
 #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
 
 /* TCR_EL2 Registers bits */
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 24961b732e65..f6648a3e4152 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -33,6 +33,7 @@
 #define KVM_ARM64_DEBUG_DIRTY_SHIFT	0
 #define KVM_ARM64_DEBUG_DIRTY		(1 << KVM_ARM64_DEBUG_DIRTY_SHIFT)
 
+/* Translate a kernel address of @sym into its equivalent linear mapping */
 #define kvm_ksym_ref(sym)						\
 	({								\
 		void *val = &sym;					\
@@ -57,7 +58,9 @@ extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu);
 
 extern void __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high);
 
-extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
+extern int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu);
+
+extern int __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu);
 
 extern u64 __vgic_v3_get_ich_vtr_el2(void);
 extern u64 __vgic_v3_read_vmcr(void);
@@ -68,7 +71,19 @@ extern u32 __kvm_get_mdcr_el2(void);
 
 extern u32 __init_stage2_translation(void);
 
-extern void __qcom_hyp_sanitize_btac_predictors(void);
+#else /* __ASSEMBLY__ */
+
+.macro get_host_ctxt reg, tmp
+	adr_l	\reg, kvm_host_cpu_state
+	mrs	\tmp, tpidr_el2
+	add	\reg, \reg, \tmp
+.endm
+
+.macro get_vcpu_ptr vcpu, ctxt
+	get_host_ctxt \ctxt, \vcpu
+	ldr	\vcpu, [\ctxt, #HOST_CONTEXT_VCPU]
+	kern_hyp_va	\vcpu
+.endm
 
 #endif
 
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 413dc82b1e89..23b33e8ea03a 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -26,13 +26,15 @@
 
 #include <asm/esr.h>
 #include <asm/kvm_arm.h>
+#include <asm/kvm_hyp.h>
 #include <asm/kvm_mmio.h>
 #include <asm/ptrace.h>
 #include <asm/cputype.h>
 #include <asm/virt.h>
 
 unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num);
-unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu);
+unsigned long vcpu_read_spsr32(const struct kvm_vcpu *vcpu);
+void vcpu_write_spsr32(struct kvm_vcpu *vcpu, unsigned long v);
 
 bool kvm_condition_valid32(const struct kvm_vcpu *vcpu);
 void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr);
@@ -45,6 +47,11 @@ void kvm_inject_undef32(struct kvm_vcpu *vcpu);
 void kvm_inject_dabt32(struct kvm_vcpu *vcpu, unsigned long addr);
 void kvm_inject_pabt32(struct kvm_vcpu *vcpu, unsigned long addr);
 
+static inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
+{
+	return !(vcpu->arch.hcr_el2 & HCR_RW);
+}
+
 static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
@@ -59,16 +66,19 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
 
 	if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features))
 		vcpu->arch.hcr_el2 &= ~HCR_RW;
-}
 
-static inline unsigned long vcpu_get_hcr(struct kvm_vcpu *vcpu)
-{
-	return vcpu->arch.hcr_el2;
+	/*
+	 * TID3: trap feature register accesses that we virtualise.
+	 * For now this is conditional, since no AArch32 feature regs
+	 * are currently virtualised.
+	 */
+	if (!vcpu_el1_is_32bit(vcpu))
+		vcpu->arch.hcr_el2 |= HCR_TID3;
 }
 
-static inline void vcpu_set_hcr(struct kvm_vcpu *vcpu, unsigned long hcr)
+static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.hcr_el2 = hcr;
+	return (unsigned long *)&vcpu->arch.hcr_el2;
 }
 
 static inline void vcpu_set_vsesr(struct kvm_vcpu *vcpu, u64 vsesr)
@@ -81,11 +91,27 @@ static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu)
 	return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc;
 }
 
-static inline unsigned long *vcpu_elr_el1(const struct kvm_vcpu *vcpu)
+static inline unsigned long *__vcpu_elr_el1(const struct kvm_vcpu *vcpu)
 {
 	return (unsigned long *)&vcpu_gp_regs(vcpu)->elr_el1;
 }
 
+static inline unsigned long vcpu_read_elr_el1(const struct kvm_vcpu *vcpu)
+{
+	if (vcpu->arch.sysregs_loaded_on_cpu)
+		return read_sysreg_el1(elr);
+	else
+		return *__vcpu_elr_el1(vcpu);
+}
+
+static inline void vcpu_write_elr_el1(const struct kvm_vcpu *vcpu, unsigned long v)
+{
+	if (vcpu->arch.sysregs_loaded_on_cpu)
+		write_sysreg_el1(v, elr);
+	else
+		*__vcpu_elr_el1(vcpu) = v;
+}
+
 static inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu)
 {
 	return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pstate;
@@ -135,13 +161,28 @@ static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num,
 		vcpu_gp_regs(vcpu)->regs.regs[reg_num] = val;
 }
 
-/* Get vcpu SPSR for current mode */
-static inline unsigned long *vcpu_spsr(const struct kvm_vcpu *vcpu)
+static inline unsigned long vcpu_read_spsr(const struct kvm_vcpu *vcpu)
 {
 	if (vcpu_mode_is_32bit(vcpu))
-		return vcpu_spsr32(vcpu);
+		return vcpu_read_spsr32(vcpu);
 
-	return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1];
+	if (vcpu->arch.sysregs_loaded_on_cpu)
+		return read_sysreg_el1(spsr);
+	else
+		return vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1];
+}
+
+static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v)
+{
+	if (vcpu_mode_is_32bit(vcpu)) {
+		vcpu_write_spsr32(vcpu, v);
+		return;
+	}
+
+	if (vcpu->arch.sysregs_loaded_on_cpu)
+		write_sysreg_el1(v, spsr);
+	else
+		vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1] = v;
 }
 
 static inline bool vcpu_mode_priv(const struct kvm_vcpu *vcpu)
@@ -282,15 +323,18 @@ static inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu)
 
 static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
 {
-	return vcpu_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
+	return vcpu_read_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
 }
 
 static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
 {
-	if (vcpu_mode_is_32bit(vcpu))
+	if (vcpu_mode_is_32bit(vcpu)) {
 		*vcpu_cpsr(vcpu) |= COMPAT_PSR_E_BIT;
-	else
-		vcpu_sys_reg(vcpu, SCTLR_EL1) |= (1 << 25);
+	} else {
+		u64 sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
+		sctlr |= (1 << 25);	/* SCTLR_EL1.EE */
+		vcpu_write_sys_reg(vcpu, sctlr, SCTLR_EL1);
+	}
 }
 
 static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu)
@@ -298,7 +342,7 @@ static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu)
 	if (vcpu_mode_is_32bit(vcpu))
 		return !!(*vcpu_cpsr(vcpu) & COMPAT_PSR_E_BIT);
 
-	return !!(vcpu_sys_reg(vcpu, SCTLR_EL1) & (1 << 25));
+	return !!(vcpu_read_sys_reg(vcpu, SCTLR_EL1) & (1 << 25));
 }
 
 static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu,
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 596f8e414a4c..ab46bc70add6 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -272,9 +272,6 @@ struct kvm_vcpu_arch {
 	/* IO related fields */
 	struct kvm_decode mmio_decode;
 
-	/* Interrupt related fields */
-	u64 irq_lines;		/* IRQ and FIQ levels */
-
 	/* Cache some mmu pages needed inside spinlock regions */
 	struct kvm_mmu_memory_cache mmu_page_cache;
 
@@ -287,10 +284,25 @@ struct kvm_vcpu_arch {
 
 	/* Virtual SError ESR to restore when HCR_EL2.VSE is set */
 	u64 vsesr_el2;
+
+	/* True when deferrable sysregs are loaded on the physical CPU,
+	 * see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. */
+	bool sysregs_loaded_on_cpu;
 };
 
 #define vcpu_gp_regs(v)		(&(v)->arch.ctxt.gp_regs)
-#define vcpu_sys_reg(v,r)	((v)->arch.ctxt.sys_regs[(r)])
+
+/*
+ * Only use __vcpu_sys_reg if you know you want the memory backed version of a
+ * register, and not the one most recently accessed by a running VCPU.  For
+ * example, for userspace access or for system registers that are never context
+ * switched, but only emulated.
+ */
+#define __vcpu_sys_reg(v,r)	((v)->arch.ctxt.sys_regs[(r)])
+
+u64 vcpu_read_sys_reg(struct kvm_vcpu *vcpu, int reg);
+void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg);
+
 /*
  * CP14 and CP15 live in the same array, as they are backed by the
  * same system registers.
@@ -298,14 +310,6 @@ struct kvm_vcpu_arch {
 #define vcpu_cp14(v,r)		((v)->arch.ctxt.copro[(r)])
 #define vcpu_cp15(v,r)		((v)->arch.ctxt.copro[(r)])
 
-#ifdef CONFIG_CPU_BIG_ENDIAN
-#define vcpu_cp15_64_high(v,r)	vcpu_cp15((v),(r))
-#define vcpu_cp15_64_low(v,r)	vcpu_cp15((v),(r) + 1)
-#else
-#define vcpu_cp15_64_high(v,r)	vcpu_cp15((v),(r) + 1)
-#define vcpu_cp15_64_low(v,r)	vcpu_cp15((v),(r))
-#endif
-
 struct kvm_vm_stat {
 	ulong remote_tlb_flush;
 };
@@ -358,10 +362,15 @@ int kvm_perf_teardown(void);
 
 struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
 
+void __kvm_set_tpidr_el2(u64 tpidr_el2);
+DECLARE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state);
+
 static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
 				       unsigned long hyp_stack_ptr,
 				       unsigned long vector_ptr)
 {
+	u64 tpidr_el2;
+
 	/*
 	 * Call initialization code, and switch to the full blown HYP code.
 	 * If the cpucaps haven't been finalized yet, something has gone very
@@ -370,6 +379,16 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
 	 */
 	BUG_ON(!static_branch_likely(&arm64_const_caps_ready));
 	__kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr);
+
+	/*
+	 * Calculate the raw per-cpu offset without a translation from the
+	 * kernel's mapping to the linear mapping, and store it in tpidr_el2
+	 * so that we can use adr_l to access per-cpu variables in EL2.
+	 */
+	tpidr_el2 = (u64)this_cpu_ptr(&kvm_host_cpu_state)
+		- (u64)kvm_ksym_ref(kvm_host_cpu_state);
+
+	kvm_call_hyp(__kvm_set_tpidr_el2, tpidr_el2);
 }
 
 static inline void kvm_arch_hardware_unsetup(void) {}
@@ -416,6 +435,13 @@ static inline void kvm_arm_vhe_guest_enter(void)
 static inline void kvm_arm_vhe_guest_exit(void)
 {
 	local_daif_restore(DAIF_PROCCTX_NOIRQ);
+
+	/*
+	 * When we exit from the guest we change a number of CPU configuration
+	 * parameters, such as traps.  Make sure these changes take effect
+	 * before running the host or additional guests.
+	 */
+	isb();
 }
 
 static inline bool kvm_arm_harden_branch_predictor(void)
@@ -423,4 +449,7 @@ static inline bool kvm_arm_harden_branch_predictor(void)
 	return cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR);
 }
 
+void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu);
+void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu);
+
 #endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index f26f9cd70c72..384c34397619 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -120,37 +120,38 @@ typeof(orig) * __hyp_text fname(void)					\
 	return val;							\
 }
 
-void __vgic_v2_save_state(struct kvm_vcpu *vcpu);
-void __vgic_v2_restore_state(struct kvm_vcpu *vcpu);
 int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu);
 
 void __vgic_v3_save_state(struct kvm_vcpu *vcpu);
 void __vgic_v3_restore_state(struct kvm_vcpu *vcpu);
+void __vgic_v3_activate_traps(struct kvm_vcpu *vcpu);
+void __vgic_v3_deactivate_traps(struct kvm_vcpu *vcpu);
+void __vgic_v3_save_aprs(struct kvm_vcpu *vcpu);
+void __vgic_v3_restore_aprs(struct kvm_vcpu *vcpu);
 int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu);
 
 void __timer_enable_traps(struct kvm_vcpu *vcpu);
 void __timer_disable_traps(struct kvm_vcpu *vcpu);
 
-void __sysreg_save_host_state(struct kvm_cpu_context *ctxt);
-void __sysreg_restore_host_state(struct kvm_cpu_context *ctxt);
-void __sysreg_save_guest_state(struct kvm_cpu_context *ctxt);
-void __sysreg_restore_guest_state(struct kvm_cpu_context *ctxt);
+void __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt);
+void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt);
+void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt);
+void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt);
+void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt);
+void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt);
 void __sysreg32_save_state(struct kvm_vcpu *vcpu);
 void __sysreg32_restore_state(struct kvm_vcpu *vcpu);
 
-void __debug_save_state(struct kvm_vcpu *vcpu,
-			struct kvm_guest_debug_arch *dbg,
-			struct kvm_cpu_context *ctxt);
-void __debug_restore_state(struct kvm_vcpu *vcpu,
-			   struct kvm_guest_debug_arch *dbg,
-			   struct kvm_cpu_context *ctxt);
-void __debug_cond_save_host_state(struct kvm_vcpu *vcpu);
-void __debug_cond_restore_host_state(struct kvm_vcpu *vcpu);
+void __debug_switch_to_guest(struct kvm_vcpu *vcpu);
+void __debug_switch_to_host(struct kvm_vcpu *vcpu);
 
 void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
 void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
 bool __fpsimd_enabled(void);
 
+void activate_traps_vhe_load(struct kvm_vcpu *vcpu);
+void deactivate_traps_vhe_put(void);
+
 u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt);
 void __noreturn __hyp_do_panic(unsigned long, ...);
 
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 7faed6e48b46..082110993647 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -69,9 +69,6 @@
  * mappings, and none of this applies in that case.
  */
 
-#define HYP_PAGE_OFFSET_HIGH_MASK	((UL(1) << VA_BITS) - 1)
-#define HYP_PAGE_OFFSET_LOW_MASK	((UL(1) << (VA_BITS - 1)) - 1)
-
 #ifdef __ASSEMBLY__
 
 #include <asm/alternative.h>
@@ -81,28 +78,19 @@
  * Convert a kernel VA into a HYP VA.
  * reg: VA to be converted.
  *
- * This generates the following sequences:
- * - High mask:
- *		and x0, x0, #HYP_PAGE_OFFSET_HIGH_MASK
- *		nop
- * - Low mask:
- *		and x0, x0, #HYP_PAGE_OFFSET_HIGH_MASK
- *		and x0, x0, #HYP_PAGE_OFFSET_LOW_MASK
- * - VHE:
- *		nop
- *		nop
- *
- * The "low mask" version works because the mask is a strict subset of
- * the "high mask", hence performing the first mask for nothing.
- * Should be completely invisible on any viable CPU.
+ * The actual code generation takes place in kvm_update_va_mask, and
+ * the instructions below are only there to reserve the space and
+ * perform the register allocation (kvm_update_va_mask uses the
+ * specific registers encoded in the instructions).
  */
 .macro kern_hyp_va	reg
-alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
-	and     \reg, \reg, #HYP_PAGE_OFFSET_HIGH_MASK
-alternative_else_nop_endif
-alternative_if ARM64_HYP_OFFSET_LOW
-	and     \reg, \reg, #HYP_PAGE_OFFSET_LOW_MASK
-alternative_else_nop_endif
+alternative_cb kvm_update_va_mask
+	and     \reg, \reg, #1		/* mask with va_mask */
+	ror	\reg, \reg, #1		/* rotate to the first tag bit */
+	add	\reg, \reg, #0		/* insert the low 12 bits of the tag */
+	add	\reg, \reg, #0, lsl 12	/* insert the top 12 bits of the tag */
+	ror	\reg, \reg, #63		/* rotate back */
+alternative_cb_end
 .endm
 
 #else
@@ -113,24 +101,44 @@ alternative_else_nop_endif
 #include <asm/mmu_context.h>
 #include <asm/pgtable.h>
 
+void kvm_update_va_mask(struct alt_instr *alt,
+			__le32 *origptr, __le32 *updptr, int nr_inst);
+
 static inline unsigned long __kern_hyp_va(unsigned long v)
 {
-	asm volatile(ALTERNATIVE("and %0, %0, %1",
-				 "nop",
-				 ARM64_HAS_VIRT_HOST_EXTN)
-		     : "+r" (v)
-		     : "i" (HYP_PAGE_OFFSET_HIGH_MASK));
-	asm volatile(ALTERNATIVE("nop",
-				 "and %0, %0, %1",
-				 ARM64_HYP_OFFSET_LOW)
-		     : "+r" (v)
-		     : "i" (HYP_PAGE_OFFSET_LOW_MASK));
+	asm volatile(ALTERNATIVE_CB("and %0, %0, #1\n"
+				    "ror %0, %0, #1\n"
+				    "add %0, %0, #0\n"
+				    "add %0, %0, #0, lsl 12\n"
+				    "ror %0, %0, #63\n",
+				    kvm_update_va_mask)
+		     : "+r" (v));
 	return v;
 }
 
 #define kern_hyp_va(v) 	((typeof(v))(__kern_hyp_va((unsigned long)(v))))
 
 /*
+ * Obtain the PC-relative address of a kernel symbol
+ * s: symbol
+ *
+ * The goal of this macro is to return a symbol's address based on a
+ * PC-relative computation, as opposed to loading the VA from a
+ * constant pool or something similar. This works well for HYP, as an
+ * absolute VA is guaranteed to be wrong. Only use this if trying to
+ * obtain the address of a symbol (i.e. not something you obtained by
+ * following a pointer).
+ */
+#define hyp_symbol_addr(s)						\
+	({								\
+		typeof(s) *addr;					\
+		asm("adrp	%0, %1\n"				\
+		    "add	%0, %0, :lo12:%1\n"			\
+		    : "=r" (addr) : "S" (&s));				\
+		addr;							\
+	})
+
+/*
  * We currently only support a 40bit IPA.
  */
 #define KVM_PHYS_SHIFT	(40)
@@ -140,7 +148,11 @@ static inline unsigned long __kern_hyp_va(unsigned long v)
 #include <asm/stage2_pgtable.h>
 
 int create_hyp_mappings(void *from, void *to, pgprot_t prot);
-int create_hyp_io_mappings(void *from, void *to, phys_addr_t);
+int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
+			   void __iomem **kaddr,
+			   void __iomem **haddr);
+int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
+			     void **haddr);
 void free_hyp_pgds(void);
 
 void stage2_unmap_vm(struct kvm *kvm);
@@ -249,7 +261,7 @@ struct kvm;
 
 static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
 {
-	return (vcpu_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101;
+	return (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101;
 }
 
 static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
@@ -348,36 +360,95 @@ static inline unsigned int kvm_get_vmid_bits(void)
 	return (cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8;
 }
 
-#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
+#ifdef CONFIG_KVM_INDIRECT_VECTORS
+/*
+ * EL2 vectors can be mapped and rerouted in a number of ways,
+ * depending on the kernel configuration and CPU present:
+ *
+ * - If the CPU has the ARM64_HARDEN_BRANCH_PREDICTOR cap, the
+ *   hardening sequence is placed in one of the vector slots, which is
+ *   executed before jumping to the real vectors.
+ *
+ * - If the CPU has both the ARM64_HARDEN_EL2_VECTORS cap and the
+ *   ARM64_HARDEN_BRANCH_PREDICTOR cap, the slot containing the
+ *   hardening sequence is mapped next to the idmap page, and executed
+ *   before jumping to the real vectors.
+ *
+ * - If the CPU only has the ARM64_HARDEN_EL2_VECTORS cap, then an
+ *   empty slot is selected, mapped next to the idmap page, and
+ *   executed before jumping to the real vectors.
+ *
+ * Note that ARM64_HARDEN_EL2_VECTORS is somewhat incompatible with
+ * VHE, as we don't have hypervisor-specific mappings. If the system
+ * is VHE and yet selects this capability, it will be ignored.
+ */
 #include <asm/mmu.h>
 
+extern void *__kvm_bp_vect_base;
+extern int __kvm_harden_el2_vector_slot;
+
 static inline void *kvm_get_hyp_vector(void)
 {
 	struct bp_hardening_data *data = arm64_get_bp_hardening_data();
-	void *vect = kvm_ksym_ref(__kvm_hyp_vector);
+	void *vect = kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector));
+	int slot = -1;
 
-	if (data->fn) {
-		vect = __bp_harden_hyp_vecs_start +
-		       data->hyp_vectors_slot * SZ_2K;
+	if (cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR) && data->fn) {
+		vect = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs_start));
+		slot = data->hyp_vectors_slot;
+	}
 
-		if (!has_vhe())
-			vect = lm_alias(vect);
+	if (this_cpu_has_cap(ARM64_HARDEN_EL2_VECTORS) && !has_vhe()) {
+		vect = __kvm_bp_vect_base;
+		if (slot == -1)
+			slot = __kvm_harden_el2_vector_slot;
 	}
 
+	if (slot != -1)
+		vect += slot * SZ_2K;
+
 	return vect;
 }
 
+/*  This is only called on a !VHE system */
 static inline int kvm_map_vectors(void)
 {
-	return create_hyp_mappings(kvm_ksym_ref(__bp_harden_hyp_vecs_start),
-				   kvm_ksym_ref(__bp_harden_hyp_vecs_end),
-				   PAGE_HYP_EXEC);
-}
+	/*
+	 * HBP  = ARM64_HARDEN_BRANCH_PREDICTOR
+	 * HEL2 = ARM64_HARDEN_EL2_VECTORS
+	 *
+	 * !HBP + !HEL2 -> use direct vectors
+	 *  HBP + !HEL2 -> use hardened vectors in place
+	 * !HBP +  HEL2 -> allocate one vector slot and use exec mapping
+	 *  HBP +  HEL2 -> use hardened vectors and use exec mapping
+	 */
+	if (cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR)) {
+		__kvm_bp_vect_base = kvm_ksym_ref(__bp_harden_hyp_vecs_start);
+		__kvm_bp_vect_base = kern_hyp_va(__kvm_bp_vect_base);
+	}
+
+	if (cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)) {
+		phys_addr_t vect_pa = __pa_symbol(__bp_harden_hyp_vecs_start);
+		unsigned long size = (__bp_harden_hyp_vecs_end -
+				      __bp_harden_hyp_vecs_start);
+
+		/*
+		 * Always allocate a spare vector slot, as we don't
+		 * know yet which CPUs have a BP hardening slot that
+		 * we can reuse.
+		 */
+		__kvm_harden_el2_vector_slot = atomic_inc_return(&arm64_el2_vector_last_slot);
+		BUG_ON(__kvm_harden_el2_vector_slot >= BP_HARDEN_EL2_SLOTS);
+		return create_hyp_exec_mappings(vect_pa, size,
+						&__kvm_bp_vect_base);
+	}
 
+	return 0;
+}
 #else
 static inline void *kvm_get_hyp_vector(void)
 {
-	return kvm_ksym_ref(__kvm_hyp_vector);
+	return kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector));
 }
 
 static inline int kvm_map_vectors(void)
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 50fa96a49792..49d99214f43c 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -29,12 +29,6 @@
 #include <asm/sizes.h>
 
 /*
- * Allow for constants defined here to be used from assembly code
- * by prepending the UL suffix only with actual C code compilation.
- */
-#define UL(x) _AC(x, UL)
-
-/*
  * Size of the PCI I/O space. This must remain a power of two so that
  * IO_SPACE_LIMIT acts as a mask for the low bits of I/O addresses.
  */
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index a050d4f3615d..dd320df0d026 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -21,6 +21,8 @@
 #define USER_ASID_FLAG	(UL(1) << USER_ASID_BIT)
 #define TTBR_ASID_MASK	(UL(0xffff) << 48)
 
+#define BP_HARDEN_EL2_SLOTS 4
+
 #ifndef __ASSEMBLY__
 
 typedef struct {
@@ -49,9 +51,13 @@ struct bp_hardening_data {
 	bp_hardening_cb_t	fn;
 };
 
-#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
+#if (defined(CONFIG_HARDEN_BRANCH_PREDICTOR) ||	\
+     defined(CONFIG_HARDEN_EL2_VECTORS))
 extern char __bp_harden_hyp_vecs_start[], __bp_harden_hyp_vecs_end[];
+extern atomic_t arm64_el2_vector_last_slot;
+#endif  /* CONFIG_HARDEN_BRANCH_PREDICTOR || CONFIG_HARDEN_EL2_VECTORS */
 
+#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
 DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data);
 
 static inline struct bp_hardening_data *arm64_get_bp_hardening_data(void)
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index e7b9f154e476..6171178075dc 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -288,6 +288,12 @@
 #define SYS_MAIR_EL1			sys_reg(3, 0, 10, 2, 0)
 #define SYS_AMAIR_EL1			sys_reg(3, 0, 10, 3, 0)
 
+#define SYS_LORSA_EL1			sys_reg(3, 0, 10, 4, 0)
+#define SYS_LOREA_EL1			sys_reg(3, 0, 10, 4, 1)
+#define SYS_LORN_EL1			sys_reg(3, 0, 10, 4, 2)
+#define SYS_LORC_EL1			sys_reg(3, 0, 10, 4, 3)
+#define SYS_LORID_EL1			sys_reg(3, 0, 10, 4, 7)
+
 #define SYS_VBAR_EL1			sys_reg(3, 0, 12, 0, 0)
 #define SYS_DISR_EL1			sys_reg(3, 0, 12, 1, 1)
 
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 6a4bd80c75bd..bf825f38d206 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -55,10 +55,6 @@ arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o
 arm64-obj-$(CONFIG_CRASH_DUMP)		+= crash_dump.o
 arm64-obj-$(CONFIG_ARM_SDE_INTERFACE)	+= sdei.o
 
-ifeq ($(CONFIG_KVM),y)
-arm64-obj-$(CONFIG_HARDEN_BRANCH_PREDICTOR)	+= bpi.o
-endif
-
 obj-y					+= $(arm64-obj-y) vdso/ probes/
 obj-m					+= $(arm64-obj-m)
 head-y					:= head.o
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index 414288a558c8..5c4bce4ac381 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -107,32 +107,53 @@ static u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnp
 	return insn;
 }
 
+static void patch_alternative(struct alt_instr *alt,
+			      __le32 *origptr, __le32 *updptr, int nr_inst)
+{
+	__le32 *replptr;
+	int i;
+
+	replptr = ALT_REPL_PTR(alt);
+	for (i = 0; i < nr_inst; i++) {
+		u32 insn;
+
+		insn = get_alt_insn(alt, origptr + i, replptr + i);
+		updptr[i] = cpu_to_le32(insn);
+	}
+}
+
 static void __apply_alternatives(void *alt_region, bool use_linear_alias)
 {
 	struct alt_instr *alt;
 	struct alt_region *region = alt_region;
-	__le32 *origptr, *replptr, *updptr;
+	__le32 *origptr, *updptr;
+	alternative_cb_t alt_cb;
 
 	for (alt = region->begin; alt < region->end; alt++) {
-		u32 insn;
-		int i, nr_inst;
+		int nr_inst;
 
-		if (!cpus_have_cap(alt->cpufeature))
+		/* Use ARM64_CB_PATCH as an unconditional patch */
+		if (alt->cpufeature < ARM64_CB_PATCH &&
+		    !cpus_have_cap(alt->cpufeature))
 			continue;
 
-		BUG_ON(alt->alt_len != alt->orig_len);
+		if (alt->cpufeature == ARM64_CB_PATCH)
+			BUG_ON(alt->alt_len != 0);
+		else
+			BUG_ON(alt->alt_len != alt->orig_len);
 
 		pr_info_once("patching kernel code\n");
 
 		origptr = ALT_ORIG_PTR(alt);
-		replptr = ALT_REPL_PTR(alt);
 		updptr = use_linear_alias ? lm_alias(origptr) : origptr;
-		nr_inst = alt->alt_len / sizeof(insn);
+		nr_inst = alt->orig_len / AARCH64_INSN_SIZE;
 
-		for (i = 0; i < nr_inst; i++) {
-			insn = get_alt_insn(alt, origptr + i, replptr + i);
-			updptr[i] = cpu_to_le32(insn);
-		}
+		if (alt->cpufeature < ARM64_CB_PATCH)
+			alt_cb = patch_alternative;
+		else
+			alt_cb  = ALT_REPL_PTR(alt);
+
+		alt_cb(alt, origptr, updptr, nr_inst);
 
 		flush_icache_range((uintptr_t)origptr,
 				   (uintptr_t)(origptr + nr_inst));
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 1303e04110cd..5bdda651bd05 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -23,6 +23,7 @@
 #include <linux/mm.h>
 #include <linux/dma-mapping.h>
 #include <linux/kvm_host.h>
+#include <linux/preempt.h>
 #include <linux/suspend.h>
 #include <asm/cpufeature.h>
 #include <asm/fixmap.h>
@@ -93,6 +94,8 @@ int main(void)
   DEFINE(DMA_TO_DEVICE,		DMA_TO_DEVICE);
   DEFINE(DMA_FROM_DEVICE,	DMA_FROM_DEVICE);
   BLANK();
+  DEFINE(PREEMPT_DISABLE_OFFSET, PREEMPT_DISABLE_OFFSET);
+  BLANK();
   DEFINE(CLOCK_REALTIME,	CLOCK_REALTIME);
   DEFINE(CLOCK_MONOTONIC,	CLOCK_MONOTONIC);
   DEFINE(CLOCK_MONOTONIC_RAW,	CLOCK_MONOTONIC_RAW);
@@ -138,6 +141,7 @@ int main(void)
   DEFINE(CPU_FP_REGS,		offsetof(struct kvm_regs, fp_regs));
   DEFINE(VCPU_FPEXC32_EL2,	offsetof(struct kvm_vcpu, arch.ctxt.sys_regs[FPEXC32_EL2]));
   DEFINE(VCPU_HOST_CONTEXT,	offsetof(struct kvm_vcpu, arch.host_cpu_context));
+  DEFINE(HOST_CONTEXT_VCPU,	offsetof(struct kvm_cpu_context, __hyp_running_vcpu));
 #endif
 #ifdef CONFIG_CPU_PM
   DEFINE(CPU_SUSPEND_SZ,	sizeof(struct cpu_suspend_ctx));
diff --git a/arch/arm64/kernel/bpi.S b/arch/arm64/kernel/bpi.S
deleted file mode 100644
index e5de33513b5d..000000000000
--- a/arch/arm64/kernel/bpi.S
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Contains CPU specific branch predictor invalidation sequences
- *
- * Copyright (C) 2018 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/linkage.h>
-#include <linux/arm-smccc.h>
-
-.macro ventry target
-	.rept 31
-	nop
-	.endr
-	b	\target
-.endm
-
-.macro vectors target
-	ventry \target + 0x000
-	ventry \target + 0x080
-	ventry \target + 0x100
-	ventry \target + 0x180
-
-	ventry \target + 0x200
-	ventry \target + 0x280
-	ventry \target + 0x300
-	ventry \target + 0x380
-
-	ventry \target + 0x400
-	ventry \target + 0x480
-	ventry \target + 0x500
-	ventry \target + 0x580
-
-	ventry \target + 0x600
-	ventry \target + 0x680
-	ventry \target + 0x700
-	ventry \target + 0x780
-.endm
-
-	.align	11
-ENTRY(__bp_harden_hyp_vecs_start)
-	.rept 4
-	vectors __kvm_hyp_vector
-	.endr
-ENTRY(__bp_harden_hyp_vecs_end)
-
-ENTRY(__qcom_hyp_sanitize_link_stack_start)
-	stp     x29, x30, [sp, #-16]!
-	.rept	16
-	bl	. + 4
-	.endr
-	ldp	x29, x30, [sp], #16
-ENTRY(__qcom_hyp_sanitize_link_stack_end)
-
-.macro smccc_workaround_1 inst
-	sub	sp, sp, #(8 * 4)
-	stp	x2, x3, [sp, #(8 * 0)]
-	stp	x0, x1, [sp, #(8 * 2)]
-	mov	w0, #ARM_SMCCC_ARCH_WORKAROUND_1
-	\inst	#0
-	ldp	x2, x3, [sp, #(8 * 0)]
-	ldp	x0, x1, [sp, #(8 * 2)]
-	add	sp, sp, #(8 * 4)
-.endm
-
-ENTRY(__smccc_workaround_1_smc_start)
-	smccc_workaround_1	smc
-ENTRY(__smccc_workaround_1_smc_end)
-
-ENTRY(__smccc_workaround_1_hvc_start)
-	smccc_workaround_1	hvc
-ENTRY(__smccc_workaround_1_hvc_end)
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 2df792771053..a900befadfe8 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -78,19 +78,17 @@ cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *__unused)
 	config_sctlr_el1(SCTLR_EL1_UCT, 0);
 }
 
+atomic_t arm64_el2_vector_last_slot = ATOMIC_INIT(-1);
+
 #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
 
 DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data);
 
-#ifdef CONFIG_KVM
-extern char __qcom_hyp_sanitize_link_stack_start[];
-extern char __qcom_hyp_sanitize_link_stack_end[];
+#ifdef CONFIG_KVM_INDIRECT_VECTORS
 extern char __smccc_workaround_1_smc_start[];
 extern char __smccc_workaround_1_smc_end[];
-extern char __smccc_workaround_1_hvc_start[];
-extern char __smccc_workaround_1_hvc_end[];
 
 static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start,
 				const char *hyp_vecs_end)
@@ -108,7 +106,6 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn,
 				      const char *hyp_vecs_start,
 				      const char *hyp_vecs_end)
 {
-	static int last_slot = -1;
 	static DEFINE_SPINLOCK(bp_lock);
 	int cpu, slot = -1;
 
@@ -121,10 +118,8 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn,
 	}
 
 	if (slot == -1) {
-		last_slot++;
-		BUG_ON(((__bp_harden_hyp_vecs_end - __bp_harden_hyp_vecs_start)
-			/ SZ_2K) <= last_slot);
-		slot = last_slot;
+		slot = atomic_inc_return(&arm64_el2_vector_last_slot);
+		BUG_ON(slot >= BP_HARDEN_EL2_SLOTS);
 		__copy_hyp_vect_bpi(slot, hyp_vecs_start, hyp_vecs_end);
 	}
 
@@ -133,12 +128,8 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn,
 	spin_unlock(&bp_lock);
 }
 #else
-#define __qcom_hyp_sanitize_link_stack_start	NULL
-#define __qcom_hyp_sanitize_link_stack_end	NULL
 #define __smccc_workaround_1_smc_start		NULL
 #define __smccc_workaround_1_smc_end		NULL
-#define __smccc_workaround_1_hvc_start		NULL
-#define __smccc_workaround_1_hvc_end		NULL
 
 static void __install_bp_hardening_cb(bp_hardening_cb_t fn,
 				      const char *hyp_vecs_start,
@@ -146,7 +137,7 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn,
 {
 	__this_cpu_write(bp_hardening_data.fn, fn);
 }
-#endif	/* CONFIG_KVM */
+#endif	/* CONFIG_KVM_INDIRECT_VECTORS */
 
 static void  install_bp_hardening_cb(const struct arm64_cpu_capabilities *entry,
 				     bp_hardening_cb_t fn,
@@ -179,12 +170,25 @@ static void call_hvc_arch_workaround_1(void)
 	arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL);
 }
 
+static void qcom_link_stack_sanitization(void)
+{
+	u64 tmp;
+
+	asm volatile("mov	%0, x30		\n"
+		     ".rept	16		\n"
+		     "bl	. + 4		\n"
+		     ".endr			\n"
+		     "mov	x30, %0		\n"
+		     : "=&r" (tmp));
+}
+
 static void
 enable_smccc_arch_workaround_1(const struct arm64_cpu_capabilities *entry)
 {
 	bp_hardening_cb_t cb;
 	void *smccc_start, *smccc_end;
 	struct arm_smccc_res res;
+	u32 midr = read_cpuid_id();
 
 	if (!entry->matches(entry, SCOPE_LOCAL_CPU))
 		return;
@@ -199,8 +203,9 @@ enable_smccc_arch_workaround_1(const struct arm64_cpu_capabilities *entry)
 		if ((int)res.a0 < 0)
 			return;
 		cb = call_hvc_arch_workaround_1;
-		smccc_start = __smccc_workaround_1_hvc_start;
-		smccc_end = __smccc_workaround_1_hvc_end;
+		/* This is a guest, no need to patch KVM vectors */
+		smccc_start = NULL;
+		smccc_end = NULL;
 		break;
 
 	case PSCI_CONDUIT_SMC:
@@ -217,30 +222,14 @@ enable_smccc_arch_workaround_1(const struct arm64_cpu_capabilities *entry)
 		return;
 	}
 
+	if (((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR) ||
+	    ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1))
+		cb = qcom_link_stack_sanitization;
+
 	install_bp_hardening_cb(entry, cb, smccc_start, smccc_end);
 
 	return;
 }
-
-static void qcom_link_stack_sanitization(void)
-{
-	u64 tmp;
-
-	asm volatile("mov	%0, x30		\n"
-		     ".rept	16		\n"
-		     "bl	. + 4		\n"
-		     ".endr			\n"
-		     "mov	x30, %0		\n"
-		     : "=&r" (tmp));
-}
-
-static void
-qcom_enable_link_stack_sanitization(const struct arm64_cpu_capabilities *entry)
-{
-	install_bp_hardening_cb(entry, qcom_link_stack_sanitization,
-				__qcom_hyp_sanitize_link_stack_start,
-				__qcom_hyp_sanitize_link_stack_end);
-}
 #endif	/* CONFIG_HARDEN_BRANCH_PREDICTOR */
 
 #define CAP_MIDR_RANGE(model, v_min, r_min, v_max, r_max)	\
@@ -325,24 +314,18 @@ static const struct midr_range arm64_bp_harden_smccc_cpus[] = {
 	MIDR_ALL_VERSIONS(MIDR_CORTEX_A75),
 	MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN),
 	MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2),
-	{},
-};
-
-static const struct midr_range qcom_bp_harden_cpus[] = {
 	MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1),
 	MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR),
 	{},
 };
 
-static const struct arm64_cpu_capabilities arm64_bp_harden_list[] = {
-	{
-		CAP_MIDR_RANGE_LIST(arm64_bp_harden_smccc_cpus),
-		.cpu_enable = enable_smccc_arch_workaround_1,
-	},
-	{
-		CAP_MIDR_RANGE_LIST(qcom_bp_harden_cpus),
-		.cpu_enable = qcom_enable_link_stack_sanitization,
-	},
+#endif
+
+#ifdef CONFIG_HARDEN_EL2_VECTORS
+
+static const struct midr_range arm64_harden_el2_vectors[] = {
+	MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
+	MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
 	{},
 };
 
@@ -492,13 +475,16 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 	{
 		.capability = ARM64_HARDEN_BRANCH_PREDICTOR,
 		.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
-		.matches = multi_entry_cap_matches,
-		.cpu_enable = multi_entry_cap_cpu_enable,
-		.match_list = arm64_bp_harden_list,
+		.cpu_enable = enable_smccc_arch_workaround_1,
+		ERRATA_MIDR_RANGE_LIST(arm64_bp_harden_smccc_cpus),
 	},
+#endif
+#ifdef CONFIG_HARDEN_EL2_VECTORS
 	{
-		.capability = ARM64_HARDEN_BP_POST_GUEST_EXIT,
-		ERRATA_MIDR_RANGE_LIST(qcom_bp_harden_cpus),
+		.desc = "EL2 vector hardening",
+		.capability = ARM64_HARDEN_EL2_VECTORS,
+		.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+		ERRATA_MIDR_RANGE_LIST(arm64_harden_el2_vectors),
 	},
 #endif
 	{
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 96b15d7b10a8..536d572e5596 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -838,19 +838,6 @@ static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry, int _
 		MIDR_CPU_VAR_REV(1, MIDR_REVISION_MASK));
 }
 
-static bool hyp_offset_low(const struct arm64_cpu_capabilities *entry,
-			   int __unused)
-{
-	phys_addr_t idmap_addr = __pa_symbol(__hyp_idmap_text_start);
-
-	/*
-	 * Activate the lower HYP offset only if:
-	 * - the idmap doesn't clash with it,
-	 * - the kernel is not running at EL2.
-	 */
-	return idmap_addr > GENMASK(VA_BITS - 2, 0) && !is_kernel_in_hyp_mode();
-}
-
 static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unused)
 {
 	u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
@@ -1121,12 +1108,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.field_pos = ID_AA64PFR0_EL0_SHIFT,
 		.min_field_value = ID_AA64PFR0_EL0_32BIT_64BIT,
 	},
-	{
-		.desc = "Reduced HYP mapping offset",
-		.capability = ARM64_HYP_OFFSET_LOW,
-		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
-		.matches = hyp_offset_low,
-	},
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
 	{
 		.desc = "Kernel page table isolation (KPTI)",
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 2b6b8b24e5ab..b0853069702f 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -577,6 +577,13 @@ set_hcr:
 7:
 	msr	mdcr_el2, x3			// Configure debug traps
 
+	/* LORegions */
+	mrs	x1, id_aa64mmfr1_el1
+	ubfx	x0, x1, #ID_AA64MMFR1_LOR_SHIFT, 4
+	cbz	x0, 1f
+	msr_s	SYS_LORC_EL1, xzr
+1:
+
 	/* Stage-2 translation */
 	msr	vttbr_el2, xzr
 
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index 2718a77da165..816d03c4c913 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -35,6 +35,7 @@
 
 #define AARCH64_INSN_SF_BIT	BIT(31)
 #define AARCH64_INSN_N_BIT	BIT(22)
+#define AARCH64_INSN_LSL_12	BIT(22)
 
 static int aarch64_insn_encoding_class[] = {
 	AARCH64_INSN_CLS_UNKNOWN,
@@ -343,6 +344,10 @@ static int __kprobes aarch64_get_imm_shift_mask(enum aarch64_insn_imm_type type,
 		mask = BIT(6) - 1;
 		shift = 16;
 		break;
+	case AARCH64_INSN_IMM_N:
+		mask = 1;
+		shift = 22;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -899,9 +904,18 @@ u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst,
 		return AARCH64_BREAK_FAULT;
 	}
 
+	/* We can't encode more than a 24bit value (12bit + 12bit shift) */
+	if (imm & ~(BIT(24) - 1))
+		goto out;
+
+	/* If we have something in the top 12 bits... */
 	if (imm & ~(SZ_4K - 1)) {
-		pr_err("%s: invalid immediate encoding %d\n", __func__, imm);
-		return AARCH64_BREAK_FAULT;
+		/* ... and in the low 12 bits -> error */
+		if (imm & (SZ_4K - 1))
+			goto out;
+
+		imm >>= 12;
+		insn |= AARCH64_INSN_LSL_12;
 	}
 
 	insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst);
@@ -909,6 +923,10 @@ u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst,
 	insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, src);
 
 	return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_12, insn, imm);
+
+out:
+	pr_err("%s: invalid immediate encoding %d\n", __func__, imm);
+	return AARCH64_BREAK_FAULT;
 }
 
 u32 aarch64_insn_gen_bitfield(enum aarch64_insn_register dst,
@@ -1481,3 +1499,171 @@ pstate_check_t * const aarch32_opcode_cond_checks[16] = {
 	__check_hi, __check_ls, __check_ge, __check_lt,
 	__check_gt, __check_le, __check_al, __check_al
 };
+
+static bool range_of_ones(u64 val)
+{
+	/* Doesn't handle full ones or full zeroes */
+	u64 sval = val >> __ffs64(val);
+
+	/* One of Sean Eron Anderson's bithack tricks */
+	return ((sval + 1) & (sval)) == 0;
+}
+
+static u32 aarch64_encode_immediate(u64 imm,
+				    enum aarch64_insn_variant variant,
+				    u32 insn)
+{
+	unsigned int immr, imms, n, ones, ror, esz, tmp;
+	u64 mask = ~0UL;
+
+	/* Can't encode full zeroes or full ones */
+	if (!imm || !~imm)
+		return AARCH64_BREAK_FAULT;
+
+	switch (variant) {
+	case AARCH64_INSN_VARIANT_32BIT:
+		if (upper_32_bits(imm))
+			return AARCH64_BREAK_FAULT;
+		esz = 32;
+		break;
+	case AARCH64_INSN_VARIANT_64BIT:
+		insn |= AARCH64_INSN_SF_BIT;
+		esz = 64;
+		break;
+	default:
+		pr_err("%s: unknown variant encoding %d\n", __func__, variant);
+		return AARCH64_BREAK_FAULT;
+	}
+
+	/*
+	 * Inverse of Replicate(). Try to spot a repeating pattern
+	 * with a pow2 stride.
+	 */
+	for (tmp = esz / 2; tmp >= 2; tmp /= 2) {
+		u64 emask = BIT(tmp) - 1;
+
+		if ((imm & emask) != ((imm >> tmp) & emask))
+			break;
+
+		esz = tmp;
+		mask = emask;
+	}
+
+	/* N is only set if we're encoding a 64bit value */
+	n = esz == 64;
+
+	/* Trim imm to the element size */
+	imm &= mask;
+
+	/* That's how many ones we need to encode */
+	ones = hweight64(imm);
+
+	/*
+	 * imms is set to (ones - 1), prefixed with a string of ones
+	 * and a zero if they fit. Cap it to 6 bits.
+	 */
+	imms  = ones - 1;
+	imms |= 0xf << ffs(esz);
+	imms &= BIT(6) - 1;
+
+	/* Compute the rotation */
+	if (range_of_ones(imm)) {
+		/*
+		 * Pattern: 0..01..10..0
+		 *
+		 * Compute how many rotations we need to align it right
+		 */
+		ror = __ffs64(imm);
+	} else {
+		/*
+		 * Pattern: 0..01..10..01..1
+		 *
+		 * Fill the unused top bits with ones, and check if
+		 * the result is a valid immediate (all ones with a
+		 * contiguous range of zeroes).
+		 */
+		imm |= ~mask;
+		if (!range_of_ones(~imm))
+			return AARCH64_BREAK_FAULT;
+
+		/*
+		 * Rotation that puts the run of ones at bit 0. ~imm
+		 * is 64 bits wide, so use fls64() (fls() truncates).
+		 */
+		ror = fls64(~imm);
+	}
+
+	/*
+	 * immr is the number of bits we need to rotate back to the
+	 * original set of ones. Note that this is relative to the
+	 * element size...
+	 */
+	immr = (esz - ror) % esz;
+
+	insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_N, insn, n);
+	insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_R, insn, immr);
+	return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_S, insn, imms);
+}
+
+u32 aarch64_insn_gen_logical_immediate(enum aarch64_insn_logic_type type,
+				       enum aarch64_insn_variant variant,
+				       enum aarch64_insn_register Rn,
+				       enum aarch64_insn_register Rd,
+				       u64 imm)
+{
+	u32 insn;
+
+	switch (type) {
+	case AARCH64_INSN_LOGIC_AND:
+		insn = aarch64_insn_get_and_imm_value();
+		break;
+	case AARCH64_INSN_LOGIC_ORR:
+		insn = aarch64_insn_get_orr_imm_value();
+		break;
+	case AARCH64_INSN_LOGIC_EOR:
+		insn = aarch64_insn_get_eor_imm_value();
+		break;
+	case AARCH64_INSN_LOGIC_AND_SETFLAGS:
+		insn = aarch64_insn_get_ands_imm_value();
+		break;
+	default:
+		pr_err("%s: unknown logical encoding %d\n", __func__, type);
+		return AARCH64_BREAK_FAULT;
+	}
+
+	insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, Rd);
+	insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, Rn);
+	return aarch64_encode_immediate(imm, variant, insn);
+}
+
+u32 aarch64_insn_gen_extr(enum aarch64_insn_variant variant,
+			  enum aarch64_insn_register Rm,
+			  enum aarch64_insn_register Rn,
+			  enum aarch64_insn_register Rd,
+			  u8 lsb)
+{
+	u32 insn;
+
+	insn = aarch64_insn_get_extr_value();
+
+	switch (variant) {
+	case AARCH64_INSN_VARIANT_32BIT:
+		if (lsb > 31)
+			return AARCH64_BREAK_FAULT;
+		break;
+	case AARCH64_INSN_VARIANT_64BIT:
+		if (lsb > 63)
+			return AARCH64_BREAK_FAULT;
+		insn |= AARCH64_INSN_SF_BIT;
+		insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_N, insn, 1);
+		break;
+	default:
+		pr_err("%s: unknown variant encoding %d\n", __func__, variant);
+		return AARCH64_BREAK_FAULT;
+	}
+
+	insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_S, insn, lsb);
+	insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, Rd);
+	insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, Rn);
+	return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, insn, Rm);
+}
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 2257dfcc44cc..a2e3a5af1113 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -57,6 +57,9 @@ config KVM_ARM_PMU
 	  Adds support for a virtual Performance Monitoring Unit (PMU) in
 	  virtual machines.
 
+config KVM_INDIRECT_VECTORS
+       def_bool KVM && (HARDEN_BRANCH_PREDICTOR || HARDEN_EL2_VECTORS)
+
 source drivers/vhost/Kconfig
 
 endif # VIRTUALIZATION
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 87c4f7ae24de..93afff91cb7c 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -16,7 +16,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/e
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arm.o $(KVM)/arm/mmu.o $(KVM)/arm/mmio.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/psci.o $(KVM)/arm/perf.o
 
-kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o
+kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o va_layout.o
 kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
 kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o
 kvm-$(CONFIG_KVM_ARM_HOST) += vgic-sys-reg-v3.o
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
index fa63b28c65e0..a1f4ebdfe6d3 100644
--- a/arch/arm64/kvm/debug.c
+++ b/arch/arm64/kvm/debug.c
@@ -46,7 +46,9 @@ static DEFINE_PER_CPU(u32, mdcr_el2);
  */
 static void save_guest_debug_regs(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.guest_debug_preserved.mdscr_el1 = vcpu_sys_reg(vcpu, MDSCR_EL1);
+	u64 val = vcpu_read_sys_reg(vcpu, MDSCR_EL1);
+
+	vcpu->arch.guest_debug_preserved.mdscr_el1 = val;
 
 	trace_kvm_arm_set_dreg32("Saved MDSCR_EL1",
 				vcpu->arch.guest_debug_preserved.mdscr_el1);
@@ -54,10 +56,12 @@ static void save_guest_debug_regs(struct kvm_vcpu *vcpu)
 
 static void restore_guest_debug_regs(struct kvm_vcpu *vcpu)
 {
-	vcpu_sys_reg(vcpu, MDSCR_EL1) = vcpu->arch.guest_debug_preserved.mdscr_el1;
+	u64 val = vcpu->arch.guest_debug_preserved.mdscr_el1;
+
+	vcpu_write_sys_reg(vcpu, val, MDSCR_EL1);
 
 	trace_kvm_arm_set_dreg32("Restored MDSCR_EL1",
-				vcpu_sys_reg(vcpu, MDSCR_EL1));
+				vcpu_read_sys_reg(vcpu, MDSCR_EL1));
 }
 
 /**
@@ -108,6 +112,7 @@ void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu)
 void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
 {
 	bool trap_debug = !(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY);
+	unsigned long mdscr;
 
 	trace_kvm_arm_setup_debug(vcpu, vcpu->guest_debug);
 
@@ -152,9 +157,13 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
 		 */
 		if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
 			*vcpu_cpsr(vcpu) |=  DBG_SPSR_SS;
-			vcpu_sys_reg(vcpu, MDSCR_EL1) |= DBG_MDSCR_SS;
+			mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1);
+			mdscr |= DBG_MDSCR_SS;
+			vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1);
 		} else {
-			vcpu_sys_reg(vcpu, MDSCR_EL1) &= ~DBG_MDSCR_SS;
+			mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1);
+			mdscr &= ~DBG_MDSCR_SS;
+			vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1);
 		}
 
 		trace_kvm_arm_set_dreg32("SPSR_EL2", *vcpu_cpsr(vcpu));
@@ -170,7 +179,9 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
 		 */
 		if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW) {
 			/* Enable breakpoints/watchpoints */
-			vcpu_sys_reg(vcpu, MDSCR_EL1) |= DBG_MDSCR_MDE;
+			mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1);
+			mdscr |= DBG_MDSCR_MDE;
+			vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1);
 
 			vcpu->arch.debug_ptr = &vcpu->arch.external_debug_state;
 			vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
@@ -193,8 +204,12 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
 	if (trap_debug)
 		vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA;
 
+	/* If KDE or MDE are set, perform a full save/restore cycle. */
+	if (vcpu_read_sys_reg(vcpu, MDSCR_EL1) & (DBG_MDSCR_KDE | DBG_MDSCR_MDE))
+		vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
+
 	trace_kvm_arm_set_dreg32("MDCR_EL2", vcpu->arch.mdcr_el2);
-	trace_kvm_arm_set_dreg32("MDSCR_EL1", vcpu_sys_reg(vcpu, MDSCR_EL1));
+	trace_kvm_arm_set_dreg32("MDSCR_EL1", vcpu_read_sys_reg(vcpu, MDSCR_EL1));
 }
 
 void kvm_arm_clear_debug(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index 5aa9ccf6db99..6fd91b31a131 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -117,7 +117,6 @@ CPU_BE(	orr	x4, x4, #SCTLR_ELx_EE)
 	/* Set the stack and new vectors */
 	kern_hyp_va	x1
 	mov	sp, x1
-	kern_hyp_va	x2
 	msr	vbar_el2, x2
 
 	/* copy tpidr_el1 into tpidr_el2 for use by HYP */
diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
index f04400d494b7..4313f7475333 100644
--- a/arch/arm64/kvm/hyp/Makefile
+++ b/arch/arm64/kvm/hyp/Makefile
@@ -7,10 +7,10 @@ ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING
 
 KVM=../../../../virt/kvm
 
-obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o
 obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o
 obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o
 
+obj-$(CONFIG_KVM_ARM_HOST) += vgic-v2-cpuif-proxy.o
 obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o
 obj-$(CONFIG_KVM_ARM_HOST) += debug-sr.o
 obj-$(CONFIG_KVM_ARM_HOST) += entry.o
diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c
index dabb5cc7b087..3e717f66f011 100644
--- a/arch/arm64/kvm/hyp/debug-sr.c
+++ b/arch/arm64/kvm/hyp/debug-sr.c
@@ -66,11 +66,6 @@
 	default:	write_debug(ptr[0], reg, 0);			\
 	}
 
-static void __hyp_text __debug_save_spe_vhe(u64 *pmscr_el1)
-{
-	/* The vcpu can run. but it can't hide. */
-}
-
 static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1)
 {
 	u64 reg;
@@ -103,11 +98,7 @@ static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1)
 	dsb(nsh);
 }
 
-static hyp_alternate_select(__debug_save_spe,
-			    __debug_save_spe_nvhe, __debug_save_spe_vhe,
-			    ARM64_HAS_VIRT_HOST_EXTN);
-
-static void __hyp_text __debug_restore_spe(u64 pmscr_el1)
+static void __hyp_text __debug_restore_spe_nvhe(u64 pmscr_el1)
 {
 	if (!pmscr_el1)
 		return;
@@ -119,16 +110,13 @@ static void __hyp_text __debug_restore_spe(u64 pmscr_el1)
 	write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1);
 }
 
-void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu,
-				   struct kvm_guest_debug_arch *dbg,
-				   struct kvm_cpu_context *ctxt)
+static void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu,
+					  struct kvm_guest_debug_arch *dbg,
+					  struct kvm_cpu_context *ctxt)
 {
 	u64 aa64dfr0;
 	int brps, wrps;
 
-	if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY))
-		return;
-
 	aa64dfr0 = read_sysreg(id_aa64dfr0_el1);
 	brps = (aa64dfr0 >> 12) & 0xf;
 	wrps = (aa64dfr0 >> 20) & 0xf;
@@ -141,16 +129,13 @@ void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu,
 	ctxt->sys_regs[MDCCINT_EL1] = read_sysreg(mdccint_el1);
 }
 
-void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu,
-				      struct kvm_guest_debug_arch *dbg,
-				      struct kvm_cpu_context *ctxt)
+static void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu,
+					     struct kvm_guest_debug_arch *dbg,
+					     struct kvm_cpu_context *ctxt)
 {
 	u64 aa64dfr0;
 	int brps, wrps;
 
-	if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY))
-		return;
-
 	aa64dfr0 = read_sysreg(id_aa64dfr0_el1);
 
 	brps = (aa64dfr0 >> 12) & 0xf;
@@ -164,27 +149,54 @@ void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu,
 	write_sysreg(ctxt->sys_regs[MDCCINT_EL1], mdccint_el1);
 }
 
-void __hyp_text __debug_cond_save_host_state(struct kvm_vcpu *vcpu)
+void __hyp_text __debug_switch_to_guest(struct kvm_vcpu *vcpu)
 {
-	/* If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY is set, perform
-	 * a full save/restore cycle. */
-	if ((vcpu->arch.ctxt.sys_regs[MDSCR_EL1] & DBG_MDSCR_KDE) ||
-	    (vcpu->arch.ctxt.sys_regs[MDSCR_EL1] & DBG_MDSCR_MDE))
-		vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
-
-	__debug_save_state(vcpu, &vcpu->arch.host_debug_state.regs,
-			   kern_hyp_va(vcpu->arch.host_cpu_context));
-	__debug_save_spe()(&vcpu->arch.host_debug_state.pmscr_el1);
+	struct kvm_cpu_context *host_ctxt;
+	struct kvm_cpu_context *guest_ctxt;
+	struct kvm_guest_debug_arch *host_dbg;
+	struct kvm_guest_debug_arch *guest_dbg;
+
+	/*
+	 * Non-VHE: Disable and flush SPE data generation
+	 * VHE: The vcpu can run, but it can't hide.
+	 */
+	if (!has_vhe())
+		__debug_save_spe_nvhe(&vcpu->arch.host_debug_state.pmscr_el1);
+
+	if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY))
+		return;
+
+	host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
+	guest_ctxt = &vcpu->arch.ctxt;
+	host_dbg = &vcpu->arch.host_debug_state.regs;
+	guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr);
+
+	__debug_save_state(vcpu, host_dbg, host_ctxt);
+	__debug_restore_state(vcpu, guest_dbg, guest_ctxt);
 }
 
-void __hyp_text __debug_cond_restore_host_state(struct kvm_vcpu *vcpu)
+void __hyp_text __debug_switch_to_host(struct kvm_vcpu *vcpu)
 {
-	__debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1);
-	__debug_restore_state(vcpu, &vcpu->arch.host_debug_state.regs,
-			      kern_hyp_va(vcpu->arch.host_cpu_context));
+	struct kvm_cpu_context *host_ctxt;
+	struct kvm_cpu_context *guest_ctxt;
+	struct kvm_guest_debug_arch *host_dbg;
+	struct kvm_guest_debug_arch *guest_dbg;
+
+	if (!has_vhe())
+		__debug_restore_spe_nvhe(vcpu->arch.host_debug_state.pmscr_el1);
+
+	if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY))
+		return;
+
+	host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
+	guest_ctxt = &vcpu->arch.ctxt;
+	host_dbg = &vcpu->arch.host_debug_state.regs;
+	guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr);
+
+	__debug_save_state(vcpu, guest_dbg, guest_ctxt);
+	__debug_restore_state(vcpu, host_dbg, host_ctxt);
 
-	if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
-		vcpu->arch.debug_flags &= ~KVM_ARM64_DEBUG_DIRTY;
+	vcpu->arch.debug_flags &= ~KVM_ARM64_DEBUG_DIRTY;
 }
 
 u32 __hyp_text __kvm_get_mdcr_el2(void)
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index fdd1068ee3a5..e41a161d313a 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -62,9 +62,6 @@ ENTRY(__guest_enter)
 	// Store the host regs
 	save_callee_saved_regs x1
 
-	// Store host_ctxt and vcpu for use at exit time
-	stp	x1, x0, [sp, #-16]!
-
 	add	x18, x0, #VCPU_CONTEXT
 
 	// Restore guest regs x0-x17
@@ -118,8 +115,7 @@ ENTRY(__guest_exit)
 	// Store the guest regs x19-x29, lr
 	save_callee_saved_regs x1
 
-	// Restore the host_ctxt from the stack
-	ldr	x2, [sp], #16
+	get_host_ctxt	x2, x3
 
 	// Now restore the host regs
 	restore_callee_saved_regs x2
@@ -213,15 +209,3 @@ alternative_endif
 
 	eret
 ENDPROC(__fpsimd_guest_restore)
-
-ENTRY(__qcom_hyp_sanitize_btac_predictors)
-	/**
-	 * Call SMC64 with Silicon provider serviceID 23<<8 (0xc2001700)
-	 * 0xC2000000-0xC200FFFF: assigned to SiP Service Calls
-	 * b15-b0: contains SiP functionID
-	 */
-	movz    x0, #0x1700
-	movk    x0, #0xc200, lsl #16
-	smc     #0
-	ret
-ENDPROC(__qcom_hyp_sanitize_btac_predictors)
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index f36464bd57c5..bffece27b5c1 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2015 - ARM Ltd
+ * Copyright (C) 2015-2018 - ARM Ltd
  * Author: Marc Zyngier <marc.zyngier@arm.com>
  *
  * This program is free software; you can redistribute it and/or modify
@@ -24,6 +24,7 @@
 #include <asm/kvm_arm.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmu.h>
+#include <asm/mmu.h>
 
 	.text
 	.pushsection	.hyp.text, "ax"
@@ -55,15 +56,9 @@ ENTRY(__vhe_hyp_call)
 ENDPROC(__vhe_hyp_call)
 
 el1_sync:				// Guest trapped into EL2
-	stp	x0, x1, [sp, #-16]!
-
-alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
-	mrs	x1, esr_el2
-alternative_else
-	mrs	x1, esr_el1
-alternative_endif
-	lsr	x0, x1, #ESR_ELx_EC_SHIFT
 
+	mrs	x0, esr_el2
+	lsr	x0, x0, #ESR_ELx_EC_SHIFT
 	cmp	x0, #ESR_ELx_EC_HVC64
 	ccmp	x0, #ESR_ELx_EC_HVC32, #4, ne
 	b.ne	el1_trap
@@ -117,10 +112,14 @@ el1_hvc_guest:
 	eret
 
 el1_trap:
+	get_vcpu_ptr	x1, x0
+
+	mrs		x0, esr_el2
+	lsr		x0, x0, #ESR_ELx_EC_SHIFT
 	/*
 	 * x0: ESR_EC
+	 * x1: vcpu pointer
 	 */
-	ldr	x1, [sp, #16 + 8]	// vcpu stored by __guest_enter
 
 	/*
 	 * We trap the first access to the FP/SIMD to save the host context
@@ -137,18 +136,18 @@ alternative_else_nop_endif
 	b	__guest_exit
 
 el1_irq:
-	stp     x0, x1, [sp, #-16]!
-	ldr	x1, [sp, #16 + 8]
+	get_vcpu_ptr	x1, x0
 	mov	x0, #ARM_EXCEPTION_IRQ
 	b	__guest_exit
 
 el1_error:
-	stp     x0, x1, [sp, #-16]!
-	ldr	x1, [sp, #16 + 8]
+	get_vcpu_ptr	x1, x0
 	mov	x0, #ARM_EXCEPTION_EL1_SERROR
 	b	__guest_exit
 
 el2_error:
+	ldp	x0, x1, [sp], #16
+
 	/*
 	 * Only two possibilities:
 	 * 1) Either we come from the exit path, having just unmasked
@@ -180,14 +179,7 @@ ENTRY(__hyp_do_panic)
 ENDPROC(__hyp_do_panic)
 
 ENTRY(__hyp_panic)
-	/*
-	 * '=kvm_host_cpu_state' is a host VA from the constant pool, it may
-	 * not be accessible by this address from EL2, hyp_panic() converts
-	 * it with kern_hyp_va() before use.
-	 */
-	ldr	x0, =kvm_host_cpu_state
-	mrs	x1, tpidr_el2
-	add	x0, x0, x1
+	get_host_ctxt x0, x1
 	b	hyp_panic
 ENDPROC(__hyp_panic)
 
@@ -206,32 +198,104 @@ ENDPROC(\label)
 	invalid_vector	el2h_sync_invalid
 	invalid_vector	el2h_irq_invalid
 	invalid_vector	el2h_fiq_invalid
-	invalid_vector	el1_sync_invalid
-	invalid_vector	el1_irq_invalid
 	invalid_vector	el1_fiq_invalid
 
 	.ltorg
 
 	.align 11
 
+.macro valid_vect target
+	.align 7
+	stp	x0, x1, [sp, #-16]!
+	b	\target
+.endm
+
+.macro invalid_vect target
+	.align 7
+	b	\target
+	ldp	x0, x1, [sp], #16
+	b	\target
+.endm
+
 ENTRY(__kvm_hyp_vector)
-	ventry	el2t_sync_invalid		// Synchronous EL2t
-	ventry	el2t_irq_invalid		// IRQ EL2t
-	ventry	el2t_fiq_invalid		// FIQ EL2t
-	ventry	el2t_error_invalid		// Error EL2t
-
-	ventry	el2h_sync_invalid		// Synchronous EL2h
-	ventry	el2h_irq_invalid		// IRQ EL2h
-	ventry	el2h_fiq_invalid		// FIQ EL2h
-	ventry	el2_error			// Error EL2h
-
-	ventry	el1_sync			// Synchronous 64-bit EL1
-	ventry	el1_irq				// IRQ 64-bit EL1
-	ventry	el1_fiq_invalid			// FIQ 64-bit EL1
-	ventry	el1_error			// Error 64-bit EL1
-
-	ventry	el1_sync			// Synchronous 32-bit EL1
-	ventry	el1_irq				// IRQ 32-bit EL1
-	ventry	el1_fiq_invalid			// FIQ 32-bit EL1
-	ventry	el1_error			// Error 32-bit EL1
+	invalid_vect	el2t_sync_invalid	// Synchronous EL2t
+	invalid_vect	el2t_irq_invalid	// IRQ EL2t
+	invalid_vect	el2t_fiq_invalid	// FIQ EL2t
+	invalid_vect	el2t_error_invalid	// Error EL2t
+
+	invalid_vect	el2h_sync_invalid	// Synchronous EL2h
+	invalid_vect	el2h_irq_invalid	// IRQ EL2h
+	invalid_vect	el2h_fiq_invalid	// FIQ EL2h
+	valid_vect	el2_error		// Error EL2h
+
+	valid_vect	el1_sync		// Synchronous 64-bit EL1
+	valid_vect	el1_irq			// IRQ 64-bit EL1
+	invalid_vect	el1_fiq_invalid		// FIQ 64-bit EL1
+	valid_vect	el1_error		// Error 64-bit EL1
+
+	valid_vect	el1_sync		// Synchronous 32-bit EL1
+	valid_vect	el1_irq			// IRQ 32-bit EL1
+	invalid_vect	el1_fiq_invalid		// FIQ 32-bit EL1
+	valid_vect	el1_error		// Error 32-bit EL1
 ENDPROC(__kvm_hyp_vector)
+
+#ifdef CONFIG_KVM_INDIRECT_VECTORS
+.macro hyp_ventry
+	.align 7
+1:	.rept 27
+	nop
+	.endr
+/*
+ * The default sequence is to directly branch to the KVM vectors,
+ * using the computed offset. This applies for VHE as well as
+ * !ARM64_HARDEN_EL2_VECTORS.
+ *
+ * For ARM64_HARDEN_EL2_VECTORS configurations, this gets replaced
+ * with:
+ *
+ * stp	x0, x1, [sp, #-16]!
+ * movz	x0, #(addr & 0xffff)
+ * movk	x0, #((addr >> 16) & 0xffff), lsl #16
+ * movk	x0, #((addr >> 32) & 0xffff), lsl #32
+ * br	x0
+ *
+ * Where addr = kern_hyp_va(__kvm_hyp_vector) + vector-offset + 4.
+ * See kvm_patch_vector_branch for details.
+ */
+alternative_cb	kvm_patch_vector_branch
+	b	__kvm_hyp_vector + (1b - 0b)
+	nop
+	nop
+	nop
+	nop
+alternative_cb_end
+.endm
+
+.macro generate_vectors
+0:
+	.rept 16
+	hyp_ventry
+	.endr
+	.org 0b + SZ_2K		// Safety measure
+.endm
+
+	.align	11
+ENTRY(__bp_harden_hyp_vecs_start)
+	.rept BP_HARDEN_EL2_SLOTS
+	generate_vectors
+	.endr
+ENTRY(__bp_harden_hyp_vecs_end)
+
+	.popsection
+
+ENTRY(__smccc_workaround_1_smc_start)
+	sub	sp, sp, #(8 * 4)
+	stp	x2, x3, [sp, #(8 * 0)]
+	stp	x0, x1, [sp, #(8 * 2)]
+	mov	w0, #ARM_SMCCC_ARCH_WORKAROUND_1
+	smc	#0
+	ldp	x2, x3, [sp, #(8 * 0)]
+	ldp	x0, x1, [sp, #(8 * 2)]
+	add	sp, sp, #(8 * 4)
+ENTRY(__smccc_workaround_1_smc_end)
+#endif
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 870f4b1587f9..d9645236e474 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -33,49 +33,22 @@ static bool __hyp_text __fpsimd_enabled_nvhe(void)
 	return !(read_sysreg(cptr_el2) & CPTR_EL2_TFP);
 }
 
-static bool __hyp_text __fpsimd_enabled_vhe(void)
+static bool fpsimd_enabled_vhe(void)
 {
 	return !!(read_sysreg(cpacr_el1) & CPACR_EL1_FPEN);
 }
 
-static hyp_alternate_select(__fpsimd_is_enabled,
-			    __fpsimd_enabled_nvhe, __fpsimd_enabled_vhe,
-			    ARM64_HAS_VIRT_HOST_EXTN);
-
-bool __hyp_text __fpsimd_enabled(void)
-{
-	return __fpsimd_is_enabled()();
-}
-
-static void __hyp_text __activate_traps_vhe(void)
-{
-	u64 val;
-
-	val = read_sysreg(cpacr_el1);
-	val |= CPACR_EL1_TTA;
-	val &= ~(CPACR_EL1_FPEN | CPACR_EL1_ZEN);
-	write_sysreg(val, cpacr_el1);
-
-	write_sysreg(kvm_get_hyp_vector(), vbar_el1);
-}
-
-static void __hyp_text __activate_traps_nvhe(void)
+/* Save the 32-bit only FPSIMD system register state */
+static void __hyp_text __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu)
 {
-	u64 val;
+	if (!vcpu_el1_is_32bit(vcpu))
+		return;
 
-	val = CPTR_EL2_DEFAULT;
-	val |= CPTR_EL2_TTA | CPTR_EL2_TFP | CPTR_EL2_TZ;
-	write_sysreg(val, cptr_el2);
+	vcpu->arch.ctxt.sys_regs[FPEXC32_EL2] = read_sysreg(fpexc32_el2);
 }
 
-static hyp_alternate_select(__activate_traps_arch,
-			    __activate_traps_nvhe, __activate_traps_vhe,
-			    ARM64_HAS_VIRT_HOST_EXTN);
-
-static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
+static void __hyp_text __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
 {
-	u64 val;
-
 	/*
 	 * We are about to set CPTR_EL2.TFP to trap all floating point
 	 * register accesses to EL2, however, the ARM ARM clearly states that
@@ -85,23 +58,17 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
 	 * If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to
 	 * it will cause an exception.
 	 */
-	val = vcpu->arch.hcr_el2;
-
-	if (!(val & HCR_RW) && system_supports_fpsimd()) {
+	if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd()) {
 		write_sysreg(1 << 30, fpexc32_el2);
 		isb();
 	}
+}
 
-	if (val & HCR_RW) /* for AArch64 only: */
-		val |= HCR_TID3; /* TID3: trap feature register accesses */
-
-	write_sysreg(val, hcr_el2);
-
-	if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN) && (val & HCR_VSE))
-		write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
-
-	/* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */
+static void __hyp_text __activate_traps_common(struct kvm_vcpu *vcpu)
+{
+	/* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */
 	write_sysreg(1 << 15, hstr_el2);
+
 	/*
 	 * Make sure we trap PMU access from EL0 to EL2. Also sanitize
 	 * PMSELR_EL0 to make sure it never contains the cycle
@@ -111,19 +78,56 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
 	write_sysreg(0, pmselr_el0);
 	write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
 	write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
-	__activate_traps_arch()();
 }
 
-static void __hyp_text __deactivate_traps_vhe(void)
+static void __hyp_text __deactivate_traps_common(void)
 {
-	extern char vectors[];	/* kernel exception vectors */
-	u64 mdcr_el2 = read_sysreg(mdcr_el2);
+	write_sysreg(0, hstr_el2);
+	write_sysreg(0, pmuserenr_el0);
+}
 
-	mdcr_el2 &= MDCR_EL2_HPMN_MASK |
-		    MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT |
-		    MDCR_EL2_TPMS;
+static void activate_traps_vhe(struct kvm_vcpu *vcpu)
+{
+	u64 val;
 
-	write_sysreg(mdcr_el2, mdcr_el2);
+	val = read_sysreg(cpacr_el1);
+	val |= CPACR_EL1_TTA;
+	val &= ~(CPACR_EL1_FPEN | CPACR_EL1_ZEN);
+	write_sysreg(val, cpacr_el1);
+
+	write_sysreg(kvm_get_hyp_vector(), vbar_el1);
+}
+
+static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu)
+{
+	u64 val;
+
+	__activate_traps_common(vcpu);
+
+	val = CPTR_EL2_DEFAULT;
+	val |= CPTR_EL2_TTA | CPTR_EL2_TFP | CPTR_EL2_TZ;
+	write_sysreg(val, cptr_el2);
+}
+
+static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
+{
+	u64 hcr = vcpu->arch.hcr_el2;
+
+	write_sysreg(hcr, hcr_el2);
+
+	if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))
+		write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
+
+	__activate_traps_fpsimd32(vcpu);
+	if (has_vhe())
+		activate_traps_vhe(vcpu);
+	else
+		__activate_traps_nvhe(vcpu);
+}
+
+static void deactivate_traps_vhe(void)
+{
+	extern char vectors[];	/* kernel exception vectors */
 	write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
 	write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1);
 	write_sysreg(vectors, vbar_el1);
@@ -133,6 +137,8 @@ static void __hyp_text __deactivate_traps_nvhe(void)
 {
 	u64 mdcr_el2 = read_sysreg(mdcr_el2);
 
+	__deactivate_traps_common();
+
 	mdcr_el2 &= MDCR_EL2_HPMN_MASK;
 	mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT;
 
@@ -141,10 +147,6 @@ static void __hyp_text __deactivate_traps_nvhe(void)
 	write_sysreg(CPTR_EL2_DEFAULT, cptr_el2);
 }
 
-static hyp_alternate_select(__deactivate_traps_arch,
-			    __deactivate_traps_nvhe, __deactivate_traps_vhe,
-			    ARM64_HAS_VIRT_HOST_EXTN);
-
 static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
 {
 	/*
@@ -156,14 +158,32 @@ static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
 	if (vcpu->arch.hcr_el2 & HCR_VSE)
 		vcpu->arch.hcr_el2 = read_sysreg(hcr_el2);
 
-	__deactivate_traps_arch()();
-	write_sysreg(0, hstr_el2);
-	write_sysreg(0, pmuserenr_el0);
+	if (has_vhe())
+		deactivate_traps_vhe();
+	else
+		__deactivate_traps_nvhe();
+}
+
+void activate_traps_vhe_load(struct kvm_vcpu *vcpu)
+{
+	__activate_traps_common(vcpu);
+}
+
+void deactivate_traps_vhe_put(void)
+{
+	u64 mdcr_el2 = read_sysreg(mdcr_el2);
+
+	mdcr_el2 &= MDCR_EL2_HPMN_MASK |
+		    MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT |
+		    MDCR_EL2_TPMS;
+
+	write_sysreg(mdcr_el2, mdcr_el2);
+
+	__deactivate_traps_common();
 }
 
-static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu)
+static void __hyp_text __activate_vm(struct kvm *kvm)
 {
-	struct kvm *kvm = kern_hyp_va(vcpu->kvm);
 	write_sysreg(kvm->arch.vttbr, vttbr_el2);
 }
 
@@ -172,29 +192,22 @@ static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu)
 	write_sysreg(0, vttbr_el2);
 }
 
-static void __hyp_text __vgic_save_state(struct kvm_vcpu *vcpu)
+/* Save VGICv3 state on non-VHE systems */
+static void __hyp_text __hyp_vgic_save_state(struct kvm_vcpu *vcpu)
 {
-	if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
+	if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
 		__vgic_v3_save_state(vcpu);
-	else
-		__vgic_v2_save_state(vcpu);
-
-	write_sysreg(read_sysreg(hcr_el2) & ~HCR_INT_OVERRIDE, hcr_el2);
+		__vgic_v3_deactivate_traps(vcpu);
+	}
 }
 
-static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu)
+/* Restore VGICv3 state on non-VHE systems */
+static void __hyp_text __hyp_vgic_restore_state(struct kvm_vcpu *vcpu)
 {
-	u64 val;
-
-	val = read_sysreg(hcr_el2);
-	val |= 	HCR_INT_OVERRIDE;
-	val |= vcpu->arch.irq_lines;
-	write_sysreg(val, hcr_el2);
-
-	if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
+	if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
+		__vgic_v3_activate_traps(vcpu);
 		__vgic_v3_restore_state(vcpu);
-	else
-		__vgic_v2_restore_state(vcpu);
+	}
 }
 
 static bool __hyp_text __true_value(void)
@@ -305,54 +318,27 @@ static bool __hyp_text __skip_instr(struct kvm_vcpu *vcpu)
 	}
 }
 
-int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
+/*
+ * Return true when we were able to fixup the guest exit and should return to
+ * the guest, false when we should restore the host state and return to the
+ * main run loop.
+ */
+static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
 {
-	struct kvm_cpu_context *host_ctxt;
-	struct kvm_cpu_context *guest_ctxt;
-	bool fp_enabled;
-	u64 exit_code;
-
-	vcpu = kern_hyp_va(vcpu);
-
-	host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
-	host_ctxt->__hyp_running_vcpu = vcpu;
-	guest_ctxt = &vcpu->arch.ctxt;
-
-	__sysreg_save_host_state(host_ctxt);
-	__debug_cond_save_host_state(vcpu);
-
-	__activate_traps(vcpu);
-	__activate_vm(vcpu);
-
-	__vgic_restore_state(vcpu);
-	__timer_enable_traps(vcpu);
-
-	/*
-	 * We must restore the 32-bit state before the sysregs, thanks
-	 * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
-	 */
-	__sysreg32_restore_state(vcpu);
-	__sysreg_restore_guest_state(guest_ctxt);
-	__debug_restore_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt);
-
-	/* Jump in the fire! */
-again:
-	exit_code = __guest_enter(vcpu, host_ctxt);
-	/* And we're baaack! */
-
-	if (ARM_EXCEPTION_CODE(exit_code) != ARM_EXCEPTION_IRQ)
+	if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
 		vcpu->arch.fault.esr_el2 = read_sysreg_el2(esr);
+
 	/*
 	 * We're using the raw exception code in order to only process
 	 * the trap if no SError is pending. We will come back to the
 	 * same PC once the SError has been injected, and replay the
 	 * trapping instruction.
 	 */
-	if (exit_code == ARM_EXCEPTION_TRAP && !__populate_fault_info(vcpu))
-		goto again;
+	if (*exit_code == ARM_EXCEPTION_TRAP && !__populate_fault_info(vcpu))
+		return true;
 
 	if (static_branch_unlikely(&vgic_v2_cpuif_trap) &&
-	    exit_code == ARM_EXCEPTION_TRAP) {
+	    *exit_code == ARM_EXCEPTION_TRAP) {
 		bool valid;
 
 		valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW &&
@@ -366,9 +352,9 @@ again:
 
 			if (ret == 1) {
 				if (__skip_instr(vcpu))
-					goto again;
+					return true;
 				else
-					exit_code = ARM_EXCEPTION_TRAP;
+					*exit_code = ARM_EXCEPTION_TRAP;
 			}
 
 			if (ret == -1) {
@@ -380,62 +366,135 @@ again:
 				 */
 				if (!__skip_instr(vcpu))
 					*vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS;
-				exit_code = ARM_EXCEPTION_EL1_SERROR;
+				*exit_code = ARM_EXCEPTION_EL1_SERROR;
 			}
-
-			/* 0 falls through to be handler out of EL2 */
 		}
 	}
 
 	if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
-	    exit_code == ARM_EXCEPTION_TRAP &&
+	    *exit_code == ARM_EXCEPTION_TRAP &&
 	    (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 ||
 	     kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) {
 		int ret = __vgic_v3_perform_cpuif_access(vcpu);
 
 		if (ret == 1) {
 			if (__skip_instr(vcpu))
-				goto again;
+				return true;
 			else
-				exit_code = ARM_EXCEPTION_TRAP;
+				*exit_code = ARM_EXCEPTION_TRAP;
 		}
-
-		/* 0 falls through to be handled out of EL2 */
 	}
 
-	if (cpus_have_const_cap(ARM64_HARDEN_BP_POST_GUEST_EXIT)) {
-		u32 midr = read_cpuid_id();
+	/* Return to the host kernel and handle the exit */
+	return false;
+}
 
-		/* Apply BTAC predictors mitigation to all Falkor chips */
-		if (((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR) ||
-		    ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1)) {
-			__qcom_hyp_sanitize_btac_predictors();
-		}
+/* Switch to the guest for VHE systems running in EL2 */
+int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpu_context *host_ctxt;
+	struct kvm_cpu_context *guest_ctxt;
+	bool fp_enabled;
+	u64 exit_code;
+
+	host_ctxt = vcpu->arch.host_cpu_context;
+	host_ctxt->__hyp_running_vcpu = vcpu;
+	guest_ctxt = &vcpu->arch.ctxt;
+
+	sysreg_save_host_state_vhe(host_ctxt);
+
+	__activate_traps(vcpu);
+	__activate_vm(vcpu->kvm);
+
+	sysreg_restore_guest_state_vhe(guest_ctxt);
+	__debug_switch_to_guest(vcpu);
+
+	do {
+		/* Jump in the fire! */
+		exit_code = __guest_enter(vcpu, host_ctxt);
+
+		/* And we're baaack! */
+	} while (fixup_guest_exit(vcpu, &exit_code));
+
+	fp_enabled = fpsimd_enabled_vhe();
+
+	sysreg_save_guest_state_vhe(guest_ctxt);
+
+	__deactivate_traps(vcpu);
+
+	sysreg_restore_host_state_vhe(host_ctxt);
+
+	if (fp_enabled) {
+		__fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs);
+		__fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs);
+		__fpsimd_save_fpexc32(vcpu);
 	}
 
-	fp_enabled = __fpsimd_enabled();
+	__debug_switch_to_host(vcpu);
+
+	return exit_code;
+}
+
+/* Switch to the guest for legacy non-VHE systems */
+int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpu_context *host_ctxt;
+	struct kvm_cpu_context *guest_ctxt;
+	bool fp_enabled;
+	u64 exit_code;
+
+	vcpu = kern_hyp_va(vcpu);
+
+	host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
+	host_ctxt->__hyp_running_vcpu = vcpu;
+	guest_ctxt = &vcpu->arch.ctxt;
+
+	__sysreg_save_state_nvhe(host_ctxt);
+
+	__activate_traps(vcpu);
+	__activate_vm(kern_hyp_va(vcpu->kvm));
+
+	__hyp_vgic_restore_state(vcpu);
+	__timer_enable_traps(vcpu);
+
+	/*
+	 * We must restore the 32-bit state before the sysregs, thanks
+	 * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
+	 */
+	__sysreg32_restore_state(vcpu);
+	__sysreg_restore_state_nvhe(guest_ctxt);
+	__debug_switch_to_guest(vcpu);
+
+	do {
+		/* Jump in the fire! */
+		exit_code = __guest_enter(vcpu, host_ctxt);
+
+		/* And we're baaack! */
+	} while (fixup_guest_exit(vcpu, &exit_code));
 
-	__sysreg_save_guest_state(guest_ctxt);
+	fp_enabled = __fpsimd_enabled_nvhe();
+
+	__sysreg_save_state_nvhe(guest_ctxt);
 	__sysreg32_save_state(vcpu);
 	__timer_disable_traps(vcpu);
-	__vgic_save_state(vcpu);
+	__hyp_vgic_save_state(vcpu);
 
 	__deactivate_traps(vcpu);
 	__deactivate_vm(vcpu);
 
-	__sysreg_restore_host_state(host_ctxt);
+	__sysreg_restore_state_nvhe(host_ctxt);
 
 	if (fp_enabled) {
 		__fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs);
 		__fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs);
+		__fpsimd_save_fpexc32(vcpu);
 	}
 
-	__debug_save_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt);
 	/*
 	 * This must come after restoring the host sysregs, since a non-VHE
 	 * system may enable SPE here and make use of the TTBRs.
 	 */
-	__debug_cond_restore_host_state(vcpu);
+	__debug_switch_to_host(vcpu);
 
 	return exit_code;
 }
@@ -443,10 +502,20 @@ again:
 static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n";
 
 static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par,
-					     struct kvm_vcpu *vcpu)
+					     struct kvm_cpu_context *__host_ctxt)
 {
+	struct kvm_vcpu *vcpu;
 	unsigned long str_va;
 
+	vcpu = __host_ctxt->__hyp_running_vcpu;
+
+	if (read_sysreg(vttbr_el2)) {
+		__timer_disable_traps(vcpu);
+		__deactivate_traps(vcpu);
+		__deactivate_vm(vcpu);
+		__sysreg_restore_state_nvhe(__host_ctxt);
+	}
+
 	/*
 	 * Force the panic string to be loaded from the literal pool,
 	 * making sure it is a kernel address and not a PC-relative
@@ -460,40 +529,31 @@ static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par,
 		       read_sysreg(hpfar_el2), par, vcpu);
 }
 
-static void __hyp_text __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par,
-					    struct kvm_vcpu *vcpu)
+static void __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par,
+				 struct kvm_cpu_context *host_ctxt)
 {
+	struct kvm_vcpu *vcpu;
+	vcpu = host_ctxt->__hyp_running_vcpu;
+
+	__deactivate_traps(vcpu);
+	sysreg_restore_host_state_vhe(host_ctxt);
+
 	panic(__hyp_panic_string,
 	      spsr,  elr,
 	      read_sysreg_el2(esr),   read_sysreg_el2(far),
 	      read_sysreg(hpfar_el2), par, vcpu);
 }
 
-static hyp_alternate_select(__hyp_call_panic,
-			    __hyp_call_panic_nvhe, __hyp_call_panic_vhe,
-			    ARM64_HAS_VIRT_HOST_EXTN);
-
-void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *__host_ctxt)
+void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt)
 {
-	struct kvm_vcpu *vcpu = NULL;
-
 	u64 spsr = read_sysreg_el2(spsr);
 	u64 elr = read_sysreg_el2(elr);
 	u64 par = read_sysreg(par_el1);
 
-	if (read_sysreg(vttbr_el2)) {
-		struct kvm_cpu_context *host_ctxt;
-
-		host_ctxt = kern_hyp_va(__host_ctxt);
-		vcpu = host_ctxt->__hyp_running_vcpu;
-		__timer_disable_traps(vcpu);
-		__deactivate_traps(vcpu);
-		__deactivate_vm(vcpu);
-		__sysreg_restore_host_state(host_ctxt);
-	}
-
-	/* Call panic for real */
-	__hyp_call_panic()(spsr, elr, par, vcpu);
+	if (!has_vhe())
+		__hyp_call_panic_nvhe(spsr, elr, par, host_ctxt);
+	else
+		__hyp_call_panic_vhe(spsr, elr, par, host_ctxt);
 
 	unreachable();
 }
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
index 2c17afd2be96..b3894df6bf1a 100644
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -19,32 +19,43 @@
 #include <linux/kvm_host.h>
 
 #include <asm/kvm_asm.h>
+#include <asm/kvm_emulate.h>
 #include <asm/kvm_hyp.h>
 
-/* Yes, this does nothing, on purpose */
-static void __hyp_text __sysreg_do_nothing(struct kvm_cpu_context *ctxt) { }
-
 /*
  * Non-VHE: Both host and guest must save everything.
  *
- * VHE: Host must save tpidr*_el0, actlr_el1, mdscr_el1, sp_el0,
- * and guest must save everything.
+ * VHE: Host and guest must save mdscr_el1 and sp_el0 (and the PC and pstate,
+ * which are handled as part of the el2 return state) on every switch.
+ * tpidr_el0 and tpidrro_el0 only need to be switched when going
+ * to host userspace or a different VCPU.  EL1 registers only need to be
+ * switched when potentially going to run a different VCPU.  The latter two
+ * classes are handled as part of kvm_arch_vcpu_load and kvm_arch_vcpu_put.
  */
 
 static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
 {
-	ctxt->sys_regs[ACTLR_EL1]	= read_sysreg(actlr_el1);
-	ctxt->sys_regs[TPIDR_EL0]	= read_sysreg(tpidr_el0);
-	ctxt->sys_regs[TPIDRRO_EL0]	= read_sysreg(tpidrro_el0);
 	ctxt->sys_regs[MDSCR_EL1]	= read_sysreg(mdscr_el1);
+
+	/*
+	 * The host arm64 Linux uses sp_el0 to point to 'current' and it must
+	 * therefore be saved/restored on every entry/exit to/from the guest.
+	 */
 	ctxt->gp_regs.regs.sp		= read_sysreg(sp_el0);
 }
 
-static void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt)
+static void __hyp_text __sysreg_save_user_state(struct kvm_cpu_context *ctxt)
+{
+	ctxt->sys_regs[TPIDR_EL0]	= read_sysreg(tpidr_el0);
+	ctxt->sys_regs[TPIDRRO_EL0]	= read_sysreg(tpidrro_el0);
+}
+
+static void __hyp_text __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
 {
 	ctxt->sys_regs[MPIDR_EL1]	= read_sysreg(vmpidr_el2);
 	ctxt->sys_regs[CSSELR_EL1]	= read_sysreg(csselr_el1);
 	ctxt->sys_regs[SCTLR_EL1]	= read_sysreg_el1(sctlr);
+	ctxt->sys_regs[ACTLR_EL1]	= read_sysreg(actlr_el1);
 	ctxt->sys_regs[CPACR_EL1]	= read_sysreg_el1(cpacr);
 	ctxt->sys_regs[TTBR0_EL1]	= read_sysreg_el1(ttbr0);
 	ctxt->sys_regs[TTBR1_EL1]	= read_sysreg_el1(ttbr1);
@@ -64,6 +75,10 @@ static void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt)
 	ctxt->gp_regs.sp_el1		= read_sysreg(sp_el1);
 	ctxt->gp_regs.elr_el1		= read_sysreg_el1(elr);
 	ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg_el1(spsr);
+}
+
+static void __hyp_text __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt)
+{
 	ctxt->gp_regs.regs.pc		= read_sysreg_el2(elr);
 	ctxt->gp_regs.regs.pstate	= read_sysreg_el2(spsr);
 
@@ -71,36 +86,48 @@ static void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt)
 		ctxt->sys_regs[DISR_EL1] = read_sysreg_s(SYS_VDISR_EL2);
 }
 
-static hyp_alternate_select(__sysreg_call_save_host_state,
-			    __sysreg_save_state, __sysreg_do_nothing,
-			    ARM64_HAS_VIRT_HOST_EXTN);
+void __hyp_text __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt)
+{
+	__sysreg_save_el1_state(ctxt);
+	__sysreg_save_common_state(ctxt);
+	__sysreg_save_user_state(ctxt);
+	__sysreg_save_el2_return_state(ctxt);
+}
 
-void __hyp_text __sysreg_save_host_state(struct kvm_cpu_context *ctxt)
+void sysreg_save_host_state_vhe(struct kvm_cpu_context *ctxt)
 {
-	__sysreg_call_save_host_state()(ctxt);
 	__sysreg_save_common_state(ctxt);
 }
 
-void __hyp_text __sysreg_save_guest_state(struct kvm_cpu_context *ctxt)
+void sysreg_save_guest_state_vhe(struct kvm_cpu_context *ctxt)
 {
-	__sysreg_save_state(ctxt);
 	__sysreg_save_common_state(ctxt);
+	__sysreg_save_el2_return_state(ctxt);
 }
 
 static void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctxt)
 {
-	write_sysreg(ctxt->sys_regs[ACTLR_EL1],	  actlr_el1);
-	write_sysreg(ctxt->sys_regs[TPIDR_EL0],	  tpidr_el0);
-	write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0);
 	write_sysreg(ctxt->sys_regs[MDSCR_EL1],	  mdscr_el1);
+
+	/*
+	 * The host arm64 Linux uses sp_el0 to point to 'current' and it must
+	 * therefore be saved/restored on every entry/exit to/from the guest.
+	 */
 	write_sysreg(ctxt->gp_regs.regs.sp,	  sp_el0);
 }
 
-static void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt)
+static void __hyp_text __sysreg_restore_user_state(struct kvm_cpu_context *ctxt)
+{
+	write_sysreg(ctxt->sys_regs[TPIDR_EL0],	  	tpidr_el0);
+	write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], 	tpidrro_el0);
+}
+
+static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
 {
 	write_sysreg(ctxt->sys_regs[MPIDR_EL1],		vmpidr_el2);
 	write_sysreg(ctxt->sys_regs[CSSELR_EL1],	csselr_el1);
 	write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1],	sctlr);
+	write_sysreg(ctxt->sys_regs[ACTLR_EL1],	  	actlr_el1);
 	write_sysreg_el1(ctxt->sys_regs[CPACR_EL1],	cpacr);
 	write_sysreg_el1(ctxt->sys_regs[TTBR0_EL1],	ttbr0);
 	write_sysreg_el1(ctxt->sys_regs[TTBR1_EL1],	ttbr1);
@@ -120,6 +147,11 @@ static void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt)
 	write_sysreg(ctxt->gp_regs.sp_el1,		sp_el1);
 	write_sysreg_el1(ctxt->gp_regs.elr_el1,		elr);
 	write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],spsr);
+}
+
+static void __hyp_text
+__sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt)
+{
 	write_sysreg_el2(ctxt->gp_regs.regs.pc,		elr);
 	write_sysreg_el2(ctxt->gp_regs.regs.pstate,	spsr);
 
@@ -127,27 +159,30 @@ static void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt)
 		write_sysreg_s(ctxt->sys_regs[DISR_EL1], SYS_VDISR_EL2);
 }
 
-static hyp_alternate_select(__sysreg_call_restore_host_state,
-			    __sysreg_restore_state, __sysreg_do_nothing,
-			    ARM64_HAS_VIRT_HOST_EXTN);
+void __hyp_text __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt)
+{
+	__sysreg_restore_el1_state(ctxt);
+	__sysreg_restore_common_state(ctxt);
+	__sysreg_restore_user_state(ctxt);
+	__sysreg_restore_el2_return_state(ctxt);
+}
 
-void __hyp_text __sysreg_restore_host_state(struct kvm_cpu_context *ctxt)
+void sysreg_restore_host_state_vhe(struct kvm_cpu_context *ctxt)
 {
-	__sysreg_call_restore_host_state()(ctxt);
 	__sysreg_restore_common_state(ctxt);
 }
 
-void __hyp_text __sysreg_restore_guest_state(struct kvm_cpu_context *ctxt)
+void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt)
 {
-	__sysreg_restore_state(ctxt);
 	__sysreg_restore_common_state(ctxt);
+	__sysreg_restore_el2_return_state(ctxt);
 }
 
 void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu)
 {
 	u64 *spsr, *sysreg;
 
-	if (read_sysreg(hcr_el2) & HCR_RW)
+	if (!vcpu_el1_is_32bit(vcpu))
 		return;
 
 	spsr = vcpu->arch.ctxt.gp_regs.spsr;
@@ -161,10 +196,7 @@ void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu)
 	sysreg[DACR32_EL2] = read_sysreg(dacr32_el2);
 	sysreg[IFSR32_EL2] = read_sysreg(ifsr32_el2);
 
-	if (__fpsimd_enabled())
-		sysreg[FPEXC32_EL2] = read_sysreg(fpexc32_el2);
-
-	if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
+	if (has_vhe() || vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
 		sysreg[DBGVCR32_EL2] = read_sysreg(dbgvcr32_el2);
 }
 
@@ -172,7 +204,7 @@ void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu)
 {
 	u64 *spsr, *sysreg;
 
-	if (read_sysreg(hcr_el2) & HCR_RW)
+	if (!vcpu_el1_is_32bit(vcpu))
 		return;
 
 	spsr = vcpu->arch.ctxt.gp_regs.spsr;
@@ -186,6 +218,78 @@ void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu)
 	write_sysreg(sysreg[DACR32_EL2], dacr32_el2);
 	write_sysreg(sysreg[IFSR32_EL2], ifsr32_el2);
 
-	if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
+	if (has_vhe() || vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
 		write_sysreg(sysreg[DBGVCR32_EL2], dbgvcr32_el2);
 }
+
+/**
+ * kvm_vcpu_load_sysregs - Load guest system registers to the physical CPU
+ *
+ * @vcpu: The VCPU pointer
+ *
+ * Load system registers that do not affect the host's execution, for
+ * example EL1 system registers on a VHE system where the host kernel
+ * runs at EL2.  This function is called from KVM's vcpu_load() function
+ * and loading system register state early avoids having to load them on
+ * every entry to the VM.
+ */
+void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpu_context *host_ctxt = vcpu->arch.host_cpu_context;
+	struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
+
+	if (!has_vhe())
+		return;
+
+	__sysreg_save_user_state(host_ctxt);
+
+	/*
+	 * Load guest EL1 and user state
+	 *
+	 * We must restore the 32-bit state before the sysregs, thanks
+	 * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
+	 */
+	__sysreg32_restore_state(vcpu);
+	__sysreg_restore_user_state(guest_ctxt);
+	__sysreg_restore_el1_state(guest_ctxt);
+
+	vcpu->arch.sysregs_loaded_on_cpu = true;
+
+	activate_traps_vhe_load(vcpu);
+}
+
+/**
+ * kvm_vcpu_put_sysregs - Restore host system registers to the physical CPU
+ *
+ * @vcpu: The VCPU pointer
+ *
+ * Save guest system registers that do not affect the host's execution, for
+ * example EL1 system registers on a VHE system where the host kernel
+ * runs at EL2.  This function is called from KVM's vcpu_put() function
+ * and deferring saving system register state until we're no longer running the
+ * VCPU avoids having to save them on every exit from the VM.
+ */
+void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpu_context *host_ctxt = vcpu->arch.host_cpu_context;
+	struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
+
+	if (!has_vhe())
+		return;
+
+	deactivate_traps_vhe_put();
+
+	__sysreg_save_el1_state(guest_ctxt);
+	__sysreg_save_user_state(guest_ctxt);
+	__sysreg32_save_state(vcpu);
+
+	/* Restore host user state */
+	__sysreg_restore_user_state(host_ctxt);
+
+	vcpu->arch.sysregs_loaded_on_cpu = false;
+}
+
+void __hyp_text __kvm_set_tpidr_el2(u64 tpidr_el2)
+{
+	asm("msr tpidr_el2, %0": : "r" (tpidr_el2));
+}
diff --git a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
new file mode 100644
index 000000000000..86801b6055d6
--- /dev/null
+++ b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2012-2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/compiler.h>
+#include <linux/irqchip/arm-gic.h>
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
+
+/*
+ * __vgic_v2_perform_cpuif_access -- perform a GICV access on behalf of the
+ *				     guest.
+ *
+ * @vcpu: the offending vcpu
+ *
+ * Returns:
+ *  1: GICV access successfully performed
+ *  0: Not a GICV access
+ * -1: Illegal GICV access
+ */
+int __hyp_text __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = kern_hyp_va(vcpu->kvm);
+	struct vgic_dist *vgic = &kvm->arch.vgic;
+	phys_addr_t fault_ipa;
+	void __iomem *addr;
+	int rd;
+
+	/* Build the full address */
+	fault_ipa  = kvm_vcpu_get_fault_ipa(vcpu);
+	fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0);
+
+	/* If not for GICV, move on */
+	if (fault_ipa <  vgic->vgic_cpu_base ||
+	    fault_ipa >= (vgic->vgic_cpu_base + KVM_VGIC_V2_CPU_SIZE))
+		return 0;
+
+	/* Reject anything but a 32bit access */
+	if (kvm_vcpu_dabt_get_as(vcpu) != sizeof(u32))
+		return -1;
+
+	/* Not aligned? Don't bother */
+	if (fault_ipa & 3)
+		return -1;
+
+	rd = kvm_vcpu_dabt_get_rd(vcpu);
+	addr  = hyp_symbol_addr(kvm_vgic_global_state)->vcpu_hyp_va;
+	addr += fault_ipa - vgic->vgic_cpu_base;
+
+	if (kvm_vcpu_dabt_iswrite(vcpu)) {
+		u32 data = vcpu_data_guest_to_host(vcpu,
+						   vcpu_get_reg(vcpu, rd),
+						   sizeof(u32));
+		writel_relaxed(data, addr);
+	} else {
+		u32 data = readl_relaxed(addr);
+		vcpu_set_reg(vcpu, rd, vcpu_data_host_to_guest(vcpu, data,
+							       sizeof(u32)));
+	}
+
+	return 1;
+}
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index 60666a056944..d8e71659ba7e 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -58,7 +58,7 @@ static u64 get_except_vector(struct kvm_vcpu *vcpu, enum exception_type type)
 		exc_offset = LOWER_EL_AArch32_VECTOR;
 	}
 
-	return vcpu_sys_reg(vcpu, VBAR_EL1) + exc_offset + type;
+	return vcpu_read_sys_reg(vcpu, VBAR_EL1) + exc_offset + type;
 }
 
 static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr)
@@ -67,13 +67,13 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
 	bool is_aarch32 = vcpu_mode_is_32bit(vcpu);
 	u32 esr = 0;
 
-	*vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu);
+	vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu));
 	*vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync);
 
 	*vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
-	*vcpu_spsr(vcpu) = cpsr;
+	vcpu_write_spsr(vcpu, cpsr);
 
-	vcpu_sys_reg(vcpu, FAR_EL1) = addr;
+	vcpu_write_sys_reg(vcpu, addr, FAR_EL1);
 
 	/*
 	 * Build an {i,d}abort, depending on the level and the
@@ -94,7 +94,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
 	if (!is_iabt)
 		esr |= ESR_ELx_EC_DABT_LOW << ESR_ELx_EC_SHIFT;
 
-	vcpu_sys_reg(vcpu, ESR_EL1) = esr | ESR_ELx_FSC_EXTABT;
+	vcpu_write_sys_reg(vcpu, esr | ESR_ELx_FSC_EXTABT, ESR_EL1);
 }
 
 static void inject_undef64(struct kvm_vcpu *vcpu)
@@ -102,11 +102,11 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
 	unsigned long cpsr = *vcpu_cpsr(vcpu);
 	u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
 
-	*vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu);
+	vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu));
 	*vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync);
 
 	*vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
-	*vcpu_spsr(vcpu) = cpsr;
+	vcpu_write_spsr(vcpu, cpsr);
 
 	/*
 	 * Build an unknown exception, depending on the instruction
@@ -115,7 +115,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
 	if (kvm_vcpu_trap_il_is32bit(vcpu))
 		esr |= ESR_ELx_IL;
 
-	vcpu_sys_reg(vcpu, ESR_EL1) = esr;
+	vcpu_write_sys_reg(vcpu, esr, ESR_EL1);
 }
 
 /**
@@ -128,7 +128,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
  */
 void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr)
 {
-	if (!(vcpu->arch.hcr_el2 & HCR_RW))
+	if (vcpu_el1_is_32bit(vcpu))
 		kvm_inject_dabt32(vcpu, addr);
 	else
 		inject_abt64(vcpu, false, addr);
@@ -144,7 +144,7 @@ void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr)
  */
 void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr)
 {
-	if (!(vcpu->arch.hcr_el2 & HCR_RW))
+	if (vcpu_el1_is_32bit(vcpu))
 		kvm_inject_pabt32(vcpu, addr);
 	else
 		inject_abt64(vcpu, true, addr);
@@ -158,7 +158,7 @@ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr)
  */
 void kvm_inject_undefined(struct kvm_vcpu *vcpu)
 {
-	if (!(vcpu->arch.hcr_el2 & HCR_RW))
+	if (vcpu_el1_is_32bit(vcpu))
 		kvm_inject_undef32(vcpu);
 	else
 		inject_undef64(vcpu);
@@ -167,7 +167,7 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu)
 static void pend_guest_serror(struct kvm_vcpu *vcpu, u64 esr)
 {
 	vcpu_set_vsesr(vcpu, esr);
-	vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) | HCR_VSE);
+	*vcpu_hcr(vcpu) |= HCR_VSE;
 }
 
 /**
diff --git a/arch/arm64/kvm/regmap.c b/arch/arm64/kvm/regmap.c
index bbc6ae32e4af..eefe403a2e63 100644
--- a/arch/arm64/kvm/regmap.c
+++ b/arch/arm64/kvm/regmap.c
@@ -141,28 +141,61 @@ unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num)
 /*
  * Return the SPSR for the current mode of the virtual CPU.
  */
-unsigned long *vcpu_spsr32(const struct kvm_vcpu *vcpu)
+static int vcpu_spsr32_mode(const struct kvm_vcpu *vcpu)
 {
 	unsigned long mode = *vcpu_cpsr(vcpu) & COMPAT_PSR_MODE_MASK;
 	switch (mode) {
-	case COMPAT_PSR_MODE_SVC:
-		mode = KVM_SPSR_SVC;
-		break;
-	case COMPAT_PSR_MODE_ABT:
-		mode = KVM_SPSR_ABT;
-		break;
-	case COMPAT_PSR_MODE_UND:
-		mode = KVM_SPSR_UND;
-		break;
-	case COMPAT_PSR_MODE_IRQ:
-		mode = KVM_SPSR_IRQ;
-		break;
-	case COMPAT_PSR_MODE_FIQ:
-		mode = KVM_SPSR_FIQ;
-		break;
+	case COMPAT_PSR_MODE_SVC: return KVM_SPSR_SVC;
+	case COMPAT_PSR_MODE_ABT: return KVM_SPSR_ABT;
+	case COMPAT_PSR_MODE_UND: return KVM_SPSR_UND;
+	case COMPAT_PSR_MODE_IRQ: return KVM_SPSR_IRQ;
+	case COMPAT_PSR_MODE_FIQ: return KVM_SPSR_FIQ;
+	default: BUG();
+	}
+}
+
+unsigned long vcpu_read_spsr32(const struct kvm_vcpu *vcpu)
+{
+	int spsr_idx = vcpu_spsr32_mode(vcpu);
+
+	if (!vcpu->arch.sysregs_loaded_on_cpu)
+		return vcpu_gp_regs(vcpu)->spsr[spsr_idx];
+
+	switch (spsr_idx) {
+	case KVM_SPSR_SVC:
+		return read_sysreg_el1(spsr);
+	case KVM_SPSR_ABT:
+		return read_sysreg(spsr_abt);
+	case KVM_SPSR_UND:
+		return read_sysreg(spsr_und);
+	case KVM_SPSR_IRQ:
+		return read_sysreg(spsr_irq);
+	case KVM_SPSR_FIQ:
+		return read_sysreg(spsr_fiq);
 	default:
 		BUG();
 	}
+}
+
+/*
+ * Set the SPSR for the current mode of the virtual CPU, writing the
+ * hardware register directly when the sysregs are loaded on the CPU.
+ */
+void vcpu_write_spsr32(struct kvm_vcpu *vcpu, unsigned long v)
+{
+	int spsr_idx = vcpu_spsr32_mode(vcpu);
+
+	if (!vcpu->arch.sysregs_loaded_on_cpu) {
+		vcpu_gp_regs(vcpu)->spsr[spsr_idx] = v;
+		return;
+	}
 
-	return (unsigned long *)&vcpu_gp_regs(vcpu)->spsr[mode];
+	/* Write only the selected SPSR; never fall through to the others. */
+	switch (spsr_idx) {
+	case KVM_SPSR_SVC: write_sysreg_el1(v, spsr);	return;
+	case KVM_SPSR_ABT: write_sysreg(v, spsr_abt);	return;
+	case KVM_SPSR_UND: write_sysreg(v, spsr_und);	return;
+	case KVM_SPSR_IRQ: write_sysreg(v, spsr_irq);	return;
+	case KVM_SPSR_FIQ: write_sysreg(v, spsr_fiq);	return;
+	}
 }
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 50a43c7b97ca..806b0b126a64 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -35,6 +35,7 @@
 #include <asm/kvm_coproc.h>
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_host.h>
+#include <asm/kvm_hyp.h>
 #include <asm/kvm_mmu.h>
 #include <asm/perf_event.h>
 #include <asm/sysreg.h>
@@ -76,6 +77,93 @@ static bool write_to_read_only(struct kvm_vcpu *vcpu,
 	return false;
 }
 
+u64 vcpu_read_sys_reg(struct kvm_vcpu *vcpu, int reg)
+{
+	if (!vcpu->arch.sysregs_loaded_on_cpu)
+		goto immediate_read;
+
+	/*
+	 * System registers listed in the switch are not saved on every
+	 * exit from the guest but are only saved on vcpu_put.
+	 *
+	 * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
+	 * should never be listed below, because the guest cannot modify its
+	 * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's
+	 * thread when emulating cross-VCPU communication.
+	 */
+	switch (reg) {
+	case CSSELR_EL1:	return read_sysreg_s(SYS_CSSELR_EL1);
+	case SCTLR_EL1:		return read_sysreg_s(sctlr_EL12);
+	case ACTLR_EL1:		return read_sysreg_s(SYS_ACTLR_EL1);
+	case CPACR_EL1:		return read_sysreg_s(cpacr_EL12);
+	case TTBR0_EL1:		return read_sysreg_s(ttbr0_EL12);
+	case TTBR1_EL1:		return read_sysreg_s(ttbr1_EL12);
+	case TCR_EL1:		return read_sysreg_s(tcr_EL12);
+	case ESR_EL1:		return read_sysreg_s(esr_EL12);
+	case AFSR0_EL1:		return read_sysreg_s(afsr0_EL12);
+	case AFSR1_EL1:		return read_sysreg_s(afsr1_EL12);
+	case FAR_EL1:		return read_sysreg_s(far_EL12);
+	case MAIR_EL1:		return read_sysreg_s(mair_EL12);
+	case VBAR_EL1:		return read_sysreg_s(vbar_EL12);
+	case CONTEXTIDR_EL1:	return read_sysreg_s(contextidr_EL12);
+	case TPIDR_EL0:		return read_sysreg_s(SYS_TPIDR_EL0);
+	case TPIDRRO_EL0:	return read_sysreg_s(SYS_TPIDRRO_EL0);
+	case TPIDR_EL1:		return read_sysreg_s(SYS_TPIDR_EL1);
+	case AMAIR_EL1:		return read_sysreg_s(amair_EL12);
+	case CNTKCTL_EL1:	return read_sysreg_s(cntkctl_EL12);
+	case PAR_EL1:		return read_sysreg_s(SYS_PAR_EL1);
+	case DACR32_EL2:	return read_sysreg_s(SYS_DACR32_EL2);
+	case IFSR32_EL2:	return read_sysreg_s(SYS_IFSR32_EL2);
+	case DBGVCR32_EL2:	return read_sysreg_s(SYS_DBGVCR32_EL2);
+	}
+
+immediate_read:
+	return __vcpu_sys_reg(vcpu, reg);
+}
+
+void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
+{
+	if (!vcpu->arch.sysregs_loaded_on_cpu)
+		goto immediate_write;
+
+	/*
+	 * System registers listed in the switch are not restored on every
+	 * entry to the guest but are only restored on vcpu_load.
+	 *
+	 * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but
+	 * should never be listed below, because the MPIDR should only be
+	 * set once, before running the VCPU, and never changed later.
+	 */
+	switch (reg) {
+	case CSSELR_EL1:	write_sysreg_s(val, SYS_CSSELR_EL1);	return;
+	case SCTLR_EL1:		write_sysreg_s(val, sctlr_EL12);	return;
+	case ACTLR_EL1:		write_sysreg_s(val, SYS_ACTLR_EL1);	return;
+	case CPACR_EL1:		write_sysreg_s(val, cpacr_EL12);	return;
+	case TTBR0_EL1:		write_sysreg_s(val, ttbr0_EL12);	return;
+	case TTBR1_EL1:		write_sysreg_s(val, ttbr1_EL12);	return;
+	case TCR_EL1:		write_sysreg_s(val, tcr_EL12);		return;
+	case ESR_EL1:		write_sysreg_s(val, esr_EL12);		return;
+	case AFSR0_EL1:		write_sysreg_s(val, afsr0_EL12);	return;
+	case AFSR1_EL1:		write_sysreg_s(val, afsr1_EL12);	return;
+	case FAR_EL1:		write_sysreg_s(val, far_EL12);		return;
+	case MAIR_EL1:		write_sysreg_s(val, mair_EL12);		return;
+	case VBAR_EL1:		write_sysreg_s(val, vbar_EL12);		return;
+	case CONTEXTIDR_EL1:	write_sysreg_s(val, contextidr_EL12);	return;
+	case TPIDR_EL0:		write_sysreg_s(val, SYS_TPIDR_EL0);	return;
+	case TPIDRRO_EL0:	write_sysreg_s(val, SYS_TPIDRRO_EL0);	return;
+	case TPIDR_EL1:		write_sysreg_s(val, SYS_TPIDR_EL1);	return;
+	case AMAIR_EL1:		write_sysreg_s(val, amair_EL12);	return;
+	case CNTKCTL_EL1:	write_sysreg_s(val, cntkctl_EL12);	return;
+	case PAR_EL1:		write_sysreg_s(val, SYS_PAR_EL1);	return;
+	case DACR32_EL2:	write_sysreg_s(val, SYS_DACR32_EL2);	return;
+	case IFSR32_EL2:	write_sysreg_s(val, SYS_IFSR32_EL2);	return;
+	case DBGVCR32_EL2:	write_sysreg_s(val, SYS_DBGVCR32_EL2);	return;
+	}
+
+immediate_write:
+	 __vcpu_sys_reg(vcpu, reg) = val;
+}
+
 /* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */
 static u32 cache_levels;
 
@@ -121,16 +209,26 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu,
 			  const struct sys_reg_desc *r)
 {
 	bool was_enabled = vcpu_has_cache_enabled(vcpu);
+	u64 val;
+	int reg = r->reg;
 
 	BUG_ON(!p->is_write);
 
-	if (!p->is_aarch32) {
-		vcpu_sys_reg(vcpu, r->reg) = p->regval;
+	/* See the 32bit mapping in kvm_host.h */
+	if (p->is_aarch32)
+		reg = r->reg / 2;
+
+	if (!p->is_aarch32 || !p->is_32bit) {
+		val = p->regval;
 	} else {
-		if (!p->is_32bit)
-			vcpu_cp15_64_high(vcpu, r->reg) = upper_32_bits(p->regval);
-		vcpu_cp15_64_low(vcpu, r->reg) = lower_32_bits(p->regval);
+		val = vcpu_read_sys_reg(vcpu, reg);
+		if (r->reg % 2)
+			val = (p->regval << 32) | (u64)lower_32_bits(val);
+		else
+			val = ((u64)upper_32_bits(val) << 32) |
+				lower_32_bits(p->regval);
 	}
+	vcpu_write_sys_reg(vcpu, val, reg);
 
 	kvm_toggle_cache(vcpu, was_enabled);
 	return true;
@@ -175,6 +273,14 @@ static bool trap_raz_wi(struct kvm_vcpu *vcpu,
 		return read_zero(vcpu, p);
 }
 
+static bool trap_undef(struct kvm_vcpu *vcpu,
+		       struct sys_reg_params *p,
+		       const struct sys_reg_desc *r)
+{
+	kvm_inject_undefined(vcpu);
+	return false;
+}
+
 static bool trap_oslsr_el1(struct kvm_vcpu *vcpu,
 			   struct sys_reg_params *p,
 			   const struct sys_reg_desc *r)
@@ -231,10 +337,10 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu,
 			    const struct sys_reg_desc *r)
 {
 	if (p->is_write) {
-		vcpu_sys_reg(vcpu, r->reg) = p->regval;
+		vcpu_write_sys_reg(vcpu, p->regval, r->reg);
 		vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
 	} else {
-		p->regval = vcpu_sys_reg(vcpu, r->reg);
+		p->regval = vcpu_read_sys_reg(vcpu, r->reg);
 	}
 
 	trace_trap_reg(__func__, r->reg, p->is_write, p->regval);
@@ -447,7 +553,8 @@ static void reset_wcr(struct kvm_vcpu *vcpu,
 
 static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
 {
-	vcpu_sys_reg(vcpu, AMAIR_EL1) = read_sysreg(amair_el1);
+	u64 amair = read_sysreg(amair_el1);
+	vcpu_write_sys_reg(vcpu, amair, AMAIR_EL1);
 }
 
 static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
@@ -464,7 +571,7 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
 	mpidr = (vcpu->vcpu_id & 0x0f) << MPIDR_LEVEL_SHIFT(0);
 	mpidr |= ((vcpu->vcpu_id >> 4) & 0xff) << MPIDR_LEVEL_SHIFT(1);
 	mpidr |= ((vcpu->vcpu_id >> 12) & 0xff) << MPIDR_LEVEL_SHIFT(2);
-	vcpu_sys_reg(vcpu, MPIDR_EL1) = (1ULL << 31) | mpidr;
+	vcpu_write_sys_reg(vcpu, (1ULL << 31) | mpidr, MPIDR_EL1);
 }
 
 static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
@@ -478,12 +585,12 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
 	 */
 	val = ((pmcr & ~ARMV8_PMU_PMCR_MASK)
 	       | (ARMV8_PMU_PMCR_MASK & 0xdecafbad)) & (~ARMV8_PMU_PMCR_E);
-	vcpu_sys_reg(vcpu, PMCR_EL0) = val;
+	__vcpu_sys_reg(vcpu, PMCR_EL0) = val;
 }
 
 static bool check_pmu_access_disabled(struct kvm_vcpu *vcpu, u64 flags)
 {
-	u64 reg = vcpu_sys_reg(vcpu, PMUSERENR_EL0);
+	u64 reg = __vcpu_sys_reg(vcpu, PMUSERENR_EL0);
 	bool enabled = (reg & flags) || vcpu_mode_priv(vcpu);
 
 	if (!enabled)
@@ -525,14 +632,14 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 
 	if (p->is_write) {
 		/* Only update writeable bits of PMCR */
-		val = vcpu_sys_reg(vcpu, PMCR_EL0);
+		val = __vcpu_sys_reg(vcpu, PMCR_EL0);
 		val &= ~ARMV8_PMU_PMCR_MASK;
 		val |= p->regval & ARMV8_PMU_PMCR_MASK;
-		vcpu_sys_reg(vcpu, PMCR_EL0) = val;
+		__vcpu_sys_reg(vcpu, PMCR_EL0) = val;
 		kvm_pmu_handle_pmcr(vcpu, val);
 	} else {
 		/* PMCR.P & PMCR.C are RAZ */
-		val = vcpu_sys_reg(vcpu, PMCR_EL0)
+		val = __vcpu_sys_reg(vcpu, PMCR_EL0)
 		      & ~(ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C);
 		p->regval = val;
 	}
@@ -550,10 +657,10 @@ static bool access_pmselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 		return false;
 
 	if (p->is_write)
-		vcpu_sys_reg(vcpu, PMSELR_EL0) = p->regval;
+		__vcpu_sys_reg(vcpu, PMSELR_EL0) = p->regval;
 	else
 		/* return PMSELR.SEL field */
-		p->regval = vcpu_sys_reg(vcpu, PMSELR_EL0)
+		p->regval = __vcpu_sys_reg(vcpu, PMSELR_EL0)
 			    & ARMV8_PMU_COUNTER_MASK;
 
 	return true;
@@ -586,7 +693,7 @@ static bool pmu_counter_idx_valid(struct kvm_vcpu *vcpu, u64 idx)
 {
 	u64 pmcr, val;
 
-	pmcr = vcpu_sys_reg(vcpu, PMCR_EL0);
+	pmcr = __vcpu_sys_reg(vcpu, PMCR_EL0);
 	val = (pmcr >> ARMV8_PMU_PMCR_N_SHIFT) & ARMV8_PMU_PMCR_N_MASK;
 	if (idx >= val && idx != ARMV8_PMU_CYCLE_IDX) {
 		kvm_inject_undefined(vcpu);
@@ -611,7 +718,7 @@ static bool access_pmu_evcntr(struct kvm_vcpu *vcpu,
 			if (pmu_access_event_counter_el0_disabled(vcpu))
 				return false;
 
-			idx = vcpu_sys_reg(vcpu, PMSELR_EL0)
+			idx = __vcpu_sys_reg(vcpu, PMSELR_EL0)
 			      & ARMV8_PMU_COUNTER_MASK;
 		} else if (r->Op2 == 0) {
 			/* PMCCNTR_EL0 */
@@ -666,7 +773,7 @@ static bool access_pmu_evtyper(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 
 	if (r->CRn == 9 && r->CRm == 13 && r->Op2 == 1) {
 		/* PMXEVTYPER_EL0 */
-		idx = vcpu_sys_reg(vcpu, PMSELR_EL0) & ARMV8_PMU_COUNTER_MASK;
+		idx = __vcpu_sys_reg(vcpu, PMSELR_EL0) & ARMV8_PMU_COUNTER_MASK;
 		reg = PMEVTYPER0_EL0 + idx;
 	} else if (r->CRn == 14 && (r->CRm & 12) == 12) {
 		idx = ((r->CRm & 3) << 3) | (r->Op2 & 7);
@@ -684,9 +791,9 @@ static bool access_pmu_evtyper(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 
 	if (p->is_write) {
 		kvm_pmu_set_counter_event_type(vcpu, p->regval, idx);
-		vcpu_sys_reg(vcpu, reg) = p->regval & ARMV8_PMU_EVTYPE_MASK;
+		__vcpu_sys_reg(vcpu, reg) = p->regval & ARMV8_PMU_EVTYPE_MASK;
 	} else {
-		p->regval = vcpu_sys_reg(vcpu, reg) & ARMV8_PMU_EVTYPE_MASK;
+		p->regval = __vcpu_sys_reg(vcpu, reg) & ARMV8_PMU_EVTYPE_MASK;
 	}
 
 	return true;
@@ -708,15 +815,15 @@ static bool access_pmcnten(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 		val = p->regval & mask;
 		if (r->Op2 & 0x1) {
 			/* accessing PMCNTENSET_EL0 */
-			vcpu_sys_reg(vcpu, PMCNTENSET_EL0) |= val;
+			__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) |= val;
 			kvm_pmu_enable_counter(vcpu, val);
 		} else {
 			/* accessing PMCNTENCLR_EL0 */
-			vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= ~val;
+			__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= ~val;
 			kvm_pmu_disable_counter(vcpu, val);
 		}
 	} else {
-		p->regval = vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask;
+		p->regval = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask;
 	}
 
 	return true;
@@ -740,12 +847,12 @@ static bool access_pminten(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 
 		if (r->Op2 & 0x1)
 			/* accessing PMINTENSET_EL1 */
-			vcpu_sys_reg(vcpu, PMINTENSET_EL1) |= val;
+			__vcpu_sys_reg(vcpu, PMINTENSET_EL1) |= val;
 		else
 			/* accessing PMINTENCLR_EL1 */
-			vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= ~val;
+			__vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= ~val;
 	} else {
-		p->regval = vcpu_sys_reg(vcpu, PMINTENSET_EL1) & mask;
+		p->regval = __vcpu_sys_reg(vcpu, PMINTENSET_EL1) & mask;
 	}
 
 	return true;
@@ -765,12 +872,12 @@ static bool access_pmovs(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 	if (p->is_write) {
 		if (r->CRm & 0x2)
 			/* accessing PMOVSSET_EL0 */
-			vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= (p->regval & mask);
+			__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= (p->regval & mask);
 		else
 			/* accessing PMOVSCLR_EL0 */
-			vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= ~(p->regval & mask);
+			__vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= ~(p->regval & mask);
 	} else {
-		p->regval = vcpu_sys_reg(vcpu, PMOVSSET_EL0) & mask;
+		p->regval = __vcpu_sys_reg(vcpu, PMOVSSET_EL0) & mask;
 	}
 
 	return true;
@@ -807,10 +914,10 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 			return false;
 		}
 
-		vcpu_sys_reg(vcpu, PMUSERENR_EL0) = p->regval
-						    & ARMV8_PMU_USERENR_MASK;
+		__vcpu_sys_reg(vcpu, PMUSERENR_EL0) =
+			       p->regval & ARMV8_PMU_USERENR_MASK;
 	} else {
-		p->regval = vcpu_sys_reg(vcpu, PMUSERENR_EL0)
+		p->regval = __vcpu_sys_reg(vcpu, PMUSERENR_EL0)
 			    & ARMV8_PMU_USERENR_MASK;
 	}
 
@@ -893,6 +1000,12 @@ static u64 read_id_reg(struct sys_reg_desc const *r, bool raz)
 				    task_pid_nr(current));
 
 		val &= ~(0xfUL << ID_AA64PFR0_SVE_SHIFT);
+	} else if (id == SYS_ID_AA64MMFR1_EL1) {
+		if (val & (0xfUL << ID_AA64MMFR1_LOR_SHIFT))
+			pr_err_once("kvm [%i]: LORegions unsupported for guests, suppressing\n",
+				    task_pid_nr(current));
+
+		val &= ~(0xfUL << ID_AA64MMFR1_LOR_SHIFT);
 	}
 
 	return val;
@@ -1178,6 +1291,12 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	{ SYS_DESC(SYS_MAIR_EL1), access_vm_reg, reset_unknown, MAIR_EL1 },
 	{ SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 },
 
+	{ SYS_DESC(SYS_LORSA_EL1), trap_undef },
+	{ SYS_DESC(SYS_LOREA_EL1), trap_undef },
+	{ SYS_DESC(SYS_LORN_EL1), trap_undef },
+	{ SYS_DESC(SYS_LORC_EL1), trap_undef },
+	{ SYS_DESC(SYS_LORID_EL1), trap_undef },
+
 	{ SYS_DESC(SYS_VBAR_EL1), NULL, reset_val, VBAR_EL1, 0 },
 	{ SYS_DESC(SYS_DISR_EL1), NULL, reset_val, DISR_EL1, 0 },
 
@@ -1545,6 +1664,11 @@ static const struct sys_reg_desc cp15_regs[] = {
 
 	{ Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID },
 
+	/* CNTP_TVAL */
+	{ Op1( 0), CRn(14), CRm( 2), Op2( 0), access_cntp_tval },
+	/* CNTP_CTL */
+	{ Op1( 0), CRn(14), CRm( 2), Op2( 1), access_cntp_ctl },
+
 	/* PMEVCNTRn */
 	PMU_PMEVCNTR(0),
 	PMU_PMEVCNTR(1),
@@ -1618,6 +1742,7 @@ static const struct sys_reg_desc cp15_64_regs[] = {
 	{ Op1( 0), CRn( 0), CRm( 9), Op2( 0), access_pmu_evcntr },
 	{ Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi },
 	{ Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 },
+	{ Op1( 2), CRn( 0), CRm(14), Op2( 0), access_cntp_cval },
 };
 
 /* Target specific emulation tables */
@@ -2194,7 +2319,7 @@ int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg
 	if (r->get_user)
 		return (r->get_user)(vcpu, r, reg, uaddr);
 
-	return reg_to_user(uaddr, &vcpu_sys_reg(vcpu, r->reg), reg->id);
+	return reg_to_user(uaddr, &__vcpu_sys_reg(vcpu, r->reg), reg->id);
 }
 
 int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
@@ -2215,7 +2340,7 @@ int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg
 	if (r->set_user)
 		return (r->set_user)(vcpu, r, reg, uaddr);
 
-	return reg_from_user(&vcpu_sys_reg(vcpu, r->reg), uaddr, reg->id);
+	return reg_from_user(&__vcpu_sys_reg(vcpu, r->reg), uaddr, reg->id);
 }
 
 static unsigned int num_demux_regs(void)
@@ -2421,6 +2546,6 @@ void kvm_reset_sys_regs(struct kvm_vcpu *vcpu)
 	reset_sys_reg_descs(vcpu, table, num);
 
 	for (num = 1; num < NR_SYS_REGS; num++)
-		if (vcpu_sys_reg(vcpu, num) == 0x4242424242424242)
-			panic("Didn't reset vcpu_sys_reg(%zi)", num);
+		if (__vcpu_sys_reg(vcpu, num) == 0x4242424242424242)
+			panic("Didn't reset __vcpu_sys_reg(%zi)", num);
 }
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
index 060f5348ef25..cd710f8b63e0 100644
--- a/arch/arm64/kvm/sys_regs.h
+++ b/arch/arm64/kvm/sys_regs.h
@@ -89,14 +89,14 @@ static inline void reset_unknown(struct kvm_vcpu *vcpu,
 {
 	BUG_ON(!r->reg);
 	BUG_ON(r->reg >= NR_SYS_REGS);
-	vcpu_sys_reg(vcpu, r->reg) = 0x1de7ec7edbadc0deULL;
+	__vcpu_sys_reg(vcpu, r->reg) = 0x1de7ec7edbadc0deULL;
 }
 
 static inline void reset_val(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
 {
 	BUG_ON(!r->reg);
 	BUG_ON(r->reg >= NR_SYS_REGS);
-	vcpu_sys_reg(vcpu, r->reg) = r->val;
+	__vcpu_sys_reg(vcpu, r->reg) = r->val;
 }
 
 static inline int cmp_sys_reg(const struct sys_reg_desc *i1,
diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c
index 969ade1d333d..ddb8497d18d6 100644
--- a/arch/arm64/kvm/sys_regs_generic_v8.c
+++ b/arch/arm64/kvm/sys_regs_generic_v8.c
@@ -38,13 +38,13 @@ static bool access_actlr(struct kvm_vcpu *vcpu,
 	if (p->is_write)
 		return ignore_write(vcpu, p);
 
-	p->regval = vcpu_sys_reg(vcpu, ACTLR_EL1);
+	p->regval = vcpu_read_sys_reg(vcpu, ACTLR_EL1);
 	return true;
 }
 
 static void reset_actlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
 {
-	vcpu_sys_reg(vcpu, ACTLR_EL1) = read_sysreg(actlr_el1);
+	__vcpu_sys_reg(vcpu, ACTLR_EL1) = read_sysreg(actlr_el1);
 }
 
 /*
diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c
new file mode 100644
index 000000000000..c712a7376bc1
--- /dev/null
+++ b/arch/arm64/kvm/va_layout.c
@@ -0,0 +1,227 @@
+/*
+ * Copyright (C) 2017 ARM Ltd.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/random.h>
+#include <linux/memblock.h>
+#include <asm/alternative.h>
+#include <asm/debug-monitors.h>
+#include <asm/insn.h>
+#include <asm/kvm_mmu.h>
+
+/*
+ * Bit position of the LSB of the random hyp VA tag, or 0 if no tag is used.
+ */
+static u8 tag_lsb;
+/*
+ * Random hyp VA tag plus the region bit, kept shifted right by tag_lsb.
+ */
+static u64 tag_val;
+static u64 va_mask;	/* AND mask for kernel VAs; 0 means not computed yet */
+/* Compute the va_mask/tag_lsb/tag_val that map kernel VAs to hyp VAs. */
+static void compute_layout(void)
+{
+	phys_addr_t idmap_addr = __pa_symbol(__hyp_idmap_text_start);
+	u64 hyp_va_msb;
+	int kva_msb;
+
+	/* Hyp region bit: the inverse of the idmap's bit (VA_BITS - 1) */
+	hyp_va_msb  = idmap_addr & BIT(VA_BITS - 1);
+	hyp_va_msb ^= BIT(VA_BITS - 1);
+
+	kva_msb = fls64((u64)phys_to_virt(memblock_start_of_DRAM()) ^
+			(u64)(high_memory - 1));	/* top differing bit */
+
+	if (kva_msb == (VA_BITS - 1)) {
+		/*
+		 * No space in the address, let's compute the mask so
+		 * that it covers (VA_BITS - 1) bits, and the region
+		 * bit. The tag stays set to zero.
+		 */
+		va_mask  = BIT(VA_BITS - 1) - 1;
+		va_mask |= hyp_va_msb;
+	} else {
+		/*
+		 * We do have some free bits to insert a random tag.
+		 * Hyp VAs are now created from kernel linear map VAs
+		 * using the following formula (with V == VA_BITS):
+		 *
+		 *  63 ... V |     V-1    | V-2 .. tag_lsb | tag_lsb - 1 .. 0
+		 *  ---------------------------------------------------------
+		 * | 0000000 | hyp_va_msb |    random tag  |  kern linear VA |
+		 */
+		tag_lsb = kva_msb;
+		va_mask = GENMASK_ULL(tag_lsb - 1, 0);
+		tag_val = get_random_long() & GENMASK_ULL(VA_BITS - 2, tag_lsb);
+		tag_val |= hyp_va_msb;
+		tag_val >>= tag_lsb;	/* callers shift it back up by tag_lsb */
+	}
+}
+/* Build step n (0-4) of the kernel VA -> hyp VA instruction sequence. */
+static u32 compute_instruction(int n, u32 rd, u32 rn)
+{
+	u32 insn = AARCH64_BREAK_FAULT;	/* returned as-is for any other n */
+
+	switch (n) {
+	case 0:	/* AND with va_mask */
+		insn = aarch64_insn_gen_logical_immediate(AARCH64_INSN_LOGIC_AND,
+							  AARCH64_INSN_VARIANT_64BIT,
+							  rn, rd, va_mask);
+		break;
+
+	case 1:
+		/* Rotate right by tag_lsb (ROR is EXTR with Rm = Rn) */
+		insn = aarch64_insn_gen_extr(AARCH64_INSN_VARIANT_64BIT,
+					     rn, rn, rd,
+					     tag_lsb);
+		break;
+
+	case 2:	/* ADD bits 11:0 of tag_val */
+		insn = aarch64_insn_gen_add_sub_imm(rd, rn,
+						    tag_val & GENMASK(11, 0),
+						    AARCH64_INSN_VARIANT_64BIT,
+						    AARCH64_INSN_ADSB_ADD);
+		break;
+
+	case 3:	/* ADD bits 23:12 of tag_val */
+		insn = aarch64_insn_gen_add_sub_imm(rd, rn,
+						    tag_val & GENMASK(23, 12),
+						    AARCH64_INSN_VARIANT_64BIT,
+						    AARCH64_INSN_ADSB_ADD);
+		break;
+
+	case 4:
+		/* Rotate right by (64 - tag_lsb), undoing the step 1 rotation */
+		insn = aarch64_insn_gen_extr(AARCH64_INSN_VARIANT_64BIT,
+					     rn, rn, rd, 64 - tag_lsb);
+		break;
+	}
+
+	return insn;
+}
+/* Fill updptr[0..4] with the kernel VA -> hyp VA sequence, or with NOPs. */
+void __init kvm_update_va_mask(struct alt_instr *alt,
+			       __le32 *origptr, __le32 *updptr, int nr_inst)
+{
+	int i;
+
+	BUG_ON(nr_inst != 5);	/* compute_instruction() knows 5 steps */
+
+	if (!has_vhe() && !va_mask)	/* layout not computed yet */
+		compute_layout();
+
+	for (i = 0; i < nr_inst; i++) {
+		u32 rd, rn, insn, oinsn;
+
+		/*
+		 * VHE doesn't need any address translation, let's NOP
+		 * everything.
+		 *
+		 * Alternatively, if we don't have any spare bits in
+		 * the address, NOP everything after masking that
+		 * kernel VA.
+		 */
+		if (has_vhe() || (!tag_lsb && i > 0)) {
+			updptr[i] = cpu_to_le32(aarch64_insn_gen_nop());
+			continue;
+		}
+
+		/* Keep the Rd/Rn registers encoded in the original insn */
+		oinsn = le32_to_cpu(origptr[i]);
+		rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, oinsn);
+		rn = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RN, oinsn);
+
+		insn = compute_instruction(i, rd, rn);
+		BUG_ON(insn == AARCH64_BREAK_FAULT);
+
+		updptr[i] = cpu_to_le32(insn);
+	}
+}
+
+void *__kvm_bp_vect_base;
+int __kvm_harden_el2_vector_slot;
+/* Emit: stp x0, x1; movz/movk x0 with the hyp vector address; br x0. */
+void kvm_patch_vector_branch(struct alt_instr *alt,
+			     __le32 *origptr, __le32 *updptr, int nr_inst)
+{
+	u64 addr;
+	u32 insn;
+
+	BUG_ON(nr_inst != 5);	/* exactly 5 insns are written below */
+
+	if (has_vhe() || !cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)) {
+		WARN_ON_ONCE(cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS));
+		return;		/* updptr is left untouched */
+	}
+
+	if (!va_mask)
+		compute_layout();
+
+	/*
+	 * Compute HYP VA by using the same computation as kern_hyp_va()
+	 */
+	addr = (uintptr_t)kvm_ksym_ref(__kvm_hyp_vector);
+	addr &= va_mask;
+	addr |= tag_val << tag_lsb;
+
+	/* Use PC[10:7] to branch to the same vector in KVM */
+	addr |= ((u64)origptr & GENMASK_ULL(10, 7));
+
+	/*
+	 * Branch to the second instruction in the vectors in order to
+	 * avoid the initial store on the stack (which we already
+	 * perform in the hardening vectors).
+	 */
+	addr += AARCH64_INSN_SIZE;
+
+	/* stp x0, x1, [sp, #-16]! */
+	insn = aarch64_insn_gen_load_store_pair(AARCH64_INSN_REG_0,
+						AARCH64_INSN_REG_1,
+						AARCH64_INSN_REG_SP,
+						-16,
+						AARCH64_INSN_VARIANT_64BIT,
+						AARCH64_INSN_LDST_STORE_PAIR_PRE_INDEX);
+	*updptr++ = cpu_to_le32(insn);
+
+	/* movz x0, #(addr & 0xffff) */
+	insn = aarch64_insn_gen_movewide(AARCH64_INSN_REG_0,
+					 (u16)addr,
+					 0,
+					 AARCH64_INSN_VARIANT_64BIT,
+					 AARCH64_INSN_MOVEWIDE_ZERO);
+	*updptr++ = cpu_to_le32(insn);
+
+	/* movk x0, #((addr >> 16) & 0xffff), lsl #16 */
+	insn = aarch64_insn_gen_movewide(AARCH64_INSN_REG_0,
+					 (u16)(addr >> 16),
+					 16,
+					 AARCH64_INSN_VARIANT_64BIT,
+					 AARCH64_INSN_MOVEWIDE_KEEP);
+	*updptr++ = cpu_to_le32(insn);
+
+	/* movk x0, #((addr >> 32) & 0xffff), lsl #32 */
+	insn = aarch64_insn_gen_movewide(AARCH64_INSN_REG_0,
+					 (u16)(addr >> 32),
+					 32,
+					 AARCH64_INSN_VARIANT_64BIT,
+					 AARCH64_INSN_MOVEWIDE_KEEP);
+	*updptr++ = cpu_to_le32(insn);
+
+	/* br x0 */
+	insn = aarch64_insn_gen_branch_reg(AARCH64_INSN_REG_0,
+					   AARCH64_INSN_BRANCH_NOLINK);
+	*updptr++ = cpu_to_le32(insn);
+}
diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
index decccffb03ca..842c8a5fcd53 100644
--- a/arch/arm64/mm/mmap.c
+++ b/arch/arm64/mm/mmap.c
@@ -38,12 +38,12 @@
 #define MIN_GAP (SZ_128M)
 #define MAX_GAP	(STACK_TOP/6*5)
 
-static int mmap_is_legacy(void)
+static int mmap_is_legacy(struct rlimit *rlim_stack)
 {
 	if (current->personality & ADDR_COMPAT_LAYOUT)
 		return 1;
 
-	if (rlimit(RLIMIT_STACK) == RLIM_INFINITY)
+	if (rlim_stack->rlim_cur == RLIM_INFINITY)
 		return 1;
 
 	return sysctl_legacy_va_layout;
@@ -62,9 +62,9 @@ unsigned long arch_mmap_rnd(void)
 	return rnd << PAGE_SHIFT;
 }
 
-static unsigned long mmap_base(unsigned long rnd)
+static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
 {
-	unsigned long gap = rlimit(RLIMIT_STACK);
+	unsigned long gap = rlim_stack->rlim_cur;
 	unsigned long pad = (STACK_RND_MASK << PAGE_SHIFT) + stack_guard_gap;
 
 	/* Values close to RLIM_INFINITY can overflow. */
@@ -83,7 +83,7 @@ static unsigned long mmap_base(unsigned long rnd)
  * This function, called very early during the creation of a new process VM
  * image, sets up which VM layout function to use:
  */
-void arch_pick_mmap_layout(struct mm_struct *mm)
+void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
 {
 	unsigned long random_factor = 0UL;
 
@@ -94,11 +94,11 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 	 * Fall back to the standard layout if the personality bit is set, or
 	 * if the expected stack growth is unlimited:
 	 */
-	if (mmap_is_legacy()) {
+	if (mmap_is_legacy(rlim_stack)) {
 		mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
 		mm->get_unmapped_area = arch_get_unmapped_area;
 	} else {
-		mm->mmap_base = mmap_base(random_factor);
+		mm->mmap_base = mmap_base(random_factor, rlim_stack);
 		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
 	}
 }
diff --git a/arch/c6x/Makefile b/arch/c6x/Makefile
index 6f6096ff05a4..6ab942e6c534 100644
--- a/arch/c6x/Makefile
+++ b/arch/c6x/Makefile
@@ -25,6 +25,7 @@ KBUILD_AFLAGS   += -mbig-endian
 LINKFLAGS       += -mbig-endian
 KBUILD_LDFLAGS  += -mbig-endian
 LDFLAGS += -EB
+CHECKFLAGS	+= -D_BIG_ENDIAN
 endif
 
 head-y          := arch/c6x/kernel/head.o
diff --git a/arch/c6x/kernel/asm-offsets.c b/arch/c6x/kernel/asm-offsets.c
index cff57764fcad..0f8fde494875 100644
--- a/arch/c6x/kernel/asm-offsets.c
+++ b/arch/c6x/kernel/asm-offsets.c
@@ -107,7 +107,6 @@ void foo(void)
 	/* These would be unneccessary if we ran asm files
 	 * through the preprocessor.
 	 */
-	DEFINE(KTHREAD_SIZE, THREAD_SIZE);
 	DEFINE(KTHREAD_SHIFT, THREAD_SHIFT);
 	DEFINE(KTHREAD_START_SP, THREAD_START_SP);
 	DEFINE(ENOSYS_, ENOSYS);
diff --git a/arch/c6x/platforms/plldata.c b/arch/c6x/platforms/plldata.c
index e8b6cc6a7b5a..1ef04b5ab93f 100644
--- a/arch/c6x/platforms/plldata.c
+++ b/arch/c6x/platforms/plldata.c
@@ -19,6 +19,7 @@
 
 #include <asm/clock.h>
 #include <asm/setup.h>
+#include <asm/special_insns.h>
 #include <asm/irq.h>
 
 /*
diff --git a/arch/m68k/coldfire/device.c b/arch/m68k/coldfire/device.c
index 84938fdbbada..908d58347790 100644
--- a/arch/m68k/coldfire/device.c
+++ b/arch/m68k/coldfire/device.c
@@ -135,7 +135,11 @@ static struct platform_device mcf_fec0 = {
 	.id			= 0,
 	.num_resources		= ARRAY_SIZE(mcf_fec0_resources),
 	.resource		= mcf_fec0_resources,
-	.dev.platform_data	= FEC_PDATA,
+	.dev = {
+		.dma_mask		= &mcf_fec0.dev.coherent_dma_mask,
+		.coherent_dma_mask	= DMA_BIT_MASK(32),
+		.platform_data		= FEC_PDATA,
+	}
 };
 
 #ifdef MCFFEC_BASE1
@@ -167,7 +171,11 @@ static struct platform_device mcf_fec1 = {
 	.id			= 1,
 	.num_resources		= ARRAY_SIZE(mcf_fec1_resources),
 	.resource		= mcf_fec1_resources,
-	.dev.platform_data	= FEC_PDATA,
+	.dev = {
+		.dma_mask		= &mcf_fec1.dev.coherent_dma_mask,
+		.coherent_dma_mask	= DMA_BIT_MASK(32),
+		.platform_data		= FEC_PDATA,
+	}
 };
 #endif /* MCFFEC_BASE1 */
 #endif /* CONFIG_FEC */
diff --git a/arch/microblaze/include/asm/pci.h b/arch/microblaze/include/asm/pci.h
index 114b93488193..5de871eb4a59 100644
--- a/arch/microblaze/include/asm/pci.h
+++ b/arch/microblaze/include/asm/pci.h
@@ -47,9 +47,10 @@ extern int pci_proc_domain(struct pci_bus *bus);
 
 struct vm_area_struct;
 
-/* Tell drivers/pci/proc.c that we have pci_mmap_page_range() */
-#define HAVE_PCI_MMAP		1
-#define arch_can_pci_mmap_io()	1
+/* Tell PCI code what kind of PCI resource mappings we support */
+#define HAVE_PCI_MMAP			1
+#define ARCH_GENERIC_PCI_MMAP_RESOURCE	1
+#define arch_can_pci_mmap_io()		1
 
 extern int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val,
 			   size_t count);
diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h
index e53b8532353c..db8b1fa83452 100644
--- a/arch/microblaze/include/asm/pgtable.h
+++ b/arch/microblaze/include/asm/pgtable.h
@@ -33,6 +33,8 @@ extern int mem_init_done;
 #define PAGE_KERNEL		__pgprot(0) /* these mean nothing to non MMU */
 
 #define pgprot_noncached(x)	(x)
+#define pgprot_writecombine	pgprot_noncached
+#define pgprot_device		pgprot_noncached
 
 #define __swp_type(x)		(0)
 #define __swp_offset(x)		(0)
diff --git a/arch/microblaze/pci/pci-common.c b/arch/microblaze/pci/pci-common.c
index ae79e8638d50..161f9758c631 100644
--- a/arch/microblaze/pci/pci-common.c
+++ b/arch/microblaze/pci/pci-common.c
@@ -151,72 +151,22 @@ void pcibios_set_master(struct pci_dev *dev)
 }
 
 /*
- * Platform support for /proc/bus/pci/X/Y mmap()s,
- * modelled on the sparc64 implementation by Dave Miller.
- *  -- paulus.
+ * Platform support for /proc/bus/pci/X/Y mmap()s.
  */
 
-/*
- * Adjust vm_pgoff of VMA such that it is the physical page offset
- * corresponding to the 32-bit pci bus offset for DEV requested by the user.
- *
- * Basically, the user finds the base address for his device which he wishes
- * to mmap.  They read the 32-bit value from the config space base register,
- * add whatever PAGE_SIZE multiple offset they wish, and feed this into the
- * offset parameter of mmap on /proc/bus/pci/XXX for that device.
- *
- * Returns negative error code on failure, zero on success.
- */
-static struct resource *__pci_mmap_make_offset(struct pci_dev *dev,
-					       resource_size_t *offset,
-					       enum pci_mmap_state mmap_state)
+int pci_iobar_pfn(struct pci_dev *pdev, int bar, struct vm_area_struct *vma)
 {
-	struct pci_controller *hose = pci_bus_to_host(dev->bus);
-	unsigned long io_offset = 0;
-	int i, res_bit;
+	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+	resource_size_t ioaddr = pci_resource_start(pdev, bar);
 
 	if (!hose)
-		return NULL;		/* should never happen */
-
-	/* If memory, add on the PCI bridge address offset */
-	if (mmap_state == pci_mmap_mem) {
-#if 0 /* See comment in pci_resource_to_user() for why this is disabled */
-		*offset += hose->pci_mem_offset;
-#endif
-		res_bit = IORESOURCE_MEM;
-	} else {
-		io_offset = (unsigned long)hose->io_base_virt - _IO_BASE;
-		*offset += io_offset;
-		res_bit = IORESOURCE_IO;
-	}
-
-	/*
-	 * Check that the offset requested corresponds to one of the
-	 * resources of the device.
-	 */
-	for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
-		struct resource *rp = &dev->resource[i];
-		int flags = rp->flags;
+		return -EINVAL;		/* should never happen */
 
-		/* treat ROM as memory (should be already) */
-		if (i == PCI_ROM_RESOURCE)
-			flags |= IORESOURCE_MEM;
-
-		/* Active and same type? */
-		if ((flags & res_bit) == 0)
-			continue;
-
-		/* In the range of this resource? */
-		if (*offset < (rp->start & PAGE_MASK) || *offset > rp->end)
-			continue;
-
-		/* found it! construct the final physical address */
-		if (mmap_state == pci_mmap_io)
-			*offset += hose->io_base_phys - io_offset;
-		return rp;
-	}
+	/* Convert to an offset within this PCI controller */
+	ioaddr -= (unsigned long)hose->io_base_virt - _IO_BASE;
 
-	return NULL;
+	vma->vm_pgoff += (ioaddr + hose->io_base_phys) >> PAGE_SHIFT;
+	return 0;
 }
 
 /*
@@ -268,37 +218,6 @@ pgprot_t pci_phys_mem_access_prot(struct file *file,
 	return prot;
 }
 
-/*
- * Perform the actual remap of the pages for a PCI device mapping, as
- * appropriate for this architecture.  The region in the process to map
- * is described by vm_start and vm_end members of VMA, the base physical
- * address is found in vm_pgoff.
- * The pci device structure is provided so that architectures may make mapping
- * decisions on a per-device or per-bus basis.
- *
- * Returns a negative error code on failure, zero on success.
- */
-int pci_mmap_page_range(struct pci_dev *dev, int bar, struct vm_area_struct *vma,
-			enum pci_mmap_state mmap_state, int write_combine)
-{
-	resource_size_t offset =
-		((resource_size_t)vma->vm_pgoff) << PAGE_SHIFT;
-	struct resource *rp;
-	int ret;
-
-	rp = __pci_mmap_make_offset(dev, &offset, mmap_state);
-	if (rp == NULL)
-		return -EINVAL;
-
-	vma->vm_pgoff = offset >> PAGE_SHIFT;
-	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-
-	ret = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
-			       vma->vm_end - vma->vm_start, vma->vm_page_prot);
-
-	return ret;
-}
-
 /* This provides legacy IO read access on a bus */
 int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val, size_t size)
 {
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 61e9a24297b7..225c95da23ce 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -2029,6 +2029,7 @@ config CPU_MIPSR6
 	select CPU_HAS_RIXI
 	select HAVE_ARCH_BITREVERSE
 	select MIPS_ASID_BITS_VARIABLE
+	select MIPS_CRC_SUPPORT
 	select MIPS_SPRAM
 
 config EVA
@@ -2502,6 +2503,9 @@ config MIPS_ASID_BITS
 config MIPS_ASID_BITS_VARIABLE
 	bool
 
+config MIPS_CRC_SUPPORT
+	bool
+
 #
 # - Highmem only makes sense for the 32-bit kernel.
 # - The current highmem code will only work properly on physically indexed
@@ -2850,8 +2854,7 @@ config CRASH_DUMP
 
 config PHYSICAL_START
 	hex "Physical address where the kernel is loaded"
-	default "0xffffffff84000000" if 64BIT
-	default "0x84000000" if 32BIT
+	default "0xffffffff84000000"
 	depends on CRASH_DUMP
 	help
 	  This gives the CKSEG0 or KSEG0 address where the kernel is loaded.
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index d1ca839c3981..5e9fce076ab6 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -222,6 +222,8 @@ xpa-cflags-y				:= $(mips-cflags)
 xpa-cflags-$(micromips-ase)		+= -mmicromips -Wa$(comma)-fatal-warnings
 toolchain-xpa				:= $(call cc-option-yn,$(xpa-cflags-y) -mxpa)
 cflags-$(toolchain-xpa)			+= -DTOOLCHAIN_SUPPORTS_XPA
+toolchain-crc				:= $(call cc-option-yn,$(mips-cflags) -Wa$(comma)-mcrc)
+cflags-$(toolchain-crc)			+= -DTOOLCHAIN_SUPPORTS_CRC
 
 #
 # Firmware support
@@ -249,20 +251,12 @@ ifdef CONFIG_PHYSICAL_START
 load-y					= $(CONFIG_PHYSICAL_START)
 endif
 
-entry-noisa-y				= 0x$(shell $(NM) vmlinux 2>/dev/null \
-					| grep "\bkernel_entry\b" | cut -f1 -d \ )
-ifdef CONFIG_CPU_MICROMIPS
-  #
-  # Set the ISA bit, since the kernel_entry symbol in the ELF will have it
-  # clear which would lead to images containing addresses which bootloaders may
-  # jump to as MIPS32 code.
-  #
-  entry-y = $(patsubst %0,%1,$(patsubst %2,%3,$(patsubst %4,%5, \
-              $(patsubst %6,%7,$(patsubst %8,%9,$(patsubst %a,%b, \
-              $(patsubst %c,%d,$(patsubst %e,%f,$(entry-noisa-y)))))))))
-else
-  entry-y = $(entry-noisa-y)
-endif
+# Sign-extend the entry point to 64 bits if retrieved as a 32-bit number.
+entry-y		= $(shell $(OBJDUMP) -f vmlinux 2>/dev/null \
+			| sed -n '/^start address / { \
+				s/^.* //; \
+				s/0x\([0-7].......\)$$/0x00000000\1/; \
+				s/0x\(........\)$$/0xffffffff\1/; p }')
 
 cflags-y			+= -I$(srctree)/arch/mips/include/asm/mach-generic
 drivers-$(CONFIG_PCI)		+= arch/mips/pci/
@@ -330,6 +324,7 @@ libs-y			+= arch/mips/math-emu/
 # See arch/mips/Kbuild for content of core part of the kernel
 core-y += arch/mips/
 
+drivers-$(CONFIG_MIPS_CRC_SUPPORT) += arch/mips/crypto/
 drivers-$(CONFIG_OPROFILE)	+= arch/mips/oprofile/
 
 # suspend and hibernation support
@@ -473,6 +468,21 @@ define archhelp
 	echo
 	echo '  {micro32,32,64}{r1,r2,r6}{el,}_defconfig <BOARDS="list of boards">'
 	echo
+	echo '  Where BOARDS is some subset of the following:'
+	for board in $(sort $(BOARDS)); do echo "    $${board}"; done
+	echo
+	echo '  Specifically the following generic default configurations are'
+	echo '  supported:'
+	echo
+	$(foreach cfg,$(generic_defconfigs),
+	  printf "  %-24s - Build generic kernel for $(call describe_generic_defconfig,$(cfg))\n" $(cfg);)
+	echo
+	echo '  The following legacy default configurations have been converted to'
+	echo '  generic and can still be used:'
+	echo
+	$(foreach cfg,$(sort $(legacy_defconfigs)),
+	  printf "  %-24s - Build $($(cfg)-y)\n" $(cfg);)
+	echo
 	echo '  Otherwise, the following default configurations are available:'
 endef
 
@@ -507,6 +517,10 @@ endef
 $(eval $(call gen_generic_defconfigs,32 64,r1 r2 r6,eb el))
 $(eval $(call gen_generic_defconfigs,micro32,r2,eb el))
 
+define describe_generic_defconfig
+$(subst 32r,MIPS32 r,$(subst 64r,MIPS64 r,$(subst el, little endian,$(patsubst %_defconfig,%,$(1)))))
+endef
+
 .PHONY: $(generic_defconfigs)
 $(generic_defconfigs):
 	$(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh \
@@ -543,14 +557,18 @@ generic_defconfig:
 # now that the boards have been converted to use the generic kernel they are
 # wrappers around the generic rules above.
 #
-.PHONY: sead3_defconfig
-sead3_defconfig:
-	$(Q)$(MAKE) -f $(srctree)/Makefile 32r2el_defconfig BOARDS=sead-3
+legacy_defconfigs		+= ocelot_defconfig
+ocelot_defconfig-y		:= 32r2el_defconfig BOARDS=ocelot
+
+legacy_defconfigs		+= sead3_defconfig
+sead3_defconfig-y		:= 32r2el_defconfig BOARDS=sead-3
+
+legacy_defconfigs		+= sead3micro_defconfig
+sead3micro_defconfig-y		:= micro32r2el_defconfig BOARDS=sead-3
 
-.PHONY: sead3micro_defconfig
-sead3micro_defconfig:
-	$(Q)$(MAKE) -f $(srctree)/Makefile micro32r2el_defconfig BOARDS=sead-3
+legacy_defconfigs		+= xilfpga_defconfig
+xilfpga_defconfig-y		:= 32r2el_defconfig BOARDS=xilfpga
 
-.PHONY: xilfpga_defconfig
-xilfpga_defconfig:
-	$(Q)$(MAKE) -f $(srctree)/Makefile 32r2el_defconfig BOARDS=xilfpga
+.PHONY: $(legacy_defconfigs)
+$(legacy_defconfigs):
+	$(Q)$(MAKE) -f $(srctree)/Makefile $($@-y)
diff --git a/arch/mips/alchemy/board-gpr.c b/arch/mips/alchemy/board-gpr.c
index 328d697e72b4..4e79dbd54a33 100644
--- a/arch/mips/alchemy/board-gpr.c
+++ b/arch/mips/alchemy/board-gpr.c
@@ -190,7 +190,7 @@ static struct platform_device gpr_mtd_device = {
 /*
  * LEDs
  */
-static struct gpio_led gpr_gpio_leds[] = {
+static const struct gpio_led gpr_gpio_leds[] = {
 	{	/* green */
 		.name			= "gpr:green",
 		.gpio			= 4,
diff --git a/arch/mips/alchemy/board-mtx1.c b/arch/mips/alchemy/board-mtx1.c
index 85bb75669b0d..aab55aaf3d62 100644
--- a/arch/mips/alchemy/board-mtx1.c
+++ b/arch/mips/alchemy/board-mtx1.c
@@ -145,7 +145,7 @@ static struct platform_device mtx1_wdt = {
 	.resource = mtx1_wdt_res,
 };
 
-static struct gpio_led default_leds[] = {
+static const struct gpio_led default_leds[] = {
 	{
 		.name	= "mtx1:green",
 		.gpio = 211,
diff --git a/arch/mips/ar7/platform.c b/arch/mips/ar7/platform.c
index e1675c25d5d4..f09262e0a72f 100644
--- a/arch/mips/ar7/platform.c
+++ b/arch/mips/ar7/platform.c
@@ -346,7 +346,7 @@ static struct platform_device ar7_udc = {
 /*****************************************************************************
  * LEDs
  ****************************************************************************/
-static struct gpio_led default_leds[] = {
+static const struct gpio_led default_leds[] = {
 	{
 		.name			= "status",
 		.gpio			= 8,
@@ -354,12 +354,12 @@ static struct gpio_led default_leds[] = {
 	},
 };
 
-static struct gpio_led titan_leds[] = {
+static const struct gpio_led titan_leds[] = {
 	{ .name = "status", .gpio = 8, .active_low = 1, },
 	{ .name = "wifi", .gpio = 13, .active_low = 1, },
 };
 
-static struct gpio_led dsl502t_leds[] = {
+static const struct gpio_led dsl502t_leds[] = {
 	{
 		.name			= "status",
 		.gpio			= 9,
@@ -377,7 +377,7 @@ static struct gpio_led dsl502t_leds[] = {
 	},
 };
 
-static struct gpio_led dg834g_leds[] = {
+static const struct gpio_led dg834g_leds[] = {
 	{
 		.name			= "ppp",
 		.gpio			= 6,
@@ -406,7 +406,7 @@ static struct gpio_led dg834g_leds[] = {
 	},
 };
 
-static struct gpio_led fb_sl_leds[] = {
+static const struct gpio_led fb_sl_leds[] = {
 	{
 		.name			= "1",
 		.gpio			= 7,
@@ -433,7 +433,7 @@ static struct gpio_led fb_sl_leds[] = {
 	},
 };
 
-static struct gpio_led fb_fon_leds[] = {
+static const struct gpio_led fb_fon_leds[] = {
 	{
 		.name			= "1",
 		.gpio			= 8,
@@ -459,7 +459,7 @@ static struct gpio_led fb_fon_leds[] = {
 	},
 };
 
-static struct gpio_led gt701_leds[] = {
+static const struct gpio_led gt701_leds[] = {
 	{
 		.name			= "inet:green",
 		.gpio			= 13,
diff --git a/arch/mips/bcm47xx/buttons.c b/arch/mips/bcm47xx/buttons.c
index 88a8fb2bbc71..88d400d256c4 100644
--- a/arch/mips/bcm47xx/buttons.c
+++ b/arch/mips/bcm47xx/buttons.c
@@ -355,7 +355,7 @@ bcm47xx_buttons_luxul_xwr_600_v1[] = {
 
 static const struct gpio_keys_button
 bcm47xx_buttons_luxul_xwr_1750_v1[] = {
-	BCM47XX_GPIO_KEY(14, BTN_TASK),
+	BCM47XX_GPIO_KEY(14, KEY_RESTART),
 };
 
 /* Microsoft */
diff --git a/arch/mips/bcm47xx/leds.c b/arch/mips/bcm47xx/leds.c
index 8307a8a02667..34a7b3fbdfd9 100644
--- a/arch/mips/bcm47xx/leds.c
+++ b/arch/mips/bcm47xx/leds.c
@@ -409,6 +409,12 @@ bcm47xx_leds_luxul_xap_1500_v1[] __initconst = {
 };
 
 static const struct gpio_led
+bcm47xx_leds_luxul_xap1500_v1_extra[] __initconst = {
+	BCM47XX_GPIO_LED(44, "green", "5ghz", 0, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(76, "green", "2ghz", 0, LEDS_GPIO_DEFSTATE_OFF),
+};
+
+static const struct gpio_led
 bcm47xx_leds_luxul_xbr_4400_v1[] __initconst = {
 	BCM47XX_GPIO_LED(12, "green", "usb", 0, LEDS_GPIO_DEFSTATE_OFF),
 	BCM47XX_GPIO_LED_TRIGGER(15, "green", "status", 0, "timer"),
@@ -435,6 +441,11 @@ bcm47xx_leds_luxul_xwr_1750_v1[] __initconst = {
 	BCM47XX_GPIO_LED(15, "green", "wps", 0, LEDS_GPIO_DEFSTATE_OFF),
 };
 
+static const struct gpio_led
+bcm47xx_leds_luxul_xwr1750_v1_extra[] __initconst = {
+	BCM47XX_GPIO_LED(76, "green", "2ghz", 0, LEDS_GPIO_DEFSTATE_OFF),
+};
+
 /* Microsoft */
 
 static const struct gpio_led
@@ -528,6 +539,12 @@ static struct gpio_led_platform_data bcm47xx_leds_pdata;
 	bcm47xx_leds_pdata.num_leds = ARRAY_SIZE(dev_leds);		\
 } while (0)
 
+static struct gpio_led_platform_data bcm47xx_leds_pdata_extra __initdata = {};
+#define bcm47xx_set_pdata_extra(dev_leds) do {				\
+	bcm47xx_leds_pdata_extra.leds = dev_leds;			\
+	bcm47xx_leds_pdata_extra.num_leds = ARRAY_SIZE(dev_leds);	\
+} while (0)
+
 void __init bcm47xx_leds_register(void)
 {
 	enum bcm47xx_board board = bcm47xx_board_get();
@@ -705,6 +722,7 @@ void __init bcm47xx_leds_register(void)
 		break;
 	case BCM47XX_BOARD_LUXUL_XAP_1500_V1:
 		bcm47xx_set_pdata(bcm47xx_leds_luxul_xap_1500_v1);
+		bcm47xx_set_pdata_extra(bcm47xx_leds_luxul_xap1500_v1_extra);
 		break;
 	case BCM47XX_BOARD_LUXUL_XBR_4400_V1:
 		bcm47xx_set_pdata(bcm47xx_leds_luxul_xbr_4400_v1);
@@ -717,6 +735,7 @@ void __init bcm47xx_leds_register(void)
 		break;
 	case BCM47XX_BOARD_LUXUL_XWR_1750_V1:
 		bcm47xx_set_pdata(bcm47xx_leds_luxul_xwr_1750_v1);
+		bcm47xx_set_pdata_extra(bcm47xx_leds_luxul_xwr1750_v1_extra);
 		break;
 
 	case BCM47XX_BOARD_MICROSOFT_MN700:
@@ -760,4 +779,6 @@ void __init bcm47xx_leds_register(void)
 	}
 
 	gpio_led_register_device(-1, &bcm47xx_leds_pdata);
+	if (bcm47xx_leds_pdata_extra.num_leds)
+		gpio_led_register_device(0, &bcm47xx_leds_pdata_extra);
 }
diff --git a/arch/mips/boot/dts/Makefile b/arch/mips/boot/dts/Makefile
index e2c6f131c8eb..1e79cab8e269 100644
--- a/arch/mips/boot/dts/Makefile
+++ b/arch/mips/boot/dts/Makefile
@@ -4,6 +4,7 @@ subdir-y	+= cavium-octeon
 subdir-y	+= img
 subdir-y	+= ingenic
 subdir-y	+= lantiq
+subdir-y	+= mscc
 subdir-y	+= mti
 subdir-y	+= netlogic
 subdir-y	+= ni
diff --git a/arch/mips/boot/dts/brcm/bcm7125.dtsi b/arch/mips/boot/dts/brcm/bcm7125.dtsi
index 2f9ef565e5d0..5bf77b6fcceb 100644
--- a/arch/mips/boot/dts/brcm/bcm7125.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7125.dtsi
@@ -198,6 +198,13 @@
 			status = "disabled";
 		};
 
+		watchdog: watchdog@4067e8 {
+			clocks = <&upg_clk>;
+			compatible = "brcm,bcm7038-wdt";
+			reg = <0x4067e8 0x14>;
+			status = "disabled";
+		};
+
 		upg_gio: gpio@406700 {
 			compatible = "brcm,brcmstb-gpio";
 			reg = <0x406700 0x80>;
diff --git a/arch/mips/boot/dts/brcm/bcm7346.dtsi b/arch/mips/boot/dts/brcm/bcm7346.dtsi
index 02e426fe6013..2afa0dada575 100644
--- a/arch/mips/boot/dts/brcm/bcm7346.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7346.dtsi
@@ -233,6 +233,13 @@
 			status = "disabled";
 		};
 
+		watchdog: watchdog@4067e8 {
+			clocks = <&upg_clk>;
+			compatible = "brcm,bcm7038-wdt";
+			reg = <0x4067e8 0x14>;
+			status = "disabled";
+		};
+
 		aon_pm_l2_intc: interrupt-controller@408440 {
 			compatible = "brcm,l2-intc";
 			reg = <0x408440 0x30>;
@@ -243,6 +250,17 @@
 			brcm,irq-can-wake;
 		};
 
+		aon_ctrl: syscon@408000 {
+			compatible = "brcm,brcmstb-aon-ctrl";
+			reg = <0x408000 0x100>, <0x408200 0x200>;
+			reg-names = "aon-ctrl", "aon-sram";
+		};
+
+		timers: timer@4067c0 {
+			compatible = "brcm,brcmstb-timers";
+			reg = <0x4067c0 0x40>;
+		};
+
 		upg_gio: gpio@406700 {
 			compatible = "brcm,brcmstb-gpio";
 			reg = <0x406700 0x60>;
@@ -483,5 +501,49 @@
 			interrupt-names = "mspi_done";
 			status = "disabled";
 		};
+
+		waketimer: waketimer@408e80 {
+			compatible = "brcm,brcmstb-waketimer";
+			reg = <0x408e80 0x14>;
+			interrupts = <0x3>;
+			interrupt-parent = <&aon_pm_l2_intc>;
+			interrupt-names = "timer";
+			clocks = <&upg_clk>;
+			status = "disabled";
+		};
+	};
+
+	memory_controllers {
+		compatible = "simple-bus";
+		ranges = <0x0 0x103b0000 0xa000>;
+		#address-cells = <1>;
+		#size-cells = <1>;
+
+		memory-controller@0 {
+			compatible = "brcm,brcmstb-memc", "simple-bus";
+			ranges = <0x0 0x0 0xa000>;
+			#address-cells = <1>;
+			#size-cells = <1>;
+
+			memc-arb@1000 {
+				compatible = "brcm,brcmstb-memc-arb";
+				reg = <0x1000 0x248>;
+			};
+
+			memc-ddr@2000 {
+				compatible = "brcm,brcmstb-memc-ddr";
+				reg = <0x2000 0x300>;
+			};
+
+			ddr-phy@6000 {
+				compatible = "brcm,brcmstb-ddr-phy";
+				reg = <0x6000 0xc8>;
+			};
+
+			shimphy@8000 {
+				compatible = "brcm,brcmstb-ddr-shimphy";
+				reg = <0x8000 0x13c>;
+			};
+		};
 	};
 };
diff --git a/arch/mips/boot/dts/brcm/bcm7358.dtsi b/arch/mips/boot/dts/brcm/bcm7358.dtsi
index 1089d6ebc841..6375fc77f389 100644
--- a/arch/mips/boot/dts/brcm/bcm7358.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7358.dtsi
@@ -217,6 +217,13 @@
 			status = "disabled";
 		};
 
+		watchdog: watchdog@4066a8 {
+			clocks = <&upg_clk>;
+			compatible = "brcm,bcm7038-wdt";
+			reg = <0x4066a8 0x14>;
+			status = "disabled";
+		};
+
 		aon_pm_l2_intc: interrupt-controller@408240 {
 			compatible = "brcm,l2-intc";
 			reg = <0x408240 0x30>;
@@ -362,5 +369,15 @@
 			interrupt-names = "mspi_done";
 			status = "disabled";
 		};
+
+		waketimer: waketimer@408e80 {
+			compatible = "brcm,brcmstb-waketimer";
+			reg = <0x408e80 0x14>;
+			interrupts = <0x3>;
+			interrupt-parent = <&aon_pm_l2_intc>;
+			interrupt-names = "timer";
+			clocks = <&upg_clk>;
+			status = "disabled";
+		};
 	};
 };
diff --git a/arch/mips/boot/dts/brcm/bcm7360.dtsi b/arch/mips/boot/dts/brcm/bcm7360.dtsi
index 4b87ebec407a..a57cacea91cf 100644
--- a/arch/mips/boot/dts/brcm/bcm7360.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7360.dtsi
@@ -209,6 +209,13 @@
 			status = "disabled";
 		};
 
+		watchdog: watchdog@4066a8 {
+			clocks = <&upg_clk>;
+			compatible = "brcm,bcm7038-wdt";
+			reg = <0x4066a8 0x14>;
+			status = "disabled";
+		};
+
 		aon_pm_l2_intc: interrupt-controller@408440 {
 			compatible = "brcm,l2-intc";
 			reg = <0x408440 0x30>;
@@ -219,6 +226,17 @@
 			brcm,irq-can-wake;
 		};
 
+		aon_ctrl: syscon@408000 {
+			compatible = "brcm,brcmstb-aon-ctrl";
+			reg = <0x408000 0x100>, <0x408200 0x200>;
+			reg-names = "aon-ctrl", "aon-sram";
+		};
+
+		timers: timer@406680 {
+			compatible = "brcm,brcmstb-timers";
+			reg = <0x406680 0x40>;
+		};
+
 		upg_gio: gpio@406500 {
 			compatible = "brcm,brcmstb-gpio";
 			reg = <0x406500 0xa0>;
@@ -402,5 +420,49 @@
 			interrupt-names = "mspi_done";
 			status = "disabled";
 		};
+
+		waketimer: waketimer@408e80 {
+			compatible = "brcm,brcmstb-waketimer";
+			reg = <0x408e80 0x14>;
+			interrupts = <0x3>;
+			interrupt-parent = <&aon_pm_l2_intc>;
+			interrupt-names = "timer";
+			clocks = <&upg_clk>;
+			status = "disabled";
+		};
+	};
+
+	memory_controllers {
+		compatible = "simple-bus";
+		ranges = <0x0 0x103b0000 0xa000>;
+		#address-cells = <1>;
+		#size-cells = <1>;
+
+		memory-controller@0 {
+			compatible = "brcm,brcmstb-memc", "simple-bus";
+			ranges = <0x0 0x0 0xa000>;
+			#address-cells = <1>;
+			#size-cells = <1>;
+
+			memc-arb@1000 {
+				compatible = "brcm,brcmstb-memc-arb";
+				reg = <0x1000 0x248>;
+			};
+
+			memc-ddr@2000 {
+				compatible = "brcm,brcmstb-memc-ddr";
+				reg = <0x2000 0x300>;
+			};
+
+			ddr-phy@6000 {
+				compatible = "brcm,brcmstb-ddr-phy";
+				reg = <0x6000 0xc8>;
+			};
+
+			shimphy@8000 {
+				compatible = "brcm,brcmstb-ddr-shimphy";
+				reg = <0x8000 0x13c>;
+			};
+		};
 	};
 };
diff --git a/arch/mips/boot/dts/brcm/bcm7362.dtsi b/arch/mips/boot/dts/brcm/bcm7362.dtsi
index ca657df34b6d..728b9e9f84b8 100644
--- a/arch/mips/boot/dts/brcm/bcm7362.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7362.dtsi
@@ -205,6 +205,13 @@
 			status = "disabled";
 		};
 
+		watchdog: watchdog@4066a8 {
+			clocks = <&upg_clk>;
+			compatible = "brcm,bcm7038-wdt";
+			reg = <0x4066a8 0x14>;
+			status = "disabled";
+		};
+
 		aon_pm_l2_intc: interrupt-controller@408440 {
 			compatible = "brcm,l2-intc";
 			reg = <0x408440 0x30>;
@@ -215,6 +222,17 @@
 			brcm,irq-can-wake;
 		};
 
+		aon_ctrl: syscon@408000 {
+			compatible = "brcm,brcmstb-aon-ctrl";
+			reg = <0x408000 0x100>, <0x408200 0x200>;
+			reg-names = "aon-ctrl", "aon-sram";
+		};
+
+		timers: timer@406680 {
+			compatible = "brcm,brcmstb-timers";
+			reg = <0x406680 0x40>;
+		};
+
 		upg_gio: gpio@406500 {
 			compatible = "brcm,brcmstb-gpio";
 			reg = <0x406500 0xa0>;
@@ -398,5 +416,49 @@
 			interrupt-names = "mspi_done";
 			status = "disabled";
 		};
+
+		waketimer: waketimer@408e80 {
+			compatible = "brcm,brcmstb-waketimer";
+			reg = <0x408e80 0x14>;
+			interrupts = <0x3>;
+			interrupt-parent = <&aon_pm_l2_intc>;
+			interrupt-names = "timer";
+			clocks = <&upg_clk>;
+			status = "disabled";
+		};
+	};
+
+	memory_controllers {
+		compatible = "simple-bus";
+		ranges = <0x0 0x103b0000 0xa000>;
+		#address-cells = <1>;
+		#size-cells = <1>;
+
+		memory-controller@0 {
+			compatible = "brcm,brcmstb-memc", "simple-bus";
+			ranges = <0x0 0x0 0xa000>;
+			#address-cells = <1>;
+			#size-cells = <1>;
+
+			memc-arb@1000 {
+				compatible = "brcm,brcmstb-memc-arb";
+				reg = <0x1000 0x248>;
+			};
+
+			memc-ddr@2000 {
+				compatible = "brcm,brcmstb-memc-ddr";
+				reg = <0x2000 0x300>;
+			};
+
+			ddr-phy@6000 {
+				compatible = "brcm,brcmstb-ddr-phy";
+				reg = <0x6000 0xc8>;
+			};
+
+			shimphy@8000 {
+				compatible = "brcm,brcmstb-ddr-shimphy";
+				reg = <0x8000 0x13c>;
+			};
+		};
 	};
 };
diff --git a/arch/mips/boot/dts/brcm/bcm7420.dtsi b/arch/mips/boot/dts/brcm/bcm7420.dtsi
index d262e11bc3f9..9540c27f12e7 100644
--- a/arch/mips/boot/dts/brcm/bcm7420.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7420.dtsi
@@ -214,6 +214,13 @@
 			status = "disabled";
 		};
 
+		watchdog: watchdog@4067e8 {
+			clocks = <&upg_clk>;
+			compatible = "brcm,bcm7038-wdt";
+			reg = <0x4067e8 0x14>;
+			status = "disabled";
+		};
+
 		upg_gio: gpio@406700 {
 			compatible = "brcm,brcmstb-gpio";
 			reg = <0x406700 0x80>;
diff --git a/arch/mips/boot/dts/brcm/bcm7425.dtsi b/arch/mips/boot/dts/brcm/bcm7425.dtsi
index e4fb9b6e6dce..410e61ebaf9e 100644
--- a/arch/mips/boot/dts/brcm/bcm7425.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7425.dtsi
@@ -232,6 +232,13 @@
 			status = "disabled";
 		};
 
+		watchdog: watchdog@4067e8 {
+			clocks = <&upg_clk>;
+			compatible = "brcm,bcm7038-wdt";
+			reg = <0x4067e8 0x14>;
+			status = "disabled";
+		};
+
 		aon_pm_l2_intc: interrupt-controller@408440 {
 			compatible = "brcm,l2-intc";
 			reg = <0x408440 0x30>;
@@ -242,6 +249,17 @@
 			brcm,irq-can-wake;
 		};
 
+		aon_ctrl: syscon@408000 {
+			compatible = "brcm,brcmstb-aon-ctrl";
+			reg = <0x408000 0x100>, <0x408200 0x200>;
+			reg-names = "aon-ctrl", "aon-sram";
+		};
+
+		timers: timer@4067c0 {
+			compatible = "brcm,brcmstb-timers";
+			reg = <0x4067c0 0x40>;
+		};
+
 		upg_gio: gpio@406700 {
 			compatible = "brcm,brcmstb-gpio";
 			reg = <0x406700 0x80>;
@@ -494,5 +512,76 @@
 			interrupt-names = "mspi_done";
 			status = "disabled";
 		};
+
+		waketimer: waketimer@409580 {
+			compatible = "brcm,brcmstb-waketimer";
+			reg = <0x409580 0x14>;
+			interrupts = <0x3>;
+			interrupt-parent = <&aon_pm_l2_intc>;
+			interrupt-names = "timer";
+			clocks = <&upg_clk>;
+			status = "disabled";
+		};
+	};
+
+	memory_controllers {
+		compatible = "simple-bus";
+		ranges = <0x0 0x103b0000 0x1a000>;
+		#address-cells = <1>;
+		#size-cells = <1>;
+
+		memory-controller@0 {
+			compatible = "brcm,brcmstb-memc", "simple-bus";
+			ranges = <0x0 0x0 0xa000>;
+			#address-cells = <1>;
+			#size-cells = <1>;
+
+			memc-arb@1000 {
+				compatible = "brcm,brcmstb-memc-arb";
+				reg = <0x1000 0x248>;
+			};
+
+			memc-ddr@2000 {
+				compatible = "brcm,brcmstb-memc-ddr";
+				reg = <0x2000 0x300>;
+			};
+
+			ddr-phy@6000 {
+				compatible = "brcm,brcmstb-ddr-phy";
+				reg = <0x6000 0xc8>;
+			};
+
+			shimphy@8000 {
+				compatible = "brcm,brcmstb-ddr-shimphy";
+				reg = <0x8000 0x13c>;
+			};
+		};
+
+		memory-controller@1 {
+			compatible = "brcm,brcmstb-memc", "simple-bus";
+			ranges = <0x0 0x10000 0xa000>;
+			#address-cells = <1>;
+			#size-cells = <1>;
+
+			memc-arb@1000 {
+				compatible = "brcm,brcmstb-memc-arb";
+				reg = <0x1000 0x248>;
+			};
+
+			memc-ddr@2000 {
+				compatible = "brcm,brcmstb-memc-ddr";
+				reg = <0x2000 0x300>;
+			};
+
+			ddr-phy@6000 {
+				compatible = "brcm,brcmstb-ddr-phy";
+				reg = <0x6000 0xc8>;
+			};
+
+			shimphy@8000 {
+				compatible = "brcm,brcmstb-ddr-shimphy";
+				reg = <0x8000 0x13c>;
+			};
+		};
 	};
 };
diff --git a/arch/mips/boot/dts/brcm/bcm7435.dtsi b/arch/mips/boot/dts/brcm/bcm7435.dtsi
index 1484e8990e52..8398b7f68bf4 100644
--- a/arch/mips/boot/dts/brcm/bcm7435.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7435.dtsi
@@ -247,6 +247,13 @@
 			status = "disabled";
 		};
 
+		watchdog: watchdog@4067e8 {
+			clocks = <&upg_clk>;
+			compatible = "brcm,bcm7038-wdt";
+			reg = <0x4067e8 0x14>;
+			status = "disabled";
+		};
+
 		aon_pm_l2_intc: interrupt-controller@408440 {
 			compatible = "brcm,l2-intc";
 			reg = <0x408440 0x30>;
@@ -257,6 +264,17 @@
 			brcm,irq-can-wake;
 		};
 
+		aon_ctrl: syscon@408000 {
+			compatible = "brcm,brcmstb-aon-ctrl";
+			reg = <0x408000 0x100>, <0x408200 0x200>;
+			reg-names = "aon-ctrl", "aon-sram";
+		};
+
+		timers: timer@4067c0 {
+			compatible = "brcm,brcmstb-timers";
+			reg = <0x4067c0 0x40>;
+		};
+
 		upg_gio: gpio@406700 {
 			compatible = "brcm,brcmstb-gpio";
 			reg = <0x406700 0x80>;
@@ -509,5 +527,76 @@
 			interrupt-names = "mspi_done";
 			status = "disabled";
 		};
+
+		waketimer: waketimer@409580 {
+			compatible = "brcm,brcmstb-waketimer";
+			reg = <0x409580 0x14>;
+			interrupts = <0x3>;
+			interrupt-parent = <&aon_pm_l2_intc>;
+			interrupt-names = "timer";
+			clocks = <&upg_clk>;
+			status = "disabled";
+		};
+	};
+
+	memory_controllers {
+		compatible = "simple-bus";
+		ranges = <0x0 0x103b0000 0x1a000>;
+		#address-cells = <1>;
+		#size-cells = <1>;
+
+		memory-controller@0 {
+			compatible = "brcm,brcmstb-memc", "simple-bus";
+			ranges = <0x0 0x0 0xa000>;
+			#address-cells = <1>;
+			#size-cells = <1>;
+
+			memc-arb@1000 {
+				compatible = "brcm,brcmstb-memc-arb";
+				reg = <0x1000 0x248>;
+			};
+
+			memc-ddr@2000 {
+				compatible = "brcm,brcmstb-memc-ddr";
+				reg = <0x2000 0x300>;
+			};
+
+			ddr-phy@6000 {
+				compatible = "brcm,brcmstb-ddr-phy";
+				reg = <0x6000 0xc8>;
+			};
+
+			shimphy@8000 {
+				compatible = "brcm,brcmstb-ddr-shimphy";
+				reg = <0x8000 0x13c>;
+			};
+		};
+
+		memory-controller@1 {
+			compatible = "brcm,brcmstb-memc", "simple-bus";
+			ranges = <0x0 0x10000 0xa000>;
+			#address-cells = <1>;
+			#size-cells = <1>;
+
+			memc-arb@1000 {
+				compatible = "brcm,brcmstb-memc-arb";
+				reg = <0x1000 0x248>;
+			};
+
+			memc-ddr@2000 {
+				compatible = "brcm,brcmstb-memc-ddr";
+				reg = <0x2000 0x300>;
+			};
+
+			ddr-phy@6000 {
+				compatible = "brcm,brcmstb-ddr-phy";
+				reg = <0x6000 0xc8>;
+			};
+
+			shimphy@8000 {
+				compatible = "brcm,brcmstb-ddr-shimphy";
+				reg = <0x8000 0x13c>;
+			};
+		};
 	};
 };
diff --git a/arch/mips/boot/dts/brcm/bcm97125cbmb.dts b/arch/mips/boot/dts/brcm/bcm97125cbmb.dts
index 7f59ea2ded6c..79e9769f7e00 100644
--- a/arch/mips/boot/dts/brcm/bcm97125cbmb.dts
+++ b/arch/mips/boot/dts/brcm/bcm97125cbmb.dts
@@ -50,6 +50,10 @@
 	status = "okay";
 };
 
+&watchdog {
+	status = "okay";
+};
+
 /* FIXME: USB is wonky; disable it for now */
 &ehci0 {
 	status = "disabled";
diff --git a/arch/mips/boot/dts/brcm/bcm97346dbsmb.dts b/arch/mips/boot/dts/brcm/bcm97346dbsmb.dts
index 9e7d5228f2b7..28370ff77eeb 100644
--- a/arch/mips/boot/dts/brcm/bcm97346dbsmb.dts
+++ b/arch/mips/boot/dts/brcm/bcm97346dbsmb.dts
@@ -59,6 +59,10 @@
 	status = "okay";
 };
 
+&watchdog {
+	status = "okay";
+};
+
 &enet0 {
 	status = "okay";
 };
@@ -114,3 +118,7 @@
 &mspi {
 	status = "okay";
 };
+
+&waketimer {
+	status = "okay";
+};
diff --git a/arch/mips/boot/dts/brcm/bcm97358svmb.dts b/arch/mips/boot/dts/brcm/bcm97358svmb.dts
index 708207a0002d..41c1b510c230 100644
--- a/arch/mips/boot/dts/brcm/bcm97358svmb.dts
+++ b/arch/mips/boot/dts/brcm/bcm97358svmb.dts
@@ -55,6 +55,10 @@
 	status = "okay";
 };
 
+&watchdog {
+	status = "okay";
+};
+
 &enet0 {
 	status = "okay";
 };
@@ -106,3 +110,7 @@
 &mspi {
 	status = "okay";
 };
+
+&waketimer {
+	status = "okay";
+};
diff --git a/arch/mips/boot/dts/brcm/bcm97360svmb.dts b/arch/mips/boot/dts/brcm/bcm97360svmb.dts
index 73c6dc9c8c6d..9f6c6c9b7ea7 100644
--- a/arch/mips/boot/dts/brcm/bcm97360svmb.dts
+++ b/arch/mips/boot/dts/brcm/bcm97360svmb.dts
@@ -50,6 +50,10 @@
 	status = "okay";
 };
 
+&watchdog {
+	status = "okay";
+};
+
 &enet0 {
 	status = "okay";
 };
@@ -109,3 +113,7 @@
 &mspi {
 	status = "okay";
 };
+
+&waketimer {
+	status = "okay";
+};
diff --git a/arch/mips/boot/dts/brcm/bcm97362svmb.dts b/arch/mips/boot/dts/brcm/bcm97362svmb.dts
index 37bacfdcf9d9..df8b755c390f 100644
--- a/arch/mips/boot/dts/brcm/bcm97362svmb.dts
+++ b/arch/mips/boot/dts/brcm/bcm97362svmb.dts
@@ -47,6 +47,10 @@
 	status = "okay";
 };
 
+&watchdog {
+	status = "okay";
+};
+
 &enet0 {
 	status = "okay";
 };
@@ -78,3 +82,7 @@
 &mspi {
 	status = "okay";
 };
+
+&waketimer {
+	status = "okay";
+};
diff --git a/arch/mips/boot/dts/brcm/bcm97420c.dts b/arch/mips/boot/dts/brcm/bcm97420c.dts
index f96241e94874..086faeaa384a 100644
--- a/arch/mips/boot/dts/brcm/bcm97420c.dts
+++ b/arch/mips/boot/dts/brcm/bcm97420c.dts
@@ -60,6 +60,10 @@
 	status = "okay";
 };
 
+&watchdog {
+	status = "okay";
+};
+
 /* FIXME: MAC driver comes up but cannot attach to PHY */
 &enet0 {
 	status = "disabled";
diff --git a/arch/mips/boot/dts/brcm/bcm97425svmb.dts b/arch/mips/boot/dts/brcm/bcm97425svmb.dts
index ce762c7b2e54..0ed22217bf3a 100644
--- a/arch/mips/boot/dts/brcm/bcm97425svmb.dts
+++ b/arch/mips/boot/dts/brcm/bcm97425svmb.dts
@@ -61,6 +61,10 @@
 	status = "okay";
 };
 
+&watchdog {
+	status = "okay";
+};
+
 &enet0 {
 	status = "okay";
 };
@@ -144,3 +148,7 @@
 &mspi {
 	status = "okay";
 };
+
+&waketimer {
+	status = "okay";
+};
diff --git a/arch/mips/boot/dts/brcm/bcm97435svmb.dts b/arch/mips/boot/dts/brcm/bcm97435svmb.dts
index d4dd31a543fd..2c145a883aef 100644
--- a/arch/mips/boot/dts/brcm/bcm97435svmb.dts
+++ b/arch/mips/boot/dts/brcm/bcm97435svmb.dts
@@ -61,6 +61,10 @@
 	status = "okay";
 };
 
+&watchdog {
+	status = "okay";
+};
+
 &enet0 {
 	status = "okay";
 };
@@ -120,3 +124,7 @@
 &mspi {
 	status = "okay";
 };
+
+&waketimer {
+	status = "okay";
+};
diff --git a/arch/mips/boot/dts/img/boston.dts b/arch/mips/boot/dts/img/boston.dts
index 2cd49b60e030..1bd105428f61 100644
--- a/arch/mips/boot/dts/img/boston.dts
+++ b/arch/mips/boot/dts/img/boston.dts
@@ -157,7 +157,7 @@
 					#address-cells = <1>;
 					#size-cells = <0>;
 
-					rtc@0x68 {
+					rtc@68 {
 						compatible = "st,m41t81s";
 						reg = <0x68>;
 					};
diff --git a/arch/mips/boot/dts/ingenic/ci20.dts b/arch/mips/boot/dts/ingenic/ci20.dts
index a4cc52214dbd..38078594cf97 100644
--- a/arch/mips/boot/dts/ingenic/ci20.dts
+++ b/arch/mips/boot/dts/ingenic/ci20.dts
@@ -110,22 +110,22 @@
 					reg = <0x0 0x0 0x0 0x800000>;
 				};
 
-				partition@0x800000 {
+				partition@800000 {
 					label = "u-boot";
 					reg = <0x0 0x800000 0x0 0x200000>;
 				};
 
-				partition@0xa00000 {
+				partition@a00000 {
 					label = "u-boot-env";
 					reg = <0x0 0xa00000 0x0 0x200000>;
 				};
 
-				partition@0xc00000 {
+				partition@c00000 {
 					label = "boot";
 					reg = <0x0 0xc00000 0x0 0x4000000>;
 				};
 
-				partition@0x8c00000 {
+				partition@4c00000 {
 					label = "system";
 					reg = <0x0 0x4c00000 0x1 0xfb400000>;
 				};
diff --git a/arch/mips/boot/dts/mscc/Makefile b/arch/mips/boot/dts/mscc/Makefile
new file mode 100644
index 000000000000..c51164537c02
--- /dev/null
+++ b/arch/mips/boot/dts/mscc/Makefile
@@ -0,0 +1,3 @@
+dtb-$(CONFIG_LEGACY_BOARD_OCELOT)	+= ocelot_pcb123.dtb
+
+obj-y				+= $(patsubst %.dtb, %.dtb.o, $(dtb-y))
diff --git a/arch/mips/boot/dts/mscc/ocelot.dtsi b/arch/mips/boot/dts/mscc/ocelot.dtsi
new file mode 100644
index 000000000000..dd239cab2f9d
--- /dev/null
+++ b/arch/mips/boot/dts/mscc/ocelot.dtsi
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Copyright (c) 2017 Microsemi Corporation */
+
+/ {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	compatible = "mscc,ocelot";
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			compatible = "mips,mips24KEc";
+			device_type = "cpu";
+			clocks = <&cpu_clk>;
+			reg = <0>;
+		};
+	};
+
+	aliases {
+		serial0 = &uart0;
+	};
+
+	cpuintc: interrupt-controller {
+		#address-cells = <0>;
+		#interrupt-cells = <1>;
+		interrupt-controller;
+		compatible = "mti,cpu-interrupt-controller";
+	};
+
+	cpu_clk: cpu-clock {
+		compatible = "fixed-clock";
+		#clock-cells = <0>;
+		clock-frequency = <500000000>;
+	};
+
+	ahb_clk: ahb-clk {
+		compatible = "fixed-factor-clock";
+		#clock-cells = <0>;
+		clocks = <&cpu_clk>;
+		clock-div = <2>;
+		clock-mult = <1>;
+	};
+
+	ahb@70000000 {
+		compatible = "simple-bus";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		ranges = <0 0x70000000 0x2000000>;
+
+		interrupt-parent = <&intc>;
+
+		cpu_ctrl: syscon@0 {
+			compatible = "mscc,ocelot-cpu-syscon", "syscon";
+			reg = <0x0 0x2c>;
+		};
+
+		intc: interrupt-controller@70 {
+			compatible = "mscc,ocelot-icpu-intr";
+			reg = <0x70 0x70>;
+			#interrupt-cells = <1>;
+			interrupt-controller;
+			interrupt-parent = <&cpuintc>;
+			interrupts = <2>;
+		};
+
+		uart0: serial@100000 {
+			pinctrl-0 = <&uart_pins>;
+			pinctrl-names = "default";
+			compatible = "ns16550a";
+			reg = <0x100000 0x20>;
+			interrupts = <6>;
+			clocks = <&ahb_clk>;
+			reg-io-width = <4>;
+			reg-shift = <2>;
+
+			status = "disabled";
+		};
+
+		uart2: serial@100800 {
+			pinctrl-0 = <&uart2_pins>;
+			pinctrl-names = "default";
+			compatible = "ns16550a";
+			reg = <0x100800 0x20>;
+			interrupts = <7>;
+			clocks = <&ahb_clk>;
+			reg-io-width = <4>;
+			reg-shift = <2>;
+
+			status = "disabled";
+		};
+
+		reset@1070008 {
+			compatible = "mscc,ocelot-chip-reset";
+			reg = <0x1070008 0x4>;
+		};
+
+		gpio: pinctrl@1070034 {
+			compatible = "mscc,ocelot-pinctrl";
+			reg = <0x1070034 0x68>;
+			gpio-controller;
+			#gpio-cells = <2>;
+			gpio-ranges = <&gpio 0 0 22>;
+
+			uart_pins: uart-pins {
+				pins = "GPIO_6", "GPIO_7";
+				function = "uart";
+			};
+
+			uart2_pins: uart2-pins {
+				pins = "GPIO_12", "GPIO_13";
+				function = "uart2";
+			};
+		};
+	};
+};
diff --git a/arch/mips/boot/dts/mscc/ocelot_pcb123.dts b/arch/mips/boot/dts/mscc/ocelot_pcb123.dts
new file mode 100644
index 000000000000..29d6414f8886
--- /dev/null
+++ b/arch/mips/boot/dts/mscc/ocelot_pcb123.dts
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Copyright (c) 2017 Microsemi Corporation */
+
+/dts-v1/;
+
+#include "ocelot.dtsi"
+
+/ {
+	compatible = "mscc,ocelot-pcb123", "mscc,ocelot";
+
+	chosen {
+		stdout-path = "serial0:115200n8";
+	};
+
+	memory@0 {
+		device_type = "memory";
+		reg = <0x0 0x0e000000>;
+	};
+};
+
+&uart0 {
+	status = "okay";
+};
+
+&uart2 {
+	status = "okay";
+};
diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c
index d99f5242169e..b3aec101a65d 100644
--- a/arch/mips/cavium-octeon/octeon-irq.c
+++ b/arch/mips/cavium-octeon/octeon-irq.c
@@ -2271,7 +2271,7 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node,
 
 	parent_irq = irq_of_parse_and_map(ciu_node, 0);
 	if (!parent_irq) {
-		pr_err("ERROR: Couldn't acquire parent_irq for %s\n.",
+		pr_err("ERROR: Couldn't acquire parent_irq for %s\n",
 			ciu_node->name);
 		return -EINVAL;
 	}
@@ -2283,7 +2283,7 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node,
 
 	addr = of_get_address(ciu_node, 0, NULL, NULL);
 	if (!addr) {
-		pr_err("ERROR: Couldn't acquire reg(0) %s\n.", ciu_node->name);
+		pr_err("ERROR: Couldn't acquire reg(0) %s\n", ciu_node->name);
 		return -EINVAL;
 	}
 	host_data->raw_reg = (u64)phys_to_virt(
@@ -2291,7 +2291,7 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node,
 
 	addr = of_get_address(ciu_node, 1, NULL, NULL);
 	if (!addr) {
-		pr_err("ERROR: Couldn't acquire reg(1) %s\n.", ciu_node->name);
+		pr_err("ERROR: Couldn't acquire reg(1) %s\n", ciu_node->name);
 		return -EINVAL;
 	}
 	host_data->en_reg = (u64)phys_to_virt(
@@ -2299,7 +2299,7 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node,
 
 	r = of_property_read_u32(ciu_node, "cavium,max-bits", &val);
 	if (r) {
-		pr_err("ERROR: Couldn't read cavium,max-bits from %s\n.",
+		pr_err("ERROR: Couldn't read cavium,max-bits from %s\n",
 			ciu_node->name);
 		return r;
 	}
@@ -2309,7 +2309,7 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node,
 					   &octeon_irq_domain_cib_ops,
 					   host_data);
 	if (!cib_domain) {
-		pr_err("ERROR: Couldn't irq_domain_add_linear()\n.");
+		pr_err("ERROR: Couldn't irq_domain_add_linear()\n");
 		return -ENOMEM;
 	}
 
diff --git a/arch/mips/configs/bmips_stb_defconfig b/arch/mips/configs/bmips_stb_defconfig
index 3cefa6bc01dd..47aecb8750e6 100644
--- a/arch/mips/configs/bmips_stb_defconfig
+++ b/arch/mips/configs/bmips_stb_defconfig
@@ -72,6 +72,7 @@ CONFIG_USB_EHCI_HCD_PLATFORM=y
 CONFIG_USB_OHCI_HCD=y
 CONFIG_USB_OHCI_HCD_PLATFORM=y
 CONFIG_USB_STORAGE=y
+CONFIG_SOC_BRCMSTB=y
 CONFIG_EXT4_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
 CONFIG_EXT4_FS_SECURITY=y
diff --git a/arch/mips/configs/generic/32r6.config b/arch/mips/configs/generic/32r6.config
index ca606e71f4d0..1a5d5ea4ab2b 100644
--- a/arch/mips/configs/generic/32r6.config
+++ b/arch/mips/configs/generic/32r6.config
@@ -1,2 +1,4 @@
 CONFIG_CPU_MIPS32_R6=y
 CONFIG_HIGHMEM=y
+
+CONFIG_CRYPTO_CRC32_MIPS=y
diff --git a/arch/mips/configs/generic/64r6.config b/arch/mips/configs/generic/64r6.config
index 7cac0339c4d5..5dd8e8503e34 100644
--- a/arch/mips/configs/generic/64r6.config
+++ b/arch/mips/configs/generic/64r6.config
@@ -2,3 +2,5 @@ CONFIG_CPU_MIPS64_R6=y
 CONFIG_64BIT=y
 CONFIG_MIPS32_O32=y
 CONFIG_MIPS32_N32=y
+
+CONFIG_CRYPTO_CRC32_MIPS=y
diff --git a/arch/mips/configs/generic/board-ocelot.config b/arch/mips/configs/generic/board-ocelot.config
new file mode 100644
index 000000000000..aa815761d85e
--- /dev/null
+++ b/arch/mips/configs/generic/board-ocelot.config
@@ -0,0 +1,35 @@
+# require CONFIG_CPU_MIPS32_R2=y
+
+CONFIG_LEGACY_BOARD_OCELOT=y
+
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_M25P80=y
+CONFIG_MTD_NAND=y
+CONFIG_MTD_NAND_PLATFORM=y
+CONFIG_MTD_SPI_NOR=y
+CONFIG_MTD_UBI=y
+
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
+
+CONFIG_GPIO_SYSFS=y
+
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_MUX=y
+
+CONFIG_SPI=y
+CONFIG_SPI_BITBANG=y
+CONFIG_SPI_DESIGNWARE=y
+CONFIG_SPI_SPIDEV=y
+
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_OCELOT_RESET=y
+
+CONFIG_MAGIC_SYSRQ=y
diff --git a/arch/mips/crypto/Makefile b/arch/mips/crypto/Makefile
new file mode 100644
index 000000000000..e07aca572c2e
--- /dev/null
+++ b/arch/mips/crypto/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for MIPS crypto files.
+#
+
+obj-$(CONFIG_CRYPTO_CRC32_MIPS) += crc32-mips.o
diff --git a/arch/mips/crypto/crc32-mips.c b/arch/mips/crypto/crc32-mips.c
new file mode 100644
index 000000000000..7d1d2425746f
--- /dev/null
+++ b/arch/mips/crypto/crc32-mips.c
@@ -0,0 +1,348 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * crc32-mips.c - CRC32 and CRC32C using optional MIPSr6 instructions
+ *
+ * Module based on arm64/crypto/crc32-arm.c
+ *
+ * Copyright (C) 2014 Linaro Ltd <yazen.ghannam@linaro.org>
+ * Copyright (C) 2018 MIPS Tech, LLC
+ */
+
+#include <linux/unaligned/access_ok.h>
+#include <linux/cpufeature.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <asm/mipsregs.h>
+
+#include <crypto/internal/hash.h>
+
+enum crc_op_size {
+	b, h, w, d,
+};
+
+enum crc_type {
+	crc32,
+	crc32c,
+};
+
+#ifndef TOOLCHAIN_SUPPORTS_CRC
+#define _ASM_MACRO_CRC32(OP, SZ, TYPE)					  \
+_ASM_MACRO_3R(OP, rt, rs, rt2,						  \
+	".ifnc	\\rt, \\rt2\n\t"					  \
+	".error	\"invalid operands \\\"" #OP " \\rt,\\rs,\\rt2\\\"\"\n\t" \
+	".endif\n\t"							  \
+	_ASM_INSN_IF_MIPS(0x7c00000f | (__rt << 16) | (__rs << 21) |	  \
+			  ((SZ) <<  6) | ((TYPE) << 8))			  \
+	_ASM_INSN32_IF_MM(0x00000030 | (__rs << 16) | (__rt << 21) |	  \
+			  ((SZ) << 14) | ((TYPE) << 3)))
+_ASM_MACRO_CRC32(crc32b,  0, 0);
+_ASM_MACRO_CRC32(crc32h,  1, 0);
+_ASM_MACRO_CRC32(crc32w,  2, 0);
+_ASM_MACRO_CRC32(crc32d,  3, 0);
+_ASM_MACRO_CRC32(crc32cb, 0, 1);
+_ASM_MACRO_CRC32(crc32ch, 1, 1);
+_ASM_MACRO_CRC32(crc32cw, 2, 1);
+_ASM_MACRO_CRC32(crc32cd, 3, 1);
+#define _ASM_SET_CRC ""
+#else /* !TOOLCHAIN_SUPPORTS_CRC */
+#define _ASM_SET_CRC ".set\tcrc\n\t"
+#endif
+
+#define _CRC32(crc, value, size, type)		\
+do {						\
+	__asm__ __volatile__(			\
+		".set	push\n\t"		\
+		_ASM_SET_CRC			\
+		#type #size "	%0, %1, %0\n\t"	\
+		".set	pop"			\
+		: "+r" (crc)			\
+		: "r" (value));			\
+} while (0)
+
+#define CRC32(crc, value, size) \
+	_CRC32(crc, value, size, crc32)
+
+#define CRC32C(crc, value, size) \
+	_CRC32(crc, value, size, crc32c)
+
+static u32 crc32_mips_le_hw(u32 crc_, const u8 *p, unsigned int len)
+{
+	u32 crc = crc_;
+
+#ifdef CONFIG_64BIT
+	while (len >= sizeof(u64)) {
+		u64 value = get_unaligned_le64(p);
+
+		CRC32(crc, value, d);
+		p += sizeof(u64);
+		len -= sizeof(u64);
+	}
+
+	if (len & sizeof(u32)) {
+#else /* !CONFIG_64BIT */
+	while (len >= sizeof(u32)) {
+#endif
+		u32 value = get_unaligned_le32(p);
+
+		CRC32(crc, value, w);
+		p += sizeof(u32);
+		len -= sizeof(u32);
+	}
+
+	if (len & sizeof(u16)) {
+		u16 value = get_unaligned_le16(p);
+
+		CRC32(crc, value, h);
+		p += sizeof(u16);
+	}
+
+	if (len & sizeof(u8)) {
+		u8 value = *p++;
+
+		CRC32(crc, value, b);
+	}
+
+	return crc;
+}
+
+static u32 crc32c_mips_le_hw(u32 crc_, const u8 *p, unsigned int len)
+{
+	u32 crc = crc_;
+
+#ifdef CONFIG_64BIT
+	while (len >= sizeof(u64)) {
+		u64 value = get_unaligned_le64(p);
+
+		CRC32C(crc, value, d);
+		p += sizeof(u64);
+		len -= sizeof(u64);
+	}
+
+	if (len & sizeof(u32)) {
+#else /* !CONFIG_64BIT */
+	while (len >= sizeof(u32)) {
+#endif
+		u32 value = get_unaligned_le32(p);
+
+		CRC32C(crc, value, w);
+		p += sizeof(u32);
+		len -= sizeof(u32);
+	}
+
+	if (len & sizeof(u16)) {
+		u16 value = get_unaligned_le16(p);
+
+		CRC32C(crc, value, h);
+		p += sizeof(u16);
+	}
+
+	if (len & sizeof(u8)) {
+		u8 value = *p++;
+
+		CRC32C(crc, value, b);
+	}
+	return crc;
+}
+
+#define CHKSUM_BLOCK_SIZE	1
+#define CHKSUM_DIGEST_SIZE	4
+
+struct chksum_ctx {
+	u32 key;
+};
+
+struct chksum_desc_ctx {
+	u32 crc;
+};
+
+static int chksum_init(struct shash_desc *desc)
+{
+	struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
+	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	ctx->crc = mctx->key;
+
+	return 0;
+}
+
+/*
+ * Setting the seed allows arbitrary accumulators and flexible XOR policy
+ * If your algorithm starts with ~0, then XOR with ~0 before you set
+ * the seed.
+ */
+static int chksum_setkey(struct crypto_shash *tfm, const u8 *key,
+			 unsigned int keylen)
+{
+	struct chksum_ctx *mctx = crypto_shash_ctx(tfm);
+
+	if (keylen != sizeof(mctx->key)) {
+		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+	mctx->key = get_unaligned_le32(key);
+	return 0;
+}
+
+static int chksum_update(struct shash_desc *desc, const u8 *data,
+			 unsigned int length)
+{
+	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	ctx->crc = crc32_mips_le_hw(ctx->crc, data, length);
+	return 0;
+}
+
+static int chksumc_update(struct shash_desc *desc, const u8 *data,
+			 unsigned int length)
+{
+	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	ctx->crc = crc32c_mips_le_hw(ctx->crc, data, length);
+	return 0;
+}
+
+static int chksum_final(struct shash_desc *desc, u8 *out)
+{
+	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	put_unaligned_le32(ctx->crc, out);
+	return 0;
+}
+
+static int chksumc_final(struct shash_desc *desc, u8 *out)
+{
+	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	put_unaligned_le32(~ctx->crc, out);
+	return 0;
+}
+
+static int __chksum_finup(u32 crc, const u8 *data, unsigned int len, u8 *out)
+{
+	put_unaligned_le32(crc32_mips_le_hw(crc, data, len), out);
+	return 0;
+}
+
+static int __chksumc_finup(u32 crc, const u8 *data, unsigned int len, u8 *out)
+{
+	put_unaligned_le32(~crc32c_mips_le_hw(crc, data, len), out);
+	return 0;
+}
+
+static int chksum_finup(struct shash_desc *desc, const u8 *data,
+			unsigned int len, u8 *out)
+{
+	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	return __chksum_finup(ctx->crc, data, len, out);
+}
+
+static int chksumc_finup(struct shash_desc *desc, const u8 *data,
+			unsigned int len, u8 *out)
+{
+	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	return __chksumc_finup(ctx->crc, data, len, out);
+}
+
+static int chksum_digest(struct shash_desc *desc, const u8 *data,
+			 unsigned int length, u8 *out)
+{
+	struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
+
+	return __chksum_finup(mctx->key, data, length, out);
+}
+
+static int chksumc_digest(struct shash_desc *desc, const u8 *data,
+			 unsigned int length, u8 *out)
+{
+	struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
+
+	return __chksumc_finup(mctx->key, data, length, out);
+}
+
+static int chksum_cra_init(struct crypto_tfm *tfm)
+{
+	struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
+
+	mctx->key = ~0;
+	return 0;
+}
+
+static struct shash_alg crc32_alg = {
+	.digestsize		=	CHKSUM_DIGEST_SIZE,
+	.setkey			=	chksum_setkey,
+	.init			=	chksum_init,
+	.update			=	chksum_update,
+	.final			=	chksum_final,
+	.finup			=	chksum_finup,
+	.digest			=	chksum_digest,
+	.descsize		=	sizeof(struct chksum_desc_ctx),
+	.base			=	{
+		.cra_name		=	"crc32",
+		.cra_driver_name	=	"crc32-mips-hw",
+		.cra_priority		=	300,
+		.cra_flags		=	CRYPTO_ALG_OPTIONAL_KEY,
+		.cra_blocksize		=	CHKSUM_BLOCK_SIZE,
+		.cra_alignmask		=	0,
+		.cra_ctxsize		=	sizeof(struct chksum_ctx),
+		.cra_module		=	THIS_MODULE,
+		.cra_init		=	chksum_cra_init,
+	}
+};
+
+static struct shash_alg crc32c_alg = {
+	.digestsize		=	CHKSUM_DIGEST_SIZE,
+	.setkey			=	chksum_setkey,
+	.init			=	chksum_init,
+	.update			=	chksumc_update,
+	.final			=	chksumc_final,
+	.finup			=	chksumc_finup,
+	.digest			=	chksumc_digest,
+	.descsize		=	sizeof(struct chksum_desc_ctx),
+	.base			=	{
+		.cra_name		=	"crc32c",
+		.cra_driver_name	=	"crc32c-mips-hw",
+		.cra_priority		=	300,
+		.cra_flags		=	CRYPTO_ALG_OPTIONAL_KEY,
+		.cra_blocksize		=	CHKSUM_BLOCK_SIZE,
+		.cra_alignmask		=	0,
+		.cra_ctxsize		=	sizeof(struct chksum_ctx),
+		.cra_module		=	THIS_MODULE,
+		.cra_init		=	chksum_cra_init,
+	}
+};
+
+static int __init crc32_mod_init(void)
+{
+	int err;
+
+	err = crypto_register_shash(&crc32_alg);
+
+	if (err)
+		return err;
+
+	err = crypto_register_shash(&crc32c_alg);
+
+	if (err) {
+		crypto_unregister_shash(&crc32_alg);
+		return err;
+	}
+
+	return 0;
+}
+
+static void __exit crc32_mod_exit(void)
+{
+	crypto_unregister_shash(&crc32_alg);
+	crypto_unregister_shash(&crc32c_alg);
+}
+
+MODULE_AUTHOR("Marcin Nowakowski <marcin.nowakowski@mips.com>");
+MODULE_DESCRIPTION("CRC32 and CRC32C using optional MIPS instructions");
+MODULE_LICENSE("GPL v2");
+
+module_cpu_feature_match(MIPS_CRC32, crc32_mod_init);
+module_exit(crc32_mod_exit);
diff --git a/arch/mips/generic/Kconfig b/arch/mips/generic/Kconfig
index 2ff3b17bfab1..ba9b2c8cce68 100644
--- a/arch/mips/generic/Kconfig
+++ b/arch/mips/generic/Kconfig
@@ -27,6 +27,22 @@ config LEGACY_BOARD_SEAD3
 	  Enable this to include support for booting on MIPS SEAD-3 FPGA-based
 	  development boards, which boot using a legacy boot protocol.
 
+comment "MSCC Ocelot doesn't work with SEAD3 enabled"
+	depends on LEGACY_BOARD_SEAD3
+
+config LEGACY_BOARD_OCELOT
+	bool "Support MSCC Ocelot boards"
+	depends on LEGACY_BOARD_SEAD3=n
+	select LEGACY_BOARDS
+	select MSCC_OCELOT
+
+config MSCC_OCELOT
+	bool
+	select GPIOLIB
+	select MSCC_OCELOT_IRQ
+	select SYS_HAS_EARLY_PRINTK
+	select USE_GENERIC_EARLY_PRINTK_8250
+
 comment "FIT/UHI Boards"
 
 config FIT_IMAGE_FDT_BOSTON
diff --git a/arch/mips/generic/Makefile b/arch/mips/generic/Makefile
index 5c31e0c4697d..d03a36f869a4 100644
--- a/arch/mips/generic/Makefile
+++ b/arch/mips/generic/Makefile
@@ -14,5 +14,6 @@ obj-y += proc.o
 
 obj-$(CONFIG_YAMON_DT_SHIM)		+= yamon-dt.o
 obj-$(CONFIG_LEGACY_BOARD_SEAD3)	+= board-sead3.o
+obj-$(CONFIG_LEGACY_BOARD_OCELOT)	+= board-ocelot.o
 obj-$(CONFIG_KEXEC)			+= kexec.o
 obj-$(CONFIG_VIRT_BOARD_RANCHU)		+= board-ranchu.o
diff --git a/arch/mips/generic/board-ocelot.c b/arch/mips/generic/board-ocelot.c
new file mode 100644
index 000000000000..06d92fb37769
--- /dev/null
+++ b/arch/mips/generic/board-ocelot.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/*
+ * Microsemi MIPS SoC support
+ *
+ * Copyright (c) 2017 Microsemi Corporation
+ */
+#include <asm/machine.h>
+#include <asm/prom.h>
+
+#define DEVCPU_GCB_CHIP_REGS_CHIP_ID	0x71070000
+#define CHIP_ID_PART_ID			GENMASK(27, 12)
+
+#define OCELOT_PART_ID			(0x7514 << 12)
+
+#define UART_UART			0x70100000
+
+static __init bool ocelot_detect(void)
+{
+	u32 rev;
+	int idx;
+
+	/* Look for the TLB entry set up by redboot before trying to use it */
+	write_c0_entryhi(DEVCPU_GCB_CHIP_REGS_CHIP_ID);
+	mtc0_tlbw_hazard();
+	tlb_probe();
+	tlb_probe_hazard();
+	idx = read_c0_index();
+	if (idx < 0)
+		return false;
+
+	/* A TLB entry exists, let's assume it's usable and check the CHIP ID */
+	rev = __raw_readl((void __iomem *)DEVCPU_GCB_CHIP_REGS_CHIP_ID);
+
+	if ((rev & CHIP_ID_PART_ID) != OCELOT_PART_ID)
+		return false;
+
+	/* Copy command line from bootloader early for Initrd detection */
+	if (fw_arg0 < 10 && (fw_arg1 & 0xFFF00000) == 0x80000000) {
+		unsigned int prom_argc = fw_arg0;
+		const char **prom_argv = (const char **)fw_arg1;
+
+		if (prom_argc > 1 && strlen(prom_argv[1]) > 0)
+			/* f/w args override all built-in args; copy is bounded */
+			strscpy(arcs_cmdline, prom_argv[1], COMMAND_LINE_SIZE);
+	}
+
+	return true;
+}
+
+static void __init ocelot_earlyprintk_init(void)
+{
+	void __iomem *uart_base;
+
+	uart_base = ioremap_nocache(UART_UART, 0x20);
+	setup_8250_early_printk_port((unsigned long)uart_base, 2, 50000);
+}
+
+static void __init ocelot_late_init(void)
+{
+	ocelot_earlyprintk_init();
+}
+
+static __init const void *ocelot_fixup_fdt(const void *fdt,
+					   const void *match_data)
+{
+	/* This has to be done so late because ioremap needs to work */
+	late_time_init = ocelot_late_init;
+
+	return fdt;
+}
+
+extern char __dtb_ocelot_pcb123_begin[];
+
+MIPS_MACHINE(ocelot) = {
+	.fdt = __dtb_ocelot_pcb123_begin,
+	.fixup_fdt = ocelot_fixup_fdt,
+	.detect = ocelot_detect,
+};
diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h
index 721b698bfe3c..5f74590e0bea 100644
--- a/arch/mips/include/asm/cpu-features.h
+++ b/arch/mips/include/asm/cpu-features.h
@@ -11,6 +11,7 @@
 
 #include <asm/cpu.h>
 #include <asm/cpu-info.h>
+#include <asm/isa-rev.h>
 #include <cpu-feature-overrides.h>
 
 /*
@@ -493,7 +494,7 @@
 # define cpu_has_perf		(cpu_data[0].options & MIPS_CPU_PERF)
 #endif
 
-#if defined(CONFIG_SMP) && defined(__mips_isa_rev) && (__mips_isa_rev >= 6)
+#if defined(CONFIG_SMP) && (MIPS_ISA_REV >= 6)
 /*
  * Some systems share FTLB RAMs between threads within a core (siblings in
  * kernel parlance). This means that FTLB entries may become invalid at almost
@@ -525,7 +526,7 @@
 #  define cpu_has_shared_ftlb_entries \
 	(current_cpu_data.options & MIPS_CPU_SHARED_FTLB_ENTRIES)
 # endif
-#endif /* SMP && __mips_isa_rev >= 6 */
+#endif /* SMP && MIPS_ISA_REV >= 6 */
 
 #ifndef cpu_has_shared_ftlb_ram
 # define cpu_has_shared_ftlb_ram 0
diff --git a/arch/mips/include/asm/isa-rev.h b/arch/mips/include/asm/isa-rev.h
new file mode 100644
index 000000000000..683ea3454dcb
--- /dev/null
+++ b/arch/mips/include/asm/isa-rev.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018 MIPS Tech, LLC
+ * Author: Matt Redfearn <matt.redfearn@mips.com>
+ */
+
+#ifndef __MIPS_ASM_ISA_REV_H__
+#define __MIPS_ASM_ISA_REV_H__
+
+/*
+ * The ISA revision level. This is 0 for MIPS I to V and N for
+ * MIPS{32,64}rN.
+ */
+
+/* If the compiler has defined __mips_isa_rev, believe it. */
+#ifdef __mips_isa_rev
+#define MIPS_ISA_REV __mips_isa_rev
+#else
+/* The compiler hasn't defined the isa rev so assume it's MIPS I - V (0) */
+#define MIPS_ISA_REV 0
+#endif
+
+
+#endif /* __MIPS_ASM_ISA_REV_H__ */
diff --git a/arch/mips/include/asm/kvm_para.h b/arch/mips/include/asm/kvm_para.h
index 60b1aa0b7014..b57e978b0946 100644
--- a/arch/mips/include/asm/kvm_para.h
+++ b/arch/mips/include/asm/kvm_para.h
@@ -94,6 +94,11 @@ static inline unsigned int kvm_arch_para_features(void)
 	return 0;
 }
 
+static inline unsigned int kvm_arch_para_hints(void)
+{
+	return 0;
+}
+
 #ifdef CONFIG_MIPS_PARAVIRT
 static inline bool kvm_para_available(void)
 {
diff --git a/arch/mips/include/asm/mach-ath79/ar71xx_regs.h b/arch/mips/include/asm/mach-ath79/ar71xx_regs.h
index aa3800c82332..d99ca862dae3 100644
--- a/arch/mips/include/asm/mach-ath79/ar71xx_regs.h
+++ b/arch/mips/include/asm/mach-ath79/ar71xx_regs.h
@@ -167,7 +167,7 @@
 #define AR71XX_AHB_DIV_MASK		0x7
 
 #define AR724X_PLL_REG_CPU_CONFIG	0x00
-#define AR724X_PLL_REG_PCIE_CONFIG	0x18
+#define AR724X_PLL_REG_PCIE_CONFIG	0x10
 
 #define AR724X_PLL_FB_SHIFT		0
 #define AR724X_PLL_FB_MASK		0x3ff
diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
index 858752dac337..f65859784a4c 100644
--- a/arch/mips/include/asm/mipsregs.h
+++ b/arch/mips/include/asm/mipsregs.h
@@ -664,6 +664,7 @@
 #define MIPS_CONF5_FRE		(_ULCAST_(1) << 8)
 #define MIPS_CONF5_UFE		(_ULCAST_(1) << 9)
 #define MIPS_CONF5_CA2		(_ULCAST_(1) << 14)
+#define MIPS_CONF5_CRCP		(_ULCAST_(1) << 18)
 #define MIPS_CONF5_MSAEN	(_ULCAST_(1) << 27)
 #define MIPS_CONF5_EVA		(_ULCAST_(1) << 28)
 #define MIPS_CONF5_CV		(_ULCAST_(1) << 29)
diff --git a/arch/mips/include/uapi/asm/hwcap.h b/arch/mips/include/uapi/asm/hwcap.h
index 600ad8fd6835..a2aba4b059e6 100644
--- a/arch/mips/include/uapi/asm/hwcap.h
+++ b/arch/mips/include/uapi/asm/hwcap.h
@@ -5,5 +5,6 @@
 /* HWCAP flags */
 #define HWCAP_MIPS_R6		(1 << 0)
 #define HWCAP_MIPS_MSA		(1 << 1)
+#define HWCAP_MIPS_CRC32	(1 << 2)
 
 #endif /* _UAPI_ASM_HWCAP_H */
diff --git a/arch/mips/include/uapi/asm/mman.h b/arch/mips/include/uapi/asm/mman.h
index 606e02ca4b6c..3035ca499cd8 100644
--- a/arch/mips/include/uapi/asm/mman.h
+++ b/arch/mips/include/uapi/asm/mman.h
@@ -50,6 +50,7 @@
 #define MAP_NONBLOCK	0x20000		/* do not block on IO */
 #define MAP_STACK	0x40000		/* give out an address that is best suited for process/thread stacks */
 #define MAP_HUGETLB	0x80000		/* create a huge page mapping */
+#define MAP_FIXED_NOREPLACE 0x100000	/* MAP_FIXED which doesn't unmap underlying mapping */
 
 /*
  * Flags for msync
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index cf3fd549e16d..6b07b739f914 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -848,6 +848,9 @@ static inline unsigned int decode_config5(struct cpuinfo_mips *c)
 	if (config5 & MIPS_CONF5_CA2)
 		c->ases |= MIPS_ASE_MIPS16E2;
 
+	if (config5 & MIPS_CONF5_CRCP)
+		elf_hwcap |= HWCAP_MIPS_CRC32;
+
 	return config5 & MIPS_CONF_M;
 }
 
diff --git a/arch/mips/kernel/pm-cps.c b/arch/mips/kernel/pm-cps.c
index 421e06dfee72..55c3fbeb2df6 100644
--- a/arch/mips/kernel/pm-cps.c
+++ b/arch/mips/kernel/pm-cps.c
@@ -12,6 +12,7 @@
 #include <linux/init.h>
 #include <linux/percpu.h>
 #include <linux/slab.h>
+#include <linux/suspend.h>
 
 #include <asm/asm-offsets.h>
 #include <asm/cacheflush.h>
@@ -670,6 +671,34 @@ static int cps_pm_online_cpu(unsigned int cpu)
 	return 0;
 }
 
+static int cps_pm_power_notifier(struct notifier_block *this,
+				 unsigned long event, void *ptr)
+{
+	unsigned int stat;
+
+	switch (event) {
+	case PM_SUSPEND_PREPARE:
+		stat = read_cpc_cl_stat_conf();
+		/*
+		 * If we're attempting to suspend the system and power down all
+		 * of the cores, the JTAG detect bit indicates that the CPC will
+		 * instead put the cores into clock-off state. In this state
+		 * a connected debugger can cause the CPU to attempt
+		 * interactions with the powered down system. At best this will
+		 * fail. At worst, it can hang the NoC, requiring a hard reset.
+		 * To avoid this, just block system suspend if a JTAG probe
+		 * is detected.
+		 */
+		if (stat & CPC_Cx_STAT_CONF_EJTAG_PROBE) {
+			pr_warn("JTAG probe is connected - abort suspend\n");
+			return NOTIFY_BAD;
+		}
+		return NOTIFY_DONE;
+	default:
+		return NOTIFY_DONE;
+	}
+}
+
 static int __init cps_pm_init(void)
 {
 	/* A CM is required for all non-coherent states */
@@ -705,6 +734,8 @@ static int __init cps_pm_init(void)
 		pr_warn("pm-cps: no CPC, clock & power gating unavailable\n");
 	}
 
+	pm_notifier(cps_pm_power_notifier, 0);
+
 	return cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "mips/cps_pm:online",
 				 cps_pm_online_cpu, NULL);
 }
diff --git a/arch/mips/kernel/reset.c b/arch/mips/kernel/reset.c
index 7c746d3458e7..6288780b779e 100644
--- a/arch/mips/kernel/reset.c
+++ b/arch/mips/kernel/reset.c
@@ -13,6 +13,9 @@
 #include <linux/reboot.h>
 #include <linux/delay.h>
 
+#include <asm/compiler.h>
+#include <asm/idle.h>
+#include <asm/mipsregs.h>
 #include <asm/reboot.h>
 
 /*
@@ -26,6 +29,62 @@ void (*pm_power_off)(void);
 
 EXPORT_SYMBOL(pm_power_off);
 
+static void machine_hang(void)
+{
+	/*
+	 * We're hanging the system so we don't want to be interrupted anymore.
+	 * Any interrupt handlers that ran would at best be useless & at worst
+	 * go awry because the system isn't in a functional state.
+	 */
+	local_irq_disable();
+
+	/*
+	 * Mask all interrupts, giving us a better chance of remaining in the
+	 * low power wait state.
+	 */
+	clear_c0_status(ST0_IM);
+
+	while (true) {
+		if (cpu_has_mips_r) {
+			/*
+			 * We know that the wait instruction is supported so
+			 * make use of it directly, leaving interrupts
+			 * disabled.
+			 */
+			asm volatile(
+				".set	push\n\t"
+				".set	" MIPS_ISA_ARCH_LEVEL "\n\t"
+				"wait\n\t"
+				".set	pop");
+		} else if (cpu_wait) {
+			/*
+			 * Try the cpu_wait() callback. This isn't ideal since
+			 * it'll re-enable interrupts, but that ought to be
+			 * harmless given that they're all masked.
+			 */
+			cpu_wait();
+			local_irq_disable();
+		} else {
+			/*
+			 * We're going to burn some power running round the
+			 * loop, but we don't really have a choice. This isn't
+			 * a path we should expect to run for long during
+			 * typical use anyway.
+			 */
+		}
+
+		/*
+		 * In most modern MIPS CPUs interrupts will cause the wait
+		 * instruction to graduate even when disabled, and in some
+		 * cases even when masked. In order to prevent a timer
+		 * interrupt from continuously taking us out of the low power
+		 * wait state, we clear any pending timer interrupt here.
+		 */
+		if (cpu_has_counter)
+			write_c0_compare(0);
+	}
+}
+
 void machine_restart(char *command)
 {
 	if (_machine_restart)
@@ -38,8 +97,7 @@ void machine_restart(char *command)
 	do_kernel_restart(command);
 	mdelay(1000);
 	pr_emerg("Reboot failed -- System halted\n");
-	local_irq_disable();
-	while (1);
+	machine_hang();
 }
 
 void machine_halt(void)
@@ -51,8 +109,7 @@ void machine_halt(void)
 	preempt_disable();
 	smp_send_stop();
 #endif
-	local_irq_disable();
-	while (1);
+	machine_hang();
 }
 
 void machine_power_off(void)
@@ -64,6 +121,5 @@ void machine_power_off(void)
 	preempt_disable();
 	smp_send_stop();
 #endif
-	local_irq_disable();
-	while (1);
+	machine_hang();
 }
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index 5f8b0a9e30b3..563188ac6fa2 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -155,7 +155,8 @@ void __init detect_memory_region(phys_addr_t start, phys_addr_t sz_min, phys_add
 	add_memory_region(start, size, BOOT_MEM_RAM);
 }
 
-bool __init memory_region_available(phys_addr_t start, phys_addr_t size)
+static bool __init __maybe_unused memory_region_available(phys_addr_t start,
+							  phys_addr_t size)
 {
 	int i;
 	bool in_ram = false, free = true;
@@ -453,7 +454,7 @@ static void __init bootmem_init(void)
 		pr_info("Wasting %lu bytes for tracking %lu unused pages\n",
 			(min_low_pfn - ARCH_PFN_OFFSET) * sizeof(struct page),
 			min_low_pfn - ARCH_PFN_OFFSET);
-	} else if (min_low_pfn < ARCH_PFN_OFFSET) {
+	} else if (ARCH_PFN_OFFSET - min_low_pfn > 0UL) {
 		pr_info("%lu free pages won't be used\n",
 			ARCH_PFN_OFFSET - min_low_pfn);
 	}
diff --git a/arch/mips/mm/gup.c b/arch/mips/mm/gup.c
index 1e4658eee13f..5a4875cac1ec 100644
--- a/arch/mips/mm/gup.c
+++ b/arch/mips/mm/gup.c
@@ -178,6 +178,8 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
 /*
  * Like get_user_pages_fast() except its IRQ-safe in that it won't fall
  * back to the regular GUP.
+ * Note a difference with get_user_pages_fast: this always returns the
+ * number of pages pinned, 0 if no pages were pinned.
  */
 int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
 			  struct page **pages)
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 84b7b592b834..400676ce03f4 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -30,7 +30,6 @@
 #include <linux/hardirq.h>
 #include <linux/gfp.h>
 #include <linux/kcore.h>
-#include <linux/export.h>
 #include <linux/initrd.h>
 
 #include <asm/asm-offsets.h>
@@ -46,7 +45,6 @@
 #include <asm/pgalloc.h>
 #include <asm/tlb.h>
 #include <asm/fixmap.h>
-#include <asm/maar.h>
 
 /*
  * We have up to 8 empty zeroed pages so we can map one of the right colour
diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c
index 33d3251ecd37..2f616ebeb7e0 100644
--- a/arch/mips/mm/mmap.c
+++ b/arch/mips/mm/mmap.c
@@ -24,20 +24,20 @@ EXPORT_SYMBOL(shm_align_mask);
 #define MIN_GAP (128*1024*1024UL)
 #define MAX_GAP ((TASK_SIZE)/6*5)
 
-static int mmap_is_legacy(void)
+static int mmap_is_legacy(struct rlimit *rlim_stack)
 {
 	if (current->personality & ADDR_COMPAT_LAYOUT)
 		return 1;
 
-	if (rlimit(RLIMIT_STACK) == RLIM_INFINITY)
+	if (rlim_stack->rlim_cur == RLIM_INFINITY)
 		return 1;
 
 	return sysctl_legacy_va_layout;
 }
 
-static unsigned long mmap_base(unsigned long rnd)
+static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
 {
-	unsigned long gap = rlimit(RLIMIT_STACK);
+	unsigned long gap = rlim_stack->rlim_cur;
 
 	if (gap < MIN_GAP)
 		gap = MIN_GAP;
@@ -158,18 +158,18 @@ unsigned long arch_mmap_rnd(void)
 	return rnd << PAGE_SHIFT;
 }
 
-void arch_pick_mmap_layout(struct mm_struct *mm)
+void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
 {
 	unsigned long random_factor = 0UL;
 
 	if (current->flags & PF_RANDOMIZE)
 		random_factor = arch_mmap_rnd();
 
-	if (mmap_is_legacy()) {
+	if (mmap_is_legacy(rlim_stack)) {
 		mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
 		mm->get_unmapped_area = arch_get_unmapped_area;
 	} else {
-		mm->mmap_base = mmap_base(random_factor);
+		mm->mmap_base = mmap_base(random_factor, rlim_stack);
 		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
 	}
 }
diff --git a/arch/mips/net/bpf_jit_asm.S b/arch/mips/net/bpf_jit_asm.S
index 88a2075305d1..57154c5883b6 100644
--- a/arch/mips/net/bpf_jit_asm.S
+++ b/arch/mips/net/bpf_jit_asm.S
@@ -11,6 +11,7 @@
  */
 
 #include <asm/asm.h>
+#include <asm/isa-rev.h>
 #include <asm/regdef.h>
 #include "bpf_jit.h"
 
@@ -65,7 +66,7 @@ FEXPORT(sk_load_word_positive)
 	lw	$r_A, 0(t1)
 	.set	noreorder
 #ifdef CONFIG_CPU_LITTLE_ENDIAN
-# if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
+# if MIPS_ISA_REV >= 2
 	wsbh	t0, $r_A
 	rotr	$r_A, t0, 16
 # else
@@ -92,7 +93,7 @@ FEXPORT(sk_load_half_positive)
 	PTR_ADDU t1, $r_skb_data, offset
 	lhu	$r_A, 0(t1)
 #ifdef CONFIG_CPU_LITTLE_ENDIAN
-# if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
+# if MIPS_ISA_REV >= 2
 	wsbh	$r_A, $r_A
 # else
 	sll	t0, $r_A, 8
@@ -170,7 +171,7 @@ FEXPORT(sk_load_byte_positive)
 NESTED(bpf_slow_path_word, (6 * SZREG), $r_sp)
 	bpf_slow_path_common(4)
 #ifdef CONFIG_CPU_LITTLE_ENDIAN
-# if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
+# if MIPS_ISA_REV >= 2
 	wsbh	t0, $r_s0
 	jr	$r_ra
 	 rotr	$r_A, t0, 16
@@ -196,7 +197,7 @@ NESTED(bpf_slow_path_word, (6 * SZREG), $r_sp)
 NESTED(bpf_slow_path_half, (6 * SZREG), $r_sp)
 	bpf_slow_path_common(2)
 #ifdef CONFIG_CPU_LITTLE_ENDIAN
-# if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
+# if MIPS_ISA_REV >= 2
 	jr	$r_ra
 	 wsbh	$r_A, $r_s0
 # else
diff --git a/arch/mips/pci/pci-mt7620.c b/arch/mips/pci/pci-mt7620.c
index 407f155f0bb6..f6b77788124a 100644
--- a/arch/mips/pci/pci-mt7620.c
+++ b/arch/mips/pci/pci-mt7620.c
@@ -315,6 +315,7 @@ static int mt7620_pci_probe(struct platform_device *pdev)
 		break;
 
 	case MT762X_SOC_MT7628AN:
+	case MT762X_SOC_MT7688:
 		if (mt7628_pci_hw_init(pdev))
 			return -1;
 		break;
diff --git a/arch/mips/txx9/rbtx4927/setup.c b/arch/mips/txx9/rbtx4927/setup.c
index f5b367e20dff..31955c1d5555 100644
--- a/arch/mips/txx9/rbtx4927/setup.c
+++ b/arch/mips/txx9/rbtx4927/setup.c
@@ -319,7 +319,7 @@ static void __init rbtx4927_mtd_init(void)
 
 static void __init rbtx4927_gpioled_init(void)
 {
-	static struct gpio_led leds[] = {
+	static const struct gpio_led leds[] = {
 		{ .name = "gpioled:green:0", .gpio = 0, .active_low = 1, },
 		{ .name = "gpioled:green:1", .gpio = 1, .active_low = 1, },
 	};
diff --git a/arch/mips/vdso/elf.S b/arch/mips/vdso/elf.S
index be37bbb1f061..428a1917afc6 100644
--- a/arch/mips/vdso/elf.S
+++ b/arch/mips/vdso/elf.S
@@ -10,6 +10,8 @@
 
 #include "vdso.h"
 
+#include <asm/isa-rev.h>
+
 #include <linux/elfnote.h>
 #include <linux/version.h>
 
@@ -40,11 +42,7 @@ __mips_abiflags:
 	.byte	__mips		/* isa_level */
 
 	/* isa_rev */
-#ifdef __mips_isa_rev
-	.byte	__mips_isa_rev
-#else
-	.byte	0
-#endif
+	.byte	MIPS_ISA_REV
 
 	/* gpr_size */
 #ifdef __mips64
@@ -54,7 +52,7 @@ __mips_abiflags:
 #endif
 
 	/* cpr1_size */
-#if (defined(__mips_isa_rev) && __mips_isa_rev >= 6) || defined(__mips64)
+#if (MIPS_ISA_REV >= 6) || defined(__mips64)
 	.byte	2		/* AFL_REG_64 */
 #else
 	.byte	1		/* AFL_REG_32 */
diff --git a/arch/nds32/include/asm/cacheflush.h b/arch/nds32/include/asm/cacheflush.h
index 7b9b20a381cb..1240f148ec0f 100644
--- a/arch/nds32/include/asm/cacheflush.h
+++ b/arch/nds32/include/asm/cacheflush.h
@@ -34,8 +34,8 @@ void flush_anon_page(struct vm_area_struct *vma,
 void flush_kernel_dcache_page(struct page *page);
 void flush_icache_range(unsigned long start, unsigned long end);
 void flush_icache_page(struct vm_area_struct *vma, struct page *page);
-#define flush_dcache_mmap_lock(mapping)   spin_lock_irq(&(mapping)->tree_lock)
-#define flush_dcache_mmap_unlock(mapping) spin_unlock_irq(&(mapping)->tree_lock)
+#define flush_dcache_mmap_lock(mapping)   xa_lock_irq(&(mapping)->i_pages)
+#define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&(mapping)->i_pages)
 
 #else
 #include <asm-generic/cacheflush.h>
diff --git a/arch/nios2/include/asm/cacheflush.h b/arch/nios2/include/asm/cacheflush.h
index 55e383c173f7..18eb9f69f806 100644
--- a/arch/nios2/include/asm/cacheflush.h
+++ b/arch/nios2/include/asm/cacheflush.h
@@ -46,9 +46,7 @@ extern void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
 extern void flush_dcache_range(unsigned long start, unsigned long end);
 extern void invalidate_dcache_range(unsigned long start, unsigned long end);
 
-#define flush_dcache_mmap_lock(mapping) \
-	spin_lock_irq(&(mapping)->tree_lock)
-#define flush_dcache_mmap_unlock(mapping) \
-	spin_unlock_irq(&(mapping)->tree_lock)
+#define flush_dcache_mmap_lock(mapping)		xa_lock_irq(&(mapping)->i_pages)
+#define flush_dcache_mmap_unlock(mapping)	xa_unlock_irq(&(mapping)->i_pages)
 
 #endif /* _ASM_NIOS2_CACHEFLUSH_H */
diff --git a/arch/nios2/kernel/time.c b/arch/nios2/kernel/time.c
index 20e86209ef2e..ab88b6dd4679 100644
--- a/arch/nios2/kernel/time.c
+++ b/arch/nios2/kernel/time.c
@@ -336,9 +336,9 @@ static int __init nios2_time_init(struct device_node *timer)
 	return ret;
 }
 
-void read_persistent_clock(struct timespec *ts)
+void read_persistent_clock64(struct timespec64 *ts)
 {
-	ts->tv_sec = mktime(2007, 1, 1, 0, 0, 0);
+	ts->tv_sec = mktime64(2007, 1, 1, 0, 0, 0);
 	ts->tv_nsec = 0;
 }
 
diff --git a/arch/openrisc/include/uapi/asm/unistd.h b/arch/openrisc/include/uapi/asm/unistd.h
index 9a3ee389631e..11c5a58ab333 100644
--- a/arch/openrisc/include/uapi/asm/unistd.h
+++ b/arch/openrisc/include/uapi/asm/unistd.h
@@ -17,8 +17,6 @@
  * (at your option) any later version.
  */
 
-#define __ARCH_HAVE_MMU
-
 #define sys_mmap2 sys_mmap_pgoff
 
 #define __ARCH_WANT_RENAMEAT
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 7e0bb9836b58..fc5a574c3482 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -338,6 +338,7 @@ source "mm/Kconfig"
 config COMPAT
 	def_bool y
 	depends on 64BIT
+	select COMPAT_BINFMT_ELF if BINFMT_ELF
 
 config SYSVIPC_COMPAT
 	def_bool y
diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h
index bd5ce31936f5..0c83644bfa5c 100644
--- a/arch/parisc/include/asm/cacheflush.h
+++ b/arch/parisc/include/asm/cacheflush.h
@@ -55,10 +55,8 @@ void invalidate_kernel_vmap_range(void *vaddr, int size);
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 extern void flush_dcache_page(struct page *page);
 
-#define flush_dcache_mmap_lock(mapping) \
-	spin_lock_irq(&(mapping)->tree_lock)
-#define flush_dcache_mmap_unlock(mapping) \
-	spin_unlock_irq(&(mapping)->tree_lock)
+#define flush_dcache_mmap_lock(mapping)		xa_lock_irq(&(mapping)->i_pages)
+#define flush_dcache_mmap_unlock(mapping)	xa_unlock_irq(&(mapping)->i_pages)
 
 #define flush_icache_page(vma,page)	do { 		\
 	flush_kernel_dcache_page(page);			\
diff --git a/arch/parisc/include/asm/compat.h b/arch/parisc/include/asm/compat.h
index c22db5323244..57b8b2a2fd4e 100644
--- a/arch/parisc/include/asm/compat.h
+++ b/arch/parisc/include/asm/compat.h
@@ -193,6 +193,12 @@ struct compat_shmid64_ds {
 };
 
 /*
+ * The type of struct elf_prstatus.pr_reg in compatible core dumps.
+ */
+#define COMPAT_ELF_NGREG 80
+typedef compat_ulong_t compat_elf_gregset_t[COMPAT_ELF_NGREG];
+
+/*
  * A pointer passed in from user mode. This should not
  * be used for syscall parameters, just declare them
  * as pointers because the syscall entry code will have
diff --git a/arch/parisc/include/asm/elf.h b/arch/parisc/include/asm/elf.h
index 382d75a2ee4f..f019d3ec0c1c 100644
--- a/arch/parisc/include/asm/elf.h
+++ b/arch/parisc/include/asm/elf.h
@@ -6,7 +6,7 @@
  * ELF register definitions..
  */
 
-#include <asm/ptrace.h>
+#include <linux/types.h>
 
 #define EM_PARISC 15
 
@@ -169,16 +169,12 @@ typedef struct elf64_fdesc {
 	__u64	gp;
 } Elf64_Fdesc;
 
-#ifdef __KERNEL__
-
 #ifdef CONFIG_64BIT
 #define Elf_Fdesc	Elf64_Fdesc
 #else
 #define Elf_Fdesc	Elf32_Fdesc
 #endif /*CONFIG_64BIT*/
 
-#endif /*__KERNEL__*/
-
 /* Legal values for p_type field of Elf32_Phdr/Elf64_Phdr.  */
 
 #define PT_HP_TLS		(PT_LOOS + 0x0)
@@ -213,44 +209,44 @@ typedef struct elf64_fdesc {
 #define PF_HP_SBP		0x08000000
 
 /*
+ * This yields a string that ld.so will use to load implementation
+ * specific libraries for optimization.  This is more specific in
+ * intent than poking at uname or /proc/cpuinfo.
+ */
+
+#define ELF_PLATFORM  ("PARISC")
+
+/*
  * The following definitions are those for 32-bit ELF binaries on a 32-bit
  * kernel and for 64-bit binaries on a 64-bit kernel.  To run 32-bit binaries
- * on a 64-bit kernel, arch/parisc/kernel/binfmt_elf32.c defines these
- * macros appropriately and then #includes binfmt_elf.c, which then includes
- * this file.
+ * on a 64-bit kernel, fs/compat_binfmt_elf.c defines ELF_CLASS and then
+ * #includes binfmt_elf.c, which then includes this file.
  */
 #ifndef ELF_CLASS
 
-/*
- * This is used to ensure we don't load something for the wrong architecture.
- *
- * Note that this header file is used by default in fs/binfmt_elf.c. So
- * the following macros are for the default case. However, for the 64
- * bit kernel we also support 32 bit parisc binaries. To do that
- * arch/parisc/kernel/binfmt_elf32.c defines its own set of these
- * macros, and then it includes fs/binfmt_elf.c to provide an alternate
- * elf binary handler for 32 bit binaries (on the 64 bit kernel).
- */
 #ifdef CONFIG_64BIT
-#define ELF_CLASS   ELFCLASS64
+#define ELF_CLASS	ELFCLASS64
 #else
 #define ELF_CLASS	ELFCLASS32
 #endif
 
 typedef unsigned long elf_greg_t;
 
-/*
- * This yields a string that ld.so will use to load implementation
- * specific libraries for optimization.  This is more specific in
- * intent than poking at uname or /proc/cpuinfo.
- */
-
-#define ELF_PLATFORM  ("PARISC\0")
-
 #define SET_PERSONALITY(ex) \
+({	\
 	set_personality((current->personality & ~PER_MASK) | PER_LINUX); \
 	current->thread.map_base = DEFAULT_MAP_BASE; \
-	current->thread.task_size = DEFAULT_TASK_SIZE \
+	current->thread.task_size = DEFAULT_TASK_SIZE; \
+ })
+
+#endif /* ! ELF_CLASS */
+
+#define COMPAT_SET_PERSONALITY(ex) \
+({	\
+	set_thread_flag(TIF_32BIT); \
+	current->thread.map_base = DEFAULT_MAP_BASE32; \
+	current->thread.task_size = DEFAULT_TASK_SIZE32; \
+ })
 
 /*
  * Fill in general registers in a core dump.  This saves pretty
@@ -277,10 +273,12 @@ typedef unsigned long elf_greg_t;
 
 #define ELF_CORE_COPY_REGS(dst, pt)	\
 	memset(dst, 0, sizeof(dst));	/* don't leak any "random" bits */ \
-	memcpy(dst + 0, pt->gr, 32 * sizeof(elf_greg_t)); \
-	memcpy(dst + 32, pt->sr, 8 * sizeof(elf_greg_t)); \
-	memcpy(dst + 40, pt->iaoq, 2 * sizeof(elf_greg_t)); \
-	memcpy(dst + 42, pt->iasq, 2 * sizeof(elf_greg_t)); \
+	{	int i; \
+		for (i = 0; i < 32; i++) dst[i] = pt->gr[i]; \
+		for (i = 0; i < 8; i++) dst[32 + i] = pt->sr[i]; \
+	} \
+	dst[40] = pt->iaoq[0]; dst[41] = pt->iaoq[1]; \
+	dst[42] = pt->iasq[0]; dst[43] = pt->iasq[1]; \
 	dst[44] = pt->sar;   dst[45] = pt->iir; \
 	dst[46] = pt->isr;   dst[47] = pt->ior; \
 	dst[48] = mfctl(22); dst[49] = mfctl(0); \
@@ -292,7 +290,7 @@ typedef unsigned long elf_greg_t;
 	dst[60] = mfctl(12); dst[61] = mfctl(13); \
 	dst[62] = mfctl(10); dst[63] = mfctl(15);
 
-#endif /* ! ELF_CLASS */
+#define CORE_DUMP_USE_REGSET
 
 #define ELF_NGREG 80	/* We only need 64 at present, but leave space
 			   for expansion. */
@@ -310,7 +308,10 @@ extern int dump_task_fpu (struct task_struct *, elf_fpregset_t *);
 struct pt_regs;	/* forward declaration... */
 
 
-#define elf_check_arch(x) ((x)->e_machine == EM_PARISC && (x)->e_ident[EI_CLASS] == ELF_CLASS)
+#define elf_check_arch(x)		\
+	((x)->e_machine == EM_PARISC && (x)->e_ident[EI_CLASS] == ELF_CLASS)
+#define compat_elf_check_arch(x)	\
+	((x)->e_machine == EM_PARISC && (x)->e_ident[EI_CLASS] == ELFCLASS32)
 
 /*
  * These are used to set parameters in the core dumps.
diff --git a/arch/parisc/include/uapi/asm/mman.h b/arch/parisc/include/uapi/asm/mman.h
index a056a642bb31..870fbf8c7088 100644
--- a/arch/parisc/include/uapi/asm/mman.h
+++ b/arch/parisc/include/uapi/asm/mman.h
@@ -26,6 +26,7 @@
 #define MAP_NONBLOCK	0x20000		/* do not block on IO */
 #define MAP_STACK	0x40000		/* give out an address that is best suited for process/thread stacks */
 #define MAP_HUGETLB	0x80000		/* create a huge page mapping */
+#define MAP_FIXED_NOREPLACE 0x100000	/* MAP_FIXED which doesn't unmap underlying mapping */
 
 #define MS_SYNC		1		/* synchronous memory sync */
 #define MS_ASYNC	2		/* sync memory asynchronously */
diff --git a/arch/parisc/include/uapi/asm/siginfo.h b/arch/parisc/include/uapi/asm/siginfo.h
index be40331f757d..4a1062e05aaf 100644
--- a/arch/parisc/include/uapi/asm/siginfo.h
+++ b/arch/parisc/include/uapi/asm/siginfo.h
@@ -8,11 +8,4 @@
 
 #include <asm-generic/siginfo.h>
 
-/*
- * SIGFPE si_codes
- */
-#ifdef __KERNEL__
-#define FPE_FIXME	0	/* Broken dup of SI_USER */
-#endif /* __KERNEL__ */
-
 #endif
diff --git a/arch/parisc/kernel/binfmt_elf32.c b/arch/parisc/kernel/binfmt_elf32.c
deleted file mode 100644
index 20dfa081ed0b..000000000000
--- a/arch/parisc/kernel/binfmt_elf32.c
+++ /dev/null
@@ -1,98 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Support for 32-bit Linux/Parisc ELF binaries on 64 bit kernels
- *
- * Copyright (C) 2000 John Marvin
- * Copyright (C) 2000 Hewlett Packard Co.
- *
- * Heavily inspired from various other efforts to do the same thing
- * (ia64,sparc64/mips64)
- */
-
-/* Make sure include/asm-parisc/elf.h does the right thing */
-
-#define ELF_CLASS	ELFCLASS32
-
-#define ELF_CORE_COPY_REGS(dst, pt)	\
-	memset(dst, 0, sizeof(dst));	/* don't leak any "random" bits */ \
-	{	int i; \
-		for (i = 0; i < 32; i++) dst[i] = (elf_greg_t) pt->gr[i]; \
-		for (i = 0; i < 8; i++) dst[32 + i] = (elf_greg_t) pt->sr[i]; \
-	} \
-	dst[40] = (elf_greg_t) pt->iaoq[0]; dst[41] = (elf_greg_t) pt->iaoq[1]; \
-	dst[42] = (elf_greg_t) pt->iasq[0]; dst[43] = (elf_greg_t) pt->iasq[1]; \
-	dst[44] = (elf_greg_t) pt->sar;   dst[45] = (elf_greg_t) pt->iir; \
-	dst[46] = (elf_greg_t) pt->isr;   dst[47] = (elf_greg_t) pt->ior; \
-	dst[48] = (elf_greg_t) mfctl(22); dst[49] = (elf_greg_t) mfctl(0); \
-	dst[50] = (elf_greg_t) mfctl(24); dst[51] = (elf_greg_t) mfctl(25); \
-	dst[52] = (elf_greg_t) mfctl(26); dst[53] = (elf_greg_t) mfctl(27); \
-	dst[54] = (elf_greg_t) mfctl(28); dst[55] = (elf_greg_t) mfctl(29); \
-	dst[56] = (elf_greg_t) mfctl(30); dst[57] = (elf_greg_t) mfctl(31); \
-	dst[58] = (elf_greg_t) mfctl( 8); dst[59] = (elf_greg_t) mfctl( 9); \
-	dst[60] = (elf_greg_t) mfctl(12); dst[61] = (elf_greg_t) mfctl(13); \
-	dst[62] = (elf_greg_t) mfctl(10); dst[63] = (elf_greg_t) mfctl(15);
-
-
-typedef unsigned int elf_greg_t;
-
-#include <linux/spinlock.h>
-#include <asm/processor.h>
-#include <linux/module.h>
-#include <linux/elfcore.h>
-#include <linux/compat.h>		/* struct compat_timeval */
-
-#define elf_prstatus elf_prstatus32
-struct elf_prstatus32
-{
-	struct elf_siginfo pr_info;	/* Info associated with signal */
-	short	pr_cursig;		/* Current signal */
-	unsigned int pr_sigpend;	/* Set of pending signals */
-	unsigned int pr_sighold;	/* Set of held signals */
-	pid_t	pr_pid;
-	pid_t	pr_ppid;
-	pid_t	pr_pgrp;
-	pid_t	pr_sid;
-	struct compat_timeval pr_utime;		/* User time */
-	struct compat_timeval pr_stime;		/* System time */
-	struct compat_timeval pr_cutime;	/* Cumulative user time */
-	struct compat_timeval pr_cstime;	/* Cumulative system time */
-	elf_gregset_t pr_reg;	/* GP registers */
-	int pr_fpvalid;		/* True if math co-processor being used.  */
-};
-
-#define elf_prpsinfo elf_prpsinfo32
-struct elf_prpsinfo32
-{
-	char	pr_state;	/* numeric process state */
-	char	pr_sname;	/* char for pr_state */
-	char	pr_zomb;	/* zombie */
-	char	pr_nice;	/* nice val */
-	unsigned int pr_flag;	/* flags */
-	u16	pr_uid;
-	u16	pr_gid;
-	pid_t	pr_pid, pr_ppid, pr_pgrp, pr_sid;
-	/* Lots missing */
-	char	pr_fname[16];	/* filename of executable */
-	char	pr_psargs[ELF_PRARGSZ];	/* initial part of arg list */
-};
-
-#define init_elf_binfmt init_elf32_binfmt
-
-#define ELF_PLATFORM  ("PARISC32\0")
-
-/*
- * We should probably use this macro to set a flag somewhere to indicate
- * this is a 32 on 64 process. We could use PER_LINUX_32BIT, or we
- * could set a processor dependent flag in the thread_struct.
- */
-
-#undef SET_PERSONALITY
-#define SET_PERSONALITY(ex) \
-	set_thread_flag(TIF_32BIT); \
-	current->thread.map_base = DEFAULT_MAP_BASE32; \
-	current->thread.task_size = DEFAULT_TASK_SIZE32 \
-
-#undef ns_to_timeval
-#define ns_to_timeval ns_to_compat_timeval
-
-#include "../../../fs/binfmt_elf.c"
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index a99da95fc9fd..bddd2acebdcc 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -254,7 +254,7 @@ parisc_cache_init(void)
 	}
 }
 
-void disable_sr_hashing(void)
+void __init disable_sr_hashing(void)
 {
 	int srhash_type, retval;
 	unsigned long space_bits;
diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S
index 67b0f7532e83..22e6374ece44 100644
--- a/arch/parisc/kernel/pacache.S
+++ b/arch/parisc/kernel/pacache.S
@@ -38,9 +38,10 @@
 #include <asm/cache.h>
 #include <asm/ldcw.h>
 #include <linux/linkage.h>
+#include <linux/init.h>
 
-	.text
-	.align	128
+	.section .text.hot
+	.align	16
 
 ENTRY_CFI(flush_tlb_all_local)
 	.proc
@@ -328,8 +329,6 @@ fdsync:
 	.procend
 ENDPROC_CFI(flush_data_cache_local)
 
-	.align	16
-
 /* Macros to serialize TLB purge operations on SMP.  */
 
 	.macro	tlb_lock	la,flags,tmp
@@ -1216,6 +1215,8 @@ ENTRY_CFI(flush_kernel_icache_range_asm)
 	.procend
 ENDPROC_CFI(flush_kernel_icache_range_asm)
 
+	__INIT
+
 	/* align should cover use of rfi in disable_sr_hashing_asm and
 	 * srdis_done.
 	 */
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index bbe46571ff96..b931745815e0 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -112,14 +112,6 @@ void machine_restart(char *cmd)
 
 }
 
-void machine_halt(void)
-{
-	/*
-	** The LED/ChassisCodes are updated by the led_halt()
-	** function, called by the reboot notifier chain.
-	*/
-}
-
 void (*chassis_power_off)(void);
 
 /*
@@ -158,6 +150,11 @@ void machine_power_off(void)
 void (*pm_power_off)(void);
 EXPORT_SYMBOL(pm_power_off);
 
+void machine_halt(void)
+{
+	machine_power_off();
+}
+
 void flush_thread(void)
 {
 	/* Only needs to handle fpu stuff or perf monitors.
diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c
index 8c99ebbe2bac..43b308cfdf53 100644
--- a/arch/parisc/kernel/sys_parisc.c
+++ b/arch/parisc/kernel/sys_parisc.c
@@ -70,12 +70,18 @@ static inline unsigned long COLOR_ALIGN(unsigned long addr,
  * Top of mmap area (just below the process stack).
  */
 
-static unsigned long mmap_upper_limit(void)
+/*
+ * When called from arch_get_unmapped_area(), rlim_stack will be NULL,
+ * indicating that "current" should be used instead of a passed-in
+ * value from the exec bprm as done with arch_pick_mmap_layout().
+ */
+static unsigned long mmap_upper_limit(struct rlimit *rlim_stack)
 {
 	unsigned long stack_base;
 
 	/* Limit stack size - see setup_arg_pages() in fs/exec.c */
-	stack_base = rlimit_max(RLIMIT_STACK);
+	stack_base = rlim_stack ? rlim_stack->rlim_max
+				: rlimit_max(RLIMIT_STACK);
 	if (stack_base > STACK_SIZE_MAX)
 		stack_base = STACK_SIZE_MAX;
 
@@ -127,7 +133,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
 	info.flags = 0;
 	info.length = len;
 	info.low_limit = mm->mmap_legacy_base;
-	info.high_limit = mmap_upper_limit();
+	info.high_limit = mmap_upper_limit(NULL);
 	info.align_mask = last_mmap ? (PAGE_MASK & (SHM_COLOUR - 1)) : 0;
 	info.align_offset = shared_align_offset(last_mmap, pgoff);
 	addr = vm_unmapped_area(&info);
@@ -250,10 +256,10 @@ static unsigned long mmap_legacy_base(void)
  * This function, called very early during the creation of a new
  * process VM image, sets up which VM layout function to use:
  */
-void arch_pick_mmap_layout(struct mm_struct *mm)
+void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
 {
 	mm->mmap_legacy_base = mmap_legacy_base();
-	mm->mmap_base = mmap_upper_limit();
+	mm->mmap_base = mmap_upper_limit(rlim_stack);
 
 	if (mmap_is_legacy()) {
 		mm->mmap_base = mm->mmap_legacy_base;
diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c
index f7e684560186..c3830400ca28 100644
--- a/arch/parisc/kernel/time.c
+++ b/arch/parisc/kernel/time.c
@@ -174,7 +174,7 @@ static int rtc_generic_get_time(struct device *dev, struct rtc_time *tm)
 
 	/* we treat tod_sec as unsigned, so this can work until year 2106 */
 	rtc_time64_to_tm(tod_data.tod_sec, tm);
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int rtc_generic_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c
index c919e6c0a687..68e671a11987 100644
--- a/arch/parisc/kernel/traps.c
+++ b/arch/parisc/kernel/traps.c
@@ -627,9 +627,10 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
 		   on condition  */
 		if(user_mode(regs)){
 			si.si_signo = SIGFPE;
-			/* Set to zero, and let the userspace app figure it out from
-			   the insn pointed to by si_addr */
-			si.si_code = FPE_FIXME;
+			/* Let userspace app figure it out from the insn pointed
+			 * to by si_addr.
+			 */
+			si.si_code = FPE_CONDTRAP;
 			si.si_addr = (void __user *) regs->iaoq[0];
 			force_sig_info(SIGFPE, &si, current);
 			return;
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 73ce5dd07642..c32a181a7cbb 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -552,6 +552,9 @@ config KEXEC_FILE
 	  for kernel and initramfs as opposed to a list of segments as is the
 	  case for the older kexec call.
 
+config ARCH_HAS_KEXEC_PURGATORY
+	def_bool KEXEC_FILE
+
 config RELOCATABLE
 	bool "Build a relocatable kernel"
 	depends on PPC64 || (FLATMEM && (44x || FSL_BOOKE))
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index ccd2556bdb53..95813df90801 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -141,11 +141,18 @@ AFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mabi=elfv1)
 endif
 CFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mcmodel=medium,$(call cc-option,-mminimal-toc))
 CFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mno-pointers-to-nested-functions)
+
 CFLAGS-$(CONFIG_PPC32)	:= -ffixed-r2 $(MULTIPLEWORD)
+CFLAGS-$(CONFIG_PPC32)	+= $(call cc-option,-mno-readonly-in-sdata)
 
 ifeq ($(CONFIG_PPC_BOOK3S_64),y)
-CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=power7,-mtune=power4)
-CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=power4
+ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
+CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=power8
+CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=power9,-mtune=power8)
+else
+CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=power7,$(call cc-option,-mtune=power5))
+CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mcpu=power5,-mcpu=power4)
+endif
 else
 CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=powerpc64
 endif
@@ -166,11 +173,11 @@ ifdef CONFIG_MPROFILE_KERNEL
 endif
 
 CFLAGS-$(CONFIG_CELL_CPU) += $(call cc-option,-mcpu=cell)
-CFLAGS-$(CONFIG_POWER4_CPU) += $(call cc-option,-mcpu=power4)
 CFLAGS-$(CONFIG_POWER5_CPU) += $(call cc-option,-mcpu=power5)
 CFLAGS-$(CONFIG_POWER6_CPU) += $(call cc-option,-mcpu=power6)
 CFLAGS-$(CONFIG_POWER7_CPU) += $(call cc-option,-mcpu=power7)
 CFLAGS-$(CONFIG_POWER8_CPU) += $(call cc-option,-mcpu=power8)
+CFLAGS-$(CONFIG_POWER9_CPU) += $(call cc-option,-mcpu=power9)
 
 # Altivec option not allowed with e500mc64 in GCC.
 ifeq ($(CONFIG_ALTIVEC),y)
@@ -243,6 +250,7 @@ endif
 cpu-as-$(CONFIG_4xx)		+= -Wa,-m405
 cpu-as-$(CONFIG_ALTIVEC)	+= $(call as-option,-Wa$(comma)-maltivec)
 cpu-as-$(CONFIG_E200)		+= -Wa,-me200
+cpu-as-$(CONFIG_PPC_BOOK3S_64)	+= -Wa,-mpower4
 
 KBUILD_AFLAGS += $(cpu-as-y)
 KBUILD_CFLAGS += $(cpu-as-y)
diff --git a/arch/powerpc/boot/dts/acadia.dts b/arch/powerpc/boot/dts/acadia.dts
index 86266159521e..deb52e41ab84 100644
--- a/arch/powerpc/boot/dts/acadia.dts
+++ b/arch/powerpc/boot/dts/acadia.dts
@@ -219,6 +219,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = "/plb/opb/serial@ef600300";
+		stdout-path = "/plb/opb/serial@ef600300";
 	};
 };
diff --git a/arch/powerpc/boot/dts/adder875-redboot.dts b/arch/powerpc/boot/dts/adder875-redboot.dts
index 083984720b2f..7f5ff4168482 100644
--- a/arch/powerpc/boot/dts/adder875-redboot.dts
+++ b/arch/powerpc/boot/dts/adder875-redboot.dts
@@ -178,6 +178,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = &console;
+		stdout-path = &console;
 	};
 };
diff --git a/arch/powerpc/boot/dts/adder875-uboot.dts b/arch/powerpc/boot/dts/adder875-uboot.dts
index e4554caf8f8d..bd9f33c57737 100644
--- a/arch/powerpc/boot/dts/adder875-uboot.dts
+++ b/arch/powerpc/boot/dts/adder875-uboot.dts
@@ -177,6 +177,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = &console;
+		stdout-path = &console;
 	};
 };
diff --git a/arch/powerpc/boot/dts/akebono.dts b/arch/powerpc/boot/dts/akebono.dts
index 746779202a12..8a7a10139bc9 100644
--- a/arch/powerpc/boot/dts/akebono.dts
+++ b/arch/powerpc/boot/dts/akebono.dts
@@ -410,6 +410,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = &UART0;
+		stdout-path = &UART0;
 	};
 };
diff --git a/arch/powerpc/boot/dts/amigaone.dts b/arch/powerpc/boot/dts/amigaone.dts
index 49ac36b16dd7..712430155b99 100644
--- a/arch/powerpc/boot/dts/amigaone.dts
+++ b/arch/powerpc/boot/dts/amigaone.dts
@@ -168,6 +168,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = "/pci@80000000/isa@7/serial@3f8";
+		stdout-path = "/pci@80000000/isa@7/serial@3f8";
 	};
 };
diff --git a/arch/powerpc/boot/dts/asp834x-redboot.dts b/arch/powerpc/boot/dts/asp834x-redboot.dts
index 9198745f45fb..e987b5af9326 100644
--- a/arch/powerpc/boot/dts/asp834x-redboot.dts
+++ b/arch/powerpc/boot/dts/asp834x-redboot.dts
@@ -304,7 +304,7 @@
 
 	chosen {
 		bootargs = "console=ttyS0,38400 root=/dev/mtdblock3 rootfstype=jffs2";
-		linux,stdout-path = &serial0;
+		stdout-path = &serial0;
 	};
 
 };
diff --git a/arch/powerpc/boot/dts/bamboo.dts b/arch/powerpc/boot/dts/bamboo.dts
index aa68911f6560..538e42b1120d 100644
--- a/arch/powerpc/boot/dts/bamboo.dts
+++ b/arch/powerpc/boot/dts/bamboo.dts
@@ -295,6 +295,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = "/plb/opb/serial@ef600300";
+		stdout-path = "/plb/opb/serial@ef600300";
 	};
 };
diff --git a/arch/powerpc/boot/dts/c2k.dts b/arch/powerpc/boot/dts/c2k.dts
index 27f169e3ade9..c5beb72d18b7 100644
--- a/arch/powerpc/boot/dts/c2k.dts
+++ b/arch/powerpc/boot/dts/c2k.dts
@@ -361,6 +361,6 @@
 		};
 	};
 	chosen {
-		linux,stdout-path = &MPSC0;
+		stdout-path = &MPSC0;
 	};
 };
diff --git a/arch/powerpc/boot/dts/currituck.dts b/arch/powerpc/boot/dts/currituck.dts
index f2ad5815f08d..a04a4fcfde63 100644
--- a/arch/powerpc/boot/dts/currituck.dts
+++ b/arch/powerpc/boot/dts/currituck.dts
@@ -237,6 +237,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = &UART0;
+		stdout-path = &UART0;
 	};
 };
diff --git a/arch/powerpc/boot/dts/digsy_mtc.dts b/arch/powerpc/boot/dts/digsy_mtc.dts
index c280e75c86bf..c3922fc03e0b 100644
--- a/arch/powerpc/boot/dts/digsy_mtc.dts
+++ b/arch/powerpc/boot/dts/digsy_mtc.dts
@@ -78,7 +78,7 @@
 			};
 
 			rtc@56 {
-				compatible = "mc,rv3029c2";
+				compatible = "microcrystal,rv3029";
 				reg = <0x56>;
 			};
 
diff --git a/arch/powerpc/boot/dts/ebony.dts b/arch/powerpc/boot/dts/ebony.dts
index ec2d142291b4..5d11e6ea7405 100644
--- a/arch/powerpc/boot/dts/ebony.dts
+++ b/arch/powerpc/boot/dts/ebony.dts
@@ -332,6 +332,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = "/plb/opb/serial@40000200";
+		stdout-path = "/plb/opb/serial@40000200";
 	};
 };
diff --git a/arch/powerpc/boot/dts/eiger.dts b/arch/powerpc/boot/dts/eiger.dts
index 48bcf7187924..7a1231d9d6f0 100644
--- a/arch/powerpc/boot/dts/eiger.dts
+++ b/arch/powerpc/boot/dts/eiger.dts
@@ -421,7 +421,7 @@
 
 	};
 	chosen {
-		linux,stdout-path = "/plb/opb/serial@ef600200";
+		stdout-path = "/plb/opb/serial@ef600200";
 	};
 
 };
diff --git a/arch/powerpc/boot/dts/ep405.dts b/arch/powerpc/boot/dts/ep405.dts
index 53ef06cc2134..4ac9c5ab6e6b 100644
--- a/arch/powerpc/boot/dts/ep405.dts
+++ b/arch/powerpc/boot/dts/ep405.dts
@@ -225,6 +225,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = "/plb/opb/serial@ef600300";
+		stdout-path = "/plb/opb/serial@ef600300";
 	};
 };
diff --git a/arch/powerpc/boot/dts/fsl/mvme7100.dts b/arch/powerpc/boot/dts/fsl/mvme7100.dts
index e2d306ad37a6..721cb53758ae 100644
--- a/arch/powerpc/boot/dts/fsl/mvme7100.dts
+++ b/arch/powerpc/boot/dts/fsl/mvme7100.dts
@@ -146,7 +146,7 @@
 	};
 
 	chosen {
-		linux,stdout-path = &serial0;
+		stdout-path = &serial0;
 	};
 };
 
diff --git a/arch/powerpc/boot/dts/fsp2.dts b/arch/powerpc/boot/dts/fsp2.dts
index 6560283c5aec..9311b86b1bd9 100644
--- a/arch/powerpc/boot/dts/fsp2.dts
+++ b/arch/powerpc/boot/dts/fsp2.dts
@@ -607,7 +607,7 @@
 	};
 
 	chosen {
-		linux,stdout-path = "/plb/opb/serial@b0020000";
+		stdout-path = "/plb/opb/serial@b0020000";
 		bootargs = "console=ttyS0,115200 rw log_buf_len=32768 debug";
 	};
 };
diff --git a/arch/powerpc/boot/dts/holly.dts b/arch/powerpc/boot/dts/holly.dts
index 43e6f0c8e449..02bd304c7d38 100644
--- a/arch/powerpc/boot/dts/holly.dts
+++ b/arch/powerpc/boot/dts/holly.dts
@@ -191,6 +191,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = "/tsi109@c0000000/serial@7808";
+		stdout-path = "/tsi109@c0000000/serial@7808";
 	};
 };
diff --git a/arch/powerpc/boot/dts/hotfoot.dts b/arch/powerpc/boot/dts/hotfoot.dts
index 71d3bb4931dc..b93bf2d9dd5b 100644
--- a/arch/powerpc/boot/dts/hotfoot.dts
+++ b/arch/powerpc/boot/dts/hotfoot.dts
@@ -291,6 +291,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = &UART0;
+		stdout-path = &UART0;
 	};
 };
diff --git a/arch/powerpc/boot/dts/icon.dts b/arch/powerpc/boot/dts/icon.dts
index 9c94fd737f7c..2e6e3a7b2604 100644
--- a/arch/powerpc/boot/dts/icon.dts
+++ b/arch/powerpc/boot/dts/icon.dts
@@ -442,6 +442,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = "/plb/opb/serial@f0000200";
+		stdout-path = "/plb/opb/serial@f0000200";
 	};
 };
diff --git a/arch/powerpc/boot/dts/iss4xx-mpic.dts b/arch/powerpc/boot/dts/iss4xx-mpic.dts
index 23e9d9b7e400..f7063198b2dc 100644
--- a/arch/powerpc/boot/dts/iss4xx-mpic.dts
+++ b/arch/powerpc/boot/dts/iss4xx-mpic.dts
@@ -150,6 +150,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = "/plb/opb/serial@40000200";
+		stdout-path = "/plb/opb/serial@40000200";
 	};
 };
diff --git a/arch/powerpc/boot/dts/iss4xx.dts b/arch/powerpc/boot/dts/iss4xx.dts
index 4ff6555c866d..5533aff25e41 100644
--- a/arch/powerpc/boot/dts/iss4xx.dts
+++ b/arch/powerpc/boot/dts/iss4xx.dts
@@ -111,6 +111,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = "/plb/opb/serial@40000200";
+		stdout-path = "/plb/opb/serial@40000200";
 	};
 };
diff --git a/arch/powerpc/boot/dts/katmai.dts b/arch/powerpc/boot/dts/katmai.dts
index f913dbe25d35..02629e119b87 100644
--- a/arch/powerpc/boot/dts/katmai.dts
+++ b/arch/powerpc/boot/dts/katmai.dts
@@ -505,6 +505,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = "/plb/opb/serial@f0000200";
+		stdout-path = "/plb/opb/serial@f0000200";
 	};
 };
diff --git a/arch/powerpc/boot/dts/klondike.dts b/arch/powerpc/boot/dts/klondike.dts
index 8c9429033618..d9613b7b945f 100644
--- a/arch/powerpc/boot/dts/klondike.dts
+++ b/arch/powerpc/boot/dts/klondike.dts
@@ -222,6 +222,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = "/plb/opb/serial@50001000";
+		stdout-path = "/plb/opb/serial@50001000";
 	};
 };
diff --git a/arch/powerpc/boot/dts/ksi8560.dts b/arch/powerpc/boot/dts/ksi8560.dts
index 5d68236e7c3c..fe6c17c8812a 100644
--- a/arch/powerpc/boot/dts/ksi8560.dts
+++ b/arch/powerpc/boot/dts/ksi8560.dts
@@ -339,6 +339,6 @@
 
 
 	chosen {
-		linux,stdout-path = "/soc/cpm/serial@91a00";
+		stdout-path = "/soc/cpm/serial@91a00";
 	};
 };
diff --git a/arch/powerpc/boot/dts/media5200.dts b/arch/powerpc/boot/dts/media5200.dts
index b5413cb85f13..843f156a49c4 100644
--- a/arch/powerpc/boot/dts/media5200.dts
+++ b/arch/powerpc/boot/dts/media5200.dts
@@ -25,7 +25,7 @@
 	};
 
 	chosen {
-		linux,stdout-path = &console;
+		stdout-path = &console;
 	};
 
 	cpus {
diff --git a/arch/powerpc/boot/dts/mpc8272ads.dts b/arch/powerpc/boot/dts/mpc8272ads.dts
index 6d2cddf64cfd..98282c18d989 100644
--- a/arch/powerpc/boot/dts/mpc8272ads.dts
+++ b/arch/powerpc/boot/dts/mpc8272ads.dts
@@ -262,6 +262,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = "/soc/cpm/serial@11a00";
+		stdout-path = "/soc/cpm/serial@11a00";
 	};
 };
diff --git a/arch/powerpc/boot/dts/mpc866ads.dts b/arch/powerpc/boot/dts/mpc866ads.dts
index 34c1f48b1a09..4443fac3f576 100644
--- a/arch/powerpc/boot/dts/mpc866ads.dts
+++ b/arch/powerpc/boot/dts/mpc866ads.dts
@@ -185,6 +185,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = "/soc/cpm/serial@a80";
+		stdout-path = "/soc/cpm/serial@a80";
 	};
 };
diff --git a/arch/powerpc/boot/dts/mpc885ads.dts b/arch/powerpc/boot/dts/mpc885ads.dts
index 4e93bd961e0f..5b037f51741d 100644
--- a/arch/powerpc/boot/dts/mpc885ads.dts
+++ b/arch/powerpc/boot/dts/mpc885ads.dts
@@ -227,6 +227,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = "/soc/cpm/serial@a80";
+		stdout-path = "/soc/cpm/serial@a80";
 	};
 };
diff --git a/arch/powerpc/boot/dts/mvme5100.dts b/arch/powerpc/boot/dts/mvme5100.dts
index 1ecb341a232a..a7eb6d25903d 100644
--- a/arch/powerpc/boot/dts/mvme5100.dts
+++ b/arch/powerpc/boot/dts/mvme5100.dts
@@ -179,7 +179,7 @@
 	};
 
 	chosen {
-		linux,stdout-path = &serial0;
+		stdout-path = &serial0;
         };
 
 };
diff --git a/arch/powerpc/boot/dts/obs600.dts b/arch/powerpc/boot/dts/obs600.dts
index 18e7d79ee4c3..d10b0411809b 100644
--- a/arch/powerpc/boot/dts/obs600.dts
+++ b/arch/powerpc/boot/dts/obs600.dts
@@ -309,6 +309,6 @@
 		};
 	};
         chosen {
-                linux,stdout-path = "/plb/opb/serial@ef600200";
+                stdout-path = "/plb/opb/serial@ef600200";
         };
 };
diff --git a/arch/powerpc/boot/dts/pq2fads.dts b/arch/powerpc/boot/dts/pq2fads.dts
index 0c525ff0c257..a477615e3468 100644
--- a/arch/powerpc/boot/dts/pq2fads.dts
+++ b/arch/powerpc/boot/dts/pq2fads.dts
@@ -242,6 +242,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = "/soc/cpm/serial@11a00";
+		stdout-path = "/soc/cpm/serial@11a00";
 	};
 };
diff --git a/arch/powerpc/boot/dts/rainier.dts b/arch/powerpc/boot/dts/rainier.dts
index 9684c80e4093..e59829cff556 100644
--- a/arch/powerpc/boot/dts/rainier.dts
+++ b/arch/powerpc/boot/dts/rainier.dts
@@ -344,7 +344,7 @@
 	};
 
 	chosen {
-		linux,stdout-path = "/plb/opb/serial@ef600300";
+		stdout-path = "/plb/opb/serial@ef600300";
 		bootargs = "console=ttyS0,115200";
 	};
 };
diff --git a/arch/powerpc/boot/dts/redwood.dts b/arch/powerpc/boot/dts/redwood.dts
index d86a3a498118..f3e046fb49e2 100644
--- a/arch/powerpc/boot/dts/redwood.dts
+++ b/arch/powerpc/boot/dts/redwood.dts
@@ -381,7 +381,7 @@
 
 
 	chosen {
-		linux,stdout-path = "/plb/opb/serial@ef600200";
+		stdout-path = "/plb/opb/serial@ef600200";
 	};
 
 };
diff --git a/arch/powerpc/boot/dts/sam440ep.dts b/arch/powerpc/boot/dts/sam440ep.dts
index 088361cf4636..7d15f18e1180 100644
--- a/arch/powerpc/boot/dts/sam440ep.dts
+++ b/arch/powerpc/boot/dts/sam440ep.dts
@@ -288,6 +288,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = "/plb/opb/serial@ef600300";
+		stdout-path = "/plb/opb/serial@ef600300";
 	};
 };
diff --git a/arch/powerpc/boot/dts/sequoia.dts b/arch/powerpc/boot/dts/sequoia.dts
index e41b88a5eaee..60d211da9593 100644
--- a/arch/powerpc/boot/dts/sequoia.dts
+++ b/arch/powerpc/boot/dts/sequoia.dts
@@ -406,7 +406,7 @@
 	};
 
 	chosen {
-		linux,stdout-path = "/plb/opb/serial@ef600300";
+		stdout-path = "/plb/opb/serial@ef600300";
 		bootargs = "console=ttyS0,115200";
 	};
 };
diff --git a/arch/powerpc/boot/dts/storcenter.dts b/arch/powerpc/boot/dts/storcenter.dts
index 2a555738517e..99f6f544dc5f 100644
--- a/arch/powerpc/boot/dts/storcenter.dts
+++ b/arch/powerpc/boot/dts/storcenter.dts
@@ -137,6 +137,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = &serial0;
+		stdout-path = &serial0;
 	};
 };
diff --git a/arch/powerpc/boot/dts/taishan.dts b/arch/powerpc/boot/dts/taishan.dts
index 1657ad0bf8a6..803f1bff7fa8 100644
--- a/arch/powerpc/boot/dts/taishan.dts
+++ b/arch/powerpc/boot/dts/taishan.dts
@@ -422,6 +422,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = "/plb/opb/serial@40000300";
+		stdout-path = "/plb/opb/serial@40000300";
 	};
 };
diff --git a/arch/powerpc/boot/dts/virtex440-ml507.dts b/arch/powerpc/boot/dts/virtex440-ml507.dts
index 391a4e299783..66f1c6312de6 100644
--- a/arch/powerpc/boot/dts/virtex440-ml507.dts
+++ b/arch/powerpc/boot/dts/virtex440-ml507.dts
@@ -32,7 +32,7 @@
 	} ;
 	chosen {
 		bootargs = "console=ttyS0 root=/dev/ram";
-		linux,stdout-path = &RS232_Uart_1;
+		stdout-path = &RS232_Uart_1;
 	} ;
 	cpus {
 		#address-cells = <1>;
diff --git a/arch/powerpc/boot/dts/virtex440-ml510.dts b/arch/powerpc/boot/dts/virtex440-ml510.dts
index 81201d3907e2..3b736ca26ddc 100644
--- a/arch/powerpc/boot/dts/virtex440-ml510.dts
+++ b/arch/powerpc/boot/dts/virtex440-ml510.dts
@@ -26,7 +26,7 @@
 	} ;
 	chosen {
 		bootargs = "console=ttyS0 root=/dev/ram";
-		linux,stdout-path = "/plb@0/serial@83e00000";
+		stdout-path = "/plb@0/serial@83e00000";
 	} ;
 	cpus {
 		#address-cells = <1>;
diff --git a/arch/powerpc/boot/dts/walnut.dts b/arch/powerpc/boot/dts/walnut.dts
index 4a9f726ada13..0872862c9363 100644
--- a/arch/powerpc/boot/dts/walnut.dts
+++ b/arch/powerpc/boot/dts/walnut.dts
@@ -241,6 +241,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = "/plb/opb/serial@ef600300";
+		stdout-path = "/plb/opb/serial@ef600300";
 	};
 };
diff --git a/arch/powerpc/boot/dts/warp.dts b/arch/powerpc/boot/dts/warp.dts
index ea9053ef4819..b4f32740870e 100644
--- a/arch/powerpc/boot/dts/warp.dts
+++ b/arch/powerpc/boot/dts/warp.dts
@@ -304,6 +304,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = "/plb/opb/serial@ef600300";
+		stdout-path = "/plb/opb/serial@ef600300";
 	};
 };
diff --git a/arch/powerpc/boot/dts/wii.dts b/arch/powerpc/boot/dts/wii.dts
index 17a5babb098d..104b1d6d5695 100644
--- a/arch/powerpc/boot/dts/wii.dts
+++ b/arch/powerpc/boot/dts/wii.dts
@@ -13,6 +13,7 @@
  */
 
 /dts-v1/;
+#include <dt-bindings/gpio/gpio.h>
 
 /*
  * This is commented-out for now.
@@ -176,6 +177,15 @@
 			compatible = "nintendo,hollywood-gpio";
 			reg = <0x0d8000c0 0x40>;
 			gpio-controller;
+			ngpios = <24>;
+
+			gpio-line-names =
+				"POWER", "SHUTDOWN", "FAN", "DC_DC",
+				"DI_SPIN", "SLOT_LED", "EJECT_BTN", "SLOT_IN",
+				"SENSOR_BAR", "DO_EJECT", "EEP_CS", "EEP_CLK",
+				"EEP_MOSI", "EEP_MISO", "AVE_SCL", "AVE_SDA",
+				"DEBUG0", "DEBUG1", "DEBUG2", "DEBUG3",
+				"DEBUG4", "DEBUG5", "DEBUG6", "DEBUG7";
 
 			/*
 			 * This is commented out while a standard binding
@@ -214,5 +224,16 @@
 			interrupts = <2>;
 		};
 	};
+
+	gpio-leds {
+		compatible = "gpio-leds";
+
+		/* This is the blue LED in the disk drive slot */
+		drive-slot {
+			label = "wii:blue:drive_slot";
+			gpios = <&GPIO 5 GPIO_ACTIVE_HIGH>;
+			panic-indicator;
+		};
+	};
 };
 
diff --git a/arch/powerpc/boot/dts/xpedite5200_xmon.dts b/arch/powerpc/boot/dts/xpedite5200_xmon.dts
index 646acfbef0dd..d5e14421c39a 100644
--- a/arch/powerpc/boot/dts/xpedite5200_xmon.dts
+++ b/arch/powerpc/boot/dts/xpedite5200_xmon.dts
@@ -503,6 +503,6 @@
 
 	/* Needed for dtbImage boot wrapper compatibility */
 	chosen {
-		linux,stdout-path = &serial0;
+		stdout-path = &serial0;
 	};
 };
diff --git a/arch/powerpc/boot/dts/yosemite.dts b/arch/powerpc/boot/dts/yosemite.dts
index 30bb4753577a..56508785ce13 100644
--- a/arch/powerpc/boot/dts/yosemite.dts
+++ b/arch/powerpc/boot/dts/yosemite.dts
@@ -327,6 +327,6 @@
 	};
 
 	chosen {
-		linux,stdout-path = "/plb/opb/serial@ef600300";
+		stdout-path = "/plb/opb/serial@ef600300";
 	};
 };
diff --git a/arch/powerpc/boot/libfdt_env.h b/arch/powerpc/boot/libfdt_env.h
index f52c31b1f48f..2a0c8b1bf147 100644
--- a/arch/powerpc/boot/libfdt_env.h
+++ b/arch/powerpc/boot/libfdt_env.h
@@ -7,8 +7,6 @@
 
 #include "of.h"
 
-typedef u32 uint32_t;
-typedef u64 uint64_t;
 typedef unsigned long uintptr_t;
 
 typedef __be16 fdt16_t;
diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index 7330150bfe34..d9713ad62e3c 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -62,6 +62,7 @@ void RunModeException(struct pt_regs *regs);
 void single_step_exception(struct pt_regs *regs);
 void program_check_exception(struct pt_regs *regs);
 void alignment_exception(struct pt_regs *regs);
+void slb_miss_bad_addr(struct pt_regs *regs);
 void StackOverflow(struct pt_regs *regs);
 void nonrecoverable_exception(struct pt_regs *regs);
 void kernel_fp_unavailable_exception(struct pt_regs *regs);
@@ -88,7 +89,18 @@ int sys_swapcontext(struct ucontext __user *old_ctx,
 long sys_swapcontext(struct ucontext __user *old_ctx,
 		    struct ucontext __user *new_ctx,
 		    int ctx_size, int r6, int r7, int r8, struct pt_regs *regs);
+int sys_debug_setcontext(struct ucontext __user *ctx,
+			 int ndbg, struct sig_dbg_op __user *dbg,
+			 int r6, int r7, int r8,
+			 struct pt_regs *regs);
+int
+ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timeval __user *tvp);
+unsigned long __init early_init(unsigned long dt_ptr);
+void __init machine_init(u64 dt_ptr);
 #endif
+
+long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low,
+		      u32 len_high, u32 len_low);
 long sys_switch_endian(void);
 notrace unsigned int __check_irq_replay(void);
 void notrace restore_interrupts(void);
@@ -126,4 +138,7 @@ extern int __ucmpdi2(u64, u64);
 void _mcount(void);
 unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip);
 
+void pnv_power9_force_smt4_catch(void);
+void pnv_power9_force_smt4_release(void);
+
 #endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */
diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h
index 10daa1d56e0a..c7c63959ba91 100644
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -35,7 +35,8 @@
 #define rmb()  __asm__ __volatile__ ("sync" : : : "memory")
 #define wmb()  __asm__ __volatile__ ("sync" : : : "memory")
 
-#ifdef __SUBARCH_HAS_LWSYNC
+/* The sub-arch has lwsync */
+#if defined(__powerpc64__) || defined(CONFIG_PPC_E500MC)
 #    define SMPWMB      LWSYNC
 #else
 #    define SMPWMB      eieio
diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index 67c5475311ee..4b5423030d4b 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -11,6 +11,12 @@
 #define H_PUD_INDEX_SIZE  9
 #define H_PGD_INDEX_SIZE  9
 
+/*
+ * Each context is 512TB, but with 4K pages we restrict the max TASK size to
+ * 64TB. Hence also limit the max EA bits per context to 46 (64TB).
+ */
+#define MAX_EA_BITS_PER_CONTEXT		46
+
 #ifndef __ASSEMBLY__
 #define H_PTE_TABLE_SIZE	(sizeof(pte_t) << H_PTE_INDEX_SIZE)
 #define H_PMD_TABLE_SIZE	(sizeof(pmd_t) << H_PMD_INDEX_SIZE)
@@ -34,6 +40,14 @@
 #define H_PAGE_COMBO	0x0
 #define H_PTE_FRAG_NR	0
 #define H_PTE_FRAG_SIZE_SHIFT  0
+
+/* memory key bits, only 8 keys supported */
+#define H_PTE_PKEY_BIT0	0
+#define H_PTE_PKEY_BIT1	0
+#define H_PTE_PKEY_BIT2	_RPAGE_RSV3
+#define H_PTE_PKEY_BIT3	_RPAGE_RSV4
+#define H_PTE_PKEY_BIT4	_RPAGE_RSV5
+
 /*
  * On all 4K setups, remap_4k_pfn() equates to remap_pfn_range()
  */
diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index 3bcf269f8f55..cc82745355b3 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -4,10 +4,16 @@
 
 #define H_PTE_INDEX_SIZE  8
 #define H_PMD_INDEX_SIZE  10
-#define H_PUD_INDEX_SIZE  7
+#define H_PUD_INDEX_SIZE  10
 #define H_PGD_INDEX_SIZE  8
 
 /*
+ * Each context is 512TB size. SLB miss for first context/default context
+ * is handled in the hotpath.
+ */
+#define MAX_EA_BITS_PER_CONTEXT		49
+
+/*
  * 64k aligned address free up few of the lower bits of RPN for us
  * We steal that here. For more deatils look at pte_pfn/pfn_pte()
  */
@@ -16,6 +22,13 @@
 #define H_PAGE_BUSY	_RPAGE_RPN44     /* software: PTE & hash are busy */
 #define H_PAGE_HASHPTE	_RPAGE_RPN43	/* PTE has associated HPTE */
 
+/* memory key bits. */
+#define H_PTE_PKEY_BIT0	_RPAGE_RSV1
+#define H_PTE_PKEY_BIT1	_RPAGE_RSV2
+#define H_PTE_PKEY_BIT2	_RPAGE_RSV3
+#define H_PTE_PKEY_BIT3	_RPAGE_RSV4
+#define H_PTE_PKEY_BIT4	_RPAGE_RSV5
+
 /*
  * We need to differentiate between explicit huge page and THP huge
  * page, since THP huge page also need to track real subpage details
@@ -25,15 +38,13 @@
 /* PTE flags to conserve for HPTE identification */
 #define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_HASHPTE | H_PAGE_COMBO)
 /*
- * we support 16 fragments per PTE page of 64K size.
- */
-#define H_PTE_FRAG_NR	16
-/*
  * We use a 2K PTE page fragment and another 2K for storing
  * real_pte_t hash index
+ * 8 bytes per PTE entry and another 8 bytes per entry for storing
+ * slot details.
  */
-#define H_PTE_FRAG_SIZE_SHIFT  12
-#define PTE_FRAG_SIZE (1UL << PTE_FRAG_SIZE_SHIFT)
+#define H_PTE_FRAG_SIZE_SHIFT  (H_PTE_INDEX_SIZE + 3 + 1)
+#define H_PTE_FRAG_NR	(PAGE_SIZE >> H_PTE_FRAG_SIZE_SHIFT)
 
 #ifndef __ASSEMBLY__
 #include <asm/errno.h>
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 935adcd92a81..cc8cd656ccfe 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -212,7 +212,7 @@ extern int __meminit hash__vmemmap_create_mapping(unsigned long start,
 extern void hash__vmemmap_remove_mapping(unsigned long start,
 				     unsigned long page_size);
 
-int hash__create_section_mapping(unsigned long start, unsigned long end);
+int hash__create_section_mapping(unsigned long start, unsigned long end, int nid);
 int hash__remove_section_mapping(unsigned long start, unsigned long end);
 
 #endif /* !__ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index 37671feb2bf6..5094696eecd6 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -80,8 +80,29 @@ struct spinlock;
 /* Maximum possible number of NPUs in a system. */
 #define NV_MAX_NPUS 8
 
+/*
+ * One bit per slice. The low slices cover 256MB segments up to the 4GB
+ * boundary, which gives us 16 low slices. Above that we track slices in
+ * 1TB units.
+ */
+struct slice_mask {
+	u64 low_slices;
+	DECLARE_BITMAP(high_slices, SLICE_NUM_HIGH);
+};
+
 typedef struct {
-	mm_context_id_t id;
+	union {
+		/*
+		 * We use id as the PIDR content for radix. On hash we can use
+		 * more than one id. The extended ids are used when we start
+		 * having address above 512TB. We allocate one extended id
+		 * for each 512TB. The new id is then used with the 49 bit
+		 * EA to build a new VA. We always use ESID_BITS_1T_MASK bits
+		 * from EA and new context ids to build the new VAs.
+		 */
+		mm_context_id_t id;
+		mm_context_id_t extended_id[TASK_SIZE_USER64/TASK_CONTEXT_SIZE];
+	};
 	u16 user_psize;		/* page size index */
 
 	/* Number of bits in the mm_cpumask */
@@ -94,9 +115,18 @@ typedef struct {
 	struct npu_context *npu_context;
 
 #ifdef CONFIG_PPC_MM_SLICES
-	u64 low_slices_psize;	/* SLB page size encodings */
+	 /* SLB page size encodings*/
+	unsigned char low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE];
 	unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
 	unsigned long slb_addr_limit;
+# ifdef CONFIG_PPC_64K_PAGES
+	struct slice_mask mask_64k;
+# endif
+	struct slice_mask mask_4k;
+# ifdef CONFIG_HUGETLB_PAGE
+	struct slice_mask mask_16m;
+	struct slice_mask mask_16g;
+# endif
 #else
 	u16 sllp;		/* SLB page size encoding */
 #endif
@@ -177,5 +207,25 @@ extern void radix_init_pseries(void);
 static inline void radix_init_pseries(void) { };
 #endif
 
+static inline int get_ea_context(mm_context_t *ctx, unsigned long ea)
+{
+	int index = ea >> MAX_EA_BITS_PER_CONTEXT;
+
+	if (likely(index < ARRAY_SIZE(ctx->extended_id)))
+		return ctx->extended_id[index];
+
+	/* should never happen */
+	WARN_ON(1);
+	return 0;
+}
+
+static inline unsigned long get_user_vsid(mm_context_t *ctx,
+					  unsigned long ea, int ssize)
+{
+	unsigned long context = get_ea_context(ctx, ea);
+
+	return get_vsid(context, ea, ssize);
+}
+
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_POWERPC_BOOK3S_64_MMU_H_ */
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index 4746bc68d446..558a159600ad 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -80,8 +80,18 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 
 	pgd = kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
 			       pgtable_gfp_flags(mm, GFP_KERNEL));
+	/*
+	 * With hugetlb, we don't clear the second half of the page table.
+	 * If we share the same slab cache with the pmd or pud level table,
+	 * we need to make sure we zero out the full table on alloc.
+	 * With 4K we don't store slot in the second half. Hence we don't
+	 * need to do this for 4k.
+	 */
+#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_PPC_64K_PAGES) && \
+	((H_PGD_INDEX_SIZE == H_PUD_CACHE_INDEX) ||		     \
+	 (H_PGD_INDEX_SIZE == H_PMD_CACHE_INDEX))
 	memset(pgd, 0, PGD_TABLE_SIZE);
-
+#endif
 	return pgd;
 }
 
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index a6b9f1d74600..47b5ffc8715d 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -60,25 +60,6 @@
 /* Max physical address bit as per radix table */
 #define _RPAGE_PA_MAX		57
 
-#ifdef CONFIG_PPC_MEM_KEYS
-#ifdef CONFIG_PPC_64K_PAGES
-#define H_PTE_PKEY_BIT0	_RPAGE_RSV1
-#define H_PTE_PKEY_BIT1	_RPAGE_RSV2
-#else /* CONFIG_PPC_64K_PAGES */
-#define H_PTE_PKEY_BIT0	0 /* _RPAGE_RSV1 is not available */
-#define H_PTE_PKEY_BIT1	0 /* _RPAGE_RSV2 is not available */
-#endif /* CONFIG_PPC_64K_PAGES */
-#define H_PTE_PKEY_BIT2	_RPAGE_RSV3
-#define H_PTE_PKEY_BIT3	_RPAGE_RSV4
-#define H_PTE_PKEY_BIT4	_RPAGE_RSV5
-#else /*  CONFIG_PPC_MEM_KEYS */
-#define H_PTE_PKEY_BIT0	0
-#define H_PTE_PKEY_BIT1	0
-#define H_PTE_PKEY_BIT2	0
-#define H_PTE_PKEY_BIT3	0
-#define H_PTE_PKEY_BIT4	0
-#endif /*  CONFIG_PPC_MEM_KEYS */
-
 /*
  * Max physical address bit we will use for now.
  *
diff --git a/arch/powerpc/include/asm/book3s/64/radix-4k.h b/arch/powerpc/include/asm/book3s/64/radix-4k.h
index a61aa9cd63ec..ca366ec86310 100644
--- a/arch/powerpc/include/asm/book3s/64/radix-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/radix-4k.h
@@ -9,5 +9,10 @@
 #define RADIX_PMD_INDEX_SIZE  9  /* 1G huge page */
 #define RADIX_PUD_INDEX_SIZE	 9
 #define RADIX_PGD_INDEX_SIZE  13
+/*
+ * One fragment per page
+ */
+#define RADIX_PTE_FRAG_SIZE_SHIFT  (RADIX_PTE_INDEX_SIZE + 3)
+#define RADIX_PTE_FRAG_NR	(PAGE_SIZE >> RADIX_PTE_FRAG_SIZE_SHIFT)
 
 #endif /* _ASM_POWERPC_PGTABLE_RADIX_4K_H */
diff --git a/arch/powerpc/include/asm/book3s/64/radix-64k.h b/arch/powerpc/include/asm/book3s/64/radix-64k.h
index c7e71ba29555..830082496876 100644
--- a/arch/powerpc/include/asm/book3s/64/radix-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/radix-64k.h
@@ -10,4 +10,10 @@
 #define RADIX_PUD_INDEX_SIZE	 9
 #define RADIX_PGD_INDEX_SIZE  13
 
+/*
+ * We use a 256-byte PTE page fragment in radix,
+ * with 8 bytes per PTE entry.
+ */
+#define RADIX_PTE_FRAG_SIZE_SHIFT  (RADIX_PTE_INDEX_SIZE + 3)
+#define RADIX_PTE_FRAG_NR	(PAGE_SIZE >> RADIX_PTE_FRAG_SIZE_SHIFT)
 #endif /* _ASM_POWERPC_PGTABLE_RADIX_64K_H */
diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index 365010f66570..705193e7192f 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -313,7 +313,7 @@ static inline unsigned long radix__get_tree_size(void)
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-int radix__create_section_mapping(unsigned long start, unsigned long end);
+int radix__create_section_mapping(unsigned long start, unsigned long end, int nid);
 int radix__remove_section_mapping(unsigned long start, unsigned long end);
 #endif /* CONFIG_MEMORY_HOTPLUG */
 #endif /* __ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/book3s/64/slice.h b/arch/powerpc/include/asm/book3s/64/slice.h
new file mode 100644
index 000000000000..db0dedab65ee
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/slice.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_64_SLICE_H
+#define _ASM_POWERPC_BOOK3S_64_SLICE_H
+
+#ifdef CONFIG_PPC_MM_SLICES
+
+#define SLICE_LOW_SHIFT		28
+#define SLICE_LOW_TOP		(0x100000000ul)
+#define SLICE_NUM_LOW		(SLICE_LOW_TOP >> SLICE_LOW_SHIFT)
+#define GET_LOW_SLICE_INDEX(addr)	((addr) >> SLICE_LOW_SHIFT)
+
+#define SLICE_HIGH_SHIFT	40
+#define SLICE_NUM_HIGH		(H_PGTABLE_RANGE >> SLICE_HIGH_SHIFT)
+#define GET_HIGH_SLICE_INDEX(addr)	((addr) >> SLICE_HIGH_SHIFT)
+
+#else /* CONFIG_PPC_MM_SLICES */
+
+#define get_slice_psize(mm, addr)	((mm)->context.user_psize)
+#define slice_set_user_psize(mm, psize)		\
+do {						\
+	(mm)->context.user_psize = (psize);	\
+	(mm)->context.sllp = SLB_VSID_USER | mmu_psize_defs[(psize)].sllp; \
+} while (0)
+
+#endif /* CONFIG_PPC_MM_SLICES */
+
+#endif /* _ASM_POWERPC_BOOK3S_64_SLICE_H */
diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
index b77f0364df94..11843e37d9cf 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -99,7 +99,6 @@ static inline void invalidate_dcache_range(unsigned long start,
 #ifdef CONFIG_PPC64
 extern void flush_dcache_range(unsigned long start, unsigned long stop);
 extern void flush_inval_dcache_range(unsigned long start, unsigned long stop);
-extern void flush_dcache_phys_range(unsigned long start, unsigned long stop);
 #endif
 
 #define copy_to_user_page(vma, page, vaddr, dst, src, len) \
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index 2e2bacbdf6ed..66fcab13c8b4 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -131,41 +131,48 @@ static inline void cpu_feature_keys_init(void) { }
 
 /* CPU kernel features */
 
-/* Retain the 32b definitions all use bottom half of word */
+/* Definitions for features that we have on both 32-bit and 64-bit chips */
 #define CPU_FTR_COHERENT_ICACHE		ASM_CONST(0x00000001)
-#define CPU_FTR_L2CR			ASM_CONST(0x00000002)
-#define CPU_FTR_SPEC7450		ASM_CONST(0x00000004)
-#define CPU_FTR_ALTIVEC			ASM_CONST(0x00000008)
-#define CPU_FTR_TAU			ASM_CONST(0x00000010)
-#define CPU_FTR_CAN_DOZE		ASM_CONST(0x00000020)
-#define CPU_FTR_USE_TB			ASM_CONST(0x00000040)
-#define CPU_FTR_L2CSR			ASM_CONST(0x00000080)
-#define CPU_FTR_601			ASM_CONST(0x00000100)
-#define CPU_FTR_DBELL			ASM_CONST(0x00000200)
-#define CPU_FTR_CAN_NAP			ASM_CONST(0x00000400)
-#define CPU_FTR_L3CR			ASM_CONST(0x00000800)
-#define CPU_FTR_L3_DISABLE_NAP		ASM_CONST(0x00001000)
-#define CPU_FTR_NAP_DISABLE_L2_PR	ASM_CONST(0x00002000)
-#define CPU_FTR_DUAL_PLL_750FX		ASM_CONST(0x00004000)
-#define CPU_FTR_NO_DPM			ASM_CONST(0x00008000)
-#define CPU_FTR_476_DD2			ASM_CONST(0x00010000)
-#define CPU_FTR_NEED_COHERENT		ASM_CONST(0x00020000)
-#define CPU_FTR_NO_BTIC			ASM_CONST(0x00040000)
-#define CPU_FTR_DEBUG_LVL_EXC		ASM_CONST(0x00080000)
-#define CPU_FTR_NODSISRALIGN		ASM_CONST(0x00100000)
-#define CPU_FTR_PPC_LE			ASM_CONST(0x00200000)
-#define CPU_FTR_REAL_LE			ASM_CONST(0x00400000)
-#define CPU_FTR_FPU_UNAVAILABLE		ASM_CONST(0x00800000)
-#define CPU_FTR_UNIFIED_ID_CACHE	ASM_CONST(0x01000000)
-#define CPU_FTR_SPE			ASM_CONST(0x02000000)
-#define CPU_FTR_NEED_PAIRED_STWCX	ASM_CONST(0x04000000)
-#define CPU_FTR_LWSYNC			ASM_CONST(0x08000000)
-#define CPU_FTR_NOEXECUTE		ASM_CONST(0x10000000)
-#define CPU_FTR_INDEXED_DCR		ASM_CONST(0x20000000)
-#define CPU_FTR_EMB_HV			ASM_CONST(0x40000000)
+#define CPU_FTR_ALTIVEC			ASM_CONST(0x00000002)
+#define CPU_FTR_DBELL			ASM_CONST(0x00000004)
+#define CPU_FTR_CAN_NAP			ASM_CONST(0x00000008)
+#define CPU_FTR_DEBUG_LVL_EXC		ASM_CONST(0x00000010)
+#define CPU_FTR_NODSISRALIGN		ASM_CONST(0x00000020)
+#define CPU_FTR_FPU_UNAVAILABLE		ASM_CONST(0x00000040)
+#define CPU_FTR_LWSYNC			ASM_CONST(0x00000080)
+#define CPU_FTR_NOEXECUTE		ASM_CONST(0x00000100)
+#define CPU_FTR_EMB_HV			ASM_CONST(0x00000200)
+
+/* Definitions for features that only exist on 32-bit chips */
+#ifdef CONFIG_PPC32
+#define CPU_FTR_601			ASM_CONST(0x00001000)
+#define CPU_FTR_L2CR			ASM_CONST(0x00002000)
+#define CPU_FTR_SPEC7450		ASM_CONST(0x00004000)
+#define CPU_FTR_TAU			ASM_CONST(0x00008000)
+#define CPU_FTR_CAN_DOZE		ASM_CONST(0x00010000)
+#define CPU_FTR_USE_RTC			ASM_CONST(0x00020000)
+#define CPU_FTR_L3CR			ASM_CONST(0x00040000)
+#define CPU_FTR_L3_DISABLE_NAP		ASM_CONST(0x00080000)
+#define CPU_FTR_NAP_DISABLE_L2_PR	ASM_CONST(0x00100000)
+#define CPU_FTR_DUAL_PLL_750FX		ASM_CONST(0x00200000)
+#define CPU_FTR_NO_DPM			ASM_CONST(0x00400000)
+#define CPU_FTR_476_DD2			ASM_CONST(0x00800000)
+#define CPU_FTR_NEED_COHERENT		ASM_CONST(0x01000000)
+#define CPU_FTR_NO_BTIC			ASM_CONST(0x02000000)
+#define CPU_FTR_PPC_LE			ASM_CONST(0x04000000)
+#define CPU_FTR_UNIFIED_ID_CACHE	ASM_CONST(0x08000000)
+#define CPU_FTR_SPE			ASM_CONST(0x10000000)
+#define CPU_FTR_NEED_PAIRED_STWCX	ASM_CONST(0x20000000)
+#define CPU_FTR_INDEXED_DCR		ASM_CONST(0x40000000)
+
+#else	/* CONFIG_PPC32 */
+/* Define these to 0 for the sake of tests in common code */
+#define CPU_FTR_601			(0)
+#define CPU_FTR_PPC_LE			(0)
+#endif
 
 /*
- * Add the 64-bit processor unique features in the top half of the word;
+ * Definitions for the 64-bit processor unique features;
  * on 32-bit, make the names available but defined to be 0.
  */
 #ifdef __powerpc64__
@@ -174,38 +181,40 @@ static inline void cpu_feature_keys_init(void) { }
 #define LONG_ASM_CONST(x)		0
 #endif
 
-#define CPU_FTR_HVMODE			LONG_ASM_CONST(0x0000000100000000)
-#define CPU_FTR_ARCH_201		LONG_ASM_CONST(0x0000000200000000)
-#define CPU_FTR_ARCH_206		LONG_ASM_CONST(0x0000000400000000)
-#define CPU_FTR_ARCH_207S		LONG_ASM_CONST(0x0000000800000000)
-#define CPU_FTR_ARCH_300		LONG_ASM_CONST(0x0000001000000000)
-#define CPU_FTR_MMCRA			LONG_ASM_CONST(0x0000002000000000)
-#define CPU_FTR_CTRL			LONG_ASM_CONST(0x0000004000000000)
-#define CPU_FTR_SMT			LONG_ASM_CONST(0x0000008000000000)
-#define CPU_FTR_PAUSE_ZERO		LONG_ASM_CONST(0x0000010000000000)
-#define CPU_FTR_PURR			LONG_ASM_CONST(0x0000020000000000)
-#define CPU_FTR_CELL_TB_BUG		LONG_ASM_CONST(0x0000040000000000)
-#define CPU_FTR_SPURR			LONG_ASM_CONST(0x0000080000000000)
-#define CPU_FTR_DSCR			LONG_ASM_CONST(0x0000100000000000)
-#define CPU_FTR_VSX			LONG_ASM_CONST(0x0000200000000000)
-#define CPU_FTR_SAO			LONG_ASM_CONST(0x0000400000000000)
-#define CPU_FTR_CP_USE_DCBTZ		LONG_ASM_CONST(0x0000800000000000)
-#define CPU_FTR_UNALIGNED_LD_STD	LONG_ASM_CONST(0x0001000000000000)
-#define CPU_FTR_ASYM_SMT		LONG_ASM_CONST(0x0002000000000000)
-#define CPU_FTR_STCX_CHECKS_ADDRESS	LONG_ASM_CONST(0x0004000000000000)
-#define CPU_FTR_POPCNTB			LONG_ASM_CONST(0x0008000000000000)
-#define CPU_FTR_POPCNTD			LONG_ASM_CONST(0x0010000000000000)
-#define CPU_FTR_PKEY			LONG_ASM_CONST(0x0020000000000000)
-#define CPU_FTR_VMX_COPY		LONG_ASM_CONST(0x0040000000000000)
-#define CPU_FTR_TM			LONG_ASM_CONST(0x0080000000000000)
-#define CPU_FTR_CFAR			LONG_ASM_CONST(0x0100000000000000)
-#define	CPU_FTR_HAS_PPR			LONG_ASM_CONST(0x0200000000000000)
-#define CPU_FTR_DAWR			LONG_ASM_CONST(0x0400000000000000)
-#define CPU_FTR_DABRX			LONG_ASM_CONST(0x0800000000000000)
-#define CPU_FTR_PMAO_BUG		LONG_ASM_CONST(0x1000000000000000)
-#define CPU_FTR_P9_TLBIE_BUG		LONG_ASM_CONST(0x2000000000000000)
-#define CPU_FTR_POWER9_DD1		LONG_ASM_CONST(0x4000000000000000)
-#define CPU_FTR_POWER9_DD2_1		LONG_ASM_CONST(0x8000000000000000)
+#define CPU_FTR_REAL_LE			LONG_ASM_CONST(0x0000000000001000)
+#define CPU_FTR_HVMODE			LONG_ASM_CONST(0x0000000000002000)
+#define CPU_FTR_ARCH_206		LONG_ASM_CONST(0x0000000000008000)
+#define CPU_FTR_ARCH_207S		LONG_ASM_CONST(0x0000000000010000)
+#define CPU_FTR_ARCH_300		LONG_ASM_CONST(0x0000000000020000)
+#define CPU_FTR_MMCRA			LONG_ASM_CONST(0x0000000000040000)
+#define CPU_FTR_CTRL			LONG_ASM_CONST(0x0000000000080000)
+#define CPU_FTR_SMT			LONG_ASM_CONST(0x0000000000100000)
+#define CPU_FTR_PAUSE_ZERO		LONG_ASM_CONST(0x0000000000200000)
+#define CPU_FTR_PURR			LONG_ASM_CONST(0x0000000000400000)
+#define CPU_FTR_CELL_TB_BUG		LONG_ASM_CONST(0x0000000000800000)
+#define CPU_FTR_SPURR			LONG_ASM_CONST(0x0000000001000000)
+#define CPU_FTR_DSCR			LONG_ASM_CONST(0x0000000002000000)
+#define CPU_FTR_VSX			LONG_ASM_CONST(0x0000000004000000)
+#define CPU_FTR_SAO			LONG_ASM_CONST(0x0000000008000000)
+#define CPU_FTR_CP_USE_DCBTZ		LONG_ASM_CONST(0x0000000010000000)
+#define CPU_FTR_UNALIGNED_LD_STD	LONG_ASM_CONST(0x0000000020000000)
+#define CPU_FTR_ASYM_SMT		LONG_ASM_CONST(0x0000000040000000)
+#define CPU_FTR_STCX_CHECKS_ADDRESS	LONG_ASM_CONST(0x0000000080000000)
+#define CPU_FTR_POPCNTB			LONG_ASM_CONST(0x0000000100000000)
+#define CPU_FTR_POPCNTD			LONG_ASM_CONST(0x0000000200000000)
+#define CPU_FTR_PKEY			LONG_ASM_CONST(0x0000000400000000)
+#define CPU_FTR_VMX_COPY		LONG_ASM_CONST(0x0000000800000000)
+#define CPU_FTR_TM			LONG_ASM_CONST(0x0000001000000000)
+#define CPU_FTR_CFAR			LONG_ASM_CONST(0x0000002000000000)
+#define	CPU_FTR_HAS_PPR			LONG_ASM_CONST(0x0000004000000000)
+#define CPU_FTR_DAWR			LONG_ASM_CONST(0x0000008000000000)
+#define CPU_FTR_DABRX			LONG_ASM_CONST(0x0000010000000000)
+#define CPU_FTR_PMAO_BUG		LONG_ASM_CONST(0x0000020000000000)
+#define CPU_FTR_POWER9_DD1		LONG_ASM_CONST(0x0000040000000000)
+#define CPU_FTR_POWER9_DD2_1		LONG_ASM_CONST(0x0000080000000000)
+#define CPU_FTR_P9_TM_HV_ASSIST		LONG_ASM_CONST(0x0000100000000000)
+#define CPU_FTR_P9_TM_XER_SO_BUG	LONG_ASM_CONST(0x0000200000000000)
+#define CPU_FTR_P9_TLBIE_BUG		LONG_ASM_CONST(0x0000400000000000)
 
 #ifndef __ASSEMBLY__
 
@@ -286,21 +295,19 @@ static inline void cpu_feature_keys_init(void) { }
 #endif
 
 #define CPU_FTRS_PPC601	(CPU_FTR_COMMON | CPU_FTR_601 | \
-	CPU_FTR_COHERENT_ICACHE | CPU_FTR_UNIFIED_ID_CACHE)
-#define CPU_FTRS_603	(CPU_FTR_COMMON | \
-	    CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \
+	CPU_FTR_COHERENT_ICACHE | CPU_FTR_UNIFIED_ID_CACHE | CPU_FTR_USE_RTC)
+#define CPU_FTRS_603	(CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE | \
 	    CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE)
-#define CPU_FTRS_604	(CPU_FTR_COMMON | \
-	    CPU_FTR_USE_TB | CPU_FTR_PPC_LE)
+#define CPU_FTRS_604	(CPU_FTR_COMMON | CPU_FTR_PPC_LE)
 #define CPU_FTRS_740_NOTAU	(CPU_FTR_COMMON | \
-	    CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+	    CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_L2CR | \
 	    CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE)
 #define CPU_FTRS_740	(CPU_FTR_COMMON | \
-	    CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+	    CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_L2CR | \
 	    CPU_FTR_TAU | CPU_FTR_MAYBE_CAN_NAP | \
 	    CPU_FTR_PPC_LE)
 #define CPU_FTRS_750	(CPU_FTR_COMMON | \
-	    CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+	    CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_L2CR | \
 	    CPU_FTR_TAU | CPU_FTR_MAYBE_CAN_NAP | \
 	    CPU_FTR_PPC_LE)
 #define CPU_FTRS_750CL	(CPU_FTRS_750)
@@ -309,125 +316,114 @@ static inline void cpu_feature_keys_init(void) { }
 #define CPU_FTRS_750FX	(CPU_FTRS_750 | CPU_FTR_DUAL_PLL_750FX)
 #define CPU_FTRS_750GX	(CPU_FTRS_750FX)
 #define CPU_FTRS_7400_NOTAU	(CPU_FTR_COMMON | \
-	    CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+	    CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_L2CR | \
 	    CPU_FTR_ALTIVEC_COMP | \
 	    CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE)
 #define CPU_FTRS_7400	(CPU_FTR_COMMON | \
-	    CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+	    CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_L2CR | \
 	    CPU_FTR_TAU | CPU_FTR_ALTIVEC_COMP | \
 	    CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE)
 #define CPU_FTRS_7450_20	(CPU_FTR_COMMON | \
-	    CPU_FTR_USE_TB | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
+	    CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
 	    CPU_FTR_L3CR | CPU_FTR_SPEC7450 | \
 	    CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
 #define CPU_FTRS_7450_21	(CPU_FTR_COMMON | \
-	    CPU_FTR_USE_TB | \
 	    CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
 	    CPU_FTR_L3CR | CPU_FTR_SPEC7450 | \
 	    CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_L3_DISABLE_NAP | \
 	    CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
 #define CPU_FTRS_7450_23	(CPU_FTR_COMMON | \
-	    CPU_FTR_USE_TB | CPU_FTR_NEED_PAIRED_STWCX | \
+	    CPU_FTR_NEED_PAIRED_STWCX | \
 	    CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
 	    CPU_FTR_L3CR | CPU_FTR_SPEC7450 | \
 	    CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE)
 #define CPU_FTRS_7455_1	(CPU_FTR_COMMON | \
-	    CPU_FTR_USE_TB | CPU_FTR_NEED_PAIRED_STWCX | \
+	    CPU_FTR_NEED_PAIRED_STWCX | \
 	    CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | CPU_FTR_L3CR | \
 	    CPU_FTR_SPEC7450 | CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE)
 #define CPU_FTRS_7455_20	(CPU_FTR_COMMON | \
-	    CPU_FTR_USE_TB | CPU_FTR_NEED_PAIRED_STWCX | \
+	    CPU_FTR_NEED_PAIRED_STWCX | \
 	    CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
 	    CPU_FTR_L3CR | CPU_FTR_SPEC7450 | \
 	    CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_L3_DISABLE_NAP | \
 	    CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE)
 #define CPU_FTRS_7455	(CPU_FTR_COMMON | \
-	    CPU_FTR_USE_TB | \
 	    CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
 	    CPU_FTR_L3CR | CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \
 	    CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
 #define CPU_FTRS_7447_10	(CPU_FTR_COMMON | \
-	    CPU_FTR_USE_TB | \
 	    CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
 	    CPU_FTR_L3CR | CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \
 	    CPU_FTR_NEED_COHERENT | CPU_FTR_NO_BTIC | CPU_FTR_PPC_LE | \
 	    CPU_FTR_NEED_PAIRED_STWCX)
 #define CPU_FTRS_7447	(CPU_FTR_COMMON | \
-	    CPU_FTR_USE_TB | \
 	    CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
 	    CPU_FTR_L3CR | CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \
 	    CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
 #define CPU_FTRS_7447A	(CPU_FTR_COMMON | \
-	    CPU_FTR_USE_TB | \
 	    CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
 	    CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \
 	    CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
 #define CPU_FTRS_7448	(CPU_FTR_COMMON | \
-	    CPU_FTR_USE_TB | \
 	    CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
 	    CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \
 	    CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
-#define CPU_FTRS_82XX	(CPU_FTR_COMMON | \
-	    CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB)
+#define CPU_FTRS_82XX	(CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE)
 #define CPU_FTRS_G2_LE	(CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE | \
-	    CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP)
+	    CPU_FTR_MAYBE_CAN_NAP)
 #define CPU_FTRS_E300	(CPU_FTR_MAYBE_CAN_DOZE | \
-	    CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP | \
+	    CPU_FTR_MAYBE_CAN_NAP | \
 	    CPU_FTR_COMMON)
 #define CPU_FTRS_E300C2	(CPU_FTR_MAYBE_CAN_DOZE | \
-	    CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP | \
+	    CPU_FTR_MAYBE_CAN_NAP | \
 	    CPU_FTR_COMMON | CPU_FTR_FPU_UNAVAILABLE)
-#define CPU_FTRS_CLASSIC32	(CPU_FTR_COMMON | CPU_FTR_USE_TB)
-#define CPU_FTRS_8XX	(CPU_FTR_USE_TB | CPU_FTR_NOEXECUTE)
-#define CPU_FTRS_40X	(CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
-#define CPU_FTRS_44X	(CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
-#define CPU_FTRS_440x6	(CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE | \
+#define CPU_FTRS_CLASSIC32	(CPU_FTR_COMMON)
+#define CPU_FTRS_8XX	(CPU_FTR_NOEXECUTE)
+#define CPU_FTRS_40X	(CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
+#define CPU_FTRS_44X	(CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
+#define CPU_FTRS_440x6	(CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE | \
 	    CPU_FTR_INDEXED_DCR)
 #define CPU_FTRS_47X	(CPU_FTRS_440x6)
-#define CPU_FTRS_E200	(CPU_FTR_USE_TB | CPU_FTR_SPE_COMP | \
+#define CPU_FTRS_E200	(CPU_FTR_SPE_COMP | \
 	    CPU_FTR_NODSISRALIGN | CPU_FTR_COHERENT_ICACHE | \
 	    CPU_FTR_UNIFIED_ID_CACHE | CPU_FTR_NOEXECUTE | \
 	    CPU_FTR_DEBUG_LVL_EXC)
-#define CPU_FTRS_E500	(CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \
+#define CPU_FTRS_E500	(CPU_FTR_MAYBE_CAN_DOZE | \
 	    CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_NODSISRALIGN | \
 	    CPU_FTR_NOEXECUTE)
-#define CPU_FTRS_E500_2	(CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \
+#define CPU_FTRS_E500_2	(CPU_FTR_MAYBE_CAN_DOZE | \
 	    CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | \
 	    CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
-#define CPU_FTRS_E500MC	(CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \
-	    CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
+#define CPU_FTRS_E500MC	(CPU_FTR_NODSISRALIGN | \
+	    CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
 	    CPU_FTR_DBELL | CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV)
 /*
  * e5500/e6500 erratum A-006958 is a timebase bug that can use the
  * same workaround as CPU_FTR_CELL_TB_BUG.
  */
-#define CPU_FTRS_E5500	(CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \
-	    CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
+#define CPU_FTRS_E5500	(CPU_FTR_NODSISRALIGN | \
+	    CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
 	    CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
 	    CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV | CPU_FTR_CELL_TB_BUG)
-#define CPU_FTRS_E6500	(CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \
-	    CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
+#define CPU_FTRS_E6500	(CPU_FTR_NODSISRALIGN | \
+	    CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
 	    CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
 	    CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV | CPU_FTR_ALTIVEC_COMP | \
 	    CPU_FTR_CELL_TB_BUG | CPU_FTR_SMT)
 #define CPU_FTRS_GENERIC_32	(CPU_FTR_COMMON | CPU_FTR_NODSISRALIGN)
 
 /* 64-bit CPUs */
-#define CPU_FTRS_POWER4	(CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+#define CPU_FTRS_PPC970	(CPU_FTR_LWSYNC | \
 	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
-	    CPU_FTR_MMCRA | CPU_FTR_CP_USE_DCBTZ | \
-	    CPU_FTR_STCX_CHECKS_ADDRESS)
-#define CPU_FTRS_PPC970	(CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
-	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_201 | \
 	    CPU_FTR_ALTIVEC_COMP | CPU_FTR_CAN_NAP | CPU_FTR_MMCRA | \
 	    CPU_FTR_CP_USE_DCBTZ | CPU_FTR_STCX_CHECKS_ADDRESS | \
 	    CPU_FTR_HVMODE | CPU_FTR_DABRX)
-#define CPU_FTRS_POWER5	(CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+#define CPU_FTRS_POWER5	(CPU_FTR_LWSYNC | \
 	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
 	    CPU_FTR_MMCRA | CPU_FTR_SMT | \
 	    CPU_FTR_COHERENT_ICACHE | CPU_FTR_PURR | \
 	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_DABRX)
-#define CPU_FTRS_POWER6 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+#define CPU_FTRS_POWER6 (CPU_FTR_LWSYNC | \
 	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
 	    CPU_FTR_MMCRA | CPU_FTR_SMT | \
 	    CPU_FTR_COHERENT_ICACHE | \
@@ -435,7 +431,7 @@ static inline void cpu_feature_keys_init(void) { }
 	    CPU_FTR_DSCR | CPU_FTR_UNALIGNED_LD_STD | \
 	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_CFAR | \
 	    CPU_FTR_DABRX)
-#define CPU_FTRS_POWER7 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+#define CPU_FTRS_POWER7 (CPU_FTR_LWSYNC | \
 	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\
 	    CPU_FTR_MMCRA | CPU_FTR_SMT | \
 	    CPU_FTR_COHERENT_ICACHE | \
@@ -444,7 +440,7 @@ static inline void cpu_feature_keys_init(void) { }
 	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
 	    CPU_FTR_CFAR | CPU_FTR_HVMODE | \
 	    CPU_FTR_VMX_COPY | CPU_FTR_HAS_PPR | CPU_FTR_DABRX | CPU_FTR_PKEY)
-#define CPU_FTRS_POWER8 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+#define CPU_FTRS_POWER8 (CPU_FTR_LWSYNC | \
 	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\
 	    CPU_FTR_MMCRA | CPU_FTR_SMT | \
 	    CPU_FTR_COHERENT_ICACHE | \
@@ -456,7 +452,7 @@ static inline void cpu_feature_keys_init(void) { }
 	    CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_PKEY)
 #define CPU_FTRS_POWER8E (CPU_FTRS_POWER8 | CPU_FTR_PMAO_BUG)
 #define CPU_FTRS_POWER8_DD1 (CPU_FTRS_POWER8 & ~CPU_FTR_DBELL)
-#define CPU_FTRS_POWER9 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+#define CPU_FTRS_POWER9 (CPU_FTR_LWSYNC | \
 	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\
 	    CPU_FTR_MMCRA | CPU_FTR_SMT | \
 	    CPU_FTR_COHERENT_ICACHE | \
@@ -464,33 +460,45 @@ static inline void cpu_feature_keys_init(void) { }
 	    CPU_FTR_DSCR | CPU_FTR_SAO  | \
 	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
 	    CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
-	    CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \
-	    CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | \
-	    CPU_FTR_PKEY | CPU_FTR_P9_TLBIE_BUG)
+	    CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \
+	    CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_PKEY | \
+	    CPU_FTR_P9_TLBIE_BUG)
 #define CPU_FTRS_POWER9_DD1 ((CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD1) & \
 			     (~CPU_FTR_SAO))
 #define CPU_FTRS_POWER9_DD2_0 CPU_FTRS_POWER9
 #define CPU_FTRS_POWER9_DD2_1 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1)
-#define CPU_FTRS_CELL	(CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+#define CPU_FTRS_POWER9_DD2_2 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1 | \
+			       CPU_FTR_P9_TM_HV_ASSIST | \
+			       CPU_FTR_P9_TM_XER_SO_BUG)
+#define CPU_FTRS_CELL	(CPU_FTR_LWSYNC | \
 	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
 	    CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
 	    CPU_FTR_PAUSE_ZERO  | CPU_FTR_CELL_TB_BUG | CPU_FTR_CP_USE_DCBTZ | \
 	    CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_DABRX)
-#define CPU_FTRS_PA6T (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+#define CPU_FTRS_PA6T (CPU_FTR_LWSYNC | \
 	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP | \
 	    CPU_FTR_PURR | CPU_FTR_REAL_LE | CPU_FTR_DABRX)
-#define CPU_FTRS_COMPATIBLE	(CPU_FTR_USE_TB | CPU_FTR_PPCAS_ARCH_V2)
+#define CPU_FTRS_COMPATIBLE	(CPU_FTR_PPCAS_ARCH_V2)
 
 #ifdef __powerpc64__
 #ifdef CONFIG_PPC_BOOK3E
 #define CPU_FTRS_POSSIBLE	(CPU_FTRS_E6500 | CPU_FTRS_E5500)
 #else
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
 #define CPU_FTRS_POSSIBLE	\
-	    (CPU_FTRS_POWER4 | CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | \
+	    (CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | CPU_FTRS_POWER8 | \
+	     CPU_FTRS_POWER8_DD1 | CPU_FTR_ALTIVEC_COMP | CPU_FTR_VSX_COMP | \
+	     CPU_FTRS_POWER9 | CPU_FTRS_POWER9_DD1 | CPU_FTRS_POWER9_DD2_1 | \
+	     CPU_FTRS_POWER9_DD2_2)
+#else
+#define CPU_FTRS_POSSIBLE	\
+	    (CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | \
 	     CPU_FTRS_POWER6 | CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | \
 	     CPU_FTRS_POWER8 | CPU_FTRS_POWER8_DD1 | CPU_FTRS_CELL | \
-	     CPU_FTRS_PA6T | CPU_FTR_VSX | CPU_FTRS_POWER9 | \
-	     CPU_FTRS_POWER9_DD1 | CPU_FTRS_POWER9_DD2_1)
+	     CPU_FTRS_PA6T | CPU_FTR_VSX_COMP | CPU_FTR_ALTIVEC_COMP | \
+	     CPU_FTRS_POWER9 | CPU_FTRS_POWER9_DD1 | CPU_FTRS_POWER9_DD2_1 | \
+	     CPU_FTRS_POWER9_DD2_2)
+#endif /* CONFIG_CPU_LITTLE_ENDIAN */
 #endif
 #else
 enum {
@@ -537,12 +545,38 @@ enum {
 #ifdef CONFIG_PPC_BOOK3E
 #define CPU_FTRS_ALWAYS		(CPU_FTRS_E6500 & CPU_FTRS_E5500)
 #else
+
+#ifdef CONFIG_PPC_DT_CPU_FTRS
+#define CPU_FTRS_DT_CPU_BASE			\
+	(CPU_FTR_LWSYNC |			\
+	 CPU_FTR_FPU_UNAVAILABLE |		\
+	 CPU_FTR_NODSISRALIGN |			\
+	 CPU_FTR_NOEXECUTE |			\
+	 CPU_FTR_COHERENT_ICACHE |		\
+	 CPU_FTR_STCX_CHECKS_ADDRESS |		\
+	 CPU_FTR_POPCNTB | CPU_FTR_POPCNTD |	\
+	 CPU_FTR_DAWR |				\
+	 CPU_FTR_ARCH_206 |			\
+	 CPU_FTR_ARCH_207S)
+#else
+#define CPU_FTRS_DT_CPU_BASE	(~0ul)
+#endif
+
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+#define CPU_FTRS_ALWAYS \
+	    (CPU_FTRS_POSSIBLE & ~CPU_FTR_HVMODE & CPU_FTRS_POWER7 & \
+	     CPU_FTRS_POWER8E & CPU_FTRS_POWER8 & CPU_FTRS_POWER8_DD1 & \
+	     CPU_FTRS_POWER9 & CPU_FTRS_POWER9_DD1 & CPU_FTRS_POWER9_DD2_1 & \
+	     CPU_FTRS_DT_CPU_BASE)
+#else
 #define CPU_FTRS_ALWAYS		\
-	    (CPU_FTRS_POWER4 & CPU_FTRS_PPC970 & CPU_FTRS_POWER5 & \
+	    (CPU_FTRS_PPC970 & CPU_FTRS_POWER5 & \
 	     CPU_FTRS_POWER6 & CPU_FTRS_POWER7 & CPU_FTRS_CELL & \
 	     CPU_FTRS_PA6T & CPU_FTRS_POWER8 & CPU_FTRS_POWER8E & \
 	     CPU_FTRS_POWER8_DD1 & ~CPU_FTR_HVMODE & CPU_FTRS_POSSIBLE & \
-	     CPU_FTRS_POWER9)
+	     CPU_FTRS_POWER9 & CPU_FTRS_POWER9_DD1 & CPU_FTRS_POWER9_DD2_1 & \
+	     CPU_FTRS_DT_CPU_BASE)
+#endif /* CONFIG_CPU_LITTLE_ENDIAN */
 #endif
 #else
 enum {
diff --git a/arch/powerpc/include/asm/debug.h b/arch/powerpc/include/asm/debug.h
index fc97404de0a3..ce5da214ffe5 100644
--- a/arch/powerpc/include/asm/debug.h
+++ b/arch/powerpc/include/asm/debug.h
@@ -47,6 +47,7 @@ static inline int debugger_fault_handler(struct pt_regs *regs) { return 0; }
 
 void set_breakpoint(struct arch_hw_breakpoint *brk);
 void __set_breakpoint(struct arch_hw_breakpoint *brk);
+bool ppc_breakpoint_available(void);
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
 extern void do_send_trap(struct pt_regs *regs, unsigned long address,
 			 unsigned long error_code, int brkpt);
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index fd37cc101f4f..c2266ca61853 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -256,6 +256,12 @@ static inline void eeh_serialize_unlock(unsigned long flags)
 	raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
 }
 
+static inline bool eeh_state_active(int state)
+{
+	/* True only if no bit of the MMIO|DMA active mask is clear in state. */
+	return !(~state & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE));
+}
+
 typedef void *(*eeh_traverse_func)(void *data, void *flag);
 void eeh_set_pe_aux_size(int size);
 int eeh_phb_pe_create(struct pci_controller *phb);
diff --git a/arch/powerpc/include/asm/eeh_event.h b/arch/powerpc/include/asm/eeh_event.h
index 1e551a2d6f82..9884e872686f 100644
--- a/arch/powerpc/include/asm/eeh_event.h
+++ b/arch/powerpc/include/asm/eeh_event.h
@@ -34,7 +34,8 @@ struct eeh_event {
 int eeh_event_init(void);
 int eeh_send_failure_event(struct eeh_pe *pe);
 void eeh_remove_event(struct eeh_pe *pe, bool force);
-void eeh_handle_event(struct eeh_pe *pe);
+void eeh_handle_normal_event(struct eeh_pe *pe);
+void eeh_handle_special_event(void);
 
 #endif /* __KERNEL__ */
 #endif /* ASM_POWERPC_EEH_EVENT_H */
diff --git a/arch/powerpc/include/asm/epapr_hcalls.h b/arch/powerpc/include/asm/epapr_hcalls.h
index 90863245df53..d3a7e36f1402 100644
--- a/arch/powerpc/include/asm/epapr_hcalls.h
+++ b/arch/powerpc/include/asm/epapr_hcalls.h
@@ -466,17 +466,17 @@ static inline unsigned long epapr_hypercall(unsigned long *in,
 			    unsigned long *out,
 			    unsigned long nr)
 {
-	unsigned long register r0 asm("r0");
-	unsigned long register r3 asm("r3") = in[0];
-	unsigned long register r4 asm("r4") = in[1];
-	unsigned long register r5 asm("r5") = in[2];
-	unsigned long register r6 asm("r6") = in[3];
-	unsigned long register r7 asm("r7") = in[4];
-	unsigned long register r8 asm("r8") = in[5];
-	unsigned long register r9 asm("r9") = in[6];
-	unsigned long register r10 asm("r10") = in[7];
-	unsigned long register r11 asm("r11") = nr;
-	unsigned long register r12 asm("r12");
+	register unsigned long r0 asm("r0");
+	register unsigned long r3 asm("r3") = in[0];
+	register unsigned long r4 asm("r4") = in[1];
+	register unsigned long r5 asm("r5") = in[2];
+	register unsigned long r6 asm("r6") = in[3];
+	register unsigned long r7 asm("r7") = in[4];
+	register unsigned long r8 asm("r8") = in[5];
+	register unsigned long r9 asm("r9") = in[6];
+	register unsigned long r10 asm("r10") = in[7];
+	register unsigned long r11 asm("r11") = nr;
+	register unsigned long r12 asm("r12");
 
 	asm volatile("bl	epapr_hypercall_start"
 		     : "=r"(r0), "=r"(r3), "=r"(r4), "=r"(r5), "=r"(r6),
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 6f6751d3eba9..78540c074d70 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -89,17 +89,17 @@ pte_t *huge_pte_offset_and_shift(struct mm_struct *mm,
 
 void flush_dcache_icache_hugepage(struct page *page);
 
-#if defined(CONFIG_PPC_MM_SLICES)
-int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
+int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
 			   unsigned long len);
-#else
+
 static inline int is_hugepage_only_range(struct mm_struct *mm,
 					 unsigned long addr,
 					 unsigned long len)
 {
+	if (IS_ENABLED(CONFIG_PPC_MM_SLICES) && !radix_enabled())
+		return slice_is_hugepage_only_range(mm, addr, len);
 	return 0;
 }
-#endif
 
 void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea,
 			    pte_t pte);
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index eca3f9c68907..2e2dddab5d65 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -88,6 +88,7 @@
 #define H_P8		-61
 #define H_P9		-62
 #define H_TOO_BIG	-64
+#define H_UNSUPPORTED	-67
 #define H_OVERLAP	-68
 #define H_INTERRUPT	-69
 #define H_BAD_DATA	-70
@@ -337,6 +338,9 @@
 #define H_CPU_CHAR_L1D_FLUSH_ORI30	(1ull << 61) // IBM bit 2
 #define H_CPU_CHAR_L1D_FLUSH_TRIG2	(1ull << 60) // IBM bit 3
 #define H_CPU_CHAR_L1D_THREAD_PRIV	(1ull << 59) // IBM bit 4
+#define H_CPU_CHAR_BRANCH_HINTS_HONORED	(1ull << 58) // IBM bit 5
+#define H_CPU_CHAR_THREAD_RECONFIG_CTRL	(1ull << 57) // IBM bit 6
+#define H_CPU_CHAR_COUNT_CACHE_DISABLED	(1ull << 56) // IBM bit 7
 
 #define H_CPU_BEHAV_FAVOUR_SECURITY	(1ull << 63) // IBM bit 0
 #define H_CPU_BEHAV_L1D_FLUSH_PR	(1ull << 62) // IBM bit 1
diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h
index ac6432d9be46..8e7b09703ca4 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -66,6 +66,7 @@ extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
 						unsigned long val, void *data);
 int arch_install_hw_breakpoint(struct perf_event *bp);
 void arch_uninstall_hw_breakpoint(struct perf_event *bp);
+void arch_unregister_hw_breakpoint(struct perf_event *bp);
 void hw_breakpoint_pmu_read(struct perf_event *bp);
 extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk);
 
@@ -79,9 +80,11 @@ static inline void hw_breakpoint_disable(void)
 	brk.address = 0;
 	brk.type = 0;
 	brk.len = 0;
-	__set_breakpoint(&brk);
+	if (ppc_breakpoint_available())
+		__set_breakpoint(&brk);
 }
 extern void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs);
+int hw_breakpoint_handler(struct die_args *args);
 
 #else	/* CONFIG_HAVE_HW_BREAKPOINT */
 static inline void hw_breakpoint_disable(void) { }
diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
index 422f99cf9924..af074923d598 100644
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -33,8 +33,6 @@ extern struct pci_dev *isa_bridge_pcidev;
 #include <asm/mmu.h>
 #include <asm/ppc_asm.h>
 
-#include <asm-generic/iomap.h>
-
 #ifdef CONFIG_PPC64
 #include <asm/paca.h>
 #endif
@@ -663,6 +661,8 @@ static inline void name at					\
 #define writel_relaxed(v, addr)	writel(v, addr)
 #define writeq_relaxed(v, addr)	writeq(v, addr)
 
+#include <asm-generic/iomap.h>
+
 #ifdef CONFIG_PPC32
 #define mmiowb()
 #else
diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index e8e3a0a04eb0..ee39ce56b2a2 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -66,6 +66,7 @@ extern void irq_ctx_init(void);
 extern void call_do_softirq(struct thread_info *tp);
 extern void call_do_irq(struct pt_regs *regs, struct thread_info *tp);
 extern void do_IRQ(struct pt_regs *regs);
+extern void __init init_IRQ(void);
 extern void __do_irq(struct pt_regs *regs);
 
 int irq_choose_cpu(const struct cpumask *mask);
diff --git a/arch/powerpc/include/asm/irq_work.h b/arch/powerpc/include/asm/irq_work.h
index c6d3078bd8c3..b8b0be8f1a07 100644
--- a/arch/powerpc/include/asm/irq_work.h
+++ b/arch/powerpc/include/asm/irq_work.h
@@ -6,5 +6,6 @@ static inline bool arch_irq_work_has_interrupt(void)
 {
 	return true;
 }
+extern void arch_irq_work_raise(void);
 
 #endif /* _ASM_POWERPC_IRQ_WORK_H */
diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index d8b1e8e7e035..4a585cba1787 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -95,7 +95,7 @@ static inline bool kdump_in_progress(void)
 }
 
 #ifdef CONFIG_KEXEC_FILE
-extern struct kexec_file_ops kexec_elf64_ops;
+extern const struct kexec_file_ops kexec_elf64_ops;
 
 #ifdef CONFIG_IMA_KEXEC
 #define ARCH_HAS_KIMAGE_ARCH
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index 09a802bb702f..a790d5cf6ea3 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -108,6 +108,8 @@
 
 /* book3s_hv */
 
+#define BOOK3S_INTERRUPT_HV_SOFTPATCH	0x1500
+
 /*
  * Special trap used to indicate to host that this is a
  * passthrough interrupt that could not be handled
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 376ae803b69c..4c02a7378d06 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -241,6 +241,10 @@ extern void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr,
 			unsigned long mask);
 extern void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr);
 
+extern int kvmhv_p9_tm_emulation_early(struct kvm_vcpu *vcpu);
+extern int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu);
+extern void kvmhv_emulate_tm_rollback(struct kvm_vcpu *vcpu);
+
 extern void kvmppc_entry_trampoline(void);
 extern void kvmppc_hv_entry_trampoline(void);
 extern u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst);
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 998f7b7aaa9e..c424e44f4c00 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -472,6 +472,49 @@ static inline void set_dirty_bits_atomic(unsigned long *map, unsigned long i,
 			set_bit_le(i, map);
 }
 
+/* Return @msr with the MSR_HV bit cleared and the MSR_ME bit set. */
+static inline u64 sanitize_msr(u64 msr)
+{
+	msr = (msr & ~MSR_HV) | MSR_ME;
+	return msr;
+}
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+/* Overwrite the vcpu's current registers with the *_tm (checkpoint) copies. */
+static inline void copy_from_checkpoint(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.cr = vcpu->arch.cr_tm;
+	vcpu->arch.xer = vcpu->arch.xer_tm;
+	vcpu->arch.lr = vcpu->arch.lr_tm;
+	vcpu->arch.ctr = vcpu->arch.ctr_tm;
+	vcpu->arch.amr = vcpu->arch.amr_tm;
+	vcpu->arch.ppr = vcpu->arch.ppr_tm;
+	vcpu->arch.dscr = vcpu->arch.dscr_tm;
+	vcpu->arch.tar = vcpu->arch.tar_tm;
+	memcpy(vcpu->arch.gpr, vcpu->arch.gpr_tm, sizeof(vcpu->arch.gpr));
+	vcpu->arch.fp = vcpu->arch.fp_tm;
+	vcpu->arch.vr = vcpu->arch.vr_tm;
+	vcpu->arch.vrsave = vcpu->arch.vrsave_tm;
+}
+
+/* Save the vcpu's current registers into the *_tm (checkpoint) copies. */
+static inline void copy_to_checkpoint(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.cr_tm = vcpu->arch.cr;
+	vcpu->arch.xer_tm = vcpu->arch.xer;
+	vcpu->arch.lr_tm = vcpu->arch.lr;
+	vcpu->arch.ctr_tm = vcpu->arch.ctr;
+	vcpu->arch.amr_tm = vcpu->arch.amr;
+	vcpu->arch.ppr_tm = vcpu->arch.ppr;
+	vcpu->arch.dscr_tm = vcpu->arch.dscr;
+	vcpu->arch.tar_tm = vcpu->arch.tar;
+	memcpy(vcpu->arch.gpr_tm, vcpu->arch.gpr, sizeof(vcpu->arch.gpr));
+	vcpu->arch.fp_tm = vcpu->arch.fp;
+	vcpu->arch.vr_tm = vcpu->arch.vr;
+	vcpu->arch.vrsave_tm = vcpu->arch.vrsave;
+}
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 
 #endif /* __ASM_KVM_BOOK3S_64_H__ */
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index ab386af2904f..d978fdf698af 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -119,6 +119,7 @@ struct kvmppc_host_state {
 	u8 host_ipi;
 	u8 ptid;		/* thread number within subcore when split */
 	u8 tid;			/* thread number within whole core */
+	u8 fake_suspend;
 	struct kvm_vcpu *kvm_vcpu;
 	struct kvmppc_vcore *kvm_vcore;
 	void __iomem *xics_phys;
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 1f53b562726f..17498e9a26e4 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -60,7 +60,6 @@
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
 
-extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 extern int kvm_unmap_hva_range(struct kvm *kvm,
 			       unsigned long start, unsigned long end);
 extern int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
@@ -610,6 +609,7 @@ struct kvm_vcpu_arch {
 	u64 tfhar;
 	u64 texasr;
 	u64 tfiar;
+	u64 orig_texasr;
 
 	u32 cr_tm;
 	u64 xer_tm;
diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h
index 336a91acb8b1..5ceb4efca65f 100644
--- a/arch/powerpc/include/asm/kvm_para.h
+++ b/arch/powerpc/include/asm/kvm_para.h
@@ -61,6 +61,11 @@ static inline unsigned int kvm_arch_para_features(void)
 	return r;
 }
 
+static inline unsigned int kvm_arch_para_hints(void)
+{
+	return 0;
+}
+
 static inline bool kvm_check_and_clear_guest_paused(void)
 {
 	return false;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 7765a800ddae..abe7032cdb54 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -295,7 +295,6 @@ struct kvmppc_ops {
 				     const struct kvm_userspace_memory_region *mem,
 				     const struct kvm_memory_slot *old,
 				     const struct kvm_memory_slot *new);
-	int (*unmap_hva)(struct kvm *kvm, unsigned long hva);
 	int (*unmap_hva_range)(struct kvm *kvm, unsigned long start,
 			   unsigned long end);
 	int (*age_hva)(struct kvm *kvm, unsigned long start, unsigned long end);
@@ -436,15 +435,15 @@ struct openpic;
 extern void kvm_cma_reserve(void) __init;
 static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
 {
-	paca[cpu].kvm_hstate.xics_phys = (void __iomem *)addr;
+	paca_ptrs[cpu]->kvm_hstate.xics_phys = (void __iomem *)addr;
 }
 
 static inline void kvmppc_set_xive_tima(int cpu,
 					unsigned long phys_addr,
 					void __iomem *virt_addr)
 {
-	paca[cpu].kvm_hstate.xive_tima_phys = (void __iomem *)phys_addr;
-	paca[cpu].kvm_hstate.xive_tima_virt = virt_addr;
+	paca_ptrs[cpu]->kvm_hstate.xive_tima_phys = (void __iomem *)phys_addr;
+	paca_ptrs[cpu]->kvm_hstate.xive_tima_virt = virt_addr;
 }
 
 static inline u32 kvmppc_get_xics_latch(void)
@@ -458,7 +457,7 @@ static inline u32 kvmppc_get_xics_latch(void)
 
 static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
 {
-	paca[cpu].kvm_hstate.host_ipi = host_ipi;
+	paca_ptrs[cpu]->kvm_hstate.host_ipi = host_ipi;
 }
 
 static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h
index d0a2a2f99564..7c23ce8a5a4c 100644
--- a/arch/powerpc/include/asm/lppaca.h
+++ b/arch/powerpc/include/asm/lppaca.h
@@ -34,16 +34,19 @@
 #include <linux/threads.h>
 #include <asm/types.h>
 #include <asm/mmu.h>
+#include <asm/firmware.h>
 
 /*
- * We only have to have statically allocated lppaca structs on
- * legacy iSeries, which supports at most 64 cpus.
- */
-#define NR_LPPACAS	1
-
-/*
- * The Hypervisor barfs if the lppaca crosses a page boundary.  A 1k
- * alignment is sufficient to prevent this
+ * The lppaca is the "virtual processor area" registered with the hypervisor,
+ * H_REGISTER_VPA etc.
+ *
+ * According to PAPR, the structure is 640 bytes long, must be L1 cache line
+ * aligned, and must not cross a 4kB boundary. Its size field must be at
+ * least 640 bytes (but may be more).
+ *
+ * Pre-v4.14 KVM hypervisors reject the VPA if its size field is smaller than
+ * 1kB, so we dynamically allocate 1kB and advertise size as 1kB, but keep
+ * this structure as the canonical 640 byte size.
  */
 struct lppaca {
 	/* cacheline 1 contains read-only data */
@@ -97,13 +100,11 @@ struct lppaca {
 
 	__be32	page_ins;		/* CMO Hint - # page ins by OS */
 	u8	reserved11[148];
-	volatile __be64 dtl_idx;		/* Dispatch Trace Log head index */
+	volatile __be64 dtl_idx;	/* Dispatch Trace Log head index */
 	u8	reserved12[96];
-} __attribute__((__aligned__(0x400)));
-
-extern struct lppaca lppaca[];
+} ____cacheline_aligned;
 
-#define lppaca_of(cpu)	(*paca[cpu].lppaca_ptr)
+#define lppaca_of(cpu)	(*paca_ptrs[cpu]->lppaca_ptr)
 
 /*
  * We are using a non architected field to determine if a partition is
@@ -114,6 +115,8 @@ extern struct lppaca lppaca[];
 
 static inline bool lppaca_shared_proc(struct lppaca *l)
 {
+	if (!firmware_has_feature(FW_FEATURE_SPLPAR))
+		return false;
 	return !!(l->__old_status & LPPACA_OLD_SHARED_PROC);
 }
 
diff --git a/arch/powerpc/include/asm/mmu-8xx.h b/arch/powerpc/include/asm/mmu-8xx.h
index 2f806e329648..4f547752ae79 100644
--- a/arch/powerpc/include/asm/mmu-8xx.h
+++ b/arch/powerpc/include/asm/mmu-8xx.h
@@ -186,11 +186,32 @@
 #define M_APG2		0x00000040
 #define M_APG3		0x00000060
 
+#ifdef CONFIG_PPC_MM_SLICES
+#include <asm/nohash/32/slice.h>
+#define SLICE_ARRAY_SIZE	(1 << (32 - SLICE_LOW_SHIFT - 1))
+#endif
+
 #ifndef __ASSEMBLY__
+struct slice_mask {
+	u64 low_slices;
+	DECLARE_BITMAP(high_slices, 0);
+};
+
 typedef struct {
 	unsigned int id;
 	unsigned int active;
 	unsigned long vdso_base;
+#ifdef CONFIG_PPC_MM_SLICES
+	u16 user_psize;		/* page size index */
+	unsigned char low_slices_psize[SLICE_ARRAY_SIZE];
+	unsigned char high_slices_psize[0];
+	unsigned long slb_addr_limit;
+	struct slice_mask mask_base_psize; /* 4k or 16k */
+# ifdef CONFIG_HUGETLB_PAGE
+	struct slice_mask mask_512k;
+	struct slice_mask mask_8m;
+# endif
+#endif
 } mm_context_t;
 
 #define PHYS_IMMR_BASE (mfspr(SPRN_IMMR) & 0xfff80000)
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index bb38312cff28..61d15ce92278 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -111,9 +111,9 @@
 /* MMU feature bit sets for various CPUs */
 #define MMU_FTRS_DEFAULT_HPTE_ARCH_V2	\
 	MMU_FTR_HPTE_TABLE | MMU_FTR_PPCAS_ARCH_V2
-#define MMU_FTRS_POWER4		MMU_FTRS_DEFAULT_HPTE_ARCH_V2
-#define MMU_FTRS_PPC970		MMU_FTRS_POWER4 | MMU_FTR_TLBIE_CROP_VA
-#define MMU_FTRS_POWER5		MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
+#define MMU_FTRS_POWER		MMU_FTRS_DEFAULT_HPTE_ARCH_V2
+#define MMU_FTRS_PPC970		MMU_FTRS_POWER | MMU_FTR_TLBIE_CROP_VA
+#define MMU_FTRS_POWER5		MMU_FTRS_POWER | MMU_FTR_LOCKLESS_TLBIE
 #define MMU_FTRS_POWER6		MMU_FTRS_POWER5 | MMU_FTR_KERNEL_RO | MMU_FTR_68_BIT_VA
 #define MMU_FTRS_POWER7		MMU_FTRS_POWER6
 #define MMU_FTRS_POWER8		MMU_FTRS_POWER6
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 3a15b6db9501..1835ca1505d6 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -60,12 +60,51 @@ extern int hash__alloc_context_id(void);
 extern void hash__reserve_context_id(int id);
 extern void __destroy_context(int context_id);
 static inline void mmu_context_init(void) { }
+
+static inline int alloc_extended_context(struct mm_struct *mm,
+					 unsigned long ea)
+{
+	int index = ea >> MAX_EA_BITS_PER_CONTEXT;
+	int context_id = hash__alloc_context_id();
+
+	/* A negative id is returned to the caller unchanged. */
+	if (context_id < 0)
+		return context_id;
+
+	/* Warn if an id was already recorded for this index. */
+	VM_WARN_ON(mm->context.extended_id[index]);
+	mm->context.extended_id[index] = context_id;
+	return context_id;
+}
+
+static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea)
+{
+	int context_id = get_ea_context(&mm->context, ea);
+
+	/*
+	 * An extra context is needed when the id found for @ea is zero.
+	 */
+	return !context_id;
+}
+
 #else
 extern void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
 			       struct task_struct *tsk);
 extern unsigned long __init_new_context(void);
 extern void __destroy_context(unsigned long context_id);
 extern void mmu_context_init(void);
+static inline int alloc_extended_context(struct mm_struct *mm,
+					 unsigned long ea)
+{
+	/* Must never be called on non-book3s_64; warn and fail if it is. */
+	WARN_ON(1);
+	return -ENOMEM;
+}
+
+static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea)
+{
+	return false;
+}
 #endif
 
 #if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) && defined(CONFIG_PPC_RADIX_MMU)
diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h
index 7e28442827f1..4f6573934792 100644
--- a/arch/powerpc/include/asm/module.h
+++ b/arch/powerpc/include/asm/module.h
@@ -15,9 +15,19 @@
 
 
 #ifdef CC_USING_MPROFILE_KERNEL
-#define MODULE_ARCH_VERMAGIC	"mprofile-kernel"
+#define MODULE_ARCH_VERMAGIC_FTRACE	"mprofile-kernel "
+#else
+#define MODULE_ARCH_VERMAGIC_FTRACE	""
 #endif
 
+#ifdef CONFIG_RELOCATABLE
+#define MODULE_ARCH_VERMAGIC_RELOCATABLE	"relocatable "
+#else
+#define MODULE_ARCH_VERMAGIC_RELOCATABLE	""
+#endif
+
+#define MODULE_ARCH_VERMAGIC MODULE_ARCH_VERMAGIC_FTRACE MODULE_ARCH_VERMAGIC_RELOCATABLE
+
 #ifndef __powerpc64__
 /*
  * Thanks to Paul M for explaining this.
diff --git a/arch/powerpc/include/asm/nohash/32/slice.h b/arch/powerpc/include/asm/nohash/32/slice.h
new file mode 100644
index 000000000000..777d62e40ac0
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/32/slice.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_NOHASH_32_SLICE_H
+#define _ASM_POWERPC_NOHASH_32_SLICE_H
+
+#ifdef CONFIG_PPC_MM_SLICES
+
+#define SLICE_LOW_SHIFT		26	/* 64 slices */
+#define SLICE_LOW_TOP		(0x100000000ull)
+#define SLICE_NUM_LOW		(SLICE_LOW_TOP >> SLICE_LOW_SHIFT)
+#define GET_LOW_SLICE_INDEX(addr)	((addr) >> SLICE_LOW_SHIFT)
+
+#define SLICE_HIGH_SHIFT	0
+#define SLICE_NUM_HIGH		0ul
+#define GET_HIGH_SLICE_INDEX(addr)	((addr) & 0)	/* always 0 */
+
+#endif /* CONFIG_PPC_MM_SLICES */
+
+#endif /* _ASM_POWERPC_NOHASH_32_SLICE_H */
diff --git a/arch/powerpc/include/asm/nohash/64/slice.h b/arch/powerpc/include/asm/nohash/64/slice.h
new file mode 100644
index 000000000000..ad0d6e3cc1c5
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/64/slice.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_NOHASH_64_SLICE_H
+#define _ASM_POWERPC_NOHASH_64_SLICE_H
+
+#ifdef CONFIG_PPC_64K_PAGES
+#define get_slice_psize(mm, addr)	MMU_PAGE_64K
+#else /* CONFIG_PPC_64K_PAGES */
+#define get_slice_psize(mm, addr)	MMU_PAGE_4K
+#endif /* !CONFIG_PPC_64K_PAGES */
+#define slice_set_user_psize(mm, psize)	do { BUG(); } while (0)
+
+#endif /* _ASM_POWERPC_NOHASH_64_SLICE_H */
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index 94bd1bf2c873..d886a5b7ff21 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -204,7 +204,9 @@
 #define OPAL_NPU_SPA_SETUP			159
 #define OPAL_NPU_SPA_CLEAR_CACHE		160
 #define OPAL_NPU_TL_SET				161
-#define OPAL_LAST				161
+#define OPAL_PCI_GET_PBCQ_TUNNEL_BAR		164
+#define OPAL_PCI_SET_PBCQ_TUNNEL_BAR		165
+#define OPAL_LAST				165
 
 /* Device tree flags */
 
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 12e70fb58700..03e1a920491e 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -21,6 +21,9 @@
 /* We calculate number of sg entries based on PAGE_SIZE */
 #define SG_ENTRIES_PER_NODE ((PAGE_SIZE - 16) / sizeof(struct opal_sg_entry))
 
+/* Default time to sleep or delay between OPAL_BUSY/OPAL_BUSY_EVENT loops */
+#define OPAL_BUSY_DELAY_MS	10
+
 /* /sys/firmware/opal */
 extern struct kobject *opal_kobj;
 
@@ -204,6 +207,8 @@ int64_t opal_unregister_dump_region(uint32_t id);
 int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val);
 int64_t opal_config_cpu_idle_state(uint64_t state, uint64_t flag);
 int64_t opal_pci_set_phb_cxl_mode(uint64_t phb_id, uint64_t mode, uint64_t pe_number);
+int64_t opal_pci_get_pbcq_tunnel_bar(uint64_t phb_id, uint64_t *addr);
+int64_t opal_pci_set_pbcq_tunnel_bar(uint64_t phb_id, uint64_t addr);
 int64_t opal_ipmi_send(uint64_t interface, struct opal_ipmi_msg *msg,
 		uint64_t msg_len);
 int64_t opal_ipmi_recv(uint64_t interface, struct opal_ipmi_msg *msg,
@@ -323,7 +328,7 @@ struct rtc_time;
 extern unsigned long opal_get_boot_time(void);
 extern void opal_nvram_init(void);
 extern void opal_flash_update_init(void);
-extern void opal_flash_term_callback(void);
+extern void opal_flash_update_print_message(void);
 extern int opal_elog_init(void);
 extern void opal_platform_dump_init(void);
 extern void opal_sys_param_init(void);
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index b62c31037cad..4185f1c96125 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -32,6 +32,7 @@
 #include <asm/accounting.h>
 #include <asm/hmi.h>
 #include <asm/cpuidle.h>
+#include <asm/atomic.h>
 
 register struct paca_struct *local_paca asm("r13");
 
@@ -46,7 +47,10 @@ extern unsigned int debug_smp_processor_id(void); /* from linux/smp.h */
 #define get_paca()	local_paca
 #endif
 
+#ifdef CONFIG_PPC_PSERIES
 #define get_lppaca()	(get_paca()->lppaca_ptr)
+#endif
+
 #define get_slb_shadow()	(get_paca()->slb_shadow_ptr)
 
 struct task_struct;
@@ -58,7 +62,7 @@ struct task_struct;
  * processor.
  */
 struct paca_struct {
-#ifdef CONFIG_PPC_BOOK3S
+#ifdef CONFIG_PPC_PSERIES
 	/*
 	 * Because hw_cpu_id, unlike other paca fields, is accessed
 	 * routinely from other CPUs (from the IRQ code), we stick to
@@ -67,7 +71,8 @@ struct paca_struct {
 	 */
 
 	struct lppaca *lppaca_ptr;	/* Pointer to LpPaca for PLIC */
-#endif /* CONFIG_PPC_BOOK3S */
+#endif /* CONFIG_PPC_PSERIES */
+
 	/*
 	 * MAGIC: the spinlock functions in arch/powerpc/lib/locks.c 
 	 * load lock_token and paca_index with a single lwz
@@ -141,7 +146,7 @@ struct paca_struct {
 #ifdef CONFIG_PPC_BOOK3S
 	mm_context_id_t mm_ctx_id;
 #ifdef CONFIG_PPC_MM_SLICES
-	u64 mm_ctx_low_slices_psize;
+	unsigned char mm_ctx_low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE];
 	unsigned char mm_ctx_high_slices_psize[SLICE_ARRAY_SIZE];
 	unsigned long mm_ctx_slb_addr_limit;
 #else
@@ -160,10 +165,14 @@ struct paca_struct {
 	u64 saved_msr;			/* MSR saved here by enter_rtas */
 	u16 trap_save;			/* Used when bad stack is encountered */
 	u8 irq_soft_mask;		/* mask for irq soft masking */
+	u8 soft_enabled;		/* irq soft-enable flag */
 	u8 irq_happened;		/* irq happened while soft-disabled */
 	u8 io_sync;			/* writel() needs spin_unlock sync */
 	u8 irq_work_pending;		/* IRQ_WORK interrupt while soft-disable */
 	u8 nap_state_lost;		/* NV GPR values lost in power7_idle */
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	u8 pmcregs_in_use;		/* pseries puts this in lppaca */
+#endif
 	u64 sprg_vdso;			/* Saved user-visible sprg */
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 	u64 tm_scratch;                 /* TM scratch area for reclaim */
@@ -177,6 +186,8 @@ struct paca_struct {
 	u8 thread_mask;
 	/* Mask to denote subcore sibling threads */
 	u8 subcore_sibling_mask;
+	/* Flag to request this thread not to stop */
+	atomic_t dont_stop;
 	/*
 	 * Pointer to an array which contains pointer
 	 * to the sibling threads' paca.
@@ -241,18 +252,20 @@ struct paca_struct {
 	void *rfi_flush_fallback_area;
 	u64 l1d_flush_size;
 #endif
-};
+} ____cacheline_aligned;
 
 extern void copy_mm_to_paca(struct mm_struct *mm);
-extern struct paca_struct *paca;
+extern struct paca_struct **paca_ptrs;
 extern void initialise_paca(struct paca_struct *new_paca, int cpu);
 extern void setup_paca(struct paca_struct *new_paca);
-extern void allocate_pacas(void);
+extern void allocate_paca_ptrs(void);
+extern void allocate_paca(int cpu);
 extern void free_unused_pacas(void);
 
 #else /* CONFIG_PPC64 */
 
-static inline void allocate_pacas(void) { };
+static inline void allocate_paca_ptrs(void) { };
+static inline void allocate_paca(int cpu) { };
 static inline void free_unused_pacas(void) { };
 
 #endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index 8da5d4c1cab2..dec9ce5ba8af 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -126,7 +126,15 @@ extern long long virt_phys_offset;
 
 #ifdef CONFIG_FLATMEM
 #define ARCH_PFN_OFFSET		((unsigned long)(MEMORY_START >> PAGE_SHIFT))
-#define pfn_valid(pfn)		((pfn) >= ARCH_PFN_OFFSET && (pfn) < max_mapnr)
+#ifndef __ASSEMBLY__
+extern unsigned long max_mapnr;
+static inline bool pfn_valid(unsigned long pfn)
+{
+	unsigned long min_pfn = ARCH_PFN_OFFSET;
+
+	return pfn >= min_pfn && pfn < max_mapnr;
+}
+#endif
 #endif
 
 #define virt_to_pfn(kaddr)	(__pa(kaddr) >> PAGE_SHIFT)
@@ -344,5 +352,6 @@ typedef struct page *pgtable_t;
 
 #include <asm-generic/memory_model.h>
 #endif /* __ASSEMBLY__ */
+#include <asm/slice.h>
 
 #endif /* _ASM_POWERPC_PAGE_H */
diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h
index 56234c6fcd61..af04acdb873f 100644
--- a/arch/powerpc/include/asm/page_64.h
+++ b/arch/powerpc/include/asm/page_64.h
@@ -86,65 +86,6 @@ extern u64 ppc64_pft_size;
 
 #endif /* __ASSEMBLY__ */
 
-#ifdef CONFIG_PPC_MM_SLICES
-
-#define SLICE_LOW_SHIFT		28
-#define SLICE_HIGH_SHIFT	40
-
-#define SLICE_LOW_TOP		(0x100000000ul)
-#define SLICE_NUM_LOW		(SLICE_LOW_TOP >> SLICE_LOW_SHIFT)
-#define SLICE_NUM_HIGH		(H_PGTABLE_RANGE >> SLICE_HIGH_SHIFT)
-
-#define GET_LOW_SLICE_INDEX(addr)	((addr) >> SLICE_LOW_SHIFT)
-#define GET_HIGH_SLICE_INDEX(addr)	((addr) >> SLICE_HIGH_SHIFT)
-
-#ifndef __ASSEMBLY__
-struct mm_struct;
-
-extern unsigned long slice_get_unmapped_area(unsigned long addr,
-					     unsigned long len,
-					     unsigned long flags,
-					     unsigned int psize,
-					     int topdown);
-
-extern unsigned int get_slice_psize(struct mm_struct *mm,
-				    unsigned long addr);
-
-extern void slice_set_user_psize(struct mm_struct *mm, unsigned int psize);
-extern void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
-				  unsigned long len, unsigned int psize);
-
-#endif /* __ASSEMBLY__ */
-#else
-#define slice_init()
-#ifdef CONFIG_PPC_BOOK3S_64
-#define get_slice_psize(mm, addr)	((mm)->context.user_psize)
-#define slice_set_user_psize(mm, psize)		\
-do {						\
-	(mm)->context.user_psize = (psize);	\
-	(mm)->context.sllp = SLB_VSID_USER | mmu_psize_defs[(psize)].sllp; \
-} while (0)
-#else /* !CONFIG_PPC_BOOK3S_64 */
-#ifdef CONFIG_PPC_64K_PAGES
-#define get_slice_psize(mm, addr)	MMU_PAGE_64K
-#else /* CONFIG_PPC_64K_PAGES */
-#define get_slice_psize(mm, addr)	MMU_PAGE_4K
-#endif /* !CONFIG_PPC_64K_PAGES */
-#define slice_set_user_psize(mm, psize)	do { BUG(); } while(0)
-#endif /* CONFIG_PPC_BOOK3S_64 */
-
-#define slice_set_range_psize(mm, start, len, psize)	\
-	slice_set_user_psize((mm), (psize))
-#endif /* CONFIG_PPC_MM_SLICES */
-
-#ifdef CONFIG_HUGETLB_PAGE
-
-#ifdef CONFIG_PPC_MM_SLICES
-#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
-#endif
-
-#endif /* !CONFIG_HUGETLB_PAGE */
-
 #define VM_DATA_DEFAULT_FLAGS \
 	(is_32bit_task() ? \
 	 VM_DATA_DEFAULT_FLAGS32 : VM_DATA_DEFAULT_FLAGS64)
diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
index 723bf48e7494..67a8a9585d50 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -53,6 +53,8 @@ struct power_pmu {
 			       [PERF_COUNT_HW_CACHE_OP_MAX]
 			       [PERF_COUNT_HW_CACHE_RESULT_MAX];
 
+	int		n_blacklist_ev;
+	int 		*blacklist_ev;
 	/* BHRB entries in the PMU */
 	int		bhrb_nr;
 };
diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h
index 55eddf50d149..96c1a46acbd0 100644
--- a/arch/powerpc/include/asm/plpar_wrappers.h
+++ b/arch/powerpc/include/asm/plpar_wrappers.h
@@ -2,6 +2,8 @@
 #ifndef _ASM_POWERPC_PLPAR_WRAPPERS_H
 #define _ASM_POWERPC_PLPAR_WRAPPERS_H
 
+#ifdef CONFIG_PPC_PSERIES
+
 #include <linux/string.h>
 #include <linux/irqflags.h>
 
@@ -9,14 +11,6 @@
 #include <asm/paca.h>
 #include <asm/page.h>
 
-/* Get state of physical CPU from query_cpu_stopped */
-int smp_query_cpu_stopped(unsigned int pcpu);
-#define QCSS_STOPPED 0
-#define QCSS_STOPPING 1
-#define QCSS_NOT_STOPPED 2
-#define QCSS_HARDWARE_ERROR -1
-#define QCSS_HARDWARE_BUSY -2
-
 static inline long poll_pending(void)
 {
 	return plpar_hcall_norets(H_POLL_PENDING);
@@ -311,17 +305,17 @@ static inline long enable_little_endian_exceptions(void)
 	return plpar_set_mode(1, H_SET_MODE_RESOURCE_LE, 0, 0);
 }
 
-static inline long plapr_set_ciabr(unsigned long ciabr)
+static inline long plpar_set_ciabr(unsigned long ciabr)
 {
 	return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_CIABR, ciabr, 0);
 }
 
-static inline long plapr_set_watchpoint0(unsigned long dawr0, unsigned long dawrx0)
+static inline long plpar_set_watchpoint0(unsigned long dawr0, unsigned long dawrx0)
 {
 	return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_DAWR, dawr0, dawrx0);
 }
 
-static inline long plapr_signal_sys_reset(long cpu)
+static inline long plpar_signal_sys_reset(long cpu)
 {
 	return plpar_hcall_norets(H_SIGNAL_SYS_RESET, cpu);
 }
@@ -340,4 +334,12 @@ static inline long plpar_get_cpu_characteristics(struct h_cpu_char_result *p)
 	return rc;
 }
 
+#else /* !CONFIG_PPC_PSERIES */
+
+static inline long plpar_set_ciabr(unsigned long ciabr)
+{
+	return 0;
+}
+#endif /* CONFIG_PPC_PSERIES */
+
 #endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */
diff --git a/arch/powerpc/include/asm/pmc.h b/arch/powerpc/include/asm/pmc.h
index 5a9ede4962cb..7ac3586c38ab 100644
--- a/arch/powerpc/include/asm/pmc.h
+++ b/arch/powerpc/include/asm/pmc.h
@@ -31,10 +31,21 @@ void ppc_enable_pmcs(void);
 
 #ifdef CONFIG_PPC_BOOK3S_64
 #include <asm/lppaca.h>
+#include <asm/firmware.h>
 
 static inline void ppc_set_pmu_inuse(int inuse)
 {
-	get_lppaca()->pmcregs_in_use = inuse;
+#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE)
+	if (firmware_has_feature(FW_FEATURE_LPAR)) {
+#ifdef CONFIG_PPC_PSERIES
+		get_lppaca()->pmcregs_in_use = inuse;
+#endif
+	} else {
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+		get_paca()->pmcregs_in_use = inuse;
+#endif
+	}
+#endif
 }
 
 extern void power4_enable_pmcs(void);
diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h
index 3e5cf251ad9a..d2d8c28db336 100644
--- a/arch/powerpc/include/asm/pnv-pci.h
+++ b/arch/powerpc/include/asm/pnv-pci.h
@@ -29,6 +29,12 @@ extern int pnv_pci_set_power_state(uint64_t id, uint8_t state,
 extern int pnv_pci_set_p2p(struct pci_dev *initiator, struct pci_dev *target,
 			   u64 desc);
 
+extern int pnv_pci_enable_tunnel(struct pci_dev *dev, uint64_t *asnind);
+extern int pnv_pci_disable_tunnel(struct pci_dev *dev);
+extern int pnv_pci_set_tunnel_bar(struct pci_dev *dev, uint64_t addr,
+				  int enable);
+extern int pnv_pci_get_as_notify_info(struct task_struct *task, u32 *lpid,
+				      u32 *pid, u32 *tid);
 int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode);
 int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq,
 			   unsigned int virq);
diff --git a/arch/powerpc/include/asm/powernv.h b/arch/powerpc/include/asm/powernv.h
index dc5f6a5d4575..d1c2d2e658cf 100644
--- a/arch/powerpc/include/asm/powernv.h
+++ b/arch/powerpc/include/asm/powernv.h
@@ -40,6 +40,7 @@ static inline int pnv_npu2_handle_fault(struct npu_context *context,
 }
 
 static inline void pnv_tm_init(void) { }
+static inline void pnv_power9_force_smt4(void) { }
 #endif
 
 #endif /* _ASM_POWERNV_H */
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index f1083bcf449c..18883b8a6dac 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -232,6 +232,7 @@
 #define PPC_INST_MSGSYNC		0x7c0006ec
 #define PPC_INST_MSGSNDP		0x7c00011c
 #define PPC_INST_MSGCLRP		0x7c00015c
+#define PPC_INST_MTMSRD			0x7c000164
 #define PPC_INST_MTTMR			0x7c0003dc
 #define PPC_INST_NOP			0x60000000
 #define PPC_INST_PASTE			0x7c20070d
@@ -239,8 +240,10 @@
 #define PPC_INST_POPCNTB_MASK		0xfc0007fe
 #define PPC_INST_POPCNTD		0x7c0003f4
 #define PPC_INST_POPCNTW		0x7c0002f4
+#define PPC_INST_RFEBB			0x4c000124
 #define PPC_INST_RFCI			0x4c000066
 #define PPC_INST_RFDI			0x4c00004e
+#define PPC_INST_RFID			0x4c000024
 #define PPC_INST_RFMCI			0x4c00004c
 #define PPC_INST_MFSPR			0x7c0002a6
 #define PPC_INST_MFSPR_DSCR		0x7c1102a6
@@ -271,12 +274,14 @@
 #define PPC_INST_TLBSRX_DOT		0x7c0006a5
 #define PPC_INST_VPMSUMW		0x10000488
 #define PPC_INST_VPMSUMD		0x100004c8
+#define PPC_INST_VPERMXOR		0x1000002d
 #define PPC_INST_XXLOR			0xf0000490
 #define PPC_INST_XXSWAPD		0xf0000250
 #define PPC_INST_XVCPSGNDP		0xf0000780
 #define PPC_INST_TRECHKPT		0x7c0007dd
 #define PPC_INST_TRECLAIM		0x7c00075d
 #define PPC_INST_TABORT			0x7c00071d
+#define PPC_INST_TSR			0x7c0005dd
 
 #define PPC_INST_NAP			0x4c000364
 #define PPC_INST_SLEEP			0x4c0003a4
@@ -517,6 +522,11 @@
 #define XVCPSGNDP(t, a, b)	stringify_in_c(.long (PPC_INST_XVCPSGNDP | \
 					       VSX_XX3((t), (a), (b))))
 
+#define VPERMXOR(vrt, vra, vrb, vrc)				\
+	stringify_in_c(.long (PPC_INST_VPERMXOR |		\
+			      ___PPC_RT(vrt) | ___PPC_RA(vra) | \
+			      ___PPC_RB(vrb) | (((vrc) & 0x1f) << 6)))
+
 #define PPC_NAP			stringify_in_c(.long PPC_INST_NAP)
 #define PPC_SLEEP		stringify_in_c(.long PPC_INST_SLEEP)
 #define PPC_WINKLE		stringify_in_c(.long PPC_INST_WINKLE)
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index ae94b3626b6c..13f7f4c0e1ea 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -439,14 +439,11 @@ END_FTR_SECTION_IFCLR(CPU_FTR_601)
 
 /* The following stops all load and store data streams associated with stream
  * ID (ie. streams created explicitly).  The embedded and server mnemonics for
- * dcbt are different so we use machine "power4" here explicitly.
+ * dcbt are different so this must only be used for server.
  */
-#define DCBT_STOP_ALL_STREAM_IDS(scratch)	\
-.machine push ;					\
-.machine "power4" ;				\
-       lis     scratch,0x60000000@h;		\
-       dcbt    0,scratch,0b01010;		\
-.machine pop
+#define DCBT_BOOK3S_STOP_ALL_STREAM_IDS(scratch)	\
+       lis     scratch,0x60000000@h;			\
+       dcbt    0,scratch,0b01010
 
 /*
  * toreal/fromreal/tophys/tovirt macros. 32-bit BookE makes them
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 01299cdc9806..c4b36a494a63 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -109,6 +109,13 @@ void release_thread(struct task_struct *);
 #define TASK_SIZE_64TB  (0x0000400000000000UL)
 #define TASK_SIZE_128TB (0x0000800000000000UL)
 #define TASK_SIZE_512TB (0x0002000000000000UL)
+#define TASK_SIZE_1PB   (0x0004000000000000UL)
+#define TASK_SIZE_2PB   (0x0008000000000000UL)
+/*
+ * With 52 bits in the address we can support
+ * up to 4PB of range.
+ */
+#define TASK_SIZE_4PB   (0x0010000000000000UL)
 
 /*
  * For now 512TB is only supported with book3s and 64K linux page size.
@@ -117,11 +124,17 @@ void release_thread(struct task_struct *);
 /*
  * Max value currently used:
  */
-#define TASK_SIZE_USER64		TASK_SIZE_512TB
+#define TASK_SIZE_USER64		TASK_SIZE_4PB
 #define DEFAULT_MAP_WINDOW_USER64	TASK_SIZE_128TB
+#define TASK_CONTEXT_SIZE		TASK_SIZE_512TB
 #else
 #define TASK_SIZE_USER64		TASK_SIZE_64TB
 #define DEFAULT_MAP_WINDOW_USER64	TASK_SIZE_64TB
+/*
+ * We don't need to allocate extended context ids for 4K page size, because
+ * we limit the max effective address on this config to 64TB.
+ */
+#define TASK_CONTEXT_SIZE		TASK_SIZE_64TB
 #endif
 
 /*
@@ -505,6 +518,7 @@ extern int powersave_nap;	/* set if nap mode can be used in idle loop */
 extern unsigned long power7_idle_insn(unsigned long type); /* PNV_THREAD_NAP/etc*/
 extern void power7_idle_type(unsigned long type);
 extern unsigned long power9_idle_stop(unsigned long psscr_val);
+extern unsigned long power9_offline_stop(unsigned long psscr_val);
 extern void power9_idle_type(unsigned long stop_psscr_val,
 			      unsigned long stop_psscr_mask);
 
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index e6c7eadf6bce..cb0f272ce123 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -156,6 +156,8 @@
 #define PSSCR_SD		0x00400000 /* Status Disable */
 #define PSSCR_PLS	0xf000000000000000 /* Power-saving Level Status */
 #define PSSCR_GUEST_VIS	0xf0000000000003ff /* Guest-visible PSSCR fields */
+#define PSSCR_FAKE_SUSPEND	0x00000400 /* Fake-suspend bit (P9 DD2.2) */
+#define PSSCR_FAKE_SUSPEND_LG	10	   /* Fake-suspend bit position */
 
 /* Floating Point Status and Control Register (FPSCR) Fields */
 #define FPSCR_FX	0x80000000	/* FPU exception summary */
@@ -237,7 +239,12 @@
 #define SPRN_TFIAR	0x81	/* Transaction Failure Inst Addr   */
 #define SPRN_TEXASR	0x82	/* Transaction EXception & Summary */
 #define SPRN_TEXASRU	0x83	/* ''	   ''	   ''	 Upper 32  */
+#define   TEXASR_ABORT	__MASK(63-31) /* terminated by tabort or treclaim */
+#define   TEXASR_SUSP	__MASK(63-32) /* tx failed in suspended state */
+#define   TEXASR_HV	__MASK(63-34) /* MSR[HV] when failure occurred */
+#define   TEXASR_PR	__MASK(63-35) /* MSR[PR] when failure occurred */
 #define   TEXASR_FS	__MASK(63-36) /* TEXASR Failure Summary */
+#define   TEXASR_EXACT	__MASK(63-37) /* TFIAR value is exact */
 #define SPRN_TFHAR	0x80	/* Transaction Failure Handler Addr */
 #define SPRN_TIDR	144	/* Thread ID register */
 #define SPRN_CTRLF	0x088
diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h
new file mode 100644
index 000000000000..fa4d2e1cf772
--- /dev/null
+++ b/arch/powerpc/include/asm/security_features.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Security related feature bit definitions.
+ *
+ * Copyright 2018, Michael Ellerman, IBM Corporation.
+ */
+
+#ifndef _ASM_POWERPC_SECURITY_FEATURES_H
+#define _ASM_POWERPC_SECURITY_FEATURES_H
+
+
+extern unsigned long powerpc_security_features;
+extern bool rfi_flush;
+
+static inline void security_ftr_set(unsigned long feature)
+{
+	powerpc_security_features = powerpc_security_features | feature;
+}
+
+static inline void security_ftr_clear(unsigned long feature)
+{
+	powerpc_security_features = powerpc_security_features & ~feature;
+}
+
+static inline bool security_ftr_enabled(unsigned long feature)
+{
+	return (powerpc_security_features & feature) != 0;
+}
+
+
+// Features indicating support for Spectre/Meltdown mitigations
+
+// The L1-D cache can be flushed with ori r30,r30,0
+#define SEC_FTR_L1D_FLUSH_ORI30		0x0000000000000001ull
+
+// The L1-D cache can be flushed with mtspr 882,r0 (aka SPRN_TRIG2)
+#define SEC_FTR_L1D_FLUSH_TRIG2		0x0000000000000002ull
+
+// ori r31,r31,0 acts as a speculation barrier
+#define SEC_FTR_SPEC_BAR_ORI31		0x0000000000000004ull
+
+// Speculation past bctr is disabled
+#define SEC_FTR_BCCTRL_SERIALISED	0x0000000000000008ull
+
+// Entries in L1-D are private to a SMT thread
+#define SEC_FTR_L1D_THREAD_PRIV		0x0000000000000010ull
+
+// Indirect branch prediction cache disabled
+#define SEC_FTR_COUNT_CACHE_DISABLED	0x0000000000000020ull
+
+
+// Features indicating need for Spectre/Meltdown mitigations
+
+// The L1-D cache should be flushed on MSR[HV] 1->0 transition (hypervisor to guest)
+#define SEC_FTR_L1D_FLUSH_HV		0x0000000000000040ull
+
+// The L1-D cache should be flushed on MSR[PR] 0->1 transition (kernel to userspace)
+#define SEC_FTR_L1D_FLUSH_PR		0x0000000000000080ull
+
+// A speculation barrier should be used for bounds checks (Spectre variant 1)
+#define SEC_FTR_BNDS_CHK_SPEC_BAR	0x0000000000000100ull
+
+// Firmware configuration indicates user favours security over performance
+#define SEC_FTR_FAVOUR_SECURITY		0x0000000000000200ull
+
+
+// Features enabled by default
+#define SEC_FTR_DEFAULT \
+	(SEC_FTR_L1D_FLUSH_HV | \
+	 SEC_FTR_L1D_FLUSH_PR | \
+	 SEC_FTR_BNDS_CHK_SPEC_BAR | \
+	 SEC_FTR_FAVOUR_SECURITY)
+
+#endif /* _ASM_POWERPC_SECURITY_FEATURES_H */
diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h
index 469b7fdc9be4..27fa52ed6d00 100644
--- a/arch/powerpc/include/asm/setup.h
+++ b/arch/powerpc/include/asm/setup.h
@@ -23,6 +23,7 @@ extern void reloc_got2(unsigned long);
 #define PTRRELOC(x)	((typeof(x)) add_reloc_offset((unsigned long)(x)))
 
 void check_for_initrd(void);
+void mem_topology_setup(void);
 void initmem_init(void);
 void setup_panic(void);
 #define ARCH_PANIC_TIMEOUT 180
@@ -49,7 +50,7 @@ enum l1d_flush_type {
 	L1D_FLUSH_MTTRIG	= 0x8,
 };
 
-void __init setup_rfi_flush(enum l1d_flush_type, bool enable);
+void setup_rfi_flush(enum l1d_flush_type, bool enable);
 void do_rfi_flush_fixups(enum l1d_flush_type types);
 
 #endif /* !__ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/slice.h b/arch/powerpc/include/asm/slice.h
new file mode 100644
index 000000000000..e40406cf5628
--- /dev/null
+++ b/arch/powerpc/include/asm/slice.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_SLICE_H
+#define _ASM_POWERPC_SLICE_H
+
+#ifdef CONFIG_PPC_BOOK3S_64
+#include <asm/book3s/64/slice.h>
+#elif defined(CONFIG_PPC64)
+#include <asm/nohash/64/slice.h>
+#elif defined(CONFIG_PPC_MMU_NOHASH)
+#include <asm/nohash/32/slice.h>
+#endif
+
+#ifdef CONFIG_PPC_MM_SLICES
+
+#ifdef CONFIG_HUGETLB_PAGE
+#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+#endif
+#define HAVE_ARCH_UNMAPPED_AREA
+#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
+
+#ifndef __ASSEMBLY__
+
+struct mm_struct;
+
+unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
+				      unsigned long flags, unsigned int psize,
+				      int topdown);
+
+unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr);
+
+void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
+			   unsigned long len, unsigned int psize);
+
+void slice_init_new_context_exec(struct mm_struct *mm);
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* CONFIG_PPC_MM_SLICES */
+
+#endif /* _ASM_POWERPC_SLICE_H */
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index fac963e10d39..cfecfee1194b 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -31,6 +31,7 @@
 
 extern int boot_cpuid;
 extern int spinning_secondaries;
+extern u32 *cpu_to_phys_id;
 
 extern void cpu_die(void);
 extern int cpu_to_chip_id(int cpu);
@@ -170,12 +171,12 @@ static inline const struct cpumask *cpu_sibling_mask(int cpu)
 #ifdef CONFIG_PPC64
 static inline int get_hard_smp_processor_id(int cpu)
 {
-	return paca[cpu].hw_cpu_id;
+	return paca_ptrs[cpu]->hw_cpu_id;
 }
 
 static inline void set_hard_smp_processor_id(int cpu, int phys)
 {
-	paca[cpu].hw_cpu_id = phys;
+	paca_ptrs[cpu]->hw_cpu_id = phys;
 }
 #else
 /* 32-bit */
diff --git a/arch/powerpc/include/asm/sparsemem.h b/arch/powerpc/include/asm/sparsemem.h
index a7916ee6dfb6..bc66712bdc3c 100644
--- a/arch/powerpc/include/asm/sparsemem.h
+++ b/arch/powerpc/include/asm/sparsemem.h
@@ -17,7 +17,7 @@
 #endif /* CONFIG_SPARSEMEM */
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-extern int create_section_mapping(unsigned long start, unsigned long end);
+extern int create_section_mapping(unsigned long start, unsigned long end, int nid);
 extern int remove_section_mapping(unsigned long start, unsigned long end);
 
 #ifdef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index b9ebc3085fb7..72dc4ddc2972 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -56,6 +56,8 @@
 #define vcpu_is_preempted vcpu_is_preempted
 static inline bool vcpu_is_preempted(int cpu)
 {
+	if (!firmware_has_feature(FW_FEATURE_SPLPAR))
+		return false;
 	return !!(be32_to_cpu(lppaca_of(cpu).yield_count) & 1);
 }
 #endif
diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h
index c3ca42cdc9f5..be8c9fa23983 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -35,7 +35,6 @@ static inline void disable_kernel_fp(void)
 	msr_check_and_clear(MSR_FP);
 }
 #else
-static inline void __giveup_fpu(struct task_struct *t) { }
 static inline void save_fpu(struct task_struct *t) { }
 static inline void flush_fp_to_thread(struct task_struct *t) { }
 #endif
diff --git a/arch/powerpc/include/asm/synch.h b/arch/powerpc/include/asm/synch.h
index 63e7f5a1f105..6ec546090ba1 100644
--- a/arch/powerpc/include/asm/synch.h
+++ b/arch/powerpc/include/asm/synch.h
@@ -6,10 +6,6 @@
 #include <linux/stringify.h>
 #include <asm/feature-fixups.h>
 
-#if defined(__powerpc64__) || defined(CONFIG_PPC_E500MC)
-#define __SUBARCH_HAS_LWSYNC
-#endif
-
 #ifndef __ASSEMBLY__
 extern unsigned int __start___lwsync_fixup, __stop___lwsync_fixup;
 extern void do_lwsync_fixups(unsigned long value, void *fixup_start,
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index 4a12c00f8de3..5964145db03d 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -70,6 +70,7 @@ static inline struct thread_info *current_thread_info(void)
 	return (struct thread_info *)val;
 }
 
+extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
 #endif /* __ASSEMBLY__ */
 
 /*
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index b240666b7bc1..db546c034905 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -31,6 +31,7 @@ extern void to_tm(int tim, struct rtc_time * tm);
 extern void tick_broadcast_ipi_handler(void);
 
 extern void generic_calibrate_decr(void);
+extern void hdec_interrupt(struct pt_regs *regs);
 
 /* Some sane defaults: 125 MHz timebase, 1GHz processor */
 extern unsigned long ppc_proc_freq;
@@ -46,7 +47,7 @@ struct div_result {
 /* Accessor functions for the timebase (RTC on 601) registers. */
 /* If one day CONFIG_POWER is added just define __USE_RTC as 1 */
 #ifdef CONFIG_6xx
-#define __USE_RTC()	(!cpu_has_feature(CPU_FTR_USE_TB))
+#define __USE_RTC()	(cpu_has_feature(CPU_FTR_USE_RTC))
 #else
 #define __USE_RTC()	0
 #endif
@@ -204,6 +205,7 @@ struct cpu_usage {
 DECLARE_PER_CPU(struct cpu_usage, cpu_usage_array);
 
 extern void secondary_cpu_time_init(void);
+extern void __init time_init(void);
 
 DECLARE_PER_CPU(u64, decrementers_next_tb);
 
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 51bfeb8777f0..a62ee663b2c8 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -47,9 +47,13 @@
 
 #else
 
-#define __access_ok(addr, size, segment)	\
-	(((addr) <= (segment).seg) &&		\
-	 (((size) == 0) || (((size) - 1) <= ((segment).seg - (addr)))))
+static inline int __access_ok(unsigned long addr, unsigned long size,
+			mm_segment_t seg)
+{
+	/* OK iff addr is within the limit and so is the range's last byte. */
+	return addr <= seg.seg &&
+	       (size == 0 || size - 1 <= seg.seg - addr);
+}
 
 #endif
 
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 2358f97d62ec..2b4c40b255e4 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -42,7 +42,7 @@ obj-$(CONFIG_VDSO32)		+= vdso32/
 obj-$(CONFIG_PPC_WATCHDOG)	+= watchdog.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= hw_breakpoint.o
 obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_ppc970.o cpu_setup_pa6t.o
-obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_power.o
+obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_power.o security.o
 obj-$(CONFIG_PPC_BOOK3S_64)	+= mce.o mce_power.o
 obj-$(CONFIG_PPC_BOOK3E_64)	+= exceptions-64e.o idle_book3e.o
 obj-$(CONFIG_PPC64)		+= vdso64/
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index ea5eb91b836e..6bee65f3cfd3 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -221,12 +221,17 @@ int main(void)
 	OFFSET(PACA_EXMC, paca_struct, exmc);
 	OFFSET(PACA_EXSLB, paca_struct, exslb);
 	OFFSET(PACA_EXNMI, paca_struct, exnmi);
+#ifdef CONFIG_PPC_PSERIES
 	OFFSET(PACALPPACAPTR, paca_struct, lppaca_ptr);
+#endif
 	OFFSET(PACA_SLBSHADOWPTR, paca_struct, slb_shadow_ptr);
 	OFFSET(SLBSHADOW_STACKVSID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid);
 	OFFSET(SLBSHADOW_STACKESID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid);
 	OFFSET(SLBSHADOW_SAVEAREA, slb_shadow, save_area);
 	OFFSET(LPPACA_PMCINUSE, lppaca, pmcregs_in_use);
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	OFFSET(PACA_PMCINUSE, paca_struct, pmcregs_in_use);
+#endif
 	OFFSET(LPPACA_DTLIDX, lppaca, dtl_idx);
 	OFFSET(LPPACA_YIELDCOUNT, lppaca, yield_count);
 	OFFSET(PACA_DTL_RIDX, paca_struct, dtl_ridx);
@@ -568,6 +573,7 @@ int main(void)
 	OFFSET(VCPU_TFHAR, kvm_vcpu, arch.tfhar);
 	OFFSET(VCPU_TFIAR, kvm_vcpu, arch.tfiar);
 	OFFSET(VCPU_TEXASR, kvm_vcpu, arch.texasr);
+	OFFSET(VCPU_ORIG_TEXASR, kvm_vcpu, arch.orig_texasr);
 	OFFSET(VCPU_GPR_TM, kvm_vcpu, arch.gpr_tm);
 	OFFSET(VCPU_FPRS_TM, kvm_vcpu, arch.fp_tm.fpr);
 	OFFSET(VCPU_VRS_TM, kvm_vcpu, arch.vr_tm.vr);
@@ -650,6 +656,7 @@ int main(void)
 	HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi);
 	HSTATE_FIELD(HSTATE_PTID, ptid);
 	HSTATE_FIELD(HSTATE_TID, tid);
+	HSTATE_FIELD(HSTATE_FAKE_SUSPEND, fake_suspend);
 	HSTATE_FIELD(HSTATE_MMCR0, host_mmcr[0]);
 	HSTATE_FIELD(HSTATE_MMCR1, host_mmcr[1]);
 	HSTATE_FIELD(HSTATE_MMCRA, host_mmcr[2]);
@@ -759,6 +766,7 @@ int main(void)
 	OFFSET(PACA_SUBCORE_SIBLING_MASK, paca_struct, subcore_sibling_mask);
 	OFFSET(PACA_SIBLING_PACA_PTRS, paca_struct, thread_sibling_pacas);
 	OFFSET(PACA_REQ_PSSCR, paca_struct, requested_psscr);
+	OFFSET(PACA_DONT_STOP, paca_struct, dont_stop);
 #define STOP_SPR(x, f)	OFFSET(x, paca_struct, stop_sprs.f)
 	STOP_SPR(STOP_PID, pid);
 	STOP_SPR(STOP_LDBAR, ldbar);
diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S
index c5e5a94d9892..a9f3970693e1 100644
--- a/arch/powerpc/kernel/cpu_setup_6xx.S
+++ b/arch/powerpc/kernel/cpu_setup_6xx.S
@@ -226,7 +226,7 @@ BEGIN_FTR_SECTION
 	beq	1f
 END_FTR_SECTION_IFSET(CPU_FTR_L3CR)
 	lwz	r6,CPU_SPEC_FEATURES(r4)
-	andi.	r0,r6,CPU_FTR_L3_DISABLE_NAP
+	andis.	r0,r6,CPU_FTR_L3_DISABLE_NAP@h
 	beq	1f
 	li	r7,CPU_FTR_CAN_NAP
 	andc	r6,r6,r7
diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
index 462aed9bcf51..8d142e5d84cd 100644
--- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S
+++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
@@ -162,7 +162,7 @@ _GLOBAL(__setup_cpu_e5500)
 	 * the feature on the primary core, avoid doing it on the
 	 * secondary core.
 	 */
-	andis.	r6, r3, CPU_FTR_EMB_HV@h
+	andi.	r6, r3, CPU_FTR_EMB_HV
 	beq	2f
 	rlwinm	r3, r3, 0, ~CPU_FTR_EMB_HV
 	stw	r3, CPU_SPEC_FEATURES(r4)
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index c40a9fc1e5d1..c8fc9691f8c7 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -133,36 +133,6 @@ extern void __restore_cpu_e6500(void);
 
 static struct cpu_spec __initdata cpu_specs[] = {
 #ifdef CONFIG_PPC_BOOK3S_64
-	{	/* Power4 */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x00350000,
-		.cpu_name		= "POWER4 (gp)",
-		.cpu_features		= CPU_FTRS_POWER4,
-		.cpu_user_features	= COMMON_USER_POWER4,
-		.mmu_features		= MMU_FTRS_POWER4 | MMU_FTR_TLBIE_CROP_VA,
-		.icache_bsize		= 128,
-		.dcache_bsize		= 128,
-		.num_pmcs		= 8,
-		.pmc_type		= PPC_PMC_IBM,
-		.oprofile_cpu_type	= "ppc64/power4",
-		.oprofile_type		= PPC_OPROFILE_POWER4,
-		.platform		= "power4",
-	},
-	{	/* Power4+ */
-		.pvr_mask		= 0xffff0000,
-		.pvr_value		= 0x00380000,
-		.cpu_name		= "POWER4+ (gq)",
-		.cpu_features		= CPU_FTRS_POWER4,
-		.cpu_user_features	= COMMON_USER_POWER4,
-		.mmu_features		= MMU_FTRS_POWER4 | MMU_FTR_TLBIE_CROP_VA,
-		.icache_bsize		= 128,
-		.dcache_bsize		= 128,
-		.num_pmcs		= 8,
-		.pmc_type		= PPC_PMC_IBM,
-		.oprofile_cpu_type	= "ppc64/power4",
-		.oprofile_type		= PPC_OPROFILE_POWER4,
-		.platform		= "power4",
-	},
 	{	/* PPC970 */
 		.pvr_mask		= 0xffff0000,
 		.pvr_value		= 0x00390000,
@@ -553,11 +523,30 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.machine_check_early	= __machine_check_early_realmode_p9,
 		.platform		= "power9",
 	},
-	{	/* Power9 DD 2.1 or later (see DD2.0 above) */
+	{	/* Power9 DD 2.1 */
+		.pvr_mask		= 0xffffefff,
+		.pvr_value		= 0x004e0201,
+		.cpu_name		= "POWER9 (raw)",
+		.cpu_features		= CPU_FTRS_POWER9_DD2_1,
+		.cpu_user_features	= COMMON_USER_POWER9,
+		.cpu_user_features2	= COMMON_USER2_POWER9,
+		.mmu_features		= MMU_FTRS_POWER9,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_IBM,
+		.oprofile_cpu_type	= "ppc64/power9",
+		.oprofile_type		= PPC_OPROFILE_INVALID,
+		.cpu_setup		= __setup_cpu_power9,
+		.cpu_restore		= __restore_cpu_power9,
+		.machine_check_early	= __machine_check_early_realmode_p9,
+		.platform		= "power9",
+	},
+	{	/* Power9 DD2.2 or later */
 		.pvr_mask		= 0xffff0000,
 		.pvr_value		= 0x004e0000,
 		.cpu_name		= "POWER9 (raw)",
-		.cpu_features		= CPU_FTRS_POWER9_DD2_1,
+		.cpu_features		= CPU_FTRS_POWER9_DD2_2,
 		.cpu_user_features	= COMMON_USER_POWER9,
 		.cpu_user_features2	= COMMON_USER2_POWER9,
 		.mmu_features		= MMU_FTRS_POWER9,
@@ -609,15 +598,15 @@ static struct cpu_spec __initdata cpu_specs[] = {
 	{	/* default match */
 		.pvr_mask		= 0x00000000,
 		.pvr_value		= 0x00000000,
-		.cpu_name		= "POWER4 (compatible)",
+		.cpu_name		= "POWER5 (compatible)",
 		.cpu_features		= CPU_FTRS_COMPATIBLE,
 		.cpu_user_features	= COMMON_USER_PPC64,
-		.mmu_features		= MMU_FTRS_DEFAULT_HPTE_ARCH_V2,
+		.mmu_features		= MMU_FTRS_POWER,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
 		.num_pmcs		= 6,
 		.pmc_type		= PPC_PMC_IBM,
-		.platform		= "power4",
+		.platform		= "power5",
 	}
 #endif	/* CONFIG_PPC_BOOK3S_64 */
 
diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
index 00b215125d3e..17c8b99680f2 100644
--- a/arch/powerpc/kernel/crash.c
+++ b/arch/powerpc/kernel/crash.c
@@ -238,7 +238,7 @@ static void __maybe_unused crash_kexec_wait_realmode(int cpu)
 		if (i == cpu)
 			continue;
 
-		while (paca[i].kexec_state < KEXEC_STATE_REAL_MODE) {
+		while (paca_ptrs[i]->kexec_state < KEXEC_STATE_REAL_MODE) {
 			barrier();
 			if (!cpu_possible(i) || !cpu_online(i) || (msecs <= 0))
 				break;
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 8ca5d5b74618..8ab51f6ca03a 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -53,19 +53,6 @@ struct dt_cpu_feature {
 	int disabled;
 };
 
-#define CPU_FTRS_BASE \
-	   (CPU_FTR_USE_TB | \
-	    CPU_FTR_LWSYNC | \
-	    CPU_FTR_FPU_UNAVAILABLE |\
-	    CPU_FTR_NODSISRALIGN |\
-	    CPU_FTR_NOEXECUTE |\
-	    CPU_FTR_COHERENT_ICACHE | \
-	    CPU_FTR_STCX_CHECKS_ADDRESS |\
-	    CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
-	    CPU_FTR_DAWR | \
-	    CPU_FTR_ARCH_206 |\
-	    CPU_FTR_ARCH_207S)
-
 #define MMU_FTRS_HASH_BASE (MMU_FTRS_POWER8)
 
 #define COMMON_USER_BASE	(PPC_FEATURE_32 | PPC_FEATURE_64 | \
@@ -84,6 +71,7 @@ static int hv_mode;
 
 static struct {
 	u64	lpcr;
+	u64	lpcr_clear;
 	u64	hfscr;
 	u64	fscr;
 } system_registers;
@@ -92,6 +80,8 @@ static void (*init_pmu_registers)(void);
 
 static void __restore_cpu_cpufeatures(void)
 {
+	u64 lpcr;
+
 	/*
 	 * LPCR is restored by the power on engine already. It can be changed
 	 * after early init e.g., by radix enable, and we have no unified API
@@ -104,8 +94,10 @@ static void __restore_cpu_cpufeatures(void)
 	 * The best we can do to accommodate secondary boot and idle restore
 	 * for now is "or" LPCR with existing.
 	 */
-
-	mtspr(SPRN_LPCR, system_registers.lpcr | mfspr(SPRN_LPCR));
+	lpcr = mfspr(SPRN_LPCR);
+	lpcr |= system_registers.lpcr;
+	lpcr &= ~system_registers.lpcr_clear;
+	mtspr(SPRN_LPCR, lpcr);
 	if (hv_mode) {
 		mtspr(SPRN_LPID, 0);
 		mtspr(SPRN_HFSCR, system_registers.hfscr);
@@ -120,7 +112,7 @@ static char dt_cpu_name[64];
 
 static struct cpu_spec __initdata base_cpu_spec = {
 	.cpu_name		= NULL,
-	.cpu_features		= CPU_FTRS_BASE,
+	.cpu_features		= CPU_FTRS_DT_CPU_BASE,
 	.cpu_user_features	= COMMON_USER_BASE,
 	.cpu_user_features2	= COMMON_USER2_BASE,
 	.mmu_features		= 0,
@@ -325,8 +317,9 @@ static int __init feat_enable_mmu_hash_v3(struct dt_cpu_feature *f)
 {
 	u64 lpcr;
 
+	system_registers.lpcr_clear |= (LPCR_ISL | LPCR_UPRT | LPCR_HR);
 	lpcr = mfspr(SPRN_LPCR);
-	lpcr &= ~LPCR_ISL;
+	lpcr &= ~(LPCR_ISL | LPCR_UPRT | LPCR_HR);
 	mtspr(SPRN_LPCR, lpcr);
 
 	cur_cpu_spec->mmu_features |= MMU_FTRS_HASH_BASE;
@@ -590,6 +583,8 @@ static struct dt_cpu_feature_match __initdata
 	{"virtual-page-class-key-protection", feat_enable, 0},
 	{"transactional-memory", feat_enable_tm, CPU_FTR_TM},
 	{"transactional-memory-v3", feat_enable_tm, 0},
+	{"tm-suspend-hypervisor-assist", feat_enable, CPU_FTR_P9_TM_HV_ASSIST},
+	{"tm-suspend-xer-so-bug", feat_enable, CPU_FTR_P9_TM_XER_SO_BUG},
 	{"idle-nap", feat_enable_idle_nap, 0},
 	{"alignment-interrupt-dsisr", feat_enable_align_dsisr, 0},
 	{"idle-stop", feat_enable_idle_stop, 0},
@@ -707,11 +702,28 @@ static __init void cpufeatures_cpu_quirks(void)
 	 */
 	if ((version & 0xffffff00) == 0x004e0100)
 		cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD1;
+	else if ((version & 0xffffefff) == 0x004e0200)
+		; /* DD2.0 has no feature flag */
 	else if ((version & 0xffffefff) == 0x004e0201)
 		cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
+	else if ((version & 0xffffefff) == 0x004e0202) {
+		cur_cpu_spec->cpu_features |= CPU_FTR_P9_TM_HV_ASSIST;
+		cur_cpu_spec->cpu_features |= CPU_FTR_P9_TM_XER_SO_BUG;
+		cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
+	} else /* DD2.1 and up have DD2_1 */
+		cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
 
-	if ((version & 0xffff0000) == 0x004e0000)
+	if ((version & 0xffff0000) == 0x004e0000) {
+		cur_cpu_spec->cpu_features &= ~(CPU_FTR_DAWR);
 		cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG;
+	}
+
+	/*
+	 * PKEY was not in the initial base or feature node
+	 * specification, but it should become optional in the next
+	 * cpu feature version sequence.
+	 */
+	cur_cpu_spec->cpu_features |= CPU_FTR_PKEY;
 }
 
 static void __init cpufeatures_setup_finished(void)
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 2b9df0040d6b..bc640e4c5ca5 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -394,9 +394,7 @@ static int eeh_phb_check_failure(struct eeh_pe *pe)
 	/* Check PHB state */
 	ret = eeh_ops->get_state(phb_pe, NULL);
 	if ((ret < 0) ||
-	    (ret == EEH_STATE_NOT_SUPPORT) ||
-	    (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) ==
-	    (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) {
+	    (ret == EEH_STATE_NOT_SUPPORT) || eeh_state_active(ret)) {
 		ret = 0;
 		goto out;
 	}
@@ -433,7 +431,6 @@ out:
 int eeh_dev_check_failure(struct eeh_dev *edev)
 {
 	int ret;
-	int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
 	unsigned long flags;
 	struct device_node *dn;
 	struct pci_dev *dev;
@@ -525,8 +522,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
 	 * state, PE is in good state.
 	 */
 	if ((ret < 0) ||
-	    (ret == EEH_STATE_NOT_SUPPORT) ||
-	    ((ret & active_flags) == active_flags)) {
+	    (ret == EEH_STATE_NOT_SUPPORT) || eeh_state_active(ret)) {
 		eeh_stats.false_positives++;
 		pe->false_positives++;
 		rc = 0;
@@ -546,8 +542,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
 
 		/* Frozen parent PE ? */
 		ret = eeh_ops->get_state(parent_pe, NULL);
-		if (ret > 0 &&
-		    (ret & active_flags) != active_flags)
+		if (ret > 0 && !eeh_state_active(ret))
 			pe = parent_pe;
 
 		/* Next parent level */
@@ -888,7 +883,6 @@ static void *eeh_set_dev_freset(void *data, void *flag)
  */
 int eeh_pe_reset_full(struct eeh_pe *pe)
 {
-	int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
 	int reset_state = (EEH_PE_RESET | EEH_PE_CFG_BLOCKED);
 	int type = EEH_RESET_HOT;
 	unsigned int freset = 0;
@@ -919,7 +913,7 @@ int eeh_pe_reset_full(struct eeh_pe *pe)
 
 		/* Wait until the PE is in a functioning state */
 		state = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
-		if ((state & active_flags) == active_flags)
+		if (eeh_state_active(state))
 			break;
 
 		if (state < 0) {
@@ -1352,16 +1346,15 @@ static int eeh_pe_change_owner(struct eeh_pe *pe)
 	struct eeh_dev *edev, *tmp;
 	struct pci_dev *pdev;
 	struct pci_device_id *id;
-	int flags, ret;
+	int ret;
 
 	/* Check PE state */
-	flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
 	ret = eeh_ops->get_state(pe, NULL);
 	if (ret < 0 || ret == EEH_STATE_NOT_SUPPORT)
 		return 0;
 
 	/* Unfrozen PE, nothing to do */
-	if ((ret & flags) == flags)
+	if (eeh_state_active(ret))
 		return 0;
 
 	/* Frozen PE, check if it needs PE level reset */
diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c
index d4cc26618809..201943d54a6e 100644
--- a/arch/powerpc/kernel/eeh_cache.c
+++ b/arch/powerpc/kernel/eeh_cache.c
@@ -84,8 +84,7 @@ static inline struct eeh_dev *__eeh_addr_cache_get_device(unsigned long addr)
  * @addr: mmio (PIO) phys address or i/o port number
  *
  * Given an mmio phys address, or a port number, find a pci device
- * that implements this address.  Be sure to pci_dev_put the device
- * when finished.  I/O port numbers are assumed to be offset
+ * that implements this address.  I/O port numbers are assumed to be offset
  * from zero (that is, they do *not* have pci_io_addr added in).
  * It is safe to call this function within an interrupt.
  */
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 0c0b66fc5bfb..b8a329f04814 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -207,18 +207,18 @@ static void *eeh_report_error(void *data, void *userdata)
 
 	if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
 		return NULL;
+
+	device_lock(&dev->dev);
 	dev->error_state = pci_channel_io_frozen;
 
 	driver = eeh_pcid_get(dev);
-	if (!driver) return NULL;
+	if (!driver) goto out_no_dev;
 
 	eeh_disable_irq(dev);
 
 	if (!driver->err_handler ||
-	    !driver->err_handler->error_detected) {
-		eeh_pcid_put(dev);
-		return NULL;
-	}
+	    !driver->err_handler->error_detected)
+		goto out;
 
 	rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen);
 
@@ -227,8 +227,12 @@ static void *eeh_report_error(void *data, void *userdata)
 	if (*res == PCI_ERS_RESULT_NONE) *res = rc;
 
 	edev->in_error = true;
-	eeh_pcid_put(dev);
 	pci_uevent_ers(dev, PCI_ERS_RESULT_NONE);
+
+out:
+	eeh_pcid_put(dev);
+out_no_dev:
+	device_unlock(&dev->dev);
 	return NULL;
 }
 
@@ -251,15 +255,14 @@ static void *eeh_report_mmio_enabled(void *data, void *userdata)
 	if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
 		return NULL;
 
+	device_lock(&dev->dev);
 	driver = eeh_pcid_get(dev);
-	if (!driver) return NULL;
+	if (!driver) goto out_no_dev;
 
 	if (!driver->err_handler ||
 	    !driver->err_handler->mmio_enabled ||
-	    (edev->mode & EEH_DEV_NO_HANDLER)) {
-		eeh_pcid_put(dev);
-		return NULL;
-	}
+	    (edev->mode & EEH_DEV_NO_HANDLER))
+		goto out;
 
 	rc = driver->err_handler->mmio_enabled(dev);
 
@@ -267,7 +270,10 @@ static void *eeh_report_mmio_enabled(void *data, void *userdata)
 	if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
 	if (*res == PCI_ERS_RESULT_NONE) *res = rc;
 
+out:
 	eeh_pcid_put(dev);
+out_no_dev:
+	device_unlock(&dev->dev);
 	return NULL;
 }
 
@@ -290,20 +296,20 @@ static void *eeh_report_reset(void *data, void *userdata)
 
 	if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
 		return NULL;
+
+	device_lock(&dev->dev);
 	dev->error_state = pci_channel_io_normal;
 
 	driver = eeh_pcid_get(dev);
-	if (!driver) return NULL;
+	if (!driver) goto out_no_dev;
 
 	eeh_enable_irq(dev);
 
 	if (!driver->err_handler ||
 	    !driver->err_handler->slot_reset ||
 	    (edev->mode & EEH_DEV_NO_HANDLER) ||
-	    (!edev->in_error)) {
-		eeh_pcid_put(dev);
-		return NULL;
-	}
+	    (!edev->in_error))
+		goto out;
 
 	rc = driver->err_handler->slot_reset(dev);
 	if ((*res == PCI_ERS_RESULT_NONE) ||
@@ -311,7 +317,10 @@ static void *eeh_report_reset(void *data, void *userdata)
 	if (*res == PCI_ERS_RESULT_DISCONNECT &&
 	     rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
 
+out:
 	eeh_pcid_put(dev);
+out_no_dev:
+	device_unlock(&dev->dev);
 	return NULL;
 }
 
@@ -362,10 +371,12 @@ static void *eeh_report_resume(void *data, void *userdata)
 
 	if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
 		return NULL;
+
+	device_lock(&dev->dev);
 	dev->error_state = pci_channel_io_normal;
 
 	driver = eeh_pcid_get(dev);
-	if (!driver) return NULL;
+	if (!driver) goto out_no_dev;
 
 	was_in_error = edev->in_error;
 	edev->in_error = false;
@@ -375,18 +386,20 @@ static void *eeh_report_resume(void *data, void *userdata)
 	    !driver->err_handler->resume ||
 	    (edev->mode & EEH_DEV_NO_HANDLER) || !was_in_error) {
 		edev->mode &= ~EEH_DEV_NO_HANDLER;
-		eeh_pcid_put(dev);
-		return NULL;
+		goto out;
 	}
 
 	driver->err_handler->resume(dev);
 
-	eeh_pcid_put(dev);
 	pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);
+out:
+	eeh_pcid_put(dev);
 #ifdef CONFIG_PCI_IOV
 	if (eeh_ops->notify_resume && eeh_dev_to_pdn(edev))
 		eeh_ops->notify_resume(eeh_dev_to_pdn(edev));
 #endif
+out_no_dev:
+	device_unlock(&dev->dev);
 	return NULL;
 }
 
@@ -406,23 +419,26 @@ static void *eeh_report_failure(void *data, void *userdata)
 
 	if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
 		return NULL;
+
+	device_lock(&dev->dev);
 	dev->error_state = pci_channel_io_perm_failure;
 
 	driver = eeh_pcid_get(dev);
-	if (!driver) return NULL;
+	if (!driver) goto out_no_dev;
 
 	eeh_disable_irq(dev);
 
 	if (!driver->err_handler ||
-	    !driver->err_handler->error_detected) {
-		eeh_pcid_put(dev);
-		return NULL;
-	}
+	    !driver->err_handler->error_detected)
+		goto out;
 
 	driver->err_handler->error_detected(dev, pci_channel_io_perm_failure);
 
-	eeh_pcid_put(dev);
 	pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
+out:
+	eeh_pcid_put(dev);
+out_no_dev:
+	device_unlock(&dev->dev);
 	return NULL;
 }
 
@@ -619,17 +635,19 @@ int eeh_pe_reset_and_recover(struct eeh_pe *pe)
 
 /**
  * eeh_reset_device - Perform actual reset of a pci slot
+ * @driver_eeh_aware: Does the device's driver provide EEH support?
  * @pe: EEH PE
  * @bus: PCI bus corresponding to the isolcated slot
+ * @rmv_data: Optional, list to record removed devices
  *
  * This routine must be called to do reset on the indicated PE.
  * During the reset, udev might be invoked because those affected
  * PCI devices will be removed and then added.
  */
 static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
-				struct eeh_rmv_data *rmv_data)
+			    struct eeh_rmv_data *rmv_data,
+			    bool driver_eeh_aware)
 {
-	struct pci_bus *frozen_bus = eeh_pe_bus_get(pe);
 	time64_t tstamp;
 	int cnt, rc;
 	struct eeh_dev *edev;
@@ -645,16 +663,12 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
 	 * into pci_hp_add_devices().
 	 */
 	eeh_pe_state_mark(pe, EEH_PE_KEEP);
-	if (bus) {
-		if (pe->type & EEH_PE_VF) {
-			eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
-		} else {
-			pci_lock_rescan_remove();
-			pci_hp_remove_devices(bus);
-			pci_unlock_rescan_remove();
-		}
-	} else if (frozen_bus) {
+	if (driver_eeh_aware || (pe->type & EEH_PE_VF)) {
 		eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data);
+	} else {
+		pci_lock_rescan_remove();
+		pci_hp_remove_devices(bus);
+		pci_unlock_rescan_remove();
 	}
 
 	/*
@@ -689,8 +703,9 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
 	 * the device up before the scripts have taken it down,
 	 * potentially weird things happen.
 	 */
-	if (bus) {
-		pr_info("EEH: Sleep 5s ahead of complete hotplug\n");
+	if (!driver_eeh_aware || rmv_data->removed) {
+		pr_info("EEH: Sleep 5s ahead of %s hotplug\n",
+			(driver_eeh_aware ? "partial" : "complete"));
 		ssleep(5);
 
 		/*
@@ -703,19 +718,10 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
 		if (pe->type & EEH_PE_VF) {
 			eeh_add_virt_device(edev, NULL);
 		} else {
-			eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
+			if (!driver_eeh_aware)
+				eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
 			pci_hp_add_devices(bus);
 		}
-	} else if (frozen_bus && rmv_data->removed) {
-		pr_info("EEH: Sleep 5s ahead of partial hotplug\n");
-		ssleep(5);
-
-		edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
-		eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
-		if (pe->type & EEH_PE_VF)
-			eeh_add_virt_device(edev, NULL);
-		else
-			pci_hp_add_devices(frozen_bus);
 	}
 	eeh_pe_state_clear(pe, EEH_PE_KEEP);
 
@@ -733,28 +739,42 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
 
 /**
  * eeh_handle_normal_event - Handle EEH events on a specific PE
- * @pe: EEH PE
+ * @pe: EEH PE - which should not be used after we return, as it may
+ * have been invalidated.
  *
  * Attempts to recover the given PE.  If recovery fails or the PE has failed
  * too many times, remove the PE.
  *
- * Returns true if @pe should no longer be used, else false.
+ * When the PHB detects address or data parity errors on a particular
+ * PCI slot, the associated PE will be frozen. In addition, DMAs
+ * to wild addresses (which usually happen due to bugs in device
+ * drivers or in PCI adapter firmware) can cause an EEH error. #SERR,
+ * #PERR or other misc PCI-related errors can also trigger EEH errors.
+ *
+ * Recovery process consists of unplugging the device driver (which
+ * generates hotplug events to userspace), then issuing a PCI #RST to
+ * the device, then reconfiguring the PCI config space for all bridges
+ * & devices under this slot, and then finally restarting the device
+ * drivers (which cause a second set of hotplug events to go out to
+ * userspace).
  */
-static bool eeh_handle_normal_event(struct eeh_pe *pe)
+void eeh_handle_normal_event(struct eeh_pe *pe)
 {
-	struct pci_bus *frozen_bus;
+	struct pci_bus *bus;
 	struct eeh_dev *edev, *tmp;
 	int rc = 0;
 	enum pci_ers_result result = PCI_ERS_RESULT_NONE;
 	struct eeh_rmv_data rmv_data = {LIST_HEAD_INIT(rmv_data.edev_list), 0};
 
-	frozen_bus = eeh_pe_bus_get(pe);
-	if (!frozen_bus) {
+	bus = eeh_pe_bus_get(pe);
+	if (!bus) {
 		pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n",
 			__func__, pe->phb->global_number, pe->addr);
-		return false;
+		return;
 	}
 
+	eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
+
 	eeh_pe_update_time_stamp(pe);
 	pe->freeze_count++;
 	if (pe->freeze_count > eeh_max_freezes) {
@@ -806,7 +826,7 @@ static bool eeh_handle_normal_event(struct eeh_pe *pe)
 	 */
 	if (result == PCI_ERS_RESULT_NONE) {
 		pr_info("EEH: Reset with hotplug activity\n");
-		rc = eeh_reset_device(pe, frozen_bus, NULL);
+		rc = eeh_reset_device(pe, bus, NULL, false);
 		if (rc) {
 			pr_warn("%s: Unable to reset, err=%d\n",
 				__func__, rc);
@@ -858,7 +878,7 @@ static bool eeh_handle_normal_event(struct eeh_pe *pe)
 	/* If any device called out for a reset, then reset the slot */
 	if (result == PCI_ERS_RESULT_NEED_RESET) {
 		pr_info("EEH: Reset without hotplug activity\n");
-		rc = eeh_reset_device(pe, NULL, &rmv_data);
+		rc = eeh_reset_device(pe, bus, &rmv_data, true);
 		if (rc) {
 			pr_warn("%s: Cannot reset, err=%d\n",
 				__func__, rc);
@@ -891,7 +911,7 @@ static bool eeh_handle_normal_event(struct eeh_pe *pe)
 	pr_info("EEH: Notify device driver to resume\n");
 	eeh_pe_dev_traverse(pe, eeh_report_resume, NULL);
 
-	return false;
+	goto final;
 
 hard_fail:
 	/*
@@ -916,23 +936,21 @@ hard_fail:
 	 * all removed devices correctly to avoid access
 	 * the their PCI config any more.
 	 */
-	if (frozen_bus) {
-		if (pe->type & EEH_PE_VF) {
-			eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
-			eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
-		} else {
-			eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
-			eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
-
-			pci_lock_rescan_remove();
-			pci_hp_remove_devices(frozen_bus);
-			pci_unlock_rescan_remove();
+	if (pe->type & EEH_PE_VF) {
+		eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
+		eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
+	} else {
+		eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
+		eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
 
-			/* The passed PE should no longer be used */
-			return true;
-		}
+		pci_lock_rescan_remove();
+		pci_hp_remove_devices(bus);
+		pci_unlock_rescan_remove();
+		/* The passed PE should no longer be used */
+		return;
 	}
-	return false;
+final:
+	eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
 }
 
 /**
@@ -942,7 +960,7 @@ hard_fail:
  * specific PE.  Iterates through possible failures and handles them as
  * necessary.
  */
-static void eeh_handle_special_event(void)
+void eeh_handle_special_event(void)
 {
 	struct eeh_pe *pe, *phb_pe;
 	struct pci_bus *bus;
@@ -1005,15 +1023,7 @@ static void eeh_handle_special_event(void)
 		 */
 		if (rc == EEH_NEXT_ERR_FROZEN_PE ||
 		    rc == EEH_NEXT_ERR_FENCED_PHB) {
-			/*
-			 * eeh_handle_normal_event() can make the PE stale if it
-			 * determines that the PE cannot possibly be recovered.
-			 * Don't modify the PE state if that's the case.
-			 */
-			if (eeh_handle_normal_event(pe))
-				continue;
-
-			eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
+			eeh_handle_normal_event(pe);
 		} else {
 			pci_lock_rescan_remove();
 			list_for_each_entry(hose, &hose_list, list_node) {
@@ -1049,28 +1059,3 @@ static void eeh_handle_special_event(void)
 			break;
 	} while (rc != EEH_NEXT_ERR_NONE);
 }
-
-/**
- * eeh_handle_event - Reset a PCI device after hard lockup.
- * @pe: EEH PE
- *
- * While PHB detects address or data parity errors on particular PCI
- * slot, the associated PE will be frozen. Besides, DMA's occurring
- * to wild addresses (which usually happen due to bugs in device
- * drivers or in PCI adapter firmware) can cause EEH error. #SERR,
- * #PERR or other misc PCI-related errors also can trigger EEH errors.
- *
- * Recovery process consists of unplugging the device driver (which
- * generated hotplug events to userspace), then issuing a PCI #RST to
- * the device, then reconfiguring the PCI config space for all bridges
- * & devices under this slot, and then finally restarting the device
- * drivers (which cause a second set of hotplug events to go out to
- * userspace).
- */
-void eeh_handle_event(struct eeh_pe *pe)
-{
-	if (pe)
-		eeh_handle_normal_event(pe);
-	else
-		eeh_handle_special_event();
-}
diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c
index accbf8b5fd46..61c9356bf9c9 100644
--- a/arch/powerpc/kernel/eeh_event.c
+++ b/arch/powerpc/kernel/eeh_event.c
@@ -73,7 +73,6 @@ static int eeh_event_handler(void * dummy)
 		/* We might have event without binding PE */
 		pe = event->pe;
 		if (pe) {
-			eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
 			if (pe->type & EEH_PE_PHB)
 				pr_info("EEH: Detected error on PHB#%x\n",
 					 pe->phb->global_number);
@@ -81,10 +80,9 @@ static int eeh_event_handler(void * dummy)
 				pr_info("EEH: Detected PCI bus error on "
 					"PHB#%x-PE#%x\n",
 					pe->phb->global_number, pe->addr);
-			eeh_handle_event(pe);
-			eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
+			eeh_handle_normal_event(pe);
 		} else {
-			eeh_handle_event(NULL);
+			eeh_handle_special_event();
 		}
 
 		kfree(event);
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 2cb5109a7ea3..51695608c68b 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -545,7 +545,7 @@ _GLOBAL(_switch)
 /* Cancel all explict user streams as they will have no use after context
  * switch and will stop the HW from creating streams itself
  */
-	DCBT_STOP_ALL_STREAM_IDS(r6)
+	DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r6)
 #endif
 
 	addi	r6,r4,-THREAD	/* Convert THREAD to 'current' */
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 1ecfd8ffb098..ae6a849db60b 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -139,6 +139,21 @@ EXC_COMMON_BEGIN(system_reset_idle_common)
 	b	pnv_powersave_wakeup
 #endif
 
+/*
+ * Set IRQS_ALL_DISABLED unconditionally so arch_irqs_disabled does
+ * the right thing. We do not want to reconcile because that goes
+ * through irq tracing which we don't want in NMI.
+ *
+ * Save PACAIRQHAPPENED because some code will do a hard disable
+ * (e.g., xmon). So we want to restore this back to where it was
+ * when we return. DAR is unused in the stack, so save it there.
+ */
+#define ADD_RECONCILE_NMI						\
+	li	r10,IRQS_ALL_DISABLED;					\
+	stb	r10,PACAIRQSOFTMASK(r13);				\
+	lbz	r10,PACAIRQHAPPENED(r13);				\
+	std	r10,_DAR(r1)
+
 EXC_COMMON_BEGIN(system_reset_common)
 	/*
 	 * Increment paca->in_nmi then enable MSR_RI. SLB or MCE will be able
@@ -157,16 +172,56 @@ EXC_COMMON_BEGIN(system_reset_common)
 	subi	r1,r1,INT_FRAME_SIZE
 	EXCEPTION_COMMON_NORET_STACK(PACA_EXNMI, 0x100,
 			system_reset, system_reset_exception,
-			ADD_NVGPRS;ADD_RECONCILE)
+			ADD_NVGPRS;ADD_RECONCILE_NMI)
+
+	/* This (and MCE) can be simplified with mtmsrd L=1 */
+	/* Clear MSR_RI before setting SRR0 and SRR1. */
+	li	r0,MSR_RI
+	mfmsr	r9
+	andc	r9,r9,r0
+	mtmsrd	r9,1
 
 	/*
-	 * The stack is no longer in use, decrement in_nmi.
+	 * MSR_RI is clear, now we can decrement paca->in_nmi.
 	 */
 	lhz	r10,PACA_IN_NMI(r13)
 	subi	r10,r10,1
 	sth	r10,PACA_IN_NMI(r13)
 
-	b	ret_from_except
+	/*
+	 * Restore soft mask settings.
+	 */
+	ld	r10,_DAR(r1)
+	stb	r10,PACAIRQHAPPENED(r13)
+	ld	r10,SOFTE(r1)
+	stb	r10,PACAIRQSOFTMASK(r13)
+
+	/*
+	 * Keep below code in synch with MACHINE_CHECK_HANDLER_WINDUP.
+	 * Should share common bits...
+	 */
+
+	/* Move original SRR0 and SRR1 into the respective regs */
+	ld	r9,_MSR(r1)
+	mtspr	SPRN_SRR1,r9
+	ld	r3,_NIP(r1)
+	mtspr	SPRN_SRR0,r3
+	ld	r9,_CTR(r1)
+	mtctr	r9
+	ld	r9,_XER(r1)
+	mtxer	r9
+	ld	r9,_LINK(r1)
+	mtlr	r9
+	REST_GPR(0, r1)
+	REST_8GPRS(2, r1)
+	REST_GPR(10, r1)
+	ld	r11,_CCR(r1)
+	mtcr	r11
+	REST_GPR(11, r1)
+	REST_2GPRS(12, r1)
+	/* restore original r1. */
+	ld	r1,GPR1(r1)
+	RFI_TO_USER_OR_KERNEL
 
 #ifdef CONFIG_PPC_PSERIES
 /*
@@ -621,7 +676,10 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
 	lwz	r9,PACA_EXSLB+EX_CCR(r13)	/* get saved CR */
 	mtlr	r10
 
-	beq-	8f		/* if bad address, make full stack frame */
+	/*
+	 * Large address, check whether we have to allocate new contexts.
+	 */
+	beq-	8f
 
 	bne-	cr5,2f		/* if unrecoverable exception, oops */
 
@@ -629,14 +687,11 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
 
 	bne	cr4,1f		/* returning to kernel */
 
-.machine	push
-.machine	"power4"
 	mtcrf	0x80,r9
 	mtcrf	0x08,r9		/* MSR[PR] indication is in cr4 */
 	mtcrf	0x04,r9		/* MSR[RI] indication is in cr5 */
 	mtcrf	0x02,r9		/* I/D indication is in cr6 */
 	mtcrf	0x01,r9		/* slb_allocate uses cr0 and cr7 */
-.machine	pop
 
 	RESTORE_CTR(r9, PACA_EXSLB)
 	RESTORE_PPR_PACA(PACA_EXSLB, r9)
@@ -649,14 +704,11 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
 	RFI_TO_USER
 	b	.	/* prevent speculative execution */
 1:
-.machine	push
-.machine	"power4"
 	mtcrf	0x80,r9
 	mtcrf	0x08,r9		/* MSR[PR] indication is in cr4 */
 	mtcrf	0x04,r9		/* MSR[RI] indication is in cr5 */
 	mtcrf	0x02,r9		/* I/D indication is in cr6 */
 	mtcrf	0x01,r9		/* slb_allocate uses cr0 and cr7 */
-.machine	pop
 
 	RESTORE_CTR(r9, PACA_EXSLB)
 	RESTORE_PPR_PACA(PACA_EXSLB, r9)
@@ -685,7 +737,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
 	mr	r3,r12
 	mfspr	r11,SPRN_SRR0
 	mfspr	r12,SPRN_SRR1
-	LOAD_HANDLER(r10,bad_addr_slb)
+	LOAD_HANDLER(r10, large_addr_slb)
 	mtspr	SPRN_SRR0,r10
 	ld	r10,PACAKMSR(r13)
 	mtspr	SPRN_SRR1,r10
@@ -700,7 +752,7 @@ EXC_COMMON_BEGIN(unrecov_slb)
 	bl	unrecoverable_exception
 	b	1b
 
-EXC_COMMON_BEGIN(bad_addr_slb)
+EXC_COMMON_BEGIN(large_addr_slb)
 	EXCEPTION_PROLOG_COMMON(0x380, PACA_EXSLB)
 	RECONCILE_IRQ_STATE(r10, r11)
 	ld	r3, PACA_EXSLB+EX_DAR(r13)
@@ -710,7 +762,7 @@ EXC_COMMON_BEGIN(bad_addr_slb)
 	std	r10, _TRAP(r1)
 2:	bl	save_nvgprs
 	addi	r3, r1, STACK_FRAME_OVERHEAD
-	bl	slb_miss_bad_addr
+	bl	slb_miss_large_addr
 	b	ret_from_except
 
 EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100)
@@ -1273,7 +1325,7 @@ EXC_REAL_BEGIN(denorm_exception_hv, 0x1500, 0x100)
 	bne+	denorm_assist
 #endif
 
-	KVMTEST_PR(0x1500)
+	KVMTEST_HV(0x1500)
 	EXCEPTION_PROLOG_PSERIES_1(denorm_common, EXC_HV)
 EXC_REAL_END(denorm_exception_hv, 0x1500, 0x100)
 
@@ -1285,7 +1337,7 @@ EXC_VIRT_END(denorm_exception, 0x5500, 0x100)
 EXC_VIRT_NONE(0x5500, 0x100)
 #endif
 
-TRAMP_KVM_SKIP(PACA_EXGEN, 0x1500)
+TRAMP_KVM_HV(PACA_EXGEN, 0x1500)
 
 #ifdef CONFIG_PPC_DENORMALISATION
 TRAMP_REAL_BEGIN(denorm_assist)
@@ -1466,7 +1518,7 @@ TRAMP_REAL_BEGIN(rfi_flush_fallback)
 	ld	r11,PACA_L1D_FLUSH_SIZE(r13)
 	srdi	r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
 	mtctr	r11
-	DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
+	DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
 
 	/* order ld/st prior to dcbt stop all streams with flushing */
 	sync
@@ -1506,7 +1558,7 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback)
 	ld	r11,PACA_L1D_FLUSH_SIZE(r13)
 	srdi	r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
 	mtctr	r11
-	DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
+	DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
 
 	/* order ld/st prior to dcbt stop all streams with flushing */
 	sync
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index a61151a6ea5e..6eca15f25c73 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -392,19 +392,20 @@ generic_secondary_common_init:
 	 * physical cpu id in r24, we need to search the pacas to find
 	 * which logical id maps to our physical one.
 	 */
-	LOAD_REG_ADDR(r13, paca)	/* Load paca pointer		 */
-	ld	r13,0(r13)		/* Get base vaddr of paca array	 */
 #ifndef CONFIG_SMP
-	addi	r13,r13,PACA_SIZE	/* know r13 if used accidentally */
 	b	kexec_wait		/* wait for next kernel if !SMP	 */
 #else
+	LOAD_REG_ADDR(r8, paca_ptrs)	/* Load paca_ptrs pointer	 */
+	ld	r8,0(r8)		/* Get base vaddr of array	 */
 	LOAD_REG_ADDR(r7, nr_cpu_ids)	/* Load nr_cpu_ids address       */
 	lwz	r7,0(r7)		/* also the max paca allocated 	 */
 	li	r5,0			/* logical cpu id                */
-1:	lhz	r6,PACAHWCPUID(r13)	/* Load HW procid from paca      */
+1:
+	sldi	r9,r5,3			/* get paca_ptrs[] index from cpu id */
+	ldx	r13,r9,r8		/* r13 = paca_ptrs[cpu id]       */
+	lhz	r6,PACAHWCPUID(r13)	/* Load HW procid from paca      */
 	cmpw	r6,r24			/* Compare to our id             */
 	beq	2f
-	addi	r13,r13,PACA_SIZE	/* Loop to next PACA on miss     */
 	addi	r5,r5,1
 	cmpw	r5,r7			/* Check if more pacas exist     */
 	blt	1b
@@ -756,10 +757,10 @@ _GLOBAL(pmac_secondary_start)
 	mtmsrd	r3			/* RI on */
 
 	/* Set up a paca value for this processor. */
-	LOAD_REG_ADDR(r4,paca)		/* Load paca pointer		*/
-	ld	r4,0(r4)		/* Get base vaddr of paca array	*/
-	mulli	r13,r24,PACA_SIZE	/* Calculate vaddr of right paca */
-	add	r13,r13,r4		/* for this processor.		*/
+	LOAD_REG_ADDR(r4,paca_ptrs)	/* Load paca_ptrs pointer	*/
+	ld	r4,0(r4)		/* Get base vaddr of paca_ptrs array */
+	sldi	r5,r24,3		/* get paca_ptrs[] index from cpu id */
+	ldx	r13,r5,r4		/* r13 = paca_ptrs[cpu id]       */
 	SET_PACA(r13)			/* Save vaddr of paca in an SPRG*/
 
 	/* Mark interrupts soft and hard disabled (they might be enabled
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index 53b9c1dfd7d9..4c1012b80d3b 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -33,6 +33,7 @@
 #include <asm/hw_breakpoint.h>
 #include <asm/processor.h>
 #include <asm/sstep.h>
+#include <asm/debug.h>
 #include <linux/uaccess.h>
 
 /*
@@ -171,6 +172,8 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
 	 * HW_BREAKPOINT_ALIGN by rounding off to the lower address, the
 	 * 'symbolsize' should satisfy the check below.
 	 */
+	if (!ppc_breakpoint_available())
+		return -ENODEV;
 	length_max = 8; /* DABR */
 	if (cpu_has_feature(CPU_FTR_DAWR)) {
 		length_max = 512 ; /* 64 doublewords */
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 01e1c1997893..79d005445c6c 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -325,12 +325,6 @@ enter_winkle:
  * r3 - PSSCR value corresponding to the requested stop state.
  */
 power_enter_stop:
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-	/* Tell KVM we're entering idle */
-	li	r4,KVM_HWTHREAD_IN_IDLE
-	/* DO THIS IN REAL MODE!  See comment above. */
-	stb	r4,HSTATE_HWTHREAD_STATE(r13)
-#endif
 /*
  * Check if we are executing the lite variant with ESL=EC=0
  */
@@ -339,6 +333,7 @@ power_enter_stop:
 	bne	 .Lhandle_esl_ec_set
 	PPC_STOP
 	li	r3,0  /* Since we didn't lose state, return 0 */
+	std	r3, PACA_REQ_PSSCR(r13)
 
 	/*
 	 * pnv_wakeup_noloss() expects r12 to contain the SRR1 value so
@@ -427,13 +422,49 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66);		\
 /*
  * Entered with MSR[EE]=0 and no soft-masked interrupts pending.
  * r3 contains desired PSSCR register value.
+ *
+ * Offline (CPU unplug) case also must notify KVM that the CPU is
+ * idle.
  */
+_GLOBAL(power9_offline_stop)
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	/*
+	 * Tell KVM we're entering idle.
+	 * This does not have to be done in real mode because the P9 MMU
+	 * is independent per-thread. Some steppings share radix/hash mode
+	 * between threads, but in that case KVM has a barrier sync in real
+	 * mode before and after switching between radix and hash.
+	 */
+	li	r4,KVM_HWTHREAD_IN_IDLE
+	stb	r4,HSTATE_HWTHREAD_STATE(r13)
+#endif
+	/* fall through */
+
 _GLOBAL(power9_idle_stop)
 	std	r3, PACA_REQ_PSSCR(r13)
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+BEGIN_FTR_SECTION
+	sync
+	lwz	r5, PACA_DONT_STOP(r13)
+	cmpwi	r5, 0
+	bne	1f
+END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG)
+#endif
 	mtspr 	SPRN_PSSCR,r3
 	LOAD_REG_ADDR(r4,power_enter_stop)
 	b	pnv_powersave_common
 	/* No return */
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+1:
+	/*
+	 * We get here when TM / thread reconfiguration bug workaround
+	 * code wants to get the CPU into SMT4 mode, and therefore
+	 * we are being asked not to stop.
+	 */
+	li	r3, 0
+	std	r3, PACA_REQ_PSSCR(r13)
+	blr		/* return 0 for wakeup cause / SRR1 value */
+#endif
 
 /*
  * On waking up from stop 0,1,2 with ESL=1 on POWER9 DD1,
@@ -520,6 +551,9 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
 	mr	r3,r12
 
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	lbz	r0,HSTATE_HWTHREAD_STATE(r13)
+	cmpwi	r0,KVM_HWTHREAD_IN_KERNEL
+	beq	1f
 	li	r0,KVM_HWTHREAD_IN_KERNEL
 	stb	r0,HSTATE_HWTHREAD_STATE(r13)
 	/* Order setting hwthread_state vs. testing hwthread_req */
@@ -584,6 +618,8 @@ FTR_SECTION_ELSE_NESTED(71)
 	mfspr	r5, SPRN_PSSCR
 	rldicl  r5,r5,4,60
 ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_POWER9_DD1, 71)
+	li	r0, 0		/* clear requested_psscr to say we're awake */
+	std	r0, PACA_REQ_PSSCR(r13)
 	cmpd	cr4,r5,r4
 	bge	cr4,pnv_wakeup_tb_loss /* returns to caller */
 
@@ -834,6 +870,8 @@ BEGIN_FTR_SECTION
 	mtspr	SPRN_PTCR,r4
 	ld	r4,_RPR(r1)
 	mtspr	SPRN_RPR,r4
+	ld	r4,_AMOR(r1)
+	mtspr	SPRN_AMOR,r4
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 
 	ld	r4,_TSCR(r1)
diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c
index aab456ed2a00..5ac84efc6ede 100644
--- a/arch/powerpc/kernel/iomap.c
+++ b/arch/powerpc/kernel/iomap.c
@@ -45,12 +45,32 @@ u64 ioread64(void __iomem *addr)
 {
 	return readq(addr);
 }
+u64 ioread64_lo_hi(void __iomem *addr)
+{
+	return readq(addr);
+}
+u64 ioread64_hi_lo(void __iomem *addr)
+{
+	return readq(addr);
+}
 u64 ioread64be(void __iomem *addr)
 {
 	return readq_be(addr);
 }
+u64 ioread64be_lo_hi(void __iomem *addr)
+{
+	return readq_be(addr);
+}
+u64 ioread64be_hi_lo(void __iomem *addr)
+{
+	return readq_be(addr);
+}
 EXPORT_SYMBOL(ioread64);
+EXPORT_SYMBOL(ioread64_lo_hi);
+EXPORT_SYMBOL(ioread64_hi_lo);
 EXPORT_SYMBOL(ioread64be);
+EXPORT_SYMBOL(ioread64be_lo_hi);
+EXPORT_SYMBOL(ioread64be_hi_lo);
 #endif /* __powerpc64__ */
 
 void iowrite8(u8 val, void __iomem *addr)
@@ -83,12 +103,32 @@ void iowrite64(u64 val, void __iomem *addr)
 {
 	writeq(val, addr);
 }
+void iowrite64_lo_hi(u64 val, void __iomem *addr)
+{
+	writeq(val, addr);
+}
+void iowrite64_hi_lo(u64 val, void __iomem *addr)
+{
+	writeq(val, addr);
+}
 void iowrite64be(u64 val, void __iomem *addr)
 {
 	writeq_be(val, addr);
 }
+void iowrite64be_lo_hi(u64 val, void __iomem *addr)
+{
+	writeq_be(val, addr);
+}
+void iowrite64be_hi_lo(u64 val, void __iomem *addr)
+{
+	writeq_be(val, addr);
+}
 EXPORT_SYMBOL(iowrite64);
+EXPORT_SYMBOL(iowrite64_lo_hi);
+EXPORT_SYMBOL(iowrite64_hi_lo);
 EXPORT_SYMBOL(iowrite64be);
+EXPORT_SYMBOL(iowrite64be_lo_hi);
+EXPORT_SYMBOL(iowrite64be_hi_lo);
 #endif /* __powerpc64__ */
 
 /*
diff --git a/arch/powerpc/kernel/kexec_elf_64.c b/arch/powerpc/kernel/kexec_elf_64.c
index 9a42309b091a..ba4f18a43ee8 100644
--- a/arch/powerpc/kernel/kexec_elf_64.c
+++ b/arch/powerpc/kernel/kexec_elf_64.c
@@ -572,7 +572,7 @@ static void *elf64_load(struct kimage *image, char *kernel_buf,
 {
 	int ret;
 	unsigned int fdt_size;
-	unsigned long kernel_load_addr, purgatory_load_addr;
+	unsigned long kernel_load_addr;
 	unsigned long initrd_load_addr = 0, fdt_load_addr;
 	void *fdt;
 	const void *slave_code;
@@ -580,6 +580,8 @@ static void *elf64_load(struct kimage *image, char *kernel_buf,
 	struct elf_info elf_info;
 	struct kexec_buf kbuf = { .image = image, .buf_min = 0,
 				  .buf_max = ppc64_rma_size };
+	struct kexec_buf pbuf = { .image = image, .buf_min = 0,
+				  .buf_max = ppc64_rma_size, .top_down = true };
 
 	ret = build_elf_exec_info(kernel_buf, kernel_len, &ehdr, &elf_info);
 	if (ret)
@@ -591,14 +593,13 @@ static void *elf64_load(struct kimage *image, char *kernel_buf,
 
 	pr_debug("Loaded the kernel at 0x%lx\n", kernel_load_addr);
 
-	ret = kexec_load_purgatory(image, 0, ppc64_rma_size, true,
-				   &purgatory_load_addr);
+	ret = kexec_load_purgatory(image, &pbuf);
 	if (ret) {
 		pr_err("Loading purgatory failed.\n");
 		goto out;
 	}
 
-	pr_debug("Loaded purgatory at 0x%lx\n", purgatory_load_addr);
+	pr_debug("Loaded purgatory at 0x%lx\n", pbuf.mem);
 
 	if (initrd != NULL) {
 		kbuf.buffer = initrd;
@@ -657,7 +658,7 @@ out:
 	return ret ? ERR_PTR(ret) : fdt;
 }
 
-struct kexec_file_ops kexec_elf64_ops = {
+const struct kexec_file_ops kexec_elf64_ops = {
 	.probe = elf64_probe,
 	.load = elf64_load,
 };
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index ca5d5a081e75..e4c5bf33970b 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -455,29 +455,33 @@ static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 	}
 
 	kretprobe_assert(ri, orig_ret_address, trampoline_address);
-	regs->nip = orig_ret_address;
+
 	/*
-	 * Make LR point to the orig_ret_address.
-	 * When the 'nop' inside the kretprobe_trampoline
-	 * is optimized, we can do a 'blr' after executing the
-	 * detour buffer code.
+	 * We get here through one of two paths:
+	 * 1. by taking a trap -> kprobe_handler() -> here
+	 * 2. by optprobe branch -> optimized_callback() -> opt_pre_handler() -> here
+	 *
+	 * When going back through (1), we need regs->nip to be setup properly
+	 * as it is used to determine the return address from the trap.
+	 * For (2), since nip is not honoured with optprobes, we instead setup
+	 * the link register properly so that the subsequent 'blr' in
+	 * kretprobe_trampoline jumps back to the right instruction.
+	 *
+	 * For nip, we should set the address to the previous instruction since
+	 * we end up emulating it in kprobe_handler(), which increments the nip
+	 * again.
 	 */
+	regs->nip = orig_ret_address - 4;
 	regs->link = orig_ret_address;
 
-	reset_current_kprobe();
 	kretprobe_hash_unlock(current, &flags);
-	preempt_enable_no_resched();
 
 	hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
 		hlist_del(&ri->hlist);
 		kfree(ri);
 	}
-	/*
-	 * By returning a non-zero value, we are telling
-	 * kprobe_handler() that we don't want the post_handler
-	 * to run (and have re-enabled preemption)
-	 */
-	return 1;
+
+	return 0;
 }
 NOKPROBE_SYMBOL(trampoline_probe_handler);
 
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
index 49d34d7271e7..1044bf15d5ed 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -168,24 +168,25 @@ static void kexec_prepare_cpus_wait(int wait_state)
 	 * are correctly onlined.  If somehow we start a CPU on boot with RTAS
 	 * start-cpu, but somehow that CPU doesn't write callin_cpu_map[] in
 	 * time, the boot CPU will timeout.  If it does eventually execute
-	 * stuff, the secondary will start up (paca[].cpu_start was written) and
-	 * get into a peculiar state.  If the platform supports
-	 * smp_ops->take_timebase(), the secondary CPU will probably be spinning
-	 * in there.  If not (i.e. pseries), the secondary will continue on and
-	 * try to online itself/idle/etc. If it survives that, we need to find
-	 * these possible-but-not-online-but-should-be CPUs and chaperone them
-	 * into kexec_smp_wait().
+	 * stuff, the secondary will start up (paca_ptrs[]->cpu_start was
+	 * written) and get into a peculiar state.
+	 * If the platform supports smp_ops->take_timebase(), the secondary CPU
+	 * will probably be spinning in there.  If not (i.e. pseries), the
+	 * secondary will continue on and try to online itself/idle/etc. If it
+	 * survives that, we need to find these
+	 * possible-but-not-online-but-should-be CPUs and chaperone them into
+	 * kexec_smp_wait().
 	 */
 	for_each_online_cpu(i) {
 		if (i == my_cpu)
 			continue;
 
-		while (paca[i].kexec_state < wait_state) {
+		while (paca_ptrs[i]->kexec_state < wait_state) {
 			barrier();
 			if (i != notified) {
 				printk(KERN_INFO "kexec: waiting for cpu %d "
 				       "(physical %d) to enter %i state\n",
-				       i, paca[i].hw_cpu_id, wait_state);
+				       i, paca_ptrs[i]->hw_cpu_id, wait_state);
 				notified = i;
 			}
 		}
@@ -322,18 +323,24 @@ void default_machine_kexec(struct kimage *image)
 	kexec_stack.thread_info.cpu = current_thread_info()->cpu;
 
 	/* We need a static PACA, too; copy this CPU's PACA over and switch to
-	 * it.  Also poison per_cpu_offset to catch anyone using non-static
-	 * data.
+	 * it. Also poison per_cpu_offset and NULL lppaca to catch anyone using
+	 * non-static data.
 	 */
 	memcpy(&kexec_paca, get_paca(), sizeof(struct paca_struct));
 	kexec_paca.data_offset = 0xedeaddeadeeeeeeeUL;
-	paca = (struct paca_struct *)RELOC_HIDE(&kexec_paca, 0) -
-		kexec_paca.paca_index;
+#ifdef CONFIG_PPC_PSERIES
+	kexec_paca.lppaca_ptr = NULL;
+#endif
+	paca_ptrs[kexec_paca.paca_index] = &kexec_paca;
+
 	setup_paca(&kexec_paca);
 
-	/* XXX: If anyone does 'dynamic lppacas' this will also need to be
-	 * switched to a static version!
+	/*
+	 * The lppaca should be unregistered at this point so the HV won't
+	 * touch it. In the case of a crash, none of the lppacas are
+	 * unregistered so there is not much we can do about it here.
 	 */
+
 	/*
 	 * On Book3S, the copy must happen with the MMU off if we are either
 	 * using Radix page tables or we are not in an LPAR since we can
diff --git a/arch/powerpc/kernel/machine_kexec_file_64.c b/arch/powerpc/kernel/machine_kexec_file_64.c
index e4395f937d63..0bd23dc789a4 100644
--- a/arch/powerpc/kernel/machine_kexec_file_64.c
+++ b/arch/powerpc/kernel/machine_kexec_file_64.c
@@ -31,52 +31,19 @@
 
 #define SLAVE_CODE_SIZE		256
 
-static struct kexec_file_ops *kexec_file_loaders[] = {
+const struct kexec_file_ops * const kexec_file_loaders[] = {
 	&kexec_elf64_ops,
+	NULL
 };
 
 int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
 				  unsigned long buf_len)
 {
-	int i, ret = -ENOEXEC;
-	struct kexec_file_ops *fops;
-
 	/* We don't support crash kernels yet. */
 	if (image->type == KEXEC_TYPE_CRASH)
-		return -ENOTSUPP;
-
-	for (i = 0; i < ARRAY_SIZE(kexec_file_loaders); i++) {
-		fops = kexec_file_loaders[i];
-		if (!fops || !fops->probe)
-			continue;
-
-		ret = fops->probe(buf, buf_len);
-		if (!ret) {
-			image->fops = fops;
-			return ret;
-		}
-	}
-
-	return ret;
-}
-
-void *arch_kexec_kernel_image_load(struct kimage *image)
-{
-	if (!image->fops || !image->fops->load)
-		return ERR_PTR(-ENOEXEC);
-
-	return image->fops->load(image, image->kernel_buf,
-				 image->kernel_buf_len, image->initrd_buf,
-				 image->initrd_buf_len, image->cmdline_buf,
-				 image->cmdline_buf_len);
-}
-
-int arch_kimage_file_post_load_cleanup(struct kimage *image)
-{
-	if (!image->fops || !image->fops->cleanup)
-		return 0;
+		return -EOPNOTSUPP;
 
-	return image->fops->cleanup(image->image_loader_data);
+	return kexec_image_probe_default(image, buf, buf_len);
 }
 
 /**
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 3280953a82cf..fa267e94090a 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -144,44 +144,6 @@ _GLOBAL_TOC(flush_dcache_range)
 	blr
 EXPORT_SYMBOL(flush_dcache_range)
 
-/*
- * Like above, but works on non-mapped physical addresses.
- * Use only for non-LPAR setups ! It also assumes real mode
- * is cacheable. Used for flushing out the DART before using
- * it as uncacheable memory 
- *
- * flush_dcache_phys_range(unsigned long start, unsigned long stop)
- *
- *    flush all bytes from start to stop-1 inclusive
- */
-_GLOBAL(flush_dcache_phys_range)
- 	ld	r10,PPC64_CACHES@toc(r2)
-	lwz	r7,DCACHEL1BLOCKSIZE(r10)	/* Get dcache block size */
-	addi	r5,r7,-1
-	andc	r6,r3,r5		/* round low to line bdy */
-	subf	r8,r6,r4		/* compute length */
-	add	r8,r8,r5		/* ensure we get enough */
-	lwz	r9,DCACHEL1LOGBLOCKSIZE(r10)	/* Get log-2 of dcache block size */
-	srw.	r8,r8,r9		/* compute line count */
-	beqlr				/* nothing to do? */
-	mfmsr	r5			/* Disable MMU Data Relocation */
-	ori	r0,r5,MSR_DR
-	xori	r0,r0,MSR_DR
-	sync
-	mtmsr	r0
-	sync
-	isync
-	mtctr	r8
-0:	dcbst	0,r6
-	add	r6,r6,r7
-	bdnz	0b
-	sync
-	isync
-	mtmsr	r5			/* Re-enable MMU Data Relocation */
-	sync
-	isync
-	blr
-
 _GLOBAL(flush_inval_dcache_range)
  	ld	r10,PPC64_CACHES@toc(r2)
 	lwz	r7,DCACHEL1BLOCKSIZE(r10)	/* Get dcache block size */
diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
index 496d6393bd41..ba681dac7b46 100644
--- a/arch/powerpc/kernel/nvram_64.c
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -207,8 +207,7 @@ int nvram_write_os_partition(struct nvram_os_partition *part,
 
 	tmp_index = part->index;
 
-	rc = ppc_md.nvram_write((char *)&info, sizeof(struct err_log_info),
-				&tmp_index);
+	rc = ppc_md.nvram_write((char *)&info, sizeof(info), &tmp_index);
 	if (rc <= 0) {
 		pr_err("%s: Failed nvram_write (%d)\n", __func__, rc);
 		return rc;
@@ -244,9 +243,7 @@ int nvram_read_partition(struct nvram_os_partition *part, char *buff,
 	tmp_index = part->index;
 
 	if (part->os_partition) {
-		rc = ppc_md.nvram_read((char *)&info,
-					sizeof(struct err_log_info),
-					&tmp_index);
+		rc = ppc_md.nvram_read((char *)&info, sizeof(info), &tmp_index);
 		if (rc <= 0) {
 			pr_err("%s: Failed nvram_read (%d)\n", __func__, rc);
 			return rc;
@@ -1173,7 +1170,7 @@ int __init nvram_scan_partitions(void)
 			       "detected: 0-length partition\n");
 			goto out;
 		}
-		tmp_part = kmalloc(sizeof(struct nvram_partition), GFP_KERNEL);
+		tmp_part = kmalloc(sizeof(*tmp_part), GFP_KERNEL);
 		err = -ENOMEM;
 		if (!tmp_part) {
 			printk(KERN_ERR "nvram_scan_partitions: kmalloc failed\n");
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 95ffedf14885..0ee3e6d50f28 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -20,116 +20,105 @@
 
 #include "setup.h"
 
-#ifdef CONFIG_PPC_BOOK3S
+#ifndef CONFIG_SMP
+#define boot_cpuid 0
+#endif
+
+static void *__init alloc_paca_data(unsigned long size, unsigned long align,
+				unsigned long limit, int cpu)
+{
+	unsigned long pa;
+	int nid;
+
+	/*
+	 * boot_cpuid paca is allocated very early before cpu_to_node is up.
+	 * Set bottom-up mode, because the boot CPU should be on node-0,
+	 * which will put its paca in the right place.
+	 */
+	if (cpu == boot_cpuid) {
+		nid = -1;
+		memblock_set_bottom_up(true);
+	} else {
+		nid = early_cpu_to_node(cpu);
+	}
+
+	pa = memblock_alloc_base_nid(size, align, limit, nid, MEMBLOCK_NONE);
+	if (!pa) {
+		pa = memblock_alloc_base(size, align, limit);
+		if (!pa)
+			panic("cannot allocate paca data");
+	}
+
+	if (cpu == boot_cpuid)
+		memblock_set_bottom_up(false);
+
+	return __va(pa);
+}
+
+#ifdef CONFIG_PPC_PSERIES
 
 /*
- * The structure which the hypervisor knows about - this structure
- * should not cross a page boundary.  The vpa_init/register_vpa call
- * is now known to fail if the lppaca structure crosses a page
- * boundary.  The lppaca is also used on POWER5 pSeries boxes.
- * The lppaca is 640 bytes long, and cannot readily
- * change since the hypervisor knows its layout, so a 1kB alignment
- * will suffice to ensure that it doesn't cross a page boundary.
+ * See asm/lppaca.h for more detail.
+ *
+ * lppaca structures must be 1kB in size, L1 cache line aligned,
+ * and not cross a 4kB boundary. A 1kB size and 1kB alignment will satisfy
+ * these requirements.
  */
-struct lppaca lppaca[] = {
-	[0 ... (NR_LPPACAS-1)] = {
+static inline void init_lppaca(struct lppaca *lppaca)
+{
+	BUILD_BUG_ON(sizeof(struct lppaca) != 640);
+
+	*lppaca = (struct lppaca) {
 		.desc = cpu_to_be32(0xd397d781),	/* "LpPa" */
-		.size = cpu_to_be16(sizeof(struct lppaca)),
+		.size = cpu_to_be16(0x400),
 		.fpregs_in_use = 1,
 		.slb_count = cpu_to_be16(64),
 		.vmxregs_in_use = 0,
-		.page_ins = 0,
-	},
+		.page_ins = 0, };
 };
 
-static struct lppaca *extra_lppacas;
-static long __initdata lppaca_size;
-
-static void __init allocate_lppacas(int nr_cpus, unsigned long limit)
-{
-	if (nr_cpus <= NR_LPPACAS)
-		return;
-
-	lppaca_size = PAGE_ALIGN(sizeof(struct lppaca) *
-				 (nr_cpus - NR_LPPACAS));
-	extra_lppacas = __va(memblock_alloc_base(lppaca_size,
-						 PAGE_SIZE, limit));
-}
-
-static struct lppaca * __init new_lppaca(int cpu)
+static struct lppaca * __init new_lppaca(int cpu, unsigned long limit)
 {
 	struct lppaca *lp;
+	size_t size = 0x400;
 
-	if (cpu < NR_LPPACAS)
-		return &lppaca[cpu];
+	BUILD_BUG_ON(size < sizeof(struct lppaca));
+
+	if (early_cpu_has_feature(CPU_FTR_HVMODE))
+		return NULL;
 
-	lp = extra_lppacas + (cpu - NR_LPPACAS);
-	*lp = lppaca[0];
+	lp = alloc_paca_data(size, 0x400, limit, cpu);
+	init_lppaca(lp);
 
 	return lp;
 }
-
-static void __init free_lppacas(void)
-{
-	long new_size = 0, nr;
-
-	if (!lppaca_size)
-		return;
-	nr = num_possible_cpus() - NR_LPPACAS;
-	if (nr > 0)
-		new_size = PAGE_ALIGN(nr * sizeof(struct lppaca));
-	if (new_size >= lppaca_size)
-		return;
-
-	memblock_free(__pa(extra_lppacas) + new_size, lppaca_size - new_size);
-	lppaca_size = new_size;
-}
-
-#else
-
-static inline void allocate_lppacas(int nr_cpus, unsigned long limit) { }
-static inline void free_lppacas(void) { }
-
 #endif /* CONFIG_PPC_BOOK3S */
 
 #ifdef CONFIG_PPC_BOOK3S_64
 
 /*
- * 3 persistent SLBs are registered here.  The buffer will be zero
+ * 3 persistent SLBs are allocated here.  The buffer will be zero
  * initially, hence will all be invaild until we actually write them.
  *
  * If you make the number of persistent SLB entries dynamic, please also
  * update PR KVM to flush and restore them accordingly.
  */
-static struct slb_shadow * __initdata slb_shadow;
-
-static void __init allocate_slb_shadows(int nr_cpus, int limit)
-{
-	int size = PAGE_ALIGN(sizeof(struct slb_shadow) * nr_cpus);
-
-	if (early_radix_enabled())
-		return;
-
-	slb_shadow = __va(memblock_alloc_base(size, PAGE_SIZE, limit));
-	memset(slb_shadow, 0, size);
-}
-
-static struct slb_shadow * __init init_slb_shadow(int cpu)
+static struct slb_shadow * __init new_slb_shadow(int cpu, unsigned long limit)
 {
 	struct slb_shadow *s;
 
-	if (early_radix_enabled())
-		return NULL;
-
-	s = &slb_shadow[cpu];
+	if (cpu != boot_cpuid) {
+		/*
+		 * Boot CPU comes here before early_radix_enabled
+		 * is parsed (e.g., for disable_radix). So allocate
+		 * always and this will be fixed up in free_unused_pacas.
+		 */
+		if (early_radix_enabled())
+			return NULL;
+	}
 
-	/*
-	 * When we come through here to initialise boot_paca, the slb_shadow
-	 * buffers are not allocated yet. That's OK, we'll get one later in
-	 * boot, but make sure we don't corrupt memory at 0.
-	 */
-	if (!slb_shadow)
-		return NULL;
+	s = alloc_paca_data(sizeof(*s), L1_CACHE_BYTES, limit, cpu);
+	memset(s, 0, sizeof(*s));
 
 	s->persistent = cpu_to_be32(SLB_NUM_BOLTED);
 	s->buffer_length = cpu_to_be32(sizeof(*s));
@@ -137,10 +126,6 @@ static struct slb_shadow * __init init_slb_shadow(int cpu)
 	return s;
 }
 
-#else /* !CONFIG_PPC_BOOK3S_64 */
-
-static void __init allocate_slb_shadows(int nr_cpus, int limit) { }
-
 #endif /* CONFIG_PPC_BOOK3S_64 */
 
 /* The Paca is an array with one entry per processor.  Each contains an
@@ -152,14 +137,15 @@ static void __init allocate_slb_shadows(int nr_cpus, int limit) { }
  * processors.  The processor VPD array needs one entry per physical
  * processor (not thread).
  */
-struct paca_struct *paca;
-EXPORT_SYMBOL(paca);
+struct paca_struct **paca_ptrs __read_mostly;
+EXPORT_SYMBOL(paca_ptrs);
 
 void __init initialise_paca(struct paca_struct *new_paca, int cpu)
 {
-#ifdef CONFIG_PPC_BOOK3S
-	new_paca->lppaca_ptr = new_lppaca(cpu);
-#else
+#ifdef CONFIG_PPC_PSERIES
+	new_paca->lppaca_ptr = NULL;
+#endif
+#ifdef CONFIG_PPC_BOOK3E
 	new_paca->kernel_pgd = swapper_pg_dir;
 #endif
 	new_paca->lock_token = 0x8000;
@@ -173,7 +159,7 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu)
 	new_paca->__current = &init_task;
 	new_paca->data_offset = 0xfeeeeeeeeeeeeeeeULL;
 #ifdef CONFIG_PPC_BOOK3S_64
-	new_paca->slb_shadow_ptr = init_slb_shadow(cpu);
+	new_paca->slb_shadow_ptr = NULL;
 #endif
 
 #ifdef CONFIG_PPC_BOOK3E
@@ -203,12 +189,25 @@ void setup_paca(struct paca_struct *new_paca)
 
 }
 
-static int __initdata paca_size;
+static int __initdata paca_nr_cpu_ids;
+static int __initdata paca_ptrs_size;
+static int __initdata paca_struct_size;
+
+void __init allocate_paca_ptrs(void)
+{
+	paca_nr_cpu_ids = nr_cpu_ids;
+
+	paca_ptrs_size = sizeof(struct paca_struct *) * nr_cpu_ids;
+	paca_ptrs = __va(memblock_alloc(paca_ptrs_size, 0));
+	memset(paca_ptrs, 0x88, paca_ptrs_size);
+}
 
-void __init allocate_pacas(void)
+void __init allocate_paca(int cpu)
 {
 	u64 limit;
-	int cpu;
+	struct paca_struct *paca;
+
+	BUG_ON(cpu >= paca_nr_cpu_ids);
 
 #ifdef CONFIG_PPC_BOOK3S_64
 	/*
@@ -220,40 +219,44 @@ void __init allocate_pacas(void)
 	limit = ppc64_rma_size;
 #endif
 
-	paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids);
-
-	paca = __va(memblock_alloc_base(paca_size, PAGE_SIZE, limit));
-	memset(paca, 0, paca_size);
-
-	printk(KERN_DEBUG "Allocated %u bytes for %u pacas at %p\n",
-		paca_size, nr_cpu_ids, paca);
-
-	allocate_lppacas(nr_cpu_ids, limit);
-
-	allocate_slb_shadows(nr_cpu_ids, limit);
+	paca = alloc_paca_data(sizeof(struct paca_struct), L1_CACHE_BYTES,
+				limit, cpu);
+	paca_ptrs[cpu] = paca;
+	memset(paca, 0, sizeof(struct paca_struct));
 
-	/* Can't use for_each_*_cpu, as they aren't functional yet */
-	for (cpu = 0; cpu < nr_cpu_ids; cpu++)
-		initialise_paca(&paca[cpu], cpu);
+	initialise_paca(paca, cpu);
+#ifdef CONFIG_PPC_PSERIES
+	paca->lppaca_ptr = new_lppaca(cpu, limit);
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+	paca->slb_shadow_ptr = new_slb_shadow(cpu, limit);
+#endif
+	paca_struct_size += sizeof(struct paca_struct);
 }
 
 void __init free_unused_pacas(void)
 {
-	int new_size;
-
-	new_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids);
+	int new_ptrs_size;
 
-	if (new_size >= paca_size)
-		return;
+	new_ptrs_size = sizeof(struct paca_struct *) * nr_cpu_ids;
+	if (new_ptrs_size < paca_ptrs_size)
+		memblock_free(__pa(paca_ptrs) + new_ptrs_size,
+					paca_ptrs_size - new_ptrs_size);
 
-	memblock_free(__pa(paca) + new_size, paca_size - new_size);
+	paca_nr_cpu_ids = nr_cpu_ids;
+	paca_ptrs_size = new_ptrs_size;
 
-	printk(KERN_DEBUG "Freed %u bytes for unused pacas\n",
-		paca_size - new_size);
-
-	paca_size = new_size;
+#ifdef CONFIG_PPC_BOOK3S_64
+	if (early_radix_enabled()) {
+		/* Ugly fixup, see new_slb_shadow() */
+		memblock_free(__pa(paca_ptrs[boot_cpuid]->slb_shadow_ptr),
+				sizeof(struct slb_shadow));
+		paca_ptrs[boot_cpuid]->slb_shadow_ptr = NULL;
+	}
+#endif
 
-	free_lppacas();
+	printk(KERN_DEBUG "Allocated %u bytes for %u pacas\n",
+			paca_ptrs_size + paca_struct_size, nr_cpu_ids);
 }
 
 void copy_mm_to_paca(struct mm_struct *mm)
@@ -265,7 +268,8 @@ void copy_mm_to_paca(struct mm_struct *mm)
 #ifdef CONFIG_PPC_MM_SLICES
 	VM_BUG_ON(!mm->context.slb_addr_limit);
 	get_paca()->mm_ctx_slb_addr_limit = mm->context.slb_addr_limit;
-	get_paca()->mm_ctx_low_slices_psize = context->low_slices_psize;
+	memcpy(&get_paca()->mm_ctx_low_slices_psize,
+	       &context->low_slices_psize, sizeof(context->low_slices_psize));
 	memcpy(&get_paca()->mm_ctx_high_slices_psize,
 	       &context->high_slices_psize, TASK_SLICE_ARRAY_SZ(mm));
 #else /* CONFIG_PPC_MM_SLICES */
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 1738c4127b32..1237f13fed51 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -173,7 +173,7 @@ void __msr_check_and_clear(unsigned long bits)
 EXPORT_SYMBOL(__msr_check_and_clear);
 
 #ifdef CONFIG_PPC_FPU
-void __giveup_fpu(struct task_struct *tsk)
+static void __giveup_fpu(struct task_struct *tsk)
 {
 	unsigned long msr;
 
@@ -556,7 +556,7 @@ void restore_math(struct pt_regs *regs)
 	regs->msr = msr;
 }
 
-void save_all(struct task_struct *tsk)
+static void save_all(struct task_struct *tsk)
 {
 	unsigned long usermsr;
 
@@ -718,7 +718,8 @@ static void set_debug_reg_defaults(struct thread_struct *thread)
 {
 	thread->hw_brk.address = 0;
 	thread->hw_brk.type = 0;
-	set_breakpoint(&thread->hw_brk);
+	if (ppc_breakpoint_available())
+		set_breakpoint(&thread->hw_brk);
 }
 #endif /* !CONFIG_HAVE_HW_BREAKPOINT */
 #endif	/* CONFIG_PPC_ADV_DEBUG_REGS */
@@ -815,9 +816,14 @@ void __set_breakpoint(struct arch_hw_breakpoint *brk)
 	memcpy(this_cpu_ptr(&current_brk), brk, sizeof(*brk));
 
 	if (cpu_has_feature(CPU_FTR_DAWR))
+		// Power8 or later
 		set_dawr(brk);
-	else
+	else if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+		// Power7 or earlier
 		set_dabr(brk);
+	else
+		// Shouldn't happen due to higher level checks
+		WARN_ON_ONCE(1);
 }
 
 void set_breakpoint(struct arch_hw_breakpoint *brk)
@@ -827,6 +833,18 @@ void set_breakpoint(struct arch_hw_breakpoint *brk)
 	preempt_enable();
 }
 
+/* Check if we have DAWR or DABR hardware */
+bool ppc_breakpoint_available(void)
+{
+	if (cpu_has_feature(CPU_FTR_DAWR))
+		return true; /* POWER8 DAWR */
+	if (cpu_has_feature(CPU_FTR_ARCH_207S))
+		return false; /* POWER9 with DAWR disabled */
+	/* DABR: Everything but POWER8 and POWER9 */
+	return true;
+}
+EXPORT_SYMBOL_GPL(ppc_breakpoint_available);
+
 #ifdef CONFIG_PPC64
 DEFINE_PER_CPU(struct cpu_usage, cpu_usage_array);
 #endif
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 4dffef947b8a..9dbed488aba1 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -291,11 +291,11 @@ static inline void identical_pvr_fixup(unsigned long node)
 
 static void __init check_cpu_feature_properties(unsigned long node)
 {
-	unsigned long i;
+	int i;
 	struct feature_property *fp = feature_properties;
 	const __be32 *prop;
 
-	for (i = 0; i < ARRAY_SIZE(feature_properties); ++i, ++fp) {
+	for (i = 0; i < (int)ARRAY_SIZE(feature_properties); ++i, ++fp) {
 		prop = of_get_flat_dt_prop(node, fp->name, NULL);
 		if (prop && be32_to_cpup(prop) >= fp->min_value) {
 			cur_cpu_spec->cpu_features |= fp->cpu_feature;
@@ -365,7 +365,6 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
 	DBG("boot cpu: logical %d physical %d\n", found,
 	    be32_to_cpu(intserv[found_thread]));
 	boot_cpuid = found;
-	set_hard_smp_processor_id(found, be32_to_cpu(intserv[found_thread]));
 
 	/*
 	 * PAPR defines "logical" PVR values for cpus that
@@ -403,7 +402,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
 		cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT;
 	else if (!dt_cpu_ftrs_in_use())
 		cur_cpu_spec->cpu_features |= CPU_FTR_SMT;
+	allocate_paca(boot_cpuid);
 #endif
+	set_hard_smp_processor_id(found, be32_to_cpu(intserv[found_thread]));
 
 	return 0;
 }
@@ -744,7 +745,7 @@ void __init early_init_devtree(void *params)
 	 * FIXME .. and the initrd too? */
 	move_device_tree();
 
-	allocate_pacas();
+	allocate_paca_ptrs();
 
 	DBG("Scanning CPUs ...\n");
 
@@ -874,5 +875,15 @@ EXPORT_SYMBOL(cpu_to_chip_id);
 
 bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
 {
+#ifdef CONFIG_SMP
+	/*
+	 * Early firmware scanning must use this rather than
+	 * get_hard_smp_processor_id because we don't have pacas allocated
+	 * until memory topology is discovered.
+	 */
+	if (cpu_to_phys_id != NULL)
+		return (int)phys_id == cpu_to_phys_id[cpu];
+#endif
+
 	return (int)phys_id == get_hard_smp_processor_id(cpu);
 }
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index acf4b2e0530c..f9d6befb55a6 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -171,7 +171,7 @@ static unsigned long __initdata prom_tce_alloc_start;
 static unsigned long __initdata prom_tce_alloc_end;
 #endif
 
-static bool __initdata prom_radix_disable;
+static bool prom_radix_disable __initdata = !IS_ENABLED(CONFIG_PPC_RADIX_MMU_DEFAULT);
 
 struct platform_support {
 	bool hash_mmu;
@@ -641,9 +641,19 @@ static void __init early_cmdline_parse(void)
 
 	opt = strstr(prom_cmd_line, "disable_radix");
 	if (opt) {
-		prom_debug("Radix disabled from cmdline\n");
-		prom_radix_disable = true;
+		opt += 13;
+		if (*opt && *opt == '=') {
+			bool val;
+
+			if (kstrtobool(++opt, &val))
+				prom_radix_disable = false;
+			else
+				prom_radix_disable = val;
+		} else
+			prom_radix_disable = true;
 	}
+	if (prom_radix_disable)
+		prom_debug("Radix disabled from cmdline\n");
 }
 
 #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
@@ -1110,7 +1120,8 @@ static void __init prom_check_platform_support(void)
 		}
 	}
 
-	if (supported.radix_mmu && supported.radix_gtse) {
+	if (supported.radix_mmu && supported.radix_gtse &&
+	    IS_ENABLED(CONFIG_PPC_RADIX_MMU)) {
 		/* Radix preferred - but we require GTSE for now */
 		prom_debug("Asking for radix with GTSE\n");
 		ibm_architecture_vec.vec5.mmu = OV5_FEAT(OV5_MMU_RADIX);
@@ -1809,16 +1820,8 @@ static void __init prom_initialize_tce_table(void)
 		 * size to 4 MB.  This is enough to map 2GB of PCI DMA space.
 		 * By doing this, we avoid the pitfalls of trying to DMA to
 		 * MMIO space and the DMA alias hole.
-		 *
-		 * On POWER4, firmware sets the TCE region by assuming
-		 * each TCE table is 8MB. Using this memory for anything
-		 * else will impact performance, so we always allocate 8MB.
-		 * Anton
 		 */
-		if (pvr_version_is(PVR_POWER4) || pvr_version_is(PVR_POWER4p))
-			minsize = 8UL << 20;
-		else
-			minsize = 4UL << 20;
+		minsize = 4UL << 20;
 
 		/* Align to the greater of the align or size */
 		align = max(minalign, minsize);
diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh
index 12640f7e726b..acb6b9226352 100644
--- a/arch/powerpc/kernel/prom_init_check.sh
+++ b/arch/powerpc/kernel/prom_init_check.sh
@@ -19,7 +19,7 @@
 WHITELIST="add_reloc_offset __bss_start __bss_stop copy_and_flush
 _end enter_prom memcpy memset reloc_offset __secondary_hold
 __secondary_hold_acknowledge __secondary_hold_spinloop __start
-strcmp strcpy strlcpy strlen strncmp strstr logo_linux_clut224
+strcmp strcpy strlcpy strlen strncmp strstr kstrtobool logo_linux_clut224
 reloc_got2 kernstart_addr memstart_addr linux_banner _stext
 __prom_init_toc_start __prom_init_toc_end btext_setup_display TOC."
 
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index ca72d7391d40..d23cf632edf0 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -41,6 +41,7 @@
 #include <asm/switch_to.h>
 #include <asm/tm.h>
 #include <asm/asm-prototypes.h>
+#include <asm/debug.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/syscalls.h>
@@ -2378,6 +2379,7 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
 	struct perf_event_attr attr;
 #endif /* CONFIG_HAVE_HW_BREAKPOINT */
 #ifndef CONFIG_PPC_ADV_DEBUG_REGS
+	bool set_bp = true;
 	struct arch_hw_breakpoint hw_brk;
 #endif
 
@@ -2411,9 +2413,10 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
 	hw_brk.address = data & (~HW_BRK_TYPE_DABR);
 	hw_brk.type = (data & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL;
 	hw_brk.len = 8;
+	set_bp = (data) && (hw_brk.type & HW_BRK_TYPE_RDWR);
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 	bp = thread->ptrace_bps[0];
-	if ((!data) || !(hw_brk.type & HW_BRK_TYPE_RDWR)) {
+	if (!set_bp) {
 		if (bp) {
 			unregister_hw_breakpoint(bp);
 			thread->ptrace_bps[0] = NULL;
@@ -2450,6 +2453,9 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
 		return PTR_ERR(bp);
 	}
 
+#else /* !CONFIG_HAVE_HW_BREAKPOINT */
+	if (set_bp && (!ppc_breakpoint_available()))
+		return -ENODEV;
 #endif /* CONFIG_HAVE_HW_BREAKPOINT */
 	task->thread.hw_brk = hw_brk;
 #else /* CONFIG_PPC_ADV_DEBUG_REGS */
@@ -2904,6 +2910,9 @@ static long ppc_set_hwdebug(struct task_struct *child,
 	if (child->thread.hw_brk.address)
 		return -ENOSPC;
 
+	if (!ppc_breakpoint_available())
+		return -ENODEV;
+
 	child->thread.hw_brk = brk;
 
 	return 1;
@@ -3052,7 +3061,10 @@ long arch_ptrace(struct task_struct *child, long request,
 #endif
 #else /* !CONFIG_PPC_ADV_DEBUG_REGS */
 		dbginfo.num_instruction_bps = 0;
-		dbginfo.num_data_bps = 1;
+		if (ppc_breakpoint_available())
+			dbginfo.num_data_bps = 1;
+		else
+			dbginfo.num_data_bps = 0;
 		dbginfo.num_condition_regs = 0;
 #ifdef CONFIG_PPC64
 		dbginfo.data_bp_alignment = 8;
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
new file mode 100644
index 000000000000..bab5a27ea805
--- /dev/null
+++ b/arch/powerpc/kernel/security.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Security related flags and so on.
+//
+// Copyright 2018, Michael Ellerman, IBM Corporation.
+
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/seq_buf.h>
+
+#include <asm/security_features.h>
+
+
+unsigned long powerpc_security_features __read_mostly = SEC_FTR_DEFAULT;
+
+ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	bool thread_priv;
+
+	thread_priv = security_ftr_enabled(SEC_FTR_L1D_THREAD_PRIV);
+
+	if (rfi_flush || thread_priv) {
+		struct seq_buf s;
+		seq_buf_init(&s, buf, PAGE_SIZE - 1);
+
+		seq_buf_printf(&s, "Mitigation: ");
+
+		if (rfi_flush)
+			seq_buf_printf(&s, "RFI Flush");
+
+		if (rfi_flush && thread_priv)
+			seq_buf_printf(&s, ", ");
+
+		if (thread_priv)
+			seq_buf_printf(&s, "L1D private per thread");
+
+		seq_buf_printf(&s, "\n");
+
+		return s.len;
+	}
+
+	if (!security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) &&
+	    !security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR))
+		return sprintf(buf, "Not affected\n");
+
+	return sprintf(buf, "Vulnerable\n");
+}
+
+ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	if (!security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR))
+		return sprintf(buf, "Not affected\n");
+
+	return sprintf(buf, "Vulnerable\n");
+}
+
+ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	bool bcs, ccd, ori;
+	struct seq_buf s;
+
+	seq_buf_init(&s, buf, PAGE_SIZE - 1);
+
+	bcs = security_ftr_enabled(SEC_FTR_BCCTRL_SERIALISED);
+	ccd = security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED);
+	ori = security_ftr_enabled(SEC_FTR_SPEC_BAR_ORI31);
+
+	if (bcs || ccd) {
+		seq_buf_printf(&s, "Mitigation: ");
+
+		if (bcs)
+			seq_buf_printf(&s, "Indirect branch serialisation (kernel only)");
+
+		if (bcs && ccd)
+			seq_buf_printf(&s, ", ");
+
+		if (ccd)
+			seq_buf_printf(&s, "Indirect branch cache disabled");
+	} else
+		seq_buf_printf(&s, "Vulnerable");
+
+	if (ori)
+		seq_buf_printf(&s, ", ori31 speculation barrier enabled");
+
+	seq_buf_printf(&s, "\n");
+
+	return s.len;
+}
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index d73ec518ef80..0af5c11b9e78 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -437,6 +437,8 @@ static void __init cpu_init_thread_core_maps(int tpc)
 }
 
 
+u32 *cpu_to_phys_id = NULL;
+
 /**
  * setup_cpu_maps - initialize the following cpu maps:
  *                  cpu_possible_mask
@@ -463,6 +465,10 @@ void __init smp_setup_cpu_maps(void)
 
 	DBG("smp_setup_cpu_maps()\n");
 
+	cpu_to_phys_id = __va(memblock_alloc(nr_cpu_ids * sizeof(u32),
+							__alignof__(u32)));
+	memset(cpu_to_phys_id, 0, nr_cpu_ids * sizeof(u32));
+
 	for_each_node_by_type(dn, "cpu") {
 		const __be32 *intserv;
 		__be32 cpu_be;
@@ -480,6 +486,7 @@ void __init smp_setup_cpu_maps(void)
 			intserv = of_get_property(dn, "reg", &len);
 			if (!intserv) {
 				cpu_be = cpu_to_be32(cpu);
+				/* XXX: what is this? uninitialized?? */
 				intserv = &cpu_be;	/* assume logical == phys */
 				len = 4;
 			}
@@ -499,8 +506,8 @@ void __init smp_setup_cpu_maps(void)
 						"enable-method", "spin-table");
 
 			set_cpu_present(cpu, avail);
-			set_hard_smp_processor_id(cpu, be32_to_cpu(intserv[j]));
 			set_cpu_possible(cpu, true);
+			cpu_to_phys_id[cpu] = be32_to_cpu(intserv[j]);
 			cpu++;
 		}
 
@@ -835,6 +842,23 @@ static __init void print_system_info(void)
 	pr_info("-----------------------------------------------------\n");
 }
 
+#ifdef CONFIG_SMP
+static void __init smp_setup_pacas(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		if (cpu == smp_processor_id())
+			continue;	/* skip the CPU we are running on */
+		allocate_paca(cpu);
+		set_hard_smp_processor_id(cpu, cpu_to_phys_id[cpu]);
+	}
+	/* Hard ids for the other CPUs were stored above; free the table. */
+	memblock_free(__pa(cpu_to_phys_id), nr_cpu_ids * sizeof(u32));
+	cpu_to_phys_id = NULL;
+}
+#endif
+
 /*
  * Called into from start_kernel this initializes memblock, which is used
  * to manage page allocation until mem_init is called.
@@ -888,8 +912,8 @@ void __init setup_arch(char **cmdline_p)
 	/* Check the SMT related command line arguments (ppc64). */
 	check_smt_enabled();
 
-	/* On BookE, setup per-core TLB data structures. */
-	setup_tlb_core_data();
+	/* Parse memory topology */
+	mem_topology_setup();
 
 	/*
 	 * Release secondary cpus out of their spinloops at 0x60 now that
@@ -899,6 +923,11 @@ void __init setup_arch(char **cmdline_p)
 	 * so smp_release_cpus() does nothing for them.
 	 */
 #ifdef CONFIG_SMP
+	smp_setup_pacas();
+
+	/* On BookE, setup per-core TLB data structures. */
+	setup_tlb_core_data();
+
 	smp_release_cpus();
 #endif
 
@@ -919,6 +948,8 @@ void __init setup_arch(char **cmdline_p)
 #ifdef CONFIG_PPC64
 	if (!radix_enabled())
 		init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64;
+#elif defined(CONFIG_PPC_8xx)
+	init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW;
 #else
 #error	"context.addr_limit not initialized."
 #endif
diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h
index 3fc11e30308f..d144df54ad40 100644
--- a/arch/powerpc/kernel/setup.h
+++ b/arch/powerpc/kernel/setup.h
@@ -46,13 +46,10 @@ static inline void emergency_stack_init(void) { };
 #endif
 
 #ifdef CONFIG_PPC64
-void record_spr_defaults(void);
-#else
-static inline void record_spr_defaults(void) { };
-#endif
-
-#ifdef CONFIG_PPC64
 u64 ppc64_bolted_size(void);
+
+/* Default SPR values from firmware/kexec */
+extern unsigned long spr_default_dscr;
 #endif
 
 /*
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 51ebc01fff52..74457485574b 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -39,6 +39,7 @@
 #include <asm/udbg.h>
 #include <asm/code-patching.h>
 #include <asm/cpu_has_feature.h>
+#include <asm/asm-prototypes.h>
 
 #define DBG(fmt...)
 
@@ -121,7 +122,7 @@ notrace void __init machine_init(u64 dt_ptr)
 }
 
 /* Checks "l2cr=xxxx" command-line option */
-int __init ppc_setup_l2cr(char *str)
+static int __init ppc_setup_l2cr(char *str)
 {
 	if (cpu_has_feature(CPU_FTR_L2CR)) {
 		unsigned long val = simple_strtoul(str, NULL, 0);
@@ -134,7 +135,7 @@ int __init ppc_setup_l2cr(char *str)
 __setup("l2cr=", ppc_setup_l2cr);
 
 /* Checks "l3cr=xxxx" command-line option */
-int __init ppc_setup_l3cr(char *str)
+static int __init ppc_setup_l3cr(char *str)
 {
 	if (cpu_has_feature(CPU_FTR_L3CR)) {
 		unsigned long val = simple_strtoul(str, NULL, 0);
@@ -180,7 +181,7 @@ EXPORT_SYMBOL(nvram_sync);
 
 #endif /* CONFIG_NVRAM */
 
-int __init ppc_init(void)
+static int __init ppc_init(void)
 {
 	/* clear the progress line */
 	if (ppc_md.progress)
@@ -192,7 +193,6 @@ int __init ppc_init(void)
 	}
 	return 0;
 }
-
 arch_initcall(ppc_init);
 
 void __init irqstack_early_init(void)
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index c388cc3357fa..44c30dd38067 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -110,7 +110,7 @@ void __init setup_tlb_core_data(void)
 		if (cpu_first_thread_sibling(boot_cpuid) == first)
 			first = boot_cpuid;
 
-		paca[cpu].tcd_ptr = &paca[first].tcd;
+		paca_ptrs[cpu]->tcd_ptr = &paca_ptrs[first]->tcd;
 
 		/*
 		 * If we have threads, we need either tlbsrx.
@@ -254,6 +254,14 @@ static void cpu_ready_for_interrupts(void)
 	get_paca()->kernel_msr = MSR_KERNEL;
 }
 
+unsigned long spr_default_dscr = 0;
+
+void __init record_spr_defaults(void)
+{
+	if (early_cpu_has_feature(CPU_FTR_DSCR))
+		spr_default_dscr = mfspr(SPRN_DSCR);
+}
+
 /*
  * Early initialization entry point. This is called by head.S
  * with MMU translation disabled. We rely on the "feature" of
@@ -304,7 +312,11 @@ void __init early_setup(unsigned long dt_ptr)
 	early_init_devtree(__va(dt_ptr));
 
 	/* Now we know the logical id of our boot cpu, setup the paca. */
-	setup_paca(&paca[boot_cpuid]);
+	if (boot_cpuid != 0) {
+		/* Poison paca_ptrs[0] again if it's not the boot cpu */
+		memset(&paca_ptrs[0], 0x88, sizeof(paca_ptrs[0]));
+	}
+	setup_paca(paca_ptrs[boot_cpuid]);
 	fixup_boot_paca();
 
 	/*
@@ -599,6 +611,21 @@ __init u64 ppc64_bolted_size(void)
 #endif
 }
 
+static void *__init alloc_stack(unsigned long limit, int cpu)
+{
+	unsigned long pa;
+
+	pa = memblock_alloc_base_nid(THREAD_SIZE, THREAD_SIZE, limit,
+					early_cpu_to_node(cpu), MEMBLOCK_NONE);
+	if (!pa) {
+		pa = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit);
+		if (!pa)
+			panic("cannot allocate stacks");
+	}
+
+	return __va(pa);
+}
+
 void __init irqstack_early_init(void)
 {
 	u64 limit = ppc64_bolted_size();
@@ -610,12 +637,8 @@ void __init irqstack_early_init(void)
 	 * accessed in realmode.
 	 */
 	for_each_possible_cpu(i) {
-		softirq_ctx[i] = (struct thread_info *)
-			__va(memblock_alloc_base(THREAD_SIZE,
-					    THREAD_SIZE, limit));
-		hardirq_ctx[i] = (struct thread_info *)
-			__va(memblock_alloc_base(THREAD_SIZE,
-					    THREAD_SIZE, limit));
+		softirq_ctx[i] = alloc_stack(limit, i);
+		hardirq_ctx[i] = alloc_stack(limit, i);
 	}
 }
 
@@ -623,20 +646,21 @@ void __init irqstack_early_init(void)
 void __init exc_lvl_early_init(void)
 {
 	unsigned int i;
-	unsigned long sp;
 
 	for_each_possible_cpu(i) {
-		sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
-		critirq_ctx[i] = (struct thread_info *)__va(sp);
-		paca[i].crit_kstack = __va(sp + THREAD_SIZE);
+		void *sp;
+
+		sp = alloc_stack(ULONG_MAX, i);
+		critirq_ctx[i] = sp;
+		paca_ptrs[i]->crit_kstack = sp + THREAD_SIZE;
 
-		sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
-		dbgirq_ctx[i] = (struct thread_info *)__va(sp);
-		paca[i].dbg_kstack = __va(sp + THREAD_SIZE);
+		sp = alloc_stack(ULONG_MAX, i);
+		dbgirq_ctx[i] = sp;
+		paca_ptrs[i]->dbg_kstack = sp + THREAD_SIZE;
 
-		sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
-		mcheckirq_ctx[i] = (struct thread_info *)__va(sp);
-		paca[i].mc_kstack = __va(sp + THREAD_SIZE);
+		sp = alloc_stack(ULONG_MAX, i);
+		mcheckirq_ctx[i] = sp;
+		paca_ptrs[i]->mc_kstack = sp + THREAD_SIZE;
 	}
 
 	if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC))
@@ -690,23 +714,24 @@ void __init emergency_stack_init(void)
 
 	for_each_possible_cpu(i) {
 		struct thread_info *ti;
-		ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
+
+		ti = alloc_stack(limit, i);
 		memset(ti, 0, THREAD_SIZE);
 		emerg_stack_init_thread_info(ti, i);
-		paca[i].emergency_sp = (void *)ti + THREAD_SIZE;
+		paca_ptrs[i]->emergency_sp = (void *)ti + THREAD_SIZE;
 
 #ifdef CONFIG_PPC_BOOK3S_64
 		/* emergency stack for NMI exception handling. */
-		ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
+		ti = alloc_stack(limit, i);
 		memset(ti, 0, THREAD_SIZE);
 		emerg_stack_init_thread_info(ti, i);
-		paca[i].nmi_emergency_sp = (void *)ti + THREAD_SIZE;
+		paca_ptrs[i]->nmi_emergency_sp = (void *)ti + THREAD_SIZE;
 
 		/* emergency stack for machine check exception handling. */
-		ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
+		ti = alloc_stack(limit, i);
 		memset(ti, 0, THREAD_SIZE);
 		emerg_stack_init_thread_info(ti, i);
-		paca[i].mc_emergency_sp = (void *)ti + THREAD_SIZE;
+		paca_ptrs[i]->mc_emergency_sp = (void *)ti + THREAD_SIZE;
 #endif
 	}
 }
@@ -762,7 +787,7 @@ void __init setup_per_cpu_areas(void)
 	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
 	for_each_possible_cpu(cpu) {
                 __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
-		paca[cpu].data_offset = __per_cpu_offset[cpu];
+		paca_ptrs[cpu]->data_offset = __per_cpu_offset[cpu];
 	}
 }
 #endif
@@ -846,9 +871,6 @@ static void do_nothing(void *unused)
 
 void rfi_flush_enable(bool enable)
 {
-	if (rfi_flush == enable)
-		return;
-
 	if (enable) {
 		do_rfi_flush_fixups(enabled_flush_types);
 		on_each_cpu(do_nothing, NULL, 1);
@@ -858,11 +880,15 @@ void rfi_flush_enable(bool enable)
 	rfi_flush = enable;
 }
 
-static void init_fallback_flush(void)
+static void __ref init_fallback_flush(void)
 {
 	u64 l1d_size, limit;
 	int cpu;
 
+	/* Only allocate the fallback flush area once (at boot time). */
+	if (l1d_flush_fallback_area)
+		return;
+
 	l1d_size = ppc64_caches.l1d.size;
 	limit = min(ppc64_bolted_size(), ppc64_rma_size);
 
@@ -875,23 +901,24 @@ static void init_fallback_flush(void)
 	memset(l1d_flush_fallback_area, 0, l1d_size * 2);
 
 	for_each_possible_cpu(cpu) {
-		paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area;
-		paca[cpu].l1d_flush_size = l1d_size;
+		struct paca_struct *paca = paca_ptrs[cpu];
+		paca->rfi_flush_fallback_area = l1d_flush_fallback_area;
+		paca->l1d_flush_size = l1d_size;
 	}
 }
 
-void __init setup_rfi_flush(enum l1d_flush_type types, bool enable)
+void setup_rfi_flush(enum l1d_flush_type types, bool enable)
 {
 	if (types & L1D_FLUSH_FALLBACK) {
-		pr_info("rfi-flush: Using fallback displacement flush\n");
+		pr_info("rfi-flush: fallback displacement flush available\n");
 		init_fallback_flush();
 	}
 
 	if (types & L1D_FLUSH_ORI)
-		pr_info("rfi-flush: Using ori type flush\n");
+		pr_info("rfi-flush: ori type flush available\n");
 
 	if (types & L1D_FLUSH_MTTRIG)
-		pr_info("rfi-flush: Using mttrig type flush\n");
+		pr_info("rfi-flush: mttrig type flush available\n");
 
 	enabled_flush_types = types;
 
@@ -902,13 +929,19 @@ void __init setup_rfi_flush(enum l1d_flush_type types, bool enable)
 #ifdef CONFIG_DEBUG_FS
 static int rfi_flush_set(void *data, u64 val)
 {
+	bool enable;
+
 	if (val == 1)
-		rfi_flush_enable(true);
+		enable = true;
 	else if (val == 0)
-		rfi_flush_enable(false);
+		enable = false;
 	else
 		return -EINVAL;
 
+	/* Only do anything if we're changing state */
+	if (enable != rfi_flush)
+		rfi_flush_enable(enable);
+
 	return 0;
 }
 
@@ -927,12 +960,4 @@ static __init int rfi_flush_debugfs_init(void)
 }
 device_initcall(rfi_flush_debugfs_init);
 #endif
-
-ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	if (rfi_flush)
-		return sprintf(buf, "Mitigation: RFI Flush\n");
-
-	return sprintf(buf, "Vulnerable\n");
-}
 #endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h
index 7c59d88b9d86..a6467f843acf 100644
--- a/arch/powerpc/kernel/signal.h
+++ b/arch/powerpc/kernel/signal.h
@@ -49,6 +49,11 @@ extern int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 
 #else /* CONFIG_PPC64 */
 
+extern long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
+		     struct pt_regs *regs);
+extern long sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
+		       struct pt_regs *regs);
+
 static inline int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 				     struct task_struct *tsk)
 {
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index a46de0035214..492f03451877 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -1045,7 +1045,7 @@ long sys_swapcontext(struct ucontext __user *old_ctx,
 		     struct ucontext __user *new_ctx,
 		     int ctx_size, int r6, int r7, int r8, struct pt_regs *regs)
 {
-	unsigned char tmp;
+	unsigned char tmp __maybe_unused;
 	int ctx_has_vsx_region = 0;
 
 #ifdef CONFIG_PPC64
@@ -1231,7 +1231,7 @@ int sys_debug_setcontext(struct ucontext __user *ctx,
 {
 	struct sig_dbg_op op;
 	int i;
-	unsigned char tmp;
+	unsigned char tmp __maybe_unused;
 	unsigned long new_msr = regs->msr;
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
 	unsigned long new_dbcr0 = current->thread.debug.dbcr0;
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index bbe7634b3a43..e16ec7b3b427 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -123,8 +123,8 @@ int smp_generic_kick_cpu(int nr)
 	 * cpu_start field to become non-zero After we set cpu_start,
 	 * the processor will continue on to secondary_start
 	 */
-	if (!paca[nr].cpu_start) {
-		paca[nr].cpu_start = 1;
+	if (!paca_ptrs[nr]->cpu_start) {
+		paca_ptrs[nr]->cpu_start = 1;
 		smp_mb();
 		return 0;
 	}
@@ -565,19 +565,28 @@ void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
 }
 #endif
 
+#ifdef CONFIG_NMI_IPI
+static void stop_this_cpu(struct pt_regs *regs)
+#else
 static void stop_this_cpu(void *dummy)
+#endif
 {
 	/* Remove this CPU */
 	set_cpu_online(smp_processor_id(), false);
 
-	local_irq_disable();
+	hard_irq_disable();
+	spin_begin();
 	while (1)
-		;
+		spin_cpu_relax();
 }
 
 void smp_send_stop(void)
 {
+#ifdef CONFIG_NMI_IPI
+	smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, stop_this_cpu, 1000000);
+#else
 	smp_call_function(stop_this_cpu, NULL, 0);
+#endif
 }
 
 struct thread_info *current_set[NR_CPUS];
@@ -657,7 +666,7 @@ void smp_prepare_boot_cpu(void)
 {
 	BUG_ON(smp_processor_id() != boot_cpuid);
 #ifdef CONFIG_PPC64
-	paca[boot_cpuid].__current = current;
+	paca_ptrs[boot_cpuid]->__current = current;
 #endif
 	set_numa_node(numa_cpu_lookup_table[boot_cpuid]);
 	current_set[boot_cpuid] = task_thread_info(current);
@@ -748,8 +757,8 @@ static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle)
 	struct thread_info *ti = task_thread_info(idle);
 
 #ifdef CONFIG_PPC64
-	paca[cpu].__current = idle;
-	paca[cpu].kstack = (unsigned long)ti + THREAD_SIZE - STACK_FRAME_OVERHEAD;
+	paca_ptrs[cpu]->__current = idle;
+	paca_ptrs[cpu]->kstack = (unsigned long)ti + THREAD_SIZE - STACK_FRAME_OVERHEAD;
 #endif
 	ti->cpu = cpu;
 	secondary_ti = current_set[cpu] = ti;
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 04d0bbd7a1dd..755dc98a57ae 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -20,6 +20,7 @@
 #include <asm/firmware.h>
 
 #include "cacheinfo.h"
+#include "setup.h"
 
 #ifdef CONFIG_PPC64
 #include <asm/paca.h>
@@ -588,21 +589,18 @@ static DEVICE_ATTR(dscr_default, 0600,
 
 static void sysfs_create_dscr_default(void)
 {
-	int err = 0;
-	if (cpu_has_feature(CPU_FTR_DSCR))
-		err = device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default);
-}
+	if (cpu_has_feature(CPU_FTR_DSCR)) {
+		int err = 0;
+		int cpu;
 
-void __init record_spr_defaults(void)
-{
-	int cpu;
+		dscr_default = spr_default_dscr;
+		for_each_possible_cpu(cpu)
+			paca_ptrs[cpu]->dscr_default = dscr_default;
 
-	if (cpu_has_feature(CPU_FTR_DSCR)) {
-		dscr_default = mfspr(SPRN_DSCR);
-		for (cpu = 0; cpu < nr_cpu_ids; cpu++)
-			paca[cpu].dscr_default = dscr_default;
+		err = device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default);
 	}
 }
+
 #endif /* CONFIG_PPC64 */
 
 #ifdef HAS_PPC_PMC_PA6T
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index a32823dcd9a4..360e71d455cc 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -266,6 +266,9 @@ void accumulate_stolen_time(void)
 
 static inline u64 calculate_stolen_time(u64 stop_tb)
 {
+	if (!firmware_has_feature(FW_FEATURE_SPLPAR))
+		return 0;
+
 	if (get_paca()->dtl_ridx != be64_to_cpu(get_lppaca()->dtl_idx))
 		return scan_dispatch_log(stop_tb);
 
@@ -1234,7 +1237,7 @@ void calibrate_delay(void)
 static int rtc_generic_get_time(struct device *dev, struct rtc_time *tm)
 {
 	ppc_md.get_rtc_time(tm);
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int rtc_generic_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 1e48d157196a..0904492e7032 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -208,6 +208,12 @@ static void oops_end(unsigned long flags, struct pt_regs *regs,
 	}
 	raw_local_irq_restore(flags);
 
+	/*
+	 * system_reset_exception handles debugger, crash dump, panic, for 0x100
+	 */
+	if (TRAP(regs) == 0x100)
+		return;
+
 	crash_fadump(regs, "die oops");
 
 	if (kexec_should_crash(current))
@@ -272,8 +278,13 @@ void die(const char *str, struct pt_regs *regs, long err)
 {
 	unsigned long flags;
 
-	if (debugger(regs))
-		return;
+	/*
+	 * system_reset_exception handles debugger, crash dump, panic, for 0x100
+	 */
+	if (TRAP(regs) != 0x100) {
+		if (debugger(regs))
+			return;
+	}
 
 	flags = oops_begin(regs);
 	if (__die(str, regs, err))
@@ -460,7 +471,7 @@ static inline int check_io_access(struct pt_regs *regs)
 /* single-step stuff */
 #define single_stepping(regs)	(current->thread.debug.dbcr0 & DBCR0_IC)
 #define clear_single_step(regs)	(current->thread.debug.dbcr0 &= ~DBCR0_IC)
-
+#define clear_br_trace(regs)	do {} while(0)
 #else
 /* On non-4xx, the reason for the machine check or program
    exception is in the MSR. */
@@ -473,6 +484,7 @@ static inline int check_io_access(struct pt_regs *regs)
 
 #define single_stepping(regs)	((regs)->msr & MSR_SE)
 #define clear_single_step(regs)	((regs)->msr &= ~MSR_SE)
+#define clear_br_trace(regs)	((regs)->msr &= ~MSR_BE)
 #endif
 
 #if defined(CONFIG_E500)
@@ -988,6 +1000,7 @@ void single_step_exception(struct pt_regs *regs)
 	enum ctx_state prev_state = exception_enter();
 
 	clear_single_step(regs);
+	clear_br_trace(regs);
 
 	if (kprobe_post_handler(regs))
 		return;
@@ -1495,18 +1508,6 @@ bail:
 	exception_exit(prev_state);
 }
 
-void slb_miss_bad_addr(struct pt_regs *regs)
-{
-	enum ctx_state prev_state = exception_enter();
-
-	if (user_mode(regs))
-		_exception(SIGSEGV, regs, SEGV_BNDERR, regs->dar);
-	else
-		bad_page_fault(regs, regs->dar, SIGSEGV);
-
-	exception_exit(prev_state);
-}
-
 void StackOverflow(struct pt_regs *regs)
 {
 	printk(KERN_CRIT "Kernel stack overflow in process %p, r1=%lx\n",
@@ -1612,6 +1613,22 @@ void facility_unavailable_exception(struct pt_regs *regs)
 		value = mfspr(SPRN_FSCR);
 
 	status = value >> 56;
+	if ((hv || status >= 2) &&
+	    (status < ARRAY_SIZE(facility_strings)) &&
+	    facility_strings[status])
+		facility = facility_strings[status];
+
+	/* We should not have taken this interrupt in kernel */
+	if (!user_mode(regs)) {
+		pr_emerg("Facility '%s' unavailable (%d) exception in kernel mode at %lx\n",
+			 facility, status, regs->nip);
+		die("Unexpected facility unavailable exception", regs, SIGABRT);
+	}
+
+	/* We restore the interrupt state now */
+	if (!arch_irq_disabled_regs(regs))
+		local_irq_enable();
+
 	if (status == FSCR_DSCR_LG) {
 		/*
 		 * User is accessing the DSCR register using the problem
@@ -1678,25 +1695,11 @@ void facility_unavailable_exception(struct pt_regs *regs)
 		return;
 	}
 
-	if ((hv || status >= 2) &&
-	    (status < ARRAY_SIZE(facility_strings)) &&
-	    facility_strings[status])
-		facility = facility_strings[status];
-
-	/* We restore the interrupt state now */
-	if (!arch_irq_disabled_regs(regs))
-		local_irq_enable();
-
 	pr_err_ratelimited("%sFacility '%s' unavailable (%d), exception at 0x%lx, MSR=%lx\n",
 		hv ? "Hypervisor " : "", facility, status, regs->nip, regs->msr);
 
 out:
-	if (user_mode(regs)) {
-		_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
-		return;
-	}
-
-	die("Unexpected facility unavailable exception", regs, SIGABRT);
+	_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
 }
 #endif
 
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index 22b01a3962f0..b44ec104a5a1 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -99,26 +99,28 @@ static struct vdso_patch_def vdso_patches[] = {
 		CPU_FTR_COHERENT_ICACHE, CPU_FTR_COHERENT_ICACHE,
 		"__kernel_sync_dicache", "__kernel_sync_dicache_p5"
 	},
+#ifdef CONFIG_PPC32
 	{
-		CPU_FTR_USE_TB, 0,
+		CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
 		"__kernel_gettimeofday", NULL
 	},
 	{
-		CPU_FTR_USE_TB, 0,
+		CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
 		"__kernel_clock_gettime", NULL
 	},
 	{
-		CPU_FTR_USE_TB, 0,
+		CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
 		"__kernel_clock_getres", NULL
 	},
 	{
-		CPU_FTR_USE_TB, 0,
+		CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
 		"__kernel_get_tbfreq", NULL
 	},
 	{
-		CPU_FTR_USE_TB, 0,
+		CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
 		"__kernel_time", NULL
 	},
+#endif
 };
 
 /*
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 85ba80de7133..4b19da8c87ae 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -74,9 +74,15 @@ kvm-hv-y += \
 	book3s_64_mmu_hv.o \
 	book3s_64_mmu_radix.o
 
+kvm-hv-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \
+	book3s_hv_tm.o
+
 kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \
 	book3s_hv_rm_xics.o book3s_hv_rm_xive.o
 
+kvm-book3s_64-builtin-tm-objs-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \
+	book3s_hv_tm_builtin.o
+
 ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
 	book3s_hv_hmi.o \
@@ -84,6 +90,7 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
 	book3s_hv_rm_mmu.o \
 	book3s_hv_ras.o \
 	book3s_hv_builtin.o \
+	$(kvm-book3s_64-builtin-tm-objs-y) \
 	$(kvm-book3s_64-builtin-xics-objs-y)
 endif
 
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 234531d1bee1..97d4a112648f 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -819,12 +819,6 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm,
 	kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old, new);
 }
 
-int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
-{
-	return kvm->arch.kvm_ops->unmap_hva(kvm, hva);
-}
-EXPORT_SYMBOL_GPL(kvm_unmap_hva);
-
 int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
 {
 	return kvm->arch.kvm_ops->unmap_hva_range(kvm, start, end);
diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h
index d2b3ec088b8c..4ad5e287b8bc 100644
--- a/arch/powerpc/kvm/book3s.h
+++ b/arch/powerpc/kvm/book3s.h
@@ -14,7 +14,6 @@
 
 extern void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
 					 struct kvm_memory_slot *memslot);
-extern int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva);
 extern int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start,
 				  unsigned long end);
 extern int kvm_age_hva_hv(struct kvm *kvm, unsigned long start,
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index ef243fed2f2b..a670fa5fbe50 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -877,15 +877,6 @@ static int kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
 	return 0;
 }
 
-int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva)
-{
-	hva_handler_fn handler;
-
-	handler = kvm_is_radix(kvm) ? kvm_unmap_radix : kvm_unmap_rmapp;
-	kvm_handle_hva(kvm, hva, handler);
-	return 0;
-}
-
 int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end)
 {
 	hva_handler_fn handler;
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 5d9bafe9a371..a57eafec4dc2 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -150,7 +150,9 @@ static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
 {
 	int psize = MMU_BASE_PSIZE;
 
-	if (pshift >= PMD_SHIFT)
+	if (pshift >= PUD_SHIFT)
+		psize = MMU_PAGE_1G;
+	else if (pshift >= PMD_SHIFT)
 		psize = MMU_PAGE_2M;
 	addr &= ~0xfffUL;
 	addr |= mmu_psize_defs[psize].ap << 5;
@@ -163,6 +165,17 @@ static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
 	asm volatile("ptesync": : :"memory");
 }
 
+static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned long addr)
+{
+	unsigned long rb = 0x2 << PPC_BITLSHIFT(53); /* IS = 2 */
+
+	asm volatile("ptesync": : :"memory");
+	/* RIC=1 PRS=0 R=1 IS=2 */
+	asm volatile(PPC_TLBIE_5(%0, %1, 1, 0, 1)
+		     : : "r" (rb), "r" (kvm->arch.lpid) : "memory");
+	asm volatile("ptesync": : :"memory");
+}
+
 unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep,
 				      unsigned long clr, unsigned long set,
 				      unsigned long addr, unsigned int shift)
@@ -223,9 +236,9 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
 		new_pud = pud_alloc_one(kvm->mm, gpa);
 
 	pmd = NULL;
-	if (pud && pud_present(*pud))
+	if (pud && pud_present(*pud) && !pud_huge(*pud))
 		pmd = pmd_offset(pud, gpa);
-	else
+	else if (level <= 1)
 		new_pmd = pmd_alloc_one(kvm->mm, gpa);
 
 	if (level == 0 && !(pmd && pmd_present(*pmd) && !pmd_is_leaf(*pmd)))
@@ -246,6 +259,50 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
 		new_pud = NULL;
 	}
 	pud = pud_offset(pgd, gpa);
+	if (pud_huge(*pud)) {
+		unsigned long hgpa = gpa & PUD_MASK;
+
+		/*
+		 * If we raced with another CPU which has just put
+		 * a 1GB pte in after we saw a pmd page, try again.
+		 */
+		if (level <= 1 && !new_pmd) {
+			ret = -EAGAIN;
+			goto out_unlock;
+		}
+		/* Check if we raced and someone else has set the same thing */
+		if (level == 2 && pud_raw(*pud) == pte_raw(pte)) {
+			ret = 0;
+			goto out_unlock;
+		}
+		/* Valid 1GB page here already, remove it */
+		old = kvmppc_radix_update_pte(kvm, (pte_t *)pud,
+					      ~0UL, 0, hgpa, PUD_SHIFT);
+		kvmppc_radix_tlbie_page(kvm, hgpa, PUD_SHIFT);
+		if (old & _PAGE_DIRTY) {
+			unsigned long gfn = hgpa >> PAGE_SHIFT;
+			struct kvm_memory_slot *memslot;
+			memslot = gfn_to_memslot(kvm, gfn);
+			if (memslot && memslot->dirty_bitmap)
+				kvmppc_update_dirty_map(memslot,
+							gfn, PUD_SIZE);
+		}
+	}
+	if (level == 2) {
+		if (!pud_none(*pud)) {
+			/*
+			 * There's a page table page here, but we wanted to
+			 * install a large page, so remove and free the page
+			 * table page.  new_pmd will be NULL since level == 2.
+			 */
+			new_pmd = pmd_offset(pud, 0);
+			pud_clear(pud);
+			kvmppc_radix_flush_pwc(kvm, gpa);
+		}
+		kvmppc_radix_set_pte_at(kvm, gpa, (pte_t *)pud, pte);
+		ret = 0;
+		goto out_unlock;
+	}
 	if (pud_none(*pud)) {
 		if (!new_pmd)
 			goto out_unlock;
@@ -264,6 +321,11 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
 			ret = -EAGAIN;
 			goto out_unlock;
 		}
+		/* Check if we raced and someone else has set the same thing */
+		if (level == 1 && pmd_raw(*pmd) == pte_raw(pte)) {
+			ret = 0;
+			goto out_unlock;
+		}
 		/* Valid 2MB page here already, remove it */
 		old = kvmppc_radix_update_pte(kvm, pmdp_ptep(pmd),
 					      ~0UL, 0, lgpa, PMD_SHIFT);
@@ -276,35 +338,43 @@ static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
 				kvmppc_update_dirty_map(memslot,
 							gfn, PMD_SIZE);
 		}
-	} else if (level == 1 && !pmd_none(*pmd)) {
-		/*
-		 * There's a page table page here, but we wanted
-		 * to install a large page.  Tell the caller and let
-		 * it try installing a normal page if it wants.
-		 */
-		ret = -EBUSY;
-		goto out_unlock;
 	}
-	if (level == 0) {
-		if (pmd_none(*pmd)) {
-			if (!new_ptep)
-				goto out_unlock;
-			pmd_populate(kvm->mm, pmd, new_ptep);
-			new_ptep = NULL;
-		}
-		ptep = pte_offset_kernel(pmd, gpa);
-		if (pte_present(*ptep)) {
-			/* PTE was previously valid, so invalidate it */
-			old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT,
-						      0, gpa, 0);
-			kvmppc_radix_tlbie_page(kvm, gpa, 0);
-			if (old & _PAGE_DIRTY)
-				mark_page_dirty(kvm, gpa >> PAGE_SHIFT);
+	if (level == 1) {
+		if (!pmd_none(*pmd)) {
+			/*
+			 * There's a page table page here, but we wanted to
+			 * install a large page, so remove and free the page
+			 * table page.  new_ptep will be NULL since level == 1.
+			 */
+			new_ptep = pte_offset_kernel(pmd, 0);
+			pmd_clear(pmd);
+			kvmppc_radix_flush_pwc(kvm, gpa);
 		}
-		kvmppc_radix_set_pte_at(kvm, gpa, ptep, pte);
-	} else {
 		kvmppc_radix_set_pte_at(kvm, gpa, pmdp_ptep(pmd), pte);
+		ret = 0;
+		goto out_unlock;
 	}
+	if (pmd_none(*pmd)) {
+		if (!new_ptep)
+			goto out_unlock;
+		pmd_populate(kvm->mm, pmd, new_ptep);
+		new_ptep = NULL;
+	}
+	ptep = pte_offset_kernel(pmd, gpa);
+	if (pte_present(*ptep)) {
+		/* Check if someone else set the same thing */
+		if (pte_raw(*ptep) == pte_raw(pte)) {
+			ret = 0;
+			goto out_unlock;
+		}
+		/* PTE was previously valid, so invalidate it */
+		old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT,
+					      0, gpa, 0);
+		kvmppc_radix_tlbie_page(kvm, gpa, 0);
+		if (old & _PAGE_DIRTY)
+			mark_page_dirty(kvm, gpa >> PAGE_SHIFT);
+	}
+	kvmppc_radix_set_pte_at(kvm, gpa, ptep, pte);
 	ret = 0;
 
  out_unlock:
@@ -325,11 +395,11 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	unsigned long mmu_seq, pte_size;
 	unsigned long gpa, gfn, hva, pfn;
 	struct kvm_memory_slot *memslot;
-	struct page *page = NULL, *pages[1];
-	long ret, npages, ok;
-	unsigned int writing;
-	struct vm_area_struct *vma;
-	unsigned long flags;
+	struct page *page = NULL;
+	long ret;
+	bool writing;
+	bool upgrade_write = false;
+	bool *upgrade_p = &upgrade_write;
 	pte_t pte, *ptep;
 	unsigned long pgflags;
 	unsigned int shift, level;
@@ -369,122 +439,131 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 					      dsisr & DSISR_ISSTORE);
 	}
 
-	/* used to check for invalidations in progress */
-	mmu_seq = kvm->mmu_notifier_seq;
-	smp_rmb();
-
 	writing = (dsisr & DSISR_ISSTORE) != 0;
-	hva = gfn_to_hva_memslot(memslot, gfn);
+	if (memslot->flags & KVM_MEM_READONLY) {
+		if (writing) {
+			/* give the guest a DSI */
+			dsisr = DSISR_ISSTORE | DSISR_PROTFAULT;
+			kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
+			return RESUME_GUEST;
+		}
+		upgrade_p = NULL;
+	}
+
 	if (dsisr & DSISR_SET_RC) {
 		/*
 		 * Need to set an R or C bit in the 2nd-level tables;
-		 * if the relevant bits aren't already set in the linux
-		 * page tables, fall through to do the gup_fast to
-		 * set them in the linux page tables too.
+		 * since we are just helping out the hardware here,
+		 * it is sufficient to do what the hardware does.
 		 */
-		ok = 0;
 		pgflags = _PAGE_ACCESSED;
 		if (writing)
 			pgflags |= _PAGE_DIRTY;
-		local_irq_save(flags);
-		ptep = find_current_mm_pte(current->mm->pgd, hva, NULL, NULL);
-		if (ptep) {
-			pte = READ_ONCE(*ptep);
-			if (pte_present(pte) &&
-			    (pte_val(pte) & pgflags) == pgflags)
-				ok = 1;
-		}
-		local_irq_restore(flags);
-		if (ok) {
-			spin_lock(&kvm->mmu_lock);
-			if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) {
-				spin_unlock(&kvm->mmu_lock);
-				return RESUME_GUEST;
-			}
-			/*
-			 * We are walking the secondary page table here. We can do this
-			 * without disabling irq.
-			 */
-			ptep = __find_linux_pte(kvm->arch.pgtable,
-						gpa, NULL, &shift);
-			if (ptep && pte_present(*ptep)) {
-				kvmppc_radix_update_pte(kvm, ptep, 0, pgflags,
-							gpa, shift);
-				spin_unlock(&kvm->mmu_lock);
-				return RESUME_GUEST;
-			}
-			spin_unlock(&kvm->mmu_lock);
+		/*
+		 * We are walking the secondary page table here. We can do this
+		 * without disabling irq.
+		 */
+		spin_lock(&kvm->mmu_lock);
+		ptep = __find_linux_pte(kvm->arch.pgtable,
+					gpa, NULL, &shift);
+		if (ptep && pte_present(*ptep) &&
+		    (!writing || pte_write(*ptep))) {
+			kvmppc_radix_update_pte(kvm, ptep, 0, pgflags,
+						gpa, shift);
+			dsisr &= ~DSISR_SET_RC;
 		}
+		spin_unlock(&kvm->mmu_lock);
+		if (!(dsisr & (DSISR_BAD_FAULT_64S | DSISR_NOHPTE |
+			       DSISR_PROTFAULT | DSISR_SET_RC)))
+			return RESUME_GUEST;
 	}
 
-	ret = -EFAULT;
-	pfn = 0;
-	pte_size = PAGE_SIZE;
-	pgflags = _PAGE_READ | _PAGE_EXEC;
-	level = 0;
-	npages = get_user_pages_fast(hva, 1, writing, pages);
-	if (npages < 1) {
-		/* Check if it's an I/O mapping */
-		down_read(&current->mm->mmap_sem);
-		vma = find_vma(current->mm, hva);
-		if (vma && vma->vm_start <= hva && hva < vma->vm_end &&
-		    (vma->vm_flags & VM_PFNMAP)) {
-			pfn = vma->vm_pgoff +
-				((hva - vma->vm_start) >> PAGE_SHIFT);
-			pgflags = pgprot_val(vma->vm_page_prot);
-		}
-		up_read(&current->mm->mmap_sem);
-		if (!pfn)
-			return -EFAULT;
-	} else {
-		page = pages[0];
+	/* used to check for invalidations in progress */
+	mmu_seq = kvm->mmu_notifier_seq;
+	smp_rmb();
+
+	/*
+	 * Do a fast check first, since __gfn_to_pfn_memslot doesn't
+	 * do it with !atomic && !async, which is how we call it.
+	 * We always ask for write permission since the common case
+	 * is that the page is writable.
+	 */
+	hva = gfn_to_hva_memslot(memslot, gfn);
+	if (upgrade_p && __get_user_pages_fast(hva, 1, 1, &page) == 1) {
 		pfn = page_to_pfn(page);
-		if (PageCompound(page)) {
-			pte_size <<= compound_order(compound_head(page));
-			/* See if we can insert a 2MB large-page PTE here */
-			if (pte_size >= PMD_SIZE &&
-			    (gpa & (PMD_SIZE - PAGE_SIZE)) ==
-			    (hva & (PMD_SIZE - PAGE_SIZE))) {
-				level = 1;
-				pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1);
-			}
+		upgrade_write = true;
+	} else {
+		/* Call KVM generic code to do the slow-path check */
+		pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
+					   writing, upgrade_p);
+		if (is_error_noslot_pfn(pfn))
+			return -EFAULT;
+		page = NULL;
+		if (pfn_valid(pfn)) {
+			page = pfn_to_page(pfn);
+			if (PageReserved(page))
+				page = NULL;
 		}
-		/* See if we can provide write access */
-		if (writing) {
-			pgflags |= _PAGE_WRITE;
-		} else {
-			local_irq_save(flags);
-			ptep = find_current_mm_pte(current->mm->pgd,
-						   hva, NULL, NULL);
-			if (ptep && pte_write(*ptep))
-				pgflags |= _PAGE_WRITE;
-			local_irq_restore(flags);
+	}
+
+	/* See if we can insert a 1GB or 2MB large PTE here */
+	level = 0;
+	if (page && PageCompound(page)) {
+		pte_size = PAGE_SIZE << compound_order(compound_head(page));
+		if (pte_size >= PUD_SIZE &&
+		    (gpa & (PUD_SIZE - PAGE_SIZE)) ==
+		    (hva & (PUD_SIZE - PAGE_SIZE))) {
+			level = 2;
+			pfn &= ~((PUD_SIZE >> PAGE_SHIFT) - 1);
+		} else if (pte_size >= PMD_SIZE &&
+			   (gpa & (PMD_SIZE - PAGE_SIZE)) ==
+			   (hva & (PMD_SIZE - PAGE_SIZE))) {
+			level = 1;
+			pfn &= ~((PMD_SIZE >> PAGE_SHIFT) - 1);
 		}
 	}
 
 	/*
 	 * Compute the PTE value that we need to insert.
 	 */
-	pgflags |= _PAGE_PRESENT | _PAGE_PTE | _PAGE_ACCESSED;
-	if (pgflags & _PAGE_WRITE)
-		pgflags |= _PAGE_DIRTY;
-	pte = pfn_pte(pfn, __pgprot(pgflags));
-
-	/* Allocate space in the tree and write the PTE */
-	ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq);
-	if (ret == -EBUSY) {
+	if (page) {
+		pgflags = _PAGE_READ | _PAGE_EXEC | _PAGE_PRESENT | _PAGE_PTE |
+			_PAGE_ACCESSED;
+		if (writing || upgrade_write)
+			pgflags |= _PAGE_WRITE | _PAGE_DIRTY;
+		pte = pfn_pte(pfn, __pgprot(pgflags));
+	} else {
 		/*
-		 * There's already a PMD where wanted to install a large page;
-		 * for now, fall back to installing a small page.
+		 * Read the PTE from the process' radix tree and use that
+		 * so we get the attribute bits.
 		 */
-		level = 0;
-		pfn |= gfn & ((PMD_SIZE >> PAGE_SHIFT) - 1);
-		pte = pfn_pte(pfn, __pgprot(pgflags));
-		ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq);
+		local_irq_disable();
+		ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);
+		pte = *ptep;
+		local_irq_enable();
+		if (shift == PUD_SHIFT &&
+		    (gpa & (PUD_SIZE - PAGE_SIZE)) ==
+		    (hva & (PUD_SIZE - PAGE_SIZE))) {
+			level = 2;
+		} else if (shift == PMD_SHIFT &&
+			   (gpa & (PMD_SIZE - PAGE_SIZE)) ==
+			   (hva & (PMD_SIZE - PAGE_SIZE))) {
+			level = 1;
+		} else if (shift && shift != PAGE_SHIFT) {
+			/* Adjust PFN */
+			unsigned long mask = (1ul << shift) - PAGE_SIZE;
+			pte = __pte(pte_val(pte) | (hva & mask));
+		}
+		if (!(writing || upgrade_write))
+			pte = __pte(pte_val(pte) & ~ _PAGE_WRITE);
+		pte = __pte(pte_val(pte) | _PAGE_EXEC);
 	}
 
+	/* Allocate space in the tree and write the PTE */
+	ret = kvmppc_create_pte(kvm, pte, gpa, level, mmu_seq);
+
 	if (page) {
-		if (!ret && (pgflags & _PAGE_WRITE))
+		if (!ret && (pte_val(pte) & _PAGE_WRITE))
 			set_page_dirty_lock(page);
 		put_page(page);
 	}
@@ -662,6 +741,10 @@ void kvmppc_free_radix(struct kvm *kvm)
 		for (iu = 0; iu < PTRS_PER_PUD; ++iu, ++pud) {
 			if (!pud_present(*pud))
 				continue;
+			if (pud_huge(*pud)) {
+				pud_clear(pud);
+				continue;
+			}
 			pmd = pmd_offset(pud, 0);
 			for (im = 0; im < PTRS_PER_PMD; ++im, ++pmd) {
 				if (pmd_is_leaf(*pmd)) {
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index c32e9bfe75b1..6651f736a0b1 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -450,7 +450,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
 
 		/*
 		 * Synchronize with the MMU notifier callbacks in
-		 * book3s_64_mmu_hv.c (kvm_unmap_hva_hv etc.).
+		 * book3s_64_mmu_hv.c (kvm_unmap_hva_range_hv etc.).
 		 * While we have the rmap lock, code running on other CPUs
 		 * cannot finish unmapping the host real page that backs
 		 * this guest real page, so we are OK to access the host
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 9cb9448163c4..4d07fca5121c 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -49,6 +49,7 @@
 #include <asm/reg.h>
 #include <asm/ppc-opcode.h>
 #include <asm/asm-prototypes.h>
+#include <asm/debug.h>
 #include <asm/disassemble.h>
 #include <asm/cputable.h>
 #include <asm/cacheflush.h>
@@ -170,7 +171,7 @@ static bool kvmppc_ipi_thread(int cpu)
 
 #if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
 	if (cpu >= 0 && cpu < nr_cpu_ids) {
-		if (paca[cpu].kvm_hstate.xics_phys) {
+		if (paca_ptrs[cpu]->kvm_hstate.xics_phys) {
 			xics_wake_cpu(cpu);
 			return true;
 		}
@@ -498,7 +499,8 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
 		 * use 640 bytes of the structure though, so we should accept
 		 * clients that set a size of 640.
 		 */
-		if (len < 640)
+		BUILD_BUG_ON(sizeof(struct lppaca) != 640);
+		if (len < sizeof(struct lppaca))
 			break;
 		vpap = &tvcpu->arch.vpa;
 		err = 0;
@@ -741,6 +743,8 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
 	case H_SET_MODE_RESOURCE_SET_DAWR:
 		if (!kvmppc_power8_compatible(vcpu))
 			return H_P2;
+		if (!ppc_breakpoint_available())
+			return H_P2;
 		if (mflags)
 			return H_UNSUPPORTED_FLAG_START;
 		if (value2 & DABRX_HYP)
@@ -1206,6 +1210,19 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			r = RESUME_GUEST;
 		}
 		break;
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	case BOOK3S_INTERRUPT_HV_SOFTPATCH:
+		/*
+		 * This occurs for various TM-related instructions that
+		 * we need to emulate on POWER9 DD2.2.  We have already
+		 * handled the cases where the guest was in real-suspend
+		 * mode and was transitioning to transactional state.
+		 */
+		r = kvmhv_p9_tm_emulation(vcpu);
+		break;
+#endif
+
 	case BOOK3S_INTERRUPT_HV_RM_HARD:
 		r = RESUME_PASSTHROUGH;
 		break;
@@ -1978,7 +1995,9 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
 	 * turn off the HFSCR bit, which causes those instructions to trap.
 	 */
 	vcpu->arch.hfscr = mfspr(SPRN_HFSCR);
-	if (!cpu_has_feature(CPU_FTR_TM))
+	if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
+		vcpu->arch.hfscr |= HFSCR_TM;
+	else if (!cpu_has_feature(CPU_FTR_TM_COMP))
 		vcpu->arch.hfscr &= ~HFSCR_TM;
 	if (cpu_has_feature(CPU_FTR_ARCH_300))
 		vcpu->arch.hfscr &= ~HFSCR_MSGP;
@@ -2140,7 +2159,7 @@ static int kvmppc_grab_hwthread(int cpu)
 	struct paca_struct *tpaca;
 	long timeout = 10000;
 
-	tpaca = &paca[cpu];
+	tpaca = paca_ptrs[cpu];
 
 	/* Ensure the thread won't go into the kernel if it wakes */
 	tpaca->kvm_hstate.kvm_vcpu = NULL;
@@ -2173,7 +2192,7 @@ static void kvmppc_release_hwthread(int cpu)
 {
 	struct paca_struct *tpaca;
 
-	tpaca = &paca[cpu];
+	tpaca = paca_ptrs[cpu];
 	tpaca->kvm_hstate.hwthread_req = 0;
 	tpaca->kvm_hstate.kvm_vcpu = NULL;
 	tpaca->kvm_hstate.kvm_vcore = NULL;
@@ -2239,9 +2258,10 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
 		vcpu->arch.thread_cpu = cpu;
 		cpumask_set_cpu(cpu, &kvm->arch.cpu_in_guest);
 	}
-	tpaca = &paca[cpu];
+	tpaca = paca_ptrs[cpu];
 	tpaca->kvm_hstate.kvm_vcpu = vcpu;
 	tpaca->kvm_hstate.ptid = cpu - vc->pcpu;
+	tpaca->kvm_hstate.fake_suspend = 0;
 	/* Order stores to hstate.kvm_vcpu etc. before store to kvm_vcore */
 	smp_wmb();
 	tpaca->kvm_hstate.kvm_vcore = vc;
@@ -2264,7 +2284,7 @@ static void kvmppc_wait_for_nap(int n_threads)
 		 * for any threads that still have a non-NULL vcore ptr.
 		 */
 		for (i = 1; i < n_threads; ++i)
-			if (paca[cpu + i].kvm_hstate.kvm_vcore)
+			if (paca_ptrs[cpu + i]->kvm_hstate.kvm_vcore)
 				break;
 		if (i == n_threads) {
 			HMT_medium();
@@ -2274,7 +2294,7 @@ static void kvmppc_wait_for_nap(int n_threads)
 	}
 	HMT_medium();
 	for (i = 1; i < n_threads; ++i)
-		if (paca[cpu + i].kvm_hstate.kvm_vcore)
+		if (paca_ptrs[cpu + i]->kvm_hstate.kvm_vcore)
 			pr_err("KVM: CPU %d seems to be stuck\n", cpu + i);
 }
 
@@ -2806,9 +2826,11 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	}
 
 	for (thr = 0; thr < controlled_threads; ++thr) {
-		paca[pcpu + thr].kvm_hstate.tid = thr;
-		paca[pcpu + thr].kvm_hstate.napping = 0;
-		paca[pcpu + thr].kvm_hstate.kvm_split_mode = sip;
+		struct paca_struct *paca = paca_ptrs[pcpu + thr];
+
+		paca->kvm_hstate.tid = thr;
+		paca->kvm_hstate.napping = 0;
+		paca->kvm_hstate.kvm_split_mode = sip;
 	}
 
 	/* Initiate micro-threading (split-core) on POWER8 if required */
@@ -2923,7 +2945,9 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	} else if (hpt_on_radix) {
 		/* Wait for all threads to have seen final sync */
 		for (thr = 1; thr < controlled_threads; ++thr) {
-			while (paca[pcpu + thr].kvm_hstate.kvm_split_mode) {
+			struct paca_struct *paca = paca_ptrs[pcpu + thr];
+
+			while (paca->kvm_hstate.kvm_split_mode) {
 				HMT_low();
 				barrier();
 			}
@@ -4351,7 +4375,6 @@ static struct kvmppc_ops kvm_ops_hv = {
 	.flush_memslot  = kvmppc_core_flush_memslot_hv,
 	.prepare_memory_region = kvmppc_core_prepare_memory_region_hv,
 	.commit_memory_region  = kvmppc_core_commit_memory_region_hv,
-	.unmap_hva = kvm_unmap_hva_hv,
 	.unmap_hva_range = kvm_unmap_hva_range_hv,
 	.age_hva  = kvm_age_hva_hv,
 	.test_age_hva = kvm_test_age_hva_hv,
@@ -4388,7 +4411,7 @@ static int kvm_init_subcore_bitmap(void)
 		int node = cpu_to_node(first_cpu);
 
 		/* Ignore if it is already allocated. */
-		if (paca[first_cpu].sibling_subcore_state)
+		if (paca_ptrs[first_cpu]->sibling_subcore_state)
 			continue;
 
 		sibling_subcore_state =
@@ -4403,7 +4426,8 @@ static int kvm_init_subcore_bitmap(void)
 		for (j = 0; j < threads_per_core; j++) {
 			int cpu = first_cpu + j;
 
-			paca[cpu].sibling_subcore_state = sibling_subcore_state;
+			paca_ptrs[cpu]->sibling_subcore_state =
+						sibling_subcore_state;
 		}
 	}
 	return 0;
@@ -4430,7 +4454,7 @@ static int kvmppc_book3s_init_hv(void)
 
 	/*
 	 * We need a way of accessing the XICS interrupt controller,
-	 * either directly, via paca[cpu].kvm_hstate.xics_phys, or
+	 * either directly, via paca_ptrs[cpu]->kvm_hstate.xics_phys, or
 	 * indirectly, via OPAL.
 	 */
 #ifdef CONFIG_SMP
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 49a2c7825e04..de18299f92b7 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -251,7 +251,7 @@ void kvmhv_rm_send_ipi(int cpu)
 	    return;
 
 	/* Else poke the target with an IPI */
-	xics_phys = paca[cpu].kvm_hstate.xics_phys;
+	xics_phys = paca_ptrs[cpu]->kvm_hstate.xics_phys;
 	if (xics_phys)
 		__raw_rm_writeb(IPI_PRIORITY, xics_phys + XICS_MFRR);
 	else
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index dc54373c8780..0e8493033288 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -79,8 +79,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	li	r5, 0
 	mtspr	SPRN_MMCRA, r5
 	isync
-	ld	r3, PACALPPACAPTR(r13)	/* is the host using the PMU? */
-	lbz	r5, LPPACA_PMCINUSE(r3)
+	lbz	r5, PACA_PMCINUSE(r13)	/* is the host using the PMU? */
 	cmpwi	r5, 0
 	beq	31f			/* skip if not */
 	mfspr	r5, SPRN_MMCR1
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index e1c083fbe434..78e6a392330f 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -470,8 +470,6 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
 		for (i = 0; i < npages; ++i) {
 			asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : :
 				     "r" (rbvalues[i]), "r" (kvm->arch.lpid));
-			trace_tlbie(kvm->arch.lpid, 0, rbvalues[i],
-				kvm->arch.lpid, 0, 0, 0);
 		}
 
 		if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
@@ -492,8 +490,6 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
 		for (i = 0; i < npages; ++i) {
 			asm volatile(PPC_TLBIEL(%0,%1,0,0,0) : :
 				     "r" (rbvalues[i]), "r" (0));
-			trace_tlbie(kvm->arch.lpid, 1, rbvalues[i],
-				0, 0, 0, 0);
 		}
 		asm volatile("ptesync" : : : "memory");
 	}
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index f86a20270e50..bd63fa8a08b5 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -113,8 +113,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 	mtspr	SPRN_SPRG_VDSO_WRITE,r3
 
 	/* Reload the host's PMU registers */
-	ld	r3, PACALPPACAPTR(r13)	/* is the host using the PMU? */
-	lbz	r4, LPPACA_PMCINUSE(r3)
+	lbz	r4, PACA_PMCINUSE(r13) /* is the host using the PMU? */
 	cmpwi	r4, 0
 	beq	23f			/* skip if not */
 BEGIN_FTR_SECTION
@@ -786,12 +785,18 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+/*
+ * Branch around the call if both CPU_FTR_TM and
+ * CPU_FTR_P9_TM_HV_ASSIST are off.
+ */
 BEGIN_FTR_SECTION
+	b	91f
+END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
 	/*
 	 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
 	 */
 	bl	kvmppc_restore_tm
-END_FTR_SECTION_IFSET(CPU_FTR_TM)
+91:
 #endif
 
 	/* Load guest PMU registers */
@@ -885,8 +890,14 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 	ld	r6, VCPU_DAWRX(r4)
 	ld	r7, VCPU_CIABR(r4)
 	ld	r8, VCPU_TAR(r4)
+	/*
+	 * Handle broken DAWR case by not writing it. This means we
+	 * can still store the DAWR register for migration.
+	 */
+BEGIN_FTR_SECTION
 	mtspr	SPRN_DAWR, r5
 	mtspr	SPRN_DAWRX, r6
+END_FTR_SECTION_IFSET(CPU_FTR_DAWR)
 	mtspr	SPRN_CIABR, r7
 	mtspr	SPRN_TAR, r8
 	ld	r5, VCPU_IC(r4)
@@ -914,11 +925,14 @@ BEGIN_FTR_SECTION
 	mtspr	SPRN_ACOP, r6
 	mtspr	SPRN_CSIGR, r7
 	mtspr	SPRN_TACR, r8
+	nop
 FTR_SECTION_ELSE
 	/* POWER9-only registers */
 	ld	r5, VCPU_TID(r4)
 	ld	r6, VCPU_PSSCR(r4)
+	lbz	r8, HSTATE_FAKE_SUSPEND(r13)
 	oris	r6, r6, PSSCR_EC@h	/* This makes stop trap to HV */
+	rldimi	r6, r8, PSSCR_FAKE_SUSPEND_LG, 63 - PSSCR_FAKE_SUSPEND_LG
 	ld	r7, VCPU_HFSCR(r4)
 	mtspr	SPRN_TIDR, r5
 	mtspr	SPRN_PSSCR, r6
@@ -1370,6 +1384,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	std	r3, VCPU_CTR(r9)
 	std	r4, VCPU_XER(r9)
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	/* For softpatch interrupt, go off and do TM instruction emulation */
+	cmpwi	r12, BOOK3S_INTERRUPT_HV_SOFTPATCH
+	beq	kvmppc_tm_emul
+#endif
+
 	/* If this is a page table miss then see if it's theirs or ours */
 	cmpwi	r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
 	beq	kvmppc_hdsi
@@ -1747,12 +1767,18 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 	bl	kvmppc_save_fp
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+/*
+ * Branch around the call if both CPU_FTR_TM and
+ * CPU_FTR_P9_TM_HV_ASSIST are off.
+ */
 BEGIN_FTR_SECTION
+	b	91f
+END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
 	/*
 	 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
 	 */
 	bl	kvmppc_save_tm
-END_FTR_SECTION_IFSET(CPU_FTR_TM)
+91:
 #endif
 
 	/* Increment yield count if they have a VPA */
@@ -1852,6 +1878,10 @@ BEGIN_FTR_SECTION
 	ld	r6, STACK_SLOT_DAWR(r1)
 	ld	r7, STACK_SLOT_DAWRX(r1)
 	mtspr	SPRN_CIABR, r5
+	/*
+	 * If the DAWR doesn't work, it's ok to write these here as
+	 * the values should always be zero.
+	 */
 	mtspr	SPRN_DAWR, r6
 	mtspr	SPRN_DAWRX, r7
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
@@ -2055,6 +2085,42 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	mtlr	r0
 	blr
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+/*
+ * Softpatch interrupt for transactional memory emulation cases
+ * on POWER9 DD2.2.  This is early in the guest exit path - we
+ * haven't saved registers or done a treclaim yet.
+ */
+kvmppc_tm_emul:
+	/* Save the instruction image from HEIR into the vcpu struct */
+	mfspr	r3, SPRN_HEIR
+	stw	r3, VCPU_HEIR(r9)
+
+	/*
+	 * The cases we want to handle here are those where the guest
+	 * is in real suspend mode and is trying to transition to
+	 * transactional mode.
+	 */
+	lbz	r0, HSTATE_FAKE_SUSPEND(r13)
+	cmpwi	r0, 0		/* keep exiting guest if in fake suspend */
+	bne	guest_exit_cont
+	rldicl	r3, r11, 64 - MSR_TS_S_LG, 62	/* r3 = TS field (assumes r11 = guest MSR) */
+	cmpwi	r3, 1		/* or if not in suspend state */
+	bne	guest_exit_cont
+
+	/* Call C code to do the emulation */
+	mr	r3, r9			/* r9 is passed as the argument */
+	bl	kvmhv_p9_tm_emulation_early
+	nop
+	ld	r9, HSTATE_KVM_VCPU(r13)	/* reload r9 after the call */
+	li	r12, BOOK3S_INTERRUPT_HV_SOFTPATCH	/* and the trap number */
+	cmpwi	r3, 0
+	beq	guest_exit_cont		/* continue exiting if not handled */
+	ld	r10, VCPU_PC(r9)
+	ld	r11, VCPU_MSR(r9)
+	b	fast_interrupt_c_return	/* go back to guest if handled */
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
 /*
  * Check whether an HDSI is an HPTE not found fault or something else.
  * If it is an HPTE not found fault that is due to the guest accessing
@@ -2507,8 +2573,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	li	r3,0
 	blr
 
+2:
+BEGIN_FTR_SECTION
+	/* POWER9 with disabled DAWR */
+	li	r3, H_HARDWARE
+	blr
+END_FTR_SECTION_IFCLR(CPU_FTR_DAWR)
 	/* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */
-2:	rlwimi	r5, r4, 5, DAWRX_DR | DAWRX_DW
+	rlwimi	r5, r4, 5, DAWRX_DR | DAWRX_DW
 	rlwimi	r5, r4, 2, DAWRX_WT
 	clrrdi	r4, r4, 3
 	std	r4, VCPU_DAWR(r3)
@@ -2588,13 +2660,19 @@ _GLOBAL(kvmppc_h_cede)		/* r3 = vcpu pointer, r11 = msr, r13 = paca */
 	bl	kvmppc_save_fp
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+/*
+ * Branch around the call if both CPU_FTR_TM and
+ * CPU_FTR_P9_TM_HV_ASSIST are off.
+ */
 BEGIN_FTR_SECTION
+	b	91f
+END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
 	/*
 	 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
 	 */
 	ld	r9, HSTATE_KVM_VCPU(r13)
 	bl	kvmppc_save_tm
-END_FTR_SECTION_IFSET(CPU_FTR_TM)
+91:
 #endif
 
 	/*
@@ -2701,12 +2779,18 @@ kvm_end_cede:
 #endif
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+/*
+ * Branch around the call if both CPU_FTR_TM and
+ * CPU_FTR_P9_TM_HV_ASSIST are off.
+ */
 BEGIN_FTR_SECTION
+	b	91f
+END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
 	/*
 	 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
 	 */
 	bl	kvmppc_restore_tm
-END_FTR_SECTION_IFSET(CPU_FTR_TM)
+91:
 #endif
 
 	/* load up FP state */
@@ -3033,6 +3117,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 kvmppc_save_tm:
 	mflr	r0
 	std	r0, PPC_LR_STKOFF(r1)
+	stdu	r1, -PPC_MIN_STKFRM(r1)
 
 	/* Turn on TM. */
 	mfmsr	r8
@@ -3047,6 +3132,24 @@ kvmppc_save_tm:
 	std	r1, HSTATE_HOST_R1(r13)
 	li	r3, TM_CAUSE_KVM_RESCHED
 
+BEGIN_FTR_SECTION
+	lbz	r0, HSTATE_FAKE_SUSPEND(r13) /* Were we fake suspended? */
+	cmpwi	r0, 0
+	beq	3f
+	rldicl. r8, r8, 64 - MSR_TS_S_LG, 62 /* Did we actually hrfid? */
+	beq	4f
+BEGIN_FTR_SECTION_NESTED(96)
+	bl	pnv_power9_force_smt4_catch
+END_FTR_SECTION_NESTED(CPU_FTR_P9_TM_XER_SO_BUG, CPU_FTR_P9_TM_XER_SO_BUG, 96)
+	nop
+	b	6f
+3:
+	/* Emulation of the treclaim instruction needs TEXASR before treclaim */
+	mfspr	r6, SPRN_TEXASR
+	std	r6, VCPU_ORIG_TEXASR(r9)
+6:
+END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
+
 	/* Clear the MSR RI since r1, r13 are all going to be foobar. */
 	li	r5, 0
 	mtmsrd	r5, 1
@@ -3058,6 +3161,43 @@ kvmppc_save_tm:
 	SET_SCRATCH0(r13)
 	GET_PACA(r13)
 	std	r9, PACATMSCRATCH(r13)
+
+	/* If doing TM emulation on POWER9 DD2.2, check for fake suspend mode */
+BEGIN_FTR_SECTION
+	lbz	r9, HSTATE_FAKE_SUSPEND(r13)
+	cmpwi	r9, 0
+	beq	2f
+	/*
+	 * We were in fake suspend, so we are not going to save the
+	 * register state as the guest checkpointed state (since
+	 * we already have it), therefore we can now use any volatile GPR.
+	 */
+	/* Reload stack pointer and TOC. */
+	ld	r1, HSTATE_HOST_R1(r13)
+	ld	r2, PACATOC(r13)
+	/* Set MSR RI now we have r1 and r13 back. */
+	li	r5, MSR_RI
+	mtmsrd	r5, 1
+	HMT_MEDIUM
+	ld	r6, HSTATE_DSCR(r13)
+	mtspr	SPRN_DSCR, r6
+BEGIN_FTR_SECTION_NESTED(96)
+	bl	pnv_power9_force_smt4_release
+END_FTR_SECTION_NESTED(CPU_FTR_P9_TM_XER_SO_BUG, CPU_FTR_P9_TM_XER_SO_BUG, 96)
+	nop
+
+4:
+	mfspr	r3, SPRN_PSSCR
+	/* PSSCR_FAKE_SUSPEND is a write-only bit, but clear it anyway */
+	li	r0, PSSCR_FAKE_SUSPEND
+	andc	r3, r3, r0
+	mtspr	SPRN_PSSCR, r3
+	ld	r9, HSTATE_KVM_VCPU(r13)
+	/* Don't save TEXASR, use value from last exit in real suspend state */
+	b	11f
+2:
+END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
+
 	ld	r9, HSTATE_KVM_VCPU(r13)
 
 	/* Get a few more GPRs free. */
@@ -3128,13 +3268,15 @@ kvmppc_save_tm:
 	 * change these outside of a transaction, so they must always be
 	 * context switched.
 	 */
+	mfspr	r7, SPRN_TEXASR
+	std	r7, VCPU_TEXASR(r9)
+11:
 	mfspr	r5, SPRN_TFHAR
 	mfspr	r6, SPRN_TFIAR
-	mfspr	r7, SPRN_TEXASR
 	std	r5, VCPU_TFHAR(r9)
 	std	r6, VCPU_TFIAR(r9)
-	std	r7, VCPU_TEXASR(r9)
 
+	addi	r1, r1, PPC_MIN_STKFRM
 	ld	r0, PPC_LR_STKOFF(r1)
 	mtlr	r0
 	blr
@@ -3169,6 +3311,8 @@ kvmppc_restore_tm:
 	mtspr	SPRN_TFIAR, r6
 	mtspr	SPRN_TEXASR, r7
 
+	li	r0, 0
+	stb	r0, HSTATE_FAKE_SUSPEND(r13)
 	ld	r5, VCPU_MSR(r4)
 	rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
 	beqlr		/* TM not active in guest */
@@ -3183,6 +3327,15 @@ kvmppc_restore_tm:
 	mtspr	SPRN_TEXASR, r7
 
 	/*
+	 * If we are doing TM emulation for the guest on a POWER9 DD2,
+	 * then we don't actually do a trechkpt -- we either set up
+	 * fake-suspend mode, or emulate a TM rollback.
+	 */
+BEGIN_FTR_SECTION
+	b	.Ldo_tm_fake_load
+END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
+
+	/*
 	 * We need to load up the checkpointed state for the guest.
 	 * We need to do this early as it will blow away any GPRs, VSRs and
 	 * some SPRs.
@@ -3254,10 +3407,24 @@ kvmppc_restore_tm:
 	/* Set the MSR RI since we have our registers back. */
 	li	r5, MSR_RI
 	mtmsrd	r5, 1
-
+9:
 	ld	r0, PPC_LR_STKOFF(r1)
 	mtlr	r0
 	blr
+
+.Ldo_tm_fake_load:
+	cmpwi	r5, 1		/* check for suspended state */
+	bgt	10f
+	stb	r5, HSTATE_FAKE_SUSPEND(r13)
+	b	9b		/* and return */
+10:	stdu	r1, -PPC_MIN_STKFRM(r1)
+	/* guest is in transactional state, so simulate rollback */
+	mr	r3, r4
+	bl	kvmhv_emulate_tm_rollback
+	nop
+	ld      r4, HSTATE_KVM_VCPU(r13) /* our vcpu pointer has been trashed */
+	addi	r1, r1, PPC_MIN_STKFRM
+	b	9b
 #endif
 
 /*
diff --git a/arch/powerpc/kvm/book3s_hv_tm.c b/arch/powerpc/kvm/book3s_hv_tm.c
new file mode 100644
index 000000000000..bf710ad3a6d7
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_tm.c
@@ -0,0 +1,216 @@
+/*
+ * Copyright 2017 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_book3s_64.h>
+#include <asm/reg.h>
+#include <asm/ppc-opcode.h>
+
+static void emulate_tx_failure(struct kvm_vcpu *vcpu, u64 failure_cause)
+{
+	u64 msr = vcpu->arch.shregs.msr;
+	u64 fail_addr = vcpu->arch.pc & ~0x3ull;	/* pc with low 2 bits clear */
+	/* Failure cause goes in the top byte, with the ABORT/FS/EXACT flags */
+	u64 fail_info = (failure_cause << 56) | TEXASR_EXACT | TEXASR_FS | TEXASR_ABORT;
+
+	if (MSR_TM_SUSPENDED(msr))
+		fail_info |= TEXASR_SUSP;
+	if (msr & MSR_PR) {	/* MSR[PR] = 1: flag it in both registers */
+		fail_addr |= 1;
+		fail_info |= TEXASR_PR;
+	}
+	/* Preserve ROT and TL fields of existing TEXASR */
+	vcpu->arch.texasr = fail_info | (vcpu->arch.texasr & 0x3ffffff);
+	vcpu->arch.tfiar = fail_addr;
+}
+
+/*
+ * This gets called on a softpatch interrupt on POWER9 DD2.2 processors.
+ * We expect to find a TM-related instruction to be emulated.  The
+ * instruction image is in vcpu->arch.emul_inst.  If the guest was in
+ * TM suspended or transactional state, the checkpointed state has been
+ * reclaimed and is in the vcpu struct.  The CPU is in virtual mode in
+ * host context.  Every path returns RESUME_GUEST.
+ */
+int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
+{
+	u32 instr = vcpu->arch.emul_inst;
+	u64 msr = vcpu->arch.shregs.msr;
+	u64 newmsr, bescr;
+	int ra, rs;
+
+	switch (instr & 0xfc0007ff) {
+	case PPC_INST_RFID:
+		/* XXX do we need to check for PR=0 here? */
+		newmsr = vcpu->arch.shregs.srr1;
+		/* should only get here for Sx -> T1 transition */
+		WARN_ON_ONCE(!(MSR_TM_SUSPENDED(msr) &&
+			       MSR_TM_TRANSACTIONAL(newmsr) &&
+			       (newmsr & MSR_TM)));
+		newmsr = sanitize_msr(newmsr);
+		vcpu->arch.shregs.msr = newmsr;
+		vcpu->arch.cfar = vcpu->arch.pc - 4;
+		vcpu->arch.pc = vcpu->arch.shregs.srr0;
+		return RESUME_GUEST;
+
+	case PPC_INST_RFEBB:
+		if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206)) {
+			/* generate an illegal instruction interrupt */
+			kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+			return RESUME_GUEST;
+		}
+		/* check EBB facility is available */
+		if (!(vcpu->arch.hfscr & HFSCR_EBB)) {
+			/* generate an illegal instruction interrupt */
+			kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+			return RESUME_GUEST;
+		}
+		if ((msr & MSR_PR) && !(vcpu->arch.fscr & FSCR_EBB)) {
+			/* generate a facility unavailable interrupt */
+			vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) |
+				((u64)FSCR_EBB_LG << 56);
+			kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FAC_UNAVAIL);
+			return RESUME_GUEST;
+		}
+		bescr = vcpu->arch.bescr;
+		/* expect to see a S->T transition requested */
+		WARN_ON_ONCE(!(MSR_TM_SUSPENDED(msr) &&
+			       ((bescr >> 30) & 3) == 2));
+		bescr &= ~BESCR_GE;
+		if (instr & (1 << 11))
+			bescr |= BESCR_GE;
+		vcpu->arch.bescr = bescr;
+		msr = (msr & ~MSR_TS_MASK) | MSR_TS_T;
+		vcpu->arch.shregs.msr = msr;
+		vcpu->arch.cfar = vcpu->arch.pc - 4;
+		vcpu->arch.pc = vcpu->arch.ebbrr;
+		return RESUME_GUEST;
+
+	case PPC_INST_MTMSRD:
+		/* XXX do we need to check for PR=0 here? */
+		rs = (instr >> 21) & 0x1f;
+		newmsr = kvmppc_get_gpr(vcpu, rs);
+		/* check this is a Sx -> T1 transition */
+		WARN_ON_ONCE(!(MSR_TM_SUSPENDED(msr) &&
+			       MSR_TM_TRANSACTIONAL(newmsr) &&
+			       (newmsr & MSR_TM)));
+		/* mtmsrd doesn't change LE */
+		newmsr = (newmsr & ~MSR_LE) | (msr & MSR_LE);
+		newmsr = sanitize_msr(newmsr);
+		vcpu->arch.shregs.msr = newmsr;
+		return RESUME_GUEST;
+
+	case PPC_INST_TSR:
+		/* check for PR=1 and arch 2.06 bit set in PCR */
+		if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206)) {
+			/* generate an illegal instruction interrupt */
+			kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+			return RESUME_GUEST;
+		}
+		/* check for TM disabled in the HFSCR or MSR */
+		if (!(vcpu->arch.hfscr & HFSCR_TM)) {
+			/* generate an illegal instruction interrupt */
+			kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+			return RESUME_GUEST;
+		}
+		if (!(msr & MSR_TM)) {
+			/* generate a facility unavailable interrupt */
+			vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) |
+				((u64)FSCR_TM_LG << 56);
+			kvmppc_book3s_queue_irqprio(vcpu,
+						BOOK3S_INTERRUPT_FAC_UNAVAIL);
+			return RESUME_GUEST;
+		}
+		/* Set CR0 to 0 || MSR[TS] || 0, the previous TM state */
+		vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) |
+			(((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 29);
+		/* L=1 => tresume, L=0 => tsuspend */
+		if (instr & (1 << 21)) {
+			if (MSR_TM_SUSPENDED(msr))
+				msr = (msr & ~MSR_TS_MASK) | MSR_TS_T;
+		} else {
+			if (MSR_TM_TRANSACTIONAL(msr))
+				msr = (msr & ~MSR_TS_MASK) | MSR_TS_S;
+		}
+		vcpu->arch.shregs.msr = msr;
+		return RESUME_GUEST;
+
+	case PPC_INST_TRECLAIM:
+		/* check for TM disabled in the HFSCR or MSR */
+		if (!(vcpu->arch.hfscr & HFSCR_TM)) {
+			/* generate an illegal instruction interrupt */
+			kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+			return RESUME_GUEST;
+		}
+		if (!(msr & MSR_TM)) {
+			/* generate a facility unavailable interrupt */
+			vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) |
+				((u64)FSCR_TM_LG << 56);
+			kvmppc_book3s_queue_irqprio(vcpu,
+						BOOK3S_INTERRUPT_FAC_UNAVAIL);
+			return RESUME_GUEST;
+		}
+		/* If no transaction active, generate TM bad thing */
+		if (!MSR_TM_ACTIVE(msr)) {
+			kvmppc_core_queue_program(vcpu, SRR1_PROGTM);
+			return RESUME_GUEST;
+		}
+		/* If failure was not previously recorded, recompute TEXASR */
+		if (!(vcpu->arch.orig_texasr & TEXASR_FS)) {
+			ra = (instr >> 16) & 0x1f;
+			if (ra)
+				ra = kvmppc_get_gpr(vcpu, ra) & 0xff;
+			emulate_tx_failure(vcpu, ra);
+		}
+
+		copy_from_checkpoint(vcpu);
+
+		/* Set CR0 to 0 || MSR[TS] || 0, the previous TM state */
+		vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) |
+			(((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 29);
+		vcpu->arch.shregs.msr &= ~MSR_TS_MASK;
+		return RESUME_GUEST;
+
+	case PPC_INST_TRECHKPT:
+		/* XXX do we need to check for PR=0 here? */
+		/* check for TM disabled in the HFSCR or MSR */
+		if (!(vcpu->arch.hfscr & HFSCR_TM)) {
+			/* generate an illegal instruction interrupt */
+			kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+			return RESUME_GUEST;
+		}
+		if (!(msr & MSR_TM)) {
+			/* generate a facility unavailable interrupt */
+			vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) |
+				((u64)FSCR_TM_LG << 56);
+			kvmppc_book3s_queue_irqprio(vcpu,
+						BOOK3S_INTERRUPT_FAC_UNAVAIL);
+			return RESUME_GUEST;
+		}
+		/* If transaction active or TEXASR[FS] = 0, bad thing */
+		if (MSR_TM_ACTIVE(msr) || !(vcpu->arch.texasr & TEXASR_FS)) {
+			kvmppc_core_queue_program(vcpu, SRR1_PROGTM);
+			return RESUME_GUEST;
+		}
+
+		copy_to_checkpoint(vcpu);
+
+		/* Set CR0 to 0 || MSR[TS] || 0, the previous TM state */
+		vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) |
+			(((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 29);
+		vcpu->arch.shregs.msr = msr | MSR_TS_S;
+		return RESUME_GUEST;
+	}
+
+	/* What should we do here? We didn't recognize the instruction */
+	WARN_ON_ONCE(1);
+	return RESUME_GUEST;
+}
diff --git a/arch/powerpc/kvm/book3s_hv_tm_builtin.c b/arch/powerpc/kvm/book3s_hv_tm_builtin.c
new file mode 100644
index 000000000000..d98ccfd2b88c
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_tm_builtin.c
@@ -0,0 +1,109 @@
+/*
+ * Copyright 2017 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_book3s_64.h>
+#include <asm/reg.h>
+#include <asm/ppc-opcode.h>
+
+/*
+ * Handles the cases where the guest is in real suspend mode and we want
+ * to get back to the guest without dooming the transaction.  The caller
+ * has checked that the guest is in real-suspend mode (MSR[TS] = S, no
+ * fake-suspend flag).  Returns 1 if the instruction was emulated, else 0.
+ */
+int kvmhv_p9_tm_emulation_early(struct kvm_vcpu *vcpu)
+{
+	u32 instr = vcpu->arch.emul_inst;
+	u64 newmsr, msr, bescr;
+	int rs;
+
+	switch (instr & 0xfc0007ff) {
+	case PPC_INST_RFID:
+		/* XXX do we need to check for PR=0 here? */
+		newmsr = vcpu->arch.shregs.srr1;
+		/* only the Sx -> T1 transition is handled here */
+		if (!(MSR_TM_TRANSACTIONAL(newmsr) && (newmsr & MSR_TM)))
+			return 0;
+		newmsr = sanitize_msr(newmsr);
+		vcpu->arch.shregs.msr = newmsr;
+		vcpu->arch.cfar = vcpu->arch.pc - 4;
+		vcpu->arch.pc = vcpu->arch.shregs.srr0;
+		return 1;
+
+	case PPC_INST_RFEBB:
+		/* check for PR=1 and arch 2.06 bit set in PCR */
+		msr = vcpu->arch.shregs.msr;
+		if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206))
+			return 0;
+		/* check EBB facility is available */
+		if (!(vcpu->arch.hfscr & HFSCR_EBB) ||
+		    ((msr & MSR_PR) && !(mfspr(SPRN_FSCR) & FSCR_EBB)))
+			return 0;
+		bescr = mfspr(SPRN_BESCR);
+		/* expect to see a S->T transition requested */
+		if (((bescr >> 30) & 3) != 2)
+			return 0;
+		bescr &= ~BESCR_GE;
+		if (instr & (1 << 11))
+			bescr |= BESCR_GE;
+		mtspr(SPRN_BESCR, bescr);
+		msr = (msr & ~MSR_TS_MASK) | MSR_TS_T;
+		vcpu->arch.shregs.msr = msr;
+		vcpu->arch.cfar = vcpu->arch.pc - 4;
+		vcpu->arch.pc = mfspr(SPRN_EBBRR);
+		return 1;
+
+	case PPC_INST_MTMSRD:
+		/* XXX do we need to check for PR=0 here? */
+		rs = (instr >> 21) & 0x1f;
+		newmsr = kvmppc_get_gpr(vcpu, rs);
+		msr = vcpu->arch.shregs.msr;
+		/* check this is a Sx -> T1 transition */
+		if (!(MSR_TM_TRANSACTIONAL(newmsr) && (newmsr & MSR_TM)))
+			return 0;
+		/* mtmsrd doesn't change LE */
+		newmsr = (newmsr & ~MSR_LE) | (msr & MSR_LE);
+		newmsr = sanitize_msr(newmsr);
+		vcpu->arch.shregs.msr = newmsr;
+		return 1;
+
+	case PPC_INST_TSR:
+		/* we know the MSR has the TS field = S (0b01) here */
+		msr = vcpu->arch.shregs.msr;
+		/* check for PR=1 and arch 2.06 bit set in PCR */
+		if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206))
+			return 0;
+		/* check for TM disabled in the HFSCR or MSR */
+		if (!(vcpu->arch.hfscr & HFSCR_TM) || !(msr & MSR_TM))
+			return 0;
+		/* L=1 => tresume => set TS to T (0b10) */
+		if (instr & (1 << 21))
+			vcpu->arch.shregs.msr = (msr & ~MSR_TS_MASK) | MSR_TS_T;
+		/* Set CR0 to 0b0010, i.e. 0 || MSR[TS] (0b01) || 0 */
+		vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | 0x20000000;
+		return 1;
+	}
+
+	return 0;	/* no case matched: not handled here */
+}
+
+/*
+ * On return to a guest in TM transactional state, roll it back to the
+ * checkpointed state: clear MSR[TS], set pc = TFHAR and CR0 = 0b1010.
+ */
+void kvmhv_emulate_tm_rollback(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.shregs.msr &= ~MSR_TS_MASK;	/* go to N state */
+	vcpu->arch.pc = vcpu->arch.tfhar;
+	copy_from_checkpoint(vcpu);
+	vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | 0xa0000000;
+}
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 3ae752314b34..d3f304d06adf 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -277,15 +277,6 @@ static void do_kvm_unmap_hva(struct kvm *kvm, unsigned long start,
 	}
 }
 
-static int kvm_unmap_hva_pr(struct kvm *kvm, unsigned long hva)
-{
-	trace_kvm_unmap_hva(hva);
-
-	do_kvm_unmap_hva(kvm, hva, hva + PAGE_SIZE);
-
-	return 0;
-}
-
 static int kvm_unmap_hva_range_pr(struct kvm *kvm, unsigned long start,
 				  unsigned long end)
 {
@@ -1773,7 +1764,6 @@ static struct kvmppc_ops kvm_ops_pr = {
 	.flush_memslot = kvmppc_core_flush_memslot_pr,
 	.prepare_memory_region = kvmppc_core_prepare_memory_region_pr,
 	.commit_memory_region = kvmppc_core_commit_memory_region_pr,
-	.unmap_hva = kvm_unmap_hva_pr,
 	.unmap_hva_range = kvm_unmap_hva_range_pr,
 	.age_hva  = kvm_age_hva_pr,
 	.test_age_hva = kvm_test_age_hva_pr,
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index 423b21393bc9..c878b4ffb86f 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -724,7 +724,7 @@ int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type,
 
 /************* MMU Notifiers *************/
 
-int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+static int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
 {
 	trace_kvm_unmap_hva(hva);
 
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index 4d8b4d6cebff..fa888bfc347e 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -45,12 +45,6 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
 #ifdef CONFIG_PPC_BOOK3S
 	/* mtdec lowers the interrupt line when positive. */
 	kvmppc_core_dequeue_dec(vcpu);
-
-	/* POWER4+ triggers a dec interrupt if the value is < 0 */
-	if (vcpu->arch.dec & 0x80000000) {
-		kvmppc_core_queue_dec(vcpu);
-		return;
-	}
 #endif
 
 #ifdef CONFIG_BOOKE
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 52c205373986..4e387647b5af 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -646,10 +646,13 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		r = hv_enabled;
 		break;
 #endif
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 	case KVM_CAP_PPC_HTM:
 		r = hv_enabled &&
-		    (cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_HTM_COMP);
+		    (!!(cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_HTM) ||
+		     cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST));
 		break;
+#endif
 	default:
 		r = 0;
 		break;
diff --git a/arch/powerpc/kvm/trace_pr.h b/arch/powerpc/kvm/trace_pr.h
index 85785a370c0e..2f9a8829552b 100644
--- a/arch/powerpc/kvm/trace_pr.h
+++ b/arch/powerpc/kvm/trace_pr.h
@@ -254,21 +254,6 @@ TRACE_EVENT(kvm_exit,
 		)
 );
 
-TRACE_EVENT(kvm_unmap_hva,
-	TP_PROTO(unsigned long hva),
-	TP_ARGS(hva),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	hva		)
-	),
-
-	TP_fast_assign(
-		__entry->hva		= hva;
-	),
-
-	TP_printk("unmap hva 0x%lx\n", __entry->hva)
-);
-
 #endif /* _TRACE_KVM_H */
 
 /* This part must be outside protection */
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 3c29c9009bbf..653901042ad7 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -22,9 +22,11 @@ ifeq ($(call ld-ifversion, -lt, 225000000, y),y)
 extra-$(CONFIG_PPC64)	+= crtsavres.o
 endif
 
+obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \
+			       memcpy_power7.o
+
 obj64-y	+= copypage_64.o copyuser_64.o mem_64.o hweight_64.o \
-	   copyuser_power7.o string_64.o copypage_power7.o memcpy_power7.o \
-	   memcpy_64.o memcmp_64.o pmem.o
+	   string_64.o memcpy_64.o memcmp_64.o pmem.o
 
 obj64-$(CONFIG_SMP)	+= locks.o
 obj64-$(CONFIG_ALTIVEC)	+= vmx-helper.o
diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S
index 4bcc9e76fb55..8d5034f645f3 100644
--- a/arch/powerpc/lib/copypage_64.S
+++ b/arch/powerpc/lib/copypage_64.S
@@ -21,7 +21,9 @@ _GLOBAL_TOC(copy_page)
 BEGIN_FTR_SECTION
 	lis	r5,PAGE_SIZE@h
 FTR_SECTION_ELSE
+#ifdef CONFIG_PPC_BOOK3S_64
 	b	copypage_power7
+#endif
 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
 	ori	r5,r5,PAGE_SIZE@l
 BEGIN_FTR_SECTION
diff --git a/arch/powerpc/lib/copypage_power7.S b/arch/powerpc/lib/copypage_power7.S
index ca5fc8fa7efc..8fa73b7ab20e 100644
--- a/arch/powerpc/lib/copypage_power7.S
+++ b/arch/powerpc/lib/copypage_power7.S
@@ -42,8 +42,6 @@ _GLOBAL(copypage_power7)
 	lis	r8,0x8000	/* GO=1 */
 	clrldi	r8,r8,32
 
-.machine push
-.machine "power4"
 	/* setup read stream 0  */
 	dcbt	0,r4,0b01000  	/* addr from */
 	dcbt	0,r7,0b01010   /* length and depth from */
@@ -52,7 +50,6 @@ _GLOBAL(copypage_power7)
 	dcbtst	0,r10,0b01010  /* length and depth to */
 	eieio
 	dcbt	0,r8,0b01010	/* all streams GO */
-.machine pop
 
 #ifdef CONFIG_ALTIVEC
 	mflr	r0
diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S
index 08da06e1bd72..506677395681 100644
--- a/arch/powerpc/lib/copyuser_64.S
+++ b/arch/powerpc/lib/copyuser_64.S
@@ -20,11 +20,13 @@
 
 	.align	7
 _GLOBAL_TOC(__copy_tofrom_user)
+#ifdef CONFIG_PPC_BOOK3S_64
 BEGIN_FTR_SECTION
 	nop
 FTR_SECTION_ELSE
 	b	__copy_tofrom_user_power7
 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
+#endif
 _GLOBAL(__copy_tofrom_user_base)
 	/* first check for a whole page copy on a page boundary */
 	cmpldi	cr1,r5,16
diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S
index d416a4a66578..215e4760c09f 100644
--- a/arch/powerpc/lib/copyuser_power7.S
+++ b/arch/powerpc/lib/copyuser_power7.S
@@ -312,8 +312,6 @@ err1;	stb	r0,0(r3)
 	lis	r8,0x8000	/* GO=1 */
 	clrldi	r8,r8,32
 
-.machine push
-.machine "power4"
 	/* setup read stream 0 */
 	dcbt	0,r6,0b01000   /* addr from */
 	dcbt	0,r7,0b01010   /* length and depth from */
@@ -322,7 +320,6 @@ err1;	stb	r0,0(r3)
 	dcbtst	0,r10,0b01010  /* length and depth to */
 	eieio
 	dcbt	0,r8,0b01010	/* all streams GO */
-.machine pop
 
 	beq	cr1,.Lunwind_stack_nonvmx_copy
 
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 73697c4e3468..35f80ab7cbd8 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -153,7 +153,14 @@ void do_rfi_flush_fixups(enum l1d_flush_type types)
 		patch_instruction(dest + 2, instrs[2]);
 	}
 
-	printk(KERN_DEBUG "rfi-flush: patched %d locations\n", i);
+	printk(KERN_DEBUG "rfi-flush: patched %d locations (%s flush)\n", i,
+		(types == L1D_FLUSH_NONE)       ? "no" :
+		(types == L1D_FLUSH_FALLBACK)   ? "fallback displacement" :
+		(types &  L1D_FLUSH_ORI)        ? (types & L1D_FLUSH_MTTRIG)
+							? "ori+mttrig type"
+							: "ori type" :
+		(types &  L1D_FLUSH_MTTRIG)     ? "mttrig type"
+						: "unknown");
 }
 #endif /* CONFIG_PPC_BOOK3S_64 */
 
diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S
index f4d6088e2d53..8d8265be1a59 100644
--- a/arch/powerpc/lib/memcpy_64.S
+++ b/arch/powerpc/lib/memcpy_64.S
@@ -19,9 +19,11 @@ BEGIN_FTR_SECTION
 	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)	/* save destination pointer for return value */
 #endif
 FTR_SECTION_ELSE
+#ifdef CONFIG_PPC_BOOK3S_64
 #ifndef SELFTEST
 	b	memcpy_power7
 #endif
+#endif
 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
 #ifdef __LITTLE_ENDIAN__
 	/* dumb little-endian memcpy that will get replaced at runtime */
diff --git a/arch/powerpc/lib/memcpy_power7.S b/arch/powerpc/lib/memcpy_power7.S
index 193909abd18b..df7de9d3da08 100644
--- a/arch/powerpc/lib/memcpy_power7.S
+++ b/arch/powerpc/lib/memcpy_power7.S
@@ -259,15 +259,12 @@ _GLOBAL(memcpy_power7)
 	lis	r8,0x8000	/* GO=1 */
 	clrldi	r8,r8,32
 
-.machine push
-.machine "power4"
 	dcbt	0,r6,0b01000
 	dcbt	0,r7,0b01010
 	dcbtst	0,r9,0b01000
 	dcbtst	0,r10,0b01010
 	eieio
 	dcbt	0,r8,0b01010	/* GO */
-.machine pop
 
 	beq	cr1,.Lunwind_stack_nonvmx_copy
 
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 70274b7b4773..34d68f1b1b40 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -280,7 +280,7 @@ static nokprobe_inline int read_mem_aligned(unsigned long *dest,
  * Copy from userspace to a buffer, using the largest possible
  * aligned accesses, up to sizeof(long).
  */
-static int nokprobe_inline copy_mem_in(u8 *dest, unsigned long ea, int nb,
+static nokprobe_inline int copy_mem_in(u8 *dest, unsigned long ea, int nb,
 				       struct pt_regs *regs)
 {
 	int err = 0;
@@ -385,7 +385,7 @@ static nokprobe_inline int write_mem_aligned(unsigned long val,
  * Copy from a buffer to userspace, using the largest possible
  * aligned accesses, up to sizeof(long).
  */
-static int nokprobe_inline copy_mem_out(u8 *dest, unsigned long ea, int nb,
+static nokprobe_inline int copy_mem_out(u8 *dest, unsigned long ea, int nb,
 					struct pt_regs *regs)
 {
 	int err = 0;
diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c
index 849f50cd62f2..cf77d755246d 100644
--- a/arch/powerpc/mm/8xx_mmu.c
+++ b/arch/powerpc/mm/8xx_mmu.c
@@ -192,7 +192,7 @@ void set_context(unsigned long id, pgd_t *pgd)
 	mtspr(SPRN_M_TW, __pa(pgd) - offset);
 
 	/* Update context */
-	mtspr(SPRN_M_CASID, id);
+	mtspr(SPRN_M_CASID, id - 1);
 	/* sync */
 	mb();
 }
diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c
index 697b70ad1195..7d0945bd3a61 100644
--- a/arch/powerpc/mm/copro_fault.c
+++ b/arch/powerpc/mm/copro_fault.c
@@ -112,7 +112,7 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb)
 			return 1;
 		psize = get_slice_psize(mm, ea);
 		ssize = user_segment_size(ea);
-		vsid = get_vsid(mm->context.id, ea, ssize);
+		vsid = get_user_vsid(&mm->context, ea, ssize);
 		vsidkey = SLB_VSID_USER;
 		break;
 	case VMALLOC_REGION_ID:
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 866446cf2d9a..c01d627e687a 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -297,7 +297,12 @@ static bool access_error(bool is_write, bool is_exec,
 
 	if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))))
 		return true;
-
+	/*
+	 * We should ideally do the vma pkey access check here. But in the
+	 * fault path, handle_mm_fault() also does the same check. To avoid
+	 * checking twice, we skip it here and handle access errors due to
+	 * pkeys later.
+	 */
 	return false;
 }
 
@@ -518,25 +523,16 @@ good_area:
 
 #ifdef CONFIG_PPC_MEM_KEYS
 	/*
-	 * if the HPTE is not hashed, hardware will not detect
-	 * a key fault. Lets check if we failed because of a
-	 * software detected key fault.
+	 * We skipped checking for access errors due to pkeys earlier.
+	 * Check for them now using the handle_mm_fault() error return.
 	 */
 	if (unlikely(fault & VM_FAULT_SIGSEGV) &&
-		!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE,
-			is_exec, 0)) {
-		/*
-		 * The PGD-PDT...PMD-PTE tree may not have been fully setup.
-		 * Hence we cannot walk the tree to locate the PTE, to locate
-		 * the key. Hence let's use vma_pkey() to get the key; instead
-		 * of get_mm_addr_key().
-		 */
+		!arch_vma_access_permitted(vma, is_write, is_exec, 0)) {
+
 		int pkey = vma_pkey(vma);
 
-		if (likely(pkey)) {
-			up_read(&mm->mmap_sem);
-			return bad_key_fault_exception(regs, address, pkey);
-		}
+		up_read(&mm->mmap_sem);
+		return bad_key_fault_exception(regs, address, pkey);
 	}
 #endif /* CONFIG_PPC_MEM_KEYS */
 
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 656933c85925..1d049c78c82a 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -866,18 +866,6 @@ static void native_flush_hash_range(unsigned long number, int local)
 	local_irq_restore(flags);
 }
 
-static int native_register_proc_table(unsigned long base, unsigned long page_size,
-				      unsigned long table_size)
-{
-	unsigned long patb1 = base << 25; /* VSID */
-
-	patb1 |= (page_size << 5);  /* sllp */
-	patb1 |= table_size;
-
-	partition_tb->patb1 = cpu_to_be64(patb1);
-	return 0;
-}
-
 void __init hpte_init_native(void)
 {
 	mmu_hash_ops.hpte_invalidate	= native_hpte_invalidate;
@@ -889,7 +877,4 @@ void __init hpte_init_native(void)
 	mmu_hash_ops.hpte_clear_all	= native_hpte_clear;
 	mmu_hash_ops.flush_hash_range = native_flush_hash_range;
 	mmu_hash_ops.hugepage_invalidate   = native_hugepage_invalidate;
-
-	if (cpu_has_feature(CPU_FTR_ARCH_300))
-		register_process_table = native_register_proc_table;
 }
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index cf290d415dcd..0bd3790d35df 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -132,9 +132,10 @@ EXPORT_SYMBOL(mmu_hash_ops);
  * is provided by the firmware.
  */
 
-/* Pre-POWER4 CPUs (4k pages only)
+/*
+ * Fallback (4k pages only)
  */
-static struct mmu_psize_def mmu_psize_defaults_old[] = {
+static struct mmu_psize_def mmu_psize_defaults[] = {
 	[MMU_PAGE_4K] = {
 		.shift	= 12,
 		.sllp	= 0,
@@ -554,8 +555,8 @@ static void __init htab_scan_page_sizes(void)
 	mmu_psize_set_default_penc();
 
 	/* Default to 4K pages only */
-	memcpy(mmu_psize_defs, mmu_psize_defaults_old,
-	       sizeof(mmu_psize_defaults_old));
+	memcpy(mmu_psize_defs, mmu_psize_defaults,
+	       sizeof(mmu_psize_defaults));
 
 	/*
 	 * Try to find the available page sizes in the device-tree
@@ -781,7 +782,7 @@ void resize_hpt_for_hotplug(unsigned long new_mem_size)
 	}
 }
 
-int hash__create_section_mapping(unsigned long start, unsigned long end)
+int hash__create_section_mapping(unsigned long start, unsigned long end, int nid)
 {
 	int rc = htab_bolt_mapping(start, end, __pa(start),
 				   pgprot_val(PAGE_KERNEL), mmu_linear_psize,
@@ -875,6 +876,12 @@ static void __init htab_initialize(void)
 		/* Using a hypervisor which owns the htab */
 		htab_address = NULL;
 		_SDR1 = 0; 
+		/*
+		 * On POWER9, we need to do an H_REGISTER_PROC_TBL hcall
+		 * to inform the hypervisor that we wish to use the HPT.
+		 */
+		if (cpu_has_feature(CPU_FTR_ARCH_300))
+			register_process_table(0, 0, 0);
 #ifdef CONFIG_FA_DUMP
 		/*
 		 * If firmware assisted dump is active firmware preserves
@@ -1110,19 +1117,18 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
 #ifdef CONFIG_PPC_MM_SLICES
 static unsigned int get_paca_psize(unsigned long addr)
 {
-	u64 lpsizes;
-	unsigned char *hpsizes;
+	unsigned char *psizes;
 	unsigned long index, mask_index;
 
 	if (addr < SLICE_LOW_TOP) {
-		lpsizes = get_paca()->mm_ctx_low_slices_psize;
+		psizes = get_paca()->mm_ctx_low_slices_psize;
 		index = GET_LOW_SLICE_INDEX(addr);
-		return (lpsizes >> (index * 4)) & 0xF;
+	} else {
+		psizes = get_paca()->mm_ctx_high_slices_psize;
+		index = GET_HIGH_SLICE_INDEX(addr);
 	}
-	hpsizes = get_paca()->mm_ctx_high_slices_psize;
-	index = GET_HIGH_SLICE_INDEX(addr);
 	mask_index = index & 0x1;
-	return (hpsizes[index >> 1] >> (mask_index * 4)) & 0xF;
+	return (psizes[index >> 1] >> (mask_index * 4)) & 0xF;
 }
 
 #else
@@ -1262,7 +1268,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
 		}
 		psize = get_slice_psize(mm, ea);
 		ssize = user_segment_size(ea);
-		vsid = get_vsid(mm->context.id, ea, ssize);
+		vsid = get_user_vsid(&mm->context, ea, ssize);
 		break;
 	case VMALLOC_REGION_ID:
 		vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
@@ -1527,7 +1533,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
 
 	/* Get VSID */
 	ssize = user_segment_size(ea);
-	vsid = get_vsid(mm->context.id, ea, ssize);
+	vsid = get_user_vsid(&mm->context, ea, ssize);
 	if (!vsid)
 		return;
 	/*
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 3a08d211d2ee..f1153f8254e3 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -122,9 +122,6 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
 #if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx)
 #define HUGEPD_PGD_SHIFT PGDIR_SHIFT
 #define HUGEPD_PUD_SHIFT PUD_SHIFT
-#else
-#define HUGEPD_PGD_SHIFT PUD_SHIFT
-#define HUGEPD_PUD_SHIFT PMD_SHIFT
 #endif
 
 /*
@@ -553,9 +550,11 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 	struct hstate *hstate = hstate_file(file);
 	int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate));
 
+#ifdef CONFIG_PPC_RADIX_MMU
 	if (radix_enabled())
 		return radix__hugetlb_get_unmapped_area(file, addr, len,
 						       pgoff, flags);
+#endif
 	return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1);
 }
 #endif
@@ -563,10 +562,12 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
 {
 #ifdef CONFIG_PPC_MM_SLICES
-	unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start);
 	/* With radix we don't use slice, so derive it from vma*/
-	if (!radix_enabled())
+	if (!radix_enabled()) {
+		unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start);
+
 		return 1UL << mmu_psize_to_shift(psize);
+	}
 #endif
 	return vma_kernel_pagesize(vma);
 }
@@ -663,15 +664,26 @@ static int __init hugetlbpage_init(void)
 
 		shift = mmu_psize_to_shift(psize);
 
-		if (add_huge_page_size(1ULL << shift) < 0)
+#ifdef CONFIG_PPC_BOOK3S_64
+		if (shift > PGDIR_SHIFT)
 			continue;
-
+		else if (shift > PUD_SHIFT)
+			pdshift = PGDIR_SHIFT;
+		else if (shift > PMD_SHIFT)
+			pdshift = PUD_SHIFT;
+		else
+			pdshift = PMD_SHIFT;
+#else
 		if (shift < HUGEPD_PUD_SHIFT)
 			pdshift = PMD_SHIFT;
 		else if (shift < HUGEPD_PGD_SHIFT)
 			pdshift = PUD_SHIFT;
 		else
 			pdshift = PGDIR_SHIFT;
+#endif
+
+		if (add_huge_page_size(1ULL << shift) < 0)
+			continue;
 		/*
 		 * if we have pdshift and shift value same, we don't
 		 * use pgt cache for hugepd.
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 6419b33ca309..3e59e5d64b01 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -88,18 +88,13 @@ void MMU_init(void);
 int __map_without_bats;
 int __map_without_ltlbs;
 
-/*
- * This tells the system to allow ioremapping memory marked as reserved.
- */
-int __allow_ioremap_reserved;
-
 /* max amount of low RAM to map in */
 unsigned long __max_low_memory = MAX_LOW_MEM;
 
 /*
  * Check for command-line options that affect what MMU_init will do.
  */
-void __init MMU_setup(void)
+static void __init MMU_setup(void)
 {
 	/* Check for nobats option (used in mapin_ram). */
 	if (strstr(boot_command_line, "nobats")) {
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index fdb424a29f03..51ce091914f9 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -68,12 +68,6 @@
 
 #include "mmu_decl.h"
 
-#ifdef CONFIG_PPC_BOOK3S_64
-#if H_PGTABLE_RANGE > USER_VSID_RANGE
-#warning Limited user VSID range means pagetable space is wasted
-#endif
-#endif /* CONFIG_PPC_BOOK3S_64 */
-
 phys_addr_t memstart_addr = ~0;
 EXPORT_SYMBOL_GPL(memstart_addr);
 phys_addr_t kernstart_addr;
@@ -372,7 +366,7 @@ static int __init parse_disable_radix(char *p)
 {
 	bool val;
 
-	if (strlen(p) == 0)
+	if (!p)
 		val = true;
 	else if (kstrtobool(p, &val))
 		return -EINVAL;
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index fe8c61149fb8..737f8a4632cc 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -82,17 +82,7 @@ static inline pte_t *virt_to_kpte(unsigned long vaddr)
 
 int page_is_ram(unsigned long pfn)
 {
-#ifndef CONFIG_PPC64	/* XXX for now */
-	return pfn < max_pfn;
-#else
-	unsigned long paddr = (pfn << PAGE_SHIFT);
-	struct memblock_region *reg;
-
-	for_each_memblock(memory, reg)
-		if (paddr >= reg->base && paddr < (reg->base + reg->size))
-			return 1;
-	return 0;
-#endif
+	return memblock_is_memory(__pfn_to_phys(pfn));
 }
 
 pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
@@ -117,7 +107,7 @@ int memory_add_physaddr_to_nid(u64 start)
 }
 #endif
 
-int __weak create_section_mapping(unsigned long start, unsigned long end)
+int __weak create_section_mapping(unsigned long start, unsigned long end, int nid)
 {
 	return -ENODEV;
 }
@@ -127,7 +117,7 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end)
 	return -ENODEV;
 }
 
-int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
+int __meminit arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
 		bool want_memblock)
 {
 	unsigned long start_pfn = start >> PAGE_SHIFT;
@@ -137,7 +127,7 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
 	resize_hpt_for_hotplug(memblock_phys_mem_size());
 
 	start = (unsigned long)__va(start);
-	rc = create_section_mapping(start, start + size);
+	rc = create_section_mapping(start, start + size, nid);
 	if (rc) {
 		pr_warn("Unable to create mapping for hot added memory 0x%llx..0x%llx: %d\n",
 			start, start + size, rc);
@@ -148,7 +138,7 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
 }
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
-int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
+int __meminit arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
 {
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
@@ -212,7 +202,7 @@ walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
 EXPORT_SYMBOL_GPL(walk_system_ram_range);
 
 #ifndef CONFIG_NEED_MULTIPLE_NODES
-void __init initmem_init(void)
+void __init mem_topology_setup(void)
 {
 	max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
 	min_low_pfn = MEMORY_START >> PAGE_SHIFT;
@@ -224,7 +214,10 @@ void __init initmem_init(void)
 	 * memblock_regions
 	 */
 	memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0);
+}
 
+void __init initmem_init(void)
+{
 	/* XXX need to clip this if using highmem? */
 	sparse_memory_present_with_active_regions(0);
 	sparse_init();
diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c
index d503f344e476..b24ce40acd47 100644
--- a/arch/powerpc/mm/mmap.c
+++ b/arch/powerpc/mm/mmap.c
@@ -39,12 +39,12 @@
 #define MIN_GAP (128*1024*1024)
 #define MAX_GAP (TASK_SIZE/6*5)
 
-static inline int mmap_is_legacy(void)
+static inline int mmap_is_legacy(struct rlimit *rlim_stack)
 {
 	if (current->personality & ADDR_COMPAT_LAYOUT)
 		return 1;
 
-	if (rlimit(RLIMIT_STACK) == RLIM_INFINITY)
+	if (rlim_stack->rlim_cur == RLIM_INFINITY)
 		return 1;
 
 	return sysctl_legacy_va_layout;
@@ -76,9 +76,10 @@ static inline unsigned long stack_maxrandom_size(void)
 		return (1<<30);
 }
 
-static inline unsigned long mmap_base(unsigned long rnd)
+static inline unsigned long mmap_base(unsigned long rnd,
+				      struct rlimit *rlim_stack)
 {
-	unsigned long gap = rlimit(RLIMIT_STACK);
+	unsigned long gap = rlim_stack->rlim_cur;
 	unsigned long pad = stack_maxrandom_size() + stack_guard_gap;
 
 	/* Values close to RLIM_INFINITY can overflow. */
@@ -196,26 +197,28 @@ radix__arch_get_unmapped_area_topdown(struct file *filp,
 }
 
 static void radix__arch_pick_mmap_layout(struct mm_struct *mm,
-					unsigned long random_factor)
+					unsigned long random_factor,
+					struct rlimit *rlim_stack)
 {
-	if (mmap_is_legacy()) {
+	if (mmap_is_legacy(rlim_stack)) {
 		mm->mmap_base = TASK_UNMAPPED_BASE;
 		mm->get_unmapped_area = radix__arch_get_unmapped_area;
 	} else {
-		mm->mmap_base = mmap_base(random_factor);
+		mm->mmap_base = mmap_base(random_factor, rlim_stack);
 		mm->get_unmapped_area = radix__arch_get_unmapped_area_topdown;
 	}
 }
 #else
 /* dummy */
 extern void radix__arch_pick_mmap_layout(struct mm_struct *mm,
-					unsigned long random_factor);
+					unsigned long random_factor,
+					struct rlimit *rlim_stack);
 #endif
 /*
  * This function, called very early during the creation of a new
  * process VM image, sets up which VM layout function to use:
  */
-void arch_pick_mmap_layout(struct mm_struct *mm)
+void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
 {
 	unsigned long random_factor = 0UL;
 
@@ -223,16 +226,17 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 		random_factor = arch_mmap_rnd();
 
 	if (radix_enabled())
-		return radix__arch_pick_mmap_layout(mm, random_factor);
+		return radix__arch_pick_mmap_layout(mm, random_factor,
+						    rlim_stack);
 	/*
 	 * Fall back to the standard layout if the personality
 	 * bit is set, or if the expected stack growth is unlimited:
 	 */
-	if (mmap_is_legacy()) {
+	if (mmap_is_legacy(rlim_stack)) {
 		mm->mmap_base = TASK_UNMAPPED_BASE;
 		mm->get_unmapped_area = arch_get_unmapped_area;
 	} else {
-		mm->mmap_base = mmap_base(random_factor);
+		mm->mmap_base = mmap_base(random_factor, rlim_stack);
 		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
 	}
 }
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index 3f980baade4c..b75194dff64c 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -94,13 +94,6 @@ static int hash__init_new_context(struct mm_struct *mm)
 		return index;
 
 	/*
-	 * In the case of exec, use the default limit,
-	 * otherwise inherit it from the mm we are duplicating.
-	 */
-	if (!mm->context.slb_addr_limit)
-		mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64;
-
-	/*
 	 * The old code would re-promote on fork, we don't do that when using
 	 * slices as it could cause problem promoting slices that have been
 	 * forced down to 4K.
@@ -115,7 +108,7 @@ static int hash__init_new_context(struct mm_struct *mm)
 	 * check against 0 is OK.
 	 */
 	if (mm->context.id == 0)
-		slice_set_user_psize(mm, mmu_virtual_psize);
+		slice_init_new_context_exec(mm);
 
 	subpage_prot_init_new_context(mm);
 
@@ -186,6 +179,19 @@ void __destroy_context(int context_id)
 }
 EXPORT_SYMBOL_GPL(__destroy_context);
 
+static void destroy_contexts(mm_context_t *ctx)
+{
+	int index, context_id;
+
+	spin_lock(&mmu_context_lock);
+	for (index = 0; index < ARRAY_SIZE(ctx->extended_id); index++) {
+		context_id = ctx->extended_id[index];
+		if (context_id)
+			ida_remove(&mmu_context_ida, context_id);
+	}
+	spin_unlock(&mmu_context_lock);
+}
+
 #ifdef CONFIG_PPC_64K_PAGES
 static void destroy_pagetable_page(struct mm_struct *mm)
 {
@@ -224,7 +230,7 @@ void destroy_context(struct mm_struct *mm)
 	else
 		subpage_prot_free(mm);
 	destroy_pagetable_page(mm);
-	__destroy_context(mm->context.id);
+	destroy_contexts(&mm->context);
 	mm->context.id = MMU_NO_CONTEXT;
 }
 
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
index 9a8a084e4aba..4c615fcb0cf0 100644
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/mmu_context_iommu.c
@@ -75,8 +75,7 @@ EXPORT_SYMBOL_GPL(mm_iommu_preregistered);
 /*
  * Taken from alloc_migrate_target with changes to remove CMA allocations
  */
-struct page *new_iommu_non_cma_page(struct page *page, unsigned long private,
-					int **resultp)
+struct page *new_iommu_non_cma_page(struct page *page, unsigned long private)
 {
 	gfp_t gfp_mask = GFP_USER;
 	struct page *new_page;
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index 4554d6527682..be8f5c9d4d08 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -331,6 +331,17 @@ int init_new_context(struct task_struct *t, struct mm_struct *mm)
 {
 	pr_hard("initing context for mm @%p\n", mm);
 
+#ifdef	CONFIG_PPC_MM_SLICES
+	/*
+	 * MMU_NO_CONTEXT is ~0, so check explicitly against
+	 * context.id == 0. This ensures that we properly initialize the
+	 * context slice details for newly allocated mm's (which will have
+	 * id == 0) and don't alter the context slice details inherited via
+	 * fork (which will have id != 0).
+	 */
+	if (mm->context.id == 0)
+		slice_init_new_context_exec(mm);
+#endif
 	mm->context.id = MMU_NO_CONTEXT;
 	mm->context.active = 0;
 	return 0;
@@ -428,8 +439,8 @@ void __init mmu_context_init(void)
 	 *      -- BenH
 	 */
 	if (mmu_has_feature(MMU_FTR_TYPE_8xx)) {
-		first_context = 0;
-		last_context = 15;
+		first_context = 1;
+		last_context = 16;
 		no_selective_tlbil = true;
 	} else if (mmu_has_feature(MMU_FTR_TYPE_47x)) {
 		first_context = 1;
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 57fbc554c785..c4c0a09a7775 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -98,7 +98,6 @@ extern void setbat(int index, unsigned long virt, phys_addr_t phys,
 		   unsigned int size, pgprot_t prot);
 
 extern int __map_without_bats;
-extern int __allow_ioremap_reserved;
 extern unsigned int rtas_data, rtas_size;
 
 struct hash_pte;
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index edd8d0bc9364..57a5029b4521 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -831,18 +831,13 @@ out:
 	of_node_put(rtas);
 }
 
-void __init initmem_init(void)
+void __init mem_topology_setup(void)
 {
-	int nid, cpu;
-
-	max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
-	max_pfn = max_low_pfn;
+	int cpu;
 
 	if (parse_numa_properties())
 		setup_nonnuma();
 
-	memblock_dump_all();
-
 	/*
 	 * Modify the set of possible NUMA nodes to reflect information
 	 * available about the set of online nodes, and the set of nodes
@@ -853,6 +848,23 @@ void __init initmem_init(void)
 
 	find_possible_nodes();
 
+	setup_node_to_cpumask_map();
+
+	reset_numa_cpu_lookup_table();
+
+	for_each_present_cpu(cpu)
+		numa_setup_cpu(cpu);
+}
+
+void __init initmem_init(void)
+{
+	int nid;
+
+	max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
+	max_pfn = max_low_pfn;
+
+	memblock_dump_all();
+
 	for_each_online_node(nid) {
 		unsigned long start_pfn, end_pfn;
 
@@ -863,10 +875,6 @@ void __init initmem_init(void)
 
 	sparse_init();
 
-	setup_node_to_cpumask_map();
-
-	reset_numa_cpu_lookup_table();
-
 	/*
 	 * We need the numa_cpu_lookup_table to be accurate for all CPUs,
 	 * even before we online them, so that we can use cpu_to_{node,mem}
@@ -876,8 +884,6 @@ void __init initmem_init(void)
 	 */
 	cpuhp_setup_state_nocalls(CPUHP_POWER_NUMA_PREPARE, "powerpc/numa:prepare",
 				  ppc_numa_cpu_prepare, ppc_numa_cpu_dead);
-	for_each_present_cpu(cpu)
-		numa_setup_cpu(cpu);
 }
 
 static int __init early_numa(char *p)
@@ -1105,7 +1111,7 @@ static void setup_cpu_associativity_change_counters(void)
 	for_each_possible_cpu(cpu) {
 		int i;
 		u8 *counts = vphn_cpu_change_counts[cpu];
-		volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts;
+		volatile u8 *hypervisor_counts = lppaca_of(cpu).vphn_assoc_counts;
 
 		for (i = 0; i < distance_ref_points_depth; i++)
 			counts[i] = hypervisor_counts[i];
@@ -1131,7 +1137,7 @@ static int update_cpu_associativity_changes_mask(void)
 	for_each_possible_cpu(cpu) {
 		int i, changed = 0;
 		u8 *counts = vphn_cpu_change_counts[cpu];
-		volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts;
+		volatile u8 *hypervisor_counts = lppaca_of(cpu).vphn_assoc_counts;
 
 		for (i = 0; i < distance_ref_points_depth; i++) {
 			if (hypervisor_counts[i] != counts[i]) {
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index 422e80253a33..518518fb7c45 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -155,15 +155,15 @@ void mmu_cleanup_all(void)
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-int create_section_mapping(unsigned long start, unsigned long end)
+int __meminit create_section_mapping(unsigned long start, unsigned long end, int nid)
 {
 	if (radix_enabled())
-		return radix__create_section_mapping(start, end);
+		return radix__create_section_mapping(start, end, nid);
 
-	return hash__create_section_mapping(start, end);
+	return hash__create_section_mapping(start, end, nid);
 }
 
-int remove_section_mapping(unsigned long start, unsigned long end)
+int __meminit remove_section_mapping(unsigned long start, unsigned long end)
 {
 	if (radix_enabled())
 		return radix__remove_section_mapping(start, end);
diff --git a/arch/powerpc/mm/pgtable-hash64.c b/arch/powerpc/mm/pgtable-hash64.c
index 469808e77e58..199bfda5f0d9 100644
--- a/arch/powerpc/mm/pgtable-hash64.c
+++ b/arch/powerpc/mm/pgtable-hash64.c
@@ -24,6 +24,10 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/thp.h>
 
+#if H_PGTABLE_RANGE > (USER_VSID_RANGE * (TASK_SIZE_USER64 / TASK_CONTEXT_SIZE))
+#warning Limited user VSID range means pagetable space is wasted
+#endif
+
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 /*
  * vmemmap is the starting address of the virtual address space where
@@ -320,7 +324,7 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
 
 	if (!is_kernel_addr(addr)) {
 		ssize = user_segment_size(addr);
-		vsid = get_vsid(mm->context.id, addr, ssize);
+		vsid = get_user_vsid(&mm->context, addr, ssize);
 		WARN_ON(vsid == 0);
 	} else {
 		vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index 2e10a964e290..f1891e215e39 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -48,20 +48,88 @@ static int native_register_process_table(unsigned long base, unsigned long pg_sz
 	return 0;
 }
 
-static __ref void *early_alloc_pgtable(unsigned long size)
+static __ref void *early_alloc_pgtable(unsigned long size, int nid,
+			unsigned long region_start, unsigned long region_end)
 {
+	unsigned long pa = 0;
 	void *pt;
 
-	pt = __va(memblock_alloc_base(size, size, MEMBLOCK_ALLOC_ANYWHERE));
+	if (region_start || region_end) /* has region hint */
+		pa = memblock_alloc_range(size, size, region_start, region_end,
+						MEMBLOCK_NONE);
+	else if (nid != -1) /* has node hint */
+		pa = memblock_alloc_base_nid(size, size,
+						MEMBLOCK_ALLOC_ANYWHERE,
+						nid, MEMBLOCK_NONE);
+
+	if (!pa)
+		pa = memblock_alloc_base(size, size, MEMBLOCK_ALLOC_ANYWHERE);
+
+	BUG_ON(!pa);
+
+	pt = __va(pa);
 	memset(pt, 0, size);
 
 	return pt;
 }
 
-int radix__map_kernel_page(unsigned long ea, unsigned long pa,
+static int early_map_kernel_page(unsigned long ea, unsigned long pa,
 			  pgprot_t flags,
-			  unsigned int map_page_size)
+			  unsigned int map_page_size,
+			  int nid,
+			  unsigned long region_start, unsigned long region_end)
 {
+	unsigned long pfn = pa >> PAGE_SHIFT;
+	pgd_t *pgdp;
+	pud_t *pudp;
+	pmd_t *pmdp;
+	pte_t *ptep;
+
+	pgdp = pgd_offset_k(ea);
+	if (pgd_none(*pgdp)) {
+		pudp = early_alloc_pgtable(PUD_TABLE_SIZE, nid,
+						region_start, region_end);
+		pgd_populate(&init_mm, pgdp, pudp);
+	}
+	pudp = pud_offset(pgdp, ea);
+	if (map_page_size == PUD_SIZE) {
+		ptep = (pte_t *)pudp;
+		goto set_the_pte;
+	}
+	if (pud_none(*pudp)) {
+		pmdp = early_alloc_pgtable(PMD_TABLE_SIZE, nid,
+						region_start, region_end);
+		pud_populate(&init_mm, pudp, pmdp);
+	}
+	pmdp = pmd_offset(pudp, ea);
+	if (map_page_size == PMD_SIZE) {
+		ptep = pmdp_ptep(pmdp);
+		goto set_the_pte;
+	}
+	if (!pmd_present(*pmdp)) {
+		ptep = early_alloc_pgtable(PAGE_SIZE, nid,
+						region_start, region_end);
+		pmd_populate_kernel(&init_mm, pmdp, ptep);
+	}
+	ptep = pte_offset_kernel(pmdp, ea);
+
+set_the_pte:
+	set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags));
+	smp_wmb();
+	return 0;
+}
+
+/*
+ * nid, region_start, and region_end are hints to try to place the page
+ * table memory in the same node or region.
+ */
+static int __map_kernel_page(unsigned long ea, unsigned long pa,
+			  pgprot_t flags,
+			  unsigned int map_page_size,
+			  int nid,
+			  unsigned long region_start, unsigned long region_end)
+{
+	unsigned long pfn = pa >> PAGE_SHIFT;
 	pgd_t *pgdp;
 	pud_t *pudp;
 	pmd_t *pmdp;
@@ -70,61 +138,48 @@ int radix__map_kernel_page(unsigned long ea, unsigned long pa,
 	 * Make sure task size is correct as per the max adddr
 	 */
 	BUILD_BUG_ON(TASK_SIZE_USER64 > RADIX_PGTABLE_RANGE);
-	if (slab_is_available()) {
-		pgdp = pgd_offset_k(ea);
-		pudp = pud_alloc(&init_mm, pgdp, ea);
-		if (!pudp)
-			return -ENOMEM;
-		if (map_page_size == PUD_SIZE) {
-			ptep = (pte_t *)pudp;
-			goto set_the_pte;
-		}
-		pmdp = pmd_alloc(&init_mm, pudp, ea);
-		if (!pmdp)
-			return -ENOMEM;
-		if (map_page_size == PMD_SIZE) {
-			ptep = pmdp_ptep(pmdp);
-			goto set_the_pte;
-		}
-		ptep = pte_alloc_kernel(pmdp, ea);
-		if (!ptep)
-			return -ENOMEM;
-	} else {
-		pgdp = pgd_offset_k(ea);
-		if (pgd_none(*pgdp)) {
-			pudp = early_alloc_pgtable(PUD_TABLE_SIZE);
-			BUG_ON(pudp == NULL);
-			pgd_populate(&init_mm, pgdp, pudp);
-		}
-		pudp = pud_offset(pgdp, ea);
-		if (map_page_size == PUD_SIZE) {
-			ptep = (pte_t *)pudp;
-			goto set_the_pte;
-		}
-		if (pud_none(*pudp)) {
-			pmdp = early_alloc_pgtable(PMD_TABLE_SIZE);
-			BUG_ON(pmdp == NULL);
-			pud_populate(&init_mm, pudp, pmdp);
-		}
-		pmdp = pmd_offset(pudp, ea);
-		if (map_page_size == PMD_SIZE) {
-			ptep = pmdp_ptep(pmdp);
-			goto set_the_pte;
-		}
-		if (!pmd_present(*pmdp)) {
-			ptep = early_alloc_pgtable(PAGE_SIZE);
-			BUG_ON(ptep == NULL);
-			pmd_populate_kernel(&init_mm, pmdp, ptep);
-		}
-		ptep = pte_offset_kernel(pmdp, ea);
+
+	if (unlikely(!slab_is_available()))
+		return early_map_kernel_page(ea, pa, flags, map_page_size,
+						nid, region_start, region_end);
+
+	/*
+	 * TODO: the page table allocation functions should be able to take
+	 * a node, so we can place kernel page tables on the right nodes
+	 * after boot.
+	 */
+	pgdp = pgd_offset_k(ea);
+	pudp = pud_alloc(&init_mm, pgdp, ea);
+	if (!pudp)
+		return -ENOMEM;
+	if (map_page_size == PUD_SIZE) {
+		ptep = (pte_t *)pudp;
+		goto set_the_pte;
+	}
+	pmdp = pmd_alloc(&init_mm, pudp, ea);
+	if (!pmdp)
+		return -ENOMEM;
+	if (map_page_size == PMD_SIZE) {
+		ptep = pmdp_ptep(pmdp);
+		goto set_the_pte;
 	}
+	ptep = pte_alloc_kernel(pmdp, ea);
+	if (!ptep)
+		return -ENOMEM;
 
 set_the_pte:
-	set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, flags));
+	set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags));
 	smp_wmb();
 	return 0;
 }
 
+int radix__map_kernel_page(unsigned long ea, unsigned long pa,
+			  pgprot_t flags,
+			  unsigned int map_page_size)
+{
+	return __map_kernel_page(ea, pa, flags, map_page_size, -1, 0, 0);
+}
+
 #ifdef CONFIG_STRICT_KERNEL_RWX
 void radix__change_memory_range(unsigned long start, unsigned long end,
 				unsigned long clear)
@@ -211,7 +266,8 @@ static inline void __meminit print_mapping(unsigned long start,
 }
 
 static int __meminit create_physical_mapping(unsigned long start,
-					     unsigned long end)
+					     unsigned long end,
+					     int nid)
 {
 	unsigned long vaddr, addr, mapping_size = 0;
 	pgprot_t prot;
@@ -267,7 +323,7 @@ retry:
 		else
 			prot = PAGE_KERNEL;
 
-		rc = radix__map_kernel_page(vaddr, addr, prot, mapping_size);
+		rc = __map_kernel_page(vaddr, addr, prot, mapping_size, nid, start, end);
 		if (rc)
 			return rc;
 	}
@@ -276,7 +332,7 @@ retry:
 	return 0;
 }
 
-static void __init radix_init_pgtable(void)
+void __init radix_init_pgtable(void)
 {
 	unsigned long rts_field;
 	struct memblock_region *reg;
@@ -286,9 +342,16 @@ static void __init radix_init_pgtable(void)
 	/*
 	 * Create the linear mapping, using standard page size for now
 	 */
-	for_each_memblock(memory, reg)
+	for_each_memblock(memory, reg) {
+		/*
+		 * The memblock allocator is up at this point, so the
+		 * page tables will be allocated within the range. No
+		 * need for a node (which we don't have yet).
+		 */
 		WARN_ON(create_physical_mapping(reg->base,
-						reg->base + reg->size));
+						reg->base + reg->size,
+						-1));
+	}
 
 	/* Find out how many PID bits are supported */
 	if (cpu_has_feature(CPU_FTR_HVMODE)) {
@@ -317,7 +380,7 @@ static void __init radix_init_pgtable(void)
 	 * host.
 	 */
 	BUG_ON(PRTB_SIZE_SHIFT > 36);
-	process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT);
+	process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT, -1, 0, 0);
 	/*
 	 * Fill in the process table.
 	 */
@@ -575,12 +638,8 @@ void __init radix__early_init_mmu(void)
 #ifdef CONFIG_PCI
 	pci_io_base = ISA_IO_BASE;
 #endif
-
-	/*
-	 * For now radix also use the same frag size
-	 */
-	__pte_frag_nr = H_PTE_FRAG_NR;
-	__pte_frag_size_shift = H_PTE_FRAG_SIZE_SHIFT;
+	__pte_frag_nr = RADIX_PTE_FRAG_NR;
+	__pte_frag_size_shift = RADIX_PTE_FRAG_SIZE_SHIFT;
 
 	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
 		radix_init_native();
@@ -695,7 +754,7 @@ struct change_mapping_params {
 	unsigned long aligned_end;
 };
 
-static int stop_machine_change_mapping(void *data)
+static int __meminit stop_machine_change_mapping(void *data)
 {
 	struct change_mapping_params *params =
 			(struct change_mapping_params *)data;
@@ -705,8 +764,8 @@ static int stop_machine_change_mapping(void *data)
 
 	spin_unlock(&init_mm.page_table_lock);
 	pte_clear(&init_mm, params->aligned_start, params->pte);
-	create_physical_mapping(params->aligned_start, params->start);
-	create_physical_mapping(params->end, params->aligned_end);
+	create_physical_mapping(params->aligned_start, params->start, -1);
+	create_physical_mapping(params->end, params->aligned_end, -1);
 	spin_lock(&init_mm.page_table_lock);
 	return 0;
 }
@@ -742,7 +801,7 @@ static void remove_pte_table(pte_t *pte_start, unsigned long addr,
 /*
  * clear the pte and potentially split the mapping helper
  */
-static void split_kernel_mapping(unsigned long addr, unsigned long end,
+static void __meminit split_kernel_mapping(unsigned long addr, unsigned long end,
 				unsigned long size, pte_t *pte)
 {
 	unsigned long mask = ~(size - 1);
@@ -835,7 +894,7 @@ static void remove_pud_table(pud_t *pud_start, unsigned long addr,
 	}
 }
 
-static void remove_pagetable(unsigned long start, unsigned long end)
+static void __meminit remove_pagetable(unsigned long start, unsigned long end)
 {
 	unsigned long addr, next;
 	pud_t *pud_base;
@@ -863,12 +922,12 @@ static void remove_pagetable(unsigned long start, unsigned long end)
 	radix__flush_tlb_kernel_range(start, end);
 }
 
-int __ref radix__create_section_mapping(unsigned long start, unsigned long end)
+int __meminit radix__create_section_mapping(unsigned long start, unsigned long end, int nid)
 {
-	return create_physical_mapping(start, end);
+	return create_physical_mapping(start, end, nid);
 }
 
-int radix__remove_section_mapping(unsigned long start, unsigned long end)
+int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end)
 {
 	remove_pagetable(start, end);
 	return 0;
@@ -876,19 +935,30 @@ int radix__remove_section_mapping(unsigned long start, unsigned long end)
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
+static int __map_kernel_page_nid(unsigned long ea, unsigned long pa,
+				 pgprot_t flags, unsigned int map_page_size,
+				 int nid)
+{
+	return __map_kernel_page(ea, pa, flags, map_page_size, nid, 0, 0);
+}
+
 int __meminit radix__vmemmap_create_mapping(unsigned long start,
 				      unsigned long page_size,
 				      unsigned long phys)
 {
 	/* Create a PTE encoding */
 	unsigned long flags = _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_KERNEL_RW;
+	int nid = early_pfn_to_nid(phys >> PAGE_SHIFT);
+	int ret;
+
+	ret = __map_kernel_page_nid(start, phys, __pgprot(flags), page_size, nid);
+	BUG_ON(ret);
 
-	BUG_ON(radix__map_kernel_page(start, phys, __pgprot(flags), page_size));
 	return 0;
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-void radix__vmemmap_remove_mapping(unsigned long start, unsigned long page_size)
+void __meminit radix__vmemmap_remove_mapping(unsigned long start, unsigned long page_size)
 {
 	remove_pagetable(start, start + page_size);
 }
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index d35d9ad3c1cd..120a49bfb9c6 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -148,7 +148,7 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, unsigned long flags,
 	 * mem_init() sets high_memory so only do the check after that.
 	 */
 	if (slab_is_available() && (p < virt_to_phys(high_memory)) &&
-	    !(__allow_ioremap_reserved && memblock_is_region_reserved(p, size))) {
+	    page_is_ram(__phys_to_pfn(p))) {
 		printk("__ioremap(): phys addr 0x%llx is RAM lr %ps\n",
 		       (unsigned long long)p, __builtin_return_address(0));
 		return NULL;
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index adf469f312f2..9bf659d5078c 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -57,11 +57,6 @@
 
 #include "mmu_decl.h"
 
-#ifdef CONFIG_PPC_BOOK3S_64
-#if TASK_SIZE_USER64 > (1UL << (ESID_BITS + SID_SHIFT))
-#error TASK_SIZE_USER64 exceeds user VSID range
-#endif
-#endif
 
 #ifdef CONFIG_PPC_BOOK3S_64
 /*
diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c
index ba71c5481f42..0eafdf01edc7 100644
--- a/arch/powerpc/mm/pkeys.c
+++ b/arch/powerpc/mm/pkeys.c
@@ -119,18 +119,15 @@ int pkey_initialize(void)
 #else
 	os_reserved = 0;
 #endif
+	initial_allocation_mask = ~0x0;
+	pkey_amr_uamor_mask = ~0x0ul;
+	pkey_iamr_mask = ~0x0ul;
 	/*
-	 * Bits are in LE format. NOTE: 1, 0 are reserved.
+	 * key 0, 1 are reserved.
 	 * key 0 is the default key, which allows read/write/execute.
 	 * key 1 is recommended not to be used. PowerISA(3.0) page 1015,
 	 * programming note.
 	 */
-	initial_allocation_mask = ~0x0;
-
-	/* register mask is in BE format */
-	pkey_amr_uamor_mask = ~0x0ul;
-	pkey_iamr_mask = ~0x0ul;
-
 	for (i = 2; i < (pkeys_total - os_reserved); i++) {
 		initial_allocation_mask &= ~(0x1 << i);
 		pkey_amr_uamor_mask &= ~(0x3ul << pkeyshift(i));
@@ -308,9 +305,9 @@ void thread_pkey_regs_init(struct thread_struct *thread)
 	if (static_branch_likely(&pkey_disabled))
 		return;
 
-	write_amr(read_amr() & pkey_amr_uamor_mask);
-	write_iamr(read_iamr() & pkey_iamr_mask);
-	write_uamor(read_uamor() & pkey_amr_uamor_mask);
+	thread->amr = read_amr() & pkey_amr_uamor_mask;
+	thread->iamr = read_iamr() & pkey_iamr_mask;
+	thread->uamor = read_uamor() & pkey_amr_uamor_mask;
 }
 
 static inline bool pkey_allows_readwrite(int pkey)
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 13cfe413b40d..66577cc66dc9 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -22,6 +22,7 @@
 #include <asm/cacheflush.h>
 #include <asm/smp.h>
 #include <linux/compiler.h>
+#include <linux/context_tracking.h>
 #include <linux/mm_types.h>
 
 #include <asm/udbg.h>
@@ -340,3 +341,110 @@ void slb_initialize(void)
 
 	asm volatile("isync":::"memory");
 }
+
+static void insert_slb_entry(unsigned long vsid, unsigned long ea,
+			     int bpsize, int ssize)
+{
+	unsigned long flags, vsid_data, esid_data;
+	enum slb_index index;
+	int slb_cache_index;
+
+	/*
+	 * We are irq disabled, hence should be safe to access PACA.
+	 */
+	index = get_paca()->stab_rr;
+
+	/*
+	 * simple round-robin replacement of slb starting at SLB_NUM_BOLTED.
+	 */
+	if (index < (mmu_slb_size - 1))
+		index++;
+	else
+		index = SLB_NUM_BOLTED;
+
+	get_paca()->stab_rr = index;
+
+	flags = SLB_VSID_USER | mmu_psize_defs[bpsize].sllp;
+	vsid_data = (vsid << slb_vsid_shift(ssize)) | flags |
+		    ((unsigned long) ssize << SLB_VSID_SSIZE_SHIFT);
+	esid_data = mk_esid_data(ea, ssize, index);
+
+	asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data)
+		     : "memory");
+
+	/*
+	 * Now update slb cache entries
+	 */
+	slb_cache_index = get_paca()->slb_cache_ptr;
+	if (slb_cache_index < SLB_CACHE_ENTRIES) {
+		/*
+		 * We have space in slb cache for optimized switch_slb().
+		 * Top 36 bits from esid_data as per ISA
+		 */
+		get_paca()->slb_cache[slb_cache_index++] = esid_data >> 28;
+		get_paca()->slb_cache_ptr++;
+	} else {
+		/*
+		 * Our cache is full and the current cache content strictly
+		 * doesn't indicate the active SLB contents. Bump the ptr
+		 * so that switch_slb() will ignore the cache.
+		 */
+		get_paca()->slb_cache_ptr = SLB_CACHE_ENTRIES + 1;
+	}
+}
+
+static void handle_multi_context_slb_miss(int context_id, unsigned long ea)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long vsid;
+	int bpsize;
+
+	/*
+	 * We are always above 1TB, hence use high user segment size.
+	 */
+	vsid = get_vsid(context_id, ea, mmu_highuser_ssize);
+	bpsize = get_slice_psize(mm, ea);
+	insert_slb_entry(vsid, ea, bpsize, mmu_highuser_ssize);
+}
+
+void slb_miss_large_addr(struct pt_regs *regs)
+{
+	enum ctx_state prev_state = exception_enter();
+	unsigned long ea = regs->dar;
+	int context;
+
+	if (REGION_ID(ea) != USER_REGION_ID)
+		goto slb_bad_addr;
+
+	/*
+	 * Are we beyond what the page table layout supports?
+	 */
+	if ((ea & ~REGION_MASK) >= H_PGTABLE_RANGE)
+		goto slb_bad_addr;
+
+	/* Lower address should have been handled by asm code */
+	if (ea < (1UL << MAX_EA_BITS_PER_CONTEXT))
+		goto slb_bad_addr;
+
+	/*
+	 * consider this as bad access if we take a SLB miss
+	 * on an address above addr limit.
+	 */
+	if (ea >= current->mm->context.slb_addr_limit)
+		goto slb_bad_addr;
+
+	context = get_ea_context(&current->mm->context, ea);
+	if (!context)
+		goto slb_bad_addr;
+
+	handle_multi_context_slb_miss(context, ea);
+	exception_exit(prev_state);
+	return;
+
+slb_bad_addr:
+	if (user_mode(regs))
+		_exception(SIGSEGV, regs, SEGV_BNDERR, ea);
+	else
+		bad_page_fault(regs, ea, SIGSEGV);
+	exception_exit(prev_state);
+}
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index 2cf5ef3fc50d..a83fbd2a4a24 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -75,10 +75,15 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_68_BIT_VA)
  */
 _GLOBAL(slb_allocate)
 	/*
-	 * check for bad kernel/user address
-	 * (ea & ~REGION_MASK) >= PGTABLE_RANGE
+	 * Check if the address falls within the range of the first context, or
+	 * if we may need to handle multi context. For the first context we
+	 * allocate the slb entry via the fast path below. For large address we
+	 * branch out to C-code and see if additional contexts have been
+	 * allocated.
+	 * The test here is:
+	 *   (ea & ~REGION_MASK) >= (1ull << MAX_EA_BITS_PER_CONTEXT)
 	 */
-	rldicr. r9,r3,4,(63 - H_PGTABLE_EADDR_SIZE - 4)
+	rldicr. r9,r3,4,(63 - MAX_EA_BITS_PER_CONTEXT - 4)
 	bne-	8f
 
 	srdi	r9,r3,60		/* get region */
@@ -200,10 +205,12 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
 5:
 	/*
 	 * Handle lpsizes
-	 * r9 is get_paca()->context.low_slices_psize, r11 is index
+	 * r9 is get_paca()->context.low_slices_psize[index], r11 is mask_index
 	 */
-	ld	r9,PACALOWSLICESPSIZE(r13)
-	mr	r11,r10
+	srdi    r11,r10,1 /* index */
+	addi	r9,r11,PACALOWSLICESPSIZE
+	lbzx	r9,r13,r9		/* r9 is lpsizes[r11] */
+	rldicl	r11,r10,0,63		/* r11 = r10 & 0x1 */
 6:
 	sldi	r11,r11,2  /* index * 4 */
 	/* Extract the psize and multiply to get an array offset */
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 23ec2c5e3b78..205fe557ca10 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -35,34 +35,28 @@
 #include <asm/mmu.h>
 #include <asm/copro.h>
 #include <asm/hugetlb.h>
+#include <asm/mmu_context.h>
 
 static DEFINE_SPINLOCK(slice_convert_lock);
-/*
- * One bit per slice. We have lower slices which cover 256MB segments
- * upto 4G range. That gets us 16 low slices. For the rest we track slices
- * in 1TB size.
- */
-struct slice_mask {
-	u64 low_slices;
-	DECLARE_BITMAP(high_slices, SLICE_NUM_HIGH);
-};
 
 #ifdef DEBUG
 int _slice_debug = 1;
 
-static void slice_print_mask(const char *label, struct slice_mask mask)
+static void slice_print_mask(const char *label, const struct slice_mask *mask)
 {
 	if (!_slice_debug)
 		return;
-	pr_devel("%s low_slice: %*pbl\n", label, (int)SLICE_NUM_LOW, &mask.low_slices);
-	pr_devel("%s high_slice: %*pbl\n", label, (int)SLICE_NUM_HIGH, mask.high_slices);
+	pr_devel("%s low_slice: %*pbl\n", label,
+			(int)SLICE_NUM_LOW, &mask->low_slices);
+	pr_devel("%s high_slice: %*pbl\n", label,
+			(int)SLICE_NUM_HIGH, mask->high_slices);
 }
 
 #define slice_dbg(fmt...) do { if (_slice_debug) pr_devel(fmt); } while (0)
 
 #else
 
-static void slice_print_mask(const char *label, struct slice_mask mask) {}
+static void slice_print_mask(const char *label, const struct slice_mask *mask) {}
 #define slice_dbg(fmt...)
 
 #endif
@@ -73,10 +67,12 @@ static void slice_range_to_mask(unsigned long start, unsigned long len,
 	unsigned long end = start + len - 1;
 
 	ret->low_slices = 0;
-	bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
+	if (SLICE_NUM_HIGH)
+		bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
 
 	if (start < SLICE_LOW_TOP) {
-		unsigned long mend = min(end, (SLICE_LOW_TOP - 1));
+		unsigned long mend = min(end,
+					 (unsigned long)(SLICE_LOW_TOP - 1));
 
 		ret->low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1))
 			- (1u << GET_LOW_SLICE_INDEX(start));
@@ -113,11 +109,13 @@ static int slice_high_has_vma(struct mm_struct *mm, unsigned long slice)
 	unsigned long start = slice << SLICE_HIGH_SHIFT;
 	unsigned long end = start + (1ul << SLICE_HIGH_SHIFT);
 
+#ifdef CONFIG_PPC64
 	/* Hack, so that each addresses is controlled by exactly one
 	 * of the high or low area bitmaps, the first high area starts
 	 * at 4GB, not 0 */
 	if (start == 0)
 		start = SLICE_LOW_TOP;
+#endif
 
 	return !slice_area_is_free(mm, start, end - start);
 }
@@ -128,7 +126,8 @@ static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret,
 	unsigned long i;
 
 	ret->low_slices = 0;
-	bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
+	if (SLICE_NUM_HIGH)
+		bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
 
 	for (i = 0; i < SLICE_NUM_LOW; i++)
 		if (!slice_low_has_vma(mm, i))
@@ -142,53 +141,75 @@ static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret,
 			__set_bit(i, ret->high_slices);
 }
 
-static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_mask *ret,
-				unsigned long high_limit)
+#ifdef CONFIG_PPC_BOOK3S_64
+static struct slice_mask *slice_mask_for_size(struct mm_struct *mm, int psize)
 {
-	unsigned char *hpsizes;
-	int index, mask_index;
-	unsigned long i;
-	u64 lpsizes;
-
-	ret->low_slices = 0;
-	bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
+#ifdef CONFIG_PPC_64K_PAGES
+	if (psize == MMU_PAGE_64K)
+		return &mm->context.mask_64k;
+#endif
+	if (psize == MMU_PAGE_4K)
+		return &mm->context.mask_4k;
+#ifdef CONFIG_HUGETLB_PAGE
+	if (psize == MMU_PAGE_16M)
+		return &mm->context.mask_16m;
+	if (psize == MMU_PAGE_16G)
+		return &mm->context.mask_16g;
+#endif
+	BUG();
+}
+#elif defined(CONFIG_PPC_8xx)
+static struct slice_mask *slice_mask_for_size(struct mm_struct *mm, int psize)
+{
+	if (psize == mmu_virtual_psize)
+		return &mm->context.mask_base_psize;
+#ifdef CONFIG_HUGETLB_PAGE
+	if (psize == MMU_PAGE_512K)
+		return &mm->context.mask_512k;
+	if (psize == MMU_PAGE_8M)
+		return &mm->context.mask_8m;
+#endif
+	BUG();
+}
+#else
+#error "Must define the slice masks for page sizes supported by the platform"
+#endif
 
-	lpsizes = mm->context.low_slices_psize;
-	for (i = 0; i < SLICE_NUM_LOW; i++)
-		if (((lpsizes >> (i * 4)) & 0xf) == psize)
-			ret->low_slices |= 1u << i;
+static bool slice_check_range_fits(struct mm_struct *mm,
+			   const struct slice_mask *available,
+			   unsigned long start, unsigned long len)
+{
+	unsigned long end = start + len - 1;
+	u64 low_slices = 0;
 
-	if (high_limit <= SLICE_LOW_TOP)
-		return;
+	if (start < SLICE_LOW_TOP) {
+		unsigned long mend = min(end,
+					 (unsigned long)(SLICE_LOW_TOP - 1));
 
-	hpsizes = mm->context.high_slices_psize;
-	for (i = 0; i < GET_HIGH_SLICE_INDEX(high_limit); i++) {
-		mask_index = i & 0x1;
-		index = i >> 1;
-		if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == psize)
-			__set_bit(i, ret->high_slices);
+		low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1))
+				- (1u << GET_LOW_SLICE_INDEX(start));
 	}
-}
+	if ((low_slices & available->low_slices) != low_slices)
+		return false;
 
-static int slice_check_fit(struct mm_struct *mm,
-			   struct slice_mask mask, struct slice_mask available)
-{
-	DECLARE_BITMAP(result, SLICE_NUM_HIGH);
-	/*
-	 * Make sure we just do bit compare only to the max
-	 * addr limit and not the full bit map size.
-	 */
-	unsigned long slice_count = GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit);
+	if (SLICE_NUM_HIGH && ((start + len) > SLICE_LOW_TOP)) {
+		unsigned long start_index = GET_HIGH_SLICE_INDEX(start);
+		unsigned long align_end = ALIGN(end, (1UL << SLICE_HIGH_SHIFT));
+		unsigned long count = GET_HIGH_SLICE_INDEX(align_end) - start_index;
+		unsigned long i;
 
-	bitmap_and(result, mask.high_slices,
-		   available.high_slices, slice_count);
+		for (i = start_index; i < start_index + count; i++) {
+			if (!test_bit(i, available->high_slices))
+				return false;
+		}
+	}
 
-	return (mask.low_slices & available.low_slices) == mask.low_slices &&
-		bitmap_equal(result, mask.high_slices, slice_count);
+	return true;
 }
 
 static void slice_flush_segments(void *parm)
 {
+#ifdef CONFIG_PPC64
 	struct mm_struct *mm = parm;
 	unsigned long flags;
 
@@ -200,40 +221,64 @@ static void slice_flush_segments(void *parm)
 	local_irq_save(flags);
 	slb_flush_and_rebolt();
 	local_irq_restore(flags);
+#endif
 }
 
-static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psize)
+static void slice_convert(struct mm_struct *mm,
+				const struct slice_mask *mask, int psize)
 {
 	int index, mask_index;
 	/* Write the new slice psize bits */
-	unsigned char *hpsizes;
-	u64 lpsizes;
+	unsigned char *hpsizes, *lpsizes;
+	struct slice_mask *psize_mask, *old_mask;
 	unsigned long i, flags;
+	int old_psize;
 
 	slice_dbg("slice_convert(mm=%p, psize=%d)\n", mm, psize);
 	slice_print_mask(" mask", mask);
 
+	psize_mask = slice_mask_for_size(mm, psize);
+
 	/* We need to use a spinlock here to protect against
 	 * concurrent 64k -> 4k demotion ...
 	 */
 	spin_lock_irqsave(&slice_convert_lock, flags);
 
 	lpsizes = mm->context.low_slices_psize;
-	for (i = 0; i < SLICE_NUM_LOW; i++)
-		if (mask.low_slices & (1u << i))
-			lpsizes = (lpsizes & ~(0xful << (i * 4))) |
-				(((unsigned long)psize) << (i * 4));
+	for (i = 0; i < SLICE_NUM_LOW; i++) {
+		if (!(mask->low_slices & (1u << i)))
+			continue;
+
+		mask_index = i & 0x1;
+		index = i >> 1;
 
-	/* Assign the value back */
-	mm->context.low_slices_psize = lpsizes;
+		/* Update the slice_mask */
+		old_psize = (lpsizes[index] >> (mask_index * 4)) & 0xf;
+		old_mask = slice_mask_for_size(mm, old_psize);
+		old_mask->low_slices &= ~(1u << i);
+		psize_mask->low_slices |= 1u << i;
+
+		/* Update the sizes array */
+		lpsizes[index] = (lpsizes[index] & ~(0xf << (mask_index * 4))) |
+				(((unsigned long)psize) << (mask_index * 4));
+	}
 
 	hpsizes = mm->context.high_slices_psize;
 	for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); i++) {
+		if (!test_bit(i, mask->high_slices))
+			continue;
+
 		mask_index = i & 0x1;
 		index = i >> 1;
-		if (test_bit(i, mask.high_slices))
-			hpsizes[index] = (hpsizes[index] &
-					  ~(0xf << (mask_index * 4))) |
+
+		/* Update the slice_mask */
+		old_psize = (hpsizes[index] >> (mask_index * 4)) & 0xf;
+		old_mask = slice_mask_for_size(mm, old_psize);
+		__clear_bit(i, old_mask->high_slices);
+		__set_bit(i, psize_mask->high_slices);
+
+		/* Update the sizes array */
+		hpsizes[index] = (hpsizes[index] & ~(0xf << (mask_index * 4))) |
 				(((unsigned long)psize) << (mask_index * 4));
 	}
 
@@ -254,26 +299,25 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz
  * 'available' slice_mark.
  */
 static bool slice_scan_available(unsigned long addr,
-				 struct slice_mask available,
-				 int end,
-				 unsigned long *boundary_addr)
+				 const struct slice_mask *available,
+				 int end, unsigned long *boundary_addr)
 {
 	unsigned long slice;
 	if (addr < SLICE_LOW_TOP) {
 		slice = GET_LOW_SLICE_INDEX(addr);
 		*boundary_addr = (slice + end) << SLICE_LOW_SHIFT;
-		return !!(available.low_slices & (1u << slice));
+		return !!(available->low_slices & (1u << slice));
 	} else {
 		slice = GET_HIGH_SLICE_INDEX(addr);
 		*boundary_addr = (slice + end) ?
 			((slice + end) << SLICE_HIGH_SHIFT) : SLICE_LOW_TOP;
-		return !!test_bit(slice, available.high_slices);
+		return !!test_bit(slice, available->high_slices);
 	}
 }
 
 static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
 					      unsigned long len,
-					      struct slice_mask available,
+					      const struct slice_mask *available,
 					      int psize, unsigned long high_limit)
 {
 	int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
@@ -319,7 +363,7 @@ static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
 
 static unsigned long slice_find_area_topdown(struct mm_struct *mm,
 					     unsigned long len,
-					     struct slice_mask available,
+					     const struct slice_mask *available,
 					     int psize, unsigned long high_limit)
 {
 	int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
@@ -377,7 +421,7 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm,
 
 
 static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len,
-				     struct slice_mask mask, int psize,
+				     const struct slice_mask *mask, int psize,
 				     int topdown, unsigned long high_limit)
 {
 	if (topdown)
@@ -386,23 +430,33 @@ static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len,
 		return slice_find_area_bottomup(mm, len, mask, psize, high_limit);
 }
 
-static inline void slice_or_mask(struct slice_mask *dst, struct slice_mask *src)
+static inline void slice_copy_mask(struct slice_mask *dst,
+					const struct slice_mask *src)
 {
-	DECLARE_BITMAP(result, SLICE_NUM_HIGH);
-
-	dst->low_slices |= src->low_slices;
-	bitmap_or(result, dst->high_slices, src->high_slices, SLICE_NUM_HIGH);
-	bitmap_copy(dst->high_slices, result, SLICE_NUM_HIGH);
+	dst->low_slices = src->low_slices;
+	if (!SLICE_NUM_HIGH)
+		return;
+	bitmap_copy(dst->high_slices, src->high_slices, SLICE_NUM_HIGH);
 }
 
-static inline void slice_andnot_mask(struct slice_mask *dst, struct slice_mask *src)
+static inline void slice_or_mask(struct slice_mask *dst,
+					const struct slice_mask *src1,
+					const struct slice_mask *src2)
 {
-	DECLARE_BITMAP(result, SLICE_NUM_HIGH);
-
-	dst->low_slices &= ~src->low_slices;
+	dst->low_slices = src1->low_slices | src2->low_slices;
+	if (!SLICE_NUM_HIGH)
+		return;
+	bitmap_or(dst->high_slices, src1->high_slices, src2->high_slices, SLICE_NUM_HIGH);
+}
 
-	bitmap_andnot(result, dst->high_slices, src->high_slices, SLICE_NUM_HIGH);
-	bitmap_copy(dst->high_slices, result, SLICE_NUM_HIGH);
+static inline void slice_andnot_mask(struct slice_mask *dst,
+					const struct slice_mask *src1,
+					const struct slice_mask *src2)
+{
+	dst->low_slices = src1->low_slices & ~src2->low_slices;
+	if (!SLICE_NUM_HIGH)
+		return;
+	bitmap_andnot(dst->high_slices, src1->high_slices, src2->high_slices, SLICE_NUM_HIGH);
 }
 
 #ifdef CONFIG_PPC_64K_PAGES
@@ -415,10 +469,10 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 				      unsigned long flags, unsigned int psize,
 				      int topdown)
 {
-	struct slice_mask mask;
 	struct slice_mask good_mask;
 	struct slice_mask potential_mask;
-	struct slice_mask compat_mask;
+	const struct slice_mask *maskp;
+	const struct slice_mask *compat_maskp = NULL;
 	int fixed = (flags & MAP_FIXED);
 	int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
 	unsigned long page_size = 1UL << pshift;
@@ -442,23 +496,16 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	}
 
 	if (high_limit > mm->context.slb_addr_limit) {
+		/*
+		 * Increasing the slb_addr_limit does not require
+		 * slice mask cache to be recalculated because it should
+		 * be already initialised beyond the old address limit.
+		 */
 		mm->context.slb_addr_limit = high_limit;
+
 		on_each_cpu(slice_flush_segments, mm, 1);
 	}
 
-	/*
-	 * init different masks
-	 */
-	mask.low_slices = 0;
-	bitmap_zero(mask.high_slices, SLICE_NUM_HIGH);
-
-	/* silence stupid warning */;
-	potential_mask.low_slices = 0;
-	bitmap_zero(potential_mask.high_slices, SLICE_NUM_HIGH);
-
-	compat_mask.low_slices = 0;
-	bitmap_zero(compat_mask.high_slices, SLICE_NUM_HIGH);
-
 	/* Sanity checks */
 	BUG_ON(mm->task_size == 0);
 	BUG_ON(mm->context.slb_addr_limit == 0);
@@ -481,8 +528,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	/* First make up a "good" mask of slices that have the right size
 	 * already
 	 */
-	slice_mask_for_size(mm, psize, &good_mask, high_limit);
-	slice_print_mask(" good_mask", good_mask);
+	maskp = slice_mask_for_size(mm, psize);
 
 	/*
 	 * Here "good" means slices that are already the right page size,
@@ -503,40 +549,47 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	 *	search in good | compat | free, found => convert free.
 	 */
 
-#ifdef CONFIG_PPC_64K_PAGES
-	/* If we support combo pages, we can allow 64k pages in 4k slices */
-	if (psize == MMU_PAGE_64K) {
-		slice_mask_for_size(mm, MMU_PAGE_4K, &compat_mask, high_limit);
+	/*
+	 * If we support combo pages, we can allow 64k pages in 4k slices.
+	 * The mask copies could be avoided in most cases here if we had
+	 * a pointer to good mask for the next code to use.
+	 */
+	if (IS_ENABLED(CONFIG_PPC_64K_PAGES) && psize == MMU_PAGE_64K) {
+		compat_maskp = slice_mask_for_size(mm, MMU_PAGE_4K);
 		if (fixed)
-			slice_or_mask(&good_mask, &compat_mask);
+			slice_or_mask(&good_mask, maskp, compat_maskp);
+		else
+			slice_copy_mask(&good_mask, maskp);
+	} else {
+		slice_copy_mask(&good_mask, maskp);
 	}
-#endif
+
+	slice_print_mask(" good_mask", &good_mask);
+	if (compat_maskp)
+		slice_print_mask(" compat_mask", compat_maskp);
 
 	/* First check hint if it's valid or if we have MAP_FIXED */
 	if (addr != 0 || fixed) {
-		/* Build a mask for the requested range */
-		slice_range_to_mask(addr, len, &mask);
-		slice_print_mask(" mask", mask);
-
 		/* Check if we fit in the good mask. If we do, we just return,
 		 * nothing else to do
 		 */
-		if (slice_check_fit(mm, mask, good_mask)) {
+		if (slice_check_range_fits(mm, &good_mask, addr, len)) {
 			slice_dbg(" fits good !\n");
-			return addr;
+			newaddr = addr;
+			goto return_addr;
 		}
 	} else {
 		/* Now let's see if we can find something in the existing
 		 * slices for that size
 		 */
-		newaddr = slice_find_area(mm, len, good_mask,
+		newaddr = slice_find_area(mm, len, &good_mask,
 					  psize, topdown, high_limit);
 		if (newaddr != -ENOMEM) {
 			/* Found within the good mask, we don't have to setup,
 			 * we thus return directly
 			 */
 			slice_dbg(" found area at 0x%lx\n", newaddr);
-			return newaddr;
+			goto return_addr;
 		}
 	}
 	/*
@@ -544,12 +597,15 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	 * empty and thus can be converted
 	 */
 	slice_mask_for_free(mm, &potential_mask, high_limit);
-	slice_or_mask(&potential_mask, &good_mask);
-	slice_print_mask(" potential", potential_mask);
+	slice_or_mask(&potential_mask, &potential_mask, &good_mask);
+	slice_print_mask(" potential", &potential_mask);
 
-	if ((addr != 0 || fixed) && slice_check_fit(mm, mask, potential_mask)) {
-		slice_dbg(" fits potential !\n");
-		goto convert;
+	if (addr != 0 || fixed) {
+		if (slice_check_range_fits(mm, &potential_mask, addr, len)) {
+			slice_dbg(" fits potential !\n");
+			newaddr = addr;
+			goto convert;
+		}
 	}
 
 	/* If we have MAP_FIXED and failed the above steps, then error out */
@@ -562,46 +618,64 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	 * anywhere in the good area.
 	 */
 	if (addr) {
-		addr = slice_find_area(mm, len, good_mask,
-				       psize, topdown, high_limit);
-		if (addr != -ENOMEM) {
-			slice_dbg(" found area at 0x%lx\n", addr);
-			return addr;
+		newaddr = slice_find_area(mm, len, &good_mask,
+					  psize, topdown, high_limit);
+		if (newaddr != -ENOMEM) {
+			slice_dbg(" found area at 0x%lx\n", newaddr);
+			goto return_addr;
 		}
 	}
 
 	/* Now let's see if we can find something in the existing slices
 	 * for that size plus free slices
 	 */
-	addr = slice_find_area(mm, len, potential_mask,
-			       psize, topdown, high_limit);
+	newaddr = slice_find_area(mm, len, &potential_mask,
+				  psize, topdown, high_limit);
 
 #ifdef CONFIG_PPC_64K_PAGES
-	if (addr == -ENOMEM && psize == MMU_PAGE_64K) {
+	if (newaddr == -ENOMEM && psize == MMU_PAGE_64K) {
 		/* retry the search with 4k-page slices included */
-		slice_or_mask(&potential_mask, &compat_mask);
-		addr = slice_find_area(mm, len, potential_mask,
-				       psize, topdown, high_limit);
+		slice_or_mask(&potential_mask, &potential_mask, compat_maskp);
+		newaddr = slice_find_area(mm, len, &potential_mask,
+					  psize, topdown, high_limit);
 	}
 #endif
 
-	if (addr == -ENOMEM)
+	if (newaddr == -ENOMEM)
 		return -ENOMEM;
 
-	slice_range_to_mask(addr, len, &mask);
-	slice_dbg(" found potential area at 0x%lx\n", addr);
-	slice_print_mask(" mask", mask);
+	slice_range_to_mask(newaddr, len, &potential_mask);
+	slice_dbg(" found potential area at 0x%lx\n", newaddr);
+	slice_print_mask(" mask", &potential_mask);
 
  convert:
-	slice_andnot_mask(&mask, &good_mask);
-	slice_andnot_mask(&mask, &compat_mask);
-	if (mask.low_slices || !bitmap_empty(mask.high_slices, SLICE_NUM_HIGH)) {
-		slice_convert(mm, mask, psize);
+	/*
+	 * Try to allocate the context before converting the slices
+	 * so that a context allocation failure is handled gracefully.
+	 */
+	if (need_extra_context(mm, newaddr)) {
+		if (alloc_extended_context(mm, newaddr) < 0)
+			return -ENOMEM;
+	}
+
+	slice_andnot_mask(&potential_mask, &potential_mask, &good_mask);
+	if (compat_maskp && !fixed)
+		slice_andnot_mask(&potential_mask, &potential_mask, compat_maskp);
+	if (potential_mask.low_slices ||
+		(SLICE_NUM_HIGH &&
+		 !bitmap_empty(potential_mask.high_slices, SLICE_NUM_HIGH))) {
+		slice_convert(mm, &potential_mask, psize);
 		if (psize > MMU_PAGE_BASE)
 			on_each_cpu(slice_flush_segments, mm, 1);
 	}
-	return addr;
+	return newaddr;
 
+return_addr:
+	if (need_extra_context(mm, newaddr)) {
+		if (alloc_extended_context(mm, newaddr) < 0)
+			return -ENOMEM;
+	}
+	return newaddr;
 }
 EXPORT_SYMBOL_GPL(slice_get_unmapped_area);
 
@@ -627,94 +701,60 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp,
 
 unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
 {
-	unsigned char *hpsizes;
+	unsigned char *psizes;
 	int index, mask_index;
 
-	/*
-	 * Radix doesn't use slice, but can get enabled along with MMU_SLICE
-	 */
-	if (radix_enabled()) {
-#ifdef CONFIG_PPC_64K_PAGES
-		return MMU_PAGE_64K;
-#else
-		return MMU_PAGE_4K;
-#endif
-	}
+	VM_BUG_ON(radix_enabled());
+
 	if (addr < SLICE_LOW_TOP) {
-		u64 lpsizes;
-		lpsizes = mm->context.low_slices_psize;
+		psizes = mm->context.low_slices_psize;
 		index = GET_LOW_SLICE_INDEX(addr);
-		return (lpsizes >> (index * 4)) & 0xf;
+	} else {
+		psizes = mm->context.high_slices_psize;
+		index = GET_HIGH_SLICE_INDEX(addr);
 	}
-	hpsizes = mm->context.high_slices_psize;
-	index = GET_HIGH_SLICE_INDEX(addr);
 	mask_index = index & 0x1;
-	return (hpsizes[index >> 1] >> (mask_index * 4)) & 0xf;
+	return (psizes[index >> 1] >> (mask_index * 4)) & 0xf;
 }
 EXPORT_SYMBOL_GPL(get_slice_psize);
 
-/*
- * This is called by hash_page when it needs to do a lazy conversion of
- * an address space from real 64K pages to combo 4K pages (typically
- * when hitting a non cacheable mapping on a processor or hypervisor
- * that won't allow them for 64K pages).
- *
- * This is also called in init_new_context() to change back the user
- * psize from whatever the parent context had it set to
- * N.B. This may be called before mm->context.id has been set.
- *
- * This function will only change the content of the {low,high)_slice_psize
- * masks, it will not flush SLBs as this shall be handled lazily by the
- * caller.
- */
-void slice_set_user_psize(struct mm_struct *mm, unsigned int psize)
+void slice_init_new_context_exec(struct mm_struct *mm)
 {
-	int index, mask_index;
-	unsigned char *hpsizes;
-	unsigned long flags, lpsizes;
-	unsigned int old_psize;
-	int i;
+	unsigned char *hpsizes, *lpsizes;
+	struct slice_mask *mask;
+	unsigned int psize = mmu_virtual_psize;
 
-	slice_dbg("slice_set_user_psize(mm=%p, psize=%d)\n", mm, psize);
+	slice_dbg("slice_init_new_context_exec(mm=%p)\n", mm);
 
-	VM_BUG_ON(radix_enabled());
-	spin_lock_irqsave(&slice_convert_lock, flags);
-
-	old_psize = mm->context.user_psize;
-	slice_dbg(" old_psize=%d\n", old_psize);
-	if (old_psize == psize)
-		goto bail;
+	/*
+	 * In the case of exec, use the default limit. In the
+	 * case of fork it is just inherited from the mm being
+	 * duplicated.
+	 */
+#ifdef CONFIG_PPC64
+	mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64;
+#else
+	mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW;
+#endif
 
 	mm->context.user_psize = psize;
-	wmb();
 
+	/*
+	 * Set all slice psizes to the default.
+	 */
 	lpsizes = mm->context.low_slices_psize;
-	for (i = 0; i < SLICE_NUM_LOW; i++)
-		if (((lpsizes >> (i * 4)) & 0xf) == old_psize)
-			lpsizes = (lpsizes & ~(0xful << (i * 4))) |
-				(((unsigned long)psize) << (i * 4));
-	/* Assign the value back */
-	mm->context.low_slices_psize = lpsizes;
+	memset(lpsizes, (psize << 4) | psize, SLICE_NUM_LOW >> 1);
 
 	hpsizes = mm->context.high_slices_psize;
-	for (i = 0; i < SLICE_NUM_HIGH; i++) {
-		mask_index = i & 0x1;
-		index = i >> 1;
-		if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == old_psize)
-			hpsizes[index] = (hpsizes[index] &
-					  ~(0xf << (mask_index * 4))) |
-				(((unsigned long)psize) << (mask_index * 4));
-	}
-
-
-
-
-	slice_dbg(" lsps=%lx, hsps=%lx\n",
-		  (unsigned long)mm->context.low_slices_psize,
-		  (unsigned long)mm->context.high_slices_psize);
+	memset(hpsizes, (psize << 4) | psize, SLICE_NUM_HIGH >> 1);
 
- bail:
-	spin_unlock_irqrestore(&slice_convert_lock, flags);
+	/*
+	 * The slice mask cache starts zeroed; fill the default size cache.
+	 */
+	mask = slice_mask_for_size(mm, psize);
+	mask->low_slices = ~0UL;
+	if (SLICE_NUM_HIGH)
+		bitmap_fill(mask->high_slices, SLICE_NUM_HIGH);
 }
 
 void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
@@ -725,7 +765,7 @@ void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
 	VM_BUG_ON(radix_enabled());
 
 	slice_range_to_mask(start, len, &mask);
-	slice_convert(mm, mask, psize);
+	slice_convert(mm, &mask, psize);
 }
 
 #ifdef CONFIG_HUGETLB_PAGE
@@ -748,33 +788,27 @@ void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
  * for now as we only use slices with hugetlbfs enabled. This should
  * be fixed as the generic code gets fixed.
  */
-int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
+int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
 			   unsigned long len)
 {
-	struct slice_mask mask, available;
+	const struct slice_mask *maskp;
 	unsigned int psize = mm->context.user_psize;
-	unsigned long high_limit = mm->context.slb_addr_limit;
 
-	if (radix_enabled())
-		return 0;
+	VM_BUG_ON(radix_enabled());
 
-	slice_range_to_mask(addr, len, &mask);
-	slice_mask_for_size(mm, psize, &available, high_limit);
+	maskp = slice_mask_for_size(mm, psize);
 #ifdef CONFIG_PPC_64K_PAGES
 	/* We need to account for 4k slices too */
 	if (psize == MMU_PAGE_64K) {
-		struct slice_mask compat_mask;
-		slice_mask_for_size(mm, MMU_PAGE_4K, &compat_mask, high_limit);
-		slice_or_mask(&available, &compat_mask);
+		const struct slice_mask *compat_maskp;
+		struct slice_mask available;
+
+		compat_maskp = slice_mask_for_size(mm, MMU_PAGE_4K);
+		slice_or_mask(&available, maskp, compat_maskp);
+		return !slice_check_range_fits(mm, &available, addr, len);
 	}
 #endif
 
-#if 0 /* too verbose */
-	slice_dbg("is_hugepage_only_range(mm=%p, addr=%lx, len=%lx)\n",
-		 mm, addr, len);
-	slice_print_mask(" mask", mask);
-	slice_print_mask(" available", available);
-#endif
-	return !slice_check_fit(mm, mask, available);
+	return !slice_check_range_fits(mm, maskp, addr, len);
 }
 #endif
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index a07f5372a4bf..a5d7309c2d05 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -33,13 +33,12 @@ static inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
 {
 	unsigned long rb;
 	unsigned long rs;
-	unsigned int r = 1; /* radix format */
 
 	rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
 	rs = ((unsigned long)pid << PPC_BITLSHIFT(31));
 
-	asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4)
-		     : : "r"(rb), "r"(rs), "i"(ric), "i"(prs), "r"(r)
+	asm volatile(PPC_TLBIEL(%0, %1, %2, %3, 1)
+		     : : "r"(rb), "r"(rs), "i"(ric), "i"(prs)
 		     : "memory");
 }
 
@@ -98,7 +97,7 @@ static inline void __tlbiel_pid(unsigned long pid, int set,
 	rb |= set << PPC_BITLSHIFT(51);
 	rs = ((unsigned long)pid) << PPC_BITLSHIFT(31);
 	prs = 1; /* process scoped */
-	r = 1;   /* raidx format */
+	r = 1;   /* radix format */
 
 	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
@@ -112,7 +111,7 @@ static inline void __tlbie_pid(unsigned long pid, unsigned long ric)
 	rb = PPC_BIT(53); /* IS = 1 */
 	rs = pid << PPC_BITLSHIFT(31);
 	prs = 1; /* process scoped */
-	r = 1;   /* raidx format */
+	r = 1;   /* radix format */
 
 	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
@@ -128,7 +127,7 @@ static inline void __tlbiel_va(unsigned long va, unsigned long pid,
 	rb |= ap << PPC_BITLSHIFT(58);
 	rs = pid << PPC_BITLSHIFT(31);
 	prs = 1; /* process scoped */
-	r = 1;   /* raidx format */
+	r = 1;   /* radix format */
 
 	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
@@ -144,7 +143,7 @@ static inline void __tlbie_va(unsigned long va, unsigned long pid,
 	rb |= ap << PPC_BITLSHIFT(58);
 	rs = pid << PPC_BITLSHIFT(31);
 	prs = 1; /* process scoped */
-	r = 1;   /* raidx format */
+	r = 1;   /* radix format */
 
 	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
@@ -668,7 +667,7 @@ void radix__flush_tlb_all(void)
 
 	rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */
 	prs = 0; /* partition scoped */
-	r = 1;   /* raidx format */
+	r = 1;   /* radix format */
 	rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */
 
 	asm volatile("ptesync": : :"memory");
@@ -706,7 +705,7 @@ void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm,
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
 {
-	unsigned int pid = mm->context.id;
+	unsigned long pid = mm->context.id;
 
 	if (unlikely(pid == MMU_NO_CONTEXT))
 		return;
@@ -734,7 +733,7 @@ extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
 		for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {
 			if (sib == cpu)
 				continue;
-			if (paca[sib].kvm_hstate.kvm_vcpu)
+			if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu)
 				flush = true;
 		}
 		if (flush)
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c
index 9b23f12e863c..87d71dd25441 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/tlb_hash64.c
@@ -89,7 +89,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
 	/* Build full vaddr */
 	if (!is_kernel_addr(addr)) {
 		ssize = user_segment_size(addr);
-		vsid = get_vsid(mm->context.id, addr, ssize);
+		vsid = get_user_vsid(&mm->context, addr, ssize);
 	} else {
 		vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
 		ssize = mmu_kernel_ssize;
diff --git a/arch/powerpc/oprofile/cell/spu_task_sync.c b/arch/powerpc/oprofile/cell/spu_task_sync.c
index 44d67b167e0b..2668cc414e4e 100644
--- a/arch/powerpc/oprofile/cell/spu_task_sync.c
+++ b/arch/powerpc/oprofile/cell/spu_task_sync.c
@@ -208,7 +208,7 @@ prepare_cached_spu_info(struct spu *spu, unsigned long objectId)
 	/* Create cached_info and set spu_info[spu->number] to point to it.
 	 * spu->number is a system-wide value, not a per-node value.
 	 */
-	info = kzalloc(sizeof(struct cached_info), GFP_KERNEL);
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
 	if (!info) {
 		printk(KERN_ERR "SPU_PROF: "
 		       "%s, line %d: create vma_map failed\n",
diff --git a/arch/powerpc/oprofile/cell/vma_map.c b/arch/powerpc/oprofile/cell/vma_map.c
index c579b16845da..f40e37316dd6 100644
--- a/arch/powerpc/oprofile/cell/vma_map.c
+++ b/arch/powerpc/oprofile/cell/vma_map.c
@@ -69,8 +69,8 @@ vma_map_add(struct vma_to_fileoffset_map *map, unsigned int vma,
 	    unsigned int size, unsigned int offset, unsigned int guard_ptr,
 	    unsigned int guard_val)
 {
-	struct vma_to_fileoffset_map *new =
-		kzalloc(sizeof(struct vma_to_fileoffset_map), GFP_KERNEL);
+	struct vma_to_fileoffset_map *new = kzalloc(sizeof(*new), GFP_KERNEL);
+
 	if (!new) {
 		printk(KERN_ERR "SPU_PROF: %s, line %d: malloc failed\n",
 		       __func__, __LINE__);
diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
index 57ebc655d2ac..82986d2acd9b 100644
--- a/arch/powerpc/perf/Makefile
+++ b/arch/powerpc/perf/Makefile
@@ -4,7 +4,7 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
 obj-$(CONFIG_PERF_EVENTS)	+= callchain.o perf_regs.o
 
 obj-$(CONFIG_PPC_PERF_CTRS)	+= core-book3s.o bhrb.o
-obj64-$(CONFIG_PPC_PERF_CTRS)	+= power4-pmu.o ppc970-pmu.o power5-pmu.o \
+obj64-$(CONFIG_PPC_PERF_CTRS)	+= ppc970-pmu.o power5-pmu.o \
 				   power5+-pmu.o power6-pmu.o power7-pmu.o \
 				   isa207-common.o power8-pmu.o power9-pmu.o
 obj32-$(CONFIG_PPC_PERF_CTRS)	+= mpc7450-pmu.o
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index f8908ea4ea73..3f66fcf8ad99 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -198,6 +198,10 @@ static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp)
 
 	if (!(mmcra & MMCRA_SAMPLE_ENABLE) || sdar_valid)
 		*addrp = mfspr(SPRN_SDAR);
+
+	if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN) &&
+		is_kernel_addr(mfspr(SPRN_SDAR)))
+		*addrp = 0;
 }
 
 static bool regs_sihv(struct pt_regs *regs)
@@ -457,6 +461,16 @@ static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
 				/* invalid entry */
 				continue;
 
+			/*
+			 * The BHRB rolling buffer may well contain kernel
+			 * addresses at this point. Check privileges before
+			 * exporting them to userspace (avoid exposing regions
+			 * where speculative execution could have occurred).
+			 */
+			if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN) &&
+				is_kernel_addr(addr))
+				continue;
+
 			/* Branches are read most recent first (ie. mfbhrb 0 is
 			 * the most recent branch).
 			 * There are two types of valid entries:
@@ -1226,6 +1240,7 @@ static void power_pmu_disable(struct pmu *pmu)
 		 */
 		write_mmcr0(cpuhw, val);
 		mb();
+		isync();
 
 		/*
 		 * Disable instruction sampling if it was enabled
@@ -1234,12 +1249,26 @@ static void power_pmu_disable(struct pmu *pmu)
 			mtspr(SPRN_MMCRA,
 			      cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
 			mb();
+			isync();
 		}
 
 		cpuhw->disabled = 1;
 		cpuhw->n_added = 0;
 
 		ebb_switch_out(mmcr0);
+
+#ifdef CONFIG_PPC64
+		/*
+		 * SDAR and SIAR are readable by userspace, may contain kernel
+		 * addresses and are not switched on context switch, so clear
+		 * them now to avoid leaking anything to userspace in general,
+		 * including to another process.
+		 */
+		if (ppmu->flags & PPMU_ARCH_207S) {
+			mtspr(SPRN_SDAR, 0);
+			mtspr(SPRN_SIAR, 0);
+		}
+#endif
 	}
 
 	local_irq_restore(flags);
@@ -1810,6 +1839,18 @@ static int hw_perf_cache_event(u64 config, u64 *eventp)
 	return 0;
 }
 
+static bool is_event_blacklisted(u64 ev)
+{
+	int i;
+
+	for (i=0; i < ppmu->n_blacklist_ev; i++) {
+		if (ppmu->blacklist_ev[i] == ev)
+			return true;
+	}
+
+	return false;
+}
+
 static int power_pmu_event_init(struct perf_event *event)
 {
 	u64 ev;
@@ -1835,15 +1876,24 @@ static int power_pmu_event_init(struct perf_event *event)
 		ev = event->attr.config;
 		if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
 			return -EOPNOTSUPP;
+
+		if (ppmu->blacklist_ev && is_event_blacklisted(ev))
+			return -EINVAL;
 		ev = ppmu->generic_events[ev];
 		break;
 	case PERF_TYPE_HW_CACHE:
 		err = hw_perf_cache_event(event->attr.config, &ev);
 		if (err)
 			return err;
+
+		if (ppmu->blacklist_ev && is_event_blacklisted(ev))
+			return -EINVAL;
 		break;
 	case PERF_TYPE_RAW:
 		ev = event->attr.config;
+
+		if (ppmu->blacklist_ev && is_event_blacklisted(ev))
+			return -EINVAL;
 		break;
 	default:
 		return -ENOENT;
diff --git a/arch/powerpc/perf/power4-pmu.c b/arch/powerpc/perf/power4-pmu.c
deleted file mode 100644
index ce6072fa481b..000000000000
--- a/arch/powerpc/perf/power4-pmu.c
+++ /dev/null
@@ -1,622 +0,0 @@
-/*
- * Performance counter support for POWER4 (GP) and POWER4+ (GQ) processors.
- *
- * Copyright 2009 Paul Mackerras, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#include <linux/kernel.h>
-#include <linux/perf_event.h>
-#include <linux/string.h>
-#include <asm/reg.h>
-#include <asm/cputable.h>
-
-/*
- * Bits in event code for POWER4
- */
-#define PM_PMC_SH	12	/* PMC number (1-based) for direct events */
-#define PM_PMC_MSK	0xf
-#define PM_UNIT_SH	8	/* TTMMUX number and setting - unit select */
-#define PM_UNIT_MSK	0xf
-#define PM_LOWER_SH	6
-#define PM_LOWER_MSK	1
-#define PM_LOWER_MSKS	0x40
-#define PM_BYTE_SH	4	/* Byte number of event bus to use */
-#define PM_BYTE_MSK	3
-#define PM_PMCSEL_MSK	7
-
-/*
- * Unit code values
- */
-#define PM_FPU		1
-#define PM_ISU1		2
-#define PM_IFU		3
-#define PM_IDU0		4
-#define PM_ISU1_ALT	6
-#define PM_ISU2		7
-#define PM_IFU_ALT	8
-#define PM_LSU0		9
-#define PM_LSU1		0xc
-#define PM_GPS		0xf
-
-/*
- * Bits in MMCR0 for POWER4
- */
-#define MMCR0_PMC1SEL_SH	8
-#define MMCR0_PMC2SEL_SH	1
-#define MMCR_PMCSEL_MSK		0x1f
-
-/*
- * Bits in MMCR1 for POWER4
- */
-#define MMCR1_TTM0SEL_SH	62
-#define MMCR1_TTC0SEL_SH	61
-#define MMCR1_TTM1SEL_SH	59
-#define MMCR1_TTC1SEL_SH	58
-#define MMCR1_TTM2SEL_SH	56
-#define MMCR1_TTC2SEL_SH	55
-#define MMCR1_TTM3SEL_SH	53
-#define MMCR1_TTC3SEL_SH	52
-#define MMCR1_TTMSEL_MSK	3
-#define MMCR1_TD_CP_DBG0SEL_SH	50
-#define MMCR1_TD_CP_DBG1SEL_SH	48
-#define MMCR1_TD_CP_DBG2SEL_SH	46
-#define MMCR1_TD_CP_DBG3SEL_SH	44
-#define MMCR1_DEBUG0SEL_SH	43
-#define MMCR1_DEBUG1SEL_SH	42
-#define MMCR1_DEBUG2SEL_SH	41
-#define MMCR1_DEBUG3SEL_SH	40
-#define MMCR1_PMC1_ADDER_SEL_SH	39
-#define MMCR1_PMC2_ADDER_SEL_SH	38
-#define MMCR1_PMC6_ADDER_SEL_SH	37
-#define MMCR1_PMC5_ADDER_SEL_SH	36
-#define MMCR1_PMC8_ADDER_SEL_SH	35
-#define MMCR1_PMC7_ADDER_SEL_SH	34
-#define MMCR1_PMC3_ADDER_SEL_SH	33
-#define MMCR1_PMC4_ADDER_SEL_SH	32
-#define MMCR1_PMC3SEL_SH	27
-#define MMCR1_PMC4SEL_SH	22
-#define MMCR1_PMC5SEL_SH	17
-#define MMCR1_PMC6SEL_SH	12
-#define MMCR1_PMC7SEL_SH	7
-#define MMCR1_PMC8SEL_SH	2	/* note bit 0 is in MMCRA for GP */
-
-static short mmcr1_adder_bits[8] = {
-	MMCR1_PMC1_ADDER_SEL_SH,
-	MMCR1_PMC2_ADDER_SEL_SH,
-	MMCR1_PMC3_ADDER_SEL_SH,
-	MMCR1_PMC4_ADDER_SEL_SH,
-	MMCR1_PMC5_ADDER_SEL_SH,
-	MMCR1_PMC6_ADDER_SEL_SH,
-	MMCR1_PMC7_ADDER_SEL_SH,
-	MMCR1_PMC8_ADDER_SEL_SH
-};
-
-/*
- * Bits in MMCRA
- */
-#define MMCRA_PMC8SEL0_SH	17	/* PMC8SEL bit 0 for GP */
-
-/*
- * Layout of constraint bits:
- * 6666555555555544444444443333333333222222222211111111110000000000
- * 3210987654321098765432109876543210987654321098765432109876543210
- *        |[  >[  >[   >|||[  >[  ><  ><  ><  ><  ><><><><><><><><>
- *        | UC1 UC2 UC3 ||| PS1 PS2 B0  B1  B2  B3 P1P2P3P4P5P6P7P8
- * 	  \SMPL	        ||\TTC3SEL
- * 		        |\TTC_IFU_SEL
- * 		        \TTM2SEL0
- *
- * SMPL - SAMPLE_ENABLE constraint
- *     56: SAMPLE_ENABLE value 0x0100_0000_0000_0000
- *
- * UC1 - unit constraint 1: can't have all three of FPU/ISU1/IDU0|ISU2
- *     55: UC1 error 0x0080_0000_0000_0000
- *     54: FPU events needed 0x0040_0000_0000_0000
- *     53: ISU1 events needed 0x0020_0000_0000_0000
- *     52: IDU0|ISU2 events needed 0x0010_0000_0000_0000
- *
- * UC2 - unit constraint 2: can't have all three of FPU/IFU/LSU0
- *     51: UC2 error 0x0008_0000_0000_0000
- *     50: FPU events needed 0x0004_0000_0000_0000
- *     49: IFU events needed 0x0002_0000_0000_0000
- *     48: LSU0 events needed 0x0001_0000_0000_0000
- *
- * UC3 - unit constraint 3: can't have all four of LSU0/IFU/IDU0|ISU2/ISU1
- *     47: UC3 error 0x8000_0000_0000
- *     46: LSU0 events needed 0x4000_0000_0000
- *     45: IFU events needed 0x2000_0000_0000
- *     44: IDU0|ISU2 events needed 0x1000_0000_0000
- *     43: ISU1 events needed 0x0800_0000_0000
- *
- * TTM2SEL0
- *     42: 0 = IDU0 events needed
- *     	   1 = ISU2 events needed 0x0400_0000_0000
- *
- * TTC_IFU_SEL
- *     41: 0 = IFU.U events needed
- *     	   1 = IFU.L events needed 0x0200_0000_0000
- *
- * TTC3SEL
- *     40: 0 = LSU1.U events needed
- *     	   1 = LSU1.L events needed 0x0100_0000_0000
- *
- * PS1
- *     39: PS1 error 0x0080_0000_0000
- *     36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000
- *
- * PS2
- *     35: PS2 error 0x0008_0000_0000
- *     32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000
- *
- * B0
- *     28-31: Byte 0 event source 0xf000_0000
- *     	   1 = FPU
- * 	   2 = ISU1
- * 	   3 = IFU
- * 	   4 = IDU0
- * 	   7 = ISU2
- * 	   9 = LSU0
- * 	   c = LSU1
- * 	   f = GPS
- *
- * B1, B2, B3
- *     24-27, 20-23, 16-19: Byte 1, 2, 3 event sources
- *
- * P8
- *     15: P8 error 0x8000
- *     14-15: Count of events needing PMC8
- *
- * P1..P7
- *     0-13: Count of events needing PMC1..PMC7
- *
- * Note: this doesn't allow events using IFU.U to be combined with events
- * using IFU.L, though that is feasible (using TTM0 and TTM2).  However
- * there are no listed events for IFU.L (they are debug events not
- * verified for performance monitoring) so this shouldn't cause a
- * problem.
- */
-
-static struct unitinfo {
-	unsigned long	value, mask;
-	int		unit;
-	int		lowerbit;
-} p4_unitinfo[16] = {
-	[PM_FPU]  = { 0x44000000000000ul, 0x88000000000000ul, PM_FPU, 0 },
-	[PM_ISU1] = { 0x20080000000000ul, 0x88000000000000ul, PM_ISU1, 0 },
-	[PM_ISU1_ALT] =
-		    { 0x20080000000000ul, 0x88000000000000ul, PM_ISU1, 0 },
-	[PM_IFU]  = { 0x02200000000000ul, 0x08820000000000ul, PM_IFU, 41 },
-	[PM_IFU_ALT] =
-		    { 0x02200000000000ul, 0x08820000000000ul, PM_IFU, 41 },
-	[PM_IDU0] = { 0x10100000000000ul, 0x80840000000000ul, PM_IDU0, 1 },
-	[PM_ISU2] = { 0x10140000000000ul, 0x80840000000000ul, PM_ISU2, 0 },
-	[PM_LSU0] = { 0x01400000000000ul, 0x08800000000000ul, PM_LSU0, 0 },
-	[PM_LSU1] = { 0x00000000000000ul, 0x00010000000000ul, PM_LSU1, 40 },
-	[PM_GPS]  = { 0x00000000000000ul, 0x00000000000000ul, PM_GPS, 0 }
-};
-
-static unsigned char direct_marked_event[8] = {
-	(1<<2) | (1<<3),	/* PMC1: PM_MRK_GRP_DISP, PM_MRK_ST_CMPL */
-	(1<<3) | (1<<5),	/* PMC2: PM_THRESH_TIMEO, PM_MRK_BRU_FIN */
-	(1<<3),			/* PMC3: PM_MRK_ST_CMPL_INT */
-	(1<<4) | (1<<5),	/* PMC4: PM_MRK_GRP_CMPL, PM_MRK_CRU_FIN */
-	(1<<4) | (1<<5),	/* PMC5: PM_MRK_GRP_TIMEO */
-	(1<<3) | (1<<4) | (1<<5),
-		/* PMC6: PM_MRK_ST_GPS, PM_MRK_FXU_FIN, PM_MRK_GRP_ISSUED */
-	(1<<4) | (1<<5),	/* PMC7: PM_MRK_FPU_FIN, PM_MRK_INST_FIN */
-	(1<<4),			/* PMC8: PM_MRK_LSU_FIN */
-};
-
-/*
- * Returns 1 if event counts things relating to marked instructions
- * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
- */
-static int p4_marked_instr_event(u64 event)
-{
-	int pmc, psel, unit, byte, bit;
-	unsigned int mask;
-
-	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
-	psel = event & PM_PMCSEL_MSK;
-	if (pmc) {
-		if (direct_marked_event[pmc - 1] & (1 << psel))
-			return 1;
-		if (psel == 0)		/* add events */
-			bit = (pmc <= 4)? pmc - 1: 8 - pmc;
-		else if (psel == 6)	/* decode events */
-			bit = 4;
-		else
-			return 0;
-	} else
-		bit = psel;
-
-	byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
-	unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
-	mask = 0;
-	switch (unit) {
-	case PM_LSU1:
-		if (event & PM_LOWER_MSKS)
-			mask = 1 << 28;		/* byte 7 bit 4 */
-		else
-			mask = 6 << 24;		/* byte 3 bits 1 and 2 */
-		break;
-	case PM_LSU0:
-		/* byte 3, bit 3; byte 2 bits 0,2,3,4,5; byte 1 */
-		mask = 0x083dff00;
-	}
-	return (mask >> (byte * 8 + bit)) & 1;
-}
-
-static int p4_get_constraint(u64 event, unsigned long *maskp,
-			     unsigned long *valp)
-{
-	int pmc, byte, unit, lower, sh;
-	unsigned long mask = 0, value = 0;
-	int grp = -1;
-
-	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
-	if (pmc) {
-		if (pmc > 8)
-			return -1;
-		sh = (pmc - 1) * 2;
-		mask |= 2 << sh;
-		value |= 1 << sh;
-		grp = ((pmc - 1) >> 1) & 1;
-	}
-	unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
-	byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
-	if (unit) {
-		lower = (event >> PM_LOWER_SH) & PM_LOWER_MSK;
-
-		/*
-		 * Bus events on bytes 0 and 2 can be counted
-		 * on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8.
-		 */
-		if (!pmc)
-			grp = byte & 1;
-
-		if (!p4_unitinfo[unit].unit)
-			return -1;
-		mask  |= p4_unitinfo[unit].mask;
-		value |= p4_unitinfo[unit].value;
-		sh = p4_unitinfo[unit].lowerbit;
-		if (sh > 1)
-			value |= (unsigned long)lower << sh;
-		else if (lower != sh)
-			return -1;
-		unit = p4_unitinfo[unit].unit;
-
-		/* Set byte lane select field */
-		mask  |= 0xfULL << (28 - 4 * byte);
-		value |= (unsigned long)unit << (28 - 4 * byte);
-	}
-	if (grp == 0) {
-		/* increment PMC1/2/5/6 field */
-		mask  |= 0x8000000000ull;
-		value |= 0x1000000000ull;
-	} else {
-		/* increment PMC3/4/7/8 field */
-		mask  |= 0x800000000ull;
-		value |= 0x100000000ull;
-	}
-
-	/* Marked instruction events need sample_enable set */
-	if (p4_marked_instr_event(event)) {
-		mask  |= 1ull << 56;
-		value |= 1ull << 56;
-	}
-
-	/* PMCSEL=6 decode events on byte 2 need sample_enable clear */
-	if (pmc && (event & PM_PMCSEL_MSK) == 6 && byte == 2)
-		mask  |= 1ull << 56;
-
-	*maskp = mask;
-	*valp = value;
-	return 0;
-}
-
-static unsigned int ppc_inst_cmpl[] = {
-	0x1001, 0x4001, 0x6001, 0x7001, 0x8001
-};
-
-static int p4_get_alternatives(u64 event, unsigned int flags, u64 alt[])
-{
-	int i, j, na;
-
-	alt[0] = event;
-	na = 1;
-
-	/* 2 possibilities for PM_GRP_DISP_REJECT */
-	if (event == 0x8003 || event == 0x0224) {
-		alt[1] = event ^ (0x8003 ^ 0x0224);
-		return 2;
-	}
-
-	/* 2 possibilities for PM_ST_MISS_L1 */
-	if (event == 0x0c13 || event == 0x0c23) {
-		alt[1] = event ^ (0x0c13 ^ 0x0c23);
-		return 2;
-	}
-
-	/* several possibilities for PM_INST_CMPL */
-	for (i = 0; i < ARRAY_SIZE(ppc_inst_cmpl); ++i) {
-		if (event == ppc_inst_cmpl[i]) {
-			for (j = 0; j < ARRAY_SIZE(ppc_inst_cmpl); ++j)
-				if (j != i)
-					alt[na++] = ppc_inst_cmpl[j];
-			break;
-		}
-	}
-
-	return na;
-}
-
-static int p4_compute_mmcr(u64 event[], int n_ev,
-			   unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[])
-{
-	unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0;
-	unsigned int pmc, unit, byte, psel, lower;
-	unsigned int ttm, grp;
-	unsigned int pmc_inuse = 0;
-	unsigned int pmc_grp_use[2];
-	unsigned char busbyte[4];
-	unsigned char unituse[16];
-	unsigned int unitlower = 0;
-	int i;
-
-	if (n_ev > 8)
-		return -1;
-
-	/* First pass to count resource use */
-	pmc_grp_use[0] = pmc_grp_use[1] = 0;
-	memset(busbyte, 0, sizeof(busbyte));
-	memset(unituse, 0, sizeof(unituse));
-	for (i = 0; i < n_ev; ++i) {
-		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
-		if (pmc) {
-			if (pmc_inuse & (1 << (pmc - 1)))
-				return -1;
-			pmc_inuse |= 1 << (pmc - 1);
-			/* count 1/2/5/6 vs 3/4/7/8 use */
-			++pmc_grp_use[((pmc - 1) >> 1) & 1];
-		}
-		unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
-		byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
-		lower = (event[i] >> PM_LOWER_SH) & PM_LOWER_MSK;
-		if (unit) {
-			if (!pmc)
-				++pmc_grp_use[byte & 1];
-			if (unit == 6 || unit == 8)
-				/* map alt ISU1/IFU codes: 6->2, 8->3 */
-				unit = (unit >> 1) - 1;
-			if (busbyte[byte] && busbyte[byte] != unit)
-				return -1;
-			busbyte[byte] = unit;
-			lower <<= unit;
-			if (unituse[unit] && lower != (unitlower & lower))
-				return -1;
-			unituse[unit] = 1;
-			unitlower |= lower;
-		}
-	}
-	if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4)
-		return -1;
-
-	/*
-	 * Assign resources and set multiplexer selects.
-	 *
-	 * Units 1,2,3 are on TTM0, 4,6,7 on TTM1, 8,10 on TTM2.
-	 * Each TTMx can only select one unit, but since
-	 * units 2 and 6 are both ISU1, and 3 and 8 are both IFU,
-	 * we have some choices.
-	 */
-	if (unituse[2] & (unituse[1] | (unituse[3] & unituse[9]))) {
-		unituse[6] = 1;		/* Move 2 to 6 */
-		unituse[2] = 0;
-	}
-	if (unituse[3] & (unituse[1] | unituse[2])) {
-		unituse[8] = 1;		/* Move 3 to 8 */
-		unituse[3] = 0;
-		unitlower = (unitlower & ~8) | ((unitlower & 8) << 5);
-	}
-	/* Check only one unit per TTMx */
-	if (unituse[1] + unituse[2] + unituse[3] > 1 ||
-	    unituse[4] + unituse[6] + unituse[7] > 1 ||
-	    unituse[8] + unituse[9] > 1 ||
-	    (unituse[5] | unituse[10] | unituse[11] |
-	     unituse[13] | unituse[14]))
-		return -1;
-
-	/* Set TTMxSEL fields.  Note, units 1-3 => TTM0SEL codes 0-2 */
-	mmcr1 |= (unsigned long)(unituse[3] * 2 + unituse[2])
-		<< MMCR1_TTM0SEL_SH;
-	mmcr1 |= (unsigned long)(unituse[7] * 3 + unituse[6] * 2)
-		<< MMCR1_TTM1SEL_SH;
-	mmcr1 |= (unsigned long)unituse[9] << MMCR1_TTM2SEL_SH;
-
-	/* Set TTCxSEL fields. */
-	if (unitlower & 0xe)
-		mmcr1 |= 1ull << MMCR1_TTC0SEL_SH;
-	if (unitlower & 0xf0)
-		mmcr1 |= 1ull << MMCR1_TTC1SEL_SH;
-	if (unitlower & 0xf00)
-		mmcr1 |= 1ull << MMCR1_TTC2SEL_SH;
-	if (unitlower & 0x7000)
-		mmcr1 |= 1ull << MMCR1_TTC3SEL_SH;
-
-	/* Set byte lane select fields. */
-	for (byte = 0; byte < 4; ++byte) {
-		unit = busbyte[byte];
-		if (!unit)
-			continue;
-		if (unit == 0xf) {
-			/* special case for GPS */
-			mmcr1 |= 1ull << (MMCR1_DEBUG0SEL_SH - byte);
-		} else {
-			if (!unituse[unit])
-				ttm = unit - 1;		/* 2->1, 3->2 */
-			else
-				ttm = unit >> 2;
-			mmcr1 |= (unsigned long)ttm
-				<< (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
-		}
-	}
-
-	/* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
-	for (i = 0; i < n_ev; ++i) {
-		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
-		unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
-		byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
-		psel = event[i] & PM_PMCSEL_MSK;
-		if (!pmc) {
-			/* Bus event or 00xxx direct event (off or cycles) */
-			if (unit)
-				psel |= 0x10 | ((byte & 2) << 2);
-			for (pmc = 0; pmc < 8; ++pmc) {
-				if (pmc_inuse & (1 << pmc))
-					continue;
-				grp = (pmc >> 1) & 1;
-				if (unit) {
-					if (grp == (byte & 1))
-						break;
-				} else if (pmc_grp_use[grp] < 4) {
-					++pmc_grp_use[grp];
-					break;
-				}
-			}
-			pmc_inuse |= 1 << pmc;
-		} else {
-			/* Direct event */
-			--pmc;
-			if (psel == 0 && (byte & 2))
-				/* add events on higher-numbered bus */
-				mmcr1 |= 1ull << mmcr1_adder_bits[pmc];
-			else if (psel == 6 && byte == 3)
-				/* seem to need to set sample_enable here */
-				mmcra |= MMCRA_SAMPLE_ENABLE;
-			psel |= 8;
-		}
-		if (pmc <= 1)
-			mmcr0 |= psel << (MMCR0_PMC1SEL_SH - 7 * pmc);
-		else
-			mmcr1 |= psel << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2));
-		if (pmc == 7)	/* PMC8 */
-			mmcra |= (psel & 1) << MMCRA_PMC8SEL0_SH;
-		hwc[i] = pmc;
-		if (p4_marked_instr_event(event[i]))
-			mmcra |= MMCRA_SAMPLE_ENABLE;
-	}
-
-	if (pmc_inuse & 1)
-		mmcr0 |= MMCR0_PMC1CE;
-	if (pmc_inuse & 0xfe)
-		mmcr0 |= MMCR0_PMCjCE;
-
-	mmcra |= 0x2000;	/* mark only one IOP per PPC instruction */
-
-	/* Return MMCRx values */
-	mmcr[0] = mmcr0;
-	mmcr[1] = mmcr1;
-	mmcr[2] = mmcra;
-	return 0;
-}
-
-static void p4_disable_pmc(unsigned int pmc, unsigned long mmcr[])
-{
-	/*
-	 * Setting the PMCxSEL field to 0 disables PMC x.
-	 * (Note that pmc is 0-based here, not 1-based.)
-	 */
-	if (pmc <= 1) {
-		mmcr[0] &= ~(0x1fUL << (MMCR0_PMC1SEL_SH - 7 * pmc));
-	} else {
-		mmcr[1] &= ~(0x1fUL << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2)));
-		if (pmc == 7)
-			mmcr[2] &= ~(1UL << MMCRA_PMC8SEL0_SH);
-	}
-}
-
-static int p4_generic_events[] = {
-	[PERF_COUNT_HW_CPU_CYCLES]		= 7,
-	[PERF_COUNT_HW_INSTRUCTIONS]		= 0x1001,
-	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0x8c10, /* PM_LD_REF_L1 */
-	[PERF_COUNT_HW_CACHE_MISSES]		= 0x3c10, /* PM_LD_MISS_L1 */
-	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x330,  /* PM_BR_ISSUED */
-	[PERF_COUNT_HW_BRANCH_MISSES]		= 0x331,  /* PM_BR_MPRED_CR */
-};
-
-#define C(x)	PERF_COUNT_HW_CACHE_##x
-
-/*
- * Table of generalized cache-related events.
- * 0 means not supported, -1 means nonsensical, other values
- * are event codes.
- */
-static int power4_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
-	[C(L1D)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
-		[C(OP_READ)] = {	0x8c10,		0x3c10	},
-		[C(OP_WRITE)] = {	0x7c10,		0xc13	},
-		[C(OP_PREFETCH)] = {	0xc35,		0	},
-	},
-	[C(L1I)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
-		[C(OP_READ)] = {	0,		0	},
-		[C(OP_WRITE)] = {	-1,		-1	},
-		[C(OP_PREFETCH)] = {	0,		0	},
-	},
-	[C(LL)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
-		[C(OP_READ)] = {	0,		0	},
-		[C(OP_WRITE)] = {	0,		0	},
-		[C(OP_PREFETCH)] = {	0xc34,		0	},
-	},
-	[C(DTLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
-		[C(OP_READ)] = {	0,		0x904	},
-		[C(OP_WRITE)] = {	-1,		-1	},
-		[C(OP_PREFETCH)] = {	-1,		-1	},
-	},
-	[C(ITLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
-		[C(OP_READ)] = {	0,		0x900	},
-		[C(OP_WRITE)] = {	-1,		-1	},
-		[C(OP_PREFETCH)] = {	-1,		-1	},
-	},
-	[C(BPU)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
-		[C(OP_READ)] = {	0x330,		0x331	},
-		[C(OP_WRITE)] = {	-1,		-1	},
-		[C(OP_PREFETCH)] = {	-1,		-1	},
-	},
-	[C(NODE)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
-		[C(OP_READ)] = {	-1,		-1	},
-		[C(OP_WRITE)] = {	-1,		-1	},
-		[C(OP_PREFETCH)] = {	-1,		-1	},
-	},
-};
-
-static struct power_pmu power4_pmu = {
-	.name			= "POWER4/4+",
-	.n_counter		= 8,
-	.max_alternatives	= 5,
-	.add_fields		= 0x0000001100005555ul,
-	.test_adder		= 0x0011083300000000ul,
-	.compute_mmcr		= p4_compute_mmcr,
-	.get_constraint		= p4_get_constraint,
-	.get_alternatives	= p4_get_alternatives,
-	.disable_pmc		= p4_disable_pmc,
-	.n_generic		= ARRAY_SIZE(p4_generic_events),
-	.generic_events		= p4_generic_events,
-	.cache_events		= &power4_cache_events,
-	.flags			= PPMU_NO_SIPR | PPMU_NO_CONT_SAMPLING,
-};
-
-static int __init init_power4_pmu(void)
-{
-	if (!cur_cpu_spec->oprofile_cpu_type ||
-	    strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power4"))
-		return -ENODEV;
-
-	return register_power_pmu(&power4_pmu);
-}
-
-early_initcall(init_power4_pmu);
diff --git a/arch/powerpc/perf/power9-events-list.h b/arch/powerpc/perf/power9-events-list.h
index e99c6bf4d391..7de344b7d9cc 100644
--- a/arch/powerpc/perf/power9-events-list.h
+++ b/arch/powerpc/perf/power9-events-list.h
@@ -69,3 +69,31 @@ EVENT(PM_BR_CMPL_ALT,				0x10012)
 EVENT(PM_BR_2PATH,				0x20036)
 /* ALternate branch event that are not strongly biased */
 EVENT(PM_BR_2PATH_ALT,				0x40036)
+
+/* Blacklisted events */
+EVENT(PM_MRK_ST_DONE_L2,			0x10134)
+EVENT(PM_RADIX_PWC_L1_HIT,			0x1f056)
+EVENT(PM_FLOP_CMPL,				0x100f4)
+EVENT(PM_MRK_NTF_FIN,				0x20112)
+EVENT(PM_RADIX_PWC_L2_HIT,			0x2d024)
+EVENT(PM_IFETCH_THROTTLE,			0x3405e)
+EVENT(PM_MRK_L2_TM_ST_ABORT_SISTER,		0x3e15c)
+EVENT(PM_RADIX_PWC_L3_HIT,			0x3f056)
+EVENT(PM_RUN_CYC_SMT2_MODE,			0x3006c)
+EVENT(PM_TM_TX_PASS_RUN_INST,			0x4e014)
+EVENT(PM_DISP_HELD_SYNC_HOLD,			0x4003c)
+EVENT(PM_DTLB_MISS_16G,				0x1c058)
+EVENT(PM_DERAT_MISS_2M,				0x1c05a)
+EVENT(PM_DTLB_MISS_2M,				0x1c05c)
+EVENT(PM_MRK_DTLB_MISS_1G,			0x1d15c)
+EVENT(PM_DTLB_MISS_4K,				0x2c056)
+EVENT(PM_DERAT_MISS_1G,				0x2c05a)
+EVENT(PM_MRK_DERAT_MISS_2M,			0x2d152)
+EVENT(PM_MRK_DTLB_MISS_4K,			0x2d156)
+EVENT(PM_MRK_DTLB_MISS_16G,			0x2d15e)
+EVENT(PM_DTLB_MISS_64K,				0x3c056)
+EVENT(PM_MRK_DERAT_MISS_1G,			0x3d152)
+EVENT(PM_MRK_DTLB_MISS_64K,			0x3d156)
+EVENT(PM_DTLB_MISS_16M,				0x4c056)
+EVENT(PM_DTLB_MISS_1G,				0x4c05a)
+EVENT(PM_MRK_DTLB_MISS_16M,			0x4c15e)
diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c
index 24b5b5b7a206..2ca0b33b4efb 100644
--- a/arch/powerpc/perf/power9-pmu.c
+++ b/arch/powerpc/perf/power9-pmu.c
@@ -101,9 +101,45 @@ enum {
 #define POWER9_MMCRA_IFM2		0x0000000080000000UL
 #define POWER9_MMCRA_IFM3		0x00000000C0000000UL
 
+/* Nasty Power9 specific hack */
+#define PVR_POWER9_CUMULUS		0x00002000
+
 /* PowerISA v2.07 format attribute structure*/
 extern struct attribute_group isa207_pmu_format_group;
 
+static int p9_dd21_bl_ev[] = {	/* blacklist for PVR_CFG 2, PVR_MIN 1 */
+	PM_MRK_ST_DONE_L2,
+	PM_RADIX_PWC_L1_HIT,
+	PM_FLOP_CMPL,
+	PM_MRK_NTF_FIN,
+	PM_RADIX_PWC_L2_HIT,
+	PM_IFETCH_THROTTLE,
+	PM_MRK_L2_TM_ST_ABORT_SISTER,
+	PM_RADIX_PWC_L3_HIT,
+	PM_RUN_CYC_SMT2_MODE,
+	PM_TM_TX_PASS_RUN_INST,
+	PM_DISP_HELD_SYNC_HOLD,
+};
+
+static int p9_dd22_bl_ev[] = {	/* blacklist for PVR_CFG 2, PVR_MIN 2 */
+	PM_DTLB_MISS_16G,
+	PM_DERAT_MISS_2M,
+	PM_DTLB_MISS_2M,
+	PM_MRK_DTLB_MISS_1G,
+	PM_DTLB_MISS_4K,
+	PM_DERAT_MISS_1G,
+	PM_MRK_DERAT_MISS_2M,
+	PM_MRK_DTLB_MISS_4K,
+	PM_MRK_DTLB_MISS_16G,
+	PM_DTLB_MISS_64K,
+	PM_MRK_DERAT_MISS_1G,
+	PM_MRK_DTLB_MISS_64K,
+	PM_DISP_HELD_SYNC_HOLD,
+	PM_DTLB_MISS_16M,
+	PM_DTLB_MISS_1G,
+	PM_MRK_DTLB_MISS_16M,
+};
+
 /* Table of alternatives, sorted by column 0 */
 static const unsigned int power9_event_alternatives[][MAX_ALT] = {
 	{ PM_INST_DISP,			PM_INST_DISP_ALT },
@@ -446,12 +482,24 @@ static struct power_pmu power9_pmu = {
 static int __init init_power9_pmu(void)
 {
 	int rc = 0;
+	unsigned int pvr = mfspr(SPRN_PVR);
 
 	/* Comes from cpu_specs[] */
 	if (!cur_cpu_spec->oprofile_cpu_type ||
 	    strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power9"))
 		return -ENODEV;
 
+	/* Blacklist events */
+	if (!(pvr & PVR_POWER9_CUMULUS)) {
+		if ((PVR_CFG(pvr) == 2) && (PVR_MIN(pvr) == 1)) {
+			power9_pmu.blacklist_ev = p9_dd21_bl_ev;
+			power9_pmu.n_blacklist_ev = ARRAY_SIZE(p9_dd21_bl_ev);
+		} else if ((PVR_CFG(pvr) == 2) && (PVR_MIN(pvr) == 2)) {
+			power9_pmu.blacklist_ev = p9_dd22_bl_ev;
+			power9_pmu.n_blacklist_ev = ARRAY_SIZE(p9_dd22_bl_ev);
+		}
+	}
+
 	if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
 		/*
 		 * Since PM_INST_CMPL may not provide right counts in all
diff --git a/arch/powerpc/platforms/4xx/msi.c b/arch/powerpc/platforms/4xx/msi.c
index d50417e23add..96aaae678928 100644
--- a/arch/powerpc/platforms/4xx/msi.c
+++ b/arch/powerpc/platforms/4xx/msi.c
@@ -223,7 +223,7 @@ static int ppc4xx_msi_probe(struct platform_device *dev)
 
 	dev_dbg(&dev->dev, "PCIE-MSI: Setting up MSI support...\n");
 
-	msi = kzalloc(sizeof(struct ppc4xx_msi), GFP_KERNEL);
+	msi = kzalloc(sizeof(*msi), GFP_KERNEL);
 	if (!msi) {
 		dev_err(&dev->dev, "No memory for MSI structure\n");
 		return -ENOMEM;
@@ -241,7 +241,8 @@ static int ppc4xx_msi_probe(struct platform_device *dev)
 	if (!msi_irqs)
 		return -ENODEV;
 
-	if (ppc4xx_setup_pcieh_hw(dev, res, msi))
+	err = ppc4xx_setup_pcieh_hw(dev, res, msi);
+	if (err)
 		goto error_out;
 
 	err = ppc4xx_msi_init_allocator(dev, msi);
diff --git a/arch/powerpc/platforms/4xx/ocm.c b/arch/powerpc/platforms/4xx/ocm.c
index 85d9e37f5ccb..69d9f60d9fe5 100644
--- a/arch/powerpc/platforms/4xx/ocm.c
+++ b/arch/powerpc/platforms/4xx/ocm.c
@@ -339,7 +339,7 @@ void *ppc4xx_ocm_alloc(phys_addr_t *phys, int size, int align,
 		if (IS_ERR_VALUE(offset))
 			continue;
 
-		ocm_blk = kzalloc(sizeof(struct ocm_block), GFP_KERNEL);
+		ocm_blk = kzalloc(sizeof(*ocm_blk), GFP_KERNEL);
 		if (!ocm_blk) {
 			printk(KERN_ERR "PPC4XX OCM: could not allocate ocm block");
 			rh_free(ocm_reg->rh, offset);
diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c
index f51fd35f4618..7e966f4cf19a 100644
--- a/arch/powerpc/platforms/85xx/smp.c
+++ b/arch/powerpc/platforms/85xx/smp.c
@@ -147,7 +147,7 @@ static void qoriq_cpu_kill(unsigned int cpu)
 	for (i = 0; i < 500; i++) {
 		if (is_cpu_dead(cpu)) {
 #ifdef CONFIG_PPC64
-			paca[cpu].cpu_start = 0;
+			paca_ptrs[cpu]->cpu_start = 0;
 #endif
 			return;
 		}
@@ -328,7 +328,7 @@ static int smp_85xx_kick_cpu(int nr)
 		return ret;
 
 done:
-	paca[nr].cpu_start = 1;
+	paca_ptrs[nr]->cpu_start = 1;
 	generic_set_cpu_up(nr);
 
 	return ret;
@@ -409,14 +409,14 @@ void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary)
 	}
 
 	if (disable_threadbit) {
-		while (paca[disable_cpu].kexec_state < KEXEC_STATE_REAL_MODE) {
+		while (paca_ptrs[disable_cpu]->kexec_state < KEXEC_STATE_REAL_MODE) {
 			barrier();
 			now = mftb();
 			if (!notified && now - start > 1000000) {
 				pr_info("%s/%d: waiting for cpu %d to enter KEXEC_STATE_REAL_MODE (%d)\n",
 					__func__, smp_processor_id(),
 					disable_cpu,
-					paca[disable_cpu].kexec_state);
+					paca_ptrs[disable_cpu]->kexec_state);
 				notified = true;
 			}
 		}
diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c
index e1274db53d48..2188d691a40f 100644
--- a/arch/powerpc/platforms/8xx/m8xx_setup.c
+++ b/arch/powerpc/platforms/8xx/m8xx_setup.c
@@ -217,13 +217,7 @@ void __noreturn mpc8xx_restart(char *cmd)
 
 static void cpm_cascade(struct irq_desc *desc)
 {
-	struct irq_chip *chip = irq_desc_get_chip(desc);
-	int cascade_irq = cpm_get_irq();
-
-	if (cascade_irq >= 0)
-		generic_handle_irq(cascade_irq);
-
-	chip->irq_eoi(&desc->irq_data);
+	generic_handle_irq(cpm_get_irq());
 }
 
 /* Initialize the internal interrupt controllers.  The number of
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index a429d859f15d..67d3125d0610 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -61,7 +61,7 @@ choice
 	help
 	  There are two families of 64 bit PowerPC chips supported.
 	  The most common ones are the desktop and server CPUs
-	  (POWER4, POWER5, 970, POWER5+, POWER6, POWER7, POWER8 ...)
+	  (POWER5, 970, POWER5+, POWER6, POWER7, POWER8, POWER9 ...)
 
 	  The other are the "embedded" processors compliant with the
 	  "Book 3E" variant of the architecture
@@ -87,7 +87,6 @@ endchoice
 choice
 	prompt "CPU selection"
 	depends on PPC64
-	default POWER8_CPU if CPU_LITTLE_ENDIAN
 	default GENERIC_CPU
 	help
 	  This will create a kernel which is optimised for a particular CPU.
@@ -96,17 +95,18 @@ choice
 	  If unsure, select Generic.
 
 config GENERIC_CPU
-	bool "Generic"
+	bool "Generic (POWER4 and above)"
 	depends on !CPU_LITTLE_ENDIAN
 
+config GENERIC_CPU
+	bool "Generic (POWER8 and above)"
+	depends on CPU_LITTLE_ENDIAN
+	select ARCH_HAS_FAST_MULTIPLIER
+
 config CELL_CPU
 	bool "Cell Broadband Engine"
 	depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN
 
-config POWER4_CPU
-	bool "POWER4"
-	depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN
-
 config POWER5_CPU
 	bool "POWER5"
 	depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN
@@ -125,6 +125,11 @@ config POWER8_CPU
 	depends on PPC_BOOK3S_64
 	select ARCH_HAS_FAST_MULTIPLIER
 
+config POWER9_CPU
+	bool "POWER9"
+	depends on PPC_BOOK3S_64
+	select ARCH_HAS_FAST_MULTIPLIER
+
 config E5500_CPU
 	bool "Freescale e5500"
 	depends on E500
@@ -326,6 +331,7 @@ config PPC_BOOK3E_MMU
 config PPC_MM_SLICES
 	bool
 	default y if PPC_BOOK3S_64
+	default y if PPC_8xx && HUGETLB_PAGE
 	default n
 
 config PPC_HAVE_PMU_SUPPORT
diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c
index 6ea3f248b155..326d34e2aa02 100644
--- a/arch/powerpc/platforms/cell/axon_msi.c
+++ b/arch/powerpc/platforms/cell/axon_msi.c
@@ -342,7 +342,7 @@ static int axon_msi_probe(struct platform_device *device)
 
 	pr_devel("axon_msi: setting up dn %pOF\n", dn);
 
-	msic = kzalloc(sizeof(struct axon_msic), GFP_KERNEL);
+	msic = kzalloc(sizeof(*msic), GFP_KERNEL);
 	if (!msic) {
 		printk(KERN_ERR "axon_msi: couldn't allocate msic for %pOF\n",
 		       dn);
diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c
index f84d52a2db40..1aeac5761e0b 100644
--- a/arch/powerpc/platforms/cell/smp.c
+++ b/arch/powerpc/platforms/cell/smp.c
@@ -83,7 +83,7 @@ static inline int smp_startup_cpu(unsigned int lcpu)
 	pcpu = get_hard_smp_processor_id(lcpu);
 
 	/* Fixup atomic count: it exited inside IRQ handler. */
-	task_thread_info(paca[lcpu].__current)->preempt_count	= 0;
+	task_thread_info(paca_ptrs[lcpu]->__current)->preempt_count	= 0;
 
 	/*
 	 * If the RTAS start-cpu token does not exist then presume the
@@ -126,7 +126,7 @@ static int smp_cell_kick_cpu(int nr)
 	 * cpu_start field to become non-zero After we set cpu_start,
 	 * the processor will continue on to secondary_start
 	 */
-	paca[nr].cpu_start = 1;
+	paca_ptrs[nr]->cpu_start = 1;
 
 	return 0;
 }
diff --git a/arch/powerpc/platforms/cell/spider-pci.c b/arch/powerpc/platforms/cell/spider-pci.c
index d1e61e273e64..1200d0dea512 100644
--- a/arch/powerpc/platforms/cell/spider-pci.c
+++ b/arch/powerpc/platforms/cell/spider-pci.c
@@ -133,7 +133,7 @@ int __init spiderpci_iowa_init(struct iowa_bus *bus, void *data)
 	pr_debug("SPIDERPCI-IOWA:Bus initialize for spider(%pOF)\n",
 		 np);
 
-	priv = kzalloc(sizeof(struct spiderpci_iowa_private), GFP_KERNEL);
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
 	if (!priv) {
 		pr_err("SPIDERPCI-IOWA:"
 		       "Can't allocate struct spiderpci_iowa_private");
diff --git a/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c b/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c
index b847e9403566..d9de848dae47 100644
--- a/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c
+++ b/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c
@@ -36,7 +36,7 @@ int spu_alloc_lscsa(struct spu_state *csa)
 	struct spu_lscsa *lscsa;
 	unsigned char *p;
 
-	lscsa = vzalloc(sizeof(struct spu_lscsa));
+	lscsa = vzalloc(sizeof(*lscsa));
 	if (!lscsa)
 		return -ENOMEM;
 	csa->lscsa = lscsa;
diff --git a/arch/powerpc/platforms/embedded6xx/flipper-pic.c b/arch/powerpc/platforms/embedded6xx/flipper-pic.c
index ade83829d5e8..7206f3f573d4 100644
--- a/arch/powerpc/platforms/embedded6xx/flipper-pic.c
+++ b/arch/powerpc/platforms/embedded6xx/flipper-pic.c
@@ -132,7 +132,7 @@ static void __flipper_quiesce(void __iomem *io_base)
 	out_be32(io_base + FLIPPER_ICR, 0xffffffff);
 }
 
-struct irq_domain * __init flipper_pic_init(struct device_node *np)
+static struct irq_domain * __init flipper_pic_init(struct device_node *np)
 {
 	struct device_node *pi;
 	struct irq_domain *irq_domain = NULL;
diff --git a/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c b/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c
index 7feb325b636b..5c7e7ce6dbab 100644
--- a/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c
+++ b/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c
@@ -169,7 +169,7 @@ static int ug_getc(void)
 /*
  * Transmits a character.
  */
-void ug_udbg_putc(char ch)
+static void ug_udbg_putc(char ch)
 {
 	ug_putc(ch);
 }
diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c
index 3fd683e40bc9..8bb46dcbebd8 100644
--- a/arch/powerpc/platforms/embedded6xx/wii.c
+++ b/arch/powerpc/platforms/embedded6xx/wii.c
@@ -44,6 +44,7 @@
 #define HW_GPIO_BASE(idx)	(idx * 0x20)
 #define HW_GPIO_OUT(idx)	(HW_GPIO_BASE(idx) + 0)
 #define HW_GPIO_DIR(idx)	(HW_GPIO_BASE(idx) + 4)
+#define HW_GPIO_OWNER		(HW_GPIO_BASE(1) + 0x1c)
 
 #define HW_GPIO_SHUTDOWN	(1<<1)
 #define HW_GPIO_SLOT_LED	(1<<5)
@@ -79,21 +80,9 @@ void __init wii_memory_fixups(void)
 	BUG_ON(memblock.memory.cnt != 2);
 	BUG_ON(!page_aligned(p[0].base) || !page_aligned(p[1].base));
 
-	/* trim unaligned tail */
-	memblock_remove(ALIGN(p[1].base + p[1].size, PAGE_SIZE),
-			(phys_addr_t)ULLONG_MAX);
-
-	/* determine hole, add & reserve them */
+	/* determine hole */
 	wii_hole_start = ALIGN(p[0].base + p[0].size, PAGE_SIZE);
 	wii_hole_size = p[1].base - wii_hole_start;
-	memblock_add(wii_hole_start, wii_hole_size);
-	memblock_reserve(wii_hole_start, wii_hole_size);
-
-	BUG_ON(memblock.memory.cnt != 1);
-	__memblock_dump_all();
-
-	/* allow ioremapping the address space in the hole */
-	__allow_ioremap_reserved = 1;
 }
 
 unsigned long __init wii_mmu_mapin_mem2(unsigned long top)
@@ -176,6 +165,12 @@ static void wii_power_off(void)
 	local_irq_disable();
 
 	if (hw_gpio) {
+		/*
+		 * set the owner of the shutdown pin to ARM, because it is
+		 * accessed through the registers for the ARM, below
+		 */
+		clrbits32(hw_gpio + HW_GPIO_OWNER, HW_GPIO_SHUTDOWN);
+
 		/* make sure that the poweroff GPIO is configured as output */
 		setbits32(hw_gpio + HW_GPIO_DIR(1), HW_GPIO_SHUTDOWN);
 
@@ -239,7 +234,7 @@ static int __init wii_device_probe(void)
 	if (!machine_is(wii))
 		return 0;
 
-	of_platform_bus_probe(NULL, wii_of_bus, NULL);
+	of_platform_populate(NULL, wii_of_bus, NULL, NULL);
 	return 0;
 }
 device_initcall(wii_device_probe);
diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c
index 3408f315ef48..fa89f30e7f27 100644
--- a/arch/powerpc/platforms/powermac/low_i2c.c
+++ b/arch/powerpc/platforms/powermac/low_i2c.c
@@ -492,7 +492,7 @@ static struct pmac_i2c_host_kw *__init kw_i2c_host_init(struct device_node *np)
 	const u32		*psteps, *prate, *addrp;
 	u32			steps;
 
-	host = kzalloc(sizeof(struct pmac_i2c_host_kw), GFP_KERNEL);
+	host = kzalloc(sizeof(*host), GFP_KERNEL);
 	if (host == NULL) {
 		printk(KERN_ERR "low_i2c: Can't allocate host for %pOF\n",
 		       np);
diff --git a/arch/powerpc/platforms/powermac/pfunc_core.c b/arch/powerpc/platforms/powermac/pfunc_core.c
index df3c93bef228..e0462fedcdb8 100644
--- a/arch/powerpc/platforms/powermac/pfunc_core.c
+++ b/arch/powerpc/platforms/powermac/pfunc_core.c
@@ -643,7 +643,7 @@ static int pmf_add_function_prop(struct pmf_device *dev, void *driverdata,
 
 	while (length >= 12) {
 		/* Allocate a structure */
-		func = kzalloc(sizeof(struct pmf_function), GFP_KERNEL);
+		func = kzalloc(sizeof(*func), GFP_KERNEL);
 		if (func == NULL)
 			goto bail;
 		kref_init(&func->ref);
@@ -719,7 +719,7 @@ int pmf_register_driver(struct device_node *np,
 		return -EBUSY;
 	}
 
-	dev = kzalloc(sizeof(struct pmf_device), GFP_KERNEL);
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
 	if (dev == NULL) {
 		DBG("pmf: no memory !\n");
 		return -ENOMEM;
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index 6c9d5199a7e2..703a350a7f4e 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -16,5 +16,4 @@ obj-$(CONFIG_OPAL_PRD)	+= opal-prd.o
 obj-$(CONFIG_PERF_EVENTS) += opal-imc.o
 obj-$(CONFIG_PPC_MEMTRACE)	+= memtrace.o
 obj-$(CONFIG_PPC_VAS)	+= vas.o vas-window.o vas-debug.o
-obj-$(CONFIG_PPC_FTW)	+= nx-ftw.o
 obj-$(CONFIG_OCXL_BASE)	+= ocxl.o
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 33c86c1a1720..ddfc3544d285 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -1425,11 +1425,8 @@ static int pnv_eeh_get_pe(struct pci_controller *hose,
 	dev_pe = dev_pe->parent;
 	while (dev_pe && !(dev_pe->type & EEH_PE_PHB)) {
 		int ret;
-		int active_flags = (EEH_STATE_MMIO_ACTIVE |
-				    EEH_STATE_DMA_ACTIVE);
-
 		ret = eeh_ops->get_state(dev_pe, NULL);
-		if (ret <= 0 || (ret & active_flags) == active_flags) {
+		if (ret <= 0 || eeh_state_active(ret)) {
 			dev_pe = dev_pe->parent;
 			continue;
 		}
@@ -1463,7 +1460,6 @@ static int pnv_eeh_next_error(struct eeh_pe **pe)
 	struct eeh_pe *phb_pe, *parent_pe;
 	__be64 frozen_pe_no;
 	__be16 err_type, severity;
-	int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
 	long rc;
 	int state, ret = EEH_NEXT_ERR_NONE;
 
@@ -1626,8 +1622,7 @@ static int pnv_eeh_next_error(struct eeh_pe **pe)
 
 				/* Frozen parent PE ? */
 				state = eeh_ops->get_state(parent_pe, NULL);
-				if (state > 0 &&
-				    (state & active_flags) != active_flags)
+				if (state > 0 && !eeh_state_active(state))
 					*pe = parent_pe;
 
 				/* Next parent level */
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 443d5ca71995..1f12ab1e6030 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -24,6 +24,7 @@
 #include <asm/code-patching.h>
 #include <asm/smp.h>
 #include <asm/runlatch.h>
+#include <asm/dbell.h>
 
 #include "powernv.h"
 #include "subcore.h"
@@ -80,7 +81,7 @@ static int pnv_save_sprs_for_deep_states(void)
 
 	for_each_possible_cpu(cpu) {
 		uint64_t pir = get_hard_smp_processor_id(cpu);
-		uint64_t hsprg0_val = (uint64_t)&paca[cpu];
+		uint64_t hsprg0_val = (uint64_t)paca_ptrs[cpu];
 
 		rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val);
 		if (rc != 0)
@@ -173,12 +174,12 @@ static void pnv_alloc_idle_core_states(void)
 		for (j = 0; j < threads_per_core; j++) {
 			int cpu = first_cpu + j;
 
-			paca[cpu].core_idle_state_ptr = core_idle_state;
-			paca[cpu].thread_idle_state = PNV_THREAD_RUNNING;
-			paca[cpu].thread_mask = 1 << j;
+			paca_ptrs[cpu]->core_idle_state_ptr = core_idle_state;
+			paca_ptrs[cpu]->thread_idle_state = PNV_THREAD_RUNNING;
+			paca_ptrs[cpu]->thread_mask = 1 << j;
 			if (!cpu_has_feature(CPU_FTR_POWER9_DD1))
 				continue;
-			paca[cpu].thread_sibling_pacas =
+			paca_ptrs[cpu]->thread_sibling_pacas =
 				kmalloc_node(paca_ptr_array_size,
 					     GFP_KERNEL, node);
 		}
@@ -387,6 +388,78 @@ void power9_idle(void)
 	power9_idle_type(pnv_default_stop_val, pnv_default_stop_mask);
 }
 
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+/*
+ * This is used in working around bugs in thread reconfiguration
+ * on POWER9 (at least up to Nimbus DD2.2) relating to transactional
+ * memory and the way that XER[SO] is checkpointed.
+ * This function forces the core into SMT4 mode by asking
+ * all other threads not to stop, and sending a message to any
+ * that are in a stop state.
+ * Must be called with preemption disabled.
+ */
+void pnv_power9_force_smt4_catch(void)
+{
+	int cpu, cpu0, thr;
+	int awake_threads = 1;		/* this thread is awake */
+	int poke_threads = 0;		/* bitmask of threads seen with requested_psscr set */
+	int need_awake = threads_per_core;
+
+	cpu = smp_processor_id();
+	cpu0 = cpu & ~(threads_per_core - 1);	/* presumably this core's first thread */
+	for (thr = 0; thr < threads_per_core; ++thr) {
+		if (cpu != cpu0 + thr)
+			atomic_inc(&paca_ptrs[cpu0+thr]->dont_stop);
+	}
+	/* order setting dont_stop vs testing requested_psscr */
+	mb();
+	for (thr = 0; thr < threads_per_core; ++thr) {	/* NOTE(review): does not skip the caller */
+		if (!paca_ptrs[cpu0+thr]->requested_psscr)
+			++awake_threads;
+		else
+			poke_threads |= (1 << thr);
+	}
+
+	/* If at least 3 threads are awake, the core is in SMT4 already */
+	if (awake_threads < need_awake) {
+		/* We have to wake some threads; we'll use msgsnd */
+		for (thr = 0; thr < threads_per_core; ++thr) {
+			if (poke_threads & (1 << thr)) {
+				ppc_msgsnd_sync();
+				ppc_msgsnd(PPC_DBELL_MSGTYPE, 0,
+					   paca_ptrs[cpu0+thr]->hw_cpu_id);
+			}
+		}
+		/* now spin until at least 3 threads are awake */
+		do {
+			for (thr = 0; thr < threads_per_core; ++thr) {
+				if ((poke_threads & (1 << thr)) &&
+				    !paca_ptrs[cpu0+thr]->requested_psscr) {
+					++awake_threads;
+					poke_threads &= ~(1 << thr);
+				}
+			}
+		} while (awake_threads < need_awake);
+	}
+}
+EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_catch);
+
+void pnv_power9_force_smt4_release(void)
+{
+	/* Undo the dont_stop increments made by pnv_power9_force_smt4_catch() */
+	const int self = smp_processor_id();
+	const int base = self & ~(threads_per_core - 1);
+	int sibling;
+
+	for (sibling = base; sibling < base + threads_per_core; ++sibling) {
+		if (sibling == self)
+			continue;	/* catch() leaves the caller's flag alone too */
+		atomic_dec(&paca_ptrs[sibling]->dont_stop);
+	}
+}
+EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_release);
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
+
 #ifdef CONFIG_HOTPLUG_CPU
 static void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val)
 {
@@ -434,7 +507,7 @@ unsigned long pnv_cpu_offline(unsigned int cpu)
 		psscr = mfspr(SPRN_PSSCR);
 		psscr = (psscr & ~pnv_deepest_stop_psscr_mask) |
 						pnv_deepest_stop_psscr_val;
-		srr1 = power9_idle_stop(psscr);
+		srr1 = power9_offline_stop(psscr);
 
 	} else if ((idle_states & OPAL_PM_WINKLE_ENABLED) &&
 		   (idle_states & OPAL_PM_LOSE_FULL_CONTEXT)) {
@@ -749,7 +822,8 @@ static int __init pnv_init_idle_states(void)
 			for (i = 0; i < threads_per_core; i++) {
 				int j = base_cpu + i;
 
-				paca[j].thread_sibling_pacas[idx] = &paca[cpu];
+				paca_ptrs[j]->thread_sibling_pacas[idx] =
+					paca_ptrs[cpu];
 			}
 		}
 	}
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index 0a253b64ac5f..69a4f9e8bd55 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -410,6 +410,11 @@ struct npu_context {
 	void *priv;
 };
 
+struct mmio_atsd_reg {
+	struct npu *npu;	/* NPU the register below belongs to */
+	int reg;		/* index into npu->mmio_atsd_regs[], < 0 if none */
+};
+
 /*
  * Find a free MMIO ATSD register and mark it in use. Return -ENOSPC
  * if none are available.
@@ -419,7 +424,7 @@ static int get_mmio_atsd_reg(struct npu *npu)
 	int i;
 
 	for (i = 0; i < npu->mmio_atsd_count; i++) {
-		if (!test_and_set_bit(i, &npu->mmio_atsd_usage))
+		if (!test_and_set_bit_lock(i, &npu->mmio_atsd_usage))
 			return i;
 	}
 
@@ -428,86 +433,90 @@ static int get_mmio_atsd_reg(struct npu *npu)
 
 static void put_mmio_atsd_reg(struct npu *npu, int reg)
 {
-	clear_bit(reg, &npu->mmio_atsd_usage);
+	clear_bit_unlock(reg, &npu->mmio_atsd_usage);
 }
 
 /* MMIO ATSD register offsets */
 #define XTS_ATSD_AVA  1
 #define XTS_ATSD_STAT 2
 
-static int mmio_launch_invalidate(struct npu *npu, unsigned long launch,
-				unsigned long va)
+static void mmio_launch_invalidate(struct mmio_atsd_reg *mmio_atsd_reg,
+				unsigned long launch, unsigned long va)
 {
-	int mmio_atsd_reg;
-
-	do {
-		mmio_atsd_reg = get_mmio_atsd_reg(npu);
-		cpu_relax();
-	} while (mmio_atsd_reg < 0);
+	struct npu *npu = mmio_atsd_reg->npu;
+	int reg = mmio_atsd_reg->reg;
 
 	__raw_writeq(cpu_to_be64(va),
-		npu->mmio_atsd_regs[mmio_atsd_reg] + XTS_ATSD_AVA);
+		npu->mmio_atsd_regs[reg] + XTS_ATSD_AVA);
 	eieio();
-	__raw_writeq(cpu_to_be64(launch), npu->mmio_atsd_regs[mmio_atsd_reg]);
-
-	return mmio_atsd_reg;
+	__raw_writeq(cpu_to_be64(launch), npu->mmio_atsd_regs[reg]);
 }
 
-static int mmio_invalidate_pid(struct npu *npu, unsigned long pid, bool flush)
+static void mmio_invalidate_pid(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS],
+				unsigned long pid, bool flush)
 {
+	int i;
 	unsigned long launch;
 
-	/* IS set to invalidate matching PID */
-	launch = PPC_BIT(12);
+	for (i = 0; i <= max_npu2_index; i++) {
+		if (mmio_atsd_reg[i].reg < 0)
+			continue;
+
+		/* IS set to invalidate matching PID */
+		launch = PPC_BIT(12);
 
-	/* PRS set to process-scoped */
-	launch |= PPC_BIT(13);
+		/* PRS set to process-scoped */
+		launch |= PPC_BIT(13);
 
-	/* AP */
-	launch |= (u64) mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17);
+		/* AP */
+		launch |= (u64)
+			mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17);
 
-	/* PID */
-	launch |= pid << PPC_BITLSHIFT(38);
+		/* PID */
+		launch |= pid << PPC_BITLSHIFT(38);
 
-	/* No flush */
-	launch |= !flush << PPC_BITLSHIFT(39);
+		/* No flush */
+		launch |= !flush << PPC_BITLSHIFT(39);
 
-	/* Invalidating the entire process doesn't use a va */
-	return mmio_launch_invalidate(npu, launch, 0);
+		/* Invalidating the entire process doesn't use a va */
+		mmio_launch_invalidate(&mmio_atsd_reg[i], launch, 0);
+	}
 }
 
-static int mmio_invalidate_va(struct npu *npu, unsigned long va,
-			unsigned long pid, bool flush)
+static void mmio_invalidate_va(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS],
+			unsigned long va, unsigned long pid, bool flush)
 {
+	int i;
 	unsigned long launch;
 
-	/* IS set to invalidate target VA */
-	launch = 0;
+	for (i = 0; i <= max_npu2_index; i++) {
+		if (mmio_atsd_reg[i].reg < 0)
+			continue;
 
-	/* PRS set to process scoped */
-	launch |= PPC_BIT(13);
+		/* IS set to invalidate target VA */
+		launch = 0;
 
-	/* AP */
-	launch |= (u64) mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17);
+		/* PRS set to process scoped */
+		launch |= PPC_BIT(13);
 
-	/* PID */
-	launch |= pid << PPC_BITLSHIFT(38);
+		/* AP */
+		launch |= (u64)
+			mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17);
 
-	/* No flush */
-	launch |= !flush << PPC_BITLSHIFT(39);
+		/* PID */
+		launch |= pid << PPC_BITLSHIFT(38);
 
-	return mmio_launch_invalidate(npu, launch, va);
+		/* No flush */
+		launch |= !flush << PPC_BITLSHIFT(39);
+
+		mmio_launch_invalidate(&mmio_atsd_reg[i], launch, va);
+	}
 }
 
 #define mn_to_npu_context(x) container_of(x, struct npu_context, mn)
 
-struct mmio_atsd_reg {
-	struct npu *npu;
-	int reg;
-};
-
 static void mmio_invalidate_wait(
-	struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush)
+	struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS])
 {
 	struct npu *npu;
 	int i, reg;
@@ -522,16 +531,67 @@ static void mmio_invalidate_wait(
 		reg = mmio_atsd_reg[i].reg;
 		while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
 			cpu_relax();
+	}
+}
 
-		put_mmio_atsd_reg(npu, reg);
+/*
+ * Grab one free MMIO ATSD register on every NPU that this npu_context has at
+ * least one link on, spinning until a register becomes available.
+ */
+static void acquire_atsd_reg(struct npu_context *npu_context,
+			struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS])
+{
+	int idx, link;
+	struct pci_dev *dev;
+	struct pnv_phb *phb;
+	struct mmio_atsd_reg *slot;
+
+	for (idx = 0; idx <= max_npu2_index; idx++) {
+		slot = &mmio_atsd_reg[idx];
+		slot->reg = -1;		/* stays -1 if no link is set */
+		for (link = 0; link < NV_MAX_LINKS; link++) {
+			/*
+			 * Nothing here needs ordering against the rest of
+			 * struct npu_context, but npdev[][] must be sampled
+			 * exactly once so the NULL check and the dereference
+			 * below act on the same value.
+			 */
+			dev = READ_ONCE(npu_context->npdev[idx][link]);
+			if (!dev)
+				continue;
+
+			phb = pci_bus_to_host(dev->bus)->private_data;
+			slot->npu = &phb->npu;
+			slot->reg = get_mmio_atsd_reg(slot->npu);
+			while (slot->reg < 0) {
+				slot->reg = get_mmio_atsd_reg(slot->npu);
+				cpu_relax();
+			}
+			break;	/* one register per NPU is enough */
+		}
+	}
+}
+
+/*
+ * Release previously acquired ATSD registers. To avoid deadlocks the registers
+ * must be released in the same order they were acquired above in
+ * acquire_atsd_reg.
+ */
+static void release_atsd_reg(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS])
+{
+	int i;
+
+	for (i = 0; i <= max_npu2_index; i++) {
 		/*
-		 * The GPU requires two flush ATSDs to ensure all entries have
-		 * been flushed. We use PID 0 as it will never be used for a
-		 * process on the GPU.
+		 * We can't rely on npu_context->npdev[][] being the same here
+		 * as when acquire_atsd_reg() was called, hence we use the
+		 * values stored in mmio_atsd_reg during the acquire phase
+		 * rather than re-reading npdev[][].
 		 */
-		if (flush)
-			mmio_invalidate_pid(npu, 0, true);
+		if (mmio_atsd_reg[i].reg < 0)
+			continue;
+
+		put_mmio_atsd_reg(mmio_atsd_reg[i].npu, mmio_atsd_reg[i].reg);
 	}
 }
 
@@ -542,10 +602,6 @@ static void mmio_invalidate_wait(
 static void mmio_invalidate(struct npu_context *npu_context, int va,
 			unsigned long address, bool flush)
 {
-	int i, j;
-	struct npu *npu;
-	struct pnv_phb *nphb;
-	struct pci_dev *npdev;
 	struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS];
 	unsigned long pid = npu_context->mm->context.id;
 
@@ -561,37 +617,25 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
 	 * Loop over all the NPUs this process is active on and launch
 	 * an invalidate.
 	 */
-	for (i = 0; i <= max_npu2_index; i++) {
-		mmio_atsd_reg[i].reg = -1;
-		for (j = 0; j < NV_MAX_LINKS; j++) {
-			npdev = npu_context->npdev[i][j];
-			if (!npdev)
-				continue;
-
-			nphb = pci_bus_to_host(npdev->bus)->private_data;
-			npu = &nphb->npu;
-			mmio_atsd_reg[i].npu = npu;
-
-			if (va)
-				mmio_atsd_reg[i].reg =
-					mmio_invalidate_va(npu, address, pid,
-							flush);
-			else
-				mmio_atsd_reg[i].reg =
-					mmio_invalidate_pid(npu, pid, flush);
-
-			/*
-			 * The NPU hardware forwards the shootdown to all GPUs
-			 * so we only have to launch one shootdown per NPU.
-			 */
-			break;
-		}
+	acquire_atsd_reg(npu_context, mmio_atsd_reg);
+	if (va)
+		mmio_invalidate_va(mmio_atsd_reg, address, pid, flush);
+	else
+		mmio_invalidate_pid(mmio_atsd_reg, pid, flush);
+
+	mmio_invalidate_wait(mmio_atsd_reg);
+	if (flush) {
+		/*
+		 * The GPU requires two flush ATSDs to ensure all entries have
+		 * been flushed. We use PID 0 as it will never be used for a
+		 * process on the GPU.
+		 */
+		mmio_invalidate_pid(mmio_atsd_reg, 0, true);
+		mmio_invalidate_wait(mmio_atsd_reg);
+		mmio_invalidate_pid(mmio_atsd_reg, 0, true);
+		mmio_invalidate_wait(mmio_atsd_reg);
 	}
-
-	mmio_invalidate_wait(mmio_atsd_reg, flush);
-	if (flush)
-		/* Wait for the flush to complete */
-		mmio_invalidate_wait(mmio_atsd_reg, false);
+	release_atsd_reg(mmio_atsd_reg);
 }
 
 static void pnv_npu2_mn_release(struct mmu_notifier *mn,
@@ -680,6 +724,11 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
 		/* No nvlink associated with this GPU device */
 		return ERR_PTR(-ENODEV);
 
+	nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0);
+	if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index",
+							&nvlink_index)))
+		return ERR_PTR(-ENODEV);
+
 	if (!mm || mm->context.id == 0) {
 		/*
 		 * Kernel thread contexts are not supported and context id 0 is
@@ -707,26 +756,40 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
 	 */
 	npu_context = mm->context.npu_context;
 	if (!npu_context) {
+		rc = -ENOMEM;
 		npu_context = kzalloc(sizeof(struct npu_context), GFP_KERNEL);
-		if (!npu_context)
-			return ERR_PTR(-ENOMEM);
+		if (npu_context) {
+			kref_init(&npu_context->kref);
+			npu_context->mm = mm;
+			npu_context->mn.ops = &nv_nmmu_notifier_ops;
+			rc = __mmu_notifier_register(&npu_context->mn, mm);
+		}
+
+		if (rc) {
+			kfree(npu_context);
+			opal_npu_destroy_context(nphb->opal_id, mm->context.id,
+					PCI_DEVID(gpdev->bus->number,
+						gpdev->devfn));
+			return ERR_PTR(rc);
+		}
 
 		mm->context.npu_context = npu_context;
-		npu_context->mm = mm;
-		npu_context->mn.ops = &nv_nmmu_notifier_ops;
-		__mmu_notifier_register(&npu_context->mn, mm);
-		kref_init(&npu_context->kref);
 	} else {
-		kref_get(&npu_context->kref);
+		WARN_ON(!kref_get_unless_zero(&npu_context->kref));
 	}
 
 	npu_context->release_cb = cb;
 	npu_context->priv = priv;
-	nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0);
-	if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index",
-							&nvlink_index)))
-		return ERR_PTR(-ENODEV);
-	npu_context->npdev[npu->index][nvlink_index] = npdev;
+
+	/*
+	 * npdev is a pci_dev pointer setup by the PCI code. We assign it to
+	 * npdev[][] to indicate to the mmu notifiers that an invalidation
+	 * should also be sent over this nvlink. The notifiers don't use any
+	 * other fields in npu_context, so we just need to ensure that when they
+	 * dereference npu_context->npdev[][] it is either a valid pointer or
+	 * NULL.
+	 */
+	WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], npdev);
 
 	if (!nphb->npu.nmmu_flush) {
 		/*
@@ -778,7 +841,7 @@ void pnv_npu2_destroy_context(struct npu_context *npu_context,
 	if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index",
 							&nvlink_index)))
 		return;
-	npu_context->npdev[npu->index][nvlink_index] = NULL;
+	WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], NULL);
 	opal_npu_destroy_context(nphb->opal_id, npu_context->mm->context.id,
 				PCI_DEVID(gpdev->bus->number, gpdev->devfn));
 	kref_put(&npu_context->kref, pnv_npu2_release_context);
diff --git a/arch/powerpc/platforms/powernv/opal-flash.c b/arch/powerpc/platforms/powernv/opal-flash.c
index 2fa3ac80cb4e..b37015101bf6 100644
--- a/arch/powerpc/platforms/powernv/opal-flash.c
+++ b/arch/powerpc/platforms/powernv/opal-flash.c
@@ -303,26 +303,9 @@ invalid_img:
 	return rc;
 }
 
-/* Return CPUs to OPAL before starting FW update */
-static void flash_return_cpu(void *info)
-{
-	int cpu = smp_processor_id();
-
-	if (!cpu_online(cpu))
-		return;
-
-	/* Disable IRQ */
-	hard_irq_disable();
-
-	/* Return the CPU to OPAL */
-	opal_return_cpu();
-}
-
 /* This gets called just before system reboots */
-void opal_flash_term_callback(void)
+void opal_flash_update_print_message(void)
 {
-	struct cpumask mask;
-
 	if (update_flash_data.status != FLASH_IMG_READY)
 		return;
 
@@ -333,15 +316,6 @@ void opal_flash_term_callback(void)
 
 	/* Small delay to help getting the above message out */
 	msleep(500);
-
-	/* Return secondary CPUs to firmware */
-	cpumask_copy(&mask, cpu_online_mask);
-	cpumask_clear_cpu(smp_processor_id(), &mask);
-	if (!cpumask_empty(&mask))
-		smp_call_function_many(&mask,
-				       flash_return_cpu, NULL, false);
-	/* Hard disable interrupts */
-	hard_irq_disable();
 }
 
 /*
@@ -418,12 +392,12 @@ static int alloc_image_buf(char *buffer, size_t count)
 	void *addr;
 	int size;
 
-	if (count < sizeof(struct image_header_t)) {
+	if (count < sizeof(image_header)) {
 		pr_warn("FLASH: Invalid candidate image\n");
 		return -EINVAL;
 	}
 
-	memcpy(&image_header, (void *)buffer, sizeof(struct image_header_t));
+	memcpy(&image_header, (void *)buffer, sizeof(image_header));
 	image_data.size = be32_to_cpu(image_header.size);
 	pr_debug("FLASH: Candidate image size = %u\n", image_data.size);
 
diff --git a/arch/powerpc/platforms/powernv/opal-hmi.c b/arch/powerpc/platforms/powernv/opal-hmi.c
index c9e1a4ff295c..4efc95b4c7d4 100644
--- a/arch/powerpc/platforms/powernv/opal-hmi.c
+++ b/arch/powerpc/platforms/powernv/opal-hmi.c
@@ -314,7 +314,7 @@ static int opal_handle_hmi_event(struct notifier_block *nb,
 		pr_err("HMI: out of memory, Opal message event not handled\n");
 		return -ENOMEM;
 	}
-	memcpy(&msg_node->hmi_evt, hmi_evt, sizeof(struct OpalHMIEvent));
+	memcpy(&msg_node->hmi_evt, hmi_evt, sizeof(*hmi_evt));
 
 	spin_lock_irqsave(&opal_hmi_evt_lock, flags);
 	list_add(&msg_node->list, &opal_hmi_evt_list);
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
index f6f55ab4980e..2a14fda5ea26 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -110,11 +110,11 @@ static int imc_get_mem_addr_nest(struct device_node *node,
 	if (nr_chips <= 0)
 		return -ENODEV;
 
-	base_addr_arr = kcalloc(nr_chips, sizeof(u64), GFP_KERNEL);
+	base_addr_arr = kcalloc(nr_chips, sizeof(*base_addr_arr), GFP_KERNEL);
 	if (!base_addr_arr)
 		return -ENOMEM;
 
-	chipid_arr = kcalloc(nr_chips, sizeof(u32), GFP_KERNEL);
+	chipid_arr = kcalloc(nr_chips, sizeof(*chipid_arr), GFP_KERNEL);
 	if (!chipid_arr)
 		return -ENOMEM;
 
@@ -125,8 +125,8 @@ static int imc_get_mem_addr_nest(struct device_node *node,
 								nr_chips))
 		goto error;
 
-	pmu_ptr->mem_info = kcalloc(nr_chips, sizeof(struct imc_mem_info),
-								GFP_KERNEL);
+	pmu_ptr->mem_info = kcalloc(nr_chips, sizeof(*pmu_ptr->mem_info),
+				    GFP_KERNEL);
 	if (!pmu_ptr->mem_info)
 		goto error;
 
@@ -161,7 +161,7 @@ static int imc_pmu_create(struct device_node *parent, int pmu_index, int domain)
 	u32 offset;
 
 	/* memory for pmu */
-	pmu_ptr = kzalloc(sizeof(struct imc_pmu), GFP_KERNEL);
+	pmu_ptr = kzalloc(sizeof(*pmu_ptr), GFP_KERNEL);
 	if (!pmu_ptr)
 		return -ENOMEM;
 
diff --git a/arch/powerpc/platforms/powernv/opal-memory-errors.c b/arch/powerpc/platforms/powernv/opal-memory-errors.c
index 8ddc1accf199..dcb42bcb5efa 100644
--- a/arch/powerpc/platforms/powernv/opal-memory-errors.c
+++ b/arch/powerpc/platforms/powernv/opal-memory-errors.c
@@ -112,7 +112,7 @@ static int opal_memory_err_event(struct notifier_block *nb,
 		       "handled\n");
 		return -ENOMEM;
 	}
-	memcpy(&msg_node->msg, msg, sizeof(struct opal_msg));
+	memcpy(&msg_node->msg, msg, sizeof(msg_node->msg));
 
 	spin_lock_irqsave(&opal_mem_err_lock, flags);
 	list_add(&msg_node->list, &opal_memory_err_list);
diff --git a/arch/powerpc/platforms/powernv/opal-nvram.c b/arch/powerpc/platforms/powernv/opal-nvram.c
index 9db4398ded5d..1bceb95f422d 100644
--- a/arch/powerpc/platforms/powernv/opal-nvram.c
+++ b/arch/powerpc/platforms/powernv/opal-nvram.c
@@ -11,6 +11,7 @@
 
 #define DEBUG
 
+#include <linux/delay.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/of.h>
@@ -56,9 +57,17 @@ static ssize_t opal_nvram_write(char *buf, size_t count, loff_t *index)
 
 	while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
 		rc = opal_write_nvram(__pa(buf), count, off);
-		if (rc == OPAL_BUSY_EVENT)
+		if (rc == OPAL_BUSY_EVENT) {
+			msleep(OPAL_BUSY_DELAY_MS);
 			opal_poll_events(NULL);
+		} else if (rc == OPAL_BUSY) {
+			msleep(OPAL_BUSY_DELAY_MS);
+		}
 	}
+
+	if (rc)
+		return -EIO;
+
 	*index += count;
 	return count;
 }
diff --git a/arch/powerpc/platforms/powernv/opal-psr.c b/arch/powerpc/platforms/powernv/opal-psr.c
index 7313b7fc9071..74986b35cf77 100644
--- a/arch/powerpc/platforms/powernv/opal-psr.c
+++ b/arch/powerpc/platforms/powernv/opal-psr.c
@@ -136,7 +136,7 @@ void __init opal_psr_init(void)
 		return;
 	}
 
-	psr_attrs = kcalloc(of_get_child_count(psr), sizeof(struct psr_attr),
+	psr_attrs = kcalloc(of_get_child_count(psr), sizeof(*psr_attrs),
 			    GFP_KERNEL);
 	if (!psr_attrs)
 		return;
diff --git a/arch/powerpc/platforms/powernv/opal-sensor-groups.c b/arch/powerpc/platforms/powernv/opal-sensor-groups.c
index 7e5a235ebf76..541c9ea04a32 100644
--- a/arch/powerpc/platforms/powernv/opal-sensor-groups.c
+++ b/arch/powerpc/platforms/powernv/opal-sensor-groups.c
@@ -166,13 +166,13 @@ void __init opal_sensor_groups_init(void)
 		if (!nr_attrs)
 			continue;
 
-		sgs[i].sgattrs = kcalloc(nr_attrs, sizeof(struct sg_attr),
+		sgs[i].sgattrs = kcalloc(nr_attrs, sizeof(*sgs[i].sgattrs),
 					 GFP_KERNEL);
 		if (!sgs[i].sgattrs)
 			goto out_sgs_sgattrs;
 
 		sgs[i].sg.attrs = kcalloc(nr_attrs + 1,
-					  sizeof(struct attribute *),
+					  sizeof(*sgs[i].sg.attrs),
 					  GFP_KERNEL);
 
 		if (!sgs[i].sg.attrs) {
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 1b2936ba6040..3da30c2f26b4 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -323,3 +323,5 @@ OPAL_CALL(opal_sensor_group_clear,		OPAL_SENSOR_GROUP_CLEAR);
 OPAL_CALL(opal_npu_spa_setup,			OPAL_NPU_SPA_SETUP);
 OPAL_CALL(opal_npu_spa_clear_cache,		OPAL_NPU_SPA_CLEAR_CACHE);
 OPAL_CALL(opal_npu_tl_set,			OPAL_NPU_TL_SET);
+OPAL_CALL(opal_pci_get_pbcq_tunnel_bar,		OPAL_PCI_GET_PBCQ_TUNNEL_BAR);
+OPAL_CALL(opal_pci_set_pbcq_tunnel_bar,		OPAL_PCI_SET_PBCQ_TUNNEL_BAR);
diff --git a/arch/powerpc/platforms/powernv/opal-xscom.c b/arch/powerpc/platforms/powernv/opal-xscom.c
index 81c0a943dea9..22d5e1110dbb 100644
--- a/arch/powerpc/platforms/powernv/opal-xscom.c
+++ b/arch/powerpc/platforms/powernv/opal-xscom.c
@@ -46,7 +46,7 @@ static scom_map_t opal_scom_map(struct device_node *dev, u64 reg, u64 count)
 			__func__, dev);
 		return SCOM_MAP_INVALID;
 	}
-	m = kmalloc(sizeof(struct opal_scom_map), GFP_KERNEL);
+	m = kmalloc(sizeof(*m), GFP_KERNEL);
 	if (!m)
 		return NULL;
 	m->chip = be32_to_cpup(gcid);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index c15182765ff5..48fbb41af5d1 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -490,9 +490,12 @@ void pnv_platform_error_reboot(struct pt_regs *regs, const char *msg)
 	 *    opal to trigger checkstop explicitly for error analysis.
 	 *    The FSP PRD component would have already got notified
 	 *    about this error through other channels.
+	 * 4. We are running on a newer skiboot that by default does
+	 *    not cause a checkstop, drops us back to the kernel to
+	 *    extract context and state at the time of the error.
 	 */
 
-	ppc_md.restart(NULL);
+	panic("%s", msg);
 }
 
 int opal_machine_check(struct pt_regs *regs)
@@ -821,6 +824,9 @@ static int __init opal_init(void)
 	/* Create i2c platform devices */
 	opal_pdev_init("ibm,opal-i2c");
 
+	/* Handle non-volatile memory devices */
+	opal_pdev_init("pmem-region");
+
 	/* Setup a heatbeat thread if requested by OPAL */
 	opal_init_heartbeat();
 
diff --git a/arch/powerpc/platforms/powernv/pci-cxl.c b/arch/powerpc/platforms/powernv/pci-cxl.c
index 94498a04558b..cee003de63af 100644
--- a/arch/powerpc/platforms/powernv/pci-cxl.c
+++ b/arch/powerpc/platforms/powernv/pci-cxl.c
@@ -16,14 +16,6 @@
 
 #include "pci.h"
 
-struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev)
-{
-	struct pci_controller *hose = pci_bus_to_host(dev->bus);
-
-	return of_node_get(hose->dn);
-}
-EXPORT_SYMBOL(pnv_pci_get_phb_node);
-
 int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode)
 {
 	struct pci_controller *hose = pci_bus_to_host(dev->bus);
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index a6c92c78c9b2..3f9c69d7623a 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -2681,14 +2681,23 @@ static struct pnv_ioda_pe *gpe_table_group_to_npe(
 static long pnv_pci_ioda2_npu_set_window(struct iommu_table_group *table_group,
 		int num, struct iommu_table *tbl)
 {
+	struct pnv_ioda_pe *npe = gpe_table_group_to_npe(table_group);
+	int num2 = (num == 0) ? 1 : 0;
 	long ret = pnv_pci_ioda2_set_window(table_group, num, tbl);
 
 	if (ret)
 		return ret;
 
-	ret = pnv_npu_set_window(gpe_table_group_to_npe(table_group), num, tbl);
-	if (ret)
+	if (table_group->tables[num2])
+		pnv_npu_unset_window(npe, num2);
+
+	ret = pnv_npu_set_window(npe, num, tbl);
+	if (ret) {
 		pnv_pci_ioda2_unset_window(table_group, num);
+		if (table_group->tables[num2])
+			pnv_npu_set_window(npe, num2,
+					table_group->tables[num2]);
+	}
 
 	return ret;
 }
@@ -2697,12 +2706,24 @@ static long pnv_pci_ioda2_npu_unset_window(
 		struct iommu_table_group *table_group,
 		int num)
 {
+	struct pnv_ioda_pe *npe = gpe_table_group_to_npe(table_group);
+	int num2 = (num == 0) ? 1 : 0;
 	long ret = pnv_pci_ioda2_unset_window(table_group, num);
 
 	if (ret)
 		return ret;
 
-	return pnv_npu_unset_window(gpe_table_group_to_npe(table_group), num);
+	if (!npe->table_group.tables[num])
+		return 0;
+
+	ret = pnv_npu_unset_window(npe, num);
+	if (ret)
+		return ret;
+
+	if (table_group->tables[num2])
+		ret = pnv_npu_set_window(npe, num2, table_group->tables[num2]);
+
+	return ret;
 }
 
 static void pnv_ioda2_npu_take_ownership(struct iommu_table_group *table_group)
@@ -3843,7 +3864,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
 	phb_id = be64_to_cpup(prop64);
 	pr_debug("  PHB-ID  : 0x%016llx\n", phb_id);
 
-	phb = memblock_virt_alloc(sizeof(struct pnv_phb), 0);
+	phb = memblock_virt_alloc(sizeof(*phb), 0);
 
 	/* Allocate PCI controller */
 	phb->hose = hose = pcibios_alloc_controller(np);
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 69d102cbf48f..b265ecc0836a 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -18,6 +18,7 @@
 #include <linux/io.h>
 #include <linux/msi.h>
 #include <linux/iommu.h>
+#include <linux/sched/mm.h>
 
 #include <asm/sections.h>
 #include <asm/io.h>
@@ -38,6 +39,7 @@
 #include "pci.h"
 
 static DEFINE_MUTEX(p2p_mutex);
+static DEFINE_MUTEX(tunnel_mutex);
 
 int pnv_pci_get_slot_id(struct device_node *np, uint64_t *id)
 {
@@ -1092,6 +1094,139 @@ out:
 }
 EXPORT_SYMBOL_GPL(pnv_pci_set_p2p);
 
+struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev)
+{
+	struct pci_controller *hose = pci_bus_to_host(dev->bus);
+
+	return of_node_get(hose->dn);
+}
+EXPORT_SYMBOL(pnv_pci_get_phb_node);
+
+int pnv_pci_enable_tunnel(struct pci_dev *dev, u64 *asnind)
+{
+	struct device_node *np;
+	const __be32 *prop;
+	struct pnv_ioda_pe *pe;
+	uint16_t window_id;
+	int rc;
+
+	if (!radix_enabled())
+		return -ENXIO;
+
+	if (!(np = pnv_pci_get_phb_node(dev)))
+		return -ENXIO;
+
+	prop = of_get_property(np, "ibm,phb-indications", NULL);
+	of_node_put(np);
+
+	if (!prop || !prop[1])
+		return -ENXIO;
+
+	*asnind = (u64)be32_to_cpu(prop[1]);
+	pe = pnv_ioda_get_pe(dev);
+	if (!pe)
+		return -ENODEV;
+
+	/* Increase real window size to accept as_notify messages. */
+	window_id = (pe->pe_number << 1 ) + 1;
+	rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id, pe->pe_number,
+					     window_id, pe->tce_bypass_base,
+					     (uint64_t)1 << 48);
+	return opal_error_code(rc);
+}
+EXPORT_SYMBOL_GPL(pnv_pci_enable_tunnel);
+
+int pnv_pci_disable_tunnel(struct pci_dev *dev)
+{
+	struct pnv_ioda_pe *pe;
+
+	pe = pnv_ioda_get_pe(dev);
+	if (!pe)
+		return -ENODEV;
+
+	/* Restore default real window size. */
+	pnv_pci_ioda2_set_bypass(pe, true);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pnv_pci_disable_tunnel);
+
+int pnv_pci_set_tunnel_bar(struct pci_dev *dev, u64 addr, int enable)
+{
+	__be64 val;
+	struct pci_controller *hose;
+	struct pnv_phb *phb;
+	u64 tunnel_bar;
+	int rc;
+
+	if (!opal_check_token(OPAL_PCI_GET_PBCQ_TUNNEL_BAR))
+		return -ENXIO;
+	if (!opal_check_token(OPAL_PCI_SET_PBCQ_TUNNEL_BAR))
+		return -ENXIO;
+
+	hose = pci_bus_to_host(dev->bus);
+	phb = hose->private_data;
+
+	mutex_lock(&tunnel_mutex);
+	rc = opal_pci_get_pbcq_tunnel_bar(phb->opal_id, &val);
+	if (rc != OPAL_SUCCESS) {
+		rc = -EIO;
+		goto out;
+	}
+	tunnel_bar = be64_to_cpu(val);
+	if (enable) {
+		/*
+		 * Only one device per PHB can use atomics.
+		 * Our policy is first-come, first-served.
+		 */
+		if (tunnel_bar) {
+			if (tunnel_bar != addr)
+				rc = -EBUSY;
+			else
+				rc = 0;	/* Setting same address twice is ok */
+			goto out;
+		}
+	} else {
+		/*
+		 * The device that owns atomics and wants to release
+		 * them must pass the same address with enable == 0.
+		 */
+		if (tunnel_bar != addr) {
+			rc = -EPERM;
+			goto out;
+		}
+		addr = 0x0ULL;
+	}
+	rc = opal_pci_set_pbcq_tunnel_bar(phb->opal_id, addr);
+	rc = opal_error_code(rc);
+out:
+	mutex_unlock(&tunnel_mutex);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(pnv_pci_set_tunnel_bar);
+
+#ifdef CONFIG_PPC64	/* for thread.tidr */
+int pnv_pci_get_as_notify_info(struct task_struct *task, u32 *lpid, u32 *pid,
+			       u32 *tid)
+{
+	struct mm_struct *mm = NULL;
+
+	if (task == NULL)
+		return -EINVAL;
+
+	mm = get_task_mm(task);
+	if (mm == NULL)
+		return -EINVAL;
+
+	*pid = mm->context.id;
+	mmput(mm);
+
+	*tid = task->thread.tidr;
+	*lpid = mfspr(SPRN_LPID);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pnv_pci_get_as_notify_info);
+#endif
+
 void pnv_pci_shutdown(void)
 {
 	struct pci_controller *hose;
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 092715b9674b..ef8c9ce53a61 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -38,57 +38,92 @@
 #include <asm/smp.h>
 #include <asm/tm.h>
 #include <asm/setup.h>
+#include <asm/security_features.h>
 
 #include "powernv.h"
 
+
+static bool fw_feature_is(const char *state, const char *name,
+			  struct device_node *fw_features)
+{
+	struct device_node *np;
+	bool rc = false;
+
+	np = of_get_child_by_name(fw_features, name);
+	if (np) {
+		rc = of_property_read_bool(np, state);
+		of_node_put(np);
+	}
+
+	return rc;
+}
+
+static void init_fw_feat_flags(struct device_node *np)
+{
+	if (fw_feature_is("enabled", "inst-spec-barrier-ori31,31,0", np))
+		security_ftr_set(SEC_FTR_SPEC_BAR_ORI31);
+
+	if (fw_feature_is("enabled", "fw-bcctrl-serialized", np))
+		security_ftr_set(SEC_FTR_BCCTRL_SERIALISED);
+
+	if (fw_feature_is("enabled", "inst-l1d-flush-ori30,30,0", np))
+		security_ftr_set(SEC_FTR_L1D_FLUSH_ORI30);
+
+	if (fw_feature_is("enabled", "inst-l1d-flush-trig2", np))
+		security_ftr_set(SEC_FTR_L1D_FLUSH_TRIG2);
+
+	if (fw_feature_is("enabled", "fw-l1d-thread-split", np))
+		security_ftr_set(SEC_FTR_L1D_THREAD_PRIV);
+
+	if (fw_feature_is("enabled", "fw-count-cache-disabled", np))
+		security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED);
+
+	/*
+	 * The features below are enabled by default, so we instead look to see
+	 * if firmware has *disabled* them, and clear them if so.
+	 */
+	if (fw_feature_is("disabled", "speculation-policy-favor-security", np))
+		security_ftr_clear(SEC_FTR_FAVOUR_SECURITY);
+
+	if (fw_feature_is("disabled", "needs-l1d-flush-msr-pr-0-to-1", np))
+		security_ftr_clear(SEC_FTR_L1D_FLUSH_PR);
+
+	if (fw_feature_is("disabled", "needs-l1d-flush-msr-hv-1-to-0", np))
+		security_ftr_clear(SEC_FTR_L1D_FLUSH_HV);
+
+	if (fw_feature_is("disabled", "needs-spec-barrier-for-bound-checks", np))
+		security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR);
+}
+
 static void pnv_setup_rfi_flush(void)
 {
 	struct device_node *np, *fw_features;
 	enum l1d_flush_type type;
-	int enable;
+	bool enable;
 
 	/* Default to fallback in case fw-features are not available */
 	type = L1D_FLUSH_FALLBACK;
-	enable = 1;
 
 	np = of_find_node_by_name(NULL, "ibm,opal");
 	fw_features = of_get_child_by_name(np, "fw-features");
 	of_node_put(np);
 
 	if (fw_features) {
-		np = of_get_child_by_name(fw_features, "inst-l1d-flush-trig2");
-		if (np && of_property_read_bool(np, "enabled"))
-			type = L1D_FLUSH_MTTRIG;
+		init_fw_feat_flags(fw_features);
+		of_node_put(fw_features);
 
-		of_node_put(np);
+		if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_TRIG2))
+			type = L1D_FLUSH_MTTRIG;
 
-		np = of_get_child_by_name(fw_features, "inst-l1d-flush-ori30,30,0");
-		if (np && of_property_read_bool(np, "enabled"))
+		if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_ORI30))
 			type = L1D_FLUSH_ORI;
-
-		of_node_put(np);
-
-		/* Enable unless firmware says NOT to */
-		enable = 2;
-		np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-hv-1-to-0");
-		if (np && of_property_read_bool(np, "disabled"))
-			enable--;
-
-		of_node_put(np);
-
-		np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-pr-0-to-1");
-		if (np && of_property_read_bool(np, "disabled"))
-			enable--;
-
-		np = of_get_child_by_name(fw_features, "speculation-policy-favor-security");
-		if (np && of_property_read_bool(np, "disabled"))
-			enable = 0;
-
-		of_node_put(np);
-		of_node_put(fw_features);
 	}
 
-	setup_rfi_flush(type, enable > 0);
+	enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
+		 (security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) ||
+		  security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV));
+
+	setup_rfi_flush(type, enable);
 }
 
 static void __init pnv_setup_arch(void)
@@ -166,17 +201,12 @@ static void pnv_prepare_going_down(void)
 	 */
 	opal_event_shutdown();
 
-	/* Soft disable interrupts */
-	local_irq_disable();
+	/* Print flash update message if one is scheduled. */
+	opal_flash_update_print_message();
 
-	/*
-	 * Return secondary CPUs to firwmare if a flash update
-	 * is pending otherwise we will get all sort of error
-	 * messages about CPU being stuck etc.. This will also
-	 * have the side effect of hard disabling interrupts so
-	 * past this point, the kernel is effectively dead.
-	 */
-	opal_flash_term_callback();
+	smp_send_stop();
+
+	hard_irq_disable();
 }
 
 static void  __noreturn pnv_restart(char *cmd)
@@ -258,7 +288,7 @@ static void pnv_kexec_wait_secondaries_down(void)
 			if (i != notified) {
 				printk(KERN_INFO "kexec: waiting for cpu %d "
 				       "(physical %d) to enter OPAL\n",
-				       i, paca[i].hw_cpu_id);
+				       i, paca_ptrs[i]->hw_cpu_id);
 				notified = i;
 			}
 
@@ -270,7 +300,7 @@ static void pnv_kexec_wait_secondaries_down(void)
 			if (timeout-- == 0) {
 				printk(KERN_ERR "kexec: timed out waiting for "
 				       "cpu %d (physical %d) to enter OPAL\n",
-				       i, paca[i].hw_cpu_id);
+				       i, paca_ptrs[i]->hw_cpu_id);
 				break;
 			}
 		}
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index 9664c8461f03..19af6de6b6f0 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -80,7 +80,7 @@ static int pnv_smp_kick_cpu(int nr)
 	 * If we already started or OPAL is not supported, we just
 	 * kick the CPU via the PACA
 	 */
-	if (paca[nr].cpu_start || !firmware_has_feature(FW_FEATURE_OPAL))
+	if (paca_ptrs[nr]->cpu_start || !firmware_has_feature(FW_FEATURE_OPAL))
 		goto kick;
 
 	/*
diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c
index 596ae2e98040..45563004feda 100644
--- a/arch/powerpc/platforms/powernv/subcore.c
+++ b/arch/powerpc/platforms/powernv/subcore.c
@@ -280,7 +280,7 @@ void update_subcore_sibling_mask(void)
 		int offset = (tid / threads_per_subcore) * threads_per_subcore;
 		int mask = sibling_mask_first_cpu << offset;
 
-		paca[cpu].subcore_sibling_mask = mask;
+		paca_ptrs[cpu]->subcore_sibling_mask = mask;
 
 	}
 }
diff --git a/arch/powerpc/platforms/powernv/vas-debug.c b/arch/powerpc/platforms/powernv/vas-debug.c
index ca22f1eae050..4f7276ebdf9c 100644
--- a/arch/powerpc/platforms/powernv/vas-debug.c
+++ b/arch/powerpc/platforms/powernv/vas-debug.c
@@ -166,19 +166,20 @@ void vas_window_init_dbgdir(struct vas_window *window)
 
 	return;
 
-free_name:
-	kfree(window->dbgname);
-	window->dbgname = NULL;
-
 remove_dir:
 	debugfs_remove_recursive(window->dbgdir);
 	window->dbgdir = NULL;
+
+free_name:
+	kfree(window->dbgname);
+	window->dbgname = NULL;
 }
 
 void vas_instance_init_dbgdir(struct vas_instance *vinst)
 {
 	struct dentry *d;
 
+	vas_init_dbgdir();
 	if (!vas_debugfs)
 		return;
 
@@ -201,8 +202,18 @@ free_name:
 	vinst->dbgdir = NULL;
 }
 
+/*
+ * Set up the "root" VAS debugfs dir. Return if we already set it up
+ * (or failed to) in an earlier instance of VAS.
+ */
 void vas_init_dbgdir(void)
 {
+	static bool first_time = true;
+
+	if (!first_time)
+		return;
+
+	first_time = false;
 	vas_debugfs = debugfs_create_dir("vas", NULL);
 	if (IS_ERR(vas_debugfs))
 		vas_debugfs = NULL;
diff --git a/arch/powerpc/platforms/powernv/vas-trace.h b/arch/powerpc/platforms/powernv/vas-trace.h
new file mode 100644
index 000000000000..a449b9f0c12e
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/vas-trace.h
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+/*
+ * Tracepoints for the VAS window-open and paste-CRB paths.
+ */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM	vas
+
+#if !defined(_VAS_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+
+#define _VAS_TRACE_H
+#include <linux/tracepoint.h>
+#include <linux/sched.h>
+#include <asm/vas.h>
+
+/* Logs tsk->pid, vasid, cop and rxattr's lnotify_{lpid,pid,tid} values. */
+TRACE_EVENT(	vas_rx_win_open,
+
+		TP_PROTO(struct task_struct *tsk,
+			 int vasid,
+			 int cop,
+			 struct vas_rx_win_attr *rxattr),
+
+		TP_ARGS(tsk, vasid, cop, rxattr),
+
+		TP_STRUCT__entry(
+			__field(int, pid)
+			__field(int, cop)
+			__field(int, vasid)
+			__field(int, lnotify_lpid)
+			__field(int, lnotify_pid)
+			__field(int, lnotify_tid)
+		),
+
+		TP_fast_assign(
+			__entry->pid = tsk->pid;
+			__entry->vasid = vasid;
+			__entry->cop = cop;
+			__entry->lnotify_lpid = rxattr->lnotify_lpid;
+			__entry->lnotify_pid = rxattr->lnotify_pid;
+			__entry->lnotify_tid = rxattr->lnotify_tid;
+		),
+
+		TP_printk("pid=%d, vasid=%d, cop=%d, lpid=%d, pid=%d, tid=%d",
+			__entry->pid, __entry->vasid, __entry->cop,
+			__entry->lnotify_lpid, __entry->lnotify_pid,
+			__entry->lnotify_tid)
+);
+
+/* Logs tsk->pid, vasid, cop and the lpid/pidr values taken from txattr. */
+TRACE_EVENT(	vas_tx_win_open,
+
+		TP_PROTO(struct task_struct *tsk,
+			 int vasid,
+			 int cop,
+			 struct vas_tx_win_attr *txattr),
+
+		TP_ARGS(tsk, vasid, cop, txattr),
+
+		TP_STRUCT__entry(
+			__field(int, pid)
+			__field(int, cop)
+			__field(int, vasid)
+			__field(int, lpid)
+			__field(int, pidr)
+		),
+
+		TP_fast_assign(
+			__entry->pid = tsk->pid;
+			__entry->vasid = vasid;
+			__entry->cop = cop;
+			__entry->lpid = txattr->lpid;
+			__entry->pidr = txattr->pidr;
+		),
+
+		TP_printk("pid=%d, vasid=%d, cop=%d, lpid=%d, pidr=%d",
+			__entry->pid, __entry->vasid, __entry->cop,
+			__entry->lpid, __entry->pidr)
+);
+
+/* Logs tsk->pid plus the vas_id, winid and paste_kaddr of the window win. */
+TRACE_EVENT(	vas_paste_crb,
+
+		TP_PROTO(struct task_struct *tsk,
+			struct vas_window *win),
+
+		TP_ARGS(tsk, win),
+
+		TP_STRUCT__entry(
+			__field(int, pid)
+			__field(int, vasid)
+			__field(int, winid)
+			__field(unsigned long, paste_kaddr)
+		),
+
+		TP_fast_assign(
+			__entry->pid = tsk->pid;
+			__entry->vasid = win->vinst->vas_id;
+			__entry->winid = win->winid;
+			__entry->paste_kaddr = (unsigned long)win->paste_kaddr;
+		),
+
+		TP_printk("pid=%d, vasid=%d, winid=%d, paste_kaddr=0x%016lx",
+			__entry->pid, __entry->vasid, __entry->winid,
+			__entry->paste_kaddr)
+);
+
+#endif /* _VAS_TRACE_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../arch/powerpc/platforms/powernv
+#define TRACE_INCLUDE_FILE vas-trace
+#include <trace/define_trace.h>
diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c
index b7c53a51c31b..ff9f48812331 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -21,6 +21,9 @@
 #include "vas.h"
 #include "copy-paste.h"
 
+#define CREATE_TRACE_POINTS
+#include "vas-trace.h"
+
 /*
  * Compute the paste address region for the window @window using the
  * ->paste_base_addr and ->paste_win_id_shift we got from device tree.
@@ -880,6 +883,8 @@ struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop,
 	struct vas_winctx winctx;
 	struct vas_instance *vinst;
 
+	trace_vas_rx_win_open(current, vasid, cop, rxattr);
+
 	if (!rx_win_args_valid(cop, rxattr))
 		return ERR_PTR(-EINVAL);
 
@@ -1008,6 +1013,8 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
 	struct vas_winctx winctx;
 	struct vas_instance *vinst;
 
+	trace_vas_tx_win_open(current, vasid, cop, attr);
+
 	if (!tx_win_args_valid(cop, attr))
 		return ERR_PTR(-EINVAL);
 
@@ -1100,6 +1107,8 @@ int vas_paste_crb(struct vas_window *txwin, int offset, bool re)
 	void *addr;
 	uint64_t val;
 
+	trace_vas_paste_crb(current, txwin);
+
 	/*
 	 * Only NX windows are supported for now and hardware assumes
 	 * report-enable flag is set for NX windows. Ensure software
diff --git a/arch/powerpc/platforms/powernv/vas.c b/arch/powerpc/platforms/powernv/vas.c
index aebbe95c9230..5a2b24cbbc88 100644
--- a/arch/powerpc/platforms/powernv/vas.c
+++ b/arch/powerpc/platforms/powernv/vas.c
@@ -160,8 +160,6 @@ static int __init vas_init(void)
 	int found = 0;
 	struct device_node *dn;
 
-	vas_init_dbgdir();
-
 	platform_driver_register(&vas_driver);
 
 	for_each_compatible_node(dn, NULL, "ibm,vas") {
@@ -169,8 +167,10 @@ static int __init vas_init(void)
 		found++;
 	}
 
-	if (!found)
+	if (!found) {
+		platform_driver_unregister(&vas_driver);
 		return -ENODEV;
+	}
 
 	pr_devel("Found %d instances\n", found);
 
diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c
index 7f870ec29daf..8c7009d001d9 100644
--- a/arch/powerpc/platforms/ps3/mm.c
+++ b/arch/powerpc/platforms/ps3/mm.c
@@ -524,8 +524,7 @@ static int dma_sb_map_pages(struct ps3_dma_region *r, unsigned long phys_addr,
 	int result;
 	struct dma_chunk *c;
 
-	c = kzalloc(sizeof(struct dma_chunk), GFP_ATOMIC);
-
+	c = kzalloc(sizeof(*c), GFP_ATOMIC);
 	if (!c) {
 		result = -ENOMEM;
 		goto fail_alloc;
@@ -570,8 +569,7 @@ static int dma_ioc0_map_pages(struct ps3_dma_region *r, unsigned long phys_addr,
 
 	DBG(KERN_ERR "%s: phy=%#lx, lpar%#lx, len=%#lx\n", __func__,
 	    phys_addr, ps3_mm_phys_to_lpar(phys_addr), len);
-	c = kzalloc(sizeof(struct dma_chunk), GFP_ATOMIC);
-
+	c = kzalloc(sizeof(*c), GFP_ATOMIC);
 	if (!c) {
 		result = -ENOMEM;
 		goto fail_alloc;
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 652d3e96b812..6ef77caf7bcf 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -234,7 +234,7 @@ static void pseries_cpu_die(unsigned int cpu)
 	 * done here.  Change isolate state to Isolate and
 	 * change allocation-state to Unusable.
 	 */
-	paca[cpu].cpu_start = 0;
+	paca_ptrs[cpu]->cpu_start = 0;
 }
 
 /*
diff --git a/arch/powerpc/platforms/pseries/kexec.c b/arch/powerpc/platforms/pseries/kexec.c
index eeb13429d685..3fe126796975 100644
--- a/arch/powerpc/platforms/pseries/kexec.c
+++ b/arch/powerpc/platforms/pseries/kexec.c
@@ -23,7 +23,12 @@
 
 void pseries_kexec_cpu_down(int crash_shutdown, int secondary)
 {
-	/* Don't risk a hypervisor call if we're crashing */
+	/*
+	 * Don't risk a hypervisor call if we're crashing
+	 * XXX: Why? The hypervisor is not crashing. It might be better
+	 * to at least attempt unregister to avoid the hypervisor stepping
+	 * on our memory.
+	 */
 	if (firmware_has_feature(FW_FEATURE_SPLPAR) && !crash_shutdown) {
 		int ret;
 		int cpu = smp_processor_id();
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 0ee4a469a4ae..adb996ed51e1 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -99,7 +99,7 @@ void vpa_init(int cpu)
 	 * reports that.  All SPLPAR support SLB shadow buffer.
 	 */
 	if (!radix_enabled() && firmware_has_feature(FW_FEATURE_SPLPAR)) {
-		addr = __pa(paca[cpu].slb_shadow_ptr);
+		addr = __pa(paca_ptrs[cpu]->slb_shadow_ptr);
 		ret = register_slb_shadow(hwcpu, addr);
 		if (ret)
 			pr_err("WARNING: SLB shadow buffer registration for "
@@ -111,7 +111,7 @@ void vpa_init(int cpu)
 	/*
 	 * Register dispatch trace log, if one has been allocated.
 	 */
-	pp = &paca[cpu];
+	pp = paca_ptrs[cpu];
 	dtl = pp->dispatch_log;
 	if (dtl) {
 		pp->dtl_ridx = 0;
@@ -306,14 +306,14 @@ static long pSeries_lpar_hpte_updatepp(unsigned long slot,
 
 	want_v = hpte_encode_avpn(vpn, psize, ssize);
 
-	pr_devel("    update: avpnv=%016lx, hash=%016lx, f=%lx, psize: %d ...",
-		 want_v, slot, flags, psize);
-
 	flags = (newpp & 7) | H_AVPN;
 	if (mmu_has_feature(MMU_FTR_KERNEL_RO))
 		/* Move pp0 into bit 8 (IBM 55) */
 		flags |= (newpp & HPTE_R_PP0) >> 55;
 
+	pr_devel("    update: avpnv=%016lx, hash=%016lx, f=%lx, psize: %d ...",
+		 want_v, slot, flags, psize);
+
 	lpar_rc = plpar_pte_protect(flags, slot, want_v);
 
 	if (lpar_rc == H_NOT_FOUND) {
@@ -726,15 +726,18 @@ static int pseries_lpar_resize_hpt(unsigned long shift)
 	return 0;
 }
 
-/* Actually only used for radix, so far */
 static int pseries_lpar_register_process_table(unsigned long base,
 			unsigned long page_size, unsigned long table_size)
 {
 	long rc;
-	unsigned long flags = PROC_TABLE_NEW;
+	unsigned long flags = 0;
 
+	if (table_size)
+		flags |= PROC_TABLE_NEW;
 	if (radix_enabled())
 		flags |= PROC_TABLE_RADIX | PROC_TABLE_GTSE;
+	else
+		flags |= PROC_TABLE_HPT_SLB;
 	for (;;) {
 		rc = plpar_hcall_norets(H_REGISTER_PROC_TBL, flags, base,
 					page_size, table_size);
@@ -760,6 +763,7 @@ void __init hpte_init_pseries(void)
 	mmu_hash_ops.flush_hash_range	 = pSeries_lpar_flush_hash_range;
 	mmu_hash_ops.hpte_clear_all      = pseries_hpte_clear_all;
 	mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate;
+	register_process_table		 = pseries_lpar_register_process_table;
 
 	if (firmware_has_feature(FW_FEATURE_HPT_RESIZE))
 		mmu_hash_ops.resize_hpt = pseries_lpar_resize_hpt;
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index 0f7fb7170b03..8a8033a249c7 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -348,6 +348,9 @@ void post_mobility_fixup(void)
 		printk(KERN_ERR "Post-mobility device tree update "
 			"failed: %d\n", rc);
 
+	/* Possibly switch to a new RFI flush type */
+	pseries_setup_rfi_flush();
+
 	return;
 }
 
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index 1ae1d9f4dbe9..60db2ee511fb 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -27,6 +27,14 @@ extern int pSeries_machine_check_exception(struct pt_regs *regs);
 
 #ifdef CONFIG_SMP
 extern void smp_init_pseries(void);
+
+/* Get state of physical CPU from query_cpu_stopped */
+int smp_query_cpu_stopped(unsigned int pcpu);
+#define QCSS_STOPPED 0
+#define QCSS_STOPPING 1
+#define QCSS_NOT_STOPPED 2
+#define QCSS_HARDWARE_ERROR -1
+#define QCSS_HARDWARE_BUSY -2
 #else
 static inline void smp_init_pseries(void) { };
 #endif
@@ -100,4 +108,6 @@ static inline unsigned long cmo_get_page_size(void)
 
 int dlpar_workqueue_init(void);
 
+void pseries_setup_rfi_flush(void);
+
 #endif /* _PSERIES_PSERIES_H */
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 1a527625acf7..b55ad4286dc7 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -68,6 +68,7 @@
 #include <asm/plpar_wrappers.h>
 #include <asm/kexec.h>
 #include <asm/isa-bridge.h>
+#include <asm/security_features.h>
 
 #include "pseries.h"
 
@@ -246,7 +247,7 @@ static int alloc_dispatch_logs(void)
 		return 0;
 
 	for_each_possible_cpu(cpu) {
-		pp = &paca[cpu];
+		pp = paca_ptrs[cpu];
 		dtl = kmem_cache_alloc(dtl_cache, GFP_KERNEL);
 		if (!dtl) {
 			pr_warn("Failed to allocate dispatch trace log for cpu %d\n",
@@ -459,36 +460,78 @@ static void __init find_and_init_phbs(void)
 	of_pci_check_probe_only();
 }
 
-static void pseries_setup_rfi_flush(void)
+static void init_cpu_char_feature_flags(struct h_cpu_char_result *result)
+{
+	/*
+	 * The features below are disabled by default, so we instead look to see
+	 * if firmware has *enabled* them, and set them if so.
+	 */
+	if (result->character & H_CPU_CHAR_SPEC_BAR_ORI31)
+		security_ftr_set(SEC_FTR_SPEC_BAR_ORI31);
+
+	if (result->character & H_CPU_CHAR_BCCTRL_SERIALISED)
+		security_ftr_set(SEC_FTR_BCCTRL_SERIALISED);
+
+	if (result->character & H_CPU_CHAR_L1D_FLUSH_ORI30)
+		security_ftr_set(SEC_FTR_L1D_FLUSH_ORI30);
+
+	if (result->character & H_CPU_CHAR_L1D_FLUSH_TRIG2)
+		security_ftr_set(SEC_FTR_L1D_FLUSH_TRIG2);
+
+	if (result->character & H_CPU_CHAR_L1D_THREAD_PRIV)
+		security_ftr_set(SEC_FTR_L1D_THREAD_PRIV);
+
+	if (result->character & H_CPU_CHAR_COUNT_CACHE_DISABLED)
+		security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED);
+
+	/*
+	 * The features below are enabled by default, so we instead look to see
+	 * if firmware has *disabled* them, and clear them if so.
+	 */
+	if (!(result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY))
+		security_ftr_clear(SEC_FTR_FAVOUR_SECURITY);
+
+	if (!(result->behaviour & H_CPU_BEHAV_L1D_FLUSH_PR))
+		security_ftr_clear(SEC_FTR_L1D_FLUSH_PR);
+
+	if (!(result->behaviour & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR))
+		security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR);
+}
+
+void pseries_setup_rfi_flush(void)
 {
 	struct h_cpu_char_result result;
 	enum l1d_flush_type types;
 	bool enable;
 	long rc;
 
-	/* Enable by default */
-	enable = true;
+	/*
+	 * Set features to the defaults assumed by init_cpu_char_feature_flags()
+	 * so it can set/clear again any features that might have changed after
+	 * migration, and in case the hypercall fails and it is not even called.
+	 */
+	powerpc_security_features = SEC_FTR_DEFAULT;
 
 	rc = plpar_get_cpu_characteristics(&result);
-	if (rc == H_SUCCESS) {
-		types = L1D_FLUSH_NONE;
+	if (rc == H_SUCCESS)
+		init_cpu_char_feature_flags(&result);
 
-		if (result.character & H_CPU_CHAR_L1D_FLUSH_TRIG2)
-			types |= L1D_FLUSH_MTTRIG;
-		if (result.character & H_CPU_CHAR_L1D_FLUSH_ORI30)
-			types |= L1D_FLUSH_ORI;
+	/*
+	 * We're the guest so this doesn't apply to us, clear it to simplify
+	 * handling of it elsewhere.
+	 */
+	security_ftr_clear(SEC_FTR_L1D_FLUSH_HV);
 
-		/* Use fallback if nothing set in hcall */
-		if (types == L1D_FLUSH_NONE)
-			types = L1D_FLUSH_FALLBACK;
+	types = L1D_FLUSH_FALLBACK;
 
-		if ((!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR)) ||
-		    (!(result.behaviour & H_CPU_BEHAV_FAVOUR_SECURITY)))
-			enable = false;
-	} else {
-		/* Default to fallback if case hcall is not available */
-		types = L1D_FLUSH_FALLBACK;
-	}
+	if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_TRIG2))
+		types |= L1D_FLUSH_MTTRIG;
+
+	if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_ORI30))
+		types |= L1D_FLUSH_ORI;
+
+	enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \
+		 security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR);
 
 	setup_rfi_flush(types, enable);
 }
@@ -739,7 +782,7 @@ static int pseries_set_dawr(unsigned long dawr, unsigned long dawrx)
 	/* PAPR says we can't set HYP */
 	dawrx &= ~DAWRX_HYP;
 
-	return  plapr_set_watchpoint0(dawr, dawrx);
+	return  plpar_set_watchpoint0(dawr, dawrx);
 }
 
 #define CMO_CHARACTERISTICS_TOKEN 44
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index 2e184829e5d4..3df46123cce3 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -110,7 +110,7 @@ static inline int smp_startup_cpu(unsigned int lcpu)
 	}
 
 	/* Fixup atomic count: it exited inside IRQ handler. */
-	task_thread_info(paca[lcpu].__current)->preempt_count	= 0;
+	task_thread_info(paca_ptrs[lcpu]->__current)->preempt_count	= 0;
 #ifdef CONFIG_HOTPLUG_CPU
 	if (get_cpu_current_state(lcpu) == CPU_STATE_INACTIVE)
 		goto out;
@@ -165,7 +165,7 @@ static int smp_pSeries_kick_cpu(int nr)
 	 * cpu_start field to become non-zero After we set cpu_start,
 	 * the processor will continue on to secondary_start
 	 */
-	paca[nr].cpu_start = 1;
+	paca_ptrs[nr]->cpu_start = 1;
 #ifdef CONFIG_HOTPLUG_CPU
 	set_preferred_offline_state(nr, CPU_STATE_ONLINE);
 
@@ -215,7 +215,7 @@ static int pseries_cause_nmi_ipi(int cpu)
 		hwcpu = get_hard_smp_processor_id(cpu);
 	}
 
-	if (plapr_signal_sys_reset(hwcpu) == H_SUCCESS)
+	if (plpar_signal_sys_reset(hwcpu) == H_SUCCESS)
 		return 1;
 
 	return 0;
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index 73067805300a..1d4e0ef658d3 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -626,7 +626,7 @@ static inline u32 mpic_physmask(u32 cpumask)
 	int i;
 	u32 mask = 0;
 
-	for (i = 0; i < min(32, NR_CPUS); ++i, cpumask >>= 1)
+	for (i = 0; i < min(32, NR_CPUS) && cpu_possible(i); ++i, cpumask >>= 1)
 		mask |= (cpumask & 1) << get_hard_smp_processor_id(i);
 	return mask;
 }
diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c
index 1459f4e8b698..37bfbc54aacb 100644
--- a/arch/powerpc/sysdev/xics/icp-native.c
+++ b/arch/powerpc/sysdev/xics/icp-native.c
@@ -164,7 +164,7 @@ void icp_native_cause_ipi_rm(int cpu)
 	 * Just like the cause_ipi functions, it is required to
 	 * include a full barrier before causing the IPI.
 	 */
-	xics_phys = paca[cpu].kvm_hstate.xics_phys;
+	xics_phys = paca_ptrs[cpu]->kvm_hstate.xics_phys;
 	mb();
 	__raw_rm_writeb(IPI_PRIORITY, xics_phys + XICS_MFRR);
 }
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index 40c06110821c..3459015092fa 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -246,7 +246,7 @@ notrace void xmon_xive_do_dump(int cpu)
 		u64 val = xive_esb_read(&xc->ipi_data, XIVE_ESB_GET);
 		xmon_printf("  IPI state: %x:%c%c\n", xc->hw_ipi,
 			val & XIVE_ESB_VAL_P ? 'P' : 'p',
-			val & XIVE_ESB_VAL_P ? 'Q' : 'q');
+			val & XIVE_ESB_VAL_Q ? 'Q' : 'q');
 	}
 #endif
 }
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 82e1a3ee6e0f..a0842f1ff72c 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -41,6 +41,7 @@
 #include <asm/pgtable.h>
 #include <asm/mmu.h>
 #include <asm/mmu_context.h>
+#include <asm/plpar_wrappers.h>
 #include <asm/cputable.h>
 #include <asm/rtas.h>
 #include <asm/sstep.h>
@@ -61,12 +62,6 @@
 #include <asm/paca.h>
 #endif
 
-#if defined(CONFIG_PPC_SPLPAR)
-#include <asm/plpar_wrappers.h>
-#else
-static inline long plapr_set_ciabr(unsigned long ciabr) {return 0; };
-#endif
-
 #include "nonstdio.h"
 #include "dis-asm.h"
 
@@ -328,7 +323,7 @@ static void write_ciabr(unsigned long ciabr)
 		mtspr(SPRN_CIABR, ciabr);
 		return;
 	}
-	plapr_set_ciabr(ciabr);
+	plpar_set_ciabr(ciabr);
 }
 
 /**
@@ -1273,6 +1268,16 @@ static long check_bp_loc(unsigned long addr)
 	return 1;
 }
 
+/* Force enable xmon if not already enabled */
+static inline void force_enable_xmon(void)
+{
+	/* Enable xmon hooks if needed */
+	if (!xmon_on) {
+		printf("xmon: Enabling debugger hooks\n");
+		xmon_on = 1;
+	}
+}
+
 static char *breakpoint_help_string =
     "Breakpoint command usage:\n"
     "b                show breakpoints\n"
@@ -1297,6 +1302,10 @@ bpt_cmds(void)
 	static const char badaddr[] = "Only kernel addresses are permitted for breakpoints\n";
 	int mode;
 	case 'd':	/* bd - hardware data breakpoint */
+		if (!ppc_breakpoint_available()) {
+			printf("Hardware data breakpoint not supported on this cpu\n");
+			break;
+		}
 		mode = 7;
 		cmd = inchar();
 		if (cmd == 'r')
@@ -1315,6 +1324,8 @@ bpt_cmds(void)
 			dabr.address &= ~HW_BRK_TYPE_DABR;
 			dabr.enabled = mode | BP_DABR;
 		}
+
+		force_enable_xmon();
 		break;
 
 	case 'i':	/* bi - hardware instr breakpoint */
@@ -1335,6 +1346,7 @@ bpt_cmds(void)
 		if (bp != NULL) {
 			bp->enabled |= BP_CIABR;
 			iabr = bp;
+			force_enable_xmon();
 		}
 		break;
 #endif
@@ -1399,8 +1411,10 @@ bpt_cmds(void)
 		if (!check_bp_loc(a))
 			break;
 		bp = new_breakpoint(a);
-		if (bp != NULL)
+		if (bp != NULL) {
 			bp->enabled |= BP_TRAP;
+			force_enable_xmon();
+		}
 		break;
 	}
 }
@@ -2327,7 +2341,7 @@ static void dump_one_paca(int cpu)
 	catch_memory_errors = 1;
 	sync();
 
-	p = &paca[cpu];
+	p = paca_ptrs[cpu];
 
 	printf("paca for cpu 0x%x @ %px:\n", cpu, p);
 
@@ -3649,11 +3663,35 @@ device_initcall(setup_xmon_sysrq);
 #endif /* CONFIG_MAGIC_SYSRQ */
 
 #ifdef CONFIG_DEBUG_FS
+static void clear_all_bpt(void)
+{
+	int i;
+
+	/* clear/unpatch all breakpoints */
+	remove_bpts();
+	remove_cpu_bpts();
+
+	/* Disable all breakpoints */
+	for (i = 0; i < NBPTS; ++i)
+		bpts[i].enabled = 0;
+
+	/* Clear any data or iabr breakpoints */
+	if (iabr || dabr.enabled) {
+		iabr = NULL;
+		dabr.enabled = 0;
+	}
+
+	printf("xmon: All breakpoints cleared\n");
+}
+
 static int xmon_dbgfs_set(void *data, u64 val)
 {
 	xmon_on = !!val;
 	xmon_init(xmon_on);
 
+	/* make sure all breakpoints removed when disabling */
+	if (!xmon_on)
+		clear_all_bpt();
 	return 0;
 }
 
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index eaee7087886f..32a0d5b958bf 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -120,6 +120,7 @@ config S390
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_CPU_AUTOPROBE
 	select GENERIC_CPU_DEVICES if !SMP
+	select GENERIC_CPU_VULNERABILITIES
 	select GENERIC_FIND_FIRST_BIT
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_TIME_VSYSCALL
@@ -576,7 +577,7 @@ choice
 config EXPOLINE_OFF
 	bool "spectre_v2=off"
 
-config EXPOLINE_MEDIUM
+config EXPOLINE_AUTO
 	bool "spectre_v2=auto"
 
 config EXPOLINE_FULL
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 2ced3239cb84..c79936d02f7b 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -47,9 +47,6 @@ cflags-$(CONFIG_MARCH_Z14_TUNE)		+= -mtune=z14
 
 cflags-y += -Wa,-I$(srctree)/arch/$(ARCH)/include
 
-#KBUILD_IMAGE is necessary for make rpm
-KBUILD_IMAGE	:=arch/s390/boot/image
-
 #
 # Prevent tail-call optimizations, to get clearer backtraces:
 #
@@ -84,7 +81,7 @@ ifdef CONFIG_EXPOLINE
     CC_FLAGS_EXPOLINE += -mfunction-return=thunk
     CC_FLAGS_EXPOLINE += -mindirect-branch-table
     export CC_FLAGS_EXPOLINE
-    cflags-y += $(CC_FLAGS_EXPOLINE)
+    cflags-y += $(CC_FLAGS_EXPOLINE) -DCC_USING_EXPOLINE
   endif
 endif
 
@@ -126,6 +123,9 @@ tools		:= arch/s390/tools
 
 all: image bzImage
 
+#KBUILD_IMAGE is necessary for packaging targets like rpm-pkg, deb-pkg...
+KBUILD_IMAGE	:= $(boot)/bzImage
+
 install: vmlinux
 	$(Q)$(MAKE) $(build)=$(boot) $@
 
diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile
index 26d6a94f40f6..5766f7b9b271 100644
--- a/arch/s390/boot/compressed/Makefile
+++ b/arch/s390/boot/compressed/Makefile
@@ -29,11 +29,16 @@ LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup -T
 $(obj)/vmlinux: $(obj)/vmlinux.lds $(OBJECTS)
 	$(call if_changed,ld)
 
-sed-sizes := -e 's/^\([0-9a-fA-F]*\) . \(__bss_start\|_end\)$$/\#define SZ\2 0x\1/p'
+TRIM_HEAD_SIZE := 0x11000
 
-quiet_cmd_sizes = GEN $@
+sed-sizes := -e 's/^\([0-9a-fA-F]*\) . \(__bss_start\|_end\)$$/\#define SZ\2 (0x\1 - $(TRIM_HEAD_SIZE))/p'
+
+quiet_cmd_sizes = GEN     $@
       cmd_sizes = $(NM) $< | sed -n $(sed-sizes) > $@
 
+quiet_cmd_trim_head = TRIM    $@
+      cmd_trim_head = tail -c +$$(($(TRIM_HEAD_SIZE) + 1)) $< > $@
+
 $(obj)/sizes.h: vmlinux
 	$(call if_changed,sizes)
 
@@ -43,10 +48,13 @@ $(obj)/head.o: $(obj)/sizes.h
 CFLAGS_misc.o += -I$(objtree)/$(obj)
 $(obj)/misc.o: $(obj)/sizes.h
 
-OBJCOPYFLAGS_vmlinux.bin :=  -R .comment -S
-$(obj)/vmlinux.bin: vmlinux
+OBJCOPYFLAGS_vmlinux.bin.full :=  -R .comment -S
+$(obj)/vmlinux.bin.full: vmlinux
 	$(call if_changed,objcopy)
 
+$(obj)/vmlinux.bin: $(obj)/vmlinux.bin.full
+	$(call if_changed,trim_head)
+
 vmlinux.bin.all-y := $(obj)/vmlinux.bin
 
 suffix-$(CONFIG_KERNEL_GZIP)  := gz
diff --git a/arch/s390/boot/compressed/head.S b/arch/s390/boot/compressed/head.S
index 231d1491d431..9f94eca0f467 100644
--- a/arch/s390/boot/compressed/head.S
+++ b/arch/s390/boot/compressed/head.S
@@ -23,12 +23,10 @@ ENTRY(startup_continue)
 	aghi	%r15,-160
 	brasl	%r14,decompress_kernel
 	# Set up registers for memory mover. We move the decompressed image to
-	# 0x11000, starting at offset 0x11000 in the decompressed image so
-	# that code living at 0x11000 in the image will end up at 0x11000 in
-	# memory.
+	# 0x11000, where startup_continue of the decompressed image is supposed
+	# to be.
 	lgr	%r4,%r2
 	lg	%r2,.Loffset-.LPG1(%r13)
-	la	%r4,0(%r2,%r4)
 	lg	%r3,.Lmvsize-.LPG1(%r13)
 	lgr	%r5,%r3
 	# Move the memory mover someplace safe so it doesn't overwrite itself.
diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c
index cecf38b9ec82..511b2cc9b91a 100644
--- a/arch/s390/boot/compressed/misc.c
+++ b/arch/s390/boot/compressed/misc.c
@@ -27,8 +27,8 @@
 /* Symbols defined by linker scripts */
 extern char input_data[];
 extern int input_len;
-extern char _text, _end;
-extern char _bss, _ebss;
+extern char _end[];
+extern char _bss[], _ebss[];
 
 static void error(char *m);
 
@@ -119,34 +119,12 @@ static void error(char *x)
 	asm volatile("lpsw %0" : : "Q" (psw));
 }
 
-/*
- * Safe guard the ipl parameter block against a memory area that will be
- * overwritten. The validity check for the ipl parameter block is complex
- * (see cio_get_iplinfo and ipl_save_parameters) but if the pointer to
- * the ipl parameter block intersects with the passed memory area we can
- * safely assume that we can read from that memory. In that case just copy
- * the memory to IPL_PARMBLOCK_ORIGIN even if there is no ipl parameter
- * block.
- */
-static void check_ipl_parmblock(void *start, unsigned long size)
-{
-	void *src, *dst;
-
-	src = (void *)(unsigned long) S390_lowcore.ipl_parmblock_ptr;
-	if (src + PAGE_SIZE <= start || src >= start + size)
-		return;
-	dst = (void *) IPL_PARMBLOCK_ORIGIN;
-	memmove(dst, src, PAGE_SIZE);
-	S390_lowcore.ipl_parmblock_ptr = IPL_PARMBLOCK_ORIGIN;
-}
-
 unsigned long decompress_kernel(void)
 {
 	void *output, *kernel_end;
 
-	output = (void *) ALIGN((unsigned long) &_end + HEAP_SIZE, PAGE_SIZE);
+	output = (void *) ALIGN((unsigned long) _end + HEAP_SIZE, PAGE_SIZE);
 	kernel_end = output + SZ__bss_start;
-	check_ipl_parmblock((void *) 0, (unsigned long) kernel_end);
 
 #ifdef CONFIG_BLK_DEV_INITRD
 	/*
@@ -156,7 +134,6 @@ unsigned long decompress_kernel(void)
 	 * current bss section..
 	 */
 	if (INITRD_START && INITRD_SIZE && kernel_end > (void *) INITRD_START) {
-		check_ipl_parmblock(kernel_end, INITRD_SIZE);
 		memmove(kernel_end, (void *) INITRD_START, INITRD_SIZE);
 		INITRD_START = (unsigned long) kernel_end;
 	}
@@ -166,8 +143,8 @@ unsigned long decompress_kernel(void)
 	 * Clear bss section. free_mem_ptr and free_mem_end_ptr need to be
 	 * initialized afterwards since they reside in bss.
 	 */
-	memset(&_bss, 0, &_ebss - &_bss);
-	free_mem_ptr = (unsigned long) &_end;
+	memset(_bss, 0, _ebss - _bss);
+	free_mem_ptr = (unsigned long) _end;
 	free_mem_end_ptr = free_mem_ptr + HEAP_SIZE;
 
 	__decompress(input_data, input_len, NULL, NULL, output, 0, NULL, error);
diff --git a/arch/s390/boot/compressed/vmlinux.lds.S b/arch/s390/boot/compressed/vmlinux.lds.S
index 8150132b144f..d43c2db12d30 100644
--- a/arch/s390/boot/compressed/vmlinux.lds.S
+++ b/arch/s390/boot/compressed/vmlinux.lds.S
@@ -52,6 +52,7 @@ SECTIONS
 	/* Sections to be discarded */
 	/DISCARD/ : {
 		*(.eh_frame)
+		*(__ex_table)
 		*(*__ksymtab*)
 	}
 }
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index d60798737d86..ad47abd08630 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -329,7 +329,7 @@ static void fallback_exit_blk(struct crypto_tfm *tfm)
 static struct crypto_alg ecb_aes_alg = {
 	.cra_name		=	"ecb(aes)",
 	.cra_driver_name	=	"ecb-aes-s390",
-	.cra_priority		=	400,	/* combo: aes + ecb */
+	.cra_priority		=	401,	/* combo: aes + ecb + 1 */
 	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER |
 					CRYPTO_ALG_NEED_FALLBACK,
 	.cra_blocksize		=	AES_BLOCK_SIZE,
@@ -426,7 +426,7 @@ static int cbc_aes_decrypt(struct blkcipher_desc *desc,
 static struct crypto_alg cbc_aes_alg = {
 	.cra_name		=	"cbc(aes)",
 	.cra_driver_name	=	"cbc-aes-s390",
-	.cra_priority		=	400,	/* combo: aes + cbc */
+	.cra_priority		=	402,	/* ecb-aes-s390 + 1 */
 	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER |
 					CRYPTO_ALG_NEED_FALLBACK,
 	.cra_blocksize		=	AES_BLOCK_SIZE,
@@ -633,7 +633,7 @@ static void xts_fallback_exit(struct crypto_tfm *tfm)
 static struct crypto_alg xts_aes_alg = {
 	.cra_name		=	"xts(aes)",
 	.cra_driver_name	=	"xts-aes-s390",
-	.cra_priority		=	400,	/* combo: aes + xts */
+	.cra_priority		=	402,	/* ecb-aes-s390 + 1 */
 	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER |
 					CRYPTO_ALG_NEED_FALLBACK,
 	.cra_blocksize		=	AES_BLOCK_SIZE,
@@ -763,7 +763,7 @@ static int ctr_aes_decrypt(struct blkcipher_desc *desc,
 static struct crypto_alg ctr_aes_alg = {
 	.cra_name		=	"ctr(aes)",
 	.cra_driver_name	=	"ctr-aes-s390",
-	.cra_priority		=	400,	/* combo: aes + ctr */
+	.cra_priority		=	402,	/* ecb-aes-s390 + 1 */
 	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER |
 					CRYPTO_ALG_NEED_FALLBACK,
 	.cra_blocksize		=	1,
@@ -1047,6 +1047,7 @@ static struct aead_alg gcm_aes_aead = {
 
 static struct crypto_alg *aes_s390_algs_ptr[5];
 static int aes_s390_algs_num;
+static struct aead_alg *aes_s390_aead_alg;
 
 static int aes_s390_register_alg(struct crypto_alg *alg)
 {
@@ -1065,7 +1066,8 @@ static void aes_s390_fini(void)
 	if (ctrblk)
 		free_page((unsigned long) ctrblk);
 
-	crypto_unregister_aead(&gcm_aes_aead);
+	if (aes_s390_aead_alg)
+		crypto_unregister_aead(aes_s390_aead_alg);
 }
 
 static int __init aes_s390_init(void)
@@ -1123,6 +1125,7 @@ static int __init aes_s390_init(void)
 		ret = crypto_register_aead(&gcm_aes_aead);
 		if (ret)
 			goto out_err;
+		aes_s390_aead_alg = &gcm_aes_aead;
 	}
 
 	return 0;
diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c
index 003932db8d12..80b27294c1de 100644
--- a/arch/s390/crypto/paes_s390.c
+++ b/arch/s390/crypto/paes_s390.c
@@ -138,7 +138,7 @@ static int ecb_paes_decrypt(struct blkcipher_desc *desc,
 static struct crypto_alg ecb_paes_alg = {
 	.cra_name		=	"ecb(paes)",
 	.cra_driver_name	=	"ecb-paes-s390",
-	.cra_priority		=	400,	/* combo: aes + ecb */
+	.cra_priority		=	401,	/* combo: aes + ecb + 1 */
 	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
 	.cra_blocksize		=	AES_BLOCK_SIZE,
 	.cra_ctxsize		=	sizeof(struct s390_paes_ctx),
@@ -241,7 +241,7 @@ static int cbc_paes_decrypt(struct blkcipher_desc *desc,
 static struct crypto_alg cbc_paes_alg = {
 	.cra_name		=	"cbc(paes)",
 	.cra_driver_name	=	"cbc-paes-s390",
-	.cra_priority		=	400,	/* combo: aes + cbc */
+	.cra_priority		=	402,	/* ecb-paes-s390 + 1 */
 	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
 	.cra_blocksize		=	AES_BLOCK_SIZE,
 	.cra_ctxsize		=	sizeof(struct s390_paes_ctx),
@@ -377,7 +377,7 @@ static int xts_paes_decrypt(struct blkcipher_desc *desc,
 static struct crypto_alg xts_paes_alg = {
 	.cra_name		=	"xts(paes)",
 	.cra_driver_name	=	"xts-paes-s390",
-	.cra_priority		=	400,	/* combo: aes + xts */
+	.cra_priority		=	402,	/* ecb-paes-s390 + 1 */
 	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
 	.cra_blocksize		=	AES_BLOCK_SIZE,
 	.cra_ctxsize		=	sizeof(struct s390_pxts_ctx),
@@ -523,7 +523,7 @@ static int ctr_paes_decrypt(struct blkcipher_desc *desc,
 static struct crypto_alg ctr_paes_alg = {
 	.cra_name		=	"ctr(paes)",
 	.cra_driver_name	=	"ctr-paes-s390",
-	.cra_priority		=	400,	/* combo: aes + ctr */
+	.cra_priority		=	402,	/* ecb-paes-s390 + 1 */
 	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
 	.cra_blocksize		=	1,
 	.cra_ctxsize		=	sizeof(struct s390_paes_ctx),
diff --git a/arch/s390/include/asm/alternative-asm.h b/arch/s390/include/asm/alternative-asm.h
new file mode 100644
index 000000000000..955d620db23e
--- /dev/null
+++ b/arch/s390/include/asm/alternative-asm.h
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_ALTERNATIVE_ASM_H
+#define _ASM_S390_ALTERNATIVE_ASM_H
+
+#ifdef __ASSEMBLY__
+
+/*
+ * Check the length of an instruction sequence. The length may not be larger
+ * than 254 bytes and it has to be divisible by 2.
+ */
+.macro alt_len_check start,end
+	.if ( \end - \start ) > 254
+	.error "cpu alternatives does not support instructions blocks > 254 bytes\n"
+	.endif
+	.if ( \end - \start ) % 2
+	.error "cpu alternatives instructions length is odd\n"
+	.endif
+.endm
+
+/*
+ * Issue one struct alt_instr descriptor entry (need to put it into
+ * the section .altinstructions, see below). This entry contains
+ * enough information for the alternatives patching code to patch an
+ * instruction. See apply_alternatives().
+ */
+.macro alt_entry orig_start, orig_end, alt_start, alt_end, feature
+	.long	\orig_start - .
+	.long	\alt_start - .
+	.word	\feature
+	.byte	\orig_end - \orig_start
+	.byte	\alt_end - \alt_start
+.endm
+
+/*
+ * Fill up @bytes with nops. The macro emits 6-byte nop instructions
+ * for the bulk of the area, possibly followed by a 4-byte and/or
+ * a 2-byte nop if the size of the area is not divisible by 6.
+ */
+.macro alt_pad_fill bytes
+	.fill	( \bytes ) / 6, 6, 0xc0040000
+	.fill	( \bytes ) % 6 / 4, 4, 0x47000000
+	.fill	( \bytes ) % 6 % 4 / 2, 2, 0x0700
+.endm
+
+/*
+ * Fill up @bytes with nops. If the number of bytes is larger
+ * than 6, emit a jg instruction to branch over all nops, then
+ * fill an area of size (@bytes - 6) with nop instructions.
+ */
+.macro alt_pad bytes
+	.if ( \bytes > 0 )
+	.if ( \bytes > 6 )
+	jg	. + \bytes
+	alt_pad_fill \bytes - 6
+	.else
+	alt_pad_fill \bytes
+	.endif
+	.endif
+.endm
+
+/*
+ * Define an alternative between two instructions. If @feature is
+ * present, early code in apply_alternatives() replaces @oldinstr with
+ * @newinstr. The alt_pad macro pads @oldinstr with nops in case
+ * @newinstr is longer than @oldinstr.
+ */
+.macro ALTERNATIVE oldinstr, newinstr, feature
+	.pushsection .altinstr_replacement,"ax"
+770:	\newinstr
+771:	.popsection
+772:	\oldinstr
+773:	alt_len_check 770b, 771b
+	alt_len_check 772b, 773b
+	alt_pad ( ( 771b - 770b ) - ( 773b - 772b ) )
+774:	.pushsection .altinstructions,"a"
+	alt_entry 772b, 774b, 770b, 771b, \feature
+	.popsection
+.endm
+
+/*
+ * Define an alternative between three instructions. @oldinstr is
+ * replaced by early code in apply_alternatives() with @newinstr1 if
+ * @feature1 is present, or with @newinstr2 if @feature2 is present.
+ * The alt_pad macro pads @oldinstr with nops up to the longer replacement.
+ */
+.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
+	.pushsection .altinstr_replacement,"ax"
+770:	\newinstr1
+771:	\newinstr2
+772:	.popsection
+773:	\oldinstr
+774:	alt_len_check 770b, 771b
+	alt_len_check 771b, 772b
+	alt_len_check 773b, 774b
+	.if ( 771b - 770b > 772b - 771b )
+	alt_pad ( ( 771b - 770b ) - ( 774b - 773b ) )
+	.else
+	alt_pad ( ( 772b - 771b ) - ( 774b - 773b ) )
+	.endif
+775:	.pushsection .altinstructions,"a"
+	alt_entry 773b, 775b, 770b, 771b,\feature1
+	alt_entry 773b, 775b, 771b, 772b,\feature2
+	.popsection
+.endm
+
+#endif	/*  __ASSEMBLY__  */
+
+#endif /* _ASM_S390_ALTERNATIVE_ASM_H */
diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h
index cfce6835b109..c1bedb4c8de0 100644
--- a/arch/s390/include/asm/ap.h
+++ b/arch/s390/include/asm/ap.h
@@ -20,9 +20,9 @@
  */
 typedef unsigned int ap_qid_t;
 
-#define AP_MKQID(_card, _queue) (((_card) & 63) << 8 | ((_queue) & 255))
-#define AP_QID_CARD(_qid) (((_qid) >> 8) & 63)
-#define AP_QID_QUEUE(_qid) ((_qid) & 255)
+#define AP_MKQID(_card, _queue) (((_card) & 0xff) << 8 | ((_queue) & 0xff))
+#define AP_QID_CARD(_qid) (((_qid) >> 8) & 0xff)
+#define AP_QID_QUEUE(_qid) ((_qid) & 0xff)
 
 /**
  * struct ap_queue_status - Holds the AP queue status.
diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h
index 633f8da86137..20bce136b2e5 100644
--- a/arch/s390/include/asm/ccwdev.h
+++ b/arch/s390/include/asm/ccwdev.h
@@ -230,5 +230,5 @@ int ccw_device_siosl(struct ccw_device *);
 
 extern void ccw_device_get_schid(struct ccw_device *, struct subchannel_id *);
 
-struct channel_path_desc *ccw_device_get_chp_desc(struct ccw_device *, int);
+struct channel_path_desc_fmt0 *ccw_device_get_chp_desc(struct ccw_device *, int);
 #endif /* _S390_CCWDEV_H_ */
diff --git a/arch/s390/include/asm/chpid.h b/arch/s390/include/asm/chpid.h
index 4773f747915c..20e0d22f29e9 100644
--- a/arch/s390/include/asm/chpid.h
+++ b/arch/s390/include/asm/chpid.h
@@ -9,7 +9,7 @@
 #include <uapi/asm/chpid.h>
 #include <asm/cio.h>
 
-struct channel_path_desc {
+struct channel_path_desc_fmt0 {
 	u8 flags;
 	u8 lsn;
 	u8 desc;
diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h
index dc84a0171bb3..225667652069 100644
--- a/arch/s390/include/asm/cio.h
+++ b/arch/s390/include/asm/cio.h
@@ -227,7 +227,7 @@ struct esw_eadm {
  * a field is valid; a field not being valid is always passed as %0.
  * If a unit check occurred, @ecw may contain sense data; this is retrieved
  * by the common I/O layer itself if the device doesn't support concurrent
- * sense (so that the device driver never needs to perform basic sene itself).
+ * sense (so that the device driver never needs to perform basic sense itself).
  * For unsolicited interrupts, the irb is passed as-is (expect for sense data,
  * if applicable).
  */
@@ -328,16 +328,6 @@ static inline u8 pathmask_to_pos(u8 mask)
 void channel_subsystem_reinit(void);
 extern void css_schedule_reprobe(void);
 
-extern void reipl_ccw_dev(struct ccw_dev_id *id);
-
-struct cio_iplinfo {
-	u8 ssid;
-	u16 devno;
-	int is_qdio;
-};
-
-extern int cio_get_iplinfo(struct cio_iplinfo *iplinfo);
-
 /* Function from drivers/s390/cio/chsc.c */
 int chsc_sstpc(void *page, unsigned int op, u16 ctrl, u64 *clock_delta);
 int chsc_sstpi(void *page, void *result, size_t size);
diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index dd08db491b89..f58d17e9dd65 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -29,12 +29,12 @@
 /* CPU measurement facility support */
 static inline int cpum_cf_avail(void)
 {
-	return MACHINE_HAS_LPP && test_facility(67);
+	return test_facility(40) && test_facility(67);
 }
 
 static inline int cpum_sf_avail(void)
 {
-	return MACHINE_HAS_LPP && test_facility(68);
+	return test_facility(40) && test_facility(68);
 }
 
 
diff --git a/arch/s390/include/asm/css_chars.h b/arch/s390/include/asm/css_chars.h
index fb56fa3283a2..0563fd3e8458 100644
--- a/arch/s390/include/asm/css_chars.h
+++ b/arch/s390/include/asm/css_chars.h
@@ -32,8 +32,10 @@ struct css_general_char {
 	u32 fcx : 1;	 /* bit 88 */
 	u32 : 19;
 	u32 alt_ssi : 1; /* bit 108 */
-	u32:1;
-	u32 narf:1;	 /* bit 110 */
+	u32 : 1;
+	u32 narf : 1;	 /* bit 110 */
+	u32 : 12;
+	u32 util_str : 1;/* bit 123 */
 } __packed;
 
 extern struct css_general_char css_general_characteristics;
diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h
index 186c7b5f5511..ae5135704616 100644
--- a/arch/s390/include/asm/ipl.h
+++ b/arch/s390/include/asm/ipl.h
@@ -15,8 +15,6 @@
 
 #define NSS_NAME_SIZE	8
 
-#define IPL_PARMBLOCK_ORIGIN	0x2000
-
 #define IPL_PARM_BLK_FCP_LEN (sizeof(struct ipl_list_hdr) + \
 			      sizeof(struct ipl_block_fcp))
 
@@ -29,10 +27,6 @@
 
 #define IPL_MAX_SUPPORTED_VERSION (0)
 
-#define IPL_PARMBLOCK_START	((struct ipl_parameter_block *) \
-				 IPL_PARMBLOCK_ORIGIN)
-#define IPL_PARMBLOCK_SIZE	(IPL_PARMBLOCK_START->hdr.len)
-
 struct ipl_list_hdr {
 	u32 len;
 	u8  reserved1[3];
@@ -83,33 +77,21 @@ struct ipl_parameter_block {
 	union {
 		struct ipl_block_fcp fcp;
 		struct ipl_block_ccw ccw;
+		char raw[PAGE_SIZE - sizeof(struct ipl_list_hdr)];
 	} ipl_info;
 } __packed __aligned(PAGE_SIZE);
 
-/*
- * IPL validity flags
- */
-extern u32 ipl_flags;
-
 struct save_area;
 struct save_area * __init save_area_alloc(bool is_boot_cpu);
 struct save_area * __init save_area_boot_cpu(void);
 void __init save_area_add_regs(struct save_area *, void *regs);
 void __init save_area_add_vxrs(struct save_area *, __vector128 *vxrs);
 
-extern void do_reipl(void);
-extern void do_halt(void);
-extern void do_poff(void);
-extern void ipl_verify_parameters(void);
-extern void ipl_update_parameters(void);
+extern void s390_reset_system(void);
+extern void ipl_store_parameters(void);
 extern size_t append_ipl_vmparm(char *, size_t);
 extern size_t append_ipl_scpdata(char *, size_t);
 
-enum {
-	IPL_DEVNO_VALID		= 1,
-	IPL_PARMBLOCK_VALID	= 2,
-};
-
 enum ipl_type {
 	IPL_TYPE_UNKNOWN	= 1,
 	IPL_TYPE_CCW		= 2,
@@ -138,6 +120,7 @@ struct ipl_info
 
 extern struct ipl_info ipl_info;
 extern void setup_ipl(void);
+extern void set_os_info_reipl_block(void);
 
 /*
  * DIAG 308 support
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index afb0f08b8021..81cdb6b55118 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -294,6 +294,7 @@ struct kvm_vcpu_stat {
 	u64 exit_userspace;
 	u64 exit_null;
 	u64 exit_external_request;
+	u64 exit_io_request;
 	u64 exit_external_interrupt;
 	u64 exit_stop_request;
 	u64 exit_validity;
@@ -310,16 +311,29 @@ struct kvm_vcpu_stat {
 	u64 exit_program_interruption;
 	u64 exit_instr_and_program;
 	u64 exit_operation_exception;
+	u64 deliver_ckc;
+	u64 deliver_cputm;
 	u64 deliver_external_call;
 	u64 deliver_emergency_signal;
 	u64 deliver_service_signal;
-	u64 deliver_virtio_interrupt;
+	u64 deliver_virtio;
 	u64 deliver_stop_signal;
 	u64 deliver_prefix_signal;
 	u64 deliver_restart_signal;
-	u64 deliver_program_int;
-	u64 deliver_io_int;
+	u64 deliver_program;
+	u64 deliver_io;
+	u64 deliver_machine_check;
 	u64 exit_wait_state;
+	u64 inject_ckc;
+	u64 inject_cputm;
+	u64 inject_external_call;
+	u64 inject_emergency_signal;
+	u64 inject_mchk;
+	u64 inject_pfault_init;
+	u64 inject_program;
+	u64 inject_restart;
+	u64 inject_set_prefix;
+	u64 inject_stop_signal;
 	u64 instruction_epsw;
 	u64 instruction_gs;
 	u64 instruction_io_other;
@@ -644,7 +658,12 @@ struct kvm_vcpu_arch {
 };
 
 struct kvm_vm_stat {
-	ulong remote_tlb_flush;
+	u64 inject_io;
+	u64 inject_float_mchk;
+	u64 inject_pfault_done;
+	u64 inject_service_signal;
+	u64 inject_virtio;
+	u64 remote_tlb_flush;
 };
 
 struct kvm_arch_memory_slot {
@@ -792,6 +811,7 @@ struct kvm_arch{
 	int css_support;
 	int use_irqchip;
 	int use_cmma;
+	int use_pfmfi;
 	int user_cpu_state_ctrl;
 	int user_sigp;
 	int user_stsi;
diff --git a/arch/s390/include/asm/kvm_para.h b/arch/s390/include/asm/kvm_para.h
index 74eeec9c0a80..cbc7c3a68e4d 100644
--- a/arch/s390/include/asm/kvm_para.h
+++ b/arch/s390/include/asm/kvm_para.h
@@ -193,6 +193,11 @@ static inline unsigned int kvm_arch_para_features(void)
 	return 0;
 }
 
+static inline unsigned int kvm_arch_para_hints(void)
+{
+	return 0;
+}
+
 static inline bool kvm_check_and_clear_guest_paused(void)
 {
 	return false;
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index db35c41a59d5..c639c95850e4 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -22,8 +22,8 @@ typedef struct {
 	unsigned int has_pgste:1;
 	/* The mmu context uses storage keys. */
 	unsigned int use_skey:1;
-	/* The mmu context uses CMMA. */
-	unsigned int use_cmma:1;
+	/* The mmu context uses CMM. */
+	unsigned int uses_cmm:1;
 } mm_context_t;
 
 #define INIT_MM_CONTEXT(name)						   \
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index 6c8ce15cde7b..324f6f452982 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -31,7 +31,7 @@ static inline int init_new_context(struct task_struct *tsk,
 		(current->mm && current->mm->context.alloc_pgste);
 	mm->context.has_pgste = 0;
 	mm->context.use_skey = 0;
-	mm->context.use_cmma = 0;
+	mm->context.uses_cmm = 0;
 #endif
 	switch (mm->context.asce_limit) {
 	case _REGION2_SIZE:
diff --git a/arch/s390/include/asm/nospec-branch.h b/arch/s390/include/asm/nospec-branch.h
index 7df48e5cf36f..b4bd8c41e9d3 100644
--- a/arch/s390/include/asm/nospec-branch.h
+++ b/arch/s390/include/asm/nospec-branch.h
@@ -6,12 +6,11 @@
 
 #include <linux/types.h>
 
-extern int nospec_call_disable;
-extern int nospec_return_disable;
+extern int nospec_disable;
 
 void nospec_init_branches(void);
-void nospec_call_revert(s32 *start, s32 *end);
-void nospec_return_revert(s32 *start, s32 *end);
+void nospec_auto_detect(void);
+void nospec_revert(s32 *start, s32 *end);
 
 #endif /* __ASSEMBLY__ */
 
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index c7b4333d1de0..f0f9bcf94c03 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -151,4 +151,7 @@ void vmem_map_init(void);
 void *vmem_crst_alloc(unsigned long val);
 pte_t *vmem_pte_alloc(void);
 
+unsigned long base_asce_alloc(unsigned long addr, unsigned long num_pages);
+void base_asce_free(unsigned long asce);
+
 #endif /* _S390_PGALLOC_H */
diff --git a/arch/s390/include/asm/reset.h b/arch/s390/include/asm/reset.h
deleted file mode 100644
index 6450b31ade03..000000000000
--- a/arch/s390/include/asm/reset.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- *    Copyright IBM Corp. 2006
- *    Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
- */
-
-#ifndef _ASM_S390_RESET_H
-#define _ASM_S390_RESET_H
-
-#include <linux/list.h>
-
-struct reset_call {
-	struct list_head list;
-	void (*fn)(void);
-};
-
-extern void register_reset_call(struct reset_call *reset);
-extern void unregister_reset_call(struct reset_call *reset);
-extern void s390_reset_system(void);
-#endif /* _ASM_S390_RESET_H */
diff --git a/arch/s390/include/asm/scsw.h b/arch/s390/include/asm/scsw.h
index 79b7ffa91832..c00f7b031628 100644
--- a/arch/s390/include/asm/scsw.h
+++ b/arch/s390/include/asm/scsw.h
@@ -390,10 +390,10 @@ static inline int scsw_cmd_is_valid_key(union scsw *scsw)
 }
 
 /**
- * scsw_cmd_is_valid_sctl - check fctl field validity
+ * scsw_cmd_is_valid_sctl - check sctl field validity
  * @scsw: pointer to scsw
  *
- * Return non-zero if the fctl field of the specified command mode scsw is
+ * Return non-zero if the sctl field of the specified command mode scsw is
  * valid, zero otherwise.
  */
 static inline int scsw_cmd_is_valid_sctl(union scsw *scsw)
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index 2eb0c8a7b664..124154fdfc97 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -25,7 +25,6 @@
 #define MACHINE_FLAG_DIAG44	_BITUL(6)
 #define MACHINE_FLAG_EDAT1	_BITUL(7)
 #define MACHINE_FLAG_EDAT2	_BITUL(8)
-#define MACHINE_FLAG_LPP	_BITUL(9)
 #define MACHINE_FLAG_TOPOLOGY	_BITUL(10)
 #define MACHINE_FLAG_TE		_BITUL(11)
 #define MACHINE_FLAG_TLB_LC	_BITUL(12)
@@ -66,7 +65,6 @@ extern void detect_memory_memblock(void);
 #define MACHINE_HAS_DIAG44	(S390_lowcore.machine_flags & MACHINE_FLAG_DIAG44)
 #define MACHINE_HAS_EDAT1	(S390_lowcore.machine_flags & MACHINE_FLAG_EDAT1)
 #define MACHINE_HAS_EDAT2	(S390_lowcore.machine_flags & MACHINE_FLAG_EDAT2)
-#define MACHINE_HAS_LPP		(S390_lowcore.machine_flags & MACHINE_FLAG_LPP)
 #define MACHINE_HAS_TOPOLOGY	(S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY)
 #define MACHINE_HAS_TE		(S390_lowcore.machine_flags & MACHINE_FLAG_TE)
 #define MACHINE_HAS_TLB_LC	(S390_lowcore.machine_flags & MACHINE_FLAG_TLB_LC)
diff --git a/arch/s390/include/uapi/asm/dasd.h b/arch/s390/include/uapi/asm/dasd.h
index 451c601406b6..832be5c2584f 100644
--- a/arch/s390/include/uapi/asm/dasd.h
+++ b/arch/s390/include/uapi/asm/dasd.h
@@ -68,25 +68,27 @@ typedef struct dasd_information2_t {
 #define DASD_FORMAT_CDL  2
 /*
  * values to be used for dasd_information_t.features
- * 0x00: default features
- * 0x01: readonly (ro)
- * 0x02: use diag discipline (diag)
- * 0x04: set the device initially online (internal use only)
- * 0x08: enable ERP related logging
- * 0x10: allow I/O to fail on lost paths
- * 0x20: allow I/O to fail when a lock was stolen
- * 0x40: give access to raw eckd data
- * 0x80: enable discard support
+ * 0x100: default features
+ * 0x001: readonly (ro)
+ * 0x002: use diag discipline (diag)
+ * 0x004: set the device initially online (internal use only)
+ * 0x008: enable ERP related logging
+ * 0x010: allow I/O to fail on lost paths
+ * 0x020: allow I/O to fail when a lock was stolen
+ * 0x040: give access to raw eckd data
+ * 0x080: enable discard support
+ * 0x100: enable autodisable for IFCC errors (default)
  */
-#define DASD_FEATURE_DEFAULT	     0x00
-#define DASD_FEATURE_READONLY	     0x01
-#define DASD_FEATURE_USEDIAG	     0x02
-#define DASD_FEATURE_INITIAL_ONLINE  0x04
-#define DASD_FEATURE_ERPLOG	     0x08
-#define DASD_FEATURE_FAILFAST	     0x10
-#define DASD_FEATURE_FAILONSLCK      0x20
-#define DASD_FEATURE_USERAW	     0x40
-#define DASD_FEATURE_DISCARD	     0x80
+#define DASD_FEATURE_READONLY	      0x001
+#define DASD_FEATURE_USEDIAG	      0x002
+#define DASD_FEATURE_INITIAL_ONLINE   0x004
+#define DASD_FEATURE_ERPLOG	      0x008
+#define DASD_FEATURE_FAILFAST	      0x010
+#define DASD_FEATURE_FAILONSLCK       0x020
+#define DASD_FEATURE_USERAW	      0x040
+#define DASD_FEATURE_DISCARD	      0x080
+#define DASD_FEATURE_PATH_AUTODISABLE 0x100
+#define DASD_FEATURE_DEFAULT	      DASD_FEATURE_PATH_AUTODISABLE
 
 #define DASD_PARTN_BITS 2
 
diff --git a/arch/s390/include/uapi/asm/zcrypt.h b/arch/s390/include/uapi/asm/zcrypt.h
index d568307321fc..b62e0614e440 100644
--- a/arch/s390/include/uapi/asm/zcrypt.h
+++ b/arch/s390/include/uapi/asm/zcrypt.h
@@ -203,9 +203,9 @@ struct ep11_urb {
 } __attribute__((packed));
 
 /**
- * struct zcrypt_device_status
+ * struct zcrypt_device_status_ext
  * @hwtype:		raw hardware type
- * @qid:		6 bit device index, 8 bit domain
+ * @qid:		8 bit device index, 8 bit domain
  * @functions:		AP device function bit field 'abcdef'
  *			a, b, c = reserved
  *			d = CCA coprocessor
@@ -214,28 +214,23 @@ struct ep11_urb {
  * @online		online status
  * @reserved		reserved
  */
-struct zcrypt_device_status {
+struct zcrypt_device_status_ext {
 	unsigned int hwtype:8;
-	unsigned int qid:14;
+	unsigned int qid:16;
 	unsigned int online:1;
 	unsigned int functions:6;
-	unsigned int reserved:3;
+	unsigned int reserved:1;
 };
 
-#define MAX_ZDEV_CARDIDS 64
-#define MAX_ZDEV_DOMAINS 256
+#define MAX_ZDEV_CARDIDS_EXT 256
+#define MAX_ZDEV_DOMAINS_EXT 256
 
-/**
- * Maximum number of zcrypt devices
- */
-#define MAX_ZDEV_ENTRIES (MAX_ZDEV_CARDIDS * MAX_ZDEV_DOMAINS)
+/* Maximum number of zcrypt devices */
+#define MAX_ZDEV_ENTRIES_EXT (MAX_ZDEV_CARDIDS_EXT * MAX_ZDEV_DOMAINS_EXT)
 
-/**
- * zcrypt_device_matrix
- * Device matrix of all zcrypt devices
- */
-struct zcrypt_device_matrix {
-	struct zcrypt_device_status device[MAX_ZDEV_ENTRIES];
+/* Device matrix of all zcrypt devices */
+struct zcrypt_device_matrix_ext {
+	struct zcrypt_device_status_ext device[MAX_ZDEV_ENTRIES_EXT];
 };
 
 #define AUTOSELECT ((unsigned int)0xFFFFFFFF)
@@ -270,71 +265,35 @@ struct zcrypt_device_matrix {
  *   ZSENDEP11CPRB
  *     Send an arbitrary EP11 CPRB to an EP11 coprocessor crypto card.
  *
- *   Z90STAT_STATUS_MASK
- *     Return an 64 element array of unsigned chars for the status of
- *     all devices.
+ *   ZCRYPT_DEVICE_STATUS
+ *     The given struct zcrypt_device_matrix_ext is updated with
+ *     status information for each currently known apqn.
+ *
+ *   ZCRYPT_STATUS_MASK
+ *     Return a MAX_ZDEV_CARDIDS_EXT element array of unsigned chars for the
+ *     status of all devices.
  *	 0x01: PCICA
  *	 0x02: PCICC
  *	 0x03: PCIXCC_MCL2
  *	 0x04: PCIXCC_MCL3
  *	 0x05: CEX2C
  *	 0x06: CEX2A
- *	 0x0d: device is disabled via the proc filesystem
- *
- *   Z90STAT_QDEPTH_MASK
- *     Return an 64 element array of unsigned chars for the queue
- *     depth of all devices.
- *
- *   Z90STAT_PERDEV_REQCNT
- *     Return an 64 element array of unsigned integers for the number
- *     of successfully completed requests per device since the device
- *     was detected and made available.
- *
- *   Z90STAT_REQUESTQ_COUNT
- *     Return an integer count of the number of entries waiting to be
- *     sent to a device.
- *
- *   Z90STAT_PENDINGQ_COUNT
- *     Return an integer count of the number of entries sent to all
- *     devices awaiting the reply.
- *
- *   Z90STAT_TOTALOPEN_COUNT
- *     Return an integer count of the number of open file handles.
- *
- *   Z90STAT_DOMAIN_INDEX
- *     Return the integer value of the Cryptographic Domain.
- *
- *   The following ioctls are deprecated and should be no longer used:
- *
- *   Z90STAT_TOTALCOUNT
- *     Return an integer count of all device types together.
- *
- *   Z90STAT_PCICACOUNT
- *     Return an integer count of all PCICAs.
- *
- *   Z90STAT_PCICCCOUNT
- *     Return an integer count of all PCICCs.
- *
- *   Z90STAT_PCIXCCMCL2COUNT
- *     Return an integer count of all MCL2 PCIXCCs.
- *
- *   Z90STAT_PCIXCCMCL3COUNT
- *     Return an integer count of all MCL3 PCIXCCs.
- *
- *   Z90STAT_CEX2CCOUNT
- *     Return an integer count of all CEX2Cs.
+ *	 0x07: CEX3C
+ *	 0x08: CEX3A
+ *	 0x0a: CEX4
+ *	 0x0b: CEX5
+ *	 0x0c: CEX6
+ *	 0x0d: device is disabled
  *
- *   Z90STAT_CEX2ACOUNT
- *     Return an integer count of all CEX2As.
+ *   ZCRYPT_QDEPTH_MASK
+ *     Return a MAX_ZDEV_CARDIDS_EXT element array of unsigned chars for the
+ *     queue depth of all devices.
  *
- *   ICAZ90STATUS
- *     Return some device driver status in a ica_z90_status struct
- *     This takes an ica_z90_status struct as its arg.
+ *   ZCRYPT_PERDEV_REQCNT
+ *     Return a MAX_ZDEV_CARDIDS_EXT element array of unsigned integers for
+ *     the number of successfully completed requests per device since the
+ *     device was detected and made available.
  *
- *   Z90STAT_PCIXCCCOUNT
- *     Return an integer count of all PCIXCCs (MCL2 + MCL3).
- *     This is DEPRECATED now that MCL3 PCIXCCs are treated differently from
- *     MCL2 PCIXCCs.
  */
 
 /**
@@ -344,22 +303,56 @@ struct zcrypt_device_matrix {
 #define ICARSACRT	_IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x06, 0)
 #define ZSECSENDCPRB	_IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x81, 0)
 #define ZSENDEP11CPRB	_IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x04, 0)
-#define ZDEVICESTATUS	_IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x4f, 0)
 
-/* New status calls */
-#define Z90STAT_TOTALCOUNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x40, int)
-#define Z90STAT_PCICACOUNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x41, int)
-#define Z90STAT_PCICCCOUNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x42, int)
-#define Z90STAT_PCIXCCMCL2COUNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x4b, int)
-#define Z90STAT_PCIXCCMCL3COUNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x4c, int)
-#define Z90STAT_CEX2CCOUNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x4d, int)
-#define Z90STAT_CEX2ACOUNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x4e, int)
+#define ZCRYPT_DEVICE_STATUS _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x5f, 0)
+#define ZCRYPT_STATUS_MASK   _IOR(ZCRYPT_IOCTL_MAGIC, 0x58, char[MAX_ZDEV_CARDIDS_EXT])
+#define ZCRYPT_QDEPTH_MASK   _IOR(ZCRYPT_IOCTL_MAGIC, 0x59, char[MAX_ZDEV_CARDIDS_EXT])
+#define ZCRYPT_PERDEV_REQCNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x5a, int[MAX_ZDEV_CARDIDS_EXT])
+
+/*
+ * Only deprecated defines, structs and ioctls below this line.
+ */
+
+/* Deprecated: use MAX_ZDEV_CARDIDS_EXT */
+#define MAX_ZDEV_CARDIDS 64
+/* Deprecated: use MAX_ZDEV_DOMAINS_EXT */
+#define MAX_ZDEV_DOMAINS 256
+
+/* Deprecated: use MAX_ZDEV_ENTRIES_EXT */
+#define MAX_ZDEV_ENTRIES (MAX_ZDEV_CARDIDS * MAX_ZDEV_DOMAINS)
+
+/* Deprecated: use struct zcrypt_device_status_ext */
+struct zcrypt_device_status {
+	unsigned int hwtype:8;
+	unsigned int qid:14;
+	unsigned int online:1;
+	unsigned int functions:6;
+	unsigned int reserved:3;
+};
+
+/* Deprecated: use struct zcrypt_device_matrix_ext */
+struct zcrypt_device_matrix {
+	struct zcrypt_device_status device[MAX_ZDEV_ENTRIES];
+};
+
+/* Deprecated: use ZCRYPT_DEVICE_STATUS */
+#define ZDEVICESTATUS _IOC(_IOC_READ|_IOC_WRITE, ZCRYPT_IOCTL_MAGIC, 0x4f, 0)
+/* Deprecated: use ZCRYPT_STATUS_MASK */
+#define Z90STAT_STATUS_MASK _IOR(ZCRYPT_IOCTL_MAGIC, 0x48, char[64])
+/* Deprecated: use ZCRYPT_QDEPTH_MASK */
+#define Z90STAT_QDEPTH_MASK _IOR(ZCRYPT_IOCTL_MAGIC, 0x49, char[64])
+/* Deprecated: use ZCRYPT_PERDEV_REQCNT */
+#define Z90STAT_PERDEV_REQCNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x4a, int[64])
+
+/* Deprecated: use sysfs to query these values */
 #define Z90STAT_REQUESTQ_COUNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x44, int)
 #define Z90STAT_PENDINGQ_COUNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x45, int)
 #define Z90STAT_TOTALOPEN_COUNT _IOR(ZCRYPT_IOCTL_MAGIC, 0x46, int)
 #define Z90STAT_DOMAIN_INDEX	_IOR(ZCRYPT_IOCTL_MAGIC, 0x47, int)
-#define Z90STAT_STATUS_MASK	_IOR(ZCRYPT_IOCTL_MAGIC, 0x48, char[64])
-#define Z90STAT_QDEPTH_MASK	_IOR(ZCRYPT_IOCTL_MAGIC, 0x49, char[64])
-#define Z90STAT_PERDEV_REQCNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x4a, int[64])
+
+/*
+ * The ioctl number ranges 0x40 - 0x42 and 0x4b - 0x4e were used in the
+ * past; do not assign new ioctls in these ranges.
+ */
 
 #endif /* __ASM_S390_ZCRYPT_H */
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 7f27e3da9709..b06a6f79c1ec 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -61,11 +61,11 @@ obj-y	+= debug.o irq.o ipl.o dis.o diag.o vdso.o als.o
 obj-y	+= sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o
 obj-y	+= runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
 obj-y	+= entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o
+obj-y	+= nospec-branch.o
 
 extra-y				+= head.o head64.o vmlinux.lds
 
-obj-$(CONFIG_EXPOLINE)		+= nospec-branch.o
-CFLAGS_REMOVE_expoline.o	+= $(CC_FLAGS_EXPOLINE)
+CFLAGS_REMOVE_nospec-branch.o	+= $(CC_FLAGS_EXPOLINE)
 
 obj-$(CONFIG_MODULES)		+= module.o
 obj-$(CONFIG_SMP)		+= smp.o
diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c
index 22476135f738..8e1f2aee85ef 100644
--- a/arch/s390/kernel/alternative.c
+++ b/arch/s390/kernel/alternative.c
@@ -2,6 +2,7 @@
 #include <linux/module.h>
 #include <asm/alternative.h>
 #include <asm/facility.h>
+#include <asm/nospec-branch.h>
 
 #define MAX_PATCH_LEN (255 - 1)
 
@@ -15,29 +16,6 @@ static int __init disable_alternative_instructions(char *str)
 
 early_param("noaltinstr", disable_alternative_instructions);
 
-static int __init nobp_setup_early(char *str)
-{
-	bool enabled;
-	int rc;
-
-	rc = kstrtobool(str, &enabled);
-	if (rc)
-		return rc;
-	if (enabled && test_facility(82))
-		__set_facility(82, S390_lowcore.alt_stfle_fac_list);
-	else
-		__clear_facility(82, S390_lowcore.alt_stfle_fac_list);
-	return 0;
-}
-early_param("nobp", nobp_setup_early);
-
-static int __init nospec_setup_early(char *str)
-{
-	__clear_facility(82, S390_lowcore.alt_stfle_fac_list);
-	return 0;
-}
-early_param("nospec", nospec_setup_early);
-
 struct brcl_insn {
 	u16 opc;
 	s32 disp;
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 587b195b588d..cfe2c45c5180 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -63,6 +63,7 @@ int main(void)
 	OFFSET(__SF_SIE_CONTROL, stack_frame, empty1[0]);
 	OFFSET(__SF_SIE_SAVEAREA, stack_frame, empty1[1]);
 	OFFSET(__SF_SIE_REASON, stack_frame, empty1[2]);
+	OFFSET(__SF_SIE_FLAGS, stack_frame, empty1[3]);
 	BLANK();
 	/* timeval/timezone offsets for use by vdso */
 	OFFSET(__VDSO_UPD_COUNT, vdso_data, tb_update_count);
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index 18c1eeb847b2..6f2a193ccccc 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -279,7 +279,7 @@ static int setup_frame32(struct ksignal *ksig, sigset_t *set,
 	if (put_compat_sigset((compat_sigset_t __user *)frame->sc.oldmask,
 			      set, sizeof(compat_sigset_t)))
 		return -EFAULT;
-	if (__put_user(ptr_to_compat(&frame->sc), &frame->sc.sregs))
+	if (__put_user(ptr_to_compat(&frame->sregs), &frame->sc.sregs))
 		return -EFAULT;
 
 	/* Store registers needed to create the signal frame */
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index ac707a9f729e..32daa0f84325 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -67,7 +67,7 @@ static noinline __init void init_kernel_storage_key(void)
 #if PAGE_DEFAULT_KEY
 	unsigned long end_pfn, init_pfn;
 
-	end_pfn = PFN_UP(__pa(&_end));
+	end_pfn = PFN_UP(__pa(_end));
 
 	for (init_pfn = 0 ; init_pfn < end_pfn; init_pfn++)
 		page_set_storage_key(init_pfn << PAGE_SHIFT,
@@ -242,8 +242,6 @@ static __init void detect_machine_facilities(void)
 		S390_lowcore.machine_flags |= MACHINE_FLAG_EDAT2;
 	if (test_facility(3))
 		S390_lowcore.machine_flags |= MACHINE_FLAG_IDTE;
-	if (test_facility(40))
-		S390_lowcore.machine_flags |= MACHINE_FLAG_LPP;
 	if (test_facility(50) && test_facility(73)) {
 		S390_lowcore.machine_flags |= MACHINE_FLAG_TE;
 		__ctl_set_bit(0, 55);
@@ -344,16 +342,6 @@ static __init void memmove_early(void *dst, const void *src, size_t n)
 	S390_lowcore.program_new_psw = old;
 }
 
-static __init noinline void ipl_save_parameters(void)
-{
-	void *src, *dst;
-
-	src = (void *)(unsigned long) S390_lowcore.ipl_parmblock_ptr;
-	dst = (void *) IPL_PARMBLOCK_ORIGIN;
-	memmove_early(dst, src, PAGE_SIZE);
-	S390_lowcore.ipl_parmblock_ptr = IPL_PARMBLOCK_ORIGIN;
-}
-
 static __init noinline void rescue_initrd(void)
 {
 #ifdef CONFIG_BLK_DEV_INITRD
@@ -423,10 +411,8 @@ static void __init setup_boot_command_line(void)
 void __init startup_init(void)
 {
 	reset_tod_clock();
-	ipl_save_parameters();
 	rescue_initrd();
 	clear_bss_section();
-	ipl_verify_parameters();
 	time_early_init();
 	init_kernel_storage_key();
 	lockdep_off();
@@ -434,7 +420,7 @@ void __init startup_init(void)
 	setup_facility_list();
 	detect_machine_type();
 	setup_arch_string();
-	ipl_update_parameters();
+	ipl_store_parameters();
 	setup_boot_command_line();
 	detect_diag9c();
 	detect_diag44();
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index a5621ea6d123..3f22f139a041 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -11,6 +11,7 @@
 
 #include <linux/init.h>
 #include <linux/linkage.h>
+#include <asm/alternative-asm.h>
 #include <asm/processor.h>
 #include <asm/cache.h>
 #include <asm/ctl_reg.h>
@@ -57,6 +58,8 @@ _CIF_WORK	= (_CIF_MCCK_PENDING | _CIF_ASCE_PRIMARY | \
 		   _CIF_ASCE_SECONDARY | _CIF_FPU)
 _PIF_WORK	= (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART)
 
+_LPP_OFFSET	= __LC_LPP
+
 #define BASED(name) name-cleanup_critical(%r13)
 
 	.macro	TRACE_IRQS_ON
@@ -162,65 +165,22 @@ _PIF_WORK	= (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART)
 	.endm
 
 	.macro BPOFF
-	.pushsection .altinstr_replacement, "ax"
-660:	.long	0xb2e8c000
-	.popsection
-661:	.long	0x47000000
-	.pushsection .altinstructions, "a"
-	.long 661b - .
-	.long 660b - .
-	.word 82
-	.byte 4
-	.byte 4
-	.popsection
+	ALTERNATIVE "", ".long 0xb2e8c000", 82
 	.endm
 
 	.macro BPON
-	.pushsection .altinstr_replacement, "ax"
-662:	.long	0xb2e8d000
-	.popsection
-663:	.long	0x47000000
-	.pushsection .altinstructions, "a"
-	.long 663b - .
-	.long 662b - .
-	.word 82
-	.byte 4
-	.byte 4
-	.popsection
+	ALTERNATIVE "", ".long 0xb2e8d000", 82
 	.endm
 
 	.macro BPENTER tif_ptr,tif_mask
-	.pushsection .altinstr_replacement, "ax"
-662:	.word	0xc004, 0x0000, 0x0000	# 6 byte nop
-	.word	0xc004, 0x0000, 0x0000	# 6 byte nop
-	.popsection
-664:	TSTMSK	\tif_ptr,\tif_mask
-	jz	. + 8
-	.long	0xb2e8d000
-	.pushsection .altinstructions, "a"
-	.long 664b - .
-	.long 662b - .
-	.word 82
-	.byte 12
-	.byte 12
-	.popsection
+	ALTERNATIVE "TSTMSK \tif_ptr,\tif_mask; jz .+8; .long 0xb2e8d000", \
+		    "", 82
 	.endm
 
 	.macro BPEXIT tif_ptr,tif_mask
 	TSTMSK	\tif_ptr,\tif_mask
-	.pushsection .altinstr_replacement, "ax"
-662:	jnz	. + 8
-	.long	0xb2e8d000
-	.popsection
-664:	jz	. + 8
-	.long	0xb2e8c000
-	.pushsection .altinstructions, "a"
-	.long 664b - .
-	.long 662b - .
-	.word 82
-	.byte 8
-	.byte 8
-	.popsection
+	ALTERNATIVE "jz .+8;  .long 0xb2e8c000", \
+		    "jnz .+8; .long 0xb2e8d000", 82
 	.endm
 
 #ifdef CONFIG_EXPOLINE
@@ -323,10 +283,8 @@ ENTRY(__switch_to)
 	aghi	%r3,__TASK_pid
 	mvc	__LC_CURRENT_PID(4,%r0),0(%r3)	# store pid of next
 	lmg	%r6,%r15,__SF_GPRS(%r15)	# load gprs of next task
-	TSTMSK	__LC_MACHINE_FLAGS,MACHINE_FLAG_LPP
-	jz	0f
-	.insn	s,0xb2800000,__LC_LPP		# set program parameter
-0:	BR_R1USE_R14
+	ALTERNATIVE "", ".insn s,0xb2800000,_LPP_OFFSET", 40
+	BR_R1USE_R14
 
 .L__critical_start:
 
@@ -339,10 +297,10 @@ ENTRY(__switch_to)
 ENTRY(sie64a)
 	stmg	%r6,%r14,__SF_GPRS(%r15)	# save kernel registers
 	lg	%r12,__LC_CURRENT
-	stg	%r2,__SF_EMPTY(%r15)		# save control block pointer
-	stg	%r3,__SF_EMPTY+8(%r15)		# save guest register save area
-	xc	__SF_EMPTY+16(8,%r15),__SF_EMPTY+16(%r15) # reason code = 0
-	mvc	__SF_EMPTY+24(8,%r15),__TI_flags(%r12) # copy thread flags
+	stg	%r2,__SF_SIE_CONTROL(%r15)	# save control block pointer
+	stg	%r3,__SF_SIE_SAVEAREA(%r15)	# save guest register save area
+	xc	__SF_SIE_REASON(8,%r15),__SF_SIE_REASON(%r15) # reason code = 0
+	mvc	__SF_SIE_FLAGS(8,%r15),__TI_flags(%r12) # copy thread flags
 	TSTMSK	__LC_CPU_FLAGS,_CIF_FPU		# load guest fp/vx registers ?
 	jno	.Lsie_load_guest_gprs
 	brasl	%r14,load_fpu_regs		# load guest fp/vx regs
@@ -353,18 +311,18 @@ ENTRY(sie64a)
 	jz	.Lsie_gmap
 	lctlg	%c1,%c1,__GMAP_ASCE(%r14)	# load primary asce
 .Lsie_gmap:
-	lg	%r14,__SF_EMPTY(%r15)		# get control block pointer
+	lg	%r14,__SF_SIE_CONTROL(%r15)	# get control block pointer
 	oi	__SIE_PROG0C+3(%r14),1		# we are going into SIE now
 	tm	__SIE_PROG20+3(%r14),3		# last exit...
 	jnz	.Lsie_skip
 	TSTMSK	__LC_CPU_FLAGS,_CIF_FPU
 	jo	.Lsie_skip			# exit if fp/vx regs changed
-	BPEXIT	__SF_EMPTY+24(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
+	BPEXIT	__SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
 .Lsie_entry:
 	sie	0(%r14)
 .Lsie_exit:
 	BPOFF
-	BPENTER	__SF_EMPTY+24(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
+	BPENTER	__SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
 .Lsie_skip:
 	ni	__SIE_PROG0C+3(%r14),0xfe	# no longer in SIE
 	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
@@ -383,7 +341,7 @@ ENTRY(sie64a)
 	nopr	7
 	.globl sie_exit
 sie_exit:
-	lg	%r14,__SF_EMPTY+8(%r15)		# load guest register save area
+	lg	%r14,__SF_SIE_SAVEAREA(%r15)	# load guest register save area
 	stmg	%r0,%r13,0(%r14)		# save guest gprs 0-13
 	xgr	%r0,%r0				# clear guest registers to
 	xgr	%r1,%r1				# prevent speculative use
@@ -392,11 +350,11 @@ sie_exit:
 	xgr	%r4,%r4
 	xgr	%r5,%r5
 	lmg	%r6,%r14,__SF_GPRS(%r15)	# restore kernel registers
-	lg	%r2,__SF_EMPTY+16(%r15)		# return exit reason code
+	lg	%r2,__SF_SIE_REASON(%r15)	# return exit reason code
 	BR_R1USE_R14
 .Lsie_fault:
 	lghi	%r14,-EFAULT
-	stg	%r14,__SF_EMPTY+16(%r15)	# set exit reason code
+	stg	%r14,__SF_SIE_REASON(%r15)	# set exit reason code
 	j	sie_exit
 
 	EX_TABLE(.Lrewind_pad6,.Lsie_fault)
@@ -685,7 +643,7 @@ ENTRY(pgm_check_handler)
 	slg	%r14,BASED(.Lsie_critical_start)
 	clg	%r14,BASED(.Lsie_critical_length)
 	jhe	0f
-	lg	%r14,__SF_EMPTY(%r15)		# get control block pointer
+	lg	%r14,__SF_SIE_CONTROL(%r15)	# get control block pointer
 	ni	__SIE_PROG0C+3(%r14),0xfe	# no longer in SIE
 	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
 	larl	%r9,sie_exit			# skip forward to sie_exit
@@ -1285,10 +1243,8 @@ ENTRY(mcck_int_handler)
 # PSW restart interrupt handler
 #
 ENTRY(restart_int_handler)
-	TSTMSK	__LC_MACHINE_FLAGS,MACHINE_FLAG_LPP
-	jz	0f
-	.insn	s,0xb2800000,__LC_LPP
-0:	stg	%r15,__LC_SAVE_AREA_RESTART
+	ALTERNATIVE "", ".insn s,0xb2800000,_LPP_OFFSET", 40
+	stg	%r15,__LC_SAVE_AREA_RESTART
 	lg	%r15,__LC_RESTART_STACK
 	aghi	%r15,-__PT_SIZE			# create pt_regs on stack
 	xc	0(__PT_SIZE,%r15),0(%r15)
@@ -1397,8 +1353,8 @@ cleanup_critical:
 	clg     %r9,BASED(.Lsie_crit_mcck_length)
 	jh      1f
 	oi      __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST
-1:	BPENTER __SF_EMPTY+24(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
-	lg	%r9,__SF_EMPTY(%r15)		# get control block pointer
+1:	BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
+	lg	%r9,__SF_SIE_CONTROL(%r15)	# get control block pointer
 	ni	__SIE_PROG0C+3(%r9),0xfe	# no longer in SIE
 	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
 	larl	%r9,sie_exit			# skip forward to sie_exit
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 34477c1aee6d..4296d7e61fb6 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -24,9 +24,7 @@
 #include <asm/smp.h>
 #include <asm/setup.h>
 #include <asm/cpcmd.h>
-#include <asm/cio.h>
 #include <asm/ebcdic.h>
-#include <asm/reset.h>
 #include <asm/sclp.h>
 #include <asm/checksum.h>
 #include <asm/debug.h>
@@ -119,39 +117,12 @@ static char *dump_type_str(enum dump_type type)
 	}
 }
 
-static u8 ipl_ssid;
-static u16 ipl_devno;
-u32 ipl_flags;
-
-enum ipl_method {
-	REIPL_METHOD_CCW_CIO,
-	REIPL_METHOD_CCW_DIAG,
-	REIPL_METHOD_CCW_VM,
-	REIPL_METHOD_FCP_RO_DIAG,
-	REIPL_METHOD_FCP_RW_DIAG,
-	REIPL_METHOD_FCP_RO_VM,
-	REIPL_METHOD_FCP_DUMP,
-	REIPL_METHOD_NSS,
-	REIPL_METHOD_NSS_DIAG,
-	REIPL_METHOD_DEFAULT,
-};
-
-enum dump_method {
-	DUMP_METHOD_NONE,
-	DUMP_METHOD_CCW_CIO,
-	DUMP_METHOD_CCW_DIAG,
-	DUMP_METHOD_CCW_VM,
-	DUMP_METHOD_FCP_DIAG,
-};
-
-static int diag308_set_works;
-
+static int ipl_block_valid;
 static struct ipl_parameter_block ipl_block;
 
 static int reipl_capabilities = IPL_TYPE_UNKNOWN;
 
 static enum ipl_type reipl_type = IPL_TYPE_UNKNOWN;
-static enum ipl_method reipl_method = REIPL_METHOD_DEFAULT;
 static struct ipl_parameter_block *reipl_block_fcp;
 static struct ipl_parameter_block *reipl_block_ccw;
 static struct ipl_parameter_block *reipl_block_nss;
@@ -159,7 +130,6 @@ static struct ipl_parameter_block *reipl_block_actual;
 
 static int dump_capabilities = DUMP_TYPE_NONE;
 static enum dump_type dump_type = DUMP_TYPE_NONE;
-static enum dump_method dump_method = DUMP_METHOD_NONE;
 static struct ipl_parameter_block *dump_block_fcp;
 static struct ipl_parameter_block *dump_block_ccw;
 
@@ -260,33 +230,25 @@ static struct kobj_attribute sys_##_prefix##_##_name##_attr =		\
 			sys_##_prefix##_##_name##_show,			\
 			sys_##_prefix##_##_name##_store)
 
-static void make_attrs_ro(struct attribute **attrs)
-{
-	while (*attrs) {
-		(*attrs)->mode = S_IRUGO;
-		attrs++;
-	}
-}
-
 /*
  * ipl section
  */
 
 static __init enum ipl_type get_ipl_type(void)
 {
-	struct ipl_parameter_block *ipl = IPL_PARMBLOCK_START;
-
-	if (!(ipl_flags & IPL_DEVNO_VALID))
+	if (!ipl_block_valid)
 		return IPL_TYPE_UNKNOWN;
-	if (!(ipl_flags & IPL_PARMBLOCK_VALID))
+
+	switch (ipl_block.hdr.pbt) {
+	case DIAG308_IPL_TYPE_CCW:
 		return IPL_TYPE_CCW;
-	if (ipl->hdr.version > IPL_MAX_SUPPORTED_VERSION)
-		return IPL_TYPE_UNKNOWN;
-	if (ipl->hdr.pbt != DIAG308_IPL_TYPE_FCP)
-		return IPL_TYPE_UNKNOWN;
-	if (ipl->ipl_info.fcp.opt == DIAG308_IPL_OPT_DUMP)
-		return IPL_TYPE_FCP_DUMP;
-	return IPL_TYPE_FCP;
+	case DIAG308_IPL_TYPE_FCP:
+		if (ipl_block.ipl_info.fcp.opt == DIAG308_IPL_OPT_DUMP)
+			return IPL_TYPE_FCP_DUMP;
+		else
+			return IPL_TYPE_FCP;
+	}
+	return IPL_TYPE_UNKNOWN;
 }
 
 struct ipl_info ipl_info;
@@ -338,7 +300,7 @@ size_t append_ipl_vmparm(char *dest, size_t size)
 	size_t rc;
 
 	rc = 0;
-	if (diag308_set_works && (ipl_block.hdr.pbt == DIAG308_IPL_TYPE_CCW))
+	if (ipl_block_valid && ipl_block.hdr.pbt == DIAG308_IPL_TYPE_CCW)
 		rc = reipl_get_ascii_vmparm(dest, size, &ipl_block);
 	else
 		dest[0] = 0;
@@ -401,7 +363,7 @@ size_t append_ipl_scpdata(char *dest, size_t len)
 	size_t rc;
 
 	rc = 0;
-	if (ipl_block.hdr.pbt == DIAG308_IPL_TYPE_FCP)
+	if (ipl_block_valid && ipl_block.hdr.pbt == DIAG308_IPL_TYPE_FCP)
 		rc = reipl_append_ascii_scpdata(dest, len, &ipl_block);
 	else
 		dest[0] = 0;
@@ -415,14 +377,14 @@ static struct kobj_attribute sys_ipl_vm_parm_attr =
 static ssize_t sys_ipl_device_show(struct kobject *kobj,
 				   struct kobj_attribute *attr, char *page)
 {
-	struct ipl_parameter_block *ipl = IPL_PARMBLOCK_START;
-
 	switch (ipl_info.type) {
 	case IPL_TYPE_CCW:
-		return sprintf(page, "0.%x.%04x\n", ipl_ssid, ipl_devno);
+		return sprintf(page, "0.%x.%04x\n", ipl_block.ipl_info.ccw.ssid,
+			       ipl_block.ipl_info.ccw.devno);
 	case IPL_TYPE_FCP:
 	case IPL_TYPE_FCP_DUMP:
-		return sprintf(page, "0.0.%04x\n", ipl->ipl_info.fcp.devno);
+		return sprintf(page, "0.0.%04x\n",
+			       ipl_block.ipl_info.fcp.devno);
 	default:
 		return 0;
 	}
@@ -435,8 +397,8 @@ static ssize_t ipl_parameter_read(struct file *filp, struct kobject *kobj,
 				  struct bin_attribute *attr, char *buf,
 				  loff_t off, size_t count)
 {
-	return memory_read_from_buffer(buf, count, &off, IPL_PARMBLOCK_START,
-					IPL_PARMBLOCK_SIZE);
+	return memory_read_from_buffer(buf, count, &off, &ipl_block,
+				       ipl_block.hdr.len);
 }
 static struct bin_attribute ipl_parameter_attr =
 	__BIN_ATTR(binary_parameter, S_IRUGO, ipl_parameter_read, NULL,
@@ -446,8 +408,8 @@ static ssize_t ipl_scp_data_read(struct file *filp, struct kobject *kobj,
 				 struct bin_attribute *attr, char *buf,
 				 loff_t off, size_t count)
 {
-	unsigned int size = IPL_PARMBLOCK_START->ipl_info.fcp.scp_data_len;
-	void *scp_data = &IPL_PARMBLOCK_START->ipl_info.fcp.scp_data;
+	unsigned int size = ipl_block.ipl_info.fcp.scp_data_len;
+	void *scp_data = &ipl_block.ipl_info.fcp.scp_data;
 
 	return memory_read_from_buffer(buf, count, &off, scp_data, size);
 }
@@ -462,14 +424,14 @@ static struct bin_attribute *ipl_fcp_bin_attrs[] = {
 
 /* FCP ipl device attributes */
 
-DEFINE_IPL_ATTR_RO(ipl_fcp, wwpn, "0x%016llx\n", (unsigned long long)
-		   IPL_PARMBLOCK_START->ipl_info.fcp.wwpn);
-DEFINE_IPL_ATTR_RO(ipl_fcp, lun, "0x%016llx\n", (unsigned long long)
-		   IPL_PARMBLOCK_START->ipl_info.fcp.lun);
-DEFINE_IPL_ATTR_RO(ipl_fcp, bootprog, "%lld\n", (unsigned long long)
-		   IPL_PARMBLOCK_START->ipl_info.fcp.bootprog);
-DEFINE_IPL_ATTR_RO(ipl_fcp, br_lba, "%lld\n", (unsigned long long)
-		   IPL_PARMBLOCK_START->ipl_info.fcp.br_lba);
+DEFINE_IPL_ATTR_RO(ipl_fcp, wwpn, "0x%016llx\n",
+		   (unsigned long long)ipl_block.ipl_info.fcp.wwpn);
+DEFINE_IPL_ATTR_RO(ipl_fcp, lun, "0x%016llx\n",
+		   (unsigned long long)ipl_block.ipl_info.fcp.lun);
+DEFINE_IPL_ATTR_RO(ipl_fcp, bootprog, "%lld\n",
+		   (unsigned long long)ipl_block.ipl_info.fcp.bootprog);
+DEFINE_IPL_ATTR_RO(ipl_fcp, br_lba, "%lld\n",
+		   (unsigned long long)ipl_block.ipl_info.fcp.br_lba);
 
 static ssize_t ipl_ccw_loadparm_show(struct kobject *kobj,
 				     struct kobj_attribute *attr, char *page)
@@ -545,10 +507,6 @@ static void __ipl_run(void *unused)
 {
 	__bpon();
 	diag308(DIAG308_LOAD_CLEAR, NULL);
-	if (MACHINE_IS_VM)
-		__cpcmd("IPL", NULL, 0, NULL);
-	else if (ipl_info.type == IPL_TYPE_CCW)
-		reipl_ccw_dev(&ipl_info.data.ccw.dev_id);
 }
 
 static void ipl_run(struct shutdown_trigger *trigger)
@@ -776,6 +734,7 @@ static ssize_t reipl_generic_loadparm_store(struct ipl_parameter_block *ipb,
 	/* copy and convert to ebcdic */
 	memcpy(ipb->hdr.loadparm, buf, lp_len);
 	ASCEBC(ipb->hdr.loadparm, LOADPARM_LEN);
+	ipb->hdr.flags |= DIAG308_FLAGS_LP_VALID;
 	return len;
 }
 
@@ -938,11 +897,10 @@ static struct attribute_group reipl_nss_attr_group = {
 	.attrs = reipl_nss_attrs,
 };
 
-static void set_reipl_block_actual(struct ipl_parameter_block *reipl_block)
+void set_os_info_reipl_block(void)
 {
-	reipl_block_actual = reipl_block;
 	os_info_entry_add(OS_INFO_REIPL_BLOCK, reipl_block_actual,
-			  reipl_block->hdr.len);
+			  reipl_block_actual->hdr.len);
 }
 
 /* reipl type */
@@ -954,38 +912,16 @@ static int reipl_set_type(enum ipl_type type)
 
 	switch(type) {
 	case IPL_TYPE_CCW:
-		if (diag308_set_works)
-			reipl_method = REIPL_METHOD_CCW_DIAG;
-		else if (MACHINE_IS_VM)
-			reipl_method = REIPL_METHOD_CCW_VM;
-		else
-			reipl_method = REIPL_METHOD_CCW_CIO;
-		set_reipl_block_actual(reipl_block_ccw);
+		reipl_block_actual = reipl_block_ccw;
 		break;
 	case IPL_TYPE_FCP:
-		if (diag308_set_works)
-			reipl_method = REIPL_METHOD_FCP_RW_DIAG;
-		else if (MACHINE_IS_VM)
-			reipl_method = REIPL_METHOD_FCP_RO_VM;
-		else
-			reipl_method = REIPL_METHOD_FCP_RO_DIAG;
-		set_reipl_block_actual(reipl_block_fcp);
-		break;
-	case IPL_TYPE_FCP_DUMP:
-		reipl_method = REIPL_METHOD_FCP_DUMP;
+		reipl_block_actual = reipl_block_fcp;
 		break;
 	case IPL_TYPE_NSS:
-		if (diag308_set_works)
-			reipl_method = REIPL_METHOD_NSS_DIAG;
-		else
-			reipl_method = REIPL_METHOD_NSS;
-		set_reipl_block_actual(reipl_block_nss);
-		break;
-	case IPL_TYPE_UNKNOWN:
-		reipl_method = REIPL_METHOD_DEFAULT;
+		reipl_block_actual = reipl_block_nss;
 		break;
 	default:
-		BUG();
+		break;
 	}
 	reipl_type = type;
 	return 0;
@@ -1018,77 +954,25 @@ static struct kobj_attribute reipl_type_attr =
 static struct kset *reipl_kset;
 static struct kset *reipl_fcp_kset;
 
-static void get_ipl_string(char *dst, struct ipl_parameter_block *ipb,
-			   const enum ipl_method m)
-{
-	char loadparm[LOADPARM_LEN + 1] = {};
-	char vmparm[DIAG308_VMPARM_SIZE + 1] = {};
-	char nss_name[NSS_NAME_SIZE + 1] = {};
-	size_t pos = 0;
-
-	reipl_get_ascii_loadparm(loadparm, ipb);
-	reipl_get_ascii_nss_name(nss_name, ipb);
-	reipl_get_ascii_vmparm(vmparm, sizeof(vmparm), ipb);
-
-	switch (m) {
-	case REIPL_METHOD_CCW_VM:
-		pos = sprintf(dst, "IPL %X CLEAR", ipb->ipl_info.ccw.devno);
-		break;
-	case REIPL_METHOD_NSS:
-		pos = sprintf(dst, "IPL %s", nss_name);
-		break;
-	default:
-		break;
-	}
-	if (strlen(loadparm) > 0)
-		pos += sprintf(dst + pos, " LOADPARM '%s'", loadparm);
-	if (strlen(vmparm) > 0)
-		sprintf(dst + pos, " PARM %s", vmparm);
-}
-
 static void __reipl_run(void *unused)
 {
-	struct ccw_dev_id devid;
-	static char buf[128];
-
-	switch (reipl_method) {
-	case REIPL_METHOD_CCW_CIO:
-		devid.ssid  = reipl_block_ccw->ipl_info.ccw.ssid;
-		devid.devno = reipl_block_ccw->ipl_info.ccw.devno;
-		reipl_ccw_dev(&devid);
-		break;
-	case REIPL_METHOD_CCW_VM:
-		get_ipl_string(buf, reipl_block_ccw, REIPL_METHOD_CCW_VM);
-		__cpcmd(buf, NULL, 0, NULL);
-		break;
-	case REIPL_METHOD_CCW_DIAG:
+	switch (reipl_type) {
+	case IPL_TYPE_CCW:
 		diag308(DIAG308_SET, reipl_block_ccw);
 		diag308(DIAG308_LOAD_CLEAR, NULL);
 		break;
-	case REIPL_METHOD_FCP_RW_DIAG:
+	case IPL_TYPE_FCP:
 		diag308(DIAG308_SET, reipl_block_fcp);
 		diag308(DIAG308_LOAD_CLEAR, NULL);
 		break;
-	case REIPL_METHOD_FCP_RO_DIAG:
-		diag308(DIAG308_LOAD_CLEAR, NULL);
-		break;
-	case REIPL_METHOD_FCP_RO_VM:
-		__cpcmd("IPL", NULL, 0, NULL);
-		break;
-	case REIPL_METHOD_NSS_DIAG:
+	case IPL_TYPE_NSS:
 		diag308(DIAG308_SET, reipl_block_nss);
 		diag308(DIAG308_LOAD_CLEAR, NULL);
 		break;
-	case REIPL_METHOD_NSS:
-		get_ipl_string(buf, reipl_block_nss, REIPL_METHOD_NSS);
-		__cpcmd(buf, NULL, 0, NULL);
-		break;
-	case REIPL_METHOD_DEFAULT:
-		if (MACHINE_IS_VM)
-			__cpcmd("IPL", NULL, 0, NULL);
+	case IPL_TYPE_UNKNOWN:
 		diag308(DIAG308_LOAD_CLEAR, NULL);
 		break;
-	case REIPL_METHOD_FCP_DUMP:
+	case IPL_TYPE_FCP_DUMP:
 		break;
 	}
 	disabled_wait((unsigned long) __builtin_return_address(0));
@@ -1119,7 +1003,7 @@ static void reipl_block_ccw_fill_parms(struct ipl_parameter_block *ipb)
 	ipb->hdr.flags = DIAG308_FLAGS_LP_VALID;
 
 	/* VM PARM */
-	if (MACHINE_IS_VM && diag308_set_works &&
+	if (MACHINE_IS_VM && ipl_block_valid &&
 	    (ipl_block.ipl_info.ccw.vm_flags & DIAG308_VM_FLAGS_VP_VALID)) {
 
 		ipb->ipl_info.ccw.vm_flags |= DIAG308_VM_FLAGS_VP_VALID;
@@ -1141,9 +1025,6 @@ static int __init reipl_nss_init(void)
 	if (!reipl_block_nss)
 		return -ENOMEM;
 
-	if (!diag308_set_works)
-		sys_reipl_nss_vmparm_attr.attr.mode = S_IRUGO;
-
 	rc = sysfs_create_group(&reipl_kset->kobj, &reipl_nss_attr_group);
 	if (rc)
 		return rc;
@@ -1161,24 +1042,16 @@ static int __init reipl_ccw_init(void)
 	if (!reipl_block_ccw)
 		return -ENOMEM;
 
-	if (MACHINE_IS_VM) {
-		if (!diag308_set_works)
-			sys_reipl_ccw_vmparm_attr.attr.mode = S_IRUGO;
-		rc = sysfs_create_group(&reipl_kset->kobj,
-					&reipl_ccw_attr_group_vm);
-	} else {
-		if(!diag308_set_works)
-			sys_reipl_ccw_loadparm_attr.attr.mode = S_IRUGO;
-		rc = sysfs_create_group(&reipl_kset->kobj,
-					&reipl_ccw_attr_group_lpar);
-	}
+	rc = sysfs_create_group(&reipl_kset->kobj,
+				MACHINE_IS_VM ? &reipl_ccw_attr_group_vm
+					      : &reipl_ccw_attr_group_lpar);
 	if (rc)
 		return rc;
 
 	reipl_block_ccw_init(reipl_block_ccw);
 	if (ipl_info.type == IPL_TYPE_CCW) {
-		reipl_block_ccw->ipl_info.ccw.ssid = ipl_ssid;
-		reipl_block_ccw->ipl_info.ccw.devno = ipl_devno;
+		reipl_block_ccw->ipl_info.ccw.ssid = ipl_block.ipl_info.ccw.ssid;
+		reipl_block_ccw->ipl_info.ccw.devno = ipl_block.ipl_info.ccw.devno;
 		reipl_block_ccw_fill_parms(reipl_block_ccw);
 	}
 
@@ -1190,14 +1063,6 @@ static int __init reipl_fcp_init(void)
 {
 	int rc;
 
-	if (!diag308_set_works) {
-		if (ipl_info.type == IPL_TYPE_FCP) {
-			make_attrs_ro(reipl_fcp_attrs);
-			sys_reipl_fcp_scp_data_attr.attr.mode = S_IRUGO;
-		} else
-			return 0;
-	}
-
 	reipl_block_fcp = (void *) get_zeroed_page(GFP_KERNEL);
 	if (!reipl_block_fcp)
 		return -ENOMEM;
@@ -1218,7 +1083,7 @@ static int __init reipl_fcp_init(void)
 	}
 
 	if (ipl_info.type == IPL_TYPE_FCP) {
-		memcpy(reipl_block_fcp, IPL_PARMBLOCK_START, PAGE_SIZE);
+		memcpy(reipl_block_fcp, &ipl_block, sizeof(ipl_block));
 		/*
 		 * Fix loadparm: There are systems where the (SCSI) LOADPARM
 		 * is invalid in the SCSI IPL parameter block, so take it
@@ -1340,21 +1205,6 @@ static int dump_set_type(enum dump_type type)
 {
 	if (!(dump_capabilities & type))
 		return -EINVAL;
-	switch (type) {
-	case DUMP_TYPE_CCW:
-		if (diag308_set_works)
-			dump_method = DUMP_METHOD_CCW_DIAG;
-		else if (MACHINE_IS_VM)
-			dump_method = DUMP_METHOD_CCW_VM;
-		else
-			dump_method = DUMP_METHOD_CCW_CIO;
-		break;
-	case DUMP_TYPE_FCP:
-		dump_method = DUMP_METHOD_FCP_DIAG;
-		break;
-	default:
-		dump_method = DUMP_METHOD_NONE;
-	}
 	dump_type = type;
 	return 0;
 }
@@ -1397,25 +1247,11 @@ static void diag308_dump(void *dump_block)
 
 static void __dump_run(void *unused)
 {
-	struct ccw_dev_id devid;
-	static char buf[100];
-
-	switch (dump_method) {
-	case DUMP_METHOD_CCW_CIO:
-		devid.ssid  = dump_block_ccw->ipl_info.ccw.ssid;
-		devid.devno = dump_block_ccw->ipl_info.ccw.devno;
-		reipl_ccw_dev(&devid);
-		break;
-	case DUMP_METHOD_CCW_VM:
-		sprintf(buf, "STORE STATUS");
-		__cpcmd(buf, NULL, 0, NULL);
-		sprintf(buf, "IPL %X", dump_block_ccw->ipl_info.ccw.devno);
-		__cpcmd(buf, NULL, 0, NULL);
-		break;
-	case DUMP_METHOD_CCW_DIAG:
+	switch (dump_type) {
+	case DUMP_TYPE_CCW:
 		diag308_dump(dump_block_ccw);
 		break;
-	case DUMP_METHOD_FCP_DIAG:
+	case DUMP_TYPE_FCP:
 		diag308_dump(dump_block_fcp);
 		break;
 	default:
@@ -1425,7 +1261,7 @@ static void __dump_run(void *unused)
 
 static void dump_run(struct shutdown_trigger *trigger)
 {
-	if (dump_method == DUMP_METHOD_NONE)
+	if (dump_type == DUMP_TYPE_NONE)
 		return;
 	smp_send_stop();
 	smp_call_ipl_cpu(__dump_run, NULL);
@@ -1457,8 +1293,6 @@ static int __init dump_fcp_init(void)
 
 	if (!sclp_ipl_info.has_dump)
 		return 0; /* LDIPL DUMP is not installed */
-	if (!diag308_set_works)
-		return 0;
 	dump_block_fcp = (void *) get_zeroed_page(GFP_KERNEL);
 	if (!dump_block_fcp)
 		return -ENOMEM;
@@ -1516,18 +1350,9 @@ static void dump_reipl_run(struct shutdown_trigger *trigger)
 	dump_run(trigger);
 }
 
-static int __init dump_reipl_init(void)
-{
-	if (!diag308_set_works)
-		return -EOPNOTSUPP;
-	else
-		return 0;
-}
-
 static struct shutdown_action __refdata dump_reipl_action = {
 	.name	= SHUTDOWN_ACTION_DUMP_REIPL_STR,
 	.fn	= dump_reipl_run,
-	.init	= dump_reipl_init,
 };
 
 /*
@@ -1838,10 +1663,8 @@ static int __init s390_ipl_init(void)
 	 * case the system is booted from HMC. Fortunately in this case
 	 * READ SCP info provides the correct value.
 	 */
-	if (memcmp(sclp_ipl_info.loadparm, str, sizeof(str)) == 0 &&
-	    diag308_set_works)
-		memcpy(sclp_ipl_info.loadparm, ipl_block.hdr.loadparm,
-		       LOADPARM_LEN);
+	if (memcmp(sclp_ipl_info.loadparm, str, sizeof(str)) == 0 && ipl_block_valid)
+		memcpy(sclp_ipl_info.loadparm, ipl_block.hdr.loadparm, LOADPARM_LEN);
 	shutdown_actions_init();
 	shutdown_triggers_init();
 	return 0;
@@ -1921,19 +1744,20 @@ static struct notifier_block on_panic_nb = {
 
 void __init setup_ipl(void)
 {
+	BUILD_BUG_ON(sizeof(struct ipl_parameter_block) != PAGE_SIZE);
+
 	ipl_info.type = get_ipl_type();
 	switch (ipl_info.type) {
 	case IPL_TYPE_CCW:
-		ipl_info.data.ccw.dev_id.ssid = ipl_ssid;
-		ipl_info.data.ccw.dev_id.devno = ipl_devno;
+		ipl_info.data.ccw.dev_id.ssid = ipl_block.ipl_info.ccw.ssid;
+		ipl_info.data.ccw.dev_id.devno = ipl_block.ipl_info.ccw.devno;
 		break;
 	case IPL_TYPE_FCP:
 	case IPL_TYPE_FCP_DUMP:
 		ipl_info.data.fcp.dev_id.ssid = 0;
-		ipl_info.data.fcp.dev_id.devno =
-			IPL_PARMBLOCK_START->ipl_info.fcp.devno;
-		ipl_info.data.fcp.wwpn = IPL_PARMBLOCK_START->ipl_info.fcp.wwpn;
-		ipl_info.data.fcp.lun = IPL_PARMBLOCK_START->ipl_info.fcp.lun;
+		ipl_info.data.fcp.dev_id.devno = ipl_block.ipl_info.fcp.devno;
+		ipl_info.data.fcp.wwpn = ipl_block.ipl_info.fcp.wwpn;
+		ipl_info.data.fcp.lun = ipl_block.ipl_info.fcp.lun;
 		break;
 	case IPL_TYPE_NSS:
 	case IPL_TYPE_UNKNOWN:
@@ -1943,85 +1767,21 @@ void __init setup_ipl(void)
 	atomic_notifier_chain_register(&panic_notifier_list, &on_panic_nb);
 }
 
-void __init ipl_update_parameters(void)
+void __init ipl_store_parameters(void)
 {
 	int rc;
 
 	rc = diag308(DIAG308_STORE, &ipl_block);
-	if ((rc == DIAG308_RC_OK) || (rc == DIAG308_RC_NOCONFIG))
-		diag308_set_works = 1;
-}
-
-void __init ipl_verify_parameters(void)
-{
-	struct cio_iplinfo iplinfo;
-
-	if (cio_get_iplinfo(&iplinfo))
-		return;
-
-	ipl_ssid = iplinfo.ssid;
-	ipl_devno = iplinfo.devno;
-	ipl_flags |= IPL_DEVNO_VALID;
-	if (!iplinfo.is_qdio)
-		return;
-	ipl_flags |= IPL_PARMBLOCK_VALID;
-}
-
-static LIST_HEAD(rcall);
-static DEFINE_MUTEX(rcall_mutex);
-
-void register_reset_call(struct reset_call *reset)
-{
-	mutex_lock(&rcall_mutex);
-	list_add(&reset->list, &rcall);
-	mutex_unlock(&rcall_mutex);
-}
-EXPORT_SYMBOL_GPL(register_reset_call);
-
-void unregister_reset_call(struct reset_call *reset)
-{
-	mutex_lock(&rcall_mutex);
-	list_del(&reset->list);
-	mutex_unlock(&rcall_mutex);
-}
-EXPORT_SYMBOL_GPL(unregister_reset_call);
-
-static void do_reset_calls(void)
-{
-	struct reset_call *reset;
-
-	if (diag308_set_works) {
-		diag308_reset();
-		return;
-	}
-	list_for_each_entry(reset, &rcall, list)
-		reset->fn();
+	if (rc == DIAG308_RC_OK && ipl_block.hdr.version <= IPL_MAX_SUPPORTED_VERSION)
+		ipl_block_valid = 1;
 }
 
 void s390_reset_system(void)
 {
-	struct lowcore *lc;
-
-	lc = (struct lowcore *)(unsigned long) store_prefix();
-
-	/* Stack for interrupt/machine check handler */
-	lc->panic_stack = S390_lowcore.panic_stack;
-
 	/* Disable prefixing */
 	set_prefix(0);
 
 	/* Disable lowcore protection */
-	__ctl_clear_bit(0,28);
-
-	/* Set new machine check handler */
-	S390_lowcore.mcck_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT;
-	S390_lowcore.mcck_new_psw.addr =
-		(unsigned long) s390_base_mcck_handler;
-
-	/* Set new program check handler */
-	S390_lowcore.program_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT;
-	S390_lowcore.program_new_psw.addr =
-		(unsigned long) s390_base_pgm_handler;
-
-	do_reset_calls();
+	__ctl_clear_bit(0, 28);
+	diag308_reset();
 }
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index a80050bbe2e4..b7020e721ae3 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -20,7 +20,6 @@
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
 #include <asm/smp.h>
-#include <asm/reset.h>
 #include <asm/ipl.h>
 #include <asm/diag.h>
 #include <asm/elf.h>
@@ -253,6 +252,7 @@ void machine_shutdown(void)
 
 void machine_crash_shutdown(struct pt_regs *regs)
 {
+	set_os_info_reipl_block();
 }
 
 /*
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index 1fc6d1ff92d3..5a83be955c70 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -159,7 +159,7 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
 	me->core_layout.size += me->arch.got_size;
 	me->arch.plt_offset = me->core_layout.size;
 	if (me->arch.plt_size) {
-		if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_call_disable)
+		if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_disable)
 			me->arch.plt_size += PLT_ENTRY_SIZE;
 		me->core_layout.size += me->arch.plt_size;
 	}
@@ -318,8 +318,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
 				info->plt_offset;
 			ip[0] = 0x0d10e310;	/* basr 1,0  */
 			ip[1] = 0x100a0004;	/* lg	1,10(1) */
-			if (IS_ENABLED(CONFIG_EXPOLINE) &&
-			    !nospec_call_disable) {
+			if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_disable) {
 				unsigned int *ij;
 				ij = me->core_layout.base +
 					me->arch.plt_offset +
@@ -440,7 +439,7 @@ int module_finalize(const Elf_Ehdr *hdr,
 	void *aseg;
 
 	if (IS_ENABLED(CONFIG_EXPOLINE) &&
-	    !nospec_call_disable && me->arch.plt_size) {
+	    !nospec_disable && me->arch.plt_size) {
 		unsigned int *ij;
 
 		ij = me->core_layout.base + me->arch.plt_offset +
@@ -467,11 +466,11 @@ int module_finalize(const Elf_Ehdr *hdr,
 
 		if (IS_ENABLED(CONFIG_EXPOLINE) &&
 		    (!strcmp(".nospec_call_table", secname)))
-			nospec_call_revert(aseg, aseg + s->sh_size);
+			nospec_revert(aseg, aseg + s->sh_size);
 
 		if (IS_ENABLED(CONFIG_EXPOLINE) &&
 		    (!strcmp(".nospec_return_table", secname)))
-			nospec_return_revert(aseg, aseg + s->sh_size);
+			nospec_revert(aseg, aseg + s->sh_size);
 	}
 
 	jump_label_apply_nops(me);
diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c
index 9aff72d3abda..f236ce8757e8 100644
--- a/arch/s390/kernel/nospec-branch.c
+++ b/arch/s390/kernel/nospec-branch.c
@@ -1,32 +1,104 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/module.h>
+#include <linux/device.h>
 #include <asm/nospec-branch.h>
 
-int nospec_call_disable = IS_ENABLED(CONFIG_EXPOLINE_OFF);
-int nospec_return_disable = !IS_ENABLED(CONFIG_EXPOLINE_FULL);
+static int __init nobp_setup_early(char *str)
+{
+	bool enabled;
+	int rc;
+
+	rc = kstrtobool(str, &enabled);
+	if (rc)
+		return rc;
+	if (enabled && test_facility(82)) {
+		/*
+		 * The user explicitly requested nobp=1, enable it and
+		 * disable the expoline support.
+		 */
+		__set_facility(82, S390_lowcore.alt_stfle_fac_list);
+		if (IS_ENABLED(CONFIG_EXPOLINE))
+			nospec_disable = 1;
+	} else {
+		__clear_facility(82, S390_lowcore.alt_stfle_fac_list);
+	}
+	return 0;
+}
+early_param("nobp", nobp_setup_early);
+
+static int __init nospec_setup_early(char *str)
+{
+	__clear_facility(82, S390_lowcore.alt_stfle_fac_list);
+	return 0;
+}
+early_param("nospec", nospec_setup_early);
+
+static int __init nospec_report(void)
+{
+	if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable)
+		pr_info("Spectre V2 mitigation: execute trampolines.\n");
+	if (__test_facility(82, S390_lowcore.alt_stfle_fac_list))
+		pr_info("Spectre V2 mitigation: limited branch prediction.\n");
+	return 0;
+}
+arch_initcall(nospec_report);
+
+#ifdef CONFIG_SYSFS
+ssize_t cpu_show_spectre_v1(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "Mitigation: __user pointer sanitization\n");
+}
+
+ssize_t cpu_show_spectre_v2(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable)
+		return sprintf(buf, "Mitigation: execute trampolines\n");
+	if (__test_facility(82, S390_lowcore.alt_stfle_fac_list))
+		return sprintf(buf, "Mitigation: limited branch prediction.\n");
+	return sprintf(buf, "Vulnerable\n");
+}
+#endif
+
+#ifdef CONFIG_EXPOLINE
+
+int nospec_disable = IS_ENABLED(CONFIG_EXPOLINE_OFF);
 
 static int __init nospectre_v2_setup_early(char *str)
 {
-	nospec_call_disable = 1;
-	nospec_return_disable = 1;
+	nospec_disable = 1;
 	return 0;
 }
 early_param("nospectre_v2", nospectre_v2_setup_early);
 
+void __init nospec_auto_detect(void)
+{
+	if (IS_ENABLED(CC_USING_EXPOLINE)) {
+		/*
+		 * The kernel has been compiled with expolines.
+		 * Keep expolines enabled and disable nobp.
+		 */
+		nospec_disable = 0;
+		__clear_facility(82, S390_lowcore.alt_stfle_fac_list);
+	}
+	/*
+	 * If the kernel has not been compiled with expolines the
+	 * nobp setting decides what is done, this depends on the
+	 * CONFIG_KERNEL_NP option and the nobp/nospec parameters.
+	 */
+}
+
 static int __init spectre_v2_setup_early(char *str)
 {
 	if (str && !strncmp(str, "on", 2)) {
-		nospec_call_disable = 0;
-		nospec_return_disable = 0;
-	}
-	if (str && !strncmp(str, "off", 3)) {
-		nospec_call_disable = 1;
-		nospec_return_disable = 1;
-	}
-	if (str && !strncmp(str, "auto", 4)) {
-		nospec_call_disable = 0;
-		nospec_return_disable = 1;
+		nospec_disable = 0;
+		__clear_facility(82, S390_lowcore.alt_stfle_fac_list);
 	}
+	if (str && !strncmp(str, "off", 3))
+		nospec_disable = 1;
+	if (str && !strncmp(str, "auto", 4))
+		nospec_auto_detect();
 	return 0;
 }
 early_param("spectre_v2", spectre_v2_setup_early);
@@ -79,15 +151,9 @@ static void __init_or_module __nospec_revert(s32 *start, s32 *end)
 	}
 }
 
-void __init_or_module nospec_call_revert(s32 *start, s32 *end)
-{
-	if (nospec_call_disable)
-		__nospec_revert(start, end);
-}
-
-void __init_or_module nospec_return_revert(s32 *start, s32 *end)
+void __init_or_module nospec_revert(s32 *start, s32 *end)
 {
-	if (nospec_return_disable)
+	if (nospec_disable)
 		__nospec_revert(start, end);
 }
 
@@ -95,6 +161,8 @@ extern s32 __nospec_call_start[], __nospec_call_end[];
 extern s32 __nospec_return_start[], __nospec_return_end[];
 void __init nospec_init_branches(void)
 {
-	nospec_call_revert(__nospec_call_start, __nospec_call_end);
-	nospec_return_revert(__nospec_return_start, __nospec_return_end);
+	nospec_revert(__nospec_call_start, __nospec_call_end);
+	nospec_revert(__nospec_return_start, __nospec_return_end);
 }
+
+#endif /* CONFIG_EXPOLINE */
diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S
index a40ebd1d29d0..73cc3750f0d3 100644
--- a/arch/s390/kernel/reipl.S
+++ b/arch/s390/kernel/reipl.S
@@ -75,90 +75,3 @@ ENTRY(store_status)
 	.align	8
 .Lclkcmp:	.quad	0x0000000000000000
 	.previous
-
-#
-# do_reipl_asm
-# Parameter: r2 = schid of reipl device
-#
-
-ENTRY(do_reipl_asm)
-		basr	%r13,0
-.Lpg0:		lpswe	.Lnewpsw-.Lpg0(%r13)
-.Lpg1:		lgr	%r3,%r2
-		larl	%r2,.Lstatus
-		brasl	%r14,store_status
-
-.Lstatus:	lctlg	%c6,%c6,.Lall-.Lpg0(%r13)
-		lgr	%r1,%r2
-		mvc	__LC_PGM_NEW_PSW(16),.Lpcnew-.Lpg0(%r13)
-		stsch	.Lschib-.Lpg0(%r13)
-		oi	.Lschib+5-.Lpg0(%r13),0x84
-.Lecs:		xi	.Lschib+27-.Lpg0(%r13),0x01
-		msch	.Lschib-.Lpg0(%r13)
-		lghi	%r0,5
-.Lssch:		ssch	.Liplorb-.Lpg0(%r13)
-		jz	.L001
-		brct	%r0,.Lssch
-		bas	%r14,.Ldisab-.Lpg0(%r13)
-.L001:		mvc	__LC_IO_NEW_PSW(16),.Lionew-.Lpg0(%r13)
-.Ltpi:		lpswe	.Lwaitpsw-.Lpg0(%r13)
-.Lcont:		c	%r1,__LC_SUBCHANNEL_ID
-		jnz	.Ltpi
-		clc	__LC_IO_INT_PARM(4),.Liplorb-.Lpg0(%r13)
-		jnz	.Ltpi
-		tsch	.Liplirb-.Lpg0(%r13)
-		tm	.Liplirb+9-.Lpg0(%r13),0xbf
-		jz	.L002
-		bas	%r14,.Ldisab-.Lpg0(%r13)
-.L002:		tm	.Liplirb+8-.Lpg0(%r13),0xf3
-		jz	.L003
-		bas	%r14,.Ldisab-.Lpg0(%r13)
-.L003:		st	%r1,__LC_SUBCHANNEL_ID
-		lhi	%r1,0		 # mode 0 = esa
-		slr	%r0,%r0		 # set cpuid to zero
-		sigp	%r1,%r0,SIGP_SET_ARCHITECTURE # switch to esa mode
-		lpsw	0
-.Ldisab:	sll	%r14,1
-		srl	%r14,1		 # need to kill hi bit to avoid specification exceptions.
-		st	%r14,.Ldispsw+12-.Lpg0(%r13)
-		lpswe	.Ldispsw-.Lpg0(%r13)
-		.align	8
-.Lall:		.quad	0x00000000ff000000
-		.align	16
-/*
- * These addresses have to be 31 bit otherwise
- * the sigp will throw a specifcation exception
- * when switching to ESA mode as bit 31 be set
- * in the ESA psw.
- * Bit 31 of the addresses has to be 0 for the
- * 31bit lpswe instruction a fact they appear to have
- * omitted from the pop.
- */
-.Lnewpsw:	.quad	0x0000000080000000
-		.quad	.Lpg1
-.Lpcnew:	.quad	0x0000000080000000
-		.quad	.Lecs
-.Lionew:	.quad	0x0000000080000000
-		.quad	.Lcont
-.Lwaitpsw:	.quad	0x0202000080000000
-		.quad	.Ltpi
-.Ldispsw:	.quad	0x0002000080000000
-		.quad	0x0000000000000000
-.Liplccws:	.long	0x02000000,0x60000018
-		.long	0x08000008,0x20000001
-.Liplorb:	.long	0x0049504c,0x0040ff80
-		.long	0x00000000+.Liplccws
-.Lschib:	.long	0x00000000,0x00000000
-		.long	0x00000000,0x00000000
-		.long	0x00000000,0x00000000
-		.long	0x00000000,0x00000000
-		.long	0x00000000,0x00000000
-		.long	0x00000000,0x00000000
-.Liplirb:	.long	0x00000000,0x00000000
-		.long	0x00000000,0x00000000
-		.long	0x00000000,0x00000000
-		.long	0x00000000,0x00000000
-		.long	0x00000000,0x00000000
-		.long	0x00000000,0x00000000
-		.long	0x00000000,0x00000000
-		.long	0x00000000,0x00000000
diff --git a/arch/s390/kernel/relocate_kernel.S b/arch/s390/kernel/relocate_kernel.S
index 9c2c96da23d0..c97c2d40fe15 100644
--- a/arch/s390/kernel/relocate_kernel.S
+++ b/arch/s390/kernel/relocate_kernel.S
@@ -29,33 +29,6 @@
 ENTRY(relocate_kernel)
 		basr	%r13,0		# base address
 	.base:
-		stctg	%c0,%c15,ctlregs-.base(%r13)
-		stmg	%r0,%r15,gprregs-.base(%r13)
-		lghi	%r0,3
-		sllg	%r0,%r0,31
-		stg	%r0,0x1d0(%r0)
-		la	%r0,.back_pgm-.base(%r13)
-		stg	%r0,0x1d8(%r0)
-		la	%r1,load_psw-.base(%r13)
-		mvc	0(8,%r0),0(%r1)
-		la	%r0,.back-.base(%r13)
-		st	%r0,4(%r0)
-		oi	4(%r0),0x80
-		lghi	%r0,0
-		diag	%r0,%r0,0x308
-	.back:
-		lhi	%r1,1		# mode 1 = esame
-		sigp	%r1,%r0,SIGP_SET_ARCHITECTURE # switch to esame mode
-		sam64			# switch to 64 bit addressing mode
-		basr	%r13,0
-	.back_base:
-		oi	have_diag308-.back_base(%r13),0x01
-		lctlg	%c0,%c15,ctlregs-.back_base(%r13)
-		lmg	%r0,%r15,gprregs-.back_base(%r13)
-		j	.top
-	.back_pgm:
-		lmg	%r0,%r15,gprregs-.base(%r13)
-	.top:
 		lghi	%r7,PAGE_SIZE	# load PAGE_SIZE in r7
 		lghi	%r9,PAGE_SIZE	# load PAGE_SIZE in r9
 		lg	%r5,0(%r2)	# read another word for indirection page
@@ -64,55 +37,36 @@ ENTRY(relocate_kernel)
 		je	.indir_check	# NO, goto "indir_check"
 		lgr	%r6,%r5		# r6 = r5
 		nill	%r6,0xf000	# mask it out and...
-		j	.top		# ...next iteration
+		j	.base		# ...next iteration
 	.indir_check:
 		tml	%r5,0x2		# is it a indirection page?
 		je	.done_test	# NO, goto "done_test"
 		nill	%r5,0xf000	# YES, mask out,
 		lgr	%r2,%r5		# move it into the right register,
-		j	.top		# and read next...
+		j	.base		# and read next...
 	.done_test:
 		tml	%r5,0x4		# is it the done indicator?
 		je	.source_test	# NO! Well, then it should be the source indicator...
 		j	.done		# ok, lets finish it here...
 	.source_test:
 		tml	%r5,0x8		# it should be a source indicator...
-		je	.top		# NO, ignore it...
+		je	.base		# NO, ignore it...
 		lgr	%r8,%r5		# r8 = r5
 		nill	%r8,0xf000	# masking
 	0:	mvcle	%r6,%r8,0x0	# copy PAGE_SIZE bytes from r8 to r6 - pad with 0
 		jo	0b
-		j	.top
+		j	.base
 	.done:
 		sgr	%r0,%r0		# clear register r0
 		la	%r4,load_psw-.base(%r13)	# load psw-address into the register
 		o	%r3,4(%r4)	# or load address into psw
 		st	%r3,4(%r4)
 		mvc	0(8,%r0),0(%r4)	# copy psw to absolute address 0
-		tm	have_diag308-.base(%r13),0x01
-		jno	.no_diag308
 		diag	%r0,%r0,0x308
-	.no_diag308:
-		sam31			# 31 bit mode
-		sr	%r1,%r1		# erase register r1
-		sr	%r2,%r2		# erase register r2
-		sigp	%r1,%r2,SIGP_SET_ARCHITECTURE # set cpuid to zero
-		lpsw	0		# hopefully start new kernel...
 
 		.align	8
 	load_psw:
 		.long	0x00080000,0x80000000
-	ctlregs:
-		.rept	16
-		.quad	0
-		.endr
-	gprregs:
-		.rept	16
-		.quad	0
-		.endr
-	have_diag308:
-		.byte	0
-		.align	8
 	relocate_kernel_end:
 	.align 8
 	.globl	relocate_kernel_len
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index a6a91f01a17a..fc3b4aa185cc 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -221,6 +221,8 @@ static void __init conmode_default(void)
 		SET_CONSOLE_SCLP;
 #endif
 	}
+	if (IS_ENABLED(CONFIG_VT) && IS_ENABLED(CONFIG_DUMMY_CONSOLE))
+		conswitchp = &dummy_con;
 }
 
 #ifdef CONFIG_CRASH_DUMP
@@ -413,12 +415,12 @@ static void __init setup_resources(void)
 	struct memblock_region *reg;
 	int j;
 
-	code_resource.start = (unsigned long) &_text;
-	code_resource.end = (unsigned long) &_etext - 1;
-	data_resource.start = (unsigned long) &_etext;
-	data_resource.end = (unsigned long) &_edata - 1;
-	bss_resource.start = (unsigned long) &__bss_start;
-	bss_resource.end = (unsigned long) &__bss_stop - 1;
+	code_resource.start = (unsigned long) _text;
+	code_resource.end = (unsigned long) _etext - 1;
+	data_resource.start = (unsigned long) _etext;
+	data_resource.end = (unsigned long) _edata - 1;
+	bss_resource.start = (unsigned long) __bss_start;
+	bss_resource.end = (unsigned long) __bss_stop - 1;
 
 	for_each_memblock(memory, reg) {
 		res = memblock_virt_alloc(sizeof(*res), 8);
@@ -667,7 +669,7 @@ static void __init check_initrd(void)
  */
 static void __init reserve_kernel(void)
 {
-	unsigned long start_pfn = PFN_UP(__pa(&_end));
+	unsigned long start_pfn = PFN_UP(__pa(_end));
 
 #ifdef CONFIG_DMA_API_DEBUG
 	/*
@@ -888,9 +890,12 @@ void __init setup_arch(char **cmdline_p)
 
 	/* Is init_mm really needed? */
 	init_mm.start_code = PAGE_OFFSET;
-	init_mm.end_code = (unsigned long) &_etext;
-	init_mm.end_data = (unsigned long) &_edata;
-	init_mm.brk = (unsigned long) &_end;
+	init_mm.end_code = (unsigned long) _etext;
+	init_mm.end_data = (unsigned long) _edata;
+	init_mm.brk = (unsigned long) _end;
+
+	if (IS_ENABLED(CONFIG_EXPOLINE_AUTO))
+		nospec_auto_detect();
 
 	parse_early_param();
 #ifdef CONFIG_CRASH_DUMP
diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c
index ce329c876d8c..75b7b307946e 100644
--- a/arch/s390/kernel/suspend.c
+++ b/arch/s390/kernel/suspend.c
@@ -153,8 +153,8 @@ int pfn_is_nosave(unsigned long pfn)
 {
 	unsigned long nosave_begin_pfn = PFN_DOWN(__pa(&__nosave_begin));
 	unsigned long nosave_end_pfn = PFN_DOWN(__pa(&__nosave_end));
-	unsigned long end_rodata_pfn = PFN_DOWN(__pa(&__end_rodata)) - 1;
-	unsigned long stext_pfn = PFN_DOWN(__pa(&_stext));
+	unsigned long end_rodata_pfn = PFN_DOWN(__pa(__end_rodata)) - 1;
+	unsigned long stext_pfn = PFN_DOWN(__pa(_stext));
 
 	/* Always save lowcore pages (LC protection might be enabled). */
 	if (pfn <= LC_PAGES)
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index c24bfa72baf7..8e2b8647ee12 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -1050,8 +1050,7 @@ shadow_r2t:
 		rc = gmap_shadow_r2t(sg, saddr, rfte.val, *fake);
 		if (rc)
 			return rc;
-		/* fallthrough */
-	}
+	} /* fallthrough */
 	case ASCE_TYPE_REGION2: {
 		union region2_table_entry rste;
 
@@ -1077,8 +1076,7 @@ shadow_r3t:
 		rc = gmap_shadow_r3t(sg, saddr, rste.val, *fake);
 		if (rc)
 			return rc;
-		/* fallthrough */
-	}
+	} /* fallthrough */
 	case ASCE_TYPE_REGION3: {
 		union region3_table_entry rtte;
 
@@ -1113,8 +1111,7 @@ shadow_sgt:
 		rc = gmap_shadow_sgt(sg, saddr, rtte.val, *fake);
 		if (rc)
 			return rc;
-		/* fallthrough */
-	}
+	} /* fallthrough */
 	case ASCE_TYPE_SEGMENT: {
 		union segment_table_entry ste;
 
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 07c6e81163bf..a389fa85cca2 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -50,18 +50,6 @@ u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu)
 	return ilen;
 }
 
-static int handle_noop(struct kvm_vcpu *vcpu)
-{
-	switch (vcpu->arch.sie_block->icptcode) {
-	case 0x10:
-		vcpu->stat.exit_external_request++;
-		break;
-	default:
-		break; /* nothing */
-	}
-	return 0;
-}
-
 static int handle_stop(struct kvm_vcpu *vcpu)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
@@ -465,8 +453,11 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
 
 	switch (vcpu->arch.sie_block->icptcode) {
 	case ICPT_EXTREQ:
+		vcpu->stat.exit_external_request++;
+		return 0;
 	case ICPT_IOREQ:
-		return handle_noop(vcpu);
+		vcpu->stat.exit_io_request++;
+		return 0;
 	case ICPT_INST:
 		rc = handle_instruction(vcpu);
 		break;
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index b04616b57a94..37d06e022238 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -391,6 +391,7 @@ static int __must_check __deliver_cpu_timer(struct kvm_vcpu *vcpu)
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 	int rc;
 
+	vcpu->stat.deliver_cputm++;
 	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER,
 					 0, 0);
 
@@ -410,6 +411,7 @@ static int __must_check __deliver_ckc(struct kvm_vcpu *vcpu)
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 	int rc;
 
+	vcpu->stat.deliver_ckc++;
 	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP,
 					 0, 0);
 
@@ -595,6 +597,7 @@ static int __must_check __deliver_machine_check(struct kvm_vcpu *vcpu)
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
 						 KVM_S390_MCHK,
 						 mchk.cr14, mchk.mcic);
+		vcpu->stat.deliver_machine_check++;
 		rc = __write_machine_check(vcpu, &mchk);
 	}
 	return rc;
@@ -710,7 +713,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
 	ilen = pgm_info.flags & KVM_S390_PGM_FLAGS_ILC_MASK;
 	VCPU_EVENT(vcpu, 3, "deliver: program irq code 0x%x, ilen:%d",
 		   pgm_info.code, ilen);
-	vcpu->stat.deliver_program_int++;
+	vcpu->stat.deliver_program++;
 	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
 					 pgm_info.code, 0);
 
@@ -899,7 +902,7 @@ static int __must_check __deliver_virtio(struct kvm_vcpu *vcpu)
 		VCPU_EVENT(vcpu, 4,
 			   "deliver: virtio parm: 0x%x,parm64: 0x%llx",
 			   inti->ext.ext_params, inti->ext.ext_params2);
-		vcpu->stat.deliver_virtio_interrupt++;
+		vcpu->stat.deliver_virtio++;
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
 				inti->type,
 				inti->ext.ext_params,
@@ -975,7 +978,7 @@ static int __must_check __deliver_io(struct kvm_vcpu *vcpu,
 			inti->io.subchannel_id >> 1 & 0x3,
 			inti->io.subchannel_nr);
 
-		vcpu->stat.deliver_io_int++;
+		vcpu->stat.deliver_io++;
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
 				inti->type,
 				((__u32)inti->io.subchannel_id << 16) |
@@ -1004,7 +1007,7 @@ static int __must_check __deliver_io(struct kvm_vcpu *vcpu,
 		VCPU_EVENT(vcpu, 4, "%s isc %u", "deliver: I/O (AI/gisa)", isc);
 		memset(&io, 0, sizeof(io));
 		io.io_int_word = isc_to_int_word(isc);
-		vcpu->stat.deliver_io_int++;
+		vcpu->stat.deliver_io++;
 		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
 			KVM_S390_INT_IO(1, 0, 0, 0),
 			((__u32)io.subchannel_id << 16) |
@@ -1268,6 +1271,7 @@ static int __inject_prog(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 
+	vcpu->stat.inject_program++;
 	VCPU_EVENT(vcpu, 3, "inject: program irq code 0x%x", irq->u.pgm.code);
 	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
 				   irq->u.pgm.code, 0);
@@ -1309,6 +1313,7 @@ static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 
+	vcpu->stat.inject_pfault_init++;
 	VCPU_EVENT(vcpu, 4, "inject: pfault init parameter block at 0x%llx",
 		   irq->u.ext.ext_params2);
 	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_PFAULT_INIT,
@@ -1327,6 +1332,7 @@ static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 	struct kvm_s390_extcall_info *extcall = &li->irq.extcall;
 	uint16_t src_id = irq->u.extcall.code;
 
+	vcpu->stat.inject_external_call++;
 	VCPU_EVENT(vcpu, 4, "inject: external call source-cpu:%u",
 		   src_id);
 	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EXTERNAL_CALL,
@@ -1351,6 +1357,7 @@ static int __inject_set_prefix(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 	struct kvm_s390_prefix_info *prefix = &li->irq.prefix;
 
+	vcpu->stat.inject_set_prefix++;
 	VCPU_EVENT(vcpu, 3, "inject: set prefix to %x",
 		   irq->u.prefix.address);
 	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_SET_PREFIX,
@@ -1371,6 +1378,7 @@ static int __inject_sigp_stop(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 	struct kvm_s390_stop_info *stop = &li->irq.stop;
 	int rc = 0;
 
+	vcpu->stat.inject_stop_signal++;
 	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_STOP, 0, 0);
 
 	if (irq->u.stop.flags & ~KVM_S390_STOP_SUPP_FLAGS)
@@ -1395,6 +1403,7 @@ static int __inject_sigp_restart(struct kvm_vcpu *vcpu,
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 
+	vcpu->stat.inject_restart++;
 	VCPU_EVENT(vcpu, 3, "%s", "inject: restart int");
 	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0);
 
@@ -1407,6 +1416,7 @@ static int __inject_sigp_emergency(struct kvm_vcpu *vcpu,
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 
+	vcpu->stat.inject_emergency_signal++;
 	VCPU_EVENT(vcpu, 4, "inject: emergency from cpu %u",
 		   irq->u.emerg.code);
 	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY,
@@ -1427,6 +1437,7 @@ static int __inject_mchk(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 	struct kvm_s390_mchk_info *mchk = &li->irq.mchk;
 
+	vcpu->stat.inject_mchk++;
 	VCPU_EVENT(vcpu, 3, "inject: machine check mcic 0x%llx",
 		   irq->u.mchk.mcic);
 	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_MCHK, 0,
@@ -1457,6 +1468,7 @@ static int __inject_ckc(struct kvm_vcpu *vcpu)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 
+	vcpu->stat.inject_ckc++;
 	VCPU_EVENT(vcpu, 3, "%s", "inject: clock comparator external");
 	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP,
 				   0, 0);
@@ -1470,6 +1482,7 @@ static int __inject_cpu_timer(struct kvm_vcpu *vcpu)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 
+	vcpu->stat.inject_cputm++;
 	VCPU_EVENT(vcpu, 3, "%s", "inject: cpu timer external");
 	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER,
 				   0, 0);
@@ -1596,6 +1609,7 @@ static int __inject_service(struct kvm *kvm,
 {
 	struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
 
+	kvm->stat.inject_service_signal++;
 	spin_lock(&fi->lock);
 	fi->srv_signal.ext_params |= inti->ext.ext_params & SCCB_EVENT_PENDING;
 	/*
@@ -1621,6 +1635,7 @@ static int __inject_virtio(struct kvm *kvm,
 {
 	struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
 
+	kvm->stat.inject_virtio++;
 	spin_lock(&fi->lock);
 	if (fi->counters[FIRQ_CNTR_VIRTIO] >= KVM_S390_MAX_VIRTIO_IRQS) {
 		spin_unlock(&fi->lock);
@@ -1638,6 +1653,7 @@ static int __inject_pfault_done(struct kvm *kvm,
 {
 	struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
 
+	kvm->stat.inject_pfault_done++;
 	spin_lock(&fi->lock);
 	if (fi->counters[FIRQ_CNTR_PFAULT] >=
 		(ASYNC_PF_PER_VCPU * KVM_MAX_VCPUS)) {
@@ -1657,6 +1673,7 @@ static int __inject_float_mchk(struct kvm *kvm,
 {
 	struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
 
+	kvm->stat.inject_float_mchk++;
 	spin_lock(&fi->lock);
 	fi->mchk.cr14 |= inti->mchk.cr14 & (1UL << CR_PENDING_SUBCLASS);
 	fi->mchk.mcic |= inti->mchk.mcic;
@@ -1672,6 +1689,7 @@ static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
 	struct list_head *list;
 	int isc;
 
+	kvm->stat.inject_io++;
 	isc = int_word_to_isc(inti->io.io_int_word);
 
 	if (kvm->arch.gisa && inti->type & KVM_S390_INT_IO_AI_MASK) {
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 339ac0964590..64c986243018 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -57,6 +57,7 @@
 			   (KVM_MAX_VCPUS + LOCAL_IRQS))
 
 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
+#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
 
 struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "userspace_handled", VCPU_STAT(exit_userspace) },
@@ -64,6 +65,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "exit_validity", VCPU_STAT(exit_validity) },
 	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
 	{ "exit_external_request", VCPU_STAT(exit_external_request) },
+	{ "exit_io_request", VCPU_STAT(exit_io_request) },
 	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
 	{ "exit_instruction", VCPU_STAT(exit_instruction) },
 	{ "exit_pei", VCPU_STAT(exit_pei) },
@@ -78,16 +80,34 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
 	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
 	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
+	{ "deliver_ckc", VCPU_STAT(deliver_ckc) },
+	{ "deliver_cputm", VCPU_STAT(deliver_cputm) },
 	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
 	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
 	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
-	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
+	{ "deliver_virtio", VCPU_STAT(deliver_virtio) },
 	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
 	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
 	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
-	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
-	{ "deliver_io_interrupt", VCPU_STAT(deliver_io_int) },
+	{ "deliver_program", VCPU_STAT(deliver_program) },
+	{ "deliver_io", VCPU_STAT(deliver_io) },
+	{ "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
 	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
+	{ "inject_ckc", VCPU_STAT(inject_ckc) },
+	{ "inject_cputm", VCPU_STAT(inject_cputm) },
+	{ "inject_external_call", VCPU_STAT(inject_external_call) },
+	{ "inject_float_mchk", VM_STAT(inject_float_mchk) },
+	{ "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
+	{ "inject_io", VM_STAT(inject_io) },
+	{ "inject_mchk", VCPU_STAT(inject_mchk) },
+	{ "inject_pfault_done", VM_STAT(inject_pfault_done) },
+	{ "inject_program", VCPU_STAT(inject_program) },
+	{ "inject_restart", VCPU_STAT(inject_restart) },
+	{ "inject_service_signal", VM_STAT(inject_service_signal) },
+	{ "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
+	{ "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
+	{ "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
+	{ "inject_virtio", VM_STAT(inject_virtio) },
 	{ "instruction_epsw", VCPU_STAT(instruction_epsw) },
 	{ "instruction_gs", VCPU_STAT(instruction_gs) },
 	{ "instruction_io_other", VCPU_STAT(instruction_io_other) },
@@ -152,13 +172,33 @@ static int nested;
 module_param(nested, int, S_IRUGO);
 MODULE_PARM_DESC(nested, "Nested virtualization support");
 
-/* upper facilities limit for kvm */
-unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
 
-unsigned long kvm_s390_fac_list_mask_size(void)
+/*
+ * For now we handle at most 16 double words as this is what the s390 base
+ * kernel handles and stores in the prefix page. If we ever need to go beyond
+ * this, this requires changes to code, but the external uapi can stay.
+ */
+#define SIZE_INTERNAL 16
+
+/*
+ * Base feature mask that defines default mask for facilities. Consists of the
+ * defines in FACILITIES_KVM and the non-hypervisor managed bits.
+ */
+static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
+/*
+ * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
+ * and defines the facilities that can be enabled via a cpu model.
+ */
+static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
+
+static unsigned long kvm_s390_fac_size(void)
 {
-	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
-	return ARRAY_SIZE(kvm_s390_fac_list_mask);
+	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
+	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
+	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
+		sizeof(S390_lowcore.stfle_fac_list));
+
+	return SIZE_INTERNAL;
 }
 
 /* available cpu features supported by kvm */
@@ -679,6 +719,8 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
 		mutex_lock(&kvm->lock);
 		if (!kvm->created_vcpus) {
 			kvm->arch.use_cmma = 1;
+			/* Not compatible with cmma. */
+			kvm->arch.use_pfmfi = 0;
 			ret = 0;
 		}
 		mutex_unlock(&kvm->lock);
@@ -1583,7 +1625,7 @@ static int kvm_s390_get_cmma_bits(struct kvm *kvm,
 		return -EINVAL;
 	/* CMMA is disabled or was not used, or the buffer has length zero */
 	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
-	if (!bufsize || !kvm->mm->context.use_cmma) {
+	if (!bufsize || !kvm->mm->context.uses_cmm) {
 		memset(args, 0, sizeof(*args));
 		return 0;
 	}
@@ -1660,7 +1702,7 @@ static int kvm_s390_get_cmma_bits(struct kvm *kvm,
 /*
  * This function sets the CMMA attributes for the given pages. If the input
  * buffer has zero length, no action is taken, otherwise the attributes are
- * set and the mm->context.use_cmma flag is set.
+ * set and the mm->context.uses_cmm flag is set.
  */
 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
 				  const struct kvm_s390_cmma_log *args)
@@ -1710,9 +1752,9 @@ static int kvm_s390_set_cmma_bits(struct kvm *kvm,
 	srcu_read_unlock(&kvm->srcu, srcu_idx);
 	up_read(&kvm->mm->mmap_sem);
 
-	if (!kvm->mm->context.use_cmma) {
+	if (!kvm->mm->context.uses_cmm) {
 		down_write(&kvm->mm->mmap_sem);
-		kvm->mm->context.use_cmma = 1;
+		kvm->mm->context.uses_cmm = 1;
 		up_write(&kvm->mm->mmap_sem);
 	}
 out:
@@ -1967,20 +2009,15 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	if (!kvm->arch.sie_page2)
 		goto out_err;
 
-	/* Populate the facility mask initially. */
-	memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
-	       sizeof(S390_lowcore.stfle_fac_list));
-	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
-		if (i < kvm_s390_fac_list_mask_size())
-			kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
-		else
-			kvm->arch.model.fac_mask[i] = 0UL;
-	}
-
-	/* Populate the facility list initially. */
 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
-	memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
-	       S390_ARCH_FAC_LIST_SIZE_BYTE);
+
+	for (i = 0; i < kvm_s390_fac_size(); i++) {
+		kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
+					      (kvm_s390_fac_base[i] |
+					       kvm_s390_fac_ext[i]);
+		kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
+					      kvm_s390_fac_base[i];
+	}
 
 	/* we are always in czam mode - even on pre z14 machines */
 	set_kvm_facility(kvm->arch.model.fac_mask, 138);
@@ -2028,6 +2065,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
 	kvm->arch.css_support = 0;
 	kvm->arch.use_irqchip = 0;
+	kvm->arch.use_pfmfi = sclp.has_pfmfi;
 	kvm->arch.epoch = 0;
 
 	spin_lock_init(&kvm->arch.start_stop_lock);
@@ -2454,8 +2492,6 @@ int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
 	if (!vcpu->arch.sie_block->cbrlo)
 		return -ENOMEM;
-
-	vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
 	return 0;
 }
 
@@ -2491,7 +2527,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	if (test_kvm_facility(vcpu->kvm, 73))
 		vcpu->arch.sie_block->ecb |= ECB_TE;
 
-	if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
+	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
 		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
 	if (test_kvm_facility(vcpu->kvm, 130))
 		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
@@ -3023,7 +3059,7 @@ retry:
 
 	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
 		/*
-		 * Disable CMMA virtualization; we will emulate the ESSA
+		 * Disable CMM virtualization; we will emulate the ESSA
 		 * instruction manually, in order to provide additional
 		 * functionalities needed for live migration.
 		 */
@@ -3033,11 +3069,11 @@ retry:
 
 	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
 		/*
-		 * Re-enable CMMA virtualization if CMMA is available and
-		 * was used.
+		 * Re-enable CMM virtualization if CMMA is available and
+		 * CMM has been used.
 		 */
 		if ((vcpu->kvm->arch.use_cmma) &&
-		    (vcpu->kvm->mm->context.use_cmma))
+		    (vcpu->kvm->mm->context.uses_cmm))
 			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
 		goto retry;
 	}
@@ -4044,7 +4080,7 @@ static int __init kvm_s390_init(void)
 	}
 
 	for (i = 0; i < 16; i++)
-		kvm_s390_fac_list_mask[i] |=
+		kvm_s390_fac_base[i] |=
 			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
 
 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index f55ac0ef99ea..1b5621f4fe5b 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -294,8 +294,6 @@ void exit_sie(struct kvm_vcpu *vcpu);
 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu);
 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu);
 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu);
-unsigned long kvm_s390_fac_list_mask_size(void);
-extern unsigned long kvm_s390_fac_list_mask[];
 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm);
 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu);
 
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index f0b4185158af..ebfa0442e569 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -1078,9 +1078,9 @@ static int handle_essa(struct kvm_vcpu *vcpu)
 		 * value really needs to be written to; if the value is
 		 * already correct, we do nothing and avoid the lock.
 		 */
-		if (vcpu->kvm->mm->context.use_cmma == 0) {
+		if (vcpu->kvm->mm->context.uses_cmm == 0) {
 			down_write(&vcpu->kvm->mm->mmap_sem);
-			vcpu->kvm->mm->context.use_cmma = 1;
+			vcpu->kvm->mm->context.uses_cmm = 1;
 			up_write(&vcpu->kvm->mm->mmap_sem);
 		}
 		/*
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index 507f23ba2034..7cdea2ec51e9 100644
--- a/arch/s390/mm/dump_pagetables.c
+++ b/arch/s390/mm/dump_pagetables.c
@@ -24,8 +24,8 @@ enum address_markers_idx {
 
 static struct addr_marker address_markers[] = {
 	[IDENTITY_NR]	  = {0, "Identity Mapping"},
-	[KERNEL_START_NR] = {(unsigned long)&_stext, "Kernel Image Start"},
-	[KERNEL_END_NR]	  = {(unsigned long)&_end, "Kernel Image End"},
+	[KERNEL_START_NR] = {(unsigned long)_stext, "Kernel Image Start"},
+	[KERNEL_END_NR]	  = {(unsigned long)_end, "Kernel Image End"},
 	[VMEMMAP_NR]	  = {0, "vmemmap Area"},
 	[VMALLOC_NR]	  = {0, "vmalloc Area"},
 	[MODULES_NR]	  = {0, "Modules Area"},
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
index 05c8abd864f1..2809d11c7a28 100644
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c
@@ -220,6 +220,8 @@ static inline int gup_p4d_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr,
 /*
  * Like get_user_pages_fast() except its IRQ-safe in that it won't fall
  * back to the regular GUP.
+ * Note a difference with get_user_pages_fast: this always returns the
+ * number of pages pinned, 0 if no pages were pinned.
  */
 int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
 			  struct page **pages)
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index 831bdcf407bb..0a7627cdb34e 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -37,11 +37,11 @@ static unsigned long stack_maxrandom_size(void)
 #define MIN_GAP (32*1024*1024)
 #define MAX_GAP (STACK_TOP/6*5)
 
-static inline int mmap_is_legacy(void)
+static inline int mmap_is_legacy(struct rlimit *rlim_stack)
 {
 	if (current->personality & ADDR_COMPAT_LAYOUT)
 		return 1;
-	if (rlimit(RLIMIT_STACK) == RLIM_INFINITY)
+	if (rlim_stack->rlim_cur == RLIM_INFINITY)
 		return 1;
 	return sysctl_legacy_va_layout;
 }
@@ -56,9 +56,10 @@ static unsigned long mmap_base_legacy(unsigned long rnd)
 	return TASK_UNMAPPED_BASE + rnd;
 }
 
-static inline unsigned long mmap_base(unsigned long rnd)
+static inline unsigned long mmap_base(unsigned long rnd,
+				      struct rlimit *rlim_stack)
 {
-	unsigned long gap = rlimit(RLIMIT_STACK);
+	unsigned long gap = rlim_stack->rlim_cur;
 
 	if (gap < MIN_GAP)
 		gap = MIN_GAP;
@@ -184,7 +185,7 @@ check_asce_limit:
  * This function, called very early during the creation of a new
  * process VM image, sets up which VM layout function to use:
  */
-void arch_pick_mmap_layout(struct mm_struct *mm)
+void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
 {
 	unsigned long random_factor = 0UL;
 
@@ -195,11 +196,11 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 	 * Fall back to the standard layout if the personality
 	 * bit is set, or if the expected stack growth is unlimited:
 	 */
-	if (mmap_is_legacy()) {
+	if (mmap_is_legacy(rlim_stack)) {
 		mm->mmap_base = mmap_base_legacy(random_factor);
 		mm->get_unmapped_area = arch_get_unmapped_area;
 	} else {
-		mm->mmap_base = mmap_base(random_factor);
+		mm->mmap_base = mmap_base(random_factor, rlim_stack);
 		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
 	}
 }
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index cb364153c43c..562f72955956 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -6,8 +6,9 @@
  *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
  */
 
-#include <linux/mm.h>
 #include <linux/sysctl.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
 #include <asm/mmu_context.h>
 #include <asm/pgalloc.h>
 #include <asm/gmap.h>
@@ -366,3 +367,293 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
 	if ((*batch)->nr == MAX_TABLE_BATCH)
 		tlb_flush_mmu(tlb);
 }
+
+/*
+ * Base infrastructure required to generate basic asces, region, segment,
+ * and page tables that do not make use of enhanced features like EDAT1.
+ */
+
+static struct kmem_cache *base_pgt_cache;
+
+static unsigned long base_pgt_alloc(void)
+{
+	u64 *table;
+
+	table = kmem_cache_alloc(base_pgt_cache, GFP_KERNEL);
+	if (table)
+		memset64(table, _PAGE_INVALID, PTRS_PER_PTE);
+	return (unsigned long) table;
+}
+
+static void base_pgt_free(unsigned long table)
+{
+	kmem_cache_free(base_pgt_cache, (void *) table);
+}
+
+static unsigned long base_crst_alloc(unsigned long val)
+{
+	unsigned long table;
+
+	table =	 __get_free_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
+	if (table)
+		crst_table_init((unsigned long *)table, val);
+	return table;
+}
+
+static void base_crst_free(unsigned long table)
+{
+	free_pages(table, CRST_ALLOC_ORDER);
+}
+
+#define BASE_ADDR_END_FUNC(NAME, SIZE)					\
+static inline unsigned long base_##NAME##_addr_end(unsigned long addr,	\
+						   unsigned long end)	\
+{									\
+	unsigned long next = (addr + (SIZE)) & ~((SIZE) - 1);		\
+									\
+	return (next - 1) < (end - 1) ? next : end;			\
+}
+
+BASE_ADDR_END_FUNC(page,    _PAGE_SIZE)
+BASE_ADDR_END_FUNC(segment, _SEGMENT_SIZE)
+BASE_ADDR_END_FUNC(region3, _REGION3_SIZE)
+BASE_ADDR_END_FUNC(region2, _REGION2_SIZE)
+BASE_ADDR_END_FUNC(region1, _REGION1_SIZE)
+
+static inline unsigned long base_lra(unsigned long address)
+{
+	unsigned long real;
+
+	asm volatile(
+		"	lra	%0,0(%1)\n"
+		: "=d" (real) : "a" (address) : "cc");
+	return real;
+}
+
+static int base_page_walk(unsigned long origin, unsigned long addr,
+			  unsigned long end, int alloc)
+{
+	unsigned long *pte, next;
+
+	if (!alloc)
+		return 0;
+	pte = (unsigned long *) origin;
+	pte += (addr & _PAGE_INDEX) >> _PAGE_SHIFT;
+	do {
+		next = base_page_addr_end(addr, end);
+		*pte = base_lra(addr);
+	} while (pte++, addr = next, addr < end);
+	return 0;
+}
+
+static int base_segment_walk(unsigned long origin, unsigned long addr,
+			     unsigned long end, int alloc)
+{
+	unsigned long *ste, next, table;
+	int rc;
+
+	ste = (unsigned long *) origin;
+	ste += (addr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
+	do {
+		next = base_segment_addr_end(addr, end);
+		if (*ste & _SEGMENT_ENTRY_INVALID) {
+			if (!alloc)
+				continue;
+			table = base_pgt_alloc();
+			if (!table)
+				return -ENOMEM;
+			*ste = table | _SEGMENT_ENTRY;
+		}
+		table = *ste & _SEGMENT_ENTRY_ORIGIN;
+		rc = base_page_walk(table, addr, next, alloc);
+		if (rc)
+			return rc;
+		if (!alloc)
+			base_pgt_free(table);
+		cond_resched();
+	} while (ste++, addr = next, addr < end);
+	return 0;
+}
+
+static int base_region3_walk(unsigned long origin, unsigned long addr,
+			     unsigned long end, int alloc)
+{
+	unsigned long *rtte, next, table;
+	int rc;
+
+	rtte = (unsigned long *) origin;
+	rtte += (addr & _REGION3_INDEX) >> _REGION3_SHIFT;
+	do {
+		next = base_region3_addr_end(addr, end);
+		if (*rtte & _REGION_ENTRY_INVALID) {
+			if (!alloc)
+				continue;
+			table = base_crst_alloc(_SEGMENT_ENTRY_EMPTY);
+			if (!table)
+				return -ENOMEM;
+			*rtte = table | _REGION3_ENTRY;
+		}
+		table = *rtte & _REGION_ENTRY_ORIGIN;
+		rc = base_segment_walk(table, addr, next, alloc);
+		if (rc)
+			return rc;
+		if (!alloc)
+			base_crst_free(table);
+	} while (rtte++, addr = next, addr < end);
+	return 0;
+}
+
+static int base_region2_walk(unsigned long origin, unsigned long addr,
+			     unsigned long end, int alloc)
+{
+	unsigned long *rste, next, table;
+	int rc;
+
+	rste = (unsigned long *) origin;
+	rste += (addr & _REGION2_INDEX) >> _REGION2_SHIFT;
+	do {
+		next = base_region2_addr_end(addr, end);
+		if (*rste & _REGION_ENTRY_INVALID) {
+			if (!alloc)
+				continue;
+			table = base_crst_alloc(_REGION3_ENTRY_EMPTY);
+			if (!table)
+				return -ENOMEM;
+			*rste = table | _REGION2_ENTRY;
+		}
+		table = *rste & _REGION_ENTRY_ORIGIN;
+		rc = base_region3_walk(table, addr, next, alloc);
+		if (rc)
+			return rc;
+		if (!alloc)
+			base_crst_free(table);
+	} while (rste++, addr = next, addr < end);
+	return 0;
+}
+
+static int base_region1_walk(unsigned long origin, unsigned long addr,
+			     unsigned long end, int alloc)
+{
+	unsigned long *rfte, next, table;
+	int rc;
+
+	rfte = (unsigned long *) origin;
+	rfte += (addr & _REGION1_INDEX) >> _REGION1_SHIFT;
+	do {
+		next = base_region1_addr_end(addr, end);
+		if (*rfte & _REGION_ENTRY_INVALID) {
+			if (!alloc)
+				continue;
+			table = base_crst_alloc(_REGION2_ENTRY_EMPTY);
+			if (!table)
+				return -ENOMEM;
+			*rfte = table | _REGION1_ENTRY;
+		}
+		table = *rfte & _REGION_ENTRY_ORIGIN;
+		rc = base_region2_walk(table, addr, next, alloc);
+		if (rc)
+			return rc;
+		if (!alloc)
+			base_crst_free(table);
+	} while (rfte++, addr = next, addr < end);
+	return 0;
+}
+
+/**
+ * base_asce_free - free asce and tables returned from base_asce_alloc()
+ * @asce: asce to be freed
+ *
+ * Frees all region, segment, and page tables that were allocated with a
+ * corresponding base_asce_alloc() call.
+ */
+void base_asce_free(unsigned long asce)
+{
+	unsigned long table = asce & _ASCE_ORIGIN;
+
+	if (!asce)
+		return;
+	switch (asce & _ASCE_TYPE_MASK) {
+	case _ASCE_TYPE_SEGMENT:
+		base_segment_walk(table, 0, _REGION3_SIZE, 0);
+		break;
+	case _ASCE_TYPE_REGION3:
+		base_region3_walk(table, 0, _REGION2_SIZE, 0);
+		break;
+	case _ASCE_TYPE_REGION2:
+		base_region2_walk(table, 0, _REGION1_SIZE, 0);
+		break;
+	case _ASCE_TYPE_REGION1:
+		base_region1_walk(table, 0, -_PAGE_SIZE, 0);
+		break;
+	}
+	base_crst_free(table);
+}
+
+static int base_pgt_cache_init(void)
+{
+	static DEFINE_MUTEX(base_pgt_cache_mutex);
+	unsigned long sz = _PAGE_TABLE_SIZE;
+
+	if (base_pgt_cache)
+		return 0;
+	mutex_lock(&base_pgt_cache_mutex);
+	if (!base_pgt_cache)
+		base_pgt_cache = kmem_cache_create("base_pgt", sz, sz, 0, NULL);
+	mutex_unlock(&base_pgt_cache_mutex);
+	return base_pgt_cache ? 0 : -ENOMEM;
+}
+
+/**
+ * base_asce_alloc - create kernel mapping without enhanced DAT features
+ * @addr: virtual start address of kernel mapping
+ * @num_pages: number of consecutive pages
+ *
+ * Generate an asce, including all required region, segment and page tables,
+ * that can be used to access the virtual kernel mapping. The difference is
+ * that the returned asce does not make use of any enhanced DAT features like
+ * e.g. large pages. This is required for some I/O functions that pass an
+ * asce, like e.g. some service call requests.
+ *
+ * Note: the returned asce may NEVER be attached to any cpu. It may only be
+ *	 used for I/O requests. tlb entries that might result because the
+ *	 asce was attached to a cpu won't be cleared.
+ */
+unsigned long base_asce_alloc(unsigned long addr, unsigned long num_pages)
+{
+	unsigned long asce, table, end;
+	int rc;
+
+	if (base_pgt_cache_init())
+		return 0;
+	end = addr + num_pages * PAGE_SIZE;
+	if (end <= _REGION3_SIZE) {
+		table = base_crst_alloc(_SEGMENT_ENTRY_EMPTY);
+		if (!table)
+			return 0;
+		rc = base_segment_walk(table, addr, end, 1);
+		asce = table | _ASCE_TYPE_SEGMENT | _ASCE_TABLE_LENGTH;
+	} else if (end <= _REGION2_SIZE) {
+		table = base_crst_alloc(_REGION3_ENTRY_EMPTY);
+		if (!table)
+			return 0;
+		rc = base_region3_walk(table, addr, end, 1);
+		asce = table | _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
+	} else if (end <= _REGION1_SIZE) {
+		table = base_crst_alloc(_REGION2_ENTRY_EMPTY);
+		if (!table)
+			return 0;
+		rc = base_region2_walk(table, addr, end, 1);
+		asce = table | _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
+	} else {
+		table = base_crst_alloc(_REGION1_ENTRY_EMPTY);
+		if (!table)
+			return 0;
+		rc = base_region1_walk(table, addr, end, 1);
+		asce = table | _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH;
+	}
+	if (rc) {
+		base_asce_free(asce);
+		asce = 0;
+	}
+	return asce;
+}
diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c
index 424a1ba4f874..90a8c9e84ca6 100644
--- a/arch/s390/tools/gen_facilities.c
+++ b/arch/s390/tools/gen_facilities.c
@@ -62,6 +62,13 @@ static struct facility_def facility_defs[] = {
 		}
 	},
 	{
+		/*
+		 * FACILITIES_KVM contains the list of facilities that are part
+		 * of the default facility mask and list that are passed to the
+		 * initial CPU model. If no CPU model is used, this, together
+		 * with the non-hypervisor managed bits, is the maximum list of
+		 * guest facilities supported by KVM.
+		 */
 		.name = "FACILITIES_KVM",
 		.bits = (int[]){
 			0,  /* N3 instructions */
@@ -89,6 +96,19 @@ static struct facility_def facility_defs[] = {
 			-1  /* END */
 		}
 	},
+	{
+		/*
+		 * FACILITIES_KVM_CPUMODEL contains the list of facilities
+		 * that can be enabled by CPU model code if the host supports
+		 * it. These facilities are not passed to the guest without
+		 * CPU model support.
+		 */
+
+		.name = "FACILITIES_KVM_CPUMODEL",
+		.bits = (int[]){
+			-1  /* END */
+		}
+	},
 };
 
 static void print_facility_list(struct facility_def *def)
diff --git a/arch/sh/boards/board-sh7785lcr.c b/arch/sh/boards/board-sh7785lcr.c
index 2c4771ee84cd..d7d232dea33e 100644
--- a/arch/sh/boards/board-sh7785lcr.c
+++ b/arch/sh/boards/board-sh7785lcr.c
@@ -25,6 +25,7 @@
 #include <linux/io.h>
 #include <linux/clk.h>
 #include <linux/errno.h>
+#include <linux/gpio/machine.h>
 #include <mach/sh7785lcr.h>
 #include <cpu/sh7785.h>
 #include <asm/heartbeat.h>
@@ -243,8 +244,15 @@ static struct resource i2c_resources[] = {
 	},
 };
 
+static struct gpiod_lookup_table i2c_gpio_table = {
+	.dev_id = "i2c.0",
+	.table = {
+		GPIO_LOOKUP("pfc-sh7757", 0, "reset-gpios", GPIO_ACTIVE_LOW),
+		{ },
+	},
+};
+
 static struct i2c_pca9564_pf_platform_data i2c_platform_data = {
-	.gpio			= 0,
 	.i2c_clock_speed	= I2C_PCA_CON_330kHz,
 	.timeout		= HZ,
 };
@@ -283,6 +291,7 @@ static int __init sh7785lcr_devices_setup(void)
 		i2c_device.num_resources = ARRAY_SIZE(i2c_proto_resources);
 	}
 
+	gpiod_add_lookup_table(&i2c_gpio_table);
 	return platform_add_devices(sh7785lcr_devices,
 				    ARRAY_SIZE(sh7785lcr_devices));
 }
diff --git a/arch/sh/boards/of-generic.c b/arch/sh/boards/of-generic.c
index 4feb7c86f4ac..46b2481eec90 100644
--- a/arch/sh/boards/of-generic.c
+++ b/arch/sh/boards/of-generic.c
@@ -126,12 +126,6 @@ static void __init sh_of_setup(char **cmdline_p)
 {
 	struct device_node *root;
 
-#ifdef CONFIG_USE_BUILTIN_DTB
-	unflatten_and_copy_device_tree();
-#else
-	unflatten_device_tree();
-#endif
-
 	board_time_init = sh_of_time_init;
 
 	sh_mv.mv_name = "Unknown SH model";
diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c
index 5976a2c8a3e3..e5b7437ab4af 100644
--- a/arch/sh/drivers/pci/pci.c
+++ b/arch/sh/drivers/pci/pci.c
@@ -49,6 +49,8 @@ static void pcibios_scanbus(struct pci_channel *hose)
 	for (i = 0; i < hose->nr_resources; i++) {
 		res = hose->resources + i;
 		offset = 0;
+		if (res->flags & IORESOURCE_DISABLED)
+			continue;
 		if (res->flags & IORESOURCE_IO)
 			offset = hose->io_offset;
 		else if (res->flags & IORESOURCE_MEM)
@@ -102,6 +104,9 @@ int register_pci_controller(struct pci_channel *hose)
 	for (i = 0; i < hose->nr_resources; i++) {
 		struct resource *res = hose->resources + i;
 
+		if (res->flags & IORESOURCE_DISABLED)
+			continue;
+
 		if (res->flags & IORESOURCE_IO) {
 			if (request_resource(&ioport_resource, res) < 0)
 				goto out;
diff --git a/arch/sh/drivers/pci/pcie-sh7786.c b/arch/sh/drivers/pci/pcie-sh7786.c
index 0167a7352719..382e7ecf4c82 100644
--- a/arch/sh/drivers/pci/pcie-sh7786.c
+++ b/arch/sh/drivers/pci/pcie-sh7786.c
@@ -19,6 +19,7 @@
 #include <linux/clk.h>
 #include <linux/sh_clk.h>
 #include <linux/sh_intc.h>
+#include <cpu/sh7786.h>
 #include "pcie-sh7786.h"
 #include <asm/sizes.h>
 
@@ -32,6 +33,7 @@ struct sh7786_pcie_port {
 
 static struct sh7786_pcie_port *sh7786_pcie_ports;
 static unsigned int nr_ports;
+static unsigned long dma_pfn_offset;
 
 static struct sh7786_pcie_hwops {
 	int (*core_init)(void);
@@ -40,73 +42,73 @@ static struct sh7786_pcie_hwops {
 
 static struct resource sh7786_pci0_resources[] = {
 	{
-		.name	= "PCIe0 IO",
+		.name	= "PCIe0 MEM 0",
 		.start	= 0xfd000000,
 		.end	= 0xfd000000 + SZ_8M - 1,
-		.flags	= IORESOURCE_IO,
+		.flags	= IORESOURCE_MEM,
 	}, {
-		.name	= "PCIe0 MEM 0",
+		.name	= "PCIe0 MEM 1",
 		.start	= 0xc0000000,
 		.end	= 0xc0000000 + SZ_512M - 1,
 		.flags	= IORESOURCE_MEM | IORESOURCE_MEM_32BIT,
 	}, {
-		.name	= "PCIe0 MEM 1",
+		.name	= "PCIe0 MEM 2",
 		.start	= 0x10000000,
 		.end	= 0x10000000 + SZ_64M - 1,
 		.flags	= IORESOURCE_MEM,
 	}, {
-		.name	= "PCIe0 MEM 2",
+		.name	= "PCIe0 IO",
 		.start	= 0xfe100000,
 		.end	= 0xfe100000 + SZ_1M - 1,
-		.flags	= IORESOURCE_MEM,
+		.flags	= IORESOURCE_IO,
 	},
 };
 
 static struct resource sh7786_pci1_resources[] = {
 	{
-		.name	= "PCIe1 IO",
+		.name	= "PCIe1 MEM 0",
 		.start	= 0xfd800000,
 		.end	= 0xfd800000 + SZ_8M - 1,
-		.flags	= IORESOURCE_IO,
+		.flags	= IORESOURCE_MEM,
 	}, {
-		.name	= "PCIe1 MEM 0",
+		.name	= "PCIe1 MEM 1",
 		.start	= 0xa0000000,
 		.end	= 0xa0000000 + SZ_512M - 1,
 		.flags	= IORESOURCE_MEM | IORESOURCE_MEM_32BIT,
 	}, {
-		.name	= "PCIe1 MEM 1",
+		.name	= "PCIe1 MEM 2",
 		.start	= 0x30000000,
 		.end	= 0x30000000 + SZ_256M - 1,
 		.flags	= IORESOURCE_MEM | IORESOURCE_MEM_32BIT,
 	}, {
-		.name	= "PCIe1 MEM 2",
+		.name	= "PCIe1 IO",
 		.start	= 0xfe300000,
 		.end	= 0xfe300000 + SZ_1M - 1,
-		.flags	= IORESOURCE_MEM,
+		.flags	= IORESOURCE_IO,
 	},
 };
 
 static struct resource sh7786_pci2_resources[] = {
 	{
-		.name	= "PCIe2 IO",
+		.name	= "PCIe2 MEM 0",
 		.start	= 0xfc800000,
 		.end	= 0xfc800000 + SZ_4M - 1,
-		.flags	= IORESOURCE_IO,
+		.flags	= IORESOURCE_MEM,
 	}, {
-		.name	= "PCIe2 MEM 0",
+		.name	= "PCIe2 MEM 1",
 		.start	= 0x80000000,
 		.end	= 0x80000000 + SZ_512M - 1,
 		.flags	= IORESOURCE_MEM | IORESOURCE_MEM_32BIT,
 	}, {
-		.name	= "PCIe2 MEM 1",
+		.name	= "PCIe2 MEM 2",
 		.start	= 0x20000000,
 		.end	= 0x20000000 + SZ_256M - 1,
 		.flags	= IORESOURCE_MEM | IORESOURCE_MEM_32BIT,
 	}, {
-		.name	= "PCIe2 MEM 2",
+		.name	= "PCIe2 IO",
 		.start	= 0xfcd00000,
 		.end	= 0xfcd00000 + SZ_1M - 1,
-		.flags	= IORESOURCE_MEM,
+		.flags	= IORESOURCE_IO,
 	},
 };
 
@@ -301,7 +303,7 @@ static int __init pcie_init(struct sh7786_pcie_port *port)
 {
 	struct pci_channel *chan = port->hose;
 	unsigned int data;
-	phys_addr_t memphys;
+	phys_addr_t memstart, memend;
 	size_t memsize;
 	int ret, i, win;
 
@@ -357,15 +359,26 @@ static int __init pcie_init(struct sh7786_pcie_port *port)
 	data |= (0xff << 16);
 	pci_write_reg(chan, data, SH4A_PCIEMACCTLR);
 
-	memphys = __pa(memory_start);
-	memsize = roundup_pow_of_two(memory_end - memory_start);
+	memstart = __pa(memory_start);
+	memend   = __pa(memory_end);
+	memsize = roundup_pow_of_two(memend - memstart);
+
+	/*
+	 * The start address must be aligned on its size, so round it
+	 * down and recalculate the size to cover the entire memory.
+	 * NOTE(review): the new size can exceed memstart's alignment.
+	 */
+	memstart = ALIGN_DOWN(memstart, memsize);
+	memsize = roundup_pow_of_two(memend - memstart);
+
+	dma_pfn_offset = memstart >> PAGE_SHIFT;
 
 	/*
 	 * If there's more than 512MB of memory, we need to roll over to
 	 * LAR1/LAMR1.
 	 */
 	if (memsize > SZ_512M) {
-		pci_write_reg(chan, memphys + SZ_512M, SH4A_PCIELAR1);
+		pci_write_reg(chan, memstart + SZ_512M, SH4A_PCIELAR1);
 		pci_write_reg(chan, ((memsize - SZ_512M) - SZ_256) | 1,
 			      SH4A_PCIELAMR1);
 		memsize = SZ_512M;
@@ -381,7 +394,7 @@ static int __init pcie_init(struct sh7786_pcie_port *port)
 	 * LAR0/LAMR0 covers up to the first 512MB, which is enough to
 	 * cover all of lowmem on most platforms.
 	 */
-	pci_write_reg(chan, memphys, SH4A_PCIELAR0);
+	pci_write_reg(chan, memstart, SH4A_PCIELAR0);
 	pci_write_reg(chan, (memsize - SZ_256) | 1, SH4A_PCIELAMR0);
 
 	/* Finish initialization */
@@ -438,6 +451,9 @@ static int __init pcie_init(struct sh7786_pcie_port *port)
 		 * mode, so just skip them entirely.
 		 */
 		if ((res->flags & IORESOURCE_MEM_32BIT) && __in_29bit_mode())
+			res->flags |= IORESOURCE_DISABLED;
+
+		if (res->flags & IORESOURCE_DISABLED)
 			continue;
 
 		pci_write_reg(chan, 0x00000000, SH4A_PCIEPTCTLR(win));
@@ -472,6 +488,11 @@ int pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin)
         return evt2irq(0xae0);
 }
 
+void pcibios_bus_add_device(struct pci_dev *pdev) /* copy the DMA PFN offset into @pdev */
+{
+	pdev->dev.dma_pfn_offset = dma_pfn_offset; /* file-scope value, set in pcie_init() */
+}
+
 static int __init sh7786_pcie_core_init(void)
 {
 	/* Return the number of ports */
@@ -527,6 +548,7 @@ static struct sh7786_pcie_hwops sh7786_65nm_pcie_hwops __initdata = {
 static int __init sh7786_pcie_init(void)
 {
 	struct clk *platclk;
+	u32 mm_sel;
 	int i;
 
 	printk(KERN_NOTICE "PCI: Starting initialization.\n");
@@ -560,6 +582,16 @@ static int __init sh7786_pcie_init(void)
 
 	clk_enable(platclk);
 
+	mm_sel = sh7786_mm_sel();
+
+	/*
+	 * Depending on the MMSELR register value, the "PCIe0 MEM 2"
+	 * resource (index 2, at 0x10000000) may not be available.
+	 * See Table 13.11 of the SH7786 datasheet.
+	 */
+	if (mm_sel != 1 && mm_sel != 2 && mm_sel != 5 && mm_sel != 6)
+		sh7786_pci0_resources[2].flags |= IORESOURCE_DISABLED;
+
 	printk(KERN_NOTICE "PCI: probing %d ports.\n", nr_ports);
 
 	for (i = 0; i < nr_ports; i++) {
diff --git a/arch/sh/include/asm/futex.h b/arch/sh/include/asm/futex.h
index 15bf07bfa96b..6d192f4908a7 100644
--- a/arch/sh/include/asm/futex.h
+++ b/arch/sh/include/asm/futex.h
@@ -37,10 +37,7 @@ static inline int arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval,
 	pagefault_disable();
 
 	do {
-		if (op == FUTEX_OP_SET)
-			ret = oldval = 0;
-		else
-			ret = get_user(oldval, uaddr);
+		ret = get_user(oldval, uaddr);
 
 		if (ret) break;
 
diff --git a/arch/sh/include/cpu-sh4/cpu/sh7786.h b/arch/sh/include/cpu-sh4/cpu/sh7786.h
index 0df09e638f09..96b8cb1f754a 100644
--- a/arch/sh/include/cpu-sh4/cpu/sh7786.h
+++ b/arch/sh/include/cpu-sh4/cpu/sh7786.h
@@ -14,6 +14,8 @@
 #ifndef __CPU_SH7786_H__
 #define __CPU_SH7786_H__
 
+#include <linux/io.h>
+
 enum {
 	/* PA */
 	GPIO_PA7, GPIO_PA6, GPIO_PA5, GPIO_PA4,
@@ -131,4 +133,9 @@ enum {
 	GPIO_FN_IRL7, GPIO_FN_IRL6, GPIO_FN_IRL5, GPIO_FN_IRL4,
 };
 
+static inline u32 sh7786_mm_sel(void) /* value the PCIe code treats as the MMSELR selection */
+{
+	return __raw_readl(0xFC400020) & 0x7; /* keep only bits [2:0] of the word read */
+}
+
 #endif /* __CPU_SH7786_H__ */
diff --git a/arch/sh/kernel/dma-nommu.c b/arch/sh/kernel/dma-nommu.c
index 62b485107eae..178457d7620c 100644
--- a/arch/sh/kernel/dma-nommu.c
+++ b/arch/sh/kernel/dma-nommu.c
@@ -16,7 +16,8 @@ static dma_addr_t nommu_map_page(struct device *dev, struct page *page,
 				 enum dma_data_direction dir,
 				 unsigned long attrs)
 {
-	dma_addr_t addr = page_to_phys(page) + offset;
+	dma_addr_t addr = page_to_phys(page) + offset
+		- PFN_PHYS(dev->dma_pfn_offset);
 
 	WARN_ON(size == 0);
 
@@ -36,12 +37,14 @@ static int nommu_map_sg(struct device *dev, struct scatterlist *sg,
 	WARN_ON(nents == 0 || sg[0].length == 0);
 
 	for_each_sg(sg, s, nents, i) {
+		dma_addr_t offset = PFN_PHYS(dev->dma_pfn_offset);
+
 		BUG_ON(!sg_page(s));
 
 		if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
 			sh_sync_dma_for_device(sg_virt(s), s->length, dir);
 
-		s->dma_address = sg_phys(s);
+		s->dma_address = sg_phys(s) - offset;
 		s->dma_length = s->length;
 	}
 
diff --git a/arch/sh/kernel/entry-common.S b/arch/sh/kernel/entry-common.S
index c001f782c5f1..28cc61216b64 100644
--- a/arch/sh/kernel/entry-common.S
+++ b/arch/sh/kernel/entry-common.S
@@ -255,7 +255,7 @@ debug_trap:
 	mov.l	@r8, r8
 	jsr	@r8
 	 nop
-	bra	__restore_all
+	bra	ret_from_exception
 	 nop
 	CFI_ENDPROC
 
diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index b95c411d0333..d34e998b809f 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -330,6 +330,14 @@ void __init setup_arch(char **cmdline_p)
 	/* Let earlyprintk output early console messages */
 	early_platform_driver_probe("earlyprintk", 1, 1);
 
+#ifdef CONFIG_OF_FLATTREE
+#ifdef CONFIG_USE_BUILTIN_DTB
+	unflatten_and_copy_device_tree();
+#else
+	unflatten_device_tree();
+#endif
+#endif
+
 	paging_init();
 
 #ifdef CONFIG_DUMMY_CONSOLE
diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c
index 6ea3aab508f2..8ce98691d822 100644
--- a/arch/sh/mm/consistent.c
+++ b/arch/sh/mm/consistent.c
@@ -59,7 +59,7 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size,
 
 	split_page(pfn_to_page(virt_to_phys(ret) >> PAGE_SHIFT), order);
 
-	*dma_handle = virt_to_phys(ret);
+	*dma_handle = virt_to_phys(ret) - PFN_PHYS(dev->dma_pfn_offset);
 
 	return ret_nocache;
 }
@@ -69,7 +69,7 @@ void dma_generic_free_coherent(struct device *dev, size_t size,
 			       unsigned long attrs)
 {
 	int order = get_order(size);
-	unsigned long pfn = dma_handle >> PAGE_SHIFT;
+	unsigned long pfn = (dma_handle >> PAGE_SHIFT) + dev->dma_pfn_offset;
 	int k;
 
 	for (k = 0; k < (1 << order); k++)
diff --git a/arch/sh/mm/gup.c b/arch/sh/mm/gup.c
index 8045b5bb7075..56c86ca98ecf 100644
--- a/arch/sh/mm/gup.c
+++ b/arch/sh/mm/gup.c
@@ -160,6 +160,8 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
 /*
  * Like get_user_pages_fast() except its IRQ-safe in that it won't fall
  * back to the regular GUP.
+ * Note a difference with get_user_pages_fast: this always returns the
+ * number of pages pinned, 0 if no pages were pinned.
  */
 int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
 			  struct page **pages)
diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S
index 358fe4ef08a2..4d3696973325 100644
--- a/arch/sparc/kernel/entry.S
+++ b/arch/sparc/kernel/entry.S
@@ -801,27 +801,12 @@ SUN_PI_(lda	[%l4] ASI_M_MMUREGS, %l5)	! read sfsr last
 	RESTORE_ALL
 
 	.align	4
-	.globl	sys_nis_syscall
-sys_nis_syscall:
-	mov	%o7, %l5
-	add	%sp, STACKFRAME_SZ, %o0		! pt_regs *regs arg
-	call	c_sys_nis_syscall
-	 mov	%l5, %o7
-
 sunos_execv:
 	.globl	sunos_execv
 	b	sys_execve
 	 clr	%i2
 
 	.align	4
-	.globl	sys_sparc_pipe
-sys_sparc_pipe:
-	mov	%o7, %l5
-	add	%sp, STACKFRAME_SZ, %o0		! pt_regs *regs arg
-	call	sparc_pipe
-	 mov	%l5, %o7
-
-	.align	4
 	.globl	sys_sigstack
 sys_sigstack:
 	mov	%o7, %l5
diff --git a/arch/sparc/kernel/sys32.S b/arch/sparc/kernel/sys32.S
index 7e7011a1e712..489ffab918a8 100644
--- a/arch/sparc/kernel/sys32.S
+++ b/arch/sparc/kernel/sys32.S
@@ -13,44 +13,6 @@
 
 	.text
 
-#define SIGN1(STUB,SYSCALL,REG1) \
-	.align	32; \
-	.globl	STUB; \
-STUB:	sethi	%hi(SYSCALL), %g1; \
-	jmpl	%g1 + %lo(SYSCALL), %g0; \
-	sra	REG1, 0, REG1
-
-#define SIGN2(STUB,SYSCALL,REG1,REG2) \
-	.align	32; \
-	.globl	STUB; \
-STUB:	sethi	%hi(SYSCALL), %g1; \
-	sra	REG1, 0, REG1; \
-	jmpl	%g1 + %lo(SYSCALL), %g0; \
-	sra	REG2, 0, REG2
-
-#define SIGN3(STUB,SYSCALL,REG1,REG2,REG3) \
-	.align	32; \
-	.globl	STUB; \
-STUB:	sra	REG1, 0, REG1; \
-	sethi	%hi(SYSCALL), %g1; \
-	sra	REG2, 0, REG2; \
-	jmpl	%g1 + %lo(SYSCALL), %g0; \
-	sra	REG3, 0, REG3
-
-SIGN1(sys32_readahead, compat_sys_readahead, %o0)
-SIGN2(sys32_fadvise64, compat_sys_fadvise64, %o0, %o4)
-SIGN2(sys32_fadvise64_64, compat_sys_fadvise64_64, %o0, %o5)
-SIGN1(sys32_clock_nanosleep, compat_sys_clock_nanosleep, %o1)
-SIGN1(sys32_timer_settime, compat_sys_timer_settime, %o1)
-SIGN1(sys32_io_submit, compat_sys_io_submit, %o1)
-SIGN1(sys32_mq_open, compat_sys_mq_open, %o1)
-SIGN1(sys32_select, compat_sys_select, %o0)
-SIGN1(sys32_futex, compat_sys_futex, %o1)
-SIGN1(sys32_recvfrom, compat_sys_recvfrom, %o0)
-SIGN1(sys32_recvmsg, compat_sys_recvmsg, %o0)
-SIGN1(sys32_sendmsg, compat_sys_sendmsg, %o0)
-SIGN2(sys32_renameat2, sys_renameat2, %o0, %o2)
-
 	.globl		sys32_mmap2
 sys32_mmap2:
 	sethi		%hi(sys_mmap), %g1
diff --git a/arch/sparc/kernel/sys_sparc32.c b/arch/sparc/kernel/sys_sparc32.c
index f166e5bbf506..b5da3bfdc225 100644
--- a/arch/sparc/kernel/sys_sparc32.c
+++ b/arch/sparc/kernel/sys_sparc32.c
@@ -52,20 +52,14 @@
 
 #include "systbls.h"
 
-asmlinkage long sys32_truncate64(const char __user * path, unsigned long high, unsigned long low)
+COMPAT_SYSCALL_DEFINE3(truncate64, const char __user *, path, u32, high, u32, low)
 {
-	if ((int)high < 0)
-		return -EINVAL;
-	else
-		return ksys_truncate(path, (high << 32) | low);
+	return ksys_truncate(path, ((u64)high << 32) | low);
 }
 
-asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long high, unsigned long low)
+COMPAT_SYSCALL_DEFINE3(ftruncate64, unsigned int, fd, u32, high, u32, low)
 {
-	if ((int)high < 0)
-		return -EINVAL;
-	else
-		return ksys_ftruncate(fd, (high << 32) | low);
+	return ksys_ftruncate(fd, ((u64)high << 32) | low);
 }
 
 static int cp_compat_stat64(struct kstat *stat,
@@ -98,8 +92,8 @@ static int cp_compat_stat64(struct kstat *stat,
 	return err;
 }
 
-asmlinkage long compat_sys_stat64(const char __user * filename,
-		struct compat_stat64 __user *statbuf)
+COMPAT_SYSCALL_DEFINE2(stat64, const char __user *, filename,
+		struct compat_stat64 __user *, statbuf)
 {
 	struct kstat stat;
 	int error = vfs_stat(filename, &stat);
@@ -109,8 +103,8 @@ asmlinkage long compat_sys_stat64(const char __user * filename,
 	return error;
 }
 
-asmlinkage long compat_sys_lstat64(const char __user * filename,
-		struct compat_stat64 __user *statbuf)
+COMPAT_SYSCALL_DEFINE2(lstat64, const char __user *, filename,
+		struct compat_stat64 __user *, statbuf)
 {
 	struct kstat stat;
 	int error = vfs_lstat(filename, &stat);
@@ -120,8 +114,8 @@ asmlinkage long compat_sys_lstat64(const char __user * filename,
 	return error;
 }
 
-asmlinkage long compat_sys_fstat64(unsigned int fd,
-		struct compat_stat64 __user * statbuf)
+COMPAT_SYSCALL_DEFINE2(fstat64, unsigned int, fd,
+		struct compat_stat64 __user *, statbuf)
 {
 	struct kstat stat;
 	int error = vfs_fstat(fd, &stat);
@@ -131,9 +125,9 @@ asmlinkage long compat_sys_fstat64(unsigned int fd,
 	return error;
 }
 
-asmlinkage long compat_sys_fstatat64(unsigned int dfd,
-		const char __user *filename,
-		struct compat_stat64 __user * statbuf, int flag)
+COMPAT_SYSCALL_DEFINE4(fstatat64, unsigned int, dfd,
+		const char __user *, filename,
+		struct compat_stat64 __user *, statbuf, int, flag)
 {
 	struct kstat stat;
 	int error;
@@ -194,61 +188,50 @@ COMPAT_SYSCALL_DEFINE5(rt_sigaction, int, sig,
         return ret;
 }
 
-asmlinkage compat_ssize_t sys32_pread64(unsigned int fd,
-					char __user *ubuf,
-					compat_size_t count,
-					unsigned long poshi,
-					unsigned long poslo)
+COMPAT_SYSCALL_DEFINE5(pread64, unsigned int, fd, char __user *, ubuf,
+			compat_size_t, count, u32, poshi, u32, poslo)
 {
-	return ksys_pread64(fd, ubuf, count, (poshi << 32) | poslo);
+	return ksys_pread64(fd, ubuf, count, ((u64)poshi << 32) | poslo);
 }
 
-asmlinkage compat_ssize_t sys32_pwrite64(unsigned int fd,
-					 char __user *ubuf,
-					 compat_size_t count,
-					 unsigned long poshi,
-					 unsigned long poslo)
+COMPAT_SYSCALL_DEFINE5(pwrite64, unsigned int, fd, char __user *, ubuf,
+			compat_size_t, count, u32, poshi, u32, poslo)
 {
-	return ksys_pwrite64(fd, ubuf, count, (poshi << 32) | poslo);
+	return ksys_pwrite64(fd, ubuf, count, ((u64)poshi << 32) | poslo);
 }
 
-asmlinkage long compat_sys_readahead(int fd,
-				     unsigned long offhi,
-				     unsigned long offlo,
-				     compat_size_t count)
+COMPAT_SYSCALL_DEFINE4(readahead, int, fd, u32, offhi, u32, offlo,
+		     compat_size_t, count)
 {
-	return ksys_readahead(fd, (offhi << 32) | offlo, count);
+	return ksys_readahead(fd, ((u64)offhi << 32) | offlo, count);
 }
 
-long compat_sys_fadvise64(int fd,
-			  unsigned long offhi,
-			  unsigned long offlo,
-			  compat_size_t len, int advice)
+COMPAT_SYSCALL_DEFINE5(fadvise64, int, fd, u32, offhi, u32, offlo,
+			  compat_size_t, len, int, advice)
 {
-	return ksys_fadvise64_64(fd, (offhi << 32) | offlo, len, advice);
+	return ksys_fadvise64_64(fd, ((u64)offhi << 32) | offlo, len, advice);
 }
 
-long compat_sys_fadvise64_64(int fd,
-			     unsigned long offhi, unsigned long offlo,
-			     unsigned long lenhi, unsigned long lenlo,
-			     int advice)
+COMPAT_SYSCALL_DEFINE6(fadvise64_64, int, fd, u32, offhi, u32, offlo,
+			     u32, lenhi, u32, lenlo, int, advice)
 {
 	return ksys_fadvise64_64(fd,
-				 (offhi << 32) | offlo,
-				 (lenhi << 32) | lenlo,
+				 ((u64)offhi << 32) | offlo,
+				 ((u64)lenhi << 32) | lenlo,
 				 advice);
 }
 
-long sys32_sync_file_range(unsigned int fd, unsigned long off_high, unsigned long off_low, unsigned long nb_high, unsigned long nb_low, unsigned int flags)
+COMPAT_SYSCALL_DEFINE6(sync_file_range, unsigned int, fd, u32, off_high, u32, off_low,
+			u32, nb_high, u32, nb_low, unsigned int, flags)
 {
 	return ksys_sync_file_range(fd,
-				   (off_high << 32) | off_low,
-				   (nb_high << 32) | nb_low,
-				   flags);
+				    ((u64)off_high << 32) | off_low,
+				    ((u64)nb_high << 32) | nb_low,
+				    flags);
 }
 
-asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offhi, u32 offlo,
-				     u32 lenhi, u32 lenlo)
+COMPAT_SYSCALL_DEFINE6(fallocate, int, fd, int, mode, u32, offhi, u32, offlo,
+				     u32, lenhi, u32, lenlo)
 {
 	return ksys_fallocate(fd, mode, ((loff_t)offhi << 32) | offlo,
 			      ((loff_t)lenhi << 32) | lenlo);
diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c
index d980da4ffd7b..e8c3cb6b6d08 100644
--- a/arch/sparc/kernel/sys_sparc_32.c
+++ b/arch/sparc/kernel/sys_sparc_32.c
@@ -34,7 +34,7 @@
 /* XXX Make this per-binary type, this way we can detect the type of
  * XXX a binary.  Every Sparc executable calls this very early on.
  */
-asmlinkage unsigned long sys_getpagesize(void)
+SYSCALL_DEFINE0(getpagesize)
 {
 	return PAGE_SIZE; /* Possibly older binaries want 8192 on sun4's? */
 }
@@ -73,7 +73,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi
  * sys_pipe() is the normal C calling standard for creating
  * a pipe. It's not the way unix traditionally does this, though.
  */
-asmlinkage long sparc_pipe(struct pt_regs *regs)
+SYSCALL_DEFINE0(sparc_pipe)
 {
 	int fd[2];
 	int error;
@@ -81,7 +81,7 @@ asmlinkage long sparc_pipe(struct pt_regs *regs)
 	error = do_pipe_flags(fd, 0);
 	if (error)
 		goto out;
-	regs->u_regs[UREG_I1] = fd[1];
+	current_pt_regs()->u_regs[UREG_I1] = fd[1];
 	error = fd[0];
 out:
 	return error;
@@ -98,9 +98,9 @@ int sparc_mmap_check(unsigned long addr, unsigned long len)
 
 /* Linux version of mmap */
 
-asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
-	unsigned long prot, unsigned long flags, unsigned long fd,
-	unsigned long pgoff)
+SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len,
+	unsigned long, prot, unsigned long, flags, unsigned long, fd,
+	unsigned long, pgoff)
 {
 	/* Make sure the shift for mmap2 is constant (12), no matter what PAGE_SIZE
 	   we have. */
@@ -108,17 +108,17 @@ asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
 			       pgoff >> (PAGE_SHIFT - 12));
 }
 
-asmlinkage long sys_mmap(unsigned long addr, unsigned long len,
-	unsigned long prot, unsigned long flags, unsigned long fd,
-	unsigned long off)
+SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
+	unsigned long, prot, unsigned long, flags, unsigned long, fd,
+	unsigned long, off)
 {
 	/* no alignment check? */
 	return ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
 }
 
-long sparc_remap_file_pages(unsigned long start, unsigned long size,
-			   unsigned long prot, unsigned long pgoff,
-			   unsigned long flags)
+SYSCALL_DEFINE5(sparc_remap_file_pages, unsigned long, start, unsigned long, size,
+			   unsigned long, prot, unsigned long, pgoff,
+			   unsigned long, flags)
 {
 	/* This works on an existing mmap so we don't need to validate
 	 * the range as that was done at the original mmap call.
@@ -127,11 +127,10 @@ long sparc_remap_file_pages(unsigned long start, unsigned long size,
 				    (pgoff >> (PAGE_SHIFT - 12)), flags);
 }
 
-/* we come to here via sys_nis_syscall so it can setup the regs argument */
-asmlinkage unsigned long
-c_sys_nis_syscall (struct pt_regs *regs)
+SYSCALL_DEFINE0(nis_syscall)
 {
 	static int count = 0;
+	struct pt_regs *regs = current_pt_regs();
 
 	if (count++ > 5)
 		return -ENOSYS;
@@ -202,7 +201,7 @@ SYSCALL_DEFINE5(rt_sigaction, int, sig,
 	return ret;
 }
 
-asmlinkage long sys_getdomainname(char __user *name, int len)
+SYSCALL_DEFINE2(getdomainname, char __user *, name, int, len)
 {
  	int nlen, err;
  	
diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index ebb84dc8a5a7..9ef8de63f28b 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -39,7 +39,7 @@
 
 /* #define DEBUG_UNIMP_SYSCALL */
 
-asmlinkage unsigned long sys_getpagesize(void)
+SYSCALL_DEFINE0(getpagesize)
 {
 	return PAGE_SIZE;
 }
@@ -276,7 +276,7 @@ static unsigned long mmap_rnd(void)
 	return rnd << PAGE_SHIFT;
 }
 
-void arch_pick_mmap_layout(struct mm_struct *mm)
+void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
 {
 	unsigned long random_factor = mmap_rnd();
 	unsigned long gap;
@@ -285,7 +285,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 	 * Fall back to the standard layout if the personality
 	 * bit is set, or if the expected stack growth is unlimited:
 	 */
-	gap = rlimit(RLIMIT_STACK);
+	gap = rlim_stack->rlim_cur;
 	if (!test_thread_flag(TIF_32BIT) ||
 	    (current->personality & ADDR_COMPAT_LAYOUT) ||
 	    gap == RLIM_INFINITY ||
@@ -310,7 +310,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
  * sys_pipe() is the normal C calling standard for creating
  * a pipe. It's not the way unix traditionally does this, though.
  */
-SYSCALL_DEFINE1(sparc_pipe_real, struct pt_regs *, regs)
+SYSCALL_DEFINE0(sparc_pipe)
 {
 	int fd[2];
 	int error;
@@ -318,7 +318,7 @@ SYSCALL_DEFINE1(sparc_pipe_real, struct pt_regs *, regs)
 	error = do_pipe_flags(fd, 0);
 	if (error)
 		goto out;
-	regs->u_regs[UREG_I1] = fd[1];
+	current_pt_regs()->u_regs[UREG_I1] = fd[1];
 	error = fd[0];
 out:
 	return error;
@@ -480,10 +480,10 @@ SYSCALL_DEFINE5(64_mremap, unsigned long, addr,	unsigned long, old_len,
 	return sys_mremap(addr, old_len, new_len, flags, new_addr);
 }
 
-/* we come to here via sys_nis_syscall so it can setup the regs argument */
-asmlinkage unsigned long c_sys_nis_syscall(struct pt_regs *regs)
+SYSCALL_DEFINE0(nis_syscall)
 {
 	static int count;
+	struct pt_regs *regs = current_pt_regs();
 	
 	/* Don't make the system unusable, if someone goes stuck */
 	if (count++ > 5)
@@ -523,8 +523,6 @@ asmlinkage void sparc_breakpoint(struct pt_regs *regs)
 	exception_exit(prev_state);
 }
 
-extern void check_pending(int signum);
-
 SYSCALL_DEFINE2(getdomainname, char __user *, name, int, len)
 {
         int nlen, err;
@@ -608,9 +606,9 @@ SYSCALL_DEFINE5(utrap_install, utrap_entry_t, type,
 	return 0;
 }
 
-asmlinkage long sparc_memory_ordering(unsigned long model,
-				      struct pt_regs *regs)
+SYSCALL_DEFINE1(memory_ordering, unsigned long, model)
 {
+	struct pt_regs *regs = current_pt_regs();
 	if (model >= 3)
 		return -EINVAL;
 	regs->tstate = (regs->tstate & ~TSTATE_MM) | (model << 14);
@@ -644,7 +642,7 @@ SYSCALL_DEFINE5(rt_sigaction, int, sig, const struct sigaction __user *, act,
 	return ret;
 }
 
-asmlinkage long sys_kern_features(void)
+SYSCALL_DEFINE0(kern_features)
 {
 	return KERN_FEATURE_MIXED_MODE_STACK;
 }
diff --git a/arch/sparc/kernel/syscalls.S b/arch/sparc/kernel/syscalls.S
index c5f9ec8c52eb..db42b4fb3708 100644
--- a/arch/sparc/kernel/syscalls.S
+++ b/arch/sparc/kernel/syscalls.S
@@ -27,15 +27,6 @@ sys32_execveat:
 #endif
 
 	.align	32
-sys_sparc_pipe:
-	ba,pt	%xcc, sys_sparc_pipe_real
-	 add	%sp, PTREGS_OFF, %o0
-sys_nis_syscall:
-	ba,pt	%xcc, c_sys_nis_syscall
-	 add	%sp, PTREGS_OFF, %o0
-sys_memory_ordering:
-	ba,pt	%xcc, sparc_memory_ordering
-	 add	%sp, PTREGS_OFF, %o1
 #ifdef CONFIG_COMPAT
 sys32_sigstack:
 	ba,pt	%xcc, do_sys32_sigstack
diff --git a/arch/sparc/kernel/systbls.h b/arch/sparc/kernel/systbls.h
index 5a01cfe19a0e..bf014267d619 100644
--- a/arch/sparc/kernel/systbls.h
+++ b/arch/sparc/kernel/systbls.h
@@ -9,9 +9,9 @@
 
 #include <asm/utrap.h>
 
-asmlinkage unsigned long sys_getpagesize(void);
-asmlinkage long sparc_pipe(struct pt_regs *regs);
-asmlinkage unsigned long c_sys_nis_syscall(struct pt_regs *regs);
+asmlinkage long sys_getpagesize(void);
+asmlinkage long sys_sparc_pipe(void);
+asmlinkage long sys_nis_syscall(void);
 asmlinkage long sys_getdomainname(char __user *name, int len);
 void do_rt_sigreturn(struct pt_regs *regs);
 asmlinkage long sys_mmap(unsigned long addr, unsigned long len,
@@ -23,7 +23,7 @@ asmlinkage void sparc_breakpoint(struct pt_regs *regs);
 asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
 			  unsigned long prot, unsigned long flags,
 			  unsigned long fd, unsigned long pgoff);
-long sparc_remap_file_pages(unsigned long start, unsigned long size,
+long sys_sparc_remap_file_pages(unsigned long start, unsigned long size,
 			    unsigned long prot, unsigned long pgoff,
 			    unsigned long flags);
 
@@ -46,16 +46,15 @@ asmlinkage long sys_utrap_install(utrap_entry_t type,
 				  utrap_handler_t new_d,
 				  utrap_handler_t __user *old_p,
 				  utrap_handler_t __user *old_d);
-asmlinkage long sparc_memory_ordering(unsigned long model,
-				      struct pt_regs *regs);
+asmlinkage long sys_memory_ordering(unsigned long model);
 asmlinkage void sparc64_set_context(struct pt_regs *regs);
 asmlinkage void sparc64_get_context(struct pt_regs *regs);
-asmlinkage long sys32_truncate64(const char __user * path,
-				 unsigned long high,
-				 unsigned long low);
-asmlinkage long sys32_ftruncate64(unsigned int fd,
-				  unsigned long high,
-				  unsigned long low);
+asmlinkage long compat_sys_truncate64(const char __user * path,
+				 u32 high,
+				 u32 low);
+asmlinkage long compat_sys_ftruncate64(unsigned int fd,
+				  u32 high,
+				  u32 low);
 struct compat_stat64;
 asmlinkage long compat_sys_stat64(const char __user * filename,
 				  struct compat_stat64 __user *statbuf);
@@ -66,31 +65,31 @@ asmlinkage long compat_sys_fstat64(unsigned int fd,
 asmlinkage long compat_sys_fstatat64(unsigned int dfd,
 				     const char __user *filename,
 				     struct compat_stat64 __user * statbuf, int flag);
-asmlinkage compat_ssize_t sys32_pread64(unsigned int fd,
+asmlinkage long compat_sys_pread64(unsigned int fd,
 					char __user *ubuf,
 					compat_size_t count,
-					unsigned long poshi,
-					unsigned long poslo);
-asmlinkage compat_ssize_t sys32_pwrite64(unsigned int fd,
+					u32 poshi,
+					u32 poslo);
+asmlinkage long compat_sys_pwrite64(unsigned int fd,
 					 char __user *ubuf,
 					 compat_size_t count,
-					 unsigned long poshi,
-					 unsigned long poslo);
+					 u32 poshi,
+					 u32 poslo);
 asmlinkage long compat_sys_readahead(int fd,
-				     unsigned long offhi,
-				     unsigned long offlo,
+				     unsigned offhi,
+				     unsigned offlo,
 				     compat_size_t count);
 long compat_sys_fadvise64(int fd,
-			  unsigned long offhi,
-			  unsigned long offlo,
+			  unsigned offhi,
+			  unsigned offlo,
 			  compat_size_t len, int advice);
 long compat_sys_fadvise64_64(int fd,
-			     unsigned long offhi, unsigned long offlo,
-			     unsigned long lenhi, unsigned long lenlo,
+			     unsigned offhi, unsigned offlo,
+			     unsigned lenhi, unsigned lenlo,
 			     int advice);
-long sys32_sync_file_range(unsigned int fd,
-			   unsigned long off_high, unsigned long off_low,
-			   unsigned long nb_high, unsigned long nb_low,
+long compat_sys_sync_file_range(unsigned int fd,
+			   unsigned off_high, unsigned off_low,
+			   unsigned nb_high, unsigned nb_low,
 			   unsigned int flags);
 asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offhi, u32 offlo,
 				     u32 lenhi, u32 lenlo);
diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S
index 731b25d572a1..12bee14b552c 100644
--- a/arch/sparc/kernel/systbls_32.S
+++ b/arch/sparc/kernel/systbls_32.S
@@ -55,7 +55,7 @@ sys_call_table:
 /*175*/	.long sys_setsid, sys_fchdir, sys_fgetxattr, sys_listxattr, sys_llistxattr
 /*180*/	.long sys_flistxattr, sys_removexattr, sys_lremovexattr, sys_sigpending, sys_ni_syscall
 /*185*/	.long sys_setpgid, sys_fremovexattr, sys_tkill, sys_exit_group, sys_newuname
-/*190*/	.long sys_init_module, sys_personality, sparc_remap_file_pages, sys_epoll_create, sys_epoll_ctl
+/*190*/	.long sys_init_module, sys_personality, sys_sparc_remap_file_pages, sys_epoll_create, sys_epoll_ctl
 /*195*/	.long sys_epoll_wait, sys_ioprio_set, sys_getppid, sys_sparc_sigaction, sys_sgetmask
 /*200*/	.long sys_ssetmask, sys_sigsuspend, sys_newlstat, sys_uselib, sys_old_readdir
 /*205*/	.long sys_readahead, sys_socketcall, sys_syslog, sys_lookup_dcookie, sys_fadvise64
diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S
index 293c1cb31262..387ef993880a 100644
--- a/arch/sparc/kernel/systbls_64.S
+++ b/arch/sparc/kernel/systbls_64.S
@@ -32,12 +32,12 @@ sys_call_table32:
 /*50*/	.word sys_getegid16, sys_acct, sys_nis_syscall, sys_getgid, compat_sys_ioctl
 	.word sys_reboot, sys32_mmap2, sys_symlink, sys_readlink, sys32_execve
 /*60*/	.word sys_umask, sys_chroot, compat_sys_newfstat, compat_sys_fstat64, sys_getpagesize
-	.word sys_msync, sys_vfork, sys32_pread64, sys32_pwrite64, sys_geteuid
+	.word sys_msync, sys_vfork, compat_sys_pread64, compat_sys_pwrite64, sys_geteuid
 /*70*/	.word sys_getegid, sys_mmap, sys_setreuid, sys_munmap, sys_mprotect
-	.word sys_madvise, sys_vhangup, sys32_truncate64, sys_mincore, sys_getgroups16
-/*80*/	.word sys_setgroups16, sys_getpgrp, sys_setgroups, compat_sys_setitimer, sys32_ftruncate64
+	.word sys_madvise, sys_vhangup, compat_sys_truncate64, sys_mincore, sys_getgroups16
+/*80*/	.word sys_setgroups16, sys_getpgrp, sys_setgroups, compat_sys_setitimer, compat_sys_ftruncate64
 	.word sys_swapon, compat_sys_getitimer, sys_setuid, sys_sethostname, sys_setgid
-/*90*/	.word sys_dup2, sys_setfsuid, compat_sys_fcntl, sys32_select, sys_setfsgid
+/*90*/	.word sys_dup2, sys_setfsuid, compat_sys_fcntl, compat_sys_select, sys_setfsgid
 	.word sys_fsync, sys_setpriority, sys_socket, sys_connect, sys_accept
 /*100*/ .word sys_getpriority, sys32_rt_sigreturn, compat_sys_rt_sigaction, compat_sys_rt_sigprocmask, compat_sys_rt_sigpending
 	.word compat_sys_rt_sigtimedwait, compat_sys_rt_sigqueueinfo, compat_sys_rt_sigsuspend, sys_setresuid, sys_getresuid
@@ -47,7 +47,7 @@ sys_call_table32:
 	.word sys_recvfrom, sys_setreuid16, sys_setregid16, sys_rename, compat_sys_truncate
 /*130*/	.word compat_sys_ftruncate, sys_flock, compat_sys_lstat64, sys_sendto, sys_shutdown
 	.word sys_socketpair, sys_mkdir, sys_rmdir, compat_sys_utimes, compat_sys_stat64
-/*140*/	.word sys_sendfile64, sys_nis_syscall, sys32_futex, sys_gettid, compat_sys_getrlimit
+/*140*/	.word sys_sendfile64, sys_nis_syscall, compat_sys_futex, sys_gettid, compat_sys_getrlimit
 	.word compat_sys_setrlimit, sys_pivot_root, sys_prctl, sys_pciconfig_read, sys_pciconfig_write
 /*150*/	.word sys_nis_syscall, sys_inotify_init, sys_inotify_add_watch, sys_poll, sys_getdents64
 	.word compat_sys_fcntl64, sys_inotify_rm_watch, compat_sys_statfs, compat_sys_fstatfs, sys_oldumount
@@ -60,20 +60,20 @@ sys_call_table32:
 /*190*/	.word sys_init_module, sys_sparc64_personality, sys_remap_file_pages, sys_epoll_create, sys_epoll_ctl
 	.word sys_epoll_wait, sys_ioprio_set, sys_getppid, compat_sys_sparc_sigaction, sys_sgetmask
 /*200*/	.word sys_ssetmask, sys_sigsuspend, compat_sys_newlstat, sys_uselib, compat_sys_old_readdir
-	.word sys32_readahead, sys32_socketcall, sys_syslog, compat_sys_lookup_dcookie, sys32_fadvise64
-/*210*/	.word sys32_fadvise64_64, sys_tgkill, sys_waitpid, sys_swapoff, compat_sys_sysinfo
+	.word compat_sys_readahead, sys32_socketcall, sys_syslog, compat_sys_lookup_dcookie, compat_sys_fadvise64
+/*210*/	.word compat_sys_fadvise64_64, sys_tgkill, sys_waitpid, sys_swapoff, compat_sys_sysinfo
 	.word compat_sys_ipc, sys32_sigreturn, sys_clone, sys_ioprio_get, compat_sys_adjtimex
 /*220*/	.word compat_sys_sigprocmask, sys_ni_syscall, sys_delete_module, sys_ni_syscall, sys_getpgid
 	.word sys_bdflush, sys_sysfs, sys_nis_syscall, sys_setfsuid16, sys_setfsgid16
-/*230*/	.word sys32_select, compat_sys_time, sys_splice, compat_sys_stime, compat_sys_statfs64
+/*230*/	.word compat_sys_select, compat_sys_time, sys_splice, compat_sys_stime, compat_sys_statfs64
 	.word compat_sys_fstatfs64, sys_llseek, sys_mlock, sys_munlock, sys_mlockall
 /*240*/	.word sys_munlockall, sys_sched_setparam, sys_sched_getparam, sys_sched_setscheduler, sys_sched_getscheduler
 	.word sys_sched_yield, sys_sched_get_priority_max, sys_sched_get_priority_min, compat_sys_sched_rr_get_interval, compat_sys_nanosleep
 /*250*/	.word sys_mremap, compat_sys_sysctl, sys_getsid, sys_fdatasync, sys_nis_syscall
-	.word sys32_sync_file_range, compat_sys_clock_settime, compat_sys_clock_gettime, compat_sys_clock_getres, sys32_clock_nanosleep
-/*260*/	.word compat_sys_sched_getaffinity, compat_sys_sched_setaffinity, sys32_timer_settime, compat_sys_timer_gettime, sys_timer_getoverrun
+	.word compat_sys_sync_file_range, compat_sys_clock_settime, compat_sys_clock_gettime, compat_sys_clock_getres, compat_sys_clock_nanosleep
+/*260*/	.word compat_sys_sched_getaffinity, compat_sys_sched_setaffinity, compat_sys_timer_settime, compat_sys_timer_gettime, sys_timer_getoverrun
 	.word sys_timer_delete, compat_sys_timer_create, sys_ni_syscall, compat_sys_io_setup, sys_io_destroy
-/*270*/	.word sys32_io_submit, sys_io_cancel, compat_sys_io_getevents, sys32_mq_open, sys_mq_unlink
+/*270*/	.word compat_sys_io_submit, sys_io_cancel, compat_sys_io_getevents, compat_sys_mq_open, sys_mq_unlink
 	.word compat_sys_mq_timedsend, compat_sys_mq_timedreceive, compat_sys_mq_notify, compat_sys_mq_getsetattr, compat_sys_waitid
 /*280*/	.word sys_tee, sys_add_key, sys_request_key, compat_sys_keyctl, compat_sys_openat
 	.word sys_mkdirat, sys_mknodat, sys_fchownat, compat_sys_futimesat, compat_sys_fstatat64
@@ -88,7 +88,7 @@ sys_call_table32:
 /*330*/	.word compat_sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, compat_sys_open_by_handle_at, compat_sys_clock_adjtime
 	.word sys_syncfs, compat_sys_sendmmsg, sys_setns, compat_sys_process_vm_readv, compat_sys_process_vm_writev
 /*340*/	.word sys_kern_features, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr
-	.word sys32_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf
+	.word sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf
 /*350*/	.word sys32_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen
 	.word compat_sys_setsockopt, sys_mlock2, sys_copy_file_range, compat_sys_preadv2, compat_sys_pwritev2
 /*360*/	.word sys_statx
diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c
index 357b6047653a..aee6dba83d0e 100644
--- a/arch/sparc/mm/gup.c
+++ b/arch/sparc/mm/gup.c
@@ -193,6 +193,10 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
 	return 1;
 }
 
+/*
+ * Note a difference with get_user_pages_fast: this always returns the
+ * number of pages pinned, 0 if no pages were pinned.
+ */
 int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
 			  struct page **pages)
 {
diff --git a/arch/um/Kconfig.net b/arch/um/Kconfig.net
index e871af24d9cd..c390f3deb0dc 100644
--- a/arch/um/Kconfig.net
+++ b/arch/um/Kconfig.net
@@ -109,6 +109,17 @@ config UML_NET_DAEMON
         more than one without conflict.  If you don't need UML networking,
         say N.
 
+config UML_NET_VECTOR
+	bool "Vector I/O high performance network devices"
+	depends on UML_NET
+	help
+	This User-Mode Linux network driver uses multi-message send
+	and receive functions. The host running the UML guest must have
+	a linux kernel version above 3.0 and a libc version > 2.13.
+	This driver provides tap, raw, gre and l2tpv3 network transports
+	with up to 4 times higher network throughput than the UML network
+	drivers.
+
 config UML_NET_VDE
 	bool "VDE transport"
 	depends on UML_NET
diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile
index e7582e1d248c..16b3cebddafb 100644
--- a/arch/um/drivers/Makefile
+++ b/arch/um/drivers/Makefile
@@ -9,6 +9,7 @@
 slip-objs := slip_kern.o slip_user.o
 slirp-objs := slirp_kern.o slirp_user.o
 daemon-objs := daemon_kern.o daemon_user.o
+vector-objs := vector_kern.o vector_user.o vector_transports.o
 umcast-objs := umcast_kern.o umcast_user.o
 net-objs := net_kern.o net_user.o
 mconsole-objs := mconsole_kern.o mconsole_user.o
@@ -43,6 +44,7 @@ obj-$(CONFIG_STDERR_CONSOLE) += stderr_console.o
 obj-$(CONFIG_UML_NET_SLIP) += slip.o slip_common.o
 obj-$(CONFIG_UML_NET_SLIRP) += slirp.o slip_common.o
 obj-$(CONFIG_UML_NET_DAEMON) += daemon.o 
+obj-$(CONFIG_UML_NET_VECTOR) += vector.o
 obj-$(CONFIG_UML_NET_VDE) += vde.o
 obj-$(CONFIG_UML_NET_MCAST) += umcast.o
 obj-$(CONFIG_UML_NET_PCAP) += pcap.o
@@ -61,7 +63,7 @@ obj-$(CONFIG_BLK_DEV_COW_COMMON) += cow_user.o
 obj-$(CONFIG_UML_RANDOM) += random.o
 
 # pcap_user.o must be added explicitly.
-USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o vde_user.o
+USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o vde_user.o vector_user.o
 CFLAGS_null.o = -DDEV_NULL=$(DEV_NULL_PATH)
 
 include arch/um/scripts/Makefile.rules
diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c
index acbe6c67afba..05588f9466c7 100644
--- a/arch/um/drivers/chan_kern.c
+++ b/arch/um/drivers/chan_kern.c
@@ -171,56 +171,19 @@ int enable_chan(struct line *line)
 	return err;
 }
 
-/* Items are added in IRQ context, when free_irq can't be called, and
- * removed in process context, when it can.
- * This handles interrupt sources which disappear, and which need to
- * be permanently disabled.  This is discovered in IRQ context, but
- * the freeing of the IRQ must be done later.
- */
-static DEFINE_SPINLOCK(irqs_to_free_lock);
-static LIST_HEAD(irqs_to_free);
-
-void free_irqs(void)
-{
-	struct chan *chan;
-	LIST_HEAD(list);
-	struct list_head *ele;
-	unsigned long flags;
-
-	spin_lock_irqsave(&irqs_to_free_lock, flags);
-	list_splice_init(&irqs_to_free, &list);
-	spin_unlock_irqrestore(&irqs_to_free_lock, flags);
-
-	list_for_each(ele, &list) {
-		chan = list_entry(ele, struct chan, free_list);
-
-		if (chan->input && chan->enabled)
-			um_free_irq(chan->line->driver->read_irq, chan);
-		if (chan->output && chan->enabled)
-			um_free_irq(chan->line->driver->write_irq, chan);
-		chan->enabled = 0;
-	}
-}
-
 static void close_one_chan(struct chan *chan, int delay_free_irq)
 {
-	unsigned long flags;
-
 	if (!chan->opened)
 		return;
 
-	if (delay_free_irq) {
-		spin_lock_irqsave(&irqs_to_free_lock, flags);
-		list_add(&chan->free_list, &irqs_to_free);
-		spin_unlock_irqrestore(&irqs_to_free_lock, flags);
-	}
-	else {
-		if (chan->input && chan->enabled)
-			um_free_irq(chan->line->driver->read_irq, chan);
-		if (chan->output && chan->enabled)
-			um_free_irq(chan->line->driver->write_irq, chan);
-		chan->enabled = 0;
-	}
+	/* We can safely call free now - the IRQ will be marked
+	 * as free and released once it has stopped processing.
+	 */
+	if (chan->input && chan->enabled)
+		um_free_irq(chan->line->driver->read_irq, chan);
+	if (chan->output && chan->enabled)
+		um_free_irq(chan->line->driver->write_irq, chan);
+	chan->enabled = 0;
 	if (chan->ops->close != NULL)
 		(*chan->ops->close)(chan->fd, chan->data);
 
diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c
index 366e57f5e8d6..8d80b27502e6 100644
--- a/arch/um/drivers/line.c
+++ b/arch/um/drivers/line.c
@@ -284,7 +284,7 @@ int line_setup_irq(int fd, int input, int output, struct line *line, void *data)
 	if (err)
 		return err;
 	if (output)
-		err = um_request_irq(driver->write_irq, fd, IRQ_WRITE,
+		err = um_request_irq(driver->write_irq, fd, IRQ_NONE,
 				     line_write_interrupt, IRQF_SHARED,
 				     driver->write_irq_name, data);
 	return err;
diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c
index b305f8247909..3ef1b48e064a 100644
--- a/arch/um/drivers/net_kern.c
+++ b/arch/um/drivers/net_kern.c
@@ -288,7 +288,7 @@ static void uml_net_user_timer_expire(struct timer_list *t)
 #endif
 }
 
-static void setup_etheraddr(struct net_device *dev, char *str)
+void uml_net_setup_etheraddr(struct net_device *dev, char *str)
 {
 	unsigned char *addr = dev->dev_addr;
 	char *end;
@@ -412,7 +412,7 @@ static void eth_configure(int n, void *init, char *mac,
 	 */
 	snprintf(dev->name, sizeof(dev->name), "eth%d", n);
 
-	setup_etheraddr(dev, mac);
+	uml_net_setup_etheraddr(dev, mac);
 
 	printk(KERN_INFO "Netdevice %d (%pM) : ", n, dev->dev_addr);
 
diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c
index 37c51a6be690..778a0e52d5a5 100644
--- a/arch/um/drivers/random.c
+++ b/arch/um/drivers/random.c
@@ -13,6 +13,7 @@
 #include <linux/miscdevice.h>
 #include <linux/delay.h>
 #include <linux/uaccess.h>
+#include <init.h>
 #include <irq_kern.h>
 #include <os.h>
 
@@ -154,7 +155,14 @@ err_out_cleanup_hw:
 /*
  * rng_cleanup - shutdown RNG module
  */
-static void __exit rng_cleanup (void)
+
+static void cleanup(void)
+{
+	free_irq_by_fd(random_fd);
+	os_close_file(random_fd);
+}
+
+static void __exit rng_cleanup(void)
 {
 	os_close_file(random_fd);
 	misc_deregister (&rng_miscdev);
@@ -162,6 +170,7 @@ static void __exit rng_cleanup (void)
 
 module_init (rng_init);
 module_exit (rng_cleanup);
+__uml_exitcall(cleanup);
 
 MODULE_DESCRIPTION("UML Host Random Number Generator (RNG) driver");
 MODULE_LICENSE("GPL");
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index b55fe9bf5d3e..d4e8c497ae86 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -1587,11 +1587,11 @@ int io_thread(void *arg)
 
 		do {
 			res = os_write_file(kernel_fd, ((char *) io_req_buffer) + written, n);
-			if (res > 0) {
+			if (res >= 0) {
 				written += res;
 			} else {
 				if (res != -EAGAIN) {
-					printk("io_thread - read failed, fd = %d, "
+					printk("io_thread - write failed, fd = %d, "
 					       "err = %d\n", kernel_fd, -n);
 				}
 			}
diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c
new file mode 100644
index 000000000000..02168fe25105
--- /dev/null
+++ b/arch/um/drivers/vector_kern.c
@@ -0,0 +1,1633 @@
+/*
+ * Copyright (C) 2017 - Cambridge Greys Limited
+ * Copyright (C) 2011 - 2014 Cisco Systems Inc
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
+ * James Leu (jleu@mindspring.net).
+ * Copyright (C) 2001 by various other people who didn't put their name here.
+ * Licensed under the GPL.
+ */
+
+#include <linux/version.h>
+#include <linux/bootmem.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/inetdevice.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/platform_device.h>
+#include <linux/rtnetlink.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <init.h>
+#include <irq_kern.h>
+#include <irq_user.h>
+#include <net_kern.h>
+#include <os.h>
+#include "mconsole_kern.h"
+#include "vector_user.h"
+#include "vector_kern.h"
+
+/*
+ * Adapted from network devices with the following major changes:
+ * All transports are static - simplifies the code significantly
+ * Multiple FDs/IRQs per device
+ * Vector IO optionally used for read/write, falling back to legacy
+ * based on configuration and/or availability
+ * Configuration is no longer positional - L2TPv3 and GRE require up to
+ * 10 parameters, passing this as positional is not fit for purpose.
+ * Only socket transports are supported
+ */
+
+
+#define DRIVER_NAME "uml-vector"
+#define DRIVER_VERSION "01"
+/* One stored command-line configuration entry: a unit number together
+ * with its argument string, chained through the list member.
+ * NOTE(review): presumably linked on vec_cmd_line - confirm where entries
+ * are added.
+ */
+struct vector_cmd_line_arg {
+	struct list_head list;
+	int unit;
+	char *arguments;
+};
+
+/* Per-device bookkeeping record. find_device() walks vector_devices and
+ * matches records on their unit number.
+ */
+struct vector_device {
+	struct list_head list;		/* link in vector_devices */
+	struct net_device *dev;		/* network device of this unit */
+	struct platform_device pdev;	/* embedded platform device */
+	int unit;			/* index matched by find_device() */
+	int opened;
+};
+
+static LIST_HEAD(vec_cmd_line);
+
+static DEFINE_SPINLOCK(vector_devices_lock);
+static LIST_HEAD(vector_devices);
+
+static int driver_registered;
+
+static void vector_eth_configure(int n, struct arglist *def);
+
+/* Argument accessors to set variables (and/or set default values)
+ * mtu, buffer sizing, default headroom, etc
+ */
+
+#define DEFAULT_HEADROOM 2
+#define SAFETY_MARGIN 32
+#define DEFAULT_VECTOR_SIZE 64
+#define TX_SMALL_PACKET 128
+#define MAX_IOV_SIZE (MAX_SKB_FRAGS + 1)
+
+/* Names of the driver-private statistics, each stored in a fixed
+ * ETH_GSTRING_LEN sized slot. VECTOR_NUM_STATS is derived from the
+ * number of entries in this table.
+ * NOTE(review): the order presumably has to match the layout of the
+ * estats structure - confirm against its declaration.
+ */
+static const struct {
+	const char string[ETH_GSTRING_LEN];
+} ethtool_stats_keys[] = {
+	{ "rx_queue_max" },
+	{ "rx_queue_running_average" },
+	{ "tx_queue_max" },
+	{ "tx_queue_running_average" },
+	{ "rx_encaps_errors" },
+	{ "tx_timeout_count" },
+	{ "tx_restart_queue" },
+	{ "tx_kicks" },
+	{ "tx_flow_control_xon" },
+	{ "tx_flow_control_xoff" },
+	{ "rx_csum_offload_good" },
+	{ "rx_csum_offload_errors"},
+	{ "sg_ok"},
+	{ "sg_linearized"},
+};
+
+#define VECTOR_NUM_STATS	ARRAY_SIZE(ethtool_stats_keys)
+
+/* Zero every driver-private counter kept in vp->estats. */
+static void vector_reset_stats(struct vector_private *vp)
+{
+	vp->estats.rx_queue_max = 0;
+	vp->estats.rx_queue_running_average = 0;
+	vp->estats.tx_queue_max = 0;
+	vp->estats.tx_queue_running_average = 0;
+	vp->estats.rx_encaps_errors = 0;
+	vp->estats.tx_timeout_count = 0;
+	vp->estats.tx_restart_queue = 0;
+	vp->estats.tx_kicks = 0;
+	vp->estats.tx_flow_control_xon = 0;
+	vp->estats.tx_flow_control_xoff = 0;
+	/* The checksum-offload counters are advertised in
+	 * ethtool_stats_keys too, so a reset has to clear them as well.
+	 */
+	vp->estats.rx_csum_offload_good = 0;
+	vp->estats.rx_csum_offload_errors = 0;
+	vp->estats.sg_ok = 0;
+	vp->estats.sg_linearized = 0;
+}
+
+/* Return the "mtu" argument of a device definition as a number, or
+ * ETH_MAX_PACKET when the argument is absent or is not a valid decimal.
+ */
+static int get_mtu(struct arglist *def)
+{
+	char *mtu = uml_vector_fetch_arg(def, "mtu");
+	/* kstrtoul() stores through an unsigned long pointer */
+	unsigned long result;
+
+	if (mtu != NULL) {
+		if (kstrtoul(mtu, 10, &result) == 0)
+			return result;
+	}
+	return ETH_MAX_PACKET;
+}
+
+/* Return the "depth" argument of a device definition as a number, or
+ * DEFAULT_VECTOR_SIZE when the argument is absent or is not a valid
+ * decimal.
+ */
+static int get_depth(struct arglist *def)
+{
+	char *depth = uml_vector_fetch_arg(def, "depth");
+	/* kstrtoul() stores through an unsigned long pointer */
+	unsigned long result;
+
+	if (depth != NULL) {
+		if (kstrtoul(depth, 10, &result) == 0)
+			return result;
+	}
+	return DEFAULT_VECTOR_SIZE;
+}
+
+/* Return the "headroom" argument of a device definition as a number, or
+ * DEFAULT_HEADROOM when the argument is absent or is not a valid decimal.
+ */
+static int get_headroom(struct arglist *def)
+{
+	char *headroom = uml_vector_fetch_arg(def, "headroom");
+	/* kstrtoul() stores through an unsigned long pointer */
+	unsigned long result;
+
+	if (headroom != NULL) {
+		if (kstrtoul(headroom, 10, &result) == 0)
+			return result;
+	}
+	return DEFAULT_HEADROOM;
+}
+
+/* Work out the receive request size for a device definition.
+ * A "gro" argument that parses to a value above zero selects a fixed
+ * 65536; otherwise the size is the mtu plus ETH_HEADER_OTHER, the
+ * headroom and SAFETY_MARGIN.
+ */
+static int get_req_size(struct arglist *def)
+{
+	char *gro = uml_vector_fetch_arg(def, "gro");
+	/* kstrtoul() stores through an unsigned long pointer */
+	unsigned long result;
+
+	if (gro != NULL) {
+		if (kstrtoul(gro, 10, &result) == 0) {
+			if (result > 0)
+				return 65536;
+		}
+	}
+	return get_mtu(def) + ETH_HEADER_OTHER +
+		get_headroom(def) + SAFETY_MARGIN;
+}
+
+
+/* Derive the I/O option flags of a device from its "transport" and "vec"
+ * arguments.
+ * Vector RX and TX are on by default and "vec=0" turns both off. The tap
+ * transport gets the RX flag plus VECTOR_BPF; every other transport,
+ * including a missing one, gets the RX and TX flags.
+ */
+static int get_transport_options(struct arglist *def)
+{
+	char *transport = uml_vector_fetch_arg(def, "transport");
+	char *vector = uml_vector_fetch_arg(def, "vec");
+
+	int vec_rx = VECTOR_RX;
+	int vec_tx = VECTOR_TX;
+	/* kstrtoul() stores through an unsigned long pointer */
+	unsigned long parsed;
+
+	if (vector != NULL) {
+		if (kstrtoul(vector, 10, &parsed) == 0) {
+			if (parsed == 0) {
+				vec_rx = 0;
+				vec_tx = 0;
+			}
+		}
+	}
+
+	/* A definition without a transport must not reach strncmp() with a
+	 * NULL pointer; treat it like any unrecognised transport.
+	 */
+	if (transport == NULL)
+		return (vec_rx | vec_tx);
+
+	if (strncmp(transport, TRANS_TAP, TRANS_TAP_LEN) == 0)
+		return (vec_rx | VECTOR_BPF);
+	if (strncmp(transport, TRANS_RAW, TRANS_RAW_LEN) == 0)
+		return (vec_rx | vec_tx);
+	return (vec_rx | vec_tx);
+}
+
+
+/* A mini-buffer for packet drop read
+ * All of our supported transports are datagram oriented and we always
+ * read using recvmsg or recvmmsg. If we pass a buffer which is smaller
+ * than the packet size it still counts as full packet read and will
+ * clean the incoming stream to keep sigio/epoll happy
+ */
+
+#define DROP_BUFFER_SIZE 32
+
+static char *drop_buffer;
+
+/* Array backed queues optimized for bulk enqueue/dequeue and
+ * 1:N (small values of N) or 1:1 enqueuer/dequeuer ratios.
+ * For more details and full design rationale see
+ * http://foswiki.cambridgegreys.com/Main/EatYourTailAndEnjoyIt
+ */
+
+
+/*
+ * Advance the mmsg queue head by n = advance. Resets the queue to
+ * maximum enqueue/dequeue-at-once capacity if possible. Called by
+ * dequeuers. Caller must hold the head_lock!
+ */
+
+/* Returns the queue depth that remains after the advance. */
+static int vector_advancehead(struct vector_queue *qi, int advance)
+{
+	int queue_depth;
+
+	/* head wraps around at max_depth */
+	qi->head =
+		(qi->head + advance)
+			% qi->max_depth;
+
+
+	/* queue_depth and tail are updated under the tail lock */
+	spin_lock(&qi->tail_lock);
+	qi->queue_depth -= advance;
+
+	/* we are at 0, use this to
+	 * reset head and tail so we can use max size vectors
+	 */
+
+	if (qi->queue_depth == 0) {
+		qi->head = 0;
+		qi->tail = 0;
+	}
+	queue_depth = qi->queue_depth;
+	spin_unlock(&qi->tail_lock);
+	return queue_depth;
+}
+
+/*	Advance the queue tail by n = advance.
+ *	This is called by enqueuers, which should hold the
+ *	tail lock already; the head lock is taken here while
+ *	the depth is updated. Returns the new queue depth.
+ */
+
+static int vector_advancetail(struct vector_queue *qi, int advance)
+{
+	int queue_depth;
+
+	/* tail wraps around at max_depth */
+	qi->tail =
+		(qi->tail + advance)
+			% qi->max_depth;
+	spin_lock(&qi->head_lock);
+	qi->queue_depth += advance;
+	queue_depth = qi->queue_depth;
+	spin_unlock(&qi->head_lock);
+	return queue_depth;
+}
+
+/* Describe one outgoing skb as an iovec array.
+ * When vp->header_size is above zero the first element is the transport
+ * header, written by vp->form_header() into the buffer that iov[0]
+ * already points at. The next element is the linear part of the skb and
+ * one further element follows per page fragment. An skb with more than
+ * MAX_IOV_SIZE fragments is linearized first.
+ * Returns the number of iovec elements filled in, or -1 if the skb could
+ * not be linearized.
+ */
+static int prep_msg(struct vector_private *vp,
+	struct sk_buff *skb,
+	struct iovec *iov)
+{
+	int iov_index = 0;
+	int nr_frags, frag;
+	skb_frag_t *skb_frag;
+
+	nr_frags = skb_shinfo(skb)->nr_frags;
+	if (nr_frags > MAX_IOV_SIZE) {
+		if (skb_linearize(skb) != 0)
+			goto drop;
+		/* Linearizing moves all fragment data into the head buffer.
+		 * Re-read the count so the stale value cannot drive the
+		 * fragment loop below past the end of iov.
+		 */
+		nr_frags = skb_shinfo(skb)->nr_frags;
+		vp->estats.sg_linearized++;
+	}
+	if (vp->header_size > 0) {
+		iov[iov_index].iov_len = vp->header_size;
+		vp->form_header(iov[iov_index].iov_base, skb, vp);
+		iov_index++;
+	}
+	iov[iov_index].iov_base = skb->data;
+	if (nr_frags > 0) {
+		/* only the linear part; fragments get their own entries */
+		iov[iov_index].iov_len = skb->len - skb->data_len;
+		vp->estats.sg_ok++;
+	} else
+		iov[iov_index].iov_len = skb->len;
+	iov_index++;
+	for (frag = 0; frag < nr_frags; frag++) {
+		skb_frag = &skb_shinfo(skb)->frags[frag];
+		iov[iov_index].iov_base = skb_frag_address_safe(skb_frag);
+		iov[iov_index].iov_len = skb_frag_size(skb_frag);
+		iov_index++;
+	}
+	return iov_index;
+drop:
+	return -1;
+}
+/*
+ * Generic vector enqueue with support for forming headers using transport
+ * specific callback. Allows GRE, L2TPv3, RAW and other transports
+ * to use a common enqueue procedure in vector mode
+ */
+
+/* Append one skb at the tail of the queue.
+ * The skb is dropped (counted in tx_dropped, released and reported as
+ * completed) when it is NULL, when the queue is full or when prep_msg()
+ * cannot describe it. Returns the queue depth after the operation.
+ */
+static int vector_enqueue(struct vector_queue *qi, struct sk_buff *skb)
+{
+	struct vector_private *vp = netdev_priv(qi->dev);
+	int queue_depth;
+	int packet_len;
+	struct mmsghdr *mmsg_vector = qi->mmsg_vector;
+	int iov_count;
+
+	spin_lock(&qi->tail_lock);
+	spin_lock(&qi->head_lock);
+	queue_depth = qi->queue_depth;
+	spin_unlock(&qi->head_lock);
+
+	if (skb == NULL || queue_depth >= qi->max_depth)
+		goto drop;
+
+	mmsg_vector += qi->tail;
+	iov_count = prep_msg(
+		vp,
+		skb,
+		mmsg_vector->msg_hdr.msg_iov
+	);
+	if (iov_count < 1)
+		goto drop;
+	/* Record the skb in its slot only once it is certain to be queued.
+	 * The drop path releases the skb, and a pointer left behind in the
+	 * slot would be released a second time by destroy_queue().
+	 */
+	*(qi->skbuff_vector + qi->tail) = skb;
+	mmsg_vector->msg_hdr.msg_iovlen = iov_count;
+	mmsg_vector->msg_hdr.msg_name = vp->fds->remote_addr;
+	mmsg_vector->msg_hdr.msg_namelen = vp->fds->remote_addr_size;
+	queue_depth = vector_advancetail(qi, 1);
+	spin_unlock(&qi->tail_lock);
+	return queue_depth;
+drop:
+	qi->dev->stats.tx_dropped++;
+	if (skb != NULL) {
+		packet_len = skb->len;
+		dev_consume_skb_any(skb);
+		netdev_completed_queue(qi->dev, 1, packet_len);
+	}
+	spin_unlock(&qi->tail_lock);
+	return queue_depth;
+}
+
+/* Release the first count skbs starting at the queue head, account them
+ * as transmitted and advance the head. Returns the remaining depth.
+ */
+static int consume_vector_skbs(struct vector_queue *qi, int count)
+{
+	struct sk_buff *done_skb;
+	int total_bytes = 0;
+	int done;
+
+	for (done = 0; done < count; done++) {
+		done_skb = qi->skbuff_vector[qi->head + done];
+		total_bytes += done_skb->len;
+		/* an emptied slot must read as NULL so that queue
+		 * destruction does not release it again
+		 */
+		qi->skbuff_vector[qi->head + done] = NULL;
+		dev_consume_skb_any(done_skb);
+	}
+	qi->dev->stats.tx_bytes += total_bytes;
+	qi->dev->stats.tx_packets += count;
+	netdev_completed_queue(qi->dev, count, total_bytes);
+	return vector_advancehead(qi, count);
+}
+
+/*
+ * Generic vector dequeue via sendmmsg with support for forming headers
+ * using transport specific callback. Allows GRE, L2TPv3, RAW and
+ * other transports to use a common dequeue procedure in vector mode
+ */
+
+
+/* Push as much of the queue as possible out through sendmmsg().
+ * Both queue locks are only tried, never waited for. If the head lock is
+ * busy the tx_poll tasklet is scheduled instead. Returns the queue depth
+ * seen last, which is still the initial max_depth when either lock could
+ * not be taken.
+ */
+static int vector_send(struct vector_queue *qi)
+{
+	struct vector_private *vp = netdev_priv(qi->dev);
+	struct mmsghdr *send_from;
+	int result = 0, send_len, queue_depth = qi->max_depth;
+
+	if (spin_trylock(&qi->head_lock)) {
+		if (spin_trylock(&qi->tail_lock)) {
+			/* update queue_depth to current value */
+			queue_depth = qi->queue_depth;
+			spin_unlock(&qi->tail_lock);
+			while (queue_depth > 0) {
+				/* Calculate the start of the vector */
+				send_len = queue_depth;
+				send_from = qi->mmsg_vector;
+				send_from += qi->head;
+				/* Adjust vector size if wraparound */
+				if (send_len + qi->head > qi->max_depth)
+					send_len = qi->max_depth - qi->head;
+				/* Try to TX as many packets as possible */
+				if (send_len > 0) {
+					result = uml_vector_sendmmsg(
+						 vp->fds->tx_fd,
+						 send_from,
+						 send_len,
+						 0
+					);
+					/* a short send leaves us waiting
+					 * for the write poll
+					 */
+					vp->in_write_poll =
+						(result != send_len);
+				}
+				/* For some of the sendmmsg error scenarios
+				 * we may end being unsure in the TX success
+				 * for all packets. It is safer to declare
+				 * them all TX-ed and blame the network.
+				 */
+				if (result < 0) {
+					if (net_ratelimit())
+						netdev_err(vp->dev, "sendmmsg err=%i\n",
+							result);
+					result = send_len;
+				}
+				if (result > 0) {
+					queue_depth =
+						consume_vector_skbs(qi, result);
+					/* This is equivalent to an TX IRQ.
+					 * Restart the upper layers to feed us
+					 * more packets.
+					 */
+					if (result > vp->estats.tx_queue_max)
+						vp->estats.tx_queue_max = result;
+					vp->estats.tx_queue_running_average =
+						(vp->estats.tx_queue_running_average + result) >> 1;
+				}
+				netif_trans_update(qi->dev);
+				netif_wake_queue(qi->dev);
+				/* if TX is busy, break out of the send loop,
+				 *  poll write IRQ will reschedule xmit for us
+				 */
+				if (result != send_len) {
+					vp->estats.tx_restart_queue++;
+					break;
+				}
+			}
+		}
+		spin_unlock(&qi->head_lock);
+	} else {
+		tasklet_schedule(&vp->tx_poll);
+	}
+	return queue_depth;
+}
+
+/* Queue destructor. Deliberately stateless so we can use
+ * it in queue cleanup if initialization fails.
+ */
+
+/* Release a queue together with everything it owns: the skbs still
+ * parked in skbuff_vector, the per-slot iovec arrays (including the
+ * header buffer in their first element when vp->header_size is above
+ * zero), both vectors and the queue structure itself.
+ * A NULL qi is accepted and ignored.
+ */
+static void destroy_queue(struct vector_queue *qi)
+{
+	int i;
+	struct iovec *iov;
+	struct vector_private *vp;
+	struct mmsghdr *mmsg_vector;
+
+	if (qi == NULL)
+		return;
+	/* qi->dev may only be looked at once qi is known to be valid */
+	vp = netdev_priv(qi->dev);
+	/* deallocate any skbuffs - we rely on any unused to be
+	 * set to NULL.
+	 */
+	if (qi->skbuff_vector != NULL) {
+		for (i = 0; i < qi->max_depth; i++) {
+			if (*(qi->skbuff_vector + i) != NULL)
+				dev_kfree_skb_any(*(qi->skbuff_vector + i));
+		}
+		kfree(qi->skbuff_vector);
+	}
+	/* deallocate matching IOV structures including header buffs */
+	if (qi->mmsg_vector != NULL) {
+		mmsg_vector = qi->mmsg_vector;
+		for (i = 0; i < qi->max_depth; i++) {
+			iov = mmsg_vector->msg_hdr.msg_iov;
+			if (iov != NULL) {
+				/* kfree() ignores a NULL header buffer */
+				if (vp->header_size > 0)
+					kfree(iov->iov_base);
+				kfree(iov);
+			}
+			mmsg_vector++;
+		}
+		kfree(qi->mmsg_vector);
+	}
+	kfree(qi);
+}
+
+/*
+ * Queue constructor. Create a queue with a given size.
+ *
+ * max_size is the number of slots. Each slot gets its own iovec array
+ * with room for the data element, num_extra_frags fragments and, when
+ * vp->header_size is above zero, a header element backed by a buffer of
+ * header_size bytes.
+ * Returns the new queue, or NULL if any allocation fails; a partially
+ * built queue is torn down again before returning.
+ */
+static struct vector_queue *create_queue(
+	struct vector_private *vp,
+	int max_size,
+	int header_size,
+	int num_extra_frags)
+{
+	struct vector_queue *result;
+	int i;
+	int iov_count;
+	struct iovec *iov;
+	struct mmsghdr *mmsg_vector;
+
+	result = kmalloc(sizeof(struct vector_queue), GFP_KERNEL);
+	if (result == NULL)
+		return NULL;
+	result->max_depth = max_size;
+	result->dev = vp->dev;
+	/* Both vectors start out zeroed: destroy_queue() treats every
+	 * non-NULL skb and msg_iov pointer as something to free, so the
+	 * slots must never hold uninitialised values, not even when only
+	 * one of the two allocations succeeds.
+	 */
+	result->mmsg_vector = kcalloc(
+		max_size, sizeof(struct mmsghdr), GFP_KERNEL);
+	result->skbuff_vector = kcalloc(
+		max_size, sizeof(void *), GFP_KERNEL);
+	if (result->mmsg_vector == NULL || result->skbuff_vector == NULL)
+		goto out_fail;
+
+	mmsg_vector = result->mmsg_vector;
+	result->max_iov_frags = num_extra_frags;
+	/* one more element when a transport header precedes the data */
+	if (vp->header_size > 0)
+		iov_count = 3 + num_extra_frags;
+	else
+		iov_count = 2 + num_extra_frags;
+	for (i = 0; i < max_size; i++) {
+		iov = kmalloc(sizeof(struct iovec) * iov_count, GFP_KERNEL);
+		if (iov == NULL)
+			goto out_fail;
+		mmsg_vector->msg_hdr.msg_iov = iov;
+		mmsg_vector->msg_hdr.msg_iovlen = 1;
+		mmsg_vector->msg_hdr.msg_control = NULL;
+		mmsg_vector->msg_hdr.msg_controllen = 0;
+		mmsg_vector->msg_hdr.msg_flags = MSG_DONTWAIT;
+		mmsg_vector->msg_hdr.msg_name = NULL;
+		mmsg_vector->msg_hdr.msg_namelen = 0;
+		if (vp->header_size > 0) {
+			iov->iov_base = kmalloc(header_size, GFP_KERNEL);
+			if (iov->iov_base == NULL)
+				goto out_fail;
+			iov->iov_len = header_size;
+			mmsg_vector->msg_hdr.msg_iovlen = 2;
+			iov++;
+		}
+		iov->iov_base = NULL;
+		iov->iov_len = 0;
+		mmsg_vector++;
+	}
+	spin_lock_init(&result->head_lock);
+	spin_lock_init(&result->tail_lock);
+	result->queue_depth = 0;
+	result->head = 0;
+	result->tail = 0;
+	return result;
+out_fail:
+	destroy_queue(result);
+	return NULL;
+}
+
+/*
+ * We do not use the RX queue as a proper wraparound queue for now
+ * This is not necessary because the consumption via netif_rx()
+ * happens in-line. While we can try using the return code of
+ * netif_rx() for flow control there are no drivers doing this today.
+ * For this RX specific use we ignore the tail/head locks and
+ * just read into a prepared queue filled with skbuffs.
+ */
+
+/* Allocate a receive skb and point the iovec array of msg at it.
+ * When vp->header_size is above zero the first iovec element is left
+ * untouched for the header; the skb data and its fragments follow.
+ * msg->msg_iovlen is set to the number of elements prepared.
+ * Returns the skb, or NULL if the allocation failed, in which case the
+ * data element is given a NULL base and zero length.
+ */
+static struct sk_buff *prep_skb(
+	struct vector_private *vp,
+	struct user_msghdr *msg)
+{
+	int linear = vp->max_packet + vp->headroom + SAFETY_MARGIN;
+	struct sk_buff *result;
+	int iov_index = 0, len;
+	struct iovec *iov = msg->msg_iov;
+	int err, nr_frags, frag;
+	skb_frag_t *skb_frag;
+
+	/* len is the larger of the linear size and the requested size */
+	if (vp->req_size <= linear)
+		len = linear;
+	else
+		len = vp->req_size;
+	result = alloc_skb_with_frags(
+		linear,
+		len - vp->max_packet,
+		3,
+		&err,
+		GFP_ATOMIC
+	);
+	/* skip the header element, it is set up by the caller */
+	if (vp->header_size > 0)
+		iov_index++;
+	if (result == NULL) {
+		iov[iov_index].iov_base = NULL;
+		iov[iov_index].iov_len = 0;
+		goto done;
+	}
+	skb_reserve(result, vp->headroom);
+	result->dev = vp->dev;
+	skb_put(result, vp->max_packet);
+	/* account for the space held in page fragments */
+	result->data_len = len - vp->max_packet;
+	result->len += len - vp->max_packet;
+	skb_reset_mac_header(result);
+	result->ip_summed = CHECKSUM_NONE;
+	iov[iov_index].iov_base = result->data;
+	iov[iov_index].iov_len = vp->max_packet;
+	iov_index++;
+
+	nr_frags = skb_shinfo(result)->nr_frags;
+	for (frag = 0; frag < nr_frags; frag++) {
+		skb_frag = &skb_shinfo(result)->frags[frag];
+		iov[iov_index].iov_base = skb_frag_address_safe(skb_frag);
+		/* a fragment without an address contributes no space */
+		if (iov[iov_index].iov_base != NULL)
+			iov[iov_index].iov_len = skb_frag_size(skb_frag);
+		else
+			iov[iov_index].iov_len = 0;
+		iov_index++;
+	}
+done:
+	msg->msg_iovlen = iov_index;
+	return result;
+}
+
+
+/* Prepare queue for recvmmsg one-shot rx - fill with fresh sk_buffs*/
+
+/* Refill the first queue_depth slots of an RX queue with fresh skbs and
+ * mark the queue as empty again.
+ */
+static void prep_queue_for_rx(struct vector_queue *qi)
+{
+	struct vector_private *vp = netdev_priv(qi->dev);
+	int slot;
+
+	if (qi->queue_depth == 0)
+		return;
+	/* A failed allocation is fine: prep_skb() then leaves a NULL data
+	 * element, and recvmmsg with NULL data still performs an RX and
+	 * just drops the packet, so no drop buffer is needed here.
+	 */
+	for (slot = 0; slot < qi->queue_depth; slot++)
+		qi->skbuff_vector[slot] =
+			prep_skb(vp, &qi->mmsg_vector[slot].msg_hdr);
+	qi->queue_depth = 0;
+}
+
+/* Look up the device record with unit number n in vector_devices.
+ * The list is walked under vector_devices_lock; the lock is dropped
+ * before returning. Returns the record, or NULL if there is none.
+ */
+static struct vector_device *find_device(int n)
+{
+	struct vector_device *cursor;
+	struct vector_device *match = NULL;
+
+	spin_lock(&vector_devices_lock);
+	list_for_each_entry(cursor, &vector_devices, list) {
+		if (cursor->unit == n) {
+			match = cursor;
+			break;
+		}
+	}
+	spin_unlock(&vector_devices_lock);
+	return match;
+}
+
+/* Split a "<unit>:<parameters>" specification.
+ * The first ':' in str is overwritten with a terminator, so str is
+ * modified in place. On success the unit number is stored in *index_out
+ * and *str_out points just past the separator.
+ * Returns 0 on success. Otherwise *error_out is set to a message and a
+ * negative error code is returned: -EINVAL for a missing ':' or an
+ * already configured unit, the kstrtouint() error for a bad number.
+ */
+static int vector_parse(char *str, int *index_out, char **str_out,
+			char **error_out)
+{
+	/* kstrtouint() stores through an unsigned int pointer */
+	unsigned int n;
+	int err;
+	char *start = str;
+
+	str = strchr(str, ':');
+	if (str == NULL) {
+		*error_out = "Expected ':' after device number";
+		return -EINVAL;
+	}
+	*str = '\0';
+
+	err = kstrtouint(start, 0, &n);
+	if (err < 0) {
+		*error_out = "Bad device number";
+		return err;
+	}
+
+	str++;
+	if (find_device(n)) {
+		*error_out = "Device already configured";
+		return -EINVAL;
+	}
+
+	*index_out = n;
+	*str_out = str;
+	return 0;
+}
+
+/* Configure one device from a "<unit>:<parameters>" specification.
+ * Returns 0 on success. Otherwise *error_out is set to a message and a
+ * negative error code is returned.
+ */
+static int vector_config(char *str, char **error_out)
+{
+	int err, n;
+	char *params;
+	struct arglist *parsed;
+
+	err = vector_parse(str, &n, &params, error_out);
+	if (err != 0)
+		return err;
+
+	/* This string is broken up and the pieces used by the underlying
+	 * driver. We should copy it to make sure things do not go wrong
+	 * later.
+	 */
+
+	params = kstrdup(params, GFP_KERNEL);
+	if (params == NULL) {
+		*error_out = "vector_config failed to strdup string";
+		return -ENOMEM;
+	}
+
+	parsed = uml_parse_vector_ifspec(params);
+
+	if (parsed == NULL) {
+		*error_out = "vector_config failed to parse parameters";
+		/* No argument list was produced, so the private copy is
+		 * not needed any more and must not leak.
+		 * NOTE(review): assumes uml_parse_vector_ifspec() does not
+		 * release its argument on failure - confirm.
+		 */
+		kfree(params);
+		return -EINVAL;
+	}
+
+	vector_eth_configure(n, parsed);
+	return 0;
+}
+
+/* Parse *str as one device number (base detected from the prefix).
+ * The whole string has to be consumed. On success the number is stored
+ * in both *start_out and *end_out, *str is moved to the end of the
+ * string and the number is returned; otherwise -1 is returned.
+ */
+static int vector_id(char **str, int *start_out, int *end_out)
+{
+	char *tail;
+	int id;
+
+	id = simple_strtoul(*str, &tail, 0);
+	/* reject empty input as well as trailing characters */
+	if ((tail == *str) || (*tail != '\0'))
+		return -1;
+
+	*start_out = id;
+	*end_out = id;
+	*str = tail;
+	return id;
+}
+
+/* Remove device number n.
+ * Returns -ENODEV if no such device is known and -EBUSY while the
+ * private data of the device still holds a non-NULL fds pointer
+ * (presumably meaning the device is open - confirm). Otherwise the net
+ * device and the platform device are unregistered and 0 is returned.
+ * error_out is not written by this function.
+ */
+static int vector_remove(int n, char **error_out)
+{
+	struct vector_device *vec_d;
+	struct net_device *dev;
+	struct vector_private *vp;
+
+	vec_d = find_device(n);
+	if (vec_d == NULL)
+		return -ENODEV;
+	dev = vec_d->dev;
+	vp = netdev_priv(dev);
+	if (vp->fds != NULL)
+		return -EBUSY;
+	unregister_netdev(dev);
+	platform_device_unregister(&vec_d->pdev);
+	return 0;
+}
+
+/*
+ * There is no shared per-transport initialization code, so
+ * we will just initialize each interface one by one and
+ * add them to a list
+ */
+
+/* Platform driver description; only the driver name is filled in. */
+static struct platform_driver uml_net_driver = {
+	.driver = {
+		.name = DRIVER_NAME,
+	},
+};
+
+
+/* Tear down the bookkeeping of one device: unlink the record stored as
+ * driver data of dev from its list, then free the record and the net
+ * device it refers to.
+ * NOTE(review): list_del() runs here without vector_devices_lock, which
+ * find_device() holds while walking the list - confirm this is safe.
+ */
+static void vector_device_release(struct device *dev)
+{
+	struct vector_device *device = dev_get_drvdata(dev);
+	/* fetched first, the record is freed before the net device */
+	struct net_device *netdev = device->dev;
+
+	list_del(&device->list);
+	kfree(device);
+	free_netdev(netdev);
+}
+
+/* Bog standard recv using recvmsg - not used normally unless the user
+ * explicitly specifies not to use recvmmsg vector RX.
+ */
+
+/* Receive a single packet with recvmsg and hand it to the stack.
+ * Returns the value of uml_vector_recvmsg(), except that 0 is returned
+ * when the transport header fails verification.
+ */
+static int vector_legacy_rx(struct vector_private *vp)
+{
+	int pkt_len;
+	struct user_msghdr hdr;
+	struct iovec iov[2 + MAX_IOV_SIZE]; /* header + data use case only */
+	int iovpos = 0;
+	struct sk_buff *skb;
+	int header_check;
+
+	hdr.msg_name = NULL;
+	hdr.msg_namelen = 0;
+	hdr.msg_iov = (struct iovec *) &iov;
+	hdr.msg_control = NULL;
+	hdr.msg_controllen = 0;
+	hdr.msg_flags = 0;
+
+	/* the header, if any, is received into its own buffer */
+	if (vp->header_size > 0) {
+		iov[0].iov_base = vp->header_rxbuffer;
+		iov[0].iov_len = vp->header_size;
+	}
+
+	/* fills in the remaining iov elements and hdr.msg_iovlen */
+	skb = prep_skb(vp, &hdr);
+
+	if (skb == NULL) {
+		/* Read a packet into drop_buffer and don't do
+		 * anything with it.
+		 */
+		/* iovpos is still 0 here, so this replaces element 0 */
+		iov[iovpos].iov_base = drop_buffer;
+		iov[iovpos].iov_len = DROP_BUFFER_SIZE;
+		hdr.msg_iovlen = 1;
+		vp->dev->stats.rx_dropped++;
+	}
+
+	pkt_len = uml_vector_recvmsg(vp->fds->rx_fd, &hdr, 0);
+
+	if (skb != NULL) {
+		if (pkt_len > vp->header_size) {
+			if (vp->header_size > 0) {
+				/* negative: reject; positive: checksum
+				 * already known to be good
+				 */
+				header_check = vp->verify_header(
+					vp->header_rxbuffer, skb, vp);
+				if (header_check < 0) {
+					dev_kfree_skb_irq(skb);
+					vp->dev->stats.rx_dropped++;
+					vp->estats.rx_encaps_errors++;
+					return 0;
+				}
+				if (header_check > 0) {
+					vp->estats.rx_csum_offload_good++;
+					skb->ip_summed = CHECKSUM_UNNECESSARY;
+				}
+			}
+			/* NOTE(review): the length check above uses
+			 * header_size while the trim uses rx_header_size -
+			 * confirm the two are meant to differ.
+			 */
+			pskb_trim(skb, pkt_len - vp->rx_header_size);
+			skb->protocol = eth_type_trans(skb, skb->dev);
+			vp->dev->stats.rx_bytes += skb->len;
+			vp->dev->stats.rx_packets++;
+			netif_rx(skb);
+		} else {
+			/* nothing beyond the header arrived, or the
+			 * receive returned an error
+			 */
+			dev_kfree_skb_irq(skb);
+		}
+	}
+	return pkt_len;
+}
+
+/*
+ * Packet at a time TX which falls back to vector TX if the
+ * underlying transport is busy.
+ */
+
+
+
+static int writev_tx(struct vector_private *vp, struct sk_buff *skb)
+{
+	struct iovec iov[3 + MAX_IOV_SIZE];
+	int iov_count, pkt_len = 0;
+
+	iov[0].iov_base = vp->header_txbuffer;
+	iov_count = prep_msg(vp, skb, (struct iovec *) &iov);
+
+	if (iov_count < 1)
+		goto drop;
+	pkt_len = uml_vector_writev(
+		vp->fds->tx_fd,
+		(struct iovec *) &iov,
+		iov_count
+	);
+
+	netif_trans_update(vp->dev);
+	netif_wake_queue(vp->dev);
+
+	if (pkt_len > 0) {
+		vp->dev->stats.tx_bytes += skb->len;
+		vp->dev->stats.tx_packets++;
+	} else {
+		vp->dev->stats.tx_dropped++;
+	}
+	consume_skb(skb);
+	return pkt_len;
+drop:
+	vp->dev->stats.tx_dropped++;
+	consume_skb(skb);
+	return pkt_len;
+}
+
+/*
+ * Receive as many messages as we can in one call using the special
+ * mmsg vector matched to an skb vector which we prepared earlier.
+ * Returns the uml_vector_recvmmsg() result (packet count, or <= 0).
+ */
+static int vector_mmsg_rx(struct vector_private *vp)
+{
+	int packet_count, i;
+	struct vector_queue *qi = vp->rx_queue;
+	struct sk_buff *skb;
+	struct mmsghdr *mmsg_vector = qi->mmsg_vector;
+	void **skbuff_vector = qi->skbuff_vector;
+	int header_check;
+
+	/* Refresh the vector and make sure it is with new skbs and the
+	 * iovs are updated to point to them.
+	 */
+
+	prep_queue_for_rx(qi);
+
+	/* Fire the Lazy Gun - get as many packets as we can in one go. */
+
+	packet_count = uml_vector_recvmmsg(
+		vp->fds->rx_fd, qi->mmsg_vector, qi->max_depth, 0);
+
+	if (packet_count <= 0)
+		return packet_count;
+
+	/* We treat packet processing as enqueue, buffer refresh as dequeue
+	 * The queue_depth tells us how many buffers have been used and how
+	 * many do we need to prep the next time prep_queue_for_rx() is called.
+	 */
+
+	qi->queue_depth = packet_count;
+
+	/* The cursors advance in the loop header so that every path through
+	 * the body, including the early "continue", moves on to the next slot.
+	 */
+	for (i = 0; i < packet_count; i++, mmsg_vector++, skbuff_vector++) {
+		/* Take the skb out of its slot first: below it is either
+		 * freed or handed to the stack, so the slot must not keep it.
+		 */
+		skb = (*skbuff_vector);
+		(*skbuff_vector) = NULL;
+		if (mmsg_vector->msg_len > vp->header_size) {
+			if (vp->header_size > 0) {
+				header_check = vp->verify_header(
+					mmsg_vector->msg_hdr.msg_iov->iov_base,
+					skb, vp);
+				if (header_check < 0) {
+				/* Overlay header failed to verify - discard.
+				 * We can actually keep this skb and reuse it,
+				 * but that will make the prep logic too
+				 * complex.
+				 */
+					dev_kfree_skb_irq(skb);
+					vp->estats.rx_encaps_errors++;
+					continue;
+				}
+				if (header_check > 0) {
+					vp->estats.rx_csum_offload_good++;
+					skb->ip_summed = CHECKSUM_UNNECESSARY;
+				}
+			}
+			pskb_trim(skb,
+				mmsg_vector->msg_len - vp->rx_header_size);
+			skb->protocol = eth_type_trans(skb, skb->dev);
+			/*
+			 * We do not need to lock on updating stats here
+			 * The interrupt loop is non-reentrant.
+			 */
+			vp->dev->stats.rx_bytes += skb->len;
+			vp->dev->stats.rx_packets++;
+			netif_rx(skb);
+		} else {
+			/* Overlay header too short to do anything - discard.
+			 * We can actually keep this skb and reuse it,
+			 * but that will make the prep logic too complex.
+			 */
+			if (skb != NULL)
+				dev_kfree_skb_irq(skb);
+		}
+	}
+	/* packet_count > 0 here - the early return above covers the rest */
+	if (vp->estats.rx_queue_max < packet_count)
+		vp->estats.rx_queue_max = packet_count;
+	vp->estats.rx_queue_running_average =
+		(vp->estats.rx_queue_running_average + packet_count) >> 1;
+	return packet_count;
+}
+
+/* Drain the RX side: keep receiving until a pass yields nothing or fails. */
+static void vector_rx(struct vector_private *vp)
+{
+	bool use_mmsg = (vp->options & VECTOR_RX) > 0;
+	int rc;
+
+	do {
+		rc = use_mmsg ? vector_mmsg_rx(vp) : vector_legacy_rx(vp);
+	} while (rc > 0);
+	/* The loop ends on rc <= 0, so non-zero here means negative */
+	if ((rc != 0) && net_ratelimit())
+		netdev_err(vp->dev, "vector_rx: error(%d)\n", rc);
+}
+
+/* ndo_start_xmit: send one skb, either directly or via the vector queue. */
+static int vector_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct vector_private *vp = netdev_priv(dev);
+	int queue_depth = 0;
+
+	if ((vp->options & VECTOR_TX) == 0) {
+		writev_tx(vp, skb);
+		return NETDEV_TX_OK;
+	}
+
+	/* We do BQL only in the vector path, no point doing it in
+	 * packet at a time mode as there is no device queue
+	 */
+	netdev_sent_queue(vp->dev, skb->len);
+	queue_depth = vector_enqueue(vp->tx_queue, skb);
+
+	/* if the device queue is full, stop the upper layers and
+	 * flush it.
+	 */
+	if (queue_depth >= vp->tx_queue->max_depth - 1) {
+		vp->estats.tx_kicks++;
+		netif_stop_queue(dev);
+		vector_send(vp->tx_queue);
+		return NETDEV_TX_OK;
+	}
+	if (skb->xmit_more) {
+		/* mod_timer() wants an absolute expiry; coalesce is a delay */
+		mod_timer(&vp->tl, jiffies + vp->coalesce);
+		return NETDEV_TX_OK;
+	}
+	if (skb->len < TX_SMALL_PACKET) {
+		vp->estats.tx_kicks++;
+		vector_send(vp->tx_queue);
+	} else
+		tasklet_schedule(&vp->tx_poll);
+	return NETDEV_TX_OK;
+}
+
+/* RX IRQ handler: run the receive loop unless the interface is down. */
+static irqreturn_t vector_rx_interrupt(int irq, void *dev_id)
+{
+	struct net_device *netdev = dev_id;
+
+	if (netif_running(netdev)) {
+		vector_rx(netdev_priv(netdev));
+		return IRQ_HANDLED;
+	}
+	return IRQ_NONE;
+}
+
+/* TX (write-ready) IRQ handler. */
+static irqreturn_t vector_tx_interrupt(int irq, void *dev_id)
+{
+	struct net_device *netdev = dev_id;
+	struct vector_private *priv = netdev_priv(netdev);
+
+	if (netif_running(netdev)) {
+		/* We need to pay attention to it only if we got
+		 * -EAGAIN or -ENOBUFS from sendmmsg. Otherwise
+		 * we ignore it. In the future, it may be worth
+		 * it to improve the IRQ controller a bit to make
+		 * tweaking the IRQ mask less costly
+		 */
+		if (priv->in_write_poll)
+			tasklet_schedule(&priv->tx_poll);
+		return IRQ_HANDLED;
+	}
+	return IRQ_NONE;
+}
+
+static int irq_rr;
+
+/* ndo_stop; vector_net_open() also calls this to unwind a failed open. */
+static int vector_net_close(struct net_device *dev)
+{
+	struct vector_private *vp = netdev_priv(dev);
+	unsigned long flags;
+
+	netif_stop_queue(dev);
+	del_timer(&vp->tl);
+	if (vp->fds == NULL)
+		goto out; /* nothing was opened - just clear vp->opened */
+	if (vp->rx_irq > 0) {
+		um_free_irq(vp->rx_irq, dev);
+		vp->rx_irq = 0;
+	}
+	if (vp->tx_irq > 0) {
+		um_free_irq(vp->tx_irq, dev);
+		vp->tx_irq = 0;
+	}
+	tasklet_kill(&vp->tx_poll);
+	if (vp->fds->rx_fd > 0) {
+		os_close_file(vp->fds->rx_fd);
+		vp->fds->rx_fd = -1;
+	}
+	if (vp->fds->tx_fd > 0) {
+		os_close_file(vp->fds->tx_fd);
+		vp->fds->tx_fd = -1;
+	}
+	/* NULL freed pointers so that a later close cannot free them twice */
+	kfree(vp->bpf);
+	vp->bpf = NULL;
+	kfree(vp->fds->remote_addr);
+	kfree(vp->transport_data);
+	vp->transport_data = NULL;
+	kfree(vp->header_rxbuffer);
+	vp->header_rxbuffer = NULL;
+	kfree(vp->header_txbuffer);
+	vp->header_txbuffer = NULL;
+	if (vp->rx_queue != NULL)
+		destroy_queue(vp->rx_queue);
+	if (vp->tx_queue != NULL)
+		destroy_queue(vp->tx_queue);
+	vp->rx_queue = vp->tx_queue = NULL;
+	kfree(vp->fds);
+	vp->fds = NULL;
+out:
+	spin_lock_irqsave(&vp->lock, flags);
+	vp->opened = false;
+	spin_unlock_irqrestore(&vp->lock, flags);
+	return 0;
+}
+
+/* TX tasklet body; data carries the struct vector_private pointer. */
+static void vector_tx_poll(unsigned long data)
+{
+	struct vector_private *priv = (struct vector_private *)data;
+
+	priv->estats.tx_kicks += 1;
+	vector_send(priv->tx_queue);
+}
+/* Work item: restart the TX queue after vector_net_tx_timeout() fired. */
+static void vector_reset_tx(struct work_struct *work)
+{
+	struct net_device *netdev =
+		container_of(work, struct vector_private, reset_tx)->dev;
+	netdev_reset_queue(netdev);
+	netif_start_queue(netdev);
+	netif_wake_queue(netdev);
+}
+/* ndo_open: open the transport fds, build the queues or header buffers
+ * and request the IRQs. Any failure is unwound via vector_net_close().
+ */
+static int vector_net_open(struct net_device *dev)
+{
+	struct vector_private *vp = netdev_priv(dev);
+	unsigned long flags;
+	int err = -EINVAL;
+	struct vector_device *vdevice;
+
+	spin_lock_irqsave(&vp->lock, flags);
+	if (vp->opened) {
+		spin_unlock_irqrestore(&vp->lock, flags);
+		return -ENXIO;
+	}
+	vp->opened = true;
+	spin_unlock_irqrestore(&vp->lock, flags);
+
+	vp->fds = uml_vector_user_open(vp->unit, vp->parsed);
+
+	if (vp->fds == NULL)
+		goto out_close;
+
+	if (build_transport_data(vp) < 0)
+		goto out_close;
+
+	/* The create_queue() result is checked before use: the queue is
+	 * dereferenced right below and by the RX/TX paths later on.
+	 */
+	if ((vp->options & VECTOR_RX) > 0) {
+		vp->rx_queue = create_queue(vp, get_depth(vp->parsed),
+			vp->rx_header_size, MAX_IOV_SIZE);
+		if (vp->rx_queue == NULL)
+			goto out_close;
+		vp->rx_queue->queue_depth = get_depth(vp->parsed);
+	} else {
+		vp->header_rxbuffer = kmalloc(vp->rx_header_size, GFP_KERNEL);
+		if (vp->header_rxbuffer == NULL)
+			goto out_close;
+	}
+	if ((vp->options & VECTOR_TX) > 0) {
+		vp->tx_queue = create_queue(vp, get_depth(vp->parsed),
+			vp->header_size, MAX_IOV_SIZE);
+		if (vp->tx_queue == NULL)
+			goto out_close;
+	} else {
+		vp->header_txbuffer = kmalloc(vp->header_size, GFP_KERNEL);
+		if (vp->header_txbuffer == NULL)
+			goto out_close;
+	}
+
+	/* READ IRQ */
+	err = um_request_irq(
+		irq_rr + VECTOR_BASE_IRQ, vp->fds->rx_fd,
+			IRQ_READ, vector_rx_interrupt,
+			IRQF_SHARED, dev->name, dev);
+	if (err != 0) {
+		netdev_err(dev, "vector_open: failed to get rx irq(%d)\n", err);
+		err = -ENETUNREACH;
+		goto out_close;
+	}
+	vp->rx_irq = irq_rr + VECTOR_BASE_IRQ;
+	dev->irq = irq_rr + VECTOR_BASE_IRQ;
+	irq_rr = (irq_rr + 1) % VECTOR_IRQ_SPACE;
+
+	/* WRITE IRQ - we need it only if we have vector TX */
+	if ((vp->options & VECTOR_TX) > 0) {
+		err = um_request_irq(
+			irq_rr + VECTOR_BASE_IRQ, vp->fds->tx_fd,
+				IRQ_WRITE, vector_tx_interrupt,
+				IRQF_SHARED, dev->name, dev);
+		if (err != 0) {
+			netdev_err(dev,
+				"vector_open: failed to get tx irq(%d)\n", err);
+			err = -ENETUNREACH;
+			goto out_close;
+		}
+		vp->tx_irq = irq_rr + VECTOR_BASE_IRQ;
+		irq_rr = (irq_rr + 1) % VECTOR_IRQ_SPACE;
+	}
+
+	if ((vp->options & VECTOR_QDISC_BYPASS) != 0) {
+		if (!uml_raw_enable_qdisc_bypass(vp->fds->rx_fd))
+			vp->options = vp->options | VECTOR_BPF;
+	}
+
+	if ((vp->options & VECTOR_BPF) != 0)
+		vp->bpf = uml_vector_default_bpf(vp->fds->rx_fd, dev->dev_addr);
+
+	netif_start_queue(dev);
+
+	/* clear buffer - it can happen that the host side of the interface
+	 * is full when we get here. In this case, new data is never queued,
+	 * SIGIOs never arrive, and the net never works.
+	 */
+
+	vector_rx(vp);
+
+	vector_reset_stats(vp);
+	vdevice = find_device(vp->unit);
+	if (vdevice != NULL)
+		vdevice->opened = 1;
+
+	if ((vp->options & VECTOR_TX) != 0)
+		add_timer(&vp->tl);
+	return 0;
+out_close:
+	vector_net_close(dev);
+	return err;
+}
+
+
+static void vector_net_set_multicast_list(struct net_device *dev)
+{
+	/* TODO: - we can do some BPF games here */
+	return;
+}
+
+static void vector_net_tx_timeout(struct net_device *dev)
+{
+	struct vector_private *vp = netdev_priv(dev);
+
+	vp->estats.tx_timeout_count++;
+	netif_trans_update(dev);
+	schedule_work(&vp->reset_tx);
+}
+
+static netdev_features_t vector_fix_features(struct net_device *dev,
+	netdev_features_t features)
+{
+	/* The IPv4-only and IPv6-only checksum flags are always masked out */
+	return features & ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
+}
+
+/* ndo_set_features: pick the buffer size matching the GRO setting. */
+static int vector_set_features(struct net_device *dev,
+	netdev_features_t features)
+{
+	struct vector_private *priv = netdev_priv(dev);
+	bool gro = (features & NETIF_F_GRO) != 0;
+
+	/* Adjust buffer sizes for GSO/GRO. Unfortunately, there is
+	 * no way to negotiate it on raw sockets, so we can change
+	 * only our side: all new frame buffers are GRO-sized when
+	 * GRO is on and normal sized otherwise.
+	 */
+	priv->req_size = gro ? 65536 :
+		priv->max_packet + priv->headroom + SAFETY_MARGIN;
+	return 0;
+}
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+static void vector_net_poll_controller(struct net_device *dev)
+{
+	disable_irq(dev->irq);
+	vector_rx_interrupt(dev->irq, dev);
+	enable_irq(dev->irq);
+}
+#endif
+
+static void vector_net_get_drvinfo(struct net_device *dev,
+				struct ethtool_drvinfo *info)
+{
+	strlcpy(info->driver, DRIVER_NAME, sizeof(info->driver));
+	strlcpy(info->version, DRIVER_VERSION, sizeof(info->version));
+}
+
+static void vector_get_ringparam(struct net_device *netdev,
+				struct ethtool_ringparam *ring)
+{
+	struct vector_private *vp = netdev_priv(netdev);
+	struct vector_queue *rxq = vp->rx_queue, *txq = vp->tx_queue;
+
+	/* A queue is NULL before open or outside vector mode - report 0 */
+	ring->rx_max_pending = ring->rx_pending = rxq ? rxq->max_depth : 0;
+	ring->tx_max_pending = ring->tx_pending = txq ? txq->max_depth : 0;
+}
+
+static void vector_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
+{
+	if (stringset == ETH_SS_STATS) {
+		memcpy(buf, &ethtool_stats_keys, sizeof(ethtool_stats_keys));
+		return;
+	}
+	if (stringset == ETH_SS_TEST) {
+		/* No self-tests: hand back an empty string */
+		*buf = '\0';
+		return;
+	}
+	/* Any other string set is unexpected here */
+	WARN_ON(1);
+}
+
+/* ethtool get_sset_count: number of strings in the requested set. */
+static int vector_get_sset_count(struct net_device *dev, int sset)
+{
+	/* Only the stats set has entries; the self-test set is empty */
+	if (sset == ETH_SS_STATS)
+		return VECTOR_NUM_STATS;
+	if (sset == ETH_SS_TEST)
+		return 0;
+
+	return -EOPNOTSUPP;
+}
+
+static void vector_get_ethtool_stats(struct net_device *dev,
+	struct ethtool_stats *estats,
+	u64 *tmp_stats)
+{
+	struct vector_private *vp = netdev_priv(dev);
+
+	memcpy(tmp_stats, &vp->estats, sizeof(struct vector_estats));
+}
+
+static int vector_get_coalesce(struct net_device *netdev,
+					struct ethtool_coalesce *ec)
+{
+	struct vector_private *vp = netdev_priv(netdev);
+
+	ec->tx_coalesce_usecs = (vp->coalesce * 1000000) / HZ;
+	return 0;
+}
+
+static int vector_set_coalesce(struct net_device *netdev,
+					struct ethtool_coalesce *ec)
+{
+	struct vector_private *vp = netdev_priv(netdev);
+	int ticks = (ec->tx_coalesce_usecs * HZ) / 1000000;
+
+	/* Microseconds scaled by HZ; a result of zero is bumped to one */
+	vp->coalesce = ticks ? ticks : 1;
+	return 0;
+}
+
+static const struct ethtool_ops vector_net_ethtool_ops = {
+	.get_drvinfo	= vector_net_get_drvinfo,
+	.get_link	= ethtool_op_get_link,
+	.get_ts_info	= ethtool_op_get_ts_info,
+	.get_ringparam	= vector_get_ringparam,
+	.get_strings	= vector_get_strings,
+	.get_sset_count	= vector_get_sset_count,
+	.get_ethtool_stats = vector_get_ethtool_stats,
+	.get_coalesce	= vector_get_coalesce,
+	.set_coalesce	= vector_set_coalesce,
+};
+
+
+static const struct net_device_ops vector_netdev_ops = {
+	.ndo_open		= vector_net_open,
+	.ndo_stop		= vector_net_close,
+	.ndo_start_xmit		= vector_net_start_xmit,
+	.ndo_set_rx_mode	= vector_net_set_multicast_list,
+	.ndo_tx_timeout		= vector_net_tx_timeout,
+	.ndo_set_mac_address	= eth_mac_addr,
+	.ndo_validate_addr	= eth_validate_addr,
+	.ndo_fix_features	= vector_fix_features,
+	.ndo_set_features	= vector_set_features,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	.ndo_poll_controller = vector_net_poll_controller,
+#endif
+};
+
+
+static void vector_timer_expire(struct timer_list *t)
+{
+	struct vector_private *vp = from_timer(vp, t, tl);
+
+	vp->estats.tx_kicks++;
+	vector_send(vp->tx_queue);
+}
+
+/* Create and register the net_device for unit n from parsed args def. */
+static void vector_eth_configure(int n, struct arglist *def)
+{
+	struct vector_device *device;
+	struct net_device *dev;
+	struct vector_private *vp;
+	int err;
+
+	device = kzalloc(sizeof(*device), GFP_KERNEL);
+	if (device == NULL) {
+		printk(KERN_ERR "eth_configure failed to allocate struct "
+				 "vector_device\n");
+		return;
+	}
+	dev = alloc_etherdev(sizeof(struct vector_private));
+	if (dev == NULL) {
+		printk(KERN_ERR "eth_configure: failed to allocate struct "
+				 "net_device for vec%d\n", n);
+		goto out_free_device;
+	}
+
+	dev->mtu = get_mtu(def);
+	INIT_LIST_HEAD(&device->list);
+	device->unit = n;
+
+	/* If this name ends up conflicting with an existing registered
+	 * netdevice, that is OK, register_netdev{,ice}() will notice this
+	 * and fail.
+	 */
+	snprintf(dev->name, sizeof(dev->name), "vec%d", n);
+	uml_net_setup_etheraddr(dev, uml_vector_fetch_arg(def, "mac"));
+	vp = netdev_priv(dev);
+
+	/* sysfs register */
+	if (!driver_registered) {
+		platform_driver_register(&uml_net_driver);
+		driver_registered = 1;
+	}
+	device->pdev.id = n;
+	device->pdev.name = DRIVER_NAME;
+	device->pdev.dev.release = vector_device_release;
+	dev_set_drvdata(&device->pdev.dev, device);
+	if (platform_device_register(&device->pdev))
+		goto out_free_netdev;
+	SET_NETDEV_DEV(dev, &device->pdev.dev);
+
+	device->dev = dev;
+
+	*vp = ((struct vector_private)
+		{
+		.list			= LIST_HEAD_INIT(vp->list),
+		.dev			= dev,
+		.unit			= n,
+		.options		= get_transport_options(def),
+		.rx_irq			= 0,
+		.tx_irq			= 0,
+		.parsed			= def,
+		.max_packet		= get_mtu(def) + ETH_HEADER_OTHER,
+		/* TODO - we need to calculate headroom so that ip header
+		 * is 16 byte aligned all the time
+		 */
+		.headroom		= get_headroom(def),
+		.form_header		= NULL,
+		.verify_header		= NULL,
+		.header_rxbuffer	= NULL,
+		.header_txbuffer	= NULL,
+		.header_size		= 0,
+		.rx_header_size		= 0,
+		.rexmit_scheduled	= false,
+		.opened			= false,
+		.transport_data		= NULL,
+		.in_write_poll		= false,
+		.coalesce		= 2,
+		.req_size		= get_req_size(def)
+		});
+
+	dev->features = dev->hw_features = (NETIF_F_SG | NETIF_F_FRAGLIST);
+	tasklet_init(&vp->tx_poll, vector_tx_poll, (unsigned long)vp);
+	INIT_WORK(&vp->reset_tx, vector_reset_tx);
+
+	timer_setup(&vp->tl, vector_timer_expire, 0);
+	spin_lock_init(&vp->lock);
+
+	/* FIXME */
+	dev->netdev_ops = &vector_netdev_ops;
+	dev->ethtool_ops = &vector_net_ethtool_ops;
+	dev->watchdog_timeo = (HZ >> 1);
+	/* primary IRQ - fixme */
+	dev->irq = 0; /* we will adjust this once opened */
+
+	rtnl_lock();
+	err = register_netdevice(dev);
+	rtnl_unlock();
+	if (err)
+		goto out_undo_user_init;
+
+	spin_lock(&vector_devices_lock);
+	list_add(&device->list, &vector_devices);
+	spin_unlock(&vector_devices_lock);
+	return;
+
+out_undo_user_init:
+	/* Dropping the platform device runs vector_device_release(), which
+	 * frees both device and dev - do not fall through to the labels below.
+	 */
+	platform_device_unregister(&device->pdev);
+	return;
+out_free_netdev:
+	free_netdev(dev);
+out_free_device:
+	kfree(device);
+}
+
+
+
+
+/*
+ * Invoked late in the init
+ */
+
+static int __init vector_init(void)
+{
+	struct vector_cmd_line_arg *def;
+	struct arglist *parsed;
+
+	/* Configure every unit saved on vec_cmd_line; specs that fail
+	 * to parse are skipped. */
+	list_for_each_entry(def, &vec_cmd_line, list) {
+		parsed = uml_parse_vector_ifspec(def->arguments);
+		if (parsed != NULL)
+			vector_eth_configure(def->unit, parsed);
+	}
+	return 0;
+}
+
+
+/* Invoked at initial argument parsing, only stores
+ * arguments until a proper vector_init is called
+ * later
+ */
+
+static int __init vector_setup(char *str)
+{
+	char *error;
+	int n, err;
+	struct vector_cmd_line_arg *new;
+
+	err = vector_parse(str, &n, &str, &error);
+	if (err) {
+		printk(KERN_ERR "vector_setup - Couldn't parse '%s' : %s\n",
+				 str, error);
+		return 1;
+	}
+	new = alloc_bootmem(sizeof(*new));
+	INIT_LIST_HEAD(&new->list);
+	new->unit = n;
+	new->arguments = str;
+	list_add_tail(&new->list, &vec_cmd_line);
+	return 1;
+}
+
+__setup("vec", vector_setup);
+__uml_help(vector_setup,
+"vec[0-9]+:<option>=<value>,<option>=<value>\n"
+"	 Configure a vector io network device.\n\n"
+);
+
+late_initcall(vector_init);
+
+static struct mc_device vector_mc = {
+	.list		= LIST_HEAD_INIT(vector_mc.list),
+	.name		= "vec",
+	.config		= vector_config,
+	.get_config	= NULL,
+	.id		= vector_id,
+	.remove		= vector_remove,
+};
+
+#ifdef CONFIG_INET
+static int vector_inetaddr_event(
+	struct notifier_block *this,
+	unsigned long event,
+	void *ptr)
+{
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block vector_inetaddr_notifier = {
+	.notifier_call		= vector_inetaddr_event,
+};
+
+static void inet_register(void)
+{
+	register_inetaddr_notifier(&vector_inetaddr_notifier);
+}
+#else
+static inline void inet_register(void)
+{
+}
+#endif
+
+static int vector_net_init(void)
+{
+	mconsole_register_dev(&vector_mc);
+	inet_register();
+	return 0;
+}
+
+__initcall(vector_net_init);
+
+
+
diff --git a/arch/um/drivers/vector_kern.h b/arch/um/drivers/vector_kern.h
new file mode 100644
index 000000000000..0b0a767b9076
--- /dev/null
+++ b/arch/um/drivers/vector_kern.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright (C) 2002 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __UM_VECTOR_KERN_H
+#define __UM_VECTOR_KERN_H
+
+#include <linux/netdevice.h>
+#include <linux/platform_device.h>
+#include <linux/skbuff.h>
+#include <linux/socket.h>
+#include <linux/list.h>
+#include <linux/ctype.h>
+#include <linux/workqueue.h>
+#include <linux/interrupt.h>
+#include "vector_user.h"
+
+/* Queue structure specially adapted for multiple enqueue/dequeue
+ * in a mmsgrecv/mmsgsend context
+ */
+
+/* Dequeue method */
+
+#define QUEUE_SENDMSG 0
+#define QUEUE_SENDMMSG 1
+
+#define VECTOR_RX 1
+#define VECTOR_TX (1 << 1)
+#define VECTOR_BPF (1 << 2)
+#define VECTOR_QDISC_BYPASS (1 << 3)
+
+#define ETH_MAX_PACKET 1500
+#define ETH_HEADER_OTHER 32 /* just in case someone decides to go mad on QnQ */
+
+struct vector_queue {
+	struct mmsghdr *mmsg_vector;
+	void **skbuff_vector;
+	 /* backlink to device which owns us */
+	struct net_device *dev;
+	spinlock_t head_lock;
+	spinlock_t tail_lock;
+	int queue_depth, head, tail, max_depth, max_iov_frags;
+	short options;
+};
+
+struct vector_estats {
+	uint64_t rx_queue_max;
+	uint64_t rx_queue_running_average;
+	uint64_t tx_queue_max;
+	uint64_t tx_queue_running_average;
+	uint64_t rx_encaps_errors;
+	uint64_t tx_timeout_count;
+	uint64_t tx_restart_queue;
+	uint64_t tx_kicks;
+	uint64_t tx_flow_control_xon;
+	uint64_t tx_flow_control_xoff;
+	uint64_t rx_csum_offload_good;
+	uint64_t rx_csum_offload_errors;
+	uint64_t sg_ok;
+	uint64_t sg_linearized;
+};
+
+#define VERIFY_HEADER_NOK -1
+#define VERIFY_HEADER_OK 0
+#define VERIFY_CSUM_OK 1
+
+struct vector_private {
+	struct list_head list;
+	spinlock_t lock;
+	struct net_device *dev;
+
+	int unit;
+
+	/* Timeout timer in TX */
+
+	struct timer_list tl;
+
+	/* Deferred work that resets and restarts the TX queue */
+	struct work_struct reset_tx;
+	struct vector_fds *fds;
+
+	struct vector_queue *rx_queue;
+	struct vector_queue *tx_queue;
+
+	int rx_irq;
+	int tx_irq;
+
+	struct arglist *parsed;
+
+	void *transport_data; /* transport specific params if needed */
+
+	int max_packet;
+	int req_size; /* different from max packet - used for TSO */
+	int headroom;
+
+	int options;
+
+	/* remote address if any - some transports will leave this as null */
+
+	int header_size;
+	int rx_header_size;
+	int coalesce;
+
+	void *header_rxbuffer;
+	void *header_txbuffer;
+
+	int (*form_header)(uint8_t *header,
+		struct sk_buff *skb, struct vector_private *vp);
+	int (*verify_header)(uint8_t *header,
+		struct sk_buff *skb, struct vector_private *vp);
+
+	spinlock_t stats_lock;
+
+	struct tasklet_struct tx_poll;
+	bool rexmit_scheduled;
+	bool opened;
+	bool in_write_poll;
+
+	/* ethtool stats */
+
+	struct vector_estats estats;
+	void *bpf;
+
+	char user[0];
+};
+
+extern int build_transport_data(struct vector_private *vp);
+
+#endif
diff --git a/arch/um/drivers/vector_transports.c b/arch/um/drivers/vector_transports.c
new file mode 100644
index 000000000000..9065047f844b
--- /dev/null
+++ b/arch/um/drivers/vector_transports.c
@@ -0,0 +1,458 @@
+/*
+ * Copyright (C) 2017 - Cambridge Greys Limited
+ * Copyright (C) 2011 - 2014 Cisco Systems Inc
+ * Licensed under the GPL.
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <asm/byteorder.h>
+#include <uapi/linux/ip.h>
+#include <uapi/linux/virtio_net.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_byteorder.h>
+#include <linux/netdev_features.h>
+#include "vector_user.h"
+#include "vector_kern.h"
+
+#define GOOD_LINEAR 512
+#define GSO_ERROR "Incoming GSO frames and GRO disabled on the interface"
+
+struct gre_minimal_header {
+	uint16_t header;
+	uint16_t arptype;
+};
+
+
+struct uml_gre_data {
+	uint32_t rx_key;
+	uint32_t tx_key;
+	uint32_t sequence;
+
+	bool ipv6;
+	bool has_sequence;
+	bool pin_sequence;
+	bool checksum;
+	bool key;
+	struct gre_minimal_header expected_header;
+
+	uint32_t checksum_offset;
+	uint32_t key_offset;
+	uint32_t sequence_offset;
+
+};
+
+struct uml_l2tpv3_data {
+	uint64_t rx_cookie;
+	uint64_t tx_cookie;
+	uint64_t rx_session;
+	uint64_t tx_session;
+	uint32_t counter;
+
+	bool udp;
+	bool ipv6;
+	bool has_counter;
+	bool pin_counter;
+	bool cookie;
+	bool cookie_is_64;
+
+	uint32_t cookie_offset;
+	uint32_t session_offset;
+	uint32_t counter_offset;
+};
+
+static int l2tpv3_form_header(uint8_t *header,
+	struct sk_buff *skb, struct vector_private *vp)
+{
+	struct uml_l2tpv3_data *td = vp->transport_data;
+	uint32_t *counter;
+
+	if (td->udp)
+		*(uint32_t *) header = cpu_to_be32(L2TPV3_DATA_PACKET);
+	(*(uint32_t *) (header + td->session_offset)) = td->tx_session;
+
+	if (td->cookie) {
+		if (td->cookie_is_64)
+			(*(uint64_t *)(header + td->cookie_offset)) =
+				td->tx_cookie;
+		else
+			(*(uint32_t *)(header + td->cookie_offset)) =
+				td->tx_cookie;
+	}
+	if (td->has_counter) {
+		counter = (uint32_t *)(header + td->counter_offset);
+		if (td->pin_counter) {
+			*counter = 0;
+		} else {
+			td->counter++;
+			*counter = cpu_to_be32(td->counter);
+		}
+	}
+	return 0;
+}
+
+static int gre_form_header(uint8_t *header,
+		struct sk_buff *skb, struct vector_private *vp)
+{
+	struct uml_gre_data *td = vp->transport_data;
+	uint32_t *sequence;
+	*((uint32_t *) header) = *((uint32_t *) &td->expected_header);
+	if (td->key)
+		(*(uint32_t *) (header + td->key_offset)) = td->tx_key;
+	if (td->has_sequence) {
+		sequence = (uint32_t *)(header + td->sequence_offset);
+		if (td->pin_sequence)
+			*sequence = 0;
+		else
+			*sequence = cpu_to_be32(++td->sequence);
+	}
+	return 0;
+}
+
+static int raw_form_header(uint8_t *header,
+		struct sk_buff *skb, struct vector_private *vp)
+{
+	struct virtio_net_hdr *vheader = (struct virtio_net_hdr *) header;
+
+	virtio_net_hdr_from_skb(
+		skb,
+		vheader,
+		virtio_legacy_is_little_endian(),
+		false
+	);
+
+	return 0;
+}
+
+static int l2tpv3_verify_header(
+	uint8_t *header, struct sk_buff *skb, struct vector_private *vp)
+{
+	struct uml_l2tpv3_data *td = vp->transport_data;
+	uint32_t *session;
+	uint64_t cookie;
+
+	if ((!td->udp) && (!td->ipv6))
+		header += sizeof(struct iphdr) /* fix for ipv4 raw */;
+
+	/* we do not do a strict check for "data" packets as per
+	 * the RFC spec because the pure IP spec does not have
+	 * that anyway.
+	 */
+
+	if (td->cookie) {
+		if (td->cookie_is_64)
+			cookie = *(uint64_t *)(header + td->cookie_offset);
+		else
+			cookie = *(uint32_t *)(header + td->cookie_offset);
+		if (cookie != td->rx_cookie) {
+			if (net_ratelimit())
+				netdev_err(vp->dev, "uml_l2tpv3: unknown cookie id");
+			return -1;
+		}
+	}
+	session = (uint32_t *) (header + td->session_offset);
+	if (*session != td->rx_session) {
+		if (net_ratelimit())
+			netdev_err(vp->dev, "uml_l2tpv3: session mismatch");
+		return -1;
+	}
+	return 0;
+}
+
+static int gre_verify_header(
+	uint8_t *header, struct sk_buff *skb, struct vector_private *vp)
+{
+
+	uint32_t key;
+	struct uml_gre_data *td = vp->transport_data;
+
+	if (!td->ipv6)
+		header += sizeof(struct iphdr) /* fix for ipv4 raw */;
+
+	if (*((uint32_t *) header) != *((uint32_t *) &td->expected_header)) {
+		if (net_ratelimit())
+			netdev_err(vp->dev, "header type disagreement, expecting %0x, got %0x",
+				*((uint32_t *) &td->expected_header),
+				*((uint32_t *) header)
+			);
+		return -1;
+	}
+
+	if (td->key) {
+		key = (*(uint32_t *)(header + td->key_offset));
+		if (key != td->rx_key) {
+			if (net_ratelimit())
+				netdev_err(vp->dev, "unknown key id %0x, expecting %0x",
+						key, td->rx_key);
+			return -1;
+		}
+	}
+	return 0;
+}
+
+static int raw_verify_header(
+	uint8_t *header, struct sk_buff *skb, struct vector_private *vp)
+{
+	struct virtio_net_hdr *vheader = (struct virtio_net_hdr *) header;
+
+	if ((vheader->gso_type != VIRTIO_NET_HDR_GSO_NONE) &&
+		(vp->req_size != 65536)) {
+		if (net_ratelimit())
+			netdev_err(
+				vp->dev,
+				GSO_ERROR
+		);
+	}
+	if ((vheader->flags & VIRTIO_NET_HDR_F_DATA_VALID) > 0)
+		return 1;
+
+	virtio_net_hdr_to_skb(skb, vheader, virtio_legacy_is_little_endian());
+	return 0;
+}
+
+/* Fetch "param" from def as an integer; false if absent or malformed. */
+static bool get_uint_param(
+	struct arglist *def, char *param, unsigned int *result)
+{
+	char *arg = uml_vector_fetch_arg(def, param);
+
+	if (arg == NULL)
+		return false;
+	/* kstrtoint() returns 0 on a successful conversion */
+	return kstrtoint(arg, 0, result) == 0;
+}
+
+/* Fetch "param" from def as an unsigned long; false if absent or malformed. */
+static bool get_ulong_param(
+	struct arglist *def, char *param, unsigned long *result)
+{
+	char *arg = uml_vector_fetch_arg(def, param);
+
+	if (arg == NULL)
+		return false;
+	/* Report success only when the value parsed: after a failure
+	 * *result may be unset, and the callers use it unchecked. */
+	return kstrtoul(arg, 0, result) == 0;
+}
+
+/* Allocate and fill the GRE transport state from the parsed arguments. */
+static int build_gre_transport_data(struct vector_private *vp)
+{
+	struct uml_gre_data *td;
+	int temp_int, temp_rx, temp_tx;
+
+	/* Zeroed, as pin_sequence is only ever set to true below */
+	vp->transport_data = kzalloc(sizeof(struct uml_gre_data), GFP_KERNEL);
+	if (vp->transport_data == NULL)
+		return -ENOMEM;
+	td = vp->transport_data;
+	td->sequence = 0;
+
+	td->expected_header.arptype = GRE_IRB;
+	td->expected_header.header = 0;
+
+	vp->form_header = &gre_form_header;
+	vp->verify_header = &gre_verify_header;
+	vp->header_size = 4;
+	td->key_offset = 4;
+	td->sequence_offset = 4;
+	td->checksum_offset = 4;
+
+	td->ipv6 = false;
+	if (get_uint_param(vp->parsed, "v6", &temp_int)) {
+		if (temp_int > 0)
+			td->ipv6 = true;
+	}
+	td->key = false;
+	if (get_uint_param(vp->parsed, "rx_key", &temp_rx)) {
+		if (get_uint_param(vp->parsed, "tx_key", &temp_tx)) {
+			td->key = true;
+			td->expected_header.header |= GRE_MODE_KEY;
+			td->rx_key = cpu_to_be32(temp_rx);
+			td->tx_key = cpu_to_be32(temp_tx);
+			vp->header_size += 4;
+			td->sequence_offset += 4;
+		} else {
+			return -EINVAL;
+		}
+	}
+
+	td->has_sequence = false;
+	if (get_uint_param(vp->parsed, "sequence", &temp_int)) {
+		if (temp_int > 0) {
+			vp->header_size += 4;
+			td->has_sequence = true;
+			td->expected_header.header |= GRE_MODE_SEQUENCE;
+			if (get_uint_param(
+				vp->parsed, "pin_sequence", &temp_int)) {
+				if (temp_int > 0)
+					td->pin_sequence = true;
+			}
+		}
+	}
+	vp->rx_header_size = vp->header_size;
+	if (!td->ipv6)
+		vp->rx_header_size += sizeof(struct iphdr);
+	return 0;
+}
+
+/* Allocate and fill the L2TPv3 transport state from the parsed arguments. */
+static int build_l2tpv3_transport_data(struct vector_private *vp)
+{
+	struct uml_l2tpv3_data *td;
+	int temp_int, temp_rxs, temp_txs;
+	unsigned long temp_rx;
+	unsigned long temp_tx;
+
+	/* Zeroed, as udp and pin_counter are only ever set to true below */
+	vp->transport_data = kzalloc(
+		sizeof(struct uml_l2tpv3_data), GFP_KERNEL);
+	if (vp->transport_data == NULL)
+		return -ENOMEM;
+
+	td = vp->transport_data;
+
+	vp->form_header = &l2tpv3_form_header;
+	vp->verify_header = &l2tpv3_verify_header;
+	td->counter = 0;
+
+	vp->header_size = 4;
+	td->session_offset = 0;
+	td->cookie_offset = 4;
+	td->counter_offset = 4;
+
+
+	td->ipv6 = false;
+	if (get_uint_param(vp->parsed, "v6", &temp_int)) {
+		if (temp_int > 0)
+			td->ipv6 = true;
+	}
+
+	if (get_uint_param(vp->parsed, "rx_session", &temp_rxs)) {
+		if (get_uint_param(vp->parsed, "tx_session", &temp_txs)) {
+			td->tx_session = cpu_to_be32(temp_txs);
+			td->rx_session = cpu_to_be32(temp_rxs);
+		} else {
+			return -EINVAL;
+		}
+	} else {
+		return -EINVAL;
+	}
+
+	td->cookie_is_64  = false;
+	if (get_uint_param(vp->parsed, "cookie64", &temp_int)) {
+		if (temp_int > 0)
+			td->cookie_is_64  = true;
+	}
+	td->cookie = false;
+	if (get_ulong_param(vp->parsed, "rx_cookie", &temp_rx)) {
+		if (get_ulong_param(vp->parsed, "tx_cookie", &temp_tx)) {
+			td->cookie = true;
+			if (td->cookie_is_64) {
+				td->rx_cookie = cpu_to_be64(temp_rx);
+				td->tx_cookie = cpu_to_be64(temp_tx);
+				vp->header_size += 8;
+				td->counter_offset += 8;
+			} else {
+				td->rx_cookie = cpu_to_be32(temp_rx);
+				td->tx_cookie = cpu_to_be32(temp_tx);
+				vp->header_size += 4;
+				td->counter_offset += 4;
+			}
+		} else {
+			return -EINVAL;
+		}
+	}
+
+	td->has_counter = false;
+	if (get_uint_param(vp->parsed, "counter", &temp_int)) {
+		if (temp_int > 0) {
+			td->has_counter = true;
+			vp->header_size += 4;
+			if (get_uint_param(
+				vp->parsed, "pin_counter", &temp_int)) {
+				if (temp_int > 0)
+					td->pin_counter = true;
+			}
+		}
+	}
+
+	if (get_uint_param(vp->parsed, "udp", &temp_int)) {
+		if (temp_int > 0) {
+			td->udp = true;
+			vp->header_size += 4;
+			td->counter_offset += 4;
+			td->session_offset += 4;
+			td->cookie_offset += 4;
+		}
+	}
+
+	vp->rx_header_size = vp->header_size;
+	if ((!td->ipv6) && (!td->udp))
+		vp->rx_header_size += sizeof(struct iphdr);
+
+	return 0;
+}
+
+static int build_raw_transport_data(struct vector_private *vp)
+{
+	/* Without vnet headers on RX the raw socket is used as is */
+	if (!uml_raw_enable_vnet_headers(vp->fds->rx_fd))
+		return 0;
+	/* RX has them; failing to enable them on TX too is an error */
+	if (!uml_raw_enable_vnet_headers(vp->fds->tx_fd))
+		return -1;
+
+	vp->form_header = &raw_form_header;
+	vp->verify_header = &raw_verify_header;
+	vp->header_size = sizeof(struct virtio_net_hdr);
+	vp->rx_header_size = sizeof(struct virtio_net_hdr);
+	vp->dev->hw_features |= (NETIF_F_TSO | NETIF_F_GRO);
+	vp->dev->features |= (NETIF_F_RXCSUM | NETIF_F_HW_CSUM |
+				NETIF_F_TSO | NETIF_F_GRO);
+	netdev_info(vp->dev,
+		"raw: using vnet headers for tso and tx/rx checksum");
+	return 0;
+}
+
+static int build_tap_transport_data(struct vector_private *vp)
+{
+	/* do not try to enable tap too if raw failed */
+	if (!uml_raw_enable_vnet_headers(vp->fds->rx_fd))
+		return 0;
+
+	vp->form_header = &raw_form_header;
+	vp->verify_header = &raw_verify_header;
+	vp->header_size = sizeof(struct virtio_net_hdr);
+	vp->rx_header_size = sizeof(struct virtio_net_hdr);
+	vp->dev->hw_features |=
+		(NETIF_F_TSO | NETIF_F_GSO | NETIF_F_GRO);
+	vp->dev->features |=
+		(NETIF_F_RXCSUM | NETIF_F_HW_CSUM |
+			NETIF_F_TSO | NETIF_F_GSO | NETIF_F_GRO);
+	netdev_info(
+		vp->dev,
+		"tap/raw: using vnet headers for tso and tx/rx checksum"
+	);
+
+	/* The tap (TX) fd must accept vnet headers as well */
+	return uml_tap_enable_vnet_headers(vp->fds->tx_fd) ? 0 : -1;
+}
+
+int build_transport_data(struct vector_private *vp)
+{
+	char *transport = uml_vector_fetch_arg(vp->parsed, "transport") ?: "";
+	/* "" stands in for a missing argument and matches none of the below */
+	if (strncmp(transport, TRANS_GRE, TRANS_GRE_LEN) == 0)
+		return build_gre_transport_data(vp);
+	if (strncmp(transport, TRANS_L2TPV3, TRANS_L2TPV3_LEN) == 0)
+		return build_l2tpv3_transport_data(vp);
+	if (strncmp(transport, TRANS_RAW, TRANS_RAW_LEN) == 0)
+		return build_raw_transport_data(vp);
+	if (strncmp(transport, TRANS_TAP, TRANS_TAP_LEN) == 0)
+		return build_tap_transport_data(vp);
+	return 0;
+}
+
diff --git a/arch/um/drivers/vector_user.c b/arch/um/drivers/vector_user.c
new file mode 100644
index 000000000000..4d6a78e31089
--- /dev/null
+++ b/arch/um/drivers/vector_user.c
@@ -0,0 +1,590 @@
+/*
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <stddef.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <net/if.h>
+#include <linux/if_tun.h>
+#include <arpa/inet.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <net/ethernet.h>
+#include <netinet/ip.h>
+#include <netinet/ether.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <sys/socket.h>
+#include <sys/wait.h>
+#include <linux/virtio_net.h>
+#include <netdb.h>
+#include <stdlib.h>
+#include <os.h>
+#include <um_malloc.h>
+#include "vector_user.h"
+
+#define ID_GRE 0	/* ids passed to user_init_socket_fds() */
+#define ID_L2TPV3 1
+#define ID_MAX 1
+
+#define TOKEN_IFNAME "ifname"	/* same definition as in vector_user.h */
+
+#define TRANS_RAW "raw"		/* same definition as in vector_user.h */
+#define TRANS_RAW_LEN strlen(TRANS_RAW)
+
+#define VNET_HDR_FAIL "could not enable vnet headers on fd %d"
+#define TUN_GET_F_FAIL "tapraw: TUNGETFEATURES failed: %s"
+#define L2TPV3_BIND_FAIL "l2tpv3_open : could not bind socket err=%i"
+#define BPF_ATTACH_FAIL "Failed to attach filter size %d to %d, err %d\n"
+
+/*
+ * Linear search of the parsed argument list for @token. It is done
+ * only once at initialization, so hashing or anything smarter is not
+ * worth it. Returns the matching value, or NULL if @token is absent.
+ */
+char *uml_vector_fetch_arg(struct arglist *ifspec, char *token)
+{
+	int idx = 0;
+
+	while (idx < ifspec->numargs) {
+		if (!strcmp(ifspec->tokens[idx], token))
+			return ifspec->values[idx];
+		idx++;
+	}
+	return NULL;
+}
+
+/* Split "token=value,token=value" in place; entries point into @arg.
+ * NULL on allocation failure, a stray '=' or more than MAXVARGS pairs.
+ */
+struct arglist *uml_parse_vector_ifspec(char *arg)
+{
+	struct arglist *result;
+	bool parsing_token = true, next_starts = true;
+	int pos;
+
+	if (arg == NULL)
+		return NULL;
+	result = uml_kmalloc(sizeof(struct arglist), UM_GFP_KERNEL);
+	if (result == NULL)
+		return NULL;
+	result->numargs = 0;
+	for (pos = 0; arg[pos] != '\0'; pos++) {
+		if (next_starts) {
+			if (result->numargs >= MAXVARGS)
+				goto cleanup;	/* tokens[]/values[] are full */
+			if (parsing_token)
+				result->tokens[result->numargs] = arg + pos;
+			else
+				result->values[result->numargs++] = arg + pos;
+			next_starts = false;
+		}
+		if (arg[pos] == '=') {
+			if (!parsing_token)
+				goto cleanup;	/* second '=' in one pair */
+			parsing_token = false;
+			next_starts = true;
+			arg[pos] = '\0';
+		} else if (arg[pos] == ',') {
+			parsing_token = true;
+			next_starts = true;
+			arg[pos] = '\0';
+		}
+	}
+	return result;
+cleanup:
+	printk(UM_KERN_ERR "vector_setup - Couldn't parse '%s'\n", arg);
+	kfree(result);
+	return NULL;
+}
+
+/*
+ * Socket/FD configuration functions. These return a structure
+ * of rx and tx descriptors to cover cases where these are not
+ * the same (e.g. read via raw socket and write via tap).
+ */
+
+#define PATH_NET_TUN "/dev/net/tun"
+
+/* Open the tap device named by ifname= for tx and bind an AF_PACKET
+ * socket to the same interface for rx. Returns NULL on any failure.
+ */
+static struct vector_fds *user_init_tap_fds(struct arglist *ifspec)
+{
+	struct ifreq ifr;
+	int fd = -1;
+	struct sockaddr_ll sock;
+	int err = -EINVAL, offload;
+	char *iface;
+	struct vector_fds *result = NULL;
+
+	iface = uml_vector_fetch_arg(ifspec, TOKEN_IFNAME);
+	if (iface == NULL) {
+		printk(UM_KERN_ERR "uml_tap: failed to parse interface spec\n");
+		goto tap_cleanup;
+	}
+	result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL);
+	if (result == NULL) {
+		printk(UM_KERN_ERR "uml_tap: failed to allocate file descriptors\n");
+		err = -ENOMEM;
+		goto tap_cleanup;
+	}
+	result->rx_fd = -1;
+	result->tx_fd = -1;
+	result->remote_addr = NULL;
+	result->remote_addr_size = 0;
+
+	/* TAP */
+	fd = open(PATH_NET_TUN, O_RDWR);
+	if (fd < 0) {
+		err = -errno;	/* save before printk() can clobber errno */
+		printk(UM_KERN_ERR "uml_tap: failed to open tun device\n");
+		goto tap_cleanup;
+	}
+	result->tx_fd = fd;
+	memset(&ifr, 0, sizeof(ifr));
+	ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
+	strncpy((char *)&ifr.ifr_name, iface, sizeof(ifr.ifr_name) - 1);
+	if (ioctl(fd, TUNSETIFF, (void *) &ifr) != 0) {
+		err = -errno;
+		printk(UM_KERN_ERR "uml_tap: failed to select tap interface\n");
+		goto tap_cleanup;
+	}
+	offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6;
+	ioctl(fd, TUNSETOFFLOAD, offload);	/* result is not checked */
+
+	/* RAW */
+	fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
+	if (fd == -1) {
+		err = -errno;
+		printk(UM_KERN_ERR "uml_tap: failed to create socket: %i\n", err);
+		goto tap_cleanup;
+	}
+	result->rx_fd = fd;
+	memset(&ifr, 0, sizeof(ifr));
+	strncpy((char *)&ifr.ifr_name, iface, sizeof(ifr.ifr_name) - 1);
+	if (ioctl(fd, SIOCGIFINDEX, (void *) &ifr) < 0) {
+		err = -errno;
+		printk(UM_KERN_ERR "uml_tap: failed to set interface: %i\n", err);
+		goto tap_cleanup;
+	}
+
+	memset(&sock, 0, sizeof(sock));	/* no stack garbage passed to bind() */
+	sock.sll_family = AF_PACKET;
+	sock.sll_protocol = htons(ETH_P_ALL);
+	sock.sll_ifindex = ifr.ifr_ifindex;
+	if (bind(fd,
+		(struct sockaddr *) &sock, sizeof(struct sockaddr_ll)) < 0) {
+		err = -errno;
+		printk(UM_KERN_ERR
+			"user_init_tap: failed to bind raw pair, err %d\n", err);
+		goto tap_cleanup;
+	}
+	return result;
+tap_cleanup:
+	printk(UM_KERN_ERR "user_init_tap: init failed, error %d", err);
+	if (result != NULL) {
+		if (result->rx_fd >= 0)
+			os_close_file(result->rx_fd);
+		if (result->tx_fd >= 0)
+			os_close_file(result->tx_fd);
+		kfree(result);
+	}
+	return NULL;
+}
+
+
+/* Open two AF_PACKET sockets on the interface named by ifname=, one
+ * bound for rx and one for tx. Returns NULL on any failure.
+ */
+static struct vector_fds *user_init_raw_fds(struct arglist *ifspec)
+{
+	struct ifreq ifr;
+	int rxfd = -1, txfd = -1;
+	struct sockaddr_ll sock;
+	int err = -EINVAL;
+	char *iface;
+	struct vector_fds *result;
+
+	iface = uml_vector_fetch_arg(ifspec, TOKEN_IFNAME);
+	if (iface == NULL)
+		goto cleanup;
+
+	/* NOTE(review): ETH_P_ALL is not htons()'d here, unlike bind() below */
+	rxfd = socket(AF_PACKET, SOCK_RAW, ETH_P_ALL);
+	if (rxfd == -1) {
+		err = -errno;
+		goto cleanup;
+	}
+	txfd = socket(AF_PACKET, SOCK_RAW, 0); /* Turn off RX on this fd */
+	if (txfd == -1) {
+		err = -errno;
+		goto cleanup;
+	}
+	memset(&ifr, 0, sizeof(ifr));
+	strncpy((char *)&ifr.ifr_name, iface, sizeof(ifr.ifr_name) - 1);
+	if (ioctl(rxfd, SIOCGIFINDEX, (void *) &ifr) < 0) {
+		err = -errno;
+		goto cleanup;
+	}
+	memset(&sock, 0, sizeof(sock));	/* no stack garbage passed to bind() */
+	sock.sll_family = AF_PACKET;
+	sock.sll_protocol = htons(ETH_P_ALL);
+	sock.sll_ifindex = ifr.ifr_ifindex;
+	if (bind(rxfd,
+		(struct sockaddr *) &sock, sizeof(struct sockaddr_ll)) < 0) {
+		err = -errno;
+		goto cleanup;
+	}
+
+	/* Same interface for tx; only the protocol differs */
+	sock.sll_protocol = htons(ETH_P_IP);
+	if (bind(txfd,
+		(struct sockaddr *) &sock, sizeof(struct sockaddr_ll)) < 0) {
+		err = -errno;
+		goto cleanup;
+	}
+	result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL);
+	if (result == NULL) {
+		err = -ENOMEM;	/* do not leak the two sockets */
+		goto cleanup;
+	}
+	result->rx_fd = rxfd;
+	result->tx_fd = txfd;
+	result->remote_addr = NULL;
+	result->remote_addr_size = 0;
+	return result;
+cleanup:
+	printk(UM_KERN_ERR "user_init_raw: init failed, error %d", err);
+	if (rxfd >= 0)
+		os_close_file(rxfd);
+	if (txfd >= 0)
+		os_close_file(txfd);
+	return NULL;
+}
+
+
+/* Set PACKET_QDISC_BYPASS on packet socket @fd.
+ * Returns true if setsockopt() succeeded, false otherwise.
+ */
+bool uml_raw_enable_qdisc_bypass(int fd)
+{
+	int rc, enable = 1;
+
+	rc = setsockopt(fd, SOL_PACKET, PACKET_QDISC_BYPASS,
+			&enable, sizeof(enable));
+	return rc == 0;
+}
+
+/* Set PACKET_VNET_HDR on packet socket @fd; a failure is logged. */
+bool uml_raw_enable_vnet_headers(int fd)
+{
+	int rc, enable = 1;
+
+	rc = setsockopt(fd, SOL_PACKET, PACKET_VNET_HDR,
+			&enable, sizeof(enable));
+	if (rc == 0)
+		return true;
+	printk(UM_KERN_INFO VNET_HDR_FAIL, fd);
+	return false;
+}
+bool uml_tap_enable_vnet_headers(int fd)
+{
+	unsigned int features;	/* filled in by TUNGETFEATURES */
+	int len = sizeof(struct virtio_net_hdr);
+
+	if (ioctl(fd, TUNGETFEATURES, &features) == -1) {
+		printk(UM_KERN_INFO TUN_GET_F_FAIL, strerror(errno));
+		return false;
+	}
+	if ((features & IFF_VNET_HDR) == 0) {
+		printk(UM_KERN_INFO "tapraw: No VNET HEADER support");
+		return false;
+	}
+	ioctl(fd, TUNSETVNETHDRSZ, &len);	/* NOTE(review): result unchecked */
+	return true;
+}
+
+/* Open and bind the socket for a GRE or L2TPv3 transport and resolve
+ * dst/dstport. The one fd serves as both rx_fd and tx_fd; the resolved
+ * address is copied into remote_addr. Returns NULL on any failure.
+ */
+static struct vector_fds *user_init_socket_fds(struct arglist *ifspec, int id)
+{
+	int err = -ENOMEM;
+	int fd = -1, gairet;
+	struct addrinfo dsthints;
+	bool v6, udp;
+	char *value;
+	char *src, *dst, *srcport, *dstport;
+	struct addrinfo *gairesult = NULL;
+	struct vector_fds *result = NULL;
+
+	value = uml_vector_fetch_arg(ifspec, "v6");
+	v6 = false;
+	udp = false;
+	if (value != NULL) {
+		if (strtol((const char *) value, NULL, 10) > 0)
+			v6 = true;
+	}
+
+	value = uml_vector_fetch_arg(ifspec, "udp");
+	if (value != NULL) {
+		if (strtol((const char *) value, NULL, 10) > 0)
+			udp = true;
+	}
+	src = uml_vector_fetch_arg(ifspec, "src");
+	dst = uml_vector_fetch_arg(ifspec, "dst");
+	srcport = uml_vector_fetch_arg(ifspec, "srcport");
+	dstport = uml_vector_fetch_arg(ifspec, "dstport");
+
+	memset(&dsthints, 0, sizeof(dsthints));
+
+	if (v6)
+		dsthints.ai_family = AF_INET6;
+	else
+		dsthints.ai_family = AF_INET;
+
+	switch (id) {
+	case ID_GRE:
+		dsthints.ai_socktype = SOCK_RAW;
+		dsthints.ai_protocol = IPPROTO_GRE;
+		break;
+	case ID_L2TPV3:
+		if (udp) {
+			dsthints.ai_socktype = SOCK_DGRAM;
+			dsthints.ai_protocol = 0;
+		} else {
+			dsthints.ai_socktype = SOCK_RAW;
+			dsthints.ai_protocol = IPPROTO_L2TP;
+		}
+		break;
+	default:
+		printk(KERN_ERR "Unsupported socket type\n");
+		return NULL;
+	}
+
+	/* The same hints are used for the local and the remote lookup */
+	gairet = getaddrinfo(src, srcport, &dsthints, &gairesult);
+	if ((gairet != 0) || (gairesult == NULL)) {
+		printk(UM_KERN_ERR
+			"socket_open : could not resolve src, error = %s",
+			gai_strerror(gairet)
+		);
+		return NULL;
+	}
+	fd = socket(gairesult->ai_family,
+		gairesult->ai_socktype, gairesult->ai_protocol);
+	if (fd == -1) {
+		err = -errno;
+		printk(UM_KERN_ERR
+			"socket_open : could not open socket, error = %d",
+			err
+		);
+		goto cleanup;
+	}
+	if (bind(fd,
+		(struct sockaddr *) gairesult->ai_addr,
+		gairesult->ai_addrlen)) {
+		err = -errno;
+		printk(UM_KERN_ERR L2TPV3_BIND_FAIL, -err);
+		goto cleanup;
+	}
+
+	freeaddrinfo(gairesult);
+	gairesult = NULL;
+
+	gairet = getaddrinfo(dst, dstport, &dsthints, &gairesult);
+	if ((gairet != 0) || (gairesult == NULL)) {
+		printk(UM_KERN_ERR
+			"socket_open : could not resolve dst, error = %s",
+			gai_strerror(gairet)
+		);
+		err = -EINVAL;
+		goto cleanup;	/* fd is open by now and must be closed */
+	}
+
+	result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL);
+	if (result == NULL)
+		goto cleanup;	/* err is still -ENOMEM */
+	result->rx_fd = fd;
+	result->tx_fd = fd;
+	result->remote_addr = uml_kmalloc(
+		gairesult->ai_addrlen, UM_GFP_KERNEL);
+	if (result->remote_addr == NULL)
+		goto cleanup;
+	result->remote_addr_size = gairesult->ai_addrlen;
+	memcpy(result->remote_addr,
+		gairesult->ai_addr, gairesult->ai_addrlen);
+	freeaddrinfo(gairesult);
+	return result;
+cleanup:
+	if (gairesult != NULL)
+		freeaddrinfo(gairesult);
+	printk(UM_KERN_ERR "user_init_socket: init failed, error %d", err);
+	if (fd >= 0)
+		os_close_file(fd);
+	if (result != NULL) {
+		if (result->remote_addr != NULL)
+			kfree(result->remote_addr);
+		kfree(result);
+	}
+	return NULL;
+}
+
+/* Open the descriptors for vector device @unit using the transport=
+ * entry of @parsed; NULL if that is missing, unknown or fails to set up.
+ */
+struct vector_fds *uml_vector_user_open(int unit, struct arglist *parsed)
+{
+	char *name;
+
+	if (parsed == NULL) {
+		printk(UM_KERN_ERR "no parsed config for unit %d\n", unit);
+		return NULL;
+	}
+	name = uml_vector_fetch_arg(parsed, "transport");
+	if (name == NULL) {
+		printk(UM_KERN_ERR "missing transport for unit %d\n", unit);
+		return NULL;
+	}
+	if (!strncmp(name, TRANS_RAW, TRANS_RAW_LEN))
+		return user_init_raw_fds(parsed);
+	if (!strncmp(name, TRANS_TAP, TRANS_TAP_LEN))
+		return user_init_tap_fds(parsed);
+	if (!strncmp(name, TRANS_GRE, TRANS_GRE_LEN))
+		return user_init_socket_fds(parsed, ID_GRE);
+	if (!strncmp(name, TRANS_L2TPV3, TRANS_L2TPV3_LEN))
+		return user_init_socket_fds(parsed, ID_L2TPV3);
+	return NULL;
+}
+
+
+/* sendmsg() on @fd wrapped in CATCH_EINTR; @hdr is a struct msghdr.
+ * Returns sendmsg()'s result, 0 for EAGAIN, -errno for other errors.
+ */
+int uml_vector_sendmsg(int fd, void *hdr, int flags)
+{
+	int sent;
+
+	CATCH_EINTR(sent = sendmsg(fd, (struct msghdr *) hdr, flags));
+	if (sent >= 0)
+		return sent;
+	return (errno == EAGAIN) ? 0 : -errno;
+}
+
+/* recvmsg() on @fd wrapped in CATCH_EINTR; @hdr is a struct msghdr.
+ * Returns recvmsg()'s result, 0 for EAGAIN, -errno for other errors.
+ */
+int uml_vector_recvmsg(int fd, void *hdr, int flags)
+{
+	int got;
+
+	CATCH_EINTR(got = recvmsg(fd, (struct msghdr *) hdr, flags));
+	if (got >= 0)
+		return got;
+	return (errno == EAGAIN) ? 0 : -errno;
+}
+
+/* writev() on @fd wrapped in CATCH_EINTR; @hdr is a struct iovec array
+ * of @iovcount entries. Returns writev()'s result, 0 for EAGAIN, else -errno.
+ */
+int uml_vector_writev(int fd, void *hdr, int iovcount)
+{
+	int written;
+
+	CATCH_EINTR(written = writev(fd, (struct iovec *) hdr, iovcount));
+	if (written >= 0)
+		return written;
+	return (errno == EAGAIN) ? 0 : -errno;
+}
+
+/* sendmmsg() on @fd wrapped in CATCH_EINTR; @msgvec is an array of
+ * @vlen struct mmsghdr. Returns sendmmsg()'s result, 0 for EAGAIN,
+ * -errno for other errors.
+ */
+int uml_vector_sendmmsg(int fd, void *msgvec, unsigned int vlen,
+			unsigned int flags)
+{
+	struct mmsghdr *msgs = msgvec;
+	int done;
+
+	CATCH_EINTR(done = sendmmsg(fd, msgs, vlen, flags));
+	if (done >= 0)
+		return done;
+	if (errno == EAGAIN)
+		return 0;
+	return -errno;
+}
+
+/* recvmmsg() on @fd wrapped in CATCH_EINTR, with no timeout; @msgvec
+ * is an array of @vlen struct mmsghdr. Returns recvmmsg()'s result,
+ * 0 for EAGAIN, -errno for other errors.
+ */
+int uml_vector_recvmmsg(int fd, void *msgvec, unsigned int vlen,
+			unsigned int flags)
+{
+	struct mmsghdr *msgs = msgvec;
+	int got;
+
+	CATCH_EINTR(got = recvmmsg(fd, msgs, vlen, flags, 0));
+	if (got >= 0)
+		return got;
+	if (errno == EAGAIN)
+		return 0;
+	return -errno;
+}
+int uml_vector_attach_bpf(int fd, void *bpf, int bpf_len)
+{
+	int err = setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, bpf, bpf_len);
+
+	if (err < 0)	/* log only; the failure is reported via the return */
+		printk(KERN_ERR BPF_ATTACH_FAIL, bpf_len, fd, -errno);
+	return err;	/* setsockopt() result, unmodified */
+}
+
+#define DEFAULT_BPF_LEN 6
+
+/* Attach the default filter to @fd: frames whose bytes 6-11 (Ethernet
+ * source address) equal @mac are dropped. NULL on alloc/attach failure.
+ */
+void *uml_vector_default_bpf(int fd, void *mac)
+{
+	struct sock_filter *bpf;
+	uint32_t mac_lo;	/* @mac bytes 2-5; copied, mac + 2 may be unaligned */
+	uint16_t mac_hi;	/* @mac bytes 0-1 */
+	struct sock_fprog bpf_prog = { .len = DEFAULT_BPF_LEN, .filter = NULL };
+
+	memcpy(&mac_hi, mac, sizeof(mac_hi));
+	memcpy(&mac_lo, (char *) mac + sizeof(mac_hi), sizeof(mac_lo));
+	bpf = uml_kmalloc(sizeof(*bpf) * DEFAULT_BPF_LEN, UM_GFP_KERNEL);
+	if (bpf == NULL)
+		return NULL;
+	bpf_prog.filter = bpf;
+	/* ld	[8] */
+	bpf[0] = (struct sock_filter){ 0x20, 0, 0, 0x00000008 };
+	/* jeq	#0xMAC[2-5] jt 2 jf 5 */
+	bpf[1] = (struct sock_filter){ 0x15, 0, 3, ntohl(mac_lo)};
+	/* ldh	[6] */
+	bpf[2] = (struct sock_filter){ 0x28, 0, 0, 0x00000006 };
+	/* jeq	#0xMAC[0-1] jt 4 jf 5 */
+	bpf[3] = (struct sock_filter){ 0x15, 0, 1, ntohs(mac_hi)};
+	/* ret	#0 */
+	bpf[4] = (struct sock_filter){ 0x6, 0, 0, 0x00000000 };
+	/* ret	#0x40000 */
+	bpf[5] = (struct sock_filter){ 0x6, 0, 0, 0x00040000 };
+	if (uml_vector_attach_bpf(fd, &bpf_prog, sizeof(bpf_prog)) < 0) {
+		kfree(bpf);
+		return NULL;
+	}
+	return bpf;
+}
+
diff --git a/arch/um/drivers/vector_user.h b/arch/um/drivers/vector_user.h
new file mode 100644
index 000000000000..d7cbff73b7ff
--- /dev/null
+++ b/arch/um/drivers/vector_user.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2002 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __UM_VECTOR_USER_H
+#define __UM_VECTOR_USER_H
+
+#define MAXVARGS	20
+
+#define TOKEN_IFNAME "ifname"
+
+#define TRANS_RAW "raw"
+#define TRANS_RAW_LEN strlen(TRANS_RAW)
+
+#define TRANS_TAP "tap"
+#define TRANS_TAP_LEN strlen(TRANS_TAP)
+
+
+#define TRANS_GRE "gre"
+#define TRANS_GRE_LEN strlen(TRANS_GRE)	/* prefix length for strncmp() */
+
+#define TRANS_L2TPV3 "l2tpv3"
+#define TRANS_L2TPV3_LEN strlen(TRANS_L2TPV3)
+
+#ifndef IPPROTO_GRE
+#define IPPROTO_GRE 0x2F
+#endif
+
+#define GRE_MODE_CHECKSUM	cpu_to_be16(8 << 12)	/* checksum */
+#define GRE_MODE_RESERVED	cpu_to_be16(4 << 12)	/* unused */
+#define GRE_MODE_KEY		cpu_to_be16(2 << 12)	/* KEY present */
+#define GRE_MODE_SEQUENCE	cpu_to_be16(1 << 12)	/* sequence */
+
+#define GRE_IRB cpu_to_be16(0x6558)
+
+#define L2TPV3_DATA_PACKET 0x30000
+
+/* IANA-assigned IP protocol ID for L2TPv3 */
+
+#ifndef IPPROTO_L2TP
+#define IPPROTO_L2TP 0x73
+#endif
+
+struct arglist {
+	int	numargs;		/* number of token/value pairs stored */
+	char	*tokens[MAXVARGS];	/* option names */
+	char	*values[MAXVARGS];	/* values[i] belongs to tokens[i] */
+};
+
+/* Separating read and write FDs allows us to have different
+ * rx and tx method. Example - read tap via raw socket using
+ * recvmmsg, write using legacy tap write calls
+ */
+
+struct vector_fds {
+	int rx_fd;
+	int tx_fd;
+	void *remote_addr;	/* heap copy of the peer sockaddr, or NULL */
+	int remote_addr_size;	/* size of remote_addr in bytes, 0 if NULL */
+};
+
+#define VECTOR_READ	1		/* distinct bit flags */
+#define VECTOR_WRITE	(1 << 1)
+#define VECTOR_HEADERS	(1 << 2)
+
+extern struct arglist *uml_parse_vector_ifspec(char *arg);
+
+extern struct vector_fds *uml_vector_user_open(
+	int unit,
+	struct arglist *parsed
+);
+
+extern char *uml_vector_fetch_arg(
+	struct arglist *ifspec,
+	char *token
+);
+
+extern int uml_vector_recvmsg(int fd, void *hdr, int flags);
+extern int uml_vector_sendmsg(int fd, void *hdr, int flags);
+extern int uml_vector_writev(int fd, void *hdr, int iovcount);
+extern int uml_vector_sendmmsg(
+	int fd, void *msgvec,
+	unsigned int vlen,
+	unsigned int flags
+);
+extern int uml_vector_recvmmsg(
+	int fd,
+	void *msgvec,
+	unsigned int vlen,
+	unsigned int flags
+);
+extern void *uml_vector_default_bpf(int fd, void *mac);
+extern int uml_vector_attach_bpf(int fd, void *bpf, int bpf_len);
+extern bool uml_raw_enable_qdisc_bypass(int fd);
+extern bool uml_raw_enable_vnet_headers(int fd);
+extern bool uml_tap_enable_vnet_headers(int fd);
+
+
+#endif
diff --git a/arch/um/include/asm/asm-prototypes.h b/arch/um/include/asm/asm-prototypes.h
new file mode 100644
index 000000000000..5898a26daa0d
--- /dev/null
+++ b/arch/um/include/asm/asm-prototypes.h
@@ -0,0 +1 @@
+#include <asm-generic/asm-prototypes.h>
diff --git a/arch/um/include/asm/irq.h b/arch/um/include/asm/irq.h
index b5cdd3f91157..49ed3e35b35a 100644
--- a/arch/um/include/asm/irq.h
+++ b/arch/um/include/asm/irq.h
@@ -18,7 +18,19 @@
 #define XTERM_IRQ 		13
 #define RANDOM_IRQ 		14
 
+#ifdef CONFIG_UML_NET_VECTOR
+
+#define VECTOR_BASE_IRQ		15
+#define VECTOR_IRQ_SPACE	8
+
+#define LAST_IRQ (VECTOR_IRQ_SPACE + VECTOR_BASE_IRQ)
+
+#else
+
 #define LAST_IRQ RANDOM_IRQ
+
+#endif
+
 #define NR_IRQS (LAST_IRQ + 1)
 
 #endif
diff --git a/arch/um/include/shared/irq_user.h b/arch/um/include/shared/irq_user.h
index df5633053957..a7a6120f19d5 100644
--- a/arch/um/include/shared/irq_user.h
+++ b/arch/um/include/shared/irq_user.h
@@ -7,6 +7,7 @@
 #define __IRQ_USER_H__
 
 #include <sysdep/ptrace.h>
+#include <stdbool.h>
 
 struct irq_fd {
 	struct irq_fd *next;
@@ -15,10 +16,17 @@ struct irq_fd {
 	int type;
 	int irq;
 	int events;
-	int current_events;
+	bool active;
+	bool pending;
+	bool purge;
 };
 
-enum { IRQ_READ, IRQ_WRITE };
+#define IRQ_READ  0
+#define IRQ_WRITE 1
+#define IRQ_NONE 2
+#define MAX_IRQ_TYPE (IRQ_NONE + 1)
+
+
 
 struct siginfo;
 extern void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs);
diff --git a/arch/um/include/shared/net_kern.h b/arch/um/include/shared/net_kern.h
index 012ac87d4900..40442b98b173 100644
--- a/arch/um/include/shared/net_kern.h
+++ b/arch/um/include/shared/net_kern.h
@@ -65,5 +65,7 @@ extern int tap_setup_common(char *str, char *type, char **dev_name,
 			    char **mac_out, char **gate_addr);
 extern void register_transport(struct transport *new);
 extern unsigned short eth_protocol(struct sk_buff *skb);
+extern void uml_net_setup_etheraddr(struct net_device *dev, char *str);
+
 
 #endif
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index d8ddaf9790d2..048ae37eb5aa 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -290,15 +290,16 @@ extern void halt_skas(void);
 extern void reboot_skas(void);
 
 /* irq.c */
-extern int os_waiting_for_events(struct irq_fd *active_fds);
-extern int os_create_pollfd(int fd, int events, void *tmp_pfd, int size_tmpfds);
-extern void os_free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg,
-		struct irq_fd *active_fds, struct irq_fd ***last_irq_ptr2);
-extern void os_free_irq_later(struct irq_fd *active_fds,
-		int irq, void *dev_id);
-extern int os_get_pollfd(int i);
-extern void os_set_pollfd(int i, int fd);
+extern int os_waiting_for_events_epoll(void);
+extern void *os_epoll_get_data_pointer(int index);
+extern int os_epoll_triggered(int index, int events);
+extern int os_event_mask(int irq_type);
+extern int os_setup_epoll(void);
+extern int os_add_epoll_fd(int events, int fd, void *data);
+extern int os_mod_epoll_fd(int events, int fd, void *data);
+extern int os_del_epoll_fd(int fd);
 extern void os_set_ioignore(void);
+extern void os_close_epoll_fd(void);
 
 /* sigio.c */
 extern int add_sigio_fd(int fd);
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 23cb9350d47e..6b7f3827d6e4 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -1,4 +1,6 @@
 /*
+ * Copyright (C) 2017 - Cambridge Greys Ltd
+ * Copyright (C) 2011 - 2014 Cisco Systems Inc
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  * Derived (i.e. mostly copied) from arch/i386/kernel/irq.c:
@@ -16,243 +18,362 @@
 #include <as-layout.h>
 #include <kern_util.h>
 #include <os.h>
+#include <irq_user.h>
 
-/*
- * This list is accessed under irq_lock, except in sigio_handler,
- * where it is safe from being modified.  IRQ handlers won't change it -
- * if an IRQ source has vanished, it will be freed by free_irqs just
- * before returning from sigio_handler.  That will process a separate
- * list of irqs to free, with its own locking, coming back here to
- * remove list elements, taking the irq_lock to do so.
+
+/* When epoll triggers we do not know why it did so
+ * we can also have different IRQs for read and write.
+ * This is why we keep a small irq_fd array for each fd -
+ * one entry per IRQ type
  */
-static struct irq_fd *active_fds = NULL;
-static struct irq_fd **last_irq_ptr = &active_fds;
 
-extern void free_irqs(void);
+struct irq_entry {
+	struct irq_entry *next;	/* next entry on the active_fds list */
+	int fd;
+	struct irq_fd *irq_array[MAX_IRQ_TYPE + 1];	/* indexed by IRQ type */
+};
+
+static struct irq_entry *active_fds;
+
+static DEFINE_SPINLOCK(irq_lock);
+
+static void irq_io_loop(struct irq_fd *irq, struct uml_pt_regs *regs)
+{
+/*
+ * irq->active guards against reentry: it is cleared while do_IRQ()
+ * runs, so a nested call only raises irq->pending and returns.
+ * The handler is re-run until pending stays clear or purge is set;
+ * after a purge, active is left false.
+ */
+	if (irq->active) {
+		irq->active = false;
+		do {
+			irq->pending = false;
+			do_IRQ(irq->irq, regs);
+		} while (irq->pending && (!irq->purge));
+		if (!irq->purge)
+			irq->active = true;
+	} else {
+		irq->pending = true;
+	}
+}
 
 void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
 {
-	struct irq_fd *irq_fd;
-	int n;
+	struct irq_entry *irq_entry;
+	struct irq_fd *irq;
+
+	int n, i, j;
 
 	while (1) {
-		n = os_waiting_for_events(active_fds);
+		/* This is now lockless - epoll keeps back-references to the
+		 * irqs which have triggered it so there is no need to walk the
+		 * irq list and lock it every time. We avoid locking by turning
+		 * off IO for a specific fd by executing os_del_epoll_fd(fd)
+		 * before we do any changes to the actual data structures.
+		 */
+		n = os_waiting_for_events_epoll();
+
 		if (n <= 0) {
 			if (n == -EINTR)
 				continue;
-			else break;
+			else
+				break;
 		}
 
-		for (irq_fd = active_fds; irq_fd != NULL;
-		     irq_fd = irq_fd->next) {
-			if (irq_fd->current_events != 0) {
-				irq_fd->current_events = 0;
-				do_IRQ(irq_fd->irq, regs);
+		for (i = 0; i < n ; i++) {
+			/* Epoll back reference is the entry with 3 irq_fd
+			 * leaves - one for each irq type.
+			 */
+			irq_entry = (struct irq_entry *)
+				os_epoll_get_data_pointer(i);
+			for (j = 0; j < MAX_IRQ_TYPE ; j++) {
+				irq = irq_entry->irq_array[j];
+				if (irq == NULL)
+					continue;
+				if (os_epoll_triggered(i, irq->events) > 0)
+					irq_io_loop(irq, regs);
+				if (irq->purge) {
+					irq_entry->irq_array[j] = NULL;
+					kfree(irq);
+				}
 			}
 		}
 	}
+}
+
+static int assign_epoll_events_to_irq(struct irq_entry *irq_entry)
+{
+	int i;
+	int events = 0;
+	struct irq_fd *irq;
 
-	free_irqs();
+	for (i = 0; i < MAX_IRQ_TYPE ; i++) {
+		irq = irq_entry->irq_array[i];
+		if (irq != NULL)
+			events = irq->events | events;
+	}
+	if (events > 0) {
+	/* os_add_epoll will call os_mod_epoll if this already exists */
+		return os_add_epoll_fd(events, irq_entry->fd, irq_entry);
+	}
+	/* No events - delete */
+	return os_del_epoll_fd(irq_entry->fd);
 }
 
-static DEFINE_SPINLOCK(irq_lock);
+
 
 static int activate_fd(int irq, int fd, int type, void *dev_id)
 {
-	struct pollfd *tmp_pfd;
-	struct irq_fd *new_fd, *irq_fd;
+	struct irq_fd *new_fd;
+	struct irq_entry *irq_entry;
+	int i, err, events;
 	unsigned long flags;
-	int events, err, n;
 
 	err = os_set_fd_async(fd);
 	if (err < 0)
 		goto out;
 
-	err = -ENOMEM;
-	new_fd = kmalloc(sizeof(struct irq_fd), GFP_KERNEL);
-	if (new_fd == NULL)
-		goto out;
+	spin_lock_irqsave(&irq_lock, flags);
 
-	if (type == IRQ_READ)
-		events = UM_POLLIN | UM_POLLPRI;
-	else events = UM_POLLOUT;
-	*new_fd = ((struct irq_fd) { .next  		= NULL,
-				     .id 		= dev_id,
-				     .fd 		= fd,
-				     .type 		= type,
-				     .irq 		= irq,
-				     .events 		= events,
-				     .current_events 	= 0 } );
+	/* Check if we have an entry for this fd */
 
 	err = -EBUSY;
-	spin_lock_irqsave(&irq_lock, flags);
-	for (irq_fd = active_fds; irq_fd != NULL; irq_fd = irq_fd->next) {
-		if ((irq_fd->fd == fd) && (irq_fd->type == type)) {
-			printk(KERN_ERR "Registering fd %d twice\n", fd);
-			printk(KERN_ERR "Irqs : %d, %d\n", irq_fd->irq, irq);
-			printk(KERN_ERR "Ids : 0x%p, 0x%p\n", irq_fd->id,
-			       dev_id);
+	for (irq_entry = active_fds;
+		irq_entry != NULL; irq_entry = irq_entry->next) {
+		if (irq_entry->fd == fd)
+			break;
+	}
+
+	if (irq_entry == NULL) {
+		/* This needs to be atomic as it may be called from an
+		 * IRQ context.
+		 */
+		irq_entry = kmalloc(sizeof(struct irq_entry), GFP_ATOMIC);
+		if (irq_entry == NULL) {
+			printk(KERN_ERR "Failed to allocate new IRQ entry\n");
+			err = -ENOMEM;	/* not -EBUSY: nothing is registered */
+			goto out_unlock;
+		}
+		irq_entry->fd = fd;
+		for (i = 0; i < MAX_IRQ_TYPE; i++)
+			irq_entry->irq_array[i] = NULL;
+		irq_entry->next = active_fds;
+		active_fds = irq_entry;
+	}
 
-	if (type == IRQ_WRITE)
-		fd = -1;
-
-	tmp_pfd = NULL;
-	n = 0;
+	/* Check if we are trying to re-register an interrupt for a
+	 * particular fd
+	 */
 
-	while (1) {
-		n = os_create_pollfd(fd, events, tmp_pfd, n);
-		if (n == 0)
-			break;
+	if (irq_entry->irq_array[type] != NULL) {
+		printk(KERN_ERR
+			"Trying to reregister IRQ %d FD %d TYPE %d ID %p\n",
+			irq, fd, type, dev_id
+		);
+		goto out_unlock;
+	} else {
+		/* New entry for this fd */
+
+		err = -ENOMEM;
+		new_fd = kmalloc(sizeof(struct irq_fd), GFP_ATOMIC);
+		if (new_fd == NULL)
+			goto out_unlock;
 
-		/*
-		 * n > 0
-		 * It means we couldn't put new pollfd to current pollfds
-		 * and tmp_fds is NULL or too small for new pollfds array.
-		 * Needed size is equal to n as minimum.
-		 *
-		 * Here we have to drop the lock in order to call
-		 * kmalloc, which might sleep.
-		 * If something else came in and changed the pollfds array
-		 * so we will not be able to put new pollfd struct to pollfds
-		 * then we free the buffer tmp_fds and try again.
+		events = os_event_mask(type);
+
+		*new_fd = ((struct irq_fd) {
+			.id		= dev_id,
+			.irq		= irq,
+			.type		= type,
+			.events		= events,
+			.active		= true,
+			.pending	= false,
+			.purge		= false
+		});
+		/* Turn off any IO on this fd - allows us to
+		 * avoid locking the IRQ loop
 		 */
-		spin_unlock_irqrestore(&irq_lock, flags);
-		kfree(tmp_pfd);
-
-		tmp_pfd = kmalloc(n, GFP_KERNEL);
-		if (tmp_pfd == NULL)
-			goto out_kfree;
-
-		spin_lock_irqsave(&irq_lock, flags);
+		os_del_epoll_fd(irq_entry->fd);
+		irq_entry->irq_array[type] = new_fd;
 	}
 
-	*last_irq_ptr = new_fd;
-	last_irq_ptr = &new_fd->next;
-
+	/* Turn back IO on with the correct (new) IO event mask */
+	assign_epoll_events_to_irq(irq_entry);
 	spin_unlock_irqrestore(&irq_lock, flags);
-
-	/*
-	 * This calls activate_fd, so it has to be outside the critical
-	 * section.
-	 */
-	maybe_sigio_broken(fd, (type == IRQ_READ));
+	maybe_sigio_broken(fd, (type != IRQ_NONE));
 
 	return 0;
-
- out_unlock:
+out_unlock:
 	spin_unlock_irqrestore(&irq_lock, flags);
- out_kfree:
-	kfree(new_fd);
- out:
+out:
 	return err;
 }
 
-static void free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg)
+/*
+ * Walk the IRQ list and dispose of any unused entries.
+ * Should be done under irq_lock.
+ */
+
+static void garbage_collect_irq_entries(void)
 {
-	unsigned long flags;
+	int i;
+	bool reap;
+	struct irq_entry *walk;
+	struct irq_entry *previous = NULL;
+	struct irq_entry *to_free;
 
-	spin_lock_irqsave(&irq_lock, flags);
-	os_free_irq_by_cb(test, arg, active_fds, &last_irq_ptr);
-	spin_unlock_irqrestore(&irq_lock, flags);
+	/* Unlink and free every entry that has no IRQ of any type left. */
+	walk = active_fds;
+	while (walk != NULL) {
+		reap = true;
+		for (i = 0; i < MAX_IRQ_TYPE ; i++) {
+			if (walk->irq_array[i] != NULL) {
+				reap = false;
+				break;
+			}
+		}
+		if (reap) {
+			if (previous == NULL)
+				active_fds = walk->next;
+			else
+				previous->next = walk->next;
+			to_free = walk;
+		} else {
+			/* walk stays linked: it precedes whatever follows */
+			previous = walk;
+			to_free = NULL;
+		}
+		walk = walk->next;
+		kfree(to_free);
+	}
 }
 
-struct irq_and_dev {
-	int irq;
-	void *dev;
-};
+/*
+ * Walk the IRQ list and get the descriptor for our FD
+ */
 
-static int same_irq_and_dev(struct irq_fd *irq, void *d)
+static struct irq_entry *get_irq_entry_by_fd(int fd)
 {
-	struct irq_and_dev *data = d;
+	struct irq_entry *walk = active_fds;
 
-	return ((irq->irq == data->irq) && (irq->id == data->dev));
+	while (walk != NULL) {
+		if (walk->fd == fd)
+			return walk;
+		walk = walk->next;
+	}
+	return NULL;
 }
 
-static void free_irq_by_irq_and_dev(unsigned int irq, void *dev)
-{
-	struct irq_and_dev data = ((struct irq_and_dev) { .irq  = irq,
-							  .dev  = dev });
 
-	free_irq_by_cb(same_irq_and_dev, &data);
-}
+/*
+ * Walk the IRQ list and dispose of an entry for a specific
+ * device, fd and number. Note - if sharing an IRQ for read
+ * and write for the same FD it will be disposed in either case.
+ * If this behaviour is undesirable use different IRQ ids.
+ */
 
-static int same_fd(struct irq_fd *irq, void *fd)
-{
-	return (irq->fd == *((int *)fd));
-}
+#define IGNORE_IRQ 1
+#define IGNORE_DEV (1<<1)
 
-void free_irq_by_fd(int fd)
+static void do_free_by_irq_and_dev(
+	struct irq_entry *irq_entry,
+	unsigned int irq,
+	void *dev,
+	int flags
+)
 {
-	free_irq_by_cb(same_fd, &fd);
+	int i;
+	struct irq_fd *to_free;
+
+	for (i = 0; i < MAX_IRQ_TYPE ; i++) {
+		if (irq_entry->irq_array[i] != NULL) {
+			if (
+			((flags & IGNORE_IRQ) ||
+				(irq_entry->irq_array[i]->irq == irq)) &&
+			((flags & IGNORE_DEV) ||
+				(irq_entry->irq_array[i]->id == dev))
+			) {
+				/* Turn off any IO on this fd - allows us to
+				 * avoid locking the IRQ loop
+				 */
+				os_del_epoll_fd(irq_entry->fd);
+				to_free = irq_entry->irq_array[i];
+				irq_entry->irq_array[i] = NULL;
+				assign_epoll_events_to_irq(irq_entry);
+				if (to_free->active)
+					to_free->purge = true;
+				else
+					kfree(to_free);
+			}
+		}
+	}
 }
 
-/* Must be called with irq_lock held */
-static struct irq_fd *find_irq_by_fd(int fd, int irqnum, int *index_out)
+void free_irq_by_fd(int fd)
 {
-	struct irq_fd *irq;
-	int i = 0;
-	int fdi;
+	struct irq_entry *to_free;
+	unsigned long flags;
 
-	for (irq = active_fds; irq != NULL; irq = irq->next) {
-		if ((irq->fd == fd) && (irq->irq == irqnum))
-			break;
-		i++;
-	}
-	if (irq == NULL) {
-		printk(KERN_ERR "find_irq_by_fd doesn't have descriptor %d\n",
-		       fd);
-		goto out;
-	}
-	fdi = os_get_pollfd(i);
-	if ((fdi != -1) && (fdi != fd)) {
-		printk(KERN_ERR "find_irq_by_fd - mismatch between active_fds "
-		       "and pollfds, fd %d vs %d, need %d\n", irq->fd,
-		       fdi, fd);
-		irq = NULL;
-		goto out;
+	spin_lock_irqsave(&irq_lock, flags);
+	to_free = get_irq_entry_by_fd(fd);
+	if (to_free != NULL) {
+		do_free_by_irq_and_dev(
+			to_free,
+			-1,
+			NULL,
+			IGNORE_IRQ | IGNORE_DEV
+		);
 	}
-	*index_out = i;
- out:
-	return irq;
+	garbage_collect_irq_entries();
+	spin_unlock_irqrestore(&irq_lock, flags);
 }
+EXPORT_SYMBOL(free_irq_by_fd);
 
-void reactivate_fd(int fd, int irqnum)
+static void free_irq_by_irq_and_dev(unsigned int irq, void *dev)
 {
-	struct irq_fd *irq;
+	struct irq_entry *to_free;
 	unsigned long flags;
-	int i;
 
 	spin_lock_irqsave(&irq_lock, flags);
-	irq = find_irq_by_fd(fd, irqnum, &i);
-	if (irq == NULL) {
-		spin_unlock_irqrestore(&irq_lock, flags);
-		return;
+	to_free = active_fds;
+	while (to_free != NULL) {
+		do_free_by_irq_and_dev(
+			to_free,
+			irq,
+			dev,
+			0
+		);
+		to_free = to_free->next;
 	}
-	os_set_pollfd(i, irq->fd);
+	garbage_collect_irq_entries();
 	spin_unlock_irqrestore(&irq_lock, flags);
+}
 
-	add_sigio_fd(fd);
+
+void reactivate_fd(int fd, int irqnum)
+{
+	/** NOP - we do auto-EOI now **/
 }
 
 void deactivate_fd(int fd, int irqnum)
 {
-	struct irq_fd *irq;
+	struct irq_entry *to_free;
 	unsigned long flags;
-	int i;
 
+	os_del_epoll_fd(fd);
 	spin_lock_irqsave(&irq_lock, flags);
-	irq = find_irq_by_fd(fd, irqnum, &i);
-	if (irq == NULL) {
-		spin_unlock_irqrestore(&irq_lock, flags);
-		return;
+	to_free = get_irq_entry_by_fd(fd);
+	if (to_free != NULL) {
+		do_free_by_irq_and_dev(
+			to_free,
+			irqnum,
+			NULL,
+			IGNORE_DEV
+		);
 	}
-
-	os_set_pollfd(i, -1);
+	garbage_collect_irq_entries();
 	spin_unlock_irqrestore(&irq_lock, flags);
-
 	ignore_sigio_fd(fd);
 }
 EXPORT_SYMBOL(deactivate_fd);
@@ -265,17 +386,28 @@ EXPORT_SYMBOL(deactivate_fd);
  */
 int deactivate_all_fds(void)
 {
-	struct irq_fd *irq;
-	int err;
+	unsigned long flags;
+	struct irq_entry *to_free;
 
-	for (irq = active_fds; irq != NULL; irq = irq->next) {
-		err = os_clear_fd_async(irq->fd);
-		if (err)
-			return err;
-	}
-	/* If there is a signal already queued, after unblocking ignore it */
+	spin_lock_irqsave(&irq_lock, flags);
+	/* Stop IO. The IRQ loop has no lock so this is our
+	 * only way of making sure we are safe to dispose
+	 * of all IRQ handlers
+	 */
 	os_set_ioignore();
-
+	to_free = active_fds;
+	while (to_free != NULL) {
+		do_free_by_irq_and_dev(
+			to_free,
+			-1,
+			NULL,
+			IGNORE_IRQ | IGNORE_DEV
+		);
+		to_free = to_free->next;
+	}
+	garbage_collect_irq_entries();
+	spin_unlock_irqrestore(&irq_lock, flags);
+	os_close_epoll_fd();
 	return 0;
 }
 
@@ -353,8 +485,11 @@ void __init init_IRQ(void)
 
 	irq_set_chip_and_handler(TIMER_IRQ, &SIGVTALRM_irq_type, handle_edge_irq);
 
+
 	for (i = 1; i < NR_IRQS; i++)
 		irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_irq);
+	/* Initialize EPOLL Loop */
+	os_setup_epoll();
 }
 
 /*
diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
index 7f69d17de354..052de4c8acb2 100644
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -121,12 +121,12 @@ static void __init um_timer_setup(void)
 	clockevents_register_device(&timer_clockevent);
 }
 
-void read_persistent_clock(struct timespec *ts)
+void read_persistent_clock64(struct timespec64 *ts)
 {
 	long long nsecs = os_persistent_clock_emulation();
 
-	set_normalized_timespec(ts, nsecs / NSEC_PER_SEC,
-				nsecs % NSEC_PER_SEC);
+	set_normalized_timespec64(ts, nsecs / NSEC_PER_SEC,
+				  nsecs % NSEC_PER_SEC);
 }
 
 void __init time_init(void)
diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
index 2db18cbbb0ea..c0197097c86e 100644
--- a/arch/um/os-Linux/file.c
+++ b/arch/um/os-Linux/file.c
@@ -12,6 +12,7 @@
 #include <sys/mount.h>
 #include <sys/socket.h>
 #include <sys/stat.h>
+#include <sys/sysmacros.h>
 #include <sys/un.h>
 #include <sys/types.h>
 #include <os.h>
diff --git a/arch/um/os-Linux/irq.c b/arch/um/os-Linux/irq.c
index b9afb74b79ad..365823010346 100644
--- a/arch/um/os-Linux/irq.c
+++ b/arch/um/os-Linux/irq.c
@@ -1,135 +1,147 @@
 /*
+ * Copyright (C) 2017 - Cambridge Greys Ltd
+ * Copyright (C) 2011 - 2014 Cisco Systems Inc
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
 #include <stdlib.h>
 #include <errno.h>
-#include <poll.h>
+#include <sys/epoll.h>
 #include <signal.h>
 #include <string.h>
 #include <irq_user.h>
 #include <os.h>
 #include <um_malloc.h>
 
+/* Epoll support */
+
+static int epollfd = -1;
+
+#define MAX_EPOLL_EVENTS 64
+
+static struct epoll_event epoll_events[MAX_EPOLL_EVENTS];
+
+/* Helper to return an Epoll data pointer from an epoll event structure.
+ * We need to keep this one on the userspace side to keep includes separate
+ */
+
+void *os_epoll_get_data_pointer(int index)
+{
+	return epoll_events[index].data.ptr;
+}
+
+/* Helper to compare events versus the events in the epoll structure.
+ * Same as above - needs to be on the userspace side
+ */
+
+
+int os_epoll_triggered(int index, int events)
+{
+	return epoll_events[index].events & events;
+}
+/* Helper to set the event mask.
+ * The event mask is opaque to the kernel side, because it does not have
+ * access to the right includes/defines for EPOLL constants.
+ */
+
+int os_event_mask(int irq_type)
+{
+	if (irq_type == IRQ_READ)
+		return EPOLLIN | EPOLLPRI;
+	if (irq_type == IRQ_WRITE)
+		return EPOLLOUT;
+	return 0;
+}
+
 /*
- * Locked by irq_lock in arch/um/kernel/irq.c.  Changed by os_create_pollfd
- * and os_free_irq_by_cb, which are called under irq_lock.
+ * Initial Epoll Setup
  */
-static struct pollfd *pollfds = NULL;
-static int pollfds_num = 0;
-static int pollfds_size = 0;
+int os_setup_epoll(void)
+{
+	epollfd = epoll_create(MAX_EPOLL_EVENTS);
+	return epollfd;
+}
 
-int os_waiting_for_events(struct irq_fd *active_fds)
+/*
+ * Helper to run the actual epoll_wait
+ */
+int os_waiting_for_events_epoll(void)
 {
-	struct irq_fd *irq_fd;
-	int i, n, err;
+	int n, err;
 
-	n = poll(pollfds, pollfds_num, 0);
+	n = epoll_wait(epollfd,
+		(struct epoll_event *) &epoll_events, MAX_EPOLL_EVENTS, 0);
 	if (n < 0) {
 		err = -errno;
 		if (errno != EINTR)
-			printk(UM_KERN_ERR "os_waiting_for_events:"
-			       " poll returned %d, errno = %d\n", n, errno);
+			printk(
+				UM_KERN_ERR "os_waiting_for_events:"
+				" epoll returned %d, error = %s\n", n,
+				strerror(errno)
+			);
 		return err;
 	}
-
-	if (n == 0)
-		return 0;
-
-	irq_fd = active_fds;
-
-	for (i = 0; i < pollfds_num; i++) {
-		if (pollfds[i].revents != 0) {
-			irq_fd->current_events = pollfds[i].revents;
-			pollfds[i].fd = -1;
-		}
-		irq_fd = irq_fd->next;
-	}
 	return n;
 }
 
-int os_create_pollfd(int fd, int events, void *tmp_pfd, int size_tmpfds)
-{
-	if (pollfds_num == pollfds_size) {
-		if (size_tmpfds <= pollfds_size * sizeof(pollfds[0])) {
-			/* return min size needed for new pollfds area */
-			return (pollfds_size + 1) * sizeof(pollfds[0]);
-		}
-
-		if (pollfds != NULL) {
-			memcpy(tmp_pfd, pollfds,
-			       sizeof(pollfds[0]) * pollfds_size);
-			/* remove old pollfds */
-			kfree(pollfds);
-		}
-		pollfds = tmp_pfd;
-		pollfds_size++;
-	} else
-		kfree(tmp_pfd);	/* remove not used tmp_pfd */
-
-	pollfds[pollfds_num] = ((struct pollfd) { .fd		= fd,
-						  .events	= events,
-						  .revents	= 0 });
-	pollfds_num++;
-
-	return 0;
-}
 
-void os_free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg,
-		struct irq_fd *active_fds, struct irq_fd ***last_irq_ptr2)
+/*
+ * Helper to add a fd to epoll
+ */
+int os_add_epoll_fd(int events, int fd, void *data)
 {
-	struct irq_fd **prev;
-	int i = 0;
-
-	prev = &active_fds;
-	while (*prev != NULL) {
-		if ((*test)(*prev, arg)) {
-			struct irq_fd *old_fd = *prev;
-			if ((pollfds[i].fd != -1) &&
-			    (pollfds[i].fd != (*prev)->fd)) {
-				printk(UM_KERN_ERR "os_free_irq_by_cb - "
-				       "mismatch between active_fds and "
-				       "pollfds, fd %d vs %d\n",
-				       (*prev)->fd, pollfds[i].fd);
-				goto out;
-			}
-
-			pollfds_num--;
-
-			/*
-			 * This moves the *whole* array after pollfds[i]
-			 * (though it doesn't spot as such)!
-			 */
-			memmove(&pollfds[i], &pollfds[i + 1],
-			       (pollfds_num - i) * sizeof(pollfds[0]));
-			if (*last_irq_ptr2 == &old_fd->next)
-				*last_irq_ptr2 = prev;
-
-			*prev = (*prev)->next;
-			if (old_fd->type == IRQ_WRITE)
-				ignore_sigio_fd(old_fd->fd);
-			kfree(old_fd);
-			continue;
-		}
-		prev = &(*prev)->next;
-		i++;
-	}
- out:
-	return;
+	struct epoll_event event = { .events = events | EPOLLET };
+	int result;
+
+	event.data.ptr = data;
+	result = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &event);
+	/* Already registered: update in place, keeping the same mask */
+	if ((result) && (errno == EEXIST))
+		result = os_mod_epoll_fd(event.events, fd, data);
+	if (result)
+		printk(UM_KERN_ERR "epollctl add err fd %d, %s\n", fd, strerror(errno));
+	return result;
 }
 
-int os_get_pollfd(int i)
+/*
+ * Helper to mod the fd event mask and/or data backreference
+ */
+int os_mod_epoll_fd(int events, int fd, void *data)
 {
-	return pollfds[i].fd;
+	struct epoll_event event;
+	int result;
+
+	event.data.ptr = data;
+	event.events = events;
+	result = epoll_ctl(epollfd, EPOLL_CTL_MOD, fd, &event);
+	if (result)
+		printk(UM_KERN_ERR
+			"epollctl mod err fd %d, %s\n", fd, strerror(errno));
+	return result;
 }
 
-void os_set_pollfd(int i, int fd)
+/*
+ * Helper to remove a fd from the epoll set
+ */
+int os_del_epoll_fd(int fd)
 {
-	pollfds[i].fd = fd;
+	struct epoll_event event;
+	int result;
+	/* This is quiet as we use this as IO ON/OFF - so it is often
+	 * invoked on a non-existent fd
+	 */
+	result = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, &event);
+	return result;
 }
 
 void os_set_ioignore(void)
 {
 	signal(SIGIO, SIG_IGN);
 }
+
+void os_close_epoll_fd(void)
+{
+	/* Needed so we do not leak an fd when rebooting */
+	os_close_file(epollfd);
+}
diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c
index a86d7cc2c2d8..bf0acb8aad8b 100644
--- a/arch/um/os-Linux/signal.c
+++ b/arch/um/os-Linux/signal.c
@@ -16,6 +16,7 @@
 #include <os.h>
 #include <sysdep/mcontext.h>
 #include <um_malloc.h>
+#include <sys/ucontext.h>
 
 void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = {
 	[SIGTRAP]	= relay_signal,
@@ -159,7 +160,7 @@ static void (*handlers[_NSIG])(int sig, struct siginfo *si, mcontext_t *mc) = {
 
 static void hard_handler(int sig, siginfo_t *si, void *p)
 {
-	struct ucontext *uc = p;
+	ucontext_t *uc = p;
 	mcontext_t *mc = &uc->uc_mcontext;
 	unsigned long pending = 1UL << sig;
 
diff --git a/arch/unicore32/include/asm/cacheflush.h b/arch/unicore32/include/asm/cacheflush.h
index a5e08e2d5d6d..1d9132b66039 100644
--- a/arch/unicore32/include/asm/cacheflush.h
+++ b/arch/unicore32/include/asm/cacheflush.h
@@ -170,10 +170,8 @@ extern void flush_cache_page(struct vm_area_struct *vma,
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 extern void flush_dcache_page(struct page *);
 
-#define flush_dcache_mmap_lock(mapping)			\
-	spin_lock_irq(&(mapping)->tree_lock)
-#define flush_dcache_mmap_unlock(mapping)		\
-	spin_unlock_irq(&(mapping)->tree_lock)
+#define flush_dcache_mmap_lock(mapping)		do { } while (0)
+#define flush_dcache_mmap_unlock(mapping)	do { } while (0)
 
 #define flush_icache_user_range(vma, page, addr, len)	\
 	flush_dcache_page(page)
diff --git a/arch/unicore32/include/asm/memory.h b/arch/unicore32/include/asm/memory.h
index 3bb0a29fd2d7..66bb9f6525c0 100644
--- a/arch/unicore32/include/asm/memory.h
+++ b/arch/unicore32/include/asm/memory.h
@@ -20,12 +20,6 @@
 #include <mach/memory.h>
 
 /*
- * Allow for constants defined here to be used from assembly code
- * by prepending the UL suffix only with actual C code compilation.
- */
-#define UL(x) _AC(x, UL)
-
-/*
  * PAGE_OFFSET - the virtual address of the start of the kernel image
  * TASK_SIZE - the maximum size of a user space task.
  * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d234cca296db..00fcf81f2c56 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -29,6 +29,7 @@ config X86_64
 	select HAVE_ARCH_SOFT_DIRTY
 	select MODULES_USE_ELF_RELA
 	select X86_DEV_DMA_OPS
+	select ARCH_HAS_SYSCALL_WRAPPER
 
 #
 # Arch settings
@@ -2008,6 +2009,9 @@ config KEXEC_FILE
 	  for kernel and initramfs as opposed to list of segments as
 	  accepted by previous system call.
 
+config ARCH_HAS_KEXEC_PURGATORY
+	def_bool KEXEC_FILE
+
 config KEXEC_VERIFY_SIG
 	bool "Verify kernel signature during kexec_file_load() syscall"
 	depends on KEXEC_FILE
@@ -2760,11 +2764,9 @@ config OLPC_XO1_RTC
 
 config OLPC_XO1_SCI
 	bool "OLPC XO-1 SCI extras"
-	depends on OLPC && OLPC_XO1_PM
+	depends on OLPC && OLPC_XO1_PM && GPIO_CS5535=y
 	depends on INPUT=y
 	select POWER_SUPPLY
-	select GPIO_CS5535
-	select MFD_CORE
 	---help---
 	  Add support for SCI-based features of the OLPC XO-1 laptop:
 	   - EC-driven system wakeups
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index 66e42a098d70..a0a50b91ecef 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -54,6 +54,9 @@ unsigned int ptrs_per_p4d __ro_after_init = 1;
 
 extern unsigned long get_cmd_line_ptr(void);
 
+/* Used by PAGE_KERN* macros: */
+pteval_t __default_kernel_pte_mask __read_mostly = ~0;
+
 /* Simplified build-specific string for starting entropy. */
 static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
 		LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index be63330c5511..352e70cd33e8 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -114,7 +114,9 @@ For 32-bit we have the following conventions - kernel is built with
 	pushq   %rsi		/* pt_regs->si */
 	.endif
 	pushq	\rdx		/* pt_regs->dx */
+	xorl	%edx, %edx	/* nospec   dx */
 	pushq   %rcx		/* pt_regs->cx */
+	xorl	%ecx, %ecx	/* nospec   cx */
 	pushq   \rax		/* pt_regs->ax */
 	pushq   %r8		/* pt_regs->r8 */
 	xorl	%r8d, %r8d	/* nospec   r8 */
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 74f6eee15179..fbf6a6c3fd2d 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -266,14 +266,13 @@ __visible inline void syscall_return_slowpath(struct pt_regs *regs)
 }
 
 #ifdef CONFIG_X86_64
-__visible void do_syscall_64(struct pt_regs *regs)
+__visible void do_syscall_64(unsigned long nr, struct pt_regs *regs)
 {
-	struct thread_info *ti = current_thread_info();
-	unsigned long nr = regs->orig_ax;
+	struct thread_info *ti;
 
 	enter_from_user_mode();
 	local_irq_enable();
-
+	ti = current_thread_info();
 	if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY)
 		nr = syscall_trace_enter(regs);
 
@@ -282,11 +281,10 @@ __visible void do_syscall_64(struct pt_regs *regs)
 	 * table.  The only functional difference is the x32 bit in
 	 * regs->orig_ax, which changes the behavior of some syscalls.
 	 */
-	if (likely((nr & __SYSCALL_MASK) < NR_syscalls)) {
-		nr = array_index_nospec(nr & __SYSCALL_MASK, NR_syscalls);
-		regs->ax = sys_call_table[nr](
-			regs->di, regs->si, regs->dx,
-			regs->r10, regs->r8, regs->r9);
+	nr &= __SYSCALL_MASK;
+	if (likely(nr < NR_syscalls)) {
+		nr = array_index_nospec(nr, NR_syscalls);
+		regs->ax = sys_call_table[nr](regs);
 	}
 
 	syscall_return_slowpath(regs);
@@ -321,6 +319,9 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
 
 	if (likely(nr < IA32_NR_syscalls)) {
 		nr = array_index_nospec(nr, IA32_NR_syscalls);
+#ifdef CONFIG_IA32_EMULATION
+		regs->ax = ia32_sys_call_table[nr](regs);
+#else
 		/*
 		 * It's possible that a 32-bit syscall implementation
 		 * takes a 64-bit parameter but nonetheless assumes that
@@ -331,6 +332,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
 			(unsigned int)regs->bx, (unsigned int)regs->cx,
 			(unsigned int)regs->dx, (unsigned int)regs->si,
 			(unsigned int)regs->di, (unsigned int)regs->bp);
+#endif /* CONFIG_IA32_EMULATION */
 	}
 
 	syscall_return_slowpath(regs);
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index b0a4649e55ce..3166b9674429 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -233,7 +233,8 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
 	TRACE_IRQS_OFF
 
 	/* IRQs are off. */
-	movq	%rsp, %rdi
+	movq	%rax, %rdi
+	movq	%rsp, %rsi
 	call	do_syscall_64		/* returns with IRQs disabled */
 
 	TRACE_IRQS_IRETQ		/* we're about to change IF */
@@ -913,7 +914,7 @@ ENTRY(\sym)
 	pushq	$-1				/* ORIG_RAX: no syscall to restart */
 	.endif
 
-	.if \paranoid < 2
+	.if \paranoid == 1
 	testb	$3, CS-ORIG_RAX(%rsp)		/* If coming from userspace, switch stacks */
 	jnz	.Lfrom_usermode_switch_stack_\@
 	.endif
@@ -960,7 +961,7 @@ ENTRY(\sym)
 	jmp	error_exit
 	.endif
 
-	.if \paranoid < 2
+	.if \paranoid == 1
 	/*
 	 * Entry from userspace.  Switch stacks and treat it
 	 * as a normal entry.  This means that paranoid handlers
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 08425c42f8b7..9af927e59d49 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -220,8 +220,11 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
 	pushq	%rax			/* pt_regs->orig_ax */
 	pushq	%rdi			/* pt_regs->di */
 	pushq	%rsi			/* pt_regs->si */
+	xorl	%esi, %esi		/* nospec   si */
 	pushq	%rdx			/* pt_regs->dx */
+	xorl	%edx, %edx		/* nospec   dx */
 	pushq	%rbp			/* pt_regs->cx (stashed in bp) */
+	xorl	%ecx, %ecx		/* nospec   cx */
 	pushq	$-ENOSYS		/* pt_regs->ax */
 	pushq   $0			/* pt_regs->r8  = 0 */
 	xorl	%r8d, %r8d		/* nospec   r8 */
@@ -365,8 +368,11 @@ ENTRY(entry_INT80_compat)
 
 	pushq	(%rdi)			/* pt_regs->di */
 	pushq	%rsi			/* pt_regs->si */
+	xorl	%esi, %esi		/* nospec   si */
 	pushq	%rdx			/* pt_regs->dx */
+	xorl	%edx, %edx		/* nospec   dx */
 	pushq	%rcx			/* pt_regs->cx */
+	xorl	%ecx, %ecx		/* nospec   cx */
 	pushq	$-ENOSYS		/* pt_regs->ax */
 	pushq   $0			/* pt_regs->r8  = 0 */
 	xorl	%r8d, %r8d		/* nospec   r8 */
diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c
index 95c294963612..aa3336a7cb15 100644
--- a/arch/x86/entry/syscall_32.c
+++ b/arch/x86/entry/syscall_32.c
@@ -7,14 +7,23 @@
 #include <asm/asm-offsets.h>
 #include <asm/syscall.h>
 
-#define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
+#ifdef CONFIG_IA32_EMULATION
+/* On X86_64, we use struct pt_regs * to pass parameters to syscalls */
+#define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(const struct pt_regs *);
+
+/* this is a lie, but it does not hurt as sys_ni_syscall just returns -ENOSYS */
+extern asmlinkage long sys_ni_syscall(const struct pt_regs *);
+
+#else /* CONFIG_IA32_EMULATION */
+#define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
+extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
+#endif /* CONFIG_IA32_EMULATION */
+
 #include <asm/syscalls_32.h>
 #undef __SYSCALL_I386
 
 #define __SYSCALL_I386(nr, sym, qual) [nr] = sym,
 
-extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
-
 __visible const sys_call_ptr_t ia32_sys_call_table[__NR_syscall_compat_max+1] = {
 	/*
 	 * Smells like a compiler bug -- it doesn't work
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index c176d2fab1da..d5252bc1e380 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -7,14 +7,14 @@
 #include <asm/asm-offsets.h>
 #include <asm/syscall.h>
 
-#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
+/* this is a lie, but it does not hurt as sys_ni_syscall just returns -ENOSYS */
+extern asmlinkage long sys_ni_syscall(const struct pt_regs *);
+#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(const struct pt_regs *);
 #include <asm/syscalls_64.h>
 #undef __SYSCALL_64
 
 #define __SYSCALL_64(nr, sym, qual) [nr] = sym,
 
-extern long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
-
 asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
 	/*
 	 * Smells like a compiler bug -- it doesn't work
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index c58f75b088c5..d6b27dab1b30 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -4,390 +4,395 @@
 # The format is:
 # <number> <abi> <name> <entry point> <compat entry point>
 #
+# The __ia32_sys and __ia32_compat_sys stubs are created on-the-fly for
+# sys_*() system calls and compat_sys_*() compat system calls if
+# IA32_EMULATION is defined, and expect struct pt_regs *regs as their only
+# parameter.
+#
 # The abi is always "i386" for this file.
 #
-0	i386	restart_syscall		sys_restart_syscall
-1	i386	exit			sys_exit
-2	i386	fork			sys_fork
-3	i386	read			sys_read
-4	i386	write			sys_write
-5	i386	open			sys_open			compat_sys_open
-6	i386	close			sys_close
-7	i386	waitpid			sys_waitpid
-8	i386	creat			sys_creat
-9	i386	link			sys_link
-10	i386	unlink			sys_unlink
-11	i386	execve			sys_execve			compat_sys_execve
-12	i386	chdir			sys_chdir
-13	i386	time			sys_time			compat_sys_time
-14	i386	mknod			sys_mknod
-15	i386	chmod			sys_chmod
-16	i386	lchown			sys_lchown16
+0	i386	restart_syscall		sys_restart_syscall		__ia32_sys_restart_syscall
+1	i386	exit			sys_exit			__ia32_sys_exit
+2	i386	fork			sys_fork			__ia32_sys_fork
+3	i386	read			sys_read			__ia32_sys_read
+4	i386	write			sys_write			__ia32_sys_write
+5	i386	open			sys_open			__ia32_compat_sys_open
+6	i386	close			sys_close			__ia32_sys_close
+7	i386	waitpid			sys_waitpid			__ia32_sys_waitpid
+8	i386	creat			sys_creat			__ia32_sys_creat
+9	i386	link			sys_link			__ia32_sys_link
+10	i386	unlink			sys_unlink			__ia32_sys_unlink
+11	i386	execve			sys_execve			__ia32_compat_sys_execve
+12	i386	chdir			sys_chdir			__ia32_sys_chdir
+13	i386	time			sys_time			__ia32_compat_sys_time
+14	i386	mknod			sys_mknod			__ia32_sys_mknod
+15	i386	chmod			sys_chmod			__ia32_sys_chmod
+16	i386	lchown			sys_lchown16			__ia32_sys_lchown16
 17	i386	break
-18	i386	oldstat			sys_stat
-19	i386	lseek			sys_lseek			compat_sys_lseek
-20	i386	getpid			sys_getpid
-21	i386	mount			sys_mount			compat_sys_mount
-22	i386	umount			sys_oldumount
-23	i386	setuid			sys_setuid16
-24	i386	getuid			sys_getuid16
-25	i386	stime			sys_stime			compat_sys_stime
-26	i386	ptrace			sys_ptrace			compat_sys_ptrace
-27	i386	alarm			sys_alarm
-28	i386	oldfstat		sys_fstat
-29	i386	pause			sys_pause
-30	i386	utime			sys_utime			compat_sys_utime
+18	i386	oldstat			sys_stat			__ia32_sys_stat
+19	i386	lseek			sys_lseek			__ia32_compat_sys_lseek
+20	i386	getpid			sys_getpid			__ia32_sys_getpid
+21	i386	mount			sys_mount			__ia32_compat_sys_mount
+22	i386	umount			sys_oldumount			__ia32_sys_oldumount
+23	i386	setuid			sys_setuid16			__ia32_sys_setuid16
+24	i386	getuid			sys_getuid16			__ia32_sys_getuid16
+25	i386	stime			sys_stime			__ia32_compat_sys_stime
+26	i386	ptrace			sys_ptrace			__ia32_compat_sys_ptrace
+27	i386	alarm			sys_alarm			__ia32_sys_alarm
+28	i386	oldfstat		sys_fstat			__ia32_sys_fstat
+29	i386	pause			sys_pause			__ia32_sys_pause
+30	i386	utime			sys_utime			__ia32_compat_sys_utime
 31	i386	stty
 32	i386	gtty
-33	i386	access			sys_access
-34	i386	nice			sys_nice
+33	i386	access			sys_access			__ia32_sys_access
+34	i386	nice			sys_nice			__ia32_sys_nice
 35	i386	ftime
-36	i386	sync			sys_sync
-37	i386	kill			sys_kill
-38	i386	rename			sys_rename
-39	i386	mkdir			sys_mkdir
-40	i386	rmdir			sys_rmdir
-41	i386	dup			sys_dup
-42	i386	pipe			sys_pipe
-43	i386	times			sys_times			compat_sys_times
+36	i386	sync			sys_sync			__ia32_sys_sync
+37	i386	kill			sys_kill			__ia32_sys_kill
+38	i386	rename			sys_rename			__ia32_sys_rename
+39	i386	mkdir			sys_mkdir			__ia32_sys_mkdir
+40	i386	rmdir			sys_rmdir			__ia32_sys_rmdir
+41	i386	dup			sys_dup				__ia32_sys_dup
+42	i386	pipe			sys_pipe			__ia32_sys_pipe
+43	i386	times			sys_times			__ia32_compat_sys_times
 44	i386	prof
-45	i386	brk			sys_brk
-46	i386	setgid			sys_setgid16
-47	i386	getgid			sys_getgid16
-48	i386	signal			sys_signal
-49	i386	geteuid			sys_geteuid16
-50	i386	getegid			sys_getegid16
-51	i386	acct			sys_acct
-52	i386	umount2			sys_umount
+45	i386	brk			sys_brk				__ia32_sys_brk
+46	i386	setgid			sys_setgid16			__ia32_sys_setgid16
+47	i386	getgid			sys_getgid16			__ia32_sys_getgid16
+48	i386	signal			sys_signal			__ia32_sys_signal
+49	i386	geteuid			sys_geteuid16			__ia32_sys_geteuid16
+50	i386	getegid			sys_getegid16			__ia32_sys_getegid16
+51	i386	acct			sys_acct			__ia32_sys_acct
+52	i386	umount2			sys_umount			__ia32_sys_umount
 53	i386	lock
-54	i386	ioctl			sys_ioctl			compat_sys_ioctl
-55	i386	fcntl			sys_fcntl			compat_sys_fcntl64
+54	i386	ioctl			sys_ioctl			__ia32_compat_sys_ioctl
+55	i386	fcntl			sys_fcntl			__ia32_compat_sys_fcntl64
 56	i386	mpx
-57	i386	setpgid			sys_setpgid
+57	i386	setpgid			sys_setpgid			__ia32_sys_setpgid
 58	i386	ulimit
-59	i386	oldolduname		sys_olduname
-60	i386	umask			sys_umask
-61	i386	chroot			sys_chroot
-62	i386	ustat			sys_ustat			compat_sys_ustat
-63	i386	dup2			sys_dup2
-64	i386	getppid			sys_getppid
-65	i386	getpgrp			sys_getpgrp
-66	i386	setsid			sys_setsid
-67	i386	sigaction		sys_sigaction			compat_sys_sigaction
-68	i386	sgetmask		sys_sgetmask
-69	i386	ssetmask		sys_ssetmask
-70	i386	setreuid		sys_setreuid16
-71	i386	setregid		sys_setregid16
-72	i386	sigsuspend		sys_sigsuspend
-73	i386	sigpending		sys_sigpending			compat_sys_sigpending
-74	i386	sethostname		sys_sethostname
-75	i386	setrlimit		sys_setrlimit			compat_sys_setrlimit
-76	i386	getrlimit		sys_old_getrlimit		compat_sys_old_getrlimit
-77	i386	getrusage		sys_getrusage			compat_sys_getrusage
-78	i386	gettimeofday		sys_gettimeofday		compat_sys_gettimeofday
-79	i386	settimeofday		sys_settimeofday		compat_sys_settimeofday
-80	i386	getgroups		sys_getgroups16
-81	i386	setgroups		sys_setgroups16
-82	i386	select			sys_old_select			compat_sys_old_select
-83	i386	symlink			sys_symlink
-84	i386	oldlstat		sys_lstat
-85	i386	readlink		sys_readlink
-86	i386	uselib			sys_uselib
-87	i386	swapon			sys_swapon
-88	i386	reboot			sys_reboot
-89	i386	readdir			sys_old_readdir			compat_sys_old_readdir
-90	i386	mmap			sys_old_mmap			compat_sys_x86_mmap
-91	i386	munmap			sys_munmap
-92	i386	truncate		sys_truncate			compat_sys_truncate
-93	i386	ftruncate		sys_ftruncate			compat_sys_ftruncate
-94	i386	fchmod			sys_fchmod
-95	i386	fchown			sys_fchown16
-96	i386	getpriority		sys_getpriority
-97	i386	setpriority		sys_setpriority
+59	i386	oldolduname		sys_olduname			__ia32_sys_olduname
+60	i386	umask			sys_umask			__ia32_sys_umask
+61	i386	chroot			sys_chroot			__ia32_sys_chroot
+62	i386	ustat			sys_ustat			__ia32_compat_sys_ustat
+63	i386	dup2			sys_dup2			__ia32_sys_dup2
+64	i386	getppid			sys_getppid			__ia32_sys_getppid
+65	i386	getpgrp			sys_getpgrp			__ia32_sys_getpgrp
+66	i386	setsid			sys_setsid			__ia32_sys_setsid
+67	i386	sigaction		sys_sigaction			__ia32_compat_sys_sigaction
+68	i386	sgetmask		sys_sgetmask			__ia32_sys_sgetmask
+69	i386	ssetmask		sys_ssetmask			__ia32_sys_ssetmask
+70	i386	setreuid		sys_setreuid16			__ia32_sys_setreuid16
+71	i386	setregid		sys_setregid16			__ia32_sys_setregid16
+72	i386	sigsuspend		sys_sigsuspend			__ia32_sys_sigsuspend
+73	i386	sigpending		sys_sigpending			__ia32_compat_sys_sigpending
+74	i386	sethostname		sys_sethostname			__ia32_sys_sethostname
+75	i386	setrlimit		sys_setrlimit			__ia32_compat_sys_setrlimit
+76	i386	getrlimit		sys_old_getrlimit		__ia32_compat_sys_old_getrlimit
+77	i386	getrusage		sys_getrusage			__ia32_compat_sys_getrusage
+78	i386	gettimeofday		sys_gettimeofday		__ia32_compat_sys_gettimeofday
+79	i386	settimeofday		sys_settimeofday		__ia32_compat_sys_settimeofday
+80	i386	getgroups		sys_getgroups16			__ia32_sys_getgroups16
+81	i386	setgroups		sys_setgroups16			__ia32_sys_setgroups16
+82	i386	select			sys_old_select			__ia32_compat_sys_old_select
+83	i386	symlink			sys_symlink			__ia32_sys_symlink
+84	i386	oldlstat		sys_lstat			__ia32_sys_lstat
+85	i386	readlink		sys_readlink			__ia32_sys_readlink
+86	i386	uselib			sys_uselib			__ia32_sys_uselib
+87	i386	swapon			sys_swapon			__ia32_sys_swapon
+88	i386	reboot			sys_reboot			__ia32_sys_reboot
+89	i386	readdir			sys_old_readdir			__ia32_compat_sys_old_readdir
+90	i386	mmap			sys_old_mmap			__ia32_compat_sys_x86_mmap
+91	i386	munmap			sys_munmap			__ia32_sys_munmap
+92	i386	truncate		sys_truncate			__ia32_compat_sys_truncate
+93	i386	ftruncate		sys_ftruncate			__ia32_compat_sys_ftruncate
+94	i386	fchmod			sys_fchmod			__ia32_sys_fchmod
+95	i386	fchown			sys_fchown16			__ia32_sys_fchown16
+96	i386	getpriority		sys_getpriority			__ia32_sys_getpriority
+97	i386	setpriority		sys_setpriority			__ia32_sys_setpriority
 98	i386	profil
-99	i386	statfs			sys_statfs			compat_sys_statfs
-100	i386	fstatfs			sys_fstatfs			compat_sys_fstatfs
-101	i386	ioperm			sys_ioperm
-102	i386	socketcall		sys_socketcall			compat_sys_socketcall
-103	i386	syslog			sys_syslog
-104	i386	setitimer		sys_setitimer			compat_sys_setitimer
-105	i386	getitimer		sys_getitimer			compat_sys_getitimer
-106	i386	stat			sys_newstat			compat_sys_newstat
-107	i386	lstat			sys_newlstat			compat_sys_newlstat
-108	i386	fstat			sys_newfstat			compat_sys_newfstat
-109	i386	olduname		sys_uname
-110	i386	iopl			sys_iopl
-111	i386	vhangup			sys_vhangup
+99	i386	statfs			sys_statfs			__ia32_compat_sys_statfs
+100	i386	fstatfs			sys_fstatfs			__ia32_compat_sys_fstatfs
+101	i386	ioperm			sys_ioperm			__ia32_sys_ioperm
+102	i386	socketcall		sys_socketcall			__ia32_compat_sys_socketcall
+103	i386	syslog			sys_syslog			__ia32_sys_syslog
+104	i386	setitimer		sys_setitimer			__ia32_compat_sys_setitimer
+105	i386	getitimer		sys_getitimer			__ia32_compat_sys_getitimer
+106	i386	stat			sys_newstat			__ia32_compat_sys_newstat
+107	i386	lstat			sys_newlstat			__ia32_compat_sys_newlstat
+108	i386	fstat			sys_newfstat			__ia32_compat_sys_newfstat
+109	i386	olduname		sys_uname			__ia32_sys_uname
+110	i386	iopl			sys_iopl			__ia32_sys_iopl
+111	i386	vhangup			sys_vhangup			__ia32_sys_vhangup
 112	i386	idle
 113	i386	vm86old			sys_vm86old			sys_ni_syscall
-114	i386	wait4			sys_wait4			compat_sys_wait4
-115	i386	swapoff			sys_swapoff
-116	i386	sysinfo			sys_sysinfo			compat_sys_sysinfo
-117	i386	ipc			sys_ipc				compat_sys_ipc
-118	i386	fsync			sys_fsync
+114	i386	wait4			sys_wait4			__ia32_compat_sys_wait4
+115	i386	swapoff			sys_swapoff			__ia32_sys_swapoff
+116	i386	sysinfo			sys_sysinfo			__ia32_compat_sys_sysinfo
+117	i386	ipc			sys_ipc				__ia32_compat_sys_ipc
+118	i386	fsync			sys_fsync			__ia32_sys_fsync
 119	i386	sigreturn		sys_sigreturn			sys32_sigreturn
-120	i386	clone			sys_clone			compat_sys_x86_clone
-121	i386	setdomainname		sys_setdomainname
-122	i386	uname			sys_newuname
-123	i386	modify_ldt		sys_modify_ldt
-124	i386	adjtimex		sys_adjtimex			compat_sys_adjtimex
-125	i386	mprotect		sys_mprotect
-126	i386	sigprocmask		sys_sigprocmask			compat_sys_sigprocmask
+120	i386	clone			sys_clone			__ia32_compat_sys_x86_clone
+121	i386	setdomainname		sys_setdomainname		__ia32_sys_setdomainname
+122	i386	uname			sys_newuname			__ia32_sys_newuname
+123	i386	modify_ldt		sys_modify_ldt			__ia32_sys_modify_ldt
+124	i386	adjtimex		sys_adjtimex			__ia32_compat_sys_adjtimex
+125	i386	mprotect		sys_mprotect			__ia32_sys_mprotect
+126	i386	sigprocmask		sys_sigprocmask			__ia32_compat_sys_sigprocmask
 127	i386	create_module
-128	i386	init_module		sys_init_module
-129	i386	delete_module		sys_delete_module
+128	i386	init_module		sys_init_module			__ia32_sys_init_module
+129	i386	delete_module		sys_delete_module		__ia32_sys_delete_module
 130	i386	get_kernel_syms
-131	i386	quotactl		sys_quotactl			compat_sys_quotactl32
-132	i386	getpgid			sys_getpgid
-133	i386	fchdir			sys_fchdir
-134	i386	bdflush			sys_bdflush
-135	i386	sysfs			sys_sysfs
-136	i386	personality		sys_personality
+131	i386	quotactl		sys_quotactl			__ia32_compat_sys_quotactl32
+132	i386	getpgid			sys_getpgid			__ia32_sys_getpgid
+133	i386	fchdir			sys_fchdir			__ia32_sys_fchdir
+134	i386	bdflush			sys_bdflush			__ia32_sys_bdflush
+135	i386	sysfs			sys_sysfs			__ia32_sys_sysfs
+136	i386	personality		sys_personality			__ia32_sys_personality
 137	i386	afs_syscall
-138	i386	setfsuid		sys_setfsuid16
-139	i386	setfsgid		sys_setfsgid16
-140	i386	_llseek			sys_llseek
-141	i386	getdents		sys_getdents			compat_sys_getdents
-142	i386	_newselect		sys_select			compat_sys_select
-143	i386	flock			sys_flock
-144	i386	msync			sys_msync
-145	i386	readv			sys_readv			compat_sys_readv
-146	i386	writev			sys_writev			compat_sys_writev
-147	i386	getsid			sys_getsid
-148	i386	fdatasync		sys_fdatasync
-149	i386	_sysctl			sys_sysctl			compat_sys_sysctl
-150	i386	mlock			sys_mlock
-151	i386	munlock			sys_munlock
-152	i386	mlockall		sys_mlockall
-153	i386	munlockall		sys_munlockall
-154	i386	sched_setparam		sys_sched_setparam
-155	i386	sched_getparam		sys_sched_getparam
-156	i386	sched_setscheduler	sys_sched_setscheduler
-157	i386	sched_getscheduler	sys_sched_getscheduler
-158	i386	sched_yield		sys_sched_yield
-159	i386	sched_get_priority_max	sys_sched_get_priority_max
-160	i386	sched_get_priority_min	sys_sched_get_priority_min
-161	i386	sched_rr_get_interval	sys_sched_rr_get_interval	compat_sys_sched_rr_get_interval
-162	i386	nanosleep		sys_nanosleep			compat_sys_nanosleep
-163	i386	mremap			sys_mremap
-164	i386	setresuid		sys_setresuid16
-165	i386	getresuid		sys_getresuid16
+138	i386	setfsuid		sys_setfsuid16			__ia32_sys_setfsuid16
+139	i386	setfsgid		sys_setfsgid16			__ia32_sys_setfsgid16
+140	i386	_llseek			sys_llseek			__ia32_sys_llseek
+141	i386	getdents		sys_getdents			__ia32_compat_sys_getdents
+142	i386	_newselect		sys_select			__ia32_compat_sys_select
+143	i386	flock			sys_flock			__ia32_sys_flock
+144	i386	msync			sys_msync			__ia32_sys_msync
+145	i386	readv			sys_readv			__ia32_compat_sys_readv
+146	i386	writev			sys_writev			__ia32_compat_sys_writev
+147	i386	getsid			sys_getsid			__ia32_sys_getsid
+148	i386	fdatasync		sys_fdatasync			__ia32_sys_fdatasync
+149	i386	_sysctl			sys_sysctl			__ia32_compat_sys_sysctl
+150	i386	mlock			sys_mlock			__ia32_sys_mlock
+151	i386	munlock			sys_munlock			__ia32_sys_munlock
+152	i386	mlockall		sys_mlockall			__ia32_sys_mlockall
+153	i386	munlockall		sys_munlockall			__ia32_sys_munlockall
+154	i386	sched_setparam		sys_sched_setparam		__ia32_sys_sched_setparam
+155	i386	sched_getparam		sys_sched_getparam		__ia32_sys_sched_getparam
+156	i386	sched_setscheduler	sys_sched_setscheduler		__ia32_sys_sched_setscheduler
+157	i386	sched_getscheduler	sys_sched_getscheduler		__ia32_sys_sched_getscheduler
+158	i386	sched_yield		sys_sched_yield			__ia32_sys_sched_yield
+159	i386	sched_get_priority_max	sys_sched_get_priority_max	__ia32_sys_sched_get_priority_max
+160	i386	sched_get_priority_min	sys_sched_get_priority_min	__ia32_sys_sched_get_priority_min
+161	i386	sched_rr_get_interval	sys_sched_rr_get_interval	__ia32_compat_sys_sched_rr_get_interval
+162	i386	nanosleep		sys_nanosleep			__ia32_compat_sys_nanosleep
+163	i386	mremap			sys_mremap			__ia32_sys_mremap
+164	i386	setresuid		sys_setresuid16			__ia32_sys_setresuid16
+165	i386	getresuid		sys_getresuid16			__ia32_sys_getresuid16
 166	i386	vm86			sys_vm86			sys_ni_syscall
 167	i386	query_module
-168	i386	poll			sys_poll
+168	i386	poll			sys_poll			__ia32_sys_poll
 169	i386	nfsservctl
-170	i386	setresgid		sys_setresgid16
-171	i386	getresgid		sys_getresgid16
-172	i386	prctl			sys_prctl
+170	i386	setresgid		sys_setresgid16			__ia32_sys_setresgid16
+171	i386	getresgid		sys_getresgid16			__ia32_sys_getresgid16
+172	i386	prctl			sys_prctl			__ia32_sys_prctl
 173	i386	rt_sigreturn		sys_rt_sigreturn		sys32_rt_sigreturn
-174	i386	rt_sigaction		sys_rt_sigaction		compat_sys_rt_sigaction
-175	i386	rt_sigprocmask		sys_rt_sigprocmask
-176	i386	rt_sigpending		sys_rt_sigpending		compat_sys_rt_sigpending
-177	i386	rt_sigtimedwait		sys_rt_sigtimedwait		compat_sys_rt_sigtimedwait
-178	i386	rt_sigqueueinfo		sys_rt_sigqueueinfo		compat_sys_rt_sigqueueinfo
-179	i386	rt_sigsuspend		sys_rt_sigsuspend
-180	i386	pread64			sys_pread64			compat_sys_x86_pread
-181	i386	pwrite64		sys_pwrite64			compat_sys_x86_pwrite
-182	i386	chown			sys_chown16
-183	i386	getcwd			sys_getcwd
-184	i386	capget			sys_capget
-185	i386	capset			sys_capset
-186	i386	sigaltstack		sys_sigaltstack			compat_sys_sigaltstack
-187	i386	sendfile		sys_sendfile			compat_sys_sendfile
+174	i386	rt_sigaction		sys_rt_sigaction		__ia32_compat_sys_rt_sigaction
+175	i386	rt_sigprocmask		sys_rt_sigprocmask		__ia32_sys_rt_sigprocmask
+176	i386	rt_sigpending		sys_rt_sigpending		__ia32_compat_sys_rt_sigpending
+177	i386	rt_sigtimedwait		sys_rt_sigtimedwait		__ia32_compat_sys_rt_sigtimedwait
+178	i386	rt_sigqueueinfo		sys_rt_sigqueueinfo		__ia32_compat_sys_rt_sigqueueinfo
+179	i386	rt_sigsuspend		sys_rt_sigsuspend		__ia32_sys_rt_sigsuspend
+180	i386	pread64			sys_pread64			__ia32_compat_sys_x86_pread
+181	i386	pwrite64		sys_pwrite64			__ia32_compat_sys_x86_pwrite
+182	i386	chown			sys_chown16			__ia32_sys_chown16
+183	i386	getcwd			sys_getcwd			__ia32_sys_getcwd
+184	i386	capget			sys_capget			__ia32_sys_capget
+185	i386	capset			sys_capset			__ia32_sys_capset
+186	i386	sigaltstack		sys_sigaltstack			__ia32_compat_sys_sigaltstack
+187	i386	sendfile		sys_sendfile			__ia32_compat_sys_sendfile
 188	i386	getpmsg
 189	i386	putpmsg
-190	i386	vfork			sys_vfork
-191	i386	ugetrlimit		sys_getrlimit			compat_sys_getrlimit
-192	i386	mmap2			sys_mmap_pgoff
-193	i386	truncate64		sys_truncate64			compat_sys_x86_truncate64
-194	i386	ftruncate64		sys_ftruncate64			compat_sys_x86_ftruncate64
-195	i386	stat64			sys_stat64			compat_sys_x86_stat64
-196	i386	lstat64			sys_lstat64			compat_sys_x86_lstat64
-197	i386	fstat64			sys_fstat64			compat_sys_x86_fstat64
-198	i386	lchown32		sys_lchown
-199	i386	getuid32		sys_getuid
-200	i386	getgid32		sys_getgid
-201	i386	geteuid32		sys_geteuid
-202	i386	getegid32		sys_getegid
-203	i386	setreuid32		sys_setreuid
-204	i386	setregid32		sys_setregid
-205	i386	getgroups32		sys_getgroups
-206	i386	setgroups32		sys_setgroups
-207	i386	fchown32		sys_fchown
-208	i386	setresuid32		sys_setresuid
-209	i386	getresuid32		sys_getresuid
-210	i386	setresgid32		sys_setresgid
-211	i386	getresgid32		sys_getresgid
-212	i386	chown32			sys_chown
-213	i386	setuid32		sys_setuid
-214	i386	setgid32		sys_setgid
-215	i386	setfsuid32		sys_setfsuid
-216	i386	setfsgid32		sys_setfsgid
-217	i386	pivot_root		sys_pivot_root
-218	i386	mincore			sys_mincore
-219	i386	madvise			sys_madvise
-220	i386	getdents64		sys_getdents64
-221	i386	fcntl64			sys_fcntl64			compat_sys_fcntl64
+190	i386	vfork			sys_vfork			__ia32_sys_vfork
+191	i386	ugetrlimit		sys_getrlimit			__ia32_compat_sys_getrlimit
+192	i386	mmap2			sys_mmap_pgoff			__ia32_sys_mmap_pgoff
+193	i386	truncate64		sys_truncate64			__ia32_compat_sys_x86_truncate64
+194	i386	ftruncate64		sys_ftruncate64			__ia32_compat_sys_x86_ftruncate64
+195	i386	stat64			sys_stat64			__ia32_compat_sys_x86_stat64
+196	i386	lstat64			sys_lstat64			__ia32_compat_sys_x86_lstat64
+197	i386	fstat64			sys_fstat64			__ia32_compat_sys_x86_fstat64
+198	i386	lchown32		sys_lchown			__ia32_sys_lchown
+199	i386	getuid32		sys_getuid			__ia32_sys_getuid
+200	i386	getgid32		sys_getgid			__ia32_sys_getgid
+201	i386	geteuid32		sys_geteuid			__ia32_sys_geteuid
+202	i386	getegid32		sys_getegid			__ia32_sys_getegid
+203	i386	setreuid32		sys_setreuid			__ia32_sys_setreuid
+204	i386	setregid32		sys_setregid			__ia32_sys_setregid
+205	i386	getgroups32		sys_getgroups			__ia32_sys_getgroups
+206	i386	setgroups32		sys_setgroups			__ia32_sys_setgroups
+207	i386	fchown32		sys_fchown			__ia32_sys_fchown
+208	i386	setresuid32		sys_setresuid			__ia32_sys_setresuid
+209	i386	getresuid32		sys_getresuid			__ia32_sys_getresuid
+210	i386	setresgid32		sys_setresgid			__ia32_sys_setresgid
+211	i386	getresgid32		sys_getresgid			__ia32_sys_getresgid
+212	i386	chown32			sys_chown			__ia32_sys_chown
+213	i386	setuid32		sys_setuid			__ia32_sys_setuid
+214	i386	setgid32		sys_setgid			__ia32_sys_setgid
+215	i386	setfsuid32		sys_setfsuid			__ia32_sys_setfsuid
+216	i386	setfsgid32		sys_setfsgid			__ia32_sys_setfsgid
+217	i386	pivot_root		sys_pivot_root			__ia32_sys_pivot_root
+218	i386	mincore			sys_mincore			__ia32_sys_mincore
+219	i386	madvise			sys_madvise			__ia32_sys_madvise
+220	i386	getdents64		sys_getdents64			__ia32_sys_getdents64
+221	i386	fcntl64			sys_fcntl64			__ia32_compat_sys_fcntl64
 # 222 is unused
 # 223 is unused
-224	i386	gettid			sys_gettid
-225	i386	readahead		sys_readahead			compat_sys_x86_readahead
-226	i386	setxattr		sys_setxattr
-227	i386	lsetxattr		sys_lsetxattr
-228	i386	fsetxattr		sys_fsetxattr
-229	i386	getxattr		sys_getxattr
-230	i386	lgetxattr		sys_lgetxattr
-231	i386	fgetxattr		sys_fgetxattr
-232	i386	listxattr		sys_listxattr
-233	i386	llistxattr		sys_llistxattr
-234	i386	flistxattr		sys_flistxattr
-235	i386	removexattr		sys_removexattr
-236	i386	lremovexattr		sys_lremovexattr
-237	i386	fremovexattr		sys_fremovexattr
-238	i386	tkill			sys_tkill
-239	i386	sendfile64		sys_sendfile64
-240	i386	futex			sys_futex			compat_sys_futex
-241	i386	sched_setaffinity	sys_sched_setaffinity		compat_sys_sched_setaffinity
-242	i386	sched_getaffinity	sys_sched_getaffinity		compat_sys_sched_getaffinity
-243	i386	set_thread_area		sys_set_thread_area
-244	i386	get_thread_area		sys_get_thread_area
-245	i386	io_setup		sys_io_setup			compat_sys_io_setup
-246	i386	io_destroy		sys_io_destroy
-247	i386	io_getevents		sys_io_getevents		compat_sys_io_getevents
-248	i386	io_submit		sys_io_submit			compat_sys_io_submit
-249	i386	io_cancel		sys_io_cancel
-250	i386	fadvise64		sys_fadvise64			compat_sys_x86_fadvise64
+224	i386	gettid			sys_gettid			__ia32_sys_gettid
+225	i386	readahead		sys_readahead			__ia32_compat_sys_x86_readahead
+226	i386	setxattr		sys_setxattr			__ia32_sys_setxattr
+227	i386	lsetxattr		sys_lsetxattr			__ia32_sys_lsetxattr
+228	i386	fsetxattr		sys_fsetxattr			__ia32_sys_fsetxattr
+229	i386	getxattr		sys_getxattr			__ia32_sys_getxattr
+230	i386	lgetxattr		sys_lgetxattr			__ia32_sys_lgetxattr
+231	i386	fgetxattr		sys_fgetxattr			__ia32_sys_fgetxattr
+232	i386	listxattr		sys_listxattr			__ia32_sys_listxattr
+233	i386	llistxattr		sys_llistxattr			__ia32_sys_llistxattr
+234	i386	flistxattr		sys_flistxattr			__ia32_sys_flistxattr
+235	i386	removexattr		sys_removexattr			__ia32_sys_removexattr
+236	i386	lremovexattr		sys_lremovexattr		__ia32_sys_lremovexattr
+237	i386	fremovexattr		sys_fremovexattr		__ia32_sys_fremovexattr
+238	i386	tkill			sys_tkill			__ia32_sys_tkill
+239	i386	sendfile64		sys_sendfile64			__ia32_sys_sendfile64
+240	i386	futex			sys_futex			__ia32_compat_sys_futex
+241	i386	sched_setaffinity	sys_sched_setaffinity		__ia32_compat_sys_sched_setaffinity
+242	i386	sched_getaffinity	sys_sched_getaffinity		__ia32_compat_sys_sched_getaffinity
+243	i386	set_thread_area		sys_set_thread_area		__ia32_sys_set_thread_area
+244	i386	get_thread_area		sys_get_thread_area		__ia32_sys_get_thread_area
+245	i386	io_setup		sys_io_setup			__ia32_compat_sys_io_setup
+246	i386	io_destroy		sys_io_destroy			__ia32_sys_io_destroy
+247	i386	io_getevents		sys_io_getevents		__ia32_compat_sys_io_getevents
+248	i386	io_submit		sys_io_submit			__ia32_compat_sys_io_submit
+249	i386	io_cancel		sys_io_cancel			__ia32_sys_io_cancel
+250	i386	fadvise64		sys_fadvise64			__ia32_compat_sys_x86_fadvise64
 # 251 is available for reuse (was briefly sys_set_zone_reclaim)
-252	i386	exit_group		sys_exit_group
-253	i386	lookup_dcookie		sys_lookup_dcookie		compat_sys_lookup_dcookie
-254	i386	epoll_create		sys_epoll_create
-255	i386	epoll_ctl		sys_epoll_ctl
-256	i386	epoll_wait		sys_epoll_wait
-257	i386	remap_file_pages	sys_remap_file_pages
-258	i386	set_tid_address		sys_set_tid_address
-259	i386	timer_create		sys_timer_create		compat_sys_timer_create
-260	i386	timer_settime		sys_timer_settime		compat_sys_timer_settime
-261	i386	timer_gettime		sys_timer_gettime		compat_sys_timer_gettime
-262	i386	timer_getoverrun	sys_timer_getoverrun
-263	i386	timer_delete		sys_timer_delete
-264	i386	clock_settime		sys_clock_settime		compat_sys_clock_settime
-265	i386	clock_gettime		sys_clock_gettime		compat_sys_clock_gettime
-266	i386	clock_getres		sys_clock_getres		compat_sys_clock_getres
-267	i386	clock_nanosleep		sys_clock_nanosleep		compat_sys_clock_nanosleep
-268	i386	statfs64		sys_statfs64			compat_sys_statfs64
-269	i386	fstatfs64		sys_fstatfs64			compat_sys_fstatfs64
-270	i386	tgkill			sys_tgkill
-271	i386	utimes			sys_utimes			compat_sys_utimes
-272	i386	fadvise64_64		sys_fadvise64_64		compat_sys_x86_fadvise64_64
+252	i386	exit_group		sys_exit_group			__ia32_sys_exit_group
+253	i386	lookup_dcookie		sys_lookup_dcookie		__ia32_compat_sys_lookup_dcookie
+254	i386	epoll_create		sys_epoll_create		__ia32_sys_epoll_create
+255	i386	epoll_ctl		sys_epoll_ctl			__ia32_sys_epoll_ctl
+256	i386	epoll_wait		sys_epoll_wait			__ia32_sys_epoll_wait
+257	i386	remap_file_pages	sys_remap_file_pages		__ia32_sys_remap_file_pages
+258	i386	set_tid_address		sys_set_tid_address		__ia32_sys_set_tid_address
+259	i386	timer_create		sys_timer_create		__ia32_compat_sys_timer_create
+260	i386	timer_settime		sys_timer_settime		__ia32_compat_sys_timer_settime
+261	i386	timer_gettime		sys_timer_gettime		__ia32_compat_sys_timer_gettime
+262	i386	timer_getoverrun	sys_timer_getoverrun		__ia32_sys_timer_getoverrun
+263	i386	timer_delete		sys_timer_delete		__ia32_sys_timer_delete
+264	i386	clock_settime		sys_clock_settime		__ia32_compat_sys_clock_settime
+265	i386	clock_gettime		sys_clock_gettime		__ia32_compat_sys_clock_gettime
+266	i386	clock_getres		sys_clock_getres		__ia32_compat_sys_clock_getres
+267	i386	clock_nanosleep		sys_clock_nanosleep		__ia32_compat_sys_clock_nanosleep
+268	i386	statfs64		sys_statfs64			__ia32_compat_sys_statfs64
+269	i386	fstatfs64		sys_fstatfs64			__ia32_compat_sys_fstatfs64
+270	i386	tgkill			sys_tgkill			__ia32_sys_tgkill
+271	i386	utimes			sys_utimes			__ia32_compat_sys_utimes
+272	i386	fadvise64_64		sys_fadvise64_64		__ia32_compat_sys_x86_fadvise64_64
 273	i386	vserver
-274	i386	mbind			sys_mbind
-275	i386	get_mempolicy		sys_get_mempolicy		compat_sys_get_mempolicy
-276	i386	set_mempolicy		sys_set_mempolicy
-277	i386	mq_open			sys_mq_open			compat_sys_mq_open
-278	i386	mq_unlink		sys_mq_unlink
-279	i386	mq_timedsend		sys_mq_timedsend		compat_sys_mq_timedsend
-280	i386	mq_timedreceive		sys_mq_timedreceive		compat_sys_mq_timedreceive
-281	i386	mq_notify		sys_mq_notify			compat_sys_mq_notify
-282	i386	mq_getsetattr		sys_mq_getsetattr		compat_sys_mq_getsetattr
-283	i386	kexec_load		sys_kexec_load			compat_sys_kexec_load
-284	i386	waitid			sys_waitid			compat_sys_waitid
+274	i386	mbind			sys_mbind			__ia32_sys_mbind
+275	i386	get_mempolicy		sys_get_mempolicy		__ia32_compat_sys_get_mempolicy
+276	i386	set_mempolicy		sys_set_mempolicy		__ia32_sys_set_mempolicy
+277	i386	mq_open			sys_mq_open			__ia32_compat_sys_mq_open
+278	i386	mq_unlink		sys_mq_unlink			__ia32_sys_mq_unlink
+279	i386	mq_timedsend		sys_mq_timedsend		__ia32_compat_sys_mq_timedsend
+280	i386	mq_timedreceive		sys_mq_timedreceive		__ia32_compat_sys_mq_timedreceive
+281	i386	mq_notify		sys_mq_notify			__ia32_compat_sys_mq_notify
+282	i386	mq_getsetattr		sys_mq_getsetattr		__ia32_compat_sys_mq_getsetattr
+283	i386	kexec_load		sys_kexec_load			__ia32_compat_sys_kexec_load
+284	i386	waitid			sys_waitid			__ia32_compat_sys_waitid
 # 285 sys_setaltroot
-286	i386	add_key			sys_add_key
-287	i386	request_key		sys_request_key
-288	i386	keyctl			sys_keyctl			compat_sys_keyctl
-289	i386	ioprio_set		sys_ioprio_set
-290	i386	ioprio_get		sys_ioprio_get
-291	i386	inotify_init		sys_inotify_init
-292	i386	inotify_add_watch	sys_inotify_add_watch
-293	i386	inotify_rm_watch	sys_inotify_rm_watch
-294	i386	migrate_pages		sys_migrate_pages
-295	i386	openat			sys_openat			compat_sys_openat
-296	i386	mkdirat			sys_mkdirat
-297	i386	mknodat			sys_mknodat
-298	i386	fchownat		sys_fchownat
-299	i386	futimesat		sys_futimesat			compat_sys_futimesat
-300	i386	fstatat64		sys_fstatat64			compat_sys_x86_fstatat
-301	i386	unlinkat		sys_unlinkat
-302	i386	renameat		sys_renameat
-303	i386	linkat			sys_linkat
-304	i386	symlinkat		sys_symlinkat
-305	i386	readlinkat		sys_readlinkat
-306	i386	fchmodat		sys_fchmodat
-307	i386	faccessat		sys_faccessat
-308	i386	pselect6		sys_pselect6			compat_sys_pselect6
-309	i386	ppoll			sys_ppoll			compat_sys_ppoll
-310	i386	unshare			sys_unshare
-311	i386	set_robust_list		sys_set_robust_list		compat_sys_set_robust_list
-312	i386	get_robust_list		sys_get_robust_list		compat_sys_get_robust_list
-313	i386	splice			sys_splice
-314	i386	sync_file_range		sys_sync_file_range		compat_sys_x86_sync_file_range
-315	i386	tee			sys_tee
-316	i386	vmsplice		sys_vmsplice			compat_sys_vmsplice
-317	i386	move_pages		sys_move_pages			compat_sys_move_pages
-318	i386	getcpu			sys_getcpu
-319	i386	epoll_pwait		sys_epoll_pwait
-320	i386	utimensat		sys_utimensat			compat_sys_utimensat
-321	i386	signalfd		sys_signalfd			compat_sys_signalfd
-322	i386	timerfd_create		sys_timerfd_create
-323	i386	eventfd			sys_eventfd
-324	i386	fallocate		sys_fallocate			compat_sys_x86_fallocate
-325	i386	timerfd_settime		sys_timerfd_settime		compat_sys_timerfd_settime
-326	i386	timerfd_gettime		sys_timerfd_gettime		compat_sys_timerfd_gettime
-327	i386	signalfd4		sys_signalfd4			compat_sys_signalfd4
-328	i386	eventfd2		sys_eventfd2
-329	i386	epoll_create1		sys_epoll_create1
-330	i386	dup3			sys_dup3
-331	i386	pipe2			sys_pipe2
-332	i386	inotify_init1		sys_inotify_init1
-333	i386	preadv			sys_preadv			compat_sys_preadv
-334	i386	pwritev			sys_pwritev			compat_sys_pwritev
-335	i386	rt_tgsigqueueinfo	sys_rt_tgsigqueueinfo		compat_sys_rt_tgsigqueueinfo
-336	i386	perf_event_open		sys_perf_event_open
-337	i386	recvmmsg		sys_recvmmsg			compat_sys_recvmmsg
-338	i386	fanotify_init		sys_fanotify_init
-339	i386	fanotify_mark		sys_fanotify_mark		compat_sys_fanotify_mark
-340	i386	prlimit64		sys_prlimit64
-341	i386	name_to_handle_at	sys_name_to_handle_at
-342	i386	open_by_handle_at	sys_open_by_handle_at		compat_sys_open_by_handle_at
-343	i386	clock_adjtime		sys_clock_adjtime		compat_sys_clock_adjtime
-344	i386	syncfs			sys_syncfs
-345	i386	sendmmsg		sys_sendmmsg			compat_sys_sendmmsg
-346	i386	setns			sys_setns
-347	i386	process_vm_readv	sys_process_vm_readv		compat_sys_process_vm_readv
-348	i386	process_vm_writev	sys_process_vm_writev		compat_sys_process_vm_writev
-349	i386	kcmp			sys_kcmp
-350	i386	finit_module		sys_finit_module
-351	i386	sched_setattr		sys_sched_setattr
-352	i386	sched_getattr		sys_sched_getattr
-353	i386	renameat2		sys_renameat2
-354	i386	seccomp			sys_seccomp
-355	i386	getrandom		sys_getrandom
-356	i386	memfd_create		sys_memfd_create
-357	i386	bpf			sys_bpf
-358	i386	execveat		sys_execveat			compat_sys_execveat
-359	i386	socket			sys_socket
-360	i386	socketpair		sys_socketpair
-361	i386	bind			sys_bind
-362	i386	connect			sys_connect
-363	i386	listen			sys_listen
-364	i386	accept4			sys_accept4
-365	i386	getsockopt		sys_getsockopt			compat_sys_getsockopt
-366	i386	setsockopt		sys_setsockopt			compat_sys_setsockopt
-367	i386	getsockname		sys_getsockname
-368	i386	getpeername		sys_getpeername
-369	i386	sendto			sys_sendto
-370	i386	sendmsg			sys_sendmsg			compat_sys_sendmsg
-371	i386	recvfrom		sys_recvfrom			compat_sys_recvfrom
-372	i386	recvmsg			sys_recvmsg			compat_sys_recvmsg
-373	i386	shutdown		sys_shutdown
-374	i386	userfaultfd		sys_userfaultfd
-375	i386	membarrier		sys_membarrier
-376	i386	mlock2			sys_mlock2
-377	i386	copy_file_range		sys_copy_file_range
-378	i386	preadv2			sys_preadv2			compat_sys_preadv2
-379	i386	pwritev2		sys_pwritev2			compat_sys_pwritev2
-380	i386	pkey_mprotect		sys_pkey_mprotect
-381	i386	pkey_alloc		sys_pkey_alloc
-382	i386	pkey_free		sys_pkey_free
-383	i386	statx			sys_statx
-384	i386	arch_prctl		sys_arch_prctl			compat_sys_arch_prctl
+286	i386	add_key			sys_add_key			__ia32_sys_add_key
+287	i386	request_key		sys_request_key			__ia32_sys_request_key
+288	i386	keyctl			sys_keyctl			__ia32_compat_sys_keyctl
+289	i386	ioprio_set		sys_ioprio_set			__ia32_sys_ioprio_set
+290	i386	ioprio_get		sys_ioprio_get			__ia32_sys_ioprio_get
+291	i386	inotify_init		sys_inotify_init		__ia32_sys_inotify_init
+292	i386	inotify_add_watch	sys_inotify_add_watch		__ia32_sys_inotify_add_watch
+293	i386	inotify_rm_watch	sys_inotify_rm_watch		__ia32_sys_inotify_rm_watch
+294	i386	migrate_pages		sys_migrate_pages		__ia32_sys_migrate_pages
+295	i386	openat			sys_openat			__ia32_compat_sys_openat
+296	i386	mkdirat			sys_mkdirat			__ia32_sys_mkdirat
+297	i386	mknodat			sys_mknodat			__ia32_sys_mknodat
+298	i386	fchownat		sys_fchownat			__ia32_sys_fchownat
+299	i386	futimesat		sys_futimesat			__ia32_compat_sys_futimesat
+300	i386	fstatat64		sys_fstatat64			__ia32_compat_sys_x86_fstatat
+301	i386	unlinkat		sys_unlinkat			__ia32_sys_unlinkat
+302	i386	renameat		sys_renameat			__ia32_sys_renameat
+303	i386	linkat			sys_linkat			__ia32_sys_linkat
+304	i386	symlinkat		sys_symlinkat			__ia32_sys_symlinkat
+305	i386	readlinkat		sys_readlinkat			__ia32_sys_readlinkat
+306	i386	fchmodat		sys_fchmodat			__ia32_sys_fchmodat
+307	i386	faccessat		sys_faccessat			__ia32_sys_faccessat
+308	i386	pselect6		sys_pselect6			__ia32_compat_sys_pselect6
+309	i386	ppoll			sys_ppoll			__ia32_compat_sys_ppoll
+310	i386	unshare			sys_unshare			__ia32_sys_unshare
+311	i386	set_robust_list		sys_set_robust_list		__ia32_compat_sys_set_robust_list
+312	i386	get_robust_list		sys_get_robust_list		__ia32_compat_sys_get_robust_list
+313	i386	splice			sys_splice			__ia32_sys_splice
+314	i386	sync_file_range		sys_sync_file_range		__ia32_compat_sys_x86_sync_file_range
+315	i386	tee			sys_tee				__ia32_sys_tee
+316	i386	vmsplice		sys_vmsplice			__ia32_compat_sys_vmsplice
+317	i386	move_pages		sys_move_pages			__ia32_compat_sys_move_pages
+318	i386	getcpu			sys_getcpu			__ia32_sys_getcpu
+319	i386	epoll_pwait		sys_epoll_pwait			__ia32_sys_epoll_pwait
+320	i386	utimensat		sys_utimensat			__ia32_compat_sys_utimensat
+321	i386	signalfd		sys_signalfd			__ia32_compat_sys_signalfd
+322	i386	timerfd_create		sys_timerfd_create		__ia32_sys_timerfd_create
+323	i386	eventfd			sys_eventfd			__ia32_sys_eventfd
+324	i386	fallocate		sys_fallocate			__ia32_compat_sys_x86_fallocate
+325	i386	timerfd_settime		sys_timerfd_settime		__ia32_compat_sys_timerfd_settime
+326	i386	timerfd_gettime		sys_timerfd_gettime		__ia32_compat_sys_timerfd_gettime
+327	i386	signalfd4		sys_signalfd4			__ia32_compat_sys_signalfd4
+328	i386	eventfd2		sys_eventfd2			__ia32_sys_eventfd2
+329	i386	epoll_create1		sys_epoll_create1		__ia32_sys_epoll_create1
+330	i386	dup3			sys_dup3			__ia32_sys_dup3
+331	i386	pipe2			sys_pipe2			__ia32_sys_pipe2
+332	i386	inotify_init1		sys_inotify_init1		__ia32_sys_inotify_init1
+333	i386	preadv			sys_preadv			__ia32_compat_sys_preadv
+334	i386	pwritev			sys_pwritev			__ia32_compat_sys_pwritev
+335	i386	rt_tgsigqueueinfo	sys_rt_tgsigqueueinfo		__ia32_compat_sys_rt_tgsigqueueinfo
+336	i386	perf_event_open		sys_perf_event_open		__ia32_sys_perf_event_open
+337	i386	recvmmsg		sys_recvmmsg			__ia32_compat_sys_recvmmsg
+338	i386	fanotify_init		sys_fanotify_init		__ia32_sys_fanotify_init
+339	i386	fanotify_mark		sys_fanotify_mark		__ia32_compat_sys_fanotify_mark
+340	i386	prlimit64		sys_prlimit64			__ia32_sys_prlimit64
+341	i386	name_to_handle_at	sys_name_to_handle_at		__ia32_sys_name_to_handle_at
+342	i386	open_by_handle_at	sys_open_by_handle_at		__ia32_compat_sys_open_by_handle_at
+343	i386	clock_adjtime		sys_clock_adjtime		__ia32_compat_sys_clock_adjtime
+344	i386	syncfs			sys_syncfs			__ia32_sys_syncfs
+345	i386	sendmmsg		sys_sendmmsg			__ia32_compat_sys_sendmmsg
+346	i386	setns			sys_setns			__ia32_sys_setns
+347	i386	process_vm_readv	sys_process_vm_readv		__ia32_compat_sys_process_vm_readv
+348	i386	process_vm_writev	sys_process_vm_writev		__ia32_compat_sys_process_vm_writev
+349	i386	kcmp			sys_kcmp			__ia32_sys_kcmp
+350	i386	finit_module		sys_finit_module		__ia32_sys_finit_module
+351	i386	sched_setattr		sys_sched_setattr		__ia32_sys_sched_setattr
+352	i386	sched_getattr		sys_sched_getattr		__ia32_sys_sched_getattr
+353	i386	renameat2		sys_renameat2			__ia32_sys_renameat2
+354	i386	seccomp			sys_seccomp			__ia32_sys_seccomp
+355	i386	getrandom		sys_getrandom			__ia32_sys_getrandom
+356	i386	memfd_create		sys_memfd_create		__ia32_sys_memfd_create
+357	i386	bpf			sys_bpf				__ia32_sys_bpf
+358	i386	execveat		sys_execveat			__ia32_compat_sys_execveat
+359	i386	socket			sys_socket			__ia32_sys_socket
+360	i386	socketpair		sys_socketpair			__ia32_sys_socketpair
+361	i386	bind			sys_bind			__ia32_sys_bind
+362	i386	connect			sys_connect			__ia32_sys_connect
+363	i386	listen			sys_listen			__ia32_sys_listen
+364	i386	accept4			sys_accept4			__ia32_sys_accept4
+365	i386	getsockopt		sys_getsockopt			__ia32_compat_sys_getsockopt
+366	i386	setsockopt		sys_setsockopt			__ia32_compat_sys_setsockopt
+367	i386	getsockname		sys_getsockname			__ia32_sys_getsockname
+368	i386	getpeername		sys_getpeername			__ia32_sys_getpeername
+369	i386	sendto			sys_sendto			__ia32_sys_sendto
+370	i386	sendmsg			sys_sendmsg			__ia32_compat_sys_sendmsg
+371	i386	recvfrom		sys_recvfrom			__ia32_compat_sys_recvfrom
+372	i386	recvmsg			sys_recvmsg			__ia32_compat_sys_recvmsg
+373	i386	shutdown		sys_shutdown			__ia32_sys_shutdown
+374	i386	userfaultfd		sys_userfaultfd			__ia32_sys_userfaultfd
+375	i386	membarrier		sys_membarrier			__ia32_sys_membarrier
+376	i386	mlock2			sys_mlock2			__ia32_sys_mlock2
+377	i386	copy_file_range		sys_copy_file_range		__ia32_sys_copy_file_range
+378	i386	preadv2			sys_preadv2			__ia32_compat_sys_preadv2
+379	i386	pwritev2		sys_pwritev2			__ia32_compat_sys_pwritev2
+380	i386	pkey_mprotect		sys_pkey_mprotect		__ia32_sys_pkey_mprotect
+381	i386	pkey_alloc		sys_pkey_alloc			__ia32_sys_pkey_alloc
+382	i386	pkey_free		sys_pkey_free			__ia32_sys_pkey_free
+383	i386	statx			sys_statx			__ia32_sys_statx
+384	i386	arch_prctl		sys_arch_prctl			__ia32_compat_sys_arch_prctl
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 5aef183e2f85..4dfe42666d0c 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -4,379 +4,383 @@
 # The format is:
 # <number> <abi> <name> <entry point>
 #
+# The __x64_sys_*() stubs are created on-the-fly for sys_*() system calls
+#
 # The abi is "common", "64" or "x32" for this file.
 #
-0	common	read			sys_read
-1	common	write			sys_write
-2	common	open			sys_open
-3	common	close			sys_close
-4	common	stat			sys_newstat
-5	common	fstat			sys_newfstat
-6	common	lstat			sys_newlstat
-7	common	poll			sys_poll
-8	common	lseek			sys_lseek
-9	common	mmap			sys_mmap
-10	common	mprotect		sys_mprotect
-11	common	munmap			sys_munmap
-12	common	brk			sys_brk
-13	64	rt_sigaction		sys_rt_sigaction
-14	common	rt_sigprocmask		sys_rt_sigprocmask
-15	64	rt_sigreturn		sys_rt_sigreturn/ptregs
-16	64	ioctl			sys_ioctl
-17	common	pread64			sys_pread64
-18	common	pwrite64		sys_pwrite64
-19	64	readv			sys_readv
-20	64	writev			sys_writev
-21	common	access			sys_access
-22	common	pipe			sys_pipe
-23	common	select			sys_select
-24	common	sched_yield		sys_sched_yield
-25	common	mremap			sys_mremap
-26	common	msync			sys_msync
-27	common	mincore			sys_mincore
-28	common	madvise			sys_madvise
-29	common	shmget			sys_shmget
-30	common	shmat			sys_shmat
-31	common	shmctl			sys_shmctl
-32	common	dup			sys_dup
-33	common	dup2			sys_dup2
-34	common	pause			sys_pause
-35	common	nanosleep		sys_nanosleep
-36	common	getitimer		sys_getitimer
-37	common	alarm			sys_alarm
-38	common	setitimer		sys_setitimer
-39	common	getpid			sys_getpid
-40	common	sendfile		sys_sendfile64
-41	common	socket			sys_socket
-42	common	connect			sys_connect
-43	common	accept			sys_accept
-44	common	sendto			sys_sendto
-45	64	recvfrom		sys_recvfrom
-46	64	sendmsg			sys_sendmsg
-47	64	recvmsg			sys_recvmsg
-48	common	shutdown		sys_shutdown
-49	common	bind			sys_bind
-50	common	listen			sys_listen
-51	common	getsockname		sys_getsockname
-52	common	getpeername		sys_getpeername
-53	common	socketpair		sys_socketpair
-54	64	setsockopt		sys_setsockopt
-55	64	getsockopt		sys_getsockopt
-56	common	clone			sys_clone/ptregs
-57	common	fork			sys_fork/ptregs
-58	common	vfork			sys_vfork/ptregs
-59	64	execve			sys_execve/ptregs
-60	common	exit			sys_exit
-61	common	wait4			sys_wait4
-62	common	kill			sys_kill
-63	common	uname			sys_newuname
-64	common	semget			sys_semget
-65	common	semop			sys_semop
-66	common	semctl			sys_semctl
-67	common	shmdt			sys_shmdt
-68	common	msgget			sys_msgget
-69	common	msgsnd			sys_msgsnd
-70	common	msgrcv			sys_msgrcv
-71	common	msgctl			sys_msgctl
-72	common	fcntl			sys_fcntl
-73	common	flock			sys_flock
-74	common	fsync			sys_fsync
-75	common	fdatasync		sys_fdatasync
-76	common	truncate		sys_truncate
-77	common	ftruncate		sys_ftruncate
-78	common	getdents		sys_getdents
-79	common	getcwd			sys_getcwd
-80	common	chdir			sys_chdir
-81	common	fchdir			sys_fchdir
-82	common	rename			sys_rename
-83	common	mkdir			sys_mkdir
-84	common	rmdir			sys_rmdir
-85	common	creat			sys_creat
-86	common	link			sys_link
-87	common	unlink			sys_unlink
-88	common	symlink			sys_symlink
-89	common	readlink		sys_readlink
-90	common	chmod			sys_chmod
-91	common	fchmod			sys_fchmod
-92	common	chown			sys_chown
-93	common	fchown			sys_fchown
-94	common	lchown			sys_lchown
-95	common	umask			sys_umask
-96	common	gettimeofday		sys_gettimeofday
-97	common	getrlimit		sys_getrlimit
-98	common	getrusage		sys_getrusage
-99	common	sysinfo			sys_sysinfo
-100	common	times			sys_times
-101	64	ptrace			sys_ptrace
-102	common	getuid			sys_getuid
-103	common	syslog			sys_syslog
-104	common	getgid			sys_getgid
-105	common	setuid			sys_setuid
-106	common	setgid			sys_setgid
-107	common	geteuid			sys_geteuid
-108	common	getegid			sys_getegid
-109	common	setpgid			sys_setpgid
-110	common	getppid			sys_getppid
-111	common	getpgrp			sys_getpgrp
-112	common	setsid			sys_setsid
-113	common	setreuid		sys_setreuid
-114	common	setregid		sys_setregid
-115	common	getgroups		sys_getgroups
-116	common	setgroups		sys_setgroups
-117	common	setresuid		sys_setresuid
-118	common	getresuid		sys_getresuid
-119	common	setresgid		sys_setresgid
-120	common	getresgid		sys_getresgid
-121	common	getpgid			sys_getpgid
-122	common	setfsuid		sys_setfsuid
-123	common	setfsgid		sys_setfsgid
-124	common	getsid			sys_getsid
-125	common	capget			sys_capget
-126	common	capset			sys_capset
-127	64	rt_sigpending		sys_rt_sigpending
-128	64	rt_sigtimedwait		sys_rt_sigtimedwait
-129	64	rt_sigqueueinfo		sys_rt_sigqueueinfo
-130	common	rt_sigsuspend		sys_rt_sigsuspend
-131	64	sigaltstack		sys_sigaltstack
-132	common	utime			sys_utime
-133	common	mknod			sys_mknod
+0	common	read			__x64_sys_read
+1	common	write			__x64_sys_write
+2	common	open			__x64_sys_open
+3	common	close			__x64_sys_close
+4	common	stat			__x64_sys_newstat
+5	common	fstat			__x64_sys_newfstat
+6	common	lstat			__x64_sys_newlstat
+7	common	poll			__x64_sys_poll
+8	common	lseek			__x64_sys_lseek
+9	common	mmap			__x64_sys_mmap
+10	common	mprotect		__x64_sys_mprotect
+11	common	munmap			__x64_sys_munmap
+12	common	brk			__x64_sys_brk
+13	64	rt_sigaction		__x64_sys_rt_sigaction
+14	common	rt_sigprocmask		__x64_sys_rt_sigprocmask
+15	64	rt_sigreturn		__x64_sys_rt_sigreturn/ptregs
+16	64	ioctl			__x64_sys_ioctl
+17	common	pread64			__x64_sys_pread64
+18	common	pwrite64		__x64_sys_pwrite64
+19	64	readv			__x64_sys_readv
+20	64	writev			__x64_sys_writev
+21	common	access			__x64_sys_access
+22	common	pipe			__x64_sys_pipe
+23	common	select			__x64_sys_select
+24	common	sched_yield		__x64_sys_sched_yield
+25	common	mremap			__x64_sys_mremap
+26	common	msync			__x64_sys_msync
+27	common	mincore			__x64_sys_mincore
+28	common	madvise			__x64_sys_madvise
+29	common	shmget			__x64_sys_shmget
+30	common	shmat			__x64_sys_shmat
+31	common	shmctl			__x64_sys_shmctl
+32	common	dup			__x64_sys_dup
+33	common	dup2			__x64_sys_dup2
+34	common	pause			__x64_sys_pause
+35	common	nanosleep		__x64_sys_nanosleep
+36	common	getitimer		__x64_sys_getitimer
+37	common	alarm			__x64_sys_alarm
+38	common	setitimer		__x64_sys_setitimer
+39	common	getpid			__x64_sys_getpid
+40	common	sendfile		__x64_sys_sendfile64
+41	common	socket			__x64_sys_socket
+42	common	connect			__x64_sys_connect
+43	common	accept			__x64_sys_accept
+44	common	sendto			__x64_sys_sendto
+45	64	recvfrom		__x64_sys_recvfrom
+46	64	sendmsg			__x64_sys_sendmsg
+47	64	recvmsg			__x64_sys_recvmsg
+48	common	shutdown		__x64_sys_shutdown
+49	common	bind			__x64_sys_bind
+50	common	listen			__x64_sys_listen
+51	common	getsockname		__x64_sys_getsockname
+52	common	getpeername		__x64_sys_getpeername
+53	common	socketpair		__x64_sys_socketpair
+54	64	setsockopt		__x64_sys_setsockopt
+55	64	getsockopt		__x64_sys_getsockopt
+56	common	clone			__x64_sys_clone/ptregs
+57	common	fork			__x64_sys_fork/ptregs
+58	common	vfork			__x64_sys_vfork/ptregs
+59	64	execve			__x64_sys_execve/ptregs
+60	common	exit			__x64_sys_exit
+61	common	wait4			__x64_sys_wait4
+62	common	kill			__x64_sys_kill
+63	common	uname			__x64_sys_newuname
+64	common	semget			__x64_sys_semget
+65	common	semop			__x64_sys_semop
+66	common	semctl			__x64_sys_semctl
+67	common	shmdt			__x64_sys_shmdt
+68	common	msgget			__x64_sys_msgget
+69	common	msgsnd			__x64_sys_msgsnd
+70	common	msgrcv			__x64_sys_msgrcv
+71	common	msgctl			__x64_sys_msgctl
+72	common	fcntl			__x64_sys_fcntl
+73	common	flock			__x64_sys_flock
+74	common	fsync			__x64_sys_fsync
+75	common	fdatasync		__x64_sys_fdatasync
+76	common	truncate		__x64_sys_truncate
+77	common	ftruncate		__x64_sys_ftruncate
+78	common	getdents		__x64_sys_getdents
+79	common	getcwd			__x64_sys_getcwd
+80	common	chdir			__x64_sys_chdir
+81	common	fchdir			__x64_sys_fchdir
+82	common	rename			__x64_sys_rename
+83	common	mkdir			__x64_sys_mkdir
+84	common	rmdir			__x64_sys_rmdir
+85	common	creat			__x64_sys_creat
+86	common	link			__x64_sys_link
+87	common	unlink			__x64_sys_unlink
+88	common	symlink			__x64_sys_symlink
+89	common	readlink		__x64_sys_readlink
+90	common	chmod			__x64_sys_chmod
+91	common	fchmod			__x64_sys_fchmod
+92	common	chown			__x64_sys_chown
+93	common	fchown			__x64_sys_fchown
+94	common	lchown			__x64_sys_lchown
+95	common	umask			__x64_sys_umask
+96	common	gettimeofday		__x64_sys_gettimeofday
+97	common	getrlimit		__x64_sys_getrlimit
+98	common	getrusage		__x64_sys_getrusage
+99	common	sysinfo			__x64_sys_sysinfo
+100	common	times			__x64_sys_times
+101	64	ptrace			__x64_sys_ptrace
+102	common	getuid			__x64_sys_getuid
+103	common	syslog			__x64_sys_syslog
+104	common	getgid			__x64_sys_getgid
+105	common	setuid			__x64_sys_setuid
+106	common	setgid			__x64_sys_setgid
+107	common	geteuid			__x64_sys_geteuid
+108	common	getegid			__x64_sys_getegid
+109	common	setpgid			__x64_sys_setpgid
+110	common	getppid			__x64_sys_getppid
+111	common	getpgrp			__x64_sys_getpgrp
+112	common	setsid			__x64_sys_setsid
+113	common	setreuid		__x64_sys_setreuid
+114	common	setregid		__x64_sys_setregid
+115	common	getgroups		__x64_sys_getgroups
+116	common	setgroups		__x64_sys_setgroups
+117	common	setresuid		__x64_sys_setresuid
+118	common	getresuid		__x64_sys_getresuid
+119	common	setresgid		__x64_sys_setresgid
+120	common	getresgid		__x64_sys_getresgid
+121	common	getpgid			__x64_sys_getpgid
+122	common	setfsuid		__x64_sys_setfsuid
+123	common	setfsgid		__x64_sys_setfsgid
+124	common	getsid			__x64_sys_getsid
+125	common	capget			__x64_sys_capget
+126	common	capset			__x64_sys_capset
+127	64	rt_sigpending		__x64_sys_rt_sigpending
+128	64	rt_sigtimedwait		__x64_sys_rt_sigtimedwait
+129	64	rt_sigqueueinfo		__x64_sys_rt_sigqueueinfo
+130	common	rt_sigsuspend		__x64_sys_rt_sigsuspend
+131	64	sigaltstack		__x64_sys_sigaltstack
+132	common	utime			__x64_sys_utime
+133	common	mknod			__x64_sys_mknod
 134	64	uselib
-135	common	personality		sys_personality
-136	common	ustat			sys_ustat
-137	common	statfs			sys_statfs
-138	common	fstatfs			sys_fstatfs
-139	common	sysfs			sys_sysfs
-140	common	getpriority		sys_getpriority
-141	common	setpriority		sys_setpriority
-142	common	sched_setparam		sys_sched_setparam
-143	common	sched_getparam		sys_sched_getparam
-144	common	sched_setscheduler	sys_sched_setscheduler
-145	common	sched_getscheduler	sys_sched_getscheduler
-146	common	sched_get_priority_max	sys_sched_get_priority_max
-147	common	sched_get_priority_min	sys_sched_get_priority_min
-148	common	sched_rr_get_interval	sys_sched_rr_get_interval
-149	common	mlock			sys_mlock
-150	common	munlock			sys_munlock
-151	common	mlockall		sys_mlockall
-152	common	munlockall		sys_munlockall
-153	common	vhangup			sys_vhangup
-154	common	modify_ldt		sys_modify_ldt
-155	common	pivot_root		sys_pivot_root
-156	64	_sysctl			sys_sysctl
-157	common	prctl			sys_prctl
-158	common	arch_prctl		sys_arch_prctl
-159	common	adjtimex		sys_adjtimex
-160	common	setrlimit		sys_setrlimit
-161	common	chroot			sys_chroot
-162	common	sync			sys_sync
-163	common	acct			sys_acct
-164	common	settimeofday		sys_settimeofday
-165	common	mount			sys_mount
-166	common	umount2			sys_umount
-167	common	swapon			sys_swapon
-168	common	swapoff			sys_swapoff
-169	common	reboot			sys_reboot
-170	common	sethostname		sys_sethostname
-171	common	setdomainname		sys_setdomainname
-172	common	iopl			sys_iopl/ptregs
-173	common	ioperm			sys_ioperm
+135	common	personality		__x64_sys_personality
+136	common	ustat			__x64_sys_ustat
+137	common	statfs			__x64_sys_statfs
+138	common	fstatfs			__x64_sys_fstatfs
+139	common	sysfs			__x64_sys_sysfs
+140	common	getpriority		__x64_sys_getpriority
+141	common	setpriority		__x64_sys_setpriority
+142	common	sched_setparam		__x64_sys_sched_setparam
+143	common	sched_getparam		__x64_sys_sched_getparam
+144	common	sched_setscheduler	__x64_sys_sched_setscheduler
+145	common	sched_getscheduler	__x64_sys_sched_getscheduler
+146	common	sched_get_priority_max	__x64_sys_sched_get_priority_max
+147	common	sched_get_priority_min	__x64_sys_sched_get_priority_min
+148	common	sched_rr_get_interval	__x64_sys_sched_rr_get_interval
+149	common	mlock			__x64_sys_mlock
+150	common	munlock			__x64_sys_munlock
+151	common	mlockall		__x64_sys_mlockall
+152	common	munlockall		__x64_sys_munlockall
+153	common	vhangup			__x64_sys_vhangup
+154	common	modify_ldt		__x64_sys_modify_ldt
+155	common	pivot_root		__x64_sys_pivot_root
+156	64	_sysctl			__x64_sys_sysctl
+157	common	prctl			__x64_sys_prctl
+158	common	arch_prctl		__x64_sys_arch_prctl
+159	common	adjtimex		__x64_sys_adjtimex
+160	common	setrlimit		__x64_sys_setrlimit
+161	common	chroot			__x64_sys_chroot
+162	common	sync			__x64_sys_sync
+163	common	acct			__x64_sys_acct
+164	common	settimeofday		__x64_sys_settimeofday
+165	common	mount			__x64_sys_mount
+166	common	umount2			__x64_sys_umount
+167	common	swapon			__x64_sys_swapon
+168	common	swapoff			__x64_sys_swapoff
+169	common	reboot			__x64_sys_reboot
+170	common	sethostname		__x64_sys_sethostname
+171	common	setdomainname		__x64_sys_setdomainname
+172	common	iopl			__x64_sys_iopl/ptregs
+173	common	ioperm			__x64_sys_ioperm
 174	64	create_module
-175	common	init_module		sys_init_module
-176	common	delete_module		sys_delete_module
+175	common	init_module		__x64_sys_init_module
+176	common	delete_module		__x64_sys_delete_module
 177	64	get_kernel_syms
 178	64	query_module
-179	common	quotactl		sys_quotactl
+179	common	quotactl		__x64_sys_quotactl
 180	64	nfsservctl
 181	common	getpmsg
 182	common	putpmsg
 183	common	afs_syscall
 184	common	tuxcall
 185	common	security
-186	common	gettid			sys_gettid
-187	common	readahead		sys_readahead
-188	common	setxattr		sys_setxattr
-189	common	lsetxattr		sys_lsetxattr
-190	common	fsetxattr		sys_fsetxattr
-191	common	getxattr		sys_getxattr
-192	common	lgetxattr		sys_lgetxattr
-193	common	fgetxattr		sys_fgetxattr
-194	common	listxattr		sys_listxattr
-195	common	llistxattr		sys_llistxattr
-196	common	flistxattr		sys_flistxattr
-197	common	removexattr		sys_removexattr
-198	common	lremovexattr		sys_lremovexattr
-199	common	fremovexattr		sys_fremovexattr
-200	common	tkill			sys_tkill
-201	common	time			sys_time
-202	common	futex			sys_futex
-203	common	sched_setaffinity	sys_sched_setaffinity
-204	common	sched_getaffinity	sys_sched_getaffinity
+186	common	gettid			__x64_sys_gettid
+187	common	readahead		__x64_sys_readahead
+188	common	setxattr		__x64_sys_setxattr
+189	common	lsetxattr		__x64_sys_lsetxattr
+190	common	fsetxattr		__x64_sys_fsetxattr
+191	common	getxattr		__x64_sys_getxattr
+192	common	lgetxattr		__x64_sys_lgetxattr
+193	common	fgetxattr		__x64_sys_fgetxattr
+194	common	listxattr		__x64_sys_listxattr
+195	common	llistxattr		__x64_sys_llistxattr
+196	common	flistxattr		__x64_sys_flistxattr
+197	common	removexattr		__x64_sys_removexattr
+198	common	lremovexattr		__x64_sys_lremovexattr
+199	common	fremovexattr		__x64_sys_fremovexattr
+200	common	tkill			__x64_sys_tkill
+201	common	time			__x64_sys_time
+202	common	futex			__x64_sys_futex
+203	common	sched_setaffinity	__x64_sys_sched_setaffinity
+204	common	sched_getaffinity	__x64_sys_sched_getaffinity
 205	64	set_thread_area
-206	64	io_setup		sys_io_setup
-207	common	io_destroy		sys_io_destroy
-208	common	io_getevents		sys_io_getevents
-209	64	io_submit		sys_io_submit
-210	common	io_cancel		sys_io_cancel
+206	64	io_setup		__x64_sys_io_setup
+207	common	io_destroy		__x64_sys_io_destroy
+208	common	io_getevents		__x64_sys_io_getevents
+209	64	io_submit		__x64_sys_io_submit
+210	common	io_cancel		__x64_sys_io_cancel
 211	64	get_thread_area
-212	common	lookup_dcookie		sys_lookup_dcookie
-213	common	epoll_create		sys_epoll_create
+212	common	lookup_dcookie		__x64_sys_lookup_dcookie
+213	common	epoll_create		__x64_sys_epoll_create
 214	64	epoll_ctl_old
 215	64	epoll_wait_old
-216	common	remap_file_pages	sys_remap_file_pages
-217	common	getdents64		sys_getdents64
-218	common	set_tid_address		sys_set_tid_address
-219	common	restart_syscall		sys_restart_syscall
-220	common	semtimedop		sys_semtimedop
-221	common	fadvise64		sys_fadvise64
-222	64	timer_create		sys_timer_create
-223	common	timer_settime		sys_timer_settime
-224	common	timer_gettime		sys_timer_gettime
-225	common	timer_getoverrun	sys_timer_getoverrun
-226	common	timer_delete		sys_timer_delete
-227	common	clock_settime		sys_clock_settime
-228	common	clock_gettime		sys_clock_gettime
-229	common	clock_getres		sys_clock_getres
-230	common	clock_nanosleep		sys_clock_nanosleep
-231	common	exit_group		sys_exit_group
-232	common	epoll_wait		sys_epoll_wait
-233	common	epoll_ctl		sys_epoll_ctl
-234	common	tgkill			sys_tgkill
-235	common	utimes			sys_utimes
+216	common	remap_file_pages	__x64_sys_remap_file_pages
+217	common	getdents64		__x64_sys_getdents64
+218	common	set_tid_address		__x64_sys_set_tid_address
+219	common	restart_syscall		__x64_sys_restart_syscall
+220	common	semtimedop		__x64_sys_semtimedop
+221	common	fadvise64		__x64_sys_fadvise64
+222	64	timer_create		__x64_sys_timer_create
+223	common	timer_settime		__x64_sys_timer_settime
+224	common	timer_gettime		__x64_sys_timer_gettime
+225	common	timer_getoverrun	__x64_sys_timer_getoverrun
+226	common	timer_delete		__x64_sys_timer_delete
+227	common	clock_settime		__x64_sys_clock_settime
+228	common	clock_gettime		__x64_sys_clock_gettime
+229	common	clock_getres		__x64_sys_clock_getres
+230	common	clock_nanosleep		__x64_sys_clock_nanosleep
+231	common	exit_group		__x64_sys_exit_group
+232	common	epoll_wait		__x64_sys_epoll_wait
+233	common	epoll_ctl		__x64_sys_epoll_ctl
+234	common	tgkill			__x64_sys_tgkill
+235	common	utimes			__x64_sys_utimes
 236	64	vserver
-237	common	mbind			sys_mbind
-238	common	set_mempolicy		sys_set_mempolicy
-239	common	get_mempolicy		sys_get_mempolicy
-240	common	mq_open			sys_mq_open
-241	common	mq_unlink		sys_mq_unlink
-242	common	mq_timedsend		sys_mq_timedsend
-243	common	mq_timedreceive		sys_mq_timedreceive
-244	64	mq_notify		sys_mq_notify
-245	common	mq_getsetattr		sys_mq_getsetattr
-246	64	kexec_load		sys_kexec_load
-247	64	waitid			sys_waitid
-248	common	add_key			sys_add_key
-249	common	request_key		sys_request_key
-250	common	keyctl			sys_keyctl
-251	common	ioprio_set		sys_ioprio_set
-252	common	ioprio_get		sys_ioprio_get
-253	common	inotify_init		sys_inotify_init
-254	common	inotify_add_watch	sys_inotify_add_watch
-255	common	inotify_rm_watch	sys_inotify_rm_watch
-256	common	migrate_pages		sys_migrate_pages
-257	common	openat			sys_openat
-258	common	mkdirat			sys_mkdirat
-259	common	mknodat			sys_mknodat
-260	common	fchownat		sys_fchownat
-261	common	futimesat		sys_futimesat
-262	common	newfstatat		sys_newfstatat
-263	common	unlinkat		sys_unlinkat
-264	common	renameat		sys_renameat
-265	common	linkat			sys_linkat
-266	common	symlinkat		sys_symlinkat
-267	common	readlinkat		sys_readlinkat
-268	common	fchmodat		sys_fchmodat
-269	common	faccessat		sys_faccessat
-270	common	pselect6		sys_pselect6
-271	common	ppoll			sys_ppoll
-272	common	unshare			sys_unshare
-273	64	set_robust_list		sys_set_robust_list
-274	64	get_robust_list		sys_get_robust_list
-275	common	splice			sys_splice
-276	common	tee			sys_tee
-277	common	sync_file_range		sys_sync_file_range
-278	64	vmsplice		sys_vmsplice
-279	64	move_pages		sys_move_pages
-280	common	utimensat		sys_utimensat
-281	common	epoll_pwait		sys_epoll_pwait
-282	common	signalfd		sys_signalfd
-283	common	timerfd_create		sys_timerfd_create
-284	common	eventfd			sys_eventfd
-285	common	fallocate		sys_fallocate
-286	common	timerfd_settime		sys_timerfd_settime
-287	common	timerfd_gettime		sys_timerfd_gettime
-288	common	accept4			sys_accept4
-289	common	signalfd4		sys_signalfd4
-290	common	eventfd2		sys_eventfd2
-291	common	epoll_create1		sys_epoll_create1
-292	common	dup3			sys_dup3
-293	common	pipe2			sys_pipe2
-294	common	inotify_init1		sys_inotify_init1
-295	64	preadv			sys_preadv
-296	64	pwritev			sys_pwritev
-297	64	rt_tgsigqueueinfo	sys_rt_tgsigqueueinfo
-298	common	perf_event_open		sys_perf_event_open
-299	64	recvmmsg		sys_recvmmsg
-300	common	fanotify_init		sys_fanotify_init
-301	common	fanotify_mark		sys_fanotify_mark
-302	common	prlimit64		sys_prlimit64
-303	common	name_to_handle_at	sys_name_to_handle_at
-304	common	open_by_handle_at	sys_open_by_handle_at
-305	common	clock_adjtime		sys_clock_adjtime
-306	common	syncfs			sys_syncfs
-307	64	sendmmsg		sys_sendmmsg
-308	common	setns			sys_setns
-309	common	getcpu			sys_getcpu
-310	64	process_vm_readv	sys_process_vm_readv
-311	64	process_vm_writev	sys_process_vm_writev
-312	common	kcmp			sys_kcmp
-313	common	finit_module		sys_finit_module
-314	common	sched_setattr		sys_sched_setattr
-315	common	sched_getattr		sys_sched_getattr
-316	common	renameat2		sys_renameat2
-317	common	seccomp			sys_seccomp
-318	common	getrandom		sys_getrandom
-319	common	memfd_create		sys_memfd_create
-320	common	kexec_file_load		sys_kexec_file_load
-321	common	bpf			sys_bpf
-322	64	execveat		sys_execveat/ptregs
-323	common	userfaultfd		sys_userfaultfd
-324	common	membarrier		sys_membarrier
-325	common	mlock2			sys_mlock2
-326	common	copy_file_range		sys_copy_file_range
-327	64	preadv2			sys_preadv2
-328	64	pwritev2		sys_pwritev2
-329	common	pkey_mprotect		sys_pkey_mprotect
-330	common	pkey_alloc		sys_pkey_alloc
-331	common	pkey_free		sys_pkey_free
-332	common	statx			sys_statx
+237	common	mbind			__x64_sys_mbind
+238	common	set_mempolicy		__x64_sys_set_mempolicy
+239	common	get_mempolicy		__x64_sys_get_mempolicy
+240	common	mq_open			__x64_sys_mq_open
+241	common	mq_unlink		__x64_sys_mq_unlink
+242	common	mq_timedsend		__x64_sys_mq_timedsend
+243	common	mq_timedreceive		__x64_sys_mq_timedreceive
+244	64	mq_notify		__x64_sys_mq_notify
+245	common	mq_getsetattr		__x64_sys_mq_getsetattr
+246	64	kexec_load		__x64_sys_kexec_load
+247	64	waitid			__x64_sys_waitid
+248	common	add_key			__x64_sys_add_key
+249	common	request_key		__x64_sys_request_key
+250	common	keyctl			__x64_sys_keyctl
+251	common	ioprio_set		__x64_sys_ioprio_set
+252	common	ioprio_get		__x64_sys_ioprio_get
+253	common	inotify_init		__x64_sys_inotify_init
+254	common	inotify_add_watch	__x64_sys_inotify_add_watch
+255	common	inotify_rm_watch	__x64_sys_inotify_rm_watch
+256	common	migrate_pages		__x64_sys_migrate_pages
+257	common	openat			__x64_sys_openat
+258	common	mkdirat			__x64_sys_mkdirat
+259	common	mknodat			__x64_sys_mknodat
+260	common	fchownat		__x64_sys_fchownat
+261	common	futimesat		__x64_sys_futimesat
+262	common	newfstatat		__x64_sys_newfstatat
+263	common	unlinkat		__x64_sys_unlinkat
+264	common	renameat		__x64_sys_renameat
+265	common	linkat			__x64_sys_linkat
+266	common	symlinkat		__x64_sys_symlinkat
+267	common	readlinkat		__x64_sys_readlinkat
+268	common	fchmodat		__x64_sys_fchmodat
+269	common	faccessat		__x64_sys_faccessat
+270	common	pselect6		__x64_sys_pselect6
+271	common	ppoll			__x64_sys_ppoll
+272	common	unshare			__x64_sys_unshare
+273	64	set_robust_list		__x64_sys_set_robust_list
+274	64	get_robust_list		__x64_sys_get_robust_list
+275	common	splice			__x64_sys_splice
+276	common	tee			__x64_sys_tee
+277	common	sync_file_range		__x64_sys_sync_file_range
+278	64	vmsplice		__x64_sys_vmsplice
+279	64	move_pages		__x64_sys_move_pages
+280	common	utimensat		__x64_sys_utimensat
+281	common	epoll_pwait		__x64_sys_epoll_pwait
+282	common	signalfd		__x64_sys_signalfd
+283	common	timerfd_create		__x64_sys_timerfd_create
+284	common	eventfd			__x64_sys_eventfd
+285	common	fallocate		__x64_sys_fallocate
+286	common	timerfd_settime		__x64_sys_timerfd_settime
+287	common	timerfd_gettime		__x64_sys_timerfd_gettime
+288	common	accept4			__x64_sys_accept4
+289	common	signalfd4		__x64_sys_signalfd4
+290	common	eventfd2		__x64_sys_eventfd2
+291	common	epoll_create1		__x64_sys_epoll_create1
+292	common	dup3			__x64_sys_dup3
+293	common	pipe2			__x64_sys_pipe2
+294	common	inotify_init1		__x64_sys_inotify_init1
+295	64	preadv			__x64_sys_preadv
+296	64	pwritev			__x64_sys_pwritev
+297	64	rt_tgsigqueueinfo	__x64_sys_rt_tgsigqueueinfo
+298	common	perf_event_open		__x64_sys_perf_event_open
+299	64	recvmmsg		__x64_sys_recvmmsg
+300	common	fanotify_init		__x64_sys_fanotify_init
+301	common	fanotify_mark		__x64_sys_fanotify_mark
+302	common	prlimit64		__x64_sys_prlimit64
+303	common	name_to_handle_at	__x64_sys_name_to_handle_at
+304	common	open_by_handle_at	__x64_sys_open_by_handle_at
+305	common	clock_adjtime		__x64_sys_clock_adjtime
+306	common	syncfs			__x64_sys_syncfs
+307	64	sendmmsg		__x64_sys_sendmmsg
+308	common	setns			__x64_sys_setns
+309	common	getcpu			__x64_sys_getcpu
+310	64	process_vm_readv	__x64_sys_process_vm_readv
+311	64	process_vm_writev	__x64_sys_process_vm_writev
+312	common	kcmp			__x64_sys_kcmp
+313	common	finit_module		__x64_sys_finit_module
+314	common	sched_setattr		__x64_sys_sched_setattr
+315	common	sched_getattr		__x64_sys_sched_getattr
+316	common	renameat2		__x64_sys_renameat2
+317	common	seccomp			__x64_sys_seccomp
+318	common	getrandom		__x64_sys_getrandom
+319	common	memfd_create		__x64_sys_memfd_create
+320	common	kexec_file_load		__x64_sys_kexec_file_load
+321	common	bpf			__x64_sys_bpf
+322	64	execveat		__x64_sys_execveat/ptregs
+323	common	userfaultfd		__x64_sys_userfaultfd
+324	common	membarrier		__x64_sys_membarrier
+325	common	mlock2			__x64_sys_mlock2
+326	common	copy_file_range		__x64_sys_copy_file_range
+327	64	preadv2			__x64_sys_preadv2
+328	64	pwritev2		__x64_sys_pwritev2
+329	common	pkey_mprotect		__x64_sys_pkey_mprotect
+330	common	pkey_alloc		__x64_sys_pkey_alloc
+331	common	pkey_free		__x64_sys_pkey_free
+332	common	statx			__x64_sys_statx
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
-# for native 64-bit operation.
+# for native 64-bit operation. The __x32_compat_sys stubs are created
+# on-the-fly for compat_sys_*() compatibility system calls if X86_X32
+# is defined.
 #
-512	x32	rt_sigaction		compat_sys_rt_sigaction
+512	x32	rt_sigaction		__x32_compat_sys_rt_sigaction
 513	x32	rt_sigreturn		sys32_x32_rt_sigreturn
-514	x32	ioctl			compat_sys_ioctl
-515	x32	readv			compat_sys_readv
-516	x32	writev			compat_sys_writev
-517	x32	recvfrom		compat_sys_recvfrom
-518	x32	sendmsg			compat_sys_sendmsg
-519	x32	recvmsg			compat_sys_recvmsg
-520	x32	execve			compat_sys_execve/ptregs
-521	x32	ptrace			compat_sys_ptrace
-522	x32	rt_sigpending		compat_sys_rt_sigpending
-523	x32	rt_sigtimedwait		compat_sys_rt_sigtimedwait
-524	x32	rt_sigqueueinfo		compat_sys_rt_sigqueueinfo
-525	x32	sigaltstack		compat_sys_sigaltstack
-526	x32	timer_create		compat_sys_timer_create
-527	x32	mq_notify		compat_sys_mq_notify
-528	x32	kexec_load		compat_sys_kexec_load
-529	x32	waitid			compat_sys_waitid
-530	x32	set_robust_list		compat_sys_set_robust_list
-531	x32	get_robust_list		compat_sys_get_robust_list
-532	x32	vmsplice		compat_sys_vmsplice
-533	x32	move_pages		compat_sys_move_pages
-534	x32	preadv			compat_sys_preadv64
-535	x32	pwritev			compat_sys_pwritev64
-536	x32	rt_tgsigqueueinfo	compat_sys_rt_tgsigqueueinfo
-537	x32	recvmmsg		compat_sys_recvmmsg
-538	x32	sendmmsg		compat_sys_sendmmsg
-539	x32	process_vm_readv	compat_sys_process_vm_readv
-540	x32	process_vm_writev	compat_sys_process_vm_writev
-541	x32	setsockopt		compat_sys_setsockopt
-542	x32	getsockopt		compat_sys_getsockopt
-543	x32	io_setup		compat_sys_io_setup
-544	x32	io_submit		compat_sys_io_submit
-545	x32	execveat		compat_sys_execveat/ptregs
-546	x32	preadv2			compat_sys_preadv64v2
-547	x32	pwritev2		compat_sys_pwritev64v2
+514	x32	ioctl			__x32_compat_sys_ioctl
+515	x32	readv			__x32_compat_sys_readv
+516	x32	writev			__x32_compat_sys_writev
+517	x32	recvfrom		__x32_compat_sys_recvfrom
+518	x32	sendmsg			__x32_compat_sys_sendmsg
+519	x32	recvmsg			__x32_compat_sys_recvmsg
+520	x32	execve			__x32_compat_sys_execve/ptregs
+521	x32	ptrace			__x32_compat_sys_ptrace
+522	x32	rt_sigpending		__x32_compat_sys_rt_sigpending
+523	x32	rt_sigtimedwait		__x32_compat_sys_rt_sigtimedwait
+524	x32	rt_sigqueueinfo		__x32_compat_sys_rt_sigqueueinfo
+525	x32	sigaltstack		__x32_compat_sys_sigaltstack
+526	x32	timer_create		__x32_compat_sys_timer_create
+527	x32	mq_notify		__x32_compat_sys_mq_notify
+528	x32	kexec_load		__x32_compat_sys_kexec_load
+529	x32	waitid			__x32_compat_sys_waitid
+530	x32	set_robust_list		__x32_compat_sys_set_robust_list
+531	x32	get_robust_list		__x32_compat_sys_get_robust_list
+532	x32	vmsplice		__x32_compat_sys_vmsplice
+533	x32	move_pages		__x32_compat_sys_move_pages
+534	x32	preadv			__x32_compat_sys_preadv64
+535	x32	pwritev			__x32_compat_sys_pwritev64
+536	x32	rt_tgsigqueueinfo	__x32_compat_sys_rt_tgsigqueueinfo
+537	x32	recvmmsg		__x32_compat_sys_recvmmsg
+538	x32	sendmmsg		__x32_compat_sys_sendmmsg
+539	x32	process_vm_readv	__x32_compat_sys_process_vm_readv
+540	x32	process_vm_writev	__x32_compat_sys_process_vm_writev
+541	x32	setsockopt		__x32_compat_sys_setsockopt
+542	x32	getsockopt		__x32_compat_sys_getsockopt
+543	x32	io_setup		__x32_compat_sys_io_setup
+544	x32	io_submit		__x32_compat_sys_io_submit
+545	x32	execveat		__x32_compat_sys_execveat/ptregs
+546	x32	preadv2			__x32_compat_sys_preadv64v2
+547	x32	pwritev2		__x32_compat_sys_pwritev64v2
diff --git a/arch/x86/entry/syscalls/syscalltbl.sh b/arch/x86/entry/syscalls/syscalltbl.sh
index d71ef4bd3615..94fcd1951aca 100644
--- a/arch/x86/entry/syscalls/syscalltbl.sh
+++ b/arch/x86/entry/syscalls/syscalltbl.sh
@@ -25,15 +25,27 @@ emit() {
     nr="$2"
     entry="$3"
     compat="$4"
+    umlentry=""
 
     if [ "$abi" = "64" -a -n "$compat" ]; then
 	echo "a compat entry for a 64-bit syscall makes no sense" >&2
 	exit 1
     fi
 
+    # For CONFIG_UML, we need to strip the __x64_sys prefix
+    if [ "$abi" = "64" -a "${entry}" != "${entry#__x64_sys}" ]; then
+	    umlentry="sys${entry#__x64_sys}"
+    fi
+
     if [ -z "$compat" ]; then
-	if [ -n "$entry" ]; then
+	if [ -n "$entry" -a -z "$umlentry" ]; then
 	    syscall_macro "$abi" "$nr" "$entry"
+	elif [ -n "$umlentry" ]; then # implies -n "$entry"
+	    echo "#ifdef CONFIG_X86"
+	    syscall_macro "$abi" "$nr" "$entry"
+	    echo "#else /* CONFIG_UML */"
+	    syscall_macro "$abi" "$nr" "$umlentry"
+	    echo "#endif"
 	fi
     else
 	echo "#ifdef CONFIG_X86_32"
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index 317be365bce3..70b7845434cb 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -127,6 +127,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
 	int vsyscall_nr, syscall_nr, tmp;
 	int prev_sig_on_uaccess_err;
 	long ret;
+	unsigned long orig_dx;
 
 	/*
 	 * No point in checking CS -- the only way to get here is a user mode
@@ -227,19 +228,22 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
 	ret = -EFAULT;
 	switch (vsyscall_nr) {
 	case 0:
-		ret = sys_gettimeofday(
-			(struct timeval __user *)regs->di,
-			(struct timezone __user *)regs->si);
+		/* this decodes regs->di and regs->si on its own */
+		ret = __x64_sys_gettimeofday(regs);
 		break;
 
 	case 1:
-		ret = sys_time((time_t __user *)regs->di);
+		/* this decodes regs->di on its own */
+		ret = __x64_sys_time(regs);
 		break;
 
 	case 2:
-		ret = sys_getcpu((unsigned __user *)regs->di,
-				 (unsigned __user *)regs->si,
-				 NULL);
+		/* while we could clobber regs->dx, we didn't in the past... */
+		orig_dx = regs->dx;
+		regs->dx = 0;
+		/* this decodes regs->di, regs->si and regs->dx on its own */
+		ret = __x64_sys_getcpu(regs);
+		regs->dx = orig_dx;
 		break;
 	}
 
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index da6780122786..8a10a045b57b 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1153,7 +1153,6 @@ static void setup_pebs_sample_data(struct perf_event *event,
 	if (pebs == NULL)
 		return;
 
-	regs->flags &= ~PERF_EFLAGS_EXACT;
 	sample_type = event->attr.sample_type;
 	dsrc = sample_type & PERF_SAMPLE_DATA_SRC;
 
@@ -1197,7 +1196,13 @@ static void setup_pebs_sample_data(struct perf_event *event,
 	 * and PMI.
 	 */
 	*regs = *iregs;
-	regs->flags = pebs->flags;
+
+	/*
+	 * Initialize regs->flags from PEBS,
+	 * Clear exact bit (which uses x86 EFLAGS Reserved bit 3),
+	 * i.e., do not rely on it being zero:
+	 */
+	regs->flags = pebs->flags & ~PERF_EFLAGS_EXACT;
 
 	if (sample_type & PERF_SAMPLE_REGS_INTR) {
 		regs->ax = pebs->ax;
@@ -1217,10 +1222,6 @@ static void setup_pebs_sample_data(struct perf_event *event,
 			regs->sp = pebs->sp;
 		}
 
-		/*
-		 * Preserve PERF_EFLAGS_VM from set_linear_ip().
-		 */
-		regs->flags = pebs->flags | (regs->flags & PERF_EFLAGS_VM);
 #ifndef CONFIG_X86_32
 		regs->r8 = pebs->r8;
 		regs->r9 = pebs->r9;
@@ -1234,20 +1235,33 @@ static void setup_pebs_sample_data(struct perf_event *event,
 	}
 
 	if (event->attr.precise_ip > 1) {
-		/* Haswell and later have the eventing IP, so use it: */
+		/*
+		 * Haswell and later processors have an 'eventing IP'
+		 * (real IP) which fixes the off-by-1 skid in hardware.
+		 * Use it when precise_ip >= 2 :
+		 */
 		if (x86_pmu.intel_cap.pebs_format >= 2) {
 			set_linear_ip(regs, pebs->real_ip);
 			regs->flags |= PERF_EFLAGS_EXACT;
 		} else {
-			/* Otherwise use PEBS off-by-1 IP: */
+			/* Otherwise, use PEBS off-by-1 IP: */
 			set_linear_ip(regs, pebs->ip);
 
-			/* ... and try to fix it up using the LBR entries: */
+			/*
+			 * With precise_ip >= 2, try to fix up the off-by-1 IP
+			 * using the LBR. If successful, the fixup function
+			 * corrects regs->ip and calls set_linear_ip() on regs:
+			 */
 			if (intel_pmu_pebs_fixup_ip(regs))
 				regs->flags |= PERF_EFLAGS_EXACT;
 		}
-	} else
+	} else {
+		/*
+		 * When precise_ip == 1, return the PEBS off-by-1 IP,
+		 * no fixup attempted:
+		 */
 		set_linear_ip(regs, pebs->ip);
+	}
 
 
 	if ((sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR)) &&
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 2edc49e7409b..cfecc2272f2d 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -21,7 +21,7 @@
 #include <asm/apic.h>
 #include <asm/desc.h>
 #include <asm/hypervisor.h>
-#include <asm/hyperv.h>
+#include <asm/hyperv-tlfs.h>
 #include <asm/mshyperv.h>
 #include <linux/version.h>
 #include <linux/vmalloc.h>
@@ -88,11 +88,15 @@ EXPORT_SYMBOL_GPL(hyperv_cs);
 u32 *hv_vp_index;
 EXPORT_SYMBOL_GPL(hv_vp_index);
 
+struct hv_vp_assist_page **hv_vp_assist_page;
+EXPORT_SYMBOL_GPL(hv_vp_assist_page);
+
 u32 hv_max_vp_index;
 
 static int hv_cpu_init(unsigned int cpu)
 {
 	u64 msr_vp_index;
+	struct hv_vp_assist_page **hvp = &hv_vp_assist_page[smp_processor_id()];
 
 	hv_get_vp_index(msr_vp_index);
 
@@ -101,6 +105,22 @@ static int hv_cpu_init(unsigned int cpu)
 	if (msr_vp_index > hv_max_vp_index)
 		hv_max_vp_index = msr_vp_index;
 
+	if (!hv_vp_assist_page)
+		return 0;
+
+	if (!*hvp)
+		*hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL);
+
+	if (*hvp) {
+		u64 val;
+
+		val = vmalloc_to_pfn(*hvp);
+		val = (val << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) |
+			HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
+
+		wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, val);
+	}
+
 	return 0;
 }
 
@@ -198,6 +218,9 @@ static int hv_cpu_die(unsigned int cpu)
 	struct hv_reenlightenment_control re_ctrl;
 	unsigned int new_cpu;
 
+	if (hv_vp_assist_page && hv_vp_assist_page[cpu])
+		wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, 0);
+
 	if (hv_reenlightenment_cb == NULL)
 		return 0;
 
@@ -224,6 +247,7 @@ void hyperv_init(void)
 {
 	u64 guest_id, required_msrs;
 	union hv_x64_msr_hypercall_contents hypercall_msr;
+	int cpuhp;
 
 	if (x86_hyper_type != X86_HYPER_MS_HYPERV)
 		return;
@@ -241,9 +265,17 @@ void hyperv_init(void)
 	if (!hv_vp_index)
 		return;
 
-	if (cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/hyperv_init:online",
-			      hv_cpu_init, hv_cpu_die) < 0)
+	hv_vp_assist_page = kcalloc(num_possible_cpus(),
+				    sizeof(*hv_vp_assist_page), GFP_KERNEL);
+	if (!hv_vp_assist_page) {
+		ms_hyperv.hints &= ~HV_X64_ENLIGHTENED_VMCS_RECOMMENDED;
 		goto free_vp_index;
+	}
+
+	cpuhp = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/hyperv_init:online",
+				  hv_cpu_init, hv_cpu_die);
+	if (cpuhp < 0)
+		goto free_vp_assist_page;
 
 	/*
 	 * Setup the hypercall page and enable hypercalls.
@@ -256,7 +288,7 @@ void hyperv_init(void)
 	hv_hypercall_pg  = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL_RX);
 	if (hv_hypercall_pg == NULL) {
 		wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
-		goto free_vp_index;
+		goto remove_cpuhp_state;
 	}
 
 	rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
@@ -304,6 +336,11 @@ register_msr_cs:
 
 	return;
 
+remove_cpuhp_state:
+	cpuhp_remove_state(cpuhp);
+free_vp_assist_page:
+	kfree(hv_vp_assist_page);
+	hv_vp_assist_page = NULL;
 free_vp_index:
 	kfree(hv_vp_index);
 	hv_vp_index = NULL;
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 40a3d3642f3a..08acd954f00e 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -313,7 +313,7 @@ struct apic {
 	/* Probe, setup and smpboot functions */
 	int	(*probe)(void);
 	int	(*acpi_madt_oem_check)(char *oem_id, char *oem_table_id);
-	int	(*apic_id_valid)(int apicid);
+	int	(*apic_id_valid)(u32 apicid);
 	int	(*apic_id_registered)(void);
 
 	bool	(*check_apicid_used)(physid_mask_t *map, int apicid);
@@ -486,7 +486,7 @@ static inline unsigned int read_apic_id(void)
 	return apic->get_apic_id(reg);
 }
 
-extern int default_apic_id_valid(int apicid);
+extern int default_apic_id_valid(u32 apicid);
 extern int default_acpi_madt_oem_check(char *, char *);
 extern void default_setup_apic_routing(void);
 
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/asm/hyperv-tlfs.h
index 6c0c3a3b631c..416cb0e0c496 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -1,6 +1,13 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _ASM_X86_HYPERV_H
-#define _ASM_X86_HYPERV_H
+
+/*
+ * This file contains definitions from Hyper-V Hypervisor Top-Level Functional
+ * Specification (TLFS):
+ * https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs
+ */
+
+#ifndef _ASM_X86_HYPERV_TLFS_H
+#define _ASM_X86_HYPERV_TLFS_H
 
 #include <linux/types.h>
 
@@ -14,6 +21,7 @@
 #define HYPERV_CPUID_FEATURES			0x40000003
 #define HYPERV_CPUID_ENLIGHTMENT_INFO		0x40000004
 #define HYPERV_CPUID_IMPLEMENT_LIMITS		0x40000005
+#define HYPERV_CPUID_NESTED_FEATURES		0x4000000A
 
 #define HYPERV_HYPERVISOR_PRESENT_BIT		0x80000000
 #define HYPERV_CPUID_MIN			0x40000005
@@ -159,6 +167,9 @@
 /* Recommend using the newer ExProcessorMasks interface */
 #define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED	(1 << 11)
 
+/* Recommend using enlightened VMCS */
+#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED    (1 << 14)
+
 /*
  * Crash notification flag.
  */
@@ -192,7 +203,7 @@
 #define HV_X64_MSR_EOI				0x40000070
 #define HV_X64_MSR_ICR				0x40000071
 #define HV_X64_MSR_TPR				0x40000072
-#define HV_X64_MSR_APIC_ASSIST_PAGE		0x40000073
+#define HV_X64_MSR_VP_ASSIST_PAGE		0x40000073
 
 /* Define synthetic interrupt controller model specific registers. */
 #define HV_X64_MSR_SCONTROL			0x40000080
@@ -240,6 +251,55 @@
 #define HV_X64_MSR_CRASH_PARAMS		\
 		(1 + (HV_X64_MSR_CRASH_P4 - HV_X64_MSR_CRASH_P0))
 
+/*
+ * Declare the MSR used to setup pages used to communicate with the hypervisor.
+ */
+union hv_x64_msr_hypercall_contents {
+	u64 as_uint64;
+	struct {
+		u64 enable:1;
+		u64 reserved:11;
+		u64 guest_physical_address:52;
+	};
+};
+
+/*
+ * TSC page layout.
+ */
+struct ms_hyperv_tsc_page {
+	volatile u32 tsc_sequence;
+	u32 reserved1;
+	volatile u64 tsc_scale;
+	volatile s64 tsc_offset;
+	u64 reserved2[509];
+};
+
+/*
+ * The guest OS needs to register the guest ID with the hypervisor.
+ * The guest ID is a 64 bit entity and the structure of this ID is
+ * specified in the Hyper-V specification:
+ *
+ * msdn.microsoft.com/en-us/library/windows/hardware/ff542653%28v=vs.85%29.aspx
+ *
+ * While the current guideline does not specify how Linux guest ID(s)
+ * need to be generated, our plan is to publish the guidelines for
+ * Linux and other guest operating systems that currently are hosted
+ * on Hyper-V. The implementation here conforms to these yet
+ * unpublished guidelines.
+ *
+ *
+ * Bit(s)
+ * 63 - Indicates if the OS is Open Source or not; 1 is Open Source
+ * 62:56 - Os Type; Linux is 0x100
+ * 55:48 - Distro specific identification
+ * 47:16 - Linux kernel version number
+ * 15:0  - Distro specific identification
+ *
+ *
+ */
+
+#define HV_LINUX_VENDOR_ID              0x8100
+
 /* TSC emulation after migration */
 #define HV_X64_MSR_REENLIGHTENMENT_CONTROL	0x40000106
 
@@ -278,10 +338,13 @@ struct hv_tsc_emulation_status {
 #define HVCALL_POST_MESSAGE			0x005c
 #define HVCALL_SIGNAL_EVENT			0x005d
 
-#define HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE		0x00000001
-#define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT	12
-#define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
-		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
+#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE	0x00000001
+#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT	12
+#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK	\
+		(~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
+
+/* Hyper-V Enlightened VMCS version mask in nested features CPUID */
+#define HV_X64_ENLIGHTENED_VMCS_VERSION		0xff
 
 #define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
 #define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
@@ -301,12 +364,22 @@ enum HV_GENERIC_SET_FORMAT {
 	HV_GENERIC_SET_ALL,
 };
 
+#define HV_HYPERCALL_RESULT_MASK	GENMASK_ULL(15, 0)
+#define HV_HYPERCALL_FAST_BIT		BIT(16)
+#define HV_HYPERCALL_VARHEAD_OFFSET	17
+#define HV_HYPERCALL_REP_COMP_OFFSET	32
+#define HV_HYPERCALL_REP_COMP_MASK	GENMASK_ULL(43, 32)
+#define HV_HYPERCALL_REP_START_OFFSET	48
+#define HV_HYPERCALL_REP_START_MASK	GENMASK_ULL(59, 48)
+
 /* hypercall status code */
 #define HV_STATUS_SUCCESS			0
 #define HV_STATUS_INVALID_HYPERCALL_CODE	2
 #define HV_STATUS_INVALID_HYPERCALL_INPUT	3
 #define HV_STATUS_INVALID_ALIGNMENT		4
+#define HV_STATUS_INVALID_PARAMETER		5
 #define HV_STATUS_INSUFFICIENT_MEMORY		11
+#define HV_STATUS_INVALID_PORT_ID		17
 #define HV_STATUS_INVALID_CONNECTION_ID		18
 #define HV_STATUS_INSUFFICIENT_BUFFERS		19
 
@@ -321,6 +394,8 @@ typedef struct _HV_REFERENCE_TSC_PAGE {
 #define HV_SYNIC_SINT_COUNT		(16)
 /* Define the expected SynIC version. */
 #define HV_SYNIC_VERSION_1		(0x1)
+/* Valid SynIC vectors are 16-255. */
+#define HV_SYNIC_FIRST_VALID_VECTOR	(16)
 
 #define HV_SYNIC_CONTROL_ENABLE		(1ULL << 0)
 #define HV_SYNIC_SIMP_ENABLE		(1ULL << 0)
@@ -415,6 +490,216 @@ struct hv_timer_message_payload {
 	__u64 delivery_time;	/* When the message was delivered */
 };
 
+/* Define virtual processor assist page structure. */
+struct hv_vp_assist_page {
+	__u32 apic_assist;
+	__u32 reserved;
+	__u64 vtl_control[2];
+	__u64 nested_enlightenments_control[2];
+	__u32 enlighten_vmentry;	/* NOTE(review): no explicit pad before the __u64 below */
+	__u64 current_nested_vmcs;	/* offset depends on the ABI's __u64 alignment - TODO confirm vs. TLFS */
+};
+
+struct hv_enlightened_vmcs {
+	u32 revision_id;
+	u32 abort;
+
+	u16 host_es_selector;
+	u16 host_cs_selector;
+	u16 host_ss_selector;
+	u16 host_ds_selector;
+	u16 host_fs_selector;
+	u16 host_gs_selector;
+	u16 host_tr_selector;	/* NOTE(review): 7th u16; the u64 below relies on compiler-inserted padding - TODO confirm vs. TLFS */
+
+	u64 host_ia32_pat;
+	u64 host_ia32_efer;
+
+	u64 host_cr0;
+	u64 host_cr3;
+	u64 host_cr4;
+
+	u64 host_ia32_sysenter_esp;
+	u64 host_ia32_sysenter_eip;
+	u64 host_rip;
+	u32 host_ia32_sysenter_cs;
+
+	u32 pin_based_vm_exec_control;
+	u32 vm_exit_controls;
+	u32 secondary_vm_exec_control;
+
+	u64 io_bitmap_a;
+	u64 io_bitmap_b;
+	u64 msr_bitmap;
+
+	u16 guest_es_selector;
+	u16 guest_cs_selector;
+	u16 guest_ss_selector;
+	u16 guest_ds_selector;
+	u16 guest_fs_selector;
+	u16 guest_gs_selector;
+	u16 guest_ldtr_selector;
+	u16 guest_tr_selector;
+
+	u32 guest_es_limit;
+	u32 guest_cs_limit;
+	u32 guest_ss_limit;
+	u32 guest_ds_limit;
+	u32 guest_fs_limit;
+	u32 guest_gs_limit;
+	u32 guest_ldtr_limit;
+	u32 guest_tr_limit;
+	u32 guest_gdtr_limit;
+	u32 guest_idtr_limit;
+
+	u32 guest_es_ar_bytes;
+	u32 guest_cs_ar_bytes;
+	u32 guest_ss_ar_bytes;
+	u32 guest_ds_ar_bytes;
+	u32 guest_fs_ar_bytes;
+	u32 guest_gs_ar_bytes;
+	u32 guest_ldtr_ar_bytes;
+	u32 guest_tr_ar_bytes;
+
+	u64 guest_es_base;
+	u64 guest_cs_base;
+	u64 guest_ss_base;
+	u64 guest_ds_base;
+	u64 guest_fs_base;
+	u64 guest_gs_base;
+	u64 guest_ldtr_base;
+	u64 guest_tr_base;
+	u64 guest_gdtr_base;
+	u64 guest_idtr_base;
+
+	u64 padding64_1[3];
+
+	u64 vm_exit_msr_store_addr;
+	u64 vm_exit_msr_load_addr;
+	u64 vm_entry_msr_load_addr;
+
+	u64 cr3_target_value0;
+	u64 cr3_target_value1;
+	u64 cr3_target_value2;
+	u64 cr3_target_value3;
+
+	u32 page_fault_error_code_mask;
+	u32 page_fault_error_code_match;
+
+	u32 cr3_target_count;
+	u32 vm_exit_msr_store_count;
+	u32 vm_exit_msr_load_count;
+	u32 vm_entry_msr_load_count;
+
+	u64 tsc_offset;
+	u64 virtual_apic_page_addr;
+	u64 vmcs_link_pointer;
+
+	u64 guest_ia32_debugctl;
+	u64 guest_ia32_pat;
+	u64 guest_ia32_efer;
+
+	u64 guest_pdptr0;
+	u64 guest_pdptr1;
+	u64 guest_pdptr2;
+	u64 guest_pdptr3;
+
+	u64 guest_pending_dbg_exceptions;
+	u64 guest_sysenter_esp;
+	u64 guest_sysenter_eip;
+
+	u32 guest_activity_state;
+	u32 guest_sysenter_cs;
+
+	u64 cr0_guest_host_mask;
+	u64 cr4_guest_host_mask;
+	u64 cr0_read_shadow;
+	u64 cr4_read_shadow;
+	u64 guest_cr0;
+	u64 guest_cr3;
+	u64 guest_cr4;
+	u64 guest_dr7;
+
+	u64 host_fs_base;
+	u64 host_gs_base;
+	u64 host_tr_base;
+	u64 host_gdtr_base;
+	u64 host_idtr_base;
+	u64 host_rsp;
+
+	u64 ept_pointer;
+
+	u16 virtual_processor_id;
+	u16 padding16[3];
+
+	u64 padding64_2[5];
+	u64 guest_physical_address;
+
+	u32 vm_instruction_error;
+	u32 vm_exit_reason;
+	u32 vm_exit_intr_info;
+	u32 vm_exit_intr_error_code;
+	u32 idt_vectoring_info_field;
+	u32 idt_vectoring_error_code;
+	u32 vm_exit_instruction_len;
+	u32 vmx_instruction_info;
+
+	u64 exit_qualification;
+	u64 exit_io_instruction_ecx;
+	u64 exit_io_instruction_esi;
+	u64 exit_io_instruction_edi;
+	u64 exit_io_instruction_eip;
+
+	u64 guest_linear_address;
+	u64 guest_rsp;
+	u64 guest_rflags;
+
+	u32 guest_interruptibility_info;
+	u32 cpu_based_vm_exec_control;
+	u32 exception_bitmap;
+	u32 vm_entry_controls;
+	u32 vm_entry_intr_info_field;
+	u32 vm_entry_exception_error_code;
+	u32 vm_entry_instruction_len;
+	u32 tpr_threshold;
+
+	u64 guest_rip;
+
+	u32 hv_clean_fields;
+	u32 hv_padding_32;
+	u32 hv_synthetic_controls;
+	u32 hv_enlightenments_control;
+	u32 hv_vp_id;	/* NOTE(review): 5th consecutive u32; hv_vm_id below relies on implicit padding where u64 is 8-byte aligned */
+
+	u64 hv_vm_id;
+	u64 partition_assist_page;
+	u64 padding64_4[4];
+	u64 guest_bndcfgs;
+	u64 padding64_5[7];
+	u64 xss_exit_bitmap;
+	u64 padding64_6[7];
+};
+
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE			0
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP		BIT(0)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP		BIT(1)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2		BIT(2)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1		BIT(3)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC		BIT(4)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT		BIT(5)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY		BIT(6)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN		BIT(7)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR			BIT(8)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT		BIT(9)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC		BIT(10)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1		BIT(11)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2		BIT(12)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER		BIT(13)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1		BIT(14)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ENLIGHTENMENTSCONTROL	BIT(15)
+
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL			0xFFFF
+
 #define HV_STIMER_ENABLE		(1ULL << 0)
 #define HV_STIMER_PERIODIC		(1ULL << 1)
 #define HV_STIMER_LAZY			(1ULL << 2)
diff --git a/arch/x86/include/asm/kexec-bzimage64.h b/arch/x86/include/asm/kexec-bzimage64.h
index 9f07cff43705..df89ee7d3e9e 100644
--- a/arch/x86/include/asm/kexec-bzimage64.h
+++ b/arch/x86/include/asm/kexec-bzimage64.h
@@ -2,6 +2,6 @@
 #ifndef _ASM_KEXEC_BZIMAGE64_H
 #define _ASM_KEXEC_BZIMAGE64_H
 
-extern struct kexec_file_ops kexec_bzImage64_ops;
+extern const struct kexec_file_ops kexec_bzImage64_ops;
 
 #endif  /* _ASM_KEXE_BZIMAGE64_H */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b605a5b6a30c..949c977bc4c9 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -34,6 +34,7 @@
 #include <asm/msr-index.h>
 #include <asm/asm.h>
 #include <asm/kvm_page_track.h>
+#include <asm/hyperv-tlfs.h>
 
 #define KVM_MAX_VCPUS 288
 #define KVM_SOFT_MAX_VCPUS 240
@@ -73,6 +74,7 @@
 #define KVM_REQ_HV_RESET		KVM_ARCH_REQ(20)
 #define KVM_REQ_HV_EXIT			KVM_ARCH_REQ(21)
 #define KVM_REQ_HV_STIMER		KVM_ARCH_REQ(22)
+#define KVM_REQ_LOAD_EOI_EXITMAP	KVM_ARCH_REQ(23)
 
 #define CR0_RESERVED_BITS                                               \
 	(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
@@ -498,6 +500,7 @@ struct kvm_vcpu_arch {
 	u64 apic_base;
 	struct kvm_lapic *apic;    /* kernel irqchip context */
 	bool apicv_active;
+	bool load_eoi_exitmap_pending;
 	DECLARE_BITMAP(ioapic_handled_vectors, 256);
 	unsigned long apic_attention;
 	int32_t apic_arb_prio;
@@ -571,7 +574,7 @@ struct kvm_vcpu_arch {
 	} exception;
 
 	struct kvm_queued_interrupt {
-		bool pending;
+		bool injected;
 		bool soft;
 		u8 nr;
 	} interrupt;
@@ -754,6 +757,12 @@ struct kvm_hv {
 	u64 hv_crash_ctl;
 
 	HV_REFERENCE_TSC_PAGE tsc_ref;
+
+	struct idr conn_to_evt;
+
+	u64 hv_reenlightenment_control;
+	u64 hv_tsc_emulation_control;
+	u64 hv_tsc_emulation_status;
 };
 
 enum kvm_irqchip_mode {
@@ -762,15 +771,6 @@ enum kvm_irqchip_mode {
 	KVM_IRQCHIP_SPLIT,        /* created with KVM_CAP_SPLIT_IRQCHIP */
 };
 
-struct kvm_sev_info {
-	bool active;		/* SEV enabled guest */
-	unsigned int asid;	/* ASID used for this guest */
-	unsigned int handle;	/* SEV firmware handle */
-	int fd;			/* SEV device fd */
-	unsigned long pages_locked; /* Number of pages locked */
-	struct list_head regions_list;  /* List of registered regions */
-};
-
 struct kvm_arch {
 	unsigned int n_used_mmu_pages;
 	unsigned int n_requested_mmu_pages;
@@ -800,13 +800,13 @@ struct kvm_arch {
 	struct mutex apic_map_lock;
 	struct kvm_apic_map *apic_map;
 
-	unsigned int tss_addr;
 	bool apic_access_page_done;
 
 	gpa_t wall_clock;
 
-	bool ept_identity_pagetable_done;
-	gpa_t ept_identity_map_addr;
+	bool mwait_in_guest;
+	bool hlt_in_guest;
+	bool pause_in_guest;
 
 	unsigned long irq_sources_bitmap;
 	s64 kvmclock_offset;
@@ -849,17 +849,8 @@ struct kvm_arch {
 
 	bool disabled_lapic_found;
 
-	/* Struct members for AVIC */
-	u32 avic_vm_id;
-	u32 ldr_mode;
-	struct page *avic_logical_id_table_page;
-	struct page *avic_physical_id_table_page;
-	struct hlist_node hnode;
-
 	bool x2apic_format;
 	bool x2apic_broadcast_quirk_disabled;
-
-	struct kvm_sev_info sev_info;
 };
 
 struct kvm_vm_stat {
@@ -936,6 +927,8 @@ struct kvm_x86_ops {
 	bool (*cpu_has_high_real_mode_segbase)(void);
 	void (*cpuid_update)(struct kvm_vcpu *vcpu);
 
+	struct kvm *(*vm_alloc)(void);
+	void (*vm_free)(struct kvm *);
 	int (*vm_init)(struct kvm *kvm);
 	void (*vm_destroy)(struct kvm *kvm);
 
@@ -1007,6 +1000,7 @@ struct kvm_x86_ops {
 	void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
 	int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
 	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
+	int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr);
 	int (*get_tdp_level)(struct kvm_vcpu *vcpu);
 	u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
 	int (*get_lpage_level)(void);
@@ -1109,6 +1103,17 @@ struct kvm_arch_async_pf {
 
 extern struct kvm_x86_ops *kvm_x86_ops;
 
+#define __KVM_HAVE_ARCH_VM_ALLOC
+static inline struct kvm *kvm_arch_alloc_vm(void)
+{
+	return kvm_x86_ops->vm_alloc();	/* delegated to the kvm_x86_ops hook; called without a NULL check */
+}
+
+static inline void kvm_arch_free_vm(struct kvm *kvm)
+{
+	kvm_x86_ops->vm_free(kvm);	/* void hook: nothing to return */
+}
+
 int kvm_mmu_module_init(void);
 void kvm_mmu_module_exit(void);
 
@@ -1187,6 +1192,8 @@ enum emulation_result {
 #define EMULTYPE_SKIP		    (1 << 2)
 #define EMULTYPE_RETRY		    (1 << 3)
 #define EMULTYPE_NO_REEXECUTE	    (1 << 4)
+#define EMULTYPE_NO_UD_ON_FAIL	    (1 << 5)
+#define EMULTYPE_VMWARE		    (1 << 6)
 int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
 			    int emulation_type, void *insn, int insn_len);
 
@@ -1204,8 +1211,7 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr);
 
 struct x86_emulate_ctxt;
 
-int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port);
-int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, unsigned short port);
+int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in);
 int kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
 int kvm_emulate_halt(struct kvm_vcpu *vcpu);
 int kvm_vcpu_halt(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 7b407dda2bd7..3aea2658323a 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -88,6 +88,7 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
 #ifdef CONFIG_KVM_GUEST
 bool kvm_para_available(void);
 unsigned int kvm_arch_para_features(void);
+unsigned int kvm_arch_para_hints(void);
 void kvm_async_pf_task_wait(u32 token, int interrupt_kernel);
 void kvm_async_pf_task_wake(u32 token);
 u32 kvm_read_and_reset_pf_reason(void);
@@ -115,6 +116,11 @@ static inline unsigned int kvm_arch_para_features(void)
 	return 0;
 }
 
+static inline unsigned int kvm_arch_para_hints(void)
+{
+	return 0;
+}
+
 static inline u32 kvm_read_and_reset_pf_reason(void)
 {
 	return 0;
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index e73c4d0c06ad..b90e79610cf7 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -6,90 +6,23 @@
 #include <linux/atomic.h>
 #include <linux/nmi.h>
 #include <asm/io.h>
-#include <asm/hyperv.h>
+#include <asm/hyperv-tlfs.h>
 #include <asm/nospec-branch.h>
 
-/*
- * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent
- * is set by CPUID(HVCPUID_VERSION_FEATURES).
- */
-enum hv_cpuid_function {
-	HVCPUID_VERSION_FEATURES		= 0x00000001,
-	HVCPUID_VENDOR_MAXFUNCTION		= 0x40000000,
-	HVCPUID_INTERFACE			= 0x40000001,
-
-	/*
-	 * The remaining functions depend on the value of
-	 * HVCPUID_INTERFACE
-	 */
-	HVCPUID_VERSION				= 0x40000002,
-	HVCPUID_FEATURES			= 0x40000003,
-	HVCPUID_ENLIGHTENMENT_INFO		= 0x40000004,
-	HVCPUID_IMPLEMENTATION_LIMITS		= 0x40000005,
-};
-
 struct ms_hyperv_info {
 	u32 features;
 	u32 misc_features;
 	u32 hints;
+	u32 nested_features;
 	u32 max_vp_index;
 	u32 max_lp_index;
 };
 
 extern struct ms_hyperv_info ms_hyperv;
 
-/*
- * Declare the MSR used to setup pages used to communicate with the hypervisor.
- */
-union hv_x64_msr_hypercall_contents {
-	u64 as_uint64;
-	struct {
-		u64 enable:1;
-		u64 reserved:11;
-		u64 guest_physical_address:52;
-	};
-};
 
 /*
- * TSC page layout.
- */
-
-struct ms_hyperv_tsc_page {
-	volatile u32 tsc_sequence;
-	u32 reserved1;
-	volatile u64 tsc_scale;
-	volatile s64 tsc_offset;
-	u64 reserved2[509];
-};
-
-/*
- * The guest OS needs to register the guest ID with the hypervisor.
- * The guest ID is a 64 bit entity and the structure of this ID is
- * specified in the Hyper-V specification:
- *
- * msdn.microsoft.com/en-us/library/windows/hardware/ff542653%28v=vs.85%29.aspx
- *
- * While the current guideline does not specify how Linux guest ID(s)
- * need to be generated, our plan is to publish the guidelines for
- * Linux and other guest operating systems that currently are hosted
- * on Hyper-V. The implementation here conforms to this yet
- * unpublished guidelines.
- *
- *
- * Bit(s)
- * 63 - Indicates if the OS is Open Source or not; 1 is Open Source
- * 62:56 - Os Type; Linux is 0x100
- * 55:48 - Distro specific identification
- * 47:16 - Linux kernel version number
- * 15:0  - Distro specific identification
- *
- *
- */
-
-#define HV_LINUX_VENDOR_ID              0x8100
-
-/*
- * Generate the guest ID based on the guideline described above.
+ * Generate the guest ID.
  */
 
 static inline  __u64 generate_guest_id(__u64 d_info1, __u64 kernel_version,
@@ -228,14 +161,6 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
 	return hv_status;
 }
 
-#define HV_HYPERCALL_RESULT_MASK	GENMASK_ULL(15, 0)
-#define HV_HYPERCALL_FAST_BIT		BIT(16)
-#define HV_HYPERCALL_VARHEAD_OFFSET	17
-#define HV_HYPERCALL_REP_COMP_OFFSET	32
-#define HV_HYPERCALL_REP_COMP_MASK	GENMASK_ULL(43, 32)
-#define HV_HYPERCALL_REP_START_OFFSET	48
-#define HV_HYPERCALL_REP_START_MASK	GENMASK_ULL(59, 48)
-
 /* Fast hypercall with 8 bytes of input and no output */
 static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
 {
@@ -307,6 +232,15 @@ static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size,
  */
 extern u32 *hv_vp_index;
 extern u32 hv_max_vp_index;
+extern struct hv_vp_assist_page **hv_vp_assist_page;
+
+static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu)
+{
+	if (!hv_vp_assist_page)	/* table pointer is NULL: there is no page to hand out */
+		return NULL;
+
+	return hv_vp_assist_page[cpu];	/* cpu indexes the table without a range check */
+}
 
 /**
  * hv_cpu_number_to_vp_number() - Map CPU to VP.
@@ -343,6 +277,10 @@ static inline void hyperv_setup_mmu_ops(void) {}
 static inline void set_hv_tscchange_cb(void (*cb)(void)) {}
 static inline void clear_hv_tscchange_cb(void) {}
 static inline void hyperv_stop_tsc_emulation(void) {};
+static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu)
+{
+	return NULL;
+}
 #endif /* CONFIG_HYPERV */
 
 #ifdef CONFIG_HYPERV_TSCPAGE
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index c9084dedfcfa..53d5b1b9255e 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -353,7 +353,21 @@
 
 /* Fam 15h MSRs */
 #define MSR_F15H_PERF_CTL		0xc0010200
+#define MSR_F15H_PERF_CTL0		MSR_F15H_PERF_CTL
+#define MSR_F15H_PERF_CTL1		(MSR_F15H_PERF_CTL + 2)
+#define MSR_F15H_PERF_CTL2		(MSR_F15H_PERF_CTL + 4)
+#define MSR_F15H_PERF_CTL3		(MSR_F15H_PERF_CTL + 6)
+#define MSR_F15H_PERF_CTL4		(MSR_F15H_PERF_CTL + 8)
+#define MSR_F15H_PERF_CTL5		(MSR_F15H_PERF_CTL + 10)
+
 #define MSR_F15H_PERF_CTR		0xc0010201
+#define MSR_F15H_PERF_CTR0		MSR_F15H_PERF_CTR
+#define MSR_F15H_PERF_CTR1		(MSR_F15H_PERF_CTR + 2)
+#define MSR_F15H_PERF_CTR2		(MSR_F15H_PERF_CTR + 4)
+#define MSR_F15H_PERF_CTR3		(MSR_F15H_PERF_CTR + 6)
+#define MSR_F15H_PERF_CTR4		(MSR_F15H_PERF_CTR + 8)
+#define MSR_F15H_PERF_CTR5		(MSR_F15H_PERF_CTR + 10)
+
 #define MSR_F15H_NB_PERF_CTL		0xc0010240
 #define MSR_F15H_NB_PERF_CTR		0xc0010241
 #define MSR_F15H_PTSC			0xc0010280
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 89d5c8886c85..5f49b4ff0c24 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -526,22 +526,39 @@ static inline pgprotval_t massage_pgprot(pgprot_t pgprot)
 	return protval;
 }
 
+static inline pgprotval_t check_pgprot(pgprot_t pgprot)
+{
+	pgprotval_t massaged_val = massage_pgprot(pgprot);	/* compared against the raw value below */
+
+	/* mmdebug.h cannot be included here because of dependencies */
+#ifdef CONFIG_DEBUG_VM
+	WARN_ONCE(pgprot_val(pgprot) != massaged_val,
+		  "attempted to set unsupported pgprot: %016llx "
+		  "bits: %016llx supported: %016llx\n",
+		  (u64)pgprot_val(pgprot),
+		  (u64)pgprot_val(pgprot) ^ massaged_val,	/* XOR: the bits massage_pgprot() changed */
+		  (u64)__supported_pte_mask);
+#endif
+
+	return massaged_val;	/* massaged value is returned whether or not the warning fired */
+}
+
 static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
 {
 	return __pte(((phys_addr_t)page_nr << PAGE_SHIFT) |
-		     massage_pgprot(pgprot));
+		     check_pgprot(pgprot));
 }
 
 static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
 {
 	return __pmd(((phys_addr_t)page_nr << PAGE_SHIFT) |
-		     massage_pgprot(pgprot));
+		     check_pgprot(pgprot));
 }
 
 static inline pud_t pfn_pud(unsigned long page_nr, pgprot_t pgprot)
 {
 	return __pud(((phys_addr_t)page_nr << PAGE_SHIFT) |
-		     massage_pgprot(pgprot));
+		     check_pgprot(pgprot));
 }
 
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
@@ -553,7 +570,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 	 * the newprot (if present):
 	 */
 	val &= _PAGE_CHG_MASK;
-	val |= massage_pgprot(newprot) & ~_PAGE_CHG_MASK;
+	val |= check_pgprot(newprot) & ~_PAGE_CHG_MASK;
 
 	return __pte(val);
 }
@@ -563,7 +580,7 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
 	pmdval_t val = pmd_val(pmd);
 
 	val &= _HPAGE_CHG_MASK;
-	val |= massage_pgprot(newprot) & ~_HPAGE_CHG_MASK;
+	val |= check_pgprot(newprot) & ~_HPAGE_CHG_MASK;
 
 	return __pmd(val);
 }
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index acfe755562a6..1e5a40673953 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -196,19 +196,21 @@ enum page_cache_mode {
 #define __PAGE_KERNEL_NOENC	(__PAGE_KERNEL)
 #define __PAGE_KERNEL_NOENC_WP	(__PAGE_KERNEL_WP)
 
-#define PAGE_KERNEL		__pgprot(__PAGE_KERNEL | _PAGE_ENC)
-#define PAGE_KERNEL_NOENC	__pgprot(__PAGE_KERNEL)
-#define PAGE_KERNEL_RO		__pgprot(__PAGE_KERNEL_RO | _PAGE_ENC)
-#define PAGE_KERNEL_EXEC	__pgprot(__PAGE_KERNEL_EXEC | _PAGE_ENC)
-#define PAGE_KERNEL_EXEC_NOENC	__pgprot(__PAGE_KERNEL_EXEC)
-#define PAGE_KERNEL_RX		__pgprot(__PAGE_KERNEL_RX | _PAGE_ENC)
-#define PAGE_KERNEL_NOCACHE	__pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC)
-#define PAGE_KERNEL_LARGE	__pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC)
-#define PAGE_KERNEL_LARGE_EXEC	__pgprot(__PAGE_KERNEL_LARGE_EXEC | _PAGE_ENC)
-#define PAGE_KERNEL_VVAR	__pgprot(__PAGE_KERNEL_VVAR | _PAGE_ENC)
-
-#define PAGE_KERNEL_IO		__pgprot(__PAGE_KERNEL_IO)
-#define PAGE_KERNEL_IO_NOCACHE	__pgprot(__PAGE_KERNEL_IO_NOCACHE)
+#define default_pgprot(x)	__pgprot((x) & __default_kernel_pte_mask)
+
+#define PAGE_KERNEL		default_pgprot(__PAGE_KERNEL | _PAGE_ENC)
+#define PAGE_KERNEL_NOENC	default_pgprot(__PAGE_KERNEL)
+#define PAGE_KERNEL_RO		default_pgprot(__PAGE_KERNEL_RO | _PAGE_ENC)
+#define PAGE_KERNEL_EXEC	default_pgprot(__PAGE_KERNEL_EXEC | _PAGE_ENC)
+#define PAGE_KERNEL_EXEC_NOENC	default_pgprot(__PAGE_KERNEL_EXEC)
+#define PAGE_KERNEL_RX		default_pgprot(__PAGE_KERNEL_RX | _PAGE_ENC)
+#define PAGE_KERNEL_NOCACHE	default_pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC)
+#define PAGE_KERNEL_LARGE	default_pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC)
+#define PAGE_KERNEL_LARGE_EXEC	default_pgprot(__PAGE_KERNEL_LARGE_EXEC | _PAGE_ENC)
+#define PAGE_KERNEL_VVAR	default_pgprot(__PAGE_KERNEL_VVAR | _PAGE_ENC)
+
+#define PAGE_KERNEL_IO		default_pgprot(__PAGE_KERNEL_IO)
+#define PAGE_KERNEL_IO_NOCACHE	default_pgprot(__PAGE_KERNEL_IO_NOCACHE)
 
 #endif	/* __ASSEMBLY__ */
 
@@ -483,6 +485,7 @@ static inline pgprot_t pgprot_large_2_4k(pgprot_t pgprot)
 typedef struct page *pgtable_t;
 
 extern pteval_t __supported_pte_mask;
+extern pteval_t __default_kernel_pte_mask;
 extern void set_nx(void);
 extern int nx_enabled;
 
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index b0ccd4847a58..4fa4206029e3 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -407,9 +407,19 @@ union irq_stack_union {
 DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __visible;
 DECLARE_INIT_PER_CPU(irq_stack_union);
 
+static inline unsigned long cpu_kernelmode_gs_base(int cpu)
+{
+	return (unsigned long)per_cpu(irq_stack_union.gs_base, cpu);	/* @cpu's per-CPU irq_stack_union.gs_base, cast to an integer */
+}
+
 DECLARE_PER_CPU(char *, irq_stack_ptr);
 DECLARE_PER_CPU(unsigned int, irq_count);
 extern asmlinkage void ignore_sysret(void);
+
+#if IS_ENABLED(CONFIG_KVM)
+/* Save actual FS/GS selectors and bases to current->thread */
+void save_fsgs_for_kvm(void);
+#endif
 #else	/* X86_64 */
 #ifdef CONFIG_CC_STACKPROTECTOR
 /*
diff --git a/arch/x86/include/asm/pti.h b/arch/x86/include/asm/pti.h
index 0b5ef05b2d2d..38a17f1d5c9d 100644
--- a/arch/x86/include/asm/pti.h
+++ b/arch/x86/include/asm/pti.h
@@ -6,8 +6,10 @@
 #ifdef CONFIG_PAGE_TABLE_ISOLATION
 extern void pti_init(void);
 extern void pti_check_boottime_disable(void);
+extern void pti_clone_kernel_text(void);
 #else
 static inline void pti_check_boottime_disable(void) { }
+static inline void pti_clone_kernel_text(void) { }
 #endif
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 0487ac054870..93b462e48067 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -60,7 +60,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
 	u32 intercept_dr;
 	u32 intercept_exceptions;
 	u64 intercept;
-	u8 reserved_1[42];
+	u8 reserved_1[40];
+	u16 pause_filter_thresh;
 	u16 pause_filter_count;
 	u64 iopm_base_pa;
 	u64 msrpm_base_pa;
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 03eedc21246d..d653139857af 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -20,9 +20,13 @@
 #include <asm/thread_info.h>	/* for TS_COMPAT */
 #include <asm/unistd.h>
 
+#ifdef CONFIG_X86_64
+typedef asmlinkage long (*sys_call_ptr_t)(const struct pt_regs *);
+#else
 typedef asmlinkage long (*sys_call_ptr_t)(unsigned long, unsigned long,
 					  unsigned long, unsigned long,
 					  unsigned long, unsigned long);
+#endif /* CONFIG_X86_64 */
 extern const sys_call_ptr_t sys_call_table[];
 
 #if defined(CONFIG_X86_32)
diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h
new file mode 100644
index 000000000000..e046a405743d
--- /dev/null
+++ b/arch/x86/include/asm/syscall_wrapper.h
@@ -0,0 +1,209 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * syscall_wrapper.h - x86 specific wrappers to syscall definitions
+ */
+
+#ifndef _ASM_X86_SYSCALL_WRAPPER_H
+#define _ASM_X86_SYSCALL_WRAPPER_H
+
+/* Map pt_regs to syscall args on x86-64 and x32: di, si, dx, r10, r8, r9 */
+#define SC_X86_64_REGS_TO_ARGS(x, ...)					\
+	__MAP(x,__SC_ARGS						\
+		,,regs->di,,regs->si,,regs->dx				\
+		,,regs->r10,,regs->r8,,regs->r9)
+
+/* Mapping of registers to parameters for syscalls on i386 */
+#define SC_IA32_REGS_TO_ARGS(x, ...)					\
+	__MAP(x,__SC_ARGS						\
+	      ,,(unsigned int)regs->bx,,(unsigned int)regs->cx		\
+	      ,,(unsigned int)regs->dx,,(unsigned int)regs->si		\
+	      ,,(unsigned int)regs->di,,(unsigned int)regs->bp)
+
+#ifdef CONFIG_IA32_EMULATION
+/*
+ * For IA32 emulation, we need to handle "compat" syscalls *and* create
+ * additional wrappers (aptly named __ia32_sys_xyzzy) which decode the
+ * ia32 regs in the proper order for shared or "common" syscalls. As some
+ * syscalls may not be implemented, we need to expand COND_SYSCALL in
+ * kernel/sys_ni.c and SYS_NI in kernel/time/posix-stubs.c to cover this
+ * case as well.
+ */
+#define __IA32_COMPAT_SYS_STUBx(x, name, ...)				\
+	asmlinkage long __ia32_compat_sys##name(const struct pt_regs *regs);\
+	ALLOW_ERROR_INJECTION(__ia32_compat_sys##name, ERRNO);		\
+	asmlinkage long __ia32_compat_sys##name(const struct pt_regs *regs)\
+	{								\
+		return __se_compat_sys##name(SC_IA32_REGS_TO_ARGS(x,__VA_ARGS__));\
+	}
+
+#define __IA32_SYS_STUBx(x, name, ...)					\
+	asmlinkage long __ia32_sys##name(const struct pt_regs *regs);	\
+	ALLOW_ERROR_INJECTION(__ia32_sys##name, ERRNO);			\
+	asmlinkage long __ia32_sys##name(const struct pt_regs *regs)	\
+	{								\
+		return __se_sys##name(SC_IA32_REGS_TO_ARGS(x,__VA_ARGS__));\
+	}
+
+/*
+ * To keep the naming coherent, re-define SYSCALL_DEFINE0 to create an alias
+ * named __ia32_sys_*()
+ */
+#define SYSCALL_DEFINE0(sname)					\
+	SYSCALL_METADATA(_##sname, 0);				\
+	asmlinkage long __x64_sys_##sname(void);		\
+	ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO);	\
+	SYSCALL_ALIAS(__ia32_sys_##sname, __x64_sys_##sname);	\
+	asmlinkage long __x64_sys_##sname(void)
+
+#define COND_SYSCALL(name)						\
+	cond_syscall(__x64_sys_##name);					\
+	cond_syscall(__ia32_sys_##name)
+
+#define SYS_NI(name)							\
+	SYSCALL_ALIAS(__x64_sys_##name, sys_ni_posix_timers);		\
+	SYSCALL_ALIAS(__ia32_sys_##name, sys_ni_posix_timers)
+
+#else /* CONFIG_IA32_EMULATION */
+#define __IA32_COMPAT_SYS_STUBx(x, name, ...)
+#define __IA32_SYS_STUBx(x, name, ...)	/* expands to nothing without IA32 emulation */
+#endif /* CONFIG_IA32_EMULATION */
+
+
+#ifdef CONFIG_X86_X32
+/*
+ * For the x32 ABI, we need to create a stub for compat_sys_*() which is aware
+ * of the x86-64-style parameter ordering of x32 syscalls. The syscalls common
+ * with x86_64 obviously do not need such care.
+ */
+#define __X32_COMPAT_SYS_STUBx(x, name, ...)				\
+	asmlinkage long __x32_compat_sys##name(const struct pt_regs *regs);\
+	ALLOW_ERROR_INJECTION(__x32_compat_sys##name, ERRNO);		\
+	asmlinkage long __x32_compat_sys##name(const struct pt_regs *regs)\
+	{								\
+		return __se_compat_sys##name(SC_X86_64_REGS_TO_ARGS(x,__VA_ARGS__));\
+	}
+
+#else /* CONFIG_X86_X32 */
+#define __X32_COMPAT_SYS_STUBx(x, name, ...)
+#endif /* CONFIG_X86_X32 */
+
+
+#ifdef CONFIG_COMPAT
+/*
+ * Compat means IA32_EMULATION and/or X86_X32. As they use a different
+ * mapping of registers to parameters, we need to generate stubs for each
+ * of them.
+ */
+#define COMPAT_SYSCALL_DEFINEx(x, name, ...)					\
+	static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__));	\
+	static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\
+	__IA32_COMPAT_SYS_STUBx(x, name, __VA_ARGS__)				\
+	__X32_COMPAT_SYS_STUBx(x, name, __VA_ARGS__)				\
+	static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__))	\
+	{									\
+		return __do_compat_sys##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__));\
+	}									\
+	static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
+
+/*
+ * As some compat syscalls may not be implemented, we need to expand
+ * COND_SYSCALL_COMPAT in kernel/sys_ni.c and COMPAT_SYS_NI in
+ * kernel/time/posix-stubs.c to cover this case as well.
+ */
+#define COND_SYSCALL_COMPAT(name) 					\
+	cond_syscall(__ia32_compat_sys_##name);				\
+	cond_syscall(__x32_compat_sys_##name)
+
+#define COMPAT_SYS_NI(name)						\
+	SYSCALL_ALIAS(__ia32_compat_sys_##name, sys_ni_posix_timers);	\
+	SYSCALL_ALIAS(__x32_compat_sys_##name, sys_ni_posix_timers)
+
+#endif /* CONFIG_COMPAT */
+
+
+/*
+ * Instead of the generic __SYSCALL_DEFINEx() definition, this macro takes
+ * struct pt_regs *regs as the only argument of the syscall stub named
+ * __x64_sys_*(). It decodes just the registers it needs and passes them on to
+ * the __se_sys_*() wrapper performing sign extension and then to the
+ * __do_sys_*() function doing the actual job. These wrappers and functions
+ * are inlined (at least in very most cases), meaning that the assembly looks
+ * as follows (slightly re-ordered for better readability):
+ *
+ * <__x64_sys_recv>:		<-- syscall with 4 parameters
+ *	callq	<__fentry__>
+ *
+ *	mov	0x70(%rdi),%rdi	<-- decode regs->di
+ *	mov	0x68(%rdi),%rsi	<-- decode regs->si
+ *	mov	0x60(%rdi),%rdx	<-- decode regs->dx
+ *	mov	0x38(%rdi),%rcx	<-- decode regs->r10
+ *
+ *	xor	%r9d,%r9d	<-- clear %r9
+ *	xor	%r8d,%r8d	<-- clear %r8
+ *
+ *	callq	__sys_recvfrom	<-- do the actual work in __sys_recvfrom()
+ *				    which takes 6 arguments
+ *
+ *	cltq			<-- extend return value to 64-bit
+ *	retq			<-- return
+ *
+ * This approach avoids leaking random user-provided register content down
+ * the call chain.
+ *
+ * If IA32_EMULATION is enabled, this macro generates an additional wrapper
+ * named __ia32_sys_*() which decodes the struct pt_regs *regs according
+ * to the i386 calling convention (bx, cx, dx, si, di, bp).
+ */
+#define __SYSCALL_DEFINEx(x, name, ...)					\
+	asmlinkage long __x64_sys##name(const struct pt_regs *regs);	\
+	ALLOW_ERROR_INJECTION(__x64_sys##name, ERRNO);			\
+	static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__));	\
+	static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\
+	asmlinkage long __x64_sys##name(const struct pt_regs *regs)	\
+	{								\
+		return __se_sys##name(SC_X86_64_REGS_TO_ARGS(x,__VA_ARGS__));\
+	}								\
+	__IA32_SYS_STUBx(x, name, __VA_ARGS__)				\
+	static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__))	\
+	{								\
+		long ret = __do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__));\
+		__MAP(x,__SC_TEST,__VA_ARGS__);				\
+		__PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__));	\
+		return ret;						\
+	}								\
+	static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
+
+/*
+ * As the generic SYSCALL_DEFINE0() macro does not decode any parameters for
+ * obvious reasons, and passing struct pt_regs *regs to it in %rdi does not
+ * hurt, we only need to re-define it here to keep the naming congruent to
+ * SYSCALL_DEFINEx() -- which is essential for the COND_SYSCALL() and SYS_NI()
+ * macros to work correctly.
+ */
+#ifndef SYSCALL_DEFINE0
+#define SYSCALL_DEFINE0(sname)					\
+	SYSCALL_METADATA(_##sname, 0);				\
+	asmlinkage long __x64_sys_##sname(void);		\
+	ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO);	\
+	asmlinkage long __x64_sys_##sname(void)
+#endif
+
+#ifndef COND_SYSCALL
+#define COND_SYSCALL(name) cond_syscall(__x64_sys_##name)
+#endif
+
+#ifndef SYS_NI
+#define SYS_NI(name) SYSCALL_ALIAS(__x64_sys_##name, sys_ni_posix_timers)
+#endif
+
+
+/*
+ * For VSYSCALLS, we need to declare these three syscalls with the new
+ * pt_regs-based calling convention for in-kernel use.
+ */
+struct pt_regs;
+asmlinkage long __x64_sys_getcpu(const struct pt_regs *regs);
+asmlinkage long __x64_sys_gettimeofday(const struct pt_regs *regs);
+asmlinkage long __x64_sys_time(const struct pt_regs *regs);
+
+#endif /* _ASM_X86_SYSCALL_WRAPPER_H */
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index ae6e05fdc24b..9fa979dd0d9d 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -18,6 +18,12 @@
 /* Common in X86_32 and X86_64 */
 /* kernel/ioport.c */
 long ksys_ioperm(unsigned long from, unsigned long num, int turn_on);
+
+#ifdef CONFIG_X86_32
+/*
+ * These definitions are only valid on pure 32-bit systems; x86-64 uses a
+ * different syscall calling convention
+ */
 asmlinkage long sys_ioperm(unsigned long, unsigned long, int);
 asmlinkage long sys_iopl(unsigned int);
 
@@ -32,7 +38,6 @@ asmlinkage long sys_set_thread_area(struct user_desc __user *);
 asmlinkage long sys_get_thread_area(struct user_desc __user *);
 
 /* X86_32 only */
-#ifdef CONFIG_X86_32
 
 /* kernel/signal.c */
 asmlinkage long sys_sigreturn(void);
@@ -42,15 +47,5 @@ struct vm86_struct;
 asmlinkage long sys_vm86old(struct vm86_struct __user *);
 asmlinkage long sys_vm86(unsigned long, unsigned long);
 
-#else /* CONFIG_X86_32 */
-
-/* X86_64 only */
-/* kernel/process_64.c */
-asmlinkage long sys_arch_prctl(int, unsigned long);
-
-/* kernel/sys_x86_64.c */
-asmlinkage long sys_mmap(unsigned long, unsigned long, unsigned long,
-			 unsigned long, unsigned long, unsigned long);
-
 #endif /* CONFIG_X86_32 */
 #endif /* _ASM_X86_SYSCALLS_H */
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 84137c22fdfa..6690cd3fc8b1 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -131,7 +131,12 @@ static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
 static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
 {
 	VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
-	VM_WARN_ON_ONCE(!this_cpu_has(X86_FEATURE_PCID));
+	/*
+	 * Use boot_cpu_has() instead of this_cpu_has() as this function
+	 * might be called during early boot. This should work even after
+	 * boot because all CPUs have the same capabilities:
+	 */
+	VM_WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_PCID));
 	return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH;
 }
 
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 199e15bd3ec5..ce8b4da07e35 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -122,12 +122,14 @@ struct x86_init_pci {
  * @guest_late_init:		guest late init
  * @x2apic_available:		X2APIC detection
  * @init_mem_mapping:		setup early mappings during init_mem_mapping()
+ * @init_after_bootmem:		guest init after boot allocator is finished
  */
 struct x86_hyper_init {
 	void (*init_platform)(void);
 	void (*guest_late_init)(void);
 	bool (*x2apic_available)(void);
 	void (*init_mem_mapping)(void);
+	void (*init_after_bootmem)(void);
 };
 
 /**
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index aebf60357758..a06cbf019744 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -137,15 +137,15 @@ struct boot_e820_entry {
  * setup data structure.
  */
 struct jailhouse_setup_data {
-	u16	version;
-	u16	compatible_version;
-	u16	pm_timer_address;
-	u16	num_cpus;
-	u64	pci_mmconfig_base;
-	u32	tsc_khz;
-	u32	apic_khz;
-	u8	standard_ioapic;
-	u8	cpu_ids[255];
+	__u16	version;
+	__u16	compatible_version;
+	__u16	pm_timer_address;
+	__u16	num_cpus;
+	__u64	pci_mmconfig_base;
+	__u32	tsc_khz;
+	__u32	apic_khz;
+	__u8	standard_ioapic;
+	__u8	cpu_ids[255];
 } __attribute__((packed));
 
 /* The so-called "zeropage" */
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index f3a960488eae..c535c2fdea13 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -354,8 +354,25 @@ struct kvm_xcrs {
 	__u64 padding[16];
 };
 
-/* definition of registers in kvm_run */
+#define KVM_SYNC_X86_REGS      (1UL << 0)
+#define KVM_SYNC_X86_SREGS     (1UL << 1)
+#define KVM_SYNC_X86_EVENTS    (1UL << 2)
+
+#define KVM_SYNC_X86_VALID_FIELDS \
+	(KVM_SYNC_X86_REGS| \
+	 KVM_SYNC_X86_SREGS| \
+	 KVM_SYNC_X86_EVENTS)
+
+/* kvm_sync_regs struct included by kvm_run struct */
 struct kvm_sync_regs {
+	/* Members of this structure are potentially malicious.
+	 * Care must be taken by code reading, esp. interpreting,
+	 * data fields from them inside KVM to prevent TOCTOU and
+	 * double-fetch types of vulnerabilities.
+	 */
+	struct kvm_regs regs;
+	struct kvm_sregs sregs;
+	struct kvm_vcpu_events events;
 };
 
 #define KVM_X86_QUIRK_LINT0_REENABLED	(1 << 0)
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 6cfa9c8cb7d6..4c851ebb3ceb 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -3,15 +3,16 @@
 #define _UAPI_ASM_X86_KVM_PARA_H
 
 #include <linux/types.h>
-#include <asm/hyperv.h>
 
 /* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx.  It
  * should be used to determine that a VM is running under KVM.
  */
 #define KVM_CPUID_SIGNATURE	0x40000000
 
-/* This CPUID returns a feature bitmap in eax.  Before enabling a particular
- * paravirtualization, the appropriate feature bit should be checked.
+/* This CPUID returns two feature bitmaps in eax, edx. Before enabling
+ * a particular paravirtualization, the appropriate feature bit should
+ * be checked in eax. The performance hint feature bit should be checked
+ * in edx.
  */
 #define KVM_CPUID_FEATURES	0x40000001
 #define KVM_FEATURE_CLOCKSOURCE		0
@@ -28,6 +29,8 @@
 #define KVM_FEATURE_PV_TLB_FLUSH	9
 #define KVM_FEATURE_ASYNC_PF_VMEXIT	10
 
+#define KVM_HINTS_DEDICATED      0
+
 /* The last 8 bits are used to indicate how to interpret the flags field
  * in pvclock structure. If no bits are set, all flags are ignored.
  */
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 7a37d9357bc4..dde444f932c1 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -200,7 +200,7 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
 {
 	struct acpi_madt_local_x2apic *processor = NULL;
 #ifdef CONFIG_X86_X2APIC
-	int apic_id;
+	u32 apic_id;
 	u8 enabled;
 #endif
 
@@ -222,10 +222,13 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
 	 * to not preallocating memory for all NR_CPUS
 	 * when we use CPU hotplug.
 	 */
-	if (!apic->apic_id_valid(apic_id) && enabled)
-		printk(KERN_WARNING PREFIX "x2apic entry ignored\n");
-	else
-		acpi_register_lapic(apic_id, processor->uid, enabled);
+	if (!apic->apic_id_valid(apic_id)) {
+		if (enabled)
+			pr_warn(PREFIX "x2apic entry ignored\n");
+		return 0;
+	}
+
+	acpi_register_lapic(apic_id, processor->uid, enabled);
 #else
 	printk(KERN_WARNING PREFIX "x2apic entry ignored\n");
 #endif
diff --git a/arch/x86/kernel/apic/apic_common.c b/arch/x86/kernel/apic/apic_common.c
index a360801779ae..02b4839478b1 100644
--- a/arch/x86/kernel/apic/apic_common.c
+++ b/arch/x86/kernel/apic/apic_common.c
@@ -40,7 +40,7 @@ int default_check_phys_apicid_present(int phys_apicid)
 	return physid_isset(phys_apicid, phys_cpu_present_map);
 }
 
-int default_apic_id_valid(int apicid)
+int default_apic_id_valid(u32 apicid)
 {
 	return (apicid < 255);
 }
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index 134e04506ab4..78778b54f904 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -56,7 +56,7 @@ static u32 numachip2_set_apic_id(unsigned int id)
 	return id << 24;
 }
 
-static int numachip_apic_id_valid(int apicid)
+static int numachip_apic_id_valid(u32 apicid)
 {
 	/* Trust what bootloader passes in MADT */
 	return 1;
diff --git a/arch/x86/kernel/apic/x2apic.h b/arch/x86/kernel/apic/x2apic.h
index b107de381cb5..a49b3604027f 100644
--- a/arch/x86/kernel/apic/x2apic.h
+++ b/arch/x86/kernel/apic/x2apic.h
@@ -1,6 +1,6 @@
 /* Common bits for X2APIC cluster/physical modes. */
 
-int x2apic_apic_id_valid(int apicid);
+int x2apic_apic_id_valid(u32 apicid);
 int x2apic_apic_id_registered(void);
 void __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest);
 unsigned int x2apic_get_apic_id(unsigned long id);
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index e2829bf40e4a..b5cf9e7b3830 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -101,7 +101,7 @@ static int x2apic_phys_probe(void)
 }
 
 /* Common x2apic functions, also used by x2apic_cluster */
-int x2apic_apic_id_valid(int apicid)
+int x2apic_apic_id_valid(u32 apicid)
 {
 	return 1;
 }
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index f11910b44638..efaf2d4f9c3c 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -557,7 +557,7 @@ static void uv_send_IPI_all(int vector)
 	uv_send_IPI_mask(cpu_online_mask, vector);
 }
 
-static int uv_apic_id_valid(int apicid)
+static int uv_apic_id_valid(u32 apicid)
 {
 	return 1;
 }
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 348cf4821240..8a5b185735e1 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -487,7 +487,7 @@ void load_percpu_segment(int cpu)
 	loadsegment(fs, __KERNEL_PERCPU);
 #else
 	__loadsegment_simple(gs, 0);
-	wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
+	wrmsrl(MSR_GS_BASE, cpu_kernelmode_gs_base(cpu));
 #endif
 	load_stack_canary_segment();
 }
@@ -848,18 +848,6 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
 		c->x86_power = edx;
 	}
 
-	if (c->extended_cpuid_level >= 0x80000008) {
-		cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
-
-		c->x86_virt_bits = (eax >> 8) & 0xff;
-		c->x86_phys_bits = eax & 0xff;
-		c->x86_capability[CPUID_8000_0008_EBX] = ebx;
-	}
-#ifdef CONFIG_X86_32
-	else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36))
-		c->x86_phys_bits = 36;
-#endif
-
 	if (c->extended_cpuid_level >= 0x8000000a)
 		c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a);
 
@@ -874,6 +862,23 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
 	apply_forced_caps(c);
 }
 
+static void get_cpu_address_sizes(struct cpuinfo_x86 *c)
+{
+	u32 eax, ebx, ecx, edx;
+
+	if (c->extended_cpuid_level >= 0x80000008) {
+		cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
+
+		c->x86_virt_bits = (eax >> 8) & 0xff;
+		c->x86_phys_bits = eax & 0xff;
+		c->x86_capability[CPUID_8000_0008_EBX] = ebx;
+	}
+#ifdef CONFIG_X86_32
+	else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36))
+		c->x86_phys_bits = 36;
+#endif
+}
+
 static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_X86_32
@@ -965,6 +970,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 		cpu_detect(c);
 		get_cpu_vendor(c);
 		get_cpu_cap(c);
+		get_cpu_address_sizes(c);
 		setup_force_cpu_cap(X86_FEATURE_CPUID);
 
 		if (this_cpu->c_early_init)
@@ -1097,6 +1103,8 @@ static void generic_identify(struct cpuinfo_x86 *c)
 
 	get_cpu_cap(c);
 
+	get_cpu_address_sizes(c);
+
 	if (c->cpuid_level >= 0x00000001) {
 		c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF;
 #ifdef CONFIG_X86_32
@@ -1398,6 +1406,7 @@ __setup("clearcpuid=", setup_clearcpuid);
 #ifdef CONFIG_X86_64
 DEFINE_PER_CPU_FIRST(union irq_stack_union,
 		     irq_stack_union) __aligned(PAGE_SIZE) __visible;
+EXPORT_PER_CPU_SYMBOL_GPL(irq_stack_union);
 
 /*
  * The following percpu variables are hot.  Align current_task to
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
index 904b0a3c4e53..2c0bd38a44ab 100644
--- a/arch/x86/kernel/cpu/cpuid-deps.c
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -19,7 +19,7 @@ struct cpuid_dep {
  * called from cpu hotplug. It shouldn't do anything in this case,
  * but it's difficult to tell that to the init reference checker.
  */
-const static struct cpuid_dep cpuid_deps[] = {
+static const struct cpuid_dep cpuid_deps[] = {
 	{ X86_FEATURE_XSAVEOPT,		X86_FEATURE_XSAVE     },
 	{ X86_FEATURE_XSAVEC,		X86_FEATURE_XSAVE     },
 	{ X86_FEATURE_XSAVES,		X86_FEATURE_XSAVE     },
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 4488cf0dd499..031082c96db8 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -22,7 +22,7 @@
 #include <linux/kexec.h>
 #include <asm/processor.h>
 #include <asm/hypervisor.h>
-#include <asm/hyperv.h>
+#include <asm/hyperv-tlfs.h>
 #include <asm/mshyperv.h>
 #include <asm/desc.h>
 #include <asm/irq_regs.h>
@@ -216,8 +216,8 @@ static void __init ms_hyperv_init_platform(void)
 	pr_info("Hyper-V: features 0x%x, hints 0x%x\n",
 		ms_hyperv.features, ms_hyperv.hints);
 
-	ms_hyperv.max_vp_index = cpuid_eax(HVCPUID_IMPLEMENTATION_LIMITS);
-	ms_hyperv.max_lp_index = cpuid_ebx(HVCPUID_IMPLEMENTATION_LIMITS);
+	ms_hyperv.max_vp_index = cpuid_eax(HYPERV_CPUID_IMPLEMENT_LIMITS);
+	ms_hyperv.max_lp_index = cpuid_ebx(HYPERV_CPUID_IMPLEMENT_LIMITS);
 
 	pr_debug("Hyper-V: max %u virtual processors, %u logical processors\n",
 		 ms_hyperv.max_vp_index, ms_hyperv.max_lp_index);
@@ -225,11 +225,12 @@ static void __init ms_hyperv_init_platform(void)
 	/*
 	 * Extract host information.
 	 */
-	if (cpuid_eax(HVCPUID_VENDOR_MAXFUNCTION) >= HVCPUID_VERSION) {
-		hv_host_info_eax = cpuid_eax(HVCPUID_VERSION);
-		hv_host_info_ebx = cpuid_ebx(HVCPUID_VERSION);
-		hv_host_info_ecx = cpuid_ecx(HVCPUID_VERSION);
-		hv_host_info_edx = cpuid_edx(HVCPUID_VERSION);
+	if (cpuid_eax(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS) >=
+	    HYPERV_CPUID_VERSION) {
+		hv_host_info_eax = cpuid_eax(HYPERV_CPUID_VERSION);
+		hv_host_info_ebx = cpuid_ebx(HYPERV_CPUID_VERSION);
+		hv_host_info_ecx = cpuid_ecx(HYPERV_CPUID_VERSION);
+		hv_host_info_edx = cpuid_edx(HYPERV_CPUID_VERSION);
 
 		pr_info("Hyper-V Host Build:%d-%d.%d-%d-%d.%d\n",
 			hv_host_info_eax, hv_host_info_ebx >> 16,
@@ -243,6 +244,11 @@ static void __init ms_hyperv_init_platform(void)
 		x86_platform.calibrate_cpu = hv_get_tsc_khz;
 	}
 
+	if (ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED) {
+		ms_hyperv.nested_features =
+			cpuid_eax(HYPERV_CPUID_NESTED_FEATURES);
+	}
+
 #ifdef CONFIG_X86_LOCAL_APIC
 	if (ms_hyperv.features & HV_X64_ACCESS_FREQUENCY_MSRS &&
 	    ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) {
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 1f6680427ff0..f631a3f15587 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -38,37 +38,6 @@
 #include <asm/virtext.h>
 #include <asm/intel_pt.h>
 
-/* Alignment required for elf header segment */
-#define ELF_CORE_HEADER_ALIGN   4096
-
-/* This primarily represents number of split ranges due to exclusion */
-#define CRASH_MAX_RANGES	16
-
-struct crash_mem_range {
-	u64 start, end;
-};
-
-struct crash_mem {
-	unsigned int nr_ranges;
-	struct crash_mem_range ranges[CRASH_MAX_RANGES];
-};
-
-/* Misc data about ram ranges needed to prepare elf headers */
-struct crash_elf_data {
-	struct kimage *image;
-	/*
-	 * Total number of ram ranges we have after various adjustments for
-	 * crash reserved region, etc.
-	 */
-	unsigned int max_nr_ranges;
-
-	/* Pointer to elf header */
-	void *ehdr;
-	/* Pointer to next phdr */
-	void *bufp;
-	struct crash_mem mem;
-};
-
 /* Used while preparing memory map entries for second kernel */
 struct crash_memmap_data {
 	struct boot_params *params;
@@ -218,124 +187,49 @@ static int get_nr_ram_ranges_callback(struct resource *res, void *arg)
 	return 0;
 }
 
-
 /* Gather all the required information to prepare elf headers for ram regions */
-static void fill_up_crash_elf_data(struct crash_elf_data *ced,
-				   struct kimage *image)
+static struct crash_mem *fill_up_crash_elf_data(void)
 {
 	unsigned int nr_ranges = 0;
-
-	ced->image = image;
+	struct crash_mem *cmem;
 
 	walk_system_ram_res(0, -1, &nr_ranges,
 				get_nr_ram_ranges_callback);
+	if (!nr_ranges)
+		return NULL;
 
-	ced->max_nr_ranges = nr_ranges;
-
-	/* Exclusion of crash region could split memory ranges */
-	ced->max_nr_ranges++;
-
-	/* If crashk_low_res is not 0, another range split possible */
-	if (crashk_low_res.end)
-		ced->max_nr_ranges++;
-}
-
-static int exclude_mem_range(struct crash_mem *mem,
-		unsigned long long mstart, unsigned long long mend)
-{
-	int i, j;
-	unsigned long long start, end;
-	struct crash_mem_range temp_range = {0, 0};
-
-	for (i = 0; i < mem->nr_ranges; i++) {
-		start = mem->ranges[i].start;
-		end = mem->ranges[i].end;
-
-		if (mstart > end || mend < start)
-			continue;
-
-		/* Truncate any area outside of range */
-		if (mstart < start)
-			mstart = start;
-		if (mend > end)
-			mend = end;
-
-		/* Found completely overlapping range */
-		if (mstart == start && mend == end) {
-			mem->ranges[i].start = 0;
-			mem->ranges[i].end = 0;
-			if (i < mem->nr_ranges - 1) {
-				/* Shift rest of the ranges to left */
-				for (j = i; j < mem->nr_ranges - 1; j++) {
-					mem->ranges[j].start =
-						mem->ranges[j+1].start;
-					mem->ranges[j].end =
-							mem->ranges[j+1].end;
-				}
-			}
-			mem->nr_ranges--;
-			return 0;
-		}
-
-		if (mstart > start && mend < end) {
-			/* Split original range */
-			mem->ranges[i].end = mstart - 1;
-			temp_range.start = mend + 1;
-			temp_range.end = end;
-		} else if (mstart != start)
-			mem->ranges[i].end = mstart - 1;
-		else
-			mem->ranges[i].start = mend + 1;
-		break;
-	}
+	/*
+	 * Exclusion of crash region and/or crashk_low_res may cause
+	 * another range split. So add extra two slots here.
+	 */
+	nr_ranges += 2;
+	cmem = vzalloc(sizeof(struct crash_mem) +
+			sizeof(struct crash_mem_range) * nr_ranges);
+	if (!cmem)
+		return NULL;
 
-	/* If a split happend, add the split to array */
-	if (!temp_range.end)
-		return 0;
+	cmem->max_nr_ranges = nr_ranges;
+	cmem->nr_ranges = 0;
 
-	/* Split happened */
-	if (i == CRASH_MAX_RANGES - 1) {
-		pr_err("Too many crash ranges after split\n");
-		return -ENOMEM;
-	}
-
-	/* Location where new range should go */
-	j = i + 1;
-	if (j < mem->nr_ranges) {
-		/* Move over all ranges one slot towards the end */
-		for (i = mem->nr_ranges - 1; i >= j; i--)
-			mem->ranges[i + 1] = mem->ranges[i];
-	}
-
-	mem->ranges[j].start = temp_range.start;
-	mem->ranges[j].end = temp_range.end;
-	mem->nr_ranges++;
-	return 0;
+	return cmem;
 }
 
 /*
  * Look for any unwanted ranges between mstart, mend and remove them. This
- * might lead to split and split ranges are put in ced->mem.ranges[] array
+ * might lead to split and split ranges are put in cmem->ranges[] array
  */
-static int elf_header_exclude_ranges(struct crash_elf_data *ced,
-		unsigned long long mstart, unsigned long long mend)
+static int elf_header_exclude_ranges(struct crash_mem *cmem)
 {
-	struct crash_mem *cmem = &ced->mem;
 	int ret = 0;
 
-	memset(cmem->ranges, 0, sizeof(cmem->ranges));
-
-	cmem->ranges[0].start = mstart;
-	cmem->ranges[0].end = mend;
-	cmem->nr_ranges = 1;
-
 	/* Exclude crashkernel region */
-	ret = exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
+	ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
 	if (ret)
 		return ret;
 
 	if (crashk_low_res.end) {
-		ret = exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end);
+		ret = crash_exclude_mem_range(cmem, crashk_low_res.start,
+							crashk_low_res.end);
 		if (ret)
 			return ret;
 	}
@@ -345,144 +239,12 @@ static int elf_header_exclude_ranges(struct crash_elf_data *ced,
 
 static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg)
 {
-	struct crash_elf_data *ced = arg;
-	Elf64_Ehdr *ehdr;
-	Elf64_Phdr *phdr;
-	unsigned long mstart, mend;
-	struct kimage *image = ced->image;
-	struct crash_mem *cmem;
-	int ret, i;
-
-	ehdr = ced->ehdr;
-
-	/* Exclude unwanted mem ranges */
-	ret = elf_header_exclude_ranges(ced, res->start, res->end);
-	if (ret)
-		return ret;
-
-	/* Go through all the ranges in ced->mem.ranges[] and prepare phdr */
-	cmem = &ced->mem;
-
-	for (i = 0; i < cmem->nr_ranges; i++) {
-		mstart = cmem->ranges[i].start;
-		mend = cmem->ranges[i].end;
-
-		phdr = ced->bufp;
-		ced->bufp += sizeof(Elf64_Phdr);
-
-		phdr->p_type = PT_LOAD;
-		phdr->p_flags = PF_R|PF_W|PF_X;
-		phdr->p_offset  = mstart;
-
-		/*
-		 * If a range matches backup region, adjust offset to backup
-		 * segment.
-		 */
-		if (mstart == image->arch.backup_src_start &&
-		    (mend - mstart + 1) == image->arch.backup_src_sz)
-			phdr->p_offset = image->arch.backup_load_addr;
-
-		phdr->p_paddr = mstart;
-		phdr->p_vaddr = (unsigned long long) __va(mstart);
-		phdr->p_filesz = phdr->p_memsz = mend - mstart + 1;
-		phdr->p_align = 0;
-		ehdr->e_phnum++;
-		pr_debug("Crash PT_LOAD elf header. phdr=%p vaddr=0x%llx, paddr=0x%llx, sz=0x%llx e_phnum=%d p_offset=0x%llx\n",
-			phdr, phdr->p_vaddr, phdr->p_paddr, phdr->p_filesz,
-			ehdr->e_phnum, phdr->p_offset);
-	}
-
-	return ret;
-}
-
-static int prepare_elf64_headers(struct crash_elf_data *ced,
-		void **addr, unsigned long *sz)
-{
-	Elf64_Ehdr *ehdr;
-	Elf64_Phdr *phdr;
-	unsigned long nr_cpus = num_possible_cpus(), nr_phdr, elf_sz;
-	unsigned char *buf, *bufp;
-	unsigned int cpu;
-	unsigned long long notes_addr;
-	int ret;
+	struct crash_mem *cmem = arg;
 
-	/* extra phdr for vmcoreinfo elf note */
-	nr_phdr = nr_cpus + 1;
-	nr_phdr += ced->max_nr_ranges;
-
-	/*
-	 * kexec-tools creates an extra PT_LOAD phdr for kernel text mapping
-	 * area on x86_64 (ffffffff80000000 - ffffffffa0000000).
-	 * I think this is required by tools like gdb. So same physical
-	 * memory will be mapped in two elf headers. One will contain kernel
-	 * text virtual addresses and other will have __va(physical) addresses.
-	 */
+	cmem->ranges[cmem->nr_ranges].start = res->start;
+	cmem->ranges[cmem->nr_ranges].end = res->end;
+	cmem->nr_ranges++;
 
-	nr_phdr++;
-	elf_sz = sizeof(Elf64_Ehdr) + nr_phdr * sizeof(Elf64_Phdr);
-	elf_sz = ALIGN(elf_sz, ELF_CORE_HEADER_ALIGN);
-
-	buf = vzalloc(elf_sz);
-	if (!buf)
-		return -ENOMEM;
-
-	bufp = buf;
-	ehdr = (Elf64_Ehdr *)bufp;
-	bufp += sizeof(Elf64_Ehdr);
-	memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
-	ehdr->e_ident[EI_CLASS] = ELFCLASS64;
-	ehdr->e_ident[EI_DATA] = ELFDATA2LSB;
-	ehdr->e_ident[EI_VERSION] = EV_CURRENT;
-	ehdr->e_ident[EI_OSABI] = ELF_OSABI;
-	memset(ehdr->e_ident + EI_PAD, 0, EI_NIDENT - EI_PAD);
-	ehdr->e_type = ET_CORE;
-	ehdr->e_machine = ELF_ARCH;
-	ehdr->e_version = EV_CURRENT;
-	ehdr->e_phoff = sizeof(Elf64_Ehdr);
-	ehdr->e_ehsize = sizeof(Elf64_Ehdr);
-	ehdr->e_phentsize = sizeof(Elf64_Phdr);
-
-	/* Prepare one phdr of type PT_NOTE for each present cpu */
-	for_each_present_cpu(cpu) {
-		phdr = (Elf64_Phdr *)bufp;
-		bufp += sizeof(Elf64_Phdr);
-		phdr->p_type = PT_NOTE;
-		notes_addr = per_cpu_ptr_to_phys(per_cpu_ptr(crash_notes, cpu));
-		phdr->p_offset = phdr->p_paddr = notes_addr;
-		phdr->p_filesz = phdr->p_memsz = sizeof(note_buf_t);
-		(ehdr->e_phnum)++;
-	}
-
-	/* Prepare one PT_NOTE header for vmcoreinfo */
-	phdr = (Elf64_Phdr *)bufp;
-	bufp += sizeof(Elf64_Phdr);
-	phdr->p_type = PT_NOTE;
-	phdr->p_offset = phdr->p_paddr = paddr_vmcoreinfo_note();
-	phdr->p_filesz = phdr->p_memsz = VMCOREINFO_NOTE_SIZE;
-	(ehdr->e_phnum)++;
-
-#ifdef CONFIG_X86_64
-	/* Prepare PT_LOAD type program header for kernel text region */
-	phdr = (Elf64_Phdr *)bufp;
-	bufp += sizeof(Elf64_Phdr);
-	phdr->p_type = PT_LOAD;
-	phdr->p_flags = PF_R|PF_W|PF_X;
-	phdr->p_vaddr = (Elf64_Addr)_text;
-	phdr->p_filesz = phdr->p_memsz = _end - _text;
-	phdr->p_offset = phdr->p_paddr = __pa_symbol(_text);
-	(ehdr->e_phnum)++;
-#endif
-
-	/* Prepare PT_LOAD headers for system ram chunks. */
-	ced->ehdr = ehdr;
-	ced->bufp = bufp;
-	ret = walk_system_ram_res(0, -1, ced,
-			prepare_elf64_ram_headers_callback);
-	if (ret < 0)
-		return ret;
-
-	*addr = buf;
-	*sz = elf_sz;
 	return 0;
 }
 
@@ -490,18 +252,46 @@ static int prepare_elf64_headers(struct crash_elf_data *ced,
 static int prepare_elf_headers(struct kimage *image, void **addr,
 					unsigned long *sz)
 {
-	struct crash_elf_data *ced;
-	int ret;
+	struct crash_mem *cmem;
+	Elf64_Ehdr *ehdr;
+	Elf64_Phdr *phdr;
+	int ret, i;
 
-	ced = kzalloc(sizeof(*ced), GFP_KERNEL);
-	if (!ced)
+	cmem = fill_up_crash_elf_data();
+	if (!cmem)
 		return -ENOMEM;
 
-	fill_up_crash_elf_data(ced, image);
+	ret = walk_system_ram_res(0, -1, cmem,
+				prepare_elf64_ram_headers_callback);
+	if (ret)
+		goto out;
+
+	/* Exclude unwanted mem ranges */
+	ret = elf_header_exclude_ranges(cmem);
+	if (ret)
+		goto out;
 
 	/* By default prepare 64bit headers */
-	ret =  prepare_elf64_headers(ced, addr, sz);
-	kfree(ced);
+	ret =  crash_prepare_elf64_headers(cmem,
+				IS_ENABLED(CONFIG_X86_64), addr, sz);
+	if (ret)
+		goto out;
+
+	/*
+	 * If a range matches backup region, adjust offset to backup
+	 * segment.
+	 */
+	ehdr = (Elf64_Ehdr *)*addr;
+	phdr = (Elf64_Phdr *)(ehdr + 1);
+	for (i = 0; i < ehdr->e_phnum; phdr++, i++)
+		if (phdr->p_type == PT_LOAD &&
+				phdr->p_paddr == image->arch.backup_src_start &&
+				phdr->p_memsz == image->arch.backup_src_sz) {
+			phdr->p_offset = image->arch.backup_load_addr;
+			break;
+		}
+out:
+	vfree(cmem);
 	return ret;
 }
 
@@ -547,14 +337,14 @@ static int memmap_exclude_ranges(struct kimage *image, struct crash_mem *cmem,
 	/* Exclude Backup region */
 	start = image->arch.backup_load_addr;
 	end = start + image->arch.backup_src_sz - 1;
-	ret = exclude_mem_range(cmem, start, end);
+	ret = crash_exclude_mem_range(cmem, start, end);
 	if (ret)
 		return ret;
 
 	/* Exclude elf header region */
 	start = image->arch.elf_load_addr;
 	end = start + image->arch.elf_headers_sz - 1;
-	return exclude_mem_range(cmem, start, end);
+	return crash_exclude_mem_range(cmem, start, end);
 }
 
 /* Prepare memory map for crash dump kernel */
diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c
index e5ec3cafa72e..aebd0d5bc086 100644
--- a/arch/x86/kernel/espfix_64.c
+++ b/arch/x86/kernel/espfix_64.c
@@ -195,6 +195,10 @@ void init_espfix_ap(int cpu)
 
 	pte_p = pte_offset_kernel(&pmd, addr);
 	stack_page = page_address(alloc_pages_node(node, GFP_KERNEL, 0));
+	/*
+	 * __PAGE_KERNEL_* includes _PAGE_GLOBAL, which we want since
+	 * this is mapped to userspace.
+	 */
 	pte = __pte(__pa(stack_page) | ((__PAGE_KERNEL_RO | _PAGE_ENC) & ptemask));
 	for (n = 0; n < ESPFIX_PTE_CLONES; n++)
 		set_pte(&pte_p[n*PTE_STRIDE], pte);
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 0c855deee165..0c408f8c4ed4 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -195,6 +195,8 @@ unsigned long __head __startup_64(unsigned long physaddr,
 	pud[i + 1] = (pudval_t)pmd + pgtable_flags;
 
 	pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
+	/* Filter out unsupported __PAGE_KERNEL_* bits: */
+	pmd_entry &= __supported_pte_mask;
 	pmd_entry += sme_get_me_mask();
 	pmd_entry +=  physaddr;
 
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 48385c1074a5..8344dd2f310a 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -399,8 +399,13 @@ NEXT_PAGE(level3_ident_pgt)
 	.quad	level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
 	.fill	511, 8, 0
 NEXT_PAGE(level2_ident_pgt)
-	/* Since I easily can, map the first 1G.
+	/*
+	 * Since I easily can, map the first 1G.
 	 * Don't set NX because code runs from these pages.
+	 *
+	 * Note: This sets _PAGE_GLOBAL regardless of
+	 * whether the CPU supports it or it is enabled.
+	 * But, the CPU should ignore the bit.
 	 */
 	PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
 #else
@@ -431,6 +436,10 @@ NEXT_PAGE(level2_kernel_pgt)
 	 * (NOTE: at +512MB starts the module area, see MODULES_VADDR.
 	 *  If you want to increase this then increase MODULES_VADDR
 	 *  too.)
+	 *
+	 *  This table is eventually used by the kernel during normal
+	 *  runtime.  Care must be taken to clear out undesired bits
+	 *  later, like _PAGE_RW or _PAGE_GLOBAL in some cases.
 	 */
 	PMDS(0, __PAGE_KERNEL_LARGE_EXEC,
 		KERNEL_IMAGE_SIZE/PMD_SIZE)
diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c
index fb095ba0c02f..3182908b7e6c 100644
--- a/arch/x86/kernel/kexec-bzimage64.c
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -334,7 +334,6 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
 	unsigned long setup_header_size, params_cmdline_sz;
 	struct boot_params *params;
 	unsigned long bootparam_load_addr, kernel_load_addr, initrd_load_addr;
-	unsigned long purgatory_load_addr;
 	struct bzimage64_data *ldata;
 	struct kexec_entry64_regs regs64;
 	void *stack;
@@ -342,6 +341,8 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
 	unsigned int efi_map_offset, efi_map_sz, efi_setup_data_offset;
 	struct kexec_buf kbuf = { .image = image, .buf_max = ULONG_MAX,
 				  .top_down = true };
+	struct kexec_buf pbuf = { .image = image, .buf_min = MIN_PURGATORY_ADDR,
+				  .buf_max = ULONG_MAX, .top_down = true };
 
 	header = (struct setup_header *)(kernel + setup_hdr_offset);
 	setup_sects = header->setup_sects;
@@ -379,14 +380,13 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
 	 * Load purgatory. For 64bit entry point, purgatory  code can be
 	 * anywhere.
 	 */
-	ret = kexec_load_purgatory(image, MIN_PURGATORY_ADDR, ULONG_MAX, 1,
-				   &purgatory_load_addr);
+	ret = kexec_load_purgatory(image, &pbuf);
 	if (ret) {
 		pr_err("Loading purgatory failed\n");
 		return ERR_PTR(ret);
 	}
 
-	pr_debug("Loaded purgatory at 0x%lx\n", purgatory_load_addr);
+	pr_debug("Loaded purgatory at 0x%lx\n", pbuf.mem);
 
 
 	/*
@@ -538,7 +538,7 @@ static int bzImage64_verify_sig(const char *kernel, unsigned long kernel_len)
 }
 #endif
 
-struct kexec_file_ops kexec_bzImage64_ops = {
+const struct kexec_file_ops kexec_bzImage64_ops = {
 	.probe = bzImage64_probe,
 	.load = bzImage64_load,
 	.cleanup = bzImage64_cleanup,
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index fae86e36e399..7867417cfaff 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -454,6 +454,13 @@ static void __init sev_map_percpu_data(void)
 }
 
 #ifdef CONFIG_SMP
+static void __init kvm_smp_prepare_cpus(unsigned int max_cpus)
+{
+	native_smp_prepare_cpus(max_cpus);
+	if (kvm_para_has_hint(KVM_HINTS_DEDICATED))
+		static_branch_disable(&virt_spin_lock_key);
+}
+
 static void __init kvm_smp_prepare_boot_cpu(void)
 {
 	/*
@@ -546,6 +553,7 @@ static void __init kvm_guest_init(void)
 	}
 
 	if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
+	    !kvm_para_has_hint(KVM_HINTS_DEDICATED) &&
 	    kvm_para_has_feature(KVM_FEATURE_STEAL_TIME))
 		pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others;
 
@@ -556,6 +564,7 @@ static void __init kvm_guest_init(void)
 		kvm_setup_vsyscall_timeinfo();
 
 #ifdef CONFIG_SMP
+	smp_ops.smp_prepare_cpus = kvm_smp_prepare_cpus;
 	smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
 	if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/kvm:online",
 				      kvm_cpu_online, kvm_cpu_down_prepare) < 0)
@@ -605,6 +614,11 @@ unsigned int kvm_arch_para_features(void)
 	return cpuid_eax(kvm_cpuid_base() | KVM_CPUID_FEATURES);
 }
 
+unsigned int kvm_arch_para_hints(void)
+{
+	return cpuid_edx(kvm_cpuid_base() | KVM_CPUID_FEATURES);
+}
+
 static uint32_t __init kvm_detect(void)
 {
 	return kvm_cpuid_base();
@@ -635,6 +649,7 @@ static __init int kvm_setup_pv_tlb_flush(void)
 	int cpu;
 
 	if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
+	    !kvm_para_has_hint(KVM_HINTS_DEDICATED) &&
 	    kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
 		for_each_possible_cpu(cpu) {
 			zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu),
@@ -730,6 +745,9 @@ void __init kvm_spinlock_init(void)
 	if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
 		return;
 
+	if (kvm_para_has_hint(KVM_HINTS_DEDICATED))
+		return;
+
 	__pv_init_lock_hash();
 	pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
 	pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock);
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 26d713ecad34..d41d896481b8 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -145,6 +145,7 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
 		unsigned long offset = i << PAGE_SHIFT;
 		const void *src = (char *)ldt->entries + offset;
 		unsigned long pfn;
+		pgprot_t pte_prot;
 		pte_t pte, *ptep;
 
 		va = (unsigned long)ldt_slot_va(slot) + offset;
@@ -163,7 +164,10 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
 		 * target via some kernel interface which misses a
 		 * permission check.
 		 */
-		pte = pfn_pte(pfn, __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL));
+		pte_prot = __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL);
+		/* Filter out unsupported __PAGE_KERNEL* bits: */
+		pgprot_val(pte_prot) &= __supported_pte_mask;
+		pte = pfn_pte(pfn, pte_prot);
 		set_pte_at(mm, va, ptep, pte);
 		pte_unmap_unlock(ptep, ptl);
 	}
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 93bd4fb603d1..a5e55d832d0a 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -30,8 +30,9 @@
 #include <asm/set_memory.h>
 
 #ifdef CONFIG_KEXEC_FILE
-static struct kexec_file_ops *kexec_file_loaders[] = {
+const struct kexec_file_ops * const kexec_file_loaders[] = {
 		&kexec_bzImage64_ops,
+		NULL
 };
 #endif
 
@@ -364,27 +365,6 @@ void arch_crash_save_vmcoreinfo(void)
 /* arch-dependent functionality related to kexec file-based syscall */
 
 #ifdef CONFIG_KEXEC_FILE
-int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
-				  unsigned long buf_len)
-{
-	int i, ret = -ENOEXEC;
-	struct kexec_file_ops *fops;
-
-	for (i = 0; i < ARRAY_SIZE(kexec_file_loaders); i++) {
-		fops = kexec_file_loaders[i];
-		if (!fops || !fops->probe)
-			continue;
-
-		ret = fops->probe(buf, buf_len);
-		if (!ret) {
-			image->fops = fops;
-			return ret;
-		}
-	}
-
-	return ret;
-}
-
 void *arch_kexec_kernel_image_load(struct kimage *image)
 {
 	vfree(image->arch.elf_headers);
@@ -399,88 +379,53 @@ void *arch_kexec_kernel_image_load(struct kimage *image)
 				 image->cmdline_buf_len);
 }
 
-int arch_kimage_file_post_load_cleanup(struct kimage *image)
-{
-	if (!image->fops || !image->fops->cleanup)
-		return 0;
-
-	return image->fops->cleanup(image->image_loader_data);
-}
-
-#ifdef CONFIG_KEXEC_VERIFY_SIG
-int arch_kexec_kernel_verify_sig(struct kimage *image, void *kernel,
-				 unsigned long kernel_len)
-{
-	if (!image->fops || !image->fops->verify_sig) {
-		pr_debug("kernel loader does not support signature verification.");
-		return -EKEYREJECTED;
-	}
-
-	return image->fops->verify_sig(kernel, kernel_len);
-}
-#endif
-
 /*
  * Apply purgatory relocations.
  *
- * ehdr: Pointer to elf headers
- * sechdrs: Pointer to section headers.
- * relsec: section index of SHT_RELA section.
+ * @pi:		Purgatory to be relocated.
+ * @section:	Section the relocations apply to.
+ * @relsec:	Section containing RELAs.
+ * @symtabsec:	Corresponding symtab.
  *
  * TODO: Some of the code belongs to generic code. Move that in kexec.c.
  */
-int arch_kexec_apply_relocations_add(const Elf64_Ehdr *ehdr,
-				     Elf64_Shdr *sechdrs, unsigned int relsec)
+int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
+				     Elf_Shdr *section, const Elf_Shdr *relsec,
+				     const Elf_Shdr *symtabsec)
 {
 	unsigned int i;
 	Elf64_Rela *rel;
 	Elf64_Sym *sym;
 	void *location;
-	Elf64_Shdr *section, *symtabsec;
 	unsigned long address, sec_base, value;
 	const char *strtab, *name, *shstrtab;
+	const Elf_Shdr *sechdrs;
 
-	/*
-	 * ->sh_offset has been modified to keep the pointer to section
-	 * contents in memory
-	 */
-	rel = (void *)sechdrs[relsec].sh_offset;
-
-	/* Section to which relocations apply */
-	section = &sechdrs[sechdrs[relsec].sh_info];
-
-	pr_debug("Applying relocate section %u to %u\n", relsec,
-		 sechdrs[relsec].sh_info);
-
-	/* Associated symbol table */
-	symtabsec = &sechdrs[sechdrs[relsec].sh_link];
-
-	/* String table */
-	if (symtabsec->sh_link >= ehdr->e_shnum) {
-		/* Invalid strtab section number */
-		pr_err("Invalid string table section index %d\n",
-		       symtabsec->sh_link);
-		return -ENOEXEC;
-	}
+	/* String & section header string table */
+	sechdrs = (void *)pi->ehdr + pi->ehdr->e_shoff;
+	strtab = (char *)pi->ehdr + sechdrs[symtabsec->sh_link].sh_offset;
+	shstrtab = (char *)pi->ehdr + sechdrs[pi->ehdr->e_shstrndx].sh_offset;
 
-	strtab = (char *)sechdrs[symtabsec->sh_link].sh_offset;
+	rel = (void *)pi->ehdr + relsec->sh_offset;
 
-	/* section header string table */
-	shstrtab = (char *)sechdrs[ehdr->e_shstrndx].sh_offset;
+	pr_debug("Applying relocate section %s to %u\n",
+		 shstrtab + relsec->sh_name, relsec->sh_info);
 
-	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
+	for (i = 0; i < relsec->sh_size / sizeof(*rel); i++) {
 
 		/*
 		 * rel[i].r_offset contains byte offset from beginning
 		 * of section to the storage unit affected.
 		 *
-		 * This is location to update (->sh_offset). This is temporary
-		 * buffer where section is currently loaded. This will finally
-		 * be loaded to a different address later, pointed to by
+		 * This is location to update. This is temporary buffer
+		 * where section is currently loaded. This will finally be
+		 * loaded to a different address later, pointed to by
 		 * ->sh_addr. kexec takes care of moving it
 		 *  (kexec_load_segment()).
 		 */
-		location = (void *)(section->sh_offset + rel[i].r_offset);
+		location = pi->purgatory_buf;
+		location += section->sh_offset;
+		location += rel[i].r_offset;
 
 		/* Final address of the location */
 		address = section->sh_addr + rel[i].r_offset;
@@ -491,8 +436,8 @@ int arch_kexec_apply_relocations_add(const Elf64_Ehdr *ehdr,
 		 * to apply. ELF64_R_SYM() and ELF64_R_TYPE() macros get
 		 * these respectively.
 		 */
-		sym = (Elf64_Sym *)symtabsec->sh_offset +
-				ELF64_R_SYM(rel[i].r_info);
+		sym = (void *)pi->ehdr + symtabsec->sh_offset;
+		sym += ELF64_R_SYM(rel[i].r_info);
 
 		if (sym->st_name)
 			name = strtab + sym->st_name;
@@ -515,12 +460,12 @@ int arch_kexec_apply_relocations_add(const Elf64_Ehdr *ehdr,
 
 		if (sym->st_shndx == SHN_ABS)
 			sec_base = 0;
-		else if (sym->st_shndx >= ehdr->e_shnum) {
+		else if (sym->st_shndx >= pi->ehdr->e_shnum) {
 			pr_err("Invalid section %d for symbol %s\n",
 			       sym->st_shndx, name);
 			return -ENOEXEC;
 		} else
-			sec_base = sechdrs[sym->st_shndx].sh_addr;
+			sec_base = pi->sechdrs[sym->st_shndx].sh_addr;
 
 		value = sym->st_value;
 		value += sec_base;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 9eb448c7859d..4b100fe0f508 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -205,6 +205,20 @@ static __always_inline void save_fsgs(struct task_struct *task)
 	save_base_legacy(task, task->thread.gsindex, GS);
 }
 
+#if IS_ENABLED(CONFIG_KVM)
+/*
+ * While a process is running, current->thread.fsbase and current->thread.gsbase
+ * may not match the corresponding CPU registers (see save_base_legacy()). KVM
+ * wants an efficient way to save and restore FSBASE and GSBASE.
+ * When FSGSBASE extensions are enabled, this will have to use RD{FS,GS}BASE.
+ */
+void save_fsgs_for_kvm(void)
+{
+	save_fsgs(current);
+}
+EXPORT_SYMBOL_GPL(save_fsgs_for_kvm);
+#endif
+
 static __always_inline void loadseg(enum which_selector which,
 				    unsigned short sel)
 {
diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c
index df92605d8724..14c057f29979 100644
--- a/arch/x86/kernel/signal_compat.c
+++ b/arch/x86/kernel/signal_compat.c
@@ -26,7 +26,7 @@ static inline void signal_compat_build_tests(void)
 	 * new fields are handled in copy_siginfo_to_user32()!
 	 */
 	BUILD_BUG_ON(NSIGILL  != 11);
-	BUILD_BUG_ON(NSIGFPE  != 14);
+	BUILD_BUG_ON(NSIGFPE  != 15);
 	BUILD_BUG_ON(NSIGSEGV != 7);
 	BUILD_BUG_ON(NSIGBUS  != 5);
 	BUILD_BUG_ON(NSIGTRAP != 4);
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index ebda84a91510..3ab867603e81 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -92,6 +92,7 @@ struct x86_init_ops x86_init __initdata = {
 		.guest_late_init	= x86_init_noop,
 		.x2apic_available	= bool_x86_init_noop,
 		.init_mem_mapping	= x86_init_noop,
+		.init_after_bootmem	= x86_init_noop,
 	},
 
 	.acpi = {
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index b671fc2d0422..82055b90a8b3 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -135,6 +135,11 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
 			return -EINVAL;
 	}
 
+	best = kvm_find_cpuid_entry(vcpu, KVM_CPUID_FEATURES, 0);
+	if (kvm_hlt_in_guest(vcpu->kvm) && best &&
+		(best->eax & (1 << KVM_FEATURE_PV_UNHALT)))
+		best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);
+
 	/* Update physical-address width */
 	vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
 	kvm_mmu_reset_context(vcpu);
@@ -370,7 +375,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
 		F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |
 		0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM) |
-		F(TOPOEXT);
+		F(TOPOEXT) | F(PERFCTR_CORE);
 
 	/* cpuid 0x80000008.ebx */
 	const u32 kvm_cpuid_8000_0008_ebx_x86_features =
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index d91eaeb01034..b3705ae52824 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -30,6 +30,7 @@
 #include "x86.h"
 #include "tss.h"
 #include "mmu.h"
+#include "pmu.h"
 
 /*
  * Operand types
@@ -2887,6 +2888,9 @@ static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt)
 	return ctxt->ops->cpl(ctxt) > iopl;
 }
 
+#define VMWARE_PORT_VMPORT	(0x5658)
+#define VMWARE_PORT_VMRPC	(0x5659)
+
 static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
 					    u16 port, u16 len)
 {
@@ -2898,6 +2902,14 @@ static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
 	unsigned mask = (1 << len) - 1;
 	unsigned long base;
 
+	/*
+	 * VMware allows access to these ports even if denied
+	 * by TSS I/O permission bitmap. Mimic behavior.
+	 */
+	if (enable_vmware_backdoor &&
+	    ((port == VMWARE_PORT_VMPORT) || (port == VMWARE_PORT_VMRPC)))
+		return true;
+
 	ops->get_segment(ctxt, &tr, &tr_seg, &base3, VCPU_SREG_TR);
 	if (!tr_seg.p)
 		return false;
@@ -4282,6 +4294,13 @@ static int check_rdpmc(struct x86_emulate_ctxt *ctxt)
 	u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
 	u64 rcx = reg_read(ctxt, VCPU_REGS_RCX);
 
+	/*
+	 * VMware allows access to these Pseudo-PMCs even when read via RDPMC
+	 * in Ring3 when CR4.PCE=0.
+	 */
+	if (enable_vmware_backdoor && is_vmware_backdoor_pmc(rcx))
+		return X86EMUL_CONTINUE;
+
 	if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) ||
 	    ctxt->ops->check_pmc(ctxt, rcx))
 		return emulate_gp(ctxt, 0);
@@ -4498,6 +4517,10 @@ static const struct gprefix pfx_0f_2b = {
 	ID(0, &instr_dual_0f_2b), ID(0, &instr_dual_0f_2b), N, N,
 };
 
+static const struct gprefix pfx_0f_10_0f_11 = {
+	I(Unaligned, em_mov), I(Unaligned, em_mov), N, N,
+};
+
 static const struct gprefix pfx_0f_28_0f_29 = {
 	I(Aligned, em_mov), I(Aligned, em_mov), N, N,
 };
@@ -4709,7 +4732,9 @@ static const struct opcode twobyte_table[256] = {
 	DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
 	N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
 	/* 0x10 - 0x1F */
-	N, N, N, N, N, N, N, N,
+	GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_10_0f_11),
+	GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_10_0f_11),
+	N, N, N, N, N, N,
 	D(ImplicitOps | ModRM | SrcMem | NoAccess),
 	N, N, N, N, N, N, D(ImplicitOps | ModRM | SrcMem | NoAccess),
 	/* 0x20 - 0x2F */
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index dc97f2544b6f..98618e397342 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -29,6 +29,7 @@
 #include <linux/kvm_host.h>
 #include <linux/highmem.h>
 #include <linux/sched/cputime.h>
+#include <linux/eventfd.h>
 
 #include <asm/apicdef.h>
 #include <trace/events/kvm.h>
@@ -74,13 +75,38 @@ static bool synic_has_vector_auto_eoi(struct kvm_vcpu_hv_synic *synic,
 	return false;
 }
 
+static void synic_update_vector(struct kvm_vcpu_hv_synic *synic,
+				int vector)
+{
+	if (vector < HV_SYNIC_FIRST_VALID_VECTOR)
+		return;
+
+	if (synic_has_vector_connected(synic, vector))
+		__set_bit(vector, synic->vec_bitmap);
+	else
+		__clear_bit(vector, synic->vec_bitmap);
+
+	if (synic_has_vector_auto_eoi(synic, vector))
+		__set_bit(vector, synic->auto_eoi_bitmap);
+	else
+		__clear_bit(vector, synic->auto_eoi_bitmap);
+}
+
 static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint,
 			  u64 data, bool host)
 {
-	int vector;
+	int vector, old_vector;
+	bool masked;
 
 	vector = data & HV_SYNIC_SINT_VECTOR_MASK;
-	if (vector < 16 && !host)
+	masked = data & HV_SYNIC_SINT_MASKED;
+
+	/*
+	 * Valid vectors are 16-255, however, nested Hyper-V attempts to write
+	 * default '0x10000' value on boot and this should not #GP. We need to
+	 * allow zero-initing the register from host as well.
+	 */
+	if (vector < HV_SYNIC_FIRST_VALID_VECTOR && !host && !masked)
 		return 1;
 	/*
 	 * Guest may configure multiple SINTs to use the same vector, so
@@ -88,18 +114,13 @@ static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint,
 	 * bitmap of vectors with auto-eoi behavior.  The bitmaps are
 	 * updated here, and atomically queried on fast paths.
 	 */
+	old_vector = synic_read_sint(synic, sint) & HV_SYNIC_SINT_VECTOR_MASK;
 
 	atomic64_set(&synic->sint[sint], data);
 
-	if (synic_has_vector_connected(synic, vector))
-		__set_bit(vector, synic->vec_bitmap);
-	else
-		__clear_bit(vector, synic->vec_bitmap);
+	synic_update_vector(synic, old_vector);
 
-	if (synic_has_vector_auto_eoi(synic, vector))
-		__set_bit(vector, synic->auto_eoi_bitmap);
-	else
-		__clear_bit(vector, synic->auto_eoi_bitmap);
+	synic_update_vector(synic, vector);
 
 	/* Load SynIC vectors into EOI exit bitmap */
 	kvm_make_request(KVM_REQ_SCAN_IOAPIC, synic_to_vcpu(synic));
@@ -736,6 +757,9 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
 	case HV_X64_MSR_CRASH_CTL:
 	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
 	case HV_X64_MSR_RESET:
+	case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
+	case HV_X64_MSR_TSC_EMULATION_CONTROL:
+	case HV_X64_MSR_TSC_EMULATION_STATUS:
 		r = true;
 		break;
 	}
@@ -981,6 +1005,15 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
 			kvm_make_request(KVM_REQ_HV_RESET, vcpu);
 		}
 		break;
+	case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
+		hv->hv_reenlightenment_control = data;
+		break;
+	case HV_X64_MSR_TSC_EMULATION_CONTROL:
+		hv->hv_tsc_emulation_control = data;
+		break;
+	case HV_X64_MSR_TSC_EMULATION_STATUS:
+		hv->hv_tsc_emulation_status = data;
+		break;
 	default:
 		vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n",
 			    msr, data);
@@ -1009,17 +1042,17 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
 			return 1;
 		hv->vp_index = (u32)data;
 		break;
-	case HV_X64_MSR_APIC_ASSIST_PAGE: {
+	case HV_X64_MSR_VP_ASSIST_PAGE: {
 		u64 gfn;
 		unsigned long addr;
 
-		if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) {
+		if (!(data & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE)) {
 			hv->hv_vapic = data;
 			if (kvm_lapic_enable_pv_eoi(vcpu, 0))
 				return 1;
 			break;
 		}
-		gfn = data >> HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT;
+		gfn = data >> HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT;
 		addr = kvm_vcpu_gfn_to_hva(vcpu, gfn);
 		if (kvm_is_error_hva(addr))
 			return 1;
@@ -1105,6 +1138,15 @@ static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	case HV_X64_MSR_RESET:
 		data = 0;
 		break;
+	case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
+		data = hv->hv_reenlightenment_control;
+		break;
+	case HV_X64_MSR_TSC_EMULATION_CONTROL:
+		data = hv->hv_tsc_emulation_control;
+		break;
+	case HV_X64_MSR_TSC_EMULATION_STATUS:
+		data = hv->hv_tsc_emulation_status;
+		break;
 	default:
 		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
 		return 1;
@@ -1129,7 +1171,7 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 		return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
 	case HV_X64_MSR_TPR:
 		return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
-	case HV_X64_MSR_APIC_ASSIST_PAGE:
+	case HV_X64_MSR_VP_ASSIST_PAGE:
 		data = hv->hv_vapic;
 		break;
 	case HV_X64_MSR_VP_RUNTIME:
@@ -1226,10 +1268,47 @@ static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
+static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 param)
+{
+	struct eventfd_ctx *eventfd;
+
+	if (unlikely(!fast)) {
+		int ret;
+		gpa_t gpa = param;
+
+		if ((gpa & (__alignof__(param) - 1)) ||
+		    offset_in_page(gpa) + sizeof(param) > PAGE_SIZE)
+			return HV_STATUS_INVALID_ALIGNMENT;
+
+		ret = kvm_vcpu_read_guest(vcpu, gpa, &param, sizeof(param));
+		if (ret < 0)
+			return HV_STATUS_INVALID_ALIGNMENT;
+	}
+
+	/*
+	 * Per spec, bits 32-47 contain the extra "flag number".  However, we
+	 * have no use for it, and in all known usecases it is zero, so just
+	 * report lookup failure if it isn't.
+	 */
+	if (param & 0xffff00000000ULL)
+		return HV_STATUS_INVALID_PORT_ID;
+	/* remaining bits are reserved-zero */
+	if (param & ~KVM_HYPERV_CONN_ID_MASK)
+		return HV_STATUS_INVALID_HYPERCALL_INPUT;
+
+	/* conn_to_evt is protected by vcpu->kvm->srcu */
+	eventfd = idr_find(&vcpu->kvm->arch.hyperv.conn_to_evt, param);
+	if (!eventfd)
+		return HV_STATUS_INVALID_PORT_ID;
+
+	eventfd_signal(eventfd, 1);
+	return HV_STATUS_SUCCESS;
+}
+
 int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 {
-	u64 param, ingpa, outgpa, ret;
-	uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0;
+	u64 param, ingpa, outgpa, ret = HV_STATUS_SUCCESS;
+	uint16_t code, rep_idx, rep_cnt;
 	bool fast, longmode;
 
 	/*
@@ -1268,7 +1347,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 
 	/* Hypercall continuation is not supported yet */
 	if (rep_cnt || rep_idx) {
-		res = HV_STATUS_INVALID_HYPERCALL_CODE;
+		ret = HV_STATUS_INVALID_HYPERCALL_CODE;
 		goto set_result;
 	}
 
@@ -1276,11 +1355,15 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 	case HVCALL_NOTIFY_LONG_SPIN_WAIT:
 		kvm_vcpu_on_spin(vcpu, true);
 		break;
-	case HVCALL_POST_MESSAGE:
 	case HVCALL_SIGNAL_EVENT:
+		ret = kvm_hvcall_signal_event(vcpu, fast, ingpa);
+		if (ret != HV_STATUS_INVALID_PORT_ID)
+			break;
+		/* maybe userspace knows this conn_id: fall through */
+	case HVCALL_POST_MESSAGE:
 		/* don't bother userspace if it has no way to handle it */
 		if (!vcpu_to_synic(vcpu)->active) {
-			res = HV_STATUS_INVALID_HYPERCALL_CODE;
+			ret = HV_STATUS_INVALID_HYPERCALL_CODE;
 			break;
 		}
 		vcpu->run->exit_reason = KVM_EXIT_HYPERV;
@@ -1292,12 +1375,79 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 				kvm_hv_hypercall_complete_userspace;
 		return 0;
 	default:
-		res = HV_STATUS_INVALID_HYPERCALL_CODE;
+		ret = HV_STATUS_INVALID_HYPERCALL_CODE;
 		break;
 	}
 
 set_result:
-	ret = res | (((u64)rep_done & 0xfff) << 32);
 	kvm_hv_hypercall_set_result(vcpu, ret);
 	return 1;
 }
+
+void kvm_hv_init_vm(struct kvm *kvm)
+{
+	mutex_init(&kvm->arch.hyperv.hv_lock);
+	idr_init(&kvm->arch.hyperv.conn_to_evt);
+}
+
+void kvm_hv_destroy_vm(struct kvm *kvm)
+{
+	struct eventfd_ctx *eventfd;
+	int i;
+
+	idr_for_each_entry(&kvm->arch.hyperv.conn_to_evt, eventfd, i)
+		eventfd_ctx_put(eventfd);
+	idr_destroy(&kvm->arch.hyperv.conn_to_evt);
+}
+
+static int kvm_hv_eventfd_assign(struct kvm *kvm, u32 conn_id, int fd)
+{
+	struct kvm_hv *hv = &kvm->arch.hyperv;
+	struct eventfd_ctx *eventfd;
+	int ret;
+
+	eventfd = eventfd_ctx_fdget(fd);
+	if (IS_ERR(eventfd))
+		return PTR_ERR(eventfd);
+
+	mutex_lock(&hv->hv_lock);
+	ret = idr_alloc(&hv->conn_to_evt, eventfd, conn_id, conn_id + 1,
+			GFP_KERNEL);
+	mutex_unlock(&hv->hv_lock);
+
+	if (ret >= 0)
+		return 0;
+
+	if (ret == -ENOSPC)
+		ret = -EEXIST;
+	eventfd_ctx_put(eventfd);
+	return ret;
+}
+
+static int kvm_hv_eventfd_deassign(struct kvm *kvm, u32 conn_id)
+{
+	struct kvm_hv *hv = &kvm->arch.hyperv;
+	struct eventfd_ctx *eventfd;
+
+	mutex_lock(&hv->hv_lock);
+	eventfd = idr_remove(&hv->conn_to_evt, conn_id);
+	mutex_unlock(&hv->hv_lock);
+
+	if (!eventfd)
+		return -ENOENT;
+
+	synchronize_srcu(&kvm->srcu);
+	eventfd_ctx_put(eventfd);
+	return 0;
+}
+
+int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args)
+{
+	if ((args->flags & ~KVM_HYPERV_EVENTFD_DEASSIGN) ||
+	    (args->conn_id & ~KVM_HYPERV_CONN_ID_MASK))
+		return -EINVAL;
+
+	if (args->flags == KVM_HYPERV_EVENTFD_DEASSIGN)
+		return kvm_hv_eventfd_deassign(kvm, args->conn_id);
+	return kvm_hv_eventfd_assign(kvm, args->conn_id, args->fd);
+}
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
index e637631a9574..837465d69c6d 100644
--- a/arch/x86/kvm/hyperv.h
+++ b/arch/x86/kvm/hyperv.h
@@ -88,4 +88,8 @@ void kvm_hv_process_stimers(struct kvm_vcpu *vcpu);
 void kvm_hv_setup_tsc_page(struct kvm *kvm,
 			   struct pvclock_vcpu_time_info *hv_clock);
 
+void kvm_hv_init_vm(struct kvm *kvm);
+void kvm_hv_destroy_vm(struct kvm *kvm);
+int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args);
+
 #endif
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index f171051eecf3..faa264822cee 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -73,8 +73,19 @@ static int kvm_cpu_has_extint(struct kvm_vcpu *v)
  */
 int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
 {
+	/*
+	 * FIXME: interrupt.injected represents an interrupt whose
+	 * side-effects have already been applied (e.g. bit from IRR
+	 * already moved to ISR). Therefore, it is incorrect to rely
+	 * on interrupt.injected to know if there is a pending
+	 * interrupt in the user-mode LAPIC.
+	 * This leads to nVMX/nSVM not being able to distinguish
+	 * if it should exit from L2 to L1 on EXTERNAL_INTERRUPT on
+	 * pending interrupt or should re-inject an injected
+	 * interrupt.
+	 */
 	if (!lapic_in_kernel(v))
-		return v->arch.interrupt.pending;
+		return v->arch.interrupt.injected;
 
 	if (kvm_cpu_has_extint(v))
 		return 1;
@@ -91,8 +102,19 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
  */
 int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
 {
+	/*
+	 * FIXME: interrupt.injected represents an interrupt whose
+	 * side-effects have already been applied (e.g. bit from IRR
+	 * already moved to ISR). Therefore, it is incorrect to rely
+	 * on interrupt.injected to know if there is a pending
+	 * interrupt in the user-mode LAPIC.
+	 * This leads to nVMX/nSVM not being able to distinguish
+	 * if it should exit from L2 to L1 on EXTERNAL_INTERRUPT on
+	 * pending interrupt or should re-inject an injected
+	 * interrupt.
+	 */
 	if (!lapic_in_kernel(v))
-		return v->arch.interrupt.pending;
+		return v->arch.interrupt.injected;
 
 	if (kvm_cpu_has_extint(v))
 		return 1;
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index f500293dad8d..9619dcc2b325 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -41,7 +41,7 @@ static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index)
 
 	if (!test_bit(VCPU_EXREG_PDPTR,
 		      (unsigned long *)&vcpu->arch.regs_avail))
-		kvm_x86_ops->cache_reg(vcpu, VCPU_EXREG_PDPTR);
+		kvm_x86_ops->cache_reg(vcpu, (enum kvm_reg)VCPU_EXREG_PDPTR);
 
 	return vcpu->arch.walk_mmu->pdptrs[index];
 }
@@ -93,6 +93,11 @@ static inline void enter_guest_mode(struct kvm_vcpu *vcpu)
 static inline void leave_guest_mode(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.hflags &= ~HF_GUEST_MASK;
+
+	if (vcpu->arch.load_eoi_exitmap_pending) {
+		vcpu->arch.load_eoi_exitmap_pending = false;
+		kvm_make_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu);
+	}
 }
 
 static inline bool is_guest_mode(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 391dda8d43b7..70dcb5548022 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -321,8 +321,16 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu)
 	if (!lapic_in_kernel(vcpu))
 		return;
 
+	/*
+	 * KVM emulates 82093AA datasheet (with in-kernel IOAPIC implementation)
+	 * which doesn't have EOI register; Some buggy OSes (e.g. Windows with
+	 * Hyper-V role) disable EOI broadcast in lapic not checking for IOAPIC
+	 * version first and level-triggered interrupts never get EOIed in
+	 * IOAPIC.
+	 */
 	feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0);
-	if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))))
+	if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))) &&
+	    !ioapic_in_kernel(vcpu->kvm))
 		v |= APIC_LVR_DIRECTED_EOI;
 	kvm_lapic_set_reg(apic, APIC_LVR, v);
 }
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 56c36014f7b7..edce055e9fd7 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -109,7 +109,7 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
 
 static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu)
 {
-	return vcpu->arch.hyperv.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE;
+	return vcpu->arch.hyperv.hv_vapic & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
 }
 
 int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 763bb3bade63..8494dbae41b9 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3031,7 +3031,7 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn)
 		return RET_PF_RETRY;
 	}
 
-	return RET_PF_EMULATE;
+	return -EFAULT;
 }
 
 static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 5abae72266b7..6288e9d7068e 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -452,14 +452,21 @@ error:
 	 * done by is_rsvd_bits_set() above.
 	 *
 	 * We set up the value of exit_qualification to inject:
-	 * [2:0] - Derive from [2:0] of real exit_qualification at EPT violation
+	 * [2:0] - Derive from the access bits. The exit_qualification might be
+	 *         out of date if it is serving an EPT misconfiguration.
 	 * [5:3] - Calculated by the page walk of the guest EPT page tables
 	 * [7:8] - Derived from [7:8] of real exit_qualification
 	 *
 	 * The other bits are set to 0.
 	 */
 	if (!(errcode & PFERR_RSVD_MASK)) {
-		vcpu->arch.exit_qualification &= 0x187;
+		vcpu->arch.exit_qualification &= 0x180;
+		if (write_fault)
+			vcpu->arch.exit_qualification |= EPT_VIOLATION_ACC_WRITE;
+		if (user_fault)
+			vcpu->arch.exit_qualification |= EPT_VIOLATION_ACC_READ;
+		if (fetch_fault)
+			vcpu->arch.exit_qualification |= EPT_VIOLATION_ACC_INSTR;
 		vcpu->arch.exit_qualification |= (pte_access & 0x7) << 3;
 	}
 #endif
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 026db42a86c3..58ead7db71a3 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -244,12 +244,49 @@ int kvm_pmu_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx)
 	return kvm_x86_ops->pmu_ops->is_valid_msr_idx(vcpu, idx);
 }
 
+bool is_vmware_backdoor_pmc(u32 pmc_idx)
+{
+	switch (pmc_idx) {
+	case VMWARE_BACKDOOR_PMC_HOST_TSC:
+	case VMWARE_BACKDOOR_PMC_REAL_TIME:
+	case VMWARE_BACKDOOR_PMC_APPARENT_TIME:
+		return true;
+	}
+	return false;
+}
+
+static int kvm_pmu_rdpmc_vmware(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
+{
+	u64 ctr_val;
+
+	switch (idx) {
+	case VMWARE_BACKDOOR_PMC_HOST_TSC:
+		ctr_val = rdtsc();
+		break;
+	case VMWARE_BACKDOOR_PMC_REAL_TIME:
+		ctr_val = ktime_get_boot_ns();
+		break;
+	case VMWARE_BACKDOOR_PMC_APPARENT_TIME:
+		ctr_val = ktime_get_boot_ns() +
+			vcpu->kvm->arch.kvmclock_offset;
+		break;
+	default:
+		return 1;
+	}
+
+	*data = ctr_val;
+	return 0;
+}
+
 int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
 {
 	bool fast_mode = idx & (1u << 31);
 	struct kvm_pmc *pmc;
 	u64 ctr_val;
 
+	if (is_vmware_backdoor_pmc(idx))
+		return kvm_pmu_rdpmc_vmware(vcpu, idx, data);
+
 	pmc = kvm_x86_ops->pmu_ops->msr_idx_to_pmc(vcpu, idx);
 	if (!pmc)
 		return 1;
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index a9a62b9a73e2..ba8898e1a854 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -9,6 +9,10 @@
 /* retrieve the 4 bits for EN and PMI out of IA32_FIXED_CTR_CTRL */
 #define fixed_ctrl_field(ctrl_reg, idx) (((ctrl_reg) >> ((idx)*4)) & 0xf)
 
+#define VMWARE_BACKDOOR_PMC_HOST_TSC		0x10000
+#define VMWARE_BACKDOOR_PMC_REAL_TIME		0x10001
+#define VMWARE_BACKDOOR_PMC_APPARENT_TIME	0x10002
+
 struct kvm_event_hw_type_mapping {
 	u8 eventsel;
 	u8 unit_mask;
@@ -114,6 +118,8 @@ void kvm_pmu_reset(struct kvm_vcpu *vcpu);
 void kvm_pmu_init(struct kvm_vcpu *vcpu);
 void kvm_pmu_destroy(struct kvm_vcpu *vcpu);
 
+bool is_vmware_backdoor_pmc(u32 pmc_idx);
+
 extern struct kvm_pmu_ops intel_pmu_ops;
 extern struct kvm_pmu_ops amd_pmu_ops;
 #endif /* __KVM_X86_PMU_H */
diff --git a/arch/x86/kvm/pmu_amd.c b/arch/x86/kvm/pmu_amd.c
index cd944435dfbd..1495a735b38e 100644
--- a/arch/x86/kvm/pmu_amd.c
+++ b/arch/x86/kvm/pmu_amd.c
@@ -19,6 +19,21 @@
 #include "lapic.h"
 #include "pmu.h"
 
+enum pmu_type {
+	PMU_TYPE_COUNTER = 0,
+	PMU_TYPE_EVNTSEL,
+};
+
+enum index {
+	INDEX_ZERO = 0,
+	INDEX_ONE,
+	INDEX_TWO,
+	INDEX_THREE,
+	INDEX_FOUR,
+	INDEX_FIVE,
+	INDEX_ERROR,
+};
+
 /* duplicated from amd_perfmon_event_map, K7 and above should work. */
 static struct kvm_event_hw_type_mapping amd_event_mapping[] = {
 	[0] = { 0x76, 0x00, PERF_COUNT_HW_CPU_CYCLES },
@@ -31,6 +46,88 @@ static struct kvm_event_hw_type_mapping amd_event_mapping[] = {
 	[7] = { 0xd1, 0x00, PERF_COUNT_HW_STALLED_CYCLES_BACKEND },
 };
 
+static unsigned int get_msr_base(struct kvm_pmu *pmu, enum pmu_type type)
+{
+	struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);
+
+	if (guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE)) {
+		if (type == PMU_TYPE_COUNTER)
+			return MSR_F15H_PERF_CTR;
+		else
+			return MSR_F15H_PERF_CTL;
+	} else {
+		if (type == PMU_TYPE_COUNTER)
+			return MSR_K7_PERFCTR0;
+		else
+			return MSR_K7_EVNTSEL0;
+	}
+}
+
+static enum index msr_to_index(u32 msr)
+{
+	switch (msr) {
+	case MSR_F15H_PERF_CTL0:
+	case MSR_F15H_PERF_CTR0:
+	case MSR_K7_EVNTSEL0:
+	case MSR_K7_PERFCTR0:
+		return INDEX_ZERO;
+	case MSR_F15H_PERF_CTL1:
+	case MSR_F15H_PERF_CTR1:
+	case MSR_K7_EVNTSEL1:
+	case MSR_K7_PERFCTR1:
+		return INDEX_ONE;
+	case MSR_F15H_PERF_CTL2:
+	case MSR_F15H_PERF_CTR2:
+	case MSR_K7_EVNTSEL2:
+	case MSR_K7_PERFCTR2:
+		return INDEX_TWO;
+	case MSR_F15H_PERF_CTL3:
+	case MSR_F15H_PERF_CTR3:
+	case MSR_K7_EVNTSEL3:
+	case MSR_K7_PERFCTR3:
+		return INDEX_THREE;
+	case MSR_F15H_PERF_CTL4:
+	case MSR_F15H_PERF_CTR4:
+		return INDEX_FOUR;
+	case MSR_F15H_PERF_CTL5:
+	case MSR_F15H_PERF_CTR5:
+		return INDEX_FIVE;
+	default:
+		return INDEX_ERROR;
+	}
+}
+
+static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr,
+					     enum pmu_type type)
+{
+	switch (msr) {
+	case MSR_F15H_PERF_CTL0:
+	case MSR_F15H_PERF_CTL1:
+	case MSR_F15H_PERF_CTL2:
+	case MSR_F15H_PERF_CTL3:
+	case MSR_F15H_PERF_CTL4:
+	case MSR_F15H_PERF_CTL5:
+	case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
+		if (type != PMU_TYPE_EVNTSEL)
+			return NULL;
+		break;
+	case MSR_F15H_PERF_CTR0:
+	case MSR_F15H_PERF_CTR1:
+	case MSR_F15H_PERF_CTR2:
+	case MSR_F15H_PERF_CTR3:
+	case MSR_F15H_PERF_CTR4:
+	case MSR_F15H_PERF_CTR5:
+	case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
+		if (type != PMU_TYPE_COUNTER)
+			return NULL;
+		break;
+	default:
+		return NULL;
+	}
+
+	return &pmu->gp_counters[msr_to_index(msr)];
+}
+
 static unsigned amd_find_arch_event(struct kvm_pmu *pmu,
 				    u8 event_select,
 				    u8 unit_mask)
@@ -64,7 +161,18 @@ static bool amd_pmc_is_enabled(struct kvm_pmc *pmc)
 
 static struct kvm_pmc *amd_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
 {
-	return get_gp_pmc(pmu, MSR_K7_EVNTSEL0 + pmc_idx, MSR_K7_EVNTSEL0);
+	struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);
+	unsigned int msr = get_msr_base(pmu, PMU_TYPE_COUNTER);
+
+	/*
+	 * pmc_idx is dense, but with PERFCTR_CORE the counter MSRs alternate
+	 * with the event-select MSRs, so counter N sits 2 * N past the base.
+	 */
+	if (guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE))
+		pmc_idx *= 2;
+	msr += pmc_idx;
+
+	return get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER);
 }
 
 /* returns 0 if idx's corresponding MSR exists; otherwise returns 1. */
@@ -96,8 +204,8 @@ static bool amd_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
 	int ret = false;
 
-	ret = get_gp_pmc(pmu, msr, MSR_K7_PERFCTR0) ||
-		get_gp_pmc(pmu, msr, MSR_K7_EVNTSEL0);
+	ret = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER) ||
+		get_gp_pmc_amd(pmu, msr, PMU_TYPE_EVNTSEL);
 
 	return ret;
 }
@@ -107,14 +215,14 @@ static int amd_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
 	struct kvm_pmc *pmc;
 
-	/* MSR_K7_PERFCTRn */
-	pmc = get_gp_pmc(pmu, msr, MSR_K7_PERFCTR0);
+	/* MSR_PERFCTRn */
+	pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER);
 	if (pmc) {
 		*data = pmc_read_counter(pmc);
 		return 0;
 	}
-	/* MSR_K7_EVNTSELn */
-	pmc = get_gp_pmc(pmu, msr, MSR_K7_EVNTSEL0);
+	/* MSR_EVNTSELn */
+	pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_EVNTSEL);
 	if (pmc) {
 		*data = pmc->eventsel;
 		return 0;
@@ -130,14 +238,14 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	u32 msr = msr_info->index;
 	u64 data = msr_info->data;
 
-	/* MSR_K7_PERFCTRn */
-	pmc = get_gp_pmc(pmu, msr, MSR_K7_PERFCTR0);
+	/* MSR_PERFCTRn */
+	pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER);
 	if (pmc) {
 		pmc->counter += data - pmc_read_counter(pmc);
 		return 0;
 	}
-	/* MSR_K7_EVNTSELn */
-	pmc = get_gp_pmc(pmu, msr, MSR_K7_EVNTSEL0);
+	/* MSR_EVNTSELn */
+	pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_EVNTSEL);
 	if (pmc) {
 		if (data == pmc->eventsel)
 			return 0;
@@ -154,7 +262,11 @@ static void amd_pmu_refresh(struct kvm_vcpu *vcpu)
 {
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
 
-	pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS;
+	if (guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE))
+		pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS_CORE;
+	else
+		pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS;
+
 	pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << 48) - 1;
 	pmu->reserved_bits = 0xffffffff00200000ull;
 	/* not applicable to AMD; but clean them to prevent any fall out */
@@ -169,7 +281,9 @@ static void amd_pmu_init(struct kvm_vcpu *vcpu)
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
 	int i;
 
-	for (i = 0; i < AMD64_NUM_COUNTERS ; i++) {
+	BUILD_BUG_ON(AMD64_NUM_COUNTERS_CORE > INTEL_PMC_MAX_GENERIC);
+
+	for (i = 0; i < AMD64_NUM_COUNTERS_CORE ; i++) {
 		pmu->gp_counters[i].type = KVM_PMC_GP;
 		pmu->gp_counters[i].vcpu = vcpu;
 		pmu->gp_counters[i].idx = i;
@@ -181,7 +295,7 @@ static void amd_pmu_reset(struct kvm_vcpu *vcpu)
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
 	int i;
 
-	for (i = 0; i < AMD64_NUM_COUNTERS; i++) {
+	for (i = 0; i < AMD64_NUM_COUNTERS_CORE; i++) {
 		struct kvm_pmc *pmc = &pmu->gp_counters[i];
 
 		pmc_stop_counter(pmc);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 9d2043f94e29..b58787daf9f8 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -131,6 +131,28 @@ static const u32 host_save_user_msrs[] = {
 
 #define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
 
+struct kvm_sev_info {
+	bool active;		/* true for an SEV guest; read by sev_guest() */
+	unsigned int asid;	/* ASID used for this guest */
+	unsigned int handle;	/* SEV firmware handle */
+	int fd;			/* SEV device fd, passed to __sev_issue_cmd() */
+	unsigned long pages_locked; /* Number of pages locked */
+	struct list_head regions_list;  /* List of registered regions */
+};
+
+struct kvm_svm {
+	struct kvm kvm;		/* generic VM; to_kvm_svm() maps back from it */
+
+	/* Struct members for AVIC */
+	u32 avic_vm_id;		/* key of this VM in svm_vm_data_hash */
+	u32 ldr_mode;		/* last APIC_DFR bits 31:28 that were handled */
+	struct page *avic_logical_id_table_page;
+	struct page *avic_physical_id_table_page;
+	struct hlist_node hnode;	/* link in svm_vm_data_hash */
+
+	struct kvm_sev_info sev_info;
+};
+
 struct kvm_vcpu;
 
 struct nested_state {
@@ -276,6 +298,54 @@ static bool npt_enabled = true;
 static bool npt_enabled;
 #endif
 
+/*
+ * These 2 parameters are used to configure the controls for Pause-Loop Exiting:
+ * pause_filter_count: On processors that support Pause filtering(indicated
+ *	by CPUID Fn8000_000A_EDX), the VMCB provides a 16 bit pause filter
+ *	count value. On VMRUN this value is loaded into an internal counter.
+ *	Each time a pause instruction is executed, this counter is decremented
+ *	until it reaches zero at which time a #VMEXIT is generated if pause
+ *	intercept is enabled. Refer to AMD APM Vol 2 Section 15.14.4 Pause
+ *	Intercept Filtering for more details.
+ *	A count of zero also means the pause filter (PLE) logic is disabled.
+ *
+ * pause_filter_thresh: In addition, some processor families support advanced
+ *	pause filtering (indicated by CPUID Fn8000_000A_EDX), which puts an
+ *	upper bound on the amount of time a guest may execute in a pause loop.
+ *	In this mode, a 16-bit pause filter threshold field is added in the
+ *	VMCB. The threshold value is a cycle count that is used to reset the
+ *	pause counter. As with simple pause filtering, VMRUN loads the pause
+ *	count value from VMCB into an internal counter. Then, on each pause
+ *	instruction the hardware checks the elapsed number of cycles since
+ *	the most recent pause instruction against the pause filter threshold.
+ *	If the elapsed cycle count is greater than the pause filter threshold,
+ *	then the internal pause count is reloaded from the VMCB and execution
+ *	continues. If the elapsed cycle count is less than the pause filter
+ *	threshold, then the internal pause count is decremented. If the count
+ *	value is less than zero and PAUSE intercept is enabled, a #VMEXIT is
+ *	triggered. If advanced pause filtering is supported and pause filter
+ *	threshold field is set to zero, the filter will operate in the simpler,
+ *	count only mode.
+ */
+
+static unsigned short pause_filter_thresh = KVM_DEFAULT_PLE_GAP;
+module_param(pause_filter_thresh, ushort, 0444);
+
+static unsigned short pause_filter_count = KVM_SVM_DEFAULT_PLE_WINDOW;
+module_param(pause_filter_count, ushort, 0444);
+
+/* Default doubles per-vcpu window every exit. */
+static unsigned short pause_filter_count_grow = KVM_DEFAULT_PLE_WINDOW_GROW;
+module_param(pause_filter_count_grow, ushort, 0444);
+
+/* Default resets per-vcpu window every exit to pause_filter_count. */
+static unsigned short pause_filter_count_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK;
+module_param(pause_filter_count_shrink, ushort, 0444);
+
+/* Default is to compute the maximum so we can never overflow. */
+static unsigned short pause_filter_count_max = KVM_SVM_DEFAULT_PLE_WINDOW_MAX;
+module_param(pause_filter_count_max, ushort, 0444);
+
 /* allow nested paging (virtualized MMU) for all guests */
 static int npt = true;
 module_param(npt, int, S_IRUGO);
@@ -352,6 +422,12 @@ struct enc_region {
 	unsigned long size;
 };
 
+/* Get the kvm_svm that embeds @kvm as its 'kvm' member. */
+static inline struct kvm_svm *to_kvm_svm(struct kvm *kvm)
+{
+	return container_of(kvm, struct kvm_svm, kvm);
+}
+
 static inline bool svm_sev_enabled(void)
 {
 	return max_sev_asid;
@@ -359,14 +435,14 @@ static inline bool svm_sev_enabled(void)
 
 static inline bool sev_guest(struct kvm *kvm)
 {
-	struct kvm_sev_info *sev = &kvm->arch.sev_info;
+	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
 
 	return sev->active;
 }
 
 static inline int sev_get_asid(struct kvm *kvm)
 {
-	struct kvm_sev_info *sev = &kvm->arch.sev_info;
+	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
 
 	return sev->asid;
 }
@@ -1083,7 +1159,7 @@ static void disable_nmi_singlestep(struct vcpu_svm *svm)
 }
 
 /* Note:
- * This hash table is used to map VM_ID to a struct kvm_arch,
+ * This hash table is used to map VM_ID to a struct kvm_svm,
  * when handling AMD IOMMU GALOG notification to schedule in
  * a particular vCPU.
  */
@@ -1100,7 +1176,7 @@ static DEFINE_SPINLOCK(svm_vm_data_hash_lock);
 static int avic_ga_log_notifier(u32 ga_tag)
 {
 	unsigned long flags;
-	struct kvm_arch *ka = NULL;
+	struct kvm_svm *kvm_svm;
 	struct kvm_vcpu *vcpu = NULL;
 	u32 vm_id = AVIC_GATAG_TO_VMID(ga_tag);
 	u32 vcpu_id = AVIC_GATAG_TO_VCPUID(ga_tag);
@@ -1108,13 +1184,10 @@ static int avic_ga_log_notifier(u32 ga_tag)
 	pr_debug("SVM: %s: vm_id=%#x, vcpu_id=%#x\n", __func__, vm_id, vcpu_id);
 
 	spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
-	hash_for_each_possible(svm_vm_data_hash, ka, hnode, vm_id) {
-		struct kvm *kvm = container_of(ka, struct kvm, arch);
-		struct kvm_arch *vm_data = &kvm->arch;
-
-		if (vm_data->avic_vm_id != vm_id)
+	hash_for_each_possible(svm_vm_data_hash, kvm_svm, hnode, vm_id) {
+		if (kvm_svm->avic_vm_id != vm_id)
 			continue;
-		vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
+		vcpu = kvm_get_vcpu_by_id(&kvm_svm->kvm, vcpu_id);
 		break;
 	}
 	spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
@@ -1172,6 +1245,42 @@ err:
 	return rc;
 }
 
+/* Grow this vCPU's pause filter count; mark the VMCB dirty if it changed. */
+static void grow_ple_window(struct kvm_vcpu *vcpu)
+{
+	struct vmcb *vmcb = to_svm(vcpu)->vmcb;
+	int prev = vmcb->control.pause_filter_count;
+
+	vmcb->control.pause_filter_count =
+		__grow_ple_window(prev, pause_filter_count,
+				  pause_filter_count_grow,
+				  pause_filter_count_max);
+
+	if (vmcb->control.pause_filter_count != prev)
+		mark_dirty(vmcb, VMCB_INTERCEPTS);
+
+	trace_kvm_ple_window_grow(vcpu->vcpu_id,
+				  vmcb->control.pause_filter_count, prev);
+}
+
+static void shrink_ple_window(struct kvm_vcpu *vcpu)
+{
+	struct vmcb *vmcb = to_svm(vcpu)->vmcb;
+	int prev = vmcb->control.pause_filter_count;
+
+	/* Store first, then compare and trace what the VMCB field now holds. */
+	vmcb->control.pause_filter_count =
+		__shrink_ple_window(prev, pause_filter_count,
+				    pause_filter_count_shrink,
+				    pause_filter_count);
+
+	if (vmcb->control.pause_filter_count != prev)
+		mark_dirty(vmcb, VMCB_INTERCEPTS);
+
+	trace_kvm_ple_window_shrink(vcpu->vcpu_id,
+				    vmcb->control.pause_filter_count, prev);
+}
+
 static __init int svm_hardware_setup(void)
 {
 	int cpu;
@@ -1202,6 +1311,14 @@ static __init int svm_hardware_setup(void)
 		kvm_tsc_scaling_ratio_frac_bits = 32;
 	}
 
+	/* Check for pause filtering support */
+	if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
+		pause_filter_count = 0;
+		pause_filter_thresh = 0;
+	} else if (!boot_cpu_has(X86_FEATURE_PFTHRESHOLD)) {
+		pause_filter_thresh = 0;
+	}
+
 	if (nested) {
 		printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
 		kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
@@ -1328,10 +1445,10 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 static void avic_init_vmcb(struct vcpu_svm *svm)
 {
 	struct vmcb *vmcb = svm->vmcb;
-	struct kvm_arch *vm_data = &svm->vcpu.kvm->arch;
+	struct kvm_svm *kvm_svm = to_kvm_svm(svm->vcpu.kvm);
 	phys_addr_t bpa = __sme_set(page_to_phys(svm->avic_backing_page));
-	phys_addr_t lpa = __sme_set(page_to_phys(vm_data->avic_logical_id_table_page));
-	phys_addr_t ppa = __sme_set(page_to_phys(vm_data->avic_physical_id_table_page));
+	phys_addr_t lpa = __sme_set(page_to_phys(kvm_svm->avic_logical_id_table_page));
+	phys_addr_t ppa = __sme_set(page_to_phys(kvm_svm->avic_physical_id_table_page));
 
 	vmcb->control.avic_backing_page = bpa & AVIC_HPA_MASK;
 	vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK;
@@ -1363,6 +1480,14 @@ static void init_vmcb(struct vcpu_svm *svm)
 	set_exception_intercept(svm, MC_VECTOR);
 	set_exception_intercept(svm, AC_VECTOR);
 	set_exception_intercept(svm, DB_VECTOR);
+	/*
+	 * Guest access to VMware backdoor ports could legitimately
+	 * trigger #GP because of TSS I/O permission bitmap.
+	 * We intercept those #GP and allow access to them anyway
+	 * as VMware does.
+	 */
+	if (enable_vmware_backdoor)
+		set_exception_intercept(svm, GP_VECTOR);
 
 	set_intercept(svm, INTERCEPT_INTR);
 	set_intercept(svm, INTERCEPT_NMI);
@@ -1371,7 +1496,6 @@ static void init_vmcb(struct vcpu_svm *svm)
 	set_intercept(svm, INTERCEPT_RDPMC);
 	set_intercept(svm, INTERCEPT_CPUID);
 	set_intercept(svm, INTERCEPT_INVD);
-	set_intercept(svm, INTERCEPT_HLT);
 	set_intercept(svm, INTERCEPT_INVLPG);
 	set_intercept(svm, INTERCEPT_INVLPGA);
 	set_intercept(svm, INTERCEPT_IOIO_PROT);
@@ -1389,11 +1513,14 @@ static void init_vmcb(struct vcpu_svm *svm)
 	set_intercept(svm, INTERCEPT_XSETBV);
 	set_intercept(svm, INTERCEPT_RSM);
 
-	if (!kvm_mwait_in_guest()) {
+	if (!kvm_mwait_in_guest(svm->vcpu.kvm)) {
 		set_intercept(svm, INTERCEPT_MONITOR);
 		set_intercept(svm, INTERCEPT_MWAIT);
 	}
 
+	if (!kvm_hlt_in_guest(svm->vcpu.kvm))
+		set_intercept(svm, INTERCEPT_HLT);
+
 	control->iopm_base_pa = __sme_set(iopm_base);
 	control->msrpm_base_pa = __sme_set(__pa(svm->msrpm));
 	control->int_ctl = V_INTR_MASKING_MASK;
@@ -1449,9 +1576,13 @@ static void init_vmcb(struct vcpu_svm *svm)
 	svm->nested.vmcb = 0;
 	svm->vcpu.arch.hflags = 0;
 
-	if (boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
-		control->pause_filter_count = 3000;
+	if (pause_filter_count) {
+		control->pause_filter_count = pause_filter_count;
+		if (pause_filter_thresh)
+			control->pause_filter_thresh = pause_filter_thresh;
 		set_intercept(svm, INTERCEPT_PAUSE);
+	} else {
+		clr_intercept(svm, INTERCEPT_PAUSE);
 	}
 
 	if (kvm_vcpu_apicv_active(&svm->vcpu))
@@ -1488,12 +1619,12 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
 				       unsigned int index)
 {
 	u64 *avic_physical_id_table;
-	struct kvm_arch *vm_data = &vcpu->kvm->arch;
+	struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
 
 	if (index >= AVIC_MAX_PHYSICAL_ID_COUNT)
 		return NULL;
 
-	avic_physical_id_table = page_address(vm_data->avic_physical_id_table_page);
+	avic_physical_id_table = page_address(kvm_svm->avic_physical_id_table_page);
 
 	return &avic_physical_id_table[index];
 }
@@ -1576,7 +1707,7 @@ static void __sev_asid_free(int asid)
 
 static void sev_asid_free(struct kvm *kvm)
 {
-	struct kvm_sev_info *sev = &kvm->arch.sev_info;
+	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
 
 	__sev_asid_free(sev->asid);
 }
@@ -1616,7 +1747,7 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
 				    unsigned long ulen, unsigned long *n,
 				    int write)
 {
-	struct kvm_sev_info *sev = &kvm->arch.sev_info;
+	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
 	unsigned long npages, npinned, size;
 	unsigned long locked, lock_limit;
 	struct page **pages;
@@ -1667,7 +1798,7 @@ err:
 static void sev_unpin_memory(struct kvm *kvm, struct page **pages,
 			     unsigned long npages)
 {
-	struct kvm_sev_info *sev = &kvm->arch.sev_info;
+	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
 
 	release_pages(pages, npages);
 	kvfree(pages);
@@ -1705,9 +1836,20 @@ static void __unregister_enc_region_locked(struct kvm *kvm,
 	kfree(region);
 }
 
+static struct kvm *svm_vm_alloc(void)
+{
+	struct kvm_svm *kvm_svm = kzalloc(sizeof(*kvm_svm), GFP_KERNEL);
+	return kvm_svm ? &kvm_svm->kvm : NULL;	/* NULL on allocation failure */
+}
+
+static void svm_vm_free(struct kvm *kvm)
+{
+	kfree(to_kvm_svm(kvm));	/* free the enclosing kvm_svm, not just @kvm */
+}
+
 static void sev_vm_destroy(struct kvm *kvm)
 {
-	struct kvm_sev_info *sev = &kvm->arch.sev_info;
+	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
 	struct list_head *head = &sev->regions_list;
 	struct list_head *pos, *q;
 
@@ -1736,18 +1878,18 @@ static void sev_vm_destroy(struct kvm *kvm)
 static void avic_vm_destroy(struct kvm *kvm)
 {
 	unsigned long flags;
-	struct kvm_arch *vm_data = &kvm->arch;
+	struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
 
 	if (!avic)
 		return;
 
-	if (vm_data->avic_logical_id_table_page)
-		__free_page(vm_data->avic_logical_id_table_page);
-	if (vm_data->avic_physical_id_table_page)
-		__free_page(vm_data->avic_physical_id_table_page);
+	if (kvm_svm->avic_logical_id_table_page)
+		__free_page(kvm_svm->avic_logical_id_table_page);
+	if (kvm_svm->avic_physical_id_table_page)
+		__free_page(kvm_svm->avic_physical_id_table_page);
 
 	spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
-	hash_del(&vm_data->hnode);
+	hash_del(&kvm_svm->hnode);
 	spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
 }
 
@@ -1761,10 +1903,10 @@ static int avic_vm_init(struct kvm *kvm)
 {
 	unsigned long flags;
 	int err = -ENOMEM;
-	struct kvm_arch *vm_data = &kvm->arch;
+	struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
+	struct kvm_svm *k2;
 	struct page *p_page;
 	struct page *l_page;
-	struct kvm_arch *ka;
 	u32 vm_id;
 
 	if (!avic)
@@ -1775,7 +1917,7 @@ static int avic_vm_init(struct kvm *kvm)
 	if (!p_page)
 		goto free_avic;
 
-	vm_data->avic_physical_id_table_page = p_page;
+	kvm_svm->avic_physical_id_table_page = p_page;
 	clear_page(page_address(p_page));
 
 	/* Allocating logical APIC ID table (4KB) */
@@ -1783,7 +1925,7 @@ static int avic_vm_init(struct kvm *kvm)
 	if (!l_page)
 		goto free_avic;
 
-	vm_data->avic_logical_id_table_page = l_page;
+	kvm_svm->avic_logical_id_table_page = l_page;
 	clear_page(page_address(l_page));
 
 	spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
@@ -1795,15 +1937,13 @@ static int avic_vm_init(struct kvm *kvm)
 	}
 	/* Is it still in use? Only possible if wrapped at least once */
 	if (next_vm_id_wrapped) {
-		hash_for_each_possible(svm_vm_data_hash, ka, hnode, vm_id) {
-			struct kvm *k2 = container_of(ka, struct kvm, arch);
-			struct kvm_arch *vd2 = &k2->arch;
-			if (vd2->avic_vm_id == vm_id)
+		hash_for_each_possible(svm_vm_data_hash, k2, hnode, vm_id) {
+			if (k2->avic_vm_id == vm_id)
 				goto again;
 		}
 	}
-	vm_data->avic_vm_id = vm_id;
-	hash_add(svm_vm_data_hash, &vm_data->hnode, vm_data->avic_vm_id);
+	kvm_svm->avic_vm_id = vm_id;
+	hash_add(svm_vm_data_hash, &kvm_svm->hnode, kvm_svm->avic_vm_id);
 	spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
 
 	return 0;
@@ -2535,14 +2675,7 @@ static int bp_interception(struct vcpu_svm *svm)
 
 static int ud_interception(struct vcpu_svm *svm)
 {
-	int er;
-
-	er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD);
-	if (er == EMULATE_USER_EXIT)
-		return 0;
-	if (er != EMULATE_DONE)
-		kvm_queue_exception(&svm->vcpu, UD_VECTOR);
-	return 1;
+	return handle_ud(&svm->vcpu);
 }
 
 static int ac_interception(struct vcpu_svm *svm)
@@ -2551,6 +2684,23 @@ static int ac_interception(struct vcpu_svm *svm)
 	return 1;
 }
 
+/* Emulate the instruction behind a #GP exit; requeue #GP if that fails. */
+static int gp_interception(struct vcpu_svm *svm)
+{
+	const int emul_type = EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL;
+	u32 gp_err = svm->vmcb->control.exit_info_1;
+	int rc;
+
+	WARN_ON_ONCE(!enable_vmware_backdoor);
+
+	rc = emulate_instruction(&svm->vcpu, emul_type);
+	if (rc == EMULATE_USER_EXIT)
+		return 0;
+	if (rc != EMULATE_DONE)
+		kvm_queue_exception_e(&svm->vcpu, GP_VECTOR, gp_err);
+	return 1;
+}
+
 static bool is_erratum_383(void)
 {
 	int err, i;
@@ -2639,7 +2789,7 @@ static int io_interception(struct vcpu_svm *svm)
 {
 	struct kvm_vcpu *vcpu = &svm->vcpu;
 	u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
-	int size, in, string, ret;
+	int size, in, string;
 	unsigned port;
 
 	++svm->vcpu.stat.io_exits;
@@ -2651,16 +2801,8 @@ static int io_interception(struct vcpu_svm *svm)
 	port = io_info >> 16;
 	size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
 	svm->next_rip = svm->vmcb->control.exit_info_2;
-	ret = kvm_skip_emulated_instruction(&svm->vcpu);
 
-	/*
-	 * TODO: we might be squashing a KVM_GUESTDBG_SINGLESTEP-triggered
-	 * KVM_EXIT_DEBUG here.
-	 */
-	if (in)
-		return kvm_fast_pio_in(vcpu, size, port) && ret;
-	else
-		return kvm_fast_pio_out(vcpu, size, port) && ret;
+	return kvm_fast_pio(&svm->vcpu, size, port, in);
 }
 
 static int nmi_interception(struct vcpu_svm *svm)
@@ -4233,6 +4375,9 @@ static int pause_interception(struct vcpu_svm *svm)
 	struct kvm_vcpu *vcpu = &svm->vcpu;
 	bool in_kernel = (svm_get_cpl(vcpu) == 0);
 
+	if (pause_filter_thresh)
+		grow_ple_window(vcpu);
+
 	kvm_vcpu_on_spin(vcpu, in_kernel);
 	return 1;
 }
@@ -4323,7 +4468,7 @@ static int avic_incomplete_ipi_interception(struct vcpu_svm *svm)
 
 static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat)
 {
-	struct kvm_arch *vm_data = &vcpu->kvm->arch;
+	struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
 	int index;
 	u32 *logical_apic_id_table;
 	int dlid = GET_APIC_LOGICAL_ID(ldr);
@@ -4345,7 +4490,7 @@ static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat)
 		index = (cluster << 2) + apic;
 	}
 
-	logical_apic_id_table = (u32 *) page_address(vm_data->avic_logical_id_table_page);
+	logical_apic_id_table = (u32 *) page_address(kvm_svm->avic_logical_id_table_page);
 
 	return &logical_apic_id_table[index];
 }
@@ -4425,7 +4570,7 @@ static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu)
 static int avic_handle_dfr_update(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
-	struct kvm_arch *vm_data = &vcpu->kvm->arch;
+	struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
 	u32 dfr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR);
 	u32 mod = (dfr >> 28) & 0xf;
 
@@ -4434,11 +4579,11 @@ static int avic_handle_dfr_update(struct kvm_vcpu *vcpu)
 	 * If this changes, we need to flush the AVIC logical
 	 * APID id table.
 	 */
-	if (vm_data->ldr_mode == mod)
+	if (kvm_svm->ldr_mode == mod)
 		return 0;
 
-	clear_page(page_address(vm_data->avic_logical_id_table_page));
-	vm_data->ldr_mode = mod;
+	clear_page(page_address(kvm_svm->avic_logical_id_table_page));
+	kvm_svm->ldr_mode = mod;
 
 	if (svm->ldr_reg)
 		avic_handle_ldr_update(vcpu);
@@ -4558,6 +4703,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
 	[SVM_EXIT_EXCP_BASE + PF_VECTOR]	= pf_interception,
 	[SVM_EXIT_EXCP_BASE + MC_VECTOR]	= mc_interception,
 	[SVM_EXIT_EXCP_BASE + AC_VECTOR]	= ac_interception,
+	[SVM_EXIT_EXCP_BASE + GP_VECTOR]	= gp_interception,
 	[SVM_EXIT_INTR]				= intr_interception,
 	[SVM_EXIT_NMI]				= nmi_interception,
 	[SVM_EXIT_SMI]				= nop_on_interception,
@@ -4606,6 +4752,8 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
 	pr_err("%-20s%08x\n", "exceptions:", control->intercept_exceptions);
 	pr_err("%-20s%016llx\n", "intercepts:", control->intercept);
 	pr_err("%-20s%d\n", "pause filter count:", control->pause_filter_count);
+	pr_err("%-20s%d\n", "pause filter threshold:",
+	       control->pause_filter_thresh);
 	pr_err("%-20s%016llx\n", "iopm_base_pa:", control->iopm_base_pa);
 	pr_err("%-20s%016llx\n", "msrpm_base_pa:", control->msrpm_base_pa);
 	pr_err("%-20s%016llx\n", "tsc_offset:", control->tsc_offset);
@@ -5073,7 +5221,7 @@ static int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
 			/* Try to enable guest_mode in IRTE */
 			pi.base = __sme_set(page_to_phys(svm->avic_backing_page) &
 					    AVIC_HPA_MASK);
-			pi.ga_tag = AVIC_GATAG(kvm->arch.avic_vm_id,
+			pi.ga_tag = AVIC_GATAG(to_kvm_svm(kvm)->avic_vm_id,
 						     svm->vcpu.vcpu_id);
 			pi.is_guest_mode = true;
 			pi.vcpu_data = &vcpu_info;
@@ -5237,6 +5385,11 @@ static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
 	return 0;
 }
 
+static int svm_set_identity_map_addr(struct kvm *kvm, u64 ident_addr)
+{
+	return 0;	/* both arguments are ignored; always reports success */
+}
+
 static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -5538,14 +5691,14 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 	vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
 
 	if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
-		kvm_before_handle_nmi(&svm->vcpu);
+		kvm_before_interrupt(&svm->vcpu);
 
 	stgi();
 
 	/* Any pending NMI will happen here */
 
 	if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
-		kvm_after_handle_nmi(&svm->vcpu);
+		kvm_after_interrupt(&svm->vcpu);
 
 	sync_cr8_to_lapic(vcpu);
 
@@ -5921,6 +6074,8 @@ static void svm_handle_external_intr(struct kvm_vcpu *vcpu)
 
 static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
 {
+	if (pause_filter_thresh)
+		shrink_ple_window(vcpu);
 }
 
 static inline void avic_post_state_restore(struct kvm_vcpu *vcpu)
@@ -6037,7 +6192,7 @@ static int sev_asid_new(void)
 
 static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
 {
-	struct kvm_sev_info *sev = &kvm->arch.sev_info;
+	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
 	int asid, ret;
 
 	ret = -EBUSY;
@@ -6102,14 +6257,14 @@ static int __sev_issue_cmd(int fd, int id, void *data, int *error)
 
 static int sev_issue_cmd(struct kvm *kvm, int id, void *data, int *error)
 {
-	struct kvm_sev_info *sev = &kvm->arch.sev_info;
+	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
 
 	return __sev_issue_cmd(sev->fd, id, data, error);
 }
 
 static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
 {
-	struct kvm_sev_info *sev = &kvm->arch.sev_info;
+	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
 	struct sev_data_launch_start *start;
 	struct kvm_sev_launch_start params;
 	void *dh_blob, *session_blob;
@@ -6207,7 +6362,7 @@ static int get_num_contig_pages(int idx, struct page **inpages,
 static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
 {
 	unsigned long vaddr, vaddr_end, next_vaddr, npages, size;
-	struct kvm_sev_info *sev = &kvm->arch.sev_info;
+	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
 	struct kvm_sev_launch_update_data params;
 	struct sev_data_launch_update_data *data;
 	struct page **inpages;
@@ -6283,7 +6438,7 @@ e_free:
 static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
 {
 	void __user *measure = (void __user *)(uintptr_t)argp->data;
-	struct kvm_sev_info *sev = &kvm->arch.sev_info;
+	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
 	struct sev_data_launch_measure *data;
 	struct kvm_sev_launch_measure params;
 	void __user *p = NULL;
@@ -6351,7 +6506,7 @@ e_free:
 
 static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
 {
-	struct kvm_sev_info *sev = &kvm->arch.sev_info;
+	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
 	struct sev_data_launch_finish *data;
 	int ret;
 
@@ -6371,7 +6526,7 @@ static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
 
 static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp)
 {
-	struct kvm_sev_info *sev = &kvm->arch.sev_info;
+	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
 	struct kvm_sev_guest_status params;
 	struct sev_data_guest_status *data;
 	int ret;
@@ -6403,7 +6558,7 @@ static int __sev_issue_dbg_cmd(struct kvm *kvm, unsigned long src,
 			       unsigned long dst, int size,
 			       int *error, bool enc)
 {
-	struct kvm_sev_info *sev = &kvm->arch.sev_info;
+	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
 	struct sev_data_dbg *data;
 	int ret;
 
@@ -6635,7 +6790,7 @@ err:
 
 static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
 {
-	struct kvm_sev_info *sev = &kvm->arch.sev_info;
+	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
 	struct sev_data_launch_secret *data;
 	struct kvm_sev_launch_secret params;
 	struct page **pages;
@@ -6759,7 +6914,7 @@ out:
 static int svm_register_enc_region(struct kvm *kvm,
 				   struct kvm_enc_region *range)
 {
-	struct kvm_sev_info *sev = &kvm->arch.sev_info;
+	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
 	struct enc_region *region;
 	int ret = 0;
 
@@ -6801,7 +6956,7 @@ e_free:
 static struct enc_region *
 find_enc_region(struct kvm *kvm, struct kvm_enc_region *range)
 {
-	struct kvm_sev_info *sev = &kvm->arch.sev_info;
+	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
 	struct list_head *head = &sev->regions_list;
 	struct enc_region *i;
 
@@ -6859,6 +7014,8 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
 	.vcpu_free = svm_free_vcpu,
 	.vcpu_reset = svm_vcpu_reset,
 
+	.vm_alloc = svm_vm_alloc,
+	.vm_free = svm_vm_free,
 	.vm_init = avic_vm_init,
 	.vm_destroy = svm_vm_destroy,
 
@@ -6925,6 +7082,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
 	.apicv_post_state_restore = avic_post_state_restore,
 
 	.set_tss_addr = svm_set_tss_addr,
+	.set_identity_map_addr = svm_set_identity_map_addr,
 	.get_tdp_level = get_npt_level,
 	.get_mt_mask = svm_get_mt_mask,
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 92496b9b5f2b..aafcc9881e88 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -52,9 +52,11 @@
 #include <asm/irq_remapping.h>
 #include <asm/mmu_context.h>
 #include <asm/nospec-branch.h>
+#include <asm/mshyperv.h>
 
 #include "trace.h"
 #include "pmu.h"
+#include "vmx_evmcs.h"
 
 #define __ex(x) __kvm_handle_fault_on_reboot(x)
 #define __ex_clear(x, reg) \
@@ -130,13 +132,15 @@ module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
 #endif
 
 #define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
-#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE)
-#define KVM_VM_CR0_ALWAYS_ON						\
-	(KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
+#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR0_NE
+#define KVM_VM_CR0_ALWAYS_ON				\
+	(KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | 	\
+	 X86_CR0_WP | X86_CR0_PG | X86_CR0_PE)
 #define KVM_CR4_GUEST_OWNED_BITS				      \
 	(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR      \
 	 | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_TSD)
 
+#define KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR4_VMXE
 #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
 #define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
 
@@ -165,34 +169,34 @@ module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
  * Time is measured based on a counter that runs at the same rate as the TSC,
  * refer SDM volume 3b section 21.6.13 & 22.1.3.
  */
-#define KVM_VMX_DEFAULT_PLE_GAP           128
-#define KVM_VMX_DEFAULT_PLE_WINDOW        4096
-#define KVM_VMX_DEFAULT_PLE_WINDOW_GROW   2
-#define KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK 0
-#define KVM_VMX_DEFAULT_PLE_WINDOW_MAX    \
-		INT_MAX / KVM_VMX_DEFAULT_PLE_WINDOW_GROW
+static unsigned int ple_gap = KVM_DEFAULT_PLE_GAP;
+module_param(ple_gap, uint, 0444);
 
-static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP;
-module_param(ple_gap, int, S_IRUGO);
-
-static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
-module_param(ple_window, int, S_IRUGO);
+static unsigned int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
+module_param(ple_window, uint, 0444);
 
 /* Default doubles per-vcpu window every exit. */
-static int ple_window_grow = KVM_VMX_DEFAULT_PLE_WINDOW_GROW;
-module_param(ple_window_grow, int, S_IRUGO);
+static unsigned int ple_window_grow = KVM_DEFAULT_PLE_WINDOW_GROW;
+module_param(ple_window_grow, uint, 0444);
 
 /* Default resets per-vcpu window every exit to ple_window. */
-static int ple_window_shrink = KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK;
-module_param(ple_window_shrink, int, S_IRUGO);
+static unsigned int ple_window_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK;
+module_param(ple_window_shrink, uint, 0444);
 
 /* Default is to compute the maximum so we can never overflow. */
-static int ple_window_actual_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
-static int ple_window_max        = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
-module_param(ple_window_max, int, S_IRUGO);
+static unsigned int ple_window_max        = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
+module_param(ple_window_max, uint, 0444);
 
 extern const ulong vmx_return;
 
+struct kvm_vmx {
+	struct kvm kvm;	/* generic VM state embedded in the VMX container */
+
+	unsigned int tss_addr;
+	bool ept_identity_pagetable_done;
+	gpa_t ept_identity_map_addr;
+};
+
 #define NR_AUTOLOAD_MSRS 8
 
 struct vmcs {
@@ -424,6 +427,35 @@ struct __packed vmcs12 {
  */
 #define VMCS12_MAX_FIELD_INDEX 0x17
 
+struct nested_vmx_msrs {
+	/*
+	 * We only store the "true" versions of the VMX capability MSRs. We
+	 * generate the "non-true" versions by setting the must-be-1 bits
+	 * according to the SDM.
+	 */
+	u32 procbased_ctls_low;
+	u32 procbased_ctls_high;
+	u32 secondary_ctls_low;
+	u32 secondary_ctls_high;
+	u32 pinbased_ctls_low;
+	u32 pinbased_ctls_high;
+	u32 exit_ctls_low;
+	u32 exit_ctls_high;
+	u32 entry_ctls_low;
+	u32 entry_ctls_high;
+	u32 misc_low;
+	u32 misc_high;
+	u32 ept_caps;
+	u32 vpid_caps;
+	u64 basic;
+	u64 cr0_fixed0;
+	u64 cr0_fixed1;
+	u64 cr4_fixed0;
+	u64 cr4_fixed1;
+	u64 vmcs_enum;
+	u64 vmfunc_controls;
+};
+
 /*
  * The nested_vmx structure is part of vcpu_vmx, and holds information we need
  * for correct emulation of VMX (i.e., nested VMX) on this vcpu.
@@ -475,32 +507,7 @@ struct nested_vmx {
 	u16 vpid02;
 	u16 last_vpid;
 
-	/*
-	 * We only store the "true" versions of the VMX capability MSRs. We
-	 * generate the "non-true" versions by setting the must-be-1 bits
-	 * according to the SDM.
-	 */
-	u32 nested_vmx_procbased_ctls_low;
-	u32 nested_vmx_procbased_ctls_high;
-	u32 nested_vmx_secondary_ctls_low;
-	u32 nested_vmx_secondary_ctls_high;
-	u32 nested_vmx_pinbased_ctls_low;
-	u32 nested_vmx_pinbased_ctls_high;
-	u32 nested_vmx_exit_ctls_low;
-	u32 nested_vmx_exit_ctls_high;
-	u32 nested_vmx_entry_ctls_low;
-	u32 nested_vmx_entry_ctls_high;
-	u32 nested_vmx_misc_low;
-	u32 nested_vmx_misc_high;
-	u32 nested_vmx_ept_caps;
-	u32 nested_vmx_vpid_caps;
-	u64 nested_vmx_basic;
-	u64 nested_vmx_cr0_fixed0;
-	u64 nested_vmx_cr0_fixed1;
-	u64 nested_vmx_cr4_fixed0;
-	u64 nested_vmx_cr4_fixed1;
-	u64 nested_vmx_vmcs_enum;
-	u64 nested_vmx_vmfunc_controls;
+	struct nested_vmx_msrs msrs;
 
 	/* SMM related state */
 	struct {
@@ -691,6 +698,11 @@ enum segment_cache_field {
 	SEG_FIELD_NR = 4
 };
 
+static inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm)
+{
+	return container_of(kvm, struct kvm_vmx, kvm);
+}
+
 static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
 {
 	return container_of(vcpu, struct vcpu_vmx, vcpu);
@@ -953,6 +965,7 @@ static struct vmcs_config {
 	u32 cpu_based_2nd_exec_ctrl;
 	u32 vmexit_ctrl;
 	u32 vmentry_ctrl;
+	struct nested_vmx_msrs nested;
 } vmcs_config;
 
 static struct vmx_capability {
@@ -999,6 +1012,169 @@ static const u32 vmx_msr_index[] = {
 	MSR_EFER, MSR_TSC_AUX, MSR_STAR,
 };
 
+DEFINE_STATIC_KEY_FALSE(enable_evmcs);
+
+#define current_evmcs ((struct hv_enlightened_vmcs *)this_cpu_read(current_vmcs))
+
+#define KVM_EVMCS_VERSION 1
+
+#if IS_ENABLED(CONFIG_HYPERV)
+static bool __read_mostly enlightened_vmcs = true;
+module_param(enlightened_vmcs, bool, 0444);
+
+static inline void evmcs_write64(unsigned long field, u64 value)
+{
+	u16 clean_field;
+	int offset = get_evmcs_offset(field, &clean_field);
+
+	if (offset < 0)
+		return;
+
+	*(u64 *)((char *)current_evmcs + offset) = value;
+
+	current_evmcs->hv_clean_fields &= ~clean_field;
+}
+
+static inline void evmcs_write32(unsigned long field, u32 value)
+{
+	u16 clean_field;
+	int offset = get_evmcs_offset(field, &clean_field);
+
+	if (offset < 0)
+		return;
+
+	*(u32 *)((char *)current_evmcs + offset) = value;
+	current_evmcs->hv_clean_fields &= ~clean_field;
+}
+
+static inline void evmcs_write16(unsigned long field, u16 value)
+{
+	u16 clean_field;
+	int offset = get_evmcs_offset(field, &clean_field);
+
+	if (offset < 0)
+		return;
+
+	*(u16 *)((char *)current_evmcs + offset) = value;
+	current_evmcs->hv_clean_fields &= ~clean_field;
+}
+
+static inline u64 evmcs_read64(unsigned long field)
+{
+	int offset = get_evmcs_offset(field, NULL);
+
+	if (offset < 0)
+		return 0;
+
+	return *(u64 *)((char *)current_evmcs + offset);
+}
+
+static inline u32 evmcs_read32(unsigned long field)
+{
+	int offset = get_evmcs_offset(field, NULL);
+
+	if (offset < 0)
+		return 0;
+
+	return *(u32 *)((char *)current_evmcs + offset);
+}
+
+static inline u16 evmcs_read16(unsigned long field)
+{
+	int offset = get_evmcs_offset(field, NULL);
+
+	if (offset < 0)
+		return 0;
+
+	return *(u16 *)((char *)current_evmcs + offset);
+}
+
+static void evmcs_load(u64 phys_addr)
+{
+	struct hv_vp_assist_page *vp_ap =
+		hv_get_vp_assist_page(smp_processor_id());
+
+	vp_ap->current_nested_vmcs = phys_addr;
+	vp_ap->enlighten_vmentry = 1;
+}
+
+static void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf)
+{
+	/*
+	 * Enlightened VMCSv1 doesn't support these:
+	 *
+	 *	POSTED_INTR_NV                  = 0x00000002,
+	 *	GUEST_INTR_STATUS               = 0x00000810,
+	 *	APIC_ACCESS_ADDR		= 0x00002014,
+	 *	POSTED_INTR_DESC_ADDR           = 0x00002016,
+	 *	EOI_EXIT_BITMAP0                = 0x0000201c,
+	 *	EOI_EXIT_BITMAP1                = 0x0000201e,
+	 *	EOI_EXIT_BITMAP2                = 0x00002020,
+	 *	EOI_EXIT_BITMAP3                = 0x00002022,
+	 */
+	vmcs_conf->pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
+	vmcs_conf->cpu_based_2nd_exec_ctrl &=
+		~SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
+	vmcs_conf->cpu_based_2nd_exec_ctrl &=
+		~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+	vmcs_conf->cpu_based_2nd_exec_ctrl &=
+		~SECONDARY_EXEC_APIC_REGISTER_VIRT;
+
+	/*
+	 *	GUEST_PML_INDEX			= 0x00000812,
+	 *	PML_ADDRESS			= 0x0000200e,
+	 */
+	vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_ENABLE_PML;
+
+	/*	VM_FUNCTION_CONTROL             = 0x00002018, */
+	vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_ENABLE_VMFUNC;
+
+	/*
+	 *	EPTP_LIST_ADDRESS               = 0x00002024,
+	 *	VMREAD_BITMAP                   = 0x00002026,
+	 *	VMWRITE_BITMAP                  = 0x00002028,
+	 */
+	vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_SHADOW_VMCS;
+
+	/*
+	 *	TSC_MULTIPLIER                  = 0x00002032,
+	 */
+	vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_TSC_SCALING;
+
+	/*
+	 *	PLE_GAP                         = 0x00004020,
+	 *	PLE_WINDOW                      = 0x00004022,
+	 */
+	vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
+
+	/*
+	 *	VMX_PREEMPTION_TIMER_VALUE      = 0x0000482E,
+	 */
+	vmcs_conf->pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
+
+	/*
+	 *      GUEST_IA32_PERF_GLOBAL_CTRL     = 0x00002808,
+	 *      HOST_IA32_PERF_GLOBAL_CTRL      = 0x00002c04,
+	 */
+	vmcs_conf->vmexit_ctrl &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
+	vmcs_conf->vmentry_ctrl &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
+
+	/*
+	 * Currently unsupported in KVM:
+	 *	GUEST_IA32_RTIT_CTL		= 0x00002814,
+	 */
+}
+#else /* !IS_ENABLED(CONFIG_HYPERV) */
+static inline void evmcs_write64(unsigned long field, u64 value) {}
+static inline void evmcs_write32(unsigned long field, u32 value) {}
+static inline void evmcs_write16(unsigned long field, u16 value) {}
+static inline u64 evmcs_read64(unsigned long field) { return 0; }
+static inline u32 evmcs_read32(unsigned long field) { return 0; }
+static inline u16 evmcs_read16(unsigned long field) { return 0; }
+static inline void evmcs_load(u64 phys_addr) {}
+static inline void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) {}
+#endif /* IS_ENABLED(CONFIG_HYPERV) */
+
 static inline bool is_exception_n(u32 intr_info, u8 vector)
 {
 	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
@@ -1031,6 +1207,11 @@ static inline bool is_invalid_opcode(u32 intr_info)
 	return is_exception_n(intr_info, UD_VECTOR);
 }
 
+static inline bool is_gp_fault(u32 intr_info)
+{
+	return is_exception_n(intr_info, GP_VECTOR);
+}
+
 static inline bool is_external_interrupt(u32 intr_info)
 {
 	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
@@ -1320,7 +1501,7 @@ static inline bool report_flexpriority(void)
 
 static inline unsigned nested_cpu_vmx_misc_cr3_count(struct kvm_vcpu *vcpu)
 {
-	return vmx_misc_cr3_count(to_vmx(vcpu)->nested.nested_vmx_misc_low);
+	return vmx_misc_cr3_count(to_vmx(vcpu)->nested.msrs.misc_low);
 }
 
 static inline bool nested_cpu_has(struct vmcs12 *vmcs12, u32 bit)
@@ -1341,6 +1522,16 @@ static inline bool nested_cpu_has_preemption_timer(struct vmcs12 *vmcs12)
 		PIN_BASED_VMX_PREEMPTION_TIMER;
 }
 
+static inline bool nested_cpu_has_nmi_exiting(struct vmcs12 *vmcs12)
+{
+	return vmcs12->pin_based_vm_exec_control & PIN_BASED_NMI_EXITING;
+}
+
+static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12)
+{
+	return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
+}
+
 static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12)
 {
 	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT);
@@ -1479,6 +1670,9 @@ static void vmcs_load(struct vmcs *vmcs)
 	u64 phys_addr = __pa(vmcs);
 	u8 error;
 
+	if (static_branch_unlikely(&enable_evmcs))
+		return evmcs_load(phys_addr);
+
 	asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0"
 			: "=qm"(error) : "a"(&phys_addr), "m"(phys_addr)
 			: "cc", "memory");
@@ -1652,18 +1846,24 @@ static __always_inline unsigned long __vmcs_readl(unsigned long field)
 static __always_inline u16 vmcs_read16(unsigned long field)
 {
 	vmcs_check16(field);
+	if (static_branch_unlikely(&enable_evmcs))
+		return evmcs_read16(field);
 	return __vmcs_readl(field);
 }
 
 static __always_inline u32 vmcs_read32(unsigned long field)
 {
 	vmcs_check32(field);
+	if (static_branch_unlikely(&enable_evmcs))
+		return evmcs_read32(field);
 	return __vmcs_readl(field);
 }
 
 static __always_inline u64 vmcs_read64(unsigned long field)
 {
 	vmcs_check64(field);
+	if (static_branch_unlikely(&enable_evmcs))
+		return evmcs_read64(field);
 #ifdef CONFIG_X86_64
 	return __vmcs_readl(field);
 #else
@@ -1674,6 +1874,8 @@ static __always_inline u64 vmcs_read64(unsigned long field)
 static __always_inline unsigned long vmcs_readl(unsigned long field)
 {
 	vmcs_checkl(field);
+	if (static_branch_unlikely(&enable_evmcs))
+		return evmcs_read64(field);
 	return __vmcs_readl(field);
 }
 
@@ -1697,18 +1899,27 @@ static __always_inline void __vmcs_writel(unsigned long field, unsigned long val
 static __always_inline void vmcs_write16(unsigned long field, u16 value)
 {
 	vmcs_check16(field);
+	if (static_branch_unlikely(&enable_evmcs))
+		return evmcs_write16(field, value);
+
 	__vmcs_writel(field, value);
 }
 
 static __always_inline void vmcs_write32(unsigned long field, u32 value)
 {
 	vmcs_check32(field);
+	if (static_branch_unlikely(&enable_evmcs))
+		return evmcs_write32(field, value);
+
 	__vmcs_writel(field, value);
 }
 
 static __always_inline void vmcs_write64(unsigned long field, u64 value)
 {
 	vmcs_check64(field);
+	if (static_branch_unlikely(&enable_evmcs))
+		return evmcs_write64(field, value);
+
 	__vmcs_writel(field, value);
 #ifndef CONFIG_X86_64
 	asm volatile ("");
@@ -1719,6 +1930,9 @@ static __always_inline void vmcs_write64(unsigned long field, u64 value)
 static __always_inline void vmcs_writel(unsigned long field, unsigned long value)
 {
 	vmcs_checkl(field);
+	if (static_branch_unlikely(&enable_evmcs))
+		return evmcs_write64(field, value);
+
 	__vmcs_writel(field, value);
 }
 
@@ -1726,6 +1940,9 @@ static __always_inline void vmcs_clear_bits(unsigned long field, u32 mask)
 {
         BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000,
 			 "vmcs_clear_bits does not support 64-bit fields");
+	if (static_branch_unlikely(&enable_evmcs))
+		return evmcs_write32(field, evmcs_read32(field) & ~mask);
+
 	__vmcs_writel(field, __vmcs_readl(field) & ~mask);
 }
 
@@ -1733,6 +1950,9 @@ static __always_inline void vmcs_set_bits(unsigned long field, u32 mask)
 {
         BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000,
 			 "vmcs_set_bits does not support 64-bit fields");
+	if (static_branch_unlikely(&enable_evmcs))
+		return evmcs_write32(field, evmcs_read32(field) | mask);
+
 	__vmcs_writel(field, __vmcs_readl(field) | mask);
 }
 
@@ -1864,6 +2084,14 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
 
 	eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
 	     (1u << DB_VECTOR) | (1u << AC_VECTOR);
+	/*
+	 * Guest access to VMware backdoor ports could legitimately
+	 * trigger #GP because of TSS I/O permission bitmap.
+	 * We intercept those #GP and allow access to them anyway
+	 * as VMware does.
+	 */
+	if (enable_vmware_backdoor)
+		eb |= (1u << GP_VECTOR);
 	if ((vcpu->guest_debug &
 	     (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
 	    (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP))
@@ -2129,6 +2357,9 @@ static unsigned long segment_base(u16 selector)
 static void vmx_save_host_state(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+#ifdef CONFIG_X86_64
+	int cpu = raw_smp_processor_id();
+#endif
 	int i;
 
 	if (vmx->host_state.loaded)
@@ -2141,7 +2372,15 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
 	 */
 	vmx->host_state.ldt_sel = kvm_read_ldt();
 	vmx->host_state.gs_ldt_reload_needed = vmx->host_state.ldt_sel;
+
+#ifdef CONFIG_X86_64
+	save_fsgs_for_kvm();
+	vmx->host_state.fs_sel = current->thread.fsindex;
+	vmx->host_state.gs_sel = current->thread.gsindex;
+#else
 	savesegment(fs, vmx->host_state.fs_sel);
+	savesegment(gs, vmx->host_state.gs_sel);
+#endif
 	if (!(vmx->host_state.fs_sel & 7)) {
 		vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel);
 		vmx->host_state.fs_reload_needed = 0;
@@ -2149,7 +2388,6 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
 		vmcs_write16(HOST_FS_SELECTOR, 0);
 		vmx->host_state.fs_reload_needed = 1;
 	}
-	savesegment(gs, vmx->host_state.gs_sel);
 	if (!(vmx->host_state.gs_sel & 7))
 		vmcs_write16(HOST_GS_SELECTOR, vmx->host_state.gs_sel);
 	else {
@@ -2160,20 +2398,16 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
 #ifdef CONFIG_X86_64
 	savesegment(ds, vmx->host_state.ds_sel);
 	savesegment(es, vmx->host_state.es_sel);
-#endif
 
-#ifdef CONFIG_X86_64
-	vmcs_writel(HOST_FS_BASE, read_msr(MSR_FS_BASE));
-	vmcs_writel(HOST_GS_BASE, read_msr(MSR_GS_BASE));
-#else
-	vmcs_writel(HOST_FS_BASE, segment_base(vmx->host_state.fs_sel));
-	vmcs_writel(HOST_GS_BASE, segment_base(vmx->host_state.gs_sel));
-#endif
+	vmcs_writel(HOST_FS_BASE, current->thread.fsbase);
+	vmcs_writel(HOST_GS_BASE, cpu_kernelmode_gs_base(cpu));
 
-#ifdef CONFIG_X86_64
-	rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
+	vmx->msr_host_kernel_gs_base = current->thread.gsbase;
 	if (is_long_mode(&vmx->vcpu))
 		wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
+#else
+	vmcs_writel(HOST_FS_BASE, segment_base(vmx->host_state.fs_sel));
+	vmcs_writel(HOST_GS_BASE, segment_base(vmx->host_state.gs_sel));
 #endif
 	if (boot_cpu_has(X86_FEATURE_MPX))
 		rdmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs);
@@ -2532,6 +2766,19 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit
 	return 0;
 }
 
+static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * Ensure that we clear the HLT state in the VMCS.  We don't need to
+	 * explicitly skip the instruction because if the HLT state is set,
+	 * then the instruction is already executing and RIP has already been
+	 * advanced.
+	 */
+	if (kvm_hlt_in_guest(vcpu->kvm) &&
+			vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT)
+		vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
+}
+
 static void vmx_queue_exception(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -2554,6 +2801,8 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
 		return;
 	}
 
+	WARN_ON_ONCE(vmx->emulation_required);
+
 	if (kvm_exception_is_soft(nr)) {
 		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
 			     vmx->vcpu.arch.event_exit_inst_len);
@@ -2562,6 +2811,8 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
 		intr_info |= INTR_TYPE_HARD_EXCEPTION;
 
 	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
+
+	vmx_clear_hlt(vcpu);
 }
 
 static bool vmx_rdtscp_supported(void)
@@ -2689,8 +2940,13 @@ static inline bool nested_vmx_allowed(struct kvm_vcpu *vcpu)
  * bit in the high half is on if the corresponding bit in the control field
  * may be on. See also vmx_control_verify().
  */
-static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
+static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv)
 {
+	if (!nested) {
+		memset(msrs, 0, sizeof(*msrs));
+		return;
+	}
+
 	/*
 	 * Note that as a general rule, the high half of the MSRs (bits in
 	 * the control fields which may be 1) should be initialized by the
@@ -2708,70 +2964,68 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
 
 	/* pin-based controls */
 	rdmsr(MSR_IA32_VMX_PINBASED_CTLS,
-		vmx->nested.nested_vmx_pinbased_ctls_low,
-		vmx->nested.nested_vmx_pinbased_ctls_high);
-	vmx->nested.nested_vmx_pinbased_ctls_low |=
+		msrs->pinbased_ctls_low,
+		msrs->pinbased_ctls_high);
+	msrs->pinbased_ctls_low |=
 		PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
-	vmx->nested.nested_vmx_pinbased_ctls_high &=
+	msrs->pinbased_ctls_high &=
 		PIN_BASED_EXT_INTR_MASK |
 		PIN_BASED_NMI_EXITING |
-		PIN_BASED_VIRTUAL_NMIS;
-	vmx->nested.nested_vmx_pinbased_ctls_high |=
+		PIN_BASED_VIRTUAL_NMIS |
+		(apicv ? PIN_BASED_POSTED_INTR : 0);
+	msrs->pinbased_ctls_high |=
 		PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
 		PIN_BASED_VMX_PREEMPTION_TIMER;
-	if (kvm_vcpu_apicv_active(&vmx->vcpu))
-		vmx->nested.nested_vmx_pinbased_ctls_high |=
-			PIN_BASED_POSTED_INTR;
 
 	/* exit controls */
 	rdmsr(MSR_IA32_VMX_EXIT_CTLS,
-		vmx->nested.nested_vmx_exit_ctls_low,
-		vmx->nested.nested_vmx_exit_ctls_high);
-	vmx->nested.nested_vmx_exit_ctls_low =
+		msrs->exit_ctls_low,
+		msrs->exit_ctls_high);
+	msrs->exit_ctls_low =
 		VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
 
-	vmx->nested.nested_vmx_exit_ctls_high &=
+	msrs->exit_ctls_high &=
 #ifdef CONFIG_X86_64
 		VM_EXIT_HOST_ADDR_SPACE_SIZE |
 #endif
 		VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT;
-	vmx->nested.nested_vmx_exit_ctls_high |=
+	msrs->exit_ctls_high |=
 		VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
 		VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER |
 		VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT;
 
 	if (kvm_mpx_supported())
-		vmx->nested.nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
+		msrs->exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
 
 	/* We support free control of debug control saving. */
-	vmx->nested.nested_vmx_exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS;
+	msrs->exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS;
 
 	/* entry controls */
 	rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
-		vmx->nested.nested_vmx_entry_ctls_low,
-		vmx->nested.nested_vmx_entry_ctls_high);
-	vmx->nested.nested_vmx_entry_ctls_low =
+		msrs->entry_ctls_low,
+		msrs->entry_ctls_high);
+	msrs->entry_ctls_low =
 		VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
-	vmx->nested.nested_vmx_entry_ctls_high &=
+	msrs->entry_ctls_high &=
 #ifdef CONFIG_X86_64
 		VM_ENTRY_IA32E_MODE |
 #endif
 		VM_ENTRY_LOAD_IA32_PAT;
-	vmx->nested.nested_vmx_entry_ctls_high |=
+	msrs->entry_ctls_high |=
 		(VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER);
 	if (kvm_mpx_supported())
-		vmx->nested.nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
+		msrs->entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
 
 	/* We support free control of debug control loading. */
-	vmx->nested.nested_vmx_entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS;
+	msrs->entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS;
 
 	/* cpu-based controls */
 	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
-		vmx->nested.nested_vmx_procbased_ctls_low,
-		vmx->nested.nested_vmx_procbased_ctls_high);
-	vmx->nested.nested_vmx_procbased_ctls_low =
+		msrs->procbased_ctls_low,
+		msrs->procbased_ctls_high);
+	msrs->procbased_ctls_low =
 		CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
-	vmx->nested.nested_vmx_procbased_ctls_high &=
+	msrs->procbased_ctls_high &=
 		CPU_BASED_VIRTUAL_INTR_PENDING |
 		CPU_BASED_VIRTUAL_NMI_PENDING | CPU_BASED_USE_TSC_OFFSETING |
 		CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING |
@@ -2791,12 +3045,12 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
 	 * can use it to avoid exits to L1 - even when L0 runs L2
 	 * without MSR bitmaps.
 	 */
-	vmx->nested.nested_vmx_procbased_ctls_high |=
+	msrs->procbased_ctls_high |=
 		CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
 		CPU_BASED_USE_MSR_BITMAPS;
 
 	/* We support free control of CR3 access interception. */
-	vmx->nested.nested_vmx_procbased_ctls_low &=
+	msrs->procbased_ctls_low &=
 		~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING);
 
 	/*
@@ -2804,10 +3058,10 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
 	 * depend on CPUID bits, they are added later by vmx_cpuid_update.
 	 */
 	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
-		vmx->nested.nested_vmx_secondary_ctls_low,
-		vmx->nested.nested_vmx_secondary_ctls_high);
-	vmx->nested.nested_vmx_secondary_ctls_low = 0;
-	vmx->nested.nested_vmx_secondary_ctls_high &=
+		msrs->secondary_ctls_low,
+		msrs->secondary_ctls_high);
+	msrs->secondary_ctls_low = 0;
+	msrs->secondary_ctls_high &=
 		SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
 		SECONDARY_EXEC_DESC |
 		SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
@@ -2817,33 +3071,33 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
 
 	if (enable_ept) {
 		/* nested EPT: emulate EPT also to L1 */
-		vmx->nested.nested_vmx_secondary_ctls_high |=
+		msrs->secondary_ctls_high |=
 			SECONDARY_EXEC_ENABLE_EPT;
-		vmx->nested.nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
+		msrs->ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
 			 VMX_EPTP_WB_BIT | VMX_EPT_INVEPT_BIT;
 		if (cpu_has_vmx_ept_execute_only())
-			vmx->nested.nested_vmx_ept_caps |=
+			msrs->ept_caps |=
 				VMX_EPT_EXECUTE_ONLY_BIT;
-		vmx->nested.nested_vmx_ept_caps &= vmx_capability.ept;
-		vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT |
+		msrs->ept_caps &= vmx_capability.ept;
+		msrs->ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT |
 			VMX_EPT_EXTENT_CONTEXT_BIT | VMX_EPT_2MB_PAGE_BIT |
 			VMX_EPT_1GB_PAGE_BIT;
 		if (enable_ept_ad_bits) {
-			vmx->nested.nested_vmx_secondary_ctls_high |=
+			msrs->secondary_ctls_high |=
 				SECONDARY_EXEC_ENABLE_PML;
-			vmx->nested.nested_vmx_ept_caps |= VMX_EPT_AD_BIT;
+			msrs->ept_caps |= VMX_EPT_AD_BIT;
 		}
 	}
 
 	if (cpu_has_vmx_vmfunc()) {
-		vmx->nested.nested_vmx_secondary_ctls_high |=
+		msrs->secondary_ctls_high |=
 			SECONDARY_EXEC_ENABLE_VMFUNC;
 		/*
 		 * Advertise EPTP switching unconditionally
 		 * since we emulate it
 		 */
 		if (enable_ept)
-			vmx->nested.nested_vmx_vmfunc_controls =
+			msrs->vmfunc_controls =
 				VMX_VMFUNC_EPTP_SWITCHING;
 	}
 
@@ -2854,25 +3108,25 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
 	 * not failing the single-context invvpid, and it is worse.
 	 */
 	if (enable_vpid) {
-		vmx->nested.nested_vmx_secondary_ctls_high |=
+		msrs->secondary_ctls_high |=
 			SECONDARY_EXEC_ENABLE_VPID;
-		vmx->nested.nested_vmx_vpid_caps = VMX_VPID_INVVPID_BIT |
+		msrs->vpid_caps = VMX_VPID_INVVPID_BIT |
 			VMX_VPID_EXTENT_SUPPORTED_MASK;
 	}
 
 	if (enable_unrestricted_guest)
-		vmx->nested.nested_vmx_secondary_ctls_high |=
+		msrs->secondary_ctls_high |=
 			SECONDARY_EXEC_UNRESTRICTED_GUEST;
 
 	/* miscellaneous data */
 	rdmsr(MSR_IA32_VMX_MISC,
-		vmx->nested.nested_vmx_misc_low,
-		vmx->nested.nested_vmx_misc_high);
-	vmx->nested.nested_vmx_misc_low &= VMX_MISC_SAVE_EFER_LMA;
-	vmx->nested.nested_vmx_misc_low |=
+		msrs->misc_low,
+		msrs->misc_high);
+	msrs->misc_low &= VMX_MISC_SAVE_EFER_LMA;
+	msrs->misc_low |=
 		VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE |
 		VMX_MISC_ACTIVITY_HLT;
-	vmx->nested.nested_vmx_misc_high = 0;
+	msrs->misc_high = 0;
 
 	/*
 	 * This MSR reports some information about VMX support. We
@@ -2880,14 +3134,14 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
 	 * guest, and the VMCS structure we give it - not about the
 	 * VMX support of the underlying hardware.
 	 */
-	vmx->nested.nested_vmx_basic =
+	msrs->basic =
 		VMCS12_REVISION |
 		VMX_BASIC_TRUE_CTLS |
 		((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) |
 		(VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT);
 
 	if (cpu_has_vmx_basic_inout())
-		vmx->nested.nested_vmx_basic |= VMX_BASIC_INOUT;
+		msrs->basic |= VMX_BASIC_INOUT;
 
 	/*
 	 * These MSRs specify bits which the guest must keep fixed on
@@ -2896,15 +3150,15 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
 	 */
 #define VMXON_CR0_ALWAYSON     (X86_CR0_PE | X86_CR0_PG | X86_CR0_NE)
 #define VMXON_CR4_ALWAYSON     X86_CR4_VMXE
-	vmx->nested.nested_vmx_cr0_fixed0 = VMXON_CR0_ALWAYSON;
-	vmx->nested.nested_vmx_cr4_fixed0 = VMXON_CR4_ALWAYSON;
+	msrs->cr0_fixed0 = VMXON_CR0_ALWAYSON;
+	msrs->cr4_fixed0 = VMXON_CR4_ALWAYSON;
 
 	/* These MSRs specify bits which the guest must keep fixed off. */
-	rdmsrl(MSR_IA32_VMX_CR0_FIXED1, vmx->nested.nested_vmx_cr0_fixed1);
-	rdmsrl(MSR_IA32_VMX_CR4_FIXED1, vmx->nested.nested_vmx_cr4_fixed1);
+	rdmsrl(MSR_IA32_VMX_CR0_FIXED1, msrs->cr0_fixed1);
+	rdmsrl(MSR_IA32_VMX_CR4_FIXED1, msrs->cr4_fixed1);
 
 	/* highest index: VMX_PREEMPTION_TIMER_VALUE */
-	vmx->nested.nested_vmx_vmcs_enum = VMCS12_MAX_FIELD_INDEX << 1;
+	msrs->vmcs_enum = VMCS12_MAX_FIELD_INDEX << 1;
 }
 
 /*
@@ -2941,7 +3195,7 @@ static int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data)
 		BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55) |
 		/* reserved */
 		BIT_ULL(31) | GENMASK_ULL(47, 45) | GENMASK_ULL(63, 56);
-	u64 vmx_basic = vmx->nested.nested_vmx_basic;
+	u64 vmx_basic = vmx->nested.msrs.basic;
 
 	if (!is_bitwise_subset(vmx_basic, data, feature_and_reserved))
 		return -EINVAL;
@@ -2960,7 +3214,7 @@ static int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data)
 	if (vmx_basic_vmcs_size(vmx_basic) > vmx_basic_vmcs_size(data))
 		return -EINVAL;
 
-	vmx->nested.nested_vmx_basic = data;
+	vmx->nested.msrs.basic = data;
 	return 0;
 }
 
@@ -2972,24 +3226,24 @@ vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
 
 	switch (msr_index) {
 	case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
-		lowp = &vmx->nested.nested_vmx_pinbased_ctls_low;
-		highp = &vmx->nested.nested_vmx_pinbased_ctls_high;
+		lowp = &vmx->nested.msrs.pinbased_ctls_low;
+		highp = &vmx->nested.msrs.pinbased_ctls_high;
 		break;
 	case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
-		lowp = &vmx->nested.nested_vmx_procbased_ctls_low;
-		highp = &vmx->nested.nested_vmx_procbased_ctls_high;
+		lowp = &vmx->nested.msrs.procbased_ctls_low;
+		highp = &vmx->nested.msrs.procbased_ctls_high;
 		break;
 	case MSR_IA32_VMX_TRUE_EXIT_CTLS:
-		lowp = &vmx->nested.nested_vmx_exit_ctls_low;
-		highp = &vmx->nested.nested_vmx_exit_ctls_high;
+		lowp = &vmx->nested.msrs.exit_ctls_low;
+		highp = &vmx->nested.msrs.exit_ctls_high;
 		break;
 	case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
-		lowp = &vmx->nested.nested_vmx_entry_ctls_low;
-		highp = &vmx->nested.nested_vmx_entry_ctls_high;
+		lowp = &vmx->nested.msrs.entry_ctls_low;
+		highp = &vmx->nested.msrs.entry_ctls_high;
 		break;
 	case MSR_IA32_VMX_PROCBASED_CTLS2:
-		lowp = &vmx->nested.nested_vmx_secondary_ctls_low;
-		highp = &vmx->nested.nested_vmx_secondary_ctls_high;
+		lowp = &vmx->nested.msrs.secondary_ctls_low;
+		highp = &vmx->nested.msrs.secondary_ctls_high;
 		break;
 	default:
 		BUG();
@@ -3020,13 +3274,13 @@ static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data)
 		GENMASK_ULL(13, 9) | BIT_ULL(31);
 	u64 vmx_misc;
 
-	vmx_misc = vmx_control_msr(vmx->nested.nested_vmx_misc_low,
-				   vmx->nested.nested_vmx_misc_high);
+	vmx_misc = vmx_control_msr(vmx->nested.msrs.misc_low,
+				   vmx->nested.msrs.misc_high);
 
 	if (!is_bitwise_subset(vmx_misc, data, feature_and_reserved_bits))
 		return -EINVAL;
 
-	if ((vmx->nested.nested_vmx_pinbased_ctls_high &
+	if ((vmx->nested.msrs.pinbased_ctls_high &
 	     PIN_BASED_VMX_PREEMPTION_TIMER) &&
 	    vmx_misc_preemption_timer_rate(data) !=
 	    vmx_misc_preemption_timer_rate(vmx_misc))
@@ -3041,8 +3295,8 @@ static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data)
 	if (vmx_misc_mseg_revid(data) != vmx_misc_mseg_revid(vmx_misc))
 		return -EINVAL;
 
-	vmx->nested.nested_vmx_misc_low = data;
-	vmx->nested.nested_vmx_misc_high = data >> 32;
+	vmx->nested.msrs.misc_low = data;
+	vmx->nested.msrs.misc_high = data >> 32;
 	return 0;
 }
 
@@ -3050,15 +3304,15 @@ static int vmx_restore_vmx_ept_vpid_cap(struct vcpu_vmx *vmx, u64 data)
 {
 	u64 vmx_ept_vpid_cap;
 
-	vmx_ept_vpid_cap = vmx_control_msr(vmx->nested.nested_vmx_ept_caps,
-					   vmx->nested.nested_vmx_vpid_caps);
+	vmx_ept_vpid_cap = vmx_control_msr(vmx->nested.msrs.ept_caps,
+					   vmx->nested.msrs.vpid_caps);
 
 	/* Every bit is either reserved or a feature bit. */
 	if (!is_bitwise_subset(vmx_ept_vpid_cap, data, -1ULL))
 		return -EINVAL;
 
-	vmx->nested.nested_vmx_ept_caps = data;
-	vmx->nested.nested_vmx_vpid_caps = data >> 32;
+	vmx->nested.msrs.ept_caps = data;
+	vmx->nested.msrs.vpid_caps = data >> 32;
 	return 0;
 }
 
@@ -3068,10 +3322,10 @@ static int vmx_restore_fixed0_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
 
 	switch (msr_index) {
 	case MSR_IA32_VMX_CR0_FIXED0:
-		msr = &vmx->nested.nested_vmx_cr0_fixed0;
+		msr = &vmx->nested.msrs.cr0_fixed0;
 		break;
 	case MSR_IA32_VMX_CR4_FIXED0:
-		msr = &vmx->nested.nested_vmx_cr4_fixed0;
+		msr = &vmx->nested.msrs.cr4_fixed0;
 		break;
 	default:
 		BUG();
@@ -3135,7 +3389,7 @@ static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
 	case MSR_IA32_VMX_EPT_VPID_CAP:
 		return vmx_restore_vmx_ept_vpid_cap(vmx, data);
 	case MSR_IA32_VMX_VMCS_ENUM:
-		vmx->nested.nested_vmx_vmcs_enum = data;
+		vmx->nested.msrs.vmcs_enum = data;
 		return 0;
 	default:
 		/*
@@ -3146,77 +3400,75 @@ static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
 }
 
 /* Returns 0 on success, non-0 otherwise. */
-static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
+static int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata)
 {
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
-
 	switch (msr_index) {
 	case MSR_IA32_VMX_BASIC:
-		*pdata = vmx->nested.nested_vmx_basic;
+		*pdata = msrs->basic;
 		break;
 	case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
 	case MSR_IA32_VMX_PINBASED_CTLS:
 		*pdata = vmx_control_msr(
-			vmx->nested.nested_vmx_pinbased_ctls_low,
-			vmx->nested.nested_vmx_pinbased_ctls_high);
+			msrs->pinbased_ctls_low,
+			msrs->pinbased_ctls_high);
 		if (msr_index == MSR_IA32_VMX_PINBASED_CTLS)
 			*pdata |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
 		break;
 	case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
 	case MSR_IA32_VMX_PROCBASED_CTLS:
 		*pdata = vmx_control_msr(
-			vmx->nested.nested_vmx_procbased_ctls_low,
-			vmx->nested.nested_vmx_procbased_ctls_high);
+			msrs->procbased_ctls_low,
+			msrs->procbased_ctls_high);
 		if (msr_index == MSR_IA32_VMX_PROCBASED_CTLS)
 			*pdata |= CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
 		break;
 	case MSR_IA32_VMX_TRUE_EXIT_CTLS:
 	case MSR_IA32_VMX_EXIT_CTLS:
 		*pdata = vmx_control_msr(
-			vmx->nested.nested_vmx_exit_ctls_low,
-			vmx->nested.nested_vmx_exit_ctls_high);
+			msrs->exit_ctls_low,
+			msrs->exit_ctls_high);
 		if (msr_index == MSR_IA32_VMX_EXIT_CTLS)
 			*pdata |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
 		break;
 	case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
 	case MSR_IA32_VMX_ENTRY_CTLS:
 		*pdata = vmx_control_msr(
-			vmx->nested.nested_vmx_entry_ctls_low,
-			vmx->nested.nested_vmx_entry_ctls_high);
+			msrs->entry_ctls_low,
+			msrs->entry_ctls_high);
 		if (msr_index == MSR_IA32_VMX_ENTRY_CTLS)
 			*pdata |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
 		break;
 	case MSR_IA32_VMX_MISC:
 		*pdata = vmx_control_msr(
-			vmx->nested.nested_vmx_misc_low,
-			vmx->nested.nested_vmx_misc_high);
+			msrs->misc_low,
+			msrs->misc_high);
 		break;
 	case MSR_IA32_VMX_CR0_FIXED0:
-		*pdata = vmx->nested.nested_vmx_cr0_fixed0;
+		*pdata = msrs->cr0_fixed0;
 		break;
 	case MSR_IA32_VMX_CR0_FIXED1:
-		*pdata = vmx->nested.nested_vmx_cr0_fixed1;
+		*pdata = msrs->cr0_fixed1;
 		break;
 	case MSR_IA32_VMX_CR4_FIXED0:
-		*pdata = vmx->nested.nested_vmx_cr4_fixed0;
+		*pdata = msrs->cr4_fixed0;
 		break;
 	case MSR_IA32_VMX_CR4_FIXED1:
-		*pdata = vmx->nested.nested_vmx_cr4_fixed1;
+		*pdata = msrs->cr4_fixed1;
 		break;
 	case MSR_IA32_VMX_VMCS_ENUM:
-		*pdata = vmx->nested.nested_vmx_vmcs_enum;
+		*pdata = msrs->vmcs_enum;
 		break;
 	case MSR_IA32_VMX_PROCBASED_CTLS2:
 		*pdata = vmx_control_msr(
-			vmx->nested.nested_vmx_secondary_ctls_low,
-			vmx->nested.nested_vmx_secondary_ctls_high);
+			msrs->secondary_ctls_low,
+			msrs->secondary_ctls_high);
 		break;
 	case MSR_IA32_VMX_EPT_VPID_CAP:
-		*pdata = vmx->nested.nested_vmx_ept_caps |
-			((u64)vmx->nested.nested_vmx_vpid_caps << 32);
+		*pdata = msrs->ept_caps |
+			((u64)msrs->vpid_caps << 32);
 		break;
 	case MSR_IA32_VMX_VMFUNC:
-		*pdata = vmx->nested.nested_vmx_vmfunc_controls;
+		*pdata = msrs->vmfunc_controls;
 		break;
 	default:
 		return 1;
@@ -3235,7 +3487,20 @@ static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu,
 
+/*
+ * Report a VMX capability MSR (MSR_IA32_VMX_BASIC..MSR_IA32_VMX_VMFUNC) from
+ * vmcs_config.nested.  Returns 1 when nested is off or the index is outside
+ * that range; otherwise returns whatever vmx_get_vmx_msr() returns.
+ */
 static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
 {
-	return 1;
+	switch (msr->index) {
+	case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
+		if (!nested)
+			return 1;
+		return vmx_get_vmx_msr(&vmcs_config.nested, msr->index,
+				       &msr->data);
+	default:
+		return 1;
+	}
 }
 
 /*
@@ -3309,7 +3570,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
 		if (!nested_vmx_allowed(vcpu))
 			return 1;
-		return vmx_get_vmx_msr(vcpu, msr_info->index, &msr_info->data);
+		return vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index,
+				       &msr_info->data);
 	case MSR_IA32_XSS:
 		if (!vmx_xsaves_supported())
 			return 1;
@@ -3602,6 +3864,14 @@ static int hardware_enable(void)
 	if (cr4_read_shadow() & X86_CR4_VMXE)
 		return -EBUSY;
 
+	/*
+	 * This can happen if a CPU was hot-added but allocating a
+	 * VP assist page for it failed.
+	 */
+	if (static_branch_unlikely(&enable_evmcs) &&
+	    !hv_get_vp_assist_page(cpu))
+		return -EFAULT;
+
 	INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
 	INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu));
 	spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
@@ -3700,6 +3970,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 	u32 _vmexit_control = 0;
 	u32 _vmentry_control = 0;
 
+	memset(vmcs_conf, 0, sizeof(*vmcs_conf));
 	min = CPU_BASED_HLT_EXITING |
 #ifdef CONFIG_X86_64
 	      CPU_BASED_CR8_LOAD_EXITING |
@@ -3710,13 +3981,11 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 	      CPU_BASED_UNCOND_IO_EXITING |
 	      CPU_BASED_MOV_DR_EXITING |
 	      CPU_BASED_USE_TSC_OFFSETING |
+	      CPU_BASED_MWAIT_EXITING |
+	      CPU_BASED_MONITOR_EXITING |
 	      CPU_BASED_INVLPG_EXITING |
 	      CPU_BASED_RDPMC_EXITING;
 
-	if (!kvm_mwait_in_guest())
-		min |= CPU_BASED_MWAIT_EXITING |
-			CPU_BASED_MONITOR_EXITING;
-
 	opt = CPU_BASED_TPR_SHADOW |
 	      CPU_BASED_USE_MSR_BITMAPS |
 	      CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
@@ -3835,7 +4104,12 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 	vmcs_conf->size = vmx_msr_high & 0x1fff;
 	vmcs_conf->order = get_order(vmcs_conf->size);
 	vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff;
-	vmcs_conf->revision_id = vmx_msr_low;
+
+	/* KVM supports Enlightened VMCS v1 only */
+	if (static_branch_unlikely(&enable_evmcs))
+		vmcs_conf->revision_id = KVM_EVMCS_VERSION;
+	else
+		vmcs_conf->revision_id = vmx_msr_low;
 
 	vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control;
 	vmcs_conf->cpu_based_exec_ctrl = _cpu_based_exec_control;
@@ -3843,6 +4117,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 	vmcs_conf->vmexit_ctrl         = _vmexit_control;
 	vmcs_conf->vmentry_ctrl        = _vmentry_control;
 
+	if (static_branch_unlikely(&enable_evmcs))
+		evmcs_sanitize_exec_ctrls(vmcs_conf);
+
 	cpu_has_load_ia32_efer =
 		allow_1_setting(MSR_IA32_VMX_ENTRY_CTLS,
 				VM_ENTRY_LOAD_IA32_EFER)
@@ -4162,6 +4439,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
 {
 	unsigned long flags;
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm);
 
 	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
 	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
@@ -4177,13 +4455,13 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
 	 * Very old userspace does not call KVM_SET_TSS_ADDR before entering
 	 * vcpu. Warn the user that an update is overdue.
 	 */
-	if (!vcpu->kvm->arch.tss_addr)
+	if (!kvm_vmx->tss_addr)
 		printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR need to be "
 			     "called before entering vcpu\n");
 
 	vmx_segment_cache_clear(vmx);
 
-	vmcs_writel(GUEST_TR_BASE, vcpu->kvm->arch.tss_addr);
+	vmcs_writel(GUEST_TR_BASE, kvm_vmx->tss_addr);
 	vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1);
 	vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
 
@@ -4291,7 +4569,7 @@ static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
 
 static void vmx_decache_cr3(struct kvm_vcpu *vcpu)
 {
-	if (enable_ept && is_paging(vcpu))
+	if (enable_unrestricted_guest || (enable_ept && is_paging(vcpu)))
 		vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
 	__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
 }
@@ -4339,11 +4617,11 @@ static void ept_save_pdptrs(struct kvm_vcpu *vcpu)
 
 static bool nested_guest_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
 {
-	u64 fixed0 = to_vmx(vcpu)->nested.nested_vmx_cr0_fixed0;
-	u64 fixed1 = to_vmx(vcpu)->nested.nested_vmx_cr0_fixed1;
+	u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr0_fixed0;
+	u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr0_fixed1;
 	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
 
-	if (to_vmx(vcpu)->nested.nested_vmx_secondary_ctls_high &
+	if (to_vmx(vcpu)->nested.msrs.secondary_ctls_high &
 		SECONDARY_EXEC_UNRESTRICTED_GUEST &&
 	    nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST))
 		fixed0 &= ~(X86_CR0_PE | X86_CR0_PG);
@@ -4353,16 +4631,16 @@ static bool nested_guest_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
 
 static bool nested_host_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
 {
-	u64 fixed0 = to_vmx(vcpu)->nested.nested_vmx_cr0_fixed0;
-	u64 fixed1 = to_vmx(vcpu)->nested.nested_vmx_cr0_fixed1;
+	u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr0_fixed0;
+	u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr0_fixed1;
 
 	return fixed_bits_valid(val, fixed0, fixed1);
 }
 
 static bool nested_cr4_valid(struct kvm_vcpu *vcpu, unsigned long val)
 {
-	u64 fixed0 = to_vmx(vcpu)->nested.nested_vmx_cr4_fixed0;
-	u64 fixed1 = to_vmx(vcpu)->nested.nested_vmx_cr4_fixed1;
+	u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr4_fixed0;
+	u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr4_fixed1;
 
 	return fixed_bits_valid(val, fixed0, fixed1);
 }
@@ -4428,7 +4706,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 	}
 #endif
 
-	if (enable_ept)
+	if (enable_ept && !enable_unrestricted_guest)
 		ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu);
 
 	vmcs_writel(CR0_READ_SHADOW, cr0);
@@ -4469,10 +4747,11 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 	if (enable_ept) {
 		eptp = construct_eptp(vcpu, cr3);
 		vmcs_write64(EPT_POINTER, eptp);
-		if (is_paging(vcpu) || is_guest_mode(vcpu))
+		if (enable_unrestricted_guest || is_paging(vcpu) ||
+		    is_guest_mode(vcpu))
 			guest_cr3 = kvm_read_cr3(vcpu);
 		else
-			guest_cr3 = vcpu->kvm->arch.ept_identity_map_addr;
+			guest_cr3 = to_kvm_vmx(vcpu->kvm)->ept_identity_map_addr;
 		ept_load_pdptrs(vcpu);
 	}
 
@@ -4487,11 +4766,15 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 	 * is in force while we are in guest mode.  Do not let guests control
 	 * this bit, even if host CR4.MCE == 0.
 	 */
-	unsigned long hw_cr4 =
-		(cr4_read_shadow() & X86_CR4_MCE) |
-		(cr4 & ~X86_CR4_MCE) |
-		(to_vmx(vcpu)->rmode.vm86_active ?
-		 KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON);
+	unsigned long hw_cr4;
+
+	hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE);
+	if (enable_unrestricted_guest)
+		hw_cr4 |= KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST;
+	else if (to_vmx(vcpu)->rmode.vm86_active)
+		hw_cr4 |= KVM_RMODE_VM_CR4_ALWAYS_ON;
+	else
+		hw_cr4 |= KVM_PMODE_VM_CR4_ALWAYS_ON;
 
 	if ((cr4 & X86_CR4_UMIP) && !boot_cpu_has(X86_FEATURE_UMIP)) {
 		vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
@@ -4517,16 +4800,17 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 		return 1;
 
 	vcpu->arch.cr4 = cr4;
-	if (enable_ept) {
-		if (!is_paging(vcpu)) {
-			hw_cr4 &= ~X86_CR4_PAE;
-			hw_cr4 |= X86_CR4_PSE;
-		} else if (!(cr4 & X86_CR4_PAE)) {
-			hw_cr4 &= ~X86_CR4_PAE;
+
+	if (!enable_unrestricted_guest) {
+		if (enable_ept) {
+			if (!is_paging(vcpu)) {
+				hw_cr4 &= ~X86_CR4_PAE;
+				hw_cr4 |= X86_CR4_PSE;
+			} else if (!(cr4 & X86_CR4_PAE)) {
+				hw_cr4 &= ~X86_CR4_PAE;
+			}
 		}
-	}
 
-	if (!enable_unrestricted_guest && !is_paging(vcpu))
 		/*
 		 * SMEP/SMAP/PKU is disabled if CPU is in non-paging mode in
 		 * hardware.  To emulate this behavior, SMEP/SMAP/PKU needs
@@ -4538,7 +4822,9 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 		 * If enable_unrestricted_guest, the CPU automatically
 		 * disables SMEP/SMAP/PKU when the guest sets CR0.PG=0.
 		 */
-		hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE);
+		if (!is_paging(vcpu))
+			hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE);
+	}
 
 	vmcs_writel(CR4_READ_SHADOW, cr4);
 	vmcs_writel(GUEST_CR4, hw_cr4);
@@ -4906,7 +5192,7 @@ static int init_rmode_tss(struct kvm *kvm)
 	int idx, r;
 
 	idx = srcu_read_lock(&kvm->srcu);
-	fn = kvm->arch.tss_addr >> PAGE_SHIFT;
+	fn = to_kvm_vmx(kvm)->tss_addr >> PAGE_SHIFT;
 	r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
 	if (r < 0)
 		goto out;
@@ -4932,22 +5218,23 @@ out:
 
 static int init_rmode_identity_map(struct kvm *kvm)
 {
+	struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);
 	int i, idx, r = 0;
 	kvm_pfn_t identity_map_pfn;
 	u32 tmp;
 
-	/* Protect kvm->arch.ept_identity_pagetable_done. */
+	/* Protect kvm_vmx->ept_identity_pagetable_done. */
 	mutex_lock(&kvm->slots_lock);
 
-	if (likely(kvm->arch.ept_identity_pagetable_done))
+	if (likely(kvm_vmx->ept_identity_pagetable_done))
 		goto out2;
 
-	if (!kvm->arch.ept_identity_map_addr)
-		kvm->arch.ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR;
-	identity_map_pfn = kvm->arch.ept_identity_map_addr >> PAGE_SHIFT;
+	if (!kvm_vmx->ept_identity_map_addr)
+		kvm_vmx->ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR;
+	identity_map_pfn = kvm_vmx->ept_identity_map_addr >> PAGE_SHIFT;
 
 	r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT,
-				    kvm->arch.ept_identity_map_addr, PAGE_SIZE);
+				    kvm_vmx->ept_identity_map_addr, PAGE_SIZE);
 	if (r < 0)
 		goto out2;
 
@@ -4964,7 +5251,7 @@ static int init_rmode_identity_map(struct kvm *kvm)
 		if (r < 0)
 			goto out;
 	}
-	kvm->arch.ept_identity_pagetable_done = true;
+	kvm_vmx->ept_identity_pagetable_done = true;
 
 out:
 	srcu_read_unlock(&kvm->srcu, idx);
@@ -5500,6 +5787,11 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx)
 		exec_control |= CPU_BASED_CR3_STORE_EXITING |
 				CPU_BASED_CR3_LOAD_EXITING  |
 				CPU_BASED_INVLPG_EXITING;
+	if (kvm_mwait_in_guest(vmx->vcpu.kvm))
+		exec_control &= ~(CPU_BASED_MWAIT_EXITING |
+				CPU_BASED_MONITOR_EXITING);
+	if (kvm_hlt_in_guest(vmx->vcpu.kvm))
+		exec_control &= ~CPU_BASED_HLT_EXITING;
 	return exec_control;
 }
 
@@ -5533,7 +5825,7 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
 	}
 	if (!enable_unrestricted_guest)
 		exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
-	if (!ple_gap)
+	if (kvm_pause_in_guest(vmx->vcpu.kvm))
 		exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
 	if (!kvm_vcpu_apicv_active(vcpu))
 		exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT |
@@ -5565,10 +5857,10 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
 
 		if (nested) {
 			if (xsaves_enabled)
-				vmx->nested.nested_vmx_secondary_ctls_high |=
+				vmx->nested.msrs.secondary_ctls_high |=
 					SECONDARY_EXEC_XSAVES;
 			else
-				vmx->nested.nested_vmx_secondary_ctls_high &=
+				vmx->nested.msrs.secondary_ctls_high &=
 					~SECONDARY_EXEC_XSAVES;
 		}
 	}
@@ -5580,10 +5872,10 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
 
 		if (nested) {
 			if (rdtscp_enabled)
-				vmx->nested.nested_vmx_secondary_ctls_high |=
+				vmx->nested.msrs.secondary_ctls_high |=
 					SECONDARY_EXEC_RDTSCP;
 			else
-				vmx->nested.nested_vmx_secondary_ctls_high &=
+				vmx->nested.msrs.secondary_ctls_high &=
 					~SECONDARY_EXEC_RDTSCP;
 		}
 	}
@@ -5601,10 +5893,10 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
 
 		if (nested) {
 			if (invpcid_enabled)
-				vmx->nested.nested_vmx_secondary_ctls_high |=
+				vmx->nested.msrs.secondary_ctls_high |=
 					SECONDARY_EXEC_ENABLE_INVPCID;
 			else
-				vmx->nested.nested_vmx_secondary_ctls_high &=
+				vmx->nested.msrs.secondary_ctls_high &=
 					~SECONDARY_EXEC_ENABLE_INVPCID;
 		}
 	}
@@ -5616,10 +5908,10 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
 
 		if (nested) {
 			if (rdrand_enabled)
-				vmx->nested.nested_vmx_secondary_ctls_high |=
+				vmx->nested.msrs.secondary_ctls_high |=
 					SECONDARY_EXEC_RDRAND_EXITING;
 			else
-				vmx->nested.nested_vmx_secondary_ctls_high &=
+				vmx->nested.msrs.secondary_ctls_high &=
 					~SECONDARY_EXEC_RDRAND_EXITING;
 		}
 	}
@@ -5631,10 +5923,10 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
 
 		if (nested) {
 			if (rdseed_enabled)
-				vmx->nested.nested_vmx_secondary_ctls_high |=
+				vmx->nested.msrs.secondary_ctls_high |=
 					SECONDARY_EXEC_RDSEED_EXITING;
 			else
-				vmx->nested.nested_vmx_secondary_ctls_high &=
+				vmx->nested.msrs.secondary_ctls_high &=
 					~SECONDARY_EXEC_RDSEED_EXITING;
 		}
 	}
@@ -5696,7 +5988,7 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
 		vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc)));
 	}
 
-	if (ple_gap) {
+	if (!kvm_pause_in_guest(vmx->vcpu.kvm)) {
 		vmcs_write32(PLE_GAP, ple_gap);
 		vmx->ple_window = ple_window;
 		vmx->ple_window_dirty = true;
@@ -5861,6 +6153,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 	update_exception_bitmap(vcpu);
 
 	vpid_sync_context(vmx->vpid);
+	if (init_event)
+		vmx_clear_hlt(vcpu);
 }
 
 /*
@@ -5885,8 +6179,7 @@ static bool nested_exit_intr_ack_set(struct kvm_vcpu *vcpu)
 
 static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu)
 {
-	return get_vmcs12(vcpu)->pin_based_vm_exec_control &
-		PIN_BASED_NMI_EXITING;
+	return nested_cpu_has_nmi_exiting(get_vmcs12(vcpu));
 }
 
 static void enable_irq_window(struct kvm_vcpu *vcpu)
@@ -5932,6 +6225,8 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu)
 	} else
 		intr |= INTR_TYPE_EXT_INTR;
 	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr);
+
+	vmx_clear_hlt(vcpu);
 }
 
 static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
@@ -5962,6 +6257,8 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
 
 	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
 			INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
+
+	vmx_clear_hlt(vcpu);
 }
 
 static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
@@ -6024,14 +6321,23 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
 {
 	int ret;
 
+	if (enable_unrestricted_guest)
+		return 0;
+
 	ret = x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, addr,
 				    PAGE_SIZE * 3);
 	if (ret)
 		return ret;
-	kvm->arch.tss_addr = addr;
+	to_kvm_vmx(kvm)->tss_addr = addr;
 	return init_rmode_tss(kvm);
 }
 
+static int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr)
+{
+	to_kvm_vmx(kvm)->ept_identity_map_addr = ident_addr;
+	return 0;
+}
+
 static bool rmode_exception(struct kvm_vcpu *vcpu, int vec)
 {
 	switch (vec) {
@@ -6134,19 +6440,24 @@ static int handle_exception(struct kvm_vcpu *vcpu)
 	if (is_nmi(intr_info))
 		return 1;  /* already handled by vmx_vcpu_run() */
 
-	if (is_invalid_opcode(intr_info)) {
-		er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD);
-		if (er == EMULATE_USER_EXIT)
-			return 0;
-		if (er != EMULATE_DONE)
-			kvm_queue_exception(vcpu, UD_VECTOR);
-		return 1;
-	}
+	if (is_invalid_opcode(intr_info))
+		return handle_ud(vcpu);
 
 	error_code = 0;
 	if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
 		error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
 
+	if (!vmx->rmode.vm86_active && is_gp_fault(intr_info)) {
+		WARN_ON_ONCE(!enable_vmware_backdoor);
+		er = emulate_instruction(vcpu,
+			EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL);
+		if (er == EMULATE_USER_EXIT)
+			return 0;
+		else if (er != EMULATE_DONE)
+			kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
+		return 1;
+	}
+
 	/*
 	 * The #PF with PFEC.RSVD = 1 indicates the guest is accessing
 	 * MMIO, it is better to report an internal error.
@@ -6232,28 +6543,22 @@ static int handle_triple_fault(struct kvm_vcpu *vcpu)
 static int handle_io(struct kvm_vcpu *vcpu)
 {
 	unsigned long exit_qualification;
-	int size, in, string, ret;
+	int size, in, string;
 	unsigned port;
 
 	exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 	string = (exit_qualification & 16) != 0;
-	in = (exit_qualification & 8) != 0;
 
 	++vcpu->stat.io_exits;
 
-	if (string || in)
+	if (string)
 		return emulate_instruction(vcpu, 0) == EMULATE_DONE;
 
 	port = exit_qualification >> 16;
 	size = (exit_qualification & 7) + 1;
+	in = (exit_qualification & 8) != 0;
 
-	ret = kvm_skip_emulated_instruction(vcpu);
-
-	/*
-	 * TODO: we might be squashing a KVM_GUESTDBG_SINGLESTEP-triggered
-	 * KVM_EXIT_DEBUG here.
-	 */
-	return kvm_fast_pio_out(vcpu, size, port) && ret;
+	return kvm_fast_pio(vcpu, size, port, in);
 }
 
 static void
@@ -6344,6 +6649,7 @@ static int handle_cr(struct kvm_vcpu *vcpu)
 			err = handle_set_cr0(vcpu, val);
 			return kvm_complete_insn_gp(vcpu, err);
 		case 3:
+			WARN_ON_ONCE(enable_unrestricted_guest);
 			err = kvm_set_cr3(vcpu, val);
 			return kvm_complete_insn_gp(vcpu, err);
 		case 4:
@@ -6376,6 +6682,7 @@ static int handle_cr(struct kvm_vcpu *vcpu)
 	case 1: /*mov from cr*/
 		switch (cr) {
 		case 3:
+			WARN_ON_ONCE(enable_unrestricted_guest);
 			val = kvm_read_cr3(vcpu);
 			kvm_register_write(vcpu, reg, val);
 			trace_kvm_cr_read(cr, val);
@@ -6769,7 +7076,6 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
 
 static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
 {
-	int ret;
 	gpa_t gpa;
 
 	/*
@@ -6797,17 +7103,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
 						       NULL, 0) == EMULATE_DONE;
 	}
 
-	ret = kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0);
-	if (ret >= 0)
-		return ret;
-
-	/* It is the real ept misconfig */
-	WARN_ON(1);
-
-	vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
-	vcpu->run->hw.hardware_exit_reason = EXIT_REASON_EPT_MISCONFIG;
-
-	return 0;
+	return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0);
 }
 
 static int handle_nmi_window(struct kvm_vcpu *vcpu)
@@ -6830,6 +7126,13 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
 	bool intr_window_requested;
 	unsigned count = 130;
 
+	/*
+	 * We should never reach the point where we are emulating L2
+	 * due to invalid guest state as that means we incorrectly
+	 * allowed a nested VMEntry with an invalid vmcs12.
+	 */
+	WARN_ON_ONCE(vmx->emulation_required && vmx->nested.nested_run_pending);
+
 	cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
 	intr_window_requested = cpu_exec_ctrl & CPU_BASED_VIRTUAL_INTR_PENDING;
 
@@ -6848,12 +7151,12 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
 			goto out;
 		}
 
-		if (err != EMULATE_DONE) {
-			vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
-			vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
-			vcpu->run->internal.ndata = 0;
-			return 0;
-		}
+		if (err != EMULATE_DONE)
+			goto emulation_error;
+
+		if (vmx->emulation_required && !vmx->rmode.vm86_active &&
+		    vcpu->arch.exception.pending)
+			goto emulation_error;
 
 		if (vcpu->arch.halt_request) {
 			vcpu->arch.halt_request = 0;
@@ -6869,34 +7172,12 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
 
 out:
 	return ret;
-}
-
-static int __grow_ple_window(int val)
-{
-	if (ple_window_grow < 1)
-		return ple_window;
-
-	val = min(val, ple_window_actual_max);
-
-	if (ple_window_grow < ple_window)
-		val *= ple_window_grow;
-	else
-		val += ple_window_grow;
-
-	return val;
-}
 
-static int __shrink_ple_window(int val, int modifier, int minimum)
-{
-	if (modifier < 1)
-		return ple_window;
-
-	if (modifier < ple_window)
-		val /= modifier;
-	else
-		val -= modifier;
-
-	return max(val, minimum);
+emulation_error:
+	vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+	vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+	vcpu->run->internal.ndata = 0;
+	return 0;
 }
 
 static void grow_ple_window(struct kvm_vcpu *vcpu)
@@ -6904,7 +7185,9 @@ static void grow_ple_window(struct kvm_vcpu *vcpu)
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	int old = vmx->ple_window;
 
-	vmx->ple_window = __grow_ple_window(old);
+	vmx->ple_window = __grow_ple_window(old, ple_window,
+					    ple_window_grow,
+					    ple_window_max);
 
 	if (vmx->ple_window != old)
 		vmx->ple_window_dirty = true;
@@ -6917,8 +7200,9 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu)
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	int old = vmx->ple_window;
 
-	vmx->ple_window = __shrink_ple_window(old,
-	                                      ple_window_shrink, ple_window);
+	vmx->ple_window = __shrink_ple_window(old, ple_window,
+					      ple_window_shrink,
+					      ple_window);
 
 	if (vmx->ple_window != old)
 		vmx->ple_window_dirty = true;
@@ -6927,21 +7211,6 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu)
 }
 
 /*
- * ple_window_actual_max is computed to be one grow_ple_window() below
- * ple_window_max. (See __grow_ple_window for the reason.)
- * This prevents overflows, because ple_window_max is int.
- * ple_window_max effectively rounded down to a multiple of ple_window_grow in
- * this process.
- * ple_window_max is also prevented from setting vmx->ple_window < ple_window.
- */
-static void update_ple_window_actual_max(void)
-{
-	ple_window_actual_max =
-			__shrink_ple_window(max(ple_window_max, ple_window),
-			                    ple_window_grow, INT_MIN);
-}
-
-/*
  * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR.
  */
 static void wakeup_handler(void)
@@ -6960,7 +7229,7 @@ static void wakeup_handler(void)
 	spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
 }
 
-void vmx_enable_tdp(void)
+static void vmx_enable_tdp(void)
 {
 	kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK,
 		enable_ept_ad_bits ? VMX_EPT_ACCESS_BIT : 0ull,
@@ -7061,8 +7330,6 @@ static __init int hardware_setup(void)
 	else
 		kvm_disable_tdp();
 
-	update_ple_window_actual_max();
-
 	/*
 	 * Only enable PML when hardware supports PML feature, and both EPT
 	 * and EPT A/D bit features are enabled -- PML depends on them to work.
@@ -7094,6 +7361,7 @@ static __init int hardware_setup(void)
 		init_vmcs_shadow_fields();
 
 	kvm_set_posted_intr_wakeup_handler(wakeup_handler);
+	nested_vmx_setup_ctls_msrs(&vmcs_config.nested, enable_apicv);
 
 	kvm_mce_cap_supported |= MCG_LMCE_P;
 
@@ -7122,7 +7390,7 @@ static __exit void hardware_unsetup(void)
  */
 static int handle_pause(struct kvm_vcpu *vcpu)
 {
-	if (ple_gap)
+	if (!kvm_pause_in_guest(vcpu->kvm))
 		grow_ple_window(vcpu);
 
 	/*
@@ -7954,9 +8222,9 @@ static int handle_invept(struct kvm_vcpu *vcpu)
 		u64 eptp, gpa;
 	} operand;
 
-	if (!(vmx->nested.nested_vmx_secondary_ctls_high &
+	if (!(vmx->nested.msrs.secondary_ctls_high &
 	      SECONDARY_EXEC_ENABLE_EPT) ||
-	    !(vmx->nested.nested_vmx_ept_caps & VMX_EPT_INVEPT_BIT)) {
+	    !(vmx->nested.msrs.ept_caps & VMX_EPT_INVEPT_BIT)) {
 		kvm_queue_exception(vcpu, UD_VECTOR);
 		return 1;
 	}
@@ -7967,7 +8235,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
 	vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
 	type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
 
-	types = (vmx->nested.nested_vmx_ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;
+	types = (vmx->nested.msrs.ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;
 
 	if (type >= 32 || !(types & (1 << type))) {
 		nested_vmx_failValid(vcpu,
@@ -8018,9 +8286,9 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
 		u64 gla;
 	} operand;
 
-	if (!(vmx->nested.nested_vmx_secondary_ctls_high &
+	if (!(vmx->nested.msrs.secondary_ctls_high &
 	      SECONDARY_EXEC_ENABLE_VPID) ||
-			!(vmx->nested.nested_vmx_vpid_caps & VMX_VPID_INVVPID_BIT)) {
+			!(vmx->nested.msrs.vpid_caps & VMX_VPID_INVVPID_BIT)) {
 		kvm_queue_exception(vcpu, UD_VECTOR);
 		return 1;
 	}
@@ -8031,7 +8299,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
 	vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
 	type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
 
-	types = (vmx->nested.nested_vmx_vpid_caps &
+	types = (vmx->nested.msrs.vpid_caps &
 			VMX_VPID_EXTENT_SUPPORTED_MASK) >> 8;
 
 	if (type >= 32 || !(types & (1 << type))) {
@@ -8125,11 +8393,11 @@ static bool valid_ept_address(struct kvm_vcpu *vcpu, u64 address)
 	/* Check for memory type validity */
 	switch (address & VMX_EPTP_MT_MASK) {
 	case VMX_EPTP_MT_UC:
-		if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPTP_UC_BIT))
+		if (!(vmx->nested.msrs.ept_caps & VMX_EPTP_UC_BIT))
 			return false;
 		break;
 	case VMX_EPTP_MT_WB:
-		if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPTP_WB_BIT))
+		if (!(vmx->nested.msrs.ept_caps & VMX_EPTP_WB_BIT))
 			return false;
 		break;
 	default:
@@ -8146,7 +8414,7 @@ static bool valid_ept_address(struct kvm_vcpu *vcpu, u64 address)
 
 	/* AD, if set, should be supported */
 	if (address & VMX_EPTP_AD_ENABLE_BIT) {
-		if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPT_AD_BIT))
+		if (!(vmx->nested.msrs.ept_caps & VMX_EPT_AD_BIT))
 			return false;
 	}
 
@@ -8790,7 +9058,8 @@ static void dump_vmcs(void)
 	pr_err("DebugCtl = 0x%016llx  DebugExceptions = 0x%016lx\n",
 	       vmcs_read64(GUEST_IA32_DEBUGCTL),
 	       vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS));
-	if (vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
+	if (cpu_has_load_perf_global_ctrl &&
+	    vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
 		pr_err("PerfGlobCtl = 0x%016llx\n",
 		       vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL));
 	if (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS)
@@ -8826,7 +9095,8 @@ static void dump_vmcs(void)
 		pr_err("EFER = 0x%016llx  PAT = 0x%016llx\n",
 		       vmcs_read64(HOST_IA32_EFER),
 		       vmcs_read64(HOST_IA32_PAT));
-	if (vmexit_ctl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
+	if (cpu_has_load_perf_global_ctrl &&
+	    vmexit_ctl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
 		pr_err("PerfGlobCtl = 0x%016llx\n",
 		       vmcs_read64(HOST_IA32_PERF_GLOBAL_CTRL));
 
@@ -9178,9 +9448,9 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
 
 	/* We need to handle NMIs before interrupts are enabled */
 	if (is_nmi(exit_intr_info)) {
-		kvm_before_handle_nmi(&vmx->vcpu);
+		kvm_before_interrupt(&vmx->vcpu);
 		asm("int $2");
-		kvm_after_handle_nmi(&vmx->vcpu);
+		kvm_after_interrupt(&vmx->vcpu);
 	}
 }
 
@@ -9403,7 +9673,7 @@ static void vmx_arm_hv_timer(struct kvm_vcpu *vcpu)
 static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	unsigned long cr3, cr4;
+	unsigned long cr3, cr4, evmcs_rsp;
 
 	/* Record the guest's net vcpu time for enforced NMI injections. */
 	if (unlikely(!enable_vnmi &&
@@ -9469,6 +9739,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
 
 	vmx->__launched = vmx->loaded_vmcs->launched;
+
+	evmcs_rsp = static_branch_unlikely(&enable_evmcs) ?
+		(unsigned long)&current_evmcs->host_rsp : 0;
+
 	asm(
 		/* Store host registers */
 		"push %%" _ASM_DX "; push %%" _ASM_BP ";"
@@ -9477,15 +9751,21 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		"cmp %%" _ASM_SP ", %c[host_rsp](%0) \n\t"
 		"je 1f \n\t"
 		"mov %%" _ASM_SP ", %c[host_rsp](%0) \n\t"
+		/* Avoid VMWRITE when Enlightened VMCS is in use */
+		"test %%" _ASM_SI ", %%" _ASM_SI " \n\t"
+		"jz 2f \n\t"
+		"mov %%" _ASM_SP ", (%%" _ASM_SI ") \n\t"
+		"jmp 1f \n\t"
+		"2: \n\t"
 		__ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t"
 		"1: \n\t"
 		/* Reload cr2 if changed */
 		"mov %c[cr2](%0), %%" _ASM_AX " \n\t"
 		"mov %%cr2, %%" _ASM_DX " \n\t"
 		"cmp %%" _ASM_AX ", %%" _ASM_DX " \n\t"
-		"je 2f \n\t"
+		"je 3f \n\t"
 		"mov %%" _ASM_AX", %%cr2 \n\t"
-		"2: \n\t"
+		"3: \n\t"
 		/* Check if vmlaunch of vmresume is needed */
 		"cmpl $0, %c[launched](%0) \n\t"
 		/* Load guest registers.  Don't clobber flags. */
@@ -9554,7 +9834,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		".global vmx_return \n\t"
 		"vmx_return: " _ASM_PTR " 2b \n\t"
 		".popsection"
-	      : : "c"(vmx), "d"((unsigned long)HOST_RSP),
+	      : : "c"(vmx), "d"((unsigned long)HOST_RSP), "S"(evmcs_rsp),
 		[launched]"i"(offsetof(struct vcpu_vmx, __launched)),
 		[fail]"i"(offsetof(struct vcpu_vmx, fail)),
 		[host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp)),
@@ -9579,10 +9859,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		[wordsize]"i"(sizeof(ulong))
 	      : "cc", "memory"
 #ifdef CONFIG_X86_64
-		, "rax", "rbx", "rdi", "rsi"
+		, "rax", "rbx", "rdi"
 		, "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
 #else
-		, "eax", "ebx", "edi", "esi"
+		, "eax", "ebx", "edi"
 #endif
 	      );
 
@@ -9610,6 +9890,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	/* Eliminate branch target predictions from guest mode */
 	vmexit_fill_RSB();
 
+	/* All fields are clean at this point */
+	if (static_branch_unlikely(&enable_evmcs))
+		current_evmcs->hv_clean_fields |=
+			HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+
 	/* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
 	if (vmx->host_debugctlmsr)
 		update_debugctlmsr(vmx->host_debugctlmsr);
@@ -9646,14 +9931,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 			__write_pkru(vmx->host_pkru);
 	}
 
-	/*
-	 * the KVM_REQ_EVENT optimization bit is only on for one entry, and if
-	 * we did not inject a still-pending event to L1 now because of
-	 * nested_run_pending, we need to re-enable this bit.
-	 */
-	if (vmx->nested.nested_run_pending)
-		kvm_make_request(KVM_REQ_EVENT, vcpu);
-
 	vmx->nested.nested_run_pending = 0;
 	vmx->idt_vectoring_info = 0;
 
@@ -9670,6 +9947,26 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 }
 STACK_FRAME_NON_STANDARD(vmx_vcpu_run);
 
+/*
+ * Allocate a zero-filled struct kvm_vmx with kzalloc() and return the
+ * struct kvm embedded in it.  Returns NULL if the allocation fails.
+ */
+static struct kvm *vmx_vm_alloc(void)
+{
+	struct kvm_vmx *kvm_vmx = kzalloc(sizeof(*kvm_vmx), GFP_KERNEL);
+
+	/* &kvm_vmx->kvm on NULL is only NULL if kvm sits at offset 0. */
+	if (!kvm_vmx)
+		return NULL;
+
+	return &kvm_vmx->kvm;
+}
+
+static void vmx_vm_free(struct kvm *kvm)
+{
+	kfree(to_kvm_vmx(kvm));
+}
+
 static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -9777,14 +10065,15 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 			goto free_vmcs;
 	}
 
-	if (enable_ept) {
+	if (enable_ept && !enable_unrestricted_guest) {
 		err = init_rmode_identity_map(kvm);
 		if (err)
 			goto free_vmcs;
 	}
 
 	if (nested) {
-		nested_vmx_setup_ctls_msrs(vmx);
+		nested_vmx_setup_ctls_msrs(&vmx->nested.msrs,
+					   kvm_vcpu_apicv_active(&vmx->vcpu));
 		vmx->nested.vpid02 = allocate_vpid();
 	}
 
@@ -9817,6 +10106,13 @@ free_vcpu:
 	return ERR_PTR(err);
 }
 
+static int vmx_vm_init(struct kvm *kvm)
+{
+	if (!ple_gap)
+		kvm->arch.pause_in_guest = true;
+	return 0;
+}
+
 static void __init vmx_check_processor_compat(void *rtn)
 {
 	struct vmcs_config vmcs_conf;
@@ -9824,6 +10120,7 @@ static void __init vmx_check_processor_compat(void *rtn)
 	*(int *)rtn = 0;
 	if (setup_vmcs_config(&vmcs_conf) < 0)
 		*(int *)rtn = -EIO;
+	nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, enable_apicv);
 	if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) {
 		printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n",
 				smp_processor_id());
@@ -9911,12 +10208,12 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	struct kvm_cpuid_entry2 *entry;
 
-	vmx->nested.nested_vmx_cr0_fixed1 = 0xffffffff;
-	vmx->nested.nested_vmx_cr4_fixed1 = X86_CR4_PCE;
+	vmx->nested.msrs.cr0_fixed1 = 0xffffffff;
+	vmx->nested.msrs.cr4_fixed1 = X86_CR4_PCE;
 
 #define cr4_fixed1_update(_cr4_mask, _reg, _cpuid_mask) do {		\
 	if (entry && (entry->_reg & (_cpuid_mask)))			\
-		vmx->nested.nested_vmx_cr4_fixed1 |= (_cr4_mask);	\
+		vmx->nested.msrs.cr4_fixed1 |= (_cr4_mask);	\
 } while (0)
 
 	entry = kvm_find_cpuid_entry(vcpu, 0x1, 0);
@@ -10013,7 +10310,7 @@ static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
 
 	kvm_mmu_unload(vcpu);
 	kvm_init_shadow_ept_mmu(vcpu,
-			to_vmx(vcpu)->nested.nested_vmx_ept_caps &
+			to_vmx(vcpu)->nested.msrs.ept_caps &
 			VMX_EPT_EXECUTE_ONLY_BIT,
 			nested_ept_ad_enabled(vcpu));
 	vcpu->arch.mmu.set_cr3           = vmx_set_cr3;
@@ -10952,6 +11249,16 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 	/* Note: modifies VM_ENTRY/EXIT_CONTROLS and GUEST/HOST_IA32_EFER */
 	vmx_set_efer(vcpu, vcpu->arch.efer);
 
+	/*
+	 * Guest state is invalid and unrestricted guest is disabled,
+	 * which means L1 attempted VMEntry to L2 with invalid state.
+	 * Fail the VMEntry.
+	 */
+	if (vmx->emulation_required) {
+		*entry_failure_code = ENTRY_FAIL_DEFAULT;
+		return 1;
+	}
+
 	/* Shadow page tables on either EPT or shadow page tables. */
 	if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12),
 				entry_failure_code))
@@ -10965,6 +11272,19 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 	return 0;
 }
 
+static int nested_vmx_check_nmi_controls(struct vmcs12 *vmcs12)
+{
+	/*
+	 * Virtual NMIs need NMI exiting, and CPU_BASED_VIRTUAL_NMI_PENDING
+	 * in turn needs virtual NMIs; any other combination is -EINVAL.
+	 */
+	if (nested_cpu_has_virtual_nmis(vmcs12))
+		return nested_cpu_has_nmi_exiting(vmcs12) ? 0 : -EINVAL;
+	if (nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING))
+		return -EINVAL;
+	return 0;
+}
+
 static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -10992,26 +11312,29 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 		return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
 
 	if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
-				vmx->nested.nested_vmx_procbased_ctls_low,
-				vmx->nested.nested_vmx_procbased_ctls_high) ||
+				vmx->nested.msrs.procbased_ctls_low,
+				vmx->nested.msrs.procbased_ctls_high) ||
 	    (nested_cpu_has(vmcs12, CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) &&
 	     !vmx_control_verify(vmcs12->secondary_vm_exec_control,
-				 vmx->nested.nested_vmx_secondary_ctls_low,
-				 vmx->nested.nested_vmx_secondary_ctls_high)) ||
+				 vmx->nested.msrs.secondary_ctls_low,
+				 vmx->nested.msrs.secondary_ctls_high)) ||
 	    !vmx_control_verify(vmcs12->pin_based_vm_exec_control,
-				vmx->nested.nested_vmx_pinbased_ctls_low,
-				vmx->nested.nested_vmx_pinbased_ctls_high) ||
+				vmx->nested.msrs.pinbased_ctls_low,
+				vmx->nested.msrs.pinbased_ctls_high) ||
 	    !vmx_control_verify(vmcs12->vm_exit_controls,
-				vmx->nested.nested_vmx_exit_ctls_low,
-				vmx->nested.nested_vmx_exit_ctls_high) ||
+				vmx->nested.msrs.exit_ctls_low,
+				vmx->nested.msrs.exit_ctls_high) ||
 	    !vmx_control_verify(vmcs12->vm_entry_controls,
-				vmx->nested.nested_vmx_entry_ctls_low,
-				vmx->nested.nested_vmx_entry_ctls_high))
+				vmx->nested.msrs.entry_ctls_low,
+				vmx->nested.msrs.entry_ctls_high))
+		return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
+	if (nested_vmx_check_nmi_controls(vmcs12))
 		return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
 
 	if (nested_cpu_has_vmfunc(vmcs12)) {
 		if (vmcs12->vm_function_control &
-		    ~vmx->nested.nested_vmx_vmfunc_controls)
+		    ~vmx->nested.msrs.vmfunc_controls)
 			return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
 
 		if (nested_cpu_has_eptp_switching(vmcs12)) {
@@ -11293,7 +11616,7 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
 	} else if (vcpu->arch.nmi_injected) {
 		vmcs12->idt_vectoring_info_field =
 			INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR;
-	} else if (vcpu->arch.interrupt.pending) {
+	} else if (vcpu->arch.interrupt.injected) {
 		nr = vcpu->arch.interrupt.nr;
 		idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
 
@@ -11941,7 +12264,7 @@ static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu)
 
 static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
 {
-	if (ple_gap)
+	if (!kvm_pause_in_guest(vcpu->kvm))
 		shrink_ple_window(vcpu);
 }
 
@@ -12259,6 +12582,7 @@ static int vmx_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
 
 	vmx->nested.smm.vmxon = vmx->nested.vmxon;
 	vmx->nested.vmxon = false;
+	vmx_clear_hlt(vcpu);
 	return 0;
 }
 
@@ -12300,6 +12624,10 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 	.cpu_has_accelerated_tpr = report_flexpriority,
 	.cpu_has_high_real_mode_segbase = vmx_has_high_real_mode_segbase,
 
+	.vm_init = vmx_vm_init,
+	.vm_alloc = vmx_vm_alloc,
+	.vm_free = vmx_vm_free,
+
 	.vcpu_create = vmx_create_vcpu,
 	.vcpu_free = vmx_free_vcpu,
 	.vcpu_reset = vmx_vcpu_reset,
@@ -12367,6 +12695,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 	.deliver_posted_interrupt = vmx_deliver_posted_interrupt,
 
 	.set_tss_addr = vmx_set_tss_addr,
+	.set_identity_map_addr = vmx_set_identity_map_addr,
 	.get_tdp_level = get_ept_level,
 	.get_mt_mask = vmx_get_mt_mask,
 
@@ -12425,7 +12754,38 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 
 static int __init vmx_init(void)
 {
-	int r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
+	int r;
+
+#if IS_ENABLED(CONFIG_HYPERV)
+	/*
+	 * Enlightened VMCS usage should be recommended and the host needs
+	 * to support eVMCS v1 or above. We can also disable eVMCS support
+	 * with module parameter.
+	 */
+	if (enlightened_vmcs &&
+	    ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED &&
+	    (ms_hyperv.nested_features & HV_X64_ENLIGHTENED_VMCS_VERSION) >=
+	    KVM_EVMCS_VERSION) {
+		int cpu;
+
+		/* Check that we have assist pages on all online CPUs */
+		for_each_online_cpu(cpu) {
+			if (!hv_get_vp_assist_page(cpu)) {
+				enlightened_vmcs = false;
+				break;
+			}
+		}
+
+		if (enlightened_vmcs) {
+			pr_info("KVM: vmx: using Hyper-V Enlightened VMCS\n");
+			static_branch_enable(&enable_evmcs);
+		}
+	} else {
+		enlightened_vmcs = false;
+	}
+#endif
+
+	r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
                      __alignof__(struct vcpu_vmx), THIS_MODULE);
 	if (r)
 		return r;
@@ -12446,6 +12806,29 @@ static void __exit vmx_exit(void)
 #endif
 
 	kvm_exit();
+
+#if IS_ENABLED(CONFIG_HYPERV)
+	if (static_branch_unlikely(&enable_evmcs)) {
+		int cpu;
+		struct hv_vp_assist_page *vp_ap;
+		/*
+		 * Reset everything to support using non-enlightened VMCS
+		 * access later (e.g. when we reload the module with
+		 * enlightened_vmcs=0)
+		 */
+		for_each_online_cpu(cpu) {
+			vp_ap =	hv_get_vp_assist_page(cpu);
+
+			if (!vp_ap)
+				continue;
+
+			vp_ap->current_nested_vmcs = 0;
+			vp_ap->enlighten_vmentry = 0;
+		}
+
+		static_branch_disable(&enable_evmcs);
+	}
+#endif
 }
 
 module_init(vmx_init)
diff --git a/arch/x86/kvm/vmx_evmcs.h b/arch/x86/kvm/vmx_evmcs.h
new file mode 100644
index 000000000000..210a884090ad
--- /dev/null
+++ b/arch/x86/kvm/vmx_evmcs.h
@@ -0,0 +1,324 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __KVM_X86_VMX_EVMCS_H
+#define __KVM_X86_VMX_EVMCS_H
+
+#include <asm/hyperv-tlfs.h>
+
+#define ROL16(val, n) ((u16)(((u16)(val) << (n)) | ((u16)(val) >> (16 - (n)))))
+#define EVMCS1_OFFSET(x) offsetof(struct hv_enlightened_vmcs, x)
+#define EVMCS1_FIELD(number, name, clean_field)[ROL16(number, 6)] = \
+		{EVMCS1_OFFSET(name), clean_field}
+
+struct evmcs_field {
+	u16 offset;		/* EVMCS1_OFFSET(): offsetof() in struct hv_enlightened_vmcs */
+	u16 clean_field;	/* HV_VMX_ENLIGHTENED_CLEAN_FIELD_* given to EVMCS1_FIELD() */
+};
+
+static const struct evmcs_field vmcs_field_to_evmcs_1[] = {
+	/* 64 bit rw */
+	EVMCS1_FIELD(GUEST_RIP, guest_rip,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+	EVMCS1_FIELD(GUEST_RSP, guest_rsp,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC),
+	EVMCS1_FIELD(GUEST_RFLAGS, guest_rflags,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC),
+	EVMCS1_FIELD(HOST_IA32_PAT, host_ia32_pat,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+	EVMCS1_FIELD(HOST_IA32_EFER, host_ia32_efer,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+	EVMCS1_FIELD(HOST_CR0, host_cr0,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+	EVMCS1_FIELD(HOST_CR3, host_cr3,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+	EVMCS1_FIELD(HOST_CR4, host_cr4,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+	EVMCS1_FIELD(HOST_IA32_SYSENTER_ESP, host_ia32_sysenter_esp,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+	EVMCS1_FIELD(HOST_IA32_SYSENTER_EIP, host_ia32_sysenter_eip,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+	EVMCS1_FIELD(HOST_RIP, host_rip,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+	EVMCS1_FIELD(IO_BITMAP_A, io_bitmap_a,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP),
+	EVMCS1_FIELD(IO_BITMAP_B, io_bitmap_b,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP),
+	EVMCS1_FIELD(MSR_BITMAP, msr_bitmap,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP),
+	EVMCS1_FIELD(GUEST_ES_BASE, guest_es_base,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+	EVMCS1_FIELD(GUEST_CS_BASE, guest_cs_base,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+	EVMCS1_FIELD(GUEST_SS_BASE, guest_ss_base,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+	EVMCS1_FIELD(GUEST_DS_BASE, guest_ds_base,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+	EVMCS1_FIELD(GUEST_FS_BASE, guest_fs_base,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+	EVMCS1_FIELD(GUEST_GS_BASE, guest_gs_base,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+	EVMCS1_FIELD(GUEST_LDTR_BASE, guest_ldtr_base,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+	EVMCS1_FIELD(GUEST_TR_BASE, guest_tr_base,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+	EVMCS1_FIELD(GUEST_GDTR_BASE, guest_gdtr_base,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+	EVMCS1_FIELD(GUEST_IDTR_BASE, guest_idtr_base,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+	EVMCS1_FIELD(TSC_OFFSET, tsc_offset,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2),
+	EVMCS1_FIELD(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2),
+	EVMCS1_FIELD(VMCS_LINK_POINTER, vmcs_link_pointer,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+	EVMCS1_FIELD(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+	EVMCS1_FIELD(GUEST_IA32_PAT, guest_ia32_pat,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+	EVMCS1_FIELD(GUEST_IA32_EFER, guest_ia32_efer,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+	EVMCS1_FIELD(GUEST_PDPTR0, guest_pdptr0,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+	EVMCS1_FIELD(GUEST_PDPTR1, guest_pdptr1,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+	EVMCS1_FIELD(GUEST_PDPTR2, guest_pdptr2,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+	EVMCS1_FIELD(GUEST_PDPTR3, guest_pdptr3,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+	EVMCS1_FIELD(GUEST_PENDING_DBG_EXCEPTIONS, guest_pending_dbg_exceptions,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+	EVMCS1_FIELD(GUEST_SYSENTER_ESP, guest_sysenter_esp,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+	EVMCS1_FIELD(GUEST_SYSENTER_EIP, guest_sysenter_eip,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+	EVMCS1_FIELD(CR0_GUEST_HOST_MASK, cr0_guest_host_mask,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR),
+	EVMCS1_FIELD(CR4_GUEST_HOST_MASK, cr4_guest_host_mask,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR),
+	EVMCS1_FIELD(CR0_READ_SHADOW, cr0_read_shadow,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR),
+	EVMCS1_FIELD(CR4_READ_SHADOW, cr4_read_shadow,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR),
+	EVMCS1_FIELD(GUEST_CR0, guest_cr0,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR),
+	EVMCS1_FIELD(GUEST_CR3, guest_cr3,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR),
+	EVMCS1_FIELD(GUEST_CR4, guest_cr4,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR),
+	EVMCS1_FIELD(GUEST_DR7, guest_dr7,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR),
+	EVMCS1_FIELD(HOST_FS_BASE, host_fs_base,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER),
+	EVMCS1_FIELD(HOST_GS_BASE, host_gs_base,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER),
+	EVMCS1_FIELD(HOST_TR_BASE, host_tr_base,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER),
+	EVMCS1_FIELD(HOST_GDTR_BASE, host_gdtr_base,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER),
+	EVMCS1_FIELD(HOST_IDTR_BASE, host_idtr_base,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER),
+	EVMCS1_FIELD(HOST_RSP, host_rsp,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER),
+	EVMCS1_FIELD(EPT_POINTER, ept_pointer,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT),
+	EVMCS1_FIELD(GUEST_BNDCFGS, guest_bndcfgs,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+	EVMCS1_FIELD(XSS_EXIT_BITMAP, xss_exit_bitmap,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2),
+
+	/* 64 bit read only */
+	EVMCS1_FIELD(GUEST_PHYSICAL_ADDRESS, guest_physical_address,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+	EVMCS1_FIELD(EXIT_QUALIFICATION, exit_qualification,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+	/*
+	 * Not defined in KVM:
+	 *
+	 * EVMCS1_FIELD(0x00006402, exit_io_instruction_ecx,
+	 *		HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE);
+	 * EVMCS1_FIELD(0x00006404, exit_io_instruction_esi,
+	 *		HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE);
+	 * EVMCS1_FIELD(0x00006406, exit_io_instruction_edi,
+	 *		HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE);
+	 * EVMCS1_FIELD(0x00006408, exit_io_instruction_eip,
+	 *		HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE);
+	 */
+	EVMCS1_FIELD(GUEST_LINEAR_ADDRESS, guest_linear_address,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+
+	/*
+	 * No mask defined in the spec as Hyper-V doesn't currently support
+	 * these. Future proof by resetting the whole clean field mask on
+	 * access.
+	 */
+	EVMCS1_FIELD(VM_EXIT_MSR_STORE_ADDR, vm_exit_msr_store_addr,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
+	EVMCS1_FIELD(VM_EXIT_MSR_LOAD_ADDR, vm_exit_msr_load_addr,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
+	EVMCS1_FIELD(VM_ENTRY_MSR_LOAD_ADDR, vm_entry_msr_load_addr,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
+	EVMCS1_FIELD(CR3_TARGET_VALUE0, cr3_target_value0,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
+	EVMCS1_FIELD(CR3_TARGET_VALUE1, cr3_target_value1,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
+	EVMCS1_FIELD(CR3_TARGET_VALUE2, cr3_target_value2,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
+	EVMCS1_FIELD(CR3_TARGET_VALUE3, cr3_target_value3,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
+
+	/* 32 bit rw */
+	EVMCS1_FIELD(TPR_THRESHOLD, tpr_threshold,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+	EVMCS1_FIELD(GUEST_INTERRUPTIBILITY_INFO, guest_interruptibility_info,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC),
+	EVMCS1_FIELD(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC),
+	EVMCS1_FIELD(EXCEPTION_BITMAP, exception_bitmap,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN),
+	EVMCS1_FIELD(VM_ENTRY_CONTROLS, vm_entry_controls,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY),
+	EVMCS1_FIELD(VM_ENTRY_INTR_INFO_FIELD, vm_entry_intr_info_field,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT),
+	EVMCS1_FIELD(VM_ENTRY_EXCEPTION_ERROR_CODE,
+		     vm_entry_exception_error_code,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT),
+	EVMCS1_FIELD(VM_ENTRY_INSTRUCTION_LEN, vm_entry_instruction_len,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT),
+	EVMCS1_FIELD(HOST_IA32_SYSENTER_CS, host_ia32_sysenter_cs,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+	EVMCS1_FIELD(PIN_BASED_VM_EXEC_CONTROL, pin_based_vm_exec_control,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1),
+	EVMCS1_FIELD(VM_EXIT_CONTROLS, vm_exit_controls,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1),
+	EVMCS1_FIELD(SECONDARY_VM_EXEC_CONTROL, secondary_vm_exec_control,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1),
+	EVMCS1_FIELD(GUEST_ES_LIMIT, guest_es_limit,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+	EVMCS1_FIELD(GUEST_CS_LIMIT, guest_cs_limit,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+	EVMCS1_FIELD(GUEST_SS_LIMIT, guest_ss_limit,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+	EVMCS1_FIELD(GUEST_DS_LIMIT, guest_ds_limit,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+	EVMCS1_FIELD(GUEST_FS_LIMIT, guest_fs_limit,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+	EVMCS1_FIELD(GUEST_GS_LIMIT, guest_gs_limit,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+	EVMCS1_FIELD(GUEST_LDTR_LIMIT, guest_ldtr_limit,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+	EVMCS1_FIELD(GUEST_TR_LIMIT, guest_tr_limit,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+	EVMCS1_FIELD(GUEST_GDTR_LIMIT, guest_gdtr_limit,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+	EVMCS1_FIELD(GUEST_IDTR_LIMIT, guest_idtr_limit,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+	EVMCS1_FIELD(GUEST_ES_AR_BYTES, guest_es_ar_bytes,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+	EVMCS1_FIELD(GUEST_CS_AR_BYTES, guest_cs_ar_bytes,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+	EVMCS1_FIELD(GUEST_SS_AR_BYTES, guest_ss_ar_bytes,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+	EVMCS1_FIELD(GUEST_DS_AR_BYTES, guest_ds_ar_bytes,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+	EVMCS1_FIELD(GUEST_FS_AR_BYTES, guest_fs_ar_bytes,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+	EVMCS1_FIELD(GUEST_GS_AR_BYTES, guest_gs_ar_bytes,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+	EVMCS1_FIELD(GUEST_LDTR_AR_BYTES, guest_ldtr_ar_bytes,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+	EVMCS1_FIELD(GUEST_TR_AR_BYTES, guest_tr_ar_bytes,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+	EVMCS1_FIELD(GUEST_ACTIVITY_STATE, guest_activity_state,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+	EVMCS1_FIELD(GUEST_SYSENTER_CS, guest_sysenter_cs,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+
+	/* 32 bit read only */
+	EVMCS1_FIELD(VM_INSTRUCTION_ERROR, vm_instruction_error,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+	EVMCS1_FIELD(VM_EXIT_REASON, vm_exit_reason,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+	EVMCS1_FIELD(VM_EXIT_INTR_INFO, vm_exit_intr_info,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+	EVMCS1_FIELD(VM_EXIT_INTR_ERROR_CODE, vm_exit_intr_error_code,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+	EVMCS1_FIELD(IDT_VECTORING_INFO_FIELD, idt_vectoring_info_field,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+	EVMCS1_FIELD(IDT_VECTORING_ERROR_CODE, idt_vectoring_error_code,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+	EVMCS1_FIELD(VM_EXIT_INSTRUCTION_LEN, vm_exit_instruction_len,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+	EVMCS1_FIELD(VMX_INSTRUCTION_INFO, vmx_instruction_info,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+
+	/* No mask defined in the spec (not used) */
+	EVMCS1_FIELD(PAGE_FAULT_ERROR_CODE_MASK, page_fault_error_code_mask,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
+	EVMCS1_FIELD(PAGE_FAULT_ERROR_CODE_MATCH, page_fault_error_code_match,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
+	EVMCS1_FIELD(CR3_TARGET_COUNT, cr3_target_count,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
+	EVMCS1_FIELD(VM_EXIT_MSR_STORE_COUNT, vm_exit_msr_store_count,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
+	EVMCS1_FIELD(VM_EXIT_MSR_LOAD_COUNT, vm_exit_msr_load_count,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
+	EVMCS1_FIELD(VM_ENTRY_MSR_LOAD_COUNT, vm_entry_msr_load_count,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
+
+	/* 16 bit rw */
+	EVMCS1_FIELD(HOST_ES_SELECTOR, host_es_selector,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+	EVMCS1_FIELD(HOST_CS_SELECTOR, host_cs_selector,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+	EVMCS1_FIELD(HOST_SS_SELECTOR, host_ss_selector,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+	EVMCS1_FIELD(HOST_DS_SELECTOR, host_ds_selector,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+	EVMCS1_FIELD(HOST_FS_SELECTOR, host_fs_selector,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+	EVMCS1_FIELD(HOST_GS_SELECTOR, host_gs_selector,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+	EVMCS1_FIELD(HOST_TR_SELECTOR, host_tr_selector,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+	EVMCS1_FIELD(GUEST_ES_SELECTOR, guest_es_selector,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+	EVMCS1_FIELD(GUEST_CS_SELECTOR, guest_cs_selector,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+	EVMCS1_FIELD(GUEST_SS_SELECTOR, guest_ss_selector,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+	EVMCS1_FIELD(GUEST_DS_SELECTOR, guest_ds_selector,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+	EVMCS1_FIELD(GUEST_FS_SELECTOR, guest_fs_selector,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+	EVMCS1_FIELD(GUEST_GS_SELECTOR, guest_gs_selector,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+	EVMCS1_FIELD(GUEST_LDTR_SELECTOR, guest_ldtr_selector,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+	EVMCS1_FIELD(GUEST_TR_SELECTOR, guest_tr_selector,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+	EVMCS1_FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id,
+		     HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT),
+};
+
+/* Map VMCS encoding @field to its eVMCS byte offset, or -ENOENT if unmapped. */
+static __always_inline int get_evmcs_offset(unsigned long field,
+					    u16 *clean_field)
+{
+	unsigned int index = ROL16(field, 6);
+	const struct evmcs_field *evmcs_field = NULL;
+
+	if (index < ARRAY_SIZE(vmcs_field_to_evmcs_1))
+		evmcs_field = &vmcs_field_to_evmcs_1[index];
+	/* Table holes are zero-filled; offset 0 (revision_id per TLFS) is unmapped. */
+	if (unlikely(!evmcs_field || !evmcs_field->offset)) {
+		WARN_ONCE(1, "KVM: accessing unsupported EVMCS field %lx\n",
+			  field);
+		return -ENOENT;
+	}
+	if (clean_field)	/* optional out-parameter: clean-field mask of @field */
+		*clean_field = evmcs_field->clean_field;
+	return evmcs_field->offset;
+}
+
+#undef ROL16
+
+#endif /* __KVM_X86_VMX_EVMCS_H */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 18b5ca7a3197..b2ff74b12ec4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -102,6 +102,8 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu);
 static void process_nmi(struct kvm_vcpu *vcpu);
 static void enter_smm(struct kvm_vcpu *vcpu);
 static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
+static void store_regs(struct kvm_vcpu *vcpu);
+static int sync_regs(struct kvm_vcpu *vcpu);
 
 struct kvm_x86_ops *kvm_x86_ops __read_mostly;
 EXPORT_SYMBOL_GPL(kvm_x86_ops);
@@ -140,6 +142,13 @@ module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR);
 static bool __read_mostly vector_hashing = true;
 module_param(vector_hashing, bool, S_IRUGO);
 
+bool __read_mostly enable_vmware_backdoor = false;
+module_param(enable_vmware_backdoor, bool, S_IRUGO);
+EXPORT_SYMBOL_GPL(enable_vmware_backdoor);
+
+static bool __read_mostly force_emulation_prefix = false;
+module_param(force_emulation_prefix, bool, S_IRUGO);
+
 #define KVM_NR_SHARED_MSRS 16
 
 struct kvm_shared_msrs_global {
@@ -1032,7 +1041,11 @@ static u32 emulated_msrs[] = {
 	HV_X64_MSR_VP_RUNTIME,
 	HV_X64_MSR_SCONTROL,
 	HV_X64_MSR_STIMER0_CONFIG,
-	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
+	HV_X64_MSR_VP_ASSIST_PAGE,
+	HV_X64_MSR_REENLIGHTENMENT_CONTROL, HV_X64_MSR_TSC_EMULATION_CONTROL,
+	HV_X64_MSR_TSC_EMULATION_STATUS,
+
+	MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
 	MSR_KVM_PV_EOI_EN,
 
 	MSR_IA32_TSC_ADJUST,
@@ -1054,6 +1067,25 @@ static unsigned num_emulated_msrs;
  * can be used by a hypervisor to validate requested CPU features.
  */
 static u32 msr_based_features[] = {
+	MSR_IA32_VMX_BASIC,
+	MSR_IA32_VMX_TRUE_PINBASED_CTLS,
+	MSR_IA32_VMX_PINBASED_CTLS,
+	MSR_IA32_VMX_TRUE_PROCBASED_CTLS,
+	MSR_IA32_VMX_PROCBASED_CTLS,
+	MSR_IA32_VMX_TRUE_EXIT_CTLS,
+	MSR_IA32_VMX_EXIT_CTLS,
+	MSR_IA32_VMX_TRUE_ENTRY_CTLS,
+	MSR_IA32_VMX_ENTRY_CTLS,
+	MSR_IA32_VMX_MISC,
+	MSR_IA32_VMX_CR0_FIXED0,
+	MSR_IA32_VMX_CR0_FIXED1,
+	MSR_IA32_VMX_CR4_FIXED0,
+	MSR_IA32_VMX_CR4_FIXED1,
+	MSR_IA32_VMX_VMCS_ENUM,
+	MSR_IA32_VMX_PROCBASED_CTLS2,
+	MSR_IA32_VMX_EPT_VPID_CAP,
+	MSR_IA32_VMX_VMFUNC,
+
 	MSR_F10H_DECFG,
 	MSR_IA32_UCODE_REV,
 };
@@ -2432,6 +2464,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
 	case HV_X64_MSR_CRASH_CTL:
 	case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT:
+	case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
+	case HV_X64_MSR_TSC_EMULATION_CONTROL:
+	case HV_X64_MSR_TSC_EMULATION_STATUS:
 		return kvm_hv_set_msr_common(vcpu, msr, data,
 					     msr_info->host_initiated);
 	case MSR_IA32_BBL_CR_CTL3:
@@ -2558,6 +2593,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_AMD64_DC_CFG:
 		msr_info->data = 0;
 		break;
+	case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5:
 	case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
 	case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
 	case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
@@ -2661,6 +2697,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
 	case HV_X64_MSR_CRASH_CTL:
 	case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT:
+	case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
+	case HV_X64_MSR_TSC_EMULATION_CONTROL:
+	case HV_X64_MSR_TSC_EMULATION_STATUS:
 		return kvm_hv_get_msr_common(vcpu,
 					     msr_info->index, &msr_info->data);
 		break;
@@ -2777,9 +2816,15 @@ out:
 	return r;
 }
 
+static inline bool kvm_can_mwait_in_guest(void)
+{
+	return boot_cpu_has(X86_FEATURE_MWAIT) &&	/* boot CPU has MWAIT */
+		!boot_cpu_has_bug(X86_BUG_MONITOR);	/* and no MONITOR bug flag */
+}
+
 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 {
-	int r;
+	int r = 0;
 
 	switch (ext) {
 	case KVM_CAP_IRQCHIP:
@@ -2809,6 +2854,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_HYPERV_SYNIC:
 	case KVM_CAP_HYPERV_SYNIC2:
 	case KVM_CAP_HYPERV_VP_INDEX:
+	case KVM_CAP_HYPERV_EVENTFD:
 	case KVM_CAP_PCI_SEGMENT:
 	case KVM_CAP_DEBUGREGS:
 	case KVM_CAP_X86_ROBUST_SINGLESTEP:
@@ -2828,11 +2874,16 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_GET_MSR_FEATURES:
 		r = 1;
 		break;
+	case KVM_CAP_SYNC_REGS:
+		r = KVM_SYNC_X86_VALID_FIELDS;
+		break;
 	case KVM_CAP_ADJUST_CLOCK:
 		r = KVM_CLOCK_TSC_STABLE;
 		break;
-	case KVM_CAP_X86_GUEST_MWAIT:
-		r = kvm_mwait_in_guest();
+	case KVM_CAP_X86_DISABLE_EXITS:
+		r |=  KVM_X86_DISABLE_EXITS_HTL | KVM_X86_DISABLE_EXITS_PAUSE;
+		if(kvm_can_mwait_in_guest())
+			r |= KVM_X86_DISABLE_EXITS_MWAIT;
 		break;
 	case KVM_CAP_X86_SMM:
 		/* SMBASE is usually relocated above 1M on modern chipsets,
@@ -2873,7 +2924,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		r = KVM_X2APIC_API_VALID_FLAGS;
 		break;
 	default:
-		r = 0;
 		break;
 	}
 	return r;
@@ -3265,7 +3315,7 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
 	events->exception.error_code = vcpu->arch.exception.error_code;
 
 	events->interrupt.injected =
-		vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft;
+		vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft;
 	events->interrupt.nr = vcpu->arch.interrupt.nr;
 	events->interrupt.soft = 0;
 	events->interrupt.shadow = kvm_x86_ops->get_interrupt_shadow(vcpu);
@@ -3318,7 +3368,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
 	vcpu->arch.exception.has_error_code = events->exception.has_error_code;
 	vcpu->arch.exception.error_code = events->exception.error_code;
 
-	vcpu->arch.interrupt.pending = events->interrupt.injected;
+	vcpu->arch.interrupt.injected = events->interrupt.injected;
 	vcpu->arch.interrupt.nr = events->interrupt.nr;
 	vcpu->arch.interrupt.soft = events->interrupt.soft;
 	if (events->flags & KVM_VCPUEVENT_VALID_SHADOW)
@@ -3917,8 +3967,7 @@ static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
 static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm,
 					      u64 ident_addr)
 {
-	kvm->arch.ept_identity_map_addr = ident_addr;
-	return 0;
+	return kvm_x86_ops->set_identity_map_addr(kvm, ident_addr);
 }
 
 static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
@@ -4178,6 +4227,20 @@ split_irqchip_unlock:
 
 		r = 0;
 		break;
+	case KVM_CAP_X86_DISABLE_EXITS:
+		r = -EINVAL;
+		if (cap->args[0] & ~KVM_X86_DISABLE_VALID_EXITS)
+			break;
+
+		if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) &&
+			kvm_can_mwait_in_guest())
+			kvm->arch.mwait_in_guest = true;
+		if (cap->args[0] & KVM_X86_DISABLE_EXITS_HTL)
+			kvm->arch.hlt_in_guest = true;
+		if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE)
+			kvm->arch.pause_in_guest = true;
+		r = 0;
+		break;
 	default:
 		r = -EINVAL;
 		break;
@@ -4482,6 +4545,15 @@ set_identity_unlock:
 			r = kvm_x86_ops->mem_enc_unreg_region(kvm, &region);
 		break;
 	}
+	case KVM_HYPERV_EVENTFD: {
+		struct kvm_hyperv_eventfd hvevfd;
+
+		r = -EFAULT;
+		if (copy_from_user(&hvevfd, argp, sizeof(hvevfd)))
+			goto out;
+		r = kvm_vm_ioctl_hv_eventfd(kvm, &hvevfd);
+		break;
+	}
 	default:
 		r = -ENOTTY;
 	}
@@ -4771,6 +4843,30 @@ out:
 }
 EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
 
+int handle_ud(struct kvm_vcpu *vcpu)
+{
+	struct x86_exception fault;
+	enum emulation_result er;
+	char sig[5]; /* ud2; .ascii "kvm" */
+	int type = EMULTYPE_TRAP_UD;
+
+	/* Step over the forced-emulation signature and emulate what follows. */
+	if (force_emulation_prefix &&
+	    kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, kvm_get_linear_rip(vcpu),
+				sig, sizeof(sig), &fault) == 0 &&
+	    memcmp(sig, "\xf\xbkvm", sizeof(sig)) == 0) {
+		kvm_rip_write(vcpu, kvm_rip_read(vcpu) + sizeof(sig));
+		type = 0;
+	}
+
+	er = emulate_instruction(vcpu, type);
+	/* Neither completed nor handed to userspace: queue #UD. */
+	if (er != EMULATE_USER_EXIT && er != EMULATE_DONE)
+		kvm_queue_exception(vcpu, UD_VECTOR);
+	return er != EMULATE_USER_EXIT;
+}
+EXPORT_SYMBOL_GPL(handle_ud);
+
 static int vcpu_is_mmio_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
 			    gpa_t gpa, bool write)
 {
@@ -5612,27 +5708,27 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
 	kvm_rip_write(vcpu, ctxt->eip);
 	kvm_set_rflags(vcpu, ctxt->eflags);
 
-	if (irq == NMI_VECTOR)
-		vcpu->arch.nmi_pending = 0;
-	else
-		vcpu->arch.interrupt.pending = false;
-
 	return EMULATE_DONE;
 }
 EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
 
-static int handle_emulation_failure(struct kvm_vcpu *vcpu)
+static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
 {
 	int r = EMULATE_DONE;
 
 	++vcpu->stat.insn_emulation_fail;
 	trace_kvm_emulate_insn_failed(vcpu);
+
+	if (emulation_type & EMULTYPE_NO_UD_ON_FAIL)
+		return EMULATE_FAIL;
+
 	if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) {
 		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
 		vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
 		vcpu->run->internal.ndata = 0;
 		r = EMULATE_USER_EXIT;
 	}
+
 	kvm_queue_exception(vcpu, UD_VECTOR);
 
 	return r;
@@ -5876,6 +5972,37 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
 	return false;
 }
 
+static bool is_vmware_backdoor_opcode(struct x86_emulate_ctxt *ctxt)
+{
+	switch (ctxt->opcode_len) {	/* ctxt->b is matched per opcode length */
+	case 1:
+		switch (ctxt->b) {
+		case 0xe4:	/* IN */
+		case 0xe5:
+		case 0xec:
+		case 0xed:
+		case 0xe6:	/* OUT */
+		case 0xe7:
+		case 0xee:
+		case 0xef:
+		case 0x6c:	/* INS */
+		case 0x6d:
+		case 0x6e:	/* OUTS */
+		case 0x6f:
+			return true;
+		}
+		break;
+	case 2:
+		switch (ctxt->b) {
+		case 0x33:	/* RDPMC */
+			return true;
+		}
+		break;
+	}
+
+	return false;	/* every other length/opcode pair is rejected */
+}
+
 int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 			    unsigned long cr2,
 			    int emulation_type,
@@ -5928,10 +6055,14 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 				return EMULATE_DONE;
 			if (emulation_type & EMULTYPE_SKIP)
 				return EMULATE_FAIL;
-			return handle_emulation_failure(vcpu);
+			return handle_emulation_failure(vcpu, emulation_type);
 		}
 	}
 
+	if ((emulation_type & EMULTYPE_VMWARE) &&
+	    !is_vmware_backdoor_opcode(ctxt))
+		return EMULATE_FAIL;
+
 	if (emulation_type & EMULTYPE_SKIP) {
 		kvm_rip_write(vcpu, ctxt->_eip);
 		if (ctxt->eflags & X86_EFLAGS_RF)
@@ -5963,7 +6094,7 @@ restart:
 					emulation_type))
 			return EMULATE_DONE;
 
-		return handle_emulation_failure(vcpu);
+		return handle_emulation_failure(vcpu, emulation_type);
 	}
 
 	if (ctxt->have_exception) {
@@ -6016,7 +6147,8 @@ restart:
 }
 EXPORT_SYMBOL_GPL(x86_emulate_instruction);
 
-int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port)
+static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size,
+			    unsigned short port)
 {
 	unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX);
 	int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt,
@@ -6025,7 +6157,6 @@ int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port)
 	vcpu->arch.pio.count = 0;
 	return ret;
 }
-EXPORT_SYMBOL_GPL(kvm_fast_pio_out);
 
 static int complete_fast_pio_in(struct kvm_vcpu *vcpu)
 {
@@ -6049,7 +6180,8 @@ static int complete_fast_pio_in(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
-int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, unsigned short port)
+static int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size,
+			   unsigned short port)
 {
 	unsigned long val;
 	int ret;
@@ -6068,7 +6200,21 @@ int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, unsigned short port)
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(kvm_fast_pio_in);
+
+int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in)
+{
+	int ret = kvm_skip_emulated_instruction(vcpu);
+
+	/*
+	 * Returns nonzero only if both the skip and the PIO helper do.  TODO:
+	 * this may squash a KVM_GUESTDBG_SINGLESTEP-triggered KVM_EXIT_DEBUG.
+	 */
+	if (in)
+		return kvm_fast_pio_in(vcpu, size, port) && ret;
+	else
+		return kvm_fast_pio_out(vcpu, size, port) && ret;
+}
+EXPORT_SYMBOL_GPL(kvm_fast_pio);
 
 static int kvmclock_cpu_down_prep(unsigned int cpu)
 {
@@ -6246,7 +6392,8 @@ static void kvm_timer_init(void)
 			  kvmclock_cpu_online, kvmclock_cpu_down_prep);
 }
 
-static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
+DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
+EXPORT_PER_CPU_SYMBOL_GPL(current_vcpu);
 
 int kvm_is_in_guest(void)
 {
@@ -6279,18 +6426,6 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = {
 	.get_guest_ip		= kvm_get_guest_ip,
 };
 
-void kvm_before_handle_nmi(struct kvm_vcpu *vcpu)
-{
-	__this_cpu_write(current_vcpu, vcpu);
-}
-EXPORT_SYMBOL_GPL(kvm_before_handle_nmi);
-
-void kvm_after_handle_nmi(struct kvm_vcpu *vcpu)
-{
-	__this_cpu_write(current_vcpu, NULL);
-}
-EXPORT_SYMBOL_GPL(kvm_after_handle_nmi);
-
 static void kvm_set_mmio_spte_mask(void)
 {
 	u64 mask;
@@ -6644,27 +6779,36 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
 	int r;
 
 	/* try to reinject previous events if any */
-	if (vcpu->arch.exception.injected) {
-		kvm_x86_ops->queue_exception(vcpu);
-		return 0;
-	}
 
+	if (vcpu->arch.exception.injected)
+		kvm_x86_ops->queue_exception(vcpu);
 	/*
-	 * Exceptions must be injected immediately, or the exception
-	 * frame will have the address of the NMI or interrupt handler.
+	 * Do not inject an NMI or interrupt if there is a pending
+	 * exception.  Exceptions and interrupts are recognized at
+	 * instruction boundaries, i.e. the start of an instruction.
+	 * Trap-like exceptions, e.g. #DB, have higher priority than
+	 * NMIs and interrupts, i.e. traps are recognized before an
+	 * NMI/interrupt that's pending on the same instruction.
+	 * Fault-like exceptions, e.g. #GP and #PF, are the lowest
+	 * priority, but are only generated (pended) during instruction
+	 * execution, i.e. a pending fault-like exception means the
+	 * fault occurred on the *previous* instruction and must be
+	 * serviced prior to recognizing any new events in order to
+	 * fully complete the previous instruction.
 	 */
-	if (!vcpu->arch.exception.pending) {
-		if (vcpu->arch.nmi_injected) {
+	else if (!vcpu->arch.exception.pending) {
+		if (vcpu->arch.nmi_injected)
 			kvm_x86_ops->set_nmi(vcpu);
-			return 0;
-		}
-
-		if (vcpu->arch.interrupt.pending) {
+		else if (vcpu->arch.interrupt.injected)
 			kvm_x86_ops->set_irq(vcpu);
-			return 0;
-		}
 	}
 
+	/*
+	 * Call check_nested_events() even if we reinjected a previous event
+	 * in order for caller to determine if it should require immediate-exit
+	 * from L2 to L1 due to pending L1 events which require exit
+	 * from L2 to L1.
+	 */
 	if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) {
 		r = kvm_x86_ops->check_nested_events(vcpu, req_int_win);
 		if (r != 0)
@@ -6677,6 +6821,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
 					vcpu->arch.exception.has_error_code,
 					vcpu->arch.exception.error_code);
 
+		WARN_ON_ONCE(vcpu->arch.exception.injected);
 		vcpu->arch.exception.pending = false;
 		vcpu->arch.exception.injected = true;
 
@@ -6691,7 +6836,14 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
 		}
 
 		kvm_x86_ops->queue_exception(vcpu);
-	} else if (vcpu->arch.smi_pending && !is_smm(vcpu) && kvm_x86_ops->smi_allowed(vcpu)) {
+	}
+
+	/* Don't consider new event if we re-injected an event */
+	if (kvm_event_needs_reinjection(vcpu))
+		return 0;
+
+	if (vcpu->arch.smi_pending && !is_smm(vcpu) &&
+	    kvm_x86_ops->smi_allowed(vcpu)) {
 		vcpu->arch.smi_pending = false;
 		++vcpu->arch.smi_count;
 		enter_smm(vcpu);
@@ -6985,8 +7137,6 @@ void kvm_make_scan_ioapic_request(struct kvm *kvm)
 
 static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
 {
-	u64 eoi_exit_bitmap[4];
-
 	if (!kvm_apic_hw_enabled(vcpu->arch.apic))
 		return;
 
@@ -6999,6 +7149,20 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
 			kvm_x86_ops->sync_pir_to_irr(vcpu);
 		kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
 	}
+
+	if (is_guest_mode(vcpu))
+		vcpu->arch.load_eoi_exitmap_pending = true;
+	else
+		kvm_make_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu);
+}
+
+static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu)
+{
+	u64 eoi_exit_bitmap[4];
+
+	if (!kvm_apic_hw_enabled(vcpu->arch.apic))
+		return;
+
 	bitmap_or((ulong *)eoi_exit_bitmap, vcpu->arch.ioapic_handled_vectors,
 		  vcpu_to_synic(vcpu)->vec_bitmap, 256);
 	kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
@@ -7113,6 +7277,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		}
 		if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
 			vcpu_scan_ioapic(vcpu);
+		if (kvm_check_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu))
+			vcpu_load_eoi_exitmap(vcpu);
 		if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
 			kvm_vcpu_reload_apic_access_page(vcpu);
 		if (kvm_check_request(KVM_REQ_HV_CRASH, vcpu)) {
@@ -7291,7 +7457,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 
 	kvm_put_guest_xcr0(vcpu);
 
+	kvm_before_interrupt(vcpu);
 	kvm_x86_ops->handle_external_intr(vcpu);
+	kvm_after_interrupt(vcpu);
 
 	++vcpu->stat.exits;
 
@@ -7500,7 +7668,6 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
-
 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	int r;
@@ -7526,6 +7693,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		goto out;
 	}
 
+	if (vcpu->run->kvm_valid_regs & ~KVM_SYNC_X86_VALID_FIELDS) {
+		r = -EINVAL;
+		goto out;
+	}
+
+	if (vcpu->run->kvm_dirty_regs) {
+		r = sync_regs(vcpu);
+		if (r != 0)
+			goto out;
+	}
+
 	/* re-sync apic's tpr */
 	if (!lapic_in_kernel(vcpu)) {
 		if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) {
@@ -7550,6 +7728,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 out:
 	kvm_put_guest_fpu(vcpu);
+	if (vcpu->run->kvm_valid_regs)
+		store_regs(vcpu);
 	post_kvm_run_save(vcpu);
 	kvm_sigset_deactivate(vcpu);
 
@@ -7557,10 +7737,8 @@ out:
 	return r;
 }
 
-int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+static void __get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
-	vcpu_load(vcpu);
-
 	if (vcpu->arch.emulate_regs_need_sync_to_vcpu) {
 		/*
 		 * We are here if userspace calls get_regs() in the middle of
@@ -7593,15 +7771,18 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 
 	regs->rip = kvm_rip_read(vcpu);
 	regs->rflags = kvm_get_rflags(vcpu);
+}
 
+int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+	vcpu_load(vcpu);
+	__get_regs(vcpu, regs);
 	vcpu_put(vcpu);
 	return 0;
 }
 
-int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+static void __set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
-	vcpu_load(vcpu);
-
 	vcpu->arch.emulate_regs_need_sync_from_vcpu = true;
 	vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
 
@@ -7630,7 +7811,12 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	vcpu->arch.exception.pending = false;
 
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
+}
 
+int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+	vcpu_load(vcpu);
+	__set_regs(vcpu, regs);
 	vcpu_put(vcpu);
 	return 0;
 }
@@ -7645,13 +7831,10 @@ void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
 }
 EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
 
-int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
-				  struct kvm_sregs *sregs)
+static void __get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 {
 	struct desc_ptr dt;
 
-	vcpu_load(vcpu);
-
 	kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
 	kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
 	kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
@@ -7679,10 +7862,16 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
 
 	memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap);
 
-	if (vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft)
+	if (vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft)
 		set_bit(vcpu->arch.interrupt.nr,
 			(unsigned long *)sregs->interrupt_bitmap);
+}
 
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+				  struct kvm_sregs *sregs)
+{
+	vcpu_load(vcpu);
+	__get_sregs(vcpu, sregs);
 	vcpu_put(vcpu);
 	return 0;
 }
@@ -7754,7 +7943,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
 }
 EXPORT_SYMBOL_GPL(kvm_task_switch);
 
-int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+static int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 {
 	if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) {
 		/*
@@ -7777,8 +7966,7 @@ int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 	return 0;
 }
 
-int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
-				  struct kvm_sregs *sregs)
+static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 {
 	struct msr_data apic_base_msr;
 	int mmu_reset_needed = 0;
@@ -7786,8 +7974,6 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 	struct desc_ptr dt;
 	int ret = -EINVAL;
 
-	vcpu_load(vcpu);
-
 	if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
 			(sregs->cr4 & X86_CR4_OSXSAVE))
 		goto out;
@@ -7866,6 +8052,16 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 
 	ret = 0;
 out:
+	return ret;
+}
+
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+				  struct kvm_sregs *sregs)
+{
+	int ret;
+
+	vcpu_load(vcpu);
+	ret = __set_sregs(vcpu, sregs);
 	vcpu_put(vcpu);
 	return ret;
 }
@@ -7992,6 +8188,45 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 	return 0;
 }
 
+static void store_regs(struct kvm_vcpu *vcpu)
+{
+	BUILD_BUG_ON(sizeof(struct kvm_sync_regs) > SYNC_REGS_SIZE_BYTES);
+	/* Fill run->s.regs for each class set in run->kvm_valid_regs. */
+	if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_REGS)
+		__get_regs(vcpu, &vcpu->run->s.regs.regs);
+
+	if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_SREGS)
+		__get_sregs(vcpu, &vcpu->run->s.regs.sregs);
+
+	if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_EVENTS)
+		kvm_vcpu_ioctl_x86_get_vcpu_events(
+				vcpu, &vcpu->run->s.regs.events);
+}
+
+static int sync_regs(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->run->kvm_dirty_regs & ~KVM_SYNC_X86_VALID_FIELDS)
+		return -EINVAL;	/* bit outside KVM_SYNC_X86_VALID_FIELDS */
+
+	if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_REGS) {
+		__set_regs(vcpu, &vcpu->run->s.regs.regs);
+		vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_REGS;
+	}
+	if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_SREGS) {
+		if (__set_sregs(vcpu, &vcpu->run->s.regs.sregs))
+			return -EINVAL;	/* SREGS bit stays dirty on failure */
+		vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_SREGS;
+	}
+	if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_EVENTS) {
+		if (kvm_vcpu_ioctl_x86_set_vcpu_events(
+				vcpu, &vcpu->run->s.regs.events))
+			return -EINVAL;	/* EVENTS bit stays dirty on failure */
+		vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_EVENTS;
+	}
+
+	return 0;	/* every requested class applied, its bit cleared */
+}
+
 static void fx_init(struct kvm_vcpu *vcpu)
 {
 	fpstate_init(&vcpu->arch.guest_fpu.state);
@@ -8447,7 +8682,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
 	raw_spin_lock_init(&kvm->arch.tsc_write_lock);
 	mutex_init(&kvm->arch.apic_map_lock);
-	mutex_init(&kvm->arch.hyperv.hv_lock);
 	spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
 
 	kvm->arch.kvmclock_offset = -ktime_get_boot_ns();
@@ -8456,6 +8690,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn);
 	INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn);
 
+	kvm_hv_init_vm(kvm);
 	kvm_page_track_init(kvm);
 	kvm_mmu_init_vm(kvm);
 
@@ -8586,6 +8821,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	kvfree(rcu_dereference_check(kvm->arch.apic_map, 1));
 	kvm_mmu_uninit_vm(kvm);
 	kvm_page_track_cleanup(kvm);
+	kvm_hv_destroy_vm(kvm);
 }
 
 void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index b91215d1fd80..7d35ce672989 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -2,12 +2,48 @@
 #ifndef ARCH_X86_KVM_X86_H
 #define ARCH_X86_KVM_X86_H
 
-#include <asm/processor.h>
-#include <asm/mwait.h>
 #include <linux/kvm_host.h>
 #include <asm/pvclock.h>
 #include "kvm_cache_regs.h"
 
+#define KVM_DEFAULT_PLE_GAP		128
+#define KVM_VMX_DEFAULT_PLE_WINDOW	4096
+#define KVM_DEFAULT_PLE_WINDOW_GROW	2
+#define KVM_DEFAULT_PLE_WINDOW_SHRINK	0
+#define KVM_VMX_DEFAULT_PLE_WINDOW_MAX	UINT_MAX
+#define KVM_SVM_DEFAULT_PLE_WINDOW_MAX	USHRT_MAX
+#define KVM_SVM_DEFAULT_PLE_WINDOW	3000
+
+static inline unsigned int __grow_ple_window(unsigned int val,
+		unsigned int base, unsigned int modifier, unsigned int max)
+{
+	u64 ret = val;		/* 64-bit so the scaling below can't wrap */
+
+	if (modifier < 1)	/* modifier == 0: return base as-is */
+		return base;
+
+	if (modifier < base)	/* small modifier acts as a multiplier */
+		ret *= modifier;
+	else			/* otherwise it is an additive step */
+		ret += modifier;
+
+	return min(ret, (u64)max);	/* cap the result at max */
+}
+
+static inline unsigned int __shrink_ple_window(unsigned int val,
+		unsigned int base, unsigned int modifier, unsigned int min)
+{
+	if (modifier < 1)		/* modifier == 0: return base as-is */
+		return base;
+	/* A modifier below base divides val; a larger one is subtracted. */
+	if (modifier < base)
+		val /= modifier;
+	else	/* unsigned val: clamp at 0, don't wrap */
+		val = val > modifier ? val - modifier : 0;
+
+	return max(val, min);		/* never go below the floor */
+}
+
 #define MSR_IA32_CR_PAT_DEFAULT  0x0007040600070406ULL
 
 static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
@@ -19,19 +55,19 @@ static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
 static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector,
 	bool soft)
 {
-	vcpu->arch.interrupt.pending = true;
+	vcpu->arch.interrupt.injected = true;
 	vcpu->arch.interrupt.soft = soft;
 	vcpu->arch.interrupt.nr = vector;
 }
 
 static inline void kvm_clear_interrupt_queue(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.interrupt.pending = false;
+	vcpu->arch.interrupt.injected = false;
 }
 
 static inline bool kvm_event_needs_reinjection(struct kvm_vcpu *vcpu)
 {
-	return vcpu->arch.exception.injected || vcpu->arch.interrupt.pending ||
+	return vcpu->arch.exception.injected || vcpu->arch.interrupt.injected ||
 		vcpu->arch.nmi_injected;
 }
 
@@ -205,8 +241,6 @@ static inline bool kvm_check_has_quirk(struct kvm *kvm, u64 quirk)
 	return !(kvm->arch.disabled_quirks & quirk);
 }
 
-void kvm_before_handle_nmi(struct kvm_vcpu *vcpu);
-void kvm_after_handle_nmi(struct kvm_vcpu *vcpu);
 void kvm_set_pending_timer(struct kvm_vcpu *vcpu);
 int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
 
@@ -221,6 +255,8 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
 	gva_t addr, void *val, unsigned int bytes,
 	struct x86_exception *exception);
 
+int handle_ud(struct kvm_vcpu *vcpu);
+
 void kvm_vcpu_mtrr_init(struct kvm_vcpu *vcpu);
 u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
 bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data);
@@ -242,6 +278,8 @@ extern unsigned int min_timer_period_us;
 
 extern unsigned int lapic_timer_advance_ns;
 
+extern bool enable_vmware_backdoor;
+
 extern struct static_key kvm_no_apic_vcpu;
 
 static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
@@ -264,10 +302,38 @@ static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
 	    __rem;						\
 	 })
 
-static inline bool kvm_mwait_in_guest(void)
+#define KVM_X86_DISABLE_EXITS_MWAIT          (1 << 0)
+#define KVM_X86_DISABLE_EXITS_HTL            (1 << 1)
+#define KVM_X86_DISABLE_EXITS_PAUSE          (1 << 2)
+#define KVM_X86_DISABLE_VALID_EXITS          (KVM_X86_DISABLE_EXITS_MWAIT | \
+                                              KVM_X86_DISABLE_EXITS_HTL | \
+                                              KVM_X86_DISABLE_EXITS_PAUSE)
+
+static inline bool kvm_mwait_in_guest(struct kvm *kvm)
+{
+	return kvm->arch.mwait_in_guest;
+}
+
+static inline bool kvm_hlt_in_guest(struct kvm *kvm)
+{
+	return kvm->arch.hlt_in_guest;
+}
+
+static inline bool kvm_pause_in_guest(struct kvm *kvm)
+{
+	return kvm->arch.pause_in_guest;
+}
+
+DECLARE_PER_CPU(struct kvm_vcpu *, current_vcpu);
+
+static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu)
+{
+	__this_cpu_write(current_vcpu, vcpu);
+}
+
+static inline void kvm_after_interrupt(struct kvm_vcpu *vcpu)
 {
-	return boot_cpu_has(X86_FEATURE_MWAIT) &&
-		!boot_cpu_has_bug(X86_BUG_MONITOR);
+	__this_cpu_write(current_vcpu, NULL);
 }
 
 #endif
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index 476d810639a8..b45f5aaefd74 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -27,8 +27,20 @@ EXPORT_SYMBOL(get_cpu_entry_area);
 void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags)
 {
 	unsigned long va = (unsigned long) cea_vaddr;
+	pte_t pte = pfn_pte(pa >> PAGE_SHIFT, flags);
 
-	set_pte_vaddr(va, pfn_pte(pa >> PAGE_SHIFT, flags));
+	/*
+	 * The cpu_entry_area is shared between the user and kernel
+	 * page tables.  All of its ptes can safely be global.
+	 * _PAGE_GLOBAL gets reused to help indicate PROT_NONE for
+	 * non-present PTEs, so be careful not to set it in that
+	 * case to avoid confusion.
+	 */
+	if (boot_cpu_has(X86_FEATURE_PGE) &&
+	    (pgprot_val(flags) & _PAGE_PRESENT))
+		pte = pte_set_flags(pte, _PAGE_GLOBAL);
+
+	set_pte_vaddr(va, pte);
 }
 
 static void __init
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
index 9aa22be8331e..a2f0c7e20fb0 100644
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -98,6 +98,9 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
 	if (!info->kernpg_flag)
 		info->kernpg_flag = _KERNPG_TABLE;
 
+	/* Filter out unsupported __PAGE_KERNEL_* bits: */
+	info->kernpg_flag &= __default_kernel_pte_mask;
+
 	for (; addr < end; addr = next) {
 		pgd_t *pgd = pgd_page + pgd_index(addr);
 		p4d_t *p4d;
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 82f5252c723a..fec82b577c18 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -161,12 +161,6 @@ struct map_range {
 
 static int page_size_mask;
 
-static void enable_global_pages(void)
-{
-	if (!static_cpu_has(X86_FEATURE_PTI))
-		__supported_pte_mask |= _PAGE_GLOBAL;
-}
-
 static void __init probe_page_size_mask(void)
 {
 	/*
@@ -187,9 +181,15 @@ static void __init probe_page_size_mask(void)
 	__supported_pte_mask &= ~_PAGE_GLOBAL;
 	if (boot_cpu_has(X86_FEATURE_PGE)) {
 		cr4_set_bits_and_update_boot(X86_CR4_PGE);
-		enable_global_pages();
+		__supported_pte_mask |= _PAGE_GLOBAL;
 	}
 
+	/* By default, everything is supported: */
+	__default_kernel_pte_mask = __supported_pte_mask;
+	/* Except with PTI, where the kernel is mostly non-Global: */
+	if (cpu_feature_enabled(X86_FEATURE_PTI))
+		__default_kernel_pte_mask &= ~_PAGE_GLOBAL;
+
 	/* Enable 1 GB linear kernel mappings if available: */
 	if (direct_gbpages && boot_cpu_has(X86_FEATURE_GBPAGES)) {
 		printk(KERN_INFO "Using GB pages for direct mapping\n");
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 396e1f0151ac..c893c6a3d707 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -558,8 +558,14 @@ static void __init pagetable_init(void)
 	permanent_kmaps_init(pgd_base);
 }
 
-pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL);
+#define DEFAULT_PTE_MASK (~(_PAGE_NX | _PAGE_GLOBAL))	/* all but NX, GLOBAL */
+/* Bits supported by the hardware: */
+pteval_t __supported_pte_mask __read_mostly = DEFAULT_PTE_MASK;
+/* Bits allowed in normal kernel mappings: */
+pteval_t __default_kernel_pte_mask __read_mostly = DEFAULT_PTE_MASK;
 EXPORT_SYMBOL_GPL(__supported_pte_mask);
+/* Used in PAGE_KERNEL_* macros which are reasonably used out-of-tree: */
+EXPORT_SYMBOL(__default_kernel_pte_mask);
 
 /* user-defined highmem size */
 static unsigned int highmem_pages = -1;
@@ -778,6 +784,7 @@ void __init mem_init(void)
 	free_all_bootmem();
 
 	after_bootmem = 1;
+	x86_init.hyper.init_after_bootmem();
 
 	mem_init_print_info(NULL);
 	printk(KERN_INFO "virtual kernel memory layout:\n"
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index dca9abf2b85c..0a400606dea0 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -65,8 +65,13 @@
  * around without checking the pgd every time.
  */
 
+/* Bits supported by the hardware: */
 pteval_t __supported_pte_mask __read_mostly = ~0;
+/* Bits allowed in normal kernel mappings: */
+pteval_t __default_kernel_pte_mask __read_mostly = ~0;
 EXPORT_SYMBOL_GPL(__supported_pte_mask);
+/* Used in PAGE_KERNEL_* macros which are reasonably used out-of-tree: */
+EXPORT_SYMBOL(__default_kernel_pte_mask);
 
 int force_personality32;
 
@@ -1185,6 +1190,7 @@ void __init mem_init(void)
 	/* this will put all memory onto the freelists */
 	free_all_bootmem();
 	after_bootmem = 1;
+	x86_init.hyper.init_after_bootmem();
 
 	/*
 	 * Must be done after boot memory is put on freelist, because here we
@@ -1285,6 +1291,12 @@ void mark_rodata_ro(void)
 			(unsigned long) __va(__pa_symbol(_sdata)));
 
 	debug_checkwx();
+
+	/*
+	 * Do this after all of the manipulation of the
+	 * kernel text page tables are complete.
+	 */
+	pti_clone_kernel_text();
 }
 
 int kern_addr_valid(unsigned long addr)
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index ada98b39b8ad..b3294d36769d 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -44,6 +44,9 @@ int iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot)
 		return ret;
 
 	*prot = __pgprot(__PAGE_KERNEL | cachemode2protval(pcm));
+	/* Filter out unsupported __PAGE_KERNEL* bits: */
+	pgprot_val(*prot) &= __default_kernel_pte_mask;
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(iomap_create_wc);
@@ -88,6 +91,9 @@ iomap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot)
 		prot = __pgprot(__PAGE_KERNEL |
 				cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS));
 
+	/* Filter out unsupported __PAGE_KERNEL* bits: */
+	pgprot_val(prot) &= __default_kernel_pte_mask;
+
 	return (void __force __iomem *) kmap_atomic_prot_pfn(pfn, prot);
 }
 EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn);
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index e2db83bebc3b..c63a545ec199 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -816,6 +816,9 @@ void __init __early_set_fixmap(enum fixed_addresses idx,
 	}
 	pte = early_ioremap_pte(addr);
 
+	/* Sanitize 'flags' against any unsupported bits: */
+	pgprot_val(flags) &= __default_kernel_pte_mask;
+
 	if (pgprot_val(flags))
 		set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
 	else
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index d8ff013ea9d0..980dbebd0ca7 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -269,6 +269,12 @@ void __init kasan_early_init(void)
 	pudval_t pud_val = __pa_nodebug(kasan_zero_pmd) | _KERNPG_TABLE;
 	p4dval_t p4d_val = __pa_nodebug(kasan_zero_pud) | _KERNPG_TABLE;
 
+	/* Mask out unsupported __PAGE_KERNEL bits: */
+	pte_val &= __default_kernel_pte_mask;
+	pmd_val &= __default_kernel_pte_mask;
+	pud_val &= __default_kernel_pte_mask;
+	p4d_val &= __default_kernel_pte_mask;
+
 	for (i = 0; i < PTRS_PER_PTE; i++)
 		kasan_zero_pte[i] = __pte(pte_val);
 
@@ -371,7 +377,13 @@ void __init kasan_init(void)
 	 */
 	memset(kasan_zero_page, 0, PAGE_SIZE);
 	for (i = 0; i < PTRS_PER_PTE; i++) {
-		pte_t pte = __pte(__pa(kasan_zero_page) | __PAGE_KERNEL_RO | _PAGE_ENC);
+		pte_t pte;
+		pgprot_t prot;
+
+		prot = __pgprot(__PAGE_KERNEL_RO | _PAGE_ENC);
+		pgprot_val(prot) &= __default_kernel_pte_mask;
+
+		pte = __pte(__pa(kasan_zero_page) | pgprot_val(prot));
 		set_pte(&kasan_zero_pte[i], pte);
 	}
 	/* Flush TLBs again to be sure that write protection applied. */
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 155ecbac9e28..48c591251600 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -90,9 +90,10 @@ unsigned long arch_mmap_rnd(void)
 	return arch_rnd(mmap_is_ia32() ? mmap32_rnd_bits : mmap64_rnd_bits);
 }
 
-static unsigned long mmap_base(unsigned long rnd, unsigned long task_size)
+static unsigned long mmap_base(unsigned long rnd, unsigned long task_size,
+			       struct rlimit *rlim_stack)
 {
-	unsigned long gap = rlimit(RLIMIT_STACK);
+	unsigned long gap = rlim_stack->rlim_cur;
 	unsigned long pad = stack_maxrandom_size(task_size) + stack_guard_gap;
 	unsigned long gap_min, gap_max;
 
@@ -126,16 +127,17 @@ static unsigned long mmap_legacy_base(unsigned long rnd,
  * process VM image, sets up which VM layout function to use:
  */
 static void arch_pick_mmap_base(unsigned long *base, unsigned long *legacy_base,
-		unsigned long random_factor, unsigned long task_size)
+		unsigned long random_factor, unsigned long task_size,
+		struct rlimit *rlim_stack)
 {
 	*legacy_base = mmap_legacy_base(random_factor, task_size);
 	if (mmap_is_legacy())
 		*base = *legacy_base;
 	else
-		*base = mmap_base(random_factor, task_size);
+		*base = mmap_base(random_factor, task_size, rlim_stack);
 }
 
-void arch_pick_mmap_layout(struct mm_struct *mm)
+void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
 {
 	if (mmap_is_legacy())
 		mm->get_unmapped_area = arch_get_unmapped_area;
@@ -143,7 +145,8 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
 
 	arch_pick_mmap_base(&mm->mmap_base, &mm->mmap_legacy_base,
-			arch_rnd(mmap64_rnd_bits), task_size_64bit(0));
+			arch_rnd(mmap64_rnd_bits), task_size_64bit(0),
+			rlim_stack);
 
 #ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES
 	/*
@@ -153,7 +156,8 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 	 * mmap_base, the compat syscall uses mmap_compat_base.
 	 */
 	arch_pick_mmap_base(&mm->mmap_compat_base, &mm->mmap_compat_legacy_base,
-			arch_rnd(mmap32_rnd_bits), task_size_32bit());
+			arch_rnd(mmap32_rnd_bits), task_size_32bit(),
+			rlim_stack);
 #endif
 }
 
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 85cf12219dea..0f3d50f4c48c 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -298,9 +298,11 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
 
 	/*
 	 * The .rodata section needs to be read-only. Using the pfn
-	 * catches all aliases.
+	 * catches all aliases.  This also includes __ro_after_init,
+	 * so do not enforce until kernel_set_to_readonly is true.
 	 */
-	if (within(pfn, __pa_symbol(__start_rodata) >> PAGE_SHIFT,
+	if (kernel_set_to_readonly &&
+	    within(pfn, __pa_symbol(__start_rodata) >> PAGE_SHIFT,
 		   __pa_symbol(__end_rodata) >> PAGE_SHIFT))
 		pgprot_val(forbidden) |= _PAGE_RW;
 
@@ -512,6 +514,23 @@ static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
 #endif
 }
 
+static pgprot_t pgprot_clear_protnone_bits(pgprot_t prot)
+{
+	/*
+	 * _PAGE_GLOBAL means "global page" for present PTEs.
+	 * But, it is also used to indicate _PAGE_PROTNONE
+	 * for non-present PTEs.
+	 *
+	 * This ensures that a _PAGE_GLOBAL PTE going from
+	 * present to non-present is not confused as
+	 * _PAGE_PROTNONE.
+	 */
+	if (!(pgprot_val(prot) & _PAGE_PRESENT))
+		pgprot_val(prot) &= ~_PAGE_GLOBAL;
+
+	return prot;
+}
+
 static int
 try_preserve_large_page(pte_t *kpte, unsigned long address,
 			struct cpa_data *cpa)
@@ -566,6 +585,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 	 * up accordingly.
 	 */
 	old_pte = *kpte;
+	/* Clear PSE (aka _PAGE_PAT) and move PAT bit to correct position */
 	req_prot = pgprot_large_2_4k(old_prot);
 
 	pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr);
@@ -577,19 +597,9 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 	 * different bit positions in the two formats.
 	 */
 	req_prot = pgprot_4k_2_large(req_prot);
-
-	/*
-	 * Set the PSE and GLOBAL flags only if the PRESENT flag is
-	 * set otherwise pmd_present/pmd_huge will return true even on
-	 * a non present pmd. The canon_pgprot will clear _PAGE_GLOBAL
-	 * for the ancient hardware that doesn't support it.
-	 */
+	req_prot = pgprot_clear_protnone_bits(req_prot);
 	if (pgprot_val(req_prot) & _PAGE_PRESENT)
-		pgprot_val(req_prot) |= _PAGE_PSE | _PAGE_GLOBAL;
-	else
-		pgprot_val(req_prot) &= ~(_PAGE_PSE | _PAGE_GLOBAL);
-
-	req_prot = canon_pgprot(req_prot);
+		pgprot_val(req_prot) |= _PAGE_PSE;
 
 	/*
 	 * old_pfn points to the large page base pfn. So we need
@@ -674,8 +684,12 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
 	switch (level) {
 	case PG_LEVEL_2M:
 		ref_prot = pmd_pgprot(*(pmd_t *)kpte);
-		/* clear PSE and promote PAT bit to correct position */
+		/*
+		 * Clear PSE (aka _PAGE_PAT) and move
+		 * PAT bit to correct position.
+		 */
 		ref_prot = pgprot_large_2_4k(ref_prot);
+
 		ref_pfn = pmd_pfn(*(pmd_t *)kpte);
 		break;
 
@@ -698,23 +712,14 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
 		return 1;
 	}
 
-	/*
-	 * Set the GLOBAL flags only if the PRESENT flag is set
-	 * otherwise pmd/pte_present will return true even on a non
-	 * present pmd/pte. The canon_pgprot will clear _PAGE_GLOBAL
-	 * for the ancient hardware that doesn't support it.
-	 */
-	if (pgprot_val(ref_prot) & _PAGE_PRESENT)
-		pgprot_val(ref_prot) |= _PAGE_GLOBAL;
-	else
-		pgprot_val(ref_prot) &= ~_PAGE_GLOBAL;
+	ref_prot = pgprot_clear_protnone_bits(ref_prot);
 
 	/*
 	 * Get the target pfn from the original entry:
 	 */
 	pfn = ref_pfn;
 	for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc)
-		set_pte(&pbase[i], pfn_pte(pfn, canon_pgprot(ref_prot)));
+		set_pte(&pbase[i], pfn_pte(pfn, ref_prot));
 
 	if (virt_addr_valid(address)) {
 		unsigned long pfn = PFN_DOWN(__pa(address));
@@ -930,19 +935,7 @@ static void populate_pte(struct cpa_data *cpa,
 
 	pte = pte_offset_kernel(pmd, start);
 
-	/*
-	 * Set the GLOBAL flags only if the PRESENT flag is
-	 * set otherwise pte_present will return true even on
-	 * a non present pte. The canon_pgprot will clear
-	 * _PAGE_GLOBAL for the ancient hardware that doesn't
-	 * support it.
-	 */
-	if (pgprot_val(pgprot) & _PAGE_PRESENT)
-		pgprot_val(pgprot) |= _PAGE_GLOBAL;
-	else
-		pgprot_val(pgprot) &= ~_PAGE_GLOBAL;
-
-	pgprot = canon_pgprot(pgprot);
+	pgprot = pgprot_clear_protnone_bits(pgprot);
 
 	while (num_pages-- && start < end) {
 		set_pte(pte, pfn_pte(cpa->pfn, pgprot));
@@ -1234,24 +1227,14 @@ repeat:
 
 		new_prot = static_protections(new_prot, address, pfn);
 
-		/*
-		 * Set the GLOBAL flags only if the PRESENT flag is
-		 * set otherwise pte_present will return true even on
-		 * a non present pte. The canon_pgprot will clear
-		 * _PAGE_GLOBAL for the ancient hardware that doesn't
-		 * support it.
-		 */
-		if (pgprot_val(new_prot) & _PAGE_PRESENT)
-			pgprot_val(new_prot) |= _PAGE_GLOBAL;
-		else
-			pgprot_val(new_prot) &= ~_PAGE_GLOBAL;
+		new_prot = pgprot_clear_protnone_bits(new_prot);
 
 		/*
 		 * We need to keep the pfn from the existing PTE,
 		 * after all we're only going to change it's attributes
 		 * not the memory it points to
 		 */
-		new_pte = pfn_pte(pfn, canon_pgprot(new_prot));
+		new_pte = pfn_pte(pfn, new_prot);
 		cpa->pfn = pfn;
 		/*
 		 * Do we really change anything ?
@@ -1428,11 +1411,11 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 	memset(&cpa, 0, sizeof(cpa));
 
 	/*
-	 * Check, if we are requested to change a not supported
-	 * feature:
+	 * Check, if we are requested to set a not supported
+	 * feature.  Clearing non-supported features is OK.
 	 */
 	mask_set = canon_pgprot(mask_set);
-	mask_clr = canon_pgprot(mask_clr);
+
 	if (!pgprot_val(mask_set) && !pgprot_val(mask_clr) && !force_split)
 		return 0;
 
@@ -1775,6 +1758,12 @@ int set_memory_4k(unsigned long addr, int numpages)
 					__pgprot(0), 1, 0, NULL);
 }
 
+int set_memory_nonglobal(unsigned long addr, int numpages)
+{
+	return change_page_attr_clear(&addr, numpages,
+				      __pgprot(_PAGE_GLOBAL), 0);
+}
+
 static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
 {
 	struct cpa_data cpa;
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 34cda7e0551b..ffc8c13c50e4 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/mm.h>
 #include <linux/gfp.h>
+#include <linux/hugetlb.h>
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
 #include <asm/tlb.h>
@@ -583,6 +584,9 @@ void __native_set_fixmap(enum fixed_addresses idx, pte_t pte)
 void native_set_fixmap(enum fixed_addresses idx, phys_addr_t phys,
 		       pgprot_t flags)
 {
+	/* Sanitize 'flags' against any unsupported bits: */
+	pgprot_val(flags) &= __default_kernel_pte_mask;
+
 	__native_set_fixmap(idx, pfn_pte(phys >> PAGE_SHIFT, flags));
 }
 
@@ -636,6 +640,10 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
 	    (mtrr != MTRR_TYPE_WRBACK))
 		return 0;
 
+	/* Bail out if we are on a populated non-leaf entry: */
+	if (pud_present(*pud) && !pud_huge(*pud))
+		return 0;
+
 	prot = pgprot_4k_2_large(prot);
 
 	set_pte((pte_t *)pud, pfn_pte(
@@ -664,6 +672,10 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
 		return 0;
 	}
 
+	/* Bail out if we are on a populated non-leaf entry: */
+	if (pmd_present(*pmd) && !pmd_huge(*pmd))
+		return 0;
+
 	prot = pgprot_4k_2_large(prot);
 
 	set_pte((pte_t *)pmd, pfn_pte(
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index 631507f0c198..f1fd52f449e0 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -66,12 +66,22 @@ static void __init pti_print_if_secure(const char *reason)
 		pr_info("%s\n", reason);
 }
 
+static enum pti_mode {
+	PTI_AUTO = 0,
+	PTI_FORCE_OFF,
+	PTI_FORCE_ON
+} pti_mode;
+
 void __init pti_check_boottime_disable(void)
 {
 	char arg[5];
 	int ret;
 
+	/* Assume mode is auto unless overridden. */
+	pti_mode = PTI_AUTO;
+
 	if (hypervisor_is_type(X86_HYPER_XEN_PV)) {
+		pti_mode = PTI_FORCE_OFF;
 		pti_print_if_insecure("disabled on XEN PV.");
 		return;
 	}
@@ -79,18 +89,23 @@ void __init pti_check_boottime_disable(void)
 	ret = cmdline_find_option(boot_command_line, "pti", arg, sizeof(arg));
 	if (ret > 0)  {
 		if (ret == 3 && !strncmp(arg, "off", 3)) {
+			pti_mode = PTI_FORCE_OFF;
 			pti_print_if_insecure("disabled on command line.");
 			return;
 		}
 		if (ret == 2 && !strncmp(arg, "on", 2)) {
+			pti_mode = PTI_FORCE_ON;
 			pti_print_if_secure("force enabled on command line.");
 			goto enable;
 		}
-		if (ret == 4 && !strncmp(arg, "auto", 4))
+		if (ret == 4 && !strncmp(arg, "auto", 4)) {
+			pti_mode = PTI_AUTO;
 			goto autosel;
+		}
 	}
 
 	if (cmdline_find_option_bool(boot_command_line, "nopti")) {
+		pti_mode = PTI_FORCE_OFF;
 		pti_print_if_insecure("disabled on command line.");
 		return;
 	}
@@ -149,7 +164,7 @@ pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
  *
  * Returns a pointer to a P4D on success, or NULL on failure.
  */
-static __init p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
+static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
 {
 	pgd_t *pgd = kernel_to_user_pgdp(pgd_offset_k(address));
 	gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
@@ -177,7 +192,7 @@ static __init p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
  *
  * Returns a pointer to a PMD on success, or NULL on failure.
  */
-static __init pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
+static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
 {
 	gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
 	p4d_t *p4d = pti_user_pagetable_walk_p4d(address);
@@ -267,7 +282,7 @@ static void __init pti_setup_vsyscall(void)
 static void __init pti_setup_vsyscall(void) { }
 #endif
 
-static void __init
+static void
 pti_clone_pmds(unsigned long start, unsigned long end, pmdval_t clear)
 {
 	unsigned long addr;
@@ -300,6 +315,27 @@ pti_clone_pmds(unsigned long start, unsigned long end, pmdval_t clear)
 			return;
 
 		/*
+		 * Only clone present PMDs.  This ensures only setting
+		 * _PAGE_GLOBAL on present PMDs.  This should only be
+		 * called on well-known addresses anyway, so a non-
+		 * present PMD would be a surprise.
+		 */
+		if (WARN_ON(!(pmd_flags(*pmd) & _PAGE_PRESENT)))
+			return;
+
+		/*
+		 * Setting 'target_pmd' below creates a mapping in both
+		 * the user and kernel page tables.  It is effectively
+		 * global, so set it as global in both copies.  Note:
+		 * the X86_FEATURE_PGE check is not _required_ because
+		 * the CPU ignores _PAGE_GLOBAL when PGE is not
+		 * supported.  The check keeps consistency with
+		 * code that only sets this bit when supported.
+		 */
+		if (boot_cpu_has(X86_FEATURE_PGE))
+			*pmd = pmd_set_flags(*pmd, _PAGE_GLOBAL);
+
+		/*
 		 * Copy the PMD.  That is, the kernelmode and usermode
 		 * tables will share the last-level page tables of this
 		 * address range
@@ -348,7 +384,83 @@ static void __init pti_clone_entry_text(void)
 {
 	pti_clone_pmds((unsigned long) __entry_text_start,
 			(unsigned long) __irqentry_text_end,
-		       _PAGE_RW | _PAGE_GLOBAL);
+		       _PAGE_RW);
+}
+
+/*
+ * Global pages and PCIDs are both ways to make kernel TLB entries
+ * live longer, reduce TLB misses and improve kernel performance.
+ * But, leaving all kernel text Global makes it potentially accessible
+ * to Meltdown-style attacks which make it trivial to find gadgets or
+ * defeat KASLR.
+ *
+ * Only use global pages when it is really worth it.
+ */
+static inline bool pti_kernel_image_global_ok(void)
+{
+	/*
+	 * Systems with PCIDs get little benefit from global
+	 * kernel text and are not worth the downsides.
+	 */
+	if (cpu_feature_enabled(X86_FEATURE_PCID))
+		return false;
+
+	/*
+	 * Only do global kernel image for pti=auto.  Do the most
+	 * secure thing (not global) if pti=on specified.
+	 */
+	if (pti_mode != PTI_AUTO)
+		return false;
+
+	/*
+	 * K8 may not tolerate the cleared _PAGE_RW on the userspace
+	 * global kernel image pages.  Do the safe thing (disable
+	 * global kernel image).  This is unlikely to ever be
+	 * noticed because PTI is disabled by default on AMD CPUs.
+	 */
+	if (boot_cpu_has(X86_FEATURE_K8))
+		return false;
+
+	return true;
+}
+
+/*
+ * For some configurations, map all of kernel text into the user page
+ * tables.  This reduces TLB misses, especially on non-PCID systems.
+ */
+void pti_clone_kernel_text(void)
+{
+	unsigned long start = PFN_ALIGN(_text);
+	unsigned long end = ALIGN((unsigned long)_end, PMD_PAGE_SIZE);
+
+	if (!pti_kernel_image_global_ok())
+		return;
+
+	pti_clone_pmds(start, end, _PAGE_RW);
+}
+
+/*
+ * This is the only user for it and it is not arch-generic like
+ * the other set_memory.h functions.  Just extern it.
+ */
+extern int set_memory_nonglobal(unsigned long addr, int numpages);
+void pti_set_kernel_image_nonglobal(void)
+{
+	/*
+	 * The identity map is created with PMDs, regardless of the
+	 * actual length of the kernel.  We need to clear
+	 * _PAGE_GLOBAL up to a PMD boundary, not just to the end
+	 * of the image.
+	 */
+	unsigned long start = PFN_ALIGN(_text);
+	unsigned long end = ALIGN((unsigned long)_end, PMD_PAGE_SIZE);
+
+	if (pti_kernel_image_global_ok())
+		return;
+
+	pr_debug("set kernel image non-global\n");
+
+	set_memory_nonglobal(start, (end - start) >> PAGE_SHIFT);
 }
 
 /*
@@ -362,6 +474,10 @@ void __init pti_init(void)
 	pr_info("enabled\n");
 
 	pti_clone_user_shared();
+
+	/* Undo all global bits from the init pagetables in head_64.S: */
+	pti_set_kernel_image_nonglobal();
+	/* Replace some of the global bits just for shared entry text: */
 	pti_clone_entry_text();
 	pti_setup_espfix64();
 	pti_setup_vsyscall();
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index 74a532989308..48b14b534897 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -51,6 +51,12 @@ static int set_up_temporary_text_mapping(pgd_t *pgd)
 	pmd_t *pmd;
 	pud_t *pud;
 	p4d_t *p4d = NULL;
+	pgprot_t pgtable_prot = __pgprot(_KERNPG_TABLE);
+	pgprot_t pmd_text_prot = __pgprot(__PAGE_KERNEL_LARGE_EXEC);
+
+	/* Filter out unsupported __PAGE_KERNEL* bits: */
+	pgprot_val(pmd_text_prot) &= __default_kernel_pte_mask;
+	pgprot_val(pgtable_prot)  &= __default_kernel_pte_mask;
 
 	/*
 	 * The new mapping only has to cover the page containing the image
@@ -81,15 +87,19 @@ static int set_up_temporary_text_mapping(pgd_t *pgd)
 		return -ENOMEM;
 
 	set_pmd(pmd + pmd_index(restore_jump_address),
-		__pmd((jump_address_phys & PMD_MASK) | __PAGE_KERNEL_LARGE_EXEC));
+		__pmd((jump_address_phys & PMD_MASK) | pgprot_val(pmd_text_prot)));
 	set_pud(pud + pud_index(restore_jump_address),
-		__pud(__pa(pmd) | _KERNPG_TABLE));
+		__pud(__pa(pmd) | pgprot_val(pgtable_prot)));
 	if (p4d) {
-		set_p4d(p4d + p4d_index(restore_jump_address), __p4d(__pa(pud) | _KERNPG_TABLE));
-		set_pgd(pgd + pgd_index(restore_jump_address), __pgd(__pa(p4d) | _KERNPG_TABLE));
+		p4d_t new_p4d = __p4d(__pa(pud) | pgprot_val(pgtable_prot));
+		pgd_t new_pgd = __pgd(__pa(p4d) | pgprot_val(pgtable_prot));
+
+		set_p4d(p4d + p4d_index(restore_jump_address), new_p4d);
+		set_pgd(pgd + pgd_index(restore_jump_address), new_pgd);
 	} else {
 		/* No p4d for 4-level paging: point the pgd to the pud page table */
-		set_pgd(pgd + pgd_index(restore_jump_address), __pgd(__pa(pud) | _KERNPG_TABLE));
+		pgd_t new_pgd = __pgd(__pa(pud) | pgprot_val(pgtable_prot));
+		set_pgd(pgd + pgd_index(restore_jump_address), new_pgd);
 	}
 
 	return 0;
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index d70c15de417b..2e9ee023e6bc 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -6,6 +6,9 @@ purgatory-y := purgatory.o stack.o setup-x86_$(BITS).o sha256.o entry64.o string
 targets += $(purgatory-y)
 PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y))
 
+$(obj)/sha256.o: $(srctree)/lib/sha256.c
+	$(call if_changed_rule,cc_o_c)
+
 LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib -z nodefaultlib
 targets += purgatory.ro
 
diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c
index 470edad96bb9..025c34ac0d84 100644
--- a/arch/x86/purgatory/purgatory.c
+++ b/arch/x86/purgatory/purgatory.c
@@ -11,9 +11,9 @@
  */
 
 #include <linux/bug.h>
+#include <linux/sha256.h>
 #include <asm/purgatory.h>
 
-#include "sha256.h"
 #include "../boot/string.h"
 
 unsigned long purgatory_backup_dest __section(.kexec-purgatory);
diff --git a/arch/x86/purgatory/string.c b/arch/x86/purgatory/string.c
index d886b1fa36f0..795ca4f2cb3c 100644
--- a/arch/x86/purgatory/string.c
+++ b/arch/x86/purgatory/string.c
@@ -10,4 +10,16 @@
  * Version 2.  See the file COPYING for more details.
  */
 
+#include <linux/types.h>
+
 #include "../boot/string.c"
+
+void *memcpy(void *dst, const void *src, size_t len)
+{
+	return __builtin_memcpy(dst, src, len);
+}
+
+void *memset(void *dst, int c, size_t len)
+{
+	return __builtin_memset(dst, c, len);
+}
diff --git a/arch/x86/um/stub_segv.c b/arch/x86/um/stub_segv.c
index 1518d2805ae8..27361cbb7ca9 100644
--- a/arch/x86/um/stub_segv.c
+++ b/arch/x86/um/stub_segv.c
@@ -6,11 +6,12 @@
 #include <sysdep/stub.h>
 #include <sysdep/faultinfo.h>
 #include <sysdep/mcontext.h>
+#include <sys/ucontext.h>
 
 void __attribute__ ((__section__ (".__syscall_stub")))
 stub_segv_handler(int sig, siginfo_t *info, void *p)
 {
-	struct ucontext *uc = p;
+	ucontext_t *uc = p;
 
 	GET_FAULTINFO_FROM_MC(*((struct faultinfo *) STUB_DATA),
 			      &uc->uc_mcontext);
diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c
index 2163888497d3..5e53bfbe5823 100644
--- a/arch/x86/xen/apic.c
+++ b/arch/x86/xen/apic.c
@@ -112,7 +112,7 @@ static int xen_madt_oem_check(char *oem_id, char *oem_table_id)
 	return xen_pv_domain();
 }
 
-static int xen_id_always_valid(int apicid)
+static int xen_id_always_valid(u32 apicid)
 {
 	return 1;
 }
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 3c2c2530737e..c36d23aa6c35 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1259,10 +1259,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
 	 */
 	__userpte_alloc_gfp &= ~__GFP_HIGHMEM;
 
-	/* Work out if we support NX */
-	get_cpu_cap(&boot_cpu_data);
-	x86_configure_nx();
-
 	/* Get mfn list */
 	xen_build_dynamic_phys_to_machine();
 
@@ -1272,6 +1268,10 @@ asmlinkage __visible void __init xen_start_kernel(void)
 	 */
 	xen_setup_gdt(0);
 
+	/* Work out if we support NX */
+	get_cpu_cap(&boot_cpu_data);
+	x86_configure_nx();
+
 	xen_init_irq_ops();
 
 	/* Let's presume PV guests always boot on vCPU with id 0. */
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index d20763472920..486c0a34d00b 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -116,6 +116,8 @@ DEFINE_PER_CPU(unsigned long, xen_current_cr3);	 /* actual vcpu cr3 */
 
 static phys_addr_t xen_pt_base, xen_pt_size __initdata;
 
+static DEFINE_STATIC_KEY_FALSE(xen_struct_pages_ready);
+
 /*
  * Just beyond the highest usermode address.  STACK_TOP_MAX has a
  * redzone above it, so round it up to a PGD boundary.
@@ -155,11 +157,18 @@ void make_lowmem_page_readwrite(void *vaddr)
 }
 
 
+/*
+ * During early boot all page table pages are pinned, but we do not have struct
+ * pages, so return true until struct pages are ready.
+ */
 static bool xen_page_pinned(void *ptr)
 {
-	struct page *page = virt_to_page(ptr);
+	if (static_branch_likely(&xen_struct_pages_ready)) {
+		struct page *page = virt_to_page(ptr);
 
-	return PagePinned(page);
+		return PagePinned(page);
+	}
+	return true;
 }
 
 static void xen_extend_mmu_update(const struct mmu_update *update)
@@ -836,11 +845,6 @@ void xen_mm_pin_all(void)
 	spin_unlock(&pgd_lock);
 }
 
-/*
- * The init_mm pagetable is really pinned as soon as its created, but
- * that's before we have page structures to store the bits.  So do all
- * the book-keeping now.
- */
 static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page,
 				  enum pt_level level)
 {
@@ -848,8 +852,18 @@ static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page,
 	return 0;
 }
 
-static void __init xen_mark_init_mm_pinned(void)
+/*
+ * The init_mm pagetable is really pinned as soon as it's created, but
+ * that's before we have page structures to store the bits.  So do all
+ * the book-keeping now once struct pages for allocated pages are
+ * initialized. This happens only after free_all_bootmem() is called.
+ */
+static void __init xen_after_bootmem(void)
 {
+	static_branch_enable(&xen_struct_pages_ready);
+#ifdef CONFIG_X86_64
+	SetPagePinned(virt_to_page(level3_user_vsyscall));
+#endif
 	xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP);
 }
 
@@ -1623,14 +1637,15 @@ static inline void __set_pfn_prot(unsigned long pfn, pgprot_t prot)
 static inline void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn,
 				    unsigned level)
 {
-	bool pinned = PagePinned(virt_to_page(mm->pgd));
+	bool pinned = xen_page_pinned(mm->pgd);
 
 	trace_xen_mmu_alloc_ptpage(mm, pfn, level, pinned);
 
 	if (pinned) {
 		struct page *page = pfn_to_page(pfn);
 
-		SetPagePinned(page);
+		if (static_branch_likely(&xen_struct_pages_ready))
+			SetPagePinned(page);
 
 		if (!PageHighMem(page)) {
 			xen_mc_batch();
@@ -2364,9 +2379,7 @@ static void __init xen_post_allocator_init(void)
 
 #ifdef CONFIG_X86_64
 	pv_mmu_ops.write_cr3 = &xen_write_cr3;
-	SetPagePinned(virt_to_page(level3_user_vsyscall));
 #endif
-	xen_mark_init_mm_pinned();
 }
 
 static void xen_leave_lazy_mmu(void)
@@ -2450,6 +2463,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
 void __init xen_init_mmu_ops(void)
 {
 	x86_init.paging.pagetable_init = xen_pagetable_init;
+	x86_init.hyper.init_after_bootmem = xen_after_bootmem;
 
 	pv_mmu_ops = xen_mmu_ops;
 
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index c0c756c76afe..2e20ae2fa2d6 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -425,6 +425,7 @@ static void xen_pv_play_dead(void) /* used only with HOTPLUG_CPU */
 	 * data back is to call:
 	 */
 	tick_nohz_idle_enter();
+	tick_nohz_idle_stop_tick_protected();
 
 	cpuhp_online_idle(CPUHP_AP_ONLINE_IDLE);
 }
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 96f26e026783..5077ead5e59c 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -89,7 +89,9 @@ END(hypercall_page)
 	ELFNOTE(Xen, XEN_ELFNOTE_FEATURES,
 		.ascii "!writable_page_tables|pae_pgdir_above_4gb")
 	ELFNOTE(Xen, XEN_ELFNOTE_SUPPORTED_FEATURES,
-		.long (1 << XENFEAT_writable_page_tables) | (1 << XENFEAT_dom0))
+		.long (1 << XENFEAT_writable_page_tables) |       \
+		      (1 << XENFEAT_dom0) |                       \
+		      (1 << XENFEAT_linux_rsdp_unrestricted))
 	ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE,       .asciz "yes")
 	ELFNOTE(Xen, XEN_ELFNOTE_LOADER,         .asciz "generic")
 	ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,
diff --git a/arch/xtensa/include/uapi/asm/mman.h b/arch/xtensa/include/uapi/asm/mman.h
index 3e9d01ada81f..58f29a9d895d 100644
--- a/arch/xtensa/include/uapi/asm/mman.h
+++ b/arch/xtensa/include/uapi/asm/mman.h
@@ -57,6 +57,7 @@
 #define MAP_NONBLOCK	0x20000		/* do not block on IO */
 #define MAP_STACK	0x40000		/* give out an address that is best suited for process/thread stacks */
 #define MAP_HUGETLB	0x80000		/* create a huge page mapping */
+#define MAP_FIXED_NOREPLACE 0x100000	/* MAP_FIXED which doesn't unmap underlying mapping */
 #ifdef CONFIG_MMAP_ALLOW_UNINITIALIZED
 # define MAP_UNINITIALIZED 0x4000000	/* For anonymous mmap, memory could be
 					 * uninitialized */
diff --git a/block/blk-core.c b/block/blk-core.c
index abcb8684ba67..806ce2442819 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2385,8 +2385,20 @@ blk_qc_t generic_make_request(struct bio *bio)
 	 * yet.
 	 */
 	struct bio_list bio_list_on_stack[2];
+	blk_mq_req_flags_t flags = 0;
+	struct request_queue *q = bio->bi_disk->queue;
 	blk_qc_t ret = BLK_QC_T_NONE;
 
+	if (bio->bi_opf & REQ_NOWAIT)
+		flags = BLK_MQ_REQ_NOWAIT;
+	if (blk_queue_enter(q, flags) < 0) {
+		if (!blk_queue_dying(q) && (bio->bi_opf & REQ_NOWAIT))
+			bio_wouldblock_error(bio);
+		else
+			bio_io_error(bio);
+		return ret;
+	}
+
 	if (!generic_make_request_checks(bio))
 		goto out;
 
@@ -2423,11 +2435,22 @@ blk_qc_t generic_make_request(struct bio *bio)
 	bio_list_init(&bio_list_on_stack[0]);
 	current->bio_list = bio_list_on_stack;
 	do {
-		struct request_queue *q = bio->bi_disk->queue;
-		blk_mq_req_flags_t flags = bio->bi_opf & REQ_NOWAIT ?
-			BLK_MQ_REQ_NOWAIT : 0;
+		bool enter_succeeded = true;
+
+		if (unlikely(q != bio->bi_disk->queue)) {
+			if (q)
+				blk_queue_exit(q);
+			q = bio->bi_disk->queue;
+			flags = 0;
+			if (bio->bi_opf & REQ_NOWAIT)
+				flags = BLK_MQ_REQ_NOWAIT;
+			if (blk_queue_enter(q, flags) < 0) {
+				enter_succeeded = false;
+				q = NULL;
+			}
+		}
 
-		if (likely(blk_queue_enter(q, flags) == 0)) {
+		if (enter_succeeded) {
 			struct bio_list lower, same;
 
 			/* Create a fresh bio_list for all subordinate requests */
@@ -2435,8 +2458,6 @@ blk_qc_t generic_make_request(struct bio *bio)
 			bio_list_init(&bio_list_on_stack[0]);
 			ret = q->make_request_fn(q, bio);
 
-			blk_queue_exit(q);
-
 			/* sort new bios into those for a lower level
 			 * and those for the same level
 			 */
@@ -2463,6 +2484,8 @@ blk_qc_t generic_make_request(struct bio *bio)
 	current->bio_list = NULL; /* deactivate */
 
 out:
+	if (q)
+		blk_queue_exit(q);
 	return ret;
 }
 EXPORT_SYMBOL(generic_make_request);
diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
index 9f8cffc8a701..3eb169f15842 100644
--- a/block/blk-mq-cpumap.c
+++ b/block/blk-mq-cpumap.c
@@ -16,11 +16,6 @@
 
 static int cpu_to_queue_index(unsigned int nr_queues, const int cpu)
 {
-	/*
-	 * Non present CPU will be mapped to queue index 0.
-	 */
-	if (!cpu_present(cpu))
-		return 0;
 	return cpu % nr_queues;
 }
 
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 58b3b79cbe83..3080e18cb859 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -235,7 +235,6 @@ static const char *const hctx_state_name[] = {
 	HCTX_STATE_NAME(STOPPED),
 	HCTX_STATE_NAME(TAG_ACTIVE),
 	HCTX_STATE_NAME(SCHED_RESTART),
-	HCTX_STATE_NAME(START_ON_RUN),
 };
 #undef HCTX_STATE_NAME
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index f5c7dbcb954f..0dc9e341c2a7 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1180,7 +1180,12 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
 		struct blk_mq_queue_data bd;
 
 		rq = list_first_entry(list, struct request, queuelist);
-		if (!blk_mq_get_driver_tag(rq, &hctx, false)) {
+
+		hctx = blk_mq_map_queue(rq->q, rq->mq_ctx->cpu);
+		if (!got_budget && !blk_mq_get_dispatch_budget(hctx))
+			break;
+
+		if (!blk_mq_get_driver_tag(rq, NULL, false)) {
 			/*
 			 * The initial allocation attempt failed, so we need to
 			 * rerun the hardware queue when a tag is freed. The
@@ -1189,8 +1194,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
 			 * we'll re-run it below.
 			 */
 			if (!blk_mq_mark_tag_wait(&hctx, rq)) {
-				if (got_budget)
-					blk_mq_put_dispatch_budget(hctx);
+				blk_mq_put_dispatch_budget(hctx);
 				/*
 				 * For non-shared tags, the RESTART check
 				 * will suffice.
@@ -1201,11 +1205,6 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
 			}
 		}
 
-		if (!got_budget && !blk_mq_get_dispatch_budget(hctx)) {
-			blk_mq_put_driver_tag(rq);
-			break;
-		}
-
 		list_del_init(&rq->queuelist);
 
 		bd.rq = rq;
@@ -1336,6 +1335,15 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 	hctx_unlock(hctx, srcu_idx);
 }
 
+static inline int blk_mq_first_mapped_cpu(struct blk_mq_hw_ctx *hctx)
+{
+	int cpu = cpumask_first_and(hctx->cpumask, cpu_online_mask);
+
+	if (cpu >= nr_cpu_ids)
+		cpu = cpumask_first(hctx->cpumask);
+	return cpu;
+}
+
 /*
  * It'd be great if the workqueue API had a way to pass
  * in a mask and had some smarts for more clever placement.
@@ -1345,26 +1353,17 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx)
 {
 	bool tried = false;
+	int next_cpu = hctx->next_cpu;
 
 	if (hctx->queue->nr_hw_queues == 1)
 		return WORK_CPU_UNBOUND;
 
 	if (--hctx->next_cpu_batch <= 0) {
-		int next_cpu;
 select_cpu:
-		next_cpu = cpumask_next_and(hctx->next_cpu, hctx->cpumask,
+		next_cpu = cpumask_next_and(next_cpu, hctx->cpumask,
 				cpu_online_mask);
 		if (next_cpu >= nr_cpu_ids)
-			next_cpu = cpumask_first_and(hctx->cpumask,cpu_online_mask);
-
-		/*
-		 * No online CPU is found, so have to make sure hctx->next_cpu
-		 * is set correctly for not breaking workqueue.
-		 */
-		if (next_cpu >= nr_cpu_ids)
-			hctx->next_cpu = cpumask_first(hctx->cpumask);
-		else
-			hctx->next_cpu = next_cpu;
+			next_cpu = blk_mq_first_mapped_cpu(hctx);
 		hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
 	}
 
@@ -1372,7 +1371,7 @@ select_cpu:
 	 * Do unbound schedule if we can't find a online CPU for this hctx,
 	 * and it should only happen in the path of handling CPU DEAD.
 	 */
-	if (!cpu_online(hctx->next_cpu)) {
+	if (!cpu_online(next_cpu)) {
 		if (!tried) {
 			tried = true;
 			goto select_cpu;
@@ -1382,18 +1381,18 @@ select_cpu:
 		 * Make sure to re-select CPU next time once after CPUs
 		 * in hctx->cpumask become online again.
 		 */
+		hctx->next_cpu = next_cpu;
 		hctx->next_cpu_batch = 1;
 		return WORK_CPU_UNBOUND;
 	}
-	return hctx->next_cpu;
+
+	hctx->next_cpu = next_cpu;
+	return next_cpu;
 }
 
 static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async,
 					unsigned long msecs)
 {
-	if (WARN_ON_ONCE(!blk_mq_hw_queue_mapped(hctx)))
-		return;
-
 	if (unlikely(blk_mq_hctx_stopped(hctx)))
 		return;
 
@@ -1560,40 +1559,14 @@ static void blk_mq_run_work_fn(struct work_struct *work)
 	hctx = container_of(work, struct blk_mq_hw_ctx, run_work.work);
 
 	/*
-	 * If we are stopped, don't run the queue. The exception is if
-	 * BLK_MQ_S_START_ON_RUN is set. For that case, we auto-clear
-	 * the STOPPED bit and run it.
+	 * If we are stopped, don't run the queue.
 	 */
-	if (test_bit(BLK_MQ_S_STOPPED, &hctx->state)) {
-		if (!test_bit(BLK_MQ_S_START_ON_RUN, &hctx->state))
-			return;
-
-		clear_bit(BLK_MQ_S_START_ON_RUN, &hctx->state);
-		clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
-	}
+	if (test_bit(BLK_MQ_S_STOPPED, &hctx->state))
+		return;
 
 	__blk_mq_run_hw_queue(hctx);
 }
 
-
-void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
-{
-	if (WARN_ON_ONCE(!blk_mq_hw_queue_mapped(hctx)))
-		return;
-
-	/*
-	 * Stop the hw queue, then modify currently delayed work.
-	 * This should prevent us from running the queue prematurely.
-	 * Mark the queue as auto-clearing STOPPED when it runs.
-	 */
-	blk_mq_stop_hw_queue(hctx);
-	set_bit(BLK_MQ_S_START_ON_RUN, &hctx->state);
-	kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
-					&hctx->run_work,
-					msecs_to_jiffies(msecs));
-}
-EXPORT_SYMBOL(blk_mq_delay_queue);
-
 static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx,
 					    struct request *rq,
 					    bool at_head)
@@ -1804,11 +1777,11 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
 	if (q->elevator && !bypass_insert)
 		goto insert;
 
-	if (!blk_mq_get_driver_tag(rq, NULL, false))
+	if (!blk_mq_get_dispatch_budget(hctx))
 		goto insert;
 
-	if (!blk_mq_get_dispatch_budget(hctx)) {
-		blk_mq_put_driver_tag(rq);
+	if (!blk_mq_get_driver_tag(rq, NULL, false)) {
+		blk_mq_put_dispatch_budget(hctx);
 		goto insert;
 	}
 
@@ -2356,7 +2329,7 @@ static void blk_mq_free_map_and_requests(struct blk_mq_tag_set *set,
 
 static void blk_mq_map_swqueue(struct request_queue *q)
 {
-	unsigned int i, hctx_idx;
+	unsigned int i;
 	struct blk_mq_hw_ctx *hctx;
 	struct blk_mq_ctx *ctx;
 	struct blk_mq_tag_set *set = q->tag_set;
@@ -2373,23 +2346,8 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 
 	/*
 	 * Map software to hardware queues.
-	 *
-	 * If the cpu isn't present, the cpu is mapped to first hctx.
 	 */
 	for_each_possible_cpu(i) {
-		hctx_idx = q->mq_map[i];
-		/* unmapped hw queue can be remapped after CPU topo changed */
-		if (!set->tags[hctx_idx] &&
-		    !__blk_mq_alloc_rq_map(set, hctx_idx)) {
-			/*
-			 * If tags initialization fail for some hctx,
-			 * that hctx won't be brought online.  In this
-			 * case, remap the current ctx to hctx[0] which
-			 * is guaranteed to always have tags allocated
-			 */
-			q->mq_map[i] = 0;
-		}
-
 		ctx = per_cpu_ptr(q->queue_ctx, i);
 		hctx = blk_mq_map_queue(q, i);
 
@@ -2401,21 +2359,8 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 	mutex_unlock(&q->sysfs_lock);
 
 	queue_for_each_hw_ctx(q, hctx, i) {
-		/*
-		 * If no software queues are mapped to this hardware queue,
-		 * disable it and free the request entries.
-		 */
-		if (!hctx->nr_ctx) {
-			/* Never unmap queue 0.  We need it as a
-			 * fallback in case of a new remap fails
-			 * allocation
-			 */
-			if (i && set->tags[i])
-				blk_mq_free_map_and_requests(set, i);
-
-			hctx->tags = NULL;
-			continue;
-		}
+		/* every hctx should get mapped by at least one CPU */
+		WARN_ON(!hctx->nr_ctx);
 
 		hctx->tags = set->tags[i];
 		WARN_ON(!hctx->tags);
@@ -2430,8 +2375,7 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 		/*
 		 * Initialize batch roundrobin counts
 		 */
-		hctx->next_cpu = cpumask_first_and(hctx->cpumask,
-				cpu_online_mask);
+		hctx->next_cpu = blk_mq_first_mapped_cpu(hctx);
 		hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
 	}
 }
diff --git a/crypto/Kconfig b/crypto/Kconfig
index c0dabed5122e..76e8c88c97b4 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -500,6 +500,15 @@ config CRYPTO_CRC32_PCLMUL
 	  which will enable any routine to use the CRC-32-IEEE 802.3 checksum
 	  and gain better performance as compared with the table implementation.
 
+config CRYPTO_CRC32_MIPS
+	tristate "CRC32c and CRC32 CRC algorithm (MIPS)"
+	depends on MIPS_CRC_SUPPORT
+	select CRYPTO_HASH
+	help
+	  CRC32c and CRC32 CRC algorithms implemented using mips crypto
+	  instructions, when available.
+
+
 config CRYPTO_CRCT10DIF
 	tristate "CRCT10DIF algorithm"
 	select CRYPTO_HASH
diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index c49766b03165..7846c0c20cfe 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -158,16 +158,16 @@ static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	void *private;
 	int err;
 
-	/* If caller uses non-allowed flag, return error. */
-	if ((sa->salg_feat & ~allowed) || (sa->salg_mask & ~allowed))
-		return -EINVAL;
-
 	if (sock->state == SS_CONNECTED)
 		return -EINVAL;
 
 	if (addr_len < sizeof(*sa))
 		return -EINVAL;
 
+	/* If caller uses non-allowed flag, return error. */
+	if ((sa->salg_feat & ~allowed) || (sa->salg_mask & ~allowed))
+		return -EINVAL;
+
 	sa->salg_type[sizeof(sa->salg_type) - 1] = 0;
 	sa->salg_name[sizeof(sa->salg_name) + addr_len - sizeof(*sa) - 1] = 0;
 
diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index 9e702bc4960f..7a3a541046ed 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -34,6 +34,7 @@
 struct iort_its_msi_chip {
 	struct list_head	list;
 	struct fwnode_handle	*fw_node;
+	phys_addr_t		base_addr;
 	u32			translation_id;
 };
 
@@ -156,14 +157,16 @@ static LIST_HEAD(iort_msi_chip_list);
 static DEFINE_SPINLOCK(iort_msi_chip_lock);
 
 /**
- * iort_register_domain_token() - register domain token and related ITS ID
- * to the list from where we can get it back later on.
+ * iort_register_domain_token() - register domain token along with related
+ * ITS ID and base address to the list from where we can get it back later on.
  * @trans_id: ITS ID.
+ * @base: ITS base address.
  * @fw_node: Domain token.
  *
  * Returns: 0 on success, -ENOMEM if no memory when allocating list element
  */
-int iort_register_domain_token(int trans_id, struct fwnode_handle *fw_node)
+int iort_register_domain_token(int trans_id, phys_addr_t base,
+			       struct fwnode_handle *fw_node)
 {
 	struct iort_its_msi_chip *its_msi_chip;
 
@@ -173,6 +176,7 @@ int iort_register_domain_token(int trans_id, struct fwnode_handle *fw_node)
 
 	its_msi_chip->fw_node = fw_node;
 	its_msi_chip->translation_id = trans_id;
+	its_msi_chip->base_addr = base;
 
 	spin_lock(&iort_msi_chip_lock);
 	list_add(&its_msi_chip->list, &iort_msi_chip_list);
@@ -569,6 +573,24 @@ int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id)
 	return -ENODEV;
 }
 
+static int __maybe_unused iort_find_its_base(u32 its_id, phys_addr_t *base)
+{
+	struct iort_its_msi_chip *its_msi_chip;
+	int ret = -ENODEV;
+
+	spin_lock(&iort_msi_chip_lock);
+	list_for_each_entry(its_msi_chip, &iort_msi_chip_list, list) {
+		if (its_msi_chip->translation_id == its_id) {
+			*base = its_msi_chip->base_addr;
+			ret = 0;
+			break;
+		}
+	}
+	spin_unlock(&iort_msi_chip_lock);
+
+	return ret;
+}
+
 /**
  * iort_dev_find_its_id() - Find the ITS identifier for a device
  * @dev: The device.
@@ -754,6 +776,24 @@ static inline bool iort_iommu_driver_enabled(u8 type)
 }
 
 #ifdef CONFIG_IOMMU_API
+static struct acpi_iort_node *iort_get_msi_resv_iommu(struct device *dev)
+{
+	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+	struct acpi_iort_node *node;
+	struct acpi_iort_smmu_v3 *smmu;
+
+	/* Only the HiSilicon Hi161x SMMUv3 model qualifies. */
+	node = iort_get_iort_node(fwspec->iommu_fwnode);
+	if (!node || node->type != ACPI_IORT_NODE_SMMU_V3)
+		return NULL;
+
+	smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
+	if (smmu->model != ACPI_IORT_SMMU_V3_HISILICON_HI161X)
+		return NULL;
+
+	return node;
+}
+
 static inline const struct iommu_ops *iort_fwspec_iommu_ops(
 				struct iommu_fwspec *fwspec)
 {
@@ -770,6 +810,69 @@ static inline int iort_add_device_replay(const struct iommu_ops *ops,
 
 	return err;
 }
+
+/**
+ * iort_iommu_msi_get_resv_regions - Reserved region driver helper
+ * @dev: Device from iommu_get_resv_regions()
+ * @head: Reserved region list from iommu_get_resv_regions()
+ *
+ * Returns: Number of MSI reserved regions on success (0 if the platform
+ *          doesn't require the reservation or the device has no associated
+ *          MSI regions), appropriate error value otherwise. The ITS interrupt
+ *          translation spaces (ITS_base + SZ_64K, SZ_64K) associated with the
+ *          device are the MSI reserved regions.
+ */
+int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head)
+{
+	const int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
+	struct acpi_iort_node *smmu_node, *group_node = NULL;
+	struct acpi_iort_its_group *group;
+	struct iommu_resv_region *region;
+	int idx, added = 0;
+	phys_addr_t base;
+
+	smmu_node = iort_get_msi_resv_iommu(dev);
+	if (!smmu_node)
+		return 0;
+
+	/*
+	 * Only the first ITS group reached through any of the device's
+	 * IDs is considered. This relies on HW topologies where a given
+	 * PCI or named component maps its IDs to a single ITS group; if
+	 * IORT mappings can ever spread one component's IDs over several
+	 * ITS groups, this lookup has to be reworked so that regions are
+	 * reserved for every group the component may map IDs to.
+	 */
+
+	for (idx = 0; !group_node && idx < dev->iommu_fwspec->num_ids; idx++)
+		group_node = iort_node_map_id(smmu_node,
+					      dev->iommu_fwspec->ids[idx],
+					      NULL, IORT_MSI_TYPE);
+
+	if (!group_node)
+		return 0;
+
+	/* The ITS identifiers live in the node specific data */
+	group = (struct acpi_iort_its_group *)group_node->node_data;
+
+	for (idx = 0; idx < group->its_count; idx++) {
+		/* An ITS that was never registered has no known base */
+		if (iort_find_its_base(group->identifiers[idx], &base))
+			continue;
+
+		/* The ITS translation space sits at ITS_base + 64K */
+		region = iommu_alloc_resv_region(base + SZ_64K, SZ_64K,
+						 prot, IOMMU_RESV_MSI);
+		if (!region)
+			continue;
+
+		list_add_tail(&region->list, head);
+		added++;
+	}
+
+	/* Anything short of one region per ITS in the group is an error */
+	return (added == group->its_count) ? added : -ENODEV;
+}
 #else
 static inline const struct iommu_ops *iort_fwspec_iommu_ops(
 				struct iommu_fwspec *fwspec)
@@ -777,6 +880,8 @@ static inline const struct iommu_ops *iort_fwspec_iommu_ops(
 static inline int iort_add_device_replay(const struct iommu_ops *ops,
 					 struct device *dev)
 { return 0; }
+int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head)
+{ return 0; }
 #endif
 
 static int iort_iommu_xlate(struct device *dev, struct acpi_iort_node *node,
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 22a112b4f4d8..e2235ed3e4be 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -36,16 +36,6 @@ static bool force_enable_dimms;
 module_param(force_enable_dimms, bool, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(force_enable_dimms, "Ignore _STA (ACPI DIMM device) status");
 
-static unsigned int scrub_timeout = NFIT_ARS_TIMEOUT;
-module_param(scrub_timeout, uint, S_IRUGO|S_IWUSR);
-MODULE_PARM_DESC(scrub_timeout, "Initial scrub timeout in seconds");
-
-/* after three payloads of overflow, it's dead jim */
-static unsigned int scrub_overflow_abort = 3;
-module_param(scrub_overflow_abort, uint, S_IRUGO|S_IWUSR);
-MODULE_PARM_DESC(scrub_overflow_abort,
-		"Number of times we overflow ARS results before abort");
-
 static bool disable_vendor_specific;
 module_param(disable_vendor_specific, bool, S_IRUGO);
 MODULE_PARM_DESC(disable_vendor_specific,
@@ -60,6 +50,10 @@ module_param(default_dsm_family, int, S_IRUGO);
 MODULE_PARM_DESC(default_dsm_family,
 		"Try this DSM type first when identifying NVDIMM family");
 
+static bool no_init_ars;
+module_param(no_init_ars, bool, 0644);
+MODULE_PARM_DESC(no_init_ars, "Skip ARS run at nfit init time");
+
 LIST_HEAD(acpi_descs);
 DEFINE_MUTEX(acpi_desc_lock);
 
@@ -197,7 +191,7 @@ static int xlat_nvdimm_status(struct nvdimm *nvdimm, void *buf, unsigned int cmd
 		 * In the _LSI, _LSR, _LSW case the locked status is
 		 * communicated via the read/write commands
 		 */
-		if (nfit_mem->has_lsi)
+		if (nfit_mem->has_lsr)
 			break;
 
 		if (status >> 16 & ND_CONFIG_LOCKED)
@@ -477,14 +471,14 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
 		in_buf.buffer.length = call_pkg->nd_size_in;
 	}
 
-	dev_dbg(dev, "%s:%s cmd: %d: func: %d input length: %d\n",
-			__func__, dimm_name, cmd, func, in_buf.buffer.length);
+	dev_dbg(dev, "%s cmd: %d: func: %d input length: %d\n",
+		dimm_name, cmd, func, in_buf.buffer.length);
 	print_hex_dump_debug("nvdimm in  ", DUMP_PREFIX_OFFSET, 4, 4,
 			in_buf.buffer.pointer,
 			min_t(u32, 256, in_buf.buffer.length), true);
 
 	/* call the BIOS, prefer the named methods over _DSM if available */
-	if (nvdimm && cmd == ND_CMD_GET_CONFIG_SIZE && nfit_mem->has_lsi)
+	if (nvdimm && cmd == ND_CMD_GET_CONFIG_SIZE && nfit_mem->has_lsr)
 		out_obj = acpi_label_info(handle);
 	else if (nvdimm && cmd == ND_CMD_GET_CONFIG_DATA && nfit_mem->has_lsr) {
 		struct nd_cmd_get_config_data_hdr *p = buf;
@@ -507,8 +501,7 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
 	}
 
 	if (!out_obj) {
-		dev_dbg(dev, "%s:%s _DSM failed cmd: %s\n", __func__, dimm_name,
-				cmd_name);
+		dev_dbg(dev, "%s _DSM failed cmd: %s\n", dimm_name, cmd_name);
 		return -EINVAL;
 	}
 
@@ -529,13 +522,13 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
 	}
 
 	if (out_obj->package.type != ACPI_TYPE_BUFFER) {
-		dev_dbg(dev, "%s:%s unexpected output object type cmd: %s type: %d\n",
-				__func__, dimm_name, cmd_name, out_obj->type);
+		dev_dbg(dev, "%s unexpected output object type cmd: %s type: %d\n",
+				dimm_name, cmd_name, out_obj->type);
 		rc = -EINVAL;
 		goto out;
 	}
 
-	dev_dbg(dev, "%s:%s cmd: %s output length: %d\n", __func__, dimm_name,
+	dev_dbg(dev, "%s cmd: %s output length: %d\n", dimm_name,
 			cmd_name, out_obj->buffer.length);
 	print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4, 4,
 			out_obj->buffer.pointer,
@@ -547,14 +540,14 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
 				out_obj->buffer.length - offset);
 
 		if (offset + out_size > out_obj->buffer.length) {
-			dev_dbg(dev, "%s:%s output object underflow cmd: %s field: %d\n",
-					__func__, dimm_name, cmd_name, i);
+			dev_dbg(dev, "%s output object underflow cmd: %s field: %d\n",
+					dimm_name, cmd_name, i);
 			break;
 		}
 
 		if (in_buf.buffer.length + offset + out_size > buf_len) {
-			dev_dbg(dev, "%s:%s output overrun cmd: %s field: %d\n",
-					__func__, dimm_name, cmd_name, i);
+			dev_dbg(dev, "%s output overrun cmd: %s field: %d\n",
+					dimm_name, cmd_name, i);
 			rc = -ENXIO;
 			goto out;
 		}
@@ -656,7 +649,7 @@ static bool add_spa(struct acpi_nfit_desc *acpi_desc,
 	INIT_LIST_HEAD(&nfit_spa->list);
 	memcpy(nfit_spa->spa, spa, sizeof(*spa));
 	list_add_tail(&nfit_spa->list, &acpi_desc->spas);
-	dev_dbg(dev, "%s: spa index: %d type: %s\n", __func__,
+	dev_dbg(dev, "spa index: %d type: %s\n",
 			spa->range_index,
 			spa_type_name(nfit_spa_type(spa)));
 	return true;
@@ -685,8 +678,8 @@ static bool add_memdev(struct acpi_nfit_desc *acpi_desc,
 	INIT_LIST_HEAD(&nfit_memdev->list);
 	memcpy(nfit_memdev->memdev, memdev, sizeof(*memdev));
 	list_add_tail(&nfit_memdev->list, &acpi_desc->memdevs);
-	dev_dbg(dev, "%s: memdev handle: %#x spa: %d dcr: %d flags: %#x\n",
-			__func__, memdev->device_handle, memdev->range_index,
+	dev_dbg(dev, "memdev handle: %#x spa: %d dcr: %d flags: %#x\n",
+			memdev->device_handle, memdev->range_index,
 			memdev->region_index, memdev->flags);
 	return true;
 }
@@ -754,7 +747,7 @@ static bool add_dcr(struct acpi_nfit_desc *acpi_desc,
 	INIT_LIST_HEAD(&nfit_dcr->list);
 	memcpy(nfit_dcr->dcr, dcr, sizeof_dcr(dcr));
 	list_add_tail(&nfit_dcr->list, &acpi_desc->dcrs);
-	dev_dbg(dev, "%s: dcr index: %d windows: %d\n", __func__,
+	dev_dbg(dev, "dcr index: %d windows: %d\n",
 			dcr->region_index, dcr->windows);
 	return true;
 }
@@ -781,7 +774,7 @@ static bool add_bdw(struct acpi_nfit_desc *acpi_desc,
 	INIT_LIST_HEAD(&nfit_bdw->list);
 	memcpy(nfit_bdw->bdw, bdw, sizeof(*bdw));
 	list_add_tail(&nfit_bdw->list, &acpi_desc->bdws);
-	dev_dbg(dev, "%s: bdw dcr: %d windows: %d\n", __func__,
+	dev_dbg(dev, "bdw dcr: %d windows: %d\n",
 			bdw->region_index, bdw->windows);
 	return true;
 }
@@ -820,7 +813,7 @@ static bool add_idt(struct acpi_nfit_desc *acpi_desc,
 	INIT_LIST_HEAD(&nfit_idt->list);
 	memcpy(nfit_idt->idt, idt, sizeof_idt(idt));
 	list_add_tail(&nfit_idt->list, &acpi_desc->idts);
-	dev_dbg(dev, "%s: idt index: %d num_lines: %d\n", __func__,
+	dev_dbg(dev, "idt index: %d num_lines: %d\n",
 			idt->interleave_index, idt->line_count);
 	return true;
 }
@@ -860,7 +853,7 @@ static bool add_flush(struct acpi_nfit_desc *acpi_desc,
 	INIT_LIST_HEAD(&nfit_flush->list);
 	memcpy(nfit_flush->flush, flush, sizeof_flush(flush));
 	list_add_tail(&nfit_flush->list, &acpi_desc->flushes);
-	dev_dbg(dev, "%s: nfit_flush handle: %d hint_count: %d\n", __func__,
+	dev_dbg(dev, "nfit_flush handle: %d hint_count: %d\n",
 			flush->device_handle, flush->hint_count);
 	return true;
 }
@@ -873,7 +866,7 @@ static bool add_platform_cap(struct acpi_nfit_desc *acpi_desc,
 
 	mask = (1 << (pcap->highest_capability + 1)) - 1;
 	acpi_desc->platform_cap = pcap->capabilities & mask;
-	dev_dbg(dev, "%s: cap: %#x\n", __func__, acpi_desc->platform_cap);
+	dev_dbg(dev, "cap: %#x\n", acpi_desc->platform_cap);
 	return true;
 }
 
@@ -920,7 +913,7 @@ static void *add_table(struct acpi_nfit_desc *acpi_desc,
 			return err;
 		break;
 	case ACPI_NFIT_TYPE_SMBIOS:
-		dev_dbg(dev, "%s: smbios\n", __func__);
+		dev_dbg(dev, "smbios\n");
 		break;
 	case ACPI_NFIT_TYPE_CAPABILITIES:
 		if (!add_platform_cap(acpi_desc, table))
@@ -1277,8 +1270,11 @@ static ssize_t scrub_show(struct device *dev,
 	if (nd_desc) {
 		struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
 
+		mutex_lock(&acpi_desc->init_mutex);
 		rc = sprintf(buf, "%d%s", acpi_desc->scrub_count,
-				(work_busy(&acpi_desc->work)) ? "+\n" : "\n");
+				work_busy(&acpi_desc->dwork.work)
+				&& !acpi_desc->cancel ? "+\n" : "\n");
+		mutex_unlock(&acpi_desc->init_mutex);
 	}
 	device_unlock(dev);
 	return rc;
@@ -1648,7 +1644,7 @@ void __acpi_nvdimm_notify(struct device *dev, u32 event)
 	struct nfit_mem *nfit_mem;
 	struct acpi_nfit_desc *acpi_desc;
 
-	dev_dbg(dev->parent, "%s: %s: event: %d\n", dev_name(dev), __func__,
+	dev_dbg(dev->parent, "%s: event: %d\n", dev_name(dev),
 			event);
 
 	if (event != NFIT_NOTIFY_DIMM_HEALTH) {
@@ -1681,12 +1677,23 @@ static void acpi_nvdimm_notify(acpi_handle handle, u32 event, void *data)
 	device_unlock(dev->parent);
 }
 
+/* Report whether @method resolves to a handle beneath @adev. */
+static bool acpi_nvdimm_has_method(struct acpi_device *adev, char *method)
+{
+	acpi_handle found;
+	acpi_status rc;
+
+	/* Only existence matters; the handle itself is discarded. */
+	rc = acpi_get_handle(adev->handle, method, &found);
+
+	return ACPI_SUCCESS(rc) ? true : false;
+}
+
 static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
 		struct nfit_mem *nfit_mem, u32 device_handle)
 {
 	struct acpi_device *adev, *adev_dimm;
 	struct device *dev = acpi_desc->dev;
-	union acpi_object *obj;
 	unsigned long dsm_mask;
 	const guid_t *guid;
 	int i;
@@ -1759,25 +1766,15 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
 					1ULL << i))
 			set_bit(i, &nfit_mem->dsm_mask);
 
-	obj = acpi_label_info(adev_dimm->handle);
-	if (obj) {
-		ACPI_FREE(obj);
-		nfit_mem->has_lsi = 1;
-		dev_dbg(dev, "%s: has _LSI\n", dev_name(&adev_dimm->dev));
-	}
-
-	obj = acpi_label_read(adev_dimm->handle, 0, 0);
-	if (obj) {
-		ACPI_FREE(obj);
-		nfit_mem->has_lsr = 1;
+	if (acpi_nvdimm_has_method(adev_dimm, "_LSI")
+			&& acpi_nvdimm_has_method(adev_dimm, "_LSR")) {
 		dev_dbg(dev, "%s: has _LSR\n", dev_name(&adev_dimm->dev));
+		nfit_mem->has_lsr = true;
 	}
 
-	obj = acpi_label_write(adev_dimm->handle, 0, 0, NULL);
-	if (obj) {
-		ACPI_FREE(obj);
-		nfit_mem->has_lsw = 1;
+	if (nfit_mem->has_lsr && acpi_nvdimm_has_method(adev_dimm, "_LSW")) {
 		dev_dbg(dev, "%s: has _LSW\n", dev_name(&adev_dimm->dev));
+		nfit_mem->has_lsw = true;
 	}
 
 	return 0;
@@ -1866,10 +1863,10 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
 			cmd_mask |= nfit_mem->dsm_mask & NVDIMM_STANDARD_CMDMASK;
 		}
 
-		if (nfit_mem->has_lsi)
+		if (nfit_mem->has_lsr) {
 			set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask);
-		if (nfit_mem->has_lsr)
 			set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask);
+		}
 		if (nfit_mem->has_lsw)
 			set_bit(ND_CMD_SET_CONFIG_DATA, &cmd_mask);
 
@@ -2365,7 +2362,7 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
 	nvdimm = nd_blk_region_to_dimm(ndbr);
 	nfit_mem = nvdimm_provider_data(nvdimm);
 	if (!nfit_mem || !nfit_mem->dcr || !nfit_mem->bdw) {
-		dev_dbg(dev, "%s: missing%s%s%s\n", __func__,
+		dev_dbg(dev, "missing%s%s%s\n",
 				nfit_mem ? "" : " nfit_mem",
 				(nfit_mem && nfit_mem->dcr) ? "" : " dcr",
 				(nfit_mem && nfit_mem->bdw) ? "" : " bdw");
@@ -2384,7 +2381,7 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
 	mmio->addr.base = devm_nvdimm_memremap(dev, nfit_mem->spa_bdw->address,
                         nfit_mem->spa_bdw->length, nd_blk_memremap_flags(ndbr));
 	if (!mmio->addr.base) {
-		dev_dbg(dev, "%s: %s failed to map bdw\n", __func__,
+		dev_dbg(dev, "%s failed to map bdw\n",
 				nvdimm_name(nvdimm));
 		return -ENOMEM;
 	}
@@ -2395,8 +2392,8 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
 	rc = nfit_blk_init_interleave(mmio, nfit_mem->idt_bdw,
 			nfit_mem->memdev_bdw->interleave_ways);
 	if (rc) {
-		dev_dbg(dev, "%s: %s failed to init bdw interleave\n",
-				__func__, nvdimm_name(nvdimm));
+		dev_dbg(dev, "%s failed to init bdw interleave\n",
+				nvdimm_name(nvdimm));
 		return rc;
 	}
 
@@ -2407,7 +2404,7 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
 	mmio->addr.base = devm_nvdimm_ioremap(dev, nfit_mem->spa_dcr->address,
 			nfit_mem->spa_dcr->length);
 	if (!mmio->addr.base) {
-		dev_dbg(dev, "%s: %s failed to map dcr\n", __func__,
+		dev_dbg(dev, "%s failed to map dcr\n",
 				nvdimm_name(nvdimm));
 		return -ENOMEM;
 	}
@@ -2418,15 +2415,15 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
 	rc = nfit_blk_init_interleave(mmio, nfit_mem->idt_dcr,
 			nfit_mem->memdev_dcr->interleave_ways);
 	if (rc) {
-		dev_dbg(dev, "%s: %s failed to init dcr interleave\n",
-				__func__, nvdimm_name(nvdimm));
+		dev_dbg(dev, "%s failed to init dcr interleave\n",
+				nvdimm_name(nvdimm));
 		return rc;
 	}
 
 	rc = acpi_nfit_blk_get_flags(nd_desc, nvdimm, nfit_blk);
 	if (rc < 0) {
-		dev_dbg(dev, "%s: %s failed get DIMM flags\n",
-				__func__, nvdimm_name(nvdimm));
+		dev_dbg(dev, "%s failed get DIMM flags\n",
+				nvdimm_name(nvdimm));
 		return rc;
 	}
 
@@ -2476,7 +2473,8 @@ static int ars_start(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa
 	memset(&ars_start, 0, sizeof(ars_start));
 	ars_start.address = spa->address;
 	ars_start.length = spa->length;
-	ars_start.flags = acpi_desc->ars_start_flags;
+	if (test_bit(ARS_SHORT, &nfit_spa->ars_state))
+		ars_start.flags = ND_ARS_RETURN_PREV_DATA;
 	if (nfit_spa_type(spa) == NFIT_SPA_PM)
 		ars_start.type = ND_ARS_PERSISTENT;
 	else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE)
@@ -2518,16 +2516,62 @@ static int ars_get_status(struct acpi_nfit_desc *acpi_desc)
 	int rc, cmd_rc;
 
 	rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_STATUS, ars_status,
-			acpi_desc->ars_status_size, &cmd_rc);
+			acpi_desc->max_ars, &cmd_rc);
 	if (rc < 0)
 		return rc;
 	return cmd_rc;
 }
 
-static int ars_status_process_records(struct acpi_nfit_desc *acpi_desc,
-		struct nd_cmd_ars_status *ars_status)
+static void ars_complete(struct acpi_nfit_desc *acpi_desc,
+		struct nfit_spa *nfit_spa)
+{
+	struct nd_cmd_ars_status *status = acpi_desc->ars_status;
+	struct acpi_nfit_system_address *spa = nfit_spa->spa;
+	struct nd_region *region = nfit_spa->nd_region;
+	struct device *dev = acpi_desc->dev;
+	bool starts_inside, starts_before;
+
+	starts_inside = status->address >= spa->address
+			&& status->address < spa->address + spa->length;
+	starts_before = status->address < spa->address;
+
+	/* A scrub starting at or beyond the end of this range is unrelated */
+	if (!starts_inside && !starts_before)
+		return;
+
+	/*
+	 * A scrub that stops short of the end of the range is taken to
+	 * be a continuation still in flight; only a scrub that reaches
+	 * the end of the spa range completes a pending request.
+	 */
+	if (status->address + status->length < spa->address + spa->length)
+		return;
+
+	/* Already reported complete, nothing more to do */
+	if (test_bit(ARS_DONE, &nfit_spa->ars_state))
+		return;
+
+	/* Completion only applies to ranges with a request outstanding */
+	if (!test_and_clear_bit(ARS_REQ, &nfit_spa->ars_state))
+		return;
+
+	/* Log against the region when there is one, and have it revalidate */
+	if (region) {
+		dev = nd_region_dev(region);
+		nvdimm_region_notify(region, NVDIMM_REVALIDATE_POISON);
+	}
+
+	dev_dbg(dev, "ARS: range %d %s complete\n", spa->range_index,
+			test_bit(ARS_SHORT, &nfit_spa->ars_state)
+			? "short" : "long");
+	clear_bit(ARS_SHORT, &nfit_spa->ars_state);
+	set_bit(ARS_DONE, &nfit_spa->ars_state);
+}
+
+static int ars_status_process_records(struct acpi_nfit_desc *acpi_desc)
 {
 	struct nvdimm_bus *nvdimm_bus = acpi_desc->nvdimm_bus;
+	struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status;
 	int rc;
 	u32 i;
 
@@ -2606,7 +2650,7 @@ static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
 	struct acpi_nfit_system_address *spa = nfit_spa->spa;
 	struct nd_blk_region_desc *ndbr_desc;
 	struct nfit_mem *nfit_mem;
-	int blk_valid = 0, rc;
+	int rc;
 
 	if (!nvdimm) {
 		dev_err(acpi_desc->dev, "spa%d dimm: %#x not found\n",
@@ -2626,15 +2670,14 @@ static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
 		if (!nfit_mem || !nfit_mem->bdw) {
 			dev_dbg(acpi_desc->dev, "spa%d %s missing bdw\n",
 					spa->range_index, nvdimm_name(nvdimm));
-		} else {
-			mapping->size = nfit_mem->bdw->capacity;
-			mapping->start = nfit_mem->bdw->start_address;
-			ndr_desc->num_lanes = nfit_mem->bdw->windows;
-			blk_valid = 1;
+			break;
 		}
 
+		mapping->size = nfit_mem->bdw->capacity;
+		mapping->start = nfit_mem->bdw->start_address;
+		ndr_desc->num_lanes = nfit_mem->bdw->windows;
 		ndr_desc->mapping = mapping;
-		ndr_desc->num_mappings = blk_valid;
+		ndr_desc->num_mappings = 1;
 		ndbr_desc = to_blk_region_desc(ndr_desc);
 		ndbr_desc->enable = acpi_nfit_blk_region_enable;
 		ndbr_desc->do_io = acpi_desc->blk_do_io;
@@ -2682,8 +2725,7 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
 		return 0;
 
 	if (spa->range_index == 0 && !nfit_spa_is_virtual(spa)) {
-		dev_dbg(acpi_desc->dev, "%s: detected invalid spa index\n",
-				__func__);
+		dev_dbg(acpi_desc->dev, "detected invalid spa index\n");
 		return 0;
 	}
 
@@ -2769,301 +2811,243 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
 	return rc;
 }
 
-static int ars_status_alloc(struct acpi_nfit_desc *acpi_desc,
-		u32 max_ars)
+static int ars_status_alloc(struct acpi_nfit_desc *acpi_desc)
 {
 	struct device *dev = acpi_desc->dev;
 	struct nd_cmd_ars_status *ars_status;
 
-	if (acpi_desc->ars_status && acpi_desc->ars_status_size >= max_ars) {
-		memset(acpi_desc->ars_status, 0, acpi_desc->ars_status_size);
+	if (acpi_desc->ars_status) {
+		memset(acpi_desc->ars_status, 0, acpi_desc->max_ars);
 		return 0;
 	}
 
-	if (acpi_desc->ars_status)
-		devm_kfree(dev, acpi_desc->ars_status);
-	acpi_desc->ars_status = NULL;
-	ars_status = devm_kzalloc(dev, max_ars, GFP_KERNEL);
+	ars_status = devm_kzalloc(dev, acpi_desc->max_ars, GFP_KERNEL);
 	if (!ars_status)
 		return -ENOMEM;
 	acpi_desc->ars_status = ars_status;
-	acpi_desc->ars_status_size = max_ars;
 	return 0;
 }
 
-static int acpi_nfit_query_poison(struct acpi_nfit_desc *acpi_desc,
-		struct nfit_spa *nfit_spa)
+static int acpi_nfit_query_poison(struct acpi_nfit_desc *acpi_desc)
 {
-	struct acpi_nfit_system_address *spa = nfit_spa->spa;
 	int rc;
 
-	if (!nfit_spa->max_ars) {
-		struct nd_cmd_ars_cap ars_cap;
-
-		memset(&ars_cap, 0, sizeof(ars_cap));
-		rc = ars_get_cap(acpi_desc, &ars_cap, nfit_spa);
-		if (rc < 0)
-			return rc;
-		nfit_spa->max_ars = ars_cap.max_ars_out;
-		nfit_spa->clear_err_unit = ars_cap.clear_err_unit;
-		/* check that the supported scrub types match the spa type */
-		if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE &&
-				((ars_cap.status >> 16) & ND_ARS_VOLATILE) == 0)
-			return -ENOTTY;
-		else if (nfit_spa_type(spa) == NFIT_SPA_PM &&
-				((ars_cap.status >> 16) & ND_ARS_PERSISTENT) == 0)
-			return -ENOTTY;
-	}
-
-	if (ars_status_alloc(acpi_desc, nfit_spa->max_ars))
+	if (ars_status_alloc(acpi_desc))
 		return -ENOMEM;
 
 	rc = ars_get_status(acpi_desc);
 	if (rc < 0 && rc != -ENOSPC)
 		return rc;
 
-	if (ars_status_process_records(acpi_desc, acpi_desc->ars_status))
+	if (ars_status_process_records(acpi_desc))
 		return -ENOMEM;
 
-	return 0;
+	/* Records are reaped; pass -ENOSPC on so callers continue the scrub */
+	return rc;
 }
 
-static void acpi_nfit_async_scrub(struct acpi_nfit_desc *acpi_desc,
-		struct nfit_spa *nfit_spa)
+static int ars_register(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa,
+		int *query_rc)
 {
-	struct acpi_nfit_system_address *spa = nfit_spa->spa;
-	unsigned int overflow_retry = scrub_overflow_abort;
-	u64 init_ars_start = 0, init_ars_len = 0;
-	struct device *dev = acpi_desc->dev;
-	unsigned int tmo = scrub_timeout;
-	int rc;
+	int rc = *query_rc;
 
-	if (!nfit_spa->ars_required || !nfit_spa->nd_region)
-		return;
+	if (no_init_ars)
+		return acpi_nfit_register_region(acpi_desc, nfit_spa);
 
-	rc = ars_start(acpi_desc, nfit_spa);
-	/*
-	 * If we timed out the initial scan we'll still be busy here,
-	 * and will wait another timeout before giving up permanently.
-	 */
-	if (rc < 0 && rc != -EBUSY)
-		return;
-
-	do {
-		u64 ars_start, ars_len;
-
-		if (acpi_desc->cancel)
-			break;
-		rc = acpi_nfit_query_poison(acpi_desc, nfit_spa);
-		if (rc == -ENOTTY)
-			break;
-		if (rc == -EBUSY && !tmo) {
-			dev_warn(dev, "range %d ars timeout, aborting\n",
-					spa->range_index);
-			break;
-		}
+	set_bit(ARS_REQ, &nfit_spa->ars_state);
+	set_bit(ARS_SHORT, &nfit_spa->ars_state);
 
+	switch (rc) {
+	case 0:
+	case -EAGAIN:
+		rc = ars_start(acpi_desc, nfit_spa);
 		if (rc == -EBUSY) {
-			/*
-			 * Note, entries may be appended to the list
-			 * while the lock is dropped, but the workqueue
-			 * being active prevents entries being deleted /
-			 * freed.
-			 */
-			mutex_unlock(&acpi_desc->init_mutex);
-			ssleep(1);
-			tmo--;
-			mutex_lock(&acpi_desc->init_mutex);
-			continue;
-		}
-
-		/* we got some results, but there are more pending... */
-		if (rc == -ENOSPC && overflow_retry--) {
-			if (!init_ars_len) {
-				init_ars_len = acpi_desc->ars_status->length;
-				init_ars_start = acpi_desc->ars_status->address;
-			}
-			rc = ars_continue(acpi_desc);
-		}
-
-		if (rc < 0) {
-			dev_warn(dev, "range %d ars continuation failed\n",
-					spa->range_index);
+			*query_rc = rc;
 			break;
-		}
-
-		if (init_ars_len) {
-			ars_start = init_ars_start;
-			ars_len = init_ars_len;
+		} else if (rc == 0) {
+			rc = acpi_nfit_query_poison(acpi_desc);
 		} else {
-			ars_start = acpi_desc->ars_status->address;
-			ars_len = acpi_desc->ars_status->length;
+			set_bit(ARS_FAILED, &nfit_spa->ars_state);
+			break;
 		}
-		dev_dbg(dev, "spa range: %d ars from %#llx + %#llx complete\n",
-				spa->range_index, ars_start, ars_len);
-		/* notify the region about new poison entries */
-		nvdimm_region_notify(nfit_spa->nd_region,
-				NVDIMM_REVALIDATE_POISON);
+		if (rc == -EAGAIN)
+			clear_bit(ARS_SHORT, &nfit_spa->ars_state);
+		else if (rc == 0)
+			ars_complete(acpi_desc, nfit_spa);
 		break;
-	} while (1);
+	case -EBUSY:
+	case -ENOSPC:
+		break;
+	default:
+		set_bit(ARS_FAILED, &nfit_spa->ars_state);
+		break;
+	}
+
+	if (test_and_clear_bit(ARS_DONE, &nfit_spa->ars_state))
+		set_bit(ARS_REQ, &nfit_spa->ars_state);
+
+	return acpi_nfit_register_region(acpi_desc, nfit_spa);
 }
 
-static void acpi_nfit_scrub(struct work_struct *work)
+static void ars_complete_all(struct acpi_nfit_desc *acpi_desc)
 {
-	struct device *dev;
-	u64 init_scrub_length = 0;
 	struct nfit_spa *nfit_spa;
-	u64 init_scrub_address = 0;
-	bool init_ars_done = false;
-	struct acpi_nfit_desc *acpi_desc;
-	unsigned int tmo = scrub_timeout;
-	unsigned int overflow_retry = scrub_overflow_abort;
-
-	acpi_desc = container_of(work, typeof(*acpi_desc), work);
-	dev = acpi_desc->dev;
-
-	/*
-	 * We scrub in 2 phases.  The first phase waits for any platform
-	 * firmware initiated scrubs to complete and then we go search for the
-	 * affected spa regions to mark them scanned.  In the second phase we
-	 * initiate a directed scrub for every range that was not scrubbed in
-	 * phase 1. If we're called for a 'rescan', we harmlessly pass through
-	 * the first phase, but really only care about running phase 2, where
-	 * regions can be notified of new poison.
-	 */
 
-	/* process platform firmware initiated scrubs */
- retry:
-	mutex_lock(&acpi_desc->init_mutex);
 	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
-		struct nd_cmd_ars_status *ars_status;
-		struct acpi_nfit_system_address *spa;
-		u64 ars_start, ars_len;
-		int rc;
-
-		if (acpi_desc->cancel)
-			break;
-
-		if (nfit_spa->nd_region)
+		if (test_bit(ARS_FAILED, &nfit_spa->ars_state))
 			continue;
+		ars_complete(acpi_desc, nfit_spa);
+	}
+}
 
-		if (init_ars_done) {
-			/*
-			 * No need to re-query, we're now just
-			 * reconciling all the ranges covered by the
-			 * initial scrub
-			 */
-			rc = 0;
-		} else
-			rc = acpi_nfit_query_poison(acpi_desc, nfit_spa);
-
-		if (rc == -ENOTTY) {
-			/* no ars capability, just register spa and move on */
-			acpi_nfit_register_region(acpi_desc, nfit_spa);
-			continue;
-		}
-
-		if (rc == -EBUSY && !tmo) {
-			/* fallthrough to directed scrub in phase 2 */
-			dev_warn(dev, "timeout awaiting ars results, continuing...\n");
-			break;
-		} else if (rc == -EBUSY) {
-			mutex_unlock(&acpi_desc->init_mutex);
-			ssleep(1);
-			tmo--;
-			goto retry;
-		}
-
-		/* we got some results, but there are more pending... */
-		if (rc == -ENOSPC && overflow_retry--) {
-			ars_status = acpi_desc->ars_status;
-			/*
-			 * Record the original scrub range, so that we
-			 * can recall all the ranges impacted by the
-			 * initial scrub.
-			 */
-			if (!init_scrub_length) {
-				init_scrub_length = ars_status->length;
-				init_scrub_address = ars_status->address;
-			}
-			rc = ars_continue(acpi_desc);
-			if (rc == 0) {
-				mutex_unlock(&acpi_desc->init_mutex);
-				goto retry;
-			}
-		}
+static unsigned int __acpi_nfit_scrub(struct acpi_nfit_desc *acpi_desc,
+		int query_rc)
+{
+	unsigned int tmo = acpi_desc->scrub_tmo;
+	struct device *dev = acpi_desc->dev;
+	struct nfit_spa *nfit_spa;
 
-		if (rc < 0) {
-			/*
-			 * Initial scrub failed, we'll give it one more
-			 * try below...
-			 */
-			break;
-		}
+	if (acpi_desc->cancel)
+		return 0;
 
-		/* We got some final results, record completed ranges */
-		ars_status = acpi_desc->ars_status;
-		if (init_scrub_length) {
-			ars_start = init_scrub_address;
-			ars_len = ars_start + init_scrub_length;
-		} else {
-			ars_start = ars_status->address;
-			ars_len = ars_status->length;
-		}
-		spa = nfit_spa->spa;
+	if (query_rc == -EBUSY) {
+		dev_dbg(dev, "ARS: ARS busy\n");
+		return min(30U * 60U, tmo * 2);
+	}
+	if (query_rc == -ENOSPC) {
+		dev_dbg(dev, "ARS: ARS continue\n");
+		ars_continue(acpi_desc);
+		return 1;
+	}
+	if (query_rc && query_rc != -EAGAIN) {
+		unsigned long long addr, end;
 
-		if (!init_ars_done) {
-			init_ars_done = true;
-			dev_dbg(dev, "init scrub %#llx + %#llx complete\n",
-					ars_start, ars_len);
-		}
-		if (ars_start <= spa->address && ars_start + ars_len
-				>= spa->address + spa->length)
-			acpi_nfit_register_region(acpi_desc, nfit_spa);
+		addr = acpi_desc->ars_status->address;
+		end = addr + acpi_desc->ars_status->length;
+		dev_dbg(dev, "ARS: %llx-%llx failed (%d)\n", addr, end,
+				query_rc);
 	}
 
-	/*
-	 * For all the ranges not covered by an initial scrub we still
-	 * want to see if there are errors, but it's ok to discover them
-	 * asynchronously.
-	 */
+	ars_complete_all(acpi_desc);
 	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
-		/*
-		 * Flag all the ranges that still need scrubbing, but
-		 * register them now to make data available.
-		 */
-		if (!nfit_spa->nd_region) {
-			nfit_spa->ars_required = 1;
-			acpi_nfit_register_region(acpi_desc, nfit_spa);
+		if (test_bit(ARS_FAILED, &nfit_spa->ars_state))
+			continue;
+		if (test_bit(ARS_REQ, &nfit_spa->ars_state)) {
+			int rc = ars_start(acpi_desc, nfit_spa);
+
+			clear_bit(ARS_DONE, &nfit_spa->ars_state);
+			dev = nd_region_dev(nfit_spa->nd_region);
+			dev_dbg(dev, "ARS: range %d ARS start (%d)\n",
+					nfit_spa->spa->range_index, rc);
+			if (rc == 0 || rc == -EBUSY)
+				return 1;
+			dev_err(dev, "ARS: range %d ARS failed (%d)\n",
+					nfit_spa->spa->range_index, rc);
+			set_bit(ARS_FAILED, &nfit_spa->ars_state);
 		}
 	}
-	acpi_desc->init_complete = 1;
+	return 0;
+}
 
-	list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
-		acpi_nfit_async_scrub(acpi_desc, nfit_spa);
-	acpi_desc->scrub_count++;
-	acpi_desc->ars_start_flags = 0;
-	if (acpi_desc->scrub_count_state)
-		sysfs_notify_dirent(acpi_desc->scrub_count_state);
+static void acpi_nfit_scrub(struct work_struct *work)
+{
+	struct acpi_nfit_desc *acpi_desc;
+	unsigned int tmo;
+	int query_rc;
+
+	acpi_desc = container_of(work, typeof(*acpi_desc), dwork.work);
+	mutex_lock(&acpi_desc->init_mutex);
+	query_rc = acpi_nfit_query_poison(acpi_desc);
+	tmo = __acpi_nfit_scrub(acpi_desc, query_rc);
+	if (tmo) {
+		queue_delayed_work(nfit_wq, &acpi_desc->dwork, tmo * HZ);
+		acpi_desc->scrub_tmo = tmo;
+	} else {
+		acpi_desc->scrub_count++;
+		if (acpi_desc->scrub_count_state)
+			sysfs_notify_dirent(acpi_desc->scrub_count_state);
+	}
+	memset(acpi_desc->ars_status, 0, acpi_desc->max_ars);
 	mutex_unlock(&acpi_desc->init_mutex);
 }
 
+static void acpi_nfit_init_ars(struct acpi_nfit_desc *acpi_desc,
+		struct nfit_spa *nfit_spa)
+{
+	const int type = nfit_spa_type(nfit_spa->spa);
+	struct nd_cmd_ars_cap cap;
+
+	memset(&cap, 0, sizeof(cap));
+	if (ars_get_cap(acpi_desc, &cap, nfit_spa) < 0)
+		return;
+
+	/* bail unless the reported scrub types cover this type of range */
+	if (type == NFIT_SPA_VOLATILE
+			&& !((cap.status >> 16) & ND_ARS_VOLATILE))
+		return;
+	if (type == NFIT_SPA_PM && !((cap.status >> 16) & ND_ARS_PERSISTENT))
+		return;
+
+	/* keep the limits; acpi_desc->max_ars tracks the largest payload */
+	nfit_spa->max_ars = cap.max_ars_out;
+	nfit_spa->clear_err_unit = cap.clear_err_unit;
+	acpi_desc->max_ars = max(nfit_spa->max_ars, acpi_desc->max_ars);
+	/* ARS is usable for this range: drop the failed mark, request a scrub */
+	clear_bit(ARS_FAILED, &nfit_spa->ars_state);
+	set_bit(ARS_REQ, &nfit_spa->ars_state);
+}
+
 static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc)
 {
 	struct nfit_spa *nfit_spa;
-	int rc;
+	int rc, query_rc;
+
+	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
+		set_bit(ARS_FAILED, &nfit_spa->ars_state);
+		switch (nfit_spa_type(nfit_spa->spa)) {
+		case NFIT_SPA_VOLATILE:
+		case NFIT_SPA_PM:
+			acpi_nfit_init_ars(acpi_desc, nfit_spa);
+			break;
+		}
+	}
+
+	/*
+	 * Reap any results that might be pending before starting new
+	 * short requests.
+	 */
+	query_rc = acpi_nfit_query_poison(acpi_desc);
+	if (query_rc == 0)
+		ars_complete_all(acpi_desc);
 
 	list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
-		if (nfit_spa_type(nfit_spa->spa) == NFIT_SPA_DCR) {
-			/* BLK regions don't need to wait for ars results */
+		switch (nfit_spa_type(nfit_spa->spa)) {
+		case NFIT_SPA_VOLATILE:
+		case NFIT_SPA_PM:
+			/* register regions and kick off initial ARS run */
+			rc = ars_register(acpi_desc, nfit_spa, &query_rc);
+			if (rc)
+				return rc;
+			break;
+		case NFIT_SPA_BDW:
+			/* nothing to register */
+			break;
+		case NFIT_SPA_DCR:
+		case NFIT_SPA_VDISK:
+		case NFIT_SPA_VCD:
+		case NFIT_SPA_PDISK:
+		case NFIT_SPA_PCD:
+			/* register known regions that don't support ARS */
 			rc = acpi_nfit_register_region(acpi_desc, nfit_spa);
 			if (rc)
 				return rc;
+			break;
+		default:
+			/* don't register unknown regions */
+			break;
 		}
 
-	acpi_desc->ars_start_flags = 0;
-	if (!acpi_desc->cancel)
-		queue_work(nfit_wq, &acpi_desc->work);
+	queue_delayed_work(nfit_wq, &acpi_desc->dwork, 0);
 	return 0;
 }
 
@@ -3173,8 +3157,7 @@ int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *data, acpi_size sz)
 		data = add_table(acpi_desc, &prev, data, end);
 
 	if (IS_ERR(data)) {
-		dev_dbg(dev, "%s: nfit table parsing error: %ld\n", __func__,
-				PTR_ERR(data));
+		dev_dbg(dev, "nfit table parsing error: %ld\n",	PTR_ERR(data));
 		rc = PTR_ERR(data);
 		goto out_unlock;
 	}
@@ -3199,49 +3182,20 @@ int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *data, acpi_size sz)
 }
 EXPORT_SYMBOL_GPL(acpi_nfit_init);
 
-struct acpi_nfit_flush_work {
-	struct work_struct work;
-	struct completion cmp;
-};
-
-static void flush_probe(struct work_struct *work)
-{
-	struct acpi_nfit_flush_work *flush;
-
-	flush = container_of(work, typeof(*flush), work);
-	complete(&flush->cmp);
-}
-
 static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc)
 {
 	struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
 	struct device *dev = acpi_desc->dev;
-	struct acpi_nfit_flush_work flush;
-	int rc;
 
-	/* bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */
+	/* Bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */
 	device_lock(dev);
 	device_unlock(dev);
 
-	/* bounce the init_mutex to make init_complete valid */
+	/* Bounce the init_mutex to complete initial registration */
 	mutex_lock(&acpi_desc->init_mutex);
-	if (acpi_desc->cancel || acpi_desc->init_complete) {
-		mutex_unlock(&acpi_desc->init_mutex);
-		return 0;
-	}
-
-	/*
-	 * Scrub work could take 10s of seconds, userspace may give up so we
-	 * need to be interruptible while waiting.
-	 */
-	INIT_WORK_ONSTACK(&flush.work, flush_probe);
-	init_completion(&flush.cmp);
-	queue_work(nfit_wq, &flush.work);
 	mutex_unlock(&acpi_desc->init_mutex);
 
-	rc = wait_for_completion_interruptible(&flush.cmp);
-	cancel_work_sync(&flush.work);
-	return rc;
+	return 0;
 }
 
 static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
@@ -3260,20 +3214,18 @@ static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
 	 * just needs guarantees that any ars it initiates are not
 	 * interrupted by any intervening start reqeusts from userspace.
 	 */
-	if (work_busy(&acpi_desc->work))
+	if (work_busy(&acpi_desc->dwork.work))
 		return -EBUSY;
 
 	return 0;
 }
 
-int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, u8 flags)
+int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, unsigned long flags)
 {
 	struct device *dev = acpi_desc->dev;
+	int scheduled = 0, busy = 0;
 	struct nfit_spa *nfit_spa;
 
-	if (work_busy(&acpi_desc->work))
-		return -EBUSY;
-
 	mutex_lock(&acpi_desc->init_mutex);
 	if (acpi_desc->cancel) {
 		mutex_unlock(&acpi_desc->init_mutex);
@@ -3281,19 +3233,32 @@ int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, u8 flags)
 	}
 
 	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
-		struct acpi_nfit_system_address *spa = nfit_spa->spa;
+		int type = nfit_spa_type(nfit_spa->spa);
 
-		if (nfit_spa_type(spa) != NFIT_SPA_PM)
+		if (type != NFIT_SPA_PM && type != NFIT_SPA_VOLATILE)
+			continue;
+		if (test_bit(ARS_FAILED, &nfit_spa->ars_state))
 			continue;
 
-		nfit_spa->ars_required = 1;
+		if (test_and_set_bit(ARS_REQ, &nfit_spa->ars_state))
+			busy++;
+		else {
+			if (test_bit(ARS_SHORT, &flags))
+				set_bit(ARS_SHORT, &nfit_spa->ars_state);
+			scheduled++;
+		}
+	}
+	if (scheduled) {
+		queue_delayed_work(nfit_wq, &acpi_desc->dwork, 0);
+		dev_dbg(dev, "ars_scan triggered\n");
 	}
-	acpi_desc->ars_start_flags = flags;
-	queue_work(nfit_wq, &acpi_desc->work);
-	dev_dbg(dev, "%s: ars_scan triggered\n", __func__);
 	mutex_unlock(&acpi_desc->init_mutex);
 
-	return 0;
+	if (scheduled)
+		return 0;
+	if (busy)
+		return -EBUSY;
+	return -ENOTTY;
 }
 
 void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev)
@@ -3320,7 +3285,8 @@ void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev)
 	INIT_LIST_HEAD(&acpi_desc->dimms);
 	INIT_LIST_HEAD(&acpi_desc->list);
 	mutex_init(&acpi_desc->init_mutex);
-	INIT_WORK(&acpi_desc->work, acpi_nfit_scrub);
+	acpi_desc->scrub_tmo = 1;
+	INIT_DELAYED_WORK(&acpi_desc->dwork, acpi_nfit_scrub);
 }
 EXPORT_SYMBOL_GPL(acpi_nfit_desc_init);
 
@@ -3344,6 +3310,7 @@ void acpi_nfit_shutdown(void *data)
 
 	mutex_lock(&acpi_desc->init_mutex);
 	acpi_desc->cancel = 1;
+	cancel_delayed_work_sync(&acpi_desc->dwork);
 	mutex_unlock(&acpi_desc->init_mutex);
 
 	/*
@@ -3397,8 +3364,8 @@ static int acpi_nfit_add(struct acpi_device *adev)
 			rc = acpi_nfit_init(acpi_desc, obj->buffer.pointer,
 					obj->buffer.length);
 		else
-			dev_dbg(dev, "%s invalid type %d, ignoring _FIT\n",
-				 __func__, (int) obj->type);
+			dev_dbg(dev, "invalid type %d, ignoring _FIT\n",
+				(int) obj->type);
 		kfree(buf.pointer);
 	} else
 		/* skip over the lead-in header table */
@@ -3427,7 +3394,7 @@ static void acpi_nfit_update_notify(struct device *dev, acpi_handle handle)
 
 	if (!dev->driver) {
 		/* dev->driver may be null if we're being removed */
-		dev_dbg(dev, "%s: no driver found for dev\n", __func__);
+		dev_dbg(dev, "no driver found for dev\n");
 		return;
 	}
 
@@ -3465,15 +3432,15 @@ static void acpi_nfit_update_notify(struct device *dev, acpi_handle handle)
 static void acpi_nfit_uc_error_notify(struct device *dev, acpi_handle handle)
 {
 	struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(dev);
-	u8 flags = (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON) ?
-			0 : ND_ARS_RETURN_PREV_DATA;
+	unsigned long flags = (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON) ?
+			0 : 1 << ARS_SHORT; /* bitmask: rescan test_bit()s it */
 
 	acpi_nfit_ars_rescan(acpi_desc, flags);
 }
 
 void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event)
 {
-	dev_dbg(dev, "%s: event: 0x%x\n", __func__, event);
+	dev_dbg(dev, "event: 0x%x\n", event);
 
 	switch (event) {
 	case NFIT_NOTIFY_UPDATE:
diff --git a/drivers/acpi/nfit/mce.c b/drivers/acpi/nfit/mce.c
index b92921439657..e9626bf6ca29 100644
--- a/drivers/acpi/nfit/mce.c
+++ b/drivers/acpi/nfit/mce.c
@@ -51,9 +51,8 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
 			if ((spa->address + spa->length - 1) < mce->addr)
 				continue;
 			found_match = 1;
-			dev_dbg(dev, "%s: addr in SPA %d (0x%llx, 0x%llx)\n",
-				__func__, spa->range_index, spa->address,
-				spa->length);
+			dev_dbg(dev, "addr in SPA %d (0x%llx, 0x%llx)\n",
+				spa->range_index, spa->address, spa->length);
 			/*
 			 * We can break at the first match because we're going
 			 * to rescan all the SPA ranges. There shouldn't be any
diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h
index 50d36e166d70..7d15856a739f 100644
--- a/drivers/acpi/nfit/nfit.h
+++ b/drivers/acpi/nfit/nfit.h
@@ -117,10 +117,17 @@ enum nfit_dimm_notifiers {
 	NFIT_NOTIFY_DIMM_HEALTH = 0x81,
 };
 
+enum nfit_ars_state {	/* bit numbers used on nfit_spa->ars_state */
+	ARS_REQ,	/* scrub requested; rescan is busy if already set */
+	ARS_DONE,	/* NOTE(review): no user visible here - confirm */
+	ARS_SHORT,	/* rescan caller passed the ARS_SHORT flag bit */
+	ARS_FAILED,	/* set before ARS init; rescan skips the range */
+};
+
 struct nfit_spa {
 	struct list_head list;
 	struct nd_region *nd_region;
-	unsigned int ars_required:1;
+	unsigned long ars_state;
 	u32 clear_err_unit;
 	u32 max_ars;
 	struct acpi_nfit_system_address spa[0];
@@ -171,9 +178,8 @@ struct nfit_mem {
 	struct resource *flush_wpq;
 	unsigned long dsm_mask;
 	int family;
-	u32 has_lsi:1;
-	u32 has_lsr:1;
-	u32 has_lsw:1;
+	bool has_lsr;
+	bool has_lsw;
 };
 
 struct acpi_nfit_desc {
@@ -191,18 +197,18 @@ struct acpi_nfit_desc {
 	struct device *dev;
 	u8 ars_start_flags;
 	struct nd_cmd_ars_status *ars_status;
-	size_t ars_status_size;
-	struct work_struct work;
+	struct delayed_work dwork;
 	struct list_head list;
 	struct kernfs_node *scrub_count_state;
+	unsigned int max_ars;
 	unsigned int scrub_count;
 	unsigned int scrub_mode;
 	unsigned int cancel:1;
-	unsigned int init_complete:1;
 	unsigned long dimm_cmd_force_en;
 	unsigned long bus_cmd_force_en;
 	unsigned long bus_nfit_cmd_force_en;
 	unsigned int platform_cap;
+	unsigned int scrub_tmo;
 	int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
 			void *iobuf, u64 len, int rw);
 };
@@ -244,7 +250,7 @@ struct nfit_blk {
 
 extern struct list_head acpi_descs;
 extern struct mutex acpi_desc_lock;
-int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, u8 flags);
+int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, unsigned long flags);
 
 #ifdef CONFIG_X86_MCE
 void nfit_mce_register(void);
diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
index c7cf48ad5cb9..a651ab3490d8 100644
--- a/drivers/acpi/processor_perflib.c
+++ b/drivers/acpi/processor_perflib.c
@@ -533,7 +533,7 @@ int acpi_processor_notify_smm(struct module *calling_module)
 
 EXPORT_SYMBOL(acpi_processor_notify_smm);
 
-static int acpi_processor_get_psd(struct acpi_processor	*pr)
+int acpi_processor_get_psd(acpi_handle handle, struct acpi_psd_package *pdomain)
 {
 	int result = 0;
 	acpi_status status = AE_OK;
@@ -541,9 +541,8 @@ static int acpi_processor_get_psd(struct acpi_processor	*pr)
 	struct acpi_buffer format = {sizeof("NNNNN"), "NNNNN"};
 	struct acpi_buffer state = {0, NULL};
 	union acpi_object  *psd = NULL;
-	struct acpi_psd_package *pdomain;
 
-	status = acpi_evaluate_object(pr->handle, "_PSD", NULL, &buffer);
+	status = acpi_evaluate_object(handle, "_PSD", NULL, &buffer);
 	if (ACPI_FAILURE(status)) {
 		return -ENODEV;
 	}
@@ -561,8 +560,6 @@ static int acpi_processor_get_psd(struct acpi_processor	*pr)
 		goto end;
 	}
 
-	pdomain = &(pr->performance->domain_info);
-
 	state.length = sizeof(struct acpi_psd_package);
 	state.pointer = pdomain;
 
@@ -597,6 +594,7 @@ end:
 	kfree(buffer.pointer);
 	return result;
 }
+EXPORT_SYMBOL(acpi_processor_get_psd);
 
 int acpi_processor_preregister_performance(
 		struct acpi_processor_performance __percpu *performance)
@@ -645,7 +643,8 @@ int acpi_processor_preregister_performance(
 
 		pr->performance = per_cpu_ptr(performance, i);
 		cpumask_set_cpu(i, pr->performance->shared_cpu_map);
-		if (acpi_processor_get_psd(pr)) {
+		pdomain = &(pr->performance->domain_info);
+		if (acpi_processor_get_psd(pr->handle, pdomain)) {
 			retval = -EINVAL;
 			continue;
 		}
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 79fcd2bae96b..bffe8616bd55 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -837,11 +837,8 @@ int __init memory_dev_init(void)
 	 * during boot and have been initialized
 	 */
 	mutex_lock(&mem_sysfs_mutex);
-	for (i = 0; i < NR_MEM_SECTIONS; i += sections_per_block) {
-		/* Don't iterate over sections we know are !present: */
-		if (i > __highest_present_section_nr)
-			break;
-
+	for (i = 0; i <= __highest_present_section_nr;
+		i += sections_per_block) {
 		err = add_memory_block(i);
 		if (!ret)
 			ret = err;
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 264abaaff662..c9d04497a415 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1103,11 +1103,15 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
 	if (info->lo_encrypt_type) {
 		unsigned int type = info->lo_encrypt_type;
 
-		if (type >= MAX_LO_CRYPT)
-			return -EINVAL;
+		if (type >= MAX_LO_CRYPT) {
+			err = -EINVAL;
+			goto exit;
+		}
 		xfer = xfer_funcs[type];
-		if (xfer == NULL)
-			return -EINVAL;
+		if (xfer == NULL) {
+			err = -EINVAL;
+			goto exit;
+		}
 	} else
 		xfer = NULL;
 
@@ -1283,12 +1287,13 @@ static int
 loop_get_status_old(struct loop_device *lo, struct loop_info __user *arg) {
 	struct loop_info info;
 	struct loop_info64 info64;
-	int err = 0;
+	int err;
 
-	if (!arg)
-		err = -EINVAL;
-	if (!err)
-		err = loop_get_status(lo, &info64);
+	if (!arg) {
+		mutex_unlock(&lo->lo_ctl_mutex);
+		return -EINVAL;
+	}
+	err = loop_get_status(lo, &info64);
 	if (!err)
 		err = loop_info64_to_old(&info64, &info);
 	if (!err && copy_to_user(arg, &info, sizeof(info)))
@@ -1300,12 +1305,13 @@ loop_get_status_old(struct loop_device *lo, struct loop_info __user *arg) {
 static int
 loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) {
 	struct loop_info64 info64;
-	int err = 0;
+	int err;
 
-	if (!arg)
-		err = -EINVAL;
-	if (!err)
-		err = loop_get_status(lo, &info64);
+	if (!arg) {
+		mutex_unlock(&lo->lo_ctl_mutex);
+		return -EINVAL;
+	}
+	err = loop_get_status(lo, &info64);
 	if (!err && copy_to_user(arg, &info64, sizeof(info64)))
 		err = -EFAULT;
 
@@ -1529,12 +1535,13 @@ loop_get_status_compat(struct loop_device *lo,
 		       struct compat_loop_info __user *arg)
 {
 	struct loop_info64 info64;
-	int err = 0;
+	int err;
 
-	if (!arg)
-		err = -EINVAL;
-	if (!err)
-		err = loop_get_status(lo, &info64);
+	if (!arg) {
+		mutex_unlock(&lo->lo_ctl_mutex);
+		return -EINVAL;
+	}
+	err = loop_get_status(lo, &info64);
 	if (!err)
 		err = loop_info64_to_compat(&info64, arg);
 	return err;
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 1e03b04819c8..07dc5419bd63 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -32,6 +32,7 @@
 #include <linux/ceph/osd_client.h>
 #include <linux/ceph/mon_client.h>
 #include <linux/ceph/cls_lock_client.h>
+#include <linux/ceph/striper.h>
 #include <linux/ceph/decode.h>
 #include <linux/parser.h>
 #include <linux/bsearch.h>
@@ -200,95 +201,81 @@ struct rbd_client {
 };
 
 struct rbd_img_request;
-typedef void (*rbd_img_callback_t)(struct rbd_img_request *);
-
-#define	BAD_WHICH	U32_MAX		/* Good which or bad which, which? */
-
-struct rbd_obj_request;
-typedef void (*rbd_obj_callback_t)(struct rbd_obj_request *);
 
 enum obj_request_type {
-	OBJ_REQUEST_NODATA, OBJ_REQUEST_BIO, OBJ_REQUEST_PAGES
+	OBJ_REQUEST_NODATA = 1,
+	OBJ_REQUEST_BIO,	/* pointer into provided bio (list) */
+	OBJ_REQUEST_BVECS,	/* pointer into provided bio_vec array */
+	OBJ_REQUEST_OWN_BVECS,	/* private bio_vec array, doesn't own pages */
 };
 
 enum obj_operation_type {
+	OBJ_OP_READ = 1,
 	OBJ_OP_WRITE,
-	OBJ_OP_READ,
 	OBJ_OP_DISCARD,
 };
 
-enum obj_req_flags {
-	OBJ_REQ_DONE,		/* completion flag: not done = 0, done = 1 */
-	OBJ_REQ_IMG_DATA,	/* object usage: standalone = 0, image = 1 */
-	OBJ_REQ_KNOWN,		/* EXISTS flag valid: no = 0, yes = 1 */
-	OBJ_REQ_EXISTS,		/* target exists: no = 0, yes = 1 */
+/*
+ * Writes go through the following state machine to deal with
+ * layering:
+ *
+ *                       need copyup
+ * RBD_OBJ_WRITE_GUARD ---------------> RBD_OBJ_WRITE_COPYUP
+ *        |     ^                              |
+ *        v     \------------------------------/
+ *      done
+ *        ^
+ *        |
+ * RBD_OBJ_WRITE_FLAT
+ *
+ * Writes start in RBD_OBJ_WRITE_GUARD or _FLAT, depending on whether
+ * there is a parent or not.
+ */
+enum rbd_obj_write_state {
+	RBD_OBJ_WRITE_FLAT = 1,
+	RBD_OBJ_WRITE_GUARD,
+	RBD_OBJ_WRITE_COPYUP,
 };
 
 struct rbd_obj_request {
-	u64			object_no;
-	u64			offset;		/* object start byte */
-	u64			length;		/* bytes from offset */
-	unsigned long		flags;
-
-	/*
-	 * An object request associated with an image will have its
-	 * img_data flag set; a standalone object request will not.
-	 *
-	 * A standalone object request will have which == BAD_WHICH
-	 * and a null obj_request pointer.
-	 *
-	 * An object request initiated in support of a layered image
-	 * object (to check for its existence before a write) will
-	 * have which == BAD_WHICH and a non-null obj_request pointer.
-	 *
-	 * Finally, an object request for rbd image data will have
-	 * which != BAD_WHICH, and will have a non-null img_request
-	 * pointer.  The value of which will be in the range
-	 * 0..(img_request->obj_request_count-1).
-	 */
+	struct ceph_object_extent ex;
 	union {
-		struct rbd_obj_request	*obj_request;	/* STAT op */
-		struct {
-			struct rbd_img_request	*img_request;
-			u64			img_offset;
-			/* links for img_request->obj_requests list */
-			struct list_head	links;
-		};
+		bool			tried_parent;	/* for reads */
+		enum rbd_obj_write_state write_state;	/* for writes */
 	};
-	u32			which;		/* posn image request list */
 
-	enum obj_request_type	type;
+	struct rbd_img_request	*img_request;
+	struct ceph_file_extent	*img_extents;
+	u32			num_img_extents;
+
 	union {
-		struct bio	*bio_list;
+		struct ceph_bio_iter	bio_pos;
 		struct {
-			struct page	**pages;
-			u32		page_count;
+			struct ceph_bvec_iter	bvec_pos;
+			u32			bvec_count;
+			u32			bvec_idx;
 		};
 	};
-	struct page		**copyup_pages;
-	u32			copyup_page_count;
+	struct bio_vec		*copyup_bvecs;
+	u32			copyup_bvec_count;
 
 	struct ceph_osd_request	*osd_req;
 
 	u64			xferred;	/* bytes transferred */
 	int			result;
 
-	rbd_obj_callback_t	callback;
-
 	struct kref		kref;
 };
 
 enum img_req_flags {
-	IMG_REQ_WRITE,		/* I/O direction: read = 0, write = 1 */
 	IMG_REQ_CHILD,		/* initiator: block = 0, child image = 1 */
 	IMG_REQ_LAYERED,	/* ENOENT handling: normal = 0, layered = 1 */
-	IMG_REQ_DISCARD,	/* discard: normal = 0, discard request = 1 */
 };
 
 struct rbd_img_request {
 	struct rbd_device	*rbd_dev;
-	u64			offset;	/* starting image byte offset */
-	u64			length;	/* byte count from offset */
+	enum obj_operation_type	op_type;
+	enum obj_request_type	data_type;
 	unsigned long		flags;
 	union {
 		u64			snap_id;	/* for reads */
@@ -298,26 +285,21 @@ struct rbd_img_request {
 		struct request		*rq;		/* block request */
 		struct rbd_obj_request	*obj_request;	/* obj req initiator */
 	};
-	struct page		**copyup_pages;
-	u32			copyup_page_count;
-	spinlock_t		completion_lock;/* protects next_completion */
-	u32			next_completion;
-	rbd_img_callback_t	callback;
+	spinlock_t		completion_lock;
 	u64			xferred;/* aggregate bytes transferred */
 	int			result;	/* first nonzero obj_request result */
 
+	struct list_head	object_extents;	/* obj_req.ex structs */
 	u32			obj_request_count;
-	struct list_head	obj_requests;	/* rbd_obj_request structs */
+	u32			pending_count;
 
 	struct kref		kref;
 };
 
 #define for_each_obj_request(ireq, oreq) \
-	list_for_each_entry(oreq, &(ireq)->obj_requests, links)
-#define for_each_obj_request_from(ireq, oreq) \
-	list_for_each_entry_from(oreq, &(ireq)->obj_requests, links)
+	list_for_each_entry(oreq, &(ireq)->object_extents, ex.oe_item)
 #define for_each_obj_request_safe(ireq, oreq, n) \
-	list_for_each_entry_safe_reverse(oreq, n, &(ireq)->obj_requests, links)
+	list_for_each_entry_safe(oreq, n, &(ireq)->object_extents, ex.oe_item)
 
 enum rbd_watch_state {
 	RBD_WATCH_STATE_UNREGISTERED,
@@ -433,8 +415,6 @@ static DEFINE_SPINLOCK(rbd_client_list_lock);
 static struct kmem_cache	*rbd_img_request_cache;
 static struct kmem_cache	*rbd_obj_request_cache;
 
-static struct bio_set		*rbd_bio_clone;
-
 static int rbd_major;
 static DEFINE_IDA(rbd_dev_id_ida);
 
@@ -447,8 +427,6 @@ static bool single_major = true;
 module_param(single_major, bool, S_IRUGO);
 MODULE_PARM_DESC(single_major, "Use a single major number for all rbd devices (default: true)");
 
-static int rbd_img_request_submit(struct rbd_img_request *img_request);
-
 static ssize_t rbd_add(struct bus_type *bus, const char *buf,
 		       size_t count);
 static ssize_t rbd_remove(struct bus_type *bus, const char *buf,
@@ -458,7 +436,6 @@ static ssize_t rbd_add_single_major(struct bus_type *bus, const char *buf,
 static ssize_t rbd_remove_single_major(struct bus_type *bus, const char *buf,
 				       size_t count);
 static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth);
-static void rbd_spec_put(struct rbd_spec *spec);
 
 static int rbd_dev_id_to_minor(int dev_id)
 {
@@ -577,9 +554,6 @@ void rbd_warn(struct rbd_device *rbd_dev, const char *fmt, ...)
 #  define rbd_assert(expr)	((void) 0)
 #endif /* !RBD_DEBUG */
 
-static void rbd_osd_copyup_callback(struct rbd_obj_request *obj_request);
-static int rbd_img_obj_request_submit(struct rbd_obj_request *obj_request);
-static void rbd_img_parent_read(struct rbd_obj_request *obj_request);
 static void rbd_dev_remove_parent(struct rbd_device *rbd_dev);
 
 static int rbd_dev_refresh(struct rbd_device *rbd_dev);
@@ -857,26 +831,6 @@ static char* obj_op_name(enum obj_operation_type op_type)
 }
 
 /*
- * Get a ceph client with specific addr and configuration, if one does
- * not exist create it.  Either way, ceph_opts is consumed by this
- * function.
- */
-static struct rbd_client *rbd_get_client(struct ceph_options *ceph_opts)
-{
-	struct rbd_client *rbdc;
-
-	mutex_lock_nested(&client_mutex, SINGLE_DEPTH_NESTING);
-	rbdc = rbd_client_find(ceph_opts);
-	if (rbdc)	/* using an existing client */
-		ceph_destroy_options(ceph_opts);
-	else
-		rbdc = rbd_client_create(ceph_opts);
-	mutex_unlock(&client_mutex);
-
-	return rbdc;
-}
-
-/*
  * Destroy ceph client
  *
  * Caller must hold rbd_client_list_lock.
@@ -904,6 +858,56 @@ static void rbd_put_client(struct rbd_client *rbdc)
 		kref_put(&rbdc->kref, rbd_client_release);
 }
 
+static int wait_for_latest_osdmap(struct ceph_client *client)
+{
+	u64 newest_epoch;
+	int ret;
+
+	ret = ceph_monc_get_version(&client->monc, "osdmap", &newest_epoch);
+	if (ret)
+		return ret;
+	/* Nothing to wait for if the cached osdmap is already that new. */
+	if (client->osdc.osdmap->epoch >= newest_epoch)
+		return 0;
+	/* Otherwise poke the osd client and wait for newest_epoch. */
+	ceph_osdc_maybe_request_map(&client->osdc);
+	return ceph_monc_wait_osdmap(&client->monc, newest_epoch,
+				     client->options->mount_timeout);
+}
+
+/*
+ * Get a ceph client with specific addr and configuration, creating one
+ * if none exists yet.  Either way, ceph_opts is consumed by this
+ * function.  An osdmap refresh failure on reuse returns ERR_PTR().
+ */
+static struct rbd_client *rbd_get_client(struct ceph_options *ceph_opts)
+{
+	struct rbd_client *rbdc;
+	int ret;
+
+	mutex_lock_nested(&client_mutex, SINGLE_DEPTH_NESTING);
+	rbdc = rbd_client_find(ceph_opts);
+	if (rbdc) {
+		ceph_destroy_options(ceph_opts);
+
+		/*
+		 * Using an existing client.  Make sure ->pg_pools is up to
+		 * date before we look up the pool id in do_rbd_add().
+		 */
+		ret = wait_for_latest_osdmap(rbdc->client);
+		if (ret) {
+			rbd_warn(NULL, "failed to get latest osdmap: %d", ret);
+			rbd_put_client(rbdc);
+			rbdc = ERR_PTR(ret);
+		}
+	} else {
+		rbdc = rbd_client_create(ceph_opts);
+	}
+	mutex_unlock(&client_mutex);
+
+	return rbdc;
+}
+
 static bool rbd_image_format_valid(u32 image_format)
 {
 	return image_format == 1 || image_format == 2;
@@ -1223,272 +1227,59 @@ static void rbd_dev_mapping_clear(struct rbd_device *rbd_dev)
 	rbd_dev->mapping.features = 0;
 }
 
-static u64 rbd_segment_offset(struct rbd_device *rbd_dev, u64 offset)
-{
-	u64 segment_size = rbd_obj_bytes(&rbd_dev->header);
-
-	return offset & (segment_size - 1);
-}
-
-static u64 rbd_segment_length(struct rbd_device *rbd_dev,
-				u64 offset, u64 length)
-{
-	u64 segment_size = rbd_obj_bytes(&rbd_dev->header);
-
-	offset &= segment_size - 1;
-
-	rbd_assert(length <= U64_MAX - offset);
-	if (offset + length > segment_size)
-		length = segment_size - offset;
-
-	return length;
-}
-
-/*
- * bio helpers
- */
-
-static void bio_chain_put(struct bio *chain)
-{
-	struct bio *tmp;
-
-	while (chain) {
-		tmp = chain;
-		chain = chain->bi_next;
-		bio_put(tmp);
-	}
-}
-
-/*
- * zeros a bio chain, starting at specific offset
- */
-static void zero_bio_chain(struct bio *chain, int start_ofs)
+static void zero_bvec(struct bio_vec *bv)
 {
-	struct bio_vec bv;
-	struct bvec_iter iter;
-	unsigned long flags;
 	void *buf;
-	int pos = 0;
-
-	while (chain) {
-		bio_for_each_segment(bv, chain, iter) {
-			if (pos + bv.bv_len > start_ofs) {
-				int remainder = max(start_ofs - pos, 0);
-				buf = bvec_kmap_irq(&bv, &flags);
-				memset(buf + remainder, 0,
-				       bv.bv_len - remainder);
-				flush_dcache_page(bv.bv_page);
-				bvec_kunmap_irq(buf, &flags);
-			}
-			pos += bv.bv_len;
-		}
+	unsigned long flags;
 
-		chain = chain->bi_next;
-	}
+	buf = bvec_kmap_irq(bv, &flags);
+	memset(buf, 0, bv->bv_len);
+	flush_dcache_page(bv->bv_page);
+	bvec_kunmap_irq(buf, &flags);
 }
 
-/*
- * similar to zero_bio_chain(), zeros data defined by a page array,
- * starting at the given byte offset from the start of the array and
- * continuing up to the given end offset.  The pages array is
- * assumed to be big enough to hold all bytes up to the end.
- */
-static void zero_pages(struct page **pages, u64 offset, u64 end)
+static void zero_bios(struct ceph_bio_iter *bio_pos, u32 off, u32 bytes)
 {
-	struct page **page = &pages[offset >> PAGE_SHIFT];
+	struct ceph_bio_iter it = *bio_pos;
 
-	rbd_assert(end > offset);
-	rbd_assert(end - offset <= (u64)SIZE_MAX);
-	while (offset < end) {
-		size_t page_offset;
-		size_t length;
-		unsigned long flags;
-		void *kaddr;
-
-		page_offset = offset & ~PAGE_MASK;
-		length = min_t(size_t, PAGE_SIZE - page_offset, end - offset);
-		local_irq_save(flags);
-		kaddr = kmap_atomic(*page);
-		memset(kaddr + page_offset, 0, length);
-		flush_dcache_page(*page);
-		kunmap_atomic(kaddr);
-		local_irq_restore(flags);
-
-		offset += length;
-		page++;
-	}
+	ceph_bio_iter_advance(&it, off);
+	ceph_bio_iter_advance_step(&it, bytes, ({
+		zero_bvec(&bv);
+	}));
 }
 
-/*
- * Clone a portion of a bio, starting at the given byte offset
- * and continuing for the number of bytes indicated.
- */
-static struct bio *bio_clone_range(struct bio *bio_src,
-					unsigned int offset,
-					unsigned int len,
-					gfp_t gfpmask)
+static void zero_bvecs(struct ceph_bvec_iter *bvec_pos, u32 off, u32 bytes)
 {
-	struct bio *bio;
-
-	bio = bio_clone_fast(bio_src, gfpmask, rbd_bio_clone);
-	if (!bio)
-		return NULL;	/* ENOMEM */
+	struct ceph_bvec_iter it = *bvec_pos;
 
-	bio_advance(bio, offset);
-	bio->bi_iter.bi_size = len;
-
-	return bio;
+	ceph_bvec_iter_advance(&it, off);
+	ceph_bvec_iter_advance_step(&it, bytes, ({
+		zero_bvec(&bv);
+	}));
 }
 
 /*
- * Clone a portion of a bio chain, starting at the given byte offset
- * into the first bio in the source chain and continuing for the
- * number of bytes indicated.  The result is another bio chain of
- * exactly the given length, or a null pointer on error.
- *
- * The bio_src and offset parameters are both in-out.  On entry they
- * refer to the first source bio and the offset into that bio where
- * the start of data to be cloned is located.
+ * Zero a range in @obj_req data buffer defined by a bio (list) or
+ * (private) bio_vec array.
  *
- * On return, bio_src is updated to refer to the bio in the source
- * chain that contains first un-cloned byte, and *offset will
- * contain the offset of that byte within that bio.
+ * @off is relative to the start of the data buffer.
  */
-static struct bio *bio_chain_clone_range(struct bio **bio_src,
-					unsigned int *offset,
-					unsigned int len,
-					gfp_t gfpmask)
+static void rbd_obj_zero_range(struct rbd_obj_request *obj_req, u32 off,
+			       u32 bytes)
 {
-	struct bio *bi = *bio_src;
-	unsigned int off = *offset;
-	struct bio *chain = NULL;
-	struct bio **end;
-
-	/* Build up a chain of clone bios up to the limit */
-
-	if (!bi || off >= bi->bi_iter.bi_size || !len)
-		return NULL;		/* Nothing to clone */
-
-	end = &chain;
-	while (len) {
-		unsigned int bi_size;
-		struct bio *bio;
-
-		if (!bi) {
-			rbd_warn(NULL, "bio_chain exhausted with %u left", len);
-			goto out_err;	/* EINVAL; ran out of bio's */
-		}
-		bi_size = min_t(unsigned int, bi->bi_iter.bi_size - off, len);
-		bio = bio_clone_range(bi, off, bi_size, gfpmask);
-		if (!bio)
-			goto out_err;	/* ENOMEM */
-
-		*end = bio;
-		end = &bio->bi_next;
-
-		off += bi_size;
-		if (off == bi->bi_iter.bi_size) {
-			bi = bi->bi_next;
-			off = 0;
-		}
-		len -= bi_size;
-	}
-	*bio_src = bi;
-	*offset = off;
-
-	return chain;
-out_err:
-	bio_chain_put(chain);
-
-	return NULL;
-}
-
-/*
- * The default/initial value for all object request flags is 0.  For
- * each flag, once its value is set to 1 it is never reset to 0
- * again.
- */
-static void obj_request_img_data_set(struct rbd_obj_request *obj_request)
-{
-	if (test_and_set_bit(OBJ_REQ_IMG_DATA, &obj_request->flags)) {
-		struct rbd_device *rbd_dev;
-
-		rbd_dev = obj_request->img_request->rbd_dev;
-		rbd_warn(rbd_dev, "obj_request %p already marked img_data",
-			obj_request);
-	}
-}
-
-static bool obj_request_img_data_test(struct rbd_obj_request *obj_request)
-{
-	smp_mb();
-	return test_bit(OBJ_REQ_IMG_DATA, &obj_request->flags) != 0;
-}
-
-static void obj_request_done_set(struct rbd_obj_request *obj_request)
-{
-	if (test_and_set_bit(OBJ_REQ_DONE, &obj_request->flags)) {
-		struct rbd_device *rbd_dev = NULL;
-
-		if (obj_request_img_data_test(obj_request))
-			rbd_dev = obj_request->img_request->rbd_dev;
-		rbd_warn(rbd_dev, "obj_request %p already marked done",
-			obj_request);
+	switch (obj_req->img_request->data_type) {
+	case OBJ_REQUEST_BIO:
+		zero_bios(&obj_req->bio_pos, off, bytes);
+		break;
+	case OBJ_REQUEST_BVECS:
+	case OBJ_REQUEST_OWN_BVECS:
+		zero_bvecs(&obj_req->bvec_pos, off, bytes);
+		break;
+	default:
+		rbd_assert(0);
 	}
 }
 
-static bool obj_request_done_test(struct rbd_obj_request *obj_request)
-{
-	smp_mb();
-	return test_bit(OBJ_REQ_DONE, &obj_request->flags) != 0;
-}
-
-/*
- * This sets the KNOWN flag after (possibly) setting the EXISTS
- * flag.  The latter is set based on the "exists" value provided.
- *
- * Note that for our purposes once an object exists it never goes
- * away again.  It's possible that the response from two existence
- * checks are separated by the creation of the target object, and
- * the first ("doesn't exist") response arrives *after* the second
- * ("does exist").  In that case we ignore the second one.
- */
-static void obj_request_existence_set(struct rbd_obj_request *obj_request,
-				bool exists)
-{
-	if (exists)
-		set_bit(OBJ_REQ_EXISTS, &obj_request->flags);
-	set_bit(OBJ_REQ_KNOWN, &obj_request->flags);
-	smp_mb();
-}
-
-static bool obj_request_known_test(struct rbd_obj_request *obj_request)
-{
-	smp_mb();
-	return test_bit(OBJ_REQ_KNOWN, &obj_request->flags) != 0;
-}
-
-static bool obj_request_exists_test(struct rbd_obj_request *obj_request)
-{
-	smp_mb();
-	return test_bit(OBJ_REQ_EXISTS, &obj_request->flags) != 0;
-}
-
-static bool obj_request_overlaps_parent(struct rbd_obj_request *obj_request)
-{
-	struct rbd_device *rbd_dev = obj_request->img_request->rbd_dev;
-
-	return obj_request->img_offset <
-	    round_up(rbd_dev->parent_overlap, rbd_obj_bytes(&rbd_dev->header));
-}
-
-static void rbd_obj_request_get(struct rbd_obj_request *obj_request)
-{
-	dout("%s: obj %p (was %d)\n", __func__, obj_request,
-		kref_read(&obj_request->kref));
-	kref_get(&obj_request->kref);
-}
-
 static void rbd_obj_request_destroy(struct kref *kref);
 static void rbd_obj_request_put(struct rbd_obj_request *obj_request)
 {
@@ -1505,18 +1296,13 @@ static void rbd_img_request_get(struct rbd_img_request *img_request)
 	kref_get(&img_request->kref);
 }
 
-static bool img_request_child_test(struct rbd_img_request *img_request);
-static void rbd_parent_request_destroy(struct kref *kref);
 static void rbd_img_request_destroy(struct kref *kref);
 static void rbd_img_request_put(struct rbd_img_request *img_request)
 {
 	rbd_assert(img_request != NULL);
 	dout("%s: img %p (was %d)\n", __func__, img_request,
 		kref_read(&img_request->kref));
-	if (img_request_child_test(img_request))
-		kref_put(&img_request->kref, rbd_parent_request_destroy);
-	else
-		kref_put(&img_request->kref, rbd_img_request_destroy);
+	kref_put(&img_request->kref, rbd_img_request_destroy);
 }
 
 static inline void rbd_img_obj_request_add(struct rbd_img_request *img_request,
@@ -1526,139 +1312,37 @@ static inline void rbd_img_obj_request_add(struct rbd_img_request *img_request,
 
 	/* Image request now owns object's original reference */
 	obj_request->img_request = img_request;
-	obj_request->which = img_request->obj_request_count;
-	rbd_assert(!obj_request_img_data_test(obj_request));
-	obj_request_img_data_set(obj_request);
-	rbd_assert(obj_request->which != BAD_WHICH);
 	img_request->obj_request_count++;
-	list_add_tail(&obj_request->links, &img_request->obj_requests);
-	dout("%s: img %p obj %p w=%u\n", __func__, img_request, obj_request,
-		obj_request->which);
+	img_request->pending_count++;
+	dout("%s: img %p obj %p\n", __func__, img_request, obj_request);
 }
 
 static inline void rbd_img_obj_request_del(struct rbd_img_request *img_request,
 					struct rbd_obj_request *obj_request)
 {
-	rbd_assert(obj_request->which != BAD_WHICH);
-
-	dout("%s: img %p obj %p w=%u\n", __func__, img_request, obj_request,
-		obj_request->which);
-	list_del(&obj_request->links);
+	dout("%s: img %p obj %p\n", __func__, img_request, obj_request);
+	list_del(&obj_request->ex.oe_item);
 	rbd_assert(img_request->obj_request_count > 0);
 	img_request->obj_request_count--;
-	rbd_assert(obj_request->which == img_request->obj_request_count);
-	obj_request->which = BAD_WHICH;
-	rbd_assert(obj_request_img_data_test(obj_request));
 	rbd_assert(obj_request->img_request == img_request);
-	obj_request->img_request = NULL;
-	obj_request->callback = NULL;
 	rbd_obj_request_put(obj_request);
 }
 
-static bool obj_request_type_valid(enum obj_request_type type)
-{
-	switch (type) {
-	case OBJ_REQUEST_NODATA:
-	case OBJ_REQUEST_BIO:
-	case OBJ_REQUEST_PAGES:
-		return true;
-	default:
-		return false;
-	}
-}
-
-static void rbd_img_obj_callback(struct rbd_obj_request *obj_request);
-
 static void rbd_obj_request_submit(struct rbd_obj_request *obj_request)
 {
 	struct ceph_osd_request *osd_req = obj_request->osd_req;
 
 	dout("%s %p object_no %016llx %llu~%llu osd_req %p\n", __func__,
-	     obj_request, obj_request->object_no, obj_request->offset,
-	     obj_request->length, osd_req);
-	if (obj_request_img_data_test(obj_request)) {
-		WARN_ON(obj_request->callback != rbd_img_obj_callback);
-		rbd_img_request_get(obj_request->img_request);
-	}
+	     obj_request, obj_request->ex.oe_objno, obj_request->ex.oe_off,
+	     obj_request->ex.oe_len, osd_req);
 	ceph_osdc_start_request(osd_req->r_osdc, osd_req, false);
 }
 
-static void rbd_img_request_complete(struct rbd_img_request *img_request)
-{
-
-	dout("%s: img %p\n", __func__, img_request);
-
-	/*
-	 * If no error occurred, compute the aggregate transfer
-	 * count for the image request.  We could instead use
-	 * atomic64_cmpxchg() to update it as each object request
-	 * completes; not clear which way is better off hand.
-	 */
-	if (!img_request->result) {
-		struct rbd_obj_request *obj_request;
-		u64 xferred = 0;
-
-		for_each_obj_request(img_request, obj_request)
-			xferred += obj_request->xferred;
-		img_request->xferred = xferred;
-	}
-
-	if (img_request->callback)
-		img_request->callback(img_request);
-	else
-		rbd_img_request_put(img_request);
-}
-
 /*
  * The default/initial value for all image request flags is 0.  Each
  * is conditionally set to 1 at image request initialization time
  * and currently never change thereafter.
  */
-static void img_request_write_set(struct rbd_img_request *img_request)
-{
-	set_bit(IMG_REQ_WRITE, &img_request->flags);
-	smp_mb();
-}
-
-static bool img_request_write_test(struct rbd_img_request *img_request)
-{
-	smp_mb();
-	return test_bit(IMG_REQ_WRITE, &img_request->flags) != 0;
-}
-
-/*
- * Set the discard flag when the img_request is an discard request
- */
-static void img_request_discard_set(struct rbd_img_request *img_request)
-{
-	set_bit(IMG_REQ_DISCARD, &img_request->flags);
-	smp_mb();
-}
-
-static bool img_request_discard_test(struct rbd_img_request *img_request)
-{
-	smp_mb();
-	return test_bit(IMG_REQ_DISCARD, &img_request->flags) != 0;
-}
-
-static void img_request_child_set(struct rbd_img_request *img_request)
-{
-	set_bit(IMG_REQ_CHILD, &img_request->flags);
-	smp_mb();
-}
-
-static void img_request_child_clear(struct rbd_img_request *img_request)
-{
-	clear_bit(IMG_REQ_CHILD, &img_request->flags);
-	smp_mb();
-}
-
-static bool img_request_child_test(struct rbd_img_request *img_request)
-{
-	smp_mb();
-	return test_bit(IMG_REQ_CHILD, &img_request->flags) != 0;
-}
-
 static void img_request_layered_set(struct rbd_img_request *img_request)
 {
 	set_bit(IMG_REQ_LAYERED, &img_request->flags);
@@ -1677,209 +1361,70 @@ static bool img_request_layered_test(struct rbd_img_request *img_request)
 	return test_bit(IMG_REQ_LAYERED, &img_request->flags) != 0;
 }
 
-static enum obj_operation_type
-rbd_img_request_op_type(struct rbd_img_request *img_request)
-{
-	if (img_request_write_test(img_request))
-		return OBJ_OP_WRITE;
-	else if (img_request_discard_test(img_request))
-		return OBJ_OP_DISCARD;
-	else
-		return OBJ_OP_READ;
-}
-
-static void
-rbd_img_obj_request_read_callback(struct rbd_obj_request *obj_request)
-{
-	u64 xferred = obj_request->xferred;
-	u64 length = obj_request->length;
-
-	dout("%s: obj %p img %p result %d %llu/%llu\n", __func__,
-		obj_request, obj_request->img_request, obj_request->result,
-		xferred, length);
-	/*
-	 * ENOENT means a hole in the image.  We zero-fill the entire
-	 * length of the request.  A short read also implies zero-fill
-	 * to the end of the request.  An error requires the whole
-	 * length of the request to be reported finished with an error
-	 * to the block layer.  In each case we update the xferred
-	 * count to indicate the whole request was satisfied.
-	 */
-	rbd_assert(obj_request->type != OBJ_REQUEST_NODATA);
-	if (obj_request->result == -ENOENT) {
-		if (obj_request->type == OBJ_REQUEST_BIO)
-			zero_bio_chain(obj_request->bio_list, 0);
-		else
-			zero_pages(obj_request->pages, 0, length);
-		obj_request->result = 0;
-	} else if (xferred < length && !obj_request->result) {
-		if (obj_request->type == OBJ_REQUEST_BIO)
-			zero_bio_chain(obj_request->bio_list, xferred);
-		else
-			zero_pages(obj_request->pages, xferred, length);
-	}
-	obj_request->xferred = length;
-	obj_request_done_set(obj_request);
-}
-
-static void rbd_obj_request_complete(struct rbd_obj_request *obj_request)
+static bool rbd_obj_is_entire(struct rbd_obj_request *obj_req)
 {
-	dout("%s: obj %p cb %p\n", __func__, obj_request,
-		obj_request->callback);
-	obj_request->callback(obj_request);
-}
+	struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
 
-static void rbd_obj_request_error(struct rbd_obj_request *obj_request, int err)
-{
-	obj_request->result = err;
-	obj_request->xferred = 0;
-	/*
-	 * kludge - mirror rbd_obj_request_submit() to match a put in
-	 * rbd_img_obj_callback()
-	 */
-	if (obj_request_img_data_test(obj_request)) {
-		WARN_ON(obj_request->callback != rbd_img_obj_callback);
-		rbd_img_request_get(obj_request->img_request);
-	}
-	obj_request_done_set(obj_request);
-	rbd_obj_request_complete(obj_request);
+	return !obj_req->ex.oe_off &&
+	       obj_req->ex.oe_len == rbd_dev->layout.object_size;
 }
 
-static void rbd_osd_read_callback(struct rbd_obj_request *obj_request)
+static bool rbd_obj_is_tail(struct rbd_obj_request *obj_req)
 {
-	struct rbd_img_request *img_request = NULL;
-	struct rbd_device *rbd_dev = NULL;
-	bool layered = false;
-
-	if (obj_request_img_data_test(obj_request)) {
-		img_request = obj_request->img_request;
-		layered = img_request && img_request_layered_test(img_request);
-		rbd_dev = img_request->rbd_dev;
-	}
-
-	dout("%s: obj %p img %p result %d %llu/%llu\n", __func__,
-		obj_request, img_request, obj_request->result,
-		obj_request->xferred, obj_request->length);
-	if (layered && obj_request->result == -ENOENT &&
-			obj_request->img_offset < rbd_dev->parent_overlap)
-		rbd_img_parent_read(obj_request);
-	else if (img_request)
-		rbd_img_obj_request_read_callback(obj_request);
-	else
-		obj_request_done_set(obj_request);
-}
+	struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
 
-static void rbd_osd_write_callback(struct rbd_obj_request *obj_request)
-{
-	dout("%s: obj %p result %d %llu\n", __func__, obj_request,
-		obj_request->result, obj_request->length);
-	/*
-	 * There is no such thing as a successful short write.  Set
-	 * it to our originally-requested length.
-	 */
-	obj_request->xferred = obj_request->length;
-	obj_request_done_set(obj_request);
+	return obj_req->ex.oe_off + obj_req->ex.oe_len ==
+					rbd_dev->layout.object_size;
 }
 
-static void rbd_osd_discard_callback(struct rbd_obj_request *obj_request)
+static u64 rbd_obj_img_extents_bytes(struct rbd_obj_request *obj_req)
 {
-	dout("%s: obj %p result %d %llu\n", __func__, obj_request,
-		obj_request->result, obj_request->length);
-	/*
-	 * There is no such thing as a successful short discard.  Set
-	 * it to our originally-requested length.
-	 */
-	obj_request->xferred = obj_request->length;
-	/* discarding a non-existent object is not a problem */
-	if (obj_request->result == -ENOENT)
-		obj_request->result = 0;
-	obj_request_done_set(obj_request);
+	return ceph_file_extents_bytes(obj_req->img_extents,
+				       obj_req->num_img_extents);
 }
 
-/*
- * For a simple stat call there's nothing to do.  We'll do more if
- * this is part of a write sequence for a layered image.
- */
-static void rbd_osd_stat_callback(struct rbd_obj_request *obj_request)
+static bool rbd_img_is_write(struct rbd_img_request *img_req)
 {
-	dout("%s: obj %p\n", __func__, obj_request);
-	obj_request_done_set(obj_request);
+	switch (img_req->op_type) {
+	case OBJ_OP_READ:
+		return false;
+	case OBJ_OP_WRITE:
+	case OBJ_OP_DISCARD:
+		return true;
+	default:
+		rbd_assert(0);
+	}
 }
 
-static void rbd_osd_call_callback(struct rbd_obj_request *obj_request)
-{
-	dout("%s: obj %p\n", __func__, obj_request);
-
-	if (obj_request_img_data_test(obj_request))
-		rbd_osd_copyup_callback(obj_request);
-	else
-		obj_request_done_set(obj_request);
-}
+static void rbd_obj_handle_request(struct rbd_obj_request *obj_req);
 
 static void rbd_osd_req_callback(struct ceph_osd_request *osd_req)
 {
-	struct rbd_obj_request *obj_request = osd_req->r_priv;
-	u16 opcode;
+	struct rbd_obj_request *obj_req = osd_req->r_priv;
 
-	dout("%s: osd_req %p\n", __func__, osd_req);
-	rbd_assert(osd_req == obj_request->osd_req);
-	if (obj_request_img_data_test(obj_request)) {
-		rbd_assert(obj_request->img_request);
-		rbd_assert(obj_request->which != BAD_WHICH);
-	} else {
-		rbd_assert(obj_request->which == BAD_WHICH);
-	}
-
-	if (osd_req->r_result < 0)
-		obj_request->result = osd_req->r_result;
-
-	/*
-	 * We support a 64-bit length, but ultimately it has to be
-	 * passed to the block layer, which just supports a 32-bit
-	 * length field.
-	 */
-	obj_request->xferred = osd_req->r_ops[0].outdata_len;
-	rbd_assert(obj_request->xferred < (u64)UINT_MAX);
+	dout("%s osd_req %p result %d for obj_req %p\n", __func__, osd_req,
+	     osd_req->r_result, obj_req);
+	rbd_assert(osd_req == obj_req->osd_req);
 
-	opcode = osd_req->r_ops[0].op;
-	switch (opcode) {
-	case CEPH_OSD_OP_READ:
-		rbd_osd_read_callback(obj_request);
-		break;
-	case CEPH_OSD_OP_SETALLOCHINT:
-		rbd_assert(osd_req->r_ops[1].op == CEPH_OSD_OP_WRITE ||
-			   osd_req->r_ops[1].op == CEPH_OSD_OP_WRITEFULL);
-		/* fall through */
-	case CEPH_OSD_OP_WRITE:
-	case CEPH_OSD_OP_WRITEFULL:
-		rbd_osd_write_callback(obj_request);
-		break;
-	case CEPH_OSD_OP_STAT:
-		rbd_osd_stat_callback(obj_request);
-		break;
-	case CEPH_OSD_OP_DELETE:
-	case CEPH_OSD_OP_TRUNCATE:
-	case CEPH_OSD_OP_ZERO:
-		rbd_osd_discard_callback(obj_request);
-		break;
-	case CEPH_OSD_OP_CALL:
-		rbd_osd_call_callback(obj_request);
-		break;
-	default:
-		rbd_warn(NULL, "unexpected OSD op: object_no %016llx opcode %d",
-			 obj_request->object_no, opcode);
-		break;
-	}
+	obj_req->result = osd_req->r_result < 0 ? osd_req->r_result : 0;
+	if (!obj_req->result && !rbd_img_is_write(obj_req->img_request))
+		obj_req->xferred = osd_req->r_result;
+	else
+		/*
+		 * Writes aren't allowed to return a data payload.  In some
+		 * guarded write cases (e.g. stat + zero on an empty object)
+		 * a stat response makes it through, but we don't care.
+		 */
+		obj_req->xferred = 0;
 
-	if (obj_request_done_test(obj_request))
-		rbd_obj_request_complete(obj_request);
+	rbd_obj_handle_request(obj_req);
 }
 
 static void rbd_osd_req_format_read(struct rbd_obj_request *obj_request)
 {
 	struct ceph_osd_request *osd_req = obj_request->osd_req;
 
-	rbd_assert(obj_request_img_data_test(obj_request));
+	osd_req->r_flags = CEPH_OSD_FLAG_READ;
 	osd_req->r_snapid = obj_request->img_request->snap_id;
 }
 
@@ -1887,32 +1432,33 @@ static void rbd_osd_req_format_write(struct rbd_obj_request *obj_request)
 {
 	struct ceph_osd_request *osd_req = obj_request->osd_req;
 
+	osd_req->r_flags = CEPH_OSD_FLAG_WRITE;
 	ktime_get_real_ts(&osd_req->r_mtime);
-	osd_req->r_data_offset = obj_request->offset;
+	osd_req->r_data_offset = obj_request->ex.oe_off;
 }
 
 static struct ceph_osd_request *
-__rbd_osd_req_create(struct rbd_device *rbd_dev,
-		     struct ceph_snap_context *snapc,
-		     int num_ops, unsigned int flags,
-		     struct rbd_obj_request *obj_request)
+rbd_osd_req_create(struct rbd_obj_request *obj_req, unsigned int num_ops)
 {
+	struct rbd_img_request *img_req = obj_req->img_request;
+	struct rbd_device *rbd_dev = img_req->rbd_dev;
 	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
 	struct ceph_osd_request *req;
 	const char *name_format = rbd_dev->image_format == 1 ?
 				      RBD_V1_DATA_FORMAT : RBD_V2_DATA_FORMAT;
 
-	req = ceph_osdc_alloc_request(osdc, snapc, num_ops, false, GFP_NOIO);
+	req = ceph_osdc_alloc_request(osdc,
+			(rbd_img_is_write(img_req) ? img_req->snapc : NULL),
+			num_ops, false, GFP_NOIO);
 	if (!req)
 		return NULL;
 
-	req->r_flags = flags;
 	req->r_callback = rbd_osd_req_callback;
-	req->r_priv = obj_request;
+	req->r_priv = obj_req;
 
 	req->r_base_oloc.pool = rbd_dev->layout.pool_id;
 	if (ceph_oid_aprintf(&req->r_base_oid, GFP_NOIO, name_format,
-			rbd_dev->header.object_prefix, obj_request->object_no))
+			rbd_dev->header.object_prefix, obj_req->ex.oe_objno))
 		goto err_req;
 
 	if (ceph_osdc_alloc_messages(req, GFP_NOIO))
@@ -1925,83 +1471,20 @@ err_req:
 	return NULL;
 }
 
-/*
- * Create an osd request.  A read request has one osd op (read).
- * A write request has either one (watch) or two (hint+write) osd ops.
- * (All rbd data writes are prefixed with an allocation hint op, but
- * technically osd watch is a write request, hence this distinction.)
- */
-static struct ceph_osd_request *rbd_osd_req_create(
-					struct rbd_device *rbd_dev,
-					enum obj_operation_type op_type,
-					unsigned int num_ops,
-					struct rbd_obj_request *obj_request)
-{
-	struct ceph_snap_context *snapc = NULL;
-
-	if (obj_request_img_data_test(obj_request) &&
-		(op_type == OBJ_OP_DISCARD || op_type == OBJ_OP_WRITE)) {
-		struct rbd_img_request *img_request = obj_request->img_request;
-		if (op_type == OBJ_OP_WRITE) {
-			rbd_assert(img_request_write_test(img_request));
-		} else {
-			rbd_assert(img_request_discard_test(img_request));
-		}
-		snapc = img_request->snapc;
-	}
-
-	rbd_assert(num_ops == 1 || ((op_type == OBJ_OP_WRITE) && num_ops == 2));
-
-	return __rbd_osd_req_create(rbd_dev, snapc, num_ops,
-	    (op_type == OBJ_OP_WRITE || op_type == OBJ_OP_DISCARD) ?
-	    CEPH_OSD_FLAG_WRITE : CEPH_OSD_FLAG_READ, obj_request);
-}
-
-/*
- * Create a copyup osd request based on the information in the object
- * request supplied.  A copyup request has two or three osd ops, a
- * copyup method call, potentially a hint op, and a write or truncate
- * or zero op.
- */
-static struct ceph_osd_request *
-rbd_osd_req_create_copyup(struct rbd_obj_request *obj_request)
-{
-	struct rbd_img_request *img_request;
-	int num_osd_ops = 3;
-
-	rbd_assert(obj_request_img_data_test(obj_request));
-	img_request = obj_request->img_request;
-	rbd_assert(img_request);
-	rbd_assert(img_request_write_test(img_request) ||
-			img_request_discard_test(img_request));
-
-	if (img_request_discard_test(img_request))
-		num_osd_ops = 2;
-
-	return __rbd_osd_req_create(img_request->rbd_dev,
-				    img_request->snapc, num_osd_ops,
-				    CEPH_OSD_FLAG_WRITE, obj_request);
-}
-
 static void rbd_osd_req_destroy(struct ceph_osd_request *osd_req)
 {
 	ceph_osdc_put_request(osd_req);
 }
 
-static struct rbd_obj_request *
-rbd_obj_request_create(enum obj_request_type type)
+static struct rbd_obj_request *rbd_obj_request_create(void)
 {
 	struct rbd_obj_request *obj_request;
 
-	rbd_assert(obj_request_type_valid(type));
-
 	obj_request = kmem_cache_zalloc(rbd_obj_request_cache, GFP_NOIO);
 	if (!obj_request)
 		return NULL;
 
-	obj_request->which = BAD_WHICH;
-	obj_request->type = type;
-	INIT_LIST_HEAD(&obj_request->links);
+	ceph_object_extent_init(&obj_request->ex);
 	kref_init(&obj_request->kref);
 
 	dout("%s %p\n", __func__, obj_request);
@@ -2011,32 +1494,34 @@ rbd_obj_request_create(enum obj_request_type type)
 static void rbd_obj_request_destroy(struct kref *kref)
 {
 	struct rbd_obj_request *obj_request;
+	u32 i;
 
 	obj_request = container_of(kref, struct rbd_obj_request, kref);
 
 	dout("%s: obj %p\n", __func__, obj_request);
 
-	rbd_assert(obj_request->img_request == NULL);
-	rbd_assert(obj_request->which == BAD_WHICH);
-
 	if (obj_request->osd_req)
 		rbd_osd_req_destroy(obj_request->osd_req);
 
-	rbd_assert(obj_request_type_valid(obj_request->type));
-	switch (obj_request->type) {
+	switch (obj_request->img_request->data_type) {
 	case OBJ_REQUEST_NODATA:
-		break;		/* Nothing to do */
 	case OBJ_REQUEST_BIO:
-		if (obj_request->bio_list)
-			bio_chain_put(obj_request->bio_list);
-		break;
-	case OBJ_REQUEST_PAGES:
-		/* img_data requests don't own their page array */
-		if (obj_request->pages &&
-		    !obj_request_img_data_test(obj_request))
-			ceph_release_page_vector(obj_request->pages,
-						obj_request->page_count);
+	case OBJ_REQUEST_BVECS:
+		break;		/* Nothing to do */
+	case OBJ_REQUEST_OWN_BVECS:
+		kfree(obj_request->bvec_pos.bvecs);
 		break;
+	default:
+		rbd_assert(0);
+	}
+
+	kfree(obj_request->img_extents);
+	if (obj_request->copyup_bvecs) {
+		for (i = 0; i < obj_request->copyup_bvec_count; i++) {
+			if (obj_request->copyup_bvecs[i].bv_page)
+				__free_page(obj_request->copyup_bvecs[i].bv_page);
+		}
+		kfree(obj_request->copyup_bvecs);
 	}
 
 	kmem_cache_free(rbd_obj_request_cache, obj_request);
@@ -2111,7 +1596,6 @@ static bool rbd_dev_parent_get(struct rbd_device *rbd_dev)
  */
 static struct rbd_img_request *rbd_img_request_create(
 					struct rbd_device *rbd_dev,
-					u64 offset, u64 length,
 					enum obj_operation_type op_type,
 					struct ceph_snap_context *snapc)
 {
@@ -2122,27 +1606,21 @@ static struct rbd_img_request *rbd_img_request_create(
 		return NULL;
 
 	img_request->rbd_dev = rbd_dev;
-	img_request->offset = offset;
-	img_request->length = length;
-	if (op_type == OBJ_OP_DISCARD) {
-		img_request_discard_set(img_request);
-		img_request->snapc = snapc;
-	} else if (op_type == OBJ_OP_WRITE) {
-		img_request_write_set(img_request);
-		img_request->snapc = snapc;
-	} else {
+	img_request->op_type = op_type;
+	if (!rbd_img_is_write(img_request))
 		img_request->snap_id = rbd_dev->spec->snap_id;
-	}
+	else
+		img_request->snapc = snapc;
+
 	if (rbd_dev_parent_get(rbd_dev))
 		img_request_layered_set(img_request);
 
 	spin_lock_init(&img_request->completion_lock);
-	INIT_LIST_HEAD(&img_request->obj_requests);
+	INIT_LIST_HEAD(&img_request->object_extents);
 	kref_init(&img_request->kref);
 
-	dout("%s: rbd_dev %p %s %llu/%llu -> img %p\n", __func__, rbd_dev,
-		obj_op_name(op_type), offset, length, img_request);
-
+	dout("%s: rbd_dev %p %s -> img %p\n", __func__, rbd_dev,
+	     obj_op_name(op_type), img_request);
 	return img_request;
 }
 
@@ -2165,829 +1643,934 @@ static void rbd_img_request_destroy(struct kref *kref)
 		rbd_dev_parent_put(img_request->rbd_dev);
 	}
 
-	if (img_request_write_test(img_request) ||
-		img_request_discard_test(img_request))
+	if (rbd_img_is_write(img_request))
 		ceph_put_snap_context(img_request->snapc);
 
 	kmem_cache_free(rbd_img_request_cache, img_request);
 }
 
-static struct rbd_img_request *rbd_parent_request_create(
-					struct rbd_obj_request *obj_request,
-					u64 img_offset, u64 length)
+static void prune_extents(struct ceph_file_extent *img_extents,
+			  u32 *num_img_extents, u64 overlap)
 {
-	struct rbd_img_request *parent_request;
-	struct rbd_device *rbd_dev;
+	u32 cnt = *num_img_extents;
 
-	rbd_assert(obj_request->img_request);
-	rbd_dev = obj_request->img_request->rbd_dev;
+	/* drop extents completely beyond the overlap */
+	while (cnt && img_extents[cnt - 1].fe_off >= overlap)
+		cnt--;
 
-	parent_request = rbd_img_request_create(rbd_dev->parent, img_offset,
-						length, OBJ_OP_READ, NULL);
-	if (!parent_request)
-		return NULL;
+	if (cnt) {
+		struct ceph_file_extent *ex = &img_extents[cnt - 1];
 
-	img_request_child_set(parent_request);
-	rbd_obj_request_get(obj_request);
-	parent_request->obj_request = obj_request;
+		/* trim final overlapping extent */
+		if (ex->fe_off + ex->fe_len > overlap)
+			ex->fe_len = overlap - ex->fe_off;
+	}
 
-	return parent_request;
+	*num_img_extents = cnt;
 }
 
-static void rbd_parent_request_destroy(struct kref *kref)
+/*
+ * Determine the byte range(s) covered by either just the object extent
+ * or the entire object in the parent image.
+ */
+static int rbd_obj_calc_img_extents(struct rbd_obj_request *obj_req,
+				    bool entire)
 {
-	struct rbd_img_request *parent_request;
-	struct rbd_obj_request *orig_request;
+	struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
+	int ret;
 
-	parent_request = container_of(kref, struct rbd_img_request, kref);
-	orig_request = parent_request->obj_request;
+	if (!rbd_dev->parent_overlap)
+		return 0;
 
-	parent_request->obj_request = NULL;
-	rbd_obj_request_put(orig_request);
-	img_request_child_clear(parent_request);
+	ret = ceph_extent_to_file(&rbd_dev->layout, obj_req->ex.oe_objno,
+				  entire ? 0 : obj_req->ex.oe_off,
+				  entire ? rbd_dev->layout.object_size :
+							obj_req->ex.oe_len,
+				  &obj_req->img_extents,
+				  &obj_req->num_img_extents);
+	if (ret)
+		return ret;
 
-	rbd_img_request_destroy(kref);
+	prune_extents(obj_req->img_extents, &obj_req->num_img_extents,
+		      rbd_dev->parent_overlap);
+	return 0;
 }
 
-static bool rbd_img_obj_end_request(struct rbd_obj_request *obj_request)
+static void rbd_osd_req_setup_data(struct rbd_obj_request *obj_req, u32 which)
 {
-	struct rbd_img_request *img_request;
-	unsigned int xferred;
-	int result;
-	bool more;
-
-	rbd_assert(obj_request_img_data_test(obj_request));
-	img_request = obj_request->img_request;
-
-	rbd_assert(obj_request->xferred <= (u64)UINT_MAX);
-	xferred = (unsigned int)obj_request->xferred;
-	result = obj_request->result;
-	if (result) {
-		struct rbd_device *rbd_dev = img_request->rbd_dev;
-		enum obj_operation_type op_type;
-
-		if (img_request_discard_test(img_request))
-			op_type = OBJ_OP_DISCARD;
-		else if (img_request_write_test(img_request))
-			op_type = OBJ_OP_WRITE;
-		else
-			op_type = OBJ_OP_READ;
-
-		rbd_warn(rbd_dev, "%s %llx at %llx (%llx)",
-			obj_op_name(op_type), obj_request->length,
-			obj_request->img_offset, obj_request->offset);
-		rbd_warn(rbd_dev, "  result %d xferred %x",
-			result, xferred);
-		if (!img_request->result)
-			img_request->result = result;
-		/*
-		 * Need to end I/O on the entire obj_request worth of
-		 * bytes in case of error.
-		 */
-		xferred = obj_request->length;
+	switch (obj_req->img_request->data_type) {
+	case OBJ_REQUEST_BIO:
+		osd_req_op_extent_osd_data_bio(obj_req->osd_req, which,
+					       &obj_req->bio_pos,
+					       obj_req->ex.oe_len);
+		break;
+	case OBJ_REQUEST_BVECS:
+	case OBJ_REQUEST_OWN_BVECS:
+		rbd_assert(obj_req->bvec_pos.iter.bi_size ==
+							obj_req->ex.oe_len);
+		rbd_assert(obj_req->bvec_idx == obj_req->bvec_count);
+		osd_req_op_extent_osd_data_bvec_pos(obj_req->osd_req, which,
+						    &obj_req->bvec_pos);
+		break;
+	default:
+		rbd_assert(0);
 	}
+}
 
-	if (img_request_child_test(img_request)) {
-		rbd_assert(img_request->obj_request != NULL);
-		more = obj_request->which < img_request->obj_request_count - 1;
-	} else {
-		blk_status_t status = errno_to_blk_status(result);
+static int rbd_obj_setup_read(struct rbd_obj_request *obj_req)
+{
+	obj_req->osd_req = rbd_osd_req_create(obj_req, 1);
+	if (!obj_req->osd_req)
+		return -ENOMEM;
 
-		rbd_assert(img_request->rq != NULL);
+	osd_req_op_extent_init(obj_req->osd_req, 0, CEPH_OSD_OP_READ,
+			       obj_req->ex.oe_off, obj_req->ex.oe_len, 0, 0);
+	rbd_osd_req_setup_data(obj_req, 0);
 
-		more = blk_update_request(img_request->rq, status, xferred);
-		if (!more)
-			__blk_mq_end_request(img_request->rq, status);
-	}
+	rbd_osd_req_format_read(obj_req);
+	return 0;
+}
+
+static int __rbd_obj_setup_stat(struct rbd_obj_request *obj_req,
+				unsigned int which)
+{
+	struct page **pages;
 
-	return more;
+	/*
+	 * The response data for a STAT call consists of:
+	 *     le64 length;
+	 *     struct {
+	 *         le32 tv_sec;
+	 *         le32 tv_nsec;
+	 *     } mtime;
+	 */
+	pages = ceph_alloc_page_vector(1, GFP_NOIO);
+	if (IS_ERR(pages))
+		return PTR_ERR(pages);
+
+	osd_req_op_init(obj_req->osd_req, which, CEPH_OSD_OP_STAT, 0);
+	osd_req_op_raw_data_in_pages(obj_req->osd_req, which, pages,
+				     8 + sizeof(struct ceph_timespec),
+				     0, false, true);
+	return 0;
 }
 
-static void rbd_img_obj_callback(struct rbd_obj_request *obj_request)
+static void __rbd_obj_setup_write(struct rbd_obj_request *obj_req,
+				  unsigned int which)
 {
-	struct rbd_img_request *img_request;
-	u32 which = obj_request->which;
-	bool more = true;
+	struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
+	u16 opcode;
 
-	rbd_assert(obj_request_img_data_test(obj_request));
-	img_request = obj_request->img_request;
+	osd_req_op_alloc_hint_init(obj_req->osd_req, which++,
+				   rbd_dev->layout.object_size,
+				   rbd_dev->layout.object_size);
 
-	dout("%s: img %p obj %p\n", __func__, img_request, obj_request);
-	rbd_assert(img_request != NULL);
-	rbd_assert(img_request->obj_request_count > 0);
-	rbd_assert(which != BAD_WHICH);
-	rbd_assert(which < img_request->obj_request_count);
+	if (rbd_obj_is_entire(obj_req))
+		opcode = CEPH_OSD_OP_WRITEFULL;
+	else
+		opcode = CEPH_OSD_OP_WRITE;
 
-	spin_lock_irq(&img_request->completion_lock);
-	if (which != img_request->next_completion)
-		goto out;
+	osd_req_op_extent_init(obj_req->osd_req, which, opcode,
+			       obj_req->ex.oe_off, obj_req->ex.oe_len, 0, 0);
+	rbd_osd_req_setup_data(obj_req, which++);
+
+	rbd_assert(which == obj_req->osd_req->r_num_ops);
+	rbd_osd_req_format_write(obj_req);
+}
 
-	for_each_obj_request_from(img_request, obj_request) {
-		rbd_assert(more);
-		rbd_assert(which < img_request->obj_request_count);
+static int rbd_obj_setup_write(struct rbd_obj_request *obj_req)
+{
+	unsigned int num_osd_ops, which = 0;
+	int ret;
 
-		if (!obj_request_done_test(obj_request))
-			break;
-		more = rbd_img_obj_end_request(obj_request);
-		which++;
+	/* reverse map the entire object onto the parent */
+	ret = rbd_obj_calc_img_extents(obj_req, true);
+	if (ret)
+		return ret;
+
+	if (obj_req->num_img_extents) {
+		obj_req->write_state = RBD_OBJ_WRITE_GUARD;
+		num_osd_ops = 3; /* stat + setallochint + write/writefull */
+	} else {
+		obj_req->write_state = RBD_OBJ_WRITE_FLAT;
+		num_osd_ops = 2; /* setallochint + write/writefull */
 	}
 
-	rbd_assert(more ^ (which == img_request->obj_request_count));
-	img_request->next_completion = which;
-out:
-	spin_unlock_irq(&img_request->completion_lock);
-	rbd_img_request_put(img_request);
+	obj_req->osd_req = rbd_osd_req_create(obj_req, num_osd_ops);
+	if (!obj_req->osd_req)
+		return -ENOMEM;
 
-	if (!more)
-		rbd_img_request_complete(img_request);
+	if (obj_req->num_img_extents) {
+		ret = __rbd_obj_setup_stat(obj_req, which++);
+		if (ret)
+			return ret;
+	}
+
+	__rbd_obj_setup_write(obj_req, which);
+	return 0;
 }
 
-/*
- * Add individual osd ops to the given ceph_osd_request and prepare
- * them for submission. num_ops is the current number of
- * osd operations already to the object request.
- */
-static void rbd_img_obj_request_fill(struct rbd_obj_request *obj_request,
-				struct ceph_osd_request *osd_request,
-				enum obj_operation_type op_type,
-				unsigned int num_ops)
-{
-	struct rbd_img_request *img_request = obj_request->img_request;
-	struct rbd_device *rbd_dev = img_request->rbd_dev;
-	u64 object_size = rbd_obj_bytes(&rbd_dev->header);
-	u64 offset = obj_request->offset;
-	u64 length = obj_request->length;
-	u64 img_end;
+static void __rbd_obj_setup_discard(struct rbd_obj_request *obj_req,
+				    unsigned int which)
+{
 	u16 opcode;
 
-	if (op_type == OBJ_OP_DISCARD) {
-		if (!offset && length == object_size &&
-		    (!img_request_layered_test(img_request) ||
-		     !obj_request_overlaps_parent(obj_request))) {
-			opcode = CEPH_OSD_OP_DELETE;
-		} else if ((offset + length == object_size)) {
+	if (rbd_obj_is_entire(obj_req)) {
+		if (obj_req->num_img_extents) {
+			osd_req_op_init(obj_req->osd_req, which++,
+					CEPH_OSD_OP_CREATE, 0);
 			opcode = CEPH_OSD_OP_TRUNCATE;
 		} else {
-			down_read(&rbd_dev->header_rwsem);
-			img_end = rbd_dev->header.image_size;
-			up_read(&rbd_dev->header_rwsem);
-
-			if (obj_request->img_offset + length == img_end)
-				opcode = CEPH_OSD_OP_TRUNCATE;
-			else
-				opcode = CEPH_OSD_OP_ZERO;
+			osd_req_op_init(obj_req->osd_req, which++,
+					CEPH_OSD_OP_DELETE, 0);
+			opcode = 0;
 		}
-	} else if (op_type == OBJ_OP_WRITE) {
-		if (!offset && length == object_size)
-			opcode = CEPH_OSD_OP_WRITEFULL;
-		else
-			opcode = CEPH_OSD_OP_WRITE;
-		osd_req_op_alloc_hint_init(osd_request, num_ops,
-					object_size, object_size);
-		num_ops++;
+	} else if (rbd_obj_is_tail(obj_req)) {
+		opcode = CEPH_OSD_OP_TRUNCATE;
 	} else {
-		opcode = CEPH_OSD_OP_READ;
+		opcode = CEPH_OSD_OP_ZERO;
 	}
 
-	if (opcode == CEPH_OSD_OP_DELETE)
-		osd_req_op_init(osd_request, num_ops, opcode, 0);
-	else
-		osd_req_op_extent_init(osd_request, num_ops, opcode,
-				       offset, length, 0, 0);
-
-	if (obj_request->type == OBJ_REQUEST_BIO)
-		osd_req_op_extent_osd_data_bio(osd_request, num_ops,
-					obj_request->bio_list, length);
-	else if (obj_request->type == OBJ_REQUEST_PAGES)
-		osd_req_op_extent_osd_data_pages(osd_request, num_ops,
-					obj_request->pages, length,
-					offset & ~PAGE_MASK, false, false);
-
-	/* Discards are also writes */
-	if (op_type == OBJ_OP_WRITE || op_type == OBJ_OP_DISCARD)
-		rbd_osd_req_format_write(obj_request);
-	else
-		rbd_osd_req_format_read(obj_request);
+	if (opcode)
+		osd_req_op_extent_init(obj_req->osd_req, which++, opcode,
+				       obj_req->ex.oe_off, obj_req->ex.oe_len,
+				       0, 0);
+
+	rbd_assert(which == obj_req->osd_req->r_num_ops);
+	rbd_osd_req_format_write(obj_req);
 }
 
-/*
- * Split up an image request into one or more object requests, each
- * to a different object.  The "type" parameter indicates whether
- * "data_desc" is the pointer to the head of a list of bio
- * structures, or the base of a page array.  In either case this
- * function assumes data_desc describes memory sufficient to hold
- * all data described by the image request.
- */
-static int rbd_img_request_fill(struct rbd_img_request *img_request,
-					enum obj_request_type type,
-					void *data_desc)
+static int rbd_obj_setup_discard(struct rbd_obj_request *obj_req)
 {
-	struct rbd_device *rbd_dev = img_request->rbd_dev;
-	struct rbd_obj_request *obj_request = NULL;
-	struct rbd_obj_request *next_obj_request;
-	struct bio *bio_list = NULL;
-	unsigned int bio_offset = 0;
-	struct page **pages = NULL;
-	enum obj_operation_type op_type;
-	u64 img_offset;
-	u64 resid;
-
-	dout("%s: img %p type %d data_desc %p\n", __func__, img_request,
-		(int)type, data_desc);
+	unsigned int num_osd_ops, which = 0;
+	int ret;
 
-	img_offset = img_request->offset;
-	resid = img_request->length;
-	rbd_assert(resid > 0);
-	op_type = rbd_img_request_op_type(img_request);
+	/* reverse map the entire object onto the parent */
+	ret = rbd_obj_calc_img_extents(obj_req, true);
+	if (ret)
+		return ret;
 
-	if (type == OBJ_REQUEST_BIO) {
-		bio_list = data_desc;
-		rbd_assert(img_offset ==
-			   bio_list->bi_iter.bi_sector << SECTOR_SHIFT);
-	} else if (type == OBJ_REQUEST_PAGES) {
-		pages = data_desc;
+	if (rbd_obj_is_entire(obj_req)) {
+		obj_req->write_state = RBD_OBJ_WRITE_FLAT;
+		if (obj_req->num_img_extents)
+			num_osd_ops = 2; /* create + truncate */
+		else
+			num_osd_ops = 1; /* delete */
+	} else {
+		if (obj_req->num_img_extents) {
+			obj_req->write_state = RBD_OBJ_WRITE_GUARD;
+			num_osd_ops = 2; /* stat + truncate/zero */
+		} else {
+			obj_req->write_state = RBD_OBJ_WRITE_FLAT;
+			num_osd_ops = 1; /* truncate/zero */
+		}
 	}
 
-	while (resid) {
-		struct ceph_osd_request *osd_req;
-		u64 object_no = img_offset >> rbd_dev->header.obj_order;
-		u64 offset = rbd_segment_offset(rbd_dev, img_offset);
-		u64 length = rbd_segment_length(rbd_dev, img_offset, resid);
-
-		obj_request = rbd_obj_request_create(type);
-		if (!obj_request)
-			goto out_unwind;
-
-		obj_request->object_no = object_no;
-		obj_request->offset = offset;
-		obj_request->length = length;
-
-		/*
-		 * set obj_request->img_request before creating the
-		 * osd_request so that it gets the right snapc
-		 */
-		rbd_img_obj_request_add(img_request, obj_request);
-
-		if (type == OBJ_REQUEST_BIO) {
-			unsigned int clone_size;
-
-			rbd_assert(length <= (u64)UINT_MAX);
-			clone_size = (unsigned int)length;
-			obj_request->bio_list =
-					bio_chain_clone_range(&bio_list,
-								&bio_offset,
-								clone_size,
-								GFP_NOIO);
-			if (!obj_request->bio_list)
-				goto out_unwind;
-		} else if (type == OBJ_REQUEST_PAGES) {
-			unsigned int page_count;
-
-			obj_request->pages = pages;
-			page_count = (u32)calc_pages_for(offset, length);
-			obj_request->page_count = page_count;
-			if ((offset + length) & ~PAGE_MASK)
-				page_count--;	/* more on last page */
-			pages += page_count;
-		}
+	obj_req->osd_req = rbd_osd_req_create(obj_req, num_osd_ops);
+	if (!obj_req->osd_req)
+		return -ENOMEM;
 
-		osd_req = rbd_osd_req_create(rbd_dev, op_type,
-					(op_type == OBJ_OP_WRITE) ? 2 : 1,
-					obj_request);
-		if (!osd_req)
-			goto out_unwind;
+	if (!rbd_obj_is_entire(obj_req) && obj_req->num_img_extents) {
+		ret = __rbd_obj_setup_stat(obj_req, which++);
+		if (ret)
+			return ret;
+	}
 
-		obj_request->osd_req = osd_req;
-		obj_request->callback = rbd_img_obj_callback;
-		obj_request->img_offset = img_offset;
+	__rbd_obj_setup_discard(obj_req, which);
+	return 0;
+}
 
-		rbd_img_obj_request_fill(obj_request, osd_req, op_type, 0);
+/*
+ * For each object request in @img_req, allocate an OSD request, add
+ * individual OSD ops and prepare them for submission.  The number of
+ * OSD ops depends on op_type and the overlap point (if any).
+ */
+static int __rbd_img_fill_request(struct rbd_img_request *img_req)
+{
+	struct rbd_obj_request *obj_req;
+	int ret;
 
-		img_offset += length;
-		resid -= length;
+	for_each_obj_request(img_req, obj_req) {
+		switch (img_req->op_type) {
+		case OBJ_OP_READ:
+			ret = rbd_obj_setup_read(obj_req);
+			break;
+		case OBJ_OP_WRITE:
+			ret = rbd_obj_setup_write(obj_req);
+			break;
+		case OBJ_OP_DISCARD:
+			ret = rbd_obj_setup_discard(obj_req);
+			break;
+		default:
+			rbd_assert(0);
+		}
+		if (ret)
+			return ret;
 	}
 
 	return 0;
+}
 
-out_unwind:
-	for_each_obj_request_safe(img_request, obj_request, next_obj_request)
-		rbd_img_obj_request_del(img_request, obj_request);
+union rbd_img_fill_iter {
+	struct ceph_bio_iter	bio_iter;
+	struct ceph_bvec_iter	bvec_iter;
+};
 
-	return -ENOMEM;
-}
+struct rbd_img_fill_ctx {
+	enum obj_request_type	pos_type;
+	union rbd_img_fill_iter	*pos;
+	union rbd_img_fill_iter	iter;
+	ceph_object_extent_fn_t	set_pos_fn;
+	ceph_object_extent_fn_t	count_fn;
+	ceph_object_extent_fn_t	copy_fn;
+};
 
-static void
-rbd_osd_copyup_callback(struct rbd_obj_request *obj_request)
+static struct ceph_object_extent *alloc_object_extent(void *arg)
 {
-	struct rbd_img_request *img_request;
-	struct rbd_device *rbd_dev;
-	struct page **pages;
-	u32 page_count;
+	struct rbd_img_request *img_req = arg;
+	struct rbd_obj_request *obj_req;
 
-	dout("%s: obj %p\n", __func__, obj_request);
+	obj_req = rbd_obj_request_create();
+	if (!obj_req)
+		return NULL;
 
-	rbd_assert(obj_request->type == OBJ_REQUEST_BIO ||
-		obj_request->type == OBJ_REQUEST_NODATA);
-	rbd_assert(obj_request_img_data_test(obj_request));
-	img_request = obj_request->img_request;
-	rbd_assert(img_request);
+	rbd_img_obj_request_add(img_req, obj_req);
+	return &obj_req->ex;
+}
 
-	rbd_dev = img_request->rbd_dev;
-	rbd_assert(rbd_dev);
+/*
+ * While su != os && sc == 1 is technically not fancy (it's the same
+ * layout as su == os && sc == 1), we can't use the nocopy path for it
+ * because ->set_pos_fn() should be called only once per object.
+ * ceph_file_to_extents() invokes action_fn once per stripe unit, so
+ * treat su != os && sc == 1 as fancy.
+ */
+static bool rbd_layout_is_fancy(struct ceph_file_layout *l)
+{
+	return l->stripe_unit != l->object_size;
+}
 
-	pages = obj_request->copyup_pages;
-	rbd_assert(pages != NULL);
-	obj_request->copyup_pages = NULL;
-	page_count = obj_request->copyup_page_count;
-	rbd_assert(page_count);
-	obj_request->copyup_page_count = 0;
-	ceph_release_page_vector(pages, page_count);
+static int rbd_img_fill_request_nocopy(struct rbd_img_request *img_req,
+				       struct ceph_file_extent *img_extents,
+				       u32 num_img_extents,
+				       struct rbd_img_fill_ctx *fctx)
+{
+	u32 i;
+	int ret;
+
+	img_req->data_type = fctx->pos_type;
 
 	/*
-	 * We want the transfer count to reflect the size of the
-	 * original write request.  There is no such thing as a
-	 * successful short write, so if the request was successful
-	 * we can just set it to the originally-requested length.
+	 * Create object requests and set each object request's starting
+	 * position in the provided bio (list) or bio_vec array.
 	 */
-	if (!obj_request->result)
-		obj_request->xferred = obj_request->length;
+	fctx->iter = *fctx->pos;
+	for (i = 0; i < num_img_extents; i++) {
+		ret = ceph_file_to_extents(&img_req->rbd_dev->layout,
+					   img_extents[i].fe_off,
+					   img_extents[i].fe_len,
+					   &img_req->object_extents,
+					   alloc_object_extent, img_req,
+					   fctx->set_pos_fn, &fctx->iter);
+		if (ret)
+			return ret;
+	}
 
-	obj_request_done_set(obj_request);
+	return __rbd_img_fill_request(img_req);
 }
 
-static void
-rbd_img_obj_parent_read_full_callback(struct rbd_img_request *img_request)
+/*
+ * Map a list of image extents to a list of object extents, create the
+ * corresponding object requests (normally each to a different object,
+ * but not always) and add them to @img_req.  For each object request,
+ * set up its data descriptor to point to the corresponding chunk(s) of
+ * @fctx->pos data buffer.
+ *
+ * Because ceph_file_to_extents() will merge adjacent object extents
+ * together, each object request's data descriptor may point to multiple
+ * different chunks of @fctx->pos data buffer.
+ *
+ * @fctx->pos data buffer is assumed to be large enough.
+ */
+static int rbd_img_fill_request(struct rbd_img_request *img_req,
+				struct ceph_file_extent *img_extents,
+				u32 num_img_extents,
+				struct rbd_img_fill_ctx *fctx)
 {
-	struct rbd_obj_request *orig_request;
-	struct ceph_osd_request *osd_req;
-	struct rbd_device *rbd_dev;
-	struct page **pages;
-	enum obj_operation_type op_type;
-	u32 page_count;
-	int img_result;
-	u64 parent_length;
-
-	rbd_assert(img_request_child_test(img_request));
-
-	/* First get what we need from the image request */
-
-	pages = img_request->copyup_pages;
-	rbd_assert(pages != NULL);
-	img_request->copyup_pages = NULL;
-	page_count = img_request->copyup_page_count;
-	rbd_assert(page_count);
-	img_request->copyup_page_count = 0;
-
-	orig_request = img_request->obj_request;
-	rbd_assert(orig_request != NULL);
-	rbd_assert(obj_request_type_valid(orig_request->type));
-	img_result = img_request->result;
-	parent_length = img_request->length;
-	rbd_assert(img_result || parent_length == img_request->xferred);
-	rbd_img_request_put(img_request);
+	struct rbd_device *rbd_dev = img_req->rbd_dev;
+	struct rbd_obj_request *obj_req;
+	u32 i;
+	int ret;
 
-	rbd_assert(orig_request->img_request);
-	rbd_dev = orig_request->img_request->rbd_dev;
-	rbd_assert(rbd_dev);
+	if (fctx->pos_type == OBJ_REQUEST_NODATA ||
+	    !rbd_layout_is_fancy(&rbd_dev->layout))
+		return rbd_img_fill_request_nocopy(img_req, img_extents,
+						   num_img_extents, fctx);
+
+	img_req->data_type = OBJ_REQUEST_OWN_BVECS;
 
 	/*
-	 * If the overlap has become 0 (most likely because the
-	 * image has been flattened) we need to free the pages
-	 * and re-submit the original write request.
+	 * Create object requests and determine ->bvec_count for each object
+	 * request.  Note that ->bvec_count sum over all object requests may
+	 * be greater than the number of bio_vecs in the provided bio (list)
+	 * or bio_vec array because when mapped, those bio_vecs can straddle
+	 * stripe unit boundaries.
 	 */
-	if (!rbd_dev->parent_overlap) {
-		ceph_release_page_vector(pages, page_count);
-		rbd_obj_request_submit(orig_request);
-		return;
+	fctx->iter = *fctx->pos;
+	for (i = 0; i < num_img_extents; i++) {
+		ret = ceph_file_to_extents(&rbd_dev->layout,
+					   img_extents[i].fe_off,
+					   img_extents[i].fe_len,
+					   &img_req->object_extents,
+					   alloc_object_extent, img_req,
+					   fctx->count_fn, &fctx->iter);
+		if (ret)
+			return ret;
 	}
 
-	if (img_result)
-		goto out_err;
+	for_each_obj_request(img_req, obj_req) {
+		obj_req->bvec_pos.bvecs = kmalloc_array(obj_req->bvec_count,
+					      sizeof(*obj_req->bvec_pos.bvecs),
+					      GFP_NOIO);
+		if (!obj_req->bvec_pos.bvecs)
+			return -ENOMEM;
+	}
 
 	/*
-	 * The original osd request is of no use to use any more.
-	 * We need a new one that can hold the three ops in a copyup
-	 * request.  Allocate the new copyup osd request for the
-	 * original request, and release the old one.
+	 * Fill in each object request's private bio_vec array, splitting and
+	 * rearranging the provided bio_vecs in stripe unit chunks as needed.
 	 */
-	img_result = -ENOMEM;
-	osd_req = rbd_osd_req_create_copyup(orig_request);
-	if (!osd_req)
-		goto out_err;
-	rbd_osd_req_destroy(orig_request->osd_req);
-	orig_request->osd_req = osd_req;
-	orig_request->copyup_pages = pages;
-	orig_request->copyup_page_count = page_count;
+	fctx->iter = *fctx->pos;
+	for (i = 0; i < num_img_extents; i++) {
+		ret = ceph_iterate_extents(&rbd_dev->layout,
+					   img_extents[i].fe_off,
+					   img_extents[i].fe_len,
+					   &img_req->object_extents,
+					   fctx->copy_fn, &fctx->iter);
+		if (ret)
+			return ret;
+	}
 
-	/* Initialize the copyup op */
+	return __rbd_img_fill_request(img_req);
+}
 
-	osd_req_op_cls_init(osd_req, 0, CEPH_OSD_OP_CALL, "rbd", "copyup");
-	osd_req_op_cls_request_data_pages(osd_req, 0, pages, parent_length, 0,
-						false, false);
+static int rbd_img_fill_nodata(struct rbd_img_request *img_req,
+			       u64 off, u64 len)
+{
+	struct ceph_file_extent ex = { off, len };
+	union rbd_img_fill_iter dummy;
+	struct rbd_img_fill_ctx fctx = {
+		.pos_type = OBJ_REQUEST_NODATA,
+		.pos = &dummy,
+	};
 
-	/* Add the other op(s) */
+	return rbd_img_fill_request(img_req, &ex, 1, &fctx);
+}
 
-	op_type = rbd_img_request_op_type(orig_request->img_request);
-	rbd_img_obj_request_fill(orig_request, osd_req, op_type, 1);
+static void set_bio_pos(struct ceph_object_extent *ex, u32 bytes, void *arg)
+{
+	struct rbd_obj_request *obj_req =
+	    container_of(ex, struct rbd_obj_request, ex);
+	struct ceph_bio_iter *it = arg;
 
-	/* All set, send it off. */
+	dout("%s objno %llu bytes %u\n", __func__, ex->oe_objno, bytes);
+	obj_req->bio_pos = *it;
+	ceph_bio_iter_advance(it, bytes);
+}
 
-	rbd_obj_request_submit(orig_request);
-	return;
+static void count_bio_bvecs(struct ceph_object_extent *ex, u32 bytes, void *arg)
+{
+	struct rbd_obj_request *obj_req =
+	    container_of(ex, struct rbd_obj_request, ex);
+	struct ceph_bio_iter *it = arg;
+
+	dout("%s objno %llu bytes %u\n", __func__, ex->oe_objno, bytes);
+	ceph_bio_iter_advance_step(it, bytes, ({
+		obj_req->bvec_count++;
+	}));
 
-out_err:
-	ceph_release_page_vector(pages, page_count);
-	rbd_obj_request_error(orig_request, img_result);
 }
 
-/*
- * Read from the parent image the range of data that covers the
- * entire target of the given object request.  This is used for
- * satisfying a layered image write request when the target of an
- * object request from the image request does not exist.
- *
- * A page array big enough to hold the returned data is allocated
- * and supplied to rbd_img_request_fill() as the "data descriptor."
- * When the read completes, this page array will be transferred to
- * the original object request for the copyup operation.
- *
- * If an error occurs, it is recorded as the result of the original
- * object request in rbd_img_obj_exists_callback().
- */
-static int rbd_img_obj_parent_read_full(struct rbd_obj_request *obj_request)
-{
-	struct rbd_device *rbd_dev = obj_request->img_request->rbd_dev;
-	struct rbd_img_request *parent_request = NULL;
-	u64 img_offset;
-	u64 length;
-	struct page **pages = NULL;
-	u32 page_count;
-	int result;
+static void copy_bio_bvecs(struct ceph_object_extent *ex, u32 bytes, void *arg)
+{
+	struct rbd_obj_request *obj_req =
+	    container_of(ex, struct rbd_obj_request, ex);
+	struct ceph_bio_iter *it = arg;
 
-	rbd_assert(rbd_dev->parent != NULL);
+	dout("%s objno %llu bytes %u\n", __func__, ex->oe_objno, bytes);
+	ceph_bio_iter_advance_step(it, bytes, ({
+		obj_req->bvec_pos.bvecs[obj_req->bvec_idx++] = bv;
+		obj_req->bvec_pos.iter.bi_size += bv.bv_len;
+	}));
+}
 
-	/*
-	 * Determine the byte range covered by the object in the
-	 * child image to which the original request was to be sent.
-	 */
-	img_offset = obj_request->img_offset - obj_request->offset;
-	length = rbd_obj_bytes(&rbd_dev->header);
+static int __rbd_img_fill_from_bio(struct rbd_img_request *img_req,
+				   struct ceph_file_extent *img_extents,
+				   u32 num_img_extents,
+				   struct ceph_bio_iter *bio_pos)
+{
+	struct rbd_img_fill_ctx fctx = {
+		.pos_type = OBJ_REQUEST_BIO,
+		.pos = (union rbd_img_fill_iter *)bio_pos,
+		.set_pos_fn = set_bio_pos,
+		.count_fn = count_bio_bvecs,
+		.copy_fn = copy_bio_bvecs,
+	};
 
-	/*
-	 * There is no defined parent data beyond the parent
-	 * overlap, so limit what we read at that boundary if
-	 * necessary.
-	 */
-	if (img_offset + length > rbd_dev->parent_overlap) {
-		rbd_assert(img_offset < rbd_dev->parent_overlap);
-		length = rbd_dev->parent_overlap - img_offset;
-	}
+	return rbd_img_fill_request(img_req, img_extents, num_img_extents,
+				    &fctx);
+}
 
-	/*
-	 * Allocate a page array big enough to receive the data read
-	 * from the parent.
-	 */
-	page_count = (u32)calc_pages_for(0, length);
-	pages = ceph_alloc_page_vector(page_count, GFP_NOIO);
-	if (IS_ERR(pages)) {
-		result = PTR_ERR(pages);
-		pages = NULL;
-		goto out_err;
-	}
+static int rbd_img_fill_from_bio(struct rbd_img_request *img_req,
+				 u64 off, u64 len, struct bio *bio)
+{
+	struct ceph_file_extent ex = { off, len };
+	struct ceph_bio_iter it = { .bio = bio, .iter = bio->bi_iter };
 
-	result = -ENOMEM;
-	parent_request = rbd_parent_request_create(obj_request,
-						img_offset, length);
-	if (!parent_request)
-		goto out_err;
+	return __rbd_img_fill_from_bio(img_req, &ex, 1, &it);
+}
 
-	result = rbd_img_request_fill(parent_request, OBJ_REQUEST_PAGES, pages);
-	if (result)
-		goto out_err;
+static void set_bvec_pos(struct ceph_object_extent *ex, u32 bytes, void *arg)
+{
+	struct rbd_obj_request *obj_req =
+	    container_of(ex, struct rbd_obj_request, ex);
+	struct ceph_bvec_iter *it = arg;
 
-	parent_request->copyup_pages = pages;
-	parent_request->copyup_page_count = page_count;
-	parent_request->callback = rbd_img_obj_parent_read_full_callback;
+	obj_req->bvec_pos = *it;
+	ceph_bvec_iter_shorten(&obj_req->bvec_pos, bytes);
+	ceph_bvec_iter_advance(it, bytes);
+}
 
-	result = rbd_img_request_submit(parent_request);
-	if (!result)
-		return 0;
+static void count_bvecs(struct ceph_object_extent *ex, u32 bytes, void *arg)
+{
+	struct rbd_obj_request *obj_req =
+	    container_of(ex, struct rbd_obj_request, ex);
+	struct ceph_bvec_iter *it = arg;
 
-	parent_request->copyup_pages = NULL;
-	parent_request->copyup_page_count = 0;
-out_err:
-	if (pages)
-		ceph_release_page_vector(pages, page_count);
-	if (parent_request)
-		rbd_img_request_put(parent_request);
-	return result;
+	ceph_bvec_iter_advance_step(it, bytes, ({
+		obj_req->bvec_count++;
+	}));
 }
 
-static void rbd_img_obj_exists_callback(struct rbd_obj_request *obj_request)
+static void copy_bvecs(struct ceph_object_extent *ex, u32 bytes, void *arg)
 {
-	struct rbd_obj_request *orig_request;
-	struct rbd_device *rbd_dev;
-	int result;
+	struct rbd_obj_request *obj_req =
+	    container_of(ex, struct rbd_obj_request, ex);
+	struct ceph_bvec_iter *it = arg;
 
-	rbd_assert(!obj_request_img_data_test(obj_request));
+	ceph_bvec_iter_advance_step(it, bytes, ({
+		obj_req->bvec_pos.bvecs[obj_req->bvec_idx++] = bv;
+		obj_req->bvec_pos.iter.bi_size += bv.bv_len;
+	}));
+}
 
-	/*
-	 * All we need from the object request is the original
-	 * request and the result of the STAT op.  Grab those, then
-	 * we're done with the request.
-	 */
-	orig_request = obj_request->obj_request;
-	obj_request->obj_request = NULL;
-	rbd_obj_request_put(orig_request);
-	rbd_assert(orig_request);
-	rbd_assert(orig_request->img_request);
-
-	result = obj_request->result;
-	obj_request->result = 0;
-
-	dout("%s: obj %p for obj %p result %d %llu/%llu\n", __func__,
-		obj_request, orig_request, result,
-		obj_request->xferred, obj_request->length);
-	rbd_obj_request_put(obj_request);
+static int __rbd_img_fill_from_bvecs(struct rbd_img_request *img_req,
+				     struct ceph_file_extent *img_extents,
+				     u32 num_img_extents,
+				     struct ceph_bvec_iter *bvec_pos)
+{
+	struct rbd_img_fill_ctx fctx = {
+		.pos_type = OBJ_REQUEST_BVECS,
+		.pos = (union rbd_img_fill_iter *)bvec_pos,
+		.set_pos_fn = set_bvec_pos,
+		.count_fn = count_bvecs,
+		.copy_fn = copy_bvecs,
+	};
 
-	/*
-	 * If the overlap has become 0 (most likely because the
-	 * image has been flattened) we need to re-submit the
-	 * original request.
-	 */
-	rbd_dev = orig_request->img_request->rbd_dev;
-	if (!rbd_dev->parent_overlap) {
-		rbd_obj_request_submit(orig_request);
-		return;
-	}
+	return rbd_img_fill_request(img_req, img_extents, num_img_extents,
+				    &fctx);
+}
 
-	/*
-	 * Our only purpose here is to determine whether the object
-	 * exists, and we don't want to treat the non-existence as
-	 * an error.  If something else comes back, transfer the
-	 * error to the original request and complete it now.
-	 */
-	if (!result) {
-		obj_request_existence_set(orig_request, true);
-	} else if (result == -ENOENT) {
-		obj_request_existence_set(orig_request, false);
-	} else {
-		goto fail_orig_request;
-	}
+static int rbd_img_fill_from_bvecs(struct rbd_img_request *img_req,
+				   struct ceph_file_extent *img_extents,
+				   u32 num_img_extents,
+				   struct bio_vec *bvecs)
+{
+	struct ceph_bvec_iter it = {
+		.bvecs = bvecs,
+		.iter = { .bi_size = ceph_file_extents_bytes(img_extents,
+							     num_img_extents) },
+	};
 
-	/*
-	 * Resubmit the original request now that we have recorded
-	 * whether the target object exists.
-	 */
-	result = rbd_img_obj_request_submit(orig_request);
-	if (result)
-		goto fail_orig_request;
+	return __rbd_img_fill_from_bvecs(img_req, img_extents, num_img_extents,
+					 &it);
+}
 
-	return;
+static void rbd_img_request_submit(struct rbd_img_request *img_request)
+{
+	struct rbd_obj_request *obj_request;
+
+	dout("%s: img %p\n", __func__, img_request);
+
+	rbd_img_request_get(img_request);
+	for_each_obj_request(img_request, obj_request)
+		rbd_obj_request_submit(obj_request);
 
-fail_orig_request:
-	rbd_obj_request_error(orig_request, result);
+	rbd_img_request_put(img_request);
 }
 
-static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request)
+static int rbd_obj_read_from_parent(struct rbd_obj_request *obj_req)
 {
-	struct rbd_device *rbd_dev = obj_request->img_request->rbd_dev;
-	struct rbd_obj_request *stat_request;
-	struct page **pages;
-	u32 page_count;
-	size_t size;
+	struct rbd_img_request *img_req = obj_req->img_request;
+	struct rbd_img_request *child_img_req;
 	int ret;
 
-	stat_request = rbd_obj_request_create(OBJ_REQUEST_PAGES);
-	if (!stat_request)
+	child_img_req = rbd_img_request_create(img_req->rbd_dev->parent,
+					       OBJ_OP_READ, NULL);
+	if (!child_img_req)
 		return -ENOMEM;
 
-	stat_request->object_no = obj_request->object_no;
+	__set_bit(IMG_REQ_CHILD, &child_img_req->flags);
+	child_img_req->obj_request = obj_req;
 
-	stat_request->osd_req = rbd_osd_req_create(rbd_dev, OBJ_OP_READ, 1,
-						   stat_request);
-	if (!stat_request->osd_req) {
-		ret = -ENOMEM;
-		goto fail_stat_request;
+	if (!rbd_img_is_write(img_req)) {
+		switch (img_req->data_type) {
+		case OBJ_REQUEST_BIO:
+			ret = __rbd_img_fill_from_bio(child_img_req,
+						      obj_req->img_extents,
+						      obj_req->num_img_extents,
+						      &obj_req->bio_pos);
+			break;
+		case OBJ_REQUEST_BVECS:
+		case OBJ_REQUEST_OWN_BVECS:
+			ret = __rbd_img_fill_from_bvecs(child_img_req,
+						      obj_req->img_extents,
+						      obj_req->num_img_extents,
+						      &obj_req->bvec_pos);
+			break;
+		default:
+			rbd_assert(0);
+		}
+	} else {
+		ret = rbd_img_fill_from_bvecs(child_img_req,
+					      obj_req->img_extents,
+					      obj_req->num_img_extents,
+					      obj_req->copyup_bvecs);
+	}
+	if (ret) {
+		rbd_img_request_put(child_img_req);
+		return ret;
+	}
+
+	rbd_img_request_submit(child_img_req);
+	return 0;
+}
+
+static bool rbd_obj_handle_read(struct rbd_obj_request *obj_req)
+{
+	struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
+	int ret;
+
+	if (obj_req->result == -ENOENT &&
+	    rbd_dev->parent_overlap && !obj_req->tried_parent) {
+		/* reverse map this object extent onto the parent */
+		ret = rbd_obj_calc_img_extents(obj_req, false);
+		if (ret) {
+			obj_req->result = ret;
+			return true;
+		}
+
+		if (obj_req->num_img_extents) {
+			obj_req->tried_parent = true;
+			ret = rbd_obj_read_from_parent(obj_req);
+			if (ret) {
+				obj_req->result = ret;
+				return true;
+			}
+			return false;
+		}
 	}
 
 	/*
-	 * The response data for a STAT call consists of:
-	 *     le64 length;
-	 *     struct {
-	 *         le32 tv_sec;
-	 *         le32 tv_nsec;
-	 *     } mtime;
+	 * -ENOENT means a hole in the image -- zero-fill the entire
+	 * length of the request.  A short read also implies zero-fill
+	 * to the end of the request.  In both cases we update xferred
+	 * count to indicate the whole request was satisfied.
 	 */
-	size = sizeof (__le64) + sizeof (__le32) + sizeof (__le32);
-	page_count = (u32)calc_pages_for(0, size);
-	pages = ceph_alloc_page_vector(page_count, GFP_NOIO);
-	if (IS_ERR(pages)) {
-		ret = PTR_ERR(pages);
-		goto fail_stat_request;
+	if (obj_req->result == -ENOENT ||
+	    (!obj_req->result && obj_req->xferred < obj_req->ex.oe_len)) {
+		rbd_assert(!obj_req->xferred || !obj_req->result);
+		rbd_obj_zero_range(obj_req, obj_req->xferred,
+				   obj_req->ex.oe_len - obj_req->xferred);
+		obj_req->result = 0;
+		obj_req->xferred = obj_req->ex.oe_len;
 	}
 
-	osd_req_op_init(stat_request->osd_req, 0, CEPH_OSD_OP_STAT, 0);
-	osd_req_op_raw_data_in_pages(stat_request->osd_req, 0, pages, size, 0,
-				     false, false);
-
-	rbd_obj_request_get(obj_request);
-	stat_request->obj_request = obj_request;
-	stat_request->pages = pages;
-	stat_request->page_count = page_count;
-	stat_request->callback = rbd_img_obj_exists_callback;
+	return true;
+}
 
-	rbd_obj_request_submit(stat_request);
-	return 0;
+/*
+ * copyup_bvecs pages are never highmem pages
+ */
+static bool is_zero_bvecs(struct bio_vec *bvecs, u32 bytes)
+{
+	struct ceph_bvec_iter it = {
+		.bvecs = bvecs,
+		.iter = { .bi_size = bytes },
+	};
 
-fail_stat_request:
-	rbd_obj_request_put(stat_request);
-	return ret;
+	ceph_bvec_iter_advance_step(&it, bytes, ({
+		if (memchr_inv(page_address(bv.bv_page) + bv.bv_offset, 0,
+			       bv.bv_len))
+			return false;
+	}));
+	return true;
 }
 
-static bool img_obj_request_simple(struct rbd_obj_request *obj_request)
+static int rbd_obj_issue_copyup(struct rbd_obj_request *obj_req, u32 bytes)
 {
-	struct rbd_img_request *img_request = obj_request->img_request;
-	struct rbd_device *rbd_dev = img_request->rbd_dev;
+	unsigned int num_osd_ops = obj_req->osd_req->r_num_ops;
 
-	/* Reads */
-	if (!img_request_write_test(img_request) &&
-	    !img_request_discard_test(img_request))
-		return true;
-
-	/* Non-layered writes */
-	if (!img_request_layered_test(img_request))
-		return true;
+	dout("%s obj_req %p bytes %u\n", __func__, obj_req, bytes);
+	rbd_assert(obj_req->osd_req->r_ops[0].op == CEPH_OSD_OP_STAT);
+	rbd_osd_req_destroy(obj_req->osd_req);
 
 	/*
-	 * Layered writes outside of the parent overlap range don't
-	 * share any data with the parent.
+	 * Create a copyup request with the same number of OSD ops as
+	 * the original request.  The original request was stat + op(s),
+	 * the new copyup request will be copyup + the same op(s).
 	 */
-	if (!obj_request_overlaps_parent(obj_request))
-		return true;
+	obj_req->osd_req = rbd_osd_req_create(obj_req, num_osd_ops);
+	if (!obj_req->osd_req)
+		return -ENOMEM;
 
 	/*
-	 * Entire-object layered writes - we will overwrite whatever
-	 * parent data there is anyway.
+	 * Only send non-zero copyup data to save some I/O and network
+	 * bandwidth -- zero copyup data is equivalent to the object not
+	 * existing.
 	 */
-	if (!obj_request->offset &&
-	    obj_request->length == rbd_obj_bytes(&rbd_dev->header))
-		return true;
+	if (is_zero_bvecs(obj_req->copyup_bvecs, bytes)) {
+		dout("%s obj_req %p detected zeroes\n", __func__, obj_req);
+		bytes = 0;
+	}
 
-	/*
-	 * If the object is known to already exist, its parent data has
-	 * already been copied.
-	 */
-	if (obj_request_known_test(obj_request) &&
-	    obj_request_exists_test(obj_request))
-		return true;
+	osd_req_op_cls_init(obj_req->osd_req, 0, CEPH_OSD_OP_CALL, "rbd",
+			    "copyup");
+	osd_req_op_cls_request_data_bvecs(obj_req->osd_req, 0,
+					  obj_req->copyup_bvecs, bytes);
+
+	switch (obj_req->img_request->op_type) {
+	case OBJ_OP_WRITE:
+		__rbd_obj_setup_write(obj_req, 1);
+		break;
+	case OBJ_OP_DISCARD:
+		rbd_assert(!rbd_obj_is_entire(obj_req));
+		__rbd_obj_setup_discard(obj_req, 1);
+		break;
+	default:
+		rbd_assert(0);
+	}
 
-	return false;
+	rbd_obj_request_submit(obj_req);
+	return 0;
 }
 
-static int rbd_img_obj_request_submit(struct rbd_obj_request *obj_request)
+static int setup_copyup_bvecs(struct rbd_obj_request *obj_req, u64 obj_overlap)
 {
-	rbd_assert(obj_request_img_data_test(obj_request));
-	rbd_assert(obj_request_type_valid(obj_request->type));
-	rbd_assert(obj_request->img_request);
+	u32 i;
 
-	if (img_obj_request_simple(obj_request)) {
-		rbd_obj_request_submit(obj_request);
-		return 0;
-	}
+	rbd_assert(!obj_req->copyup_bvecs);
+	obj_req->copyup_bvec_count = calc_pages_for(0, obj_overlap);
+	obj_req->copyup_bvecs = kcalloc(obj_req->copyup_bvec_count,
+					sizeof(*obj_req->copyup_bvecs),
+					GFP_NOIO);
+	if (!obj_req->copyup_bvecs)
+		return -ENOMEM;
 
-	/*
-	 * It's a layered write.  The target object might exist but
-	 * we may not know that yet.  If we know it doesn't exist,
-	 * start by reading the data for the full target object from
-	 * the parent so we can use it for a copyup to the target.
-	 */
-	if (obj_request_known_test(obj_request))
-		return rbd_img_obj_parent_read_full(obj_request);
+	for (i = 0; i < obj_req->copyup_bvec_count; i++) {
+		unsigned int len = min(obj_overlap, (u64)PAGE_SIZE);
 
-	/* We don't know whether the target exists.  Go find out. */
+		obj_req->copyup_bvecs[i].bv_page = alloc_page(GFP_NOIO);
+		if (!obj_req->copyup_bvecs[i].bv_page)
+			return -ENOMEM;
+
+		obj_req->copyup_bvecs[i].bv_offset = 0;
+		obj_req->copyup_bvecs[i].bv_len = len;
+		obj_overlap -= len;
+	}
 
-	return rbd_img_obj_exists_submit(obj_request);
+	rbd_assert(!obj_overlap);
+	return 0;
 }
 
-static int rbd_img_request_submit(struct rbd_img_request *img_request)
+static int rbd_obj_handle_write_guard(struct rbd_obj_request *obj_req)
 {
-	struct rbd_obj_request *obj_request;
-	struct rbd_obj_request *next_obj_request;
-	int ret = 0;
-
-	dout("%s: img %p\n", __func__, img_request);
+	struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
+	int ret;
 
-	rbd_img_request_get(img_request);
-	for_each_obj_request_safe(img_request, obj_request, next_obj_request) {
-		ret = rbd_img_obj_request_submit(obj_request);
-		if (ret)
-			goto out_put_ireq;
+	rbd_assert(obj_req->num_img_extents);
+	prune_extents(obj_req->img_extents, &obj_req->num_img_extents,
+		      rbd_dev->parent_overlap);
+	if (!obj_req->num_img_extents) {
+		/*
+		 * The overlap has become 0 (most likely because the
+		 * image has been flattened).  Use rbd_obj_issue_copyup()
+		 * to re-submit the original write request -- the copyup
+		 * operation itself will be a no-op, since someone must
+		 * have populated the child object while we weren't
+		 * looking.  Move to WRITE_FLAT state as we'll be done
+		 * with the operation once the null copyup completes.
+		 */
+		obj_req->write_state = RBD_OBJ_WRITE_FLAT;
+		return rbd_obj_issue_copyup(obj_req, 0);
 	}
 
-out_put_ireq:
-	rbd_img_request_put(img_request);
-	return ret;
+	ret = setup_copyup_bvecs(obj_req, rbd_obj_img_extents_bytes(obj_req));
+	if (ret)
+		return ret;
+
+	obj_req->write_state = RBD_OBJ_WRITE_COPYUP;
+	return rbd_obj_read_from_parent(obj_req);
 }
 
-static void rbd_img_parent_read_callback(struct rbd_img_request *img_request)
+static bool rbd_obj_handle_write(struct rbd_obj_request *obj_req)
 {
-	struct rbd_obj_request *obj_request;
-	struct rbd_device *rbd_dev;
-	u64 obj_end;
-	u64 img_xferred;
-	int img_result;
+	int ret;
 
-	rbd_assert(img_request_child_test(img_request));
+again:
+	switch (obj_req->write_state) {
+	case RBD_OBJ_WRITE_GUARD:
+		rbd_assert(!obj_req->xferred);
+		if (obj_req->result == -ENOENT) {
+			/*
+			 * The target object doesn't exist.  Read the data for
+			 * the entire target object up to the overlap point (if
+			 * any) from the parent, so we can use it for a copyup.
+			 */
+			ret = rbd_obj_handle_write_guard(obj_req);
+			if (ret) {
+				obj_req->result = ret;
+				return true;
+			}
+			return false;
+		}
+		/* fall through */
+	case RBD_OBJ_WRITE_FLAT:
+		if (!obj_req->result)
+			/*
+			 * There is no such thing as a successful short
+			 * write -- indicate the whole request was satisfied.
+			 */
+			obj_req->xferred = obj_req->ex.oe_len;
+		return true;
+	case RBD_OBJ_WRITE_COPYUP:
+		obj_req->write_state = RBD_OBJ_WRITE_GUARD;
+		if (obj_req->result)
+			goto again;
 
-	/* First get what we need from the image request and release it */
+		rbd_assert(obj_req->xferred);
+		ret = rbd_obj_issue_copyup(obj_req, obj_req->xferred);
+		if (ret) {
+			obj_req->result = ret;
+			return true;
+		}
+		return false;
+	default:
+		rbd_assert(0);
+	}
+}
 
-	obj_request = img_request->obj_request;
-	img_xferred = img_request->xferred;
-	img_result = img_request->result;
-	rbd_img_request_put(img_request);
+/*
+ * Returns true if @obj_req is completed, or false otherwise.
+ */
+static bool __rbd_obj_handle_request(struct rbd_obj_request *obj_req)
+{
+	switch (obj_req->img_request->op_type) {
+	case OBJ_OP_READ:
+		return rbd_obj_handle_read(obj_req);
+	case OBJ_OP_WRITE:
+		return rbd_obj_handle_write(obj_req);
+	case OBJ_OP_DISCARD:
+		if (rbd_obj_handle_write(obj_req)) {
+			/*
+			 * Hide -ENOENT from delete/truncate/zero -- discarding
+			 * a non-existent object is not a problem.
+			 */
+			if (obj_req->result == -ENOENT) {
+				obj_req->result = 0;
+				obj_req->xferred = obj_req->ex.oe_len;
+			}
+			return true;
+		}
+		return false;
+	default:
+		rbd_assert(0);
+	}
+}
 
-	/*
-	 * If the overlap has become 0 (most likely because the
-	 * image has been flattened) we need to re-submit the
-	 * original request.
-	 */
-	rbd_assert(obj_request);
-	rbd_assert(obj_request->img_request);
-	rbd_dev = obj_request->img_request->rbd_dev;
-	if (!rbd_dev->parent_overlap) {
-		rbd_obj_request_submit(obj_request);
+static void rbd_obj_end_request(struct rbd_obj_request *obj_req)
+{
+	struct rbd_img_request *img_req = obj_req->img_request;
+
+	rbd_assert((!obj_req->result &&
+		    obj_req->xferred == obj_req->ex.oe_len) ||
+		   (obj_req->result < 0 && !obj_req->xferred));
+	if (!obj_req->result) {
+		img_req->xferred += obj_req->xferred;
 		return;
 	}
 
-	obj_request->result = img_result;
-	if (obj_request->result)
-		goto out;
+	rbd_warn(img_req->rbd_dev,
+		 "%s at objno %llu %llu~%llu result %d xferred %llu",
+		 obj_op_name(img_req->op_type), obj_req->ex.oe_objno,
+		 obj_req->ex.oe_off, obj_req->ex.oe_len, obj_req->result,
+		 obj_req->xferred);
+	if (!img_req->result) {
+		img_req->result = obj_req->result;
+		img_req->xferred = 0;
+	}
+}
 
-	/*
-	 * We need to zero anything beyond the parent overlap
-	 * boundary.  Since rbd_img_obj_request_read_callback()
-	 * will zero anything beyond the end of a short read, an
-	 * easy way to do this is to pretend the data from the
-	 * parent came up short--ending at the overlap boundary.
-	 */
-	rbd_assert(obj_request->img_offset < U64_MAX - obj_request->length);
-	obj_end = obj_request->img_offset + obj_request->length;
-	if (obj_end > rbd_dev->parent_overlap) {
-		u64 xferred = 0;
+static void rbd_img_end_child_request(struct rbd_img_request *img_req)
+{
+	struct rbd_obj_request *obj_req = img_req->obj_request;
 
-		if (obj_request->img_offset < rbd_dev->parent_overlap)
-			xferred = rbd_dev->parent_overlap -
-					obj_request->img_offset;
+	rbd_assert(test_bit(IMG_REQ_CHILD, &img_req->flags));
+	rbd_assert((!img_req->result &&
+		    img_req->xferred == rbd_obj_img_extents_bytes(obj_req)) ||
+		   (img_req->result < 0 && !img_req->xferred));
 
-		obj_request->xferred = min(img_xferred, xferred);
-	} else {
-		obj_request->xferred = img_xferred;
-	}
-out:
-	rbd_img_obj_request_read_callback(obj_request);
-	rbd_obj_request_complete(obj_request);
+	obj_req->result = img_req->result;
+	obj_req->xferred = img_req->xferred;
+	rbd_img_request_put(img_req);
 }
 
-static void rbd_img_parent_read(struct rbd_obj_request *obj_request)
+static void rbd_img_end_request(struct rbd_img_request *img_req)
 {
-	struct rbd_img_request *img_request;
-	int result;
+	rbd_assert(!test_bit(IMG_REQ_CHILD, &img_req->flags));
+	rbd_assert((!img_req->result &&
+		    img_req->xferred == blk_rq_bytes(img_req->rq)) ||
+		   (img_req->result < 0 && !img_req->xferred));
 
-	rbd_assert(obj_request_img_data_test(obj_request));
-	rbd_assert(obj_request->img_request != NULL);
-	rbd_assert(obj_request->result == (s32) -ENOENT);
-	rbd_assert(obj_request_type_valid(obj_request->type));
+	blk_mq_end_request(img_req->rq,
+			   errno_to_blk_status(img_req->result));
+	rbd_img_request_put(img_req);
+}
 
-	/* rbd_read_finish(obj_request, obj_request->length); */
-	img_request = rbd_parent_request_create(obj_request,
-						obj_request->img_offset,
-						obj_request->length);
-	result = -ENOMEM;
-	if (!img_request)
-		goto out_err;
+static void rbd_obj_handle_request(struct rbd_obj_request *obj_req)
+{
+	struct rbd_img_request *img_req;
 
-	if (obj_request->type == OBJ_REQUEST_BIO)
-		result = rbd_img_request_fill(img_request, OBJ_REQUEST_BIO,
-						obj_request->bio_list);
-	else
-		result = rbd_img_request_fill(img_request, OBJ_REQUEST_PAGES,
-						obj_request->pages);
-	if (result)
-		goto out_err;
+again:
+	if (!__rbd_obj_handle_request(obj_req))
+		return;
 
-	img_request->callback = rbd_img_parent_read_callback;
-	result = rbd_img_request_submit(img_request);
-	if (result)
-		goto out_err;
+	img_req = obj_req->img_request;
+	spin_lock(&img_req->completion_lock);
+	rbd_obj_end_request(obj_req);
+	rbd_assert(img_req->pending_count);
+	if (--img_req->pending_count) {
+		spin_unlock(&img_req->completion_lock);
+		return;
+	}
 
-	return;
-out_err:
-	if (img_request)
-		rbd_img_request_put(img_request);
-	obj_request->result = result;
-	obj_request->xferred = 0;
-	obj_request_done_set(obj_request);
+	spin_unlock(&img_req->completion_lock);
+	if (test_bit(IMG_REQ_CHILD, &img_req->flags)) {
+		obj_req = img_req->obj_request;
+		rbd_img_end_child_request(img_req);
+		goto again;
+	}
+	rbd_img_end_request(img_req);
 }
 
 static const struct rbd_client_id rbd_empty_cid;
@@ -3091,8 +2674,8 @@ static int __rbd_notify_op_lock(struct rbd_device *rbd_dev,
 {
 	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
 	struct rbd_client_id cid = rbd_get_cid(rbd_dev);
-	int buf_size = 4 + 8 + 8 + CEPH_ENCODING_START_BLK_LEN;
-	char buf[buf_size];
+	char buf[4 + 8 + 8 + CEPH_ENCODING_START_BLK_LEN];
+	int buf_size = sizeof(buf);
 	void *p = buf;
 
 	dout("%s rbd_dev %p notify_op %d\n", __func__, rbd_dev, notify_op);
@@ -3610,8 +3193,8 @@ static void __rbd_acknowledge_notify(struct rbd_device *rbd_dev,
 				     u64 notify_id, u64 cookie, s32 *result)
 {
 	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
-	int buf_size = 4 + CEPH_ENCODING_START_BLK_LEN;
-	char buf[buf_size];
+	char buf[4 + CEPH_ENCODING_START_BLK_LEN];
+	int buf_size = sizeof(buf);
 	int ret;
 
 	if (result) {
@@ -3887,7 +3470,7 @@ static void rbd_reregister_watch(struct work_struct *work)
 
 	ret = rbd_dev_refresh(rbd_dev);
 	if (ret)
-		rbd_warn(rbd_dev, "reregisteration refresh failed: %d", ret);
+		rbd_warn(rbd_dev, "reregistration refresh failed: %d", ret);
 }
 
 /*
@@ -4070,8 +3653,7 @@ static void rbd_queue_workfn(struct work_struct *work)
 		}
 	}
 
-	img_request = rbd_img_request_create(rbd_dev, offset, length, op_type,
-					     snapc);
+	img_request = rbd_img_request_create(rbd_dev, op_type, snapc);
 	if (!img_request) {
 		result = -ENOMEM;
 		goto err_unlock;
@@ -4080,18 +3662,14 @@ static void rbd_queue_workfn(struct work_struct *work)
 	snapc = NULL; /* img_request consumes a ref */
 
 	if (op_type == OBJ_OP_DISCARD)
-		result = rbd_img_request_fill(img_request, OBJ_REQUEST_NODATA,
-					      NULL);
+		result = rbd_img_fill_nodata(img_request, offset, length);
 	else
-		result = rbd_img_request_fill(img_request, OBJ_REQUEST_BIO,
-					      rq->bio);
-	if (result)
-		goto err_img_request;
-
-	result = rbd_img_request_submit(img_request);
+		result = rbd_img_fill_from_bio(img_request, offset, length,
+					       rq->bio);
 	if (result)
 		goto err_img_request;
 
+	rbd_img_request_submit(img_request);
 	if (must_be_locked)
 		up_read(&rbd_dev->lock_rwsem);
 	return;
@@ -4369,7 +3947,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
 	blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE);
 	q->limits.max_sectors = queue_max_hw_sectors(q);
 	blk_queue_max_segments(q, USHRT_MAX);
-	blk_queue_max_segment_size(q, segment_size);
+	blk_queue_max_segment_size(q, UINT_MAX);
 	blk_queue_io_min(q, segment_size);
 	blk_queue_io_opt(q, segment_size);
 
@@ -5057,9 +4635,6 @@ static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev)
 	} __attribute__ ((packed)) striping_info_buf = { 0 };
 	size_t size = sizeof (striping_info_buf);
 	void *p;
-	u64 obj_size;
-	u64 stripe_unit;
-	u64 stripe_count;
 	int ret;
 
 	ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
@@ -5071,31 +4646,9 @@ static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev)
 	if (ret < size)
 		return -ERANGE;
 
-	/*
-	 * We don't actually support the "fancy striping" feature
-	 * (STRIPINGV2) yet, but if the striping sizes are the
-	 * defaults the behavior is the same as before.  So find
-	 * out, and only fail if the image has non-default values.
-	 */
-	ret = -EINVAL;
-	obj_size = rbd_obj_bytes(&rbd_dev->header);
 	p = &striping_info_buf;
-	stripe_unit = ceph_decode_64(&p);
-	if (stripe_unit != obj_size) {
-		rbd_warn(rbd_dev, "unsupported stripe unit "
-				"(got %llu want %llu)",
-				stripe_unit, obj_size);
-		return -EINVAL;
-	}
-	stripe_count = ceph_decode_64(&p);
-	if (stripe_count != 1) {
-		rbd_warn(rbd_dev, "unsupported stripe count "
-				"(got %llu want 1)", stripe_count);
-		return -EINVAL;
-	}
-	rbd_dev->header.stripe_unit = stripe_unit;
-	rbd_dev->header.stripe_count = stripe_count;
-
+	rbd_dev->header.stripe_unit = ceph_decode_64(&p);
+	rbd_dev->header.stripe_count = ceph_decode_64(&p);
 	return 0;
 }
 
@@ -5653,39 +5206,6 @@ out_err:
 	return ret;
 }
 
-/*
- * Return pool id (>= 0) or a negative error code.
- */
-static int rbd_add_get_pool_id(struct rbd_client *rbdc, const char *pool_name)
-{
-	struct ceph_options *opts = rbdc->client->options;
-	u64 newest_epoch;
-	int tries = 0;
-	int ret;
-
-again:
-	ret = ceph_pg_poolid_by_name(rbdc->client->osdc.osdmap, pool_name);
-	if (ret == -ENOENT && tries++ < 1) {
-		ret = ceph_monc_get_version(&rbdc->client->monc, "osdmap",
-					    &newest_epoch);
-		if (ret < 0)
-			return ret;
-
-		if (rbdc->client->osdc.osdmap->epoch < newest_epoch) {
-			ceph_osdc_maybe_request_map(&rbdc->client->osdc);
-			(void) ceph_monc_wait_osdmap(&rbdc->client->monc,
-						     newest_epoch,
-						     opts->mount_timeout);
-			goto again;
-		} else {
-			/* the osdmap we have is new enough */
-			return -ENOENT;
-		}
-	}
-
-	return ret;
-}
-
 static void rbd_dev_image_unlock(struct rbd_device *rbd_dev)
 {
 	down_write(&rbd_dev->lock_rwsem);
@@ -6114,7 +5634,7 @@ static ssize_t do_rbd_add(struct bus_type *bus,
 	}
 
 	/* pick the pool */
-	rc = rbd_add_get_pool_id(rbdc, spec->pool_name);
+	rc = ceph_pg_poolid_by_name(rbdc->client->osdc.osdmap, spec->pool_name);
 	if (rc < 0) {
 		if (rc == -ENOENT)
 			pr_info("pool %s does not exist\n", spec->pool_name);
@@ -6366,16 +5886,8 @@ static int rbd_slab_init(void)
 	if (!rbd_obj_request_cache)
 		goto out_err;
 
-	rbd_assert(!rbd_bio_clone);
-	rbd_bio_clone = bioset_create(BIO_POOL_SIZE, 0, 0);
-	if (!rbd_bio_clone)
-		goto out_err_clone;
-
 	return 0;
 
-out_err_clone:
-	kmem_cache_destroy(rbd_obj_request_cache);
-	rbd_obj_request_cache = NULL;
 out_err:
 	kmem_cache_destroy(rbd_img_request_cache);
 	rbd_img_request_cache = NULL;
@@ -6391,10 +5903,6 @@ static void rbd_slab_exit(void)
 	rbd_assert(rbd_img_request_cache);
 	kmem_cache_destroy(rbd_img_request_cache);
 	rbd_img_request_cache = NULL;
-
-	rbd_assert(rbd_bio_clone);
-	bioset_free(rbd_bio_clone);
-	rbd_bio_clone = NULL;
 }
 
 static int __init rbd_init(void)
diff --git a/drivers/char/rtc.c b/drivers/char/rtc.c
index 0c858d027bf3..57dc546628b5 100644
--- a/drivers/char/rtc.c
+++ b/drivers/char/rtc.c
@@ -809,89 +809,6 @@ static __poll_t rtc_poll(struct file *file, poll_table *wait)
 }
 #endif
 
-int rtc_register(rtc_task_t *task)
-{
-#ifndef RTC_IRQ
-	return -EIO;
-#else
-	if (task == NULL || task->func == NULL)
-		return -EINVAL;
-	spin_lock_irq(&rtc_lock);
-	if (rtc_status & RTC_IS_OPEN) {
-		spin_unlock_irq(&rtc_lock);
-		return -EBUSY;
-	}
-	spin_lock(&rtc_task_lock);
-	if (rtc_callback) {
-		spin_unlock(&rtc_task_lock);
-		spin_unlock_irq(&rtc_lock);
-		return -EBUSY;
-	}
-	rtc_status |= RTC_IS_OPEN;
-	rtc_callback = task;
-	spin_unlock(&rtc_task_lock);
-	spin_unlock_irq(&rtc_lock);
-	return 0;
-#endif
-}
-EXPORT_SYMBOL(rtc_register);
-
-int rtc_unregister(rtc_task_t *task)
-{
-#ifndef RTC_IRQ
-	return -EIO;
-#else
-	unsigned char tmp;
-
-	spin_lock_irq(&rtc_lock);
-	spin_lock(&rtc_task_lock);
-	if (rtc_callback != task) {
-		spin_unlock(&rtc_task_lock);
-		spin_unlock_irq(&rtc_lock);
-		return -ENXIO;
-	}
-	rtc_callback = NULL;
-
-	/* disable controls */
-	if (!hpet_mask_rtc_irq_bit(RTC_PIE | RTC_AIE | RTC_UIE)) {
-		tmp = CMOS_READ(RTC_CONTROL);
-		tmp &= ~RTC_PIE;
-		tmp &= ~RTC_AIE;
-		tmp &= ~RTC_UIE;
-		CMOS_WRITE(tmp, RTC_CONTROL);
-		CMOS_READ(RTC_INTR_FLAGS);
-	}
-	if (rtc_status & RTC_TIMER_ON) {
-		rtc_status &= ~RTC_TIMER_ON;
-		del_timer(&rtc_irq_timer);
-	}
-	rtc_status &= ~RTC_IS_OPEN;
-	spin_unlock(&rtc_task_lock);
-	spin_unlock_irq(&rtc_lock);
-	return 0;
-#endif
-}
-EXPORT_SYMBOL(rtc_unregister);
-
-int rtc_control(rtc_task_t *task, unsigned int cmd, unsigned long arg)
-{
-#ifndef RTC_IRQ
-	return -EIO;
-#else
-	unsigned long flags;
-	if (cmd != RTC_PIE_ON && cmd != RTC_PIE_OFF && cmd != RTC_IRQP_SET)
-		return -EINVAL;
-	spin_lock_irqsave(&rtc_task_lock, flags);
-	if (rtc_callback != task) {
-		spin_unlock_irqrestore(&rtc_task_lock, flags);
-		return -ENXIO;
-	}
-	spin_unlock_irqrestore(&rtc_task_lock, flags);
-	return rtc_do_ioctl(cmd, arg, 1);
-#endif
-}
-EXPORT_SYMBOL(rtc_control);
-
 /*
  *	The various file operations we support.
  */
diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c
index 9e80a953d693..c43a9e28995e 100644
--- a/drivers/char/tpm/tpm-interface.c
+++ b/drivers/char/tpm/tpm-interface.c
@@ -369,20 +369,40 @@ err_len:
 	return -EINVAL;
 }
 
-/**
- * tmp_transmit - Internal kernel interface to transmit TPM commands.
- *
- * @chip: TPM chip to use
- * @buf: TPM command buffer
- * @bufsiz: length of the TPM command buffer
- * @flags: tpm transmit flags - bitmap
- *
- * Return:
- *     0 when the operation is successful.
- *     A negative number for system errors (errno).
- */
-ssize_t tpm_transmit(struct tpm_chip *chip, struct tpm_space *space,
-		     u8 *buf, size_t bufsiz, unsigned int flags)
+static int tpm_request_locality(struct tpm_chip *chip)
+{
+	int rc;
+
+	if (!chip->ops->request_locality)
+		return 0;	/* hook is optional */
+
+	rc = chip->ops->request_locality(chip, 0);
+	if (rc < 0)
+		return rc;
+
+	chip->locality = rc;	/* non-negative return is stored as locality */
+
+	return 0;
+}
+
+static void tpm_relinquish_locality(struct tpm_chip *chip)
+{
+	int rc;
+
+	if (!chip->ops->relinquish_locality)
+		return;	/* hook is optional */
+
+	rc = chip->ops->relinquish_locality(chip, chip->locality);
+	if (rc)
+		dev_err(&chip->dev, "%s: error %d\n", __func__, rc);
+
+	chip->locality = -1;	/* reset even if the hook reported an error */
+}
+
+static ssize_t tpm_try_transmit(struct tpm_chip *chip,
+				struct tpm_space *space,
+				u8 *buf, size_t bufsiz,
+				unsigned int flags)
 {
 	struct tpm_output_header *header = (void *)buf;
 	int rc;
@@ -422,8 +442,6 @@ ssize_t tpm_transmit(struct tpm_chip *chip, struct tpm_space *space,
 	if (!(flags & TPM_TRANSMIT_UNLOCKED))
 		mutex_lock(&chip->tpm_mutex);
 
-	if (chip->dev.parent)
-		pm_runtime_get_sync(chip->dev.parent);
 
 	if (chip->ops->clk_enable != NULL)
 		chip->ops->clk_enable(chip, true);
@@ -431,19 +449,20 @@ ssize_t tpm_transmit(struct tpm_chip *chip, struct tpm_space *space,
 	/* Store the decision as chip->locality will be changed. */
 	need_locality = chip->locality == -1;
 
-	if (!(flags & TPM_TRANSMIT_RAW) &&
-	    need_locality && chip->ops->request_locality)  {
-		rc = chip->ops->request_locality(chip, 0);
+	if (!(flags & TPM_TRANSMIT_RAW) && need_locality) {
+		rc = tpm_request_locality(chip);
 		if (rc < 0)
 			goto out_no_locality;
-		chip->locality = rc;
 	}
 
+	if (chip->dev.parent)
+		pm_runtime_get_sync(chip->dev.parent);
+
 	rc = tpm2_prepare_space(chip, space, ordinal, buf);
 	if (rc)
 		goto out;
 
-	rc = chip->ops->send(chip, (u8 *) buf, count);
+	rc = chip->ops->send(chip, buf, count);
 	if (rc < 0) {
 		if (rc != -EPIPE)
 			dev_err(&chip->dev,
@@ -480,7 +499,7 @@ ssize_t tpm_transmit(struct tpm_chip *chip, struct tpm_space *space,
 	goto out;
 
 out_recv:
-	len = chip->ops->recv(chip, (u8 *) buf, bufsiz);
+	len = chip->ops->recv(chip, buf, bufsiz);
 	if (len < 0) {
 		rc = len;
 		dev_err(&chip->dev,
@@ -499,27 +518,95 @@ out_recv:
 	rc = tpm2_commit_space(chip, space, ordinal, buf, &len);
 
 out:
-	if (need_locality && chip->ops->relinquish_locality) {
-		chip->ops->relinquish_locality(chip, chip->locality);
-		chip->locality = -1;
-	}
+	if (chip->dev.parent)
+		pm_runtime_put_sync(chip->dev.parent);
+
+	if (need_locality)
+		tpm_relinquish_locality(chip);
+
 out_no_locality:
 	if (chip->ops->clk_enable != NULL)
 		chip->ops->clk_enable(chip, false);
 
-	if (chip->dev.parent)
-		pm_runtime_put_sync(chip->dev.parent);
-
 	if (!(flags & TPM_TRANSMIT_UNLOCKED))
 		mutex_unlock(&chip->tpm_mutex);
 	return rc ? rc : len;
 }
 
 /**
- * tmp_transmit_cmd - send a tpm command to the device
+ * tpm_transmit - Internal kernel interface to transmit TPM commands.
+ *
+ * @chip: TPM chip to use
+ * @space: tpm space
+ * @buf: TPM command buffer
+ * @bufsiz: length of the TPM command buffer
+ * @flags: tpm transmit flags - bitmap
+ *
+ * A wrapper around tpm_try_transmit that handles TPM2_RC_RETRY
+ * returns from the TPM and retransmits the command after a delay up
+ * to a maximum wait of TPM2_DURATION_LONG.
+ *
+ * Note: TPM1 never returns TPM2_RC_RETRY so the retry logic is TPM2
+ * only
+ *
+ * Return:
+ *     the length of the return when the operation is successful.
+ *     A negative number for system errors (errno).
+ */
+ssize_t tpm_transmit(struct tpm_chip *chip, struct tpm_space *space,
+		     u8 *buf, size_t bufsiz, unsigned int flags)
+{
+	struct tpm_output_header *header = (struct tpm_output_header *)buf;
+	/* space for header and handles */
+	u8 save[TPM_HEADER_SIZE + 3*sizeof(u32)];
+	unsigned int delay_msec = TPM2_DURATION_SHORT;
+	u32 rc = 0;
+	ssize_t ret;
+	const size_t save_size = min(space ? sizeof(save) : TPM_HEADER_SIZE,
+				     bufsiz);
+	/* the command code is where the return code will be */
+	u32 cc = be32_to_cpu(header->return_code);
+
+	/*
+	 * Subtlety here: if we have a space, the handles will be
+	 * transformed, so when we restore the header we also have to
+	 * restore the handles.
+	 */
+	memcpy(save, buf, save_size);
+
+	for (;;) {
+		ret = tpm_try_transmit(chip, space, buf, bufsiz, flags);
+		if (ret < 0)
+			break;
+		rc = be32_to_cpu(header->return_code);
+		if (rc != TPM2_RC_RETRY && rc != TPM2_RC_TESTING)
+			break;
+		/*
+		 * return immediately if self test returns test
+		 * still running to shorten boot time.
+		 */
+		if (rc == TPM2_RC_TESTING && cc == TPM2_CC_SELF_TEST)
+			break;
+		delay_msec *= 2;
+		if (delay_msec > TPM2_DURATION_LONG) {
+			if (rc == TPM2_RC_RETRY)
+				dev_err(&chip->dev, "in retry loop\n");
+			else
+				dev_err(&chip->dev,
+					"self test is still running\n");
+			break;
+		}
+		tpm_msleep(delay_msec);
+		memcpy(buf, save, save_size);
+	}
+	return ret;
+}
+/**
+ * tpm_transmit_cmd - send a tpm command to the device
  *    The function extracts tpm out header return code
  *
  * @chip: TPM chip to use
+ * @space: tpm space
  * @buf: TPM command buffer
  * @bufsiz: length of the buffer
  * @min_rsp_body_length: minimum expected length of response body
@@ -532,7 +619,7 @@ out_no_locality:
  *     A positive number for a TPM error.
  */
 ssize_t tpm_transmit_cmd(struct tpm_chip *chip, struct tpm_space *space,
-			 const void *buf, size_t bufsiz,
+			 void *buf, size_t bufsiz,
 			 size_t min_rsp_body_length, unsigned int flags,
 			 const char *desc)
 {
@@ -540,7 +627,7 @@ ssize_t tpm_transmit_cmd(struct tpm_chip *chip, struct tpm_space *space,
 	int err;
 	ssize_t len;
 
-	len = tpm_transmit(chip, space, (u8 *)buf, bufsiz, flags);
+	len = tpm_transmit(chip, space, buf, bufsiz, flags);
 	if (len <  0)
 		return len;
 
@@ -666,6 +753,8 @@ int tpm_get_timeouts(struct tpm_chip *chip)
 		    msecs_to_jiffies(TPM2_DURATION_MEDIUM);
 		chip->duration[TPM_LONG] =
 		    msecs_to_jiffies(TPM2_DURATION_LONG);
+		chip->duration[TPM_LONG_LONG] =
+		    msecs_to_jiffies(TPM2_DURATION_LONG_LONG);
 
 		chip->flags |= TPM_CHIP_FLAG_HAVE_TIMEOUTS;
 		return 0;
@@ -754,6 +843,7 @@ int tpm_get_timeouts(struct tpm_chip *chip)
 		usecs_to_jiffies(be32_to_cpu(cap.duration.tpm_medium));
 	chip->duration[TPM_LONG] =
 		usecs_to_jiffies(be32_to_cpu(cap.duration.tpm_long));
+	chip->duration[TPM_LONG_LONG] = 0; /* not used under 1.2 */
 
 	/* The Broadcom BCM0102 chipset in a Dell Latitude D820 gets the above
 	 * value wrong and apparently reports msecs rather than usecs. So we
@@ -969,6 +1059,10 @@ int tpm_do_selftest(struct tpm_chip *chip)
 	loops = jiffies_to_msecs(duration) / delay_msec;
 
 	rc = tpm_continue_selftest(chip);
+	if (rc == TPM_ERR_INVALID_POSTINIT) {
+		chip->flags |= TPM_CHIP_FLAG_ALWAYS_POWERED;
+		dev_info(&chip->dev, "TPM not ready (%d)\n", rc);
+	}
 	/* This may fail if there was no TPM driver during a suspend/resume
 	 * cycle; some may return 10 (BAD_ORDINAL), others 28 (FAILEDSELFTEST)
 	 */
diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h
index f895fba4e20d..7f2d0f489e9c 100644
--- a/drivers/char/tpm/tpm.h
+++ b/drivers/char/tpm/tpm.h
@@ -67,7 +67,9 @@ enum tpm_duration {
 	TPM_SHORT = 0,
 	TPM_MEDIUM = 1,
 	TPM_LONG = 2,
+	TPM_LONG_LONG = 3,
 	TPM_UNDEFINED,
+	TPM_NUM_DURATIONS = TPM_UNDEFINED,
 };
 
 #define TPM_WARN_RETRY          0x800
@@ -79,15 +81,20 @@ enum tpm_duration {
 #define TPM_HEADER_SIZE		10
 
 enum tpm2_const {
-	TPM2_PLATFORM_PCR	= 24,
-	TPM2_PCR_SELECT_MIN	= ((TPM2_PLATFORM_PCR + 7) / 8),
-	TPM2_TIMEOUT_A		= 750,
-	TPM2_TIMEOUT_B		= 2000,
-	TPM2_TIMEOUT_C		= 200,
-	TPM2_TIMEOUT_D		= 30,
-	TPM2_DURATION_SHORT	= 20,
-	TPM2_DURATION_MEDIUM	= 750,
-	TPM2_DURATION_LONG	= 2000,
+	TPM2_PLATFORM_PCR       =     24,
+	TPM2_PCR_SELECT_MIN     = ((TPM2_PLATFORM_PCR + 7) / 8),
+};
+
+enum tpm2_timeouts {
+	TPM2_TIMEOUT_A          =    750,
+	TPM2_TIMEOUT_B          =   2000,
+	TPM2_TIMEOUT_C          =    200,
+	TPM2_TIMEOUT_D          =     30,
+	TPM2_DURATION_SHORT     =     20,
+	TPM2_DURATION_MEDIUM    =    750,
+	TPM2_DURATION_LONG      =   2000,
+	TPM2_DURATION_LONG_LONG = 300000,
+	TPM2_DURATION_DEFAULT   = 120000,
 };
 
 enum tpm2_structures {
@@ -104,10 +111,12 @@ enum tpm2_return_codes {
 	TPM2_RC_HASH		= 0x0083, /* RC_FMT1 */
 	TPM2_RC_HANDLE		= 0x008B,
 	TPM2_RC_INITIALIZE	= 0x0100, /* RC_VER1 */
+	TPM2_RC_FAILURE		= 0x0101,
 	TPM2_RC_DISABLED	= 0x0120,
 	TPM2_RC_COMMAND_CODE    = 0x0143,
 	TPM2_RC_TESTING		= 0x090A, /* RC_WARN */
 	TPM2_RC_REFERENCE_H0	= 0x0910,
+	TPM2_RC_RETRY		= 0x0922,
 };
 
 enum tpm2_algorithms {
@@ -123,6 +132,7 @@ enum tpm2_algorithms {
 
 enum tpm2_command_codes {
 	TPM2_CC_FIRST		= 0x011F,
+	TPM2_CC_CREATE_PRIMARY  = 0x0131,
 	TPM2_CC_SELF_TEST	= 0x0143,
 	TPM2_CC_STARTUP		= 0x0144,
 	TPM2_CC_SHUTDOWN	= 0x0145,
@@ -227,7 +237,7 @@ struct tpm_chip {
 	unsigned long timeout_c; /* jiffies */
 	unsigned long timeout_d; /* jiffies */
 	bool timeout_adjusted;
-	unsigned long duration[3]; /* jiffies */
+	unsigned long duration[TPM_NUM_DURATIONS]; /* jiffies */
 	bool duration_adjusted;
 
 	struct dentry *bios_dir[TPM_NUM_EVENT_LOG_FILES];
@@ -506,7 +516,7 @@ enum tpm_transmit_flags {
 ssize_t tpm_transmit(struct tpm_chip *chip, struct tpm_space *space,
 		     u8 *buf, size_t bufsiz, unsigned int flags);
 ssize_t tpm_transmit_cmd(struct tpm_chip *chip, struct tpm_space *space,
-			 const void *buf, size_t bufsiz,
+			 void *buf, size_t bufsiz,
 			 size_t min_rsp_body_length, unsigned int flags,
 			 const char *desc);
 int tpm_startup(struct tpm_chip *chip);
diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c
index a700f8f9ead7..96c77c8e7f40 100644
--- a/drivers/char/tpm/tpm2-cmd.c
+++ b/drivers/char/tpm/tpm2-cmd.c
@@ -31,10 +31,6 @@ struct tpm2_startup_in {
 	__be16	startup_type;
 } __packed;
 
-struct tpm2_self_test_in {
-	u8	full_test;
-} __packed;
-
 struct tpm2_get_tpm_pt_in {
 	__be32	cap_id;
 	__be32	property_id;
@@ -60,7 +56,6 @@ struct tpm2_get_random_out {
 
 union tpm2_cmd_params {
 	struct	tpm2_startup_in		startup_in;
-	struct	tpm2_self_test_in	selftest_in;
 	struct	tpm2_get_tpm_pt_in	get_tpm_pt_in;
 	struct	tpm2_get_tpm_pt_out	get_tpm_pt_out;
 	struct	tpm2_get_random_in	getrandom_in;
@@ -90,6 +85,8 @@ static struct tpm2_hash tpm2_hash_map[] = {
  * of time the chip could take to return the result. The values
  * of the SHORT, MEDIUM, and LONG durations are taken from the
  * PC Client Profile (PTP) specification.
+ * LONG_LONG is for commands that generate keys, which empirically
+ * take longer on some systems.
  */
 static const u8 tpm2_ordinal_duration[TPM2_CC_LAST - TPM2_CC_FIRST + 1] = {
 	TPM_UNDEFINED,		/* 11F */
@@ -110,7 +107,7 @@ static const u8 tpm2_ordinal_duration[TPM2_CC_LAST - TPM2_CC_FIRST + 1] = {
 	TPM_UNDEFINED,		/* 12e */
 	TPM_UNDEFINED,		/* 12f */
 	TPM_UNDEFINED,		/* 130 */
-	TPM_UNDEFINED,		/* 131 */
+	TPM_LONG_LONG,		/* 131 */
 	TPM_UNDEFINED,		/* 132 */
 	TPM_UNDEFINED,		/* 133 */
 	TPM_UNDEFINED,		/* 134 */
@@ -144,7 +141,7 @@ static const u8 tpm2_ordinal_duration[TPM2_CC_LAST - TPM2_CC_FIRST + 1] = {
 	TPM_UNDEFINED,		/* 150 */
 	TPM_UNDEFINED,		/* 151 */
 	TPM_UNDEFINED,		/* 152 */
-	TPM_UNDEFINED,		/* 153 */
+	TPM_LONG_LONG,		/* 153 */
 	TPM_UNDEFINED,		/* 154 */
 	TPM_UNDEFINED,		/* 155 */
 	TPM_UNDEFINED,		/* 156 */
@@ -821,22 +818,12 @@ unsigned long tpm2_calc_ordinal_duration(struct tpm_chip *chip, u32 ordinal)
 		duration = chip->duration[index];
 
 	if (duration <= 0)
-		duration = 2 * 60 * HZ;
+		duration = msecs_to_jiffies(TPM2_DURATION_DEFAULT);
 
 	return duration;
 }
 EXPORT_SYMBOL_GPL(tpm2_calc_ordinal_duration);
 
-#define TPM2_SELF_TEST_IN_SIZE \
-	(sizeof(struct tpm_input_header) + \
-	 sizeof(struct tpm2_self_test_in))
-
-static const struct tpm_input_header tpm2_selftest_header = {
-	.tag = cpu_to_be16(TPM2_ST_NO_SESSIONS),
-	.length = cpu_to_be32(TPM2_SELF_TEST_IN_SIZE),
-	.ordinal = cpu_to_be32(TPM2_CC_SELF_TEST)
-};
-
 /**
  * tpm2_do_selftest() - ensure that all self tests have passed
  *
@@ -852,27 +839,24 @@ static const struct tpm_input_header tpm2_selftest_header = {
  */
 static int tpm2_do_selftest(struct tpm_chip *chip)
 {
+	struct tpm_buf buf;
+	int full;
 	int rc;
-	unsigned int delay_msec = 10;
-	long duration;
-	struct tpm2_cmd cmd;
 
-	duration = jiffies_to_msecs(
-		tpm2_calc_ordinal_duration(chip, TPM2_CC_SELF_TEST));
-
-	while (1) {
-		cmd.header.in = tpm2_selftest_header;
-		cmd.params.selftest_in.full_test = 0;
-
-		rc = tpm_transmit_cmd(chip, NULL, &cmd, TPM2_SELF_TEST_IN_SIZE,
-				      0, 0, "continue selftest");
+	for (full = 0; full < 2; full++) {
+		rc = tpm_buf_init(&buf, TPM2_ST_NO_SESSIONS, TPM2_CC_SELF_TEST);
+		if (rc)
+			return rc;
 
-		if (rc != TPM2_RC_TESTING || delay_msec >= duration)
-			break;
+		tpm_buf_append_u8(&buf, full);
+		rc = tpm_transmit_cmd(chip, NULL, buf.data, PAGE_SIZE, 0, 0,
+				      "attempting the self test");
+		tpm_buf_destroy(&buf);
 
-		/* wait longer than before */
-		delay_msec *= 2;
-		tpm_msleep(delay_msec);
+		if (rc == TPM2_RC_TESTING)
+			rc = TPM2_RC_SUCCESS;
+		if (rc == TPM2_RC_INITIALIZE || rc == TPM2_RC_SUCCESS)
+			return rc;
 	}
 
 	return rc;
@@ -1058,10 +1042,8 @@ int tpm2_auto_startup(struct tpm_chip *chip)
 		goto out;
 
 	rc = tpm2_do_selftest(chip);
-	if (rc != 0 && rc != TPM2_RC_INITIALIZE) {
-		dev_err(&chip->dev, "TPM self test failed\n");
+	if (rc && rc != TPM2_RC_INITIALIZE)
 		goto out;
-	}
 
 	if (rc == TPM2_RC_INITIALIZE) {
 		rc = tpm_startup(chip);
@@ -1069,10 +1051,8 @@ int tpm2_auto_startup(struct tpm_chip *chip)
 			goto out;
 
 		rc = tpm2_do_selftest(chip);
-		if (rc) {
-			dev_err(&chip->dev, "TPM self test failed\n");
+		if (rc)
 			goto out;
-		}
 	}
 
 	rc = tpm2_get_pcr_allocation(chip);
diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c
index 7b3c2a8aa9de..7f78482cd157 100644
--- a/drivers/char/tpm/tpm_crb.c
+++ b/drivers/char/tpm/tpm_crb.c
@@ -112,6 +112,25 @@ struct tpm2_crb_smc {
 	u32 smc_func_id;
 };
 
+static bool crb_wait_for_reg_32(u32 __iomem *reg, u32 mask, u32 value,
+				unsigned long timeout)
+{
+	ktime_t start;
+	ktime_t stop;
+
+	start = ktime_get();
+	stop = ktime_add(start, ms_to_ktime(timeout)); /* timeout is in ms */
+
+	do {
+		if ((ioread32(reg) & mask) == value)
+			return true;
+
+		usleep_range(50, 100);	/* poll every 50-100 us */
+	} while (ktime_before(ktime_get(), stop));
+
+	return ((ioread32(reg) & mask) == value); /* one last try */
+}
+
 /**
  * crb_go_idle - request tpm crb device to go the idle state
  *
@@ -128,7 +147,7 @@ struct tpm2_crb_smc {
  *
  * Return: 0 always
  */
-static int __maybe_unused crb_go_idle(struct device *dev, struct crb_priv *priv)
+static int crb_go_idle(struct device *dev, struct crb_priv *priv)
 {
 	if ((priv->sm == ACPI_TPM2_START_METHOD) ||
 	    (priv->sm == ACPI_TPM2_COMMAND_BUFFER_WITH_START_METHOD) ||
@@ -136,30 +155,17 @@ static int __maybe_unused crb_go_idle(struct device *dev, struct crb_priv *priv)
 		return 0;
 
 	iowrite32(CRB_CTRL_REQ_GO_IDLE, &priv->regs_t->ctrl_req);
-	/* we don't really care when this settles */
 
+	if (!crb_wait_for_reg_32(&priv->regs_t->ctrl_req,
+				 CRB_CTRL_REQ_GO_IDLE/* mask */,
+				 0, /* value */
+				 TPM2_TIMEOUT_C)) {
+		dev_warn(dev, "goIdle timed out\n");
+		return -ETIME;
+	}
 	return 0;
 }
 
-static bool crb_wait_for_reg_32(u32 __iomem *reg, u32 mask, u32 value,
-				unsigned long timeout)
-{
-	ktime_t start;
-	ktime_t stop;
-
-	start = ktime_get();
-	stop = ktime_add(start, ms_to_ktime(timeout));
-
-	do {
-		if ((ioread32(reg) & mask) == value)
-			return true;
-
-		usleep_range(50, 100);
-	} while (ktime_before(ktime_get(), stop));
-
-	return false;
-}
-
 /**
  * crb_cmd_ready - request tpm crb device to enter ready state
  *
@@ -175,8 +181,7 @@ static bool crb_wait_for_reg_32(u32 __iomem *reg, u32 mask, u32 value,
  *
  * Return: 0 on success -ETIME on timeout;
  */
-static int __maybe_unused crb_cmd_ready(struct device *dev,
-					struct crb_priv *priv)
+static int crb_cmd_ready(struct device *dev, struct crb_priv *priv)
 {
 	if ((priv->sm == ACPI_TPM2_START_METHOD) ||
 	    (priv->sm == ACPI_TPM2_COMMAND_BUFFER_WITH_START_METHOD) ||
@@ -195,11 +200,11 @@ static int __maybe_unused crb_cmd_ready(struct device *dev,
 	return 0;
 }
 
-static int crb_request_locality(struct tpm_chip *chip, int loc)
+static int __crb_request_locality(struct device *dev,
+				  struct crb_priv *priv, int loc)
 {
-	struct crb_priv *priv = dev_get_drvdata(&chip->dev);
 	u32 value = CRB_LOC_STATE_LOC_ASSIGNED |
-		CRB_LOC_STATE_TPM_REG_VALID_STS;
+		    CRB_LOC_STATE_TPM_REG_VALID_STS;
 
 	if (!priv->regs_h)
 		return 0;
@@ -207,21 +212,45 @@ static int crb_request_locality(struct tpm_chip *chip, int loc)
 	iowrite32(CRB_LOC_CTRL_REQUEST_ACCESS, &priv->regs_h->loc_ctrl);
 	if (!crb_wait_for_reg_32(&priv->regs_h->loc_state, value, value,
 				 TPM2_TIMEOUT_C)) {
-		dev_warn(&chip->dev, "TPM_LOC_STATE_x.requestAccess timed out\n");
+		dev_warn(dev, "TPM_LOC_STATE_x.requestAccess timed out\n");
 		return -ETIME;
 	}
 
 	return 0;
 }
 
-static void crb_relinquish_locality(struct tpm_chip *chip, int loc)
+static int crb_request_locality(struct tpm_chip *chip, int loc)
 {
 	struct crb_priv *priv = dev_get_drvdata(&chip->dev);
 
+	return __crb_request_locality(&chip->dev, priv, loc);
+}
+
+static int __crb_relinquish_locality(struct device *dev,
+				     struct crb_priv *priv, int loc)
+{
+	u32 mask = CRB_LOC_STATE_LOC_ASSIGNED |
+		   CRB_LOC_STATE_TPM_REG_VALID_STS;
+	u32 value = CRB_LOC_STATE_TPM_REG_VALID_STS; /* LOC_ASSIGNED clear */
+
+	if (!priv->regs_h)
+		return 0;	/* no regs_h: nothing to relinquish */
+
+	iowrite32(CRB_LOC_CTRL_RELINQUISH, &priv->regs_h->loc_ctrl);
+	if (!crb_wait_for_reg_32(&priv->regs_h->loc_state, mask, value,
+				 TPM2_TIMEOUT_C)) {
+		dev_warn(dev, "TPM_LOC_STATE_x.Relinquish timed out\n");
+		return -ETIME;
+	}
+
+	return 0;
+}
+
+
+static int crb_relinquish_locality(struct tpm_chip *chip, int loc)
+{
+	struct crb_priv *priv = dev_get_drvdata(&chip->dev);
+
+	return __crb_relinquish_locality(&chip->dev, priv, loc);
 }
 
 static u8 crb_status(struct tpm_chip *chip)
@@ -442,6 +471,7 @@ static int crb_map_io(struct acpi_device *device, struct crb_priv *priv,
 	u32 pa_high, pa_low;
 	u64 cmd_pa;
 	u32 cmd_size;
+	__le64 __rsp_pa;
 	u64 rsp_pa;
 	u32 rsp_size;
 	int ret;
@@ -475,6 +505,10 @@ static int crb_map_io(struct acpi_device *device, struct crb_priv *priv,
 			dev_warn(dev, FW_BUG "Bad ACPI memory layout");
 	}
 
+	ret = __crb_request_locality(dev, priv, 0);
+	if (ret)
+		return ret;
+
 	priv->regs_t = crb_map_res(dev, priv, &io_res, buf->control_address,
 				   sizeof(struct crb_regs_tail));
 	if (IS_ERR(priv->regs_t))
@@ -503,8 +537,8 @@ static int crb_map_io(struct acpi_device *device, struct crb_priv *priv,
 		goto out;
 	}
 
-	memcpy_fromio(&rsp_pa, &priv->regs_t->ctrl_rsp_pa, 8);
-	rsp_pa = le64_to_cpu(rsp_pa);
+	memcpy_fromio(&__rsp_pa, &priv->regs_t->ctrl_rsp_pa, 8);
+	rsp_pa = le64_to_cpu(__rsp_pa);
 	rsp_size = crb_fixup_cmd_size(dev, &io_res, rsp_pa,
 				      ioread32(&priv->regs_t->ctrl_rsp_size));
 
@@ -531,6 +565,8 @@ out:
 
 	crb_go_idle(dev, priv);
 
+	__crb_relinquish_locality(dev, priv, 0);
+
 	return ret;
 }
 
@@ -588,10 +624,14 @@ static int crb_acpi_add(struct acpi_device *device)
 	chip->acpi_dev_handle = device->handle;
 	chip->flags = TPM_CHIP_FLAG_TPM2;
 
-	rc  = crb_cmd_ready(dev, priv);
+	rc = __crb_request_locality(dev, priv, 0);
 	if (rc)
 		return rc;
 
+	rc  = crb_cmd_ready(dev, priv);
+	if (rc)
+		goto out;
+
 	pm_runtime_get_noresume(dev);
 	pm_runtime_set_active(dev);
 	pm_runtime_enable(dev);
@@ -601,12 +641,15 @@ static int crb_acpi_add(struct acpi_device *device)
 		crb_go_idle(dev, priv);
 		pm_runtime_put_noidle(dev);
 		pm_runtime_disable(dev);
-		return rc;
+		goto out;
 	}
 
-	pm_runtime_put(dev);
+	pm_runtime_put_sync(dev);
 
-	return 0;
+out:
+	__crb_relinquish_locality(dev, priv, 0);
+
+	return rc;
 }
 
 static int crb_acpi_remove(struct acpi_device *device)
diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c
index da074e3db19b..5a1f47b43947 100644
--- a/drivers/char/tpm/tpm_tis_core.c
+++ b/drivers/char/tpm/tpm_tis_core.c
@@ -143,11 +143,13 @@ static bool check_locality(struct tpm_chip *chip, int l)
 	return false;
 }
 
-static void release_locality(struct tpm_chip *chip, int l)
+static int release_locality(struct tpm_chip *chip, int l)
 {
 	struct tpm_tis_data *priv = dev_get_drvdata(&chip->dev);
 
 	tpm_tis_write8(priv, TPM_ACCESS(l), TPM_ACCESS_ACTIVE_LOCALITY);
+
+	return 0;
 }
 
 static int request_locality(struct tpm_chip *chip, int l)
diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig
index 7ae23b25b406..41492e980ef4 100644
--- a/drivers/clk/Kconfig
+++ b/drivers/clk/Kconfig
@@ -55,8 +55,10 @@ config COMMON_CLK_RK808
 	  by control register.
 
 config COMMON_CLK_HI655X
-	tristate "Clock driver for Hi655x"
-	depends on MFD_HI655X_PMIC || COMPILE_TEST
+	tristate "Clock driver for Hi655x" if EXPERT
+	depends on (MFD_HI655X_PMIC || COMPILE_TEST)
+	depends on REGMAP
+	default MFD_HI655X_PMIC
 	---help---
 	  This driver supports the hi655x PMIC clock. This
 	  multi-function device has one fixed-rate oscillator, clocked
@@ -101,6 +103,15 @@ config COMMON_CLK_SI514
 	  This driver supports the Silicon Labs 514 programmable clock
 	  generator.
 
+config COMMON_CLK_SI544
+	tristate "Clock driver for SiLabs 544 devices"
+	depends on I2C
+	select REGMAP_I2C
+	help
+	---help---
+	  This driver supports the Silicon Labs 544 programmable clock
+	  generator.
+
 config COMMON_CLK_SI570
 	tristate "Clock driver for SiLabs 570 and compatible devices"
 	depends on I2C
@@ -248,6 +259,26 @@ config COMMON_CLK_VC5
 	  This driver supports the IDT VersaClock 5 and VersaClock 6
 	  programmable clock generators.
 
+config COMMON_CLK_STM32MP157
+	def_bool COMMON_CLK && MACH_STM32MP157
+	help
+	---help---
+	  Support for stm32mp157 SoC family clocks
+
+config COMMON_CLK_STM32F
+	bool "Clock driver for stm32f4 and stm32f7 SoC families"
+	depends on MACH_STM32F429 || MACH_STM32F469 || MACH_STM32F746
+	help
+	---help---
+	  Support for stm32f4 and stm32f7 SoC families clocks
+
+config COMMON_CLK_STM32H7
+	bool "Clock driver for stm32h7 SoC family"
+	depends on MACH_STM32H743
+	help
+	---help---
+	  Support for stm32h7 SoC family clocks
+
 source "drivers/clk/bcm/Kconfig"
 source "drivers/clk/hisilicon/Kconfig"
 source "drivers/clk/imgtec/Kconfig"
diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile
index 6605513eaa94..de6d06ac790b 100644
--- a/drivers/clk/Makefile
+++ b/drivers/clk/Makefile
@@ -45,9 +45,11 @@ obj-$(CONFIG_COMMON_CLK_SCMI)           += clk-scmi.o
 obj-$(CONFIG_COMMON_CLK_SCPI)           += clk-scpi.o
 obj-$(CONFIG_COMMON_CLK_SI5351)		+= clk-si5351.o
 obj-$(CONFIG_COMMON_CLK_SI514)		+= clk-si514.o
+obj-$(CONFIG_COMMON_CLK_SI544)		+= clk-si544.o
 obj-$(CONFIG_COMMON_CLK_SI570)		+= clk-si570.o
-obj-$(CONFIG_ARCH_STM32)		+= clk-stm32f4.o
-obj-$(CONFIG_ARCH_STM32)		+= clk-stm32h7.o
+obj-$(CONFIG_COMMON_CLK_STM32F)		+= clk-stm32f4.o
+obj-$(CONFIG_COMMON_CLK_STM32H7)	+= clk-stm32h7.o
+obj-$(CONFIG_COMMON_CLK_STM32MP157)	+= clk-stm32mp1.o
 obj-$(CONFIG_ARCH_TANGO)		+= clk-tango4.o
 obj-$(CONFIG_CLK_TWL6040)		+= clk-twl6040.o
 obj-$(CONFIG_ARCH_U300)			+= clk-u300.o
@@ -62,6 +64,7 @@ obj-$(CONFIG_ARCH_ARTPEC)		+= axis/
 obj-$(CONFIG_ARC_PLAT_AXS10X)		+= axs10x/
 obj-y					+= bcm/
 obj-$(CONFIG_ARCH_BERLIN)		+= berlin/
+obj-$(CONFIG_ARCH_DAVINCI)		+= davinci/
 obj-$(CONFIG_H8300)			+= h8300/
 obj-$(CONFIG_ARCH_HISI)			+= hisilicon/
 obj-y					+= imgtec/
@@ -89,6 +92,7 @@ obj-$(CONFIG_ARCH_SOCFPGA)		+= socfpga/
 obj-$(CONFIG_PLAT_SPEAR)		+= spear/
 obj-$(CONFIG_ARCH_SPRD)			+= sprd/
 obj-$(CONFIG_ARCH_STI)			+= st/
+obj-$(CONFIG_ARCH_STRATIX10)		+= socfpga/
 obj-$(CONFIG_ARCH_SUNXI)		+= sunxi/
 obj-$(CONFIG_ARCH_SUNXI)		+= sunxi-ng/
 obj-$(CONFIG_ARCH_TEGRA)		+= tegra/
diff --git a/drivers/clk/bcm/clk-bcm2835.c b/drivers/clk/bcm/clk-bcm2835.c
index a07f6451694a..fa0d5c8611a0 100644
--- a/drivers/clk/bcm/clk-bcm2835.c
+++ b/drivers/clk/bcm/clk-bcm2835.c
@@ -602,9 +602,7 @@ static void bcm2835_pll_off(struct clk_hw *hw)
 	const struct bcm2835_pll_data *data = pll->data;
 
 	spin_lock(&cprman->regs_lock);
-	cprman_write(cprman, data->cm_ctrl_reg,
-		     cprman_read(cprman, data->cm_ctrl_reg) |
-		     CM_PLL_ANARST);
+	cprman_write(cprman, data->cm_ctrl_reg, CM_PLL_ANARST);
 	cprman_write(cprman, data->a2w_ctrl_reg,
 		     cprman_read(cprman, data->a2w_ctrl_reg) |
 		     A2W_PLL_CTRL_PWRDN);
@@ -640,6 +638,10 @@ static int bcm2835_pll_on(struct clk_hw *hw)
 		cpu_relax();
 	}
 
+	cprman_write(cprman, data->a2w_ctrl_reg,
+		     cprman_read(cprman, data->a2w_ctrl_reg) |
+		     A2W_PLL_CTRL_PRST_DISABLE);
+
 	return 0;
 }
 
diff --git a/drivers/clk/clk-cs2000-cp.c b/drivers/clk/clk-cs2000-cp.c
index e8ea81c30f0c..c58019750b7e 100644
--- a/drivers/clk/clk-cs2000-cp.c
+++ b/drivers/clk/clk-cs2000-cp.c
@@ -549,7 +549,7 @@ static int cs2000_resume(struct device *dev)
 }
 
 static const struct dev_pm_ops cs2000_pm_ops = {
-	.resume_early	= cs2000_resume,
+	SET_LATE_SYSTEM_SLEEP_PM_OPS(NULL, cs2000_resume)
 };
 
 static struct i2c_driver cs2000_driver = {
diff --git a/drivers/clk/clk-divider.c b/drivers/clk/clk-divider.c
index b49942b9fe50..b6234a5da12d 100644
--- a/drivers/clk/clk-divider.c
+++ b/drivers/clk/clk-divider.c
@@ -28,12 +28,10 @@
  * parent - fixed parent.  No clk_set_parent support
  */
 
-#define div_mask(width)	((1 << (width)) - 1)
-
 static unsigned int _get_table_maxdiv(const struct clk_div_table *table,
 				      u8 width)
 {
-	unsigned int maxdiv = 0, mask = div_mask(width);
+	unsigned int maxdiv = 0, mask = clk_div_mask(width);
 	const struct clk_div_table *clkt;
 
 	for (clkt = table; clkt->div; clkt++)
@@ -57,12 +55,12 @@ static unsigned int _get_maxdiv(const struct clk_div_table *table, u8 width,
 				unsigned long flags)
 {
 	if (flags & CLK_DIVIDER_ONE_BASED)
-		return div_mask(width);
+		return clk_div_mask(width);
 	if (flags & CLK_DIVIDER_POWER_OF_TWO)
-		return 1 << div_mask(width);
+		return 1 << clk_div_mask(width);
 	if (table)
 		return _get_table_maxdiv(table, width);
-	return div_mask(width) + 1;
+	return clk_div_mask(width) + 1;
 }
 
 static unsigned int _get_table_div(const struct clk_div_table *table,
@@ -84,7 +82,7 @@ static unsigned int _get_div(const struct clk_div_table *table,
 	if (flags & CLK_DIVIDER_POWER_OF_TWO)
 		return 1 << val;
 	if (flags & CLK_DIVIDER_MAX_AT_ZERO)
-		return val ? val : div_mask(width) + 1;
+		return val ? val : clk_div_mask(width) + 1;
 	if (table)
 		return _get_table_div(table, val);
 	return val + 1;
@@ -109,7 +107,7 @@ static unsigned int _get_val(const struct clk_div_table *table,
 	if (flags & CLK_DIVIDER_POWER_OF_TWO)
 		return __ffs(div);
 	if (flags & CLK_DIVIDER_MAX_AT_ZERO)
-		return (div == div_mask(width) + 1) ? 0 : div;
+		return (div == clk_div_mask(width) + 1) ? 0 : div;
 	if (table)
 		return  _get_table_val(table, div);
 	return div - 1;
@@ -141,7 +139,7 @@ static unsigned long clk_divider_recalc_rate(struct clk_hw *hw,
 	unsigned int val;
 
 	val = clk_readl(divider->reg) >> divider->shift;
-	val &= div_mask(divider->width);
+	val &= clk_div_mask(divider->width);
 
 	return divider_recalc_rate(hw, parent_rate, val, divider->table,
 				   divider->flags, divider->width);
@@ -344,19 +342,43 @@ long divider_round_rate_parent(struct clk_hw *hw, struct clk_hw *parent,
 }
 EXPORT_SYMBOL_GPL(divider_round_rate_parent);
 
+long divider_ro_round_rate_parent(struct clk_hw *hw, struct clk_hw *parent,
+				  unsigned long rate, unsigned long *prate,
+				  const struct clk_div_table *table, u8 width,
+				  unsigned long flags, unsigned int val)
+{
+	int div;
+
+	div = _get_div(table, val, flags, width);
+
+	/* Even a read-only clock can propagate a rate change */
+	if (clk_hw_get_flags(hw) & CLK_SET_RATE_PARENT) {
+		if (!parent)
+			return -EINVAL;
+
+		*prate = clk_hw_round_rate(parent, rate * div);
+	}
+
+	return DIV_ROUND_UP_ULL((u64)*prate, div);
+}
+EXPORT_SYMBOL_GPL(divider_ro_round_rate_parent);
+
+
 static long clk_divider_round_rate(struct clk_hw *hw, unsigned long rate,
 				unsigned long *prate)
 {
 	struct clk_divider *divider = to_clk_divider(hw);
-	int bestdiv;
 
 	/* if read only, just return current value */
 	if (divider->flags & CLK_DIVIDER_READ_ONLY) {
-		bestdiv = clk_readl(divider->reg) >> divider->shift;
-		bestdiv &= div_mask(divider->width);
-		bestdiv = _get_div(divider->table, bestdiv, divider->flags,
-			divider->width);
-		return DIV_ROUND_UP_ULL((u64)*prate, bestdiv);
+		u32 val;
+
+		val = clk_readl(divider->reg) >> divider->shift;
+		val &= clk_div_mask(divider->width);
+
+		return divider_ro_round_rate(hw, rate, prate, divider->table,
+					     divider->width, divider->flags,
+					     val);
 	}
 
 	return divider_round_rate(hw, rate, prate, divider->table,
@@ -376,7 +398,7 @@ int divider_get_val(unsigned long rate, unsigned long parent_rate,
 
 	value = _get_val(table, div, flags, width);
 
-	return min_t(unsigned int, value, div_mask(width));
+	return min_t(unsigned int, value, clk_div_mask(width));
 }
 EXPORT_SYMBOL_GPL(divider_get_val);
 
@@ -399,10 +421,10 @@ static int clk_divider_set_rate(struct clk_hw *hw, unsigned long rate,
 		__acquire(divider->lock);
 
 	if (divider->flags & CLK_DIVIDER_HIWORD_MASK) {
-		val = div_mask(divider->width) << (divider->shift + 16);
+		val = clk_div_mask(divider->width) << (divider->shift + 16);
 	} else {
 		val = clk_readl(divider->reg);
-		val &= ~(div_mask(divider->width) << divider->shift);
+		val &= ~(clk_div_mask(divider->width) << divider->shift);
 	}
 	val |= (u32)value << divider->shift;
 	clk_writel(val, divider->reg);
diff --git a/drivers/clk/clk-gpio.c b/drivers/clk/clk-gpio.c
index 151513c655c3..40af4fbab4d2 100644
--- a/drivers/clk/clk-gpio.c
+++ b/drivers/clk/clk-gpio.c
@@ -73,14 +73,14 @@ static u8 clk_gpio_mux_get_parent(struct clk_hw *hw)
 {
 	struct clk_gpio *clk = to_clk_gpio(hw);
 
-	return gpiod_get_value(clk->gpiod);
+	return gpiod_get_value_cansleep(clk->gpiod);
 }
 
 static int clk_gpio_mux_set_parent(struct clk_hw *hw, u8 index)
 {
 	struct clk_gpio *clk = to_clk_gpio(hw);
 
-	gpiod_set_value(clk->gpiod, index);
+	gpiod_set_value_cansleep(clk->gpiod, index);
 
 	return 0;
 }
diff --git a/drivers/clk/clk-mux.c b/drivers/clk/clk-mux.c
index 39cabe157163..ac4a042f8658 100644
--- a/drivers/clk/clk-mux.c
+++ b/drivers/clk/clk-mux.c
@@ -26,35 +26,24 @@
  * parent - parent is adjustable through clk_set_parent
  */
 
-static u8 clk_mux_get_parent(struct clk_hw *hw)
+int clk_mux_val_to_index(struct clk_hw *hw, u32 *table, unsigned int flags,
+			 unsigned int val)
 {
-	struct clk_mux *mux = to_clk_mux(hw);
 	int num_parents = clk_hw_get_num_parents(hw);
-	u32 val;
 
-	/*
-	 * FIXME need a mux-specific flag to determine if val is bitwise or numeric
-	 * e.g. sys_clkin_ck's clksel field is 3 bits wide, but ranges from 0x1
-	 * to 0x7 (index starts at one)
-	 * OTOH, pmd_trace_clk_mux_ck uses a separate bit for each clock, so
-	 * val = 0x4 really means "bit 2, index starts at bit 0"
-	 */
-	val = clk_readl(mux->reg) >> mux->shift;
-	val &= mux->mask;
-
-	if (mux->table) {
+	if (table) {
 		int i;
 
 		for (i = 0; i < num_parents; i++)
-			if (mux->table[i] == val)
+			if (table[i] == val)
 				return i;
 		return -EINVAL;
 	}
 
-	if (val && (mux->flags & CLK_MUX_INDEX_BIT))
+	if (val && (flags & CLK_MUX_INDEX_BIT))
 		val = ffs(val) - 1;
 
-	if (val && (mux->flags & CLK_MUX_INDEX_ONE))
+	if (val && (flags & CLK_MUX_INDEX_ONE))
 		val--;
 
 	if (val >= num_parents)
@@ -62,36 +51,58 @@ static u8 clk_mux_get_parent(struct clk_hw *hw)
 
 	return val;
 }
+EXPORT_SYMBOL_GPL(clk_mux_val_to_index);
 
-static int clk_mux_set_parent(struct clk_hw *hw, u8 index)
+unsigned int clk_mux_index_to_val(u32 *table, unsigned int flags, u8 index)
 {
-	struct clk_mux *mux = to_clk_mux(hw);
-	u32 val;
-	unsigned long flags = 0;
+	unsigned int val = index;
 
-	if (mux->table) {
-		index = mux->table[index];
+	if (table) {
+		val = table[index];
 	} else {
-		if (mux->flags & CLK_MUX_INDEX_BIT)
-			index = 1 << index;
+		if (flags & CLK_MUX_INDEX_BIT)
+			val = 1 << index;
 
-		if (mux->flags & CLK_MUX_INDEX_ONE)
-			index++;
+		if (flags & CLK_MUX_INDEX_ONE)
+			val++;
 	}
 
+	return val;
+}
+EXPORT_SYMBOL_GPL(clk_mux_index_to_val);
+
+static u8 clk_mux_get_parent(struct clk_hw *hw)
+{
+	struct clk_mux *mux = to_clk_mux(hw);
+	u32 val;
+
+	val = clk_readl(mux->reg) >> mux->shift;
+	val &= mux->mask;
+
+	return clk_mux_val_to_index(hw, mux->table, mux->flags, val);
+}
+
+static int clk_mux_set_parent(struct clk_hw *hw, u8 index)
+{
+	struct clk_mux *mux = to_clk_mux(hw);
+	u32 val = clk_mux_index_to_val(mux->table, mux->flags, index);
+	unsigned long flags = 0;
+	u32 reg;
+
 	if (mux->lock)
 		spin_lock_irqsave(mux->lock, flags);
 	else
 		__acquire(mux->lock);
 
 	if (mux->flags & CLK_MUX_HIWORD_MASK) {
-		val = mux->mask << (mux->shift + 16);
+		reg = mux->mask << (mux->shift + 16);
 	} else {
-		val = clk_readl(mux->reg);
-		val &= ~(mux->mask << mux->shift);
+		reg = clk_readl(mux->reg);
+		reg &= ~(mux->mask << mux->shift);
 	}
-	val |= index << mux->shift;
-	clk_writel(val, mux->reg);
+	val = val << mux->shift;
+	reg |= val;
+	clk_writel(reg, mux->reg);
 
 	if (mux->lock)
 		spin_unlock_irqrestore(mux->lock, flags);
diff --git a/drivers/clk/clk-si544.c b/drivers/clk/clk-si544.c
new file mode 100644
index 000000000000..1c96a9f6c022
--- /dev/null
+++ b/drivers/clk/clk-si544.c
@@ -0,0 +1,411 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Driver for Silicon Labs Si544 Programmable Oscillator
+ * Copyright (C) 2018 Topic Embedded Products
+ * Author: Mike Looijmans <mike.looijmans@topic.nl>
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/i2c.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+
+/* I2C registers (decimal as in datasheet) */
+#define SI544_REG_CONTROL	7
+#define SI544_REG_OE_STATE	17
+#define SI544_REG_HS_DIV	23
+#define SI544_REG_LS_HS_DIV	24
+#define SI544_REG_FBDIV0	26
+#define SI544_REG_FBDIV8	27
+#define SI544_REG_FBDIV16	28
+#define SI544_REG_FBDIV24	29
+#define SI544_REG_FBDIV32	30
+#define SI544_REG_FBDIV40	31
+#define SI544_REG_FCAL_OVR	69
+#define SI544_REG_ADPLL_DELTA_M0	231
+#define SI544_REG_ADPLL_DELTA_M8	232
+#define SI544_REG_ADPLL_DELTA_M16	233
+#define SI544_REG_PAGE_SELECT	255
+
+/* Register values */
+#define SI544_CONTROL_RESET	BIT(7)
+#define SI544_CONTROL_MS_ICAL2	BIT(3)
+
+#define SI544_OE_STATE_ODC_OE	BIT(0)
+
+/* Max freq depends on speed grade */
+#define SI544_MIN_FREQ	    200000U
+
+/* Si544 internal oscillator runs at 55.05 MHz */
+#define FXO		  55050000U
+
+/* VCO range is 10.8 .. 12.1 GHz, max depends on speed grade */
+#define FVCO_MIN       10800000000ULL
+
+#define HS_DIV_MAX	2046
+#define HS_DIV_MAX_ODD	33
+
+/* Lowest frequency synthesizable using only the HS divider */
+#define MIN_HSDIV_FREQ	(FVCO_MIN / HS_DIV_MAX)
+
+enum si544_speed_grade {
+	si544a,
+	si544b,
+	si544c,
+};
+
+struct clk_si544 {
+	struct clk_hw hw;
+	struct regmap *regmap;
+	struct i2c_client *i2c_client;
+	enum si544_speed_grade speed_grade;
+};
+#define to_clk_si544(_hw)	container_of(_hw, struct clk_si544, hw)
+
+/**
+ * struct clk_si544_muldiv - Multiplier/divider settings
+ * @fb_div_frac:	fractional part of feedback divider (32 bits)
+ * @fb_div_int:		integer part of feedback divider (11 bits)
+ * @hs_div:		1st divider, 5..2046, must be even when >33
+ * @ls_div_bits:	2nd divider, as 2^x, range 0..5
+ *                      If ls_div_bits is non-zero, hs_div must be even
+ */
+struct clk_si544_muldiv {
+	u32 fb_div_frac;
+	u16 fb_div_int;
+	u16 hs_div;
+	u8 ls_div_bits;
+};
+
+/* Enables or disables the output driver */
+static int si544_enable_output(struct clk_si544 *data, bool enable)
+{
+	return regmap_update_bits(data->regmap, SI544_REG_OE_STATE,
+		SI544_OE_STATE_ODC_OE, enable ? SI544_OE_STATE_ODC_OE : 0);
+}
+
+/* Retrieve clock multiplier and dividers from hardware */
+static int si544_get_muldiv(struct clk_si544 *data,
+	struct clk_si544_muldiv *settings)
+{
+	int err;
+	u8 reg[6];
+
+	err = regmap_bulk_read(data->regmap, SI544_REG_HS_DIV, reg, 2);
+	if (err)
+		return err;
+
+	settings->ls_div_bits = (reg[1] >> 4) & 0x07;
+	settings->hs_div = (reg[1] & 0x07) << 8 | reg[0];
+
+	err = regmap_bulk_read(data->regmap, SI544_REG_FBDIV0, reg, 6);
+	if (err)
+		return err;
+
+	settings->fb_div_int = reg[4] | (reg[5] & 0x07) << 8;
+	settings->fb_div_frac = reg[0] | reg[1] << 8 | reg[2] << 16 |
+				reg[3] << 24;
+	return 0;
+}
+
+static int si544_set_muldiv(struct clk_si544 *data,
+	struct clk_si544_muldiv *settings)
+{
+	int err;
+	u8 reg[6];
+
+	reg[0] = settings->hs_div;
+	reg[1] = settings->hs_div >> 8 | settings->ls_div_bits << 4;
+
+	err = regmap_bulk_write(data->regmap, SI544_REG_HS_DIV, reg, 2);
+	if (err < 0)
+		return err;
+
+	reg[0] = settings->fb_div_frac;
+	reg[1] = settings->fb_div_frac >> 8;
+	reg[2] = settings->fb_div_frac >> 16;
+	reg[3] = settings->fb_div_frac >> 24;
+	reg[4] = settings->fb_div_int;
+	reg[5] = settings->fb_div_int >> 8;
+
+	/*
+	 * Writing to SI544_REG_FBDIV40 triggers the clock change, so that
+	 * must be written last
+	 */
+	return regmap_bulk_write(data->regmap, SI544_REG_FBDIV0, reg, 6);
+}
+
+static bool is_valid_frequency(const struct clk_si544 *data,
+	unsigned long frequency)
+{
+	unsigned long max_freq = 0;
+
+	if (frequency < SI544_MIN_FREQ)
+		return false;
+
+	switch (data->speed_grade) {
+	case si544a:
+		max_freq = 1500000000;
+		break;
+	case si544b:
+		max_freq = 800000000;
+		break;
+	case si544c:
+		max_freq = 350000000;
+		break;
+	}
+
+	return frequency <= max_freq;
+}
+
+/* Calculate divider settings for a given frequency */
+static int si544_calc_muldiv(struct clk_si544_muldiv *settings,
+	unsigned long frequency)
+{
+	u64 vco;
+	u32 ls_freq;
+	u32 tmp;
+	u8 res;
+
+	/* Determine the minimum value of LS_DIV and resulting target freq. */
+	ls_freq = frequency;
+	settings->ls_div_bits = 0;
+
+	if (frequency >= MIN_HSDIV_FREQ) {
+		settings->ls_div_bits = 0;
+	} else {
+		res = 1;
+		tmp = 2 * HS_DIV_MAX;
+		while (tmp <= (HS_DIV_MAX * 32)) {
+			if (((u64)frequency * tmp) >= FVCO_MIN)
+				break;
+			++res;
+			tmp <<= 1;
+		}
+		settings->ls_div_bits = res;
+		ls_freq = frequency << res;
+	}
+
+	/* Determine minimum HS_DIV by rounding up */
+	vco = FVCO_MIN + ls_freq - 1;
+	do_div(vco, ls_freq);
+	settings->hs_div = vco;
+
+	/* round up to even number when required */
+	if ((settings->hs_div & 1) &&
+	    (settings->hs_div > HS_DIV_MAX_ODD || settings->ls_div_bits))
+		++settings->hs_div;
+
+	/* Calculate VCO frequency (in 10..12GHz range) */
+	vco = (u64)ls_freq * settings->hs_div;
+
+	/* Calculate the integer part of the feedback divider */
+	tmp = do_div(vco, FXO);
+	settings->fb_div_int = vco;
+
+	/* And the fractional bits using the remainder */
+	vco = (u64)tmp << 32;
+	do_div(vco, FXO);
+	settings->fb_div_frac = vco;
+
+	return 0;
+}
+
+/* Calculate resulting frequency given the register settings */
+static unsigned long si544_calc_rate(struct clk_si544_muldiv *settings)
+{
+	u32 d = settings->hs_div * BIT(settings->ls_div_bits);
+	u64 vco;
+
+	/* Calculate VCO from the fractional part */
+	vco = (u64)settings->fb_div_frac * FXO;
+	vco += (FXO / 2);
+	vco >>= 32;
+
+	/* Add the integer part of the VCO frequency */
+	vco += (u64)settings->fb_div_int * FXO;
+
+	/* Apply divider to obtain the generated frequency */
+	do_div(vco, d);
+
+	return vco;
+}
+
+static unsigned long si544_recalc_rate(struct clk_hw *hw,
+		unsigned long parent_rate)
+{
+	struct clk_si544 *data = to_clk_si544(hw);
+	struct clk_si544_muldiv settings;
+	int err;
+
+	err = si544_get_muldiv(data, &settings);
+	if (err)
+		return 0;
+
+	return si544_calc_rate(&settings);
+}
+
+static long si544_round_rate(struct clk_hw *hw, unsigned long rate,
+		unsigned long *parent_rate)
+{
+	struct clk_si544 *data = to_clk_si544(hw);
+	struct clk_si544_muldiv settings;
+	int err;
+
+	if (!is_valid_frequency(data, rate))
+		return -EINVAL;
+
+	err = si544_calc_muldiv(&settings, rate);
+	if (err)
+		return err;
+
+	return si544_calc_rate(&settings);
+}
+
+/*
+ * Update output frequency for "big" frequency changes
+ */
+static int si544_set_rate(struct clk_hw *hw, unsigned long rate,
+		unsigned long parent_rate)
+{
+	struct clk_si544 *data = to_clk_si544(hw);
+	struct clk_si544_muldiv settings;
+	int err;
+
+	if (!is_valid_frequency(data, rate))
+		return -EINVAL;
+
+	err = si544_calc_muldiv(&settings, rate);
+	if (err)
+		return err;
+
+	si544_enable_output(data, false);
+
+	/* Allow FCAL for this frequency update */
+	err = regmap_write(data->regmap, SI544_REG_FCAL_OVR, 0);
+	if (err < 0)
+		return err;
+
+
+	err = si544_set_muldiv(data, &settings);
+	if (err < 0)
+		return err; /* Undefined state now, best to leave disabled */
+
+	/* Trigger calibration */
+	err = regmap_write(data->regmap, SI544_REG_CONTROL,
+			   SI544_CONTROL_MS_ICAL2);
+	if (err < 0)
+		return err;
+
+	/* Applying a new frequency can take up to 10ms */
+	usleep_range(10000, 12000);
+
+	si544_enable_output(data, true);
+
+	return err;
+}
+
+static const struct clk_ops si544_clk_ops = {
+	.recalc_rate = si544_recalc_rate,
+	.round_rate = si544_round_rate,
+	.set_rate = si544_set_rate,
+};
+
+static bool si544_regmap_is_volatile(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case SI544_REG_CONTROL:
+	case SI544_REG_FCAL_OVR:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static const struct regmap_config si544_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+	.cache_type = REGCACHE_RBTREE,
+	.max_register = SI544_REG_PAGE_SELECT,
+	.volatile_reg = si544_regmap_is_volatile,
+};
+
+static int si544_probe(struct i2c_client *client,
+		const struct i2c_device_id *id)
+{
+	struct clk_si544 *data;
+	struct clk_init_data init;
+	int err;
+
+	data = devm_kzalloc(&client->dev, sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	init.ops = &si544_clk_ops;
+	init.flags = 0;
+	init.num_parents = 0;
+	data->hw.init = &init;
+	data->i2c_client = client;
+	data->speed_grade = id->driver_data;
+
+	if (of_property_read_string(client->dev.of_node, "clock-output-names",
+			&init.name))
+		init.name = client->dev.of_node->name;
+
+	data->regmap = devm_regmap_init_i2c(client, &si544_regmap_config);
+	if (IS_ERR(data->regmap))
+		return PTR_ERR(data->regmap);
+
+	i2c_set_clientdata(client, data);
+
+	/* Select page 0, just to be sure, there appear to be no more */
+	err = regmap_write(data->regmap, SI544_REG_PAGE_SELECT, 0);
+	if (err < 0)
+		return err;
+
+	err = devm_clk_hw_register(&client->dev, &data->hw);
+	if (err) {
+		dev_err(&client->dev, "clock registration failed\n");
+		return err;
+	}
+	err = devm_of_clk_add_hw_provider(&client->dev, of_clk_hw_simple_get,
+					  &data->hw);
+	if (err) {
+		dev_err(&client->dev, "unable to add clk provider\n");
+		return err;
+	}
+
+	return 0;
+}
+
+static const struct i2c_device_id si544_id[] = {
+	{ "si544a", si544a },
+	{ "si544b", si544b },
+	{ "si544c", si544c },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, si544_id);
+
+static const struct of_device_id clk_si544_of_match[] = {
+	{ .compatible = "silabs,si544a" },
+	{ .compatible = "silabs,si544b" },
+	{ .compatible = "silabs,si544c" },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, clk_si544_of_match);
+
+static struct i2c_driver si544_driver = {
+	.driver = {
+		.name = "si544",
+		.of_match_table = clk_si544_of_match,
+	},
+	.probe		= si544_probe,
+	.id_table	= si544_id,
+};
+module_i2c_driver(si544_driver);
+
+MODULE_AUTHOR("Mike Looijmans <mike.looijmans@topic.nl>");
+MODULE_DESCRIPTION("Si544 driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/clk/clk-stm32f4.c b/drivers/clk/clk-stm32f4.c
index da44f8dc1d29..294850bdc195 100644
--- a/drivers/clk/clk-stm32f4.c
+++ b/drivers/clk/clk-stm32f4.c
@@ -282,6 +282,7 @@ static const struct stm32f4_gate_data stm32f746_gates[] __initconst = {
 
 	{ STM32F4_RCC_APB2ENR,  0,	"tim1",		"apb2_mul" },
 	{ STM32F4_RCC_APB2ENR,  1,	"tim8",		"apb2_mul" },
+	{ STM32F4_RCC_APB2ENR,  7,	"sdmmc2",	"sdmux"    },
 	{ STM32F4_RCC_APB2ENR,  8,	"adc1",		"apb2_div" },
 	{ STM32F4_RCC_APB2ENR,  9,	"adc2",		"apb2_div" },
 	{ STM32F4_RCC_APB2ENR, 10,	"adc3",		"apb2_div" },
@@ -315,7 +316,7 @@ static const u64 stm32f46xx_gate_map[MAX_GATE_MAP] = { 0x000000f17ef417ffull,
 
 static const u64 stm32f746_gate_map[MAX_GATE_MAP] = { 0x000000f17ef417ffull,
 						      0x0000000000000003ull,
-						      0x04f77f033e01c9ffull };
+						      0x04f77f833e01c9ffull };
 
 static const u64 *stm32f4_gate_map;
 
@@ -521,7 +522,7 @@ static const struct stm32f4_pll_data stm32f429_pll[MAX_PLL_DIV] = {
 };
 
 static const struct stm32f4_pll_data stm32f469_pll[MAX_PLL_DIV] = {
-	{ PLL,	   50, { "pll",	     "pll-q",    NULL	    } },
+	{ PLL,	   50, { "pll",	     "pll-q",    "pll-r"    } },
 	{ PLL_I2S, 50, { "plli2s-p", "plli2s-q", "plli2s-r" } },
 	{ PLL_SAI, 50, { "pllsai-p", "pllsai-q", "pllsai-r" } },
 };
@@ -1047,6 +1048,8 @@ static const char *rtc_parents[4] = {
 	"no-clock", "lse", "lsi", "hse-rtc"
 };
 
+static const char *dsi_parent[2] = { NULL, "pll-r" };
+
 static const char *lcd_parent[1] = { "pllsai-r-div" };
 
 static const char *i2s_parents[2] = { "plli2s-r", NULL };
@@ -1156,6 +1159,12 @@ static const struct stm32_aux_clk stm32f469_aux_clk[] = {
 		NO_GATE, 0,
 		0
 	},
+	{
+		CLK_F469_DSI, "dsi", dsi_parent, ARRAY_SIZE(dsi_parent),
+		STM32F4_RCC_DCKCFGR, 29, 1,
+		STM32F4_RCC_APB2ENR, 27,
+		CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT
+	},
 };
 
 static const struct stm32_aux_clk stm32f746_aux_clk[] = {
@@ -1450,6 +1459,7 @@ static void __init stm32f4_rcc_init(struct device_node *np)
 	stm32f4_gate_map = data->gates_map;
 
 	hse_clk = of_clk_get_parent_name(np, 0);
+	dsi_parent[0] = hse_clk;
 
 	i2s_in_clk = of_clk_get_parent_name(np, 1);
 
diff --git a/drivers/clk/clk-stm32mp1.c b/drivers/clk/clk-stm32mp1.c
new file mode 100644
index 000000000000..f1d5967b4b39
--- /dev/null
+++ b/drivers/clk/clk-stm32mp1.c
@@ -0,0 +1,2117 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) STMicroelectronics 2018 - All Rights Reserved
+ * Author: Olivier Bideau <olivier.bideau@st.com> for STMicroelectronics.
+ * Author: Gabriel Fernandez <gabriel.fernandez@st.com> for STMicroelectronics.
+ */
+
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include <dt-bindings/clock/stm32mp1-clks.h>
+
+static DEFINE_SPINLOCK(rlock);
+
+#define RCC_OCENSETR		0x0C
+#define RCC_HSICFGR		0x18
+#define RCC_RDLSICR		0x144
+#define RCC_PLL1CR		0x80
+#define RCC_PLL1CFGR1		0x84
+#define RCC_PLL1CFGR2		0x88
+#define RCC_PLL2CR		0x94
+#define RCC_PLL2CFGR1		0x98
+#define RCC_PLL2CFGR2		0x9C
+#define RCC_PLL3CR		0x880
+#define RCC_PLL3CFGR1		0x884
+#define RCC_PLL3CFGR2		0x888
+#define RCC_PLL4CR		0x894
+#define RCC_PLL4CFGR1		0x898
+#define RCC_PLL4CFGR2		0x89C
+#define RCC_APB1ENSETR		0xA00
+#define RCC_APB2ENSETR		0xA08
+#define RCC_APB3ENSETR		0xA10
+#define RCC_APB4ENSETR		0x200
+#define RCC_APB5ENSETR		0x208
+#define RCC_AHB2ENSETR		0xA18
+#define RCC_AHB3ENSETR		0xA20
+#define RCC_AHB4ENSETR		0xA28
+#define RCC_AHB5ENSETR		0x210
+#define RCC_AHB6ENSETR		0x218
+#define RCC_AHB6LPENSETR	0x318
+#define RCC_RCK12SELR		0x28
+#define RCC_RCK3SELR		0x820
+#define RCC_RCK4SELR		0x824
+#define RCC_MPCKSELR		0x20
+#define RCC_ASSCKSELR		0x24
+#define RCC_MSSCKSELR		0x48
+#define RCC_SPI6CKSELR		0xC4
+#define RCC_SDMMC12CKSELR	0x8F4
+#define RCC_SDMMC3CKSELR	0x8F8
+#define RCC_FMCCKSELR		0x904
+#define RCC_I2C46CKSELR		0xC0
+#define RCC_I2C12CKSELR		0x8C0
+#define RCC_I2C35CKSELR		0x8C4
+#define RCC_UART1CKSELR		0xC8
+#define RCC_QSPICKSELR		0x900
+#define RCC_ETHCKSELR		0x8FC
+#define RCC_RNG1CKSELR		0xCC
+#define RCC_RNG2CKSELR		0x920
+#define RCC_GPUCKSELR		0x938
+#define RCC_USBCKSELR		0x91C
+#define RCC_STGENCKSELR		0xD4
+#define RCC_SPDIFCKSELR		0x914
+#define RCC_SPI2S1CKSELR	0x8D8
+#define RCC_SPI2S23CKSELR	0x8DC
+#define RCC_SPI2S45CKSELR	0x8E0
+#define RCC_CECCKSELR		0x918
+#define RCC_LPTIM1CKSELR	0x934
+#define RCC_LPTIM23CKSELR	0x930
+#define RCC_LPTIM45CKSELR	0x92C
+#define RCC_UART24CKSELR	0x8E8
+#define RCC_UART35CKSELR	0x8EC
+#define RCC_UART6CKSELR		0x8E4
+#define RCC_UART78CKSELR	0x8F0
+#define RCC_FDCANCKSELR		0x90C
+#define RCC_SAI1CKSELR		0x8C8
+#define RCC_SAI2CKSELR		0x8CC
+#define RCC_SAI3CKSELR		0x8D0
+#define RCC_SAI4CKSELR		0x8D4
+#define RCC_ADCCKSELR		0x928
+#define RCC_MPCKDIVR		0x2C
+#define RCC_DSICKSELR		0x924
+#define RCC_CPERCKSELR		0xD0
+#define RCC_MCO1CFGR		0x800
+#define RCC_MCO2CFGR		0x804
+#define RCC_BDCR		0x140
+#define RCC_AXIDIVR		0x30
+#define RCC_MCUDIVR		0x830
+#define RCC_APB1DIVR		0x834
+#define RCC_APB2DIVR		0x838
+#define RCC_APB3DIVR		0x83C
+#define RCC_APB4DIVR		0x3C
+#define RCC_APB5DIVR		0x40
+#define RCC_TIMG1PRER		0x828
+#define RCC_TIMG2PRER		0x82C
+#define RCC_RTCDIVR		0x44
+#define RCC_DBGCFGR		0x80C
+
+#define RCC_CLR	0x4	/* set register -> clear register offset */
+
+static const char * const ref12_parents[] = {
+	"ck_hsi", "ck_hse"
+};
+
+static const char * const ref3_parents[] = {
+	"ck_hsi", "ck_hse", "ck_csi"
+};
+
+static const char * const ref4_parents[] = {
+	"ck_hsi", "ck_hse", "ck_csi"
+};
+
+static const char * const cpu_src[] = {
+	"ck_hsi", "ck_hse", "pll1_p"
+};
+
+static const char * const axi_src[] = {
+	"ck_hsi", "ck_hse", "pll2_p", "pll3_p"
+};
+
+static const char * const per_src[] = {
+	"ck_hsi", "ck_csi", "ck_hse"
+};
+
+static const char * const mcu_src[] = {
+	"ck_hsi", "ck_hse", "ck_csi", "pll3_p"
+};
+
+static const char * const sdmmc12_src[] = {
+	"ck_axi", "pll3_r", "pll4_p", "ck_hsi"
+};
+
+static const char * const sdmmc3_src[] = {
+	"ck_mcu", "pll3_r", "pll4_p", "ck_hsi"
+};
+
+static const char * const fmc_src[] = {
+	"ck_axi", "pll3_r", "pll4_p", "ck_per"
+};
+
+static const char * const qspi_src[] = {
+	"ck_axi", "pll3_r", "pll4_p", "ck_per"
+};
+
+static const char * const eth_src[] = {
+	"pll4_p", "pll3_q"
+};
+
+static const char * const rng_src[] = {
+	"ck_csi", "pll4_r", "ck_lse", "ck_lsi"
+};
+
+static const char * const usbphy_src[] = {
+	"ck_hse", "pll4_r", "clk-hse-div2"
+};
+
+static const char * const usbo_src[] = {
+	"pll4_r", "ck_usbo_48m"
+};
+
+static const char * const stgen_src[] = {
+	"ck_hsi", "ck_hse"
+};
+
+static const char * const spdif_src[] = {
+	"pll4_p", "pll3_q", "ck_hsi"
+};
+
+static const char * const spi123_src[] = {
+	"pll4_p", "pll3_q", "i2s_ckin", "ck_per", "pll3_r"
+};
+
+static const char * const spi45_src[] = {
+	"pclk2", "pll4_q", "ck_hsi", "ck_csi", "ck_hse"
+};
+
+static const char * const spi6_src[] = {
+	"pclk5", "pll4_q", "ck_hsi", "ck_csi", "ck_hse", "pll3_q"
+};
+
+static const char * const cec_src[] = {
+	"ck_lse", "ck_lsi", "ck_csi"
+};
+
+static const char * const i2c12_src[] = {
+	"pclk1", "pll4_r", "ck_hsi", "ck_csi"
+};
+
+static const char * const i2c35_src[] = {
+	"pclk1", "pll4_r", "ck_hsi", "ck_csi"
+};
+
+static const char * const i2c46_src[] = {
+	"pclk5", "pll3_q", "ck_hsi", "ck_csi"
+};
+
+static const char * const lptim1_src[] = {
+	"pclk1", "pll4_p", "pll3_q", "ck_lse", "ck_lsi", "ck_per"
+};
+
+static const char * const lptim23_src[] = {
+	"pclk3", "pll4_q", "ck_per", "ck_lse", "ck_lsi"
+};
+
+static const char * const lptim45_src[] = {
+	"pclk3", "pll4_p", "pll3_q", "ck_lse", "ck_lsi", "ck_per"
+};
+
+static const char * const usart1_src[] = {
+	"pclk5", "pll3_q", "ck_hsi", "ck_csi", "pll4_q", "ck_hse"
+};
+
+static const char * const usart234578_src[] = {
+	"pclk1", "pll4_q", "ck_hsi", "ck_csi", "ck_hse"
+};
+
+static const char * const usart6_src[] = {
+	"pclk2", "pll4_q", "ck_hsi", "ck_csi", "ck_hse"
+};
+
+static const char * const dfsdm_src[] = {
+	"pclk2", "ck_mcu"
+};
+
+static const char * const fdcan_src[] = {
+	"ck_hse", "pll3_q", "pll4_q"
+};
+
+static const char * const sai_src[] = {
+	"pll4_q", "pll3_q", "i2s_ckin", "ck_per"
+};
+
+static const char * const sai2_src[] = {
+	"pll4_q", "pll3_q", "i2s_ckin", "ck_per", "spdif_ck_symb"
+};
+
+static const char * const adc12_src[] = {
+	"pll4_q", "ck_per"
+};
+
+static const char * const dsi_src[] = {
+	"ck_dsi_phy", "pll4_p"
+};
+
+static const char * const rtc_src[] = {
+	"off", "ck_lse", "ck_lsi", "ck_hse_rtc"
+};
+
+static const char * const mco1_src[] = {
+	"ck_hsi", "ck_hse", "ck_csi", "ck_lsi", "ck_lse"
+};
+
+static const char * const mco2_src[] = {
+	"ck_mpu", "ck_axi", "ck_mcu", "pll4_p", "ck_hse", "ck_hsi"
+};
+
+static const char * const ck_trace_src[] = {
+	"ck_axi"
+};
+
+static const struct clk_div_table axi_div_table[] = {
+	{ 0, 1 }, { 1, 2 }, { 2, 3 }, { 3, 4 },
+	{ 4, 4 }, { 5, 4 }, { 6, 4 }, { 7, 4 },
+	{ 0 },
+};
+
+static const struct clk_div_table mcu_div_table[] = {
+	{ 0, 1 }, { 1, 2 }, { 2, 4 }, { 3, 8 },
+	{ 4, 16 }, { 5, 32 }, { 6, 64 }, { 7, 128 },
+	{ 8, 256 }, { 9, 512 }, { 10, 512 }, { 11, 512 },	/* 8 is /256 */
+	{ 12, 512 }, { 13, 512 }, { 14, 512 }, { 15, 512 },
+	{ 0 },
+};
+
+static const struct clk_div_table apb_div_table[] = {
+	{ 0, 1 }, { 1, 2 }, { 2, 4 }, { 3, 8 },
+	{ 4, 16 }, { 5, 16 }, { 6, 16 }, { 7, 16 },
+	{ 0 },
+};
+
+static const struct clk_div_table ck_trace_div_table[] = {
+	{ 0, 1 }, { 1, 2 }, { 2, 4 }, { 3, 8 },
+	{ 4, 16 }, { 5, 16 }, { 6, 16 }, { 7, 16 },
+	{ 0 },
+};
+
+#define MAX_MUX_CLK 2
+
+struct stm32_mmux {
+	u8 nbr_clk;	/* entries used in hws[]; bumped by _get_stm32_mux() */
+	struct clk_hw *hws[MAX_MUX_CLK];	/* see clk_mmux_set_parent() */
+};
+
+struct stm32_clk_mmux {
+	struct clk_mux mux;	/* container_of() anchor for to_clk_mmux() */
+	struct stm32_mmux *mmux;	/* list shared with sibling muxes */
+};
+
+struct stm32_mgate {
+	u8 nbr_clk;	/* next free bit in flag; bumped by _get_stm32_gate() */
+	u32 flag;	/* one bit per enabled user, see mp1_mgate_clk_*() */
+};
+
+struct stm32_clk_mgate {
+	struct clk_gate gate;	/* container_of() anchor for to_clk_mgate() */
+	struct stm32_mgate *mgate;	/* shared enable bookkeeping */
+	u32 mask;	/* this user's bit in mgate->flag */
+};
+
+struct clock_config {
+	u32 id;	/* NO_ID in STM32_CKTIM() entries */
+	const char *name;
+	union {
+		const char *parent_name;	/* single-parent helpers */
+		const char * const *parent_names;	/* mux and composite */
+	};
+	int num_parents;	/* only read together with parent_names */
+	unsigned long flags;	/* forwarded as the clk framework flags */
+	void *cfg;	/* type-specific data, interpreted by func */
+	struct clk_hw * (*func)(struct device *dev,
+				struct clk_hw_onecell_data *clk_data,
+				void __iomem *base, spinlock_t *lock,
+				const struct clock_config *cfg);
+};
+
+#define NO_ID ~0
+
+struct gate_cfg {
+	u32 reg_off;
+	u8 bit_idx;
+	u8 gate_flags;
+};
+
+struct fixed_factor_cfg {
+	unsigned int mult;
+	unsigned int div;
+};
+
+struct div_cfg {
+	u32 reg_off;
+	u8 shift;
+	u8 width;
+	u8 div_flags;
+	const struct clk_div_table *table;
+};
+
+struct mux_cfg {
+	u32 reg_off;
+	u8 shift;
+	u8 width;
+	u8 mux_flags;
+	u32 *table;
+};
+
+struct stm32_gate_cfg {
+	struct gate_cfg		*gate;
+	struct stm32_mgate	*mgate;
+	const struct clk_ops	*ops;
+};
+
+struct stm32_div_cfg {
+	struct div_cfg		*div;
+	const struct clk_ops	*ops;
+};
+
+struct stm32_mux_cfg {
+	struct mux_cfg		*mux;
+	struct stm32_mmux	*mmux;
+	const struct clk_ops	*ops;
+};
+
+/* STM32 Composite clock */
+struct stm32_composite_cfg {
+	const struct stm32_gate_cfg	*gate;
+	const struct stm32_div_cfg	*div;
+	const struct stm32_mux_cfg	*mux;
+};
+
+/*
+ * clock_config .func hook for a plain gate: forwards the struct gate_cfg
+ * held in cfg->cfg to clk_hw_register_gate().  @clk_data is unused here.
+ */
+static struct clk_hw *
+_clk_hw_register_gate(struct device *dev,
+		      struct clk_hw_onecell_data *clk_data,
+		      void __iomem *base, spinlock_t *lock,
+		      const struct clock_config *cfg)
+{
+	const struct gate_cfg *gc = cfg->cfg;
+	void __iomem *reg = base + gc->reg_off;
+
+	return clk_hw_register_gate(dev, cfg->name, cfg->parent_name,
+				    cfg->flags, reg, gc->bit_idx,
+				    gc->gate_flags, lock);
+}
+
+/* clock_config .func hook: fixed mult/div clock described by cfg->cfg. */
+static struct clk_hw *
+_clk_hw_register_fixed_factor(struct device *dev,
+			      struct clk_hw_onecell_data *clk_data,
+			      void __iomem *base, spinlock_t *lock,
+			      const struct clock_config *cfg)
+{
+	const struct fixed_factor_cfg *ff = cfg->cfg;
+
+	return clk_hw_register_fixed_factor(dev, cfg->name, cfg->parent_name,
+					    cfg->flags, ff->mult, ff->div);
+}
+
+/*
+ * clock_config .func hook for a register divider: forwards the struct
+ * div_cfg held in cfg->cfg (offset, field position, flags and optional
+ * table) to clk_hw_register_divider_table().  @clk_data is unused here.
+ */
+static struct clk_hw *
+_clk_hw_register_divider_table(struct device *dev,
+			       struct clk_hw_onecell_data *clk_data,
+			       void __iomem *base, spinlock_t *lock,
+			       const struct clock_config *cfg)
+{
+	const struct div_cfg *dc = cfg->cfg;
+	void __iomem *reg = base + dc->reg_off;
+
+	return clk_hw_register_divider_table(dev, cfg->name, cfg->parent_name,
+					     cfg->flags, reg, dc->shift,
+					     dc->width, dc->div_flags,
+					     dc->table, lock);
+}
+
+/* clock_config .func hook: registers a mux from the mux_cfg in cfg->cfg. */
+static struct clk_hw *
+_clk_hw_register_mux(struct device *dev, struct clk_hw_onecell_data *clk_data,
+		     void __iomem *base, spinlock_t *lock,
+		     const struct clock_config *cfg)
+{
+	const struct mux_cfg *mc = cfg->cfg;
+
+	return clk_hw_register_mux(dev, cfg->name, cfg->parent_names,
+				   cfg->num_parents, cfg->flags,
+				   base + mc->reg_off, mc->shift, mc->width,
+				   mc->mux_flags, lock);
+}
+
+/* MP1 Gate clock with set & clear registers */
+
+static int mp1_gate_clk_enable(struct clk_hw *hw)
+{
+	/* Only go through the generic enable when the gate is not yet on. */
+	if (!clk_gate_ops.is_enabled(hw))
+		clk_gate_ops.enable(hw);
+	return 0;
+}
+
+static void mp1_gate_clk_disable(struct clk_hw *hw)
+{
+	struct clk_gate *gate = to_clk_gate(hw);
+	unsigned long irqflags = 0;
+
+	if (!clk_gate_ops.is_enabled(hw))
+		return;		/* already off, nothing to clear */
+	spin_lock_irqsave(gate->lock, irqflags);
+	writel_relaxed(BIT(gate->bit_idx), gate->reg + RCC_CLR);
+	spin_unlock_irqrestore(gate->lock, irqflags);
+}
+
+static const struct clk_ops mp1_gate_clk_ops = {
+	.enable		= mp1_gate_clk_enable,
+	.disable	= mp1_gate_clk_disable,
+	.is_enabled	= clk_gate_is_enabled,
+};
+
+/*
+ * Allocate and fill, but do not register, the clk_mux described by @cfg.
+ *
+ * When @cfg->mmux is set the mux is embedded in a stm32_clk_mmux and its hw
+ * is appended to the shared stm32_mmux so that clk_mmux_set_parent() can
+ * find it.  Returns the embedded clk_hw, or ERR_PTR(-ENOMEM).
+ */
+static struct clk_hw *_get_stm32_mux(void __iomem *base,
+				     const struct stm32_mux_cfg *cfg,
+				     spinlock_t *lock)
+{
+	struct stm32_mmux *shared = cfg->mmux;
+	struct stm32_clk_mmux *wrap = NULL;
+	struct clk_mux *mux;
+
+	if (shared) {
+		wrap = kzalloc(sizeof(*wrap), GFP_KERNEL);
+		mux = wrap ? &wrap->mux : NULL;
+	} else {
+		mux = kzalloc(sizeof(*mux), GFP_KERNEL);
+	}
+	if (!mux)
+		return ERR_PTR(-ENOMEM);
+
+	mux->reg = cfg->mux->reg_off + base;
+	mux->shift = cfg->mux->shift;
+	mux->mask = (1 << cfg->mux->width) - 1;
+	mux->flags = cfg->mux->mux_flags;
+	mux->table = cfg->mux->table;
+	mux->lock = lock;
+
+	/* Only a wrapped mux is recorded in the shared list. */
+	if (wrap) {
+		wrap->mmux = shared;
+		shared->hws[shared->nbr_clk++] = &mux->hw;
+	}
+
+	return &mux->hw;
+}
+
+/* Allocate and fill (but do not register) the clk_divider for @cfg. */
+static struct clk_hw *_get_stm32_div(void __iomem *base,
+				     const struct stm32_div_cfg *cfg,
+				     spinlock_t *lock)
+{
+	const struct div_cfg *dc = cfg->div;
+	struct clk_divider *div = kzalloc(sizeof(*div), GFP_KERNEL);
+
+	if (!div)
+		return ERR_PTR(-ENOMEM);
+
+	div->reg = base + dc->reg_off;
+	div->shift = dc->shift;
+	div->width = dc->width;
+	div->flags = dc->div_flags;
+	div->table = dc->table;
+	div->lock = lock;
+
+	return &div->hw;
+}
+
+/*
+ * Allocate and fill, but do not register, the clk_gate described by @cfg.
+ *
+ * When @cfg->mgate is set the gate is embedded in a stm32_clk_mgate and
+ * takes the next free bit of the shared stm32_mgate as its mask.
+ *
+ * Returns the embedded clk_hw, or ERR_PTR(-ENOMEM).
+ */
+static struct clk_hw *
+_get_stm32_gate(void __iomem *base,
+		const struct stm32_gate_cfg *cfg, spinlock_t *lock)
+{
+	struct stm32_mgate *shared = cfg->mgate;
+	struct stm32_clk_mgate *wrap = NULL;
+	struct clk_gate *gate;
+
+	if (shared) {
+		wrap = kzalloc(sizeof(*wrap), GFP_KERNEL);
+		gate = wrap ? &wrap->gate : NULL;
+	} else {
+		gate = kzalloc(sizeof(*gate), GFP_KERNEL);
+	}
+	if (!gate)
+		return ERR_PTR(-ENOMEM);
+
+	gate->reg = cfg->gate->reg_off + base;
+	gate->bit_idx = cfg->gate->bit_idx;
+	gate->flags = cfg->gate->gate_flags;
+	gate->lock = lock;
+
+	if (wrap) {
+		/* Each user of a shared gate owns one bit of mgate->flag. */
+		wrap->mask = BIT(shared->nbr_clk++);
+		wrap->mgate = shared;
+	}
+
+	return &gate->hw;
+}
+
+/*
+ * Register a single-parent gate whose clk_gate (or stm32_clk_mgate wrapper)
+ * is built by _get_stm32_gate() from @cfg.  @cfg->ops overrides the default
+ * clk_gate_ops when set.  Returns the registered clk_hw or an ERR_PTR.
+ */
+static struct clk_hw *
+clk_stm32_register_gate_ops(struct device *dev,
+			    const char *name,
+			    const char *parent_name,
+			    unsigned long flags,
+			    void __iomem *base,
+			    const struct stm32_gate_cfg *cfg,
+			    spinlock_t *lock)
+{
+	struct clk_init_data init = { NULL };
+	struct clk_hw *hw;
+	int ret;
+
+	init.name = name;
+	init.parent_names = &parent_name;
+	init.num_parents = 1;
+	init.flags = flags;
+	init.ops = cfg->ops ? cfg->ops : &clk_gate_ops;
+
+	hw = _get_stm32_gate(base, cfg, lock);
+	if (IS_ERR(hw))
+		return hw;
+
+	hw->init = &init;
+
+	ret = clk_hw_register(dev, hw);
+	if (ret) {
+		/*
+		 * @hw sits in a clk_gate that is either the allocation itself
+		 * or the first member of a stm32_clk_mgate, so this frees it.
+		 */
+		kfree(to_clk_gate(hw));
+		hw = ERR_PTR(ret);
+	}
+
+	return hw;
+}
+
+/*
+ * Build the optional mux, divider and gate parts described by @cfg and
+ * register them as one composite clock.  Each part uses the generic
+ * clk_*_ops unless its own cfg supplies ops.  A part that is absent from
+ * @cfg is passed on as a NULL hw with NULL ops.
+ *
+ * NOTE(review): when building a part fails, its ops stay NULL but the
+ * ERR_PTR is still handed to clk_hw_register_composite() as the part's hw;
+ * confirm that callee copes with that.
+ */
+static struct clk_hw *
+clk_stm32_register_composite(struct device *dev,
+			     const char *name, const char * const *parent_names,
+			     int num_parents, void __iomem *base,
+			     const struct stm32_composite_cfg *cfg,
+			     unsigned long flags, spinlock_t *lock)
+{
+	const struct clk_ops *mux_ops = NULL, *div_ops = NULL, *gate_ops = NULL;
+	struct clk_hw *mux_hw = NULL, *div_hw = NULL, *gate_hw = NULL;
+
+	/* Parent selector */
+	if (cfg->mux) {
+		mux_hw = _get_stm32_mux(base, cfg->mux, lock);
+		if (!IS_ERR(mux_hw)) {
+			mux_ops = cfg->mux->ops;
+			if (!mux_ops)
+				mux_ops = &clk_mux_ops;
+		}
+	}
+
+	/* Rate divider */
+	if (cfg->div) {
+		div_hw = _get_stm32_div(base, cfg->div, lock);
+		if (!IS_ERR(div_hw)) {
+			div_ops = cfg->div->ops;
+			if (!div_ops)
+				div_ops = &clk_divider_ops;
+		}
+	}
+
+	/* Enable gate */
+	if (cfg->gate) {
+		gate_hw = _get_stm32_gate(base, cfg->gate, lock);
+		if (!IS_ERR(gate_hw)) {
+			gate_ops = cfg->gate->ops;
+			if (!gate_ops)
+				gate_ops = &clk_gate_ops;
+		}
+	}
+
+	return clk_hw_register_composite(dev, name, parent_names, num_parents,
+					 mux_hw, mux_ops, div_hw, div_ops,
+					 gate_hw, gate_ops, flags);
+}
+
+#define to_clk_mgate(_gate) container_of(_gate, struct stm32_clk_mgate, gate)
+
+/*
+ * Record this user's bit in the shared flag word, then turn the gate on.
+ */
+static int mp1_mgate_clk_enable(struct clk_hw *hw)
+{
+	struct stm32_clk_mgate *user = to_clk_mgate(to_clk_gate(hw));
+
+	user->mgate->flag |= user->mask;
+
+	return mp1_gate_clk_enable(hw);
+}
+
+/* Drop this user's bit; clear the hardware gate once no user is left. */
+static void mp1_mgate_clk_disable(struct clk_hw *hw)
+{
+	struct stm32_clk_mgate *user = to_clk_mgate(to_clk_gate(hw));
+	struct stm32_mgate *shared = user->mgate;
+
+	shared->flag &= ~user->mask;
+	if (!shared->flag)
+		mp1_gate_clk_disable(hw);
+}
+
+/* Gate ops for a gate shared by several clocks (see struct stm32_mgate). */
+static const struct clk_ops mp1_mgate_clk_ops = {
+	.enable		= mp1_mgate_clk_enable,
+	.disable	= mp1_mgate_clk_disable,
+	.is_enabled	= clk_gate_is_enabled,
+};
+
+#define to_clk_mmux(_mux) container_of(_mux, struct stm32_clk_mmux, mux)
+
+static u8 clk_mmux_get_parent(struct clk_hw *hw)
+{
+	return clk_mux_ops.get_parent(hw);	/* generic mux op, unchanged */
+}
+
+/* Switch the mux, then reparent the other clocks sharing this selector. */
+static int clk_mmux_set_parent(struct clk_hw *hw, u8 index)
+{
+	struct stm32_clk_mmux *clk_mmux = to_clk_mmux(to_clk_mux(hw));
+	struct stm32_mmux *shared = clk_mmux->mmux;
+	struct clk_hw *new_parent;
+	int err, i;
+
+	err = clk_mux_ops.set_parent(hw, index);
+	if (err)
+		return err;
+
+	new_parent = clk_hw_get_parent(hw);
+	for (i = 0; i < shared->nbr_clk; i++)
+		if (shared->hws[i] != hw)
+			clk_hw_reparent(shared->hws[i], new_parent);
+
+	return 0;
+}
+
+static const struct clk_ops clk_mmux_ops = {
+	.get_parent	= clk_mmux_get_parent,
+	.set_parent	= clk_mmux_set_parent,
+	.determine_rate	= __clk_mux_determine_rate,
+};
+
+/* STM32 PLL */
+struct stm32_pll_obj {
+	/* lock pll enable/disable registers */
+	spinlock_t *lock;
+	void __iomem *reg;	/* PLL_ON/PLL_RDY register; dividers at +4 */
+	struct clk_hw hw;	/* container_of() anchor for to_pll() */
+};
+
+#define to_pll(_hw) container_of(_hw, struct stm32_pll_obj, hw)
+
+#define PLL_ON		BIT(0)
+#define PLL_RDY		BIT(1)
+#define DIVN_MASK	0x1FF
+#define DIVM_MASK	0x3F
+#define DIVM_SHIFT	16
+#define DIVN_SHIFT	0
+#define FRAC_OFFSET	0xC
+#define FRAC_MASK	0x1FFF
+#define FRAC_SHIFT	3
+#define FRACLE		BIT(16)
+
+static int __pll_is_enabled(struct clk_hw *hw)
+{
+	struct stm32_pll_obj *clk_elem = to_pll(hw);
+
+	return readl_relaxed(clk_elem->reg) & PLL_ON;	/* takes no lock */
+}
+
+#define TIMEOUT 5
+
+/* Turn the PLL on and wait for PLL_RDY; -ETIMEDOUT if it never shows up. */
+static int pll_enable(struct clk_hw *hw)
+{
+	struct stm32_pll_obj *clk_elem = to_pll(hw);
+	u32 reg;
+	unsigned long flags = 0;
+	unsigned int timeout = TIMEOUT;
+	int not_ready = 0;
+
+	spin_lock_irqsave(clk_elem->lock, flags);
+
+	if (__pll_is_enabled(hw))
+		goto unlock;
+
+	reg = readl_relaxed(clk_elem->reg);
+	reg |= PLL_ON;
+	writel_relaxed(reg, clk_elem->reg);
+
+	/*
+	 * readl_poll_timeout() cannot be used here: this may be called before
+	 * the clocksource changes, when only the jiffies counter is available,
+	 * and jiffies are advanced by interrupts, which the enable op does not
+	 * allow.  Poll up to TIMEOUT times with a fixed udelay() instead.
+	 */
+	do {
+		not_ready = !(readl_relaxed(clk_elem->reg) & PLL_RDY);
+		if (not_ready)
+			udelay(120);
+
+	} while (not_ready && --timeout);
+
+unlock:
+	spin_unlock_irqrestore(clk_elem->lock, flags);
+
+	return not_ready ? -ETIMEDOUT : 0;
+}
+
+/*
+ * Clear PLL_ON under the PLL lock.  No wait for PLL_RDY to drop is done.
+ */
+static void pll_disable(struct clk_hw *hw)
+{
+	struct stm32_pll_obj *pll = to_pll(hw);
+	unsigned long irqflags = 0;
+	u32 cr;
+
+	spin_lock_irqsave(pll->lock, irqflags);
+	cr = readl_relaxed(pll->reg) & ~PLL_ON;
+	writel_relaxed(cr, pll->reg);
+	spin_unlock_irqrestore(pll->lock, irqflags);
+}
+
+/*
+ * Fraction field read at reg + FRAC_OFFSET, or 0 while FRACLE is not set.
+ */
+static u32 pll_frac_val(struct clk_hw *hw)
+{
+	u32 fracr = readl_relaxed(to_pll(hw)->reg + FRAC_OFFSET);
+
+	if (!(fracr & FRACLE))
+		return 0;
+	return (fracr >> FRAC_SHIFT) & FRAC_MASK;
+}
+
+/*
+ * rate = parent * (DIVN + 1) / (DIVM + 1), plus, when pll_frac_val() is
+ * non-zero, parent * frac / ((DIVM + 1) * 8192).
+ */
+static unsigned long pll_recalc_rate(struct clk_hw *hw,
+				     unsigned long parent_rate)
+{
+	struct stm32_pll_obj *pll = to_pll(hw);
+	u32 cfg = readl_relaxed(pll->reg + 4);
+	u32 divm = ((cfg >> DIVM_SHIFT) & DIVM_MASK) + 1;
+	u32 divn = ((cfg >> DIVN_SHIFT) & DIVN_MASK) + 1;
+	u32 frac = pll_frac_val(hw);
+	u64 rate = (u64)parent_rate * divn;
+	u64 rate_frac = 0;
+
+	do_div(rate, divm);
+
+	if (frac) {
+		rate_frac = (u64)parent_rate * (u64)frac;
+		do_div(rate_frac, (divm * 8192));
+	}
+
+	return rate + rate_frac;
+}
+
+/* .is_enabled op: the PLL_ON test, taken under the PLL lock. */
+static int pll_is_enabled(struct clk_hw *hw)
+{
+	struct stm32_pll_obj *pll = to_pll(hw);
+	unsigned long irqflags = 0;
+	int on;
+
+	spin_lock_irqsave(pll->lock, irqflags);
+	on = __pll_is_enabled(hw);
+	spin_unlock_irqrestore(pll->lock, irqflags);
+	return on;
+}
+
+static const struct clk_ops pll_ops = {
+	.enable		= pll_enable,
+	.disable	= pll_disable,
+	.recalc_rate	= pll_recalc_rate,	/* no rate-setting ops here */
+	.is_enabled	= pll_is_enabled,
+};
+
+/*
+ * Register the PLL at @reg (pll_ops, one parent); @init is zeroed first.
+ */
+static struct clk_hw *clk_register_pll(struct device *dev, const char *name,
+				       const char *parent_name,
+				       void __iomem *reg,
+				       unsigned long flags,
+				       spinlock_t *lock)
+{
+	struct clk_init_data init = { NULL };
+	struct stm32_pll_obj *element;
+	int err;
+
+	element = kzalloc(sizeof(*element), GFP_KERNEL);
+	if (!element)
+		return ERR_PTR(-ENOMEM);
+
+	init.name = name;
+	init.ops = &pll_ops;
+	init.flags = flags;
+	init.parent_names = &parent_name;
+	init.num_parents = 1;
+
+	element->hw.init = &init;
+	element->reg = reg;
+	element->lock = lock;
+
+	err = clk_hw_register(dev, &element->hw);
+	if (err) {
+		kfree(element);
+		return ERR_PTR(err);
+	}
+
+	return &element->hw;
+}
+
+/* Kernel Timer */
+struct timer_cker {
+	/* lock the kernel output divider register */
+	spinlock_t *lock;
+	void __iomem *apbdiv;	/* read as prescaler, APB_DIV_MASK bits */
+	void __iomem *timpre;	/* TIM_PRE_MASK bit; set by timer_ker_set_rate() */
+	struct clk_hw hw;	/* container_of() anchor for to_timer_cker() */
+};
+
+#define to_timer_cker(_hw) container_of(_hw, struct timer_cker, hw)
+
+#define APB_DIV_MASK 0x07
+#define TIM_PRE_MASK 0x01
+
+/*
+ * Timer multiplier for @rate: 1, 2 or 4.  A zero @parent_rate yields 2.
+ */
+static unsigned long __bestmult(struct clk_hw *hw, unsigned long rate,
+				unsigned long parent_rate)
+{
+	struct timer_cker *tim_ker = to_timer_cker(hw);
+	u32 prescaler;
+
+	prescaler = readl_relaxed(tim_ker->apbdiv) & APB_DIV_MASK;
+	if (prescaler < 2)
+		return 1;
+
+	if (parent_rate && rate / parent_rate >= 4)
+		return 4;
+
+	return 2;
+}
+
+static long timer_ker_round_rate(struct clk_hw *hw, unsigned long rate,
+				 unsigned long *parent_rate)
+{
+	unsigned long factor = __bestmult(hw, rate, *parent_rate);
+
+	return *parent_rate * factor;	/* factor is 1, 2 or 4 */
+}
+
+/*
+ * Program the timpre register for the multiplier picked by __bestmult():
+ * x2 writes 0, x4 writes 1 and x1 leaves the register untouched.  Any
+ * other multiplier is rejected with -EINVAL.
+ */
+static int timer_ker_set_rate(struct clk_hw *hw, unsigned long rate,
+			      unsigned long parent_rate)
+{
+	struct timer_cker *tim_ker = to_timer_cker(hw);
+	unsigned long factor = __bestmult(hw, rate, parent_rate);
+	unsigned long irqflags = 0;
+	int ret = 0;
+
+	spin_lock_irqsave(tim_ker->lock, irqflags);
+
+	if (factor == 2)
+		writel_relaxed(0, tim_ker->timpre);
+	else if (factor == 4)
+		writel_relaxed(1, tim_ker->timpre);
+	else if (factor != 1)
+		ret = -EINVAL;
+
+	spin_unlock_irqrestore(tim_ker->lock, irqflags);
+
+	return ret;
+}
+
+/*
+ * Parent rate when the prescaler field read from apbdiv is 0, otherwise
+ * parent rate times 2 (timpre bit clear) or 4 (timpre bit set).
+ * NOTE(review): __bestmult() treats prescaler values below 2 as x1, this
+ * function only 0 - confirm which one matches the hardware.
+ */
+static unsigned long timer_ker_recalc_rate(struct clk_hw *hw,
+					   unsigned long parent_rate)
+{
+	struct timer_cker *tim_ker = to_timer_cker(hw);
+	u32 prescaler = readl_relaxed(tim_ker->apbdiv) & APB_DIV_MASK;
+	u32 timpre = readl_relaxed(tim_ker->timpre) & TIM_PRE_MASK;
+
+	if (!prescaler)
+		return parent_rate;
+
+	return parent_rate * ((timpre + 1) * 2);
+}
+
+/* Ops for the clocks registered by clk_register_cktim(). */
+static const struct clk_ops timer_ker_ops = {
+	.recalc_rate	= timer_ker_recalc_rate,
+	.round_rate	= timer_ker_round_rate,
+	.set_rate	= timer_ker_set_rate,
+};
+
+/*
+ * Register a timer_ker_ops clock on @apbdiv/@timpre; @init is zeroed first.
+ */
+static struct clk_hw *clk_register_cktim(struct device *dev, const char *name,
+					 const char *parent_name,
+					 unsigned long flags,
+					 void __iomem *apbdiv,
+					 void __iomem *timpre,
+					 spinlock_t *lock)
+{
+	struct clk_init_data init = { NULL };
+	struct timer_cker *tim_ker;
+	int err;
+
+	tim_ker = kzalloc(sizeof(*tim_ker), GFP_KERNEL);
+	if (!tim_ker)
+		return ERR_PTR(-ENOMEM);
+
+	init.name = name;
+	init.ops = &timer_ker_ops;
+	init.flags = flags;
+	init.parent_names = &parent_name;
+	init.num_parents = 1;
+
+	tim_ker->hw.init = &init;
+	tim_ker->lock = lock;
+	tim_ker->apbdiv = apbdiv;
+	tim_ker->timpre = timpre;
+
+	err = clk_hw_register(dev, &tim_ker->hw);
+	if (err) {
+		kfree(tim_ker);
+		return ERR_PTR(err);
+	}
+
+	return &tim_ker->hw;
+}
+
+struct stm32_pll_cfg {
+	u32 offset;
+};
+
+static struct clk_hw *
+_clk_register_pll(struct device *dev, struct clk_hw_onecell_data *clk_data,
+		  void __iomem *base, spinlock_t *lock,
+		  const struct clock_config *cfg)
+{
+	struct stm32_pll_cfg *stm_pll_cfg = cfg->cfg;
+
+	return clk_register_pll(dev, cfg->name, cfg->parent_name,
+				base + stm_pll_cfg->offset, cfg->flags, lock);
+}
+
+struct stm32_cktim_cfg {
+	u32 offset_apbdiv;
+	u32 offset_timpre;
+};
+
+static struct clk_hw *_clk_register_cktim(struct device *dev,
+					  struct clk_hw_onecell_data *clk_data,
+					  void __iomem *base, spinlock_t *lock,
+					  const struct clock_config *cfg)
+{
+	struct stm32_cktim_cfg *cktim_cfg = cfg->cfg;	/* offsets from base */
+
+	return clk_register_cktim(dev, cfg->name, cfg->parent_name, cfg->flags,
+				  cktim_cfg->offset_apbdiv + base,
+				  cktim_cfg->offset_timpre + base, lock);
+}
+
+/*
+ * clock_config .func hook for STM32_GATE() entries: cfg->cfg is the
+ * struct stm32_gate_cfg that is passed on, together with the name, parent
+ * and flags, to clk_stm32_register_gate_ops().  @clk_data is unused here.
+ */
+static struct clk_hw *
+_clk_stm32_register_gate(struct device *dev,
+			 struct clk_hw_onecell_data *clk_data,
+			 void __iomem *base, spinlock_t *lock,
+			 const struct clock_config *cfg)
+{
+	return clk_stm32_register_gate_ops(dev, cfg->name, cfg->parent_name,
+					   cfg->flags, base, cfg->cfg, lock);
+}
+
+static struct clk_hw *
+_clk_stm32_register_composite(struct device *dev,
+			      struct clk_hw_onecell_data *clk_data,
+			      void __iomem *base, spinlock_t *lock,
+			      const struct clock_config *cfg)
+{
+	return clk_stm32_register_composite(dev, cfg->name, cfg->parent_names,
+					    cfg->num_parents, base, cfg->cfg,
+					    cfg->flags, lock);	/* cfg->cfg: composite cfg */
+}
+
+#define GATE(_id, _name, _parent, _flags, _offset, _bit_idx, _gate_flags)\
+{\
+	.id		= _id,\
+	.name		= _name,\
+	.parent_name	= _parent,\
+	.flags		= _flags,\
+	.cfg		=  &(struct gate_cfg) {\
+		.reg_off	= _offset,\
+		.bit_idx	= _bit_idx,\
+		.gate_flags	= _gate_flags,\
+	},\
+	.func		= _clk_hw_register_gate,\
+}
+
+#define FIXED_FACTOR(_id, _name, _parent, _flags, _mult, _div)\
+{\
+	.id		= _id,\
+	.name		= _name,\
+	.parent_name	= _parent,\
+	.flags		= _flags,\
+	.cfg		=  &(struct fixed_factor_cfg) {\
+		.mult = _mult,\
+		.div = _div,\
+	},\
+	.func		= _clk_hw_register_fixed_factor,\
+}
+
+#define DIV_TABLE(_id, _name, _parent, _flags, _offset, _shift, _width,\
+		  _div_flags, _div_table)\
+{\
+	.id		= _id,\
+	.name		= _name,\
+	.parent_name	= _parent,\
+	.flags		= _flags,\
+	.cfg		=  &(struct div_cfg) {\
+		.reg_off	= _offset,\
+		.shift		= _shift,\
+		.width		= _width,\
+		.div_flags	= _div_flags,\
+		.table		= _div_table,\
+	},\
+	.func		= _clk_hw_register_divider_table,\
+}
+
+#define DIV(_id, _name, _parent, _flags, _offset, _shift, _width, _div_flags)\
+	DIV_TABLE(_id, _name, _parent, _flags, _offset, _shift, _width,\
+		  _div_flags, NULL)
+
+#define MUX(_id, _name, _parents, _flags, _offset, _shift, _width, _mux_flags)\
+{\
+	.id		= _id,\
+	.name		= _name,\
+	.parent_names	= _parents,\
+	.num_parents	= ARRAY_SIZE(_parents),\
+	.flags		= _flags,\
+	.cfg		=  &(struct mux_cfg) {\
+		.reg_off	= _offset,\
+		.shift		= _shift,\
+		.width		= _width,\
+		.mux_flags	= _mux_flags,\
+	},\
+	.func		= _clk_hw_register_mux,\
+}
+
+#define PLL(_id, _name, _parent, _flags, _offset)\
+{\
+	.id		= _id,\
+	.name		= _name,\
+	.parent_name	= _parent,\
+	.flags		= _flags,\
+	.cfg		=  &(struct stm32_pll_cfg) {\
+		.offset = _offset,\
+	},\
+	.func		= _clk_register_pll,\
+}
+
+#define STM32_CKTIM(_name, _parent, _flags, _offset_apbdiv, _offset_timpre)\
+{\
+	.id		= NO_ID,\
+	.name		= _name,\
+	.parent_name	= _parent,\
+	.flags		= _flags,\
+	.cfg		=  &(struct stm32_cktim_cfg) {\
+		.offset_apbdiv = _offset_apbdiv,\
+		.offset_timpre = _offset_timpre,\
+	},\
+	.func		= _clk_register_cktim,\
+}
+
+#define STM32_TIM(_id, _name, _parent, _offset_set, _bit_idx)\
+		  GATE_MP1(_id, _name, _parent, CLK_SET_RATE_PARENT,\
+			   _offset_set, _bit_idx, 0)
+
+/* STM32 GATE */
+#define STM32_GATE(_id, _name, _parent, _flags, _gate)\
+{\
+	.id		= _id,\
+	.name		= _name,\
+	.parent_name	= _parent,\
+	.flags		= _flags,\
+	.cfg		= (struct stm32_gate_cfg *) {_gate},\
+	.func		= _clk_stm32_register_gate,\
+}
+
+#define _STM32_GATE(_gate_offset, _gate_bit_idx, _gate_flags, _mgate, _ops)\
+	(&(struct stm32_gate_cfg) {\
+		&(struct gate_cfg) {\
+			.reg_off	= _gate_offset,\
+			.bit_idx	= _gate_bit_idx,\
+			.gate_flags	= _gate_flags,\
+		},\
+		.mgate		= _mgate,\
+		.ops		= _ops,\
+	})
+
+#define _STM32_MGATE(_mgate)\
+	(&per_gate_cfg[_mgate])
+
+#define _GATE(_gate_offset, _gate_bit_idx, _gate_flags)\
+	_STM32_GATE(_gate_offset, _gate_bit_idx, _gate_flags,\
+		    NULL, NULL)
+
+#define _GATE_MP1(_gate_offset, _gate_bit_idx, _gate_flags)\
+	_STM32_GATE(_gate_offset, _gate_bit_idx, _gate_flags,\
+		    NULL, &mp1_gate_clk_ops)
+
+#define _MGATE_MP1(_mgate)\
+	.gate = &per_gate_cfg[_mgate]
+
+#define GATE_MP1(_id, _name, _parent, _flags, _offset, _bit_idx, _gate_flags)\
+	STM32_GATE(_id, _name, _parent, _flags,\
+		   _GATE_MP1(_offset, _bit_idx, _gate_flags))
+
+#define MGATE_MP1(_id, _name, _parent, _flags, _mgate)\
+	STM32_GATE(_id, _name, _parent, _flags,\
+		   _STM32_MGATE(_mgate))
+
+#define _STM32_DIV(_div_offset, _div_shift, _div_width,\
+		   _div_flags, _div_table, _ops)\
+	.div = &(struct stm32_div_cfg) {\
+		&(struct div_cfg) {\
+			.reg_off	= _div_offset,\
+			.shift		= _div_shift,\
+			.width		= _div_width,\
+			.div_flags	= _div_flags,\
+			.table		= _div_table,\
+		},\
+		.ops		= _ops,\
+	}
+
+#define _DIV(_div_offset, _div_shift, _div_width, _div_flags, _div_table)\
+	_STM32_DIV(_div_offset, _div_shift, _div_width,\
+		   _div_flags, _div_table, NULL)
+
+#define _STM32_MUX(_offset, _shift, _width, _mux_flags, _mmux, _ops)\
+	.mux = &(struct stm32_mux_cfg) {\
+		&(struct mux_cfg) {\
+			.reg_off	= _offset,\
+			.shift		= _shift,\
+			.width		= _width,\
+			.mux_flags	= _mux_flags,\
+			.table		= NULL,\
+		},\
+		.mmux		= _mmux,\
+		.ops		= _ops,\
+	}
+
+#define _MUX(_offset, _shift, _width, _mux_flags)\
+	_STM32_MUX(_offset, _shift, _width, _mux_flags, NULL, NULL)
+
+#define _MMUX(_mmux) .mux = &ker_mux_cfg[_mmux]
+
+#define PARENT(_parent) ((const char *[]) { _parent})
+
+#define _NO_MUX .mux = NULL
+#define _NO_DIV .div = NULL
+#define _NO_GATE .gate = NULL
+
+#define COMPOSITE(_id, _name, _parents, _flags, _gate, _mux, _div)\
+{\
+	.id		= _id,\
+	.name		= _name,\
+	.parent_names	= _parents,\
+	.num_parents	= ARRAY_SIZE(_parents),\
+	.flags		= _flags,\
+	.cfg		= &(struct stm32_composite_cfg) {\
+		_gate,\
+		_mux,\
+		_div,\
+	},\
+	.func		= _clk_stm32_register_composite,\
+}
+
+#define PCLK(_id, _name, _parent, _flags, _mgate)\
+	MGATE_MP1(_id, _name, _parent, _flags, _mgate)
+
+#define KCLK(_id, _name, _parents, _flags, _mgate, _mmux)\
+	     COMPOSITE(_id, _name, _parents, CLK_OPS_PARENT_ENABLE | _flags,\
+		  _MGATE_MP1(_mgate),\
+		  _MMUX(_mmux),\
+		  _NO_DIV)
+
+enum {
+	G_SAI1,
+	G_SAI2,
+	G_SAI3,
+	G_SAI4,
+	G_SPI1,
+	G_SPI2,
+	G_SPI3,
+	G_SPI4,
+	G_SPI5,
+	G_SPI6,
+	G_SPDIF,
+	G_I2C1,
+	G_I2C2,
+	G_I2C3,
+	G_I2C4,
+	G_I2C5,
+	G_I2C6,
+	G_USART2,
+	G_UART4,
+	G_USART3,
+	G_UART5,
+	G_USART1,
+	G_USART6,
+	G_UART7,
+	G_UART8,
+	G_LPTIM1,
+	G_LPTIM2,
+	G_LPTIM3,
+	G_LPTIM4,
+	G_LPTIM5,
+	G_LTDC,
+	G_DSI,
+	G_QSPI,
+	G_FMC,
+	G_SDMMC1,
+	G_SDMMC2,
+	G_SDMMC3,
+	G_USBO,
+	G_USBPHY,
+	G_RNG1,
+	G_RNG2,
+	G_FDCAN,
+	G_DAC12,
+	G_CEC,
+	G_ADC12,
+	G_GPU,
+	G_STGEN,
+	G_DFSDM,
+	G_ADFSDM,
+	G_TIM2,
+	G_TIM3,
+	G_TIM4,
+	G_TIM5,
+	G_TIM6,
+	G_TIM7,
+	G_TIM12,
+	G_TIM13,
+	G_TIM14,
+	G_MDIO,
+	G_TIM1,
+	G_TIM8,
+	G_TIM15,
+	G_TIM16,
+	G_TIM17,
+	G_SYSCFG,
+	G_VREF,
+	G_TMPSENS,
+	G_PMBCTRL,
+	G_HDP,
+	G_IWDG2,
+	G_STGENRO,
+	G_DMA1,
+	G_DMA2,
+	G_DMAMUX,
+	G_DCMI,
+	G_CRYP2,
+	G_HASH2,
+	G_CRC2,
+	G_HSEM,
+	G_IPCC,
+	G_GPIOA,
+	G_GPIOB,
+	G_GPIOC,
+	G_GPIOD,
+	G_GPIOE,
+	G_GPIOF,
+	G_GPIOG,
+	G_GPIOH,
+	G_GPIOI,
+	G_GPIOJ,
+	G_GPIOK,
+	G_MDMA,
+	G_ETHCK,
+	G_ETHTX,
+	G_ETHRX,
+	G_ETHMAC,
+	G_CRC1,
+	G_USBH,
+	G_ETHSTP,
+	G_RTCAPB,
+	G_TZC,
+	G_TZPC,
+	G_IWDG1,
+	G_BSEC,
+	G_GPIOZ,
+	G_CRYP1,
+	G_HASH1,
+	G_BKPSRAM,
+
+	G_LAST
+};
+
+static struct stm32_mgate mp1_mgate[G_LAST];	/* indexed by G_* gate id */
+
+#define _K_GATE(_id, _gate_offset, _gate_bit_idx, _gate_flags,\
+	       _mgate, _ops)\
+	[_id] = {\
+		&(struct gate_cfg) {\
+			.reg_off	= _gate_offset,\
+			.bit_idx	= _gate_bit_idx,\
+			.gate_flags	= _gate_flags,\
+		},\
+		.mgate		= _mgate,\
+		.ops		= _ops,\
+	}
+
+#define K_GATE(_id, _gate_offset, _gate_bit_idx, _gate_flags)\
+	_K_GATE(_id, _gate_offset, _gate_bit_idx, _gate_flags,\
+	       NULL, &mp1_gate_clk_ops)
+
+#define K_MGATE(_id, _gate_offset, _gate_bit_idx, _gate_flags)\
+	_K_GATE(_id, _gate_offset, _gate_bit_idx, _gate_flags,\
+	       &mp1_mgate[_id], &mp1_mgate_clk_ops)
+
+/* Peripheral gates */
+static struct stm32_gate_cfg per_gate_cfg[G_LAST] = {
+	/* Simple (K_GATE) and multi (K_MGATE) gates, by enable register */
+	K_GATE(G_MDIO,		RCC_APB1ENSETR, 31, 0),
+	K_MGATE(G_DAC12,	RCC_APB1ENSETR, 29, 0),
+	K_MGATE(G_CEC,		RCC_APB1ENSETR, 27, 0),
+	K_MGATE(G_SPDIF,	RCC_APB1ENSETR, 26, 0),
+	K_MGATE(G_I2C5,		RCC_APB1ENSETR, 24, 0),
+	K_MGATE(G_I2C3,		RCC_APB1ENSETR, 23, 0),
+	K_MGATE(G_I2C2,		RCC_APB1ENSETR, 22, 0),
+	K_MGATE(G_I2C1,		RCC_APB1ENSETR, 21, 0),
+	K_MGATE(G_UART8,	RCC_APB1ENSETR, 19, 0),
+	K_MGATE(G_UART7,	RCC_APB1ENSETR, 18, 0),
+	K_MGATE(G_UART5,	RCC_APB1ENSETR, 17, 0),
+	K_MGATE(G_UART4,	RCC_APB1ENSETR, 16, 0),
+	K_MGATE(G_USART3,	RCC_APB1ENSETR, 15, 0),
+	K_MGATE(G_USART2,	RCC_APB1ENSETR, 14, 0),
+	K_MGATE(G_SPI3,		RCC_APB1ENSETR, 12, 0),
+	K_MGATE(G_SPI2,		RCC_APB1ENSETR, 11, 0),
+	K_MGATE(G_LPTIM1,	RCC_APB1ENSETR, 9, 0),
+	K_GATE(G_TIM14,		RCC_APB1ENSETR, 8, 0),
+	K_GATE(G_TIM13,		RCC_APB1ENSETR, 7, 0),
+	K_GATE(G_TIM12,		RCC_APB1ENSETR, 6, 0),
+	K_GATE(G_TIM7,		RCC_APB1ENSETR, 5, 0),
+	K_GATE(G_TIM6,		RCC_APB1ENSETR, 4, 0),
+	K_GATE(G_TIM5,		RCC_APB1ENSETR, 3, 0),
+	K_GATE(G_TIM4,		RCC_APB1ENSETR, 2, 0),
+	K_GATE(G_TIM3,		RCC_APB1ENSETR, 1, 0),
+	K_GATE(G_TIM2,		RCC_APB1ENSETR, 0, 0),
+
+	K_MGATE(G_FDCAN,	RCC_APB2ENSETR, 24, 0),
+	K_GATE(G_ADFSDM,	RCC_APB2ENSETR, 21, 0),
+	K_GATE(G_DFSDM,		RCC_APB2ENSETR, 20, 0),
+	K_MGATE(G_SAI3,		RCC_APB2ENSETR, 18, 0),
+	K_MGATE(G_SAI2,		RCC_APB2ENSETR, 17, 0),
+	K_MGATE(G_SAI1,		RCC_APB2ENSETR, 16, 0),
+	K_MGATE(G_USART6,	RCC_APB2ENSETR, 13, 0),
+	K_MGATE(G_SPI5,		RCC_APB2ENSETR, 10, 0),
+	K_MGATE(G_SPI4,		RCC_APB2ENSETR, 9, 0),
+	K_MGATE(G_SPI1,		RCC_APB2ENSETR, 8, 0),
+	K_GATE(G_TIM17,		RCC_APB2ENSETR, 4, 0),
+	K_GATE(G_TIM16,		RCC_APB2ENSETR, 3, 0),
+	K_GATE(G_TIM15,		RCC_APB2ENSETR, 2, 0),
+	K_GATE(G_TIM8,		RCC_APB2ENSETR, 1, 0),
+	K_GATE(G_TIM1,		RCC_APB2ENSETR, 0, 0),
+
+	K_GATE(G_HDP,		RCC_APB3ENSETR, 20, 0),
+	K_GATE(G_PMBCTRL,	RCC_APB3ENSETR, 17, 0),
+	K_GATE(G_TMPSENS,	RCC_APB3ENSETR, 16, 0),
+	K_GATE(G_VREF,		RCC_APB3ENSETR, 13, 0),
+	K_GATE(G_SYSCFG,	RCC_APB3ENSETR, 11, 0),
+	K_MGATE(G_SAI4,		RCC_APB3ENSETR, 8, 0),
+	K_MGATE(G_LPTIM5,	RCC_APB3ENSETR, 3, 0),
+	K_MGATE(G_LPTIM4,	RCC_APB3ENSETR, 2, 0),
+	K_MGATE(G_LPTIM3,	RCC_APB3ENSETR, 1, 0),
+	K_MGATE(G_LPTIM2,	RCC_APB3ENSETR, 0, 0),
+
+	K_GATE(G_STGENRO,	RCC_APB4ENSETR, 20, 0),
+	K_MGATE(G_USBPHY,	RCC_APB4ENSETR, 16, 0),
+	K_GATE(G_IWDG2,		RCC_APB4ENSETR, 15, 0),
+	K_MGATE(G_DSI,		RCC_APB4ENSETR, 4, 0),
+	K_MGATE(G_LTDC,		RCC_APB4ENSETR, 0, 0),
+
+	K_GATE(G_STGEN,		RCC_APB5ENSETR, 20, 0),
+	K_GATE(G_BSEC,		RCC_APB5ENSETR, 16, 0),
+	K_GATE(G_IWDG1,		RCC_APB5ENSETR, 15, 0),
+	K_GATE(G_TZPC,		RCC_APB5ENSETR, 13, 0),
+	K_GATE(G_TZC,		RCC_APB5ENSETR, 12, 0),
+	K_GATE(G_RTCAPB,	RCC_APB5ENSETR, 8, 0),
+	K_MGATE(G_USART1,	RCC_APB5ENSETR, 4, 0),
+	K_MGATE(G_I2C6,		RCC_APB5ENSETR, 3, 0),
+	K_MGATE(G_I2C4,		RCC_APB5ENSETR, 2, 0),
+	K_MGATE(G_SPI6,		RCC_APB5ENSETR, 0, 0),
+
+	K_MGATE(G_SDMMC3,	RCC_AHB2ENSETR, 16, 0),
+	K_MGATE(G_USBO,		RCC_AHB2ENSETR, 8, 0),
+	K_MGATE(G_ADC12,	RCC_AHB2ENSETR, 5, 0),
+	K_GATE(G_DMAMUX,	RCC_AHB2ENSETR, 2, 0),
+	K_GATE(G_DMA2,		RCC_AHB2ENSETR, 1, 0),
+	K_GATE(G_DMA1,		RCC_AHB2ENSETR, 0, 0),
+
+	K_GATE(G_IPCC,		RCC_AHB3ENSETR, 12, 0),
+	K_GATE(G_HSEM,		RCC_AHB3ENSETR, 11, 0),
+	K_GATE(G_CRC2,		RCC_AHB3ENSETR, 7, 0),
+	K_MGATE(G_RNG2,		RCC_AHB3ENSETR, 6, 0),
+	K_GATE(G_HASH2,		RCC_AHB3ENSETR, 5, 0),
+	K_GATE(G_CRYP2,		RCC_AHB3ENSETR, 4, 0),
+	K_GATE(G_DCMI,		RCC_AHB3ENSETR, 0, 0),
+
+	K_GATE(G_GPIOK,		RCC_AHB4ENSETR, 10, 0),
+	K_GATE(G_GPIOJ,		RCC_AHB4ENSETR, 9, 0),
+	K_GATE(G_GPIOI,		RCC_AHB4ENSETR, 8, 0),
+	K_GATE(G_GPIOH,		RCC_AHB4ENSETR, 7, 0),
+	K_GATE(G_GPIOG,		RCC_AHB4ENSETR, 6, 0),
+	K_GATE(G_GPIOF,		RCC_AHB4ENSETR, 5, 0),
+	K_GATE(G_GPIOE,		RCC_AHB4ENSETR, 4, 0),
+	K_GATE(G_GPIOD,		RCC_AHB4ENSETR, 3, 0),
+	K_GATE(G_GPIOC,		RCC_AHB4ENSETR, 2, 0),
+	K_GATE(G_GPIOB,		RCC_AHB4ENSETR, 1, 0),
+	K_GATE(G_GPIOA,		RCC_AHB4ENSETR, 0, 0),
+
+	K_GATE(G_BKPSRAM,	RCC_AHB5ENSETR, 8, 0),
+	K_MGATE(G_RNG1,		RCC_AHB5ENSETR, 6, 0),
+	K_GATE(G_HASH1,		RCC_AHB5ENSETR, 5, 0),
+	K_GATE(G_CRYP1,		RCC_AHB5ENSETR, 4, 0),
+	K_GATE(G_GPIOZ,		RCC_AHB5ENSETR, 0, 0),
+
+	K_GATE(G_USBH,		RCC_AHB6ENSETR, 24, 0),
+	K_GATE(G_CRC1,		RCC_AHB6ENSETR, 20, 0),
+	K_MGATE(G_SDMMC2,	RCC_AHB6ENSETR, 17, 0),
+	K_MGATE(G_SDMMC1,	RCC_AHB6ENSETR, 16, 0),
+	K_MGATE(G_QSPI,		RCC_AHB6ENSETR, 14, 0),
+	K_MGATE(G_FMC,		RCC_AHB6ENSETR, 12, 0),
+	K_GATE(G_ETHMAC,	RCC_AHB6ENSETR, 10, 0),
+	K_GATE(G_ETHRX,		RCC_AHB6ENSETR, 9, 0),
+	K_GATE(G_ETHTX,		RCC_AHB6ENSETR, 8, 0),
+	K_GATE(G_ETHCK,		RCC_AHB6ENSETR, 7, 0),
+	K_MGATE(G_GPU,		RCC_AHB6ENSETR, 5, 0),
+	K_GATE(G_MDMA,		RCC_AHB6ENSETR, 0, 0),
+	K_GATE(G_ETHSTP,	RCC_AHB6LPENSETR, 11, 0),
+};
+
+enum {
+	M_SDMMC12,
+	M_SDMMC3,
+	M_FMC,
+	M_QSPI,
+	M_RNG1,
+	M_RNG2,
+	M_USBPHY,
+	M_USBO,
+	M_STGEN,
+	M_SPDIF,
+	M_SPI1,
+	M_SPI23,
+	M_SPI45,
+	M_SPI6,
+	M_CEC,
+	M_I2C12,
+	M_I2C35,
+	M_I2C46,
+	M_LPTIM1,
+	M_LPTIM23,
+	M_LPTIM45,
+	M_USART1,
+	M_UART24,
+	M_UART35,
+	M_USART6,
+	M_UART78,
+	M_SAI1,
+	M_SAI2,
+	M_SAI3,
+	M_SAI4,
+	M_DSI,
+	M_FDCAN,
+	M_ADC12,
+	M_ETHCK,
+	M_CKPER,
+	M_LAST
+};
+
+static struct stm32_mmux ker_mux[M_LAST];	/* indexed by M_* mux id */
+
+#define _K_MUX(_id, _offset, _shift, _width, _mux_flags, _mmux, _ops)\
+	[_id] = {\
+		&(struct mux_cfg) {\
+			.reg_off	= _offset,\
+			.shift		= _shift,\
+			.width		= _width,\
+			.mux_flags	= _mux_flags,\
+			.table		= NULL,\
+		},\
+		.mmux		= _mmux,\
+		.ops		= _ops,\
+	}
+
+#define K_MUX(_id, _offset, _shift, _width, _mux_flags)\
+	_K_MUX(_id, _offset, _shift, _width, _mux_flags,\
+			NULL, NULL)
+
+#define K_MMUX(_id, _offset, _shift, _width, _mux_flags)\
+	_K_MUX(_id, _offset, _shift, _width, _mux_flags,\
+			&ker_mux[_id], &clk_mmux_ops)
+
+static const struct stm32_mux_cfg ker_mux_cfg[M_LAST] = {
+	/* Kernel multi mux */
+	K_MMUX(M_SDMMC12, RCC_SDMMC12CKSELR, 0, 3, 0),
+	K_MMUX(M_SPI23, RCC_SPI2S23CKSELR, 0, 3, 0),
+	K_MMUX(M_SPI45, RCC_SPI2S45CKSELR, 0, 3, 0),
+	K_MMUX(M_I2C12, RCC_I2C12CKSELR, 0, 3, 0),
+	K_MMUX(M_I2C35, RCC_I2C35CKSELR, 0, 3, 0),
+	K_MMUX(M_LPTIM23, RCC_LPTIM23CKSELR, 0, 3, 0),
+	K_MMUX(M_LPTIM45, RCC_LPTIM45CKSELR, 0, 3, 0),
+	K_MMUX(M_UART24, RCC_UART24CKSELR, 0, 3, 0),
+	K_MMUX(M_UART35, RCC_UART35CKSELR, 0, 3, 0),
+	K_MMUX(M_UART78, RCC_UART78CKSELR, 0, 3, 0),
+	K_MMUX(M_SAI1, RCC_SAI1CKSELR, 0, 3, 0),
+	K_MMUX(M_ETHCK, RCC_ETHCKSELR, 0, 2, 0),
+	K_MMUX(M_I2C46, RCC_I2C46CKSELR, 0, 3, 0),
+
+	/*  Kernel simple mux */
+	K_MUX(M_RNG2, RCC_RNG2CKSELR, 0, 2, 0),
+	K_MUX(M_SDMMC3, RCC_SDMMC3CKSELR, 0, 3, 0),
+	K_MUX(M_FMC, RCC_FMCCKSELR, 0, 2, 0),
+	K_MUX(M_QSPI, RCC_QSPICKSELR, 0, 2, 0),
+	K_MUX(M_USBPHY, RCC_USBCKSELR, 0, 2, 0),
+	K_MUX(M_USBO, RCC_USBCKSELR, 4, 1, 0),
+	K_MUX(M_SPDIF, RCC_SPDIFCKSELR, 0, 2, 0),
+	K_MUX(M_SPI1, RCC_SPI2S1CKSELR, 0, 3, 0),
+	K_MUX(M_CEC, RCC_CECCKSELR, 0, 2, 0),
+	K_MUX(M_LPTIM1, RCC_LPTIM1CKSELR, 0, 3, 0),
+	K_MUX(M_USART6, RCC_UART6CKSELR, 0, 3, 0),
+	K_MUX(M_FDCAN, RCC_FDCANCKSELR, 0, 2, 0),
+	K_MUX(M_SAI2, RCC_SAI2CKSELR, 0, 3, 0),
+	K_MUX(M_SAI3, RCC_SAI3CKSELR, 0, 3, 0),
+	K_MUX(M_SAI4, RCC_SAI4CKSELR, 0, 3, 0),
+	K_MUX(M_ADC12, RCC_ADCCKSELR, 0, 2, 0),
+	K_MUX(M_DSI, RCC_DSICKSELR, 0, 1, 0),
+	K_MUX(M_CKPER, RCC_CPERCKSELR, 0, 2, 0),
+	K_MUX(M_RNG1, RCC_RNG1CKSELR, 0, 2, 0),
+	K_MUX(M_STGEN, RCC_STGENCKSELR, 0, 2, 0),
+	K_MUX(M_USART1, RCC_UART1CKSELR, 0, 3, 0),
+	K_MUX(M_SPI6, RCC_SPI6CKSELR, 0, 3, 0),
+};
+
+static const struct clock_config stm32mp1_clock_cfg[] = {
+	/* Oscillator divider */
+	DIV(NO_ID, "clk-hsi-div", "clk-hsi", 0, RCC_HSICFGR, 0, 2,
+	    CLK_DIVIDER_READ_ONLY),
+
+	/*  External / Internal Oscillators */
+	GATE_MP1(CK_HSE, "ck_hse", "clk-hse", 0, RCC_OCENSETR, 8, 0),
+	GATE_MP1(CK_CSI, "ck_csi", "clk-csi", 0, RCC_OCENSETR, 4, 0),
+	GATE_MP1(CK_HSI, "ck_hsi", "clk-hsi-div", 0, RCC_OCENSETR, 0, 0),
+	GATE(CK_LSI, "ck_lsi", "clk-lsi", 0, RCC_RDLSICR, 0, 0),
+	GATE(CK_LSE, "ck_lse", "clk-lse", 0, RCC_BDCR, 0, 0),
+
+	FIXED_FACTOR(CK_HSE_DIV2, "clk-hse-div2", "ck_hse", 0, 1, 2),
+
+	/* ref clock pll */
+	MUX(NO_ID, "ref1", ref12_parents, CLK_OPS_PARENT_ENABLE, RCC_RCK12SELR,
+	    0, 2, CLK_MUX_READ_ONLY),
+
+	MUX(NO_ID, "ref3", ref3_parents, CLK_OPS_PARENT_ENABLE, RCC_RCK3SELR,
+	    0, 2, CLK_MUX_READ_ONLY),
+
+	MUX(NO_ID, "ref4", ref4_parents, CLK_OPS_PARENT_ENABLE, RCC_RCK4SELR,
+	    0, 2, CLK_MUX_READ_ONLY),
+
+	/* PLLs */
+	PLL(PLL1, "pll1", "ref1", CLK_IGNORE_UNUSED, RCC_PLL1CR),
+	PLL(PLL2, "pll2", "ref1", CLK_IGNORE_UNUSED, RCC_PLL2CR),
+	PLL(PLL3, "pll3", "ref3", CLK_IGNORE_UNUSED, RCC_PLL3CR),
+	PLL(PLL4, "pll4", "ref4", CLK_IGNORE_UNUSED, RCC_PLL4CR),
+
+	/* ODF */
+	COMPOSITE(PLL1_P, "pll1_p", PARENT("pll1"), 0,
+		  _GATE(RCC_PLL1CR, 4, 0),
+		  _NO_MUX,
+		  _DIV(RCC_PLL1CFGR2, 0, 7, 0, NULL)),
+
+	COMPOSITE(PLL2_P, "pll2_p", PARENT("pll2"), 0,
+		  _GATE(RCC_PLL2CR, 4, 0),
+		  _NO_MUX,
+		  _DIV(RCC_PLL2CFGR2, 0, 7, 0, NULL)),
+
+	COMPOSITE(PLL2_Q, "pll2_q", PARENT("pll2"), 0,
+		  _GATE(RCC_PLL2CR, 5, 0),
+		  _NO_MUX,
+		  _DIV(RCC_PLL2CFGR2, 8, 7, 0, NULL)),
+
+	COMPOSITE(PLL2_R, "pll2_r", PARENT("pll2"), CLK_IS_CRITICAL,
+		  _GATE(RCC_PLL2CR, 6, 0),
+		  _NO_MUX,
+		  _DIV(RCC_PLL2CFGR2, 16, 7, 0, NULL)),
+
+	COMPOSITE(PLL3_P, "pll3_p", PARENT("pll3"), 0,
+		  _GATE(RCC_PLL3CR, 4, 0),
+		  _NO_MUX,
+		  _DIV(RCC_PLL3CFGR2, 0, 7, 0, NULL)),
+
+	COMPOSITE(PLL3_Q, "pll3_q", PARENT("pll3"), 0,
+		  _GATE(RCC_PLL3CR, 5, 0),
+		  _NO_MUX,
+		  _DIV(RCC_PLL3CFGR2, 8, 7, 0, NULL)),
+
+	COMPOSITE(PLL3_R, "pll3_r", PARENT("pll3"), 0,
+		  _GATE(RCC_PLL3CR, 6, 0),
+		  _NO_MUX,
+		  _DIV(RCC_PLL3CFGR2, 16, 7, 0, NULL)),
+
+	COMPOSITE(PLL4_P, "pll4_p", PARENT("pll4"), 0,
+		  _GATE(RCC_PLL4CR, 4, 0),
+		  _NO_MUX,
+		  _DIV(RCC_PLL4CFGR2, 0, 7, 0, NULL)),
+
+	COMPOSITE(PLL4_Q, "pll4_q", PARENT("pll4"), 0,
+		  _GATE(RCC_PLL4CR, 5, 0),
+		  _NO_MUX,
+		  _DIV(RCC_PLL4CFGR2, 8, 7, 0, NULL)),
+
+	COMPOSITE(PLL4_R, "pll4_r", PARENT("pll4"), 0,
+		  _GATE(RCC_PLL4CR, 6, 0),
+		  _NO_MUX,
+		  _DIV(RCC_PLL4CFGR2, 16, 7, 0, NULL)),
+
+	/* MUX system clocks */
+	MUX(CK_PER, "ck_per", per_src, CLK_OPS_PARENT_ENABLE,
+	    RCC_CPERCKSELR, 0, 2, 0),
+
+	MUX(CK_MPU, "ck_mpu", cpu_src, CLK_OPS_PARENT_ENABLE |
+	     CLK_IS_CRITICAL, RCC_MPCKSELR, 0, 2, 0),
+
+	COMPOSITE(CK_AXI, "ck_axi", axi_src, CLK_IS_CRITICAL |
+		   CLK_OPS_PARENT_ENABLE,
+		   _NO_GATE,
+		   _MUX(RCC_ASSCKSELR, 0, 2, 0),
+		   _DIV(RCC_AXIDIVR, 0, 3, 0, axi_div_table)),
+
+	COMPOSITE(CK_MCU, "ck_mcu", mcu_src, CLK_IS_CRITICAL |
+		   CLK_OPS_PARENT_ENABLE,
+		   _NO_GATE,
+		   _MUX(RCC_MSSCKSELR, 0, 2, 0),
+		   _DIV(RCC_MCUDIVR, 0, 4, 0, mcu_div_table)),
+
+	DIV_TABLE(NO_ID, "pclk1", "ck_mcu", CLK_IGNORE_UNUSED, RCC_APB1DIVR, 0,
+		  3, CLK_DIVIDER_READ_ONLY, apb_div_table),
+
+	DIV_TABLE(NO_ID, "pclk2", "ck_mcu", CLK_IGNORE_UNUSED, RCC_APB2DIVR, 0,
+		  3, CLK_DIVIDER_READ_ONLY, apb_div_table),
+
+	DIV_TABLE(NO_ID, "pclk3", "ck_mcu", CLK_IGNORE_UNUSED, RCC_APB3DIVR, 0,
+		  3, CLK_DIVIDER_READ_ONLY, apb_div_table),
+
+	DIV_TABLE(NO_ID, "pclk4", "ck_axi", CLK_IGNORE_UNUSED, RCC_APB4DIVR, 0,
+		  3, CLK_DIVIDER_READ_ONLY, apb_div_table),
+
+	DIV_TABLE(NO_ID, "pclk5", "ck_axi", CLK_IGNORE_UNUSED, RCC_APB5DIVR, 0,
+		  3, CLK_DIVIDER_READ_ONLY, apb_div_table),
+
+	/* Kernel Timers */
+	STM32_CKTIM("ck1_tim", "pclk1", 0, RCC_APB1DIVR, RCC_TIMG1PRER),
+	STM32_CKTIM("ck2_tim", "pclk2", 0, RCC_APB2DIVR, RCC_TIMG2PRER),
+
+	STM32_TIM(TIM2_K, "tim2_k", "ck1_tim", RCC_APB1ENSETR, 0),
+	STM32_TIM(TIM3_K, "tim3_k", "ck1_tim", RCC_APB1ENSETR, 1),
+	STM32_TIM(TIM4_K, "tim4_k", "ck1_tim", RCC_APB1ENSETR, 2),
+	STM32_TIM(TIM5_K, "tim5_k", "ck1_tim", RCC_APB1ENSETR, 3),
+	STM32_TIM(TIM6_K, "tim6_k", "ck1_tim", RCC_APB1ENSETR, 4),
+	STM32_TIM(TIM7_K, "tim7_k", "ck1_tim", RCC_APB1ENSETR, 5),
+	STM32_TIM(TIM12_K, "tim12_k", "ck1_tim", RCC_APB1ENSETR, 6),
+	STM32_TIM(TIM13_K, "tim13_k", "ck1_tim", RCC_APB1ENSETR, 7),
+	STM32_TIM(TIM14_K, "tim14_k", "ck1_tim", RCC_APB1ENSETR, 8),
+	STM32_TIM(TIM1_K, "tim1_k", "ck2_tim", RCC_APB2ENSETR, 0),
+	STM32_TIM(TIM8_K, "tim8_k", "ck2_tim", RCC_APB2ENSETR, 1),
+	STM32_TIM(TIM15_K, "tim15_k", "ck2_tim", RCC_APB2ENSETR, 2),
+	STM32_TIM(TIM16_K, "tim16_k", "ck2_tim", RCC_APB2ENSETR, 3),
+	STM32_TIM(TIM17_K, "tim17_k", "ck2_tim", RCC_APB2ENSETR, 4),
+
+	/* Peripheral clocks */
+	PCLK(TIM2, "tim2", "pclk1", CLK_IGNORE_UNUSED, G_TIM2),
+	PCLK(TIM3, "tim3", "pclk1", CLK_IGNORE_UNUSED, G_TIM3),
+	PCLK(TIM4, "tim4", "pclk1", CLK_IGNORE_UNUSED, G_TIM4),
+	PCLK(TIM5, "tim5", "pclk1", CLK_IGNORE_UNUSED, G_TIM5),
+	PCLK(TIM6, "tim6", "pclk1", CLK_IGNORE_UNUSED, G_TIM6),
+	PCLK(TIM7, "tim7", "pclk1", CLK_IGNORE_UNUSED, G_TIM7),
+	PCLK(TIM12, "tim12", "pclk1", CLK_IGNORE_UNUSED, G_TIM12),
+	PCLK(TIM13, "tim13", "pclk1", CLK_IGNORE_UNUSED, G_TIM13),
+	PCLK(TIM14, "tim14", "pclk1", CLK_IGNORE_UNUSED, G_TIM14),
+	PCLK(LPTIM1, "lptim1", "pclk1", 0, G_LPTIM1),
+	PCLK(SPI2, "spi2", "pclk1", 0, G_SPI2),
+	PCLK(SPI3, "spi3", "pclk1", 0, G_SPI3),
+	PCLK(USART2, "usart2", "pclk1", 0, G_USART2),
+	PCLK(USART3, "usart3", "pclk1", 0, G_USART3),
+	PCLK(UART4, "uart4", "pclk1", 0, G_UART4),
+	PCLK(UART5, "uart5", "pclk1", 0, G_UART5),
+	PCLK(UART7, "uart7", "pclk1", 0, G_UART7),
+	PCLK(UART8, "uart8", "pclk1", 0, G_UART8),
+	PCLK(I2C1, "i2c1", "pclk1", 0, G_I2C1),
+	PCLK(I2C2, "i2c2", "pclk1", 0, G_I2C2),
+	PCLK(I2C3, "i2c3", "pclk1", 0, G_I2C3),
+	PCLK(I2C5, "i2c5", "pclk1", 0, G_I2C5),
+	PCLK(SPDIF, "spdif", "pclk1", 0, G_SPDIF),
+	PCLK(CEC, "cec", "pclk1", 0, G_CEC),
+	PCLK(DAC12, "dac12", "pclk1", 0, G_DAC12),
+	PCLK(MDIO, "mdio", "pclk1", 0, G_MDIO),
+	PCLK(TIM1, "tim1", "pclk2", CLK_IGNORE_UNUSED, G_TIM1),
+	PCLK(TIM8, "tim8", "pclk2", CLK_IGNORE_UNUSED, G_TIM8),
+	PCLK(TIM15, "tim15", "pclk2", CLK_IGNORE_UNUSED, G_TIM15),
+	PCLK(TIM16, "tim16", "pclk2", CLK_IGNORE_UNUSED, G_TIM16),
+	PCLK(TIM17, "tim17", "pclk2", CLK_IGNORE_UNUSED, G_TIM17),
+	PCLK(SPI1, "spi1", "pclk2", 0, G_SPI1),
+	PCLK(SPI4, "spi4", "pclk2", 0, G_SPI4),
+	PCLK(SPI5, "spi5", "pclk2", 0, G_SPI5),
+	PCLK(USART6, "usart6", "pclk2", 0, G_USART6),
+	PCLK(SAI1, "sai1", "pclk2", 0, G_SAI1),
+	PCLK(SAI2, "sai2", "pclk2", 0, G_SAI2),
+	PCLK(SAI3, "sai3", "pclk2", 0, G_SAI3),
+	PCLK(DFSDM, "dfsdm", "pclk2", 0, G_DFSDM),
+	PCLK(FDCAN, "fdcan", "pclk2", 0, G_FDCAN),
+	PCLK(LPTIM2, "lptim2", "pclk3", 0, G_LPTIM2),
+	PCLK(LPTIM3, "lptim3", "pclk3", 0, G_LPTIM3),
+	PCLK(LPTIM4, "lptim4", "pclk3", 0, G_LPTIM4),
+	PCLK(LPTIM5, "lptim5", "pclk3", 0, G_LPTIM5),
+	PCLK(SAI4, "sai4", "pclk3", 0, G_SAI4),
+	PCLK(SYSCFG, "syscfg", "pclk3", 0, G_SYSCFG),
+	PCLK(VREF, "vref", "pclk3", 13, G_VREF), /* NOTE(review): flags 13 == G_VREF bit idx? */
+	PCLK(TMPSENS, "tmpsens", "pclk3", 0, G_TMPSENS),
+	PCLK(PMBCTRL, "pmbctrl", "pclk3", 0, G_PMBCTRL),
+	PCLK(HDP, "hdp", "pclk3", 0, G_HDP),
+	PCLK(LTDC, "ltdc", "pclk4", 0, G_LTDC),
+	PCLK(DSI, "dsi", "pclk4", 0, G_DSI),
+	PCLK(IWDG2, "iwdg2", "pclk4", 0, G_IWDG2),
+	PCLK(USBPHY, "usbphy", "pclk4", 0, G_USBPHY),
+	PCLK(STGENRO, "stgenro", "pclk4", 0, G_STGENRO),
+	PCLK(SPI6, "spi6", "pclk5", 0, G_SPI6),
+	PCLK(I2C4, "i2c4", "pclk5", 0, G_I2C4),
+	PCLK(I2C6, "i2c6", "pclk5", 0, G_I2C6),
+	PCLK(USART1, "usart1", "pclk5", 0, G_USART1),
+	PCLK(RTCAPB, "rtcapb", "pclk5", CLK_IGNORE_UNUSED |
+	     CLK_IS_CRITICAL, G_RTCAPB),
+	PCLK(TZC, "tzc", "pclk5", CLK_IGNORE_UNUSED, G_TZC),
+	PCLK(TZPC, "tzpc", "pclk5", CLK_IGNORE_UNUSED, G_TZPC),
+	PCLK(IWDG1, "iwdg1", "pclk5", 0, G_IWDG1),
+	PCLK(BSEC, "bsec", "pclk5", CLK_IGNORE_UNUSED, G_BSEC),
+	PCLK(STGEN, "stgen", "pclk5", CLK_IGNORE_UNUSED, G_STGEN),
+	PCLK(DMA1, "dma1", "ck_mcu", 0, G_DMA1),
+	PCLK(DMA2, "dma2", "ck_mcu",  0, G_DMA2),
+	PCLK(DMAMUX, "dmamux", "ck_mcu", 0, G_DMAMUX),
+	PCLK(ADC12, "adc12", "ck_mcu", 0, G_ADC12),
+	PCLK(USBO, "usbo", "ck_mcu", 0, G_USBO),
+	PCLK(SDMMC3, "sdmmc3", "ck_mcu", 0, G_SDMMC3),
+	PCLK(DCMI, "dcmi", "ck_mcu", 0, G_DCMI),
+	PCLK(CRYP2, "cryp2", "ck_mcu", 0, G_CRYP2),
+	PCLK(HASH2, "hash2", "ck_mcu", 0, G_HASH2),
+	PCLK(RNG2, "rng2", "ck_mcu", 0, G_RNG2),
+	PCLK(CRC2, "crc2", "ck_mcu", 0, G_CRC2),
+	PCLK(HSEM, "hsem", "ck_mcu", 0, G_HSEM),
+	PCLK(IPCC, "ipcc", "ck_mcu", 0, G_IPCC),
+	PCLK(GPIOA, "gpioa", "ck_mcu", 0, G_GPIOA),
+	PCLK(GPIOB, "gpiob", "ck_mcu", 0, G_GPIOB),
+	PCLK(GPIOC, "gpioc", "ck_mcu", 0, G_GPIOC),
+	PCLK(GPIOD, "gpiod", "ck_mcu", 0, G_GPIOD),
+	PCLK(GPIOE, "gpioe", "ck_mcu", 0, G_GPIOE),
+	PCLK(GPIOF, "gpiof", "ck_mcu", 0, G_GPIOF),
+	PCLK(GPIOG, "gpiog", "ck_mcu", 0, G_GPIOG),
+	PCLK(GPIOH, "gpioh", "ck_mcu", 0, G_GPIOH),
+	PCLK(GPIOI, "gpioi", "ck_mcu", 0, G_GPIOI),
+	PCLK(GPIOJ, "gpioj", "ck_mcu", 0, G_GPIOJ),
+	PCLK(GPIOK, "gpiok", "ck_mcu", 0, G_GPIOK),
+	PCLK(GPIOZ, "gpioz", "ck_axi", CLK_IGNORE_UNUSED, G_GPIOZ),
+	PCLK(CRYP1, "cryp1", "ck_axi", CLK_IGNORE_UNUSED, G_CRYP1),
+	PCLK(HASH1, "hash1", "ck_axi", CLK_IGNORE_UNUSED, G_HASH1),
+	PCLK(RNG1, "rng1", "ck_axi", 0, G_RNG1),
+	PCLK(BKPSRAM, "bkpsram", "ck_axi", CLK_IGNORE_UNUSED, G_BKPSRAM),
+	PCLK(MDMA, "mdma", "ck_axi", 0, G_MDMA),
+	PCLK(GPU, "gpu", "ck_axi", 0, G_GPU),
+	PCLK(ETHTX, "ethtx", "ck_axi", 0, G_ETHTX),
+	PCLK(ETHRX, "ethrx", "ck_axi", 0, G_ETHRX),
+	PCLK(ETHMAC, "ethmac", "ck_axi", 0, G_ETHMAC),
+	PCLK(FMC, "fmc", "ck_axi", CLK_IGNORE_UNUSED, G_FMC),
+	PCLK(QSPI, "qspi", "ck_axi", CLK_IGNORE_UNUSED, G_QSPI),
+	PCLK(SDMMC1, "sdmmc1", "ck_axi", 0, G_SDMMC1),
+	PCLK(SDMMC2, "sdmmc2", "ck_axi", 0, G_SDMMC2),
+	PCLK(CRC1, "crc1", "ck_axi", 0, G_CRC1),
+	PCLK(USBH, "usbh", "ck_axi", 0, G_USBH),
+	PCLK(ETHSTP, "ethstp", "ck_axi", 0, G_ETHSTP),
+
+	/* Kernel clocks */
+	KCLK(SDMMC1_K, "sdmmc1_k", sdmmc12_src, 0, G_SDMMC1, M_SDMMC12),
+	KCLK(SDMMC2_K, "sdmmc2_k", sdmmc12_src, 0, G_SDMMC2, M_SDMMC12),
+	KCLK(SDMMC3_K, "sdmmc3_k", sdmmc3_src, 0, G_SDMMC3, M_SDMMC3),
+	KCLK(FMC_K, "fmc_k", fmc_src, 0, G_FMC, M_FMC),
+	KCLK(QSPI_K, "qspi_k", qspi_src, 0, G_QSPI, M_QSPI),
+	KCLK(RNG1_K, "rng1_k", rng_src, 0, G_RNG1, M_RNG1),
+	KCLK(RNG2_K, "rng2_k", rng_src, 0, G_RNG2, M_RNG2),
+	KCLK(USBPHY_K, "usbphy_k", usbphy_src, 0, G_USBPHY, M_USBPHY),
+	KCLK(STGEN_K, "stgen_k",  stgen_src, CLK_IGNORE_UNUSED,
+	     G_STGEN, M_STGEN),
+	KCLK(SPDIF_K, "spdif_k", spdif_src, 0, G_SPDIF, M_SPDIF),
+	KCLK(SPI1_K, "spi1_k", spi123_src, 0, G_SPI1, M_SPI1),
+	KCLK(SPI2_K, "spi2_k", spi123_src, 0, G_SPI2, M_SPI23),
+	KCLK(SPI3_K, "spi3_k", spi123_src, 0, G_SPI3, M_SPI23),
+	KCLK(SPI4_K, "spi4_k", spi45_src, 0, G_SPI4, M_SPI45),
+	KCLK(SPI5_K, "spi5_k", spi45_src, 0, G_SPI5, M_SPI45),
+	KCLK(SPI6_K, "spi6_k", spi6_src, 0, G_SPI6, M_SPI6),
+	KCLK(CEC_K, "cec_k", cec_src, 0, G_CEC, M_CEC),
+	KCLK(I2C1_K, "i2c1_k", i2c12_src, 0, G_I2C1, M_I2C12),
+	KCLK(I2C2_K, "i2c2_k", i2c12_src, 0, G_I2C2, M_I2C12),
+	KCLK(I2C3_K, "i2c3_k", i2c35_src, 0, G_I2C3, M_I2C35),
+	KCLK(I2C5_K, "i2c5_k", i2c35_src, 0, G_I2C5, M_I2C35),
+	KCLK(I2C4_K, "i2c4_k", i2c46_src, 0, G_I2C4, M_I2C46),
+	KCLK(I2C6_K, "i2c6_k", i2c46_src, 0, G_I2C6, M_I2C46),
+	KCLK(LPTIM1_K, "lptim1_k", lptim1_src, 0, G_LPTIM1, M_LPTIM1),
+	KCLK(LPTIM2_K, "lptim2_k", lptim23_src, 0, G_LPTIM2, M_LPTIM23),
+	KCLK(LPTIM3_K, "lptim3_k", lptim23_src, 0, G_LPTIM3, M_LPTIM23),
+	KCLK(LPTIM4_K, "lptim4_k", lptim45_src, 0, G_LPTIM4, M_LPTIM45),
+	KCLK(LPTIM5_K, "lptim5_k", lptim45_src, 0, G_LPTIM5, M_LPTIM45),
+	KCLK(USART1_K, "usart1_k", usart1_src, 0, G_USART1, M_USART1),
+	KCLK(USART2_K, "usart2_k", usart234578_src, 0, G_USART2, M_UART24),
+	KCLK(USART3_K, "usart3_k", usart234578_src, 0, G_USART3, M_UART35),
+	KCLK(UART4_K, "uart4_k", usart234578_src, 0, G_UART4, M_UART24),
+	KCLK(UART5_K, "uart5_k", usart234578_src, 0, G_UART5, M_UART35),
+	KCLK(USART6_K, "uart6_k", usart6_src, 0, G_USART6, M_USART6),
+	KCLK(UART7_K, "uart7_k", usart234578_src, 0, G_UART7, M_UART78),
+	KCLK(UART8_K, "uart8_k", usart234578_src, 0, G_UART8, M_UART78),
+	KCLK(FDCAN_K, "fdcan_k", fdcan_src, 0, G_FDCAN, M_FDCAN),
+	KCLK(SAI1_K, "sai1_k", sai_src, 0, G_SAI1, M_SAI1),
+	KCLK(SAI2_K, "sai2_k", sai2_src, 0, G_SAI2, M_SAI2),
+	KCLK(SAI3_K, "sai3_k", sai_src, 0, G_SAI3, M_SAI3),
+	KCLK(SAI4_K, "sai4_k", sai_src, 0, G_SAI4, M_SAI4),
+	KCLK(ADC12_K, "adc12_k", adc12_src, 0, G_ADC12, M_ADC12),
+	KCLK(DSI_K, "dsi_k", dsi_src, 0, G_DSI, M_DSI),
+	KCLK(ADFSDM_K, "adfsdm_k", sai_src, 0, G_ADFSDM, M_SAI1),
+	KCLK(USBO_K, "usbo_k", usbo_src, 0, G_USBO, M_USBO),
+	KCLK(ETHCK_K, "ethck_k", eth_src, 0, G_ETHCK, M_ETHCK),
+
+	/* Particular Kernel Clocks (no mux or no gate) */
+	MGATE_MP1(DFSDM_K, "dfsdm_k", "ck_mcu", 0, G_DFSDM),
+	MGATE_MP1(DSI_PX, "dsi_px", "pll4_q", CLK_SET_RATE_PARENT, G_DSI),
+	MGATE_MP1(LTDC_PX, "ltdc_px", "pll4_q", CLK_SET_RATE_PARENT, G_LTDC),
+	MGATE_MP1(GPU_K, "gpu_k", "pll2_q", 0, G_GPU),
+	MGATE_MP1(DAC12_K, "dac12_k", "ck_lsi", 0, G_DAC12),
+
+	COMPOSITE(ETHPTP_K, "ethptp_k", eth_src, CLK_OPS_PARENT_ENABLE,
+		  _NO_GATE,
+		  _MMUX(M_ETHCK),
+		  _DIV(RCC_ETHCKSELR, 4, 4, CLK_DIVIDER_ALLOW_ZERO, NULL)),
+
+	/* RTC clock */
+	DIV(NO_ID, "ck_hse_rtc", "ck_hse", 0, RCC_RTCDIVR, 0, 7,
+	    CLK_DIVIDER_ALLOW_ZERO),
+
+	COMPOSITE(RTC, "ck_rtc", rtc_src, CLK_OPS_PARENT_ENABLE |
+		   CLK_SET_RATE_PARENT,
+		  _GATE(RCC_BDCR, 20, 0),
+		  _MUX(RCC_BDCR, 16, 2, 0),
+		  _NO_DIV),
+
+	/* MCO clocks */
+	COMPOSITE(CK_MCO1, "ck_mco1", mco1_src, CLK_OPS_PARENT_ENABLE |
+		  CLK_SET_RATE_NO_REPARENT,
+		  _GATE(RCC_MCO1CFGR, 12, 0),
+		  _MUX(RCC_MCO1CFGR, 0, 3, 0),
+		  _DIV(RCC_MCO1CFGR, 4, 4, 0, NULL)),
+
+	COMPOSITE(CK_MCO2, "ck_mco2", mco2_src, CLK_OPS_PARENT_ENABLE |
+		  CLK_SET_RATE_NO_REPARENT,
+		  _GATE(RCC_MCO2CFGR, 12, 0),
+		  _MUX(RCC_MCO2CFGR, 0, 3, 0),
+		  _DIV(RCC_MCO2CFGR, 4, 4, 0, NULL)),
+
+	/* Debug clocks */
+	FIXED_FACTOR(NO_ID, "ck_axi_div2", "ck_axi", 0, 1, 2),
+
+	GATE(DBG, "ck_apb_dbg", "ck_axi_div2", 0, RCC_DBGCFGR, 8, 0),
+
+	GATE(CK_DBG, "ck_sys_dbg", "ck_axi", 0, RCC_DBGCFGR, 8, 0),
+
+	COMPOSITE(CK_TRACE, "ck_trace", ck_trace_src, CLK_OPS_PARENT_ENABLE,
+		  _GATE(RCC_DBGCFGR, 9, 0),
+		  _NO_MUX,
+		  _DIV(RCC_DBGCFGR, 0, 3, 0, ck_trace_div_table)),
+};
+
+struct stm32_clock_match_data {
+	const struct clock_config *cfg;
+	unsigned int num;
+	unsigned int maxbinding;
+};
+
+static struct stm32_clock_match_data stm32mp1_data = {
+	.cfg		= stm32mp1_clock_cfg,
+	.num		= ARRAY_SIZE(stm32mp1_clock_cfg),
+	.maxbinding	= STM32MP1_LAST_CLK,
+};
+
+static const struct of_device_id stm32mp1_match_data[] = {
+	{
+		.compatible = "st,stm32mp1-rcc",
+		.data = &stm32mp1_data,
+	},
+	{ }
+};
+
+/*
+ * Register one clock via cfg->func and publish it in clk_data->hws[] unless
+ * cfg->id is NO_ID. Returns 0 or a negative errno (-ENOENT without ->func).
+ */
+static int stm32_register_hw_clk(struct device *dev,
+				 struct clk_hw_onecell_data *clk_data,
+				 void __iomem *base, spinlock_t *lock,
+				 const struct clock_config *cfg)
+{
+	struct clk_hw *hw = ERR_PTR(-ENOENT);
+
+	if (cfg->func)
+		hw = (*cfg->func)(dev, clk_data, base, lock, cfg);
+	if (IS_ERR(hw)) {
+		pr_err("Unable to register %s\n", cfg->name);
+		return PTR_ERR(hw);
+	}
+
+	if (cfg->id != NO_ID)
+		clk_data->hws[cfg->id] = hw;
+
+	return 0;
+}
+
+/*
+ * Allocate the onecell table, register every clock of the matched data and
+ * add the OF provider. Returns 0 or a negative errno (table freed on error).
+ */
+static int stm32_rcc_init(struct device_node *np,
+			  void __iomem *base,
+			  const struct of_device_id *match_data)
+{
+	struct clk_hw_onecell_data *clk_data;
+	const struct of_device_id *match;
+	const struct stm32_clock_match_data *data;
+	int err, n, max_binding;
+
+	match = of_match_node(match_data, np);
+	if (!match) {
+		pr_err("%s: match data not found\n", __func__);
+		return -ENODEV;
+	}
+
+	data = match->data;
+	max_binding = data->maxbinding;
+
+	clk_data = kzalloc(sizeof(*clk_data) +
+			   sizeof(*clk_data->hws) * max_binding, GFP_KERNEL);
+	if (!clk_data)
+		return -ENOMEM;
+
+	clk_data->num = max_binding;
+	for (n = 0; n < max_binding; n++)
+		clk_data->hws[n] = ERR_PTR(-ENOENT);
+
+	for (n = 0; n < data->num; n++) {
+		err = stm32_register_hw_clk(NULL, clk_data, base, &rlock,
+					    &data->cfg[n]);
+		if (err) {
+			pr_err("%s: can't register  %s\n", __func__,
+			       data->cfg[n].name);
+			goto err_free;
+		}
+	}
+
+	err = of_clk_add_hw_provider(np, of_clk_hw_onecell_get, clk_data);
+	if (!err)
+		return 0;
+err_free:
+	kfree(clk_data);
+	return err;
+}
+
+/* OF init hook: map the RCC registers, then register the STM32MP1 clocks */
+static void stm32mp1_rcc_init(struct device_node *np)
+{
+	void __iomem *base = of_iomap(np, 0);
+
+	if (!base) {
+		pr_err("%s: unable to map resource\n", np->name);
+		of_node_put(np);
+		return;
+	}
+
+	if (stm32_rcc_init(np, base, stm32mp1_match_data)) {
+		iounmap(base);
+		of_node_put(np);
+	}
+}
+
+CLK_OF_DECLARE_DRIVER(stm32mp1_rcc, "st,stm32mp1-rcc", stm32mp1_rcc_init);
diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index 076d4244d672..ea67ac81c6f9 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -2375,6 +2375,9 @@ static int clk_core_get_phase(struct clk_core *core)
 	int ret;
 
 	clk_prepare_lock();
+	/* Always try to update cached phase if possible */
+	if (core->ops->get_phase)
+		core->phase = core->ops->get_phase(core->hw);
 	ret = core->phase;
 	clk_prepare_unlock();
 
@@ -2491,19 +2494,7 @@ static int clk_summary_show(struct seq_file *s, void *data)
 
 	return 0;
 }
-
-
-static int clk_summary_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, clk_summary_show, inode->i_private);
-}
-
-static const struct file_operations clk_summary_fops = {
-	.open		= clk_summary_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(clk_summary);
 
 static void clk_dump_one(struct seq_file *s, struct clk_core *c, int level)
 {
@@ -2537,7 +2528,7 @@ static void clk_dump_subtree(struct seq_file *s, struct clk_core *c, int level)
 	seq_putc(s, '}');
 }
 
-static int clk_dump(struct seq_file *s, void *data)
+static int clk_dump_show(struct seq_file *s, void *data)
 {
 	struct clk_core *c;
 	bool first_node = true;
@@ -2560,19 +2551,7 @@ static int clk_dump(struct seq_file *s, void *data)
 	seq_puts(s, "}\n");
 	return 0;
 }
-
-
-static int clk_dump_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, clk_dump, inode->i_private);
-}
-
-static const struct file_operations clk_dump_fops = {
-	.open		= clk_dump_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(clk_dump);
 
 static const struct {
 	unsigned long flag;
@@ -2594,7 +2573,7 @@ static const struct {
 #undef ENTRY
 };
 
-static int clk_flags_dump(struct seq_file *s, void *data)
+static int clk_flags_show(struct seq_file *s, void *data)
 {
 	struct clk_core *core = s->private;
 	unsigned long flags = core->flags;
@@ -2613,20 +2592,9 @@ static int clk_flags_dump(struct seq_file *s, void *data)
 
 	return 0;
 }
+DEFINE_SHOW_ATTRIBUTE(clk_flags);
 
-static int clk_flags_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, clk_flags_dump, inode->i_private);
-}
-
-static const struct file_operations clk_flags_fops = {
-	.open		= clk_flags_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-static int possible_parents_dump(struct seq_file *s, void *data)
+static int possible_parents_show(struct seq_file *s, void *data)
 {
 	struct clk_core *core = s->private;
 	int i;
@@ -2638,18 +2606,7 @@ static int possible_parents_dump(struct seq_file *s, void *data)
 
 	return 0;
 }
-
-static int possible_parents_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, possible_parents_dump, inode->i_private);
-}
-
-static const struct file_operations possible_parents_fops = {
-	.open		= possible_parents_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(possible_parents);
 
 static int clk_debug_create_one(struct clk_core *core, struct dentry *pdentry)
 {
@@ -2933,6 +2890,17 @@ static int __clk_core_init(struct clk_core *core)
 	}
 
 	/*
+	 * optional platform-specific magic
+	 *
+	 * The .init callback is not used by any of the basic clock types, but
+	 * exists for weird hardware that must perform initialization magic.
+	 * Please consider other ways of solving initialization problems before
+	 * using this callback, as its use is discouraged.
+	 */
+	if (core->ops->init)
+		core->ops->init(core->hw);
+
+	/*
 	 * Set clk's accuracy.  The preferred method is to use
 	 * .recalc_accuracy. For simple clocks and lazy developers the default
 	 * fallback is to use the parent's accuracy.  If a clock doesn't have a
@@ -3009,17 +2977,6 @@ static int __clk_core_init(struct clk_core *core)
 		}
 	}
 
-	/*
-	 * optional platform-specific magic
-	 *
-	 * The .init callback is not used by any of the basic clock types, but
-	 * exists for weird hardware that must perform initialization magic.
-	 * Please consider other ways of solving initialization problems before
-	 * using this callback, as its use is discouraged.
-	 */
-	if (core->ops->init)
-		core->ops->init(core->hw);
-
 	kref_init(&core->ref);
 out:
 	clk_pm_runtime_put(core);
diff --git a/drivers/clk/davinci/Makefile b/drivers/clk/davinci/Makefile
new file mode 100644
index 000000000000..11178b79b483
--- /dev/null
+++ b/drivers/clk/davinci/Makefile
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: GPL-2.0
+
+ifeq ($(CONFIG_COMMON_CLK), y)
+obj-$(CONFIG_ARCH_DAVINCI_DA8XX)	+= da8xx-cfgchip.o
+
+obj-y += pll.o
+obj-$(CONFIG_ARCH_DAVINCI_DA830)	+= pll-da830.o
+obj-$(CONFIG_ARCH_DAVINCI_DA850)	+= pll-da850.o
+obj-$(CONFIG_ARCH_DAVINCI_DM355)	+= pll-dm355.o
+obj-$(CONFIG_ARCH_DAVINCI_DM365)	+= pll-dm365.o
+obj-$(CONFIG_ARCH_DAVINCI_DM644x)	+= pll-dm644x.o
+obj-$(CONFIG_ARCH_DAVINCI_DM646x)	+= pll-dm646x.o
+
+obj-y += psc.o
+obj-$(CONFIG_ARCH_DAVINCI_DA830)	+= psc-da830.o
+obj-$(CONFIG_ARCH_DAVINCI_DA850)	+= psc-da850.o
+obj-$(CONFIG_ARCH_DAVINCI_DM355)	+= psc-dm355.o
+obj-$(CONFIG_ARCH_DAVINCI_DM365)	+= psc-dm365.o
+obj-$(CONFIG_ARCH_DAVINCI_DM644x)	+= psc-dm644x.o
+obj-$(CONFIG_ARCH_DAVINCI_DM646x)	+= psc-dm646x.o
+endif
diff --git a/drivers/clk/davinci/da8xx-cfgchip.c b/drivers/clk/davinci/da8xx-cfgchip.c
new file mode 100644
index 000000000000..c971111d2601
--- /dev/null
+++ b/drivers/clk/davinci/da8xx-cfgchip.c
@@ -0,0 +1,790 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Clock driver for DA8xx/AM17xx/AM18xx/OMAP-L13x CFGCHIP
+ *
+ * Copyright (C) 2018 David Lechner <david@lechnology.com>
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+#include <linux/clkdev.h>
+#include <linux/init.h>
+#include <linux/mfd/da8xx-cfgchip.h>
+#include <linux/mfd/syscon.h>
+#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_data/clk-da8xx-cfgchip.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+
+/* --- Gate clocks --- */
+
+#define DA8XX_GATE_CLOCK_IS_DIV4P5	BIT(1)
+
+struct da8xx_cfgchip_gate_clk_info {
+	const char *name;	/* name the clock is registered under */
+	u32 cfgchip;		/* CFGCHIP register offset, e.g. CFGCHIP(1) */
+	u32 bit;		/* mask of the gate bit in that register */
+	u32 flags;		/* DA8XX_GATE_CLOCK_* flags */
+};
+
+struct da8xx_cfgchip_gate_clk {
+	struct clk_hw hw;	/* embedded so container_of() can recover us */
+	struct regmap *regmap;	/* regmap the gate register is accessed through */
+	u32 reg;		/* register offset, copied from info->cfgchip */
+	u32 mask;		/* gate bit mask, copied from info->bit */
+};
+
+#define to_da8xx_cfgchip_gate_clk(_hw) \
+	container_of((_hw), struct da8xx_cfgchip_gate_clk, hw)
+
+static int da8xx_cfgchip_gate_clk_enable(struct clk_hw *hw)
+{
+	struct da8xx_cfgchip_gate_clk *clk = to_da8xx_cfgchip_gate_clk(hw);
+
+	return regmap_write_bits(clk->regmap, clk->reg, clk->mask, clk->mask);
+}
+
+static void da8xx_cfgchip_gate_clk_disable(struct clk_hw *hw)
+{
+	struct da8xx_cfgchip_gate_clk *clk = to_da8xx_cfgchip_gate_clk(hw);
+
+	regmap_write_bits(clk->regmap, clk->reg, clk->mask, 0);
+}
+
+static int da8xx_cfgchip_gate_clk_is_enabled(struct clk_hw *hw)
+{
+	struct da8xx_cfgchip_gate_clk *gate = to_da8xx_cfgchip_gate_clk(hw);
+	unsigned int cfg;
+
+	/* The clock counts as enabled whenever its gate bit reads back set. */
+	regmap_read(gate->regmap, gate->reg, &cfg);
+	return (cfg & gate->mask) != 0;
+}
+
+static unsigned long da8xx_cfgchip_div4p5_recalc_rate(struct clk_hw *hw,
+						      unsigned long parent_rate)
+{
+	/* this clock divides by 4.5, i.e. scales by 2/9 in integer math */
+	return parent_rate * 2 / 9;
+}
+
+static const struct clk_ops da8xx_cfgchip_gate_clk_ops = {
+	.enable		= da8xx_cfgchip_gate_clk_enable,
+	.disable	= da8xx_cfgchip_gate_clk_disable,
+	.is_enabled	= da8xx_cfgchip_gate_clk_is_enabled,
+};
+
+static const struct clk_ops da8xx_cfgchip_div4p5_clk_ops = {
+	.enable		= da8xx_cfgchip_gate_clk_enable,
+	.disable	= da8xx_cfgchip_gate_clk_disable,
+	.is_enabled	= da8xx_cfgchip_gate_clk_is_enabled,
+	.recalc_rate	= da8xx_cfgchip_div4p5_recalc_rate,
+};
+
+static struct da8xx_cfgchip_gate_clk * __init
+da8xx_cfgchip_gate_clk_register(struct device *dev,
+				const struct da8xx_cfgchip_gate_clk_info *info,
+				struct regmap *regmap)
+{
+	const bool is_div4p5 = info->flags & DA8XX_GATE_CLOCK_IS_DIV4P5;
+	struct da8xx_cfgchip_gate_clk *gate;
+	struct clk_init_data init;
+	const char *parent_name;
+	struct clk *parent;
+	int err;
+
+	/* The only parent is the unnamed clock consumed by this device. */
+	parent = devm_clk_get(dev, NULL);
+	if (IS_ERR(parent))
+		return ERR_CAST(parent);
+	parent_name = __clk_get_name(parent);
+
+	gate = devm_kzalloc(dev, sizeof(*gate), GFP_KERNEL);
+	if (!gate)
+		return ERR_PTR(-ENOMEM);
+
+	gate->regmap = regmap;
+	gate->reg = info->cfgchip;
+	gate->mask = info->bit;
+
+	/* The div4.5 flavour has the same gate ops plus a .recalc_rate. */
+	init.name = info->name;
+	init.ops = is_div4p5 ? &da8xx_cfgchip_div4p5_clk_ops :
+			       &da8xx_cfgchip_gate_clk_ops;
+	init.parent_names = &parent_name;
+	init.num_parents = 1;
+	init.flags = 0;
+	gate->hw.init = &init;
+
+	err = devm_clk_hw_register(dev, &gate->hw);
+	if (err < 0)
+		return ERR_PTR(err);
+
+	return gate;
+}
+
+static const struct da8xx_cfgchip_gate_clk_info da8xx_tbclksync_info __initconst = {
+	.name = "ehrpwm_tbclk",
+	.cfgchip = CFGCHIP(1),
+	.bit = CFGCHIP1_TBCLKSYNC,
+};
+
+static int __init da8xx_cfgchip_register_tbclk(struct device *dev,
+					       struct regmap *regmap)
+{
+	struct da8xx_cfgchip_gate_clk *tbclk;
+
+	tbclk = da8xx_cfgchip_gate_clk_register(dev, &da8xx_tbclksync_info,
+						regmap);
+	if (IS_ERR(tbclk))
+		return PTR_ERR(tbclk);
+
+	/* Both ehrpwm devices get a "tbclk" lookup for this one gate. */
+	clk_hw_register_clkdev(&tbclk->hw, "tbclk", "ehrpwm.0");
+	clk_hw_register_clkdev(&tbclk->hw, "tbclk", "ehrpwm.1");
+	return 0;
+}
+
+static const struct da8xx_cfgchip_gate_clk_info da8xx_div4p5ena_info __initconst = {
+	.name = "div4.5",
+	.cfgchip = CFGCHIP(3),
+	.bit = CFGCHIP3_DIV45PENA,
+	.flags = DA8XX_GATE_CLOCK_IS_DIV4P5,
+};
+
+static int __init da8xx_cfgchip_register_div4p5(struct device *dev,
+						struct regmap *regmap)
+{
+	struct da8xx_cfgchip_gate_clk *div4p5;
+
+	/* Only the clock is registered; no clkdev lookup is added for it. */
+	div4p5 = da8xx_cfgchip_gate_clk_register(dev, &da8xx_div4p5ena_info,
+						 regmap);
+
+	return PTR_ERR_OR_ZERO(div4p5);
+}
+
+static int __init
+of_da8xx_cfgchip_gate_clk_init(struct device *dev,
+			       const struct da8xx_cfgchip_gate_clk_info *info,
+			       struct regmap *regmap)
+{
+	struct da8xx_cfgchip_gate_clk *gate;
+
+	gate = da8xx_cfgchip_gate_clk_register(dev, info, regmap);
+	if (IS_ERR(gate))
+		return PTR_ERR(gate);
+	/* of_clk_hw_simple_get() hands this data back as a struct clk_hw * */
+	return devm_of_clk_add_hw_provider(dev, of_clk_hw_simple_get, &gate->hw);
+}
+
+static int __init of_da8xx_tbclksync_init(struct device *dev,
+					  struct regmap *regmap)
+{
+	return of_da8xx_cfgchip_gate_clk_init(dev, &da8xx_tbclksync_info, regmap);
+}
+
+static int __init of_da8xx_div4p5ena_init(struct device *dev,
+					  struct regmap *regmap)
+{
+	return of_da8xx_cfgchip_gate_clk_init(dev, &da8xx_div4p5ena_info, regmap);
+}
+
+/* --- MUX clocks --- */
+
+struct da8xx_cfgchip_mux_clk_info {
+	const char *name;	/* name the mux clock is registered under */
+	const char *parent0;	/* parent name at index 0 (select bit clear) */
+	const char *parent1;	/* parent name at index 1 (select bit set) */
+	u32 cfgchip;		/* CFGCHIP register offset, e.g. CFGCHIP(3) */
+	u32 bit;		/* mask of the select bit in that register */
+};
+
+struct da8xx_cfgchip_mux_clk {
+	struct clk_hw hw;	/* embedded so container_of() can recover us */
+	struct regmap *regmap;	/* regmap the mux register is accessed through */
+	u32 reg;		/* register offset, copied from info->cfgchip */
+	u32 mask;		/* select bit mask, copied from info->bit */
+};
+
+#define to_da8xx_cfgchip_mux_clk(_hw) \
+	container_of((_hw), struct da8xx_cfgchip_mux_clk, hw)
+
+static int da8xx_cfgchip_mux_clk_set_parent(struct clk_hw *hw, u8 index)
+{
+	struct da8xx_cfgchip_mux_clk *mux = to_da8xx_cfgchip_mux_clk(hw);
+
+	return regmap_write_bits(mux->regmap, mux->reg, mux->mask,
+				 index ? mux->mask : 0); /* 0 clears the bit */
+}
+
+static u8 da8xx_cfgchip_mux_clk_get_parent(struct clk_hw *hw)
+{
+	struct da8xx_cfgchip_mux_clk *mux = to_da8xx_cfgchip_mux_clk(hw);
+	unsigned int cfg;
+
+	/* A set select bit means parent index 1, a clear one index 0. */
+	regmap_read(mux->regmap, mux->reg, &cfg);
+	return !!(cfg & mux->mask);
+}
+
+static const struct clk_ops da8xx_cfgchip_mux_clk_ops = {
+	.set_parent	= da8xx_cfgchip_mux_clk_set_parent,
+	.get_parent	= da8xx_cfgchip_mux_clk_get_parent,
+};
+
+static struct da8xx_cfgchip_mux_clk * __init
+da8xx_cfgchip_mux_clk_register(struct device *dev,
+			       const struct da8xx_cfgchip_mux_clk_info *info,
+			       struct regmap *regmap)
+{
+	/* parent0 = index 0 (select bit clear), parent1 = index 1 (bit set) */
+	const char * const parents[] = { info->parent0, info->parent1 };
+	struct clk_init_data init = {
+		.name = info->name,
+		.ops = &da8xx_cfgchip_mux_clk_ops,
+		.parent_names = parents,
+		.num_parents = ARRAY_SIZE(parents),
+		.flags = 0,
+	};
+	struct da8xx_cfgchip_mux_clk *mux;
+	int err;
+
+	mux = devm_kzalloc(dev, sizeof(*mux), GFP_KERNEL);
+	if (!mux)
+		return ERR_PTR(-ENOMEM);
+
+	mux->hw.init = &init;
+	mux->regmap = regmap;
+	mux->reg = info->cfgchip;
+	mux->mask = info->bit;
+
+	err = devm_clk_hw_register(dev, &mux->hw);
+	if (err < 0)
+		return ERR_PTR(err);
+	return mux;
+}
+
+static const struct da8xx_cfgchip_mux_clk_info da850_async1_info __initconst = {
+	.name = "async1",
+	.parent0 = "pll0_sysclk3",
+	.parent1 = "div4.5",
+	.cfgchip = CFGCHIP(3),
+	.bit = CFGCHIP3_EMA_CLKSRC,
+};
+
+static int __init da8xx_cfgchip_register_async1(struct device *dev,
+						struct regmap *regmap)
+{
+	struct da8xx_cfgchip_mux_clk *async1;
+
+	async1 = da8xx_cfgchip_mux_clk_register(dev, &da850_async1_info,
+						regmap);
+	if (IS_ERR(async1))
+		return PTR_ERR(async1);
+	/* clkdev lookup: con_id "async1" for the "da850-psc0" device */
+	clk_hw_register_clkdev(&async1->hw, "async1", "da850-psc0");
+	return 0;
+}
+
+static const struct da8xx_cfgchip_mux_clk_info da850_async3_info __initconst = {
+	.name = "async3",
+	.parent0 = "pll0_sysclk2",
+	.parent1 = "pll1_sysclk2",
+	.cfgchip = CFGCHIP(3),
+	.bit = CFGCHIP3_ASYNC3_CLKSRC,
+};
+
+static int __init da850_cfgchip_register_async3(struct device *dev,
+						struct regmap *regmap)
+{
+	struct da8xx_cfgchip_mux_clk *async3;
+	struct clk_hw *pll1_sysclk2;
+
+	async3 = da8xx_cfgchip_mux_clk_register(dev, &da850_async3_info,
+						regmap);
+	if (IS_ERR(async3))
+		return PTR_ERR(async3);
+	clk_hw_register_clkdev(&async3->hw, "async3", "da850-psc1");
+
+	/* Parent index 1 (pll1_sysclk2) is not affected by CPU scaling. */
+	pll1_sysclk2 = clk_hw_get_parent_by_index(&async3->hw, 1);
+	if (!pll1_sysclk2) {
+		dev_warn(dev, "Failed to find async3 parent clock\n");
+		return 0;
+	}
+	clk_set_parent(async3->hw.clk, pll1_sysclk2->clk);
+	return 0;
+}
+
+static int __init
+of_da8xx_cfgchip_init_mux_clock(struct device *dev,
+				const struct da8xx_cfgchip_mux_clk_info *info,
+				struct regmap *regmap)
+{
+	struct da8xx_cfgchip_mux_clk *mux;
+
+	mux = da8xx_cfgchip_mux_clk_register(dev, info, regmap);
+	if (IS_ERR(mux))
+		return PTR_ERR(mux);
+
+	return devm_of_clk_add_hw_provider(dev, of_clk_hw_simple_get, &mux->hw);
+}
+
+static int __init of_da850_async1_init(struct device *dev, struct regmap *regmap)
+{
+	return of_da8xx_cfgchip_init_mux_clock(dev, &da850_async1_info, regmap);
+}
+
+static int __init of_da850_async3_init(struct device *dev, struct regmap *regmap)
+{
+	return of_da8xx_cfgchip_init_mux_clock(dev, &da850_async3_info, regmap);
+}
+
+/* --- USB 2.0 PHY clock --- */
+
+struct da8xx_usb0_clk48 {
+	struct clk_hw hw;	/* embedded so container_of() can recover us */
+	struct clk *fck;	/* "fck" clock, needed while the PLL locks */
+	struct regmap *regmap;	/* regmap used to access CFGCHIP(2) */
+};
+
+#define to_da8xx_usb0_clk48(_hw) \
+	container_of((_hw), struct da8xx_usb0_clk48, hw)
+
+static int da8xx_usb0_clk48_prepare(struct clk_hw *hw)
+{
+	struct da8xx_usb0_clk48 *usb0 = to_da8xx_usb0_clk48(hw);
+
+	/* The USB 2.0 PSC clock is only needed temporarily during the USB 2.0
+	 * PHY clock enable, but since clk_prepare() can't be called in an
+	 * atomic context (i.e. in clk_enable()), we have to prepare it here.
+	 */
+	return clk_prepare(usb0->fck);
+}
+
+static void da8xx_usb0_clk48_unprepare(struct clk_hw *hw)
+{
+	struct da8xx_usb0_clk48 *usb0 = to_da8xx_usb0_clk48(hw);
+
+	clk_unprepare(usb0->fck);
+}
+
+static int da8xx_usb0_clk48_enable(struct clk_hw *hw)
+{
+	struct da8xx_usb0_clk48 *usb0 = to_da8xx_usb0_clk48(hw);
+	unsigned int mask, val;
+	int ret;
+
+	/* Locking the USB 2.0 PLL requires that the USB 2.0 PSC is enabled
+	 * temporarily. It can be turned back off once the PLL is locked.
+	 */
+	ret = clk_enable(usb0->fck);
+	if (ret)
+		return ret;	/* fck is off: nothing to undo */
+
+	/* Turn on the USB 2.0 PHY, but just the PLL, and not OTG. The USB 1.1
+	 * PHY may use the USB 2.0 PLL clock without USB 2.0 OTG being used.
+	 */
+	mask = CFGCHIP2_RESET | CFGCHIP2_PHYPWRDN | CFGCHIP2_PHY_PLLON;
+	val = CFGCHIP2_PHY_PLLON;
+	regmap_write_bits(usb0->regmap, CFGCHIP(2), mask, val);
+	ret = regmap_read_poll_timeout(usb0->regmap, CFGCHIP(2), val,
+				       val & CFGCHIP2_PHYCLKGD, 0, 500000);
+	clk_disable(usb0->fck);
+
+	return ret;
+}
+
+static void da8xx_usb0_clk48_disable(struct clk_hw *hw)
+{
+	struct da8xx_usb0_clk48 *usb0 = to_da8xx_usb0_clk48(hw);
+	const unsigned int pwrdn = CFGCHIP2_PHYPWRDN;
+
+	/* Power the PHY down; the mask limits the write to PHYPWRDN. */
+	regmap_write_bits(usb0->regmap, CFGCHIP(2), pwrdn, pwrdn);
+}
+
+static int da8xx_usb0_clk48_is_enabled(struct clk_hw *hw)
+{
+	struct da8xx_usb0_clk48 *usb0 = to_da8xx_usb0_clk48(hw);
+	unsigned int cfgchip2;
+
+	/* PHYCLKGD is the bit .enable polls for after switching the PLL on. */
+	regmap_read(usb0->regmap, CFGCHIP(2), &cfgchip2);
+	return (cfgchip2 & CFGCHIP2_PHYCLKGD) != 0;
+}
+
+/*
+ * Report the rate of usb0_clk48 and, as a side effect, program the USB 2.0
+ * PHY reference frequency.
+ *
+ * Each call rewrites the REFFREQ field of CFGCHIP2 to match @parent_rate and
+ * then reports 48 MHz.
+ *
+ * A @parent_rate that has no REFFREQ encoding leaves the register untouched
+ * and is reported as a rate of 0.
+ */
+static unsigned long da8xx_usb0_clk48_recalc_rate(struct clk_hw *hw,
+						  unsigned long parent_rate)
+{
+	/* The parent clock rate must be one of the following */
+	static const struct {
+		unsigned long rate;
+		unsigned int reffreq;
+	} supported[] = {
+		{ 12000000, CFGCHIP2_REFFREQ_12MHZ },
+		{ 13000000, CFGCHIP2_REFFREQ_13MHZ },
+		{ 19200000, CFGCHIP2_REFFREQ_19_2MHZ },
+		{ 20000000, CFGCHIP2_REFFREQ_20MHZ },
+		{ 24000000, CFGCHIP2_REFFREQ_24MHZ },
+		{ 26000000, CFGCHIP2_REFFREQ_26MHZ },
+		{ 38400000, CFGCHIP2_REFFREQ_38_4MHZ },
+		{ 40000000, CFGCHIP2_REFFREQ_40MHZ },
+		{ 48000000, CFGCHIP2_REFFREQ_48MHZ },
+	};
+	struct da8xx_usb0_clk48 *usb0 = to_da8xx_usb0_clk48(hw);
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(supported); i++) {
+		if (supported[i].rate != parent_rate)
+			continue;
+
+		regmap_write_bits(usb0->regmap, CFGCHIP(2),
+				  CFGCHIP2_REFFREQ_MASK, supported[i].reffreq);
+
+		/* USB 2.0 PLL always supplies 48MHz */
+		return 48000000;
+	}
+
+	/* unsupported parent rate: REFFREQ is left as it was */
+	return 0;
+}
+
+static long da8xx_usb0_clk48_round_rate(struct clk_hw *hw, unsigned long rate,
+					unsigned long *parent_rate)
+{
+	return 48000000;	/* same answer for every requested rate */
+}
+
+static int da8xx_usb0_clk48_set_parent(struct clk_hw *hw, u8 index)
+{
+	struct da8xx_usb0_clk48 *usb0 = to_da8xx_usb0_clk48(hw);
+
+	return regmap_write_bits(usb0->regmap, CFGCHIP(2),
+				 CFGCHIP2_USB2PHYCLKMUX,
+				 index ? CFGCHIP2_USB2PHYCLKMUX : 0);
+}
+
+static u8 da8xx_usb0_clk48_get_parent(struct clk_hw *hw)
+{
+	struct da8xx_usb0_clk48 *usb0 = to_da8xx_usb0_clk48(hw);
+	unsigned int cfgchip2;
+
+	/* USB2PHYCLKMUX set selects parent index 1, clear selects index 0. */
+	regmap_read(usb0->regmap, CFGCHIP(2), &cfgchip2);
+	return !!(cfgchip2 & CFGCHIP2_USB2PHYCLKMUX);
+}
+
+static const struct clk_ops da8xx_usb0_clk48_ops = {
+	.prepare	= da8xx_usb0_clk48_prepare,
+	.unprepare	= da8xx_usb0_clk48_unprepare,
+	.enable		= da8xx_usb0_clk48_enable,
+	.disable	= da8xx_usb0_clk48_disable,
+	.is_enabled	= da8xx_usb0_clk48_is_enabled,
+	.recalc_rate	= da8xx_usb0_clk48_recalc_rate,
+	.round_rate	= da8xx_usb0_clk48_round_rate,
+	.set_parent	= da8xx_usb0_clk48_set_parent,
+	.get_parent	= da8xx_usb0_clk48_get_parent,
+};
+
+/* Register the USB 2.0 PHY clock "usb0_clk48"; @dev needs an "fck" clock. */
+static struct da8xx_usb0_clk48 *
+da8xx_cfgchip_register_usb0_clk48(struct device *dev, struct regmap *regmap)
+{
+	const char * const parent_names[] = { "usb_refclkin", "pll0_auxclk" };
+	struct clk *fck_clk;
+	struct da8xx_usb0_clk48 *usb0;
+	struct clk_init_data init = {};	/* zeroed: .flags is never assigned */
+	int ret;
+
+	fck_clk = devm_clk_get(dev, "fck");
+	if (IS_ERR(fck_clk)) {
+		if (PTR_ERR(fck_clk) != -EPROBE_DEFER)
+			dev_err(dev, "Missing fck clock\n");
+		return ERR_CAST(fck_clk);
+	}
+
+	usb0 = devm_kzalloc(dev, sizeof(*usb0), GFP_KERNEL);
+	if (!usb0)
+		return ERR_PTR(-ENOMEM);
+
+	init.name = "usb0_clk48";
+	init.ops = &da8xx_usb0_clk48_ops;
+	init.parent_names = parent_names;
+	init.num_parents = 2;
+
+	usb0->hw.init = &init;
+	usb0->fck = fck_clk;
+	usb0->regmap = regmap;
+
+	ret = devm_clk_hw_register(dev, &usb0->hw);
+	if (ret < 0)
+		return ERR_PTR(ret);
+
+	return usb0;
+}
+
+/* --- USB 1.1 PHY clock --- */
+
+struct da8xx_usb1_clk48 {
+	struct clk_hw hw;
+	struct regmap *regmap;
+};
+
+#define to_da8xx_usb1_clk48(_hw) \
+	container_of((_hw), struct da8xx_usb1_clk48, hw)
+
+static int da8xx_usb1_clk48_set_parent(struct clk_hw *hw, u8 index)
+{
+	struct da8xx_usb1_clk48 *usb1 = to_da8xx_usb1_clk48(hw);
+
+	return regmap_write_bits(usb1->regmap, CFGCHIP(2),
+				 CFGCHIP2_USB1PHYCLKMUX,
+				 index ? CFGCHIP2_USB1PHYCLKMUX : 0);
+}
+
+static u8 da8xx_usb1_clk48_get_parent(struct clk_hw *hw)
+{
+	struct da8xx_usb1_clk48 *usb1 = to_da8xx_usb1_clk48(hw);
+	unsigned int cfgchip2;
+
+	/* USB1PHYCLKMUX set selects parent index 1, clear selects index 0. */
+	regmap_read(usb1->regmap, CFGCHIP(2), &cfgchip2);
+	return !!(cfgchip2 & CFGCHIP2_USB1PHYCLKMUX);
+}
+
+static const struct clk_ops da8xx_usb1_clk48_ops = {
+	.set_parent	= da8xx_usb1_clk48_set_parent,
+	.get_parent	= da8xx_usb1_clk48_get_parent,
+};
+
+/**
+ * da8xx_cfgchip_register_usb1_clk48 - Register a new USB 1.1 PHY clock
+ * @dev: The device the allocation and clock registration are managed by
+ * @regmap: The CFGCHIP regmap
+ */
+static struct da8xx_usb1_clk48 *
+da8xx_cfgchip_register_usb1_clk48(struct device *dev, struct regmap *regmap)
+{
+	const char * const parent_names[] = { "usb0_clk48", "usb_refclkin" };
+	struct da8xx_usb1_clk48 *usb1;
+	struct clk_init_data init = {};	/* zeroed: .flags is never assigned */
+	int ret;
+
+	usb1 = devm_kzalloc(dev, sizeof(*usb1), GFP_KERNEL);
+	if (!usb1)
+		return ERR_PTR(-ENOMEM);
+
+	init.name = "usb1_clk48";
+	init.ops = &da8xx_usb1_clk48_ops;
+	init.parent_names = parent_names;
+	init.num_parents = 2;
+
+	usb1->hw.init = &init;
+	usb1->regmap = regmap;
+
+	ret = devm_clk_hw_register(dev, &usb1->hw);
+	if (ret < 0)
+		return ERR_PTR(ret);
+
+	return usb1;
+}
+
+static int da8xx_cfgchip_register_usb_phy_clk(struct device *dev,
+					      struct regmap *regmap)
+{
+	struct da8xx_usb0_clk48 *usb0;
+	struct da8xx_usb1_clk48 *usb1;
+	struct clk_hw *src;
+
+	usb0 = da8xx_cfgchip_register_usb0_clk48(dev, regmap);
+	if (IS_ERR(usb0))
+		return PTR_ERR(usb0);
+
+	/*
+	 * Parent index 1 is pll0_auxclk. Every existing board uses it, and
+	 * new boards are expected to use device tree, so it is hard-coded.
+	 */
+	src = clk_hw_get_parent_by_index(&usb0->hw, 1);
+	if (!src)
+		dev_warn(dev, "Failed to find usb0 parent clock\n");
+	else
+		clk_set_parent(usb0->hw.clk, src->clk);
+
+	usb1 = da8xx_cfgchip_register_usb1_clk48(dev, regmap);
+	if (IS_ERR(usb1))
+		return PTR_ERR(usb1);
+
+	/*
+	 * Parent index 0 is usb0_clk48. Every existing board uses it, and
+	 * new boards are expected to use device tree, so it is hard-coded.
+	 */
+	src = clk_hw_get_parent_by_index(&usb1->hw, 0);
+	if (!src)
+		dev_warn(dev, "Failed to find usb1 parent clock\n");
+	else
+		clk_set_parent(usb1->hw.clk, src->clk);
+
+	clk_hw_register_clkdev(&usb0->hw, "usb0_clk48", "da8xx-usb-phy");
+	clk_hw_register_clkdev(&usb1->hw, "usb1_clk48", "da8xx-usb-phy");
+
+	return 0;
+}
+
+static int of_da8xx_usb_phy_clk_init(struct device *dev, struct regmap *regmap)
+{
+	struct clk_hw_onecell_data *clk_data;
+	struct da8xx_usb0_clk48 *usb0;
+	struct da8xx_usb1_clk48 *usb1;
+
+	clk_data = devm_kzalloc(dev, sizeof(*clk_data) + 2 *
+				sizeof(*clk_data->hws), GFP_KERNEL);
+	if (!clk_data)
+		return -ENOMEM;
+
+	clk_data->num = 2;	/* cell 0: usb0_clk48, cell 1: usb1_clk48 */
+
+	usb0 = da8xx_cfgchip_register_usb0_clk48(dev, regmap);
+	if (IS_ERR(usb0)) {
+		if (PTR_ERR(usb0) == -EPROBE_DEFER)
+			return -EPROBE_DEFER;
+
+		dev_warn(dev, "Failed to register usb0_clk48 (%ld)\n",
+			 PTR_ERR(usb0));
+		/* not fatal: lookups of this cell get -ENOENT */
+		clk_data->hws[0] = ERR_PTR(-ENOENT);
+	} else {
+		clk_data->hws[0] = &usb0->hw;
+	}
+
+	usb1 = da8xx_cfgchip_register_usb1_clk48(dev, regmap);
+	if (IS_ERR(usb1)) {
+		if (PTR_ERR(usb1) == -EPROBE_DEFER)
+			return -EPROBE_DEFER;
+
+		dev_warn(dev, "Failed to register usb1_clk48 (%ld)\n",
+			 PTR_ERR(usb1));
+		/* not fatal: lookups of this cell get -ENOENT */
+		clk_data->hws[1] = ERR_PTR(-ENOENT);
+	} else {
+		clk_data->hws[1] = &usb1->hw;
+	}
+
+	return devm_of_clk_add_hw_provider(dev, of_clk_hw_onecell_get, clk_data);
+}
+
+/* --- platform device --- */
+
+static const struct of_device_id da8xx_cfgchip_of_match[] = {
+	{
+		.compatible = "ti,da830-tbclksync",
+		.data = of_da8xx_tbclksync_init,
+	},
+	{
+		.compatible = "ti,da830-div4p5ena",
+		.data = of_da8xx_div4p5ena_init,
+	},
+	{
+		.compatible = "ti,da850-async1-clksrc",
+		.data = of_da850_async1_init,
+	},
+	{
+		.compatible = "ti,da850-async3-clksrc",
+		.data = of_da850_async3_init,
+	},
+	{
+		.compatible = "ti,da830-usb-phy-clocks",
+		.data = of_da8xx_usb_phy_clk_init,
+	},
+	{ }
+};
+
+static const struct platform_device_id da8xx_cfgchip_id_table[] = {
+	{
+		.name = "da830-tbclksync",
+		.driver_data = (kernel_ulong_t)da8xx_cfgchip_register_tbclk,
+	},
+	{
+		.name = "da830-div4p5ena",
+		.driver_data = (kernel_ulong_t)da8xx_cfgchip_register_div4p5,
+	},
+	{
+		.name = "da850-async1-clksrc",
+		.driver_data = (kernel_ulong_t)da8xx_cfgchip_register_async1,
+	},
+	{
+		.name = "da850-async3-clksrc",
+		.driver_data = (kernel_ulong_t)da850_cfgchip_register_async3,
+	},
+	{
+		.name = "da830-usb-phy-clks",
+		.driver_data = (kernel_ulong_t)da8xx_cfgchip_register_usb_phy_clk,
+	},
+	{ }
+};
+
+typedef int (*da8xx_cfgchip_init)(struct device *dev, struct regmap *regmap);
+
+static int da8xx_cfgchip_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct da8xx_cfgchip_clk_platform_data *pdata = dev->platform_data;
+	const struct of_device_id *match;
+	da8xx_cfgchip_init clk_init = NULL;
+	struct regmap *regmap = NULL;
+
+	/* DT: the init hook is the match data, the syscon is the parent node */
+	match = of_match_device(da8xx_cfgchip_of_match, dev);
+	if (match) {
+		struct device_node *syscon_np = of_get_parent(dev->of_node);
+
+		clk_init = match->data;
+		regmap = syscon_node_to_regmap(syscon_np);
+		of_node_put(syscon_np);
+	} else if (pdev->id_entry && pdata) {
+		/* platform data: hook from the id table, regmap from pdata */
+		clk_init = (void *)pdev->id_entry->driver_data;
+		regmap = pdata->cfgchip;
+	}
+
+	if (!clk_init) {
+		dev_err(dev, "unable to find driver data\n");
+		return -EINVAL;
+	}
+	if (IS_ERR_OR_NULL(regmap)) {
+		dev_err(dev, "no regmap for CFGCHIP syscon\n");
+		return regmap ? PTR_ERR(regmap) : -ENOENT;
+	}
+
+	return clk_init(dev, regmap);
+}
+
+static struct platform_driver da8xx_cfgchip_driver = {
+	.probe		= da8xx_cfgchip_probe,
+	.driver		= {
+		.name		= "da8xx-cfgchip-clk",
+		.of_match_table	= da8xx_cfgchip_of_match,
+	},
+	.id_table	= da8xx_cfgchip_id_table,
+};
+
+static int __init da8xx_cfgchip_driver_init(void)
+{
+	return platform_driver_register(&da8xx_cfgchip_driver);
+}
+
+/* has to be postcore_initcall because PSC devices depend on the async3 clock */
+postcore_initcall(da8xx_cfgchip_driver_init);
diff --git a/drivers/clk/davinci/pll-da830.c b/drivers/clk/davinci/pll-da830.c
new file mode 100644
index 000000000000..929a3d3a9adb
--- /dev/null
+++ b/drivers/clk/davinci/pll-da830.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PLL clock descriptions for TI DA830/OMAP-L137/AM17XX
+ *
+ * Copyright (C) 2018 David Lechner <david@lechnology.com>
+ */
+
+#include <linux/clkdev.h>
+#include <linux/bitops.h>
+#include <linux/init.h>
+#include <linux/types.h>
+
+#include "pll.h"
+
+static const struct davinci_pll_clk_info da830_pll_info = {
+	.name = "pll0",
+	.pllm_mask = GENMASK(4, 0),
+	.pllm_min = 4,
+	.pllm_max = 32,
+	.pllout_min_rate = 300000000,
+	.pllout_max_rate = 600000000,
+	.flags = PLL_HAS_CLKMODE | PLL_HAS_PREDIV | PLL_HAS_POSTDIV,
+};
+
+/*
+ * NB: Technically, the clocks flagged as SYSCLK_FIXED_DIV are "fixed ratio",
+ * meaning that we could change the divider as long as we keep the correct
+ * ratio between all of the clocks, but we don't support that because there is
+ * currently not a need for it.
+ */
+
+SYSCLK(2, pll0_sysclk2, pll0_pllen, 5, SYSCLK_FIXED_DIV);
+SYSCLK(3, pll0_sysclk3, pll0_pllen, 5, 0);
+SYSCLK(4, pll0_sysclk4, pll0_pllen, 5, SYSCLK_FIXED_DIV);
+SYSCLK(5, pll0_sysclk5, pll0_pllen, 5, 0);
+SYSCLK(6, pll0_sysclk6, pll0_pllen, 5, SYSCLK_FIXED_DIV);
+SYSCLK(7, pll0_sysclk7, pll0_pllen, 5, 0);
+
+int da830_pll_init(struct device *dev, void __iomem *base)
+{
+	struct clk *clk;
+
+	davinci_pll_clk_register(dev, &da830_pll_info, "ref_clk", base);
+
+	clk = davinci_pll_sysclk_register(dev, &pll0_sysclk2, base);
+	clk_register_clkdev(clk, "pll0_sysclk2", "da830-psc0");
+	clk_register_clkdev(clk, "pll0_sysclk2", "da830-psc1");
+
+	clk = davinci_pll_sysclk_register(dev, &pll0_sysclk3, base);
+	clk_register_clkdev(clk, "pll0_sysclk3", "da830-psc0");
+
+	clk = davinci_pll_sysclk_register(dev, &pll0_sysclk4, base);
+	clk_register_clkdev(clk, "pll0_sysclk4", "da830-psc0");
+	clk_register_clkdev(clk, "pll0_sysclk4", "da830-psc1");
+
+	clk = davinci_pll_sysclk_register(dev, &pll0_sysclk5, base);
+	clk_register_clkdev(clk, "pll0_sysclk5", "da830-psc1");
+
+	clk = davinci_pll_sysclk_register(dev, &pll0_sysclk6, base);
+	clk_register_clkdev(clk, "pll0_sysclk6", "da830-psc0");
+
+	/* pll0_sysclk7 gets no clkdev lookup, so its struct clk is not kept */
+	davinci_pll_sysclk_register(dev, &pll0_sysclk7, base);
+
+	clk = davinci_pll_auxclk_register(dev, "pll0_auxclk", base);
+	clk_register_clkdev(clk, NULL, "i2c_davinci.1");
+	clk_register_clkdev(clk, "timer0", NULL);
+	clk_register_clkdev(clk, NULL, "davinci-wdt");
+	return 0;
+}
diff --git a/drivers/clk/davinci/pll-da850.c b/drivers/clk/davinci/pll-da850.c
new file mode 100644
index 000000000000..2a038b7908cc
--- /dev/null
+++ b/drivers/clk/davinci/pll-da850.c
@@ -0,0 +1,212 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PLL clock descriptions for TI DA850/OMAP-L138/AM18XX
+ *
+ * Copyright (C) 2018 David Lechner <david@lechnology.com>
+ */
+
+#include <linux/bitops.h>
+#include <linux/clk-provider.h>
+#include <linux/clkdev.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/mfd/da8xx-cfgchip.h>
+#include <linux/of.h>
+#include <linux/types.h>
+
+#include "pll.h"
+
+#define OCSEL_OCSRC_OSCIN		0x14
+#define OCSEL_OCSRC_PLL0_SYSCLK(n)	(0x16 + (n))
+#define OCSEL_OCSRC_PLL1_OBSCLK		0x1e
+#define OCSEL_OCSRC_PLL1_SYSCLK(n)	(0x16 + (n))
+
+static const struct davinci_pll_clk_info da850_pll0_info = {
+	.name = "pll0",
+	.unlock_reg = CFGCHIP(0),
+	.unlock_mask = CFGCHIP0_PLL_MASTER_LOCK,
+	.pllm_mask = GENMASK(4, 0),
+	.pllm_min = 4,
+	.pllm_max = 32,
+	.pllout_min_rate = 300000000,
+	.pllout_max_rate = 600000000,
+	.flags = PLL_HAS_CLKMODE | PLL_HAS_PREDIV | PLL_HAS_POSTDIV |
+		 PLL_HAS_EXTCLKSRC,
+};
+
+/*
+ * NB: Technically, the clocks flagged as SYSCLK_FIXED_DIV are "fixed ratio",
+ * meaning that we could change the divider as long as we keep the correct
+ * ratio between all of the clocks, but we don't support that because there is
+ * currently not a need for it.
+ */
+
+SYSCLK(1, pll0_sysclk1, pll0_pllen, 5, SYSCLK_FIXED_DIV);
+SYSCLK(2, pll0_sysclk2, pll0_pllen, 5, SYSCLK_FIXED_DIV);
+SYSCLK(3, pll0_sysclk3, pll0_pllen, 5, 0);
+SYSCLK(4, pll0_sysclk4, pll0_pllen, 5, SYSCLK_FIXED_DIV);
+SYSCLK(5, pll0_sysclk5, pll0_pllen, 5, 0);
+SYSCLK(6, pll0_sysclk6, pll0_pllen, 5, SYSCLK_ARM_RATE | SYSCLK_FIXED_DIV);
+SYSCLK(7, pll0_sysclk7, pll0_pllen, 5, 0);
+
+static const char * const da850_pll0_obsclk_parent_names[] = {
+	"oscin",
+	"pll0_sysclk1",
+	"pll0_sysclk2",
+	"pll0_sysclk3",
+	"pll0_sysclk4",
+	"pll0_sysclk5",
+	"pll0_sysclk6",
+	"pll0_sysclk7",
+	"pll1_obsclk",
+};
+
+static u32 da850_pll0_obsclk_table[] = {
+	OCSEL_OCSRC_OSCIN,
+	OCSEL_OCSRC_PLL0_SYSCLK(1),
+	OCSEL_OCSRC_PLL0_SYSCLK(2),
+	OCSEL_OCSRC_PLL0_SYSCLK(3),
+	OCSEL_OCSRC_PLL0_SYSCLK(4),
+	OCSEL_OCSRC_PLL0_SYSCLK(5),
+	OCSEL_OCSRC_PLL0_SYSCLK(6),
+	OCSEL_OCSRC_PLL0_SYSCLK(7),
+	OCSEL_OCSRC_PLL1_OBSCLK,
+};
+
+static const struct davinci_pll_obsclk_info da850_pll0_obsclk_info = {
+	.name = "pll0_obsclk",
+	.parent_names = da850_pll0_obsclk_parent_names,
+	.num_parents = ARRAY_SIZE(da850_pll0_obsclk_parent_names),
+	.table = da850_pll0_obsclk_table,
+	.ocsrc_mask = GENMASK(4, 0),
+};
+
+int da850_pll0_init(struct device *dev, void __iomem *base)
+{
+	struct clk *clk;	/* reused for each clock that gets clkdev lookups */
+
+	davinci_pll_clk_register(dev, &da850_pll0_info, "ref_clk", base);
+
+	clk = davinci_pll_sysclk_register(dev, &pll0_sysclk1, base);
+	clk_register_clkdev(clk, "pll0_sysclk1", "da850-psc0");
+
+	clk = davinci_pll_sysclk_register(dev, &pll0_sysclk2, base);
+	clk_register_clkdev(clk, "pll0_sysclk2", "da850-psc0");
+	clk_register_clkdev(clk, "pll0_sysclk2", "da850-psc1");
+	clk_register_clkdev(clk, "pll0_sysclk2", "da850-async3-clksrc");
+
+	clk = davinci_pll_sysclk_register(dev, &pll0_sysclk3, base);
+	clk_register_clkdev(clk, "pll0_sysclk3", "da850-async1-clksrc");
+
+	clk = davinci_pll_sysclk_register(dev, &pll0_sysclk4, base);
+	clk_register_clkdev(clk, "pll0_sysclk4", "da850-psc0");
+	clk_register_clkdev(clk, "pll0_sysclk4", "da850-psc1");
+
+	davinci_pll_sysclk_register(dev, &pll0_sysclk5, base);
+
+	clk = davinci_pll_sysclk_register(dev, &pll0_sysclk6, base);
+	clk_register_clkdev(clk, "pll0_sysclk6", "da850-psc0");
+
+	davinci_pll_sysclk_register(dev, &pll0_sysclk7, base);
+
+	davinci_pll_auxclk_register(dev, "pll0_auxclk", base);
+
+	clk = clk_register_fixed_factor(dev, "async2", "pll0_auxclk",
+					CLK_IS_CRITICAL, 1, 1); /* mult 1, div 1 */
+
+	clk_register_clkdev(clk, NULL, "i2c_davinci.1");
+	clk_register_clkdev(clk, "timer0", NULL);
+	clk_register_clkdev(clk, NULL, "davinci-wdt");
+
+	davinci_pll_obsclk_register(dev, &da850_pll0_obsclk_info, base);
+
+	return 0;
+}
+
+static const struct davinci_pll_sysclk_info *da850_pll0_sysclk_info[] = {
+	&pll0_sysclk1,
+	&pll0_sysclk2,
+	&pll0_sysclk3,
+	&pll0_sysclk4,
+	&pll0_sysclk5,
+	&pll0_sysclk6,
+	&pll0_sysclk7,
+	NULL
+};
+
+int of_da850_pll0_init(struct device *dev, void __iomem *base)
+{
+	return of_davinci_pll_init(dev, &da850_pll0_info,
+				   &da850_pll0_obsclk_info,
+				   da850_pll0_sysclk_info, 7, base);
+}
+
+static const struct davinci_pll_clk_info da850_pll1_info = {
+	.name = "pll1",
+	.unlock_reg = CFGCHIP(3),
+	.unlock_mask = CFGCHIP3_PLL1_MASTER_LOCK,
+	.pllm_mask = GENMASK(4, 0),
+	.pllm_min = 4,
+	.pllm_max = 32,
+	.pllout_min_rate = 300000000,
+	.pllout_max_rate = 600000000,
+	.flags = PLL_HAS_POSTDIV,
+};
+
+SYSCLK(1, pll1_sysclk1, pll1_pllen, 5, SYSCLK_ALWAYS_ENABLED);
+SYSCLK(2, pll1_sysclk2, pll1_pllen, 5, 0);
+SYSCLK(3, pll1_sysclk3, pll1_pllen, 5, 0);
+
+static const char * const da850_pll1_obsclk_parent_names[] = {
+	"oscin",
+	"pll1_sysclk1",
+	"pll1_sysclk2",
+	"pll1_sysclk3",
+};
+
+static u32 da850_pll1_obsclk_table[] = {
+	OCSEL_OCSRC_OSCIN,
+	OCSEL_OCSRC_PLL1_SYSCLK(1),
+	OCSEL_OCSRC_PLL1_SYSCLK(2),
+	OCSEL_OCSRC_PLL1_SYSCLK(3),
+};
+
+static const struct davinci_pll_obsclk_info da850_pll1_obsclk_info = {
+	.name = "pll1_obsclk",
+	.parent_names = da850_pll1_obsclk_parent_names,
+	.num_parents = ARRAY_SIZE(da850_pll1_obsclk_parent_names),
+	.table = da850_pll1_obsclk_table,
+	.ocsrc_mask = GENMASK(4, 0),
+};
+
+int da850_pll1_init(struct device *dev, void __iomem *base)
+{
+	struct clk *clk;
+
+	davinci_pll_clk_register(dev, &da850_pll1_info, "oscin", base);
+
+	davinci_pll_sysclk_register(dev, &pll1_sysclk1, base);
+
+	clk = davinci_pll_sysclk_register(dev, &pll1_sysclk2, base);
+	clk_register_clkdev(clk, "pll1_sysclk2", "da850-async3-clksrc");
+
+	davinci_pll_sysclk_register(dev, &pll1_sysclk3, base);
+
+	davinci_pll_obsclk_register(dev, &da850_pll1_obsclk_info, base);
+
+	return 0;
+}
+
+static const struct davinci_pll_sysclk_info *da850_pll1_sysclk_info[] = {
+	&pll1_sysclk1,
+	&pll1_sysclk2,
+	&pll1_sysclk3,
+	NULL
+};
+
+int of_da850_pll1_init(struct device *dev, void __iomem *base)
+{
+	return of_davinci_pll_init(dev, &da850_pll1_info,
+				   &da850_pll1_obsclk_info,
+				   da850_pll1_sysclk_info, 3, base);
+}
diff --git a/drivers/clk/davinci/pll-dm355.c b/drivers/clk/davinci/pll-dm355.c
new file mode 100644
index 000000000000..5345f8286c50
--- /dev/null
+++ b/drivers/clk/davinci/pll-dm355.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PLL clock descriptions for TI DM355
+ *
+ * Copyright (C) 2018 David Lechner <david@lechnology.com>
+ */
+
+#include <linux/bitops.h>
+#include <linux/clkdev.h>
+#include <linux/init.h>
+#include <linux/types.h>
+
+#include "pll.h"
+
+static const struct davinci_pll_clk_info dm355_pll1_info = {
+	.name = "pll1",
+	.pllm_mask = GENMASK(7, 0),
+	.pllm_min = 92,
+	.pllm_max = 184,
+	.flags = PLL_HAS_CLKMODE | PLL_HAS_PREDIV | PLL_PREDIV_ALWAYS_ENABLED |
+		 PLL_PREDIV_FIXED8 | PLL_HAS_POSTDIV |
+		 PLL_POSTDIV_ALWAYS_ENABLED | PLL_POSTDIV_FIXED_DIV,
+};
+
+SYSCLK(1, pll1_sysclk1, pll1, 5, SYSCLK_FIXED_DIV | SYSCLK_ALWAYS_ENABLED);
+SYSCLK(2, pll1_sysclk2, pll1, 5, SYSCLK_FIXED_DIV | SYSCLK_ALWAYS_ENABLED);
+SYSCLK(3, pll1_sysclk3, pll1, 5, SYSCLK_ALWAYS_ENABLED);
+SYSCLK(4, pll1_sysclk4, pll1, 5, SYSCLK_ALWAYS_ENABLED);
+
+int dm355_pll1_init(struct device *dev, void __iomem *base)
+{
+	struct clk *clk;
+
+	davinci_pll_clk_register(dev, &dm355_pll1_info, "ref_clk", base);
+
+	clk = davinci_pll_sysclk_register(dev, &pll1_sysclk1, base);
+	clk_register_clkdev(clk, "pll1_sysclk1", "dm355-psc");
+
+	clk = davinci_pll_sysclk_register(dev, &pll1_sysclk2, base);
+	clk_register_clkdev(clk, "pll1_sysclk2", "dm355-psc");
+
+	clk = davinci_pll_sysclk_register(dev, &pll1_sysclk3, base);
+	clk_register_clkdev(clk, "pll1_sysclk3", "dm355-psc");
+
+	clk = davinci_pll_sysclk_register(dev, &pll1_sysclk4, base);
+	clk_register_clkdev(clk, "pll1_sysclk4", "dm355-psc");
+
+	clk = davinci_pll_auxclk_register(dev, "pll1_auxclk", base);
+	clk_register_clkdev(clk, "pll1_auxclk", "dm355-psc");
+
+	davinci_pll_sysclkbp_clk_register(dev, "pll1_sysclkbp", base);
+
+	return 0;
+}
+
+static const struct davinci_pll_clk_info dm355_pll2_info = {
+	.name = "pll2",
+	.pllm_mask = GENMASK(7, 0),
+	.pllm_min = 92,
+	.pllm_max = 184,
+	.flags = PLL_HAS_PREDIV | PLL_PREDIV_ALWAYS_ENABLED | PLL_HAS_POSTDIV |
+		 PLL_POSTDIV_ALWAYS_ENABLED | PLL_POSTDIV_FIXED_DIV,
+};
+
+SYSCLK(1, pll2_sysclk1, pll2, 5, SYSCLK_FIXED_DIV);
+SYSCLK(2, pll2_sysclk2, pll2, 5, SYSCLK_FIXED_DIV | SYSCLK_ALWAYS_ENABLED);
+
+int dm355_pll2_init(struct device *dev, void __iomem *base)
+{
+	davinci_pll_clk_register(dev, &dm355_pll2_info, "oscin", base);
+
+	davinci_pll_sysclk_register(dev, &pll2_sysclk1, base);
+
+	davinci_pll_sysclk_register(dev, &pll2_sysclk2, base);
+
+	davinci_pll_sysclkbp_clk_register(dev, "pll2_sysclkbp", base);
+
+	return 0;
+}
diff --git a/drivers/clk/davinci/pll-dm365.c b/drivers/clk/davinci/pll-dm365.c
new file mode 100644
index 000000000000..5f8d9f42d0f3
--- /dev/null
+++ b/drivers/clk/davinci/pll-dm365.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PLL clock descriptions for TI DM365
+ *
+ * Copyright (C) 2018 David Lechner <david@lechnology.com>
+ */
+
+#include <linux/bitops.h>
+#include <linux/clkdev.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+#include "pll.h"
+
+#define OCSEL_OCSRC_ENABLE	0	/* only value in dm365_pll_obsclk_table */
+
+static const struct davinci_pll_clk_info dm365_pll1_info = {
+	.name = "pll1",			/* prefix for "pll1_pllout" etc. */
+	.pllm_mask = GENMASK(9, 0),	/* PLLM field is 10 bits wide here */
+	.pllm_min = 1,
+	.pllm_max = 1023,
+	.flags = PLL_HAS_CLKMODE | PLL_HAS_PREDIV | PLL_HAS_POSTDIV |
+		 PLL_POSTDIV_ALWAYS_ENABLED | PLL_PLLM_2X,
+};
+/* PLL_PLLM_2X selects dm365_pll_ops in pll.c; all SYSCLKs below are critical */
+SYSCLK(1, pll1_sysclk1, pll1_pllen, 5, SYSCLK_ALWAYS_ENABLED);
+SYSCLK(2, pll1_sysclk2, pll1_pllen, 5, SYSCLK_ALWAYS_ENABLED);
+SYSCLK(3, pll1_sysclk3, pll1_pllen, 5, SYSCLK_ALWAYS_ENABLED);
+SYSCLK(4, pll1_sysclk4, pll1_pllen, 5, SYSCLK_ALWAYS_ENABLED);
+SYSCLK(5, pll1_sysclk5, pll1_pllen, 5, SYSCLK_ALWAYS_ENABLED);
+SYSCLK(6, pll1_sysclk6, pll1_pllen, 5, SYSCLK_ALWAYS_ENABLED);
+SYSCLK(7, pll1_sysclk7, pll1_pllen, 5, SYSCLK_ALWAYS_ENABLED);
+SYSCLK(8, pll1_sysclk8, pll1_pllen, 5, SYSCLK_ALWAYS_ENABLED);
+SYSCLK(9, pll1_sysclk9, pll1_pllen, 5, SYSCLK_ALWAYS_ENABLED);
+
+/*
+ * This is a bit of a hack to make OCSEL[OCSRC] on DM365 look like OCSEL[OCSRC]
+ * on DA850. On DM365, OCSEL[OCSRC] is just an enable/disable bit instead of a
+ * multiplexer. By modeling it as a single parent mux clock, the clock code will
+ * still do the right thing in this case.
+ */
+static const char * const dm365_pll_obsclk_parent_names[] = {
+	"oscin",
+};
+/* one entry per parent name above; shared by the PLL1 and PLL2 OBSCLK infos */
+static u32 dm365_pll_obsclk_table[] = {
+	OCSEL_OCSRC_ENABLE,
+};
+
+static const struct davinci_pll_obsclk_info dm365_pll1_obsclk_info = {
+	.name = "pll1_obsclk",
+	.parent_names = dm365_pll_obsclk_parent_names,
+	.num_parents = ARRAY_SIZE(dm365_pll_obsclk_parent_names),
+	.table = dm365_pll_obsclk_table,	/* becomes clk_mux.table */
+	.ocsrc_mask = BIT(4),			/* becomes clk_mux.mask */
+};
+
+int dm365_pll1_init(struct device *dev, void __iomem *base)
+{
+	struct clk *clk;
+
+	davinci_pll_clk_register(dev, &dm365_pll1_info, "ref_clk", base);
+	/* SYSCLK1-5, SYSCLK8 and AUXCLK get clkdev lookups for "dm365-psc" */
+	clk = davinci_pll_sysclk_register(dev, &pll1_sysclk1, base);
+	clk_register_clkdev(clk, "pll1_sysclk1", "dm365-psc");
+
+	clk = davinci_pll_sysclk_register(dev, &pll1_sysclk2, base);
+	clk_register_clkdev(clk, "pll1_sysclk2", "dm365-psc");
+
+	clk = davinci_pll_sysclk_register(dev, &pll1_sysclk3, base);
+	clk_register_clkdev(clk, "pll1_sysclk3", "dm365-psc");
+
+	clk = davinci_pll_sysclk_register(dev, &pll1_sysclk4, base);
+	clk_register_clkdev(clk, "pll1_sysclk4", "dm365-psc");
+
+	clk = davinci_pll_sysclk_register(dev, &pll1_sysclk5, base);
+	clk_register_clkdev(clk, "pll1_sysclk5", "dm365-psc");
+	/* SYSCLK6, 7 and 9 are registered without a clkdev lookup */
+	davinci_pll_sysclk_register(dev, &pll1_sysclk6, base);
+
+	davinci_pll_sysclk_register(dev, &pll1_sysclk7, base);
+
+	clk = davinci_pll_sysclk_register(dev, &pll1_sysclk8, base);
+	clk_register_clkdev(clk, "pll1_sysclk8", "dm365-psc");
+
+	davinci_pll_sysclk_register(dev, &pll1_sysclk9, base);
+	/* the lookup device must be this SoC's PSC, not the DM355 one */
+	clk = davinci_pll_auxclk_register(dev, "pll1_auxclk", base);
+	clk_register_clkdev(clk, "pll1_auxclk", "dm365-psc");
+
+	davinci_pll_sysclkbp_clk_register(dev, "pll1_sysclkbp", base);
+
+	davinci_pll_obsclk_register(dev, &dm365_pll1_obsclk_info, base);
+	/* no result above is checked, so this always reports success */
+	return 0;
+}
+
+static const struct davinci_pll_clk_info dm365_pll2_info = {
+	.name = "pll2",			/* prefix for "pll2_pllout" etc. */
+	.pllm_mask = GENMASK(9, 0),	/* PLLM field is 10 bits wide here */
+	.pllm_min = 1,
+	.pllm_max = 1023,
+	.flags = PLL_HAS_PREDIV | PLL_HAS_POSTDIV | PLL_POSTDIV_ALWAYS_ENABLED |
+		 PLL_PLLM_2X,
+};
+/* no PLL_HAS_CLKMODE above: this PLL does not register "oscin" itself */
+SYSCLK(1, pll2_sysclk1, pll2_pllen, 5, SYSCLK_ALWAYS_ENABLED);
+SYSCLK(2, pll2_sysclk2, pll2_pllen, 5, SYSCLK_ALWAYS_ENABLED);
+SYSCLK(3, pll2_sysclk3, pll2_pllen, 5, SYSCLK_ALWAYS_ENABLED);
+SYSCLK(4, pll2_sysclk4, pll2_pllen, 5, SYSCLK_ALWAYS_ENABLED);
+SYSCLK(5, pll2_sysclk5, pll2_pllen, 5, SYSCLK_ALWAYS_ENABLED);
+
+static const struct davinci_pll_obsclk_info dm365_pll2_obsclk_info = {
+	.name = "pll2_obsclk",
+	.parent_names = dm365_pll_obsclk_parent_names,
+	.num_parents = ARRAY_SIZE(dm365_pll_obsclk_parent_names),
+	.table = dm365_pll_obsclk_table,	/* becomes clk_mux.table */
+	.ocsrc_mask = BIT(4),			/* becomes clk_mux.mask */
+};
+
+int dm365_pll2_init(struct device *dev, void __iomem *base)
+{
+	struct clk *clk;
+
+	davinci_pll_clk_register(dev, &dm365_pll2_info, "oscin", base);
+
+	davinci_pll_sysclk_register(dev, &pll2_sysclk1, base);
+	/* lookups use PLL2's own names so they cannot duplicate PLL1's */
+	clk = davinci_pll_sysclk_register(dev, &pll2_sysclk2, base);
+	clk_register_clkdev(clk, "pll2_sysclk2", "dm365-psc");
+
+	davinci_pll_sysclk_register(dev, &pll2_sysclk3, base);
+
+	clk = davinci_pll_sysclk_register(dev, &pll2_sysclk4, base);
+	clk_register_clkdev(clk, "pll2_sysclk4", "dm365-psc");
+
+	davinci_pll_sysclk_register(dev, &pll2_sysclk5, base);
+
+	davinci_pll_auxclk_register(dev, "pll2_auxclk", base);
+
+	davinci_pll_obsclk_register(dev, &dm365_pll2_obsclk_info, base);
+	/* no result above is checked, so this always reports success */
+	return 0;
+}
diff --git a/drivers/clk/davinci/pll-dm644x.c b/drivers/clk/davinci/pll-dm644x.c
new file mode 100644
index 000000000000..69bf785377cf
--- /dev/null
+++ b/drivers/clk/davinci/pll-dm644x.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PLL clock descriptions for TI DM644X
+ *
+ * Copyright (C) 2018 David Lechner <david@lechnology.com>
+ */
+
+#include <linux/bitops.h>
+#include <linux/clkdev.h>
+#include <linux/init.h>
+#include <linux/types.h>
+
+#include "pll.h"
+
+static const struct davinci_pll_clk_info dm644x_pll1_info = {
+	.name = "pll1",			/* prefix for "pll1_pllout" etc. */
+	.pllm_mask = GENMASK(4, 0),	/* PLLM field is 5 bits wide here */
+	.pllm_min = 1,
+	.pllm_max = 32,
+	.pllout_min_rate = 400000000,	/* Hz; fed to clk_hw_set_rate_range() */
+	.pllout_max_rate = 600000000, /* 810MHz @ 1.3V, -810 only */
+	.flags = PLL_HAS_CLKMODE | PLL_HAS_POSTDIV,
+};
+/* SYSCLK_FIXED_DIV: dividers use the read-only divider ops (see pll.c) */
+SYSCLK(1, pll1_sysclk1, pll1_pllen, 4, SYSCLK_FIXED_DIV);
+SYSCLK(2, pll1_sysclk2, pll1_pllen, 4, SYSCLK_FIXED_DIV);
+SYSCLK(3, pll1_sysclk3, pll1_pllen, 4, SYSCLK_FIXED_DIV);
+SYSCLK(5, pll1_sysclk5, pll1_pllen, 4, SYSCLK_FIXED_DIV);
+
+int dm644x_pll1_init(struct device *dev, void __iomem *base)
+{
+	struct clk *clk;
+
+	davinci_pll_clk_register(dev, &dm644x_pll1_info, "ref_clk", base);
+	/* SYSCLK1, 2, 3, 5 and AUXCLK get clkdev lookups for "dm644x-psc" */
+	clk = davinci_pll_sysclk_register(dev, &pll1_sysclk1, base);
+	clk_register_clkdev(clk, "pll1_sysclk1", "dm644x-psc");
+
+	clk = davinci_pll_sysclk_register(dev, &pll1_sysclk2, base);
+	clk_register_clkdev(clk, "pll1_sysclk2", "dm644x-psc");
+
+	clk = davinci_pll_sysclk_register(dev, &pll1_sysclk3, base);
+	clk_register_clkdev(clk, "pll1_sysclk3", "dm644x-psc");
+
+	clk = davinci_pll_sysclk_register(dev, &pll1_sysclk5, base);
+	clk_register_clkdev(clk, "pll1_sysclk5", "dm644x-psc");
+
+	clk = davinci_pll_auxclk_register(dev, "pll1_auxclk", base);
+	clk_register_clkdev(clk, "pll1_auxclk", "dm644x-psc");
+	/* SYSCLKBP is registered without a clkdev lookup */
+	davinci_pll_sysclkbp_clk_register(dev, "pll1_sysclkbp", base);
+	/* no result above is checked, so this always reports success */
+	return 0;
+}
+
+static const struct davinci_pll_clk_info dm644x_pll2_info = {
+	.name = "pll2",			/* prefix for "pll2_pllout" etc. */
+	.pllm_mask = GENMASK(4, 0),	/* PLLM field is 5 bits wide here */
+	.pllm_min = 1,
+	.pllm_max = 32,
+	.pllout_min_rate = 400000000,	/* Hz; fed to clk_hw_set_rate_range() */
+	.pllout_max_rate = 900000000,
+	.flags = PLL_HAS_POSTDIV | PLL_POSTDIV_FIXED_DIV,
+};
+/* flags of 0: writable divider, not critical, cannot re-rate the PLL */
+SYSCLK(1, pll2_sysclk1, pll2_pllen, 4, 0);
+SYSCLK(2, pll2_sysclk2, pll2_pllen, 4, 0);
+
+int dm644x_pll2_init(struct device *dev, void __iomem *base)
+{
+	davinci_pll_clk_register(dev, &dm644x_pll2_info, "oscin", base);
+	/* "oscin" is only created by a PLL that has PLL_HAS_CLKMODE (pll.c) */
+	davinci_pll_sysclk_register(dev, &pll2_sysclk1, base);
+
+	davinci_pll_sysclk_register(dev, &pll2_sysclk2, base);
+	/* none of the PLL2 clocks gets a clkdev lookup */
+	davinci_pll_sysclkbp_clk_register(dev, "pll2_sysclkbp", base);
+	/* no result above is checked, so this always reports success */
+	return 0;
+}
diff --git a/drivers/clk/davinci/pll-dm646x.c b/drivers/clk/davinci/pll-dm646x.c
new file mode 100644
index 000000000000..a61cc3256418
--- /dev/null
+++ b/drivers/clk/davinci/pll-dm646x.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PLL clock descriptions for TI DM646X
+ *
+ * Copyright (C) 2018 David Lechner <david@lechnology.com>
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/clkdev.h>
+#include <linux/init.h>
+#include <linux/types.h>
+
+#include "pll.h"
+
+static const struct davinci_pll_clk_info dm646x_pll1_info = {
+	.name = "pll1",			/* prefix for "pll1_pllout" etc. */
+	.pllm_mask = GENMASK(4, 0),	/* PLLM field is 5 bits wide here */
+	.pllm_min = 14,
+	.pllm_max = 32,
+	.flags = PLL_HAS_CLKMODE,	/* no PREDIV/POSTDIV clocks are made */
+};
+/* there is no SYSCLK7 entry; ids 1-3 are read-only, the rest writable */
+SYSCLK(1, pll1_sysclk1, pll1_pllen, 4, SYSCLK_FIXED_DIV);
+SYSCLK(2, pll1_sysclk2, pll1_pllen, 4, SYSCLK_FIXED_DIV);
+SYSCLK(3, pll1_sysclk3, pll1_pllen, 4, SYSCLK_FIXED_DIV);
+SYSCLK(4, pll1_sysclk4, pll1_pllen, 4, 0);
+SYSCLK(5, pll1_sysclk5, pll1_pllen, 4, 0);
+SYSCLK(6, pll1_sysclk6, pll1_pllen, 4, 0);
+SYSCLK(8, pll1_sysclk8, pll1_pllen, 4, 0);
+SYSCLK(9, pll1_sysclk9, pll1_pllen, 4, 0);
+
+int dm646x_pll1_init(struct device *dev, void __iomem *base)
+{
+	struct clk *clk;
+
+	davinci_pll_clk_register(dev, &dm646x_pll1_info, "ref_clk", base);
+	/* SYSCLK1-5 get clkdev lookups for "dm646x-psc" */
+	clk = davinci_pll_sysclk_register(dev, &pll1_sysclk1, base);
+	clk_register_clkdev(clk, "pll1_sysclk1", "dm646x-psc");
+
+	clk = davinci_pll_sysclk_register(dev, &pll1_sysclk2, base);
+	clk_register_clkdev(clk, "pll1_sysclk2", "dm646x-psc");
+	/* SYSCLK3 is additionally the unnamed clock of "davinci-wdt" */
+	clk = davinci_pll_sysclk_register(dev, &pll1_sysclk3, base);
+	clk_register_clkdev(clk, "pll1_sysclk3", "dm646x-psc");
+	clk_register_clkdev(clk, NULL, "davinci-wdt");
+
+	clk = davinci_pll_sysclk_register(dev, &pll1_sysclk4, base);
+	clk_register_clkdev(clk, "pll1_sysclk4", "dm646x-psc");
+
+	clk = davinci_pll_sysclk_register(dev, &pll1_sysclk5, base);
+	clk_register_clkdev(clk, "pll1_sysclk5", "dm646x-psc");
+	/* everything from here on is registered without a clkdev lookup */
+	davinci_pll_sysclk_register(dev, &pll1_sysclk6, base);
+
+	davinci_pll_sysclk_register(dev, &pll1_sysclk8, base);
+
+	davinci_pll_sysclk_register(dev, &pll1_sysclk9, base);
+
+	davinci_pll_sysclkbp_clk_register(dev, "pll1_sysclkbp", base);
+
+	davinci_pll_auxclk_register(dev, "pll1_auxclk", base);
+	/* no result above is checked, so this always reports success */
+	return 0;
+}
+
+static const struct davinci_pll_clk_info dm646x_pll2_info = {
+	.name = "pll2",			/* prefix for "pll2_pllout" etc. */
+	.pllm_mask = GENMASK(4, 0),	/* PLLM field is 5 bits wide here */
+	.pllm_min = 14,
+	.pllm_max = 32,
+	.flags = 0,			/* no CLKMODE, PREDIV or POSTDIV */
+};
+/* flags of 0: writable divider, not critical, cannot re-rate the PLL */
+SYSCLK(1, pll2_sysclk1, pll2_pllen, 4, 0);
+
+int dm646x_pll2_init(struct device *dev, void __iomem *base)
+{
+	davinci_pll_clk_register(dev, &dm646x_pll2_info, "oscin", base);
+	/* PLL2 only provides SYSCLK1, and no clkdev lookup is added for it */
+	davinci_pll_sysclk_register(dev, &pll2_sysclk1, base);
+	/* no result above is checked, so this always reports success */
+	return 0;
+}
diff --git a/drivers/clk/davinci/pll.c b/drivers/clk/davinci/pll.c
new file mode 100644
index 000000000000..23a24c944f1d
--- /dev/null
+++ b/drivers/clk/davinci/pll.c
@@ -0,0 +1,899 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PLL clock driver for TI Davinci SoCs
+ *
+ * Copyright (C) 2018 David Lechner <david@lechnology.com>
+ *
+ * Based on arch/arm/mach-davinci/clock.c
+ * Copyright (C) 2006-2007 Texas Instruments.
+ * Copyright (C) 2008-2009 Deep Root Systems, LLC
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/mfd/syscon.h>
+#include <linux/notifier.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_data/clk-davinci-pll.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#include "pll.h"
+
+#define MAX_NAME_SIZE	20
+#define OSCIN_CLK_NAME	"oscin"
+
+#define REVID		0x000
+#define PLLCTL		0x100
+#define OCSEL		0x104
+#define PLLSECCTL	0x108
+#define PLLM		0x110
+#define PREDIV		0x114
+#define PLLDIV1		0x118
+#define PLLDIV2		0x11c
+#define PLLDIV3		0x120
+#define OSCDIV		0x124
+#define POSTDIV		0x128
+#define BPDIV		0x12c
+#define PLLCMD		0x138
+#define PLLSTAT		0x13c
+#define ALNCTL		0x140
+#define DCHANGE		0x144
+#define CKEN		0x148
+#define CKSTAT		0x14c
+#define SYSTAT		0x150
+#define PLLDIV4		0x160
+#define PLLDIV5		0x164
+#define PLLDIV6		0x168
+#define PLLDIV7		0x16c
+#define PLLDIV8		0x170
+#define PLLDIV9		0x174
+
+#define PLLCTL_PLLEN		BIT(0)
+#define PLLCTL_PLLPWRDN		BIT(1)
+#define PLLCTL_PLLRST		BIT(3)
+#define PLLCTL_PLLDIS		BIT(4)
+#define PLLCTL_PLLENSRC		BIT(5)
+#define PLLCTL_CLKMODE		BIT(8)
+
+/* shared by most *DIV registers */
+#define DIV_RATIO_SHIFT		0
+#define DIV_RATIO_WIDTH		5
+#define DIV_ENABLE_SHIFT	15
+
+#define PLLCMD_GOSET		BIT(0)
+#define PLLSTAT_GOSTAT		BIT(0)
+
+#define CKEN_OBSCLK_SHIFT	1
+#define CKEN_AUXEN_SHIFT	0
+
+/*
+ * OMAP-L138 system reference guide recommends a wait for 4 OSCIN/CLKIN
+ * cycles to ensure that the PLLC has switched to bypass mode. Delay of 1us
+ * ensures we are good for all > 4MHz OSCIN/CLKIN inputs. Typically the input
+ * is ~25MHz. Units are micro seconds.
+ */
+#define PLL_BYPASS_TIME		1
+
+/* From OMAP-L138 datasheet table 6-4. Units are micro seconds */
+#define PLL_RESET_TIME		1
+
+/*
+ * From OMAP-L138 datasheet table 6-4; assuming prediv = 1, sqrt(pllm) = 4
+ * Units are micro seconds.
+ */
+#define PLL_LOCK_TIME		20
+
+/**
+ * struct davinci_pll_clk - Main PLL clock (aka PLLOUT)
+ * @hw: clk_hw for the pll
+ * @base: Base memory address
+ * @pllm_min: The minimum allowable PLLM[PLLM] value
+ * @pllm_max: The maximum allowable PLLM[PLLM] value
+ * @pllm_mask: Bitmask for PLLM[PLLM] value
+ */
+struct davinci_pll_clk {
+	struct clk_hw hw;
+	void __iomem *base;
+	u32 pllm_min;
+	u32 pllm_max;
+	u32 pllm_mask;
+};
+/* recover the wrapper from the clk_hw pointer the clk framework hands back */
+#define to_davinci_pll_clk(_hw) \
+	container_of((_hw), struct davinci_pll_clk, hw)
+
+static unsigned long davinci_pll_recalc_rate(struct clk_hw *hw,
+					     unsigned long parent_rate)
+{
+	/* PLLM[PLLM] stores the multiplier minus one, hence the "+ 1" */
+	struct davinci_pll_clk *pll_clk = to_davinci_pll_clk(hw);
+	u32 pllm = readl(pll_clk->base + PLLM);
+
+	pllm &= pll_clk->pllm_mask;
+
+	/* the sum is formed in u32 before widening, exactly as before */
+	return parent_rate * (pllm + 1);
+}
+
+static int davinci_pll_determine_rate(struct clk_hw *hw,
+				      struct clk_rate_request *req)
+{
+	struct davinci_pll_clk *pll = to_davinci_pll_clk(hw);
+	struct clk_hw *parent = req->best_parent_hw;
+	unsigned long parent_rate = req->best_parent_rate;
+	unsigned long rate = req->rate;
+	unsigned long best_rate, r;
+	u32 mult;
+
+	/* there is a limited range of valid outputs (see datasheet) */
+	if (rate < req->min_rate)
+		return -EINVAL;
+	/* NOTE(review): a zero parent_rate would divide by zero just below */
+	rate = min(rate, req->max_rate);
+	mult = rate / parent_rate;
+	best_rate = parent_rate * mult;
+
+	/* easy case when there is no PREDIV */
+	if (!(clk_hw_get_flags(hw) & CLK_SET_RATE_PARENT)) {
+		if (best_rate < req->min_rate)
+			return -EINVAL;
+		/* NOTE(review): mult is rate / parent, set_rate writes mult - 1 */
+		if (mult < pll->pllm_min || mult > pll->pllm_max)
+			return -EINVAL;
+
+		req->rate = best_rate;
+
+		return 0;
+	}
+
+	/* see if the PREDIV clock can help us */
+	best_rate = 0;
+	/* walk every multiplier; keep the highest rate not above the request */
+	for (mult = pll->pllm_min; mult <= pll->pllm_max; mult++) {
+		parent_rate = clk_hw_round_rate(parent, rate / mult);
+		r = parent_rate * mult;
+		if (r < req->min_rate)
+			continue;
+		if (r > rate || r > req->max_rate)
+			break;
+		if (r > best_rate) {
+			best_rate = r;
+			req->rate = best_rate;
+			req->best_parent_rate = parent_rate;
+			if (best_rate == rate)
+				break;
+		}
+	}
+	/* NOTE(review): also returns 0 when no multiplier qualified (req as is) */
+	return 0;
+}
+
+static int davinci_pll_set_rate(struct clk_hw *hw, unsigned long rate,
+				unsigned long parent_rate)
+{
+	/* the register takes the multiplier minus one (cf. recalc_rate) */
+	void __iomem *pllm_reg = to_davinci_pll_clk(hw)->base + PLLM;
+	u32 pllm = rate / parent_rate;
+
+	pllm -= 1;
+	writel(pllm, pllm_reg);
+	return 0;
+}
+
+#ifdef CONFIG_DEBUG_FS
+static int davinci_pll_debug_init(struct clk_hw *hw, struct dentry *dentry);
+#else
+#define davinci_pll_debug_init NULL
+#endif
+/* ops for PLLs whose PLLM register holds "multiplier - 1" */
+static const struct clk_ops davinci_pll_ops = {
+	.recalc_rate	= davinci_pll_recalc_rate,
+	.determine_rate	= davinci_pll_determine_rate,
+	.set_rate	= davinci_pll_set_rate,
+	.debug_init	= davinci_pll_debug_init,
+};
+
+/* DM365 variant: the output rate is parent * 2 * PLLM, with no "+ 1" */
+static unsigned long dm365_pll_recalc_rate(struct clk_hw *hw,
+					   unsigned long parent_rate)
+{
+	struct davinci_pll_clk *pll_clk = to_davinci_pll_clk(hw);
+	u32 pllm = readl(pll_clk->base + PLLM);
+
+	/* only the bits covered by pllm_mask belong to the multiplier */
+	pllm &= pll_clk->pllm_mask;
+
+	/* doubling happens in u32 before widening, exactly as before */
+	return parent_rate * (pllm * 2);
+}
+
+static const struct clk_ops dm365_pll_ops = {
+	.recalc_rate	= dm365_pll_recalc_rate, /* no .set_rate: read-only */
+	.debug_init	= davinci_pll_debug_init,
+};
+
+/**
+ * davinci_pll_div_register - common *DIV clock implementation
+ * @dev: device used for the devm allocations and clock registration
+ * @name: the clock name
+ * @parent_name: the parent clock name
+ * @reg: the *DIV register
+ * @fixed: if true, the divider is a fixed value
+ * @flags: bitmap of CLK_* flags from clock-provider.h
+ */
+static struct clk *davinci_pll_div_register(struct device *dev,
+					    const char *name,
+					    const char *parent_name,
+					    void __iomem *reg,
+					    bool fixed, u32 flags)
+{
+	const char * const *parent_names = parent_name ? &parent_name : NULL;
+	int num_parents = parent_name ? 1 : 0;
+	const struct clk_ops *divider_ops = &clk_divider_ops;
+	struct clk_gate *gate;
+	struct clk_divider *divider;
+
+	gate = devm_kzalloc(dev, sizeof(*gate), GFP_KERNEL);
+	if (!gate)
+		return ERR_PTR(-ENOMEM);
+	gate->reg = reg;
+	gate->bit_idx = DIV_ENABLE_SHIFT;
+	/* the gate bit and the ratio field live in the same *DIV register */
+	divider = devm_kzalloc(dev, sizeof(*divider), GFP_KERNEL);
+	if (!divider)
+		return ERR_PTR(-ENOMEM);
+
+	divider->reg = reg;
+	divider->shift = DIV_RATIO_SHIFT;
+	divider->width = DIV_RATIO_WIDTH;
+
+	if (fixed) {
+		divider->flags |= CLK_DIVIDER_READ_ONLY;
+		divider_ops = &clk_divider_ro_ops;
+	}
+
+	return clk_register_composite(dev, name, parent_names, num_parents,
+				      NULL, NULL, &divider->hw, divider_ops,
+				      &gate->hw, &clk_gate_ops, flags);
+}
+
+struct davinci_pllen_clk {
+	struct clk_hw hw;
+	void __iomem *base;	/* PLL controller registers (PLLCTL, PLLCMD...) */
+};
+/* recover the wrapper from the clk_hw pointer the clk framework hands back */
+#define to_davinci_pllen_clk(_hw) \
+	container_of((_hw), struct davinci_pllen_clk, hw)
+
+static const struct clk_ops davinci_pllen_ops = {
+	/* this clock only uses the clock notification feature */
+};
+
+/*
+ * The PLL has to be switched into bypass mode while we are changing the rate,
+ * so we do that on the PLLEN clock since it is the end of the line. This will
+ * switch to bypass before any of the parent clocks (PREDIV, PLL, POSTDIV) are
+ * changed and will switch back to the PLL after the changes have been made.
+ */
+static int davinci_pllen_rate_change(struct notifier_block *nb,
+				     unsigned long flags, void *data)
+{
+	struct clk_notifier_data *cnd = data;
+	struct clk_hw *hw = __clk_get_hw(cnd->clk);
+	struct davinci_pllen_clk *pll = to_davinci_pllen_clk(hw);
+	u32 ctrl;
+
+	ctrl = readl(pll->base + PLLCTL);
+	/* PLLCTL is read once; each step below edits and writes this copy */
+	if (flags == PRE_RATE_CHANGE) {
+		/* Switch the PLL to bypass mode */
+		ctrl &= ~(PLLCTL_PLLENSRC | PLLCTL_PLLEN);
+		writel(ctrl, pll->base + PLLCTL);
+
+		udelay(PLL_BYPASS_TIME);
+
+		/* Reset and enable PLL */
+		ctrl &= ~(PLLCTL_PLLRST | PLLCTL_PLLDIS);
+		writel(ctrl, pll->base + PLLCTL);
+	} else {	/* any event other than PRE_RATE_CHANGE */
+		udelay(PLL_RESET_TIME);
+
+		/* Bring PLL out of reset */
+		ctrl |= PLLCTL_PLLRST;
+		writel(ctrl, pll->base + PLLCTL);
+
+		udelay(PLL_LOCK_TIME);
+
+		/* Remove PLL from bypass mode */
+		ctrl |= PLLCTL_PLLEN;
+		writel(ctrl, pll->base + PLLCTL);
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct davinci_pll_platform_data *davinci_pll_get_pdata(struct device *dev)
+{
+	struct davinci_pll_platform_data *data = dev_get_platdata(dev);
+
+	/*
+	 * Platform data is optional: when none was supplied (always the case
+	 * with device tree) a zeroed, device-managed struct stands in for it.
+	 */
+	if (!data) {
+		data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
+		if (!data)
+			return NULL;
+	}
+
+	/* only device tree probing needs the CFGCHIP regmap looked up here */
+	if (!dev->of_node)
+		return data;
+
+	data->cfgchip = syscon_regmap_lookup_by_compatible("ti,da830-cfgchip");
+	return data;
+}
+
+static struct notifier_block davinci_pllen_notifier = {
+	.notifier_call = davinci_pllen_rate_change, /* one nb for all PLLENs */
+};
+
+/**
+ * davinci_pll_clk_register - Register a PLL clock
+ * @dev: Device that owns the devm allocations and the registered clocks
+ * @info: The device-specific clock info
+ * @parent_name: The parent clock name
+ * @base: The PLL's memory region
+ *
+ * This creates a series of clocks that represent the PLL.
+ *
+ *     OSCIN > [PREDIV >] PLL > [POSTDIV >] PLLEN
+ *
+ * - OSCIN is the parent clock (on secondary PLL, may come from primary PLL)
+ * - PREDIV and POSTDIV are optional (depends on the PLL controller)
+ * - PLL is the PLL output (aka PLLOUT)
+ * - PLLEN is the bypass multiplexer
+ *
+ * Returns: The PLLOUT clock or a negative error code.
+ */
+struct clk *davinci_pll_clk_register(struct device *dev,
+				     const struct davinci_pll_clk_info *info,
+				     const char *parent_name,
+				     void __iomem *base)
+{
+	struct davinci_pll_platform_data *pdata;
+	char prediv_name[MAX_NAME_SIZE];
+	char pllout_name[MAX_NAME_SIZE];
+	char postdiv_name[MAX_NAME_SIZE];
+	char pllen_name[MAX_NAME_SIZE];
+	struct clk_init_data init;
+	struct davinci_pll_clk *pllout;
+	struct davinci_pllen_clk *pllen;
+	struct clk *pllout_clk, *clk;
+
+	pdata = davinci_pll_get_pdata(dev);
+	if (!pdata)
+		return ERR_PTR(-ENOMEM);
+
+	if (info->flags & PLL_HAS_CLKMODE) {
+		/*
+		 * If a PLL has PLLCTL[CLKMODE], then it is the primary PLL.
+		 * We register a clock named "oscin" that serves as the internal
+		 * "input clock" domain shared by both PLLs (if there are 2)
+		 * and will be the parent clock to the AUXCLK, SYSCLKBP and
+		 * OBSCLK domains. NB: The various TRMs use "OSCIN" to mean
+		 * a number of different things. In this driver we use it to
+		 * mean the signal after the PLLCTL[CLKMODE] switch.
+		 */
+		clk = clk_register_fixed_factor(dev, OSCIN_CLK_NAME,
+						parent_name, 0, 1, 1);
+		if (IS_ERR(clk))
+			return clk;
+
+		parent_name = OSCIN_CLK_NAME;
+	}
+
+	if (info->flags & PLL_HAS_PREDIV) {
+		bool fixed = info->flags & PLL_PREDIV_FIXED_DIV;
+		u32 flags = 0;
+
+		snprintf(prediv_name, MAX_NAME_SIZE, "%s_prediv", info->name);
+
+		if (info->flags & PLL_PREDIV_ALWAYS_ENABLED)
+			flags |= CLK_IS_CRITICAL;
+
+		/* Some? DM355 chips don't correctly report the PREDIV value */
+		if (info->flags & PLL_PREDIV_FIXED8)
+			clk = clk_register_fixed_factor(dev, prediv_name,
+						parent_name, flags, 1, 8);
+		else
+			clk = davinci_pll_div_register(dev, prediv_name,
+				parent_name, base + PREDIV, fixed, flags);
+		if (IS_ERR(clk))
+			return clk;
+
+		parent_name = prediv_name;
+	}
+
+	/* Unlock writing to PLL registers */
+	if (info->unlock_reg) {
+		if (IS_ERR_OR_NULL(pdata->cfgchip))
+			dev_warn(dev, "Failed to get CFGCHIP (%ld)\n",
+				 PTR_ERR(pdata->cfgchip));
+		else
+			regmap_write_bits(pdata->cfgchip, info->unlock_reg,
+					  info->unlock_mask, 0);
+	}
+
+	pllout = devm_kzalloc(dev, sizeof(*pllout), GFP_KERNEL);
+	if (!pllout)
+		return ERR_PTR(-ENOMEM);
+
+	snprintf(pllout_name, MAX_NAME_SIZE, "%s_pllout", info->name);
+
+	init.name = pllout_name;
+	if (info->flags & PLL_PLLM_2X)
+		init.ops = &dm365_pll_ops;
+	else
+		init.ops = &davinci_pll_ops;
+	init.parent_names = &parent_name;
+	init.num_parents = 1;
+	init.flags = 0;
+
+	if (info->flags & PLL_HAS_PREDIV)
+		init.flags |= CLK_SET_RATE_PARENT;
+
+	pllout->hw.init = &init;
+	pllout->base = base;
+	pllout->pllm_mask = info->pllm_mask;
+	pllout->pllm_min = info->pllm_min;
+	pllout->pllm_max = info->pllm_max;
+
+	pllout_clk = devm_clk_register(dev, &pllout->hw);
+	if (IS_ERR(pllout_clk))
+		return pllout_clk;
+	/* NOTE(review): infos that leave pllout_*_rate unset pass 0, 0 here */
+	clk_hw_set_rate_range(&pllout->hw, info->pllout_min_rate,
+			      info->pllout_max_rate);
+	parent_name = pllout_name;
+
+	if (info->flags & PLL_HAS_POSTDIV) {
+		bool fixed = info->flags & PLL_POSTDIV_FIXED_DIV;
+		u32 flags = CLK_SET_RATE_PARENT;
+
+		snprintf(postdiv_name, MAX_NAME_SIZE, "%s_postdiv", info->name);
+
+		if (info->flags & PLL_POSTDIV_ALWAYS_ENABLED)
+			flags |= CLK_IS_CRITICAL;
+
+		clk = davinci_pll_div_register(dev, postdiv_name, parent_name,
+					       base + POSTDIV, fixed, flags);
+		if (IS_ERR(clk))
+			return clk;
+
+		parent_name = postdiv_name;
+	}
+	/* PLLEN has its own wrapper type, so size the allocation from pllen */
+	pllen = devm_kzalloc(dev, sizeof(*pllen), GFP_KERNEL);
+	if (!pllen)
+		return ERR_PTR(-ENOMEM);
+
+	snprintf(pllen_name, MAX_NAME_SIZE, "%s_pllen", info->name);
+
+	init.name = pllen_name;
+	init.ops = &davinci_pllen_ops;
+	init.parent_names = &parent_name;
+	init.num_parents = 1;
+	init.flags = CLK_SET_RATE_PARENT;
+
+	pllen->hw.init = &init;
+	pllen->base = base;
+
+	clk = devm_clk_register(dev, &pllen->hw);
+	if (IS_ERR(clk))
+		return clk;
+
+	clk_notifier_register(clk, &davinci_pllen_notifier);
+
+	return pllout_clk;
+}
+
+/**
+ * davinci_pll_auxclk_register - Register the AUXCLK gate of a PLL controller
+ * @dev: Device handed through to clk_register_gate()
+ * @name: Name given to the new clock
+ * @base: Start of the PLL controller's register window
+ */
+struct clk *davinci_pll_auxclk_register(struct device *dev, const char *name,
+					void __iomem *base)
+{
+	return clk_register_gate(dev, name, OSCIN_CLK_NAME, 0, base + CKEN,
+				 CKEN_AUXEN_SHIFT, 0, NULL);
+}
+
+/**
+ * davinci_pll_sysclkbp_clk_register - Register the SYSCLKBP bypass divider
+ * @dev: Device handed through to clk_register_divider()
+ * @name: Name given to the new clock
+ * @base: Start of the PLL controller's register window
+ *
+ * The divider is a child of "oscin" and is registered read-only on BPDIV.
+ */
+struct clk *davinci_pll_sysclkbp_clk_register(struct device *dev,
+					      const char *name, void __iomem *base)
+{
+	return clk_register_divider(dev, name, OSCIN_CLK_NAME, 0, base + BPDIV, DIV_RATIO_SHIFT, DIV_RATIO_WIDTH, CLK_DIVIDER_READ_ONLY, NULL);
+}
+
+/**
+ * davinci_pll_obsclk_register - Register oscillator divider clock (OBSCLK)
+ * @dev: Device used for the devm allocations and clock registration
+ * @info: The clock info
+ * @base: The PLL memory region
+ */
+struct clk *
+davinci_pll_obsclk_register(struct device *dev,
+			    const struct davinci_pll_obsclk_info *info,
+			    void __iomem *base)
+{
+	struct clk_mux *mux;
+	struct clk_gate *gate;
+	struct clk_divider *divider;
+	u32 oscdiv;
+
+	mux = devm_kzalloc(dev, sizeof(*mux), GFP_KERNEL);
+	if (!mux)
+		return ERR_PTR(-ENOMEM);
+	mux->reg = base + OCSEL;
+	mux->table = info->table;
+	mux->mask = info->ocsrc_mask;
+
+	gate = devm_kzalloc(dev, sizeof(*gate), GFP_KERNEL);
+	if (!gate)
+		return ERR_PTR(-ENOMEM);
+
+	gate->reg = base + CKEN;
+	gate->bit_idx = CKEN_OBSCLK_SHIFT;
+
+	divider = devm_kzalloc(dev, sizeof(*divider), GFP_KERNEL);
+	if (!divider)
+		return ERR_PTR(-ENOMEM);
+
+	divider->reg = base + OSCDIV;
+	divider->shift = DIV_RATIO_SHIFT;
+	divider->width = DIV_RATIO_WIDTH;
+
+	/* make sure divider is enabled just in case bootloader disabled it */
+	oscdiv = readl(base + OSCDIV);
+	oscdiv |= BIT(DIV_ENABLE_SHIFT);
+	writel(oscdiv, base + OSCDIV);
+	/* mux (OCSEL), divider (OSCDIV) and gate (CKEN) form one composite */
+	return clk_register_composite(dev, info->name, info->parent_names,
+				      info->num_parents,
+				      &mux->hw, &clk_mux_ops,
+				      &divider->hw, &clk_divider_ops,
+				      &gate->hw, &clk_gate_ops, 0);
+}
+
+/* The PLL SYSCLKn clocks have a mechanism for synchronizing rate changes. */
+static int davinci_pll_sysclk_rate_change(struct notifier_block *nb,
+					  unsigned long flags, void *data)
+{
+	struct clk_notifier_data *cnd = data;
+	struct clk_hw *hw = __clk_get_hw(clk_get_parent(cnd->clk));
+	struct davinci_pllen_clk *pll = to_davinci_pllen_clk(hw);
+	u32 pllcmd, pllstat;
+	/* NOTE(review): assumes the SYSCLK's parent is a PLLEN clock - confirm */
+	switch (flags) {
+	case POST_RATE_CHANGE:
+		/* apply the changes */
+		pllcmd = readl(pll->base + PLLCMD);
+		pllcmd |= PLLCMD_GOSET;
+		writel(pllcmd, pll->base + PLLCMD);
+		/* fallthrough */
+	case PRE_RATE_CHANGE:
+		/* Wait for outstanding changes to take effect (no timeout) */
+		do {
+			pllstat = readl(pll->base + PLLSTAT);
+		} while (pllstat & PLLSTAT_GOSTAT);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block davinci_pll_sysclk_notifier = {
+	.notifier_call = davinci_pll_sysclk_rate_change, /* all SYSCLKn */
+};
+
+/**
+ * davinci_pll_sysclk_register - Register divider clocks (SYSCLKn)
+ * @dev: Device used for the devm allocations and clock registration
+ * @info: The clock info
+ * @base: The PLL memory region
+ */
+struct clk *
+davinci_pll_sysclk_register(struct device *dev,
+			    const struct davinci_pll_sysclk_info *info,
+			    void __iomem *base)
+{
+	const struct clk_ops *divider_ops = &clk_divider_ops;
+	struct clk_gate *gate;
+	struct clk_divider *divider;
+	struct clk *clk;
+	u32 reg;
+	u32 flags = 0;
+
+	/* PLLDIVn registers are not entirely consecutive */
+	if (info->id < 4)
+		reg = PLLDIV1 + 4 * (info->id - 1);
+	else
+		reg = PLLDIV4 + 4 * (info->id - 4);
+	/* i.e. PLLDIV1-3 at 0x118-0x120 and PLLDIV4-9 at 0x160-0x174 */
+	gate = devm_kzalloc(dev, sizeof(*gate), GFP_KERNEL);
+	if (!gate)
+		return ERR_PTR(-ENOMEM);
+	gate->reg = base + reg;
+	gate->bit_idx = DIV_ENABLE_SHIFT;
+
+	divider = devm_kzalloc(dev, sizeof(*divider), GFP_KERNEL);
+	if (!divider)
+		return ERR_PTR(-ENOMEM);
+
+	divider->reg = base + reg;
+	divider->shift = DIV_RATIO_SHIFT;
+	divider->width = info->ratio_width;
+	divider->flags = 0;
+
+	if (info->flags & SYSCLK_FIXED_DIV) {
+		divider->flags |= CLK_DIVIDER_READ_ONLY;
+		divider_ops = &clk_divider_ro_ops;
+	}
+
+	/* Only the ARM clock can change the parent PLL rate */
+	if (info->flags & SYSCLK_ARM_RATE)
+		flags |= CLK_SET_RATE_PARENT;
+
+	if (info->flags & SYSCLK_ALWAYS_ENABLED)
+		flags |= CLK_IS_CRITICAL;
+
+	clk = clk_register_composite(dev, info->name, &info->parent_name, 1,
+				     NULL, NULL, &divider->hw, divider_ops,
+				     &gate->hw, &clk_gate_ops, flags);
+	if (IS_ERR(clk))
+		return clk;
+	/* every SYSCLKn is given the same file-scope notifier_block */
+	clk_notifier_register(clk, &davinci_pll_sysclk_notifier);
+
+	return clk;
+}
+
+int of_davinci_pll_init(struct device *dev,
+			const struct davinci_pll_clk_info *info,
+			const struct davinci_pll_obsclk_info *obsclk_info,
+			const struct davinci_pll_sysclk_info **div_info,
+			u8 max_sysclk_id,
+			void __iomem *base)
+{
+	struct device_node *node = dev->of_node;
+	struct device_node *child;
+	const char *parent_name;
+	struct clk *clk;
+
+	if (info->flags & PLL_HAS_CLKMODE)
+		parent_name = of_clk_get_parent_name(node, 0);
+	else
+		parent_name = OSCIN_CLK_NAME;
+
+	clk = davinci_pll_clk_register(dev, info, parent_name, base);
+	if (IS_ERR(clk)) {
+		dev_err(dev, "failed to register %s\n", info->name);
+		return PTR_ERR(clk);
+	}
+	/* each optional child node below becomes a clock provider if enabled */
+	child = of_get_child_by_name(node, "pllout");
+	if (of_device_is_available(child))
+		of_clk_add_provider(child, of_clk_src_simple_get, clk);
+	of_node_put(child);
+
+	child = of_get_child_by_name(node, "sysclk");
+	if (of_device_is_available(child)) {
+		struct clk_onecell_data *clk_data;
+		struct clk **clks;
+		int n_clks = max_sysclk_id + 1;
+		int i;
+
+		clk_data = devm_kzalloc(dev, sizeof(*clk_data), GFP_KERNEL);
+		clks = devm_kmalloc_array(dev, n_clks, sizeof(*clks), GFP_KERNEL);
+		if (!clk_data || !clks) {
+			/* devm frees the memory; only the node ref is ours */
+			of_node_put(child);
+			return -ENOMEM;
+		}
+
+		clk_data->clks = clks;
+		clk_data->clk_num = n_clks;
+
+		for (i = 0; i < n_clks; i++)
+			clks[i] = ERR_PTR(-ENOENT);
+
+		for (; *div_info; div_info++) {
+			clk = davinci_pll_sysclk_register(dev, *div_info, base);
+			if (IS_ERR(clk))
+				dev_warn(dev, "failed to register %s (%ld)\n",
+					 (*div_info)->name, PTR_ERR(clk));
+			else	/* assumes id <= max_sysclk_id */
+				clks[(*div_info)->id] = clk;
+		}
+		of_clk_add_provider(child, of_clk_src_onecell_get, clk_data);
+	}
+	of_node_put(child);
+
+	child = of_get_child_by_name(node, "auxclk");
+	if (of_device_is_available(child)) {
+		char child_name[MAX_NAME_SIZE];
+
+		snprintf(child_name, MAX_NAME_SIZE, "%s_auxclk", info->name);
+
+		clk = davinci_pll_auxclk_register(dev, child_name, base);
+		if (IS_ERR(clk))
+			dev_warn(dev, "failed to register %s (%ld)\n",
+				 child_name, PTR_ERR(clk));
+		else
+			of_clk_add_provider(child, of_clk_src_simple_get, clk);
+	}
+	of_node_put(child);
+
+	child = of_get_child_by_name(node, "obsclk");
+	if (of_device_is_available(child)) {
+		if (obsclk_info)
+			clk = davinci_pll_obsclk_register(dev, obsclk_info, base);
+		else
+			clk = ERR_PTR(-EINVAL);
+
+		if (IS_ERR(clk))
+			dev_warn(dev, "failed to register obsclk (%ld)\n",
+				 PTR_ERR(clk));
+		else
+			of_clk_add_provider(child, of_clk_src_simple_get, clk);
+	}
+	of_node_put(child);
+	/* failures of the optional child clocks are only warned about */
+	return 0;
+}
+
+static const struct of_device_id davinci_pll_of_match[] = {
+	{ .compatible = "ti,da850-pll0", .data = of_da850_pll0_init },
+	{ .compatible = "ti,da850-pll1", .data = of_da850_pll1_init },
+	{ }
+};
+/* non-DT devices: driver_data carries the init function used by probe */
+static const struct platform_device_id davinci_pll_id_table[] = {
+	{ .name = "da830-pll",   .driver_data = (kernel_ulong_t)da830_pll_init   },
+	{ .name = "da850-pll0",  .driver_data = (kernel_ulong_t)da850_pll0_init  },
+	{ .name = "da850-pll1",  .driver_data = (kernel_ulong_t)da850_pll1_init  },
+	{ .name = "dm355-pll1",  .driver_data = (kernel_ulong_t)dm355_pll1_init  },
+	{ .name = "dm355-pll2",  .driver_data = (kernel_ulong_t)dm355_pll2_init  },
+	{ .name = "dm365-pll1",  .driver_data = (kernel_ulong_t)dm365_pll1_init  },
+	{ .name = "dm365-pll2",  .driver_data = (kernel_ulong_t)dm365_pll2_init  },
+	{ .name = "dm644x-pll1", .driver_data = (kernel_ulong_t)dm644x_pll1_init },
+	{ .name = "dm644x-pll2", .driver_data = (kernel_ulong_t)dm644x_pll2_init },
+	{ .name = "dm646x-pll1", .driver_data = (kernel_ulong_t)dm646x_pll1_init },
+	{ .name = "dm646x-pll2", .driver_data = (kernel_ulong_t)dm646x_pll2_init },
+	{ }
+};
+
+typedef int (*davinci_pll_init)(struct device *dev, void __iomem *base);
+
+/* Pick the SoC-specific init callback, map the registers and run it. */
+static int davinci_pll_probe(struct platform_device *pdev)
+{
+	const struct of_device_id *match;
+	davinci_pll_init init_fn = NULL;
+	void __iomem *regs;
+
+	/* a device tree match wins; otherwise use the platform id entry */
+	match = of_match_device(davinci_pll_of_match, &pdev->dev);
+	if (match)
+		init_fn = match->data;
+	else if (pdev->id_entry)
+		init_fn = (void *)pdev->id_entry->driver_data;
+
+	if (!init_fn) {
+		dev_err(&pdev->dev, "unable to find driver data\n");
+		return -EINVAL;
+	}
+
+	regs = devm_ioremap_resource(&pdev->dev,
+			platform_get_resource(pdev, IORESOURCE_MEM, 0));
+	if (IS_ERR(regs))
+		return PTR_ERR(regs);
+
+	return init_fn(&pdev->dev, regs);
+}
+
+static struct platform_driver davinci_pll_driver = {
+	.probe		= davinci_pll_probe,
+	.driver		= {
+		.name		= "davinci-pll-clk",
+		.of_match_table	= davinci_pll_of_match,
+	},
+	.id_table	= davinci_pll_id_table,
+};
+/* Registers the platform driver; hooked up as a postcore initcall below. */
+static int __init davinci_pll_driver_init(void)
+{
+	return platform_driver_register(&davinci_pll_driver);
+}
+
+/* has to be postcore_initcall because PSC devices depend on PLL parent clocks */
+postcore_initcall(davinci_pll_driver_init);
+
+#ifdef CONFIG_DEBUG_FS
+#include <linux/debugfs.h>
+
+#define DEBUG_REG(n)	\
+{			\
+	.name	= #n,	\
+	.offset	= n,	\
+}
+/* every register offset defined at the top of this file, in the same order */
+static const struct debugfs_reg32 davinci_pll_regs[] = {
+	DEBUG_REG(REVID),
+	DEBUG_REG(PLLCTL),
+	DEBUG_REG(OCSEL),
+	DEBUG_REG(PLLSECCTL),
+	DEBUG_REG(PLLM),
+	DEBUG_REG(PREDIV),
+	DEBUG_REG(PLLDIV1),
+	DEBUG_REG(PLLDIV2),
+	DEBUG_REG(PLLDIV3),
+	DEBUG_REG(OSCDIV),
+	DEBUG_REG(POSTDIV),
+	DEBUG_REG(BPDIV),
+	DEBUG_REG(PLLCMD),
+	DEBUG_REG(PLLSTAT),
+	DEBUG_REG(ALNCTL),
+	DEBUG_REG(DCHANGE),
+	DEBUG_REG(CKEN),
+	DEBUG_REG(CKSTAT),
+	DEBUG_REG(SYSTAT),
+	DEBUG_REG(PLLDIV4),
+	DEBUG_REG(PLLDIV5),
+	DEBUG_REG(PLLDIV6),
+	DEBUG_REG(PLLDIV7),
+	DEBUG_REG(PLLDIV8),
+	DEBUG_REG(PLLDIV9),
+};
+
+static int davinci_pll_debug_init(struct clk_hw *hw, struct dentry *dentry)
+{
+	struct debugfs_regset32 *rs = kzalloc(sizeof(*rs), GFP_KERNEL);
+	struct dentry *file;
+
+	if (!rs)
+		return -ENOMEM;
+
+	/* describe the PLL register block for a "registers" debugfs file */
+	rs->base = to_davinci_pll_clk(hw)->base;
+	rs->regs = davinci_pll_regs;
+	rs->nregs = ARRAY_SIZE(davinci_pll_regs);
+
+	file = debugfs_create_regset32("registers", 0400, dentry, rs);
+	if (!IS_ERR(file))
+		return 0;
+
+	/* creation failed: the regset is still ours to free */
+	kfree(rs);
+
+	return PTR_ERR(file);
+}
+#endif
diff --git a/drivers/clk/davinci/pll.h b/drivers/clk/davinci/pll.h
new file mode 100644
index 000000000000..b1b6fb23f972
--- /dev/null
+++ b/drivers/clk/davinci/pll.h
@@ -0,0 +1,141 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Clock driver for TI Davinci PLL controllers
+ *
+ * Copyright (C) 2018 David Lechner <david@lechnology.com>
+ */
+
+#ifndef __CLK_DAVINCI_PLL_H___
+#define __CLK_DAVINCI_PLL_H___
+
+#include <linux/bitops.h>
+#include <linux/clk-provider.h>
+#include <linux/of.h>
+#include <linux/types.h>
+
+#define PLL_HAS_CLKMODE			BIT(0) /* PLL has PLLCTL[CLKMODE] */
+#define PLL_HAS_PREDIV			BIT(1) /* has prediv before PLL */
+#define PLL_PREDIV_ALWAYS_ENABLED	BIT(2) /* don't clear DEN bit */
+#define PLL_PREDIV_FIXED_DIV		BIT(3) /* fixed divider value */
+#define PLL_HAS_POSTDIV			BIT(4) /* has postdiv after PLL */
+#define PLL_POSTDIV_ALWAYS_ENABLED	BIT(5) /* don't clear DEN bit */
+#define PLL_POSTDIV_FIXED_DIV		BIT(6) /* fixed divider value */
+#define PLL_HAS_EXTCLKSRC		BIT(7) /* has selectable bypass */
+#define PLL_PLLM_2X			BIT(8) /* PLLM value is 2x (DM365) */
+#define PLL_PREDIV_FIXED8		BIT(9) /* DM355 quirk */
+
+/** davinci_pll_clk_info - controller-specific PLL info
+ * @name: The name of the PLL
+ * @unlock_reg: Optional CFGCHIP register for unlocking PLL
+ * @unlock_mask: Bitmask used with @unlock_reg
+ * @pllm_mask: Bitmask for PLLM[PLLM] value
+ * @pllm_min: Minimum allowable value for PLLM[PLLM]
+ * @pllm_max: Maximum allowable value for PLLM[PLLM]
+ * @pllout_min_rate: Minimum allowable rate for PLLOUT
+ * @pllout_max_rate: Maximum allowable rate for PLLOUT
+ * @flags: Bitmap of PLL_* flags.
+ */
+struct davinci_pll_clk_info {
+	const char *name;
+	u32 unlock_reg;
+	u32 unlock_mask;
+	u32 pllm_mask;
+	u32 pllm_min;
+	u32 pllm_max;
+	unsigned long pllout_min_rate;
+	unsigned long pllout_max_rate;
+	u32 flags;
+};
+
+#define SYSCLK_ARM_RATE		BIT(0) /* Controls ARM rate */
+#define SYSCLK_ALWAYS_ENABLED	BIT(1) /* Or bad things happen */
+#define SYSCLK_FIXED_DIV	BIT(2) /* Fixed divider */
+
+/** davinci_pll_sysclk_info - SYSCLKn-specific info
+ * @name: The name of the clock
+ * @parent_name: The name of the parent clock
+ * @id: "n" in "SYSCLKn"
+ * @ratio_width: Width (in bits) of RATIO in PLLDIVn register
+ * @flags: Bitmap of SYSCLK_* flags.
+ */
+struct davinci_pll_sysclk_info {
+	const char *name;
+	const char *parent_name;
+	u32 id;
+	u32 ratio_width;
+	u32 flags;
+};
+
+#define SYSCLK(i, n, p, w, f)				\
+static const struct davinci_pll_sysclk_info n = {	\
+	.name		= #n,				\
+	.parent_name	= #p,				\
+	.id		= (i),				\
+	.ratio_width	= (w),				\
+	.flags		= (f),				\
+}
+
+/** davinci_pll_obsclk_info - OBSCLK-specific info
+ * @name: The name of the clock
+ * @parent_names: Array of names of the parent clocks
+ * @num_parents: Length of @parent_names
+ * @table: Array of values to write to OCSEL[OCSRC] corresponding to
+ *         @parent_names
+ * @ocsrc_mask: Bitmask for OCSEL[OCSRC]
+ */
+struct davinci_pll_obsclk_info {
+	const char *name;
+	const char * const *parent_names;
+	u8 num_parents;
+	u32 *table;
+	u32 ocsrc_mask;
+};
+
+struct clk *davinci_pll_clk_register(struct device *dev,
+				     const struct davinci_pll_clk_info *info,
+				     const char *parent_name,
+				     void __iomem *base);
+struct clk *davinci_pll_auxclk_register(struct device *dev,
+					const char *name,
+					void __iomem *base);
+struct clk *davinci_pll_sysclkbp_clk_register(struct device *dev,
+					      const char *name,
+					      void __iomem *base);
+struct clk *
+davinci_pll_obsclk_register(struct device *dev,
+			    const struct davinci_pll_obsclk_info *info,
+			    void __iomem *base);
+struct clk *
+davinci_pll_sysclk_register(struct device *dev,
+			    const struct davinci_pll_sysclk_info *info,
+			    void __iomem *base);
+
+int of_davinci_pll_init(struct device *dev,
+			const struct davinci_pll_clk_info *info,
+			const struct davinci_pll_obsclk_info *obsclk_info,
+			const struct davinci_pll_sysclk_info **div_info,
+			u8 max_sysclk_id,
+			void __iomem *base);
+
+/* Platform-specific callbacks */
+
+int da830_pll_init(struct device *dev, void __iomem *base);
+
+int da850_pll0_init(struct device *dev, void __iomem *base);
+int da850_pll1_init(struct device *dev, void __iomem *base);
+int of_da850_pll0_init(struct device *dev, void __iomem *base);
+int of_da850_pll1_init(struct device *dev, void __iomem *base);
+
+int dm355_pll1_init(struct device *dev, void __iomem *base);
+int dm355_pll2_init(struct device *dev, void __iomem *base);
+
+int dm365_pll1_init(struct device *dev, void __iomem *base);
+int dm365_pll2_init(struct device *dev, void __iomem *base);
+
+int dm644x_pll1_init(struct device *dev, void __iomem *base);
+int dm644x_pll2_init(struct device *dev, void __iomem *base);
+
+int dm646x_pll1_init(struct device *dev, void __iomem *base);
+int dm646x_pll2_init(struct device *dev, void __iomem *base);
+
+#endif /* __CLK_DAVINCI_PLL_H___ */
diff --git a/drivers/clk/davinci/psc-da830.c b/drivers/clk/davinci/psc-da830.c
new file mode 100644
index 000000000000..f61abf5632ff
--- /dev/null
+++ b/drivers/clk/davinci/psc-da830.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PSC clock descriptions for TI DA830/OMAP-L137/AM17XX
+ *
+ * Copyright (C) 2018 David Lechner <david@lechnology.com>
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+#include <linux/clkdev.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+#include "psc.h"
+
+LPSC_CLKDEV1(spi0_clkdev,	NULL,	"spi_davinci.0");
+LPSC_CLKDEV1(mmcsd_clkdev,	NULL,	"da830-mmc.0");
+LPSC_CLKDEV1(uart0_clkdev,	NULL,	"serial8250.0");
+
+static const struct davinci_lpsc_clk_info da830_psc0_info[] = {
+	LPSC(0,  0, tpcc,     pll0_sysclk2, NULL,         LPSC_ALWAYS_ENABLED),
+	LPSC(1,  0, tptc0,    pll0_sysclk2, NULL,         LPSC_ALWAYS_ENABLED),
+	LPSC(2,  0, tptc1,    pll0_sysclk2, NULL,         LPSC_ALWAYS_ENABLED),
+	LPSC(3,  0, aemif,    pll0_sysclk3, NULL,         LPSC_ALWAYS_ENABLED),
+	LPSC(4,  0, spi0,     pll0_sysclk2, spi0_clkdev,  0),
+	LPSC(5,  0, mmcsd,    pll0_sysclk2, mmcsd_clkdev, 0),
+	LPSC(6,  0, aintc,    pll0_sysclk4, NULL,         LPSC_ALWAYS_ENABLED),
+	LPSC(7,  0, arm_rom,  pll0_sysclk2, NULL,         LPSC_ALWAYS_ENABLED),
+	LPSC(8,  0, secu_mgr, pll0_sysclk4, NULL,         LPSC_ALWAYS_ENABLED),
+	LPSC(9,  0, uart0,    pll0_sysclk2, uart0_clkdev, 0),
+	LPSC(10, 0, scr0_ss,  pll0_sysclk2, NULL,         LPSC_ALWAYS_ENABLED),
+	LPSC(11, 0, scr1_ss,  pll0_sysclk2, NULL,         LPSC_ALWAYS_ENABLED),
+	LPSC(12, 0, scr2_ss,  pll0_sysclk2, NULL,         LPSC_ALWAYS_ENABLED),
+	LPSC(13, 0, pruss,    pll0_sysclk2, NULL,         LPSC_ALWAYS_ENABLED),
+	LPSC(14, 0, arm,      pll0_sysclk6, NULL,         LPSC_ALWAYS_ENABLED),
+	{ }
+};
+
+static int da830_psc0_init(struct device *dev, void __iomem *base)
+{
+	return davinci_psc_register_clocks(dev, da830_psc0_info, 16, base);
+}
+
+static struct clk_bulk_data da830_psc0_parent_clks[] = {
+	{ .id = "pll0_sysclk2" },
+	{ .id = "pll0_sysclk3" },
+	{ .id = "pll0_sysclk4" },
+	{ .id = "pll0_sysclk6" },
+};
+
+const struct davinci_psc_init_data da830_psc0_init_data = {
+	.parent_clks		= da830_psc0_parent_clks,
+	.num_parent_clks	= ARRAY_SIZE(da830_psc0_parent_clks),
+	.psc_init		= &da830_psc0_init,
+};
+
+LPSC_CLKDEV2(usb0_clkdev,	NULL,	"musb-da8xx",
+				NULL,	"cppi41-dmaengine");
+LPSC_CLKDEV1(usb1_clkdev,	NULL,	"ohci-da8xx");
+/* REVISIT: gpio-davinci.c should be modified to drop con_id */
+LPSC_CLKDEV1(gpio_clkdev,	"gpio",	NULL);
+LPSC_CLKDEV2(emac_clkdev,	NULL,	"davinci_emac.1",
+				"fck",	"davinci_mdio.0");
+LPSC_CLKDEV1(mcasp0_clkdev,	NULL,	"davinci-mcasp.0");
+LPSC_CLKDEV1(mcasp1_clkdev,	NULL,	"davinci-mcasp.1");
+LPSC_CLKDEV1(mcasp2_clkdev,	NULL,	"davinci-mcasp.2");
+LPSC_CLKDEV1(spi1_clkdev,	NULL,	"spi_davinci.1");
+LPSC_CLKDEV1(i2c1_clkdev,	NULL,	"i2c_davinci.2");
+LPSC_CLKDEV1(uart1_clkdev,	NULL,	"serial8250.1");
+LPSC_CLKDEV1(uart2_clkdev,	NULL,	"serial8250.2");
+LPSC_CLKDEV1(lcdc_clkdev,	"fck",	"da8xx_lcdc.0");
+LPSC_CLKDEV2(pwm_clkdev,	"fck",	"ehrpwm.0",
+				"fck",	"ehrpwm.1");
+LPSC_CLKDEV3(ecap_clkdev,	"fck",	"ecap.0",
+				"fck",	"ecap.1",
+				"fck",	"ecap.2");
+LPSC_CLKDEV2(eqep_clkdev,	NULL,	"eqep.0",
+				NULL,	"eqep.1");
+
+static const struct davinci_lpsc_clk_info da830_psc1_info[] = {
+	LPSC(1,  0, usb0,   pll0_sysclk2, usb0_clkdev,   0),
+	LPSC(2,  0, usb1,   pll0_sysclk4, usb1_clkdev,   0),
+	LPSC(3,  0, gpio,   pll0_sysclk4, gpio_clkdev,   0),
+	LPSC(5,  0, emac,   pll0_sysclk4, emac_clkdev,   0),
+	LPSC(6,  0, emif3,  pll0_sysclk5, NULL,          LPSC_ALWAYS_ENABLED),
+	LPSC(7,  0, mcasp0, pll0_sysclk2, mcasp0_clkdev, 0),
+	LPSC(8,  0, mcasp1, pll0_sysclk2, mcasp1_clkdev, 0),
+	LPSC(9,  0, mcasp2, pll0_sysclk2, mcasp2_clkdev, 0),
+	LPSC(10, 0, spi1,   pll0_sysclk2, spi1_clkdev,   0),
+	LPSC(11, 0, i2c1,   pll0_sysclk4, i2c1_clkdev,   0),
+	LPSC(12, 0, uart1,  pll0_sysclk2, uart1_clkdev,  0),
+	LPSC(13, 0, uart2,  pll0_sysclk2, uart2_clkdev,  0),
+	LPSC(16, 0, lcdc,   pll0_sysclk2, lcdc_clkdev,   0),
+	LPSC(17, 0, pwm,    pll0_sysclk2, pwm_clkdev,    0),
+	LPSC(20, 0, ecap,   pll0_sysclk2, ecap_clkdev,   0),
+	LPSC(21, 0, eqep,   pll0_sysclk2, eqep_clkdev,   0),
+	{ }
+};
+
+static int da830_psc1_init(struct device *dev, void __iomem *base)
+{
+	return davinci_psc_register_clocks(dev, da830_psc1_info, 32, base);
+}
+
+static struct clk_bulk_data da830_psc1_parent_clks[] = {
+	{ .id = "pll0_sysclk2" },
+	{ .id = "pll0_sysclk4" },
+	{ .id = "pll0_sysclk5" },
+};
+
+const struct davinci_psc_init_data da830_psc1_init_data = {
+	.parent_clks		= da830_psc1_parent_clks,
+	.num_parent_clks	= ARRAY_SIZE(da830_psc1_parent_clks),
+	.psc_init		= &da830_psc1_init,
+};
diff --git a/drivers/clk/davinci/psc-da850.c b/drivers/clk/davinci/psc-da850.c
new file mode 100644
index 000000000000..d196dcbed560
--- /dev/null
+++ b/drivers/clk/davinci/psc-da850.c
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PSC clock descriptions for TI DA850/OMAP-L138/AM18XX
+ *
+ * Copyright (C) 2018 David Lechner <david@lechnology.com>
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/reset-controller.h>
+#include <linux/clk.h>
+#include <linux/clkdev.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/types.h>
+
+#include "psc.h"
+
+LPSC_CLKDEV2(emifa_clkdev,	NULL,		"ti-aemif",
+				"aemif",	"davinci_nand.0");
+LPSC_CLKDEV1(spi0_clkdev,	NULL,		"spi_davinci.0");
+LPSC_CLKDEV1(mmcsd0_clkdev,	NULL,		"da830-mmc.0");
+LPSC_CLKDEV1(uart0_clkdev,	NULL,		"serial8250.0");
+/* REVISIT: use dev_id instead of con_id */
+LPSC_CLKDEV1(arm_clkdev,	"arm",		NULL);
+LPSC_CLKDEV1(dsp_clkdev,	NULL,		"davinci-rproc.0");
+
+static const struct davinci_lpsc_clk_info da850_psc0_info[] = {
+	LPSC(0,  0, tpcc0,   pll0_sysclk2, NULL,          LPSC_ALWAYS_ENABLED),
+	LPSC(1,  0, tptc0,   pll0_sysclk2, NULL,          LPSC_ALWAYS_ENABLED),
+	LPSC(2,  0, tptc1,   pll0_sysclk2, NULL,          LPSC_ALWAYS_ENABLED),
+	LPSC(3,  0, emifa,   async1,       emifa_clkdev,  0),
+	LPSC(4,  0, spi0,    pll0_sysclk2, spi0_clkdev,   0),
+	LPSC(5,  0, mmcsd0,  pll0_sysclk2, mmcsd0_clkdev, 0),
+	LPSC(6,  0, aintc,   pll0_sysclk4, NULL,          LPSC_ALWAYS_ENABLED),
+	LPSC(7,  0, arm_rom, pll0_sysclk2, NULL,          LPSC_ALWAYS_ENABLED),
+	LPSC(9,  0, uart0,   pll0_sysclk2, uart0_clkdev,  0),
+	LPSC(13, 0, pruss,   pll0_sysclk2, NULL,          0),
+	LPSC(14, 0, arm,     pll0_sysclk6, arm_clkdev,    LPSC_ALWAYS_ENABLED | LPSC_SET_RATE_PARENT),
+	LPSC(15, 1, dsp,     pll0_sysclk1, dsp_clkdev,    LPSC_FORCE | LPSC_LOCAL_RESET),
+	{ }
+};
+
+LPSC_CLKDEV3(usb0_clkdev,	"fck",	"da830-usb-phy-clks",
+				NULL,	"musb-da8xx",
+				NULL,	"cppi41-dmaengine");
+LPSC_CLKDEV1(usb1_clkdev,	NULL,	"ohci-da8xx");
+/* REVISIT: gpio-davinci.c should be modified to drop con_id */
+LPSC_CLKDEV1(gpio_clkdev,	"gpio",	NULL);
+LPSC_CLKDEV2(emac_clkdev,	NULL,	"davinci_emac.1",
+				"fck",	"davinci_mdio.0");
+LPSC_CLKDEV1(mcasp0_clkdev,	NULL,	"davinci-mcasp.0");
+LPSC_CLKDEV1(sata_clkdev,	"fck",	"ahci_da850");
+LPSC_CLKDEV1(vpif_clkdev,	NULL,	"vpif");
+LPSC_CLKDEV1(spi1_clkdev,	NULL,	"spi_davinci.1");
+LPSC_CLKDEV1(i2c1_clkdev,	NULL,	"i2c_davinci.2");
+LPSC_CLKDEV1(uart1_clkdev,	NULL,	"serial8250.1");
+LPSC_CLKDEV1(uart2_clkdev,	NULL,	"serial8250.2");
+LPSC_CLKDEV1(mcbsp0_clkdev,	NULL,	"davinci-mcbsp.0");
+LPSC_CLKDEV1(mcbsp1_clkdev,	NULL,	"davinci-mcbsp.1");
+LPSC_CLKDEV1(lcdc_clkdev,	"fck",	"da8xx_lcdc.0");
+LPSC_CLKDEV3(ehrpwm_clkdev,	"fck",	"ehrpwm.0",
+				"fck",	"ehrpwm.1",
+				NULL,	"da830-tbclksync");
+LPSC_CLKDEV1(mmcsd1_clkdev,	NULL,	"da830-mmc.1");
+LPSC_CLKDEV3(ecap_clkdev,	"fck",	"ecap.0",
+				"fck",	"ecap.1",
+				"fck",	"ecap.2");
+
+static struct reset_control_lookup da850_psc0_reset_lookup_table[] = {
+	RESET_LOOKUP("da850-psc0", 15, "davinci-rproc.0", NULL),
+};
+
+static int da850_psc0_init(struct device *dev, void __iomem *base)
+{
+	reset_controller_add_lookup(da850_psc0_reset_lookup_table,
+				    ARRAY_SIZE(da850_psc0_reset_lookup_table));
+	return davinci_psc_register_clocks(dev, da850_psc0_info, 16, base);
+}
+
+static int of_da850_psc0_init(struct device *dev, void __iomem *base)
+{
+	return of_davinci_psc_clk_init(dev, da850_psc0_info, 16, base);
+}
+
+static struct clk_bulk_data da850_psc0_parent_clks[] = {
+	{ .id = "pll0_sysclk1" },
+	{ .id = "pll0_sysclk2" },
+	{ .id = "pll0_sysclk4" },
+	{ .id = "pll0_sysclk6" },
+	{ .id = "async1"       },
+};
+
+const struct davinci_psc_init_data da850_psc0_init_data = {
+	.parent_clks		= da850_psc0_parent_clks,
+	.num_parent_clks	= ARRAY_SIZE(da850_psc0_parent_clks),
+	.psc_init		= &da850_psc0_init,
+};
+
+const struct davinci_psc_init_data of_da850_psc0_init_data = {
+	.parent_clks		= da850_psc0_parent_clks,
+	.num_parent_clks	= ARRAY_SIZE(da850_psc0_parent_clks),
+	.psc_init		= &of_da850_psc0_init,
+};
+
+static const struct davinci_lpsc_clk_info da850_psc1_info[] = {
+	LPSC(0,  0, tpcc1,  pll0_sysclk2, NULL,          LPSC_ALWAYS_ENABLED),
+	LPSC(1,  0, usb0,   pll0_sysclk2, usb0_clkdev,   0),
+	LPSC(2,  0, usb1,   pll0_sysclk4, usb1_clkdev,   0),
+	LPSC(3,  0, gpio,   pll0_sysclk4, gpio_clkdev,   0),
+	LPSC(5,  0, emac,   pll0_sysclk4, emac_clkdev,   0),
+	LPSC(6,  0, ddr,    pll0_sysclk2, NULL,          LPSC_ALWAYS_ENABLED),
+	LPSC(7,  0, mcasp0, async3,       mcasp0_clkdev, 0),
+	LPSC(8,  0, sata,   pll0_sysclk2, sata_clkdev,   LPSC_FORCE),
+	LPSC(9,  0, vpif,   pll0_sysclk2, vpif_clkdev,   0),
+	LPSC(10, 0, spi1,   async3,       spi1_clkdev,   0),
+	LPSC(11, 0, i2c1,   pll0_sysclk4, i2c1_clkdev,   0),
+	LPSC(12, 0, uart1,  async3,       uart1_clkdev,  0),
+	LPSC(13, 0, uart2,  async3,       uart2_clkdev,  0),
+	LPSC(14, 0, mcbsp0, async3,       mcbsp0_clkdev, 0),
+	LPSC(15, 0, mcbsp1, async3,       mcbsp1_clkdev, 0),
+	LPSC(16, 0, lcdc,   pll0_sysclk2, lcdc_clkdev,   0),
+	LPSC(17, 0, ehrpwm, async3,       ehrpwm_clkdev, 0),
+	LPSC(18, 0, mmcsd1, pll0_sysclk2, mmcsd1_clkdev, 0),
+	LPSC(20, 0, ecap,   async3,       ecap_clkdev,   0),
+	LPSC(21, 0, tptc2,  pll0_sysclk2, NULL,          LPSC_ALWAYS_ENABLED),
+	{ }
+};
+
+static int da850_psc1_init(struct device *dev, void __iomem *base)
+{
+	return davinci_psc_register_clocks(dev, da850_psc1_info, 32, base);
+}
+
+static int of_da850_psc1_init(struct device *dev, void __iomem *base)
+{
+	return of_davinci_psc_clk_init(dev, da850_psc1_info, 32, base);
+}
+
+static struct clk_bulk_data da850_psc1_parent_clks[] = {
+	{ .id = "pll0_sysclk2" },
+	{ .id = "pll0_sysclk4" },
+	{ .id = "async3"       },
+};
+
+const struct davinci_psc_init_data da850_psc1_init_data = {
+	.parent_clks		= da850_psc1_parent_clks,
+	.num_parent_clks	= ARRAY_SIZE(da850_psc1_parent_clks),
+	.psc_init		= &da850_psc1_init,
+};
+
+const struct davinci_psc_init_data of_da850_psc1_init_data = {
+	.parent_clks		= da850_psc1_parent_clks,
+	.num_parent_clks	= ARRAY_SIZE(da850_psc1_parent_clks),
+	.psc_init		= &of_da850_psc1_init,
+};
diff --git a/drivers/clk/davinci/psc-dm355.c b/drivers/clk/davinci/psc-dm355.c
new file mode 100644
index 000000000000..6995ecea2677
--- /dev/null
+++ b/drivers/clk/davinci/psc-dm355.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PSC clock descriptions for TI DaVinci DM355
+ *
+ * Copyright (C) 2018 David Lechner <david@lechnology.com>
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+#include <linux/clkdev.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+#include "psc.h"
+
+LPSC_CLKDEV1(vpss_master_clkdev,	"master",	"vpss");
+LPSC_CLKDEV1(vpss_slave_clkdev,		"slave",	"vpss");
+LPSC_CLKDEV1(spi1_clkdev,		NULL,		"spi_davinci.1");
+LPSC_CLKDEV1(mmcsd1_clkdev,		NULL,		"dm6441-mmc.1");
+LPSC_CLKDEV1(mcbsp1_clkdev,		NULL,		"davinci-mcbsp.1");
+LPSC_CLKDEV1(usb_clkdev,		"usb",		NULL);
+LPSC_CLKDEV1(spi2_clkdev,		NULL,		"spi_davinci.2");
+LPSC_CLKDEV1(aemif_clkdev,		"aemif",	NULL);
+LPSC_CLKDEV1(mmcsd0_clkdev,		NULL,		"dm6441-mmc.0");
+LPSC_CLKDEV1(mcbsp0_clkdev,		NULL,		"davinci-mcbsp.0");
+LPSC_CLKDEV1(i2c_clkdev,		NULL,		"i2c_davinci.1");
+LPSC_CLKDEV1(uart0_clkdev,		NULL,		"serial8250.0");
+LPSC_CLKDEV1(uart1_clkdev,		NULL,		"serial8250.1");
+LPSC_CLKDEV1(uart2_clkdev,		NULL,		"serial8250.2");
+LPSC_CLKDEV1(spi0_clkdev,		NULL,		"spi_davinci.0");
+/* REVISIT: gpio-davinci.c should be modified to drop con_id */
+LPSC_CLKDEV1(gpio_clkdev,		"gpio",		NULL);
+LPSC_CLKDEV1(timer0_clkdev,		"timer0",	NULL);
+LPSC_CLKDEV1(timer2_clkdev,		NULL,		"davinci-wdt");
+LPSC_CLKDEV1(vpss_dac_clkdev,		"vpss_dac",	NULL);
+
+static const struct davinci_lpsc_clk_info dm355_psc_info[] = {
+	LPSC(0,  0, vpss_master, pll1_sysclk4, vpss_master_clkdev, 0),
+	LPSC(1,  0, vpss_slave,  pll1_sysclk4, vpss_slave_clkdev,  0),
+	LPSC(5,  0, timer3,      pll1_auxclk,  NULL,               0),
+	LPSC(6,  0, spi1,        pll1_sysclk2, spi1_clkdev,        0),
+	LPSC(7,  0, mmcsd1,      pll1_sysclk2, mmcsd1_clkdev,      0),
+	LPSC(8,  0, asp1,        pll1_sysclk2, NULL,               0),
+	LPSC(9,  0, usb,         pll1_sysclk2, usb_clkdev,         0),
+	LPSC(10, 0, pwm3,        pll1_auxclk,  NULL,               0),
+	LPSC(11, 0, spi2,        pll1_sysclk2, spi2_clkdev,        0),
+	LPSC(12, 0, rto,         pll1_auxclk,  NULL,               0),
+	LPSC(14, 0, aemif,       pll1_sysclk2, aemif_clkdev,       0),
+	LPSC(15, 0, mmcsd0,      pll1_sysclk2, mmcsd0_clkdev,      0),
+	LPSC(17, 0, asp0,        pll1_sysclk2, NULL,               0),
+	LPSC(18, 0, i2c,         pll1_auxclk,  i2c_clkdev,         0),
+	LPSC(19, 0, uart0,       pll1_auxclk,  uart0_clkdev,       0),
+	LPSC(20, 0, uart1,       pll1_auxclk,  uart1_clkdev,       0),
+	LPSC(21, 0, uart2,       pll1_sysclk2, uart2_clkdev,       0),
+	LPSC(22, 0, spi0,        pll1_sysclk2, spi0_clkdev,        0),
+	LPSC(23, 0, pwm0,        pll1_auxclk,  NULL,               0),
+	LPSC(24, 0, pwm1,        pll1_auxclk,  NULL,               0),
+	LPSC(25, 0, pwm2,        pll1_auxclk,  NULL,               0),
+	LPSC(26, 0, gpio,        pll1_sysclk2, gpio_clkdev,        0),
+	LPSC(27, 0, timer0,      pll1_auxclk,  timer0_clkdev,      LPSC_ALWAYS_ENABLED),
+	LPSC(28, 0, timer1,      pll1_auxclk,  NULL,               0),
+	/* REVISIT: why can't this be disabled? */
+	LPSC(29, 0, timer2,      pll1_auxclk,  timer2_clkdev,      LPSC_ALWAYS_ENABLED),
+	LPSC(31, 0, arm,         pll1_sysclk1, NULL,               LPSC_ALWAYS_ENABLED),
+	LPSC(40, 0, mjcp,        pll1_sysclk1, NULL,               0),
+	LPSC(41, 0, vpss_dac,    pll1_sysclk3, vpss_dac_clkdev,    0),
+	{ }
+};
+
+static int dm355_psc_init(struct device *dev, void __iomem *base)
+{
+	return davinci_psc_register_clocks(dev, dm355_psc_info, 42, base);
+}
+
+static struct clk_bulk_data dm355_psc_parent_clks[] = {
+	{ .id = "pll1_sysclk1" },
+	{ .id = "pll1_sysclk2" },
+	{ .id = "pll1_sysclk3" },
+	{ .id = "pll1_sysclk4" },
+	{ .id = "pll1_auxclk"  },
+};
+
+const struct davinci_psc_init_data dm355_psc_init_data = {
+	.parent_clks		= dm355_psc_parent_clks,
+	.num_parent_clks	= ARRAY_SIZE(dm355_psc_parent_clks),
+	.psc_init		= &dm355_psc_init,
+};
diff --git a/drivers/clk/davinci/psc-dm365.c b/drivers/clk/davinci/psc-dm365.c
new file mode 100644
index 000000000000..3ad915f37376
--- /dev/null
+++ b/drivers/clk/davinci/psc-dm365.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PSC clock descriptions for TI DaVinci DM365
+ *
+ * Copyright (C) 2018 David Lechner <david@lechnology.com>
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+#include <linux/clkdev.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+#include "psc.h"
+
+LPSC_CLKDEV1(vpss_slave_clkdev,		"slave",	"vpss");
+LPSC_CLKDEV1(spi1_clkdev,		NULL,		"spi_davinci.1");
+LPSC_CLKDEV1(mmcsd1_clkdev,		NULL,		"da830-mmc.1");
+LPSC_CLKDEV1(asp0_clkdev,		NULL,		"davinci-mcbsp");
+LPSC_CLKDEV1(usb_clkdev,		"usb",		NULL);
+LPSC_CLKDEV1(spi2_clkdev,		NULL,		"spi_davinci.2");
+LPSC_CLKDEV1(aemif_clkdev,		"aemif",	NULL);
+LPSC_CLKDEV1(mmcsd0_clkdev,		NULL,		"da830-mmc.0");
+LPSC_CLKDEV1(i2c_clkdev,		NULL,		"i2c_davinci.1");
+LPSC_CLKDEV1(uart0_clkdev,		NULL,		"serial8250.0");
+LPSC_CLKDEV1(uart1_clkdev,		NULL,		"serial8250.1");
+LPSC_CLKDEV1(spi0_clkdev,		NULL,		"spi_davinci.0");
+/* REVISIT: gpio-davinci.c should be modified to drop con_id */
+LPSC_CLKDEV1(gpio_clkdev,		"gpio",		NULL);
+LPSC_CLKDEV1(timer0_clkdev,		"timer0",	NULL);
+LPSC_CLKDEV1(timer2_clkdev,		NULL,		"davinci-wdt");
+LPSC_CLKDEV1(spi3_clkdev,		NULL,		"spi_davinci.3");
+LPSC_CLKDEV1(spi4_clkdev,		NULL,		"spi_davinci.4");
+LPSC_CLKDEV2(emac_clkdev,		NULL,		"davinci_emac.1",
+					"fck",		"davinci_mdio.0");
+LPSC_CLKDEV1(voice_codec_clkdev,	NULL,		"davinci_voicecodec");
+LPSC_CLKDEV1(vpss_dac_clkdev,		"vpss_dac",	NULL);
+LPSC_CLKDEV1(vpss_master_clkdev,	"master",	"vpss");
+
+static const struct davinci_lpsc_clk_info dm365_psc_info[] = {
+	LPSC(1,  0, vpss_slave,  pll1_sysclk5, vpss_slave_clkdev,  0),
+	LPSC(5,  0, timer3,      pll1_auxclk,  NULL,               0),
+	LPSC(6,  0, spi1,        pll1_sysclk4, spi1_clkdev,        0),
+	LPSC(7,  0, mmcsd1,      pll1_sysclk4, mmcsd1_clkdev,      0),
+	LPSC(8,  0, asp0,        pll1_sysclk4, asp0_clkdev,        0),
+	LPSC(9,  0, usb,         pll1_auxclk,  usb_clkdev,         0),
+	LPSC(10, 0, pwm3,        pll1_auxclk,  NULL,               0),
+	LPSC(11, 0, spi2,        pll1_sysclk4, spi2_clkdev,        0),
+	LPSC(12, 0, rto,         pll1_sysclk4, NULL,               0),
+	LPSC(14, 0, aemif,       pll1_sysclk4, aemif_clkdev,       0),
+	LPSC(15, 0, mmcsd0,      pll1_sysclk8, mmcsd0_clkdev,      0),
+	LPSC(18, 0, i2c,         pll1_auxclk,  i2c_clkdev,         0),
+	LPSC(19, 0, uart0,       pll1_auxclk,  uart0_clkdev,       0),
+	LPSC(20, 0, uart1,       pll1_sysclk4, uart1_clkdev,       0),
+	LPSC(22, 0, spi0,        pll1_sysclk4, spi0_clkdev,        0),
+	LPSC(23, 0, pwm0,        pll1_auxclk,  NULL,               0),
+	LPSC(24, 0, pwm1,        pll1_auxclk,  NULL,               0),
+	LPSC(25, 0, pwm2,        pll1_auxclk,  NULL,               0),
+	LPSC(26, 0, gpio,        pll1_sysclk4, gpio_clkdev,        0),
+	LPSC(27, 0, timer0,      pll1_auxclk,  timer0_clkdev,      LPSC_ALWAYS_ENABLED),
+	LPSC(28, 0, timer1,      pll1_auxclk,  NULL,               0),
+	/* REVISIT: why can't this be disabled? */
+	LPSC(29, 0, timer2,      pll1_auxclk,  timer2_clkdev,      LPSC_ALWAYS_ENABLED),
+	LPSC(31, 0, arm,         pll2_sysclk2, NULL,               LPSC_ALWAYS_ENABLED),
+	LPSC(38, 0, spi3,        pll1_sysclk4, spi3_clkdev,        0),
+	LPSC(39, 0, spi4,        pll1_auxclk,  spi4_clkdev,        0),
+	LPSC(40, 0, emac,        pll2_sysclk4, emac_clkdev,        0),
+	LPSC(44, 1, voice_codec, pll1_sysclk3, voice_codec_clkdev, 0),
+	LPSC(46, 1, vpss_dac,    pll1_sysclk3, vpss_dac_clkdev,    0),
+	LPSC(47, 0, vpss_master, pll1_sysclk5, vpss_master_clkdev, 0),
+	LPSC(50, 0, mjcp,        pll1_sysclk3, NULL,               0),
+	{ }
+};
+
+static int dm365_psc_init(struct device *dev, void __iomem *base)
+{
+	return davinci_psc_register_clocks(dev, dm365_psc_info, 52, base);
+}
+
+static struct clk_bulk_data dm365_psc_parent_clks[] = {
+	{ .id = "pll1_sysclk1" },
+	{ .id = "pll1_sysclk3" },
+	{ .id = "pll1_sysclk4" },
+	{ .id = "pll1_sysclk5" },
+	{ .id = "pll1_sysclk8" },
+	{ .id = "pll2_sysclk2" },
+	{ .id = "pll2_sysclk4" },
+	{ .id = "pll1_auxclk"  },
+};
+
+const struct davinci_psc_init_data dm365_psc_init_data = {
+	.parent_clks		= dm365_psc_parent_clks,
+	.num_parent_clks	= ARRAY_SIZE(dm365_psc_parent_clks),
+	.psc_init		= &dm365_psc_init,
+};
diff --git a/drivers/clk/davinci/psc-dm644x.c b/drivers/clk/davinci/psc-dm644x.c
new file mode 100644
index 000000000000..c22367baa46f
--- /dev/null
+++ b/drivers/clk/davinci/psc-dm644x.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PSC clock descriptions for TI DaVinci DM644x
+ *
+ * Copyright (C) 2018 David Lechner <david@lechnology.com>
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+#include <linux/clkdev.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+#include "psc.h"
+
+LPSC_CLKDEV1(vpss_master_clkdev,	"master",	"vpss");
+LPSC_CLKDEV1(vpss_slave_clkdev,		"slave",	"vpss");
+LPSC_CLKDEV2(emac_clkdev,		NULL,		"davinci_emac.1",
+					"fck",		"davinci_mdio.0");
+LPSC_CLKDEV1(usb_clkdev,		"usb",		NULL);
+LPSC_CLKDEV1(ide_clkdev,		NULL,		"palm_bk3710");
+LPSC_CLKDEV1(aemif_clkdev,		"aemif",	NULL);
+LPSC_CLKDEV1(mmcsd_clkdev,		NULL,		"dm6441-mmc.0");
+LPSC_CLKDEV1(asp0_clkdev,		NULL,		"davinci-mcbsp");
+LPSC_CLKDEV1(i2c_clkdev,		NULL,		"i2c_davinci.1");
+LPSC_CLKDEV1(uart0_clkdev,		NULL,		"serial8250.0");
+LPSC_CLKDEV1(uart1_clkdev,		NULL,		"serial8250.1");
+LPSC_CLKDEV1(uart2_clkdev,		NULL,		"serial8250.2");
+/* REVISIT: gpio-davinci.c should be modified to drop con_id */
+LPSC_CLKDEV1(gpio_clkdev,		"gpio",		NULL);
+LPSC_CLKDEV1(timer0_clkdev,		"timer0",	NULL);
+LPSC_CLKDEV1(timer2_clkdev,		NULL,		"davinci-wdt");
+
+static const struct davinci_lpsc_clk_info dm644x_psc_info[] = {
+	LPSC(0,  0, vpss_master, pll1_sysclk3, vpss_master_clkdev, 0),
+	LPSC(1,  0, vpss_slave,  pll1_sysclk3, vpss_slave_clkdev,  0),
+	LPSC(6,  0, emac,        pll1_sysclk5, emac_clkdev,        0),
+	LPSC(9,  0, usb,         pll1_sysclk5, usb_clkdev,         0),
+	LPSC(10, 0, ide,         pll1_sysclk5, ide_clkdev,         0),
+	LPSC(11, 0, vlynq,       pll1_sysclk5, NULL,               0),
+	LPSC(14, 0, aemif,       pll1_sysclk5, aemif_clkdev,       0),
+	LPSC(15, 0, mmcsd,       pll1_sysclk5, mmcsd_clkdev,       0),
+	LPSC(17, 0, asp0,        pll1_sysclk5, asp0_clkdev,        0),
+	LPSC(18, 0, i2c,         pll1_auxclk,  i2c_clkdev,         0),
+	LPSC(19, 0, uart0,       pll1_auxclk,  uart0_clkdev,       0),
+	LPSC(20, 0, uart1,       pll1_auxclk,  uart1_clkdev,       0),
+	LPSC(21, 0, uart2,       pll1_auxclk,  uart2_clkdev,       0),
+	LPSC(22, 0, spi,         pll1_sysclk5, NULL,               0),
+	LPSC(23, 0, pwm0,        pll1_auxclk,  NULL,               0),
+	LPSC(24, 0, pwm1,        pll1_auxclk,  NULL,               0),
+	LPSC(25, 0, pwm2,        pll1_auxclk,  NULL,               0),
+	LPSC(26, 0, gpio,        pll1_sysclk5, gpio_clkdev,        0),
+	LPSC(27, 0, timer0,      pll1_auxclk,  timer0_clkdev,      LPSC_ALWAYS_ENABLED),
+	LPSC(28, 0, timer1,      pll1_auxclk,  NULL,               0),
+	/* REVISIT: why can't this be disabled? */
+	LPSC(29, 0, timer2,      pll1_auxclk,  timer2_clkdev,      LPSC_ALWAYS_ENABLED),
+	LPSC(31, 0, arm,         pll1_sysclk2, NULL,               LPSC_ALWAYS_ENABLED),
+	/* REVISIT how to disable? */
+	LPSC(39, 1, dsp,         pll1_sysclk1, NULL,               LPSC_ALWAYS_ENABLED),
+	/* REVISIT how to disable? */
+	LPSC(40, 1, vicp,        pll1_sysclk2, NULL,               LPSC_ALWAYS_ENABLED),
+	{ }
+};
+
+static int dm644x_psc_init(struct device *dev, void __iomem *base)
+{
+	return davinci_psc_register_clocks(dev, dm644x_psc_info, 41, base);
+}
+
+static struct clk_bulk_data dm644x_psc_parent_clks[] = {
+	{ .id = "pll1_sysclk1" },
+	{ .id = "pll1_sysclk2" },
+	{ .id = "pll1_sysclk3" },
+	{ .id = "pll1_sysclk5" },
+	{ .id = "pll1_auxclk"  },
+};
+
+const struct davinci_psc_init_data dm644x_psc_init_data = {
+	.parent_clks		= dm644x_psc_parent_clks,
+	.num_parent_clks	= ARRAY_SIZE(dm644x_psc_parent_clks),
+	.psc_init		= &dm644x_psc_init,
+};
diff --git a/drivers/clk/davinci/psc-dm646x.c b/drivers/clk/davinci/psc-dm646x.c
new file mode 100644
index 000000000000..468ef86ea40b
--- /dev/null
+++ b/drivers/clk/davinci/psc-dm646x.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PSC clock descriptions for TI DaVinci DM646x
+ *
+ * Copyright (C) 2018 David Lechner <david@lechnology.com>
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+#include <linux/clkdev.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+#include "psc.h"
+
+LPSC_CLKDEV1(ide_clkdev,	NULL,		"palm_bk3710");
+LPSC_CLKDEV2(emac_clkdev,	NULL,		"davinci_emac.1",
+				"fck",		"davinci_mdio.0");
+LPSC_CLKDEV1(aemif_clkdev,	"aemif",	NULL);
+LPSC_CLKDEV1(mcasp0_clkdev,	NULL,		"davinci-mcasp.0");
+LPSC_CLKDEV1(mcasp1_clkdev,	NULL,		"davinci-mcasp.1");
+LPSC_CLKDEV1(uart0_clkdev,	NULL,		"serial8250.0");
+LPSC_CLKDEV1(uart1_clkdev,	NULL,		"serial8250.1");
+LPSC_CLKDEV1(uart2_clkdev,	NULL,		"serial8250.2");
+LPSC_CLKDEV1(i2c_clkdev,	NULL,		"i2c_davinci.1");
+/* REVISIT: gpio-davinci.c should be modified to drop con_id */
+LPSC_CLKDEV1(gpio_clkdev,	"gpio",		NULL);
+LPSC_CLKDEV1(timer0_clkdev,	"timer0",	 NULL);
+
+static const struct davinci_lpsc_clk_info dm646x_psc_info[] = {
+	LPSC(0,  0, arm,      pll1_sysclk2, NULL,          LPSC_ALWAYS_ENABLED),
+	/* REVISIT how to disable? */
+	LPSC(1,  0, dsp,      pll1_sysclk1, NULL,          LPSC_ALWAYS_ENABLED),
+	LPSC(4,  0, edma_cc,  pll1_sysclk2, NULL,          LPSC_ALWAYS_ENABLED),
+	LPSC(5,  0, edma_tc0, pll1_sysclk2, NULL,          LPSC_ALWAYS_ENABLED),
+	LPSC(6,  0, edma_tc1, pll1_sysclk2, NULL,          LPSC_ALWAYS_ENABLED),
+	LPSC(7,  0, edma_tc2, pll1_sysclk2, NULL,          LPSC_ALWAYS_ENABLED),
+	LPSC(8,  0, edma_tc3, pll1_sysclk2, NULL,          LPSC_ALWAYS_ENABLED),
+	LPSC(10, 0, ide,      pll1_sysclk4, ide_clkdev,    0),
+	LPSC(14, 0, emac,     pll1_sysclk3, emac_clkdev,   0),
+	LPSC(16, 0, vpif0,    ref_clk,      NULL,          LPSC_ALWAYS_ENABLED),
+	LPSC(17, 0, vpif1,    ref_clk,      NULL,          LPSC_ALWAYS_ENABLED),
+	LPSC(21, 0, aemif,    pll1_sysclk3, aemif_clkdev,  LPSC_ALWAYS_ENABLED),
+	LPSC(22, 0, mcasp0,   pll1_sysclk3, mcasp0_clkdev, 0),
+	LPSC(23, 0, mcasp1,   pll1_sysclk3, mcasp1_clkdev, 0),
+	LPSC(26, 0, uart0,    aux_clkin,    uart0_clkdev,  0),
+	LPSC(27, 0, uart1,    aux_clkin,    uart1_clkdev,  0),
+	LPSC(28, 0, uart2,    aux_clkin,    uart2_clkdev,  0),
+	/* REVISIT: disabling hangs system */
+	LPSC(29, 0, pwm0,     pll1_sysclk3, NULL,          LPSC_ALWAYS_ENABLED),
+	/* REVISIT: disabling hangs system */
+	LPSC(30, 0, pwm1,     pll1_sysclk3, NULL,          LPSC_ALWAYS_ENABLED),
+	LPSC(31, 0, i2c,      pll1_sysclk3, i2c_clkdev,    0),
+	LPSC(33, 0, gpio,     pll1_sysclk3, gpio_clkdev,   0),
+	LPSC(34, 0, timer0,   pll1_sysclk3, timer0_clkdev, LPSC_ALWAYS_ENABLED),
+	LPSC(35, 0, timer1,   pll1_sysclk3, NULL,          0),
+	{ }
+};
+
+static int dm646x_psc_init(struct device *dev, void __iomem *base)
+{
+	return davinci_psc_register_clocks(dev, dm646x_psc_info, 46, base);
+}
+
+static struct clk_bulk_data dm646x_psc_parent_clks[] = {
+	{ .id = "ref_clk"      },
+	{ .id = "aux_clkin"    },
+	{ .id = "pll1_sysclk1" },
+	{ .id = "pll1_sysclk2" },
+	{ .id = "pll1_sysclk3" },
+	{ .id = "pll1_sysclk4" },
+	{ .id = "pll1_sysclk5" },
+};
+
+const struct davinci_psc_init_data dm646x_psc_init_data = {
+	.parent_clks		= dm646x_psc_parent_clks,
+	.num_parent_clks	= ARRAY_SIZE(dm646x_psc_parent_clks),
+	.psc_init		= &dm646x_psc_init,
+};
diff --git a/drivers/clk/davinci/psc.c b/drivers/clk/davinci/psc.c
new file mode 100644
index 000000000000..ce170e600f09
--- /dev/null
+++ b/drivers/clk/davinci/psc.c
@@ -0,0 +1,551 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Clock driver for TI Davinci PSC controllers
+ *
+ * Copyright (C) 2017 David Lechner <david@lechnology.com>
+ *
+ * Based on: drivers/clk/keystone/gate.c
+ * Copyright (C) 2013 Texas Instruments.
+ *	Murali Karicheri <m-karicheri2@ti.com>
+ *	Santosh Shilimkar <santosh.shilimkar@ti.com>
+ *
+ * And: arch/arm/mach-davinci/psc.c
+ * Copyright (C) 2006 Texas Instruments.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+#include <linux/clkdev.h>
+#include <linux/err.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/pm_clock.h>
+#include <linux/pm_domain.h>
+#include <linux/regmap.h>
+#include <linux/reset-controller.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#include "psc.h"
+
+/* PSC register offsets */
+#define EPCPR			0x070
+#define PTCMD			0x120
+#define PTSTAT			0x128
+#define PDSTAT(n)		(0x200 + 4 * (n))
+#define PDCTL(n)		(0x300 + 4 * (n))
+#define MDSTAT(n)		(0x800 + 4 * (n))
+#define MDCTL(n)		(0xa00 + 4 * (n))
+
+/* PSC module states */
+enum davinci_lpsc_state {
+	LPSC_STATE_SWRSTDISABLE	= 0,
+	LPSC_STATE_SYNCRST	= 1,
+	LPSC_STATE_DISABLE	= 2,
+	LPSC_STATE_ENABLE	= 3,
+};
+
+#define MDSTAT_STATE_MASK	GENMASK(5, 0)
+#define MDSTAT_MCKOUT		BIT(12)
+#define PDSTAT_STATE_MASK	GENMASK(4, 0)
+#define MDCTL_FORCE		BIT(31)
+#define MDCTL_LRESET		BIT(8)
+#define PDCTL_EPCGOOD		BIT(8)
+#define PDCTL_NEXT		BIT(0)
+
+struct davinci_psc_data {
+	struct clk_onecell_data clk_data;
+	struct genpd_onecell_data pm_data;
+	struct reset_controller_dev rcdev;
+};
+
+/**
+ * struct davinci_lpsc_clk - LPSC clock structure
+ * @dev: the device that provides this LPSC
+ * @hw: clk_hw for the LPSC
+ * @pm_domain: power domain for the LPSC
+ * @genpd_clk: clock reference owned by @pm_domain
+ * @regmap: PSC MMIO region
+ * @md: Module domain (LPSC module id)
+ * @pd: Power domain
+ * @flags: LPSC_* quirk flags
+ */
+struct davinci_lpsc_clk {
+	struct device *dev;
+	struct clk_hw hw;
+	struct generic_pm_domain pm_domain;
+	struct clk *genpd_clk;
+	struct regmap *regmap;
+	u32 md;
+	u32 pd;
+	u32 flags;
+};
+
+#define to_davinci_psc_data(x) container_of(x, struct davinci_psc_data, x)
+#define to_davinci_lpsc_clk(x) container_of(x, struct davinci_lpsc_clk, x)
+
+/**
+ * best_dev_name - pick a name short enough for clkdev lookups
+ * @dev: the device to name
+ *
+ * clkdev limits dev_id to 20 characters and, when using device tree,
+ * dev_name(dev) is much longer than that. So prefer the "compatible"
+ * string read from @dev->of_node and only fall back to dev_name(dev) when
+ * that property cannot be read.
+ */
+static inline const char *best_dev_name(struct device *dev)
+{
+	const char *name;
+
+	if (of_property_read_string(dev->of_node, "compatible", &name))
+		name = dev_name(dev);
+
+	return name;
+}
+
+static void davinci_lpsc_config(struct davinci_lpsc_clk *lpsc,
+				enum davinci_lpsc_state next_state)
+{
+	struct regmap *map = lpsc->regmap;
+	const u32 pd_bit = BIT(lpsc->pd);
+	u32 val, pd_state;
+
+	/* request the new module state, forcing it when the quirk is set */
+	regmap_write_bits(map, MDCTL(lpsc->md), MDSTAT_STATE_MASK, next_state);
+	if (lpsc->flags & LPSC_FORCE)
+		regmap_write_bits(map, MDCTL(lpsc->md), MDCTL_FORCE,
+				  MDCTL_FORCE);
+
+	/* a power domain reporting state 0 also needs PDCTL_NEXT set */
+	regmap_read(map, PDSTAT(lpsc->pd), &val);
+	pd_state = val & PDSTAT_STATE_MASK;
+	if (!pd_state)
+		regmap_write_bits(map, PDCTL(lpsc->pd), PDCTL_NEXT, PDCTL_NEXT);
+
+	/* issue the transition command for this power domain */
+	regmap_write(map, PTCMD, pd_bit);
+
+	if (!pd_state) {
+		/* wait for our EPCPR bit, then set PDCTL_EPCGOOD */
+		regmap_read_poll_timeout(map, EPCPR, val, val & pd_bit, 0, 0);
+		regmap_write_bits(map, PDCTL(lpsc->pd), PDCTL_EPCGOOD,
+				  PDCTL_EPCGOOD);
+	}
+
+	/* spin until PTSTAT clears our bit and MDSTAT shows next_state */
+	regmap_read_poll_timeout(map, PTSTAT, val, !(val & pd_bit), 0, 0);
+	regmap_read_poll_timeout(map, MDSTAT(lpsc->md), val,
+				 (val & MDSTAT_STATE_MASK) == next_state,
+				 0, 0);
+}
+
+static int davinci_lpsc_clk_enable(struct clk_hw *hw)
+{
+	/* .enable hook: drive the module into the ENABLE state */
+	davinci_lpsc_config(to_davinci_lpsc_clk(hw), LPSC_STATE_ENABLE);
+
+	/* davinci_lpsc_config() has no status to pass on */
+	return 0;
+}
+
+static void davinci_lpsc_clk_disable(struct clk_hw *hw)
+{
+	struct davinci_lpsc_clk *self = to_davinci_lpsc_clk(hw);
+
+	davinci_lpsc_config(self, LPSC_STATE_DISABLE); /* .disable hook */
+}
+
+static int davinci_lpsc_clk_is_enabled(struct clk_hw *hw)
+{
+	struct davinci_lpsc_clk *lpsc = to_davinci_lpsc_clk(hw);
+	u32 mdstat = 0; /* stays 0 ("off") if the read below fails */
+
+	regmap_read(lpsc->regmap, MDSTAT(lpsc->md), &mdstat);
+	/* .is_enabled hook: report the MCKOUT status bit as 1 or 0 */
+	return !!(mdstat & MDSTAT_MCKOUT);
+}
+
+static const struct clk_ops davinci_lpsc_clk_ops = {
+	.enable		= davinci_lpsc_clk_enable,
+	.disable	= davinci_lpsc_clk_disable,
+	.is_enabled	= davinci_lpsc_clk_is_enabled,
+};
+
+static int davinci_psc_genpd_attach_dev(struct generic_pm_domain *pm_domain,
+					struct device *dev)
+{
+	struct davinci_lpsc_clk *lpsc = to_davinci_lpsc_clk(pm_domain);
+	const char *provider = best_dev_name(lpsc->dev);
+	struct clk *pm_clk;
+	int err;
+
+	/*
+	 * Take a fresh reference through clkdev rather than handing over
+	 * lpsc->hw.clk: pm_clk_remove_clk() ends in clk_put() on whatever
+	 * clock was added.
+	 */
+	pm_clk = clk_get_sys(provider, clk_hw_get_name(&lpsc->hw));
+	if (IS_ERR(pm_clk))
+		return PTR_ERR(pm_clk);
+
+	err = pm_clk_create(dev);
+	if (err < 0) {
+		clk_put(pm_clk);
+		return err;
+	}
+
+	err = pm_clk_add_clk(dev, pm_clk);
+	if (err < 0) {
+		pm_clk_destroy(dev);
+		clk_put(pm_clk);
+		return err;
+	}
+
+	/* remembered so that detach can hand it to pm_clk_remove_clk() */
+	lpsc->genpd_clk = pm_clk;
+	return 0;
+}
+
+static void davinci_psc_genpd_detach_dev(struct generic_pm_domain *pm_domain,
+					 struct device *dev)
+{
+	struct davinci_lpsc_clk *owner = to_davinci_lpsc_clk(pm_domain);
+
+	/* undo attach_dev: drop the clock entry, then the PM clock list */
+	pm_clk_remove_clk(dev, owner->genpd_clk);
+	pm_clk_destroy(dev);
+	owner->genpd_clk = NULL;
+}
+
+/**
+ * davinci_lpsc_clk_register - register LPSC clock
+ * @dev: the PSC device that provides this clock
+ * @name: name of this clock
+ * @parent_name: name of clock's parent
+ * @regmap: PSC MMIO region
+ * @md: local PSC number
+ * @pd: power domain
+ * @flags: LPSC_* flags
+ */
+static struct davinci_lpsc_clk *
+davinci_lpsc_clk_register(struct device *dev, const char *name,
+			  const char *parent_name, struct regmap *regmap,
+			  u32 md, u32 pd, u32 flags)
+{
+	struct clk_init_data init;
+	struct davinci_lpsc_clk *lpsc;
+	int ret;
+
+	lpsc = devm_kzalloc(dev, sizeof(*lpsc), GFP_KERNEL);
+	if (!lpsc)
+		return ERR_PTR(-ENOMEM);
+
+	init.name = name;
+	init.ops = &davinci_lpsc_clk_ops;
+	init.parent_names = (parent_name ? &parent_name : NULL);
+	init.num_parents = (parent_name ? 1 : 0);
+	init.flags = (flags & LPSC_ALWAYS_ENABLED) ? CLK_IS_CRITICAL : 0;
+	if (flags & LPSC_SET_RATE_PARENT)
+		init.flags |= CLK_SET_RATE_PARENT;
+
+	lpsc->dev = dev;
+	lpsc->regmap = regmap;
+	lpsc->hw.init = &init;
+	lpsc->md = md;
+	lpsc->pd = pd;
+	lpsc->flags = flags;
+
+	ret = devm_clk_hw_register(dev, &lpsc->hw);
+	if (ret < 0)
+		return ERR_PTR(ret);
+
+	/* genpd attach needs a way to look up this clock */
+	ret = clk_hw_register_clkdev(&lpsc->hw, name, best_dev_name(dev));
+	if (ret < 0)
+		dev_warn(dev, "No clkdev lookup for %s (%d)\n", name, ret);
+
+	lpsc->pm_domain.name = devm_kasprintf(dev, GFP_KERNEL, "%s: %s",
+					      best_dev_name(dev), name);
+	if (!lpsc->pm_domain.name)
+		return ERR_PTR(-ENOMEM);
+
+	lpsc->pm_domain.attach_dev = davinci_psc_genpd_attach_dev;
+	lpsc->pm_domain.detach_dev = davinci_psc_genpd_detach_dev;
+	lpsc->pm_domain.flags = GENPD_FLAG_PM_CLK;
+	pm_genpd_init(&lpsc->pm_domain, NULL,
+		      davinci_lpsc_clk_is_enabled(&lpsc->hw));
+
+	return lpsc;
+}
+
+static int davinci_lpsc_clk_reset(struct clk *clk, bool reset)
+{
+	struct clk_hw *hw = IS_ERR_OR_NULL(clk) ? NULL : __clk_get_hw(clk);
+	struct davinci_lpsc_clk *lpsc;
+
+	/* check before container_of(): clk may be NULL or an ERR_PTR() */
+	if (!hw)
+		return -EINVAL;
+	/* reset == true clears MDCTL_LRESET, reset == false sets it */
+	lpsc = to_davinci_lpsc_clk(hw);
+	regmap_write_bits(lpsc->regmap, MDCTL(lpsc->md), MDCTL_LRESET,
+			  reset ? 0 : MDCTL_LRESET);
+	return 0;
+}
+
+/*
+ * REVISIT: These exported functions can be removed after a non-DT lookup is
+ * added to the reset controller framework and the davinci-rproc driver is
+ * updated to use the generic reset controller framework.
+ */
+
+int davinci_clk_reset_assert(struct clk *clk)
+{
+	return davinci_lpsc_clk_reset(clk, true);
+}
+EXPORT_SYMBOL(davinci_clk_reset_assert);
+
+int davinci_clk_reset_deassert(struct clk *clk)
+{
+	return davinci_lpsc_clk_reset(clk, false);
+}
+EXPORT_SYMBOL(davinci_clk_reset_deassert);
+
+static int davinci_psc_reset_assert(struct reset_controller_dev *rcdev,
+				    unsigned long id)
+{
+	struct clk **lpsc_clks = to_davinci_psc_data(rcdev)->clk_data.clks;
+
+	/* .assert hook: @id indexes the clock table built at registration */
+	return davinci_lpsc_clk_reset(lpsc_clks[id], true);
+}
+
+static int davinci_psc_reset_deassert(struct reset_controller_dev *rcdev,
+				      unsigned long id)
+{
+	struct clk **lpsc_clks = to_davinci_psc_data(rcdev)->clk_data.clks;
+
+	/* .deassert hook: @id indexes the clock table built at registration */
+	return davinci_lpsc_clk_reset(lpsc_clks[id], false);
+}
+
+static const struct reset_control_ops davinci_psc_reset_ops = {
+	.assert		= davinci_psc_reset_assert,
+	.deassert	= davinci_psc_reset_deassert,
+};
+
+static int davinci_psc_reset_of_xlate(struct reset_controller_dev *rcdev,
+				      const struct of_phandle_args *reset_spec)
+{
+	struct of_phandle_args spec_copy = *reset_spec; /* non-const copy */
+	struct davinci_lpsc_clk *lpsc;
+	struct clk_hw *hw;
+	struct clk *handle;
+
+	/* resets and clocks share one node, so resolve the spec as a clock */
+	handle = of_clk_get_from_provider(&spec_copy);
+	if (IS_ERR(handle))
+		return PTR_ERR(handle);
+
+	hw = __clk_get_hw(handle);
+	clk_put(handle);
+	lpsc = to_davinci_lpsc_clk(hw);
+
+	if (lpsc->flags & LPSC_LOCAL_RESET)
+		return lpsc->md;
+
+	/* not all modules support local reset */
+	return -EINVAL;
+}
+
+static const struct regmap_config davinci_psc_regmap_config = {
+	.reg_bits	= 32,
+	.reg_stride	= 4,
+	.val_bits	= 32,
+};
+
+static struct davinci_psc_data *
+__davinci_psc_register_clocks(struct device *dev,
+			      const struct davinci_lpsc_clk_info *info,
+			      int num_clks, void __iomem *base)
+{
+	struct davinci_psc_data *psc;
+	struct clk **clks;
+	struct generic_pm_domain **pm_domains;
+	struct regmap *regmap;
+	int i, ret;
+
+	psc = devm_kzalloc(dev, sizeof(*psc), GFP_KERNEL);
+	if (!psc)
+		return ERR_PTR(-ENOMEM);
+
+	clks = devm_kmalloc_array(dev, num_clks, sizeof(*clks), GFP_KERNEL);
+	if (!clks)
+		return ERR_PTR(-ENOMEM);
+	psc->clk_data.clks = clks;
+	psc->clk_data.clk_num = num_clks;
+
+	/* error-fill so of_clk_src_onecell_get() never returns NULL for gaps */
+	for (i = 0; i < num_clks; i++)
+		clks[i] = ERR_PTR(-ENOENT);
+
+	pm_domains = devm_kcalloc(dev, num_clks, sizeof(*pm_domains), GFP_KERNEL);
+	if (!pm_domains)
+		return ERR_PTR(-ENOMEM);
+	psc->pm_data.domains = pm_domains;
+	psc->pm_data.num_domains = num_clks;
+
+	regmap = devm_regmap_init_mmio(dev, base, &davinci_psc_regmap_config);
+	if (IS_ERR(regmap))
+		return ERR_CAST(regmap);
+
+	for (; info->name; info++) {
+		struct davinci_lpsc_clk *lpsc;
+
+		/* an md beyond @num_clks would index past both arrays */
+		if (info->md >= (u32)num_clks) {
+			dev_warn(dev, "%s: md %u out of range\n", info->name,
+				 info->md);
+			continue;
+		}
+		lpsc = davinci_lpsc_clk_register(dev, info->name, info->parent,
+						 regmap, info->md, info->pd,
+						 info->flags);
+		if (IS_ERR(lpsc)) {
+			dev_warn(dev, "Failed to register %s (%ld)\n",
+				 info->name, PTR_ERR(lpsc));
+			continue;
+		}
+
+		clks[info->md] = lpsc->hw.clk;
+		pm_domains[info->md] = &lpsc->pm_domain;
+	}
+
+	psc->rcdev.ops = &davinci_psc_reset_ops;
+	psc->rcdev.owner = THIS_MODULE;
+	psc->rcdev.dev = dev;
+	psc->rcdev.of_node = dev->of_node;
+	psc->rcdev.of_reset_n_cells = 1;
+	psc->rcdev.of_xlate = davinci_psc_reset_of_xlate;
+	psc->rcdev.nr_resets = num_clks;
+
+	ret = devm_reset_controller_register(dev, &psc->rcdev);
+	if (ret < 0)
+		dev_warn(dev, "Failed to register reset controller (%d)\n", ret);
+
+	return psc;
+}
+
+int davinci_psc_register_clocks(struct device *dev,
+				const struct davinci_lpsc_clk_info *info,
+				u8 num_clks,
+				void __iomem *base)
+{
+	const struct davinci_lpsc_clkdev_info *alias;
+	struct davinci_psc_data *psc;
+	struct clk *clk;
+
+	psc = __davinci_psc_register_clocks(dev, info, num_clks, base);
+	if (IS_ERR(psc))
+		return PTR_ERR(psc);
+
+	/* register the clkdev lookups listed in each entry's cdevs table */
+	for (; info->name; info++) {
+		clk = psc->clk_data.clks[info->md];
+		alias = IS_ERR_OR_NULL(clk) ? NULL : info->cdevs;
+
+		for (; alias && (alias->con_id || alias->dev_id); alias++)
+			clk_register_clkdev(clk, alias->con_id, alias->dev_id);
+	}
+
+	return 0;
+}
+
+int of_davinci_psc_clk_init(struct device *dev,
+			    const struct davinci_lpsc_clk_info *info,
+			    u8 num_clks,
+			    void __iomem *base)
+{
+	struct davinci_psc_data *psc;
+
+	psc = __davinci_psc_register_clocks(dev, info, num_clks, base);
+	if (IS_ERR(psc))
+		return PTR_ERR(psc);
+
+	/* publish both tables on the DT node; return values are not checked */
+	of_genpd_add_provider_onecell(dev->of_node, &psc->pm_data);
+	of_clk_add_provider(dev->of_node, of_clk_src_onecell_get,
+			    &psc->clk_data);
+
+	return 0;
+}
+
+static const struct of_device_id davinci_psc_of_match[] = {
+	{ .compatible = "ti,da850-psc0", .data = &of_da850_psc0_init_data },
+	{ .compatible = "ti,da850-psc1", .data = &of_da850_psc1_init_data },
+	{ }
+};
+
+static const struct platform_device_id davinci_psc_id_table[] = {
+	{ .name = "da830-psc0", .driver_data = (kernel_ulong_t)&da830_psc0_init_data },
+	{ .name = "da830-psc1", .driver_data = (kernel_ulong_t)&da830_psc1_init_data },
+	{ .name = "da850-psc0", .driver_data = (kernel_ulong_t)&da850_psc0_init_data },
+	{ .name = "da850-psc1", .driver_data = (kernel_ulong_t)&da850_psc1_init_data },
+	{ .name = "dm355-psc",  .driver_data = (kernel_ulong_t)&dm355_psc_init_data  },
+	{ .name = "dm365-psc",  .driver_data = (kernel_ulong_t)&dm365_psc_init_data  },
+	{ .name = "dm644x-psc", .driver_data = (kernel_ulong_t)&dm644x_psc_init_data },
+	{ .name = "dm646x-psc", .driver_data = (kernel_ulong_t)&dm646x_psc_init_data },
+	{ }
+};
+
+static int davinci_psc_probe(struct platform_device *pdev)
+{
+	const struct davinci_psc_init_data *soc = NULL;
+	const struct of_device_id *match;
+	struct device *dev = &pdev->dev;
+	void __iomem *regs;
+	int err;
+
+	/* device tree match data wins over the platform id table entry */
+	match = of_match_device(davinci_psc_of_match, dev);
+	if (match)
+		soc = match->data;
+	else if (pdev->id_entry)
+		soc = (void *)pdev->id_entry->driver_data;
+
+	if (!soc) {
+		dev_err(dev, "unable to find driver init data\n");
+		return -EINVAL;
+	}
+
+	regs = devm_ioremap_resource(dev,
+			platform_get_resource(pdev, IORESOURCE_MEM, 0));
+	if (IS_ERR(regs))
+		return PTR_ERR(regs);
+
+	/* take devm-managed references on the SoC's parent clocks */
+	err = devm_clk_bulk_get(dev, soc->num_parent_clks, soc->parent_clks);
+	if (err < 0)
+		return err;
+
+	return soc->psc_init(dev, regs);
+}
+
+static struct platform_driver davinci_psc_driver = {
+	.probe		= davinci_psc_probe,
+	.driver		= {
+		.name		= "davinci-psc-clk",
+		.of_match_table	= davinci_psc_of_match,
+	},
+	.id_table	= davinci_psc_id_table,
+};
+
+static int __init davinci_psc_driver_init(void)
+{
+	return platform_driver_register(&davinci_psc_driver);
+}
+
+/* has to be postcore_initcall because davinci_gpio depend on PSC clocks */
+postcore_initcall(davinci_psc_driver_init);
diff --git a/drivers/clk/davinci/psc.h b/drivers/clk/davinci/psc.h
new file mode 100644
index 000000000000..c2a7df6413fe
--- /dev/null
+++ b/drivers/clk/davinci/psc.h
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Clock driver for TI Davinci PSC controllers
+ *
+ * Copyright (C) 2018 David Lechner <david@lechnology.com>
+ */
+
+#ifndef __CLK_DAVINCI_PSC_H__
+#define __CLK_DAVINCI_PSC_H__
+
+#include <linux/clk-provider.h>
+#include <linux/types.h>
+
+/* PSC quirk flags */
+#define LPSC_ALWAYS_ENABLED	BIT(0) /* never disable this clock */
+#define LPSC_SET_RATE_PARENT	BIT(1) /* propagate set_rate to parent clock */
+#define LPSC_FORCE		BIT(2) /* requires MDCTL FORCE bit */
+#define LPSC_LOCAL_RESET	BIT(3) /* acts as reset provider */
+
+struct davinci_lpsc_clkdev_info {
+	const char *con_id;
+	const char *dev_id;
+};
+
+#define LPSC_CLKDEV(c, d) {	\
+	.con_id = (c),		\
+	.dev_id = (d)		\
+}
+
+#define LPSC_CLKDEV1(n, c, d) \
+static const struct davinci_lpsc_clkdev_info n[] __initconst = {	\
+	LPSC_CLKDEV((c), (d)),						\
+	{ }								\
+}
+
+#define LPSC_CLKDEV2(n, c1, d1, c2, d2) \
+static const struct davinci_lpsc_clkdev_info n[] __initconst = {	\
+	LPSC_CLKDEV((c1), (d1)),					\
+	LPSC_CLKDEV((c2), (d2)),					\
+	{ }								\
+}
+
+#define LPSC_CLKDEV3(n, c1, d1, c2, d2, c3, d3) \
+static const struct davinci_lpsc_clkdev_info n[] __initconst = {	\
+	LPSC_CLKDEV((c1), (d1)),					\
+	LPSC_CLKDEV((c2), (d2)),					\
+	LPSC_CLKDEV((c3), (d3)),					\
+	{ }								\
+}
+
+/**
+ * struct davinci_lpsc_clk_info - LPSC module-specific clock information
+ * @name: the clock name
+ * @parent: the parent clock name
+ * @cdevs: optional array of clkdev lookup table info
+ * @md: the local module domain (LPSC id)
+ * @pd: the power domain id
+ * @flags: bitmask of LPSC_* flags
+ */
+struct davinci_lpsc_clk_info {
+	const char *name;
+	const char *parent;
+	const struct davinci_lpsc_clkdev_info *cdevs;
+	u32 md;
+	u32 pd;
+	unsigned long flags;
+};
+
+#define LPSC(m, d, n, p, c, f)	\
+{				\
+	.name	= #n,		\
+	.parent	= #p,		\
+	.cdevs	= (c),		\
+	.md	= (m),		\
+	.pd	= (d),		\
+	.flags	= (f),		\
+}
+
+int davinci_psc_register_clocks(struct device *dev,
+				const struct davinci_lpsc_clk_info *info,
+				u8 num_clks,
+				void __iomem *base);
+
+int of_davinci_psc_clk_init(struct device *dev,
+			    const struct davinci_lpsc_clk_info *info,
+			    u8 num_clks,
+			    void __iomem *base);
+
+/* Device-specific data */
+
+struct davinci_psc_init_data {
+	struct clk_bulk_data *parent_clks;
+	int num_parent_clks;
+	int (*psc_init)(struct device *dev, void __iomem *base);
+};
+
+extern const struct davinci_psc_init_data da830_psc0_init_data;
+extern const struct davinci_psc_init_data da830_psc1_init_data;
+extern const struct davinci_psc_init_data da850_psc0_init_data;
+extern const struct davinci_psc_init_data da850_psc1_init_data;
+extern const struct davinci_psc_init_data of_da850_psc0_init_data;
+extern const struct davinci_psc_init_data of_da850_psc1_init_data;
+extern const struct davinci_psc_init_data dm355_psc_init_data;
+extern const struct davinci_psc_init_data dm365_psc_init_data;
+extern const struct davinci_psc_init_data dm644x_psc_init_data;
+extern const struct davinci_psc_init_data dm646x_psc_init_data;
+
+#endif /* __CLK_DAVINCI_PSC_H__ */
diff --git a/drivers/clk/hisilicon/Makefile b/drivers/clk/hisilicon/Makefile
index 4806fc2cb4ac..2a714c0f9657 100644
--- a/drivers/clk/hisilicon/Makefile
+++ b/drivers/clk/hisilicon/Makefile
@@ -3,7 +3,7 @@
 # Hisilicon Clock specific Makefile
 #
 
-obj-y	+= clk.o clkgate-separated.o clkdivider-hi6220.o
+obj-y	+= clk.o clkgate-separated.o clkdivider-hi6220.o clk-hisi-phase.o
 
 obj-$(CONFIG_ARCH_HI3xxx)	+= clk-hi3620.o
 obj-$(CONFIG_ARCH_HIP04)	+= clk-hip04.o
diff --git a/drivers/clk/hisilicon/clk-hisi-phase.c b/drivers/clk/hisilicon/clk-hisi-phase.c
new file mode 100644
index 000000000000..5bce9297b78b
--- /dev/null
+++ b/drivers/clk/hisilicon/clk-hisi-phase.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2017 HiSilicon Technologies Co., Ltd.
+ *
+ * Simple HiSilicon phase clock implementation.
+ */
+
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include "clk.h"
+
+struct clk_hisi_phase {
+	struct clk_hw	hw;
+	void __iomem	*reg;
+	u32		*phase_degrees;
+	u32		*phase_regvals;
+	u8		phase_num;
+	u32		mask;
+	u8		shift;
+	u8		flags;
+	spinlock_t	*lock;
+};
+
+#define to_clk_hisi_phase(_hw) container_of(_hw, struct clk_hisi_phase, hw)
+
+static int hisi_phase_regval_to_degrees(struct clk_hisi_phase *phase,
+					u32 regval)
+{
+	int idx = 0;
+
+	/* parallel tables: slot idx pairs a register code with its angle */
+	while (idx < phase->phase_num) {
+		if (phase->phase_regvals[idx] == regval)
+			return phase->phase_degrees[idx];
+		idx++;
+	}
+	return -EINVAL;
+}
+
+static int hisi_clk_get_phase(struct clk_hw *hw)
+{
+	struct clk_hisi_phase *phase = to_clk_hisi_phase(hw);
+	u32 field;
+
+	/* .get_phase hook: isolate the phase field, then map it to degrees */
+	field = (readl(phase->reg) & phase->mask) >> phase->shift;
+
+	return hisi_phase_regval_to_degrees(phase, field);
+}
+
+static int hisi_phase_degrees_to_regval(struct clk_hisi_phase *phase,
+					int degrees)
+{
+	int idx = 0;
+
+	/* parallel tables: slot idx pairs an angle with its register code */
+	while (idx < phase->phase_num) {
+		if (phase->phase_degrees[idx] == degrees)
+			return phase->phase_regvals[idx];
+		idx++;
+	}
+	return -EINVAL;
+}
+
+static int hisi_clk_set_phase(struct clk_hw *hw, int degrees)
+{
+	struct clk_hisi_phase *phase = to_clk_hisi_phase(hw);
+	int code = hisi_phase_degrees_to_regval(phase, degrees);
+	unsigned long irqflags = 0;
+	u32 word;
+
+	/* angles missing from the lookup table are refused untouched */
+	if (code < 0)
+		return code;
+
+	/* read-modify-write under the lock supplied at registration */
+	spin_lock_irqsave(phase->lock, irqflags);
+
+	word = clk_readl(phase->reg) & ~phase->mask;
+	word |= code << phase->shift;
+	clk_writel(word, phase->reg);
+
+	spin_unlock_irqrestore(phase->lock, irqflags);
+
+	return 0;
+}
+
+static const struct clk_ops clk_phase_ops = {
+	.get_phase = hisi_clk_get_phase,
+	.set_phase = hisi_clk_set_phase,
+};
+
+struct clk *clk_register_hisi_phase(struct device *dev,
+		const struct hisi_phase_clock *clks,
+		void __iomem *base, spinlock_t *lock)
+{
+	struct clk_init_data init;
+	struct clk_hisi_phase *phase;
+
+	phase = devm_kzalloc(dev, sizeof(*phase), GFP_KERNEL);
+	if (!phase)
+		return ERR_PTR(-ENOMEM);
+
+	phase->reg = base + clks->offset;
+	phase->shift = clks->shift;
+	phase->mask = (BIT(clks->width) - 1) << clks->shift;
+	phase->lock = lock;
+	phase->phase_num = clks->phase_num;
+	phase->phase_regvals = clks->phase_regvals;
+	phase->phase_degrees = clks->phase_degrees;
+	/* init is a stack local, handed over via hw.init for the call below */
+	init.name = clks->name;
+	init.ops = &clk_phase_ops;
+	init.flags = clks->flags | CLK_IS_BASIC;
+	init.parent_names = clks->parent_names ? &clks->parent_names : NULL;
+	init.num_parents = clks->parent_names ? 1 : 0;
+	phase->hw.init = &init;
+
+	return devm_clk_register(dev, &phase->hw);
+}
+EXPORT_SYMBOL_GPL(clk_register_hisi_phase);
diff --git a/drivers/clk/hisilicon/clk.c b/drivers/clk/hisilicon/clk.c
index b73c1dfae7f1..953c8dacef8b 100644
--- a/drivers/clk/hisilicon/clk.c
+++ b/drivers/clk/hisilicon/clk.c
@@ -49,6 +49,8 @@ struct hisi_clock_data *hisi_clk_alloc(struct platform_device *pdev,
 		return NULL;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return NULL;
 	clk_data->base = devm_ioremap(&pdev->dev,
 				res->start, resource_size(res));
 	if (!clk_data->base)
@@ -197,6 +199,30 @@ err:
 }
 EXPORT_SYMBOL_GPL(hisi_clk_register_mux);
 
+int hisi_clk_register_phase(struct device *dev,
+			    const struct hisi_phase_clock *clks,
+			    int nums, struct hisi_clock_data *data)
+{
+	struct clk *clk;
+	int n;
+
+	/* stop at the first failure; nothing is unregistered by hand here */
+	for (n = 0; n < nums; n++) {
+		clk = clk_register_hisi_phase(dev, clks + n, data->base,
+					      &hisi_clk_lock);
+		if (IS_ERR(clk)) {
+			pr_err("%s: failed to register clock %s\n", __func__,
+			       clks[n].name);
+			return PTR_ERR(clk);
+		}
+
+		data->clk_data.clks[clks[n].id] = clk;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(hisi_clk_register_phase);
+
 int hisi_clk_register_divider(const struct hisi_divider_clock *clks,
 				      int nums, struct hisi_clock_data *data)
 {
diff --git a/drivers/clk/hisilicon/clk.h b/drivers/clk/hisilicon/clk.h
index 4e1d1affc6f5..8d7ee5c3231b 100644
--- a/drivers/clk/hisilicon/clk.h
+++ b/drivers/clk/hisilicon/clk.h
@@ -68,6 +68,19 @@ struct hisi_mux_clock {
 	const char		*alias;
 };
 
+struct hisi_phase_clock {
+	unsigned int		id;
+	const char		*name;
+	const char		*parent_names;
+	unsigned long		flags;
+	unsigned long		offset;
+	u8			shift;
+	u8			width;
+	u32			*phase_degrees;
+	u32			*phase_regvals;
+	u8			phase_num;
+};
+
 struct hisi_divider_clock {
 	unsigned int		id;
 	const char		*name;
@@ -120,6 +133,12 @@ int hisi_clk_register_fixed_factor(const struct hisi_fixed_factor_clock *,
 				int, struct hisi_clock_data *);
 int hisi_clk_register_mux(const struct hisi_mux_clock *, int,
 				struct hisi_clock_data *);
+struct clk *clk_register_hisi_phase(struct device *dev,
+				const struct hisi_phase_clock *clks,
+				void __iomem *base, spinlock_t *lock);
+int hisi_clk_register_phase(struct device *dev,
+				const struct hisi_phase_clock *clks,
+				int nums, struct hisi_clock_data *data);
 int hisi_clk_register_divider(const struct hisi_divider_clock *,
 				int, struct hisi_clock_data *);
 int hisi_clk_register_gate(const struct hisi_gate_clock *,
diff --git a/drivers/clk/hisilicon/crg-hi3516cv300.c b/drivers/clk/hisilicon/crg-hi3516cv300.c
index 2007123832bb..53450b651e4c 100644
--- a/drivers/clk/hisilicon/crg-hi3516cv300.c
+++ b/drivers/clk/hisilicon/crg-hi3516cv300.c
@@ -204,7 +204,7 @@ static const struct hisi_crg_funcs hi3516cv300_crg_funcs = {
 /* hi3516CV300 sysctrl CRG */
 #define HI3516CV300_SYSCTRL_NR_CLKS 16
 
-static const char *wdt_mux_p[] __initconst = { "3m", "apb" };
+static const char *const wdt_mux_p[] __initconst = { "3m", "apb" };
 static u32 wdt_mux_table[] = {0, 1};
 
 static const struct hisi_mux_clock hi3516cv300_sysctrl_mux_clks[] = {
diff --git a/drivers/clk/hisilicon/crg-hi3798cv200.c b/drivers/clk/hisilicon/crg-hi3798cv200.c
index 8478948e858e..743eec131528 100644
--- a/drivers/clk/hisilicon/crg-hi3798cv200.c
+++ b/drivers/clk/hisilicon/crg-hi3798cv200.c
@@ -27,30 +27,31 @@
 #include "reset.h"
 
 /* hi3798CV200 core CRG */
-#define HI3798CV200_INNER_CLK_OFFSET	64
-#define HI3798CV200_FIXED_24M	65
-#define HI3798CV200_FIXED_25M	66
-#define HI3798CV200_FIXED_50M	67
-#define HI3798CV200_FIXED_75M	68
-#define HI3798CV200_FIXED_100M	69
-#define HI3798CV200_FIXED_150M	70
-#define HI3798CV200_FIXED_200M	71
-#define HI3798CV200_FIXED_250M	72
-#define HI3798CV200_FIXED_300M	73
-#define HI3798CV200_FIXED_400M	74
-#define HI3798CV200_MMC_MUX	75
-#define HI3798CV200_ETH_PUB_CLK	76
-#define HI3798CV200_ETH_BUS_CLK	77
-#define HI3798CV200_ETH_BUS0_CLK	78
-#define HI3798CV200_ETH_BUS1_CLK	79
-#define HI3798CV200_COMBPHY1_MUX	80
-#define HI3798CV200_FIXED_12M	81
-#define HI3798CV200_FIXED_48M	82
-#define HI3798CV200_FIXED_60M	83
-#define HI3798CV200_FIXED_166P5M	84
-#define HI3798CV200_SDIO0_MUX	85
-
-#define HI3798CV200_CRG_NR_CLKS		128
+#define HI3798CV200_INNER_CLK_OFFSET		64
+#define HI3798CV200_FIXED_24M			65
+#define HI3798CV200_FIXED_25M			66
+#define HI3798CV200_FIXED_50M			67
+#define HI3798CV200_FIXED_75M			68
+#define HI3798CV200_FIXED_100M			69
+#define HI3798CV200_FIXED_150M			70
+#define HI3798CV200_FIXED_200M			71
+#define HI3798CV200_FIXED_250M			72
+#define HI3798CV200_FIXED_300M			73
+#define HI3798CV200_FIXED_400M			74
+#define HI3798CV200_MMC_MUX			75
+#define HI3798CV200_ETH_PUB_CLK			76
+#define HI3798CV200_ETH_BUS_CLK			77
+#define HI3798CV200_ETH_BUS0_CLK		78
+#define HI3798CV200_ETH_BUS1_CLK		79
+#define HI3798CV200_COMBPHY1_MUX		80
+#define HI3798CV200_FIXED_12M			81
+#define HI3798CV200_FIXED_48M			82
+#define HI3798CV200_FIXED_60M			83
+#define HI3798CV200_FIXED_166P5M		84
+#define HI3798CV200_SDIO0_MUX			85
+#define HI3798CV200_COMBPHY0_MUX		86
+
+#define HI3798CV200_CRG_NR_CLKS			128
 
 static const struct hisi_fixed_rate_clock hi3798cv200_fixed_rate_clks[] = {
 	{ HISTB_OSC_CLK, "clk_osc", NULL, 0, 24000000, },
@@ -74,9 +75,9 @@ static const char *const mmc_mux_p[] = {
 		"100m", "50m", "25m", "200m", "150m" };
 static u32 mmc_mux_table[] = {0, 1, 2, 3, 6};
 
-static const char *const comphy1_mux_p[] = {
+static const char *const comphy_mux_p[] = {
 		"100m", "25m"};
-static u32 comphy1_mux_table[] = {2, 3};
+static u32 comphy_mux_table[] = {2, 3};
 
 static const char *const sdio_mux_p[] = {
 		"100m", "50m", "150m", "166p5m" };
@@ -85,14 +86,29 @@ static u32 sdio_mux_table[] = {0, 1, 2, 3};
 static struct hisi_mux_clock hi3798cv200_mux_clks[] = {
 	{ HI3798CV200_MMC_MUX, "mmc_mux", mmc_mux_p, ARRAY_SIZE(mmc_mux_p),
 		CLK_SET_RATE_PARENT, 0xa0, 8, 3, 0, mmc_mux_table, },
+	{ HI3798CV200_COMBPHY0_MUX, "combphy0_mux",
+		comphy_mux_p, ARRAY_SIZE(comphy_mux_p),
+		CLK_SET_RATE_PARENT, 0x188, 2, 2, 0, comphy_mux_table, },
 	{ HI3798CV200_COMBPHY1_MUX, "combphy1_mux",
-		comphy1_mux_p, ARRAY_SIZE(comphy1_mux_p),
-		CLK_SET_RATE_PARENT, 0x188, 10, 2, 0, comphy1_mux_table, },
+		comphy_mux_p, ARRAY_SIZE(comphy_mux_p),
+		CLK_SET_RATE_PARENT, 0x188, 10, 2, 0, comphy_mux_table, },
 	{ HI3798CV200_SDIO0_MUX, "sdio0_mux", sdio_mux_p,
 		ARRAY_SIZE(sdio_mux_p), CLK_SET_RATE_PARENT,
 		0x9c, 8, 2, 0, sdio_mux_table, },
 };
 
+static u32 mmc_phase_regvals[] = {0, 1, 2, 3, 4, 5, 6, 7};
+static u32 mmc_phase_degrees[] = {0, 45, 90, 135, 180, 225, 270, 315};
+
+static struct hisi_phase_clock hi3798cv200_phase_clks[] = {
+	{ HISTB_MMC_SAMPLE_CLK, "mmc_sample", "clk_mmc_ciu",
+		CLK_SET_RATE_PARENT, 0xa0, 12, 3, mmc_phase_degrees,
+		mmc_phase_regvals, ARRAY_SIZE(mmc_phase_regvals) },
+	{ HISTB_MMC_DRV_CLK, "mmc_drive", "clk_mmc_ciu",
+		CLK_SET_RATE_PARENT, 0xa0, 16, 3, mmc_phase_degrees,
+		mmc_phase_regvals, ARRAY_SIZE(mmc_phase_regvals) },
+};
+
 static const struct hisi_gate_clock hi3798cv200_gate_clks[] = {
 	/* UART */
 	{ HISTB_UART2_CLK, "clk_uart2", "75m",
@@ -147,6 +163,9 @@ static const struct hisi_gate_clock hi3798cv200_gate_clks[] = {
 		CLK_SET_RATE_PARENT, 0xcc, 4, 0, },
 	{ HISTB_ETH1_MACIF_CLK, "clk_macif1", "clk_bus_m1",
 		CLK_SET_RATE_PARENT, 0xcc, 25, 0, },
+	/* COMBPHY0 */
+	{ HISTB_COMBPHY0_CLK, "clk_combphy0", "combphy0_mux",
+		CLK_SET_RATE_PARENT, 0x188, 0, 0, },
 	/* COMBPHY1 */
 	{ HISTB_COMBPHY1_CLK, "clk_combphy1", "combphy1_mux",
 		CLK_SET_RATE_PARENT, 0x188, 8, 0, },
@@ -161,6 +180,8 @@ static const struct hisi_gate_clock hi3798cv200_gate_clks[] = {
 		CLK_SET_RATE_PARENT, 0xb8, 1, 0 },
 	{ HISTB_USB2_UTMI_CLK, "clk_u2_utmi", "60m",
 		CLK_SET_RATE_PARENT, 0xb8, 5, 0 },
+	{ HISTB_USB2_OTG_UTMI_CLK, "clk_u2_otg_utmi", "60m",
+		CLK_SET_RATE_PARENT, 0xb8, 3, 0 },
 	{ HISTB_USB2_PHY1_REF_CLK, "clk_u2_phy1_ref", "24m",
 		CLK_SET_RATE_PARENT, 0xbc, 0, 0 },
 	{ HISTB_USB2_PHY2_REF_CLK, "clk_u2_phy2_ref", "24m",
@@ -177,6 +198,14 @@ static struct hisi_clock_data *hi3798cv200_clk_register(
 	if (!clk_data)
 		return ERR_PTR(-ENOMEM);
 
+	/* hisi_phase_clock is resource managed */
+	ret = hisi_clk_register_phase(&pdev->dev,
+				hi3798cv200_phase_clks,
+				ARRAY_SIZE(hi3798cv200_phase_clks),
+				clk_data);
+	if (ret)
+		return ERR_PTR(ret);
+
 	ret = hisi_clk_register_fixed_rate(hi3798cv200_fixed_rate_clks,
 				     ARRAY_SIZE(hi3798cv200_fixed_rate_clks),
 				     clk_data);
@@ -202,18 +231,17 @@ static struct hisi_clock_data *hi3798cv200_clk_register(
 
 	return clk_data;
 
-unregister_fixed_rate:
-	hisi_clk_unregister_fixed_rate(hi3798cv200_fixed_rate_clks,
-				ARRAY_SIZE(hi3798cv200_fixed_rate_clks),
+unregister_gate:
+	hisi_clk_unregister_gate(hi3798cv200_gate_clks,
+				ARRAY_SIZE(hi3798cv200_gate_clks),
 				clk_data);
-
 unregister_mux:
 	hisi_clk_unregister_mux(hi3798cv200_mux_clks,
 				ARRAY_SIZE(hi3798cv200_mux_clks),
 				clk_data);
-unregister_gate:
-	hisi_clk_unregister_gate(hi3798cv200_gate_clks,
-				ARRAY_SIZE(hi3798cv200_gate_clks),
+unregister_fixed_rate:
+	hisi_clk_unregister_fixed_rate(hi3798cv200_fixed_rate_clks,
+				ARRAY_SIZE(hi3798cv200_fixed_rate_clks),
 				clk_data);
 	return ERR_PTR(ret);
 }
@@ -245,7 +273,7 @@ static const struct hisi_crg_funcs hi3798cv200_crg_funcs = {
 #define HI3798CV200_SYSCTRL_NR_CLKS 16
 
 static const struct hisi_gate_clock hi3798cv200_sysctrl_gate_clks[] = {
-	{ HISTB_IR_CLK, "clk_ir", "100m",
+	{ HISTB_IR_CLK, "clk_ir", "24m",
 		CLK_SET_RATE_PARENT, 0x48, 4, 0, },
 	{ HISTB_TIMER01_CLK, "clk_timer01", "24m",
 		CLK_SET_RATE_PARENT, 0x48, 6, 0, },
diff --git a/drivers/clk/imx/Makefile b/drivers/clk/imx/Makefile
index f91f2b2e11cd..8c3baa7e6496 100644
--- a/drivers/clk/imx/Makefile
+++ b/drivers/clk/imx/Makefile
@@ -22,6 +22,7 @@ obj-$(CONFIG_SOC_IMX35)  += clk-imx35.o
 obj-$(CONFIG_SOC_IMX5)   += clk-imx51-imx53.o
 obj-$(CONFIG_SOC_IMX6Q)  += clk-imx6q.o
 obj-$(CONFIG_SOC_IMX6SL) += clk-imx6sl.o
+obj-$(CONFIG_SOC_IMX6SLL) += clk-imx6sll.o
 obj-$(CONFIG_SOC_IMX6SX) += clk-imx6sx.o
 obj-$(CONFIG_SOC_IMX6UL) += clk-imx6ul.o
 obj-$(CONFIG_SOC_IMX7D)  += clk-imx7d.o
diff --git a/drivers/clk/imx/clk-busy.c b/drivers/clk/imx/clk-busy.c
index 6df3389687bc..99036527eb0d 100644
--- a/drivers/clk/imx/clk-busy.c
+++ b/drivers/clk/imx/clk-busy.c
@@ -101,7 +101,7 @@ struct clk *imx_clk_busy_divider(const char *name, const char *parent_name,
 
 	init.name = name;
 	init.ops = &clk_busy_divider_ops;
-	init.flags = CLK_SET_RATE_PARENT;
+	init.flags = CLK_SET_RATE_PARENT | CLK_IS_CRITICAL;
 	init.parent_names = &parent_name;
 	init.num_parents = 1;
 
@@ -175,7 +175,7 @@ struct clk *imx_clk_busy_mux(const char *name, void __iomem *reg, u8 shift,
 
 	init.name = name;
 	init.ops = &clk_busy_mux_ops;
-	init.flags = 0;
+	init.flags = CLK_IS_CRITICAL;
 	init.parent_names = parent_names;
 	init.num_parents = num_parents;
 
diff --git a/drivers/clk/imx/clk-imx6sll.c b/drivers/clk/imx/clk-imx6sll.c
new file mode 100644
index 000000000000..3651c77fbabe
--- /dev/null
+++ b/drivers/clk/imx/clk-imx6sll.c
@@ -0,0 +1,354 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2016 Freescale Semiconductor, Inc.
+ * Copyright 2017-2018 NXP.
+ */
+
+#include <dt-bindings/clock/imx6sll-clock.h>
+#include <linux/clk.h>
+#include <linux/clkdev.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+
+#include "clk.h"
+
+#define CCM_ANALOG_PLL_BYPASS		(0x1 << 16)
+#define BM_CCM_CCDR_MMDC_CH0_MASK	(0x2 << 16)
+#define xPLL_CLR(offset)		((offset) + 0x8)
+
+/* Parent-name tables passed to the mux registrations in imx6sll_clocks_init() */
+static const char *pll_bypass_src_sels[] = { "osc", "dummy", };
+static const char *pll1_bypass_sels[] = { "pll1", "pll1_bypass_src", };
+static const char *pll2_bypass_sels[] = { "pll2", "pll2_bypass_src", };
+static const char *pll3_bypass_sels[] = { "pll3", "pll3_bypass_src", };
+static const char *pll4_bypass_sels[] = { "pll4", "pll4_bypass_src", };
+static const char *pll5_bypass_sels[] = { "pll5", "pll5_bypass_src", };
+static const char *pll6_bypass_sels[] = { "pll6", "pll6_bypass_src", };
+static const char *pll7_bypass_sels[] = { "pll7", "pll7_bypass_src", };
+static const char *step_sels[] = { "osc", "pll2_pfd2_396m", };
+static const char *pll1_sw_sels[] = { "pll1_sys", "step", };
+static const char *axi_alt_sels[] = { "pll2_pfd2_396m", "pll3_pfd1_540m", };
+static const char *axi_sels[] = {"periph", "axi_alt_sel", };
+static const char *periph_pre_sels[] = { "pll2_bus", "pll2_pfd2_396m", "pll2_pfd0_352m", "pll2_198m", };
+static const char *periph2_pre_sels[] = { "pll2_bus", "pll2_pfd2_396m", "pll2_pfd0_352m", "pll4_audio_div", };
+static const char *periph_clk2_sels[] = { "pll3_usb_otg", "osc", "osc", };
+static const char *periph2_clk2_sels[] = { "pll3_usb_otg", "osc", };
+static const char *periph_sels[] = { "periph_pre", "periph_clk2", };
+static const char *periph2_sels[] = { "periph2_pre", "periph2_clk2", };
+static const char *usdhc_sels[] = { "pll2_pfd2_396m", "pll2_pfd0_352m", };
+static const char *ssi_sels[] = {"pll3_pfd2_508m", "pll3_pfd3_454m", "pll4_audio_div", "dummy",};
+static const char *spdif_sels[] = { "pll4_audio_div", "pll3_pfd2_508m", "pll5_video_div", "pll3_usb_otg", };
+static const char *ldb_di0_div_sels[] = { "ldb_di0_div_3_5", "ldb_di0_div_7", };
+static const char *ldb_di1_div_sels[] = { "ldb_di1_div_3_5", "ldb_di1_div_7", };
+static const char *ldb_di0_sels[] = { "pll5_video_div", "pll2_pfd0_352m", "pll2_pfd2_396m", "pll2_pfd3_594m", "pll2_pfd1_594m", "pll3_pfd3_454m", };
+static const char *ldb_di1_sels[] = { "pll3_usb_otg", "pll2_pfd0_352m", "pll2_pfd2_396m", "pll2_bus", "pll3_pfd3_454m", "pll3_pfd2_508m", };
+static const char *lcdif_pre_sels[] = { "pll2_bus", "pll3_pfd3_454m", "pll5_video_div", "pll2_pfd0_352m", "pll2_pfd1_594m", "pll3_pfd1_540m", };
+static const char *ecspi_sels[] = { "pll3_60m", "osc", };
+static const char *uart_sels[] = { "pll3_80m", "osc", };
+static const char *perclk_sels[] = { "ipg", "osc", };
+static const char *lcdif_sels[] = { "lcdif_podf", "ipp_di0", "ipp_di1", "ldb_di0", "ldb_di1", };
+
+static const char *epdc_pre_sels[] = { "pll2_bus", "pll3_usb_otg", "pll5_video_div", "pll2_pfd0_352m", "pll2_pfd2_396m", "pll3_pfd2_508m", };
+static const char *epdc_sels[] = { "epdc_podf", "ipp_di0", "ipp_di1", "ldb_di0", "ldb_di1", };
+
+static struct clk *clks[IMX6SLL_CLK_END];
+static struct clk_onecell_data clk_data;
+
+/* Divider table for pll4_post_div and pll5_post_div (2-bit field at bit 19) */
+static const struct clk_div_table post_div_table[] = {
+	{ .val = 2, .div = 1, },
+	{ .val = 1, .div = 2, },
+	{ .val = 0, .div = 4, },
+	{ }
+};
+
+/* Divider table for pll5_video_div (2-bit field at bit 30 of anatop + 0x170) */
+static const struct clk_div_table video_div_table[] = {
+	{ .val = 0, .div = 1, },
+	{ .val = 1, .div = 2, },
+	{ .val = 2, .div = 1, },
+	{ .val = 3, .div = 4, },
+	{ }
+};
+
+/* Counters handed to imx_clk_gate2_shared() by gates using the same CCGR field */
+static u32 share_count_audio;
+static u32 share_count_ssi1;
+static u32 share_count_ssi2;
+static u32 share_count_ssi3;
+
+/*
+ * Register the i.MX6SLL clock tree for the CCM node @ccm_node.
+ *
+ * Maps the anatop block (PLLs, PFDs, post dividers) and the CCM block
+ * (muxes, dividers, CCGR gates), stores every registered clock in clks[],
+ * adds clks[] as a onecell clock provider on @ccm_node, and finally moves
+ * periph over periph_clk2 onto pll2_bus and requests a 132MHz AHB rate.
+ */
+static void __init imx6sll_clocks_init(struct device_node *ccm_node)
+{
+	struct device_node *np;
+	void __iomem *base;
+
+	clks[IMX6SLL_CLK_DUMMY] = imx_clk_fixed("dummy", 0);
+
+	clks[IMX6SLL_CLK_CKIL] = of_clk_get_by_name(ccm_node, "ckil");
+	clks[IMX6SLL_CLK_OSC] = of_clk_get_by_name(ccm_node, "osc");
+
+	/* ipp_di clock is external input */
+	clks[IMX6SLL_CLK_IPP_DI0] = of_clk_get_by_name(ccm_node, "ipp_di0");
+	clks[IMX6SLL_CLK_IPP_DI1] = of_clk_get_by_name(ccm_node, "ipp_di1");
+
+	np = of_find_compatible_node(NULL, NULL, "fsl,imx6sll-anatop");
+	base = of_iomap(np, 0);
+	/* np is reassigned below: drop the reference taken by the lookup */
+	of_node_put(np);
+	WARN_ON(!base);
+
+	/* Do not bypass PLLs initially */
+	writel_relaxed(CCM_ANALOG_PLL_BYPASS, base + xPLL_CLR(0x0));
+	writel_relaxed(CCM_ANALOG_PLL_BYPASS, base + xPLL_CLR(0x10));
+	writel_relaxed(CCM_ANALOG_PLL_BYPASS, base + xPLL_CLR(0x20));
+	writel_relaxed(CCM_ANALOG_PLL_BYPASS, base + xPLL_CLR(0x30));
+	writel_relaxed(CCM_ANALOG_PLL_BYPASS, base + xPLL_CLR(0x70));
+	writel_relaxed(CCM_ANALOG_PLL_BYPASS, base + xPLL_CLR(0xa0));
+	writel_relaxed(CCM_ANALOG_PLL_BYPASS, base + xPLL_CLR(0xe0));
+
+	clks[IMX6SLL_PLL1_BYPASS_SRC] = imx_clk_mux("pll1_bypass_src", base + 0x00, 14, 1, pll_bypass_src_sels, ARRAY_SIZE(pll_bypass_src_sels));
+	clks[IMX6SLL_PLL2_BYPASS_SRC] = imx_clk_mux("pll2_bypass_src", base + 0x30, 14, 1, pll_bypass_src_sels, ARRAY_SIZE(pll_bypass_src_sels));
+	clks[IMX6SLL_PLL3_BYPASS_SRC] = imx_clk_mux("pll3_bypass_src", base + 0x10, 14, 1, pll_bypass_src_sels, ARRAY_SIZE(pll_bypass_src_sels));
+	clks[IMX6SLL_PLL4_BYPASS_SRC] = imx_clk_mux("pll4_bypass_src", base + 0x70, 14, 1, pll_bypass_src_sels, ARRAY_SIZE(pll_bypass_src_sels));
+	clks[IMX6SLL_PLL5_BYPASS_SRC] = imx_clk_mux("pll5_bypass_src", base + 0xa0, 14, 1, pll_bypass_src_sels, ARRAY_SIZE(pll_bypass_src_sels));
+	clks[IMX6SLL_PLL6_BYPASS_SRC] = imx_clk_mux("pll6_bypass_src", base + 0xe0, 14, 1, pll_bypass_src_sels, ARRAY_SIZE(pll_bypass_src_sels));
+	clks[IMX6SLL_PLL7_BYPASS_SRC] = imx_clk_mux("pll7_bypass_src", base + 0x20, 14, 1, pll_bypass_src_sels, ARRAY_SIZE(pll_bypass_src_sels));
+
+	clks[IMX6SLL_CLK_PLL1] = imx_clk_pllv3(IMX_PLLV3_SYS,	 "pll1", "pll1_bypass_src", base + 0x00, 0x7f);
+	clks[IMX6SLL_CLK_PLL2] = imx_clk_pllv3(IMX_PLLV3_GENERIC, "pll2", "pll2_bypass_src", base + 0x30, 0x1);
+	clks[IMX6SLL_CLK_PLL3] = imx_clk_pllv3(IMX_PLLV3_USB,	 "pll3", "pll3_bypass_src", base + 0x10, 0x3);
+	clks[IMX6SLL_CLK_PLL4] = imx_clk_pllv3(IMX_PLLV3_AV,	 "pll4", "pll4_bypass_src", base + 0x70, 0x7f);
+	clks[IMX6SLL_CLK_PLL5] = imx_clk_pllv3(IMX_PLLV3_AV,	 "pll5", "pll5_bypass_src", base + 0xa0, 0x7f);
+	clks[IMX6SLL_CLK_PLL6] = imx_clk_pllv3(IMX_PLLV3_ENET,	 "pll6", "pll6_bypass_src", base + 0xe0, 0x3);
+	clks[IMX6SLL_CLK_PLL7] = imx_clk_pllv3(IMX_PLLV3_USB,	 "pll7", "pll7_bypass_src", base + 0x20, 0x3);
+
+	clks[IMX6SLL_PLL1_BYPASS] = imx_clk_mux_flags("pll1_bypass", base + 0x00, 16, 1, pll1_bypass_sels, ARRAY_SIZE(pll1_bypass_sels), CLK_SET_RATE_PARENT);
+	clks[IMX6SLL_PLL2_BYPASS] = imx_clk_mux_flags("pll2_bypass", base + 0x30, 16, 1, pll2_bypass_sels, ARRAY_SIZE(pll2_bypass_sels), CLK_SET_RATE_PARENT);
+	clks[IMX6SLL_PLL3_BYPASS] = imx_clk_mux_flags("pll3_bypass", base + 0x10, 16, 1, pll3_bypass_sels, ARRAY_SIZE(pll3_bypass_sels), CLK_SET_RATE_PARENT);
+	clks[IMX6SLL_PLL4_BYPASS] = imx_clk_mux_flags("pll4_bypass", base + 0x70, 16, 1, pll4_bypass_sels, ARRAY_SIZE(pll4_bypass_sels), CLK_SET_RATE_PARENT);
+	clks[IMX6SLL_PLL5_BYPASS] = imx_clk_mux_flags("pll5_bypass", base + 0xa0, 16, 1, pll5_bypass_sels, ARRAY_SIZE(pll5_bypass_sels), CLK_SET_RATE_PARENT);
+	clks[IMX6SLL_PLL6_BYPASS] = imx_clk_mux_flags("pll6_bypass", base + 0xe0, 16, 1, pll6_bypass_sels, ARRAY_SIZE(pll6_bypass_sels), CLK_SET_RATE_PARENT);
+	clks[IMX6SLL_PLL7_BYPASS] = imx_clk_mux_flags("pll7_bypass", base + 0x20, 16, 1, pll7_bypass_sels, ARRAY_SIZE(pll7_bypass_sels), CLK_SET_RATE_PARENT);
+
+	clks[IMX6SLL_CLK_PLL1_SYS]	= imx_clk_fixed_factor("pll1_sys", "pll1_bypass", 1, 1);
+	clks[IMX6SLL_CLK_PLL2_BUS]	= imx_clk_gate("pll2_bus",	   "pll2_bypass", base + 0x30, 13);
+	clks[IMX6SLL_CLK_PLL3_USB_OTG]	= imx_clk_gate("pll3_usb_otg",	   "pll3_bypass", base + 0x10, 13);
+	clks[IMX6SLL_CLK_PLL4_AUDIO]	= imx_clk_gate("pll4_audio",	   "pll4_bypass", base + 0x70, 13);
+	clks[IMX6SLL_CLK_PLL5_VIDEO]	= imx_clk_gate("pll5_video",	   "pll5_bypass", base + 0xa0, 13);
+	clks[IMX6SLL_CLK_PLL6_ENET]	= imx_clk_gate("pll6_enet",	   "pll6_bypass", base + 0xe0, 13);
+	clks[IMX6SLL_CLK_PLL7_USB_HOST]	= imx_clk_gate("pll7_usb_host",	   "pll7_bypass", base + 0x20, 13);
+
+	/*
+	 * Bit 20 is a reserved, read-only bit, so these gates do nothing in
+	 * hardware on usbphy clk_enable/disable. They exist only to keep a
+	 * refcount, so that the clk framework may enable/disable usbphy's
+	 * parent.
+	 */
+	clks[IMX6SLL_CLK_USBPHY1] = imx_clk_gate("usbphy1", "pll3_usb_otg",  base + 0x10, 20);
+	clks[IMX6SLL_CLK_USBPHY2] = imx_clk_gate("usbphy2", "pll7_usb_host", base + 0x20, 20);
+
+	/*
+	 * usbphy*_gate needs to be on after system boots up, and software
+	 * never needs to control it anymore.
+	 */
+	if (IS_ENABLED(CONFIG_USB_MXS_PHY)) {
+		clks[IMX6SLL_CLK_USBPHY1_GATE] = imx_clk_gate_flags("usbphy1_gate", "dummy", base + 0x10, 6, CLK_IS_CRITICAL);
+		clks[IMX6SLL_CLK_USBPHY2_GATE] = imx_clk_gate_flags("usbphy2_gate", "dummy", base + 0x20, 6, CLK_IS_CRITICAL);
+	}
+
+	/*					name		   parent_name	   reg		idx */
+	clks[IMX6SLL_CLK_PLL2_PFD0] = imx_clk_pfd("pll2_pfd0_352m", "pll2_bus", base + 0x100, 0);
+	clks[IMX6SLL_CLK_PLL2_PFD1] = imx_clk_pfd("pll2_pfd1_594m", "pll2_bus", base + 0x100, 1);
+	clks[IMX6SLL_CLK_PLL2_PFD2] = imx_clk_pfd("pll2_pfd2_396m", "pll2_bus", base + 0x100, 2);
+	clks[IMX6SLL_CLK_PLL2_PFD3] = imx_clk_pfd("pll2_pfd3_594m", "pll2_bus",	base + 0x100, 3);
+	clks[IMX6SLL_CLK_PLL3_PFD0] = imx_clk_pfd("pll3_pfd0_720m", "pll3_usb_otg", base + 0xf0, 0);
+	clks[IMX6SLL_CLK_PLL3_PFD1] = imx_clk_pfd("pll3_pfd1_540m", "pll3_usb_otg", base + 0xf0, 1);
+	clks[IMX6SLL_CLK_PLL3_PFD2] = imx_clk_pfd("pll3_pfd2_508m", "pll3_usb_otg", base + 0xf0, 2);
+	clks[IMX6SLL_CLK_PLL3_PFD3] = imx_clk_pfd("pll3_pfd3_454m", "pll3_usb_otg", base + 0xf0, 3);
+
+	clks[IMX6SLL_CLK_PLL4_POST_DIV]  = clk_register_divider_table(NULL, "pll4_post_div", "pll4_audio",
+		 CLK_SET_RATE_PARENT | CLK_SET_RATE_GATE, base + 0x70, 19, 2, 0, post_div_table, &imx_ccm_lock);
+	clks[IMX6SLL_CLK_PLL4_AUDIO_DIV] = clk_register_divider(NULL, "pll4_audio_div", "pll4_post_div",
+		 CLK_SET_RATE_PARENT | CLK_SET_RATE_GATE, base + 0x170, 15, 1, 0, &imx_ccm_lock);
+	clks[IMX6SLL_CLK_PLL5_POST_DIV]  = clk_register_divider_table(NULL, "pll5_post_div", "pll5_video",
+		 CLK_SET_RATE_PARENT | CLK_SET_RATE_GATE, base + 0xa0, 19, 2, 0, post_div_table, &imx_ccm_lock);
+	clks[IMX6SLL_CLK_PLL5_VIDEO_DIV] = clk_register_divider_table(NULL, "pll5_video_div", "pll5_post_div",
+		 CLK_SET_RATE_PARENT | CLK_SET_RATE_GATE, base + 0x170, 30, 2, 0, video_div_table, &imx_ccm_lock);
+
+	/*						   name		parent_name	 mult  div */
+	clks[IMX6SLL_CLK_PLL2_198M] = imx_clk_fixed_factor("pll2_198m", "pll2_pfd2_396m", 1, 2);
+	clks[IMX6SLL_CLK_PLL3_120M] = imx_clk_fixed_factor("pll3_120m", "pll3_usb_otg",   1, 4);
+	clks[IMX6SLL_CLK_PLL3_80M]  = imx_clk_fixed_factor("pll3_80m",  "pll3_usb_otg",   1, 6);
+	clks[IMX6SLL_CLK_PLL3_60M]  = imx_clk_fixed_factor("pll3_60m",  "pll3_usb_otg",   1, 8);
+
+	np = ccm_node;
+	base = of_iomap(np, 0);
+	WARN_ON(!base);
+
+	clks[IMX6SLL_CLK_STEP]		  = imx_clk_mux("step", base + 0x0c, 8, 1, step_sels, ARRAY_SIZE(step_sels));
+	clks[IMX6SLL_CLK_PLL1_SW]	  = imx_clk_mux_flags("pll1_sw",   base + 0x0c, 2,  1, pll1_sw_sels, ARRAY_SIZE(pll1_sw_sels), 0);
+	clks[IMX6SLL_CLK_AXI_ALT_SEL]	  = imx_clk_mux("axi_alt_sel",	   base + 0x14, 7,  1, axi_alt_sels, ARRAY_SIZE(axi_alt_sels));
+	clks[IMX6SLL_CLK_AXI_SEL]	  = imx_clk_mux_flags("axi_sel",   base + 0x14, 6,  1, axi_sels, ARRAY_SIZE(axi_sels), 0);
+	clks[IMX6SLL_CLK_PERIPH_PRE]	  = imx_clk_mux("periph_pre",      base + 0x18, 18, 2, periph_pre_sels, ARRAY_SIZE(periph_pre_sels));
+	clks[IMX6SLL_CLK_PERIPH2_PRE]	  = imx_clk_mux("periph2_pre",     base + 0x18, 21, 2, periph2_pre_sels, ARRAY_SIZE(periph2_pre_sels));
+	clks[IMX6SLL_CLK_PERIPH_CLK2_SEL]  = imx_clk_mux("periph_clk2_sel",  base + 0x18, 12, 2, periph_clk2_sels, ARRAY_SIZE(periph_clk2_sels));
+	clks[IMX6SLL_CLK_PERIPH2_CLK2_SEL] = imx_clk_mux("periph2_clk2_sel", base + 0x18, 20, 1, periph2_clk2_sels, ARRAY_SIZE(periph2_clk2_sels));
+	clks[IMX6SLL_CLK_USDHC1_SEL]	  = imx_clk_mux("usdhc1_sel",   base + 0x1c, 16, 1, usdhc_sels, ARRAY_SIZE(usdhc_sels));
+	clks[IMX6SLL_CLK_USDHC2_SEL]	  = imx_clk_mux("usdhc2_sel",   base + 0x1c, 17, 1, usdhc_sels, ARRAY_SIZE(usdhc_sels));
+	clks[IMX6SLL_CLK_USDHC3_SEL]	  = imx_clk_mux("usdhc3_sel",   base + 0x1c, 18, 1, usdhc_sels, ARRAY_SIZE(usdhc_sels));
+	clks[IMX6SLL_CLK_SSI1_SEL]	  = imx_clk_mux("ssi1_sel",     base + 0x1c, 10, 2, ssi_sels, ARRAY_SIZE(ssi_sels));
+	clks[IMX6SLL_CLK_SSI2_SEL]	  = imx_clk_mux("ssi2_sel",     base + 0x1c, 12, 2, ssi_sels, ARRAY_SIZE(ssi_sels));
+	clks[IMX6SLL_CLK_SSI3_SEL]	  = imx_clk_mux("ssi3_sel",     base + 0x1c, 14, 2, ssi_sels, ARRAY_SIZE(ssi_sels));
+	clks[IMX6SLL_CLK_PERCLK_SEL]	  = imx_clk_mux("perclk_sel",	base + 0x1c, 6,  1, perclk_sels, ARRAY_SIZE(perclk_sels));
+	clks[IMX6SLL_CLK_UART_SEL]	  = imx_clk_mux("uart_sel",	base + 0x24, 6,  1, uart_sels, ARRAY_SIZE(uart_sels));
+	clks[IMX6SLL_CLK_SPDIF_SEL]	  = imx_clk_mux("spdif_sel",	base + 0x30, 20, 2, spdif_sels, ARRAY_SIZE(spdif_sels));
+	clks[IMX6SLL_CLK_EXTERN_AUDIO_SEL] = imx_clk_mux("extern_audio_sel", base + 0x30, 7,  2, spdif_sels, ARRAY_SIZE(spdif_sels));
+	clks[IMX6SLL_CLK_EPDC_PRE_SEL]	  = imx_clk_mux("epdc_pre_sel",	base + 0x34, 15, 3, epdc_pre_sels, ARRAY_SIZE(epdc_pre_sels));
+	clks[IMX6SLL_CLK_EPDC_SEL]	  = imx_clk_mux("epdc_sel",	base + 0x34, 9, 3, epdc_sels, ARRAY_SIZE(epdc_sels));
+	clks[IMX6SLL_CLK_ECSPI_SEL]	  = imx_clk_mux("ecspi_sel",	base + 0x38, 18, 1, ecspi_sels, ARRAY_SIZE(ecspi_sels));
+	clks[IMX6SLL_CLK_LCDIF_PRE_SEL]	  = imx_clk_mux("lcdif_pre_sel", base + 0x38, 15, 3, lcdif_pre_sels, ARRAY_SIZE(lcdif_pre_sels));
+	clks[IMX6SLL_CLK_LCDIF_SEL]	  = imx_clk_mux("lcdif_sel",	 base + 0x38, 9, 3, lcdif_sels, ARRAY_SIZE(lcdif_sels));
+
+	clks[IMX6SLL_CLK_PERIPH]  = imx_clk_busy_mux("periph",  base + 0x14, 25, 1, base + 0x48, 5, periph_sels, ARRAY_SIZE(periph_sels));
+	clks[IMX6SLL_CLK_PERIPH2] = imx_clk_busy_mux("periph2", base + 0x14, 26, 1, base + 0x48, 3, periph2_sels, ARRAY_SIZE(periph2_sels));
+
+	clks[IMX6SLL_CLK_PERIPH_CLK2]	= imx_clk_divider("periph_clk2",   "periph_clk2_sel",	base + 0x14, 27, 3);
+	clks[IMX6SLL_CLK_PERIPH2_CLK2]	= imx_clk_divider("periph2_clk2",  "periph2_clk2_sel",	base + 0x14, 0,  3);
+	clks[IMX6SLL_CLK_IPG]		= imx_clk_divider("ipg",	   "ahb",		base + 0x14, 8,	 2);
+	clks[IMX6SLL_CLK_LCDIF_PODF]	= imx_clk_divider("lcdif_podf",	   "lcdif_pred",	base + 0x18, 23, 3);
+	clks[IMX6SLL_CLK_PERCLK]	= imx_clk_divider("perclk",	   "perclk_sel",	base + 0x1c, 0,  6);
+	clks[IMX6SLL_CLK_USDHC3_PODF]   = imx_clk_divider("usdhc3_podf",   "usdhc3_sel",	base + 0x24, 19, 3);
+	clks[IMX6SLL_CLK_USDHC2_PODF]	= imx_clk_divider("usdhc2_podf",   "usdhc2_sel",	base + 0x24, 16, 3);
+	clks[IMX6SLL_CLK_USDHC1_PODF]	= imx_clk_divider("usdhc1_podf",   "usdhc1_sel",	base + 0x24, 11, 3);
+	clks[IMX6SLL_CLK_UART_PODF]	= imx_clk_divider("uart_podf",	   "uart_sel",		base + 0x24, 0,  6);
+	clks[IMX6SLL_CLK_SSI3_PRED]	= imx_clk_divider("ssi3_pred",	   "ssi3_sel",		base + 0x28, 22, 3);
+	clks[IMX6SLL_CLK_SSI3_PODF]	= imx_clk_divider("ssi3_podf",	   "ssi3_pred",		base + 0x28, 16, 6);
+	clks[IMX6SLL_CLK_SSI1_PRED]	= imx_clk_divider("ssi1_pred",	   "ssi1_sel",		base + 0x28, 6,	 3);
+	clks[IMX6SLL_CLK_SSI1_PODF]	= imx_clk_divider("ssi1_podf",	   "ssi1_pred",		base + 0x28, 0,	 6);
+	clks[IMX6SLL_CLK_SSI2_PRED]	= imx_clk_divider("ssi2_pred",	   "ssi2_sel",		base + 0x2c, 6,	 3);
+	clks[IMX6SLL_CLK_SSI2_PODF]	= imx_clk_divider("ssi2_podf",	   "ssi2_pred",		base + 0x2c, 0,  6);
+	clks[IMX6SLL_CLK_SPDIF_PRED]	= imx_clk_divider("spdif_pred",	   "spdif_sel",		base + 0x30, 25, 3);
+	clks[IMX6SLL_CLK_SPDIF_PODF]	= imx_clk_divider("spdif_podf",	   "spdif_pred",	base + 0x30, 22, 3);
+	clks[IMX6SLL_CLK_EXTERN_AUDIO_PRED] = imx_clk_divider("extern_audio_pred", "extern_audio_sel",  base + 0x30, 12, 3);
+	clks[IMX6SLL_CLK_EXTERN_AUDIO_PODF] = imx_clk_divider("extern_audio_podf", "extern_audio_pred", base + 0x30, 9,  3);
+	clks[IMX6SLL_CLK_EPDC_PODF]  = imx_clk_divider("epdc_podf",  "epdc_pre_sel",  base + 0x34, 12, 3);
+	clks[IMX6SLL_CLK_ECSPI_PODF] = imx_clk_divider("ecspi_podf", "ecspi_sel",     base + 0x38, 19, 6);
+	clks[IMX6SLL_CLK_LCDIF_PRED] = imx_clk_divider("lcdif_pred", "lcdif_pre_sel", base + 0x38, 12, 3);
+
+	clks[IMX6SLL_CLK_ARM]		= imx_clk_busy_divider("arm",	    "pll1_sw",	base + 0x10, 0,  3,  base + 0x48, 16);
+	clks[IMX6SLL_CLK_MMDC_PODF]	= imx_clk_busy_divider("mmdc_podf", "periph2",	base + 0x14, 3,  3,  base + 0x48, 2);
+	clks[IMX6SLL_CLK_AXI_PODF]	= imx_clk_busy_divider("axi",       "axi_sel",	base + 0x14, 16, 3,  base + 0x48, 0);
+	clks[IMX6SLL_CLK_AHB]		= imx_clk_busy_divider("ahb",	    "periph",	base + 0x14, 10, 3,  base + 0x48, 1);
+
+	clks[IMX6SLL_CLK_LDB_DI0_DIV_3_5] = imx_clk_fixed_factor("ldb_di0_div_3_5", "ldb_di0_sel", 2, 7);
+	clks[IMX6SLL_CLK_LDB_DI0_DIV_7]	  = imx_clk_fixed_factor("ldb_di0_div_7",   "ldb_di0_sel", 1, 7);
+	clks[IMX6SLL_CLK_LDB_DI1_DIV_3_5] = imx_clk_fixed_factor("ldb_di1_div_3_5", "ldb_di1_sel", 2, 7);
+	clks[IMX6SLL_CLK_LDB_DI1_DIV_7]	  = imx_clk_fixed_factor("ldb_di1_div_7",   "ldb_di1_sel", 1, 7);
+
+	clks[IMX6SLL_CLK_LDB_DI0_SEL]	= imx_clk_mux("ldb_di0_sel", base + 0x2c, 9, 3, ldb_di0_sels, ARRAY_SIZE(ldb_di0_sels));
+	clks[IMX6SLL_CLK_LDB_DI1_SEL]   = imx_clk_mux("ldb_di1_sel", base + 0x1c, 7, 3, ldb_di1_sels, ARRAY_SIZE(ldb_di1_sels));
+	clks[IMX6SLL_CLK_LDB_DI0_DIV_SEL] = imx_clk_mux("ldb_di0_div_sel", base + 0x20, 10, 1, ldb_di0_div_sels, ARRAY_SIZE(ldb_di0_div_sels));
+	clks[IMX6SLL_CLK_LDB_DI1_DIV_SEL] = imx_clk_mux("ldb_di1_div_sel", base + 0x20, 10, 1, ldb_di1_div_sels, ARRAY_SIZE(ldb_di1_div_sels));
+
+	/* CCGR0 */
+	clks[IMX6SLL_CLK_AIPSTZ1]	= imx_clk_gate2_flags("aips_tz1", "ahb", base + 0x68, 0, CLK_IS_CRITICAL);
+	clks[IMX6SLL_CLK_AIPSTZ2]	= imx_clk_gate2_flags("aips_tz2", "ahb", base + 0x68, 2, CLK_IS_CRITICAL);
+	clks[IMX6SLL_CLK_DCP]		= imx_clk_gate2("dcp", "ahb", base + 0x68, 10);
+	clks[IMX6SLL_CLK_UART2_IPG]	= imx_clk_gate2("uart2_ipg", "ipg", base + 0x68, 28);
+	clks[IMX6SLL_CLK_UART2_SERIAL]	= imx_clk_gate2("uart2_serial",	"uart_podf", base + 0x68, 28);
+
+	/* CCGR1 */
+	clks[IMX6SLL_CLK_ECSPI1]	= imx_clk_gate2("ecspi1",	"ecspi_podf", base + 0x6c, 0);
+	clks[IMX6SLL_CLK_ECSPI2]	= imx_clk_gate2("ecspi2",	"ecspi_podf", base + 0x6c, 2);
+	clks[IMX6SLL_CLK_ECSPI3]	= imx_clk_gate2("ecspi3",	"ecspi_podf", base + 0x6c, 4);
+	clks[IMX6SLL_CLK_ECSPI4]	= imx_clk_gate2("ecspi4",	"ecspi_podf", base + 0x6c, 6);
+	clks[IMX6SLL_CLK_UART3_IPG]	= imx_clk_gate2("uart3_ipg",	"ipg", base + 0x6c, 10);
+	clks[IMX6SLL_CLK_UART3_SERIAL]	= imx_clk_gate2("uart3_serial",	"uart_podf", base + 0x6c, 10);
+	clks[IMX6SLL_CLK_EPIT1]		= imx_clk_gate2("epit1",	"perclk", base + 0x6c, 12);
+	clks[IMX6SLL_CLK_EPIT2]		= imx_clk_gate2("epit2",	"perclk", base + 0x6c, 14);
+	clks[IMX6SLL_CLK_GPT_BUS]	= imx_clk_gate2("gpt1_bus",	"perclk", base + 0x6c, 20);
+	clks[IMX6SLL_CLK_GPT_SERIAL]	= imx_clk_gate2("gpt1_serial",	"perclk", base + 0x6c, 22);
+	clks[IMX6SLL_CLK_UART4_IPG]	= imx_clk_gate2("uart4_ipg",	"ipg", base + 0x6c, 24);
+	clks[IMX6SLL_CLK_UART4_SERIAL]	= imx_clk_gate2("uart4_serial",	"uart_podf", base + 0x6c, 24);
+
+	/* CCGR2 */
+	clks[IMX6SLL_CLK_CSI]		= imx_clk_gate2("csi",		"axi",    base + 0x70,	2);
+	clks[IMX6SLL_CLK_I2C1]		= imx_clk_gate2("i2c1",		"perclk", base + 0x70,	6);
+	clks[IMX6SLL_CLK_I2C2]		= imx_clk_gate2("i2c2",		"perclk", base + 0x70,	8);
+	clks[IMX6SLL_CLK_I2C3]		= imx_clk_gate2("i2c3",		"perclk", base + 0x70,	10);
+	clks[IMX6SLL_CLK_OCOTP]		= imx_clk_gate2("ocotp",	"ipg",    base + 0x70,	12);
+	clks[IMX6SLL_CLK_LCDIF_APB]	= imx_clk_gate2("lcdif_apb",	"axi",    base + 0x70,	28);
+	clks[IMX6SLL_CLK_PXP]		= imx_clk_gate2("pxp",		"axi",    base + 0x70,	30);
+
+	/* CCGR3 */
+	clks[IMX6SLL_CLK_UART5_IPG]	= imx_clk_gate2("uart5_ipg",	"ipg",		base + 0x74, 2);
+	clks[IMX6SLL_CLK_UART5_SERIAL]	= imx_clk_gate2("uart5_serial",	"uart_podf",	base + 0x74, 2);
+	clks[IMX6SLL_CLK_EPDC_AXI]	= imx_clk_gate2("epdc_aclk",	"axi",		base + 0x74, 4);
+	clks[IMX6SLL_CLK_EPDC_PIX]	= imx_clk_gate2("epdc_pix",	"epdc_podf",	base + 0x74, 4);
+	clks[IMX6SLL_CLK_LCDIF_PIX]	= imx_clk_gate2("lcdif_pix",	"lcdif_podf",	base + 0x74, 10);
+	clks[IMX6SLL_CLK_WDOG1]		= imx_clk_gate2("wdog1",	"ipg",		base + 0x74, 16);
+	clks[IMX6SLL_CLK_MMDC_P0_FAST]	= imx_clk_gate_flags("mmdc_p0_fast", "mmdc_podf",  base + 0x74,	20, CLK_IS_CRITICAL);
+	clks[IMX6SLL_CLK_MMDC_P0_IPG]	= imx_clk_gate2_flags("mmdc_p0_ipg", "ipg",	   base + 0x74,	24, CLK_IS_CRITICAL);
+	clks[IMX6SLL_CLK_OCRAM]		= imx_clk_gate_flags("ocram","ahb",		   base + 0x74,	28, CLK_IS_CRITICAL);
+
+	/* CCGR4 */
+	clks[IMX6SLL_CLK_PWM1]		= imx_clk_gate2("pwm1", "perclk", base + 0x78, 16);
+	clks[IMX6SLL_CLK_PWM2]		= imx_clk_gate2("pwm2", "perclk", base + 0x78, 18);
+	clks[IMX6SLL_CLK_PWM3]		= imx_clk_gate2("pwm3", "perclk", base + 0x78, 20);
+	clks[IMX6SLL_CLK_PWM4]		= imx_clk_gate2("pwm4", "perclk", base + 0x78, 22);
+
+	/* CCGR5 */
+	clks[IMX6SLL_CLK_ROM]		= imx_clk_gate2_flags("rom", "ahb", base + 0x7c, 0, CLK_IS_CRITICAL);
+	clks[IMX6SLL_CLK_SDMA]		= imx_clk_gate2("sdma",	 "ahb",	base + 0x7c, 6);
+	clks[IMX6SLL_CLK_WDOG2]		= imx_clk_gate2("wdog2", "ipg",	base + 0x7c, 10);
+	clks[IMX6SLL_CLK_SPBA]		= imx_clk_gate2("spba",	 "ipg",	base + 0x7c, 12);
+	clks[IMX6SLL_CLK_EXTERN_AUDIO]	= imx_clk_gate2_shared("extern_audio",  "extern_audio_podf", base + 0x7c, 14, &share_count_audio);
+	clks[IMX6SLL_CLK_SPDIF]		= imx_clk_gate2_shared("spdif",		"spdif_podf",	base + 0x7c, 14, &share_count_audio);
+	clks[IMX6SLL_CLK_SPDIF_GCLK]	= imx_clk_gate2_shared("spdif_gclk",	"ipg",		base + 0x7c, 14, &share_count_audio);
+	clks[IMX6SLL_CLK_SSI1]		= imx_clk_gate2_shared("ssi1",		"ssi1_podf",	base + 0x7c, 18, &share_count_ssi1);
+	clks[IMX6SLL_CLK_SSI1_IPG]	= imx_clk_gate2_shared("ssi1_ipg",	"ipg",		base + 0x7c, 18, &share_count_ssi1);
+	clks[IMX6SLL_CLK_SSI2]		= imx_clk_gate2_shared("ssi2",		"ssi2_podf",	base + 0x7c, 20, &share_count_ssi2);
+	clks[IMX6SLL_CLK_SSI2_IPG]	= imx_clk_gate2_shared("ssi2_ipg",	"ipg",		base + 0x7c, 20, &share_count_ssi2);
+	clks[IMX6SLL_CLK_SSI3]		= imx_clk_gate2_shared("ssi3",		"ssi3_podf",	base + 0x7c, 22, &share_count_ssi3);
+	clks[IMX6SLL_CLK_SSI3_IPG]	= imx_clk_gate2_shared("ssi3_ipg",	"ipg",		base + 0x7c, 22, &share_count_ssi3);
+	clks[IMX6SLL_CLK_UART1_IPG]	= imx_clk_gate2("uart1_ipg",	"ipg",		base + 0x7c, 24);
+	clks[IMX6SLL_CLK_UART1_SERIAL]	= imx_clk_gate2("uart1_serial",	"uart_podf",	base + 0x7c, 24);
+
+	/* CCGR6 */
+	clks[IMX6SLL_CLK_USBOH3]	= imx_clk_gate2("usboh3", "ipg",	  base + 0x80,	0);
+	clks[IMX6SLL_CLK_USDHC1]	= imx_clk_gate2("usdhc1", "usdhc1_podf",  base + 0x80,	2);
+	clks[IMX6SLL_CLK_USDHC2]	= imx_clk_gate2("usdhc2", "usdhc2_podf",  base + 0x80,	4);
+	clks[IMX6SLL_CLK_USDHC3]	= imx_clk_gate2("usdhc3", "usdhc3_podf",  base + 0x80,	6);
+
+	/* mask handshake of mmdc */
+	writel_relaxed(BM_CCM_CCDR_MMDC_CH0_MASK, base + 0x4);
+
+	imx_check_clocks(clks, ARRAY_SIZE(clks));
+
+	clk_data.clks = clks;
+	clk_data.clk_num = ARRAY_SIZE(clks);
+	of_clk_add_provider(np, of_clk_src_onecell_get, &clk_data);
+
+	/* Lower the AHB clock rate before changing the clock source. */
+	clk_set_rate(clks[IMX6SLL_CLK_AHB], 99000000);
+
+	/* Change periph_pre clock to pll2_bus to adjust AXI rate to 264MHz */
+	clk_set_parent(clks[IMX6SLL_CLK_PERIPH_CLK2_SEL], clks[IMX6SLL_CLK_PLL3_USB_OTG]);
+	clk_set_parent(clks[IMX6SLL_CLK_PERIPH], clks[IMX6SLL_CLK_PERIPH_CLK2]);
+	clk_set_parent(clks[IMX6SLL_CLK_PERIPH_PRE], clks[IMX6SLL_CLK_PLL2_BUS]);
+	clk_set_parent(clks[IMX6SLL_CLK_PERIPH], clks[IMX6SLL_CLK_PERIPH_PRE]);
+
+	clk_set_rate(clks[IMX6SLL_CLK_AHB], 132000000);
+}
+CLK_OF_DECLARE_DRIVER(imx6sll, "fsl,imx6sll-ccm", imx6sll_clocks_init);
diff --git a/drivers/clk/imx/clk-imx6sx.c b/drivers/clk/imx/clk-imx6sx.c
index e6d389e333d7..bc3f9ebf2d9e 100644
--- a/drivers/clk/imx/clk-imx6sx.c
+++ b/drivers/clk/imx/clk-imx6sx.c
@@ -63,17 +63,17 @@ static const char *lcdif2_sels[]	= { "lcdif2_podf", "ipp_di0", "ipp_di1", "ldb_d
 static const char *display_sels[]	= { "pll2_bus", "pll2_pfd2_396m", "pll3_usb_otg", "pll3_pfd1_540m", };
 static const char *csi_sels[]		= { "osc", "pll2_pfd2_396m", "pll3_120m", "pll3_pfd1_540m", };
 static const char *cko1_sels[]		= {
-	"pll3_usb_otg", "pll2_bus", "pll1_sys", "pll5_video_div",
-	"dummy", "ocram", "dummy", "pxp_axi", "epdc_axi", "lcdif_pix",
-	"epdc_pix", "ahb", "ipg", "perclk", "ckil", "pll4_audio_div",
+	"dummy", "dummy", "dummy", "dummy",
+	"vadc", "ocram", "qspi2", "m4", "enet_ahb", "lcdif2_pix",
+	"lcdif1_pix", "ahb", "ipg", "perclk", "ckil", "pll4_audio_div",
 };
 static const char *cko2_sels[]		= {
 	"dummy", "mmdc_p0_fast", "usdhc4", "usdhc1", "dummy", "wrck",
 	"ecspi_root", "dummy", "usdhc3", "pcie", "arm", "csi_core",
-	"lcdif_axi", "dummy", "osc", "dummy", "gpu2d_ovg_core",
-	"usdhc2", "ssi1", "ssi2", "ssi3", "gpu2d_core", "dummy",
-	"dummy", "dummy", "dummy", "esai_extal", "eim_slow", "uart_serial",
-	"spdif", "asrc", "dummy",
+	"display_axi", "dummy", "osc", "dummy", "dummy",
+	"usdhc2", "ssi1", "ssi2", "ssi3", "gpu_axi_podf", "dummy",
+	"can_podf", "lvds1_out", "qspi1", "esai_extal", "eim_slow",
+	"uart_serial", "spdif", "audio", "dummy",
 };
 static const char *cko_sels[] = { "cko1", "cko2", };
 static const char *lvds_sels[]	= {
diff --git a/drivers/clk/imx/clk-imx6ul.c b/drivers/clk/imx/clk-imx6ul.c
index 85c118164469..114ecbb94ec5 100644
--- a/drivers/clk/imx/clk-imx6ul.c
+++ b/drivers/clk/imx/clk-imx6ul.c
@@ -308,7 +308,10 @@ static void __init imx6ul_clocks_init(struct device_node *ccm_node)
 	clks[IMX6UL_CLK_SAI2_PODF]	= imx_clk_divider("sai2_podf",	   "sai2_pred",		base + 0x2c, 0,  6);
 	clks[IMX6UL_CLK_SPDIF_PRED]	= imx_clk_divider("spdif_pred",	   "spdif_sel",		base + 0x30, 25, 3);
 	clks[IMX6UL_CLK_SPDIF_PODF]	= imx_clk_divider("spdif_podf",	   "spdif_pred",	base + 0x30, 22, 3);
-	clks[IMX6UL_CLK_SIM_PODF]	= imx_clk_divider("sim_podf",	   "sim_pre_sel",	base + 0x34, 12, 3);
+	if (clk_on_imx6ul())
+		clks[IMX6UL_CLK_SIM_PODF]	= imx_clk_divider("sim_podf",	   "sim_pre_sel",	base + 0x34, 12, 3);
+	else if (clk_on_imx6ull())
+		clks[IMX6ULL_CLK_EPDC_PODF]	= imx_clk_divider("epdc_podf",	   "epdc_pre_sel",	base + 0x34, 12, 3);
 	clks[IMX6UL_CLK_ECSPI_PODF]	= imx_clk_divider("ecspi_podf",	   "ecspi_sel",		base + 0x38, 19, 6);
 	clks[IMX6UL_CLK_LCDIF_PRED]	= imx_clk_divider("lcdif_pred",	   "lcdif_pre_sel",	base + 0x38, 12, 3);
 	clks[IMX6UL_CLK_CSI_PODF]       = imx_clk_divider("csi_podf",      "csi_sel",           base + 0x3c, 11, 3);
diff --git a/drivers/clk/imx/clk-imx7d.c b/drivers/clk/imx/clk-imx7d.c
index 617beb234259..975a20d3cc94 100644
--- a/drivers/clk/imx/clk-imx7d.c
+++ b/drivers/clk/imx/clk-imx7d.c
@@ -51,20 +51,20 @@ static const char *arm_a7_sel[] = { "osc", "pll_arm_main_clk",
 
 static const char *arm_m4_sel[] = { "osc", "pll_sys_main_240m_clk",
 	"pll_enet_250m_clk", "pll_sys_pfd2_270m_clk",
-	"pll_dram_533m_clk", "pll_audio_post_div", "pll_video_main_clk",
+	"pll_dram_533m_clk", "pll_audio_post_div", "pll_video_post_div",
 	"pll_usb_main_clk", };
 
 static const char *axi_sel[] = { "osc", "pll_sys_pfd1_332m_clk",
 	"pll_dram_533m_clk", "pll_enet_250m_clk", "pll_sys_pfd5_clk",
-	"pll_audio_post_div", "pll_video_main_clk", "pll_sys_pfd7_clk", };
+	"pll_audio_post_div", "pll_video_post_div", "pll_sys_pfd7_clk", };
 
 static const char *disp_axi_sel[] = { "osc", "pll_sys_pfd1_332m_clk",
 	"pll_dram_533m_clk", "pll_enet_250m_clk", "pll_sys_pfd6_clk",
-	"pll_sys_pfd7_clk", "pll_audio_post_div", "pll_video_main_clk", };
+	"pll_sys_pfd7_clk", "pll_audio_post_div", "pll_video_post_div", };
 
 static const char *enet_axi_sel[] = { "osc", "pll_sys_pfd2_270m_clk",
 	"pll_dram_533m_clk", "pll_enet_250m_clk",
-	"pll_sys_main_240m_clk", "pll_audio_post_div", "pll_video_main_clk",
+	"pll_sys_main_240m_clk", "pll_audio_post_div", "pll_video_post_div",
 	"pll_sys_pfd4_clk", };
 
 static const char *nand_usdhc_bus_sel[] = { "osc", "pll_sys_pfd2_270m_clk",
@@ -74,8 +74,8 @@ static const char *nand_usdhc_bus_sel[] = { "osc", "pll_sys_pfd2_270m_clk",
 
 static const char *ahb_channel_sel[] = { "osc", "pll_sys_pfd2_270m_clk",
 	"pll_dram_533m_clk", "pll_sys_pfd0_392m_clk",
-	"pll_enet_125m_clk", "pll_usb_main_clk", "pll_audio_post_div",
-	"pll_video_main_clk", };
+	"pll_enet_250m_clk", "pll_usb_main_clk", "pll_audio_post_div",
+	"pll_video_post_div", };
 
 static const char *dram_phym_sel[] = { "pll_dram_main_clk",
 	"dram_phym_alt_clk", };
@@ -86,7 +86,7 @@ static const char *dram_sel[] = { "pll_dram_main_clk",
 static const char *dram_phym_alt_sel[] = { "osc", "pll_dram_533m_clk",
 	"pll_sys_main_clk", "pll_enet_500m_clk",
 	"pll_usb_main_clk", "pll_sys_pfd7_clk", "pll_audio_post_div",
-	"pll_video_main_clk", };
+	"pll_video_post_div", };
 
 static const char *dram_alt_sel[] = { "osc", "pll_dram_533m_clk",
 	"pll_sys_main_clk", "pll_enet_500m_clk",
@@ -108,62 +108,62 @@ static const char *pcie_phy_sel[] = { "osc", "pll_enet_100m_clk",
 
 static const char *epdc_pixel_sel[] = { "osc", "pll_sys_pfd1_332m_clk",
 	"pll_dram_533m_clk", "pll_sys_main_clk", "pll_sys_pfd5_clk",
-	"pll_sys_pfd6_clk", "pll_sys_pfd7_clk", "pll_video_main_clk", };
+	"pll_sys_pfd6_clk", "pll_sys_pfd7_clk", "pll_video_post_div", };
 
 static const char *lcdif_pixel_sel[] = { "osc", "pll_sys_pfd5_clk",
 	"pll_dram_533m_clk", "ext_clk_3", "pll_sys_pfd4_clk",
-	"pll_sys_pfd2_270m_clk", "pll_video_main_clk",
+	"pll_sys_pfd2_270m_clk", "pll_video_post_div",
 	"pll_usb_main_clk", };
 
 static const char *mipi_dsi_sel[] = { "osc", "pll_sys_pfd5_clk",
 	"pll_sys_pfd3_clk", "pll_sys_main_clk", "pll_sys_pfd0_196m_clk",
-	"pll_dram_533m_clk", "pll_video_main_clk", "pll_audio_post_div", };
+	"pll_dram_533m_clk", "pll_video_post_div", "pll_audio_post_div", };
 
 static const char *mipi_csi_sel[] = { "osc", "pll_sys_pfd4_clk",
 	"pll_sys_pfd3_clk", "pll_sys_main_clk", "pll_sys_pfd0_196m_clk",
-	"pll_dram_533m_clk", "pll_video_main_clk", "pll_audio_post_div", };
+	"pll_dram_533m_clk", "pll_video_post_div", "pll_audio_post_div", };
 
 static const char *mipi_dphy_sel[] = { "osc", "pll_sys_main_120m_clk",
 	"pll_dram_533m_clk", "pll_sys_pfd5_clk", "ref_1m_clk", "ext_clk_2",
-	"pll_video_main_clk", "ext_clk_3", };
+	"pll_video_post_div", "ext_clk_3", };
 
 static const char *sai1_sel[] = { "osc", "pll_sys_pfd2_135m_clk",
-	"pll_audio_post_div", "pll_dram_533m_clk", "pll_video_main_clk",
+	"pll_audio_post_div", "pll_dram_533m_clk", "pll_video_post_div",
 	"pll_sys_pfd4_clk", "pll_enet_125m_clk", "ext_clk_2", };
 
 static const char *sai2_sel[] = { "osc", "pll_sys_pfd2_135m_clk",
-	"pll_audio_post_div", "pll_dram_533m_clk", "pll_video_main_clk",
+	"pll_audio_post_div", "pll_dram_533m_clk", "pll_video_post_div",
 	"pll_sys_pfd4_clk", "pll_enet_125m_clk", "ext_clk_2", };
 
 static const char *sai3_sel[] = { "osc", "pll_sys_pfd2_135m_clk",
-	"pll_audio_post_div", "pll_dram_533m_clk", "pll_video_main_clk",
+	"pll_audio_post_div", "pll_dram_533m_clk", "pll_video_post_div",
 	"pll_sys_pfd4_clk", "pll_enet_125m_clk", "ext_clk_3", };
 
 static const char *spdif_sel[] = { "osc", "pll_sys_pfd2_135m_clk",
-	"pll_audio_post_div", "pll_dram_533m_clk", "pll_video_main_clk",
+	"pll_audio_post_div", "pll_dram_533m_clk", "pll_video_post_div",
 	"pll_sys_pfd4_clk", "pll_enet_125m_clk", "ext_3_clk", };
 
 static const char *enet1_ref_sel[] = { "osc", "pll_enet_125m_clk",
 	"pll_enet_50m_clk", "pll_enet_25m_clk",
-	"pll_sys_main_120m_clk", "pll_audio_post_div", "pll_video_main_clk",
+	"pll_sys_main_120m_clk", "pll_audio_post_div", "pll_video_post_div",
 	"ext_clk_4", };
 
 static const char *enet1_time_sel[] = { "osc", "pll_enet_100m_clk",
 	"pll_audio_post_div", "ext_clk_1", "ext_clk_2", "ext_clk_3",
-	"ext_clk_4", "pll_video_main_clk", };
+	"ext_clk_4", "pll_video_post_div", };
 
 static const char *enet2_ref_sel[] = { "osc", "pll_enet_125m_clk",
 	"pll_enet_50m_clk", "pll_enet_25m_clk",
-	"pll_sys_main_120m_clk", "pll_audio_post_div", "pll_video_main_clk",
+	"pll_sys_main_120m_clk", "pll_audio_post_div", "pll_video_post_div",
 	"ext_clk_4", };
 
 static const char *enet2_time_sel[] = { "osc", "pll_enet_100m_clk",
 	"pll_audio_post_div", "ext_clk_1", "ext_clk_2", "ext_clk_3",
-	"ext_clk_4", "pll_video_main_clk", };
+	"ext_clk_4", "pll_video_post_div", };
 
 static const char *enet_phy_ref_sel[] = { "osc", "pll_enet_25m_clk",
 	"pll_enet_50m_clk", "pll_enet_125m_clk",
-	"pll_dram_533m_clk", "pll_audio_post_div", "pll_video_main_clk",
+	"pll_dram_533m_clk", "pll_audio_post_div", "pll_video_post_div",
 	"pll_sys_pfd3_clk", };
 
 static const char *eim_sel[] = { "osc", "pll_sys_pfd2_135m_clk",
@@ -174,7 +174,7 @@ static const char *eim_sel[] = { "osc", "pll_sys_pfd2_135m_clk",
 static const char *nand_sel[] = { "osc", "pll_sys_main_clk",
 	"pll_dram_533m_clk", "pll_sys_pfd0_392m_clk", "pll_sys_pfd3_clk",
 	"pll_enet_500m_clk", "pll_enet_250m_clk",
-	"pll_video_main_clk", };
+	"pll_video_post_div", };
 
 static const char *qspi_sel[] = { "osc", "pll_sys_pfd4_clk",
 	"pll_dram_533m_clk", "pll_enet_500m_clk", "pll_sys_pfd3_clk",
@@ -204,22 +204,22 @@ static const char *can2_sel[] = { "osc", "pll_sys_main_120m_clk",
 
 static const char *i2c1_sel[] = { "osc", "pll_sys_main_120m_clk",
 	"pll_enet_50m_clk", "pll_dram_533m_clk",
-	"pll_audio_post_div", "pll_video_main_clk", "pll_usb_main_clk",
+	"pll_audio_post_div", "pll_video_post_div", "pll_usb_main_clk",
 	"pll_sys_pfd2_135m_clk", };
 
 static const char *i2c2_sel[] = { "osc", "pll_sys_main_120m_clk",
 	"pll_enet_50m_clk", "pll_dram_533m_clk",
-	"pll_audio_post_div", "pll_video_main_clk", "pll_usb_main_clk",
+	"pll_audio_post_div", "pll_video_post_div", "pll_usb_main_clk",
 	"pll_sys_pfd2_135m_clk", };
 
 static const char *i2c3_sel[] = { "osc", "pll_sys_main_120m_clk",
 	"pll_enet_50m_clk", "pll_dram_533m_clk",
-	"pll_audio_post_div", "pll_video_main_clk", "pll_usb_main_clk",
+	"pll_audio_post_div", "pll_video_post_div", "pll_usb_main_clk",
 	"pll_sys_pfd2_135m_clk", };
 
 static const char *i2c4_sel[] = { "osc", "pll_sys_main_120m_clk",
 	"pll_enet_50m_clk", "pll_dram_533m_clk",
-	"pll_audio_post_div", "pll_video_main_clk", "pll_usb_main_clk",
+	"pll_audio_post_div", "pll_video_post_div", "pll_usb_main_clk",
 	"pll_sys_pfd2_135m_clk", };
 
 static const char *uart1_sel[] = { "osc", "pll_sys_main_240m_clk",
@@ -279,27 +279,27 @@ static const char *ecspi4_sel[] = { "osc", "pll_sys_main_240m_clk",
 
 static const char *pwm1_sel[] = { "osc", "pll_enet_100m_clk",
 	"pll_sys_main_120m_clk", "pll_enet_40m_clk", "pll_audio_post_div",
-	"ext_clk_1", "ref_1m_clk", "pll_video_main_clk", };
+	"ext_clk_1", "ref_1m_clk", "pll_video_post_div", };
 
 static const char *pwm2_sel[] = { "osc", "pll_enet_100m_clk",
 	"pll_sys_main_120m_clk", "pll_enet_40m_clk", "pll_audio_post_div",
-	"ext_clk_1", "ref_1m_clk", "pll_video_main_clk", };
+	"ext_clk_1", "ref_1m_clk", "pll_video_post_div", };
 
 static const char *pwm3_sel[] = { "osc", "pll_enet_100m_clk",
 	"pll_sys_main_120m_clk", "pll_enet_40m_clk", "pll_audio_post_div",
-	"ext_clk_2", "ref_1m_clk", "pll_video_main_clk", };
+	"ext_clk_2", "ref_1m_clk", "pll_video_post_div", };
 
 static const char *pwm4_sel[] = { "osc", "pll_enet_100m_clk",
 	"pll_sys_main_120m_clk", "pll_enet_40m_clk", "pll_audio_post_div",
-	"ext_clk_2", "ref_1m_clk", "pll_video_main_clk", };
+	"ext_clk_2", "ref_1m_clk", "pll_video_post_div", };
 
 static const char *flextimer1_sel[] = { "osc", "pll_enet_100m_clk",
 	"pll_sys_main_120m_clk", "pll_enet_40m_clk", "pll_audio_post_div",
-	"ext_clk_3", "ref_1m_clk", "pll_video_main_clk", };
+	"ext_clk_3", "ref_1m_clk", "pll_video_post_div", };
 
 static const char *flextimer2_sel[] = { "osc", "pll_enet_100m_clk",
 	"pll_sys_main_120m_clk", "pll_enet_40m_clk", "pll_audio_post_div",
-	"ext_clk_3", "ref_1m_clk", "pll_video_main_clk", };
+	"ext_clk_3", "ref_1m_clk", "pll_video_post_div", };
 
 static const char *sim1_sel[] = { "osc", "pll_sys_pfd2_135m_clk",
 	"pll_sys_main_120m_clk", "pll_dram_533m_clk",
@@ -308,23 +308,23 @@ static const char *sim1_sel[] = { "osc", "pll_sys_pfd2_135m_clk",
 
 static const char *sim2_sel[] = { "osc", "pll_sys_pfd2_135m_clk",
 	"pll_sys_main_120m_clk", "pll_dram_533m_clk",
-	"pll_usb_main_clk", "pll_video_main_clk", "pll_enet_125m_clk",
+	"pll_usb_main_clk", "pll_video_post_div", "pll_enet_125m_clk",
 	"pll_sys_pfd7_clk", };
 
 static const char *gpt1_sel[] = { "osc", "pll_enet_100m_clk",
-	"pll_sys_pfd0_392m_clk", "pll_enet_40m_clk", "pll_video_main_clk",
+	"pll_sys_pfd0_392m_clk", "pll_enet_40m_clk", "pll_video_post_div",
 	"ref_1m_clk", "pll_audio_post_div", "ext_clk_1", };
 
 static const char *gpt2_sel[] = { "osc", "pll_enet_100m_clk",
-	"pll_sys_pfd0_392m_clk", "pll_enet_40m_clk", "pll_video_main_clk",
+	"pll_sys_pfd0_392m_clk", "pll_enet_40m_clk", "pll_video_post_div",
 	"ref_1m_clk", "pll_audio_post_div", "ext_clk_2", };
 
 static const char *gpt3_sel[] = { "osc", "pll_enet_100m_clk",
-	"pll_sys_pfd0_392m_clk", "pll_enet_40m_clk", "pll_video_main_clk",
+	"pll_sys_pfd0_392m_clk", "pll_enet_40m_clk", "pll_video_post_div",
 	"ref_1m_clk", "pll_audio_post_div", "ext_clk_3", };
 
 static const char *gpt4_sel[] = { "osc", "pll_enet_100m_clk",
-	"pll_sys_pfd0_392m_clk", "pll_enet_40m_clk", "pll_video_main_clk",
+	"pll_sys_pfd0_392m_clk", "pll_enet_40m_clk", "pll_video_post_div",
 	"ref_1m_clk", "pll_audio_post_div", "ext_clk_4", };
 
 static const char *trace_sel[] = { "osc", "pll_sys_pfd2_135m_clk",
@@ -339,12 +339,12 @@ static const char *wdog_sel[] = { "osc", "pll_sys_pfd2_135m_clk",
 
 static const char *csi_mclk_sel[] = { "osc", "pll_sys_pfd2_135m_clk",
 	"pll_sys_main_120m_clk", "pll_dram_533m_clk",
-	"pll_enet_125m_clk", "pll_audio_post_div", "pll_video_main_clk",
+	"pll_enet_125m_clk", "pll_audio_post_div", "pll_video_post_div",
 	"pll_usb_main_clk", };
 
 static const char *audio_mclk_sel[] = { "osc", "pll_sys_pfd2_135m_clk",
 	"pll_sys_main_120m_clk", "pll_dram_533m_clk",
-	"pll_enet_125m_clk", "pll_audio_post_div", "pll_video_main_clk",
+	"pll_enet_125m_clk", "pll_audio_post_div", "pll_video_post_div",
 	"pll_usb_main_clk", };
 
 static const char *wrclk_sel[] = { "osc", "pll_enet_40m_clk",
@@ -358,13 +358,13 @@ static const char *clko1_sel[] = { "osc", "pll_sys_main_clk",
 
 static const char *clko2_sel[] = { "osc", "pll_sys_main_240m_clk",
 	"pll_sys_pfd0_392m_clk", "pll_sys_pfd1_166m_clk", "pll_sys_pfd4_clk",
-	"pll_audio_post_div", "pll_video_main_clk", "ckil", };
+	"pll_audio_post_div", "pll_video_post_div", "ckil", };
 
 static const char *lvds1_sel[] = { "pll_arm_main_clk",
 	"pll_sys_main_clk", "pll_sys_pfd0_392m_clk", "pll_sys_pfd1_332m_clk",
 	"pll_sys_pfd2_270m_clk", "pll_sys_pfd3_clk", "pll_sys_pfd4_clk",
 	"pll_sys_pfd5_clk", "pll_sys_pfd6_clk", "pll_sys_pfd7_clk",
-	"pll_audio_post_div", "pll_video_main_clk", "pll_enet_500m_clk",
+	"pll_audio_post_div", "pll_video_post_div", "pll_enet_500m_clk",
 	"pll_enet_250m_clk", "pll_enet_125m_clk", "pll_enet_100m_clk",
 	"pll_enet_50m_clk", "pll_enet_40m_clk", "pll_enet_25m_clk",
 	"pll_dram_main_clk", };
@@ -433,23 +433,22 @@ static void __init imx7d_clocks_init(struct device_node *ccm_node)
 	clks[IMX7D_PLL_AUDIO_MAIN_BYPASS] = imx_clk_mux_flags("pll_audio_main_bypass", base + 0xf0, 16, 1, pll_audio_bypass_sel, ARRAY_SIZE(pll_audio_bypass_sel), CLK_SET_RATE_PARENT);
 	clks[IMX7D_PLL_VIDEO_MAIN_BYPASS] = imx_clk_mux_flags("pll_video_main_bypass", base + 0x130, 16, 1, pll_video_bypass_sel, ARRAY_SIZE(pll_video_bypass_sel), CLK_SET_RATE_PARENT);
 
-	clk_set_parent(clks[IMX7D_PLL_ARM_MAIN_BYPASS], clks[IMX7D_PLL_ARM_MAIN]);
-	clk_set_parent(clks[IMX7D_PLL_DRAM_MAIN_BYPASS], clks[IMX7D_PLL_DRAM_MAIN]);
-	clk_set_parent(clks[IMX7D_PLL_SYS_MAIN_BYPASS], clks[IMX7D_PLL_SYS_MAIN]);
-	clk_set_parent(clks[IMX7D_PLL_ENET_MAIN_BYPASS], clks[IMX7D_PLL_ENET_MAIN]);
-	clk_set_parent(clks[IMX7D_PLL_AUDIO_MAIN_BYPASS], clks[IMX7D_PLL_AUDIO_MAIN]);
-	clk_set_parent(clks[IMX7D_PLL_VIDEO_MAIN_BYPASS], clks[IMX7D_PLL_VIDEO_MAIN]);
-
 	clks[IMX7D_PLL_ARM_MAIN_CLK] = imx_clk_gate("pll_arm_main_clk", "pll_arm_main_bypass", base + 0x60, 13);
-	clks[IMX7D_PLL_DRAM_MAIN_CLK] = imx_clk_gate("pll_dram_main_clk", "pll_dram_main_bypass", base + 0x70, 13);
+	clks[IMX7D_PLL_DRAM_MAIN_CLK] = imx_clk_gate("pll_dram_main_clk", "pll_dram_test_div", base + 0x70, 13);
 	clks[IMX7D_PLL_SYS_MAIN_CLK] = imx_clk_gate("pll_sys_main_clk", "pll_sys_main_bypass", base + 0xb0, 13);
 	clks[IMX7D_PLL_AUDIO_MAIN_CLK] = imx_clk_gate("pll_audio_main_clk", "pll_audio_main_bypass", base + 0xf0, 13);
 	clks[IMX7D_PLL_VIDEO_MAIN_CLK] = imx_clk_gate("pll_video_main_clk", "pll_video_main_bypass", base + 0x130, 13);
 
+	clks[IMX7D_PLL_DRAM_TEST_DIV]  = clk_register_divider_table(NULL, "pll_dram_test_div", "pll_dram_main_bypass",
+				CLK_SET_RATE_PARENT | CLK_SET_RATE_GATE, base + 0x70, 21, 2, 0, test_div_table, &imx_ccm_lock);
 	clks[IMX7D_PLL_AUDIO_TEST_DIV]  = clk_register_divider_table(NULL, "pll_audio_test_div", "pll_audio_main_clk",
 				CLK_SET_RATE_PARENT | CLK_SET_RATE_GATE, base + 0xf0, 19, 2, 0, test_div_table, &imx_ccm_lock);
 	clks[IMX7D_PLL_AUDIO_POST_DIV] = clk_register_divider_table(NULL, "pll_audio_post_div", "pll_audio_test_div",
 				CLK_SET_RATE_PARENT | CLK_SET_RATE_GATE, base + 0xf0, 22, 2, 0, post_div_table, &imx_ccm_lock);
+	clks[IMX7D_PLL_VIDEO_TEST_DIV]  = clk_register_divider_table(NULL, "pll_video_test_div", "pll_video_main_clk",
+				CLK_SET_RATE_PARENT | CLK_SET_RATE_GATE, base + 0x130, 19, 2, 0, test_div_table, &imx_ccm_lock);
+	clks[IMX7D_PLL_VIDEO_POST_DIV] = clk_register_divider_table(NULL, "pll_video_post_div", "pll_video_test_div",
+				CLK_SET_RATE_PARENT | CLK_SET_RATE_GATE, base + 0x130, 22, 2, 0, post_div_table, &imx_ccm_lock);
 
 	clks[IMX7D_PLL_SYS_PFD0_392M_CLK] = imx_clk_pfd("pll_sys_pfd0_392m_clk", "pll_sys_main_clk", base + 0xc0, 0);
 	clks[IMX7D_PLL_SYS_PFD1_332M_CLK] = imx_clk_pfd("pll_sys_pfd1_332m_clk", "pll_sys_main_clk", base + 0xc0, 1);
@@ -797,7 +796,7 @@ static void __init imx7d_clocks_init(struct device_node *ccm_node)
 	clks[IMX7D_OCOTP_CLK] = imx_clk_gate4("ocotp_clk", "ipg_root_clk", base + 0x4230, 0);
 	clks[IMX7D_SNVS_CLK] = imx_clk_gate4("snvs_clk", "ipg_root_clk", base + 0x4250, 0);
 	clks[IMX7D_CAAM_CLK] = imx_clk_gate4("caam_clk", "ipg_root_clk", base + 0x4240, 0);
-	clks[IMX7D_USB_HSIC_ROOT_CLK] = imx_clk_gate4("usb_hsic_root_clk", "usb_hsic_post_div", base + 0x4420, 0);
+	clks[IMX7D_USB_HSIC_ROOT_CLK] = imx_clk_gate4("usb_hsic_root_clk", "usb_hsic_post_div", base + 0x4690, 0);
 	clks[IMX7D_SDMA_CORE_CLK] = imx_clk_gate4("sdma_root_clk", "ahb_root_clk", base + 0x4480, 0);
 	clks[IMX7D_PCIE_CTRL_ROOT_CLK] = imx_clk_gate4("pcie_ctrl_root_clk", "pcie_ctrl_post_div", base + 0x4600, 0);
 	clks[IMX7D_PCIE_PHY_ROOT_CLK] = imx_clk_gate4("pcie_phy_root_clk", "pcie_phy_post_div", base + 0x4600, 0);
@@ -863,6 +862,9 @@ static void __init imx7d_clocks_init(struct device_node *ccm_node)
 	clks[IMX7D_CSI_MCLK_ROOT_CLK] = imx_clk_gate4("csi_mclk_root_clk", "csi_mclk_post_div", base + 0x4490, 0);
 	clks[IMX7D_AUDIO_MCLK_ROOT_CLK] = imx_clk_gate4("audio_mclk_root_clk", "audio_mclk_post_div", base + 0x4790, 0);
 	clks[IMX7D_WRCLK_ROOT_CLK] = imx_clk_gate4("wrclk_root_clk", "wrclk_post_div", base + 0x47a0, 0);
+	clks[IMX7D_USB_CTRL_CLK] = imx_clk_gate4("usb_ctrl_clk", "ahb_root_clk", base + 0x4680, 0);
+	clks[IMX7D_USB_PHY1_CLK] = imx_clk_gate4("usb_phy1_clk", "pll_usb1_main_clk", base + 0x46a0, 0);
+	clks[IMX7D_USB_PHY2_CLK] = imx_clk_gate4("usb_phy2_clk", "pll_usb_main_clk", base + 0x46b0, 0);
 	clks[IMX7D_ADC_ROOT_CLK] = imx_clk_gate4("adc_root_clk", "ipg_root_clk", base + 0x4200, 0);
 
 	clks[IMX7D_GPT_3M_CLK] = imx_clk_fixed_factor("gpt_3m", "osc", 1, 8);
@@ -882,12 +884,23 @@ static void __init imx7d_clocks_init(struct device_node *ccm_node)
 	for (i = 0; i < ARRAY_SIZE(clks_init_on); i++)
 		clk_prepare_enable(clks[clks_init_on[i]]);
 
+	clk_set_parent(clks[IMX7D_PLL_ARM_MAIN_BYPASS], clks[IMX7D_PLL_ARM_MAIN]);
+	clk_set_parent(clks[IMX7D_PLL_DRAM_MAIN_BYPASS], clks[IMX7D_PLL_DRAM_MAIN]);
+	clk_set_parent(clks[IMX7D_PLL_SYS_MAIN_BYPASS], clks[IMX7D_PLL_SYS_MAIN]);
+	clk_set_parent(clks[IMX7D_PLL_ENET_MAIN_BYPASS], clks[IMX7D_PLL_ENET_MAIN]);
+	clk_set_parent(clks[IMX7D_PLL_AUDIO_MAIN_BYPASS], clks[IMX7D_PLL_AUDIO_MAIN]);
+	clk_set_parent(clks[IMX7D_PLL_VIDEO_MAIN_BYPASS], clks[IMX7D_PLL_VIDEO_MAIN]);
+
 	/* use old gpt clk setting, gpt1 root clk must be twice as gpt counter freq */
 	clk_set_parent(clks[IMX7D_GPT1_ROOT_SRC], clks[IMX7D_OSC_24M_CLK]);
 
 	/* set uart module clock's parent clock source that must be great then 80MHz */
 	clk_set_parent(clks[IMX7D_UART1_ROOT_SRC], clks[IMX7D_OSC_24M_CLK]);
 
+	/* Set clock rate for USBPHY, the USB_PLL at CCM is from USBOTG2 */
+	clks[IMX7D_USB1_MAIN_480M_CLK] = imx_clk_fixed_factor("pll_usb1_main_clk", "osc", 20, 1);
+	clks[IMX7D_USB_MAIN_480M_CLK] = imx_clk_fixed_factor("pll_usb_main_clk", "osc", 20, 1);
+
 	imx_register_uart_clocks(uart_clks);
 
 }
diff --git a/drivers/clk/imx/clk-pllv2.c b/drivers/clk/imx/clk-pllv2.c
index 85b5cbe9744c..eeba3cb14e2d 100644
--- a/drivers/clk/imx/clk-pllv2.c
+++ b/drivers/clk/imx/clk-pllv2.c
@@ -182,8 +182,12 @@ static long clk_pllv2_round_rate(struct clk_hw *hw, unsigned long rate,
 		unsigned long *prate)
 {
 	u32 dp_op, dp_mfd, dp_mfn;
+	int ret;
+
+	ret = __clk_pllv2_set_rate(rate, *prate, &dp_op, &dp_mfd, &dp_mfn);
+	if (ret)
+		return ret;
 
-	__clk_pllv2_set_rate(rate, *prate, &dp_op, &dp_mfd, &dp_mfn);
 	return __clk_pllv2_recalc_rate(*prate, MXC_PLL_DP_CTL_DPDCK0_2_EN,
 			dp_op, dp_mfd, dp_mfn);
 }
diff --git a/drivers/clk/imx/clk.h b/drivers/clk/imx/clk.h
index d69c4bbf3597..8076ec040f37 100644
--- a/drivers/clk/imx/clk.h
+++ b/drivers/clk/imx/clk.h
@@ -123,6 +123,13 @@ static inline struct clk *imx_clk_gate(const char *name, const char *parent,
 			shift, 0, &imx_ccm_lock);
 }
 
+static inline struct clk *imx_clk_gate_flags(const char *name, const char *parent,
+		void __iomem *reg, u8 shift, unsigned long flags)
+{
+	return clk_register_gate(NULL, name, parent, flags | CLK_SET_RATE_PARENT, reg,
+			shift, 0, &imx_ccm_lock);
+}
+
 static inline struct clk *imx_clk_gate_dis(const char *name, const char *parent,
 		void __iomem *reg, u8 shift)
 {
@@ -137,6 +144,13 @@ static inline struct clk *imx_clk_gate2(const char *name, const char *parent,
 			shift, 0x3, 0, &imx_ccm_lock, NULL);
 }
 
+static inline struct clk *imx_clk_gate2_flags(const char *name, const char *parent,
+		void __iomem *reg, u8 shift, unsigned long flags)
+{
+	return clk_register_gate2(NULL, name, parent, flags | CLK_SET_RATE_PARENT, reg,
+			shift, 0x3, 0, &imx_ccm_lock, NULL);
+}
+
 static inline struct clk *imx_clk_gate2_shared(const char *name,
 		const char *parent, void __iomem *reg, u8 shift,
 		unsigned int *share_count)
diff --git a/drivers/clk/keystone/sci-clk.c b/drivers/clk/keystone/sci-clk.c
index 9cdf9d5050ac..4cb70bed89a9 100644
--- a/drivers/clk/keystone/sci-clk.c
+++ b/drivers/clk/keystone/sci-clk.c
@@ -29,21 +29,10 @@
 #define SCI_CLK_INPUT_TERMINATION	BIT(2)
 
 /**
- * struct sci_clk_data - TI SCI clock data
- * @dev: device index
- * @num_clks: number of clocks for this device
- */
-struct sci_clk_data {
-	u16 dev;
-	u16 num_clks;
-};
-
-/**
  * struct sci_clk_provider - TI SCI clock provider representation
  * @sci: Handle to the System Control Interface protocol handler
  * @ops: Pointer to the SCI ops to be used by the clocks
  * @dev: Device pointer for the clock provider
- * @clk_data: Clock data
  * @clocks: Clocks array for this device
  * @num_clocks: Total number of clocks for this provider
  */
@@ -51,8 +40,7 @@ struct sci_clk_provider {
 	const struct ti_sci_handle *sci;
 	const struct ti_sci_clk_ops *ops;
 	struct device *dev;
-	const struct sci_clk_data *clk_data;
-	struct clk_hw **clocks;
+	struct sci_clk **clocks;
 	int num_clocks;
 };
 
@@ -61,6 +49,7 @@ struct sci_clk_provider {
  * @hw:		 Hardware clock cookie for common clock framework
  * @dev_id:	 Device index
  * @clk_id:	 Clock index
+ * @num_parents: Number of parents for this clock
  * @provider:	 Master clock provider
  * @flags:	 Flags for the clock
  */
@@ -68,6 +57,7 @@ struct sci_clk {
 	struct clk_hw hw;
 	u16 dev_id;
 	u8 clk_id;
+	u8 num_parents;
 	struct sci_clk_provider *provider;
 	u8 flags;
 };
@@ -273,38 +263,22 @@ static const struct clk_ops sci_clk_ops = {
 /**
  * _sci_clk_get - Gets a handle for an SCI clock
  * @provider: Handle to SCI clock provider
- * @dev_id: device ID for the clock to register
- * @clk_id: clock ID for the clock to register
+ * @sci_clk: Handle to the SCI clock to populate
  *
  * Gets a handle to an existing TI SCI hw clock, or builds a new clock
  * entry and registers it with the common clock framework. Called from
  * the common clock framework, when a corresponding of_clk_get call is
  * executed, or recursively from itself when parsing parent clocks.
- * Returns a pointer to the hw clock struct, or ERR_PTR value in failure.
+ * Returns 0 on success, negative error code on failure.
  */
-static struct clk_hw *_sci_clk_build(struct sci_clk_provider *provider,
-				     u16 dev_id, u8 clk_id)
+static int _sci_clk_build(struct sci_clk_provider *provider,
+			  struct sci_clk *sci_clk)
 {
 	struct clk_init_data init = { NULL };
-	struct sci_clk *sci_clk = NULL;
 	char *name = NULL;
 	char **parent_names = NULL;
 	int i;
-	int ret;
-
-	sci_clk = devm_kzalloc(provider->dev, sizeof(*sci_clk), GFP_KERNEL);
-	if (!sci_clk)
-		return ERR_PTR(-ENOMEM);
-
-	sci_clk->dev_id = dev_id;
-	sci_clk->clk_id = clk_id;
-	sci_clk->provider = provider;
-
-	ret = provider->ops->get_num_parents(provider->sci, dev_id,
-					     clk_id,
-					     &init.num_parents);
-	if (ret)
-		goto err;
+	int ret = 0;
 
 	name = kasprintf(GFP_KERNEL, "%s:%d:%d", dev_name(provider->dev),
 			 sci_clk->dev_id, sci_clk->clk_id);
@@ -317,11 +291,11 @@ static struct clk_hw *_sci_clk_build(struct sci_clk_provider *provider,
 	 * to have mux functionality. Otherwise it is going to act as a root
 	 * clock.
 	 */
-	if (init.num_parents < 2)
-		init.num_parents = 0;
+	if (sci_clk->num_parents < 2)
+		sci_clk->num_parents = 0;
 
-	if (init.num_parents) {
-		parent_names = kcalloc(init.num_parents, sizeof(char *),
+	if (sci_clk->num_parents) {
+		parent_names = kcalloc(sci_clk->num_parents, sizeof(char *),
 				       GFP_KERNEL);
 
 		if (!parent_names) {
@@ -329,7 +303,7 @@ static struct clk_hw *_sci_clk_build(struct sci_clk_provider *provider,
 			goto err;
 		}
 
-		for (i = 0; i < init.num_parents; i++) {
+		for (i = 0; i < sci_clk->num_parents; i++) {
 			char *parent_name;
 
 			parent_name = kasprintf(GFP_KERNEL, "%s:%d:%d",
@@ -346,6 +320,7 @@ static struct clk_hw *_sci_clk_build(struct sci_clk_provider *provider,
 	}
 
 	init.ops = &sci_clk_ops;
+	init.num_parents = sci_clk->num_parents;
 	sci_clk->hw.init = &init;
 
 	ret = devm_clk_hw_register(provider->dev, &sci_clk->hw);
@@ -354,7 +329,7 @@ static struct clk_hw *_sci_clk_build(struct sci_clk_provider *provider,
 
 err:
 	if (parent_names) {
-		for (i = 0; i < init.num_parents; i++)
+		for (i = 0; i < sci_clk->num_parents; i++)
 			kfree(parent_names[i]);
 
 		kfree(parent_names);
@@ -362,10 +337,7 @@ err:
 
 	kfree(name);
 
-	if (ret)
-		return ERR_PTR(ret);
-
-	return &sci_clk->hw;
+	return ret;
 }
 
 static int _cmp_sci_clk(const void *a, const void *b)
@@ -414,253 +386,20 @@ static struct clk_hw *sci_clk_get(struct of_phandle_args *clkspec, void *data)
 
 static int ti_sci_init_clocks(struct sci_clk_provider *p)
 {
-	const struct sci_clk_data *data = p->clk_data;
-	struct clk_hw *hw;
 	int i;
-	int num_clks = 0;
-
-	while (data->num_clks) {
-		num_clks += data->num_clks;
-		data++;
-	}
-
-	p->num_clocks = num_clks;
-
-	p->clocks = devm_kcalloc(p->dev, num_clks, sizeof(struct sci_clk),
-				 GFP_KERNEL);
-	if (!p->clocks)
-		return -ENOMEM;
-
-	num_clks = 0;
-
-	data = p->clk_data;
-
-	while (data->num_clks) {
-		for (i = 0; i < data->num_clks; i++) {
-			hw = _sci_clk_build(p, data->dev, i);
-			if (!IS_ERR(hw)) {
-				p->clocks[num_clks++] = hw;
-				continue;
-			}
-
-			/* Skip any holes in the clock lists */
-			if (PTR_ERR(hw) == -ENODEV)
-				continue;
+	int ret;
 
-			return PTR_ERR(hw);
-		}
-		data++;
+	for (i = 0; i < p->num_clocks; i++) {
+		ret = _sci_clk_build(p, p->clocks[i]);
+		if (ret)
+			return ret;
 	}
 
 	return 0;
 }
 
-static const struct sci_clk_data k2g_clk_data[] = {
-	/* pmmc */
-	{ .dev = 0x0, .num_clks = 4 },
-
-	/* mlb0 */
-	{ .dev = 0x1, .num_clks = 5 },
-
-	/* dss0 */
-	{ .dev = 0x2, .num_clks = 2 },
-
-	/* mcbsp0 */
-	{ .dev = 0x3, .num_clks = 8 },
-
-	/* mcasp0 */
-	{ .dev = 0x4, .num_clks = 8 },
-
-	/* mcasp1 */
-	{ .dev = 0x5, .num_clks = 8 },
-
-	/* mcasp2 */
-	{ .dev = 0x6, .num_clks = 8 },
-
-	/* dcan0 */
-	{ .dev = 0x8, .num_clks = 2 },
-
-	/* dcan1 */
-	{ .dev = 0x9, .num_clks = 2 },
-
-	/* emif0 */
-	{ .dev = 0xa, .num_clks = 6 },
-
-	/* mmchs0 */
-	{ .dev = 0xb, .num_clks = 3 },
-
-	/* mmchs1 */
-	{ .dev = 0xc, .num_clks = 3 },
-
-	/* gpmc0 */
-	{ .dev = 0xd, .num_clks = 1 },
-
-	/* elm0 */
-	{ .dev = 0xe, .num_clks = 1 },
-
-	/* spi0 */
-	{ .dev = 0x10, .num_clks = 1 },
-
-	/* spi1 */
-	{ .dev = 0x11, .num_clks = 1 },
-
-	/* spi2 */
-	{ .dev = 0x12, .num_clks = 1 },
-
-	/* spi3 */
-	{ .dev = 0x13, .num_clks = 1 },
-
-	/* icss0 */
-	{ .dev = 0x14, .num_clks = 6 },
-
-	/* icss1 */
-	{ .dev = 0x15, .num_clks = 6 },
-
-	/* usb0 */
-	{ .dev = 0x16, .num_clks = 7 },
-
-	/* usb1 */
-	{ .dev = 0x17, .num_clks = 7 },
-
-	/* nss0 */
-	{ .dev = 0x18, .num_clks = 14 },
-
-	/* pcie0 */
-	{ .dev = 0x19, .num_clks = 1 },
-
-	/* gpio0 */
-	{ .dev = 0x1b, .num_clks = 1 },
-
-	/* gpio1 */
-	{ .dev = 0x1c, .num_clks = 1 },
-
-	/* timer64_0 */
-	{ .dev = 0x1d, .num_clks = 9 },
-
-	/* timer64_1 */
-	{ .dev = 0x1e, .num_clks = 9 },
-
-	/* timer64_2 */
-	{ .dev = 0x1f, .num_clks = 9 },
-
-	/* timer64_3 */
-	{ .dev = 0x20, .num_clks = 9 },
-
-	/* timer64_4 */
-	{ .dev = 0x21, .num_clks = 9 },
-
-	/* timer64_5 */
-	{ .dev = 0x22, .num_clks = 9 },
-
-	/* timer64_6 */
-	{ .dev = 0x23, .num_clks = 9 },
-
-	/* msgmgr0 */
-	{ .dev = 0x25, .num_clks = 1 },
-
-	/* bootcfg0 */
-	{ .dev = 0x26, .num_clks = 1 },
-
-	/* arm_bootrom0 */
-	{ .dev = 0x27, .num_clks = 1 },
-
-	/* dsp_bootrom0 */
-	{ .dev = 0x29, .num_clks = 1 },
-
-	/* debugss0 */
-	{ .dev = 0x2b, .num_clks = 8 },
-
-	/* uart0 */
-	{ .dev = 0x2c, .num_clks = 1 },
-
-	/* uart1 */
-	{ .dev = 0x2d, .num_clks = 1 },
-
-	/* uart2 */
-	{ .dev = 0x2e, .num_clks = 1 },
-
-	/* ehrpwm0 */
-	{ .dev = 0x2f, .num_clks = 1 },
-
-	/* ehrpwm1 */
-	{ .dev = 0x30, .num_clks = 1 },
-
-	/* ehrpwm2 */
-	{ .dev = 0x31, .num_clks = 1 },
-
-	/* ehrpwm3 */
-	{ .dev = 0x32, .num_clks = 1 },
-
-	/* ehrpwm4 */
-	{ .dev = 0x33, .num_clks = 1 },
-
-	/* ehrpwm5 */
-	{ .dev = 0x34, .num_clks = 1 },
-
-	/* eqep0 */
-	{ .dev = 0x35, .num_clks = 1 },
-
-	/* eqep1 */
-	{ .dev = 0x36, .num_clks = 1 },
-
-	/* eqep2 */
-	{ .dev = 0x37, .num_clks = 1 },
-
-	/* ecap0 */
-	{ .dev = 0x38, .num_clks = 1 },
-
-	/* ecap1 */
-	{ .dev = 0x39, .num_clks = 1 },
-
-	/* i2c0 */
-	{ .dev = 0x3a, .num_clks = 1 },
-
-	/* i2c1 */
-	{ .dev = 0x3b, .num_clks = 1 },
-
-	/* i2c2 */
-	{ .dev = 0x3c, .num_clks = 1 },
-
-	/* edma0 */
-	{ .dev = 0x3f, .num_clks = 2 },
-
-	/* semaphore0 */
-	{ .dev = 0x40, .num_clks = 1 },
-
-	/* intc0 */
-	{ .dev = 0x41, .num_clks = 1 },
-
-	/* gic0 */
-	{ .dev = 0x42, .num_clks = 1 },
-
-	/* qspi0 */
-	{ .dev = 0x43, .num_clks = 5 },
-
-	/* arm_64b_counter0 */
-	{ .dev = 0x44, .num_clks = 2 },
-
-	/* tetris0 */
-	{ .dev = 0x45, .num_clks = 2 },
-
-	/* cgem0 */
-	{ .dev = 0x46, .num_clks = 2 },
-
-	/* msmc0 */
-	{ .dev = 0x47, .num_clks = 1 },
-
-	/* cbass0 */
-	{ .dev = 0x49, .num_clks = 1 },
-
-	/* board0 */
-	{ .dev = 0x4c, .num_clks = 36 },
-
-	/* edma1 */
-	{ .dev = 0x4f, .num_clks = 2 },
-	{ .num_clks = 0 },
-};
-
 static const struct of_device_id ti_sci_clk_of_match[] = {
-	{ .compatible = "ti,k2g-sci-clk", .data = &k2g_clk_data },
+	{ .compatible = "ti,k2g-sci-clk" },
 	{ /* Sentinel */ },
 };
 MODULE_DEVICE_TABLE(of, ti_sci_clk_of_match);
@@ -681,12 +420,16 @@ static int ti_sci_clk_probe(struct platform_device *pdev)
 	struct device_node *np = dev->of_node;
 	struct sci_clk_provider *provider;
 	const struct ti_sci_handle *handle;
-	const struct sci_clk_data *data;
 	int ret;
-
-	data = of_device_get_match_data(dev);
-	if (!data)
-		return -EINVAL;
+	int num_clks = 0;
+	struct sci_clk **clks = NULL;
+	struct sci_clk **tmp_clks;
+	struct sci_clk *sci_clk;
+	int max_clks = 0;
+	int clk_id = 0;
+	int dev_id = 0;
+	u8 num_parents;
+	int gap_size = 0;
 
 	handle = devm_ti_sci_get_handle(dev);
 	if (IS_ERR(handle))
@@ -696,12 +439,78 @@ static int ti_sci_clk_probe(struct platform_device *pdev)
 	if (!provider)
 		return -ENOMEM;
 
-	provider->clk_data = data;
-
 	provider->sci = handle;
 	provider->ops = &handle->ops.clk_ops;
 	provider->dev = dev;
 
+	/*
+	 * Discover clocks by querying the parent count of successive
+	 * (dev_id, clk_id) pairs. A failed query counts as a gap: two
+	 * consecutive gaps past clock 0 move on to the next device, and
+	 * five consecutive devices failing at clock 0 end the scan.
+	 */
+	while (1) {
+		ret = provider->ops->get_num_parents(provider->sci, dev_id,
+						     clk_id, &num_parents);
+		if (ret) {
+			gap_size++;
+			if (!clk_id) {
+				if (gap_size >= 5)
+					break;
+				dev_id++;
+			} else {
+				if (gap_size >= 2) {
+					dev_id++;
+					clk_id = 0;
+					gap_size = 0;
+				} else {
+					clk_id++;
+				}
+			}
+			continue;
+		}
+
+		gap_size = 0;
+
+		if (num_clks == max_clks) {
+			/* Pointer array is full: grow it by 64 entries */
+			tmp_clks = devm_kmalloc_array(dev, max_clks + 64,
+						      sizeof(sci_clk),
+						      GFP_KERNEL);
+			if (!tmp_clks)
+				return -ENOMEM;
+			memcpy(tmp_clks, clks, max_clks * sizeof(sci_clk));
+			if (max_clks)
+				devm_kfree(dev, clks);
+			max_clks += 64;
+			clks = tmp_clks;
+		}
+
+		sci_clk = devm_kzalloc(dev, sizeof(*sci_clk), GFP_KERNEL);
+		if (!sci_clk)
+			return -ENOMEM;
+		sci_clk->dev_id = dev_id;
+		sci_clk->clk_id = clk_id;
+		sci_clk->provider = provider;
+		sci_clk->num_parents = num_parents;
+
+		clks[num_clks] = sci_clk;
+
+		clk_id++;
+		num_clks++;
+	}
+
+	provider->clocks = devm_kmalloc_array(dev, num_clks, sizeof(sci_clk),
+					      GFP_KERNEL);
+	if (!provider->clocks)
+		return -ENOMEM;
+
+	memcpy(provider->clocks, clks, num_clks * sizeof(sci_clk));
+
+	provider->num_clocks = num_clks;
+
+	devm_kfree(dev, clks);
+
 	ret = ti_sci_init_clocks(provider);
 	if (ret) {
 		pr_err("ti-sci-init-clocks failed.\n");
diff --git a/drivers/clk/mediatek/Kconfig b/drivers/clk/mediatek/Kconfig
index 1f9ea0f21df1..92afe5989e97 100644
--- a/drivers/clk/mediatek/Kconfig
+++ b/drivers/clk/mediatek/Kconfig
@@ -54,6 +54,12 @@ config COMMON_CLK_MT2701_BDPSYS
 	---help---
 	  This driver supports MediaTek MT2701 bdpsys clocks.
 
+config COMMON_CLK_MT2701_AUDSYS
+	bool "Clock driver for MediaTek MT2701 audsys"
+	depends on COMMON_CLK_MT2701
+	---help---
+	  This driver supports MediaTek MT2701 audsys clocks.
+
 config COMMON_CLK_MT2712
 	bool "Clock driver for MediaTek MT2712"
 	depends on (ARCH_MEDIATEK && ARM64) || COMPILE_TEST
diff --git a/drivers/clk/mediatek/Makefile b/drivers/clk/mediatek/Makefile
index 5160fdc4bbb8..b80eff2abb31 100644
--- a/drivers/clk/mediatek/Makefile
+++ b/drivers/clk/mediatek/Makefile
@@ -6,6 +6,7 @@ obj-$(CONFIG_COMMON_CLK_MT6797_MMSYS) += clk-mt6797-mm.o
 obj-$(CONFIG_COMMON_CLK_MT6797_VDECSYS) += clk-mt6797-vdec.o
 obj-$(CONFIG_COMMON_CLK_MT6797_VENCSYS) += clk-mt6797-venc.o
 obj-$(CONFIG_COMMON_CLK_MT2701) += clk-mt2701.o
+obj-$(CONFIG_COMMON_CLK_MT2701_AUDSYS) += clk-mt2701-aud.o
 obj-$(CONFIG_COMMON_CLK_MT2701_BDPSYS) += clk-mt2701-bdp.o
 obj-$(CONFIG_COMMON_CLK_MT2701_ETHSYS) += clk-mt2701-eth.o
 obj-$(CONFIG_COMMON_CLK_MT2701_HIFSYS) += clk-mt2701-hif.o
diff --git a/drivers/clk/mediatek/clk-mt2701-aud.c b/drivers/clk/mediatek/clk-mt2701-aud.c
new file mode 100644
index 000000000000..e66896a44fad
--- /dev/null
+++ b/drivers/clk/mediatek/clk-mt2701-aud.c
@@ -0,0 +1,203 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018 MediaTek Inc.
+ * Author: Ryder Lee <ryder.lee@mediatek.com>
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+
+#include "clk-mtk.h"
+#include "clk-gate.h"
+
+#include <dt-bindings/clock/mt2701-clk.h>
+
+/*
+ * Each GATE_AUDIOn() initializer describes one gate in the register bank
+ * audioN_cg_regs; all four use the mtk_clk_gate_ops_no_setclr operations.
+ */
+#define GATE_AUDIO0(_id, _name, _parent, _shift) {	\
+		.id = _id,				\
+		.name = _name,				\
+		.parent_name = _parent,			\
+		.regs = &audio0_cg_regs,			\
+		.shift = _shift,			\
+		.ops = &mtk_clk_gate_ops_no_setclr,	\
+	}
+
+#define GATE_AUDIO1(_id, _name, _parent, _shift) {	\
+		.id = _id,				\
+		.name = _name,				\
+		.parent_name = _parent,			\
+		.regs = &audio1_cg_regs,			\
+		.shift = _shift,			\
+		.ops = &mtk_clk_gate_ops_no_setclr,	\
+	}
+
+#define GATE_AUDIO2(_id, _name, _parent, _shift) {	\
+		.id = _id,				\
+		.name = _name,				\
+		.parent_name = _parent,			\
+		.regs = &audio2_cg_regs,			\
+		.shift = _shift,			\
+		.ops = &mtk_clk_gate_ops_no_setclr,	\
+	}
+
+#define GATE_AUDIO3(_id, _name, _parent, _shift) {	\
+		.id = _id,				\
+		.name = _name,				\
+		.parent_name = _parent,			\
+		.regs = &audio3_cg_regs,			\
+		.shift = _shift,			\
+		.ops = &mtk_clk_gate_ops_no_setclr,	\
+	}
+
+/*
+ * Register banks: within each bank the set, clear and status offsets are
+ * the same register.
+ */
+static const struct mtk_gate_regs audio0_cg_regs = {
+	.set_ofs = 0x0,
+	.clr_ofs = 0x0,
+	.sta_ofs = 0x0,
+};
+
+static const struct mtk_gate_regs audio1_cg_regs = {
+	.set_ofs = 0x10,
+	.clr_ofs = 0x10,
+	.sta_ofs = 0x10,
+};
+
+static const struct mtk_gate_regs audio2_cg_regs = {
+	.set_ofs = 0x14,
+	.clr_ofs = 0x14,
+	.sta_ofs = 0x14,
+};
+
+static const struct mtk_gate_regs audio3_cg_regs = {
+	.set_ofs = 0x634,
+	.clr_ofs = 0x634,
+	.sta_ofs = 0x634,
+};
+
+static const struct mtk_gate audio_clks[] = {
+	/* AUDIO0 */
+	GATE_AUDIO0(CLK_AUD_AFE, "audio_afe", "aud_intbus_sel", 2),
+	GATE_AUDIO0(CLK_AUD_HDMI, "audio_hdmi", "audpll_sel", 20),
+	GATE_AUDIO0(CLK_AUD_SPDF, "audio_spdf", "audpll_sel", 21),
+	GATE_AUDIO0(CLK_AUD_SPDF2, "audio_spdf2", "audpll_sel", 22),
+	GATE_AUDIO0(CLK_AUD_APLL, "audio_apll", "audpll_sel", 23),
+	/* AUDIO1 */
+	GATE_AUDIO1(CLK_AUD_I2SIN1, "audio_i2sin1", "aud_mux1_sel", 0),
+	GATE_AUDIO1(CLK_AUD_I2SIN2, "audio_i2sin2", "aud_mux1_sel", 1),
+	GATE_AUDIO1(CLK_AUD_I2SIN3, "audio_i2sin3", "aud_mux1_sel", 2),
+	GATE_AUDIO1(CLK_AUD_I2SIN4, "audio_i2sin4", "aud_mux1_sel", 3),
+	GATE_AUDIO1(CLK_AUD_I2SIN5, "audio_i2sin5", "aud_mux1_sel", 4),
+	GATE_AUDIO1(CLK_AUD_I2SIN6, "audio_i2sin6", "aud_mux1_sel", 5),
+	GATE_AUDIO1(CLK_AUD_I2SO1, "audio_i2so1", "aud_mux1_sel", 6),
+	GATE_AUDIO1(CLK_AUD_I2SO2, "audio_i2so2", "aud_mux1_sel", 7),
+	GATE_AUDIO1(CLK_AUD_I2SO3, "audio_i2so3", "aud_mux1_sel", 8),
+	GATE_AUDIO1(CLK_AUD_I2SO4, "audio_i2so4", "aud_mux1_sel", 9),
+	GATE_AUDIO1(CLK_AUD_I2SO5, "audio_i2so5", "aud_mux1_sel", 10),
+	GATE_AUDIO1(CLK_AUD_I2SO6, "audio_i2so6", "aud_mux1_sel", 11),
+	GATE_AUDIO1(CLK_AUD_ASRCI1, "audio_asrci1", "asm_h_sel", 12),
+	GATE_AUDIO1(CLK_AUD_ASRCI2, "audio_asrci2", "asm_h_sel", 13),
+	GATE_AUDIO1(CLK_AUD_ASRCO1, "audio_asrco1", "asm_h_sel", 14),
+	GATE_AUDIO1(CLK_AUD_ASRCO2, "audio_asrco2", "asm_h_sel", 15),
+	GATE_AUDIO1(CLK_AUD_INTDIR, "audio_intdir", "intdir_sel", 20),
+	GATE_AUDIO1(CLK_AUD_A1SYS, "audio_a1sys", "aud_mux1_sel", 21),
+	GATE_AUDIO1(CLK_AUD_A2SYS, "audio_a2sys", "aud_mux2_sel", 22),
+	GATE_AUDIO1(CLK_AUD_AFE_CONN, "audio_afe_conn", "aud_mux1_sel", 23),
+	GATE_AUDIO1(CLK_AUD_AFE_MRGIF, "audio_afe_mrgif", "aud_mux1_sel", 25),
+	/* AUDIO2 */
+	GATE_AUDIO2(CLK_AUD_MMIF_UL1, "audio_ul1", "aud_mux1_sel", 0),
+	GATE_AUDIO2(CLK_AUD_MMIF_UL2, "audio_ul2", "aud_mux1_sel", 1),
+	GATE_AUDIO2(CLK_AUD_MMIF_UL3, "audio_ul3", "aud_mux1_sel", 2),
+	GATE_AUDIO2(CLK_AUD_MMIF_UL4, "audio_ul4", "aud_mux1_sel", 3),
+	GATE_AUDIO2(CLK_AUD_MMIF_UL5, "audio_ul5", "aud_mux1_sel", 4),
+	GATE_AUDIO2(CLK_AUD_MMIF_UL6, "audio_ul6", "aud_mux1_sel", 5),
+	GATE_AUDIO2(CLK_AUD_MMIF_DL1, "audio_dl1", "aud_mux1_sel", 6),
+	GATE_AUDIO2(CLK_AUD_MMIF_DL2, "audio_dl2", "aud_mux1_sel", 7),
+	GATE_AUDIO2(CLK_AUD_MMIF_DL3, "audio_dl3", "aud_mux1_sel", 8),
+	GATE_AUDIO2(CLK_AUD_MMIF_DL4, "audio_dl4", "aud_mux1_sel", 9),
+	GATE_AUDIO2(CLK_AUD_MMIF_DL5, "audio_dl5", "aud_mux1_sel", 10),
+	GATE_AUDIO2(CLK_AUD_MMIF_DL6, "audio_dl6", "aud_mux1_sel", 11),
+	GATE_AUDIO2(CLK_AUD_MMIF_DLMCH, "audio_dlmch", "aud_mux1_sel", 12),
+	GATE_AUDIO2(CLK_AUD_MMIF_ARB1, "audio_arb1", "aud_mux1_sel", 13),
+	GATE_AUDIO2(CLK_AUD_MMIF_AWB1, "audio_awb", "aud_mux1_sel", 14),
+	GATE_AUDIO2(CLK_AUD_MMIF_AWB2, "audio_awb2", "aud_mux1_sel", 15),
+	GATE_AUDIO2(CLK_AUD_MMIF_DAI, "audio_dai", "aud_mux1_sel", 16),
+	/* AUDIO3 */
+	GATE_AUDIO3(CLK_AUD_ASRCI3, "audio_asrci3", "asm_h_sel", 2),
+	GATE_AUDIO3(CLK_AUD_ASRCI4, "audio_asrci4", "asm_h_sel", 3),
+	GATE_AUDIO3(CLK_AUD_ASRCI5, "audio_asrci5", "asm_h_sel", 4),
+	GATE_AUDIO3(CLK_AUD_ASRCI6, "audio_asrci6", "asm_h_sel", 5),
+	GATE_AUDIO3(CLK_AUD_ASRCO3, "audio_asrco3", "asm_h_sel", 6),
+	GATE_AUDIO3(CLK_AUD_ASRCO4, "audio_asrco4", "asm_h_sel", 7),
+	GATE_AUDIO3(CLK_AUD_ASRCO5, "audio_asrco5", "asm_h_sel", 8),
+	GATE_AUDIO3(CLK_AUD_ASRCO6, "audio_asrco6", "asm_h_sel", 9),
+	GATE_AUDIO3(CLK_AUD_MEM_ASRC1, "audio_mem_asrc1", "asm_h_sel", 10),
+	GATE_AUDIO3(CLK_AUD_MEM_ASRC2, "audio_mem_asrc2", "asm_h_sel", 11),
+	GATE_AUDIO3(CLK_AUD_MEM_ASRC3, "audio_mem_asrc3", "asm_h_sel", 12),
+	GATE_AUDIO3(CLK_AUD_MEM_ASRC4, "audio_mem_asrc4", "asm_h_sel", 13),
+	GATE_AUDIO3(CLK_AUD_MEM_ASRC5, "audio_mem_asrc5", "asm_h_sel", 14),
+};
+
+static const struct of_device_id of_match_clk_mt2701_aud[] = {
+	{ .compatible = "mediatek,mt2701-audsys", },
+	{}
+};
+
+/*
+ * Register the audsys gates and the clock provider for @pdev's DT node,
+ * then call devm_of_platform_populate() on the device.
+ *
+ * Returns 0 on success, -ENOMEM if the clock table cannot be allocated, or
+ * the error from of_clk_add_provider()/devm_of_platform_populate().
+ */
+static int clk_mt2701_aud_probe(struct platform_device *pdev)
+{
+	struct clk_onecell_data *clk_data;
+	struct device_node *node = pdev->dev.of_node;
+	int r;
+
+	clk_data = mtk_alloc_clk_data(CLK_AUD_NR);
+	if (!clk_data)
+		return -ENOMEM;
+
+	mtk_clk_register_gates(node, audio_clks, ARRAY_SIZE(audio_clks),
+			       clk_data);
+
+	r = of_clk_add_provider(node, of_clk_src_onecell_get, clk_data);
+	if (r) {
+		dev_err(&pdev->dev,
+			"could not register clock provider: %s: %d\n",
+			pdev->name, r);
+
+		goto err_clk_provider;
+	}
+
+	r = devm_of_platform_populate(&pdev->dev);
+	if (r)
+		goto err_plat_populate;
+
+	return 0;
+
+err_plat_populate:
+	of_clk_del_provider(node);
+err_clk_provider:
+	return r;
+}
+
+static struct platform_driver clk_mt2701_aud_drv = {
+	.probe = clk_mt2701_aud_probe,
+	.driver = {
+		.name = "clk-mt2701-aud",
+		.of_match_table = of_match_clk_mt2701_aud,
+	},
+};
+
+builtin_platform_driver(clk_mt2701_aud_drv);
diff --git a/drivers/clk/mediatek/clk-mt2701.c b/drivers/clk/mediatek/clk-mt2701.c
index 8e7f16fd87c9..deca7527f92f 100644
--- a/drivers/clk/mediatek/clk-mt2701.c
+++ b/drivers/clk/mediatek/clk-mt2701.c
@@ -148,6 +148,7 @@ static const struct mtk_fixed_factor top_fixed_divs[] = {
 	FACTOR(CLK_TOP_CLK26M_D8, "clk26m_d8", "clk26m", 1, 8),
 	FACTOR(CLK_TOP_32K_INTERNAL, "32k_internal", "clk26m", 1, 793),
 	FACTOR(CLK_TOP_32K_EXTERNAL, "32k_external", "rtc32k", 1, 1),
+	FACTOR(CLK_TOP_AXISEL_D4, "axisel_d4", "axi_sel", 1, 4),
 };
 
 static const char * const axi_parents[] = {
@@ -857,13 +858,13 @@ static const struct mtk_gate peri_clks[] = {
 	GATE_PERI0(CLK_PERI_USB1, "usb1_ck", "usb20_sel", 11),
 	GATE_PERI0(CLK_PERI_USB0, "usb0_ck", "usb20_sel", 10),
 	GATE_PERI0(CLK_PERI_PWM, "pwm_ck", "axi_sel", 9),
-	GATE_PERI0(CLK_PERI_PWM7, "pwm7_ck", "axi_sel", 8),
-	GATE_PERI0(CLK_PERI_PWM6, "pwm6_ck", "axi_sel", 7),
-	GATE_PERI0(CLK_PERI_PWM5, "pwm5_ck", "axi_sel", 6),
-	GATE_PERI0(CLK_PERI_PWM4, "pwm4_ck", "axi_sel", 5),
-	GATE_PERI0(CLK_PERI_PWM3, "pwm3_ck", "axi_sel", 4),
-	GATE_PERI0(CLK_PERI_PWM2, "pwm2_ck", "axi_sel", 3),
-	GATE_PERI0(CLK_PERI_PWM1, "pwm1_ck", "axi_sel", 2),
+	GATE_PERI0(CLK_PERI_PWM7, "pwm7_ck", "axisel_d4", 8),
+	GATE_PERI0(CLK_PERI_PWM6, "pwm6_ck", "axisel_d4", 7),
+	GATE_PERI0(CLK_PERI_PWM5, "pwm5_ck", "axisel_d4", 6),
+	GATE_PERI0(CLK_PERI_PWM4, "pwm4_ck", "axisel_d4", 5),
+	GATE_PERI0(CLK_PERI_PWM3, "pwm3_ck", "axisel_d4", 4),
+	GATE_PERI0(CLK_PERI_PWM2, "pwm2_ck", "axisel_d4", 3),
+	GATE_PERI0(CLK_PERI_PWM1, "pwm1_ck", "axisel_d4", 2),
 	GATE_PERI0(CLK_PERI_THERM, "therm_ck", "axi_sel", 1),
 	GATE_PERI0(CLK_PERI_NFI, "nfi_ck", "nfi2x_sel", 0),
 
diff --git a/drivers/clk/mediatek/clk-mt2712.c b/drivers/clk/mediatek/clk-mt2712.c
index 498d13799388..991d4093726e 100644
--- a/drivers/clk/mediatek/clk-mt2712.c
+++ b/drivers/clk/mediatek/clk-mt2712.c
@@ -221,6 +221,8 @@ static const struct mtk_fixed_factor top_divs[] = {
 		4),
 	FACTOR(CLK_TOP_D2A_ULCLK_6P5M, "d2a_ulclk_6p5m", "clk26m", 1,
 		4),
+	FACTOR(CLK_TOP_APLL1_D3, "apll1_d3", "apll1_ck", 1,
+		3),
 };
 
 static const char * const axi_parents[] = {
@@ -625,7 +627,7 @@ static const char * const ether_125m_parents[] = {
 static const char * const ether_50m_parents[] = {
 	"clk26m",
 	"etherpll_50m",
-	"univpll_d26",
+	"apll1_d3",
 	"univpll3_d4"
 };
 
@@ -686,7 +688,7 @@ static const char * const i2c_parents[] = {
 
 static const char * const msdc0p_aes_parents[] = {
 	"clk26m",
-	"msdcpll_ck",
+	"syspll_d2",
 	"univpll_d3",
 	"vcodecpll_ck"
 };
@@ -719,6 +721,17 @@ static const char * const aud_apll2_parents[] = {
 	"clkaud_ext_i_2"
 };
 
+static const char * const apll1_ref_parents[] = {
+	"clkaud_ext_i_2",
+	"clkaud_ext_i_1",
+	"clki2si0_mck_i",
+	"clki2si1_mck_i",
+	"clki2si2_mck_i",
+	"clktdmin_mclk_i",
+	"clki2si2_mck_i",
+	"clktdmin_mclk_i"
+};
+
 static const char * const audull_vtx_parents[] = {
 	"d2a_ulclk_6p5m",
 	"clkaud_ext_i_0"
@@ -886,6 +899,10 @@ static struct mtk_composite top_muxes[] = {
 		aud_apll2_parents, 0x134, 1, 1),
 	MUX(CLK_TOP_DA_AUDULL_VTX_6P5M_SEL, "audull_vtx_sel",
 		audull_vtx_parents, 0x134, 31, 1),
+	MUX(CLK_TOP_APLL1_REF_SEL, "apll1_ref_sel",
+		apll1_ref_parents, 0x134, 4, 3),
+	MUX(CLK_TOP_APLL2_REF_SEL, "apll2_ref_sel",
+		apll1_ref_parents, 0x134, 7, 3),
 };
 
 static const char * const mcu_mp0_parents[] = {
@@ -932,36 +949,56 @@ static const struct mtk_clk_divider top_adj_divs[] = {
 	DIV_ADJ(CLK_TOP_APLL_DIV7, "apll_div7", "i2si3_sel", 0x128, 24, 8),
 };
 
-static const struct mtk_gate_regs top_cg_regs = {
+static const struct mtk_gate_regs top0_cg_regs = {
 	.set_ofs = 0x120,
 	.clr_ofs = 0x120,
 	.sta_ofs = 0x120,
 };
 
-#define GATE_TOP(_id, _name, _parent, _shift) {	\
+static const struct mtk_gate_regs top1_cg_regs = {
+	.set_ofs = 0x424,
+	.clr_ofs = 0x424,
+	.sta_ofs = 0x424,
+};
+
+#define GATE_TOP0(_id, _name, _parent, _shift) {	\
 		.id = _id,				\
 		.name = _name,				\
 		.parent_name = _parent,			\
-		.regs = &top_cg_regs,			\
+		.regs = &top0_cg_regs,			\
 		.shift = _shift,			\
 		.ops = &mtk_clk_gate_ops_no_setclr,	\
 	}
 
+#define GATE_TOP1(_id, _name, _parent, _shift) {	\
+		.id = _id,				\
+		.name = _name,				\
+		.parent_name = _parent,			\
+		.regs = &top1_cg_regs,			\
+		.shift = _shift,			\
+		.ops = &mtk_clk_gate_ops_no_setclr_inv,	\
+	}
+
 static const struct mtk_gate top_clks[] = {
-	GATE_TOP(CLK_TOP_APLL_DIV_PDN0, "apll_div_pdn0", "i2so1_sel", 0),
-	GATE_TOP(CLK_TOP_APLL_DIV_PDN1, "apll_div_pdn1", "i2so2_sel", 1),
-	GATE_TOP(CLK_TOP_APLL_DIV_PDN2, "apll_div_pdn2", "i2so3_sel", 2),
-	GATE_TOP(CLK_TOP_APLL_DIV_PDN3, "apll_div_pdn3", "tdmo0_sel", 3),
-	GATE_TOP(CLK_TOP_APLL_DIV_PDN4, "apll_div_pdn4", "tdmo1_sel", 4),
-	GATE_TOP(CLK_TOP_APLL_DIV_PDN5, "apll_div_pdn5", "i2si1_sel", 5),
-	GATE_TOP(CLK_TOP_APLL_DIV_PDN6, "apll_div_pdn6", "i2si2_sel", 6),
-	GATE_TOP(CLK_TOP_APLL_DIV_PDN7, "apll_div_pdn7", "i2si3_sel", 7),
+	/* TOP0 */
+	GATE_TOP0(CLK_TOP_APLL_DIV_PDN0, "apll_div_pdn0", "i2so1_sel", 0),
+	GATE_TOP0(CLK_TOP_APLL_DIV_PDN1, "apll_div_pdn1", "i2so2_sel", 1),
+	GATE_TOP0(CLK_TOP_APLL_DIV_PDN2, "apll_div_pdn2", "i2so3_sel", 2),
+	GATE_TOP0(CLK_TOP_APLL_DIV_PDN3, "apll_div_pdn3", "tdmo0_sel", 3),
+	GATE_TOP0(CLK_TOP_APLL_DIV_PDN4, "apll_div_pdn4", "tdmo1_sel", 4),
+	GATE_TOP0(CLK_TOP_APLL_DIV_PDN5, "apll_div_pdn5", "i2si1_sel", 5),
+	GATE_TOP0(CLK_TOP_APLL_DIV_PDN6, "apll_div_pdn6", "i2si2_sel", 6),
+	GATE_TOP0(CLK_TOP_APLL_DIV_PDN7, "apll_div_pdn7", "i2si3_sel", 7),
+	/* TOP1 */
+	GATE_TOP1(CLK_TOP_NFI2X_EN, "nfi2x_en", "nfi2x_sel", 0),
+	GATE_TOP1(CLK_TOP_NFIECC_EN, "nfiecc_en", "nfiecc_sel", 1),
+	GATE_TOP1(CLK_TOP_NFI1X_CK_EN, "nfi1x_ck_en", "nfi2x_sel", 2),
 };
 
 static const struct mtk_gate_regs infra_cg_regs = {
 	.set_ofs = 0x40,
 	.clr_ofs = 0x44,
-	.sta_ofs = 0x40,
+	.sta_ofs = 0x48,
 };
 
 #define GATE_INFRA(_id, _name, _parent, _shift) {	\
@@ -1120,6 +1157,10 @@ static const struct mtk_gate peri_clks[] = {
 		"msdc50_0_h_sel", 4),
 	GATE_PERI2(CLK_PERI_MSDC50_3_HCLK_EN, "per_msdc50_3_h",
 		"msdc50_3_h_sel", 5),
+	GATE_PERI2(CLK_PERI_MSDC30_0_QTR_EN, "per_msdc30_0_q",
+		"axi_sel", 6),
+	GATE_PERI2(CLK_PERI_MSDC30_3_QTR_EN, "per_msdc30_3_q",
+		"mem_sel", 7),
 };
 
 #define MT2712_PLL_FMAX		(3000UL * MHZ)
diff --git a/drivers/clk/mediatek/clk-mt7622-aud.c b/drivers/clk/mediatek/clk-mt7622-aud.c
index fad7d9fc53ba..4f3d47b41b3e 100644
--- a/drivers/clk/mediatek/clk-mt7622-aud.c
+++ b/drivers/clk/mediatek/clk-mt7622-aud.c
@@ -106,6 +106,7 @@ static const struct mtk_gate audio_clks[] = {
 	GATE_AUDIO1(CLK_AUDIO_INTDIR, "audio_intdir", "intdir_sel", 20),
 	GATE_AUDIO1(CLK_AUDIO_A1SYS, "audio_a1sys", "a1sys_hp_sel", 21),
 	GATE_AUDIO1(CLK_AUDIO_A2SYS, "audio_a2sys", "a2sys_hp_sel", 22),
+	GATE_AUDIO1(CLK_AUDIO_AFE_CONN, "audio_afe_conn", "a1sys_hp_sel", 23),
 	/* AUDIO2 */
 	GATE_AUDIO2(CLK_AUDIO_UL1, "audio_ul1", "a1sys_hp_sel", 0),
 	GATE_AUDIO2(CLK_AUDIO_UL2, "audio_ul2", "a1sys_hp_sel", 1),
@@ -149,11 +150,23 @@ static int clk_mt7622_audiosys_init(struct platform_device *pdev)
 			       clk_data);
 
 	r = of_clk_add_provider(node, of_clk_src_onecell_get, clk_data);
-	if (r)
+	if (r) {
 		dev_err(&pdev->dev,
 			"could not register clock provider: %s: %d\n",
 			pdev->name, r);
 
+		goto err_clk_provider;
+	}
+
+	r = devm_of_platform_populate(&pdev->dev);
+	if (r)
+		goto err_plat_populate;
+
+	return 0;
+
+err_plat_populate:
+	of_clk_del_provider(node);
+err_clk_provider:
 	return r;
 }
 
diff --git a/drivers/clk/meson/Kconfig b/drivers/clk/meson/Kconfig
index 7694302c70a4..d5cbec522aec 100644
--- a/drivers/clk/meson/Kconfig
+++ b/drivers/clk/meson/Kconfig
@@ -3,10 +3,15 @@ config COMMON_CLK_AMLOGIC
 	depends on OF
 	depends on ARCH_MESON || COMPILE_TEST
 
+config COMMON_CLK_REGMAP_MESON
+	bool
+	select REGMAP
+
 config COMMON_CLK_MESON8B
 	bool
 	depends on COMMON_CLK_AMLOGIC
 	select RESET_CONTROLLER
+	select COMMON_CLK_REGMAP_MESON
 	help
 	  Support for the clock controller on AmLogic S802 (Meson8),
 	  S805 (Meson8b) and S812 (Meson8m2) devices. Say Y if you
@@ -16,6 +21,8 @@ config COMMON_CLK_GXBB
 	bool
 	depends on COMMON_CLK_AMLOGIC
 	select RESET_CONTROLLER
+	select COMMON_CLK_REGMAP_MESON
+	select MFD_SYSCON
 	help
 	  Support for the clock controller on AmLogic S905 devices, aka gxbb.
 	  Say Y if you want peripherals and CPU frequency scaling to work.
@@ -24,6 +31,8 @@ config COMMON_CLK_AXG
 	bool
 	depends on COMMON_CLK_AMLOGIC
 	select RESET_CONTROLLER
+	select COMMON_CLK_REGMAP_MESON
+	select MFD_SYSCON
 	help
 	  Support for the clock controller on AmLogic A113D devices, aka axg.
 	  Say Y if you want peripherals and CPU frequency scaling to work.
diff --git a/drivers/clk/meson/Makefile b/drivers/clk/meson/Makefile
index 3c03ce583798..ffee82e60b7a 100644
--- a/drivers/clk/meson/Makefile
+++ b/drivers/clk/meson/Makefile
@@ -2,7 +2,8 @@
 # Makefile for Meson specific clk
 #
 
-obj-$(CONFIG_COMMON_CLK_AMLOGIC) += clk-pll.o clk-cpu.o clk-mpll.o clk-audio-divider.o
+obj-$(CONFIG_COMMON_CLK_AMLOGIC) += clk-pll.o clk-mpll.o clk-audio-divider.o
 obj-$(CONFIG_COMMON_CLK_MESON8B) += meson8b.o
-obj-$(CONFIG_COMMON_CLK_GXBB)	 += gxbb.o gxbb-aoclk.o gxbb-aoclk-regmap.o gxbb-aoclk-32k.o
+obj-$(CONFIG_COMMON_CLK_GXBB)	 += gxbb.o gxbb-aoclk.o gxbb-aoclk-32k.o
 obj-$(CONFIG_COMMON_CLK_AXG)	 += axg.o
+obj-$(CONFIG_COMMON_CLK_REGMAP_MESON)	+= clk-regmap.o
diff --git a/drivers/clk/meson/axg.c b/drivers/clk/meson/axg.c
index 1294f3ad7cd5..5f5d468c1efe 100644
--- a/drivers/clk/meson/axg.c
+++ b/drivers/clk/meson/axg.c
@@ -11,125 +11,51 @@
 
 #include <linux/clk.h>
 #include <linux/clk-provider.h>
+#include <linux/init.h>
 #include <linux/of_address.h>
 #include <linux/of_device.h>
+#include <linux/mfd/syscon.h>
 #include <linux/platform_device.h>
-#include <linux/init.h>
+#include <linux/regmap.h>
 
 #include "clkc.h"
 #include "axg.h"
 
 static DEFINE_SPINLOCK(meson_clk_lock);
 
-static const struct pll_rate_table sys_pll_rate_table[] = {
-	PLL_RATE(24000000, 56, 1, 2),
-	PLL_RATE(48000000, 64, 1, 2),
-	PLL_RATE(72000000, 72, 1, 2),
-	PLL_RATE(96000000, 64, 1, 2),
-	PLL_RATE(120000000, 80, 1, 2),
-	PLL_RATE(144000000, 96, 1, 2),
-	PLL_RATE(168000000, 56, 1, 1),
-	PLL_RATE(192000000, 64, 1, 1),
-	PLL_RATE(216000000, 72, 1, 1),
-	PLL_RATE(240000000, 80, 1, 1),
-	PLL_RATE(264000000, 88, 1, 1),
-	PLL_RATE(288000000, 96, 1, 1),
-	PLL_RATE(312000000, 52, 1, 2),
-	PLL_RATE(336000000, 56, 1, 2),
-	PLL_RATE(360000000, 60, 1, 2),
-	PLL_RATE(384000000, 64, 1, 2),
-	PLL_RATE(408000000, 68, 1, 2),
-	PLL_RATE(432000000, 72, 1, 2),
-	PLL_RATE(456000000, 76, 1, 2),
-	PLL_RATE(480000000, 80, 1, 2),
-	PLL_RATE(504000000, 84, 1, 2),
-	PLL_RATE(528000000, 88, 1, 2),
-	PLL_RATE(552000000, 92, 1, 2),
-	PLL_RATE(576000000, 96, 1, 2),
-	PLL_RATE(600000000, 50, 1, 1),
-	PLL_RATE(624000000, 52, 1, 1),
-	PLL_RATE(648000000, 54, 1, 1),
-	PLL_RATE(672000000, 56, 1, 1),
-	PLL_RATE(696000000, 58, 1, 1),
-	PLL_RATE(720000000, 60, 1, 1),
-	PLL_RATE(744000000, 62, 1, 1),
-	PLL_RATE(768000000, 64, 1, 1),
-	PLL_RATE(792000000, 66, 1, 1),
-	PLL_RATE(816000000, 68, 1, 1),
-	PLL_RATE(840000000, 70, 1, 1),
-	PLL_RATE(864000000, 72, 1, 1),
-	PLL_RATE(888000000, 74, 1, 1),
-	PLL_RATE(912000000, 76, 1, 1),
-	PLL_RATE(936000000, 78, 1, 1),
-	PLL_RATE(960000000, 80, 1, 1),
-	PLL_RATE(984000000, 82, 1, 1),
-	PLL_RATE(1008000000, 84, 1, 1),
-	PLL_RATE(1032000000, 86, 1, 1),
-	PLL_RATE(1056000000, 88, 1, 1),
-	PLL_RATE(1080000000, 90, 1, 1),
-	PLL_RATE(1104000000, 92, 1, 1),
-	PLL_RATE(1128000000, 94, 1, 1),
-	PLL_RATE(1152000000, 96, 1, 1),
-	PLL_RATE(1176000000, 98, 1, 1),
-	PLL_RATE(1200000000, 50, 1, 0),
-	PLL_RATE(1224000000, 51, 1, 0),
-	PLL_RATE(1248000000, 52, 1, 0),
-	PLL_RATE(1272000000, 53, 1, 0),
-	PLL_RATE(1296000000, 54, 1, 0),
-	PLL_RATE(1320000000, 55, 1, 0),
-	PLL_RATE(1344000000, 56, 1, 0),
-	PLL_RATE(1368000000, 57, 1, 0),
-	PLL_RATE(1392000000, 58, 1, 0),
-	PLL_RATE(1416000000, 59, 1, 0),
-	PLL_RATE(1440000000, 60, 1, 0),
-	PLL_RATE(1464000000, 61, 1, 0),
-	PLL_RATE(1488000000, 62, 1, 0),
-	PLL_RATE(1512000000, 63, 1, 0),
-	PLL_RATE(1536000000, 64, 1, 0),
-	PLL_RATE(1560000000, 65, 1, 0),
-	PLL_RATE(1584000000, 66, 1, 0),
-	PLL_RATE(1608000000, 67, 1, 0),
-	PLL_RATE(1632000000, 68, 1, 0),
-	PLL_RATE(1656000000, 68, 1, 0),
-	PLL_RATE(1680000000, 68, 1, 0),
-	PLL_RATE(1704000000, 68, 1, 0),
-	PLL_RATE(1728000000, 69, 1, 0),
-	PLL_RATE(1752000000, 69, 1, 0),
-	PLL_RATE(1776000000, 69, 1, 0),
-	PLL_RATE(1800000000, 69, 1, 0),
-	PLL_RATE(1824000000, 70, 1, 0),
-	PLL_RATE(1848000000, 70, 1, 0),
-	PLL_RATE(1872000000, 70, 1, 0),
-	PLL_RATE(1896000000, 70, 1, 0),
-	PLL_RATE(1920000000, 71, 1, 0),
-	PLL_RATE(1944000000, 71, 1, 0),
-	PLL_RATE(1968000000, 71, 1, 0),
-	PLL_RATE(1992000000, 71, 1, 0),
-	PLL_RATE(2016000000, 72, 1, 0),
-	PLL_RATE(2040000000, 72, 1, 0),
-	PLL_RATE(2064000000, 72, 1, 0),
-	PLL_RATE(2088000000, 72, 1, 0),
-	PLL_RATE(2112000000, 73, 1, 0),
-	{ /* sentinel */ },
-};
-
-static struct meson_clk_pll axg_fixed_pll = {
-	.m = {
-		.reg_off = HHI_MPLL_CNTL,
-		.shift   = 0,
-		.width   = 9,
-	},
-	.n = {
-		.reg_off = HHI_MPLL_CNTL,
-		.shift   = 9,
-		.width   = 5,
-	},
-	.od = {
-		.reg_off = HHI_MPLL_CNTL,
-		.shift   = 16,
-		.width   = 2,
+static struct clk_regmap axg_fixed_pll = {
+	.data = &(struct meson_clk_pll_data){
+		.m = {
+			.reg_off = HHI_MPLL_CNTL,
+			.shift   = 0,
+			.width   = 9,
+		},
+		.n = {
+			.reg_off = HHI_MPLL_CNTL,
+			.shift   = 9,
+			.width   = 5,
+		},
+		.od = {
+			.reg_off = HHI_MPLL_CNTL,
+			.shift   = 16,
+			.width   = 2,
+		},
+		.frac = {
+			.reg_off = HHI_MPLL_CNTL2,
+			.shift   = 0,
+			.width   = 12,
+		},
+		.l = {
+			.reg_off = HHI_MPLL_CNTL,
+			.shift   = 31,
+			.width   = 1,
+		},
+		.rst = {
+			.reg_off = HHI_MPLL_CNTL,
+			.shift   = 29,
+			.width   = 1,
+		},
 	},
-	.lock = &meson_clk_lock,
 	.hw.init = &(struct clk_init_data){
 		.name = "fixed_pll",
 		.ops = &meson_clk_pll_ro_ops,
@@ -138,25 +64,34 @@ static struct meson_clk_pll axg_fixed_pll = {
 	},
 };
 
-static struct meson_clk_pll axg_sys_pll = {
-	.m = {
-		.reg_off = HHI_SYS_PLL_CNTL,
-		.shift   = 0,
-		.width   = 9,
+static struct clk_regmap axg_sys_pll = {
+	.data = &(struct meson_clk_pll_data){
+		.m = {
+			.reg_off = HHI_SYS_PLL_CNTL,
+			.shift   = 0,
+			.width   = 9,
+		},
+		.n = {
+			.reg_off = HHI_SYS_PLL_CNTL,
+			.shift   = 9,
+			.width   = 5,
+		},
+		.od = {
+			.reg_off = HHI_SYS_PLL_CNTL,
+			.shift   = 16,
+			.width   = 2,
+		},
+		.l = {
+			.reg_off = HHI_SYS_PLL_CNTL,
+			.shift   = 31,
+			.width   = 1,
+		},
+		.rst = {
+			.reg_off = HHI_SYS_PLL_CNTL,
+			.shift   = 29,
+			.width   = 1,
+		},
 	},
-	.n = {
-		.reg_off = HHI_SYS_PLL_CNTL,
-		.shift   = 9,
-		.width   = 5,
-	},
-	.od = {
-		.reg_off = HHI_SYS_PLL_CNTL,
-		.shift   = 10,
-		.width   = 2,
-	},
-	.rate_table = sys_pll_rate_table,
-	.rate_count = ARRAY_SIZE(sys_pll_rate_table),
-	.lock = &meson_clk_lock,
 	.hw.init = &(struct clk_init_data){
 		.name = "sys_pll",
 		.ops = &meson_clk_pll_ro_ops,
@@ -257,40 +192,51 @@ static const struct pll_rate_table axg_gp0_pll_rate_table[] = {
 	{ /* sentinel */ },
 };
 
-static struct pll_params_table axg_gp0_params_table[] = {
-	PLL_PARAM(HHI_GP0_PLL_CNTL, 0x40010250),
-	PLL_PARAM(HHI_GP0_PLL_CNTL1, 0xc084a000),
-	PLL_PARAM(HHI_GP0_PLL_CNTL2, 0xb75020be),
-	PLL_PARAM(HHI_GP0_PLL_CNTL3, 0x0a59a288),
-	PLL_PARAM(HHI_GP0_PLL_CNTL4, 0xc000004d),
-	PLL_PARAM(HHI_GP0_PLL_CNTL5, 0x00078000),
-};
-
-static struct meson_clk_pll axg_gp0_pll = {
-	.m = {
-		.reg_off = HHI_GP0_PLL_CNTL,
-		.shift   = 0,
-		.width   = 9,
-	},
-	.n = {
-		.reg_off = HHI_GP0_PLL_CNTL,
-		.shift   = 9,
-		.width   = 5,
-	},
-	.od = {
-		.reg_off = HHI_GP0_PLL_CNTL,
-		.shift   = 16,
-		.width   = 2,
-	},
-	.params = {
-		.params_table = axg_gp0_params_table,
-		.params_count =	ARRAY_SIZE(axg_gp0_params_table),
-		.no_init_reset = true,
-		.reset_lock_loop = true,
-	},
-	.rate_table = axg_gp0_pll_rate_table,
-	.rate_count = ARRAY_SIZE(axg_gp0_pll_rate_table),
-	.lock = &meson_clk_lock,
+static const struct reg_sequence axg_gp0_init_regs[] = {
+	{ .reg = HHI_GP0_PLL_CNTL1,	.def = 0xc084b000 },
+	{ .reg = HHI_GP0_PLL_CNTL2,	.def = 0xb75020be },
+	{ .reg = HHI_GP0_PLL_CNTL3,	.def = 0x0a59a288 },
+	{ .reg = HHI_GP0_PLL_CNTL4,	.def = 0xc000004d },
+	{ .reg = HHI_GP0_PLL_CNTL5,	.def = 0x00078000 },
+	{ .reg = HHI_GP0_PLL_CNTL,	.def = 0x40010250 },
+};
+
+static struct clk_regmap axg_gp0_pll = {
+	.data = &(struct meson_clk_pll_data){
+		.m = {
+			.reg_off = HHI_GP0_PLL_CNTL,
+			.shift   = 0,
+			.width   = 9,
+		},
+		.n = {
+			.reg_off = HHI_GP0_PLL_CNTL,
+			.shift   = 9,
+			.width   = 5,
+		},
+		.od = {
+			.reg_off = HHI_GP0_PLL_CNTL,
+			.shift   = 16,
+			.width   = 2,
+		},
+		.frac = {
+			.reg_off = HHI_GP0_PLL_CNTL1,
+			.shift   = 0,
+			.width   = 10,
+		},
+		.l = {
+			.reg_off = HHI_GP0_PLL_CNTL,
+			.shift   = 31,
+			.width   = 1,
+		},
+		.rst = {
+			.reg_off = HHI_GP0_PLL_CNTL,
+			.shift   = 29,
+			.width   = 1,
+		},
+		.table = axg_gp0_pll_rate_table,
+		.init_regs = axg_gp0_init_regs,
+		.init_count = ARRAY_SIZE(axg_gp0_init_regs),
+	},
 	.hw.init = &(struct clk_init_data){
 		.name = "gp0_pll",
 		.ops = &meson_clk_pll_ops,
@@ -299,234 +245,427 @@ static struct meson_clk_pll axg_gp0_pll = {
 	},
 };
 
+static const struct reg_sequence axg_hifi_init_regs[] = {
+	{ .reg = HHI_HIFI_PLL_CNTL1,	.def = 0xc084b000 },
+	{ .reg = HHI_HIFI_PLL_CNTL2,	.def = 0xb75020be },
+	{ .reg = HHI_HIFI_PLL_CNTL3,	.def = 0x0a6a3a88 },
+	{ .reg = HHI_HIFI_PLL_CNTL4,	.def = 0xc000004d },
+	{ .reg = HHI_HIFI_PLL_CNTL5,	.def = 0x00058000 },
+	{ .reg = HHI_HIFI_PLL_CNTL,	.def = 0x40010250 },
+};
+
+static struct clk_regmap axg_hifi_pll = {
+	.data = &(struct meson_clk_pll_data){
+		.m = {
+			.reg_off = HHI_HIFI_PLL_CNTL,
+			.shift   = 0,
+			.width   = 9,
+		},
+		.n = {
+			.reg_off = HHI_HIFI_PLL_CNTL,
+			.shift   = 9,
+			.width   = 5,
+		},
+		.od = {
+			.reg_off = HHI_HIFI_PLL_CNTL,
+			.shift   = 16,
+			.width   = 2,
+		},
+		.frac = {
+			.reg_off = HHI_HIFI_PLL_CNTL5,
+			.shift   = 0,
+			.width   = 13,
+		},
+		.l = {
+			.reg_off = HHI_HIFI_PLL_CNTL,
+			.shift   = 31,
+			.width   = 1,
+		},
+		.rst = {
+			.reg_off = HHI_HIFI_PLL_CNTL,
+			.shift   = 29,
+			.width   = 1,
+		},
+		.table = axg_gp0_pll_rate_table,
+		.init_regs = axg_hifi_init_regs,
+		.init_count = ARRAY_SIZE(axg_hifi_init_regs),
+		.flags = CLK_MESON_PLL_ROUND_CLOSEST,
+	},
+	.hw.init = &(struct clk_init_data){
+		.name = "hifi_pll",
+		.ops = &meson_clk_pll_ops,
+		.parent_names = (const char *[]){ "xtal" },
+		.num_parents = 1,
+	},
+};
 
-static struct clk_fixed_factor axg_fclk_div2 = {
+static struct clk_fixed_factor axg_fclk_div2_div = {
 	.mult = 1,
 	.div = 2,
 	.hw.init = &(struct clk_init_data){
-		.name = "fclk_div2",
+		.name = "fclk_div2_div",
 		.ops = &clk_fixed_factor_ops,
 		.parent_names = (const char *[]){ "fixed_pll" },
 		.num_parents = 1,
 	},
 };
 
-static struct clk_fixed_factor axg_fclk_div3 = {
+static struct clk_regmap axg_fclk_div2 = {
+	.data = &(struct clk_regmap_gate_data){
+		.offset = HHI_MPLL_CNTL6,
+		.bit_idx = 27,
+	},
+	.hw.init = &(struct clk_init_data){
+		.name = "fclk_div2",
+		.ops = &clk_regmap_gate_ops,
+		.parent_names = (const char *[]){ "fclk_div2_div" },
+		.num_parents = 1,
+	},
+};
+
+static struct clk_fixed_factor axg_fclk_div3_div = {
 	.mult = 1,
 	.div = 3,
 	.hw.init = &(struct clk_init_data){
-		.name = "fclk_div3",
+		.name = "fclk_div3_div",
 		.ops = &clk_fixed_factor_ops,
 		.parent_names = (const char *[]){ "fixed_pll" },
 		.num_parents = 1,
 	},
 };
 
-static struct clk_fixed_factor axg_fclk_div4 = {
+static struct clk_regmap axg_fclk_div3 = {
+	.data = &(struct clk_regmap_gate_data){
+		.offset = HHI_MPLL_CNTL6,
+		.bit_idx = 28,
+	},
+	.hw.init = &(struct clk_init_data){
+		.name = "fclk_div3",
+		.ops = &clk_regmap_gate_ops,
+		.parent_names = (const char *[]){ "fclk_div3_div" },
+		.num_parents = 1,
+	},
+};
+
+static struct clk_fixed_factor axg_fclk_div4_div = {
 	.mult = 1,
 	.div = 4,
 	.hw.init = &(struct clk_init_data){
-		.name = "fclk_div4",
+		.name = "fclk_div4_div",
 		.ops = &clk_fixed_factor_ops,
 		.parent_names = (const char *[]){ "fixed_pll" },
 		.num_parents = 1,
 	},
 };
 
-static struct clk_fixed_factor axg_fclk_div5 = {
+static struct clk_regmap axg_fclk_div4 = {
+	.data = &(struct clk_regmap_gate_data){
+		.offset = HHI_MPLL_CNTL6,
+		.bit_idx = 29,
+	},
+	.hw.init = &(struct clk_init_data){
+		.name = "fclk_div4",
+		.ops = &clk_regmap_gate_ops,
+		.parent_names = (const char *[]){ "fclk_div4_div" },
+		.num_parents = 1,
+	},
+};
+
+static struct clk_fixed_factor axg_fclk_div5_div = {
 	.mult = 1,
 	.div = 5,
 	.hw.init = &(struct clk_init_data){
-		.name = "fclk_div5",
+		.name = "fclk_div5_div",
 		.ops = &clk_fixed_factor_ops,
 		.parent_names = (const char *[]){ "fixed_pll" },
 		.num_parents = 1,
 	},
 };
 
-static struct clk_fixed_factor axg_fclk_div7 = {
+static struct clk_regmap axg_fclk_div5 = {
+	.data = &(struct clk_regmap_gate_data){
+		.offset = HHI_MPLL_CNTL6,
+		.bit_idx = 30,
+	},
+	.hw.init = &(struct clk_init_data){
+		.name = "fclk_div5",
+		.ops = &clk_regmap_gate_ops,
+		.parent_names = (const char *[]){ "fclk_div5_div" },
+		.num_parents = 1,
+	},
+};
+
+static struct clk_fixed_factor axg_fclk_div7_div = {
 	.mult = 1,
 	.div = 7,
 	.hw.init = &(struct clk_init_data){
-		.name = "fclk_div7",
+		.name = "fclk_div7_div",
 		.ops = &clk_fixed_factor_ops,
 		.parent_names = (const char *[]){ "fixed_pll" },
 		.num_parents = 1,
 	},
 };
 
-static struct meson_clk_mpll axg_mpll0 = {
-	.sdm = {
-		.reg_off = HHI_MPLL_CNTL7,
-		.shift   = 0,
-		.width   = 14,
+static struct clk_regmap axg_fclk_div7 = {
+	.data = &(struct clk_regmap_gate_data){
+		.offset = HHI_MPLL_CNTL6,
+		.bit_idx = 31,
 	},
-	.sdm_en = {
-		.reg_off = HHI_MPLL_CNTL7,
-		.shift   = 15,
-		.width	 = 1,
+	.hw.init = &(struct clk_init_data){
+		.name = "fclk_div7",
+		.ops = &clk_regmap_gate_ops,
+		.parent_names = (const char *[]){ "fclk_div7_div" },
+		.num_parents = 1,
 	},
-	.n2 = {
-		.reg_off = HHI_MPLL_CNTL7,
-		.shift   = 16,
-		.width   = 9,
+};
+
+static struct clk_regmap axg_mpll_prediv = {
+	.data = &(struct clk_regmap_div_data){
+		.offset = HHI_MPLL_CNTL5,
+		.shift = 12,
+		.width = 1,
 	},
-	.en = {
-		.reg_off = HHI_MPLL_CNTL7,
-		.shift   = 14,
-		.width	 = 1,
+	.hw.init = &(struct clk_init_data){
+		.name = "mpll_prediv",
+		.ops = &clk_regmap_divider_ro_ops,
+		.parent_names = (const char *[]){ "fixed_pll" },
+		.num_parents = 1,
 	},
-	.ssen = {
-		.reg_off = HHI_MPLL_CNTL,
-		.shift   = 25,
-		.width	 = 1,
+};
+
+static struct clk_regmap axg_mpll0_div = {
+	.data = &(struct meson_clk_mpll_data){
+		.sdm = {
+			.reg_off = HHI_MPLL_CNTL7,
+			.shift   = 0,
+			.width   = 14,
+		},
+		.sdm_en = {
+			.reg_off = HHI_MPLL_CNTL7,
+			.shift   = 15,
+			.width	 = 1,
+		},
+		.n2 = {
+			.reg_off = HHI_MPLL_CNTL7,
+			.shift   = 16,
+			.width   = 9,
+		},
+		.ssen = {
+			.reg_off = HHI_MPLL_CNTL,
+			.shift   = 25,
+			.width	 = 1,
+		},
+		.misc = {
+			.reg_off = HHI_PLL_TOP_MISC,
+			.shift   = 0,
+			.width	 = 1,
+		},
+		.lock = &meson_clk_lock,
 	},
-	.lock = &meson_clk_lock,
 	.hw.init = &(struct clk_init_data){
-		.name = "mpll0",
+		.name = "mpll0_div",
 		.ops = &meson_clk_mpll_ops,
-		.parent_names = (const char *[]){ "fixed_pll" },
+		.parent_names = (const char *[]){ "mpll_prediv" },
 		.num_parents = 1,
 	},
 };
 
-static struct meson_clk_mpll axg_mpll1 = {
-	.sdm = {
-		.reg_off = HHI_MPLL_CNTL8,
-		.shift   = 0,
-		.width   = 14,
+static struct clk_regmap axg_mpll0 = {
+	.data = &(struct clk_regmap_gate_data){
+		.offset = HHI_MPLL_CNTL7,
+		.bit_idx = 14,
 	},
-	.sdm_en = {
-		.reg_off = HHI_MPLL_CNTL8,
-		.shift   = 15,
-		.width	 = 1,
-	},
-	.n2 = {
-		.reg_off = HHI_MPLL_CNTL8,
-		.shift   = 16,
-		.width   = 9,
+	.hw.init = &(struct clk_init_data){
+		.name = "mpll0",
+		.ops = &clk_regmap_gate_ops,
+		.parent_names = (const char *[]){ "mpll0_div" },
+		.num_parents = 1,
+		.flags = CLK_SET_RATE_PARENT,
 	},
-	.en = {
-		.reg_off = HHI_MPLL_CNTL8,
-		.shift   = 14,
-		.width	 = 1,
+};
+
+static struct clk_regmap axg_mpll1_div = {
+	.data = &(struct meson_clk_mpll_data){
+		.sdm = {
+			.reg_off = HHI_MPLL_CNTL8,
+			.shift   = 0,
+			.width   = 14,
+		},
+		.sdm_en = {
+			.reg_off = HHI_MPLL_CNTL8,
+			.shift   = 15,
+			.width	 = 1,
+		},
+		.n2 = {
+			.reg_off = HHI_MPLL_CNTL8,
+			.shift   = 16,
+			.width   = 9,
+		},
+		.misc = {
+			.reg_off = HHI_PLL_TOP_MISC,
+			.shift   = 1,
+			.width	 = 1,
+		},
+		.lock = &meson_clk_lock,
 	},
-	.lock = &meson_clk_lock,
 	.hw.init = &(struct clk_init_data){
-		.name = "mpll1",
+		.name = "mpll1_div",
 		.ops = &meson_clk_mpll_ops,
-		.parent_names = (const char *[]){ "fixed_pll" },
+		.parent_names = (const char *[]){ "mpll_prediv" },
 		.num_parents = 1,
 	},
 };
 
-static struct meson_clk_mpll axg_mpll2 = {
-	.sdm = {
-		.reg_off = HHI_MPLL_CNTL9,
-		.shift   = 0,
-		.width   = 14,
+static struct clk_regmap axg_mpll1 = {
+	.data = &(struct clk_regmap_gate_data){
+		.offset = HHI_MPLL_CNTL8,
+		.bit_idx = 14,
 	},
-	.sdm_en = {
-		.reg_off = HHI_MPLL_CNTL9,
-		.shift   = 15,
-		.width	 = 1,
-	},
-	.n2 = {
-		.reg_off = HHI_MPLL_CNTL9,
-		.shift   = 16,
-		.width   = 9,
+	.hw.init = &(struct clk_init_data){
+		.name = "mpll1",
+		.ops = &clk_regmap_gate_ops,
+		.parent_names = (const char *[]){ "mpll1_div" },
+		.num_parents = 1,
+		.flags = CLK_SET_RATE_PARENT,
 	},
-	.en = {
-		.reg_off = HHI_MPLL_CNTL9,
-		.shift   = 14,
-		.width	 = 1,
+};
+
+static struct clk_regmap axg_mpll2_div = {
+	.data = &(struct meson_clk_mpll_data){
+		.sdm = {
+			.reg_off = HHI_MPLL_CNTL9,
+			.shift   = 0,
+			.width   = 14,
+		},
+		.sdm_en = {
+			.reg_off = HHI_MPLL_CNTL9,
+			.shift   = 15,
+			.width	 = 1,
+		},
+		.n2 = {
+			.reg_off = HHI_MPLL_CNTL9,
+			.shift   = 16,
+			.width   = 9,
+		},
+		.misc = {
+			.reg_off = HHI_PLL_TOP_MISC,
+			.shift   = 2,
+			.width	 = 1,
+		},
+		.lock = &meson_clk_lock,
 	},
-	.lock = &meson_clk_lock,
 	.hw.init = &(struct clk_init_data){
-		.name = "mpll2",
+		.name = "mpll2_div",
 		.ops = &meson_clk_mpll_ops,
-		.parent_names = (const char *[]){ "fixed_pll" },
+		.parent_names = (const char *[]){ "mpll_prediv" },
 		.num_parents = 1,
 	},
 };
 
-static struct meson_clk_mpll axg_mpll3 = {
-	.sdm = {
-		.reg_off = HHI_MPLL3_CNTL0,
-		.shift   = 12,
-		.width   = 14,
+static struct clk_regmap axg_mpll2 = {
+	.data = &(struct clk_regmap_gate_data){
+		.offset = HHI_MPLL_CNTL9,
+		.bit_idx = 14,
 	},
-	.sdm_en = {
-		.reg_off = HHI_MPLL3_CNTL0,
-		.shift   = 11,
-		.width	 = 1,
+	.hw.init = &(struct clk_init_data){
+		.name = "mpll2",
+		.ops = &clk_regmap_gate_ops,
+		.parent_names = (const char *[]){ "mpll2_div" },
+		.num_parents = 1,
+		.flags = CLK_SET_RATE_PARENT,
+	},
+};
+
+static struct clk_regmap axg_mpll3_div = {
+	.data = &(struct meson_clk_mpll_data){
+		.sdm = {
+			.reg_off = HHI_MPLL3_CNTL0,
+			.shift   = 12,
+			.width   = 14,
+		},
+		.sdm_en = {
+			.reg_off = HHI_MPLL3_CNTL0,
+			.shift   = 11,
+			.width	 = 1,
+		},
+		.n2 = {
+			.reg_off = HHI_MPLL3_CNTL0,
+			.shift   = 2,
+			.width   = 9,
+		},
+		.misc = {
+			.reg_off = HHI_PLL_TOP_MISC,
+			.shift   = 3,
+			.width	 = 1,
+		},
+		.lock = &meson_clk_lock,
 	},
-	.n2 = {
-		.reg_off = HHI_MPLL3_CNTL0,
-		.shift   = 2,
-		.width   = 9,
+	.hw.init = &(struct clk_init_data){
+		.name = "mpll3_div",
+		.ops = &meson_clk_mpll_ops,
+		.parent_names = (const char *[]){ "mpll_prediv" },
+		.num_parents = 1,
 	},
-	.en = {
-		.reg_off = HHI_MPLL3_CNTL0,
-		.shift   = 0,
-		.width	 = 1,
+};
+
+static struct clk_regmap axg_mpll3 = {
+	.data = &(struct clk_regmap_gate_data){
+		.offset = HHI_MPLL3_CNTL0,
+		.bit_idx = 0,
 	},
-	.lock = &meson_clk_lock,
 	.hw.init = &(struct clk_init_data){
 		.name = "mpll3",
-		.ops = &meson_clk_mpll_ops,
-		.parent_names = (const char *[]){ "fixed_pll" },
+		.ops = &clk_regmap_gate_ops,
+		.parent_names = (const char *[]){ "mpll3_div" },
 		.num_parents = 1,
+		.flags = CLK_SET_RATE_PARENT,
 	},
 };
 
-/*
- * FIXME The legacy composite clocks (e.g. clk81) are both PLL post-dividers
- * and should be modeled with their respective PLLs via the forthcoming
- * coordinated clock rates feature
- */
 static u32 mux_table_clk81[]	= { 0, 2, 3, 4, 5, 6, 7 };
 static const char * const clk81_parent_names[] = {
 	"xtal", "fclk_div7", "mpll1", "mpll2", "fclk_div4",
 	"fclk_div3", "fclk_div5"
 };
 
-static struct clk_mux axg_mpeg_clk_sel = {
-	.reg = (void *)HHI_MPEG_CLK_CNTL,
-	.mask = 0x7,
-	.shift = 12,
-	.flags = CLK_MUX_READ_ONLY,
-	.table = mux_table_clk81,
-	.lock = &meson_clk_lock,
+static struct clk_regmap axg_mpeg_clk_sel = {
+	.data = &(struct clk_regmap_mux_data){
+		.offset = HHI_MPEG_CLK_CNTL,
+		.mask = 0x7,
+		.shift = 12,
+		.table = mux_table_clk81,
+	},
 	.hw.init = &(struct clk_init_data){
 		.name = "mpeg_clk_sel",
-		.ops = &clk_mux_ro_ops,
+		.ops = &clk_regmap_mux_ro_ops,
 		.parent_names = clk81_parent_names,
 		.num_parents = ARRAY_SIZE(clk81_parent_names),
 	},
 };
 
-static struct clk_divider axg_mpeg_clk_div = {
-	.reg = (void *)HHI_MPEG_CLK_CNTL,
-	.shift = 0,
-	.width = 7,
-	.lock = &meson_clk_lock,
+static struct clk_regmap axg_mpeg_clk_div = {
+	.data = &(struct clk_regmap_div_data){
+		.offset = HHI_MPEG_CLK_CNTL,
+		.shift = 0,
+		.width = 7,
+	},
 	.hw.init = &(struct clk_init_data){
 		.name = "mpeg_clk_div",
-		.ops = &clk_divider_ops,
+		.ops = &clk_regmap_divider_ops,
 		.parent_names = (const char *[]){ "mpeg_clk_sel" },
 		.num_parents = 1,
 		.flags = CLK_SET_RATE_PARENT,
 	},
 };
 
-static struct clk_gate axg_clk81 = {
-	.reg = (void *)HHI_MPEG_CLK_CNTL,
-	.bit_idx = 7,
-	.lock = &meson_clk_lock,
+static struct clk_regmap axg_clk81 = {
+	.data = &(struct clk_regmap_gate_data){
+		.offset = HHI_MPEG_CLK_CNTL,
+		.bit_idx = 7,
+	},
 	.hw.init = &(struct clk_init_data){
 		.name = "clk81",
-		.ops = &clk_gate_ops,
+		.ops = &clk_regmap_gate_ops,
 		.parent_names = (const char *[]){ "mpeg_clk_div" },
 		.num_parents = 1,
 		.flags = (CLK_SET_RATE_PARENT | CLK_IS_CRITICAL),
@@ -545,42 +684,45 @@ static const char * const axg_sd_emmc_clk0_parent_names[] = {
 };
 
 /* SDcard clock */
-static struct clk_mux axg_sd_emmc_b_clk0_sel = {
-	.reg = (void *)HHI_SD_EMMC_CLK_CNTL,
-	.mask = 0x7,
-	.shift = 25,
-	.lock = &meson_clk_lock,
+static struct clk_regmap axg_sd_emmc_b_clk0_sel = {
+	.data = &(struct clk_regmap_mux_data){
+		.offset = HHI_SD_EMMC_CLK_CNTL,
+		.mask = 0x7,
+		.shift = 25,
+	},
 	.hw.init = &(struct clk_init_data) {
 		.name = "sd_emmc_b_clk0_sel",
-		.ops = &clk_mux_ops,
+		.ops = &clk_regmap_mux_ops,
 		.parent_names = axg_sd_emmc_clk0_parent_names,
 		.num_parents = ARRAY_SIZE(axg_sd_emmc_clk0_parent_names),
 		.flags = CLK_SET_RATE_PARENT,
 	},
 };
 
-static struct clk_divider axg_sd_emmc_b_clk0_div = {
-	.reg = (void *)HHI_SD_EMMC_CLK_CNTL,
-	.shift = 16,
-	.width = 7,
-	.lock = &meson_clk_lock,
-	.flags = CLK_DIVIDER_ROUND_CLOSEST,
+static struct clk_regmap axg_sd_emmc_b_clk0_div = {
+	.data = &(struct clk_regmap_div_data){
+		.offset = HHI_SD_EMMC_CLK_CNTL,
+		.shift = 16,
+		.width = 7,
+		.flags = CLK_DIVIDER_ROUND_CLOSEST,
+	},
 	.hw.init = &(struct clk_init_data) {
 		.name = "sd_emmc_b_clk0_div",
-		.ops = &clk_divider_ops,
+		.ops = &clk_regmap_divider_ops,
 		.parent_names = (const char *[]){ "sd_emmc_b_clk0_sel" },
 		.num_parents = 1,
 		.flags = CLK_SET_RATE_PARENT,
 	},
 };
 
-static struct clk_gate axg_sd_emmc_b_clk0 = {
-	.reg = (void *)HHI_SD_EMMC_CLK_CNTL,
-	.bit_idx = 23,
-	.lock = &meson_clk_lock,
+static struct clk_regmap axg_sd_emmc_b_clk0 = {
+	.data = &(struct clk_regmap_gate_data){
+		.offset = HHI_SD_EMMC_CLK_CNTL,
+		.bit_idx = 23,
+	},
 	.hw.init = &(struct clk_init_data){
 		.name = "sd_emmc_b_clk0",
-		.ops = &clk_gate_ops,
+		.ops = &clk_regmap_gate_ops,
 		.parent_names = (const char *[]){ "sd_emmc_b_clk0_div" },
 		.num_parents = 1,
 		.flags = CLK_SET_RATE_PARENT,
@@ -588,42 +730,45 @@ static struct clk_gate axg_sd_emmc_b_clk0 = {
 };
 
 /* EMMC/NAND clock */
-static struct clk_mux axg_sd_emmc_c_clk0_sel = {
-	.reg = (void *)HHI_NAND_CLK_CNTL,
-	.mask = 0x7,
-	.shift = 9,
-	.lock = &meson_clk_lock,
+static struct clk_regmap axg_sd_emmc_c_clk0_sel = {
+	.data = &(struct clk_regmap_mux_data){
+		.offset = HHI_NAND_CLK_CNTL,
+		.mask = 0x7,
+		.shift = 9,
+	},
 	.hw.init = &(struct clk_init_data) {
 		.name = "sd_emmc_c_clk0_sel",
-		.ops = &clk_mux_ops,
+		.ops = &clk_regmap_mux_ops,
 		.parent_names = axg_sd_emmc_clk0_parent_names,
 		.num_parents = ARRAY_SIZE(axg_sd_emmc_clk0_parent_names),
 		.flags = CLK_SET_RATE_PARENT,
 	},
 };
 
-static struct clk_divider axg_sd_emmc_c_clk0_div = {
-	.reg = (void *)HHI_NAND_CLK_CNTL,
-	.shift = 0,
-	.width = 7,
-	.lock = &meson_clk_lock,
-	.flags = CLK_DIVIDER_ROUND_CLOSEST,
+static struct clk_regmap axg_sd_emmc_c_clk0_div = {
+	.data = &(struct clk_regmap_div_data){
+		.offset = HHI_NAND_CLK_CNTL,
+		.shift = 0,
+		.width = 7,
+		.flags = CLK_DIVIDER_ROUND_CLOSEST,
+	},
 	.hw.init = &(struct clk_init_data) {
 		.name = "sd_emmc_c_clk0_div",
-		.ops = &clk_divider_ops,
+		.ops = &clk_regmap_divider_ops,
 		.parent_names = (const char *[]){ "sd_emmc_c_clk0_sel" },
 		.num_parents = 1,
 		.flags = CLK_SET_RATE_PARENT,
 	},
 };
 
-static struct clk_gate axg_sd_emmc_c_clk0 = {
-	.reg = (void *)HHI_NAND_CLK_CNTL,
-	.bit_idx = 7,
-	.lock = &meson_clk_lock,
+static struct clk_regmap axg_sd_emmc_c_clk0 = {
+	.data = &(struct clk_regmap_gate_data){
+		.offset = HHI_NAND_CLK_CNTL,
+		.bit_idx = 7,
+	},
 	.hw.init = &(struct clk_init_data){
 		.name = "sd_emmc_c_clk0",
-		.ops = &clk_gate_ops,
+		.ops = &clk_regmap_gate_ops,
 		.parent_names = (const char *[]){ "sd_emmc_c_clk0_div" },
 		.num_parents = 1,
 		.flags = CLK_SET_RATE_PARENT,
@@ -750,27 +895,24 @@ static struct clk_hw_onecell_data axg_hw_onecell_data = {
 		[CLKID_SD_EMMC_C_CLK0_SEL]	= &axg_sd_emmc_c_clk0_sel.hw,
 		[CLKID_SD_EMMC_C_CLK0_DIV]	= &axg_sd_emmc_c_clk0_div.hw,
 		[CLKID_SD_EMMC_C_CLK0]		= &axg_sd_emmc_c_clk0.hw,
+		[CLKID_MPLL0_DIV]		= &axg_mpll0_div.hw,
+		[CLKID_MPLL1_DIV]		= &axg_mpll1_div.hw,
+		[CLKID_MPLL2_DIV]		= &axg_mpll2_div.hw,
+		[CLKID_MPLL3_DIV]		= &axg_mpll3_div.hw,
+		[CLKID_HIFI_PLL]		= &axg_hifi_pll.hw,
+		[CLKID_MPLL_PREDIV]		= &axg_mpll_prediv.hw,
+		[CLKID_FCLK_DIV2_DIV]		= &axg_fclk_div2_div.hw,
+		[CLKID_FCLK_DIV3_DIV]		= &axg_fclk_div3_div.hw,
+		[CLKID_FCLK_DIV4_DIV]		= &axg_fclk_div4_div.hw,
+		[CLKID_FCLK_DIV5_DIV]		= &axg_fclk_div5_div.hw,
+		[CLKID_FCLK_DIV7_DIV]		= &axg_fclk_div7_div.hw,
 		[NR_CLKS]			= NULL,
 	},
 	.num = NR_CLKS,
 };
 
-/* Convenience tables to populate base addresses in .probe */
-
-static struct meson_clk_pll *const axg_clk_plls[] = {
-	&axg_fixed_pll,
-	&axg_sys_pll,
-	&axg_gp0_pll,
-};
-
-static struct meson_clk_mpll *const axg_clk_mplls[] = {
-	&axg_mpll0,
-	&axg_mpll1,
-	&axg_mpll2,
-	&axg_mpll3,
-};
-
-static struct clk_gate *const axg_clk_gates[] = {
+/* Convenience table to populate regmap in .probe */
+static struct clk_regmap *const axg_clk_regmaps[] = {
 	&axg_clk81,
 	&axg_ddr,
 	&axg_audio_locker,
@@ -818,113 +960,109 @@ static struct clk_gate *const axg_clk_gates[] = {
 	&axg_ao_i2c,
 	&axg_sd_emmc_b_clk0,
 	&axg_sd_emmc_c_clk0,
-};
-
-static struct clk_mux *const axg_clk_muxes[] = {
-	&axg_mpeg_clk_sel,
-	&axg_sd_emmc_b_clk0_sel,
-	&axg_sd_emmc_c_clk0_sel,
-};
-
-static struct clk_divider *const axg_clk_dividers[] = {
 	&axg_mpeg_clk_div,
 	&axg_sd_emmc_b_clk0_div,
 	&axg_sd_emmc_c_clk0_div,
-};
-
-struct clkc_data {
-	struct clk_gate *const *clk_gates;
-	unsigned int clk_gates_count;
-	struct meson_clk_mpll *const *clk_mplls;
-	unsigned int clk_mplls_count;
-	struct meson_clk_pll *const *clk_plls;
-	unsigned int clk_plls_count;
-	struct clk_mux *const *clk_muxes;
-	unsigned int clk_muxes_count;
-	struct clk_divider *const *clk_dividers;
-	unsigned int clk_dividers_count;
-	struct clk_hw_onecell_data *hw_onecell_data;
-};
-
-static const struct clkc_data axg_clkc_data = {
-	.clk_gates = axg_clk_gates,
-	.clk_gates_count = ARRAY_SIZE(axg_clk_gates),
-	.clk_mplls = axg_clk_mplls,
-	.clk_mplls_count = ARRAY_SIZE(axg_clk_mplls),
-	.clk_plls = axg_clk_plls,
-	.clk_plls_count = ARRAY_SIZE(axg_clk_plls),
-	.clk_muxes = axg_clk_muxes,
-	.clk_muxes_count = ARRAY_SIZE(axg_clk_muxes),
-	.clk_dividers = axg_clk_dividers,
-	.clk_dividers_count = ARRAY_SIZE(axg_clk_dividers),
-	.hw_onecell_data = &axg_hw_onecell_data,
+	&axg_mpeg_clk_sel,
+	&axg_sd_emmc_b_clk0_sel,
+	&axg_sd_emmc_c_clk0_sel,
+	&axg_mpll0,
+	&axg_mpll1,
+	&axg_mpll2,
+	&axg_mpll3,
+	&axg_mpll0_div,
+	&axg_mpll1_div,
+	&axg_mpll2_div,
+	&axg_mpll3_div,
+	&axg_fixed_pll,
+	&axg_sys_pll,
+	&axg_gp0_pll,
+	&axg_hifi_pll,
+	&axg_mpll_prediv,
+	&axg_fclk_div2,
+	&axg_fclk_div3,
+	&axg_fclk_div4,
+	&axg_fclk_div5,
+	&axg_fclk_div7,
 };
 
 static const struct of_device_id clkc_match_table[] = {
-	{ .compatible = "amlogic,axg-clkc", .data = &axg_clkc_data },
+	{ .compatible = "amlogic,axg-clkc" },
 	{}
 };
 
+static const struct regmap_config clkc_regmap_config = {
+	.reg_bits       = 32,
+	.val_bits       = 32,
+	.reg_stride     = 4,
+};
+
+/*
+ * Bind the clock controller: obtain the HHI regmap (parent syscon, or a
+ * locally created MMIO regmap as fallback), hand it to every regmap backed
+ * clock, register all clk_hw entries and expose them as an OF provider.
+ */
 static int axg_clkc_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
-	const struct clkc_data *clkc_data;
 	struct resource *res;
-	void __iomem *clk_base;
-	int ret, clkid, i;
-
-	clkc_data = of_device_get_match_data(&pdev->dev);
-	if (!clkc_data)
-		return -EINVAL;
-
-	/*  Generic clocks and PLLs */
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res)
-		return -EINVAL;
-	clk_base = devm_ioremap(&pdev->dev, res->start, resource_size(res));
-	if (!clk_base) {
-		dev_err(&pdev->dev, "Unable to map clk base\n");
-		return -ENXIO;
-	}
+	void __iomem *clk_base = NULL;
+	struct regmap *map;
+	struct device_node *np;
+	int ret, i;
 
-	/* Populate base address for PLLs */
-	for (i = 0; i < clkc_data->clk_plls_count; i++)
-		clkc_data->clk_plls[i]->base = clk_base;
+	/* Get the hhi system controller node if available */
+	np = of_get_parent(dev->of_node);
+	map = syscon_node_to_regmap(np);
+	/* Drop the node reference taken by of_get_parent() */
+	of_node_put(np);
+	if (IS_ERR(map)) {
+		dev_err(dev,
+			"failed to get HHI regmap - Trying obsolete regs\n");
 
-	/* Populate base address for MPLLs */
-	for (i = 0; i < clkc_data->clk_mplls_count; i++)
-		clkc_data->clk_mplls[i]->base = clk_base;
+		/*
+		 * FIXME: HHI registers should be accessed through
+		 * the appropriate system controller. This is required because
+		 * there is more than just clocks in this register space
+		 *
+		 * This fallback method is only provided temporarily until
+		 * all the platform DTs are properly using the syscon node
+		 */
+		res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+		if (!res)
+			return -EINVAL;
 
-	/* Populate base address for gates */
-	for (i = 0; i < clkc_data->clk_gates_count; i++)
-		clkc_data->clk_gates[i]->reg = clk_base +
-			(u64)clkc_data->clk_gates[i]->reg;
 
-	/* Populate base address for muxes */
-	for (i = 0; i < clkc_data->clk_muxes_count; i++)
-		clkc_data->clk_muxes[i]->reg = clk_base +
-			(u64)clkc_data->clk_muxes[i]->reg;
+		clk_base = devm_ioremap(dev, res->start, resource_size(res));
+		if (!clk_base) {
+			dev_err(dev, "Unable to map clk base\n");
+			return -ENXIO;
+		}
+
+		map = devm_regmap_init_mmio(dev, clk_base,
+					    &clkc_regmap_config);
+		if (IS_ERR(map))
+			return PTR_ERR(map);
+	}
 
-	/* Populate base address for dividers */
-	for (i = 0; i < clkc_data->clk_dividers_count; i++)
-		clkc_data->clk_dividers[i]->reg = clk_base +
-			(u64)clkc_data->clk_dividers[i]->reg;
+	/* Populate regmap for the regmap backed clocks */
+	for (i = 0; i < ARRAY_SIZE(axg_clk_regmaps); i++)
+		axg_clk_regmaps[i]->map = map;
 
-	for (clkid = 0; clkid < clkc_data->hw_onecell_data->num; clkid++) {
+	for (i = 0; i < axg_hw_onecell_data.num; i++) {
 		/* array might be sparse */
-		if (!clkc_data->hw_onecell_data->hws[clkid])
+		if (!axg_hw_onecell_data.hws[i])
 			continue;
 
-		ret = devm_clk_hw_register(dev,
-					clkc_data->hw_onecell_data->hws[clkid]);
+		ret = devm_clk_hw_register(dev, axg_hw_onecell_data.hws[i]);
 		if (ret) {
-			dev_err(&pdev->dev, "Clock registration failed\n");
+			dev_err(dev, "Clock registration failed\n");
 			return ret;
 		}
 	}
 
-	return of_clk_add_hw_provider(dev->of_node, of_clk_hw_onecell_get,
-			clkc_data->hw_onecell_data);
+	return devm_of_clk_add_hw_provider(dev, of_clk_hw_onecell_get,
+					   &axg_hw_onecell_data);
 }
 
 static struct platform_driver axg_driver = {
diff --git a/drivers/clk/meson/axg.h b/drivers/clk/meson/axg.h
index ce0bafdb6b28..b421df6a7ea0 100644
--- a/drivers/clk/meson/axg.h
+++ b/drivers/clk/meson/axg.h
@@ -117,8 +117,18 @@
 #define CLKID_SD_EMMC_B_CLK0_DIV		62
 #define CLKID_SD_EMMC_C_CLK0_SEL		63
 #define CLKID_SD_EMMC_C_CLK0_DIV		64
+#define CLKID_MPLL0_DIV				65
+#define CLKID_MPLL1_DIV				66
+#define CLKID_MPLL2_DIV				67
+#define CLKID_MPLL3_DIV				68
+#define CLKID_MPLL_PREDIV			70	/* NOTE(review): 69 skipped here; presumably CLKID_HIFI_PLL from dt-bindings - confirm */
+#define CLKID_FCLK_DIV2_DIV			71
+#define CLKID_FCLK_DIV3_DIV			72
+#define CLKID_FCLK_DIV4_DIV			73
+#define CLKID_FCLK_DIV5_DIV			74
+#define CLKID_FCLK_DIV7_DIV			75
 
-#define NR_CLKS					65
+#define NR_CLKS					76	/* one past the highest CLKID listed above */
 
 /* include the CLKIDs that have been made part of the DT binding */
 #include <dt-bindings/clock/axg-clkc.h>
diff --git a/drivers/clk/meson/clk-audio-divider.c b/drivers/clk/meson/clk-audio-divider.c
index 6c07db06642d..f7ab5b1db342 100644
--- a/drivers/clk/meson/clk-audio-divider.c
+++ b/drivers/clk/meson/clk-audio-divider.c
@@ -28,8 +28,11 @@
 #include <linux/clk-provider.h>
 #include "clkc.h"
 
-#define to_meson_clk_audio_divider(_hw) container_of(_hw, \
-				struct meson_clk_audio_divider, hw)
+static inline struct meson_clk_audio_div_data *
+meson_clk_audio_div_data(struct clk_regmap *clk)
+{
+	return (struct meson_clk_audio_div_data *)clk->data;
+}
 
 static int _div_round(unsigned long parent_rate, unsigned long rate,
 		      unsigned long flags)
@@ -45,15 +48,9 @@ static int _get_val(unsigned long parent_rate, unsigned long rate)
 	return DIV_ROUND_UP_ULL((u64)parent_rate, rate) - 1;
 }
 
-static int _valid_divider(struct clk_hw *hw, int divider)
+static int _valid_divider(unsigned int width, int divider)
 {
-	struct meson_clk_audio_divider *adiv =
-		to_meson_clk_audio_divider(hw);
-	int max_divider;
-	u8 width;
-
-	width = adiv->div.width;
-	max_divider = 1 << width;
+	int max_divider = 1 << width;	/* field holds divider - 1 */
 
 	return clamp(divider, 1, max_divider);
 }
@@ -61,14 +58,11 @@ static int _valid_divider(struct clk_hw *hw, int divider)
 static unsigned long audio_divider_recalc_rate(struct clk_hw *hw,
 					       unsigned long parent_rate)
 {
-	struct meson_clk_audio_divider *adiv =
-		to_meson_clk_audio_divider(hw);
-	struct parm *p;
-	unsigned long reg, divider;
+	struct clk_regmap *clk = to_clk_regmap(hw);
+	struct meson_clk_audio_div_data *adiv = meson_clk_audio_div_data(clk);
+	unsigned long divider;
 
-	p = &adiv->div;
-	reg = readl(adiv->base + p->reg_off);
-	divider = PARM_GET(p->width, p->shift, reg) + 1;
+	divider = meson_parm_read(clk->map, &adiv->div) + 1; /* reg = div - 1 */
 
 	return DIV_ROUND_UP_ULL((u64)parent_rate, divider);
 }
@@ -77,14 +71,14 @@ static long audio_divider_round_rate(struct clk_hw *hw,
 				     unsigned long rate,
 				     unsigned long *parent_rate)
 {
-	struct meson_clk_audio_divider *adiv =
-		to_meson_clk_audio_divider(hw);
+	struct clk_regmap *clk = to_clk_regmap(hw);
+	struct meson_clk_audio_div_data *adiv = meson_clk_audio_div_data(clk);
 	unsigned long max_prate;
 	int divider;
 
 	if (!(clk_hw_get_flags(hw) & CLK_SET_RATE_PARENT)) {
 		divider = _div_round(*parent_rate, rate, adiv->flags);
-		divider = _valid_divider(hw, divider);
+		divider = _valid_divider(adiv->div.width, divider);
 		return DIV_ROUND_UP_ULL((u64)*parent_rate, divider);
 	}
 
@@ -93,7 +87,7 @@ static long audio_divider_round_rate(struct clk_hw *hw,
 
 	/* Get the corresponding rounded down divider */
 	divider = max_prate / rate;
-	divider = _valid_divider(hw, divider);
+	divider = _valid_divider(adiv->div.width, divider);
 
 	/* Get actual rate of the parent */
 	*parent_rate = clk_hw_round_rate(clk_hw_get_parent(hw),
@@ -106,28 +100,11 @@ static int audio_divider_set_rate(struct clk_hw *hw,
 				  unsigned long rate,
 				  unsigned long parent_rate)
 {
-	struct meson_clk_audio_divider *adiv =
-		to_meson_clk_audio_divider(hw);
-	struct parm *p;
-	unsigned long reg, flags = 0;
-	int val;
-
-	val = _get_val(parent_rate, rate);
-
-	if (adiv->lock)
-		spin_lock_irqsave(adiv->lock, flags);
-	else
-		__acquire(adiv->lock);
-
-	p = &adiv->div;
-	reg = readl(adiv->base + p->reg_off);
-	reg = PARM_SET(p->width, p->shift, reg, val);
-	writel(reg, adiv->base + p->reg_off);
-
-	if (adiv->lock)
-		spin_unlock_irqrestore(adiv->lock, flags);
-	else
-		__release(adiv->lock);
+	struct clk_regmap *clk = to_clk_regmap(hw);
+	struct meson_clk_audio_div_data *adiv = meson_clk_audio_div_data(clk);
+	int val = _get_val(parent_rate, rate);	/* register value = divider - 1 */
+
+	meson_parm_write(clk->map, &adiv->div, val);
 
 	return 0;
 }
diff --git a/drivers/clk/meson/clk-cpu.c b/drivers/clk/meson/clk-cpu.c
deleted file mode 100644
index f8b2b7efd016..000000000000
--- a/drivers/clk/meson/clk-cpu.c
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
- * Copyright (c) 2015 Endless Mobile, Inc.
- * Author: Carlo Caione <carlo@endlessm.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-/*
- * CPU clock path:
- *
- *                           +-[/N]-----|3|
- *             MUX2  +--[/3]-+----------|2| MUX1
- * [sys_pll]---|1|   |--[/2]------------|1|-|1|
- *             | |---+------------------|0| | |----- [a5_clk]
- *          +--|0|                          | |
- * [xtal]---+-------------------------------|0|
- *
- *
- *
- */
-
-#include <linux/delay.h>
-#include <linux/err.h>
-#include <linux/io.h>
-#include <linux/module.h>
-#include <linux/of_address.h>
-#include <linux/slab.h>
-#include <linux/clk.h>
-#include <linux/clk-provider.h>
-
-#define MESON_CPU_CLK_CNTL1		0x00
-#define MESON_CPU_CLK_CNTL		0x40
-
-#define MESON_CPU_CLK_MUX1		BIT(7)
-#define MESON_CPU_CLK_MUX2		BIT(0)
-
-#define MESON_N_WIDTH			9
-#define MESON_N_SHIFT			20
-#define MESON_SEL_WIDTH			2
-#define MESON_SEL_SHIFT			2
-
-#include "clkc.h"
-
-#define to_meson_clk_cpu_hw(_hw) container_of(_hw, struct meson_clk_cpu, hw)
-#define to_meson_clk_cpu_nb(_nb) container_of(_nb, struct meson_clk_cpu, clk_nb)
-
-static long meson_clk_cpu_round_rate(struct clk_hw *hw, unsigned long rate,
-				     unsigned long *prate)
-{
-	struct meson_clk_cpu *clk_cpu = to_meson_clk_cpu_hw(hw);
-
-	return divider_round_rate(hw, rate, prate, clk_cpu->div_table,
-				  MESON_N_WIDTH, CLK_DIVIDER_ROUND_CLOSEST);
-}
-
-static int meson_clk_cpu_set_rate(struct clk_hw *hw, unsigned long rate,
-				  unsigned long parent_rate)
-{
-	struct meson_clk_cpu *clk_cpu = to_meson_clk_cpu_hw(hw);
-	unsigned int div, sel, N = 0;
-	u32 reg;
-
-	div = DIV_ROUND_UP(parent_rate, rate);
-
-	if (div <= 3) {
-		sel = div - 1;
-	} else {
-		sel = 3;
-		N = div / 2;
-	}
-
-	reg = readl(clk_cpu->base + clk_cpu->reg_off + MESON_CPU_CLK_CNTL1);
-	reg = PARM_SET(MESON_N_WIDTH, MESON_N_SHIFT, reg, N);
-	writel(reg, clk_cpu->base + clk_cpu->reg_off + MESON_CPU_CLK_CNTL1);
-
-	reg = readl(clk_cpu->base + clk_cpu->reg_off + MESON_CPU_CLK_CNTL);
-	reg = PARM_SET(MESON_SEL_WIDTH, MESON_SEL_SHIFT, reg, sel);
-	writel(reg, clk_cpu->base + clk_cpu->reg_off + MESON_CPU_CLK_CNTL);
-
-	return 0;
-}
-
-static unsigned long meson_clk_cpu_recalc_rate(struct clk_hw *hw,
-					       unsigned long parent_rate)
-{
-	struct meson_clk_cpu *clk_cpu = to_meson_clk_cpu_hw(hw);
-	unsigned int N, sel;
-	unsigned int div = 1;
-	u32 reg;
-
-	reg = readl(clk_cpu->base + clk_cpu->reg_off + MESON_CPU_CLK_CNTL1);
-	N = PARM_GET(MESON_N_WIDTH, MESON_N_SHIFT, reg);
-
-	reg = readl(clk_cpu->base + clk_cpu->reg_off + MESON_CPU_CLK_CNTL);
-	sel = PARM_GET(MESON_SEL_WIDTH, MESON_SEL_SHIFT, reg);
-
-	if (sel < 3)
-		div = sel + 1;
-	else
-		div = 2 * N;
-
-	return parent_rate / div;
-}
-
-/* FIXME MUX1 & MUX2 should be struct clk_hw objects */
-static int meson_clk_cpu_pre_rate_change(struct meson_clk_cpu *clk_cpu,
-					 struct clk_notifier_data *ndata)
-{
-	u32 cpu_clk_cntl;
-
-	/* switch MUX1 to xtal */
-	cpu_clk_cntl = readl(clk_cpu->base + clk_cpu->reg_off
-				+ MESON_CPU_CLK_CNTL);
-	cpu_clk_cntl &= ~MESON_CPU_CLK_MUX1;
-	writel(cpu_clk_cntl, clk_cpu->base + clk_cpu->reg_off
-				+ MESON_CPU_CLK_CNTL);
-	udelay(100);
-
-	/* switch MUX2 to sys-pll */
-	cpu_clk_cntl |= MESON_CPU_CLK_MUX2;
-	writel(cpu_clk_cntl, clk_cpu->base + clk_cpu->reg_off
-				+ MESON_CPU_CLK_CNTL);
-
-	return 0;
-}
-
-/* FIXME MUX1 & MUX2 should be struct clk_hw objects */
-static int meson_clk_cpu_post_rate_change(struct meson_clk_cpu *clk_cpu,
-					  struct clk_notifier_data *ndata)
-{
-	u32 cpu_clk_cntl;
-
-	/* switch MUX1 to divisors' output */
-	cpu_clk_cntl = readl(clk_cpu->base + clk_cpu->reg_off
-				+ MESON_CPU_CLK_CNTL);
-	cpu_clk_cntl |= MESON_CPU_CLK_MUX1;
-	writel(cpu_clk_cntl, clk_cpu->base + clk_cpu->reg_off
-				+ MESON_CPU_CLK_CNTL);
-	udelay(100);
-
-	return 0;
-}
-
-/*
- * This clock notifier is called when the frequency of the of the parent
- * PLL clock is to be changed. We use the xtal input as temporary parent
- * while the PLL frequency is stabilized.
- */
-int meson_clk_cpu_notifier_cb(struct notifier_block *nb,
-				     unsigned long event, void *data)
-{
-	struct clk_notifier_data *ndata = data;
-	struct meson_clk_cpu *clk_cpu = to_meson_clk_cpu_nb(nb);
-	int ret = 0;
-
-	if (event == PRE_RATE_CHANGE)
-		ret = meson_clk_cpu_pre_rate_change(clk_cpu, ndata);
-	else if (event == POST_RATE_CHANGE)
-		ret = meson_clk_cpu_post_rate_change(clk_cpu, ndata);
-
-	return notifier_from_errno(ret);
-}
-
-const struct clk_ops meson_clk_cpu_ops = {
-	.recalc_rate	= meson_clk_cpu_recalc_rate,
-	.round_rate	= meson_clk_cpu_round_rate,
-	.set_rate	= meson_clk_cpu_set_rate,
-};
diff --git a/drivers/clk/meson/clk-mpll.c b/drivers/clk/meson/clk-mpll.c
index 5144360e2c80..0df1227b65b3 100644
--- a/drivers/clk/meson/clk-mpll.c
+++ b/drivers/clk/meson/clk-mpll.c
@@ -68,11 +68,15 @@
 #define N2_MIN	4
 #define N2_MAX	511
 
-#define to_meson_clk_mpll(_hw) container_of(_hw, struct meson_clk_mpll, hw)
+static inline struct meson_clk_mpll_data *
+meson_clk_mpll_data(struct clk_regmap *clk)
+{
+	return (struct meson_clk_mpll_data *)clk->data;
+}
 
 static long rate_from_params(unsigned long parent_rate,
-				      unsigned long sdm,
-				      unsigned long n2)
+			     unsigned int sdm,
+			     unsigned int n2)
 {
 	unsigned long divisor = (SDM_DEN * n2) + sdm;
 
@@ -84,8 +88,8 @@ static long rate_from_params(unsigned long parent_rate,
 
 static void params_from_rate(unsigned long requested_rate,
 			     unsigned long parent_rate,
-			     unsigned long *sdm,
-			     unsigned long *n2)
+			     unsigned int *sdm,
+			     unsigned int *n2)
 {
 	uint64_t div = parent_rate;
 	unsigned long rem = do_div(div, requested_rate);
@@ -105,31 +109,23 @@ static void params_from_rate(unsigned long requested_rate,
 static unsigned long mpll_recalc_rate(struct clk_hw *hw,
 		unsigned long parent_rate)
 {
-	struct meson_clk_mpll *mpll = to_meson_clk_mpll(hw);
-	struct parm *p;
-	unsigned long reg, sdm, n2;
+	struct clk_regmap *clk = to_clk_regmap(hw);
+	struct meson_clk_mpll_data *mpll = meson_clk_mpll_data(clk);
+	unsigned int sdm, n2;
 	long rate;
 
-	p = &mpll->sdm;
-	reg = readl(mpll->base + p->reg_off);
-	sdm = PARM_GET(p->width, p->shift, reg);
-
-	p = &mpll->n2;
-	reg = readl(mpll->base + p->reg_off);
-	n2 = PARM_GET(p->width, p->shift, reg);
+	sdm = meson_parm_read(clk->map, &mpll->sdm);
+	n2 = meson_parm_read(clk->map, &mpll->n2);
 
 	rate = rate_from_params(parent_rate, sdm, n2);
-	if (rate < 0)
-		return 0;
-
-	return rate;
+	return rate < 0 ? 0 : rate;
 }
 
 static long mpll_round_rate(struct clk_hw *hw,
 			    unsigned long rate,
 			    unsigned long *parent_rate)
 {
-	unsigned long sdm, n2;
+	unsigned int sdm, n2;
 
 	params_from_rate(rate, *parent_rate, &sdm, &n2);
 	return rate_from_params(*parent_rate, sdm, n2);
@@ -139,9 +135,9 @@ static int mpll_set_rate(struct clk_hw *hw,
 			 unsigned long rate,
 			 unsigned long parent_rate)
 {
-	struct meson_clk_mpll *mpll = to_meson_clk_mpll(hw);
-	struct parm *p;
-	unsigned long reg, sdm, n2;
+	struct clk_regmap *clk = to_clk_regmap(hw);
+	struct meson_clk_mpll_data *mpll = meson_clk_mpll_data(clk);
+	unsigned int sdm, n2;
 	unsigned long flags = 0;
 
 	params_from_rate(rate, parent_rate, &sdm, &n2);
@@ -151,97 +147,36 @@ static int mpll_set_rate(struct clk_hw *hw,
 	else
 		__acquire(mpll->lock);
 
-	p = &mpll->sdm;
-	reg = readl(mpll->base + p->reg_off);
-	reg = PARM_SET(p->width, p->shift, reg, sdm);
-	writel(reg, mpll->base + p->reg_off);
-
-	p = &mpll->sdm_en;
-	reg = readl(mpll->base + p->reg_off);
-	reg = PARM_SET(p->width, p->shift, reg, 1);
-	writel(reg, mpll->base + p->reg_off);
-
-	p = &mpll->ssen;
-	if (p->width != 0) {
-		reg = readl(mpll->base + p->reg_off);
-		reg = PARM_SET(p->width, p->shift, reg, 1);
-		writel(reg, mpll->base + p->reg_off);
-	}
-
-	p = &mpll->n2;
-	reg = readl(mpll->base + p->reg_off);
-	reg = PARM_SET(p->width, p->shift, reg, n2);
-	writel(reg, mpll->base + p->reg_off);
-
-	if (mpll->lock)
-		spin_unlock_irqrestore(mpll->lock, flags);
-	else
-		__release(mpll->lock);
-
-	return 0;
-}
+	/* Enable and set the fractional part */
+	meson_parm_write(clk->map, &mpll->sdm, sdm);
+	meson_parm_write(clk->map, &mpll->sdm_en, 1);
 
-static void mpll_enable_core(struct clk_hw *hw, int enable)
-{
-	struct meson_clk_mpll *mpll = to_meson_clk_mpll(hw);
-	struct parm *p;
-	unsigned long reg;
-	unsigned long flags = 0;
+	/* Set additional fractional part enable if required */
+	if (MESON_PARM_APPLICABLE(&mpll->ssen))
+		meson_parm_write(clk->map, &mpll->ssen, 1);
 
-	if (mpll->lock)
-		spin_lock_irqsave(mpll->lock, flags);
-	else
-		__acquire(mpll->lock);
+	/* Set the integer divider part */
+	meson_parm_write(clk->map, &mpll->n2, n2);
 
-	p = &mpll->en;
-	reg = readl(mpll->base + p->reg_off);
-	reg = PARM_SET(p->width, p->shift, reg, enable ? 1 : 0);
-	writel(reg, mpll->base + p->reg_off);
+	/* Set the magic misc bit if required */
+	if (MESON_PARM_APPLICABLE(&mpll->misc))
+		meson_parm_write(clk->map, &mpll->misc, 1);
 
 	if (mpll->lock)
 		spin_unlock_irqrestore(mpll->lock, flags);
 	else
 		__release(mpll->lock);
-}
-
-
-static int mpll_enable(struct clk_hw *hw)
-{
-	mpll_enable_core(hw, 1);
 
 	return 0;
 }
 
-static void mpll_disable(struct clk_hw *hw)
-{
-	mpll_enable_core(hw, 0);
-}
-
-static int mpll_is_enabled(struct clk_hw *hw)
-{
-	struct meson_clk_mpll *mpll = to_meson_clk_mpll(hw);
-	struct parm *p;
-	unsigned long reg;
-	int en;
-
-	p = &mpll->en;
-	reg = readl(mpll->base + p->reg_off);
-	en = PARM_GET(p->width, p->shift, reg);
-
-	return en;
-}
-
 const struct clk_ops meson_clk_mpll_ro_ops = {
 	.recalc_rate	= mpll_recalc_rate,
 	.round_rate	= mpll_round_rate,
-	.is_enabled	= mpll_is_enabled,
 };
 
 const struct clk_ops meson_clk_mpll_ops = {
 	.recalc_rate	= mpll_recalc_rate,
 	.round_rate	= mpll_round_rate,
 	.set_rate	= mpll_set_rate,
-	.enable		= mpll_enable,
-	.disable	= mpll_disable,
-	.is_enabled	= mpll_is_enabled,
 };
diff --git a/drivers/clk/meson/clk-pll.c b/drivers/clk/meson/clk-pll.c
index 01341553f50b..65a7bd903551 100644
--- a/drivers/clk/meson/clk-pll.c
+++ b/drivers/clk/meson/clk-pll.c
@@ -2,6 +2,9 @@
  * Copyright (c) 2015 Endless Mobile, Inc.
  * Author: Carlo Caione <carlo@endlessm.com>
  *
+ * Copyright (c) 2018 Baylibre, SAS.
+ * Author: Jerome Brunet <jbrunet@baylibre.com>
+ *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
  * version 2, as published by the Free Software Foundation.
@@ -27,13 +30,14 @@
  *                |        |
  *               FREF     VCO
  *
- * out = (in * M / N) >> OD
+ * out = in * (m + frac / frac_max) / (n << sum(ods))
  */
 
 #include <linux/clk-provider.h>
 #include <linux/delay.h>
 #include <linux/err.h>
 #include <linux/io.h>
+#include <linux/math64.h>
 #include <linux/module.h>
 #include <linux/of_address.h>
 #include <linux/slab.h>
@@ -41,209 +45,213 @@
 
 #include "clkc.h"
 
-#define MESON_PLL_RESET				BIT(29)
-#define MESON_PLL_LOCK				BIT(31)
-
-#define to_meson_clk_pll(_hw) container_of(_hw, struct meson_clk_pll, hw)
-
-static unsigned long meson_clk_pll_recalc_rate(struct clk_hw *hw,
-						unsigned long parent_rate)
+static inline struct meson_clk_pll_data *
+meson_clk_pll_data(struct clk_regmap *clk)
 {
-	struct meson_clk_pll *pll = to_meson_clk_pll(hw);
-	struct parm *p;
-	unsigned long parent_rate_mhz = parent_rate / 1000000;
-	unsigned long rate_mhz;
-	u16 n, m, frac = 0, od, od2 = 0;
-	u32 reg;
-
-	p = &pll->n;
-	reg = readl(pll->base + p->reg_off);
-	n = PARM_GET(p->width, p->shift, reg);
-
-	p = &pll->m;
-	reg = readl(pll->base + p->reg_off);
-	m = PARM_GET(p->width, p->shift, reg);
-
-	p = &pll->od;
-	reg = readl(pll->base + p->reg_off);
-	od = PARM_GET(p->width, p->shift, reg);
-
-	p = &pll->od2;
-	if (p->width) {
-		reg = readl(pll->base + p->reg_off);
-		od2 = PARM_GET(p->width, p->shift, reg);
-	}
-
-	p = &pll->frac;
-	if (p->width) {
-		reg = readl(pll->base + p->reg_off);
-		frac = PARM_GET(p->width, p->shift, reg);
-		rate_mhz = (parent_rate_mhz * m + \
-				(parent_rate_mhz * frac >> 12)) * 2 / n;
-		rate_mhz = rate_mhz >> od >> od2;
-	} else
-		rate_mhz = (parent_rate_mhz * m / n) >> od >> od2;
-
-	return rate_mhz * 1000000;
+	return (struct meson_clk_pll_data *)clk->data;
 }
 
-static long meson_clk_pll_round_rate(struct clk_hw *hw, unsigned long rate,
-				     unsigned long *parent_rate)
+static unsigned long __pll_params_to_rate(unsigned long parent_rate,
+					  const struct pll_rate_table *pllt,
+					  u16 frac,
+					  struct meson_clk_pll_data *pll)
 {
-	struct meson_clk_pll *pll = to_meson_clk_pll(hw);
-	const struct pll_rate_table *rate_table = pll->rate_table;
-	int i;
+	u64 rate = (u64)parent_rate * pllt->m;
+	unsigned int od = pllt->od + pllt->od2 + pllt->od3;
 
-	for (i = 0; i < pll->rate_count; i++) {
-		if (rate <= rate_table[i].rate)
-			return rate_table[i].rate;
+	if (frac && MESON_PARM_APPLICABLE(&pll->frac)) {
+		u64 frac_rate = (u64)parent_rate * frac;
+
+		rate += DIV_ROUND_UP_ULL(frac_rate,
+					 (1 << pll->frac.width));
 	}
 
-	/* else return the smallest value */
-	return rate_table[0].rate;
+	return DIV_ROUND_UP_ULL(rate, pllt->n << od);
 }
 
-static const struct pll_rate_table *meson_clk_get_pll_settings(struct meson_clk_pll *pll,
-							       unsigned long rate)
+static unsigned long meson_clk_pll_recalc_rate(struct clk_hw *hw,
+						unsigned long parent_rate)
 {
-	const struct pll_rate_table *rate_table = pll->rate_table;
-	int i;
+	struct clk_regmap *clk = to_clk_regmap(hw);
+	struct meson_clk_pll_data *pll = meson_clk_pll_data(clk);
+	struct pll_rate_table pllt;
+	u16 frac;
 
-	for (i = 0; i < pll->rate_count; i++) {
-		if (rate == rate_table[i].rate)
-			return &rate_table[i];
-	}
-	return NULL;
+	pllt.n = meson_parm_read(clk->map, &pll->n);
+	pllt.m = meson_parm_read(clk->map, &pll->m);
+	pllt.od = meson_parm_read(clk->map, &pll->od);
+
+	pllt.od2 = MESON_PARM_APPLICABLE(&pll->od2) ?
+		meson_parm_read(clk->map, &pll->od2) :
+		0;
+
+	pllt.od3 = MESON_PARM_APPLICABLE(&pll->od3) ?
+		meson_parm_read(clk->map, &pll->od3) :
+		0;
+
+	frac = MESON_PARM_APPLICABLE(&pll->frac) ?
+		meson_parm_read(clk->map, &pll->frac) :
+		0;
+
+	return __pll_params_to_rate(parent_rate, &pllt, frac, pll);
 }
 
-/* Specific wait loop for GXL/GXM GP0 PLL */
-static int meson_clk_pll_wait_lock_reset(struct meson_clk_pll *pll,
-					 struct parm *p_n)
+static u16 __pll_params_with_frac(unsigned long rate,
+				  unsigned long parent_rate,
+				  const struct pll_rate_table *pllt,
+				  struct meson_clk_pll_data *pll)
 {
-	int delay = 100;
-	u32 reg;
+	u16 frac_max = (1 << pll->frac.width);
+	u64 val = (u64)rate * pllt->n;
 
-	while (delay > 0) {
-		reg = readl(pll->base + p_n->reg_off);
-		writel(reg | MESON_PLL_RESET, pll->base + p_n->reg_off);
-		udelay(10);
-		writel(reg & ~MESON_PLL_RESET, pll->base + p_n->reg_off);
+	val <<= pllt->od + pllt->od2 + pllt->od3;
 
-		/* This delay comes from AMLogic tree clk-gp0-gxl driver */
-		mdelay(1);
+	if (pll->flags & CLK_MESON_PLL_ROUND_CLOSEST)
+		val = DIV_ROUND_CLOSEST_ULL(val * frac_max, parent_rate);
+	else
+		val = div_u64(val * frac_max, parent_rate);
 
-		reg = readl(pll->base + p_n->reg_off);
-		if (reg & MESON_PLL_LOCK)
-			return 0;
-		delay--;
+	val -= pllt->m * frac_max;
+
+	return min((u16)val, (u16)(frac_max - 1));
+}
+
+static const struct pll_rate_table *
+meson_clk_get_pll_settings(unsigned long rate,
+			   struct meson_clk_pll_data *pll)
+{
+	const struct pll_rate_table *table = pll->table;
+	unsigned int i = 0;
+
+	if (!table)
+		return NULL;
+
+	/* Find the first table element exceeding rate */
+	while (table[i].rate && table[i].rate <= rate)
+		i++;
+
+	if (i != 0) {
+		if (MESON_PARM_APPLICABLE(&pll->frac) ||
+		    !(pll->flags & CLK_MESON_PLL_ROUND_CLOSEST) ||
+		    (abs(rate - table[i - 1].rate) <
+		     abs(rate - table[i].rate)))
+			i--;
 	}
-	return -ETIMEDOUT;
+
+	return (struct pll_rate_table *)&table[i];
 }
 
-static int meson_clk_pll_wait_lock(struct meson_clk_pll *pll,
-				   struct parm *p_n)
+static long meson_clk_pll_round_rate(struct clk_hw *hw, unsigned long rate,
+				     unsigned long *parent_rate)
 {
-	int delay = 24000000;
-	u32 reg;
+	struct clk_regmap *clk = to_clk_regmap(hw);
+	struct meson_clk_pll_data *pll = meson_clk_pll_data(clk);
+	const struct pll_rate_table *pllt =
+		meson_clk_get_pll_settings(rate, pll);
+	u16 frac;
+
+	if (!pllt)
+		return meson_clk_pll_recalc_rate(hw, *parent_rate);
+
+	if (!MESON_PARM_APPLICABLE(&pll->frac)
+	    || rate == pllt->rate)
+		return pllt->rate;
+
+	/*
+	 * The rate provided by the setting is not an exact match, let's
+	 * try to improve the result using the fractional parameter
+	 */
+	frac = __pll_params_with_frac(rate, *parent_rate, pllt, pll);
+
+	return __pll_params_to_rate(*parent_rate, pllt, frac, pll);
+}
 
-	while (delay > 0) {
-		reg = readl(pll->base + p_n->reg_off);
+static int meson_clk_pll_wait_lock(struct clk_hw *hw)
+{
+	struct clk_regmap *clk = to_clk_regmap(hw);
+	struct meson_clk_pll_data *pll = meson_clk_pll_data(clk);
+	int delay = 24000000;
 
-		if (reg & MESON_PLL_LOCK)
+	do {
+		/* Is the clock locked now ? */
+		if (meson_parm_read(clk->map, &pll->l))
 			return 0;
+
 		delay--;
-	}
+	} while (delay > 0);
+
 	return -ETIMEDOUT;
 }
 
-static void meson_clk_pll_init_params(struct meson_clk_pll *pll)
+static void meson_clk_pll_init(struct clk_hw *hw)
 {
-	int i;
-
-	for (i = 0 ; i < pll->params.params_count ; ++i)
-		writel(pll->params.params_table[i].value,
-		       pll->base + pll->params.params_table[i].reg_off);
+	struct clk_regmap *clk = to_clk_regmap(hw);
+	struct meson_clk_pll_data *pll = meson_clk_pll_data(clk);
+
+	if (pll->init_count) {
+		meson_parm_write(clk->map, &pll->rst, 1);
+		regmap_multi_reg_write(clk->map, pll->init_regs,
+				       pll->init_count);
+		meson_parm_write(clk->map, &pll->rst, 0);
+	}
 }
 
 static int meson_clk_pll_set_rate(struct clk_hw *hw, unsigned long rate,
 				  unsigned long parent_rate)
 {
-	struct meson_clk_pll *pll = to_meson_clk_pll(hw);
-	struct parm *p;
-	const struct pll_rate_table *rate_set;
+	struct clk_regmap *clk = to_clk_regmap(hw);
+	struct meson_clk_pll_data *pll = meson_clk_pll_data(clk);
+	const struct pll_rate_table *pllt;
 	unsigned long old_rate;
-	int ret = 0;
-	u32 reg;
+	u16 frac = 0;
 
 	if (parent_rate == 0 || rate == 0)
 		return -EINVAL;
 
 	old_rate = rate;
 
-	rate_set = meson_clk_get_pll_settings(pll, rate);
-	if (!rate_set)
+	pllt = meson_clk_get_pll_settings(rate, pll);
+	if (!pllt)
 		return -EINVAL;
 
-	/* Initialize the PLL in a clean state if specified */
-	if (pll->params.params_count)
-		meson_clk_pll_init_params(pll);
-
-	/* PLL reset */
-	p = &pll->n;
-	reg = readl(pll->base + p->reg_off);
-	/* If no_init_reset is provided, avoid resetting at this point */
-	if (!pll->params.no_init_reset)
-		writel(reg | MESON_PLL_RESET, pll->base + p->reg_off);
-
-	reg = PARM_SET(p->width, p->shift, reg, rate_set->n);
-	writel(reg, pll->base + p->reg_off);
-
-	p = &pll->m;
-	reg = readl(pll->base + p->reg_off);
-	reg = PARM_SET(p->width, p->shift, reg, rate_set->m);
-	writel(reg, pll->base + p->reg_off);
-
-	p = &pll->od;
-	reg = readl(pll->base + p->reg_off);
-	reg = PARM_SET(p->width, p->shift, reg, rate_set->od);
-	writel(reg, pll->base + p->reg_off);
-
-	p = &pll->od2;
-	if (p->width) {
-		reg = readl(pll->base + p->reg_off);
-		reg = PARM_SET(p->width, p->shift, reg, rate_set->od2);
-		writel(reg, pll->base + p->reg_off);
-	}
+	/* Put the pll in reset to write the params */
+	meson_parm_write(clk->map, &pll->rst, 1);
 
-	p = &pll->frac;
-	if (p->width) {
-		reg = readl(pll->base + p->reg_off);
-		reg = PARM_SET(p->width, p->shift, reg, rate_set->frac);
-		writel(reg, pll->base + p->reg_off);
-	}
+	meson_parm_write(clk->map, &pll->n, pllt->n);
+	meson_parm_write(clk->map, &pll->m, pllt->m);
+	meson_parm_write(clk->map, &pll->od, pllt->od);
+
+	if (MESON_PARM_APPLICABLE(&pll->od2))
+		meson_parm_write(clk->map, &pll->od2, pllt->od2);
+
+	if (MESON_PARM_APPLICABLE(&pll->od3))
+		meson_parm_write(clk->map, &pll->od3, pllt->od3);
 
-	p = &pll->n;
-	/* If clear_reset_for_lock is provided, remove the reset bit here */
-	if (pll->params.clear_reset_for_lock) {
-		reg = readl(pll->base + p->reg_off);
-		writel(reg & ~MESON_PLL_RESET, pll->base + p->reg_off);
+	if (MESON_PARM_APPLICABLE(&pll->frac)) {
+		frac = __pll_params_with_frac(rate, parent_rate, pllt, pll);
+		meson_parm_write(clk->map, &pll->frac, frac);
 	}
 
-	/* If reset_lock_loop, use a special loop including resetting */
-	if (pll->params.reset_lock_loop)
-		ret = meson_clk_pll_wait_lock_reset(pll, p);
-	else
-		ret = meson_clk_pll_wait_lock(pll, p);
-	if (ret) {
+	/* make sure the reset is cleared at this point */
+	meson_parm_write(clk->map, &pll->rst, 0);
+
+	if (meson_clk_pll_wait_lock(hw)) {
 		pr_warn("%s: pll did not lock, trying to restore old rate %lu\n",
 			__func__, old_rate);
+		/*
+		 * FIXME: Do we really need/want this HACK ?
+		 * It looks unsafe. What happens if the clock gets into a
+		 * broken state and we can't lock back on the old_rate ? Looks
+		 * like an infinite recursion is possible
+		 */
 		meson_clk_pll_set_rate(hw, old_rate, parent_rate);
 	}
 
-	return ret;
+	return 0;
 }
 
 const struct clk_ops meson_clk_pll_ops = {
+	.init		= meson_clk_pll_init,
 	.recalc_rate	= meson_clk_pll_recalc_rate,
 	.round_rate	= meson_clk_pll_round_rate,
 	.set_rate	= meson_clk_pll_set_rate,
diff --git a/drivers/clk/meson/clk-regmap.c b/drivers/clk/meson/clk-regmap.c
new file mode 100644
index 000000000000..3645fdb62343
--- /dev/null
+++ b/drivers/clk/meson/clk-regmap.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 BayLibre, SAS.
+// Author: Jerome Brunet <jbrunet@baylibre.com>
+
+#include "clk-regmap.h"
+
+static int clk_regmap_gate_endisable(struct clk_hw *hw, int enable)
+{
+	struct clk_regmap *clk = to_clk_regmap(hw);
+	struct clk_regmap_gate_data *gate = clk_get_regmap_gate_data(clk);
+	int set = gate->flags & CLK_GATE_SET_TO_DISABLE ? 1 : 0;
+
+	set ^= enable;
+
+	return regmap_update_bits(clk->map, gate->offset, BIT(gate->bit_idx),
+				  set ? BIT(gate->bit_idx) : 0);
+}
+
+static int clk_regmap_gate_enable(struct clk_hw *hw)
+{
+	return clk_regmap_gate_endisable(hw, 1);
+}
+
+static void clk_regmap_gate_disable(struct clk_hw *hw)
+{
+	clk_regmap_gate_endisable(hw, 0);
+}
+
+static int clk_regmap_gate_is_enabled(struct clk_hw *hw)
+{
+	struct clk_regmap *clk = to_clk_regmap(hw);
+	struct clk_regmap_gate_data *gate = clk_get_regmap_gate_data(clk);
+	unsigned int val;
+
+	regmap_read(clk->map, gate->offset, &val);
+	if (gate->flags & CLK_GATE_SET_TO_DISABLE)
+		val ^= BIT(gate->bit_idx);
+
+	val &= BIT(gate->bit_idx);
+
+	return val ? 1 : 0;
+}
+
+const struct clk_ops clk_regmap_gate_ops = {
+	.enable = clk_regmap_gate_enable,
+	.disable = clk_regmap_gate_disable,
+	.is_enabled = clk_regmap_gate_is_enabled,
+};
+EXPORT_SYMBOL_GPL(clk_regmap_gate_ops);
+
+static unsigned long clk_regmap_div_recalc_rate(struct clk_hw *hw,
+						unsigned long prate)
+{
+	struct clk_regmap *clk = to_clk_regmap(hw);
+	struct clk_regmap_div_data *div = clk_get_regmap_div_data(clk);
+	unsigned int val;
+	int ret;
+
+	ret = regmap_read(clk->map, div->offset, &val);
+	if (ret)
+		/* Gives a hint that something is wrong */
+		return 0;
+
+	val >>= div->shift;
+	val &= clk_div_mask(div->width);
+	return divider_recalc_rate(hw, prate, val, div->table, div->flags,
+				   div->width);
+}
+
+static long clk_regmap_div_round_rate(struct clk_hw *hw, unsigned long rate,
+				      unsigned long *prate)
+{
+	struct clk_regmap *clk = to_clk_regmap(hw);
+	struct clk_regmap_div_data *div = clk_get_regmap_div_data(clk);
+	unsigned int val;
+	int ret;
+
+	/* if read only, just return current value */
+	if (div->flags & CLK_DIVIDER_READ_ONLY) {
+		ret = regmap_read(clk->map, div->offset, &val);
+		if (ret)
+			/* Gives a hint that something is wrong */
+			return 0;
+
+		val >>= div->shift;
+		val &= clk_div_mask(div->width);
+
+		return divider_ro_round_rate(hw, rate, prate, div->table,
+					     div->width, div->flags, val);
+	}
+
+	return divider_round_rate(hw, rate, prate, div->table, div->width,
+				  div->flags);
+}
+
+static int clk_regmap_div_set_rate(struct clk_hw *hw, unsigned long rate,
+				   unsigned long parent_rate)
+{
+	struct clk_regmap *clk = to_clk_regmap(hw);
+	struct clk_regmap_div_data *div = clk_get_regmap_div_data(clk);
+	unsigned int val;
+	int ret;
+
+	ret = divider_get_val(rate, parent_rate, div->table, div->width,
+			      div->flags);
+	if (ret < 0)
+		return ret;
+
+	val = (unsigned int)ret << div->shift;
+	return regmap_update_bits(clk->map, div->offset,
+				  clk_div_mask(div->width) << div->shift, val);
+};
+
+/* Would prefer clk_regmap_div_ro_ops but clashes with qcom */
+
+const struct clk_ops clk_regmap_divider_ops = {
+	.recalc_rate = clk_regmap_div_recalc_rate,
+	.round_rate = clk_regmap_div_round_rate,
+	.set_rate = clk_regmap_div_set_rate,
+};
+EXPORT_SYMBOL_GPL(clk_regmap_divider_ops);
+
+const struct clk_ops clk_regmap_divider_ro_ops = {
+	.recalc_rate = clk_regmap_div_recalc_rate,
+	.round_rate = clk_regmap_div_round_rate,
+};
+EXPORT_SYMBOL_GPL(clk_regmap_divider_ro_ops);
+
+static u8 clk_regmap_mux_get_parent(struct clk_hw *hw)
+{
+	struct clk_regmap *clk = to_clk_regmap(hw);
+	struct clk_regmap_mux_data *mux = clk_get_regmap_mux_data(clk);
+	unsigned int val;
+	int ret;
+
+	ret = regmap_read(clk->map, mux->offset, &val);
+	if (ret)
+		return ret;
+
+	val >>= mux->shift;
+	val &= mux->mask;
+	return clk_mux_val_to_index(hw, mux->table, mux->flags, val);
+}
+
+static int clk_regmap_mux_set_parent(struct clk_hw *hw, u8 index)
+{
+	struct clk_regmap *clk = to_clk_regmap(hw);
+	struct clk_regmap_mux_data *mux = clk_get_regmap_mux_data(clk);
+	unsigned int val = clk_mux_index_to_val(mux->table, mux->flags, index);
+
+	return regmap_update_bits(clk->map, mux->offset,
+				  mux->mask << mux->shift,
+				  val << mux->shift);
+}
+
+const struct clk_ops clk_regmap_mux_ops = {
+	.get_parent = clk_regmap_mux_get_parent,
+	.set_parent = clk_regmap_mux_set_parent,
+	.determine_rate = __clk_mux_determine_rate,
+};
+EXPORT_SYMBOL_GPL(clk_regmap_mux_ops);
+
+const struct clk_ops clk_regmap_mux_ro_ops = {
+	.get_parent = clk_regmap_mux_get_parent,
+};
+EXPORT_SYMBOL_GPL(clk_regmap_mux_ro_ops);
diff --git a/drivers/clk/meson/clk-regmap.h b/drivers/clk/meson/clk-regmap.h
new file mode 100644
index 000000000000..627c888026d7
--- /dev/null
+++ b/drivers/clk/meson/clk-regmap.h
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 BayLibre, SAS.
+// Author: Jerome Brunet <jbrunet@baylibre.com>
+
+#ifndef __CLK_REGMAP_H
+#define __CLK_REGMAP_H
+
+#include <linux/clk-provider.h>
+#include <linux/regmap.h>
+
+/**
+ * struct clk_regmap - regmap backed clock
+ *
+ * @hw:		handle between common and hardware-specific interfaces
+ * @map:	pointer to the regmap structure controlling the clock
+ * @data:	data specific to the clock type
+ *
+ * Clock which is controlled by regmap backed registers. The actual type of
+ * the clock is controlled by the clock_ops and data.
+ */
+struct clk_regmap {
+	struct clk_hw	hw;
+	struct regmap	*map;
+	void		*data;
+};
+
+#define to_clk_regmap(_hw) container_of(_hw, struct clk_regmap, hw)
+
+/**
+ * struct clk_regmap_gate_data - regmap backed gate specific data
+ *
+ * @offset:	offset of the register controlling gate
+ * @bit_idx:	single bit controlling gate
+ * @flags:	hardware-specific flags
+ *
+ * Flags:
+ * Same as clk_gate except CLK_GATE_HIWORD_MASK which is ignored
+ */
+struct clk_regmap_gate_data {
+	unsigned int	offset;
+	u8		bit_idx;
+	u8		flags;
+};
+
+static inline struct clk_regmap_gate_data *
+clk_get_regmap_gate_data(struct clk_regmap *clk)
+{
+	return (struct clk_regmap_gate_data *)clk->data;
+}
+
+extern const struct clk_ops clk_regmap_gate_ops;
+
+/**
+ * struct clk_regmap_div_data - regmap backed adjustable divider specific data
+ *
+ * @offset:	offset of the register controlling the divider
+ * @shift:	shift to the divider bit field
+ * @width:	width of the divider bit field
+ * @table:	array of value/divider pairs, last entry should have div = 0
+ *
+ * Flags:
+ * Same as clk_divider except CLK_DIVIDER_HIWORD_MASK which is ignored
+ */
+struct clk_regmap_div_data {
+	unsigned int	offset;
+	u8		shift;
+	u8		width;
+	u8		flags;
+	const struct clk_div_table	*table;
+};
+
+static inline struct clk_regmap_div_data *
+clk_get_regmap_div_data(struct clk_regmap *clk)
+{
+	return (struct clk_regmap_div_data *)clk->data;
+}
+
+extern const struct clk_ops clk_regmap_divider_ops;
+extern const struct clk_ops clk_regmap_divider_ro_ops;
+
+/**
+ * struct clk_regmap_mux_data - regmap backed multiplexer clock specific data
+ *
+ * @hw:		handle between common and hardware-specific interfaces
+ * @offset:	offset of the register controlling multiplexer
+ * @table:	array of parent indexed register values
+ * @shift:	shift to multiplexer bit field
+ * @mask:	mask of multiplexer bit field
+ * @flags:	hardware-specific flags
+ *
+ * Flags:
+ * Same as clk_mux except CLK_MUX_HIWORD_MASK which is ignored
+ */
+struct clk_regmap_mux_data {
+	unsigned int	offset;
+	u32		*table;
+	u32		mask;
+	u8		shift;
+	u8		flags;
+};
+
+static inline struct clk_regmap_mux_data *
+clk_get_regmap_mux_data(struct clk_regmap *clk)
+{
+	return (struct clk_regmap_mux_data *)clk->data;
+}
+
+extern const struct clk_ops clk_regmap_mux_ops;
+extern const struct clk_ops clk_regmap_mux_ro_ops;
+
+#endif /* __CLK_REGMAP_H */
diff --git a/drivers/clk/meson/clkc.h b/drivers/clk/meson/clkc.h
index c2ff0520ce53..8fe73c4edca8 100644
--- a/drivers/clk/meson/clkc.h
+++ b/drivers/clk/meson/clkc.h
@@ -18,6 +18,9 @@
 #ifndef __CLKC_H
 #define __CLKC_H
 
+#include <linux/clk-provider.h>
+#include "clk-regmap.h"
+
 #define PMASK(width)			GENMASK(width - 1, 0)
 #define SETPMASK(width, shift)		GENMASK(shift + width - 1, shift)
 #define CLRPMASK(width, shift)		(~SETPMASK(width, shift))
@@ -35,13 +38,29 @@ struct parm {
 	u8	width;
 };
 
+static inline unsigned int meson_parm_read(struct regmap *map, struct parm *p)
+{
+	unsigned int val;
+
+	regmap_read(map, p->reg_off, &val);
+	return PARM_GET(p->width, p->shift, val);
+}
+
+static inline void meson_parm_write(struct regmap *map, struct parm *p,
+				    unsigned int val)
+{
+	regmap_update_bits(map, p->reg_off, SETPMASK(p->width, p->shift),
+			   val << p->shift);
+}
+
+
 struct pll_rate_table {
 	unsigned long	rate;
 	u16		m;
 	u16		n;
 	u16		od;
 	u16		od2;
-	u16		frac;
+	u16		od3;
 };
 
 #define PLL_RATE(_r, _m, _n, _od)					\
@@ -50,97 +69,53 @@ struct pll_rate_table {
 		.m		= (_m),					\
 		.n		= (_n),					\
 		.od		= (_od),				\
-	}								\
-
-#define PLL_FRAC_RATE(_r, _m, _n, _od, _od2, _frac)			\
-	{								\
-		.rate		= (_r),					\
-		.m		= (_m),					\
-		.n		= (_n),					\
-		.od		= (_od),				\
-		.od2		= (_od2),				\
-		.frac		= (_frac),				\
-	}								\
-
-struct pll_params_table {
-	unsigned int reg_off;
-	unsigned int value;
-};
-
-#define PLL_PARAM(_reg, _val)						\
-	{								\
-		.reg_off	= (_reg),				\
-		.value		= (_val),				\
 	}
 
-struct pll_setup_params {
-	struct pll_params_table *params_table;
-	unsigned int params_count;
-	/* Workaround for GP0, do not reset before configuring */
-	bool no_init_reset;
-	/* Workaround for GP0, unreset right before checking for lock */
-	bool clear_reset_for_lock;
-	/* Workaround for GXL GP0, reset in the lock checking loop */
-	bool reset_lock_loop;
-};
+#define CLK_MESON_PLL_ROUND_CLOSEST	BIT(0)
 
-struct meson_clk_pll {
-	struct clk_hw hw;
-	void __iomem *base;
+struct meson_clk_pll_data {
 	struct parm m;
 	struct parm n;
 	struct parm frac;
 	struct parm od;
 	struct parm od2;
-	const struct pll_setup_params params;
-	const struct pll_rate_table *rate_table;
-	unsigned int rate_count;
-	spinlock_t *lock;
+	struct parm od3;
+	struct parm l;
+	struct parm rst;
+	const struct reg_sequence *init_regs;
+	unsigned int init_count;
+	const struct pll_rate_table *table;
+	u8 flags;
 };
 
 #define to_meson_clk_pll(_hw) container_of(_hw, struct meson_clk_pll, hw)
 
-struct meson_clk_cpu {
-	struct clk_hw hw;
-	void __iomem *base;
-	u16 reg_off;
-	struct notifier_block clk_nb;
-	const struct clk_div_table *div_table;
-};
-
-int meson_clk_cpu_notifier_cb(struct notifier_block *nb, unsigned long event,
-		void *data);
-
-struct meson_clk_mpll {
-	struct clk_hw hw;
-	void __iomem *base;
+struct meson_clk_mpll_data {
 	struct parm sdm;
 	struct parm sdm_en;
 	struct parm n2;
-	struct parm en;
 	struct parm ssen;
+	struct parm misc;
 	spinlock_t *lock;
 };
 
-struct meson_clk_audio_divider {
-	struct clk_hw hw;
-	void __iomem *base;
+struct meson_clk_audio_div_data {
 	struct parm div;
 	u8 flags;
-	spinlock_t *lock;
 };
 
 #define MESON_GATE(_name, _reg, _bit)					\
-struct clk_gate _name = { 						\
-	.reg = (void __iomem *) _reg, 					\
-	.bit_idx = (_bit), 						\
-	.lock = &meson_clk_lock,					\
-	.hw.init = &(struct clk_init_data) { 				\
-		.name = #_name,					\
-		.ops = &clk_gate_ops,					\
+struct clk_regmap _name = {						\
+	.data = &(struct clk_regmap_gate_data){				\
+		.offset = (_reg),					\
+		.bit_idx = (_bit),					\
+	},								\
+	.hw.init = &(struct clk_init_data) {				\
+		.name = #_name,						\
+		.ops = &clk_regmap_gate_ops,				\
 		.parent_names = (const char *[]){ "clk81" },		\
 		.num_parents = 1,					\
-		.flags = (CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED), 	\
+		.flags = (CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED),	\
 	},								\
 };
 
diff --git a/drivers/clk/meson/gxbb-aoclk-regmap.c b/drivers/clk/meson/gxbb-aoclk-regmap.c
deleted file mode 100644
index 2515fbfa0467..000000000000
--- a/drivers/clk/meson/gxbb-aoclk-regmap.c
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (c) 2017 BayLibre, SAS.
- * Author: Neil Armstrong <narmstrong@baylibre.com>
- *
- * SPDX-License-Identifier: GPL-2.0+
- */
-
-#include <linux/clk-provider.h>
-#include <linux/bitfield.h>
-#include <linux/regmap.h>
-#include "gxbb-aoclk.h"
-
-static int aoclk_gate_regmap_enable(struct clk_hw *hw)
-{
-	struct aoclk_gate_regmap *gate = to_aoclk_gate_regmap(hw);
-
-	return regmap_update_bits(gate->regmap, AO_RTI_GEN_CNTL_REG0,
-				  BIT(gate->bit_idx), BIT(gate->bit_idx));
-}
-
-static void aoclk_gate_regmap_disable(struct clk_hw *hw)
-{
-	struct aoclk_gate_regmap *gate = to_aoclk_gate_regmap(hw);
-
-	regmap_update_bits(gate->regmap, AO_RTI_GEN_CNTL_REG0,
-			   BIT(gate->bit_idx), 0);
-}
-
-static int aoclk_gate_regmap_is_enabled(struct clk_hw *hw)
-{
-	struct aoclk_gate_regmap *gate = to_aoclk_gate_regmap(hw);
-	unsigned int val;
-	int ret;
-
-	ret = regmap_read(gate->regmap, AO_RTI_GEN_CNTL_REG0, &val);
-	if (ret)
-		return ret;
-
-	return (val & BIT(gate->bit_idx)) != 0;
-}
-
-const struct clk_ops meson_aoclk_gate_regmap_ops = {
-	.enable = aoclk_gate_regmap_enable,
-	.disable = aoclk_gate_regmap_disable,
-	.is_enabled = aoclk_gate_regmap_is_enabled,
-};
diff --git a/drivers/clk/meson/gxbb-aoclk.c b/drivers/clk/meson/gxbb-aoclk.c
index 6c161e0a8e59..9ec23ae9a219 100644
--- a/drivers/clk/meson/gxbb-aoclk.c
+++ b/drivers/clk/meson/gxbb-aoclk.c
@@ -62,10 +62,9 @@
 #include <linux/delay.h>
 #include <dt-bindings/clock/gxbb-aoclkc.h>
 #include <dt-bindings/reset/gxbb-aoclkc.h>
+#include "clk-regmap.h"
 #include "gxbb-aoclk.h"
 
-static DEFINE_SPINLOCK(gxbb_aoclk_lock);
-
 struct gxbb_aoclk_reset_controller {
 	struct reset_controller_dev reset;
 	unsigned int *data;
@@ -87,12 +86,14 @@ static const struct reset_control_ops gxbb_aoclk_reset_ops = {
 };
 
 #define GXBB_AO_GATE(_name, _bit)					\
-static struct aoclk_gate_regmap _name##_ao = {				\
-	.bit_idx = (_bit),						\
-	.lock = &gxbb_aoclk_lock,					\
+static struct clk_regmap _name##_ao = {					\
+	.data = &(struct clk_regmap_gate_data) {			\
+		.offset = AO_RTI_GEN_CNTL_REG0,				\
+		.bit_idx = (_bit),					\
+	},								\
 	.hw.init = &(struct clk_init_data) {				\
 		.name = #_name "_ao",					\
-		.ops = &meson_aoclk_gate_regmap_ops,			\
+		.ops = &clk_regmap_gate_ops,				\
 		.parent_names = (const char *[]){ "clk81" },		\
 		.num_parents = 1,					\
 		.flags = (CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED),	\
@@ -107,7 +108,6 @@ GXBB_AO_GATE(uart2, 5);
 GXBB_AO_GATE(ir_blaster, 6);
 
 static struct aoclk_cec_32k cec_32k_ao = {
-	.lock = &gxbb_aoclk_lock,
 	.hw.init = &(struct clk_init_data) {
 		.name = "cec_32k_ao",
 		.ops = &meson_aoclk_cec_32k_ops,
@@ -126,7 +126,7 @@ static unsigned int gxbb_aoclk_reset[] = {
 	[RESET_AO_IR_BLASTER] = 23,
 };
 
-static struct aoclk_gate_regmap *gxbb_aoclk_gate[] = {
+static struct clk_regmap *gxbb_aoclk_gate[] = {
 	[CLKID_AO_REMOTE] = &remote_ao,
 	[CLKID_AO_I2C_MASTER] = &i2c_master_ao,
 	[CLKID_AO_I2C_SLAVE] = &i2c_slave_ao,
@@ -177,10 +177,10 @@ static int gxbb_aoclkc_probe(struct platform_device *pdev)
 	 * Populate regmap and register all clks
 	 */
 	for (clkid = 0; clkid < ARRAY_SIZE(gxbb_aoclk_gate); clkid++) {
-		gxbb_aoclk_gate[clkid]->regmap = regmap;
+		gxbb_aoclk_gate[clkid]->map = regmap;
 
 		ret = devm_clk_hw_register(dev,
-					gxbb_aoclk_onecell_data.hws[clkid]);
+					   gxbb_aoclk_onecell_data.hws[clkid]);
 		if (ret)
 			return ret;
 	}
diff --git a/drivers/clk/meson/gxbb-aoclk.h b/drivers/clk/meson/gxbb-aoclk.h
index e8604c8f7eee..0be78383f257 100644
--- a/drivers/clk/meson/gxbb-aoclk.h
+++ b/drivers/clk/meson/gxbb-aoclk.h
@@ -17,22 +17,11 @@
 #define AO_RTC_ALT_CLK_CNTL0	0x94
 #define AO_RTC_ALT_CLK_CNTL1	0x98
 
-struct aoclk_gate_regmap {
-	struct clk_hw hw;
-	unsigned bit_idx;
-	struct regmap *regmap;
-	spinlock_t *lock;
-};
-
-#define to_aoclk_gate_regmap(_hw) \
-	container_of(_hw, struct aoclk_gate_regmap, hw)
-
 extern const struct clk_ops meson_aoclk_gate_regmap_ops;
 
 struct aoclk_cec_32k {
 	struct clk_hw hw;
 	struct regmap *regmap;
-	spinlock_t *lock;
 };
 
 #define to_aoclk_cec_32k(_hw) container_of(_hw, struct aoclk_cec_32k, hw)
diff --git a/drivers/clk/meson/gxbb.c b/drivers/clk/meson/gxbb.c
index af24455af5b4..b1e4d9557610 100644
--- a/drivers/clk/meson/gxbb.c
+++ b/drivers/clk/meson/gxbb.c
@@ -19,108 +19,19 @@
 
 #include <linux/clk.h>
 #include <linux/clk-provider.h>
+#include <linux/init.h>
 #include <linux/of_address.h>
 #include <linux/of_device.h>
+#include <linux/mfd/syscon.h>
 #include <linux/platform_device.h>
-#include <linux/init.h>
+#include <linux/regmap.h>
 
 #include "clkc.h"
 #include "gxbb.h"
+#include "clk-regmap.h"
 
 static DEFINE_SPINLOCK(meson_clk_lock);
 
-static const struct pll_rate_table sys_pll_rate_table[] = {
-	PLL_RATE(24000000, 56, 1, 2),
-	PLL_RATE(48000000, 64, 1, 2),
-	PLL_RATE(72000000, 72, 1, 2),
-	PLL_RATE(96000000, 64, 1, 2),
-	PLL_RATE(120000000, 80, 1, 2),
-	PLL_RATE(144000000, 96, 1, 2),
-	PLL_RATE(168000000, 56, 1, 1),
-	PLL_RATE(192000000, 64, 1, 1),
-	PLL_RATE(216000000, 72, 1, 1),
-	PLL_RATE(240000000, 80, 1, 1),
-	PLL_RATE(264000000, 88, 1, 1),
-	PLL_RATE(288000000, 96, 1, 1),
-	PLL_RATE(312000000, 52, 1, 2),
-	PLL_RATE(336000000, 56, 1, 2),
-	PLL_RATE(360000000, 60, 1, 2),
-	PLL_RATE(384000000, 64, 1, 2),
-	PLL_RATE(408000000, 68, 1, 2),
-	PLL_RATE(432000000, 72, 1, 2),
-	PLL_RATE(456000000, 76, 1, 2),
-	PLL_RATE(480000000, 80, 1, 2),
-	PLL_RATE(504000000, 84, 1, 2),
-	PLL_RATE(528000000, 88, 1, 2),
-	PLL_RATE(552000000, 92, 1, 2),
-	PLL_RATE(576000000, 96, 1, 2),
-	PLL_RATE(600000000, 50, 1, 1),
-	PLL_RATE(624000000, 52, 1, 1),
-	PLL_RATE(648000000, 54, 1, 1),
-	PLL_RATE(672000000, 56, 1, 1),
-	PLL_RATE(696000000, 58, 1, 1),
-	PLL_RATE(720000000, 60, 1, 1),
-	PLL_RATE(744000000, 62, 1, 1),
-	PLL_RATE(768000000, 64, 1, 1),
-	PLL_RATE(792000000, 66, 1, 1),
-	PLL_RATE(816000000, 68, 1, 1),
-	PLL_RATE(840000000, 70, 1, 1),
-	PLL_RATE(864000000, 72, 1, 1),
-	PLL_RATE(888000000, 74, 1, 1),
-	PLL_RATE(912000000, 76, 1, 1),
-	PLL_RATE(936000000, 78, 1, 1),
-	PLL_RATE(960000000, 80, 1, 1),
-	PLL_RATE(984000000, 82, 1, 1),
-	PLL_RATE(1008000000, 84, 1, 1),
-	PLL_RATE(1032000000, 86, 1, 1),
-	PLL_RATE(1056000000, 88, 1, 1),
-	PLL_RATE(1080000000, 90, 1, 1),
-	PLL_RATE(1104000000, 92, 1, 1),
-	PLL_RATE(1128000000, 94, 1, 1),
-	PLL_RATE(1152000000, 96, 1, 1),
-	PLL_RATE(1176000000, 98, 1, 1),
-	PLL_RATE(1200000000, 50, 1, 0),
-	PLL_RATE(1224000000, 51, 1, 0),
-	PLL_RATE(1248000000, 52, 1, 0),
-	PLL_RATE(1272000000, 53, 1, 0),
-	PLL_RATE(1296000000, 54, 1, 0),
-	PLL_RATE(1320000000, 55, 1, 0),
-	PLL_RATE(1344000000, 56, 1, 0),
-	PLL_RATE(1368000000, 57, 1, 0),
-	PLL_RATE(1392000000, 58, 1, 0),
-	PLL_RATE(1416000000, 59, 1, 0),
-	PLL_RATE(1440000000, 60, 1, 0),
-	PLL_RATE(1464000000, 61, 1, 0),
-	PLL_RATE(1488000000, 62, 1, 0),
-	PLL_RATE(1512000000, 63, 1, 0),
-	PLL_RATE(1536000000, 64, 1, 0),
-	PLL_RATE(1560000000, 65, 1, 0),
-	PLL_RATE(1584000000, 66, 1, 0),
-	PLL_RATE(1608000000, 67, 1, 0),
-	PLL_RATE(1632000000, 68, 1, 0),
-	PLL_RATE(1656000000, 68, 1, 0),
-	PLL_RATE(1680000000, 68, 1, 0),
-	PLL_RATE(1704000000, 68, 1, 0),
-	PLL_RATE(1728000000, 69, 1, 0),
-	PLL_RATE(1752000000, 69, 1, 0),
-	PLL_RATE(1776000000, 69, 1, 0),
-	PLL_RATE(1800000000, 69, 1, 0),
-	PLL_RATE(1824000000, 70, 1, 0),
-	PLL_RATE(1848000000, 70, 1, 0),
-	PLL_RATE(1872000000, 70, 1, 0),
-	PLL_RATE(1896000000, 70, 1, 0),
-	PLL_RATE(1920000000, 71, 1, 0),
-	PLL_RATE(1944000000, 71, 1, 0),
-	PLL_RATE(1968000000, 71, 1, 0),
-	PLL_RATE(1992000000, 71, 1, 0),
-	PLL_RATE(2016000000, 72, 1, 0),
-	PLL_RATE(2040000000, 72, 1, 0),
-	PLL_RATE(2064000000, 72, 1, 0),
-	PLL_RATE(2088000000, 72, 1, 0),
-	PLL_RATE(2112000000, 73, 1, 0),
-	{ /* sentinel */ },
-};
-
 static const struct pll_rate_table gxbb_gp0_pll_rate_table[] = {
 	PLL_RATE(96000000, 32, 1, 3),
 	PLL_RATE(99000000, 33, 1, 3),
@@ -278,23 +189,39 @@ static const struct pll_rate_table gxl_gp0_pll_rate_table[] = {
 	{ /* sentinel */ },
 };
 
-static struct meson_clk_pll gxbb_fixed_pll = {
-	.m = {
-		.reg_off = HHI_MPLL_CNTL,
-		.shift   = 0,
-		.width   = 9,
+static struct clk_regmap gxbb_fixed_pll = {
+	.data = &(struct meson_clk_pll_data){
+		.m = {
+			.reg_off = HHI_MPLL_CNTL,
+			.shift   = 0,
+			.width   = 9,
+		},
+		.n = {
+			.reg_off = HHI_MPLL_CNTL,
+			.shift   = 9,
+			.width   = 5,
+		},
+		.od = {
+			.reg_off = HHI_MPLL_CNTL,
+			.shift   = 16,
+			.width   = 2,
+		},
+		.frac = {
+			.reg_off = HHI_MPLL_CNTL2,
+			.shift   = 0,
+			.width   = 12,
+		},
+		.l = {
+			.reg_off = HHI_MPLL_CNTL,
+			.shift   = 31,
+			.width   = 1,
+		},
+		.rst = {
+			.reg_off = HHI_MPLL_CNTL,
+			.shift   = 29,
+			.width   = 1,
+		},
 	},
-	.n = {
-		.reg_off = HHI_MPLL_CNTL,
-		.shift   = 9,
-		.width   = 5,
-	},
-	.od = {
-		.reg_off = HHI_MPLL_CNTL,
-		.shift   = 16,
-		.width   = 2,
-	},
-	.lock = &meson_clk_lock,
 	.hw.init = &(struct clk_init_data){
 		.name = "fixed_pll",
 		.ops = &meson_clk_pll_ro_ops,
@@ -304,33 +231,118 @@ static struct meson_clk_pll gxbb_fixed_pll = {
 	},
 };
 
-static struct meson_clk_pll gxbb_hdmi_pll = {
-	.m = {
-		.reg_off = HHI_HDMI_PLL_CNTL,
-		.shift   = 0,
-		.width   = 9,
-	},
-	.n = {
-		.reg_off = HHI_HDMI_PLL_CNTL,
-		.shift   = 9,
-		.width   = 5,
+static struct clk_fixed_factor gxbb_hdmi_pll_pre_mult = {
+	.mult = 2,
+	.div = 1,
+	.hw.init = &(struct clk_init_data){
+		.name = "hdmi_pll_pre_mult",
+		.ops = &clk_fixed_factor_ops,
+		.parent_names = (const char *[]){ "xtal" },
+		.num_parents = 1,
 	},
-	.frac = {
-		.reg_off = HHI_HDMI_PLL_CNTL2,
-		.shift   = 0,
-		.width   = 12,
+};
+
+static struct clk_regmap gxbb_hdmi_pll = {
+	.data = &(struct meson_clk_pll_data){
+		.m = {
+			.reg_off = HHI_HDMI_PLL_CNTL,
+			.shift   = 0,
+			.width   = 9,
+		},
+		.n = {
+			.reg_off = HHI_HDMI_PLL_CNTL,
+			.shift   = 9,
+			.width   = 5,
+		},
+		.frac = {
+			.reg_off = HHI_HDMI_PLL_CNTL2,
+			.shift   = 0,
+			.width   = 12,
+		},
+		.od = {
+			.reg_off = HHI_HDMI_PLL_CNTL2,
+			.shift   = 16,
+			.width   = 2,
+		},
+		.od2 = {
+			.reg_off = HHI_HDMI_PLL_CNTL2,
+			.shift   = 22,
+			.width   = 2,
+		},
+		.od3 = {
+			.reg_off = HHI_HDMI_PLL_CNTL2,
+			.shift   = 18,
+			.width   = 2,
+		},
+		.l = {
+			.reg_off = HHI_HDMI_PLL_CNTL,
+			.shift   = 31,
+			.width   = 1,
+		},
+		.rst = {
+			.reg_off = HHI_HDMI_PLL_CNTL,
+			.shift   = 28,
+			.width   = 1,
+		},
 	},
-	.od = {
-		.reg_off = HHI_HDMI_PLL_CNTL2,
-		.shift   = 16,
-		.width   = 2,
+	.hw.init = &(struct clk_init_data){
+		.name = "hdmi_pll",
+		.ops = &meson_clk_pll_ro_ops,
+		.parent_names = (const char *[]){ "hdmi_pll_pre_mult" },
+		.num_parents = 1,
+		.flags = CLK_GET_RATE_NOCACHE,
 	},
-	.od2 = {
-		.reg_off = HHI_HDMI_PLL_CNTL2,
-		.shift   = 22,
-		.width   = 2,
+};
+
+static struct clk_regmap gxl_hdmi_pll = {
+	.data = &(struct meson_clk_pll_data){
+		.m = {
+			.reg_off = HHI_HDMI_PLL_CNTL,
+			.shift   = 0,
+			.width   = 9,
+		},
+		.n = {
+			.reg_off = HHI_HDMI_PLL_CNTL,
+			.shift   = 9,
+			.width   = 5,
+		},
+		.frac = {
+			/*
+			 * On gxl, there is a register shift due to
+			 * HHI_HDMI_PLL_CNTL1 which does not exist on gxbb,
+			 * so we compute the register offset based on the PLL
+			 * base to get it right
+			 */
+			.reg_off = HHI_HDMI_PLL_CNTL + 4,
+			.shift   = 0,
+			.width   = 12,
+		},
+		.od = {
+			.reg_off = HHI_HDMI_PLL_CNTL + 8,
+			.shift   = 21,
+			.width   = 2,
+		},
+		.od2 = {
+			.reg_off = HHI_HDMI_PLL_CNTL + 8,
+			.shift   = 23,
+			.width   = 2,
+		},
+		.od3 = {
+			.reg_off = HHI_HDMI_PLL_CNTL + 8,
+			.shift   = 19,
+			.width   = 2,
+		},
+		.l = {
+			.reg_off = HHI_HDMI_PLL_CNTL,
+			.shift   = 31,
+			.width   = 1,
+		},
+		.rst = {
+			.reg_off = HHI_HDMI_PLL_CNTL,
+			.shift   = 29,
+			.width   = 1,
+		},
 	},
-	.lock = &meson_clk_lock,
 	.hw.init = &(struct clk_init_data){
 		.name = "hdmi_pll",
 		.ops = &meson_clk_pll_ro_ops,
@@ -340,25 +352,34 @@ static struct meson_clk_pll gxbb_hdmi_pll = {
 	},
 };
 
-static struct meson_clk_pll gxbb_sys_pll = {
-	.m = {
-		.reg_off = HHI_SYS_PLL_CNTL,
-		.shift   = 0,
-		.width   = 9,
-	},
-	.n = {
-		.reg_off = HHI_SYS_PLL_CNTL,
-		.shift   = 9,
-		.width   = 5,
+static struct clk_regmap gxbb_sys_pll = {
+	.data = &(struct meson_clk_pll_data){
+		.m = {
+			.reg_off = HHI_SYS_PLL_CNTL,
+			.shift   = 0,
+			.width   = 9,
+		},
+		.n = {
+			.reg_off = HHI_SYS_PLL_CNTL,
+			.shift   = 9,
+			.width   = 5,
+		},
+		.od = {
+			.reg_off = HHI_SYS_PLL_CNTL,
+			.shift   = 10,
+			.width   = 2,
+		},
+		.l = {
+			.reg_off = HHI_SYS_PLL_CNTL,
+			.shift   = 31,
+			.width   = 1,
+		},
+		.rst = {
+			.reg_off = HHI_SYS_PLL_CNTL,
+			.shift   = 29,
+			.width   = 1,
+		},
 	},
-	.od = {
-		.reg_off = HHI_SYS_PLL_CNTL,
-		.shift   = 10,
-		.width   = 2,
-	},
-	.rate_table = sys_pll_rate_table,
-	.rate_count = ARRAY_SIZE(sys_pll_rate_table),
-	.lock = &meson_clk_lock,
 	.hw.init = &(struct clk_init_data){
 		.name = "sys_pll",
 		.ops = &meson_clk_pll_ro_ops,
@@ -368,38 +389,44 @@ static struct meson_clk_pll gxbb_sys_pll = {
 	},
 };
 
-struct pll_params_table gxbb_gp0_params_table[] = {
-	PLL_PARAM(HHI_GP0_PLL_CNTL, 0x6a000228),
-	PLL_PARAM(HHI_GP0_PLL_CNTL2, 0x69c80000),
-	PLL_PARAM(HHI_GP0_PLL_CNTL3, 0x0a5590c4),
-	PLL_PARAM(HHI_GP0_PLL_CNTL4, 0x0000500d),
-};
-
-static struct meson_clk_pll gxbb_gp0_pll = {
-	.m = {
-		.reg_off = HHI_GP0_PLL_CNTL,
-		.shift   = 0,
-		.width   = 9,
-	},
-	.n = {
-		.reg_off = HHI_GP0_PLL_CNTL,
-		.shift   = 9,
-		.width   = 5,
-	},
-	.od = {
-		.reg_off = HHI_GP0_PLL_CNTL,
-		.shift   = 16,
-		.width   = 2,
-	},
-	.params = {
-		.params_table = gxbb_gp0_params_table,
-		.params_count =	ARRAY_SIZE(gxbb_gp0_params_table),
-		.no_init_reset = true,
-		.clear_reset_for_lock = true,
+static const struct reg_sequence gxbb_gp0_init_regs[] = {	/* gp0 PLL setup */
+	{ .reg = HHI_GP0_PLL_CNTL2,	.def = 0x69c80000 },
+	{ .reg = HHI_GP0_PLL_CNTL3,	.def = 0x0a5590c4 },
+	{ .reg = HHI_GP0_PLL_CNTL4,	.def = 0x0000500d },
+	{ .reg = HHI_GP0_PLL_CNTL,	.def = 0x4a000228 }, /* last; bit 29 clear */
+};
+
+static struct clk_regmap gxbb_gp0_pll = {
+	.data = &(struct meson_clk_pll_data){
+		.m = {
+			.reg_off = HHI_GP0_PLL_CNTL,
+			.shift   = 0,
+			.width   = 9,
+		},
+		.n = {
+			.reg_off = HHI_GP0_PLL_CNTL,
+			.shift   = 9,
+			.width   = 5,
+		},
+		.od = {
+			.reg_off = HHI_GP0_PLL_CNTL,
+			.shift   = 16,
+			.width   = 2,
+		},
+		.l = {
+			.reg_off = HHI_GP0_PLL_CNTL,
+			.shift   = 31,
+			.width   = 1,
+		},
+		.rst = {
+			.reg_off = HHI_GP0_PLL_CNTL,
+			.shift   = 29,
+			.width   = 1,
+		},
+		.table = gxbb_gp0_pll_rate_table,
+		.init_regs = gxbb_gp0_init_regs,
+		.init_count = ARRAY_SIZE(gxbb_gp0_init_regs),
 	},
-	.rate_table = gxbb_gp0_pll_rate_table,
-	.rate_count = ARRAY_SIZE(gxbb_gp0_pll_rate_table),
-	.lock = &meson_clk_lock,
 	.hw.init = &(struct clk_init_data){
 		.name = "gp0_pll",
 		.ops = &meson_clk_pll_ops,
@@ -409,40 +436,51 @@ static struct meson_clk_pll gxbb_gp0_pll = {
 	},
 };
 
-struct pll_params_table gxl_gp0_params_table[] = {
-	PLL_PARAM(HHI_GP0_PLL_CNTL, 0x40010250),
-	PLL_PARAM(HHI_GP0_PLL_CNTL1, 0xc084a000),
-	PLL_PARAM(HHI_GP0_PLL_CNTL2, 0xb75020be),
-	PLL_PARAM(HHI_GP0_PLL_CNTL3, 0x0a59a288),
-	PLL_PARAM(HHI_GP0_PLL_CNTL4, 0xc000004d),
-	PLL_PARAM(HHI_GP0_PLL_CNTL5, 0x00078000),
-};
-
-static struct meson_clk_pll gxl_gp0_pll = {
-	.m = {
-		.reg_off = HHI_GP0_PLL_CNTL,
-		.shift   = 0,
-		.width   = 9,
-	},
-	.n = {
-		.reg_off = HHI_GP0_PLL_CNTL,
-		.shift   = 9,
-		.width   = 5,
-	},
-	.od = {
-		.reg_off = HHI_GP0_PLL_CNTL,
-		.shift   = 16,
-		.width   = 2,
+static const struct reg_sequence gxl_gp0_init_regs[] = {	/* gp0 PLL setup */
+	{ .reg = HHI_GP0_PLL_CNTL1,	.def = 0xc084b000 }, /* bits 9:0 are zero */
+	{ .reg = HHI_GP0_PLL_CNTL2,	.def = 0xb75020be },
+	{ .reg = HHI_GP0_PLL_CNTL3,	.def = 0x0a59a288 },
+	{ .reg = HHI_GP0_PLL_CNTL4,	.def = 0xc000004d },
+	{ .reg = HHI_GP0_PLL_CNTL5,	.def = 0x00078000 },
+	{ .reg = HHI_GP0_PLL_CNTL,	.def = 0x40010250 }, /* last; bit 29 clear */
+};
+
+static struct clk_regmap gxl_gp0_pll = {
+	.data = &(struct meson_clk_pll_data){
+		.m = {
+			.reg_off = HHI_GP0_PLL_CNTL,
+			.shift   = 0,
+			.width   = 9,
+		},
+		.n = {
+			.reg_off = HHI_GP0_PLL_CNTL,
+			.shift   = 9,
+			.width   = 5,
+		},
+		.od = {
+			.reg_off = HHI_GP0_PLL_CNTL,
+			.shift   = 16,
+			.width   = 2,
+		},
+		.frac = {
+			.reg_off = HHI_GP0_PLL_CNTL1,
+			.shift   = 0,
+			.width   = 10,
+		},
+		.l = {
+			.reg_off = HHI_GP0_PLL_CNTL,
+			.shift   = 31,
+			.width   = 1,
+		},
+		.rst = {
+			.reg_off = HHI_GP0_PLL_CNTL,
+			.shift   = 29,
+			.width   = 1,
+		},
+		.table = gxl_gp0_pll_rate_table,
+		.init_regs = gxl_gp0_init_regs,
+		.init_count = ARRAY_SIZE(gxl_gp0_init_regs),
 	},
-	.params = {
-		.params_table = gxl_gp0_params_table,
-		.params_count =	ARRAY_SIZE(gxl_gp0_params_table),
-		.no_init_reset = true,
-		.reset_lock_loop = true,
-	},
-	.rate_table = gxl_gp0_pll_rate_table,
-	.rate_count = ARRAY_SIZE(gxl_gp0_pll_rate_table),
-	.lock = &meson_clk_lock,
 	.hw.init = &(struct clk_init_data){
 		.name = "gp0_pll",
 		.ops = &meson_clk_pll_ops,
@@ -452,161 +490,267 @@ static struct meson_clk_pll gxl_gp0_pll = {
 	},
 };
 
-static struct clk_fixed_factor gxbb_fclk_div2 = {
+static struct clk_fixed_factor gxbb_fclk_div2_div = {
 	.mult = 1,
 	.div = 2,
 	.hw.init = &(struct clk_init_data){
-		.name = "fclk_div2",
+		.name = "fclk_div2_div",
 		.ops = &clk_fixed_factor_ops,
 		.parent_names = (const char *[]){ "fixed_pll" },
 		.num_parents = 1,
 	},
 };
 
-static struct clk_fixed_factor gxbb_fclk_div3 = {
+static struct clk_regmap gxbb_fclk_div2 = {	/* gate on fixed_pll / 2 */
+	.data = &(struct clk_regmap_gate_data){
+		.offset = HHI_MPLL_CNTL6,
+		.bit_idx = 27,	/* fclk_div2/3/4/5/7 use bits 27..31 here */
+	},
+	.hw.init = &(struct clk_init_data){
+		.name = "fclk_div2",
+		.ops = &clk_regmap_gate_ops,
+		.parent_names = (const char *[]){ "fclk_div2_div" },
+		.num_parents = 1,
+	},
+};
+
+static struct clk_fixed_factor gxbb_fclk_div3_div = {
 	.mult = 1,
 	.div = 3,
 	.hw.init = &(struct clk_init_data){
-		.name = "fclk_div3",
+		.name = "fclk_div3_div",
 		.ops = &clk_fixed_factor_ops,
 		.parent_names = (const char *[]){ "fixed_pll" },
 		.num_parents = 1,
 	},
 };
 
-static struct clk_fixed_factor gxbb_fclk_div4 = {
+static struct clk_regmap gxbb_fclk_div3 = {	/* gate on fixed_pll / 3 */
+	.data = &(struct clk_regmap_gate_data){
+		.offset = HHI_MPLL_CNTL6,
+		.bit_idx = 28,	/* fclk_div2/3/4/5/7 use bits 27..31 here */
+	},
+	.hw.init = &(struct clk_init_data){
+		.name = "fclk_div3",
+		.ops = &clk_regmap_gate_ops,
+		.parent_names = (const char *[]){ "fclk_div3_div" },
+		.num_parents = 1,
+	},
+};
+
+static struct clk_fixed_factor gxbb_fclk_div4_div = {
 	.mult = 1,
 	.div = 4,
 	.hw.init = &(struct clk_init_data){
-		.name = "fclk_div4",
+		.name = "fclk_div4_div",
 		.ops = &clk_fixed_factor_ops,
 		.parent_names = (const char *[]){ "fixed_pll" },
 		.num_parents = 1,
 	},
 };
 
-static struct clk_fixed_factor gxbb_fclk_div5 = {
+static struct clk_regmap gxbb_fclk_div4 = {	/* gate on fixed_pll / 4 */
+	.data = &(struct clk_regmap_gate_data){
+		.offset = HHI_MPLL_CNTL6,
+		.bit_idx = 29,	/* fclk_div2/3/4/5/7 use bits 27..31 here */
+	},
+	.hw.init = &(struct clk_init_data){
+		.name = "fclk_div4",
+		.ops = &clk_regmap_gate_ops,
+		.parent_names = (const char *[]){ "fclk_div4_div" },
+		.num_parents = 1,
+	},
+};
+
+static struct clk_fixed_factor gxbb_fclk_div5_div = {
 	.mult = 1,
 	.div = 5,
 	.hw.init = &(struct clk_init_data){
-		.name = "fclk_div5",
+		.name = "fclk_div5_div",
 		.ops = &clk_fixed_factor_ops,
 		.parent_names = (const char *[]){ "fixed_pll" },
 		.num_parents = 1,
 	},
 };
 
-static struct clk_fixed_factor gxbb_fclk_div7 = {
+static struct clk_regmap gxbb_fclk_div5 = {	/* gate on fixed_pll / 5 */
+	.data = &(struct clk_regmap_gate_data){
+		.offset = HHI_MPLL_CNTL6,
+		.bit_idx = 30,	/* fclk_div2/3/4/5/7 use bits 27..31 here */
+	},
+	.hw.init = &(struct clk_init_data){
+		.name = "fclk_div5",
+		.ops = &clk_regmap_gate_ops,
+		.parent_names = (const char *[]){ "fclk_div5_div" },
+		.num_parents = 1,
+	},
+};
+
+static struct clk_fixed_factor gxbb_fclk_div7_div = {
 	.mult = 1,
 	.div = 7,
 	.hw.init = &(struct clk_init_data){
-		.name = "fclk_div7",
+		.name = "fclk_div7_div",
 		.ops = &clk_fixed_factor_ops,
 		.parent_names = (const char *[]){ "fixed_pll" },
 		.num_parents = 1,
 	},
 };
 
-static struct meson_clk_mpll gxbb_mpll0 = {
-	.sdm = {
-		.reg_off = HHI_MPLL_CNTL7,
-		.shift   = 0,
-		.width   = 14,
+static struct clk_regmap gxbb_fclk_div7 = {
+	.data = &(struct clk_regmap_gate_data){
+		.offset = HHI_MPLL_CNTL6,
+		.bit_idx = 31,
 	},
-	.sdm_en = {
-		.reg_off = HHI_MPLL_CNTL7,
-		.shift   = 15,
-		.width	 = 1,
+	.hw.init = &(struct clk_init_data){
+		.name = "fclk_div7",
+		.ops = &clk_regmap_gate_ops,
+		.parent_names = (const char *[]){ "fclk_div7_div" },
+		.num_parents = 1,
 	},
-	.n2 = {
-		.reg_off = HHI_MPLL_CNTL7,
-		.shift   = 16,
-		.width   = 9,
+};
+
+static struct clk_regmap gxbb_mpll_prediv = {
+	.data = &(struct clk_regmap_div_data){
+		.offset = HHI_MPLL_CNTL5,
+		.shift = 12,
+		.width = 1,
 	},
-	.en = {
-		.reg_off = HHI_MPLL_CNTL7,
-		.shift   = 14,
-		.width	 = 1,
+	.hw.init = &(struct clk_init_data){
+		.name = "mpll_prediv",
+		.ops = &clk_regmap_divider_ro_ops,
+		.parent_names = (const char *[]){ "fixed_pll" },
+		.num_parents = 1,
 	},
-	.ssen = {
-		.reg_off = HHI_MPLL_CNTL,
-		.shift   = 25,
-		.width	 = 1,
+};
+
+static struct clk_regmap gxbb_mpll0_div = {
+	.data = &(struct meson_clk_mpll_data){
+		.sdm = {
+			.reg_off = HHI_MPLL_CNTL7,
+			.shift   = 0,
+			.width   = 14,
+		},
+		.sdm_en = {
+			.reg_off = HHI_MPLL_CNTL7,
+			.shift   = 15,
+			.width	 = 1,
+		},
+		.n2 = {
+			.reg_off = HHI_MPLL_CNTL7,
+			.shift   = 16,
+			.width   = 9,
+		},
+		.ssen = {
+			.reg_off = HHI_MPLL_CNTL,
+			.shift   = 25,
+			.width	 = 1,
+		},
+		.lock = &meson_clk_lock,
 	},
-	.lock = &meson_clk_lock,
 	.hw.init = &(struct clk_init_data){
-		.name = "mpll0",
+		.name = "mpll0_div",
 		.ops = &meson_clk_mpll_ops,
-		.parent_names = (const char *[]){ "fixed_pll" },
+		.parent_names = (const char *[]){ "mpll_prediv" },
 		.num_parents = 1,
 	},
 };
 
-static struct meson_clk_mpll gxbb_mpll1 = {
-	.sdm = {
-		.reg_off = HHI_MPLL_CNTL8,
-		.shift   = 0,
-		.width   = 14,
-	},
-	.sdm_en = {
-		.reg_off = HHI_MPLL_CNTL8,
-		.shift   = 15,
-		.width	 = 1,
+static struct clk_regmap gxbb_mpll0 = {
+	.data = &(struct clk_regmap_gate_data){
+		.offset = HHI_MPLL_CNTL7,
+		.bit_idx = 14,
 	},
-	.n2 = {
-		.reg_off = HHI_MPLL_CNTL8,
-		.shift   = 16,
-		.width   = 9,
+	.hw.init = &(struct clk_init_data){
+		.name = "mpll0",
+		.ops = &clk_regmap_gate_ops,
+		.parent_names = (const char *[]){ "mpll0_div" },
+		.num_parents = 1,
+		.flags = CLK_SET_RATE_PARENT,
 	},
-	.en = {
-		.reg_off = HHI_MPLL_CNTL8,
-		.shift   = 14,
-		.width	 = 1,
+};
+
+static struct clk_regmap gxbb_mpll1_div = {
+	.data = &(struct meson_clk_mpll_data){
+		.sdm = {
+			.reg_off = HHI_MPLL_CNTL8,
+			.shift   = 0,
+			.width   = 14,
+		},
+		.sdm_en = {
+			.reg_off = HHI_MPLL_CNTL8,
+			.shift   = 15,
+			.width	 = 1,
+		},
+		.n2 = {
+			.reg_off = HHI_MPLL_CNTL8,
+			.shift   = 16,
+			.width   = 9,
+		},
+		.lock = &meson_clk_lock,
 	},
-	.lock = &meson_clk_lock,
 	.hw.init = &(struct clk_init_data){
-		.name = "mpll1",
+		.name = "mpll1_div",
 		.ops = &meson_clk_mpll_ops,
-		.parent_names = (const char *[]){ "fixed_pll" },
+		.parent_names = (const char *[]){ "mpll_prediv" },
 		.num_parents = 1,
 	},
 };
 
-static struct meson_clk_mpll gxbb_mpll2 = {
-	.sdm = {
-		.reg_off = HHI_MPLL_CNTL9,
-		.shift   = 0,
-		.width   = 14,
-	},
-	.sdm_en = {
-		.reg_off = HHI_MPLL_CNTL9,
-		.shift   = 15,
-		.width	 = 1,
+static struct clk_regmap gxbb_mpll1 = {
+	.data = &(struct clk_regmap_gate_data){
+		.offset = HHI_MPLL_CNTL8,
+		.bit_idx = 14,
 	},
-	.n2 = {
-		.reg_off = HHI_MPLL_CNTL9,
-		.shift   = 16,
-		.width   = 9,
+	.hw.init = &(struct clk_init_data){
+		.name = "mpll1",
+		.ops = &clk_regmap_gate_ops,
+		.parent_names = (const char *[]){ "mpll1_div" },
+		.num_parents = 1,
+		.flags = CLK_SET_RATE_PARENT,
 	},
-	.en = {
-		.reg_off = HHI_MPLL_CNTL9,
-		.shift   = 14,
-		.width	 = 1,
+};
+
+static struct clk_regmap gxbb_mpll2_div = {
+	.data = &(struct meson_clk_mpll_data){
+		.sdm = {
+			.reg_off = HHI_MPLL_CNTL9,
+			.shift   = 0,
+			.width   = 14,
+		},
+		.sdm_en = {
+			.reg_off = HHI_MPLL_CNTL9,
+			.shift   = 15,
+			.width	 = 1,
+		},
+		.n2 = {
+			.reg_off = HHI_MPLL_CNTL9,
+			.shift   = 16,
+			.width   = 9,
+		},
+		.lock = &meson_clk_lock,
 	},
-	.lock = &meson_clk_lock,
 	.hw.init = &(struct clk_init_data){
-		.name = "mpll2",
+		.name = "mpll2_div",
 		.ops = &meson_clk_mpll_ops,
-		.parent_names = (const char *[]){ "fixed_pll" },
+		.parent_names = (const char *[]){ "mpll_prediv" },
 		.num_parents = 1,
 	},
 };
 
-/*
- * FIXME The legacy composite clocks (e.g. clk81) are both PLL post-dividers
- * and should be modeled with their respective PLLs via the forthcoming
- * coordinated clock rates feature
- */
+static struct clk_regmap gxbb_mpll2 = {	/* gate on the mpll2_div output */
+	.data = &(struct clk_regmap_gate_data){
+		.offset = HHI_MPLL_CNTL9,
+		.bit_idx = 14,	/* between sdm (13:0) and sdm_en (15) */
+	},
+	.hw.init = &(struct clk_init_data){
+		.name = "mpll2",
+		.ops = &clk_regmap_gate_ops,
+		.parent_names = (const char *[]){ "mpll2_div" },
+		.num_parents = 1,
+		.flags = CLK_SET_RATE_PARENT,	/* rate requests go to mpll2_div */
+	},
+};
 
 static u32 mux_table_clk81[]	= { 0, 2, 3, 4, 5, 6, 7 };
 static const char * const clk81_parent_names[] = {
@@ -614,16 +758,16 @@ static const char * const clk81_parent_names[] = {
 	"fclk_div3", "fclk_div5"
 };
 
-static struct clk_mux gxbb_mpeg_clk_sel = {
-	.reg = (void *)HHI_MPEG_CLK_CNTL,
-	.mask = 0x7,
-	.shift = 12,
-	.flags = CLK_MUX_READ_ONLY,
-	.table = mux_table_clk81,
-	.lock = &meson_clk_lock,
+static struct clk_regmap gxbb_mpeg_clk_sel = {
+	.data = &(struct clk_regmap_mux_data){
+		.offset = HHI_MPEG_CLK_CNTL,
+		.mask = 0x7,
+		.shift = 12,
+		.table = mux_table_clk81,
+	},
 	.hw.init = &(struct clk_init_data){
 		.name = "mpeg_clk_sel",
-		.ops = &clk_mux_ro_ops,
+		.ops = &clk_regmap_mux_ro_ops,
 		/*
 		 * bits 14:12 selects from 8 possible parents:
 		 * xtal, 1'b0 (wtf), fclk_div7, mpll_clkout1, mpll_clkout2,
@@ -631,72 +775,75 @@ static struct clk_mux gxbb_mpeg_clk_sel = {
 		 */
 		.parent_names = clk81_parent_names,
 		.num_parents = ARRAY_SIZE(clk81_parent_names),
-		.flags = (CLK_SET_RATE_NO_REPARENT | CLK_IGNORE_UNUSED),
 	},
 };
 
-static struct clk_divider gxbb_mpeg_clk_div = {
-	.reg = (void *)HHI_MPEG_CLK_CNTL,
-	.shift = 0,
-	.width = 7,
-	.lock = &meson_clk_lock,
+static struct clk_regmap gxbb_mpeg_clk_div = {
+	.data = &(struct clk_regmap_div_data){
+		.offset = HHI_MPEG_CLK_CNTL,
+		.shift = 0,
+		.width = 7,
+	},
 	.hw.init = &(struct clk_init_data){
 		.name = "mpeg_clk_div",
-		.ops = &clk_divider_ops,
+		.ops = &clk_regmap_divider_ro_ops,
 		.parent_names = (const char *[]){ "mpeg_clk_sel" },
 		.num_parents = 1,
-		.flags = (CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED),
 	},
 };
 
-/* the mother of dragons^W gates */
-static struct clk_gate gxbb_clk81 = {
-	.reg = (void *)HHI_MPEG_CLK_CNTL,
-	.bit_idx = 7,
-	.lock = &meson_clk_lock,
+/* the mother of dragons gates */
+static struct clk_regmap gxbb_clk81 = {
+	.data = &(struct clk_regmap_gate_data){
+		.offset = HHI_MPEG_CLK_CNTL,
+		.bit_idx = 7,
+	},
 	.hw.init = &(struct clk_init_data){
 		.name = "clk81",
-		.ops = &clk_gate_ops,
+		.ops = &clk_regmap_gate_ops,
 		.parent_names = (const char *[]){ "mpeg_clk_div" },
 		.num_parents = 1,
-		.flags = (CLK_SET_RATE_PARENT | CLK_IS_CRITICAL),
+		.flags = CLK_IS_CRITICAL,
 	},
 };
 
-static struct clk_mux gxbb_sar_adc_clk_sel = {
-	.reg = (void *)HHI_SAR_CLK_CNTL,
-	.mask = 0x3,
-	.shift = 9,
-	.lock = &meson_clk_lock,
+static struct clk_regmap gxbb_sar_adc_clk_sel = {
+	.data = &(struct clk_regmap_mux_data){
+		.offset = HHI_SAR_CLK_CNTL,
+		.mask = 0x3,
+		.shift = 9,
+	},
 	.hw.init = &(struct clk_init_data){
 		.name = "sar_adc_clk_sel",
-		.ops = &clk_mux_ops,
+		.ops = &clk_regmap_mux_ops,
 		/* NOTE: The datasheet doesn't list the parents for bit 10 */
 		.parent_names = (const char *[]){ "xtal", "clk81", },
 		.num_parents = 2,
 	},
 };
 
-static struct clk_divider gxbb_sar_adc_clk_div = {
-	.reg = (void *)HHI_SAR_CLK_CNTL,
-	.shift = 0,
-	.width = 8,
-	.lock = &meson_clk_lock,
+static struct clk_regmap gxbb_sar_adc_clk_div = {
+	.data = &(struct clk_regmap_div_data){
+		.offset = HHI_SAR_CLK_CNTL,
+		.shift = 0,
+		.width = 8,
+	},
 	.hw.init = &(struct clk_init_data){
 		.name = "sar_adc_clk_div",
-		.ops = &clk_divider_ops,
+		.ops = &clk_regmap_divider_ops,
 		.parent_names = (const char *[]){ "sar_adc_clk_sel" },
 		.num_parents = 1,
 	},
 };
 
-static struct clk_gate gxbb_sar_adc_clk = {
-	.reg = (void *)HHI_SAR_CLK_CNTL,
-	.bit_idx = 8,
-	.lock = &meson_clk_lock,
+static struct clk_regmap gxbb_sar_adc_clk = {
+	.data = &(struct clk_regmap_gate_data){
+		.offset = HHI_SAR_CLK_CNTL,
+		.bit_idx = 8,
+	},
 	.hw.init = &(struct clk_init_data){
 		.name = "sar_adc_clk",
-		.ops = &clk_gate_ops,
+		.ops = &clk_regmap_gate_ops,
 		.parent_names = (const char *[]){ "sar_adc_clk_div" },
 		.num_parents = 1,
 		.flags = CLK_SET_RATE_PARENT,
@@ -708,21 +855,20 @@ static struct clk_gate gxbb_sar_adc_clk = {
  * muxed by a glitch-free switch.
  */
 
-static u32 mux_table_mali_0_1[] = {0, 1, 2, 3, 4, 5, 6, 7};
 static const char * const gxbb_mali_0_1_parent_names[] = {
 	"xtal", "gp0_pll", "mpll2", "mpll1", "fclk_div7",
 	"fclk_div4", "fclk_div3", "fclk_div5"
 };
 
-static struct clk_mux gxbb_mali_0_sel = {
-	.reg = (void *)HHI_MALI_CLK_CNTL,
-	.mask = 0x7,
-	.shift = 9,
-	.table = mux_table_mali_0_1,
-	.lock = &meson_clk_lock,
+static struct clk_regmap gxbb_mali_0_sel = {
+	.data = &(struct clk_regmap_mux_data){
+		.offset = HHI_MALI_CLK_CNTL,
+		.mask = 0x7,
+		.shift = 9,
+	},
 	.hw.init = &(struct clk_init_data){
 		.name = "mali_0_sel",
-		.ops = &clk_mux_ops,
+		.ops = &clk_regmap_mux_ops,
 		/*
 		 * bits 10:9 selects from 8 possible parents:
 		 * xtal, gp0_pll, mpll2, mpll1, fclk_div7,
@@ -734,42 +880,44 @@ static struct clk_mux gxbb_mali_0_sel = {
 	},
 };
 
-static struct clk_divider gxbb_mali_0_div = {
-	.reg = (void *)HHI_MALI_CLK_CNTL,
-	.shift = 0,
-	.width = 7,
-	.lock = &meson_clk_lock,
+static struct clk_regmap gxbb_mali_0_div = {
+	.data = &(struct clk_regmap_div_data){
+		.offset = HHI_MALI_CLK_CNTL,
+		.shift = 0,
+		.width = 7,
+	},
 	.hw.init = &(struct clk_init_data){
 		.name = "mali_0_div",
-		.ops = &clk_divider_ops,
+		.ops = &clk_regmap_divider_ops,
 		.parent_names = (const char *[]){ "mali_0_sel" },
 		.num_parents = 1,
 		.flags = CLK_SET_RATE_NO_REPARENT,
 	},
 };
 
-static struct clk_gate gxbb_mali_0 = {
-	.reg = (void *)HHI_MALI_CLK_CNTL,
-	.bit_idx = 8,
-	.lock = &meson_clk_lock,
+static struct clk_regmap gxbb_mali_0 = {
+	.data = &(struct clk_regmap_gate_data){
+		.offset = HHI_MALI_CLK_CNTL,
+		.bit_idx = 8,
+	},
 	.hw.init = &(struct clk_init_data){
 		.name = "mali_0",
-		.ops = &clk_gate_ops,
+		.ops = &clk_regmap_gate_ops,
 		.parent_names = (const char *[]){ "mali_0_div" },
 		.num_parents = 1,
 		.flags = CLK_SET_RATE_PARENT,
 	},
 };
 
-static struct clk_mux gxbb_mali_1_sel = {
-	.reg = (void *)HHI_MALI_CLK_CNTL,
-	.mask = 0x7,
-	.shift = 25,
-	.table = mux_table_mali_0_1,
-	.lock = &meson_clk_lock,
+static struct clk_regmap gxbb_mali_1_sel = {
+	.data = &(struct clk_regmap_mux_data){
+		.offset = HHI_MALI_CLK_CNTL,
+		.mask = 0x7,
+		.shift = 25,
+	},
 	.hw.init = &(struct clk_init_data){
 		.name = "mali_1_sel",
-		.ops = &clk_mux_ops,
+		.ops = &clk_regmap_mux_ops,
 		/*
 		 * bits 10:9 selects from 8 possible parents:
 		 * xtal, gp0_pll, mpll2, mpll1, fclk_div7,
@@ -781,77 +929,79 @@ static struct clk_mux gxbb_mali_1_sel = {
 	},
 };
 
-static struct clk_divider gxbb_mali_1_div = {
-	.reg = (void *)HHI_MALI_CLK_CNTL,
-	.shift = 16,
-	.width = 7,
-	.lock = &meson_clk_lock,
+static struct clk_regmap gxbb_mali_1_div = {
+	.data = &(struct clk_regmap_div_data){
+		.offset = HHI_MALI_CLK_CNTL,
+		.shift = 16,
+		.width = 7,
+	},
 	.hw.init = &(struct clk_init_data){
 		.name = "mali_1_div",
-		.ops = &clk_divider_ops,
+		.ops = &clk_regmap_divider_ops,
 		.parent_names = (const char *[]){ "mali_1_sel" },
 		.num_parents = 1,
 		.flags = CLK_SET_RATE_NO_REPARENT,
 	},
 };
 
-static struct clk_gate gxbb_mali_1 = {
-	.reg = (void *)HHI_MALI_CLK_CNTL,
-	.bit_idx = 24,
-	.lock = &meson_clk_lock,
+static struct clk_regmap gxbb_mali_1 = {
+	.data = &(struct clk_regmap_gate_data){
+		.offset = HHI_MALI_CLK_CNTL,
+		.bit_idx = 24,
+	},
 	.hw.init = &(struct clk_init_data){
 		.name = "mali_1",
-		.ops = &clk_gate_ops,
+		.ops = &clk_regmap_gate_ops,
 		.parent_names = (const char *[]){ "mali_1_div" },
 		.num_parents = 1,
 		.flags = CLK_SET_RATE_PARENT,
 	},
 };
 
-static u32 mux_table_mali[] = {0, 1};
 static const char * const gxbb_mali_parent_names[] = {
 	"mali_0", "mali_1"
 };
 
-static struct clk_mux gxbb_mali = {
-	.reg = (void *)HHI_MALI_CLK_CNTL,
-	.mask = 1,
-	.shift = 31,
-	.table = mux_table_mali,
-	.lock = &meson_clk_lock,
+static struct clk_regmap gxbb_mali = {
+	.data = &(struct clk_regmap_mux_data){
+		.offset = HHI_MALI_CLK_CNTL,
+		.mask = 1,
+		.shift = 31,
+	},
 	.hw.init = &(struct clk_init_data){
 		.name = "mali",
-		.ops = &clk_mux_ops,
+		.ops = &clk_regmap_mux_ops,
 		.parent_names = gxbb_mali_parent_names,
 		.num_parents = 2,
 		.flags = CLK_SET_RATE_NO_REPARENT,
 	},
 };
 
-static struct clk_mux gxbb_cts_amclk_sel = {
-	.reg = (void *) HHI_AUD_CLK_CNTL,
-	.mask = 0x3,
-	.shift = 9,
-	/* Default parent unknown (register reset value: 0) */
-	.table = (u32[]){ 1, 2, 3 },
-	.lock = &meson_clk_lock,
-		.hw.init = &(struct clk_init_data){
+static struct clk_regmap gxbb_cts_amclk_sel = {
+	.data = &(struct clk_regmap_mux_data){
+		.offset = HHI_AUD_CLK_CNTL,
+		.mask = 0x3,
+		.shift = 9,
+		.table = (u32[]){ 1, 2, 3 },
+	},
+	.hw.init = &(struct clk_init_data){
 		.name = "cts_amclk_sel",
-		.ops = &clk_mux_ops,
+		.ops = &clk_regmap_mux_ops,
 		.parent_names = (const char *[]){ "mpll0", "mpll1", "mpll2" },
 		.num_parents = 3,
 		.flags = CLK_SET_RATE_PARENT,
 	},
 };
 
-static struct meson_clk_audio_divider gxbb_cts_amclk_div = {
-	.div = {
-		.reg_off = HHI_AUD_CLK_CNTL,
-		.shift   = 0,
-		.width   = 8,
+static struct clk_regmap gxbb_cts_amclk_div = {
+	.data = &(struct meson_clk_audio_div_data){
+		.div = {
+			.reg_off = HHI_AUD_CLK_CNTL,
+			.shift   = 0,
+			.width   = 8,
+		},
+		.flags = CLK_DIVIDER_ROUND_CLOSEST,
 	},
-	.flags = CLK_DIVIDER_ROUND_CLOSEST,
-	.lock = &meson_clk_lock,
 	.hw.init = &(struct clk_init_data){
 		.name = "cts_amclk_div",
 		.ops = &meson_clk_audio_divider_ops,
@@ -861,71 +1011,75 @@ static struct meson_clk_audio_divider gxbb_cts_amclk_div = {
 	},
 };
 
-static struct clk_gate gxbb_cts_amclk = {
-	.reg = (void *) HHI_AUD_CLK_CNTL,
-	.bit_idx = 8,
-	.lock = &meson_clk_lock,
+static struct clk_regmap gxbb_cts_amclk = {
+	.data = &(struct clk_regmap_gate_data){
+		.offset = HHI_AUD_CLK_CNTL,
+		.bit_idx = 8,
+	},
 	.hw.init = &(struct clk_init_data){
 		.name = "cts_amclk",
-		.ops = &clk_gate_ops,
+		.ops = &clk_regmap_gate_ops,
 		.parent_names = (const char *[]){ "cts_amclk_div" },
 		.num_parents = 1,
 		.flags = CLK_SET_RATE_PARENT,
 	},
 };
 
-static struct clk_mux gxbb_cts_mclk_i958_sel = {
-	.reg = (void *)HHI_AUD_CLK_CNTL2,
-	.mask = 0x3,
-	.shift = 25,
-	/* Default parent unknown (register reset value: 0) */
-	.table = (u32[]){ 1, 2, 3 },
-	.lock = &meson_clk_lock,
+static struct clk_regmap gxbb_cts_mclk_i958_sel = {
+	.data = &(struct clk_regmap_mux_data){
+		.offset = HHI_AUD_CLK_CNTL2,
+		.mask = 0x3,
+		.shift = 25,
+		.table = (u32[]){ 1, 2, 3 },
+	},
 	.hw.init = &(struct clk_init_data) {
 		.name = "cts_mclk_i958_sel",
-		.ops = &clk_mux_ops,
+		.ops = &clk_regmap_mux_ops,
 		.parent_names = (const char *[]){ "mpll0", "mpll1", "mpll2" },
 		.num_parents = 3,
 		.flags = CLK_SET_RATE_PARENT,
 	},
 };
 
-static struct clk_divider gxbb_cts_mclk_i958_div = {
-	.reg = (void *)HHI_AUD_CLK_CNTL2,
-	.shift = 16,
-	.width = 8,
-	.lock = &meson_clk_lock,
-	.flags = CLK_DIVIDER_ROUND_CLOSEST,
+static struct clk_regmap gxbb_cts_mclk_i958_div = {
+	.data = &(struct clk_regmap_div_data){
+		.offset = HHI_AUD_CLK_CNTL2,
+		.shift = 16,
+		.width = 8,
+		.flags = CLK_DIVIDER_ROUND_CLOSEST,
+	},
 	.hw.init = &(struct clk_init_data) {
 		.name = "cts_mclk_i958_div",
-		.ops = &clk_divider_ops,
+		.ops = &clk_regmap_divider_ops,
 		.parent_names = (const char *[]){ "cts_mclk_i958_sel" },
 		.num_parents = 1,
 		.flags = CLK_SET_RATE_PARENT,
 	},
 };
 
-static struct clk_gate gxbb_cts_mclk_i958 = {
-	.reg = (void *)HHI_AUD_CLK_CNTL2,
-	.bit_idx = 24,
-	.lock = &meson_clk_lock,
+static struct clk_regmap gxbb_cts_mclk_i958 = {
+	.data = &(struct clk_regmap_gate_data){
+		.offset = HHI_AUD_CLK_CNTL2,
+		.bit_idx = 24,
+	},
 	.hw.init = &(struct clk_init_data){
 		.name = "cts_mclk_i958",
-		.ops = &clk_gate_ops,
+		.ops = &clk_regmap_gate_ops,
 		.parent_names = (const char *[]){ "cts_mclk_i958_div" },
 		.num_parents = 1,
 		.flags = CLK_SET_RATE_PARENT,
 	},
 };
 
-static struct clk_mux gxbb_cts_i958 = {
-	.reg = (void *)HHI_AUD_CLK_CNTL2,
-	.mask = 0x1,
-	.shift = 27,
-	.lock = &meson_clk_lock,
-		.hw.init = &(struct clk_init_data){
+static struct clk_regmap gxbb_cts_i958 = {
+	.data = &(struct clk_regmap_mux_data){
+		.offset = HHI_AUD_CLK_CNTL2,
+		.mask = 0x1,
+		.shift = 27,	/* 1-bit select: cts_amclk or cts_mclk_i958 */
+	},
+	.hw.init = &(struct clk_init_data){
 		.name = "cts_i958",
-		.ops = &clk_mux_ops,
+		.ops = &clk_regmap_mux_ops,
 		.parent_names = (const char *[]){ "cts_amclk", "cts_mclk_i958" },
 		.num_parents = 2,
 		/*
@@ -936,27 +1090,29 @@ static struct clk_mux gxbb_cts_i958 = {
 	},
 };
 
-static struct clk_divider gxbb_32k_clk_div = {
-	.reg = (void *)HHI_32K_CLK_CNTL,
-	.shift = 0,
-	.width = 14,
-	.lock = &meson_clk_lock,
+static struct clk_regmap gxbb_32k_clk_div = {
+	.data = &(struct clk_regmap_div_data){
+		.offset = HHI_32K_CLK_CNTL,
+		.shift = 0,
+		.width = 14,
+	},
 	.hw.init = &(struct clk_init_data){
 		.name = "32k_clk_div",
-		.ops = &clk_divider_ops,
+		.ops = &clk_regmap_divider_ops,
 		.parent_names = (const char *[]){ "32k_clk_sel" },
 		.num_parents = 1,
 		.flags = CLK_SET_RATE_PARENT | CLK_DIVIDER_ROUND_CLOSEST,
 	},
 };
 
-static struct clk_gate gxbb_32k_clk = {
-	.reg = (void *)HHI_32K_CLK_CNTL,
-	.bit_idx = 15,
-	.lock = &meson_clk_lock,
+static struct clk_regmap gxbb_32k_clk = {
+	.data = &(struct clk_regmap_gate_data){
+		.offset = HHI_32K_CLK_CNTL,
+		.bit_idx = 15,
+	},
 	.hw.init = &(struct clk_init_data){
 		.name = "32k_clk",
-		.ops = &clk_gate_ops,
+		.ops = &clk_regmap_gate_ops,
 		.parent_names = (const char *[]){ "32k_clk_div" },
 		.num_parents = 1,
 		.flags = CLK_SET_RATE_PARENT,
@@ -967,14 +1123,15 @@ static const char * const gxbb_32k_clk_parent_names[] = {
 	"xtal", "cts_slow_oscin", "fclk_div3", "fclk_div5"
 };
 
-static struct clk_mux gxbb_32k_clk_sel = {
-	.reg = (void *)HHI_32K_CLK_CNTL,
-	.mask = 0x3,
-	.shift = 16,
-	.lock = &meson_clk_lock,
-		.hw.init = &(struct clk_init_data){
+static struct clk_regmap gxbb_32k_clk_sel = {
+	.data = &(struct clk_regmap_mux_data){
+		.offset = HHI_32K_CLK_CNTL,
+		.mask = 0x3,
+		.shift = 16,	/* 2-bit select among the four listed parents */
+	},
+	.hw.init = &(struct clk_init_data){
 		.name = "32k_clk_sel",
-		.ops = &clk_mux_ops,
+		.ops = &clk_regmap_mux_ops,
 		.parent_names = gxbb_32k_clk_parent_names,
 		.num_parents = 4,
 		.flags = CLK_SET_RATE_PARENT,
@@ -993,42 +1150,45 @@ static const char * const gxbb_sd_emmc_clk0_parent_names[] = {
 };
 
 /* SDIO clock */
-static struct clk_mux gxbb_sd_emmc_a_clk0_sel = {
-	.reg = (void *)HHI_SD_EMMC_CLK_CNTL,
-	.mask = 0x7,
-	.shift = 9,
-	.lock = &meson_clk_lock,
+static struct clk_regmap gxbb_sd_emmc_a_clk0_sel = {
+	.data = &(struct clk_regmap_mux_data){
+		.offset = HHI_SD_EMMC_CLK_CNTL,
+		.mask = 0x7,
+		.shift = 9,
+	},
 	.hw.init = &(struct clk_init_data) {
 		.name = "sd_emmc_a_clk0_sel",
-		.ops = &clk_mux_ops,
+		.ops = &clk_regmap_mux_ops,
 		.parent_names = gxbb_sd_emmc_clk0_parent_names,
 		.num_parents = ARRAY_SIZE(gxbb_sd_emmc_clk0_parent_names),
 		.flags = CLK_SET_RATE_PARENT,
 	},
 };
 
-static struct clk_divider gxbb_sd_emmc_a_clk0_div = {
-	.reg = (void *)HHI_SD_EMMC_CLK_CNTL,
-	.shift = 0,
-	.width = 7,
-	.lock = &meson_clk_lock,
-	.flags = CLK_DIVIDER_ROUND_CLOSEST,
+static struct clk_regmap gxbb_sd_emmc_a_clk0_div = {
+	.data = &(struct clk_regmap_div_data){
+		.offset = HHI_SD_EMMC_CLK_CNTL,
+		.shift = 0,
+		.width = 7,
+		.flags = CLK_DIVIDER_ROUND_CLOSEST,
+	},
 	.hw.init = &(struct clk_init_data) {
 		.name = "sd_emmc_a_clk0_div",
-		.ops = &clk_divider_ops,
+		.ops = &clk_regmap_divider_ops,
 		.parent_names = (const char *[]){ "sd_emmc_a_clk0_sel" },
 		.num_parents = 1,
 		.flags = CLK_SET_RATE_PARENT,
 	},
 };
 
-static struct clk_gate gxbb_sd_emmc_a_clk0 = {
-	.reg = (void *)HHI_SD_EMMC_CLK_CNTL,
-	.bit_idx = 7,
-	.lock = &meson_clk_lock,
+static struct clk_regmap gxbb_sd_emmc_a_clk0 = {
+	.data = &(struct clk_regmap_gate_data){
+		.offset = HHI_SD_EMMC_CLK_CNTL,
+		.bit_idx = 7,
+	},
 	.hw.init = &(struct clk_init_data){
 		.name = "sd_emmc_a_clk0",
-		.ops = &clk_gate_ops,
+		.ops = &clk_regmap_gate_ops,
 		.parent_names = (const char *[]){ "sd_emmc_a_clk0_div" },
 		.num_parents = 1,
 		.flags = CLK_SET_RATE_PARENT,
@@ -1036,42 +1196,45 @@ static struct clk_gate gxbb_sd_emmc_a_clk0 = {
 };
 
 /* SDcard clock */
-static struct clk_mux gxbb_sd_emmc_b_clk0_sel = {
-	.reg = (void *)HHI_SD_EMMC_CLK_CNTL,
-	.mask = 0x7,
-	.shift = 25,
-	.lock = &meson_clk_lock,
+static struct clk_regmap gxbb_sd_emmc_b_clk0_sel = {
+	.data = &(struct clk_regmap_mux_data){
+		.offset = HHI_SD_EMMC_CLK_CNTL,
+		.mask = 0x7,
+		.shift = 25,
+	},
 	.hw.init = &(struct clk_init_data) {
 		.name = "sd_emmc_b_clk0_sel",
-		.ops = &clk_mux_ops,
+		.ops = &clk_regmap_mux_ops,
 		.parent_names = gxbb_sd_emmc_clk0_parent_names,
 		.num_parents = ARRAY_SIZE(gxbb_sd_emmc_clk0_parent_names),
 		.flags = CLK_SET_RATE_PARENT,
 	},
 };
 
-static struct clk_divider gxbb_sd_emmc_b_clk0_div = {
-	.reg = (void *)HHI_SD_EMMC_CLK_CNTL,
-	.shift = 16,
-	.width = 7,
-	.lock = &meson_clk_lock,
-	.flags = CLK_DIVIDER_ROUND_CLOSEST,
+static struct clk_regmap gxbb_sd_emmc_b_clk0_div = {
+	.data = &(struct clk_regmap_div_data){
+		.offset = HHI_SD_EMMC_CLK_CNTL,
+		.shift = 16,
+		.width = 7,
+		.flags = CLK_DIVIDER_ROUND_CLOSEST,
+	},
 	.hw.init = &(struct clk_init_data) {
 		.name = "sd_emmc_b_clk0_div",
-		.ops = &clk_divider_ops,
+		.ops = &clk_regmap_divider_ops,
 		.parent_names = (const char *[]){ "sd_emmc_b_clk0_sel" },
 		.num_parents = 1,
 		.flags = CLK_SET_RATE_PARENT,
 	},
 };
 
-static struct clk_gate gxbb_sd_emmc_b_clk0 = {
-	.reg = (void *)HHI_SD_EMMC_CLK_CNTL,
-	.bit_idx = 23,
-	.lock = &meson_clk_lock,
+static struct clk_regmap gxbb_sd_emmc_b_clk0 = {
+	.data = &(struct clk_regmap_gate_data){
+		.offset = HHI_SD_EMMC_CLK_CNTL,
+		.bit_idx = 23,
+	},
 	.hw.init = &(struct clk_init_data){
 		.name = "sd_emmc_b_clk0",
-		.ops = &clk_gate_ops,
+		.ops = &clk_regmap_gate_ops,
 		.parent_names = (const char *[]){ "sd_emmc_b_clk0_div" },
 		.num_parents = 1,
 		.flags = CLK_SET_RATE_PARENT,
@@ -1079,42 +1242,45 @@ static struct clk_gate gxbb_sd_emmc_b_clk0 = {
 };
 
 /* EMMC/NAND clock */
-static struct clk_mux gxbb_sd_emmc_c_clk0_sel = {
-	.reg = (void *)HHI_NAND_CLK_CNTL,
-	.mask = 0x7,
-	.shift = 9,
-	.lock = &meson_clk_lock,
+static struct clk_regmap gxbb_sd_emmc_c_clk0_sel = {
+	.data = &(struct clk_regmap_mux_data){
+		.offset = HHI_NAND_CLK_CNTL,
+		.mask = 0x7,
+		.shift = 9,
+	},
 	.hw.init = &(struct clk_init_data) {
 		.name = "sd_emmc_c_clk0_sel",
-		.ops = &clk_mux_ops,
+		.ops = &clk_regmap_mux_ops,
 		.parent_names = gxbb_sd_emmc_clk0_parent_names,
 		.num_parents = ARRAY_SIZE(gxbb_sd_emmc_clk0_parent_names),
 		.flags = CLK_SET_RATE_PARENT,
 	},
 };
 
-static struct clk_divider gxbb_sd_emmc_c_clk0_div = {
-	.reg = (void *)HHI_NAND_CLK_CNTL,
-	.shift = 0,
-	.width = 7,
-	.lock = &meson_clk_lock,
-	.flags = CLK_DIVIDER_ROUND_CLOSEST,
+static struct clk_regmap gxbb_sd_emmc_c_clk0_div = {
+	.data = &(struct clk_regmap_div_data){
+		.offset = HHI_NAND_CLK_CNTL,
+		.shift = 0,
+		.width = 7,
+		.flags = CLK_DIVIDER_ROUND_CLOSEST,
+	},
 	.hw.init = &(struct clk_init_data) {
 		.name = "sd_emmc_c_clk0_div",
-		.ops = &clk_divider_ops,
+		.ops = &clk_regmap_divider_ops,
 		.parent_names = (const char *[]){ "sd_emmc_c_clk0_sel" },
 		.num_parents = 1,
 		.flags = CLK_SET_RATE_PARENT,
 	},
 };
 
-static struct clk_gate gxbb_sd_emmc_c_clk0 = {
-	.reg = (void *)HHI_NAND_CLK_CNTL,
-	.bit_idx = 7,
-	.lock = &meson_clk_lock,
+static struct clk_regmap gxbb_sd_emmc_c_clk0 = {
+	.data = &(struct clk_regmap_gate_data){
+		.offset = HHI_NAND_CLK_CNTL,
+		.bit_idx = 7,
+	},
 	.hw.init = &(struct clk_init_data){
 		.name = "sd_emmc_c_clk0",
-		.ops = &clk_gate_ops,
+		.ops = &clk_regmap_gate_ops,
 		.parent_names = (const char *[]){ "sd_emmc_c_clk0_div" },
 		.num_parents = 1,
 		.flags = CLK_SET_RATE_PARENT,
@@ -1123,20 +1289,19 @@ static struct clk_gate gxbb_sd_emmc_c_clk0 = {
 
 /* VPU Clock */
 
-static u32 mux_table_vpu[] = {0, 1, 2, 3};
 static const char * const gxbb_vpu_parent_names[] = {
 	"fclk_div4", "fclk_div3", "fclk_div5", "fclk_div7"
 };
 
-static struct clk_mux gxbb_vpu_0_sel = {
-	.reg = (void *)HHI_VPU_CLK_CNTL,
-	.mask = 0x3,
-	.shift = 9,
-	.lock = &meson_clk_lock,
-	.table = mux_table_vpu,
+static struct clk_regmap gxbb_vpu_0_sel = {
+	.data = &(struct clk_regmap_mux_data){
+		.offset = HHI_VPU_CLK_CNTL,
+		.mask = 0x3,
+		.shift = 9,
+	},
 	.hw.init = &(struct clk_init_data){
 		.name = "vpu_0_sel",
-		.ops = &clk_mux_ops,
+		.ops = &clk_regmap_mux_ops,
 		/*
 		 * bits 9:10 selects from 4 possible parents:
 		 * fclk_div4, fclk_div3, fclk_div5, fclk_div7,
@@ -1147,42 +1312,44 @@ static struct clk_mux gxbb_vpu_0_sel = {
 	},
 };
 
-static struct clk_divider gxbb_vpu_0_div = {
-	.reg = (void *)HHI_VPU_CLK_CNTL,
-	.shift = 0,
-	.width = 7,
-	.lock = &meson_clk_lock,
+static struct clk_regmap gxbb_vpu_0_div = {
+	.data = &(struct clk_regmap_div_data){
+		.offset = HHI_VPU_CLK_CNTL,
+		.shift = 0,
+		.width = 7,
+	},
 	.hw.init = &(struct clk_init_data){
 		.name = "vpu_0_div",
-		.ops = &clk_divider_ops,
+		.ops = &clk_regmap_divider_ops,
 		.parent_names = (const char *[]){ "vpu_0_sel" },
 		.num_parents = 1,
 		.flags = CLK_SET_RATE_PARENT,
 	},
 };
 
-static struct clk_gate gxbb_vpu_0 = {
-	.reg = (void *)HHI_VPU_CLK_CNTL,
-	.bit_idx = 8,
-	.lock = &meson_clk_lock,
+static struct clk_regmap gxbb_vpu_0 = {
+	.data = &(struct clk_regmap_gate_data){
+		.offset = HHI_VPU_CLK_CNTL,
+		.bit_idx = 8,
+	},
 	.hw.init = &(struct clk_init_data) {
 		.name = "vpu_0",
-		.ops = &clk_gate_ops,
+		.ops = &clk_regmap_gate_ops,
 		.parent_names = (const char *[]){ "vpu_0_div" },
 		.num_parents = 1,
 		.flags = CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED,
 	},
 };
 
-static struct clk_mux gxbb_vpu_1_sel = {
-	.reg = (void *)HHI_VPU_CLK_CNTL,
-	.mask = 0x3,
-	.shift = 25,
-	.lock = &meson_clk_lock,
-	.table = mux_table_vpu,
+static struct clk_regmap gxbb_vpu_1_sel = {
+	.data = &(struct clk_regmap_mux_data){
+		.offset = HHI_VPU_CLK_CNTL,
+		.mask = 0x3,
+		.shift = 25,
+	},
 	.hw.init = &(struct clk_init_data){
 		.name = "vpu_1_sel",
-		.ops = &clk_mux_ops,
+		.ops = &clk_regmap_mux_ops,
 		/*
 		 * bits 25:26 selects from 4 possible parents:
 		 * fclk_div4, fclk_div3, fclk_div5, fclk_div7,
@@ -1193,41 +1360,44 @@ static struct clk_mux gxbb_vpu_1_sel = {
 	},
 };
 
-static struct clk_divider gxbb_vpu_1_div = {
-	.reg = (void *)HHI_VPU_CLK_CNTL,
-	.shift = 16,
-	.width = 7,
-	.lock = &meson_clk_lock,
+static struct clk_regmap gxbb_vpu_1_div = {
+	.data = &(struct clk_regmap_div_data){
+		.offset = HHI_VPU_CLK_CNTL,
+		.shift = 16,
+		.width = 7,
+	},
 	.hw.init = &(struct clk_init_data){
 		.name = "vpu_1_div",
-		.ops = &clk_divider_ops,
+		.ops = &clk_regmap_divider_ops,
 		.parent_names = (const char *[]){ "vpu_1_sel" },
 		.num_parents = 1,
 		.flags = CLK_SET_RATE_PARENT,
 	},
 };
 
-static struct clk_gate gxbb_vpu_1 = {
-	.reg = (void *)HHI_VPU_CLK_CNTL,
-	.bit_idx = 24,
-	.lock = &meson_clk_lock,
+static struct clk_regmap gxbb_vpu_1 = {
+	.data = &(struct clk_regmap_gate_data){
+		.offset = HHI_VPU_CLK_CNTL,
+		.bit_idx = 24,
+	},
 	.hw.init = &(struct clk_init_data) {
 		.name = "vpu_1",
-		.ops = &clk_gate_ops,
+		.ops = &clk_regmap_gate_ops,
 		.parent_names = (const char *[]){ "vpu_1_div" },
 		.num_parents = 1,
 		.flags = CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED,
 	},
 };
 
-static struct clk_mux gxbb_vpu = {
-	.reg = (void *)HHI_VPU_CLK_CNTL,
-	.mask = 1,
-	.shift = 31,
-	.lock = &meson_clk_lock,
+static struct clk_regmap gxbb_vpu = {
+	.data = &(struct clk_regmap_mux_data){
+		.offset = HHI_VPU_CLK_CNTL,
+		.mask = 1,
+		.shift = 31,
+	},
 	.hw.init = &(struct clk_init_data){
 		.name = "vpu",
-		.ops = &clk_mux_ops,
+		.ops = &clk_regmap_mux_ops,
 		/*
 		 * bit 31 selects from 2 possible parents:
 		 * vpu_0 or vpu_1
@@ -1240,20 +1410,19 @@ static struct clk_mux gxbb_vpu = {
 
 /* VAPB Clock */
 
-static u32 mux_table_vapb[] = {0, 1, 2, 3};
 static const char * const gxbb_vapb_parent_names[] = {
 	"fclk_div4", "fclk_div3", "fclk_div5", "fclk_div7"
 };
 
-static struct clk_mux gxbb_vapb_0_sel = {
-	.reg = (void *)HHI_VAPBCLK_CNTL,
-	.mask = 0x3,
-	.shift = 9,
-	.lock = &meson_clk_lock,
-	.table = mux_table_vapb,
+static struct clk_regmap gxbb_vapb_0_sel = {
+	.data = &(struct clk_regmap_mux_data){
+		.offset = HHI_VAPBCLK_CNTL,
+		.mask = 0x3,
+		.shift = 9,
+	},
 	.hw.init = &(struct clk_init_data){
 		.name = "vapb_0_sel",
-		.ops = &clk_mux_ops,
+		.ops = &clk_regmap_mux_ops,
 		/*
 		 * bits 9:10 selects from 4 possible parents:
 		 * fclk_div4, fclk_div3, fclk_div5, fclk_div7,
@@ -1264,42 +1433,44 @@ static struct clk_mux gxbb_vapb_0_sel = {
 	},
 };
 
-static struct clk_divider gxbb_vapb_0_div = {
-	.reg = (void *)HHI_VAPBCLK_CNTL,
-	.shift = 0,
-	.width = 7,
-	.lock = &meson_clk_lock,
+static struct clk_regmap gxbb_vapb_0_div = {
+	.data = &(struct clk_regmap_div_data){
+		.offset = HHI_VAPBCLK_CNTL,
+		.shift = 0,
+		.width = 7,
+	},
 	.hw.init = &(struct clk_init_data){
 		.name = "vapb_0_div",
-		.ops = &clk_divider_ops,
+		.ops = &clk_regmap_divider_ops,
 		.parent_names = (const char *[]){ "vapb_0_sel" },
 		.num_parents = 1,
 		.flags = CLK_SET_RATE_PARENT,
 	},
 };
 
-static struct clk_gate gxbb_vapb_0 = {
-	.reg = (void *)HHI_VAPBCLK_CNTL,
-	.bit_idx = 8,
-	.lock = &meson_clk_lock,
+static struct clk_regmap gxbb_vapb_0 = {
+	.data = &(struct clk_regmap_gate_data){
+		.offset = HHI_VAPBCLK_CNTL,
+		.bit_idx = 8,
+	},
 	.hw.init = &(struct clk_init_data) {
 		.name = "vapb_0",
-		.ops = &clk_gate_ops,
+		.ops = &clk_regmap_gate_ops,
 		.parent_names = (const char *[]){ "vapb_0_div" },
 		.num_parents = 1,
 		.flags = CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED,
 	},
 };
 
-static struct clk_mux gxbb_vapb_1_sel = {
-	.reg = (void *)HHI_VAPBCLK_CNTL,
-	.mask = 0x3,
-	.shift = 25,
-	.lock = &meson_clk_lock,
-	.table = mux_table_vapb,
+static struct clk_regmap gxbb_vapb_1_sel = {
+	.data = &(struct clk_regmap_mux_data){
+		.offset = HHI_VAPBCLK_CNTL,
+		.mask = 0x3,
+		.shift = 25,
+	},
 	.hw.init = &(struct clk_init_data){
 		.name = "vapb_1_sel",
-		.ops = &clk_mux_ops,
+		.ops = &clk_regmap_mux_ops,
 		/*
 		 * bits 25:26 selects from 4 possible parents:
 		 * fclk_div4, fclk_div3, fclk_div5, fclk_div7,
@@ -1310,41 +1481,44 @@ static struct clk_mux gxbb_vapb_1_sel = {
 	},
 };
 
-static struct clk_divider gxbb_vapb_1_div = {
-	.reg = (void *)HHI_VAPBCLK_CNTL,
-	.shift = 16,
-	.width = 7,
-	.lock = &meson_clk_lock,
+static struct clk_regmap gxbb_vapb_1_div = {
+	.data = &(struct clk_regmap_div_data){
+		.offset = HHI_VAPBCLK_CNTL,
+		.shift = 16,
+		.width = 7,
+	},
 	.hw.init = &(struct clk_init_data){
 		.name = "vapb_1_div",
-		.ops = &clk_divider_ops,
+		.ops = &clk_regmap_divider_ops,
 		.parent_names = (const char *[]){ "vapb_1_sel" },
 		.num_parents = 1,
 		.flags = CLK_SET_RATE_PARENT,
 	},
 };
 
-static struct clk_gate gxbb_vapb_1 = {
-	.reg = (void *)HHI_VAPBCLK_CNTL,
-	.bit_idx = 24,
-	.lock = &meson_clk_lock,
+static struct clk_regmap gxbb_vapb_1 = {
+	.data = &(struct clk_regmap_gate_data){
+		.offset = HHI_VAPBCLK_CNTL,
+		.bit_idx = 24,
+	},
 	.hw.init = &(struct clk_init_data) {
 		.name = "vapb_1",
-		.ops = &clk_gate_ops,
+		.ops = &clk_regmap_gate_ops,
 		.parent_names = (const char *[]){ "vapb_1_div" },
 		.num_parents = 1,
 		.flags = CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED,
 	},
 };
 
-static struct clk_mux gxbb_vapb_sel = {
-	.reg = (void *)HHI_VAPBCLK_CNTL,
-	.mask = 1,
-	.shift = 31,
-	.lock = &meson_clk_lock,
+static struct clk_regmap gxbb_vapb_sel = {
+	.data = &(struct clk_regmap_mux_data){
+		.offset = HHI_VAPBCLK_CNTL,
+		.mask = 1,
+		.shift = 31,
+	},
 	.hw.init = &(struct clk_init_data){
 		.name = "vapb_sel",
-		.ops = &clk_mux_ops,
+		.ops = &clk_regmap_mux_ops,
 		/*
 		 * bit 31 selects from 2 possible parents:
 		 * vapb_0 or vapb_1
@@ -1355,13 +1529,14 @@ static struct clk_mux gxbb_vapb_sel = {
 	},
 };
 
-static struct clk_gate gxbb_vapb = {
-	.reg = (void *)HHI_VAPBCLK_CNTL,
-	.bit_idx = 30,
-	.lock = &meson_clk_lock,
+static struct clk_regmap gxbb_vapb = {
+	.data = &(struct clk_regmap_gate_data){
+		.offset = HHI_VAPBCLK_CNTL,
+		.bit_idx = 30,
+	},
 	.hw.init = &(struct clk_init_data) {
 		.name = "vapb",
-		.ops = &clk_gate_ops,
+		.ops = &clk_regmap_gate_ops,
 		.parent_names = (const char *[]){ "vapb_sel" },
 		.num_parents = 1,
 		.flags = CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED,
@@ -1601,6 +1776,16 @@ static struct clk_hw_onecell_data gxbb_hw_onecell_data = {
 		[CLKID_VAPB_1]		    = &gxbb_vapb_1.hw,
 		[CLKID_VAPB_SEL]	    = &gxbb_vapb_sel.hw,
 		[CLKID_VAPB]		    = &gxbb_vapb.hw,
+		[CLKID_HDMI_PLL_PRE_MULT]   = &gxbb_hdmi_pll_pre_mult.hw,
+		[CLKID_MPLL0_DIV]	    = &gxbb_mpll0_div.hw,
+		[CLKID_MPLL1_DIV]	    = &gxbb_mpll1_div.hw,
+		[CLKID_MPLL2_DIV]	    = &gxbb_mpll2_div.hw,
+		[CLKID_MPLL_PREDIV]	    = &gxbb_mpll_prediv.hw,
+		[CLKID_FCLK_DIV2_DIV]	    = &gxbb_fclk_div2_div.hw,
+		[CLKID_FCLK_DIV3_DIV]	    = &gxbb_fclk_div3_div.hw,
+		[CLKID_FCLK_DIV4_DIV]	    = &gxbb_fclk_div4_div.hw,
+		[CLKID_FCLK_DIV5_DIV]	    = &gxbb_fclk_div5_div.hw,
+		[CLKID_FCLK_DIV7_DIV]	    = &gxbb_fclk_div7_div.hw,
 		[NR_CLKS]		    = NULL,
 	},
 	.num = NR_CLKS,
@@ -1609,7 +1794,7 @@ static struct clk_hw_onecell_data gxbb_hw_onecell_data = {
 static struct clk_hw_onecell_data gxl_hw_onecell_data = {
 	.hws = {
 		[CLKID_SYS_PLL]		    = &gxbb_sys_pll.hw,
-		[CLKID_HDMI_PLL]	    = &gxbb_hdmi_pll.hw,
+		[CLKID_HDMI_PLL]	    = &gxl_hdmi_pll.hw,
 		[CLKID_FIXED_PLL]	    = &gxbb_fixed_pll.hw,
 		[CLKID_FCLK_DIV2]	    = &gxbb_fclk_div2.hw,
 		[CLKID_FCLK_DIV3]	    = &gxbb_fclk_div3.hw,
@@ -1748,34 +1933,31 @@ static struct clk_hw_onecell_data gxl_hw_onecell_data = {
 		[CLKID_VAPB_1]		    = &gxbb_vapb_1.hw,
 		[CLKID_VAPB_SEL]	    = &gxbb_vapb_sel.hw,
 		[CLKID_VAPB]		    = &gxbb_vapb.hw,
+		[CLKID_MPLL0_DIV]	    = &gxbb_mpll0_div.hw,
+		[CLKID_MPLL1_DIV]	    = &gxbb_mpll1_div.hw,
+		[CLKID_MPLL2_DIV]	    = &gxbb_mpll2_div.hw,
+		[CLKID_MPLL_PREDIV]	    = &gxbb_mpll_prediv.hw,
+		[CLKID_FCLK_DIV2_DIV]	    = &gxbb_fclk_div2_div.hw,
+		[CLKID_FCLK_DIV3_DIV]	    = &gxbb_fclk_div3_div.hw,
+		[CLKID_FCLK_DIV4_DIV]	    = &gxbb_fclk_div4_div.hw,
+		[CLKID_FCLK_DIV5_DIV]	    = &gxbb_fclk_div5_div.hw,
+		[CLKID_FCLK_DIV7_DIV]	    = &gxbb_fclk_div7_div.hw,
 		[NR_CLKS]		    = NULL,
 	},
 	.num = NR_CLKS,
 };
 
-/* Convenience tables to populate base addresses in .probe */
-
-static struct meson_clk_pll *const gxbb_clk_plls[] = {
-	&gxbb_fixed_pll,
-	&gxbb_hdmi_pll,
-	&gxbb_sys_pll,
+static struct clk_regmap *const gxbb_clk_regmaps[] = {
 	&gxbb_gp0_pll,
-};
-
-static struct meson_clk_pll *const gxl_clk_plls[] = {
-	&gxbb_fixed_pll,
 	&gxbb_hdmi_pll,
-	&gxbb_sys_pll,
-	&gxl_gp0_pll,
 };
 
-static struct meson_clk_mpll *const gxbb_clk_mplls[] = {
-	&gxbb_mpll0,
-	&gxbb_mpll1,
-	&gxbb_mpll2,
+static struct clk_regmap *const gxl_clk_regmaps[] = {
+	&gxl_gp0_pll,
+	&gxl_hdmi_pll,
 };
 
-static struct clk_gate *const gxbb_clk_gates[] = {
+static struct clk_regmap *const gx_clk_regmaps[] = {
 	&gxbb_clk81,
 	&gxbb_ddr,
 	&gxbb_dos,
@@ -1872,9 +2054,19 @@ static struct clk_gate *const gxbb_clk_gates[] = {
 	&gxbb_vapb_0,
 	&gxbb_vapb_1,
 	&gxbb_vapb,
-};
-
-static struct clk_mux *const gxbb_clk_muxes[] = {
+	&gxbb_mpeg_clk_div,
+	&gxbb_sar_adc_clk_div,
+	&gxbb_mali_0_div,
+	&gxbb_mali_1_div,
+	&gxbb_cts_mclk_i958_div,
+	&gxbb_32k_clk_div,
+	&gxbb_sd_emmc_a_clk0_div,
+	&gxbb_sd_emmc_b_clk0_div,
+	&gxbb_sd_emmc_c_clk0_div,
+	&gxbb_vpu_0_div,
+	&gxbb_vpu_1_div,
+	&gxbb_vapb_0_div,
+	&gxbb_vapb_1_div,
 	&gxbb_mpeg_clk_sel,
 	&gxbb_sar_adc_clk_sel,
 	&gxbb_mali_0_sel,
@@ -1893,73 +2085,38 @@ static struct clk_mux *const gxbb_clk_muxes[] = {
 	&gxbb_vapb_0_sel,
 	&gxbb_vapb_1_sel,
 	&gxbb_vapb_sel,
-};
-
-static struct clk_divider *const gxbb_clk_dividers[] = {
-	&gxbb_mpeg_clk_div,
-	&gxbb_sar_adc_clk_div,
-	&gxbb_mali_0_div,
-	&gxbb_mali_1_div,
-	&gxbb_cts_mclk_i958_div,
-	&gxbb_32k_clk_div,
-	&gxbb_sd_emmc_a_clk0_div,
-	&gxbb_sd_emmc_b_clk0_div,
-	&gxbb_sd_emmc_c_clk0_div,
-	&gxbb_vpu_0_div,
-	&gxbb_vpu_1_div,
-	&gxbb_vapb_0_div,
-	&gxbb_vapb_1_div,
-};
-
-static struct meson_clk_audio_divider *const gxbb_audio_dividers[] = {
+	&gxbb_mpll0,
+	&gxbb_mpll1,
+	&gxbb_mpll2,
+	&gxbb_mpll0_div,
+	&gxbb_mpll1_div,
+	&gxbb_mpll2_div,
 	&gxbb_cts_amclk_div,
+	&gxbb_fixed_pll,
+	&gxbb_sys_pll,
+	&gxbb_mpll_prediv,
+	&gxbb_fclk_div2,
+	&gxbb_fclk_div3,
+	&gxbb_fclk_div4,
+	&gxbb_fclk_div5,
+	&gxbb_fclk_div7,
 };
 
 struct clkc_data {
-	struct clk_gate *const *clk_gates;
-	unsigned int clk_gates_count;
-	struct meson_clk_mpll *const *clk_mplls;
-	unsigned int clk_mplls_count;
-	struct meson_clk_pll *const *clk_plls;
-	unsigned int clk_plls_count;
-	struct clk_mux *const *clk_muxes;
-	unsigned int clk_muxes_count;
-	struct clk_divider *const *clk_dividers;
-	unsigned int clk_dividers_count;
-	struct meson_clk_audio_divider *const *clk_audio_dividers;
-	unsigned int clk_audio_dividers_count;
+	struct clk_regmap *const *regmap_clks;
+	unsigned int regmap_clks_count;
 	struct clk_hw_onecell_data *hw_onecell_data;
 };
 
 static const struct clkc_data gxbb_clkc_data = {
-	.clk_gates = gxbb_clk_gates,
-	.clk_gates_count = ARRAY_SIZE(gxbb_clk_gates),
-	.clk_mplls = gxbb_clk_mplls,
-	.clk_mplls_count = ARRAY_SIZE(gxbb_clk_mplls),
-	.clk_plls = gxbb_clk_plls,
-	.clk_plls_count = ARRAY_SIZE(gxbb_clk_plls),
-	.clk_muxes = gxbb_clk_muxes,
-	.clk_muxes_count = ARRAY_SIZE(gxbb_clk_muxes),
-	.clk_dividers = gxbb_clk_dividers,
-	.clk_dividers_count = ARRAY_SIZE(gxbb_clk_dividers),
-	.clk_audio_dividers = gxbb_audio_dividers,
-	.clk_audio_dividers_count = ARRAY_SIZE(gxbb_audio_dividers),
+	.regmap_clks = gxbb_clk_regmaps,
+	.regmap_clks_count = ARRAY_SIZE(gxbb_clk_regmaps),
 	.hw_onecell_data = &gxbb_hw_onecell_data,
 };
 
 static const struct clkc_data gxl_clkc_data = {
-	.clk_gates = gxbb_clk_gates,
-	.clk_gates_count = ARRAY_SIZE(gxbb_clk_gates),
-	.clk_mplls = gxbb_clk_mplls,
-	.clk_mplls_count = ARRAY_SIZE(gxbb_clk_mplls),
-	.clk_plls = gxl_clk_plls,
-	.clk_plls_count = ARRAY_SIZE(gxl_clk_plls),
-	.clk_muxes = gxbb_clk_muxes,
-	.clk_muxes_count = ARRAY_SIZE(gxbb_clk_muxes),
-	.clk_dividers = gxbb_clk_dividers,
-	.clk_dividers_count = ARRAY_SIZE(gxbb_clk_dividers),
-	.clk_audio_dividers = gxbb_audio_dividers,
-	.clk_audio_dividers_count = ARRAY_SIZE(gxbb_audio_dividers),
+	.regmap_clks = gxl_clk_regmaps,
+	.regmap_clks_count = ARRAY_SIZE(gxl_clk_regmaps),
 	.hw_onecell_data = &gxl_hw_onecell_data,
 };
 
@@ -1969,71 +2126,83 @@ static const struct of_device_id clkc_match_table[] = {
 	{},
 };
 
+static const struct regmap_config clkc_regmap_config = {
+	.reg_bits       = 32,
+	.val_bits       = 32,
+	.reg_stride     = 4,
+};
+
 static int gxbb_clkc_probe(struct platform_device *pdev)
 {
 	const struct clkc_data *clkc_data;
+	struct resource *res;
+	struct device_node *parent_np;
 	void __iomem *clk_base;
-	int ret, clkid, i;
+	struct regmap *map;
+	int ret, i;
 	struct device *dev = &pdev->dev;
 
-	clkc_data = of_device_get_match_data(&pdev->dev);
+	clkc_data = of_device_get_match_data(dev);
 	if (!clkc_data)
 		return -EINVAL;
 
-	/*  Generic clocks and PLLs */
-	clk_base = of_iomap(dev->of_node, 0);
-	if (!clk_base) {
-		pr_err("%s: Unable to map clk base\n", __func__);
-		return -ENXIO;
-	}
-
-	/* Populate base address for PLLs */
-	for (i = 0; i < clkc_data->clk_plls_count; i++)
-		clkc_data->clk_plls[i]->base = clk_base;
-
-	/* Populate base address for MPLLs */
-	for (i = 0; i < clkc_data->clk_mplls_count; i++)
-		clkc_data->clk_mplls[i]->base = clk_base;
+	/* Get the hhi system controller node if available */
+	parent_np = of_get_parent(dev->of_node);
+	map = syscon_node_to_regmap(parent_np);
+	/* of_get_parent() took a reference on the node: drop it */
+	of_node_put(parent_np);
+	if (IS_ERR(map)) {
+		dev_err(dev,
+			"failed to get HHI regmap - Trying obsolete regs\n");
 
-	/* Populate base address for gates */
-	for (i = 0; i < clkc_data->clk_gates_count; i++)
-		clkc_data->clk_gates[i]->reg = clk_base +
-			(u64)clkc_data->clk_gates[i]->reg;
-
-	/* Populate base address for muxes */
-	for (i = 0; i < clkc_data->clk_muxes_count; i++)
-		clkc_data->clk_muxes[i]->reg = clk_base +
-			(u64)clkc_data->clk_muxes[i]->reg;
+		/*
+		 * FIXME: HHI registers should be accessed through
+		 * the appropriate system controller. This is required because
+		 * there is more than just clocks in this register space
+		 *
+		 * This fallback method is only provided temporarily until
+		 * all the platform DTs are properly using the syscon node
+		 */
+		res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+		if (!res)
+			return -EINVAL;
+
+		clk_base = devm_ioremap(dev, res->start, resource_size(res));
+		if (!clk_base) {
+			dev_err(dev, "Unable to map clk base\n");
+			return -ENXIO;
+		}
+
+		map = devm_regmap_init_mmio(dev, clk_base,
+					    &clkc_regmap_config);
+		if (IS_ERR(map))
+			return PTR_ERR(map);
+	}
 
-	/* Populate base address for dividers */
-	for (i = 0; i < clkc_data->clk_dividers_count; i++)
-		clkc_data->clk_dividers[i]->reg = clk_base +
-			(u64)clkc_data->clk_dividers[i]->reg;
+	/* Populate regmap for the common regmap backed clocks */
+	for (i = 0; i < ARRAY_SIZE(gx_clk_regmaps); i++)
+		gx_clk_regmaps[i]->map = map;
 
-	/* Populate base address for the audio dividers */
-	for (i = 0; i < clkc_data->clk_audio_dividers_count; i++)
-		clkc_data->clk_audio_dividers[i]->base = clk_base;
+	/* Populate regmap for soc specific clocks */
+	for (i = 0; i < clkc_data->regmap_clks_count; i++)
+		clkc_data->regmap_clks[i]->map = map;
 
-	/*
-	 * register all clks
-	 */
-	for (clkid = 0; clkid < clkc_data->hw_onecell_data->num; clkid++) {
+	/* Register all clks */
+	for (i = 0; i < clkc_data->hw_onecell_data->num; i++) {
 		/* array might be sparse */
-		if (!clkc_data->hw_onecell_data->hws[clkid])
+		if (!clkc_data->hw_onecell_data->hws[i])
 			continue;
 
 		ret = devm_clk_hw_register(dev,
-					clkc_data->hw_onecell_data->hws[clkid]);
-		if (ret)
-			goto iounmap;
+					   clkc_data->hw_onecell_data->hws[i]);
+		if (ret) {
+			dev_err(dev, "Clock registration failed\n");
+			return ret;
+		}
 	}
 
-	return of_clk_add_hw_provider(dev->of_node, of_clk_hw_onecell_get,
-			clkc_data->hw_onecell_data);
-
-iounmap:
-	iounmap(clk_base);
-	return ret;
+	return devm_of_clk_add_hw_provider(dev, of_clk_hw_onecell_get,
+					   clkc_data->hw_onecell_data);
 }
 
 static struct platform_driver gxbb_driver = {
diff --git a/drivers/clk/meson/gxbb.h b/drivers/clk/meson/gxbb.h
index aee6fbba2004..9febf3f03739 100644
--- a/drivers/clk/meson/gxbb.h
+++ b/drivers/clk/meson/gxbb.h
@@ -194,8 +194,18 @@
 #define CLKID_VPU_1_DIV		  130
 #define CLKID_VAPB_0_DIV	  134
 #define CLKID_VAPB_1_DIV	  137
-
-#define NR_CLKS			  141
+#define CLKID_HDMI_PLL_PRE_MULT	  141
+#define CLKID_MPLL0_DIV		  142
+#define CLKID_MPLL1_DIV		  143
+#define CLKID_MPLL2_DIV		  144
+#define CLKID_MPLL_PREDIV	  145
+#define CLKID_FCLK_DIV2_DIV	  146
+#define CLKID_FCLK_DIV3_DIV	  147
+#define CLKID_FCLK_DIV4_DIV	  148
+#define CLKID_FCLK_DIV5_DIV	  149
+#define CLKID_FCLK_DIV7_DIV	  150
+
+#define NR_CLKS			  151
 
 /* include the CLKIDs that have been made part of the DT binding */
 #include <dt-bindings/clock/gxbb-clkc.h>
diff --git a/drivers/clk/meson/meson8b.c b/drivers/clk/meson/meson8b.c
index 3ffea80c1308..cc2992493e0b 100644
--- a/drivers/clk/meson/meson8b.c
+++ b/drivers/clk/meson/meson8b.c
@@ -23,14 +23,16 @@
 
 #include <linux/clk.h>
 #include <linux/clk-provider.h>
+#include <linux/init.h>
 #include <linux/of_address.h>
 #include <linux/platform_device.h>
 #include <linux/reset-controller.h>
 #include <linux/slab.h>
-#include <linux/init.h>
+#include <linux/regmap.h>
 
 #include "clkc.h"
 #include "meson8b.h"
+#include "clk-regmap.h"
 
 static DEFINE_SPINLOCK(meson_clk_lock);
 
@@ -97,20 +99,6 @@ static const struct pll_rate_table sys_pll_rate_table[] = {
 	{ /* sentinel */ },
 };
 
-static const struct clk_div_table cpu_div_table[] = {
-	{ .val = 1, .div = 1 },
-	{ .val = 2, .div = 2 },
-	{ .val = 3, .div = 3 },
-	{ .val = 2, .div = 4 },
-	{ .val = 3, .div = 6 },
-	{ .val = 4, .div = 8 },
-	{ .val = 5, .div = 10 },
-	{ .val = 6, .div = 12 },
-	{ .val = 7, .div = 14 },
-	{ .val = 8, .div = 16 },
-	{ /* sentinel */ },
-};
-
 static struct clk_fixed_rate meson8b_xtal = {
 	.fixed_rate = 24000000,
 	.hw.init = &(struct clk_init_data){
@@ -120,23 +108,39 @@ static struct clk_fixed_rate meson8b_xtal = {
 	},
 };
 
-static struct meson_clk_pll meson8b_fixed_pll = {
-	.m = {
-		.reg_off = HHI_MPLL_CNTL,
-		.shift   = 0,
-		.width   = 9,
-	},
-	.n = {
-		.reg_off = HHI_MPLL_CNTL,
-		.shift   = 9,
-		.width   = 5,
-	},
-	.od = {
-		.reg_off = HHI_MPLL_CNTL,
-		.shift   = 16,
-		.width   = 2,
+static struct clk_regmap meson8b_fixed_pll = {
+	.data = &(struct meson_clk_pll_data){
+		.m = {
+			.reg_off = HHI_MPLL_CNTL,
+			.shift   = 0,
+			.width   = 9,
+		},
+		.n = {
+			.reg_off = HHI_MPLL_CNTL,
+			.shift   = 9,
+			.width   = 5,
+		},
+		.od = {
+			.reg_off = HHI_MPLL_CNTL,
+			.shift   = 16,
+			.width   = 2,
+		},
+		.frac = {
+			.reg_off = HHI_MPLL_CNTL2,
+			.shift   = 0,
+			.width   = 12,
+		},
+		.l = {
+			.reg_off = HHI_MPLL_CNTL,
+			.shift   = 31,
+			.width   = 1,
+		},
+		.rst = {
+			.reg_off = HHI_MPLL_CNTL,
+			.shift   = 29,
+			.width   = 1,
+		},
 	},
-	.lock = &meson_clk_lock,
 	.hw.init = &(struct clk_init_data){
 		.name = "fixed_pll",
 		.ops = &meson_clk_pll_ro_ops,
@@ -146,23 +150,34 @@ static struct meson_clk_pll meson8b_fixed_pll = {
 	},
 };
 
-static struct meson_clk_pll meson8b_vid_pll = {
-	.m = {
-		.reg_off = HHI_VID_PLL_CNTL,
-		.shift   = 0,
-		.width   = 9,
+static struct clk_regmap meson8b_vid_pll = {
+	.data = &(struct meson_clk_pll_data){
+		.m = {
+			.reg_off = HHI_VID_PLL_CNTL,
+			.shift   = 0,
+			.width   = 9,
+		},
+		.n = {
+			.reg_off = HHI_VID_PLL_CNTL,
+			.shift   = 9,
+			.width   = 5,
+		},
+		.od = {
+			.reg_off = HHI_VID_PLL_CNTL,
+			.shift   = 16,
+			.width   = 2,
+		},
+		.l = {
+			.reg_off = HHI_VID_PLL_CNTL,
+			.shift   = 31,
+			.width   = 1,
+		},
+		.rst = {
+			.reg_off = HHI_VID_PLL_CNTL,
+			.shift   = 29,
+			.width   = 1,
+		},
 	},
-	.n = {
-		.reg_off = HHI_VID_PLL_CNTL,
-		.shift   = 9,
-		.width   = 5,
-	},
-	.od = {
-		.reg_off = HHI_VID_PLL_CNTL,
-		.shift   = 16,
-		.width   = 2,
-	},
-	.lock = &meson_clk_lock,
 	.hw.init = &(struct clk_init_data){
 		.name = "vid_pll",
 		.ops = &meson_clk_pll_ro_ops,
@@ -172,213 +187,317 @@ static struct meson_clk_pll meson8b_vid_pll = {
 	},
 };
 
-static struct meson_clk_pll meson8b_sys_pll = {
-	.m = {
-		.reg_off = HHI_SYS_PLL_CNTL,
-		.shift   = 0,
-		.width   = 9,
-	},
-	.n = {
-		.reg_off = HHI_SYS_PLL_CNTL,
-		.shift   = 9,
-		.width   = 5,
+static struct clk_regmap meson8b_sys_pll = {
+	.data = &(struct meson_clk_pll_data){
+		.m = {
+			.reg_off = HHI_SYS_PLL_CNTL,
+			.shift   = 0,
+			.width   = 9,
+		},
+		.n = {
+			.reg_off = HHI_SYS_PLL_CNTL,
+			.shift   = 9,
+			.width   = 5,
+		},
+		.od = {
+			.reg_off = HHI_SYS_PLL_CNTL,
+			.shift   = 16,
+			.width   = 2,
+		},
+		.l = {
+			.reg_off = HHI_SYS_PLL_CNTL,
+			.shift   = 31,
+			.width   = 1,
+		},
+		.rst = {
+			.reg_off = HHI_SYS_PLL_CNTL,
+			.shift   = 29,
+			.width   = 1,
+		},
+		.table = sys_pll_rate_table,
 	},
-	.od = {
-		.reg_off = HHI_SYS_PLL_CNTL,
-		.shift   = 16,
-		.width   = 2,
-	},
-	.rate_table = sys_pll_rate_table,
-	.rate_count = ARRAY_SIZE(sys_pll_rate_table),
-	.lock = &meson_clk_lock,
 	.hw.init = &(struct clk_init_data){
 		.name = "sys_pll",
-		.ops = &meson_clk_pll_ops,
+		.ops = &meson_clk_pll_ro_ops,
 		.parent_names = (const char *[]){ "xtal" },
 		.num_parents = 1,
 		.flags = CLK_GET_RATE_NOCACHE,
 	},
 };
 
-static struct clk_fixed_factor meson8b_fclk_div2 = {
+static struct clk_fixed_factor meson8b_fclk_div2_div = {
 	.mult = 1,
 	.div = 2,
 	.hw.init = &(struct clk_init_data){
-		.name = "fclk_div2",
+		.name = "fclk_div2_div",
 		.ops = &clk_fixed_factor_ops,
 		.parent_names = (const char *[]){ "fixed_pll" },
 		.num_parents = 1,
 	},
 };
 
-static struct clk_fixed_factor meson8b_fclk_div3 = {
+static struct clk_regmap meson8b_fclk_div2 = {
+	.data = &(struct clk_regmap_gate_data){
+		.offset = HHI_MPLL_CNTL6,
+		.bit_idx = 27,
+	},
+	.hw.init = &(struct clk_init_data){
+		.name = "fclk_div2",
+		.ops = &clk_regmap_gate_ops,
+		.parent_names = (const char *[]){ "fclk_div2_div" },
+		.num_parents = 1,
+	},
+};
+
+static struct clk_fixed_factor meson8b_fclk_div3_div = {
 	.mult = 1,
 	.div = 3,
 	.hw.init = &(struct clk_init_data){
-		.name = "fclk_div3",
+		.name = "fclk_div3_div", /* parent of the fclk_div3 gate */
 		.ops = &clk_fixed_factor_ops,
 		.parent_names = (const char *[]){ "fixed_pll" },
 		.num_parents = 1,
 	},
 };
 
-static struct clk_fixed_factor meson8b_fclk_div4 = {
+static struct clk_regmap meson8b_fclk_div3 = {
+	.data = &(struct clk_regmap_gate_data){
+		.offset = HHI_MPLL_CNTL6,
+		.bit_idx = 28,
+	},
+	.hw.init = &(struct clk_init_data){
+		.name = "fclk_div3",
+		.ops = &clk_regmap_gate_ops,
+		.parent_names = (const char *[]){ "fclk_div3_div" },
+		.num_parents = 1,
+	},
+};
+
+static struct clk_fixed_factor meson8b_fclk_div4_div = {
 	.mult = 1,
 	.div = 4,
 	.hw.init = &(struct clk_init_data){
-		.name = "fclk_div4",
+		.name = "fclk_div4_div",
 		.ops = &clk_fixed_factor_ops,
 		.parent_names = (const char *[]){ "fixed_pll" },
 		.num_parents = 1,
 	},
 };
 
-static struct clk_fixed_factor meson8b_fclk_div5 = {
+static struct clk_regmap meson8b_fclk_div4 = {
+	.data = &(struct clk_regmap_gate_data){
+		.offset = HHI_MPLL_CNTL6,
+		.bit_idx = 29,
+	},
+	.hw.init = &(struct clk_init_data){
+		.name = "fclk_div4",
+		.ops = &clk_regmap_gate_ops,
+		.parent_names = (const char *[]){ "fclk_div4_div" },
+		.num_parents = 1,
+	},
+};
+
+static struct clk_fixed_factor meson8b_fclk_div5_div = {
 	.mult = 1,
 	.div = 5,
 	.hw.init = &(struct clk_init_data){
-		.name = "fclk_div5",
+		.name = "fclk_div5_div",
 		.ops = &clk_fixed_factor_ops,
 		.parent_names = (const char *[]){ "fixed_pll" },
 		.num_parents = 1,
 	},
 };
 
-static struct clk_fixed_factor meson8b_fclk_div7 = {
+static struct clk_regmap meson8b_fclk_div5 = {
+	.data = &(struct clk_regmap_gate_data){
+		.offset = HHI_MPLL_CNTL6,
+		.bit_idx = 30,
+	},
+	.hw.init = &(struct clk_init_data){
+		.name = "fclk_div5",
+		.ops = &clk_regmap_gate_ops,
+		.parent_names = (const char *[]){ "fclk_div5_div" },
+		.num_parents = 1,
+	},
+};
+
+static struct clk_fixed_factor meson8b_fclk_div7_div = {
 	.mult = 1,
 	.div = 7,
 	.hw.init = &(struct clk_init_data){
-		.name = "fclk_div7",
+		.name = "fclk_div7_div",
 		.ops = &clk_fixed_factor_ops,
 		.parent_names = (const char *[]){ "fixed_pll" },
 		.num_parents = 1,
 	},
 };
 
-static struct meson_clk_mpll meson8b_mpll0 = {
-	.sdm = {
-		.reg_off = HHI_MPLL_CNTL7,
-		.shift   = 0,
-		.width   = 14,
+static struct clk_regmap meson8b_fclk_div7 = {
+	.data = &(struct clk_regmap_gate_data){
+		.offset = HHI_MPLL_CNTL6,
+		.bit_idx = 31,
 	},
-	.sdm_en = {
-		.reg_off = HHI_MPLL_CNTL7,
-		.shift   = 15,
-		.width   = 1,
+	.hw.init = &(struct clk_init_data){
+		.name = "fclk_div7",
+		.ops = &clk_regmap_gate_ops,
+		.parent_names = (const char *[]){ "fclk_div7_div" },
+		.num_parents = 1,
 	},
-	.n2 = {
-		.reg_off = HHI_MPLL_CNTL7,
-		.shift   = 16,
-		.width   = 9,
+};
+
+static struct clk_regmap meson8b_mpll_prediv = {
+	.data = &(struct clk_regmap_div_data){
+		.offset = HHI_MPLL_CNTL5,
+		.shift = 12,
+		.width = 1,
 	},
-	.en = {
-		.reg_off = HHI_MPLL_CNTL7,
-		.shift   = 14,
-		.width   = 1,
+	.hw.init = &(struct clk_init_data){
+		.name = "mpll_prediv",
+		.ops = &clk_regmap_divider_ro_ops,
+		.parent_names = (const char *[]){ "fixed_pll" },
+		.num_parents = 1,
 	},
-	.ssen = {
-		.reg_off = HHI_MPLL_CNTL,
-		.shift   = 25,
-		.width   = 1,
+};
+
+static struct clk_regmap meson8b_mpll0_div = {
+	.data = &(struct meson_clk_mpll_data){
+		.sdm = {
+			.reg_off = HHI_MPLL_CNTL7,
+			.shift   = 0,
+			.width   = 14,
+		},
+		.sdm_en = {
+			.reg_off = HHI_MPLL_CNTL7,
+			.shift   = 15,
+			.width   = 1,
+		},
+		.n2 = {
+			.reg_off = HHI_MPLL_CNTL7,
+			.shift   = 16,
+			.width   = 9,
+		},
+		.ssen = {
+			.reg_off = HHI_MPLL_CNTL,
+			.shift   = 25,
+			.width   = 1,
+		},
+		.lock = &meson_clk_lock,
 	},
-	.lock = &meson_clk_lock,
 	.hw.init = &(struct clk_init_data){
-		.name = "mpll0",
+		.name = "mpll0_div",
 		.ops = &meson_clk_mpll_ops,
-		.parent_names = (const char *[]){ "fixed_pll" },
+		.parent_names = (const char *[]){ "mpll_prediv" },
 		.num_parents = 1,
 	},
 };
 
-static struct meson_clk_mpll meson8b_mpll1 = {
-	.sdm = {
-		.reg_off = HHI_MPLL_CNTL8,
-		.shift   = 0,
-		.width   = 14,
+static struct clk_regmap meson8b_mpll0 = {
+	.data = &(struct clk_regmap_gate_data){
+		.offset = HHI_MPLL_CNTL7,
+		.bit_idx = 14,
 	},
-	.sdm_en = {
-		.reg_off = HHI_MPLL_CNTL8,
-		.shift   = 15,
-		.width   = 1,
-	},
-	.n2 = {
-		.reg_off = HHI_MPLL_CNTL8,
-		.shift   = 16,
-		.width   = 9,
+	.hw.init = &(struct clk_init_data){
+		.name = "mpll0",
+		.ops = &clk_regmap_gate_ops,
+		.parent_names = (const char *[]){ "mpll0_div" },
+		.num_parents = 1,
+		.flags = CLK_SET_RATE_PARENT,
 	},
-	.en = {
-		.reg_off = HHI_MPLL_CNTL8,
-		.shift   = 14,
-		.width   = 1,
+};
+
+static struct clk_regmap meson8b_mpll1_div = {
+	.data = &(struct meson_clk_mpll_data){
+		.sdm = {
+			.reg_off = HHI_MPLL_CNTL8,
+			.shift   = 0,
+			.width   = 14,
+		},
+		.sdm_en = {
+			.reg_off = HHI_MPLL_CNTL8,
+			.shift   = 15,
+			.width   = 1,
+		},
+		.n2 = {
+			.reg_off = HHI_MPLL_CNTL8,
+			.shift   = 16,
+			.width   = 9,
+		},
+		.lock = &meson_clk_lock,
 	},
-	.lock = &meson_clk_lock,
 	.hw.init = &(struct clk_init_data){
-		.name = "mpll1",
+		.name = "mpll1_div",
 		.ops = &meson_clk_mpll_ops,
-		.parent_names = (const char *[]){ "fixed_pll" },
+		.parent_names = (const char *[]){ "mpll_prediv" },
 		.num_parents = 1,
 	},
 };
 
-static struct meson_clk_mpll meson8b_mpll2 = {
-	.sdm = {
-		.reg_off = HHI_MPLL_CNTL9,
-		.shift   = 0,
-		.width   = 14,
-	},
-	.sdm_en = {
-		.reg_off = HHI_MPLL_CNTL9,
-		.shift   = 15,
-		.width   = 1,
+static struct clk_regmap meson8b_mpll1 = {
+	.data = &(struct clk_regmap_gate_data){
+		.offset = HHI_MPLL_CNTL8,
+		.bit_idx = 14,
 	},
-	.n2 = {
-		.reg_off = HHI_MPLL_CNTL9,
-		.shift   = 16,
-		.width   = 9,
+	.hw.init = &(struct clk_init_data){
+		.name = "mpll1",
+		.ops = &clk_regmap_gate_ops,
+		.parent_names = (const char *[]){ "mpll1_div" },
+		.num_parents = 1,
+		.flags = CLK_SET_RATE_PARENT,
 	},
-	.en = {
-		.reg_off = HHI_MPLL_CNTL9,
-		.shift   = 14,
-		.width   = 1,
+};
+
+static struct clk_regmap meson8b_mpll2_div = {
+	.data = &(struct meson_clk_mpll_data){
+		.sdm = {
+			.reg_off = HHI_MPLL_CNTL9,
+			.shift   = 0,
+			.width   = 14,
+		},
+		.sdm_en = {
+			.reg_off = HHI_MPLL_CNTL9,
+			.shift   = 15,
+			.width   = 1,
+		},
+		.n2 = {
+			.reg_off = HHI_MPLL_CNTL9,
+			.shift   = 16,
+			.width   = 9,
+		},
+		.lock = &meson_clk_lock,
 	},
-	.lock = &meson_clk_lock,
 	.hw.init = &(struct clk_init_data){
-		.name = "mpll2",
+		.name = "mpll2_div",
 		.ops = &meson_clk_mpll_ops,
-		.parent_names = (const char *[]){ "fixed_pll" },
+		.parent_names = (const char *[]){ "mpll_prediv" },
 		.num_parents = 1,
 	},
 };
 
-/*
- * FIXME cpu clocks and the legacy composite clocks (e.g. clk81) are both PLL
- * post-dividers and should be modeled with their respective PLLs via the
- * forthcoming coordinated clock rates feature
- */
-static struct meson_clk_cpu meson8b_cpu_clk = {
-	.reg_off = HHI_SYS_CPU_CLK_CNTL1,
-	.div_table = cpu_div_table,
-	.clk_nb.notifier_call = meson_clk_cpu_notifier_cb,
+static struct clk_regmap meson8b_mpll2 = {
+	.data = &(struct clk_regmap_gate_data){
+		.offset = HHI_MPLL_CNTL9,
+		.bit_idx = 14,
+	},
 	.hw.init = &(struct clk_init_data){
-		.name = "cpu_clk",
-		.ops = &meson_clk_cpu_ops,
-		.parent_names = (const char *[]){ "sys_pll" },
+		.name = "mpll2",
+		.ops = &clk_regmap_gate_ops,
+		.parent_names = (const char *[]){ "mpll2_div" },
 		.num_parents = 1,
+		.flags = CLK_SET_RATE_PARENT,
 	},
 };
 
 static u32 mux_table_clk81[]	= { 6, 5, 7 };
-
-struct clk_mux meson8b_mpeg_clk_sel = {
-	.reg = (void *)HHI_MPEG_CLK_CNTL,
-	.mask = 0x7,
-	.shift = 12,
-	.flags = CLK_MUX_READ_ONLY,
-	.table = mux_table_clk81,
-	.lock = &meson_clk_lock,
+static struct clk_regmap meson8b_mpeg_clk_sel = {
+	.data = &(struct clk_regmap_mux_data){
+		.offset = HHI_MPEG_CLK_CNTL,
+		.mask = 0x7,
+		.shift = 12,
+		.table = mux_table_clk81,
+	},
 	.hw.init = &(struct clk_init_data){
 		.name = "mpeg_clk_sel",
-		.ops = &clk_mux_ro_ops,
+		.ops = &clk_regmap_mux_ro_ops,
 		/*
 		 * FIXME bits 14:12 selects from 8 possible parents:
 		 * xtal, 1'b0 (wtf), fclk_div7, mpll_clkout1, mpll_clkout2,
@@ -387,34 +506,136 @@ struct clk_mux meson8b_mpeg_clk_sel = {
 		.parent_names = (const char *[]){ "fclk_div3", "fclk_div4",
 			"fclk_div5" },
 		.num_parents = 3,
-		.flags = (CLK_SET_RATE_NO_REPARENT | CLK_IGNORE_UNUSED),
 	},
 };
 
-struct clk_divider meson8b_mpeg_clk_div = {
-	.reg = (void *)HHI_MPEG_CLK_CNTL,
-	.shift = 0,
-	.width = 7,
-	.lock = &meson_clk_lock,
+static struct clk_regmap meson8b_mpeg_clk_div = {
+	.data = &(struct clk_regmap_div_data){
+		.offset = HHI_MPEG_CLK_CNTL,
+		.shift = 0,
+		.width = 7,
+	},
 	.hw.init = &(struct clk_init_data){
 		.name = "mpeg_clk_div",
-		.ops = &clk_divider_ops,
+		.ops = &clk_regmap_divider_ro_ops,
 		.parent_names = (const char *[]){ "mpeg_clk_sel" },
 		.num_parents = 1,
-		.flags = (CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED),
 	},
 };
 
-struct clk_gate meson8b_clk81 = {
-	.reg = (void *)HHI_MPEG_CLK_CNTL,
-	.bit_idx = 7,
-	.lock = &meson_clk_lock,
+static struct clk_regmap meson8b_clk81 = {
+	.data = &(struct clk_regmap_gate_data){
+		.offset = HHI_MPEG_CLK_CNTL,
+		.bit_idx = 7,
+	},
 	.hw.init = &(struct clk_init_data){
 		.name = "clk81",
-		.ops = &clk_gate_ops,
+		.ops = &clk_regmap_gate_ops,
 		.parent_names = (const char *[]){ "mpeg_clk_div" },
 		.num_parents = 1,
-		.flags = (CLK_SET_RATE_PARENT | CLK_IS_CRITICAL),
+		.flags = CLK_IS_CRITICAL,
+	},
+};
+
+static struct clk_regmap meson8b_cpu_in_sel = {
+	.data = &(struct clk_regmap_mux_data){
+		.offset = HHI_SYS_CPU_CLK_CNTL0,
+		.mask = 0x1,
+		.shift = 0,
+	},
+	.hw.init = &(struct clk_init_data){
+		.name = "cpu_in_sel",
+		.ops = &clk_regmap_mux_ro_ops,
+		.parent_names = (const char *[]){ "xtal", "sys_pll" },
+		.num_parents = 2,
+		.flags = (CLK_SET_RATE_PARENT |
+			  CLK_SET_RATE_NO_REPARENT),
+	},
+};
+
+static struct clk_fixed_factor meson8b_cpu_div2 = {
+	.mult = 1,
+	.div = 2,
+	.hw.init = &(struct clk_init_data){
+		.name = "cpu_div2",
+		.ops = &clk_fixed_factor_ops,
+		.parent_names = (const char *[]){ "cpu_in_sel" },
+		.num_parents = 1,
+		.flags = CLK_SET_RATE_PARENT,
+	},
+};
+
+static struct clk_fixed_factor meson8b_cpu_div3 = {
+	.mult = 1,
+	.div = 3,
+	.hw.init = &(struct clk_init_data){
+		.name = "cpu_div3",
+		.ops = &clk_fixed_factor_ops,
+		.parent_names = (const char *[]){ "cpu_in_sel" },
+		.num_parents = 1,
+		.flags = CLK_SET_RATE_PARENT,
+	},
+};
+
+static const struct clk_div_table cpu_scale_table[] = {
+	{ .val = 2, .div = 4 },
+	{ .val = 3, .div = 6 },
+	{ .val = 4, .div = 8 },
+	{ .val = 5, .div = 10 },
+	{ .val = 6, .div = 12 },
+	{ .val = 7, .div = 14 },
+	{ .val = 8, .div = 16 },
+	{ /* sentinel */ },
+};
+
+static struct clk_regmap meson8b_cpu_scale_div = {
+	.data = &(struct clk_regmap_div_data){
+		.offset =  HHI_SYS_CPU_CLK_CNTL1,
+		.shift = 20,
+		.width = 9,
+		.table = cpu_scale_table,
+		.flags = CLK_DIVIDER_ALLOW_ZERO,
+	},
+	.hw.init = &(struct clk_init_data){
+		.name = "cpu_scale_div",
+		.ops = &clk_regmap_divider_ro_ops,
+		.parent_names = (const char *[]){ "cpu_in_sel" },
+		.num_parents = 1,
+		.flags = CLK_SET_RATE_PARENT,
+	},
+};
+
+static struct clk_regmap meson8b_cpu_scale_out_sel = {
+	.data = &(struct clk_regmap_mux_data){
+		.offset = HHI_SYS_CPU_CLK_CNTL0,
+		.mask = 0x3,
+		.shift = 2,
+	},
+	.hw.init = &(struct clk_init_data){
+		.name = "cpu_scale_out_sel",
+		.ops = &clk_regmap_mux_ro_ops,
+		.parent_names = (const char *[]) { "cpu_in_sel",
+						   "cpu_div2",
+						   "cpu_div3",
+						   "cpu_scale_div" },
+		.num_parents = 4,
+		.flags = CLK_SET_RATE_PARENT,
+	},
+};
+
+static struct clk_regmap meson8b_cpu_clk = {
+	.data = &(struct clk_regmap_mux_data){
+		.offset = HHI_SYS_CPU_CLK_CNTL0,
+		.mask = 0x1,
+		.shift = 7,
+	},
+	.hw.init = &(struct clk_init_data){ /* parents are looked up by name */
+		.name = "cpu_clk",
+		.ops = &clk_regmap_mux_ro_ops,
+		.parent_names = (const char *[]){ "xtal", "cpu_scale_out_sel" },
+		.num_parents = 2,
+		.flags = (CLK_SET_RATE_PARENT |
+			  CLK_SET_RATE_NO_REPARENT),
+	},
+};
 
@@ -599,24 +820,26 @@ static struct clk_hw_onecell_data meson8b_hw_onecell_data = {
 		[CLKID_MPLL0]		    = &meson8b_mpll0.hw,
 		[CLKID_MPLL1]		    = &meson8b_mpll1.hw,
 		[CLKID_MPLL2]		    = &meson8b_mpll2.hw,
+		[CLKID_MPLL0_DIV]	    = &meson8b_mpll0_div.hw,
+		[CLKID_MPLL1_DIV]	    = &meson8b_mpll1_div.hw,
+		[CLKID_MPLL2_DIV]	    = &meson8b_mpll2_div.hw,
+		[CLKID_CPU_IN_SEL]	    = &meson8b_cpu_in_sel.hw,
+		[CLKID_CPU_DIV2]	    = &meson8b_cpu_div2.hw,
+		[CLKID_CPU_DIV3]	    = &meson8b_cpu_div3.hw,
+		[CLKID_CPU_SCALE_DIV]	    = &meson8b_cpu_scale_div.hw,
+		[CLKID_CPU_SCALE_OUT_SEL]   = &meson8b_cpu_scale_out_sel.hw,
+		[CLKID_MPLL_PREDIV]	    = &meson8b_mpll_prediv.hw,
+		[CLKID_FCLK_DIV2_DIV]	    = &meson8b_fclk_div2_div.hw,
+		[CLKID_FCLK_DIV3_DIV]	    = &meson8b_fclk_div3_div.hw,
+		[CLKID_FCLK_DIV4_DIV]	    = &meson8b_fclk_div4_div.hw,
+		[CLKID_FCLK_DIV5_DIV]	    = &meson8b_fclk_div5_div.hw,
+		[CLKID_FCLK_DIV7_DIV]	    = &meson8b_fclk_div7_div.hw,
 		[CLK_NR_CLKS]		    = NULL,
 	},
 	.num = CLK_NR_CLKS,
 };
 
-static struct meson_clk_pll *const meson8b_clk_plls[] = {
-	&meson8b_fixed_pll,
-	&meson8b_vid_pll,
-	&meson8b_sys_pll,
-};
-
-static struct meson_clk_mpll *const meson8b_clk_mplls[] = {
-	&meson8b_mpll0,
-	&meson8b_mpll1,
-	&meson8b_mpll2,
-};
-
-static struct clk_gate *const meson8b_clk_gates[] = {
+static struct clk_regmap *const meson8b_clk_regmaps[] = {
 	&meson8b_clk81,
 	&meson8b_ddr,
 	&meson8b_dos,
@@ -695,14 +918,27 @@ static struct clk_gate *const meson8b_clk_gates[] = {
 	&meson8b_ao_ahb_sram,
 	&meson8b_ao_ahb_bus,
 	&meson8b_ao_iface,
-};
-
-static struct clk_mux *const meson8b_clk_muxes[] = {
-	&meson8b_mpeg_clk_sel,
-};
-
-static struct clk_divider *const meson8b_clk_dividers[] = {
 	&meson8b_mpeg_clk_div,
+	&meson8b_mpeg_clk_sel,
+	&meson8b_mpll0,
+	&meson8b_mpll1,
+	&meson8b_mpll2,
+	&meson8b_mpll0_div,
+	&meson8b_mpll1_div,
+	&meson8b_mpll2_div,
+	&meson8b_fixed_pll,
+	&meson8b_vid_pll,
+	&meson8b_sys_pll,
+	&meson8b_cpu_in_sel,
+	&meson8b_cpu_scale_div,
+	&meson8b_cpu_scale_out_sel,
+	&meson8b_cpu_clk,
+	&meson8b_mpll_prediv,
+	&meson8b_fclk_div2,
+	&meson8b_fclk_div3,
+	&meson8b_fclk_div4,
+	&meson8b_fclk_div5,
+	&meson8b_fclk_div7,
 };
 
 static const struct meson8b_clk_reset_line {
@@ -804,82 +1040,45 @@ static const struct reset_control_ops meson8b_clk_reset_ops = {
 	.deassert = meson8b_clk_reset_deassert,
 };
 
+static const struct regmap_config clkc_regmap_config = {
+	.reg_bits       = 32,
+	.val_bits       = 32,
+	.reg_stride     = 4,
+};
+
 static int meson8b_clkc_probe(struct platform_device *pdev)
 {
-	int ret, clkid, i;
-	struct clk_hw *parent_hw;
-	struct clk *parent_clk;
+	int ret, i;
 	struct device *dev = &pdev->dev;
+	struct regmap *map;
 
 	if (!clk_base)
 		return -ENXIO;
 
-	/* Populate base address for PLLs */
-	for (i = 0; i < ARRAY_SIZE(meson8b_clk_plls); i++)
-		meson8b_clk_plls[i]->base = clk_base;
-
-	/* Populate base address for MPLLs */
-	for (i = 0; i < ARRAY_SIZE(meson8b_clk_mplls); i++)
-		meson8b_clk_mplls[i]->base = clk_base;
-
-	/* Populate the base address for CPU clk */
-	meson8b_cpu_clk.base = clk_base;
-
-	/* Populate base address for gates */
-	for (i = 0; i < ARRAY_SIZE(meson8b_clk_gates); i++)
-		meson8b_clk_gates[i]->reg = clk_base +
-			(u32)meson8b_clk_gates[i]->reg;
-
-	/* Populate base address for muxes */
-	for (i = 0; i < ARRAY_SIZE(meson8b_clk_muxes); i++)
-		meson8b_clk_muxes[i]->reg = clk_base +
-			(u32)meson8b_clk_muxes[i]->reg;
+	map = devm_regmap_init_mmio(dev, clk_base, &clkc_regmap_config);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
 
-	/* Populate base address for dividers */
-	for (i = 0; i < ARRAY_SIZE(meson8b_clk_dividers); i++)
-		meson8b_clk_dividers[i]->reg = clk_base +
-			(u32)meson8b_clk_dividers[i]->reg;
+	/* Populate regmap for the regmap backed clocks */
+	for (i = 0; i < ARRAY_SIZE(meson8b_clk_regmaps); i++)
+		meson8b_clk_regmaps[i]->map = map;
 
 	/*
 	 * register all clks
 	 * CLKID_UNUSED = 0, so skip it and start with CLKID_XTAL = 1
 	 */
-	for (clkid = CLKID_XTAL; clkid < CLK_NR_CLKS; clkid++) {
+	for (i = CLKID_XTAL; i < CLK_NR_CLKS; i++) {
 		/* array might be sparse */
-		if (!meson8b_hw_onecell_data.hws[clkid])
+		if (!meson8b_hw_onecell_data.hws[i])
 			continue;
 
-		/* FIXME convert to devm_clk_register */
-		ret = devm_clk_hw_register(dev, meson8b_hw_onecell_data.hws[clkid]);
+		ret = devm_clk_hw_register(dev, meson8b_hw_onecell_data.hws[i]);
 		if (ret)
 			return ret;
 	}
 
-	/*
-	 * Register CPU clk notifier
-	 *
-	 * FIXME this is wrong for a lot of reasons. First, the muxes should be
-	 * struct clk_hw objects. Second, we shouldn't program the muxes in
-	 * notifier handlers. The tricky programming sequence will be handled
-	 * by the forthcoming coordinated clock rates mechanism once that
-	 * feature is released.
-	 *
-	 * Furthermore, looking up the parent this way is terrible. At some
-	 * point we will stop allocating a default struct clk when registering
-	 * a new clk_hw, and this hack will no longer work. Releasing the ccr
-	 * feature before that time solves the problem :-)
-	 */
-	parent_hw = clk_hw_get_parent(&meson8b_cpu_clk.hw);
-	parent_clk = parent_hw->clk;
-	ret = clk_notifier_register(parent_clk, &meson8b_cpu_clk.clk_nb);
-	if (ret) {
-		pr_err("%s: failed to register clock notifier for cpu_clk\n",
-				__func__);
-		return ret;
-	}
-
-	return of_clk_add_hw_provider(dev->of_node, of_clk_hw_onecell_get,
-			&meson8b_hw_onecell_data);
+	return devm_of_clk_add_hw_provider(dev, of_clk_hw_onecell_get,
+					   &meson8b_hw_onecell_data);
 }
 
 static const struct of_device_id meson8b_clkc_match_table[] = {
diff --git a/drivers/clk/meson/meson8b.h b/drivers/clk/meson/meson8b.h
index 2eaf8a52e7dd..6e414bd36981 100644
--- a/drivers/clk/meson/meson8b.h
+++ b/drivers/clk/meson/meson8b.h
@@ -69,7 +69,22 @@
  * will remain defined here.
  */
 
-#define CLK_NR_CLKS		96
+#define CLKID_MPLL0_DIV		96
+#define CLKID_MPLL1_DIV		97
+#define CLKID_MPLL2_DIV		98
+#define CLKID_CPU_IN_SEL	99
+#define CLKID_CPU_DIV2		100
+#define CLKID_CPU_DIV3		101
+#define CLKID_CPU_SCALE_DIV	102
+#define CLKID_CPU_SCALE_OUT_SEL	103
+#define CLKID_MPLL_PREDIV	104
+#define CLKID_FCLK_DIV2_DIV	105
+#define CLKID_FCLK_DIV3_DIV	106
+#define CLKID_FCLK_DIV4_DIV	107
+#define CLKID_FCLK_DIV5_DIV	108
+#define CLKID_FCLK_DIV7_DIV	109
+
+#define CLK_NR_CLKS		110
 
 /*
  * include the CLKID and RESETID that have
diff --git a/drivers/clk/mvebu/armada-38x.c b/drivers/clk/mvebu/armada-38x.c
index 394aa6f03f01..9ff4ea63932d 100644
--- a/drivers/clk/mvebu/armada-38x.c
+++ b/drivers/clk/mvebu/armada-38x.c
@@ -46,11 +46,11 @@ static u32 __init armada_38x_get_tclk_freq(void __iomem *sar)
 }
 
 static const u32 armada_38x_cpu_frequencies[] __initconst = {
-	0, 0, 0, 0,
-	1066 * 1000 * 1000, 0, 0, 0,
+	666 * 1000 * 1000,  0, 800 * 1000 * 1000, 0,
+	1066 * 1000 * 1000, 0, 1200 * 1000 * 1000, 0,
 	1332 * 1000 * 1000, 0, 0, 0,
 	1600 * 1000 * 1000, 0, 0, 0,
-	1866 * 1000 * 1000,
+	1866 * 1000 * 1000, 0, 0, 2000 * 1000 * 1000,
 };
 
 static u32 __init armada_38x_get_cpu_freq(void __iomem *sar)
@@ -76,11 +76,11 @@ static const struct coreclk_ratio armada_38x_coreclk_ratios[] __initconst = {
 };
 
 static const int armada_38x_cpu_l2_ratios[32][2] __initconst = {
-	{0, 1}, {0, 1}, {0, 1}, {0, 1},
-	{1, 2}, {0, 1}, {0, 1}, {0, 1},
-	{1, 2}, {0, 1}, {0, 1}, {0, 1},
+	{1, 2}, {0, 1}, {1, 2}, {0, 1},
+	{1, 2}, {0, 1}, {1, 2}, {0, 1},
 	{1, 2}, {0, 1}, {0, 1}, {0, 1},
 	{1, 2}, {0, 1}, {0, 1}, {0, 1},
+	{1, 2}, {0, 1}, {0, 1}, {1, 2},
 	{0, 1}, {0, 1}, {0, 1}, {0, 1},
 	{0, 1}, {0, 1}, {0, 1}, {0, 1},
 	{0, 1}, {0, 1}, {0, 1}, {0, 1},
@@ -91,7 +91,7 @@ static const int armada_38x_cpu_ddr_ratios[32][2] __initconst = {
 	{1, 2}, {0, 1}, {0, 1}, {0, 1},
 	{1, 2}, {0, 1}, {0, 1}, {0, 1},
 	{1, 2}, {0, 1}, {0, 1}, {0, 1},
-	{1, 2}, {0, 1}, {0, 1}, {0, 1},
+	{1, 2}, {0, 1}, {0, 1}, {7, 15},
 	{0, 1}, {0, 1}, {0, 1}, {0, 1},
 	{0, 1}, {0, 1}, {0, 1}, {0, 1},
 	{0, 1}, {0, 1}, {0, 1}, {0, 1},
diff --git a/drivers/clk/mvebu/cp110-system-controller.c b/drivers/clk/mvebu/cp110-system-controller.c
index ca9a0a536174..75bf7b8f282f 100644
--- a/drivers/clk/mvebu/cp110-system-controller.c
+++ b/drivers/clk/mvebu/cp110-system-controller.c
@@ -13,18 +13,17 @@
 /*
  * CP110 has 6 core clocks:
  *
- *  - APLL		(1 Ghz)
- *    - PPv2 core	(1/3 APLL)
- *    - EIP		(1/2 APLL)
- *     - Core		(1/2 EIP)
- *    - SDIO		(2/5 APLL)
+ *  - PLL0		(1 GHz)
+ *    - PPv2 core	(1/3 PLL0)
+ *    - x2 Core		(1/2 PLL0)
+ *	- Core		(1/2 x2 Core)
+ *    - SDIO		(2/5 PLL0)
  *
  *  - NAND clock, which is either:
  *    - Equal to SDIO clock
- *    - 2/5 APLL
+ *    - 2/5 PLL0
  *
- * CP110 has 32 gatable clocks, for the various peripherals in the
- * IP. They have fairly complicated parent/child relationships.
+ * CP110 has 32 gatable clocks, for the various peripherals in the IP.
  */
 
 #define pr_fmt(fmt) "cp110-system-controller: " fmt
@@ -53,9 +52,9 @@ enum {
 #define CP110_CLK_NUM \
 	(CP110_MAX_CORE_CLOCKS + CP110_MAX_GATABLE_CLOCKS)
 
-#define CP110_CORE_APLL			0
+#define CP110_CORE_PLL0			0
 #define CP110_CORE_PPV2			1
-#define CP110_CORE_EIP			2
+#define CP110_CORE_X2CORE		2
 #define CP110_CORE_CORE			3
 #define CP110_CORE_NAND			4
 #define CP110_CORE_SDIO			5
@@ -237,7 +236,7 @@ static int cp110_syscon_common_probe(struct platform_device *pdev,
 	struct regmap *regmap;
 	struct device *dev = &pdev->dev;
 	struct device_node *np = dev->of_node;
-	const char *ppv2_name, *apll_name, *core_name, *eip_name, *nand_name,
+	const char *ppv2_name, *pll0_name, *core_name, *x2core_name, *nand_name,
 		*sdio_name;
 	struct clk_hw_onecell_data *cp110_clk_data;
 	struct clk_hw *hw, **cp110_clks;
@@ -263,20 +262,20 @@ static int cp110_syscon_common_probe(struct platform_device *pdev,
 	cp110_clks = cp110_clk_data->hws;
 	cp110_clk_data->num = CP110_CLK_NUM;
 
-	/* Register the APLL which is the root of the hw tree */
-	apll_name = cp110_unique_name(dev, syscon_node, "apll");
-	hw = clk_hw_register_fixed_rate(NULL, apll_name, NULL, 0,
+	/* Register the PLL0 which is the root of the hw tree */
+	pll0_name = cp110_unique_name(dev, syscon_node, "pll0");
+	hw = clk_hw_register_fixed_rate(NULL, pll0_name, NULL, 0,
 					1000 * 1000 * 1000);
 	if (IS_ERR(hw)) {
 		ret = PTR_ERR(hw);
-		goto fail_apll;
+		goto fail_pll0;
 	}
 
-	cp110_clks[CP110_CORE_APLL] = hw;
+	cp110_clks[CP110_CORE_PLL0] = hw;
 
-	/* PPv2 is APLL/3 */
+	/* PPv2 is PLL0/3 */
 	ppv2_name = cp110_unique_name(dev, syscon_node, "ppv2-core");
-	hw = clk_hw_register_fixed_factor(NULL, ppv2_name, apll_name, 0, 1, 3);
+	hw = clk_hw_register_fixed_factor(NULL, ppv2_name, pll0_name, 0, 1, 3);
 	if (IS_ERR(hw)) {
 		ret = PTR_ERR(hw);
 		goto fail_ppv2;
@@ -284,30 +283,32 @@ static int cp110_syscon_common_probe(struct platform_device *pdev,
 
 	cp110_clks[CP110_CORE_PPV2] = hw;
 
-	/* EIP clock is APLL/2 */
-	eip_name = cp110_unique_name(dev, syscon_node, "eip");
-	hw = clk_hw_register_fixed_factor(NULL, eip_name, apll_name, 0, 1, 2);
+	/* X2CORE clock is PLL0/2 */
+	x2core_name = cp110_unique_name(dev, syscon_node, "x2core");
+	hw = clk_hw_register_fixed_factor(NULL, x2core_name, pll0_name,
+					  0, 1, 2);
 	if (IS_ERR(hw)) {
 		ret = PTR_ERR(hw);
 		goto fail_eip;
 	}
 
-	cp110_clks[CP110_CORE_EIP] = hw;
+	cp110_clks[CP110_CORE_X2CORE] = hw;
 
-	/* Core clock is EIP/2 */
+	/* Core clock is X2CORE/2 */
 	core_name = cp110_unique_name(dev, syscon_node, "core");
-	hw = clk_hw_register_fixed_factor(NULL, core_name, eip_name, 0, 1, 2);
+	hw = clk_hw_register_fixed_factor(NULL, core_name, x2core_name,
+					  0, 1, 2);
 	if (IS_ERR(hw)) {
 		ret = PTR_ERR(hw);
 		goto fail_core;
 	}
 
 	cp110_clks[CP110_CORE_CORE] = hw;
-	/* NAND can be either APLL/2.5 or core clock */
+	/* NAND can be either PLL0/2.5 or core clock */
 	nand_name = cp110_unique_name(dev, syscon_node, "nand-core");
 	if (nand_clk_ctrl & NF_CLOCK_SEL_400_MASK)
 		hw = clk_hw_register_fixed_factor(NULL, nand_name,
-						   apll_name, 0, 2, 5);
+						   pll0_name, 0, 2, 5);
 	else
 		hw = clk_hw_register_fixed_factor(NULL, nand_name,
 						   core_name, 0, 1, 1);
@@ -318,10 +319,10 @@ static int cp110_syscon_common_probe(struct platform_device *pdev,
 
 	cp110_clks[CP110_CORE_NAND] = hw;
 
-	/* SDIO clock is APLL/2.5 */
+	/* SDIO clock is PLL0/2.5 */
 	sdio_name = cp110_unique_name(dev, syscon_node, "sdio-core");
 	hw = clk_hw_register_fixed_factor(NULL, sdio_name,
-					  apll_name, 0, 2, 5);
+					  pll0_name, 0, 2, 5);
 	if (IS_ERR(hw)) {
 		ret = PTR_ERR(hw);
 		goto fail_sdio;
@@ -341,40 +342,23 @@ static int cp110_syscon_common_probe(struct platform_device *pdev,
 			continue;
 
 		switch (i) {
-		case CP110_GATE_AUDIO:
-		case CP110_GATE_COMM_UNIT:
-		case CP110_GATE_EIP150:
-		case CP110_GATE_EIP197:
-		case CP110_GATE_SLOW_IO:
-			parent = gate_name[CP110_GATE_MAIN];
-			break;
-		case CP110_GATE_MG:
-			parent = gate_name[CP110_GATE_MG_CORE];
-			break;
 		case CP110_GATE_NAND:
 			parent = nand_name;
 			break;
+		case CP110_GATE_MG:
+		case CP110_GATE_GOP_DP:
 		case CP110_GATE_PPV2:
 			parent = ppv2_name;
 			break;
 		case CP110_GATE_SDIO:
 			parent = sdio_name;
 			break;
-		case CP110_GATE_GOP_DP:
-			parent = gate_name[CP110_GATE_SDMMC_GOP];
-			break;
-		case CP110_GATE_XOR1:
-		case CP110_GATE_XOR0:
-		case CP110_GATE_PCIE_X1_0:
-		case CP110_GATE_PCIE_X1_1:
+		case CP110_GATE_MAIN:
+		case CP110_GATE_PCIE_XOR:
 		case CP110_GATE_PCIE_X4:
-			parent = gate_name[CP110_GATE_PCIE_XOR];
-			break;
-		case CP110_GATE_SATA:
-		case CP110_GATE_USB3H0:
-		case CP110_GATE_USB3H1:
-		case CP110_GATE_USB3DEV:
-			parent = gate_name[CP110_GATE_SATA_USB];
+		case CP110_GATE_EIP150:
+		case CP110_GATE_EIP197:
+			parent = x2core_name;
 			break;
 		default:
 			parent = core_name;
@@ -413,12 +397,12 @@ fail_sdio:
 fail_nand:
 	clk_hw_unregister_fixed_factor(cp110_clks[CP110_CORE_CORE]);
 fail_core:
-	clk_hw_unregister_fixed_factor(cp110_clks[CP110_CORE_EIP]);
+	clk_hw_unregister_fixed_factor(cp110_clks[CP110_CORE_X2CORE]);
 fail_eip:
 	clk_hw_unregister_fixed_factor(cp110_clks[CP110_CORE_PPV2]);
 fail_ppv2:
-	clk_hw_unregister_fixed_rate(cp110_clks[CP110_CORE_APLL]);
-fail_apll:
+	clk_hw_unregister_fixed_rate(cp110_clks[CP110_CORE_PLL0]);
+fail_pll0:
 	return ret;
 }
 
diff --git a/drivers/clk/nxp/clk-lpc32xx.c b/drivers/clk/nxp/clk-lpc32xx.c
index f5d815f577e0..5eeecee17b69 100644
--- a/drivers/clk/nxp/clk-lpc32xx.c
+++ b/drivers/clk/nxp/clk-lpc32xx.c
@@ -67,6 +67,7 @@
 #define LPC32XX_USB_CLK_STS		0xF8
 
 static struct regmap_config lpc32xx_scb_regmap_config = {
+	.name = "scb",
 	.reg_bits = 32,
 	.val_bits = 32,
 	.reg_stride = 4,
diff --git a/drivers/clk/qcom/clk-regmap-divider.c b/drivers/clk/qcom/clk-regmap-divider.c
index 4e9b8c2c8980..1ee75a5e93f4 100644
--- a/drivers/clk/qcom/clk-regmap-divider.c
+++ b/drivers/clk/qcom/clk-regmap-divider.c
@@ -28,22 +28,14 @@ static long div_round_ro_rate(struct clk_hw *hw, unsigned long rate,
 {
 	struct clk_regmap_div *divider = to_clk_regmap_div(hw);
 	struct clk_regmap *clkr = &divider->clkr;
-	u32 div;
-	struct clk_hw *hw_parent = clk_hw_get_parent(hw);
-
-	regmap_read(clkr->regmap, divider->reg, &div);
-	div >>= divider->shift;
-	div &= BIT(divider->width) - 1;
-	div += 1;
-
-	if (clk_hw_get_flags(hw) & CLK_SET_RATE_PARENT) {
-		if (!hw_parent)
-			return -EINVAL;
+	u32 val;
 
-		*prate = clk_hw_round_rate(hw_parent, rate * div);
-	}
+	regmap_read(clkr->regmap, divider->reg, &val);
+	val >>= divider->shift;
+	val &= BIT(divider->width) - 1;
 
-	return DIV_ROUND_UP_ULL((u64)*prate, div);
+	return divider_ro_round_rate(hw, rate, prate, NULL, divider->width,
+				     CLK_DIVIDER_ROUND_CLOSEST, val);
 }
 
 static long div_round_rate(struct clk_hw *hw, unsigned long rate,
diff --git a/drivers/clk/qcom/clk-rpm.c b/drivers/clk/qcom/clk-rpm.c
index c60f61b10c7f..b94981447664 100644
--- a/drivers/clk/qcom/clk-rpm.c
+++ b/drivers/clk/qcom/clk-rpm.c
@@ -29,6 +29,7 @@
 
 #define QCOM_RPM_MISC_CLK_TYPE				0x306b6c63
 #define QCOM_RPM_SCALING_ENABLE_ID			0x2
+#define QCOM_RPM_XO_MODE_ON				0x2
 
 #define DEFINE_CLK_RPM(_platform, _name, _active, r_id)			      \
 	static struct clk_rpm _platform##_##_active;			      \
@@ -56,6 +57,18 @@
 		},							      \
 	}
 
+#define DEFINE_CLK_RPM_XO_BUFFER(_platform, _name, _active, offset)	      \
+	static struct clk_rpm _platform##_##_name = {			      \
+		.rpm_clk_id = QCOM_RPM_CXO_BUFFERS,			      \
+		.xo_offset = (offset),					      \
+		.hw.init = &(struct clk_init_data){			      \
+			.ops = &clk_rpm_xo_ops,			      \
+			.name = #_name,					      \
+			.parent_names = (const char *[]){ "cxo_board" },      \
+			.num_parents = 1,				      \
+		},							      \
+	}
+
 #define DEFINE_CLK_RPM_FIXED(_platform, _name, _active, r_id, r)	      \
 	static struct clk_rpm _platform##_##_name = {			      \
 		.rpm_clk_id = (r_id),					      \
@@ -126,8 +139,11 @@
 
 #define to_clk_rpm(_hw) container_of(_hw, struct clk_rpm, hw)
 
+struct rpm_cc;
+
 struct clk_rpm {
 	const int rpm_clk_id;
+	const int xo_offset;
 	const bool active_only;
 	unsigned long rate;
 	bool enabled;
@@ -135,12 +151,15 @@ struct clk_rpm {
 	struct clk_rpm *peer;
 	struct clk_hw hw;
 	struct qcom_rpm *rpm;
+	struct rpm_cc *rpm_cc;
 };
 
 struct rpm_cc {
 	struct qcom_rpm *rpm;
 	struct clk_rpm **clks;
 	size_t num_clks;
+	u32 xo_buffer_value;
+	struct mutex xo_lock;
 };
 
 struct rpm_clk_desc {
@@ -159,7 +178,8 @@ static int clk_rpm_handoff(struct clk_rpm *r)
 	 * The vendor tree simply reads the status for this
 	 * RPM clock.
 	 */
-	if (r->rpm_clk_id == QCOM_RPM_PLL_4)
+	if (r->rpm_clk_id == QCOM_RPM_PLL_4 ||
+		r->rpm_clk_id == QCOM_RPM_CXO_BUFFERS)
 		return 0;
 
 	ret = qcom_rpm_write(r->rpm, QCOM_RPM_ACTIVE_STATE,
@@ -288,6 +308,46 @@ out:
 	mutex_unlock(&rpm_clk_lock);
 }
 
+static int clk_rpm_xo_prepare(struct clk_hw *hw)
+{
+	struct clk_rpm *r = to_clk_rpm(hw);
+	struct rpm_cc *rcc = r->rpm_cc;
+	int ret, clk_id = r->rpm_clk_id;
+	u32 value;
+
+	mutex_lock(&rcc->xo_lock);
+
+	value = rcc->xo_buffer_value | (QCOM_RPM_XO_MODE_ON << r->xo_offset);
+	ret = qcom_rpm_write(r->rpm, QCOM_RPM_ACTIVE_STATE, clk_id, &value, 1);
+	if (!ret) {
+		r->enabled = true;
+		rcc->xo_buffer_value = value;
+	}
+
+	mutex_unlock(&rcc->xo_lock);
+
+	return ret;
+}
+
+static void clk_rpm_xo_unprepare(struct clk_hw *hw)
+{
+	struct clk_rpm *r = to_clk_rpm(hw);
+	struct rpm_cc *rcc = r->rpm_cc;
+	int ret, clk_id = r->rpm_clk_id;
+	u32 value;
+
+	mutex_lock(&rcc->xo_lock);
+
+	value = rcc->xo_buffer_value & ~(QCOM_RPM_XO_MODE_ON << r->xo_offset);
+	ret = qcom_rpm_write(r->rpm, QCOM_RPM_ACTIVE_STATE, clk_id, &value, 1);
+	if (!ret) {
+		r->enabled = false;
+		rcc->xo_buffer_value = value;
+	}
+
+	mutex_unlock(&rcc->xo_lock);
+}
+
 static int clk_rpm_fixed_prepare(struct clk_hw *hw)
 {
 	struct clk_rpm *r = to_clk_rpm(hw);
@@ -378,6 +438,11 @@ static unsigned long clk_rpm_recalc_rate(struct clk_hw *hw,
 	return r->rate;
 }
 
+static const struct clk_ops clk_rpm_xo_ops = {
+	.prepare	= clk_rpm_xo_prepare,
+	.unprepare	= clk_rpm_xo_unprepare,
+};
+
 static const struct clk_ops clk_rpm_fixed_ops = {
 	.prepare	= clk_rpm_fixed_prepare,
 	.unprepare	= clk_rpm_fixed_unprepare,
@@ -449,6 +514,11 @@ DEFINE_CLK_RPM(apq8064, mmfpb_clk, mmfpb_a_clk, QCOM_RPM_MMFPB_CLK);
 DEFINE_CLK_RPM(apq8064, sfab_clk, sfab_a_clk, QCOM_RPM_SYS_FABRIC_CLK);
 DEFINE_CLK_RPM(apq8064, sfpb_clk, sfpb_a_clk, QCOM_RPM_SFPB_CLK);
 DEFINE_CLK_RPM(apq8064, qdss_clk, qdss_a_clk, QCOM_RPM_QDSS_CLK);
+DEFINE_CLK_RPM_XO_BUFFER(apq8064, xo_d0_clk, xo_d0_a_clk, 0);
+DEFINE_CLK_RPM_XO_BUFFER(apq8064, xo_d1_clk, xo_d1_a_clk, 8);
+DEFINE_CLK_RPM_XO_BUFFER(apq8064, xo_a0_clk, xo_a0_a_clk, 16);
+DEFINE_CLK_RPM_XO_BUFFER(apq8064, xo_a1_clk, xo_a1_a_clk, 24);
+DEFINE_CLK_RPM_XO_BUFFER(apq8064, xo_a2_clk, xo_a2_a_clk, 28);
 
 static struct clk_rpm *apq8064_clks[] = {
 	[RPM_APPS_FABRIC_CLK] = &apq8064_afab_clk,
@@ -469,6 +539,11 @@ static struct clk_rpm *apq8064_clks[] = {
 	[RPM_SFPB_A_CLK] = &apq8064_sfpb_a_clk,
 	[RPM_QDSS_CLK] = &apq8064_qdss_clk,
 	[RPM_QDSS_A_CLK] = &apq8064_qdss_a_clk,
+	[RPM_XO_D0] = &apq8064_xo_d0_clk,
+	[RPM_XO_D1] = &apq8064_xo_d1_clk,
+	[RPM_XO_A0] = &apq8064_xo_a0_clk,
+	[RPM_XO_A1] = &apq8064_xo_a1_clk,
+	[RPM_XO_A2] = &apq8064_xo_a2_clk,
 };
 
 static const struct rpm_clk_desc rpm_clk_apq8064 = {
@@ -526,12 +601,14 @@ static int rpm_clk_probe(struct platform_device *pdev)
 
 	rcc->clks = rpm_clks;
 	rcc->num_clks = num_clks;
+	mutex_init(&rcc->xo_lock);
 
 	for (i = 0; i < num_clks; i++) {
 		if (!rpm_clks[i])
 			continue;
 
 		rpm_clks[i]->rpm = rpm;
+		rpm_clks[i]->rpm_cc = rcc;
 
 		ret = clk_rpm_handoff(rpm_clks[i]);
 		if (ret)
diff --git a/drivers/clk/qcom/clk-smd-rpm.c b/drivers/clk/qcom/clk-smd-rpm.c
index c26d9007bfc4..850c02a52248 100644
--- a/drivers/clk/qcom/clk-smd-rpm.c
+++ b/drivers/clk/qcom/clk-smd-rpm.c
@@ -686,7 +686,7 @@ static int rpm_smd_clk_probe(struct platform_device *pdev)
 			goto err;
 	}
 
-	ret = of_clk_add_hw_provider(pdev->dev.of_node, qcom_smdrpm_clk_hw_get,
+	ret = devm_of_clk_add_hw_provider(&pdev->dev, qcom_smdrpm_clk_hw_get,
 				     rcc);
 	if (ret)
 		goto err;
@@ -697,19 +697,12 @@ err:
 	return ret;
 }
 
-static int rpm_smd_clk_remove(struct platform_device *pdev)
-{
-	of_clk_del_provider(pdev->dev.of_node);
-	return 0;
-}
-
 static struct platform_driver rpm_smd_clk_driver = {
 	.driver = {
 		.name = "qcom-clk-smd-rpm",
 		.of_match_table = rpm_smd_clk_match_table,
 	},
 	.probe = rpm_smd_clk_probe,
-	.remove = rpm_smd_clk_remove,
 };
 
 static int __init rpm_smd_clk_init(void)
diff --git a/drivers/clk/qcom/gcc-msm8996.c b/drivers/clk/qcom/gcc-msm8996.c
index 5d7451209206..3d6452932797 100644
--- a/drivers/clk/qcom/gcc-msm8996.c
+++ b/drivers/clk/qcom/gcc-msm8996.c
@@ -2895,7 +2895,7 @@ static struct clk_branch gcc_aggre0_snoc_axi_clk = {
 			.name = "gcc_aggre0_snoc_axi_clk",
 			.parent_names = (const char *[]){ "system_noc_clk_src" },
 			.num_parents = 1,
-			.flags = CLK_SET_RATE_PARENT,
+			.flags = CLK_SET_RATE_PARENT | CLK_IS_CRITICAL,
 			.ops = &clk_branch2_ops,
 		},
 	},
@@ -2910,7 +2910,7 @@ static struct clk_branch gcc_aggre0_cnoc_ahb_clk = {
 			.name = "gcc_aggre0_cnoc_ahb_clk",
 			.parent_names = (const char *[]){ "config_noc_clk_src" },
 			.num_parents = 1,
-			.flags = CLK_SET_RATE_PARENT,
+			.flags = CLK_SET_RATE_PARENT | CLK_IS_CRITICAL,
 			.ops = &clk_branch2_ops,
 		},
 	},
@@ -2925,7 +2925,7 @@ static struct clk_branch gcc_smmu_aggre0_axi_clk = {
 			.name = "gcc_smmu_aggre0_axi_clk",
 			.parent_names = (const char *[]){ "system_noc_clk_src" },
 			.num_parents = 1,
-			.flags = CLK_SET_RATE_PARENT,
+			.flags = CLK_SET_RATE_PARENT | CLK_IS_CRITICAL,
 			.ops = &clk_branch2_ops,
 		},
 	},
@@ -2940,7 +2940,7 @@ static struct clk_branch gcc_smmu_aggre0_ahb_clk = {
 			.name = "gcc_smmu_aggre0_ahb_clk",
 			.parent_names = (const char *[]){ "config_noc_clk_src" },
 			.num_parents = 1,
-			.flags = CLK_SET_RATE_PARENT,
+			.flags = CLK_SET_RATE_PARENT | CLK_IS_CRITICAL,
 			.ops = &clk_branch2_ops,
 		},
 	},
diff --git a/drivers/clk/renesas/Kconfig b/drivers/clk/renesas/Kconfig
index 43b5a89c4b28..ef76c861ec84 100644
--- a/drivers/clk/renesas/Kconfig
+++ b/drivers/clk/renesas/Kconfig
@@ -15,7 +15,9 @@ config CLK_RENESAS
 	select CLK_R8A7794 if ARCH_R8A7794
 	select CLK_R8A7795 if ARCH_R8A7795
 	select CLK_R8A7796 if ARCH_R8A7796
+	select CLK_R8A77965 if ARCH_R8A77965
 	select CLK_R8A77970 if ARCH_R8A77970
+	select CLK_R8A77980 if ARCH_R8A77980
 	select CLK_R8A77995 if ARCH_R8A77995
 	select CLK_SH73A0 if ARCH_SH73A0
 
@@ -24,12 +26,13 @@ if CLK_RENESAS
 config CLK_RENESAS_LEGACY
 	bool "Legacy DT clock support"
 	depends on CLK_R8A7790 || CLK_R8A7791 || CLK_R8A7792 || CLK_R8A7794
-	default y
 	help
 	  Enable backward compatibility with old device trees describing a
 	  hierarchical representation of the various CPG and MSTP clocks.
 
 	  Say Y if you want your kernel to work with old DTBs.
+	  It is safe to say N if you use the DTS that is supplied with the
+	  current kernel source tree.
 
 # SoC
 config CLK_EMEV2
@@ -96,10 +99,18 @@ config CLK_R8A7796
 	bool "R-Car M3-W clock support" if COMPILE_TEST
 	select CLK_RCAR_GEN3_CPG
 
+config CLK_R8A77965
+	bool "R-Car M3-N clock support" if COMPILE_TEST
+	select CLK_RCAR_GEN3_CPG
+
 config CLK_R8A77970
 	bool "R-Car V3M clock support" if COMPILE_TEST
 	select CLK_RCAR_GEN3_CPG
 
+config CLK_R8A77980
+	bool "R-Car V3H clock support" if COMPILE_TEST
+	select CLK_RCAR_GEN3_CPG
+
 config CLK_R8A77995
 	bool "R-Car D3 clock support" if COMPILE_TEST
 	select CLK_RCAR_GEN3_CPG
diff --git a/drivers/clk/renesas/Makefile b/drivers/clk/renesas/Makefile
index 34c4e0b37afa..6c0f19636e3e 100644
--- a/drivers/clk/renesas/Makefile
+++ b/drivers/clk/renesas/Makefile
@@ -14,7 +14,9 @@ obj-$(CONFIG_CLK_R8A7792)		+= r8a7792-cpg-mssr.o
 obj-$(CONFIG_CLK_R8A7794)		+= r8a7794-cpg-mssr.o
 obj-$(CONFIG_CLK_R8A7795)		+= r8a7795-cpg-mssr.o
 obj-$(CONFIG_CLK_R8A7796)		+= r8a7796-cpg-mssr.o
+obj-$(CONFIG_CLK_R8A77965)		+= r8a77965-cpg-mssr.o
 obj-$(CONFIG_CLK_R8A77970)		+= r8a77970-cpg-mssr.o
+obj-$(CONFIG_CLK_R8A77980)		+= r8a77980-cpg-mssr.o
 obj-$(CONFIG_CLK_R8A77995)		+= r8a77995-cpg-mssr.o
 obj-$(CONFIG_CLK_SH73A0)		+= clk-sh73a0.o
 
diff --git a/drivers/clk/renesas/clk-div6.c b/drivers/clk/renesas/clk-div6.c
index 151336d2ba59..9febbf42c3df 100644
--- a/drivers/clk/renesas/clk-div6.c
+++ b/drivers/clk/renesas/clk-div6.c
@@ -53,9 +53,9 @@ static int cpg_div6_clock_enable(struct clk_hw *hw)
 	struct div6_clock *clock = to_div6_clock(hw);
 	u32 val;
 
-	val = (clk_readl(clock->reg) & ~(CPG_DIV6_DIV_MASK | CPG_DIV6_CKSTP))
+	val = (readl(clock->reg) & ~(CPG_DIV6_DIV_MASK | CPG_DIV6_CKSTP))
 	    | CPG_DIV6_DIV(clock->div - 1);
-	clk_writel(val, clock->reg);
+	writel(val, clock->reg);
 
 	return 0;
 }
@@ -65,7 +65,7 @@ static void cpg_div6_clock_disable(struct clk_hw *hw)
 	struct div6_clock *clock = to_div6_clock(hw);
 	u32 val;
 
-	val = clk_readl(clock->reg);
+	val = readl(clock->reg);
 	val |= CPG_DIV6_CKSTP;
 	/*
 	 * DIV6 clocks require the divisor field to be non-zero when stopping
@@ -75,14 +75,14 @@ static void cpg_div6_clock_disable(struct clk_hw *hw)
 	 */
 	if (!(val & CPG_DIV6_DIV_MASK))
 		val |= CPG_DIV6_DIV_MASK;
-	clk_writel(val, clock->reg);
+	writel(val, clock->reg);
 }
 
 static int cpg_div6_clock_is_enabled(struct clk_hw *hw)
 {
 	struct div6_clock *clock = to_div6_clock(hw);
 
-	return !(clk_readl(clock->reg) & CPG_DIV6_CKSTP);
+	return !(readl(clock->reg) & CPG_DIV6_CKSTP);
 }
 
 static unsigned long cpg_div6_clock_recalc_rate(struct clk_hw *hw,
@@ -122,10 +122,10 @@ static int cpg_div6_clock_set_rate(struct clk_hw *hw, unsigned long rate,
 
 	clock->div = div;
 
-	val = clk_readl(clock->reg) & ~CPG_DIV6_DIV_MASK;
+	val = readl(clock->reg) & ~CPG_DIV6_DIV_MASK;
 	/* Only program the new divisor if the clock isn't stopped. */
 	if (!(val & CPG_DIV6_CKSTP))
-		clk_writel(val | CPG_DIV6_DIV(clock->div - 1), clock->reg);
+		writel(val | CPG_DIV6_DIV(clock->div - 1), clock->reg);
 
 	return 0;
 }
@@ -139,7 +139,7 @@ static u8 cpg_div6_clock_get_parent(struct clk_hw *hw)
 	if (clock->src_width == 0)
 		return 0;
 
-	hw_index = (clk_readl(clock->reg) >> clock->src_shift) &
+	hw_index = (readl(clock->reg) >> clock->src_shift) &
 		   (BIT(clock->src_width) - 1);
 	for (i = 0; i < clk_hw_get_num_parents(hw); i++) {
 		if (clock->parents[i] == hw_index)
@@ -163,8 +163,8 @@ static int cpg_div6_clock_set_parent(struct clk_hw *hw, u8 index)
 	mask = ~((BIT(clock->src_width) - 1) << clock->src_shift);
 	hw_index = clock->parents[index];
 
-	clk_writel((clk_readl(clock->reg) & mask) |
-		(hw_index << clock->src_shift), clock->reg);
+	writel((readl(clock->reg) & mask) | (hw_index << clock->src_shift),
+	       clock->reg);
 
 	return 0;
 }
@@ -241,7 +241,7 @@ struct clk * __init cpg_div6_register(const char *name,
 	 * Read the divisor. Disabling the clock overwrites the divisor, so we
 	 * need to cache its value for the enable operation.
 	 */
-	clock->div = (clk_readl(clock->reg) & CPG_DIV6_DIV_MASK) + 1;
+	clock->div = (readl(clock->reg) & CPG_DIV6_DIV_MASK) + 1;
 
 	switch (num_parents) {
 	case 1:
diff --git a/drivers/clk/renesas/clk-mstp.c b/drivers/clk/renesas/clk-mstp.c
index 858c24d4da8f..e82adcb16a52 100644
--- a/drivers/clk/renesas/clk-mstp.c
+++ b/drivers/clk/renesas/clk-mstp.c
@@ -64,13 +64,13 @@ struct mstp_clock {
 static inline u32 cpg_mstp_read(struct mstp_clock_group *group,
 				u32 __iomem *reg)
 {
-	return group->width_8bit ? readb(reg) : clk_readl(reg);
+	return group->width_8bit ? readb(reg) : readl(reg);
 }
 
 static inline void cpg_mstp_write(struct mstp_clock_group *group, u32 val,
 				  u32 __iomem *reg)
 {
-	group->width_8bit ? writeb(val, reg) : clk_writel(val, reg);
+	group->width_8bit ? writeb(val, reg) : writel(val, reg);
 }
 
 static int cpg_mstp_clock_endisable(struct clk_hw *hw, bool enable)
diff --git a/drivers/clk/renesas/clk-r8a73a4.c b/drivers/clk/renesas/clk-r8a73a4.c
index 28d204bb659e..7b903ce4c901 100644
--- a/drivers/clk/renesas/clk-r8a73a4.c
+++ b/drivers/clk/renesas/clk-r8a73a4.c
@@ -71,7 +71,7 @@ r8a73a4_cpg_register_clock(struct device_node *np, struct r8a73a4_cpg *cpg,
 
 
 	if (!strcmp(name, "main")) {
-		u32 ckscr = clk_readl(cpg->reg + CPG_CKSCR);
+		u32 ckscr = readl(cpg->reg + CPG_CKSCR);
 
 		switch ((ckscr >> 28) & 3) {
 		case 0:	/* extal1 */
@@ -95,14 +95,14 @@ r8a73a4_cpg_register_clock(struct device_node *np, struct r8a73a4_cpg *cpg,
 		 * clock implementation and we currently have no need to change
 		 * the multiplier value.
 		 */
-		u32 value = clk_readl(cpg->reg + CPG_PLL0CR);
+		u32 value = readl(cpg->reg + CPG_PLL0CR);
 
 		parent_name = "main";
 		mult = ((value >> 24) & 0x7f) + 1;
 		if (value & BIT(20))
 			div = 2;
 	} else if (!strcmp(name, "pll1")) {
-		u32 value = clk_readl(cpg->reg + CPG_PLL1CR);
+		u32 value = readl(cpg->reg + CPG_PLL1CR);
 
 		parent_name = "main";
 		/* XXX: enable bit? */
@@ -125,7 +125,7 @@ r8a73a4_cpg_register_clock(struct device_node *np, struct r8a73a4_cpg *cpg,
 		default:
 			return ERR_PTR(-EINVAL);
 		}
-		value = clk_readl(cpg->reg + cr);
+		value = readl(cpg->reg + cr);
 		switch ((value >> 5) & 7) {
 		case 0:
 			parent_name = "main";
@@ -161,8 +161,7 @@ r8a73a4_cpg_register_clock(struct device_node *np, struct r8a73a4_cpg *cpg,
 			shift = 0;
 		}
 		div *= 32;
-		mult = 0x20 - ((clk_readl(cpg->reg + CPG_FRQCRC) >> shift)
-		       & 0x1f);
+		mult = 0x20 - ((readl(cpg->reg + CPG_FRQCRC) >> shift) & 0x1f);
 	} else {
 		struct div4_clk *c;
 
diff --git a/drivers/clk/renesas/clk-r8a7740.c b/drivers/clk/renesas/clk-r8a7740.c
index 2f7ce6696b6c..d074f8e982d0 100644
--- a/drivers/clk/renesas/clk-r8a7740.c
+++ b/drivers/clk/renesas/clk-r8a7740.c
@@ -98,20 +98,20 @@ r8a7740_cpg_register_clock(struct device_node *np, struct r8a7740_cpg *cpg,
 		 * clock implementation and we currently have no need to change
 		 * the multiplier value.
 		 */
-		u32 value = clk_readl(cpg->reg + CPG_FRQCRC);
+		u32 value = readl(cpg->reg + CPG_FRQCRC);
 		parent_name = "system";
 		mult = ((value >> 24) & 0x7f) + 1;
 	} else if (!strcmp(name, "pllc1")) {
-		u32 value = clk_readl(cpg->reg + CPG_FRQCRA);
+		u32 value = readl(cpg->reg + CPG_FRQCRA);
 		parent_name = "system";
 		mult = ((value >> 24) & 0x7f) + 1;
 		div = 2;
 	} else if (!strcmp(name, "pllc2")) {
-		u32 value = clk_readl(cpg->reg + CPG_PLLC2CR);
+		u32 value = readl(cpg->reg + CPG_PLLC2CR);
 		parent_name = "system";
 		mult = ((value >> 24) & 0x3f) + 1;
 	} else if (!strcmp(name, "usb24s")) {
-		u32 value = clk_readl(cpg->reg + CPG_USBCKCR);
+		u32 value = readl(cpg->reg + CPG_USBCKCR);
 		if (value & BIT(7))
 			/* extal2 */
 			parent_name = of_clk_get_parent_name(np, 1);
diff --git a/drivers/clk/renesas/clk-rcar-gen2.c b/drivers/clk/renesas/clk-rcar-gen2.c
index d14cbe1ca29a..ee32a022e6da 100644
--- a/drivers/clk/renesas/clk-rcar-gen2.c
+++ b/drivers/clk/renesas/clk-rcar-gen2.c
@@ -62,8 +62,7 @@ static unsigned long cpg_z_clk_recalc_rate(struct clk_hw *hw,
 	unsigned int mult;
 	unsigned int val;
 
-	val = (clk_readl(zclk->reg) & CPG_FRQCRC_ZFC_MASK)
-	    >> CPG_FRQCRC_ZFC_SHIFT;
+	val = (readl(zclk->reg) & CPG_FRQCRC_ZFC_MASK) >> CPG_FRQCRC_ZFC_SHIFT;
 	mult = 32 - val;
 
 	return div_u64((u64)parent_rate * mult, 32);
@@ -95,21 +94,21 @@ static int cpg_z_clk_set_rate(struct clk_hw *hw, unsigned long rate,
 	mult = div_u64((u64)rate * 32, parent_rate);
 	mult = clamp(mult, 1U, 32U);
 
-	if (clk_readl(zclk->kick_reg) & CPG_FRQCRB_KICK)
+	if (readl(zclk->kick_reg) & CPG_FRQCRB_KICK)
 		return -EBUSY;
 
-	val = clk_readl(zclk->reg);
+	val = readl(zclk->reg);
 	val &= ~CPG_FRQCRC_ZFC_MASK;
 	val |= (32 - mult) << CPG_FRQCRC_ZFC_SHIFT;
-	clk_writel(val, zclk->reg);
+	writel(val, zclk->reg);
 
 	/*
 	 * Set KICK bit in FRQCRB to update hardware setting and wait for
 	 * clock change completion.
 	 */
-	kick = clk_readl(zclk->kick_reg);
+	kick = readl(zclk->kick_reg);
 	kick |= CPG_FRQCRB_KICK;
-	clk_writel(kick, zclk->kick_reg);
+	writel(kick, zclk->kick_reg);
 
 	/*
 	 * Note: There is no HW information about the worst case latency.
@@ -121,7 +120,7 @@ static int cpg_z_clk_set_rate(struct clk_hw *hw, unsigned long rate,
 	 * "super" safe value.
 	 */
 	for (i = 1000; i; i--) {
-		if (!(clk_readl(zclk->kick_reg) & CPG_FRQCRB_KICK))
+		if (!(readl(zclk->kick_reg) & CPG_FRQCRB_KICK))
 			return 0;
 
 		cpu_relax();
@@ -332,7 +331,7 @@ rcar_gen2_cpg_register_clock(struct device_node *np, struct rcar_gen2_cpg *cpg,
 			mult = config->pll0_mult;
 			div = 3;
 		} else {
-			u32 value = clk_readl(cpg->reg + CPG_PLL0CR);
+			u32 value = readl(cpg->reg + CPG_PLL0CR);
 			mult = ((value >> 24) & ((1 << 7) - 1)) + 1;
 		}
 		parent_name = "main";
diff --git a/drivers/clk/renesas/clk-rz.c b/drivers/clk/renesas/clk-rz.c
index 127c58135c8f..67dd712aa723 100644
--- a/drivers/clk/renesas/clk-rz.c
+++ b/drivers/clk/renesas/clk-rz.c
@@ -75,9 +75,9 @@ rz_cpg_register_clock(struct device_node *np, struct rz_cpg *cpg, const char *na
 	 * let them run at fixed current speed and implement the details later.
 	 */
 	if (strcmp(name, "i") == 0)
-		val = (clk_readl(cpg->reg + CPG_FRQCR) >> 8) & 3;
+		val = (readl(cpg->reg + CPG_FRQCR) >> 8) & 3;
 	else if (strcmp(name, "g") == 0)
-		val = clk_readl(cpg->reg + CPG_FRQCR2) & 3;
+		val = readl(cpg->reg + CPG_FRQCR2) & 3;
 	else
 		return ERR_PTR(-EINVAL);
 
diff --git a/drivers/clk/renesas/clk-sh73a0.c b/drivers/clk/renesas/clk-sh73a0.c
index eea38f6ea77e..bab33610eb6c 100644
--- a/drivers/clk/renesas/clk-sh73a0.c
+++ b/drivers/clk/renesas/clk-sh73a0.c
@@ -46,7 +46,7 @@ struct div4_clk {
 	unsigned int shift;
 };
 
-static struct div4_clk div4_clks[] = {
+static const struct div4_clk div4_clks[] = {
 	{ "zg", "pll0", CPG_FRQCRA, 16 },
 	{ "m3", "pll1", CPG_FRQCRA, 12 },
 	{ "b",  "pll1", CPG_FRQCRA,  8 },
@@ -79,13 +79,13 @@ sh73a0_cpg_register_clock(struct device_node *np, struct sh73a0_cpg *cpg,
 {
 	const struct clk_div_table *table = NULL;
 	unsigned int shift, reg, width;
-	const char *parent_name;
+	const char *parent_name = NULL;
 	unsigned int mult = 1;
 	unsigned int div = 1;
 
 	if (!strcmp(name, "main")) {
 		/* extal1, extal1_div2, extal2, extal2_div2 */
-		u32 parent_idx = (clk_readl(cpg->reg + CPG_CKSCR) >> 28) & 3;
+		u32 parent_idx = (readl(cpg->reg + CPG_CKSCR) >> 28) & 3;
 
 		parent_name = of_clk_get_parent_name(np, parent_idx >> 1);
 		div = (parent_idx & 1) + 1;
@@ -110,11 +110,11 @@ sh73a0_cpg_register_clock(struct device_node *np, struct sh73a0_cpg *cpg,
 		default:
 			return ERR_PTR(-EINVAL);
 		}
-		if (clk_readl(cpg->reg + CPG_PLLECR) & BIT(enable_bit)) {
-			mult = ((clk_readl(enable_reg) >> 24) & 0x3f) + 1;
+		if (readl(cpg->reg + CPG_PLLECR) & BIT(enable_bit)) {
+			mult = ((readl(enable_reg) >> 24) & 0x3f) + 1;
 			/* handle CFG bit for PLL1 and PLL2 */
 			if (enable_bit == 1 || enable_bit == 2)
-				if (clk_readl(enable_reg) & BIT(20))
+				if (readl(enable_reg) & BIT(20))
 					mult *= 2;
 		}
 	} else if (!strcmp(name, "dsi0phy") || !strcmp(name, "dsi1phy")) {
@@ -135,7 +135,7 @@ sh73a0_cpg_register_clock(struct device_node *np, struct sh73a0_cpg *cpg,
 		shift = 24;
 		width = 5;
 	} else {
-		struct div4_clk *c;
+		const struct div4_clk *c;
 
 		for (c = div4_clks; c->name; c++) {
 			if (!strcmp(name, c->name)) {
@@ -193,9 +193,9 @@ static void __init sh73a0_cpg_clocks_init(struct device_node *np)
 		return;
 
 	/* Set SDHI clocks to a known state */
-	clk_writel(0x108, cpg->reg + CPG_SD0CKCR);
-	clk_writel(0x108, cpg->reg + CPG_SD1CKCR);
-	clk_writel(0x108, cpg->reg + CPG_SD2CKCR);
+	writel(0x108, cpg->reg + CPG_SD0CKCR);
+	writel(0x108, cpg->reg + CPG_SD1CKCR);
+	writel(0x108, cpg->reg + CPG_SD2CKCR);
 
 	for (i = 0; i < num_clks; ++i) {
 		const char *name;
diff --git a/drivers/clk/renesas/r8a7743-cpg-mssr.c b/drivers/clk/renesas/r8a7743-cpg-mssr.c
index 6dc0b3082aa6..d3c8b1e2969f 100644
--- a/drivers/clk/renesas/r8a7743-cpg-mssr.c
+++ b/drivers/clk/renesas/r8a7743-cpg-mssr.c
@@ -117,6 +117,7 @@ static const struct mssr_mod_clk r8a7743_mod_clks[] __initconst = {
 	DEF_MOD("cmt1",			 329,	R8A7743_CLK_R),
 	DEF_MOD("usbhs-dmac0",		 330,	R8A7743_CLK_HP),
 	DEF_MOD("usbhs-dmac1",		 331,	R8A7743_CLK_HP),
+	DEF_MOD("rwdt",			 402,	R8A7743_CLK_R),
 	DEF_MOD("irqc",			 407,	R8A7743_CLK_CP),
 	DEF_MOD("intc-sys",		 408,	R8A7743_CLK_ZS),
 	DEF_MOD("audio-dmac1",		 501,	R8A7743_CLK_HP),
@@ -195,6 +196,7 @@ static const struct mssr_mod_clk r8a7743_mod_clks[] __initconst = {
 };
 
 static const unsigned int r8a7743_crit_mod_clks[] __initconst = {
+	MOD_CLK_ID(402),	/* RWDT */
 	MOD_CLK_ID(408),	/* INTC-SYS (GIC) */
 };
 
diff --git a/drivers/clk/renesas/r8a7745-cpg-mssr.c b/drivers/clk/renesas/r8a7745-cpg-mssr.c
index 2859504cc866..87f5a3619e4f 100644
--- a/drivers/clk/renesas/r8a7745-cpg-mssr.c
+++ b/drivers/clk/renesas/r8a7745-cpg-mssr.c
@@ -114,6 +114,7 @@ static const struct mssr_mod_clk r8a7745_mod_clks[] __initconst = {
 	DEF_MOD("cmt1",			 329,	R8A7745_CLK_R),
 	DEF_MOD("usbhs-dmac0",		 330,	R8A7745_CLK_HP),
 	DEF_MOD("usbhs-dmac1",		 331,	R8A7745_CLK_HP),
+	DEF_MOD("rwdt",			 402,	R8A7745_CLK_R),
 	DEF_MOD("irqc",			 407,	R8A7745_CLK_CP),
 	DEF_MOD("intc-sys",		 408,	R8A7745_CLK_ZS),
 	DEF_MOD("audio-dmac0",		 502,	R8A7745_CLK_HP),
@@ -180,6 +181,7 @@ static const struct mssr_mod_clk r8a7745_mod_clks[] __initconst = {
 };
 
 static const unsigned int r8a7745_crit_mod_clks[] __initconst = {
+	MOD_CLK_ID(402),	/* RWDT */
 	MOD_CLK_ID(408),	/* INTC-SYS (GIC) */
 };
 
diff --git a/drivers/clk/renesas/r8a7790-cpg-mssr.c b/drivers/clk/renesas/r8a7790-cpg-mssr.c
index 46bb55bb223d..f936cb74b681 100644
--- a/drivers/clk/renesas/r8a7790-cpg-mssr.c
+++ b/drivers/clk/renesas/r8a7790-cpg-mssr.c
@@ -140,6 +140,7 @@ static const struct mssr_mod_clk r8a7790_mod_clks[] __initconst = {
 	DEF_MOD("cmt1",			 329,	R8A7790_CLK_R),
 	DEF_MOD("usbhs-dmac0",		 330,	R8A7790_CLK_HP),
 	DEF_MOD("usbhs-dmac1",		 331,	R8A7790_CLK_HP),
+	DEF_MOD("rwdt",			 402,	R8A7790_CLK_R),
 	DEF_MOD("irqc",			 407,	R8A7790_CLK_CP),
 	DEF_MOD("intc-sys",		 408,	R8A7790_CLK_ZS),
 	DEF_MOD("audio-dmac1",		 501,	R8A7790_CLK_HP),
@@ -211,6 +212,7 @@ static const struct mssr_mod_clk r8a7790_mod_clks[] __initconst = {
 };
 
 static const unsigned int r8a7790_crit_mod_clks[] __initconst = {
+	MOD_CLK_ID(402),	/* RWDT */
 	MOD_CLK_ID(408),	/* INTC-SYS (GIC) */
 };
 
diff --git a/drivers/clk/renesas/r8a7791-cpg-mssr.c b/drivers/clk/renesas/r8a7791-cpg-mssr.c
index c0b51f9bb278..820b220b09cc 100644
--- a/drivers/clk/renesas/r8a7791-cpg-mssr.c
+++ b/drivers/clk/renesas/r8a7791-cpg-mssr.c
@@ -128,6 +128,7 @@ static const struct mssr_mod_clk r8a7791_mod_clks[] __initconst = {
 	DEF_MOD("cmt1",			 329,	R8A7791_CLK_R),
 	DEF_MOD("usbhs-dmac0",		 330,	R8A7791_CLK_HP),
 	DEF_MOD("usbhs-dmac1",		 331,	R8A7791_CLK_HP),
+	DEF_MOD("rwdt",			 402,	R8A7791_CLK_R),
 	DEF_MOD("irqc",			 407,	R8A7791_CLK_CP),
 	DEF_MOD("intc-sys",		 408,	R8A7791_CLK_ZS),
 	DEF_MOD("audio-dmac1",		 501,	R8A7791_CLK_HP),
@@ -209,6 +210,7 @@ static const struct mssr_mod_clk r8a7791_mod_clks[] __initconst = {
 };
 
 static const unsigned int r8a7791_crit_mod_clks[] __initconst = {
+	MOD_CLK_ID(402),	/* RWDT */
 	MOD_CLK_ID(408),	/* INTC-SYS (GIC) */
 };
 
diff --git a/drivers/clk/renesas/r8a7792-cpg-mssr.c b/drivers/clk/renesas/r8a7792-cpg-mssr.c
index 7f85bbf20bf7..609a54080496 100644
--- a/drivers/clk/renesas/r8a7792-cpg-mssr.c
+++ b/drivers/clk/renesas/r8a7792-cpg-mssr.c
@@ -98,6 +98,7 @@ static const struct mssr_mod_clk r8a7792_mod_clks[] __initconst = {
 	DEF_MOD("tpu0",			 304,	R8A7792_CLK_CP),
 	DEF_MOD("sdhi0",		 314,	R8A7792_CLK_SD),
 	DEF_MOD("cmt1",			 329,	R8A7792_CLK_R),
+	DEF_MOD("rwdt",			 402,	R8A7792_CLK_R),
 	DEF_MOD("irqc",			 407,	R8A7792_CLK_CP),
 	DEF_MOD("intc-sys",		 408,	R8A7792_CLK_ZS),
 	DEF_MOD("audio-dmac0",		 502,	R8A7792_CLK_HP),
@@ -154,6 +155,7 @@ static const struct mssr_mod_clk r8a7792_mod_clks[] __initconst = {
 };
 
 static const unsigned int r8a7792_crit_mod_clks[] __initconst = {
+	MOD_CLK_ID(402),	/* RWDT */
 	MOD_CLK_ID(408),	/* INTC-SYS (GIC) */
 };
 
diff --git a/drivers/clk/renesas/r8a7794-cpg-mssr.c b/drivers/clk/renesas/r8a7794-cpg-mssr.c
index ec091a42da54..2a40bbeaeeaf 100644
--- a/drivers/clk/renesas/r8a7794-cpg-mssr.c
+++ b/drivers/clk/renesas/r8a7794-cpg-mssr.c
@@ -121,6 +121,7 @@ static const struct mssr_mod_clk r8a7794_mod_clks[] __initconst = {
 	DEF_MOD("cmt1",			 329,	R8A7794_CLK_R),
 	DEF_MOD("usbhs-dmac0",		 330,	R8A7794_CLK_HP),
 	DEF_MOD("usbhs-dmac1",		 331,	R8A7794_CLK_HP),
+	DEF_MOD("rwdt",			 402,	R8A7794_CLK_R),
 	DEF_MOD("irqc",			 407,	R8A7794_CLK_CP),
 	DEF_MOD("intc-sys",		 408,	R8A7794_CLK_ZS),
 	DEF_MOD("audio-dmac0",		 502,	R8A7794_CLK_HP),
@@ -190,6 +191,7 @@ static const struct mssr_mod_clk r8a7794_mod_clks[] __initconst = {
 };
 
 static const unsigned int r8a7794_crit_mod_clks[] __initconst = {
+	MOD_CLK_ID(402),	/* RWDT */
 	MOD_CLK_ID(408),	/* INTC-SYS (GIC) */
 };
 
diff --git a/drivers/clk/renesas/r8a7795-cpg-mssr.c b/drivers/clk/renesas/r8a7795-cpg-mssr.c
index b1d9f48eae9e..775b0ceaa337 100644
--- a/drivers/clk/renesas/r8a7795-cpg-mssr.c
+++ b/drivers/clk/renesas/r8a7795-cpg-mssr.c
@@ -74,6 +74,8 @@ static struct cpg_core_clk r8a7795_core_clks[] __initdata = {
 	DEF_FIXED(".sdsrc",     CLK_SDSRC,         CLK_PLL1_DIV2,  2, 1),
 
 	/* Core Clock Outputs */
+	DEF_BASE("z",           R8A7795_CLK_Z,     CLK_TYPE_GEN3_Z, CLK_PLL0),
+	DEF_BASE("z2",          R8A7795_CLK_Z2,    CLK_TYPE_GEN3_Z2, CLK_PLL2),
 	DEF_FIXED("ztr",        R8A7795_CLK_ZTR,   CLK_PLL1_DIV2,  6, 1),
 	DEF_FIXED("ztrd2",      R8A7795_CLK_ZTRD2, CLK_PLL1_DIV2, 12, 1),
 	DEF_FIXED("zt",         R8A7795_CLK_ZT,    CLK_PLL1_DIV2,  4, 1),
diff --git a/drivers/clk/renesas/r8a7796-cpg-mssr.c b/drivers/clk/renesas/r8a7796-cpg-mssr.c
index 41e29734126b..dfb267a92f2a 100644
--- a/drivers/clk/renesas/r8a7796-cpg-mssr.c
+++ b/drivers/clk/renesas/r8a7796-cpg-mssr.c
@@ -74,6 +74,8 @@ static const struct cpg_core_clk r8a7796_core_clks[] __initconst = {
 	DEF_FIXED(".sdsrc",     CLK_SDSRC,         CLK_PLL1_DIV2,  2, 1),
 
 	/* Core Clock Outputs */
+	DEF_BASE("z",           R8A7796_CLK_Z,     CLK_TYPE_GEN3_Z, CLK_PLL0),
+	DEF_BASE("z2",          R8A7796_CLK_Z2,    CLK_TYPE_GEN3_Z2, CLK_PLL2),
 	DEF_FIXED("ztr",        R8A7796_CLK_ZTR,   CLK_PLL1_DIV2,  6, 1),
 	DEF_FIXED("ztrd2",      R8A7796_CLK_ZTRD2, CLK_PLL1_DIV2, 12, 1),
 	DEF_FIXED("zt",         R8A7796_CLK_ZT,    CLK_PLL1_DIV2,  4, 1),
diff --git a/drivers/clk/renesas/r8a77965-cpg-mssr.c b/drivers/clk/renesas/r8a77965-cpg-mssr.c
new file mode 100644
index 000000000000..b1acfb60351c
--- /dev/null
+++ b/drivers/clk/renesas/r8a77965-cpg-mssr.c
@@ -0,0 +1,334 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * r8a77965 Clock Pulse Generator / Module Standby and Software Reset
+ *
+ * Copyright (C) 2018 Jacopo Mondi <jacopo+renesas@jmondi.org>
+ *
+ * Based on r8a7795-cpg-mssr.c
+ *
+ * Copyright (C) 2015 Glider bvba
+ * Copyright (C) 2015 Renesas Electronics Corp.
+ */
+
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/soc/renesas/rcar-rst.h>
+
+#include <dt-bindings/clock/r8a77965-cpg-mssr.h>
+
+#include "renesas-cpg-mssr.h"
+#include "rcar-gen3-cpg.h"
+
+enum clk_ids {
+	/* Core Clock Outputs exported to DT */
+	LAST_DT_CORE_CLK = R8A77965_CLK_OSC,
+
+	/* External Input Clocks */
+	CLK_EXTAL,
+	CLK_EXTALR,
+
+	/* Internal Core Clocks */
+	CLK_MAIN,
+	CLK_PLL0,
+	CLK_PLL1,
+	CLK_PLL3,
+	CLK_PLL4,
+	CLK_PLL1_DIV2,
+	CLK_PLL1_DIV4,
+	CLK_S0,
+	CLK_S1,
+	CLK_S2,
+	CLK_S3,
+	CLK_SDSRC,
+	CLK_SSPSRC,
+	CLK_RINT,
+
+	/* Module Clocks */
+	MOD_CLK_BASE
+};
+
+static const struct cpg_core_clk r8a77965_core_clks[] __initconst = {
+	/* External Clock Inputs */
+	DEF_INPUT("extal",      CLK_EXTAL),
+	DEF_INPUT("extalr",     CLK_EXTALR),
+
+	/* Internal Core Clocks */
+	DEF_BASE(".main",	CLK_MAIN, CLK_TYPE_GEN3_MAIN, CLK_EXTAL),
+	DEF_BASE(".pll0",	CLK_PLL0, CLK_TYPE_GEN3_PLL0, CLK_MAIN),
+	DEF_BASE(".pll1",	CLK_PLL1, CLK_TYPE_GEN3_PLL1, CLK_MAIN),
+	DEF_BASE(".pll3",	CLK_PLL3, CLK_TYPE_GEN3_PLL3, CLK_MAIN),
+	DEF_BASE(".pll4",	CLK_PLL4, CLK_TYPE_GEN3_PLL4, CLK_MAIN),
+
+	DEF_FIXED(".pll1_div2", CLK_PLL1_DIV2,		CLK_PLL1,	2, 1),
+	DEF_FIXED(".pll1_div4", CLK_PLL1_DIV4,		CLK_PLL1_DIV2,	2, 1),
+	DEF_FIXED(".s0",	CLK_S0,			CLK_PLL1_DIV2,	2, 1),
+	DEF_FIXED(".s1",	CLK_S1,			CLK_PLL1_DIV2,	3, 1),
+	DEF_FIXED(".s2",	CLK_S2,			CLK_PLL1_DIV2,	4, 1),
+	DEF_FIXED(".s3",	CLK_S3,			CLK_PLL1_DIV2,	6, 1),
+	DEF_FIXED(".sdsrc",	CLK_SDSRC,		CLK_PLL1_DIV2,	2, 1),
+
+	/* Core Clock Outputs */
+	DEF_BASE("z",		R8A77965_CLK_Z,		CLK_TYPE_GEN3_Z, CLK_PLL0),
+	DEF_FIXED("ztr",	R8A77965_CLK_ZTR,	CLK_PLL1_DIV2,	6, 1),
+	DEF_FIXED("ztrd2",	R8A77965_CLK_ZTRD2,	CLK_PLL1_DIV2,	12, 1),
+	DEF_FIXED("zt",		R8A77965_CLK_ZT,	CLK_PLL1_DIV2,	4, 1),
+	DEF_FIXED("zx",		R8A77965_CLK_ZX,	CLK_PLL1_DIV2,	2, 1),
+	DEF_FIXED("s0d1",	R8A77965_CLK_S0D1,	CLK_S0,		1, 1),
+	DEF_FIXED("s0d2",	R8A77965_CLK_S0D2,	CLK_S0,		2, 1),
+	DEF_FIXED("s0d3",	R8A77965_CLK_S0D3,	CLK_S0,		3, 1),
+	DEF_FIXED("s0d4",	R8A77965_CLK_S0D4,	CLK_S0,		4, 1),
+	DEF_FIXED("s0d6",	R8A77965_CLK_S0D6,	CLK_S0,		6, 1),
+	DEF_FIXED("s0d8",	R8A77965_CLK_S0D8,	CLK_S0,		8, 1),
+	DEF_FIXED("s0d12",	R8A77965_CLK_S0D12,	CLK_S0,		12, 1),
+	DEF_FIXED("s1d1",	R8A77965_CLK_S1D1,	CLK_S1,		1, 1),
+	DEF_FIXED("s1d2",	R8A77965_CLK_S1D2,	CLK_S1,		2, 1),
+	DEF_FIXED("s1d4",	R8A77965_CLK_S1D4,	CLK_S1,		4, 1),
+	DEF_FIXED("s2d1",	R8A77965_CLK_S2D1,	CLK_S2,		1, 1),
+	DEF_FIXED("s2d2",	R8A77965_CLK_S2D2,	CLK_S2,		2, 1),
+	DEF_FIXED("s2d4",	R8A77965_CLK_S2D4,	CLK_S2,		4, 1),
+	DEF_FIXED("s3d1",	R8A77965_CLK_S3D1,	CLK_S3,		1, 1),
+	DEF_FIXED("s3d2",	R8A77965_CLK_S3D2,	CLK_S3,		2, 1),
+	DEF_FIXED("s3d4",	R8A77965_CLK_S3D4,	CLK_S3,		4, 1),
+
+	DEF_GEN3_SD("sd0",	R8A77965_CLK_SD0,	CLK_SDSRC,	0x074),
+	DEF_GEN3_SD("sd1",	R8A77965_CLK_SD1,	CLK_SDSRC,	0x078),
+	DEF_GEN3_SD("sd2",	R8A77965_CLK_SD2,	CLK_SDSRC,	0x268),
+	DEF_GEN3_SD("sd3",	R8A77965_CLK_SD3,	CLK_SDSRC,	0x26c),
+
+	DEF_FIXED("cl",		R8A77965_CLK_CL,	CLK_PLL1_DIV2,	48, 1),
+	DEF_FIXED("cp",		R8A77965_CLK_CP,	CLK_EXTAL,	2, 1),
+
+	DEF_DIV6P1("canfd",	R8A77965_CLK_CANFD,	CLK_PLL1_DIV4,	0x244),
+	DEF_DIV6P1("csi0",	R8A77965_CLK_CSI0,	CLK_PLL1_DIV4,	0x00c),
+	DEF_DIV6P1("mso",	R8A77965_CLK_MSO,	CLK_PLL1_DIV4,	0x014),
+	DEF_DIV6P1("hdmi",	R8A77965_CLK_HDMI,	CLK_PLL1_DIV4,	0x250),
+
+	DEF_DIV6_RO("osc",	R8A77965_CLK_OSC, CLK_EXTAL, CPG_RCKCR, 8),
+	DEF_DIV6_RO("r_int",	CLK_RINT, CLK_EXTAL, CPG_RCKCR, 32),
+
+	DEF_BASE("r",		R8A77965_CLK_R,	CLK_TYPE_GEN3_R, CLK_RINT),
+};
+
+static const struct mssr_mod_clk r8a77965_mod_clks[] __initconst = {
+	DEF_MOD("scif5",		202,	R8A77965_CLK_S3D4),
+	DEF_MOD("scif4",		203,	R8A77965_CLK_S3D4),
+	DEF_MOD("scif3",		204,	R8A77965_CLK_S3D4),
+	DEF_MOD("scif1",		206,	R8A77965_CLK_S3D4),
+	DEF_MOD("scif0",		207,	R8A77965_CLK_S3D4),
+	DEF_MOD("sys-dmac2",		217,	R8A77965_CLK_S0D3),
+	DEF_MOD("sys-dmac1",		218,	R8A77965_CLK_S0D3),
+	DEF_MOD("sys-dmac0",		219,	R8A77965_CLK_S0D3),
+
+	DEF_MOD("cmt3",			300,	R8A77965_CLK_R),
+	DEF_MOD("cmt2",			301,	R8A77965_CLK_R),
+	DEF_MOD("cmt1",			302,	R8A77965_CLK_R),
+	DEF_MOD("cmt0",			303,	R8A77965_CLK_R),
+	DEF_MOD("scif2",		310,	R8A77965_CLK_S3D4),
+	DEF_MOD("sdif3",		311,	R8A77965_CLK_SD3),
+	DEF_MOD("sdif2",		312,	R8A77965_CLK_SD2),
+	DEF_MOD("sdif1",		313,	R8A77965_CLK_SD1),
+	DEF_MOD("sdif0",		314,	R8A77965_CLK_SD0),
+	DEF_MOD("pcie1",		318,	R8A77965_CLK_S3D1),
+	DEF_MOD("pcie0",		319,	R8A77965_CLK_S3D1),
+	DEF_MOD("usb3-if0",		328,	R8A77965_CLK_S3D1),
+	DEF_MOD("usb-dmac0",		330,	R8A77965_CLK_S3D1),
+	DEF_MOD("usb-dmac1",		331,	R8A77965_CLK_S3D1),
+
+	DEF_MOD("rwdt",			402,	R8A77965_CLK_R),
+	DEF_MOD("intc-ex",		407,	R8A77965_CLK_CP),
+	DEF_MOD("intc-ap",		408,	R8A77965_CLK_S0D3),
+
+	DEF_MOD("audmac1",		501,	R8A77965_CLK_S0D3),
+	DEF_MOD("audmac0",		502,	R8A77965_CLK_S0D3),
+	DEF_MOD("drif7",		508,	R8A77965_CLK_S3D2),
+	DEF_MOD("drif6",		509,	R8A77965_CLK_S3D2),
+	DEF_MOD("drif5",		510,	R8A77965_CLK_S3D2),
+	DEF_MOD("drif4",		511,	R8A77965_CLK_S3D2),
+	DEF_MOD("drif3",		512,	R8A77965_CLK_S3D2),
+	DEF_MOD("drif2",		513,	R8A77965_CLK_S3D2),
+	DEF_MOD("drif1",		514,	R8A77965_CLK_S3D2),
+	DEF_MOD("drif0",		515,	R8A77965_CLK_S3D2),
+	DEF_MOD("hscif4",		516,	R8A77965_CLK_S3D1),
+	DEF_MOD("hscif3",		517,	R8A77965_CLK_S3D1),
+	DEF_MOD("hscif2",		518,	R8A77965_CLK_S3D1),
+	DEF_MOD("hscif1",		519,	R8A77965_CLK_S3D1),
+	DEF_MOD("hscif0",		520,	R8A77965_CLK_S3D1),
+	DEF_MOD("thermal",		522,	R8A77965_CLK_CP),
+	DEF_MOD("pwm",			523,	R8A77965_CLK_S0D12),
+
+	DEF_MOD("fcpvd1",		602,	R8A77965_CLK_S0D2),
+	DEF_MOD("fcpvd0",		603,	R8A77965_CLK_S0D2),
+	DEF_MOD("fcpvb0",		607,	R8A77965_CLK_S0D1),
+	DEF_MOD("fcpvi0",		611,	R8A77965_CLK_S0D1),
+	DEF_MOD("fcpf0",		615,	R8A77965_CLK_S0D1),
+	DEF_MOD("fcpcs",		619,	R8A77965_CLK_S0D2),
+	DEF_MOD("vspd1",		622,	R8A77965_CLK_S0D2),
+	DEF_MOD("vspd0",		623,	R8A77965_CLK_S0D2),
+	DEF_MOD("vspb",			626,	R8A77965_CLK_S0D1),
+	DEF_MOD("vspi0",		631,	R8A77965_CLK_S0D1),
+
+	DEF_MOD("ehci1",		702,	R8A77965_CLK_S3D4),
+	DEF_MOD("ehci0",		703,	R8A77965_CLK_S3D4),
+	DEF_MOD("hsusb",		704,	R8A77965_CLK_S3D4),
+	DEF_MOD("csi20",		714,	R8A77965_CLK_CSI0),
+	DEF_MOD("csi40",		716,	R8A77965_CLK_CSI0),
+	DEF_MOD("du3",			721,	R8A77965_CLK_S2D1),
+	DEF_MOD("du1",			723,	R8A77965_CLK_S2D1),
+	DEF_MOD("du0",			724,	R8A77965_CLK_S2D1),
+	DEF_MOD("lvds",			727,	R8A77965_CLK_S2D1),
+	DEF_MOD("hdmi0",		729,	R8A77965_CLK_HDMI),
+
+	DEF_MOD("vin7",			804,	R8A77965_CLK_S0D2),
+	DEF_MOD("vin6",			805,	R8A77965_CLK_S0D2),
+	DEF_MOD("vin5",			806,	R8A77965_CLK_S0D2),
+	DEF_MOD("vin4",			807,	R8A77965_CLK_S0D2),
+	DEF_MOD("vin3",			808,	R8A77965_CLK_S0D2),
+	DEF_MOD("vin2",			809,	R8A77965_CLK_S0D2),
+	DEF_MOD("vin1",			810,	R8A77965_CLK_S0D2),
+	DEF_MOD("vin0",			811,	R8A77965_CLK_S0D2),
+	DEF_MOD("etheravb",		812,	R8A77965_CLK_S0D6),
+	DEF_MOD("imr1",			822,	R8A77965_CLK_S0D2),
+	DEF_MOD("imr0",			823,	R8A77965_CLK_S0D2),
+
+	DEF_MOD("gpio7",		905,	R8A77965_CLK_S3D4),
+	DEF_MOD("gpio6",		906,	R8A77965_CLK_S3D4),
+	DEF_MOD("gpio5",		907,	R8A77965_CLK_S3D4),
+	DEF_MOD("gpio4",		908,	R8A77965_CLK_S3D4),
+	DEF_MOD("gpio3",		909,	R8A77965_CLK_S3D4),
+	DEF_MOD("gpio2",		910,	R8A77965_CLK_S3D4),
+	DEF_MOD("gpio1",		911,	R8A77965_CLK_S3D4),
+	DEF_MOD("gpio0",		912,	R8A77965_CLK_S3D4),
+	DEF_MOD("can-fd",		914,	R8A77965_CLK_S3D2),
+	DEF_MOD("can-if1",		915,	R8A77965_CLK_S3D4),
+	DEF_MOD("can-if0",		916,	R8A77965_CLK_S3D4),
+	DEF_MOD("i2c6",			918,	R8A77965_CLK_S0D6),
+	DEF_MOD("i2c5",			919,	R8A77965_CLK_S0D6),
+	DEF_MOD("i2c-dvfs",		926,	R8A77965_CLK_CP),
+	DEF_MOD("i2c4",			927,	R8A77965_CLK_S0D6),
+	DEF_MOD("i2c3",			928,	R8A77965_CLK_S0D6),
+	DEF_MOD("i2c2",			929,	R8A77965_CLK_S3D2),
+	DEF_MOD("i2c1",			930,	R8A77965_CLK_S3D2),
+	DEF_MOD("i2c0",			931,	R8A77965_CLK_S3D2),
+
+	DEF_MOD("ssi-all",		1005,	R8A77965_CLK_S3D4),
+	DEF_MOD("ssi9",			1006,	MOD_CLK_ID(1005)),
+	DEF_MOD("ssi8",			1007,	MOD_CLK_ID(1005)),
+	DEF_MOD("ssi7",			1008,	MOD_CLK_ID(1005)),
+	DEF_MOD("ssi6",			1009,	MOD_CLK_ID(1005)),
+	DEF_MOD("ssi5",			1010,	MOD_CLK_ID(1005)),
+	DEF_MOD("ssi4",			1011,	MOD_CLK_ID(1005)),
+	DEF_MOD("ssi3",			1012,	MOD_CLK_ID(1005)),
+	DEF_MOD("ssi2",			1013,	MOD_CLK_ID(1005)),
+	DEF_MOD("ssi1",			1014,	MOD_CLK_ID(1005)),
+	DEF_MOD("ssi0",			1015,	MOD_CLK_ID(1005)),
+	DEF_MOD("scu-all",		1017,	R8A77965_CLK_S3D4),
+	DEF_MOD("scu-dvc1",		1018,	MOD_CLK_ID(1017)),
+	DEF_MOD("scu-dvc0",		1019,	MOD_CLK_ID(1017)),
+	DEF_MOD("scu-ctu1-mix1",	1020,	MOD_CLK_ID(1017)),
+	DEF_MOD("scu-ctu0-mix0",	1021,	MOD_CLK_ID(1017)),
+	DEF_MOD("scu-src9",		1022,	MOD_CLK_ID(1017)),
+	DEF_MOD("scu-src8",		1023,	MOD_CLK_ID(1017)),
+	DEF_MOD("scu-src7",		1024,	MOD_CLK_ID(1017)),
+	DEF_MOD("scu-src6",		1025,	MOD_CLK_ID(1017)),
+	DEF_MOD("scu-src5",		1026,	MOD_CLK_ID(1017)),
+	DEF_MOD("scu-src4",		1027,	MOD_CLK_ID(1017)),
+	DEF_MOD("scu-src3",		1028,	MOD_CLK_ID(1017)),
+	DEF_MOD("scu-src2",		1029,	MOD_CLK_ID(1017)),
+	DEF_MOD("scu-src1",		1030,	MOD_CLK_ID(1017)),
+	DEF_MOD("scu-src0",		1031,	MOD_CLK_ID(1017)),
+};
+
+static const unsigned int r8a77965_crit_mod_clks[] __initconst = {
+	MOD_CLK_ID(408),	/* INTC-AP (GIC) */
+};
+
+/*
+ * CPG Clock Data
+ */
+
+/*
+ *   MD		EXTAL		PLL0	PLL1	PLL3	PLL4
+ * 14 13 19 17	(MHz)
+ *-----------------------------------------------------------
+ * 0  0  0  0	16.66 x 1	x180	x192	x192	x144
+ * 0  0  0  1	16.66 x 1	x180	x192	x128	x144
+ * 0  0  1  0	Prohibited setting
+ * 0  0  1  1	16.66 x 1	x180	x192	x192	x144
+ * 0  1  0  0	20    x 1	x150	x160	x160	x120
+ * 0  1  0  1	20    x 1	x150	x160	x106	x120
+ * 0  1  1  0	Prohibited setting
+ * 0  1  1  1	20    x 1	x150	x160	x160	x120
+ * 1  0  0  0	25    x 1	x120	x128	x128	x96
+ * 1  0  0  1	25    x 1	x120	x128	x84	x96
+ * 1  0  1  0	Prohibited setting
+ * 1  0  1  1	25    x 1	x120	x128	x128	x96
+ * 1  1  0  0	33.33 / 2	x180	x192	x192	x144
+ * 1  1  0  1	33.33 / 2	x180	x192	x128	x144
+ * 1  1  1  0	Prohibited setting
+ * 1  1  1  1	33.33 / 2	x180	x192	x192	x144
+ */
+#define CPG_PLL_CONFIG_INDEX(md)	((((md) & BIT(14)) >> 11) | \
+					 (((md) & BIT(13)) >> 11) | \
+					 (((md) & BIT(19)) >> 18) | \
+					 (((md) & BIT(17)) >> 17))
+
+static const struct rcar_gen3_cpg_pll_config cpg_pll_configs[16] __initconst = {
+	/* EXTAL div	PLL1 mult/div	PLL3 mult/div */
+	{ 1,		192,	1,	192,	1,	},
+	{ 1,		192,	1,	128,	1,	},
+	{ 0, /* Prohibited setting */			},
+	{ 1,		192,	1,	192,	1,	},
+	{ 1,		160,	1,	160,	1,	},
+	{ 1,		160,	1,	106,	1,	},
+	{ 0, /* Prohibited setting */			},
+	{ 1,		160,	1,	160,	1,	},
+	{ 1,		128,	1,	128,	1,	},
+	{ 1,		128,	1,	84,	1,	},
+	{ 0, /* Prohibited setting */			},
+	{ 1,		128,	1,	128,	1,	},
+	{ 2,		192,	1,	192,	1,	},
+	{ 2,		192,	1,	128,	1,	},
+	{ 0, /* Prohibited setting */			},
+	{ 2,		192,	1,	192,	1,	},
+};
+
+static int __init r8a77965_cpg_mssr_init(struct device *dev)
+{
+	const struct rcar_gen3_cpg_pll_config *cpg_pll_config;
+	u32 cpg_mode;
+	int error;
+
+	error = rcar_rst_read_mode_pins(&cpg_mode);
+	if (error)
+		return error;
+	/* A zero extal_div marks a prohibited mode pin setting. */
+	cpg_pll_config = &cpg_pll_configs[CPG_PLL_CONFIG_INDEX(cpg_mode)];
+	if (!cpg_pll_config->extal_div) {
+		dev_err(dev, "Prohibited setting (cpg_mode=0x%x)\n", cpg_mode);
+		return -EINVAL;
+	}
+
+	return rcar_gen3_cpg_init(cpg_pll_config, CLK_EXTALR, cpg_mode);
+}
+
+const struct cpg_mssr_info r8a77965_cpg_mssr_info __initconst = {
+	/* Core Clocks */
+	.core_clks		= r8a77965_core_clks,
+	.num_core_clks		= ARRAY_SIZE(r8a77965_core_clks),
+	.last_dt_core_clk	= LAST_DT_CORE_CLK,
+	.num_total_core_clks	= MOD_CLK_BASE,
+
+	/* Module Clocks */
+	.mod_clks		= r8a77965_mod_clks,
+	.num_mod_clks		= ARRAY_SIZE(r8a77965_mod_clks),
+	.num_hw_mod_clks	= 12 * 32,
+
+	/* Critical Module Clocks */
+	.crit_mod_clks		= r8a77965_crit_mod_clks,
+	.num_crit_mod_clks	= ARRAY_SIZE(r8a77965_crit_mod_clks),
+
+	/* Callbacks */
+	.init			= r8a77965_cpg_mssr_init,
+	.cpg_clk_register	= rcar_gen3_cpg_clk_register,
+};
diff --git a/drivers/clk/renesas/r8a77980-cpg-mssr.c b/drivers/clk/renesas/r8a77980-cpg-mssr.c
new file mode 100644
index 000000000000..7aaae73a321a
--- /dev/null
+++ b/drivers/clk/renesas/r8a77980-cpg-mssr.c
@@ -0,0 +1,227 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * r8a77980 Clock Pulse Generator / Module Standby and Software Reset
+ *
+ * Copyright (C) 2018 Renesas Electronics Corp.
+ * Copyright (C) 2018 Cogent Embedded, Inc.
+ *
+ * Based on r8a7795-cpg-mssr.c
+ *
+ * Copyright (C) 2015 Glider bvba
+ */
+
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/soc/renesas/rcar-rst.h>
+#include <linux/sys_soc.h>
+
+#include <dt-bindings/clock/r8a77980-cpg-mssr.h>
+
+#include "renesas-cpg-mssr.h"
+#include "rcar-gen3-cpg.h"
+
+enum clk_ids {
+	/* Core Clock Outputs exported to DT */
+	LAST_DT_CORE_CLK = R8A77980_CLK_OSC,
+
+	/* External Input Clocks */
+	CLK_EXTAL,
+	CLK_EXTALR,
+
+	/* Internal Core Clocks */
+	CLK_MAIN,
+	CLK_PLL1,
+	CLK_PLL2,
+	CLK_PLL3,
+	CLK_PLL1_DIV2,
+	CLK_PLL1_DIV4,
+	CLK_S0,
+	CLK_S1,
+	CLK_S2,
+	CLK_S3,
+	CLK_SDSRC,
+
+	/* Module Clocks */
+	MOD_CLK_BASE
+};
+
+static const struct cpg_core_clk r8a77980_core_clks[] __initconst = {
+	/* External Clock Inputs */
+	DEF_INPUT("extal",  CLK_EXTAL),
+	DEF_INPUT("extalr", CLK_EXTALR),
+
+	/* Internal Core Clocks */
+	DEF_BASE(".main",       CLK_MAIN, CLK_TYPE_GEN3_MAIN, CLK_EXTAL),
+	DEF_BASE(".pll1",       CLK_PLL1, CLK_TYPE_GEN3_PLL1, CLK_MAIN),
+	DEF_BASE(".pll2",       CLK_PLL2, CLK_TYPE_GEN3_PLL2, CLK_MAIN),
+	DEF_BASE(".pll3",       CLK_PLL3, CLK_TYPE_GEN3_PLL3, CLK_MAIN),
+
+	DEF_FIXED(".pll1_div2", CLK_PLL1_DIV2,	   CLK_PLL1,       2, 1),
+	DEF_FIXED(".pll1_div4", CLK_PLL1_DIV4,	   CLK_PLL1_DIV2,  2, 1),
+	DEF_FIXED(".s0",	CLK_S0,		   CLK_PLL1_DIV2,  2, 1),
+	DEF_FIXED(".s1",	CLK_S1,		   CLK_PLL1_DIV2,  3, 1),
+	DEF_FIXED(".s2",	CLK_S2,		   CLK_PLL1_DIV2,  4, 1),
+	DEF_FIXED(".s3",	CLK_S3,		   CLK_PLL1_DIV2,  6, 1),
+	DEF_FIXED(".sdsrc",	CLK_SDSRC,	   CLK_PLL1_DIV2,  2, 1),
+
+	/* Core Clock Outputs */
+	DEF_FIXED("ztr",	R8A77980_CLK_ZTR,   CLK_PLL1_DIV2,  6, 1),
+	DEF_FIXED("ztrd2",	R8A77980_CLK_ZTRD2, CLK_PLL1_DIV2, 12, 1),
+	DEF_FIXED("zt",		R8A77980_CLK_ZT,    CLK_PLL1_DIV2,  4, 1),
+	DEF_FIXED("zx",		R8A77980_CLK_ZX,    CLK_PLL1_DIV2,  2, 1),
+	DEF_FIXED("s0d1",	R8A77980_CLK_S0D1,  CLK_S0,         1, 1),
+	DEF_FIXED("s0d2",	R8A77980_CLK_S0D2,  CLK_S0,         2, 1),
+	DEF_FIXED("s0d3",	R8A77980_CLK_S0D3,  CLK_S0,         3, 1),
+	DEF_FIXED("s0d4",	R8A77980_CLK_S0D4,  CLK_S0,         4, 1),
+	DEF_FIXED("s0d6",	R8A77980_CLK_S0D6,  CLK_S0,         6, 1),
+	DEF_FIXED("s0d12",	R8A77980_CLK_S0D12, CLK_S0,        12, 1),
+	DEF_FIXED("s0d24",	R8A77980_CLK_S0D24, CLK_S0,        24, 1),
+	DEF_FIXED("s1d1",	R8A77980_CLK_S1D1,  CLK_S1,         1, 1),
+	DEF_FIXED("s1d2",	R8A77980_CLK_S1D2,  CLK_S1,         2, 1),
+	DEF_FIXED("s1d4",	R8A77980_CLK_S1D4,  CLK_S1,         4, 1),
+	DEF_FIXED("s2d1",	R8A77980_CLK_S2D1,  CLK_S2,         1, 1),
+	DEF_FIXED("s2d2",	R8A77980_CLK_S2D2,  CLK_S2,         2, 1),
+	DEF_FIXED("s2d4",	R8A77980_CLK_S2D4,  CLK_S2,         4, 1),
+	DEF_FIXED("s3d1",	R8A77980_CLK_S3D1,  CLK_S3,         1, 1),
+	DEF_FIXED("s3d2",	R8A77980_CLK_S3D2,  CLK_S3,         2, 1),
+	DEF_FIXED("s3d4",	R8A77980_CLK_S3D4,  CLK_S3,         4, 1),
+
+	DEF_GEN3_SD("sd0",	R8A77980_CLK_SD0,   CLK_SDSRC,	  0x0074),
+
+	DEF_FIXED("cl",		R8A77980_CLK_CL,    CLK_PLL1_DIV2, 48, 1),
+	DEF_FIXED("cp",		R8A77980_CLK_CP,    CLK_EXTAL,	    2, 1),
+	DEF_FIXED("cpex",	R8A77980_CLK_CPEX,  CLK_EXTAL,	    2, 1),
+
+	DEF_DIV6P1("canfd",	R8A77980_CLK_CANFD, CLK_PLL1_DIV4, 0x244),
+	DEF_DIV6P1("csi0",	R8A77980_CLK_CSI0,  CLK_PLL1_DIV4, 0x00c),
+	DEF_DIV6P1("mso",	R8A77980_CLK_MSO,   CLK_PLL1_DIV4, 0x014),
+};
+
+static const struct mssr_mod_clk r8a77980_mod_clks[] __initconst = {
+	DEF_MOD("tmu4",			 121,	R8A77980_CLK_S0D6),
+	DEF_MOD("tmu3",			 122,	R8A77980_CLK_S0D6),
+	DEF_MOD("tmu2",			 123,	R8A77980_CLK_S0D6),
+	DEF_MOD("tmu1",			 124,	R8A77980_CLK_S0D6),
+	DEF_MOD("tmu0",			 125,	R8A77980_CLK_CP),
+	DEF_MOD("scif4",		 203,	R8A77980_CLK_S3D4),
+	DEF_MOD("scif3",		 204,	R8A77980_CLK_S3D4),
+	DEF_MOD("scif1",		 206,	R8A77980_CLK_S3D4),
+	DEF_MOD("scif0",		 207,	R8A77980_CLK_S3D4),
+	DEF_MOD("msiof3",		 208,	R8A77980_CLK_MSO),
+	DEF_MOD("msiof2",		 209,	R8A77980_CLK_MSO),
+	DEF_MOD("msiof1",		 210,	R8A77980_CLK_MSO),
+	DEF_MOD("msiof0",		 211,	R8A77980_CLK_MSO),
+	DEF_MOD("sys-dmac2",		 217,	R8A77980_CLK_S0D3),
+	DEF_MOD("sys-dmac1",		 218,	R8A77980_CLK_S0D3),
+	DEF_MOD("tpu0",			 304,	R8A77980_CLK_S3D4),
+	DEF_MOD("sdif",			 314,	R8A77980_CLK_SD0),
+	DEF_MOD("pciec0",		 319,	R8A77980_CLK_S3D1),
+	DEF_MOD("intc-ex",		 407,	R8A77980_CLK_CP),
+	DEF_MOD("intc-ap",		 408,	R8A77980_CLK_S0D3),
+	DEF_MOD("hscif3",		 517,	R8A77980_CLK_S3D1),
+	DEF_MOD("hscif2",		 518,	R8A77980_CLK_S3D1),
+	DEF_MOD("hscif1",		 519,	R8A77980_CLK_S3D1),
+	DEF_MOD("hscif0",		 520,	R8A77980_CLK_S3D1),
+	DEF_MOD("imp4",			 521,	R8A77980_CLK_S1D1),
+	DEF_MOD("thermal",		 522,	R8A77980_CLK_CP),
+	DEF_MOD("pwm",			 523,	R8A77980_CLK_S0D12),
+	DEF_MOD("impdma1",		 526,	R8A77980_CLK_S1D1),
+	DEF_MOD("impdma0",		 527,	R8A77980_CLK_S1D1),
+	DEF_MOD("imp-ocv4",		 528,	R8A77980_CLK_S1D1),
+	DEF_MOD("imp-ocv3",		 529,	R8A77980_CLK_S1D1),
+	DEF_MOD("imp-ocv2",		 531,	R8A77980_CLK_S1D1),
+	DEF_MOD("fcpvd0",		 603,	R8A77980_CLK_S3D1),
+	DEF_MOD("vspd0",		 623,	R8A77980_CLK_S3D1),
+	DEF_MOD("csi41",		 715,	R8A77980_CLK_CSI0),
+	DEF_MOD("csi40",		 716,	R8A77980_CLK_CSI0),
+	DEF_MOD("du0",			 724,	R8A77980_CLK_S2D1),
+	DEF_MOD("lvds",			 727,	R8A77980_CLK_S2D1),
+	DEF_MOD("etheravb",		 812,	R8A77980_CLK_S3D2),
+	DEF_MOD("gether",		 813,	R8A77980_CLK_S3D2),
+	DEF_MOD("imp3",			 824,	R8A77980_CLK_S1D1),
+	DEF_MOD("imp2",			 825,	R8A77980_CLK_S1D1),
+	DEF_MOD("imp1",			 826,	R8A77980_CLK_S1D1),
+	DEF_MOD("imp0",			 827,	R8A77980_CLK_S1D1),
+	DEF_MOD("imp-ocv1",		 828,	R8A77980_CLK_S1D1),
+	DEF_MOD("imp-ocv0",		 829,	R8A77980_CLK_S1D1),
+	DEF_MOD("impram",		 830,	R8A77980_CLK_S1D1),
+	DEF_MOD("impcnn",		 831,	R8A77980_CLK_S1D1),
+	DEF_MOD("gpio5",		 907,	R8A77980_CLK_CP),
+	DEF_MOD("gpio4",		 908,	R8A77980_CLK_CP),
+	DEF_MOD("gpio3",		 909,	R8A77980_CLK_CP),
+	DEF_MOD("gpio2",		 910,	R8A77980_CLK_CP),
+	DEF_MOD("gpio1",		 911,	R8A77980_CLK_CP),
+	DEF_MOD("gpio0",		 912,	R8A77980_CLK_CP),
+	DEF_MOD("can-fd",		 914,	R8A77980_CLK_S3D2),
+	DEF_MOD("i2c4",			 927,	R8A77980_CLK_S0D6),
+	DEF_MOD("i2c3",			 928,	R8A77980_CLK_S0D6),
+	DEF_MOD("i2c2",			 929,	R8A77980_CLK_S3D2),
+	DEF_MOD("i2c1",			 930,	R8A77980_CLK_S3D2),
+	DEF_MOD("i2c0",			 931,	R8A77980_CLK_S3D2),
+};
+
+static const unsigned int r8a77980_crit_mod_clks[] __initconst = {
+	MOD_CLK_ID(408),	/* INTC-AP (GIC) */
+};
+
+
+/*
+ * CPG Clock Data
+ */
+
+/*
+ *   MD		EXTAL		PLL2	PLL1	PLL3
+ * 14 13	(MHz)
+ * --------------------------------------------------
+ * 0  0		16.66 x 1	x240	x192	x192
+ * 0  1		20    x 1	x200	x160	x160
+ * 1  0		27    x 1	x148	x118	x118
+ * 1  1		33.33 / 2	x240	x192	x192
+ */
+#define CPG_PLL_CONFIG_INDEX(md)	((((md) & BIT(14)) >> 13) | \
+					 (((md) & BIT(13)) >> 13))
+
+static const struct rcar_gen3_cpg_pll_config cpg_pll_configs[4] __initconst = {
+	/* EXTAL div	PLL1 mult/div	PLL3 mult/div */
+	{ 1,		192,	1,	192,	1,	},
+	{ 1,		160,	1,	160,	1,	},
+	{ 1,		118,	1,	118,	1,	},
+	{ 2,		192,	1,	192,	1,	},
+};
+
+static int __init r8a77980_cpg_mssr_init(struct device *dev)
+{
+	const struct rcar_gen3_cpg_pll_config *cpg_pll_config;
+	u32 cpg_mode;
+	int error;
+
+	error = rcar_rst_read_mode_pins(&cpg_mode);
+	if (error)
+		return error;
+
+	cpg_pll_config = &cpg_pll_configs[CPG_PLL_CONFIG_INDEX(cpg_mode)];
+
+	return rcar_gen3_cpg_init(cpg_pll_config, CLK_EXTALR, cpg_mode);
+}
+
+const struct cpg_mssr_info r8a77980_cpg_mssr_info __initconst = {
+	/* Core Clocks */
+	.core_clks = r8a77980_core_clks,
+	.num_core_clks = ARRAY_SIZE(r8a77980_core_clks),
+	.last_dt_core_clk = LAST_DT_CORE_CLK,
+	.num_total_core_clks = MOD_CLK_BASE,
+
+	/* Module Clocks */
+	.mod_clks = r8a77980_mod_clks,
+	.num_mod_clks = ARRAY_SIZE(r8a77980_mod_clks),
+	.num_hw_mod_clks = 12 * 32,
+
+	/* Critical Module Clocks */
+	.crit_mod_clks = r8a77980_crit_mod_clks,
+	.num_crit_mod_clks = ARRAY_SIZE(r8a77980_crit_mod_clks),
+
+	/* Callbacks */
+	.init = r8a77980_cpg_mssr_init,
+	.cpg_clk_register = rcar_gen3_cpg_clk_register,
+};
diff --git a/drivers/clk/renesas/rcar-gen3-cpg.c b/drivers/clk/renesas/rcar-gen3-cpg.c
index 0904886f5501..628b63b85d3f 100644
--- a/drivers/clk/renesas/rcar-gen3-cpg.c
+++ b/drivers/clk/renesas/rcar-gen3-cpg.c
@@ -13,6 +13,7 @@
  */
 
 #include <linux/bug.h>
+#include <linux/bitfield.h>
 #include <linux/clk.h>
 #include <linux/clk-provider.h>
 #include <linux/device.h>
@@ -62,6 +63,140 @@ static void cpg_simple_notifier_register(struct raw_notifier_head *notifiers,
 }
 
 /*
+ * Z Clock & Z2 Clock
+ *
+ * Traits of this clock:
+ * prepare - clk_prepare only ensures that parents are prepared
+ * enable - clk_enable only ensures that parents are enabled
+ * rate - rate is adjustable.  clk->rate = (parent->rate * mult / 32) / 2
+ * parent - fixed parent.  No clk_set_parent support
+ */
+#define CPG_FRQCRB			0x00000004
+#define CPG_FRQCRB_KICK			BIT(31)
+#define CPG_FRQCRC			0x000000e0
+#define CPG_FRQCRC_ZFC_MASK		GENMASK(12, 8)
+#define CPG_FRQCRC_Z2FC_MASK		GENMASK(4, 0)
+
+struct cpg_z_clk {
+	struct clk_hw hw;
+	void __iomem *reg;
+	void __iomem *kick_reg;
+	unsigned long mask;
+};
+
+#define to_z_clk(_hw)	container_of(_hw, struct cpg_z_clk, hw)
+
+static unsigned long cpg_z_clk_recalc_rate(struct clk_hw *hw,
+					   unsigned long parent_rate)
+{
+	struct cpg_z_clk *zclk = to_z_clk(hw);
+	unsigned int mult;
+	u32 val;
+
+	val = readl(zclk->reg) & zclk->mask;
+	mult = 32 - (val >> __ffs(zclk->mask));
+
+	/* Factor of 2 is for fixed divider */
+	return DIV_ROUND_CLOSEST_ULL((u64)parent_rate * mult, 32 * 2);
+}
+
+static long cpg_z_clk_round_rate(struct clk_hw *hw, unsigned long rate,
+				 unsigned long *parent_rate)
+{
+	/* Factor of 2 is for fixed divider */
+	unsigned long prate = *parent_rate / 2;
+	unsigned int mult;
+
+	mult = div_u64(rate * 32ULL, prate);
+	mult = clamp(mult, 1U, 32U);
+
+	return (u64)prate * mult / 32;
+}
+
+static int cpg_z_clk_set_rate(struct clk_hw *hw, unsigned long rate,
+			      unsigned long parent_rate)
+{
+	struct cpg_z_clk *zclk = to_z_clk(hw);
+	unsigned int mult;
+	unsigned int i;
+	u32 val, kick;
+
+	/* Factor of 2 is for the fixed divider; mult is clamped to 1..32 */
+	mult = DIV_ROUND_CLOSEST_ULL(rate * 32ULL * 2, parent_rate);
+	mult = clamp(mult, 1U, 32U);
+
+	if (readl(zclk->kick_reg) & CPG_FRQCRB_KICK)
+		return -EBUSY;
+
+	val = readl(zclk->reg) & ~zclk->mask;
+	val |= ((32 - mult) << __ffs(zclk->mask)) & zclk->mask;
+	writel(val, zclk->reg);
+
+	/*
+	 * Set the KICK bit in FRQCRB to update the hardware setting and wait
+	 * for clock change completion.
+	 */
+	kick = readl(zclk->kick_reg);
+	kick |= CPG_FRQCRB_KICK;
+	writel(kick, zclk->kick_reg);
+
+	/*
+	 * Note: There is no HW information about the worst case latency.
+	 *
+	 * Using experimental measurements, it seems that no more than
+	 * ~10 iterations are needed, independently of the CPU rate.
+	 * Since this value might depend on the external xtal rate, the pll1
+	 * rate or even the other emulation clocks' rates, use 1000 as a
+	 * "super" safe value.
+	 */
+	for (i = 1000; i; i--) {
+		if (!(readl(zclk->kick_reg) & CPG_FRQCRB_KICK))
+			return 0;
+
+		cpu_relax();
+	}
+
+	return -ETIMEDOUT;
+}
+
+static const struct clk_ops cpg_z_clk_ops = {
+	.recalc_rate = cpg_z_clk_recalc_rate,
+	.round_rate = cpg_z_clk_round_rate,
+	.set_rate = cpg_z_clk_set_rate,
+};
+
+static struct clk * __init cpg_z_clk_register(const char *name,
+					      const char *parent_name,
+					      void __iomem *reg,
+					      unsigned long mask)
+{
+	struct clk_init_data init;
+	struct cpg_z_clk *zclk;
+	struct clk *clk;
+
+	zclk = kzalloc(sizeof(*zclk), GFP_KERNEL);
+	if (!zclk)
+		return ERR_PTR(-ENOMEM);
+
+	init.name = name;
+	init.ops = &cpg_z_clk_ops;
+	init.flags = 0;
+	init.parent_names = &parent_name;
+	init.num_parents = 1;
+
+	zclk->reg = reg + CPG_FRQCRC;
+	zclk->kick_reg = reg + CPG_FRQCRB;
+	zclk->hw.init = &init;
+	zclk->mask = mask;
+
+	clk = clk_register(NULL, &zclk->hw);
+	if (IS_ERR(clk))
+		kfree(zclk);
+
+	return clk;
+}
+
+/*
  * SDn Clock
  */
 #define CPG_SD_STP_HCK		BIT(9)
@@ -420,6 +555,14 @@ struct clk * __init rcar_gen3_cpg_clk_register(struct device *dev,
 		mult = 1;
 		break;
 
+	case CLK_TYPE_GEN3_Z:
+		return cpg_z_clk_register(core->name, __clk_get_name(parent),
+					  base, CPG_FRQCRC_ZFC_MASK);
+
+	case CLK_TYPE_GEN3_Z2:
+		return cpg_z_clk_register(core->name, __clk_get_name(parent),
+					  base, CPG_FRQCRC_Z2FC_MASK);
+
 	default:
 		return ERR_PTR(-EINVAL);
 	}
diff --git a/drivers/clk/renesas/rcar-gen3-cpg.h b/drivers/clk/renesas/rcar-gen3-cpg.h
index 2e4284399f53..ea4f8fc3c4c9 100644
--- a/drivers/clk/renesas/rcar-gen3-cpg.h
+++ b/drivers/clk/renesas/rcar-gen3-cpg.h
@@ -21,6 +21,8 @@ enum rcar_gen3_clk_types {
 	CLK_TYPE_GEN3_SD,
 	CLK_TYPE_GEN3_R,
 	CLK_TYPE_GEN3_PE,
+	CLK_TYPE_GEN3_Z,
+	CLK_TYPE_GEN3_Z2,
 };
 
 #define DEF_GEN3_SD(_name, _id, _parent, _offset)	\
diff --git a/drivers/clk/renesas/renesas-cpg-mssr.c b/drivers/clk/renesas/renesas-cpg-mssr.c
index e3cc72c81311..4e88e980fb76 100644
--- a/drivers/clk/renesas/renesas-cpg-mssr.c
+++ b/drivers/clk/renesas/renesas-cpg-mssr.c
@@ -693,12 +693,24 @@ static const struct of_device_id cpg_mssr_match[] = {
 		.data = &r8a7796_cpg_mssr_info,
 	},
 #endif
+#ifdef CONFIG_CLK_R8A77965
+	{
+		.compatible = "renesas,r8a77965-cpg-mssr",
+		.data = &r8a77965_cpg_mssr_info,
+	},
+#endif
 #ifdef CONFIG_CLK_R8A77970
 	{
 		.compatible = "renesas,r8a77970-cpg-mssr",
 		.data = &r8a77970_cpg_mssr_info,
 	},
 #endif
+#ifdef CONFIG_CLK_R8A77980
+	{
+		.compatible = "renesas,r8a77980-cpg-mssr",
+		.data = &r8a77980_cpg_mssr_info,
+	},
+#endif
 #ifdef CONFIG_CLK_R8A77995
 	{
 		.compatible = "renesas,r8a77995-cpg-mssr",
diff --git a/drivers/clk/renesas/renesas-cpg-mssr.h b/drivers/clk/renesas/renesas-cpg-mssr.h
index 0745b0930308..97ccb093c10f 100644
--- a/drivers/clk/renesas/renesas-cpg-mssr.h
+++ b/drivers/clk/renesas/renesas-cpg-mssr.h
@@ -139,7 +139,9 @@ extern const struct cpg_mssr_info r8a7792_cpg_mssr_info;
 extern const struct cpg_mssr_info r8a7794_cpg_mssr_info;
 extern const struct cpg_mssr_info r8a7795_cpg_mssr_info;
 extern const struct cpg_mssr_info r8a7796_cpg_mssr_info;
+extern const struct cpg_mssr_info r8a77965_cpg_mssr_info;
 extern const struct cpg_mssr_info r8a77970_cpg_mssr_info;
+extern const struct cpg_mssr_info r8a77980_cpg_mssr_info;
 extern const struct cpg_mssr_info r8a77995_cpg_mssr_info;
 
 
diff --git a/drivers/clk/rockchip/clk-mmc-phase.c b/drivers/clk/rockchip/clk-mmc-phase.c
index 077fcdc7908b..026a26bb702d 100644
--- a/drivers/clk/rockchip/clk-mmc-phase.c
+++ b/drivers/clk/rockchip/clk-mmc-phase.c
@@ -25,6 +25,8 @@ struct rockchip_mmc_clock {
 	void __iomem	*reg;
 	int		id;
 	int		shift;
+	int		cached_phase;
+	struct notifier_block clk_rate_change_nb;
 };
 
 #define to_mmc_clock(_hw) container_of(_hw, struct rockchip_mmc_clock, hw)
@@ -58,6 +60,12 @@ static int rockchip_mmc_get_phase(struct clk_hw *hw)
 	u16 degrees;
 	u32 delay_num = 0;
 
+	/* See the comment for rockchip_mmc_set_phase below */
+	if (!rate) {
+		pr_err("%s: invalid clk rate\n", __func__);
+		return -EINVAL;
+	}
+
 	raw_value = readl(mmc_clock->reg) >> (mmc_clock->shift);
 
 	degrees = (raw_value & ROCKCHIP_MMC_DEGREE_MASK) * 90;
@@ -84,6 +92,23 @@ static int rockchip_mmc_set_phase(struct clk_hw *hw, int degrees)
 	u32 raw_value;
 	u32 delay;
 
+	/*
+	 * The below calculation is based on the output clock from
+	 * MMC host to the card, which expects the phase clock inherits
+	 * the clock rate from its parent, namely the output clock
+	 * provider of MMC host. However, things may go wrong if
+	 * (1) It is orphan.
+	 * (2) It is assigned to the wrong parent.
+	 *
+	 * This check helps debug case (1), which seems to be the
+	 * most likely problem we often face and which makes it difficult
+	 * for people to debug unstable mmc tuning results.
+	 */
+	if (!rate) {
+		pr_err("%s: invalid clk rate\n", __func__);
+		return -EINVAL;
+	}
+
 	nineties = degrees / 90;
 	remainder = (degrees % 90);
 
@@ -139,6 +164,41 @@ static const struct clk_ops rockchip_mmc_clk_ops = {
 	.set_phase	= rockchip_mmc_set_phase,
 };
 
+#define to_rockchip_mmc_clock(x) \
+	container_of(x, struct rockchip_mmc_clock, clk_rate_change_nb)
+static int rockchip_mmc_clk_rate_notify(struct notifier_block *nb,
+					unsigned long event, void *data)
+{
+	struct rockchip_mmc_clock *mmc_clock = to_rockchip_mmc_clock(nb);
+	struct clk_notifier_data *ndata = data;
+
+	/*
+	 * rockchip_mmc_clk is mostly used by mmc controllers to sample
+	 * the input data, which expects a fixed phase after the tuning
+	 * process. However if the clock rate is changed, the phase is stale
+	 * and may break the data sampling. So here we try to restore the phase
+	 * for that case, except when
+	 * (1) cached_phase is invalid since we inevitably cached it when the
+	 * clock provider is reparented from orphan to its real parent in the
+	 * first place. Otherwise we may mess up the initialization of MMC cards
+	 * since we only set the default sample phase and drive phase later on.
+	 * (2) the new coming rate is higher than the older one since mmc driver
+	 * sets the max-frequency to match the boards' ability but we can't go
+	 * over the heads of that, otherwise the tests smoke out the issue.
+	 */
+	if (ndata->old_rate <= ndata->new_rate)
+		return NOTIFY_DONE;
+
+	if (event == PRE_RATE_CHANGE)
+		mmc_clock->cached_phase =
+			rockchip_mmc_get_phase(&mmc_clock->hw);
+	else if (mmc_clock->cached_phase != -EINVAL &&
+		 event == POST_RATE_CHANGE)
+		rockchip_mmc_set_phase(&mmc_clock->hw, mmc_clock->cached_phase);
+
+	return NOTIFY_DONE;
+}
+
 struct clk *rockchip_clk_register_mmc(const char *name,
 				const char *const *parent_names, u8 num_parents,
 				void __iomem *reg, int shift)
@@ -146,6 +206,7 @@ struct clk *rockchip_clk_register_mmc(const char *name,
 	struct clk_init_data init;
 	struct rockchip_mmc_clock *mmc_clock;
 	struct clk *clk;
+	int ret;
 
 	mmc_clock = kmalloc(sizeof(*mmc_clock), GFP_KERNEL);
 	if (!mmc_clock)
@@ -162,8 +223,21 @@ struct clk *rockchip_clk_register_mmc(const char *name,
 	mmc_clock->shift = shift;
 
 	clk = clk_register(NULL, &mmc_clock->hw);
-	if (IS_ERR(clk))
-		kfree(mmc_clock);
+	if (IS_ERR(clk)) {
+		ret = PTR_ERR(clk);
+		goto err_register;
+	}
+
+	mmc_clock->clk_rate_change_nb.notifier_call =
+				&rockchip_mmc_clk_rate_notify;
+	ret = clk_notifier_register(clk, &mmc_clock->clk_rate_change_nb);
+	if (ret)
+		goto err_notifier;
 
 	return clk;
+err_notifier:
+	clk_unregister(clk);
+err_register:
+	kfree(mmc_clock);
+	return ERR_PTR(ret);
 }
diff --git a/drivers/clk/rockchip/clk-rk3228.c b/drivers/clk/rockchip/clk-rk3228.c
index 11e7f2d1c054..7af48184b022 100644
--- a/drivers/clk/rockchip/clk-rk3228.c
+++ b/drivers/clk/rockchip/clk-rk3228.c
@@ -387,7 +387,7 @@ static struct rockchip_clk_branch rk3228_clk_branches[] __initdata = {
 			RK2928_CLKSEL_CON(23), 5, 2, MFLAGS, 0, 6, DFLAGS,
 			RK2928_CLKGATE_CON(2), 15, GFLAGS),
 
-	COMPOSITE(SCLK_SDMMC, "sclk_sdmmc0", mux_mmc_src_p, 0,
+	COMPOSITE(SCLK_SDMMC, "sclk_sdmmc", mux_mmc_src_p, 0,
 			RK2928_CLKSEL_CON(11), 8, 2, MFLAGS, 0, 8, DFLAGS,
 			RK2928_CLKGATE_CON(2), 11, GFLAGS),
 
diff --git a/drivers/clk/rockchip/clk-rk3328.c b/drivers/clk/rockchip/clk-rk3328.c
index b04f29774ee7..252366a5231f 100644
--- a/drivers/clk/rockchip/clk-rk3328.c
+++ b/drivers/clk/rockchip/clk-rk3328.c
@@ -304,7 +304,7 @@ static struct rockchip_clk_branch rk3328_clk_branches[] __initdata = {
 	COMPOSITE_NOMUX(0, "aclk_core", "armclk", CLK_IGNORE_UNUSED,
 			RK3328_CLKSEL_CON(1), 4, 3, DFLAGS | CLK_DIVIDER_READ_ONLY,
 			RK3328_CLKGATE_CON(7), 1, GFLAGS),
-	GATE(0, "aclk_core_niu", "aclk_core", CLK_IGNORE_UNUSED,
+	GATE(0, "aclk_core_niu", "aclk_core", 0,
 			RK3328_CLKGATE_CON(13), 0, GFLAGS),
 	GATE(0, "aclk_gic400", "aclk_core", CLK_IGNORE_UNUSED,
 			RK3328_CLKGATE_CON(13), 1, GFLAGS),
@@ -318,7 +318,7 @@ static struct rockchip_clk_branch rk3328_clk_branches[] __initdata = {
 			RK3328_CLKGATE_CON(6), 6, GFLAGS),
 	GATE(ACLK_GPU, "aclk_gpu", "aclk_gpu_pre", CLK_SET_RATE_PARENT,
 			RK3328_CLKGATE_CON(14), 0, GFLAGS),
-	GATE(0, "aclk_gpu_niu", "aclk_gpu_pre", CLK_IGNORE_UNUSED,
+	GATE(0, "aclk_gpu_niu", "aclk_gpu_pre", 0,
 			RK3328_CLKGATE_CON(14), 1, GFLAGS),
 
 	/* PD_DDR */
@@ -513,9 +513,9 @@ static struct rockchip_clk_branch rk3328_clk_branches[] __initdata = {
 			RK3328_CLKGATE_CON(24), 0, GFLAGS),
 	GATE(HCLK_RKVDEC, "hclk_rkvdec", "hclk_rkvdec_pre", CLK_SET_RATE_PARENT,
 			RK3328_CLKGATE_CON(24), 1, GFLAGS),
-	GATE(0, "aclk_rkvdec_niu", "aclk_rkvdec_pre", CLK_IGNORE_UNUSED,
+	GATE(0, "aclk_rkvdec_niu", "aclk_rkvdec_pre", 0,
 			RK3328_CLKGATE_CON(24), 2, GFLAGS),
-	GATE(0, "hclk_rkvdec_niu", "hclk_rkvdec_pre", CLK_IGNORE_UNUSED,
+	GATE(0, "hclk_rkvdec_niu", "hclk_rkvdec_pre", 0,
 			RK3328_CLKGATE_CON(24), 3, GFLAGS),
 
 	COMPOSITE(SCLK_VDEC_CABAC, "sclk_vdec_cabac", mux_4plls_p, 0,
@@ -535,9 +535,9 @@ static struct rockchip_clk_branch rk3328_clk_branches[] __initdata = {
 			RK3328_CLKGATE_CON(23), 0, GFLAGS),
 	GATE(HCLK_VPU, "hclk_vpu", "hclk_vpu_pre", CLK_SET_RATE_PARENT,
 			RK3328_CLKGATE_CON(23), 1, GFLAGS),
-	GATE(0, "aclk_vpu_niu", "aclk_vpu_pre", CLK_IGNORE_UNUSED,
+	GATE(0, "aclk_vpu_niu", "aclk_vpu_pre", 0,
 			RK3328_CLKGATE_CON(23), 2, GFLAGS),
-	GATE(0, "hclk_vpu_niu", "hclk_vpu_pre", CLK_IGNORE_UNUSED,
+	GATE(0, "hclk_vpu_niu", "hclk_vpu_pre", 0,
 			RK3328_CLKGATE_CON(23), 3, GFLAGS),
 
 	COMPOSITE(ACLK_RKVENC, "aclk_rkvenc", mux_4plls_p, 0,
@@ -545,9 +545,9 @@ static struct rockchip_clk_branch rk3328_clk_branches[] __initdata = {
 			RK3328_CLKGATE_CON(6), 3, GFLAGS),
 	FACTOR_GATE(HCLK_RKVENC, "hclk_rkvenc", "aclk_rkvenc", 0, 1, 4,
 			RK3328_CLKGATE_CON(11), 4, GFLAGS),
-	GATE(0, "aclk_rkvenc_niu", "aclk_rkvenc", CLK_IGNORE_UNUSED,
+	GATE(0, "aclk_rkvenc_niu", "aclk_rkvenc", 0,
 			RK3328_CLKGATE_CON(25), 0, GFLAGS),
-	GATE(0, "hclk_rkvenc_niu", "hclk_rkvenc", CLK_IGNORE_UNUSED,
+	GATE(0, "hclk_rkvenc_niu", "hclk_rkvenc", 0,
 			RK3328_CLKGATE_CON(25), 1, GFLAGS),
 	GATE(ACLK_H265, "aclk_h265", "aclk_rkvenc", 0,
 			RK3328_CLKGATE_CON(25), 0, GFLAGS),
@@ -588,7 +588,7 @@ static struct rockchip_clk_branch rk3328_clk_branches[] __initdata = {
 	COMPOSITE(ACLK_VOP_PRE, "aclk_vop_pre", mux_4plls_p, 0,
 			RK3328_CLKSEL_CON(39), 6, 2, MFLAGS, 0, 5, DFLAGS,
 			RK3328_CLKGATE_CON(5), 5, GFLAGS),
-	GATE(0, "clk_hdmi_sfc", "xin24m", 0,
+	GATE(SCLK_HDMI_SFC, "sclk_hdmi_sfc", "xin24m", 0,
 			RK3328_CLKGATE_CON(5), 4, GFLAGS),
 
 	COMPOSITE_NODIV(0, "clk_cif_src", mux_2plls_p, 0,
@@ -602,7 +602,7 @@ static struct rockchip_clk_branch rk3328_clk_branches[] __initdata = {
 			RK3328_CLKGATE_CON(5), 6, GFLAGS),
 	DIV(DCLK_HDMIPHY, "dclk_hdmiphy", "dclk_lcdc_src", 0,
 			RK3328_CLKSEL_CON(40), 3, 3, DFLAGS),
-	MUX(DCLK_LCDC, "dclk_lcdc", mux_dclk_lcdc_p, 0,
+	MUX(DCLK_LCDC, "dclk_lcdc", mux_dclk_lcdc_p,  CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 			RK3328_CLKSEL_CON(40), 1, 1, MFLAGS),
 
 	/*
@@ -709,14 +709,14 @@ static struct rockchip_clk_branch rk3328_clk_branches[] __initdata = {
 
 	/* PD_VOP */
 	GATE(ACLK_RGA, "aclk_rga", "aclk_rga_pre", 0, RK3328_CLKGATE_CON(21), 10, GFLAGS),
-	GATE(0, "aclk_rga_niu", "aclk_rga_pre", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(22), 3, GFLAGS),
+	GATE(0, "aclk_rga_niu", "aclk_rga_pre", 0, RK3328_CLKGATE_CON(22), 3, GFLAGS),
 	GATE(ACLK_VOP, "aclk_vop", "aclk_vop_pre", 0, RK3328_CLKGATE_CON(21), 2, GFLAGS),
-	GATE(0, "aclk_vop_niu", "aclk_vop_pre", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(21), 4, GFLAGS),
+	GATE(0, "aclk_vop_niu", "aclk_vop_pre", 0, RK3328_CLKGATE_CON(21), 4, GFLAGS),
 
 	GATE(ACLK_IEP, "aclk_iep", "aclk_vio_pre", 0, RK3328_CLKGATE_CON(21), 6, GFLAGS),
 	GATE(ACLK_CIF, "aclk_cif", "aclk_vio_pre", 0, RK3328_CLKGATE_CON(21), 8, GFLAGS),
 	GATE(ACLK_HDCP, "aclk_hdcp", "aclk_vio_pre", 0, RK3328_CLKGATE_CON(21), 15, GFLAGS),
-	GATE(0, "aclk_vio_niu", "aclk_vio_pre", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(22), 2, GFLAGS),
+	GATE(0, "aclk_vio_niu", "aclk_vio_pre", 0, RK3328_CLKGATE_CON(22), 2, GFLAGS),
 
 	GATE(HCLK_VOP, "hclk_vop", "hclk_vio_pre", 0, RK3328_CLKGATE_CON(21), 3, GFLAGS),
 	GATE(0, "hclk_vop_niu", "hclk_vio_pre", 0, RK3328_CLKGATE_CON(21), 5, GFLAGS),
@@ -724,10 +724,10 @@ static struct rockchip_clk_branch rk3328_clk_branches[] __initdata = {
 	GATE(HCLK_CIF, "hclk_cif", "hclk_vio_pre", 0, RK3328_CLKGATE_CON(21), 9, GFLAGS),
 	GATE(HCLK_RGA, "hclk_rga", "hclk_vio_pre", 0, RK3328_CLKGATE_CON(21), 11, GFLAGS),
 	GATE(0, "hclk_ahb1tom", "hclk_vio_pre", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(21), 12, GFLAGS),
-	GATE(0, "pclk_vio_h2p", "hclk_vio_pre", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(21), 13, GFLAGS),
-	GATE(0, "hclk_vio_h2p", "hclk_vio_pre", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(21), 14, GFLAGS),
+	GATE(0, "pclk_vio_h2p", "hclk_vio_pre", 0, RK3328_CLKGATE_CON(21), 13, GFLAGS),
+	GATE(0, "hclk_vio_h2p", "hclk_vio_pre", 0, RK3328_CLKGATE_CON(21), 14, GFLAGS),
 	GATE(HCLK_HDCP, "hclk_hdcp", "hclk_vio_pre", 0, RK3328_CLKGATE_CON(22), 0, GFLAGS),
-	GATE(HCLK_VIO, "hclk_vio", "hclk_vio_pre", 0, RK3328_CLKGATE_CON(22), 1, GFLAGS),
+	GATE(0, "hclk_vio_niu", "hclk_vio_pre", 0, RK3328_CLKGATE_CON(22), 1, GFLAGS),
 	GATE(PCLK_HDMI, "pclk_hdmi", "hclk_vio_pre", 0, RK3328_CLKGATE_CON(22), 4, GFLAGS),
 	GATE(PCLK_HDCP, "pclk_hdcp", "hclk_vio_pre", 0, RK3328_CLKGATE_CON(22), 5, GFLAGS),
 
@@ -743,19 +743,19 @@ static struct rockchip_clk_branch rk3328_clk_branches[] __initdata = {
 	GATE(HCLK_HOST0_ARB, "hclk_host0_arb", "hclk_peri", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(19), 7, GFLAGS),
 	GATE(HCLK_OTG, "hclk_otg", "hclk_peri", 0, RK3328_CLKGATE_CON(19), 8, GFLAGS),
 	GATE(HCLK_OTG_PMU, "hclk_otg_pmu", "hclk_peri", 0, RK3328_CLKGATE_CON(19), 9, GFLAGS),
-	GATE(0, "hclk_peri_niu", "hclk_peri", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(19), 12, GFLAGS),
-	GATE(0, "pclk_peri_niu", "hclk_peri", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(19), 13, GFLAGS),
+	GATE(0, "hclk_peri_niu", "hclk_peri", 0, RK3328_CLKGATE_CON(19), 12, GFLAGS),
+	GATE(0, "pclk_peri_niu", "hclk_peri", 0, RK3328_CLKGATE_CON(19), 13, GFLAGS),
 
 	/* PD_GMAC */
 	GATE(ACLK_MAC2PHY, "aclk_mac2phy", "aclk_gmac", 0, RK3328_CLKGATE_CON(26), 0, GFLAGS),
 	GATE(ACLK_MAC2IO, "aclk_mac2io", "aclk_gmac", 0, RK3328_CLKGATE_CON(26), 2, GFLAGS),
-	GATE(0, "aclk_gmac_niu", "aclk_gmac", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(26), 4, GFLAGS),
+	GATE(0, "aclk_gmac_niu", "aclk_gmac", 0, RK3328_CLKGATE_CON(26), 4, GFLAGS),
 	GATE(PCLK_MAC2PHY, "pclk_mac2phy", "pclk_gmac", 0, RK3328_CLKGATE_CON(26), 1, GFLAGS),
 	GATE(PCLK_MAC2IO, "pclk_mac2io", "pclk_gmac", 0, RK3328_CLKGATE_CON(26), 3, GFLAGS),
-	GATE(0, "pclk_gmac_niu", "pclk_gmac", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(26), 5, GFLAGS),
+	GATE(0, "pclk_gmac_niu", "pclk_gmac", 0, RK3328_CLKGATE_CON(26), 5, GFLAGS),
 
 	/* PD_BUS */
-	GATE(0, "aclk_bus_niu", "aclk_bus_pre", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(15), 12, GFLAGS),
+	GATE(0, "aclk_bus_niu", "aclk_bus_pre", 0, RK3328_CLKGATE_CON(15), 12, GFLAGS),
 	GATE(ACLK_DCF, "aclk_dcf", "aclk_bus_pre", 0, RK3328_CLKGATE_CON(15), 11, GFLAGS),
 	GATE(ACLK_TSP, "aclk_tsp", "aclk_bus_pre", 0, RK3328_CLKGATE_CON(17), 12, GFLAGS),
 	GATE(0, "aclk_intmem", "aclk_bus_pre", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(15), 0, GFLAGS),
@@ -769,10 +769,10 @@ static struct rockchip_clk_branch rk3328_clk_branches[] __initdata = {
 	GATE(HCLK_TSP, "hclk_tsp", "hclk_bus_pre", 0, RK3328_CLKGATE_CON(17), 11, GFLAGS),
 	GATE(HCLK_CRYPTO_MST, "hclk_crypto_mst", "hclk_bus_pre", 0, RK3328_CLKGATE_CON(15), 7, GFLAGS),
 	GATE(HCLK_CRYPTO_SLV, "hclk_crypto_slv", "hclk_bus_pre", 0, RK3328_CLKGATE_CON(15), 8, GFLAGS),
-	GATE(0, "hclk_bus_niu", "hclk_bus_pre", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(15), 13, GFLAGS),
+	GATE(0, "hclk_bus_niu", "hclk_bus_pre", 0, RK3328_CLKGATE_CON(15), 13, GFLAGS),
 	GATE(HCLK_PDM, "hclk_pdm", "hclk_bus_pre", 0, RK3328_CLKGATE_CON(28), 0, GFLAGS),
 
-	GATE(0, "pclk_bus_niu", "pclk_bus", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(15), 14, GFLAGS),
+	GATE(0, "pclk_bus_niu", "pclk_bus", 0, RK3328_CLKGATE_CON(15), 14, GFLAGS),
 	GATE(0, "pclk_efuse", "pclk_bus", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(15), 9, GFLAGS),
 	GATE(0, "pclk_otp", "pclk_bus", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(28), 4, GFLAGS),
 	GATE(PCLK_I2C0, "pclk_i2c0", "pclk_bus", 0, RK3328_CLKGATE_CON(15), 10, GFLAGS),
@@ -807,37 +807,42 @@ static struct rockchip_clk_branch rk3328_clk_branches[] __initdata = {
 	GATE(0, "pclk_acodecphy", "pclk_phy_pre", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(17), 5, GFLAGS),
 	GATE(PCLK_HDMIPHY, "pclk_hdmiphy", "pclk_phy_pre", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(17), 7, GFLAGS),
 	GATE(0, "pclk_vdacphy", "pclk_phy_pre", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(17), 8, GFLAGS),
-	GATE(0, "pclk_phy_niu", "pclk_phy_pre", CLK_IGNORE_UNUSED, RK3328_CLKGATE_CON(15), 15, GFLAGS),
+	GATE(0, "pclk_phy_niu", "pclk_phy_pre", 0, RK3328_CLKGATE_CON(15), 15, GFLAGS),
 
 	/* PD_MMC */
-	MMC(SCLK_SDMMC_DRV, "sdmmc_drv", "sclk_sdmmc",
+	MMC(SCLK_SDMMC_DRV, "sdmmc_drv", "clk_sdmmc",
 	    RK3328_SDMMC_CON0, 1),
-	MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "sclk_sdmmc",
+	MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "clk_sdmmc",
 	    RK3328_SDMMC_CON1, 1),
 
-	MMC(SCLK_SDIO_DRV, "sdio_drv", "sclk_sdio",
+	MMC(SCLK_SDIO_DRV, "sdio_drv", "clk_sdio",
 	    RK3328_SDIO_CON0, 1),
-	MMC(SCLK_SDIO_SAMPLE, "sdio_sample", "sclk_sdio",
+	MMC(SCLK_SDIO_SAMPLE, "sdio_sample", "clk_sdio",
 	    RK3328_SDIO_CON1, 1),
 
-	MMC(SCLK_EMMC_DRV, "emmc_drv", "sclk_emmc",
+	MMC(SCLK_EMMC_DRV, "emmc_drv", "clk_emmc",
 	    RK3328_EMMC_CON0, 1),
-	MMC(SCLK_EMMC_SAMPLE, "emmc_sample", "sclk_emmc",
+	MMC(SCLK_EMMC_SAMPLE, "emmc_sample", "clk_emmc",
 	    RK3328_EMMC_CON1, 1),
 
-	MMC(SCLK_SDMMC_EXT_DRV, "sdmmc_ext_drv", "sclk_sdmmc_ext",
+	MMC(SCLK_SDMMC_EXT_DRV, "sdmmc_ext_drv", "clk_sdmmc_ext",
 	    RK3328_SDMMC_EXT_CON0, 1),
-	MMC(SCLK_SDMMC_EXT_SAMPLE, "sdmmc_ext_sample", "sclk_sdmmc_ext",
+	MMC(SCLK_SDMMC_EXT_SAMPLE, "sdmmc_ext_sample", "clk_sdmmc_ext",
 	    RK3328_SDMMC_EXT_CON1, 1),
 };
 
 static const char *const rk3328_critical_clocks[] __initconst = {
 	"aclk_bus",
+	"aclk_bus_niu",
 	"pclk_bus",
+	"pclk_bus_niu",
 	"hclk_bus",
+	"hclk_bus_niu",
 	"aclk_peri",
 	"hclk_peri",
+	"hclk_peri_niu",
 	"pclk_peri",
+	"pclk_peri_niu",
 	"pclk_dbg",
 	"aclk_core_niu",
 	"aclk_gic400",
@@ -861,6 +866,20 @@ static const char *const rk3328_critical_clocks[] __initconst = {
 	"aclk_rga_niu",
 	"pclk_vio_h2p",
 	"hclk_vio_h2p",
+	"aclk_vio_niu",
+	"hclk_vio_niu",
+	"aclk_vop_niu",
+	"hclk_vop_niu",
+	"aclk_gpu_niu",
+	"aclk_rkvdec_niu",
+	"hclk_rkvdec_niu",
+	"aclk_vpu_niu",
+	"hclk_vpu_niu",
+	"aclk_rkvenc_niu",
+	"hclk_rkvenc_niu",
+	"aclk_gmac_niu",
+	"pclk_gmac_niu",
+	"pclk_phy_niu",
 };
 
 static void __init rk3328_clk_init(struct device_node *np)
diff --git a/drivers/clk/rockchip/clk-rk3399.c b/drivers/clk/rockchip/clk-rk3399.c
index 6847120b61cd..bca10d618f0a 100644
--- a/drivers/clk/rockchip/clk-rk3399.c
+++ b/drivers/clk/rockchip/clk-rk3399.c
@@ -57,6 +57,7 @@ static struct rockchip_pll_rate_table rk3399_pll_rates[] = {
 	RK3036_PLL_RATE(1656000000, 1, 69, 1, 1, 1, 0),
 	RK3036_PLL_RATE(1632000000, 1, 68, 1, 1, 1, 0),
 	RK3036_PLL_RATE(1608000000, 1, 67, 1, 1, 1, 0),
+	RK3036_PLL_RATE(1600000000, 3, 200, 1, 1, 1, 0),
 	RK3036_PLL_RATE(1584000000, 1, 66, 1, 1, 1, 0),
 	RK3036_PLL_RATE(1560000000, 1, 65, 1, 1, 1, 0),
 	RK3036_PLL_RATE(1536000000, 1, 64, 1, 1, 1, 0),
@@ -670,7 +671,7 @@ static struct rockchip_clk_branch rk3399_clk_branches[] __initdata = {
 			RK3399_CLKGATE_CON(9), 7, GFLAGS,
 			&rk3399_uart3_fracmux),
 
-	COMPOSITE(0, "pclk_ddr", mux_pll_src_cpll_gpll_p, CLK_IGNORE_UNUSED,
+	COMPOSITE(PCLK_DDR, "pclk_ddr", mux_pll_src_cpll_gpll_p, CLK_IGNORE_UNUSED,
 			RK3399_CLKSEL_CON(6), 15, 1, MFLAGS, 8, 5, DFLAGS,
 			RK3399_CLKGATE_CON(3), 4, GFLAGS),
 
@@ -886,7 +887,7 @@ static struct rockchip_clk_branch rk3399_clk_branches[] __initdata = {
 			RK3399_CLKGATE_CON(31), 8, GFLAGS),
 
 	/* sdio & sdmmc */
-	COMPOSITE(0, "hclk_sd", mux_pll_src_cpll_gpll_p, 0,
+	COMPOSITE(HCLK_SD, "hclk_sd", mux_pll_src_cpll_gpll_p, 0,
 			RK3399_CLKSEL_CON(13), 15, 1, MFLAGS, 8, 5, DFLAGS,
 			RK3399_CLKGATE_CON(12), 13, GFLAGS),
 	GATE(HCLK_SDMMC, "hclk_sdmmc", "hclk_sd", 0,
diff --git a/drivers/clk/rockchip/clk.c b/drivers/clk/rockchip/clk.c
index 35dbd63c2f49..3cd8ad59e0b7 100644
--- a/drivers/clk/rockchip/clk.c
+++ b/drivers/clk/rockchip/clk.c
@@ -57,6 +57,7 @@ static struct clk *rockchip_clk_register_branch(const char *name,
 	struct clk_divider *div = NULL;
 	const struct clk_ops *mux_ops = NULL, *div_ops = NULL,
 			     *gate_ops = NULL;
+	int ret;
 
 	if (num_parents > 1) {
 		mux = kzalloc(sizeof(*mux), GFP_KERNEL);
@@ -74,8 +75,10 @@ static struct clk *rockchip_clk_register_branch(const char *name,
 
 	if (gate_offset >= 0) {
 		gate = kzalloc(sizeof(*gate), GFP_KERNEL);
-		if (!gate)
+		if (!gate) {
+			ret = -ENOMEM;
 			goto err_gate;
+		}
 
 		gate->flags = gate_flags;
 		gate->reg = base + gate_offset;
@@ -86,8 +89,10 @@ static struct clk *rockchip_clk_register_branch(const char *name,
 
 	if (div_width > 0) {
 		div = kzalloc(sizeof(*div), GFP_KERNEL);
-		if (!div)
+		if (!div) {
+			ret = -ENOMEM;
 			goto err_div;
+		}
 
 		div->flags = div_flags;
 		div->reg = base + muxdiv_offset;
@@ -106,12 +111,19 @@ static struct clk *rockchip_clk_register_branch(const char *name,
 				     gate ? &gate->hw : NULL, gate_ops,
 				     flags);
 
+	if (IS_ERR(clk)) {
+		ret = PTR_ERR(clk);
+		goto err_composite;
+	}
+
 	return clk;
+err_composite:
+	kfree(div);
 err_div:
 	kfree(gate);
 err_gate:
 	kfree(mux);
-	return ERR_PTR(-ENOMEM);
+	return ERR_PTR(ret);
 }
 
 struct rockchip_clk_frac {
@@ -291,8 +303,10 @@ static struct clk *rockchip_clk_register_frac_branch(
 		init.num_parents = child->num_parents;
 
 		mux_clk = clk_register(NULL, &frac_mux->hw);
-		if (IS_ERR(mux_clk))
+		if (IS_ERR(mux_clk)) {
+			kfree(frac);
 			return clk;
+		}
 
 		rockchip_clk_add_lookup(ctx, mux_clk, child->id);
 
diff --git a/drivers/clk/samsung/Makefile b/drivers/clk/samsung/Makefile
index ef8900bc077f..513826393158 100644
--- a/drivers/clk/samsung/Makefile
+++ b/drivers/clk/samsung/Makefile
@@ -8,9 +8,11 @@ obj-$(CONFIG_SOC_EXYNOS3250)	+= clk-exynos3250.o
 obj-$(CONFIG_ARCH_EXYNOS4)	+= clk-exynos4.o
 obj-$(CONFIG_ARCH_EXYNOS4)	+= clk-exynos4412-isp.o
 obj-$(CONFIG_SOC_EXYNOS5250)	+= clk-exynos5250.o
+obj-$(CONFIG_SOC_EXYNOS5250)	+= clk-exynos5-subcmu.o
 obj-$(CONFIG_SOC_EXYNOS5260)	+= clk-exynos5260.o
 obj-$(CONFIG_SOC_EXYNOS5410)	+= clk-exynos5410.o
 obj-$(CONFIG_SOC_EXYNOS5420)	+= clk-exynos5420.o
+obj-$(CONFIG_SOC_EXYNOS5420)	+= clk-exynos5-subcmu.o
 obj-$(CONFIG_EXYNOS_ARM64_COMMON_CLK)	+= clk-exynos5433.o
 obj-$(CONFIG_SOC_EXYNOS5440)	+= clk-exynos5440.o
 obj-$(CONFIG_EXYNOS_AUDSS_CLK_CON) += clk-exynos-audss.o
diff --git a/drivers/clk/samsung/clk-exynos-audss.c b/drivers/clk/samsung/clk-exynos-audss.c
index 5bfc92ee3129..b4b057c7301c 100644
--- a/drivers/clk/samsung/clk-exynos-audss.c
+++ b/drivers/clk/samsung/clk-exynos-audss.c
@@ -143,10 +143,8 @@ static int exynos_audss_clk_probe(struct platform_device *pdev)
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	reg_base = devm_ioremap_resource(dev, res);
-	if (IS_ERR(reg_base)) {
-		dev_err(dev, "failed to map audss registers\n");
+	if (IS_ERR(reg_base))
 		return PTR_ERR(reg_base);
-	}
 
 	epll = ERR_PTR(-ENODEV);
 
diff --git a/drivers/clk/samsung/clk-exynos3250.c b/drivers/clk/samsung/clk-exynos3250.c
index 1b81e283f605..27c9d23657b3 100644
--- a/drivers/clk/samsung/clk-exynos3250.c
+++ b/drivers/clk/samsung/clk-exynos3250.c
@@ -670,73 +670,73 @@ static const struct samsung_gate_clock gate_clks[] __initconst = {
 
 /* APLL & MPLL & BPLL & UPLL */
 static const struct samsung_pll_rate_table exynos3250_pll_rates[] __initconst = {
-	PLL_35XX_RATE(1200000000, 400, 4, 1),
-	PLL_35XX_RATE(1100000000, 275, 3, 1),
-	PLL_35XX_RATE(1066000000, 533, 6, 1),
-	PLL_35XX_RATE(1000000000, 250, 3, 1),
-	PLL_35XX_RATE( 960000000, 320, 4, 1),
-	PLL_35XX_RATE( 900000000, 300, 4, 1),
-	PLL_35XX_RATE( 850000000, 425, 6, 1),
-	PLL_35XX_RATE( 800000000, 200, 3, 1),
-	PLL_35XX_RATE( 700000000, 175, 3, 1),
-	PLL_35XX_RATE( 667000000, 667, 12, 1),
-	PLL_35XX_RATE( 600000000, 400, 4, 2),
-	PLL_35XX_RATE( 533000000, 533, 6, 2),
-	PLL_35XX_RATE( 520000000, 260, 3, 2),
-	PLL_35XX_RATE( 500000000, 250, 3, 2),
-	PLL_35XX_RATE( 400000000, 200, 3, 2),
-	PLL_35XX_RATE( 200000000, 200, 3, 3),
-	PLL_35XX_RATE( 100000000, 200, 3, 4),
+	PLL_35XX_RATE(24 * MHZ, 1200000000, 400, 4, 1),
+	PLL_35XX_RATE(24 * MHZ, 1100000000, 275, 3, 1),
+	PLL_35XX_RATE(24 * MHZ, 1066000000, 533, 6, 1),
+	PLL_35XX_RATE(24 * MHZ, 1000000000, 250, 3, 1),
+	PLL_35XX_RATE(24 * MHZ,  960000000, 320, 4, 1),
+	PLL_35XX_RATE(24 * MHZ,  900000000, 300, 4, 1),
+	PLL_35XX_RATE(24 * MHZ,  850000000, 425, 6, 1),
+	PLL_35XX_RATE(24 * MHZ,  800000000, 200, 3, 1),
+	PLL_35XX_RATE(24 * MHZ,  700000000, 175, 3, 1),
+	PLL_35XX_RATE(24 * MHZ,  667000000, 667, 12, 1),
+	PLL_35XX_RATE(24 * MHZ,  600000000, 400, 4, 2),
+	PLL_35XX_RATE(24 * MHZ,  533000000, 533, 6, 2),
+	PLL_35XX_RATE(24 * MHZ,  520000000, 260, 3, 2),
+	PLL_35XX_RATE(24 * MHZ,  500000000, 250, 3, 2),
+	PLL_35XX_RATE(24 * MHZ,  400000000, 200, 3, 2),
+	PLL_35XX_RATE(24 * MHZ,  200000000, 200, 3, 3),
+	PLL_35XX_RATE(24 * MHZ,  100000000, 200, 3, 4),
 	{ /* sentinel */ }
 };
 
 /* EPLL */
 static const struct samsung_pll_rate_table exynos3250_epll_rates[] __initconst = {
-	PLL_36XX_RATE(800000000, 200, 3, 1,     0),
-	PLL_36XX_RATE(288000000,  96, 2, 2,     0),
-	PLL_36XX_RATE(192000000, 128, 2, 3,     0),
-	PLL_36XX_RATE(144000000,  96, 2, 3,     0),
-	PLL_36XX_RATE( 96000000, 128, 2, 4,     0),
-	PLL_36XX_RATE( 84000000, 112, 2, 4,     0),
-	PLL_36XX_RATE( 80000004, 106, 2, 4, 43691),
-	PLL_36XX_RATE( 73728000,  98, 2, 4, 19923),
-	PLL_36XX_RATE( 67737598, 270, 3, 5, 62285),
-	PLL_36XX_RATE( 65535999, 174, 2, 5, 49982),
-	PLL_36XX_RATE( 50000000, 200, 3, 5,     0),
-	PLL_36XX_RATE( 49152002, 131, 2, 5,  4719),
-	PLL_36XX_RATE( 48000000, 128, 2, 5,     0),
-	PLL_36XX_RATE( 45158401, 180, 3, 5, 41524),
+	PLL_36XX_RATE(24 * MHZ, 800000000, 200, 3, 1,     0),
+	PLL_36XX_RATE(24 * MHZ, 288000000,  96, 2, 2,     0),
+	PLL_36XX_RATE(24 * MHZ, 192000000, 128, 2, 3,     0),
+	PLL_36XX_RATE(24 * MHZ, 144000000,  96, 2, 3,     0),
+	PLL_36XX_RATE(24 * MHZ,  96000000, 128, 2, 4,     0),
+	PLL_36XX_RATE(24 * MHZ,  84000000, 112, 2, 4,     0),
+	PLL_36XX_RATE(24 * MHZ,  80000003, 106, 2, 4, 43691),
+	PLL_36XX_RATE(24 * MHZ,  73728000,  98, 2, 4, 19923),
+	PLL_36XX_RATE(24 * MHZ,  67737598, 270, 3, 5, 62285),
+	PLL_36XX_RATE(24 * MHZ,  65535999, 174, 2, 5, 49982),
+	PLL_36XX_RATE(24 * MHZ,  50000000, 200, 3, 5,     0),
+	PLL_36XX_RATE(24 * MHZ,  49152002, 131, 2, 5,  4719),
+	PLL_36XX_RATE(24 * MHZ,  48000000, 128, 2, 5,     0),
+	PLL_36XX_RATE(24 * MHZ,  45158401, 180, 3, 5, 41524),
 	{ /* sentinel */ }
 };
 
 /* VPLL */
 static const struct samsung_pll_rate_table exynos3250_vpll_rates[] __initconst = {
-	PLL_36XX_RATE(600000000, 100, 2, 1,     0),
-	PLL_36XX_RATE(533000000, 266, 3, 2, 32768),
-	PLL_36XX_RATE(519230987, 173, 2, 2,  5046),
-	PLL_36XX_RATE(500000000, 250, 3, 2,     0),
-	PLL_36XX_RATE(445500000, 148, 2, 2, 32768),
-	PLL_36XX_RATE(445055007, 148, 2, 2, 23047),
-	PLL_36XX_RATE(400000000, 200, 3, 2,     0),
-	PLL_36XX_RATE(371250000, 123, 2, 2, 49152),
-	PLL_36XX_RATE(370878997, 185, 3, 2, 28803),
-	PLL_36XX_RATE(340000000, 170, 3, 2,     0),
-	PLL_36XX_RATE(335000015, 111, 2, 2, 43691),
-	PLL_36XX_RATE(333000000, 111, 2, 2,     0),
-	PLL_36XX_RATE(330000000, 110, 2, 2,     0),
-	PLL_36XX_RATE(320000015, 106, 2, 2, 43691),
-	PLL_36XX_RATE(300000000, 100, 2, 2,     0),
-	PLL_36XX_RATE(275000000, 275, 3, 3,     0),
-	PLL_36XX_RATE(222750000, 148, 2, 3, 32768),
-	PLL_36XX_RATE(222528007, 148, 2, 3, 23069),
-	PLL_36XX_RATE(160000000, 160, 3, 3,     0),
-	PLL_36XX_RATE(148500000,  99, 2, 3,     0),
-	PLL_36XX_RATE(148352005,  98, 2, 3, 59070),
-	PLL_36XX_RATE(108000000, 144, 2, 4,     0),
-	PLL_36XX_RATE( 74250000,  99, 2, 4,     0),
-	PLL_36XX_RATE( 74176002,  98, 3, 4, 59070),
-	PLL_36XX_RATE( 54054000, 216, 3, 5, 14156),
-	PLL_36XX_RATE( 54000000, 144, 2, 5,     0),
+	PLL_36XX_RATE(24 * MHZ, 600000000, 100, 2, 1,     0),
+	PLL_36XX_RATE(24 * MHZ, 533000000, 266, 3, 2, 32768),
+	PLL_36XX_RATE(24 * MHZ, 519230987, 173, 2, 2,  5046),
+	PLL_36XX_RATE(24 * MHZ, 500000000, 250, 3, 2,     0),
+	PLL_36XX_RATE(24 * MHZ, 445500000, 148, 2, 2, 32768),
+	PLL_36XX_RATE(24 * MHZ, 445055007, 148, 2, 2, 23047),
+	PLL_36XX_RATE(24 * MHZ, 400000000, 200, 3, 2,     0),
+	PLL_36XX_RATE(24 * MHZ, 371250000, 123, 2, 2, 49152),
+	PLL_36XX_RATE(24 * MHZ, 370878997, 185, 3, 2, 28803),
+	PLL_36XX_RATE(24 * MHZ, 340000000, 170, 3, 2,     0),
+	PLL_36XX_RATE(24 * MHZ, 335000015, 111, 2, 2, 43691),
+	PLL_36XX_RATE(24 * MHZ, 333000000, 111, 2, 2,     0),
+	PLL_36XX_RATE(24 * MHZ, 330000000, 110, 2, 2,     0),
+	PLL_36XX_RATE(24 * MHZ, 320000015, 106, 2, 2, 43691),
+	PLL_36XX_RATE(24 * MHZ, 300000000, 100, 2, 2,     0),
+	PLL_36XX_RATE(24 * MHZ, 275000000, 275, 3, 3,     0),
+	PLL_36XX_RATE(24 * MHZ, 222750000, 148, 2, 3, 32768),
+	PLL_36XX_RATE(24 * MHZ, 222528007, 148, 2, 3, 23069),
+	PLL_36XX_RATE(24 * MHZ, 160000000, 160, 3, 3,     0),
+	PLL_36XX_RATE(24 * MHZ, 148500000,  99, 2, 3,     0),
+	PLL_36XX_RATE(24 * MHZ, 148352005,  98, 2, 3, 59070),
+	PLL_36XX_RATE(24 * MHZ, 108000000, 144, 2, 4,     0),
+	PLL_36XX_RATE(24 * MHZ,  74250000,  99, 2, 4,     0),
+	PLL_36XX_RATE(24 * MHZ,  74176002,  98, 2, 4, 59070),
+	PLL_36XX_RATE(24 * MHZ,  54054000, 216, 3, 5, 14156),
+	PLL_36XX_RATE(24 * MHZ,  54000000, 144, 2, 5,     0),
 	{ /* sentinel */ }
 };
 
diff --git a/drivers/clk/samsung/clk-exynos4.c b/drivers/clk/samsung/clk-exynos4.c
index 134f25f2a913..0421960eb963 100644
--- a/drivers/clk/samsung/clk-exynos4.c
+++ b/drivers/clk/samsung/clk-exynos4.c
@@ -1266,77 +1266,78 @@ static const struct of_device_id ext_clk_match[] __initconst = {
 
 /* PLLs PMS values */
 static const struct samsung_pll_rate_table exynos4210_apll_rates[] __initconst = {
-	PLL_45XX_RATE(1200000000, 150,  3, 1, 28),
-	PLL_45XX_RATE(1000000000, 250,  6, 1, 28),
-	PLL_45XX_RATE( 800000000, 200,  6, 1, 28),
-	PLL_45XX_RATE( 666857142, 389, 14, 1, 13),
-	PLL_45XX_RATE( 600000000, 100,  4, 1, 13),
-	PLL_45XX_RATE( 533000000, 533, 24, 1,  5),
-	PLL_45XX_RATE( 500000000, 250,  6, 2, 28),
-	PLL_45XX_RATE( 400000000, 200,  6, 2, 28),
-	PLL_45XX_RATE( 200000000, 200,  6, 3, 28),
+	PLL_4508_RATE(24 * MHZ, 1200000000, 150,  3, 1, 28),
+	PLL_4508_RATE(24 * MHZ, 1000000000, 250,  6, 1, 28),
+	PLL_4508_RATE(24 * MHZ,  800000000, 200,  6, 1, 28),
+	PLL_4508_RATE(24 * MHZ,  666857142, 389, 14, 1, 13),
+	PLL_4508_RATE(24 * MHZ,  600000000, 100,  4, 1, 13),
+	PLL_4508_RATE(24 * MHZ,  533000000, 533, 24, 1,  5),
+	PLL_4508_RATE(24 * MHZ,  500000000, 250,  6, 2, 28),
+	PLL_4508_RATE(24 * MHZ,  400000000, 200,  6, 2, 28),
+	PLL_4508_RATE(24 * MHZ,  200000000, 200,  6, 3, 28),
 	{ /* sentinel */ }
 };
 
 static const struct samsung_pll_rate_table exynos4210_epll_rates[] __initconst = {
-	PLL_4600_RATE(192000000, 48, 3, 1,     0, 0),
-	PLL_4600_RATE(180633605, 45, 3, 1, 10381, 0),
-	PLL_4600_RATE(180000000, 45, 3, 1,     0, 0),
-	PLL_4600_RATE( 73727996, 73, 3, 3, 47710, 1),
-	PLL_4600_RATE( 67737602, 90, 4, 3, 20762, 1),
-	PLL_4600_RATE( 49151992, 49, 3, 3,  9961, 0),
-	PLL_4600_RATE( 45158401, 45, 3, 3, 10381, 0),
+	PLL_4600_RATE(24 * MHZ, 192000000, 48, 3, 1,     0, 0),
+	PLL_4600_RATE(24 * MHZ, 180633605, 45, 3, 1, 10381, 0),
+	PLL_4600_RATE(24 * MHZ, 180000000, 45, 3, 1,     0, 0),
+	PLL_4600_RATE(24 * MHZ,  73727996, 73, 3, 3, 47710, 1),
+	PLL_4600_RATE(24 * MHZ,  67737602, 90, 4, 3, 20762, 1),
+	PLL_4600_RATE(24 * MHZ,  49151992, 49, 3, 3,  9961, 0),
+	PLL_4600_RATE(24 * MHZ,  45158401, 45, 3, 3, 10381, 0),
 	{ /* sentinel */ }
 };
 
 static const struct samsung_pll_rate_table exynos4210_vpll_rates[] __initconst = {
-	PLL_4650_RATE(360000000, 44, 3, 0, 1024, 0, 14, 0),
-	PLL_4650_RATE(324000000, 53, 2, 1, 1024, 1,  1, 1),
-	PLL_4650_RATE(259617187, 63, 3, 1, 1950, 0, 20, 1),
-	PLL_4650_RATE(110000000, 53, 3, 2, 2048, 0, 17, 0),
-	PLL_4650_RATE( 55360351, 53, 3, 3, 2417, 0, 17, 0),
+	PLL_4650_RATE(24 * MHZ, 360000000, 44, 3, 0, 1024, 0, 14, 0),
+	PLL_4650_RATE(24 * MHZ, 324000000, 53, 2, 1, 1024, 1,  1, 1),
+	PLL_4650_RATE(24 * MHZ, 259617187, 63, 3, 1, 1950, 0, 20, 1),
+	PLL_4650_RATE(24 * MHZ, 110000000, 53, 3, 2, 2048, 0, 17, 0),
+	PLL_4650_RATE(24 * MHZ,  55360351, 53, 3, 3, 2417, 0, 17, 0),
 	{ /* sentinel */ }
 };
 
 static const struct samsung_pll_rate_table exynos4x12_apll_rates[] __initconst = {
-	PLL_35XX_RATE(1704000000, 213, 3, 0),
-	PLL_35XX_RATE(1600000000, 200, 3, 0),
-	PLL_35XX_RATE(1500000000, 250, 4, 0),
-	PLL_35XX_RATE(1400000000, 175, 3, 0),
-	PLL_35XX_RATE(1300000000, 325, 6, 0),
-	PLL_35XX_RATE(1200000000, 200, 4, 0),
-	PLL_35XX_RATE(1100000000, 275, 6, 0),
-	PLL_35XX_RATE(1000000000, 125, 3, 0),
-	PLL_35XX_RATE( 900000000, 150, 4, 0),
-	PLL_35XX_RATE( 800000000, 100, 3, 0),
-	PLL_35XX_RATE( 700000000, 175, 3, 1),
-	PLL_35XX_RATE( 600000000, 200, 4, 1),
-	PLL_35XX_RATE( 500000000, 125, 3, 1),
-	PLL_35XX_RATE( 400000000, 100, 3, 1),
-	PLL_35XX_RATE( 300000000, 200, 4, 2),
-	PLL_35XX_RATE( 200000000, 100, 3, 2),
+	PLL_35XX_RATE(24 * MHZ, 1704000000, 213, 3, 0),
+	PLL_35XX_RATE(24 * MHZ, 1600000000, 200, 3, 0),
+	PLL_35XX_RATE(24 * MHZ, 1500000000, 250, 4, 0),
+	PLL_35XX_RATE(24 * MHZ, 1400000000, 175, 3, 0),
+	PLL_35XX_RATE(24 * MHZ, 1300000000, 325, 6, 0),
+	PLL_35XX_RATE(24 * MHZ, 1200000000, 200, 4, 0),
+	PLL_35XX_RATE(24 * MHZ, 1100000000, 275, 6, 0),
+	PLL_35XX_RATE(24 * MHZ, 1000000000, 125, 3, 0),
+	PLL_35XX_RATE(24 * MHZ,  900000000, 150, 4, 0),
+	PLL_35XX_RATE(24 * MHZ,  800000000, 100, 3, 0),
+	PLL_35XX_RATE(24 * MHZ,  700000000, 175, 3, 1),
+	PLL_35XX_RATE(24 * MHZ,  600000000, 200, 4, 1),
+	PLL_35XX_RATE(24 * MHZ,  500000000, 125, 3, 1),
+	PLL_35XX_RATE(24 * MHZ,  400000000, 100, 3, 1),
+	PLL_35XX_RATE(24 * MHZ,  300000000, 200, 4, 2),
+	PLL_35XX_RATE(24 * MHZ,  200000000, 100, 3, 2),
 	{ /* sentinel */ }
 };
 
 static const struct samsung_pll_rate_table exynos4x12_epll_rates[] __initconst = {
-	PLL_36XX_RATE(192000000, 48, 3, 1,     0),
-	PLL_36XX_RATE(180633605, 45, 3, 1, 10381),
-	PLL_36XX_RATE(180000000, 45, 3, 1,     0),
-	PLL_36XX_RATE( 73727996, 73, 3, 3, 47710),
-	PLL_36XX_RATE( 67737602, 90, 4, 3, 20762),
-	PLL_36XX_RATE( 49151992, 49, 3, 3,  9961),
-	PLL_36XX_RATE( 45158401, 45, 3, 3, 10381),
+	PLL_36XX_RATE(24 * MHZ, 196608001, 197, 3, 3, -25690),
+	PLL_36XX_RATE(24 * MHZ, 192000000, 48, 3, 1,     0),
+	PLL_36XX_RATE(24 * MHZ, 180633605, 45, 3, 1, 10381),
+	PLL_36XX_RATE(24 * MHZ, 180000000, 45, 3, 1,     0),
+	PLL_36XX_RATE(24 * MHZ,  73727996, 73, 3, 3, 47710),
+	PLL_36XX_RATE(24 * MHZ,  67737602, 90, 4, 3, 20762),
+	PLL_36XX_RATE(24 * MHZ,  49151992, 49, 3, 3,  9961),
+	PLL_36XX_RATE(24 * MHZ,  45158401, 45, 3, 3, 10381),
 	{ /* sentinel */ }
 };
 
 static const struct samsung_pll_rate_table exynos4x12_vpll_rates[] __initconst = {
-	PLL_36XX_RATE(533000000, 133, 3, 1, 16384),
-	PLL_36XX_RATE(440000000, 110, 3, 1,     0),
-	PLL_36XX_RATE(350000000, 175, 3, 2,     0),
-	PLL_36XX_RATE(266000000, 133, 3, 2,     0),
-	PLL_36XX_RATE(160000000, 160, 3, 3,     0),
-	PLL_36XX_RATE(106031250,  53, 3, 2,  1024),
-	PLL_36XX_RATE( 53015625,  53, 3, 3,  1024),
+	PLL_36XX_RATE(24 * MHZ, 533000000, 133, 3, 1, 16384),
+	PLL_36XX_RATE(24 * MHZ, 440000000, 110, 3, 1,     0),
+	PLL_36XX_RATE(24 * MHZ, 350000000, 175, 3, 2,     0),
+	PLL_36XX_RATE(24 * MHZ, 266000000, 133, 3, 2,     0),
+	PLL_36XX_RATE(24 * MHZ, 160000000, 160, 3, 3,     0),
+	PLL_36XX_RATE(24 * MHZ, 106031250,  53, 3, 2,  1024),
+	PLL_36XX_RATE(24 * MHZ,  53015625,  53, 3, 3,  1024),
 	{ /* sentinel */ }
 };
 
diff --git a/drivers/clk/samsung/clk-exynos5-subcmu.c b/drivers/clk/samsung/clk-exynos5-subcmu.c
new file mode 100644
index 000000000000..93306283d764
--- /dev/null
+++ b/drivers/clk/samsung/clk-exynos5-subcmu.c
@@ -0,0 +1,252 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Copyright (c) 2018 Samsung Electronics Co., Ltd.
+// Author: Marek Szyprowski <m.szyprowski@samsung.com>
+// Common Clock Framework support for Exynos5 power-domain dependent clocks
+
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/pm_domain.h>
+#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+#include "clk.h"
+#include "clk-exynos5-subcmu.h"
+
+static struct samsung_clk_provider *ctx;
+static const struct exynos5_subcmu_info *cmu;
+static int nr_cmus;
+
+/*
+ * Record the masked bits of each listed register in rd->save, then force
+ * those bits to rd->value in the hardware register.
+ */
+static void exynos5_subcmu_clk_save(void __iomem *base,
+				    struct exynos5_subcmu_reg_dump *rd,
+				    unsigned int num_regs)
+{
+	for (; num_regs > 0; --num_regs, ++rd) {
+		rd->save = readl(base + rd->offset);
+		writel((rd->save & ~rd->mask) | rd->value, base + rd->offset);
+		rd->save &= rd->mask;
+	}
+}
+
+/* Write the bits recorded by exynos5_subcmu_clk_save() back under rd->mask. */
+static void exynos5_subcmu_clk_restore(void __iomem *base,
+				       struct exynos5_subcmu_reg_dump *rd,
+				       unsigned int num_regs)
+{
+	for (; num_regs > 0; --num_regs, ++rd)
+		writel((readl(base + rd->offset) & ~rd->mask) | rd->save,
+		       base + rd->offset);
+}
+
+/*
+ * Publish every gate clock of a sub-CMU as ERR_PTR(-EPROBE_DEFER) until the
+ * sub-CMU platform driver registers the real clocks.
+ */
+static void exynos5_subcmu_defer_gate(struct samsung_clk_provider *ctx,
+			      const struct samsung_gate_clock *list, int nr_clk)
+{
+	while (nr_clk--)
+		samsung_clk_add_lookup(ctx, ERR_PTR(-EPROBE_DEFER), list++->id);
+}
+
+/*
+ * Pass the needed clock provider context and register sub-CMU clocks
+ *
+ * NOTE: This function has to be called from the main, OF_CLK_DECLARE-
+ * initialized clock provider driver. This happens very early during boot
+ * process. Then this driver, during core_initcall registers two platform
+ * drivers: one which binds to the same device-tree node as OF_CLK_DECLARE
+ * driver and second, for handling its per-domain child-devices. Those
+ * platform drivers are bound to their devices a bit later in arch_initcall,
+ * when OF-core populates all device-tree nodes.
+ */
+void exynos5_subcmus_init(struct samsung_clk_provider *_ctx, int _nr_cmus,
+			  const struct exynos5_subcmu_info *_cmu)
+{
+	ctx = _ctx;
+	cmu = _cmu;
+	nr_cmus = _nr_cmus;
+
+	for (; _nr_cmus--; _cmu++) {
+		exynos5_subcmu_defer_gate(ctx, _cmu->gate_clks,
+					  _cmu->nr_gate_clks);
+		exynos5_subcmu_clk_save(ctx->reg_base, _cmu->suspend_regs,
+					_cmu->nr_suspend_regs);
+	}
+}
+
+/* Runtime-PM suspend: save the sub-CMU registers under the provider lock. */
+static int __maybe_unused exynos5_subcmu_suspend(struct device *dev)
+{
+	struct exynos5_subcmu_info *info = dev_get_drvdata(dev);
+	unsigned long flags;
+
+	spin_lock_irqsave(&ctx->lock, flags);
+	exynos5_subcmu_clk_save(ctx->reg_base, info->suspend_regs,
+				info->nr_suspend_regs);
+	spin_unlock_irqrestore(&ctx->lock, flags);
+
+	return 0;
+}
+
+/* Runtime-PM resume: restore the sub-CMU registers under the provider lock. */
+static int __maybe_unused exynos5_subcmu_resume(struct device *dev)
+{
+	struct exynos5_subcmu_info *info = dev_get_drvdata(dev);
+	unsigned long flags;
+
+	spin_lock_irqsave(&ctx->lock, flags);
+	exynos5_subcmu_clk_restore(ctx->reg_base, info->suspend_regs,
+				   info->nr_suspend_regs);
+	spin_unlock_irqrestore(&ctx->lock, flags);
+
+	return 0;
+}
+
+/*
+ * Bind to a per-domain child device and register its divider and gate
+ * clocks with ctx->dev pointing at that device for the duration.
+ */
+static int __init exynos5_subcmu_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct exynos5_subcmu_info *info = dev_get_drvdata(dev);
+
+	pm_runtime_set_suspended(dev);
+	pm_runtime_enable(dev);
+	pm_runtime_get(dev);
+
+	ctx->dev = dev;
+	samsung_clk_register_div(ctx, info->div_clks, info->nr_div_clks);
+	samsung_clk_register_gate(ctx, info->gate_clks, info->nr_gate_clks);
+	ctx->dev = NULL;
+
+	pm_runtime_put_sync(dev);
+
+	return 0;
+}
+
+static const struct dev_pm_ops exynos5_subcmu_pm_ops = {
+	SET_RUNTIME_PM_OPS(exynos5_subcmu_suspend,
+			   exynos5_subcmu_resume, NULL)
+	SET_LATE_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+				     pm_runtime_force_resume)
+};
+
+static struct platform_driver exynos5_subcmu_driver __refdata = {
+	.driver	= {
+		.name = "exynos5-subcmu",
+		.suppress_bind_attrs = true,
+		.pm = &exynos5_subcmu_pm_ops,
+	},
+	.probe = exynos5_subcmu_probe,
+};
+
+/*
+ * Create the child platform device for one sub-CMU, attach it to the power
+ * domain described by pd_node and add it to the platform bus.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int __init exynos5_clk_register_subcmu(struct device *parent,
+					 const struct exynos5_subcmu_info *info,
+					      struct device_node *pd_node)
+{
+	struct of_phandle_args genpdspec = { .np = pd_node };
+	struct platform_device *pdev;
+	int ret;
+
+	pdev = platform_device_alloc(info->pd_name, -1);
+	if (!pdev)
+		return -ENOMEM;
+
+	/*
+	 * The platform device release path kfree()s driver_override, so it
+	 * has to be a heap copy and never a string literal.
+	 */
+	pdev->driver_override = kstrdup("exynos5-subcmu", GFP_KERNEL);
+	if (!pdev->driver_override) {
+		ret = -ENOMEM;
+		goto err_put;
+	}
+
+	pdev->dev.parent = parent;
+	platform_set_drvdata(pdev, (void *)info);
+	of_genpd_add_device(&genpdspec, &pdev->dev);
+
+	ret = platform_device_add(pdev);
+	if (ret)
+		goto err_put;
+
+	return 0;
+
+err_put:
+	platform_device_put(pdev);
+	return ret;
+}
+
+/*
+ * Walk all "samsung,exynos4210-pd" nodes and create a sub-CMU child device
+ * for every node whose "label" matches a registered sub-CMU's pd_name.
+ */
+static int __init exynos5_clk_probe(struct platform_device *pdev)
+{
+	struct device_node *np;
+	const char *name;
+	int i, ret;
+
+	for_each_compatible_node(np, NULL, "samsung,exynos4210-pd") {
+		if (of_property_read_string(np, "label", &name) < 0)
+			continue;
+		for (i = 0; i < nr_cmus; i++) {
+			if (strcmp(cmu[i].pd_name, name) != 0)
+				continue;
+			ret = exynos5_clk_register_subcmu(&pdev->dev,
+							  &cmu[i], np);
+			/* Best effort: keep going with the other domains. */
+			if (ret)
+				dev_warn(&pdev->dev,
+					 "failed to register sub-CMU %s: %d\n",
+					 name, ret);
+		}
+	}
+	return 0;
+}
+
+static const struct of_device_id exynos5_clk_of_match[] = {
+	{ .compatible = "samsung,exynos5250-clock", },
+	{ .compatible = "samsung,exynos5420-clock", },
+	{ .compatible = "samsung,exynos5800-clock", },
+	{ },
+};
+
+static struct platform_driver exynos5_clk_driver __refdata = {
+	.driver	= {
+		.name = "exynos5-clock",
+		.of_match_table = exynos5_clk_of_match,
+		.suppress_bind_attrs = true,
+	},
+	.probe = exynos5_clk_probe,
+};
+
+/* Register both platform drivers; undo the first if the second fails. */
+static int __init exynos5_clk_drv_init(void)
+{
+	int ret;
+
+	ret = platform_driver_register(&exynos5_clk_driver);
+	if (ret)
+		return ret;
+
+	ret = platform_driver_register(&exynos5_subcmu_driver);
+	if (ret)
+		platform_driver_unregister(&exynos5_clk_driver);
+
+	return ret;
+}
+core_initcall(exynos5_clk_drv_init);
diff --git a/drivers/clk/samsung/clk-exynos5-subcmu.h b/drivers/clk/samsung/clk-exynos5-subcmu.h
new file mode 100644
index 000000000000..755ee8aaa3de
--- /dev/null
+++ b/drivers/clk/samsung/clk-exynos5-subcmu.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __CLK_EXYNOS5_SUBCMU_H
+#define __CLK_EXYNOS5_SUBCMU_H
+
+struct exynos5_subcmu_reg_dump {
+	u32 offset;	/* register offset, added to the CMU base address */
+	u32 value;	/* bits OR-ed into the register once it is saved */
+	u32 mask;	/* bits of the register that are saved and restored */
+	u32 save;	/* masked register contents captured at save time */
+};
+
+struct exynos5_subcmu_info {
+	const struct samsung_div_clock *div_clks;	/* may be unset */
+	unsigned int nr_div_clks;	/* entries in div_clks */
+	const struct samsung_gate_clock *gate_clks;	/* deferred until probe */
+	unsigned int nr_gate_clks;	/* entries in gate_clks */
+	struct exynos5_subcmu_reg_dump *suspend_regs;	/* written on save */
+	unsigned int nr_suspend_regs;	/* entries in suspend_regs */
+	const char *pd_name;	/* compared with the PD node "label" */
+};
+
+void exynos5_subcmus_init(struct samsung_clk_provider *ctx, int nr_cmus,
+			  const struct exynos5_subcmu_info *cmu);
+
+#endif
diff --git a/drivers/clk/samsung/clk-exynos5250.c b/drivers/clk/samsung/clk-exynos5250.c
index 9b073c98a891..347fd80c351b 100644
--- a/drivers/clk/samsung/clk-exynos5250.c
+++ b/drivers/clk/samsung/clk-exynos5250.c
@@ -18,6 +18,7 @@
 
 #include "clk.h"
 #include "clk-cpu.h"
+#include "clk-exynos5-subcmu.h"
 
 #define APLL_LOCK		0x0
 #define APLL_CON0		0x100
@@ -560,6 +561,8 @@ static const struct samsung_gate_clock exynos5250_gate_clks[] __initconst = {
 		0),
 	GATE(CLK_GSCL3, "gscl3", "mout_aclk266_gscl_sub", GATE_IP_GSCL, 3, 0,
 		0),
+	GATE(CLK_CAMIF_TOP, "camif_top", "mout_aclk266_gscl_sub",
+			GATE_IP_GSCL, 4, 0, 0),
 	GATE(CLK_GSCL_WA, "gscl_wa", "div_gscl_wa", GATE_IP_GSCL, 5, 0, 0),
 	GATE(CLK_GSCL_WB, "gscl_wb", "div_gscl_wb", GATE_IP_GSCL, 6, 0, 0),
 	GATE(CLK_SMMU_GSCL0, "smmu_gscl0", "mout_aclk266_gscl_sub",
@@ -570,18 +573,11 @@ static const struct samsung_gate_clock exynos5250_gate_clks[] __initconst = {
 			GATE_IP_GSCL, 9, 0, 0),
 	GATE(CLK_SMMU_GSCL3, "smmu_gscl3", "mout_aclk266_gscl_sub",
 			GATE_IP_GSCL, 10, 0, 0),
+	GATE(CLK_SMMU_FIMC_LITE0, "smmu_fimc_lite0", "mout_aclk266_gscl_sub",
+			GATE_IP_GSCL, 11, 0, 0),
+	GATE(CLK_SMMU_FIMC_LITE1, "smmu_fimc_lite1", "mout_aclk266_gscl_sub",
+			GATE_IP_GSCL, 12, 0, 0),
 
-	GATE(CLK_FIMD1, "fimd1", "mout_aclk200_disp1_sub", GATE_IP_DISP1, 0, 0,
-		0),
-	GATE(CLK_MIE1, "mie1", "mout_aclk200_disp1_sub", GATE_IP_DISP1, 1, 0,
-		0),
-	GATE(CLK_DSIM0, "dsim0", "mout_aclk200_disp1_sub", GATE_IP_DISP1, 3, 0,
-		0),
-	GATE(CLK_DP, "dp", "mout_aclk200_disp1_sub", GATE_IP_DISP1, 4, 0, 0),
-	GATE(CLK_MIXER, "mixer", "mout_aclk200_disp1_sub", GATE_IP_DISP1, 5, 0,
-		0),
-	GATE(CLK_HDMI, "hdmi", "mout_aclk200_disp1_sub", GATE_IP_DISP1, 6, 0,
-		0),
 
 	GATE(CLK_MFC, "mfc", "mout_aclk333_sub", GATE_IP_MFC, 0, 0, 0),
 	GATE(CLK_SMMU_MFCR, "smmu_mfcr", "mout_aclk333_sub", GATE_IP_MFC, 1, 0,
@@ -671,10 +667,6 @@ static const struct samsung_gate_clock exynos5250_gate_clks[] __initconst = {
 	GATE(CLK_WDT, "wdt", "div_aclk66", GATE_IP_PERIS, 19, 0, 0),
 	GATE(CLK_RTC, "rtc", "div_aclk66", GATE_IP_PERIS, 20, 0, 0),
 	GATE(CLK_TMU, "tmu", "div_aclk66", GATE_IP_PERIS, 21, 0, 0),
-	GATE(CLK_SMMU_TV, "smmu_tv", "mout_aclk200_disp1_sub",
-			GATE_IP_DISP1, 9, 0, 0),
-	GATE(CLK_SMMU_FIMD1, "smmu_fimd1", "mout_aclk200_disp1_sub",
-			GATE_IP_DISP1, 8, 0, 0),
 	GATE(CLK_SMMU_2D, "smmu_2d", "div_aclk200", GATE_IP_ACP, 7, 0, 0),
 	GATE(CLK_SMMU_FIMC_ISP, "smmu_fimc_isp", "mout_aclk_266_isp_sub",
 			GATE_IP_ISP0, 8, 0, 0),
@@ -698,48 +690,84 @@ static const struct samsung_gate_clock exynos5250_gate_clks[] __initconst = {
 			GATE_IP_ISP1, 7, 0, 0),
 };
 
+/* Gate clocks in GATE_IP_DISP1, registered through the DISP1 sub-CMU. */
+static const struct samsung_gate_clock exynos5250_disp_gate_clks[] __initconst = {
+	GATE(CLK_FIMD1, "fimd1", "mout_aclk200_disp1_sub", GATE_IP_DISP1, 0, 0,
+		0),
+	GATE(CLK_MIE1, "mie1", "mout_aclk200_disp1_sub", GATE_IP_DISP1, 1, 0,
+		0),
+	GATE(CLK_DSIM0, "dsim0", "mout_aclk200_disp1_sub", GATE_IP_DISP1, 3, 0,
+		0),
+	GATE(CLK_DP, "dp", "mout_aclk200_disp1_sub", GATE_IP_DISP1, 4, 0, 0),
+	GATE(CLK_MIXER, "mixer", "mout_aclk200_disp1_sub", GATE_IP_DISP1, 5, 0,
+		0),
+	GATE(CLK_HDMI, "hdmi", "mout_aclk200_disp1_sub", GATE_IP_DISP1, 6, 0,
+		0),
+	GATE(CLK_SMMU_TV, "smmu_tv", "mout_aclk200_disp1_sub",
+			GATE_IP_DISP1, 9, 0, 0),
+	GATE(CLK_SMMU_FIMD1, "smmu_fimd1", "mout_aclk200_disp1_sub",
+			GATE_IP_DISP1, 8, 0, 0),
+};
+
+/* { offset, value, mask } entries saved and restored for the DISP1 sub-CMU. */
+static struct exynos5_subcmu_reg_dump exynos5250_disp_suspend_regs[] = {
+	{ GATE_IP_DISP1, 0xffffffff, 0xffffffff }, /* DISP1 gates */
+	{ SRC_TOP3, 0, BIT(4) },	/* MUX mout_aclk200_disp1_sub */
+	{ SRC_TOP3, 0, BIT(6) },	/* MUX mout_aclk300_disp1_sub */
+};
+
+/* Sub-CMU description matched against the power domain labelled "DISP1". */
+static const struct exynos5_subcmu_info exynos5250_disp_subcmu = {
+	.gate_clks	= exynos5250_disp_gate_clks,
+	.nr_gate_clks	= ARRAY_SIZE(exynos5250_disp_gate_clks),
+	.suspend_regs	= exynos5250_disp_suspend_regs,
+	.nr_suspend_regs = ARRAY_SIZE(exynos5250_disp_suspend_regs),
+	.pd_name	= "DISP1",
+};
+
 static const struct samsung_pll_rate_table vpll_24mhz_tbl[] __initconst = {
 	/* sorted in descending order */
 	/* PLL_36XX_RATE(rate, m, p, s, k) */
-	PLL_36XX_RATE(266000000, 266, 3, 3, 0),
+	PLL_36XX_RATE(24 * MHZ, 266000000, 266, 3, 3, 0),
 	/* Not in UM, but need for eDP on snow */
-	PLL_36XX_RATE(70500000, 94, 2, 4, 0),
+	PLL_36XX_RATE(24 * MHZ, 70500000, 94, 2, 4, 0),
 	{ },
 };
 
 static const struct samsung_pll_rate_table epll_24mhz_tbl[] __initconst = {
 	/* sorted in descending order */
 	/* PLL_36XX_RATE(rate, m, p, s, k) */
-	PLL_36XX_RATE(192000000, 64, 2, 2, 0),
-	PLL_36XX_RATE(180633600, 90, 3, 2, 20762),
-	PLL_36XX_RATE(180000000, 90, 3, 2, 0),
-	PLL_36XX_RATE(73728000, 98, 2, 4, 19923),
-	PLL_36XX_RATE(67737600, 90, 2, 4, 20762),
-	PLL_36XX_RATE(49152000, 98, 3, 4, 19923),
-	PLL_36XX_RATE(45158400, 90, 3, 4, 20762),
-	PLL_36XX_RATE(32768000, 131, 3, 5, 4719),
+	PLL_36XX_RATE(24 * MHZ, 192000000, 64, 2, 2, 0),
+	PLL_36XX_RATE(24 * MHZ, 180633605, 90, 3, 2, 20762),
+	PLL_36XX_RATE(24 * MHZ, 180000000, 90, 3, 2, 0),
+	PLL_36XX_RATE(24 * MHZ, 73728000, 98, 2, 4, 19923),
+	PLL_36XX_RATE(24 * MHZ, 67737602, 90, 2, 4, 20762),
+	PLL_36XX_RATE(24 * MHZ, 49152000, 98, 3, 4, 19923),
+	PLL_36XX_RATE(24 * MHZ, 45158401, 90, 3, 4, 20762),
+	PLL_36XX_RATE(24 * MHZ, 32768001, 131, 3, 5, 4719),
 	{ },
 };
 
 static const struct samsung_pll_rate_table apll_24mhz_tbl[] __initconst = {
 	/* sorted in descending order */
-	/* PLL_35XX_RATE(rate, m, p, s) */
-	PLL_35XX_RATE(1700000000, 425, 6, 0),
-	PLL_35XX_RATE(1600000000, 200, 3, 0),
-	PLL_35XX_RATE(1500000000, 250, 4, 0),
-	PLL_35XX_RATE(1400000000, 175, 3, 0),
-	PLL_35XX_RATE(1300000000, 325, 6, 0),
-	PLL_35XX_RATE(1200000000, 200, 4, 0),
-	PLL_35XX_RATE(1100000000, 275, 6, 0),
-	PLL_35XX_RATE(1000000000, 125, 3, 0),
-	PLL_35XX_RATE(900000000, 150, 4, 0),
-	PLL_35XX_RATE(800000000, 100, 3, 0),
-	PLL_35XX_RATE(700000000, 175, 3, 1),
-	PLL_35XX_RATE(600000000, 200, 4, 1),
-	PLL_35XX_RATE(500000000, 125, 3, 1),
-	PLL_35XX_RATE(400000000, 100, 3, 1),
-	PLL_35XX_RATE(300000000, 200, 4, 2),
-	PLL_35XX_RATE(200000000, 100, 3, 2),
+	/* PLL_35XX_RATE(fin, rate, m, p, s) */
+	PLL_35XX_RATE(24 * MHZ, 1700000000, 425, 6, 0),
+	PLL_35XX_RATE(24 * MHZ, 1600000000, 200, 3, 0),
+	PLL_35XX_RATE(24 * MHZ, 1500000000, 250, 4, 0),
+	PLL_35XX_RATE(24 * MHZ, 1400000000, 175, 3, 0),
+	PLL_35XX_RATE(24 * MHZ, 1300000000, 325, 6, 0),
+	PLL_35XX_RATE(24 * MHZ, 1200000000, 200, 4, 0),
+	PLL_35XX_RATE(24 * MHZ, 1100000000, 275, 6, 0),
+	PLL_35XX_RATE(24 * MHZ, 1000000000, 125, 3, 0),
+	PLL_35XX_RATE(24 * MHZ, 900000000, 150, 4, 0),
+	PLL_35XX_RATE(24 * MHZ, 800000000, 100, 3, 0),
+	PLL_35XX_RATE(24 * MHZ, 700000000, 175, 3, 1),
+	PLL_35XX_RATE(24 * MHZ, 600000000, 200, 4, 1),
+	PLL_35XX_RATE(24 * MHZ, 500000000, 125, 3, 1),
+	PLL_35XX_RATE(24 * MHZ, 400000000, 100, 3, 1),
+	PLL_35XX_RATE(24 * MHZ, 300000000, 200, 4, 2),
+	PLL_35XX_RATE(24 * MHZ, 200000000, 100, 3, 2),
+	{ /* sentinel */ },
 };
 
 static struct samsung_pll_clock exynos5250_plls[nr_plls] __initdata = {
@@ -859,10 +887,11 @@ static void __init exynos5250_clk_init(struct device_node *np)
 	__raw_writel(tmp, reg_base + PWR_CTRL2);
 
 	exynos5250_clk_sleep_init();
+	exynos5_subcmus_init(ctx, 1, &exynos5250_disp_subcmu);
 
 	samsung_clk_of_add_provider(np, ctx);
 
 	pr_info("Exynos5250: clock setup completed, armclk=%ld\n",
 			_get_rate("div_arm2"));
 }
-CLK_OF_DECLARE(exynos5250_clk, "samsung,exynos5250-clock", exynos5250_clk_init);
+CLK_OF_DECLARE_DRIVER(exynos5250_clk, "samsung,exynos5250-clock", exynos5250_clk_init);
diff --git a/drivers/clk/samsung/clk-exynos5260.c b/drivers/clk/samsung/clk-exynos5260.c
index fd1d9bfc151b..2cc2583abd87 100644
--- a/drivers/clk/samsung/clk-exynos5260.c
+++ b/drivers/clk/samsung/clk-exynos5260.c
@@ -23,57 +23,57 @@
  * DISP_PLL, EGL_PLL, KFC_PLL, MEM_PLL, BUS_PLL, MEDIA_PLL, G3D_PLL.
  */
 static const struct samsung_pll_rate_table pll2550_24mhz_tbl[] __initconst = {
-	PLL_35XX_RATE(1700000000, 425, 6, 0),
-	PLL_35XX_RATE(1600000000, 200, 3, 0),
-	PLL_35XX_RATE(1500000000, 250, 4, 0),
-	PLL_35XX_RATE(1400000000, 175, 3, 0),
-	PLL_35XX_RATE(1300000000, 325, 6, 0),
-	PLL_35XX_RATE(1200000000, 400, 4, 1),
-	PLL_35XX_RATE(1100000000, 275, 3, 1),
-	PLL_35XX_RATE(1000000000, 250, 3, 1),
-	PLL_35XX_RATE(933000000, 311, 4, 1),
-	PLL_35XX_RATE(900000000, 300, 4, 1),
-	PLL_35XX_RATE(800000000, 200, 3, 1),
-	PLL_35XX_RATE(733000000, 733, 12, 1),
-	PLL_35XX_RATE(700000000, 175, 3, 1),
-	PLL_35XX_RATE(667000000, 667, 12, 1),
-	PLL_35XX_RATE(633000000, 211, 4, 1),
-	PLL_35XX_RATE(620000000, 310, 3, 2),
-	PLL_35XX_RATE(600000000, 400, 4, 2),
-	PLL_35XX_RATE(543000000, 362, 4, 2),
-	PLL_35XX_RATE(533000000, 533, 6, 2),
-	PLL_35XX_RATE(500000000, 250, 3, 2),
-	PLL_35XX_RATE(450000000, 300, 4, 2),
-	PLL_35XX_RATE(400000000, 200, 3, 2),
-	PLL_35XX_RATE(350000000, 175, 3, 2),
-	PLL_35XX_RATE(300000000, 400, 4, 3),
-	PLL_35XX_RATE(266000000, 266, 3, 3),
-	PLL_35XX_RATE(200000000, 200, 3, 3),
-	PLL_35XX_RATE(160000000, 160, 3, 3),
+	PLL_35XX_RATE(24 * MHZ, 1700000000, 425, 6, 0),
+	PLL_35XX_RATE(24 * MHZ, 1600000000, 200, 3, 0),
+	PLL_35XX_RATE(24 * MHZ, 1500000000, 250, 4, 0),
+	PLL_35XX_RATE(24 * MHZ, 1400000000, 175, 3, 0),
+	PLL_35XX_RATE(24 * MHZ, 1300000000, 325, 6, 0),
+	PLL_35XX_RATE(24 * MHZ, 1200000000, 400, 4, 1),
+	PLL_35XX_RATE(24 * MHZ, 1100000000, 275, 3, 1),
+	PLL_35XX_RATE(24 * MHZ, 1000000000, 250, 3, 1),
+	PLL_35XX_RATE(24 * MHZ, 933000000, 311, 4, 1),
+	PLL_35XX_RATE(24 * MHZ, 900000000, 300, 4, 1),
+	PLL_35XX_RATE(24 * MHZ, 800000000, 200, 3, 1),
+	PLL_35XX_RATE(24 * MHZ, 733000000, 733, 12, 1),
+	PLL_35XX_RATE(24 * MHZ, 700000000, 175, 3, 1),
+	PLL_35XX_RATE(24 * MHZ, 667000000, 667, 12, 1),
+	PLL_35XX_RATE(24 * MHZ, 633000000, 211, 4, 1),
+	PLL_35XX_RATE(24 * MHZ, 620000000, 310, 3, 2),
+	PLL_35XX_RATE(24 * MHZ, 600000000, 400, 4, 2),
+	PLL_35XX_RATE(24 * MHZ, 543000000, 362, 4, 2),
+	PLL_35XX_RATE(24 * MHZ, 533000000, 533, 6, 2),
+	PLL_35XX_RATE(24 * MHZ, 500000000, 250, 3, 2),
+	PLL_35XX_RATE(24 * MHZ, 450000000, 300, 4, 2),
+	PLL_35XX_RATE(24 * MHZ, 400000000, 200, 3, 2),
+	PLL_35XX_RATE(24 * MHZ, 350000000, 175, 3, 2),
+	PLL_35XX_RATE(24 * MHZ, 300000000, 400, 4, 3),
+	PLL_35XX_RATE(24 * MHZ, 266000000, 266, 3, 3),
+	PLL_35XX_RATE(24 * MHZ, 200000000, 200, 3, 3),
+	PLL_35XX_RATE(24 * MHZ, 160000000, 160, 3, 3),
 };
 
 /*
  * Applicable for 2650 Type PLL for AUD_PLL.
  */
 static const struct samsung_pll_rate_table pll2650_24mhz_tbl[] __initconst = {
-	PLL_36XX_RATE(1600000000, 200, 3, 0, 0),
-	PLL_36XX_RATE(1200000000, 100, 2, 0, 0),
-	PLL_36XX_RATE(1000000000, 250, 3, 1, 0),
-	PLL_36XX_RATE(800000000, 200, 3, 1, 0),
-	PLL_36XX_RATE(600000000, 100, 2, 1, 0),
-	PLL_36XX_RATE(532000000, 266, 3, 2, 0),
-	PLL_36XX_RATE(480000000, 160, 2, 2, 0),
-	PLL_36XX_RATE(432000000, 144, 2, 2, 0),
-	PLL_36XX_RATE(400000000, 200, 3, 2, 0),
-	PLL_36XX_RATE(394073130, 459, 7, 2, 49282),
-	PLL_36XX_RATE(333000000, 111, 2, 2, 0),
-	PLL_36XX_RATE(300000000, 100, 2, 2, 0),
-	PLL_36XX_RATE(266000000, 266, 3, 3, 0),
-	PLL_36XX_RATE(200000000, 200, 3, 3, 0),
-	PLL_36XX_RATE(166000000, 166, 3, 3, 0),
-	PLL_36XX_RATE(133000000, 266, 3, 4, 0),
-	PLL_36XX_RATE(100000000, 200, 3, 4, 0),
-	PLL_36XX_RATE(66000000, 176, 2, 5, 0),
+	PLL_36XX_RATE(24 * MHZ, 1600000000, 200, 3, 0, 0),
+	PLL_36XX_RATE(24 * MHZ, 1200000000, 100, 2, 0, 0),
+	PLL_36XX_RATE(24 * MHZ, 1000000000, 250, 3, 1, 0),
+	PLL_36XX_RATE(24 * MHZ, 800000000, 200, 3, 1, 0),
+	PLL_36XX_RATE(24 * MHZ, 600000000, 100, 2, 1, 0),
+	PLL_36XX_RATE(24 * MHZ, 532000000, 266, 3, 2, 0),
+	PLL_36XX_RATE(24 * MHZ, 480000000, 160, 2, 2, 0),
+	PLL_36XX_RATE(24 * MHZ, 432000000, 144, 2, 2, 0),
+	PLL_36XX_RATE(24 * MHZ, 400000000, 200, 3, 2, 0),
+	PLL_36XX_RATE(24 * MHZ, 394073128, 459, 7, 2, 49282),
+	PLL_36XX_RATE(24 * MHZ, 333000000, 111, 2, 2, 0),
+	PLL_36XX_RATE(24 * MHZ, 300000000, 100, 2, 2, 0),
+	PLL_36XX_RATE(24 * MHZ, 266000000, 266, 3, 3, 0),
+	PLL_36XX_RATE(24 * MHZ, 200000000, 200, 3, 3, 0),
+	PLL_36XX_RATE(24 * MHZ, 166000000, 166, 3, 3, 0),
+	PLL_36XX_RATE(24 * MHZ, 133000000, 266, 3, 4, 0),
+	PLL_36XX_RATE(24 * MHZ, 100000000, 200, 3, 4, 0),
+	PLL_36XX_RATE(24 * MHZ, 66000000, 176, 2, 5, 0),
 };
 
 /* CMU_AUD */
diff --git a/drivers/clk/samsung/clk-exynos5410.c b/drivers/clk/samsung/clk-exynos5410.c
index fc471a49e8f4..0a0b09591e6f 100644
--- a/drivers/clk/samsung/clk-exynos5410.c
+++ b/drivers/clk/samsung/clk-exynos5410.c
@@ -226,16 +226,16 @@ static const struct samsung_gate_clock exynos5410_gate_clks[] __initconst = {
 };
 
 static const struct samsung_pll_rate_table exynos5410_pll2550x_24mhz_tbl[] __initconst = {
-	PLL_36XX_RATE(400000000U, 200, 3, 2, 0),
-	PLL_36XX_RATE(333000000U, 111, 2, 2, 0),
-	PLL_36XX_RATE(300000000U, 100, 2, 2, 0),
-	PLL_36XX_RATE(266000000U, 266, 3, 3, 0),
-	PLL_36XX_RATE(200000000U, 200, 3, 3, 0),
-	PLL_36XX_RATE(192000000U, 192, 3, 3, 0),
-	PLL_36XX_RATE(166000000U, 166, 3, 3, 0),
-	PLL_36XX_RATE(133000000U, 266, 3, 4, 0),
-	PLL_36XX_RATE(100000000U, 200, 3, 4, 0),
-	PLL_36XX_RATE(66000000U,  176, 2, 5, 0),
+	PLL_36XX_RATE(24 * MHZ, 400000000U, 200, 3, 2, 0),
+	PLL_36XX_RATE(24 * MHZ, 333000000U, 111, 2, 2, 0),
+	PLL_36XX_RATE(24 * MHZ, 300000000U, 100, 2, 2, 0),
+	PLL_36XX_RATE(24 * MHZ, 266000000U, 266, 3, 3, 0),
+	PLL_36XX_RATE(24 * MHZ, 200000000U, 200, 3, 3, 0),
+	PLL_36XX_RATE(24 * MHZ, 192000000U, 192, 3, 3, 0),
+	PLL_36XX_RATE(24 * MHZ, 166000000U, 166, 3, 3, 0),
+	PLL_36XX_RATE(24 * MHZ, 133000000U, 266, 3, 4, 0),
+	PLL_36XX_RATE(24 * MHZ, 100000000U, 200, 3, 4, 0),
+	PLL_36XX_RATE(24 * MHZ, 66000000U,  176, 2, 5, 0),
 };
 
 static struct samsung_pll_clock exynos5410_plls[nr_plls] __initdata = {
diff --git a/drivers/clk/samsung/clk-exynos5420.c b/drivers/clk/samsung/clk-exynos5420.c
index 45d34f601e9e..95e1bf69449b 100644
--- a/drivers/clk/samsung/clk-exynos5420.c
+++ b/drivers/clk/samsung/clk-exynos5420.c
@@ -19,6 +19,7 @@
 
 #include "clk.h"
 #include "clk-cpu.h"
+#include "clk-exynos5-subcmu.h"
 
 #define APLL_LOCK		0x0
 #define APLL_CON0		0x100
@@ -620,7 +621,8 @@ static const struct samsung_mux_clock exynos5420_mux_clks[] __initconst = {
 
 	MUX(CLK_MOUT_MX_MSPLL_CCORE, "mout_mx_mspll_ccore",
 			mout_group5_5800_p, SRC_TOP7, 16, 2),
-	MUX(0, "mout_mau_epll_clk", mout_mau_epll_clk_p, SRC_TOP7, 20, 2),
+	MUX_F(0, "mout_mau_epll_clk", mout_mau_epll_clk_p, SRC_TOP7, 20, 2,
+	      CLK_SET_RATE_PARENT, 0),
 
 	MUX(0, "mout_fimd1", mout_group3_p, SRC_DISP10, 4, 1),
 };
@@ -863,7 +865,6 @@ static const struct samsung_div_clock exynos5x_div_clks[] __initconst = {
 	DIV(0, "dout_mipi1", "mout_mipi1", DIV_DISP10, 16, 8),
 	DIV(0, "dout_dp1", "mout_dp1", DIV_DISP10, 24, 4),
 	DIV(CLK_DOUT_PIXEL, "dout_hdmi_pixel", "mout_pixel", DIV_DISP10, 28, 4),
-	DIV(0, "dout_disp1_blk", "aclk200_disp1", DIV2_RATIO0, 16, 2),
 	DIV(CLK_DOUT_ACLK400_DISP1, "dout_aclk400_disp1",
 			"mout_aclk400_disp1", DIV_TOP2, 4, 3),
 
@@ -912,8 +913,6 @@ static const struct samsung_div_clock exynos5x_div_clks[] __initconst = {
 	DIV(0, "dout_spi1", "mout_spi1", DIV_PERIC1, 24, 4),
 	DIV(0, "dout_spi2", "mout_spi2", DIV_PERIC1, 28, 4),
 
-	/* Mfc Block */
-	DIV(0, "dout_mfc_blk", "mout_user_aclk333", DIV4_RATIO, 0, 2),
 
 	/* PCM */
 	DIV(0, "dout_pcm1", "dout_audio1", DIV_PERIC2, 16, 8),
@@ -932,8 +931,6 @@ static const struct samsung_div_clock exynos5x_div_clks[] __initconst = {
 	DIV(0, "dout_spi2_pre", "dout_spi2", DIV_PERIC4, 24, 8),
 
 	/* GSCL Block */
-	DIV(0, "dout_gscl_blk_300", "mout_user_aclk300_gscl",
-			DIV2_RATIO0, 4, 2),
 	DIV(0, "dout_gscl_blk_333", "aclk333_432_gscl", DIV2_RATIO0, 6, 2),
 
 	/* MSCL Block */
@@ -1190,8 +1187,6 @@ static const struct samsung_gate_clock exynos5x_gate_clks[] __initconst = {
 	GATE(CLK_SCLK_GSCL_WB, "sclk_gscl_wb", "mout_user_aclk333_432_gscl",
 			GATE_TOP_SCLK_GSCL, 7, 0, 0),
 
-	GATE(CLK_GSCL0, "gscl0", "aclk300_gscl", GATE_IP_GSCL0, 0, 0, 0),
-	GATE(CLK_GSCL1, "gscl1", "aclk300_gscl", GATE_IP_GSCL0, 1, 0, 0),
 	GATE(CLK_FIMC_3AA, "fimc_3aa", "aclk333_432_gscl",
 			GATE_IP_GSCL0, 4, 0, 0),
 	GATE(CLK_FIMC_LITE0, "fimc_lite0", "aclk333_432_gscl",
@@ -1205,10 +1200,6 @@ static const struct samsung_gate_clock exynos5x_gate_clks[] __initconst = {
 			GATE_IP_GSCL1, 3, 0, 0),
 	GATE(CLK_SMMU_FIMCL1, "smmu_fimcl1", "dout_gscl_blk_333",
 			GATE_IP_GSCL1, 4, 0, 0),
-	GATE(CLK_SMMU_GSCL0, "smmu_gscl0", "dout_gscl_blk_300",
-			GATE_IP_GSCL1, 6, 0, 0),
-	GATE(CLK_SMMU_GSCL1, "smmu_gscl1", "dout_gscl_blk_300",
-			GATE_IP_GSCL1, 7, 0, 0),
 	GATE(CLK_GSCL_WA, "gscl_wa", "sclk_gscl_wa", GATE_IP_GSCL1, 12, 0, 0),
 	GATE(CLK_GSCL_WB, "gscl_wb", "sclk_gscl_wb", GATE_IP_GSCL1, 13, 0, 0),
 	GATE(CLK_SMMU_FIMCL3, "smmu_fimcl3,", "dout_gscl_blk_333",
@@ -1227,18 +1218,6 @@ static const struct samsung_gate_clock exynos5x_gate_clks[] __initconst = {
 	GATE(CLK_SMMU_MSCL2, "smmu_mscl2", "dout_mscl_blk",
 			GATE_IP_MSCL, 10, 0, 0),
 
-	GATE(CLK_FIMD1, "fimd1", "aclk300_disp1", GATE_IP_DISP1, 0, 0, 0),
-	GATE(CLK_DSIM1, "dsim1", "aclk200_disp1", GATE_IP_DISP1, 3, 0, 0),
-	GATE(CLK_DP1, "dp1", "aclk200_disp1", GATE_IP_DISP1, 4, 0, 0),
-	GATE(CLK_MIXER, "mixer", "aclk200_disp1", GATE_IP_DISP1, 5, 0, 0),
-	GATE(CLK_HDMI, "hdmi", "aclk200_disp1", GATE_IP_DISP1, 6, 0, 0),
-	GATE(CLK_SMMU_FIMD1M0, "smmu_fimd1m0", "dout_disp1_blk",
-			GATE_IP_DISP1, 7, 0, 0),
-	GATE(CLK_SMMU_FIMD1M1, "smmu_fimd1m1", "dout_disp1_blk",
-			GATE_IP_DISP1, 8, 0, 0),
-	GATE(CLK_SMMU_MIXER, "smmu_mixer", "aclk200_disp1",
-			GATE_IP_DISP1, 9, 0, 0),
-
 	/* ISP */
 	GATE(CLK_SCLK_UART_ISP, "sclk_uart_isp", "dout_uart_isp",
 			GATE_TOP_SCLK_ISP, 0, CLK_SET_RATE_PARENT, 0),
@@ -1255,48 +1234,138 @@ static const struct samsung_gate_clock exynos5x_gate_clks[] __initconst = {
 	GATE(CLK_SCLK_ISP_SENSOR2, "sclk_isp_sensor2", "dout_isp_sensor2",
 			GATE_TOP_SCLK_ISP, 12, CLK_SET_RATE_PARENT, 0),
 
+	GATE(CLK_G3D, "g3d", "mout_user_aclk_g3d", GATE_IP_G3D, 9, 0, 0),
+};
+
+static const struct samsung_div_clock exynos5x_disp_div_clks[] __initconst = {
+	DIV(0, "dout_disp1_blk", "aclk200_disp1", DIV2_RATIO0, 16, 2),
+};
+
+static const struct samsung_gate_clock exynos5x_disp_gate_clks[] __initconst = {
+	GATE(CLK_FIMD1, "fimd1", "aclk300_disp1", GATE_IP_DISP1, 0, 0, 0),
+	GATE(CLK_DSIM1, "dsim1", "aclk200_disp1", GATE_IP_DISP1, 3, 0, 0),
+	GATE(CLK_DP1, "dp1", "aclk200_disp1", GATE_IP_DISP1, 4, 0, 0),
+	GATE(CLK_MIXER, "mixer", "aclk200_disp1", GATE_IP_DISP1, 5, 0, 0),
+	GATE(CLK_HDMI, "hdmi", "aclk200_disp1", GATE_IP_DISP1, 6, 0, 0),
+	GATE(CLK_SMMU_FIMD1M0, "smmu_fimd1m0", "dout_disp1_blk",
+			GATE_IP_DISP1, 7, 0, 0),
+	GATE(CLK_SMMU_FIMD1M1, "smmu_fimd1m1", "dout_disp1_blk",
+			GATE_IP_DISP1, 8, 0, 0),
+	GATE(CLK_SMMU_MIXER, "smmu_mixer", "aclk200_disp1",
+			GATE_IP_DISP1, 9, 0, 0),
+};
+
+static struct exynos5_subcmu_reg_dump exynos5x_disp_suspend_regs[] = {
+	{ GATE_IP_DISP1, 0xffffffff, 0xffffffff }, /* DISP1 gates */
+	{ SRC_TOP5, 0, BIT(0) },	/* MUX mout_user_aclk400_disp1 */
+	{ SRC_TOP5, 0, BIT(24) },	/* MUX mout_user_aclk300_disp1 */
+	{ SRC_TOP3, 0, BIT(8) },	/* MUX mout_user_aclk200_disp1 */
+	{ DIV2_RATIO0, 0, 0x30000 },		/* DIV dout_disp1_blk */
+};
+
+static const struct samsung_div_clock exynos5x_gsc_div_clks[] __initconst = {
+	DIV(0, "dout_gscl_blk_300", "mout_user_aclk300_gscl",
+			DIV2_RATIO0, 4, 2),
+};
+
+static const struct samsung_gate_clock exynos5x_gsc_gate_clks[] __initconst = {
+	GATE(CLK_GSCL0, "gscl0", "aclk300_gscl", GATE_IP_GSCL0, 0, 0, 0),
+	GATE(CLK_GSCL1, "gscl1", "aclk300_gscl", GATE_IP_GSCL0, 1, 0, 0),
+	GATE(CLK_SMMU_GSCL0, "smmu_gscl0", "dout_gscl_blk_300",
+			GATE_IP_GSCL1, 6, 0, 0),
+	GATE(CLK_SMMU_GSCL1, "smmu_gscl1", "dout_gscl_blk_300",
+			GATE_IP_GSCL1, 7, 0, 0),
+};
+
+static struct exynos5_subcmu_reg_dump exynos5x_gsc_suspend_regs[] = {
+	{ GATE_IP_GSCL0, 0x3, 0x3 },	/* GSC gates */
+	{ GATE_IP_GSCL1, 0xc0, 0xc0 },	/* GSC gates */
+	{ SRC_TOP5, 0, BIT(28) },	/* MUX mout_user_aclk300_gscl */
+	{ DIV2_RATIO0, 0, 0x30 },	/* DIV dout_gscl_blk_300 */
+};
+
+static const struct samsung_div_clock exynos5x_mfc_div_clks[] __initconst = {
+	DIV(0, "dout_mfc_blk", "mout_user_aclk333", DIV4_RATIO, 0, 2),
+};
+
+static const struct samsung_gate_clock exynos5x_mfc_gate_clks[] __initconst = {
 	GATE(CLK_MFC, "mfc", "aclk333", GATE_IP_MFC, 0, 0, 0),
 	GATE(CLK_SMMU_MFCL, "smmu_mfcl", "dout_mfc_blk", GATE_IP_MFC, 1, 0, 0),
 	GATE(CLK_SMMU_MFCR, "smmu_mfcr", "dout_mfc_blk", GATE_IP_MFC, 2, 0, 0),
+};
 
-	GATE(CLK_G3D, "g3d", "mout_user_aclk_g3d", GATE_IP_G3D, 9, 0, 0),
+static struct exynos5_subcmu_reg_dump exynos5x_mfc_suspend_regs[] = {
+	{ GATE_IP_MFC, 0xffffffff, 0xffffffff }, /* MFC gates */
+	{ SRC_TOP4, 0, BIT(28) },		/* MUX mout_user_aclk333 */
+	{ DIV4_RATIO, 0, 0x3 },			/* DIV dout_mfc_blk */
+};
+
+static const struct exynos5_subcmu_info exynos5x_subcmus[] = {
+	{
+		.div_clks	= exynos5x_disp_div_clks,
+		.nr_div_clks	= ARRAY_SIZE(exynos5x_disp_div_clks),
+		.gate_clks	= exynos5x_disp_gate_clks,
+		.nr_gate_clks	= ARRAY_SIZE(exynos5x_disp_gate_clks),
+		.suspend_regs	= exynos5x_disp_suspend_regs,
+		.nr_suspend_regs = ARRAY_SIZE(exynos5x_disp_suspend_regs),
+		.pd_name	= "DISP",
+	}, {
+		.div_clks	= exynos5x_gsc_div_clks,
+		.nr_div_clks	= ARRAY_SIZE(exynos5x_gsc_div_clks),
+		.gate_clks	= exynos5x_gsc_gate_clks,
+		.nr_gate_clks	= ARRAY_SIZE(exynos5x_gsc_gate_clks),
+		.suspend_regs	= exynos5x_gsc_suspend_regs,
+		.nr_suspend_regs = ARRAY_SIZE(exynos5x_gsc_suspend_regs),
+		.pd_name	= "GSC",
+	}, {
+		.div_clks	= exynos5x_mfc_div_clks,
+		.nr_div_clks	= ARRAY_SIZE(exynos5x_mfc_div_clks),
+		.gate_clks	= exynos5x_mfc_gate_clks,
+		.nr_gate_clks	= ARRAY_SIZE(exynos5x_mfc_gate_clks),
+		.suspend_regs	= exynos5x_mfc_suspend_regs,
+		.nr_suspend_regs = ARRAY_SIZE(exynos5x_mfc_suspend_regs),
+		.pd_name	= "MFC",
+	},
 };
 
 static const struct samsung_pll_rate_table exynos5420_pll2550x_24mhz_tbl[] __initconst = {
-	PLL_35XX_RATE(2000000000, 250, 3, 0),
-	PLL_35XX_RATE(1900000000, 475, 6, 0),
-	PLL_35XX_RATE(1800000000, 225, 3, 0),
-	PLL_35XX_RATE(1700000000, 425, 6, 0),
-	PLL_35XX_RATE(1600000000, 200, 3, 0),
-	PLL_35XX_RATE(1500000000, 250, 4, 0),
-	PLL_35XX_RATE(1400000000, 175, 3, 0),
-	PLL_35XX_RATE(1300000000, 325, 6, 0),
-	PLL_35XX_RATE(1200000000, 200, 2, 1),
-	PLL_35XX_RATE(1100000000, 275, 3, 1),
-	PLL_35XX_RATE(1000000000, 250, 3, 1),
-	PLL_35XX_RATE(900000000,  150, 2, 1),
-	PLL_35XX_RATE(800000000,  200, 3, 1),
-	PLL_35XX_RATE(700000000,  175, 3, 1),
-	PLL_35XX_RATE(600000000,  200, 2, 2),
-	PLL_35XX_RATE(500000000,  250, 3, 2),
-	PLL_35XX_RATE(400000000,  200, 3, 2),
-	PLL_35XX_RATE(300000000,  200, 2, 3),
-	PLL_35XX_RATE(200000000,  200, 3, 3),
+	PLL_35XX_RATE(24 * MHZ, 2000000000, 250, 3, 0),
+	PLL_35XX_RATE(24 * MHZ, 1900000000, 475, 6, 0),
+	PLL_35XX_RATE(24 * MHZ, 1800000000, 225, 3, 0),
+	PLL_35XX_RATE(24 * MHZ, 1700000000, 425, 6, 0),
+	PLL_35XX_RATE(24 * MHZ, 1600000000, 200, 3, 0),
+	PLL_35XX_RATE(24 * MHZ, 1500000000, 250, 4, 0),
+	PLL_35XX_RATE(24 * MHZ, 1400000000, 175, 3, 0),
+	PLL_35XX_RATE(24 * MHZ, 1300000000, 325, 6, 0),
+	PLL_35XX_RATE(24 * MHZ, 1200000000, 200, 2, 1),
+	PLL_35XX_RATE(24 * MHZ, 1100000000, 275, 3, 1),
+	PLL_35XX_RATE(24 * MHZ, 1000000000, 250, 3, 1),
+	PLL_35XX_RATE(24 * MHZ, 900000000,  150, 2, 1),
+	PLL_35XX_RATE(24 * MHZ, 800000000,  200, 3, 1),
+	PLL_35XX_RATE(24 * MHZ, 700000000,  175, 3, 1),
+	PLL_35XX_RATE(24 * MHZ, 600000000,  200, 2, 2),
+	PLL_35XX_RATE(24 * MHZ, 500000000,  250, 3, 2),
+	PLL_35XX_RATE(24 * MHZ, 400000000,  200, 3, 2),
+	PLL_35XX_RATE(24 * MHZ, 300000000,  200, 2, 3),
+	PLL_35XX_RATE(24 * MHZ, 200000000,  200, 3, 3),
 };
 
 static const struct samsung_pll_rate_table exynos5420_epll_24mhz_tbl[] = {
-	PLL_36XX_RATE(600000000U, 100, 2, 1, 0),
-	PLL_36XX_RATE(400000000U, 200, 3, 2, 0),
-	PLL_36XX_RATE(393216003U, 197, 3, 2, -25690),
-	PLL_36XX_RATE(361267218U, 301, 5, 2, 3671),
-	PLL_36XX_RATE(200000000U, 200, 3, 3, 0),
-	PLL_36XX_RATE(196608001U, 197, 3, 3, -25690),
-	PLL_36XX_RATE(180633609U, 301, 5, 3, 3671),
-	PLL_36XX_RATE(131072006U, 131, 3, 3, 4719),
-	PLL_36XX_RATE(100000000U, 200, 3, 4, 0),
-	PLL_36XX_RATE( 65536003U, 131, 3, 4, 4719),
-	PLL_36XX_RATE( 49152000U, 197, 3, 5, -25690),
-	PLL_36XX_RATE( 32768001U, 131, 3, 5, 4719),
+	PLL_36XX_RATE(24 * MHZ, 600000000U, 100, 2, 1, 0),
+	PLL_36XX_RATE(24 * MHZ, 400000000U, 200, 3, 2, 0),
+	PLL_36XX_RATE(24 * MHZ, 393216003U, 197, 3, 2, -25690),
+	PLL_36XX_RATE(24 * MHZ, 361267218U, 301, 5, 2, 3671),
+	PLL_36XX_RATE(24 * MHZ, 200000000U, 200, 3, 3, 0),
+	PLL_36XX_RATE(24 * MHZ, 196608001U, 197, 3, 3, -25690),
+	PLL_36XX_RATE(24 * MHZ, 180633609U, 301, 5, 3, 3671),
+	PLL_36XX_RATE(24 * MHZ, 131072006U, 131, 3, 3, 4719),
+	PLL_36XX_RATE(24 * MHZ, 100000000U, 200, 3, 4, 0),
+	PLL_36XX_RATE(24 * MHZ,  73728000U, 98, 2, 4, 19923),
+	PLL_36XX_RATE(24 * MHZ,  67737602U, 90, 2, 4, 20762),
+	PLL_36XX_RATE(24 * MHZ,  65536003U, 131, 3, 4, 4719),
+	PLL_36XX_RATE(24 * MHZ,  49152000U, 197, 3, 5, -25690),
+	PLL_36XX_RATE(24 * MHZ,  45158401U, 90, 3, 4, 20762),
+	PLL_36XX_RATE(24 * MHZ,  32768001U, 131, 3, 5, 4719),
 };
 
 static struct samsung_pll_clock exynos5x_plls[nr_plls] __initdata = {
@@ -1472,6 +1541,8 @@ static void __init exynos5x_clk_init(struct device_node *np,
 		exynos5420_kfcclk_d, ARRAY_SIZE(exynos5420_kfcclk_d), 0);
 
 	exynos5420_clk_sleep_init();
+	exynos5_subcmus_init(ctx, ARRAY_SIZE(exynos5x_subcmus),
+			     exynos5x_subcmus);
 
 	samsung_clk_of_add_provider(np, ctx);
 }
@@ -1480,10 +1551,12 @@ static void __init exynos5420_clk_init(struct device_node *np)
 {
 	exynos5x_clk_init(np, EXYNOS5420);
 }
-CLK_OF_DECLARE(exynos5420_clk, "samsung,exynos5420-clock", exynos5420_clk_init);
+CLK_OF_DECLARE_DRIVER(exynos5420_clk, "samsung,exynos5420-clock",
+		      exynos5420_clk_init);
 
 static void __init exynos5800_clk_init(struct device_node *np)
 {
 	exynos5x_clk_init(np, EXYNOS5800);
 }
-CLK_OF_DECLARE(exynos5800_clk, "samsung,exynos5800-clock", exynos5800_clk_init);
+CLK_OF_DECLARE_DRIVER(exynos5800_clk, "samsung,exynos5800-clock",
+		      exynos5800_clk_init);
diff --git a/drivers/clk/samsung/clk-exynos5433.c b/drivers/clk/samsung/clk-exynos5433.c
index db270908037a..5305ace514b2 100644
--- a/drivers/clk/samsung/clk-exynos5433.c
+++ b/drivers/clk/samsung/clk-exynos5433.c
@@ -703,68 +703,69 @@ static const struct samsung_gate_clock top_gate_clks[] __initconst = {
  * & MPHY_PLL & G3D_PLL & DISP_PLL & ISP_PLL
  */
 static const struct samsung_pll_rate_table exynos5433_pll_rates[] __initconst = {
-	PLL_35XX_RATE(2500000000U, 625, 6,  0),
-	PLL_35XX_RATE(2400000000U, 500, 5,  0),
-	PLL_35XX_RATE(2300000000U, 575, 6,  0),
-	PLL_35XX_RATE(2200000000U, 550, 6,  0),
-	PLL_35XX_RATE(2100000000U, 350, 4,  0),
-	PLL_35XX_RATE(2000000000U, 500, 6,  0),
-	PLL_35XX_RATE(1900000000U, 475, 6,  0),
-	PLL_35XX_RATE(1800000000U, 375, 5,  0),
-	PLL_35XX_RATE(1700000000U, 425, 6,  0),
-	PLL_35XX_RATE(1600000000U, 400, 6,  0),
-	PLL_35XX_RATE(1500000000U, 250, 4,  0),
-	PLL_35XX_RATE(1400000000U, 350, 6,  0),
-	PLL_35XX_RATE(1332000000U, 222, 4,  0),
-	PLL_35XX_RATE(1300000000U, 325, 6,  0),
-	PLL_35XX_RATE(1200000000U, 500, 5,  1),
-	PLL_35XX_RATE(1100000000U, 550, 6,  1),
-	PLL_35XX_RATE(1086000000U, 362, 4,  1),
-	PLL_35XX_RATE(1066000000U, 533, 6,  1),
-	PLL_35XX_RATE(1000000000U, 500, 6,  1),
-	PLL_35XX_RATE(933000000U,  311, 4,  1),
-	PLL_35XX_RATE(921000000U,  307, 4,  1),
-	PLL_35XX_RATE(900000000U,  375, 5,  1),
-	PLL_35XX_RATE(825000000U,  275, 4,  1),
-	PLL_35XX_RATE(800000000U,  400, 6,  1),
-	PLL_35XX_RATE(733000000U,  733, 12, 1),
-	PLL_35XX_RATE(700000000U,  175, 3,  1),
-	PLL_35XX_RATE(667000000U,  222, 4,  1),
-	PLL_35XX_RATE(633000000U,  211, 4,  1),
-	PLL_35XX_RATE(600000000U,  500, 5,  2),
-	PLL_35XX_RATE(552000000U,  460, 5,  2),
-	PLL_35XX_RATE(550000000U,  550, 6,  2),
-	PLL_35XX_RATE(543000000U,  362, 4,  2),
-	PLL_35XX_RATE(533000000U,  533, 6,  2),
-	PLL_35XX_RATE(500000000U,  500, 6,  2),
-	PLL_35XX_RATE(444000000U,  370, 5,  2),
-	PLL_35XX_RATE(420000000U,  350, 5,  2),
-	PLL_35XX_RATE(400000000U,  400, 6,  2),
-	PLL_35XX_RATE(350000000U,  350, 6,  2),
-	PLL_35XX_RATE(333000000U,  222, 4,  2),
-	PLL_35XX_RATE(300000000U,  500, 5,  3),
-	PLL_35XX_RATE(278000000U,  556, 6,  3),
-	PLL_35XX_RATE(266000000U,  532, 6,  3),
-	PLL_35XX_RATE(250000000U,  500, 6,  3),
-	PLL_35XX_RATE(200000000U,  400, 6,  3),
-	PLL_35XX_RATE(166000000U,  332, 6,  3),
-	PLL_35XX_RATE(160000000U,  320, 6,  3),
-	PLL_35XX_RATE(133000000U,  532, 6,  4),
-	PLL_35XX_RATE(100000000U,  400, 6,  4),
+	PLL_35XX_RATE(24 * MHZ, 2500000000U, 625, 6,  0),
+	PLL_35XX_RATE(24 * MHZ, 2400000000U, 500, 5,  0),
+	PLL_35XX_RATE(24 * MHZ, 2300000000U, 575, 6,  0),
+	PLL_35XX_RATE(24 * MHZ, 2200000000U, 550, 6,  0),
+	PLL_35XX_RATE(24 * MHZ, 2100000000U, 350, 4,  0),
+	PLL_35XX_RATE(24 * MHZ, 2000000000U, 500, 6,  0),
+	PLL_35XX_RATE(24 * MHZ, 1900000000U, 475, 6,  0),
+	PLL_35XX_RATE(24 * MHZ, 1800000000U, 375, 5,  0),
+	PLL_35XX_RATE(24 * MHZ, 1700000000U, 425, 6,  0),
+	PLL_35XX_RATE(24 * MHZ, 1600000000U, 400, 6,  0),
+	PLL_35XX_RATE(24 * MHZ, 1500000000U, 250, 4,  0),
+	PLL_35XX_RATE(24 * MHZ, 1400000000U, 350, 6,  0),
+	PLL_35XX_RATE(24 * MHZ, 1332000000U, 222, 4,  0),
+	PLL_35XX_RATE(24 * MHZ, 1300000000U, 325, 6,  0),
+	PLL_35XX_RATE(24 * MHZ, 1200000000U, 500, 5,  1),
+	PLL_35XX_RATE(24 * MHZ, 1100000000U, 550, 6,  1),
+	PLL_35XX_RATE(24 * MHZ, 1086000000U, 362, 4,  1),
+	PLL_35XX_RATE(24 * MHZ, 1066000000U, 533, 6,  1),
+	PLL_35XX_RATE(24 * MHZ, 1000000000U, 500, 6,  1),
+	PLL_35XX_RATE(24 * MHZ, 933000000U,  311, 4,  1),
+	PLL_35XX_RATE(24 * MHZ, 921000000U,  307, 4,  1),
+	PLL_35XX_RATE(24 * MHZ, 900000000U,  375, 5,  1),
+	PLL_35XX_RATE(24 * MHZ, 825000000U,  275, 4,  1),
+	PLL_35XX_RATE(24 * MHZ, 800000000U,  400, 6,  1),
+	PLL_35XX_RATE(24 * MHZ, 733000000U,  733, 12, 1),
+	PLL_35XX_RATE(24 * MHZ, 700000000U,  175, 3,  1),
+	PLL_35XX_RATE(24 * MHZ, 666000000U,  222, 4,  1),
+	PLL_35XX_RATE(24 * MHZ, 633000000U,  211, 4,  1),
+	PLL_35XX_RATE(24 * MHZ, 600000000U,  500, 5,  2),
+	PLL_35XX_RATE(24 * MHZ, 552000000U,  460, 5,  2),
+	PLL_35XX_RATE(24 * MHZ, 550000000U,  550, 6,  2),
+	PLL_35XX_RATE(24 * MHZ, 543000000U,  362, 4,  2),
+	PLL_35XX_RATE(24 * MHZ, 533000000U,  533, 6,  2),
+	PLL_35XX_RATE(24 * MHZ, 500000000U,  500, 6,  2),
+	PLL_35XX_RATE(24 * MHZ, 444000000U,  370, 5,  2),
+	PLL_35XX_RATE(24 * MHZ, 420000000U,  350, 5,  2),
+	PLL_35XX_RATE(24 * MHZ, 400000000U,  400, 6,  2),
+	PLL_35XX_RATE(24 * MHZ, 350000000U,  350, 6,  2),
+	PLL_35XX_RATE(24 * MHZ, 333000000U,  222, 4,  2),
+	PLL_35XX_RATE(24 * MHZ, 300000000U,  500, 5,  3),
+	PLL_35XX_RATE(24 * MHZ, 278000000U,  556, 6,  3),
+	PLL_35XX_RATE(24 * MHZ, 266000000U,  532, 6,  3),
+	PLL_35XX_RATE(24 * MHZ, 250000000U,  500, 6,  3),
+	PLL_35XX_RATE(24 * MHZ, 200000000U,  400, 6,  3),
+	PLL_35XX_RATE(24 * MHZ, 166000000U,  332, 6,  3),
+	PLL_35XX_RATE(24 * MHZ, 160000000U,  320, 6,  3),
+	PLL_35XX_RATE(24 * MHZ, 133000000U,  532, 6,  4),
+	PLL_35XX_RATE(24 * MHZ, 100000000U,  400, 6,  4),
 	{ /* sentinel */ }
 };
 
 /* AUD_PLL */
 static const struct samsung_pll_rate_table exynos5433_aud_pll_rates[] __initconst = {
-	PLL_36XX_RATE(400000000U, 200, 3, 2,      0),
-	PLL_36XX_RATE(393216000U, 197, 3, 2, -25690),
-	PLL_36XX_RATE(384000000U, 128, 2, 2,      0),
-	PLL_36XX_RATE(368640000U, 246, 4, 2, -15729),
-	PLL_36XX_RATE(361507200U, 181, 3, 2, -16148),
-	PLL_36XX_RATE(338688000U, 113, 2, 2,  -6816),
-	PLL_36XX_RATE(294912000U,  98, 1, 3,  19923),
-	PLL_36XX_RATE(288000000U,  96, 1, 3,      0),
-	PLL_36XX_RATE(252000000U,  84, 1, 3,      0),
+	PLL_36XX_RATE(24 * MHZ, 400000000U, 200, 3, 2,      0),
+	PLL_36XX_RATE(24 * MHZ, 393216003U, 197, 3, 2, -25690),
+	PLL_36XX_RATE(24 * MHZ, 384000000U, 128, 2, 2,      0),
+	PLL_36XX_RATE(24 * MHZ, 368639991U, 246, 4, 2, -15729),
+	PLL_36XX_RATE(24 * MHZ, 361507202U, 181, 3, 2, -16148),
+	PLL_36XX_RATE(24 * MHZ, 338687988U, 113, 2, 2,  -6816),
+	PLL_36XX_RATE(24 * MHZ, 294912002U,  98, 1, 3,  19923),
+	PLL_36XX_RATE(24 * MHZ, 288000000U,  96, 1, 3,      0),
+	PLL_36XX_RATE(24 * MHZ, 252000000U,  84, 1, 3,      0),
+	PLL_36XX_RATE(24 * MHZ, 196608001U, 197, 3, 3, -25690),
 	{ /* sentinel */ }
 };
 
@@ -1672,7 +1673,7 @@ static const struct samsung_gate_clock peric_gate_clks[] __initconst = {
 			ENABLE_SCLK_PERIC, 11, CLK_SET_RATE_PARENT, 0),
 	GATE(CLK_SCLK_IOCLK_I2S1_BCLK, "sclk_ioclk_i2s1_bclk",
 			"ioclk_i2s1_bclk_in", ENABLE_SCLK_PERIC, 10,
-			CLK_SET_RATE_PARENT, 0),
+			CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED, 0),
 	GATE(CLK_SCLK_SPDIF, "sclk_spdif", "sclk_spdif_peric",
 			ENABLE_SCLK_PERIC, 8, CLK_SET_RATE_PARENT, 0),
 	GATE(CLK_SCLK_PCM1, "sclk_pcm1", "sclk_pcm1_peric",
@@ -5513,10 +5514,8 @@ static int __init exynos5433_cmu_probe(struct platform_device *pdev)
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	reg_base = devm_ioremap_resource(dev, res);
-	if (IS_ERR(reg_base)) {
-		dev_err(dev, "failed to map registers\n");
+	if (IS_ERR(reg_base))
 		return PTR_ERR(reg_base);
-	}
 
 	for (i = 0; i < info->nr_clk_ids; ++i)
 		ctx->clk_data.hws[i] = ERR_PTR(-ENOENT);
diff --git a/drivers/clk/samsung/clk-exynos7.c b/drivers/clk/samsung/clk-exynos7.c
index 5931a4140c3d..492d51691080 100644
--- a/drivers/clk/samsung/clk-exynos7.c
+++ b/drivers/clk/samsung/clk-exynos7.c
@@ -140,7 +140,7 @@ static const struct samsung_div_clock topc_div_clks[] __initconst = {
 };
 
 static const struct samsung_pll_rate_table pll1460x_24mhz_tbl[] __initconst = {
-	PLL_36XX_RATE(491520000, 20, 1, 0, 31457),
+	PLL_36XX_RATE(24 * MHZ, 491519897, 20, 1, 0, 31457),
 	{},
 };
 
diff --git a/drivers/clk/samsung/clk-pll.h b/drivers/clk/samsung/clk-pll.h
index 61eb8abbfd9c..ca57b3dfa814 100644
--- a/drivers/clk/samsung/clk-pll.h
+++ b/drivers/clk/samsung/clk-pll.h
@@ -41,35 +41,62 @@ enum samsung_pll_type {
 	pll_1460x,
 };
 
-#define PLL_35XX_RATE(_rate, _m, _p, _s)			\
+#define PLL_RATE(_fin, _m, _p, _s, _k, _ks) \
+	((u64)(_fin) * (BIT(_ks) * (_m) + (_k)) / BIT(_ks) / ((_p) << (_s)))
+#define PLL_VALID_RATE(_fin, _fout, _m, _p, _s, _k, _ks) ((_fout) + \
+	BUILD_BUG_ON_ZERO(PLL_RATE(_fin, _m, _p, _s, _k, _ks) != (_fout)))
+
+#define PLL_35XX_RATE(_fin, _rate, _m, _p, _s)			\
+	{							\
+		.rate	=	PLL_VALID_RATE(_fin, _rate,	\
+				_m, _p, _s, 0, 16),		\
+		.mdiv	=	(_m),				\
+		.pdiv	=	(_p),				\
+		.sdiv	=	(_s),				\
+	}
+
+#define PLL_S3C2410_MPLL_RATE(_fin, _rate, _m, _p, _s)		\
+	{							\
+		.rate	=	PLL_VALID_RATE(_fin, _rate,	\
+				(_m) + 8, (_p) + 2, _s, 0, 16),	\
+		.mdiv	=	(_m),				\
+		.pdiv	=	(_p),				\
+		.sdiv	=	(_s),				\
+	}
+
+#define PLL_S3C2440_MPLL_RATE(_fin, _rate, _m, _p, _s)		\
 	{							\
-		.rate	=	(_rate),				\
+		.rate	=	PLL_VALID_RATE(_fin, _rate,	\
+				2 * ((_m) + 8), (_p) + 2, _s, 0, 16), \
 		.mdiv	=	(_m),				\
 		.pdiv	=	(_p),				\
 		.sdiv	=	(_s),				\
 	}
 
-#define PLL_36XX_RATE(_rate, _m, _p, _s, _k)			\
+#define PLL_36XX_RATE(_fin, _rate, _m, _p, _s, _k)		\
 	{							\
-		.rate	=	(_rate),				\
+		.rate	=	PLL_VALID_RATE(_fin, _rate,	\
+				_m, _p, _s, _k, 16),		\
 		.mdiv	=	(_m),				\
 		.pdiv	=	(_p),				\
 		.sdiv	=	(_s),				\
 		.kdiv	=	(_k),				\
 	}
 
-#define PLL_45XX_RATE(_rate, _m, _p, _s, _afc)			\
+#define PLL_4508_RATE(_fin, _rate, _m, _p, _s, _afc)		\
 	{							\
-		.rate	=	(_rate),			\
+		.rate	=	PLL_VALID_RATE(_fin, _rate,	\
+				_m, _p, (_s) - 1, 0, 16),	\
 		.mdiv	=	(_m),				\
 		.pdiv	=	(_p),				\
 		.sdiv	=	(_s),				\
 		.afc	=	(_afc),				\
 	}
 
-#define PLL_4600_RATE(_rate, _m, _p, _s, _k, _vsel)		\
+#define PLL_4600_RATE(_fin, _rate, _m, _p, _s, _k, _vsel)	\
 	{							\
-		.rate	=	(_rate),			\
+		.rate	=	PLL_VALID_RATE(_fin, _rate,	\
+				_m, _p, _s, _k, 16),		\
 		.mdiv	=	(_m),				\
 		.pdiv	=	(_p),				\
 		.sdiv	=	(_s),				\
@@ -77,9 +104,10 @@ enum samsung_pll_type {
 		.vsel	=	(_vsel),			\
 	}
 
-#define PLL_4650_RATE(_rate, _m, _p, _s, _k, _mfr, _mrr, _vsel)	\
+#define PLL_4650_RATE(_fin, _rate, _m, _p, _s, _k, _mfr, _mrr, _vsel) \
 	{							\
-		.rate	=	(_rate),			\
+		.rate	=	PLL_VALID_RATE(_fin, _rate,	\
+				_m, _p, _s, _k, 10),		\
 		.mdiv	=	(_m),				\
 		.pdiv	=	(_p),				\
 		.sdiv	=	(_s),				\
diff --git a/drivers/clk/samsung/clk-s3c2410.c b/drivers/clk/samsung/clk-s3c2410.c
index e0650c33863b..a9c887475054 100644
--- a/drivers/clk/samsung/clk-s3c2410.c
+++ b/drivers/clk/samsung/clk-s3c2410.c
@@ -95,7 +95,7 @@ static void __init s3c2410_clk_sleep_init(void) {}
 
 PNAME(fclk_p) = { "mpll", "div_slow" };
 
-struct samsung_mux_clock s3c2410_common_muxes[] __initdata = {
+static struct samsung_mux_clock s3c2410_common_muxes[] __initdata = {
 	MUX(FCLK, "fclk", fclk_p, CLKSLOW, 4, 1),
 };
 
@@ -111,12 +111,12 @@ static struct clk_div_table divslow_d[] = {
 	{ /* sentinel */ },
 };
 
-struct samsung_div_clock s3c2410_common_dividers[] __initdata = {
+static struct samsung_div_clock s3c2410_common_dividers[] __initdata = {
 	DIV_T(0, "div_slow", "xti", CLKSLOW, 0, 3, divslow_d),
 	DIV(PCLK, "pclk", "hclk", CLKDIVN, 0, 1),
 };
 
-struct samsung_gate_clock s3c2410_common_gates[] __initdata = {
+static struct samsung_gate_clock s3c2410_common_gates[] __initdata = {
 	GATE(PCLK_SPI, "spi", "pclk", CLKCON, 18, 0, 0),
 	GATE(PCLK_I2S, "i2s", "pclk", CLKCON, 17, 0, 0),
 	GATE(PCLK_I2C, "i2c", "pclk", CLKCON, 16, 0, 0),
@@ -135,7 +135,7 @@ struct samsung_gate_clock s3c2410_common_gates[] __initdata = {
 };
 
 /* should be added _after_ the soc-specific clocks are created */
-struct samsung_clock_alias s3c2410_common_aliases[] __initdata = {
+static struct samsung_clock_alias s3c2410_common_aliases[] __initdata = {
 	ALIAS(PCLK_I2C, "s3c2410-i2c.0", "i2c"),
 	ALIAS(PCLK_ADC, NULL, "adc"),
 	ALIAS(PCLK_RTC, NULL, "rtc"),
@@ -162,34 +162,34 @@ struct samsung_clock_alias s3c2410_common_aliases[] __initdata = {
 static struct samsung_pll_rate_table pll_s3c2410_12mhz_tbl[] __initdata = {
 	/* sorted in descending order */
 	/* 2410A extras */
-	PLL_35XX_RATE(270000000, 127, 1, 1),
-	PLL_35XX_RATE(268000000, 126, 1, 1),
-	PLL_35XX_RATE(266000000, 125, 1, 1),
-	PLL_35XX_RATE(226000000, 105, 1, 1),
-	PLL_35XX_RATE(210000000, 132, 2, 1),
+	PLL_S3C2410_MPLL_RATE(12 * MHZ, 270000000, 127, 1, 1),
+	PLL_S3C2410_MPLL_RATE(12 * MHZ, 268000000, 126, 1, 1),
+	PLL_S3C2410_MPLL_RATE(12 * MHZ, 266000000, 125, 1, 1),
+	PLL_S3C2410_MPLL_RATE(12 * MHZ, 226000000, 105, 1, 1),
+	PLL_S3C2410_MPLL_RATE(12 * MHZ, 210000000, 132, 2, 1),
 	/* 2410 common */
-	PLL_35XX_RATE(203000000, 161, 3, 1),
-	PLL_35XX_RATE(192000000, 88, 1, 1),
-	PLL_35XX_RATE(186000000, 85, 1, 1),
-	PLL_35XX_RATE(180000000, 82, 1, 1),
-	PLL_35XX_RATE(170000000, 77, 1, 1),
-	PLL_35XX_RATE(158000000, 71, 1, 1),
-	PLL_35XX_RATE(152000000, 68, 1, 1),
-	PLL_35XX_RATE(147000000, 90, 2, 1),
-	PLL_35XX_RATE(135000000, 82, 2, 1),
-	PLL_35XX_RATE(124000000, 116, 1, 2),
-	PLL_35XX_RATE(118000000, 150, 2, 2),
-	PLL_35XX_RATE(113000000, 105, 1, 2),
-	PLL_35XX_RATE(101000000, 127, 2, 2),
-	PLL_35XX_RATE(90000000, 112, 2, 2),
-	PLL_35XX_RATE(85000000, 105, 2, 2),
-	PLL_35XX_RATE(79000000, 71, 1, 2),
-	PLL_35XX_RATE(68000000, 82, 2, 2),
-	PLL_35XX_RATE(56000000, 142, 2, 3),
-	PLL_35XX_RATE(48000000, 120, 2, 3),
-	PLL_35XX_RATE(51000000, 161, 3, 3),
-	PLL_35XX_RATE(45000000, 82, 1, 3),
-	PLL_35XX_RATE(34000000, 82, 2, 3),
+	PLL_S3C2410_MPLL_RATE(12 * MHZ, 202800000, 161, 3, 1),
+	PLL_S3C2410_MPLL_RATE(12 * MHZ, 192000000, 88, 1, 1),
+	PLL_S3C2410_MPLL_RATE(12 * MHZ, 186000000, 85, 1, 1),
+	PLL_S3C2410_MPLL_RATE(12 * MHZ, 180000000, 82, 1, 1),
+	PLL_S3C2410_MPLL_RATE(12 * MHZ, 170000000, 77, 1, 1),
+	PLL_S3C2410_MPLL_RATE(12 * MHZ, 158000000, 71, 1, 1),
+	PLL_S3C2410_MPLL_RATE(12 * MHZ, 152000000, 68, 1, 1),
+	PLL_S3C2410_MPLL_RATE(12 * MHZ, 147000000, 90, 2, 1),
+	PLL_S3C2410_MPLL_RATE(12 * MHZ, 135000000, 82, 2, 1),
+	PLL_S3C2410_MPLL_RATE(12 * MHZ, 124000000, 116, 1, 2),
+	PLL_S3C2410_MPLL_RATE(12 * MHZ, 118500000, 150, 2, 2),
+	PLL_S3C2410_MPLL_RATE(12 * MHZ, 113000000, 105, 1, 2),
+	PLL_S3C2410_MPLL_RATE(12 * MHZ, 101250000, 127, 2, 2),
+	PLL_S3C2410_MPLL_RATE(12 * MHZ, 90000000, 112, 2, 2),
+	PLL_S3C2410_MPLL_RATE(12 * MHZ, 84750000, 105, 2, 2),
+	PLL_S3C2410_MPLL_RATE(12 * MHZ, 79000000, 71, 1, 2),
+	PLL_S3C2410_MPLL_RATE(12 * MHZ, 67500000, 82, 2, 2),
+	PLL_S3C2410_MPLL_RATE(12 * MHZ, 56250000, 142, 2, 3),
+	PLL_S3C2410_MPLL_RATE(12 * MHZ, 50700000, 161, 3, 3),
+	PLL_S3C2410_MPLL_RATE(12 * MHZ, 48000000, 120, 2, 3),
+	PLL_S3C2410_MPLL_RATE(12 * MHZ, 45000000, 82, 1, 3),
+	PLL_S3C2410_MPLL_RATE(12 * MHZ, 33750000, 82, 2, 3),
 	{ /* sentinel */ },
 };
 
@@ -200,11 +200,11 @@ static struct samsung_pll_clock s3c2410_plls[] __initdata = {
 						LOCKTIME, UPLLCON, NULL),
 };
 
-struct samsung_div_clock s3c2410_dividers[] __initdata = {
+static struct samsung_div_clock s3c2410_dividers[] __initdata = {
 	DIV(HCLK, "hclk", "mpll", CLKDIVN, 1, 1),
 };
 
-struct samsung_fixed_factor_clock s3c2410_ffactor[] __initdata = {
+static struct samsung_fixed_factor_clock s3c2410_ffactor[] __initdata = {
 	/*
 	 * armclk is directly supplied by the fclk, without
 	 * switching possibility like on the s3c244x below.
@@ -215,7 +215,7 @@ struct samsung_fixed_factor_clock s3c2410_ffactor[] __initdata = {
 	FFACTOR(UCLK, "uclk", "upll", 1, 1, 0),
 };
 
-struct samsung_clock_alias s3c2410_aliases[] __initdata = {
+static struct samsung_clock_alias s3c2410_aliases[] __initdata = {
 	ALIAS(PCLK_UART0, "s3c2410-uart.0", "uart"),
 	ALIAS(PCLK_UART1, "s3c2410-uart.1", "uart"),
 	ALIAS(PCLK_UART2, "s3c2410-uart.2", "uart"),
@@ -229,33 +229,33 @@ struct samsung_clock_alias s3c2410_aliases[] __initdata = {
 
 static struct samsung_pll_rate_table pll_s3c244x_12mhz_tbl[] __initdata = {
 	/* sorted in descending order */
-	PLL_35XX_RATE(400000000, 0x5c, 1, 1),
-	PLL_35XX_RATE(390000000, 0x7a, 2, 1),
-	PLL_35XX_RATE(380000000, 0x57, 1, 1),
-	PLL_35XX_RATE(370000000, 0xb1, 4, 1),
-	PLL_35XX_RATE(360000000, 0x70, 2, 1),
-	PLL_35XX_RATE(350000000, 0xa7, 4, 1),
-	PLL_35XX_RATE(340000000, 0x4d, 1, 1),
-	PLL_35XX_RATE(330000000, 0x66, 2, 1),
-	PLL_35XX_RATE(320000000, 0x98, 4, 1),
-	PLL_35XX_RATE(310000000, 0x93, 4, 1),
-	PLL_35XX_RATE(300000000, 0x75, 3, 1),
-	PLL_35XX_RATE(240000000, 0x70, 1, 2),
-	PLL_35XX_RATE(230000000, 0x6b, 1, 2),
-	PLL_35XX_RATE(220000000, 0x66, 1, 2),
-	PLL_35XX_RATE(210000000, 0x84, 2, 2),
-	PLL_35XX_RATE(200000000, 0x5c, 1, 2),
-	PLL_35XX_RATE(190000000, 0x57, 1, 2),
-	PLL_35XX_RATE(180000000, 0x70, 2, 2),
-	PLL_35XX_RATE(170000000, 0x4d, 1, 2),
-	PLL_35XX_RATE(160000000, 0x98, 4, 2),
-	PLL_35XX_RATE(150000000, 0x75, 3, 2),
-	PLL_35XX_RATE(120000000, 0x70, 1, 3),
-	PLL_35XX_RATE(110000000, 0x66, 1, 3),
-	PLL_35XX_RATE(100000000, 0x5c, 1, 3),
-	PLL_35XX_RATE(90000000, 0x70, 2, 3),
-	PLL_35XX_RATE(80000000, 0x98, 4, 3),
-	PLL_35XX_RATE(75000000, 0x75, 3, 3),
+	PLL_S3C2440_MPLL_RATE(12 * MHZ, 400000000, 0x5c, 1, 1),
+	PLL_S3C2440_MPLL_RATE(12 * MHZ, 390000000, 0x7a, 2, 1),
+	PLL_S3C2440_MPLL_RATE(12 * MHZ, 380000000, 0x57, 1, 1),
+	PLL_S3C2440_MPLL_RATE(12 * MHZ, 370000000, 0xb1, 4, 1),
+	PLL_S3C2440_MPLL_RATE(12 * MHZ, 360000000, 0x70, 2, 1),
+	PLL_S3C2440_MPLL_RATE(12 * MHZ, 350000000, 0xa7, 4, 1),
+	PLL_S3C2440_MPLL_RATE(12 * MHZ, 340000000, 0x4d, 1, 1),
+	PLL_S3C2440_MPLL_RATE(12 * MHZ, 330000000, 0x66, 2, 1),
+	PLL_S3C2440_MPLL_RATE(12 * MHZ, 320000000, 0x98, 4, 1),
+	PLL_S3C2440_MPLL_RATE(12 * MHZ, 310000000, 0x93, 4, 1),
+	PLL_S3C2440_MPLL_RATE(12 * MHZ, 300000000, 0x75, 3, 1),
+	PLL_S3C2440_MPLL_RATE(12 * MHZ, 240000000, 0x70, 1, 2),
+	PLL_S3C2440_MPLL_RATE(12 * MHZ, 230000000, 0x6b, 1, 2),
+	PLL_S3C2440_MPLL_RATE(12 * MHZ, 220000000, 0x66, 1, 2),
+	PLL_S3C2440_MPLL_RATE(12 * MHZ, 210000000, 0x84, 2, 2),
+	PLL_S3C2440_MPLL_RATE(12 * MHZ, 200000000, 0x5c, 1, 2),
+	PLL_S3C2440_MPLL_RATE(12 * MHZ, 190000000, 0x57, 1, 2),
+	PLL_S3C2440_MPLL_RATE(12 * MHZ, 180000000, 0x70, 2, 2),
+	PLL_S3C2440_MPLL_RATE(12 * MHZ, 170000000, 0x4d, 1, 2),
+	PLL_S3C2440_MPLL_RATE(12 * MHZ, 160000000, 0x98, 4, 2),
+	PLL_S3C2440_MPLL_RATE(12 * MHZ, 150000000, 0x75, 3, 2),
+	PLL_S3C2440_MPLL_RATE(12 * MHZ, 120000000, 0x70, 1, 3),
+	PLL_S3C2440_MPLL_RATE(12 * MHZ, 110000000, 0x66, 1, 3),
+	PLL_S3C2440_MPLL_RATE(12 * MHZ, 100000000, 0x5c, 1, 3),
+	PLL_S3C2440_MPLL_RATE(12 * MHZ, 90000000, 0x70, 2, 3),
+	PLL_S3C2440_MPLL_RATE(12 * MHZ, 80000000, 0x98, 4, 3),
+	PLL_S3C2440_MPLL_RATE(12 * MHZ, 75000000, 0x75, 3, 3),
 	{ /* sentinel */ },
 };
 
@@ -269,12 +269,12 @@ static struct samsung_pll_clock s3c244x_common_plls[] __initdata = {
 PNAME(hclk_p) = { "fclk", "div_hclk_2", "div_hclk_4", "div_hclk_3" };
 PNAME(armclk_p) = { "fclk", "hclk" };
 
-struct samsung_mux_clock s3c244x_common_muxes[] __initdata = {
+static struct samsung_mux_clock s3c244x_common_muxes[] __initdata = {
 	MUX(HCLK, "hclk", hclk_p, CLKDIVN, 1, 2),
 	MUX(ARMCLK, "armclk", armclk_p, CAMDIVN, 12, 1),
 };
 
-struct samsung_fixed_factor_clock s3c244x_common_ffactor[] __initdata = {
+static struct samsung_fixed_factor_clock s3c244x_common_ffactor[] __initdata = {
 	FFACTOR(0, "div_hclk_2", "fclk", 1, 2, 0),
 	FFACTOR(0, "ff_cam", "div_cam", 2, 1, CLK_SET_RATE_PARENT),
 };
@@ -291,7 +291,7 @@ static struct clk_div_table div_hclk_3_d[] = {
 	{ /* sentinel */ },
 };
 
-struct samsung_div_clock s3c244x_common_dividers[] __initdata = {
+static struct samsung_div_clock s3c244x_common_dividers[] __initdata = {
 	DIV(UCLK, "uclk", "upll", CLKDIVN, 3, 1),
 	DIV(0, "div_hclk", "fclk", CLKDIVN, 1, 1),
 	DIV_T(0, "div_hclk_4", "fclk", CAMDIVN, 9, 1, div_hclk_4_d),
@@ -299,11 +299,11 @@ struct samsung_div_clock s3c244x_common_dividers[] __initdata = {
 	DIV(0, "div_cam", "upll", CAMDIVN, 0, 3),
 };
 
-struct samsung_gate_clock s3c244x_common_gates[] __initdata = {
+static struct samsung_gate_clock s3c244x_common_gates[] __initdata = {
 	GATE(HCLK_CAM, "cam", "hclk", CLKCON, 19, 0, 0),
 };
 
-struct samsung_clock_alias s3c244x_common_aliases[] __initdata = {
+static struct samsung_clock_alias s3c244x_common_aliases[] __initdata = {
 	ALIAS(PCLK_UART0, "s3c2440-uart.0", "uart"),
 	ALIAS(PCLK_UART1, "s3c2440-uart.1", "uart"),
 	ALIAS(PCLK_UART2, "s3c2440-uart.2", "uart"),
@@ -318,23 +318,23 @@ struct samsung_clock_alias s3c244x_common_aliases[] __initdata = {
 
 PNAME(s3c2440_camif_p) = { "upll", "ff_cam" };
 
-struct samsung_mux_clock s3c2440_muxes[] __initdata = {
+static struct samsung_mux_clock s3c2440_muxes[] __initdata = {
 	MUX(CAMIF, "camif", s3c2440_camif_p, CAMDIVN, 4, 1),
 };
 
-struct samsung_gate_clock s3c2440_gates[] __initdata = {
+static struct samsung_gate_clock s3c2440_gates[] __initdata = {
 	GATE(PCLK_AC97, "ac97", "pclk", CLKCON, 20, 0, 0),
 };
 
 /* S3C2442 specific clocks */
 
-struct samsung_fixed_factor_clock s3c2442_ffactor[] __initdata = {
+static struct samsung_fixed_factor_clock s3c2442_ffactor[] __initdata = {
 	FFACTOR(0, "upll_3", "upll", 1, 3, 0),
 };
 
 PNAME(s3c2442_camif_p) = { "upll", "ff_cam", "upll", "upll_3" };
 
-struct samsung_mux_clock s3c2442_muxes[] __initdata = {
+static struct samsung_mux_clock s3c2442_muxes[] __initdata = {
 	MUX(CAMIF, "camif", s3c2442_camif_p, CAMDIVN, 4, 2),
 };
 
@@ -343,7 +343,7 @@ struct samsung_mux_clock s3c2442_muxes[] __initdata = {
  * Only necessary until the devicetree-move is complete
  */
 #define XTI	1
-struct samsung_fixed_rate_clock s3c2410_common_frate_clks[] __initdata = {
+static struct samsung_fixed_rate_clock s3c2410_common_frate_clks[] __initdata = {
 	FRATE(XTI, "xti", NULL, 0, 0),
 };
 
@@ -468,18 +468,18 @@ void __init s3c2410_common_clk_init(struct device_node *np, unsigned long xti_f,
 
 static void __init s3c2410_clk_init(struct device_node *np)
 {
-	s3c2410_common_clk_init(np, 0, S3C2410, 0);
+	s3c2410_common_clk_init(np, 0, S3C2410, NULL);
 }
 CLK_OF_DECLARE(s3c2410_clk, "samsung,s3c2410-clock", s3c2410_clk_init);
 
 static void __init s3c2440_clk_init(struct device_node *np)
 {
-	s3c2410_common_clk_init(np, 0, S3C2440, 0);
+	s3c2410_common_clk_init(np, 0, S3C2440, NULL);
 }
 CLK_OF_DECLARE(s3c2440_clk, "samsung,s3c2440-clock", s3c2440_clk_init);
 
 static void __init s3c2442_clk_init(struct device_node *np)
 {
-	s3c2410_common_clk_init(np, 0, S3C2442, 0);
+	s3c2410_common_clk_init(np, 0, S3C2442, NULL);
 }
 CLK_OF_DECLARE(s3c2442_clk, "samsung,s3c2442-clock", s3c2442_clk_init);
diff --git a/drivers/clk/samsung/clk-s3c2412.c b/drivers/clk/samsung/clk-s3c2412.c
index b8340a49921b..6bc94d3aff78 100644
--- a/drivers/clk/samsung/clk-s3c2412.c
+++ b/drivers/clk/samsung/clk-s3c2412.c
@@ -27,11 +27,6 @@
 #define CLKSRC		0x1c
 #define SWRST		0x30
 
-/* list of PLLs to be registered */
-enum s3c2412_plls {
-	mpll, upll,
-};
-
 static void __iomem *reg_base;
 
 #ifdef CONFIG_PM_SLEEP
@@ -98,7 +93,7 @@ static struct clk_div_table divxti_d[] = {
 	{ /* sentinel */ },
 };
 
-struct samsung_div_clock s3c2412_dividers[] __initdata = {
+static struct samsung_div_clock s3c2412_dividers[] __initdata = {
 	DIV_T(0, "div_xti", "xti", CLKSRC, 0, 3, divxti_d),
 	DIV(0, "div_cam", "mux_cam", CLKDIVN, 16, 4),
 	DIV(0, "div_i2s", "mux_i2s", CLKDIVN, 12, 4),
@@ -110,7 +105,7 @@ struct samsung_div_clock s3c2412_dividers[] __initdata = {
 	DIV(HCLK, "hclk", "armdiv", CLKDIVN, 0, 2),
 };
 
-struct samsung_fixed_factor_clock s3c2412_ffactor[] __initdata = {
+static struct samsung_fixed_factor_clock s3c2412_ffactor[] __initdata = {
 	FFACTOR(0, "ff_hclk", "hclk", 2, 1, CLK_SET_RATE_PARENT),
 };
 
@@ -130,7 +125,7 @@ PNAME(msysclk_p) = { "mdivclk", "mpll" };
 PNAME(mdivclk_p) = { "xti", "div_xti" };
 PNAME(armclk_p) = { "armdiv", "hclk" };
 
-struct samsung_mux_clock s3c2412_muxes[] __initdata = {
+static struct samsung_mux_clock s3c2412_muxes[] __initdata = {
 	MUX(0, "erefclk", erefclk_p, CLKSRC, 14, 2),
 	MUX(0, "urefclk", urefclk_p, CLKSRC, 12, 2),
 	MUX(0, "mux_cam", camclk_p, CLKSRC, 11, 1),
@@ -144,13 +139,11 @@ struct samsung_mux_clock s3c2412_muxes[] __initdata = {
 };
 
 static struct samsung_pll_clock s3c2412_plls[] __initdata = {
-	[mpll] = PLL(pll_s3c2440_mpll, MPLL, "mpll", "xti",
-						LOCKTIME, MPLLCON, NULL),
-	[upll] = PLL(pll_s3c2410_upll, UPLL, "upll", "urefclk",
-						LOCKTIME, UPLLCON, NULL),
+	PLL(pll_s3c2440_mpll, MPLL, "mpll", "xti", LOCKTIME, MPLLCON, NULL),
+	PLL(pll_s3c2410_upll, UPLL, "upll", "urefclk", LOCKTIME, UPLLCON, NULL),
 };
 
-struct samsung_gate_clock s3c2412_gates[] __initdata = {
+static struct samsung_gate_clock s3c2412_gates[] __initdata = {
 	GATE(PCLK_WDT, "wdt", "pclk", CLKCON, 28, 0, 0),
 	GATE(PCLK_SPI, "spi", "pclk", CLKCON, 27, 0, 0),
 	GATE(PCLK_I2S, "i2s", "pclk", CLKCON, 26, 0, 0),
@@ -181,7 +174,7 @@ struct samsung_gate_clock s3c2412_gates[] __initdata = {
 	GATE(HCLK_DMA0, "dma0", "hclk", CLKCON, 0, CLK_IGNORE_UNUSED, 0),
 };
 
-struct samsung_clock_alias s3c2412_aliases[] __initdata = {
+static struct samsung_clock_alias s3c2412_aliases[] __initdata = {
 	ALIAS(PCLK_UART0, "s3c2412-uart.0", "uart"),
 	ALIAS(PCLK_UART1, "s3c2412-uart.1", "uart"),
 	ALIAS(PCLK_UART2, "s3c2412-uart.2", "uart"),
@@ -231,7 +224,7 @@ static struct notifier_block s3c2412_restart_handler = {
  * Only necessary until the devicetree-move is complete
  */
 #define XTI	1
-struct samsung_fixed_rate_clock s3c2412_common_frate_clks[] __initdata = {
+static struct samsung_fixed_rate_clock s3c2412_common_frate_clks[] __initdata = {
 	FRATE(XTI, "xti", NULL, 0, 0),
 	FRATE(0, "ext", NULL, 0, 0),
 };
@@ -296,6 +289,6 @@ void __init s3c2412_common_clk_init(struct device_node *np, unsigned long xti_f,
 
 static void __init s3c2412_clk_init(struct device_node *np)
 {
-	s3c2412_common_clk_init(np, 0, 0, 0);
+	s3c2412_common_clk_init(np, 0, 0, NULL);
 }
 CLK_OF_DECLARE(s3c2412_clk, "samsung,s3c2412-clock", s3c2412_clk_init);
diff --git a/drivers/clk/samsung/clk-s3c2443.c b/drivers/clk/samsung/clk-s3c2443.c
index d94b85a42356..c46e6d5bc9bc 100644
--- a/drivers/clk/samsung/clk-s3c2443.c
+++ b/drivers/clk/samsung/clk-s3c2443.c
@@ -41,11 +41,6 @@ enum supported_socs {
 	S3C2450,
 };
 
-/* list of PLLs to be registered */
-enum s3c2443_plls {
-	mpll, epll,
-};
-
 static void __iomem *reg_base;
 
 #ifdef CONFIG_PM_SLEEP
@@ -113,7 +108,7 @@ PNAME(msysclk_p) = { "mpllref", "mpll" };
 PNAME(armclk_p) = { "armdiv" , "hclk" };
 PNAME(i2s0_p) = { "div_i2s0", "ext_i2s", "epllref", "epllref" };
 
-struct samsung_mux_clock s3c2443_common_muxes[] __initdata = {
+static struct samsung_mux_clock s3c2443_common_muxes[] __initdata = {
 	MUX(0, "epllref", epllref_p, CLKSRC, 7, 2),
 	MUX(ESYSCLK, "esysclk", esysclk_p, CLKSRC, 6, 1),
 	MUX(0, "mpllref", mpllref_p, CLKSRC, 3, 1),
@@ -141,7 +136,7 @@ static struct clk_div_table mdivclk_d[] = {
 	{ /* sentinel */ },
 };
 
-struct samsung_div_clock s3c2443_common_dividers[] __initdata = {
+static struct samsung_div_clock s3c2443_common_dividers[] __initdata = {
 	DIV_T(0, "mdivclk", "xti", CLKDIV0, 6, 3, mdivclk_d),
 	DIV(0, "prediv", "msysclk", CLKDIV0, 4, 2),
 	DIV_T(HCLK, "hclk", "prediv", CLKDIV0, 0, 2, hclk_d),
@@ -154,7 +149,7 @@ struct samsung_div_clock s3c2443_common_dividers[] __initdata = {
 	DIV(0, "div_usbhost", "esysclk", CLKDIV1, 4, 2),
 };
 
-struct samsung_gate_clock s3c2443_common_gates[] __initdata = {
+static struct samsung_gate_clock s3c2443_common_gates[] __initdata = {
 	GATE(SCLK_HSMMC_EXT, "sclk_hsmmcext", "ext", SCLKCON, 13, 0, 0),
 	GATE(SCLK_HSMMC1, "sclk_hsmmc1", "div_hsmmc1", SCLKCON, 12, 0, 0),
 	GATE(SCLK_FIMD, "sclk_fimd", "div_fimd", SCLKCON, 10, 0, 0),
@@ -188,7 +183,7 @@ struct samsung_gate_clock s3c2443_common_gates[] __initdata = {
 	GATE(PCLK_UART0, "uart0", "pclk", PCLKCON, 0, 0, 0),
 };
 
-struct samsung_clock_alias s3c2443_common_aliases[] __initdata = {
+static struct samsung_clock_alias s3c2443_common_aliases[] __initdata = {
 	ALIAS(MSYSCLK, NULL, "msysclk"),
 	ALIAS(ARMCLK, NULL, "armclk"),
 	ALIAS(MPLL, NULL, "mpll"),
@@ -225,10 +220,8 @@ struct samsung_clock_alias s3c2443_common_aliases[] __initdata = {
 /* S3C2416 specific clocks */
 
 static struct samsung_pll_clock s3c2416_pll_clks[] __initdata = {
-	[mpll] = PLL(pll_6552_s3c2416, MPLL, "mpll", "mpllref",
-						LOCKCON0, MPLLCON, NULL),
-	[epll] = PLL(pll_6553, EPLL, "epll", "epllref",
-						LOCKCON1, EPLLCON, NULL),
+	PLL(pll_6552_s3c2416, MPLL, "mpll", "mpllref", LOCKCON0, MPLLCON, NULL),
+	PLL(pll_6553, EPLL, "epll", "epllref", LOCKCON1, EPLLCON, NULL),
 };
 
 PNAME(s3c2416_hsmmc0_p) = { "sclk_hsmmc0", "sclk_hsmmcext" };
@@ -245,19 +238,19 @@ static struct clk_div_table armdiv_s3c2416_d[] = {
 	{ /* sentinel */ },
 };
 
-struct samsung_div_clock s3c2416_dividers[] __initdata = {
+static struct samsung_div_clock s3c2416_dividers[] __initdata = {
 	DIV_T(ARMDIV, "armdiv", "msysclk", CLKDIV0, 9, 3, armdiv_s3c2416_d),
 	DIV(0, "div_hsspi0_mpll", "msysclk", CLKDIV2, 0, 4),
 	DIV(0, "div_hsmmc0", "esysclk", CLKDIV2, 6, 2),
 };
 
-struct samsung_mux_clock s3c2416_muxes[] __initdata = {
+static struct samsung_mux_clock s3c2416_muxes[] __initdata = {
 	MUX(MUX_HSMMC0, "mux_hsmmc0", s3c2416_hsmmc0_p, CLKSRC, 16, 1),
 	MUX(MUX_HSMMC1, "mux_hsmmc1", s3c2416_hsmmc1_p, CLKSRC, 17, 1),
 	MUX(MUX_HSSPI0, "mux_hsspi0", s3c2416_hsspi0_p, CLKSRC, 18, 1),
 };
 
-struct samsung_gate_clock s3c2416_gates[] __initdata = {
+static struct samsung_gate_clock s3c2416_gates[] __initdata = {
 	GATE(0, "hsspi0_mpll", "div_hsspi0_mpll", SCLKCON, 19, 0, 0),
 	GATE(0, "hsspi0_epll", "div_hsspi0_epll", SCLKCON, 14, 0, 0),
 	GATE(0, "sclk_hsmmc0", "div_hsmmc0", SCLKCON, 6, 0, 0),
@@ -267,7 +260,7 @@ struct samsung_gate_clock s3c2416_gates[] __initdata = {
 	GATE(PCLK_PCM, "pcm", "pclk", PCLKCON, 19, 0, 0),
 };
 
-struct samsung_clock_alias s3c2416_aliases[] __initdata = {
+static struct samsung_clock_alias s3c2416_aliases[] __initdata = {
 	ALIAS(HCLK_HSMMC0, "s3c-sdhci.0", "hsmmc"),
 	ALIAS(HCLK_HSMMC0, "s3c-sdhci.0", "mmc_busclk.0"),
 	ALIAS(MUX_HSMMC0, "s3c-sdhci.0", "mmc_busclk.2"),
@@ -279,10 +272,8 @@ struct samsung_clock_alias s3c2416_aliases[] __initdata = {
 /* S3C2443 specific clocks */
 
 static struct samsung_pll_clock s3c2443_pll_clks[] __initdata = {
-	[mpll] = PLL(pll_3000, MPLL, "mpll", "mpllref",
-						LOCKCON0, MPLLCON, NULL),
-	[epll] = PLL(pll_2126, EPLL, "epll", "epllref",
-						LOCKCON1, EPLLCON, NULL),
+	PLL(pll_3000, MPLL, "mpll", "mpllref", LOCKCON0, MPLLCON, NULL),
+	PLL(pll_2126, EPLL, "epll", "epllref", LOCKCON1, EPLLCON, NULL),
 };
 
 static struct clk_div_table armdiv_s3c2443_d[] = {
@@ -297,12 +288,12 @@ static struct clk_div_table armdiv_s3c2443_d[] = {
 	{ /* sentinel */ },
 };
 
-struct samsung_div_clock s3c2443_dividers[] __initdata = {
+static struct samsung_div_clock s3c2443_dividers[] __initdata = {
 	DIV_T(ARMDIV, "armdiv", "msysclk", CLKDIV0, 9, 4, armdiv_s3c2443_d),
 	DIV(0, "div_cam", "esysclk", CLKDIV1, 26, 4),
 };
 
-struct samsung_gate_clock s3c2443_gates[] __initdata = {
+static struct samsung_gate_clock s3c2443_gates[] __initdata = {
 	GATE(SCLK_HSSPI0, "sclk_hsspi0", "div_hsspi0_epll", SCLKCON, 14, 0, 0),
 	GATE(SCLK_CAM, "sclk_cam", "div_cam", SCLKCON, 11, 0, 0),
 	GATE(HCLK_CFC, "cfc", "hclk", HCLKCON, 17, CLK_IGNORE_UNUSED, 0),
@@ -311,7 +302,7 @@ struct samsung_gate_clock s3c2443_gates[] __initdata = {
 	GATE(PCLK_SDI, "sdi", "pclk", PCLKCON, 5, 0, 0),
 };
 
-struct samsung_clock_alias s3c2443_aliases[] __initdata = {
+static struct samsung_clock_alias s3c2443_aliases[] __initdata = {
 	ALIAS(SCLK_HSSPI0, "s3c2443-spi.0", "spi_busclk2"),
 	ALIAS(SCLK_HSMMC1, "s3c-sdhci.1", "mmc_busclk.2"),
 	ALIAS(SCLK_CAM, NULL, "camif-upll"),
@@ -327,20 +318,20 @@ PNAME(s3c2450_cam_p) = { "div_cam", "hclk" };
 PNAME(s3c2450_hsspi1_p) = { "hsspi1_epll", "hsspi1_mpll" };
 PNAME(i2s1_p) = { "div_i2s1", "ext_i2s", "epllref", "epllref" };
 
-struct samsung_div_clock s3c2450_dividers[] __initdata = {
+static struct samsung_div_clock s3c2450_dividers[] __initdata = {
 	DIV(0, "div_cam", "esysclk", CLKDIV1, 26, 4),
 	DIV(0, "div_hsspi1_epll", "esysclk", CLKDIV2, 24, 2),
 	DIV(0, "div_hsspi1_mpll", "msysclk", CLKDIV2, 16, 4),
 	DIV(0, "div_i2s1", "esysclk", CLKDIV2, 12, 4),
 };
 
-struct samsung_mux_clock s3c2450_muxes[] __initdata = {
+static struct samsung_mux_clock s3c2450_muxes[] __initdata = {
 	MUX(0, "mux_cam", s3c2450_cam_p, CLKSRC, 20, 1),
 	MUX(MUX_HSSPI1, "mux_hsspi1", s3c2450_hsspi1_p, CLKSRC, 19, 1),
 	MUX(0, "mux_i2s1", i2s1_p, CLKSRC, 12, 2),
 };
 
-struct samsung_gate_clock s3c2450_gates[] __initdata = {
+static struct samsung_gate_clock s3c2450_gates[] __initdata = {
 	GATE(SCLK_I2S1, "sclk_i2s1", "div_i2s1", SCLKCON, 5, 0, 0),
 	GATE(HCLK_CFC, "cfc", "hclk", HCLKCON, 17, 0, 0),
 	GATE(HCLK_CAM, "cam", "hclk", HCLKCON, 8, 0, 0),
@@ -351,7 +342,7 @@ struct samsung_gate_clock s3c2450_gates[] __initdata = {
 	GATE(PCLK_SPI1, "spi1", "pclk", PCLKCON, 14, 0, 0),
 };
 
-struct samsung_clock_alias s3c2450_aliases[] __initdata = {
+static struct samsung_clock_alias s3c2450_aliases[] __initdata = {
 	ALIAS(PCLK_SPI1, "s3c2443-spi.1", "spi"),
 	ALIAS(PCLK_SPI1, "s3c2443-spi.1", "spi_busclk0"),
 	ALIAS(MUX_HSSPI1, "s3c2443-spi.1", "spi_busclk2"),
@@ -374,7 +365,7 @@ static struct notifier_block s3c2443_restart_handler = {
  * fixed rate clocks generated outside the soc
  * Only necessary until the devicetree-move is complete
  */
-struct samsung_fixed_rate_clock s3c2443_common_frate_clks[] __initdata = {
+static struct samsung_fixed_rate_clock s3c2443_common_frate_clks[] __initdata = {
 	FRATE(0, "xti", NULL, 0, 0),
 	FRATE(0, "ext", NULL, 0, 0),
 	FRATE(0, "ext_i2s", NULL, 0, 0),
@@ -470,18 +461,18 @@ void __init s3c2443_common_clk_init(struct device_node *np, unsigned long xti_f,
 
 static void __init s3c2416_clk_init(struct device_node *np)
 {
-	s3c2443_common_clk_init(np, 0, S3C2416, 0);
+	s3c2443_common_clk_init(np, 0, S3C2416, NULL);
 }
 CLK_OF_DECLARE(s3c2416_clk, "samsung,s3c2416-clock", s3c2416_clk_init);
 
 static void __init s3c2443_clk_init(struct device_node *np)
 {
-	s3c2443_common_clk_init(np, 0, S3C2443, 0);
+	s3c2443_common_clk_init(np, 0, S3C2443, NULL);
 }
 CLK_OF_DECLARE(s3c2443_clk, "samsung,s3c2443-clock", s3c2443_clk_init);
 
 static void __init s3c2450_clk_init(struct device_node *np)
 {
-	s3c2443_common_clk_init(np, 0, S3C2450, 0);
+	s3c2443_common_clk_init(np, 0, S3C2450, NULL);
 }
 CLK_OF_DECLARE(s3c2450_clk, "samsung,s3c2450-clock", s3c2450_clk_init);
diff --git a/drivers/clk/samsung/clk-s3c64xx.c b/drivers/clk/samsung/clk-s3c64xx.c
index 7306867a0ab8..6db01cf5ab83 100644
--- a/drivers/clk/samsung/clk-s3c64xx.c
+++ b/drivers/clk/samsung/clk-s3c64xx.c
@@ -56,11 +56,6 @@
 #define GATE_ON(_id, cname, pname, o, b) \
 		GATE(_id, cname, pname, o, b, CLK_IGNORE_UNUSED, 0)
 
-/* list of PLLs to be registered */
-enum s3c64xx_plls {
-	apll, mpll, epll,
-};
-
 static void __iomem *reg_base;
 static bool is_s3c6400;
 
@@ -364,12 +359,12 @@ GATE_CLOCKS(s3c6410_gate_clks) __initdata = {
 
 /* List of PLL clocks. */
 static struct samsung_pll_clock s3c64xx_pll_clks[] __initdata = {
-	[apll] = PLL(pll_6552, FOUT_APLL, "fout_apll", "fin_pll",
-						APLL_LOCK, APLL_CON, NULL),
-	[mpll] = PLL(pll_6552, FOUT_MPLL, "fout_mpll", "fin_pll",
-						MPLL_LOCK, MPLL_CON, NULL),
-	[epll] = PLL(pll_6553, FOUT_EPLL, "fout_epll", "fin_pll",
-						EPLL_LOCK, EPLL_CON0, NULL),
+	PLL(pll_6552, FOUT_APLL, "fout_apll", "fin_pll",
+					APLL_LOCK, APLL_CON, NULL),
+	PLL(pll_6552, FOUT_MPLL, "fout_mpll", "fin_pll",
+					MPLL_LOCK, MPLL_CON, NULL),
+	PLL(pll_6553, FOUT_EPLL, "fout_epll", "fin_pll",
+					EPLL_LOCK, EPLL_CON0, NULL),
 };
 
 /* Aliases for common s3c64xx clocks. */
diff --git a/drivers/clk/socfpga/Makefile b/drivers/clk/socfpga/Makefile
index 9146c20fe21f..ce5aa7802eb8 100644
--- a/drivers/clk/socfpga/Makefile
+++ b/drivers/clk/socfpga/Makefile
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-obj-y += clk.o
-obj-y += clk-gate.o
-obj-y += clk-pll.o
-obj-y += clk-periph.o
-obj-y += clk-pll-a10.o clk-periph-a10.o clk-gate-a10.o
+obj-$(CONFIG_ARCH_SOCFPGA) += clk.o clk-gate.o clk-pll.o clk-periph.o
+obj-$(CONFIG_ARCH_SOCFPGA) += clk-pll-a10.o clk-periph-a10.o clk-gate-a10.o
+obj-$(CONFIG_ARCH_STRATIX10) += clk-s10.o
+obj-$(CONFIG_ARCH_STRATIX10) += clk-pll-s10.o clk-periph-s10.o clk-gate-s10.o
diff --git a/drivers/clk/socfpga/clk-gate-s10.c b/drivers/clk/socfpga/clk-gate-s10.c
new file mode 100644
index 000000000000..eee2d48ab656
--- /dev/null
+++ b/drivers/clk/socfpga/clk-gate-s10.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier:	GPL-2.0
+/*
+ * Copyright (C) 2017, Intel Corporation
+ */
+#include <linux/clk-provider.h>
+#include <linux/slab.h>
+#include "stratix10-clk.h"
+#include "clk.h"
+
+#define SOCFPGA_CS_PDBG_CLK	"cs_pdbg_clk"
+#define to_socfpga_gate_clk(p) container_of(p, struct socfpga_gate_clk, hw.hw)
+
+/* Gate rate: parent / fixed_div, else parent / 2^field, else parent. */
+static unsigned long socfpga_gate_clk_recalc_rate(struct clk_hw *hwclk,
+						  unsigned long parent_rate)
+{
+	struct socfpga_gate_clk *gate = to_socfpga_gate_clk(hwclk);
+	u32 div = 1, field;
+
+	if (gate->fixed_div) {
+		div = gate->fixed_div;
+	} else if (gate->div_reg) {
+		field = readl(gate->div_reg) >> gate->shift;
+		div = 1 << (field & GENMASK(gate->width - 1, 0));
+	}
+	return parent_rate / div;
+}
+
+static unsigned long socfpga_dbg_clk_recalc_rate(struct clk_hw *hwclk,
+						  unsigned long parent_rate)
+{
+	struct socfpga_gate_clk *socfpgaclk = to_socfpga_gate_clk(hwclk);
+	u32 div = 1, val;
+
+	val = readl(socfpgaclk->div_reg) >> socfpgaclk->shift;	/* NOTE(review): div_reg is read without a NULL check */
+	val &= GENMASK(socfpgaclk->width - 1, 0);	/* keep only the 'width'-bit divider field */
+	div = (1 << val);
+	div = div ? 4 : 1;	/* NOTE(review): 1 << val is non-zero for val < 32, so div is always 4 - confirm intent */
+
+	return parent_rate / div;
+}
+
+/* Parent index is the bypass bit, or 0 when there is no bypass register. */
+static u8 socfpga_gate_get_parent(struct clk_hw *hwclk)
+{
+	struct socfpga_gate_clk *gate = to_socfpga_gate_clk(hwclk);
+	u32 bypass;
+
+	if (!gate->bypass_reg)
+		return 0;
+
+	/* isolate the single bypass bit and move it down to bit 0 */
+	bypass = readl(gate->bypass_reg) & (0x1 << gate->bypass_shift);
+	return bypass >> gate->bypass_shift;
+}
+
+static struct clk_ops gateclk_ops = {
+	.recalc_rate = socfpga_gate_clk_recalc_rate,
+	.get_parent = socfpga_gate_get_parent,
+};
+
+static const struct clk_ops dbgclk_ops = {
+	.recalc_rate = socfpga_dbg_clk_recalc_rate,
+	.get_parent = socfpga_gate_get_parent,
+};
+
+/* Allocate and register an S10 gate clock; returns NULL on any failure. */
+struct clk *s10_register_gate(const char *name, const char *parent_name,
+			      const char * const *parent_names,
+			      u8 num_parents, unsigned long flags,
+			      void __iomem *regbase, unsigned long gate_reg,
+			      unsigned long gate_idx, unsigned long div_reg,
+			      unsigned long div_offset, u8 div_width,
+			      unsigned long bypass_reg, u8 bypass_shift,
+			      u8 fixed_div)
+{
+	struct clk *clk;
+	struct socfpga_gate_clk *socfpga_clk;
+	struct clk_init_data init;
+
+	socfpga_clk = kzalloc(sizeof(*socfpga_clk), GFP_KERNEL);
+	if (!socfpga_clk)
+		return NULL;
+
+	socfpga_clk->hw.reg = regbase + gate_reg;
+	socfpga_clk->hw.bit_idx = gate_idx;
+
+	gateclk_ops.enable = clk_gate_ops.enable;
+	gateclk_ops.disable = clk_gate_ops.disable;
+
+	socfpga_clk->fixed_div = fixed_div;
+
+	/* A zero offset means "no register"; kzalloc() left the pointer NULL. */
+	if (div_reg)
+		socfpga_clk->div_reg = regbase + div_reg;
+
+	socfpga_clk->width = div_width;
+	socfpga_clk->shift = div_offset;
+
+	if (bypass_reg)
+		socfpga_clk->bypass_reg = regbase + bypass_reg;
+	socfpga_clk->bypass_shift = bypass_shift;
+
+	/* The debug clock uses its own rate calculation. */
+	if (streq(name, SOCFPGA_CS_PDBG_CLK))
+		init.ops = &dbgclk_ops;
+	else
+		init.ops = &gateclk_ops;
+
+	init.name = name;
+	init.flags = flags;
+
+	init.num_parents = num_parents;
+	/* Fall back to the single parent_name when no array is supplied. */
+	init.parent_names = parent_names ? parent_names : &parent_name;
+	socfpga_clk->hw.hw.init = &init;
+
+	clk = clk_register(NULL, &socfpga_clk->hw.hw);
+	if (WARN_ON(IS_ERR(clk))) {
+		kfree(socfpga_clk);
+		return NULL;
+	}
+
+	return clk;
+}
diff --git a/drivers/clk/socfpga/clk-periph-s10.c b/drivers/clk/socfpga/clk-periph-s10.c
new file mode 100644
index 000000000000..568f59b58ddf
--- /dev/null
+++ b/drivers/clk/socfpga/clk-periph-s10.c
@@ -0,0 +1,149 @@
+// SPDX-License-Identifier:	GPL-2.0
+/*
+ * Copyright (C) 2017, Intel Corporation
+ */
+#include <linux/slab.h>
+#include <linux/clk-provider.h>
+
+#include "stratix10-clk.h"
+#include "clk.h"
+
+#define CLK_MGR_FREE_SHIFT		16
+#define CLK_MGR_FREE_MASK		0x7
+#define SWCTRLBTCLKSEN_SHIFT		8
+
+#define to_periph_clk(p) container_of(p, struct socfpga_periph_clk, hw.hw)
+
+static unsigned long clk_peri_c_clk_recalc_rate(struct clk_hw *hwclk,
+					     unsigned long parent_rate)
+{
+	struct socfpga_periph_clk *socfpgaclk = to_periph_clk(hwclk);
+	unsigned long div = 1;	/* never changed, so the final divide is a no-op */
+	u32 val;
+
+	val = readl(socfpgaclk->hw.reg);
+	val &= GENMASK(SWCTRLBTCLKSEN_SHIFT - 1, 0);	/* keep bits 7:0 (SWCTRLBTCLKSEN_SHIFT is 8) */
+	parent_rate /= val;	/* NOTE(review): divides by zero if those bits read 0 - confirm */
+
+	return parent_rate / div;
+}
+
+/* Rate is parent_rate / fixed_div, or / (11-bit counter + 1), else / 1. */
+static unsigned long clk_peri_cnt_clk_recalc_rate(struct clk_hw *hwclk,
+					     unsigned long parent_rate)
+{
+	struct socfpga_periph_clk *socfpgaclk = to_periph_clk(hwclk);
+	unsigned long div = 1;
+
+	/* hw.reg is NULL when registered with offset 0; never readl() it */
+	if (socfpgaclk->fixed_div)
+		div = socfpgaclk->fixed_div;
+	else if (!socfpgaclk->bypass_reg && socfpgaclk->hw.reg)
+		div = (readl(socfpgaclk->hw.reg) & 0x7ff) + 1;
+
+	return parent_rate / div;
+}
+
+/* Parent index: the bypass bit if a bypass register exists, else hw.reg. */
+static u8 clk_periclk_get_parent(struct clk_hw *hwclk)
+{
+	struct socfpga_periph_clk *socfpgaclk = to_periph_clk(hwclk);
+	u32 clk_src, mask;
+	u8 parent = 0;	/* default when neither register is available */
+
+	if (socfpgaclk->bypass_reg) {
+		mask = (0x1 << socfpgaclk->bypass_shift);
+		parent = ((readl(socfpgaclk->bypass_reg) & mask) >>
+			   socfpgaclk->bypass_shift);
+	} else if (socfpgaclk->hw.reg) {	/* NULL if offset was 0 */
+		clk_src = readl(socfpgaclk->hw.reg);
+		parent = (clk_src >> CLK_MGR_FREE_SHIFT) & CLK_MGR_FREE_MASK;
+	}
+	return parent;
+}
+
+static const struct clk_ops peri_c_clk_ops = {
+	.recalc_rate = clk_peri_c_clk_recalc_rate,
+	.get_parent = clk_periclk_get_parent,
+};
+
+static const struct clk_ops peri_cnt_clk_ops = {
+	.recalc_rate = clk_peri_cnt_clk_recalc_rate,
+	.get_parent = clk_periclk_get_parent,
+};
+
+struct clk *s10_register_periph(const char *name, const char *parent_name,
+				const char * const *parent_names,
+				u8 num_parents, unsigned long flags,
+				void __iomem *reg, unsigned long offset)
+{
+	struct clk *clk;
+	struct socfpga_periph_clk *periph_clk;
+	struct clk_init_data init;
+
+	periph_clk = kzalloc(sizeof(*periph_clk), GFP_KERNEL);
+	if (WARN_ON(!periph_clk))
+		return NULL;
+
+	periph_clk->hw.reg = reg + offset;
+
+	init.name = name;
+	init.ops = &peri_c_clk_ops;
+	init.flags = flags;
+
+	init.num_parents = num_parents;
+	init.parent_names = parent_names ? parent_names : &parent_name;
+
+	periph_clk->hw.hw.init = &init;
+
+	clk = clk_register(NULL, &periph_clk->hw.hw);
+	if (WARN_ON(IS_ERR(clk))) {
+		kfree(periph_clk);
+		return NULL;
+	}
+	return clk;
+}
+
+struct clk *s10_register_cnt_periph(const char *name, const char *parent_name,
+				    const char * const *parent_names,
+				    u8 num_parents, unsigned long flags,
+				    void __iomem *regbase, unsigned long offset,
+				    u8 fixed_divider, unsigned long bypass_reg,
+				    unsigned long bypass_shift)
+{
+	struct clk *clk;
+	struct socfpga_periph_clk *periph_clk;
+	struct clk_init_data init;
+
+	periph_clk = kzalloc(sizeof(*periph_clk), GFP_KERNEL);
+	if (WARN_ON(!periph_clk))
+		return NULL;
+
+	if (offset)
+		periph_clk->hw.reg = regbase + offset;
+	else
+		periph_clk->hw.reg = NULL;
+
+	if (bypass_reg)
+		periph_clk->bypass_reg = regbase + bypass_reg;
+	else
+		periph_clk->bypass_reg = NULL;
+	periph_clk->bypass_shift = bypass_shift;
+	periph_clk->fixed_div = fixed_divider;
+
+	init.name = name;
+	init.ops = &peri_cnt_clk_ops;
+	init.flags = flags;
+
+	init.num_parents = num_parents;
+	init.parent_names = parent_names ? parent_names : &parent_name;
+
+	periph_clk->hw.hw.init = &init;
+
+	clk = clk_register(NULL, &periph_clk->hw.hw);
+	if (WARN_ON(IS_ERR(clk))) {
+		kfree(periph_clk);
+		return NULL;
+	}
+	return clk;
+}
diff --git a/drivers/clk/socfpga/clk-pll-s10.c b/drivers/clk/socfpga/clk-pll-s10.c
new file mode 100644
index 000000000000..2d5d8b43727e
--- /dev/null
+++ b/drivers/clk/socfpga/clk-pll-s10.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier:	GPL-2.0
+/*
+ * Copyright (C) 2017, Intel Corporation
+ */
+#include <linux/slab.h>
+#include <linux/clk-provider.h>
+
+#include "stratix10-clk.h"
+#include "clk.h"
+
+/* Clock Manager offsets */
+#define CLK_MGR_PLL_CLK_SRC_SHIFT	16
+#define CLK_MGR_PLL_CLK_SRC_MASK	0x3
+
+/* PLL Clock enable bits */
+#define SOCFPGA_PLL_POWER		0
+#define SOCFPGA_PLL_RESET_MASK		0x2
+#define SOCFPGA_PLL_REFDIV_MASK		0x00003F00
+#define SOCFPGA_PLL_REFDIV_SHIFT	8
+#define SOCFPGA_PLL_MDIV_MASK		0xFF000000
+#define SOCFPGA_PLL_MDIV_SHIFT		24
+#define SWCTRLBTCLKSEL_MASK		0x200
+#define SWCTRLBTCLKSEL_SHIFT		9
+
+#define SOCFPGA_BOOT_CLK		"boot_clk"
+
+#define to_socfpga_clk(p) container_of(p, struct socfpga_pll, hw.hw)
+
+/* Rate = (parent_rate / refdiv) * (mdiv + 6), from the two PLL registers. */
+static unsigned long clk_pll_recalc_rate(struct clk_hw *hwclk,
+					 unsigned long parent_rate)
+{
+	struct socfpga_pll *socfpgaclk = to_socfpga_clk(hwclk);
+	unsigned long mdiv, refdiv, reg;
+	unsigned long long vco_freq;
+
+	/* The register at hw.reg holds the reference divider (refdiv). */
+	reg = readl(socfpgaclk->hw.reg);
+	refdiv = (reg & SOCFPGA_PLL_REFDIV_MASK) >> SOCFPGA_PLL_REFDIV_SHIFT;
+	vco_freq = (unsigned long long)parent_rate / refdiv;
+
+	/* The next register (hw.reg + 0x4) holds the multiplier (mdiv). */
+	reg = readl(socfpgaclk->hw.reg + 0x4);
+	mdiv = (reg & SOCFPGA_PLL_MDIV_MASK) >> SOCFPGA_PLL_MDIV_SHIFT;
+	/* Scale the divided rate; using parent_rate here would drop refdiv. */
+	vco_freq *= mdiv + 6;
+
+	return (unsigned long)vco_freq;
+}
+
+/* Boot clock rate: parent_rate divided by (SWCTRLBTCLKSEL bit + 1). */
+static unsigned long clk_boot_clk_recalc_rate(struct clk_hw *hwclk,
+					 unsigned long parent_rate)
+{
+	struct socfpga_pll *pll = to_socfpga_clk(hwclk);
+	u32 ctrl = readl(pll->hw.reg);
+	u32 sel;
+
+	/* sel is 0 or 1, so the divisor below is 1 or 2 */
+	sel = (ctrl & SWCTRLBTCLKSEL_MASK) >> SWCTRLBTCLKSEL_SHIFT;
+	return parent_rate / (sel + 1);
+}
+
+
+static u8 clk_pll_get_parent(struct clk_hw *hwclk)
+{
+	struct socfpga_pll *socfpgaclk = to_socfpga_clk(hwclk);
+	u32 pll_src;
+
+	pll_src = readl(socfpgaclk->hw.reg);
+	return (pll_src >> CLK_MGR_PLL_CLK_SRC_SHIFT) &
+		CLK_MGR_PLL_CLK_SRC_MASK;
+}
+
+/* Parent index is the SWCTRLBTCLKSEL bit: mask first, then shift to 0/1. */
+static u8 clk_boot_get_parent(struct clk_hw *hwclk)
+{
+	struct socfpga_pll *socfpgaclk = to_socfpga_clk(hwclk);
+	u32 pll_src;
+
+	pll_src = readl(socfpgaclk->hw.reg);
+	return (pll_src & SWCTRLBTCLKSEL_MASK) >> SWCTRLBTCLKSEL_SHIFT;
+}
+
+static int clk_pll_prepare(struct clk_hw *hwclk)
+{
+	struct socfpga_pll *socfpgaclk = to_socfpga_clk(hwclk);
+	u32 reg;
+
+	/* Bring PLL out of reset */
+	reg = readl(socfpgaclk->hw.reg);
+	reg |= SOCFPGA_PLL_RESET_MASK;
+	writel(reg, socfpgaclk->hw.reg);
+
+	return 0;
+}
+
+static struct clk_ops clk_pll_ops = {
+	.recalc_rate = clk_pll_recalc_rate,
+	.get_parent = clk_pll_get_parent,
+	.prepare = clk_pll_prepare,
+};
+
+static const struct clk_ops clk_boot_ops = {	/* never modified after init */
+	.recalc_rate = clk_boot_clk_recalc_rate,
+	.get_parent = clk_boot_get_parent,
+	.prepare = clk_pll_prepare,
+};
+
+struct clk *s10_register_pll(const char *name, const char * const *parent_names,
+				    u8 num_parents, unsigned long flags,
+				    void __iomem *reg, unsigned long offset)
+{
+	struct clk *clk;
+	struct socfpga_pll *pll_clk;
+	struct clk_init_data init;
+
+	pll_clk = kzalloc(sizeof(*pll_clk), GFP_KERNEL);
+	if (WARN_ON(!pll_clk))
+		return NULL;
+
+	pll_clk->hw.reg = reg + offset;
+
+	if (streq(name, SOCFPGA_BOOT_CLK))
+		init.ops = &clk_boot_ops;
+	else
+		init.ops = &clk_pll_ops;
+
+	init.name = name;
+	init.flags = flags;
+
+	init.num_parents = num_parents;
+	init.parent_names = parent_names;
+	pll_clk->hw.hw.init = &init;
+
+	pll_clk->hw.bit_idx = SOCFPGA_PLL_POWER;
+	clk_pll_ops.enable = clk_gate_ops.enable;
+	clk_pll_ops.disable = clk_gate_ops.disable;
+
+	clk = clk_register(NULL, &pll_clk->hw.hw);
+	if (WARN_ON(IS_ERR(clk))) {
+		kfree(pll_clk);
+		return NULL;
+	}
+	return clk;
+}
diff --git a/drivers/clk/socfpga/clk-s10.c b/drivers/clk/socfpga/clk-s10.c
new file mode 100644
index 000000000000..3a11c382a663
--- /dev/null
+++ b/drivers/clk/socfpga/clk-s10.c
@@ -0,0 +1,345 @@
+// SPDX-License-Identifier:	GPL-2.0
+/*
+ * Copyright (C) 2017, Intel Corporation
+ */
+#include <linux/slab.h>
+#include <linux/clk-provider.h>
+#include <linux/of_device.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+
+#include <dt-bindings/clock/stratix10-clock.h>
+
+#include "stratix10-clk.h"
+
+static const char * const pll_mux[] = { "osc1", "cb_intosc_hs_div2_clk",
+					"f2s_free_clk",};
+static const char * const cntr_mux[] = { "main_pll", "periph_pll",
+					 "osc1", "cb_intosc_hs_div2_clk",
+					 "f2s_free_clk"};
+static const char * const boot_mux[] = { "osc1", "cb_intosc_hs_div2_clk",};
+
+static const char * const noc_free_mux[] = {"main_noc_base_clk",
+					    "peri_noc_base_clk",
+					    "osc1", "cb_intosc_hs_div2_clk",
+					    "f2s_free_clk"};
+
+static const char * const emaca_free_mux[] = {"peri_emaca_clk", "boot_clk"};
+static const char * const emacb_free_mux[] = {"peri_emacb_clk", "boot_clk"};
+static const char * const emac_ptp_free_mux[] = {"peri_emac_ptp_clk", "boot_clk"};
+static const char * const gpio_db_free_mux[] = {"peri_gpio_db_clk", "boot_clk"};
+static const char * const sdmmc_free_mux[] = {"peri_sdmmc_clk", "boot_clk"};
+static const char * const s2f_usr1_free_mux[] = {"peri_s2f_usr1_clk", "boot_clk"};
+static const char * const psi_ref_free_mux[] = {"peri_psi_ref_clk", "boot_clk"};
+static const char * const mpu_mux[] = { "mpu_free_clk", "boot_clk",};
+
+static const char * const s2f_usr0_mux[] = {"f2s_free_clk", "boot_clk"};
+static const char * const emac_mux[] = {"emaca_free_clk", "emacb_free_clk"};
+static const char * const noc_mux[] = {"noc_free_clk", "boot_clk"};
+
+/* clocks in AO (always on) controller */
+static const struct stratix10_pll_clock s10_pll_clks[] = {
+	{ STRATIX10_BOOT_CLK, "boot_clk", boot_mux, ARRAY_SIZE(boot_mux), 0,
+	  0x0},
+	{ STRATIX10_MAIN_PLL_CLK, "main_pll", pll_mux, ARRAY_SIZE(pll_mux),
+	  0, 0x74},
+	{ STRATIX10_PERIPH_PLL_CLK, "periph_pll", pll_mux, ARRAY_SIZE(pll_mux),
+	  0, 0xe4},
+};
+
+static const struct stratix10_perip_c_clock s10_main_perip_c_clks[] = {
+	{ STRATIX10_MAIN_MPU_BASE_CLK, "main_mpu_base_clk", "main_pll", NULL, 1, 0, 0x84},
+	{ STRATIX10_MAIN_NOC_BASE_CLK, "main_noc_base_clk", "main_pll", NULL, 1, 0, 0x88},
+	{ STRATIX10_PERI_MPU_BASE_CLK, "peri_mpu_base_clk", "periph_pll", NULL, 1, 0,
+	  0xF4},
+	{ STRATIX10_PERI_NOC_BASE_CLK, "peri_noc_base_clk", "periph_pll", NULL, 1, 0,
+	  0xF8},
+};
+
+static const struct stratix10_perip_cnt_clock s10_main_perip_cnt_clks[] = {
+	{ STRATIX10_MPU_FREE_CLK, "mpu_free_clk", NULL, cntr_mux, ARRAY_SIZE(cntr_mux),
+	   0, 0x48, 0, 0, 0},
+	{ STRATIX10_NOC_FREE_CLK, "noc_free_clk", NULL, noc_free_mux, ARRAY_SIZE(noc_free_mux),
+	  0, 0x4C, 0, 0, 0},
+	{ STRATIX10_MAIN_EMACA_CLK, "main_emaca_clk", "main_noc_base_clk", NULL, 1, 0,
+	  0x50, 0, 0, 0},
+	{ STRATIX10_MAIN_EMACB_CLK, "main_emacb_clk", "main_noc_base_clk", NULL, 1, 0,
+	  0x54, 0, 0, 0},
+	{ STRATIX10_MAIN_EMAC_PTP_CLK, "main_emac_ptp_clk", "main_noc_base_clk", NULL, 1, 0,
+	  0x58, 0, 0, 0},
+	{ STRATIX10_MAIN_GPIO_DB_CLK, "main_gpio_db_clk", "main_noc_base_clk", NULL, 1, 0,
+	  0x5C, 0, 0, 0},
+	{ STRATIX10_MAIN_SDMMC_CLK, "main_sdmmc_clk", "main_noc_base_clk", NULL, 1, 0,
+	  0x60, 0, 0, 0},
+	{ STRATIX10_MAIN_S2F_USR0_CLK, "main_s2f_usr0_clk", NULL, cntr_mux, ARRAY_SIZE(cntr_mux),
+	  0, 0x64, 0, 0, 0},
+	{ STRATIX10_MAIN_S2F_USR1_CLK, "main_s2f_usr1_clk", "main_noc_base_clk", NULL, 1, 0,
+	  0x68, 0, 0, 0},
+	{ STRATIX10_MAIN_PSI_REF_CLK, "main_psi_ref_clk", "main_noc_base_clk", NULL, 1, 0,
+	  0x6C, 0, 0, 0},
+	{ STRATIX10_PERI_EMACA_CLK, "peri_emaca_clk", NULL, cntr_mux, ARRAY_SIZE(cntr_mux),
+	  0, 0xBC, 0, 0, 0},
+	{ STRATIX10_PERI_EMACB_CLK, "peri_emacb_clk", NULL, cntr_mux, ARRAY_SIZE(cntr_mux),
+	  0, 0xC0, 0, 0, 0},
+	{ STRATIX10_PERI_EMAC_PTP_CLK, "peri_emac_ptp_clk", NULL, cntr_mux, ARRAY_SIZE(cntr_mux),
+	  0, 0xC4, 0, 0, 0},
+	{ STRATIX10_PERI_GPIO_DB_CLK, "peri_gpio_db_clk", NULL, cntr_mux, ARRAY_SIZE(cntr_mux),
+	  0, 0xC8, 0, 0, 0},
+	{ STRATIX10_PERI_SDMMC_CLK, "peri_sdmmc_clk", NULL, cntr_mux, ARRAY_SIZE(cntr_mux),
+	  0, 0xCC, 0, 0, 0},
+	{ STRATIX10_PERI_S2F_USR0_CLK, "peri_s2f_usr0_clk", "peri_noc_base_clk", NULL, 1, 0,
+	  0xD0, 0, 0, 0},
+	{ STRATIX10_PERI_S2F_USR1_CLK, "peri_s2f_usr1_clk", NULL, cntr_mux, ARRAY_SIZE(cntr_mux),
+	  0, 0xD4, 0, 0, 0},
+	{ STRATIX10_PERI_PSI_REF_CLK, "peri_psi_ref_clk", "peri_noc_base_clk", NULL, 1, 0,
+	  0xD8, 0, 0, 0},
+	{ STRATIX10_L4_SYS_FREE_CLK, "l4_sys_free_clk", "noc_free_clk", NULL, 1, 0,
+	  0, 4, 0, 0},
+	{ STRATIX10_NOC_CLK, "noc_clk", NULL, noc_mux, ARRAY_SIZE(noc_mux),
+	  0, 0, 0, 0x3C, 1},
+	{ STRATIX10_EMAC_A_FREE_CLK, "emaca_free_clk", NULL, emaca_free_mux, ARRAY_SIZE(emaca_free_mux),
+	  0, 0, 4, 0xB0, 0},
+	{ STRATIX10_EMAC_B_FREE_CLK, "emacb_free_clk", NULL, emacb_free_mux, ARRAY_SIZE(emacb_free_mux),
+	  0, 0, 4, 0xB0, 1},
+	{ STRATIX10_EMAC_PTP_FREE_CLK, "emac_ptp_free_clk", NULL, emac_ptp_free_mux,
+	  ARRAY_SIZE(emac_ptp_free_mux), 0, 0, 4, 0xB0, 2},
+	{ STRATIX10_GPIO_DB_FREE_CLK, "gpio_db_free_clk", NULL, gpio_db_free_mux,
+	  ARRAY_SIZE(gpio_db_free_mux), 0, 0, 0, 0xB0, 3},
+	{ STRATIX10_SDMMC_FREE_CLK, "sdmmc_free_clk", NULL, sdmmc_free_mux,
+	  ARRAY_SIZE(sdmmc_free_mux), 0, 0, 0, 0xB0, 4},
+	{ STRATIX10_S2F_USER1_FREE_CLK, "s2f_user1_free_clk", NULL, s2f_usr1_free_mux,
+	  ARRAY_SIZE(s2f_usr1_free_mux), 0, 0, 0, 0xB0, 5},
+	{ STRATIX10_PSI_REF_FREE_CLK, "psi_ref_free_clk", NULL, psi_ref_free_mux,
+	  ARRAY_SIZE(psi_ref_free_mux), 0, 0, 0, 0xB0, 6},
+};
+
+static const struct stratix10_gate_clock s10_gate_clks[] = {
+	{ STRATIX10_MPU_CLK, "mpu_clk", NULL, mpu_mux, ARRAY_SIZE(mpu_mux), 0, 0x30,
+	  0, 0, 0, 0, 0x3C, 0, 0},
+	{ STRATIX10_MPU_PERIPH_CLK, "mpu_periph_clk", "mpu_clk", NULL, 1, 0, 0x30,
+	  0, 0, 0, 0, 0, 0, 4},
+	{ STRATIX10_MPU_L2RAM_CLK, "mpu_l2ram_clk", "mpu_clk", NULL, 1, 0, 0x30,
+	  0, 0, 0, 0, 0, 0, 2},
+	{ STRATIX10_L4_MAIN_CLK, "l4_main_clk", "noc_clk", NULL, 1, 0, 0x30,
+	  1, 0x70, 0, 2, 0, 0, 0},
+	{ STRATIX10_L4_MP_CLK, "l4_mp_clk", "noc_clk", NULL, 1, 0, 0x30,
+	  2, 0x70, 8, 2, 0, 0, 0},
+	{ STRATIX10_L4_SP_CLK, "l4_sp_clk", "noc_clk", NULL, 1, CLK_IS_CRITICAL, 0x30,
+	  3, 0x70, 16, 2, 0, 0, 0},
+	{ STRATIX10_CS_AT_CLK, "cs_at_clk", "noc_clk", NULL, 1, 0, 0x30,
+	  4, 0x70, 24, 2, 0, 0, 0},
+	{ STRATIX10_CS_TRACE_CLK, "cs_trace_clk", "noc_clk", NULL, 1, 0, 0x30,
+	  4, 0x70, 26, 2, 0, 0, 0},
+	{ STRATIX10_CS_PDBG_CLK, "cs_pdbg_clk", "cs_at_clk", NULL, 1, 0, 0x30,
+	  4, 0x70, 28, 1, 0, 0, 0},
+	{ STRATIX10_CS_TIMER_CLK, "cs_timer_clk", "noc_clk", NULL, 1, 0, 0x30,
+	  5, 0, 0, 0, 0, 0, 0},
+	{ STRATIX10_S2F_USER0_CLK, "s2f_user0_clk", NULL, s2f_usr0_mux, ARRAY_SIZE(s2f_usr0_mux), 0, 0x30,
+	  6, 0, 0, 0, 0, 0, 0},
+	{ STRATIX10_EMAC0_CLK, "emac0_clk", NULL, emac_mux, ARRAY_SIZE(emac_mux), 0, 0xA4,
+	  0, 0, 0, 0, 0xDC, 26, 0},
+	{ STRATIX10_EMAC1_CLK, "emac1_clk", NULL, emac_mux, ARRAY_SIZE(emac_mux), 0, 0xA4,
+	  1, 0, 0, 0, 0xDC, 27, 0},
+	{ STRATIX10_EMAC2_CLK, "emac2_clk", NULL, emac_mux, ARRAY_SIZE(emac_mux), 0, 0xA4,
+	  2, 0, 0, 0, 0xDC, 28, 0},
+	{ STRATIX10_EMAC_PTP_CLK, "emac_ptp_clk", "emac_ptp_free_clk", NULL, 1, 0, 0xA4,
+	  3, 0, 0, 0, 0, 0, 0},
+	{ STRATIX10_GPIO_DB_CLK, "gpio_db_clk", "gpio_db_free_clk", NULL, 1, 0, 0xA4,
+	  4, 0xE0, 0, 16, 0, 0, 0},
+	{ STRATIX10_SDMMC_CLK, "sdmmc_clk", "sdmmc_free_clk", NULL, 1, 0, 0xA4,
+	  5, 0, 0, 0, 0, 0, 4},
+	{ STRATIX10_S2F_USER1_CLK, "s2f_user1_clk", "s2f_user1_free_clk", NULL, 1, 0, 0xA4,
+	  6, 0, 0, 0, 0, 0, 0},
+	{ STRATIX10_PSI_REF_CLK, "psi_ref_clk", "psi_ref_free_clk", NULL, 1, 0, 0xA4,
+	  7, 0, 0, 0, 0, 0, 0},
+	{ STRATIX10_USB_CLK, "usb_clk", "l4_mp_clk", NULL, 1, 0, 0xA4,
+	  8, 0, 0, 0, 0, 0, 0},
+	{ STRATIX10_SPI_M_CLK, "spi_m_clk", "l4_mp_clk", NULL, 1, 0, 0xA4,
+	  9, 0, 0, 0, 0, 0, 0},
+	{ STRATIX10_NAND_CLK, "nand_clk", "l4_main_clk", NULL, 1, 0, 0xA4,
+	  10, 0, 0, 0, 0, 0, 0},
+};
+
+/* Register clks[] by id; log and skip failures (ERR_PTR() or NULL). */
+static int s10_clk_register_c_perip(const struct stratix10_perip_c_clock *clks,
+				    int nums, struct stratix10_clock_data *data)
+{
+	struct clk *clk;
+	int i;
+
+	for (i = 0; i < nums; i++) {
+		clk = s10_register_periph(clks[i].name, clks[i].parent_name,
+					  clks[i].parent_names, clks[i].num_parents,
+					  clks[i].flags, data->base, clks[i].offset);
+		if (IS_ERR_OR_NULL(clk)) {
+			pr_err("%s: failed to register clock %s\n",
+			       __func__, clks[i].name);
+			continue;
+		}
+		data->clk_data.clks[clks[i].id] = clk;
+	}
+	return 0;
+}
+
+/* Register clks[] by id; log and skip failures (ERR_PTR() or NULL). */
+static int s10_clk_register_cnt_perip(const struct stratix10_perip_cnt_clock *clks,
+				      int nums, struct stratix10_clock_data *data)
+{
+	struct clk *clk;
+	int i;
+
+	for (i = 0; i < nums; i++) {
+		clk = s10_register_cnt_periph(clks[i].name, clks[i].parent_name,
+					      clks[i].parent_names,
+					      clks[i].num_parents,
+					      clks[i].flags, data->base,
+					      clks[i].offset,
+					      clks[i].fixed_divider,
+					      clks[i].bypass_reg,
+					      clks[i].bypass_shift);
+		if (IS_ERR_OR_NULL(clk)) {
+			pr_err("%s: failed to register clock %s\n",
+			       __func__, clks[i].name);
+			continue;
+		}
+		data->clk_data.clks[clks[i].id] = clk;
+	}
+
+	return 0;
+}
+
+/* Register clks[] by id; log and skip failures (ERR_PTR() or NULL). */
+static int s10_clk_register_gate(const struct stratix10_gate_clock *clks,
+				 int nums, struct stratix10_clock_data *data)
+{
+	struct clk *clk;
+	int i;
+
+	for (i = 0; i < nums; i++) {
+		clk = s10_register_gate(clks[i].name, clks[i].parent_name,
+					clks[i].parent_names,
+					clks[i].num_parents,
+					clks[i].flags, data->base,
+					clks[i].gate_reg,
+					clks[i].gate_idx, clks[i].div_reg,
+					clks[i].div_offset, clks[i].div_width,
+					clks[i].bypass_reg,
+					clks[i].bypass_shift,
+					clks[i].fixed_div);
+		if (IS_ERR_OR_NULL(clk)) {
+			pr_err("%s: failed to register clock %s\n",
+			       __func__, clks[i].name);
+			continue;
+		}
+		data->clk_data.clks[clks[i].id] = clk;
+	}
+
+	return 0;
+}
+
+/* Register clks[] by id; log and skip failures (ERR_PTR() or NULL). */
+static int s10_clk_register_pll(const struct stratix10_pll_clock *clks,
+				 int nums, struct stratix10_clock_data *data)
+{
+	struct clk *clk;
+	int i;
+
+	for (i = 0; i < nums; i++) {
+		clk = s10_register_pll(clks[i].name, clks[i].parent_names,
+				    clks[i].num_parents,
+				    clks[i].flags, data->base,
+				    clks[i].offset);
+		if (IS_ERR_OR_NULL(clk)) {
+			pr_err("%s: failed to register clock %s\n",
+			       __func__, clks[i].name);
+			continue;
+		}
+		data->clk_data.clks[clks[i].id] = clk;
+	}
+
+	return 0;
+}
+
+/* Map registers, allocate the clk table, add the OF provider; NULL on failure. */
+static struct stratix10_clock_data *__socfpga_s10_clk_init(struct device_node *np,
+						    int nr_clks)
+{
+	struct stratix10_clock_data *clk_data;
+	struct clk **clk_table;
+	void __iomem *base;
+
+	base = of_iomap(np, 0);
+	if (!base) {
+		pr_err("%s: failed to map clock registers\n", __func__);
+		return NULL;
+	}
+
+	clk_data = kzalloc(sizeof(*clk_data), GFP_KERNEL);
+	if (!clk_data)
+		goto err_unmap;
+	clk_table = kcalloc(nr_clks, sizeof(*clk_table), GFP_KERNEL);
+	if (!clk_table)
+		goto err_data;
+
+	clk_data->base = base;
+	clk_data->clk_data.clks = clk_table;
+	clk_data->clk_data.clk_num = nr_clks;
+	of_clk_add_provider(np, of_clk_src_onecell_get, &clk_data->clk_data);
+	return clk_data;
+
+err_data:
+	kfree(clk_data);
+err_unmap:
+	iounmap(base);	/* undo of_iomap() so the mapping is not leaked */
+	return NULL;
+}
+
+/* Create the provider, then register each clock table (helper results ignored). */
+static int s10_clkmgr_init(struct device_node *np)
+{
+	struct stratix10_clock_data *clk_data;
+
+	clk_data = __socfpga_s10_clk_init(np, STRATIX10_NUM_CLKS);
+	if (!clk_data)
+		return -ENOMEM;	/* NULL is also returned when of_iomap() failed */
+
+	s10_clk_register_pll(s10_pll_clks, ARRAY_SIZE(s10_pll_clks), clk_data);
+	s10_clk_register_c_perip(s10_main_perip_c_clks,
+				 ARRAY_SIZE(s10_main_perip_c_clks), clk_data);
+
+	s10_clk_register_cnt_perip(s10_main_perip_cnt_clks,
+				   ARRAY_SIZE(s10_main_perip_cnt_clks),
+				   clk_data);
+
+	s10_clk_register_gate(s10_gate_clks, ARRAY_SIZE(s10_gate_clks),
+			      clk_data);
+	return 0;
+}
+
+/* Platform probe: set up the clock manager for this device's OF node. */
+static int s10_clkmgr_probe(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+
+	/* Propagate failure rather than binding with no clocks registered */
+	return s10_clkmgr_init(np);
+}
+
+static const struct of_device_id stratix10_clkmgr_match_table[] = {
+	{ .compatible = "intel,stratix10-clkmgr",
+	  .data = s10_clkmgr_init },	/* NOTE(review): probe calls this directly, .data is not read */
+	{ }	/* sentinel */
+};
+
+static struct platform_driver stratix10_clkmgr_driver = {	/* no .remove callback is set */
+	.probe		= s10_clkmgr_probe,
+	.driver		= {
+		.name	= "stratix10-clkmgr",
+		.of_match_table = stratix10_clkmgr_match_table,
+	},
+};
+
+static int __init s10_clk_init(void)	/* initcall: registers the clkmgr platform driver */
+{
+	return platform_driver_register(&stratix10_clkmgr_driver);
+}
+core_initcall(s10_clk_init);
diff --git a/drivers/clk/socfpga/clk.h b/drivers/clk/socfpga/clk.h
index 9cf1230115b1..26c3a265cf78 100644
--- a/drivers/clk/socfpga/clk.h
+++ b/drivers/clk/socfpga/clk.h
@@ -54,9 +54,11 @@ struct socfpga_gate_clk {
 	char *parent_name;
 	u32 fixed_div;
 	void __iomem *div_reg;
+	void __iomem *bypass_reg;
 	struct regmap *sys_mgr_base_addr;
 	u32 width;	/* only valid if div_reg != 0 */
 	u32 shift;	/* only valid if div_reg != 0 */
+	u32 bypass_shift;      /* only valid if bypass_reg != 0 */
 	u32 clk_phase[2];
 };
 
@@ -65,8 +67,10 @@ struct socfpga_periph_clk {
 	char *parent_name;
 	u32 fixed_div;
 	void __iomem *div_reg;
+	void __iomem *bypass_reg;
 	u32 width;      /* only valid if div_reg != 0 */
 	u32 shift;      /* only valid if div_reg != 0 */
+	u32 bypass_shift;      /* only valid if bypass_reg != 0 */
 };
 
 #endif /* SOCFPGA_CLK_H */
diff --git a/drivers/clk/socfpga/stratix10-clk.h b/drivers/clk/socfpga/stratix10-clk.h
new file mode 100644
index 000000000000..e8e121907952
--- /dev/null
+++ b/drivers/clk/socfpga/stratix10-clk.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier:    GPL-2.0 */
+/*
+ * Copyright (C) 2017, Intel Corporation
+ */
+
+#ifndef	__STRATIX10_CLK_H
+#define	__STRATIX10_CLK_H
+
+struct stratix10_clock_data {
+	struct clk_onecell_data	clk_data;	/* clk table handed to the OF clk provider */
+	void __iomem		*base;		/* clock registers mapped with of_iomap() */
+};
+
+struct stratix10_pll_clock {
+	unsigned int		id;		/* index into clk_data.clks[] */
+	const char		*name;
+	const char		*const *parent_names;
+	u8			num_parents;	/* number of entries in parent_names */
+	unsigned long		flags;		/* becomes clk_init_data.flags */
+	unsigned long		offset;		/* added to base to get the PLL register */
+};
+
+struct stratix10_perip_c_clock {
+	unsigned int		id;		/* index into clk_data.clks[] */
+	const char		*name;
+	const char		*parent_name;	/* single parent; NULL when parent_names is used */
+	const char		*const *parent_names;	/* mux parents; NULL when parent_name is used */
+	u8			num_parents;
+	unsigned long		flags;
+	unsigned long		offset;		/* passed to s10_register_periph() with base */
+};
+
+struct stratix10_perip_cnt_clock {
+	unsigned int		id;		/* index into clk_data.clks[] */
+	const char		*name;
+	const char		*parent_name;	/* single parent; NULL when parent_names is used */
+	const char		*const *parent_names;	/* mux parents; NULL when parent_name is used */
+	u8			num_parents;
+	unsigned long		flags;
+	unsigned long		offset;		/* passed to s10_register_cnt_periph() with base */
+	u8			fixed_divider;	/* NOTE(review): 0 looks like "no fixed divider" - confirm */
+	unsigned long		bypass_reg;	/* NOTE(review): 0 looks like "no bypass" - confirm */
+	unsigned long		bypass_shift;
+};
+
+struct stratix10_gate_clock {
+	unsigned int		id;		/* index into clk_data.clks[] */
+	const char		*name;
+	const char		*parent_name;	/* single parent; NULL when parent_names is used */
+	const char		*const *parent_names;	/* mux parents; NULL when parent_name is used */
+	u8			num_parents;
+	unsigned long		flags;
+	unsigned long		gate_reg;	/* passed to s10_register_gate() with base */
+	u8			gate_idx;
+	unsigned long		div_reg;	/* NOTE(review): 0 looks like "no divider" - confirm */
+	u8			div_offset;
+	u8			div_width;
+	unsigned long		bypass_reg;	/* NOTE(review): 0 looks like "no bypass" - confirm */
+	u8			bypass_shift;
+	u8			fixed_div;
+};
+
+struct clk *s10_register_pll(const char *, const char *const *, u8,
+			     unsigned long, void __iomem *, unsigned long);
+
+struct clk *s10_register_periph(const char *, const char *,
+				const char * const *, u8, unsigned long,
+				void __iomem *, unsigned long);
+struct clk *s10_register_cnt_periph(const char *, const char *,
+				    const char * const *, u8,
+				    unsigned long, void __iomem *,
+				    unsigned long, u8, unsigned long,
+				    unsigned long);
+struct clk *s10_register_gate(const char *, const char *,
+			      const char * const *, u8,
+			      unsigned long, void __iomem *,
+			      unsigned long, unsigned long,
+			      unsigned long, unsigned long, u8,
+			      unsigned long, u8, u8);
+#endif	/* __STRATIX10_CLK_H */
diff --git a/drivers/clk/sprd/sc9860-clk.c b/drivers/clk/sprd/sc9860-clk.c
index ed5c027df0f4..9980ab55271b 100644
--- a/drivers/clk/sprd/sc9860-clk.c
+++ b/drivers/clk/sprd/sc9860-clk.c
@@ -959,6 +959,44 @@ static SPRD_SC_GATE_CLK(sdio2_2x_en,	"sdio2-2x-en",	"aon-apb", 0x13c,
 			       0x1000, BIT(6), 0, 0);
 static SPRD_SC_GATE_CLK(emmc_2x_en,	"emmc-2x-en",	"aon-apb", 0x13c,
 			       0x1000, BIT(9), 0, 0);
+static SPRD_SC_GATE_CLK(arch_rtc_eb, "arch-rtc-eb",	"aon-apb", 0x10,
+		     0x1000, BIT(0), CLK_IGNORE_UNUSED, 0);
+static SPRD_SC_GATE_CLK(kpb_rtc_eb, "kpb-rtc-eb",	"aon-apb", 0x10,
+		     0x1000, BIT(1), CLK_IGNORE_UNUSED, 0);
+static SPRD_SC_GATE_CLK(aon_syst_rtc_eb, "aon-syst-rtc-eb",	"aon-apb", 0x10,
+		     0x1000, BIT(2), CLK_IGNORE_UNUSED, 0);
+static SPRD_SC_GATE_CLK(ap_syst_rtc_eb, "ap-syst-rtc-eb",	"aon-apb", 0x10,
+		     0x1000, BIT(3), CLK_IGNORE_UNUSED, 0);
+static SPRD_SC_GATE_CLK(aon_tmr_rtc_eb, "aon-tmr-rtc-eb",	"aon-apb", 0x10,
+		     0x1000, BIT(4), CLK_IGNORE_UNUSED, 0);
+static SPRD_SC_GATE_CLK(ap_tmr0_rtc_eb, "ap-tmr0-rtc-eb",	"aon-apb", 0x10,
+		     0x1000, BIT(5), CLK_IGNORE_UNUSED, 0);
+static SPRD_SC_GATE_CLK(eic_rtc_eb, "eic-rtc-eb",	"aon-apb", 0x10,
+		     0x1000, BIT(6), CLK_IGNORE_UNUSED, 0);
+static SPRD_SC_GATE_CLK(eic_rtcdv5_eb, "eic-rtcdv5-eb",	"aon-apb", 0x10,
+		     0x1000, BIT(7), CLK_IGNORE_UNUSED, 0);
+static SPRD_SC_GATE_CLK(ap_wdg_rtc_eb, "ap-wdg-rtc-eb",	"aon-apb", 0x10,
+		     0x1000, BIT(9), CLK_IGNORE_UNUSED, 0);
+static SPRD_SC_GATE_CLK(ap_tmr1_rtc_eb, "ap-tmr1-rtc-eb",	"aon-apb", 0x10,
+		     0x1000, BIT(15), CLK_IGNORE_UNUSED, 0);
+static SPRD_SC_GATE_CLK(ap_tmr2_rtc_eb, "ap-tmr2-rtc-eb",	"aon-apb", 0x10,
+		     0x1000, BIT(16), CLK_IGNORE_UNUSED, 0);
+static SPRD_SC_GATE_CLK(dcxo_tmr_rtc_eb, "dcxo-tmr-rtc-eb",	"aon-apb", 0x10,
+		     0x1000, BIT(17), CLK_IGNORE_UNUSED, 0);
+static SPRD_SC_GATE_CLK(bb_cal_rtc_eb, "bb-cal-rtc-eb",	"aon-apb", 0x10,
+		     0x1000, BIT(18), CLK_IGNORE_UNUSED, 0);
+static SPRD_SC_GATE_CLK(avs_big_rtc_eb, "avs-big-rtc-eb",	"aon-apb", 0x10,
+		     0x1000, BIT(20), CLK_IGNORE_UNUSED, 0);
+static SPRD_SC_GATE_CLK(avs_lit_rtc_eb, "avs-lit-rtc-eb",	"aon-apb", 0x10,
+		     0x1000, BIT(21), CLK_IGNORE_UNUSED, 0);
+static SPRD_SC_GATE_CLK(avs_gpu0_rtc_eb, "avs-gpu0-rtc-eb",	"aon-apb", 0x10,
+		     0x1000, BIT(22), CLK_IGNORE_UNUSED, 0);
+static SPRD_SC_GATE_CLK(avs_gpu1_rtc_eb, "avs-gpu1-rtc-eb",	"aon-apb", 0x10,
+		     0x1000, BIT(23), CLK_IGNORE_UNUSED, 0);
+static SPRD_SC_GATE_CLK(gpu_ts_eb, "gpu-ts-eb",	"aon-apb", 0x10,
+		     0x1000, BIT(24), CLK_IGNORE_UNUSED, 0);
+static SPRD_SC_GATE_CLK(rtcdv10_eb, "rtcdv10-eb",	"aon-apb", 0x10,
+		     0x1000, BIT(27), CLK_IGNORE_UNUSED, 0);
 
 static struct sprd_clk_common *sc9860_aon_gate[] = {
 	/* address base is 0x402e0000 */
@@ -1030,6 +1068,25 @@ static struct sprd_clk_common *sc9860_aon_gate[] = {
 	&sdio1_2x_en.common,
 	&sdio2_2x_en.common,
 	&emmc_2x_en.common,
+	&arch_rtc_eb.common,
+	&kpb_rtc_eb.common,
+	&aon_syst_rtc_eb.common,
+	&ap_syst_rtc_eb.common,
+	&aon_tmr_rtc_eb.common,
+	&ap_tmr0_rtc_eb.common,
+	&eic_rtc_eb.common,
+	&eic_rtcdv5_eb.common,
+	&ap_wdg_rtc_eb.common,
+	&ap_tmr1_rtc_eb.common,
+	&ap_tmr2_rtc_eb.common,
+	&dcxo_tmr_rtc_eb.common,
+	&bb_cal_rtc_eb.common,
+	&avs_big_rtc_eb.common,
+	&avs_lit_rtc_eb.common,
+	&avs_gpu0_rtc_eb.common,
+	&avs_gpu1_rtc_eb.common,
+	&gpu_ts_eb.common,
+	&rtcdv10_eb.common,
 };
 
 static struct clk_hw_onecell_data sc9860_aon_gate_hws = {
@@ -1102,6 +1159,25 @@ static struct clk_hw_onecell_data sc9860_aon_gate_hws = {
 		[CLK_SDIO1_2X_EN]	= &sdio1_2x_en.common.hw,
 		[CLK_SDIO2_2X_EN]	= &sdio2_2x_en.common.hw,
 		[CLK_EMMC_2X_EN]	= &emmc_2x_en.common.hw,
+		[CLK_ARCH_RTC_EB]	= &arch_rtc_eb.common.hw,
+		[CLK_KPB_RTC_EB]	= &kpb_rtc_eb.common.hw,
+		[CLK_AON_SYST_RTC_EB]	= &aon_syst_rtc_eb.common.hw,
+		[CLK_AP_SYST_RTC_EB]	= &ap_syst_rtc_eb.common.hw,
+		[CLK_AON_TMR_RTC_EB]	= &aon_tmr_rtc_eb.common.hw,
+		[CLK_AP_TMR0_RTC_EB]	= &ap_tmr0_rtc_eb.common.hw,
+		[CLK_EIC_RTC_EB]	= &eic_rtc_eb.common.hw,
+		[CLK_EIC_RTCDV5_EB]	= &eic_rtcdv5_eb.common.hw,
+		[CLK_AP_WDG_RTC_EB]	= &ap_wdg_rtc_eb.common.hw,
+		[CLK_AP_TMR1_RTC_EB]	= &ap_tmr1_rtc_eb.common.hw,
+		[CLK_AP_TMR2_RTC_EB]	= &ap_tmr2_rtc_eb.common.hw,
+		[CLK_DCXO_TMR_RTC_EB]	= &dcxo_tmr_rtc_eb.common.hw,
+		[CLK_BB_CAL_RTC_EB]	= &bb_cal_rtc_eb.common.hw,
+		[CLK_AVS_BIG_RTC_EB]	= &avs_big_rtc_eb.common.hw,
+		[CLK_AVS_LIT_RTC_EB]	= &avs_lit_rtc_eb.common.hw,
+		[CLK_AVS_GPU0_RTC_EB]	= &avs_gpu0_rtc_eb.common.hw,
+		[CLK_AVS_GPU1_RTC_EB]	= &avs_gpu1_rtc_eb.common.hw,
+		[CLK_GPU_TS_EB]		= &gpu_ts_eb.common.hw,
+		[CLK_RTCDV10_EB]	= &rtcdv10_eb.common.hw,
 	},
 	.num	= CLK_AON_GATE_NUM,
 };
diff --git a/drivers/clk/sunxi-ng/Kconfig b/drivers/clk/sunxi-ng/Kconfig
index 6427d0ebe2de..79dfd296c3d1 100644
--- a/drivers/clk/sunxi-ng/Kconfig
+++ b/drivers/clk/sunxi-ng/Kconfig
@@ -11,15 +11,13 @@ config SUN50I_A64_CCU
 	default ARM64 && ARCH_SUNXI
 	depends on (ARM64 && ARCH_SUNXI) || COMPILE_TEST
 
+config SUN50I_H6_CCU
+	bool "Support for the Allwinner H6 CCU"
+	default ARM64 && ARCH_SUNXI
+	depends on (ARM64 && ARCH_SUNXI) || COMPILE_TEST
+
 config SUN4I_A10_CCU
 	bool "Support for the Allwinner A10/A20 CCU"
-	select SUNXI_CCU_DIV
-	select SUNXI_CCU_MULT
-	select SUNXI_CCU_NK
-	select SUNXI_CCU_NKM
-	select SUNXI_CCU_NM
-	select SUNXI_CCU_MP
-	select SUNXI_CCU_PHASE
 	default MACH_SUN4I
 	default MACH_SUN7I
 	depends on MACH_SUN4I || MACH_SUN7I || COMPILE_TEST
diff --git a/drivers/clk/sunxi-ng/Makefile b/drivers/clk/sunxi-ng/Makefile
index 4141c3fe08ae..128a40ee5c5e 100644
--- a/drivers/clk/sunxi-ng/Makefile
+++ b/drivers/clk/sunxi-ng/Makefile
@@ -22,6 +22,7 @@ lib-$(CONFIG_SUNXI_CCU)		+= ccu_mp.o
 
 # SoC support
 obj-$(CONFIG_SUN50I_A64_CCU)	+= ccu-sun50i-a64.o
+obj-$(CONFIG_SUN50I_H6_CCU)	+= ccu-sun50i-h6.o
 obj-$(CONFIG_SUN4I_A10_CCU)	+= ccu-sun4i-a10.o
 obj-$(CONFIG_SUN5I_CCU)		+= ccu-sun5i.o
 obj-$(CONFIG_SUN6I_A31_CCU)	+= ccu-sun6i-a31.o
diff --git a/drivers/clk/sunxi-ng/ccu-sun50i-h6.c b/drivers/clk/sunxi-ng/ccu-sun50i-h6.c
new file mode 100644
index 000000000000..bdbfe78fe133
--- /dev/null
+++ b/drivers/clk/sunxi-ng/ccu-sun50i-h6.c
@@ -0,0 +1,1211 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2017 Icenowy Zheng <icenowy@aosc.io>
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+
+#include "ccu_common.h"
+#include "ccu_reset.h"
+
+#include "ccu_div.h"
+#include "ccu_gate.h"
+#include "ccu_mp.h"
+#include "ccu_mult.h"
+#include "ccu_nk.h"
+#include "ccu_nkm.h"
+#include "ccu_nkmp.h"
+#include "ccu_nm.h"
+
+#include "ccu-sun50i-h6.h"
+
+/*
+ * The CPU PLL is actually an NP clock, with P being /1, /2 or /4. However
+ * P should only be used for output frequencies lower than 288 MHz.
+ *
+ * For now we can just model it as a multiplier clock, and force P to /1.
+ *
+ * The M factor is present in the register's description, but not in the
+ * frequency formula, and it's documented as "M is only used for backdoor
+ * testing", so it's not modelled and is then forced to 0.
+ */
+#define SUN50I_H6_PLL_CPUX_REG		0x000
+static struct ccu_mult pll_cpux_clk = {
+	.enable		= BIT(31),
+	.lock		= BIT(28),
+	.mult		= _SUNXI_CCU_MULT_MIN(8, 8, 12),
+	.common		= {
+		.reg		= 0x000,
+		.hw.init	= CLK_HW_INIT("pll-cpux", "osc24M",
+					      &ccu_mult_ops,
+					      CLK_SET_RATE_UNGATE),
+	},
+};
+
+/* Some PLLs are input * N / div1 / P. Model them as NKMP with no K */
+#define SUN50I_H6_PLL_DDR0_REG		0x010
+static struct ccu_nkmp pll_ddr0_clk = {
+	.enable		= BIT(31),
+	.lock		= BIT(28),
+	.n		= _SUNXI_CCU_MULT_MIN(8, 8, 12),
+	.m		= _SUNXI_CCU_DIV(1, 1), /* input divider */
+	.p		= _SUNXI_CCU_DIV(0, 1), /* output divider */
+	.common		= {
+		.reg		= 0x010,
+		.hw.init	= CLK_HW_INIT("pll-ddr0", "osc24M",
+					      &ccu_nkmp_ops,
+					      CLK_SET_RATE_UNGATE),
+	},
+};
+
+#define SUN50I_H6_PLL_PERIPH0_REG	0x020
+static struct ccu_nkmp pll_periph0_clk = {
+	.enable		= BIT(31),
+	.lock		= BIT(28),
+	.n		= _SUNXI_CCU_MULT_MIN(8, 8, 12),
+	.m		= _SUNXI_CCU_DIV(1, 1), /* input divider */
+	.p		= _SUNXI_CCU_DIV(0, 1), /* output divider */
+	.fixed_post_div	= 4,
+	.common		= {
+		.reg		= 0x020,
+		.features	= CCU_FEATURE_FIXED_POSTDIV,
+		.hw.init	= CLK_HW_INIT("pll-periph0", "osc24M",
+					      &ccu_nkmp_ops,
+					      CLK_SET_RATE_UNGATE),
+	},
+};
+
+#define SUN50I_H6_PLL_PERIPH1_REG	0x028
+static struct ccu_nkmp pll_periph1_clk = {
+	.enable		= BIT(31),
+	.lock		= BIT(28),
+	.n		= _SUNXI_CCU_MULT_MIN(8, 8, 12),
+	.m		= _SUNXI_CCU_DIV(1, 1), /* input divider */
+	.p		= _SUNXI_CCU_DIV(0, 1), /* output divider */
+	.fixed_post_div	= 4,
+	.common		= {
+		.reg		= 0x028,
+		.features	= CCU_FEATURE_FIXED_POSTDIV,
+		.hw.init	= CLK_HW_INIT("pll-periph1", "osc24M",
+					      &ccu_nkmp_ops,
+					      CLK_SET_RATE_UNGATE),
+	},
+};
+
+#define SUN50I_H6_PLL_GPU_REG		0x030
+static struct ccu_nkmp pll_gpu_clk = {
+	.enable		= BIT(31),
+	.lock		= BIT(28),
+	.n		= _SUNXI_CCU_MULT_MIN(8, 8, 12),
+	.m		= _SUNXI_CCU_DIV(1, 1), /* input divider */
+	.p		= _SUNXI_CCU_DIV(0, 1), /* output divider */
+	.common		= {
+		.reg		= 0x030,
+		.hw.init	= CLK_HW_INIT("pll-gpu", "osc24M",
+					      &ccu_nkmp_ops,
+					      CLK_SET_RATE_UNGATE),
+	},
+};
+
+/*
+ * For Video PLLs, the output divider is described as "used for testing"
+ * in the user manual. So it's not modelled and forced to 0.
+ */
+#define SUN50I_H6_PLL_VIDEO0_REG	0x040
+static struct ccu_nm pll_video0_clk = {
+	.enable		= BIT(31),
+	.lock		= BIT(28),
+	.n		= _SUNXI_CCU_MULT_MIN(8, 8, 12),
+	.m		= _SUNXI_CCU_DIV(1, 1), /* input divider */
+	.fixed_post_div	= 4,
+	.common		= {
+		.reg		= 0x040,
+		.features	= CCU_FEATURE_FIXED_POSTDIV,
+		.hw.init	= CLK_HW_INIT("pll-video0", "osc24M",
+					      &ccu_nm_ops,
+					      CLK_SET_RATE_UNGATE),
+	},
+};
+
+#define SUN50I_H6_PLL_VIDEO1_REG	0x048
+static struct ccu_nm pll_video1_clk = {
+	.enable		= BIT(31),
+	.lock		= BIT(28),
+	.n		= _SUNXI_CCU_MULT_MIN(8, 8, 12),
+	.m		= _SUNXI_CCU_DIV(1, 1), /* input divider */
+	.fixed_post_div	= 4,
+	.common		= {
+		.reg		= 0x048,
+		.features	= CCU_FEATURE_FIXED_POSTDIV,
+		.hw.init	= CLK_HW_INIT("pll-video1", "osc24M",
+					      &ccu_nm_ops,
+					      CLK_SET_RATE_UNGATE),
+	},
+};
+
+#define SUN50I_H6_PLL_VE_REG		0x058
+static struct ccu_nkmp pll_ve_clk = {
+	.enable		= BIT(31),
+	.lock		= BIT(28),
+	.n		= _SUNXI_CCU_MULT_MIN(8, 8, 12),
+	.m		= _SUNXI_CCU_DIV(1, 1), /* input divider */
+	.p		= _SUNXI_CCU_DIV(0, 1), /* output divider */
+	.common		= {
+		.reg		= 0x058,
+		.hw.init	= CLK_HW_INIT("pll-ve", "osc24M",
+					      &ccu_nkmp_ops,
+					      CLK_SET_RATE_UNGATE),
+	},
+};
+
+#define SUN50I_H6_PLL_DE_REG		0x060
+static struct ccu_nkmp pll_de_clk = {
+	.enable		= BIT(31),
+	.lock		= BIT(28),
+	.n		= _SUNXI_CCU_MULT_MIN(8, 8, 12),
+	.m		= _SUNXI_CCU_DIV(1, 1), /* input divider */
+	.p		= _SUNXI_CCU_DIV(0, 1), /* output divider */
+	.common		= {
+		.reg		= 0x060,
+		.hw.init	= CLK_HW_INIT("pll-de", "osc24M",
+					      &ccu_nkmp_ops,
+					      CLK_SET_RATE_UNGATE),
+	},
+};
+
+#define SUN50I_H6_PLL_HSIC_REG		0x070
+static struct ccu_nkmp pll_hsic_clk = {
+	.enable		= BIT(31),
+	.lock		= BIT(28),
+	.n		= _SUNXI_CCU_MULT_MIN(8, 8, 12),
+	.m		= _SUNXI_CCU_DIV(1, 1), /* input divider */
+	.p		= _SUNXI_CCU_DIV(0, 1), /* output divider */
+	.common		= {
+		.reg		= 0x070,
+		.hw.init	= CLK_HW_INIT("pll-hsic", "osc24M",
+					      &ccu_nkmp_ops,
+					      CLK_SET_RATE_UNGATE),
+	},
+};
+
+/*
+ * The Audio PLL is supposed to have 3 outputs: 2 fixed factors from
+ * the base (2x and 4x), and one variable divider (the one true pll audio).
+ *
+ * We don't have any need for the variable divider for now, so we just
+ * hardcode it to match with the clock names.
+ */
+#define SUN50I_H6_PLL_AUDIO_REG		0x078
+static struct ccu_nm pll_audio_base_clk = {
+	.enable		= BIT(31),
+	.lock		= BIT(28),
+	.n		= _SUNXI_CCU_MULT_MIN(8, 8, 12),
+	.m		= _SUNXI_CCU_DIV(1, 1), /* input divider */
+	.common		= {
+		.reg		= 0x078,
+		.hw.init	= CLK_HW_INIT("pll-audio-base", "osc24M",
+					      &ccu_nm_ops,
+					      CLK_SET_RATE_UNGATE),
+	},
+};
+
+static const char * const cpux_parents[] = { "osc24M", "osc32k",
+					     "iosc", "pll-cpux" };
+static SUNXI_CCU_MUX(cpux_clk, "cpux", cpux_parents,
+		     0x500, 24, 2, CLK_SET_RATE_PARENT | CLK_IS_CRITICAL);
+static SUNXI_CCU_M(axi_clk, "axi", "cpux", 0x500, 0, 2, 0);
+static SUNXI_CCU_M(cpux_apb_clk, "cpux-apb", "cpux", 0x500, 8, 2, 0);
+
+static const char * const psi_ahb1_ahb2_parents[] = { "osc24M", "osc32k",
+						      "iosc", "pll-periph0" };
+static SUNXI_CCU_MP_WITH_MUX(psi_ahb1_ahb2_clk, "psi-ahb1-ahb2",
+			     psi_ahb1_ahb2_parents,
+			     0x510,
+			     0, 5,	/* M */
+			     16, 2,	/* P */
+			     24, 2,	/* mux */
+			     0);
+
+static const char * const ahb3_apb1_apb2_parents[] = { "osc24M", "osc32k",
+						       "psi-ahb1-ahb2",
+						       "pll-periph0" };
+static SUNXI_CCU_MP_WITH_MUX(ahb3_clk, "ahb3", ahb3_apb1_apb2_parents, 0x51c,
+			     0, 5,	/* M */
+			     16, 2,	/* P */
+			     24, 2,	/* mux */
+			     0);
+
+static SUNXI_CCU_MP_WITH_MUX(apb1_clk, "apb1", ahb3_apb1_apb2_parents, 0x520,
+			     0, 5,	/* M */
+			     16, 2,	/* P */
+			     24, 2,	/* mux */
+			     0);
+
+static SUNXI_CCU_MP_WITH_MUX(apb2_clk, "apb2", ahb3_apb1_apb2_parents, 0x524,
+			     0, 5,	/* M */
+			     16, 2,	/* P */
+			     24, 2,	/* mux */
+			     0);
+
+static const char * const mbus_parents[] = { "osc24M", "pll-periph0-2x",
+					     "pll-ddr0", "pll-periph0-4x" };
+static SUNXI_CCU_M_WITH_MUX_GATE(mbus_clk, "mbus", mbus_parents, 0x540,
+				       0, 3,	/* M */
+				       24, 2,	/* mux */
+				       BIT(31),	/* gate */
+				       CLK_IS_CRITICAL);
+
+static const char * const de_parents[] = { "pll-de", "pll-periph0-2x" };
+static SUNXI_CCU_M_WITH_MUX_GATE(de_clk, "de", de_parents, 0x600,
+				       0, 4,	/* M */
+				       24, 1,	/* mux */
+				       BIT(31),	/* gate */
+				       0);
+
+static SUNXI_CCU_GATE(bus_de_clk, "bus-de", "psi-ahb1-ahb2",
+		      0x60c, BIT(0), 0);
+
+static const char * const deinterlace_parents[] = { "pll-periph0",
+						    "pll-periph1" };
+static SUNXI_CCU_M_WITH_MUX_GATE(deinterlace_clk, "deinterlace",
+				       deinterlace_parents,
+				       0x620,
+				       0, 4,	/* M */
+				       24, 1,	/* mux */
+				       BIT(31),	/* gate */
+				       0);
+
+static SUNXI_CCU_GATE(bus_deinterlace_clk, "bus-deinterlace", "psi-ahb1-ahb2",
+		      0x62c, BIT(0), 0);
+
+static const char * const gpu_parents[] = { "pll-gpu" };
+static SUNXI_CCU_M_WITH_MUX_GATE(gpu_clk, "gpu", gpu_parents, 0x670,
+				       0, 3,	/* M */
+				       24, 1,	/* mux */
+				       BIT(31),	/* gate */
+				       0);
+
+static SUNXI_CCU_GATE(bus_gpu_clk, "bus-gpu", "psi-ahb1-ahb2",
+		      0x67c, BIT(0), 0);
+
+/* Also applies to EMCE */
+static const char * const ce_parents[] = { "osc24M", "pll-periph0-2x" };
+static SUNXI_CCU_MP_WITH_MUX_GATE(ce_clk, "ce", ce_parents, 0x680,
+					0, 4,	/* M */
+					8, 2,	/* N */
+					24, 1,	/* mux */
+					BIT(31),/* gate */
+					0);
+
+static SUNXI_CCU_GATE(bus_ce_clk, "bus-ce", "psi-ahb1-ahb2",
+		      0x68c, BIT(0), 0);
+
+static const char * const ve_parents[] = { "pll-ve" };
+static SUNXI_CCU_M_WITH_MUX_GATE(ve_clk, "ve", ve_parents, 0x690,
+				       0, 3,	/* M */
+				       24, 1,	/* mux */
+				       BIT(31),	/* gate */
+				       0);
+
+static SUNXI_CCU_GATE(bus_ve_clk, "bus-ve", "psi-ahb1-ahb2",
+		      0x69c, BIT(0), 0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(emce_clk, "emce", ce_parents, 0x6b0,
+					0, 4,	/* M */
+					8, 2,	/* N */
+					24, 1,	/* mux */
+					BIT(31),/* gate */
+					0);
+
+static SUNXI_CCU_GATE(bus_emce_clk, "bus-emce", "psi-ahb1-ahb2",
+		      0x6bc, BIT(0), 0);
+
+static const char * const vp9_parents[] = { "pll-ve", "pll-periph0-2x" };
+static SUNXI_CCU_M_WITH_MUX_GATE(vp9_clk, "vp9", vp9_parents, 0x6c0,
+				       0, 3,	/* M */
+				       24, 1,	/* mux */
+				       BIT(31),	/* gate */
+				       0);
+
+static SUNXI_CCU_GATE(bus_vp9_clk, "bus-vp9", "psi-ahb1-ahb2",
+		      0x6cc, BIT(0), 0);
+
+static SUNXI_CCU_GATE(bus_dma_clk, "bus-dma", "psi-ahb1-ahb2",
+		      0x70c, BIT(0), 0);	/* RST_BUS_DMA is bit 16 of 0x70c */
+
+static SUNXI_CCU_GATE(bus_msgbox_clk, "bus-msgbox", "psi-ahb1-ahb2",
+		      0x71c, BIT(0), 0);	/* RST_BUS_MSGBOX is bit 16 of 0x71c */
+
+static SUNXI_CCU_GATE(bus_spinlock_clk, "bus-spinlock", "psi-ahb1-ahb2",
+		      0x72c, BIT(0), 0);	/* RST_BUS_SPINLOCK is bit 16 of 0x72c */
+
+static SUNXI_CCU_GATE(bus_hstimer_clk, "bus-hstimer", "psi-ahb1-ahb2",
+		      0x73c, BIT(0), 0);	/* RST_BUS_HSTIMER is bit 16 of 0x73c */
+
+static SUNXI_CCU_GATE(avs_clk, "avs", "osc24M", 0x740, BIT(31), 0);	/* no entry in the reset map */
+
+static SUNXI_CCU_GATE(bus_dbg_clk, "bus-dbg", "psi-ahb1-ahb2",
+		      0x78c, BIT(0), 0);	/* RST_BUS_DBG is bit 16 of 0x78c */
+
+static SUNXI_CCU_GATE(bus_psi_clk, "bus-psi", "psi-ahb1-ahb2",
+		      0x79c, BIT(0), 0);	/* RST_BUS_PSI is bit 16 of 0x79c */
+/* PWM bus gate: 0x7ac, same register as RST_BUS_PWM (0x79c is the bus-psi gate). */
+static SUNXI_CCU_GATE(bus_pwm_clk, "bus-pwm", "apb1", 0x7ac, BIT(0), 0);
+
+static SUNXI_CCU_GATE(bus_iommu_clk, "bus-iommu", "apb1", 0x7bc, BIT(0), 0);	/* RST_BUS_IOMMU: bit 16 */
+
+static const char * const dram_parents[] = { "pll-ddr0" };
+static struct ccu_div dram_clk = {	/* no .enable bit is set for this clock */
+	.div		= _SUNXI_CCU_DIV(0, 2),
+	.mux		= _SUNXI_CCU_MUX(24, 2),
+	.common	= {
+		.reg		= 0x800,
+		.hw.init	= CLK_HW_INIT_PARENTS("dram",
+						      dram_parents,
+						      &ccu_div_ops,
+						      CLK_IS_CRITICAL),	/* same flag as bus-dram */
+	},
+};
+
+static SUNXI_CCU_GATE(mbus_dma_clk, "mbus-dma", "mbus",	/* the MBUS gates all share 0x804 */
+		      0x804, BIT(0), 0);
+static SUNXI_CCU_GATE(mbus_ve_clk, "mbus-ve", "mbus",
+		      0x804, BIT(1), 0);
+static SUNXI_CCU_GATE(mbus_ce_clk, "mbus-ce", "mbus",
+		      0x804, BIT(2), 0);
+static SUNXI_CCU_GATE(mbus_ts_clk, "mbus-ts", "mbus",
+		      0x804, BIT(3), 0);
+static SUNXI_CCU_GATE(mbus_nand_clk, "mbus-nand", "mbus",
+		      0x804, BIT(5), 0);
+static SUNXI_CCU_GATE(mbus_csi_clk, "mbus-csi", "mbus",
+		      0x804, BIT(8), 0);
+static SUNXI_CCU_GATE(mbus_deinterlace_clk, "mbus-deinterlace", "mbus",
+		      0x804, BIT(11), 0);
+
+static SUNXI_CCU_GATE(bus_dram_clk, "bus-dram", "psi-ahb1-ahb2",
+		      0x80c, BIT(0), CLK_IS_CRITICAL);	/* RST_BUS_DRAM is bit 16 of 0x80c */
+
+static const char * const nand_spi_parents[] = { "osc24M", "pll-periph0",
+					     "pll-periph1", "pll-periph0-2x",
+					     "pll-periph1-2x" };	/* used by nand0/1 and spi0/1 */
+static SUNXI_CCU_MP_WITH_MUX_GATE(nand0_clk, "nand0", nand_spi_parents, 0x810,
+					0, 4,	/* M */
+					8, 2,	/* N */
+					24, 3,	/* mux */
+					BIT(31),/* gate */
+					0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(nand1_clk, "nand1", nand_spi_parents, 0x814,
+					0, 4,	/* M */
+					8, 2,	/* N */
+					24, 3,	/* mux */
+					BIT(31),/* gate */
+					0);
+
+static SUNXI_CCU_GATE(bus_nand_clk, "bus-nand", "ahb3", 0x82c, BIT(0), 0);	/* RST_BUS_NAND: bit 16 */
+
+static const char * const mmc_parents[] = { "osc24M", "pll-periph0-2x",
+					    "pll-periph1-2x" };	/* shared by mmc0/1/2 */
+static SUNXI_CCU_MP_WITH_MUX_GATE(mmc0_clk, "mmc0", mmc_parents, 0x830,
+					0, 4,	/* M */
+					8, 2,	/* N */
+					24, 2,	/* mux: bits 25:24 select among the 3 parents */
+					BIT(31),/* gate */
+					0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(mmc1_clk, "mmc1", mmc_parents, 0x834,
+					0, 4,	/* M */
+					8, 2,	/* N */
+					24, 2,	/* mux: bits 25:24 select among the 3 parents */
+					BIT(31),/* gate */
+					0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(mmc2_clk, "mmc2", mmc_parents, 0x838,
+					0, 4,	/* M */
+					8, 2,	/* N */
+					24, 2,	/* mux: bits 25:24 select among the 3 parents */
+					BIT(31),/* gate */
+					0);
+
+static SUNXI_CCU_GATE(bus_mmc0_clk, "bus-mmc0", "ahb3", 0x84c, BIT(0), 0);	/* RST_BUS_MMC0..2: bits 16..18 */
+static SUNXI_CCU_GATE(bus_mmc1_clk, "bus-mmc1", "ahb3", 0x84c, BIT(1), 0);
+static SUNXI_CCU_GATE(bus_mmc2_clk, "bus-mmc2", "ahb3", 0x84c, BIT(2), 0);
+
+static SUNXI_CCU_GATE(bus_uart0_clk, "bus-uart0", "apb2", 0x90c, BIT(0), 0);	/* RST_BUS_UART0..3: bits 16..19 */
+static SUNXI_CCU_GATE(bus_uart1_clk, "bus-uart1", "apb2", 0x90c, BIT(1), 0);
+static SUNXI_CCU_GATE(bus_uart2_clk, "bus-uart2", "apb2", 0x90c, BIT(2), 0);
+static SUNXI_CCU_GATE(bus_uart3_clk, "bus-uart3", "apb2", 0x90c, BIT(3), 0);
+
+static SUNXI_CCU_GATE(bus_i2c0_clk, "bus-i2c0", "apb2", 0x91c, BIT(0), 0);	/* RST_BUS_I2C0..3: bits 16..19 */
+static SUNXI_CCU_GATE(bus_i2c1_clk, "bus-i2c1", "apb2", 0x91c, BIT(1), 0);
+static SUNXI_CCU_GATE(bus_i2c2_clk, "bus-i2c2", "apb2", 0x91c, BIT(2), 0);
+static SUNXI_CCU_GATE(bus_i2c3_clk, "bus-i2c3", "apb2", 0x91c, BIT(3), 0);
+
+static SUNXI_CCU_GATE(bus_scr0_clk, "bus-scr0", "apb2", 0x93c, BIT(0), 0);	/* RST_BUS_SCR0..1: bits 16..17 */
+static SUNXI_CCU_GATE(bus_scr1_clk, "bus-scr1", "apb2", 0x93c, BIT(1), 0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(spi0_clk, "spi0", nand_spi_parents, 0x940,	/* parents shared with nand */
+					0, 4,	/* M */
+					8, 2,	/* N */
+					24, 3,	/* mux */
+					BIT(31),/* gate */
+					0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(spi1_clk, "spi1", nand_spi_parents, 0x944,
+					0, 4,	/* M */
+					8, 2,	/* N */
+					24, 3,	/* mux */
+					BIT(31),/* gate */
+					0);
+
+static SUNXI_CCU_GATE(bus_spi0_clk, "bus-spi0", "ahb3", 0x96c, BIT(0), 0);	/* RST_BUS_SPI0..1: bits 16..17 */
+static SUNXI_CCU_GATE(bus_spi1_clk, "bus-spi1", "ahb3", 0x96c, BIT(1), 0);
+
+static SUNXI_CCU_GATE(bus_emac_clk, "bus-emac", "ahb3", 0x97c, BIT(0), 0);	/* RST_BUS_EMAC: bit 16 */
+
+static const char * const ts_parents[] = { "osc24M", "pll-periph0" };
+static SUNXI_CCU_MP_WITH_MUX_GATE(ts_clk, "ts", ts_parents, 0x9b0,
+					0, 4,	/* M */
+					8, 2,	/* N */
+					24, 1,	/* mux */
+					BIT(31),/* gate */
+					0);
+
+static SUNXI_CCU_GATE(bus_ts_clk, "bus-ts", "ahb3", 0x9bc, BIT(0), 0);	/* RST_BUS_TS: bit 16 */
+
+static const char * const ir_tx_parents[] = { "osc32k", "osc24M" };
+static SUNXI_CCU_MP_WITH_MUX_GATE(ir_tx_clk, "ir-tx", ir_tx_parents, 0x9c0,
+					0, 4,	/* M */
+					8, 2,	/* N */
+					24, 1,	/* mux */
+					BIT(31),/* gate */
+					0);
+
+static SUNXI_CCU_GATE(bus_ir_tx_clk, "bus-ir-tx", "apb1", 0x9cc, BIT(0), 0);	/* RST_BUS_IR_TX: bit 16 */
+
+static SUNXI_CCU_GATE(bus_ths_clk, "bus-ths", "apb1", 0x9fc, BIT(0), 0);	/* RST_BUS_THS: bit 16 */
+
+static const char * const audio_parents[] = { "pll-audio", "pll-audio-2x", "pll-audio-4x" };	/* shared by i2s, spdif, dmic, audio-hub */
+static struct ccu_div i2s3_clk = {	/* i2s3 is declared first: its register (0xa0c) precedes i2s0's */
+	.enable		= BIT(31),
+	.div		= _SUNXI_CCU_DIV_FLAGS(8, 2, CLK_DIVIDER_POWER_OF_TWO),
+	.mux		= _SUNXI_CCU_MUX(24, 2),
+	.common		= {
+		.reg		= 0xa0c,
+		.hw.init	= CLK_HW_INIT_PARENTS("i2s3",
+						      audio_parents,
+						      &ccu_div_ops,
+						      0),
+	},
+};
+
+static struct ccu_div i2s0_clk = {
+	.enable		= BIT(31),
+	.div		= _SUNXI_CCU_DIV_FLAGS(8, 2, CLK_DIVIDER_POWER_OF_TWO),
+	.mux		= _SUNXI_CCU_MUX(24, 2),
+	.common		= {
+		.reg		= 0xa10,
+		.hw.init	= CLK_HW_INIT_PARENTS("i2s0",
+						      audio_parents,
+						      &ccu_div_ops,
+						      0),
+	},
+};
+
+static struct ccu_div i2s1_clk = {
+	.enable		= BIT(31),
+	.div		= _SUNXI_CCU_DIV_FLAGS(8, 2, CLK_DIVIDER_POWER_OF_TWO),
+	.mux		= _SUNXI_CCU_MUX(24, 2),
+	.common		= {
+		.reg		= 0xa14,
+		.hw.init	= CLK_HW_INIT_PARENTS("i2s1",
+						      audio_parents,
+						      &ccu_div_ops,
+						      0),
+	},
+};
+
+static struct ccu_div i2s2_clk = {
+	.enable		= BIT(31),
+	.div		= _SUNXI_CCU_DIV_FLAGS(8, 2, CLK_DIVIDER_POWER_OF_TWO),
+	.mux		= _SUNXI_CCU_MUX(24, 2),
+	.common		= {
+		.reg		= 0xa18,
+		.hw.init	= CLK_HW_INIT_PARENTS("i2s2",
+						      audio_parents,
+						      &ccu_div_ops,
+						      0),
+	},
+};
+
+static SUNXI_CCU_GATE(bus_i2s0_clk, "bus-i2s0", "apb1", 0xa1c, BIT(0), 0);	/* RST_BUS_I2S0..3: bits 16..19 */
+static SUNXI_CCU_GATE(bus_i2s1_clk, "bus-i2s1", "apb1", 0xa1c, BIT(1), 0);
+static SUNXI_CCU_GATE(bus_i2s2_clk, "bus-i2s2", "apb1", 0xa1c, BIT(2), 0);
+static SUNXI_CCU_GATE(bus_i2s3_clk, "bus-i2s3", "apb1", 0xa1c, BIT(3), 0);
+
+static struct ccu_div spdif_clk = {	/* same layout as the i2s clocks, at 0xa20 */
+	.enable		= BIT(31),
+	.div		= _SUNXI_CCU_DIV_FLAGS(8, 2, CLK_DIVIDER_POWER_OF_TWO),
+	.mux		= _SUNXI_CCU_MUX(24, 2),
+	.common		= {
+		.reg		= 0xa20,
+		.hw.init	= CLK_HW_INIT_PARENTS("spdif",
+						      audio_parents,
+						      &ccu_div_ops,
+						      0),
+	},
+};
+
+static SUNXI_CCU_GATE(bus_spdif_clk, "bus-spdif", "apb1", 0xa2c, BIT(0), 0);	/* RST_BUS_SPDIF: bit 16 */
+
+static struct ccu_div dmic_clk = {	/* same layout as the i2s clocks, at 0xa40 */
+	.enable		= BIT(31),
+	.div		= _SUNXI_CCU_DIV_FLAGS(8, 2, CLK_DIVIDER_POWER_OF_TWO),
+	.mux		= _SUNXI_CCU_MUX(24, 2),
+	.common		= {
+		.reg		= 0xa40,
+		.hw.init	= CLK_HW_INIT_PARENTS("dmic",
+						      audio_parents,
+						      &ccu_div_ops,
+						      0),
+	},
+};
+
+static SUNXI_CCU_GATE(bus_dmic_clk, "bus-dmic", "apb1", 0xa4c, BIT(0), 0);	/* RST_BUS_DMIC: bit 16 */
+
+static struct ccu_div audio_hub_clk = {	/* same layout as the i2s clocks, at 0xa60 */
+	.enable		= BIT(31),
+	.div		= _SUNXI_CCU_DIV_FLAGS(8, 2, CLK_DIVIDER_POWER_OF_TWO),
+	.mux		= _SUNXI_CCU_MUX(24, 2),
+	.common		= {
+		.reg		= 0xa60,
+		.hw.init	= CLK_HW_INIT_PARENTS("audio-hub",
+						      audio_parents,
+						      &ccu_div_ops,
+						      0),
+	},
+};
+
+static SUNXI_CCU_GATE(bus_audio_hub_clk, "bus-audio-hub", "apb1", 0xa6c, BIT(0), 0);	/* RST_BUS_AUDIO_HUB: bit 16 */
+
+/*
+ * The USB0 and USB3 clock registers carry OHCI 12M clock source selection
+ * bits (25:24). The probe function forces them to 0 (12M divided from 48M).
+ */
+#define SUN50I_H6_USB0_CLK_REG		0xa70
+#define SUN50I_H6_USB3_CLK_REG		0xa7c
+
+static SUNXI_CCU_GATE(usb_ohci0_clk, "usb-ohci0", "osc12M", 0xa70, BIT(31), 0);	/* 0xa70 == SUN50I_H6_USB0_CLK_REG */
+static SUNXI_CCU_GATE(usb_phy0_clk, "usb-phy0", "osc24M", 0xa70, BIT(29), 0);	/* RST_USB_PHY0: bit 30 */
+
+static SUNXI_CCU_GATE(usb_phy1_clk, "usb-phy1", "osc24M", 0xa74, BIT(29), 0);	/* RST_USB_PHY1: bit 30 */
+
+static SUNXI_CCU_GATE(usb_ohci3_clk, "usb-ohci3", "osc12M", 0xa7c, BIT(31), 0);	/* 0xa7c == SUN50I_H6_USB3_CLK_REG */
+static SUNXI_CCU_GATE(usb_phy3_clk, "usb-phy3", "osc12M", 0xa7c, BIT(29), 0);	/* RST_USB_PHY3: bit 30 */
+static SUNXI_CCU_GATE(usb_hsic_12m_clk, "usb-hsic-12M", "osc12M", 0xa7c, BIT(27), 0);
+static SUNXI_CCU_GATE(usb_hsic_clk, "usb-hsic", "pll-hsic", 0xa7c, BIT(26), 0);	/* RST_USB_HSIC: bit 28 */
+
+static SUNXI_CCU_GATE(bus_ohci0_clk, "bus-ohci0", "ahb3", 0xa8c, BIT(0), 0);	/* resets for 0xa8c sit 16 bits above each gate */
+static SUNXI_CCU_GATE(bus_ohci3_clk, "bus-ohci3", "ahb3", 0xa8c, BIT(3), 0);
+static SUNXI_CCU_GATE(bus_ehci0_clk, "bus-ehci0", "ahb3", 0xa8c, BIT(4), 0);
+static SUNXI_CCU_GATE(bus_xhci_clk, "bus-xhci", "ahb3", 0xa8c, BIT(5), 0);
+static SUNXI_CCU_GATE(bus_ehci3_clk, "bus-ehci3", "ahb3", 0xa8c, BIT(7), 0);
+static SUNXI_CCU_GATE(bus_otg_clk, "bus-otg", "ahb3", 0xa8c, BIT(8), 0);
+
+static CLK_FIXED_FACTOR(pcie_ref_100m_clk, "pcie-ref-100M",
+			"pll-periph0-4x", 24, 1, 0);	/* fixed-factor clock: not in sun50i_h6_ccu_clks */
+static SUNXI_CCU_GATE(pcie_ref_clk, "pcie-ref", "pcie-ref-100M",
+		      0xab0, BIT(31), 0);
+static SUNXI_CCU_GATE(pcie_ref_out_clk, "pcie-ref-out", "pcie-ref",
+		      0xab0, BIT(30), 0);	/* chained behind the pcie-ref gate */
+
+static SUNXI_CCU_M_WITH_GATE(pcie_maxi_clk, "pcie-maxi",
+			     "pll-periph0", 0xab4,
+			     0, 4,	/* M */
+			     BIT(31),	/* gate */
+			     0);
+
+static SUNXI_CCU_M_WITH_GATE(pcie_aux_clk, "pcie-aux", "osc24M", 0xab8,
+			     0, 5,	/* M */
+			     BIT(31),	/* gate */
+			     0);
+
+static SUNXI_CCU_GATE(bus_pcie_clk, "bus-pcie", "psi-ahb1-ahb2",
+		      0xabc, BIT(0), 0);	/* RST_BUS_PCIE: bit 16, RST_PCIE_POWERUP: bit 17 */
+
+static const char * const hdmi_parents[] = { "pll-video0", "pll-video1",
+					      "pll-video1-4x" };
+static SUNXI_CCU_M_WITH_MUX_GATE(hdmi_clk, "hdmi", hdmi_parents, 0xb00,
+				 0, 4,		/* M */
+				 24, 2,		/* mux */
+				 BIT(31),	/* gate */
+				 0);		/* exported as CLK_HDMI */
+
+static SUNXI_CCU_GATE(hdmi_slow_clk, "hdmi-slow", "osc24M", 0xb04, BIT(31), 0);	/* exported as CLK_HDMI_SLOW */
+
+static const char * const hdmi_cec_parents[] = { "osc32k", "pll-periph0-2x" };
+static const struct ccu_mux_fixed_prediv hdmi_cec_predivs[] = {
+	{ .index = 1, .div = 36621 },	/* parent 1 (pll-periph0-2x) is pre-divided by 36621 */
+};
+static struct ccu_mux hdmi_cec_clk = {
+	.enable		= BIT(31),
+
+	.mux		= {
+		.shift	= 24,
+		.width	= 2,
+
+		.fixed_predivs	= hdmi_cec_predivs,
+		.n_predivs	= ARRAY_SIZE(hdmi_cec_predivs),
+	},
+
+	.common		= {
+		.reg		= 0xb10,
+		.features	= CCU_FEATURE_FIXED_PREDIV,	/* pairs with .fixed_predivs above */
+		.hw.init	= CLK_HW_INIT_PARENTS("hdmi-cec",
+						      hdmi_cec_parents,
+						      &ccu_mux_ops,
+						      0),
+	},
+};
+
+static SUNXI_CCU_GATE(bus_hdmi_clk, "bus-hdmi", "ahb3", 0xb1c, BIT(0), 0);	/* RST_BUS_HDMI: bit 16, RST_BUS_HDMI_SUB: bit 17 */
+
+static SUNXI_CCU_GATE(bus_tcon_top_clk, "bus-tcon-top", "ahb3",
+		      0xb5c, BIT(0), 0);	/* RST_BUS_TCON_TOP is bit 16 of 0xb5c */
+
+static const char * const tcon_lcd0_parents[] = { "pll-video0",
+						  "pll-video0-4x",
+						  "pll-video1" };
+static SUNXI_CCU_MUX_WITH_GATE(tcon_lcd0_clk, "tcon-lcd0",	/* mux and gate only, no divider arguments */
+			       tcon_lcd0_parents, 0xb60,
+			       24, 3,	/* mux */
+			       BIT(31),	/* gate */
+			       0);
+
+static SUNXI_CCU_GATE(bus_tcon_lcd0_clk, "bus-tcon-lcd0", "ahb3",
+		      0xb7c, BIT(0), 0);	/* RST_BUS_TCON_LCD0 is bit 16 of 0xb7c */
+
+static const char * const tcon_tv0_parents[] = { "pll-video0",
+						 "pll-video0-4x",
+						 "pll-video1",
+						 "pll-video1-4x" };
+static SUNXI_CCU_MP_WITH_MUX_GATE(tcon_tv0_clk, "tcon-tv0",
+				  tcon_tv0_parents, 0xb80,
+				  0, 4,		/* M */
+				  8, 2,		/* P */
+				  24, 3,	/* mux */
+				  BIT(31),	/* gate */
+				  0);
+
+static SUNXI_CCU_GATE(bus_tcon_tv0_clk, "bus-tcon-tv0", "ahb3",
+		      0xb9c, BIT(0), 0);	/* RST_BUS_TCON_TV0 is bit 16 of 0xb9c */
+
+static SUNXI_CCU_GATE(csi_cci_clk, "csi-cci", "osc24M", 0xc00, BIT(0), 0);	/* exported as CLK_CSI_CCI */
+
+static const char * const csi_top_parents[] = { "pll-video0", "pll-ve",
+					      "pll-periph0" };
+static const u8 csi_top_table[] = { 0, 2, 3 };	/* one entry per parent above; presumably the mux values */
+static SUNXI_CCU_M_WITH_MUX_TABLE_GATE(csi_top_clk, "csi-top",
+				       csi_top_parents, csi_top_table, 0xc04,
+				       0, 4,	/* M */
+				       24, 3,	/* mux */
+				       BIT(31),	/* gate */
+				       0);
+
+static const char * const csi_mclk_parents[] = { "osc24M", "pll-video0",
+					       "pll-periph0", "pll-periph1" };
+static SUNXI_CCU_M_WITH_MUX_GATE(csi_mclk_clk, "csi-mclk",
+				 csi_mclk_parents, 0xc08,
+				 0, 5,		/* M */
+				 24, 3,		/* mux */
+				 BIT(31),	/* gate */
+				 0);
+
+static SUNXI_CCU_GATE(bus_csi_clk, "bus-csi", "ahb3", 0xc2c, BIT(0), 0);	/* RST_BUS_CSI: bit 16 */
+
+static const char * const hdcp_parents[] = { "pll-periph0", "pll-periph1" };
+static SUNXI_CCU_M_WITH_MUX_GATE(hdcp_clk, "hdcp", hdcp_parents, 0xc40,
+				 0, 4,		/* M */
+				 24, 2,		/* mux */
+				 BIT(31),	/* gate */
+				 0);
+
+static SUNXI_CCU_GATE(bus_hdcp_clk, "bus-hdcp", "ahb3", 0xc4c, BIT(0), 0);	/* RST_BUS_HDCP: bit 16 */
+
+/* Fixed factor clocks; they appear in sun50i_h6_hw_clks but not in sun50i_h6_ccu_clks */
+static CLK_FIXED_FACTOR(osc12M_clk, "osc12M", "osc24M", 2, 1, 0);
+
+/*
+ * The divider of pll-audio is fixed to 8 now, as pll-audio-4x has a
+ * fixed post-divider 2.
+ */
+static CLK_FIXED_FACTOR(pll_audio_clk, "pll-audio",
+			"pll-audio-base", 8, 1, CLK_SET_RATE_PARENT);
+static CLK_FIXED_FACTOR(pll_audio_2x_clk, "pll-audio-2x",
+			"pll-audio-base", 4, 1, CLK_SET_RATE_PARENT);
+static CLK_FIXED_FACTOR(pll_audio_4x_clk, "pll-audio-4x",
+			"pll-audio-base", 2, 1, CLK_SET_RATE_PARENT);
+
+static CLK_FIXED_FACTOR(pll_periph0_4x_clk, "pll-periph0-4x",
+			"pll-periph0", 1, 4, 0);	/* parent of pcie-ref-100M */
+static CLK_FIXED_FACTOR(pll_periph0_2x_clk, "pll-periph0-2x",
+			"pll-periph0", 1, 2, 0);
+
+static CLK_FIXED_FACTOR(pll_periph1_4x_clk, "pll-periph1-4x",
+			"pll-periph1", 1, 4, 0);
+static CLK_FIXED_FACTOR(pll_periph1_2x_clk, "pll-periph1-2x",
+			"pll-periph1", 1, 2, 0);
+
+static CLK_FIXED_FACTOR(pll_video0_4x_clk, "pll-video0-4x",
+			"pll-video0", 1, 4, CLK_SET_RATE_PARENT);
+
+static CLK_FIXED_FACTOR(pll_video1_4x_clk, "pll-video1-4x",
+			"pll-video1", 1, 4, CLK_SET_RATE_PARENT);
+
+static struct ccu_common *sun50i_h6_ccu_clks[] = {	/* every ccu_common-based clock; handed over via sun50i_h6_ccu_desc */
+	&pll_cpux_clk.common,
+	&pll_ddr0_clk.common,
+	&pll_periph0_clk.common,
+	&pll_periph1_clk.common,
+	&pll_gpu_clk.common,
+	&pll_video0_clk.common,
+	&pll_video1_clk.common,
+	&pll_ve_clk.common,
+	&pll_de_clk.common,
+	&pll_hsic_clk.common,
+	&pll_audio_base_clk.common,
+	&cpux_clk.common,
+	&axi_clk.common,
+	&cpux_apb_clk.common,
+	&psi_ahb1_ahb2_clk.common,
+	&ahb3_clk.common,
+	&apb1_clk.common,
+	&apb2_clk.common,
+	&mbus_clk.common,
+	&de_clk.common,
+	&bus_de_clk.common,
+	&deinterlace_clk.common,
+	&bus_deinterlace_clk.common,
+	&gpu_clk.common,
+	&bus_gpu_clk.common,
+	&ce_clk.common,
+	&bus_ce_clk.common,
+	&ve_clk.common,
+	&bus_ve_clk.common,
+	&emce_clk.common,
+	&bus_emce_clk.common,
+	&vp9_clk.common,
+	&bus_vp9_clk.common,
+	&bus_dma_clk.common,
+	&bus_msgbox_clk.common,
+	&bus_spinlock_clk.common,
+	&bus_hstimer_clk.common,
+	&avs_clk.common,
+	&bus_dbg_clk.common,
+	&bus_psi_clk.common,
+	&bus_pwm_clk.common,
+	&bus_iommu_clk.common,
+	&dram_clk.common,
+	&mbus_dma_clk.common,
+	&mbus_ve_clk.common,
+	&mbus_ce_clk.common,
+	&mbus_ts_clk.common,
+	&mbus_nand_clk.common,
+	&mbus_csi_clk.common,
+	&mbus_deinterlace_clk.common,
+	&bus_dram_clk.common,
+	&nand0_clk.common,
+	&nand1_clk.common,
+	&bus_nand_clk.common,
+	&mmc0_clk.common,
+	&mmc1_clk.common,
+	&mmc2_clk.common,
+	&bus_mmc0_clk.common,
+	&bus_mmc1_clk.common,
+	&bus_mmc2_clk.common,
+	&bus_uart0_clk.common,
+	&bus_uart1_clk.common,
+	&bus_uart2_clk.common,
+	&bus_uart3_clk.common,
+	&bus_i2c0_clk.common,
+	&bus_i2c1_clk.common,
+	&bus_i2c2_clk.common,
+	&bus_i2c3_clk.common,
+	&bus_scr0_clk.common,
+	&bus_scr1_clk.common,
+	&spi0_clk.common,
+	&spi1_clk.common,
+	&bus_spi0_clk.common,
+	&bus_spi1_clk.common,
+	&bus_emac_clk.common,
+	&ts_clk.common,
+	&bus_ts_clk.common,
+	&ir_tx_clk.common,
+	&bus_ir_tx_clk.common,
+	&bus_ths_clk.common,
+	&i2s3_clk.common,
+	&i2s0_clk.common,
+	&i2s1_clk.common,
+	&i2s2_clk.common,
+	&bus_i2s0_clk.common,
+	&bus_i2s1_clk.common,
+	&bus_i2s2_clk.common,
+	&bus_i2s3_clk.common,
+	&spdif_clk.common,
+	&bus_spdif_clk.common,
+	&dmic_clk.common,
+	&bus_dmic_clk.common,
+	&audio_hub_clk.common,
+	&bus_audio_hub_clk.common,
+	&usb_ohci0_clk.common,
+	&usb_phy0_clk.common,
+	&usb_phy1_clk.common,
+	&usb_ohci3_clk.common,
+	&usb_phy3_clk.common,
+	&usb_hsic_12m_clk.common,
+	&usb_hsic_clk.common,
+	&bus_ohci0_clk.common,
+	&bus_ohci3_clk.common,
+	&bus_ehci0_clk.common,
+	&bus_xhci_clk.common,
+	&bus_ehci3_clk.common,
+	&bus_otg_clk.common,
+	&pcie_ref_clk.common,
+	&pcie_ref_out_clk.common,
+	&pcie_maxi_clk.common,
+	&pcie_aux_clk.common,
+	&bus_pcie_clk.common,
+	&hdmi_clk.common,
+	&hdmi_slow_clk.common,
+	&hdmi_cec_clk.common,
+	&bus_hdmi_clk.common,
+	&bus_tcon_top_clk.common,
+	&tcon_lcd0_clk.common,
+	&bus_tcon_lcd0_clk.common,
+	&tcon_tv0_clk.common,
+	&bus_tcon_tv0_clk.common,
+	&csi_cci_clk.common,
+	&csi_top_clk.common,
+	&csi_mclk_clk.common,
+	&bus_csi_clk.common,
+	&hdcp_clk.common,
+	&bus_hdcp_clk.common,
+};
+
+static struct clk_hw_onecell_data sun50i_h6_hw_clks = {	/* clk_hw lookup table indexed by the CLK_* ids */
+	.hws	= {
+		[CLK_OSC12M]		= &osc12M_clk.hw,
+		[CLK_PLL_CPUX]		= &pll_cpux_clk.common.hw,
+		[CLK_PLL_DDR0]		= &pll_ddr0_clk.common.hw,
+		[CLK_PLL_PERIPH0]	= &pll_periph0_clk.common.hw,
+		[CLK_PLL_PERIPH0_2X]	= &pll_periph0_2x_clk.hw,
+		[CLK_PLL_PERIPH0_4X]	= &pll_periph0_4x_clk.hw,
+		[CLK_PLL_PERIPH1]	= &pll_periph1_clk.common.hw,
+		[CLK_PLL_PERIPH1_2X]	= &pll_periph1_2x_clk.hw,
+		[CLK_PLL_PERIPH1_4X]	= &pll_periph1_4x_clk.hw,
+		[CLK_PLL_GPU]		= &pll_gpu_clk.common.hw,
+		[CLK_PLL_VIDEO0]	= &pll_video0_clk.common.hw,
+		[CLK_PLL_VIDEO0_4X]	= &pll_video0_4x_clk.hw,
+		[CLK_PLL_VIDEO1]	= &pll_video1_clk.common.hw,
+		[CLK_PLL_VIDEO1_4X]	= &pll_video1_4x_clk.hw,
+		[CLK_PLL_VE]		= &pll_ve_clk.common.hw,
+		[CLK_PLL_DE]		= &pll_de_clk.common.hw,
+		[CLK_PLL_HSIC]		= &pll_hsic_clk.common.hw,
+		[CLK_PLL_AUDIO_BASE]	= &pll_audio_base_clk.common.hw,
+		[CLK_PLL_AUDIO]		= &pll_audio_clk.hw,
+		[CLK_PLL_AUDIO_2X]	= &pll_audio_2x_clk.hw,
+		[CLK_PLL_AUDIO_4X]	= &pll_audio_4x_clk.hw,
+		[CLK_CPUX]		= &cpux_clk.common.hw,
+		[CLK_AXI]		= &axi_clk.common.hw,
+		[CLK_CPUX_APB]		= &cpux_apb_clk.common.hw,
+		[CLK_PSI_AHB1_AHB2]	= &psi_ahb1_ahb2_clk.common.hw,
+		[CLK_AHB3]		= &ahb3_clk.common.hw,
+		[CLK_APB1]		= &apb1_clk.common.hw,
+		[CLK_APB2]		= &apb2_clk.common.hw,
+		[CLK_MBUS]		= &mbus_clk.common.hw,
+		[CLK_DE]		= &de_clk.common.hw,
+		[CLK_BUS_DE]		= &bus_de_clk.common.hw,
+		[CLK_DEINTERLACE]	= &deinterlace_clk.common.hw,
+		[CLK_BUS_DEINTERLACE]	= &bus_deinterlace_clk.common.hw,
+		[CLK_GPU]		= &gpu_clk.common.hw,
+		[CLK_BUS_GPU]		= &bus_gpu_clk.common.hw,
+		[CLK_CE]		= &ce_clk.common.hw,
+		[CLK_BUS_CE]		= &bus_ce_clk.common.hw,
+		[CLK_VE]		= &ve_clk.common.hw,
+		[CLK_BUS_VE]		= &bus_ve_clk.common.hw,
+		[CLK_EMCE]		= &emce_clk.common.hw,
+		[CLK_BUS_EMCE]		= &bus_emce_clk.common.hw,
+		[CLK_VP9]		= &vp9_clk.common.hw,
+		[CLK_BUS_VP9]		= &bus_vp9_clk.common.hw,
+		[CLK_BUS_DMA]		= &bus_dma_clk.common.hw,
+		[CLK_BUS_MSGBOX]	= &bus_msgbox_clk.common.hw,
+		[CLK_BUS_SPINLOCK]	= &bus_spinlock_clk.common.hw,
+		[CLK_BUS_HSTIMER]	= &bus_hstimer_clk.common.hw,
+		[CLK_AVS]		= &avs_clk.common.hw,
+		[CLK_BUS_DBG]		= &bus_dbg_clk.common.hw,
+		[CLK_BUS_PSI]		= &bus_psi_clk.common.hw,
+		[CLK_BUS_PWM]		= &bus_pwm_clk.common.hw,
+		[CLK_BUS_IOMMU]		= &bus_iommu_clk.common.hw,
+		[CLK_DRAM]		= &dram_clk.common.hw,
+		[CLK_MBUS_DMA]		= &mbus_dma_clk.common.hw,
+		[CLK_MBUS_VE]		= &mbus_ve_clk.common.hw,
+		[CLK_MBUS_CE]		= &mbus_ce_clk.common.hw,
+		[CLK_MBUS_TS]		= &mbus_ts_clk.common.hw,
+		[CLK_MBUS_NAND]		= &mbus_nand_clk.common.hw,
+		[CLK_MBUS_CSI]		= &mbus_csi_clk.common.hw,
+		[CLK_MBUS_DEINTERLACE]	= &mbus_deinterlace_clk.common.hw,
+		[CLK_BUS_DRAM]		= &bus_dram_clk.common.hw,
+		[CLK_NAND0]		= &nand0_clk.common.hw,
+		[CLK_NAND1]		= &nand1_clk.common.hw,
+		[CLK_BUS_NAND]		= &bus_nand_clk.common.hw,
+		[CLK_MMC0]		= &mmc0_clk.common.hw,
+		[CLK_MMC1]		= &mmc1_clk.common.hw,
+		[CLK_MMC2]		= &mmc2_clk.common.hw,
+		[CLK_BUS_MMC0]		= &bus_mmc0_clk.common.hw,
+		[CLK_BUS_MMC1]		= &bus_mmc1_clk.common.hw,
+		[CLK_BUS_MMC2]		= &bus_mmc2_clk.common.hw,
+		[CLK_BUS_UART0]		= &bus_uart0_clk.common.hw,
+		[CLK_BUS_UART1]		= &bus_uart1_clk.common.hw,
+		[CLK_BUS_UART2]		= &bus_uart2_clk.common.hw,
+		[CLK_BUS_UART3]		= &bus_uart3_clk.common.hw,
+		[CLK_BUS_I2C0]		= &bus_i2c0_clk.common.hw,
+		[CLK_BUS_I2C1]		= &bus_i2c1_clk.common.hw,
+		[CLK_BUS_I2C2]		= &bus_i2c2_clk.common.hw,
+		[CLK_BUS_I2C3]		= &bus_i2c3_clk.common.hw,
+		[CLK_BUS_SCR0]		= &bus_scr0_clk.common.hw,
+		[CLK_BUS_SCR1]		= &bus_scr1_clk.common.hw,
+		[CLK_SPI0]		= &spi0_clk.common.hw,
+		[CLK_SPI1]		= &spi1_clk.common.hw,
+		[CLK_BUS_SPI0]		= &bus_spi0_clk.common.hw,
+		[CLK_BUS_SPI1]		= &bus_spi1_clk.common.hw,
+		[CLK_BUS_EMAC]		= &bus_emac_clk.common.hw,
+		[CLK_TS]		= &ts_clk.common.hw,
+		[CLK_BUS_TS]		= &bus_ts_clk.common.hw,
+		[CLK_IR_TX]		= &ir_tx_clk.common.hw,
+		[CLK_BUS_IR_TX]		= &bus_ir_tx_clk.common.hw,
+		[CLK_BUS_THS]		= &bus_ths_clk.common.hw,
+		[CLK_I2S3]		= &i2s3_clk.common.hw,
+		[CLK_I2S0]		= &i2s0_clk.common.hw,
+		[CLK_I2S1]		= &i2s1_clk.common.hw,
+		[CLK_I2S2]		= &i2s2_clk.common.hw,
+		[CLK_BUS_I2S0]		= &bus_i2s0_clk.common.hw,
+		[CLK_BUS_I2S1]		= &bus_i2s1_clk.common.hw,
+		[CLK_BUS_I2S2]		= &bus_i2s2_clk.common.hw,
+		[CLK_BUS_I2S3]		= &bus_i2s3_clk.common.hw,
+		[CLK_SPDIF]		= &spdif_clk.common.hw,
+		[CLK_BUS_SPDIF]		= &bus_spdif_clk.common.hw,
+		[CLK_DMIC]		= &dmic_clk.common.hw,
+		[CLK_BUS_DMIC]		= &bus_dmic_clk.common.hw,
+		[CLK_AUDIO_HUB]		= &audio_hub_clk.common.hw,
+		[CLK_BUS_AUDIO_HUB]	= &bus_audio_hub_clk.common.hw,
+		[CLK_USB_OHCI0]		= &usb_ohci0_clk.common.hw,
+		[CLK_USB_PHY0]		= &usb_phy0_clk.common.hw,
+		[CLK_USB_PHY1]		= &usb_phy1_clk.common.hw,
+		[CLK_USB_OHCI3]		= &usb_ohci3_clk.common.hw,
+		[CLK_USB_PHY3]		= &usb_phy3_clk.common.hw,
+		[CLK_USB_HSIC_12M]	= &usb_hsic_12m_clk.common.hw,
+		[CLK_USB_HSIC]		= &usb_hsic_clk.common.hw,
+		[CLK_BUS_OHCI0]		= &bus_ohci0_clk.common.hw,
+		[CLK_BUS_OHCI3]		= &bus_ohci3_clk.common.hw,
+		[CLK_BUS_EHCI0]		= &bus_ehci0_clk.common.hw,
+		[CLK_BUS_XHCI]		= &bus_xhci_clk.common.hw,
+		[CLK_BUS_EHCI3]		= &bus_ehci3_clk.common.hw,
+		[CLK_BUS_OTG]		= &bus_otg_clk.common.hw,
+		[CLK_PCIE_REF_100M]	= &pcie_ref_100m_clk.hw,
+		[CLK_PCIE_REF]		= &pcie_ref_clk.common.hw,
+		[CLK_PCIE_REF_OUT]	= &pcie_ref_out_clk.common.hw,
+		[CLK_PCIE_MAXI]		= &pcie_maxi_clk.common.hw,
+		[CLK_PCIE_AUX]		= &pcie_aux_clk.common.hw,
+		[CLK_BUS_PCIE]		= &bus_pcie_clk.common.hw,
+		[CLK_HDMI]		= &hdmi_clk.common.hw,
+		[CLK_HDMI_SLOW]		= &hdmi_slow_clk.common.hw,
+		[CLK_HDMI_CEC]		= &hdmi_cec_clk.common.hw,
+		[CLK_BUS_HDMI]		= &bus_hdmi_clk.common.hw,
+		[CLK_BUS_TCON_TOP]	= &bus_tcon_top_clk.common.hw,
+		[CLK_TCON_LCD0]		= &tcon_lcd0_clk.common.hw,
+		[CLK_BUS_TCON_LCD0]	= &bus_tcon_lcd0_clk.common.hw,
+		[CLK_TCON_TV0]		= &tcon_tv0_clk.common.hw,
+		[CLK_BUS_TCON_TV0]	= &bus_tcon_tv0_clk.common.hw,
+		[CLK_CSI_CCI]		= &csi_cci_clk.common.hw,
+		[CLK_CSI_TOP]		= &csi_top_clk.common.hw,
+		[CLK_CSI_MCLK]		= &csi_mclk_clk.common.hw,
+		[CLK_BUS_CSI]		= &bus_csi_clk.common.hw,
+		[CLK_HDCP]		= &hdcp_clk.common.hw,
+		[CLK_BUS_HDCP]		= &bus_hdcp_clk.common.hw,
+	},
+	.num = CLK_NUMBER,	/* CLK_BUS_HDCP + 1, see ccu-sun50i-h6.h */
+};
+
+static struct ccu_reset_map sun50i_h6_ccu_resets[] = {	/* { register offset, reset bit } per RST_* id */
+	[RST_MBUS]		= { 0x540, BIT(30) },
+
+	[RST_BUS_DE]		= { 0x60c, BIT(16) },	/* bus resets share a register with their bus gate */
+	[RST_BUS_DEINTERLACE]	= { 0x62c, BIT(16) },
+	[RST_BUS_GPU]		= { 0x67c, BIT(16) },
+	[RST_BUS_CE]		= { 0x68c, BIT(16) },
+	[RST_BUS_VE]		= { 0x69c, BIT(16) },
+	[RST_BUS_EMCE]		= { 0x6bc, BIT(16) },
+	[RST_BUS_VP9]		= { 0x6cc, BIT(16) },
+	[RST_BUS_DMA]		= { 0x70c, BIT(16) },
+	[RST_BUS_MSGBOX]	= { 0x71c, BIT(16) },
+	[RST_BUS_SPINLOCK]	= { 0x72c, BIT(16) },
+	[RST_BUS_HSTIMER]	= { 0x73c, BIT(16) },
+	[RST_BUS_DBG]		= { 0x78c, BIT(16) },
+	[RST_BUS_PSI]		= { 0x79c, BIT(16) },
+	[RST_BUS_PWM]		= { 0x7ac, BIT(16) },
+	[RST_BUS_IOMMU]		= { 0x7bc, BIT(16) },
+	[RST_BUS_DRAM]		= { 0x80c, BIT(16) },
+	[RST_BUS_NAND]		= { 0x82c, BIT(16) },
+	[RST_BUS_MMC0]		= { 0x84c, BIT(16) },
+	[RST_BUS_MMC1]		= { 0x84c, BIT(17) },
+	[RST_BUS_MMC2]		= { 0x84c, BIT(18) },
+	[RST_BUS_UART0]		= { 0x90c, BIT(16) },
+	[RST_BUS_UART1]		= { 0x90c, BIT(17) },
+	[RST_BUS_UART2]		= { 0x90c, BIT(18) },
+	[RST_BUS_UART3]		= { 0x90c, BIT(19) },
+	[RST_BUS_I2C0]		= { 0x91c, BIT(16) },
+	[RST_BUS_I2C1]		= { 0x91c, BIT(17) },
+	[RST_BUS_I2C2]		= { 0x91c, BIT(18) },
+	[RST_BUS_I2C3]		= { 0x91c, BIT(19) },
+	[RST_BUS_SCR0]		= { 0x93c, BIT(16) },
+	[RST_BUS_SCR1]		= { 0x93c, BIT(17) },
+	[RST_BUS_SPI0]		= { 0x96c, BIT(16) },
+	[RST_BUS_SPI1]		= { 0x96c, BIT(17) },
+	[RST_BUS_EMAC]		= { 0x97c, BIT(16) },
+	[RST_BUS_TS]		= { 0x9bc, BIT(16) },
+	[RST_BUS_IR_TX]		= { 0x9cc, BIT(16) },
+	[RST_BUS_THS]		= { 0x9fc, BIT(16) },
+	[RST_BUS_I2S0]		= { 0xa1c, BIT(16) },
+	[RST_BUS_I2S1]		= { 0xa1c, BIT(17) },
+	[RST_BUS_I2S2]		= { 0xa1c, BIT(18) },
+	[RST_BUS_I2S3]		= { 0xa1c, BIT(19) },
+	[RST_BUS_SPDIF]		= { 0xa2c, BIT(16) },
+	[RST_BUS_DMIC]		= { 0xa4c, BIT(16) },
+	[RST_BUS_AUDIO_HUB]	= { 0xa6c, BIT(16) },
+
+	[RST_USB_PHY0]		= { 0xa70, BIT(30) },	/* USB PHY/HSIC resets live in the USB clock registers */
+	[RST_USB_PHY1]		= { 0xa74, BIT(30) },
+	[RST_USB_PHY3]		= { 0xa7c, BIT(30) },
+	[RST_USB_HSIC]		= { 0xa7c, BIT(28) },
+
+	[RST_BUS_OHCI0]		= { 0xa8c, BIT(16) },
+	[RST_BUS_OHCI3]		= { 0xa8c, BIT(19) },
+	[RST_BUS_EHCI0]		= { 0xa8c, BIT(20) },
+	[RST_BUS_XHCI]		= { 0xa8c, BIT(21) },
+	[RST_BUS_EHCI3]		= { 0xa8c, BIT(23) },
+	[RST_BUS_OTG]		= { 0xa8c, BIT(24) },
+	[RST_BUS_PCIE]		= { 0xabc, BIT(16) },
+
+	[RST_PCIE_POWERUP]	= { 0xabc, BIT(17) },
+
+	[RST_BUS_HDMI]		= { 0xb1c, BIT(16) },
+	[RST_BUS_HDMI_SUB]	= { 0xb1c, BIT(17) },
+	[RST_BUS_TCON_TOP]	= { 0xb5c, BIT(16) },
+	[RST_BUS_TCON_LCD0]	= { 0xb7c, BIT(16) },
+	[RST_BUS_TCON_TV0]	= { 0xb9c, BIT(16) },
+	[RST_BUS_CSI]		= { 0xc2c, BIT(16) },
+	[RST_BUS_HDCP]		= { 0xc4c, BIT(16) },
+};
+
+static const struct sunxi_ccu_desc sun50i_h6_ccu_desc = {	/* passed to sunxi_ccu_probe() by the probe function */
+	.ccu_clks	= sun50i_h6_ccu_clks,
+	.num_ccu_clks	= ARRAY_SIZE(sun50i_h6_ccu_clks),
+
+	.hw_clks	= &sun50i_h6_hw_clks,
+
+	.resets		= sun50i_h6_ccu_resets,
+	.num_resets	= ARRAY_SIZE(sun50i_h6_ccu_resets),
+};
+
+static const u32 pll_regs[] = {	/* the probe function sets bit 29 in each of these */
+	SUN50I_H6_PLL_CPUX_REG,
+	SUN50I_H6_PLL_DDR0_REG,
+	SUN50I_H6_PLL_PERIPH0_REG,
+	SUN50I_H6_PLL_PERIPH1_REG,
+	SUN50I_H6_PLL_GPU_REG,
+	SUN50I_H6_PLL_VIDEO0_REG,
+	SUN50I_H6_PLL_VIDEO1_REG,
+	SUN50I_H6_PLL_VE_REG,
+	SUN50I_H6_PLL_DE_REG,
+	SUN50I_H6_PLL_HSIC_REG,
+	SUN50I_H6_PLL_AUDIO_REG,
+};
+
+static const u32 pll_video_regs[] = {	/* the probe function clears bit 0 in each of these */
+	SUN50I_H6_PLL_VIDEO0_REG,
+	SUN50I_H6_PLL_VIDEO1_REG,
+};
+
+static const u32 usb2_clk_regs[] = {	/* the probe function clears bits 25:24 in each of these */
+	SUN50I_H6_USB0_CLK_REG,
+	SUN50I_H6_USB3_CLK_REG,
+};
+
+/* Map the CCU registers, apply one-time PLL/USB fixups, then call sunxi_ccu_probe(). */
+static int sun50i_h6_ccu_probe(struct platform_device *pdev)
+{
+	struct resource *res;
+	void __iomem *reg;
+	u32 val;
+	int i;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	reg = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(reg))
+		return PTR_ERR(reg);
+
+	/* Enable the lock bits (bit 29) on all PLLs */
+	for (i = 0; i < ARRAY_SIZE(pll_regs); i++) {
+		val = readl(reg + pll_regs[i]);
+		val |= BIT(29);
+		writel(val, reg + pll_regs[i]);
+	}
+
+	/*
+	 * Force the output divider bit (bit 0) of the video PLLs to 0.
+	 * See the comment before pll-video0 definition for the reason.
+	 */
+	for (i = 0; i < ARRAY_SIZE(pll_video_regs); i++) {
+		val = readl(reg + pll_video_regs[i]);
+		val &= ~BIT(0);
+		writel(val, reg + pll_video_regs[i]);
+	}
+
+	/*
+	 * Force OHCI 12M clock sources (bits 25:24) to 00, which selects
+	 * 12MHz divided from 48MHz.
+	 *
+	 * This clock mux is still mysterious, and the code just enforces
+	 * it to have a valid clock parent.
+	 */
+	for (i = 0; i < ARRAY_SIZE(usb2_clk_regs); i++) {
+		val = readl(reg + usb2_clk_regs[i]);
+		val &= ~GENMASK(25, 24);
+		writel(val, reg + usb2_clk_regs[i]);
+	}
+
+	/*
+	 * Force the post-divider of pll-audio (bits 21:16) to 8 and its output
+	 * divider (bit 0) to 1, so the clock names represent the real frequency.
+	 */
+	val = readl(reg + SUN50I_H6_PLL_AUDIO_REG);
+	val &= ~(GENMASK(21, 16) | BIT(0));
+	writel(val | (7 << 16), reg + SUN50I_H6_PLL_AUDIO_REG);
+
+	return sunxi_ccu_probe(pdev->dev.of_node, reg, &sun50i_h6_ccu_desc);
+}
+
+static const struct of_device_id sun50i_h6_ccu_ids[] = {	/* device-tree match table */
+	{ .compatible = "allwinner,sun50i-h6-ccu" },
+	{ }	/* empty terminating entry */
+};
+
+static struct platform_driver sun50i_h6_ccu_driver = {
+	.probe	= sun50i_h6_ccu_probe,
+	.driver	= {
+		.name	= "sun50i-h6-ccu",
+		.of_match_table	= sun50i_h6_ccu_ids,
+	},
+};
+builtin_platform_driver(sun50i_h6_ccu_driver);	/* no .remove callback is provided */
diff --git a/drivers/clk/sunxi-ng/ccu-sun50i-h6.h b/drivers/clk/sunxi-ng/ccu-sun50i-h6.h
new file mode 100644
index 000000000000..2ccfe4428260
--- /dev/null
+++ b/drivers/clk/sunxi-ng/ccu-sun50i-h6.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2016 Icenowy Zheng <icenowy@aosc.io>
+ */
+
+#ifndef _CCU_SUN50I_H6_H_
+#define _CCU_SUN50I_H6_H_
+
+#include <dt-bindings/clock/sun50i-h6-ccu.h>
+#include <dt-bindings/reset/sun50i-h6-ccu.h>
+
+#define CLK_OSC12M		0
+#define CLK_PLL_CPUX		1
+#define CLK_PLL_DDR0		2
+
+/* PLL_PERIPH0 exported for PRCM (no id defined here; presumably in the dt-bindings header) */
+
+#define CLK_PLL_PERIPH0_2X	4
+#define CLK_PLL_PERIPH0_4X	5
+#define CLK_PLL_PERIPH1		6
+#define CLK_PLL_PERIPH1_2X	7
+#define CLK_PLL_PERIPH1_4X	8
+#define CLK_PLL_GPU		9
+#define CLK_PLL_VIDEO0		10
+#define CLK_PLL_VIDEO0_4X	11
+#define CLK_PLL_VIDEO1		12
+#define CLK_PLL_VIDEO1_4X	13
+#define CLK_PLL_VE		14
+#define CLK_PLL_DE		15
+#define CLK_PLL_HSIC		16
+#define CLK_PLL_AUDIO_BASE	17
+#define CLK_PLL_AUDIO		18
+#define CLK_PLL_AUDIO_2X	19
+#define CLK_PLL_AUDIO_4X	20
+
+/* CPUX clock exported for DVFS (no id defined here; presumably in the dt-bindings header) */
+
+#define CLK_AXI			22
+#define CLK_CPUX_APB		23
+#define CLK_PSI_AHB1_AHB2	24
+#define CLK_AHB3		25
+
+/* APB1 clock exported for PIO (no id defined here; presumably in the dt-bindings header) */
+
+#define CLK_APB2		27
+#define CLK_MBUS		28
+
+/* All module clocks and bus gates are exported except DRAM */
+
+#define CLK_DRAM		52
+
+#define CLK_BUS_DRAM		60
+
+#define CLK_NUMBER		(CLK_BUS_HDCP + 1)	/* used as sun50i_h6_hw_clks.num */
+
+#endif /* _CCU_SUN50I_H6_H_ */
diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-h3.c b/drivers/clk/sunxi-ng/ccu-sun8i-h3.c
index 29bc0566b776..77ed0b0ba681 100644
--- a/drivers/clk/sunxi-ng/ccu-sun8i-h3.c
+++ b/drivers/clk/sunxi-ng/ccu-sun8i-h3.c
@@ -69,17 +69,18 @@ static SUNXI_CCU_NM_WITH_SDM_GATE_LOCK(pll_audio_base_clk, "pll-audio-base",
 				       BIT(28),	/* lock */
 				       CLK_SET_RATE_UNGATE);
 
-static SUNXI_CCU_NM_WITH_FRAC_GATE_LOCK(pll_video_clk, "pll-video",
-					"osc24M", 0x0010,
-					8, 7,		/* N */
-					0, 4,		/* M */
-					BIT(24),	/* frac enable */
-					BIT(25),	/* frac select */
-					270000000,	/* frac rate 0 */
-					297000000,	/* frac rate 1 */
-					BIT(31),	/* gate */
-					BIT(28),	/* lock */
-					CLK_SET_RATE_UNGATE);
+static SUNXI_CCU_NM_WITH_FRAC_GATE_LOCK_MIN(pll_video_clk, "pll-video",
+					    "osc24M", 0x0010,
+					    192000000,	/* Minimum rate */
+					    8, 7,	/* N */
+					    0, 4,	/* M */
+					    BIT(24),	/* frac enable */
+					    BIT(25),	/* frac select */
+					    270000000,	/* frac rate 0 */
+					    297000000,	/* frac rate 1 */
+					    BIT(31),	/* gate */
+					    BIT(28),	/* lock */
+					    CLK_SET_RATE_UNGATE);
 
 static SUNXI_CCU_NM_WITH_FRAC_GATE_LOCK(pll_ve_clk, "pll-ve",
 					"osc24M", 0x0018,
@@ -451,11 +452,13 @@ static SUNXI_CCU_GATE(dram_ts_clk,	"dram-ts",	"dram",
 
 static const char * const de_parents[] = { "pll-periph0-2x", "pll-de" };
 static SUNXI_CCU_M_WITH_MUX_GATE(de_clk, "de", de_parents,
-				 0x104, 0, 4, 24, 3, BIT(31), 0);
+				 0x104, 0, 4, 24, 3, BIT(31),
+				 CLK_SET_RATE_PARENT);
 
 static const char * const tcon_parents[] = { "pll-video" };
 static SUNXI_CCU_M_WITH_MUX_GATE(tcon_clk, "tcon", tcon_parents,
-				 0x118, 0, 4, 24, 3, BIT(31), 0);
+				 0x118, 0, 4, 24, 3, BIT(31),
+				 CLK_SET_RATE_PARENT);
 
 static const char * const tve_parents[] = { "pll-de", "pll-periph1" };
 static SUNXI_CCU_M_WITH_MUX_GATE(tve_clk, "tve", tve_parents,
@@ -486,7 +489,8 @@ static SUNXI_CCU_GATE(avs_clk,		"avs",		"osc24M",
 
 static const char * const hdmi_parents[] = { "pll-video" };
 static SUNXI_CCU_M_WITH_MUX_GATE(hdmi_clk, "hdmi", hdmi_parents,
-				 0x150, 0, 4, 24, 2, BIT(31), 0);
+				 0x150, 0, 4, 24, 2, BIT(31),
+				 CLK_SET_RATE_PARENT);
 
 static SUNXI_CCU_GATE(hdmi_ddc_clk,	"hdmi-ddc",	"osc24M",
 		      0x154, BIT(31), 0);
diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-h3.h b/drivers/clk/sunxi-ng/ccu-sun8i-h3.h
index 1b4baea37d81..73d7392c968c 100644
--- a/drivers/clk/sunxi-ng/ccu-sun8i-h3.h
+++ b/drivers/clk/sunxi-ng/ccu-sun8i-h3.h
@@ -26,7 +26,9 @@
 #define CLK_PLL_AUDIO_2X	3
 #define CLK_PLL_AUDIO_4X	4
 #define CLK_PLL_AUDIO_8X	5
-#define CLK_PLL_VIDEO		6
+
+/* PLL_VIDEO is exported */
+
 #define CLK_PLL_VE		7
 #define CLK_PLL_DDR		8
 
diff --git a/drivers/clk/sunxi-ng/ccu_nkmp.c b/drivers/clk/sunxi-ng/ccu_nkmp.c
index e58c95787f94..ebd9436d2c7c 100644
--- a/drivers/clk/sunxi-ng/ccu_nkmp.c
+++ b/drivers/clk/sunxi-ng/ccu_nkmp.c
@@ -20,6 +20,18 @@ struct _ccu_nkmp {
 	unsigned long	p, min_p, max_p;
 };
 
+static unsigned long ccu_nkmp_calc_rate(unsigned long parent,
+					unsigned long n, unsigned long k,
+					unsigned long m, unsigned long p)
+{
+	/* Accumulate in 64 bits; do_div() then divides the u64 in place. */
+	u64 scaled = (u64)parent * (n * k);
+
+	do_div(scaled, m * p);
+
+	return scaled;
+}
+
 static void ccu_nkmp_find_best(unsigned long parent, unsigned long rate,
 			       struct _ccu_nkmp *nkmp)
 {
@@ -33,7 +45,9 @@ static void ccu_nkmp_find_best(unsigned long parent, unsigned long rate,
 				for (_p = nkmp->min_p; _p <= nkmp->max_p; _p <<= 1) {
 					unsigned long tmp_rate;
 
-					tmp_rate = parent * _n * _k / (_m * _p);
+					tmp_rate = ccu_nkmp_calc_rate(parent,
+								      _n, _k,
+								      _m, _p);
 
 					if (tmp_rate > rate)
 						continue;
@@ -81,7 +95,7 @@ static unsigned long ccu_nkmp_recalc_rate(struct clk_hw *hw,
 					unsigned long parent_rate)
 {
 	struct ccu_nkmp *nkmp = hw_to_ccu_nkmp(hw);
-	unsigned long n, m, k, p;
+	unsigned long n, m, k, p, rate;
 	u32 reg;
 
 	reg = readl(nkmp->common.base + nkmp->common.reg);
@@ -107,7 +121,11 @@ static unsigned long ccu_nkmp_recalc_rate(struct clk_hw *hw,
 	p = reg >> nkmp->p.shift;
 	p &= (1 << nkmp->p.width) - 1;
 
-	return (parent_rate * n * k >> p) / m;
+	rate = ccu_nkmp_calc_rate(parent_rate, n, k, m, 1 << p);
+	if (nkmp->common.features & CCU_FEATURE_FIXED_POSTDIV)
+		rate /= nkmp->fixed_post_div;
+
+	return rate;
 }
 
 static long ccu_nkmp_round_rate(struct clk_hw *hw, unsigned long rate,
@@ -116,6 +134,9 @@ static long ccu_nkmp_round_rate(struct clk_hw *hw, unsigned long rate,
 	struct ccu_nkmp *nkmp = hw_to_ccu_nkmp(hw);
 	struct _ccu_nkmp _nkmp;
 
+	if (nkmp->common.features & CCU_FEATURE_FIXED_POSTDIV)
+		rate *= nkmp->fixed_post_div;
+
 	_nkmp.min_n = nkmp->n.min ?: 1;
 	_nkmp.max_n = nkmp->n.max ?: 1 << nkmp->n.width;
 	_nkmp.min_k = nkmp->k.min ?: 1;
@@ -127,17 +148,26 @@ static long ccu_nkmp_round_rate(struct clk_hw *hw, unsigned long rate,
 
 	ccu_nkmp_find_best(*parent_rate, rate, &_nkmp);
 
-	return *parent_rate * _nkmp.n * _nkmp.k / (_nkmp.m * _nkmp.p);
+	rate = ccu_nkmp_calc_rate(*parent_rate, _nkmp.n, _nkmp.k,
+				  _nkmp.m, _nkmp.p);
+	if (nkmp->common.features & CCU_FEATURE_FIXED_POSTDIV)
+		rate = rate / nkmp->fixed_post_div;
+
+	return rate;
 }
 
 static int ccu_nkmp_set_rate(struct clk_hw *hw, unsigned long rate,
 			   unsigned long parent_rate)
 {
 	struct ccu_nkmp *nkmp = hw_to_ccu_nkmp(hw);
+	u32 n_mask, k_mask, m_mask, p_mask;
 	struct _ccu_nkmp _nkmp;
 	unsigned long flags;
 	u32 reg;
 
+	if (nkmp->common.features & CCU_FEATURE_FIXED_POSTDIV)
+		rate = rate * nkmp->fixed_post_div;
+
 	_nkmp.min_n = nkmp->n.min ?: 1;
 	_nkmp.max_n = nkmp->n.max ?: 1 << nkmp->n.width;
 	_nkmp.min_k = nkmp->k.min ?: 1;
@@ -149,18 +179,20 @@ static int ccu_nkmp_set_rate(struct clk_hw *hw, unsigned long rate,
 
 	ccu_nkmp_find_best(parent_rate, rate, &_nkmp);
 
+	n_mask = GENMASK(nkmp->n.width + nkmp->n.shift - 1, nkmp->n.shift);
+	k_mask = GENMASK(nkmp->k.width + nkmp->k.shift - 1, nkmp->k.shift);
+	m_mask = GENMASK(nkmp->m.width + nkmp->m.shift - 1, nkmp->m.shift);
+	p_mask = GENMASK(nkmp->p.width + nkmp->p.shift - 1, nkmp->p.shift);
+
 	spin_lock_irqsave(nkmp->common.lock, flags);
 
 	reg = readl(nkmp->common.base + nkmp->common.reg);
-	reg &= ~GENMASK(nkmp->n.width + nkmp->n.shift - 1, nkmp->n.shift);
-	reg &= ~GENMASK(nkmp->k.width + nkmp->k.shift - 1, nkmp->k.shift);
-	reg &= ~GENMASK(nkmp->m.width + nkmp->m.shift - 1, nkmp->m.shift);
-	reg &= ~GENMASK(nkmp->p.width + nkmp->p.shift - 1, nkmp->p.shift);
-
-	reg |= (_nkmp.n - nkmp->n.offset) << nkmp->n.shift;
-	reg |= (_nkmp.k - nkmp->k.offset) << nkmp->k.shift;
-	reg |= (_nkmp.m - nkmp->m.offset) << nkmp->m.shift;
-	reg |= ilog2(_nkmp.p) << nkmp->p.shift;
+	reg &= ~(n_mask | k_mask | m_mask | p_mask);
+
+	reg |= ((_nkmp.n - nkmp->n.offset) << nkmp->n.shift) & n_mask;
+	reg |= ((_nkmp.k - nkmp->k.offset) << nkmp->k.shift) & k_mask;
+	reg |= ((_nkmp.m - nkmp->m.offset) << nkmp->m.shift) & m_mask;
+	reg |= (ilog2(_nkmp.p) << nkmp->p.shift) & p_mask;
 
 	writel(reg, nkmp->common.base + nkmp->common.reg);
 
diff --git a/drivers/clk/sunxi-ng/ccu_nkmp.h b/drivers/clk/sunxi-ng/ccu_nkmp.h
index a82facbc6144..6940503e7fc4 100644
--- a/drivers/clk/sunxi-ng/ccu_nkmp.h
+++ b/drivers/clk/sunxi-ng/ccu_nkmp.h
@@ -34,6 +34,8 @@ struct ccu_nkmp {
 	struct ccu_div_internal		m;
 	struct ccu_div_internal		p;
 
+	unsigned int		fixed_post_div;
+
 	struct ccu_common	common;
 };
 
diff --git a/drivers/clk/sunxi-ng/ccu_nm.c b/drivers/clk/sunxi-ng/ccu_nm.c
index a16de092bf94..4e2073307f34 100644
--- a/drivers/clk/sunxi-ng/ccu_nm.c
+++ b/drivers/clk/sunxi-ng/ccu_nm.c
@@ -117,6 +117,13 @@ static long ccu_nm_round_rate(struct clk_hw *hw, unsigned long rate,
 	if (nm->common.features & CCU_FEATURE_FIXED_POSTDIV)
 		rate *= nm->fixed_post_div;
 
+	if (rate < nm->min_rate) {
+		rate = nm->min_rate;
+		if (nm->common.features & CCU_FEATURE_FIXED_POSTDIV)
+			rate /= nm->fixed_post_div;
+		return rate;
+	}
+
 	if (ccu_frac_helper_has_rate(&nm->common, &nm->frac, rate)) {
 		if (nm->common.features & CCU_FEATURE_FIXED_POSTDIV)
 			rate /= nm->fixed_post_div;
diff --git a/drivers/clk/sunxi-ng/ccu_nm.h b/drivers/clk/sunxi-ng/ccu_nm.h
index eba586b4c7d0..1d8b459c50b7 100644
--- a/drivers/clk/sunxi-ng/ccu_nm.h
+++ b/drivers/clk/sunxi-ng/ccu_nm.h
@@ -37,6 +37,7 @@ struct ccu_nm {
 	struct ccu_sdm_internal		sdm;
 
 	unsigned int		fixed_post_div;
+	unsigned int		min_rate;
 
 	struct ccu_common	common;
 };
@@ -88,6 +89,32 @@ struct ccu_nm {
 		},							\
 	}
 
+#define SUNXI_CCU_NM_WITH_FRAC_GATE_LOCK_MIN(_struct, _name, _parent,	\
+					     _reg, _min_rate,		\
+					     _nshift, _nwidth,		\
+					     _mshift, _mwidth,		\
+					     _frac_en, _frac_sel,	\
+					     _frac_rate_0, _frac_rate_1,\
+					     _gate, _lock, _flags)	\
+	struct ccu_nm _struct = {					\
+		.enable		= _gate,				\
+		.lock		= _lock,				\
+		.n		= _SUNXI_CCU_MULT(_nshift, _nwidth),	\
+		.m		= _SUNXI_CCU_DIV(_mshift, _mwidth),	\
+		.frac		= _SUNXI_CCU_FRAC(_frac_en, _frac_sel,	\
+						  _frac_rate_0,		\
+						  _frac_rate_1),	\
+		.min_rate	= _min_rate,				\
+		.common		= {					\
+			.reg		= _reg,				\
+			.features	= CCU_FEATURE_FRACTIONAL,	\
+			.hw.init	= CLK_HW_INIT(_name,		\
+						      _parent,		\
+						      &ccu_nm_ops,	\
+						      _flags),		\
+		},							\
+	}
+
 #define SUNXI_CCU_NM_WITH_GATE_LOCK(_struct, _name, _parent, _reg,	\
 				    _nshift, _nwidth,			\
 				    _mshift, _mwidth,			\
diff --git a/drivers/clk/tegra/clk-emc.c b/drivers/clk/tegra/clk-emc.c
index 11a5066e5c27..5234acd30e89 100644
--- a/drivers/clk/tegra/clk-emc.c
+++ b/drivers/clk/tegra/clk-emc.c
@@ -515,7 +515,7 @@ struct clk *tegra_clk_register_emc(void __iomem *base, struct device_node *np,
 
 	init.name = "emc";
 	init.ops = &tegra_clk_emc_ops;
-	init.flags = 0;
+	init.flags = CLK_IS_CRITICAL;
 	init.parent_names = emc_parent_clk_names;
 	init.num_parents = ARRAY_SIZE(emc_parent_clk_names);
 
diff --git a/drivers/clk/tegra/clk-pll.c b/drivers/clk/tegra/clk-pll.c
index 7c369e21c91c..830d1c87fa7c 100644
--- a/drivers/clk/tegra/clk-pll.c
+++ b/drivers/clk/tegra/clk-pll.c
@@ -1151,6 +1151,8 @@ static const struct clk_ops tegra_clk_pllu_ops = {
 	.enable = clk_pllu_enable,
 	.disable = clk_pll_disable,
 	.recalc_rate = clk_pll_recalc_rate,
+	.round_rate = clk_pll_round_rate,
+	.set_rate = clk_pll_set_rate,
 };
 
 static int _pll_fixed_mdiv(struct tegra_clk_pll_params *pll_params,
diff --git a/drivers/clk/tegra/clk-tegra-periph.c b/drivers/clk/tegra/clk-tegra-periph.c
index c02711927d79..2acba2986bc6 100644
--- a/drivers/clk/tegra/clk-tegra-periph.c
+++ b/drivers/clk/tegra/clk-tegra-periph.c
@@ -830,7 +830,7 @@ static struct tegra_periph_init_data gate_clks[] = {
 	GATE("xusb_host", "xusb_host_src", 89, 0, tegra_clk_xusb_host, 0),
 	GATE("xusb_ss", "xusb_ss_src", 156, 0, tegra_clk_xusb_ss, 0),
 	GATE("xusb_dev", "xusb_dev_src", 95, 0, tegra_clk_xusb_dev, 0),
-	GATE("emc", "emc_mux", 57, 0, tegra_clk_emc, CLK_IGNORE_UNUSED),
+	GATE("emc", "emc_mux", 57, 0, tegra_clk_emc, CLK_IS_CRITICAL),
 	GATE("sata_cold", "clk_m", 129, TEGRA_PERIPH_ON_APB, tegra_clk_sata_cold, 0),
 	GATE("ispa", "isp", 23, 0, tegra_clk_ispa, 0),
 	GATE("ispb", "isp", 3, 0, tegra_clk_ispb, 0),
diff --git a/drivers/clk/tegra/clk-tegra-super-gen4.c b/drivers/clk/tegra/clk-tegra-super-gen4.c
index 10047107c1dc..89d6b47a27a8 100644
--- a/drivers/clk/tegra/clk-tegra-super-gen4.c
+++ b/drivers/clk/tegra/clk-tegra-super-gen4.c
@@ -125,7 +125,8 @@ static void __init tegra_sclk_init(void __iomem *clk_base,
 		/* SCLK */
 		dt_clk = tegra_lookup_dt_id(tegra_clk_sclk, tegra_clks);
 		if (dt_clk) {
-			clk = clk_register_divider(NULL, "sclk", "sclk_mux", 0,
+			clk = clk_register_divider(NULL, "sclk", "sclk_mux",
+						CLK_IS_CRITICAL,
 						clk_base + SCLK_DIVIDER, 0, 8,
 						0, &sysrate_lock);
 			*dt_clk = clk;
@@ -137,7 +138,8 @@ static void __init tegra_sclk_init(void __iomem *clk_base,
 			clk = tegra_clk_register_super_mux("sclk",
 						gen_info->sclk_parents,
 						gen_info->num_sclk_parents,
-						CLK_SET_RATE_PARENT,
+						CLK_SET_RATE_PARENT |
+						CLK_IS_CRITICAL,
 						clk_base + SCLK_BURST_POLICY,
 						0, 4, 0, 0, NULL);
 			*dt_clk = clk;
@@ -151,7 +153,7 @@ static void __init tegra_sclk_init(void __iomem *clk_base,
 				   clk_base + SYSTEM_CLK_RATE, 4, 2, 0,
 				   &sysrate_lock);
 		clk = clk_register_gate(NULL, "hclk", "hclk_div",
-				CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED,
+				CLK_SET_RATE_PARENT | CLK_IS_CRITICAL,
 				clk_base + SYSTEM_CLK_RATE,
 				7, CLK_GATE_SET_TO_DISABLE, &sysrate_lock);
 		*dt_clk = clk;
diff --git a/drivers/clk/tegra/clk-tegra114.c b/drivers/clk/tegra/clk-tegra114.c
index 63087d17c3e2..5d5a22d529f5 100644
--- a/drivers/clk/tegra/clk-tegra114.c
+++ b/drivers/clk/tegra/clk-tegra114.c
@@ -955,8 +955,7 @@ static void __init tegra114_pll_init(void __iomem *clk_base,
 
 	/* PLLM */
 	clk = tegra_clk_register_pllm("pll_m", "pll_ref", clk_base, pmc,
-			     CLK_IGNORE_UNUSED | CLK_SET_RATE_GATE,
-			     &pll_m_params, NULL);
+			     CLK_SET_RATE_GATE, &pll_m_params, NULL);
 	clks[TEGRA114_CLK_PLL_M] = clk;
 
 	/* PLLM_OUT1 */
@@ -1190,6 +1189,7 @@ static struct tegra_clk_init_table init_table[] __initdata = {
 	{ TEGRA114_CLK_XUSB_HS_SRC, TEGRA114_CLK_XUSB_SS_DIV2, 61200000, 0 },
 	{ TEGRA114_CLK_XUSB_FALCON_SRC, TEGRA114_CLK_PLL_P, 204000000, 0 },
 	{ TEGRA114_CLK_XUSB_HOST_SRC, TEGRA114_CLK_PLL_P, 102000000, 0 },
+	{ TEGRA114_CLK_VDE, TEGRA114_CLK_CLK_MAX, 600000000, 0 },
 	/* must be the last entry */
 	{ TEGRA114_CLK_CLK_MAX, TEGRA114_CLK_CLK_MAX, 0, 0 },
 };
diff --git a/drivers/clk/tegra/clk-tegra124.c b/drivers/clk/tegra/clk-tegra124.c
index e81ea5b11577..50088e976611 100644
--- a/drivers/clk/tegra/clk-tegra124.c
+++ b/drivers/clk/tegra/clk-tegra124.c
@@ -1089,8 +1089,7 @@ static void __init tegra124_pll_init(void __iomem *clk_base,
 
 	/* PLLM */
 	clk = tegra_clk_register_pllm("pll_m", "pll_ref", clk_base, pmc,
-			     CLK_IGNORE_UNUSED | CLK_SET_RATE_GATE,
-			     &pll_m_params, NULL);
+			     CLK_SET_RATE_GATE, &pll_m_params, NULL);
 	clk_register_clkdev(clk, "pll_m", NULL);
 	clks[TEGRA124_CLK_PLL_M] = clk;
 
@@ -1099,7 +1098,7 @@ static void __init tegra124_pll_init(void __iomem *clk_base,
 				clk_base + PLLM_OUT, 0, TEGRA_DIVIDER_ROUND_UP,
 				8, 8, 1, NULL);
 	clk = tegra_clk_register_pll_out("pll_m_out1", "pll_m_out1_div",
-				clk_base + PLLM_OUT, 1, 0, CLK_IGNORE_UNUSED |
+				clk_base + PLLM_OUT, 1, 0,
 				CLK_SET_RATE_PARENT, 0, NULL);
 	clk_register_clkdev(clk, "pll_m_out1", NULL);
 	clks[TEGRA124_CLK_PLL_M_OUT1] = clk;
@@ -1268,11 +1267,11 @@ static struct tegra_clk_init_table common_init_table[] __initdata = {
 	{ TEGRA124_CLK_I2S2, TEGRA124_CLK_PLL_A_OUT0, 11289600, 0 },
 	{ TEGRA124_CLK_I2S3, TEGRA124_CLK_PLL_A_OUT0, 11289600, 0 },
 	{ TEGRA124_CLK_I2S4, TEGRA124_CLK_PLL_A_OUT0, 11289600, 0 },
-	{ TEGRA124_CLK_VDE, TEGRA124_CLK_PLL_P, 0, 0 },
+	{ TEGRA124_CLK_VDE, TEGRA124_CLK_CLK_MAX, 600000000, 0 },
 	{ TEGRA124_CLK_HOST1X, TEGRA124_CLK_PLL_P, 136000000, 1 },
 	{ TEGRA124_CLK_DSIALP, TEGRA124_CLK_PLL_P, 68000000, 0 },
 	{ TEGRA124_CLK_DSIBLP, TEGRA124_CLK_PLL_P, 68000000, 0 },
-	{ TEGRA124_CLK_SCLK, TEGRA124_CLK_PLL_P_OUT2, 102000000, 1 },
+	{ TEGRA124_CLK_SCLK, TEGRA124_CLK_PLL_P_OUT2, 102000000, 0 },
 	{ TEGRA124_CLK_DFLL_SOC, TEGRA124_CLK_PLL_P, 51000000, 1 },
 	{ TEGRA124_CLK_DFLL_REF, TEGRA124_CLK_PLL_P, 51000000, 1 },
 	{ TEGRA124_CLK_PLL_C, TEGRA124_CLK_CLK_MAX, 768000000, 0 },
diff --git a/drivers/clk/tegra/clk-tegra20.c b/drivers/clk/tegra/clk-tegra20.c
index cbd5a2e5c569..0ee56dd04cec 100644
--- a/drivers/clk/tegra/clk-tegra20.c
+++ b/drivers/clk/tegra/clk-tegra20.c
@@ -576,6 +576,7 @@ static struct tegra_clk tegra20_clks[tegra_clk_max] __initdata = {
 	[tegra_clk_afi] = { .dt_id = TEGRA20_CLK_AFI, .present = true },
 	[tegra_clk_fuse] = { .dt_id = TEGRA20_CLK_FUSE, .present = true },
 	[tegra_clk_kfuse] = { .dt_id = TEGRA20_CLK_KFUSE, .present = true },
+	[tegra_clk_emc] = { .dt_id = TEGRA20_CLK_EMC, .present = true },
 };
 
 static unsigned long tegra20_clk_measure_input_freq(void)
@@ -651,8 +652,7 @@ static void tegra20_pll_init(void)
 
 	/* PLLM */
 	clk = tegra_clk_register_pll("pll_m", "pll_ref", clk_base, NULL,
-			    CLK_IGNORE_UNUSED | CLK_SET_RATE_GATE,
-			    &pll_m_params, NULL);
+			    CLK_SET_RATE_GATE, &pll_m_params, NULL);
 	clks[TEGRA20_CLK_PLL_M] = clk;
 
 	/* PLLM_OUT1 */
@@ -660,7 +660,7 @@ static void tegra20_pll_init(void)
 				clk_base + PLLM_OUT, 0, TEGRA_DIVIDER_ROUND_UP,
 				8, 8, 1, NULL);
 	clk = tegra_clk_register_pll_out("pll_m_out1", "pll_m_out1_div",
-				clk_base + PLLM_OUT, 1, 0, CLK_IGNORE_UNUSED |
+				clk_base + PLLM_OUT, 1, 0,
 				CLK_SET_RATE_PARENT, 0, NULL);
 	clks[TEGRA20_CLK_PLL_M_OUT1] = clk;
 
@@ -723,7 +723,8 @@ static void tegra20_super_clk_init(void)
 
 	/* SCLK */
 	clk = tegra_clk_register_super_mux("sclk", sclk_parents,
-			      ARRAY_SIZE(sclk_parents), CLK_SET_RATE_PARENT,
+			      ARRAY_SIZE(sclk_parents),
+			      CLK_SET_RATE_PARENT | CLK_IS_CRITICAL,
 			      clk_base + SCLK_BURST_POLICY, 0, 4, 0, 0, NULL);
 	clks[TEGRA20_CLK_SCLK] = clk;
 
@@ -814,9 +815,6 @@ static void __init tegra20_periph_clk_init(void)
 			       CLK_SET_RATE_NO_REPARENT,
 			       clk_base + CLK_SOURCE_EMC,
 			       30, 2, 0, &emc_lock);
-	clk = tegra_clk_register_periph_gate("emc", "emc_mux", 0, clk_base, 0,
-				    57, periph_clk_enb_refcnt);
-	clks[TEGRA20_CLK_EMC] = clk;
 
 	clk = tegra_clk_register_mc("mc", "emc_mux", clk_base + CLK_SOURCE_EMC,
 				    &emc_lock);
@@ -1019,13 +1017,12 @@ static struct tegra_clk_init_table init_table[] __initdata = {
 	{ TEGRA20_CLK_PLL_P_OUT2, TEGRA20_CLK_CLK_MAX, 48000000, 1 },
 	{ TEGRA20_CLK_PLL_P_OUT3, TEGRA20_CLK_CLK_MAX, 72000000, 1 },
 	{ TEGRA20_CLK_PLL_P_OUT4, TEGRA20_CLK_CLK_MAX, 24000000, 1 },
-	{ TEGRA20_CLK_PLL_C, TEGRA20_CLK_CLK_MAX, 600000000, 1 },
-	{ TEGRA20_CLK_PLL_C_OUT1, TEGRA20_CLK_CLK_MAX, 216000000, 1 },
-	{ TEGRA20_CLK_SCLK, TEGRA20_CLK_PLL_C_OUT1, 0, 1 },
-	{ TEGRA20_CLK_HCLK, TEGRA20_CLK_CLK_MAX, 0, 1 },
-	{ TEGRA20_CLK_PCLK, TEGRA20_CLK_CLK_MAX, 60000000, 1 },
+	{ TEGRA20_CLK_PLL_C, TEGRA20_CLK_CLK_MAX, 600000000, 0 },
+	{ TEGRA20_CLK_PLL_C_OUT1, TEGRA20_CLK_CLK_MAX, 240000000, 0 },
+	{ TEGRA20_CLK_SCLK, TEGRA20_CLK_PLL_C_OUT1, 240000000, 0 },
+	{ TEGRA20_CLK_HCLK, TEGRA20_CLK_CLK_MAX, 240000000, 0 },
+	{ TEGRA20_CLK_PCLK, TEGRA20_CLK_CLK_MAX, 60000000, 0 },
 	{ TEGRA20_CLK_CSITE, TEGRA20_CLK_CLK_MAX, 0, 1 },
-	{ TEGRA20_CLK_EMC, TEGRA20_CLK_CLK_MAX, 0, 1 },
 	{ TEGRA20_CLK_CCLK, TEGRA20_CLK_CLK_MAX, 0, 1 },
 	{ TEGRA20_CLK_UARTA, TEGRA20_CLK_PLL_P, 0, 0 },
 	{ TEGRA20_CLK_UARTB, TEGRA20_CLK_PLL_P, 0, 0 },
@@ -1051,6 +1048,7 @@ static struct tegra_clk_init_table init_table[] __initdata = {
 	{ TEGRA20_CLK_DISP2, TEGRA20_CLK_PLL_P, 600000000, 0 },
 	{ TEGRA20_CLK_GR2D, TEGRA20_CLK_PLL_C, 300000000, 0 },
 	{ TEGRA20_CLK_GR3D, TEGRA20_CLK_PLL_C, 300000000, 0 },
+	{ TEGRA20_CLK_VDE, TEGRA20_CLK_CLK_MAX, 300000000, 0 },
 	/* must be the last entry */
 	{ TEGRA20_CLK_CLK_MAX, TEGRA20_CLK_CLK_MAX, 0, 0 },
 };
diff --git a/drivers/clk/tegra/clk-tegra210.c b/drivers/clk/tegra/clk-tegra210.c
index 9e6260869eb9..9fb5d51ccce4 100644
--- a/drivers/clk/tegra/clk-tegra210.c
+++ b/drivers/clk/tegra/clk-tegra210.c
@@ -22,10 +22,12 @@
 #include <linux/of_address.h>
 #include <linux/delay.h>
 #include <linux/export.h>
+#include <linux/mutex.h>
 #include <linux/clk/tegra.h>
 #include <dt-bindings/clock/tegra210-car.h>
 #include <dt-bindings/reset/tegra210-car.h>
 #include <linux/iopoll.h>
+#include <soc/tegra/pmc.h>
 
 #include "clk.h"
 #include "clk-id.h"
@@ -41,6 +43,7 @@
 #define CLK_SOURCE_CSITE 0x1d4
 #define CLK_SOURCE_EMC 0x19c
 #define CLK_SOURCE_SOR1 0x410
+#define CLK_SOURCE_LA 0x1f8
 
 #define PLLC_BASE 0x80
 #define PLLC_OUT 0x84
@@ -231,6 +234,30 @@
 #define CLK_RST_CONTROLLER_RST_DEV_Y_SET 0x2a8
 #define CLK_RST_CONTROLLER_RST_DEV_Y_CLR 0x2ac
 
+#define LVL2_CLK_GATE_OVRA 0xf8
+#define LVL2_CLK_GATE_OVRC 0x3a0
+#define LVL2_CLK_GATE_OVRD 0x3a4
+#define LVL2_CLK_GATE_OVRE 0x554
+
+/* I2S registers to handle during APE MBIST WAR */
+#define TEGRA210_I2S_BASE  0x1000
+#define TEGRA210_I2S_SIZE  0x100
+#define TEGRA210_I2S_CTRLS 5
+#define TEGRA210_I2S_CG    0x88
+#define TEGRA210_I2S_CTRL  0xa0
+
+/* DISPA registers to handle during MBIST WAR */
+#define DC_CMD_DISPLAY_COMMAND 0xc8
+#define DC_COM_DSC_TOP_CTL 0xcf8
+
+/* VIC register to handle during MBIST WAR */
+#define NV_PVIC_THI_SLCG_OVERRIDE_LOW 0x8c
+
+/* APE, DISPA and VIC base addresses needed for MBIST WAR */
+#define TEGRA210_AHUB_BASE  0x702d0000
+#define TEGRA210_DISPA_BASE 0x54200000
+#define TEGRA210_VIC_BASE  0x54340000
+
 /*
  * SDM fractional divisor is 16-bit 2's complement signed number within
  * (-2^12 ... 2^12-1) range. Represented in PLL data structure as unsigned
@@ -255,8 +282,22 @@ static struct cpu_clk_suspend_context {
 } tegra210_cpu_clk_sctx;
 #endif
 
+struct tegra210_domain_mbist_war {
+	void (*handle_lvl2_ovr)(struct tegra210_domain_mbist_war *mbist);
+	const u32 lvl2_offset;
+	const u32 lvl2_mask;
+	const unsigned int num_clks;
+	const unsigned int *clk_init_data;
+	struct clk_bulk_data *clks;
+};
+
+static struct clk **clks;
+
 static void __iomem *clk_base;
 static void __iomem *pmc_base;
+static void __iomem *ahub_base;
+static void __iomem *dispa_base;
+static void __iomem *vic_base;
 
 static unsigned long osc_freq;
 static unsigned long pll_ref_freq;
@@ -267,6 +308,7 @@ static DEFINE_SPINLOCK(pll_re_lock);
 static DEFINE_SPINLOCK(pll_u_lock);
 static DEFINE_SPINLOCK(sor1_lock);
 static DEFINE_SPINLOCK(emc_lock);
+static DEFINE_MUTEX(lvl2_ovr_lock);
 
 /* possible OSC frequencies in Hz */
 static unsigned long tegra210_input_freq[] = {
@@ -310,6 +352,8 @@ static const char *mux_pllmcp_clkm[] = {
 #define PLLA_MISC2_WRITE_MASK		0x06ffffff
 
 /* PLLD */
+#define PLLD_BASE_CSI_CLKSOURCE		(1 << 23)
+
 #define PLLD_MISC0_EN_SDM		(1 << 16)
 #define PLLD_MISC0_LOCK_OVERRIDE	(1 << 17)
 #define PLLD_MISC0_LOCK_ENABLE		(1 << 18)
@@ -513,6 +557,115 @@ void tegra210_set_sata_pll_seq_sw(bool state)
 }
 EXPORT_SYMBOL_GPL(tegra210_set_sata_pll_seq_sw);
 
+static void tegra210_generic_mbist_war(struct tegra210_domain_mbist_war *mbist)
+{
+	u32 val;
+
+	val = readl_relaxed(clk_base + mbist->lvl2_offset);
+	writel_relaxed(val | mbist->lvl2_mask, clk_base + mbist->lvl2_offset);
+	fence_udelay(1, clk_base);
+	writel_relaxed(val, clk_base + mbist->lvl2_offset);
+	fence_udelay(1, clk_base);
+}
+
+static void tegra210_venc_mbist_war(struct tegra210_domain_mbist_war *mbist)
+{
+	u32 csi_src, ovra, ovre;
+	unsigned long flags = 0;
+
+	spin_lock_irqsave(&pll_d_lock, flags);
+
+	csi_src = readl_relaxed(clk_base + PLLD_BASE);
+	writel_relaxed(csi_src | PLLD_BASE_CSI_CLKSOURCE, clk_base + PLLD_BASE);
+	fence_udelay(1, clk_base);
+
+	ovra = readl_relaxed(clk_base + LVL2_CLK_GATE_OVRA);
+	writel_relaxed(ovra | BIT(15), clk_base + LVL2_CLK_GATE_OVRA);
+	ovre = readl_relaxed(clk_base + LVL2_CLK_GATE_OVRE);
+	writel_relaxed(ovre | BIT(3), clk_base + LVL2_CLK_GATE_OVRE);
+	fence_udelay(1, clk_base);
+
+	writel_relaxed(ovra, clk_base + LVL2_CLK_GATE_OVRA);
+	writel_relaxed(ovre, clk_base + LVL2_CLK_GATE_OVRE);
+	writel_relaxed(csi_src, clk_base + PLLD_BASE);
+	fence_udelay(1, clk_base);
+
+	spin_unlock_irqrestore(&pll_d_lock, flags);
+}
+
+static void tegra210_disp_mbist_war(struct tegra210_domain_mbist_war *mbist)
+{
+	u32 ovra, dsc_top_ctrl;
+
+	ovra = readl_relaxed(clk_base + LVL2_CLK_GATE_OVRA);
+	writel_relaxed(ovra | BIT(1), clk_base + LVL2_CLK_GATE_OVRA);
+	fence_udelay(1, clk_base);
+
+	dsc_top_ctrl = readl_relaxed(dispa_base + DC_COM_DSC_TOP_CTL);
+	writel_relaxed(dsc_top_ctrl | BIT(2), dispa_base + DC_COM_DSC_TOP_CTL);
+	readl_relaxed(dispa_base + DC_CMD_DISPLAY_COMMAND);
+	writel_relaxed(dsc_top_ctrl, dispa_base + DC_COM_DSC_TOP_CTL);
+	readl_relaxed(dispa_base + DC_CMD_DISPLAY_COMMAND);
+
+	writel_relaxed(ovra, clk_base + LVL2_CLK_GATE_OVRA);
+	fence_udelay(1, clk_base);
+}
+
+static void tegra210_vic_mbist_war(struct tegra210_domain_mbist_war *mbist)
+{
+	u32 ovre, val;
+
+	ovre = readl_relaxed(clk_base + LVL2_CLK_GATE_OVRE);
+	writel_relaxed(ovre | BIT(5), clk_base + LVL2_CLK_GATE_OVRE);
+	fence_udelay(1, clk_base);
+
+	val = readl_relaxed(vic_base + NV_PVIC_THI_SLCG_OVERRIDE_LOW);
+	writel_relaxed(val | BIT(0) | GENMASK(7, 2) | BIT(24),
+			vic_base + NV_PVIC_THI_SLCG_OVERRIDE_LOW);
+	fence_udelay(1, vic_base + NV_PVIC_THI_SLCG_OVERRIDE_LOW);
+
+	writel_relaxed(val, vic_base + NV_PVIC_THI_SLCG_OVERRIDE_LOW);
+	readl(vic_base + NV_PVIC_THI_SLCG_OVERRIDE_LOW);
+
+	writel_relaxed(ovre, clk_base + LVL2_CLK_GATE_OVRE);
+	fence_udelay(1, clk_base);
+}
+
+static void tegra210_ape_mbist_war(struct tegra210_domain_mbist_war *mbist)
+{
+	void __iomem *i2s_base;
+	unsigned int i;
+	u32 ovrc, ovre;
+
+	ovrc = readl_relaxed(clk_base + LVL2_CLK_GATE_OVRC);
+	ovre = readl_relaxed(clk_base + LVL2_CLK_GATE_OVRE);
+	writel_relaxed(ovrc | BIT(1), clk_base + LVL2_CLK_GATE_OVRC);
+	writel_relaxed(ovre | BIT(10) | BIT(11),
+			clk_base + LVL2_CLK_GATE_OVRE);
+	fence_udelay(1, clk_base);
+
+	i2s_base = ahub_base + TEGRA210_I2S_BASE;
+
+	for (i = 0; i < TEGRA210_I2S_CTRLS; i++) {
+		u32 i2s_ctrl;
+
+		i2s_ctrl = readl_relaxed(i2s_base + TEGRA210_I2S_CTRL);
+		writel_relaxed(i2s_ctrl | BIT(10),
+				i2s_base + TEGRA210_I2S_CTRL);
+		writel_relaxed(0, i2s_base + TEGRA210_I2S_CG);
+		readl(i2s_base + TEGRA210_I2S_CG);
+		writel_relaxed(1, i2s_base + TEGRA210_I2S_CG);
+		writel_relaxed(i2s_ctrl, i2s_base + TEGRA210_I2S_CTRL);
+		readl(i2s_base + TEGRA210_I2S_CTRL);
+
+		i2s_base += TEGRA210_I2S_SIZE;
+	}
+
+	writel_relaxed(ovrc, clk_base + LVL2_CLK_GATE_OVRC);
+	writel_relaxed(ovre, clk_base + LVL2_CLK_GATE_OVRE);
+	fence_udelay(1, clk_base);
+}
+
 static inline void _pll_misc_chk_default(void __iomem *base,
 					struct tegra_clk_pll_params *params,
 					u8 misc_num, u32 default_val, u32 mask)
@@ -2411,13 +2564,150 @@ static struct tegra_audio_clk_info tegra210_audio_plls[] = {
 	{ "pll_a1", &pll_a1_params, tegra_clk_pll_a1, "pll_ref" },
 };
 
-static struct clk **clks;
-
 static const char * const aclk_parents[] = {
 	"pll_a1", "pll_c", "pll_p", "pll_a_out0", "pll_c2", "pll_c3",
 	"clk_m"
 };
 
+static const unsigned int nvjpg_slcg_clkids[] = { TEGRA210_CLK_NVDEC };
+static const unsigned int nvdec_slcg_clkids[] = { TEGRA210_CLK_NVJPG };
+static const unsigned int sor_slcg_clkids[] = { TEGRA210_CLK_HDA2CODEC_2X,
+	TEGRA210_CLK_HDA2HDMI, TEGRA210_CLK_DISP1, TEGRA210_CLK_DISP2 };
+static const unsigned int disp_slcg_clkids[] = { TEGRA210_CLK_LA,
+	TEGRA210_CLK_HOST1X};
+static const unsigned int xusba_slcg_clkids[] = { TEGRA210_CLK_XUSB_HOST,
+	TEGRA210_CLK_XUSB_DEV };
+static const unsigned int xusbb_slcg_clkids[] = { TEGRA210_CLK_XUSB_HOST,
+	TEGRA210_CLK_XUSB_SS };
+static const unsigned int xusbc_slcg_clkids[] = { TEGRA210_CLK_XUSB_DEV,
+	TEGRA210_CLK_XUSB_SS };
+static const unsigned int venc_slcg_clkids[] = { TEGRA210_CLK_HOST1X,
+	TEGRA210_CLK_PLL_D };
+static const unsigned int ape_slcg_clkids[] = { TEGRA210_CLK_ACLK,
+	TEGRA210_CLK_I2S0, TEGRA210_CLK_I2S1, TEGRA210_CLK_I2S2,
+	TEGRA210_CLK_I2S3, TEGRA210_CLK_I2S4, TEGRA210_CLK_SPDIF_OUT,
+	TEGRA210_CLK_D_AUDIO };
+static const unsigned int vic_slcg_clkids[] = { TEGRA210_CLK_HOST1X };
+
+static struct tegra210_domain_mbist_war tegra210_pg_mbist_war[] = {
+	[TEGRA_POWERGATE_VENC] = {
+		.handle_lvl2_ovr = tegra210_venc_mbist_war,
+		.num_clks = ARRAY_SIZE(venc_slcg_clkids),
+		.clk_init_data = venc_slcg_clkids,
+	},
+	[TEGRA_POWERGATE_SATA] = {
+		.handle_lvl2_ovr = tegra210_generic_mbist_war,
+		.lvl2_offset = LVL2_CLK_GATE_OVRC,
+		.lvl2_mask = BIT(0) | BIT(17) | BIT(19),
+	},
+	[TEGRA_POWERGATE_MPE] = {
+		.handle_lvl2_ovr = tegra210_generic_mbist_war,
+		.lvl2_offset = LVL2_CLK_GATE_OVRE,
+		.lvl2_mask = BIT(2),
+	},
+	[TEGRA_POWERGATE_SOR] = {
+		.handle_lvl2_ovr = tegra210_generic_mbist_war,
+		.num_clks = ARRAY_SIZE(sor_slcg_clkids),
+		.clk_init_data = sor_slcg_clkids,
+		.lvl2_offset = LVL2_CLK_GATE_OVRA,
+		.lvl2_mask = BIT(1) | BIT(2),
+	},
+	[TEGRA_POWERGATE_DIS] = {
+		.handle_lvl2_ovr = tegra210_disp_mbist_war,
+		.num_clks = ARRAY_SIZE(disp_slcg_clkids),
+		.clk_init_data = disp_slcg_clkids,
+	},
+	[TEGRA_POWERGATE_DISB] = {
+		.num_clks = ARRAY_SIZE(disp_slcg_clkids),
+		.clk_init_data = disp_slcg_clkids,
+		.handle_lvl2_ovr = tegra210_generic_mbist_war,
+		.lvl2_offset = LVL2_CLK_GATE_OVRA,
+		.lvl2_mask = BIT(2),
+	},
+	[TEGRA_POWERGATE_XUSBA] = {
+		.num_clks = ARRAY_SIZE(xusba_slcg_clkids),
+		.clk_init_data = xusba_slcg_clkids,
+		.handle_lvl2_ovr = tegra210_generic_mbist_war,
+		.lvl2_offset = LVL2_CLK_GATE_OVRC,
+		.lvl2_mask = BIT(30) | BIT(31),
+	},
+	[TEGRA_POWERGATE_XUSBB] = {
+		.num_clks = ARRAY_SIZE(xusbb_slcg_clkids),
+		.clk_init_data = xusbb_slcg_clkids,
+		.handle_lvl2_ovr = tegra210_generic_mbist_war,
+		.lvl2_offset = LVL2_CLK_GATE_OVRC,
+		.lvl2_mask = BIT(30) | BIT(31),
+	},
+	[TEGRA_POWERGATE_XUSBC] = {
+		.num_clks = ARRAY_SIZE(xusbc_slcg_clkids),
+		.clk_init_data = xusbc_slcg_clkids,
+		.handle_lvl2_ovr = tegra210_generic_mbist_war,
+		.lvl2_offset = LVL2_CLK_GATE_OVRC,
+		.lvl2_mask = BIT(30) | BIT(31),
+	},
+	[TEGRA_POWERGATE_VIC] = {
+		.num_clks = ARRAY_SIZE(vic_slcg_clkids),
+		.clk_init_data = vic_slcg_clkids,
+		.handle_lvl2_ovr = tegra210_vic_mbist_war,
+	},
+	[TEGRA_POWERGATE_NVDEC] = {
+		.num_clks = ARRAY_SIZE(nvdec_slcg_clkids),
+		.clk_init_data = nvdec_slcg_clkids,
+		.handle_lvl2_ovr = tegra210_generic_mbist_war,
+		.lvl2_offset = LVL2_CLK_GATE_OVRC,
+		.lvl2_mask = BIT(9) | BIT(31),
+	},
+	[TEGRA_POWERGATE_NVJPG] = {
+		.num_clks = ARRAY_SIZE(nvjpg_slcg_clkids),
+		.clk_init_data = nvjpg_slcg_clkids,
+		.handle_lvl2_ovr = tegra210_generic_mbist_war,
+		.lvl2_offset = LVL2_CLK_GATE_OVRC,
+		.lvl2_mask = BIT(9) | BIT(31),
+	},
+	[TEGRA_POWERGATE_AUD] = {
+		.num_clks = ARRAY_SIZE(ape_slcg_clkids),
+		.clk_init_data = ape_slcg_clkids,
+		.handle_lvl2_ovr = tegra210_ape_mbist_war,
+	},
+	[TEGRA_POWERGATE_VE2] = {
+		.handle_lvl2_ovr = tegra210_generic_mbist_war,
+		.lvl2_offset = LVL2_CLK_GATE_OVRD,
+		.lvl2_mask = BIT(22),
+	},
+};
+
+int tegra210_clk_handle_mbist_war(unsigned int id)
+{
+	int err;
+	struct tegra210_domain_mbist_war *mbist_war;
+
+	if (id >= ARRAY_SIZE(tegra210_pg_mbist_war)) {
+		WARN(1, "unknown domain id in MBIST WAR handler\n");
+		return -EINVAL;
+	}
+
+	mbist_war = &tegra210_pg_mbist_war[id];
+	if (!mbist_war->handle_lvl2_ovr)
+		return 0;
+
+	if (mbist_war->num_clks && !mbist_war->clks)
+		return -ENODEV;
+
+	err = clk_bulk_prepare_enable(mbist_war->num_clks, mbist_war->clks);
+	if (err < 0)
+		return err;
+
+	mutex_lock(&lvl2_ovr_lock);
+
+	mbist_war->handle_lvl2_ovr(mbist_war);
+
+	mutex_unlock(&lvl2_ovr_lock);
+
+	clk_bulk_disable_unprepare(mbist_war->num_clks, mbist_war->clks);
+
+	return 0;
+}
+
 void tegra210_put_utmipll_in_iddq(void)
 {
 	u32 reg;
@@ -2654,6 +2944,13 @@ static struct tegra_periph_init_data tegra210_periph[] = {
 			      sor1_parents_idx, 0, &sor1_lock),
 };
 
+static const char * const la_parents[] = {
+	"pll_p", "pll_c2", "pll_c", "pll_c3", "pll_re_out1", "pll_a1", "clk_m", "pll_c4_out0"
+};
+
+static struct tegra_clk_periph tegra210_la =
+	TEGRA_CLK_PERIPH(29, 7, 9, 0, 8, 1, TEGRA_DIVIDER_ROUND_UP, 76, 0, NULL, 0);
+
 static __init void tegra210_periph_clk_init(void __iomem *clk_base,
 					    void __iomem *pmc_base)
 {
@@ -2700,6 +2997,12 @@ static __init void tegra210_periph_clk_init(void __iomem *clk_base,
 					     periph_clk_enb_refcnt);
 	clks[TEGRA210_CLK_DSIB] = clk;
 
+	/* la */
+	clk = tegra_clk_register_periph("la", la_parents,
+			ARRAY_SIZE(la_parents), &tegra210_la, clk_base,
+			CLK_SOURCE_LA, 0);
+	clks[TEGRA210_CLK_LA] = clk;
+
 	/* emc mux */
 	clk = clk_register_mux(NULL, "emc_mux", mux_pllmcp_clkm,
 			       ARRAY_SIZE(mux_pllmcp_clkm), 0,
@@ -3025,7 +3328,7 @@ static struct tegra_clk_init_table init_table[] __initdata = {
 	{ TEGRA210_CLK_I2S4, TEGRA210_CLK_PLL_A_OUT0, 11289600, 0 },
 	{ TEGRA210_CLK_HOST1X, TEGRA210_CLK_PLL_P, 136000000, 1 },
 	{ TEGRA210_CLK_SCLK_MUX, TEGRA210_CLK_PLL_P, 0, 1 },
-	{ TEGRA210_CLK_SCLK, TEGRA210_CLK_CLK_MAX, 102000000, 1 },
+	{ TEGRA210_CLK_SCLK, TEGRA210_CLK_CLK_MAX, 102000000, 0 },
 	{ TEGRA210_CLK_DFLL_SOC, TEGRA210_CLK_PLL_P, 51000000, 1 },
 	{ TEGRA210_CLK_DFLL_REF, TEGRA210_CLK_PLL_P, 51000000, 1 },
 	{ TEGRA210_CLK_SBC4, TEGRA210_CLK_PLL_P, 12000000, 1 },
@@ -3040,7 +3343,6 @@ static struct tegra_clk_init_table init_table[] __initdata = {
 	{ TEGRA210_CLK_XUSB_DEV_SRC, TEGRA210_CLK_PLL_P_OUT_XUSB, 102000000, 0 },
 	{ TEGRA210_CLK_SATA, TEGRA210_CLK_PLL_P, 104000000, 0 },
 	{ TEGRA210_CLK_SATA_OOB, TEGRA210_CLK_PLL_P, 204000000, 0 },
-	{ TEGRA210_CLK_EMC, TEGRA210_CLK_CLK_MAX, 0, 1 },
 	{ TEGRA210_CLK_MSELECT, TEGRA210_CLK_CLK_MAX, 0, 1 },
 	{ TEGRA210_CLK_CSITE, TEGRA210_CLK_CLK_MAX, 0, 1 },
 	/* TODO find a way to enable this on-demand */
@@ -3149,6 +3451,37 @@ static int tegra210_reset_deassert(unsigned long id)
 	return 0;
 }
 
+static void tegra210_mbist_clk_init(void)
+{
+	unsigned int i, j;
+
+	for (i = 0; i < ARRAY_SIZE(tegra210_pg_mbist_war); i++) {
+		unsigned int num_clks = tegra210_pg_mbist_war[i].num_clks;
+		struct clk_bulk_data *clk_data;
+
+		if (!num_clks)
+			continue;
+
+		/* Zeroed: only ->clk is set below, so ->id stays NULL. */
+		clk_data = kcalloc(num_clks, sizeof(*clk_data), GFP_KERNEL);
+		if (WARN_ON(!clk_data))
+			return;
+
+		tegra210_pg_mbist_war[i].clks = clk_data;
+		for (j = 0; j < num_clks; j++) {
+			int clk_id = tegra210_pg_mbist_war[i].clk_init_data[j];
+			struct clk *clk = clks[clk_id];
+
+			if (WARN(IS_ERR(clk), "clk_id: %d\n", clk_id)) {
+				kfree(clk_data);
+				tegra210_pg_mbist_war[i].clks = NULL;
+				break;
+			}
+			clk_data[j].clk = clk;
+		}
+	}
+}
+
 /**
  * tegra210_clock_init - Tegra210-specific clock initialization
  * @np: struct device_node * of the DT node for the SoC CAR IP block
@@ -3183,6 +3516,24 @@ static void __init tegra210_clock_init(struct device_node *np)
 		return;
 	}
 
+	ahub_base = ioremap(TEGRA210_AHUB_BASE, SZ_64K);
+	if (!ahub_base) {
+		pr_err("ioremap tegra210 APE failed\n");
+		return;
+	}
+
+	dispa_base = ioremap(TEGRA210_DISPA_BASE, SZ_256K);
+	if (!dispa_base) {
+		pr_err("ioremap tegra210 DISPA failed\n");
+		return;
+	}
+
+	vic_base = ioremap(TEGRA210_VIC_BASE, SZ_256K);
+	if (!vic_base) {
+		pr_err("ioremap tegra210 VIC failed\n");
+		return;
+	}
+
 	clks = tegra_clk_init(clk_base, TEGRA210_CLK_CLK_MAX,
 			      TEGRA210_CAR_BANK_COUNT);
 	if (!clks)
@@ -3219,6 +3570,8 @@ static void __init tegra210_clock_init(struct device_node *np)
 	tegra_add_of_provider(np);
 	tegra_register_devclks(devclks, ARRAY_SIZE(devclks));
 
+	tegra210_mbist_clk_init();
+
 	tegra_cpu_car_ops = &tegra210_cpu_car_ops;
 }
 CLK_OF_DECLARE(tegra210, "nvidia,tegra210-car", tegra210_clock_init);
diff --git a/drivers/clk/tegra/clk-tegra30.c b/drivers/clk/tegra/clk-tegra30.c
index bee84c554932..b316dfb6f6c7 100644
--- a/drivers/clk/tegra/clk-tegra30.c
+++ b/drivers/clk/tegra/clk-tegra30.c
@@ -819,6 +819,7 @@ static struct tegra_clk tegra30_clks[tegra_clk_max] __initdata = {
 	[tegra_clk_pll_a] = { .dt_id = TEGRA30_CLK_PLL_A, .present = true },
 	[tegra_clk_pll_a_out0] = { .dt_id = TEGRA30_CLK_PLL_A_OUT0, .present = true },
 	[tegra_clk_cec] = { .dt_id = TEGRA30_CLK_CEC, .present = true },
+	[tegra_clk_emc] = { .dt_id = TEGRA30_CLK_EMC, .present = true },
 };
 
 static const char *pll_e_parents[] = { "pll_ref", "pll_p" };
@@ -843,8 +844,7 @@ static void __init tegra30_pll_init(void)
 
 	/* PLLM */
 	clk = tegra_clk_register_pll("pll_m", "pll_ref", clk_base, pmc_base,
-			    CLK_IGNORE_UNUSED | CLK_SET_RATE_GATE,
-			    &pll_m_params, NULL);
+			    CLK_SET_RATE_GATE, &pll_m_params, NULL);
 	clks[TEGRA30_CLK_PLL_M] = clk;
 
 	/* PLLM_OUT1 */
@@ -852,7 +852,7 @@ static void __init tegra30_pll_init(void)
 				clk_base + PLLM_OUT, 0, TEGRA_DIVIDER_ROUND_UP,
 				8, 8, 1, NULL);
 	clk = tegra_clk_register_pll_out("pll_m_out1", "pll_m_out1_div",
-				clk_base + PLLM_OUT, 1, 0, CLK_IGNORE_UNUSED |
+				clk_base + PLLM_OUT, 1, 0,
 				CLK_SET_RATE_PARENT, 0, NULL);
 	clks[TEGRA30_CLK_PLL_M_OUT1] = clk;
 
@@ -990,7 +990,7 @@ static void __init tegra30_super_clk_init(void)
 	/* SCLK */
 	clk = tegra_clk_register_super_mux("sclk", sclk_parents,
 				  ARRAY_SIZE(sclk_parents),
-				  CLK_SET_RATE_PARENT,
+				  CLK_SET_RATE_PARENT | CLK_IS_CRITICAL,
 				  clk_base + SCLK_BURST_POLICY,
 				  0, 4, 0, 0, NULL);
 	clks[TEGRA30_CLK_SCLK] = clk;
@@ -1060,9 +1060,6 @@ static void __init tegra30_periph_clk_init(void)
 			       CLK_SET_RATE_NO_REPARENT,
 			       clk_base + CLK_SOURCE_EMC,
 			       30, 2, 0, &emc_lock);
-	clk = tegra_clk_register_periph_gate("emc", "emc_mux", 0, clk_base, 0,
-				    57, periph_clk_enb_refcnt);
-	clks[TEGRA30_CLK_EMC] = clk;
 
 	clk = tegra_clk_register_mc("mc", "emc_mux", clk_base + CLK_SOURCE_EMC,
 				    &emc_lock);
@@ -1252,10 +1249,7 @@ static struct tegra_clk_init_table init_table[] __initdata = {
 	{ TEGRA30_CLK_SDMMC1, TEGRA30_CLK_PLL_P, 48000000, 0 },
 	{ TEGRA30_CLK_SDMMC2, TEGRA30_CLK_PLL_P, 48000000, 0 },
 	{ TEGRA30_CLK_SDMMC3, TEGRA30_CLK_PLL_P, 48000000, 0 },
-	{ TEGRA30_CLK_PLL_M, TEGRA30_CLK_CLK_MAX, 0, 1 },
-	{ TEGRA30_CLK_PCLK, TEGRA30_CLK_CLK_MAX, 0, 1 },
 	{ TEGRA30_CLK_CSITE, TEGRA30_CLK_CLK_MAX, 0, 1 },
-	{ TEGRA30_CLK_EMC, TEGRA30_CLK_CLK_MAX, 0, 1 },
 	{ TEGRA30_CLK_MSELECT, TEGRA30_CLK_CLK_MAX, 0, 1 },
 	{ TEGRA30_CLK_SBC1, TEGRA30_CLK_PLL_P, 100000000, 0 },
 	{ TEGRA30_CLK_SBC2, TEGRA30_CLK_PLL_P, 100000000, 0 },
@@ -1272,6 +1266,7 @@ static struct tegra_clk_init_table init_table[] __initdata = {
 	{ TEGRA30_CLK_GR3D, TEGRA30_CLK_PLL_C, 300000000, 0 },
 	{ TEGRA30_CLK_GR3D2, TEGRA30_CLK_PLL_C, 300000000, 0 },
 	{ TEGRA30_CLK_PLL_U, TEGRA30_CLK_CLK_MAX, 480000000, 0 },
+	{ TEGRA30_CLK_VDE, TEGRA30_CLK_CLK_MAX, 600000000, 0 },
 	/* must be the last entry */
 	{ TEGRA30_CLK_CLK_MAX, TEGRA30_CLK_CLK_MAX, 0, 0 },
 };
diff --git a/drivers/clk/tegra/clk.h b/drivers/clk/tegra/clk.h
index 3b2763df51c2..ba7e20e6a82b 100644
--- a/drivers/clk/tegra/clk.h
+++ b/drivers/clk/tegra/clk.h
@@ -812,4 +812,11 @@ int tegra_pll_wait_for_lock(struct tegra_clk_pll *pll);
 u16 tegra_pll_get_fixed_mdiv(struct clk_hw *hw, unsigned long input_rate);
 int tegra_pll_p_div_to_hw(struct tegra_clk_pll *pll, u8 p_div);
 
+/* Combined read fence with delay: readl(@reg), then udelay(@delay) */
+#define fence_udelay(delay, reg)	\
+	do {				\
+		readl(reg);		\
+		udelay(delay);		\
+	} while (0)
+
 #endif /* TEGRA_CLK_H */
diff --git a/drivers/clk/ti/clk.c b/drivers/clk/ti/clk.c
index f4d6802a8544..7d22e1af2247 100644
--- a/drivers/clk/ti/clk.c
+++ b/drivers/clk/ti/clk.c
@@ -55,6 +55,29 @@ static void clk_memmap_writel(u32 val, const struct clk_omap_reg *reg)
 		writel_relaxed(val, io->mem + reg->offset);
 }
 
+/* Read-modify-write: clear @mask in the word at @ptr, then OR in @val. */
+static void _clk_rmw(u32 val, u32 mask, void __iomem *ptr)
+{
+	u32 cur = readl_relaxed(ptr);
+
+	cur = (cur & ~mask) | val;
+
+	writel_relaxed(cur, ptr);
+}
+
+/* Update the @mask bits of @reg to @val via its ptr, regmap or iomem base. */
+static void clk_memmap_rmw(u32 val, u32 mask, const struct clk_omap_reg *reg)
+{
+	struct clk_iomap *io = clk_memmaps[reg->index];
+
+	if (reg->ptr)
+		_clk_rmw(val, mask, reg->ptr);
+	else if (io->regmap)
+		regmap_update_bits(io->regmap, reg->offset, mask, val);
+	else
+		_clk_rmw(val, mask, io->mem + reg->offset);
+}
+
 static u32 clk_memmap_readl(const struct clk_omap_reg *reg)
 {
 	u32 val;
@@ -89,6 +112,7 @@ int ti_clk_setup_ll_ops(struct ti_clk_ll_ops *ops)
 	ti_clk_ll_ops = ops;
 	ops->clk_readl = clk_memmap_readl;
 	ops->clk_writel = clk_memmap_writel;
+	ops->clk_rmw = clk_memmap_rmw;
 
 	return 0;
 }
@@ -251,6 +275,20 @@ int ti_clk_get_reg_addr(struct device_node *node, int index,
 	return 0;
 }
 
+/* Pulse (set, then clear) latch bit @shift of @reg; no-op when @shift < 0. */
+void ti_clk_latch(struct clk_omap_reg *reg, s8 shift)
+{
+	u32 latch;
+
+	if (shift < 0)
+		return;
+
+	latch = 1U << shift;	/* unsigned: "1 << 31" would overflow int */
+	ti_clk_ll_ops->clk_rmw(latch, latch, reg);
+	ti_clk_ll_ops->clk_rmw(0, latch, reg);
+	ti_clk_ll_ops->clk_readl(reg); /* OCP barrier */
+}
+
 /**
  * omap2_clk_provider_init - init master clock provider
  * @parent: master node
diff --git a/drivers/clk/ti/clock.h b/drivers/clk/ti/clock.h
index d9b43bfc2532..b58278077226 100644
--- a/drivers/clk/ti/clock.h
+++ b/drivers/clk/ti/clock.h
@@ -22,6 +22,7 @@ struct clk_omap_divider {
 	u8			shift;
 	u8			width;
 	u8			flags;
+	s8			latch;
 	const struct clk_div_table	*table;
 };
 
@@ -33,6 +34,7 @@ struct clk_omap_mux {
 	u32			*table;
 	u32			mask;
 	u8			shift;
+	s8			latch;
 	u8			flags;
 };
 
@@ -74,6 +76,11 @@ enum {
 #define CLKF_CORE			(1 << 9)
 #define CLKF_J_TYPE			(1 << 10)
 
+/* CLKCTRL flags */
+#define CLKF_SW_SUP			BIT(5)
+#define CLKF_HW_SUP			BIT(6)
+#define CLKF_NO_IDLEST			BIT(7)
+
 #define CLK(dev, con, ck)		\
 	{				\
 		.lk = {			\
@@ -183,10 +190,6 @@ extern const struct omap_clkctrl_data am438x_clkctrl_data[];
 extern const struct omap_clkctrl_data dm814_clkctrl_data[];
 extern const struct omap_clkctrl_data dm816_clkctrl_data[];
 
-#define CLKF_SW_SUP	BIT(0)
-#define CLKF_HW_SUP	BIT(1)
-#define CLKF_NO_IDLEST	BIT(2)
-
 typedef void (*ti_of_clk_init_cb_t)(void *, struct device_node *);
 
 struct clk *ti_clk_register(struct device *dev, struct clk_hw *hw,
@@ -194,6 +197,8 @@ struct clk *ti_clk_register(struct device *dev, struct clk_hw *hw,
 int ti_clk_add_alias(struct device *dev, struct clk *clk, const char *con);
 void ti_clk_add_aliases(void);
 
+void ti_clk_latch(struct clk_omap_reg *reg, s8 shift);
+
 struct clk_hw *ti_clk_build_component_mux(struct ti_clk_mux *setup);
 
 int ti_clk_parse_divider_data(int *div_table, int num_dividers, int max_div,
diff --git a/drivers/clk/ti/divider.c b/drivers/clk/ti/divider.c
index 77f93f6d2806..aaa277dd6d99 100644
--- a/drivers/clk/ti/divider.c
+++ b/drivers/clk/ti/divider.c
@@ -263,6 +263,8 @@ static int ti_clk_divider_set_rate(struct clk_hw *hw, unsigned long rate,
 	val |= value << divider->shift;
 	ti_clk_ll_ops->clk_writel(val, &divider->reg);
 
+	ti_clk_latch(&divider->reg, divider->latch);
+
 	return 0;
 }
 
@@ -276,7 +278,8 @@ static struct clk *_register_divider(struct device *dev, const char *name,
 				     const char *parent_name,
 				     unsigned long flags,
 				     struct clk_omap_reg *reg,
-				     u8 shift, u8 width, u8 clk_divider_flags,
+				     u8 shift, u8 width, s8 latch,
+				     u8 clk_divider_flags,
 				     const struct clk_div_table *table)
 {
 	struct clk_omap_divider *div;
@@ -305,6 +308,7 @@ static struct clk *_register_divider(struct device *dev, const char *name,
 	memcpy(&div->reg, reg, sizeof(*reg));
 	div->shift = shift;
 	div->width = width;
+	div->latch = latch;
 	div->flags = clk_divider_flags;
 	div->hw.init = &init;
 	div->table = table;
@@ -420,6 +424,7 @@ struct clk_hw *ti_clk_build_component_div(struct ti_clk_divider *setup)
 	div->table = _get_div_table_from_setup(setup, &div->width);
 
 	div->shift = setup->bit_shift;
+	div->latch = -EINVAL;
 
 	return &div->hw;
 }
@@ -452,7 +457,7 @@ struct clk *ti_clk_register_divider(struct ti_clk *setup)
 
 	clk = _register_divider(NULL, setup->name, div->parent,
 				flags, &reg, div->bit_shift,
-				width, div_flags, table);
+				width, -EINVAL, div_flags, table);
 
 	if (IS_ERR(clk))
 		kfree(table);
@@ -556,7 +561,7 @@ static int _get_divider_width(struct device_node *node,
 
 static int __init ti_clk_divider_populate(struct device_node *node,
 	struct clk_omap_reg *reg, const struct clk_div_table **table,
-	u32 *flags, u8 *div_flags, u8 *width, u8 *shift)
+	u32 *flags, u8 *div_flags, u8 *width, u8 *shift, s8 *latch)
 {
 	u32 val;
 	int ret;
@@ -570,6 +575,13 @@ static int __init ti_clk_divider_populate(struct device_node *node,
 	else
 		*shift = 0;
 
+	if (latch) {
+		if (!of_property_read_u32(node, "ti,latch-bit", &val))
+			*latch = val;
+		else
+			*latch = -EINVAL;
+	}
+
 	*flags = 0;
 	*div_flags = 0;
 
@@ -606,17 +618,18 @@ static void __init of_ti_divider_clk_setup(struct device_node *node)
 	u8 clk_divider_flags = 0;
 	u8 width = 0;
 	u8 shift = 0;
+	s8 latch = -EINVAL;
 	const struct clk_div_table *table = NULL;
 	u32 flags = 0;
 
 	parent_name = of_clk_get_parent_name(node, 0);
 
 	if (ti_clk_divider_populate(node, &reg, &table, &flags,
-				    &clk_divider_flags, &width, &shift))
+				    &clk_divider_flags, &width, &shift, &latch))
 		goto cleanup;
 
 	clk = _register_divider(NULL, node->name, parent_name, flags, &reg,
-				shift, width, clk_divider_flags, table);
+				shift, width, latch, clk_divider_flags, table);
 
 	if (!IS_ERR(clk)) {
 		of_clk_add_provider(node, of_clk_src_simple_get, clk);
@@ -639,7 +652,8 @@ static void __init of_ti_composite_divider_clk_setup(struct device_node *node)
 		return;
 
 	if (ti_clk_divider_populate(node, &div->reg, &div->table, &val,
-				    &div->flags, &div->width, &div->shift) < 0)
+				    &div->flags, &div->width, &div->shift,
+				    NULL) < 0)
 		goto cleanup;
 
 	if (!ti_clk_add_component(node, &div->hw, CLK_COMPONENT_TYPE_DIVIDER))
diff --git a/drivers/clk/ti/mux.c b/drivers/clk/ti/mux.c
index d4705803f3d3..69a4308a5a98 100644
--- a/drivers/clk/ti/mux.c
+++ b/drivers/clk/ti/mux.c
@@ -86,6 +86,7 @@ static int ti_clk_mux_set_parent(struct clk_hw *hw, u8 index)
 	}
 	val |= index << mux->shift;
 	ti_clk_ll_ops->clk_writel(val, &mux->reg);
+	ti_clk_latch(&mux->reg, mux->latch);
 
 	return 0;
 }
@@ -100,7 +101,7 @@ static struct clk *_register_mux(struct device *dev, const char *name,
 				 const char * const *parent_names,
 				 u8 num_parents, unsigned long flags,
 				 struct clk_omap_reg *reg, u8 shift, u32 mask,
-				 u8 clk_mux_flags, u32 *table)
+				 s8 latch, u8 clk_mux_flags, u32 *table)
 {
 	struct clk_omap_mux *mux;
 	struct clk *clk;
@@ -121,6 +122,7 @@ static struct clk *_register_mux(struct device *dev, const char *name,
 	memcpy(&mux->reg, reg, sizeof(*reg));
 	mux->shift = shift;
 	mux->mask = mask;
+	mux->latch = latch;
 	mux->flags = clk_mux_flags;
 	mux->table = table;
 	mux->hw.init = &init;
@@ -160,7 +162,7 @@ struct clk *ti_clk_register_mux(struct ti_clk *setup)
 		flags |= CLK_SET_RATE_PARENT;
 
 	return _register_mux(NULL, setup->name, mux->parents, mux->num_parents,
-			     flags, &reg, mux->bit_shift, mask,
+			     flags, &reg, mux->bit_shift, mask, -EINVAL,
 			     mux_flags, NULL);
 }
 
@@ -179,6 +181,7 @@ static void of_mux_clk_setup(struct device_node *node)
 	u8 clk_mux_flags = 0;
 	u32 mask = 0;
 	u32 shift = 0;
+	s32 latch = -EINVAL;
 	u32 flags = CLK_SET_RATE_NO_REPARENT;
 
 	num_parents = of_clk_get_parent_count(node);
@@ -197,6 +200,8 @@ static void of_mux_clk_setup(struct device_node *node)
 
 	of_property_read_u32(node, "ti,bit-shift", &shift);
 
+	of_property_read_u32(node, "ti,latch-bit", &latch);
+
 	if (of_property_read_bool(node, "ti,index-starts-at-one"))
 		clk_mux_flags |= CLK_MUX_INDEX_ONE;
 
@@ -211,7 +216,8 @@ static void of_mux_clk_setup(struct device_node *node)
 	mask = (1 << fls(mask)) - 1;
 
 	clk = _register_mux(NULL, node->name, parent_names, num_parents,
-			    flags, &reg, shift, mask, clk_mux_flags, NULL);
+			    flags, &reg, shift, mask, latch, clk_mux_flags,
+			    NULL);
 
 	if (!IS_ERR(clk))
 		of_clk_add_provider(node, of_clk_src_simple_get, clk);
@@ -234,6 +240,7 @@ struct clk_hw *ti_clk_build_component_mux(struct ti_clk_mux *setup)
 		return ERR_PTR(-ENOMEM);
 
 	mux->shift = setup->bit_shift;
+	mux->latch = -EINVAL;
 
 	mux->reg.index = setup->module;
 	mux->reg.offset = setup->reg;
diff --git a/drivers/clk/uniphier/clk-uniphier-sys.c b/drivers/clk/uniphier/clk-uniphier-sys.c
index d244e724e198..ebc78ab2df05 100644
--- a/drivers/clk/uniphier/clk-uniphier-sys.c
+++ b/drivers/clk/uniphier/clk-uniphier-sys.c
@@ -57,6 +57,14 @@
 #define UNIPHIER_PRO4_SYS_CLK_USB3(idx, ch)				\
 	UNIPHIER_CLK_GATE("usb3" #ch, (idx), NULL, 0x2104, 16 + (ch))
 
+#define UNIPHIER_PRO4_SYS_CLK_AIO(idx)	/* "aio" gate on spll / 8 */	\
+	UNIPHIER_CLK_FACTOR("aio-io200m", -1, "spll", 1, 8),		\
+	UNIPHIER_CLK_GATE("aio", (idx), "aio-io200m", 0x2104, 13)
+
+#define UNIPHIER_PRO5_SYS_CLK_AIO(idx)	/* "aio" gate on spll / 12 */	\
+	UNIPHIER_CLK_FACTOR("aio-io200m", -1, "spll", 1, 12),		\
+	UNIPHIER_CLK_GATE("aio", (idx), "aio-io200m", 0x2104, 13)
+
 #define UNIPHIER_LD11_SYS_CLK_AIO(idx)					\
 	UNIPHIER_CLK_FACTOR("aio-io200m", -1, "spll", 1, 10),		\
 	UNIPHIER_CLK_GATE("aio", (idx), "aio-io200m", 0x2108, 0)
@@ -94,16 +102,22 @@ const struct uniphier_clk_data uniphier_pro4_sys_clk_data[] = {
 	UNIPHIER_CLK_FACTOR("upll", -1, "ref", 288, 25),	/* 288 MHz */
 	UNIPHIER_CLK_FACTOR("a2pll", -1, "upll", 256, 125),	/* 589.824 MHz */
 	UNIPHIER_CLK_FACTOR("vpll27a", -1, "ref", 270, 25),	/* 270 MHz */
+	UNIPHIER_CLK_FACTOR("gpll", -1, "ref", 10, 1),		/* 250 MHz */
 	UNIPHIER_CLK_FACTOR("uart", 0, "a2pll", 1, 8),
 	UNIPHIER_CLK_FACTOR("i2c", 1, "spll", 1, 32),
 	UNIPHIER_LD4_SYS_CLK_NAND(2),
 	UNIPHIER_LD4_SYS_CLK_SD,
 	UNIPHIER_CLK_FACTOR("usb2", -1, "upll", 1, 12),
 	UNIPHIER_PRO4_SYS_CLK_ETHER(6),
+	UNIPHIER_CLK_GATE("ether-gb", 7, "gpll", 0x2104, 5),
 	UNIPHIER_LD4_SYS_CLK_STDMAC(8),			/* HSC, MIO, RLE */
+	UNIPHIER_CLK_GATE("ether-phy", 10, "ref", 0x2260, 0),
 	UNIPHIER_PRO4_SYS_CLK_GIO(12),			/* Ether, SATA, USB3 */
 	UNIPHIER_PRO4_SYS_CLK_USB3(14, 0),
 	UNIPHIER_PRO4_SYS_CLK_USB3(15, 1),
+	UNIPHIER_CLK_GATE("sata0", 28, NULL, 0x2104, 18),
+	UNIPHIER_CLK_GATE("sata1", 29, NULL, 0x2104, 19),
+	UNIPHIER_PRO4_SYS_CLK_AIO(40),
 	{ /* sentinel */ }
 };
 
@@ -132,6 +146,8 @@ const struct uniphier_clk_data uniphier_pro5_sys_clk_data[] = {
 	UNIPHIER_PRO4_SYS_CLK_GIO(12),				/* PCIe, USB3 */
 	UNIPHIER_PRO4_SYS_CLK_USB3(14, 0),
 	UNIPHIER_PRO4_SYS_CLK_USB3(15, 1),
+	UNIPHIER_CLK_GATE("pcie", 24, NULL, 0x2108, 2),
+	UNIPHIER_PRO5_SYS_CLK_AIO(40),
 	{ /* sentinel */ }
 };
 
@@ -149,6 +165,8 @@ const struct uniphier_clk_data uniphier_pxs2_sys_clk_data[] = {
 	/* The document mentions 0x2104 bit 18, but not functional */
 	UNIPHIER_CLK_GATE("usb30-phy", 16, NULL, 0x2104, 19),
 	UNIPHIER_CLK_GATE("usb31-phy", 20, NULL, 0x2104, 20),
+	UNIPHIER_CLK_GATE("sata0", 28, NULL, 0x2104, 22),
+	UNIPHIER_PRO5_SYS_CLK_AIO(40),
 	{ /* sentinel */ }
 };
 
@@ -205,6 +223,7 @@ const struct uniphier_clk_data uniphier_ld20_sys_clk_data[] = {
 	UNIPHIER_CLK_GATE("usb30", 14, NULL, 0x210c, 14),
 	UNIPHIER_CLK_GATE("usb30-phy0", 16, NULL, 0x210c, 12),
 	UNIPHIER_CLK_GATE("usb30-phy1", 17, NULL, 0x210c, 13),
+	UNIPHIER_CLK_GATE("pcie", 24, NULL, 0x210c, 4),
 	UNIPHIER_LD11_SYS_CLK_AIO(40),
 	UNIPHIER_LD11_SYS_CLK_EVEA(41),
 	UNIPHIER_LD11_SYS_CLK_EXIV(42),
@@ -233,6 +252,8 @@ const struct uniphier_clk_data uniphier_pxs3_sys_clk_data[] = {
 	UNIPHIER_LD20_SYS_CLK_SD,
 	UNIPHIER_LD11_SYS_CLK_NAND(2),
 	UNIPHIER_LD11_SYS_CLK_EMMC(4),
+	UNIPHIER_CLK_GATE("ether0", 6, NULL, 0x210c, 9),
+	UNIPHIER_CLK_GATE("ether1", 7, NULL, 0x210c, 10),
 	UNIPHIER_CLK_GATE("usb30", 12, NULL, 0x210c, 4),	/* =GIO0 */
 	UNIPHIER_CLK_GATE("usb31-0", 13, NULL, 0x210c, 5),	/* =GIO1 */
 	UNIPHIER_CLK_GATE("usb31-1", 14, NULL, 0x210c, 6),	/* =GIO1-1 */
@@ -241,6 +262,10 @@ const struct uniphier_clk_data uniphier_pxs3_sys_clk_data[] = {
 	UNIPHIER_CLK_GATE("usb30-phy2", 18, NULL, 0x210c, 20),
 	UNIPHIER_CLK_GATE("usb31-phy0", 20, NULL, 0x210c, 17),
 	UNIPHIER_CLK_GATE("usb31-phy1", 21, NULL, 0x210c, 19),
+	UNIPHIER_CLK_GATE("pcie", 24, NULL, 0x210c, 3),
+	UNIPHIER_CLK_GATE("sata0", 28, NULL, 0x210c, 7),
+	UNIPHIER_CLK_GATE("sata1", 29, NULL, 0x210c, 8),
+	UNIPHIER_CLK_GATE("sata-phy", 30, NULL, 0x210c, 21),
 	/* CPU gears */
 	UNIPHIER_CLK_DIV4("cpll", 2, 3, 4, 8),
 	UNIPHIER_CLK_DIV4("spll", 2, 3, 4, 8),
diff --git a/drivers/clk/ux500/Makefile b/drivers/clk/ux500/Makefile
index fedc083dc8be..53fd29002401 100644
--- a/drivers/clk/ux500/Makefile
+++ b/drivers/clk/ux500/Makefile
@@ -10,8 +10,6 @@ obj-y += clk-sysctrl.o
 
 # Clock definitions
 obj-y += u8500_of_clk.o
-obj-y += u9540_clk.o
-obj-y += u8540_clk.o
 
 # ABX500 clock driver
 obj-y += abx500-clk.o
diff --git a/drivers/clk/ux500/abx500-clk.c b/drivers/clk/ux500/abx500-clk.c
index 2257d12ba988..5a86cd8fe5de 100644
--- a/drivers/clk/ux500/abx500-clk.c
+++ b/drivers/clk/ux500/abx500-clk.c
@@ -88,18 +88,6 @@ static int ab8500_reg_clks(struct device *dev)
 	return 0;
 }
 
-/* Clock definitions for ab8540 */
-static int ab8540_reg_clks(struct device *dev)
-{
-	return 0;
-}
-
-/* Clock definitions for ab9540 */
-static int ab9540_reg_clks(struct device *dev)
-{
-	return 0;
-}
-
 static int abx500_clk_probe(struct platform_device *pdev)
 {
 	struct ab8500 *parent = dev_get_drvdata(pdev->dev.parent);
@@ -107,10 +95,6 @@ static int abx500_clk_probe(struct platform_device *pdev)
 
 	if (is_ab8500(parent) || is_ab8505(parent)) {
 		ret = ab8500_reg_clks(&pdev->dev);
-	} else if (is_ab8540(parent)) {
-		ret = ab8540_reg_clks(&pdev->dev);
-	} else if (is_ab9540(parent)) {
-		ret = ab9540_reg_clks(&pdev->dev);
 	} else {
 		dev_err(&pdev->dev, "non supported plf id\n");
 		return -ENODEV;
diff --git a/drivers/clk/ux500/u8540_clk.c b/drivers/clk/ux500/u8540_clk.c
deleted file mode 100644
index 133859f0e2bf..000000000000
--- a/drivers/clk/ux500/u8540_clk.c
+++ /dev/null
@@ -1,597 +0,0 @@
-/*
- * Clock definitions for u8540 platform.
- *
- * Copyright (C) 2012 ST-Ericsson SA
- * Author: Ulf Hansson <ulf.hansson@linaro.org>
- *
- * License terms: GNU General Public License (GPL) version 2
- */
-
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/clkdev.h>
-#include <linux/clk-provider.h>
-#include <linux/mfd/dbx500-prcmu.h>
-#include "clk.h"
-
-/* CLKRST4 is missing making it hard to index things */
-enum clkrst_index {
-	CLKRST1_INDEX = 0,
-	CLKRST2_INDEX,
-	CLKRST3_INDEX,
-	CLKRST5_INDEX,
-	CLKRST6_INDEX,
-	CLKRST_MAX,
-};
-
-static void u8540_clk_init(struct device_node *np)
-{
-	struct clk *clk;
-	u32 bases[CLKRST_MAX];
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(bases); i++) {
-		struct resource r;
-
-		if (of_address_to_resource(np, i, &r))
-			/* Not much choice but to continue */
-			pr_err("failed to get CLKRST %d base address\n",
-			       i + 1);
-		bases[i] = r.start;
-	}
-
-	/* Clock sources. */
-	/* Fixed ClockGen */
-	clk = clk_reg_prcmu_gate("soc0_pll", NULL, PRCMU_PLLSOC0,
-				CLK_IGNORE_UNUSED);
-	clk_register_clkdev(clk, "soc0_pll", NULL);
-
-	clk = clk_reg_prcmu_gate("soc1_pll", NULL, PRCMU_PLLSOC1,
-				CLK_IGNORE_UNUSED);
-	clk_register_clkdev(clk, "soc1_pll", NULL);
-
-	clk = clk_reg_prcmu_gate("ddr_pll", NULL, PRCMU_PLLDDR,
-				CLK_IGNORE_UNUSED);
-	clk_register_clkdev(clk, "ddr_pll", NULL);
-
-	clk = clk_register_fixed_rate(NULL, "rtc32k", NULL,
-				CLK_IGNORE_UNUSED,
-				32768);
-	clk_register_clkdev(clk, "clk32k", NULL);
-	clk_register_clkdev(clk, "apb_pclk", "rtc-pl031");
-
-	clk = clk_register_fixed_rate(NULL, "ulp38m4", NULL,
-				CLK_IGNORE_UNUSED,
-				38400000);
-
-	clk = clk_reg_prcmu_gate("uartclk", NULL, PRCMU_UARTCLK, 0);
-	clk_register_clkdev(clk, NULL, "UART");
-
-	/* msp02clk needs a abx500 clk as parent. Handle by abx500 clk driver */
-	clk = clk_reg_prcmu_gate("msp02clk", "ab9540_sysclk12_b1",
-			PRCMU_MSP02CLK, 0);
-	clk_register_clkdev(clk, NULL, "MSP02");
-
-	clk = clk_reg_prcmu_gate("msp1clk", NULL, PRCMU_MSP1CLK, 0);
-	clk_register_clkdev(clk, NULL, "MSP1");
-
-	clk = clk_reg_prcmu_gate("i2cclk", NULL, PRCMU_I2CCLK, 0);
-	clk_register_clkdev(clk, NULL, "I2C");
-
-	clk = clk_reg_prcmu_gate("slimclk", NULL, PRCMU_SLIMCLK, 0);
-	clk_register_clkdev(clk, NULL, "slim");
-
-	clk = clk_reg_prcmu_gate("per1clk", NULL, PRCMU_PER1CLK, 0);
-	clk_register_clkdev(clk, NULL, "PERIPH1");
-
-	clk = clk_reg_prcmu_gate("per2clk", NULL, PRCMU_PER2CLK, 0);
-	clk_register_clkdev(clk, NULL, "PERIPH2");
-
-	clk = clk_reg_prcmu_gate("per3clk", NULL, PRCMU_PER3CLK, 0);
-	clk_register_clkdev(clk, NULL, "PERIPH3");
-
-	clk = clk_reg_prcmu_gate("per5clk", NULL, PRCMU_PER5CLK, 0);
-	clk_register_clkdev(clk, NULL, "PERIPH5");
-
-	clk = clk_reg_prcmu_gate("per6clk", NULL, PRCMU_PER6CLK, 0);
-	clk_register_clkdev(clk, NULL, "PERIPH6");
-
-	clk = clk_reg_prcmu_gate("per7clk", NULL, PRCMU_PER7CLK, 0);
-	clk_register_clkdev(clk, NULL, "PERIPH7");
-
-	clk = clk_reg_prcmu_scalable("lcdclk", NULL, PRCMU_LCDCLK, 0,
-				CLK_SET_RATE_GATE);
-	clk_register_clkdev(clk, NULL, "lcd");
-	clk_register_clkdev(clk, "lcd", "mcde");
-
-	clk = clk_reg_prcmu_opp_gate("bmlclk", NULL, PRCMU_BMLCLK, 0);
-	clk_register_clkdev(clk, NULL, "bml");
-
-	clk = clk_reg_prcmu_scalable("hsitxclk", NULL, PRCMU_HSITXCLK, 0,
-				     CLK_SET_RATE_GATE);
-
-	clk = clk_reg_prcmu_scalable("hsirxclk", NULL, PRCMU_HSIRXCLK, 0,
-				     CLK_SET_RATE_GATE);
-
-	clk = clk_reg_prcmu_scalable("hdmiclk", NULL, PRCMU_HDMICLK, 0,
-				     CLK_SET_RATE_GATE);
-	clk_register_clkdev(clk, NULL, "hdmi");
-	clk_register_clkdev(clk, "hdmi", "mcde");
-
-	clk = clk_reg_prcmu_gate("apeatclk", NULL, PRCMU_APEATCLK, 0);
-	clk_register_clkdev(clk, NULL, "apeat");
-
-	clk = clk_reg_prcmu_gate("apetraceclk", NULL, PRCMU_APETRACECLK, 0);
-	clk_register_clkdev(clk, NULL, "apetrace");
-
-	clk = clk_reg_prcmu_gate("mcdeclk", NULL, PRCMU_MCDECLK, 0);
-	clk_register_clkdev(clk, NULL, "mcde");
-	clk_register_clkdev(clk, "mcde", "mcde");
-	clk_register_clkdev(clk, NULL, "dsilink.0");
-	clk_register_clkdev(clk, NULL, "dsilink.1");
-	clk_register_clkdev(clk, NULL, "dsilink.2");
-
-	clk = clk_reg_prcmu_opp_gate("ipi2cclk", NULL, PRCMU_IPI2CCLK, 0);
-	clk_register_clkdev(clk, NULL, "ipi2");
-
-	clk = clk_reg_prcmu_gate("dsialtclk", NULL, PRCMU_DSIALTCLK, 0);
-	clk_register_clkdev(clk, NULL, "dsialt");
-
-	clk = clk_reg_prcmu_gate("dmaclk", NULL, PRCMU_DMACLK, 0);
-	clk_register_clkdev(clk, NULL, "dma40.0");
-
-	clk = clk_reg_prcmu_gate("b2r2clk", NULL, PRCMU_B2R2CLK, 0);
-	clk_register_clkdev(clk, NULL, "b2r2");
-	clk_register_clkdev(clk, NULL, "b2r2_core");
-	clk_register_clkdev(clk, NULL, "U8500-B2R2.0");
-	clk_register_clkdev(clk, NULL, "b2r2_1_core");
-
-	clk = clk_reg_prcmu_scalable("tvclk", NULL, PRCMU_TVCLK, 0,
-				     CLK_SET_RATE_GATE);
-	clk_register_clkdev(clk, NULL, "tv");
-	clk_register_clkdev(clk, "tv", "mcde");
-
-	clk = clk_reg_prcmu_gate("sspclk", NULL, PRCMU_SSPCLK, 0);
-	clk_register_clkdev(clk, NULL, "SSP");
-
-	clk = clk_reg_prcmu_gate("rngclk", NULL, PRCMU_RNGCLK, 0);
-	clk_register_clkdev(clk, NULL, "rngclk");
-
-	clk = clk_reg_prcmu_gate("uiccclk", NULL, PRCMU_UICCCLK, 0);
-	clk_register_clkdev(clk, NULL, "uicc");
-
-	clk = clk_reg_prcmu_gate("timclk", NULL, PRCMU_TIMCLK, 0);
-	clk_register_clkdev(clk, NULL, "mtu0");
-	clk_register_clkdev(clk, NULL, "mtu1");
-
-	clk = clk_reg_prcmu_opp_volt_scalable("sdmmcclk", NULL,
-					PRCMU_SDMMCCLK, 100000000,
-					CLK_SET_RATE_GATE);
-	clk_register_clkdev(clk, NULL, "sdmmc");
-
-	clk = clk_reg_prcmu_opp_volt_scalable("sdmmchclk", NULL,
-					PRCMU_SDMMCHCLK, 400000000,
-					CLK_SET_RATE_GATE);
-	clk_register_clkdev(clk, NULL, "sdmmchclk");
-
-	clk = clk_reg_prcmu_gate("hvaclk", NULL, PRCMU_HVACLK, 0);
-	clk_register_clkdev(clk, NULL, "hva");
-
-	clk = clk_reg_prcmu_gate("g1clk", NULL, PRCMU_G1CLK, 0);
-	clk_register_clkdev(clk, NULL, "g1");
-
-	clk = clk_reg_prcmu_scalable("spare1clk", NULL, PRCMU_SPARE1CLK, 0,
-				     CLK_SET_RATE_GATE);
-	clk_register_clkdev(clk, "dsilcd", "mcde");
-
-	clk = clk_reg_prcmu_scalable("dsi_pll", "hdmiclk",
-				PRCMU_PLLDSI, 0, CLK_SET_RATE_GATE);
-	clk_register_clkdev(clk, "dsihs2", "mcde");
-	clk_register_clkdev(clk, "hs_clk", "dsilink.2");
-
-	clk = clk_reg_prcmu_scalable("dsilcd_pll", "spare1clk",
-				PRCMU_PLLDSI_LCD, 0, CLK_SET_RATE_GATE);
-	clk_register_clkdev(clk, "dsilcd_pll", "mcde");
-
-	clk = clk_reg_prcmu_scalable("dsi0clk", "dsi_pll",
-				PRCMU_DSI0CLK, 0, CLK_SET_RATE_GATE);
-	clk_register_clkdev(clk, "dsihs0", "mcde");
-
-	clk = clk_reg_prcmu_scalable("dsi0lcdclk", "dsilcd_pll",
-				PRCMU_DSI0CLK_LCD, 0, CLK_SET_RATE_GATE);
-	clk_register_clkdev(clk, "dsihs0", "mcde");
-	clk_register_clkdev(clk, "hs_clk", "dsilink.0");
-
-	clk = clk_reg_prcmu_scalable("dsi1clk", "dsi_pll",
-				PRCMU_DSI1CLK, 0, CLK_SET_RATE_GATE);
-	clk_register_clkdev(clk, "dsihs1", "mcde");
-
-	clk = clk_reg_prcmu_scalable("dsi1lcdclk", "dsilcd_pll",
-				PRCMU_DSI1CLK_LCD, 0, CLK_SET_RATE_GATE);
-	clk_register_clkdev(clk, "dsihs1", "mcde");
-	clk_register_clkdev(clk, "hs_clk", "dsilink.1");
-
-	clk = clk_reg_prcmu_scalable("dsi0escclk", "tvclk",
-				PRCMU_DSI0ESCCLK, 0, CLK_SET_RATE_GATE);
-	clk_register_clkdev(clk, "lp_clk", "dsilink.0");
-	clk_register_clkdev(clk, "dsilp0", "mcde");
-
-	clk = clk_reg_prcmu_scalable("dsi1escclk", "tvclk",
-				PRCMU_DSI1ESCCLK, 0, CLK_SET_RATE_GATE);
-	clk_register_clkdev(clk, "lp_clk", "dsilink.1");
-	clk_register_clkdev(clk, "dsilp1", "mcde");
-
-	clk = clk_reg_prcmu_scalable("dsi2escclk", "tvclk",
-				PRCMU_DSI2ESCCLK, 0, CLK_SET_RATE_GATE);
-	clk_register_clkdev(clk, "lp_clk", "dsilink.2");
-	clk_register_clkdev(clk, "dsilp2", "mcde");
-
-	clk = clk_reg_prcmu_scalable_rate("armss", NULL,
-				PRCMU_ARMSS, 0, CLK_IGNORE_UNUSED);
-	clk_register_clkdev(clk, "armss", NULL);
-
-	clk = clk_register_fixed_factor(NULL, "smp_twd", "armss",
-				CLK_IGNORE_UNUSED, 1, 2);
-	clk_register_clkdev(clk, NULL, "smp_twd");
-
-	/* PRCC P-clocks */
-	/* Peripheral 1 : PRCC P-clocks */
-	clk = clk_reg_prcc_pclk("p1_pclk0", "per1clk", bases[CLKRST1_INDEX],
-				BIT(0), 0);
-	clk_register_clkdev(clk, "apb_pclk", "uart0");
-
-	clk = clk_reg_prcc_pclk("p1_pclk1", "per1clk", bases[CLKRST1_INDEX],
-				BIT(1), 0);
-	clk_register_clkdev(clk, "apb_pclk", "uart1");
-
-	clk = clk_reg_prcc_pclk("p1_pclk2", "per1clk", bases[CLKRST1_INDEX],
-				BIT(2), 0);
-	clk_register_clkdev(clk, "apb_pclk", "nmk-i2c.1");
-
-	clk = clk_reg_prcc_pclk("p1_pclk3", "per1clk", bases[CLKRST1_INDEX],
-				BIT(3), 0);
-	clk_register_clkdev(clk, "apb_pclk", "msp0");
-	clk_register_clkdev(clk, "apb_pclk", "dbx5x0-msp-i2s.0");
-
-	clk = clk_reg_prcc_pclk("p1_pclk4", "per1clk", bases[CLKRST1_INDEX],
-				BIT(4), 0);
-	clk_register_clkdev(clk, "apb_pclk", "msp1");
-	clk_register_clkdev(clk, "apb_pclk", "dbx5x0-msp-i2s.1");
-
-	clk = clk_reg_prcc_pclk("p1_pclk5", "per1clk", bases[CLKRST1_INDEX],
-				BIT(5), 0);
-	clk_register_clkdev(clk, "apb_pclk", "sdi0");
-
-	clk = clk_reg_prcc_pclk("p1_pclk6", "per1clk", bases[CLKRST1_INDEX],
-				BIT(6), 0);
-	clk_register_clkdev(clk, "apb_pclk", "nmk-i2c.2");
-
-	clk = clk_reg_prcc_pclk("p1_pclk7", "per1clk", bases[CLKRST1_INDEX],
-				BIT(7), 0);
-	clk_register_clkdev(clk, NULL, "spi3");
-
-	clk = clk_reg_prcc_pclk("p1_pclk8", "per1clk", bases[CLKRST1_INDEX],
-				BIT(8), 0);
-	clk_register_clkdev(clk, "apb_pclk", "slimbus0");
-
-	clk = clk_reg_prcc_pclk("p1_pclk9", "per1clk", bases[CLKRST1_INDEX],
-				BIT(9), 0);
-	clk_register_clkdev(clk, NULL, "gpio.0");
-	clk_register_clkdev(clk, NULL, "gpio.1");
-	clk_register_clkdev(clk, NULL, "gpioblock0");
-	clk_register_clkdev(clk, "apb_pclk", "ab85xx-codec.0");
-
-	clk = clk_reg_prcc_pclk("p1_pclk10", "per1clk", bases[CLKRST1_INDEX],
-				BIT(10), 0);
-	clk_register_clkdev(clk, "apb_pclk", "nmk-i2c.4");
-
-	clk = clk_reg_prcc_pclk("p1_pclk11", "per1clk", bases[CLKRST1_INDEX],
-				BIT(11), 0);
-	clk_register_clkdev(clk, "apb_pclk", "msp3");
-	clk_register_clkdev(clk, "apb_pclk", "dbx5x0-msp-i2s.3");
-
-	/* Peripheral 2 : PRCC P-clocks */
-	clk = clk_reg_prcc_pclk("p2_pclk0", "per2clk", bases[CLKRST2_INDEX],
-				BIT(0), 0);
-	clk_register_clkdev(clk, "apb_pclk", "nmk-i2c.3");
-
-	clk = clk_reg_prcc_pclk("p2_pclk1", "per2clk", bases[CLKRST2_INDEX],
-				BIT(1), 0);
-	clk_register_clkdev(clk, NULL, "spi2");
-
-	clk = clk_reg_prcc_pclk("p2_pclk2", "per2clk", bases[CLKRST2_INDEX],
-				BIT(2), 0);
-	clk_register_clkdev(clk, NULL, "spi1");
-
-	clk = clk_reg_prcc_pclk("p2_pclk3", "per2clk", bases[CLKRST2_INDEX],
-				BIT(3), 0);
-	clk_register_clkdev(clk, NULL, "pwl");
-
-	clk = clk_reg_prcc_pclk("p2_pclk4", "per2clk", bases[CLKRST2_INDEX],
-				BIT(4), 0);
-	clk_register_clkdev(clk, "apb_pclk", "sdi4");
-
-	clk = clk_reg_prcc_pclk("p2_pclk5", "per2clk", bases[CLKRST2_INDEX],
-				BIT(5), 0);
-	clk_register_clkdev(clk, "apb_pclk", "msp2");
-	clk_register_clkdev(clk, "apb_pclk", "dbx5x0-msp-i2s.2");
-
-	clk = clk_reg_prcc_pclk("p2_pclk6", "per2clk", bases[CLKRST2_INDEX],
-				BIT(6), 0);
-	clk_register_clkdev(clk, "apb_pclk", "sdi1");
-
-	clk = clk_reg_prcc_pclk("p2_pclk7", "per2clk", bases[CLKRST2_INDEX],
-				BIT(7), 0);
-	clk_register_clkdev(clk, "apb_pclk", "sdi3");
-
-	clk = clk_reg_prcc_pclk("p2_pclk8", "per2clk", bases[CLKRST2_INDEX],
-				BIT(8), 0);
-	clk_register_clkdev(clk, NULL, "spi0");
-
-	clk = clk_reg_prcc_pclk("p2_pclk9", "per2clk", bases[CLKRST2_INDEX],
-				BIT(9), 0);
-	clk_register_clkdev(clk, "hsir_hclk", "ste_hsi.0");
-
-	clk = clk_reg_prcc_pclk("p2_pclk10", "per2clk", bases[CLKRST2_INDEX],
-				BIT(10), 0);
-	clk_register_clkdev(clk, "hsit_hclk", "ste_hsi.0");
-
-	clk = clk_reg_prcc_pclk("p2_pclk11", "per2clk", bases[CLKRST2_INDEX],
-				BIT(11), 0);
-	clk_register_clkdev(clk, NULL, "gpio.6");
-	clk_register_clkdev(clk, NULL, "gpio.7");
-	clk_register_clkdev(clk, NULL, "gpioblock1");
-
-	clk = clk_reg_prcc_pclk("p2_pclk12", "per2clk", bases[CLKRST2_INDEX],
-				BIT(12), 0);
-	clk_register_clkdev(clk, "msp4-pclk", "ab85xx-codec.0");
-
-	/* Peripheral 3 : PRCC P-clocks */
-	clk = clk_reg_prcc_pclk("p3_pclk0", "per3clk", bases[CLKRST3_INDEX],
-				BIT(0), 0);
-	clk_register_clkdev(clk, NULL, "fsmc");
-
-	clk = clk_reg_prcc_pclk("p3_pclk1", "per3clk", bases[CLKRST3_INDEX],
-				BIT(1), 0);
-	clk_register_clkdev(clk, "apb_pclk", "ssp0");
-
-	clk = clk_reg_prcc_pclk("p3_pclk2", "per3clk", bases[CLKRST3_INDEX],
-				BIT(2), 0);
-	clk_register_clkdev(clk, "apb_pclk", "ssp1");
-
-	clk = clk_reg_prcc_pclk("p3_pclk3", "per3clk", bases[CLKRST3_INDEX],
-				BIT(3), 0);
-	clk_register_clkdev(clk, "apb_pclk", "nmk-i2c.0");
-
-	clk = clk_reg_prcc_pclk("p3_pclk4", "per3clk", bases[CLKRST3_INDEX],
-				BIT(4), 0);
-	clk_register_clkdev(clk, "apb_pclk", "sdi2");
-
-	clk = clk_reg_prcc_pclk("p3_pclk5", "per3clk", bases[CLKRST3_INDEX],
-				BIT(5), 0);
-	clk_register_clkdev(clk, "apb_pclk", "ske");
-	clk_register_clkdev(clk, "apb_pclk", "nmk-ske-keypad");
-
-	clk = clk_reg_prcc_pclk("p3_pclk6", "per3clk", bases[CLKRST3_INDEX],
-				BIT(6), 0);
-	clk_register_clkdev(clk, "apb_pclk", "uart2");
-
-	clk = clk_reg_prcc_pclk("p3_pclk7", "per3clk", bases[CLKRST3_INDEX],
-				BIT(7), 0);
-	clk_register_clkdev(clk, "apb_pclk", "sdi5");
-
-	clk = clk_reg_prcc_pclk("p3_pclk8", "per3clk", bases[CLKRST3_INDEX],
-				BIT(8), 0);
-	clk_register_clkdev(clk, NULL, "gpio.2");
-	clk_register_clkdev(clk, NULL, "gpio.3");
-	clk_register_clkdev(clk, NULL, "gpio.4");
-	clk_register_clkdev(clk, NULL, "gpio.5");
-	clk_register_clkdev(clk, NULL, "gpioblock2");
-
-	clk = clk_reg_prcc_pclk("p3_pclk9", "per3clk", bases[CLKRST3_INDEX],
-				BIT(9), 0);
-	clk_register_clkdev(clk, "apb_pclk", "nmk-i2c.5");
-
-	clk = clk_reg_prcc_pclk("p3_pclk10", "per3clk", bases[CLKRST3_INDEX],
-				BIT(10), 0);
-	clk_register_clkdev(clk, "apb_pclk", "nmk-i2c.6");
-
-	clk = clk_reg_prcc_pclk("p3_pclk11", "per3clk", bases[CLKRST3_INDEX],
-				BIT(11), 0);
-	clk_register_clkdev(clk, "apb_pclk", "uart3");
-
-	clk = clk_reg_prcc_pclk("p3_pclk12", "per3clk", bases[CLKRST3_INDEX],
-				BIT(12), 0);
-	clk_register_clkdev(clk, "apb_pclk", "uart4");
-
-	/* Peripheral 5 : PRCC P-clocks */
-	clk = clk_reg_prcc_pclk("p5_pclk0", "per5clk", bases[CLKRST5_INDEX],
-				BIT(0), 0);
-	clk_register_clkdev(clk, "usb", "musb-ux500.0");
-	clk_register_clkdev(clk, "usbclk", "ab-iddet.0");
-
-	clk = clk_reg_prcc_pclk("p5_pclk1", "per5clk", bases[CLKRST5_INDEX],
-				BIT(1), 0);
-	clk_register_clkdev(clk, NULL, "gpio.8");
-	clk_register_clkdev(clk, NULL, "gpioblock3");
-
-	/* Peripheral 6 : PRCC P-clocks */
-	clk = clk_reg_prcc_pclk("p6_pclk0", "per6clk", bases[CLKRST6_INDEX],
-				BIT(0), 0);
-	clk_register_clkdev(clk, "apb_pclk", "rng");
-
-	clk = clk_reg_prcc_pclk("p6_pclk1", "per6clk", bases[CLKRST6_INDEX],
-				BIT(1), 0);
-	clk_register_clkdev(clk, NULL, "cryp0");
-	clk_register_clkdev(clk, NULL, "cryp1");
-
-	clk = clk_reg_prcc_pclk("p6_pclk2", "per6clk", bases[CLKRST6_INDEX],
-				BIT(2), 0);
-	clk_register_clkdev(clk, NULL, "hash0");
-
-	clk = clk_reg_prcc_pclk("p6_pclk3", "per6clk", bases[CLKRST6_INDEX],
-				BIT(3), 0);
-	clk_register_clkdev(clk, NULL, "pka");
-
-	clk = clk_reg_prcc_pclk("p6_pclk4", "per6clk", bases[CLKRST6_INDEX],
-				BIT(4), 0);
-	clk_register_clkdev(clk, NULL, "db8540-hash1");
-
-	clk = clk_reg_prcc_pclk("p6_pclk5", "per6clk", bases[CLKRST6_INDEX],
-				BIT(5), 0);
-	clk_register_clkdev(clk, NULL, "cfgreg");
-
-	clk = clk_reg_prcc_pclk("p6_pclk6", "per6clk", bases[CLKRST6_INDEX],
-				BIT(6), 0);
-	clk_register_clkdev(clk, "apb_pclk", "mtu0");
-
-	clk = clk_reg_prcc_pclk("p6_pclk7", "per6clk", bases[CLKRST6_INDEX],
-				BIT(7), 0);
-	clk_register_clkdev(clk, "apb_pclk", "mtu1");
-
-	/*
-	 * PRCC K-clocks  ==> see table PRCC_PCKEN/PRCC_KCKEN
-	 * This differs from the internal implementation:
-	 * We don't use the PERPIH[n| clock as parent, since those _should_
-	 * only be used as parents for the P-clocks.
-	 * TODO: "parentjoin" with corresponding P-clocks for all K-clocks.
-	 */
-
-	/* Peripheral 1 : PRCC K-clocks */
-	clk = clk_reg_prcc_kclk("p1_uart0_kclk", "uartclk",
-			bases[CLKRST1_INDEX], BIT(0), CLK_SET_RATE_GATE);
-	clk_register_clkdev(clk, NULL, "uart0");
-
-	clk = clk_reg_prcc_kclk("p1_uart1_kclk", "uartclk",
-			bases[CLKRST1_INDEX], BIT(1), CLK_SET_RATE_GATE);
-	clk_register_clkdev(clk, NULL, "uart1");
-
-	clk = clk_reg_prcc_kclk("p1_i2c1_kclk", "i2cclk",
-			bases[CLKRST1_INDEX], BIT(2), CLK_SET_RATE_GATE);
-	clk_register_clkdev(clk, NULL, "nmk-i2c.1");
-
-	clk = clk_reg_prcc_kclk("p1_msp0_kclk", "msp02clk",
-			bases[CLKRST1_INDEX], BIT(3), CLK_SET_RATE_GATE);
-	clk_register_clkdev(clk, NULL, "msp0");
-	clk_register_clkdev(clk, NULL, "dbx5x0-msp-i2s.0");
-
-	clk = clk_reg_prcc_kclk("p1_msp1_kclk", "msp1clk",
-			bases[CLKRST1_INDEX], BIT(4), CLK_SET_RATE_GATE);
-	clk_register_clkdev(clk, NULL, "msp1");
-	clk_register_clkdev(clk, NULL, "dbx5x0-msp-i2s.1");
-
-	clk = clk_reg_prcc_kclk("p1_sdi0_kclk", "sdmmchclk",
-			bases[CLKRST1_INDEX], BIT(5), CLK_SET_RATE_GATE);
-	clk_register_clkdev(clk, NULL, "sdi0");
-
-	clk = clk_reg_prcc_kclk("p1_i2c2_kclk", "i2cclk",
-			bases[CLKRST1_INDEX], BIT(6), CLK_SET_RATE_GATE);
-	clk_register_clkdev(clk, NULL, "nmk-i2c.2");
-
-	clk = clk_reg_prcc_kclk("p1_slimbus0_kclk", "slimclk",
-			bases[CLKRST1_INDEX], BIT(8), CLK_SET_RATE_GATE);
-	clk_register_clkdev(clk, NULL, "slimbus0");
-
-	clk = clk_reg_prcc_kclk("p1_i2c4_kclk", "i2cclk",
-			bases[CLKRST1_INDEX], BIT(9), CLK_SET_RATE_GATE);
-	clk_register_clkdev(clk, NULL, "nmk-i2c.4");
-
-	clk = clk_reg_prcc_kclk("p1_msp3_kclk", "msp1clk",
-			bases[CLKRST1_INDEX], BIT(10), CLK_SET_RATE_GATE);
-	clk_register_clkdev(clk, NULL, "msp3");
-	clk_register_clkdev(clk, NULL, "dbx5x0-msp-i2s.3");
-
-	/* Peripheral 2 : PRCC K-clocks */
-	clk = clk_reg_prcc_kclk("p2_i2c3_kclk", "i2cclk",
-			bases[CLKRST2_INDEX], BIT(0), CLK_SET_RATE_GATE);
-	clk_register_clkdev(clk, NULL, "nmk-i2c.3");
-
-	clk = clk_reg_prcc_kclk("p2_pwl_kclk", "rtc32k",
-			bases[CLKRST2_INDEX], BIT(1), CLK_SET_RATE_GATE);
-	clk_register_clkdev(clk, NULL, "pwl");
-
-	clk = clk_reg_prcc_kclk("p2_sdi4_kclk", "sdmmchclk",
-			bases[CLKRST2_INDEX], BIT(2), CLK_SET_RATE_GATE);
-	clk_register_clkdev(clk, NULL, "sdi4");
-
-	clk = clk_reg_prcc_kclk("p2_msp2_kclk", "msp02clk",
-			bases[CLKRST2_INDEX], BIT(3), CLK_SET_RATE_GATE);
-	clk_register_clkdev(clk, NULL, "msp2");
-	clk_register_clkdev(clk, NULL, "dbx5x0-msp-i2s.2");
-
-	clk = clk_reg_prcc_kclk("p2_sdi1_kclk", "sdmmchclk",
-			bases[CLKRST2_INDEX], BIT(4), CLK_SET_RATE_GATE);
-	clk_register_clkdev(clk, NULL, "sdi1");
-
-	clk = clk_reg_prcc_kclk("p2_sdi3_kclk", "sdmmcclk",
-			bases[CLKRST2_INDEX], BIT(5), CLK_SET_RATE_GATE);
-	clk_register_clkdev(clk, NULL, "sdi3");
-
-	clk = clk_reg_prcc_kclk("p2_ssirx_kclk", "hsirxclk",
-			bases[CLKRST2_INDEX], BIT(6),
-			CLK_SET_RATE_GATE|CLK_SET_RATE_PARENT);
-	clk_register_clkdev(clk, "hsir_hsirxclk", "ste_hsi.0");
-
-	clk = clk_reg_prcc_kclk("p2_ssitx_kclk", "hsitxclk",
-			bases[CLKRST2_INDEX], BIT(7),
-			CLK_SET_RATE_GATE|CLK_SET_RATE_PARENT);
-	clk_register_clkdev(clk, "hsit_hsitxclk", "ste_hsi.0");
-
-	/* Should only be 9540, but might be added for 85xx as well */
-	clk = clk_reg_prcc_kclk("p2_msp4_kclk", "msp02clk",
-			bases[CLKRST2_INDEX], BIT(9), CLK_SET_RATE_GATE);
-	clk_register_clkdev(clk, NULL, "msp4");
-	clk_register_clkdev(clk, "msp4", "ab85xx-codec.0");
-
-	/* Peripheral 3 : PRCC K-clocks */
-	clk = clk_reg_prcc_kclk("p3_ssp0_kclk", "sspclk",
-			bases[CLKRST3_INDEX], BIT(1), CLK_SET_RATE_GATE);
-	clk_register_clkdev(clk, NULL, "ssp0");
-
-	clk = clk_reg_prcc_kclk("p3_ssp1_kclk", "sspclk",
-			bases[CLKRST3_INDEX], BIT(2), CLK_SET_RATE_GATE);
-	clk_register_clkdev(clk, NULL, "ssp1");
-
-	clk = clk_reg_prcc_kclk("p3_i2c0_kclk", "i2cclk",
-			bases[CLKRST3_INDEX], BIT(3), CLK_SET_RATE_GATE);
-	clk_register_clkdev(clk, NULL, "nmk-i2c.0");
-
-	clk = clk_reg_prcc_kclk("p3_sdi2_kclk", "sdmmchclk",
-			bases[CLKRST3_INDEX], BIT(4), CLK_SET_RATE_GATE);
-	clk_register_clkdev(clk, NULL, "sdi2");
-
-	clk = clk_reg_prcc_kclk("p3_ske_kclk", "rtc32k",
-			bases[CLKRST3_INDEX], BIT(5), CLK_SET_RATE_GATE);
-	clk_register_clkdev(clk, NULL, "ske");
-	clk_register_clkdev(clk, NULL, "nmk-ske-keypad");
-
-	clk = clk_reg_prcc_kclk("p3_uart2_kclk", "uartclk",
-			bases[CLKRST3_INDEX], BIT(6), CLK_SET_RATE_GATE);
-	clk_register_clkdev(clk, NULL, "uart2");
-
-	clk = clk_reg_prcc_kclk("p3_sdi5_kclk", "sdmmcclk",
-			bases[CLKRST3_INDEX], BIT(7), CLK_SET_RATE_GATE);
-	clk_register_clkdev(clk, NULL, "sdi5");
-
-	clk = clk_reg_prcc_kclk("p3_i2c5_kclk", "i2cclk",
-			bases[CLKRST3_INDEX], BIT(8), CLK_SET_RATE_GATE);
-	clk_register_clkdev(clk, NULL, "nmk-i2c.5");
-
-	clk = clk_reg_prcc_kclk("p3_i2c6_kclk", "i2cclk",
-			bases[CLKRST3_INDEX], BIT(9), CLK_SET_RATE_GATE);
-	clk_register_clkdev(clk, NULL, "nmk-i2c.6");
-
-	clk = clk_reg_prcc_kclk("p3_uart3_kclk", "uartclk",
-			bases[CLKRST3_INDEX], BIT(10), CLK_SET_RATE_GATE);
-	clk_register_clkdev(clk, NULL, "uart3");
-
-	clk = clk_reg_prcc_kclk("p3_uart4_kclk", "uartclk",
-			bases[CLKRST3_INDEX], BIT(11), CLK_SET_RATE_GATE);
-	clk_register_clkdev(clk, NULL, "uart4");
-
-	/* Peripheral 6 : PRCC K-clocks */
-	clk = clk_reg_prcc_kclk("p6_rng_kclk", "rngclk",
-			bases[CLKRST6_INDEX], BIT(0), CLK_SET_RATE_GATE);
-	clk_register_clkdev(clk, NULL, "rng");
-}
-CLK_OF_DECLARE(u8540_clks, "stericsson,u8540-clks", u8540_clk_init);
diff --git a/drivers/clk/ux500/u9540_clk.c b/drivers/clk/ux500/u9540_clk.c
deleted file mode 100644
index 7b6bca49ce42..000000000000
--- a/drivers/clk/ux500/u9540_clk.c
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * Clock definitions for u9540 platform.
- *
- * Copyright (C) 2012 ST-Ericsson SA
- * Author: Ulf Hansson <ulf.hansson@linaro.org>
- *
- * License terms: GNU General Public License (GPL) version 2
- */
-
-#include <linux/clk-provider.h>
-#include <linux/mfd/dbx500-prcmu.h>
-#include "clk.h"
-
-static void u9540_clk_init(struct device_node *np)
-{
-	/* register clocks here */
-}
-CLK_OF_DECLARE(u9540_clks, "stericsson,u9540-clks", u9540_clk_init);
diff --git a/drivers/clk/versatile/clk-vexpress-osc.c b/drivers/clk/versatile/clk-vexpress-osc.c
index e7a868b83fe5..dd08ecb498be 100644
--- a/drivers/clk/versatile/clk-vexpress-osc.c
+++ b/drivers/clk/versatile/clk-vexpress-osc.c
@@ -44,10 +44,10 @@ static long vexpress_osc_round_rate(struct clk_hw *hw, unsigned long rate,
 {
 	struct vexpress_osc *osc = to_vexpress_osc(hw);
 
-	if (WARN_ON(osc->rate_min && rate < osc->rate_min))
+	if (osc->rate_min && rate < osc->rate_min)
 		rate = osc->rate_min;
 
-	if (WARN_ON(osc->rate_max && rate > osc->rate_max))
+	if (osc->rate_max && rate > osc->rate_max)
 		rate = osc->rate_max;
 
 	return rate;
@@ -104,6 +104,7 @@ static int vexpress_osc_probe(struct platform_device *pdev)
 		return PTR_ERR(clk);
 
 	of_clk_add_provider(pdev->dev.of_node, of_clk_src_simple_get, clk);
+	clk_hw_set_rate_range(&osc->hw, osc->rate_min, osc->rate_max);
 
 	dev_dbg(&pdev->dev, "Registered clock '%s'\n", init.name);
 
diff --git a/drivers/cpufreq/armada-37xx-cpufreq.c b/drivers/cpufreq/armada-37xx-cpufreq.c
index c6ebc88a7d8d..72a2975499db 100644
--- a/drivers/cpufreq/armada-37xx-cpufreq.c
+++ b/drivers/cpufreq/armada-37xx-cpufreq.c
@@ -202,6 +202,7 @@ static int __init armada37xx_cpufreq_driver_init(void)
 	cur_frequency = clk_get_rate(clk);
 	if (!cur_frequency) {
 		dev_err(cpu_dev, "Failed to get clock rate for CPU\n");
+		clk_put(clk);
 		return -EINVAL;
 	}
 
@@ -210,6 +211,7 @@ static int __init armada37xx_cpufreq_driver_init(void)
 		return -EINVAL;
 
 	armada37xx_cpufreq_dvfs_setup(nb_pm_base, clk, dvfs->divider);
+	clk_put(clk);
 
 	for (load_lvl = ARMADA_37XX_DVFS_LOAD_0; load_lvl < LOAD_LEVEL_NR;
 	     load_lvl++) {
diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index 8300a9fcb80c..bc5fc1630876 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -162,14 +162,23 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
 		cpu->perf_caps.highest_perf;
 	policy->cpuinfo.max_freq = cppc_dmi_max_khz;
 
-	policy->cpuinfo.transition_latency = cppc_get_transition_latency(cpu_num);
 	policy->transition_delay_us = cppc_get_transition_latency(cpu_num) /
 		NSEC_PER_USEC;
 	policy->shared_type = cpu->shared_type;
 
-	if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
+	if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
+		int i;
+
 		cpumask_copy(policy->cpus, cpu->shared_cpu_map);
-	else if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL) {
+
+		for_each_cpu(i, policy->cpus) {
+			if (unlikely(i == policy->cpu))
+				continue;
+
+			memcpy(&all_cpu_data[i]->perf_caps, &cpu->perf_caps,
+			       sizeof(cpu->perf_caps));
+		}
+	} else if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL) {
 		/* Support only SW_ANY for now. */
 		pr_debug("Unsupported CPU co-ord type\n");
 		return -EFAULT;
diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c
index 10e119ae66dd..3a8cc99e6815 100644
--- a/drivers/cpufreq/freq_table.c
+++ b/drivers/cpufreq/freq_table.c
@@ -352,20 +352,6 @@ static int set_freq_table_sorted(struct cpufreq_policy *policy)
 	return 0;
 }
 
-int cpufreq_table_validate_and_show(struct cpufreq_policy *policy,
-				      struct cpufreq_frequency_table *table)
-{
-	int ret;
-
-	ret = cpufreq_frequency_table_cpuinfo(policy, table);
-	if (ret)
-		return ret;
-
-	policy->freq_table = table;
-	return 0;
-}
-EXPORT_SYMBOL_GPL(cpufreq_table_validate_and_show);
-
 int cpufreq_table_validate_and_sort(struct cpufreq_policy *policy)
 {
 	int ret;
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 6d084c61ee25..17e566afbb41 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -26,7 +26,6 @@
 #include <linux/sysfs.h>
 #include <linux/types.h>
 #include <linux/fs.h>
-#include <linux/debugfs.h>
 #include <linux/acpi.h>
 #include <linux/vmalloc.h>
 #include <trace/events/power.h>
diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c
index 959a1dbe3835..b4dbc77459b6 100644
--- a/drivers/cpufreq/scmi-cpufreq.c
+++ b/drivers/cpufreq/scmi-cpufreq.c
@@ -159,13 +159,7 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy)
 	priv->domain_id = handle->perf_ops->device_domain_id(cpu_dev);
 
 	policy->driver_data = priv;
-
-	ret = cpufreq_table_validate_and_show(policy, freq_table);
-	if (ret) {
-		dev_err(cpu_dev, "%s: invalid frequency table: %d\n", __func__,
-			ret);
-		goto out_free_cpufreq_table;
-	}
+	policy->freq_table = freq_table;
 
 	/* SCMI allows DVFS request for any domain from any CPU */
 	policy->dvfs_possible_from_any_cpu = true;
@@ -179,8 +173,6 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy)
 	policy->fast_switch_possible = true;
 	return 0;
 
-out_free_cpufreq_table:
-	dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table);
 out_free_priv:
 	kfree(priv);
 out_free_opp:
diff --git a/drivers/cpufreq/ti-cpufreq.c b/drivers/cpufreq/ti-cpufreq.c
index a099b7bf74cd..6ba709b6f095 100644
--- a/drivers/cpufreq/ti-cpufreq.c
+++ b/drivers/cpufreq/ti-cpufreq.c
@@ -304,7 +304,7 @@ static struct platform_driver ti_cpufreq_driver = {
 		.name = "ti-cpufreq",
 	},
 };
-module_platform_driver(ti_cpufreq_driver);
+builtin_platform_driver(ti_cpufreq_driver);
 
 MODULE_DESCRIPTION("TI CPUFreq/OPP hw-supported driver");
 MODULE_AUTHOR("Dave Gerlach <d-gerlach@ti.com>");
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 0003e9a02637..6df894d65d9e 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -272,12 +272,18 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
  *
  * @drv: the cpuidle driver
  * @dev: the cpuidle device
+ * @stop_tick: indication on whether or not to stop the tick
  *
  * Returns the index of the idle state.  The return value must not be negative.
+ *
+ * The memory location pointed to by @stop_tick is expected to be set to
+ * 'false' if the scheduler tick should not be stopped before entering the
+ * returned state.
  */
-int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
+int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
+		   bool *stop_tick)
 {
-	return cpuidle_curr_governor->select(drv, dev);
+	return cpuidle_curr_governor->select(drv, dev, stop_tick);
 }
 
 /**
diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c
index 1ad8745fd6d6..b24883f85c99 100644
--- a/drivers/cpuidle/governors/ladder.c
+++ b/drivers/cpuidle/governors/ladder.c
@@ -63,9 +63,10 @@ static inline void ladder_do_selection(struct ladder_device *ldev,
  * ladder_select_state - selects the next state to enter
  * @drv: cpuidle driver
  * @dev: the CPU
+ * @dummy: not used
  */
 static int ladder_select_state(struct cpuidle_driver *drv,
-				struct cpuidle_device *dev)
+			       struct cpuidle_device *dev, bool *dummy)
 {
 	struct ladder_device *ldev = this_cpu_ptr(&ladder_devices);
 	struct device *device = get_cpu_device(dev->cpu);
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index aa390404e85f..1bfe03ceb236 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -123,6 +123,7 @@
 struct menu_device {
 	int		last_state_idx;
 	int             needs_update;
+	int             tick_wakeup;
 
 	unsigned int	next_timer_us;
 	unsigned int	predicted_us;
@@ -279,8 +280,10 @@ again:
  * menu_select - selects the next idle state to enter
  * @drv: cpuidle driver containing state data
  * @dev: the CPU
+ * @stop_tick: indication on whether or not to stop the tick
  */
-static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
+static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
+		       bool *stop_tick)
 {
 	struct menu_device *data = this_cpu_ptr(&menu_devices);
 	struct device *device = get_cpu_device(dev->cpu);
@@ -292,6 +295,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	unsigned int expected_interval;
 	unsigned long nr_iowaiters, cpu_load;
 	int resume_latency = dev_pm_qos_raw_read_value(device);
+	ktime_t delta_next;
 
 	if (data->needs_update) {
 		menu_update(drv, dev);
@@ -303,11 +307,13 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 		latency_req = resume_latency;
 
 	/* Special case when user has set very strict latency requirement */
-	if (unlikely(latency_req == 0))
+	if (unlikely(latency_req == 0)) {
+		*stop_tick = false;
 		return 0;
+	}
 
 	/* determine the expected residency time, round up */
-	data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length());
+	data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length(&delta_next));
 
 	get_iowait_load(&nr_iowaiters, &cpu_load);
 	data->bucket = which_bucket(data->next_timer_us, nr_iowaiters);
@@ -346,14 +352,30 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	 */
 	data->predicted_us = min(data->predicted_us, expected_interval);
 
-	/*
-	 * Use the performance multiplier and the user-configurable
-	 * latency_req to determine the maximum exit latency.
-	 */
-	interactivity_req = data->predicted_us / performance_multiplier(nr_iowaiters, cpu_load);
-	if (latency_req > interactivity_req)
-		latency_req = interactivity_req;
+	if (tick_nohz_tick_stopped()) {
+		/*
+		 * If the tick is already stopped, the cost of possible short
+		 * idle duration misprediction is much higher, because the CPU
+		 * may be stuck in a shallow idle state for a long time as a
+		 * result of it.  In that case say we might mispredict and try
+		 * to force the CPU into a state for which we would have stopped
+		 * the tick, unless a timer is going to expire really soon
+		 * anyway.
+		 */
+		if (data->predicted_us < TICK_USEC)
+			data->predicted_us = min_t(unsigned int, TICK_USEC,
+						   ktime_to_us(delta_next));
+	} else {
+		/*
+		 * Use the performance multiplier and the user-configurable
+		 * latency_req to determine the maximum exit latency.
+		 */
+		interactivity_req = data->predicted_us / performance_multiplier(nr_iowaiters, cpu_load);
+		if (latency_req > interactivity_req)
+			latency_req = interactivity_req;
+	}
 
+	expected_interval = data->predicted_us;
 	/*
 	 * Find the idle state with the lowest power while satisfying
 	 * our constraints.
@@ -369,15 +391,52 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 			idx = i; /* first enabled state */
 		if (s->target_residency > data->predicted_us)
 			break;
-		if (s->exit_latency > latency_req)
+		if (s->exit_latency > latency_req) {
+			/*
+			 * If we break out of the loop for latency reasons, use
+			 * the target residency of the selected state as the
+			 * expected idle duration so that the tick is retained
+			 * as long as that target residency is low enough.
+			 */
+			expected_interval = drv->states[idx].target_residency;
 			break;
-
+		}
 		idx = i;
 	}
 
 	if (idx == -1)
 		idx = 0; /* No states enabled. Must use 0. */
 
+	/*
+	 * Don't stop the tick if the selected state is a polling one or if the
+	 * expected idle duration is shorter than the tick period length.
+	 */
+	if ((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) ||
+	    expected_interval < TICK_USEC) {
+		unsigned int delta_next_us = ktime_to_us(delta_next);
+
+		*stop_tick = false;
+
+		if (!tick_nohz_tick_stopped() && idx > 0 &&
+		    drv->states[idx].target_residency > delta_next_us) {
+			/*
+			 * The tick is not going to be stopped and the target
+			 * residency of the state to be returned is not within
+			 * the time until the next timer event including the
+			 * tick, so try to correct that.
+			 */
+			for (i = idx - 1; i >= 0; i--) {
+			    if (drv->states[i].disabled ||
+			        dev->states_usage[i].disable)
+					continue;
+
+				idx = i;
+				if (drv->states[i].target_residency <= delta_next_us)
+					break;
+			}
+		}
+	}
+
 	data->last_state_idx = idx;
 
 	return data->last_state_idx;
@@ -397,6 +456,7 @@ static void menu_reflect(struct cpuidle_device *dev, int index)
 
 	data->last_state_idx = index;
 	data->needs_update = 1;
+	data->tick_wakeup = tick_nohz_idle_got_tick();
 }
 
 /**
@@ -427,14 +487,27 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	 * assume the state was never reached and the exit latency is 0.
 	 */
 
-	/* measured value */
-	measured_us = cpuidle_get_last_residency(dev);
-
-	/* Deduct exit latency */
-	if (measured_us > 2 * target->exit_latency)
-		measured_us -= target->exit_latency;
-	else
-		measured_us /= 2;
+	if (data->tick_wakeup && data->next_timer_us > TICK_USEC) {
+		/*
+		 * The nohz code said that there wouldn't be any events within
+		 * the tick boundary (if the tick was stopped), but the idle
+		 * duration predictor had a differing opinion.  Since the CPU
+		 * was woken up by a tick (that wasn't stopped after all), the
+		 * predictor was not quite right, so assume that the CPU could
+		 * have been idle long (but not forever) to help the idle
+		 * duration predictor do a better job next time.
+		 */
+		measured_us = 9 * MAX_INTERESTING / 10;
+	} else {
+		/* measured value */
+		measured_us = cpuidle_get_last_residency(dev);
+
+		/* Deduct exit latency */
+		if (measured_us > 2 * target->exit_latency)
+			measured_us -= target->exit_latency;
+		else
+			measured_us /= 2;
+	}
 
 	/* Make sure our coefficients do not exceed unity */
 	if (measured_us > data->next_timer_us)
diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig
index b79aa8f7a497..e0700bf4893a 100644
--- a/drivers/dax/Kconfig
+++ b/drivers/dax/Kconfig
@@ -1,3 +1,7 @@
+config DAX_DRIVER
+	select DAX
+	bool
+
 menuconfig DAX
 	tristate "DAX: direct access to differentiated memory"
 	select SRCU
@@ -16,7 +20,6 @@ config DEV_DAX
 	  baseline memory pool.  Mappings of a /dev/daxX.Y device impose
 	  restrictions that make the mapping behavior deterministic.
 
-
 config DEV_DAX_PMEM
 	tristate "PMEM DAX: direct access to persistent memory"
 	depends on LIBNVDIMM && NVDIMM_DAX && DEV_DAX
diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index 0b61f48f21a6..be8606457f27 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -257,8 +257,8 @@ static int __dev_dax_pte_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
 
 	dax_region = dev_dax->region;
 	if (dax_region->align > PAGE_SIZE) {
-		dev_dbg(dev, "%s: alignment (%#x) > fault size (%#x)\n",
-			__func__, dax_region->align, fault_size);
+		dev_dbg(dev, "alignment (%#x) > fault size (%#x)\n",
+			dax_region->align, fault_size);
 		return VM_FAULT_SIGBUS;
 	}
 
@@ -267,8 +267,7 @@ static int __dev_dax_pte_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
 
 	phys = dax_pgoff_to_phys(dev_dax, vmf->pgoff, PAGE_SIZE);
 	if (phys == -1) {
-		dev_dbg(dev, "%s: pgoff_to_phys(%#lx) failed\n", __func__,
-				vmf->pgoff);
+		dev_dbg(dev, "pgoff_to_phys(%#lx) failed\n", vmf->pgoff);
 		return VM_FAULT_SIGBUS;
 	}
 
@@ -299,14 +298,14 @@ static int __dev_dax_pmd_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
 
 	dax_region = dev_dax->region;
 	if (dax_region->align > PMD_SIZE) {
-		dev_dbg(dev, "%s: alignment (%#x) > fault size (%#x)\n",
-			__func__, dax_region->align, fault_size);
+		dev_dbg(dev, "alignment (%#x) > fault size (%#x)\n",
+			dax_region->align, fault_size);
 		return VM_FAULT_SIGBUS;
 	}
 
 	/* dax pmd mappings require pfn_t_devmap() */
 	if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) != (PFN_DEV|PFN_MAP)) {
-		dev_dbg(dev, "%s: region lacks devmap flags\n", __func__);
+		dev_dbg(dev, "region lacks devmap flags\n");
 		return VM_FAULT_SIGBUS;
 	}
 
@@ -323,8 +322,7 @@ static int __dev_dax_pmd_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
 	pgoff = linear_page_index(vmf->vma, pmd_addr);
 	phys = dax_pgoff_to_phys(dev_dax, pgoff, PMD_SIZE);
 	if (phys == -1) {
-		dev_dbg(dev, "%s: pgoff_to_phys(%#lx) failed\n", __func__,
-				pgoff);
+		dev_dbg(dev, "pgoff_to_phys(%#lx) failed\n", pgoff);
 		return VM_FAULT_SIGBUS;
 	}
 
@@ -351,14 +349,14 @@ static int __dev_dax_pud_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
 
 	dax_region = dev_dax->region;
 	if (dax_region->align > PUD_SIZE) {
-		dev_dbg(dev, "%s: alignment (%#x) > fault size (%#x)\n",
-			__func__, dax_region->align, fault_size);
+		dev_dbg(dev, "alignment (%#x) > fault size (%#x)\n",
+			dax_region->align, fault_size);
 		return VM_FAULT_SIGBUS;
 	}
 
 	/* dax pud mappings require pfn_t_devmap() */
 	if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) != (PFN_DEV|PFN_MAP)) {
-		dev_dbg(dev, "%s: region lacks devmap flags\n", __func__);
+		dev_dbg(dev, "region lacks devmap flags\n");
 		return VM_FAULT_SIGBUS;
 	}
 
@@ -375,8 +373,7 @@ static int __dev_dax_pud_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
 	pgoff = linear_page_index(vmf->vma, pud_addr);
 	phys = dax_pgoff_to_phys(dev_dax, pgoff, PUD_SIZE);
 	if (phys == -1) {
-		dev_dbg(dev, "%s: pgoff_to_phys(%#lx) failed\n", __func__,
-				pgoff);
+		dev_dbg(dev, "pgoff_to_phys(%#lx) failed\n", pgoff);
 		return VM_FAULT_SIGBUS;
 	}
 
@@ -399,9 +396,8 @@ static int dev_dax_huge_fault(struct vm_fault *vmf,
 	struct file *filp = vmf->vma->vm_file;
 	struct dev_dax *dev_dax = filp->private_data;
 
-	dev_dbg(&dev_dax->dev, "%s: %s: %s (%#lx - %#lx) size = %d\n", __func__,
-			current->comm, (vmf->flags & FAULT_FLAG_WRITE)
-			? "write" : "read",
+	dev_dbg(&dev_dax->dev, "%s: %s (%#lx - %#lx) size = %d\n", current->comm,
+			(vmf->flags & FAULT_FLAG_WRITE) ? "write" : "read",
 			vmf->vma->vm_start, vmf->vma->vm_end, pe_size);
 
 	id = dax_read_lock();
@@ -460,7 +456,7 @@ static int dax_mmap(struct file *filp, struct vm_area_struct *vma)
 	struct dev_dax *dev_dax = filp->private_data;
 	int rc, id;
 
-	dev_dbg(&dev_dax->dev, "%s\n", __func__);
+	dev_dbg(&dev_dax->dev, "trace\n");
 
 	/*
 	 * We lock to check dax_dev liveness and will re-check at
@@ -518,7 +514,7 @@ static int dax_open(struct inode *inode, struct file *filp)
 	struct inode *__dax_inode = dax_inode(dax_dev);
 	struct dev_dax *dev_dax = dax_get_private(dax_dev);
 
-	dev_dbg(&dev_dax->dev, "%s\n", __func__);
+	dev_dbg(&dev_dax->dev, "trace\n");
 	inode->i_mapping = __dax_inode->i_mapping;
 	inode->i_mapping->host = __dax_inode;
 	filp->f_mapping = inode->i_mapping;
@@ -533,7 +529,7 @@ static int dax_release(struct inode *inode, struct file *filp)
 {
 	struct dev_dax *dev_dax = filp->private_data;
 
-	dev_dbg(&dev_dax->dev, "%s\n", __func__);
+	dev_dbg(&dev_dax->dev, "trace\n");
 	return 0;
 }
 
@@ -575,7 +571,7 @@ static void unregister_dev_dax(void *dev)
 	struct inode *inode = dax_inode(dax_dev);
 	struct cdev *cdev = inode->i_cdev;
 
-	dev_dbg(dev, "%s\n", __func__);
+	dev_dbg(dev, "trace\n");
 
 	kill_dev_dax(dev_dax);
 	cdev_device_del(cdev, dev);
diff --git a/drivers/dax/pmem.c b/drivers/dax/pmem.c
index 31b6ecce4c64..fd49b24fd6af 100644
--- a/drivers/dax/pmem.c
+++ b/drivers/dax/pmem.c
@@ -34,7 +34,7 @@ static void dax_pmem_percpu_release(struct percpu_ref *ref)
 {
 	struct dax_pmem *dax_pmem = to_dax_pmem(ref);
 
-	dev_dbg(dax_pmem->dev, "%s\n", __func__);
+	dev_dbg(dax_pmem->dev, "trace\n");
 	complete(&dax_pmem->cmp);
 }
 
@@ -43,7 +43,7 @@ static void dax_pmem_percpu_exit(void *data)
 	struct percpu_ref *ref = data;
 	struct dax_pmem *dax_pmem = to_dax_pmem(ref);
 
-	dev_dbg(dax_pmem->dev, "%s\n", __func__);
+	dev_dbg(dax_pmem->dev, "trace\n");
 	wait_for_completion(&dax_pmem->cmp);
 	percpu_ref_exit(ref);
 }
@@ -53,7 +53,7 @@ static void dax_pmem_percpu_kill(void *data)
 	struct percpu_ref *ref = data;
 	struct dax_pmem *dax_pmem = to_dax_pmem(ref);
 
-	dev_dbg(dax_pmem->dev, "%s\n", __func__);
+	dev_dbg(dax_pmem->dev, "trace\n");
 	percpu_ref_kill(ref);
 }
 
@@ -150,17 +150,7 @@ static struct nd_device_driver dax_pmem_driver = {
 	.type = ND_DRIVER_DAX_PMEM,
 };
 
-static int __init dax_pmem_init(void)
-{
-	return nd_driver_register(&dax_pmem_driver);
-}
-module_init(dax_pmem_init);
-
-static void __exit dax_pmem_exit(void)
-{
-	driver_unregister(&dax_pmem_driver.drv);
-}
-module_exit(dax_pmem_exit);
+module_nd_driver(dax_pmem_driver);
 
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Intel Corporation");
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index ecdc292aa4e4..2b2332b605e4 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -124,10 +124,19 @@ int __bdev_dax_supported(struct super_block *sb, int blocksize)
 		return len < 0 ? len : -EIO;
 	}
 
-	if ((IS_ENABLED(CONFIG_FS_DAX_LIMITED) && pfn_t_special(pfn))
-			|| pfn_t_devmap(pfn))
+	if (IS_ENABLED(CONFIG_FS_DAX_LIMITED) && pfn_t_special(pfn)) {
+		/*
+		 * An arch that has enabled the pmem api should also
+		 * have its drivers support pfn_t_devmap()
+		 *
+		 * This is a developer warning and should not trigger in
+		 * production. dax_flush() will crash since it depends
+		 * on being able to do (page_address(pfn_to_page())).
+		 */
+		WARN_ON(IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API));
+	} else if (pfn_t_devmap(pfn)) {
 		/* pass */;
-	else {
+	} else {
 		pr_debug("VFS (%s): error: dax support not enabled\n",
 				sb->s_id);
 		return -EOPNOTSUPP;
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 27df3e2837fd..6d61cd023633 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -187,6 +187,16 @@ config DMA_SUN6I
 	help
 	  Support for the DMA engine first found in Allwinner A31 SoCs.
 
+config DW_AXI_DMAC
+	tristate "Synopsys DesignWare AXI DMA support"
+	depends on OF || COMPILE_TEST
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	help
+	  Enable support for Synopsys DesignWare AXI DMA controller.
+	  NOTE: This driver has not been tested on 64-bit platforms, as no
+	  64-bit platform with the Synopsys DW AXI DMAC was available.
+
 config EP93XX_DMA
 	bool "Cirrus Logic EP93xx DMA support"
 	depends on ARCH_EP93XX || COMPILE_TEST
@@ -633,6 +643,8 @@ config ZX_DMA
 # driver files
 source "drivers/dma/bestcomm/Kconfig"
 
+source "drivers/dma/mediatek/Kconfig"
+
 source "drivers/dma/qcom/Kconfig"
 
 source "drivers/dma/dw/Kconfig"
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index b9dca8a0e142..0f62a4d49aab 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -28,6 +28,7 @@ obj-$(CONFIG_DMA_OMAP) += omap-dma.o
 obj-$(CONFIG_DMA_SA11X0) += sa11x0-dma.o
 obj-$(CONFIG_DMA_SUN4I) += sun4i-dma.o
 obj-$(CONFIG_DMA_SUN6I) += sun6i-dma.o
+obj-$(CONFIG_DW_AXI_DMAC) += dw-axi-dmac/
 obj-$(CONFIG_DW_DMAC_CORE) += dw/
 obj-$(CONFIG_EP93XX_DMA) += ep93xx_dma.o
 obj-$(CONFIG_FSL_DMA) += fsldma.o
@@ -75,5 +76,6 @@ obj-$(CONFIG_XGENE_DMA) += xgene-dma.o
 obj-$(CONFIG_ZX_DMA) += zx_dma.o
 obj-$(CONFIG_ST_FDMA) += st_fdma.o
 
+obj-y += mediatek/
 obj-y += qcom/
 obj-y += xilinx/
diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c
index c00e3923d7d8..94236ec9d410 100644
--- a/drivers/dma/at_xdmac.c
+++ b/drivers/dma/at_xdmac.c
@@ -1471,10 +1471,10 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
 	for (retry = 0; retry < AT_XDMAC_RESIDUE_MAX_RETRIES; retry++) {
 		check_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc;
 		rmb();
-		initd = !!(at_xdmac_chan_read(atchan, AT_XDMAC_CC) & AT_XDMAC_CC_INITD);
-		rmb();
 		cur_ubc = at_xdmac_chan_read(atchan, AT_XDMAC_CUBC);
 		rmb();
+		initd = !!(at_xdmac_chan_read(atchan, AT_XDMAC_CC) & AT_XDMAC_CC_INITD);
+		rmb();
 		cur_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc;
 		rmb();
 
diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
index 80cc2be6483c..b9339524d5bd 100644
--- a/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@ -74,7 +74,11 @@ MODULE_PARM_DESC(timeout, "Transfer Timeout in msec (default: 3000), "
 
 static bool noverify;
 module_param(noverify, bool, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(noverify, "Disable random data setup and verification");
+MODULE_PARM_DESC(noverify, "Disable data verification (default: verify)");
+
+static bool norandom;
+module_param(norandom, bool, 0644);
+MODULE_PARM_DESC(norandom, "Disable random offset setup (default: random)");
 
 static bool verbose;
 module_param(verbose, bool, S_IRUGO | S_IWUSR);
@@ -103,6 +107,7 @@ struct dmatest_params {
 	unsigned int	pq_sources;
 	int		timeout;
 	bool		noverify;
+	bool		norandom;
 };
 
 /**
@@ -575,7 +580,7 @@ static int dmatest_func(void *data)
 			break;
 		}
 
-		if (params->noverify)
+		if (params->norandom)
 			len = params->buf_size;
 		else
 			len = dmatest_random() % params->buf_size + 1;
@@ -586,17 +591,19 @@ static int dmatest_func(void *data)
 
 		total_len += len;
 
-		if (params->noverify) {
+		if (params->norandom) {
 			src_off = 0;
 			dst_off = 0;
 		} else {
-			start = ktime_get();
 			src_off = dmatest_random() % (params->buf_size - len + 1);
 			dst_off = dmatest_random() % (params->buf_size - len + 1);
 
 			src_off = (src_off >> align) << align;
 			dst_off = (dst_off >> align) << align;
+		}
 
+		if (!params->noverify) {
+			start = ktime_get();
 			dmatest_init_srcs(thread->srcs, src_off, len,
 					  params->buf_size, is_memset);
 			dmatest_init_dsts(thread->dsts, dst_off, len,
@@ -975,6 +982,7 @@ static void run_threaded_test(struct dmatest_info *info)
 	params->pq_sources = pq_sources;
 	params->timeout = timeout;
 	params->noverify = noverify;
+	params->norandom = norandom;
 
 	request_channels(info, DMA_MEMCPY);
 	request_channels(info, DMA_MEMSET);
diff --git a/drivers/dma/dw-axi-dmac/Makefile b/drivers/dma/dw-axi-dmac/Makefile
new file mode 100644
index 000000000000..4bfa462005be
--- /dev/null
+++ b/drivers/dma/dw-axi-dmac/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_DW_AXI_DMAC) += dw-axi-dmac-platform.o
diff --git a/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c b/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c
new file mode 100644
index 000000000000..c4eb55e3011c
--- /dev/null
+++ b/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c
@@ -0,0 +1,1008 @@
+// SPDX-License-Identifier:  GPL-2.0
+// (C) 2017-2018 Synopsys, Inc. (www.synopsys.com)
+
+/*
+ * Synopsys DesignWare AXI DMA Controller driver.
+ *
+ * Author: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
+ */
+
+#include <linux/bitops.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/dmaengine.h>
+#include <linux/dmapool.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/property.h>
+#include <linux/types.h>
+
+#include "dw-axi-dmac.h"
+#include "../dmaengine.h"
+#include "../virt-dma.h"
+
+/*
+ * The set of bus widths supported by the DMA controller. DW AXI DMAC supports
+ * a master data bus width of up to 512 bits (on both AXI master interfaces),
+ * depending on the IP block configuration.
+ */
+#define AXI_DMA_BUSWIDTHS		  \
+	(DMA_SLAVE_BUSWIDTH_1_BYTE	| \
+	DMA_SLAVE_BUSWIDTH_2_BYTES	| \
+	DMA_SLAVE_BUSWIDTH_4_BYTES	| \
+	DMA_SLAVE_BUSWIDTH_8_BYTES	| \
+	DMA_SLAVE_BUSWIDTH_16_BYTES	| \
+	DMA_SLAVE_BUSWIDTH_32_BYTES	| \
+	DMA_SLAVE_BUSWIDTH_64_BYTES)
+
+static inline void
+axi_dma_iowrite32(struct axi_dma_chip *chip, u32 reg, u32 val)
+{
+	iowrite32(val, chip->regs + reg); /* reg: offset from chip->regs */
+}
+
+static inline u32 axi_dma_ioread32(struct axi_dma_chip *chip, u32 reg)
+{
+	return ioread32(chip->regs + reg); /* reg: offset from chip->regs */
+}
+
+static inline void
+axi_chan_iowrite32(struct axi_dma_chan *chan, u32 reg, u32 val)
+{
+	iowrite32(val, chan->chan_regs + reg); /* reg: per-channel offset */
+}
+
+static inline u32 axi_chan_ioread32(struct axi_dma_chan *chan, u32 reg)
+{
+	return ioread32(chan->chan_regs + reg); /* reg: per-channel offset */
+}
+
+/* Split into two 32 bit writes: some HW doesn't support 64 bit access. */
+static inline void
+axi_chan_iowrite64(struct axi_dma_chan *chan, u32 reg, u64 val)
+{
+	void __iomem *addr = chan->chan_regs + reg;
+
+	iowrite32(lower_32_bits(val), addr);	/* low word at @reg */
+	/* the high word goes into the following 32 bit slot */
+	iowrite32(upper_32_bits(val), addr + 4);
+}
+
+/* Read-modify-write DMAC_CFG with the DMAC_EN_MASK bits cleared. */
+static inline void axi_dma_disable(struct axi_dma_chip *chip)
+{
+	u32 cfg = axi_dma_ioread32(chip, DMAC_CFG);
+
+	cfg &= ~DMAC_EN_MASK;
+	axi_dma_iowrite32(chip, DMAC_CFG, cfg);
+}
+
+/* Read-modify-write DMAC_CFG with the DMAC_EN_MASK bits set. */
+static inline void axi_dma_enable(struct axi_dma_chip *chip)
+{
+	u32 cfg = axi_dma_ioread32(chip, DMAC_CFG);
+
+	cfg |= DMAC_EN_MASK;
+	axi_dma_iowrite32(chip, DMAC_CFG, cfg);
+}
+
+/* Read-modify-write DMAC_CFG with the INT_EN_MASK bits cleared. */
+static inline void axi_dma_irq_disable(struct axi_dma_chip *chip)
+{
+	u32 cfg = axi_dma_ioread32(chip, DMAC_CFG);
+
+	cfg &= ~INT_EN_MASK;
+	axi_dma_iowrite32(chip, DMAC_CFG, cfg);
+}
+
+/* Read-modify-write DMAC_CFG with the INT_EN_MASK bits set. */
+static inline void axi_dma_irq_enable(struct axi_dma_chip *chip)
+{
+	u32 cfg = axi_dma_ioread32(chip, DMAC_CFG);
+
+	cfg |= INT_EN_MASK;
+	axi_dma_iowrite32(chip, DMAC_CFG, cfg);
+}
+
+/*
+ * Clear @irq_mask in CH_INTSTATUS_ENA; "all" is a plain write, no read-back.
+ */
+static inline void axi_chan_irq_disable(struct axi_dma_chan *chan, u32 irq_mask)
+{
+	u32 ena = DWAXIDMAC_IRQ_NONE;
+
+	if (unlikely(irq_mask != DWAXIDMAC_IRQ_ALL))
+		ena = axi_chan_ioread32(chan, CH_INTSTATUS_ENA) & ~irq_mask;
+
+	axi_chan_iowrite32(chan, CH_INTSTATUS_ENA, ena);
+}
+
+static inline void axi_chan_irq_set(struct axi_dma_chan *chan, u32 irq_mask)
+{
+	axi_chan_iowrite32(chan, CH_INTSTATUS_ENA, irq_mask); /* overwrites */
+}
+
+static inline void axi_chan_irq_sig_set(struct axi_dma_chan *chan, u32 irq_mask)
+{
+	axi_chan_iowrite32(chan, CH_INTSIGNAL_ENA, irq_mask); /* overwrites */
+}
+
+static inline void axi_chan_irq_clear(struct axi_dma_chan *chan, u32 irq_mask)
+{
+	axi_chan_iowrite32(chan, CH_INTCLEAR, irq_mask); /* bits to clear */
+}
+
+static inline u32 axi_chan_irq_read(struct axi_dma_chan *chan)
+{
+	return axi_chan_ioread32(chan, CH_INTSTATUS); /* channel IRQ status */
+}
+
+/* Clear this channel's EN bit in DMAC_CHEN while setting its EN_WE bit. */
+static inline void axi_chan_disable(struct axi_dma_chan *chan)
+{
+	u32 chen = axi_dma_ioread32(chan->chip, DMAC_CHEN);
+
+	chen &= ~(BIT(chan->id) << DMAC_CHAN_EN_SHIFT);
+	chen |= BIT(chan->id) << DMAC_CHAN_EN_WE_SHIFT;
+	axi_dma_iowrite32(chan->chip, DMAC_CHEN, chen);
+}
+
+/* Set both the EN and the EN_WE bit of this channel in DMAC_CHEN. */
+static inline void axi_chan_enable(struct axi_dma_chan *chan)
+{
+	u32 chen = axi_dma_ioread32(chan->chip, DMAC_CHEN);
+
+	chen |= BIT(chan->id) << DMAC_CHAN_EN_SHIFT;
+	chen |= BIT(chan->id) << DMAC_CHAN_EN_WE_SHIFT;
+	axi_dma_iowrite32(chan->chip, DMAC_CHEN, chen);
+}
+
+/* Report whether this channel's EN bit is currently set in DMAC_CHEN. */
+static inline bool axi_chan_is_hw_enable(struct axi_dma_chan *chan)
+{
+	u32 en_bit = BIT(chan->id) << DMAC_CHAN_EN_SHIFT;
+	u32 chen = axi_dma_ioread32(chan->chip, DMAC_CHEN);
+
+	return (chen & en_bit) != 0;
+}
+
+static void axi_dma_hw_init(struct axi_dma_chip *chip)
+{
+	u32 i;	/* channel index */
+
+	for (i = 0; i < chip->dw->hdata->nr_channels; i++) {
+		axi_chan_irq_disable(&chip->dw->chan[i], DWAXIDMAC_IRQ_ALL);
+		axi_chan_disable(&chip->dw->chan[i]); /* IRQs masked first */
+	}
+}
+
+static u32 axi_chan_get_xfer_width(struct axi_dma_chan *chan, dma_addr_t src,
+				   dma_addr_t dst, size_t len)
+{
+	u32 max_width = chan->chip->dw->hdata->m_data_width;
+
+	return __ffs(src | dst | len | BIT(max_width)); /* <= max_width */
+}
+
+static inline const char *axi_chan_name(struct axi_dma_chan *chan)
+{
+	return dma_chan_name(&chan->vc.chan); /* embedded dma_chan's name */
+}
+
+/* Allocate a zeroed descriptor from the chip's pool, or NULL on failure. */
+static struct axi_dma_desc *axi_desc_get(struct axi_dma_chan *chan)
+{
+	struct dma_pool *pool = chan->chip->dw->desc_pool;
+	struct axi_dma_desc *new_desc;
+	dma_addr_t dma_handle;
+
+	new_desc = dma_pool_zalloc(pool, GFP_NOWAIT, &dma_handle);
+	if (unlikely(!new_desc)) {
+		dev_err(chan2dev(chan), "%s: not enough descriptors available\n",
+			axi_chan_name(chan));
+		return NULL;
+	}
+
+	new_desc->vd.tx.phys = dma_handle;
+	new_desc->chan = chan;
+	INIT_LIST_HEAD(&new_desc->xfer_list);
+	atomic_inc(&chan->descs_allocated);
+	return new_desc;
+}
+
+/* Return @desc and every descriptor chained on its xfer_list to the pool. */
+static void axi_desc_put(struct axi_dma_desc *desc)
+{
+	struct axi_dma_chan *chan = desc->chan;
+	struct dma_pool *pool = chan->chip->dw->desc_pool;
+	struct axi_dma_desc *entry, *tmp;
+	unsigned int descs_put = 1; /* counts @desc itself, freed last */
+
+	list_for_each_entry_safe(entry, tmp, &desc->xfer_list, xfer_list) {
+		list_del(&entry->xfer_list);
+		dma_pool_free(pool, entry, entry->vd.tx.phys);
+		descs_put++;
+	}
+
+	dma_pool_free(pool, desc, desc->vd.tx.phys);
+
+	atomic_sub(descs_put, &chan->descs_allocated);
+	dev_vdbg(chan2dev(chan), "%s: %d descs put, %d still allocated\n",
+		axi_chan_name(chan), descs_put,
+		atomic_read(&chan->descs_allocated));
+}
+
+static void vchan_desc_put(struct virt_dma_desc *vdesc)
+{
+	axi_desc_put(vd_to_axi_desc(vdesc)); /* installed as vc.desc_free */
+}
+
+/* dmaengine tx_status hook: cookie status, or DMA_PAUSED while paused. */
+static enum dma_status
+dma_chan_tx_status(struct dma_chan *dchan, dma_cookie_t cookie,
+		  struct dma_tx_state *txstate)
+{
+	struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+	enum dma_status status = dma_cookie_status(dchan, cookie, txstate);
+
+	/* An in-progress transfer on a paused channel is reported as paused */
+	if (status == DMA_IN_PROGRESS && chan->is_paused)
+		return DMA_PAUSED;
+
+	return status;
+}
+
+static void write_desc_llp(struct axi_dma_desc *desc, dma_addr_t adr)
+{
+	desc->lli.llp = cpu_to_le64(adr); /* LLI fields are little-endian */
+}
+
+static void write_chan_llp(struct axi_dma_chan *chan, dma_addr_t adr)
+{
+	axi_chan_iowrite64(chan, CH_LLP, adr); /* 64 bit CH_LLP register */
+}
+
+/* Program and start the LLI chain at @first. Called in chan locked context */
+static void axi_chan_block_xfer_start(struct axi_dma_chan *chan,
+				      struct axi_dma_desc *first)
+{
+	u32 priority = chan->chip->dw->hdata->priority[chan->id];
+	u32 reg, irq_mask;
+	u8 lms = 0; /* Select AXI0 master for LLI fetching */
+
+	if (unlikely(axi_chan_is_hw_enable(chan))) {
+		dev_err(chan2dev(chan), "%s is non-idle!\n",
+			axi_chan_name(chan));
+
+		return; /* a busy channel is left untouched */
+	}
+
+	axi_dma_enable(chan->chip);
+
+	reg = (DWAXIDMAC_MBLK_TYPE_LL << CH_CFG_L_DST_MULTBLK_TYPE_POS |
+	       DWAXIDMAC_MBLK_TYPE_LL << CH_CFG_L_SRC_MULTBLK_TYPE_POS);
+	axi_chan_iowrite32(chan, CH_CFG_L, reg);
+
+	reg = (DWAXIDMAC_TT_FC_MEM_TO_MEM_DMAC << CH_CFG_H_TT_FC_POS |
+	       priority << CH_CFG_H_PRIORITY_POS |
+	       DWAXIDMAC_HS_SEL_HW << CH_CFG_H_HS_SEL_DST_POS |
+	       DWAXIDMAC_HS_SEL_HW << CH_CFG_H_HS_SEL_SRC_POS);
+	axi_chan_iowrite32(chan, CH_CFG_H, reg);
+
+	write_chan_llp(chan, first->vd.tx.phys | lms); /* chain head */
+
+	irq_mask = DWAXIDMAC_IRQ_DMA_TRF | DWAXIDMAC_IRQ_ALL_ERR;
+	axi_chan_irq_sig_set(chan, irq_mask);
+
+	/* Generate 'suspend' status but don't generate interrupt */
+	irq_mask |= DWAXIDMAC_IRQ_SUSPENDED;
+	axi_chan_irq_set(chan, irq_mask);
+
+	axi_chan_enable(chan);
+}
+
+/* Start the descriptor returned by vchan_next_desc(), if there is one. */
+static void axi_chan_start_first_queued(struct axi_dma_chan *chan)
+{
+	struct virt_dma_desc *head = vchan_next_desc(&chan->vc);
+
+	/* Nothing to run: leave the channel alone */
+	if (!head)
+		return;
+
+	dev_vdbg(chan2dev(chan), "%s: started %u\n", axi_chan_name(chan),
+		head->tx.cookie);
+
+	axi_chan_block_xfer_start(chan, vd_to_axi_desc(head));
+}
+
+static void dma_chan_issue_pending(struct dma_chan *dchan)
+{
+	struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+	unsigned long flags;	/* IRQ state saved across vc.lock */
+
+	spin_lock_irqsave(&chan->vc.lock, flags);
+	if (vchan_issue_pending(&chan->vc))
+		axi_chan_start_first_queued(chan); /* runs under vc.lock */
+	spin_unlock_irqrestore(&chan->vc.lock, flags);
+}
+
+static int dma_chan_alloc_chan_resources(struct dma_chan *dchan)
+{
+	struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+
+	/* Channel must be idle; NOTE(review): HW is read before the PM get */
+	if (axi_chan_is_hw_enable(chan)) {
+		dev_err(chan2dev(chan), "%s is non-idle!\n",
+			axi_chan_name(chan));
+		return -EBUSY;
+	}
+
+	dev_vdbg(dchan2dev(dchan), "%s: allocating\n", axi_chan_name(chan));
+
+	pm_runtime_get(chan->chip->dev); /* dropped in free_chan_resources */
+
+	return 0;
+}
+
+/* dmaengine free hook: quiesce the channel and release its descriptors. */
+static void dma_chan_free_chan_resources(struct dma_chan *dchan)
+{
+	struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+	struct device *dev = dchan2dev(dchan);
+
+	/* ASSERT: channel is idle */
+	if (axi_chan_is_hw_enable(chan))
+		dev_err(dev, "%s is non-idle!\n", axi_chan_name(chan));
+
+	axi_chan_disable(chan);
+	axi_chan_irq_disable(chan, DWAXIDMAC_IRQ_ALL);
+	vchan_free_chan_resources(&chan->vc);
+
+	dev_vdbg(dev, "%s: free resources, descriptor still allocated: %u\n",
+		 axi_chan_name(chan), atomic_read(&chan->descs_allocated));
+
+	/* Drop the reference taken in dma_chan_alloc_chan_resources() */
+	pm_runtime_put(chan->chip->dev);
+}
+
+/*
+ * Mark @desc as the last item of its chain. If DW_axi_dmac sees the
+ * CHx_CTL.ShadowReg_Or_LLI_Last bit of a fetched LLI as 1, it understands
+ * that the current block is the final block in the transfer and completes
+ * the DMA transfer operation at the end of that block transfer.
+ */
+static void set_desc_last(struct axi_dma_desc *desc)
+{
+	u32 ctl_hi = le32_to_cpu(desc->lli.ctl_hi);
+
+	ctl_hi |= CH_CTL_H_LLI_LAST;
+	/* ctl_hi is kept little-endian inside the LLI */
+	desc->lli.ctl_hi = cpu_to_le32(ctl_hi);
+}
+
+static void write_desc_sar(struct axi_dma_desc *desc, dma_addr_t adr)
+{
+	desc->lli.sar = cpu_to_le64(adr); /* source address of the block */
+}
+
+static void write_desc_dar(struct axi_dma_desc *desc, dma_addr_t adr)
+{
+	desc->lli.dar = cpu_to_le64(adr); /* destination address */
+}
+
+/* Select AXI0 as source master by clearing CH_CTL_L_SRC_MAST in @desc. */
+static void set_desc_src_master(struct axi_dma_desc *desc)
+{
+	u32 ctl_lo = le32_to_cpu(desc->lli.ctl_lo);
+
+	ctl_lo &= ~CH_CTL_L_SRC_MAST;
+	/* ctl_lo is kept little-endian inside the LLI */
+	desc->lli.ctl_lo = cpu_to_le32(ctl_lo);
+}
+
+/* Select AXI1 for the destination master if more than one master exists. */
+static void set_desc_dest_master(struct axi_dma_desc *desc)
+{
+	bool has_axi1 = desc->chan->chip->dw->hdata->nr_masters > 1;
+	u32 ctl_lo = le32_to_cpu(desc->lli.ctl_lo);
+
+	if (has_axi1)
+		ctl_lo |= CH_CTL_L_DST_MAST;
+	else
+		ctl_lo &= ~CH_CTL_L_DST_MAST;
+
+	desc->lli.ctl_lo = cpu_to_le32(ctl_lo);
+}
+
+/*
+ * dmaengine memcpy prep hook: build a chain of LLI descriptors covering @len
+ * bytes. Returns NULL if @len is 0 or a descriptor could not be allocated.
+ */
+static struct dma_async_tx_descriptor *
+dma_chan_prep_dma_memcpy(struct dma_chan *dchan, dma_addr_t dst_adr,
+			 dma_addr_t src_adr, size_t len, unsigned long flags)
+{
+	struct axi_dma_desc *first = NULL, *desc = NULL, *prev = NULL;
+	struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+	size_t block_ts, max_block_ts, xfer_len;
+	u32 xfer_width, reg;
+	u8 lms = 0; /* Select AXI0 master for LLI fetching */
+
+	dev_dbg(chan2dev(chan), "%s: memcpy: src: %pad dst: %pad length: %zd flags: %#lx",
+		axi_chan_name(chan), &src_adr, &dst_adr, len, flags);
+	max_block_ts = chan->chip->dw->hdata->block_size[chan->id];
+
+	while (len) {
+		xfer_len = len;
+		/*
+		 * Take care of the alignment. Source and destination widths
+		 * could differ, but they are kept the same for simplicity.
+		 */
+		xfer_width = axi_chan_get_xfer_width(chan, src_adr, dst_adr, xfer_len);
+
+		/*
+		 * block_ts is the number of xfer_width sized data items in
+		 * one DMA block; the BLOCK_TS register takes block_ts - 1.
+		 */
+		block_ts = xfer_len >> xfer_width;
+		if (block_ts > max_block_ts) {
+			block_ts = max_block_ts;
+			xfer_len = max_block_ts << xfer_width;
+		}
+
+		desc = axi_desc_get(chan);
+		if (unlikely(!desc))
+			goto err_desc_get;
+
+		write_desc_sar(desc, src_adr);
+		write_desc_dar(desc, dst_adr);
+		desc->lli.block_ts_lo = cpu_to_le32(block_ts - 1);
+
+		reg = CH_CTL_H_LLI_VALID;
+		if (chan->chip->dw->hdata->restrict_axi_burst_len) {
+			u32 burst_len = chan->chip->dw->hdata->axi_rw_burst_len;
+
+			reg |= (CH_CTL_H_ARLEN_EN |
+				burst_len << CH_CTL_H_ARLEN_POS |
+				CH_CTL_H_AWLEN_EN |
+				burst_len << CH_CTL_H_AWLEN_POS);
+		}
+		desc->lli.ctl_hi = cpu_to_le32(reg);
+
+		reg = (DWAXIDMAC_BURST_TRANS_LEN_4 << CH_CTL_L_DST_MSIZE_POS |
+		       DWAXIDMAC_BURST_TRANS_LEN_4 << CH_CTL_L_SRC_MSIZE_POS |
+		       xfer_width << CH_CTL_L_DST_WIDTH_POS |
+		       xfer_width << CH_CTL_L_SRC_WIDTH_POS |
+		       DWAXIDMAC_CH_CTL_L_INC << CH_CTL_L_DST_INC_POS |
+		       DWAXIDMAC_CH_CTL_L_INC << CH_CTL_L_SRC_INC_POS);
+		desc->lli.ctl_lo = cpu_to_le32(reg);
+
+		set_desc_src_master(desc);
+		set_desc_dest_master(desc);
+
+		/* Manage transfer list (xfer_list) */
+		if (!first) {
+			first = desc;
+		} else {
+			list_add_tail(&desc->xfer_list, &first->xfer_list);
+			write_desc_llp(prev, desc->vd.tx.phys | lms);
+		}
+		prev = desc;
+
+		/* update the length and addresses for the next loop cycle */
+		len -= xfer_len;
+		dst_adr += xfer_len;
+		src_adr += xfer_len;
+	}
+
+	/* Total len of src/dest sg == 0, so no descriptor were allocated */
+	if (unlikely(!first))
+		return NULL;
+
+	set_desc_last(desc); /* end-of-link on the last descriptor */
+
+	return vchan_tx_prep(&chan->vc, &first->vd, flags);
+
+err_desc_get:
+	if (first) /* still NULL when the very first allocation failed */
+		axi_desc_put(first);
+	return NULL;
+}
+
+/* Log the address, block size and control words of @desc's LLI via dev_err. */
+static void axi_chan_dump_lli(struct axi_dma_chan *chan,
+			      struct axi_dma_desc *desc)
+{
+	const struct axi_dma_lli *lli = &desc->lli;
+
+	dev_err(dchan2dev(&chan->vc.chan),
+		"SAR: 0x%llx DAR: 0x%llx LLP: 0x%llx BTS 0x%x CTL: 0x%x:%08x",
+		le64_to_cpu(lli->sar), le64_to_cpu(lli->dar),
+		le64_to_cpu(lli->llp), le32_to_cpu(lli->block_ts_lo),
+		le32_to_cpu(lli->ctl_hi), le32_to_cpu(lli->ctl_lo));
+}
+
+static void axi_chan_list_dump_lli(struct axi_dma_chan *chan,
+				   struct axi_dma_desc *desc_head)
+{
+	struct axi_dma_desc *desc;	/* cursor over desc_head->xfer_list */
+
+	axi_chan_dump_lli(chan, desc_head); /* head first, then its list */
+	list_for_each_entry(desc, &desc_head->xfer_list, xfer_list)
+		axi_chan_dump_lli(chan, desc);
+}
+
+/* Error IRQ: stop the channel, complete and dump the bad desc, restart. */
+static noinline void axi_chan_handle_err(struct axi_dma_chan *chan, u32 status)
+{
+	struct virt_dma_desc *vd;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->vc.lock, flags);
+	axi_chan_disable(chan);
+	/* Bad descriptor heads the vc list, unless the list was emptied */
+	vd = vchan_next_desc(&chan->vc);
+	if (!vd) {
+		dev_err(chan2dev(chan), "BUG: %s, IRQ with no descriptors\n",
+			axi_chan_name(chan));
+		goto out;
+	}
+	list_del(&vd->node);
+
+	dev_err(chan2dev(chan),
+		"Bad descriptor submitted for %s, cookie: %d, irq: 0x%08x\n",
+		axi_chan_name(chan), vd->tx.cookie, status);
+	axi_chan_list_dump_lli(chan, vd_to_axi_desc(vd));
+	vchan_cookie_complete(vd);
+
+	axi_chan_start_first_queued(chan); /* try to restart */
+out:
+	spin_unlock_irqrestore(&chan->vc.lock, flags);
+}
+
+/* Transfer-done IRQ: complete the head descriptor, then start the next. */
+static void axi_chan_block_xfer_complete(struct axi_dma_chan *chan)
+{
+	struct virt_dma_desc *vd;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->vc.lock, flags);
+	if (unlikely(axi_chan_is_hw_enable(chan))) {
+		dev_err(chan2dev(chan), "BUG: %s caught DWAXIDMAC_IRQ_DMA_TRF, but channel not idle!\n",
+			axi_chan_name(chan));
+		axi_chan_disable(chan);
+	}
+
+	/* The completed descriptor heads the vc list, if it is still there */
+	vd = vchan_next_desc(&chan->vc);
+	if (vd) {
+		list_del(&vd->node);
+		vchan_cookie_complete(vd);
+	}
+
+	axi_chan_start_first_queued(chan); /* submit what is queued next */
+	spin_unlock_irqrestore(&chan->vc.lock, flags);
+}
+
+/* Top-level IRQ handler: poll and service each channel in turn. */
+static irqreturn_t dw_axi_dma_interrupt(int irq, void *dev_id)
+{
+	struct axi_dma_chip *chip = dev_id;
+	struct dw_axi_dma *dw = chip->dw;
+	struct axi_dma_chan *chan = dw->chan;
+	u32 pending, idx;
+
+	/* Disable DMAC interrupts; re-enabled after processing the channels */
+	axi_dma_irq_disable(chip);
+
+	/* Poll, clear and process every channel interrupt status */
+	for (idx = 0; idx < dw->hdata->nr_channels; idx++, chan++) {
+		pending = axi_chan_irq_read(chan);
+		axi_chan_irq_clear(chan, pending);
+
+		dev_vdbg(chip->dev, "%s %u IRQ status: 0x%08x\n",
+			axi_chan_name(chan), idx, pending);
+
+		/* An error status wins over a transfer-done status */
+		if (pending & DWAXIDMAC_IRQ_ALL_ERR)
+			axi_chan_handle_err(chan, pending);
+		else if (pending & DWAXIDMAC_IRQ_DMA_TRF)
+			axi_chan_block_xfer_complete(chan);
+	}
+
+	/* Re-enable interrupts */
+	axi_dma_irq_enable(chip);
+
+	return IRQ_HANDLED;
+}
+
+/* dmaengine terminate_all hook: stop the channel, free every descriptor. */
+static int dma_chan_terminate_all(struct dma_chan *dchan)
+{
+	struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+	unsigned long irq_flags;
+	LIST_HEAD(doomed);
+
+	spin_lock_irqsave(&chan->vc.lock, irq_flags);
+
+	axi_chan_disable(chan);
+	vchan_get_all_descriptors(&chan->vc, &doomed);
+
+	/*
+	 * As vchan_dma_desc_free_list can access to desc_allocated list
+	 * we need to call it in vc.lock context.
+	 */
+	vchan_dma_desc_free_list(&chan->vc, &doomed);
+
+	spin_unlock_irqrestore(&chan->vc.lock, irq_flags);
+
+	dev_vdbg(dchan2dev(dchan), "terminated: %s\n", axi_chan_name(chan));
+
+	return 0;
+}
+
+static int dma_chan_pause(struct dma_chan *dchan)
+{
+	struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+	unsigned long flags;
+	unsigned int timeout = 20; /* poll iterations, 2us apart */
+	u32 val;
+
+	spin_lock_irqsave(&chan->vc.lock, flags);
+
+	val = axi_dma_ioread32(chan->chip, DMAC_CHEN);
+	val |= BIT(chan->id) << DMAC_CHAN_SUSP_SHIFT |
+	       BIT(chan->id) << DMAC_CHAN_SUSP_WE_SHIFT;
+	axi_dma_iowrite32(chan->chip, DMAC_CHEN, val);
+
+	do  {
+		if (axi_chan_irq_read(chan) & DWAXIDMAC_IRQ_SUSPENDED)
+			break;
+
+		udelay(2);
+	} while (--timeout);
+
+	axi_chan_irq_clear(chan, DWAXIDMAC_IRQ_SUSPENDED);
+
+	chan->is_paused = true; /* set even if the poll timed out */
+
+	spin_unlock_irqrestore(&chan->vc.lock, flags);
+
+	return timeout ? 0 : -EAGAIN; /* -EAGAIN: SUSPENDED status never seen */
+}
+
+/* Lift the suspend request of @chan. Called in chan locked context */
+static inline void axi_chan_resume(struct axi_dma_chan *chan)
+{
+	u32 chen = axi_dma_ioread32(chan->chip, DMAC_CHEN);
+
+	/* Drop SUSP while setting SUSP_WE for this channel */
+	chen &= ~(BIT(chan->id) << DMAC_CHAN_SUSP_SHIFT);
+	chen |= BIT(chan->id) << DMAC_CHAN_SUSP_WE_SHIFT;
+	axi_dma_iowrite32(chan->chip, DMAC_CHEN, chen);
+
+	chan->is_paused = false;
+}
+
+/* dmaengine resume hook: un-suspend the channel if paused. Returns 0. */
+static int dma_chan_resume(struct dma_chan *dchan)
+{
+	struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&chan->vc.lock, irq_flags);
+	/* is_paused is protected by vc.lock */
+	if (chan->is_paused)
+		axi_chan_resume(chan);
+	spin_unlock_irqrestore(&chan->vc.lock, irq_flags);
+
+	return 0;
+}
+
+static int axi_dma_suspend(struct axi_dma_chip *chip)
+{
+	axi_dma_irq_disable(chip);
+	axi_dma_disable(chip); /* quiesce the DMAC before gating clocks */
+
+	clk_disable_unprepare(chip->core_clk);
+	clk_disable_unprepare(chip->cfgr_clk);
+
+	return 0; /* no failure path */
+}
+
+/* Enable cfgr_clk then core_clk, then the DMAC and its interrupts. */
+static int axi_dma_resume(struct axi_dma_chip *chip)
+{
+	int ret = clk_prepare_enable(chip->cfgr_clk);
+
+	if (ret < 0)
+		return ret;
+	ret = clk_prepare_enable(chip->core_clk);
+	if (ret < 0) {
+		clk_disable_unprepare(chip->cfgr_clk); /* undo first step */
+		return ret;
+	}
+
+	axi_dma_enable(chip);
+	axi_dma_irq_enable(chip);
+	return 0;
+}
+
+/* Runtime PM suspend callback: forwards to axi_dma_suspend(). */
+static int __maybe_unused axi_dma_runtime_suspend(struct device *dev)
+{
+	/* drvdata is the axi_dma_chip stored by dw_probe() */
+	return axi_dma_suspend(dev_get_drvdata(dev));
+}
+
+/* Runtime PM resume callback: forwards to axi_dma_resume(). */
+static int __maybe_unused axi_dma_runtime_resume(struct device *dev)
+{
+	/* drvdata is the axi_dma_chip stored by dw_probe() */
+	return axi_dma_resume(dev_get_drvdata(dev));
+}
+
+static int parse_device_properties(struct axi_dma_chip *chip)
+{
+	struct device *dev = chip->dev;
+	u32 tmp, carr[DMAC_MAX_CHANNELS]; /* scratch value, per-channel array */
+	int ret;
+
+	ret = device_property_read_u32(dev, "dma-channels", &tmp);
+	if (ret)
+		return ret;
+	if (tmp == 0 || tmp > DMAC_MAX_CHANNELS)
+		return -EINVAL;
+
+	chip->dw->hdata->nr_channels = tmp;
+
+	ret = device_property_read_u32(dev, "snps,dma-masters", &tmp);
+	if (ret)
+		return ret;
+	if (tmp == 0 || tmp > DMAC_MAX_MASTERS)
+		return -EINVAL;
+
+	chip->dw->hdata->nr_masters = tmp;
+
+	ret = device_property_read_u32(dev, "snps,data-width", &tmp);
+	if (ret)
+		return ret;
+	if (tmp > DWAXIDMAC_TRANS_WIDTH_MAX)
+		return -EINVAL;
+
+	chip->dw->hdata->m_data_width = tmp; /* later used as a bit index */
+
+	ret = device_property_read_u32_array(dev, "snps,block-size", carr,
+					     chip->dw->hdata->nr_channels);
+	if (ret)
+		return ret;
+	for (tmp = 0; tmp < chip->dw->hdata->nr_channels; tmp++) {
+		if (carr[tmp] == 0 || carr[tmp] > DMAC_MAX_BLK_SIZE)
+			return -EINVAL;
+
+		chip->dw->hdata->block_size[tmp] = carr[tmp];
+	}
+
+	ret = device_property_read_u32_array(dev, "snps,priority", carr,
+					     chip->dw->hdata->nr_channels);
+	if (ret)
+		return ret;
+	/* Priority value must be programmed within [0:nr_channels-1] range */
+	for (tmp = 0; tmp < chip->dw->hdata->nr_channels; tmp++) {
+		if (carr[tmp] >= chip->dw->hdata->nr_channels)
+			return -EINVAL;
+
+		chip->dw->hdata->priority[tmp] = carr[tmp];
+	}
+
+	/* axi-max-burst-len is an optional property; it is stored minus one */
+	ret = device_property_read_u32(dev, "snps,axi-max-burst-len", &tmp);
+	if (!ret) {
+		if (tmp > DWAXIDMAC_ARWLEN_MAX + 1)
+			return -EINVAL;
+		if (tmp < DWAXIDMAC_ARWLEN_MIN + 1)
+			return -EINVAL;
+
+		chip->dw->hdata->restrict_axi_burst_len = true;
+		chip->dw->hdata->axi_rw_burst_len = tmp - 1;
+	}
+
+	return 0;
+}
+
+static int dw_probe(struct platform_device *pdev)
+{
+	struct axi_dma_chip *chip;
+	struct resource *mem;
+	struct dw_axi_dma *dw;
+	struct dw_axi_dma_hcfg *hdata;
+	u32 i;
+	int ret;
+
+	chip = devm_kzalloc(&pdev->dev, sizeof(*chip), GFP_KERNEL);
+	if (!chip)
+		return -ENOMEM;
+
+	dw = devm_kzalloc(&pdev->dev, sizeof(*dw), GFP_KERNEL);
+	if (!dw)
+		return -ENOMEM;
+
+	hdata = devm_kzalloc(&pdev->dev, sizeof(*hdata), GFP_KERNEL);
+	if (!hdata)
+		return -ENOMEM;
+
+	chip->dw = dw;
+	chip->dev = &pdev->dev;
+	chip->dw->hdata = hdata;
+
+	chip->irq = platform_get_irq(pdev, 0);
+	if (chip->irq < 0)
+		return chip->irq;
+
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	chip->regs = devm_ioremap_resource(chip->dev, mem);
+	if (IS_ERR(chip->regs))
+		return PTR_ERR(chip->regs);
+
+	chip->core_clk = devm_clk_get(chip->dev, "core-clk");
+	if (IS_ERR(chip->core_clk))
+		return PTR_ERR(chip->core_clk);
+
+	chip->cfgr_clk = devm_clk_get(chip->dev, "cfgr-clk");
+	if (IS_ERR(chip->cfgr_clk))
+		return PTR_ERR(chip->cfgr_clk);
+
+	ret = parse_device_properties(chip);
+	if (ret)
+		return ret;
+
+	dw->chan = devm_kcalloc(chip->dev, hdata->nr_channels,
+				sizeof(*dw->chan), GFP_KERNEL);
+	if (!dw->chan)
+		return -ENOMEM;
+
+	ret = devm_request_irq(chip->dev, chip->irq, dw_axi_dma_interrupt,
+			       IRQF_SHARED, KBUILD_MODNAME, chip);
+	if (ret)
+		return ret;
+
+	/* LLI address must be aligned to a 64-byte boundary */
+	dw->desc_pool = dmam_pool_create(KBUILD_MODNAME, chip->dev,
+					 sizeof(struct axi_dma_desc), 64, 0);
+	if (!dw->desc_pool) {
+		dev_err(chip->dev, "No memory for descriptors dma pool\n");
+		return -ENOMEM;
+	}
+
+	INIT_LIST_HEAD(&dw->dma.channels);
+	for (i = 0; i < hdata->nr_channels; i++) {
+		struct axi_dma_chan *chan = &dw->chan[i];
+
+		chan->chip = chip;
+		chan->id = i;
+		chan->chan_regs = chip->regs + COMMON_REG_LEN + i * CHAN_REG_LEN;
+		atomic_set(&chan->descs_allocated, 0);
+
+		chan->vc.desc_free = vchan_desc_put;
+		vchan_init(&chan->vc, &dw->dma);
+	}
+
+	/* Only memory-to-memory copy is offered as a capability */
+	dma_cap_set(DMA_MEMCPY, dw->dma.cap_mask);
+
+	/* DMA capabilities */
+	dw->dma.chancnt = hdata->nr_channels;
+	dw->dma.src_addr_widths = AXI_DMA_BUSWIDTHS;
+	dw->dma.dst_addr_widths = AXI_DMA_BUSWIDTHS;
+	dw->dma.directions = BIT(DMA_MEM_TO_MEM);
+	dw->dma.residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
+
+	dw->dma.dev = chip->dev;
+	dw->dma.device_tx_status = dma_chan_tx_status;
+	dw->dma.device_issue_pending = dma_chan_issue_pending;
+	dw->dma.device_terminate_all = dma_chan_terminate_all;
+	dw->dma.device_pause = dma_chan_pause;
+	dw->dma.device_resume = dma_chan_resume;
+
+	dw->dma.device_alloc_chan_resources = dma_chan_alloc_chan_resources;
+	dw->dma.device_free_chan_resources = dma_chan_free_chan_resources;
+
+	dw->dma.device_prep_dma_memcpy = dma_chan_prep_dma_memcpy;
+
+	platform_set_drvdata(pdev, chip);
+
+	pm_runtime_enable(chip->dev);
+
+	/*
+	 * We can't just call pm_runtime_get here instead of
+	 * pm_runtime_get_noresume + axi_dma_resume because we need
+	 * driver to work also without Runtime PM.
+	 */
+	pm_runtime_get_noresume(chip->dev);
+	ret = axi_dma_resume(chip);
+	if (ret < 0)
+		goto err_pm_disable; /* NOTE(review): noresume ref not dropped */
+
+	axi_dma_hw_init(chip);
+
+	pm_runtime_put(chip->dev); /* pairs with get_noresume above */
+
+	ret = dma_async_device_register(&dw->dma);
+	if (ret)
+		goto err_pm_disable;
+
+	dev_info(chip->dev, "DesignWare AXI DMA Controller, %d channels\n",
+		 dw->hdata->nr_channels);
+
+	return 0;
+
+err_pm_disable:
+	pm_runtime_disable(chip->dev);
+
+	return ret;
+}
+
+static int dw_remove(struct platform_device *pdev)
+{
+	struct axi_dma_chip *chip = platform_get_drvdata(pdev);
+	struct dw_axi_dma *dw = chip->dw;
+	struct axi_dma_chan *chan, *_chan;
+	u32 i;
+
+	/* Enable clocks before accessing registers (results not checked) */
+	clk_prepare_enable(chip->cfgr_clk);
+	clk_prepare_enable(chip->core_clk);
+	axi_dma_irq_disable(chip);
+	for (i = 0; i < dw->hdata->nr_channels; i++) {
+		axi_chan_disable(&chip->dw->chan[i]);
+		axi_chan_irq_disable(&chip->dw->chan[i], DWAXIDMAC_IRQ_ALL);
+	}
+	axi_dma_disable(chip);
+
+	pm_runtime_disable(chip->dev);
+	axi_dma_suspend(chip); /* also gates both clocks again */
+
+	devm_free_irq(chip->dev, chip->irq, chip);
+
+	list_for_each_entry_safe(chan, _chan, &dw->dma.channels,
+			vc.chan.device_node) {
+		list_del(&chan->vc.chan.device_node);
+		tasklet_kill(&chan->vc.task);
+	}
+
+	dma_async_device_unregister(&dw->dma);
+
+	return 0;
+}
+
+static const struct dev_pm_ops dw_axi_dma_pm_ops = { /* runtime PM only */
+	SET_RUNTIME_PM_OPS(axi_dma_runtime_suspend, axi_dma_runtime_resume, NULL)
+};
+
+static const struct of_device_id dw_dma_of_id_table[] = {
+	{ .compatible = "snps,axi-dma-1.01a" },
+	{} /* sentinel */
+};
+MODULE_DEVICE_TABLE(of, dw_dma_of_id_table);
+
+static struct platform_driver dw_driver = {
+	.probe		= dw_probe,
+	.remove		= dw_remove,
+	.driver = {
+		.name	= KBUILD_MODNAME,
+		.of_match_table = of_match_ptr(dw_dma_of_id_table),
+		.pm = &dw_axi_dma_pm_ops, /* runtime suspend/resume hooks */
+	},
+};
+module_platform_driver(dw_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Synopsys DesignWare AXI DMA Controller platform driver");
+MODULE_AUTHOR("Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>");
diff --git a/drivers/dma/dw-axi-dmac/dw-axi-dmac.h b/drivers/dma/dw-axi-dmac/dw-axi-dmac.h
new file mode 100644
index 000000000000..f8888dc0b8dc
--- /dev/null
+++ b/drivers/dma/dw-axi-dmac/dw-axi-dmac.h
@@ -0,0 +1,334 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// (C) 2017-2018 Synopsys, Inc. (www.synopsys.com)
+
+/*
+ * Synopsys DesignWare AXI DMA Controller driver.
+ *
+ * Author: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
+ */
+
+#ifndef _AXI_DMA_PLATFORM_H
+#define _AXI_DMA_PLATFORM_H
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/dmaengine.h>
+#include <linux/types.h>
+
+#include "../virt-dma.h"
+
+#define DMAC_MAX_CHANNELS	8
+#define DMAC_MAX_MASTERS	2
+#define DMAC_MAX_BLK_SIZE	0x200000
+
+struct dw_axi_dma_hcfg {
+	u32	nr_channels;
+	u32	nr_masters;
+	u32	m_data_width;
+	u32	block_size[DMAC_MAX_CHANNELS];
+	u32	priority[DMAC_MAX_CHANNELS];
+	/* maximum supported axi burst length */
+	u32	axi_rw_burst_len;
+	bool	restrict_axi_burst_len;
+};
+
+struct axi_dma_chan {
+	struct axi_dma_chip		*chip;
+	void __iomem			*chan_regs;
+	u8				id;
+	atomic_t			descs_allocated;
+
+	struct virt_dma_chan		vc;
+
+	/* these other elements are all protected by vc.lock */
+	bool				is_paused;
+};
+
+struct dw_axi_dma {
+	struct dma_device	dma;
+	struct dw_axi_dma_hcfg	*hdata;
+	struct dma_pool		*desc_pool;
+
+	/* channels */
+	struct axi_dma_chan	*chan;
+};
+
+struct axi_dma_chip {
+	struct device		*dev;
+	int			irq;
+	void __iomem		*regs;
+	struct clk		*core_clk;
+	struct clk		*cfgr_clk;
+	struct dw_axi_dma	*dw;
+};
+
+/* LLI == Linked List Item */
+struct __packed axi_dma_lli {
+	__le64		sar;
+	__le64		dar;
+	__le32		block_ts_lo;
+	__le32		block_ts_hi;
+	__le64		llp;
+	__le32		ctl_lo;
+	__le32		ctl_hi;
+	__le32		sstat;
+	__le32		dstat;
+	__le32		status_lo;
+	__le32		ststus_hi;
+	__le32		reserved_lo;
+	__le32		reserved_hi;
+};
+
+struct axi_dma_desc {
+	struct axi_dma_lli		lli;
+
+	struct virt_dma_desc		vd;
+	struct axi_dma_chan		*chan;
+	struct list_head		xfer_list;
+};
+
+static inline struct device *dchan2dev(struct dma_chan *dchan)
+{
+	return &dchan->dev->device;
+}
+
+static inline struct device *chan2dev(struct axi_dma_chan *chan)
+{
+	return &chan->vc.chan.dev->device;
+}
+
+static inline struct axi_dma_desc *vd_to_axi_desc(struct virt_dma_desc *vd)
+{
+	return container_of(vd, struct axi_dma_desc, vd);
+}
+
+static inline struct axi_dma_chan *vc_to_axi_dma_chan(struct virt_dma_chan *vc)
+{
+	return container_of(vc, struct axi_dma_chan, vc);
+}
+
+static inline struct axi_dma_chan *dchan_to_axi_dma_chan(struct dma_chan *dchan)
+{
+	return vc_to_axi_dma_chan(to_virt_chan(dchan));
+}
+
+
+#define COMMON_REG_LEN		0x100
+#define CHAN_REG_LEN		0x100
+
+/* Common registers offset */
+#define DMAC_ID			0x000 /* R DMAC ID */
+#define DMAC_COMPVER		0x008 /* R DMAC Component Version */
+#define DMAC_CFG		0x010 /* R/W DMAC Configuration */
+#define DMAC_CHEN		0x018 /* R/W DMAC Channel Enable */
+#define DMAC_CHEN_L		0x018 /* R/W DMAC Channel Enable 00-31 */
+#define DMAC_CHEN_H		0x01C /* R/W DMAC Channel Enable 32-63 */
+#define DMAC_INTSTATUS		0x030 /* R DMAC Interrupt Status */
+#define DMAC_COMMON_INTCLEAR	0x038 /* W DMAC Interrupt Clear */
+#define DMAC_COMMON_INTSTATUS_ENA 0x040 /* R DMAC Interrupt Status Enable */
+#define DMAC_COMMON_INTSIGNAL_ENA 0x048 /* R/W DMAC Interrupt Signal Enable */
+#define DMAC_COMMON_INTSTATUS	0x050 /* R DMAC Interrupt Status */
+#define DMAC_RESET		0x058 /* R DMAC Reset Register1 */
+
+/* DMA channel registers offset */
+#define CH_SAR			0x000 /* R/W Chan Source Address */
+#define CH_DAR			0x008 /* R/W Chan Destination Address */
+#define CH_BLOCK_TS		0x010 /* R/W Chan Block Transfer Size */
+#define CH_CTL			0x018 /* R/W Chan Control */
+#define CH_CTL_L		0x018 /* R/W Chan Control 00-31 */
+#define CH_CTL_H		0x01C /* R/W Chan Control 32-63 */
+#define CH_CFG			0x020 /* R/W Chan Configuration */
+#define CH_CFG_L		0x020 /* R/W Chan Configuration 00-31 */
+#define CH_CFG_H		0x024 /* R/W Chan Configuration 32-63 */
+#define CH_LLP			0x028 /* R/W Chan Linked List Pointer */
+#define CH_STATUS		0x030 /* R Chan Status */
+#define CH_SWHSSRC		0x038 /* R/W Chan SW Handshake Source */
+#define CH_SWHSDST		0x040 /* R/W Chan SW Handshake Destination */
+#define CH_BLK_TFR_RESUMEREQ	0x048 /* W Chan Block Transfer Resume Req */
+#define CH_AXI_ID		0x050 /* R/W Chan AXI ID */
+#define CH_AXI_QOS		0x058 /* R/W Chan AXI QOS */
+#define CH_SSTAT		0x060 /* R Chan Source Status */
+#define CH_DSTAT		0x068 /* R Chan Destination Status */
+#define CH_SSTATAR		0x070 /* R/W Chan Source Status Fetch Addr */
+#define CH_DSTATAR		0x078 /* R/W Chan Destination Status Fetch Addr */
+#define CH_INTSTATUS_ENA	0x080 /* R/W Chan Interrupt Status Enable */
+#define CH_INTSTATUS		0x088 /* R/W Chan Interrupt Status */
+#define CH_INTSIGNAL_ENA	0x090 /* R/W Chan Interrupt Signal Enable */
+#define CH_INTCLEAR		0x098 /* W Chan Interrupt Clear */
+
+
+/* DMAC_CFG */
+#define DMAC_EN_POS			0
+#define DMAC_EN_MASK			BIT(DMAC_EN_POS)
+
+#define INT_EN_POS			1
+#define INT_EN_MASK			BIT(INT_EN_POS)
+
+#define DMAC_CHAN_EN_SHIFT		0
+#define DMAC_CHAN_EN_WE_SHIFT		8
+
+#define DMAC_CHAN_SUSP_SHIFT		16
+#define DMAC_CHAN_SUSP_WE_SHIFT		24
+
+/* CH_CTL_H */
+#define CH_CTL_H_ARLEN_EN		BIT(6)
+#define CH_CTL_H_ARLEN_POS		7
+#define CH_CTL_H_AWLEN_EN		BIT(15)
+#define CH_CTL_H_AWLEN_POS		16
+
+enum {
+	DWAXIDMAC_ARWLEN_1		= 0,
+	DWAXIDMAC_ARWLEN_2		= 1,
+	DWAXIDMAC_ARWLEN_4		= 3,
+	DWAXIDMAC_ARWLEN_8		= 7,
+	DWAXIDMAC_ARWLEN_16		= 15,
+	DWAXIDMAC_ARWLEN_32		= 31,
+	DWAXIDMAC_ARWLEN_64		= 63,
+	DWAXIDMAC_ARWLEN_128		= 127,
+	DWAXIDMAC_ARWLEN_256		= 255,
+	DWAXIDMAC_ARWLEN_MIN		= DWAXIDMAC_ARWLEN_1,
+	DWAXIDMAC_ARWLEN_MAX		= DWAXIDMAC_ARWLEN_256
+};
+
+#define CH_CTL_H_LLI_LAST		BIT(30)
+#define CH_CTL_H_LLI_VALID		BIT(31)
+
+/* CH_CTL_L */
+#define CH_CTL_L_LAST_WRITE_EN		BIT(30)
+
+#define CH_CTL_L_DST_MSIZE_POS		18
+#define CH_CTL_L_SRC_MSIZE_POS		14
+
+enum {
+	DWAXIDMAC_BURST_TRANS_LEN_1	= 0,
+	DWAXIDMAC_BURST_TRANS_LEN_4,
+	DWAXIDMAC_BURST_TRANS_LEN_8,
+	DWAXIDMAC_BURST_TRANS_LEN_16,
+	DWAXIDMAC_BURST_TRANS_LEN_32,
+	DWAXIDMAC_BURST_TRANS_LEN_64,
+	DWAXIDMAC_BURST_TRANS_LEN_128,
+	DWAXIDMAC_BURST_TRANS_LEN_256,
+	DWAXIDMAC_BURST_TRANS_LEN_512,
+	DWAXIDMAC_BURST_TRANS_LEN_1024
+};
+
+#define CH_CTL_L_DST_WIDTH_POS		11
+#define CH_CTL_L_SRC_WIDTH_POS		8
+
+#define CH_CTL_L_DST_INC_POS		6
+#define CH_CTL_L_SRC_INC_POS		4
+enum {
+	DWAXIDMAC_CH_CTL_L_INC		= 0,
+	DWAXIDMAC_CH_CTL_L_NOINC
+};
+
+#define CH_CTL_L_DST_MAST		BIT(2)
+#define CH_CTL_L_SRC_MAST		BIT(0)
+
+/* CH_CFG_H */
+#define CH_CFG_H_PRIORITY_POS		17
+#define CH_CFG_H_HS_SEL_DST_POS		4
+#define CH_CFG_H_HS_SEL_SRC_POS		3
+enum {
+	DWAXIDMAC_HS_SEL_HW		= 0,
+	DWAXIDMAC_HS_SEL_SW
+};
+
+#define CH_CFG_H_TT_FC_POS		0
+enum {
+	DWAXIDMAC_TT_FC_MEM_TO_MEM_DMAC	= 0,
+	DWAXIDMAC_TT_FC_MEM_TO_PER_DMAC,
+	DWAXIDMAC_TT_FC_PER_TO_MEM_DMAC,
+	DWAXIDMAC_TT_FC_PER_TO_PER_DMAC,
+	DWAXIDMAC_TT_FC_PER_TO_MEM_SRC,
+	DWAXIDMAC_TT_FC_PER_TO_PER_SRC,
+	DWAXIDMAC_TT_FC_MEM_TO_PER_DST,
+	DWAXIDMAC_TT_FC_PER_TO_PER_DST
+};
+
+/* CH_CFG_L */
+#define CH_CFG_L_DST_MULTBLK_TYPE_POS	2
+#define CH_CFG_L_SRC_MULTBLK_TYPE_POS	0
+enum {
+	DWAXIDMAC_MBLK_TYPE_CONTIGUOUS	= 0,
+	DWAXIDMAC_MBLK_TYPE_RELOAD,
+	DWAXIDMAC_MBLK_TYPE_SHADOW_REG,
+	DWAXIDMAC_MBLK_TYPE_LL
+};
+
+/**
+ * DW AXI DMA channel interrupts
+ *
+ * @DWAXIDMAC_IRQ_NONE: Bitmask with no interrupts set
+ * @DWAXIDMAC_IRQ_BLOCK_TRF: Block transfer complete
+ * @DWAXIDMAC_IRQ_DMA_TRF: Dma transfer complete
+ * @DWAXIDMAC_IRQ_SRC_TRAN: Source transaction complete
+ * @DWAXIDMAC_IRQ_DST_TRAN: Destination transaction complete
+ * @DWAXIDMAC_IRQ_SRC_DEC_ERR: Source decode error
+ * @DWAXIDMAC_IRQ_DST_DEC_ERR: Destination decode error
+ * @DWAXIDMAC_IRQ_SRC_SLV_ERR: Source slave error
+ * @DWAXIDMAC_IRQ_DST_SLV_ERR: Destination slave error
+ * @DWAXIDMAC_IRQ_LLI_RD_DEC_ERR: LLI read decode error
+ * @DWAXIDMAC_IRQ_LLI_WR_DEC_ERR: LLI write decode error
+ * @DWAXIDMAC_IRQ_LLI_RD_SLV_ERR: LLI read slave error
+ * @DWAXIDMAC_IRQ_LLI_WR_SLV_ERR: LLI write slave error
+ * @DWAXIDMAC_IRQ_INVALID_ERR: LLI invalid error or Shadow register error
+ * @DWAXIDMAC_IRQ_MULTIBLKTYPE_ERR: Slave Interface Multiblock type error
+ * @DWAXIDMAC_IRQ_DEC_ERR: Slave Interface decode error
+ * @DWAXIDMAC_IRQ_WR2RO_ERR: Slave Interface write to read only error
+ * @DWAXIDMAC_IRQ_RD2RWO_ERR: Slave Interface read to write only error
+ * @DWAXIDMAC_IRQ_WRONCHEN_ERR: Slave Interface write to channel error
+ * @DWAXIDMAC_IRQ_SHADOWREG_ERR: Slave Interface shadow reg error
+ * @DWAXIDMAC_IRQ_WRONHOLD_ERR: Slave Interface hold error
+ * @DWAXIDMAC_IRQ_LOCK_CLEARED: Lock Cleared Status
+ * @DWAXIDMAC_IRQ_SRC_SUSPENDED: Source Suspended Status
+ * @DWAXIDMAC_IRQ_SUSPENDED: Channel Suspended Status
+ * @DWAXIDMAC_IRQ_DISABLED: Channel Disabled Status
+ * @DWAXIDMAC_IRQ_ABORTED: Channel Aborted Status
+ * @DWAXIDMAC_IRQ_ALL_ERR: Bitmask of all error interrupts
+ * @DWAXIDMAC_IRQ_ALL: Bitmask of all interrupts
+ */
+enum {
+	DWAXIDMAC_IRQ_NONE		= 0,
+	DWAXIDMAC_IRQ_BLOCK_TRF		= BIT(0),
+	DWAXIDMAC_IRQ_DMA_TRF		= BIT(1),
+	DWAXIDMAC_IRQ_SRC_TRAN		= BIT(3),
+	DWAXIDMAC_IRQ_DST_TRAN		= BIT(4),
+	DWAXIDMAC_IRQ_SRC_DEC_ERR	= BIT(5),
+	DWAXIDMAC_IRQ_DST_DEC_ERR	= BIT(6),
+	DWAXIDMAC_IRQ_SRC_SLV_ERR	= BIT(7),
+	DWAXIDMAC_IRQ_DST_SLV_ERR	= BIT(8),
+	DWAXIDMAC_IRQ_LLI_RD_DEC_ERR	= BIT(9),
+	DWAXIDMAC_IRQ_LLI_WR_DEC_ERR	= BIT(10),
+	DWAXIDMAC_IRQ_LLI_RD_SLV_ERR	= BIT(11),
+	DWAXIDMAC_IRQ_LLI_WR_SLV_ERR	= BIT(12),
+	DWAXIDMAC_IRQ_INVALID_ERR	= BIT(13),
+	DWAXIDMAC_IRQ_MULTIBLKTYPE_ERR	= BIT(14),
+	DWAXIDMAC_IRQ_DEC_ERR		= BIT(16),
+	DWAXIDMAC_IRQ_WR2RO_ERR		= BIT(17),
+	DWAXIDMAC_IRQ_RD2RWO_ERR	= BIT(18),
+	DWAXIDMAC_IRQ_WRONCHEN_ERR	= BIT(19),
+	DWAXIDMAC_IRQ_SHADOWREG_ERR	= BIT(20),
+	DWAXIDMAC_IRQ_WRONHOLD_ERR	= BIT(21),
+	DWAXIDMAC_IRQ_LOCK_CLEARED	= BIT(27),
+	DWAXIDMAC_IRQ_SRC_SUSPENDED	= BIT(28),
+	DWAXIDMAC_IRQ_SUSPENDED		= BIT(29),
+	DWAXIDMAC_IRQ_DISABLED		= BIT(30),
+	DWAXIDMAC_IRQ_ABORTED		= BIT(31),
+	DWAXIDMAC_IRQ_ALL_ERR		= (GENMASK(21, 16) | GENMASK(14, 5)),
+	DWAXIDMAC_IRQ_ALL		= GENMASK(31, 0)
+};
+
+enum {
+	DWAXIDMAC_TRANS_WIDTH_8		= 0,
+	DWAXIDMAC_TRANS_WIDTH_16,
+	DWAXIDMAC_TRANS_WIDTH_32,
+	DWAXIDMAC_TRANS_WIDTH_64,
+	DWAXIDMAC_TRANS_WIDTH_128,
+	DWAXIDMAC_TRANS_WIDTH_256,
+	DWAXIDMAC_TRANS_WIDTH_512,
+	DWAXIDMAC_TRANS_WIDTH_MAX	= DWAXIDMAC_TRANS_WIDTH_512
+};
+
+#endif /* _AXI_DMA_PLATFORM_H */
diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index 948df1ab5f1a..85ea92fcea54 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -1876,6 +1876,11 @@ static void edma_dma_init(struct edma_cc *ecc, bool legacy_mode)
 
 	if (memcpy_channels) {
 		m_ddev = devm_kzalloc(ecc->dev, sizeof(*m_ddev), GFP_KERNEL);
+		if (!m_ddev) {
+			dev_warn(ecc->dev, "memcpy is disabled due to OoM\n");
+			memcpy_channels = NULL;
+			goto ch_setup;
+		}
 		ecc->dma_memcpy = m_ddev;
 
 		dma_cap_zero(m_ddev->cap_mask);
@@ -1903,6 +1908,7 @@ static void edma_dma_init(struct edma_cc *ecc, bool legacy_mode)
 		dev_info(ecc->dev, "memcpy is disabled\n");
 	}
 
+ch_setup:
 	for (i = 0; i < ecc->num_channels; i++) {
 		struct edma_chan *echan = &ecc->slave_chans[i];
 		echan->ch_num = EDMA_CTLR_CHAN(ecc->id, i);
diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index e7db24c67030..ccd03c3cedfe 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -338,6 +338,7 @@ struct sdma_channel {
 	unsigned int			chn_real_count;
 	struct tasklet_struct		tasklet;
 	struct imx_dma_data		data;
+	bool				enabled;
 };
 
 #define IMX_DMA_SG_LOOP		BIT(0)
@@ -596,7 +597,14 @@ static int sdma_config_ownership(struct sdma_channel *sdmac,
 
 static void sdma_enable_channel(struct sdma_engine *sdma, int channel)
 {
+	unsigned long flags;
+	struct sdma_channel *sdmac = &sdma->channel[channel];
+
 	writel(BIT(channel), sdma->regs + SDMA_H_START);
+
+	spin_lock_irqsave(&sdmac->lock, flags);
+	sdmac->enabled = true;
+	spin_unlock_irqrestore(&sdmac->lock, flags);
 }
 
 /*
@@ -685,6 +693,14 @@ static void sdma_update_channel_loop(struct sdma_channel *sdmac)
 	struct sdma_buffer_descriptor *bd;
 	int error = 0;
 	enum dma_status	old_status = sdmac->status;
+	unsigned long flags;
+
+	spin_lock_irqsave(&sdmac->lock, flags);
+	if (!sdmac->enabled) {
+		spin_unlock_irqrestore(&sdmac->lock, flags);
+		return;
+	}
+	spin_unlock_irqrestore(&sdmac->lock, flags);
 
 	/*
 	 * loop mode. Iterate over descriptors, re-setup them and
@@ -938,10 +954,15 @@ static int sdma_disable_channel(struct dma_chan *chan)
 	struct sdma_channel *sdmac = to_sdma_chan(chan);
 	struct sdma_engine *sdma = sdmac->sdma;
 	int channel = sdmac->channel;
+	unsigned long flags;
 
 	writel_relaxed(BIT(channel), sdma->regs + SDMA_H_STATSTOP);
 	sdmac->status = DMA_ERROR;
 
+	spin_lock_irqsave(&sdmac->lock, flags);
+	sdmac->enabled = false;
+	spin_unlock_irqrestore(&sdmac->lock, flags);
+
 	return 0;
 }
 
diff --git a/drivers/dma/mediatek/Kconfig b/drivers/dma/mediatek/Kconfig
new file mode 100644
index 000000000000..27bac0bba09e
--- /dev/null
+++ b/drivers/dma/mediatek/Kconfig
@@ -0,0 +1,13 @@
+
+config MTK_HSDMA
+	tristate "MediaTek High-Speed DMA controller support"
+	depends on ARCH_MEDIATEK || COMPILE_TEST
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	---help---
+	  Enable support for High-Speed DMA controller on MediaTek
+	  SoCs.
+
+	  This controller provides channels which are dedicated to
+	  memory-to-memory transfers, offloading the CPU through ring-
+	  based descriptor management.
diff --git a/drivers/dma/mediatek/Makefile b/drivers/dma/mediatek/Makefile
new file mode 100644
index 000000000000..6e778f842f01
--- /dev/null
+++ b/drivers/dma/mediatek/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_MTK_HSDMA) += mtk-hsdma.o
diff --git a/drivers/dma/mediatek/mtk-hsdma.c b/drivers/dma/mediatek/mtk-hsdma.c
new file mode 100644
index 000000000000..b7ec56ae02a6
--- /dev/null
+++ b/drivers/dma/mediatek/mtk-hsdma.c
@@ -0,0 +1,1056 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017-2018 MediaTek Inc.
+
+/*
+ * Driver for MediaTek High-Speed DMA Controller
+ *
+ * Author: Sean Wang <sean.wang@mediatek.com>
+ *
+ */
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/iopoll.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/refcount.h>
+#include <linux/slab.h>
+
+#include "../virt-dma.h"
+
+#define MTK_HSDMA_USEC_POLL		20
+#define MTK_HSDMA_TIMEOUT_POLL		200000
+#define MTK_HSDMA_DMA_BUSWIDTHS		BIT(DMA_SLAVE_BUSWIDTH_4_BYTES)
+
+/* The default number of virtual channel */
+#define MTK_HSDMA_NR_VCHANS		3
+
+/* Only one physical channel supported */
+#define MTK_HSDMA_NR_MAX_PCHANS		1
+
+/* Macro for physical descriptor (PD) manipulation */
+/* The number of PDs, which must be a power of 2 */
+#define MTK_DMA_SIZE			64
+#define MTK_HSDMA_NEXT_DESP_IDX(x, y)	(((x) + 1) & ((y) - 1))
+#define MTK_HSDMA_LAST_DESP_IDX(x, y)	(((x) - 1) & ((y) - 1))
+#define MTK_HSDMA_MAX_LEN		0x3f80
+#define MTK_HSDMA_ALIGN_SIZE		4
+#define MTK_HSDMA_PLEN_MASK		0x3fff
+#define MTK_HSDMA_DESC_PLEN(x)		(((x) & MTK_HSDMA_PLEN_MASK) << 16)
+#define MTK_HSDMA_DESC_PLEN_GET(x)	(((x) >> 16) & MTK_HSDMA_PLEN_MASK)
+
+/* Registers for underlying ring manipulation */
+#define MTK_HSDMA_TX_BASE		0x0
+#define MTK_HSDMA_TX_CNT		0x4
+#define MTK_HSDMA_TX_CPU		0x8
+#define MTK_HSDMA_TX_DMA		0xc
+#define MTK_HSDMA_RX_BASE		0x100
+#define MTK_HSDMA_RX_CNT		0x104
+#define MTK_HSDMA_RX_CPU		0x108
+#define MTK_HSDMA_RX_DMA		0x10c
+
+/* Registers for global setup */
+#define MTK_HSDMA_GLO			0x204
+#define MTK_HSDMA_GLO_MULTI_DMA		BIT(10)
+#define MTK_HSDMA_TX_WB_DDONE		BIT(6)
+#define MTK_HSDMA_BURST_64BYTES		(0x2 << 4)
+#define MTK_HSDMA_GLO_RX_BUSY		BIT(3)
+#define MTK_HSDMA_GLO_RX_DMA		BIT(2)
+#define MTK_HSDMA_GLO_TX_BUSY		BIT(1)
+#define MTK_HSDMA_GLO_TX_DMA		BIT(0)
+#define MTK_HSDMA_GLO_DMA		(MTK_HSDMA_GLO_TX_DMA |	\
+					 MTK_HSDMA_GLO_RX_DMA)
+#define MTK_HSDMA_GLO_BUSY		(MTK_HSDMA_GLO_RX_BUSY | \
+					 MTK_HSDMA_GLO_TX_BUSY)
+#define MTK_HSDMA_GLO_DEFAULT		(MTK_HSDMA_GLO_TX_DMA | \
+					 MTK_HSDMA_GLO_RX_DMA | \
+					 MTK_HSDMA_TX_WB_DDONE | \
+					 MTK_HSDMA_BURST_64BYTES | \
+					 MTK_HSDMA_GLO_MULTI_DMA)
+
+/* Registers for reset */
+#define MTK_HSDMA_RESET			0x208
+#define MTK_HSDMA_RST_TX		BIT(0)
+#define MTK_HSDMA_RST_RX		BIT(16)
+
+/* Registers for interrupt control */
+#define MTK_HSDMA_DLYINT		0x20c
+#define MTK_HSDMA_RXDLY_INT_EN		BIT(15)
+
+/* Interrupt fires when the pending number's more than the specified */
+#define MTK_HSDMA_RXMAX_PINT(x)		(((x) & 0x7f) << 8)
+
+/* Interrupt fires when the pending time's more than the specified in 20 us */
+#define MTK_HSDMA_RXMAX_PTIME(x)	((x) & 0x7f)
+#define MTK_HSDMA_DLYINT_DEFAULT	(MTK_HSDMA_RXDLY_INT_EN | \
+					 MTK_HSDMA_RXMAX_PINT(20) | \
+					 MTK_HSDMA_RXMAX_PTIME(20))
+#define MTK_HSDMA_INT_STATUS		0x220
+#define MTK_HSDMA_INT_ENABLE		0x228
+#define MTK_HSDMA_INT_RXDONE		BIT(16)
+
+enum mtk_hsdma_vdesc_flag {
+	MTK_HSDMA_VDESC_FINISHED	= 0x01,
+};
+
+#define IS_MTK_HSDMA_VDESC_FINISHED(x) ((x) == MTK_HSDMA_VDESC_FINISHED)
+
+/**
+ * struct mtk_hsdma_pdesc - This is the struct holding info describing physical
+ *			    descriptor (PD) and its placement must be kept at
+ *			    4-bytes alignment in little endian order.
+ * @desc[1-4]:		    The control pad used to indicate hardware how to
+ *			    deal with the descriptor such as source and
+ *			    destination address and data length. The maximum
+ *			    data length each pdesc can handle is 0x3f80 bytes
+ */
+struct mtk_hsdma_pdesc {
+	__le32 desc1;
+	__le32 desc2;
+	__le32 desc3;
+	__le32 desc4;
+} __packed __aligned(4);
+
+/**
+ * struct mtk_hsdma_vdesc - This is the struct holding info describing virtual
+ *			    descriptor (VD)
+ * @vd:			    An instance for struct virt_dma_desc
+ * @len:		    The total data size device wants to move
+ * @residue:		    The remaining data size device will move
+ * @dest:		    The destination address device wants to move to
+ * @src:		    The source address device wants to move from
+ */
+struct mtk_hsdma_vdesc {
+	struct virt_dma_desc vd;
+	size_t len;
+	size_t residue;
+	dma_addr_t dest;
+	dma_addr_t src;
+};
+
+/**
+ * struct mtk_hsdma_cb - This is the struct holding extra info required for RX
+ *			 ring to know what relevant VD the PD is being
+ *			 mapped to.
+ * @vd:			 Pointer to the relevant VD.
+ * @flag:		 Flag indicating what action should be taken when VD
+ *			 is completed.
+ */
+struct mtk_hsdma_cb {
+	struct virt_dma_desc *vd;
+	enum mtk_hsdma_vdesc_flag flag;
+};
+
+/**
+ * struct mtk_hsdma_ring - This struct holds info describing underlying ring
+ *			   space
+ * @txd:		   The descriptor TX ring which describes DMA source
+ *			   information
+ * @rxd:		   The descriptor RX ring which describes DMA
+ *			   destination information
+ * @cb:			   The extra information pointed at by RX ring
+ * @tphys:		   The physical addr of TX ring
+ * @rphys:		   The physical addr of RX ring
+ * @cur_tptr:		   Pointer to the next free descriptor used by the host
+ * @cur_rptr:		   Pointer to the last done descriptor by the device
+ */
+struct mtk_hsdma_ring {
+	struct mtk_hsdma_pdesc *txd;
+	struct mtk_hsdma_pdesc *rxd;
+	struct mtk_hsdma_cb *cb;
+	dma_addr_t tphys;
+	dma_addr_t rphys;
+	u16 cur_tptr;
+	u16 cur_rptr;
+};
+
+/**
+ * struct mtk_hsdma_pchan - This is the struct holding info describing physical
+ *			   channel (PC)
+ * @ring:		   An instance for the underlying ring
+ * @sz_ring:		   Total size allocated for the ring
+ * @nr_free:		   Total number of free rooms in the ring. It would
+ *			   be accessed and updated frequently between IRQ
+ *			   context and user context to reflect whether ring
+ *			   can accept requests from VD.
+ */
+struct mtk_hsdma_pchan {
+	struct mtk_hsdma_ring ring;
+	size_t sz_ring;
+	atomic_t nr_free;
+};
+
+/**
+ * struct mtk_hsdma_vchan - This is the struct holding info describing virtual
+ *			   channel (VC)
+ * @vc:			   An instance for struct virt_dma_chan
+ * @issue_completion:	   Completion to wait for all issued descriptors
+ * @issue_synchronize:	   Bool indicating channel synchronization starts
+ * @desc_hw_processing:	   List those descriptors the hardware is processing,
+ *			   which is protected by vc.lock
+ */
+struct mtk_hsdma_vchan {
+	struct virt_dma_chan vc;
+	struct completion issue_completion;
+	bool issue_synchronize;
+	struct list_head desc_hw_processing;
+};
+
+/**
+ * struct mtk_hsdma_soc - This is the struct holding differences among SoCs
+ * @ddone:		  Bit mask for DDONE
+ * @ls0:		  Bit mask for LS0
+ */
+struct mtk_hsdma_soc {
+	__le32 ddone;
+	__le32 ls0;
+};
+
+/**
+ * struct mtk_hsdma_device - This is the struct holding info describing HSDMA
+ *			     device
+ * @ddev:		     An instance for struct dma_device
+ * @base:		     The mapped register I/O base
+ * @clk:		     The clock that device internal is using
+ * @irq:		     The IRQ that device is using
+ * @dma_requests:	     The number of VCs the device supports
+ * @vc:			     The pointer to all available VCs
+ * @pc:			     The pointer to the underlying PC
+ * @pc_refcnt:		     Track how many VCs are using the PC
+ * @lock:		     Lock protecting against multiple VCs accessing PC
+ * @soc:		     The pointer to area holding differences among
+ *			     various platforms
+ */
+struct mtk_hsdma_device {
+	struct dma_device ddev;
+	void __iomem *base;
+	struct clk *clk;
+	u32 irq;
+
+	u32 dma_requests;
+	struct mtk_hsdma_vchan *vc;
+	struct mtk_hsdma_pchan *pc;
+	refcount_t pc_refcnt;
+
+	/* Lock used to protect against multiple VCs access PC */
+	spinlock_t lock;
+
+	const struct mtk_hsdma_soc *soc;
+};
+
+static struct mtk_hsdma_device *to_hsdma_dev(struct dma_chan *chan)
+{
+	return container_of(chan->device, struct mtk_hsdma_device, ddev);
+}
+
+static inline struct mtk_hsdma_vchan *to_hsdma_vchan(struct dma_chan *chan)
+{
+	return container_of(chan, struct mtk_hsdma_vchan, vc.chan);
+}
+
+static struct mtk_hsdma_vdesc *to_hsdma_vdesc(struct virt_dma_desc *vd)
+{
+	return container_of(vd, struct mtk_hsdma_vdesc, vd);
+}
+
+static struct device *hsdma2dev(struct mtk_hsdma_device *hsdma)
+{
+	return hsdma->ddev.dev;
+}
+
+static u32 mtk_dma_read(struct mtk_hsdma_device *hsdma, u32 reg)
+{
+	return readl(hsdma->base + reg);
+}
+
+static void mtk_dma_write(struct mtk_hsdma_device *hsdma, u32 reg, u32 val)
+{
+	writel(val, hsdma->base + reg);
+}
+
+static void mtk_dma_rmw(struct mtk_hsdma_device *hsdma, u32 reg,
+			u32 mask, u32 set)
+{
+	u32 val;
+
+	val = mtk_dma_read(hsdma, reg);
+	val &= ~mask;
+	val |= set;
+	mtk_dma_write(hsdma, reg, val);
+}
+
+static void mtk_dma_set(struct mtk_hsdma_device *hsdma, u32 reg, u32 val)
+{
+	mtk_dma_rmw(hsdma, reg, 0, val);
+}
+
+static void mtk_dma_clr(struct mtk_hsdma_device *hsdma, u32 reg, u32 val)
+{
+	mtk_dma_rmw(hsdma, reg, val, 0);
+}
+
+static void mtk_hsdma_vdesc_free(struct virt_dma_desc *vd)
+{
+	kfree(container_of(vd, struct mtk_hsdma_vdesc, vd));
+}
+
+static int mtk_hsdma_busy_wait(struct mtk_hsdma_device *hsdma)
+{
+	u32 status = 0;
+
+	return readl_poll_timeout(hsdma->base + MTK_HSDMA_GLO, status,
+				  !(status & MTK_HSDMA_GLO_BUSY),
+				  MTK_HSDMA_USEC_POLL,
+				  MTK_HSDMA_TIMEOUT_POLL);
+}
+
+static int mtk_hsdma_alloc_pchan(struct mtk_hsdma_device *hsdma,
+				 struct mtk_hsdma_pchan *pc)
+{
+	struct mtk_hsdma_ring *ring = &pc->ring;
+	int err;
+
+	memset(pc, 0, sizeof(*pc));
+
+	/*
+	 * Allocate ring space where [0 ... MTK_DMA_SIZE - 1] is for TX ring
+	 * and [MTK_DMA_SIZE ... 2 * MTK_DMA_SIZE - 1] is for RX ring.
+	 */
+	pc->sz_ring = 2 * MTK_DMA_SIZE * sizeof(*ring->txd);
+	ring->txd = dma_zalloc_coherent(hsdma2dev(hsdma), pc->sz_ring,
+					&ring->tphys, GFP_NOWAIT);
+	if (!ring->txd)
+		return -ENOMEM;
+
+	ring->rxd = &ring->txd[MTK_DMA_SIZE];
+	ring->rphys = ring->tphys + MTK_DMA_SIZE * sizeof(*ring->txd);
+	ring->cur_tptr = 0;
+	ring->cur_rptr = MTK_DMA_SIZE - 1;
+
+	ring->cb = kcalloc(MTK_DMA_SIZE, sizeof(*ring->cb), GFP_NOWAIT);
+	if (!ring->cb) {
+		err = -ENOMEM;
+		goto err_free_dma;
+	}
+
+	atomic_set(&pc->nr_free, MTK_DMA_SIZE - 1);
+
+	/* Disable HSDMA and wait for the completion */
+	mtk_dma_clr(hsdma, MTK_HSDMA_GLO, MTK_HSDMA_GLO_DMA);
+	err = mtk_hsdma_busy_wait(hsdma);
+	if (err)
+		goto err_free_cb;
+
+	/* Reset */
+	mtk_dma_set(hsdma, MTK_HSDMA_RESET,
+		    MTK_HSDMA_RST_TX | MTK_HSDMA_RST_RX);
+	mtk_dma_clr(hsdma, MTK_HSDMA_RESET,
+		    MTK_HSDMA_RST_TX | MTK_HSDMA_RST_RX);
+
+	/* Setup HSDMA initial pointer in the ring */
+	mtk_dma_write(hsdma, MTK_HSDMA_TX_BASE, ring->tphys);
+	mtk_dma_write(hsdma, MTK_HSDMA_TX_CNT, MTK_DMA_SIZE);
+	mtk_dma_write(hsdma, MTK_HSDMA_TX_CPU, ring->cur_tptr);
+	mtk_dma_write(hsdma, MTK_HSDMA_TX_DMA, 0);
+	mtk_dma_write(hsdma, MTK_HSDMA_RX_BASE, ring->rphys);
+	mtk_dma_write(hsdma, MTK_HSDMA_RX_CNT, MTK_DMA_SIZE);
+	mtk_dma_write(hsdma, MTK_HSDMA_RX_CPU, ring->cur_rptr);
+	mtk_dma_write(hsdma, MTK_HSDMA_RX_DMA, 0);
+
+	/* Enable HSDMA */
+	mtk_dma_set(hsdma, MTK_HSDMA_GLO, MTK_HSDMA_GLO_DMA);
+
+	/* Setup delayed interrupt */
+	mtk_dma_write(hsdma, MTK_HSDMA_DLYINT, MTK_HSDMA_DLYINT_DEFAULT);
+
+	/* Enable interrupt */
+	mtk_dma_set(hsdma, MTK_HSDMA_INT_ENABLE, MTK_HSDMA_INT_RXDONE);
+
+	return 0;
+
+err_free_cb:
+	kfree(ring->cb);
+
+err_free_dma:
+	dma_free_coherent(hsdma2dev(hsdma),
+			  pc->sz_ring, ring->txd, ring->tphys);
+	return err;
+}
+
+static void mtk_hsdma_free_pchan(struct mtk_hsdma_device *hsdma,
+				 struct mtk_hsdma_pchan *pc)
+{
+	struct mtk_hsdma_ring *ring = &pc->ring;
+
+	/* Disable HSDMA and then wait for the completion */
+	mtk_dma_clr(hsdma, MTK_HSDMA_GLO, MTK_HSDMA_GLO_DMA);
+	mtk_hsdma_busy_wait(hsdma);
+
+	/* Reset pointer in the ring */
+	mtk_dma_clr(hsdma, MTK_HSDMA_INT_ENABLE, MTK_HSDMA_INT_RXDONE);
+	mtk_dma_write(hsdma, MTK_HSDMA_TX_BASE, 0);
+	mtk_dma_write(hsdma, MTK_HSDMA_TX_CNT, 0);
+	mtk_dma_write(hsdma, MTK_HSDMA_TX_CPU, 0);
+	mtk_dma_write(hsdma, MTK_HSDMA_RX_BASE, 0);
+	mtk_dma_write(hsdma, MTK_HSDMA_RX_CNT, 0);
+	mtk_dma_write(hsdma, MTK_HSDMA_RX_CPU, MTK_DMA_SIZE - 1);
+
+	kfree(ring->cb);
+
+	dma_free_coherent(hsdma2dev(hsdma),
+			  pc->sz_ring, ring->txd, ring->tphys);
+}
+
+static int mtk_hsdma_issue_pending_vdesc(struct mtk_hsdma_device *hsdma,
+					 struct mtk_hsdma_pchan *pc,
+					 struct mtk_hsdma_vdesc *hvd)
+{
+	struct mtk_hsdma_ring *ring = &pc->ring;
+	struct mtk_hsdma_pdesc *txd, *rxd;
+	u16 reserved, prev, tlen;
+	unsigned long flags;
+
+	/* Protect against the PC being accessed by multiple VCs at once */
+	spin_lock_irqsave(&hsdma->lock, flags);
+
+	/*
+	 * Reserve rooms; pc->nr_free tracks the free rooms in the ring.
+	 * Compare as size_t so a huge hvd->len isn't truncated to 16 bits.
+	 */
+	reserved = min_t(size_t, DIV_ROUND_UP(hvd->len, MTK_HSDMA_MAX_LEN),
+			 atomic_read(&pc->nr_free));
+
+	if (!reserved) {
+		spin_unlock_irqrestore(&hsdma->lock, flags);
+		return -ENOSPC;
+	}
+
+	atomic_sub(reserved, &pc->nr_free);
+
+	while (reserved--) {
+		/* Limit size by PD capability for valid data moving */
+		tlen = (hvd->len > MTK_HSDMA_MAX_LEN) ?
+		       MTK_HSDMA_MAX_LEN : hvd->len;
+
+		/*
+		 * Setup PDs using the remaining VD info mapped on those
+		 * reserved rooms. And since RXD is shared memory between the
+		 * host and the device allocated by dma_alloc_coherent call,
+		 * the helper macro WRITE_ONCE ensures the data is really
+		 * written to RAM.
+		 */
+		txd = &ring->txd[ring->cur_tptr];
+		WRITE_ONCE(txd->desc1, hvd->src);
+		WRITE_ONCE(txd->desc2,
+			   hsdma->soc->ls0 | MTK_HSDMA_DESC_PLEN(tlen));
+
+		rxd = &ring->rxd[ring->cur_tptr];
+		WRITE_ONCE(rxd->desc1, hvd->dest);
+		WRITE_ONCE(rxd->desc2, MTK_HSDMA_DESC_PLEN(tlen));
+
+		/* Associate the VD which the PD belongs to */
+		ring->cb[ring->cur_tptr].vd = &hvd->vd;
+
+		/* Move forward the pointer of TX ring */
+		ring->cur_tptr = MTK_HSDMA_NEXT_DESP_IDX(ring->cur_tptr,
+							 MTK_DMA_SIZE);
+
+		/* Update VD with remaining data */
+		hvd->src  += tlen;
+		hvd->dest += tlen;
+		hvd->len  -= tlen;
+	}
+
+	/*
+	 * Tag the last PD of the VD once the whole VD has been mapped; that
+	 * PD is then responsible for completing the VD.
+	 */
+	if (!hvd->len) {
+		prev = MTK_HSDMA_LAST_DESP_IDX(ring->cur_tptr, MTK_DMA_SIZE);
+		ring->cb[prev].flag = MTK_HSDMA_VDESC_FINISHED;
+	}
+
+	/* Ensure all changes indeed done before we're going on */
+	wmb();
+
+	/*
+	 * Updating the TX ring pointer in hardware lets HSDMA take action
+	 * on those pending PDs.
+	 */
+	mtk_dma_write(hsdma, MTK_HSDMA_TX_CPU, ring->cur_tptr);
+
+	spin_unlock_irqrestore(&hsdma->lock, flags);
+
+	return 0;
+}
+
+static void mtk_hsdma_issue_vchan_pending(struct mtk_hsdma_device *hsdma,
+					  struct mtk_hsdma_vchan *hvc)
+{
+	struct virt_dma_desc *vd, *vd2;
+	int err;
+
+	lockdep_assert_held(&hvc->vc.lock);
+
+	list_for_each_entry_safe(vd, vd2, &hvc->vc.desc_issued, node) {
+		struct mtk_hsdma_vdesc *hvd;
+
+		hvd = to_hsdma_vdesc(vd);
+
+		/* Map VD into PC and all VCs shares a single PC */
+		err = mtk_hsdma_issue_pending_vdesc(hsdma, hsdma->pc, hvd);
+
+		/*
+		 * Move VD from desc_issued to desc_hw_processing when entire
+		 * VD is fit into available PDs. Otherwise, the uncompleted
+		 * VDs would stay in list desc_issued and then restart the
+		 * processing as soon as possible once underlying ring space
+		 * got freed.
+		 */
+		if (err == -ENOSPC || hvd->len > 0)
+			break;
+
+		/*
+		 * The extra list desc_hw_processing is used because
+		 * hardware can't provide sufficient information allowing us
+		 * to know what VDs are still working on the underlying ring.
+		 * Through the additional list, it can help us to implement
+		 * terminate_all, residue calculation and such thing needed
+		 * to know detail descriptor status on the hardware.
+		 */
+		list_move_tail(&vd->node, &hvc->desc_hw_processing);
+	}
+}
+
+static void mtk_hsdma_free_rooms_in_ring(struct mtk_hsdma_device *hsdma)
+{
+	struct mtk_hsdma_vchan *hvc;
+	struct mtk_hsdma_pdesc *rxd;
+	struct mtk_hsdma_vdesc *hvd;
+	struct mtk_hsdma_pchan *pc;
+	struct mtk_hsdma_cb *cb;
+	int i = MTK_DMA_SIZE;
+	__le32 desc2;
+	u32 status;
+	u16 next;
+
+	/* Read IRQ status */
+	status = mtk_dma_read(hsdma, MTK_HSDMA_INT_STATUS);
+	if (unlikely(!(status & MTK_HSDMA_INT_RXDONE)))
+		goto rx_done;
+
+	pc = hsdma->pc;
+
+	/*
+	 * Using a fail-safe loop with iterations of up to MTK_DMA_SIZE to
+	 * reclaim these finished descriptors: The most number of PDs the ISR
+	 * can handle at one time shouldn't be more than MTK_DMA_SIZE so we
+	 * take it as limited count instead of just using a dangerous infinite
+	 * poll.
+	 */
+	while (i--) {
+		next = MTK_HSDMA_NEXT_DESP_IDX(pc->ring.cur_rptr,
+					       MTK_DMA_SIZE);
+		rxd = &pc->ring.rxd[next];
+
+		/*
+		 * If the DDONE bit (hsdma->soc->ddone) is not set, that means
+		 * data moving for the PD is still ongoing.
+		 */
+		desc2 = READ_ONCE(rxd->desc2);
+		if (!(desc2 & hsdma->soc->ddone))
+			break;
+
+		cb = &pc->ring.cb[next];
+		if (unlikely(!cb->vd)) {
+			dev_err(hsdma2dev(hsdma), "cb->vd cannot be null\n");
+			break;
+		}
+
+		/* Update residue of the VD, using the desc2 snapshot above */
+		hvd = to_hsdma_vdesc(cb->vd);
+		hvd->residue -= MTK_HSDMA_DESC_PLEN_GET(desc2);
+
+		/* Complete VD until the relevant last PD is finished */
+		if (IS_MTK_HSDMA_VDESC_FINISHED(cb->flag)) {
+			hvc = to_hsdma_vchan(cb->vd->tx.chan);
+
+			spin_lock(&hvc->vc.lock);
+
+			/* Remove VD from list desc_hw_processing */
+			list_del(&cb->vd->node);
+
+			/* Add VD into list desc_completed */
+			vchan_cookie_complete(cb->vd);
+
+			if (hvc->issue_synchronize &&
+			    list_empty(&hvc->desc_hw_processing)) {
+				complete(&hvc->issue_completion);
+				hvc->issue_synchronize = false;
+			}
+			spin_unlock(&hvc->vc.lock);
+
+			cb->flag = 0;
+		}
+
+		cb->vd = NULL;
+
+		/*
+		 * Recycle the RXD with the helper WRITE_ONCE, which ensures
+		 * the data is really written into RAM.
+		 */
+		WRITE_ONCE(rxd->desc1, 0);
+		WRITE_ONCE(rxd->desc2, 0);
+		pc->ring.cur_rptr = next;
+
+		/* Release rooms */
+		atomic_inc(&pc->nr_free);
+	}
+
+	/* Ensure all changes indeed done before we're going on */
+	wmb();
+
+	/* Update CPU pointer for those completed PDs */
+	mtk_dma_write(hsdma, MTK_HSDMA_RX_CPU, pc->ring.cur_rptr);
+
+	/*
+	 * Acking the pending IRQ allows hardware no longer to keep the used
+	 * IRQ line in certain trigger state when software has completed all
+	 * the finished physical descriptors.
+	 */
+	if (atomic_read(&pc->nr_free) >= MTK_DMA_SIZE - 1)
+		mtk_dma_write(hsdma, MTK_HSDMA_INT_STATUS, status);
+
+	/* ASAP handles pending VDs in all VCs after freeing some rooms */
+	for (i = 0; i < hsdma->dma_requests; i++) {
+		hvc = &hsdma->vc[i];
+		spin_lock(&hvc->vc.lock);
+		mtk_hsdma_issue_vchan_pending(hsdma, hvc);
+		spin_unlock(&hvc->vc.lock);
+	}
+
+rx_done:
+	/* All completed PDs are cleaned up, so enable interrupt again */
+	mtk_dma_set(hsdma, MTK_HSDMA_INT_ENABLE, MTK_HSDMA_INT_RXDONE);
+}
+
+/*
+ * Interrupt handler: reclaims the finished PDs from the ring.
+ */
+static irqreturn_t mtk_hsdma_irq(int irq, void *devid)
+{
+	struct mtk_hsdma_device *hsdma = devid;
+
+	/* Keep RX-done masked until the completed PDs have been cleaned up */
+	mtk_dma_clr(hsdma, MTK_HSDMA_INT_ENABLE, MTK_HSDMA_INT_RXDONE);
+
+	mtk_hsdma_free_rooms_in_ring(hsdma);
+
+	return IRQ_HANDLED;
+}
+
+/* Find the VD for @cookie on desc_hw_processing or desc_issued, else NULL */
+static struct virt_dma_desc *mtk_hsdma_find_active_desc(struct dma_chan *c,
+							dma_cookie_t cookie)
+{
+	struct mtk_hsdma_vchan *hvc = to_hsdma_vchan(c);
+	struct virt_dma_desc *pos;
+
+	list_for_each_entry(pos, &hvc->desc_hw_processing, node)
+		if (pos->tx.cookie == cookie)
+			return pos;
+	list_for_each_entry(pos, &hvc->vc.desc_issued, node)
+		if (pos->tx.cookie == cookie)
+			return pos;
+
+	return NULL;
+}
+
+/*
+ * Report the state of @cookie and, for an unfinished transfer, its residue.
+ */
+static enum dma_status mtk_hsdma_tx_status(struct dma_chan *c,
+					   dma_cookie_t cookie,
+					   struct dma_tx_state *txstate)
+{
+	struct mtk_hsdma_vchan *hvc = to_hsdma_vchan(c);
+	struct virt_dma_desc *vd;
+	enum dma_status ret;
+	unsigned long flags;
+	size_t bytes = 0;
+
+	ret = dma_cookie_status(c, cookie, txstate);
+	if (ret == DMA_COMPLETE || !txstate)
+		return ret;
+
+	/* Read the residue under the lock: afterwards the VD may be freed */
+	spin_lock_irqsave(&hvc->vc.lock, flags);
+	vd = mtk_hsdma_find_active_desc(c, cookie);
+	if (vd)
+		bytes = to_hsdma_vdesc(vd)->residue;
+	spin_unlock_irqrestore(&hvc->vc.lock, flags);
+
+	dma_set_residue(txstate, bytes);
+
+	return ret;
+}
+
+/* Issue the VDs queued on channel @c, if there are any */
+static void mtk_hsdma_issue_pending(struct dma_chan *c)
+{
+	struct mtk_hsdma_vchan *vchan = to_hsdma_vchan(c);
+	struct mtk_hsdma_device *mdev = to_hsdma_dev(c);
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&vchan->vc.lock, irqflags);
+	/* Nothing to hand over unless vchan_issue_pending() reports work */
+	if (vchan_issue_pending(&vchan->vc))
+		mtk_hsdma_issue_vchan_pending(mdev, vchan);
+	spin_unlock_irqrestore(&vchan->vc.lock, irqflags);
+}
+
+/* Allocate a VD describing a copy of @len bytes from @src to @dest */
+static struct dma_async_tx_descriptor *
+mtk_hsdma_prep_dma_memcpy(struct dma_chan *c, dma_addr_t dest,
+			  dma_addr_t src, size_t len, unsigned long flags)
+{
+	struct mtk_hsdma_vdesc *desc = kzalloc(sizeof(*desc), GFP_NOWAIT);
+
+	if (!desc)
+		return NULL;
+
+	desc->src = src;
+	desc->dest = dest;
+	desc->len = len;
+	desc->residue = len;
+
+	return vchan_tx_prep(to_virt_chan(c), &desc->vd, flags);
+}
+
+/* Free VDs not yet given to hardware; users must not queue new ones now */
+static int mtk_hsdma_free_inactive_desc(struct dma_chan *c)
+{
+	struct virt_dma_chan *vchan = to_virt_chan(c);
+	unsigned long irqflags;
+	LIST_HEAD(stale);
+
+	spin_lock_irqsave(&vchan->lock, irqflags);
+	list_splice_tail_init(&vchan->desc_allocated, &stale);
+	list_splice_tail_init(&vchan->desc_submitted, &stale);
+	list_splice_tail_init(&vchan->desc_issued, &stale);
+	spin_unlock_irqrestore(&vchan->lock, irqflags);
+
+	vchan_dma_desc_free_list(vchan, &stale);
+
+	return 0;
+}
+
+/* Wait for the VDs of @c that the hardware already owns, then free them */
+static void mtk_hsdma_free_active_desc(struct dma_chan *c)
+{
+	struct mtk_hsdma_vchan *hvc = to_hsdma_vchan(c);
+	bool sync_needed = false;
+	unsigned long flags;
+
+	/*
+	 * Ask the ISR to signal issue_completion once the hardware has
+	 * consumed every descriptor of this channel. The ISR takes this
+	 * lock too, so interrupts must be disabled while holding it.
+	 */
+	spin_lock_irqsave(&hvc->vc.lock, flags);
+	if (!list_empty(&hvc->desc_hw_processing)) {
+		hvc->issue_synchronize = true;
+		sync_needed = true;
+	}
+	spin_unlock_irqrestore(&hvc->vc.lock, flags);
+
+	if (sync_needed)
+		wait_for_completion(&hvc->issue_completion);
+
+	/* Everything the hardware owned for this channel should be done now */
+	WARN_ONCE(!list_empty(&hvc->desc_hw_processing),
+		  "Desc pending still in list desc_hw_processing\n");
+
+	/* Free all descriptors in list desc_completed */
+	vchan_synchronize(&hvc->vc);
+
+	WARN_ONCE(!list_empty(&hvc->vc.desc_completed),
+		  "Desc pending still in list desc_completed\n");
+}
+
+/*
+ * Terminate all transfers on the channel; this may block until the
+ * descriptors already being processed by hardware have finished.
+ */
+static int mtk_hsdma_terminate_all(struct dma_chan *c)
+{
+	/* Free the descriptors that the hardware has not processed yet */
+	mtk_hsdma_free_inactive_desc(c);
+
+	/*
+	 * The DMA engine provides no way to stop descriptors that the
+	 * hardware is currently processing, so the only option is to wait
+	 * in mtk_hsdma_free_active_desc() until they have all completed.
+	 */
+	mtk_hsdma_free_active_desc(c);
+
+	return 0;
+}
+
+/* Take a reference on the single PC, allocating it for the first VC */
+static int mtk_hsdma_alloc_chan_resources(struct dma_chan *c)
+{
+	struct mtk_hsdma_device *hsdma = to_hsdma_dev(c);
+	int err;
+
+	/* The PC is already set up: this VC simply shares it */
+	if (refcount_read(&hsdma->pc_refcnt)) {
+		refcount_inc(&hsdma->pc_refcnt);
+		return 0;
+	}
+
+	/*
+	 * HSDMA has only one PC, so its resources are allocated when the
+	 * first VC is created; every later VC runs on that same PC.
+	 */
+	err = mtk_hsdma_alloc_pchan(hsdma, hsdma->pc);
+	if (err)
+		return err;
+
+	/* refcount_inc() would complain about going 0 -> 1, so set it to 1 */
+	refcount_set(&hsdma->pc_refcnt, 1);
+
+	return 0;
+}
+
+/*
+ * Free every descriptor still on the VC, then drop its reference on the
+ * PC; the PC itself is only released once all the VCs are destroyed.
+ */
+static void mtk_hsdma_free_chan_resources(struct dma_chan *c)
+{
+	struct mtk_hsdma_device *hsdma = to_hsdma_dev(c);
+
+	mtk_hsdma_terminate_all(c);
+
+	if (refcount_dec_and_test(&hsdma->pc_refcnt))
+		mtk_hsdma_free_pchan(hsdma, hsdma->pc);
+}
+
+/* Enable PM, clock and default GLO; PM is not undone if the clock fails */
+static int mtk_hsdma_hw_init(struct mtk_hsdma_device *hsdma)
+{
+	struct device *dev = hsdma2dev(hsdma);
+	int ret;
+
+	pm_runtime_enable(dev);
+	pm_runtime_get_sync(dev);
+	ret = clk_prepare_enable(hsdma->clk);
+	if (ret)
+		return ret;
+
+	mtk_dma_write(hsdma, MTK_HSDMA_INT_ENABLE, 0);
+	mtk_dma_write(hsdma, MTK_HSDMA_GLO, MTK_HSDMA_GLO_DEFAULT);
+	return 0;
+}
+
+/* Clear the GLO setting, then drop the clock and the runtime PM reference */
+static int mtk_hsdma_hw_deinit(struct mtk_hsdma_device *hsdma)
+{
+	struct device *dev = hsdma2dev(hsdma);
+
+	mtk_dma_write(hsdma, MTK_HSDMA_GLO, 0);
+	clk_disable_unprepare(hsdma->clk);
+	pm_runtime_put_sync(dev);
+	pm_runtime_disable(dev);
+	return 0;
+}
+
+static const struct mtk_hsdma_soc mt7623_soc = {
+	.ddone = BIT(31),	/* tested in desc2 to detect a finished PD */
+	.ls0 = BIT(30),
+};
+
+static const struct mtk_hsdma_soc mt7622_soc = {
+	.ddone = BIT(15),	/* same flag at another bit on MT7622 */
+	.ls0 = BIT(14),
+};
+
+static const struct of_device_id mtk_hsdma_match[] = {
+	{ .compatible = "mediatek,mt7623-hsdma", .data = &mt7623_soc},
+	{ .compatible = "mediatek,mt7622-hsdma", .data = &mt7622_soc},
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, mtk_hsdma_match);
+
+/* Map the resources, register the DMA device and bring the hardware up */
+static int mtk_hsdma_probe(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	struct mtk_hsdma_device *hsdma;
+	struct mtk_hsdma_vchan *vc;
+	struct dma_device *dd;
+	struct resource *res;
+	int i, err;
+
+	hsdma = devm_kzalloc(&pdev->dev, sizeof(*hsdma), GFP_KERNEL);
+	if (!hsdma)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	hsdma->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(hsdma->base))
+		return PTR_ERR(hsdma->base);
+
+	hsdma->soc = of_device_get_match_data(&pdev->dev);
+	if (!hsdma->soc) {
+		dev_err(&pdev->dev, "No device match found\n");
+		return -ENODEV;
+	}
+
+	hsdma->clk = devm_clk_get(&pdev->dev, "hsdma");
+	if (IS_ERR(hsdma->clk)) {
+		dev_err(&pdev->dev, "No clock for %s\n", dev_name(&pdev->dev));
+		return PTR_ERR(hsdma->clk);
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (!res) {
+		dev_err(&pdev->dev, "No irq resource for %s\n",
+			dev_name(&pdev->dev));
+		return -EINVAL;
+	}
+	hsdma->irq = res->start;
+
+	refcount_set(&hsdma->pc_refcnt, 0);
+	spin_lock_init(&hsdma->lock);
+
+	dd = &hsdma->ddev;
+	dma_cap_set(DMA_MEMCPY, dd->cap_mask);
+	dd->copy_align = MTK_HSDMA_ALIGN_SIZE;
+	dd->device_alloc_chan_resources = mtk_hsdma_alloc_chan_resources;
+	dd->device_free_chan_resources = mtk_hsdma_free_chan_resources;
+	dd->device_tx_status = mtk_hsdma_tx_status;
+	dd->device_issue_pending = mtk_hsdma_issue_pending;
+	dd->device_prep_dma_memcpy = mtk_hsdma_prep_dma_memcpy;
+	dd->device_terminate_all = mtk_hsdma_terminate_all;
+	dd->src_addr_widths = MTK_HSDMA_DMA_BUSWIDTHS;
+	dd->dst_addr_widths = MTK_HSDMA_DMA_BUSWIDTHS;
+	dd->directions = BIT(DMA_MEM_TO_MEM);
+	dd->residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
+	dd->dev = &pdev->dev;
+	INIT_LIST_HEAD(&dd->channels);
+
+	hsdma->dma_requests = MTK_HSDMA_NR_VCHANS;
+	if (np && of_property_read_u32(np, "dma-requests",
+				       &hsdma->dma_requests)) {
+		dev_info(&pdev->dev,
+			 "Using %u as missing dma-requests property\n",
+			 MTK_HSDMA_NR_VCHANS);
+	}
+
+	hsdma->pc = devm_kcalloc(&pdev->dev, MTK_HSDMA_NR_MAX_PCHANS,
+				 sizeof(*hsdma->pc), GFP_KERNEL);
+	if (!hsdma->pc)
+		return -ENOMEM;
+	hsdma->vc = devm_kcalloc(&pdev->dev, hsdma->dma_requests,
+				 sizeof(*hsdma->vc), GFP_KERNEL);
+	if (!hsdma->vc)
+		return -ENOMEM;
+
+	for (i = 0; i < hsdma->dma_requests; i++) {
+		vc = &hsdma->vc[i];
+		vc->vc.desc_free = mtk_hsdma_vdesc_free;
+		vchan_init(&vc->vc, dd);
+		init_completion(&vc->issue_completion);
+		INIT_LIST_HEAD(&vc->desc_hw_processing);
+	}
+
+	err = dma_async_device_register(dd);
+	if (err)
+		return err;
+
+	err = of_dma_controller_register(np, of_dma_xlate_by_chan_id, hsdma);
+	if (err) {
+		dev_err(&pdev->dev,
+			"MediaTek HSDMA OF registration failed %d\n", err);
+		goto err_unregister;
+	}
+
+	/* Clock enable failed: skip hw_deinit. NOTE(review): PM ref is kept */
+	err = mtk_hsdma_hw_init(hsdma);
+	if (err)
+		goto err_free_of;
+
+	err = devm_request_irq(&pdev->dev, hsdma->irq, mtk_hsdma_irq, 0,
+			       dev_name(&pdev->dev), hsdma);
+	if (err) {
+		dev_err(&pdev->dev, "request_irq failed with err %d\n", err);
+		goto err_hw_deinit;
+	}
+
+	platform_set_drvdata(pdev, hsdma);
+	dev_info(&pdev->dev, "MediaTek HSDMA driver registered\n");
+
+	return 0;
+
+err_hw_deinit:
+	mtk_hsdma_hw_deinit(hsdma);
+err_free_of:
+	of_dma_controller_free(np);
+err_unregister:
+	dma_async_device_unregister(dd);
+
+	return err;
+}
+
+/* Undo probe: unpublish the channels, quiesce the IRQ, stop the engine */
+static int mtk_hsdma_remove(struct platform_device *pdev)
+{
+	struct mtk_hsdma_device *hsdma = platform_get_drvdata(pdev);
+	struct mtk_hsdma_vchan *vc;
+	int i;
+
+	/* Must precede list_del() below: unregistering walks ddev.channels */
+	of_dma_controller_free(pdev->dev.of_node);
+	dma_async_device_unregister(&hsdma->ddev);
+
+	/* Disable DMA interrupt and wait for a running handler to finish */
+	mtk_dma_write(hsdma, MTK_HSDMA_INT_ENABLE, 0);
+	synchronize_irq(hsdma->irq);
+
+	/* Kill VC task only now that the interrupt has been masked */
+	for (i = 0; i < hsdma->dma_requests; i++) {
+		vc = &hsdma->vc[i];
+
+		list_del(&vc->vc.chan.device_node);
+		tasklet_kill(&vc->vc.task);
+	}
+
+	/* Disable hardware */
+	mtk_hsdma_hw_deinit(hsdma);
+
+	return 0;
+}
+
+static struct platform_driver mtk_hsdma_driver = {
+	.probe		= mtk_hsdma_probe,
+	.remove		= mtk_hsdma_remove,
+	.driver = {
+		.name		= KBUILD_MODNAME,
+		.of_match_table	= mtk_hsdma_match,	/* DT compatibles */
+	},
+};
+module_platform_driver(mtk_hsdma_driver);
+
+MODULE_DESCRIPTION("MediaTek High-Speed DMA Controller Driver");
+MODULE_AUTHOR("Sean Wang <sean.wang@mediatek.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index d7327fd5f445..de1fd59fe136 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -1510,7 +1510,7 @@ static void pl330_dotask(unsigned long data)
 /* Returns 1 if state was updated, 0 otherwise */
 static int pl330_update(struct pl330_dmac *pl330)
 {
-	struct dma_pl330_desc *descdone, *tmp;
+	struct dma_pl330_desc *descdone;
 	unsigned long flags;
 	void __iomem *regs;
 	u32 val;
@@ -1588,7 +1588,9 @@ static int pl330_update(struct pl330_dmac *pl330)
 	}
 
 	/* Now that we are in no hurry, do the callbacks */
-	list_for_each_entry_safe(descdone, tmp, &pl330->req_done, rqd) {
+	while (!list_empty(&pl330->req_done)) {
+		descdone = list_first_entry(&pl330->req_done,
+					    struct dma_pl330_desc, rqd);
 		list_del(&descdone->rqd);
 		spin_unlock_irqrestore(&pl330->lock, flags);
 		dma_pl330_rqcb(descdone, PL330_ERR_NONE);
diff --git a/drivers/dma/qcom/bam_dma.c b/drivers/dma/qcom/bam_dma.c
index d076940e0c69..d29275b97e84 100644
--- a/drivers/dma/qcom/bam_dma.c
+++ b/drivers/dma/qcom/bam_dma.c
@@ -393,6 +393,7 @@ struct bam_device {
 	struct device_dma_parameters dma_parms;
 	struct bam_chan *channels;
 	u32 num_channels;
+	u32 num_ees;
 
 	/* execution environment ID, from DT */
 	u32 ee;
@@ -934,12 +935,15 @@ static void bam_apply_new_config(struct bam_chan *bchan,
 	struct bam_device *bdev = bchan->bdev;
 	u32 maxburst;
 
-	if (dir == DMA_DEV_TO_MEM)
-		maxburst = bchan->slave.src_maxburst;
-	else
-		maxburst = bchan->slave.dst_maxburst;
+	if (!bdev->controlled_remotely) {
+		if (dir == DMA_DEV_TO_MEM)
+			maxburst = bchan->slave.src_maxburst;
+		else
+			maxburst = bchan->slave.dst_maxburst;
 
-	writel_relaxed(maxburst, bam_addr(bdev, 0, BAM_DESC_CNT_TRSHLD));
+		writel_relaxed(maxburst,
+			       bam_addr(bdev, 0, BAM_DESC_CNT_TRSHLD));
+	}
 
 	bchan->reconfigure = 0;
 }
@@ -1128,15 +1132,19 @@ static int bam_init(struct bam_device *bdev)
 	u32 val;
 
 	/* read revision and configuration information */
-	val = readl_relaxed(bam_addr(bdev, 0, BAM_REVISION)) >> NUM_EES_SHIFT;
-	val &= NUM_EES_MASK;
+	if (!bdev->num_ees) {
+		val = readl_relaxed(bam_addr(bdev, 0, BAM_REVISION));
+		bdev->num_ees = (val >> NUM_EES_SHIFT) & NUM_EES_MASK;
+	}
 
 	/* check that configured EE is within range */
-	if (bdev->ee >= val)
+	if (bdev->ee >= bdev->num_ees)
 		return -EINVAL;
 
-	val = readl_relaxed(bam_addr(bdev, 0, BAM_NUM_PIPES));
-	bdev->num_channels = val & BAM_NUM_PIPES_MASK;
+	if (!bdev->num_channels) {
+		val = readl_relaxed(bam_addr(bdev, 0, BAM_NUM_PIPES));
+		bdev->num_channels = val & BAM_NUM_PIPES_MASK;
+	}
 
 	if (bdev->controlled_remotely)
 		return 0;
@@ -1232,9 +1240,25 @@ static int bam_dma_probe(struct platform_device *pdev)
 	bdev->controlled_remotely = of_property_read_bool(pdev->dev.of_node,
 						"qcom,controlled-remotely");
 
+	if (bdev->controlled_remotely) {
+		ret = of_property_read_u32(pdev->dev.of_node, "num-channels",
+					   &bdev->num_channels);
+		if (ret)
+			dev_err(bdev->dev, "num-channels unspecified in dt\n");
+
+		ret = of_property_read_u32(pdev->dev.of_node, "qcom,num-ees",
+					   &bdev->num_ees);
+		if (ret)
+			dev_err(bdev->dev, "num-ees unspecified in dt\n");
+	}
+
 	bdev->bamclk = devm_clk_get(bdev->dev, "bam_clk");
-	if (IS_ERR(bdev->bamclk))
-		return PTR_ERR(bdev->bamclk);
+	if (IS_ERR(bdev->bamclk)) {
+		if (!bdev->controlled_remotely)
+			return PTR_ERR(bdev->bamclk);
+
+		bdev->bamclk = NULL;
+	}
 
 	ret = clk_prepare_enable(bdev->bamclk);
 	if (ret) {
@@ -1309,6 +1333,11 @@ static int bam_dma_probe(struct platform_device *pdev)
 	if (ret)
 		goto err_unregister_dma;
 
+	if (bdev->controlled_remotely) {
+		pm_runtime_disable(&pdev->dev);
+		return 0;
+	}
+
 	pm_runtime_irq_safe(&pdev->dev);
 	pm_runtime_set_autosuspend_delay(&pdev->dev, BAM_DMA_AUTOSUSPEND_DELAY);
 	pm_runtime_use_autosuspend(&pdev->dev);
@@ -1392,7 +1421,8 @@ static int __maybe_unused bam_dma_suspend(struct device *dev)
 {
 	struct bam_device *bdev = dev_get_drvdata(dev);
 
-	pm_runtime_force_suspend(dev);
+	if (!bdev->controlled_remotely)
+		pm_runtime_force_suspend(dev);
 
 	clk_unprepare(bdev->bamclk);
 
@@ -1408,7 +1438,8 @@ static int __maybe_unused bam_dma_resume(struct device *dev)
 	if (ret)
 		return ret;
 
-	pm_runtime_force_resume(dev);
+	if (!bdev->controlled_remotely)
+		pm_runtime_force_resume(dev);
 
 	return 0;
 }
diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
index d0cacdb0713e..2a2ccd9c78e4 100644
--- a/drivers/dma/sh/rcar-dmac.c
+++ b/drivers/dma/sh/rcar-dmac.c
@@ -1301,8 +1301,17 @@ static unsigned int rcar_dmac_chan_get_residue(struct rcar_dmac_chan *chan,
 	 * If the cookie doesn't correspond to the currently running transfer
 	 * then the descriptor hasn't been processed yet, and the residue is
 	 * equal to the full descriptor size.
+	 * Also, a client driver may call this function before
+	 * rcar_dmac_isr_channel_thread() runs. In this case, the "desc.running"
+	 * will be the next descriptor, and the done list will appear. So, if
+	 * the argument cookie matches the done list's cookie, we can assume
+	 * the residue is zero.
 	 */
 	if (cookie != desc->async_tx.cookie) {
+		list_for_each_entry(desc, &chan->desc.done, node) {
+			if (cookie == desc->async_tx.cookie)
+				return 0;
+		}
 		list_for_each_entry(desc, &chan->desc.pending, node) {
 			if (cookie == desc->async_tx.cookie)
 				return desc->size;
@@ -1677,8 +1686,8 @@ static const struct dev_pm_ops rcar_dmac_pm = {
 	 *   - Wait for the current transfer to complete and stop the device,
 	 *   - Resume transfers, if any.
 	 */
-	SET_LATE_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
-				     pm_runtime_force_resume)
+	SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+				      pm_runtime_force_resume)
 	SET_RUNTIME_PM_OPS(rcar_dmac_runtime_suspend, rcar_dmac_runtime_resume,
 			   NULL)
 };
diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c
index 786fc8fcc38e..8c5807362a25 100644
--- a/drivers/dma/stm32-dma.c
+++ b/drivers/dma/stm32-dma.c
@@ -5,6 +5,7 @@
  *
  * Copyright (C) M'boumba Cedric Madianga 2015
  * Author: M'boumba Cedric Madianga <cedric.madianga@gmail.com>
+ *         Pierre-Yves Mordret <pierre-yves.mordret@st.com>
  *
  * License terms:  GNU General Public License (GPL), version 2
  */
@@ -33,9 +34,14 @@
 #define STM32_DMA_LIFCR			0x0008 /* DMA Low Int Flag Clear Reg */
 #define STM32_DMA_HIFCR			0x000c /* DMA High Int Flag Clear Reg */
 #define STM32_DMA_TCI			BIT(5) /* Transfer Complete Interrupt */
+#define STM32_DMA_HTI			BIT(4) /* Half Transfer Interrupt */
 #define STM32_DMA_TEI			BIT(3) /* Transfer Error Interrupt */
 #define STM32_DMA_DMEI			BIT(2) /* Direct Mode Error Interrupt */
 #define STM32_DMA_FEI			BIT(0) /* FIFO Error Interrupt */
+#define STM32_DMA_MASKI			(STM32_DMA_TCI \
+					 | STM32_DMA_TEI \
+					 | STM32_DMA_DMEI \
+					 | STM32_DMA_FEI)
 
 /* DMA Stream x Configuration Register */
 #define STM32_DMA_SCR(x)		(0x0010 + 0x18 * (x)) /* x = 0..7 */
@@ -60,7 +66,8 @@
 #define STM32_DMA_SCR_PINC		BIT(9) /* Peripheral increment mode */
 #define STM32_DMA_SCR_CIRC		BIT(8) /* Circular mode */
 #define STM32_DMA_SCR_PFCTRL		BIT(5) /* Peripheral Flow Controller */
-#define STM32_DMA_SCR_TCIE		BIT(4) /* Transfer Cplete Int Enable*/
+#define STM32_DMA_SCR_TCIE		BIT(4) /* Transfer Complete Int Enable
+						*/
 #define STM32_DMA_SCR_TEIE		BIT(2) /* Transfer Error Int Enable */
 #define STM32_DMA_SCR_DMEIE		BIT(1) /* Direct Mode Err Int Enable */
 #define STM32_DMA_SCR_EN		BIT(0) /* Stream Enable */
@@ -111,11 +118,24 @@
 #define STM32_DMA_FIFO_THRESHOLD_FULL			0x03
 
 #define STM32_DMA_MAX_DATA_ITEMS	0xffff
+/*
+ * Valid transfer starts from @0 to @0xFFFE leading to unaligned scatter
+ * gather at boundary. Thus it's safer to round down this value on FIFO
+ * size (16 Bytes)
+ */
+#define STM32_DMA_ALIGNED_MAX_DATA_ITEMS	\
+	ALIGN_DOWN(STM32_DMA_MAX_DATA_ITEMS, 16)
 #define STM32_DMA_MAX_CHANNELS		0x08
 #define STM32_DMA_MAX_REQUEST_ID	0x08
 #define STM32_DMA_MAX_DATA_PARAM	0x03
+#define STM32_DMA_FIFO_SIZE		16	/* FIFO is 16 bytes */
+#define STM32_DMA_MIN_BURST		4
 #define STM32_DMA_MAX_BURST		16
 
+/* DMA Features */
+#define STM32_DMA_THRESHOLD_FTR_MASK	GENMASK(1, 0)
+#define STM32_DMA_THRESHOLD_FTR_GET(n)	((n) & STM32_DMA_THRESHOLD_FTR_MASK)
+
 enum stm32_dma_width {
 	STM32_DMA_BYTE,
 	STM32_DMA_HALF_WORD,
@@ -129,11 +149,18 @@ enum stm32_dma_burst_size {
 	STM32_DMA_BURST_INCR16,
 };
 
+/**
+ * struct stm32_dma_cfg - STM32 DMA custom configuration
+ * @channel_id: channel ID
+ * @request_line: DMA request
+ * @stream_config: 32bit mask specifying the DMA channel configuration
+ * @features: 32bit mask specifying the DMA Feature list
+ */
 struct stm32_dma_cfg {
 	u32 channel_id;
 	u32 request_line;
 	u32 stream_config;
-	u32 threshold;
+	u32 features;
 };
 
 struct stm32_dma_chan_reg {
@@ -171,6 +198,9 @@ struct stm32_dma_chan {
 	u32 next_sg;
 	struct dma_slave_config	dma_sconfig;
 	struct stm32_dma_chan_reg chan_reg;
+	u32 threshold;
+	u32 mem_burst;
+	u32 mem_width;
 };
 
 struct stm32_dma_device {
@@ -235,6 +265,85 @@ static int stm32_dma_get_width(struct stm32_dma_chan *chan,
 	}
 }
 
+/* Pick the widest memory bus width that evenly divides @buf_len */
+static enum dma_slave_buswidth stm32_dma_get_max_width(u32 buf_len,
+						       u32 threshold)
+{
+	enum dma_slave_buswidth width = DMA_SLAVE_BUSWIDTH_2_BYTES;
+
+	/* Only a full-FIFO threshold allows starting from 4-byte accesses */
+	if (threshold == STM32_DMA_FIFO_THRESHOLD_FULL)
+		width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+
+	while (width > DMA_SLAVE_BUSWIDTH_1_BYTE &&
+	       (buf_len < width || buf_len % width))
+		width = width >> 1;
+
+	return width;
+}
+
+/*
+ * Tell whether @burst beats of @width bytes are acceptable for the FIFO
+ * level @threshold: the beats held at that level must form whole bursts.
+ * A zero @burst is always accepted, whereas an undefined @width never
+ * is.
+ */
+static bool stm32_dma_fifo_threshold_is_allowed(u32 burst, u32 threshold,
+						enum dma_slave_buswidth width)
+{
+	u32 beats;
+
+	if (width == DMA_SLAVE_BUSWIDTH_UNDEFINED)
+		return false;
+
+	if (burst == 0)
+		return true;
+
+	/* Beats that fit in the FIFO share selected by the threshold */
+	beats = (STM32_DMA_FIFO_SIZE / width) * (threshold + 1) / 4;
+
+	/* Allowed only when they make up a whole number of bursts */
+	return beats % burst == 0;
+}
+
+/*
+ * A burst is only possible when @buf_len reaches STM32_DMA_MAX_BURST
+ * for a full FIFO threshold, or half of that value for a half-full
+ * threshold.
+ */
+static bool stm32_dma_is_burst_possible(u32 buf_len, u32 threshold)
+{
+	switch (threshold) {
+	case STM32_DMA_FIFO_THRESHOLD_FULL:
+		return buf_len >= STM32_DMA_MAX_BURST;
+	case STM32_DMA_FIFO_THRESHOLD_HALFFULL:
+		return buf_len >= STM32_DMA_MAX_BURST / 2;
+	default:
+		/* Any other threshold rules bursts out altogether */
+		return false;
+	}
+}
+
+/* Largest burst (0 for none) that fits @buf_len and the FIFO @threshold */
+static u32 stm32_dma_get_best_burst(u32 buf_len, u32 max_burst, u32 threshold,
+				    enum dma_slave_buswidth width)
+{
+	u32 best_burst = max_burst;
+
+	/* An undefined width never passes the FIFO check: bail out early */
+	if (best_burst == 1 || width == DMA_SLAVE_BUSWIDTH_UNDEFINED ||
+	    !stm32_dma_is_burst_possible(buf_len, threshold))
+		return 0;
+
+	while ((buf_len < best_burst * width && best_burst > 1) ||
+	       !stm32_dma_fifo_threshold_is_allowed(best_burst, threshold,
+						    width)) {
+		best_burst = best_burst > STM32_DMA_MIN_BURST ?
+			     best_burst >> 1 : 0;
+	}
+	return best_burst;
+}
+
 static int stm32_dma_get_burst(struct stm32_dma_chan *chan, u32 maxburst)
 {
 	switch (maxburst) {
@@ -254,12 +363,12 @@ static int stm32_dma_get_burst(struct stm32_dma_chan *chan, u32 maxburst)
 }
 
 static void stm32_dma_set_fifo_config(struct stm32_dma_chan *chan,
-				      u32 src_maxburst, u32 dst_maxburst)
+				      u32 src_burst, u32 dst_burst)
 {
 	chan->chan_reg.dma_sfcr &= ~STM32_DMA_SFCR_MASK;
 	chan->chan_reg.dma_scr &= ~STM32_DMA_SCR_DMEIE;
 
-	if ((!src_maxburst) && (!dst_maxburst)) {
+	if (!src_burst && !dst_burst) {
 		/* Using direct mode */
 		chan->chan_reg.dma_scr |= STM32_DMA_SCR_DMEIE;
 	} else {
@@ -300,7 +409,7 @@ static u32 stm32_dma_irq_status(struct stm32_dma_chan *chan)
 
 	flags = dma_isr >> (((chan->id & 2) << 3) | ((chan->id & 1) * 6));
 
-	return flags;
+	return flags & STM32_DMA_MASKI;
 }
 
 static void stm32_dma_irq_clear(struct stm32_dma_chan *chan, u32 flags)
@@ -315,6 +424,7 @@ static void stm32_dma_irq_clear(struct stm32_dma_chan *chan, u32 flags)
 	 * If (ch % 4) is 2 or 3, left shift the mask by 16 bits.
 	 * If (ch % 4) is 1 or 3, additionally left shift the mask by 6 bits.
 	 */
+	flags &= STM32_DMA_MASKI;
 	dma_ifcr = flags << (((chan->id & 2) << 3) | ((chan->id & 1) * 6));
 
 	if (chan->id & 4)
@@ -429,6 +539,8 @@ static void stm32_dma_dump_reg(struct stm32_dma_chan *chan)
 	dev_dbg(chan2dev(chan), "SFCR:  0x%08x\n", sfcr);
 }
 
+static void stm32_dma_configure_next_sg(struct stm32_dma_chan *chan);
+
 static void stm32_dma_start_transfer(struct stm32_dma_chan *chan)
 {
 	struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
@@ -471,6 +583,9 @@ static void stm32_dma_start_transfer(struct stm32_dma_chan *chan)
 	if (status)
 		stm32_dma_irq_clear(chan, status);
 
+	if (chan->desc->cyclic)
+		stm32_dma_configure_next_sg(chan);
+
 	stm32_dma_dump_reg(chan);
 
 	/* Start DMA */
@@ -541,13 +656,29 @@ static irqreturn_t stm32_dma_chan_irq(int irq, void *devid)
 	status = stm32_dma_irq_status(chan);
 	scr = stm32_dma_read(dmadev, STM32_DMA_SCR(chan->id));
 
-	if ((status & STM32_DMA_TCI) && (scr & STM32_DMA_SCR_TCIE)) {
+	if (status & STM32_DMA_TCI) {
 		stm32_dma_irq_clear(chan, STM32_DMA_TCI);
-		stm32_dma_handle_chan_done(chan);
-
-	} else {
+		if (scr & STM32_DMA_SCR_TCIE)
+			stm32_dma_handle_chan_done(chan);
+		status &= ~STM32_DMA_TCI;
+	}
+	if (status & STM32_DMA_HTI) {
+		stm32_dma_irq_clear(chan, STM32_DMA_HTI);
+		status &= ~STM32_DMA_HTI;
+	}
+	if (status & STM32_DMA_FEI) {
+		stm32_dma_irq_clear(chan, STM32_DMA_FEI);
+		status &= ~STM32_DMA_FEI;
+		if (!(scr & STM32_DMA_SCR_EN))
+			dev_err(chan2dev(chan), "FIFO Error\n");
+		else
+			dev_dbg(chan2dev(chan), "FIFO over/underrun\n");
+	}
+	if (status) {
 		stm32_dma_irq_clear(chan, status);
 		dev_err(chan2dev(chan), "DMA error: status=0x%08x\n", status);
+		if (!(scr & STM32_DMA_SCR_EN))
+			dev_err(chan2dev(chan), "chan disabled by HW\n");
 	}
 
 	spin_unlock(&chan->vchan.lock);
@@ -564,45 +695,59 @@ static void stm32_dma_issue_pending(struct dma_chan *c)
 	if (vchan_issue_pending(&chan->vchan) && !chan->desc && !chan->busy) {
 		dev_dbg(chan2dev(chan), "vchan %p: issued\n", &chan->vchan);
 		stm32_dma_start_transfer(chan);
-		if (chan->desc->cyclic)
-			stm32_dma_configure_next_sg(chan);
+
 	}
 	spin_unlock_irqrestore(&chan->vchan.lock, flags);
 }
 
 static int stm32_dma_set_xfer_param(struct stm32_dma_chan *chan,
 				    enum dma_transfer_direction direction,
-				    enum dma_slave_buswidth *buswidth)
+				    enum dma_slave_buswidth *buswidth,
+				    u32 buf_len)
 {
 	enum dma_slave_buswidth src_addr_width, dst_addr_width;
 	int src_bus_width, dst_bus_width;
 	int src_burst_size, dst_burst_size;
-	u32 src_maxburst, dst_maxburst;
-	u32 dma_scr = 0;
+	u32 src_maxburst, dst_maxburst, src_best_burst, dst_best_burst;
+	u32 dma_scr, threshold;
 
 	src_addr_width = chan->dma_sconfig.src_addr_width;
 	dst_addr_width = chan->dma_sconfig.dst_addr_width;
 	src_maxburst = chan->dma_sconfig.src_maxburst;
 	dst_maxburst = chan->dma_sconfig.dst_maxburst;
+	threshold = chan->threshold;
 
 	switch (direction) {
 	case DMA_MEM_TO_DEV:
+		/* Set device data size */
 		dst_bus_width = stm32_dma_get_width(chan, dst_addr_width);
 		if (dst_bus_width < 0)
 			return dst_bus_width;
 
-		dst_burst_size = stm32_dma_get_burst(chan, dst_maxburst);
+		/* Set device burst size */
+		dst_best_burst = stm32_dma_get_best_burst(buf_len,
+							  dst_maxburst,
+							  threshold,
+							  dst_addr_width);
+
+		dst_burst_size = stm32_dma_get_burst(chan, dst_best_burst);
 		if (dst_burst_size < 0)
 			return dst_burst_size;
 
-		if (!src_addr_width)
-			src_addr_width = dst_addr_width;
-
+		/* Set memory data size */
+		src_addr_width = stm32_dma_get_max_width(buf_len, threshold);
+		chan->mem_width = src_addr_width;
 		src_bus_width = stm32_dma_get_width(chan, src_addr_width);
 		if (src_bus_width < 0)
 			return src_bus_width;
 
-		src_burst_size = stm32_dma_get_burst(chan, src_maxburst);
+		/* Set memory burst size */
+		src_maxburst = STM32_DMA_MAX_BURST;
+		src_best_burst = stm32_dma_get_best_burst(buf_len,
+							  src_maxburst,
+							  threshold,
+							  src_addr_width);
+		src_burst_size = stm32_dma_get_burst(chan, src_best_burst);
 		if (src_burst_size < 0)
 			return src_burst_size;
 
@@ -612,27 +757,46 @@ static int stm32_dma_set_xfer_param(struct stm32_dma_chan *chan,
 			STM32_DMA_SCR_PBURST(dst_burst_size) |
 			STM32_DMA_SCR_MBURST(src_burst_size);
 
+		/* Set FIFO threshold */
+		chan->chan_reg.dma_sfcr &= ~STM32_DMA_SFCR_FTH_MASK;
+		chan->chan_reg.dma_sfcr |= STM32_DMA_SFCR_FTH(threshold);
+
+		/* Set peripheral address */
 		chan->chan_reg.dma_spar = chan->dma_sconfig.dst_addr;
 		*buswidth = dst_addr_width;
 		break;
 
 	case DMA_DEV_TO_MEM:
+		/* Set device data size */
 		src_bus_width = stm32_dma_get_width(chan, src_addr_width);
 		if (src_bus_width < 0)
 			return src_bus_width;
 
-		src_burst_size = stm32_dma_get_burst(chan, src_maxburst);
+		/* Set device burst size */
+		src_best_burst = stm32_dma_get_best_burst(buf_len,
+							  src_maxburst,
+							  threshold,
+							  src_addr_width);
+		chan->mem_burst = src_best_burst;
+		src_burst_size = stm32_dma_get_burst(chan, src_best_burst);
 		if (src_burst_size < 0)
 			return src_burst_size;
 
-		if (!dst_addr_width)
-			dst_addr_width = src_addr_width;
-
+		/* Set memory data size */
+		dst_addr_width = stm32_dma_get_max_width(buf_len, threshold);
+		chan->mem_width = dst_addr_width;
 		dst_bus_width = stm32_dma_get_width(chan, dst_addr_width);
 		if (dst_bus_width < 0)
 			return dst_bus_width;
 
-		dst_burst_size = stm32_dma_get_burst(chan, dst_maxburst);
+		/* Set memory burst size */
+		dst_maxburst = STM32_DMA_MAX_BURST;
+		dst_best_burst = stm32_dma_get_best_burst(buf_len,
+							  dst_maxburst,
+							  threshold,
+							  dst_addr_width);
+		chan->mem_burst = dst_best_burst;
+		dst_burst_size = stm32_dma_get_burst(chan, dst_best_burst);
 		if (dst_burst_size < 0)
 			return dst_burst_size;
 
@@ -642,6 +806,11 @@ static int stm32_dma_set_xfer_param(struct stm32_dma_chan *chan,
 			STM32_DMA_SCR_PBURST(src_burst_size) |
 			STM32_DMA_SCR_MBURST(dst_burst_size);
 
+		/* Set FIFO threshold */
+		chan->chan_reg.dma_sfcr &= ~STM32_DMA_SFCR_FTH_MASK;
+		chan->chan_reg.dma_sfcr |= STM32_DMA_SFCR_FTH(threshold);
+
+		/* Set peripheral address */
 		chan->chan_reg.dma_spar = chan->dma_sconfig.src_addr;
 		*buswidth = chan->dma_sconfig.src_addr_width;
 		break;
@@ -651,8 +820,9 @@ static int stm32_dma_set_xfer_param(struct stm32_dma_chan *chan,
 		return -EINVAL;
 	}
 
-	stm32_dma_set_fifo_config(chan, src_maxburst, dst_maxburst);
+	stm32_dma_set_fifo_config(chan, src_best_burst, dst_best_burst);
 
+	/* Set DMA control register */
 	chan->chan_reg.dma_scr &= ~(STM32_DMA_SCR_DIR_MASK |
 			STM32_DMA_SCR_PSIZE_MASK | STM32_DMA_SCR_MSIZE_MASK |
 			STM32_DMA_SCR_PBURST_MASK | STM32_DMA_SCR_MBURST_MASK);
@@ -692,10 +862,6 @@ static struct dma_async_tx_descriptor *stm32_dma_prep_slave_sg(
 	if (!desc)
 		return NULL;
 
-	ret = stm32_dma_set_xfer_param(chan, direction, &buswidth);
-	if (ret < 0)
-		goto err;
-
 	/* Set peripheral flow controller */
 	if (chan->dma_sconfig.device_fc)
 		chan->chan_reg.dma_scr |= STM32_DMA_SCR_PFCTRL;
@@ -703,10 +869,15 @@ static struct dma_async_tx_descriptor *stm32_dma_prep_slave_sg(
 		chan->chan_reg.dma_scr &= ~STM32_DMA_SCR_PFCTRL;
 
 	for_each_sg(sgl, sg, sg_len, i) {
+		ret = stm32_dma_set_xfer_param(chan, direction, &buswidth,
+					       sg_dma_len(sg));
+		if (ret < 0)
+			goto err;
+
 		desc->sg_req[i].len = sg_dma_len(sg);
 
 		nb_data_items = desc->sg_req[i].len / buswidth;
-		if (nb_data_items > STM32_DMA_MAX_DATA_ITEMS) {
+		if (nb_data_items > STM32_DMA_ALIGNED_MAX_DATA_ITEMS) {
 			dev_err(chan2dev(chan), "nb items not supported\n");
 			goto err;
 		}
@@ -767,12 +938,12 @@ static struct dma_async_tx_descriptor *stm32_dma_prep_dma_cyclic(
 		return NULL;
 	}
 
-	ret = stm32_dma_set_xfer_param(chan, direction, &buswidth);
+	ret = stm32_dma_set_xfer_param(chan, direction, &buswidth, period_len);
 	if (ret < 0)
 		return NULL;
 
 	nb_data_items = period_len / buswidth;
-	if (nb_data_items > STM32_DMA_MAX_DATA_ITEMS) {
+	if (nb_data_items > STM32_DMA_ALIGNED_MAX_DATA_ITEMS) {
 		dev_err(chan2dev(chan), "number of items not supported\n");
 		return NULL;
 	}
@@ -816,35 +987,45 @@ static struct dma_async_tx_descriptor *stm32_dma_prep_dma_memcpy(
 	dma_addr_t src, size_t len, unsigned long flags)
 {
 	struct stm32_dma_chan *chan = to_stm32_dma_chan(c);
-	u32 num_sgs;
+	enum dma_slave_buswidth max_width;
 	struct stm32_dma_desc *desc;
 	size_t xfer_count, offset;
+	u32 num_sgs, best_burst, dma_burst, threshold;
 	int i;
 
-	num_sgs = DIV_ROUND_UP(len, STM32_DMA_MAX_DATA_ITEMS);
+	num_sgs = DIV_ROUND_UP(len, STM32_DMA_ALIGNED_MAX_DATA_ITEMS);
 	desc = stm32_dma_alloc_desc(num_sgs);
 	if (!desc)
 		return NULL;
 
+	threshold = chan->threshold;
+
 	for (offset = 0, i = 0; offset < len; offset += xfer_count, i++) {
 		xfer_count = min_t(size_t, len - offset,
-				   STM32_DMA_MAX_DATA_ITEMS);
+				   STM32_DMA_ALIGNED_MAX_DATA_ITEMS);
 
-		desc->sg_req[i].len = xfer_count;
+		/* Compute best burst size */
+		max_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
+		best_burst = stm32_dma_get_best_burst(len, STM32_DMA_MAX_BURST,
+						      threshold, max_width);
+		dma_burst = stm32_dma_get_burst(chan, best_burst);
 
 		stm32_dma_clear_reg(&desc->sg_req[i].chan_reg);
 		desc->sg_req[i].chan_reg.dma_scr =
 			STM32_DMA_SCR_DIR(STM32_DMA_MEM_TO_MEM) |
+			STM32_DMA_SCR_PBURST(dma_burst) |
+			STM32_DMA_SCR_MBURST(dma_burst) |
 			STM32_DMA_SCR_MINC |
 			STM32_DMA_SCR_PINC |
 			STM32_DMA_SCR_TCIE |
 			STM32_DMA_SCR_TEIE;
-		desc->sg_req[i].chan_reg.dma_sfcr = STM32_DMA_SFCR_DMDIS |
-			STM32_DMA_SFCR_FTH(STM32_DMA_FIFO_THRESHOLD_FULL) |
-			STM32_DMA_SFCR_FEIE;
+		desc->sg_req[i].chan_reg.dma_sfcr |= STM32_DMA_SFCR_MASK;
+		desc->sg_req[i].chan_reg.dma_sfcr |=
+			STM32_DMA_SFCR_FTH(threshold);
 		desc->sg_req[i].chan_reg.dma_spar = src + offset;
 		desc->sg_req[i].chan_reg.dma_sm0ar = dest + offset;
 		desc->sg_req[i].chan_reg.dma_sndtr = xfer_count;
+		desc->sg_req[i].len = xfer_count;
 	}
 
 	desc->num_sgs = num_sgs;
@@ -869,6 +1050,7 @@ static size_t stm32_dma_desc_residue(struct stm32_dma_chan *chan,
 				     struct stm32_dma_desc *desc,
 				     u32 next_sg)
 {
+	u32 modulo, burst_size;
 	u32 residue = 0;
 	int i;
 
@@ -876,8 +1058,10 @@ static size_t stm32_dma_desc_residue(struct stm32_dma_chan *chan,
 	 * In cyclic mode, for the last period, residue = remaining bytes from
 	 * NDTR
 	 */
-	if (chan->desc->cyclic && next_sg == 0)
-		return stm32_dma_get_remaining_bytes(chan);
+	if (chan->desc->cyclic && next_sg == 0) {
+		residue = stm32_dma_get_remaining_bytes(chan);
+		goto end;
+	}
 
 	/*
 	 * For all other periods in cyclic mode, and in sg mode,
@@ -888,6 +1072,15 @@ static size_t stm32_dma_desc_residue(struct stm32_dma_chan *chan,
 		residue += desc->sg_req[i].len;
 	residue += stm32_dma_get_remaining_bytes(chan);
 
+end:
+	if (!chan->mem_burst)
+		return residue;
+
+	burst_size = chan->mem_burst * chan->mem_width;
+	modulo = residue % burst_size;
+	if (modulo)
+		residue = residue - modulo + burst_size;
+
 	return residue;
 }
 
@@ -902,7 +1095,7 @@ static enum dma_status stm32_dma_tx_status(struct dma_chan *c,
 	u32 residue = 0;
 
 	status = dma_cookie_status(c, cookie, state);
-	if ((status == DMA_COMPLETE) || (!state))
+	if (status == DMA_COMPLETE || !state)
 		return status;
 
 	spin_lock_irqsave(&chan->vchan.lock, flags);
@@ -966,7 +1159,7 @@ static void stm32_dma_desc_free(struct virt_dma_desc *vdesc)
 }
 
 static void stm32_dma_set_config(struct stm32_dma_chan *chan,
-			  struct stm32_dma_cfg *cfg)
+				 struct stm32_dma_cfg *cfg)
 {
 	stm32_dma_clear_reg(&chan->chan_reg);
 
@@ -976,7 +1169,7 @@ static void stm32_dma_set_config(struct stm32_dma_chan *chan,
 	/* Enable Interrupts  */
 	chan->chan_reg.dma_scr |= STM32_DMA_SCR_TEIE | STM32_DMA_SCR_TCIE;
 
-	chan->chan_reg.dma_sfcr = cfg->threshold & STM32_DMA_SFCR_FTH_MASK;
+	chan->threshold = STM32_DMA_THRESHOLD_FTR_GET(cfg->features);
 }
 
 static struct dma_chan *stm32_dma_of_xlate(struct of_phandle_args *dma_spec,
@@ -996,10 +1189,10 @@ static struct dma_chan *stm32_dma_of_xlate(struct of_phandle_args *dma_spec,
 	cfg.channel_id = dma_spec->args[0];
 	cfg.request_line = dma_spec->args[1];
 	cfg.stream_config = dma_spec->args[2];
-	cfg.threshold = dma_spec->args[3];
+	cfg.features = dma_spec->args[3];
 
-	if ((cfg.channel_id >= STM32_DMA_MAX_CHANNELS) ||
-	    (cfg.request_line >= STM32_DMA_MAX_REQUEST_ID)) {
+	if (cfg.channel_id >= STM32_DMA_MAX_CHANNELS ||
+	    cfg.request_line >= STM32_DMA_MAX_REQUEST_ID) {
 		dev_err(dev, "Bad channel and/or request id\n");
 		return NULL;
 	}
diff --git a/drivers/firmware/broadcom/Kconfig b/drivers/firmware/broadcom/Kconfig
index 3c7e5b741e37..f77cdb3a041f 100644
--- a/drivers/firmware/broadcom/Kconfig
+++ b/drivers/firmware/broadcom/Kconfig
@@ -13,6 +13,7 @@ config BCM47XX_NVRAM
 config BCM47XX_SPROM
 	bool "Broadcom SPROM driver"
 	depends on BCM47XX_NVRAM
+	select GENERIC_NET_UTILS
 	help
 	  Broadcom devices store configuration data in SPROM. Accessing it is
 	  specific to the bus host type, e.g. PCI(e) devices have it mapped in
diff --git a/drivers/firmware/broadcom/bcm47xx_sprom.c b/drivers/firmware/broadcom/bcm47xx_sprom.c
index 62aa3cf09b4d..4787f86c8ac1 100644
--- a/drivers/firmware/broadcom/bcm47xx_sprom.c
+++ b/drivers/firmware/broadcom/bcm47xx_sprom.c
@@ -137,20 +137,6 @@ static void nvram_read_leddc(const char *prefix, const char *name,
 	*leddc_off_time = (val >> 16) & 0xff;
 }
 
-static void bcm47xx_nvram_parse_macaddr(char *buf, u8 macaddr[6])
-{
-	if (strchr(buf, ':'))
-		sscanf(buf, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", &macaddr[0],
-			&macaddr[1], &macaddr[2], &macaddr[3], &macaddr[4],
-			&macaddr[5]);
-	else if (strchr(buf, '-'))
-		sscanf(buf, "%hhx-%hhx-%hhx-%hhx-%hhx-%hhx", &macaddr[0],
-			&macaddr[1], &macaddr[2], &macaddr[3], &macaddr[4],
-			&macaddr[5]);
-	else
-		pr_warn("Can not parse mac address: %s\n", buf);
-}
-
 static void nvram_read_macaddr(const char *prefix, const char *name,
 			       u8 val[6], bool fallback)
 {
@@ -161,7 +147,9 @@ static void nvram_read_macaddr(const char *prefix, const char *name,
 	if (err < 0)
 		return;
 
-	bcm47xx_nvram_parse_macaddr(buf, val);
+	strreplace(buf, '-', ':');
+	if (!mac_pton(buf, val))
+		pr_warn("Can not parse mac address: %s\n", buf);
 }
 
 static void nvram_read_alpha2(const char *prefix, const char *name,
diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c
index 6feeacbe4d97..54e66adef252 100644
--- a/drivers/firmware/dmi_scan.c
+++ b/drivers/firmware/dmi_scan.c
@@ -187,7 +187,7 @@ static void __init dmi_save_uuid(const struct dmi_header *dm, int slot,
 	char *s;
 	int is_ff = 1, is_00 = 1, i;
 
-	if (dmi_ident[slot] || dm->length <= index + 16)
+	if (dmi_ident[slot] || dm->length < index + 16)
 		return;
 
 	d = (u8 *) dm + index;
@@ -211,9 +211,9 @@ static void __init dmi_save_uuid(const struct dmi_header *dm, int slot,
 	 * says that this is the defacto standard.
 	 */
 	if (dmi_ver >= 0x020600)
-		sprintf(s, "%pUL", d);
+		sprintf(s, "%pUl", d);
 	else
-		sprintf(s, "%pUB", d);
+		sprintf(s, "%pUb", d);
 
 	dmi_ident[slot] = s;
 }
@@ -792,7 +792,15 @@ static bool dmi_matches(const struct dmi_system_id *dmi)
 		int s = dmi->matches[i].slot;
 		if (s == DMI_NONE)
 			break;
-		if (dmi_ident[s]) {
+		if (s == DMI_OEM_STRING) {
+			/* DMI_OEM_STRING must be exact match */
+			const struct dmi_device *valid;
+
+			valid = dmi_find_device(DMI_DEV_TYPE_OEM_STRING,
+						dmi->matches[i].substr, NULL);
+			if (valid)
+				continue;
+		} else if (dmi_ident[s]) {
 			if (dmi->matches[i].exact_match) {
 				if (!strcmp(dmi_ident[s],
 					    dmi->matches[i].substr))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index f44a83ab2bf4..c8b605f3dc05 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -890,6 +890,7 @@ struct amdgpu_gfx_funcs {
 	void (*read_wave_data)(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields);
 	void (*read_wave_vgprs)(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t thread, uint32_t start, uint32_t size, uint32_t *dst);
 	void (*read_wave_sgprs)(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst);
+	void (*select_me_pipe_q)(struct amdgpu_device *adev, u32 me, u32 pipe, u32 queue);
 };
 
 struct amdgpu_ngg_buf {
@@ -1378,6 +1379,7 @@ enum amd_hw_ip_block_type {
 	ATHUB_HWIP,
 	NBIO_HWIP,
 	MP0_HWIP,
+	MP1_HWIP,
 	UVD_HWIP,
 	VCN_HWIP = UVD_HWIP,
 	VCE_HWIP,
@@ -1387,6 +1389,7 @@ enum amd_hw_ip_block_type {
 	SMUIO_HWIP,
 	PWR_HWIP,
 	NBIF_HWIP,
+	THM_HWIP,
 	MAX_HWIP
 };
 
@@ -1812,6 +1815,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_gfx_select_se_sh(adev, se, sh, instance) (adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance))
 #define amdgpu_gds_switch(adev, r, v, d, w, a) (adev)->gds.funcs->patch_gds_switch((r), (v), (d), (w), (a))
 #define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i))
+#define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q) (adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q))
 
 /* Common functions */
 int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 369beb5041a2..448d69fe3756 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -64,16 +64,21 @@ int amdgpu_debugfs_add_files(struct amdgpu_device *adev,
 
 #if defined(CONFIG_DEBUG_FS)
 
-static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf,
-					size_t size, loff_t *pos)
+
+static int  amdgpu_debugfs_process_reg_op(bool read, struct file *f,
+		char __user *buf, size_t size, loff_t *pos)
 {
 	struct amdgpu_device *adev = file_inode(f)->i_private;
 	ssize_t result = 0;
 	int r;
-	bool pm_pg_lock, use_bank;
-	unsigned instance_bank, sh_bank, se_bank;
+	bool pm_pg_lock, use_bank, use_ring;
+	unsigned instance_bank, sh_bank, se_bank, me, pipe, queue;
 
-	if (size & 0x3 || *pos & 0x3)
+	pm_pg_lock = use_bank = use_ring = false;
+	instance_bank = sh_bank = se_bank = me = pipe = queue = 0;
+
+	if (size & 0x3 || *pos & 0x3 ||
+			((*pos & (1ULL << 62)) && (*pos & (1ULL << 61))))
 		return -EINVAL;
 
 	/* are we reading registers for which a PG lock is necessary? */
@@ -91,8 +96,15 @@ static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf,
 		if (instance_bank == 0x3FF)
 			instance_bank = 0xFFFFFFFF;
 		use_bank = 1;
+	} else if (*pos & (1ULL << 61)) {
+
+		me = (*pos & GENMASK_ULL(33, 24)) >> 24;
+		pipe = (*pos & GENMASK_ULL(43, 34)) >> 34;
+		queue = (*pos & GENMASK_ULL(53, 44)) >> 44;
+
+		use_ring = 1;
 	} else {
-		use_bank = 0;
+		use_bank = use_ring = 0;
 	}
 
 	*pos &= (1UL << 22) - 1;
@@ -104,6 +116,9 @@ static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf,
 		mutex_lock(&adev->grbm_idx_mutex);
 		amdgpu_gfx_select_se_sh(adev, se_bank,
 					sh_bank, instance_bank);
+	} else if (use_ring) {
+		mutex_lock(&adev->srbm_mutex);
+		amdgpu_gfx_select_me_pipe_q(adev, me, pipe, queue);
 	}
 
 	if (pm_pg_lock)
@@ -115,8 +130,14 @@ static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf,
 		if (*pos > adev->rmmio_size)
 			goto end;
 
-		value = RREG32(*pos >> 2);
-		r = put_user(value, (uint32_t *)buf);
+		if (read) {
+			value = RREG32(*pos >> 2);
+			r = put_user(value, (uint32_t *)buf);
+		} else {
+			r = get_user(value, (uint32_t *)buf);
+			if (!r)
+				WREG32(*pos >> 2, value);
+		}
 		if (r) {
 			result = r;
 			goto end;
@@ -132,6 +153,9 @@ end:
 	if (use_bank) {
 		amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 		mutex_unlock(&adev->grbm_idx_mutex);
+	} else if (use_ring) {
+		amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0);
+		mutex_unlock(&adev->srbm_mutex);
 	}
 
 	if (pm_pg_lock)
@@ -140,78 +164,17 @@ end:
 	return result;
 }
 
+
+static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf,
+					size_t size, loff_t *pos)
+{
+	return amdgpu_debugfs_process_reg_op(true, f, buf, size, pos);
+}
+
 static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf,
 					 size_t size, loff_t *pos)
 {
-	struct amdgpu_device *adev = file_inode(f)->i_private;
-	ssize_t result = 0;
-	int r;
-	bool pm_pg_lock, use_bank;
-	unsigned instance_bank, sh_bank, se_bank;
-
-	if (size & 0x3 || *pos & 0x3)
-		return -EINVAL;
-
-	/* are we reading registers for which a PG lock is necessary? */
-	pm_pg_lock = (*pos >> 23) & 1;
-
-	if (*pos & (1ULL << 62)) {
-		se_bank = (*pos & GENMASK_ULL(33, 24)) >> 24;
-		sh_bank = (*pos & GENMASK_ULL(43, 34)) >> 34;
-		instance_bank = (*pos & GENMASK_ULL(53, 44)) >> 44;
-
-		if (se_bank == 0x3FF)
-			se_bank = 0xFFFFFFFF;
-		if (sh_bank == 0x3FF)
-			sh_bank = 0xFFFFFFFF;
-		if (instance_bank == 0x3FF)
-			instance_bank = 0xFFFFFFFF;
-		use_bank = 1;
-	} else {
-		use_bank = 0;
-	}
-
-	*pos &= (1UL << 22) - 1;
-
-	if (use_bank) {
-		if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) ||
-		    (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines))
-			return -EINVAL;
-		mutex_lock(&adev->grbm_idx_mutex);
-		amdgpu_gfx_select_se_sh(adev, se_bank,
-					sh_bank, instance_bank);
-	}
-
-	if (pm_pg_lock)
-		mutex_lock(&adev->pm.mutex);
-
-	while (size) {
-		uint32_t value;
-
-		if (*pos > adev->rmmio_size)
-			return result;
-
-		r = get_user(value, (uint32_t *)buf);
-		if (r)
-			return r;
-
-		WREG32(*pos >> 2, value);
-
-		result += 4;
-		buf += 4;
-		*pos += 4;
-		size -= 4;
-	}
-
-	if (use_bank) {
-		amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
-		mutex_unlock(&adev->grbm_idx_mutex);
-	}
-
-	if (pm_pg_lock)
-		mutex_unlock(&adev->pm.mutex);
-
-	return result;
+	return amdgpu_debugfs_process_reg_op(false, f, (char __user *)buf, size, pos);
 }
 
 static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 7379aa5a6849..0b19482b36b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -922,6 +922,11 @@ static int __init amdgpu_init(void)
 {
 	int r;
 
+	if (vgacon_text_force()) {
+		DRM_ERROR("VGACON disables amdgpu kernel modesetting.\n");
+		return -EINVAL;
+	}
+
 	r = amdgpu_sync_init();
 	if (r)
 		goto error_sync;
@@ -930,10 +935,6 @@ static int __init amdgpu_init(void)
 	if (r)
 		goto error_fence;
 
-	if (vgacon_text_force()) {
-		DRM_ERROR("VGACON disables amdgpu kernel modesetting.\n");
-		return -EINVAL;
-	}
 	DRM_INFO("amdgpu kernel modesetting enabled.\n");
 	driver = &kms_driver;
 	pdriver = &amdgpu_kms_pci_driver;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 455a81e4c246..97449e06a242 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -410,6 +410,7 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
 int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
 				  unsigned num_hw_submission)
 {
+	long timeout;
 	int r;
 
 	/* Check that num_hw_submission is a power of two */
@@ -433,11 +434,16 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
 
 	/* No need to setup the GPU scheduler for KIQ ring */
 	if (ring->funcs->type != AMDGPU_RING_TYPE_KIQ) {
+		/* for non-sriov case, no timeout enforce on compute ring */
+		if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
+				&& !amdgpu_sriov_vf(ring->adev))
+			timeout = MAX_SCHEDULE_TIMEOUT;
+		else
+			timeout = msecs_to_jiffies(amdgpu_lockup_timeout);
+
 		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
 				   num_hw_submission, amdgpu_job_hang_limit,
-				   (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ?
-				   MAX_SCHEDULE_TIMEOUT : msecs_to_jiffies(amdgpu_lockup_timeout),
-				   ring->name);
+				   timeout, ring->name);
 		if (r) {
 			DRM_ERROR("Failed to create scheduler on ring %s.\n",
 				  ring->name);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 28c2706e48d7..46b9ea4e6103 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -56,11 +56,23 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
 		alignment = PAGE_SIZE;
 	}
 
+retry:
 	r = amdgpu_bo_create(adev, size, alignment, initial_domain,
 			     flags, type, resv, &bo);
 	if (r) {
-		DRM_DEBUG("Failed to allocate GEM object (%ld, %d, %u, %d)\n",
-			  size, initial_domain, alignment, r);
+		if (r != -ERESTARTSYS) {
+			if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) {
+				flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+				goto retry;
+			}
+
+			if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
+				initial_domain |= AMDGPU_GEM_DOMAIN_GTT;
+				goto retry;
+			}
+			DRM_DEBUG("Failed to allocate GEM object (%ld, %d, %u, %d)\n",
+				  size, initial_domain, alignment, r);
+		}
 		return r;
 	}
 	*obj = &bo->gem_base;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index fac4b6067efd..6d08cde8443c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -356,7 +356,6 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size,
 	struct amdgpu_bo *bo;
 	unsigned long page_align;
 	size_t acc_size;
-	u32 domains;
 	int r;
 
 	page_align = roundup(byte_align, PAGE_SIZE) >> PAGE_SHIFT;
@@ -418,23 +417,12 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size,
 #endif
 
 	bo->tbo.bdev = &adev->mman.bdev;
-	domains = bo->preferred_domains;
-retry:
-	amdgpu_ttm_placement_from_domain(bo, domains);
+	amdgpu_ttm_placement_from_domain(bo, domain);
+
 	r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type,
 				 &bo->placement, page_align, &ctx, acc_size,
 				 NULL, resv, &amdgpu_ttm_bo_destroy);
-
-	if (unlikely(r && r != -ERESTARTSYS)) {
-		if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) {
-			bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
-			goto retry;
-		} else if (domains != bo->preferred_domains) {
-			domains = bo->allowed_domains;
-			goto retry;
-		}
-	}
-	if (unlikely(r))
+	if (unlikely(r != 0))
 		return r;
 
 	if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 19e71f4a8ac2..c7d43e064fc7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -505,6 +505,9 @@ failed:
 
 int psp_gpu_reset(struct amdgpu_device *adev)
 {
+	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
+		return 0;
+
 	return psp_mode1_reset(&adev->psp);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index f48ea0dad875..a7576255cc30 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -859,7 +859,7 @@ static void cik_sdma_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
 	amdgpu_ring_write(ring, addr & 0xfffffffc);
 	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
 	amdgpu_ring_write(ring, seq); /* reference */
-	amdgpu_ring_write(ring, 0xfffffff); /* mask */
+	amdgpu_ring_write(ring, 0xffffffff); /* mask */
 	amdgpu_ring_write(ring, (0xfff << 16) | 4); /* retry count, poll interval */
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
index 0fff5b8cd318..cd6bf291a853 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@@ -3061,11 +3061,18 @@ static void gfx_v6_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
 }
 
+static void gfx_v6_0_select_me_pipe_q(struct amdgpu_device *adev,
+				  u32 me, u32 pipe, u32 q)
+{
+	DRM_INFO("Not implemented\n");
+}
+
 static const struct amdgpu_gfx_funcs gfx_v6_0_gfx_funcs = {
 	.get_gpu_clock_counter = &gfx_v6_0_get_gpu_clock_counter,
 	.select_se_sh = &gfx_v6_0_select_se_sh,
 	.read_wave_data = &gfx_v6_0_read_wave_data,
 	.read_wave_sgprs = &gfx_v6_0_read_wave_sgprs,
+	.select_me_pipe_q = &gfx_v6_0_select_me_pipe_q
 };
 
 static int gfx_v6_0_early_init(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index e13d9d83767b..42b6144c1fd5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -4270,11 +4270,18 @@ static void gfx_v7_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
 }
 
+static void gfx_v7_0_select_me_pipe_q(struct amdgpu_device *adev,
+				  u32 me, u32 pipe, u32 q)
+{
+	cik_srbm_select(adev, me, pipe, q, 0);
+}
+
 static const struct amdgpu_gfx_funcs gfx_v7_0_gfx_funcs = {
 	.get_gpu_clock_counter = &gfx_v7_0_get_gpu_clock_counter,
 	.select_se_sh = &gfx_v7_0_select_se_sh,
 	.read_wave_data = &gfx_v7_0_read_wave_data,
 	.read_wave_sgprs = &gfx_v7_0_read_wave_sgprs,
+	.select_me_pipe_q = &gfx_v7_0_select_me_pipe_q
 };
 
 static const struct amdgpu_rlc_funcs gfx_v7_0_rlc_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 27943e57681c..b0e591eaa71a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -3475,6 +3475,12 @@ static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
 	WREG32(mmGRBM_GFX_INDEX, data);
 }
 
+static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
+				  u32 me, u32 pipe, u32 q)
+{
+	vi_srbm_select(adev, me, pipe, q, 0);
+}
+
 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
 {
 	u32 data, mask;
@@ -5442,6 +5448,7 @@ static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
 	.select_se_sh = &gfx_v8_0_select_se_sh,
 	.read_wave_data = &gfx_v8_0_read_wave_data,
 	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
+	.select_me_pipe_q = &gfx_v8_0_select_me_pipe_q
 };
 
 static int gfx_v8_0_early_init(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 1ae3de1094f9..9d39fd5b1822 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -998,12 +998,19 @@ static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
 		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
 }
 
+static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
+				  u32 me, u32 pipe, u32 q)
+{
+	soc15_grbm_select(adev, me, pipe, q, 0);
+}
+
 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
 	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
 	.select_se_sh = &gfx_v9_0_select_se_sh,
 	.read_wave_data = &gfx_v9_0_read_wave_data,
 	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
 	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
+	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q
 };
 
 static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
@@ -2757,6 +2764,45 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
 	return 0;
 }
 
+/* Quiesce and clear the HQD registers of the queue the caller has selected. */
+static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	int j;
+
+	/* disable the queue if it's active */
+	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
+		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
+
+		for (j = 0; j < adev->usec_timeout; j++) {
+			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
+				break;
+			udelay(1);
+		}
+
+		/* j only reaches the loop bound when ACTIVE never cleared */
+		if (j == adev->usec_timeout) {
+			DRM_DEBUG("KIQ dequeue request failed.\n");
+
+			/* Manual disable if dequeue request times out */
+			WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, 0);
+		}
+
+		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0);
+	}
+
+	WREG32_SOC15(GC, 0, mmCP_HQD_IQ_TIMER, 0);
+	WREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL, 0);
+	WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR, 0);
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
+	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
+
+	return 0;
+}
+
 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
@@ -3010,7 +3056,6 @@ static int gfx_v9_0_kcq_disable(struct amdgpu_ring *kiq_ring,struct amdgpu_ring
 	return r;
 }
 
-
 static int gfx_v9_0_hw_fini(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -3033,6 +3078,20 @@ static int gfx_v9_0_hw_fini(void *handle)
 		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
 		return 0;
 	}
+
+	/* Use deinitialize sequence from CAIL when unbinding device from driver,
+	 * otherwise KIQ is hanging when binding back
+	 */
+	if (!adev->in_gpu_reset && !adev->gfx.in_suspend) {
+		mutex_lock(&adev->srbm_mutex);
+		soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
+				adev->gfx.kiq.ring.pipe,
+				adev->gfx.kiq.ring.queue, 0);
+		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
+		soc15_grbm_select(adev, 0, 0, 0, 0);
+		mutex_unlock(&adev->srbm_mutex);
+	}
+
 	gfx_v9_0_cp_enable(adev, false);
 	gfx_v9_0_rlc_stop(adev);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index 6452101c7aab..c7190c39c4f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -837,7 +837,7 @@ static void sdma_v2_4_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
 	amdgpu_ring_write(ring, addr & 0xfffffffc);
 	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
 	amdgpu_ring_write(ring, seq); /* reference */
-	amdgpu_ring_write(ring, 0xfffffff); /* mask */
+	amdgpu_ring_write(ring, 0xffffffff); /* mask */
 	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
 			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index ecaef084dab1..be20a387d961 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -1105,7 +1105,7 @@ static void sdma_v3_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
 	amdgpu_ring_write(ring, addr & 0xfffffffc);
 	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
 	amdgpu_ring_write(ring, seq); /* reference */
-	amdgpu_ring_write(ring, 0xfffffff); /* mask */
+	amdgpu_ring_write(ring, 0xffffffff); /* mask */
 	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
 			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 2a8184082cd1..399f876f9cad 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1121,7 +1121,7 @@ static void sdma_v4_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
 	amdgpu_ring_write(ring, addr & 0xfffffffc);
 	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
 	amdgpu_ring_write(ring, seq); /* reference */
-	amdgpu_ring_write(ring, 0xfffffff); /* mask */
+	amdgpu_ring_write(ring, 0xffffffff); /* mask */
 	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
 			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
index b154667a8fd9..a675ec6d2811 100644
--- a/drivers/gpu/drm/amd/amdgpu/si.c
+++ b/drivers/gpu/drm/amd/amdgpu/si.c
@@ -1252,6 +1252,71 @@ static void si_invalidate_hdp(struct amdgpu_device *adev,
 	}
 }
 
+static int si_get_pcie_lanes(struct amdgpu_device *adev)
+{
+	u32 link_width_cntl;
+
+	if (adev->flags & AMD_IS_APU)
+		return 0;
+
+	link_width_cntl = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
+
+	switch ((link_width_cntl & LC_LINK_WIDTH_RD_MASK) >> LC_LINK_WIDTH_RD_SHIFT) {
+	case LC_LINK_WIDTH_X1:
+		return 1;
+	case LC_LINK_WIDTH_X2:
+		return 2;
+	case LC_LINK_WIDTH_X4:
+		return 4;
+	case LC_LINK_WIDTH_X8:
+		return 8;
+	case LC_LINK_WIDTH_X0:
+	case LC_LINK_WIDTH_X16:
+	default:
+		return 16;
+	}
+}
+
+static void si_set_pcie_lanes(struct amdgpu_device *adev, int lanes)
+{
+	u32 link_width_cntl, mask;
+
+	if (adev->flags & AMD_IS_APU)
+		return;
+
+	switch (lanes) {
+	case 0:
+		mask = LC_LINK_WIDTH_X0;
+		break;
+	case 1:
+		mask = LC_LINK_WIDTH_X1;
+		break;
+	case 2:
+		mask = LC_LINK_WIDTH_X2;
+		break;
+	case 4:
+		mask = LC_LINK_WIDTH_X4;
+		break;
+	case 8:
+		mask = LC_LINK_WIDTH_X8;
+		break;
+	case 16:
+		mask = LC_LINK_WIDTH_X16;
+		break;
+	default:
+		DRM_ERROR("invalid pcie lane request: %d\n", lanes);
+		return;
+	}
+
+	link_width_cntl = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
+	link_width_cntl &= ~LC_LINK_WIDTH_MASK;
+	link_width_cntl |= mask << LC_LINK_WIDTH_SHIFT;
+	link_width_cntl |= (LC_RECONFIG_NOW |
+			    LC_RECONFIG_ARC_MISSING_ESCAPE);
+
+	WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl);
+}
+
 static const struct amdgpu_asic_funcs si_asic_funcs =
 {
 	.read_disabled_bios = &si_read_disabled_bios,
@@ -1262,6 +1327,8 @@ static const struct amdgpu_asic_funcs si_asic_funcs =
 	.get_xclk = &si_get_xclk,
 	.set_uvd_clocks = &si_set_uvd_clocks,
 	.set_vce_clocks = NULL,
+	.get_pcie_lanes = &si_get_pcie_lanes,
+	.set_pcie_lanes = &si_set_pcie_lanes,
 	.get_config_memsize = &si_get_config_memsize,
 	.flush_hdp = &si_flush_hdp,
 	.invalidate_hdp = &si_invalidate_hdp,
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
index 672eaffac0a5..797d505bf9ee 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
@@ -6372,9 +6372,9 @@ static void si_set_pcie_lane_width_in_smc(struct amdgpu_device *adev,
 {
 	u32 lane_width;
 	u32 new_lane_width =
-		(amdgpu_new_state->caps & ATOM_PPLIB_PCIE_LINK_WIDTH_MASK) >> ATOM_PPLIB_PCIE_LINK_WIDTH_SHIFT;
+		((amdgpu_new_state->caps & ATOM_PPLIB_PCIE_LINK_WIDTH_MASK) >> ATOM_PPLIB_PCIE_LINK_WIDTH_SHIFT) + 1;
 	u32 current_lane_width =
-		(amdgpu_current_state->caps & ATOM_PPLIB_PCIE_LINK_WIDTH_MASK) >> ATOM_PPLIB_PCIE_LINK_WIDTH_SHIFT;
+		((amdgpu_current_state->caps & ATOM_PPLIB_PCIE_LINK_WIDTH_MASK) >> ATOM_PPLIB_PCIE_LINK_WIDTH_SHIFT) + 1;
 
 	if (new_lane_width != current_lane_width) {
 		amdgpu_set_pcie_lanes(adev, new_lane_width);
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index 73fd48d6c756..73fd48d6c756 100755..100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c
index 4c45db7f1157..45aafca7f315 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c
@@ -38,6 +38,7 @@ int vega10_reg_base_init(struct amdgpu_device *adev)
 		adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i]));
 		adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i]));
 		adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i]));
+		adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i]));
 		adev->reg_offset[UVD_HWIP][i] = (uint32_t *)(&(UVD_BASE.instance[i]));
 		adev->reg_offset[VCE_HWIP][i] = (uint32_t *)(&(VCE_BASE.instance[i]));
 		adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(VCN_BASE.instance[i]));
@@ -49,7 +50,7 @@ int vega10_reg_base_init(struct amdgpu_device *adev)
 		adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i]));
 		adev->reg_offset[PWR_HWIP][i] = (uint32_t *)(&(PWR_BASE.instance[i]));
 		adev->reg_offset[NBIF_HWIP][i] = (uint32_t *)(&(NBIF_BASE.instance[i]));
-
+		adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i]));
 	}
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index e42a28e3adc5..4e2f379ce217 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -1403,6 +1403,28 @@ static int initialize_plane(struct amdgpu_display_manager *dm,
 	return ret;
 }
 
+
+static void register_backlight_device(struct amdgpu_display_manager *dm,
+				      struct dc_link *link)
+{
+#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) ||\
+	defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
+
+	if ((link->connector_signal & (SIGNAL_TYPE_EDP | SIGNAL_TYPE_LVDS)) &&
+	    link->type != dc_connection_none) {
+		/* Even if registration failed, we should continue with
+		 * DM initialization because not having a backlight control
+		 * is better than a black screen.
+		 */
+		amdgpu_dm_register_backlight_device(dm);
+
+		if (dm->backlight_dev)
+			dm->backlight_link = link;
+	}
+#endif
+}
+
+
 /* In this architecture, the association
  * connector -> encoder -> crtc
  * id not really requried. The crtc and connector will hold the
@@ -1456,6 +1478,7 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
 
 	/* loops over all connectors on the board */
 	for (i = 0; i < link_cnt; i++) {
+		struct dc_link *link = NULL;
 
 		if (i > AMDGPU_DM_MAX_DISPLAY_INDEX) {
 			DRM_ERROR(
@@ -1482,9 +1505,14 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
 			goto fail;
 		}
 
-		if (dc_link_detect(dc_get_link_at_index(dm->dc, i),
-				DETECT_REASON_BOOT))
+		link = dc_get_link_at_index(dm->dc, i);
+
+		if (dc_link_detect(link, DETECT_REASON_BOOT)) {
 			amdgpu_dm_update_connector_after_detect(aconnector);
+			register_backlight_device(dm, link);
+		}
+
+
 	}
 
 	/* Software is initialized. Now we can register interrupt handlers. */
@@ -2685,7 +2713,8 @@ static void amdgpu_dm_connector_destroy(struct drm_connector *connector)
 #if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) ||\
 	defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
 
-	if (link->connector_signal & (SIGNAL_TYPE_EDP | SIGNAL_TYPE_LVDS)) {
+	if ((link->connector_signal & (SIGNAL_TYPE_EDP | SIGNAL_TYPE_LVDS)) &&
+	    link->type != dc_connection_none) {
 		amdgpu_dm_register_backlight_device(dm);
 
 		if (dm->backlight_dev) {
@@ -3561,6 +3590,7 @@ create_i2c(struct ddc_service *ddc_service,
 	return i2c;
 }
 
+
 /* Note: this function assumes that dc_link_detect() was called for the
  * dc_link which will be represented by this aconnector.
  */
@@ -3630,28 +3660,6 @@ static int amdgpu_dm_connector_init(struct amdgpu_display_manager *dm,
 		|| connector_type == DRM_MODE_CONNECTOR_eDP)
 		amdgpu_dm_initialize_dp_connector(dm, aconnector);
 
-#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) ||\
-	defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
-
-	/* NOTE: this currently will create backlight device even if a panel
-	 * is not connected to the eDP/LVDS connector.
-	 *
-	 * This is less than ideal but we don't have sink information at this
-	 * stage since detection happens after. We can't do detection earlier
-	 * since MST detection needs connectors to be created first.
-	 */
-	if (link->connector_signal & (SIGNAL_TYPE_EDP | SIGNAL_TYPE_LVDS)) {
-		/* Event if registration failed, we should continue with
-		 * DM initialization because not having a backlight control
-		 * is better then a black screen.
-		 */
-		amdgpu_dm_register_backlight_device(dm);
-
-		if (dm->backlight_dev)
-			dm->backlight_link = link;
-	}
-#endif
-
 out_free:
 	if (res) {
 		kfree(i2c);
@@ -4840,33 +4848,6 @@ static int dm_update_planes_state(struct dc *dc,
 	return ret;
 }
 
-static int dm_atomic_check_plane_state_fb(struct drm_atomic_state *state,
-					  struct drm_crtc *crtc)
-{
-	struct drm_plane *plane;
-	struct drm_crtc_state *crtc_state;
-
-	WARN_ON(!drm_atomic_get_new_crtc_state(state, crtc));
-
-	drm_for_each_plane_mask(plane, state->dev, crtc->state->plane_mask) {
-		struct drm_plane_state *plane_state =
-			drm_atomic_get_plane_state(state, plane);
-
-		if (IS_ERR(plane_state))
-			return -EDEADLK;
-
-		crtc_state = drm_atomic_get_crtc_state(plane_state->state, crtc);
-		if (IS_ERR(crtc_state))
-			return PTR_ERR(crtc_state);
-
-		if (crtc->primary == plane && crtc_state->active) {
-			if (!plane_state->fb)
-				return -EINVAL;
-		}
-	}
-	return 0;
-}
-
 static int amdgpu_dm_atomic_check(struct drm_device *dev,
 				  struct drm_atomic_state *state)
 {
@@ -4890,10 +4871,6 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
 		goto fail;
 
 	for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
-		ret = dm_atomic_check_plane_state_fb(state, crtc);
-		if (ret)
-			goto fail;
-
 		if (!drm_atomic_crtc_needs_modeset(new_crtc_state) &&
 		    !new_crtc_state->color_mgmt_changed)
 			continue;
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index eeb04471b2f5..6d1c4981a185 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -1997,6 +1997,26 @@ bool dc_link_set_backlight_level(const struct dc_link *link, uint32_t level,
 	return true;
 }
 
+/*
+ * Invoke the set_abm_immediate_disable hook of the ABM block that belongs
+ * to the dc owning @link.
+ *
+ * Returns false when the resource pool has no ABM block or the block does
+ * not provide set_abm_immediate_disable; true once the hook was called.
+ */
+bool dc_link_set_abm_disable(const struct dc_link *link)
+{
+	struct dc  *core_dc = link->ctx->dc;
+	struct abm *abm = core_dc->res_pool->abm;
+
+	/* Guard the hook that is actually called below. */
+	if ((abm == NULL) || (abm->funcs->set_abm_immediate_disable == NULL))
+		return false;
+
+	abm->funcs->set_abm_immediate_disable(abm);
+
+	return true;
+}
+
 bool dc_link_set_psr_enable(const struct dc_link *link, bool enable, bool wait)
 {
 	struct dc  *core_dc = link->ctx->dc;
diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h
index fb4d9eafdc6e..dc34515ef01f 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_link.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_link.h
@@ -132,6 +132,8 @@ static inline struct dc_link *dc_get_link_at_index(struct dc *dc, uint32_t link_
 bool dc_link_set_backlight_level(const struct dc_link *dc_link, uint32_t level,
 		uint32_t frame_ramp, const struct dc_stream_state *stream);
 
+bool dc_link_set_abm_disable(const struct dc_link *dc_link);
+
 bool dc_link_set_psr_enable(const struct dc_link *dc_link, bool enable, bool wait);
 
 bool dc_link_get_psr_state(const struct dc_link *dc_link, uint32_t *psr_state);
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c
index 444558ca6533..162f6a6c4208 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c
@@ -735,6 +735,8 @@ static void dce110_stream_encoder_update_hdmi_info_packets(
 		if (info_frame->avi.valid) {
 			const uint32_t *content =
 				(const uint32_t *) &info_frame->avi.sb[0];
+			/* enable the clock before programming the AFMT block */
+			REG_UPDATE(AFMT_CNTL, AFMT_AUDIO_CLOCK_EN, 1);
 
 			REG_WRITE(AFMT_AVI_INFO0, content[0]);
 
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.c
index 775d3bf0bd39..9150d2694450 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.c
@@ -102,6 +102,43 @@ static uint32_t align_to_chunks_number_per_line(uint32_t pixels)
 	return 256 * ((pixels + 255) / 256);
 }
 
+static void reset_lb_on_vblank(struct dc_context *ctx)
+{
+	uint32_t value, frame_count;
+	uint32_t retry = 0;
+	uint32_t status_pos =
+			dm_read_reg(ctx, mmCRTC_STATUS_POSITION);
+
+	/* Differing CRTC_STATUS_POSITION reads mean the counter is moving. */
+	/* Only if CRTC is enabled and counter is moving we wait for one frame. */
+	if (status_pos != dm_read_reg(ctx, mmCRTC_STATUS_POSITION)) {
+		/* Resetting LB on VBlank */
+		value = dm_read_reg(ctx, mmLB_SYNC_RESET_SEL);
+		set_reg_field_value(value, 3, LB_SYNC_RESET_SEL, LB_SYNC_RESET_SEL);
+		set_reg_field_value(value, 1, LB_SYNC_RESET_SEL, LB_SYNC_RESET_SEL2);
+		dm_write_reg(ctx, mmLB_SYNC_RESET_SEL, value);
+
+		frame_count = dm_read_reg(ctx, mmCRTC_STATUS_FRAME_COUNT);
+
+		/* Poll for a new frame: up to 100 tries with a 1 ms sleep each */
+		for (retry = 100; retry > 0; retry--) {
+			if (frame_count != dm_read_reg(ctx, mmCRTC_STATUS_FRAME_COUNT))
+				break;
+			msleep(1);
+		}
+		if (!retry)
+			dm_error("Frame count did not increase for 100ms.\n");
+
+		/* Reprogram LB_SYNC_RESET_SEL once the frame wait is over */
+		value = dm_read_reg(ctx, mmLB_SYNC_RESET_SEL);
+		set_reg_field_value(value, 2, LB_SYNC_RESET_SEL, LB_SYNC_RESET_SEL);
+		set_reg_field_value(value, 0, LB_SYNC_RESET_SEL, LB_SYNC_RESET_SEL2);
+		dm_write_reg(ctx, mmLB_SYNC_RESET_SEL, value);
+
+	}
+
+}
+
 static void wait_for_fbc_state_changed(
 	struct dce110_compressor *cp110,
 	bool enabled)
@@ -232,19 +269,23 @@ void dce110_compressor_disable_fbc(struct compressor *compressor)
 {
 	struct dce110_compressor *cp110 = TO_DCE110_COMPRESSOR(compressor);
 
-	if (compressor->options.bits.FBC_SUPPORT &&
-		dce110_compressor_is_fbc_enabled_in_hw(compressor, NULL)) {
-		uint32_t reg_data;
-		/* Turn off compression */
-		reg_data = dm_read_reg(compressor->ctx, mmFBC_CNTL);
-		set_reg_field_value(reg_data, 0, FBC_CNTL, FBC_GRPH_COMP_EN);
-		dm_write_reg(compressor->ctx, mmFBC_CNTL, reg_data);
-
-		/* Reset enum controller_id to undefined */
-		compressor->attached_inst = 0;
-		compressor->is_enabled = false;
-
-		wait_for_fbc_state_changed(cp110, false);
+	if (compressor->options.bits.FBC_SUPPORT) {
+		if (dce110_compressor_is_fbc_enabled_in_hw(compressor, NULL)) {
+			uint32_t reg_data;
+			/* Turn off compression */
+			reg_data = dm_read_reg(compressor->ctx, mmFBC_CNTL);
+			set_reg_field_value(reg_data, 0, FBC_CNTL, FBC_GRPH_COMP_EN);
+			dm_write_reg(compressor->ctx, mmFBC_CNTL, reg_data);
+
+			/* Reset enum controller_id to undefined */
+			compressor->attached_inst = 0;
+			compressor->is_enabled = false;
+
+			wait_for_fbc_state_changed(cp110, false);
+		}
+
+		/* Sync line buffer - dce100/110 only */
+		reset_lb_on_vblank(compressor->ctx);
 	}
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
index 30dd62f0f5fa..d0575999f172 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
@@ -453,10 +453,13 @@ dce110_translate_regamma_to_hw_format(const struct dc_transfer_func *output_tf,
 
 	} else {
 		/* 10 segments
-		 * segment is from 2^-10 to 2^0
+		 * segment is from 2^-10 to 2^1
+		 * We include an extra segment for range [2^0, 2^1). This is to
+		 * ensure that colors with normalized values of 1 don't miss the
+		 * LUT.
 		 */
 		region_start = -10;
-		region_end = 0;
+		region_end = 1;
 
 		seg_distr[0] = 4;
 		seg_distr[1] = 4;
@@ -468,7 +471,7 @@ dce110_translate_regamma_to_hw_format(const struct dc_transfer_func *output_tf,
 		seg_distr[7] = 4;
 		seg_distr[8] = 4;
 		seg_distr[9] = 4;
-		seg_distr[10] = -1;
+		seg_distr[10] = 0;
 		seg_distr[11] = -1;
 		seg_distr[12] = -1;
 		seg_distr[13] = -1;
@@ -1016,8 +1019,10 @@ void dce110_blank_stream(struct pipe_ctx *pipe_ctx)
 	struct dc_stream_state *stream = pipe_ctx->stream;
 	struct dc_link *link = stream->sink->link;
 
-	if (link->local_sink && link->local_sink->sink_signal == SIGNAL_TYPE_EDP)
+	if (link->local_sink && link->local_sink->sink_signal == SIGNAL_TYPE_EDP) {
 		link->dc->hwss.edp_backlight_control(link, false);
+		dc_link_set_abm_disable(link);
+	}
 
 	if (dc_is_dp_signal(pipe_ctx->stream->signal))
 		pipe_ctx->stream_res.stream_enc->funcs->dp_blank(pipe_ctx->stream_res.stream_enc);
diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h
index 3ae3da4e7c14..0f5ad54d3fd3 100644
--- a/drivers/gpu/drm/amd/include/atomfirmware.h
+++ b/drivers/gpu/drm/amd/include/atomfirmware.h
@@ -1264,9 +1264,9 @@ struct atom_smc_dpm_info_v4_1
   uint8_t  ledpin2;
   uint8_t  padding8_4;
 
-  uint8_t  gfxclkspreadenabled;
-  uint8_t  gfxclkspreadpercent;
-  uint16_t gfxclkspreadfreq;
+	uint8_t  pllgfxclkspreadenabled;
+	uint8_t  pllgfxclkspreadpercent;
+	uint16_t pllgfxclkspreadfreq;
 
   uint8_t uclkspreadenabled;
   uint8_t uclkspreadpercent;
@@ -1276,7 +1276,11 @@ struct atom_smc_dpm_info_v4_1
   uint8_t socclkspreadpercent;
   uint16_t socclkspreadfreq;
 
-  uint32_t boardreserved[3];
+	uint8_t  acggfxclkspreadenabled;
+	uint8_t  acggfxclkspreadpercent;
+	uint16_t acggfxclkspreadfreq;
+
+	uint32_t boardreserved[10];
 };
 
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/Makefile b/drivers/gpu/drm/amd/powerplay/hwmgr/Makefile
index faf9c880e4f7..210fb3ecd213 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/Makefile
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/Makefile
@@ -32,7 +32,7 @@ HARDWARE_MGR = hwmgr.o processpptables.o \
 		vega10_processpptables.o vega10_hwmgr.o vega10_powertune.o \
 		vega10_thermal.o smu10_hwmgr.o pp_psm.o\
 		vega12_processpptables.o vega12_hwmgr.o \
-		vega12_powertune.o vega12_thermal.o \
+		vega12_thermal.o \
 		pp_overdriver.o smu_helper.o
 
 AMD_PP_HWMGR = $(addprefix $(AMD_PP_PATH)/hwmgr/,$(HARDWARE_MGR))
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c
index 55f9b30513ff..ad42caac033e 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c
@@ -616,9 +616,9 @@ int pp_atomfwctrl_get_smc_dpm_information(struct pp_hwmgr *hwmgr,
 	param->ledpin1 = info->ledpin1;
 	param->ledpin2 = info->ledpin2;
 
-	param->gfxclkspreadenabled = info->gfxclkspreadenabled;
-	param->gfxclkspreadpercent = info->gfxclkspreadpercent;
-	param->gfxclkspreadfreq = info->gfxclkspreadfreq;
+	param->pllgfxclkspreadenabled = info->pllgfxclkspreadenabled;
+	param->pllgfxclkspreadpercent = info->pllgfxclkspreadpercent;
+	param->pllgfxclkspreadfreq = info->pllgfxclkspreadfreq;
 
 	param->uclkspreadenabled = info->uclkspreadenabled;
 	param->uclkspreadpercent = info->uclkspreadpercent;
@@ -628,5 +628,9 @@ int pp_atomfwctrl_get_smc_dpm_information(struct pp_hwmgr *hwmgr,
 	param->socclkspreadpercent = info->socclkspreadpercent;
 	param->socclkspreadfreq = info->socclkspreadfreq;
 
+	param->acggfxclkspreadenabled = info->acggfxclkspreadenabled;
+	param->acggfxclkspreadpercent = info->acggfxclkspreadpercent;
+	param->acggfxclkspreadfreq = info->acggfxclkspreadfreq;
+
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h
index a957d8f08029..8df1e84f27c9 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h
@@ -192,9 +192,9 @@ struct pp_atomfwctrl_smc_dpm_parameters
   uint8_t  ledpin1;
   uint8_t  ledpin2;
 
-  uint8_t  gfxclkspreadenabled;
-  uint8_t  gfxclkspreadpercent;
-  uint16_t gfxclkspreadfreq;
+	uint8_t  pllgfxclkspreadenabled;
+	uint8_t  pllgfxclkspreadpercent;
+	uint16_t pllgfxclkspreadfreq;
 
   uint8_t  uclkspreadenabled;
   uint8_t  uclkspreadpercent;
@@ -203,6 +203,10 @@ struct pp_atomfwctrl_smc_dpm_parameters
   uint8_t socclkspreadenabled;
   uint8_t socclkspreadpercent;
   uint16_t socclkspreadfreq;
+
+	uint8_t  acggfxclkspreadenabled;
+	uint8_t  acggfxclkspreadpercent;
+	uint16_t acggfxclkspreadfreq;
 };
 
 int pp_atomfwctrl_get_gpu_pll_dividers_vega10(struct pp_hwmgr *hwmgr,
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
index 2b0c366d6149..add90675fd2a 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
@@ -3374,7 +3374,8 @@ static int smu7_get_gpu_power(struct pp_hwmgr *hwmgr,
 			"Failed to start pm status log!",
 			return -1);
 
-	msleep_interruptible(20);
+	/* Sampling period from 50ms to 4sec */
+	msleep_interruptible(200);
 
 	PP_ASSERT_WITH_CODE(!smum_send_msg_to_smc(hwmgr,
 			PPSMC_MSG_PmStatusLogSample),
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c
index 75a465f771f0..7b26607c646a 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c
@@ -319,13 +319,13 @@ static int smu8_get_system_info_data(struct pp_hwmgr *hwmgr)
 			GetIndexIntoMasterTable(DATA, IntegratedSystemInfo),
 			&size, &frev, &crev);
 
-	if (crev != 9) {
-		pr_err("Unsupported IGP table: %d %d\n", frev, crev);
+	if (info == NULL) {
+		pr_err("Could not retrieve the Integrated System Info Table!\n");
 		return -EINVAL;
 	}
 
-	if (info == NULL) {
-		pr_err("Could not retrieve the Integrated System Info Table!\n");
+	if (crev != 9) {
+		pr_err("Unsupported IGP table: %d %d\n", frev, crev);
 		return -EINVAL;
 	}
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
index 15ce1e825021..200de46bd06b 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
@@ -33,7 +33,6 @@
 #include "ppatomfwctrl.h"
 #include "atomfirmware.h"
 #include "cgs_common.h"
-#include "vega12_powertune.h"
 #include "vega12_inc.h"
 #include "pp_soc15.h"
 #include "pppcielanes.h"
@@ -893,6 +892,35 @@ static int vega12_odn_initialize_default_settings(
 	return 0;
 }
 
+/* Send PPSMC_MSG_OverDriveSetPercentage with @adjust_percent as parameter. */
+static int vega12_set_overdrive_target_percentage(struct pp_hwmgr *hwmgr,
+		uint32_t adjust_percent)
+{
+	return smum_send_msg_to_smc_with_parameter(hwmgr,
+			PPSMC_MSG_OverDriveSetPercentage, adjust_percent);
+}
+
+/*
+ * With PHM_PlatformCaps_PowerContainment set, send the platform TDP
+ * adjustment as the overdrive percentage: as-is when TDPAdjustmentPolarity
+ * is non-zero, negated otherwise. Returns the send result, or 0 when the
+ * capability is not set and nothing is sent.
+ */
+static int vega12_power_control_set_level(struct pp_hwmgr *hwmgr)
+{
+	int adjust_percent, result = 0;
+
+	if (PP_CAP(PHM_PlatformCaps_PowerContainment)) {
+		adjust_percent =
+				hwmgr->platform_descriptor.TDPAdjustmentPolarity ?
+				hwmgr->platform_descriptor.TDPAdjustment :
+				(-1 * hwmgr->platform_descriptor.TDPAdjustment);
+		/* A negated value is passed on converted to uint32_t. */
+		result = vega12_set_overdrive_target_percentage(hwmgr,
+				(uint32_t)adjust_percent);
+	}
+	return result;
+}
+
 static int vega12_enable_dpm_tasks(struct pp_hwmgr *hwmgr)
 {
 	int tmp_result, result = 0;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_powertune.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_powertune.c
deleted file mode 100644
index 76e60c0181ac..000000000000
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_powertune.c
+++ /dev/null
@@ -1,1364 +0,0 @@
-/*
- * Copyright 2017 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#include "hwmgr.h"
-#include "vega12_hwmgr.h"
-#include "vega12_powertune.h"
-#include "vega12_smumgr.h"
-#include "vega12_ppsmc.h"
-#include "vega12_inc.h"
-#include "pp_debug.h"
-#include "pp_soc15.h"
-
-static const struct vega12_didt_config_reg SEDiDtTuningCtrlConfig_Vega12[] =
-{
-/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- *      Offset                             Mask                                                 Shift                                                  Value
- * ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- */
-	/* DIDT_SQ */
-	{   ixDIDT_SQ_TUNING_CTRL,             DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_HI_MASK,        DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_HI__SHIFT,        0x3853 },
-	{   ixDIDT_SQ_TUNING_CTRL,             DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_LO_MASK,        DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_LO__SHIFT,        0x3153 },
-
-	/* DIDT_TD */
-	{   ixDIDT_TD_TUNING_CTRL,             DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_HI_MASK,        DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_HI__SHIFT,        0x0dde },
-	{   ixDIDT_TD_TUNING_CTRL,             DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_LO_MASK,        DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_LO__SHIFT,        0x0dde },
-
-	/* DIDT_TCP */
-	{   ixDIDT_TCP_TUNING_CTRL,            DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_HI_MASK,       DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_HI__SHIFT,       0x3dde },
-	{   ixDIDT_TCP_TUNING_CTRL,            DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_LO_MASK,       DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_LO__SHIFT,       0x3dde },
-
-	/* DIDT_DB */
-	{   ixDIDT_DB_TUNING_CTRL,             DIDT_DB_TUNING_CTRL__MAX_POWER_DELTA_HI_MASK,        DIDT_DB_TUNING_CTRL__MAX_POWER_DELTA_HI__SHIFT,        0x3dde },
-	{   ixDIDT_DB_TUNING_CTRL,             DIDT_DB_TUNING_CTRL__MAX_POWER_DELTA_LO_MASK,        DIDT_DB_TUNING_CTRL__MAX_POWER_DELTA_LO__SHIFT,        0x3dde },
-
-	{   0xFFFFFFFF  }  /* End of list */
-};
-
-static const struct vega12_didt_config_reg SEDiDtCtrl3Config_vega12[] =
-{
-/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- *      Offset               Mask                                                     Shift                                                            Value
- * ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- */
-	/*DIDT_SQ_CTRL3 */
-	{   ixDIDT_SQ_CTRL3,     DIDT_SQ_CTRL3__GC_DIDT_ENABLE_MASK,       DIDT_SQ_CTRL3__GC_DIDT_ENABLE__SHIFT,             0x0000 },
-	{   ixDIDT_SQ_CTRL3,     DIDT_SQ_CTRL3__GC_DIDT_CLK_EN_OVERRIDE_MASK,       DIDT_SQ_CTRL3__GC_DIDT_CLK_EN_OVERRIDE__SHIFT,             0x0000 },
-	{   ixDIDT_SQ_CTRL3,     DIDT_SQ_CTRL3__THROTTLE_POLICY_MASK,       DIDT_SQ_CTRL3__THROTTLE_POLICY__SHIFT,             0x0003 },
-	{   ixDIDT_SQ_CTRL3,     DIDT_SQ_CTRL3__DIDT_TRIGGER_THROTTLE_LOWBIT_MASK,       DIDT_SQ_CTRL3__DIDT_TRIGGER_THROTTLE_LOWBIT__SHIFT,             0x0000 },
-	{   ixDIDT_SQ_CTRL3,     DIDT_SQ_CTRL3__DIDT_POWER_LEVEL_LOWBIT_MASK,       DIDT_SQ_CTRL3__DIDT_POWER_LEVEL_LOWBIT__SHIFT,             0x0000 },
-	{   ixDIDT_SQ_CTRL3,     DIDT_SQ_CTRL3__DIDT_STALL_PATTERN_BIT_NUMS_MASK,       DIDT_SQ_CTRL3__DIDT_STALL_PATTERN_BIT_NUMS__SHIFT,             0x0003 },
-	{   ixDIDT_SQ_CTRL3,     DIDT_SQ_CTRL3__GC_DIDT_LEVEL_COMB_EN_MASK,       DIDT_SQ_CTRL3__GC_DIDT_LEVEL_COMB_EN__SHIFT,             0x0000 },
-	{   ixDIDT_SQ_CTRL3,     DIDT_SQ_CTRL3__SE_DIDT_LEVEL_COMB_EN_MASK,       DIDT_SQ_CTRL3__SE_DIDT_LEVEL_COMB_EN__SHIFT,             0x0000 },
-	{   ixDIDT_SQ_CTRL3,     DIDT_SQ_CTRL3__QUALIFY_STALL_EN_MASK,       DIDT_SQ_CTRL3__QUALIFY_STALL_EN__SHIFT,             0x0000 },
-	{   ixDIDT_SQ_CTRL3,     DIDT_SQ_CTRL3__DIDT_STALL_SEL_MASK,       DIDT_SQ_CTRL3__DIDT_STALL_SEL__SHIFT,             0x0000 },
-	{   ixDIDT_SQ_CTRL3,     DIDT_SQ_CTRL3__DIDT_FORCE_STALL_MASK,       DIDT_SQ_CTRL3__DIDT_FORCE_STALL__SHIFT,             0x0000 },
-	{   ixDIDT_SQ_CTRL3,     DIDT_SQ_CTRL3__DIDT_STALL_DELAY_EN_MASK,       DIDT_SQ_CTRL3__DIDT_STALL_DELAY_EN__SHIFT,             0x0000 },
-
-	/*DIDT_TCP_CTRL3 */
-	{   ixDIDT_TCP_CTRL3,    DIDT_TCP_CTRL3__GC_DIDT_ENABLE_MASK,      DIDT_TCP_CTRL3__GC_DIDT_ENABLE__SHIFT,            0x0000 },
-	{   ixDIDT_TCP_CTRL3,    DIDT_TCP_CTRL3__GC_DIDT_CLK_EN_OVERRIDE_MASK,      DIDT_TCP_CTRL3__GC_DIDT_CLK_EN_OVERRIDE__SHIFT,            0x0000 },
-	{   ixDIDT_TCP_CTRL3,    DIDT_TCP_CTRL3__THROTTLE_POLICY_MASK,      DIDT_TCP_CTRL3__THROTTLE_POLICY__SHIFT,            0x0003 },
-	{   ixDIDT_TCP_CTRL3,    DIDT_TCP_CTRL3__DIDT_TRIGGER_THROTTLE_LOWBIT_MASK,      DIDT_TCP_CTRL3__DIDT_TRIGGER_THROTTLE_LOWBIT__SHIFT,            0x0000 },
-	{   ixDIDT_TCP_CTRL3,    DIDT_TCP_CTRL3__DIDT_POWER_LEVEL_LOWBIT_MASK,      DIDT_TCP_CTRL3__DIDT_POWER_LEVEL_LOWBIT__SHIFT,            0x0000 },
-	{   ixDIDT_TCP_CTRL3,    DIDT_TCP_CTRL3__DIDT_STALL_PATTERN_BIT_NUMS_MASK,      DIDT_TCP_CTRL3__DIDT_STALL_PATTERN_BIT_NUMS__SHIFT,            0x0003 },
-	{   ixDIDT_TCP_CTRL3,    DIDT_TCP_CTRL3__GC_DIDT_LEVEL_COMB_EN_MASK,      DIDT_TCP_CTRL3__GC_DIDT_LEVEL_COMB_EN__SHIFT,            0x0000 },
-	{   ixDIDT_TCP_CTRL3,    DIDT_TCP_CTRL3__SE_DIDT_LEVEL_COMB_EN_MASK,      DIDT_TCP_CTRL3__SE_DIDT_LEVEL_COMB_EN__SHIFT,            0x0000 },
-	{   ixDIDT_TCP_CTRL3,    DIDT_TCP_CTRL3__QUALIFY_STALL_EN_MASK,      DIDT_TCP_CTRL3__QUALIFY_STALL_EN__SHIFT,            0x0000 },
-	{   ixDIDT_TCP_CTRL3,    DIDT_TCP_CTRL3__DIDT_STALL_SEL_MASK,      DIDT_TCP_CTRL3__DIDT_STALL_SEL__SHIFT,            0x0000 },
-	{   ixDIDT_TCP_CTRL3,    DIDT_TCP_CTRL3__DIDT_FORCE_STALL_MASK,      DIDT_TCP_CTRL3__DIDT_FORCE_STALL__SHIFT,            0x0000 },
-	{   ixDIDT_TCP_CTRL3,    DIDT_TCP_CTRL3__DIDT_STALL_DELAY_EN_MASK,      DIDT_TCP_CTRL3__DIDT_STALL_DELAY_EN__SHIFT,            0x0000 },
-
-	/*DIDT_TD_CTRL3 */
-	{   ixDIDT_TD_CTRL3,     DIDT_TD_CTRL3__GC_DIDT_ENABLE_MASK,       DIDT_TD_CTRL3__GC_DIDT_ENABLE__SHIFT,             0x0000 },
-	{   ixDIDT_TD_CTRL3,     DIDT_TD_CTRL3__GC_DIDT_CLK_EN_OVERRIDE_MASK,       DIDT_TD_CTRL3__GC_DIDT_CLK_EN_OVERRIDE__SHIFT,             0x0000 },
-	{   ixDIDT_TD_CTRL3,     DIDT_TD_CTRL3__THROTTLE_POLICY_MASK,       DIDT_TD_CTRL3__THROTTLE_POLICY__SHIFT,             0x0003 },
-	{   ixDIDT_TD_CTRL3,     DIDT_TD_CTRL3__DIDT_TRIGGER_THROTTLE_LOWBIT_MASK,       DIDT_TD_CTRL3__DIDT_TRIGGER_THROTTLE_LOWBIT__SHIFT,             0x0000 },
-	{   ixDIDT_TD_CTRL3,     DIDT_TD_CTRL3__DIDT_POWER_LEVEL_LOWBIT_MASK,       DIDT_TD_CTRL3__DIDT_POWER_LEVEL_LOWBIT__SHIFT,             0x0000 },
-	{   ixDIDT_TD_CTRL3,     DIDT_TD_CTRL3__DIDT_STALL_PATTERN_BIT_NUMS_MASK,       DIDT_TD_CTRL3__DIDT_STALL_PATTERN_BIT_NUMS__SHIFT,             0x0003 },
-	{   ixDIDT_TD_CTRL3,     DIDT_TD_CTRL3__GC_DIDT_LEVEL_COMB_EN_MASK,       DIDT_TD_CTRL3__GC_DIDT_LEVEL_COMB_EN__SHIFT,             0x0000 },
-	{   ixDIDT_TD_CTRL3,     DIDT_TD_CTRL3__SE_DIDT_LEVEL_COMB_EN_MASK,       DIDT_TD_CTRL3__SE_DIDT_LEVEL_COMB_EN__SHIFT,             0x0000 },
-	{   ixDIDT_TD_CTRL3,     DIDT_TD_CTRL3__QUALIFY_STALL_EN_MASK,       DIDT_TD_CTRL3__QUALIFY_STALL_EN__SHIFT,             0x0000 },
-	{   ixDIDT_TD_CTRL3,     DIDT_TD_CTRL3__DIDT_STALL_SEL_MASK,       DIDT_TD_CTRL3__DIDT_STALL_SEL__SHIFT,             0x0000 },
-	{   ixDIDT_TD_CTRL3,     DIDT_TD_CTRL3__DIDT_FORCE_STALL_MASK,       DIDT_TD_CTRL3__DIDT_FORCE_STALL__SHIFT,             0x0000 },
-	{   ixDIDT_TD_CTRL3,     DIDT_TD_CTRL3__DIDT_STALL_DELAY_EN_MASK,       DIDT_TD_CTRL3__DIDT_STALL_DELAY_EN__SHIFT,             0x0000 },
-
-	/*DIDT_DB_CTRL3 */
-	{   ixDIDT_DB_CTRL3,     DIDT_DB_CTRL3__GC_DIDT_ENABLE_MASK,       DIDT_DB_CTRL3__GC_DIDT_ENABLE__SHIFT,             0x0000 },
-	{   ixDIDT_DB_CTRL3,     DIDT_DB_CTRL3__GC_DIDT_CLK_EN_OVERRIDE_MASK,       DIDT_DB_CTRL3__GC_DIDT_CLK_EN_OVERRIDE__SHIFT,             0x0000 },
-	{   ixDIDT_DB_CTRL3,     DIDT_DB_CTRL3__THROTTLE_POLICY_MASK,       DIDT_DB_CTRL3__THROTTLE_POLICY__SHIFT,             0x0003 },
-	{   ixDIDT_DB_CTRL3,     DIDT_DB_CTRL3__DIDT_TRIGGER_THROTTLE_LOWBIT_MASK,       DIDT_DB_CTRL3__DIDT_TRIGGER_THROTTLE_LOWBIT__SHIFT,             0x0000 },
-	{   ixDIDT_DB_CTRL3,     DIDT_DB_CTRL3__DIDT_POWER_LEVEL_LOWBIT_MASK,       DIDT_DB_CTRL3__DIDT_POWER_LEVEL_LOWBIT__SHIFT,             0x0000 },
-	{   ixDIDT_DB_CTRL3,     DIDT_DB_CTRL3__DIDT_STALL_PATTERN_BIT_NUMS_MASK,       DIDT_DB_CTRL3__DIDT_STALL_PATTERN_BIT_NUMS__SHIFT,             0x0003 },
-	{   ixDIDT_DB_CTRL3,     DIDT_DB_CTRL3__GC_DIDT_LEVEL_COMB_EN_MASK,       DIDT_DB_CTRL3__GC_DIDT_LEVEL_COMB_EN__SHIFT,             0x0000 },
-	{   ixDIDT_DB_CTRL3,     DIDT_DB_CTRL3__SE_DIDT_LEVEL_COMB_EN_MASK,       DIDT_DB_CTRL3__SE_DIDT_LEVEL_COMB_EN__SHIFT,             0x0000 },
-	{   ixDIDT_DB_CTRL3,     DIDT_DB_CTRL3__QUALIFY_STALL_EN_MASK,       DIDT_DB_CTRL3__QUALIFY_STALL_EN__SHIFT,             0x0000 },
-	{   ixDIDT_DB_CTRL3,     DIDT_DB_CTRL3__DIDT_STALL_SEL_MASK,       DIDT_DB_CTRL3__DIDT_STALL_SEL__SHIFT,             0x0000 },
-	{   ixDIDT_DB_CTRL3,     DIDT_DB_CTRL3__DIDT_FORCE_STALL_MASK,       DIDT_DB_CTRL3__DIDT_FORCE_STALL__SHIFT,             0x0000 },
-	{   ixDIDT_DB_CTRL3,     DIDT_DB_CTRL3__DIDT_STALL_DELAY_EN_MASK,       DIDT_DB_CTRL3__DIDT_STALL_DELAY_EN__SHIFT,             0x0000 },
-
-	{   0xFFFFFFFF  }  /* End of list */
-};
-
-static const struct vega12_didt_config_reg SEDiDtCtrl2Config_Vega12[] =
-{
-/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- *      Offset                            Mask                                                 Shift                                                  Value
- * ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- */
-	/* DIDT_SQ */
-	{   ixDIDT_SQ_CTRL2,                  DIDT_SQ_CTRL2__MAX_POWER_DELTA_MASK,                 DIDT_SQ_CTRL2__MAX_POWER_DELTA__SHIFT,                 0x3853 },
-	{   ixDIDT_SQ_CTRL2,                  DIDT_SQ_CTRL2__SHORT_TERM_INTERVAL_SIZE_MASK,        DIDT_SQ_CTRL2__SHORT_TERM_INTERVAL_SIZE__SHIFT,        0x00c0 },
-	{   ixDIDT_SQ_CTRL2,                  DIDT_SQ_CTRL2__LONG_TERM_INTERVAL_RATIO_MASK,        DIDT_SQ_CTRL2__LONG_TERM_INTERVAL_RATIO__SHIFT,        0x0000 },
-
-	/* DIDT_TD */
-	{   ixDIDT_TD_CTRL2,                  DIDT_TD_CTRL2__MAX_POWER_DELTA_MASK,                 DIDT_TD_CTRL2__MAX_POWER_DELTA__SHIFT,                 0x3fff },
-	{   ixDIDT_TD_CTRL2,                  DIDT_TD_CTRL2__SHORT_TERM_INTERVAL_SIZE_MASK,        DIDT_TD_CTRL2__SHORT_TERM_INTERVAL_SIZE__SHIFT,        0x00c0 },
-	{   ixDIDT_TD_CTRL2,                  DIDT_TD_CTRL2__LONG_TERM_INTERVAL_RATIO_MASK,        DIDT_TD_CTRL2__LONG_TERM_INTERVAL_RATIO__SHIFT,        0x0001 },
-
-	/* DIDT_TCP */
-	{   ixDIDT_TCP_CTRL2,                 DIDT_TCP_CTRL2__MAX_POWER_DELTA_MASK,                DIDT_TCP_CTRL2__MAX_POWER_DELTA__SHIFT,                0x3dde },
-	{   ixDIDT_TCP_CTRL2,                 DIDT_TCP_CTRL2__SHORT_TERM_INTERVAL_SIZE_MASK,       DIDT_TCP_CTRL2__SHORT_TERM_INTERVAL_SIZE__SHIFT,       0x00c0 },
-	{   ixDIDT_TCP_CTRL2,                 DIDT_TCP_CTRL2__LONG_TERM_INTERVAL_RATIO_MASK,       DIDT_TCP_CTRL2__LONG_TERM_INTERVAL_RATIO__SHIFT,       0x0001 },
-
-	/* DIDT_DB */
-	{   ixDIDT_DB_CTRL2,                  DIDT_DB_CTRL2__MAX_POWER_DELTA_MASK,                 DIDT_DB_CTRL2__MAX_POWER_DELTA__SHIFT,                 0x3dde },
-	{   ixDIDT_DB_CTRL2,                  DIDT_DB_CTRL2__SHORT_TERM_INTERVAL_SIZE_MASK,        DIDT_DB_CTRL2__SHORT_TERM_INTERVAL_SIZE__SHIFT,        0x00c0 },
-	{   ixDIDT_DB_CTRL2,                  DIDT_DB_CTRL2__LONG_TERM_INTERVAL_RATIO_MASK,        DIDT_DB_CTRL2__LONG_TERM_INTERVAL_RATIO__SHIFT,        0x0001 },
-
-	{   0xFFFFFFFF  }  /* End of list */
-};
-
-static const struct vega12_didt_config_reg SEDiDtCtrl1Config_Vega12[] =
-{
-/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- *      Offset                             Mask                                                 Shift                                                  Value
- * ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- */
-	/* DIDT_SQ */
-	{   ixDIDT_SQ_CTRL1,                   DIDT_SQ_CTRL1__MIN_POWER_MASK,                       DIDT_SQ_CTRL1__MIN_POWER__SHIFT,                       0x0000 },
-	{   ixDIDT_SQ_CTRL1,                   DIDT_SQ_CTRL1__MAX_POWER_MASK,                       DIDT_SQ_CTRL1__MAX_POWER__SHIFT,                       0xffff },
-	/* DIDT_TD */
-	{   ixDIDT_TD_CTRL1,                   DIDT_TD_CTRL1__MIN_POWER_MASK,                       DIDT_TD_CTRL1__MIN_POWER__SHIFT,                       0x0000 },
-	{   ixDIDT_TD_CTRL1,                   DIDT_TD_CTRL1__MAX_POWER_MASK,                       DIDT_TD_CTRL1__MAX_POWER__SHIFT,                       0xffff },
-	/* DIDT_TCP */
-	{   ixDIDT_TCP_CTRL1,                  DIDT_TCP_CTRL1__MIN_POWER_MASK,                      DIDT_TCP_CTRL1__MIN_POWER__SHIFT,                      0x0000 },
-	{   ixDIDT_TCP_CTRL1,                  DIDT_TCP_CTRL1__MAX_POWER_MASK,                      DIDT_TCP_CTRL1__MAX_POWER__SHIFT,                      0xffff },
-	/* DIDT_DB */
-	{   ixDIDT_DB_CTRL1,                   DIDT_DB_CTRL1__MIN_POWER_MASK,                       DIDT_DB_CTRL1__MIN_POWER__SHIFT,                       0x0000 },
-	{   ixDIDT_DB_CTRL1,                   DIDT_DB_CTRL1__MAX_POWER_MASK,                       DIDT_DB_CTRL1__MAX_POWER__SHIFT,                       0xffff },
-
-	{   0xFFFFFFFF  }  /* End of list */
-};
-
-
-static const struct vega12_didt_config_reg SEDiDtWeightConfig_Vega12[] =
-{
-/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- *      Offset                             Mask                                                  Shift                                                 Value
- * ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- */
-	/* DIDT_SQ */
-	{   ixDIDT_SQ_WEIGHT0_3,               0xFFFFFFFF,                                           0,                                                    0x2B363B1A },
-	{   ixDIDT_SQ_WEIGHT4_7,               0xFFFFFFFF,                                           0,                                                    0x270B2432 },
-	{   ixDIDT_SQ_WEIGHT8_11,              0xFFFFFFFF,                                           0,                                                    0x00000018 },
-
-	/* DIDT_TD */
-	{   ixDIDT_TD_WEIGHT0_3,               0xFFFFFFFF,                                           0,                                                    0x2B1D220F },
-	{   ixDIDT_TD_WEIGHT4_7,               0xFFFFFFFF,                                           0,                                                    0x00007558 },
-	{   ixDIDT_TD_WEIGHT8_11,              0xFFFFFFFF,                                           0,                                                    0x00000000 },
-
-	/* DIDT_TCP */
-	{   ixDIDT_TCP_WEIGHT0_3,               0xFFFFFFFF,                                          0,                                                    0x5ACE160D },
-	{   ixDIDT_TCP_WEIGHT4_7,               0xFFFFFFFF,                                          0,                                                    0x00000000 },
-	{   ixDIDT_TCP_WEIGHT8_11,              0xFFFFFFFF,                                          0,                                                    0x00000000 },
-
-	/* DIDT_DB */
-	{   ixDIDT_DB_WEIGHT0_3,                0xFFFFFFFF,                                          0,                                                    0x0E152A0F },
-	{   ixDIDT_DB_WEIGHT4_7,                0xFFFFFFFF,                                          0,                                                    0x09061813 },
-	{   ixDIDT_DB_WEIGHT8_11,               0xFFFFFFFF,                                          0,                                                    0x00000013 },
-
-	{   0xFFFFFFFF  }  /* End of list */
-};
-
-static const struct vega12_didt_config_reg SEDiDtCtrl0Config_Vega12[] =
-{
-/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- *      Offset                             Mask                                                 Shift                                                  Value
- * ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- */
-	/* DIDT_SQ */
-	{  ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__DIDT_CTRL_EN_MASK,   DIDT_SQ_CTRL0__DIDT_CTRL_EN__SHIFT,  0x0000 },
-	{  ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__PHASE_OFFSET_MASK,   DIDT_SQ_CTRL0__PHASE_OFFSET__SHIFT,  0x0000 },
-	{  ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__DIDT_CTRL_RST_MASK,   DIDT_SQ_CTRL0__DIDT_CTRL_RST__SHIFT,  0x0000 },
-	{  ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK,   DIDT_SQ_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT,  0x0000 },
-	{  ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__DIDT_STALL_CTRL_EN_MASK,   DIDT_SQ_CTRL0__DIDT_STALL_CTRL_EN__SHIFT,  0x0001 },
-	{  ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__DIDT_TUNING_CTRL_EN_MASK,   DIDT_SQ_CTRL0__DIDT_TUNING_CTRL_EN__SHIFT,  0x0001 },
-	{  ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__DIDT_STALL_AUTO_RELEASE_EN_MASK,   DIDT_SQ_CTRL0__DIDT_STALL_AUTO_RELEASE_EN__SHIFT,  0x0001 },
-	{  ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__DIDT_HI_POWER_THRESHOLD_MASK,   DIDT_SQ_CTRL0__DIDT_HI_POWER_THRESHOLD__SHIFT,  0xffff },
-	{  ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__DIDT_AUTO_MPD_EN_MASK,   DIDT_SQ_CTRL0__DIDT_AUTO_MPD_EN__SHIFT,  0x0000 },
-	{  ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__DIDT_STALL_EVENT_EN_MASK,   DIDT_SQ_CTRL0__DIDT_STALL_EVENT_EN__SHIFT,  0x0000 },
-	{  ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__DIDT_STALL_EVENT_COUNTER_CLEAR_MASK,   DIDT_SQ_CTRL0__DIDT_STALL_EVENT_COUNTER_CLEAR__SHIFT,  0x0000 },
-	/* DIDT_TD */
-	{  ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__DIDT_CTRL_EN_MASK,   DIDT_TD_CTRL0__DIDT_CTRL_EN__SHIFT,  0x0000 },
-	{  ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__PHASE_OFFSET_MASK,   DIDT_TD_CTRL0__PHASE_OFFSET__SHIFT,  0x0000 },
-	{  ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__DIDT_CTRL_RST_MASK,   DIDT_TD_CTRL0__DIDT_CTRL_RST__SHIFT,  0x0000 },
-	{  ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK,   DIDT_TD_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT,  0x0000 },
-	{  ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__DIDT_STALL_CTRL_EN_MASK,   DIDT_TD_CTRL0__DIDT_STALL_CTRL_EN__SHIFT,  0x0001 },
-	{  ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__DIDT_TUNING_CTRL_EN_MASK,   DIDT_TD_CTRL0__DIDT_TUNING_CTRL_EN__SHIFT,  0x0001 },
-	{  ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__DIDT_STALL_AUTO_RELEASE_EN_MASK,   DIDT_TD_CTRL0__DIDT_STALL_AUTO_RELEASE_EN__SHIFT,  0x0001 },
-	{  ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__DIDT_HI_POWER_THRESHOLD_MASK,   DIDT_TD_CTRL0__DIDT_HI_POWER_THRESHOLD__SHIFT,  0xffff },
-	{  ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__DIDT_AUTO_MPD_EN_MASK,   DIDT_TD_CTRL0__DIDT_AUTO_MPD_EN__SHIFT,  0x0000 },
-	{  ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__DIDT_STALL_EVENT_EN_MASK,   DIDT_TD_CTRL0__DIDT_STALL_EVENT_EN__SHIFT,  0x0000 },
-	{  ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__DIDT_STALL_EVENT_COUNTER_CLEAR_MASK,   DIDT_TD_CTRL0__DIDT_STALL_EVENT_COUNTER_CLEAR__SHIFT,  0x0000 },
-	/* DIDT_TCP */
-	{  ixDIDT_TCP_CTRL0,                  DIDT_TCP_CTRL0__DIDT_CTRL_EN_MASK,  DIDT_TCP_CTRL0__DIDT_CTRL_EN__SHIFT, 0x0000 },
-	{  ixDIDT_TCP_CTRL0,                  DIDT_TCP_CTRL0__PHASE_OFFSET_MASK,  DIDT_TCP_CTRL0__PHASE_OFFSET__SHIFT, 0x0000 },
-	{  ixDIDT_TCP_CTRL0,                  DIDT_TCP_CTRL0__DIDT_CTRL_RST_MASK,  DIDT_TCP_CTRL0__DIDT_CTRL_RST__SHIFT, 0x0000 },
-	{  ixDIDT_TCP_CTRL0,                  DIDT_TCP_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK,  DIDT_TCP_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT, 0x0000 },
-	{  ixDIDT_TCP_CTRL0,                  DIDT_TCP_CTRL0__DIDT_STALL_CTRL_EN_MASK,  DIDT_TCP_CTRL0__DIDT_STALL_CTRL_EN__SHIFT, 0x0001 },
-	{  ixDIDT_TCP_CTRL0,                  DIDT_TCP_CTRL0__DIDT_TUNING_CTRL_EN_MASK,  DIDT_TCP_CTRL0__DIDT_TUNING_CTRL_EN__SHIFT, 0x0001 },
-	{  ixDIDT_TCP_CTRL0,                  DIDT_TCP_CTRL0__DIDT_STALL_AUTO_RELEASE_EN_MASK,  DIDT_TCP_CTRL0__DIDT_STALL_AUTO_RELEASE_EN__SHIFT, 0x0001 },
-	{  ixDIDT_TCP_CTRL0,                  DIDT_TCP_CTRL0__DIDT_HI_POWER_THRESHOLD_MASK,  DIDT_TCP_CTRL0__DIDT_HI_POWER_THRESHOLD__SHIFT, 0xffff },
-	{  ixDIDT_TCP_CTRL0,                  DIDT_TCP_CTRL0__DIDT_AUTO_MPD_EN_MASK,  DIDT_TCP_CTRL0__DIDT_AUTO_MPD_EN__SHIFT, 0x0000 },
-	{  ixDIDT_TCP_CTRL0,                  DIDT_TCP_CTRL0__DIDT_STALL_EVENT_EN_MASK,  DIDT_TCP_CTRL0__DIDT_STALL_EVENT_EN__SHIFT, 0x0000 },
-	{  ixDIDT_TCP_CTRL0,                  DIDT_TCP_CTRL0__DIDT_STALL_EVENT_COUNTER_CLEAR_MASK,  DIDT_TCP_CTRL0__DIDT_STALL_EVENT_COUNTER_CLEAR__SHIFT, 0x0000 },
-	/* DIDT_DB */
-	{  ixDIDT_DB_CTRL0,                   DIDT_DB_CTRL0__DIDT_CTRL_EN_MASK,   DIDT_DB_CTRL0__DIDT_CTRL_EN__SHIFT,  0x0000 },
-	{  ixDIDT_DB_CTRL0,                   DIDT_DB_CTRL0__PHASE_OFFSET_MASK,   DIDT_DB_CTRL0__PHASE_OFFSET__SHIFT,  0x0000 },
-	{  ixDIDT_DB_CTRL0,                   DIDT_DB_CTRL0__DIDT_CTRL_RST_MASK,   DIDT_DB_CTRL0__DIDT_CTRL_RST__SHIFT,  0x0000 },
-	{  ixDIDT_DB_CTRL0,                   DIDT_DB_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK,   DIDT_DB_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT,  0x0000 },
-	{  ixDIDT_DB_CTRL0,                   DIDT_DB_CTRL0__DIDT_STALL_CTRL_EN_MASK,   DIDT_DB_CTRL0__DIDT_STALL_CTRL_EN__SHIFT,  0x0001 },
-	{  ixDIDT_DB_CTRL0,                   DIDT_DB_CTRL0__DIDT_TUNING_CTRL_EN_MASK,   DIDT_DB_CTRL0__DIDT_TUNING_CTRL_EN__SHIFT,  0x0001 },
-	{  ixDIDT_DB_CTRL0,                   DIDT_DB_CTRL0__DIDT_STALL_AUTO_RELEASE_EN_MASK,   DIDT_DB_CTRL0__DIDT_STALL_AUTO_RELEASE_EN__SHIFT,  0x0001 },
-	{  ixDIDT_DB_CTRL0,                   DIDT_DB_CTRL0__DIDT_HI_POWER_THRESHOLD_MASK,   DIDT_DB_CTRL0__DIDT_HI_POWER_THRESHOLD__SHIFT,  0xffff },
-	{  ixDIDT_DB_CTRL0,                   DIDT_DB_CTRL0__DIDT_AUTO_MPD_EN_MASK,   DIDT_DB_CTRL0__DIDT_AUTO_MPD_EN__SHIFT,  0x0000 },
-	{  ixDIDT_DB_CTRL0,                   DIDT_DB_CTRL0__DIDT_STALL_EVENT_EN_MASK,   DIDT_DB_CTRL0__DIDT_STALL_EVENT_EN__SHIFT,  0x0000 },
-	{  ixDIDT_DB_CTRL0,                   DIDT_DB_CTRL0__DIDT_STALL_EVENT_COUNTER_CLEAR_MASK,   DIDT_DB_CTRL0__DIDT_STALL_EVENT_COUNTER_CLEAR__SHIFT,  0x0000 },
-
-	{   0xFFFFFFFF  }  /* End of list */
-};
-
-
-static const struct vega12_didt_config_reg SEDiDtStallCtrlConfig_vega12[] =
-{
-/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- *      Offset                   Mask                                                     Shift                                                      Value
- * ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- */
-	/* DIDT_SQ */
-	{   ixDIDT_SQ_STALL_CTRL,    DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_HI_MASK,    DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_HI__SHIFT,     0x0004 },
-	{   ixDIDT_SQ_STALL_CTRL,    DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_LO_MASK,    DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_LO__SHIFT,     0x0004 },
-	{   ixDIDT_SQ_STALL_CTRL,    DIDT_SQ_STALL_CTRL__DIDT_MAX_STALLS_ALLOWED_HI_MASK,    DIDT_SQ_STALL_CTRL__DIDT_MAX_STALLS_ALLOWED_HI__SHIFT,     0x000a },
-	{   ixDIDT_SQ_STALL_CTRL,    DIDT_SQ_STALL_CTRL__DIDT_MAX_STALLS_ALLOWED_LO_MASK,    DIDT_SQ_STALL_CTRL__DIDT_MAX_STALLS_ALLOWED_LO__SHIFT,     0x000a },
-
-	/* DIDT_TD */
-	{   ixDIDT_TD_STALL_CTRL,    DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_HI_MASK,    DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_HI__SHIFT,     0x0001 },
-	{   ixDIDT_TD_STALL_CTRL,    DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_LO_MASK,    DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_LO__SHIFT,     0x0001 },
-	{   ixDIDT_TD_STALL_CTRL,    DIDT_TD_STALL_CTRL__DIDT_MAX_STALLS_ALLOWED_HI_MASK,    DIDT_TD_STALL_CTRL__DIDT_MAX_STALLS_ALLOWED_HI__SHIFT,     0x000a },
-	{   ixDIDT_TD_STALL_CTRL,    DIDT_TD_STALL_CTRL__DIDT_MAX_STALLS_ALLOWED_LO_MASK,    DIDT_TD_STALL_CTRL__DIDT_MAX_STALLS_ALLOWED_LO__SHIFT,     0x000a },
-
-	/* DIDT_TCP */
-	{   ixDIDT_TCP_STALL_CTRL,   DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_HI_MASK,   DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_HI__SHIFT,    0x0001 },
-	{   ixDIDT_TCP_STALL_CTRL,   DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_LO_MASK,   DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_LO__SHIFT,    0x0001 },
-	{   ixDIDT_TCP_STALL_CTRL,   DIDT_TCP_STALL_CTRL__DIDT_MAX_STALLS_ALLOWED_HI_MASK,   DIDT_TCP_STALL_CTRL__DIDT_MAX_STALLS_ALLOWED_HI__SHIFT,    0x000a },
-	{   ixDIDT_TCP_STALL_CTRL,   DIDT_TCP_STALL_CTRL__DIDT_MAX_STALLS_ALLOWED_LO_MASK,   DIDT_TCP_STALL_CTRL__DIDT_MAX_STALLS_ALLOWED_LO__SHIFT,    0x000a },
-
-	/* DIDT_DB */
-	{   ixDIDT_DB_STALL_CTRL,    DIDT_DB_STALL_CTRL__DIDT_STALL_DELAY_HI_MASK,    DIDT_DB_STALL_CTRL__DIDT_STALL_DELAY_HI__SHIFT,     0x0004 },
-	{   ixDIDT_DB_STALL_CTRL,    DIDT_DB_STALL_CTRL__DIDT_STALL_DELAY_LO_MASK,    DIDT_DB_STALL_CTRL__DIDT_STALL_DELAY_LO__SHIFT,     0x0004 },
-	{   ixDIDT_DB_STALL_CTRL,    DIDT_DB_STALL_CTRL__DIDT_MAX_STALLS_ALLOWED_HI_MASK,    DIDT_DB_STALL_CTRL__DIDT_MAX_STALLS_ALLOWED_HI__SHIFT,     0x000a },
-	{   ixDIDT_DB_STALL_CTRL,    DIDT_DB_STALL_CTRL__DIDT_MAX_STALLS_ALLOWED_LO_MASK,    DIDT_DB_STALL_CTRL__DIDT_MAX_STALLS_ALLOWED_LO__SHIFT,     0x000a },
-
-	{   0xFFFFFFFF  }  /* End of list */
-};
-
-static const struct vega12_didt_config_reg SEDiDtStallPatternConfig_vega12[] =
-{
-/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- *      Offset                        Mask                                                      Shift                                                    Value
- * ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- */
-	/* DIDT_SQ_STALL_PATTERN_1_2 */
-	{   ixDIDT_SQ_STALL_PATTERN_1_2,  DIDT_SQ_STALL_PATTERN_1_2__DIDT_STALL_PATTERN_1_MASK,    DIDT_SQ_STALL_PATTERN_1_2__DIDT_STALL_PATTERN_1__SHIFT,  0x0001 },
-	{   ixDIDT_SQ_STALL_PATTERN_1_2,  DIDT_SQ_STALL_PATTERN_1_2__DIDT_STALL_PATTERN_2_MASK,    DIDT_SQ_STALL_PATTERN_1_2__DIDT_STALL_PATTERN_2__SHIFT,  0x0001 },
-
-	/* DIDT_SQ_STALL_PATTERN_3_4 */
-	{   ixDIDT_SQ_STALL_PATTERN_3_4,  DIDT_SQ_STALL_PATTERN_3_4__DIDT_STALL_PATTERN_3_MASK,    DIDT_SQ_STALL_PATTERN_3_4__DIDT_STALL_PATTERN_3__SHIFT,  0x0001 },
-	{   ixDIDT_SQ_STALL_PATTERN_3_4,  DIDT_SQ_STALL_PATTERN_3_4__DIDT_STALL_PATTERN_4_MASK,    DIDT_SQ_STALL_PATTERN_3_4__DIDT_STALL_PATTERN_4__SHIFT,  0x0001 },
-
-	/* DIDT_SQ_STALL_PATTERN_5_6 */
-	{   ixDIDT_SQ_STALL_PATTERN_5_6,  DIDT_SQ_STALL_PATTERN_5_6__DIDT_STALL_PATTERN_5_MASK,    DIDT_SQ_STALL_PATTERN_5_6__DIDT_STALL_PATTERN_5__SHIFT,  0x0000 },
-	{   ixDIDT_SQ_STALL_PATTERN_5_6,  DIDT_SQ_STALL_PATTERN_5_6__DIDT_STALL_PATTERN_6_MASK,    DIDT_SQ_STALL_PATTERN_5_6__DIDT_STALL_PATTERN_6__SHIFT,  0x0000 },
-
-	/* DIDT_SQ_STALL_PATTERN_7 */
-	{   ixDIDT_SQ_STALL_PATTERN_7,    DIDT_SQ_STALL_PATTERN_7__DIDT_STALL_PATTERN_7_MASK,      DIDT_SQ_STALL_PATTERN_7__DIDT_STALL_PATTERN_7__SHIFT,    0x0000 },
-
-	/* DIDT_TCP_STALL_PATTERN_1_2 */
-	{   ixDIDT_TCP_STALL_PATTERN_1_2, DIDT_TCP_STALL_PATTERN_1_2__DIDT_STALL_PATTERN_1_MASK,   DIDT_TCP_STALL_PATTERN_1_2__DIDT_STALL_PATTERN_1__SHIFT, 0x0001 },
-	{   ixDIDT_TCP_STALL_PATTERN_1_2, DIDT_TCP_STALL_PATTERN_1_2__DIDT_STALL_PATTERN_2_MASK,   DIDT_TCP_STALL_PATTERN_1_2__DIDT_STALL_PATTERN_2__SHIFT, 0x0001 },
-
-	/* DIDT_TCP_STALL_PATTERN_3_4 */
-	{   ixDIDT_TCP_STALL_PATTERN_3_4, DIDT_TCP_STALL_PATTERN_3_4__DIDT_STALL_PATTERN_3_MASK,   DIDT_TCP_STALL_PATTERN_3_4__DIDT_STALL_PATTERN_3__SHIFT, 0x0001 },
-	{   ixDIDT_TCP_STALL_PATTERN_3_4, DIDT_TCP_STALL_PATTERN_3_4__DIDT_STALL_PATTERN_4_MASK,   DIDT_TCP_STALL_PATTERN_3_4__DIDT_STALL_PATTERN_4__SHIFT, 0x0001 },
-
-	/* DIDT_TCP_STALL_PATTERN_5_6 */
-	{   ixDIDT_TCP_STALL_PATTERN_5_6, DIDT_TCP_STALL_PATTERN_5_6__DIDT_STALL_PATTERN_5_MASK,   DIDT_TCP_STALL_PATTERN_5_6__DIDT_STALL_PATTERN_5__SHIFT, 0x0000 },
-	{   ixDIDT_TCP_STALL_PATTERN_5_6, DIDT_TCP_STALL_PATTERN_5_6__DIDT_STALL_PATTERN_6_MASK,   DIDT_TCP_STALL_PATTERN_5_6__DIDT_STALL_PATTERN_6__SHIFT, 0x0000 },
-
-	/* DIDT_TCP_STALL_PATTERN_7 */
-	{   ixDIDT_TCP_STALL_PATTERN_7,   DIDT_TCP_STALL_PATTERN_7__DIDT_STALL_PATTERN_7_MASK,     DIDT_TCP_STALL_PATTERN_7__DIDT_STALL_PATTERN_7__SHIFT,   0x0000 },
-
-	/* DIDT_TD_STALL_PATTERN_1_2 */
-	{   ixDIDT_TD_STALL_PATTERN_1_2,  DIDT_TD_STALL_PATTERN_1_2__DIDT_STALL_PATTERN_1_MASK,    DIDT_TD_STALL_PATTERN_1_2__DIDT_STALL_PATTERN_1__SHIFT,  0x0001 },
-	{   ixDIDT_TD_STALL_PATTERN_1_2,  DIDT_TD_STALL_PATTERN_1_2__DIDT_STALL_PATTERN_2_MASK,    DIDT_TD_STALL_PATTERN_1_2__DIDT_STALL_PATTERN_2__SHIFT,  0x0001 },
-
-	/* DIDT_TD_STALL_PATTERN_3_4 */
-	{   ixDIDT_TD_STALL_PATTERN_3_4,  DIDT_TD_STALL_PATTERN_3_4__DIDT_STALL_PATTERN_3_MASK,    DIDT_TD_STALL_PATTERN_3_4__DIDT_STALL_PATTERN_3__SHIFT,  0x0001 },
-	{   ixDIDT_TD_STALL_PATTERN_3_4,  DIDT_TD_STALL_PATTERN_3_4__DIDT_STALL_PATTERN_4_MASK,    DIDT_TD_STALL_PATTERN_3_4__DIDT_STALL_PATTERN_4__SHIFT,  0x0001 },
-
-	/* DIDT_TD_STALL_PATTERN_5_6 */
-	{   ixDIDT_TD_STALL_PATTERN_5_6,  DIDT_TD_STALL_PATTERN_5_6__DIDT_STALL_PATTERN_5_MASK,    DIDT_TD_STALL_PATTERN_5_6__DIDT_STALL_PATTERN_5__SHIFT,  0x0000 },
-	{   ixDIDT_TD_STALL_PATTERN_5_6,  DIDT_TD_STALL_PATTERN_5_6__DIDT_STALL_PATTERN_6_MASK,    DIDT_TD_STALL_PATTERN_5_6__DIDT_STALL_PATTERN_6__SHIFT,  0x0000 },
-
-	/* DIDT_TD_STALL_PATTERN_7 */
-	{   ixDIDT_TD_STALL_PATTERN_7,    DIDT_TD_STALL_PATTERN_7__DIDT_STALL_PATTERN_7_MASK,      DIDT_TD_STALL_PATTERN_7__DIDT_STALL_PATTERN_7__SHIFT,    0x0000 },
-
-	/* DIDT_DB_STALL_PATTERN_1_2 */
-	{   ixDIDT_DB_STALL_PATTERN_1_2,  DIDT_DB_STALL_PATTERN_1_2__DIDT_STALL_PATTERN_1_MASK,    DIDT_DB_STALL_PATTERN_1_2__DIDT_STALL_PATTERN_1__SHIFT,  0x0001 },
-	{   ixDIDT_DB_STALL_PATTERN_1_2,  DIDT_DB_STALL_PATTERN_1_2__DIDT_STALL_PATTERN_2_MASK,    DIDT_DB_STALL_PATTERN_1_2__DIDT_STALL_PATTERN_2__SHIFT,  0x0001 },
-
-	/* DIDT_DB_STALL_PATTERN_3_4 */
-	{   ixDIDT_DB_STALL_PATTERN_3_4,  DIDT_DB_STALL_PATTERN_3_4__DIDT_STALL_PATTERN_3_MASK,    DIDT_DB_STALL_PATTERN_3_4__DIDT_STALL_PATTERN_3__SHIFT,  0x0001 },
-	{   ixDIDT_DB_STALL_PATTERN_3_4,  DIDT_DB_STALL_PATTERN_3_4__DIDT_STALL_PATTERN_4_MASK,    DIDT_DB_STALL_PATTERN_3_4__DIDT_STALL_PATTERN_4__SHIFT,  0x0001 },
-
-	/* DIDT_DB_STALL_PATTERN_5_6 */
-	{   ixDIDT_DB_STALL_PATTERN_5_6,  DIDT_DB_STALL_PATTERN_5_6__DIDT_STALL_PATTERN_5_MASK,    DIDT_DB_STALL_PATTERN_5_6__DIDT_STALL_PATTERN_5__SHIFT,  0x0000 },
-	{   ixDIDT_DB_STALL_PATTERN_5_6,  DIDT_DB_STALL_PATTERN_5_6__DIDT_STALL_PATTERN_6_MASK,    DIDT_DB_STALL_PATTERN_5_6__DIDT_STALL_PATTERN_6__SHIFT,  0x0000 },
-
-	/* DIDT_DB_STALL_PATTERN_7 */
-	{   ixDIDT_DB_STALL_PATTERN_7,    DIDT_DB_STALL_PATTERN_7__DIDT_STALL_PATTERN_7_MASK,      DIDT_DB_STALL_PATTERN_7__DIDT_STALL_PATTERN_7__SHIFT,    0x0000 },
-
-	{   0xFFFFFFFF  }  /* End of list */
-};
-
-static const struct vega12_didt_config_reg SELCacConfig_Vega12[] =
-{
-/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- *      Offset                             Mask                                                 Shift                                                  Value
- * ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- */
-	/* SQ */
-	{   ixSE_CAC_CNTL,                     0xFFFFFFFF,                                          0,                                                     0x00060021 },
-	{   ixSE_CAC_CNTL,                     0xFFFFFFFF,                                          0,                                                     0x00860021 },
-	{   ixSE_CAC_CNTL,                     0xFFFFFFFF,                                          0,                                                     0x01060021 },
-	{   ixSE_CAC_CNTL,                     0xFFFFFFFF,                                          0,                                                     0x01860021 },
-	{   ixSE_CAC_CNTL,                     0xFFFFFFFF,                                          0,                                                     0x02060021 },
-	{   ixSE_CAC_CNTL,                     0xFFFFFFFF,                                          0,                                                     0x02860021 },
-	{   ixSE_CAC_CNTL,                     0xFFFFFFFF,                                          0,                                                     0x03060021 },
-	{   ixSE_CAC_CNTL,                     0xFFFFFFFF,                                          0,                                                     0x03860021 },
-	{   ixSE_CAC_CNTL,                     0xFFFFFFFF,                                          0,                                                     0x04060021 },
-	/* TD */
-	{   ixSE_CAC_CNTL,                     0xFFFFFFFF,                                          0,                                                     0x000E0020 },
-	{   ixSE_CAC_CNTL,                     0xFFFFFFFF,                                          0,                                                     0x008E0020 },
-	{   ixSE_CAC_CNTL,                     0xFFFFFFFF,                                          0,                                                     0x010E0020 },
-	{   ixSE_CAC_CNTL,                     0xFFFFFFFF,                                          0,                                                     0x018E0020 },
-	{   ixSE_CAC_CNTL,                     0xFFFFFFFF,                                          0,                                                     0x020E0020 },
-	{   ixSE_CAC_CNTL,                     0xFFFFFFFF,                                          0,                                                     0x028E0020 },
-	/* TCP */
-	{   ixSE_CAC_CNTL,                     0xFFFFFFFF,                                          0,                                                     0x001c0020 },
-	{   ixSE_CAC_CNTL,                     0xFFFFFFFF,                                          0,                                                     0x009c0020 },
-	{   ixSE_CAC_CNTL,                     0xFFFFFFFF,                                          0,                                                     0x011c0020 },
-	{   ixSE_CAC_CNTL,                     0xFFFFFFFF,                                          0,                                                     0x019c0020 },
-	{   ixSE_CAC_CNTL,                     0xFFFFFFFF,                                          0,                                                     0x021c0020 },
-	/* DB */
-	{   ixSE_CAC_CNTL,                     0xFFFFFFFF,                                          0,                                                     0x00200008 },
-	{   ixSE_CAC_CNTL,                     0xFFFFFFFF,                                          0,                                                     0x00820008 },
-	{   ixSE_CAC_CNTL,                     0xFFFFFFFF,                                          0,                                                     0x01020008 },
-	{   ixSE_CAC_CNTL,                     0xFFFFFFFF,                                          0,                                                     0x01820008 },
-
-	{   0xFFFFFFFF  }  /* End of list */
-};
-
-
-static const struct vega12_didt_config_reg SEEDCStallPatternConfig_Vega12[] =
-{
-/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- *      Offset                             Mask                                                 Shift                                                  Value
- * ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- */
-	/* SQ */
-	{   ixDIDT_SQ_EDC_STALL_PATTERN_1_2,   0xFFFFFFFF,                                          0,                                                     0x00030001 },
-	{   ixDIDT_SQ_EDC_STALL_PATTERN_3_4,   0xFFFFFFFF,                                          0,                                                     0x000F0007 },
-	{   ixDIDT_SQ_EDC_STALL_PATTERN_5_6,   0xFFFFFFFF,                                          0,                                                     0x003F001F },
-	{   ixDIDT_SQ_EDC_STALL_PATTERN_7,     0xFFFFFFFF,                                          0,                                                     0x0000007F },
-	/* TD */
-	{   ixDIDT_TD_EDC_STALL_PATTERN_1_2,   0xFFFFFFFF,                                          0,                                                     0x00000000 },
-	{   ixDIDT_TD_EDC_STALL_PATTERN_3_4,   0xFFFFFFFF,                                          0,                                                     0x00000000 },
-	{   ixDIDT_TD_EDC_STALL_PATTERN_5_6,   0xFFFFFFFF,                                          0,                                                     0x00000000 },
-	{   ixDIDT_TD_EDC_STALL_PATTERN_7,     0xFFFFFFFF,                                          0,                                                     0x00000000 },
-	/* TCP */
-	{   ixDIDT_TCP_EDC_STALL_PATTERN_1_2,   0xFFFFFFFF,                                         0,                                                     0x00000000 },
-	{   ixDIDT_TCP_EDC_STALL_PATTERN_3_4,   0xFFFFFFFF,                                         0,                                                     0x00000000 },
-	{   ixDIDT_TCP_EDC_STALL_PATTERN_5_6,   0xFFFFFFFF,                                         0,                                                     0x00000000 },
-	{   ixDIDT_TCP_EDC_STALL_PATTERN_7,     0xFFFFFFFF,                                         0,                                                     0x00000000 },
-	/* DB */
-	{   ixDIDT_DB_EDC_STALL_PATTERN_1_2,   0xFFFFFFFF,                                          0,                                                     0x00000000 },
-	{   ixDIDT_DB_EDC_STALL_PATTERN_3_4,   0xFFFFFFFF,                                          0,                                                     0x00000000 },
-	{   ixDIDT_DB_EDC_STALL_PATTERN_5_6,   0xFFFFFFFF,                                          0,                                                     0x00000000 },
-	{   ixDIDT_DB_EDC_STALL_PATTERN_7,     0xFFFFFFFF,                                          0,                                                     0x00000000 },
-
-	{   0xFFFFFFFF  }  /* End of list */
-};
-
-static const struct vega12_didt_config_reg SEEDCForceStallPatternConfig_Vega12[] =
-{
-/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- *      Offset                             Mask                                                 Shift                                                  Value
- * ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- */
-	/* SQ */
-	{   ixDIDT_SQ_EDC_STALL_PATTERN_1_2,   0xFFFFFFFF,                                          0,                                                     0x00000015 },
-	{   ixDIDT_SQ_EDC_STALL_PATTERN_3_4,   0xFFFFFFFF,                                          0,                                                     0x00000000 },
-	{   ixDIDT_SQ_EDC_STALL_PATTERN_5_6,   0xFFFFFFFF,                                          0,                                                     0x00000000 },
-	{   ixDIDT_SQ_EDC_STALL_PATTERN_7,     0xFFFFFFFF,                                          0,                                                     0x00000000 },
-	/* TD */
-	{   ixDIDT_TD_EDC_STALL_PATTERN_1_2,   0xFFFFFFFF,                                          0,                                                     0x00000015 },
-	{   ixDIDT_TD_EDC_STALL_PATTERN_3_4,   0xFFFFFFFF,                                          0,                                                     0x00000000 },
-	{   ixDIDT_TD_EDC_STALL_PATTERN_5_6,   0xFFFFFFFF,                                          0,                                                     0x00000000 },
-	{   ixDIDT_TD_EDC_STALL_PATTERN_7,     0xFFFFFFFF,                                          0,                                                     0x00000000 },
-
-	{   0xFFFFFFFF  }  /* End of list */
-};
-
-static const struct vega12_didt_config_reg SEEDCStallDelayConfig_Vega12[] =
-{
-/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- *      Offset                             Mask                                                 Shift                                                  Value
- * ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- */
-	/* SQ */
-	{   ixDIDT_SQ_EDC_STALL_DELAY_1,       0xFFFFFFFF,                                          0,                                                     0x00000000 },
-	{   ixDIDT_SQ_EDC_STALL_DELAY_2,       0xFFFFFFFF,                                          0,                                                     0x00000000 },
-	/* TD */
-	{   ixDIDT_TD_EDC_STALL_DELAY_1,       0xFFFFFFFF,                                          0,                                                     0x00000000 },
-	{   ixDIDT_TD_EDC_STALL_DELAY_2,       0xFFFFFFFF,                                          0,                                                     0x00000000 },
-	/* TCP */
-	{   ixDIDT_TCP_EDC_STALL_DELAY_1,      0xFFFFFFFF,                                          0,                                                     0x00000000 },
-	{   ixDIDT_TCP_EDC_STALL_DELAY_2,      0xFFFFFFFF,                                          0,                                                     0x00000000 },
-	/* DB */
-	{   ixDIDT_DB_EDC_STALL_DELAY_1,       0xFFFFFFFF,                                          0,                                                     0x00000000 },
-
-	{   0xFFFFFFFF  }  /* End of list */
-};
-
-static const struct vega12_didt_config_reg SEEDCThresholdConfig_Vega12[] =
-{
-/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- *      Offset                             Mask                                                 Shift                                                  Value
- * ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- */
-	{   ixDIDT_SQ_EDC_THRESHOLD,           0xFFFFFFFF,                                          0,                                                     0x0000010E },
-	{   ixDIDT_TD_EDC_THRESHOLD,           0xFFFFFFFF,                                          0,                                                     0xFFFFFFFF },
-	{   ixDIDT_TCP_EDC_THRESHOLD,          0xFFFFFFFF,                                          0,                                                     0xFFFFFFFF },
-	{   ixDIDT_DB_EDC_THRESHOLD,           0xFFFFFFFF,                                          0,                                                     0xFFFFFFFF },
-
-	{   0xFFFFFFFF  }  /* End of list */
-};
-
-static const struct vega12_didt_config_reg SEEDCCtrlResetConfig_Vega12[] =
-{
-/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- *      Offset                             Mask                                                 Shift                                                  Value
- * ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- */
-	/* SQ */
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__EDC_EN_MASK,                       DIDT_SQ_EDC_CTRL__EDC_EN__SHIFT,                        0x0000 },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__EDC_SW_RST_MASK,                   DIDT_SQ_EDC_CTRL__EDC_SW_RST__SHIFT,                    0x0001 },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__EDC_CLK_EN_OVERRIDE_MASK,          DIDT_SQ_EDC_CTRL__EDC_CLK_EN_OVERRIDE__SHIFT,           0x0000 },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__EDC_FORCE_STALL_MASK,              DIDT_SQ_EDC_CTRL__EDC_FORCE_STALL__SHIFT,               0x0000 },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__EDC_TRIGGER_THROTTLE_LOWBIT_MASK,  DIDT_SQ_EDC_CTRL__EDC_TRIGGER_THROTTLE_LOWBIT__SHIFT,   0x0000 },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__EDC_STALL_PATTERN_BIT_NUMS_MASK,   DIDT_SQ_EDC_CTRL__EDC_STALL_PATTERN_BIT_NUMS__SHIFT,    0x0000 },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__EDC_ALLOW_WRITE_PWRDELTA_MASK,     DIDT_SQ_EDC_CTRL__EDC_ALLOW_WRITE_PWRDELTA__SHIFT,      0x0000 },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__GC_EDC_EN_MASK,                    DIDT_SQ_EDC_CTRL__GC_EDC_EN__SHIFT,                     0x0000 },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__GC_EDC_STALL_POLICY_MASK,          DIDT_SQ_EDC_CTRL__GC_EDC_STALL_POLICY__SHIFT,           0x0000 },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__GC_EDC_LEVEL_COMB_EN_MASK,         DIDT_SQ_EDC_CTRL__GC_EDC_LEVEL_COMB_EN__SHIFT,          0x0000 },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__SE_EDC_LEVEL_COMB_EN_MASK,         DIDT_SQ_EDC_CTRL__SE_EDC_LEVEL_COMB_EN__SHIFT,          0x0000 },
-
-	{   0xFFFFFFFF  }  /* End of list */
-};
-
-static const struct vega12_didt_config_reg SEEDCCtrlConfig_Vega12[] =
-{
-/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- *      Offset                             Mask                                                 Shift                                                  Value
- * ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- */
-	/* SQ */
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__EDC_EN_MASK,                       DIDT_SQ_EDC_CTRL__EDC_EN__SHIFT,                        0x0001 },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__EDC_SW_RST_MASK,                   DIDT_SQ_EDC_CTRL__EDC_SW_RST__SHIFT,                    0x0000 },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__EDC_CLK_EN_OVERRIDE_MASK,          DIDT_SQ_EDC_CTRL__EDC_CLK_EN_OVERRIDE__SHIFT,           0x0000 },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__EDC_FORCE_STALL_MASK,              DIDT_SQ_EDC_CTRL__EDC_FORCE_STALL__SHIFT,               0x0000 },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__EDC_TRIGGER_THROTTLE_LOWBIT_MASK,  DIDT_SQ_EDC_CTRL__EDC_TRIGGER_THROTTLE_LOWBIT__SHIFT,   0x0004 },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__EDC_STALL_PATTERN_BIT_NUMS_MASK,   DIDT_SQ_EDC_CTRL__EDC_STALL_PATTERN_BIT_NUMS__SHIFT,    0x0006 },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__EDC_ALLOW_WRITE_PWRDELTA_MASK,     DIDT_SQ_EDC_CTRL__EDC_ALLOW_WRITE_PWRDELTA__SHIFT,      0x0000 },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__GC_EDC_EN_MASK,                    DIDT_SQ_EDC_CTRL__GC_EDC_EN__SHIFT,                     0x0000 },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__GC_EDC_STALL_POLICY_MASK,          DIDT_SQ_EDC_CTRL__GC_EDC_STALL_POLICY__SHIFT,           0x0000 },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__GC_EDC_LEVEL_COMB_EN_MASK,         DIDT_SQ_EDC_CTRL__GC_EDC_LEVEL_COMB_EN__SHIFT,          0x0001 },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__SE_EDC_LEVEL_COMB_EN_MASK,         DIDT_SQ_EDC_CTRL__SE_EDC_LEVEL_COMB_EN__SHIFT,          0x0000 },
-
-	{   0xFFFFFFFF  }  /* End of list */
-};
-
-static const struct vega12_didt_config_reg SEEDCCtrlForceStallConfig_Vega12[] =
-{
-/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- *      Offset                             Mask                                                 Shift                                                  Value
- * ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- */
-	/* SQ */
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__EDC_EN_MASK,                       DIDT_SQ_EDC_CTRL__EDC_EN__SHIFT,                        0x0000 },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__EDC_SW_RST_MASK,                   DIDT_SQ_EDC_CTRL__EDC_SW_RST__SHIFT,                    0x0000 },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__EDC_CLK_EN_OVERRIDE_MASK,          DIDT_SQ_EDC_CTRL__EDC_CLK_EN_OVERRIDE__SHIFT,           0x0000 },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__EDC_FORCE_STALL_MASK,              DIDT_SQ_EDC_CTRL__EDC_FORCE_STALL__SHIFT,               0x0001 },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__EDC_TRIGGER_THROTTLE_LOWBIT_MASK,  DIDT_SQ_EDC_CTRL__EDC_TRIGGER_THROTTLE_LOWBIT__SHIFT,   0x0001 },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__EDC_STALL_PATTERN_BIT_NUMS_MASK,   DIDT_SQ_EDC_CTRL__EDC_STALL_PATTERN_BIT_NUMS__SHIFT,    0x000C },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__EDC_ALLOW_WRITE_PWRDELTA_MASK,     DIDT_SQ_EDC_CTRL__EDC_ALLOW_WRITE_PWRDELTA__SHIFT,      0x0000 },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__GC_EDC_EN_MASK,                    DIDT_SQ_EDC_CTRL__GC_EDC_EN__SHIFT,                     0x0000 },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__GC_EDC_STALL_POLICY_MASK,          DIDT_SQ_EDC_CTRL__GC_EDC_STALL_POLICY__SHIFT,           0x0000 },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__GC_EDC_LEVEL_COMB_EN_MASK,         DIDT_SQ_EDC_CTRL__GC_EDC_LEVEL_COMB_EN__SHIFT,          0x0000 },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__SE_EDC_LEVEL_COMB_EN_MASK,         DIDT_SQ_EDC_CTRL__SE_EDC_LEVEL_COMB_EN__SHIFT,          0x0001 },
-
-	/* TD */
-	{   ixDIDT_TD_EDC_CTRL,                DIDT_TD_EDC_CTRL__EDC_EN_MASK,                       DIDT_TD_EDC_CTRL__EDC_EN__SHIFT,                        0x0000 },
-	{   ixDIDT_TD_EDC_CTRL,                DIDT_TD_EDC_CTRL__EDC_SW_RST_MASK,                   DIDT_TD_EDC_CTRL__EDC_SW_RST__SHIFT,                    0x0000 },
-	{   ixDIDT_TD_EDC_CTRL,                DIDT_TD_EDC_CTRL__EDC_CLK_EN_OVERRIDE_MASK,          DIDT_TD_EDC_CTRL__EDC_CLK_EN_OVERRIDE__SHIFT,           0x0000 },
-	{   ixDIDT_TD_EDC_CTRL,                DIDT_TD_EDC_CTRL__EDC_FORCE_STALL_MASK,              DIDT_TD_EDC_CTRL__EDC_FORCE_STALL__SHIFT,               0x0001 },
-	{   ixDIDT_TD_EDC_CTRL,                DIDT_TD_EDC_CTRL__EDC_TRIGGER_THROTTLE_LOWBIT_MASK,  DIDT_TD_EDC_CTRL__EDC_TRIGGER_THROTTLE_LOWBIT__SHIFT,   0x0001 },
-	{   ixDIDT_TD_EDC_CTRL,                DIDT_TD_EDC_CTRL__EDC_STALL_PATTERN_BIT_NUMS_MASK,   DIDT_TD_EDC_CTRL__EDC_STALL_PATTERN_BIT_NUMS__SHIFT,    0x000E },
-	{   ixDIDT_TD_EDC_CTRL,                DIDT_TD_EDC_CTRL__EDC_ALLOW_WRITE_PWRDELTA_MASK,     DIDT_TD_EDC_CTRL__EDC_ALLOW_WRITE_PWRDELTA__SHIFT,      0x0000 },
-	{   ixDIDT_TD_EDC_CTRL,                DIDT_TD_EDC_CTRL__GC_EDC_EN_MASK,                    DIDT_TD_EDC_CTRL__GC_EDC_EN__SHIFT,                     0x0000 },
-	{   ixDIDT_TD_EDC_CTRL,                DIDT_TD_EDC_CTRL__GC_EDC_STALL_POLICY_MASK,          DIDT_TD_EDC_CTRL__GC_EDC_STALL_POLICY__SHIFT,           0x0000 },
-	{   ixDIDT_TD_EDC_CTRL,                DIDT_TD_EDC_CTRL__GC_EDC_LEVEL_COMB_EN_MASK,         DIDT_TD_EDC_CTRL__GC_EDC_LEVEL_COMB_EN__SHIFT,          0x0000 },
-	{   ixDIDT_TD_EDC_CTRL,                DIDT_TD_EDC_CTRL__SE_EDC_LEVEL_COMB_EN_MASK,         DIDT_TD_EDC_CTRL__SE_EDC_LEVEL_COMB_EN__SHIFT,          0x0001 },
-
-	{   0xFFFFFFFF  }  /* End of list */
-};
-
-static const struct vega12_didt_config_reg    GCDiDtDroopCtrlConfig_vega12[] =
-{
-/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- *      Offset                             Mask                                                 Shift                                                  Value
- * ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- */
-	{   mmGC_DIDT_DROOP_CTRL,             GC_DIDT_DROOP_CTRL__DIDT_DROOP_LEVEL_EN_MASK,   GC_DIDT_DROOP_CTRL__DIDT_DROOP_LEVEL_EN__SHIFT,  0x0000 },
-	{   mmGC_DIDT_DROOP_CTRL,             GC_DIDT_DROOP_CTRL__DIDT_DROOP_THRESHOLD_MASK,   GC_DIDT_DROOP_CTRL__DIDT_DROOP_THRESHOLD__SHIFT,  0x0000 },
-	{   mmGC_DIDT_DROOP_CTRL,             GC_DIDT_DROOP_CTRL__DIDT_DROOP_LEVEL_INDEX_MASK,   GC_DIDT_DROOP_CTRL__DIDT_DROOP_LEVEL_INDEX__SHIFT,  0x0000 },
-	{   mmGC_DIDT_DROOP_CTRL,             GC_DIDT_DROOP_CTRL__DIDT_LEVEL_SEL_MASK,   GC_DIDT_DROOP_CTRL__DIDT_LEVEL_SEL__SHIFT,  0x0000 },
-	{   mmGC_DIDT_DROOP_CTRL,             GC_DIDT_DROOP_CTRL__DIDT_DROOP_LEVEL_OVERFLOW_MASK,   GC_DIDT_DROOP_CTRL__DIDT_DROOP_LEVEL_OVERFLOW__SHIFT,  0x0000 },
-
-	{   0xFFFFFFFF  }  /* End of list */
-};
-
-static const struct vega12_didt_config_reg    GCDiDtCtrl0Config_vega12[] =
-{
-/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- *      Offset                             Mask                                                 Shift                                                  Value
- * ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- */
-	{   mmGC_DIDT_CTRL0,                  GC_DIDT_CTRL0__DIDT_CTRL_EN_MASK,   GC_DIDT_CTRL0__DIDT_CTRL_EN__SHIFT,  0x0000 },
-	{   mmGC_DIDT_CTRL0,                  GC_DIDT_CTRL0__PHASE_OFFSET_MASK,   GC_DIDT_CTRL0__PHASE_OFFSET__SHIFT,  0x0000 },
-	{   mmGC_DIDT_CTRL0,                  GC_DIDT_CTRL0__DIDT_SW_RST_MASK,   GC_DIDT_CTRL0__DIDT_SW_RST__SHIFT,  0x0000 },
-	{   mmGC_DIDT_CTRL0,                  GC_DIDT_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK,   GC_DIDT_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT,  0x0000 },
-	{   mmGC_DIDT_CTRL0,                  GC_DIDT_CTRL0__DIDT_TRIGGER_THROTTLE_LOWBIT_MASK,   GC_DIDT_CTRL0__DIDT_TRIGGER_THROTTLE_LOWBIT__SHIFT,  0x0000 },
-	{   0xFFFFFFFF  }  /* End of list */
-};
-
-
-static const struct vega12_didt_config_reg   PSMSEEDCStallPatternConfig_Vega12[] =
-{
-/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- *      Offset                             Mask                                                 Shift                                                  Value
- * ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- */
-	/* SQ EDC STALL PATTERNs */
-	{   ixDIDT_SQ_EDC_STALL_PATTERN_1_2,  DIDT_SQ_EDC_STALL_PATTERN_1_2__EDC_STALL_PATTERN_1_MASK,   DIDT_SQ_EDC_STALL_PATTERN_1_2__EDC_STALL_PATTERN_1__SHIFT,   0x0101 },
-	{   ixDIDT_SQ_EDC_STALL_PATTERN_1_2,  DIDT_SQ_EDC_STALL_PATTERN_1_2__EDC_STALL_PATTERN_2_MASK,   DIDT_SQ_EDC_STALL_PATTERN_1_2__EDC_STALL_PATTERN_2__SHIFT,   0x0101 },
-	{   ixDIDT_SQ_EDC_STALL_PATTERN_3_4,  DIDT_SQ_EDC_STALL_PATTERN_3_4__EDC_STALL_PATTERN_3_MASK,   DIDT_SQ_EDC_STALL_PATTERN_3_4__EDC_STALL_PATTERN_3__SHIFT,   0x1111 },
-	{   ixDIDT_SQ_EDC_STALL_PATTERN_3_4,  DIDT_SQ_EDC_STALL_PATTERN_3_4__EDC_STALL_PATTERN_4_MASK,   DIDT_SQ_EDC_STALL_PATTERN_3_4__EDC_STALL_PATTERN_4__SHIFT,   0x1111 },
-
-	{   ixDIDT_SQ_EDC_STALL_PATTERN_5_6,  DIDT_SQ_EDC_STALL_PATTERN_5_6__EDC_STALL_PATTERN_5_MASK,   DIDT_SQ_EDC_STALL_PATTERN_5_6__EDC_STALL_PATTERN_5__SHIFT,   0x1515 },
-	{   ixDIDT_SQ_EDC_STALL_PATTERN_5_6,  DIDT_SQ_EDC_STALL_PATTERN_5_6__EDC_STALL_PATTERN_6_MASK,   DIDT_SQ_EDC_STALL_PATTERN_5_6__EDC_STALL_PATTERN_6__SHIFT,   0x1515 },
-
-	{   ixDIDT_SQ_EDC_STALL_PATTERN_7,  DIDT_SQ_EDC_STALL_PATTERN_7__EDC_STALL_PATTERN_7_MASK,   DIDT_SQ_EDC_STALL_PATTERN_7__EDC_STALL_PATTERN_7__SHIFT,     0x5555 },
-
-	{   0xFFFFFFFF  }  /* End of list */
-};
-
-static const struct vega12_didt_config_reg   PSMSEEDCStallDelayConfig_Vega12[] =
-{
-/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- *      Offset                             Mask                                                 Shift                                                  Value
- * ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- */
-	/* SQ EDC STALL DELAYs */
-	{   ixDIDT_SQ_EDC_STALL_DELAY_1,      DIDT_SQ_EDC_STALL_DELAY_1__EDC_STALL_DELAY_SQ0_MASK,  DIDT_SQ_EDC_STALL_DELAY_1__EDC_STALL_DELAY_SQ0__SHIFT,  0x0000 },
-	{   ixDIDT_SQ_EDC_STALL_DELAY_1,      DIDT_SQ_EDC_STALL_DELAY_1__EDC_STALL_DELAY_SQ1_MASK,  DIDT_SQ_EDC_STALL_DELAY_1__EDC_STALL_DELAY_SQ1__SHIFT,  0x0000 },
-	{   ixDIDT_SQ_EDC_STALL_DELAY_1,      DIDT_SQ_EDC_STALL_DELAY_1__EDC_STALL_DELAY_SQ2_MASK,  DIDT_SQ_EDC_STALL_DELAY_1__EDC_STALL_DELAY_SQ2__SHIFT,  0x0000 },
-	{   ixDIDT_SQ_EDC_STALL_DELAY_1,      DIDT_SQ_EDC_STALL_DELAY_1__EDC_STALL_DELAY_SQ3_MASK,  DIDT_SQ_EDC_STALL_DELAY_1__EDC_STALL_DELAY_SQ3__SHIFT,  0x0000 },
-
-	{   ixDIDT_SQ_EDC_STALL_DELAY_2,      DIDT_SQ_EDC_STALL_DELAY_2__EDC_STALL_DELAY_SQ4_MASK,  DIDT_SQ_EDC_STALL_DELAY_2__EDC_STALL_DELAY_SQ4__SHIFT,  0x0000 },
-
-	{   0xFFFFFFFF  }  /* End of list */
-};
-
-static const struct vega12_didt_config_reg   PSMSEEDCThresholdConfig_Vega12[] =
-{
-/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- *      Offset                             Mask                                                 Shift                                                  Value
- * ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- */
-	/* SQ EDC THRESHOLD */
-	{   ixDIDT_SQ_EDC_THRESHOLD,           DIDT_SQ_EDC_THRESHOLD__EDC_THRESHOLD_MASK,           DIDT_SQ_EDC_THRESHOLD__EDC_THRESHOLD__SHIFT,            0x0000 },
-
-	{   0xFFFFFFFF  }  /* End of list */
-};
-
-static const struct vega12_didt_config_reg   PSMSEEDCCtrlResetConfig_Vega12[] =
-{
-/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- *      Offset                             Mask                                                 Shift                                                  Value
- * ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- */
-	/* SQ EDC CTRL */
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__EDC_EN_MASK,                       DIDT_SQ_EDC_CTRL__EDC_EN__SHIFT,                        0x0000 },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__EDC_SW_RST_MASK,                   DIDT_SQ_EDC_CTRL__EDC_SW_RST__SHIFT,                    0x0001 },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__EDC_CLK_EN_OVERRIDE_MASK,          DIDT_SQ_EDC_CTRL__EDC_CLK_EN_OVERRIDE__SHIFT,           0x0000 },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__EDC_FORCE_STALL_MASK,              DIDT_SQ_EDC_CTRL__EDC_FORCE_STALL__SHIFT,               0x0000 },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__EDC_TRIGGER_THROTTLE_LOWBIT_MASK,  DIDT_SQ_EDC_CTRL__EDC_TRIGGER_THROTTLE_LOWBIT__SHIFT,   0x0000 },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__EDC_STALL_PATTERN_BIT_NUMS_MASK,   DIDT_SQ_EDC_CTRL__EDC_STALL_PATTERN_BIT_NUMS__SHIFT,    0x0000 },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__EDC_ALLOW_WRITE_PWRDELTA_MASK,     DIDT_SQ_EDC_CTRL__EDC_ALLOW_WRITE_PWRDELTA__SHIFT,      0x0000 },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__GC_EDC_EN_MASK,                    DIDT_SQ_EDC_CTRL__GC_EDC_EN__SHIFT,                     0x0000 },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__GC_EDC_STALL_POLICY_MASK,          DIDT_SQ_EDC_CTRL__GC_EDC_STALL_POLICY__SHIFT,           0x0000 },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__GC_EDC_LEVEL_COMB_EN_MASK,         DIDT_SQ_EDC_CTRL__GC_EDC_LEVEL_COMB_EN__SHIFT,          0x0000 },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__SE_EDC_LEVEL_COMB_EN_MASK,         DIDT_SQ_EDC_CTRL__SE_EDC_LEVEL_COMB_EN__SHIFT,          0x0000 },
-
-	{   0xFFFFFFFF  }  /* End of list */
-};
-
-static const struct vega12_didt_config_reg   PSMSEEDCCtrlConfig_Vega12[] =
-{
-/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- *      Offset                             Mask                                                 Shift                                                  Value
- * ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- */
-	/* SQ EDC CTRL */
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__EDC_EN_MASK,                       DIDT_SQ_EDC_CTRL__EDC_EN__SHIFT,                        0x0001 },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__EDC_SW_RST_MASK,                   DIDT_SQ_EDC_CTRL__EDC_SW_RST__SHIFT,                    0x0000 },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__EDC_CLK_EN_OVERRIDE_MASK,          DIDT_SQ_EDC_CTRL__EDC_CLK_EN_OVERRIDE__SHIFT,           0x0000 },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__EDC_FORCE_STALL_MASK,              DIDT_SQ_EDC_CTRL__EDC_FORCE_STALL__SHIFT,               0x0000 },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__EDC_TRIGGER_THROTTLE_LOWBIT_MASK,  DIDT_SQ_EDC_CTRL__EDC_TRIGGER_THROTTLE_LOWBIT__SHIFT,   0x0000 },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__EDC_STALL_PATTERN_BIT_NUMS_MASK,   DIDT_SQ_EDC_CTRL__EDC_STALL_PATTERN_BIT_NUMS__SHIFT,    0x000E },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__EDC_ALLOW_WRITE_PWRDELTA_MASK,     DIDT_SQ_EDC_CTRL__EDC_ALLOW_WRITE_PWRDELTA__SHIFT,      0x0000 },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__GC_EDC_EN_MASK,                    DIDT_SQ_EDC_CTRL__GC_EDC_EN__SHIFT,                     0x0001 },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__GC_EDC_STALL_POLICY_MASK,          DIDT_SQ_EDC_CTRL__GC_EDC_STALL_POLICY__SHIFT,           0x0003 },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__GC_EDC_LEVEL_COMB_EN_MASK,         DIDT_SQ_EDC_CTRL__GC_EDC_LEVEL_COMB_EN__SHIFT,          0x0001 },
-	{   ixDIDT_SQ_EDC_CTRL,                DIDT_SQ_EDC_CTRL__SE_EDC_LEVEL_COMB_EN_MASK,         DIDT_SQ_EDC_CTRL__SE_EDC_LEVEL_COMB_EN__SHIFT,          0x0000 },
-
-	{   0xFFFFFFFF  }  /* End of list */
-};
-
-static const struct vega12_didt_config_reg   PSMGCEDCThresholdConfig_vega12[] =
-{
-/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- *      Offset                             Mask                                                 Shift                                                  Value
- * ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- */
-	{   mmGC_EDC_THRESHOLD,                GC_EDC_THRESHOLD__EDC_THRESHOLD_MASK,                GC_EDC_THRESHOLD__EDC_THRESHOLD__SHIFT,                 0x0000000 },
-
-	{   0xFFFFFFFF  }  /* End of list */
-};
-
-static const struct vega12_didt_config_reg   PSMGCEDCDroopCtrlConfig_vega12[] =
-{
-/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- *      Offset                             Mask                                                 Shift                                                  Value
- * ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- */
-	{   mmGC_EDC_DROOP_CTRL,               GC_EDC_DROOP_CTRL__EDC_DROOP_LEVEL_EN_MASK,          GC_EDC_DROOP_CTRL__EDC_DROOP_LEVEL_EN__SHIFT,           0x0001 },
-	{   mmGC_EDC_DROOP_CTRL,               GC_EDC_DROOP_CTRL__EDC_DROOP_THRESHOLD_MASK,         GC_EDC_DROOP_CTRL__EDC_DROOP_THRESHOLD__SHIFT,          0x0384 },
-	{   mmGC_EDC_DROOP_CTRL,               GC_EDC_DROOP_CTRL__EDC_DROOP_LEVEL_INDEX_MASK,       GC_EDC_DROOP_CTRL__EDC_DROOP_LEVEL_INDEX__SHIFT,        0x0001 },
-	{   mmGC_EDC_DROOP_CTRL,               GC_EDC_DROOP_CTRL__AVG_PSM_SEL_MASK,                 GC_EDC_DROOP_CTRL__AVG_PSM_SEL__SHIFT,                  0x0001 },
-	{   mmGC_EDC_DROOP_CTRL,               GC_EDC_DROOP_CTRL__EDC_LEVEL_SEL_MASK,               GC_EDC_DROOP_CTRL__EDC_LEVEL_SEL__SHIFT,                0x0001 },
-
-	{   0xFFFFFFFF  }  /* End of list */
-};
-
-static const struct vega12_didt_config_reg   PSMGCEDCCtrlResetConfig_vega12[] =
-{
-/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- *      Offset                             Mask                                                 Shift                                                  Value
- * ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- */
-	{   mmGC_EDC_CTRL,                     GC_EDC_CTRL__EDC_EN_MASK,                            GC_EDC_CTRL__EDC_EN__SHIFT,                             0x0000 },
-	{   mmGC_EDC_CTRL,                     GC_EDC_CTRL__EDC_SW_RST_MASK,                        GC_EDC_CTRL__EDC_SW_RST__SHIFT,                         0x0001 },
-	{   mmGC_EDC_CTRL,                     GC_EDC_CTRL__EDC_CLK_EN_OVERRIDE_MASK,               GC_EDC_CTRL__EDC_CLK_EN_OVERRIDE__SHIFT,                0x0000 },
-	{   mmGC_EDC_CTRL,                     GC_EDC_CTRL__EDC_FORCE_STALL_MASK,                   GC_EDC_CTRL__EDC_FORCE_STALL__SHIFT,                    0x0000 },
-	{   mmGC_EDC_CTRL,                     GC_EDC_CTRL__EDC_TRIGGER_THROTTLE_LOWBIT_MASK,       GC_EDC_CTRL__EDC_TRIGGER_THROTTLE_LOWBIT__SHIFT,        0x0000 },
-	{   mmGC_EDC_CTRL,                     GC_EDC_CTRL__EDC_ALLOW_WRITE_PWRDELTA_MASK,          GC_EDC_CTRL__EDC_ALLOW_WRITE_PWRDELTA__SHIFT,           0x0000 },
-
-	{   0xFFFFFFFF  }  /* End of list */
-};
-
-static const struct vega12_didt_config_reg   PSMGCEDCCtrlConfig_vega12[] =
-{
-/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- *      Offset                             Mask                                                 Shift                                                  Value
- * ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- */
-	{   mmGC_EDC_CTRL,                     GC_EDC_CTRL__EDC_EN_MASK,                            GC_EDC_CTRL__EDC_EN__SHIFT,                             0x0001 },
-	{   mmGC_EDC_CTRL,                     GC_EDC_CTRL__EDC_SW_RST_MASK,                        GC_EDC_CTRL__EDC_SW_RST__SHIFT,                         0x0000 },
-	{   mmGC_EDC_CTRL,                     GC_EDC_CTRL__EDC_CLK_EN_OVERRIDE_MASK,               GC_EDC_CTRL__EDC_CLK_EN_OVERRIDE__SHIFT,                0x0000 },
-	{   mmGC_EDC_CTRL,                     GC_EDC_CTRL__EDC_FORCE_STALL_MASK,                   GC_EDC_CTRL__EDC_FORCE_STALL__SHIFT,                    0x0000 },
-	{   mmGC_EDC_CTRL,                     GC_EDC_CTRL__EDC_TRIGGER_THROTTLE_LOWBIT_MASK,       GC_EDC_CTRL__EDC_TRIGGER_THROTTLE_LOWBIT__SHIFT,        0x0000 },
-	{   mmGC_EDC_CTRL,                     GC_EDC_CTRL__EDC_ALLOW_WRITE_PWRDELTA_MASK,          GC_EDC_CTRL__EDC_ALLOW_WRITE_PWRDELTA__SHIFT,           0x0000 },
-
-	{   0xFFFFFFFF  }  /* End of list */
-};
-
-static const struct vega12_didt_config_reg    AvfsPSMResetConfig_vega12[]=
-{
-/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- *      Offset                             Mask                                                 Shift                                                  Value
- * ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- */
-	{   0x16A02,                         0xFFFFFFFF,                                            0x0,                                                    0x0000005F },
-	{   0x16A05,                         0xFFFFFFFF,                                            0x0,                                                    0x00000001 },
-	{   0x16A06,                         0x00000001,                                            0x0,                                                    0x02000000 },
-	{   0x16A01,                         0xFFFFFFFF,                                            0x0,                                                    0x00003027 },
-
-	{   0xFFFFFFFF  }  /* End of list */
-};
-
-static const struct vega12_didt_config_reg    AvfsPSMInitConfig_vega12[] =
-{
-/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- *      Offset                             Mask                                                 Shift                                                  Value
- * ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- */
-	{   0x16A05,                         0xFFFFFFFF,                                            0x18,                                                    0x00000001 },
-	{   0x16A05,                         0xFFFFFFFF,                                            0x8,                                                     0x00000003 },
-	{   0x16A05,                         0xFFFFFFFF,                                            0xa,                                                     0x00000006 },
-	{   0x16A05,                         0xFFFFFFFF,                                            0x7,                                                     0x00000000 },
-	{   0x16A06,                         0xFFFFFFFF,                                            0x18,                                                    0x00000001 },
-	{   0x16A06,                         0xFFFFFFFF,                                            0x19,                                                    0x00000001 },
-	{   0x16A01,                         0xFFFFFFFF,                                            0x0,                                                     0x00003027 },
-
-	{   0xFFFFFFFF  }  /* End of list */
-};
-
-static int vega12_program_didt_config_registers(struct pp_hwmgr *hwmgr, const struct vega12_didt_config_reg *config_regs, enum vega12_didt_config_reg_type reg_type)
-{
-	uint32_t data;
-
-	PP_ASSERT_WITH_CODE((config_regs != NULL), "[vega12_program_didt_config_registers] Invalid config register table!", return -EINVAL);
-
-	while (config_regs->offset != 0xFFFFFFFF) {
-		switch (reg_type) {
-		case VEGA12_CONFIGREG_DIDT:
-			data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__DIDT, config_regs->offset);
-			data &= ~config_regs->mask;
-			data |= ((config_regs->value << config_regs->shift) & config_regs->mask);
-			cgs_write_ind_register(hwmgr->device, CGS_IND_REG__DIDT, config_regs->offset, data);
-			break;
-		case VEGA12_CONFIGREG_GCCAC:
-			data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG_GC_CAC, config_regs->offset);
-			data &= ~config_regs->mask;
-			data |= ((config_regs->value << config_regs->shift) & config_regs->mask);
-			cgs_write_ind_register(hwmgr->device, CGS_IND_REG_GC_CAC, config_regs->offset, data);
-			break;
-		case VEGA12_CONFIGREG_SECAC:
-			data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG_SE_CAC, config_regs->offset);
-			data &= ~config_regs->mask;
-			data |= ((config_regs->value << config_regs->shift) & config_regs->mask);
-			cgs_write_ind_register(hwmgr->device, CGS_IND_REG_SE_CAC, config_regs->offset, data);
-			break;
-		default:
-			return -EINVAL;
-		}
-
-		config_regs++;
-	}
-
-	return 0;
-}
-
-static int vega12_program_gc_didt_config_registers(struct pp_hwmgr *hwmgr, const struct vega12_didt_config_reg *config_regs)
-{
-	uint32_t data;
-
-	while (config_regs->offset != 0xFFFFFFFF) {
-		data = cgs_read_register(hwmgr->device, config_regs->offset);
-		data &= ~config_regs->mask;
-		data |= ((config_regs->value << config_regs->shift) & config_regs->mask);
-		cgs_write_register(hwmgr->device, config_regs->offset, data);
-		config_regs++;
-	}
-
-	return 0;
-}
-
-static void vega12_didt_set_mask(struct pp_hwmgr *hwmgr, const bool enable)
-{
-	uint32_t data;
-	int result;
-	uint32_t en = (enable ? 1 : 0);
-	uint32_t didt_block_info = SQ_IR_MASK | TCP_IR_MASK | TD_PCC_MASK;
-
-	if (PP_CAP(PHM_PlatformCaps_SQRamping)) {
-		CGS_WREG32_FIELD_IND(hwmgr->device, CGS_IND_REG__DIDT,
-				     DIDT_SQ_CTRL0, DIDT_CTRL_EN, en);
-		didt_block_info &= ~SQ_Enable_MASK;
-		didt_block_info |= en << SQ_Enable_SHIFT;
-	}
-
-	if (PP_CAP(PHM_PlatformCaps_DBRamping)) {
-		CGS_WREG32_FIELD_IND(hwmgr->device, CGS_IND_REG__DIDT,
-				     DIDT_DB_CTRL0, DIDT_CTRL_EN, en);
-		didt_block_info &= ~DB_Enable_MASK;
-		didt_block_info |= en << DB_Enable_SHIFT;
-	}
-
-	if (PP_CAP(PHM_PlatformCaps_TDRamping)) {
-		CGS_WREG32_FIELD_IND(hwmgr->device, CGS_IND_REG__DIDT,
-				     DIDT_TD_CTRL0, DIDT_CTRL_EN, en);
-		didt_block_info &= ~TD_Enable_MASK;
-		didt_block_info |= en << TD_Enable_SHIFT;
-	}
-
-	if (PP_CAP(PHM_PlatformCaps_TCPRamping)) {
-		CGS_WREG32_FIELD_IND(hwmgr->device, CGS_IND_REG__DIDT,
-				     DIDT_TCP_CTRL0, DIDT_CTRL_EN, en);
-		didt_block_info &= ~TCP_Enable_MASK;
-		didt_block_info |= en << TCP_Enable_SHIFT;
-	}
-
-#if 0
-	if (PP_CAP(PHM_PlatformCaps_DBRRamping)) {
-		CGS_WREG32_FIELD_IND(hwmgr->device, CGS_IND_REG__DIDT,
-				     DIDT_DBR_CTRL0, DIDT_CTRL_EN, en);
-	}
-#endif
-
-	if (PP_CAP(PHM_PlatformCaps_DiDtEDCEnable)) {
-		if (PP_CAP(PHM_PlatformCaps_SQRamping)) {
-			data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_SQ_EDC_CTRL);
-			data = CGS_REG_SET_FIELD(data, DIDT_SQ_EDC_CTRL, EDC_EN, en);
-			data = CGS_REG_SET_FIELD(data, DIDT_SQ_EDC_CTRL, EDC_SW_RST, ~en);
-			cgs_write_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_SQ_EDC_CTRL, data);
-		}
-
-		if (PP_CAP(PHM_PlatformCaps_DBRamping)) {
-			data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_DB_EDC_CTRL);
-			data = CGS_REG_SET_FIELD(data, DIDT_DB_EDC_CTRL, EDC_EN, en);
-			data = CGS_REG_SET_FIELD(data, DIDT_DB_EDC_CTRL, EDC_SW_RST, ~en);
-			cgs_write_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_DB_EDC_CTRL, data);
-		}
-
-		if (PP_CAP(PHM_PlatformCaps_TDRamping)) {
-			data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_TD_EDC_CTRL);
-			data = CGS_REG_SET_FIELD(data, DIDT_TD_EDC_CTRL, EDC_EN, en);
-			data = CGS_REG_SET_FIELD(data, DIDT_TD_EDC_CTRL, EDC_SW_RST, ~en);
-			cgs_write_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_TD_EDC_CTRL, data);
-		}
-
-		if (PP_CAP(PHM_PlatformCaps_TCPRamping)) {
-			data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_TCP_EDC_CTRL);
-			data = CGS_REG_SET_FIELD(data, DIDT_TCP_EDC_CTRL, EDC_EN, en);
-			data = CGS_REG_SET_FIELD(data, DIDT_TCP_EDC_CTRL, EDC_SW_RST, ~en);
-			cgs_write_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_TCP_EDC_CTRL, data);
-		}
-
-#if 0
-		if (PP_CAP(PHM_PlatformCaps_DBRRamping)) {
-			data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_DBR_EDC_CTRL);
-			data = CGS_REG_SET_FIELD(data, DIDT_DBR_EDC_CTRL, EDC_EN, en);
-			data = CGS_REG_SET_FIELD(data, DIDT_DBR_EDC_CTRL, EDC_SW_RST, ~en);
-			cgs_write_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_DBR_EDC_CTRL, data);
-		}
-#endif
-	}
-
-	if (enable) {
-		/* For Vega12, SMC does not support any mask yet. */
-		result = smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_ConfigureGfxDidt, didt_block_info);
-		PP_ASSERT((0 == result), "[EnableDiDtConfig] SMC Configure Gfx Didt Failed!");
-	}
-}
-
-static int vega12_enable_cac_driving_se_didt_config(struct pp_hwmgr *hwmgr)
-{
-	int result;
-	uint32_t num_se = 0, count, data;
-	struct amdgpu_device *adev = hwmgr->adev;
-	uint32_t reg;
-
-	num_se = adev->gfx.config.max_shader_engines;
-
-	cgs_enter_safe_mode(hwmgr->device, true);
-
-	cgs_lock_grbm_idx(hwmgr->device, true);
-	reg = soc15_get_register_offset(GC_HWID, 0, mmGRBM_GFX_INDEX_BASE_IDX, mmGRBM_GFX_INDEX);
-	for (count = 0; count < num_se; count++) {
-		data = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK | GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK | ( count << GRBM_GFX_INDEX__SE_INDEX__SHIFT);
-		cgs_write_register(hwmgr->device, reg, data);
-
-		result =  vega12_program_didt_config_registers(hwmgr, SEDiDtStallCtrlConfig_vega12, VEGA12_CONFIGREG_DIDT);
-		result |= vega12_program_didt_config_registers(hwmgr, SEDiDtStallPatternConfig_vega12, VEGA12_CONFIGREG_DIDT);
-		result |= vega12_program_didt_config_registers(hwmgr, SEDiDtWeightConfig_Vega12, VEGA12_CONFIGREG_DIDT);
-		result |= vega12_program_didt_config_registers(hwmgr, SEDiDtCtrl1Config_Vega12, VEGA12_CONFIGREG_DIDT);
-		result |= vega12_program_didt_config_registers(hwmgr, SEDiDtCtrl2Config_Vega12, VEGA12_CONFIGREG_DIDT);
-		result |= vega12_program_didt_config_registers(hwmgr, SEDiDtCtrl3Config_vega12, VEGA12_CONFIGREG_DIDT);
-		result |= vega12_program_didt_config_registers(hwmgr, SEDiDtTuningCtrlConfig_Vega12, VEGA12_CONFIGREG_DIDT);
-		result |= vega12_program_didt_config_registers(hwmgr, SELCacConfig_Vega12, VEGA12_CONFIGREG_SECAC);
-		result |= vega12_program_didt_config_registers(hwmgr, SEDiDtCtrl0Config_Vega12, VEGA12_CONFIGREG_DIDT);
-
-		if (0 != result)
-			break;
-	}
-	cgs_write_register(hwmgr->device, reg, 0xE0000000);
-	cgs_lock_grbm_idx(hwmgr->device, false);
-
-	vega12_didt_set_mask(hwmgr, true);
-
-	cgs_enter_safe_mode(hwmgr->device, false);
-
-	return 0;
-}
-
-static int vega12_disable_cac_driving_se_didt_config(struct pp_hwmgr *hwmgr)
-{
-	cgs_enter_safe_mode(hwmgr->device, true);
-
-	vega12_didt_set_mask(hwmgr, false);
-
-	cgs_enter_safe_mode(hwmgr->device, false);
-
-	return 0;
-}
-
-static int vega12_enable_psm_gc_didt_config(struct pp_hwmgr *hwmgr)
-{
-	int result;
-	uint32_t num_se = 0, count, data;
-	struct amdgpu_device *adev = hwmgr->adev;
-	uint32_t reg;
-
-	num_se = adev->gfx.config.max_shader_engines;
-
-	cgs_enter_safe_mode(hwmgr->device, true);
-
-	cgs_lock_grbm_idx(hwmgr->device, true);
-	reg = soc15_get_register_offset(GC_HWID, 0, mmGRBM_GFX_INDEX_BASE_IDX, mmGRBM_GFX_INDEX);
-	for (count = 0; count < num_se; count++) {
-		data = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK | GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK | ( count << GRBM_GFX_INDEX__SE_INDEX__SHIFT);
-		cgs_write_register(hwmgr->device, reg, data);
-
-		result = vega12_program_didt_config_registers(hwmgr, SEDiDtStallCtrlConfig_vega12, VEGA12_CONFIGREG_DIDT);
-		result |= vega12_program_didt_config_registers(hwmgr, SEDiDtStallPatternConfig_vega12, VEGA12_CONFIGREG_DIDT);
-		result |= vega12_program_didt_config_registers(hwmgr, SEDiDtCtrl3Config_vega12, VEGA12_CONFIGREG_DIDT);
-		result |= vega12_program_didt_config_registers(hwmgr, SEDiDtCtrl0Config_Vega12, VEGA12_CONFIGREG_DIDT);
-		if (0 != result)
-			break;
-	}
-	cgs_write_register(hwmgr->device, reg, 0xE0000000);
-	cgs_lock_grbm_idx(hwmgr->device, false);
-
-	vega12_didt_set_mask(hwmgr, true);
-
-	cgs_enter_safe_mode(hwmgr->device, false);
-
-	vega12_program_gc_didt_config_registers(hwmgr, GCDiDtDroopCtrlConfig_vega12);
-	if (PP_CAP(PHM_PlatformCaps_GCEDC))
-		vega12_program_gc_didt_config_registers(hwmgr, GCDiDtCtrl0Config_vega12);
-
-	if (PP_CAP(PHM_PlatformCaps_PSM))
-		vega12_program_gc_didt_config_registers(hwmgr,  AvfsPSMInitConfig_vega12);
-
-	return 0;
-}
-
-static int vega12_disable_psm_gc_didt_config(struct pp_hwmgr *hwmgr)
-{
-	uint32_t data;
-
-	cgs_enter_safe_mode(hwmgr->device, true);
-
-	vega12_didt_set_mask(hwmgr, false);
-
-	cgs_enter_safe_mode(hwmgr->device, false);
-
-	if (PP_CAP(PHM_PlatformCaps_GCEDC)) {
-		data = 0x00000000;
-		cgs_write_register(hwmgr->device, mmGC_DIDT_CTRL0, data);
-	}
-
-	if (PP_CAP(PHM_PlatformCaps_PSM))
-		vega12_program_gc_didt_config_registers(hwmgr,  AvfsPSMResetConfig_vega12);
-
-	return 0;
-}
-
-static int vega12_enable_se_edc_config(struct pp_hwmgr *hwmgr)
-{
-	int result;
-	uint32_t num_se = 0, count, data;
-	struct amdgpu_device *adev = hwmgr->adev;
-	uint32_t reg;
-
-	num_se = adev->gfx.config.max_shader_engines;
-
-	cgs_enter_safe_mode(hwmgr->device, true);
-
-	cgs_lock_grbm_idx(hwmgr->device, true);
-	reg = soc15_get_register_offset(GC_HWID, 0, mmGRBM_GFX_INDEX_BASE_IDX, mmGRBM_GFX_INDEX);
-	for (count = 0; count < num_se; count++) {
-		data = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK | GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK | ( count << GRBM_GFX_INDEX__SE_INDEX__SHIFT);
-		cgs_write_register(hwmgr->device, reg, data);
-		result = vega12_program_didt_config_registers(hwmgr, SEDiDtWeightConfig_Vega12, VEGA12_CONFIGREG_DIDT);
-		result |= vega12_program_didt_config_registers(hwmgr, SEEDCStallPatternConfig_Vega12, VEGA12_CONFIGREG_DIDT);
-		result |= vega12_program_didt_config_registers(hwmgr, SEEDCStallDelayConfig_Vega12, VEGA12_CONFIGREG_DIDT);
-		result |= vega12_program_didt_config_registers(hwmgr, SEEDCThresholdConfig_Vega12, VEGA12_CONFIGREG_DIDT);
-		result |= vega12_program_didt_config_registers(hwmgr, SEEDCCtrlResetConfig_Vega12, VEGA12_CONFIGREG_DIDT);
-		result |= vega12_program_didt_config_registers(hwmgr, SEEDCCtrlConfig_Vega12, VEGA12_CONFIGREG_DIDT);
-
-		if (0 != result)
-			break;
-	}
-	cgs_write_register(hwmgr->device, reg, 0xE0000000);
-	cgs_lock_grbm_idx(hwmgr->device, false);
-
-	vega12_didt_set_mask(hwmgr, true);
-
-	cgs_enter_safe_mode(hwmgr->device, false);
-
-	return 0;
-}
-
-static int vega12_disable_se_edc_config(struct pp_hwmgr *hwmgr)
-{
-	cgs_enter_safe_mode(hwmgr->device, true);
-
-	vega12_didt_set_mask(hwmgr, false);
-
-	cgs_enter_safe_mode(hwmgr->device, false);
-
-	return 0;
-}
-
-static int vega12_enable_psm_gc_edc_config(struct pp_hwmgr *hwmgr)
-{
-	int result;
-	uint32_t num_se = 0;
-	uint32_t count, data;
-	struct amdgpu_device *adev = hwmgr->adev;
-	uint32_t reg;
-
-	num_se = adev->gfx.config.max_shader_engines;
-
-	cgs_enter_safe_mode(hwmgr->device, true);
-
-	vega12_program_gc_didt_config_registers(hwmgr, AvfsPSMResetConfig_vega12);
-
-	cgs_lock_grbm_idx(hwmgr->device, true);
-	reg = soc15_get_register_offset(GC_HWID, 0, mmGRBM_GFX_INDEX_BASE_IDX, mmGRBM_GFX_INDEX);
-	for (count = 0; count < num_se; count++) {
-		data = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK | GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK | ( count << GRBM_GFX_INDEX__SE_INDEX__SHIFT);
-		cgs_write_register(hwmgr->device, reg, data);
-		result |= vega12_program_didt_config_registers(hwmgr, PSMSEEDCStallPatternConfig_Vega12, VEGA12_CONFIGREG_DIDT);
-		result |= vega12_program_didt_config_registers(hwmgr, PSMSEEDCStallDelayConfig_Vega12, VEGA12_CONFIGREG_DIDT);
-		result |= vega12_program_didt_config_registers(hwmgr, PSMSEEDCCtrlResetConfig_Vega12, VEGA12_CONFIGREG_DIDT);
-		result |= vega12_program_didt_config_registers(hwmgr, PSMSEEDCCtrlConfig_Vega12, VEGA12_CONFIGREG_DIDT);
-
-		if (0 != result)
-			break;
-	}
-	cgs_write_register(hwmgr->device, reg, 0xE0000000);
-	cgs_lock_grbm_idx(hwmgr->device, false);
-
-	vega12_didt_set_mask(hwmgr, true);
-
-	cgs_enter_safe_mode(hwmgr->device, false);
-
-	vega12_program_gc_didt_config_registers(hwmgr, PSMGCEDCDroopCtrlConfig_vega12);
-
-	if (PP_CAP(PHM_PlatformCaps_GCEDC)) {
-		vega12_program_gc_didt_config_registers(hwmgr, PSMGCEDCCtrlResetConfig_vega12);
-		vega12_program_gc_didt_config_registers(hwmgr, PSMGCEDCCtrlConfig_vega12);
-	}
-
-	if (PP_CAP(PHM_PlatformCaps_PSM))
-		vega12_program_gc_didt_config_registers(hwmgr,  AvfsPSMInitConfig_vega12);
-
-	return 0;
-}
-
-static int vega12_disable_psm_gc_edc_config(struct pp_hwmgr *hwmgr)
-{
-	uint32_t data;
-
-	cgs_enter_safe_mode(hwmgr->device, true);
-
-	vega12_didt_set_mask(hwmgr, false);
-
-	cgs_enter_safe_mode(hwmgr->device, false);
-
-	if (PP_CAP(PHM_PlatformCaps_GCEDC)) {
-		data = 0x00000000;
-		cgs_write_register(hwmgr->device, mmGC_EDC_CTRL, data);
-	}
-
-	if (PP_CAP(PHM_PlatformCaps_PSM))
-		vega12_program_gc_didt_config_registers(hwmgr,  AvfsPSMResetConfig_vega12);
-
-	return 0;
-}
-
-static int vega12_enable_se_edc_force_stall_config(struct pp_hwmgr *hwmgr)
-{
-	uint32_t reg;
-	int result;
-
-	cgs_enter_safe_mode(hwmgr->device, true);
-
-	cgs_lock_grbm_idx(hwmgr->device, true);
-	reg = soc15_get_register_offset(GC_HWID, 0, mmGRBM_GFX_INDEX_BASE_IDX, mmGRBM_GFX_INDEX);
-	cgs_write_register(hwmgr->device, reg, 0xE0000000);
-	cgs_lock_grbm_idx(hwmgr->device, false);
-
-	result = vega12_program_didt_config_registers(hwmgr, SEEDCForceStallPatternConfig_Vega12, VEGA12_CONFIGREG_DIDT);
-	result |= vega12_program_didt_config_registers(hwmgr, SEEDCCtrlForceStallConfig_Vega12, VEGA12_CONFIGREG_DIDT);
-	if (0 != result)
-		return result;
-
-	vega12_didt_set_mask(hwmgr, false);
-
-	cgs_enter_safe_mode(hwmgr->device, false);
-
-	return 0;
-}
-
-static int vega12_disable_se_edc_force_stall_config(struct pp_hwmgr *hwmgr)
-{
-	int result;
-
-	result = vega12_disable_se_edc_config(hwmgr);
-	PP_ASSERT_WITH_CODE((0 == result), "[DisableDiDtConfig] Pre DIDT disable clock gating failed!", return result);
-
-	return 0;
-}
-
-int vega12_enable_didt_config(struct pp_hwmgr *hwmgr)
-{
-	int result = 0;
-	struct vega12_hwmgr *data = (struct vega12_hwmgr *)(hwmgr->backend);
-
-	if (data->smu_features[GNLD_DIDT].supported) {
-		if (data->smu_features[GNLD_DIDT].enabled)
-			PP_DBG_LOG("[EnableDiDtConfig] Feature DiDt Already enabled!\n");
-
-		switch (data->registry_data.didt_mode) {
-		case 0:
-			result = vega12_enable_cac_driving_se_didt_config(hwmgr);
-			PP_ASSERT_WITH_CODE((0 == result), "[EnableDiDt] Attempt to enable DiDt Mode 0 Failed!", return result);
-			break;
-		case 2:
-			result = vega12_enable_psm_gc_didt_config(hwmgr);
-			PP_ASSERT_WITH_CODE((0 == result), "[EnableDiDt] Attempt to enable DiDt Mode 2 Failed!", return result);
-			break;
-		case 3:
-			result = vega12_enable_se_edc_config(hwmgr);
-			PP_ASSERT_WITH_CODE((0 == result), "[EnableDiDt] Attempt to enable DiDt Mode 3 Failed!", return result);
-			break;
-		case 1:
-		case 4:
-		case 5:
-			result = vega12_enable_psm_gc_edc_config(hwmgr);
-			PP_ASSERT_WITH_CODE((0 == result), "[EnableDiDt] Attempt to enable DiDt Mode 5 Failed!", return result);
-			break;
-		case 6:
-			result = vega12_enable_se_edc_force_stall_config(hwmgr);
-			PP_ASSERT_WITH_CODE((0 == result), "[EnableDiDt] Attempt to enable DiDt Mode 6 Failed!", return result);
-			break;
-		default:
-			result = -EINVAL;
-			break;
-		}
-
-#if 0
-		if (0 == result) {
-			result = vega12_enable_smc_features(hwmgr, true, data->smu_features[GNLD_DIDT].smu_feature_bitmap);
-			PP_ASSERT_WITH_CODE((0 == result), "[EnableDiDtConfig] Attempt to Enable DiDt feature Failed!", return result);
-			data->smu_features[GNLD_DIDT].enabled = true;
-		}
-#endif
-	}
-
-	return result;
-}
-
-int vega12_disable_didt_config(struct pp_hwmgr *hwmgr)
-{
-	int result = 0;
-	struct vega12_hwmgr *data = (struct vega12_hwmgr *)(hwmgr->backend);
-
-	if (data->smu_features[GNLD_DIDT].supported) {
-		if (!data->smu_features[GNLD_DIDT].enabled)
-			PP_DBG_LOG("[DisableDiDtConfig] Feature DiDt Already Disabled!\n");
-
-		switch (data->registry_data.didt_mode) {
-		case 0:
-			result = vega12_disable_cac_driving_se_didt_config(hwmgr);
-			PP_ASSERT_WITH_CODE((0 == result), "[DisableDiDt] Attempt to disable DiDt Mode 0 Failed!", return result);
-			break;
-		case 2:
-			result = vega12_disable_psm_gc_didt_config(hwmgr);
-			PP_ASSERT_WITH_CODE((0 == result), "[DisableDiDt] Attempt to disable DiDt Mode 2 Failed!", return result);
-			break;
-		case 3:
-			result = vega12_disable_se_edc_config(hwmgr);
-			PP_ASSERT_WITH_CODE((0 == result), "[DisableDiDt] Attempt to disable DiDt Mode 3 Failed!", return result);
-			break;
-		case 1:
-		case 4:
-		case 5:
-			result = vega12_disable_psm_gc_edc_config(hwmgr);
-			PP_ASSERT_WITH_CODE((0 == result), "[DisableDiDt] Attempt to disable DiDt Mode 5 Failed!", return result);
-			break;
-		case 6:
-			result = vega12_disable_se_edc_force_stall_config(hwmgr);
-			PP_ASSERT_WITH_CODE((0 == result), "[DisableDiDt] Attempt to disable DiDt Mode 6 Failed!", return result);
-			break;
-		default:
-			result = -EINVAL;
-			break;
-		}
-
-		if (0 == result) {
-			result = vega12_enable_smc_features(hwmgr, false, data->smu_features[GNLD_DIDT].smu_feature_bitmap);
-			PP_ASSERT_WITH_CODE((0 == result), "[DisableDiDtConfig] Attempt to Disable DiDt feature Failed!", return result);
-			data->smu_features[GNLD_DIDT].enabled = false;
-		}
-	}
-
-	return result;
-}
-
-int vega12_set_power_limit(struct pp_hwmgr *hwmgr, uint32_t n)
-{
-	struct vega12_hwmgr *data =
-			(struct vega12_hwmgr *)(hwmgr->backend);
-
-	if (data->smu_features[GNLD_PPT].enabled)
-		return smum_send_msg_to_smc_with_parameter(hwmgr,
-				PPSMC_MSG_SetPptLimit, n);
-
-	return 0;
-}
-
-int vega12_enable_power_containment(struct pp_hwmgr *hwmgr)
-{
-	struct vega12_hwmgr *data =
-			(struct vega12_hwmgr *)(hwmgr->backend);
-	struct phm_ppt_v2_information *table_info =
-			(struct phm_ppt_v2_information *)(hwmgr->pptable);
-	struct phm_tdp_table *tdp_table = table_info->tdp_table;
-	uint32_t default_pwr_limit =
-			(uint32_t)(tdp_table->usMaximumPowerDeliveryLimit);
-	int result = 0;
-
-	if (PP_CAP(PHM_PlatformCaps_PowerContainment)) {
-		if (data->smu_features[GNLD_PPT].supported)
-			PP_ASSERT_WITH_CODE(!vega12_enable_smc_features(hwmgr,
-					true, data->smu_features[GNLD_PPT].smu_feature_bitmap),
-					"Attempt to enable PPT feature Failed!",
-					data->smu_features[GNLD_PPT].supported = false);
-
-		if (data->smu_features[GNLD_TDC].supported)
-			PP_ASSERT_WITH_CODE(!vega12_enable_smc_features(hwmgr,
-					true, data->smu_features[GNLD_TDC].smu_feature_bitmap),
-					"Attempt to enable PPT feature Failed!",
-					data->smu_features[GNLD_TDC].supported = false);
-
-		result = vega12_set_power_limit(hwmgr, default_pwr_limit);
-		PP_ASSERT_WITH_CODE(!result,
-				"Failed to set Default Power Limit in SMC!",
-				return result);
-	}
-
-	return result;
-}
-
-int vega12_disable_power_containment(struct pp_hwmgr *hwmgr)
-{
-	struct vega12_hwmgr *data =
-			(struct vega12_hwmgr *)(hwmgr->backend);
-
-	if (PP_CAP(PHM_PlatformCaps_PowerContainment)) {
-		if (data->smu_features[GNLD_PPT].supported)
-			PP_ASSERT_WITH_CODE(!vega12_enable_smc_features(hwmgr,
-					false, data->smu_features[GNLD_PPT].smu_feature_bitmap),
-					"Attempt to disable PPT feature Failed!",
-					data->smu_features[GNLD_PPT].supported = false);
-
-		if (data->smu_features[GNLD_TDC].supported)
-			PP_ASSERT_WITH_CODE(!vega12_enable_smc_features(hwmgr,
-					false, data->smu_features[GNLD_TDC].smu_feature_bitmap),
-					"Attempt to disable PPT feature Failed!",
-					data->smu_features[GNLD_TDC].supported = false);
-	}
-
-	return 0;
-}
-
-static int vega12_set_overdrive_target_percentage(struct pp_hwmgr *hwmgr,
-		uint32_t adjust_percent)
-{
-	return smum_send_msg_to_smc_with_parameter(hwmgr,
-			PPSMC_MSG_OverDriveSetPercentage, adjust_percent);
-}
-
-int vega12_power_control_set_level(struct pp_hwmgr *hwmgr)
-{
-	int adjust_percent, result = 0;
-
-	if (PP_CAP(PHM_PlatformCaps_PowerContainment)) {
-		adjust_percent =
-				hwmgr->platform_descriptor.TDPAdjustmentPolarity ?
-				hwmgr->platform_descriptor.TDPAdjustment :
-				(-1 * hwmgr->platform_descriptor.TDPAdjustment);
-		result = vega12_set_overdrive_target_percentage(hwmgr,
-				(uint32_t)adjust_percent);
-	}
-	return result;
-}
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_powertune.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_powertune.h
deleted file mode 100644
index 78d31a6747dd..000000000000
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_powertune.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright 2017 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-#ifndef _VEGA12_POWERTUNE_H_
-#define _VEGA12_POWERTUNE_H_
-
-enum vega12_didt_config_reg_type {
-	VEGA12_CONFIGREG_DIDT = 0,
-	VEGA12_CONFIGREG_GCCAC,
-	VEGA12_CONFIGREG_SECAC
-};
-
-/* PowerContainment Features */
-#define POWERCONTAINMENT_FEATURE_DTE             0x00000001
-#define POWERCONTAINMENT_FEATURE_TDCLimit        0x00000002
-#define POWERCONTAINMENT_FEATURE_PkgPwrLimit     0x00000004
-
-struct vega12_didt_config_reg {
-	uint32_t		offset;
-	uint32_t		mask;
-	uint32_t		shift;
-	uint32_t		value;
-};
-
-int vega12_enable_power_containment(struct pp_hwmgr *hwmgr);
-int vega12_set_power_limit(struct pp_hwmgr *hwmgr, uint32_t n);
-int vega12_power_control_set_level(struct pp_hwmgr *hwmgr);
-int vega12_disable_power_containment(struct pp_hwmgr *hwmgr);
-
-int vega12_enable_didt_config(struct pp_hwmgr *hwmgr);
-int vega12_disable_didt_config(struct pp_hwmgr *hwmgr);
-
-#endif  /* _VEGA12_POWERTUNE_H_ */
-
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_processpptables.c
index e7d794980b84..b34113f45904 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_processpptables.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_processpptables.c
@@ -208,9 +208,9 @@ static int append_vbios_pptable(struct pp_hwmgr *hwmgr, PPTable_t *ppsmc_pptable
 	ppsmc_pptable->LedPin1 = smc_dpm_table.ledpin1;
 	ppsmc_pptable->LedPin2 = smc_dpm_table.ledpin2;
 
-	ppsmc_pptable->GfxclkSpreadEnabled = smc_dpm_table.gfxclkspreadenabled;
-	ppsmc_pptable->GfxclkSpreadPercent = smc_dpm_table.gfxclkspreadpercent;
-	ppsmc_pptable->GfxclkSpreadFreq = smc_dpm_table.gfxclkspreadfreq;
+	ppsmc_pptable->PllGfxclkSpreadEnabled = smc_dpm_table.pllgfxclkspreadenabled;
+	ppsmc_pptable->PllGfxclkSpreadPercent = smc_dpm_table.pllgfxclkspreadpercent;
+	ppsmc_pptable->PllGfxclkSpreadFreq = smc_dpm_table.pllgfxclkspreadfreq;
 
 	ppsmc_pptable->UclkSpreadEnabled = 0;
 	ppsmc_pptable->UclkSpreadPercent = smc_dpm_table.uclkspreadpercent;
@@ -220,6 +220,11 @@ static int append_vbios_pptable(struct pp_hwmgr *hwmgr, PPTable_t *ppsmc_pptable
 	ppsmc_pptable->SocclkSpreadPercent = smc_dpm_table.socclkspreadpercent;
 	ppsmc_pptable->SocclkSpreadFreq = smc_dpm_table.socclkspreadfreq;
 
+	ppsmc_pptable->AcgGfxclkSpreadEnabled = smc_dpm_table.acggfxclkspreadenabled;
+	ppsmc_pptable->AcgGfxclkSpreadPercent = smc_dpm_table.acggfxclkspreadpercent;
+	ppsmc_pptable->AcgGfxclkSpreadFreq = smc_dpm_table.acggfxclkspreadfreq;
+
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/powerplay/inc/vega12/smu9_driver_if.h b/drivers/gpu/drm/amd/powerplay/inc/vega12/smu9_driver_if.h
index cd2e503a87da..fb696e3d06cf 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/vega12/smu9_driver_if.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/vega12/smu9_driver_if.h
@@ -127,7 +127,7 @@
 #define FEATURE_GFX_EDC_MASK            (1 << FEATURE_GFX_EDC_BIT            )
 #define FEATURE_GFXOFF_MASK             (1 << FEATURE_GFXOFF_BIT             )
 #define FEATURE_CG_MASK                 (1 << FEATURE_CG_BIT                 )
-#define FEATURE_ACG_MASK                (1 << FEATURE_ACG_BIT                )
+#define FEATURE_ACG_MASK          (1 << FEATURE_ACG_BIT)
 #define FEATURE_SPARE_29_MASK           (1 << FEATURE_SPARE_29_BIT           )
 #define FEATURE_SPARE_30_MASK           (1 << FEATURE_SPARE_30_BIT           )
 #define FEATURE_SPARE_31_MASK           (1 << FEATURE_SPARE_31_BIT           )
@@ -481,9 +481,9 @@ typedef struct {
   uint8_t      padding8_4;
 
 
-  uint8_t      GfxclkSpreadEnabled;
-  uint8_t      GfxclkSpreadPercent;
-  uint16_t     GfxclkSpreadFreq;
+	uint8_t      PllGfxclkSpreadEnabled;
+	uint8_t      PllGfxclkSpreadPercent;
+	uint16_t     PllGfxclkSpreadFreq;
 
   uint8_t      UclkSpreadEnabled;
   uint8_t      UclkSpreadPercent;
@@ -493,7 +493,11 @@ typedef struct {
   uint8_t      SocclkSpreadPercent;
   uint16_t     SocclkSpreadFreq;
 
-  uint32_t     BoardReserved[3];
+	uint8_t      AcgGfxclkSpreadEnabled;
+	uint8_t      AcgGfxclkSpreadPercent;
+	uint16_t     AcgGfxclkSpreadFreq;
+
+	uint32_t     BoardReserved[10];
 
 
   uint32_t     MmHubPadding[7];
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c
index 55cd204c1789..651a3f28734b 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/vega12_smumgr.c
@@ -30,8 +30,7 @@
 
 #include "ppatomctrl.h"
 #include "pp_debug.h"
-#include "smu_ucode_xfer_vi.h"
-#include "smu7_smumgr.h"
+
 
 /* MP Apertures */
 #define MP0_Public                  0x03800000
@@ -392,8 +391,7 @@ static int vega12_smu_init(struct pp_hwmgr *hwmgr)
 	struct cgs_firmware_info info = {0};
 	int ret;
 
-	ret = cgs_get_firmware_info(hwmgr->device,
-				smu7_convert_fw_type_to_cgs(UCODE_ID_SMU),
+	ret = cgs_get_firmware_info(hwmgr->device, CGS_UCODE_ID_SMU,
 				&info);
 	if (ret || !info.kptr)
 		return -EINVAL;
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index a4f68affc13b..d39400e5bc42 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -89,14 +89,14 @@ static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname)
 	 */
 	if (to_adreno_gpu(gpu)->fwloc == FW_LOCATION_LEGACY) {
 		ret = qcom_mdt_load(dev, fw, fwname, GPU_PAS_ID,
-				mem_region, mem_phys, mem_size);
+				mem_region, mem_phys, mem_size, NULL);
 	} else {
 		char newname[strlen("qcom/") + strlen(fwname) + 1];
 
 		sprintf(newname, "qcom/%s", fwname);
 
 		ret = qcom_mdt_load(dev, fw, newname, GPU_PAS_ID,
-				mem_region, mem_phys, mem_size);
+				mem_region, mem_phys, mem_size, NULL);
 	}
 	if (ret)
 		goto out;
diff --git a/drivers/gpu/drm/omapdrm/dss/dpi.c b/drivers/gpu/drm/omapdrm/dss/dpi.c
index fb1c27f69e3a..3d662e6805eb 100644
--- a/drivers/gpu/drm/omapdrm/dss/dpi.c
+++ b/drivers/gpu/drm/omapdrm/dss/dpi.c
@@ -142,7 +142,7 @@ static enum dss_clk_source dpi_get_clk_src(struct dpi_data *dpi)
 }
 
 struct dpi_clk_calc_ctx {
-	struct dss_pll *pll;
+	struct dpi_data *dpi;
 	unsigned int clkout_idx;
 
 	/* inputs */
@@ -191,7 +191,7 @@ static bool dpi_calc_hsdiv_cb(int m_dispc, unsigned long dispc,
 	ctx->pll_cinfo.mX[ctx->clkout_idx] = m_dispc;
 	ctx->pll_cinfo.clkout[ctx->clkout_idx] = dispc;
 
-	return dispc_div_calc(ctx->pll->dss->dispc, dispc,
+	return dispc_div_calc(ctx->dpi->dss->dispc, dispc,
 			      ctx->pck_min, ctx->pck_max,
 			      dpi_calc_dispc_cb, ctx);
 }
@@ -208,8 +208,8 @@ static bool dpi_calc_pll_cb(int n, int m, unsigned long fint,
 	ctx->pll_cinfo.fint = fint;
 	ctx->pll_cinfo.clkdco = clkdco;
 
-	return dss_pll_hsdiv_calc_a(ctx->pll, clkdco,
-		ctx->pck_min, dss_get_max_fck_rate(ctx->pll->dss),
+	return dss_pll_hsdiv_calc_a(ctx->dpi->pll, clkdco,
+		ctx->pck_min, dss_get_max_fck_rate(ctx->dpi->dss),
 		dpi_calc_hsdiv_cb, ctx);
 }
 
@@ -219,7 +219,7 @@ static bool dpi_calc_dss_cb(unsigned long fck, void *data)
 
 	ctx->fck = fck;
 
-	return dispc_div_calc(ctx->pll->dss->dispc, fck,
+	return dispc_div_calc(ctx->dpi->dss->dispc, fck,
 			      ctx->pck_min, ctx->pck_max,
 			      dpi_calc_dispc_cb, ctx);
 }
@@ -230,7 +230,7 @@ static bool dpi_pll_clk_calc(struct dpi_data *dpi, unsigned long pck,
 	unsigned long clkin;
 
 	memset(ctx, 0, sizeof(*ctx));
-	ctx->pll = dpi->pll;
+	ctx->dpi = dpi;
 	ctx->clkout_idx = dss_pll_get_clkout_idx_for_src(dpi->clk_src);
 
 	clkin = clk_get_rate(dpi->pll->clkin);
@@ -244,7 +244,7 @@ static bool dpi_pll_clk_calc(struct dpi_data *dpi, unsigned long pck,
 		pll_min = 0;
 		pll_max = 0;
 
-		return dss_pll_calc_a(ctx->pll, clkin,
+		return dss_pll_calc_a(ctx->dpi->pll, clkin,
 				pll_min, pll_max,
 				dpi_calc_pll_cb, ctx);
 	} else { /* DSS_PLL_TYPE_B */
@@ -275,6 +275,7 @@ static bool dpi_dss_clk_calc(struct dpi_data *dpi, unsigned long pck,
 		bool ok;
 
 		memset(ctx, 0, sizeof(*ctx));
+		ctx->dpi = dpi;
 		if (pck > 1000 * i * i * i)
 			ctx->pck_min = max(pck - 1000 * i * i * i, 0lu);
 		else
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index e415d2c097a7..48d0e6bd0508 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -140,6 +140,10 @@ static struct radeon_px_quirk radeon_px_quirk_list[] = {
 	 * https://bugs.freedesktop.org/show_bug.cgi?id=101491
 	 */
 	{ PCI_VENDOR_ID_ATI, 0x6741, 0x1043, 0x2122, RADEON_PX_QUIRK_DISABLE_PX },
+	/* Asus K73TK laptop with AMD A6-3420M APU and Radeon 7670m GPU
+	 * https://bugzilla.kernel.org/show_bug.cgi?id=51381#c52
+	 */
+	{ PCI_VENDOR_ID_ATI, 0x6840, 0x1043, 0x2123, RADEON_PX_QUIRK_DISABLE_PX },
 	{ 0, 0, 0, 0, 0 },
 };
 
diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c
index 97a0a639dad9..90d5b41007bf 100644
--- a/drivers/gpu/drm/radeon/si_dpm.c
+++ b/drivers/gpu/drm/radeon/si_dpm.c
@@ -5912,9 +5912,9 @@ static void si_set_pcie_lane_width_in_smc(struct radeon_device *rdev,
 {
 	u32 lane_width;
 	u32 new_lane_width =
-		(radeon_new_state->caps & ATOM_PPLIB_PCIE_LINK_WIDTH_MASK) >> ATOM_PPLIB_PCIE_LINK_WIDTH_SHIFT;
+		((radeon_new_state->caps & ATOM_PPLIB_PCIE_LINK_WIDTH_MASK) >> ATOM_PPLIB_PCIE_LINK_WIDTH_SHIFT) + 1;
 	u32 current_lane_width =
-		(radeon_current_state->caps & ATOM_PPLIB_PCIE_LINK_WIDTH_MASK) >> ATOM_PPLIB_PCIE_LINK_WIDTH_SHIFT;
+		((radeon_current_state->caps & ATOM_PPLIB_PCIE_LINK_WIDTH_MASK) >> ATOM_PPLIB_PCIE_LINK_WIDTH_SHIFT) + 1;
 
 	if (new_lane_width != current_lane_width) {
 		radeon_set_pcie_lanes(rdev, new_lane_width);
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 447371f4de56..72855182b191 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -31,7 +31,6 @@
 #include <linux/vmalloc.h>
 #include <linux/hyperv.h>
 #include <linux/export.h>
-#include <asm/hyperv.h>
 #include <asm/mshyperv.h>
 
 #include "hyperv_vmbus.h"
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index 8137b3885b99..9b82549cbbc8 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -29,7 +29,6 @@
 #include <linux/version.h>
 #include <linux/random.h>
 #include <linux/clockchips.h>
-#include <asm/hyperv.h>
 #include <asm/mshyperv.h>
 #include "hyperv_vmbus.h"
 
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 36d34fe3ccb3..f761bef36e77 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -27,6 +27,7 @@
 
 #include <linux/list.h>
 #include <asm/sync_bitops.h>
+#include <asm/hyperv-tlfs.h>
 #include <linux/atomic.h>
 #include <linux/hyperv.h>
 #include <linux/interrupt.h>
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index bc65c4d79c1f..b10fe26c4891 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -36,7 +36,6 @@
 #include <linux/cpu.h>
 #include <linux/sched/task_stack.h>
 
-#include <asm/hyperv.h>
 #include <asm/mshyperv.h>
 #include <linux/notifier.h>
 #include <linux/ptrace.h>
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 033e57366d56..f249a4428458 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -1231,8 +1231,9 @@ config SENSORS_NCT6775
 	help
 	  If you say yes here you get support for the hardware monitoring
 	  functionality of the Nuvoton NCT6106D, NCT6775F, NCT6776F, NCT6779D,
-	  NCT6791D, NCT6792D, NCT6793D, and compatible Super-I/O chips. This
-	  driver replaces the w83627ehf driver for NCT6775F and NCT6776F.
+	  NCT6791D, NCT6792D, NCT6793D, NCT6795D, NCT6796D, and compatible
+	  Super-I/O chips. This driver replaces the w83627ehf driver for
+	  NCT6775F and NCT6776F.
 
 	  This driver can also be built as a module.  If so, the module
 	  will be called nct6775.
diff --git a/drivers/hwmon/g762.c b/drivers/hwmon/g762.c
index 6d1208b2b6d2..6c83c385a7ca 100644
--- a/drivers/hwmon/g762.c
+++ b/drivers/hwmon/g762.c
@@ -128,7 +128,6 @@ enum g762_regs {
 			 G762_REG_FAN_CMD2_GEAR_MODE_1)) >> 2))
 
 struct g762_data {
-	struct device *hwmon_dev;
 	struct i2c_client *client;
 	struct clk *clk;
 
@@ -594,6 +593,14 @@ MODULE_DEVICE_TABLE(of, g762_dt_match);
  * call to g762_of_clock_disable(). Note that a reference to clock is kept
  * in our private data structure to be used in this function.
  */
+static void g762_of_clock_disable(void *data)
+{
+	struct g762_data *g762 = data;
+
+	clk_disable_unprepare(g762->clk);
+	clk_put(g762->clk);
+}
+
 static int g762_of_clock_enable(struct i2c_client *client)
 {
 	struct g762_data *data;
@@ -626,6 +633,7 @@ static int g762_of_clock_enable(struct i2c_client *client)
 	data = i2c_get_clientdata(client);
 	data->clk = clk;
 
-	return 0;
+	return devm_add_action_or_reset(&client->dev, g762_of_clock_disable,
+					data);
 
  clk_unprep:
@@ -637,17 +645,6 @@ static int g762_of_clock_enable(struct i2c_client *client)
 	return ret;
 }
 
-static void g762_of_clock_disable(struct i2c_client *client)
-{
-	struct g762_data *data = i2c_get_clientdata(client);
-
-	if (!data->clk)
-		return;
-
-	clk_disable_unprepare(data->clk);
-	clk_put(data->clk);
-}
-
 static int g762_of_prop_import_one(struct i2c_client *client,
 				   const char *pname,
 				   int (*psetter)(struct device *dev,
@@ -698,8 +695,6 @@ static int g762_of_clock_enable(struct i2c_client *client)
 {
 	return 0;
 }
-
-static void g762_of_clock_disable(struct i2c_client *client) { }
 #endif
 
 /*
@@ -1054,6 +1049,7 @@ static inline int g762_fan_init(struct device *dev)
 static int g762_probe(struct i2c_client *client, const struct i2c_device_id *id)
 {
 	struct device *dev = &client->dev;
+	struct device *hwmon_dev;
 	struct g762_data *data;
 	int ret;
 
@@ -1080,35 +1076,15 @@ static int g762_probe(struct i2c_client *client, const struct i2c_device_id *id)
 		return ret;
 	ret = g762_of_prop_import(client);
 	if (ret)
-		goto clock_dis;
+		return ret;
 	/* ... or platform_data */
 	ret = g762_pdata_prop_import(client);
 	if (ret)
-		goto clock_dis;
+		return ret;
 
-	data->hwmon_dev = hwmon_device_register_with_groups(dev, client->name,
+	hwmon_dev = devm_hwmon_device_register_with_groups(dev, client->name,
 							    data, g762_groups);
-	if (IS_ERR(data->hwmon_dev)) {
-		ret = PTR_ERR(data->hwmon_dev);
-		goto clock_dis;
-	}
-
-	return 0;
-
- clock_dis:
-	g762_of_clock_disable(client);
-
-	return ret;
-}
-
-static int g762_remove(struct i2c_client *client)
-{
-	struct g762_data *data = i2c_get_clientdata(client);
-
-	hwmon_device_unregister(data->hwmon_dev);
-	g762_of_clock_disable(client);
-
-	return 0;
+	return PTR_ERR_OR_ZERO(hwmon_dev);
 }
 
 static struct i2c_driver g762_driver = {
@@ -1117,7 +1093,6 @@ static struct i2c_driver g762_driver = {
 		.of_match_table = of_match_ptr(g762_dt_match),
 	},
 	.probe	  = g762_probe,
-	.remove	  = g762_remove,
 	.id_table = g762_id,
 };
 
diff --git a/drivers/hwmon/lm92.c b/drivers/hwmon/lm92.c
index 2a91974a10bb..d40fe5122e94 100644
--- a/drivers/hwmon/lm92.c
+++ b/drivers/hwmon/lm92.c
@@ -52,6 +52,7 @@
  */
 static const unsigned short normal_i2c[] = { 0x48, 0x49, 0x4a, 0x4b,
 						I2C_CLIENT_END };
+enum chips { lm92, max6635 };
 
 /* The LM92 registers */
 #define LM92_REG_CONFIG			0x01 /* 8-bit, RW */
@@ -259,62 +260,6 @@ static void lm92_init_client(struct i2c_client *client)
 					  config & 0xFE);
 }
 
-/*
- * The MAX6635 has no identification register, so we have to use tricks
- * to identify it reliably. This is somewhat slow.
- * Note that we do NOT rely on the 2 MSB of the configuration register
- * always reading 0, as suggested by the datasheet, because it was once
- * reported not to be true.
- */
-static int max6635_check(struct i2c_client *client)
-{
-	u16 temp_low, temp_high, temp_hyst, temp_crit;
-	u8 conf;
-	int i;
-
-	/*
-	 * No manufacturer ID register, so a read from this address will
-	 * always return the last read value.
-	 */
-	temp_low = i2c_smbus_read_word_data(client, LM92_REG_TEMP_LOW);
-	if (i2c_smbus_read_word_data(client, LM92_REG_MAN_ID) != temp_low)
-		return 0;
-	temp_high = i2c_smbus_read_word_data(client, LM92_REG_TEMP_HIGH);
-	if (i2c_smbus_read_word_data(client, LM92_REG_MAN_ID) != temp_high)
-		return 0;
-
-	/* Limits are stored as integer values (signed, 9-bit). */
-	if ((temp_low & 0x7f00) || (temp_high & 0x7f00))
-		return 0;
-	temp_hyst = i2c_smbus_read_word_data(client, LM92_REG_TEMP_HYST);
-	temp_crit = i2c_smbus_read_word_data(client, LM92_REG_TEMP_CRIT);
-	if ((temp_hyst & 0x7f00) || (temp_crit & 0x7f00))
-		return 0;
-
-	/*
-	 * Registers addresses were found to cycle over 16-byte boundaries.
-	 * We don't test all registers with all offsets so as to save some
-	 * reads and time, but this should still be sufficient to dismiss
-	 * non-MAX6635 chips.
-	 */
-	conf = i2c_smbus_read_byte_data(client, LM92_REG_CONFIG);
-	for (i = 16; i < 96; i *= 2) {
-		if (temp_hyst != i2c_smbus_read_word_data(client,
-				 LM92_REG_TEMP_HYST + i - 16)
-		 || temp_crit != i2c_smbus_read_word_data(client,
-				 LM92_REG_TEMP_CRIT + i)
-		 || temp_low != i2c_smbus_read_word_data(client,
-				LM92_REG_TEMP_LOW + i + 16)
-		 || temp_high != i2c_smbus_read_word_data(client,
-				 LM92_REG_TEMP_HIGH + i + 32)
-		 || conf != i2c_smbus_read_byte_data(client,
-			    LM92_REG_CONFIG + i))
-			return 0;
-	}
-
-	return 1;
-}
-
 static struct attribute *lm92_attrs[] = {
 	&sensor_dev_attr_temp1_input.dev_attr.attr,
 	&sensor_dev_attr_temp1_crit.dev_attr.attr,
@@ -348,8 +293,6 @@ static int lm92_detect(struct i2c_client *new_client,
 
 	if ((config & 0xe0) == 0x00 && man_id == 0x0180)
 		pr_info("lm92: Found National Semiconductor LM92 chip\n");
-	else if (max6635_check(new_client))
-		pr_info("lm92: Found Maxim MAX6635 chip\n");
 	else
 		return -ENODEV;
 
@@ -387,8 +330,8 @@ static int lm92_probe(struct i2c_client *new_client,
  */
 
 static const struct i2c_device_id lm92_id[] = {
-	{ "lm92", 0 },
-	/* max6635 could be added here */
+	{ "lm92", lm92 },
+	{ "max6635", max6635 },
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, lm92_id);
diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c
index c219e43b8f02..aebce560bfaf 100644
--- a/drivers/hwmon/nct6775.c
+++ b/drivers/hwmon/nct6775.c
@@ -41,7 +41,7 @@
  * nct6792d    15      6       6       2+6    0xc910 0xc1    0x5ca3
  * nct6793d    15      6       6       2+6    0xd120 0xc1    0x5ca3
  * nct6795d    14      6       6       2+6    0xd350 0xc1    0x5ca3
- *
+ * nct6796d    14      7       7       2+6    0xd420 0xc1    0x5ca3
  *
  * #temp lists the number of monitored temperature sources (first value) plus
  * the number of directly connectable temperature sensors (second value).
@@ -68,7 +68,7 @@
 #define USE_ALTERNATE
 
 enum kinds { nct6106, nct6775, nct6776, nct6779, nct6791, nct6792, nct6793,
-	     nct6795 };
+	     nct6795, nct6796 };
 
 /* used to set data->name = nct6775_device_names[data->sio_kind] */
 static const char * const nct6775_device_names[] = {
@@ -80,6 +80,7 @@ static const char * const nct6775_device_names[] = {
 	"nct6792",
 	"nct6793",
 	"nct6795",
+	"nct6796",
 };
 
 static const char * const nct6775_sio_names[] __initconst = {
@@ -91,6 +92,7 @@ static const char * const nct6775_sio_names[] __initconst = {
 	"NCT6792D",
 	"NCT6793D",
 	"NCT6795D",
+	"NCT6796D",
 };
 
 static unsigned short force_id;
@@ -125,6 +127,7 @@ MODULE_PARM_DESC(fan_debounce, "Enable debouncing for fan RPM signal");
 #define SIO_NCT6792_ID		0xc910
 #define SIO_NCT6793_ID		0xd120
 #define SIO_NCT6795_ID		0xd350
+#define SIO_NCT6796_ID		0xd420
 #define SIO_ID_MASK		0xFFF0
 
 enum pwm_enable { off, manual, thermal_cruise, speed_cruise, sf3, sf4 };
@@ -201,7 +204,7 @@ superio_exit(int ioreg)
 #define NUM_REG_ALARM	7	/* Max number of alarm registers */
 #define NUM_REG_BEEP	5	/* Max number of beep registers */
 
-#define NUM_FAN		6
+#define NUM_FAN		7
 
 #define TEMP_SOURCE_VIRTUAL	0x1f
 
@@ -272,26 +275,26 @@ static const u8 NCT6775_PWM_MODE_MASK[] = { 0x01, 0x02, 0x01 };
 /* Advanced Fan control, some values are common for all fans */
 
 static const u16 NCT6775_REG_TARGET[] = {
-	0x101, 0x201, 0x301, 0x801, 0x901, 0xa01 };
+	0x101, 0x201, 0x301, 0x801, 0x901, 0xa01, 0xb01 };
 static const u16 NCT6775_REG_FAN_MODE[] = {
-	0x102, 0x202, 0x302, 0x802, 0x902, 0xa02 };
+	0x102, 0x202, 0x302, 0x802, 0x902, 0xa02, 0xb02 };
 static const u16 NCT6775_REG_FAN_STEP_DOWN_TIME[] = {
-	0x103, 0x203, 0x303, 0x803, 0x903, 0xa03 };
+	0x103, 0x203, 0x303, 0x803, 0x903, 0xa03, 0xb03 };
 static const u16 NCT6775_REG_FAN_STEP_UP_TIME[] = {
-	0x104, 0x204, 0x304, 0x804, 0x904, 0xa04 };
+	0x104, 0x204, 0x304, 0x804, 0x904, 0xa04, 0xb04 };
 static const u16 NCT6775_REG_FAN_STOP_OUTPUT[] = {
-	0x105, 0x205, 0x305, 0x805, 0x905, 0xa05 };
+	0x105, 0x205, 0x305, 0x805, 0x905, 0xa05, 0xb05 };
 static const u16 NCT6775_REG_FAN_START_OUTPUT[] = {
-	0x106, 0x206, 0x306, 0x806, 0x906, 0xa06 };
+	0x106, 0x206, 0x306, 0x806, 0x906, 0xa06, 0xb06 };
 static const u16 NCT6775_REG_FAN_MAX_OUTPUT[] = { 0x10a, 0x20a, 0x30a };
 static const u16 NCT6775_REG_FAN_STEP_OUTPUT[] = { 0x10b, 0x20b, 0x30b };
 
 static const u16 NCT6775_REG_FAN_STOP_TIME[] = {
-	0x107, 0x207, 0x307, 0x807, 0x907, 0xa07 };
+	0x107, 0x207, 0x307, 0x807, 0x907, 0xa07, 0xb07 };
 static const u16 NCT6775_REG_PWM[] = {
-	0x109, 0x209, 0x309, 0x809, 0x909, 0xa09 };
+	0x109, 0x209, 0x309, 0x809, 0x909, 0xa09, 0xb09 };
 static const u16 NCT6775_REG_PWM_READ[] = {
-	0x01, 0x03, 0x11, 0x13, 0x15, 0xa09 };
+	0x01, 0x03, 0x11, 0x13, 0x15, 0xa09, 0xb09 };
 
 static const u16 NCT6775_REG_FAN[] = { 0x630, 0x632, 0x634, 0x636, 0x638 };
 static const u16 NCT6775_REG_FAN_MIN[] = { 0x3b, 0x3c, 0x3d };
@@ -314,7 +317,7 @@ static const u16 NCT6775_REG_TEMP_SOURCE[ARRAY_SIZE(NCT6775_REG_TEMP)] = {
 	0x621, 0x622, 0x623, 0x624, 0x625, 0x626 };
 
 static const u16 NCT6775_REG_TEMP_SEL[] = {
-	0x100, 0x200, 0x300, 0x800, 0x900, 0xa00 };
+	0x100, 0x200, 0x300, 0x800, 0x900, 0xa00, 0xb00 };
 
 static const u16 NCT6775_REG_WEIGHT_TEMP_SEL[] = {
 	0x139, 0x239, 0x339, 0x839, 0x939, 0xa39 };
@@ -330,9 +333,9 @@ static const u16 NCT6775_REG_WEIGHT_TEMP_BASE[] = {
 static const u16 NCT6775_REG_TEMP_OFFSET[] = { 0x454, 0x455, 0x456 };
 
 static const u16 NCT6775_REG_AUTO_TEMP[] = {
-	0x121, 0x221, 0x321, 0x821, 0x921, 0xa21 };
+	0x121, 0x221, 0x321, 0x821, 0x921, 0xa21, 0xb21 };
 static const u16 NCT6775_REG_AUTO_PWM[] = {
-	0x127, 0x227, 0x327, 0x827, 0x927, 0xa27 };
+	0x127, 0x227, 0x327, 0x827, 0x927, 0xa27, 0xb27 };
 
 #define NCT6775_AUTO_TEMP(data, nr, p)	((data)->REG_AUTO_TEMP[nr] + (p))
 #define NCT6775_AUTO_PWM(data, nr, p)	((data)->REG_AUTO_PWM[nr] + (p))
@@ -340,9 +343,9 @@ static const u16 NCT6775_REG_AUTO_PWM[] = {
 static const u16 NCT6775_REG_CRITICAL_ENAB[] = { 0x134, 0x234, 0x334 };
 
 static const u16 NCT6775_REG_CRITICAL_TEMP[] = {
-	0x135, 0x235, 0x335, 0x835, 0x935, 0xa35 };
+	0x135, 0x235, 0x335, 0x835, 0x935, 0xa35, 0xb35 };
 static const u16 NCT6775_REG_CRITICAL_TEMP_TOLERANCE[] = {
-	0x138, 0x238, 0x338, 0x838, 0x938, 0xa38 };
+	0x138, 0x238, 0x338, 0x838, 0x938, 0xa38, 0xb38 };
 
 static const char *const nct6775_temp_label[] = {
 	"",
@@ -414,13 +417,15 @@ static const s8 NCT6776_BEEP_BITS[] = {
 	30, 31 };			/* intrusion0, intrusion1 */
 
 static const u16 NCT6776_REG_TOLERANCE_H[] = {
-	0x10c, 0x20c, 0x30c, 0x80c, 0x90c, 0xa0c };
+	0x10c, 0x20c, 0x30c, 0x80c, 0x90c, 0xa0c, 0xb0c };
 
 static const u8 NCT6776_REG_PWM_MODE[] = { 0x04, 0, 0, 0, 0, 0 };
 static const u8 NCT6776_PWM_MODE_MASK[] = { 0x01, 0, 0, 0, 0, 0 };
 
-static const u16 NCT6776_REG_FAN_MIN[] = { 0x63a, 0x63c, 0x63e, 0x640, 0x642 };
-static const u16 NCT6776_REG_FAN_PULSES[] = { 0x644, 0x645, 0x646, 0, 0 };
+static const u16 NCT6776_REG_FAN_MIN[] = {
+	0x63a, 0x63c, 0x63e, 0x640, 0x642, 0x64a, 0x64c };
+static const u16 NCT6776_REG_FAN_PULSES[] = {
+	0x644, 0x645, 0x646, 0x647, 0x648, 0x649, 0 };
 
 static const u16 NCT6776_REG_WEIGHT_DUTY_BASE[] = {
 	0x13e, 0x23e, 0x33e, 0x83e, 0x93e, 0xa3e };
@@ -495,15 +500,15 @@ static const s8 NCT6779_BEEP_BITS[] = {
 	30, 31 };			/* intrusion0, intrusion1 */
 
 static const u16 NCT6779_REG_FAN[] = {
-	0x4b0, 0x4b2, 0x4b4, 0x4b6, 0x4b8, 0x4ba };
+	0x4b0, 0x4b2, 0x4b4, 0x4b6, 0x4b8, 0x4ba, 0x660 };
 static const u16 NCT6779_REG_FAN_PULSES[] = {
-	0x644, 0x645, 0x646, 0x647, 0x648, 0x649 };
+	0x644, 0x645, 0x646, 0x647, 0x648, 0x649, 0 };
 
 static const u16 NCT6779_REG_CRITICAL_PWM_ENABLE[] = {
-	0x136, 0x236, 0x336, 0x836, 0x936, 0xa36 };
+	0x136, 0x236, 0x336, 0x836, 0x936, 0xa36, 0xb36 };
 #define NCT6779_CRITICAL_PWM_ENABLE_MASK	0x01
 static const u16 NCT6779_REG_CRITICAL_PWM[] = {
-	0x137, 0x237, 0x337, 0x837, 0x937, 0xa37 };
+	0x137, 0x237, 0x337, 0x837, 0x937, 0xa37, 0xb37 };
 
 static const u16 NCT6779_REG_TEMP[] = { 0x27, 0x150 };
 static const u16 NCT6779_REG_TEMP_MON[] = { 0x73, 0x75, 0x77, 0x79, 0x7b };
@@ -570,12 +575,12 @@ static const u16 NCT6779_REG_TEMP_CRIT[32] = {
 
 #define NCT6791_REG_HM_IO_SPACE_LOCK_ENABLE	0x28
 
-static const u16 NCT6791_REG_WEIGHT_TEMP_SEL[6] = { 0, 0x239 };
-static const u16 NCT6791_REG_WEIGHT_TEMP_STEP[6] = { 0, 0x23a };
-static const u16 NCT6791_REG_WEIGHT_TEMP_STEP_TOL[6] = { 0, 0x23b };
-static const u16 NCT6791_REG_WEIGHT_DUTY_STEP[6] = { 0, 0x23c };
-static const u16 NCT6791_REG_WEIGHT_TEMP_BASE[6] = { 0, 0x23d };
-static const u16 NCT6791_REG_WEIGHT_DUTY_BASE[6] = { 0, 0x23e };
+static const u16 NCT6791_REG_WEIGHT_TEMP_SEL[NUM_FAN] = { 0, 0x239 };
+static const u16 NCT6791_REG_WEIGHT_TEMP_STEP[NUM_FAN] = { 0, 0x23a };
+static const u16 NCT6791_REG_WEIGHT_TEMP_STEP_TOL[NUM_FAN] = { 0, 0x23b };
+static const u16 NCT6791_REG_WEIGHT_DUTY_STEP[NUM_FAN] = { 0, 0x23c };
+static const u16 NCT6791_REG_WEIGHT_TEMP_BASE[NUM_FAN] = { 0, 0x23d };
+static const u16 NCT6791_REG_WEIGHT_DUTY_BASE[NUM_FAN] = { 0, 0x23e };
 
 static const u16 NCT6791_REG_ALARM[NUM_REG_ALARM] = {
 	0x459, 0x45A, 0x45B, 0x568, 0x45D };
@@ -707,6 +712,43 @@ static const char *const nct6795_temp_label[] = {
 
 #define NCT6795_TEMP_MASK	0xbfffff7e
 
+static const char *const nct6796_temp_label[] = {
+	"",
+	"SYSTIN",
+	"CPUTIN",
+	"AUXTIN0",
+	"AUXTIN1",
+	"AUXTIN2",
+	"AUXTIN3",
+	"AUXTIN4",
+	"SMBUSMASTER 0",
+	"SMBUSMASTER 1",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"PECI Agent 0",
+	"PECI Agent 1",
+	"PCH_CHIP_CPU_MAX_TEMP",
+	"PCH_CHIP_TEMP",
+	"PCH_CPU_TEMP",
+	"PCH_MCH_TEMP",
+	"PCH_DIM0_TEMP",
+	"PCH_DIM1_TEMP",
+	"PCH_DIM2_TEMP",
+	"PCH_DIM3_TEMP",
+	"BYTE_TEMP0",
+	"BYTE_TEMP1",
+	"PECI Agent 0 Calibration",
+	"PECI Agent 1 Calibration",
+	"",
+	"Virtual_TEMP"
+};
+
+#define NCT6796_TEMP_MASK	0xbfff03fe
+
 /* NCT6102D/NCT6106D specific data */
 
 #define NCT6106_REG_VBAT	0x318
@@ -1231,11 +1273,13 @@ static bool is_word_sized(struct nct6775_data *data, u16 reg)
 	case nct6792:
 	case nct6793:
 	case nct6795:
+	case nct6796:
 		return reg == 0x150 || reg == 0x153 || reg == 0x155 ||
 		  ((reg & 0xfff0) == 0x4b0 && (reg & 0x000f) < 0x0b) ||
 		  reg == 0x402 ||
 		  reg == 0x63a || reg == 0x63c || reg == 0x63e ||
-		  reg == 0x640 || reg == 0x642 ||
+		  reg == 0x640 || reg == 0x642 || reg == 0x64a ||
+		  reg == 0x64c || reg == 0x660 ||
 		  reg == 0x73 || reg == 0x75 || reg == 0x77 || reg == 0x79 ||
 		  reg == 0x7b || reg == 0x7d;
 	}
@@ -1469,7 +1513,7 @@ static void nct6775_update_pwm(struct device *dev)
 		duty_is_dc = data->REG_PWM_MODE[i] &&
 		  (nct6775_read_value(data, data->REG_PWM_MODE[i])
 		   & data->PWM_MODE_MASK[i]);
-		data->pwm_mode[i] = duty_is_dc;
+		data->pwm_mode[i] = !duty_is_dc;
 
 		fanmodecfg = nct6775_read_value(data, data->REG_FAN_MODE[i]);
 		for (j = 0; j < ARRAY_SIZE(data->REG_PWM); j++) {
@@ -1584,6 +1628,7 @@ static void nct6775_update_pwm_limits(struct device *dev)
 		case nct6792:
 		case nct6793:
 		case nct6795:
+		case nct6796:
 			reg = nct6775_read_value(data,
 					data->REG_CRITICAL_PWM_ENABLE[i]);
 			if (reg & data->CRITICAL_PWM_ENABLE_MASK)
@@ -2092,6 +2137,8 @@ static umode_t nct6775_fan_is_visible(struct kobject *kobj,
 		return 0;
 	if (nr == 2 && data->BEEP_BITS[FAN_ALARM_BASE + fan] == -1)
 		return 0;
+	if (nr == 3 && !data->REG_FAN_PULSES[fan])
+		return 0;
 	if (nr == 4 && !(data->has_fan_min & BIT(fan)))
 		return 0;
 	if (nr == 5 && data->kind != nct6775)
@@ -2350,7 +2397,7 @@ show_pwm_mode(struct device *dev, struct device_attribute *attr, char *buf)
 	struct nct6775_data *data = nct6775_update_device(dev);
 	struct sensor_device_attribute *sattr = to_sensor_dev_attr(attr);
 
-	return sprintf(buf, "%d\n", !data->pwm_mode[sattr->index]);
+	return sprintf(buf, "%d\n", data->pwm_mode[sattr->index]);
 }
 
 static ssize_t
@@ -2371,9 +2418,9 @@ store_pwm_mode(struct device *dev, struct device_attribute *attr,
 	if (val > 1)
 		return -EINVAL;
 
-	/* Setting DC mode is not supported for all chips/channels */
+	/* Setting DC mode (0) is not supported for all chips/channels */
 	if (data->REG_PWM_MODE[nr] == 0) {
-		if (val)
+		if (!val)
 			return -EINVAL;
 		return count;
 	}
@@ -2382,7 +2429,7 @@ store_pwm_mode(struct device *dev, struct device_attribute *attr,
 	data->pwm_mode[nr] = val;
 	reg = nct6775_read_value(data, data->REG_PWM_MODE[nr]);
 	reg &= ~data->PWM_MODE_MASK[nr];
-	if (val)
+	if (!val)
 		reg |= data->PWM_MODE_MASK[nr];
 	nct6775_write_value(data, data->REG_PWM_MODE[nr], reg);
 	mutex_unlock(&data->update_lock);
@@ -3004,6 +3051,7 @@ store_auto_pwm(struct device *dev, struct device_attribute *attr,
 		case nct6792:
 		case nct6793:
 		case nct6795:
+		case nct6796:
 			nct6775_write_value(data, data->REG_CRITICAL_PWM[nr],
 					    val);
 			reg = nct6775_read_value(data,
@@ -3358,8 +3406,10 @@ static inline void nct6775_init_device(struct nct6775_data *data)
 static void
 nct6775_check_fan_inputs(struct nct6775_data *data)
 {
-	bool fan3pin, fan4pin, fan4min, fan5pin, fan6pin;
-	bool pwm3pin, pwm4pin, pwm5pin, pwm6pin;
+	bool fan3pin = false, fan4pin = false, fan4min = false;
+	bool fan5pin = false, fan6pin = false, fan7pin = false;
+	bool pwm3pin = false, pwm4pin = false, pwm5pin = false;
+	bool pwm6pin = false, pwm7pin = false;
 	int sioreg = data->sioreg;
 	int regval;
 
@@ -3376,12 +3426,6 @@ nct6775_check_fan_inputs(struct nct6775_data *data)
 
 		/* On NCT6775, fan4 shares pins with the fdc interface */
 		fan4pin = !(superio_inb(sioreg, 0x2A) & 0x80);
-		fan4min = false;
-		fan5pin = false;
-		fan6pin = false;
-		pwm4pin = false;
-		pwm5pin = false;
-		pwm6pin = false;
 	} else if (data->kind == nct6776) {
 		bool gpok = superio_inb(sioreg, 0x27) & 0x80;
 		const char *board_vendor, *board_name;
@@ -3421,25 +3465,15 @@ nct6775_check_fan_inputs(struct nct6775_data *data)
 			fan5pin = superio_inb(sioreg, 0x1C) & 0x02;
 
 		fan4min = fan4pin;
-		fan6pin = false;
 		pwm3pin = fan3pin;
-		pwm4pin = false;
-		pwm5pin = false;
-		pwm6pin = false;
 	} else if (data->kind == nct6106) {
 		regval = superio_inb(sioreg, 0x24);
 		fan3pin = !(regval & 0x80);
 		pwm3pin = regval & 0x08;
-
-		fan4pin = false;
-		fan4min = false;
-		fan5pin = false;
-		fan6pin = false;
-		pwm4pin = false;
-		pwm5pin = false;
-		pwm6pin = false;
-	} else { /* NCT6779D, NCT6791D, NCT6792D, NCT6793D, or NCT6795D */
-		int regval_1b, regval_2a, regval_eb;
+	} else {
+		/* NCT6779D, NCT6791D, NCT6792D, NCT6793D, NCT6795D, NCT6796D */
+		int regval_1b, regval_2a, regval_2f;
+		bool dsw_en;
 
 		regval = superio_inb(sioreg, 0x1c);
 
@@ -3460,31 +3494,60 @@ nct6775_check_fan_inputs(struct nct6775_data *data)
 			break;
 		case nct6793:
 		case nct6795:
+		case nct6796:
 			regval_1b = superio_inb(sioreg, 0x1b);
 			regval_2a = superio_inb(sioreg, 0x2a);
+			regval_2f = superio_inb(sioreg, 0x2f);
+			dsw_en = regval_2f & BIT(3);
 
 			if (!pwm5pin)
 				pwm5pin = regval & BIT(7);
-			fan6pin = regval & BIT(1);
-			pwm6pin = regval & BIT(0);
+
 			if (!fan5pin)
 				fan5pin = regval_1b & BIT(5);
 
 			superio_select(sioreg, NCT6775_LD_12);
-			regval_eb = superio_inb(sioreg, 0xeb);
-			if (!fan5pin)
-				fan5pin = regval_eb & BIT(5);
-			if (!pwm5pin)
-				pwm5pin = (regval_eb & BIT(4)) &&
-					   !(regval_2a & BIT(0));
-			if (!fan6pin)
-				fan6pin = regval_eb & BIT(3);
-			if (!pwm6pin)
-				pwm6pin = regval_eb & BIT(2);
+			if (data->kind != nct6796) {
+				int regval_eb = superio_inb(sioreg, 0xeb);
+
+				if (!dsw_en) {
+					fan6pin = regval & BIT(1);
+					pwm6pin = regval & BIT(0);
+				}
+
+				if (!fan5pin)
+					fan5pin = regval_eb & BIT(5);
+				if (!pwm5pin)
+					pwm5pin = (regval_eb & BIT(4)) &&
+						!(regval_2a & BIT(0));
+				if (!fan6pin)
+					fan6pin = regval_eb & BIT(3);
+				if (!pwm6pin)
+					pwm6pin = regval_eb & BIT(2);
+			}
+
+			if (data->kind == nct6795 || data->kind == nct6796) {
+				int regval_ed = superio_inb(sioreg, 0xed);
+
+				if (!fan6pin)
+					fan6pin = (regval_2a & BIT(4)) &&
+					  (!dsw_en ||
+					   (dsw_en && (regval_ed & BIT(4))));
+				if (!pwm6pin)
+					pwm6pin = (regval_2a & BIT(3)) &&
+					  (regval_ed & BIT(2));
+			}
+
+			if (data->kind == nct6796) {
+				int regval_1d = superio_inb(sioreg, 0x1d);
+				int regval_2b = superio_inb(sioreg, 0x2b);
+
+				fan7pin = !(regval_2b & BIT(2));
+				pwm7pin = !(regval_1d & (BIT(2) | BIT(3)));
+			}
+
 			break;
 		default:	/* NCT6779D */
-			fan6pin = false;
-			pwm6pin = false;
 			break;
 		}
 
@@ -3493,11 +3556,11 @@ nct6775_check_fan_inputs(struct nct6775_data *data)
 
 	/* fan 1 and 2 (0x03) are always present */
 	data->has_fan = 0x03 | (fan3pin << 2) | (fan4pin << 3) |
-		(fan5pin << 4) | (fan6pin << 5);
+		(fan5pin << 4) | (fan6pin << 5) | (fan7pin << 6);
 	data->has_fan_min = 0x03 | (fan3pin << 2) | (fan4min << 3) |
-		(fan5pin << 4);
+		(fan5pin << 4) | (fan6pin << 5) | (fan7pin << 6);
 	data->has_pwm = 0x03 | (pwm3pin << 2) | (pwm4pin << 3) |
-		(pwm5pin << 4) | (pwm6pin << 5);
+		(pwm5pin << 4) | (pwm6pin << 5) | (pwm7pin << 6);
 }
 
 static void add_temp_sensors(struct nct6775_data *data, const u16 *regp,
@@ -3856,8 +3919,9 @@ static int nct6775_probe(struct platform_device *pdev)
 	case nct6792:
 	case nct6793:
 	case nct6795:
+	case nct6796:
 		data->in_num = 15;
-		data->pwm_num = 6;
+		data->pwm_num = (data->kind == nct6796) ? 7 : 6;
 		data->auto_pwm_num = 4;
 		data->has_fan_div = false;
 		data->temp_fixed_num = 6;
@@ -3891,6 +3955,10 @@ static int nct6775_probe(struct platform_device *pdev)
 			data->temp_label = nct6795_temp_label;
 			data->temp_mask = NCT6795_TEMP_MASK;
 			break;
+		case nct6796:
+			data->temp_label = nct6796_temp_label;
+			data->temp_mask = NCT6796_TEMP_MASK;
+			break;
 		}
 
 		data->REG_CONFIG = NCT6775_REG_CONFIG;
@@ -4159,6 +4227,7 @@ static int nct6775_probe(struct platform_device *pdev)
 	case nct6792:
 	case nct6793:
 	case nct6795:
+	case nct6796:
 		break;
 	}
 
@@ -4193,6 +4262,7 @@ static int nct6775_probe(struct platform_device *pdev)
 		case nct6792:
 		case nct6793:
 		case nct6795:
+		case nct6796:
 			tmp |= 0x7e;
 			break;
 		}
@@ -4291,7 +4361,8 @@ static int __maybe_unused nct6775_resume(struct device *dev)
 		superio_outb(sioreg, SIO_REG_ENABLE, data->sio_reg_enable);
 
 	if (data->kind == nct6791 || data->kind == nct6792 ||
-	    data->kind == nct6793 || data->kind == nct6795)
+	    data->kind == nct6793 || data->kind == nct6795 ||
+	    data->kind == nct6796)
 		nct6791_enable_io_mapping(sioreg);
 
 	superio_exit(sioreg);
@@ -4391,6 +4462,9 @@ static int __init nct6775_find(int sioaddr, struct nct6775_sio_data *sio_data)
 	case SIO_NCT6795_ID:
 		sio_data->kind = nct6795;
 		break;
+	case SIO_NCT6796_ID:
+		sio_data->kind = nct6796;
+		break;
 	default:
 		if (val != 0xffff)
 			pr_debug("unsupported chip ID: 0x%04x\n", val);
@@ -4417,7 +4491,8 @@ static int __init nct6775_find(int sioaddr, struct nct6775_sio_data *sio_data)
 	}
 
 	if (sio_data->kind == nct6791 || sio_data->kind == nct6792 ||
-	    sio_data->kind == nct6793 || sio_data->kind == nct6795)
+	    sio_data->kind == nct6793 || sio_data->kind == nct6795 ||
+	    sio_data->kind == nct6796)
 		nct6791_enable_io_mapping(sioaddr);
 
 	superio_exit(sioaddr);
diff --git a/drivers/hwmon/pmbus/Kconfig b/drivers/hwmon/pmbus/Kconfig
index 6e4298e99222..e71aec69e76e 100644
--- a/drivers/hwmon/pmbus/Kconfig
+++ b/drivers/hwmon/pmbus/Kconfig
@@ -31,8 +31,8 @@ config SENSORS_ADM1275
 	default n
 	help
 	  If you say yes here you get hardware monitoring support for Analog
-	  Devices ADM1075, ADM1275, ADM1276, ADM1278, ADM1293, and ADM1294
-	  Hot-Swap Controller and Digital Power Monitors.
+	  Devices ADM1075, ADM1272, ADM1275, ADM1276, ADM1278, ADM1293,
+	  and ADM1294 Hot-Swap Controller and Digital Power Monitors.
 
 	  This driver can also be built as a module. If so, the module will
 	  be called adm1275.
diff --git a/drivers/hwmon/pmbus/adm1275.c b/drivers/hwmon/pmbus/adm1275.c
index 00d6995af4c2..13600fa79e7f 100644
--- a/drivers/hwmon/pmbus/adm1275.c
+++ b/drivers/hwmon/pmbus/adm1275.c
@@ -3,6 +3,7 @@
  * and Digital Power Monitor
  *
  * Copyright (c) 2011 Ericsson AB.
+ * Copyright (c) 2018 Guenter Roeck
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -24,7 +25,7 @@
 #include <linux/bitops.h>
 #include "pmbus.h"
 
-enum chips { adm1075, adm1275, adm1276, adm1278, adm1293, adm1294 };
+enum chips { adm1075, adm1272, adm1275, adm1276, adm1278, adm1293, adm1294 };
 
 #define ADM1275_MFR_STATUS_IOUT_WARN2	BIT(0)
 #define ADM1293_MFR_STATUS_VAUX_UV_WARN	BIT(5)
@@ -41,6 +42,8 @@ enum chips { adm1075, adm1275, adm1276, adm1278, adm1293, adm1294 };
 #define ADM1075_IRANGE_25		BIT(3)
 #define ADM1075_IRANGE_MASK		(BIT(3) | BIT(4))
 
+#define ADM1272_IRANGE			BIT(0)
+
 #define ADM1278_TEMP1_EN		BIT(3)
 #define ADM1278_VIN_EN			BIT(2)
 #define ADM1278_VOUT_EN			BIT(1)
@@ -105,6 +108,19 @@ static const struct coefficients adm1075_coefficients[] = {
 	[4] = { 4279, 0, -1 },		/* power, irange50 */
 };
 
+static const struct coefficients adm1272_coefficients[] = {
+	[0] = { 6770, 0, -2 },		/* voltage, vrange 60V */
+	[1] = { 4062, 0, -2 },		/* voltage, vrange 100V */
+	[2] = { 1326, 20480, -1 },	/* current, vsense range 15mV */
+	[3] = { 663, 20480, -1 },	/* current, vsense range 30mV */
+	[4] = { 3512, 0, -2 },		/* power, vrange 60V, irange 15mV */
+	[5] = { 21071, 0, -3 },		/* power, vrange 100V, irange 15mV */
+	[6] = { 17561, 0, -3 },		/* power, vrange 60V, irange 30mV */
+	[7] = { 10535, 0, -3 },		/* power, vrange 100V, irange 30mV */
+	[8] = { 42, 31871, -1 },	/* temperature */
+
+};
+
 static const struct coefficients adm1275_coefficients[] = {
 	[0] = { 19199, 0, -2 },		/* voltage, vrange set */
 	[1] = { 6720, 0, -1 },		/* voltage, vrange not set */
@@ -154,7 +170,7 @@ static int adm1275_read_word_data(struct i2c_client *client, int page, int reg)
 	const struct adm1275_data *data = to_adm1275_data(info);
 	int ret = 0;
 
-	if (page)
+	if (page > 0)
 		return -ENXIO;
 
 	switch (reg) {
@@ -240,7 +256,7 @@ static int adm1275_write_word_data(struct i2c_client *client, int page, int reg,
 	const struct adm1275_data *data = to_adm1275_data(info);
 	int ret;
 
-	if (page)
+	if (page > 0)
 		return -ENXIO;
 
 	switch (reg) {
@@ -335,6 +351,7 @@ static int adm1275_read_byte_data(struct i2c_client *client, int page, int reg)
 
 static const struct i2c_device_id adm1275_id[] = {
 	{ "adm1075", adm1075 },
+	{ "adm1272", adm1272 },
 	{ "adm1275", adm1275 },
 	{ "adm1276", adm1276 },
 	{ "adm1278", adm1278 },
@@ -451,6 +468,54 @@ static int adm1275_probe(struct i2c_client *client,
 			info->func[0] |=
 			  PMBUS_HAVE_VOUT | PMBUS_HAVE_STATUS_VOUT;
 		break;
+	case adm1272:
+		data->have_vout = true;
+		data->have_pin_max = true;
+		data->have_temp_max = true;
+
+		coefficients = adm1272_coefficients;
+		vindex = (config & ADM1275_VRANGE) ? 1 : 0;
+		cindex = (config & ADM1272_IRANGE) ? 3 : 2;
+		/* pindex depends on the combination of the above */
+		switch (config & (ADM1275_VRANGE | ADM1272_IRANGE)) {
+		case 0:
+		default:
+			pindex = 4;
+			break;
+		case ADM1275_VRANGE:
+			pindex = 5;
+			break;
+		case ADM1272_IRANGE:
+			pindex = 6;
+			break;
+		case ADM1275_VRANGE | ADM1272_IRANGE:
+			pindex = 7;
+			break;
+		}
+		tindex = 8;
+
+		info->func[0] |= PMBUS_HAVE_PIN | PMBUS_HAVE_STATUS_INPUT |
+			PMBUS_HAVE_VOUT | PMBUS_HAVE_STATUS_VOUT;
+
+		/* Enable VOUT if not enabled (it is disabled by default) */
+		if (!(config & ADM1278_VOUT_EN)) {
+			config |= ADM1278_VOUT_EN;
+			ret = i2c_smbus_write_byte_data(client,
+							ADM1275_PMON_CONFIG,
+							config);
+			if (ret < 0) {
+				dev_err(&client->dev,
+					"Failed to enable VOUT monitoring\n");
+				return -ENODEV;
+			}
+		}
+
+		if (config & ADM1278_TEMP1_EN)
+			info->func[0] |=
+				PMBUS_HAVE_TEMP | PMBUS_HAVE_STATUS_TEMP;
+		if (config & ADM1278_VIN_EN)
+			info->func[0] |= PMBUS_HAVE_VIN;
+		break;
 	case adm1275:
 		if (device_config & ADM1275_IOUT_WARN2_SELECT)
 			data->have_oc_fault = true;
diff --git a/drivers/hwmon/pmbus/max8688.c b/drivers/hwmon/pmbus/max8688.c
index dd4883a19045..e951f9b87abb 100644
--- a/drivers/hwmon/pmbus/max8688.c
+++ b/drivers/hwmon/pmbus/max8688.c
@@ -45,7 +45,7 @@ static int max8688_read_word_data(struct i2c_client *client, int page, int reg)
 {
 	int ret;
 
-	if (page)
+	if (page > 0)
 		return -ENXIO;
 
 	switch (reg) {
diff --git a/drivers/hwmon/pmbus/ucd9000.c b/drivers/hwmon/pmbus/ucd9000.c
index b74dbeca2e8d..70cecb06f93c 100644
--- a/drivers/hwmon/pmbus/ucd9000.c
+++ b/drivers/hwmon/pmbus/ucd9000.c
@@ -19,6 +19,7 @@
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
+#include <linux/debugfs.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of_device.h>
@@ -27,6 +28,8 @@
 #include <linux/slab.h>
 #include <linux/i2c.h>
 #include <linux/pmbus.h>
+#include <linux/gpio.h>
+#include <linux/gpio/driver.h>
 #include "pmbus.h"
 
 enum chips { ucd9000, ucd90120, ucd90124, ucd90160, ucd9090, ucd90910 };
@@ -35,8 +38,19 @@ enum chips { ucd9000, ucd90120, ucd90124, ucd90160, ucd9090, ucd90910 };
 #define UCD9000_NUM_PAGES		0xd6
 #define UCD9000_FAN_CONFIG_INDEX	0xe7
 #define UCD9000_FAN_CONFIG		0xe8
+#define UCD9000_MFR_STATUS		0xf3
+#define UCD9000_GPIO_SELECT		0xfa
+#define UCD9000_GPIO_CONFIG		0xfb
 #define UCD9000_DEVICE_ID		0xfd
 
+/* GPIO CONFIG bits */
+#define UCD9000_GPIO_CONFIG_ENABLE	BIT(0)
+#define UCD9000_GPIO_CONFIG_OUT_ENABLE	BIT(1)
+#define UCD9000_GPIO_CONFIG_OUT_VALUE	BIT(2)
+#define UCD9000_GPIO_CONFIG_STATUS	BIT(3)
+#define UCD9000_GPIO_INPUT		0
+#define UCD9000_GPIO_OUTPUT		1
+
 #define UCD9000_MON_TYPE(x)	(((x) >> 5) & 0x07)
 #define UCD9000_MON_PAGE(x)	((x) & 0x0f)
 
@@ -47,12 +61,29 @@ enum chips { ucd9000, ucd90120, ucd90124, ucd90160, ucd9090, ucd90910 };
 
 #define UCD9000_NUM_FAN		4
 
+#define UCD9000_GPIO_NAME_LEN	16
+#define UCD9090_NUM_GPIOS	23
+#define UCD901XX_NUM_GPIOS	26
+#define UCD90910_NUM_GPIOS	26
+
+#define UCD9000_DEBUGFS_NAME_LEN	24
+#define UCD9000_GPI_COUNT		8
+
 struct ucd9000_data {
 	u8 fan_data[UCD9000_NUM_FAN][I2C_SMBUS_BLOCK_MAX];
 	struct pmbus_driver_info info;
+#ifdef CONFIG_GPIOLIB
+	struct gpio_chip gpio;
+#endif
+	struct dentry *debugfs;
 };
 #define to_ucd9000_data(_info) container_of(_info, struct ucd9000_data, info)
 
+struct ucd9000_debugfs_entry {
+	struct i2c_client *client;
+	u8 index;
+};
+
 static int ucd9000_get_fan_config(struct i2c_client *client, int fan)
 {
 	int fan_config = 0;
@@ -149,6 +180,312 @@ static const struct of_device_id ucd9000_of_match[] = {
 };
 MODULE_DEVICE_TABLE(of, ucd9000_of_match);
 
+#ifdef CONFIG_GPIOLIB
+static int ucd9000_gpio_read_config(struct i2c_client *client,
+				    unsigned int offset)
+{
+	int ret;
+
+	/* Select pin @offset, then read its config; no page set required */
+	ret = i2c_smbus_write_byte_data(client, UCD9000_GPIO_SELECT, offset);
+	if (ret < 0)
+		return ret;
+
+	return i2c_smbus_read_byte_data(client, UCD9000_GPIO_CONFIG);
+}
+
+/* gpio_chip .get: report the STATUS bit of the pin's config as 0 or 1. */
+static int ucd9000_gpio_get(struct gpio_chip *gc, unsigned int offset)
+{
+	struct i2c_client *client = gpiochip_get_data(gc);
+	int config = ucd9000_gpio_read_config(client, offset);
+
+	if (config < 0)
+		return config;
+
+	return (config & UCD9000_GPIO_CONFIG_STATUS) ? 1 : 0;
+}
+
+/* Set a GPIO's state; errors are only logged because .set returns void. */
+static void ucd9000_gpio_set(struct gpio_chip *gc, unsigned int offset,
+			     int value)
+{
+	struct i2c_client *client = gpiochip_get_data(gc);
+	int ret, config;
+
+	config = ucd9000_gpio_read_config(client, offset);
+	if (config < 0) {
+		dev_dbg(&client->dev, "failed to read GPIO %d config: %d\n",
+			offset, config);
+		return;
+	}
+
+	/* Skip the write cycle when the pin is already in the wanted state. */
+	if (!(config & UCD9000_GPIO_CONFIG_STATUS) == !value)
+		return;
+
+	if (value)
+		config |= UCD9000_GPIO_CONFIG_STATUS;
+	else
+		config &= ~UCD9000_GPIO_CONFIG_STATUS;
+
+	/*
+	 * Write the new config with ENABLE set, then the same config with
+	 * ENABLE cleared. Keep it in 'config': 'ret' holds only the SMBus
+	 * status (0 on success) after the first write. No page set required.
+	 */
+	config |= UCD9000_GPIO_CONFIG_ENABLE;
+	ret = i2c_smbus_write_byte_data(client, UCD9000_GPIO_CONFIG, config);
+	if (ret < 0) {
+		dev_dbg(&client->dev, "Failed to write GPIO %d config: %d\n",
+			offset, ret);
+		return;
+	}
+
+	config &= ~UCD9000_GPIO_CONFIG_ENABLE;
+	ret = i2c_smbus_write_byte_data(client, UCD9000_GPIO_CONFIG, config);
+	if (ret < 0)
+		dev_dbg(&client->dev, "Failed to write GPIO %d config: %d\n",
+			offset, ret);
+}
+
+/* Returns 1 when OUT_ENABLE is clear, 0 when set, or the read error. */
+static int ucd9000_gpio_get_direction(struct gpio_chip *gc,
+				      unsigned int offset)
+{
+	struct i2c_client *client = gpiochip_get_data(gc);
+	int config = ucd9000_gpio_read_config(client, offset);
+
+	if (config < 0)
+		return config;
+
+	return (config & UCD9000_GPIO_CONFIG_OUT_ENABLE) ? 0 : 1;
+}
+
+/*
+ * Common backend for the direction_input/direction_output callbacks.
+ * Returns 0 early when the pin is already configured as requested,
+ * a negative value if the config read or first write fails, otherwise
+ * the result of the final config write.
+ */
+static int ucd9000_gpio_set_direction(struct gpio_chip *gc,
+				      unsigned int offset, bool direction_out,
+				      int requested_out)
+{
+	struct i2c_client *client = gpiochip_get_data(gc);
+	int config, rc, want;
+
+	config = ucd9000_gpio_read_config(client, offset);
+	if (config < 0)
+		return config;
+
+	if (!direction_out) {
+		/* Output already disabled: nothing to change */
+		if (!(config & UCD9000_GPIO_CONFIG_OUT_ENABLE))
+			return 0;
+
+		config &= ~UCD9000_GPIO_CONFIG_OUT_ENABLE;
+	} else {
+		want = requested_out ? UCD9000_GPIO_CONFIG_OUT_VALUE : 0;
+
+		/* Output already enabled with the requested value */
+		if ((config & UCD9000_GPIO_CONFIG_OUT_ENABLE) &&
+		    (config & UCD9000_GPIO_CONFIG_OUT_VALUE) == want)
+			return 0;
+
+		config &= ~UCD9000_GPIO_CONFIG_OUT_VALUE;
+		config |= UCD9000_GPIO_CONFIG_OUT_ENABLE | want;
+	}
+
+	/* Write with ENABLE set, then with it cleared; page set not required */
+	config |= UCD9000_GPIO_CONFIG_ENABLE;
+	rc = i2c_smbus_write_byte_data(client, UCD9000_GPIO_CONFIG, config);
+	if (rc < 0)
+		return rc;
+
+	config &= ~UCD9000_GPIO_CONFIG_ENABLE;
+
+	return i2c_smbus_write_byte_data(client, UCD9000_GPIO_CONFIG, config);
+}
+
+static int ucd9000_gpio_direction_input(struct gpio_chip *gc,
+					unsigned int offset)
+{
+	return ucd9000_gpio_set_direction(gc, offset, UCD9000_GPIO_INPUT, 0);
+}
+
+static int ucd9000_gpio_direction_output(struct gpio_chip *gc,
+					 unsigned int offset, int val)
+{
+	return ucd9000_gpio_set_direction(gc, offset, UCD9000_GPIO_OUTPUT,
+					  val);
+}
+
+static void ucd9000_probe_gpio(struct i2c_client *client,
+			       const struct i2c_device_id *mid,
+			       struct ucd9000_data *data)
+{
+	int rc;
+
+	switch (mid->driver_data) {
+	case ucd9090:
+		data->gpio.ngpio = UCD9090_NUM_GPIOS;
+		break;
+	case ucd90120:
+	case ucd90124:
+	case ucd90160:
+		data->gpio.ngpio = UCD901XX_NUM_GPIOS;
+		break;
+	case ucd90910:
+		data->gpio.ngpio = UCD90910_NUM_GPIOS;
+		break;
+	default:
+		return; /* GPIO support is optional. */
+	}
+
+	/*
+	 * Pinmux support has not been added to the new gpio_chip; add it
+	 * when possible given the mux behavior of these IO devices. A
+	 * registration failure below is only warned about, not propagated.
+	 */
+	data->gpio.label = client->name;
+	data->gpio.get_direction = ucd9000_gpio_get_direction;
+	data->gpio.direction_input = ucd9000_gpio_direction_input;
+	data->gpio.direction_output = ucd9000_gpio_direction_output;
+	data->gpio.get = ucd9000_gpio_get;
+	data->gpio.set = ucd9000_gpio_set;
+	data->gpio.can_sleep = true;
+	data->gpio.base = -1;
+	data->gpio.parent = &client->dev;
+
+	rc = devm_gpiochip_add_data(&client->dev, &data->gpio, client);
+	if (rc)
+		dev_warn(&client->dev, "Could not add gpiochip: %d\n", rc);
+}
+#else
+static void ucd9000_probe_gpio(struct i2c_client *client,
+			       const struct i2c_device_id *mid,
+			       struct ucd9000_data *data)
+{
+}
+#endif /* CONFIG_GPIOLIB */
+
+#ifdef CONFIG_DEBUG_FS
+/* Select page 0, then block-read MFR_STATUS into @buffer. */
+static int ucd9000_get_mfr_status(struct i2c_client *client, u8 *buffer)
+{
+	int rc = pmbus_set_page(client, 0);
+
+	if (rc < 0)
+		return rc;
+	return i2c_smbus_read_block_data(client, UCD9000_MFR_STATUS, buffer);
+}
+
+static int ucd9000_debugfs_show_mfr_status_bit(void *data, u64 *val)
+{
+	struct ucd9000_debugfs_entry *entry = data;
+	struct i2c_client *client = entry->client;
+	u8 buffer[I2C_SMBUS_BLOCK_MAX];
+	int ret;
+
+	ret = ucd9000_get_mfr_status(client, buffer);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * Only created for chips with GPI fault bits at bits 16-23 (response
+	 * byte 1). NOTE(review): the length in ret is not checked to be >= 2.
+	 */
+	*val = !!(buffer[1] & BIT(entry->index));
+
+	return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(ucd9000_debugfs_mfr_status_bit,
+			 ucd9000_debugfs_show_mfr_status_bit, NULL, "%1lld\n");
+
+/* debugfs read handler: dump the raw MFR_STATUS block as one hex line. */
+static ssize_t ucd9000_debugfs_read_mfr_status(struct file *file,
+					       char __user *buf, size_t count,
+					       loff_t *ppos)
+{
+	struct i2c_client *client = file->private_data;
+	u8 raw[I2C_SMBUS_BLOCK_MAX];
+	char hex[(I2C_SMBUS_BLOCK_MAX * 2) + 2];
+	char *end;
+	int len;
+
+	len = ucd9000_get_mfr_status(client, raw);
+	if (len < 0)
+		return len;
+
+	end = bin2hex(hex, raw, min(len, I2C_SMBUS_BLOCK_MAX));
+	*end++ = '\n';
+	*end = 0;
+	return simple_read_from_buffer(buf, count, ppos, hex, end - hex);
+}
+
+static const struct file_operations ucd9000_debugfs_show_mfr_status_fops = {
+	.llseek = noop_llseek,
+	.read = ucd9000_debugfs_read_mfr_status,
+	.open = simple_open,
+};
+
+/* Create the per-client debugfs directory and its MFR_STATUS attributes. */
+static int ucd9000_init_debugfs(struct i2c_client *client,
+				const struct i2c_device_id *mid,
+				struct ucd9000_data *data)
+{
+	struct dentry *debugfs;
+	struct ucd9000_debugfs_entry *entries;
+	int i;
+	char name[UCD9000_DEBUGFS_NAME_LEN];
+
+	debugfs = pmbus_get_debugfs_dir(client);
+	if (!debugfs)
+		return -ENOENT;
+
+	data->debugfs = debugfs_create_dir(client->name, debugfs);
+	if (!data->debugfs)
+		return -ENOENT;
+
+	/*
+	 * Of the chips this driver supports, only the UCD9090, UCD90160,
+	 * and UCD90910 report GPI faults in their MFR_STATUS register, so only
+	 * create the GPI fault debugfs attributes for those chips.
+	 */
+	if (mid->driver_data == ucd9090 || mid->driver_data == ucd90160 ||
+	    mid->driver_data == ucd90910) {
+		entries = devm_kcalloc(&client->dev, UCD9000_GPI_COUNT,
+				       sizeof(*entries), GFP_KERNEL);
+		if (!entries)
+			return -ENOMEM;
+
+		for (i = 0; i < UCD9000_GPI_COUNT; i++) {
+			entries[i].client = client;
+			entries[i].index = i;
+			scnprintf(name, UCD9000_DEBUGFS_NAME_LEN,
+				  "gpi%d_alarm", i + 1);
+			debugfs_create_file(name, 0444, data->debugfs,
+					    &entries[i],
+					    &ucd9000_debugfs_mfr_status_bit);
+		}
+	}
+
+	scnprintf(name, UCD9000_DEBUGFS_NAME_LEN, "mfr_status");
+	debugfs_create_file(name, 0444, data->debugfs, client,
+			    &ucd9000_debugfs_show_mfr_status_fops);
+
+	return 0;
+}
+#else
+static int ucd9000_init_debugfs(struct i2c_client *client,
+				const struct i2c_device_id *mid,
+				struct ucd9000_data *data)
+{
+	return 0;
+}
+#endif /* CONFIG_DEBUG_FS */
+
 static int ucd9000_probe(struct i2c_client *client,
 			 const struct i2c_device_id *id)
 {
@@ -263,7 +600,18 @@ static int ucd9000_probe(struct i2c_client *client,
 		  | PMBUS_HAVE_FAN34 | PMBUS_HAVE_STATUS_FAN34;
 	}
 
-	return pmbus_do_probe(client, mid, info);
+	ucd9000_probe_gpio(client, mid, data);
+
+	ret = pmbus_do_probe(client, mid, info);
+	if (ret)
+		return ret;
+
+	ret = ucd9000_init_debugfs(client, mid, data);
+	if (ret)
+		dev_warn(&client->dev, "Failed to register debugfs: %d\n",
+			 ret);
+
+	return 0;
 }
 
 /* This is the driver that will be inserted */
diff --git a/drivers/hwmon/sht21.c b/drivers/hwmon/sht21.c
index 190e7b39ce32..2c7ba70921f5 100644
--- a/drivers/hwmon/sht21.c
+++ b/drivers/hwmon/sht21.c
@@ -16,8 +16,7 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA
  *
- * Data sheet available (5/2010) at
- * http://www.sensirion.com/en/pdf/product_information/Datasheet-humidity-sensor-SHT21.pdf
+ * Data sheet available at http://www.sensirion.com/file/datasheet_sht21
  */
 
 #include <linux/module.h>
diff --git a/drivers/hwmon/via-cputemp.c b/drivers/hwmon/via-cputemp.c
index 07a0cb0a1f28..0e81f287d305 100644
--- a/drivers/hwmon/via-cputemp.c
+++ b/drivers/hwmon/via-cputemp.c
@@ -136,20 +136,24 @@ static int via_cputemp_probe(struct platform_device *pdev)
 	data->id = pdev->id;
 	data->name = "via_cputemp";
 
-	switch (c->x86_model) {
-	case 0xA:
-		/* C7 A */
-	case 0xD:
-		/* C7 D */
-		data->msr_temp = 0x1169;
-		data->msr_vid = 0x198;
-		break;
-	case 0xF:
-		/* Nano */
+	if (c->x86 == 7) {
 		data->msr_temp = 0x1423;
-		break;
-	default:
-		return -ENODEV;
+	} else {
+		switch (c->x86_model) {
+		case 0xA:
+			/* C7 A */
+		case 0xD:
+			/* C7 D */
+			data->msr_temp = 0x1169;
+			data->msr_vid = 0x198;
+			break;
+		case 0xF:
+			/* Nano */
+			data->msr_temp = 0x1423;
+			break;
+		default:
+			return -ENODEV;
+		}
 	}
 
 	/* test if we can access the TEMPERATURE MSR */
@@ -283,6 +287,7 @@ static const struct x86_cpu_id __initconst cputemp_ids[] = {
 	{ X86_VENDOR_CENTAUR, 6, 0xa, }, /* C7 A */
 	{ X86_VENDOR_CENTAUR, 6, 0xd, }, /* C7 D */
 	{ X86_VENDOR_CENTAUR, 6, 0xf, }, /* Nano */
+	{ X86_VENDOR_CENTAUR, 7, X86_MODEL_ANY, },
 	{}
 };
 MODULE_DEVICE_TABLE(x86cpu, cputemp_ids);
diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index 4c0895165727..c4865b08d7fb 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -979,6 +979,16 @@ config I2C_SUN6I_P2WI
 	  This interface is used to connect to specific PMIC devices (like the
 	  AXP221).
 
+config I2C_SYNQUACER
+	tristate "Socionext SynQuacer I2C controller"
+	depends on ARCH_SYNQUACER || COMPILE_TEST
+	help
+	  Say Y here to include support for the I2C controller used in some
+	  Fujitsu and Socionext SoCs.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called i2c-synquacer.
+
 config I2C_TEGRA
 	tristate "NVIDIA Tegra internal I2C controller"
 	depends on ARCH_TEGRA
diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile
index 9e475a54e36e..189e34ba050f 100644
--- a/drivers/i2c/busses/Makefile
+++ b/drivers/i2c/busses/Makefile
@@ -97,6 +97,7 @@ obj-$(CONFIG_I2C_STM32F4)	+= i2c-stm32f4.o
 obj-$(CONFIG_I2C_STM32F7)	+= i2c-stm32f7.o
 obj-$(CONFIG_I2C_STU300)	+= i2c-stu300.o
 obj-$(CONFIG_I2C_SUN6I_P2WI)	+= i2c-sun6i-p2wi.o
+obj-$(CONFIG_I2C_SYNQUACER)	+= i2c-synquacer.o
 obj-$(CONFIG_I2C_TEGRA)		+= i2c-tegra.o
 obj-$(CONFIG_I2C_TEGRA_BPMP)	+= i2c-tegra-bpmp.o
 obj-$(CONFIG_I2C_UNIPHIER)	+= i2c-uniphier.o
diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c
index 05732531829f..fd36c39ddf4e 100644
--- a/drivers/i2c/busses/i2c-designware-master.c
+++ b/drivers/i2c/busses/i2c-designware-master.c
@@ -163,7 +163,7 @@ static int i2c_dw_init_master(struct dw_i2c_dev *dev)
 		if (!(dev->sda_hold_time & DW_IC_SDA_HOLD_RX_MASK))
 			dev->sda_hold_time |= 1 << DW_IC_SDA_HOLD_RX_SHIFT;
 		dw_writel(dev, dev->sda_hold_time, DW_IC_SDA_HOLD);
-	} else {
+	} else if (dev->sda_hold_time) {
 		dev_warn(dev->dev,
 			"Hardware too old to adjust SDA hold time.\n");
 	}
diff --git a/drivers/i2c/busses/i2c-exynos5.c b/drivers/i2c/busses/i2c-exynos5.c
index b02428498f6d..12ec8484e653 100644
--- a/drivers/i2c/busses/i2c-exynos5.c
+++ b/drivers/i2c/busses/i2c-exynos5.c
@@ -128,6 +128,10 @@
 #define HSI2C_TIMEOUT_EN			(1u << 31)
 #define HSI2C_TIMEOUT_MASK			0xff
 
+/* I2C_MANUAL_CMD register bits */
+#define HSI2C_CMD_READ_DATA			(1u << 4)
+#define HSI2C_CMD_SEND_STOP			(1u << 2)
+
 /* I2C_TRANS_STATUS register bits */
 #define HSI2C_MASTER_BUSY			(1u << 17)
 #define HSI2C_SLAVE_BUSY			(1u << 16)
@@ -441,12 +445,6 @@ static irqreturn_t exynos5_i2c_irq(int irqno, void *dev_id)
 			i2c->state = -ETIMEDOUT;
 			goto stop;
 		}
-
-		trans_status = readl(i2c->regs + HSI2C_TRANS_STATUS);
-		if ((trans_status & HSI2C_MASTER_ST_MASK) == HSI2C_MASTER_ST_LOSE) {
-			i2c->state = -EAGAIN;
-			goto stop;
-		}
 	} else if (int_status & HSI2C_INT_I2C) {
 		trans_status = readl(i2c->regs + HSI2C_TRANS_STATUS);
 		if (trans_status & HSI2C_NO_DEV_ACK) {
@@ -544,6 +542,57 @@ static int exynos5_i2c_wait_bus_idle(struct exynos5_i2c *i2c)
 	return -EBUSY;
 }
 
+static void exynos5_i2c_bus_recover(struct exynos5_i2c *i2c)
+{
+	u32 val;
+
+	val = readl(i2c->regs + HSI2C_CTL) | HSI2C_RXCHON;
+	writel(val, i2c->regs + HSI2C_CTL);
+	val = readl(i2c->regs + HSI2C_CONF) & ~HSI2C_AUTO_MODE;
+	writel(val, i2c->regs + HSI2C_CONF);
+
+	/*
+	 * The specification says the master should send nine clock pulses.
+	 * Emulate them with a manual read command (eight pulses for the data
+	 * bits plus one for the NACK), then issue a manual STOP.
+	 */
+	writel(HSI2C_CMD_READ_DATA, i2c->regs + HSI2C_MANUAL_CMD);
+	exynos5_i2c_wait_bus_idle(i2c);
+	writel(HSI2C_CMD_SEND_STOP, i2c->regs + HSI2C_MANUAL_CMD);
+	exynos5_i2c_wait_bus_idle(i2c);
+
+	val = readl(i2c->regs + HSI2C_CTL) & ~HSI2C_RXCHON;
+	writel(val, i2c->regs + HSI2C_CTL);
+	val = readl(i2c->regs + HSI2C_CONF) | HSI2C_AUTO_MODE;
+	writel(val, i2c->regs + HSI2C_CONF);
+}
+
+/* Recover the bus on Exynos7 if it looks stuck before a transfer starts. */
+static void exynos5_i2c_bus_check(struct exynos5_i2c *i2c)
+{
+	unsigned long deadline;
+
+	/* Only the EXYNOS7 variant is checked */
+	if (i2c->variant->hw != HSI2C_EXYNOS7)
+		return;
+
+	/*
+	 * On the EXYNOS7 variant, seeing HSI2C_MASTER_ST_LOSE before a
+	 * transaction means the bus is stuck (SDA is low). Keep running the
+	 * recovery sequence until that state clears or 100 ms have elapsed.
+	 */
+	deadline = jiffies + msecs_to_jiffies(100);
+
+	while ((readl(i2c->regs + HSI2C_TRANS_STATUS) &
+		HSI2C_MASTER_ST_MASK) == HSI2C_MASTER_ST_LOSE) {
+		/* Give up once the deadline has passed */
+		if (time_is_before_jiffies(deadline))
+			return;
+
+		exynos5_i2c_bus_recover(i2c);
+	}
+}
+
 /*
  * exynos5_i2c_message_start: Configures the bus and starts the xfer
  * i2c: struct exynos5_i2c pointer for the current bus
@@ -598,6 +647,8 @@ static void exynos5_i2c_message_start(struct exynos5_i2c *i2c, int stop)
 	writel(fifo_ctl, i2c->regs + HSI2C_FIFO_CTL);
 	writel(i2c_ctl, i2c->regs + HSI2C_CTL);
 
+	exynos5_i2c_bus_check(i2c);
+
 	/*
 	 * Enable interrupts before starting the transfer so that we don't
 	 * miss any INT_I2C interrupts.
diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index 692b34125866..e0d59e9ff3c6 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -966,8 +966,6 @@ static void i801_enable_host_notify(struct i2c_adapter *adapter)
 	if (!(priv->features & FEATURE_HOST_NOTIFY))
 		return;
 
-	priv->original_slvcmd = inb_p(SMBSLVCMD(priv));
-
 	if (!(SMBSLVCMD_HST_NTFY_INTREN & priv->original_slvcmd))
 		outb_p(SMBSLVCMD_HST_NTFY_INTREN | priv->original_slvcmd,
 		       SMBSLVCMD(priv));
@@ -1615,6 +1613,10 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
 		outb_p(inb_p(SMBAUXCTL(priv)) &
 		       ~(SMBAUXCTL_CRC | SMBAUXCTL_E32B), SMBAUXCTL(priv));
 
+	/* Remember original Host Notify setting */
+	if (priv->features & FEATURE_HOST_NOTIFY)
+		priv->original_slvcmd = inb_p(SMBSLVCMD(priv));
+
 	/* Default timeout in interrupt mode: 200 ms */
 	priv->adapter.timeout = HZ / 5;
 
@@ -1699,6 +1701,15 @@ static void i801_remove(struct pci_dev *dev)
 	 */
 }
 
+static void i801_shutdown(struct pci_dev *dev)
+{
+	struct i801_priv *priv = pci_get_drvdata(dev);
+
+	/* Restore config registers to avoid hard hang on some systems */
+	i801_disable_host_notify(priv);
+	pci_write_config_byte(dev, SMBHSTCFG, priv->original_hstcfg);
+}
+
 #ifdef CONFIG_PM
 static int i801_suspend(struct device *dev)
 {
@@ -1728,6 +1739,7 @@ static struct pci_driver i801_driver = {
 	.id_table	= i801_ids,
 	.probe		= i801_probe,
 	.remove		= i801_remove,
+	.shutdown	= i801_shutdown,
 	.driver		= {
 		.pm	= &i801_pm_ops,
 	},
diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c
index 999557729ad2..d7267dd9c7bf 100644
--- a/drivers/i2c/busses/i2c-imx.c
+++ b/drivers/i2c/busses/i2c-imx.c
@@ -194,6 +194,7 @@ struct imx_i2c_dma {
 struct imx_i2c_struct {
 	struct i2c_adapter	adapter;
 	struct clk		*clk;
+	struct notifier_block	clk_change_nb;
 	void __iomem		*base;
 	wait_queue_head_t	queue;
 	unsigned long		i2csr;
@@ -467,15 +468,14 @@ static int i2c_imx_acked(struct imx_i2c_struct *i2c_imx)
 	return 0;
 }
 
-static void i2c_imx_set_clk(struct imx_i2c_struct *i2c_imx)
+static void i2c_imx_set_clk(struct imx_i2c_struct *i2c_imx,
+			    unsigned int i2c_clk_rate)
 {
 	struct imx_i2c_clk_pair *i2c_clk_div = i2c_imx->hwdata->clk_div;
-	unsigned int i2c_clk_rate;
 	unsigned int div;
 	int i;
 
 	/* Divider value calculation */
-	i2c_clk_rate = clk_get_rate(i2c_imx->clk);
 	if (i2c_imx->cur_clk == i2c_clk_rate)
 		return;
 
@@ -510,6 +510,20 @@ static void i2c_imx_set_clk(struct imx_i2c_struct *i2c_imx)
 #endif
 }
 
+/* Clock-rate notifier: recompute the divider after a rate change. */
+static int i2c_imx_clk_notifier_call(struct notifier_block *nb,
+				     unsigned long action, void *data)
+{
+	struct clk_notifier_data *ndata = data;
+	struct imx_i2c_struct *i2c_imx =
+		container_of(nb, struct imx_i2c_struct, clk_change_nb);
+
+	if (action & POST_RATE_CHANGE)
+		i2c_imx_set_clk(i2c_imx, ndata->new_rate);
+
+	return NOTIFY_OK;
+}
+
 static int i2c_imx_start(struct imx_i2c_struct *i2c_imx)
 {
 	unsigned int temp = 0;
@@ -517,8 +531,6 @@ static int i2c_imx_start(struct imx_i2c_struct *i2c_imx)
 
 	dev_dbg(&i2c_imx->adapter.dev, "<%s>\n", __func__);
 
-	i2c_imx_set_clk(i2c_imx);
-
 	imx_i2c_write_reg(i2c_imx->ifdr, i2c_imx, IMX_I2C_IFDR);
 	/* Enable I2C controller */
 	imx_i2c_write_reg(i2c_imx->hwdata->i2sr_clr_opcode, i2c_imx, IMX_I2C_I2SR);
@@ -1131,6 +1143,9 @@ static int i2c_imx_probe(struct platform_device *pdev)
 				   "clock-frequency", &i2c_imx->bitrate);
 	if (ret < 0 && pdata && pdata->bitrate)
 		i2c_imx->bitrate = pdata->bitrate;
+	i2c_imx->clk_change_nb.notifier_call = i2c_imx_clk_notifier_call;
+	clk_notifier_register(i2c_imx->clk, &i2c_imx->clk_change_nb);
+	i2c_imx_set_clk(i2c_imx, clk_get_rate(i2c_imx->clk));
 
 	/* Set up chip registers to defaults */
 	imx_i2c_write_reg(i2c_imx->hwdata->i2cr_ien_opcode ^ I2CR_IEN,
@@ -1141,12 +1156,12 @@ static int i2c_imx_probe(struct platform_device *pdev)
 	ret = i2c_imx_init_recovery_info(i2c_imx, pdev);
 	/* Give it another chance if pinctrl used is not ready yet */
 	if (ret == -EPROBE_DEFER)
-		goto rpm_disable;
+		goto clk_notifier_unregister;
 
 	/* Add I2C adapter */
 	ret = i2c_add_numbered_adapter(&i2c_imx->adapter);
 	if (ret < 0)
-		goto rpm_disable;
+		goto clk_notifier_unregister;
 
 	pm_runtime_mark_last_busy(&pdev->dev);
 	pm_runtime_put_autosuspend(&pdev->dev);
@@ -1162,6 +1177,8 @@ static int i2c_imx_probe(struct platform_device *pdev)
 
 	return 0;   /* Return OK */
 
+clk_notifier_unregister:
+	clk_notifier_unregister(i2c_imx->clk, &i2c_imx->clk_change_nb);
 rpm_disable:
 	pm_runtime_put_noidle(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
@@ -1195,6 +1212,7 @@ static int i2c_imx_remove(struct platform_device *pdev)
 	imx_i2c_write_reg(0, i2c_imx, IMX_I2C_I2CR);
 	imx_i2c_write_reg(0, i2c_imx, IMX_I2C_I2SR);
 
+	clk_notifier_unregister(i2c_imx->clk, &i2c_imx->clk_change_nb);
 	clk_disable_unprepare(i2c_imx->clk);
 
 	pm_runtime_put_noidle(&pdev->dev);
@@ -1208,7 +1226,7 @@ static int i2c_imx_runtime_suspend(struct device *dev)
 {
 	struct imx_i2c_struct *i2c_imx = dev_get_drvdata(dev);
 
-	clk_disable_unprepare(i2c_imx->clk);
+	clk_disable(i2c_imx->clk);
 
 	return 0;
 }
@@ -1218,7 +1236,7 @@ static int i2c_imx_runtime_resume(struct device *dev)
 	struct imx_i2c_struct *i2c_imx = dev_get_drvdata(dev);
 	int ret;
 
-	ret = clk_prepare_enable(i2c_imx->clk);
+	ret = clk_enable(i2c_imx->clk);
 	if (ret)
 		dev_err(dev, "can't enable I2C clock, ret=%d\n", ret);
 
diff --git a/drivers/i2c/busses/i2c-mv64xxx.c b/drivers/i2c/busses/i2c-mv64xxx.c
index 440fe4a96e68..a5a95ea5b81a 100644
--- a/drivers/i2c/busses/i2c-mv64xxx.c
+++ b/drivers/i2c/busses/i2c-mv64xxx.c
@@ -845,12 +845,16 @@ mv64xxx_of_config(struct mv64xxx_i2c_data *drv_data,
 	 */
 	if (of_device_is_compatible(np, "marvell,mv78230-i2c")) {
 		drv_data->offload_enabled = true;
-		drv_data->errata_delay = true;
+		/* The delay is only needed in standard mode (100kHz) */
+		if (bus_freq <= 100000)
+			drv_data->errata_delay = true;
 	}
 
 	if (of_device_is_compatible(np, "marvell,mv78230-a0-i2c")) {
 		drv_data->offload_enabled = false;
-		drv_data->errata_delay = true;
+		/* The delay is only needed in standard mode (100kHz) */
+		if (bus_freq <= 100000)
+			drv_data->errata_delay = true;
 	}
 
 	if (of_device_is_compatible(np, "allwinner,sun6i-a31-i2c"))
diff --git a/drivers/i2c/busses/i2c-pca-platform.c b/drivers/i2c/busses/i2c-pca-platform.c
index 853a2abedb05..bc2707ffd409 100644
--- a/drivers/i2c/busses/i2c-pca-platform.c
+++ b/drivers/i2c/busses/i2c-pca-platform.c
@@ -173,33 +173,19 @@ static int i2c_pca_pf_probe(struct platform_device *pdev)
 	i2c->adap.dev.parent = &pdev->dev;
 	i2c->adap.dev.of_node = np;
 
+	i2c->gpio = devm_gpiod_get_optional(&pdev->dev, "reset", GPIOD_OUT_LOW);
+	if (IS_ERR(i2c->gpio))
+		return PTR_ERR(i2c->gpio);
+
+	i2c->adap.timeout = HZ;
+	ret = device_property_read_u32(&pdev->dev, "clock-frequency",
+				       &i2c->algo_data.i2c_clock);
+	if (ret)
+		i2c->algo_data.i2c_clock = 59000;
+
 	if (platform_data) {
 		i2c->adap.timeout = platform_data->timeout;
 		i2c->algo_data.i2c_clock = platform_data->i2c_clock_speed;
-		if (gpio_is_valid(platform_data->gpio)) {
-			ret = devm_gpio_request_one(&pdev->dev,
-						    platform_data->gpio,
-						    GPIOF_ACTIVE_LOW,
-						    i2c->adap.name);
-			if (ret == 0) {
-				i2c->gpio = gpio_to_desc(platform_data->gpio);
-				gpiod_direction_output(i2c->gpio, 0);
-			} else {
-				dev_warn(&pdev->dev, "Registering gpio failed!\n");
-				i2c->gpio = NULL;
-			}
-		}
-	} else if (np) {
-		i2c->adap.timeout = HZ;
-		i2c->gpio = devm_gpiod_get_optional(&pdev->dev, "reset-gpios", GPIOD_OUT_LOW);
-		if (IS_ERR(i2c->gpio))
-			return PTR_ERR(i2c->gpio);
-		of_property_read_u32_index(np, "clock-frequency", 0,
-					   &i2c->algo_data.i2c_clock);
-	} else {
-		i2c->adap.timeout = HZ;
-		i2c->algo_data.i2c_clock = 59000;
-		i2c->gpio = NULL;
 	}
 
 	i2c->algo_data.data = i2c;
diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c
index 462948e2c535..90946a8b9a75 100644
--- a/drivers/i2c/busses/i2c-piix4.c
+++ b/drivers/i2c/busses/i2c-piix4.c
@@ -40,7 +40,6 @@
 #include <linux/dmi.h>
 #include <linux/acpi.h>
 #include <linux/io.h>
-#include <linux/mutex.h>
 
 
 /* PIIX4 SMBus address offsets */
@@ -153,10 +152,7 @@ static const struct dmi_system_id piix4_dmi_ibm[] = {
 
 /*
  * SB800 globals
- * piix4_mutex_sb800 protects piix4_port_sel_sb800 and the pair
- * of I/O ports at SB800_PIIX4_SMB_IDX.
  */
-static DEFINE_MUTEX(piix4_mutex_sb800);
 static u8 piix4_port_sel_sb800;
 static u8 piix4_port_mask_sb800;
 static u8 piix4_port_shift_sb800;
@@ -298,12 +294,19 @@ static int piix4_setup_sb800(struct pci_dev *PIIX4_dev,
 	else
 		smb_en = (aux) ? 0x28 : 0x2c;
 
-	mutex_lock(&piix4_mutex_sb800);
+	if (!request_muxed_region(SB800_PIIX4_SMB_IDX, 2, "sb800_piix4_smb")) {
+		dev_err(&PIIX4_dev->dev,
+			"SMB base address index region 0x%x already in use.\n",
+			SB800_PIIX4_SMB_IDX);
+		return -EBUSY;
+	}
+
 	outb_p(smb_en, SB800_PIIX4_SMB_IDX);
 	smba_en_lo = inb_p(SB800_PIIX4_SMB_IDX + 1);
 	outb_p(smb_en + 1, SB800_PIIX4_SMB_IDX);
 	smba_en_hi = inb_p(SB800_PIIX4_SMB_IDX + 1);
-	mutex_unlock(&piix4_mutex_sb800);
+
+	release_region(SB800_PIIX4_SMB_IDX, 2);
 
 	if (!smb_en) {
 		smb_en_status = smba_en_lo & 0x10;
@@ -373,7 +376,12 @@ static int piix4_setup_sb800(struct pci_dev *PIIX4_dev,
 			break;
 		}
 	} else {
-		mutex_lock(&piix4_mutex_sb800);
+		if (!request_muxed_region(SB800_PIIX4_SMB_IDX, 2,
+					  "sb800_piix4_smb")) {
+			release_region(piix4_smba, SMBIOSIZE);
+			return -EBUSY;
+		}
+
 		outb_p(SB800_PIIX4_PORT_IDX_SEL, SB800_PIIX4_SMB_IDX);
 		port_sel = inb_p(SB800_PIIX4_SMB_IDX + 1);
 		piix4_port_sel_sb800 = (port_sel & 0x01) ?
@@ -381,7 +389,7 @@ static int piix4_setup_sb800(struct pci_dev *PIIX4_dev,
 				       SB800_PIIX4_PORT_IDX;
 		piix4_port_mask_sb800 = SB800_PIIX4_PORT_IDX_MASK;
 		piix4_port_shift_sb800 = SB800_PIIX4_PORT_IDX_SHIFT;
-		mutex_unlock(&piix4_mutex_sb800);
+		release_region(SB800_PIIX4_SMB_IDX, 2);
 	}
 
 	dev_info(&PIIX4_dev->dev,
@@ -462,13 +470,13 @@ static int piix4_transaction(struct i2c_adapter *piix4_adapter)
 
 	/* We will always wait for a fraction of a second! (See PIIX4 docs errata) */
 	if (srvrworks_csb5_delay) /* Extra delay for SERVERWORKS_CSB5 */
-		msleep(2);
+		usleep_range(2000, 2100);
 	else
-		msleep(1);
+		usleep_range(250, 500);
 
 	while ((++timeout < MAX_TIMEOUT) &&
 	       ((temp = inb_p(SMBHSTSTS)) & 0x01))
-		msleep(1);
+		usleep_range(250, 500);
 
 	/* If the SMBus is still busy, we give up */
 	if (timeout == MAX_TIMEOUT) {
@@ -679,7 +687,8 @@ static s32 piix4_access_sb800(struct i2c_adapter *adap, u16 addr,
 	u8 port;
 	int retval;
 
-	mutex_lock(&piix4_mutex_sb800);
+	if (!request_muxed_region(SB800_PIIX4_SMB_IDX, 2, "sb800_piix4_smb"))
+		return -EBUSY;
 
 	/* Request the SMBUS semaphore, avoid conflicts with the IMC */
 	smbslvcnt  = inb_p(SMBSLVCNT);
@@ -695,8 +704,8 @@ static s32 piix4_access_sb800(struct i2c_adapter *adap, u16 addr,
 	} while (--retries);
 	/* SMBus is still owned by the IMC, we give up */
 	if (!retries) {
-		mutex_unlock(&piix4_mutex_sb800);
-		return -EBUSY;
+		retval = -EBUSY;
+		goto release;
 	}
 
 	/*
@@ -753,8 +762,8 @@ static s32 piix4_access_sb800(struct i2c_adapter *adap, u16 addr,
 	if ((size == I2C_SMBUS_BLOCK_DATA) && adapdata->notify_imc)
 		piix4_imc_wakeup();
 
-	mutex_unlock(&piix4_mutex_sb800);
-
+release:
+	release_region(SB800_PIIX4_SMB_IDX, 2);
 	return retval;
 }
 
@@ -899,13 +908,6 @@ static int piix4_probe(struct pci_dev *dev, const struct pci_device_id *id)
 		bool notify_imc = false;
 		is_sb800 = true;
 
-		if (!request_region(SB800_PIIX4_SMB_IDX, 2, "smba_idx")) {
-			dev_err(&dev->dev,
-			"SMBus base address index region 0x%x already in use!\n",
-			SB800_PIIX4_SMB_IDX);
-			return -EBUSY;
-		}
-
 		if (dev->vendor == PCI_VENDOR_ID_AMD &&
 		    dev->device == PCI_DEVICE_ID_AMD_KERNCZ_SMBUS) {
 			u8 imc;
@@ -922,20 +924,16 @@ static int piix4_probe(struct pci_dev *dev, const struct pci_device_id *id)
 
 		/* base address location etc changed in SB800 */
 		retval = piix4_setup_sb800(dev, id, 0);
-		if (retval < 0) {
-			release_region(SB800_PIIX4_SMB_IDX, 2);
+		if (retval < 0)
 			return retval;
-		}
 
 		/*
 		 * Try to register multiplexed main SMBus adapter,
 		 * give up if we can't
 		 */
 		retval = piix4_add_adapters_sb800(dev, retval, notify_imc);
-		if (retval < 0) {
-			release_region(SB800_PIIX4_SMB_IDX, 2);
+		if (retval < 0)
 			return retval;
-		}
 	} else {
 		retval = piix4_setup(dev, id);
 		if (retval < 0)
@@ -983,11 +981,8 @@ static void piix4_adap_remove(struct i2c_adapter *adap)
 
 	if (adapdata->smba) {
 		i2c_del_adapter(adap);
-		if (adapdata->port == (0 << piix4_port_shift_sb800)) {
+		if (adapdata->port == (0 << piix4_port_shift_sb800))
 			release_region(adapdata->smba, SMBIOSIZE);
-			if (adapdata->sb800_main)
-				release_region(SB800_PIIX4_SMB_IDX, 2);
-		}
 		kfree(adapdata);
 		kfree(adap);
 	}
diff --git a/drivers/i2c/busses/i2c-qup.c b/drivers/i2c/busses/i2c-qup.c
index 08f8e0107642..904dfec7ab96 100644
--- a/drivers/i2c/busses/i2c-qup.c
+++ b/drivers/i2c/busses/i2c-qup.c
@@ -1,17 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
- * Copyright (c) 2009-2013, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2009-2013, 2016-2018, The Linux Foundation. All rights reserved.
  * Copyright (c) 2014, Sony Mobile Communications AB.
  *
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  */
 
 #include <linux/acpi.h>
@@ -73,8 +64,11 @@
 #define QUP_IN_SVC_FLAG		BIT(9)
 #define QUP_MX_OUTPUT_DONE	BIT(10)
 #define QUP_MX_INPUT_DONE	BIT(11)
+#define OUT_BLOCK_WRITE_REQ	BIT(12)
+#define IN_BLOCK_READ_REQ	BIT(13)
 
 /* I2C mini core related values */
+#define QUP_NO_INPUT		BIT(7)
 #define QUP_CLOCK_AUTO_GATE	BIT(13)
 #define I2C_MINI_CORE		(2 << 8)
 #define I2C_N_VAL		15
@@ -113,6 +107,7 @@
 #define QUP_TAG_V2_DATAWR              0x82
 #define QUP_TAG_V2_DATAWR_STOP         0x83
 #define QUP_TAG_V2_DATARD              0x85
+#define QUP_TAG_V2_DATARD_NACK         0x86
 #define QUP_TAG_V2_DATARD_STOP         0x87
 
 /* Status, Error flags */
@@ -127,23 +122,87 @@
 #define ONE_BYTE			0x1
 #define QUP_I2C_MX_CONFIG_DURING_RUN   BIT(31)
 
+/* Maximum transfer length for single DMA descriptor */
 #define MX_TX_RX_LEN			SZ_64K
 #define MX_BLOCKS			(MX_TX_RX_LEN / QUP_READ_LIMIT)
+/* Maximum transfer length for all DMA descriptors */
+#define MX_DMA_TX_RX_LEN		(2 * MX_TX_RX_LEN)
+#define MX_DMA_BLOCKS			(MX_DMA_TX_RX_LEN / QUP_READ_LIMIT)
 
-/* Max timeout in ms for 32k bytes */
-#define TOUT_MAX			300
+/*
+ * Minimum transfer timeout for i2c transfers in seconds. It will be added on
+ * the top of maximum transfer time calculated from i2c bus speed to compensate
+ * the overheads.
+ */
+#define TOUT_MIN			2
 
 /* Default values. Use these if FW query fails */
 #define DEFAULT_CLK_FREQ 100000
 #define DEFAULT_SRC_CLK 20000000
 
+/*
+ * Max tags length (start, stop and maximum 2 bytes address) for each QUP
+ * data transfer
+ */
+#define QUP_MAX_TAGS_LEN		4
+/* Max data length for each DATARD tags */
+#define RECV_MAX_DATA_LEN		254
+/* TAG length for DATA READ in RX FIFO  */
+#define READ_RX_TAGS_LEN		2
+
+/*
+ * count: no of blocks
+ * pos: current block number
+ * tx_tag_len: tx tag length for current block
+ * rx_tag_len: rx tag length for current block
+ * data_len: remaining data length for current message
+ * cur_blk_len: data length for current block
+ * total_tx_len: total tx length including tag bytes for current QUP transfer
+ * total_rx_len: total rx length including tag bytes for current QUP transfer
+ * tx_fifo_data_pos: current byte number in TX FIFO word
+ * tx_fifo_free: number of free bytes in current QUP block write.
+ * rx_fifo_data_pos: current byte number in RX FIFO word
+ * fifo_available: number of available bytes in RX FIFO for current
+ *		   QUP block read
+ * tx_fifo_data: QUP TX FIFO write works on word basis (4 bytes). New byte write
+ *		 to TX FIFO will be appended in this data and will be written to
+ *		 TX FIFO when all the 4 bytes are available.
+ * rx_fifo_data: QUP RX FIFO read works on word basis (4 bytes). This will
+ *		 contain the 4 bytes of RX data.
+ * cur_data: pointer to tell cur data position for current message
+ * cur_tx_tags: pointer to tell cur position in tags
+ * tx_tags_sent: all tx tag bytes have been written in FIFO word
+ * send_last_word: for tx FIFO, last word send is pending in current block
+ * rx_bytes_read: if all the bytes have been read from rx FIFO.
+ * rx_tags_fetched: all the rx tag bytes have been fetched from rx fifo word
+ * is_tx_blk_mode: whether tx uses block or FIFO mode in case of non BAM xfer.
+ * is_rx_blk_mode: whether rx uses block or FIFO mode in case of non BAM xfer.
+ * tags: contains tx tag bytes for current QUP transfer
+ */
 struct qup_i2c_block {
-	int	count;
-	int	pos;
-	int	tx_tag_len;
-	int	rx_tag_len;
-	int	data_len;
-	u8	tags[6];
+	int		count;
+	int		pos;
+	int		tx_tag_len;
+	int		rx_tag_len;
+	int		data_len;
+	int		cur_blk_len;
+	int		total_tx_len;
+	int		total_rx_len;
+	int		tx_fifo_data_pos;
+	int		tx_fifo_free;
+	int		rx_fifo_data_pos;
+	int		fifo_available;
+	u32		tx_fifo_data;
+	u32		rx_fifo_data;
+	u8		*cur_data;
+	u8		*cur_tx_tags;
+	bool		tx_tags_sent;
+	bool		send_last_word;
+	bool		rx_tags_fetched;
+	bool		rx_bytes_read;
+	bool		is_tx_blk_mode;
+	bool		is_rx_blk_mode;
+	u8		tags[6];
 };
 
 struct qup_i2c_tag {
@@ -155,6 +214,7 @@ struct qup_i2c_bam {
 	struct	qup_i2c_tag tag;
 	struct	dma_chan *dma;
 	struct	scatterlist *sg;
+	unsigned int sg_cnt;
 };
 
 struct qup_i2c_dev {
@@ -171,7 +231,9 @@ struct qup_i2c_dev {
 	int			out_blk_sz;
 	int			in_blk_sz;
 
+	int			blk_xfer_limit;
 	unsigned long		one_byte_t;
+	unsigned long		xfer_timeout;
 	struct qup_i2c_block	blk;
 
 	struct i2c_msg		*msg;
@@ -184,23 +246,37 @@ struct qup_i2c_dev {
 
 	/* To check if this is the last msg */
 	bool			is_last;
+	bool			is_smbus_read;
 
 	/* To configure when bus is in run state */
-	int			config_run;
+	u32			config_run;
 
 	/* dma parameters */
 	bool			is_dma;
+	/* To check if the current transfer is using DMA */
+	bool			use_dma;
+	unsigned int		max_xfer_sg_len;
+	unsigned int		tag_buf_pos;
+	/* The threshold length above which block mode will be used */
+	unsigned int		blk_mode_threshold;
 	struct			dma_pool *dpool;
 	struct			qup_i2c_tag start_tag;
 	struct			qup_i2c_bam brx;
 	struct			qup_i2c_bam btx;
 
 	struct completion	xfer;
+	/* function to write data in tx fifo */
+	void (*write_tx_fifo)(struct qup_i2c_dev *qup);
+	/* function to read data from rx fifo */
+	void (*read_rx_fifo)(struct qup_i2c_dev *qup);
+	/* function to write tags in tx fifo for i2c read transfer */
+	void (*write_rx_tags)(struct qup_i2c_dev *qup);
 };
 
 static irqreturn_t qup_i2c_interrupt(int irq, void *dev)
 {
 	struct qup_i2c_dev *qup = dev;
+	struct qup_i2c_block *blk = &qup->blk;
 	u32 bus_err;
 	u32 qup_err;
 	u32 opflags;
@@ -226,17 +302,65 @@ static irqreturn_t qup_i2c_interrupt(int irq, void *dev)
 	if (bus_err)
 		writel(bus_err, qup->base + QUP_I2C_STATUS);
 
+	/*
+	 * In BAM mode, return early if an error has already been recorded for
+	 * the current transfer: on error the QUP sometimes raises more than
+	 * one interrupt.
+	 */
+	if (qup->use_dma && (qup->qup_err || qup->bus_err))
+		return IRQ_HANDLED;
+
 	/* Reset the QUP State in case of error */
 	if (qup_err || bus_err) {
-		writel(QUP_RESET_STATE, qup->base + QUP_STATE);
+		/*
+		 * Don't reset the QUP state in case of BAM mode. The BAM
+		 * flush operation needs to be scheduled in transfer function
+		 * which will clear the remaining schedule descriptors in BAM
+		 * HW FIFO and generates the BAM interrupt.
+		 */
+		if (!qup->use_dma)
+			writel(QUP_RESET_STATE, qup->base + QUP_STATE);
 		goto done;
 	}
 
-	if (opflags & QUP_IN_SVC_FLAG)
+	if (opflags & QUP_OUT_SVC_FLAG) {
+		writel(QUP_OUT_SVC_FLAG, qup->base + QUP_OPERATIONAL);
+
+		if (opflags & OUT_BLOCK_WRITE_REQ) {
+			blk->tx_fifo_free += qup->out_blk_sz;
+			if (qup->msg->flags & I2C_M_RD)
+				qup->write_rx_tags(qup);
+			else
+				qup->write_tx_fifo(qup);
+		}
+	}
+
+	if (opflags & QUP_IN_SVC_FLAG) {
 		writel(QUP_IN_SVC_FLAG, qup->base + QUP_OPERATIONAL);
 
-	if (opflags & QUP_OUT_SVC_FLAG)
-		writel(QUP_OUT_SVC_FLAG, qup->base + QUP_OPERATIONAL);
+		if (!blk->is_rx_blk_mode) {
+			blk->fifo_available += qup->in_fifo_sz;
+			qup->read_rx_fifo(qup);
+		} else if (opflags & IN_BLOCK_READ_REQ) {
+			blk->fifo_available += qup->in_blk_sz;
+			qup->read_rx_fifo(qup);
+		}
+	}
+
+	if (qup->msg->flags & I2C_M_RD) {
+		if (!blk->rx_bytes_read)
+			return IRQ_HANDLED;
+	} else {
+		/*
+		 * Ideally, QUP_MAX_OUTPUT_DONE_FLAG should be checked
+		 * for FIFO mode also. But, QUP_MAX_OUTPUT_DONE_FLAG lags
+		 * behind QUP_OUTPUT_SERVICE_FLAG sometimes. The only reason
+		 * of interrupt for write message in FIFO mode is
+		 * QUP_MAX_OUTPUT_DONE_FLAG condition.
+		 */
+		if (blk->is_tx_blk_mode && !(opflags & QUP_MX_OUTPUT_DONE))
+			return IRQ_HANDLED;
+	}
 
 done:
 	qup->qup_err = qup_err;
@@ -303,147 +427,47 @@ static int qup_i2c_change_state(struct qup_i2c_dev *qup, u32 state)
 	return 0;
 }
 
-/**
- * qup_i2c_wait_ready - wait for a give number of bytes in tx/rx path
- * @qup: The qup_i2c_dev device
- * @op: The bit/event to wait on
- * @val: value of the bit to wait on, 0 or 1
- * @len: The length the bytes to be transferred
- */
-static int qup_i2c_wait_ready(struct qup_i2c_dev *qup, int op, bool val,
-			      int len)
+/* Check if I2C bus returns to IDLE state */
+static int qup_i2c_bus_active(struct qup_i2c_dev *qup, int len)
 {
 	unsigned long timeout;
-	u32 opflags;
 	u32 status;
-	u32 shift = __ffs(op);
 	int ret = 0;
 
-	len *= qup->one_byte_t;
-	/* timeout after a wait of twice the max time */
 	timeout = jiffies + len * 4;
-
 	for (;;) {
-		opflags = readl(qup->base + QUP_OPERATIONAL);
 		status = readl(qup->base + QUP_I2C_STATUS);
+		if (!(status & I2C_STATUS_BUS_ACTIVE))
+			break;
 
-		if (((opflags & op) >> shift) == val) {
-			if ((op == QUP_OUT_NOT_EMPTY) && qup->is_last) {
-				if (!(status & I2C_STATUS_BUS_ACTIVE)) {
-					ret = 0;
-					goto done;
-				}
-			} else {
-				ret = 0;
-				goto done;
-			}
-		}
-
-		if (time_after(jiffies, timeout)) {
-			ret = -ETIMEDOUT;
-			goto done;
-		}
+		if (time_after(jiffies, timeout))
+			return -ETIMEDOUT;
-		usleep_range(len, len * 2);
-	}
-
-done:
-	if (qup->bus_err || qup->qup_err)
-		ret =  (qup->bus_err & QUP_I2C_NACK_FLAG) ? -ENXIO : -EIO;
-
-	return ret;
-}
-
-static void qup_i2c_set_write_mode_v2(struct qup_i2c_dev *qup,
-				      struct i2c_msg *msg)
-{
-	/* Number of entries to shift out, including the tags */
-	int total = msg->len + qup->blk.tx_tag_len;
-
-	total |= qup->config_run;
-
-	if (total < qup->out_fifo_sz) {
-		/* FIFO mode */
-		writel(QUP_REPACK_EN, qup->base + QUP_IO_MODE);
-		writel(total, qup->base + QUP_MX_WRITE_CNT);
-	} else {
-		/* BLOCK mode (transfer data on chunks) */
-		writel(QUP_OUTPUT_BLK_MODE | QUP_REPACK_EN,
-		       qup->base + QUP_IO_MODE);
-		writel(total, qup->base + QUP_MX_OUTPUT_CNT);
-	}
-}
-
-static void qup_i2c_set_write_mode(struct qup_i2c_dev *qup, struct i2c_msg *msg)
-{
-	/* Number of entries to shift out, including the start */
-	int total = msg->len + 1;
-
-	if (total < qup->out_fifo_sz) {
-		/* FIFO mode */
-		writel(QUP_REPACK_EN, qup->base + QUP_IO_MODE);
-		writel(total, qup->base + QUP_MX_WRITE_CNT);
-	} else {
-		/* BLOCK mode (transfer data on chunks) */
-		writel(QUP_OUTPUT_BLK_MODE | QUP_REPACK_EN,
-		       qup->base + QUP_IO_MODE);
-		writel(total, qup->base + QUP_MX_OUTPUT_CNT);
-	}
-}
-
-static int check_for_fifo_space(struct qup_i2c_dev *qup)
-{
-	int ret;
-
-	ret = qup_i2c_change_state(qup, QUP_PAUSE_STATE);
-	if (ret)
-		goto out;
-
-	ret = qup_i2c_wait_ready(qup, QUP_OUT_FULL,
-				 RESET_BIT, 4 * ONE_BYTE);
-	if (ret) {
-		/* Fifo is full. Drain out the fifo */
-		ret = qup_i2c_change_state(qup, QUP_RUN_STATE);
-		if (ret)
-			goto out;
 
-		ret = qup_i2c_wait_ready(qup, QUP_OUT_NOT_EMPTY,
-					 RESET_BIT, 256 * ONE_BYTE);
-		if (ret) {
-			dev_err(qup->dev, "timeout for fifo out full");
-			goto out;
-		}
-
-		ret = qup_i2c_change_state(qup, QUP_PAUSE_STATE);
-		if (ret)
-			goto out;
+		usleep_range(len, len * 2);
 	}
 
-out:
 	return ret;
 }
 
-static int qup_i2c_issue_write(struct qup_i2c_dev *qup, struct i2c_msg *msg)
+static void qup_i2c_write_tx_fifo_v1(struct qup_i2c_dev *qup)
 {
+	struct qup_i2c_block *blk = &qup->blk;
+	struct i2c_msg *msg = qup->msg;
 	u32 addr = msg->addr << 1;
 	u32 qup_tag;
 	int idx;
 	u32 val;
-	int ret = 0;
 
 	if (qup->pos == 0) {
 		val = QUP_TAG_START | addr;
 		idx = 1;
+		blk->tx_fifo_free--;
 	} else {
 		val = 0;
 		idx = 0;
 	}
 
-	while (qup->pos < msg->len) {
-		/* Check that there's space in the FIFO for our pair */
-		ret = check_for_fifo_space(qup);
-		if (ret)
-			return ret;
-
+	while (blk->tx_fifo_free && qup->pos < msg->len) {
 		if (qup->pos == msg->len - 1)
 			qup_tag = QUP_TAG_STOP;
 		else
@@ -460,70 +484,24 @@ static int qup_i2c_issue_write(struct qup_i2c_dev *qup, struct i2c_msg *msg)
 
 		qup->pos++;
 		idx++;
+		blk->tx_fifo_free--;
 	}
-
-	ret = qup_i2c_change_state(qup, QUP_RUN_STATE);
-
-	return ret;
 }
 
 static void qup_i2c_set_blk_data(struct qup_i2c_dev *qup,
 				 struct i2c_msg *msg)
 {
-	memset(&qup->blk, 0, sizeof(qup->blk));
-
+	qup->blk.pos = 0;
 	qup->blk.data_len = msg->len;
-	qup->blk.count = (msg->len + QUP_READ_LIMIT - 1) / QUP_READ_LIMIT;
-
-	/* 4 bytes for first block and 2 writes for rest */
-	qup->blk.tx_tag_len = 4 + (qup->blk.count - 1) * 2;
-
-	/* There are 2 tag bytes that are read in to fifo for every block */
-	if (msg->flags & I2C_M_RD)
-		qup->blk.rx_tag_len = qup->blk.count * 2;
-}
-
-static int qup_i2c_send_data(struct qup_i2c_dev *qup, int tlen, u8 *tbuf,
-			     int dlen, u8 *dbuf)
-{
-	u32 val = 0, idx = 0, pos = 0, i = 0, t;
-	int  len = tlen + dlen;
-	u8 *buf = tbuf;
-	int ret = 0;
-
-	while (len > 0) {
-		ret = check_for_fifo_space(qup);
-		if (ret)
-			return ret;
-
-		t = (len >= 4) ? 4 : len;
-
-		while (idx < t) {
-			if (!i && (pos >= tlen)) {
-				buf = dbuf;
-				pos = 0;
-				i = 1;
-			}
-			val |= buf[pos++] << (idx++ * 8);
-		}
-
-		writel(val, qup->base + QUP_OUT_FIFO_BASE);
-		idx  = 0;
-		val = 0;
-		len -= 4;
-	}
-
-	ret = qup_i2c_change_state(qup, QUP_RUN_STATE);
-
-	return ret;
+	qup->blk.count = DIV_ROUND_UP(msg->len, qup->blk_xfer_limit);
 }
 
 static int qup_i2c_get_data_len(struct qup_i2c_dev *qup)
 {
 	int data_len;
 
-	if (qup->blk.data_len > QUP_READ_LIMIT)
-		data_len = QUP_READ_LIMIT;
+	if (qup->blk.data_len > qup->blk_xfer_limit)
+		data_len = qup->blk_xfer_limit;
 	else
 		data_len = qup->blk.data_len;
 
@@ -540,9 +518,9 @@ static int qup_i2c_set_tags_smb(u16 addr, u8 *tags, struct qup_i2c_dev *qup,
 {
 	int len = 0;
 
-	if (msg->len > 1) {
+	if (qup->is_smbus_read) {
 		tags[len++] = QUP_TAG_V2_DATARD_STOP;
-		tags[len++] = qup_i2c_get_data_len(qup) - 1;
+		tags[len++] = qup_i2c_get_data_len(qup);
 	} else {
 		tags[len++] = QUP_TAG_V2_START;
 		tags[len++] = addr & 0xff;
@@ -558,7 +536,7 @@ static int qup_i2c_set_tags_smb(u16 addr, u8 *tags, struct qup_i2c_dev *qup,
 }
 
 static int qup_i2c_set_tags(u8 *tags, struct qup_i2c_dev *qup,
-			    struct i2c_msg *msg,  int is_dma)
+			    struct i2c_msg *msg)
 {
 	u16 addr = i2c_8bit_addr_from_msg(msg);
 	int len = 0;
@@ -586,7 +564,9 @@ static int qup_i2c_set_tags(u8 *tags, struct qup_i2c_dev *qup,
 			tags[len++] = QUP_TAG_V2_DATAWR_STOP;
 	} else {
 		if (msg->flags & I2C_M_RD)
-			tags[len++] = QUP_TAG_V2_DATARD;
+			tags[len++] = qup->blk.pos == (qup->blk.count - 1) ?
+				      QUP_TAG_V2_DATARD_NACK :
+				      QUP_TAG_V2_DATARD;
 		else
 			tags[len++] = QUP_TAG_V2_DATAWR;
 	}
@@ -599,32 +579,9 @@ static int qup_i2c_set_tags(u8 *tags, struct qup_i2c_dev *qup,
 	else
 		tags[len++] = data_len;
 
-	if ((msg->flags & I2C_M_RD) && last && is_dma) {
-		tags[len++] = QUP_BAM_INPUT_EOT;
-		tags[len++] = QUP_BAM_FLUSH_STOP;
-	}
-
 	return len;
 }
 
-static int qup_i2c_issue_xfer_v2(struct qup_i2c_dev *qup, struct i2c_msg *msg)
-{
-	int data_len = 0, tag_len, index;
-	int ret;
-
-	tag_len = qup_i2c_set_tags(qup->blk.tags, qup, msg, 0);
-	index = msg->len - qup->blk.data_len;
-
-	/* only tags are written for read */
-	if (!(msg->flags & I2C_M_RD))
-		data_len = qup_i2c_get_data_len(qup);
-
-	ret = qup_i2c_send_data(qup, tag_len, qup->blk.tags,
-				data_len, &msg->buf[index]);
-	qup->blk.data_len -= data_len;
-
-	return ret;
-}
 
 static void qup_i2c_bam_cb(void *data)
 {
@@ -684,115 +641,109 @@ static int qup_i2c_req_dma(struct qup_i2c_dev *qup)
 	return 0;
 }
 
-static int qup_i2c_bam_do_xfer(struct qup_i2c_dev *qup, struct i2c_msg *msg,
-			       int num)
+static int qup_i2c_bam_make_desc(struct qup_i2c_dev *qup, struct i2c_msg *msg)
 {
-	struct dma_async_tx_descriptor *txd, *rxd = NULL;
-	int ret = 0, idx = 0, limit = QUP_READ_LIMIT;
-	dma_cookie_t cookie_rx, cookie_tx;
-	u32 rx_nents = 0, tx_nents = 0, len, blocks, rem;
-	u32 i, tlen, tx_len, tx_buf = 0, rx_buf = 0, off = 0;
+	int ret = 0, limit = QUP_READ_LIMIT;
+	u32 len = 0, blocks, rem;
+	u32 i = 0, tlen, tx_len = 0;
 	u8 *tags;
 
-	while (idx < num) {
-		tx_len = 0, len = 0, i = 0;
-
-		qup->is_last = (idx == (num - 1));
-
-		qup_i2c_set_blk_data(qup, msg);
+	qup->blk_xfer_limit = QUP_READ_LIMIT;
+	qup_i2c_set_blk_data(qup, msg);
 
-		blocks = qup->blk.count;
-		rem = msg->len - (blocks - 1) * limit;
+	blocks = qup->blk.count;
+	rem = msg->len - (blocks - 1) * limit;
 
-		if (msg->flags & I2C_M_RD) {
-			rx_nents += (blocks * 2) + 1;
-			tx_nents += 1;
+	if (msg->flags & I2C_M_RD) {
+		while (qup->blk.pos < blocks) {
+			tlen = (i == (blocks - 1)) ? rem : limit;
+			tags = &qup->start_tag.start[qup->tag_buf_pos + len];
+			len += qup_i2c_set_tags(tags, qup, msg);
+			qup->blk.data_len -= tlen;
 
-			while (qup->blk.pos < blocks) {
-				tlen = (i == (blocks - 1)) ? rem : limit;
-				tags = &qup->start_tag.start[off + len];
-				len += qup_i2c_set_tags(tags, qup, msg, 1);
-				qup->blk.data_len -= tlen;
+			/* scratch buf to read the start and len tags */
+			ret = qup_sg_set_buf(&qup->brx.sg[qup->brx.sg_cnt++],
+					     &qup->brx.tag.start[0],
+					     2, qup, DMA_FROM_DEVICE);
 
-				/* scratch buf to read the start and len tags */
-				ret = qup_sg_set_buf(&qup->brx.sg[rx_buf++],
-						     &qup->brx.tag.start[0],
-						     2, qup, DMA_FROM_DEVICE);
+			if (ret)
+				return ret;
 
-				if (ret)
-					return ret;
+			ret = qup_sg_set_buf(&qup->brx.sg[qup->brx.sg_cnt++],
+					     &msg->buf[limit * i],
+					     tlen, qup,
+					     DMA_FROM_DEVICE);
+			if (ret)
+				return ret;
 
-				ret = qup_sg_set_buf(&qup->brx.sg[rx_buf++],
-						     &msg->buf[limit * i],
-						     tlen, qup,
-						     DMA_FROM_DEVICE);
-				if (ret)
-					return ret;
+			i++;
+			qup->blk.pos = i;
+		}
+		ret = qup_sg_set_buf(&qup->btx.sg[qup->btx.sg_cnt++],
+				     &qup->start_tag.start[qup->tag_buf_pos],
+				     len, qup, DMA_TO_DEVICE);
+		if (ret)
+			return ret;
 
-				i++;
-				qup->blk.pos = i;
-			}
-			ret = qup_sg_set_buf(&qup->btx.sg[tx_buf++],
-					     &qup->start_tag.start[off],
-					     len, qup, DMA_TO_DEVICE);
+		qup->tag_buf_pos += len;
+	} else {
+		while (qup->blk.pos < blocks) {
+			tlen = (i == (blocks - 1)) ? rem : limit;
+			tags = &qup->start_tag.start[qup->tag_buf_pos + tx_len];
+			len = qup_i2c_set_tags(tags, qup, msg);
+			qup->blk.data_len -= tlen;
+
+			ret = qup_sg_set_buf(&qup->btx.sg[qup->btx.sg_cnt++],
+					     tags, len,
+					     qup, DMA_TO_DEVICE);
 			if (ret)
 				return ret;
 
-			off += len;
-			/* scratch buf to read the BAM EOT and FLUSH tags */
-			ret = qup_sg_set_buf(&qup->brx.sg[rx_buf++],
-					     &qup->brx.tag.start[0],
-					     2, qup, DMA_FROM_DEVICE);
+			tx_len += len;
+			ret = qup_sg_set_buf(&qup->btx.sg[qup->btx.sg_cnt++],
+					     &msg->buf[limit * i],
+					     tlen, qup, DMA_TO_DEVICE);
 			if (ret)
 				return ret;
-		} else {
-			tx_nents += (blocks * 2);
-
-			while (qup->blk.pos < blocks) {
-				tlen = (i == (blocks - 1)) ? rem : limit;
-				tags = &qup->start_tag.start[off + tx_len];
-				len = qup_i2c_set_tags(tags, qup, msg, 1);
-				qup->blk.data_len -= tlen;
-
-				ret = qup_sg_set_buf(&qup->btx.sg[tx_buf++],
-						     tags, len,
-						     qup, DMA_TO_DEVICE);
-				if (ret)
-					return ret;
-
-				tx_len += len;
-				ret = qup_sg_set_buf(&qup->btx.sg[tx_buf++],
-						     &msg->buf[limit * i],
-						     tlen, qup, DMA_TO_DEVICE);
-				if (ret)
-					return ret;
-				i++;
-				qup->blk.pos = i;
-			}
-			off += tx_len;
-
-			if (idx == (num - 1)) {
-				len = 1;
-				if (rx_nents) {
-					qup->btx.tag.start[0] =
-							QUP_BAM_INPUT_EOT;
-					len++;
-				}
-				qup->btx.tag.start[len - 1] =
-							QUP_BAM_FLUSH_STOP;
-				ret = qup_sg_set_buf(&qup->btx.sg[tx_buf++],
-						     &qup->btx.tag.start[0],
-						     len, qup, DMA_TO_DEVICE);
-				if (ret)
-					return ret;
-				tx_nents += 1;
-			}
+			i++;
+			qup->blk.pos = i;
 		}
-		idx++;
-		msg++;
+
+		qup->tag_buf_pos += tx_len;
 	}
 
-	txd = dmaengine_prep_slave_sg(qup->btx.dma, qup->btx.sg, tx_nents,
+	return 0;
+}
+
+static int qup_i2c_bam_schedule_desc(struct qup_i2c_dev *qup)
+{
+	struct dma_async_tx_descriptor *txd, *rxd = NULL;
+	int ret = 0;
+	dma_cookie_t cookie_rx, cookie_tx;
+	u32 len = 0;
+	u32 tx_cnt = qup->btx.sg_cnt, rx_cnt = qup->brx.sg_cnt;
+
+	/* schedule the EOT and FLUSH I2C tags */
+	len = 1;
+	if (rx_cnt) {
+		qup->btx.tag.start[0] = QUP_BAM_INPUT_EOT;
+		len++;
+
+		/* scratch buf to read the BAM EOT FLUSH tags */
+		ret = qup_sg_set_buf(&qup->brx.sg[rx_cnt++],
+				     &qup->brx.tag.start[0],
+				     1, qup, DMA_FROM_DEVICE);
+		if (ret)
+			return ret;
+	}
+
+	qup->btx.tag.start[len - 1] = QUP_BAM_FLUSH_STOP;
+	ret = qup_sg_set_buf(&qup->btx.sg[tx_cnt++], &qup->btx.tag.start[0],
+			     len, qup, DMA_TO_DEVICE);
+	if (ret)
+		return ret;
+
+	txd = dmaengine_prep_slave_sg(qup->btx.dma, qup->btx.sg, tx_cnt,
 				      DMA_MEM_TO_DEV,
 				      DMA_PREP_INTERRUPT | DMA_PREP_FENCE);
 	if (!txd) {
@@ -801,7 +752,7 @@ static int qup_i2c_bam_do_xfer(struct qup_i2c_dev *qup, struct i2c_msg *msg,
 		goto desc_err;
 	}
 
-	if (!rx_nents) {
+	if (!rx_cnt) {
 		txd->callback = qup_i2c_bam_cb;
 		txd->callback_param = qup;
 	}
@@ -814,9 +765,9 @@ static int qup_i2c_bam_do_xfer(struct qup_i2c_dev *qup, struct i2c_msg *msg,
 
 	dma_async_issue_pending(qup->btx.dma);
 
-	if (rx_nents) {
+	if (rx_cnt) {
 		rxd = dmaengine_prep_slave_sg(qup->brx.dma, qup->brx.sg,
-					      rx_nents, DMA_DEV_TO_MEM,
+					      rx_cnt, DMA_DEV_TO_MEM,
 					      DMA_PREP_INTERRUPT);
 		if (!rxd) {
 			dev_err(qup->dev, "failed to get rx desc\n");
@@ -838,49 +789,51 @@ static int qup_i2c_bam_do_xfer(struct qup_i2c_dev *qup, struct i2c_msg *msg,
 		dma_async_issue_pending(qup->brx.dma);
 	}
 
-	if (!wait_for_completion_timeout(&qup->xfer, TOUT_MAX * HZ)) {
+	if (!wait_for_completion_timeout(&qup->xfer, qup->xfer_timeout)) {
 		dev_err(qup->dev, "normal trans timed out\n");
 		ret = -ETIMEDOUT;
 	}
 
 	if (ret || qup->bus_err || qup->qup_err) {
+		reinit_completion(&qup->xfer);
+
 		if (qup_i2c_change_state(qup, QUP_RUN_STATE)) {
 			dev_err(qup->dev, "change to run state timed out");
 			goto desc_err;
 		}
 
-		if (rx_nents)
-			writel(QUP_BAM_INPUT_EOT,
-			       qup->base + QUP_OUT_FIFO_BASE);
-
-		writel(QUP_BAM_FLUSH_STOP, qup->base + QUP_OUT_FIFO_BASE);
-
 		qup_i2c_flush(qup);
 
 		/* wait for remaining interrupts to occur */
 		if (!wait_for_completion_timeout(&qup->xfer, HZ))
 			dev_err(qup->dev, "flush timed out\n");
 
-		qup_i2c_rel_dma(qup);
-
 		ret =  (qup->bus_err & QUP_I2C_NACK_FLAG) ? -ENXIO : -EIO;
 	}
 
 desc_err:
-	dma_unmap_sg(qup->dev, qup->btx.sg, tx_nents, DMA_TO_DEVICE);
+	dma_unmap_sg(qup->dev, qup->btx.sg, tx_cnt, DMA_TO_DEVICE);
 
-	if (rx_nents)
-		dma_unmap_sg(qup->dev, qup->brx.sg, rx_nents,
+	if (rx_cnt)
+		dma_unmap_sg(qup->dev, qup->brx.sg, rx_cnt,
 			     DMA_FROM_DEVICE);
 
 	return ret;
 }
 
+static void qup_i2c_bam_clear_tag_buffers(struct qup_i2c_dev *qup)
+{
+	qup->btx.sg_cnt = 0;
+	qup->brx.sg_cnt = 0;
+	qup->tag_buf_pos = 0;
+}
+
 static int qup_i2c_bam_xfer(struct i2c_adapter *adap, struct i2c_msg *msg,
 			    int num)
 {
 	struct qup_i2c_dev *qup = i2c_get_adapdata(adap);
 	int ret = 0;
+	int idx = 0;
 
 	enable_irq(qup->irq);
 	ret = qup_i2c_req_dma(qup);
@@ -903,9 +856,34 @@ static int qup_i2c_bam_xfer(struct i2c_adapter *adap, struct i2c_msg *msg,
 		goto out;
 
 	writel(qup->clk_ctl, qup->base + QUP_I2C_CLK_CTL);
+	qup_i2c_bam_clear_tag_buffers(qup);
+
+	for (idx = 0; idx < num; idx++) {
+		qup->msg = msg + idx;
+		qup->is_last = idx == (num - 1);
+
+		ret = qup_i2c_bam_make_desc(qup, qup->msg);
+		if (ret)
+			break;
+
+		/*
+		 * Make DMA descriptor and schedule the BAM transfer if it has
+		 * already crossed the maximum length. Since the memory for all
+		 * tag buffers has been taken for 2 maximum possible transfer
+		 * lengths, it will never cross the actual buffer
+		 * length.
+		 */
+		if (qup->btx.sg_cnt > qup->max_xfer_sg_len ||
+		    qup->brx.sg_cnt > qup->max_xfer_sg_len ||
+		    qup->is_last) {
+			ret = qup_i2c_bam_schedule_desc(qup);
+			if (ret)
+				break;
+
+			qup_i2c_bam_clear_tag_buffers(qup);
+		}
+	}
 
-	qup->msg = msg;
-	ret = qup_i2c_bam_do_xfer(qup, qup->msg, num);
 out:
 	disable_irq(qup->irq);
 
@@ -919,7 +897,7 @@ static int qup_i2c_wait_for_complete(struct qup_i2c_dev *qup,
 	unsigned long left;
 	int ret = 0;
 
-	left = wait_for_completion_timeout(&qup->xfer, HZ);
+	left = wait_for_completion_timeout(&qup->xfer, qup->xfer_timeout);
 	if (!left) {
 		writel(1, qup->base + QUP_SW_RESET);
 		ret = -ETIMEDOUT;
@@ -931,363 +909,635 @@ static int qup_i2c_wait_for_complete(struct qup_i2c_dev *qup,
 	return ret;
 }
 
-static int qup_i2c_write_one_v2(struct qup_i2c_dev *qup, struct i2c_msg *msg)
+static void qup_i2c_read_rx_fifo_v1(struct qup_i2c_dev *qup)
 {
-	int ret = 0;
+	struct qup_i2c_block *blk = &qup->blk;
+	struct i2c_msg *msg = qup->msg;
+	u32 val = 0;
+	int idx = 0;
 
-	qup->msg = msg;
-	qup->pos = 0;
-	enable_irq(qup->irq);
-	qup_i2c_set_blk_data(qup, msg);
-	qup_i2c_set_write_mode_v2(qup, msg);
+	while (blk->fifo_available && qup->pos < msg->len) {
+		if ((idx & 1) == 0) {
+			/* Reading 2 words at time */
+			val = readl(qup->base + QUP_IN_FIFO_BASE);
+			msg->buf[qup->pos++] = val & 0xFF;
+		} else {
+			msg->buf[qup->pos++] = val >> QUP_MSW_SHIFT;
+		}
+		idx++;
+		blk->fifo_available--;
+	}
 
+	if (qup->pos == msg->len)
+		blk->rx_bytes_read = true;
+}
+
+static void qup_i2c_write_rx_tags_v1(struct qup_i2c_dev *qup)
+{
+	struct i2c_msg *msg = qup->msg;
+	u32 addr, len, val;
+
+	addr = i2c_8bit_addr_from_msg(msg);
+
+	/* 0 is used to specify a length 256 (QUP_READ_LIMIT) */
+	len = (msg->len == QUP_READ_LIMIT) ? 0 : msg->len;
+
+	val = ((QUP_TAG_REC | len) << QUP_MSW_SHIFT) | QUP_TAG_START | addr;
+	writel(val, qup->base + QUP_OUT_FIFO_BASE);
+}
+
+static void qup_i2c_conf_v1(struct qup_i2c_dev *qup)
+{
+	struct qup_i2c_block *blk = &qup->blk;
+	u32 qup_config = I2C_MINI_CORE | I2C_N_VAL;
+	u32 io_mode = QUP_REPACK_EN;
+
+	blk->is_tx_blk_mode =
+		blk->total_tx_len > qup->out_fifo_sz ? true : false;
+	blk->is_rx_blk_mode =
+		blk->total_rx_len > qup->in_fifo_sz ? true : false;
+
+	if (blk->is_tx_blk_mode) {
+		io_mode |= QUP_OUTPUT_BLK_MODE;
+		writel(0, qup->base + QUP_MX_WRITE_CNT);
+		writel(blk->total_tx_len, qup->base + QUP_MX_OUTPUT_CNT);
+	} else {
+		writel(0, qup->base + QUP_MX_OUTPUT_CNT);
+		writel(blk->total_tx_len, qup->base + QUP_MX_WRITE_CNT);
+	}
+
+	if (blk->total_rx_len) {
+		if (blk->is_rx_blk_mode) {
+			io_mode |= QUP_INPUT_BLK_MODE;
+			writel(0, qup->base + QUP_MX_READ_CNT);
+			writel(blk->total_rx_len, qup->base + QUP_MX_INPUT_CNT);
+		} else {
+			writel(0, qup->base + QUP_MX_INPUT_CNT);
+			writel(blk->total_rx_len, qup->base + QUP_MX_READ_CNT);
+		}
+	} else {
+		qup_config |= QUP_NO_INPUT;
+	}
+
+	writel(qup_config, qup->base + QUP_CONFIG);
+	writel(io_mode, qup->base + QUP_IO_MODE);
+}
+
+static void qup_i2c_clear_blk_v1(struct qup_i2c_block *blk)
+{
+	blk->tx_fifo_free = 0;
+	blk->fifo_available = 0;
+	blk->rx_bytes_read = false;
+}
+
+static int qup_i2c_conf_xfer_v1(struct qup_i2c_dev *qup, bool is_rx)
+{
+	struct qup_i2c_block *blk = &qup->blk;
+	int ret;
+
+	qup_i2c_clear_blk_v1(blk);
+	qup_i2c_conf_v1(qup);
 	ret = qup_i2c_change_state(qup, QUP_RUN_STATE);
 	if (ret)
-		goto err;
+		return ret;
 
 	writel(qup->clk_ctl, qup->base + QUP_I2C_CLK_CTL);
 
-	do {
-		ret = qup_i2c_issue_xfer_v2(qup, msg);
-		if (ret)
-			goto err;
+	ret = qup_i2c_change_state(qup, QUP_PAUSE_STATE);
+	if (ret)
+		return ret;
 
-		ret = qup_i2c_wait_for_complete(qup, msg);
-		if (ret)
-			goto err;
+	reinit_completion(&qup->xfer);
+	enable_irq(qup->irq);
+	if (!blk->is_tx_blk_mode) {
+		blk->tx_fifo_free = qup->out_fifo_sz;
+
+		if (is_rx)
+			qup_i2c_write_rx_tags_v1(qup);
+		else
+			qup_i2c_write_tx_fifo_v1(qup);
+	}
+
+	ret = qup_i2c_change_state(qup, QUP_RUN_STATE);
+	if (ret)
+		goto err;
 
-		qup->blk.pos++;
-	} while (qup->blk.pos < qup->blk.count);
+	ret = qup_i2c_wait_for_complete(qup, qup->msg);
+	if (ret)
+		goto err;
 
-	ret = qup_i2c_wait_ready(qup, QUP_OUT_NOT_EMPTY, RESET_BIT, ONE_BYTE);
+	ret = qup_i2c_bus_active(qup, ONE_BYTE);
 
 err:
 	disable_irq(qup->irq);
-	qup->msg = NULL;
-
 	return ret;
 }
 
-static int qup_i2c_write_one(struct qup_i2c_dev *qup, struct i2c_msg *msg)
+static int qup_i2c_write_one(struct qup_i2c_dev *qup)
 {
-	int ret;
+	struct i2c_msg *msg = qup->msg;
+	struct qup_i2c_block *blk = &qup->blk;
 
-	qup->msg = msg;
 	qup->pos = 0;
+	blk->total_tx_len = msg->len + 1;
+	blk->total_rx_len = 0;
 
-	enable_irq(qup->irq);
+	return qup_i2c_conf_xfer_v1(qup, false);
+}
 
-	qup_i2c_set_write_mode(qup, msg);
+static int qup_i2c_read_one(struct qup_i2c_dev *qup)
+{
+	struct qup_i2c_block *blk = &qup->blk;
 
-	ret = qup_i2c_change_state(qup, QUP_RUN_STATE);
+	qup->pos = 0;
+	blk->total_tx_len = 2;
+	blk->total_rx_len = qup->msg->len;
+
+	return qup_i2c_conf_xfer_v1(qup, true);
+}
+
+static int qup_i2c_xfer(struct i2c_adapter *adap,
+			struct i2c_msg msgs[],
+			int num)
+{
+	struct qup_i2c_dev *qup = i2c_get_adapdata(adap);
+	int ret, idx;
+
+	ret = pm_runtime_get_sync(qup->dev);
+	if (ret < 0)
+		goto out;
+
+	qup->bus_err = 0;
+	qup->qup_err = 0;
+
+	writel(1, qup->base + QUP_SW_RESET);
+	ret = qup_i2c_poll_state(qup, QUP_RESET_STATE);
 	if (ret)
-		goto err;
+		goto out;
 
-	writel(qup->clk_ctl, qup->base + QUP_I2C_CLK_CTL);
+	/* Configure QUP as I2C mini core */
+	writel(I2C_MINI_CORE | I2C_N_VAL, qup->base + QUP_CONFIG);
 
-	do {
-		ret = qup_i2c_change_state(qup, QUP_PAUSE_STATE);
-		if (ret)
-			goto err;
+	for (idx = 0; idx < num; idx++) {
+		if (msgs[idx].len == 0) {
+			ret = -EINVAL;
+			goto out;
+		}
 
-		ret = qup_i2c_issue_write(qup, msg);
-		if (ret)
-			goto err;
+		if (qup_i2c_poll_state_i2c_master(qup)) {
+			ret = -EIO;
+			goto out;
+		}
+
+		if (qup_i2c_check_msg_len(&msgs[idx])) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		qup->msg = &msgs[idx];
+		if (msgs[idx].flags & I2C_M_RD)
+			ret = qup_i2c_read_one(qup);
+		else
+			ret = qup_i2c_write_one(qup);
 
-		ret = qup_i2c_change_state(qup, QUP_RUN_STATE);
 		if (ret)
-			goto err;
+			break;
 
-		ret = qup_i2c_wait_for_complete(qup, msg);
+		ret = qup_i2c_change_state(qup, QUP_RESET_STATE);
 		if (ret)
-			goto err;
-	} while (qup->pos < msg->len);
+			break;
+	}
 
-	/* Wait for the outstanding data in the fifo to drain */
-	ret = qup_i2c_wait_ready(qup, QUP_OUT_NOT_EMPTY, RESET_BIT, ONE_BYTE);
-err:
-	disable_irq(qup->irq);
-	qup->msg = NULL;
+	if (ret == 0)
+		ret = num;
+out:
+
+	pm_runtime_mark_last_busy(qup->dev);
+	pm_runtime_put_autosuspend(qup->dev);
 
 	return ret;
 }
 
-static void qup_i2c_set_read_mode(struct qup_i2c_dev *qup, int len)
+/*
+ * Configure registers related with reconfiguration during run and call it
+ * before each i2c sub transfer.
+ */
+static void qup_i2c_conf_count_v2(struct qup_i2c_dev *qup)
 {
-	if (len < qup->in_fifo_sz) {
-		/* FIFO mode */
-		writel(QUP_REPACK_EN, qup->base + QUP_IO_MODE);
-		writel(len, qup->base + QUP_MX_READ_CNT);
+	struct qup_i2c_block *blk = &qup->blk;
+	u32 qup_config = I2C_MINI_CORE | I2C_N_VAL_V2;
+
+	if (blk->is_tx_blk_mode)
+		writel(qup->config_run | blk->total_tx_len,
+		       qup->base + QUP_MX_OUTPUT_CNT);
+	else
+		writel(qup->config_run | blk->total_tx_len,
+		       qup->base + QUP_MX_WRITE_CNT);
+
+	if (blk->total_rx_len) {
+		if (blk->is_rx_blk_mode)
+			writel(qup->config_run | blk->total_rx_len,
+			       qup->base + QUP_MX_INPUT_CNT);
+		else
+			writel(qup->config_run | blk->total_rx_len,
+			       qup->base + QUP_MX_READ_CNT);
 	} else {
-		/* BLOCK mode (transfer data on chunks) */
-		writel(QUP_INPUT_BLK_MODE | QUP_REPACK_EN,
-		       qup->base + QUP_IO_MODE);
-		writel(len, qup->base + QUP_MX_INPUT_CNT);
+		qup_config |= QUP_NO_INPUT;
 	}
+
+	writel(qup_config, qup->base + QUP_CONFIG);
 }
 
-static void qup_i2c_set_read_mode_v2(struct qup_i2c_dev *qup, int len)
+/*
+ * Configure registers related with transfer mode (FIFO/Block)
+ * before starting of i2c transfer. It will be called only once in
+ * QUP RESET state.
+ */
+static void qup_i2c_conf_mode_v2(struct qup_i2c_dev *qup)
 {
-	int tx_len = qup->blk.tx_tag_len;
+	struct qup_i2c_block *blk = &qup->blk;
+	u32 io_mode = QUP_REPACK_EN;
 
-	len += qup->blk.rx_tag_len;
-	len |= qup->config_run;
-	tx_len |= qup->config_run;
+	if (blk->is_tx_blk_mode) {
+		io_mode |= QUP_OUTPUT_BLK_MODE;
+		writel(0, qup->base + QUP_MX_WRITE_CNT);
+	} else {
+		writel(0, qup->base + QUP_MX_OUTPUT_CNT);
+	}
 
-	if (len < qup->in_fifo_sz) {
-		/* FIFO mode */
-		writel(QUP_REPACK_EN, qup->base + QUP_IO_MODE);
-		writel(tx_len, qup->base + QUP_MX_WRITE_CNT);
-		writel(len, qup->base + QUP_MX_READ_CNT);
+	if (blk->is_rx_blk_mode) {
+		io_mode |= QUP_INPUT_BLK_MODE;
+		writel(0, qup->base + QUP_MX_READ_CNT);
 	} else {
-		/* BLOCK mode (transfer data on chunks) */
-		writel(QUP_INPUT_BLK_MODE | QUP_REPACK_EN,
-		       qup->base + QUP_IO_MODE);
-		writel(tx_len, qup->base + QUP_MX_OUTPUT_CNT);
-		writel(len, qup->base + QUP_MX_INPUT_CNT);
+		writel(0, qup->base + QUP_MX_INPUT_CNT);
 	}
+
+	writel(io_mode, qup->base + QUP_IO_MODE);
 }
 
-static void qup_i2c_issue_read(struct qup_i2c_dev *qup, struct i2c_msg *msg)
+/* Clear required variables before starting of any QUP v2 sub transfer. */
+static void qup_i2c_clear_blk_v2(struct qup_i2c_block *blk)
 {
-	u32 addr, len, val;
-
-	addr = i2c_8bit_addr_from_msg(msg);
-
-	/* 0 is used to specify a length 256 (QUP_READ_LIMIT) */
-	len = (msg->len == QUP_READ_LIMIT) ? 0 : msg->len;
-
-	val = ((QUP_TAG_REC | len) << QUP_MSW_SHIFT) | QUP_TAG_START | addr;
-	writel(val, qup->base + QUP_OUT_FIFO_BASE);
+	blk->send_last_word = false;
+	blk->tx_tags_sent = false;
+	blk->tx_fifo_data = 0;
+	blk->tx_fifo_data_pos = 0;
+	blk->tx_fifo_free = 0;
+
+	blk->rx_tags_fetched = false;
+	blk->rx_bytes_read = false;
+	blk->rx_fifo_data = 0;
+	blk->rx_fifo_data_pos = 0;
+	blk->fifo_available = 0;
 }
 
-
-static int qup_i2c_read_fifo(struct qup_i2c_dev *qup, struct i2c_msg *msg)
+/* Receive data from RX FIFO for read message in QUP v2 i2c transfer. */
+static void qup_i2c_recv_data(struct qup_i2c_dev *qup)
 {
-	u32 val = 0;
-	int idx;
-	int ret = 0;
+	struct qup_i2c_block *blk = &qup->blk;
+	int j;
 
-	for (idx = 0; qup->pos < msg->len; idx++) {
-		if ((idx & 1) == 0) {
-			/* Check that FIFO have data */
-			ret = qup_i2c_wait_ready(qup, QUP_IN_NOT_EMPTY,
-						 SET_BIT, 4 * ONE_BYTE);
-			if (ret)
-				return ret;
+	for (j = blk->rx_fifo_data_pos;
+	     blk->cur_blk_len && blk->fifo_available;
+	     blk->cur_blk_len--, blk->fifo_available--) {
+		if (j == 0)
+			blk->rx_fifo_data = readl(qup->base + QUP_IN_FIFO_BASE);
 
-			/* Reading 2 words at time */
-			val = readl(qup->base + QUP_IN_FIFO_BASE);
+		*(blk->cur_data++) = blk->rx_fifo_data;
+		blk->rx_fifo_data >>= 8;
 
-			msg->buf[qup->pos++] = val & 0xFF;
-		} else {
-			msg->buf[qup->pos++] = val >> QUP_MSW_SHIFT;
-		}
+		if (j == 3)
+			j = 0;
+		else
+			j++;
 	}
 
-	return ret;
+	blk->rx_fifo_data_pos = j;
 }
 
-static int qup_i2c_read_fifo_v2(struct qup_i2c_dev *qup,
-				struct i2c_msg *msg)
+/* Receive tags for read message in QUP v2 i2c transfer. */
+static void qup_i2c_recv_tags(struct qup_i2c_dev *qup)
 {
-	u32 val;
-	int idx, pos = 0, ret = 0, total, msg_offset = 0;
+	struct qup_i2c_block *blk = &qup->blk;
 
-	/*
-	 * If the message length is already read in
-	 * the first byte of the buffer, account for
-	 * that by setting the offset
-	 */
-	if (qup_i2c_check_msg_len(msg) && (msg->len > 1))
-		msg_offset = 1;
-	total = qup_i2c_get_data_len(qup);
-	total -= msg_offset;
-
-	/* 2 extra bytes for read tags */
-	while (pos < (total + 2)) {
-		/* Check that FIFO have data */
-		ret = qup_i2c_wait_ready(qup, QUP_IN_NOT_EMPTY,
-					 SET_BIT, 4 * ONE_BYTE);
-		if (ret) {
-			dev_err(qup->dev, "timeout for fifo not empty");
-			return ret;
-		}
-		val = readl(qup->base + QUP_IN_FIFO_BASE);
+	blk->rx_fifo_data = readl(qup->base + QUP_IN_FIFO_BASE);
+	blk->rx_fifo_data >>= blk->rx_tag_len  * 8;
+	blk->rx_fifo_data_pos = blk->rx_tag_len;
+	blk->fifo_available -= blk->rx_tag_len;
+}
 
-		for (idx = 0; idx < 4; idx++, val >>= 8, pos++) {
-			/* first 2 bytes are tag bytes */
-			if (pos < 2)
-				continue;
+/*
+ * Read the data and tags from RX FIFO. In the read case, the received data
+ * bytes are preceded by the tags, so
+ * 1. Check if rx_tags_fetched is false i.e. the start of QUP block so receive
+ *    all tag bytes and discard that.
+ * 2. Read the data from RX FIFO. When all the data bytes have been read then
+ *    set rx_bytes_read to true.
+ */
+static void qup_i2c_read_rx_fifo_v2(struct qup_i2c_dev *qup)
+{
+	struct qup_i2c_block *blk = &qup->blk;
 
-			if (pos >= (total + 2))
-				goto out;
-			msg->buf[qup->pos + msg_offset] = val & 0xff;
-			qup->pos++;
-		}
+	if (!blk->rx_tags_fetched) {
+		qup_i2c_recv_tags(qup);
+		blk->rx_tags_fetched = true;
 	}
 
-out:
-	qup->blk.data_len -= total;
-
-	return ret;
+	qup_i2c_recv_data(qup);
+	if (!blk->cur_blk_len)
+		blk->rx_bytes_read = true;
 }
 
-static int qup_i2c_read_one_v2(struct qup_i2c_dev *qup, struct i2c_msg *msg)
+/*
+ * Write bytes in TX FIFO for write message in QUP v2 i2c transfer. QUP TX FIFO
+ * write works on word basis (4 bytes). Append new data byte write for TX FIFO
+ * in tx_fifo_data and write to TX FIFO when all the 4 bytes are present.
+ */
+static void
+qup_i2c_write_blk_data(struct qup_i2c_dev *qup, u8 **data, unsigned int *len)
 {
-	int ret = 0;
+	struct qup_i2c_block *blk = &qup->blk;
+	unsigned int j;
+
+	for (j = blk->tx_fifo_data_pos; *len && blk->tx_fifo_free;
+	     (*len)--, blk->tx_fifo_free--) {
+		blk->tx_fifo_data |= *(*data)++ << (j * 8);
+		if (j == 3) {
+			writel(blk->tx_fifo_data,
+			       qup->base + QUP_OUT_FIFO_BASE);
+			blk->tx_fifo_data = 0x0;
+			j = 0;
+		} else {
+			j++;
+		}
+	}
 
-	qup->msg = msg;
-	qup->pos  = 0;
-	enable_irq(qup->irq);
-	qup_i2c_set_blk_data(qup, msg);
-	qup_i2c_set_read_mode_v2(qup, msg->len);
+	blk->tx_fifo_data_pos = j;
+}
 
-	ret = qup_i2c_change_state(qup, QUP_RUN_STATE);
-	if (ret)
-		goto err;
+/* Transfer tags for read message in QUP v2 i2c transfer. */
+static void qup_i2c_write_rx_tags_v2(struct qup_i2c_dev *qup)
+{
+	struct qup_i2c_block *blk = &qup->blk;
 
-	writel(qup->clk_ctl, qup->base + QUP_I2C_CLK_CTL);
+	qup_i2c_write_blk_data(qup, &blk->cur_tx_tags, &blk->tx_tag_len);
+	if (blk->tx_fifo_data_pos)
+		writel(blk->tx_fifo_data, qup->base + QUP_OUT_FIFO_BASE);
+}
 
-	do {
-		ret = qup_i2c_issue_xfer_v2(qup, msg);
-		if (ret)
-			goto err;
+/*
+ * Write the data and tags in TX FIFO. Since in write case, both tags and data
+ * need to be written and QUP write tags can have maximum 256 data length, so
+ *
+ * 1. Check if tx_tags_sent is false i.e. the start of QUP block so write the
+ *    tags to TX FIFO and set tx_tags_sent to true.
+ * 2. Check if send_last_word is true. It will be set when last few data bytes
+ *    (less than 4 bytes) are remaining to be written in FIFO because of no FIFO
+ *    space. All this data bytes are available in tx_fifo_data so write this
+ *    in FIFO.
+ * 3. Write the data to TX FIFO and check for cur_blk_len. If it is non zero
+ *    then more data is pending otherwise following 3 cases can be possible
+ *    a. if tx_fifo_data_pos is zero i.e. all the data bytes in this block
+ *       have been written in TX FIFO so nothing else is required.
+ *    b. tx_fifo_free is non zero i.e tx FIFO is free so copy the remaining data
+ *       from tx_fifo_data to tx FIFO. Since qup_i2c_write_blk_data writes
+ *       in 4 bytes and FIFO space is a multiple of 4 bytes, tx_fifo_free
+ *       will always be greater than or equal to 4 bytes.
+ *    c. tx_fifo_free is zero. In this case, last few bytes (less than 4
+ *       bytes) are copied to tx_fifo_data but couldn't be sent because of
+ *       FIFO full so make send_last_word true.
+ */
+static void qup_i2c_write_tx_fifo_v2(struct qup_i2c_dev *qup)
+{
+	struct qup_i2c_block *blk = &qup->blk;
 
-		ret = qup_i2c_wait_for_complete(qup, msg);
-		if (ret)
-			goto err;
+	if (!blk->tx_tags_sent) {
+		qup_i2c_write_blk_data(qup, &blk->cur_tx_tags,
+				       &blk->tx_tag_len);
+		blk->tx_tags_sent = true;
+	}
 
-		ret = qup_i2c_read_fifo_v2(qup, msg);
-		if (ret)
-			goto err;
+	if (blk->send_last_word)
+		goto send_last_word;
 
-		qup->blk.pos++;
+	qup_i2c_write_blk_data(qup, &blk->cur_data, &blk->cur_blk_len);
+	if (!blk->cur_blk_len) {
+		if (!blk->tx_fifo_data_pos)
+			return;
 
-		/* Handle SMBus block read length */
-		if (qup_i2c_check_msg_len(msg) && (msg->len == 1)) {
-			if (msg->buf[0] > I2C_SMBUS_BLOCK_MAX) {
-				ret = -EPROTO;
-				goto err;
-			}
-			msg->len += msg->buf[0];
-			qup->pos = 0;
-			qup_i2c_set_blk_data(qup, msg);
-			/* set tag length for block read */
-			qup->blk.tx_tag_len = 2;
-			qup_i2c_set_read_mode_v2(qup, msg->buf[0]);
-		}
-	} while (qup->blk.pos < qup->blk.count);
+		if (blk->tx_fifo_free)
+			goto send_last_word;
 
-err:
-	disable_irq(qup->irq);
-	qup->msg = NULL;
+		blk->send_last_word = true;
+	}
 
-	return ret;
+	return;
+
+send_last_word:
+	writel(blk->tx_fifo_data, qup->base + QUP_OUT_FIFO_BASE);
 }
 
-static int qup_i2c_read_one(struct qup_i2c_dev *qup, struct i2c_msg *msg)
+/*
+ * Main transfer function which read or write i2c data.
+ * The QUP v2 supports reconfiguration during run in which multiple i2c sub
+ * transfers can be scheduled.
+ */
+static int
+qup_i2c_conf_xfer_v2(struct qup_i2c_dev *qup, bool is_rx, bool is_first,
+		     bool change_pause_state)
 {
+	struct qup_i2c_block *blk = &qup->blk;
+	struct i2c_msg *msg = qup->msg;
 	int ret;
 
-	qup->msg = msg;
-	qup->pos  = 0;
+	/*
+	 * Check if it is an SMBus Block read, for which the top level read
+	 * will be done in 2 QUP reads: one with message length 1 and the
+	 * other one with the actual length.
+	 */
+	if (qup_i2c_check_msg_len(msg)) {
+		if (qup->is_smbus_read) {
+			/*
+			 * If the message length is already read in
+			 * the first byte of the buffer, account for
+			 * that by setting the offset
+			 */
+			blk->cur_data += 1;
+			is_first = false;
+		} else {
+			change_pause_state = false;
+		}
+	}
 
-	enable_irq(qup->irq);
-	qup_i2c_set_read_mode(qup, msg->len);
+	qup->config_run = is_first ? 0 : QUP_I2C_MX_CONFIG_DURING_RUN;
 
-	ret = qup_i2c_change_state(qup, QUP_RUN_STATE);
-	if (ret)
-		goto err;
+	qup_i2c_clear_blk_v2(blk);
+	qup_i2c_conf_count_v2(qup);
 
-	writel(qup->clk_ctl, qup->base + QUP_I2C_CLK_CTL);
+	/* If it is first sub transfer, then configure i2c bus clocks */
+	if (is_first) {
+		ret = qup_i2c_change_state(qup, QUP_RUN_STATE);
+		if (ret)
+			return ret;
 
-	ret = qup_i2c_change_state(qup, QUP_PAUSE_STATE);
-	if (ret)
-		goto err;
+		writel(qup->clk_ctl, qup->base + QUP_I2C_CLK_CTL);
 
-	qup_i2c_issue_read(qup, msg);
+		ret = qup_i2c_change_state(qup, QUP_PAUSE_STATE);
+		if (ret)
+			return ret;
+	}
+
+	reinit_completion(&qup->xfer);
+	enable_irq(qup->irq);
+	/*
+	 * In FIFO mode, tx FIFO can be written directly while in block mode
+	 * it will be written after getting OUT_BLOCK_WRITE_REQ interrupt
+	 */
+	if (!blk->is_tx_blk_mode) {
+		blk->tx_fifo_free = qup->out_fifo_sz;
+
+		if (is_rx)
+			qup_i2c_write_rx_tags_v2(qup);
+		else
+			qup_i2c_write_tx_fifo_v2(qup);
+	}
 
 	ret = qup_i2c_change_state(qup, QUP_RUN_STATE);
 	if (ret)
 		goto err;
 
-	do {
-		ret = qup_i2c_wait_for_complete(qup, msg);
-		if (ret)
-			goto err;
+	ret = qup_i2c_wait_for_complete(qup, msg);
+	if (ret)
+		goto err;
 
-		ret = qup_i2c_read_fifo(qup, msg);
+	/* Move to pause state for all the transfers, except last one */
+	if (change_pause_state) {
+		ret = qup_i2c_change_state(qup, QUP_PAUSE_STATE);
 		if (ret)
 			goto err;
-	} while (qup->pos < msg->len);
+	}
 
 err:
 	disable_irq(qup->irq);
-	qup->msg = NULL;
-
 	return ret;
 }
 
-static int qup_i2c_xfer(struct i2c_adapter *adap,
-			struct i2c_msg msgs[],
-			int num)
+/*
+ * Transfer one read/write message in i2c transfer. It splits the message into
+ * multiple blocks of blk_xfer_limit data length and schedules each
+ * QUP block individually.
+ */
+static int qup_i2c_xfer_v2_msg(struct qup_i2c_dev *qup, int msg_id, bool is_rx)
 {
-	struct qup_i2c_dev *qup = i2c_get_adapdata(adap);
-	int ret, idx;
+	int ret = 0;
+	unsigned int data_len, i;
+	struct i2c_msg *msg = qup->msg;
+	struct qup_i2c_block *blk = &qup->blk;
+	u8 *msg_buf = msg->buf;
 
-	ret = pm_runtime_get_sync(qup->dev);
-	if (ret < 0)
-		goto out;
+	qup->blk_xfer_limit = is_rx ? RECV_MAX_DATA_LEN : QUP_READ_LIMIT;
+	qup_i2c_set_blk_data(qup, msg);
 
-	qup->bus_err = 0;
-	qup->qup_err = 0;
+	for (i = 0; i < blk->count; i++) {
+		data_len =  qup_i2c_get_data_len(qup);
+		blk->pos = i;
+		blk->cur_tx_tags = blk->tags;
+		blk->cur_blk_len = data_len;
+		blk->tx_tag_len =
+			qup_i2c_set_tags(blk->cur_tx_tags, qup, qup->msg);
 
-	writel(1, qup->base + QUP_SW_RESET);
-	ret = qup_i2c_poll_state(qup, QUP_RESET_STATE);
-	if (ret)
-		goto out;
+		blk->cur_data = msg_buf;
 
-	/* Configure QUP as I2C mini core */
-	writel(I2C_MINI_CORE | I2C_N_VAL, qup->base + QUP_CONFIG);
-
-	for (idx = 0; idx < num; idx++) {
-		if (msgs[idx].len == 0) {
-			ret = -EINVAL;
-			goto out;
+		if (is_rx) {
+			blk->total_tx_len = blk->tx_tag_len;
+			blk->rx_tag_len = 2;
+			blk->total_rx_len = blk->rx_tag_len + data_len;
+		} else {
+			blk->total_tx_len = blk->tx_tag_len + data_len;
+			blk->total_rx_len = 0;
 		}
 
-		if (qup_i2c_poll_state_i2c_master(qup)) {
-			ret = -EIO;
-			goto out;
-		}
+		ret = qup_i2c_conf_xfer_v2(qup, is_rx, !msg_id && !i,
+					   !qup->is_last || i < blk->count - 1);
+		if (ret)
+			return ret;
 
-		if (qup_i2c_check_msg_len(&msgs[idx])) {
-			ret = -EINVAL;
-			goto out;
+		/* Handle SMBus block read length */
+		if (qup_i2c_check_msg_len(msg) && msg->len == 1 &&
+		    !qup->is_smbus_read) {
+			if (msg->buf[0] > I2C_SMBUS_BLOCK_MAX)
+				return -EPROTO;
+
+			msg->len = msg->buf[0];
+			qup->is_smbus_read = true;
+			ret = qup_i2c_xfer_v2_msg(qup, msg_id, true);
+			qup->is_smbus_read = false;
+			if (ret)
+				return ret;
+
+			msg->len += 1;
 		}
 
+		msg_buf += data_len;
+		blk->data_len -= qup->blk_xfer_limit;
+	}
+
+	return ret;
+}
+
+/*
+ * QUP v2 supports 3 modes
+ * Programmed IO using FIFO mode : Less than FIFO size
+ * Programmed IO using Block mode : Greater than FIFO size
+ * DMA using BAM : Appropriate for any transaction size but the address should
+ *		   be DMA applicable
+ *
+ * This function determines the mode which will be used for this transfer. An
+ * i2c transfer contains multiple messages. The following rules determine
+ * the mode used.
+ * 1. Determine complete length, maximum tx and rx length for complete transfer.
+ * 2. If complete transfer length is greater than fifo size then use the DMA
+ *    mode.
+ * 3. In FIFO or block mode, tx and rx can operate in different mode so check
+ *    for maximum tx and rx length to determine mode.
+ */
+static int
+qup_i2c_determine_mode_v2(struct qup_i2c_dev *qup,
+			  struct i2c_msg msgs[], int num)
+{
+	int idx;
+	bool no_dma = false;
+	unsigned int max_tx_len = 0, max_rx_len = 0, total_len = 0;
+
+	/* All i2c_msgs should be transferred using either dma or cpu */
+	for (idx = 0; idx < num; idx++) {
+		if (msgs[idx].len == 0)
+			return -EINVAL;
+
 		if (msgs[idx].flags & I2C_M_RD)
-			ret = qup_i2c_read_one(qup, &msgs[idx]);
+			max_rx_len = max_t(unsigned int, max_rx_len,
+					   msgs[idx].len);
 		else
-			ret = qup_i2c_write_one(qup, &msgs[idx]);
+			max_tx_len = max_t(unsigned int, max_tx_len,
+					   msgs[idx].len);
 
-		if (ret)
-			break;
+		if (is_vmalloc_addr(msgs[idx].buf))
+			no_dma = true;
 
-		ret = qup_i2c_change_state(qup, QUP_RESET_STATE);
-		if (ret)
-			break;
+		total_len += msgs[idx].len;
 	}
 
-	if (ret == 0)
-		ret = num;
-out:
-
-	pm_runtime_mark_last_busy(qup->dev);
-	pm_runtime_put_autosuspend(qup->dev);
+	if (!no_dma && qup->is_dma &&
+	    (total_len > qup->out_fifo_sz || total_len > qup->in_fifo_sz)) {
+		qup->use_dma = true;
+	} else {
+		qup->blk.is_tx_blk_mode = max_tx_len > qup->out_fifo_sz -
+			QUP_MAX_TAGS_LEN ? true : false;
+		qup->blk.is_rx_blk_mode = max_rx_len > qup->in_fifo_sz -
+			READ_RX_TAGS_LEN ? true : false;
+	}
 
-	return ret;
+	return 0;
 }
 
 static int qup_i2c_xfer_v2(struct i2c_adapter *adap,
@@ -1295,7 +1545,7 @@ static int qup_i2c_xfer_v2(struct i2c_adapter *adap,
 			   int num)
 {
 	struct qup_i2c_dev *qup = i2c_get_adapdata(adap);
-	int ret, len, idx = 0, use_dma = 0;
+	int ret, idx = 0;
 
 	qup->bus_err = 0;
 	qup->qup_err = 0;
@@ -1304,6 +1554,10 @@ static int qup_i2c_xfer_v2(struct i2c_adapter *adap,
 	if (ret < 0)
 		goto out;
 
+	ret = qup_i2c_determine_mode_v2(qup, msgs, num);
+	if (ret)
+		goto out;
+
 	writel(1, qup->base + QUP_SW_RESET);
 	ret = qup_i2c_poll_state(qup, QUP_RESET_STATE);
 	if (ret)
@@ -1313,59 +1567,35 @@ static int qup_i2c_xfer_v2(struct i2c_adapter *adap,
 	writel(I2C_MINI_CORE | I2C_N_VAL_V2, qup->base + QUP_CONFIG);
 	writel(QUP_V2_TAGS_EN, qup->base + QUP_I2C_MASTER_GEN);
 
-	if ((qup->is_dma)) {
-		/* All i2c_msgs should be transferred using either dma or cpu */
-		for (idx = 0; idx < num; idx++) {
-			if (msgs[idx].len == 0) {
-				ret = -EINVAL;
-				goto out;
-			}
-
-			len = (msgs[idx].len > qup->out_fifo_sz) ||
-			      (msgs[idx].len > qup->in_fifo_sz);
-
-			if ((!is_vmalloc_addr(msgs[idx].buf)) && len) {
-				use_dma = 1;
-			 } else {
-				use_dma = 0;
-				break;
-			}
-		}
+	if (qup_i2c_poll_state_i2c_master(qup)) {
+		ret = -EIO;
+		goto out;
 	}
 
-	idx = 0;
+	if (qup->use_dma) {
+		reinit_completion(&qup->xfer);
+		ret = qup_i2c_bam_xfer(adap, &msgs[0], num);
+		qup->use_dma = false;
+	} else {
+		qup_i2c_conf_mode_v2(qup);
 
-	do {
-		if (msgs[idx].len == 0) {
-			ret = -EINVAL;
-			goto out;
-		}
+		for (idx = 0; idx < num; idx++) {
+			qup->msg = &msgs[idx];
+			qup->is_last = idx == (num - 1);
 
-		if (qup_i2c_poll_state_i2c_master(qup)) {
-			ret = -EIO;
-			goto out;
+			ret = qup_i2c_xfer_v2_msg(qup, idx,
+					!!(msgs[idx].flags & I2C_M_RD));
+			if (ret)
+				break;
 		}
+		qup->msg = NULL;
+	}
 
-		qup->is_last = (idx == (num - 1));
-		if (idx)
-			qup->config_run = QUP_I2C_MX_CONFIG_DURING_RUN;
-		else
-			qup->config_run = 0;
-
-		reinit_completion(&qup->xfer);
-
-		if (use_dma) {
-			ret = qup_i2c_bam_xfer(adap, &msgs[idx], num);
-		} else {
-			if (msgs[idx].flags & I2C_M_RD)
-				ret = qup_i2c_read_one_v2(qup, &msgs[idx]);
-			else
-				ret = qup_i2c_write_one_v2(qup, &msgs[idx]);
-		}
-	} while ((idx++ < (num - 1)) && !use_dma && !ret);
+	if (!ret)
+		ret = qup_i2c_bus_active(qup, ONE_BYTE);
 
 	if (!ret)
-		ret = qup_i2c_change_state(qup, QUP_RESET_STATE);
+		qup_i2c_change_state(qup, QUP_RESET_STATE);
 
 	if (ret == 0)
 		ret = num;
@@ -1429,6 +1659,7 @@ static int qup_i2c_probe(struct platform_device *pdev)
 	u32 src_clk_freq = DEFAULT_SRC_CLK;
 	u32 clk_freq = DEFAULT_CLK_FREQ;
 	int blocks;
+	bool is_qup_v1;
 
 	qup = devm_kzalloc(&pdev->dev, sizeof(*qup), GFP_KERNEL);
 	if (!qup)
@@ -1447,8 +1678,10 @@ static int qup_i2c_probe(struct platform_device *pdev)
 	if (of_device_is_compatible(pdev->dev.of_node, "qcom,i2c-qup-v1.1.1")) {
 		qup->adap.algo = &qup_i2c_algo;
 		qup->adap.quirks = &qup_i2c_quirks;
+		is_qup_v1 = true;
 	} else {
 		qup->adap.algo = &qup_i2c_algo_v2;
+		is_qup_v1 = false;
 		ret = qup_i2c_req_dma(qup);
 
 		if (ret == -EPROBE_DEFER)
@@ -1456,7 +1689,8 @@ static int qup_i2c_probe(struct platform_device *pdev)
 		else if (ret != 0)
 			goto nodma;
 
-		blocks = (MX_BLOCKS << 1) + 1;
+		qup->max_xfer_sg_len = (MX_BLOCKS << 1);
+		blocks = (MX_DMA_BLOCKS << 1) + 1;
 		qup->btx.sg = devm_kzalloc(&pdev->dev,
 					   sizeof(*qup->btx.sg) * blocks,
 					   GFP_KERNEL);
@@ -1573,14 +1807,31 @@ nodma:
 		ret = -EIO;
 		goto fail;
 	}
-	qup->out_blk_sz = blk_sizes[size] / 2;
+	qup->out_blk_sz = blk_sizes[size];
 
 	size = QUP_INPUT_BLOCK_SIZE(io_mode);
 	if (size >= ARRAY_SIZE(blk_sizes)) {
 		ret = -EIO;
 		goto fail;
 	}
-	qup->in_blk_sz = blk_sizes[size] / 2;
+	qup->in_blk_sz = blk_sizes[size];
+
+	if (is_qup_v1) {
+		/*
+		 * In QUP v1, QUP_CONFIG uses N as 15, i.e. 16 bits constitute
+		 * a single transfer, but the block size is in bytes, so divide
+		 * in_blk_sz and out_blk_sz by 2.
+		 */
+		qup->in_blk_sz /= 2;
+		qup->out_blk_sz /= 2;
+		qup->write_tx_fifo = qup_i2c_write_tx_fifo_v1;
+		qup->read_rx_fifo = qup_i2c_read_rx_fifo_v1;
+		qup->write_rx_tags = qup_i2c_write_rx_tags_v1;
+	} else {
+		qup->write_tx_fifo = qup_i2c_write_tx_fifo_v2;
+		qup->read_rx_fifo = qup_i2c_read_rx_fifo_v2;
+		qup->write_rx_tags = qup_i2c_write_rx_tags_v2;
+	}
 
 	size = QUP_OUTPUT_FIFO_SIZE(io_mode);
 	qup->out_fifo_sz = qup->out_blk_sz * (2 << size);
@@ -1598,6 +1849,8 @@ nodma:
 	 */
 	one_bit_t = (USEC_PER_SEC / clk_freq) + 1;
 	qup->one_byte_t = one_bit_t * 9;
+	qup->xfer_timeout = TOUT_MIN * HZ +
+		usecs_to_jiffies(MX_DMA_TX_RX_LEN * qup->one_byte_t);
 
 	dev_dbg(qup->dev, "IN:block:%d, fifo:%d, OUT:block:%d, fifo:%d\n",
 		qup->in_blk_sz, qup->in_fifo_sz,
diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c
index 4159ebcec2bb..c6915b835396 100644
--- a/drivers/i2c/busses/i2c-rcar.c
+++ b/drivers/i2c/busses/i2c-rcar.c
@@ -102,8 +102,8 @@
 #define RCAR_IRQ_RECV	(MNR | MAL | MST | MAT | MDR)
 #define RCAR_IRQ_STOP	(MST)
 
-#define RCAR_IRQ_ACK_SEND	(~(MAT | MDE) & 0xFF)
-#define RCAR_IRQ_ACK_RECV	(~(MAT | MDR) & 0xFF)
+#define RCAR_IRQ_ACK_SEND	(~(MAT | MDE) & 0x7F)
+#define RCAR_IRQ_ACK_RECV	(~(MAT | MDR) & 0x7F)
 
 #define ID_LAST_MSG	(1 << 0)
 #define ID_FIRST_MSG	(1 << 1)
diff --git a/drivers/i2c/busses/i2c-scmi.c b/drivers/i2c/busses/i2c-scmi.c
index 7aa7b9cb6203..a01389b85f13 100644
--- a/drivers/i2c/busses/i2c-scmi.c
+++ b/drivers/i2c/busses/i2c-scmi.c
@@ -182,7 +182,8 @@ acpi_smbus_cmi_access(struct i2c_adapter *adap, u16 addr, unsigned short flags,
 	status = acpi_evaluate_object(smbus_cmi->handle, method, &input,
 				      &buffer);
 	if (ACPI_FAILURE(status)) {
-		ACPI_ERROR((AE_INFO, "Evaluating %s: %i", method, status));
+		acpi_handle_err(smbus_cmi->handle,
+				"Failed to evaluate %s: %i\n", method, status);
 		return -EIO;
 	}
 
@@ -190,19 +191,19 @@ acpi_smbus_cmi_access(struct i2c_adapter *adap, u16 addr, unsigned short flags,
 	if (pkg && pkg->type == ACPI_TYPE_PACKAGE)
 		obj = pkg->package.elements;
 	else {
-		ACPI_ERROR((AE_INFO, "Invalid argument type"));
+		acpi_handle_err(smbus_cmi->handle, "Invalid argument type\n");
 		result = -EIO;
 		goto out;
 	}
 	if (obj == NULL || obj->type != ACPI_TYPE_INTEGER) {
-		ACPI_ERROR((AE_INFO, "Invalid argument type"));
+		acpi_handle_err(smbus_cmi->handle, "Invalid argument type\n");
 		result = -EIO;
 		goto out;
 	}
 
 	result = obj->integer.value;
-	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "%s return status: %i\n",
-			  method, result));
+	acpi_handle_debug(smbus_cmi->handle,  "%s return status: %i\n", method,
+			  result);
 
 	switch (result) {
 	case ACPI_SMBUS_STATUS_OK:
@@ -227,7 +228,7 @@ acpi_smbus_cmi_access(struct i2c_adapter *adap, u16 addr, unsigned short flags,
 
 	obj = pkg->package.elements + 1;
 	if (obj->type != ACPI_TYPE_INTEGER) {
-		ACPI_ERROR((AE_INFO, "Invalid argument type"));
+		acpi_handle_err(smbus_cmi->handle, "Invalid argument type\n");
 		result = -EIO;
 		goto out;
 	}
@@ -239,7 +240,8 @@ acpi_smbus_cmi_access(struct i2c_adapter *adap, u16 addr, unsigned short flags,
 	case I2C_SMBUS_BYTE_DATA:
 	case I2C_SMBUS_WORD_DATA:
 		if (obj->type != ACPI_TYPE_INTEGER) {
-			ACPI_ERROR((AE_INFO, "Invalid argument type"));
+			acpi_handle_err(smbus_cmi->handle,
+					"Invalid argument type\n");
 			result = -EIO;
 			goto out;
 		}
@@ -250,7 +252,8 @@ acpi_smbus_cmi_access(struct i2c_adapter *adap, u16 addr, unsigned short flags,
 		break;
 	case I2C_SMBUS_BLOCK_DATA:
 		if (obj->type != ACPI_TYPE_BUFFER) {
-			ACPI_ERROR((AE_INFO, "Invalid argument type"));
+			acpi_handle_err(smbus_cmi->handle,
+					"Invalid argument type\n");
 			result = -EIO;
 			goto out;
 		}
@@ -300,6 +303,7 @@ static int acpi_smbus_cmi_add_cap(struct acpi_smbus_cmi *smbus_cmi,
 				  const char *name)
 {
 	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+	struct acpi_handle *handle = smbus_cmi->handle;
 	union acpi_object *obj;
 	acpi_status status;
 
@@ -308,8 +312,8 @@ static int acpi_smbus_cmi_add_cap(struct acpi_smbus_cmi *smbus_cmi,
 					smbus_cmi->methods->mt_info,
 					NULL, &buffer);
 		if (ACPI_FAILURE(status)) {
-			ACPI_ERROR((AE_INFO, "Evaluating %s: %i",
-				   smbus_cmi->methods->mt_info, status));
+			acpi_handle_err(handle, "Failed to evaluate %s: %i\n",
+					smbus_cmi->methods->mt_info, status);
 			return -EIO;
 		}
 
@@ -317,18 +321,18 @@ static int acpi_smbus_cmi_add_cap(struct acpi_smbus_cmi *smbus_cmi,
 		if (obj && obj->type == ACPI_TYPE_PACKAGE)
 			obj = obj->package.elements;
 		else {
-			ACPI_ERROR((AE_INFO, "Invalid argument type"));
+			acpi_handle_err(handle, "Invalid argument type\n");
 			kfree(buffer.pointer);
 			return -EIO;
 		}
 
 		if (obj->type != ACPI_TYPE_INTEGER) {
-			ACPI_ERROR((AE_INFO, "Invalid argument type"));
+			acpi_handle_err(handle, "Invalid argument type\n");
 			kfree(buffer.pointer);
 			return -EIO;
 		} else
-			ACPI_DEBUG_PRINT((ACPI_DB_INFO, "SMBus CMI Version %x"
-					  "\n", (int)obj->integer.value));
+			acpi_handle_debug(handle, "SMBus CMI Version %x\n",
+					  (int)obj->integer.value);
 
 		kfree(buffer.pointer);
 		smbus_cmi->cap_info = 1;
@@ -337,8 +341,7 @@ static int acpi_smbus_cmi_add_cap(struct acpi_smbus_cmi *smbus_cmi,
 	else if (!strcmp(name, smbus_cmi->methods->mt_sbw))
 		smbus_cmi->cap_write = 1;
 	else
-		ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Unsupported CMI method: %s\n",
-				 name));
+		acpi_handle_debug(handle, "Unsupported CMI method: %s\n", name);
 
 	return 0;
 }
diff --git a/drivers/i2c/busses/i2c-stm32f4.c b/drivers/i2c/busses/i2c-stm32f4.c
index 47c8d00de53f..ba600d77a3f8 100644
--- a/drivers/i2c/busses/i2c-stm32f4.c
+++ b/drivers/i2c/busses/i2c-stm32f4.c
@@ -349,7 +349,7 @@ static void stm32f4_i2c_read_msg(struct stm32f4_i2c_dev *i2c_dev)
 static void stm32f4_i2c_terminate_xfer(struct stm32f4_i2c_dev *i2c_dev)
 {
 	struct stm32f4_i2c_msg *msg = &i2c_dev->msg;
-	void __iomem *reg = i2c_dev->base + STM32F4_I2C_CR2;
+	void __iomem *reg;
 
 	stm32f4_i2c_disable_irq(i2c_dev);
 
diff --git a/drivers/i2c/busses/i2c-synquacer.c b/drivers/i2c/busses/i2c-synquacer.c
new file mode 100644
index 000000000000..a021f866d8c2
--- /dev/null
+++ b/drivers/i2c/busses/i2c-synquacer.c
@@ -0,0 +1,667 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2012 FUJITSU SEMICONDUCTOR LIMITED
+ */
+
+#include <linux/acpi.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#define WAIT_PCLK(n, rate)	\
+	ndelay(DIV_ROUND_UP(DIV_ROUND_UP(1000000000, rate), n) + 10)
+
+/* I2C register address definitions */
+#define SYNQUACER_I2C_REG_BSR		(0x00 << 2) // Bus Status
+#define SYNQUACER_I2C_REG_BCR		(0x01 << 2) // Bus Control
+#define SYNQUACER_I2C_REG_CCR		(0x02 << 2) // Clock Control
+#define SYNQUACER_I2C_REG_ADR		(0x03 << 2) // Address
+#define SYNQUACER_I2C_REG_DAR		(0x04 << 2) // Data
+#define SYNQUACER_I2C_REG_CSR		(0x05 << 2) // Expansion CS
+#define SYNQUACER_I2C_REG_FSR		(0x06 << 2) // Bus Clock Freq
+#define SYNQUACER_I2C_REG_BC2R		(0x07 << 2) // Bus Control 2
+
+/* I2C register bit definitions */
+#define SYNQUACER_I2C_BSR_FBT		BIT(0)	// First Byte Transfer
+#define SYNQUACER_I2C_BSR_GCA		BIT(1)	// General Call Address
+#define SYNQUACER_I2C_BSR_AAS		BIT(2)	// Address as Slave
+#define SYNQUACER_I2C_BSR_TRX		BIT(3)	// Transfer/Receive
+#define SYNQUACER_I2C_BSR_LRB		BIT(4)	// Last Received Bit
+#define SYNQUACER_I2C_BSR_AL		BIT(5)	// Arbitration Lost
+#define SYNQUACER_I2C_BSR_RSC		BIT(6)	// Repeated Start Cond.
+#define SYNQUACER_I2C_BSR_BB		BIT(7)	// Bus Busy
+
+#define SYNQUACER_I2C_BCR_INT		BIT(0)	// Interrupt
+#define SYNQUACER_I2C_BCR_INTE		BIT(1)	// Interrupt Enable
+#define SYNQUACER_I2C_BCR_GCAA		BIT(2)	// Gen. Call Access Ack.
+#define SYNQUACER_I2C_BCR_ACK		BIT(3)	// Acknowledge
+#define SYNQUACER_I2C_BCR_MSS		BIT(4)	// Master Slave Select
+#define SYNQUACER_I2C_BCR_SCC		BIT(5)	// Start Condition Cont.
+#define SYNQUACER_I2C_BCR_BEIE		BIT(6)	// Bus Error Int Enable
+#define SYNQUACER_I2C_BCR_BER		BIT(7)	// Bus Error
+
+#define SYNQUACER_I2C_CCR_CS_MASK	(0x1f)	// CCR Clock Period Sel.
+#define SYNQUACER_I2C_CCR_EN		BIT(5)	// Enable
+#define SYNQUACER_I2C_CCR_FM		BIT(6)	// Speed Mode Select
+
+#define SYNQUACER_I2C_CSR_CS_MASK	(0x3f)	// CSR Clock Period Sel.
+
+#define SYNQUACER_I2C_BC2R_SCLL		BIT(0)	// SCL Low Drive
+#define SYNQUACER_I2C_BC2R_SDAL		BIT(1)	// SDA Low Drive
+#define SYNQUACER_I2C_BC2R_SCLS		BIT(4)	// SCL Status
+#define SYNQUACER_I2C_BC2R_SDAS		BIT(5)	// SDA Status
+
+/* PCLK frequency */
+#define SYNQUACER_I2C_BUS_CLK_FR(rate)	(((rate) / 20000000) + 1)
+
+/* STANDARD MODE frequency */
+#define SYNQUACER_I2C_CLK_MASTER_STD(rate)			\
+	DIV_ROUND_UP(DIV_ROUND_UP((rate), 100000) - 2, 2)
+/* FAST MODE frequency */
+#define SYNQUACER_I2C_CLK_MASTER_FAST(rate)			\
+	DIV_ROUND_UP((DIV_ROUND_UP((rate), 400000) - 2) * 2, 3)
+
+/* (clkrate <= 18000000) */
+/* calculate the value of CS bits in CCR register on standard mode */
+#define SYNQUACER_I2C_CCR_CS_STD_MAX_18M(rate)			\
+	   ((SYNQUACER_I2C_CLK_MASTER_STD(rate) - 65)		\
+					& SYNQUACER_I2C_CCR_CS_MASK)
+
+/* calculate the value of CS bits in CSR register on standard mode */
+#define SYNQUACER_I2C_CSR_CS_STD_MAX_18M(rate)		0x00
+
+/* calculate the value of CS bits in CCR register on fast mode */
+#define SYNQUACER_I2C_CCR_CS_FAST_MAX_18M(rate)			\
+	   ((SYNQUACER_I2C_CLK_MASTER_FAST(rate) - 1)		\
+					& SYNQUACER_I2C_CCR_CS_MASK)
+
+/* calculate the value of CS bits in CSR register on fast mode */
+#define SYNQUACER_I2C_CSR_CS_FAST_MAX_18M(rate)		0x00
+
+/* (clkrate > 18000000) */
+/* calculate the value of CS bits in CCR register on standard mode */
+#define SYNQUACER_I2C_CCR_CS_STD_MIN_18M(rate)			\
+	   ((SYNQUACER_I2C_CLK_MASTER_STD(rate) - 1)		\
+					& SYNQUACER_I2C_CCR_CS_MASK)
+
+/* calculate the value of CS bits in CSR register on standard mode */
+#define SYNQUACER_I2C_CSR_CS_STD_MIN_18M(rate)			\
+	   (((SYNQUACER_I2C_CLK_MASTER_STD(rate) - 1) >> 5)	\
+					& SYNQUACER_I2C_CSR_CS_MASK)
+
+/* calculate the value of CS bits in CCR register on fast mode */
+#define SYNQUACER_I2C_CCR_CS_FAST_MIN_18M(rate)			\
+	   ((SYNQUACER_I2C_CLK_MASTER_FAST(rate) - 1)		\
+					& SYNQUACER_I2C_CCR_CS_MASK)
+
+/* calculate the value of CS bits in CSR register on fast mode */
+#define SYNQUACER_I2C_CSR_CS_FAST_MIN_18M(rate)			\
+	   (((SYNQUACER_I2C_CLK_MASTER_FAST(rate) - 1) >> 5)	\
+					& SYNQUACER_I2C_CSR_CS_MASK)
+
+/* min I2C clock frequency 14M */
+#define SYNQUACER_I2C_MIN_CLK_RATE	(14 * 1000000)
+/* max I2C clock frequency 200M */
+#define SYNQUACER_I2C_MAX_CLK_RATE	(200 * 1000000)
+/* I2C clock frequency 18M */
+#define SYNQUACER_I2C_CLK_RATE_18M	(18 * 1000000)
+
+#define SYNQUACER_I2C_SPEED_FM		400	// Fast Mode
+#define SYNQUACER_I2C_SPEED_SM		100	// Standard Mode
+
+enum i2c_state {
+	STATE_IDLE,
+	STATE_START,
+	STATE_READ,
+	STATE_WRITE
+};
+
+struct synquacer_i2c {
+	struct completion	completion;
+
+	struct i2c_msg		*msg;
+	u32			msg_num;
+	u32			msg_idx;
+	u32			msg_ptr;
+
+	int			irq;
+	struct device		*dev;
+	void __iomem		*base;
+	struct clk		*pclk;
+	u32			pclkrate;
+	u32			speed_khz;
+	u32			timeout_ms;
+	enum i2c_state		state;
+	struct i2c_adapter	adapter;
+
+	bool			is_suspended;
+};
+
+static inline int is_lastmsg(struct synquacer_i2c *i2c)
+{
+	return i2c->msg_idx >= (i2c->msg_num - 1);
+}
+
+static inline int is_msglast(struct synquacer_i2c *i2c)
+{
+	return i2c->msg_ptr == (i2c->msg->len - 1);
+}
+
+static inline int is_msgend(struct synquacer_i2c *i2c)
+{
+	return i2c->msg_ptr >= i2c->msg->len;
+}
+
+static inline unsigned long calc_timeout_ms(struct synquacer_i2c *i2c,
+					    struct i2c_msg *msgs,
+					    int num)
+{
+	unsigned long bit_count = 0;
+	int i;
+
+	for (i = 0; i < num; i++, msgs++)
+		bit_count += msgs->len;
+
+	return DIV_ROUND_UP((bit_count * 9 + num * 10) * 3, 200) + 10;
+}
+
+static void synquacer_i2c_stop(struct synquacer_i2c *i2c, int ret)
+{
+	/*
+	 * clear IRQ (INT=0, BER=0)
+	 * set Stop Condition (MSS=0)
+	 * Interrupt Disable
+	 */
+	writeb(0, i2c->base + SYNQUACER_I2C_REG_BCR);
+
+	i2c->state = STATE_IDLE;
+
+	i2c->msg_ptr = 0;
+	i2c->msg = NULL;
+	i2c->msg_idx++;
+	i2c->msg_num = 0;
+	if (ret)
+		i2c->msg_idx = ret;
+
+	complete(&i2c->completion);
+}
+
+static void synquacer_i2c_hw_init(struct synquacer_i2c *i2c)
+{
+	unsigned char ccr_cs, csr_cs;
+	u32 rt = i2c->pclkrate;
+
+	/* Set own Address */
+	writeb(0, i2c->base + SYNQUACER_I2C_REG_ADR);
+
+	/* Set PCLK frequency */
+	writeb(SYNQUACER_I2C_BUS_CLK_FR(i2c->pclkrate),
+	       i2c->base + SYNQUACER_I2C_REG_FSR);
+
+	switch (i2c->speed_khz) {
+	case SYNQUACER_I2C_SPEED_FM:
+		if (i2c->pclkrate <= SYNQUACER_I2C_CLK_RATE_18M) {
+			ccr_cs = SYNQUACER_I2C_CCR_CS_FAST_MAX_18M(rt);
+			csr_cs = SYNQUACER_I2C_CSR_CS_FAST_MAX_18M(rt);
+		} else {
+			ccr_cs = SYNQUACER_I2C_CCR_CS_FAST_MIN_18M(rt);
+			csr_cs = SYNQUACER_I2C_CSR_CS_FAST_MIN_18M(rt);
+		}
+
+		/* Set Clock and enable, Set fast mode */
+		writeb(ccr_cs | SYNQUACER_I2C_CCR_FM |
+		       SYNQUACER_I2C_CCR_EN,
+		       i2c->base + SYNQUACER_I2C_REG_CCR);
+		writeb(csr_cs, i2c->base + SYNQUACER_I2C_REG_CSR);
+		break;
+	case SYNQUACER_I2C_SPEED_SM:
+		if (i2c->pclkrate <= SYNQUACER_I2C_CLK_RATE_18M) {
+			ccr_cs = SYNQUACER_I2C_CCR_CS_STD_MAX_18M(rt);
+			csr_cs = SYNQUACER_I2C_CSR_CS_STD_MAX_18M(rt);
+		} else {
+			ccr_cs = SYNQUACER_I2C_CCR_CS_STD_MIN_18M(rt);
+			csr_cs = SYNQUACER_I2C_CSR_CS_STD_MIN_18M(rt);
+		}
+
+		/* Set Clock and enable, Set standard mode */
+		writeb(ccr_cs | SYNQUACER_I2C_CCR_EN,
+		      i2c->base + SYNQUACER_I2C_REG_CCR);
+		writeb(csr_cs, i2c->base + SYNQUACER_I2C_REG_CSR);
+		break;
+	default:
+		WARN_ON(1);
+	}
+
+	/* clear IRQ (INT=0, BER=0), Interrupt Disable */
+	writeb(0, i2c->base + SYNQUACER_I2C_REG_BCR);
+	writeb(0, i2c->base + SYNQUACER_I2C_REG_BC2R);
+}
+
+static void synquacer_i2c_hw_reset(struct synquacer_i2c *i2c)
+{
+	/* Disable clock */
+	writeb(0, i2c->base + SYNQUACER_I2C_REG_CCR);
+	writeb(0, i2c->base + SYNQUACER_I2C_REG_CSR);
+
+	WAIT_PCLK(100, i2c->pclkrate);
+}
+
+static int synquacer_i2c_master_start(struct synquacer_i2c *i2c,
+				      struct i2c_msg *pmsg)
+{
+	unsigned char bsr, bcr;
+
+	writeb(i2c_8bit_addr_from_msg(pmsg), i2c->base + SYNQUACER_I2C_REG_DAR);
+
+	dev_dbg(i2c->dev, "slave:0x%02x\n", pmsg->addr);
+
+	/* Generate Start Condition */
+	bsr = readb(i2c->base + SYNQUACER_I2C_REG_BSR);
+	bcr = readb(i2c->base + SYNQUACER_I2C_REG_BCR);
+	dev_dbg(i2c->dev, "bsr:0x%02x, bcr:0x%02x\n", bsr, bcr);
+
+	if ((bsr & SYNQUACER_I2C_BSR_BB) &&
+	    !(bcr & SYNQUACER_I2C_BCR_MSS)) {
+		dev_dbg(i2c->dev, "bus is busy\n");
+		return -EBUSY;
+	}
+
+	if (bsr & SYNQUACER_I2C_BSR_BB) { /* Bus is busy */
+		dev_dbg(i2c->dev, "Continuous Start\n");
+		writeb(bcr | SYNQUACER_I2C_BCR_SCC,
+		       i2c->base + SYNQUACER_I2C_REG_BCR);
+	} else {
+		if (bcr & SYNQUACER_I2C_BCR_MSS) {
+			dev_dbg(i2c->dev, "not in master mode\n");
+			return -EAGAIN;
+		}
+		dev_dbg(i2c->dev, "Start Condition\n");
+		/* Start Condition + Enable Interrupts */
+		writeb(bcr | SYNQUACER_I2C_BCR_MSS |
+		       SYNQUACER_I2C_BCR_INTE | SYNQUACER_I2C_BCR_BEIE,
+		       i2c->base + SYNQUACER_I2C_REG_BCR);
+	}
+
+	WAIT_PCLK(10, i2c->pclkrate);
+
+	/* re-read BSR & BCR to check whether arbitration was lost */
+	bsr = readb(i2c->base + SYNQUACER_I2C_REG_BSR);
+	bcr = readb(i2c->base + SYNQUACER_I2C_REG_BCR);
+	dev_dbg(i2c->dev, "bsr:0x%02x, bcr:0x%02x\n", bsr, bcr);
+
+	if ((bsr & SYNQUACER_I2C_BSR_AL) ||
+	    !(bcr & SYNQUACER_I2C_BCR_MSS)) {
+		dev_dbg(i2c->dev, "arbitration lost\n");
+		return -EAGAIN;
+	}
+
+	return 0;
+}
+
+static int synquacer_i2c_doxfer(struct synquacer_i2c *i2c,
+				struct i2c_msg *msgs, int num)
+{
+	unsigned char bsr;
+	unsigned long timeout;
+	int ret;
+
+	if (i2c->is_suspended)
+		return -EBUSY;
+
+	synquacer_i2c_hw_init(i2c);
+	bsr = readb(i2c->base + SYNQUACER_I2C_REG_BSR);
+	if (bsr & SYNQUACER_I2C_BSR_BB) {
+		dev_err(i2c->dev, "cannot get bus (bus busy)\n");
+		return -EBUSY;
+	}
+
+	reinit_completion(&i2c->completion);
+
+	i2c->msg = msgs;
+	i2c->msg_num = num;
+	i2c->msg_ptr = 0;
+	i2c->msg_idx = 0;
+	i2c->state = STATE_START;
+
+	ret = synquacer_i2c_master_start(i2c, i2c->msg);
+	if (ret < 0) {
+		dev_dbg(i2c->dev, "Address failed: (%d)\n", ret);
+		return ret;
+	}
+
+	timeout = wait_for_completion_timeout(&i2c->completion,
+					msecs_to_jiffies(i2c->timeout_ms));
+	if (timeout == 0) {
+		dev_dbg(i2c->dev, "timeout\n");
+		return -EAGAIN;
+	}
+
+	ret = i2c->msg_idx;
+	if (ret != num) {
+		dev_dbg(i2c->dev, "incomplete xfer (%d)\n", ret);
+		return -EAGAIN;
+	}
+
+	/* wait 2 clock periods to ensure the stop has been through the bus */
+	udelay(DIV_ROUND_UP(2 * 1000, i2c->speed_khz));
+
+	return 0;
+}
+
+static irqreturn_t synquacer_i2c_isr(int irq, void *dev_id)
+{
+	struct synquacer_i2c *i2c = dev_id;
+
+	unsigned char byte;
+	unsigned char bsr, bcr;
+	int ret;
+
+	bcr = readb(i2c->base + SYNQUACER_I2C_REG_BCR);
+	bsr = readb(i2c->base + SYNQUACER_I2C_REG_BSR);
+	dev_dbg(i2c->dev, "bsr:0x%02x, bcr:0x%02x\n", bsr, bcr);
+
+	if (bcr & SYNQUACER_I2C_BCR_BER) {
+		dev_err(i2c->dev, "bus error\n");
+		synquacer_i2c_stop(i2c, -EAGAIN);
+		goto out;
+	}
+	if ((bsr & SYNQUACER_I2C_BSR_AL) ||
+	    !(bcr & SYNQUACER_I2C_BCR_MSS)) {
+		dev_dbg(i2c->dev, "arbitration lost\n");
+		synquacer_i2c_stop(i2c, -EAGAIN);
+		goto out;
+	}
+
+	switch (i2c->state) {
+	case STATE_START:
+		if (bsr & SYNQUACER_I2C_BSR_LRB) {
+			dev_dbg(i2c->dev, "ack was not received\n");
+			synquacer_i2c_stop(i2c, -EAGAIN);
+			goto out;
+		}
+
+		if (i2c->msg->flags & I2C_M_RD)
+			i2c->state = STATE_READ;
+		else
+			i2c->state = STATE_WRITE;
+
+		if (is_lastmsg(i2c) && i2c->msg->len == 0) {
+			synquacer_i2c_stop(i2c, 0);
+			goto out;
+		}
+
+		if (i2c->state == STATE_READ)
+			goto prepare_read;
+
+		/* fallthru */
+
+	case STATE_WRITE:
+		if (bsr & SYNQUACER_I2C_BSR_LRB) {
+			dev_dbg(i2c->dev, "WRITE: No Ack\n");
+			synquacer_i2c_stop(i2c, -EAGAIN);
+			goto out;
+		}
+
+		if (!is_msgend(i2c)) {
+			writeb(i2c->msg->buf[i2c->msg_ptr++],
+			       i2c->base + SYNQUACER_I2C_REG_DAR);
+
+			/* clear IRQ, and continue */
+			writeb(SYNQUACER_I2C_BCR_BEIE |
+			       SYNQUACER_I2C_BCR_MSS |
+			       SYNQUACER_I2C_BCR_INTE,
+			       i2c->base + SYNQUACER_I2C_REG_BCR);
+			break;
+		}
+		if (is_lastmsg(i2c)) {
+			synquacer_i2c_stop(i2c, 0);
+			break;
+		}
+		dev_dbg(i2c->dev, "WRITE: Next Message\n");
+
+		i2c->msg_ptr = 0;
+		i2c->msg_idx++;
+		i2c->msg++;
+
+		/* send the new start */
+		ret = synquacer_i2c_master_start(i2c, i2c->msg);
+		if (ret < 0) {
+			dev_dbg(i2c->dev, "restart error (%d)\n", ret);
+			synquacer_i2c_stop(i2c, -EAGAIN);
+			break;
+		}
+		i2c->state = STATE_START;
+		break;
+
+	case STATE_READ:
+		byte = readb(i2c->base + SYNQUACER_I2C_REG_DAR);
+		if (!(bsr & SYNQUACER_I2C_BSR_FBT)) /* data */
+			i2c->msg->buf[i2c->msg_ptr++] = byte;
+		else /* address */
+			dev_dbg(i2c->dev, "address:0x%02x. ignore it.\n", byte);
+
+prepare_read:
+		if (is_msglast(i2c)) {
+			writeb(SYNQUACER_I2C_BCR_MSS |
+			       SYNQUACER_I2C_BCR_BEIE |
+			       SYNQUACER_I2C_BCR_INTE,
+			       i2c->base + SYNQUACER_I2C_REG_BCR);
+			break;
+		}
+		if (!is_msgend(i2c)) {
+			writeb(SYNQUACER_I2C_BCR_MSS |
+			       SYNQUACER_I2C_BCR_BEIE |
+			       SYNQUACER_I2C_BCR_INTE |
+			       SYNQUACER_I2C_BCR_ACK,
+			       i2c->base + SYNQUACER_I2C_REG_BCR);
+			break;
+		}
+		if (is_lastmsg(i2c)) {
+			/* last message, send stop and complete */
+			dev_dbg(i2c->dev, "READ: Send Stop\n");
+			synquacer_i2c_stop(i2c, 0);
+			break;
+		}
+		dev_dbg(i2c->dev, "READ: Next Transfer\n");
+
+		i2c->msg_ptr = 0;
+		i2c->msg_idx++;
+		i2c->msg++;
+
+		ret = synquacer_i2c_master_start(i2c, i2c->msg);
+		if (ret < 0) {
+			dev_dbg(i2c->dev, "restart error (%d)\n", ret);
+			synquacer_i2c_stop(i2c, -EAGAIN);
+		} else {
+			i2c->state = STATE_START;
+		}
+		break;
+	default:
+		dev_err(i2c->dev, "called in err STATE (%d)\n", i2c->state);
+		break;
+	}
+
+out:
+	WAIT_PCLK(10, i2c->pclkrate);
+	return IRQ_HANDLED;
+}
+
+static int synquacer_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs,
+			      int num)
+{
+	struct synquacer_i2c *i2c;
+	int retry;
+	int ret;
+
+	i2c = i2c_get_adapdata(adap);
+	i2c->timeout_ms = calc_timeout_ms(i2c, msgs, num);
+
+	dev_dbg(i2c->dev, "calculated timeout %u ms\n", i2c->timeout_ms);
+
+	for (retry = 0; retry < adap->retries; retry++) {
+		ret = synquacer_i2c_doxfer(i2c, msgs, num);
+		if (ret != -EAGAIN)
+			return ret;
+
+		dev_dbg(i2c->dev, "Retrying transmission (%d)\n", retry);
+
+		synquacer_i2c_hw_reset(i2c);
+	}
+	return -EIO;
+}
+
+static u32 synquacer_i2c_functionality(struct i2c_adapter *adap)
+{
+	return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
+}
+
+static const struct i2c_algorithm synquacer_i2c_algo = {
+	.master_xfer	= synquacer_i2c_xfer,
+	.functionality	= synquacer_i2c_functionality,
+};
+
+static struct i2c_adapter synquacer_i2c_ops = {
+	.owner		= THIS_MODULE,
+	.name		= "synquacer_i2c-adapter",
+	.algo		= &synquacer_i2c_algo,
+	.retries	= 5,
+};
+
+static int synquacer_i2c_probe(struct platform_device *pdev)
+{
+	struct synquacer_i2c *i2c;
+	struct resource *r;
+	u32 bus_speed;
+	int ret;
+
+	i2c = devm_kzalloc(&pdev->dev, sizeof(*i2c), GFP_KERNEL);
+	if (!i2c)
+		return -ENOMEM;
+
+	bus_speed = i2c_acpi_find_bus_speed(&pdev->dev);
+	if (!bus_speed)
+		device_property_read_u32(&pdev->dev, "clock-frequency",
+					 &bus_speed);
+
+	device_property_read_u32(&pdev->dev, "socionext,pclk-rate",
+				 &i2c->pclkrate);
+
+	i2c->pclk = devm_clk_get(&pdev->dev, "pclk");
+	if (IS_ERR(i2c->pclk) && PTR_ERR(i2c->pclk) == -EPROBE_DEFER)
+		return -EPROBE_DEFER;
+	if (!IS_ERR_OR_NULL(i2c->pclk)) {
+		dev_dbg(&pdev->dev, "clock source %p\n", i2c->pclk);
+
+		ret = clk_prepare_enable(i2c->pclk);
+		if (ret) {
+			dev_err(&pdev->dev, "failed to enable clock (%d)\n",
+				ret);
+			return ret;
+		}
+		i2c->pclkrate = clk_get_rate(i2c->pclk);
+	}
+
+	if (i2c->pclkrate < SYNQUACER_I2C_MIN_CLK_RATE ||
+	    i2c->pclkrate > SYNQUACER_I2C_MAX_CLK_RATE) {
+		dev_err(&pdev->dev, "PCLK missing or out of range (%u)\n",
+			i2c->pclkrate);
+		return -EINVAL;
+	}
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	i2c->base = devm_ioremap_resource(&pdev->dev, r);
+	if (IS_ERR(i2c->base))
+		return PTR_ERR(i2c->base);
+
+	i2c->irq = platform_get_irq(pdev, 0);
+	if (i2c->irq < 0) {
+		dev_err(&pdev->dev, "no IRQ resource found\n");
+		return i2c->irq;
+	}
+
+	ret = devm_request_irq(&pdev->dev, i2c->irq, synquacer_i2c_isr,
+			       0, dev_name(&pdev->dev), i2c);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "cannot claim IRQ %d\n", i2c->irq);
+		return ret;
+	}
+
+	i2c->state = STATE_IDLE;
+	i2c->dev = &pdev->dev;
+	i2c->adapter = synquacer_i2c_ops;
+	i2c_set_adapdata(&i2c->adapter, i2c);
+	i2c->adapter.dev.parent = &pdev->dev;
+	i2c->adapter.nr = pdev->id;
+	init_completion(&i2c->completion);
+
+	if (bus_speed < 400000)
+		i2c->speed_khz = SYNQUACER_I2C_SPEED_SM;
+	else
+		i2c->speed_khz = SYNQUACER_I2C_SPEED_FM;
+
+	synquacer_i2c_hw_init(i2c);
+
+	ret = i2c_add_numbered_adapter(&i2c->adapter);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to add bus to i2c core\n");
+		return ret;
+	}
+
+	platform_set_drvdata(pdev, i2c);
+
+	dev_info(&pdev->dev, "%s: synquacer_i2c adapter\n",
+		 dev_name(&i2c->adapter.dev));
+
+	return 0;
+}
+
+static int synquacer_i2c_remove(struct platform_device *pdev)
+{
+	struct synquacer_i2c *i2c = platform_get_drvdata(pdev);
+
+	i2c_del_adapter(&i2c->adapter);
+	if (!IS_ERR(i2c->pclk))
+		clk_disable_unprepare(i2c->pclk);
+
+	return 0;
+}
+
+static const struct of_device_id synquacer_i2c_dt_ids[] = {
+	{ .compatible = "socionext,synquacer-i2c" },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, synquacer_i2c_dt_ids);
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id synquacer_i2c_acpi_ids[] = {
+	{ "SCX0003" },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(acpi, synquacer_i2c_acpi_ids);
+#endif
+
+static struct platform_driver synquacer_i2c_driver = {
+	.probe	= synquacer_i2c_probe,
+	.remove	= synquacer_i2c_remove,
+	.driver	= {
+		.name = "synquacer_i2c",
+		.of_match_table = of_match_ptr(synquacer_i2c_dt_ids),
+		.acpi_match_table = ACPI_PTR(synquacer_i2c_acpi_ids),
+	},
+};
+module_platform_driver(synquacer_i2c_driver);
+
+MODULE_AUTHOR("Fujitsu Semiconductor Ltd");
+MODULE_DESCRIPTION("Socionext SynQuacer I2C Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c
index ae6ed254e01d..c80527816ad0 100644
--- a/drivers/i2c/busses/i2c-xiic.c
+++ b/drivers/i2c/busses/i2c-xiic.c
@@ -851,7 +851,7 @@ static const struct of_device_id xiic_of_match[] = {
 MODULE_DEVICE_TABLE(of, xiic_of_match);
 #endif
 
-static int __maybe_unused cdns_i2c_runtime_suspend(struct device *dev)
+static int __maybe_unused xiic_i2c_runtime_suspend(struct device *dev)
 {
 	struct xiic_i2c *i2c = dev_get_drvdata(dev);
 
@@ -860,7 +860,7 @@ static int __maybe_unused cdns_i2c_runtime_suspend(struct device *dev)
 	return 0;
 }
 
-static int __maybe_unused cdns_i2c_runtime_resume(struct device *dev)
+static int __maybe_unused xiic_i2c_runtime_resume(struct device *dev)
 {
 	struct xiic_i2c *i2c = dev_get_drvdata(dev);
 	int ret;
@@ -875,8 +875,8 @@ static int __maybe_unused cdns_i2c_runtime_resume(struct device *dev)
 }
 
 static const struct dev_pm_ops xiic_dev_pm_ops = {
-	SET_RUNTIME_PM_OPS(cdns_i2c_runtime_suspend,
-			   cdns_i2c_runtime_resume, NULL)
+	SET_RUNTIME_PM_OPS(xiic_i2c_runtime_suspend,
+			   xiic_i2c_runtime_resume, NULL)
 };
 static struct platform_driver xiic_i2c_driver = {
 	.probe   = xiic_i2c_probe,
diff --git a/drivers/i2c/busses/i2c-xlp9xx.c b/drivers/i2c/busses/i2c-xlp9xx.c
index b970bf8f38e5..eb8913eba0c5 100644
--- a/drivers/i2c/busses/i2c-xlp9xx.c
+++ b/drivers/i2c/busses/i2c-xlp9xx.c
@@ -16,6 +16,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
+#include <linux/delay.h>
 
 #define XLP9XX_I2C_DIV			0x0
 #define XLP9XX_I2C_CTRL			0x1
@@ -36,6 +37,8 @@
 #define XLP9XX_I2C_TIMEOUT		0X10
 #define XLP9XX_I2C_GENCALLADDR		0x11
 
+#define XLP9XX_I2C_STATUS_BUSY		BIT(0)
+
 #define XLP9XX_I2C_CMD_START		BIT(7)
 #define XLP9XX_I2C_CMD_STOP		BIT(6)
 #define XLP9XX_I2C_CMD_READ		BIT(5)
@@ -71,6 +74,7 @@
 #define XLP9XX_I2C_HIGH_FREQ		400000
 #define XLP9XX_I2C_FIFO_SIZE		0x80U
 #define XLP9XX_I2C_TIMEOUT_MS		1000
+#define XLP9XX_I2C_BUSY_TIMEOUT		50
 
 #define XLP9XX_I2C_FIFO_WCNT_MASK	0xff
 #define XLP9XX_I2C_STATUS_ERRMASK	(XLP9XX_I2C_INTEN_ARLOST | \
@@ -125,7 +129,16 @@ static void xlp9xx_i2c_update_rx_fifo_thres(struct xlp9xx_i2c_dev *priv)
 {
 	u32 thres;
 
-	thres = min(priv->msg_buf_remaining, XLP9XX_I2C_FIFO_SIZE);
+	if (priv->len_recv)
+		/* interrupt after the first read to examine
+		 * the length byte before proceeding further
+		 */
+		thres = 1;
+	else if (priv->msg_buf_remaining > XLP9XX_I2C_FIFO_SIZE)
+		thres = XLP9XX_I2C_FIFO_SIZE;
+	else
+		thres = priv->msg_buf_remaining;
+
 	xlp9xx_write_i2c_reg(priv, XLP9XX_I2C_MFIFOCTRL,
 			     thres << XLP9XX_I2C_MFIFOCTRL_HITH_SHIFT);
 }
@@ -144,7 +157,7 @@ static void xlp9xx_i2c_fill_tx_fifo(struct xlp9xx_i2c_dev *priv)
 
 static void xlp9xx_i2c_drain_rx_fifo(struct xlp9xx_i2c_dev *priv)
 {
-	u32 len, i;
+	u32 len, i, val;
 	u8 rlen, *buf = priv->msg_buf;
 
 	len = xlp9xx_read_i2c_reg(priv, XLP9XX_I2C_FIFOWCNT) &
@@ -156,19 +169,27 @@ static void xlp9xx_i2c_drain_rx_fifo(struct xlp9xx_i2c_dev *priv)
 		rlen = xlp9xx_read_i2c_reg(priv, XLP9XX_I2C_MRXFIFO);
 		*buf++ = rlen;
 		len--;
+
 		if (priv->client_pec)
 			++rlen;
 		/* update remaining bytes and message length */
 		priv->msg_buf_remaining = rlen;
 		priv->msg_len = rlen + 1;
 		priv->len_recv = false;
-	}
 
-	len = min(priv->msg_buf_remaining, len);
-	for (i = 0; i < len; i++, buf++)
-		*buf = xlp9xx_read_i2c_reg(priv, XLP9XX_I2C_MRXFIFO);
+		/* Update transfer length to read only actual data */
+		val = xlp9xx_read_i2c_reg(priv, XLP9XX_I2C_CTRL);
+		val = (val & ~XLP9XX_I2C_CTRL_MCTLEN_MASK) |
+			((rlen + 1) << XLP9XX_I2C_CTRL_MCTLEN_SHIFT);
+		xlp9xx_write_i2c_reg(priv, XLP9XX_I2C_CTRL, val);
+	} else {
+		len = min(priv->msg_buf_remaining, len);
+		for (i = 0; i < len; i++, buf++)
+			*buf = xlp9xx_read_i2c_reg(priv, XLP9XX_I2C_MRXFIFO);
+
+		priv->msg_buf_remaining -= len;
+	}
 
-	priv->msg_buf_remaining -= len;
 	priv->msg_buf = buf;
 
 	if (priv->msg_buf_remaining)
@@ -224,6 +245,26 @@ xfer_done:
 	return IRQ_HANDLED;
 }
 
+static int xlp9xx_i2c_check_bus_status(struct xlp9xx_i2c_dev *priv)
+{
+	u32 tries;
+
+	/*
+	 * Poll the STATUS register up to XLP9XX_I2C_BUSY_TIMEOUT times,
+	 * sleeping about 1 ms after each poll that still reports busy.
+	 */
+	for (tries = XLP9XX_I2C_BUSY_TIMEOUT; tries; tries--) {
+		u32 status = xlp9xx_read_i2c_reg(priv, XLP9XX_I2C_STATUS);
+
+		if (!(status & XLP9XX_I2C_STATUS_BUSY))
+			return 0;
+
+		usleep_range(1000, 1100);
+	}
+
+	return -EIO;
+}
+
 static int xlp9xx_i2c_init(struct xlp9xx_i2c_dev *priv)
 {
 	u32 prescale;
@@ -311,7 +352,9 @@ static int xlp9xx_i2c_xfer_msg(struct xlp9xx_i2c_dev *priv, struct i2c_msg *msg,
 
 	/* set cmd reg */
 	cmd = XLP9XX_I2C_CMD_START;
-	cmd |= (priv->msg_read ? XLP9XX_I2C_CMD_READ : XLP9XX_I2C_CMD_WRITE);
+	if (msg->len)
+		cmd |= (priv->msg_read ?
+			XLP9XX_I2C_CMD_READ : XLP9XX_I2C_CMD_WRITE);
 	if (last_msg)
 		cmd |= XLP9XX_I2C_CMD_STOP;
 
@@ -320,11 +363,12 @@ static int xlp9xx_i2c_xfer_msg(struct xlp9xx_i2c_dev *priv, struct i2c_msg *msg,
 	timeleft = msecs_to_jiffies(XLP9XX_I2C_TIMEOUT_MS);
 	timeleft = wait_for_completion_timeout(&priv->msg_complete, timeleft);
 
-	if (priv->msg_err) {
+	if (priv->msg_err & XLP9XX_I2C_INTEN_BUSERR) {
 		dev_dbg(priv->dev, "transfer error %x!\n", priv->msg_err);
-		if (priv->msg_err & XLP9XX_I2C_INTEN_BUSERR)
-			xlp9xx_i2c_init(priv);
+		xlp9xx_write_i2c_reg(priv, XLP9XX_I2C_CMD, XLP9XX_I2C_CMD_STOP);
 		return -EIO;
+	} else if (priv->msg_err & XLP9XX_I2C_INTEN_NACKADDR) {
+		return -ENXIO;
 	}
 
 	if (timeleft == 0) {
@@ -345,6 +389,14 @@ static int xlp9xx_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs,
 	int i, ret;
 	struct xlp9xx_i2c_dev *priv = i2c_get_adapdata(adap);
 
+	ret = xlp9xx_i2c_check_bus_status(priv);
+	if (ret) {
+		xlp9xx_i2c_init(priv);
+		ret = xlp9xx_i2c_check_bus_status(priv);
+		if (ret)
+			return ret;
+	}
+
 	for (i = 0; i < num; i++) {
 		ret = xlp9xx_i2c_xfer_msg(priv, &msgs[i], i == num - 1);
 		if (ret != 0)
@@ -356,8 +408,8 @@ static int xlp9xx_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs,
 
 static u32 xlp9xx_i2c_functionality(struct i2c_adapter *adapter)
 {
-	return I2C_FUNC_SMBUS_EMUL | I2C_FUNC_I2C |
-		I2C_FUNC_10BIT_ADDR;
+	return I2C_FUNC_SMBUS_EMUL | I2C_FUNC_SMBUS_READ_BLOCK_DATA |
+			I2C_FUNC_I2C | I2C_FUNC_10BIT_ADDR;
 }
 
 static const struct i2c_algorithm xlp9xx_i2c_algo = {
diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index 5a00bf443d06..1ba40bb2b966 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -58,6 +58,8 @@
 #define I2C_ADDR_7BITS_MAX	0x77
 #define I2C_ADDR_7BITS_COUNT	(I2C_ADDR_7BITS_MAX + 1)
 
+#define I2C_ADDR_DEVICE_ID	0x7c
+
 /*
  * core_lock protects i2c_adapter_idr, and guarantees that device detection,
  * deletion of detected devices, and attach_adapter calls are serialized
@@ -67,18 +69,18 @@ static DEFINE_IDR(i2c_adapter_idr);
 
 static int i2c_detect(struct i2c_adapter *adapter, struct i2c_driver *driver);
 
-static struct static_key i2c_trace_msg = STATIC_KEY_INIT_FALSE;
+static DEFINE_STATIC_KEY_FALSE(i2c_trace_msg_key);
 static bool is_registered;
 
 int i2c_transfer_trace_reg(void)
 {
-	static_key_slow_inc(&i2c_trace_msg);
+	static_branch_inc(&i2c_trace_msg_key);
 	return 0;
 }
 
 void i2c_transfer_trace_unreg(void)
 {
-	static_key_slow_dec(&i2c_trace_msg);
+	static_branch_dec(&i2c_trace_msg_key);
 }
 
 const struct i2c_device_id *i2c_match_id(const struct i2c_device_id *id,
@@ -124,6 +126,10 @@ static int i2c_device_uevent(struct device *dev, struct kobj_uevent_env *env)
 	struct i2c_client *client = to_i2c_client(dev);
 	int rc;
 
+	rc = of_device_uevent_modalias(dev, env);
+	if (rc != -ENODEV)
+		return rc;
+
 	rc = acpi_device_uevent_modalias(dev, env);
 	if (rc != -ENODEV)
 		return rc;
@@ -439,6 +445,10 @@ show_modalias(struct device *dev, struct device_attribute *attr, char *buf)
 	struct i2c_client *client = to_i2c_client(dev);
 	int len;
 
+	len = of_device_modalias(dev, buf, PAGE_SIZE);
+	if (len != -ENODEV)
+		return len;
+
 	len = acpi_device_modalias(dev, buf, PAGE_SIZE -1);
 	if (len != -ENODEV)
 		return len;
@@ -507,7 +517,7 @@ static unsigned short i2c_encode_flags_to_addr(struct i2c_client *client)
 
 /* This is a permissive address validity check, I2C address map constraints
  * are purposely not enforced, except for the general call address. */
-int i2c_check_addr_validity(unsigned addr, unsigned short flags)
+static int i2c_check_addr_validity(unsigned int addr, unsigned short flags)
 {
 	if (flags & I2C_CLIENT_TEN) {
 		/* 10-bit address, all values are valid */
@@ -1835,14 +1845,18 @@ int __i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num)
 	unsigned long orig_jiffies;
 	int ret, try;
 
+	if (WARN_ON(!msgs || num < 1))
+		return -EINVAL;
+
 	if (adap->quirks && i2c_check_for_quirks(adap, msgs, num))
 		return -EOPNOTSUPP;
 
-	/* i2c_trace_msg gets enabled when tracepoint i2c_transfer gets
+	/*
+	 * i2c_trace_msg_key gets enabled when tracepoint i2c_transfer gets
 	 * enabled.  This is an efficient way of keeping the for-loop from
 	 * being executed when not needed.
 	 */
-	if (static_key_false(&i2c_trace_msg)) {
+	if (static_branch_unlikely(&i2c_trace_msg_key)) {
 		int i;
 		for (i = 0; i < num; i++)
 			if (msgs[i].flags & I2C_M_RD)
@@ -1861,12 +1875,12 @@ int __i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num)
 			break;
 	}
 
-	if (static_key_false(&i2c_trace_msg)) {
+	if (static_branch_unlikely(&i2c_trace_msg_key)) {
 		int i;
 		for (i = 0; i < ret; i++)
 			if (msgs[i].flags & I2C_M_RD)
 				trace_i2c_reply(adap, &msgs[i], i);
-		trace_i2c_result(adap, i, ret);
+		trace_i2c_result(adap, num, ret);
 	}
 
 	return ret;
@@ -1968,6 +1982,37 @@ int i2c_transfer_buffer_flags(const struct i2c_client *client, char *buf,
 }
 EXPORT_SYMBOL(i2c_transfer_buffer_flags);
 
+/**
+ * i2c_get_device_id - get manufacturer, part id and die revision of a device
+ * @client: The device to query
+ * @id: The queried information (written only on success)
+ *
+ * Returns zero, -EOPNOTSUPP (adapter lacks I2C block read) or the xfer error.
+ */
+int i2c_get_device_id(const struct i2c_client *client,
+		      struct i2c_device_identity *id)
+{
+	struct i2c_adapter *adap = client->adapter;
+	union i2c_smbus_data raw_id;
+	int ret;
+
+	if (!i2c_check_functionality(adap, I2C_FUNC_SMBUS_READ_I2C_BLOCK))
+		return -EOPNOTSUPP;
+	/* block[0] is preset to 3, presumably the number of ID bytes wanted */
+	raw_id.block[0] = 3;
+	ret = i2c_smbus_xfer(adap, I2C_ADDR_DEVICE_ID, 0,
+			     I2C_SMBUS_READ, client->addr << 1,
+			     I2C_SMBUS_I2C_BLOCK_DATA, &raw_id);
+	if (ret)
+		return ret;
+	/* unpack: 12-bit manufacturer, 9-bit part id, 3-bit die revision */
+	id->manufacturer_id = (raw_id.block[1] << 4) | (raw_id.block[2] >> 4);
+	id->part_id = ((raw_id.block[2] & 0xf) << 5) | (raw_id.block[3] >> 3);
+	id->die_revision = raw_id.block[3] & 0x7;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(i2c_get_device_id);
+
 /* ----------------------------------------------------
  * the i2c address scanning function
  * Will not work for 10-bit addresses!
diff --git a/drivers/i2c/i2c-core-of.c b/drivers/i2c/i2c-core-of.c
index 8d474bb1dc15..c405270a98b4 100644
--- a/drivers/i2c/i2c-core-of.c
+++ b/drivers/i2c/i2c-core-of.c
@@ -4,7 +4,7 @@
  * Copyright (C) 2008 Jochen Friedrich <jochen@scram.de>
  * based on a previous patch from Jon Smirl <jonsmirl@gmail.com>
  *
- * Copyright (C) 2013 Wolfram Sang <wsa@the-dreams.de>
+ * Copyright (C) 2013, 2018 Wolfram Sang <wsa@the-dreams.de>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the Free
@@ -25,12 +25,11 @@
 static struct i2c_client *of_i2c_register_device(struct i2c_adapter *adap,
 						 struct device_node *node)
 {
-	struct i2c_client *result;
+	struct i2c_client *client;
 	struct i2c_board_info info = {};
 	struct dev_archdata dev_ad = {};
-	const __be32 *addr_be;
 	u32 addr;
-	int len;
+	int ret;
 
 	dev_dbg(&adap->dev, "of_i2c: register %pOF\n", node);
 
@@ -40,13 +39,12 @@ static struct i2c_client *of_i2c_register_device(struct i2c_adapter *adap,
 		return ERR_PTR(-EINVAL);
 	}
 
-	addr_be = of_get_property(node, "reg", &len);
-	if (!addr_be || (len < sizeof(*addr_be))) {
+	ret = of_property_read_u32(node, "reg", &addr);
+	if (ret) {
 		dev_err(&adap->dev, "of_i2c: invalid reg on %pOF\n", node);
-		return ERR_PTR(-EINVAL);
+		return ERR_PTR(ret);
 	}
 
-	addr = be32_to_cpup(addr_be);
 	if (addr & I2C_TEN_BIT_ADDRESS) {
 		addr &= ~I2C_TEN_BIT_ADDRESS;
 		info.flags |= I2C_CLIENT_TEN;
@@ -57,15 +55,9 @@ static struct i2c_client *of_i2c_register_device(struct i2c_adapter *adap,
 		info.flags |= I2C_CLIENT_SLAVE;
 	}
 
-	if (i2c_check_addr_validity(addr, info.flags)) {
-		dev_err(&adap->dev, "of_i2c: invalid addr=%x on %pOF\n",
-			addr, node);
-		return ERR_PTR(-EINVAL);
-	}
-
 	info.addr = addr;
-	info.of_node = of_node_get(node);
 	info.archdata = &dev_ad;
+	info.of_node = of_node_get(node);
 
 	if (of_property_read_bool(node, "host-notify"))
 		info.flags |= I2C_CLIENT_HOST_NOTIFY;
@@ -73,13 +65,13 @@ static struct i2c_client *of_i2c_register_device(struct i2c_adapter *adap,
 	if (of_get_property(node, "wakeup-source", NULL))
 		info.flags |= I2C_CLIENT_WAKE;
 
-	result = i2c_new_device(adap, &info);
-	if (result == NULL) {
+	client = i2c_new_device(adap, &info);
+	if (!client) {
 		dev_err(&adap->dev, "of_i2c: Failure registering %pOF\n", node);
 		of_node_put(node);
 		return ERR_PTR(-EINVAL);
 	}
-	return result;
+	return client;
 }
 
 void of_i2c_register_devices(struct i2c_adapter *adap)
@@ -103,7 +95,7 @@ void of_i2c_register_devices(struct i2c_adapter *adap)
 
 		client = of_i2c_register_device(adap, node);
 		if (IS_ERR(client)) {
-			dev_warn(&adap->dev,
+			dev_err(&adap->dev,
 				 "Failed to create I2C device for %pOF\n",
 				 node);
 			of_node_clear_flag(node, OF_POPULATED);
diff --git a/drivers/i2c/i2c-core-smbus.c b/drivers/i2c/i2c-core-smbus.c
index 59d5cf376f6a..b5aec33002c3 100644
--- a/drivers/i2c/i2c-core-smbus.c
+++ b/drivers/i2c/i2c-core-smbus.c
@@ -308,17 +308,21 @@ static void i2c_smbus_try_get_dmabuf(struct i2c_msg *msg, u8 init_val)
 		msg->buf[0] = init_val;
 }
 
-/* Simulate a SMBus command using the i2c protocol
-   No checking of parameters is done!  */
+/*
+ * Simulate a SMBus command using the I2C protocol.
+ * No checking of parameters is done!
+ */
 static s32 i2c_smbus_xfer_emulated(struct i2c_adapter *adapter, u16 addr,
 				   unsigned short flags,
 				   char read_write, u8 command, int size,
 				   union i2c_smbus_data *data)
 {
-	/* So we need to generate a series of msgs. In the case of writing, we
-	  need to use only one message; when reading, we need two. We initialize
-	  most things with sane defaults, to keep the code below somewhat
-	  simpler. */
+	/*
+	 * So we need to generate a series of msgs. In the case of writing, we
+	 * need to use only one message; when reading, we need two. We
+	 * initialize most things with sane defaults, to keep the code below
+	 * somewhat simpler.
+	 */
 	unsigned char msgbuf0[I2C_SMBUS_BLOCK_MAX+3];
 	unsigned char msgbuf1[I2C_SMBUS_BLOCK_MAX+2];
 	int num = read_write == I2C_SMBUS_READ ? 2 : 1;
diff --git a/drivers/i2c/i2c-core.h b/drivers/i2c/i2c-core.h
index 3d3d9bf02101..37576f50fe20 100644
--- a/drivers/i2c/i2c-core.h
+++ b/drivers/i2c/i2c-core.h
@@ -27,7 +27,6 @@ extern struct rw_semaphore	__i2c_board_lock;
 extern struct list_head	__i2c_board_list;
 extern int		__i2c_first_dynamic_bus_num;
 
-int i2c_check_addr_validity(unsigned addr, unsigned short flags);
 int i2c_check_7bit_addr_validity_strict(unsigned short addr);
 
 #ifdef CONFIG_ACPI
diff --git a/drivers/i2c/muxes/i2c-mux-pca954x.c b/drivers/i2c/muxes/i2c-mux-pca954x.c
index fbb84c7ef282..09bafd3e68fa 100644
--- a/drivers/i2c/muxes/i2c-mux-pca954x.c
+++ b/drivers/i2c/muxes/i2c-mux-pca954x.c
@@ -77,6 +77,7 @@ struct chip_desc {
 		pca954x_ismux = 0,
 		pca954x_isswi
 	} muxtype;
+	struct i2c_device_identity id;
 };
 
 struct pca954x {
@@ -97,59 +98,83 @@ static const struct chip_desc chips[] = {
 		.nchans = 2,
 		.enable = 0x4,
 		.muxtype = pca954x_ismux,
+		.id = { .manufacturer_id = I2C_DEVICE_ID_NONE },
 	},
 	[pca_9542] = {
 		.nchans = 2,
 		.enable = 0x4,
 		.has_irq = 1,
 		.muxtype = pca954x_ismux,
+		.id = { .manufacturer_id = I2C_DEVICE_ID_NONE },
 	},
 	[pca_9543] = {
 		.nchans = 2,
 		.has_irq = 1,
 		.muxtype = pca954x_isswi,
+		.id = { .manufacturer_id = I2C_DEVICE_ID_NONE },
 	},
 	[pca_9544] = {
 		.nchans = 4,
 		.enable = 0x4,
 		.has_irq = 1,
 		.muxtype = pca954x_ismux,
+		.id = { .manufacturer_id = I2C_DEVICE_ID_NONE },
 	},
 	[pca_9545] = {
 		.nchans = 4,
 		.has_irq = 1,
 		.muxtype = pca954x_isswi,
+		.id = { .manufacturer_id = I2C_DEVICE_ID_NONE },
 	},
 	[pca_9546] = {
 		.nchans = 4,
 		.muxtype = pca954x_isswi,
+		.id = { .manufacturer_id = I2C_DEVICE_ID_NONE },
 	},
 	[pca_9547] = {
 		.nchans = 8,
 		.enable = 0x8,
 		.muxtype = pca954x_ismux,
+		.id = { .manufacturer_id = I2C_DEVICE_ID_NONE },
 	},
 	[pca_9548] = {
 		.nchans = 8,
 		.muxtype = pca954x_isswi,
+		.id = { .manufacturer_id = I2C_DEVICE_ID_NONE },
 	},
 	[pca_9846] = {
 		.nchans = 4,
 		.muxtype = pca954x_isswi,
+		.id = {
+			.manufacturer_id = I2C_DEVICE_ID_NXP_SEMICONDUCTORS,
+			.part_id = 0x10b,
+		},
 	},
 	[pca_9847] = {
 		.nchans = 8,
 		.enable = 0x8,
 		.muxtype = pca954x_ismux,
+		.id = {
+			.manufacturer_id = I2C_DEVICE_ID_NXP_SEMICONDUCTORS,
+			.part_id = 0x108,
+		},
 	},
 	[pca_9848] = {
 		.nchans = 8,
 		.muxtype = pca954x_isswi,
+		.id = {
+			.manufacturer_id = I2C_DEVICE_ID_NXP_SEMICONDUCTORS,
+			.part_id = 0x10a,
+		},
 	},
 	[pca_9849] = {
 		.nchans = 4,
 		.enable = 0x4,
 		.muxtype = pca954x_ismux,
+		.id = {
+			.manufacturer_id = I2C_DEVICE_ID_NXP_SEMICONDUCTORS,
+			.part_id = 0x109,
+		},
 	},
 };
 
@@ -369,6 +394,30 @@ static int pca954x_probe(struct i2c_client *client,
 	if (IS_ERR(gpio))
 		return PTR_ERR(gpio);
 
+	match = of_match_device(of_match_ptr(pca954x_of_match), &client->dev);
+	if (match)
+		data->chip = of_device_get_match_data(&client->dev);
+	else
+		data->chip = &chips[id->driver_data];
+
+	if (data->chip->id.manufacturer_id != I2C_DEVICE_ID_NONE) {
+		struct i2c_device_identity id;
+
+		ret = i2c_get_device_id(client, &id);
+		if (ret && ret != -EOPNOTSUPP)
+			return ret;
+
+		if (!ret &&
+		    (id.manufacturer_id != data->chip->id.manufacturer_id ||
+		     id.part_id != data->chip->id.part_id)) {
+			dev_warn(&client->dev,
+				 "unexpected device id %03x-%03x-%x\n",
+				 id.manufacturer_id, id.part_id,
+				 id.die_revision);
+			return -ENODEV;
+		}
+	}
+
 	/* Write the mux register at addr to verify
 	 * that the mux is in fact present. This also
 	 * initializes the mux to disconnected state.
@@ -378,12 +427,6 @@ static int pca954x_probe(struct i2c_client *client,
 		return -ENODEV;
 	}
 
-	match = of_match_device(of_match_ptr(pca954x_of_match), &client->dev);
-	if (match)
-		data->chip = of_device_get_match_data(&client->dev);
-	else
-		data->chip = &chips[id->driver_data];
-
 	data->last_chan = 0;		   /* force the first selection */
 
 	idle_disconnect_dt = of_node &&
diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c
index 7659bc48f1db..5d9699fe1b55 100644
--- a/drivers/input/touchscreen/atmel_mxt_ts.c
+++ b/drivers/input/touchscreen/atmel_mxt_ts.c
@@ -23,12 +23,13 @@
 #include <linux/delay.h>
 #include <linux/firmware.h>
 #include <linux/i2c.h>
-#include <linux/platform_data/atmel_mxt_ts.h>
 #include <linux/input/mt.h>
 #include <linux/interrupt.h>
 #include <linux/of.h>
+#include <linux/property.h>
 #include <linux/slab.h>
 #include <linux/gpio/consumer.h>
+#include <linux/property.h>
 #include <asm/unaligned.h>
 #include <media/v4l2-device.h>
 #include <media/v4l2-ioctl.h>
@@ -268,12 +269,16 @@ static const struct v4l2_file_operations mxt_video_fops = {
 	.poll = vb2_fop_poll,
 };
 
+enum mxt_suspend_mode {
+	MXT_SUSPEND_DEEP_SLEEP	= 0,	/* default picked in mxt_probe() */
+	MXT_SUSPEND_T9_CTRL	= 1,	/* chromebook_T9_suspend_dmi matches */
+};
+
 /* Each client has this additional data */
 struct mxt_data {
 	struct i2c_client *client;
 	struct input_dev *input_dev;
 	char phys[64];		/* device physical location */
-	const struct mxt_platform_data *pdata;
 	struct mxt_object *object_table;
 	struct mxt_info info;
 	unsigned int irq;
@@ -324,6 +329,11 @@ struct mxt_data {
 
 	/* for config update handling */
 	struct completion crc_completion;
+
+	u32 *t19_keymap;
+	unsigned int t19_num_keys;
+
+	enum mxt_suspend_mode suspend_mode;
 };
 
 struct mxt_vb2_buffer {
@@ -742,15 +752,14 @@ static int mxt_write_object(struct mxt_data *data,
 static void mxt_input_button(struct mxt_data *data, u8 *message)
 {
 	struct input_dev *input = data->input_dev;
-	const struct mxt_platform_data *pdata = data->pdata;
 	int i;
 
-	for (i = 0; i < pdata->t19_num_keys; i++) {
-		if (pdata->t19_keymap[i] == KEY_RESERVED)
+	for (i = 0; i < data->t19_num_keys; i++) {
+		if (data->t19_keymap[i] == KEY_RESERVED)
 			continue;
 
 		/* Active-low switch */
-		input_report_key(input, pdata->t19_keymap[i],
+		input_report_key(input, data->t19_keymap[i],
 				 !(message[1] & BIT(i)));
 	}
 }
@@ -758,7 +767,7 @@ static void mxt_input_button(struct mxt_data *data, u8 *message)
 static void mxt_input_sync(struct mxt_data *data)
 {
 	input_mt_report_pointer_emulation(data->input_dev,
-					  data->pdata->t19_num_keys);
+					  data->t19_num_keys);
 	input_sync(data->input_dev);
 }
 
@@ -1858,7 +1867,6 @@ static void mxt_input_close(struct input_dev *dev);
 static void mxt_set_up_as_touchpad(struct input_dev *input_dev,
 				   struct mxt_data *data)
 {
-	const struct mxt_platform_data *pdata = data->pdata;
 	int i;
 
 	input_dev->name = "Atmel maXTouch Touchpad";
@@ -1872,15 +1880,14 @@ static void mxt_set_up_as_touchpad(struct input_dev *input_dev,
 	input_abs_set_res(input_dev, ABS_MT_POSITION_Y,
 			  MXT_PIXELS_PER_MM);
 
-	for (i = 0; i < pdata->t19_num_keys; i++)
-		if (pdata->t19_keymap[i] != KEY_RESERVED)
+	for (i = 0; i < data->t19_num_keys; i++)
+		if (data->t19_keymap[i] != KEY_RESERVED)
 			input_set_capability(input_dev, EV_KEY,
-					     pdata->t19_keymap[i]);
+					     data->t19_keymap[i]);
 }
 
 static int mxt_initialize_input_device(struct mxt_data *data)
 {
-	const struct mxt_platform_data *pdata = data->pdata;
 	struct device *dev = &data->client->dev;
 	struct input_dev *input_dev;
 	int error;
@@ -1946,7 +1953,7 @@ static int mxt_initialize_input_device(struct mxt_data *data)
 	}
 
 	/* If device has buttons we assume it is a touchpad */
-	if (pdata->t19_num_keys) {
+	if (data->t19_num_keys) {
 		mxt_set_up_as_touchpad(input_dev, data);
 		mt_flags |= INPUT_MT_POINTER;
 	} else {
@@ -2868,7 +2875,7 @@ static const struct attribute_group mxt_attr_group = {
 
 static void mxt_start(struct mxt_data *data)
 {
-	switch (data->pdata->suspend_mode) {
+	switch (data->suspend_mode) {
 	case MXT_SUSPEND_T9_CTRL:
 		mxt_soft_reset(data);
 
@@ -2886,12 +2893,11 @@ static void mxt_start(struct mxt_data *data)
 		mxt_t6_command(data, MXT_COMMAND_CALIBRATE, 1, false);
 		break;
 	}
-
 }
 
 static void mxt_stop(struct mxt_data *data)
 {
-	switch (data->pdata->suspend_mode) {
+	switch (data->suspend_mode) {
 	case MXT_SUSPEND_T9_CTRL:
 		/* Touch disable */
 		mxt_write_object(data,
@@ -2921,55 +2927,49 @@ static void mxt_input_close(struct input_dev *dev)
 	mxt_stop(data);
 }
 
-#ifdef CONFIG_OF
-static const struct mxt_platform_data *mxt_parse_dt(struct i2c_client *client)
+static int mxt_parse_device_properties(struct mxt_data *data)
 {
-	struct mxt_platform_data *pdata;
-	struct device_node *np = client->dev.of_node;
+	static const char keymap_property[] = "linux,gpio-keymap";
+	struct device *dev = &data->client->dev;
 	u32 *keymap;
-	int proplen, ret;
-
-	if (!np)
-		return ERR_PTR(-ENOENT);
-
-	pdata = devm_kzalloc(&client->dev, sizeof(*pdata), GFP_KERNEL);
-	if (!pdata)
-		return ERR_PTR(-ENOMEM);
+	int n_keys;
+	int error;
 
-	if (of_find_property(np, "linux,gpio-keymap", &proplen)) {
-		pdata->t19_num_keys = proplen / sizeof(u32);
+	if (device_property_present(dev, keymap_property)) {
+		n_keys = device_property_read_u32_array(dev, keymap_property,
+							NULL, 0);
+		if (n_keys <= 0) {
+			error = n_keys < 0 ? n_keys : -EINVAL;
+			dev_err(dev, "invalid/malformed '%s' property: %d\n",
+				keymap_property, error);
+			return error;
+		}
 
-		keymap = devm_kzalloc(&client->dev,
-				pdata->t19_num_keys * sizeof(keymap[0]),
-				GFP_KERNEL);
+		keymap = devm_kmalloc_array(dev, n_keys, sizeof(*keymap),
+					    GFP_KERNEL);
 		if (!keymap)
-			return ERR_PTR(-ENOMEM);
+			return -ENOMEM;
 
-		ret = of_property_read_u32_array(np, "linux,gpio-keymap",
-						 keymap, pdata->t19_num_keys);
-		if (ret)
-			dev_warn(&client->dev,
-				 "Couldn't read linux,gpio-keymap: %d\n", ret);
+		error = device_property_read_u32_array(dev, keymap_property,
+						       keymap, n_keys);
+		if (error) {
+			dev_err(dev, "failed to parse '%s' property: %d\n",
+				keymap_property, error);
+			return error;
+		}
 
-		pdata->t19_keymap = keymap;
+		data->t19_keymap = keymap;
+		data->t19_num_keys = n_keys;
 	}
 
-	pdata->suspend_mode = MXT_SUSPEND_DEEP_SLEEP;
-
-	return pdata;
-}
-#else
-static const struct mxt_platform_data *mxt_parse_dt(struct i2c_client *client)
-{
-	return ERR_PTR(-ENOENT);
+	return 0;
 }
-#endif
 
 #ifdef CONFIG_ACPI
 
 struct mxt_acpi_platform_data {
 	const char *hid;
-	struct mxt_platform_data pdata;
+	const struct property_entry *props;
 };
 
 static unsigned int samus_touchpad_buttons[] = {
@@ -2979,14 +2979,16 @@ static unsigned int samus_touchpad_buttons[] = {
 	BTN_LEFT
 };
 
+static const struct property_entry samus_touchpad_props[] = {
+	PROPERTY_ENTRY_U32_ARRAY("linux,gpio-keymap", samus_touchpad_buttons),
+	{ }
+};
+
 static struct mxt_acpi_platform_data samus_platform_data[] = {
 	{
 		/* Touchpad */
 		.hid	= "ATML0000",
-		.pdata	= {
-			.t19_num_keys	= ARRAY_SIZE(samus_touchpad_buttons),
-			.t19_keymap	= samus_touchpad_buttons,
-		},
+		.props	= samus_touchpad_props,
 	},
 	{
 		/* Touchscreen */
@@ -3004,14 +3006,16 @@ static unsigned int chromebook_tp_buttons[] = {
 	BTN_LEFT
 };
 
+static const struct property_entry chromebook_tp_props[] = {
+	PROPERTY_ENTRY_U32_ARRAY("linux,gpio-keymap", chromebook_tp_buttons),
+	{ }
+};
+
 static struct mxt_acpi_platform_data chromebook_platform_data[] = {
 	{
 		/* Touchpad */
 		.hid	= "ATML0000",
-		.pdata	= {
-			.t19_num_keys	= ARRAY_SIZE(chromebook_tp_buttons),
-			.t19_keymap	= chromebook_tp_buttons,
-		},
+		.props	= chromebook_tp_props,
 	},
 	{
 		/* Touchscreen */
@@ -3041,83 +3045,85 @@ static const struct dmi_system_id mxt_dmi_table[] = {
 	{ }
 };
 
-static const struct mxt_platform_data *mxt_parse_acpi(struct i2c_client *client)
+static int mxt_prepare_acpi_properties(struct i2c_client *client)
 {
 	struct acpi_device *adev;
 	const struct dmi_system_id *system_id;
 	const struct mxt_acpi_platform_data *acpi_pdata;
 
-	/*
-	 * Ignore ACPI devices representing bootloader mode.
-	 *
-	 * This is a bit of a hack: Google Chromebook BIOS creates ACPI
-	 * devices for both application and bootloader modes, but we are
-	 * interested in application mode only (if device is in bootloader
-	 * mode we'll end up switching into application anyway). So far
-	 * application mode addresses were all above 0x40, so we'll use it
-	 * as a threshold.
-	 */
-	if (client->addr < 0x40)
-		return ERR_PTR(-ENXIO);
-
 	adev = ACPI_COMPANION(&client->dev);
 	if (!adev)
-		return ERR_PTR(-ENOENT);
+		return -ENOENT;
 
 	system_id = dmi_first_match(mxt_dmi_table);
 	if (!system_id)
-		return ERR_PTR(-ENOENT);
+		return -ENOENT;
 
 	acpi_pdata = system_id->driver_data;
 	if (!acpi_pdata)
-		return ERR_PTR(-ENOENT);
+		return -ENOENT;
 
 	while (acpi_pdata->hid) {
-		if (!strcmp(acpi_device_hid(adev), acpi_pdata->hid))
-			return &acpi_pdata->pdata;
+		if (!strcmp(acpi_device_hid(adev), acpi_pdata->hid)) {
+			/*
+			 * Remove previously installed properties if we
+			 * are probing this device not for the very first
+			 * time.
+			 */
+			device_remove_properties(&client->dev);
+
+			/*
+			 * Now install the platform-specific properties
+			 * that are missing from ACPI.
+			 */
+			device_add_properties(&client->dev, acpi_pdata->props);
+			break;
+		}
 
 		acpi_pdata++;
 	}
 
-	return ERR_PTR(-ENOENT);
+	return 0;
 }
 #else
-static const struct mxt_platform_data *mxt_parse_acpi(struct i2c_client *client)
+static int mxt_prepare_acpi_properties(struct i2c_client *client)
 {
-	return ERR_PTR(-ENOENT);
+	return -ENOENT;
 }
 #endif
 
-static const struct mxt_platform_data *
-mxt_get_platform_data(struct i2c_client *client)
-{
-	const struct mxt_platform_data *pdata;
-
-	pdata = dev_get_platdata(&client->dev);
-	if (pdata)
-		return pdata;
-
-	pdata = mxt_parse_dt(client);
-	if (!IS_ERR(pdata) || PTR_ERR(pdata) != -ENOENT)
-		return pdata;
-
-	pdata = mxt_parse_acpi(client);
-	if (!IS_ERR(pdata) || PTR_ERR(pdata) != -ENOENT)
-		return pdata;
-
-	dev_err(&client->dev, "No platform data specified\n");
-	return ERR_PTR(-EINVAL);
-}
+static const struct dmi_system_id chromebook_T9_suspend_dmi[] = {
+	{
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Link"),
+		},
+	},
+	{
+		.matches = {
+			DMI_MATCH(DMI_PRODUCT_NAME, "Peppy"),
+		},
+	},
+	{ }
+};
 
 static int mxt_probe(struct i2c_client *client, const struct i2c_device_id *id)
 {
 	struct mxt_data *data;
-	const struct mxt_platform_data *pdata;
 	int error;
 
-	pdata = mxt_get_platform_data(client);
-	if (IS_ERR(pdata))
-		return PTR_ERR(pdata);
+	/*
+	 * Ignore ACPI devices representing bootloader mode.
+	 *
+	 * This is a bit of a hack: Google Chromebook BIOS creates ACPI
+	 * devices for both application and bootloader modes, but we are
+	 * interested in application mode only (if device is in bootloader
+	 * mode we'll end up switching into application anyway). So far
+	 * application mode addresses were all above 0x40, so we'll use it
+	 * as a threshold.
+	 */
+	if (ACPI_COMPANION(&client->dev) && client->addr < 0x40)
+		return -ENXIO;
 
 	data = devm_kzalloc(&client->dev, sizeof(struct mxt_data), GFP_KERNEL);
 	if (!data)
@@ -3127,7 +3133,6 @@ static int mxt_probe(struct i2c_client *client, const struct i2c_device_id *id)
 		 client->adapter->nr, client->addr);
 
 	data->client = client;
-	data->pdata = pdata;
 	data->irq = client->irq;
 	i2c_set_clientdata(client, data);
 
@@ -3135,6 +3140,17 @@ static int mxt_probe(struct i2c_client *client, const struct i2c_device_id *id)
 	init_completion(&data->reset_completion);
 	init_completion(&data->crc_completion);
 
+	data->suspend_mode = dmi_check_system(chromebook_T9_suspend_dmi) ?
+		MXT_SUSPEND_T9_CTRL : MXT_SUSPEND_DEEP_SLEEP;
+
+	error = mxt_prepare_acpi_properties(client);
+	if (error && error != -ENOENT)
+		return error;
+
+	error = mxt_parse_device_properties(data);
+	if (error)
+		return error;
+
 	data->reset_gpio = devm_gpiod_get_optional(&client->dev,
 						   "reset", GPIOD_OUT_LOW);
 	if (IS_ERR(data->reset_gpio)) {
@@ -3144,8 +3160,7 @@ static int mxt_probe(struct i2c_client *client, const struct i2c_device_id *id)
 	}
 
 	error = devm_request_threaded_irq(&client->dev, client->irq,
-					  NULL, mxt_interrupt,
-					  pdata->irqflags | IRQF_ONESHOT,
+					  NULL, mxt_interrupt, IRQF_ONESHOT,
 					  client->name, data);
 	if (error) {
 		dev_err(&client->dev, "Failed to register interrupt\n");
@@ -3265,7 +3280,7 @@ MODULE_DEVICE_TABLE(i2c, mxt_id);
 static struct i2c_driver mxt_driver = {
 	.driver = {
 		.name	= "atmel_mxt_ts",
-		.of_match_table = of_match_ptr(mxt_of_match),
+		.of_match_table = mxt_of_match,
 		.acpi_match_table = ACPI_PTR(mxt_acpi_id),
 		.pm	= &mxt_pm_ops,
 	},
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 83819d0cbf90..2a99f0f14795 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -81,11 +81,12 @@
  */
 #define AMD_IOMMU_PGSIZES	((~0xFFFUL) & ~(2ULL << 38))
 
-static DEFINE_RWLOCK(amd_iommu_devtable_lock);
+static DEFINE_SPINLOCK(amd_iommu_devtable_lock);
+static DEFINE_SPINLOCK(pd_bitmap_lock);
+static DEFINE_SPINLOCK(iommu_table_lock);
 
 /* List of all available dev_data structures */
-static LIST_HEAD(dev_data_list);
-static DEFINE_SPINLOCK(dev_data_list_lock);
+static LLIST_HEAD(dev_data_list);
 
 LIST_HEAD(ioapic_map);
 LIST_HEAD(hpet_map);
@@ -204,40 +205,33 @@ static struct dma_ops_domain* to_dma_ops_domain(struct protection_domain *domain
 static struct iommu_dev_data *alloc_dev_data(u16 devid)
 {
 	struct iommu_dev_data *dev_data;
-	unsigned long flags;
 
 	dev_data = kzalloc(sizeof(*dev_data), GFP_KERNEL);
 	if (!dev_data)
 		return NULL;
 
 	dev_data->devid = devid;
-
-	spin_lock_irqsave(&dev_data_list_lock, flags);
-	list_add_tail(&dev_data->dev_data_list, &dev_data_list);
-	spin_unlock_irqrestore(&dev_data_list_lock, flags);
-
 	ratelimit_default_init(&dev_data->rs);
 
+	llist_add(&dev_data->dev_data_list, &dev_data_list);
 	return dev_data;
 }
 
 static struct iommu_dev_data *search_dev_data(u16 devid)
 {
 	struct iommu_dev_data *dev_data;
-	unsigned long flags;
+	struct llist_node *node;
 
-	spin_lock_irqsave(&dev_data_list_lock, flags);
-	list_for_each_entry(dev_data, &dev_data_list, dev_data_list) {
+	if (llist_empty(&dev_data_list))
+		return NULL;
+
+	node = dev_data_list.first;
+	llist_for_each_entry(dev_data, node, dev_data_list) {
 		if (dev_data->devid == devid)
-			goto out_unlock;
+			return dev_data;
 	}
 
-	dev_data = NULL;
-
-out_unlock:
-	spin_unlock_irqrestore(&dev_data_list_lock, flags);
-
-	return dev_data;
+	return NULL;
 }
 
 static int __last_alias(struct pci_dev *pdev, u16 alias, void *data)
@@ -311,6 +305,8 @@ static struct iommu_dev_data *find_dev_data(u16 devid)
 
 	if (dev_data == NULL) {
 		dev_data = alloc_dev_data(devid);
+		if (!dev_data)
+			return NULL;
 
 		if (translation_pre_enabled(iommu))
 			dev_data->defer_attach = true;
@@ -548,6 +544,7 @@ static void amd_iommu_report_page_fault(u16 devid, u16 domain_id,
 
 static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
 {
+	struct device *dev = iommu->iommu.dev;
 	int type, devid, domid, flags;
 	volatile u32 *event = __evt;
 	int count = 0;
@@ -574,53 +571,53 @@ retry:
 		amd_iommu_report_page_fault(devid, domid, address, flags);
 		return;
 	} else {
-		printk(KERN_ERR "AMD-Vi: Event logged [");
+		dev_err(dev, "AMD-Vi: Event logged [");
 	}
 
 	switch (type) {
 	case EVENT_TYPE_ILL_DEV:
-		printk("ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x "
-		       "address=0x%016llx flags=0x%04x]\n",
-		       PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
-		       address, flags);
+		dev_err(dev, "ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x "
+			"address=0x%016llx flags=0x%04x]\n",
+			PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
+			address, flags);
 		dump_dte_entry(devid);
 		break;
 	case EVENT_TYPE_DEV_TAB_ERR:
-		printk("DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
-		       "address=0x%016llx flags=0x%04x]\n",
-		       PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
-		       address, flags);
+		dev_err(dev, "DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
+			"address=0x%016llx flags=0x%04x]\n",
+			PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
+			address, flags);
 		break;
 	case EVENT_TYPE_PAGE_TAB_ERR:
-		printk("PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
-		       "domain=0x%04x address=0x%016llx flags=0x%04x]\n",
-		       PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
-		       domid, address, flags);
+		dev_err(dev, "PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
+			"domain=0x%04x address=0x%016llx flags=0x%04x]\n",
+			PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
+			domid, address, flags);
 		break;
 	case EVENT_TYPE_ILL_CMD:
-		printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address);
+		dev_err(dev, "ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address);
 		dump_command(address);
 		break;
 	case EVENT_TYPE_CMD_HARD_ERR:
-		printk("COMMAND_HARDWARE_ERROR address=0x%016llx "
-		       "flags=0x%04x]\n", address, flags);
+		dev_err(dev, "COMMAND_HARDWARE_ERROR address=0x%016llx "
+			"flags=0x%04x]\n", address, flags);
 		break;
 	case EVENT_TYPE_IOTLB_INV_TO:
-		printk("IOTLB_INV_TIMEOUT device=%02x:%02x.%x "
-		       "address=0x%016llx]\n",
-		       PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
-		       address);
+		dev_err(dev, "IOTLB_INV_TIMEOUT device=%02x:%02x.%x "
+			"address=0x%016llx]\n",
+			PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
+			address);
 		break;
 	case EVENT_TYPE_INV_DEV_REQ:
-		printk("INVALID_DEVICE_REQUEST device=%02x:%02x.%x "
-		       "address=0x%016llx flags=0x%04x]\n",
-		       PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
-		       address, flags);
+		dev_err(dev, "INVALID_DEVICE_REQUEST device=%02x:%02x.%x "
+			"address=0x%016llx flags=0x%04x]\n",
+			PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
+			address, flags);
 		break;
 	default:
-		printk(KERN_ERR "UNKNOWN type=0x%02x event[0]=0x%08x "
-		       "event[1]=0x%08x event[2]=0x%08x event[3]=0x%08x\n",
-		       type, event[0], event[1], event[2], event[3]);
+		/* No KERN_ERR here: dev_err() already sets the log level. */
+		dev_err(dev, "UNKNOWN event[0]=0x%08x event[1]=0x%08x event[2]=0x%08x event[3]=0x%08x\n",
+			event[0], event[1], event[2], event[3]);
 	}
 
 	memset(__evt, 0, 4 * sizeof(u32));
@@ -1057,9 +1054,9 @@ static int iommu_queue_command_sync(struct amd_iommu *iommu,
 	unsigned long flags;
 	int ret;
 
-	spin_lock_irqsave(&iommu->lock, flags);
+	raw_spin_lock_irqsave(&iommu->lock, flags);
 	ret = __iommu_queue_command_sync(iommu, cmd, sync);
-	spin_unlock_irqrestore(&iommu->lock, flags);
+	raw_spin_unlock_irqrestore(&iommu->lock, flags);
 
 	return ret;
 }
@@ -1085,7 +1082,7 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
 
 	build_completion_wait(&cmd, (u64)&iommu->cmd_sem);
 
-	spin_lock_irqsave(&iommu->lock, flags);
+	raw_spin_lock_irqsave(&iommu->lock, flags);
 
 	iommu->cmd_sem = 0;
 
@@ -1096,7 +1093,7 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
 	ret = wait_on_sem(&iommu->cmd_sem);
 
 out_unlock:
-	spin_unlock_irqrestore(&iommu->lock, flags);
+	raw_spin_unlock_irqrestore(&iommu->lock, flags);
 
 	return ret;
 }
@@ -1606,29 +1603,26 @@ static void del_domain_from_list(struct protection_domain *domain)
 
 static u16 domain_id_alloc(void)
 {
-	unsigned long flags;
 	int id;
 
-	write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+	spin_lock(&pd_bitmap_lock);
 	id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID);
 	BUG_ON(id == 0);
 	if (id > 0 && id < MAX_DOMAIN_ID)
 		__set_bit(id, amd_iommu_pd_alloc_bitmap);
 	else
 		id = 0;
-	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+	spin_unlock(&pd_bitmap_lock);
 
 	return id;
 }
 
 static void domain_id_free(int id)
 {
-	unsigned long flags;
-
-	write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+	spin_lock(&pd_bitmap_lock);
 	if (id > 0 && id < MAX_DOMAIN_ID)
 		__clear_bit(id, amd_iommu_pd_alloc_bitmap);
-	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+	spin_unlock(&pd_bitmap_lock);
 }
 
 #define DEFINE_FREE_PT_FN(LVL, FN)				\
@@ -2104,9 +2098,9 @@ static int attach_device(struct device *dev,
 	}
 
 skip_ats_check:
-	write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+	spin_lock_irqsave(&amd_iommu_devtable_lock, flags);
 	ret = __attach_device(dev_data, domain);
-	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+	spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
 
 	/*
 	 * We might boot into a crash-kernel here. The crashed kernel
@@ -2156,9 +2150,9 @@ static void detach_device(struct device *dev)
 	domain   = dev_data->domain;
 
 	/* lock device table */
-	write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+	spin_lock_irqsave(&amd_iommu_devtable_lock, flags);
 	__detach_device(dev_data);
-	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+	spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
 
 	if (!dev_is_pci(dev))
 		return;
@@ -2795,7 +2789,7 @@ static void cleanup_domain(struct protection_domain *domain)
 	struct iommu_dev_data *entry;
 	unsigned long flags;
 
-	write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+	spin_lock_irqsave(&amd_iommu_devtable_lock, flags);
 
 	while (!list_empty(&domain->dev_list)) {
 		entry = list_first_entry(&domain->dev_list,
@@ -2803,7 +2797,7 @@ static void cleanup_domain(struct protection_domain *domain)
 		__detach_device(entry);
 	}
 
-	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+	spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
 }
 
 static void protection_domain_free(struct protection_domain *domain)
@@ -3025,15 +3019,12 @@ static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
 	size_t unmap_size;
 
 	if (domain->mode == PAGE_MODE_NONE)
-		return -EINVAL;
+		return 0;
 
 	mutex_lock(&domain->api_lock);
 	unmap_size = iommu_unmap_page(domain, iova, page_size);
 	mutex_unlock(&domain->api_lock);
 
-	domain_flush_tlb_pde(domain);
-	domain_flush_complete(domain);
-
 	return unmap_size;
 }
 
@@ -3151,6 +3142,19 @@ static bool amd_iommu_is_attach_deferred(struct iommu_domain *domain,
 	return dev_data->defer_attach;
 }
 
+static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain)
+{
+	struct protection_domain *dom = to_pdomain(domain);
+
+	domain_flush_tlb_pde(dom);
+	domain_flush_complete(dom);
+}
+
+static void amd_iommu_iotlb_range_add(struct iommu_domain *domain,
+				      unsigned long iova, size_t size)
+{
+}
+
 const struct iommu_ops amd_iommu_ops = {
 	.capable = amd_iommu_capable,
 	.domain_alloc = amd_iommu_domain_alloc,
@@ -3169,6 +3173,9 @@ const struct iommu_ops amd_iommu_ops = {
 	.apply_resv_region = amd_iommu_apply_resv_region,
 	.is_attach_deferred = amd_iommu_is_attach_deferred,
 	.pgsize_bitmap	= AMD_IOMMU_PGSIZES,
+	.flush_iotlb_all = amd_iommu_flush_iotlb_all,
+	.iotlb_range_add = amd_iommu_iotlb_range_add,
+	.iotlb_sync = amd_iommu_flush_iotlb_all,
 };
 
 /*****************************************************************************
@@ -3570,14 +3577,62 @@ static void set_dte_irq_entry(u16 devid, struct irq_remap_table *table)
 	amd_iommu_dev_table[devid].data[2] = dte;
 }
 
-static struct irq_remap_table *get_irq_table(u16 devid, bool ioapic)
+static struct irq_remap_table *get_irq_table(u16 devid)
+{
+	struct irq_remap_table *table;
+
+	if (WARN_ONCE(!amd_iommu_rlookup_table[devid],
+		      "%s: no iommu for devid %x\n", __func__, devid))
+		return NULL;
+
+	table = irq_lookup_table[devid];
+	if (WARN_ONCE(!table, "%s: no table for devid %x\n", __func__, devid))
+		return NULL;
+
+	return table;
+}
+
+static struct irq_remap_table *__alloc_irq_table(void)
+{
+	struct irq_remap_table *table;
+
+	table = kzalloc(sizeof(*table), GFP_KERNEL);
+	if (!table)
+		return NULL;
+
+	table->table = kmem_cache_alloc(amd_iommu_irq_cache, GFP_KERNEL);
+	if (!table->table) {
+		kfree(table);
+		return NULL;
+	}
+	raw_spin_lock_init(&table->lock);
+
+	if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
+		memset(table->table, 0,
+		       MAX_IRQS_PER_TABLE * sizeof(u32));
+	else
+		memset(table->table, 0,
+		       (MAX_IRQS_PER_TABLE * (sizeof(u64) * 2)));
+	return table;
+}
+
+static void set_remap_table_entry(struct amd_iommu *iommu, u16 devid,
+				  struct irq_remap_table *table)
+{
+	irq_lookup_table[devid] = table;
+	set_dte_irq_entry(devid, table);
+	iommu_flush_dte(iommu, devid);
+}
+
+static struct irq_remap_table *alloc_irq_table(u16 devid)
 {
 	struct irq_remap_table *table = NULL;
+	struct irq_remap_table *new_table = NULL;
 	struct amd_iommu *iommu;
 	unsigned long flags;
 	u16 alias;
 
-	write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+	spin_lock_irqsave(&iommu_table_lock, flags);
 
 	iommu = amd_iommu_rlookup_table[devid];
 	if (!iommu)
@@ -3590,60 +3645,45 @@ static struct irq_remap_table *get_irq_table(u16 devid, bool ioapic)
 	alias = amd_iommu_alias_table[devid];
 	table = irq_lookup_table[alias];
 	if (table) {
-		irq_lookup_table[devid] = table;
-		set_dte_irq_entry(devid, table);
-		iommu_flush_dte(iommu, devid);
-		goto out;
+		set_remap_table_entry(iommu, devid, table);
+		goto out_wait;
 	}
+	spin_unlock_irqrestore(&iommu_table_lock, flags);
 
 	/* Nothing there yet, allocate new irq remapping table */
-	table = kzalloc(sizeof(*table), GFP_ATOMIC);
-	if (!table)
-		goto out_unlock;
-
-	/* Initialize table spin-lock */
-	spin_lock_init(&table->lock);
+	new_table = __alloc_irq_table();
+	if (!new_table)
+		return NULL;
 
-	if (ioapic)
-		/* Keep the first 32 indexes free for IOAPIC interrupts */
-		table->min_index = 32;
+	spin_lock_irqsave(&iommu_table_lock, flags);
 
-	table->table = kmem_cache_alloc(amd_iommu_irq_cache, GFP_ATOMIC);
-	if (!table->table) {
-		kfree(table);
-		table = NULL;
+	table = irq_lookup_table[devid];
+	if (table)
 		goto out_unlock;
-	}
 
-	if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
-		memset(table->table, 0,
-		       MAX_IRQS_PER_TABLE * sizeof(u32));
-	else
-		memset(table->table, 0,
-		       (MAX_IRQS_PER_TABLE * (sizeof(u64) * 2)));
-
-	if (ioapic) {
-		int i;
-
-		for (i = 0; i < 32; ++i)
-			iommu->irte_ops->set_allocated(table, i);
+	table = irq_lookup_table[alias];
+	if (table) {
+		set_remap_table_entry(iommu, devid, table);
+		goto out_wait;
 	}
 
-	irq_lookup_table[devid] = table;
-	set_dte_irq_entry(devid, table);
-	iommu_flush_dte(iommu, devid);
-	if (devid != alias) {
-		irq_lookup_table[alias] = table;
-		set_dte_irq_entry(alias, table);
-		iommu_flush_dte(iommu, alias);
-	}
+	table = new_table;
+	new_table = NULL;
 
-out:
+	set_remap_table_entry(iommu, devid, table);
+	if (devid != alias)
+		set_remap_table_entry(iommu, alias, table);
+
+out_wait:
 	iommu_completion_wait(iommu);
 
 out_unlock:
-	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+	spin_unlock_irqrestore(&iommu_table_lock, flags);
 
+	if (new_table) {
+		kmem_cache_free(amd_iommu_irq_cache, new_table->table);
+		kfree(new_table);
+	}
 	return table;
 }
 
@@ -3657,14 +3697,14 @@ static int alloc_irq_index(u16 devid, int count, bool align)
 	if (!iommu)
 		return -ENODEV;
 
-	table = get_irq_table(devid, false);
+	table = alloc_irq_table(devid);
 	if (!table)
 		return -ENODEV;
 
 	if (align)
 		alignment = roundup_pow_of_two(count);
 
-	spin_lock_irqsave(&table->lock, flags);
+	raw_spin_lock_irqsave(&table->lock, flags);
 
 	/* Scan table for free entries */
 	for (index = ALIGN(table->min_index, alignment), c = 0;
@@ -3691,7 +3731,7 @@ static int alloc_irq_index(u16 devid, int count, bool align)
 	index = -ENOSPC;
 
 out:
-	spin_unlock_irqrestore(&table->lock, flags);
+	raw_spin_unlock_irqrestore(&table->lock, flags);
 
 	return index;
 }
@@ -3708,11 +3748,11 @@ static int modify_irte_ga(u16 devid, int index, struct irte_ga *irte,
 	if (iommu == NULL)
 		return -EINVAL;
 
-	table = get_irq_table(devid, false);
+	table = get_irq_table(devid);
 	if (!table)
 		return -ENOMEM;
 
-	spin_lock_irqsave(&table->lock, flags);
+	raw_spin_lock_irqsave(&table->lock, flags);
 
 	entry = (struct irte_ga *)table->table;
 	entry = &entry[index];
@@ -3723,7 +3763,7 @@ static int modify_irte_ga(u16 devid, int index, struct irte_ga *irte,
 	if (data)
 		data->ref = entry;
 
-	spin_unlock_irqrestore(&table->lock, flags);
+	raw_spin_unlock_irqrestore(&table->lock, flags);
 
 	iommu_flush_irt(iommu, devid);
 	iommu_completion_wait(iommu);
@@ -3741,13 +3781,13 @@ static int modify_irte(u16 devid, int index, union irte *irte)
 	if (iommu == NULL)
 		return -EINVAL;
 
-	table = get_irq_table(devid, false);
+	table = get_irq_table(devid);
 	if (!table)
 		return -ENOMEM;
 
-	spin_lock_irqsave(&table->lock, flags);
+	raw_spin_lock_irqsave(&table->lock, flags);
 	table->table[index] = irte->val;
-	spin_unlock_irqrestore(&table->lock, flags);
+	raw_spin_unlock_irqrestore(&table->lock, flags);
 
 	iommu_flush_irt(iommu, devid);
 	iommu_completion_wait(iommu);
@@ -3765,13 +3805,13 @@ static void free_irte(u16 devid, int index)
 	if (iommu == NULL)
 		return;
 
-	table = get_irq_table(devid, false);
+	table = get_irq_table(devid);
 	if (!table)
 		return;
 
-	spin_lock_irqsave(&table->lock, flags);
+	raw_spin_lock_irqsave(&table->lock, flags);
 	iommu->irte_ops->clear_allocated(table, index);
-	spin_unlock_irqrestore(&table->lock, flags);
+	raw_spin_unlock_irqrestore(&table->lock, flags);
 
 	iommu_flush_irt(iommu, devid);
 	iommu_completion_wait(iommu);
@@ -3852,10 +3892,8 @@ static void irte_ga_set_affinity(void *entry, u16 devid, u16 index,
 				 u8 vector, u32 dest_apicid)
 {
 	struct irte_ga *irte = (struct irte_ga *) entry;
-	struct iommu_dev_data *dev_data = search_dev_data(devid);
 
-	if (!dev_data || !dev_data->use_vapic ||
-	    !irte->lo.fields_remap.guest_mode) {
+	if (!irte->lo.fields_remap.guest_mode) {
 		irte->hi.fields.vector = vector;
 		irte->lo.fields_remap.destination = dest_apicid;
 		modify_irte_ga(devid, index, irte, NULL);
@@ -4061,7 +4099,7 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq,
 	struct amd_ir_data *data = NULL;
 	struct irq_cfg *cfg;
 	int i, ret, devid;
-	int index = -1;
+	int index;
 
 	if (!info)
 		return -EINVAL;
@@ -4085,10 +4123,26 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq,
 		return ret;
 
 	if (info->type == X86_IRQ_ALLOC_TYPE_IOAPIC) {
-		if (get_irq_table(devid, true))
+		struct irq_remap_table *table;
+		struct amd_iommu *iommu;
+
+		table = alloc_irq_table(devid);
+		if (table) {
+			if (!table->min_index) {
+				/*
+				 * Keep the first 32 indexes free for IOAPIC
+				 * interrupts.
+				 */
+				table->min_index = 32;
+				iommu = amd_iommu_rlookup_table[devid];
+				for (i = 0; i < 32; ++i)
+					iommu->irte_ops->set_allocated(table, i);
+			}
+			WARN_ON(table->min_index != 32);
 			index = info->ioapic_pin;
-		else
-			ret = -ENOMEM;
+		} else {
+			index = -ENOMEM;
+		}
 	} else {
 		bool align = (info->type == X86_IRQ_ALLOC_TYPE_MSI);
 
@@ -4354,7 +4408,7 @@ int amd_iommu_update_ga(int cpu, bool is_run, void *data)
 {
 	unsigned long flags;
 	struct amd_iommu *iommu;
-	struct irq_remap_table *irt;
+	struct irq_remap_table *table;
 	struct amd_ir_data *ir_data = (struct amd_ir_data *)data;
 	int devid = ir_data->irq_2_irte.devid;
 	struct irte_ga *entry = (struct irte_ga *) ir_data->entry;
@@ -4368,11 +4422,11 @@ int amd_iommu_update_ga(int cpu, bool is_run, void *data)
 	if (!iommu)
 		return -ENODEV;
 
-	irt = get_irq_table(devid, false);
-	if (!irt)
+	table = get_irq_table(devid);
+	if (!table)
 		return -ENODEV;
 
-	spin_lock_irqsave(&irt->lock, flags);
+	raw_spin_lock_irqsave(&table->lock, flags);
 
 	if (ref->lo.fields_vapic.guest_mode) {
 		if (cpu >= 0)
@@ -4381,7 +4435,7 @@ int amd_iommu_update_ga(int cpu, bool is_run, void *data)
 		barrier();
 	}
 
-	spin_unlock_irqrestore(&irt->lock, flags);
+	raw_spin_unlock_irqrestore(&table->lock, flags);
 
 	iommu_flush_irt(iommu, devid);
 	iommu_completion_wait(iommu);
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 4e4a615bf13f..904c575d1677 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -1474,7 +1474,7 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
 {
 	int ret;
 
-	spin_lock_init(&iommu->lock);
+	raw_spin_lock_init(&iommu->lock);
 
 	/* Add IOMMU to internal data structures */
 	list_add_tail(&iommu->list, &amd_iommu_list);
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 6a877ebd058b..1c9b080276c9 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -408,7 +408,7 @@ extern bool amd_iommu_iotlb_sup;
 #define IRQ_TABLE_ALIGNMENT	128
 
 struct irq_remap_table {
-	spinlock_t lock;
+	raw_spinlock_t lock;
 	unsigned min_index;
 	u32 *table;
 };
@@ -490,7 +490,7 @@ struct amd_iommu {
 	int index;
 
 	/* locks the accesses to the hardware */
-	spinlock_t lock;
+	raw_spinlock_t lock;
 
 	/* Pointer to PCI device of this IOMMU */
 	struct pci_dev *dev;
@@ -627,7 +627,7 @@ struct devid_map {
  */
 struct iommu_dev_data {
 	struct list_head list;		  /* For domain->dev_list */
-	struct list_head dev_data_list;	  /* For global dev_data_list */
+	struct llist_node dev_data_list;  /* For global dev_data_list */
 	struct protection_domain *domain; /* Domain the device is bound to */
 	u16 devid;			  /* PCI Device ID */
 	u16 alias;			  /* Alias Device ID */
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 3f2f1fc68b52..1d647104bccc 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -22,6 +22,8 @@
 
 #include <linux/acpi.h>
 #include <linux/acpi_iort.h>
+#include <linux/bitfield.h>
+#include <linux/bitops.h>
 #include <linux/delay.h>
 #include <linux/dma-iommu.h>
 #include <linux/err.h>
@@ -43,18 +45,15 @@
 
 /* MMIO registers */
 #define ARM_SMMU_IDR0			0x0
-#define IDR0_ST_LVL_SHIFT		27
-#define IDR0_ST_LVL_MASK		0x3
-#define IDR0_ST_LVL_2LVL		(1 << IDR0_ST_LVL_SHIFT)
-#define IDR0_STALL_MODEL_SHIFT		24
-#define IDR0_STALL_MODEL_MASK		0x3
-#define IDR0_STALL_MODEL_STALL		(0 << IDR0_STALL_MODEL_SHIFT)
-#define IDR0_STALL_MODEL_FORCE		(2 << IDR0_STALL_MODEL_SHIFT)
-#define IDR0_TTENDIAN_SHIFT		21
-#define IDR0_TTENDIAN_MASK		0x3
-#define IDR0_TTENDIAN_LE		(2 << IDR0_TTENDIAN_SHIFT)
-#define IDR0_TTENDIAN_BE		(3 << IDR0_TTENDIAN_SHIFT)
-#define IDR0_TTENDIAN_MIXED		(0 << IDR0_TTENDIAN_SHIFT)
+#define IDR0_ST_LVL			GENMASK(28, 27)
+#define IDR0_ST_LVL_2LVL		1
+#define IDR0_STALL_MODEL		GENMASK(25, 24)
+#define IDR0_STALL_MODEL_STALL		0
+#define IDR0_STALL_MODEL_FORCE		2
+#define IDR0_TTENDIAN			GENMASK(22, 21)
+#define IDR0_TTENDIAN_MIXED		0
+#define IDR0_TTENDIAN_LE		2
+#define IDR0_TTENDIAN_BE		3
 #define IDR0_CD2L			(1 << 19)
 #define IDR0_VMID16			(1 << 18)
 #define IDR0_PRI			(1 << 16)
@@ -64,10 +63,9 @@
 #define IDR0_ATS			(1 << 10)
 #define IDR0_HYP			(1 << 9)
 #define IDR0_COHACC			(1 << 4)
-#define IDR0_TTF_SHIFT			2
-#define IDR0_TTF_MASK			0x3
-#define IDR0_TTF_AARCH64		(2 << IDR0_TTF_SHIFT)
-#define IDR0_TTF_AARCH32_64		(3 << IDR0_TTF_SHIFT)
+#define IDR0_TTF			GENMASK(3, 2)
+#define IDR0_TTF_AARCH64		2
+#define IDR0_TTF_AARCH32_64		3
 #define IDR0_S1P			(1 << 1)
 #define IDR0_S2P			(1 << 0)
 
@@ -75,31 +73,27 @@
 #define IDR1_TABLES_PRESET		(1 << 30)
 #define IDR1_QUEUES_PRESET		(1 << 29)
 #define IDR1_REL			(1 << 28)
-#define IDR1_CMDQ_SHIFT			21
-#define IDR1_CMDQ_MASK			0x1f
-#define IDR1_EVTQ_SHIFT			16
-#define IDR1_EVTQ_MASK			0x1f
-#define IDR1_PRIQ_SHIFT			11
-#define IDR1_PRIQ_MASK			0x1f
-#define IDR1_SSID_SHIFT			6
-#define IDR1_SSID_MASK			0x1f
-#define IDR1_SID_SHIFT			0
-#define IDR1_SID_MASK			0x3f
+#define IDR1_CMDQS			GENMASK(25, 21)
+#define IDR1_EVTQS			GENMASK(20, 16)
+#define IDR1_PRIQS			GENMASK(15, 11)
+#define IDR1_SSIDSIZE			GENMASK(10, 6)
+#define IDR1_SIDSIZE			GENMASK(5, 0)
 
 #define ARM_SMMU_IDR5			0x14
-#define IDR5_STALL_MAX_SHIFT		16
-#define IDR5_STALL_MAX_MASK		0xffff
+#define IDR5_STALL_MAX			GENMASK(31, 16)
 #define IDR5_GRAN64K			(1 << 6)
 #define IDR5_GRAN16K			(1 << 5)
 #define IDR5_GRAN4K			(1 << 4)
-#define IDR5_OAS_SHIFT			0
-#define IDR5_OAS_MASK			0x7
-#define IDR5_OAS_32_BIT			(0 << IDR5_OAS_SHIFT)
-#define IDR5_OAS_36_BIT			(1 << IDR5_OAS_SHIFT)
-#define IDR5_OAS_40_BIT			(2 << IDR5_OAS_SHIFT)
-#define IDR5_OAS_42_BIT			(3 << IDR5_OAS_SHIFT)
-#define IDR5_OAS_44_BIT			(4 << IDR5_OAS_SHIFT)
-#define IDR5_OAS_48_BIT			(5 << IDR5_OAS_SHIFT)
+#define IDR5_OAS			GENMASK(2, 0)
+#define IDR5_OAS_32_BIT			0
+#define IDR5_OAS_36_BIT			1
+#define IDR5_OAS_40_BIT			2
+#define IDR5_OAS_42_BIT			3
+#define IDR5_OAS_44_BIT			4
+#define IDR5_OAS_48_BIT			5
+#define IDR5_OAS_52_BIT			6
+#define IDR5_VAX			GENMASK(11, 10)
+#define IDR5_VAX_52_BIT			1
 
 #define ARM_SMMU_CR0			0x20
 #define CR0_CMDQEN			(1 << 3)
@@ -110,18 +104,16 @@
 #define ARM_SMMU_CR0ACK			0x24
 
 #define ARM_SMMU_CR1			0x28
-#define CR1_SH_NSH			0
-#define CR1_SH_OSH			2
-#define CR1_SH_ISH			3
+#define CR1_TABLE_SH			GENMASK(11, 10)
+#define CR1_TABLE_OC			GENMASK(9, 8)
+#define CR1_TABLE_IC			GENMASK(7, 6)
+#define CR1_QUEUE_SH			GENMASK(5, 4)
+#define CR1_QUEUE_OC			GENMASK(3, 2)
+#define CR1_QUEUE_IC			GENMASK(1, 0)
+/* CR1 cacheability fields don't quite follow the usual TCR-style encoding */
 #define CR1_CACHE_NC			0
 #define CR1_CACHE_WB			1
 #define CR1_CACHE_WT			2
-#define CR1_TABLE_SH_SHIFT		10
-#define CR1_TABLE_OC_SHIFT		8
-#define CR1_TABLE_IC_SHIFT		6
-#define CR1_QUEUE_SH_SHIFT		4
-#define CR1_QUEUE_OC_SHIFT		2
-#define CR1_QUEUE_IC_SHIFT		0
 
 #define ARM_SMMU_CR2			0x2c
 #define CR2_PTM				(1 << 2)
@@ -129,8 +121,8 @@
 #define CR2_E2H				(1 << 0)
 
 #define ARM_SMMU_GBPA			0x44
-#define GBPA_ABORT			(1 << 20)
 #define GBPA_UPDATE			(1 << 31)
+#define GBPA_ABORT			(1 << 20)
 
 #define ARM_SMMU_IRQ_CTRL		0x50
 #define IRQ_CTRL_EVTQ_IRQEN		(1 << 2)
@@ -158,18 +150,14 @@
 
 #define ARM_SMMU_STRTAB_BASE		0x80
 #define STRTAB_BASE_RA			(1UL << 62)
-#define STRTAB_BASE_ADDR_SHIFT		6
-#define STRTAB_BASE_ADDR_MASK		0x3ffffffffffUL
+#define STRTAB_BASE_ADDR_MASK		GENMASK_ULL(51, 6)
 
 #define ARM_SMMU_STRTAB_BASE_CFG	0x88
-#define STRTAB_BASE_CFG_LOG2SIZE_SHIFT	0
-#define STRTAB_BASE_CFG_LOG2SIZE_MASK	0x3f
-#define STRTAB_BASE_CFG_SPLIT_SHIFT	6
-#define STRTAB_BASE_CFG_SPLIT_MASK	0x1f
-#define STRTAB_BASE_CFG_FMT_SHIFT	16
-#define STRTAB_BASE_CFG_FMT_MASK	0x3
-#define STRTAB_BASE_CFG_FMT_LINEAR	(0 << STRTAB_BASE_CFG_FMT_SHIFT)
-#define STRTAB_BASE_CFG_FMT_2LVL	(1 << STRTAB_BASE_CFG_FMT_SHIFT)
+#define STRTAB_BASE_CFG_FMT		GENMASK(17, 16)
+#define STRTAB_BASE_CFG_FMT_LINEAR	0
+#define STRTAB_BASE_CFG_FMT_2LVL	1
+#define STRTAB_BASE_CFG_SPLIT		GENMASK(10, 6)
+#define STRTAB_BASE_CFG_LOG2SIZE	GENMASK(5, 0)
 
 #define ARM_SMMU_CMDQ_BASE		0x90
 #define ARM_SMMU_CMDQ_PROD		0x98
@@ -190,14 +178,16 @@
 #define ARM_SMMU_PRIQ_IRQ_CFG2		0xdc
 
 /* Common MSI config fields */
-#define MSI_CFG0_ADDR_SHIFT		2
-#define MSI_CFG0_ADDR_MASK		0x3fffffffffffUL
-#define MSI_CFG2_SH_SHIFT		4
-#define MSI_CFG2_SH_NSH			(0UL << MSI_CFG2_SH_SHIFT)
-#define MSI_CFG2_SH_OSH			(2UL << MSI_CFG2_SH_SHIFT)
-#define MSI_CFG2_SH_ISH			(3UL << MSI_CFG2_SH_SHIFT)
-#define MSI_CFG2_MEMATTR_SHIFT		0
-#define MSI_CFG2_MEMATTR_DEVICE_nGnRE	(0x1 << MSI_CFG2_MEMATTR_SHIFT)
+#define MSI_CFG0_ADDR_MASK		GENMASK_ULL(51, 2)
+#define MSI_CFG2_SH			GENMASK(5, 4)
+#define MSI_CFG2_MEMATTR		GENMASK(3, 0)
+
+/* Common memory attribute values */
+#define ARM_SMMU_SH_NSH			0
+#define ARM_SMMU_SH_OSH			2
+#define ARM_SMMU_SH_ISH			3
+#define ARM_SMMU_MEMATTR_DEVICE_nGnRE	0x1
+#define ARM_SMMU_MEMATTR_OIWB		0xf
 
 #define Q_IDX(q, p)			((p) & ((1 << (q)->max_n_shift) - 1))
 #define Q_WRP(q, p)			((p) & (1 << (q)->max_n_shift))
@@ -207,10 +197,8 @@
 					 Q_IDX(q, p) * (q)->ent_dwords)
 
 #define Q_BASE_RWA			(1UL << 62)
-#define Q_BASE_ADDR_SHIFT		5
-#define Q_BASE_ADDR_MASK		0xfffffffffffUL
-#define Q_BASE_LOG2SIZE_SHIFT		0
-#define Q_BASE_LOG2SIZE_MASK		0x1fUL
+#define Q_BASE_ADDR_MASK		GENMASK_ULL(51, 5)
+#define Q_BASE_LOG2SIZE			GENMASK(4, 0)
 
 /*
  * Stream table.
@@ -223,187 +211,143 @@
 #define STRTAB_SPLIT			8
 
 #define STRTAB_L1_DESC_DWORDS		1
-#define STRTAB_L1_DESC_SPAN_SHIFT	0
-#define STRTAB_L1_DESC_SPAN_MASK	0x1fUL
-#define STRTAB_L1_DESC_L2PTR_SHIFT	6
-#define STRTAB_L1_DESC_L2PTR_MASK	0x3ffffffffffUL
+#define STRTAB_L1_DESC_SPAN		GENMASK_ULL(4, 0)
+#define STRTAB_L1_DESC_L2PTR_MASK	GENMASK_ULL(51, 6)
 
 #define STRTAB_STE_DWORDS		8
 #define STRTAB_STE_0_V			(1UL << 0)
-#define STRTAB_STE_0_CFG_SHIFT		1
-#define STRTAB_STE_0_CFG_MASK		0x7UL
-#define STRTAB_STE_0_CFG_ABORT		(0UL << STRTAB_STE_0_CFG_SHIFT)
-#define STRTAB_STE_0_CFG_BYPASS		(4UL << STRTAB_STE_0_CFG_SHIFT)
-#define STRTAB_STE_0_CFG_S1_TRANS	(5UL << STRTAB_STE_0_CFG_SHIFT)
-#define STRTAB_STE_0_CFG_S2_TRANS	(6UL << STRTAB_STE_0_CFG_SHIFT)
-
-#define STRTAB_STE_0_S1FMT_SHIFT	4
-#define STRTAB_STE_0_S1FMT_LINEAR	(0UL << STRTAB_STE_0_S1FMT_SHIFT)
-#define STRTAB_STE_0_S1CTXPTR_SHIFT	6
-#define STRTAB_STE_0_S1CTXPTR_MASK	0x3ffffffffffUL
-#define STRTAB_STE_0_S1CDMAX_SHIFT	59
-#define STRTAB_STE_0_S1CDMAX_MASK	0x1fUL
+#define STRTAB_STE_0_CFG		GENMASK_ULL(3, 1)
+#define STRTAB_STE_0_CFG_ABORT		0
+#define STRTAB_STE_0_CFG_BYPASS		4
+#define STRTAB_STE_0_CFG_S1_TRANS	5
+#define STRTAB_STE_0_CFG_S2_TRANS	6
+
+#define STRTAB_STE_0_S1FMT		GENMASK_ULL(5, 4)
+#define STRTAB_STE_0_S1FMT_LINEAR	0
+#define STRTAB_STE_0_S1CTXPTR_MASK	GENMASK_ULL(51, 6)
+#define STRTAB_STE_0_S1CDMAX		GENMASK_ULL(63, 59)
 
 #define STRTAB_STE_1_S1C_CACHE_NC	0UL
 #define STRTAB_STE_1_S1C_CACHE_WBRA	1UL
 #define STRTAB_STE_1_S1C_CACHE_WT	2UL
 #define STRTAB_STE_1_S1C_CACHE_WB	3UL
-#define STRTAB_STE_1_S1C_SH_NSH		0UL
-#define STRTAB_STE_1_S1C_SH_OSH		2UL
-#define STRTAB_STE_1_S1C_SH_ISH		3UL
-#define STRTAB_STE_1_S1CIR_SHIFT	2
-#define STRTAB_STE_1_S1COR_SHIFT	4
-#define STRTAB_STE_1_S1CSH_SHIFT	6
+#define STRTAB_STE_1_S1CIR		GENMASK_ULL(3, 2)
+#define STRTAB_STE_1_S1COR		GENMASK_ULL(5, 4)
+#define STRTAB_STE_1_S1CSH		GENMASK_ULL(7, 6)
 
 #define STRTAB_STE_1_S1STALLD		(1UL << 27)
 
+#define STRTAB_STE_1_EATS		GENMASK_ULL(29, 28)
 #define STRTAB_STE_1_EATS_ABT		0UL
 #define STRTAB_STE_1_EATS_TRANS		1UL
 #define STRTAB_STE_1_EATS_S1CHK		2UL
-#define STRTAB_STE_1_EATS_SHIFT		28
 
+#define STRTAB_STE_1_STRW		GENMASK_ULL(31, 30)
 #define STRTAB_STE_1_STRW_NSEL1		0UL
 #define STRTAB_STE_1_STRW_EL2		2UL
-#define STRTAB_STE_1_STRW_SHIFT		30
 
+#define STRTAB_STE_1_SHCFG		GENMASK_ULL(45, 44)
 #define STRTAB_STE_1_SHCFG_INCOMING	1UL
-#define STRTAB_STE_1_SHCFG_SHIFT	44
 
-#define STRTAB_STE_2_S2VMID_SHIFT	0
-#define STRTAB_STE_2_S2VMID_MASK	0xffffUL
-#define STRTAB_STE_2_VTCR_SHIFT		32
-#define STRTAB_STE_2_VTCR_MASK		0x7ffffUL
+#define STRTAB_STE_2_S2VMID		GENMASK_ULL(15, 0)
+#define STRTAB_STE_2_VTCR		GENMASK_ULL(50, 32)
 #define STRTAB_STE_2_S2AA64		(1UL << 51)
 #define STRTAB_STE_2_S2ENDI		(1UL << 52)
 #define STRTAB_STE_2_S2PTW		(1UL << 54)
 #define STRTAB_STE_2_S2R		(1UL << 58)
 
-#define STRTAB_STE_3_S2TTB_SHIFT	4
-#define STRTAB_STE_3_S2TTB_MASK		0xfffffffffffUL
+#define STRTAB_STE_3_S2TTB_MASK		GENMASK_ULL(51, 4)
 
 /* Context descriptor (stage-1 only) */
 #define CTXDESC_CD_DWORDS		8
-#define CTXDESC_CD_0_TCR_T0SZ_SHIFT	0
-#define ARM64_TCR_T0SZ_SHIFT		0
-#define ARM64_TCR_T0SZ_MASK		0x1fUL
-#define CTXDESC_CD_0_TCR_TG0_SHIFT	6
-#define ARM64_TCR_TG0_SHIFT		14
-#define ARM64_TCR_TG0_MASK		0x3UL
-#define CTXDESC_CD_0_TCR_IRGN0_SHIFT	8
-#define ARM64_TCR_IRGN0_SHIFT		8
-#define ARM64_TCR_IRGN0_MASK		0x3UL
-#define CTXDESC_CD_0_TCR_ORGN0_SHIFT	10
-#define ARM64_TCR_ORGN0_SHIFT		10
-#define ARM64_TCR_ORGN0_MASK		0x3UL
-#define CTXDESC_CD_0_TCR_SH0_SHIFT	12
-#define ARM64_TCR_SH0_SHIFT		12
-#define ARM64_TCR_SH0_MASK		0x3UL
-#define CTXDESC_CD_0_TCR_EPD0_SHIFT	14
-#define ARM64_TCR_EPD0_SHIFT		7
-#define ARM64_TCR_EPD0_MASK		0x1UL
-#define CTXDESC_CD_0_TCR_EPD1_SHIFT	30
-#define ARM64_TCR_EPD1_SHIFT		23
-#define ARM64_TCR_EPD1_MASK		0x1UL
+#define CTXDESC_CD_0_TCR_T0SZ		GENMASK_ULL(5, 0)
+#define ARM64_TCR_T0SZ			GENMASK_ULL(5, 0)
+#define CTXDESC_CD_0_TCR_TG0		GENMASK_ULL(7, 6)
+#define ARM64_TCR_TG0			GENMASK_ULL(15, 14)
+#define CTXDESC_CD_0_TCR_IRGN0		GENMASK_ULL(9, 8)
+#define ARM64_TCR_IRGN0			GENMASK_ULL(9, 8)
+#define CTXDESC_CD_0_TCR_ORGN0		GENMASK_ULL(11, 10)
+#define ARM64_TCR_ORGN0			GENMASK_ULL(11, 10)
+#define CTXDESC_CD_0_TCR_SH0		GENMASK_ULL(13, 12)
+#define ARM64_TCR_SH0			GENMASK_ULL(13, 12)
+#define CTXDESC_CD_0_TCR_EPD0		(1ULL << 14)
+#define ARM64_TCR_EPD0			(1ULL << 7)
+#define CTXDESC_CD_0_TCR_EPD1		(1ULL << 30)
+#define ARM64_TCR_EPD1			(1ULL << 23)
 
 #define CTXDESC_CD_0_ENDI		(1UL << 15)
 #define CTXDESC_CD_0_V			(1UL << 31)
 
-#define CTXDESC_CD_0_TCR_IPS_SHIFT	32
-#define ARM64_TCR_IPS_SHIFT		32
-#define ARM64_TCR_IPS_MASK		0x7UL
-#define CTXDESC_CD_0_TCR_TBI0_SHIFT	38
-#define ARM64_TCR_TBI0_SHIFT		37
-#define ARM64_TCR_TBI0_MASK		0x1UL
+#define CTXDESC_CD_0_TCR_IPS		GENMASK_ULL(34, 32)
+#define ARM64_TCR_IPS			GENMASK_ULL(34, 32)
+#define CTXDESC_CD_0_TCR_TBI0		(1ULL << 38)
+#define ARM64_TCR_TBI0			(1ULL << 37)
 
 #define CTXDESC_CD_0_AA64		(1UL << 41)
 #define CTXDESC_CD_0_S			(1UL << 44)
 #define CTXDESC_CD_0_R			(1UL << 45)
 #define CTXDESC_CD_0_A			(1UL << 46)
-#define CTXDESC_CD_0_ASET_SHIFT		47
-#define CTXDESC_CD_0_ASET_SHARED	(0UL << CTXDESC_CD_0_ASET_SHIFT)
-#define CTXDESC_CD_0_ASET_PRIVATE	(1UL << CTXDESC_CD_0_ASET_SHIFT)
-#define CTXDESC_CD_0_ASID_SHIFT		48
-#define CTXDESC_CD_0_ASID_MASK		0xffffUL
-
-#define CTXDESC_CD_1_TTB0_SHIFT		4
-#define CTXDESC_CD_1_TTB0_MASK		0xfffffffffffUL
+#define CTXDESC_CD_0_ASET		(1UL << 47)
+#define CTXDESC_CD_0_ASID		GENMASK_ULL(63, 48)
 
-#define CTXDESC_CD_3_MAIR_SHIFT		0
+#define CTXDESC_CD_1_TTB0_MASK		GENMASK_ULL(51, 4)
 
 /* Convert between AArch64 (CPU) TCR format and SMMU CD format */
-#define ARM_SMMU_TCR2CD(tcr, fld)					\
-	(((tcr) >> ARM64_TCR_##fld##_SHIFT & ARM64_TCR_##fld##_MASK)	\
-	 << CTXDESC_CD_0_TCR_##fld##_SHIFT)
+#define ARM_SMMU_TCR2CD(tcr, fld)	FIELD_PREP(CTXDESC_CD_0_TCR_##fld, \
+					FIELD_GET(ARM64_TCR_##fld, tcr))
 
 /* Command queue */
 #define CMDQ_ENT_DWORDS			2
 #define CMDQ_MAX_SZ_SHIFT		8
 
-#define CMDQ_ERR_SHIFT			24
-#define CMDQ_ERR_MASK			0x7f
+#define CMDQ_CONS_ERR			GENMASK(30, 24)
 #define CMDQ_ERR_CERROR_NONE_IDX	0
 #define CMDQ_ERR_CERROR_ILL_IDX		1
 #define CMDQ_ERR_CERROR_ABT_IDX		2
 
-#define CMDQ_0_OP_SHIFT			0
-#define CMDQ_0_OP_MASK			0xffUL
+#define CMDQ_0_OP			GENMASK_ULL(7, 0)
 #define CMDQ_0_SSV			(1UL << 11)
 
-#define CMDQ_PREFETCH_0_SID_SHIFT	32
-#define CMDQ_PREFETCH_1_SIZE_SHIFT	0
-#define CMDQ_PREFETCH_1_ADDR_MASK	~0xfffUL
+#define CMDQ_PREFETCH_0_SID		GENMASK_ULL(63, 32)
+#define CMDQ_PREFETCH_1_SIZE		GENMASK_ULL(4, 0)
+#define CMDQ_PREFETCH_1_ADDR_MASK	GENMASK_ULL(63, 12)
 
-#define CMDQ_CFGI_0_SID_SHIFT		32
-#define CMDQ_CFGI_0_SID_MASK		0xffffffffUL
+#define CMDQ_CFGI_0_SID			GENMASK_ULL(63, 32)
 #define CMDQ_CFGI_1_LEAF		(1UL << 0)
-#define CMDQ_CFGI_1_RANGE_SHIFT		0
-#define CMDQ_CFGI_1_RANGE_MASK		0x1fUL
+#define CMDQ_CFGI_1_RANGE		GENMASK_ULL(4, 0)
 
-#define CMDQ_TLBI_0_VMID_SHIFT		32
-#define CMDQ_TLBI_0_ASID_SHIFT		48
+#define CMDQ_TLBI_0_VMID		GENMASK_ULL(47, 32)
+#define CMDQ_TLBI_0_ASID		GENMASK_ULL(63, 48)
 #define CMDQ_TLBI_1_LEAF		(1UL << 0)
-#define CMDQ_TLBI_1_VA_MASK		~0xfffUL
-#define CMDQ_TLBI_1_IPA_MASK		0xfffffffff000UL
-
-#define CMDQ_PRI_0_SSID_SHIFT		12
-#define CMDQ_PRI_0_SSID_MASK		0xfffffUL
-#define CMDQ_PRI_0_SID_SHIFT		32
-#define CMDQ_PRI_0_SID_MASK		0xffffffffUL
-#define CMDQ_PRI_1_GRPID_SHIFT		0
-#define CMDQ_PRI_1_GRPID_MASK		0x1ffUL
-#define CMDQ_PRI_1_RESP_SHIFT		12
-#define CMDQ_PRI_1_RESP_DENY		(0UL << CMDQ_PRI_1_RESP_SHIFT)
-#define CMDQ_PRI_1_RESP_FAIL		(1UL << CMDQ_PRI_1_RESP_SHIFT)
-#define CMDQ_PRI_1_RESP_SUCC		(2UL << CMDQ_PRI_1_RESP_SHIFT)
-
-#define CMDQ_SYNC_0_CS_SHIFT		12
-#define CMDQ_SYNC_0_CS_NONE		(0UL << CMDQ_SYNC_0_CS_SHIFT)
-#define CMDQ_SYNC_0_CS_IRQ		(1UL << CMDQ_SYNC_0_CS_SHIFT)
-#define CMDQ_SYNC_0_CS_SEV		(2UL << CMDQ_SYNC_0_CS_SHIFT)
-#define CMDQ_SYNC_0_MSH_SHIFT		22
-#define CMDQ_SYNC_0_MSH_ISH		(3UL << CMDQ_SYNC_0_MSH_SHIFT)
-#define CMDQ_SYNC_0_MSIATTR_SHIFT	24
-#define CMDQ_SYNC_0_MSIATTR_OIWB	(0xfUL << CMDQ_SYNC_0_MSIATTR_SHIFT)
-#define CMDQ_SYNC_0_MSIDATA_SHIFT	32
-#define CMDQ_SYNC_0_MSIDATA_MASK	0xffffffffUL
-#define CMDQ_SYNC_1_MSIADDR_SHIFT	0
-#define CMDQ_SYNC_1_MSIADDR_MASK	0xffffffffffffcUL
+#define CMDQ_TLBI_1_VA_MASK		GENMASK_ULL(63, 12)
+#define CMDQ_TLBI_1_IPA_MASK		GENMASK_ULL(51, 12)
+
+#define CMDQ_PRI_0_SSID			GENMASK_ULL(31, 12)
+#define CMDQ_PRI_0_SID			GENMASK_ULL(63, 32)
+#define CMDQ_PRI_1_GRPID		GENMASK_ULL(8, 0)
+#define CMDQ_PRI_1_RESP			GENMASK_ULL(13, 12)
+
+#define CMDQ_SYNC_0_CS			GENMASK_ULL(13, 12)
+#define CMDQ_SYNC_0_CS_NONE		0
+#define CMDQ_SYNC_0_CS_IRQ		1
+#define CMDQ_SYNC_0_CS_SEV		2
+#define CMDQ_SYNC_0_MSH			GENMASK_ULL(23, 22)
+#define CMDQ_SYNC_0_MSIATTR		GENMASK_ULL(27, 24)
+#define CMDQ_SYNC_0_MSIDATA		GENMASK_ULL(63, 32)
+#define CMDQ_SYNC_1_MSIADDR_MASK	GENMASK_ULL(51, 2)
 
 /* Event queue */
 #define EVTQ_ENT_DWORDS			4
 #define EVTQ_MAX_SZ_SHIFT		7
 
-#define EVTQ_0_ID_SHIFT			0
-#define EVTQ_0_ID_MASK			0xffUL
+#define EVTQ_0_ID			GENMASK_ULL(7, 0)
 
 /* PRI queue */
 #define PRIQ_ENT_DWORDS			2
 #define PRIQ_MAX_SZ_SHIFT		8
 
-#define PRIQ_0_SID_SHIFT		0
-#define PRIQ_0_SID_MASK			0xffffffffUL
-#define PRIQ_0_SSID_SHIFT		32
-#define PRIQ_0_SSID_MASK		0xfffffUL
+#define PRIQ_0_SID			GENMASK_ULL(31, 0)
+#define PRIQ_0_SSID			GENMASK_ULL(51, 32)
 #define PRIQ_0_PERM_PRIV		(1UL << 58)
 #define PRIQ_0_PERM_EXEC		(1UL << 59)
 #define PRIQ_0_PERM_READ		(1UL << 60)
@@ -411,10 +355,8 @@
 #define PRIQ_0_PRG_LAST			(1UL << 62)
 #define PRIQ_0_SSID_V			(1UL << 63)
 
-#define PRIQ_1_PRG_IDX_SHIFT		0
-#define PRIQ_1_PRG_IDX_MASK		0x1ffUL
-#define PRIQ_1_ADDR_SHIFT		12
-#define PRIQ_1_ADDR_MASK		0xfffffffffffffUL
+#define PRIQ_1_PRG_IDX			GENMASK_ULL(8, 0)
+#define PRIQ_1_ADDR_MASK		GENMASK_ULL(63, 12)
 
 /* High-level queue structures */
 #define ARM_SMMU_POLL_TIMEOUT_US	100
@@ -430,9 +372,9 @@ MODULE_PARM_DESC(disable_bypass,
 	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
 
 enum pri_resp {
-	PRI_RESP_DENY,
-	PRI_RESP_FAIL,
-	PRI_RESP_SUCC,
+	PRI_RESP_DENY = 0,
+	PRI_RESP_FAIL = 1,
+	PRI_RESP_SUCC = 2,
 };
 
 enum arm_smmu_msi_index {
@@ -611,6 +553,7 @@ struct arm_smmu_device {
 #define ARM_SMMU_FEAT_STALLS		(1 << 11)
 #define ARM_SMMU_FEAT_HYP		(1 << 12)
 #define ARM_SMMU_FEAT_STALL_FORCE	(1 << 13)
+#define ARM_SMMU_FEAT_VAX		(1 << 14)
 	u32				features;
 
 #define ARM_SMMU_OPT_SKIP_PREFETCH	(1 << 0)
@@ -836,67 +779,64 @@ static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
 {
 	memset(cmd, 0, CMDQ_ENT_DWORDS << 3);
-	cmd[0] |= (ent->opcode & CMDQ_0_OP_MASK) << CMDQ_0_OP_SHIFT;
+	cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
 
 	switch (ent->opcode) {
 	case CMDQ_OP_TLBI_EL2_ALL:
 	case CMDQ_OP_TLBI_NSNH_ALL:
 		break;
 	case CMDQ_OP_PREFETCH_CFG:
-		cmd[0] |= (u64)ent->prefetch.sid << CMDQ_PREFETCH_0_SID_SHIFT;
-		cmd[1] |= ent->prefetch.size << CMDQ_PREFETCH_1_SIZE_SHIFT;
+		cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
+		cmd[1] |= FIELD_PREP(CMDQ_PREFETCH_1_SIZE, ent->prefetch.size);
 		cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
 		break;
 	case CMDQ_OP_CFGI_STE:
-		cmd[0] |= (u64)ent->cfgi.sid << CMDQ_CFGI_0_SID_SHIFT;
-		cmd[1] |= ent->cfgi.leaf ? CMDQ_CFGI_1_LEAF : 0;
+		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
+		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
 		break;
 	case CMDQ_OP_CFGI_ALL:
 		/* Cover the entire SID range */
-		cmd[1] |= CMDQ_CFGI_1_RANGE_MASK << CMDQ_CFGI_1_RANGE_SHIFT;
+		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
 		break;
 	case CMDQ_OP_TLBI_NH_VA:
-		cmd[0] |= (u64)ent->tlbi.asid << CMDQ_TLBI_0_ASID_SHIFT;
-		cmd[1] |= ent->tlbi.leaf ? CMDQ_TLBI_1_LEAF : 0;
+		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
+		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
 		break;
 	case CMDQ_OP_TLBI_S2_IPA:
-		cmd[0] |= (u64)ent->tlbi.vmid << CMDQ_TLBI_0_VMID_SHIFT;
-		cmd[1] |= ent->tlbi.leaf ? CMDQ_TLBI_1_LEAF : 0;
+		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
+		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
 		break;
 	case CMDQ_OP_TLBI_NH_ASID:
-		cmd[0] |= (u64)ent->tlbi.asid << CMDQ_TLBI_0_ASID_SHIFT;
+		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
 		/* Fallthrough */
 	case CMDQ_OP_TLBI_S12_VMALL:
-		cmd[0] |= (u64)ent->tlbi.vmid << CMDQ_TLBI_0_VMID_SHIFT;
+		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
 		break;
 	case CMDQ_OP_PRI_RESP:
-		cmd[0] |= ent->substream_valid ? CMDQ_0_SSV : 0;
-		cmd[0] |= ent->pri.ssid << CMDQ_PRI_0_SSID_SHIFT;
-		cmd[0] |= (u64)ent->pri.sid << CMDQ_PRI_0_SID_SHIFT;
-		cmd[1] |= ent->pri.grpid << CMDQ_PRI_1_GRPID_SHIFT;
+		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
+		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
+		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
+		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
 		switch (ent->pri.resp) {
 		case PRI_RESP_DENY:
-			cmd[1] |= CMDQ_PRI_1_RESP_DENY;
-			break;
 		case PRI_RESP_FAIL:
-			cmd[1] |= CMDQ_PRI_1_RESP_FAIL;
-			break;
 		case PRI_RESP_SUCC:
-			cmd[1] |= CMDQ_PRI_1_RESP_SUCC;
 			break;
 		default:
 			return -EINVAL;
 		}
+		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
 		break;
 	case CMDQ_OP_CMD_SYNC:
 		if (ent->sync.msiaddr)
-			cmd[0] |= CMDQ_SYNC_0_CS_IRQ;
+			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
 		else
-			cmd[0] |= CMDQ_SYNC_0_CS_SEV;
-		cmd[0] |= CMDQ_SYNC_0_MSH_ISH | CMDQ_SYNC_0_MSIATTR_OIWB;
-		cmd[0] |= (u64)ent->sync.msidata << CMDQ_SYNC_0_MSIDATA_SHIFT;
+			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
+		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
+		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
+		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIDATA, ent->sync.msidata);
 		cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
 		break;
 	default:
@@ -918,7 +858,7 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
 	u64 cmd[CMDQ_ENT_DWORDS];
 	struct arm_smmu_queue *q = &smmu->cmdq.q;
 	u32 cons = readl_relaxed(q->cons_reg);
-	u32 idx = cons >> CMDQ_ERR_SHIFT & CMDQ_ERR_MASK;
+	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
 	struct arm_smmu_cmdq_ent cmd_sync = {
 		.opcode = CMDQ_OP_CMD_SYNC,
 	};
@@ -1083,8 +1023,8 @@ static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu,
 #ifdef __BIG_ENDIAN
 	      CTXDESC_CD_0_ENDI |
 #endif
-	      CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET_PRIVATE |
-	      CTXDESC_CD_0_AA64 | (u64)cfg->cd.asid << CTXDESC_CD_0_ASID_SHIFT |
+	      CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET |
+	      CTXDESC_CD_0_AA64 | FIELD_PREP(CTXDESC_CD_0_ASID, cfg->cd.asid) |
 	      CTXDESC_CD_0_V;
 
 	/* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
@@ -1093,10 +1033,10 @@ static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu,
 
 	cfg->cdptr[0] = cpu_to_le64(val);
 
-	val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK << CTXDESC_CD_1_TTB0_SHIFT;
+	val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK;
 	cfg->cdptr[1] = cpu_to_le64(val);
 
-	cfg->cdptr[3] = cpu_to_le64(cfg->cd.mair << CTXDESC_CD_3_MAIR_SHIFT);
+	cfg->cdptr[3] = cpu_to_le64(cfg->cd.mair);
 }
 
 /* Stream table manipulation functions */
@@ -1105,10 +1045,8 @@ arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
 {
 	u64 val = 0;
 
-	val |= (desc->span & STRTAB_L1_DESC_SPAN_MASK)
-		<< STRTAB_L1_DESC_SPAN_SHIFT;
-	val |= desc->l2ptr_dma &
-	       STRTAB_L1_DESC_L2PTR_MASK << STRTAB_L1_DESC_L2PTR_SHIFT;
+	val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
+	val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
 
 	*dst = cpu_to_le64(val);
 }
@@ -1156,10 +1094,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
 	};
 
 	if (val & STRTAB_STE_0_V) {
-		u64 cfg;
-
-		cfg = val & STRTAB_STE_0_CFG_MASK << STRTAB_STE_0_CFG_SHIFT;
-		switch (cfg) {
+		switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
 		case STRTAB_STE_0_CFG_BYPASS:
 			break;
 		case STRTAB_STE_0_CFG_S1_TRANS:
@@ -1180,13 +1115,13 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
 	/* Bypass/fault */
 	if (!ste->assigned || !(ste->s1_cfg || ste->s2_cfg)) {
 		if (!ste->assigned && disable_bypass)
-			val |= STRTAB_STE_0_CFG_ABORT;
+			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
 		else
-			val |= STRTAB_STE_0_CFG_BYPASS;
+			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
 
 		dst[0] = cpu_to_le64(val);
-		dst[1] = cpu_to_le64(STRTAB_STE_1_SHCFG_INCOMING
-			 << STRTAB_STE_1_SHCFG_SHIFT);
+		dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
+						STRTAB_STE_1_SHCFG_INCOMING));
 		dst[2] = 0; /* Nuke the VMID */
 		/*
 		 * The SMMU can perform negative caching, so we must sync
@@ -1200,41 +1135,36 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
 	if (ste->s1_cfg) {
 		BUG_ON(ste_live);
 		dst[1] = cpu_to_le64(
-			 STRTAB_STE_1_S1C_CACHE_WBRA
-			 << STRTAB_STE_1_S1CIR_SHIFT |
-			 STRTAB_STE_1_S1C_CACHE_WBRA
-			 << STRTAB_STE_1_S1COR_SHIFT |
-			 STRTAB_STE_1_S1C_SH_ISH << STRTAB_STE_1_S1CSH_SHIFT |
+			 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
+			 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
+			 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
 #ifdef CONFIG_PCI_ATS
-			 STRTAB_STE_1_EATS_TRANS << STRTAB_STE_1_EATS_SHIFT |
+			 FIELD_PREP(STRTAB_STE_1_EATS, STRTAB_STE_1_EATS_TRANS) |
 #endif
-			 STRTAB_STE_1_STRW_NSEL1 << STRTAB_STE_1_STRW_SHIFT);
+			 FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
 
 		if (smmu->features & ARM_SMMU_FEAT_STALLS &&
 		   !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
 			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
 
-		val |= (ste->s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK
-		        << STRTAB_STE_0_S1CTXPTR_SHIFT) |
-			STRTAB_STE_0_CFG_S1_TRANS;
+		val |= (ste->s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
+			FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS);
 	}
 
 	if (ste->s2_cfg) {
 		BUG_ON(ste_live);
 		dst[2] = cpu_to_le64(
-			 ste->s2_cfg->vmid << STRTAB_STE_2_S2VMID_SHIFT |
-			 (ste->s2_cfg->vtcr & STRTAB_STE_2_VTCR_MASK)
-			  << STRTAB_STE_2_VTCR_SHIFT |
+			 FIELD_PREP(STRTAB_STE_2_S2VMID, ste->s2_cfg->vmid) |
+			 FIELD_PREP(STRTAB_STE_2_VTCR, ste->s2_cfg->vtcr) |
 #ifdef __BIG_ENDIAN
 			 STRTAB_STE_2_S2ENDI |
 #endif
 			 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
 			 STRTAB_STE_2_S2R);
 
-		dst[3] = cpu_to_le64(ste->s2_cfg->vttbr &
-			 STRTAB_STE_3_S2TTB_MASK << STRTAB_STE_3_S2TTB_SHIFT);
+		dst[3] = cpu_to_le64(ste->s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
 
-		val |= STRTAB_STE_0_CFG_S2_TRANS;
+		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
 	}
 
 	arm_smmu_sync_ste_for_sid(smmu, sid);
@@ -1295,7 +1225,7 @@ static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
 
 	do {
 		while (!queue_remove_raw(q, evt)) {
-			u8 id = evt[0] >> EVTQ_0_ID_SHIFT & EVTQ_0_ID_MASK;
+			u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
 
 			dev_info(smmu->dev, "event 0x%02x received:\n", id);
 			for (i = 0; i < ARRAY_SIZE(evt); ++i)
@@ -1323,11 +1253,11 @@ static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
 	u16 grpid;
 	bool ssv, last;
 
-	sid = evt[0] >> PRIQ_0_SID_SHIFT & PRIQ_0_SID_MASK;
-	ssv = evt[0] & PRIQ_0_SSID_V;
-	ssid = ssv ? evt[0] >> PRIQ_0_SSID_SHIFT & PRIQ_0_SSID_MASK : 0;
-	last = evt[0] & PRIQ_0_PRG_LAST;
-	grpid = evt[1] >> PRIQ_1_PRG_IDX_SHIFT & PRIQ_1_PRG_IDX_MASK;
+	sid = FIELD_GET(PRIQ_0_SID, evt[0]);
+	ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
+	ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
+	last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
+	grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
 
 	dev_info(smmu->dev, "unexpected PRI request received:\n");
 	dev_info(smmu->dev,
@@ -1337,7 +1267,7 @@ static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
 		 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
 		 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
 		 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
-		 evt[1] & PRIQ_1_ADDR_MASK << PRIQ_1_ADDR_SHIFT);
+		 evt[1] & PRIQ_1_ADDR_MASK);
 
 	if (last) {
 		struct arm_smmu_cmdq_ent cmd = {
@@ -1664,7 +1594,8 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain)
 
 	switch (smmu_domain->stage) {
 	case ARM_SMMU_DOMAIN_S1:
-		ias = VA_BITS;
+		ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
+		ias = min_t(unsigned long, ias, VA_BITS);
 		oas = smmu->ias;
 		fmt = ARM_64_LPAE_S1;
 		finalise_stage_fn = arm_smmu_domain_finalise_s1;
@@ -1696,7 +1627,7 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain)
 		return -ENOMEM;
 
 	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
-	domain->geometry.aperture_end = (1UL << ias) - 1;
+	domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
 	domain->geometry.force_aperture = true;
 
 	ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
@@ -2102,9 +2033,8 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
 	q->ent_dwords	= dwords;
 
 	q->q_base  = Q_BASE_RWA;
-	q->q_base |= q->base_dma & Q_BASE_ADDR_MASK << Q_BASE_ADDR_SHIFT;
-	q->q_base |= (q->max_n_shift & Q_BASE_LOG2SIZE_MASK)
-		     << Q_BASE_LOG2SIZE_SHIFT;
+	q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
+	q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->max_n_shift);
 
 	q->prod = q->cons = 0;
 	return 0;
@@ -2186,11 +2116,9 @@ static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
 	cfg->strtab = strtab;
 
 	/* Configure strtab_base_cfg for 2 levels */
-	reg  = STRTAB_BASE_CFG_FMT_2LVL;
-	reg |= (size & STRTAB_BASE_CFG_LOG2SIZE_MASK)
-		<< STRTAB_BASE_CFG_LOG2SIZE_SHIFT;
-	reg |= (STRTAB_SPLIT & STRTAB_BASE_CFG_SPLIT_MASK)
-		<< STRTAB_BASE_CFG_SPLIT_SHIFT;
+	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
+	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
+	reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
 	cfg->strtab_base_cfg = reg;
 
 	return arm_smmu_init_l1_strtab(smmu);
@@ -2216,9 +2144,8 @@ static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
 	cfg->num_l1_ents = 1 << smmu->sid_bits;
 
 	/* Configure strtab_base_cfg for a linear table covering all SIDs */
-	reg  = STRTAB_BASE_CFG_FMT_LINEAR;
-	reg |= (smmu->sid_bits & STRTAB_BASE_CFG_LOG2SIZE_MASK)
-		<< STRTAB_BASE_CFG_LOG2SIZE_SHIFT;
+	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
+	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
 	cfg->strtab_base_cfg = reg;
 
 	arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
@@ -2239,8 +2166,7 @@ static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
 		return ret;
 
 	/* Set the strtab base address */
-	reg  = smmu->strtab_cfg.strtab_dma &
-	       STRTAB_BASE_ADDR_MASK << STRTAB_BASE_ADDR_SHIFT;
+	reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
 	reg |= STRTAB_BASE_RA;
 	smmu->strtab_cfg.strtab_base = reg;
 
@@ -2303,11 +2229,11 @@ static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
 	phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
 
 	doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
-	doorbell &= MSI_CFG0_ADDR_MASK << MSI_CFG0_ADDR_SHIFT;
+	doorbell &= MSI_CFG0_ADDR_MASK;
 
 	writeq_relaxed(doorbell, smmu->base + cfg[0]);
 	writel_relaxed(msg->data, smmu->base + cfg[1]);
-	writel_relaxed(MSI_CFG2_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
+	writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
 }
 
 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
@@ -2328,10 +2254,15 @@ static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
 	if (!(smmu->features & ARM_SMMU_FEAT_MSI))
 		return;
 
+	if (!dev->msi_domain) {
+		dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
+		return;
+	}
+
 	/* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
 	ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
 	if (ret) {
-		dev_warn(dev, "failed to allocate MSIs\n");
+		dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
 		return;
 	}
 
@@ -2370,6 +2301,8 @@ static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
 						"arm-smmu-v3-evtq", smmu);
 		if (ret < 0)
 			dev_warn(smmu->dev, "failed to enable evtq irq\n");
+	} else {
+		dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
 	}
 
 	irq = smmu->gerr_irq;
@@ -2378,6 +2311,8 @@ static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
 				       0, "arm-smmu-v3-gerror", smmu);
 		if (ret < 0)
 			dev_warn(smmu->dev, "failed to enable gerror irq\n");
+	} else {
+		dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
 	}
 
 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
@@ -2391,6 +2326,8 @@ static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
 			if (ret < 0)
 				dev_warn(smmu->dev,
 					 "failed to enable priq irq\n");
+		} else {
+			dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
 		}
 	}
 }
@@ -2463,12 +2400,12 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
 		return ret;
 
 	/* CR1 (table and queue memory attributes) */
-	reg = (CR1_SH_ISH << CR1_TABLE_SH_SHIFT) |
-	      (CR1_CACHE_WB << CR1_TABLE_OC_SHIFT) |
-	      (CR1_CACHE_WB << CR1_TABLE_IC_SHIFT) |
-	      (CR1_SH_ISH << CR1_QUEUE_SH_SHIFT) |
-	      (CR1_CACHE_WB << CR1_QUEUE_OC_SHIFT) |
-	      (CR1_CACHE_WB << CR1_QUEUE_IC_SHIFT);
+	reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
+	      FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
+	      FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
+	      FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
+	      FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
+	      FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
 
 	/* CR2 (random crap) */
@@ -2578,7 +2515,7 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
 
 	/* 2-level structures */
-	if ((reg & IDR0_ST_LVL_MASK << IDR0_ST_LVL_SHIFT) == IDR0_ST_LVL_2LVL)
+	if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
 		smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
 
 	if (reg & IDR0_CD2L)
@@ -2589,7 +2526,7 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
 	 * We currently require the same endianness as the CPU, but this
 	 * could be changed later by adding a new IO_PGTABLE_QUIRK.
 	 */
-	switch (reg & IDR0_TTENDIAN_MASK << IDR0_TTENDIAN_SHIFT) {
+	switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
 	case IDR0_TTENDIAN_MIXED:
 		smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
 		break;
@@ -2631,7 +2568,7 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
 		dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
 			 coherent ? "true" : "false");
 
-	switch (reg & IDR0_STALL_MODEL_MASK << IDR0_STALL_MODEL_SHIFT) {
+	switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
 	case IDR0_STALL_MODEL_FORCE:
 		smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
 		/* Fallthrough */
@@ -2651,7 +2588,7 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
 	}
 
 	/* We only support the AArch64 table format at present */
-	switch (reg & IDR0_TTF_MASK << IDR0_TTF_SHIFT) {
+	switch (FIELD_GET(IDR0_TTF, reg)) {
 	case IDR0_TTF_AARCH32_64:
 		smmu->ias = 40;
 		/* Fallthrough */
@@ -2674,22 +2611,22 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
 	}
 
 	/* Queue sizes, capped at 4k */
-	smmu->cmdq.q.max_n_shift = min((u32)CMDQ_MAX_SZ_SHIFT,
-				       reg >> IDR1_CMDQ_SHIFT & IDR1_CMDQ_MASK);
+	smmu->cmdq.q.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
+					 FIELD_GET(IDR1_CMDQS, reg));
 	if (!smmu->cmdq.q.max_n_shift) {
 		/* Odd alignment restrictions on the base, so ignore for now */
 		dev_err(smmu->dev, "unit-length command queue not supported\n");
 		return -ENXIO;
 	}
 
-	smmu->evtq.q.max_n_shift = min((u32)EVTQ_MAX_SZ_SHIFT,
-				       reg >> IDR1_EVTQ_SHIFT & IDR1_EVTQ_MASK);
-	smmu->priq.q.max_n_shift = min((u32)PRIQ_MAX_SZ_SHIFT,
-				       reg >> IDR1_PRIQ_SHIFT & IDR1_PRIQ_MASK);
+	smmu->evtq.q.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
+					 FIELD_GET(IDR1_EVTQS, reg));
+	smmu->priq.q.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
+					 FIELD_GET(IDR1_PRIQS, reg));
 
 	/* SID/SSID sizes */
-	smmu->ssid_bits = reg >> IDR1_SSID_SHIFT & IDR1_SSID_MASK;
-	smmu->sid_bits = reg >> IDR1_SID_SHIFT & IDR1_SID_MASK;
+	smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
+	smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
 
 	/*
 	 * If the SMMU supports fewer bits than would fill a single L2 stream
@@ -2702,8 +2639,7 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
 
 	/* Maximum number of outstanding stalls */
-	smmu->evtq.max_stalls = reg >> IDR5_STALL_MAX_SHIFT
-				& IDR5_STALL_MAX_MASK;
+	smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
 
 	/* Page sizes */
 	if (reg & IDR5_GRAN64K)
@@ -2713,13 +2649,12 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
 	if (reg & IDR5_GRAN4K)
 		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
 
-	if (arm_smmu_ops.pgsize_bitmap == -1UL)
-		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
-	else
-		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
+	/* Input address size */
+	if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
+		smmu->features |= ARM_SMMU_FEAT_VAX;
 
 	/* Output address size */
-	switch (reg & IDR5_OAS_MASK << IDR5_OAS_SHIFT) {
+	switch (FIELD_GET(IDR5_OAS, reg)) {
 	case IDR5_OAS_32_BIT:
 		smmu->oas = 32;
 		break;
@@ -2735,6 +2670,10 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
 	case IDR5_OAS_44_BIT:
 		smmu->oas = 44;
 		break;
+	case IDR5_OAS_52_BIT:
+		smmu->oas = 52;
+		smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
+		break;
 	default:
 		dev_info(smmu->dev,
 			"unknown output address size. Truncating to 48-bit\n");
@@ -2743,6 +2682,11 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
 		smmu->oas = 48;
 	}
 
+	if (arm_smmu_ops.pgsize_bitmap == -1UL)
+		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
+	else
+		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
+
 	/* Set the DMA mask for our table walker */
 	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
 		dev_warn(smmu->dev,
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 25914d36c5ac..f05f3cf90756 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -19,6 +19,7 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/acpi_iort.h>
 #include <linux/device.h>
 #include <linux/dma-iommu.h>
 #include <linux/gfp.h>
@@ -167,13 +168,18 @@ EXPORT_SYMBOL(iommu_put_dma_cookie);
  *
  * IOMMU drivers can use this to implement their .get_resv_regions callback
  * for general non-IOMMU-specific reservations. Currently, this covers host
- * bridge windows for PCI devices.
+ * bridge windows for PCI devices and GICv3 ITS region reservation on ACPI
+ * based ARM platforms that may require HW MSI reservation.
  */
 void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list)
 {
 	struct pci_host_bridge *bridge;
 	struct resource_entry *window;
 
+	if (!is_of_node(dev->iommu_fwspec->iommu_fwnode) &&
+		iort_iommu_msi_get_resv_regions(dev, list) < 0)
+		return;
+
 	if (!dev_is_pci(dev))
 		return;
 
diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index 9a7ffd13c7f0..accf58388bdb 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -806,7 +806,7 @@ int __init dmar_dev_scope_init(void)
 	return dmar_dev_scope_status;
 }
 
-void dmar_register_bus_notifier(void)
+void __init dmar_register_bus_notifier(void)
 {
 	bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb);
 }
diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index c5f4f7691b57..85879cfec52f 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -1239,17 +1239,6 @@ static phys_addr_t exynos_iommu_iova_to_phys(struct iommu_domain *iommu_domain,
 	return phys;
 }
 
-static struct iommu_group *get_device_iommu_group(struct device *dev)
-{
-	struct iommu_group *group;
-
-	group = iommu_group_get(dev);
-	if (!group)
-		group = iommu_group_alloc();
-
-	return group;
-}
-
 static int exynos_iommu_add_device(struct device *dev)
 {
 	struct exynos_iommu_owner *owner = dev->archdata.iommu;
@@ -1345,7 +1334,7 @@ static const struct iommu_ops exynos_iommu_ops = {
 	.unmap = exynos_iommu_unmap,
 	.map_sg = default_iommu_map_sg,
 	.iova_to_phys = exynos_iommu_iova_to_phys,
-	.device_group = get_device_iommu_group,
+	.device_group = generic_device_group,
 	.add_device = exynos_iommu_add_device,
 	.remove_device = exynos_iommu_remove_device,
 	.pgsize_bitmap = SECT_SIZE | LPAGE_SIZE | SPAGE_SIZE,
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 24d1b1b42013..749d8f235346 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -5043,7 +5043,6 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain,
 {
 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
 	struct page *freelist = NULL;
-	struct intel_iommu *iommu;
 	unsigned long start_pfn, last_pfn;
 	unsigned int npages;
 	int iommu_id, level = 0;
@@ -5062,12 +5061,9 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain,
 
 	npages = last_pfn - start_pfn + 1;
 
-	for_each_domain_iommu(iommu_id, dmar_domain) {
-		iommu = g_iommus[iommu_id];
-
+	for_each_domain_iommu(iommu_id, dmar_domain)
 		iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
 				      start_pfn, npages, !freelist, 0);
-	}
 
 	dma_free_pagelist(freelist);
 
diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index 99bc9bd64b9e..e8cd984cf9c8 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -396,6 +396,7 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
 				pasid_max - 1, GFP_KERNEL);
 		if (ret < 0) {
 			kfree(svm);
+			kfree(sdev);
 			goto out;
 		}
 		svm->pasid = ret;
@@ -422,17 +423,13 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
 		iommu->pasid_table[svm->pasid].val = pasid_entry_val;
 
 		wmb();
-		/* In caching mode, we still have to flush with PASID 0 when
-		 * a PASID table entry becomes present. Not entirely clear
-		 * *why* that would be the case — surely we could just issue
-		 * a flush with the PASID value that we've changed? The PASID
-		 * is the index into the table, after all. It's not like domain
-		 * IDs in the case of the equivalent context-entry change in
-		 * caching mode. And for that matter it's not entirely clear why
-		 * a VMM would be in the business of caching the PASID table
-		 * anyway. Surely that can be left entirely to the guest? */
+
+		/*
+		 * Flush PASID cache when a PASID table entry becomes
+		 * present.
+		 */
 		if (cap_caching_mode(iommu->cap))
-			intel_flush_pasid_dev(svm, sdev, 0);
+			intel_flush_pasid_dev(svm, sdev, svm->pasid);
 	}
 	list_add_rcu(&sdev->list, &svm->devs);
 
diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index 2ca08dc9331c..10e4a3d11c02 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -357,8 +357,8 @@ static bool arm_v7s_pte_is_cont(arm_v7s_iopte pte, int lvl)
 	return false;
 }
 
-static int __arm_v7s_unmap(struct arm_v7s_io_pgtable *, unsigned long,
-			   size_t, int, arm_v7s_iopte *);
+static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *, unsigned long,
+			      size_t, int, arm_v7s_iopte *);
 
 static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data,
 			    unsigned long iova, phys_addr_t paddr, int prot,
@@ -541,9 +541,10 @@ static arm_v7s_iopte arm_v7s_split_cont(struct arm_v7s_io_pgtable *data,
 	return pte;
 }
 
-static int arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data,
-				   unsigned long iova, size_t size,
-				   arm_v7s_iopte blk_pte, arm_v7s_iopte *ptep)
+static size_t arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data,
+				      unsigned long iova, size_t size,
+				      arm_v7s_iopte blk_pte,
+				      arm_v7s_iopte *ptep)
 {
 	struct io_pgtable_cfg *cfg = &data->iop.cfg;
 	arm_v7s_iopte pte, *tablep;
@@ -584,9 +585,9 @@ static int arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data,
 	return size;
 }
 
-static int __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
-			    unsigned long iova, size_t size, int lvl,
-			    arm_v7s_iopte *ptep)
+static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
+			      unsigned long iova, size_t size, int lvl,
+			      arm_v7s_iopte *ptep)
 {
 	arm_v7s_iopte pte[ARM_V7S_CONT_PAGES];
 	struct io_pgtable *iop = &data->iop;
@@ -656,8 +657,8 @@ static int __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
 	return __arm_v7s_unmap(data, iova, size, lvl + 1, ptep);
 }
 
-static int arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova,
-			 size_t size)
+static size_t arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova,
+			    size_t size)
 {
 	struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);
 
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 51e5c43caed1..39c2a056da21 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -21,6 +21,7 @@
 #define pr_fmt(fmt)	"arm-lpae io-pgtable: " fmt
 
 #include <linux/atomic.h>
+#include <linux/bitops.h>
 #include <linux/iommu.h>
 #include <linux/kernel.h>
 #include <linux/sizes.h>
@@ -32,7 +33,7 @@
 
 #include "io-pgtable.h"
 
-#define ARM_LPAE_MAX_ADDR_BITS		48
+#define ARM_LPAE_MAX_ADDR_BITS		52
 #define ARM_LPAE_S2_MAX_CONCAT_PAGES	16
 #define ARM_LPAE_MAX_LEVELS		4
 
@@ -86,6 +87,8 @@
 #define ARM_LPAE_PTE_TYPE_TABLE		3
 #define ARM_LPAE_PTE_TYPE_PAGE		3
 
+#define ARM_LPAE_PTE_ADDR_MASK		GENMASK_ULL(47,12)
+
 #define ARM_LPAE_PTE_NSTABLE		(((arm_lpae_iopte)1) << 63)
 #define ARM_LPAE_PTE_XN			(((arm_lpae_iopte)3) << 53)
 #define ARM_LPAE_PTE_AF			(((arm_lpae_iopte)1) << 10)
@@ -159,6 +162,7 @@
 #define ARM_LPAE_TCR_PS_42_BIT		0x3ULL
 #define ARM_LPAE_TCR_PS_44_BIT		0x4ULL
 #define ARM_LPAE_TCR_PS_48_BIT		0x5ULL
+#define ARM_LPAE_TCR_PS_52_BIT		0x6ULL
 
 #define ARM_LPAE_MAIR_ATTR_SHIFT(n)	((n) << 3)
 #define ARM_LPAE_MAIR_ATTR_MASK		0xff
@@ -170,9 +174,7 @@
 #define ARM_LPAE_MAIR_ATTR_IDX_DEV	2
 
 /* IOPTE accessors */
-#define iopte_deref(pte,d)					\
-	(__va((pte) & ((1ULL << ARM_LPAE_MAX_ADDR_BITS) - 1)	\
-	& ~(ARM_LPAE_GRANULE(d) - 1ULL)))
+#define iopte_deref(pte,d) __va(iopte_to_paddr(pte, d))
 
 #define iopte_type(pte,l)					\
 	(((pte) >> ARM_LPAE_PTE_TYPE_SHIFT) & ARM_LPAE_PTE_TYPE_MASK)
@@ -184,12 +186,6 @@
 		(iopte_type(pte,l) == ARM_LPAE_PTE_TYPE_PAGE) :	\
 		(iopte_type(pte,l) == ARM_LPAE_PTE_TYPE_BLOCK))
 
-#define iopte_to_pfn(pte,d)					\
-	(((pte) & ((1ULL << ARM_LPAE_MAX_ADDR_BITS) - 1)) >> (d)->pg_shift)
-
-#define pfn_to_iopte(pfn,d)					\
-	(((pfn) << (d)->pg_shift) & ((1ULL << ARM_LPAE_MAX_ADDR_BITS) - 1))
-
 struct arm_lpae_io_pgtable {
 	struct io_pgtable	iop;
 
@@ -203,6 +199,27 @@ struct arm_lpae_io_pgtable {
 
 typedef u64 arm_lpae_iopte;
 
+static arm_lpae_iopte paddr_to_iopte(phys_addr_t paddr,
+				     struct arm_lpae_io_pgtable *data)
+{
+	arm_lpae_iopte folded = (arm_lpae_iopte)paddr >> (48 - 12);
+
+	/* Bits 51:48 fold onto 15:12; one of those two is always RES0 */
+	return (folded | paddr) & ARM_LPAE_PTE_ADDR_MASK;
+}
+
+static phys_addr_t iopte_to_paddr(arm_lpae_iopte pte,
+				  struct arm_lpae_io_pgtable *data)
+{
+	u64 addr = pte & ARM_LPAE_PTE_ADDR_MASK;
+
+	/* With pg_shift >= 16, PTE bits 15:12 hold address bits 51:48 */
+	if (data->pg_shift >= 16)
+		addr = (addr | (addr << (48 - 12))) &
+		       (ARM_LPAE_PTE_ADDR_MASK << 4);
+	return addr;
+}
+
 static bool selftest_running = false;
 
 static dma_addr_t __arm_lpae_dma_addr(void *pages)
@@ -268,9 +285,9 @@ static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte,
 		__arm_lpae_sync_pte(ptep, cfg);
 }
 
-static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
-			    unsigned long iova, size_t size, int lvl,
-			    arm_lpae_iopte *ptep);
+static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
+			       unsigned long iova, size_t size, int lvl,
+			       arm_lpae_iopte *ptep);
 
 static void __arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
 				phys_addr_t paddr, arm_lpae_iopte prot,
@@ -287,7 +304,7 @@ static void __arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
 		pte |= ARM_LPAE_PTE_TYPE_BLOCK;
 
 	pte |= ARM_LPAE_PTE_AF | ARM_LPAE_PTE_SH_IS;
-	pte |= pfn_to_iopte(paddr >> data->pg_shift, data);
+	pte |= paddr_to_iopte(paddr, data);
 
 	__arm_lpae_set_pte(ptep, pte, &data->iop.cfg);
 }
@@ -506,10 +523,10 @@ static void arm_lpae_free_pgtable(struct io_pgtable *iop)
 	kfree(data);
 }
 
-static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
-				    unsigned long iova, size_t size,
-				    arm_lpae_iopte blk_pte, int lvl,
-				    arm_lpae_iopte *ptep)
+static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
+				       unsigned long iova, size_t size,
+				       arm_lpae_iopte blk_pte, int lvl,
+				       arm_lpae_iopte *ptep)
 {
 	struct io_pgtable_cfg *cfg = &data->iop.cfg;
 	arm_lpae_iopte pte, *tablep;
@@ -528,7 +545,7 @@ static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
 	if (size == split_sz)
 		unmap_idx = ARM_LPAE_LVL_IDX(iova, lvl, data);
 
-	blk_paddr = iopte_to_pfn(blk_pte, data) << data->pg_shift;
+	blk_paddr = iopte_to_paddr(blk_pte, data);
 	pte = iopte_prot(blk_pte);
 
 	for (i = 0; i < tablesz / sizeof(pte); i++, blk_paddr += split_sz) {
@@ -560,9 +577,9 @@ static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
 	return size;
 }
 
-static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
-			    unsigned long iova, size_t size, int lvl,
-			    arm_lpae_iopte *ptep)
+static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
+			       unsigned long iova, size_t size, int lvl,
+			       arm_lpae_iopte *ptep)
 {
 	arm_lpae_iopte pte;
 	struct io_pgtable *iop = &data->iop;
@@ -606,8 +623,8 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
 	return __arm_lpae_unmap(data, iova, size, lvl + 1, ptep);
 }
 
-static int arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
-			  size_t size)
+static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
+			     size_t size)
 {
 	struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
 	arm_lpae_iopte *ptep = data->pgd;
@@ -652,12 +669,13 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
 
 found_translation:
 	iova &= (ARM_LPAE_BLOCK_SIZE(lvl, data) - 1);
-	return ((phys_addr_t)iopte_to_pfn(pte,data) << data->pg_shift) | iova;
+	return iopte_to_paddr(pte, data) | iova;
 }
 
 static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg)
 {
-	unsigned long granule;
+	unsigned long granule, page_sizes;
+	unsigned int max_addr_bits = 48;
 
 	/*
 	 * We need to restrict the supported page sizes to match the
@@ -677,17 +695,24 @@ static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg)
 
 	switch (granule) {
 	case SZ_4K:
-		cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G);
+		page_sizes = (SZ_4K | SZ_2M | SZ_1G);
 		break;
 	case SZ_16K:
-		cfg->pgsize_bitmap &= (SZ_16K | SZ_32M);
+		page_sizes = (SZ_16K | SZ_32M);
 		break;
 	case SZ_64K:
-		cfg->pgsize_bitmap &= (SZ_64K | SZ_512M);
+		max_addr_bits = 52;
+		page_sizes = (SZ_64K | SZ_512M);
+		if (cfg->oas > 48)
+			page_sizes |= 1ULL << 42; /* 4TB */
 		break;
 	default:
-		cfg->pgsize_bitmap = 0;
+		page_sizes = 0;
 	}
+
+	cfg->pgsize_bitmap &= page_sizes;
+	cfg->ias = min(cfg->ias, max_addr_bits);
+	cfg->oas = min(cfg->oas, max_addr_bits);
 }
 
 static struct arm_lpae_io_pgtable *
@@ -784,6 +809,9 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
 	case 48:
 		reg |= (ARM_LPAE_TCR_PS_48_BIT << ARM_LPAE_TCR_IPS_SHIFT);
 		break;
+	case 52:
+		reg |= (ARM_LPAE_TCR_PS_52_BIT << ARM_LPAE_TCR_IPS_SHIFT);
+		break;
 	default:
 		goto out_free_data;
 	}
@@ -891,6 +919,9 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
 	case 48:
 		reg |= (ARM_LPAE_TCR_PS_48_BIT << ARM_LPAE_TCR_PS_SHIFT);
 		break;
+	case 52:
+		reg |= (ARM_LPAE_TCR_PS_52_BIT << ARM_LPAE_TCR_PS_SHIFT);
+		break;
 	default:
 		goto out_free_data;
 	}
diff --git a/drivers/iommu/io-pgtable.h b/drivers/iommu/io-pgtable.h
index cd2e1eafffe6..2df79093cad9 100644
--- a/drivers/iommu/io-pgtable.h
+++ b/drivers/iommu/io-pgtable.h
@@ -119,8 +119,8 @@ struct io_pgtable_cfg {
 struct io_pgtable_ops {
 	int (*map)(struct io_pgtable_ops *ops, unsigned long iova,
 		   phys_addr_t paddr, size_t size, int prot);
-	int (*unmap)(struct io_pgtable_ops *ops, unsigned long iova,
-		     size_t size);
+	size_t (*unmap)(struct io_pgtable_ops *ops, unsigned long iova,
+			size_t size);
 	phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops,
 				    unsigned long iova);
 };
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 69fef991c651..d2aa23202bb9 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1573,10 +1573,10 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
 
 	if (unlikely(ops->unmap == NULL ||
 		     domain->pgsize_bitmap == 0UL))
-		return -ENODEV;
+		return 0;
 
 	if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING)))
-		return -EINVAL;
+		return 0;
 
 	/* find out the minimum page size supported */
 	min_pagesz = 1 << __ffs(domain->pgsize_bitmap);
@@ -1589,7 +1589,7 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
 	if (!IS_ALIGNED(iova | size, min_pagesz)) {
 		pr_err("unaligned: iova 0x%lx size 0x%zx min_pagesz 0x%x\n",
 		       iova, size, min_pagesz);
-		return -EINVAL;
+		return 0;
 	}
 
 	pr_debug("unmap this: iova 0x%lx size 0x%zx\n", iova, size);
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index f227d73e7bf6..f2832a10fcea 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -60,7 +60,7 @@
 	(((prot) & 0x3) << F_MMU_TF_PROTECT_SEL_SHIFT(data))
 
 #define REG_MMU_IVRP_PADDR			0x114
-#define F_MMU_IVRP_PA_SET(pa, ext)		(((pa) >> 1) | ((!!(ext)) << 31))
+
 #define REG_MMU_VLD_PA_RNG			0x118
 #define F_MMU_VLD_PA_RNG(EA, SA)		(((EA) << 8) | (SA))
 
@@ -539,8 +539,13 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data)
 		F_INT_PRETETCH_TRANSATION_FIFO_FAULT;
 	writel_relaxed(regval, data->base + REG_MMU_INT_MAIN_CONTROL);
 
-	writel_relaxed(F_MMU_IVRP_PA_SET(data->protect_base, data->enable_4GB),
-		       data->base + REG_MMU_IVRP_PADDR);
+	if (data->m4u_plat == M4U_MT8173)
+		regval = (data->protect_base >> 1) | (data->enable_4GB << 31);
+	else
+		regval = lower_32_bits(data->protect_base) |
+			 upper_32_bits(data->protect_base);
+	writel_relaxed(regval, data->base + REG_MMU_IVRP_PADDR);
+
 	if (data->enable_4GB && data->m4u_plat != M4U_MT8173) {
 		/*
 		 * If 4GB mode is enabled, the validate PA range is from
@@ -695,6 +700,7 @@ static int __maybe_unused mtk_iommu_suspend(struct device *dev)
 	reg->ctrl_reg = readl_relaxed(base + REG_MMU_CTRL_REG);
 	reg->int_control0 = readl_relaxed(base + REG_MMU_INT_CONTROL0);
 	reg->int_main_control = readl_relaxed(base + REG_MMU_INT_MAIN_CONTROL);
+	reg->ivrp_paddr = readl_relaxed(base + REG_MMU_IVRP_PADDR);
 	clk_disable_unprepare(data->bclk);
 	return 0;
 }
@@ -717,8 +723,7 @@ static int __maybe_unused mtk_iommu_resume(struct device *dev)
 	writel_relaxed(reg->ctrl_reg, base + REG_MMU_CTRL_REG);
 	writel_relaxed(reg->int_control0, base + REG_MMU_INT_CONTROL0);
 	writel_relaxed(reg->int_main_control, base + REG_MMU_INT_MAIN_CONTROL);
-	writel_relaxed(F_MMU_IVRP_PA_SET(data->protect_base, data->enable_4GB),
-		       base + REG_MMU_IVRP_PADDR);
+	writel_relaxed(reg->ivrp_paddr, base + REG_MMU_IVRP_PADDR);
 	if (data->m4u_dom)
 		writel(data->m4u_dom->cfg.arm_v7s_cfg.ttbr[0],
 		       base + REG_MMU_PT_BASE_ADDR);
diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h
index b4451a1c7c2f..778498b8633f 100644
--- a/drivers/iommu/mtk_iommu.h
+++ b/drivers/iommu/mtk_iommu.h
@@ -32,6 +32,7 @@ struct mtk_iommu_suspend_reg {
 	u32				ctrl_reg;
 	u32				int_control0;
 	u32				int_main_control;
+	u32				ivrp_paddr;
 };
 
 enum mtk_iommu_plat {
diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c
index 5a96fd14ac22..a7c2a973784f 100644
--- a/drivers/iommu/mtk_iommu_v1.c
+++ b/drivers/iommu/mtk_iommu_v1.c
@@ -417,20 +417,12 @@ static int mtk_iommu_create_mapping(struct device *dev,
 		m4udev->archdata.iommu = mtk_mapping;
 	}
 
-	ret = arm_iommu_attach_device(dev, mtk_mapping);
-	if (ret)
-		goto err_release_mapping;
-
 	return 0;
-
-err_release_mapping:
-	arm_iommu_release_mapping(mtk_mapping);
-	m4udev->archdata.iommu = NULL;
-	return ret;
 }
 
 static int mtk_iommu_add_device(struct device *dev)
 {
+	struct dma_iommu_mapping *mtk_mapping;
 	struct of_phandle_args iommu_spec;
 	struct of_phandle_iterator it;
 	struct mtk_iommu_data *data;
@@ -451,15 +443,30 @@ static int mtk_iommu_add_device(struct device *dev)
 	if (!dev->iommu_fwspec || dev->iommu_fwspec->ops != &mtk_iommu_ops)
 		return -ENODEV; /* Not a iommu client device */
 
-	data = dev->iommu_fwspec->iommu_priv;
-	iommu_device_link(&data->iommu, dev);
-
-	group = iommu_group_get_for_dev(dev);
+	/*
+	 * This is a short-term bodge because the ARM DMA code doesn't
+	 * understand multi-device groups, but we have to call into it
+	 * successfully (and not just rely on a normal IOMMU API attach
+	 * here) in order to set the correct DMA API ops on @dev.
+	 */
+	group = iommu_group_alloc();
 	if (IS_ERR(group))
 		return PTR_ERR(group);
 
+	err = iommu_group_add_device(group, dev);
 	iommu_group_put(group);
-	return 0;
+	if (err)
+		return err;
+
+	data = dev->iommu_fwspec->iommu_priv;
+	mtk_mapping = data->dev->archdata.iommu;
+	err = arm_iommu_attach_device(dev, mtk_mapping);
+	if (err) {
+		iommu_group_remove_device(dev);
+		return err;
+	}
+
+	return iommu_device_link(&data->iommu, dev);
 }
 
 static void mtk_iommu_remove_device(struct device *dev)
@@ -476,24 +483,6 @@ static void mtk_iommu_remove_device(struct device *dev)
 	iommu_fwspec_free(dev);
 }
 
-static struct iommu_group *mtk_iommu_device_group(struct device *dev)
-{
-	struct mtk_iommu_data *data = dev->iommu_fwspec->iommu_priv;
-
-	if (!data)
-		return ERR_PTR(-ENODEV);
-
-	/* All the client devices are in the same m4u iommu-group */
-	if (!data->m4u_group) {
-		data->m4u_group = iommu_group_alloc();
-		if (IS_ERR(data->m4u_group))
-			dev_err(dev, "Failed to allocate M4U IOMMU group\n");
-	} else {
-		iommu_group_ref_get(data->m4u_group);
-	}
-	return data->m4u_group;
-}
-
 static int mtk_iommu_hw_init(const struct mtk_iommu_data *data)
 {
 	u32 regval;
@@ -546,7 +535,6 @@ static struct iommu_ops mtk_iommu_ops = {
 	.iova_to_phys	= mtk_iommu_iova_to_phys,
 	.add_device	= mtk_iommu_add_device,
 	.remove_device	= mtk_iommu_remove_device,
-	.device_group	= mtk_iommu_device_group,
 	.pgsize_bitmap	= ~0UL << MT2701_IOMMU_PAGE_SHIFT,
 };
 
diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index e135ab830ebf..c33b7b104e72 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -1536,7 +1536,7 @@ static struct iommu_group *omap_iommu_device_group(struct device *dev)
 	struct iommu_group *group = ERR_PTR(-EINVAL);
 
 	if (arch_data->iommu_dev)
-		group = arch_data->iommu_dev->group;
+		group = iommu_group_ref_get(arch_data->iommu_dev->group);
 
 	return group;
 }
diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index 9d991c2d8767..5fc8656c60f9 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -4,6 +4,7 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/clk.h>
 #include <linux/compiler.h>
 #include <linux/delay.h>
 #include <linux/device.h>
@@ -13,13 +14,15 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/iommu.h>
-#include <linux/jiffies.h>
+#include <linux/iopoll.h>
 #include <linux/list.h>
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/of.h>
+#include <linux/of_iommu.h>
 #include <linux/of_platform.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 
@@ -36,7 +39,10 @@
 #define RK_MMU_AUTO_GATING	0x24
 
 #define DTE_ADDR_DUMMY		0xCAFEBABE
-#define FORCE_RESET_TIMEOUT	100	/* ms */
+
+#define RK_MMU_POLL_PERIOD_US		100
+#define RK_MMU_FORCE_RESET_TIMEOUT_US	100000
+#define RK_MMU_POLL_TIMEOUT_US		1000
 
 /* RK_MMU_STATUS fields */
 #define RK_MMU_STATUS_PAGING_ENABLED       BIT(0)
@@ -73,11 +79,8 @@
   */
 #define RK_IOMMU_PGSIZE_BITMAP 0x007ff000
 
-#define IOMMU_REG_POLL_COUNT_FAST 1000
-
 struct rk_iommu_domain {
 	struct list_head iommus;
-	struct platform_device *pdev;
 	u32 *dt; /* page directory table */
 	dma_addr_t dt_dma;
 	spinlock_t iommus_lock; /* lock for iommus list */
@@ -86,24 +89,37 @@ struct rk_iommu_domain {
 	struct iommu_domain domain;
 };
 
+/* list of clocks required by IOMMU */
+static const char * const rk_iommu_clocks[] = {
+	"aclk", "iface",
+};
+
 struct rk_iommu {
 	struct device *dev;
 	void __iomem **bases;
 	int num_mmu;
-	int *irq;
-	int num_irq;
+	struct clk_bulk_data *clocks;
+	int num_clocks;
 	bool reset_disabled;
 	struct iommu_device iommu;
 	struct list_head node; /* entry in rk_iommu_domain.iommus */
 	struct iommu_domain *domain; /* domain to which iommu is attached */
+	struct iommu_group *group;
+};
+
+struct rk_iommudata {
+	struct device_link *link; /* runtime PM link from IOMMU to master */
+	struct rk_iommu *iommu;
 };
 
+static struct device *dma_dev;
+
 static inline void rk_table_flush(struct rk_iommu_domain *dom, dma_addr_t dma,
 				  unsigned int count)
 {
 	size_t size = count * sizeof(u32); /* count of u32 entry */
 
-	dma_sync_single_for_device(&dom->pdev->dev, dma, size, DMA_TO_DEVICE);
+	dma_sync_single_for_device(dma_dev, dma, size, DMA_TO_DEVICE);
 }
 
 static struct rk_iommu_domain *to_rk_domain(struct iommu_domain *dom)
@@ -111,27 +127,6 @@ static struct rk_iommu_domain *to_rk_domain(struct iommu_domain *dom)
 	return container_of(dom, struct rk_iommu_domain, domain);
 }
 
-/**
- * Inspired by _wait_for in intel_drv.h
- * This is NOT safe for use in interrupt context.
- *
- * Note that it's important that we check the condition again after having
- * timed out, since the timeout could be due to preemption or similar and
- * we've never had a chance to check the condition before the timeout.
- */
-#define rk_wait_for(COND, MS) ({ \
-	unsigned long timeout__ = jiffies + msecs_to_jiffies(MS) + 1;	\
-	int ret__ = 0;							\
-	while (!(COND)) {						\
-		if (time_after(jiffies, timeout__)) {			\
-			ret__ = (COND) ? 0 : -ETIMEDOUT;		\
-			break;						\
-		}							\
-		usleep_range(50, 100);					\
-	}								\
-	ret__;								\
-})
-
 /*
  * The Rockchip rk3288 iommu uses a 2-level page table.
  * The first level is the "Directory Table" (DT).
@@ -296,19 +291,21 @@ static void rk_iommu_base_command(void __iomem *base, u32 command)
 {
 	writel(command, base + RK_MMU_COMMAND);
 }
-static void rk_iommu_zap_lines(struct rk_iommu *iommu, dma_addr_t iova,
+static void rk_iommu_zap_lines(struct rk_iommu *iommu, dma_addr_t iova_start,
 			       size_t size)
 {
 	int i;
-
-	dma_addr_t iova_end = iova + size;
+	dma_addr_t iova_end = iova_start + size;
 	/*
 	 * TODO(djkurtz): Figure out when it is more efficient to shootdown the
 	 * entire iotlb rather than iterate over individual iovas.
 	 */
-	for (i = 0; i < iommu->num_mmu; i++)
-		for (; iova < iova_end; iova += SPAGE_SIZE)
+	for (i = 0; i < iommu->num_mmu; i++) {
+		dma_addr_t iova;
+
+		for (iova = iova_start; iova < iova_end; iova += SPAGE_SIZE)
 			rk_iommu_write(iommu->bases[i], RK_MMU_ZAP_ONE_LINE, iova);
+	}
 }
 
 static bool rk_iommu_is_stall_active(struct rk_iommu *iommu)
@@ -335,9 +332,21 @@ static bool rk_iommu_is_paging_enabled(struct rk_iommu *iommu)
 	return enable;
 }
 
+static bool rk_iommu_is_reset_done(struct rk_iommu *iommu)
+{
+	bool done = true;
+	int i;
+
+	for (i = 0; i < iommu->num_mmu; i++)
+		done &= rk_iommu_read(iommu->bases[i], RK_MMU_DTE_ADDR) == 0;
+
+	return done;
+}
+
 static int rk_iommu_enable_stall(struct rk_iommu *iommu)
 {
 	int ret, i;
+	bool val;
 
 	if (rk_iommu_is_stall_active(iommu))
 		return 0;
@@ -348,7 +357,9 @@ static int rk_iommu_enable_stall(struct rk_iommu *iommu)
 
 	rk_iommu_command(iommu, RK_MMU_CMD_ENABLE_STALL);
 
-	ret = rk_wait_for(rk_iommu_is_stall_active(iommu), 1);
+	ret = readx_poll_timeout(rk_iommu_is_stall_active, iommu, val,
+				 val, RK_MMU_POLL_PERIOD_US,
+				 RK_MMU_POLL_TIMEOUT_US);
 	if (ret)
 		for (i = 0; i < iommu->num_mmu; i++)
 			dev_err(iommu->dev, "Enable stall request timed out, status: %#08x\n",
@@ -360,13 +371,16 @@ static int rk_iommu_enable_stall(struct rk_iommu *iommu)
 static int rk_iommu_disable_stall(struct rk_iommu *iommu)
 {
 	int ret, i;
+	bool val;
 
 	if (!rk_iommu_is_stall_active(iommu))
 		return 0;
 
 	rk_iommu_command(iommu, RK_MMU_CMD_DISABLE_STALL);
 
-	ret = rk_wait_for(!rk_iommu_is_stall_active(iommu), 1);
+	ret = readx_poll_timeout(rk_iommu_is_stall_active, iommu, val,
+				 !val, RK_MMU_POLL_PERIOD_US,
+				 RK_MMU_POLL_TIMEOUT_US);
 	if (ret)
 		for (i = 0; i < iommu->num_mmu; i++)
 			dev_err(iommu->dev, "Disable stall request timed out, status: %#08x\n",
@@ -378,13 +392,16 @@ static int rk_iommu_disable_stall(struct rk_iommu *iommu)
 static int rk_iommu_enable_paging(struct rk_iommu *iommu)
 {
 	int ret, i;
+	bool val;
 
 	if (rk_iommu_is_paging_enabled(iommu))
 		return 0;
 
 	rk_iommu_command(iommu, RK_MMU_CMD_ENABLE_PAGING);
 
-	ret = rk_wait_for(rk_iommu_is_paging_enabled(iommu), 1);
+	ret = readx_poll_timeout(rk_iommu_is_paging_enabled, iommu, val,
+				 val, RK_MMU_POLL_PERIOD_US,
+				 RK_MMU_POLL_TIMEOUT_US);
 	if (ret)
 		for (i = 0; i < iommu->num_mmu; i++)
 			dev_err(iommu->dev, "Enable paging request timed out, status: %#08x\n",
@@ -396,13 +413,16 @@ static int rk_iommu_enable_paging(struct rk_iommu *iommu)
 static int rk_iommu_disable_paging(struct rk_iommu *iommu)
 {
 	int ret, i;
+	bool val;
 
 	if (!rk_iommu_is_paging_enabled(iommu))
 		return 0;
 
 	rk_iommu_command(iommu, RK_MMU_CMD_DISABLE_PAGING);
 
-	ret = rk_wait_for(!rk_iommu_is_paging_enabled(iommu), 1);
+	ret = readx_poll_timeout(rk_iommu_is_paging_enabled, iommu, val,
+				 !val, RK_MMU_POLL_PERIOD_US,
+				 RK_MMU_POLL_TIMEOUT_US);
 	if (ret)
 		for (i = 0; i < iommu->num_mmu; i++)
 			dev_err(iommu->dev, "Disable paging request timed out, status: %#08x\n",
@@ -415,6 +435,7 @@ static int rk_iommu_force_reset(struct rk_iommu *iommu)
 {
 	int ret, i;
 	u32 dte_addr;
+	bool val;
 
 	if (iommu->reset_disabled)
 		return 0;
@@ -435,13 +456,12 @@ static int rk_iommu_force_reset(struct rk_iommu *iommu)
 
 	rk_iommu_command(iommu, RK_MMU_CMD_FORCE_RESET);
 
-	for (i = 0; i < iommu->num_mmu; i++) {
-		ret = rk_wait_for(rk_iommu_read(iommu->bases[i], RK_MMU_DTE_ADDR) == 0x00000000,
-				  FORCE_RESET_TIMEOUT);
-		if (ret) {
-			dev_err(iommu->dev, "FORCE_RESET command timed out\n");
-			return ret;
-		}
+	/* Poll every RK_MMU_POLL_PERIOD_US; give up after the reset timeout */
+	ret = readx_poll_timeout(rk_iommu_is_reset_done, iommu, val, val,
+			RK_MMU_POLL_PERIOD_US, RK_MMU_FORCE_RESET_TIMEOUT_US);
+	if (ret) {
+		dev_err(iommu->dev, "FORCE_RESET command timed out\n");
+		return ret;
 	}
 
 	return 0;
@@ -503,6 +523,12 @@ static irqreturn_t rk_iommu_irq(int irq, void *dev_id)
 	irqreturn_t ret = IRQ_NONE;
 	int i;
 
+	if (WARN_ON(!pm_runtime_get_if_in_use(iommu->dev)))
+		return IRQ_NONE;
+
+	if (WARN_ON(clk_bulk_enable(iommu->num_clocks, iommu->clocks)))
+		goto out;
+
 	for (i = 0; i < iommu->num_mmu; i++) {
 		int_status = rk_iommu_read(iommu->bases[i], RK_MMU_INT_STATUS);
 		if (int_status == 0)
@@ -549,6 +575,10 @@ static irqreturn_t rk_iommu_irq(int irq, void *dev_id)
 		rk_iommu_write(iommu->bases[i], RK_MMU_INT_CLEAR, int_status);
 	}
 
+	clk_bulk_disable(iommu->num_clocks, iommu->clocks);
+
+out:
+	pm_runtime_put(iommu->dev);
 	return ret;
 }
 
@@ -590,8 +620,17 @@ static void rk_iommu_zap_iova(struct rk_iommu_domain *rk_domain,
 	spin_lock_irqsave(&rk_domain->iommus_lock, flags);
 	list_for_each(pos, &rk_domain->iommus) {
 		struct rk_iommu *iommu;
+
 		iommu = list_entry(pos, struct rk_iommu, node);
-		rk_iommu_zap_lines(iommu, iova, size);
+
+		/* Only zap TLBs of IOMMUs that are powered on. */
+		if (pm_runtime_get_if_in_use(iommu->dev)) {
+			WARN_ON(clk_bulk_enable(iommu->num_clocks,
+						iommu->clocks));
+			rk_iommu_zap_lines(iommu, iova, size);
+			clk_bulk_disable(iommu->num_clocks, iommu->clocks);
+			pm_runtime_put(iommu->dev);
+		}
 	}
 	spin_unlock_irqrestore(&rk_domain->iommus_lock, flags);
 }
@@ -608,7 +647,6 @@ static void rk_iommu_zap_iova_first_last(struct rk_iommu_domain *rk_domain,
 static u32 *rk_dte_get_page_table(struct rk_iommu_domain *rk_domain,
 				  dma_addr_t iova)
 {
-	struct device *dev = &rk_domain->pdev->dev;
 	u32 *page_table, *dte_addr;
 	u32 dte_index, dte;
 	phys_addr_t pt_phys;
@@ -626,9 +664,9 @@ static u32 *rk_dte_get_page_table(struct rk_iommu_domain *rk_domain,
 	if (!page_table)
 		return ERR_PTR(-ENOMEM);
 
-	pt_dma = dma_map_single(dev, page_table, SPAGE_SIZE, DMA_TO_DEVICE);
-	if (dma_mapping_error(dev, pt_dma)) {
-		dev_err(dev, "DMA mapping error while allocating page table\n");
+	pt_dma = dma_map_single(dma_dev, page_table, SPAGE_SIZE, DMA_TO_DEVICE);
+	if (dma_mapping_error(dma_dev, pt_dma)) {
+		dev_err(dma_dev, "DMA mapping error while allocating page table\n");
 		free_page((unsigned long)page_table);
 		return ERR_PTR(-ENOMEM);
 	}
@@ -790,52 +828,46 @@ static size_t rk_iommu_unmap(struct iommu_domain *domain, unsigned long _iova,
 
 static struct rk_iommu *rk_iommu_from_dev(struct device *dev)
 {
-	struct iommu_group *group;
-	struct device *iommu_dev;
-	struct rk_iommu *rk_iommu;
+	struct rk_iommudata *data = dev->archdata.iommu;
 
-	group = iommu_group_get(dev);
-	if (!group)
-		return NULL;
-	iommu_dev = iommu_group_get_iommudata(group);
-	rk_iommu = dev_get_drvdata(iommu_dev);
-	iommu_group_put(group);
+	return data ? data->iommu : NULL;
+}
+
+/* Must be called with iommu powered on and attached */
+static void rk_iommu_disable(struct rk_iommu *iommu)
+{
+	int i;
 
-	return rk_iommu;
+	/* Ignore error while disabling, just keep going */
+	WARN_ON(clk_bulk_enable(iommu->num_clocks, iommu->clocks));
+	rk_iommu_enable_stall(iommu);
+	rk_iommu_disable_paging(iommu);
+	for (i = 0; i < iommu->num_mmu; i++) {
+		rk_iommu_write(iommu->bases[i], RK_MMU_INT_MASK, 0);
+		rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR, 0);
+	}
+	rk_iommu_disable_stall(iommu);
+	clk_bulk_disable(iommu->num_clocks, iommu->clocks);
 }
 
-static int rk_iommu_attach_device(struct iommu_domain *domain,
-				  struct device *dev)
+/* Must be called with iommu powered on and attached */
+static int rk_iommu_enable(struct rk_iommu *iommu)
 {
-	struct rk_iommu *iommu;
+	struct iommu_domain *domain = iommu->domain;
 	struct rk_iommu_domain *rk_domain = to_rk_domain(domain);
-	unsigned long flags;
 	int ret, i;
 
-	/*
-	 * Allow 'virtual devices' (e.g., drm) to attach to domain.
-	 * Such a device does not belong to an iommu group.
-	 */
-	iommu = rk_iommu_from_dev(dev);
-	if (!iommu)
-		return 0;
+	ret = clk_bulk_enable(iommu->num_clocks, iommu->clocks);
+	if (ret)
+		return ret;
 
 	ret = rk_iommu_enable_stall(iommu);
 	if (ret)
-		return ret;
+		goto out_disable_clocks;
 
 	ret = rk_iommu_force_reset(iommu);
 	if (ret)
-		return ret;
-
-	iommu->domain = domain;
-
-	for (i = 0; i < iommu->num_irq; i++) {
-		ret = devm_request_irq(iommu->dev, iommu->irq[i], rk_iommu_irq,
-				       IRQF_SHARED, dev_name(dev), iommu);
-		if (ret)
-			return ret;
-	}
+		goto out_disable_stall;
 
 	for (i = 0; i < iommu->num_mmu; i++) {
 		rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR,
@@ -845,18 +877,12 @@ static int rk_iommu_attach_device(struct iommu_domain *domain,
 	}
 
 	ret = rk_iommu_enable_paging(iommu);
-	if (ret)
-		return ret;
-
-	spin_lock_irqsave(&rk_domain->iommus_lock, flags);
-	list_add_tail(&iommu->node, &rk_domain->iommus);
-	spin_unlock_irqrestore(&rk_domain->iommus_lock, flags);
-
-	dev_dbg(dev, "Attached to iommu domain\n");
 
+out_disable_stall:
 	rk_iommu_disable_stall(iommu);
-
-	return 0;
+out_disable_clocks:
+	clk_bulk_disable(iommu->num_clocks, iommu->clocks);
+	return ret;
 }
 
 static void rk_iommu_detach_device(struct iommu_domain *domain,
@@ -865,60 +891,90 @@ static void rk_iommu_detach_device(struct iommu_domain *domain,
 	struct rk_iommu *iommu;
 	struct rk_iommu_domain *rk_domain = to_rk_domain(domain);
 	unsigned long flags;
-	int i;
 
 	/* Allow 'virtual devices' (eg drm) to detach from domain */
 	iommu = rk_iommu_from_dev(dev);
 	if (!iommu)
 		return;
 
+	dev_dbg(dev, "Detaching from iommu domain\n");
+
+	/* iommu already detached */
+	if (iommu->domain != domain)
+		return;
+
+	iommu->domain = NULL;
+
 	spin_lock_irqsave(&rk_domain->iommus_lock, flags);
 	list_del_init(&iommu->node);
 	spin_unlock_irqrestore(&rk_domain->iommus_lock, flags);
 
-	/* Ignore error while disabling, just keep going */
-	rk_iommu_enable_stall(iommu);
-	rk_iommu_disable_paging(iommu);
-	for (i = 0; i < iommu->num_mmu; i++) {
-		rk_iommu_write(iommu->bases[i], RK_MMU_INT_MASK, 0);
-		rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR, 0);
+	if (pm_runtime_get_if_in_use(iommu->dev)) {
+		rk_iommu_disable(iommu);
+		pm_runtime_put(iommu->dev);
 	}
-	rk_iommu_disable_stall(iommu);
+}
 
-	for (i = 0; i < iommu->num_irq; i++)
-		devm_free_irq(iommu->dev, iommu->irq[i], iommu);
+static int rk_iommu_attach_device(struct iommu_domain *domain,
+		struct device *dev)
+{
+	struct rk_iommu *iommu;
+	struct rk_iommu_domain *rk_domain = to_rk_domain(domain);
+	unsigned long flags;
+	int ret;
 
-	iommu->domain = NULL;
+	/*
+	 * Allow 'virtual devices' (e.g., drm) to attach to domain.
+	 * Such a device does not belong to an iommu group.
+	 */
+	iommu = rk_iommu_from_dev(dev);
+	if (!iommu)
+		return 0;
+
+	dev_dbg(dev, "Attaching to iommu domain\n");
+
+	/* iommu already attached */
+	if (iommu->domain == domain)
+		return 0;
 
-	dev_dbg(dev, "Detached from iommu domain\n");
+	if (iommu->domain)
+		rk_iommu_detach_device(iommu->domain, dev);
+
+	iommu->domain = domain;
+
+	spin_lock_irqsave(&rk_domain->iommus_lock, flags);
+	list_add_tail(&iommu->node, &rk_domain->iommus);
+	spin_unlock_irqrestore(&rk_domain->iommus_lock, flags);
+
+	if (!pm_runtime_get_if_in_use(iommu->dev))
+		return 0;
+
+	ret = rk_iommu_enable(iommu);
+	if (ret)
+		rk_iommu_detach_device(iommu->domain, dev);
+
+	pm_runtime_put(iommu->dev);
+
+	return ret;
 }
 
 static struct iommu_domain *rk_iommu_domain_alloc(unsigned type)
 {
 	struct rk_iommu_domain *rk_domain;
-	struct platform_device *pdev;
-	struct device *iommu_dev;
 
 	if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
 		return NULL;
 
-	/* Register a pdev per domain, so DMA API can base on this *dev
-	 * even some virtual master doesn't have an iommu slave
-	 */
-	pdev = platform_device_register_simple("rk_iommu_domain",
-					       PLATFORM_DEVID_AUTO, NULL, 0);
-	if (IS_ERR(pdev))
+	if (!dma_dev)
 		return NULL;
 
-	rk_domain = devm_kzalloc(&pdev->dev, sizeof(*rk_domain), GFP_KERNEL);
+	rk_domain = devm_kzalloc(dma_dev, sizeof(*rk_domain), GFP_KERNEL);
 	if (!rk_domain)
-		goto err_unreg_pdev;
-
-	rk_domain->pdev = pdev;
+		return NULL;
 
 	if (type == IOMMU_DOMAIN_DMA &&
 	    iommu_get_dma_cookie(&rk_domain->domain))
-		goto err_unreg_pdev;
+		return NULL;
 
 	/*
 	 * rk32xx iommus use a 2 level pagetable.
@@ -929,11 +985,10 @@ static struct iommu_domain *rk_iommu_domain_alloc(unsigned type)
 	if (!rk_domain->dt)
 		goto err_put_cookie;
 
-	iommu_dev = &pdev->dev;
-	rk_domain->dt_dma = dma_map_single(iommu_dev, rk_domain->dt,
+	rk_domain->dt_dma = dma_map_single(dma_dev, rk_domain->dt,
 					   SPAGE_SIZE, DMA_TO_DEVICE);
-	if (dma_mapping_error(iommu_dev, rk_domain->dt_dma)) {
-		dev_err(iommu_dev, "DMA map error for DT\n");
+	if (dma_mapping_error(dma_dev, rk_domain->dt_dma)) {
+		dev_err(dma_dev, "DMA map error for DT\n");
 		goto err_free_dt;
 	}
 
@@ -954,8 +1009,6 @@ err_free_dt:
 err_put_cookie:
 	if (type == IOMMU_DOMAIN_DMA)
 		iommu_put_dma_cookie(&rk_domain->domain);
-err_unreg_pdev:
-	platform_device_unregister(pdev);
 
 	return NULL;
 }
@@ -972,126 +1025,82 @@ static void rk_iommu_domain_free(struct iommu_domain *domain)
 		if (rk_dte_is_pt_valid(dte)) {
 			phys_addr_t pt_phys = rk_dte_pt_address(dte);
 			u32 *page_table = phys_to_virt(pt_phys);
-			dma_unmap_single(&rk_domain->pdev->dev, pt_phys,
+			dma_unmap_single(dma_dev, pt_phys,
 					 SPAGE_SIZE, DMA_TO_DEVICE);
 			free_page((unsigned long)page_table);
 		}
 	}
 
-	dma_unmap_single(&rk_domain->pdev->dev, rk_domain->dt_dma,
+	dma_unmap_single(dma_dev, rk_domain->dt_dma,
 			 SPAGE_SIZE, DMA_TO_DEVICE);
 	free_page((unsigned long)rk_domain->dt);
 
 	if (domain->type == IOMMU_DOMAIN_DMA)
 		iommu_put_dma_cookie(&rk_domain->domain);
-
-	platform_device_unregister(rk_domain->pdev);
 }
 
-static bool rk_iommu_is_dev_iommu_master(struct device *dev)
+static int rk_iommu_add_device(struct device *dev)
 {
-	struct device_node *np = dev->of_node;
-	int ret;
-
-	/*
-	 * An iommu master has an iommus property containing a list of phandles
-	 * to iommu nodes, each with an #iommu-cells property with value 0.
-	 */
-	ret = of_count_phandle_with_args(np, "iommus", "#iommu-cells");
-	return (ret > 0);
-}
+	struct iommu_group *group;
+	struct rk_iommu *iommu;
+	struct rk_iommudata *data;
 
-static int rk_iommu_group_set_iommudata(struct iommu_group *group,
-					struct device *dev)
-{
-	struct device_node *np = dev->of_node;
-	struct platform_device *pd;
-	int ret;
-	struct of_phandle_args args;
+	data = dev->archdata.iommu;
+	if (!data)
+		return -ENODEV;
 
-	/*
-	 * An iommu master has an iommus property containing a list of phandles
-	 * to iommu nodes, each with an #iommu-cells property with value 0.
-	 */
-	ret = of_parse_phandle_with_args(np, "iommus", "#iommu-cells", 0,
-					 &args);
-	if (ret) {
-		dev_err(dev, "of_parse_phandle_with_args(%pOF) => %d\n",
-			np, ret);
-		return ret;
-	}
-	if (args.args_count != 0) {
-		dev_err(dev, "incorrect number of iommu params found for %pOF (found %d, expected 0)\n",
-			args.np, args.args_count);
-		return -EINVAL;
-	}
+	iommu = rk_iommu_from_dev(dev);
 
-	pd = of_find_device_by_node(args.np);
-	of_node_put(args.np);
-	if (!pd) {
-		dev_err(dev, "iommu %pOF not found\n", args.np);
-		return -EPROBE_DEFER;
-	}
+	group = iommu_group_get_for_dev(dev);
+	if (IS_ERR(group))
+		return PTR_ERR(group);
+	iommu_group_put(group);
 
-	/* TODO(djkurtz): handle multiple slave iommus for a single master */
-	iommu_group_set_iommudata(group, &pd->dev, NULL);
+	iommu_device_link(&iommu->iommu, dev);
+	data->link = device_link_add(dev, iommu->dev, DL_FLAG_PM_RUNTIME);
 
 	return 0;
 }
 
-static int rk_iommu_add_device(struct device *dev)
+static void rk_iommu_remove_device(struct device *dev)
 {
-	struct iommu_group *group;
 	struct rk_iommu *iommu;
-	int ret;
-
-	if (!rk_iommu_is_dev_iommu_master(dev))
-		return -ENODEV;
+	struct rk_iommudata *data = dev->archdata.iommu;
 
-	group = iommu_group_get(dev);
-	if (!group) {
-		group = iommu_group_alloc();
-		if (IS_ERR(group)) {
-			dev_err(dev, "Failed to allocate IOMMU group\n");
-			return PTR_ERR(group);
-		}
-	}
+	iommu = rk_iommu_from_dev(dev);
 
-	ret = iommu_group_add_device(group, dev);
-	if (ret)
-		goto err_put_group;
+	device_link_del(data->link);
+	iommu_device_unlink(&iommu->iommu, dev);
+	iommu_group_remove_device(dev);
+}
 
-	ret = rk_iommu_group_set_iommudata(group, dev);
-	if (ret)
-		goto err_remove_device;
+static struct iommu_group *rk_iommu_device_group(struct device *dev)
+{
+	struct rk_iommu *iommu;
 
 	iommu = rk_iommu_from_dev(dev);
-	if (iommu)
-		iommu_device_link(&iommu->iommu, dev);
-
-	iommu_group_put(group);
-
-	return 0;
 
-err_remove_device:
-	iommu_group_remove_device(dev);
-err_put_group:
-	iommu_group_put(group);
-	return ret;
+	return iommu_group_ref_get(iommu->group);
 }
 
-static void rk_iommu_remove_device(struct device *dev)
+static int rk_iommu_of_xlate(struct device *dev,
+			     struct of_phandle_args *args)
 {
-	struct rk_iommu *iommu;
+	struct platform_device *iommu_dev;
+	struct rk_iommudata *data;
 
-	if (!rk_iommu_is_dev_iommu_master(dev))
-		return;
+	data = devm_kzalloc(dma_dev, sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
 
-	iommu = rk_iommu_from_dev(dev);
-	if (iommu)
-		iommu_device_unlink(&iommu->iommu, dev);
+	iommu_dev = of_find_device_by_node(args->np);
 
-	iommu_group_remove_device(dev);
+	data->iommu = platform_get_drvdata(iommu_dev);
+	dev->archdata.iommu = data;
+
+	of_dev_put(iommu_dev);
+
+	return 0;
 }
 
 static const struct iommu_ops rk_iommu_ops = {
@@ -1105,31 +1114,9 @@ static const struct iommu_ops rk_iommu_ops = {
 	.add_device = rk_iommu_add_device,
 	.remove_device = rk_iommu_remove_device,
 	.iova_to_phys = rk_iommu_iova_to_phys,
+	.device_group = rk_iommu_device_group,
 	.pgsize_bitmap = RK_IOMMU_PGSIZE_BITMAP,
-};
-
-static int rk_iommu_domain_probe(struct platform_device *pdev)
-{
-	struct device *dev = &pdev->dev;
-
-	dev->dma_parms = devm_kzalloc(dev, sizeof(*dev->dma_parms), GFP_KERNEL);
-	if (!dev->dma_parms)
-		return -ENOMEM;
-
-	/* Set dma_ops for dev, otherwise it would be dummy_dma_ops */
-	arch_setup_dma_ops(dev, 0, DMA_BIT_MASK(32), NULL, false);
-
-	dma_set_max_seg_size(dev, DMA_BIT_MASK(32));
-	dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(32));
-
-	return 0;
-}
-
-static struct platform_driver rk_iommu_domain_driver = {
-	.probe = rk_iommu_domain_probe,
-	.driver = {
-		   .name = "rk_iommu_domain",
-	},
+	.of_xlate = rk_iommu_of_xlate,
 };
 
 static int rk_iommu_probe(struct platform_device *pdev)
@@ -1138,7 +1125,7 @@ static int rk_iommu_probe(struct platform_device *pdev)
 	struct rk_iommu *iommu;
 	struct resource *res;
 	int num_res = pdev->num_resources;
-	int err, i;
+	int err, i, irq;
 
 	iommu = devm_kzalloc(dev, sizeof(*iommu), GFP_KERNEL);
 	if (!iommu)
@@ -1165,50 +1152,108 @@ static int rk_iommu_probe(struct platform_device *pdev)
 	if (iommu->num_mmu == 0)
 		return PTR_ERR(iommu->bases[0]);
 
-	iommu->num_irq = platform_irq_count(pdev);
-	if (iommu->num_irq < 0)
-		return iommu->num_irq;
-	if (iommu->num_irq == 0)
-		return -ENXIO;
-
-	iommu->irq = devm_kcalloc(dev, iommu->num_irq, sizeof(*iommu->irq),
-				  GFP_KERNEL);
-	if (!iommu->irq)
-		return -ENOMEM;
+	i = 0;
+	while ((irq = platform_get_irq(pdev, i++)) != -ENXIO) {
+		if (irq < 0)
+			return irq;
 
-	for (i = 0; i < iommu->num_irq; i++) {
-		iommu->irq[i] = platform_get_irq(pdev, i);
-		if (iommu->irq[i] < 0) {
-			dev_err(dev, "Failed to get IRQ, %d\n", iommu->irq[i]);
-			return -ENXIO;
-		}
+		err = devm_request_irq(iommu->dev, irq, rk_iommu_irq,
+				       IRQF_SHARED, dev_name(dev), iommu);
+		if (err)
+			return err;
 	}
 
 	iommu->reset_disabled = device_property_read_bool(dev,
 					"rockchip,disable-mmu-reset");
 
-	err = iommu_device_sysfs_add(&iommu->iommu, dev, NULL, dev_name(dev));
+	iommu->num_clocks = ARRAY_SIZE(rk_iommu_clocks);
+	iommu->clocks = devm_kcalloc(iommu->dev, iommu->num_clocks,
+				     sizeof(*iommu->clocks), GFP_KERNEL);
+	if (!iommu->clocks)
+		return -ENOMEM;
+
+	for (i = 0; i < iommu->num_clocks; ++i)
+		iommu->clocks[i].id = rk_iommu_clocks[i];
+
+	err = devm_clk_bulk_get(iommu->dev, iommu->num_clocks, iommu->clocks);
+	if (err)
+		return err;
+
+	err = clk_bulk_prepare(iommu->num_clocks, iommu->clocks);
 	if (err)
 		return err;
 
+	iommu->group = iommu_group_alloc();
+	if (IS_ERR(iommu->group)) {
+		err = PTR_ERR(iommu->group);
+		goto err_unprepare_clocks;
+	}
+
+	err = iommu_device_sysfs_add(&iommu->iommu, dev, NULL, dev_name(dev));
+	if (err)
+		goto err_put_group;
+
 	iommu_device_set_ops(&iommu->iommu, &rk_iommu_ops);
+	iommu_device_set_fwnode(&iommu->iommu, &dev->of_node->fwnode);
+
 	err = iommu_device_register(&iommu->iommu);
+	if (err)
+		goto err_remove_sysfs;
+
+	/*
+	 * Use the first registered IOMMU device as the device for DMA API
+	 * calls made on behalf of a domain, since a domain might not
+	 * physically correspond to a single IOMMU device.
+	 */
+	if (!dma_dev)
+		dma_dev = &pdev->dev;
+
+	bus_set_iommu(&platform_bus_type, &rk_iommu_ops);
 
+	pm_runtime_enable(dev);
+
+	return 0;
+err_remove_sysfs:
+	iommu_device_sysfs_remove(&iommu->iommu);
+err_put_group:
+	iommu_group_put(iommu->group);
+err_unprepare_clocks:
+	clk_bulk_unprepare(iommu->num_clocks, iommu->clocks);
 	return err;
 }
 
-static int rk_iommu_remove(struct platform_device *pdev)
+static void rk_iommu_shutdown(struct platform_device *pdev)
 {
-	struct rk_iommu *iommu = platform_get_drvdata(pdev);
+	pm_runtime_force_suspend(&pdev->dev);
+}
 
-	if (iommu) {
-		iommu_device_sysfs_remove(&iommu->iommu);
-		iommu_device_unregister(&iommu->iommu);
-	}
+static int __maybe_unused rk_iommu_suspend(struct device *dev)
+{
+	struct rk_iommu *iommu = dev_get_drvdata(dev);
 
+	if (!iommu->domain)
+		return 0;
+
+	rk_iommu_disable(iommu);
 	return 0;
 }
 
+static int __maybe_unused rk_iommu_resume(struct device *dev)
+{
+	struct rk_iommu *iommu = dev_get_drvdata(dev);
+
+	if (!iommu->domain)
+		return 0;
+
+	return rk_iommu_enable(iommu);
+}
+
+static const struct dev_pm_ops rk_iommu_pm_ops = {
+	SET_RUNTIME_PM_OPS(rk_iommu_suspend, rk_iommu_resume, NULL)
+	SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+				pm_runtime_force_resume)
+};
+
 static const struct of_device_id rk_iommu_dt_ids[] = {
 	{ .compatible = "rockchip,iommu" },
 	{ /* sentinel */ }
@@ -1217,45 +1262,22 @@ MODULE_DEVICE_TABLE(of, rk_iommu_dt_ids);
 
 static struct platform_driver rk_iommu_driver = {
 	.probe = rk_iommu_probe,
-	.remove = rk_iommu_remove,
+	.shutdown = rk_iommu_shutdown,
 	.driver = {
 		   .name = "rk_iommu",
 		   .of_match_table = rk_iommu_dt_ids,
+		   .pm = &rk_iommu_pm_ops,
+		   .suppress_bind_attrs = true,
 	},
 };
 
 static int __init rk_iommu_init(void)
 {
-	struct device_node *np;
-	int ret;
-
-	np = of_find_matching_node(NULL, rk_iommu_dt_ids);
-	if (!np)
-		return 0;
-
-	of_node_put(np);
-
-	ret = bus_set_iommu(&platform_bus_type, &rk_iommu_ops);
-	if (ret)
-		return ret;
-
-	ret = platform_driver_register(&rk_iommu_domain_driver);
-	if (ret)
-		return ret;
-
-	ret = platform_driver_register(&rk_iommu_driver);
-	if (ret)
-		platform_driver_unregister(&rk_iommu_domain_driver);
-	return ret;
+	return platform_driver_register(&rk_iommu_driver);
 }
-static void __exit rk_iommu_exit(void)
-{
-	platform_driver_unregister(&rk_iommu_driver);
-	platform_driver_unregister(&rk_iommu_domain_driver);
-}
-
 subsys_initcall(rk_iommu_init);
-module_exit(rk_iommu_exit);
+
+IOMMU_OF_DECLARE(rk_iommu_of, "rockchip,iommu");
 
 MODULE_DESCRIPTION("IOMMU API for Rockchip");
 MODULE_AUTHOR("Simon Xue <xxm@rock-chips.com> and Daniel Kurtz <djkurtz@chromium.org>");
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 2982e93d2369..5416f2b2ac21 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -3612,7 +3612,8 @@ static int __init gic_acpi_parse_madt_its(struct acpi_subtable_header *header,
 		return -ENOMEM;
 	}
 
-	err = iort_register_domain_token(its_entry->translation_id, dom_handle);
+	err = iort_register_domain_token(its_entry->translation_id, res.start,
+					 dom_handle);
 	if (err) {
 		pr_err("ITS@%pa: Unable to register GICv3 ITS domain token (ITS ID %d) to IORT\n",
 		       &res.start, its_entry->translation_id);
diff --git a/drivers/macintosh/adb-iop.c b/drivers/macintosh/adb-iop.c
index 15db69d8ba69..ca623e6446e4 100644
--- a/drivers/macintosh/adb-iop.c
+++ b/drivers/macintosh/adb-iop.c
@@ -53,13 +53,13 @@ static void adb_iop_poll(void);
 static int adb_iop_reset_bus(void);
 
 struct adb_driver adb_iop_driver = {
-	"ISM IOP",
-	adb_iop_probe,
-	adb_iop_init,
-	adb_iop_send_request,
-	adb_iop_autopoll,
-	adb_iop_poll,
-	adb_iop_reset_bus
+	.name         = "ISM IOP",
+	.probe        = adb_iop_probe,
+	.init         = adb_iop_init,
+	.send_request = adb_iop_send_request,
+	.autopoll     = adb_iop_autopoll,
+	.poll         = adb_iop_poll,
+	.reset_bus    = adb_iop_reset_bus
 };
 
 static void adb_iop_end_req(struct adb_request *req, int state)
diff --git a/drivers/macintosh/ans-lcd.c b/drivers/macintosh/ans-lcd.c
index 1de81d922d8a..c8e078b911c7 100644
--- a/drivers/macintosh/ans-lcd.c
+++ b/drivers/macintosh/ans-lcd.c
@@ -201,3 +201,4 @@ anslcd_exit(void)
 
 module_init(anslcd_init);
 module_exit(anslcd_exit);
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/macintosh/macio-adb.c b/drivers/macintosh/macio-adb.c
index 9a6223add30e..eb3adfb7f88d 100644
--- a/drivers/macintosh/macio-adb.c
+++ b/drivers/macintosh/macio-adb.c
@@ -70,14 +70,13 @@ static void macio_adb_poll(void);
 static int macio_adb_reset_bus(void);
 
 struct adb_driver macio_adb_driver = {
-	"MACIO",
-	macio_probe,
-	macio_init,
-	macio_send_request,
-	/*macio_write,*/
-	macio_adb_autopoll,
-	macio_adb_poll,
-	macio_adb_reset_bus
+	.name         = "MACIO",
+	.probe        = macio_probe,
+	.init         = macio_init,
+	.send_request = macio_send_request,
+	.autopoll     = macio_adb_autopoll,
+	.poll         = macio_adb_poll,
+	.reset_bus    = macio_adb_reset_bus,
 };
 
 int macio_probe(void)
diff --git a/drivers/macintosh/rack-meter.c b/drivers/macintosh/rack-meter.c
index 910b5b6f96b1..1f29d2413c74 100644
--- a/drivers/macintosh/rack-meter.c
+++ b/drivers/macintosh/rack-meter.c
@@ -154,8 +154,8 @@ static void rackmeter_do_pause(struct rackmeter *rm, int pause)
 		DBDMA_DO_STOP(rm->dma_regs);
 		return;
 	}
-	memset(rdma->buf1, 0, ARRAY_SIZE(rdma->buf1));
-	memset(rdma->buf2, 0, ARRAY_SIZE(rdma->buf2));
+	memset(rdma->buf1, 0, sizeof(rdma->buf1));
+	memset(rdma->buf2, 0, sizeof(rdma->buf2));
 
 	rm->dma_buf_v->mark = 0;
 
@@ -397,7 +397,7 @@ static int rackmeter_probe(struct macio_dev* mdev,
 	}
 
 	/* Create and initialize our instance data */
-	rm = kzalloc(sizeof(struct rackmeter), GFP_KERNEL);
+	rm = kzalloc(sizeof(*rm), GFP_KERNEL);
 	if (rm == NULL) {
 		printk(KERN_ERR "rackmeter: failed to allocate memory !\n");
 		rc = -ENOMEM;
diff --git a/drivers/macintosh/via-macii.c b/drivers/macintosh/via-macii.c
index 4ba06a1695ea..cf6f7d52d6be 100644
--- a/drivers/macintosh/via-macii.c
+++ b/drivers/macintosh/via-macii.c
@@ -91,13 +91,13 @@ static void macii_poll(void);
 static int macii_reset_bus(void);
 
 struct adb_driver via_macii_driver = {
-	"Mac II",
-	macii_probe,
-	macii_init,
-	macii_send_request,
-	macii_autopoll,
-	macii_poll,
-	macii_reset_bus
+	.name         = "Mac II",
+	.probe        = macii_probe,
+	.init         = macii_init,
+	.send_request = macii_send_request,
+	.autopoll     = macii_autopoll,
+	.poll         = macii_poll,
+	.reset_bus    = macii_reset_bus,
 };
 
 static enum macii_state {
diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c
index 94c0f3f7df69..433dbeddfcf9 100644
--- a/drivers/macintosh/via-pmu.c
+++ b/drivers/macintosh/via-pmu.c
@@ -198,14 +198,14 @@ static const struct file_operations pmu_battery_proc_fops;
 static const struct file_operations pmu_options_proc_fops;
 
 #ifdef CONFIG_ADB
-struct adb_driver via_pmu_driver = {
-	"PMU",
-	pmu_probe,
-	pmu_init,
-	pmu_send_request,
-	pmu_adb_autopoll,
-	pmu_poll_adb,
-	pmu_adb_reset_bus
+const struct adb_driver via_pmu_driver = {
+	.name         = "PMU",
+	.probe        = pmu_probe,
+	.init         = pmu_init,
+	.send_request = pmu_send_request,
+	.autopoll     = pmu_adb_autopoll,
+	.poll         = pmu_poll_adb,
+	.reset_bus    = pmu_adb_reset_bus,
 };
 #endif /* CONFIG_ADB */
 
diff --git a/drivers/macintosh/via-pmu68k.c b/drivers/macintosh/via-pmu68k.c
index 7d9c4baf8c11..d545ed45e482 100644
--- a/drivers/macintosh/via-pmu68k.c
+++ b/drivers/macintosh/via-pmu68k.c
@@ -120,13 +120,13 @@ static void pmu_enable_backlight(int on);
 static void pmu_set_brightness(int level);
 
 struct adb_driver via_pmu_driver = {
-	"68K PMU",
-	pmu_probe,
-	pmu_init,
-	pmu_send_request,
-	pmu_autopoll,
-	pmu_poll,
-	pmu_reset_bus
+	.name         = "68K PMU",
+	.probe        = pmu_probe,
+	.init         = pmu_init,
+	.send_request = pmu_send_request,
+	.autopoll     = pmu_autopoll,
+	.poll         = pmu_poll,
+	.reset_bus    = pmu_reset_bus,
 };
 
 /*
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 2c8ac3688815..edff083f7c4e 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -201,7 +201,7 @@ config BLK_DEV_DM_BUILTIN
 config BLK_DEV_DM
 	tristate "Device mapper support"
 	select BLK_DEV_DM_BUILTIN
-	select DAX
+	depends on DAX || DAX=n
 	---help---
 	  Device-mapper is a low level volume manager.  It works by allowing
 	  people to specify mappings for ranges of logical sectors.  Various
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 99297212eeec..775c06d953b7 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -154,6 +154,7 @@ static int linear_iterate_devices(struct dm_target *ti,
 	return fn(ti, lc->dev, lc->start, ti->len, data);
 }
 
+#if IS_ENABLED(CONFIG_DAX_DRIVER)
 static long linear_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
 		long nr_pages, void **kaddr, pfn_t *pfn)
 {
@@ -184,6 +185,11 @@ static size_t linear_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff,
 	return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i);
 }
 
+#else
+#define linear_dax_direct_access NULL
+#define linear_dax_copy_from_iter NULL
+#endif
+
 static struct target_type linear_target = {
 	.name   = "linear",
 	.version = {1, 4, 0},
diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index 9de072b7782a..c90c7c08a77f 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -611,51 +611,6 @@ static int log_mark(struct log_writes_c *lc, char *data)
 	return 0;
 }
 
-static int log_dax(struct log_writes_c *lc, sector_t sector, size_t bytes,
-		   struct iov_iter *i)
-{
-	struct pending_block *block;
-
-	if (!bytes)
-		return 0;
-
-	block = kzalloc(sizeof(struct pending_block), GFP_KERNEL);
-	if (!block) {
-		DMERR("Error allocating dax pending block");
-		return -ENOMEM;
-	}
-
-	block->data = kzalloc(bytes, GFP_KERNEL);
-	if (!block->data) {
-		DMERR("Error allocating dax data space");
-		kfree(block);
-		return -ENOMEM;
-	}
-
-	/* write data provided via the iterator */
-	if (!copy_from_iter(block->data, bytes, i)) {
-		DMERR("Error copying dax data");
-		kfree(block->data);
-		kfree(block);
-		return -EIO;
-	}
-
-	/* rewind the iterator so that the block driver can use it */
-	iov_iter_revert(i, bytes);
-
-	block->datalen = bytes;
-	block->sector = bio_to_dev_sectors(lc, sector);
-	block->nr_sectors = ALIGN(bytes, lc->sectorsize) >> lc->sectorshift;
-
-	atomic_inc(&lc->pending_blocks);
-	spin_lock_irq(&lc->blocks_lock);
-	list_add_tail(&block->list, &lc->unflushed_blocks);
-	spin_unlock_irq(&lc->blocks_lock);
-	wake_up_process(lc->log_kthread);
-
-	return 0;
-}
-
 static void log_writes_dtr(struct dm_target *ti)
 {
 	struct log_writes_c *lc = ti->private;
@@ -925,6 +880,52 @@ static void log_writes_io_hints(struct dm_target *ti, struct queue_limits *limit
 	limits->io_min = limits->physical_block_size;
 }
 
+#if IS_ENABLED(CONFIG_DAX_DRIVER)
+static int log_dax(struct log_writes_c *lc, sector_t sector, size_t bytes,
+		   struct iov_iter *i)
+{
+	struct pending_block *block;
+
+	if (!bytes)
+		return 0;
+
+	block = kzalloc(sizeof(struct pending_block), GFP_KERNEL);
+	if (!block) {
+		DMERR("Error allocating dax pending block");
+		return -ENOMEM;
+	}
+
+	block->data = kzalloc(bytes, GFP_KERNEL);
+	if (!block->data) {
+		DMERR("Error allocating dax data space");
+		kfree(block);
+		return -ENOMEM;
+	}
+
+	/* write data provided via the iterator */
+	if (!copy_from_iter(block->data, bytes, i)) {
+		DMERR("Error copying dax data");
+		kfree(block->data);
+		kfree(block);
+		return -EIO;
+	}
+
+	/* rewind the iterator so that the block driver can use it */
+	iov_iter_revert(i, bytes);
+
+	block->datalen = bytes;
+	block->sector = bio_to_dev_sectors(lc, sector);
+	block->nr_sectors = ALIGN(bytes, lc->sectorsize) >> lc->sectorshift;
+
+	atomic_inc(&lc->pending_blocks);
+	spin_lock_irq(&lc->blocks_lock);
+	list_add_tail(&block->list, &lc->unflushed_blocks);
+	spin_unlock_irq(&lc->blocks_lock);
+	wake_up_process(lc->log_kthread);
+
+	return 0;
+}
+
 static long log_writes_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
 					 long nr_pages, void **kaddr, pfn_t *pfn)
 {
@@ -961,6 +962,10 @@ static size_t log_writes_dax_copy_from_iter(struct dm_target *ti,
 dax_copy:
 	return dax_copy_from_iter(lc->dev->dax_dev, pgoff, addr, bytes, i);
 }
+#else
+#define log_writes_dax_direct_access NULL
+#define log_writes_dax_copy_from_iter NULL
+#endif
 
 static struct target_type log_writes_target = {
 	.name   = "log-writes",
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index bb907cb3e60d..fe7fb9b1aec3 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -313,6 +313,7 @@ static int stripe_map(struct dm_target *ti, struct bio *bio)
 	return DM_MAPIO_REMAPPED;
 }
 
+#if IS_ENABLED(CONFIG_DAX_DRIVER)
 static long stripe_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
 		long nr_pages, void **kaddr, pfn_t *pfn)
 {
@@ -353,6 +354,11 @@ static size_t stripe_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff,
 	return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i);
 }
 
+#else
+#define stripe_dax_direct_access NULL
+#define stripe_dax_copy_from_iter NULL
+#endif
+
 /*
  * Stripe status:
  *
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 5a81c47be4e4..4ea404dbcf0b 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1826,7 +1826,7 @@ static void cleanup_mapped_device(struct mapped_device *md)
 static struct mapped_device *alloc_dev(int minor)
 {
 	int r, numa_node_id = dm_get_numa_node();
-	struct dax_device *dax_dev;
+	struct dax_device *dax_dev = NULL;
 	struct mapped_device *md;
 	void *old_md;
 
@@ -1892,9 +1892,11 @@ static struct mapped_device *alloc_dev(int minor)
 	md->disk->private_data = md;
 	sprintf(md->disk->disk_name, "dm-%d", minor);
 
-	dax_dev = alloc_dax(md, md->disk->disk_name, &dm_dax_ops);
-	if (!dax_dev)
-		goto bad;
+	if (IS_ENABLED(CONFIG_DAX_DRIVER)) {
+		dax_dev = alloc_dax(md, md->disk->disk_name, &dm_dax_ops);
+		if (!dax_dev)
+			goto bad;
+	}
 	md->dax_dev = dax_dev;
 
 	add_disk_no_queue_reg(md->disk);
diff --git a/drivers/media/cec/cec-pin.c b/drivers/media/cec/cec-pin.c
index fafe1ebc8aff..2a5df99735fa 100644
--- a/drivers/media/cec/cec-pin.c
+++ b/drivers/media/cec/cec-pin.c
@@ -668,7 +668,7 @@ static void cec_pin_rx_states(struct cec_pin *pin, ktime_t ts)
 		/* Start bit low is too short, go back to idle */
 		if (delta < CEC_TIM_START_BIT_LOW_MIN - CEC_TIM_IDLE_SAMPLE) {
 			if (!pin->rx_start_bit_low_too_short_cnt++) {
-				pin->rx_start_bit_low_too_short_ts = pin->ts;
+				pin->rx_start_bit_low_too_short_ts = ktime_to_ns(pin->ts);
 				pin->rx_start_bit_low_too_short_delta = delta;
 			}
 			cec_pin_to_idle(pin);
@@ -700,7 +700,7 @@ static void cec_pin_rx_states(struct cec_pin *pin, ktime_t ts)
 		/* Start bit is too short, go back to idle */
 		if (delta < CEC_TIM_START_BIT_TOTAL_MIN - CEC_TIM_IDLE_SAMPLE) {
 			if (!pin->rx_start_bit_too_short_cnt++) {
-				pin->rx_start_bit_too_short_ts = pin->ts;
+				pin->rx_start_bit_too_short_ts = ktime_to_ns(pin->ts);
 				pin->rx_start_bit_too_short_delta = delta;
 			}
 			cec_pin_to_idle(pin);
@@ -770,7 +770,7 @@ static void cec_pin_rx_states(struct cec_pin *pin, ktime_t ts)
 		 */
 		if (delta < CEC_TIM_DATA_BIT_TOTAL_MIN) {
 			if (!pin->rx_data_bit_too_short_cnt++) {
-				pin->rx_data_bit_too_short_ts = pin->ts;
+				pin->rx_data_bit_too_short_ts = ktime_to_ns(pin->ts);
 				pin->rx_data_bit_too_short_delta = delta;
 			}
 			cec_pin_low(pin);
diff --git a/drivers/media/common/v4l2-tpg/v4l2-tpg-core.c b/drivers/media/common/v4l2-tpg/v4l2-tpg-core.c
index 37632bc524d4..9b64f4f354bf 100644
--- a/drivers/media/common/v4l2-tpg/v4l2-tpg-core.c
+++ b/drivers/media/common/v4l2-tpg/v4l2-tpg-core.c
@@ -1149,7 +1149,7 @@ static void gen_twopix(struct tpg_data *tpg,
 	case V4L2_PIX_FMT_NV42:
 		buf[0][offset] = r_y_h;
 		buf[1][2 * offset] = b_v;
-		buf[1][(2 * offset + 1) %8] = g_u_s;
+		buf[1][(2 * offset + 1) % 8] = g_u_s;
 		break;
 
 	case V4L2_PIX_FMT_YUYV:
diff --git a/drivers/media/dvb-core/dvb_frontend.c b/drivers/media/dvb-core/dvb_frontend.c
index 21a7d4b47e1a..e33414975065 100644
--- a/drivers/media/dvb-core/dvb_frontend.c
+++ b/drivers/media/dvb-core/dvb_frontend.c
@@ -2089,7 +2089,7 @@ static int dvb_frontend_handle_compat_ioctl(struct file *file, unsigned int cmd,
 		}
 		for (i = 0; i < tvps->num; i++) {
 			err = dtv_property_process_get(
-			    fe, &getp, (struct dtv_property *)tvp + i, file);
+			    fe, &getp, (struct dtv_property *)(tvp + i), file);
 			if (err < 0) {
 				kfree(tvp);
 				return err;
diff --git a/drivers/media/i2c/adv748x/adv748x-afe.c b/drivers/media/i2c/adv748x/adv748x-afe.c
index 5188178588c9..61514bae7e5c 100644
--- a/drivers/media/i2c/adv748x/adv748x-afe.c
+++ b/drivers/media/i2c/adv748x/adv748x-afe.c
@@ -275,7 +275,8 @@ static int adv748x_afe_s_stream(struct v4l2_subdev *sd, int enable)
 {
 	struct adv748x_afe *afe = adv748x_sd_to_afe(sd);
 	struct adv748x_state *state = adv748x_afe_to_state(afe);
-	int ret, signal = V4L2_IN_ST_NO_SIGNAL;
+	u32 signal = V4L2_IN_ST_NO_SIGNAL;
+	int ret;
 
 	mutex_lock(&state->mutex);
 
diff --git a/drivers/media/i2c/dw9714.c b/drivers/media/i2c/dw9714.c
index 8dbbf0f917df..91fae01d052b 100644
--- a/drivers/media/i2c/dw9714.c
+++ b/drivers/media/i2c/dw9714.c
@@ -1,15 +1,5 @@
-/*
- * Copyright (c) 2015--2017 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2015--2017 Intel Corporation.
 
 #include <linux/delay.h>
 #include <linux/i2c.h>
diff --git a/drivers/media/i2c/imx274.c b/drivers/media/i2c/imx274.c
index 664e8acdf2a0..daec33f4196a 100644
--- a/drivers/media/i2c/imx274.c
+++ b/drivers/media/i2c/imx274.c
@@ -1426,7 +1426,7 @@ static int imx274_set_vflip(struct stimx274 *priv, int val)
 
 	err = imx274_write_reg(priv, IMX274_VFLIP_REG, val);
 	if (err) {
-		dev_err(&priv->client->dev, "VFILP control error\n");
+		dev_err(&priv->client->dev, "VFLIP control error\n");
 		return err;
 	}
 
diff --git a/drivers/media/i2c/ov13858.c b/drivers/media/i2c/ov13858.c
index 30ee9f71bf0d..3dbcae257164 100644
--- a/drivers/media/i2c/ov13858.c
+++ b/drivers/media/i2c/ov13858.c
@@ -1,16 +1,5 @@
-/*
- * Copyright (c) 2017 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- */
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017 Intel Corporation.
 
 #include <linux/acpi.h>
 #include <linux/i2c.h>
@@ -1375,7 +1364,9 @@ ov13858_set_pad_format(struct v4l2_subdev *sd,
 	if (fmt->format.code != MEDIA_BUS_FMT_SGRBG10_1X10)
 		fmt->format.code = MEDIA_BUS_FMT_SGRBG10_1X10;
 
-	mode = v4l2_find_nearest_size(supported_modes, width, height,
+	mode = v4l2_find_nearest_size(supported_modes,
+				      ARRAY_SIZE(supported_modes),
+				      width, height,
 				      fmt->format.width, fmt->format.height);
 	ov13858_update_pad_format(mode, fmt);
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
diff --git a/drivers/media/i2c/ov2685.c b/drivers/media/i2c/ov2685.c
index 83c55e8288e7..385c1886a947 100644
--- a/drivers/media/i2c/ov2685.c
+++ b/drivers/media/i2c/ov2685.c
@@ -832,7 +832,6 @@ MODULE_DEVICE_TABLE(of, ov2685_of_match);
 static struct i2c_driver ov2685_i2c_driver = {
 	.driver = {
 		.name = "ov2685",
-		.owner = THIS_MODULE,
 		.pm = &ov2685_pm_ops,
 		.of_match_table = of_match_ptr(ov2685_of_match),
 	},
diff --git a/drivers/media/i2c/ov5640.c b/drivers/media/i2c/ov5640.c
index 03940f0cdfa6..852026baa2e7 100644
--- a/drivers/media/i2c/ov5640.c
+++ b/drivers/media/i2c/ov5640.c
@@ -1641,6 +1641,9 @@ static int ov5640_set_mode(struct ov5640_dev *sensor,
 	return 0;
 }
 
+static int ov5640_set_framefmt(struct ov5640_dev *sensor,
+			       struct v4l2_mbus_framefmt *format);
+
 /* restore the last set video mode after chip power-on */
 static int ov5640_restore_mode(struct ov5640_dev *sensor)
 {
@@ -1652,7 +1655,11 @@ static int ov5640_restore_mode(struct ov5640_dev *sensor)
 		return ret;
 
 	/* now restore the last capture mode */
-	return ov5640_set_mode(sensor, &ov5640_mode_init_data);
+	ret = ov5640_set_mode(sensor, &ov5640_mode_init_data);
+	if (ret < 0)
+		return ret;
+
+	return ov5640_set_framefmt(sensor, &sensor->fmt);
 }
 
 static void ov5640_power(struct ov5640_dev *sensor, bool enable)
@@ -1874,7 +1881,13 @@ static int ov5640_try_fmt_internal(struct v4l2_subdev *sd,
 		if (ov5640_formats[i].code == fmt->code)
 			break;
 	if (i >= ARRAY_SIZE(ov5640_formats))
-		fmt->code = ov5640_formats[0].code;
+		i = 0;
+
+	fmt->code = ov5640_formats[i].code;
+	fmt->colorspace = ov5640_formats[i].colorspace;
+	fmt->ycbcr_enc = V4L2_MAP_YCBCR_ENC_DEFAULT(fmt->colorspace);
+	fmt->quantization = V4L2_QUANTIZATION_FULL_RANGE;
+	fmt->xfer_func = V4L2_MAP_XFER_FUNC_DEFAULT(fmt->colorspace);
 
 	return 0;
 }
@@ -1885,6 +1898,7 @@ static int ov5640_set_fmt(struct v4l2_subdev *sd,
 {
 	struct ov5640_dev *sensor = to_ov5640_dev(sd);
 	const struct ov5640_mode_info *new_mode;
+	struct v4l2_mbus_framefmt *mbus_fmt = &format->format;
 	int ret;
 
 	if (format->pad != 0)
@@ -1897,7 +1911,7 @@ static int ov5640_set_fmt(struct v4l2_subdev *sd,
 		goto out;
 	}
 
-	ret = ov5640_try_fmt_internal(sd, &format->format,
+	ret = ov5640_try_fmt_internal(sd, mbus_fmt,
 				      sensor->current_fr, &new_mode);
 	if (ret)
 		goto out;
@@ -1906,12 +1920,12 @@ static int ov5640_set_fmt(struct v4l2_subdev *sd,
 		struct v4l2_mbus_framefmt *fmt =
 			v4l2_subdev_get_try_format(sd, cfg, 0);
 
-		*fmt = format->format;
+		*fmt = *mbus_fmt;
 		goto out;
 	}
 
 	sensor->current_mode = new_mode;
-	sensor->fmt = format->format;
+	sensor->fmt = *mbus_fmt;
 	sensor->pending_mode_change = true;
 out:
 	mutex_unlock(&sensor->lock);
@@ -2496,6 +2510,7 @@ static int ov5640_probe(struct i2c_client *client,
 	struct device *dev = &client->dev;
 	struct fwnode_handle *endpoint;
 	struct ov5640_dev *sensor;
+	struct v4l2_mbus_framefmt *fmt;
 	int ret;
 
 	sensor = devm_kzalloc(dev, sizeof(*sensor), GFP_KERNEL);
@@ -2503,10 +2518,15 @@ static int ov5640_probe(struct i2c_client *client,
 		return -ENOMEM;
 
 	sensor->i2c_client = client;
-	sensor->fmt.code = MEDIA_BUS_FMT_UYVY8_2X8;
-	sensor->fmt.width = 640;
-	sensor->fmt.height = 480;
-	sensor->fmt.field = V4L2_FIELD_NONE;
+	fmt = &sensor->fmt;
+	fmt->code = ov5640_formats[0].code;
+	fmt->colorspace = ov5640_formats[0].colorspace;
+	fmt->ycbcr_enc = V4L2_MAP_YCBCR_ENC_DEFAULT(fmt->colorspace);
+	fmt->quantization = V4L2_QUANTIZATION_FULL_RANGE;
+	fmt->xfer_func = V4L2_MAP_XFER_FUNC_DEFAULT(fmt->colorspace);
+	fmt->width = 640;
+	fmt->height = 480;
+	fmt->field = V4L2_FIELD_NONE;
 	sensor->frame_interval.numerator = 1;
 	sensor->frame_interval.denominator = ov5640_framerates[OV5640_30_FPS];
 	sensor->current_fr = OV5640_30_FPS;
diff --git a/drivers/media/i2c/ov5645.c b/drivers/media/i2c/ov5645.c
index d28845f7356f..4e3142a7e5a7 100644
--- a/drivers/media/i2c/ov5645.c
+++ b/drivers/media/i2c/ov5645.c
@@ -959,23 +959,6 @@ __ov5645_get_pad_crop(struct ov5645 *ov5645, struct v4l2_subdev_pad_config *cfg,
 	}
 }
 
-static const struct ov5645_mode_info *
-ov5645_find_nearest_mode(unsigned int width, unsigned int height)
-{
-	int i;
-
-	for (i = ARRAY_SIZE(ov5645_mode_info_data) - 1; i >= 0; i--) {
-		if (ov5645_mode_info_data[i].width <= width &&
-		    ov5645_mode_info_data[i].height <= height)
-			break;
-	}
-
-	if (i < 0)
-		i = 0;
-
-	return &ov5645_mode_info_data[i];
-}
-
 static int ov5645_set_format(struct v4l2_subdev *sd,
 			     struct v4l2_subdev_pad_config *cfg,
 			     struct v4l2_subdev_format *format)
@@ -989,8 +972,11 @@ static int ov5645_set_format(struct v4l2_subdev *sd,
 	__crop = __ov5645_get_pad_crop(ov5645, cfg, format->pad,
 			format->which);
 
-	new_mode = ov5645_find_nearest_mode(format->format.width,
-					    format->format.height);
+	new_mode = v4l2_find_nearest_size(ov5645_mode_info_data,
+			       ARRAY_SIZE(ov5645_mode_info_data),
+			       width, height,
+			       format->format.width, format->format.height);
+
 	__crop->width = new_mode->width;
 	__crop->height = new_mode->height;
 
@@ -1131,13 +1117,14 @@ static int ov5645_probe(struct i2c_client *client,
 
 	ret = v4l2_fwnode_endpoint_parse(of_fwnode_handle(endpoint),
 					 &ov5645->ep);
+
+	of_node_put(endpoint);
+
 	if (ret < 0) {
 		dev_err(dev, "parsing endpoint node failed\n");
 		return ret;
 	}
 
-	of_node_put(endpoint);
-
 	if (ov5645->ep.bus_type != V4L2_MBUS_CSI2) {
 		dev_err(dev, "invalid bus type, must be CSI2\n");
 		return -EINVAL;
diff --git a/drivers/media/i2c/ov5670.c b/drivers/media/i2c/ov5670.c
index d2db480da1b9..7b7c74d77370 100644
--- a/drivers/media/i2c/ov5670.c
+++ b/drivers/media/i2c/ov5670.c
@@ -1,16 +1,5 @@
-/*
- * Copyright (c) 2017 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- */
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017 Intel Corporation.
 
 #include <linux/acpi.h>
 #include <linux/i2c.h>
@@ -2230,7 +2219,9 @@ static int ov5670_set_pad_format(struct v4l2_subdev *sd,
 
 	fmt->format.code = MEDIA_BUS_FMT_SGRBG10_1X10;
 
-	mode = v4l2_find_nearest_size(supported_modes, width, height,
+	mode = v4l2_find_nearest_size(supported_modes,
+				      ARRAY_SIZE(supported_modes),
+				      width, height,
 				      fmt->format.width, fmt->format.height);
 	ov5670_update_pad_format(mode, fmt);
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
diff --git a/drivers/media/platform/mtk-jpeg/mtk_jpeg_core.c b/drivers/media/platform/mtk-jpeg/mtk_jpeg_core.c
index 226f90886484..af17aaa21f58 100644
--- a/drivers/media/platform/mtk-jpeg/mtk_jpeg_core.c
+++ b/drivers/media/platform/mtk-jpeg/mtk_jpeg_core.c
@@ -1081,11 +1081,11 @@ static int mtk_jpeg_clk_init(struct mtk_jpeg_dev *jpeg)
 
 	jpeg->clk_jdec = devm_clk_get(jpeg->dev, "jpgdec");
 	if (IS_ERR(jpeg->clk_jdec))
-		return -EINVAL;
+		return PTR_ERR(jpeg->clk_jdec);
 
 	jpeg->clk_jdec_smi = devm_clk_get(jpeg->dev, "jpgdec-smi");
 	if (IS_ERR(jpeg->clk_jdec_smi))
-		return -EINVAL;
+		return PTR_ERR(jpeg->clk_jdec_smi);
 
 	return 0;
 }
diff --git a/drivers/media/platform/qcom/venus/firmware.c b/drivers/media/platform/qcom/venus/firmware.c
index 521d4b36c090..c4a577848dd7 100644
--- a/drivers/media/platform/qcom/venus/firmware.c
+++ b/drivers/media/platform/qcom/venus/firmware.c
@@ -76,7 +76,7 @@ int venus_boot(struct device *dev, const char *fwname)
 	}
 
 	ret = qcom_mdt_load(dev, mdt, fwname, VENUS_PAS_ID, mem_va, mem_phys,
-			    mem_size);
+			    mem_size, NULL);
 
 	release_firmware(mdt);
 
diff --git a/drivers/media/platform/qcom/venus/vdec.c b/drivers/media/platform/qcom/venus/vdec.c
index c9e9576bb08a..49bbd1861d3a 100644
--- a/drivers/media/platform/qcom/venus/vdec.c
+++ b/drivers/media/platform/qcom/venus/vdec.c
@@ -135,20 +135,21 @@ find_format_by_index(struct venus_inst *inst, unsigned int index, u32 type)
 		return NULL;
 
 	for (i = 0; i < size; i++) {
+		bool valid;
+
 		if (fmt[i].type != type)
 			continue;
-		if (k == index)
+		valid = type != V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE ||
+			venus_helper_check_codec(inst, fmt[i].pixfmt);
+		if (k == index && valid)
 			break;
-		k++;
+		if (valid)
+			k++;
 	}
 
 	if (i == size)
 		return NULL;
 
-	if (type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE &&
-	    !venus_helper_check_codec(inst, fmt[i].pixfmt))
-		return NULL;
-
 	return &fmt[i];
 }
 
diff --git a/drivers/media/platform/qcom/venus/venc.c b/drivers/media/platform/qcom/venus/venc.c
index e3a10a852cad..6b2ce479584e 100644
--- a/drivers/media/platform/qcom/venus/venc.c
+++ b/drivers/media/platform/qcom/venus/venc.c
@@ -120,20 +120,21 @@ find_format_by_index(struct venus_inst *inst, unsigned int index, u32 type)
 		return NULL;
 
 	for (i = 0; i < size; i++) {
+		bool valid;
+
 		if (fmt[i].type != type)
 			continue;
-		if (k == index)
+		valid = type != V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE ||
+			venus_helper_check_codec(inst, fmt[i].pixfmt);
+		if (k == index && valid)
 			break;
-		k++;
+		if (valid)
+			k++;
 	}
 
 	if (i == size)
 		return NULL;
 
-	if (type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE &&
-	    !venus_helper_check_codec(inst, fmt[i].pixfmt))
-		return NULL;
-
 	return &fmt[i];
 }
 
diff --git a/drivers/media/platform/vivid/vivid-vid-cap.c b/drivers/media/platform/vivid/vivid-vid-cap.c
index 01c703683657..1599159f2574 100644
--- a/drivers/media/platform/vivid/vivid-vid-cap.c
+++ b/drivers/media/platform/vivid/vivid-vid-cap.c
@@ -561,8 +561,9 @@ int vivid_try_fmt_vid_cap(struct file *file, void *priv,
 	mp->field = vivid_field_cap(dev, mp->field);
 	if (vivid_is_webcam(dev)) {
 		const struct v4l2_frmsize_discrete *sz =
-			v4l2_find_nearest_size(webcam_sizes, width, height,
-					       mp->width, mp->height);
+			v4l2_find_nearest_size(webcam_sizes,
+					       VIVID_WEBCAM_SIZES, width,
+					       height, mp->width, mp->height);
 
 		w = sz->width;
 		h = sz->height;
diff --git a/drivers/media/platform/vsp1/vsp1_wpf.c b/drivers/media/platform/vsp1/vsp1_wpf.c
index f7f3b4b2c2de..8bd6b2f1af15 100644
--- a/drivers/media/platform/vsp1/vsp1_wpf.c
+++ b/drivers/media/platform/vsp1/vsp1_wpf.c
@@ -452,7 +452,7 @@ static void wpf_configure(struct vsp1_entity *entity,
 			: VI6_WPF_SRCRPF_RPF_ACT_SUB(input->entity.index);
 	}
 
-	if (pipe->bru || pipe->num_inputs > 1)
+	if (pipe->bru)
 		srcrpf |= pipe->bru->type == VSP1_ENTITY_BRU
 			? VI6_WPF_SRCRPF_VIRACT_MST
 			: VI6_WPF_SRCRPF_VIRACT2_MST;
diff --git a/drivers/media/tuners/r820t.c b/drivers/media/tuners/r820t.c
index bc9299059f48..3e14b9e2e763 100644
--- a/drivers/media/tuners/r820t.c
+++ b/drivers/media/tuners/r820t.c
@@ -20,6 +20,8 @@
 //
 //	RF Gain set/get is not implemented.
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/videodev2.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
@@ -2371,7 +2373,7 @@ err:
 err_no_gate:
 	mutex_unlock(&r820t_list_mutex);
 
-	tuner_info("%s: failed=%d\n", __func__, rc);
+	pr_info("%s: failed=%d\n", __func__, rc);
 	r820t_release(fe);
 	return NULL;
 }
diff --git a/drivers/media/usb/cx231xx/cx231xx-dvb.c b/drivers/media/usb/cx231xx/cx231xx-dvb.c
index 713029420fcf..67ed66712d05 100644
--- a/drivers/media/usb/cx231xx/cx231xx-dvb.c
+++ b/drivers/media/usb/cx231xx/cx231xx-dvb.c
@@ -276,7 +276,7 @@ static int start_streaming(struct cx231xx_dvb *dvb)
 
 	if (dev->USE_ISO) {
 		dev_dbg(dev->dev, "DVB transfer mode is ISO.\n");
-		cx231xx_set_alt_setting(dev, INDEX_TS1, 4);
+		cx231xx_set_alt_setting(dev, INDEX_TS1, 5);
 		rc = cx231xx_set_mode(dev, CX231XX_DIGITAL_MODE);
 		if (rc < 0)
 			return rc;
diff --git a/drivers/media/usb/gspca/Kconfig b/drivers/media/usb/gspca/Kconfig
index d214a21acff7..bc9a439745aa 100644
--- a/drivers/media/usb/gspca/Kconfig
+++ b/drivers/media/usb/gspca/Kconfig
@@ -7,7 +7,7 @@ menuconfig USB_GSPCA
 	  Say Y here if you want to enable selecting webcams based
 	  on the GSPCA framework.
 
-	  See <file:Documentation/video4linux/gspca.txt> for more info.
+	  See <file:Documentation/media/v4l-drivers/gspca-cardlist.rst> for more info.
 
 	  This driver uses the Video For Linux API. You must say Y or M to
 	  "Video For Linux" to use this driver.
diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c
index 5198c9eeb348..4312935f1dfc 100644
--- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c
+++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c
@@ -101,7 +101,7 @@ static int get_v4l2_window32(struct v4l2_window __user *kp,
 static int put_v4l2_window32(struct v4l2_window __user *kp,
 			     struct v4l2_window32 __user *up)
 {
-	struct v4l2_clip __user *kclips = kp->clips;
+	struct v4l2_clip __user *kclips;
 	struct v4l2_clip32 __user *uclips;
 	compat_caddr_t p;
 	u32 clipcount;
@@ -116,6 +116,8 @@ static int put_v4l2_window32(struct v4l2_window __user *kp,
 	if (!clipcount)
 		return 0;
 
+	if (get_user(kclips, &kp->clips))
+		return -EFAULT;
 	if (get_user(p, &up->clips))
 		return -EFAULT;
 	uclips = compat_ptr(p);
diff --git a/drivers/media/v4l2-core/v4l2-dev.c b/drivers/media/v4l2-core/v4l2-dev.c
index 0301fe426a43..1d0b2208e8fb 100644
--- a/drivers/media/v4l2-core/v4l2-dev.c
+++ b/drivers/media/v4l2-core/v4l2-dev.c
@@ -939,10 +939,14 @@ int __video_register_device(struct video_device *vdev,
 #endif
 	vdev->minor = i + minor_offset;
 	vdev->num = nr;
-	devnode_set(vdev);
 
 	/* Should not happen since we thought this minor was free */
-	WARN_ON(video_device[vdev->minor] != NULL);
+	if (WARN_ON(video_device[vdev->minor])) {
+		mutex_unlock(&videodev_lock);
+		printk(KERN_ERR "video_device not empty!\n");
+		return -ENFILE;
+	}
+	devnode_set(vdev);
 	vdev->index = get_index(vdev);
 	video_device[vdev->minor] = vdev;
 	mutex_unlock(&videodev_lock);
diff --git a/drivers/mfd/cros_ec_dev.c b/drivers/mfd/cros_ec_dev.c
index e4fafdd96e5e..eafd06f62a3a 100644
--- a/drivers/mfd/cros_ec_dev.c
+++ b/drivers/mfd/cros_ec_dev.c
@@ -305,8 +305,8 @@ static void cros_ec_sensors_register(struct cros_ec_dev *ec)
 
 	resp = (struct ec_response_motion_sense *)msg->data;
 	sensor_num = resp->dump.sensor_count;
-	/* Allocate 2 extra sensors in case lid angle or FIFO are needed */
-	sensor_cells = kzalloc(sizeof(struct mfd_cell) * (sensor_num + 2),
+	/* Allocate 1 extra sensor in case FIFO is needed */
+	sensor_cells = kzalloc(sizeof(struct mfd_cell) * (sensor_num + 1),
 			       GFP_KERNEL);
 	if (sensor_cells == NULL)
 		goto error;
@@ -362,16 +362,10 @@ static void cros_ec_sensors_register(struct cros_ec_dev *ec)
 		sensor_type[resp->info.type]++;
 		id++;
 	}
-	if (sensor_type[MOTIONSENSE_TYPE_ACCEL] >= 2) {
-		sensor_platforms[id].sensor_num = sensor_num;
 
-		sensor_cells[id].name = "cros-ec-angle";
-		sensor_cells[id].id = 0;
-		sensor_cells[id].platform_data = &sensor_platforms[id];
-		sensor_cells[id].pdata_size =
-			sizeof(struct cros_ec_sensor_platform);
-		id++;
-	}
+	if (sensor_type[MOTIONSENSE_TYPE_ACCEL] >= 2)
+		ec->has_kb_wake_angle = true;
+
 	if (cros_ec_check_features(ec, EC_FEATURE_MOTION_SENSE_FIFO)) {
 		sensor_cells[id].name = "cros-ec-ring";
 		id++;
@@ -424,6 +418,14 @@ static int ec_device_probe(struct platform_device *pdev)
 		goto failed;
 	}
 
+	/* check whether this EC is a sensor hub. */
+	if (cros_ec_check_features(ec, EC_FEATURE_MOTION_SENSE))
+		cros_ec_sensors_register(ec);
+
+	/* Take control of the lightbar from the EC. */
+	lb_manual_suspend_ctrl(ec, 1);
+
+	/* We can now add the sysfs class, we know which parameter to show */
 	retval = cdev_device_add(&ec->cdev, &ec->class_dev);
 	if (retval) {
 		dev_err(dev, "cdev_device_add failed => %d\n", retval);
@@ -433,13 +435,6 @@ static int ec_device_probe(struct platform_device *pdev)
 	if (cros_ec_debugfs_init(ec))
 		dev_warn(dev, "failed to create debugfs directory\n");
 
-	/* check whether this EC is a sensor hub. */
-	if (cros_ec_check_features(ec, EC_FEATURE_MOTION_SENSE))
-		cros_ec_sensors_register(ec);
-
-	/* Take control of the lightbar from the EC. */
-	lb_manual_suspend_ctrl(ec, 1);
-
 	return 0;
 
 failed:
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index 4f015da78f28..a4c9c8297a6d 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -369,6 +369,9 @@ static const cxl_p2n_reg_t CXL_PSL_WED_An     = {0x0A0};
 #define CXL_PSL_TFC_An_AE (1ull << (63-30)) /* Restart PSL with address error */
 #define CXL_PSL_TFC_An_R  (1ull << (63-31)) /* Restart PSL transaction */
 
+/****** CXL_PSL_DEBUG *****************************************************/
+#define CXL_PSL_DEBUG_CDC  (1ull << (63-27)) /* Coherent Data cache support */
+
 /****** CXL_XSL9_IERAT_ERAT - CAIA 2 **********************************/
 #define CXL_XSL9_IERAT_MLPID    (1ull << (63-0))  /* Match LPID */
 #define CXL_XSL9_IERAT_MPID     (1ull << (63-1))  /* Match PID */
@@ -669,6 +672,7 @@ struct cxl_native {
 	irq_hw_number_t err_hwirq;
 	unsigned int err_virq;
 	u64 ps_off;
+	bool no_data_cache; /* set if no data cache on the card */
 	const struct cxl_service_layer_ops *sl_ops;
 };
 
@@ -1065,7 +1069,7 @@ int cxl_psl_purge(struct cxl_afu *afu);
 int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid,
 			  u32 *phb_index, u64 *capp_unit_id);
 int cxl_slot_is_switched(struct pci_dev *dev);
-int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg);
+int cxl_get_xsl9_dsnctl(struct pci_dev *dev, u64 capp_unit_id, u64 *reg);
 u64 cxl_calculate_sr(bool master, bool kernel, bool real_mode, bool p9);
 
 void cxl_native_irq_dump_regs_psl9(struct cxl_context *ctx);
diff --git a/drivers/misc/cxl/cxllib.c b/drivers/misc/cxl/cxllib.c
index 30ccba436b3b..0bc7c31cf739 100644
--- a/drivers/misc/cxl/cxllib.c
+++ b/drivers/misc/cxl/cxllib.c
@@ -99,7 +99,7 @@ int cxllib_get_xsl_config(struct pci_dev *dev, struct cxllib_xsl_config *cfg)
 	if (rc)
 		return rc;
 
-	rc = cxl_get_xsl9_dsnctl(capp_unit_id, &cfg->dsnctl);
+	rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &cfg->dsnctl);
 	if (rc)
 		return rc;
 	if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
@@ -208,49 +208,74 @@ int cxllib_get_PE_attributes(struct task_struct *task,
 }
 EXPORT_SYMBOL_GPL(cxllib_get_PE_attributes);
 
-int cxllib_handle_fault(struct mm_struct *mm, u64 addr, u64 size, u64 flags)
+static int get_vma_info(struct mm_struct *mm, u64 addr,
+			u64 *vma_start, u64 *vma_end,
+			unsigned long *page_size)
 {
-	int rc;
-	u64 dar;
 	struct vm_area_struct *vma = NULL;
-	unsigned long page_size;
-
-	if (mm == NULL)
-		return -EFAULT;
+	int rc = 0;
 
 	down_read(&mm->mmap_sem);
 
 	vma = find_vma(mm, addr);
 	if (!vma) {
-		pr_err("Can't find vma for addr %016llx\n", addr);
 		rc = -EFAULT;
 		goto out;
 	}
-	/* get the size of the pages allocated */
-	page_size = vma_kernel_pagesize(vma);
-
-	for (dar = (addr & ~(page_size - 1)); dar < (addr + size); dar += page_size) {
-		if (dar < vma->vm_start || dar >= vma->vm_end) {
-			vma = find_vma(mm, addr);
-			if (!vma) {
-				pr_err("Can't find vma for addr %016llx\n", addr);
-				rc = -EFAULT;
-				goto out;
-			}
-			/* get the size of the pages allocated */
-			page_size = vma_kernel_pagesize(vma);
+	*page_size = vma_kernel_pagesize(vma);
+	*vma_start = vma->vm_start;
+	*vma_end = vma->vm_end;
+out:
+	up_read(&mm->mmap_sem);
+	return rc;
+}
+
+int cxllib_handle_fault(struct mm_struct *mm, u64 addr, u64 size, u64 flags)
+{
+	int rc;
+	u64 dar, vma_start, vma_end;
+	unsigned long page_size;
+
+	if (mm == NULL)
+		return -EFAULT;
+
+	/*
+	 * The buffer we have to process can extend over several pages
+	 * and may also cover several VMAs.
+	 * We iterate over all the pages. The page size could vary
+	 * between VMAs.
+	 */
+	rc = get_vma_info(mm, addr, &vma_start, &vma_end, &page_size);
+	if (rc)
+		return rc;
+
+	for (dar = (addr & ~(page_size - 1)); dar < (addr + size);
+	     dar += page_size) {
+		if (dar < vma_start || dar >= vma_end) {
+			/*
+			 * We don't hold the mm->mmap_sem semaphore
+			 * while iterating, since the semaphore is
+			 * required by one of the lower-level page
+			 * fault processing functions and it could
+			 * create a deadlock.
+			 *
+			 * It means the VMAs can be altered between 2
+			 * loop iterations and we could theoretically
+			 * miss a page (however unlikely). But that's
+			 * not really a problem, as the driver will
+			 * retry access, get another page fault on the
+			 * missing page and call us again.
+			 */
+			rc = get_vma_info(mm, dar, &vma_start, &vma_end,
+					&page_size);
+			if (rc)
+				return rc;
 		}
 
 		rc = cxl_handle_mm_fault(mm, flags, dar);
-		if (rc) {
-			pr_err("cxl_handle_mm_fault failed %d", rc);
-			rc = -EFAULT;
-			goto out;
-		}
+		if (rc)
+			return -EFAULT;
 	}
-	rc = 0;
-out:
-	up_read(&mm->mmap_sem);
-	return rc;
+	return 0;
 }
 EXPORT_SYMBOL_GPL(cxllib_handle_fault);
diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c
index 1b3d7c65ea3f..98f867fcef24 100644
--- a/drivers/misc/cxl/native.c
+++ b/drivers/misc/cxl/native.c
@@ -353,8 +353,17 @@ int cxl_data_cache_flush(struct cxl *adapter)
 	u64 reg;
 	unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT);
 
-	pr_devel("Flushing data cache\n");
+	/*
+	 * Do a datacache flush only if datacache is available.
+	 * In case of PSL9D the datacache is absent, hence the flush
+	 * operation would time out.
+	 */
+	if (adapter->native->no_data_cache) {
+		pr_devel("No PSL data cache. Ignoring cache flush req.\n");
+		return 0;
+	}
 
+	pr_devel("Flushing data cache\n");
 	reg = cxl_p1_read(adapter, CXL_PSL_Control);
 	reg |= CXL_PSL_Control_Fr;
 	cxl_p1_write(adapter, CXL_PSL_Control, reg);
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 758842f65a1b..83f1d08058fc 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -407,21 +407,59 @@ int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid,
 	return 0;
 }
 
-int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg)
+static DEFINE_MUTEX(indications_mutex);
+
+static int get_phb_indications(struct pci_dev *dev, u64 *capiind, u64 *asnind,
+			       u64 *nbwind)
+{
+	static u64 nbw, asn, capi = 0; /* cached; capi == 0: not yet read */
+	struct device_node *np;
+	const __be32 *prop;
+
+	mutex_lock(&indications_mutex); /* guards the static cache above */
+	if (!capi) {
+		if (!(np = pnv_pci_get_phb_node(dev))) {
+			mutex_unlock(&indications_mutex);
+			return -ENODEV;
+		}
+
+		prop = of_get_property(np, "ibm,phb-indications", NULL);
+		if (!prop) {
+			nbw = 0x0300UL; /* property absent: legacy values */
+			asn = 0x0400UL;
+			capi = 0x0200UL;
+		} else {
+			nbw = (u64)be32_to_cpu(prop[2]); /* cells: capi, asn, nbw */
+			asn = (u64)be32_to_cpu(prop[1]);
+			capi = (u64)be32_to_cpu(prop[0]);
+		}
+		of_node_put(np);
+	}
+	*capiind = capi; /* all three outputs come from the cached values */
+	*asnind = asn;
+	*nbwind = nbw;
+	mutex_unlock(&indications_mutex);
+	return 0;
+}
+
+int cxl_get_xsl9_dsnctl(struct pci_dev *dev, u64 capp_unit_id, u64 *reg)
 {
 	u64 xsl_dsnctl;
+	u64 capiind, asnind, nbwind;
 
 	/*
 	 * CAPI Identifier bits [0:7]
 	 * bit 61:60 MSI bits --> 0
 	 * bit 59 TVT selector --> 0
 	 */
+	if (get_phb_indications(dev, &capiind, &asnind, &nbwind))
+		return -ENODEV;
 
 	/*
 	 * Tell XSL where to route data to.
 	 * The field chipid should match the PHB CAPI_CMPM register
 	 */
-	xsl_dsnctl = ((u64)0x2 << (63-7)); /* Bit 57 */
+	xsl_dsnctl = (capiind << (63-15)); /* Bit 57 */
 	xsl_dsnctl |= (capp_unit_id << (63-15));
 
 	/* nMMU_ID Defaults to: b’000001001’*/
@@ -435,14 +473,14 @@ int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg)
 		 * nbwind=0x03, bits [57:58], must include capi indicator.
 		 * Not supported on P9 DD1.
 		 */
-		xsl_dsnctl |= ((u64)0x03 << (63-47));
+		xsl_dsnctl |= (nbwind << (63-55));
 
 		/*
 		 * Upper 16b address bits of ASB_Notify messages sent to the
 		 * system. Need to match the PHB’s ASN Compare/Mask Register.
 		 * Not supported on P9 DD1.
 		 */
-		xsl_dsnctl |= ((u64)0x04 << (63-55));
+		xsl_dsnctl |= asnind;
 	}
 
 	*reg = xsl_dsnctl;
@@ -456,13 +494,14 @@ static int init_implementation_adapter_regs_psl9(struct cxl *adapter,
 	u64 chipid;
 	u32 phb_index;
 	u64 capp_unit_id;
+	u64 psl_debug;
 	int rc;
 
 	rc = cxl_calc_capp_routing(dev, &chipid, &phb_index, &capp_unit_id);
 	if (rc)
 		return rc;
 
-	rc = cxl_get_xsl9_dsnctl(capp_unit_id, &xsl_dsnctl);
+	rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &xsl_dsnctl);
 	if (rc)
 		return rc;
 
@@ -503,8 +542,22 @@ static int init_implementation_adapter_regs_psl9(struct cxl *adapter,
 	if (cxl_is_power9_dd1()) {
 		/* Disabling deadlock counter CAR */
 		cxl_p1_write(adapter, CXL_PSL9_GP_CT, 0x0020000000000001ULL);
-	} else
-		cxl_p1_write(adapter, CXL_PSL9_DEBUG, 0x4000000000000000ULL);
+		/* Enable NORST */
+		cxl_p1_write(adapter, CXL_PSL9_DEBUG, 0x8000000000000000ULL);
+	} else {
+		/* Enable NORST and DD2 features */
+		cxl_p1_write(adapter, CXL_PSL9_DEBUG, 0xC000000000000000ULL);
+	}
+
+	/*
+	 * Check if PSL has data-cache. We need to flush adapter datacache
+	 * when it is about to be removed.
+	 */
+	psl_debug = cxl_p1_read(adapter, CXL_PSL9_DEBUG);
+	if (psl_debug & CXL_PSL_DEBUG_CDC) {
+		dev_dbg(&dev->dev, "No data-cache present\n");
+		adapter->native->no_data_cache = true;
+	}
 
 	return 0;
 }
@@ -568,12 +621,6 @@ static int init_implementation_adapter_regs_xsl(struct cxl *adapter, struct pci_
 /* For the PSL this is a multiple for 0 < n <= 7: */
 #define PSL_2048_250MHZ_CYCLES 1
 
-static void write_timebase_ctrl_psl9(struct cxl *adapter)
-{
-	cxl_p1_write(adapter, CXL_PSL9_TB_CTLSTAT,
-		     TBSYNC_CNT(2 * PSL_2048_250MHZ_CYCLES));
-}
-
 static void write_timebase_ctrl_psl8(struct cxl *adapter)
 {
 	cxl_p1_write(adapter, CXL_PSL_TB_CTLSTAT,
@@ -612,9 +659,6 @@ static u64 timebase_read_xsl(struct cxl *adapter)
 
 static void cxl_setup_psl_timebase(struct cxl *adapter, struct pci_dev *dev)
 {
-	u64 psl_tb;
-	int delta;
-	unsigned int retry = 0;
 	struct device_node *np;
 
 	adapter->psl_timebase_synced = false;
@@ -635,26 +679,13 @@ static void cxl_setup_psl_timebase(struct cxl *adapter, struct pci_dev *dev)
 	 * Setup PSL Timebase Control and Status register
 	 * with the recommended Timebase Sync Count value
 	 */
-	adapter->native->sl_ops->write_timebase_ctrl(adapter);
+	if (adapter->native->sl_ops->write_timebase_ctrl)
+		adapter->native->sl_ops->write_timebase_ctrl(adapter);
 
 	/* Enable PSL Timebase */
 	cxl_p1_write(adapter, CXL_PSL_Control, 0x0000000000000000);
 	cxl_p1_write(adapter, CXL_PSL_Control, CXL_PSL_Control_tb);
 
-	/* Wait until CORE TB and PSL TB difference <= 16usecs */
-	do {
-		msleep(1);
-		if (retry++ > 5) {
-			dev_info(&dev->dev, "PSL timebase can't synchronize\n");
-			return;
-		}
-		psl_tb = adapter->native->sl_ops->timebase_read(adapter);
-		delta = mftb() - psl_tb;
-		if (delta < 0)
-			delta = -delta;
-	} while (tb_to_ns(delta) > 16000);
-
-	adapter->psl_timebase_synced = true;
 	return;
 }
 
@@ -1449,10 +1480,8 @@ int cxl_pci_reset(struct cxl *adapter)
 
 	/*
 	 * The adapter is about to be reset, so ignore errors.
-	 * Not supported on P9 DD1
 	 */
-	if ((cxl_is_power8()) || (!(cxl_is_power9_dd1())))
-		cxl_data_cache_flush(adapter);
+	cxl_data_cache_flush(adapter);
 
 	/* pcie_warm_reset requests a fundamental pci reset which includes a
 	 * PERST assert/deassert.  PERST triggers a loading of the image
@@ -1801,7 +1830,6 @@ static const struct cxl_service_layer_ops psl9_ops = {
 	.psl_irq_dump_registers = cxl_native_irq_dump_regs_psl9,
 	.err_irq_dump_registers = cxl_native_err_irq_dump_regs_psl9,
 	.debugfs_stop_trace = cxl_stop_trace_psl9,
-	.write_timebase_ctrl = write_timebase_ctrl_psl9,
 	.timebase_read = timebase_read_psl9,
 	.capi_mode = OPAL_PHB_CAPI_MODE_CAPI,
 	.needs_reset_before_disable = true,
@@ -1936,10 +1964,8 @@ static void cxl_pci_remove_adapter(struct cxl *adapter)
 
 	/*
 	 * Flush adapter datacache as its about to be removed.
-	 * Not supported on P9 DD1.
 	 */
-	if ((cxl_is_power8()) || (!(cxl_is_power9_dd1())))
-		cxl_data_cache_flush(adapter);
+	cxl_data_cache_flush(adapter);
 
 	cxl_deconfigure_adapter(adapter);
 
diff --git a/drivers/misc/cxl/sysfs.c b/drivers/misc/cxl/sysfs.c
index a8b6d6a635e9..95285b7f636f 100644
--- a/drivers/misc/cxl/sysfs.c
+++ b/drivers/misc/cxl/sysfs.c
@@ -62,7 +62,19 @@ static ssize_t psl_timebase_synced_show(struct device *device,
 					char *buf)
 {
 	struct cxl *adapter = to_cxl_adapter(device);
+	u64 psl_tb, delta;
 
+	/* Recompute the status only in native mode */
+	if (cpu_has_feature(CPU_FTR_HVMODE)) {
+		psl_tb = adapter->native->sl_ops->timebase_read(adapter);
+		delta = abs(mftb() - psl_tb);
+
+		/* CORE TB and PSL TB difference <= 16usecs ? */
+		adapter->psl_timebase_synced = (tb_to_ns(delta) < 16000) ? true : false;
+		pr_devel("PSL timebase %s - delta: 0x%016llx\n",
+			 (tb_to_ns(delta) < 16000) ? "synchronized" :
+			 "not synchronized", tb_to_ns(delta));
+	}
 	return scnprintf(buf, PAGE_SIZE, "%i\n", adapter->psl_timebase_synced);
 }
 
diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c
index 24108bfad889..6193270e7b3d 100644
--- a/drivers/misc/kgdbts.c
+++ b/drivers/misc/kgdbts.c
@@ -400,10 +400,14 @@ static void skip_back_repeat_test(char *arg)
 	int go_back = simple_strtol(arg, NULL, 10);
 
 	repeat_test--;
-	if (repeat_test <= 0)
+	if (repeat_test <= 0) {
 		ts.idx++;
-	else
+	} else {
+		if (repeat_test % 100 == 0)
+			v1printk("kgdbts:RUN ... %d remaining\n", repeat_test);
+
 		ts.idx -= go_back;
+	}
 	fill_get_buf(ts.tst[ts.idx].get);
 }
 
diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index 02485e310c81..9e923cd1d80e 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -3080,6 +3080,7 @@ static void __exit mmc_blk_exit(void)
 	mmc_unregister_driver(&mmc_driver);
 	unregister_blkdev(MMC_BLOCK_MAJOR, "mmc");
 	unregister_chrdev_region(mmc_rpmb_devt, MAX_DEVICES);
+	bus_unregister(&mmc_rpmb_bus_type);
 }
 
 module_init(mmc_blk_init);
diff --git a/drivers/mmc/host/jz4740_mmc.c b/drivers/mmc/host/jz4740_mmc.c
index 712e08d9a45e..a0168e9e4fce 100644
--- a/drivers/mmc/host/jz4740_mmc.c
+++ b/drivers/mmc/host/jz4740_mmc.c
@@ -362,9 +362,9 @@ static void jz4740_mmc_set_irq_enabled(struct jz4740_mmc_host *host,
 		host->irq_mask &= ~irq;
 	else
 		host->irq_mask |= irq;
-	spin_unlock_irqrestore(&host->lock, flags);
 
 	writew(host->irq_mask, host->base + JZ_REG_MMC_IMASK);
+	spin_unlock_irqrestore(&host->lock, flags);
 }
 
 static void jz4740_mmc_clock_enable(struct jz4740_mmc_host *host,
diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c
index e30df9ad8197..308029930304 100644
--- a/drivers/mmc/host/tmio_mmc_core.c
+++ b/drivers/mmc/host/tmio_mmc_core.c
@@ -913,7 +913,7 @@ static void tmio_mmc_finish_request(struct tmio_mmc_host *host)
 		host->check_scc_error(host);
 
 	/* If SET_BLOCK_COUNT, continue with main command */
-	if (host->mrq) {
+	if (host->mrq && !mrq->cmd->error) {
 		tmio_process_mrq(host, mrq);
 		return;
 	}
diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c
index b1fc28f63882..d0b63bbf46a7 100644
--- a/drivers/mtd/ubi/block.c
+++ b/drivers/mtd/ubi/block.c
@@ -244,7 +244,7 @@ static int ubiblock_open(struct block_device *bdev, fmode_t mode)
 	 * in any case.
 	 */
 	if (mode & FMODE_WRITE) {
-		ret = -EPERM;
+		ret = -EROFS;
 		goto out_unlock;
 	}
 
diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c
index e941395de3ae..753494e042d5 100644
--- a/drivers/mtd/ubi/build.c
+++ b/drivers/mtd/ubi/build.c
@@ -854,6 +854,17 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num,
 		return -EINVAL;
 	}
 
+	/*
+	 * Both UBI and UBIFS have been designed for SLC NAND and NOR flashes.
+	 * MLC NAND is different and needs special care, otherwise UBI or UBIFS
+	 * will die soon and you will lose all your data.
+	 */
+	if (mtd->type == MTD_MLCNANDFLASH) {
+		pr_err("ubi: refuse attaching mtd%d - MLC NAND is not supported\n",
+			mtd->index);
+		return -EINVAL;
+	}
+
 	if (ubi_num == UBI_DEV_NUM_AUTO) {
 		/* Search for an empty slot in the @ubi_devices array */
 		for (ubi_num = 0; ubi_num < UBI_MAX_DEVICES; ubi_num++)
diff --git a/drivers/mtd/ubi/fastmap-wl.c b/drivers/mtd/ubi/fastmap-wl.c
index 590d967011bb..98f7d6be8d1f 100644
--- a/drivers/mtd/ubi/fastmap-wl.c
+++ b/drivers/mtd/ubi/fastmap-wl.c
@@ -362,7 +362,6 @@ static void ubi_fastmap_close(struct ubi_device *ubi)
 {
 	int i;
 
-	flush_work(&ubi->fm_work);
 	return_unused_pool_pebs(ubi, &ubi->fm_pool);
 	return_unused_pool_pebs(ubi, &ubi->fm_wl_pool);
 
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index c96a92118b8b..32f6d2e24d66 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -951,9 +951,11 @@ void aq_nic_shutdown(struct aq_nic_s *self)
 
 	netif_device_detach(self->ndev);
 
-	err = aq_nic_stop(self);
-	if (err < 0)
-		goto err_exit;
+	if (netif_running(self->ndev)) {
+		err = aq_nic_stop(self);
+		if (err < 0)
+			goto err_exit;
+	}
 	aq_nic_deinit(self);
 
 err_exit:
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
index 84d7f4dd4ce1..e652d86b87d4 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
@@ -48,6 +48,8 @@
 #define FORCE_FLASHLESS 0
 
 static int hw_atl_utils_ver_match(u32 ver_expected, u32 ver_actual);
+static int hw_atl_utils_mpi_set_state(struct aq_hw_s *self,
+				      enum hal_atl_utils_fw_state_e state);
 
 int hw_atl_utils_initfw(struct aq_hw_s *self, const struct aq_fw_ops **fw_ops)
 {
@@ -247,6 +249,20 @@ int hw_atl_utils_soft_reset(struct aq_hw_s *self)
 
 	self->rbl_enabled = (boot_exit_code != 0);
 
+	/* FW 1.x may bootup in an invalid POWER state (WOL feature).
+	 * We should work around this by forcing its state back to DEINIT
+	 */
+	if (!hw_atl_utils_ver_match(HW_ATL_FW_VER_1X,
+				    aq_hw_read_reg(self,
+						   HW_ATL_MPI_FW_VERSION))) {
+		int err = 0;
+
+		hw_atl_utils_mpi_set_state(self, MPI_DEINIT);
+		AQ_HW_WAIT_FOR((aq_hw_read_reg(self, HW_ATL_MPI_STATE_ADR) &
+			       HW_ATL_MPI_STATE_MSK) == MPI_DEINIT,
+			       10, 1000U);
+	}
+
 	if (self->rbl_enabled)
 		return hw_atl_utils_soft_reset_rbl(self);
 	else
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 1991f0c7bc0e..f83769d8047b 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -6090,7 +6090,7 @@ static void bnxt_free_irq(struct bnxt *bp)
 	free_irq_cpu_rmap(bp->dev->rx_cpu_rmap);
 	bp->dev->rx_cpu_rmap = NULL;
 #endif
-	if (!bp->irq_tbl)
+	if (!bp->irq_tbl || !bp->bnapi)
 		return;
 
 	for (i = 0; i < bp->cp_nr_rings; i++) {
@@ -7686,6 +7686,8 @@ int bnxt_check_rings(struct bnxt *bp, int tx, int rx, bool sh, int tcs,
 	if (bp->flags & BNXT_FLAG_AGG_RINGS)
 		rx_rings <<= 1;
 	cp = sh ? max_t(int, tx_rings_needed, rx) : tx_rings_needed + rx;
+	if (bp->flags & BNXT_FLAG_NEW_RM)
+		cp += bnxt_get_ulp_msix_num(bp);
 	return bnxt_hwrm_check_rings(bp, tx_rings_needed, rx_rings, rx, cp,
 				     vnics);
 }
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 8d8ccd67e0e2..1f622ca2a64f 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -870,17 +870,22 @@ static int bnxt_get_rxfh(struct net_device *dev, u32 *indir, u8 *key,
 			 u8 *hfunc)
 {
 	struct bnxt *bp = netdev_priv(dev);
-	struct bnxt_vnic_info *vnic = &bp->vnic_info[0];
+	struct bnxt_vnic_info *vnic;
 	int i = 0;
 
 	if (hfunc)
 		*hfunc = ETH_RSS_HASH_TOP;
 
-	if (indir)
+	if (!bp->vnic_info)
+		return 0;
+
+	vnic = &bp->vnic_info[0];
+	if (indir && vnic->rss_table) {
 		for (i = 0; i < HW_HASH_INDEX_SIZE; i++)
 			indir[i] = le16_to_cpu(vnic->rss_table[i]);
+	}
 
-	if (key)
+	if (key && vnic->rss_hash_key)
 		memcpy(key, vnic->rss_hash_key, HW_HASH_KEY_SIZE);
 
 	return 0;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
index 65c2cee35766..795f45024c20 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
@@ -377,6 +377,30 @@ static bool is_wildcard(void *mask, int len)
 	return true;
 }
 
+static bool is_exactmatch(void *mask, int len)
+{
+	const u8 *p = mask;
+	int i;
+
+	for (i = 0; i < len; i++)
+		if (p[i] != 0xff)	/* a mask byte with some bit clear */
+			return false;
+
+	return true;	/* all len mask bytes are 0xff (also when len <= 0) */
+}
+
+static bool bits_set(void *key, int len)
+{
+	const u8 *p = key;
+	int i;
+
+	for (i = 0; i < len; i++)
+		if (p[i] != 0)	/* found a non-zero byte in the key */
+			return true;
+
+	return false;	/* every one of the len bytes is zero */
+}
+
 static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow,
 				    __le16 ref_flow_handle,
 				    __le32 tunnel_handle, __le16 *flow_handle)
@@ -764,6 +788,41 @@ static bool bnxt_tc_can_offload(struct bnxt *bp, struct bnxt_tc_flow *flow)
 		return false;
 	}
 
+	/* Currently source/dest MAC cannot be partial wildcard  */
+	if (bits_set(&flow->l2_key.smac, sizeof(flow->l2_key.smac)) &&
+	    !is_exactmatch(flow->l2_mask.smac, sizeof(flow->l2_mask.smac))) {
+		netdev_info(bp->dev, "Wildcard match unsupported for Source MAC\n");
+		return false;
+	}
+	if (bits_set(&flow->l2_key.dmac, sizeof(flow->l2_key.dmac)) &&
+	    !is_exactmatch(&flow->l2_mask.dmac, sizeof(flow->l2_mask.dmac))) {
+		netdev_info(bp->dev, "Wildcard match unsupported for Dest MAC\n");
+		return false;
+	}
+
+	/* Currently VLAN fields cannot be partial wildcard */
+	if (bits_set(&flow->l2_key.inner_vlan_tci,
+		     sizeof(flow->l2_key.inner_vlan_tci)) &&
+	    !is_exactmatch(&flow->l2_mask.inner_vlan_tci,
+			   sizeof(flow->l2_mask.inner_vlan_tci))) {
+		netdev_info(bp->dev, "Wildcard match unsupported for VLAN TCI\n");
+		return false;
+	}
+	if (bits_set(&flow->l2_key.inner_vlan_tpid,
+		     sizeof(flow->l2_key.inner_vlan_tpid)) &&
+	    !is_exactmatch(&flow->l2_mask.inner_vlan_tpid,
+			   sizeof(flow->l2_mask.inner_vlan_tpid))) {
+		netdev_info(bp->dev, "Wildcard match unsupported for VLAN TPID\n");
+		return false;
+	}
+
+	/* Currently Ethertype must be set */
+	if (!is_exactmatch(&flow->l2_mask.ether_type,
+			   sizeof(flow->l2_mask.ether_type))) {
+		netdev_info(bp->dev, "Wildcard match unsupported for Ethertype\n");
+		return false;
+	}
+
 	return true;
 }
 
@@ -992,8 +1051,10 @@ static int bnxt_tc_get_decap_handle(struct bnxt *bp, struct bnxt_tc_flow *flow,
 
 	/* Check if there's another flow using the same tunnel decap.
 	 * If not, add this tunnel to the table and resolve the other
-	 * tunnel header fileds
+	 * tunnel header fields. Ignore src_port in the tunnel_key,
+	 * since it is not required for decap filters.
 	 */
+	decap_key->tp_src = 0;
 	decap_node = bnxt_tc_get_tunnel_node(bp, &tc_info->decap_table,
 					     &tc_info->decap_ht_params,
 					     decap_key);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
index 26290403f38f..38f635cf8408 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
@@ -64,6 +64,31 @@ static int hwrm_cfa_vfr_free(struct bnxt *bp, u16 vf_idx)
 	return rc;
 }
 
+static int bnxt_hwrm_vfr_qcfg(struct bnxt *bp, struct bnxt_vf_rep *vf_rep,
+			      u16 *max_mtu)
+{
+	struct hwrm_func_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
+	struct hwrm_func_qcfg_input req = {0};
+	u16 mtu;
+	int rc;
+
+	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_QCFG, -1, -1);
+	req.fid = cpu_to_le16(bp->pf.vf[vf_rep->vf_idx].fw_fid); /* query the VF */
+
+	mutex_lock(&bp->hwrm_cmd_lock); /* held until resp has been read */
+
+	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+	if (!rc) {
+		mtu = le16_to_cpu(resp->max_mtu_configured);
+		if (!mtu)	/* zero reported: use BNXT_MAX_MTU instead */
+			*max_mtu = BNXT_MAX_MTU;
+		else
+			*max_mtu = mtu;
+	}
+	mutex_unlock(&bp->hwrm_cmd_lock);
+	return rc;	/* *max_mtu is left untouched when rc != 0 */
+}
+
 static int bnxt_vf_rep_open(struct net_device *dev)
 {
 	struct bnxt_vf_rep *vf_rep = netdev_priv(dev);
@@ -365,6 +390,7 @@ static void bnxt_vf_rep_netdev_init(struct bnxt *bp, struct bnxt_vf_rep *vf_rep,
 				    struct net_device *dev)
 {
 	struct net_device *pf_dev = bp->dev;
+	u16 max_mtu;
 
 	dev->netdev_ops = &bnxt_vf_rep_netdev_ops;
 	dev->ethtool_ops = &bnxt_vf_rep_ethtool_ops;
@@ -380,6 +406,10 @@ static void bnxt_vf_rep_netdev_init(struct bnxt *bp, struct bnxt_vf_rep *vf_rep,
 	bnxt_vf_rep_eth_addr_gen(bp->pf.mac_addr, vf_rep->vf_idx,
 				 dev->perm_addr);
 	ether_addr_copy(dev->dev_addr, dev->perm_addr);
+	/* Set VF-Rep's max-mtu to the corresponding VF's max-mtu */
+	if (!bnxt_hwrm_vfr_qcfg(bp, vf_rep, &max_mtu))
+		dev->max_mtu = max_mtu;
+	dev->min_mtu = ETH_ZLEN;
 }
 
 static int bnxt_pcie_dsn_get(struct bnxt *bp, u8 dsn[])
diff --git a/drivers/net/ethernet/cadence/macb_ptp.c b/drivers/net/ethernet/cadence/macb_ptp.c
index 2220c771092b..2220c771092b 100755..100644
--- a/drivers/net/ethernet/cadence/macb_ptp.c
+++ b/drivers/net/ethernet/cadence/macb_ptp.c
diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h
index 5fc46c5a4f36..448d1fafc827 100644
--- a/drivers/net/ethernet/cavium/thunder/nic.h
+++ b/drivers/net/ethernet/cavium/thunder/nic.h
@@ -265,14 +265,9 @@ struct nicvf_drv_stats {
 
 struct cavium_ptp;
 
-struct xcast_addr {
-	struct list_head list;
-	u64              addr;
-};
-
 struct xcast_addr_list {
-	struct list_head list;
 	int              count;
+	u64              mc[];
 };
 
 struct nicvf_work {
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 1e9a31fef729..707db3304396 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -1929,7 +1929,7 @@ static void nicvf_set_rx_mode_task(struct work_struct *work_arg)
 						  work.work);
 	struct nicvf *nic = container_of(vf_work, struct nicvf, rx_mode_work);
 	union nic_mbx mbx = {};
-	struct xcast_addr *xaddr, *next;
+	int idx;
 
 	if (!vf_work)
 		return;
@@ -1956,16 +1956,10 @@ static void nicvf_set_rx_mode_task(struct work_struct *work_arg)
 	/* check if we have any specific MACs to be added to PF DMAC filter */
 	if (vf_work->mc) {
 		/* now go through kernel list of MACs and add them one by one */
-		list_for_each_entry_safe(xaddr, next,
-					 &vf_work->mc->list, list) {
+		for (idx = 0; idx < vf_work->mc->count; idx++) {
 			mbx.xcast.msg = NIC_MBOX_MSG_ADD_MCAST;
-			mbx.xcast.data.mac = xaddr->addr;
+			mbx.xcast.data.mac = vf_work->mc->mc[idx];
 			nicvf_send_msg_to_pf(nic, &mbx);
-
-			/* after receiving ACK from PF release memory */
-			list_del(&xaddr->list);
-			kfree(xaddr);
-			vf_work->mc->count--;
 		}
 		kfree(vf_work->mc);
 	}
@@ -1996,17 +1990,15 @@ static void nicvf_set_rx_mode(struct net_device *netdev)
 			mode |= BGX_XCAST_MCAST_FILTER;
 			/* here we need to copy mc addrs */
 			if (netdev_mc_count(netdev)) {
-				struct xcast_addr *xaddr;
-
-				mc_list = kmalloc(sizeof(*mc_list), GFP_ATOMIC);
-				INIT_LIST_HEAD(&mc_list->list);
+				mc_list = kmalloc(offsetof(typeof(*mc_list),
+							   mc[netdev_mc_count(netdev)]),
+						  GFP_ATOMIC);
+				if (unlikely(!mc_list))
+					return;
+				mc_list->count = 0;
 				netdev_hw_addr_list_for_each(ha, &netdev->mc) {
-					xaddr = kmalloc(sizeof(*xaddr),
-							GFP_ATOMIC);
-					xaddr->addr =
+					mc_list->mc[mc_list->count] =
 						ether_addr_to_u64(ha->addr);
-					list_add_tail(&xaddr->list,
-						      &mc_list->list);
 					mc_list->count++;
 				}
 			}
diff --git a/drivers/net/ethernet/freescale/fsl_pq_mdio.c b/drivers/net/ethernet/freescale/fsl_pq_mdio.c
index 80ad16acf0f1..ac2c3f6a12bc 100644
--- a/drivers/net/ethernet/freescale/fsl_pq_mdio.c
+++ b/drivers/net/ethernet/freescale/fsl_pq_mdio.c
@@ -377,6 +377,38 @@ static const struct of_device_id fsl_pq_mdio_match[] = {
 };
 MODULE_DEVICE_TABLE(of, fsl_pq_mdio_match);
 
+static void set_tbipa(const u32 tbipa_val, struct platform_device *pdev,
+		      uint32_t __iomem * (*get_tbipa)(void __iomem *),
+		      void __iomem *reg_map, struct resource *reg_res)
+{
+	struct device_node *np = pdev->dev.of_node;
+	uint32_t __iomem *tbipa;
+	bool tbipa_mapped;	/* true only when of_iomap() below succeeded */
+
+	tbipa = of_iomap(np, 1);
+	if (tbipa) {
+		tbipa_mapped = true;
+	} else {
+		tbipa_mapped = false;
+		tbipa = (*get_tbipa)(reg_map); /* fallback: derive from reg_map */
+
+		/*
+		 * Add consistency check to make sure TBI is contained within
+		 * the mapped range (not because we would get a segfault,
+		 * rather to catch bugs in computing TBI address). Print error
+		 * message but continue anyway.
+		 */
+		if ((void *)tbipa > reg_map + resource_size(reg_res) - 4)
+			dev_err(&pdev->dev, "invalid register map (should be at least 0x%04zx to contain TBI address)\n",
+				((void *)tbipa - reg_map) + 4);
+	}
+
+	iowrite32be(be32_to_cpu(tbipa_val), tbipa); /* NOTE(review): u32 arg treated as BE - confirm */
+
+	if (tbipa_mapped)	/* unmap only what of_iomap() handed out */
+		iounmap(tbipa);
+}
+
 static int fsl_pq_mdio_probe(struct platform_device *pdev)
 {
 	const struct of_device_id *id =
@@ -450,8 +482,6 @@ static int fsl_pq_mdio_probe(struct platform_device *pdev)
 
 		if (tbi) {
 			const u32 *prop = of_get_property(tbi, "reg", NULL);
-			uint32_t __iomem *tbipa;
-
 			if (!prop) {
 				dev_err(&pdev->dev,
 					"missing 'reg' property in node %pOF\n",
@@ -459,20 +489,8 @@ static int fsl_pq_mdio_probe(struct platform_device *pdev)
 				err = -EBUSY;
 				goto error;
 			}
-
-			tbipa = data->get_tbipa(priv->map);
-
-			/*
-			 * Add consistency check to make sure TBI is contained
-			 * within the mapped range (not because we would get a
-			 * segfault, rather to catch bugs in computing TBI
-			 * address). Print error message but continue anyway.
-			 */
-			if ((void *)tbipa > priv->map + resource_size(&res) - 4)
-				dev_err(&pdev->dev, "invalid register map (should be at least 0x%04zx to contain TBI address)\n",
-					((void *)tbipa - priv->map) + 4);
-
-			iowrite32be(be32_to_cpup(prop), tbipa);
+			set_tbipa(*prop, pdev,
+				  data->get_tbipa, priv->map, &res);
 		}
 	}
 
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index b492af6affc3..aad5658d79d5 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -118,6 +118,7 @@ static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter);
 static int ibmvnic_init(struct ibmvnic_adapter *);
 static void release_crq_queue(struct ibmvnic_adapter *);
 static int __ibmvnic_set_mac(struct net_device *netdev, struct sockaddr *p);
+static int init_crq_queue(struct ibmvnic_adapter *adapter);
 
 struct ibmvnic_stat {
 	char name[ETH_GSTRING_LEN];
@@ -320,18 +321,16 @@ failure:
 	dev_info(dev, "replenish pools failure\n");
 	pool->free_map[pool->next_free] = index;
 	pool->rx_buff[index].skb = NULL;
-	if (!dma_mapping_error(dev, dma_addr))
-		dma_unmap_single(dev, dma_addr, pool->buff_size,
-				 DMA_FROM_DEVICE);
 
 	dev_kfree_skb_any(skb);
 	adapter->replenish_add_buff_failure++;
 	atomic_add(buffers_added, &pool->available);
 
-	if (lpar_rc == H_CLOSED) {
+	if (lpar_rc == H_CLOSED || adapter->failover_pending) {
 		/* Disable buffer pool replenishment and report carrier off if
-		 * queue is closed. Firmware guarantees that a signal will
-		 * be sent to the driver, triggering a reset.
+		 * queue is closed or pending failover.
+		 * Firmware guarantees that a signal will be sent to the
+		 * driver, triggering a reset.
 		 */
 		deactivate_rx_pools(adapter);
 		netif_carrier_off(adapter->netdev);
@@ -1071,6 +1070,14 @@ static int ibmvnic_open(struct net_device *netdev)
 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
 	int rc;
 
+	/* If device failover is pending, just set device state and return.
+	 * Device operation will be handled by reset routine.
+	 */
+	if (adapter->failover_pending) {
+		adapter->state = VNIC_OPEN;
+		return 0;
+	}
+
 	mutex_lock(&adapter->reset_lock);
 
 	if (adapter->state != VNIC_CLOSED) {
@@ -1218,7 +1225,6 @@ static int __ibmvnic_close(struct net_device *netdev)
 	rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN);
 	if (rc)
 		return rc;
-	ibmvnic_cleanup(netdev);
 	adapter->state = VNIC_CLOSED;
 	return 0;
 }
@@ -1228,8 +1234,17 @@ static int ibmvnic_close(struct net_device *netdev)
 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
 	int rc;
 
+	/* If device failover is pending, just set device state and return.
+	 * Device operation will be handled by reset routine.
+	 */
+	if (adapter->failover_pending) {
+		adapter->state = VNIC_CLOSED;
+		return 0;
+	}
+
 	mutex_lock(&adapter->reset_lock);
 	rc = __ibmvnic_close(netdev);
+	ibmvnic_cleanup(netdev);
 	mutex_unlock(&adapter->reset_lock);
 
 	return rc;
@@ -1562,8 +1577,9 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 		dev_kfree_skb_any(skb);
 		tx_buff->skb = NULL;
 
-		if (lpar_rc == H_CLOSED) {
-			/* Disable TX and report carrier off if queue is closed.
+		if (lpar_rc == H_CLOSED || adapter->failover_pending) {
+			/* Disable TX and report carrier off if queue is closed
+			 * or pending failover.
 			 * Firmware guarantees that a signal will be sent to the
 			 * driver, triggering a reset or some other action.
 			 */
@@ -1711,14 +1727,10 @@ static int do_reset(struct ibmvnic_adapter *adapter,
 	old_num_rx_queues = adapter->req_rx_queues;
 	old_num_tx_queues = adapter->req_tx_queues;
 
-	if (rwi->reset_reason == VNIC_RESET_MOBILITY) {
-		rc = ibmvnic_reenable_crq_queue(adapter);
-		if (rc)
-			return 0;
-		ibmvnic_cleanup(netdev);
-	} else if (rwi->reset_reason == VNIC_RESET_FAILOVER) {
-		ibmvnic_cleanup(netdev);
-	} else {
+	ibmvnic_cleanup(netdev);
+
+	if (adapter->reset_reason != VNIC_RESET_MOBILITY &&
+	    adapter->reset_reason != VNIC_RESET_FAILOVER) {
 		rc = __ibmvnic_close(netdev);
 		if (rc)
 			return rc;
@@ -1737,6 +1749,23 @@ static int do_reset(struct ibmvnic_adapter *adapter,
 		 */
 		adapter->state = VNIC_PROBED;
 
+		if (adapter->wait_for_reset) {
+			rc = init_crq_queue(adapter);
+		} else if (adapter->reset_reason == VNIC_RESET_MOBILITY) {
+			rc = ibmvnic_reenable_crq_queue(adapter);
+			release_sub_crqs(adapter, 1);
+		} else {
+			rc = ibmvnic_reset_crq(adapter);
+			if (!rc)
+				rc = vio_enable_interrupts(adapter->vdev);
+		}
+
+		if (rc) {
+			netdev_err(adapter->netdev,
+				   "Couldn't initialize crq. rc=%d\n", rc);
+			return rc;
+		}
+
 		rc = ibmvnic_init(adapter);
 		if (rc)
 			return IBMVNIC_INIT_FAILED;
@@ -1878,23 +1907,26 @@ static void __ibmvnic_reset(struct work_struct *work)
 	mutex_unlock(&adapter->reset_lock);
 }
 
-static void ibmvnic_reset(struct ibmvnic_adapter *adapter,
-			  enum ibmvnic_reset_reason reason)
+static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
+			 enum ibmvnic_reset_reason reason)
 {
 	struct ibmvnic_rwi *rwi, *tmp;
 	struct net_device *netdev = adapter->netdev;
 	struct list_head *entry;
+	int ret;
 
 	if (adapter->state == VNIC_REMOVING ||
-	    adapter->state == VNIC_REMOVED) {
-		netdev_dbg(netdev, "Adapter removing, skipping reset\n");
-		return;
+	    adapter->state == VNIC_REMOVED ||
+	    adapter->failover_pending) {
+		ret = EBUSY;
+		netdev_dbg(netdev, "Adapter removing or pending failover, skipping reset\n");
+		goto err;
 	}
 
 	if (adapter->state == VNIC_PROBING) {
 		netdev_warn(netdev, "Adapter reset during probe\n");
-		adapter->init_done_rc = EAGAIN;
-		return;
+		ret = adapter->init_done_rc = EAGAIN;
+		goto err;
 	}
 
 	mutex_lock(&adapter->rwi_lock);
@@ -1904,7 +1936,8 @@ static void ibmvnic_reset(struct ibmvnic_adapter *adapter,
 		if (tmp->reset_reason == reason) {
 			netdev_dbg(netdev, "Skipping matching reset\n");
 			mutex_unlock(&adapter->rwi_lock);
-			return;
+			ret = EBUSY;
+			goto err;
 		}
 	}
 
@@ -1912,7 +1945,8 @@ static void ibmvnic_reset(struct ibmvnic_adapter *adapter,
 	if (!rwi) {
 		mutex_unlock(&adapter->rwi_lock);
 		ibmvnic_close(netdev);
-		return;
+		ret = ENOMEM;
+		goto err;
 	}
 
 	rwi->reset_reason = reason;
@@ -1921,6 +1955,12 @@ static void ibmvnic_reset(struct ibmvnic_adapter *adapter,
 
 	netdev_dbg(adapter->netdev, "Scheduling reset (reason %d)\n", reason);
 	schedule_work(&adapter->ibmvnic_reset);
+
+	return 0;
+err:
+	if (adapter->wait_for_reset)
+		adapter->wait_for_reset = false;
+	return -ret;
 }
 
 static void ibmvnic_tx_timeout(struct net_device *dev)
@@ -2055,6 +2095,8 @@ static void ibmvnic_netpoll_controller(struct net_device *dev)
 
 static int wait_for_reset(struct ibmvnic_adapter *adapter)
 {
+	int rc, ret;
+
 	adapter->fallback.mtu = adapter->req_mtu;
 	adapter->fallback.rx_queues = adapter->req_rx_queues;
 	adapter->fallback.tx_queues = adapter->req_tx_queues;
@@ -2062,11 +2104,15 @@ static int wait_for_reset(struct ibmvnic_adapter *adapter)
 	adapter->fallback.tx_entries = adapter->req_tx_entries_per_subcrq;
 
 	init_completion(&adapter->reset_done);
-	ibmvnic_reset(adapter, VNIC_RESET_CHANGE_PARAM);
 	adapter->wait_for_reset = true;
+	rc = ibmvnic_reset(adapter, VNIC_RESET_CHANGE_PARAM);
+	if (rc)
+		return rc;
 	wait_for_completion(&adapter->reset_done);
 
+	ret = 0;
 	if (adapter->reset_done_rc) {
+		ret = -EIO;
 		adapter->desired.mtu = adapter->fallback.mtu;
 		adapter->desired.rx_queues = adapter->fallback.rx_queues;
 		adapter->desired.tx_queues = adapter->fallback.tx_queues;
@@ -2074,12 +2120,15 @@ static int wait_for_reset(struct ibmvnic_adapter *adapter)
 		adapter->desired.tx_entries = adapter->fallback.tx_entries;
 
 		init_completion(&adapter->reset_done);
-		ibmvnic_reset(adapter, VNIC_RESET_CHANGE_PARAM);
+		adapter->wait_for_reset = true;
+		rc = ibmvnic_reset(adapter, VNIC_RESET_CHANGE_PARAM);
+		if (rc)
+			return ret;
 		wait_for_completion(&adapter->reset_done);
 	}
 	adapter->wait_for_reset = false;
 
-	return adapter->reset_done_rc;
+	return ret;
 }
 
 static int ibmvnic_change_mtu(struct net_device *netdev, int new_mtu)
@@ -2364,6 +2413,7 @@ static int reset_one_sub_crq_queue(struct ibmvnic_adapter *adapter,
 	}
 
 	memset(scrq->msgs, 0, 4 * PAGE_SIZE);
+	atomic_set(&scrq->used, 0);
 	scrq->cur = 0;
 
 	rc = h_reg_sub_crq(adapter->vdev->unit_address, scrq->msg_token,
@@ -2574,7 +2624,7 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter,
 	union sub_crq *next;
 	int index;
 	int i, j;
-	u8 first;
+	u8 *first;
 
 restart_loop:
 	while (pending_scrq(adapter, scrq)) {
@@ -2605,11 +2655,12 @@ restart_loop:
 				txbuff->data_dma[j] = 0;
 			}
 			/* if sub_crq was sent indirectly */
-			first = txbuff->indir_arr[0].generic.first;
-			if (first == IBMVNIC_CRQ_CMD) {
+			first = &txbuff->indir_arr[0].generic.first;
+			if (*first == IBMVNIC_CRQ_CMD) {
 				dma_unmap_single(dev, txbuff->indir_dma,
 						 sizeof(txbuff->indir_arr),
 						 DMA_TO_DEVICE);
+				*first = 0;
 			}
 
 			if (txbuff->last_frag) {
@@ -3882,9 +3933,9 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq,
 	int i;
 
 	dma_unmap_single(dev, adapter->login_buf_token, adapter->login_buf_sz,
-			 DMA_BIDIRECTIONAL);
+			 DMA_TO_DEVICE);
 	dma_unmap_single(dev, adapter->login_rsp_buf_token,
-			 adapter->login_rsp_buf_sz, DMA_BIDIRECTIONAL);
+			 adapter->login_rsp_buf_sz, DMA_FROM_DEVICE);
 
 	/* If the number of queues requested can't be allocated by the
 	 * server, the login response will return with code 1. We will need
@@ -4144,7 +4195,9 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
 		case IBMVNIC_CRQ_INIT:
 			dev_info(dev, "Partner initialized\n");
 			adapter->from_passive_init = true;
+			adapter->failover_pending = false;
 			complete(&adapter->init_done);
+			ibmvnic_reset(adapter, VNIC_RESET_FAILOVER);
 			break;
 		case IBMVNIC_CRQ_INIT_COMPLETE:
 			dev_info(dev, "Partner initialization complete\n");
@@ -4161,7 +4214,7 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
 			ibmvnic_reset(adapter, VNIC_RESET_MOBILITY);
 		} else if (gen_crq->cmd == IBMVNIC_DEVICE_FAILOVER) {
 			dev_info(dev, "Backing device failover detected\n");
-			ibmvnic_reset(adapter, VNIC_RESET_FAILOVER);
+			adapter->failover_pending = true;
 		} else {
 			/* The adapter lost the connection */
 			dev_err(dev, "Virtual Adapter failed (rc=%d)\n",
@@ -4461,19 +4514,6 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter)
 	u64 old_num_rx_queues, old_num_tx_queues;
 	int rc;
 
-	if (adapter->resetting && !adapter->wait_for_reset) {
-		rc = ibmvnic_reset_crq(adapter);
-		if (!rc)
-			rc = vio_enable_interrupts(adapter->vdev);
-	} else {
-		rc = init_crq_queue(adapter);
-	}
-
-	if (rc) {
-		dev_err(dev, "Couldn't initialize crq. rc=%d\n", rc);
-		return rc;
-	}
-
 	adapter->from_passive_init = false;
 
 	old_num_rx_queues = adapter->req_rx_queues;
@@ -4498,7 +4538,8 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter)
 		return -1;
 	}
 
-	if (adapter->resetting && !adapter->wait_for_reset) {
+	if (adapter->resetting && !adapter->wait_for_reset &&
+	    adapter->reset_reason != VNIC_RESET_MOBILITY) {
 		if (adapter->req_rx_queues != old_num_rx_queues ||
 		    adapter->req_tx_queues != old_num_tx_queues) {
 			release_sub_crqs(adapter, 0);
@@ -4586,6 +4627,13 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
 	adapter->mac_change_pending = false;
 
 	do {
+		rc = init_crq_queue(adapter);
+		if (rc) {
+			dev_err(&dev->dev, "Couldn't initialize crq. rc=%d\n",
+				rc);
+			goto ibmvnic_init_fail;
+		}
+
 		rc = ibmvnic_init(adapter);
 		if (rc && rc != EAGAIN)
 			goto ibmvnic_init_fail;
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index 89efe700eafe..99c0b58c2c39 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -1108,6 +1108,7 @@ struct ibmvnic_adapter {
 	bool napi_enabled, from_passive_init;
 
 	bool mac_change_pending;
+	bool failover_pending;
 
 	struct ibmvnic_tunables desired;
 	struct ibmvnic_tunables fallback;
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 385f5d425d19..21977ec984c4 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -468,8 +468,10 @@ enum ice_status ice_init_hw(struct ice_hw *hw)
 	mac_buf_len = sizeof(struct ice_aqc_manage_mac_read_resp);
 	mac_buf = devm_kzalloc(ice_hw_to_dev(hw), mac_buf_len, GFP_KERNEL);
 
-	if (!mac_buf)
+	if (!mac_buf) {
+		status = ICE_ERR_NO_MEMORY;
 		goto err_unroll_fltr_mgmt_struct;
+	}
 
 	status = ice_aq_manage_mac_read(hw, mac_buf, mac_buf_len, NULL);
 	devm_kfree(ice_hw_to_dev(hw), mac_buf);
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 186764a5c263..1db304c01d10 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -156,7 +156,7 @@ ice_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo)
 
 static int ice_get_regs_len(struct net_device __always_unused *netdev)
 {
-	return ARRAY_SIZE(ice_regs_dump_list);
+	return sizeof(ice_regs_dump_list);
 }
 
 static void
@@ -170,7 +170,7 @@ ice_get_regs(struct net_device *netdev, struct ethtool_regs *regs, void *p)
 
 	regs->version = 1;
 
-	for (i = 0; i < ARRAY_SIZE(ice_regs_dump_list) / sizeof(u32); ++i)
+	for (i = 0; i < ARRAY_SIZE(ice_regs_dump_list); ++i)
 		regs_buf[i] = rd32(hw, ice_regs_dump_list[i]);
 }
 
diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index 7fc1bbf51c44..54a038943c06 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -1604,7 +1604,7 @@ static int mvpp2_prs_init_from_hw(struct mvpp2 *priv,
 {
 	int i;
 
-	if (pe->index > MVPP2_PRS_TCAM_SRAM_SIZE - 1)
+	if (tid > MVPP2_PRS_TCAM_SRAM_SIZE - 1)
 		return -EINVAL;
 
 	memset(pe, 0, sizeof(*pe));
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 53fffd09d133..ca38a30fbe91 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -3805,18 +3805,6 @@ static const struct mlxsw_config_profile mlxsw_sp_config_profile = {
 	},
 };
 
-static u64 mlxsw_sp_resource_kvd_linear_occ_get(struct devlink *devlink)
-{
-	struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
-	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
-
-	return mlxsw_sp_kvdl_occ_get(mlxsw_sp);
-}
-
-static const struct devlink_resource_ops mlxsw_sp_resource_kvd_linear_ops = {
-	.occ_get = mlxsw_sp_resource_kvd_linear_occ_get,
-};
-
 static void
 mlxsw_sp_resource_size_params_prepare(struct mlxsw_core *mlxsw_core,
 				      struct devlink_resource_size_params *kvd_size_params,
@@ -3877,8 +3865,7 @@ static int mlxsw_sp_resources_register(struct mlxsw_core *mlxsw_core)
 	err = devlink_resource_register(devlink, MLXSW_SP_RESOURCE_NAME_KVD,
 					kvd_size, MLXSW_SP_RESOURCE_KVD,
 					DEVLINK_RESOURCE_ID_PARENT_TOP,
-					&kvd_size_params,
-					NULL);
+					&kvd_size_params);
 	if (err)
 		return err;
 
@@ -3887,8 +3874,7 @@ static int mlxsw_sp_resources_register(struct mlxsw_core *mlxsw_core)
 					linear_size,
 					MLXSW_SP_RESOURCE_KVD_LINEAR,
 					MLXSW_SP_RESOURCE_KVD,
-					&linear_size_params,
-					&mlxsw_sp_resource_kvd_linear_ops);
+					&linear_size_params);
 	if (err)
 		return err;
 
@@ -3905,8 +3891,7 @@ static int mlxsw_sp_resources_register(struct mlxsw_core *mlxsw_core)
 					double_size,
 					MLXSW_SP_RESOURCE_KVD_HASH_DOUBLE,
 					MLXSW_SP_RESOURCE_KVD,
-					&hash_double_size_params,
-					NULL);
+					&hash_double_size_params);
 	if (err)
 		return err;
 
@@ -3915,8 +3900,7 @@ static int mlxsw_sp_resources_register(struct mlxsw_core *mlxsw_core)
 					single_size,
 					MLXSW_SP_RESOURCE_KVD_HASH_SINGLE,
 					MLXSW_SP_RESOURCE_KVD,
-					&hash_single_size_params,
-					NULL);
+					&hash_single_size_params);
 	if (err)
 		return err;
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 82820ba43728..804d4d2c8031 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -442,7 +442,6 @@ void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index);
 int mlxsw_sp_kvdl_alloc_size_query(struct mlxsw_sp *mlxsw_sp,
 				   unsigned int entry_count,
 				   unsigned int *p_alloc_size);
-u64 mlxsw_sp_kvdl_occ_get(const struct mlxsw_sp *mlxsw_sp);
 int mlxsw_sp_kvdl_resources_register(struct mlxsw_core *mlxsw_core);
 
 struct mlxsw_sp_acl_rule_info {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
index 8796db44dcc3..fe4327f547d2 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c
@@ -315,8 +315,9 @@ static u64 mlxsw_sp_kvdl_part_occ(struct mlxsw_sp_kvdl_part *part)
 	return occ;
 }
 
-u64 mlxsw_sp_kvdl_occ_get(const struct mlxsw_sp *mlxsw_sp)
+static u64 mlxsw_sp_kvdl_occ_get(void *priv)
 {
+	const struct mlxsw_sp *mlxsw_sp = priv;
 	u64 occ = 0;
 	int i;
 
@@ -326,48 +327,33 @@ u64 mlxsw_sp_kvdl_occ_get(const struct mlxsw_sp *mlxsw_sp)
 	return occ;
 }
 
-static u64 mlxsw_sp_kvdl_single_occ_get(struct devlink *devlink)
+static u64 mlxsw_sp_kvdl_single_occ_get(void *priv)
 {
-	struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
-	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+	const struct mlxsw_sp *mlxsw_sp = priv;
 	struct mlxsw_sp_kvdl_part *part;
 
 	part = mlxsw_sp->kvdl->parts[MLXSW_SP_KVDL_PART_ID_SINGLE];
 	return mlxsw_sp_kvdl_part_occ(part);
 }
 
-static u64 mlxsw_sp_kvdl_chunks_occ_get(struct devlink *devlink)
+static u64 mlxsw_sp_kvdl_chunks_occ_get(void *priv)
 {
-	struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
-	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+	const struct mlxsw_sp *mlxsw_sp = priv;
 	struct mlxsw_sp_kvdl_part *part;
 
 	part = mlxsw_sp->kvdl->parts[MLXSW_SP_KVDL_PART_ID_CHUNKS];
 	return mlxsw_sp_kvdl_part_occ(part);
 }
 
-static u64 mlxsw_sp_kvdl_large_chunks_occ_get(struct devlink *devlink)
+static u64 mlxsw_sp_kvdl_large_chunks_occ_get(void *priv)
 {
-	struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
-	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+	const struct mlxsw_sp *mlxsw_sp = priv;
 	struct mlxsw_sp_kvdl_part *part;
 
 	part = mlxsw_sp->kvdl->parts[MLXSW_SP_KVDL_PART_ID_LARGE_CHUNKS];
 	return mlxsw_sp_kvdl_part_occ(part);
 }
 
-static const struct devlink_resource_ops mlxsw_sp_kvdl_single_ops = {
-	.occ_get = mlxsw_sp_kvdl_single_occ_get,
-};
-
-static const struct devlink_resource_ops mlxsw_sp_kvdl_chunks_ops = {
-	.occ_get = mlxsw_sp_kvdl_chunks_occ_get,
-};
-
-static const struct devlink_resource_ops mlxsw_sp_kvdl_chunks_large_ops = {
-	.occ_get = mlxsw_sp_kvdl_large_chunks_occ_get,
-};
-
 int mlxsw_sp_kvdl_resources_register(struct mlxsw_core *mlxsw_core)
 {
 	struct devlink *devlink = priv_to_devlink(mlxsw_core);
@@ -386,8 +372,7 @@ int mlxsw_sp_kvdl_resources_register(struct mlxsw_core *mlxsw_core)
 					MLXSW_SP_KVDL_SINGLE_SIZE,
 					MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE,
 					MLXSW_SP_RESOURCE_KVD_LINEAR,
-					&size_params,
-					&mlxsw_sp_kvdl_single_ops);
+					&size_params);
 	if (err)
 		return err;
 
@@ -398,8 +383,7 @@ int mlxsw_sp_kvdl_resources_register(struct mlxsw_core *mlxsw_core)
 					MLXSW_SP_KVDL_CHUNKS_SIZE,
 					MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS,
 					MLXSW_SP_RESOURCE_KVD_LINEAR,
-					&size_params,
-					&mlxsw_sp_kvdl_chunks_ops);
+					&size_params);
 	if (err)
 		return err;
 
@@ -410,13 +394,13 @@ int mlxsw_sp_kvdl_resources_register(struct mlxsw_core *mlxsw_core)
 					MLXSW_SP_KVDL_LARGE_CHUNKS_SIZE,
 					MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS,
 					MLXSW_SP_RESOURCE_KVD_LINEAR,
-					&size_params,
-					&mlxsw_sp_kvdl_chunks_large_ops);
+					&size_params);
 	return err;
 }
 
 int mlxsw_sp_kvdl_init(struct mlxsw_sp *mlxsw_sp)
 {
+	struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
 	struct mlxsw_sp_kvdl *kvdl;
 	int err;
 
@@ -429,6 +413,23 @@ int mlxsw_sp_kvdl_init(struct mlxsw_sp *mlxsw_sp)
 	if (err)
 		goto err_kvdl_parts_init;
 
+	devlink_resource_occ_get_register(devlink,
+					  MLXSW_SP_RESOURCE_KVD_LINEAR,
+					  mlxsw_sp_kvdl_occ_get,
+					  mlxsw_sp);
+	devlink_resource_occ_get_register(devlink,
+					  MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE,
+					  mlxsw_sp_kvdl_single_occ_get,
+					  mlxsw_sp);
+	devlink_resource_occ_get_register(devlink,
+					  MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS,
+					  mlxsw_sp_kvdl_chunks_occ_get,
+					  mlxsw_sp);
+	devlink_resource_occ_get_register(devlink,
+					  MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS,
+					  mlxsw_sp_kvdl_large_chunks_occ_get,
+					  mlxsw_sp);
+
 	return 0;
 
 err_kvdl_parts_init:
@@ -438,6 +439,16 @@ err_kvdl_parts_init:
 
 void mlxsw_sp_kvdl_fini(struct mlxsw_sp *mlxsw_sp)
 {
+	struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+
+	devlink_resource_occ_get_unregister(devlink,
+					    MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS);
+	devlink_resource_occ_get_unregister(devlink,
+					    MLXSW_SP_RESOURCE_KVD_LINEAR_CHUNKS);
+	devlink_resource_occ_get_unregister(devlink,
+					    MLXSW_SP_RESOURCE_KVD_LINEAR_SINGLE);
+	devlink_resource_occ_get_unregister(devlink,
+					    MLXSW_SP_RESOURCE_KVD_LINEAR);
 	mlxsw_sp_kvdl_parts_fini(mlxsw_sp);
 	kfree(mlxsw_sp->kvdl);
 }
diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c
index 9c2567b0d93e..dfad93fca0a6 100644
--- a/drivers/net/ethernet/sfc/mcdi.c
+++ b/drivers/net/ethernet/sfc/mcdi.c
@@ -375,7 +375,7 @@ static int efx_mcdi_poll(struct efx_nic *efx)
 	 * because generally mcdi responses are fast. After that, back off
 	 * and poll once a jiffy (approximately)
 	 */
-	spins = TICK_USEC;
+	spins = USER_TICK_USEC;
 	finish = jiffies + MCDI_RPC_TIMEOUT;
 
 	while (1) {
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index c9910c33e671..04f611e6f678 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -109,11 +109,11 @@ static void free_netvsc_device_rcu(struct netvsc_device *nvdev)
 	call_rcu(&nvdev->rcu, free_netvsc_device);
 }
 
-static void netvsc_revoke_buf(struct hv_device *device,
-			      struct netvsc_device *net_device)
+static void netvsc_revoke_recv_buf(struct hv_device *device,
+				   struct netvsc_device *net_device,
+				   struct net_device *ndev)
 {
 	struct nvsp_message *revoke_packet;
-	struct net_device *ndev = hv_get_drvdata(device);
 	int ret;
 
 	/*
@@ -157,6 +157,14 @@ static void netvsc_revoke_buf(struct hv_device *device,
 		}
 		net_device->recv_section_cnt = 0;
 	}
+}
+
+static void netvsc_revoke_send_buf(struct hv_device *device,
+				   struct netvsc_device *net_device,
+				   struct net_device *ndev)
+{
+	struct nvsp_message *revoke_packet;
+	int ret;
 
 	/* Deal with the send buffer we may have setup.
 	 * If we got a  send section size, it means we received a
@@ -202,10 +210,10 @@ static void netvsc_revoke_buf(struct hv_device *device,
 	}
 }
 
-static void netvsc_teardown_gpadl(struct hv_device *device,
-				  struct netvsc_device *net_device)
+static void netvsc_teardown_recv_gpadl(struct hv_device *device,
+				       struct netvsc_device *net_device,
+				       struct net_device *ndev)
 {
-	struct net_device *ndev = hv_get_drvdata(device);
 	int ret;
 
 	if (net_device->recv_buf_gpadl_handle) {
@@ -222,6 +230,13 @@ static void netvsc_teardown_gpadl(struct hv_device *device,
 		}
 		net_device->recv_buf_gpadl_handle = 0;
 	}
+}
+
+static void netvsc_teardown_send_gpadl(struct hv_device *device,
+				       struct netvsc_device *net_device,
+				       struct net_device *ndev)
+{
+	int ret;
 
 	if (net_device->send_buf_gpadl_handle) {
 		ret = vmbus_teardown_gpadl(device->channel,
@@ -437,8 +452,10 @@ static int netvsc_init_buf(struct hv_device *device,
 	goto exit;
 
 cleanup:
-	netvsc_revoke_buf(device, net_device);
-	netvsc_teardown_gpadl(device, net_device);
+	netvsc_revoke_recv_buf(device, net_device, ndev);
+	netvsc_revoke_send_buf(device, net_device, ndev);
+	netvsc_teardown_recv_gpadl(device, net_device, ndev);
+	netvsc_teardown_send_gpadl(device, net_device, ndev);
 
 exit:
 	return ret;
@@ -457,7 +474,6 @@ static int negotiate_nvsp_ver(struct hv_device *device,
 	init_packet->hdr.msg_type = NVSP_MSG_TYPE_INIT;
 	init_packet->msg.init_msg.init.min_protocol_ver = nvsp_ver;
 	init_packet->msg.init_msg.init.max_protocol_ver = nvsp_ver;
-
 	trace_nvsp_send(ndev, init_packet);
 
 	/* Send the init request */
@@ -575,7 +591,17 @@ void netvsc_device_remove(struct hv_device *device)
 		= rtnl_dereference(net_device_ctx->nvdev);
 	int i;
 
-	netvsc_revoke_buf(device, net_device);
+	/*
+	 * Revoke receive buffer. If host is pre-Win2016 then tear down
+	 * receive buffer GPADL. Do the same for send buffer.
+	 */
+	netvsc_revoke_recv_buf(device, net_device, ndev);
+	if (vmbus_proto_version < VERSION_WIN10)
+		netvsc_teardown_recv_gpadl(device, net_device, ndev);
+
+	netvsc_revoke_send_buf(device, net_device, ndev);
+	if (vmbus_proto_version < VERSION_WIN10)
+		netvsc_teardown_send_gpadl(device, net_device, ndev);
 
 	RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
 
@@ -589,15 +615,17 @@ void netvsc_device_remove(struct hv_device *device)
 	 */
 	netdev_dbg(ndev, "net device safe to remove\n");
 
-	/* older versions require that buffer be revoked before close */
-	if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_4)
-		netvsc_teardown_gpadl(device, net_device);
-
 	/* Now, we can close the channel safely */
 	vmbus_close(device->channel);
 
-	if (net_device->nvsp_version >= NVSP_PROTOCOL_VERSION_4)
-		netvsc_teardown_gpadl(device, net_device);
+	/*
+	 * If host is Win2016 or higher then we do the GPADL tear down
+	 * here after VMBus is closed.
+	 */
+	if (vmbus_proto_version >= VERSION_WIN10) {
+		netvsc_teardown_recv_gpadl(device, net_device, ndev);
+		netvsc_teardown_send_gpadl(device, net_device, ndev);
+	}
 
 	/* Release all resources */
 	free_netvsc_device_rcu(net_device);
diff --git a/drivers/net/netdevsim/devlink.c b/drivers/net/netdevsim/devlink.c
index 1dba47936456..bef7db5d129a 100644
--- a/drivers/net/netdevsim/devlink.c
+++ b/drivers/net/netdevsim/devlink.c
@@ -30,52 +30,36 @@ static struct net *nsim_devlink_net(struct devlink *devlink)
 
 /* IPv4
  */
-static u64 nsim_ipv4_fib_resource_occ_get(struct devlink *devlink)
+static u64 nsim_ipv4_fib_resource_occ_get(void *priv)
 {
-	struct net *net = nsim_devlink_net(devlink);
+	struct net *net = priv;
 
 	return nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB, false);
 }
 
-static struct devlink_resource_ops nsim_ipv4_fib_res_ops = {
-	.occ_get = nsim_ipv4_fib_resource_occ_get,
-};
-
-static u64 nsim_ipv4_fib_rules_res_occ_get(struct devlink *devlink)
+static u64 nsim_ipv4_fib_rules_res_occ_get(void *priv)
 {
-	struct net *net = nsim_devlink_net(devlink);
+	struct net *net = priv;
 
 	return nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB_RULES, false);
 }
 
-static struct devlink_resource_ops nsim_ipv4_fib_rules_res_ops = {
-	.occ_get = nsim_ipv4_fib_rules_res_occ_get,
-};
-
 /* IPv6
  */
-static u64 nsim_ipv6_fib_resource_occ_get(struct devlink *devlink)
+static u64 nsim_ipv6_fib_resource_occ_get(void *priv)
 {
-	struct net *net = nsim_devlink_net(devlink);
+	struct net *net = priv;
 
 	return nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB, false);
 }
 
-static struct devlink_resource_ops nsim_ipv6_fib_res_ops = {
-	.occ_get = nsim_ipv6_fib_resource_occ_get,
-};
-
-static u64 nsim_ipv6_fib_rules_res_occ_get(struct devlink *devlink)
+static u64 nsim_ipv6_fib_rules_res_occ_get(void *priv)
 {
-	struct net *net = nsim_devlink_net(devlink);
+	struct net *net = priv;
 
 	return nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB_RULES, false);
 }
 
-static struct devlink_resource_ops nsim_ipv6_fib_rules_res_ops = {
-	.occ_get = nsim_ipv6_fib_rules_res_occ_get,
-};
-
 static int devlink_resources_register(struct devlink *devlink)
 {
 	struct devlink_resource_size_params params = {
@@ -91,7 +75,7 @@ static int devlink_resources_register(struct devlink *devlink)
 	err = devlink_resource_register(devlink, "IPv4", (u64)-1,
 					NSIM_RESOURCE_IPV4,
 					DEVLINK_RESOURCE_ID_PARENT_TOP,
-					&params, NULL);
+					&params);
 	if (err) {
 		pr_err("Failed to register IPv4 top resource\n");
 		goto out;
@@ -100,8 +84,7 @@ static int devlink_resources_register(struct devlink *devlink)
 	n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB, true);
 	err = devlink_resource_register(devlink, "fib", n,
 					NSIM_RESOURCE_IPV4_FIB,
-					NSIM_RESOURCE_IPV4,
-					&params, &nsim_ipv4_fib_res_ops);
+					NSIM_RESOURCE_IPV4, &params);
 	if (err) {
 		pr_err("Failed to register IPv4 FIB resource\n");
 		return err;
@@ -110,8 +93,7 @@ static int devlink_resources_register(struct devlink *devlink)
 	n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB_RULES, true);
 	err = devlink_resource_register(devlink, "fib-rules", n,
 					NSIM_RESOURCE_IPV4_FIB_RULES,
-					NSIM_RESOURCE_IPV4,
-					&params, &nsim_ipv4_fib_rules_res_ops);
+					NSIM_RESOURCE_IPV4, &params);
 	if (err) {
 		pr_err("Failed to register IPv4 FIB rules resource\n");
 		return err;
@@ -121,7 +103,7 @@ static int devlink_resources_register(struct devlink *devlink)
 	err = devlink_resource_register(devlink, "IPv6", (u64)-1,
 					NSIM_RESOURCE_IPV6,
 					DEVLINK_RESOURCE_ID_PARENT_TOP,
-					&params, NULL);
+					&params);
 	if (err) {
 		pr_err("Failed to register IPv6 top resource\n");
 		goto out;
@@ -130,8 +112,7 @@ static int devlink_resources_register(struct devlink *devlink)
 	n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB, true);
 	err = devlink_resource_register(devlink, "fib", n,
 					NSIM_RESOURCE_IPV6_FIB,
-					NSIM_RESOURCE_IPV6,
-					&params, &nsim_ipv6_fib_res_ops);
+					NSIM_RESOURCE_IPV6, &params);
 	if (err) {
 		pr_err("Failed to register IPv6 FIB resource\n");
 		return err;
@@ -140,12 +121,28 @@ static int devlink_resources_register(struct devlink *devlink)
 	n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB_RULES, true);
 	err = devlink_resource_register(devlink, "fib-rules", n,
 					NSIM_RESOURCE_IPV6_FIB_RULES,
-					NSIM_RESOURCE_IPV6,
-					&params, &nsim_ipv6_fib_rules_res_ops);
+					NSIM_RESOURCE_IPV6, &params);
 	if (err) {
 		pr_err("Failed to register IPv6 FIB rules resource\n");
 		return err;
 	}
+
+	devlink_resource_occ_get_register(devlink,
+					  NSIM_RESOURCE_IPV4_FIB,
+					  nsim_ipv4_fib_resource_occ_get,
+					  net);
+	devlink_resource_occ_get_register(devlink,
+					  NSIM_RESOURCE_IPV4_FIB_RULES,
+					  nsim_ipv4_fib_rules_res_occ_get,
+					  net);
+	devlink_resource_occ_get_register(devlink,
+					  NSIM_RESOURCE_IPV6_FIB,
+					  nsim_ipv6_fib_resource_occ_get,
+					  net);
+	devlink_resource_occ_get_register(devlink,
+					  NSIM_RESOURCE_IPV6_FIB_RULES,
+					  nsim_ipv6_fib_rules_res_occ_get,
+					  net);
 out:
 	return err;
 }
diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c
index 654f42d00092..a6c87793d899 100644
--- a/drivers/net/phy/dp83640.c
+++ b/drivers/net/phy/dp83640.c
@@ -1207,6 +1207,23 @@ static void dp83640_remove(struct phy_device *phydev)
 	kfree(dp83640);
 }
 
+/* Soft-reset the PHY, then delay before any further MII access. */
+static int dp83640_soft_reset(struct phy_device *phydev)
+{
+	int err = genphy_soft_reset(phydev);
+
+	if (err < 0)
+		return err;
+
+	/* From DP83640 datasheet: "Software driver code must wait 3 us
+	 * following a software reset before allowing further serial MII
+	 * operations with the DP83640."  Wait 10 us instead, to allow for
+	 * udelay inaccuracy.
+	 */
+	udelay(10);
+	return 0;
+}
+
 static int dp83640_config_init(struct phy_device *phydev)
 {
 	struct dp83640_private *dp83640 = phydev->priv;
@@ -1501,6 +1518,7 @@ static struct phy_driver dp83640_driver = {
 	.flags		= PHY_HAS_INTERRUPT,
 	.probe		= dp83640_probe,
 	.remove		= dp83640_remove,
+	.soft_reset	= dp83640_soft_reset,
 	.config_init	= dp83640_config_init,
 	.ack_interrupt  = dp83640_ack_interrupt,
 	.config_intr    = dp83640_config_intr,
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index a75c511950c3..c22e8e383247 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -828,6 +828,22 @@ static int m88e1121_config_init(struct phy_device *phydev)
 	return marvell_config_init(phydev);
 }
 
+/* With a valid PHY interrupt, program LED TCR before the 88E1121 init. */
+static int m88e1318_config_init(struct phy_device *phydev)
+{
+	int ret;
+
+	if (!phy_interrupt_is_valid(phydev))
+		return m88e1121_config_init(phydev);
+
+	ret = phy_modify_paged(phydev, MII_MARVELL_LED_PAGE,
+			       MII_88E1318S_PHY_LED_TCR,
+			       MII_88E1318S_PHY_LED_TCR_FORCE_INT,
+			       MII_88E1318S_PHY_LED_TCR_INTn_ENABLE |
+			       MII_88E1318S_PHY_LED_TCR_INT_ACTIVE_LOW);
+	return ret < 0 ? ret : m88e1121_config_init(phydev);
+}
+
 static int m88e1510_config_init(struct phy_device *phydev)
 {
 	int err;
@@ -870,7 +886,7 @@ static int m88e1510_config_init(struct phy_device *phydev)
 		phydev->advertising &= ~pause;
 	}
 
-	return m88e1121_config_init(phydev);
+	return m88e1318_config_init(phydev);
 }
 
 static int m88e1118_config_aneg(struct phy_device *phydev)
@@ -2086,7 +2102,7 @@ static struct phy_driver marvell_drivers[] = {
 		.features = PHY_GBIT_FEATURES,
 		.flags = PHY_HAS_INTERRUPT,
 		.probe = marvell_probe,
-		.config_init = &m88e1121_config_init,
+		.config_init = &m88e1318_config_init,
 		.config_aneg = &m88e1318_config_aneg,
 		.read_status = &marvell_read_status,
 		.ack_interrupt = &marvell_ack_interrupt,
diff --git a/drivers/net/slip/slhc.c b/drivers/net/slip/slhc.c
index 5782733959f0..f4e93f5fc204 100644
--- a/drivers/net/slip/slhc.c
+++ b/drivers/net/slip/slhc.c
@@ -509,6 +509,10 @@ slhc_uncompress(struct slcompress *comp, unsigned char *icp, int isize)
 		if(x < 0 || x > comp->rslot_limit)
 			goto bad;
 
+		/* Check if the cstate is initialized */
+		if (!comp->rstate[x].initialized)
+			goto bad;
+
 		comp->flags &=~ SLF_TOSS;
 		comp->recv_current = x;
 	} else {
@@ -673,6 +677,7 @@ slhc_remember(struct slcompress *comp, unsigned char *icp, int isize)
 	if (cs->cs_tcp.doff > 5)
 	  memcpy(cs->cs_tcpopt, icp + ihl*4 + sizeof(struct tcphdr), (cs->cs_tcp.doff - 5) * 4);
 	cs->cs_hsize = ihl*2 + cs->cs_tcp.doff*2;
+	cs->initialized = true;
 	/* Put headers back on packet
 	 * Neither header checksum is recalculated
 	 */
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index a1ba262f40ad..28583aa0c17d 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -743,8 +743,15 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
 
 static void tun_detach(struct tun_file *tfile, bool clean)
 {
+	struct tun_struct *tun;
+	struct net_device *dev;
+
 	rtnl_lock();
+	tun = rtnl_dereference(tfile->tun);
+	dev = tun ? tun->dev : NULL;	/* sample netdev before the detach */
 	__tun_detach(tfile, clean);
+	if (dev)
+		netdev_state_change(dev);	/* notify after the detach */
 	rtnl_unlock();
 }
 
@@ -2562,10 +2569,15 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 			/* One or more queue has already been attached, no need
 			 * to initialize the device again.
 			 */
+			netdev_state_change(dev);
 			return 0;
 		}
-	}
-	else {
+
+		tun->flags = (tun->flags & ~TUN_FEATURES) |
+			      (ifr->ifr_flags & TUN_FEATURES);
+
+		netdev_state_change(dev);
+	} else {
 		char *name;
 		unsigned long flags = 0;
 		int queues = ifr->ifr_flags & IFF_MULTI_QUEUE ?
@@ -2642,6 +2654,9 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 				     ~(NETIF_F_HW_VLAN_CTAG_TX |
 				       NETIF_F_HW_VLAN_STAG_TX);
 
+		tun->flags = (tun->flags & ~TUN_FEATURES) |
+			      (ifr->ifr_flags & TUN_FEATURES);
+
 		INIT_LIST_HEAD(&tun->disabled);
 		err = tun_attach(tun, file, false, ifr->ifr_flags & IFF_NAPI);
 		if (err < 0)
@@ -2656,9 +2671,6 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 
 	tun_debug(KERN_INFO, tun, "tun_set_iff\n");
 
-	tun->flags = (tun->flags & ~TUN_FEATURES) |
-		(ifr->ifr_flags & TUN_FEATURES);
-
 	/* Make sure persistent devices do not get stuck in
 	 * xoff state.
 	 */
@@ -2805,6 +2817,9 @@ static int tun_set_queue(struct file *file, struct ifreq *ifr)
 	} else
 		ret = -EINVAL;
 
+	if (ret >= 0)
+		netdev_state_change(tun->dev);
+
 unlock:
 	rtnl_unlock();
 	return ret;
@@ -2845,6 +2860,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 	unsigned int ifindex;
 	int le;
 	int ret;
+	bool do_notify = false;
 
 	if (cmd == TUNSETIFF || cmd == TUNSETQUEUE ||
 	    (_IOC_TYPE(cmd) == SOCK_IOC_TYPE && cmd != SIOCGSKNS)) {
@@ -2941,10 +2957,12 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 		if (arg && !(tun->flags & IFF_PERSIST)) {
 			tun->flags |= IFF_PERSIST;
 			__module_get(THIS_MODULE);
+			do_notify = true;
 		}
 		if (!arg && (tun->flags & IFF_PERSIST)) {
 			tun->flags &= ~IFF_PERSIST;
 			module_put(THIS_MODULE);
+			do_notify = true;
 		}
 
 		tun_debug(KERN_INFO, tun, "persist %s\n",
@@ -2959,6 +2977,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 			break;
 		}
 		tun->owner = owner;
+		do_notify = true;
 		tun_debug(KERN_INFO, tun, "owner set to %u\n",
 			  from_kuid(&init_user_ns, tun->owner));
 		break;
@@ -2971,6 +2990,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 			break;
 		}
 		tun->group = group;
+		do_notify = true;
 		tun_debug(KERN_INFO, tun, "group set to %u\n",
 			  from_kgid(&init_user_ns, tun->group));
 		break;
@@ -3130,6 +3150,9 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 		break;
 	}
 
+	if (do_notify)
+		netdev_state_change(tun->dev);
+
 unlock:
 	rtnl_unlock();
 	if (tun)
diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c
index fff4b13eece2..5c42cf81a08b 100644
--- a/drivers/net/usb/cdc_ether.c
+++ b/drivers/net/usb/cdc_ether.c
@@ -902,6 +902,12 @@ static const struct usb_device_id	products[] = {
 				      USB_CDC_PROTO_NONE),
 	.driver_info = (unsigned long)&wwan_info,
 }, {
+	/* Cinterion AHS3 modem by GEMALTO */
+	USB_DEVICE_AND_INTERFACE_INFO(0x1e2d, 0x0055, USB_CLASS_COMM,
+				      USB_CDC_SUBCLASS_ETHERNET,
+				      USB_CDC_PROTO_NONE),
+	.driver_info = (unsigned long)&wwan_info,
+}, {
 	USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET,
 			USB_CDC_PROTO_NONE),
 	.driver_info = (unsigned long) &cdc_info,
diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index aff105f5f58c..0867f7275852 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -928,7 +928,8 @@ static int lan78xx_read_otp(struct lan78xx_net *dev, u32 offset,
 			offset += 0x100;
 		else
 			ret = -EINVAL;
-		ret = lan78xx_read_raw_otp(dev, offset, length, data);
+		if (!ret)
+			ret = lan78xx_read_raw_otp(dev, offset, length, data);
 	}
 
 	return ret;
@@ -2502,7 +2503,7 @@ static void lan78xx_init_stats(struct lan78xx_net *dev)
 	dev->stats.rollover_max.eee_tx_lpi_transitions = 0xFFFFFFFF;
 	dev->stats.rollover_max.eee_tx_lpi_time = 0xFFFFFFFF;
 
-	lan78xx_defer_kevent(dev, EVENT_STAT_UPDATE);
+	set_bit(EVENT_STAT_UPDATE, &dev->flags);
 }
 
 static int lan78xx_open(struct net_device *net)
@@ -2514,10 +2515,6 @@ static int lan78xx_open(struct net_device *net)
 	if (ret < 0)
 		goto out;
 
-	ret = lan78xx_reset(dev);
-	if (ret < 0)
-		goto done;
-
 	phy_start(net->phydev);
 
 	netif_dbg(dev, ifup, dev->net, "phy initialised successfully");
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 6afe896e5cb8..96d26cfae90b 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -253,7 +253,7 @@ static inline void hwsim_clear_chanctx_magic(struct ieee80211_chanctx_conf *c)
 
 static unsigned int hwsim_net_id;
 
-static struct ida hwsim_netgroup_ida = IDA_INIT;
+static DEFINE_IDA(hwsim_netgroup_ida);
 
 struct hwsim_net {
 	int netgroup;
diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig
index a65f2e1d9f53..85997184e047 100644
--- a/drivers/nvdimm/Kconfig
+++ b/drivers/nvdimm/Kconfig
@@ -20,7 +20,7 @@ if LIBNVDIMM
 config BLK_DEV_PMEM
 	tristate "PMEM: Persistent memory block device support"
 	default LIBNVDIMM
-	select DAX
+	select DAX_DRIVER
 	select ND_BTT if BTT
 	select ND_PFN if NVDIMM_PFN
 	help
@@ -102,4 +102,15 @@ config NVDIMM_DAX
 
 	  Select Y if unsure
 
+config OF_PMEM
+	# FIXME: make tristate once OF_NUMA dependency removed
+	bool "Device-tree support for persistent memory regions"
+	depends on OF
+	default LIBNVDIMM
+	help
+	  Allows regions of persistent memory to be described in the
+	  device-tree.
+
+	  Select Y if unsure.
+
 endif
diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile
index 70d5f3ad9909..e8847045dac0 100644
--- a/drivers/nvdimm/Makefile
+++ b/drivers/nvdimm/Makefile
@@ -4,6 +4,7 @@ obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o
 obj-$(CONFIG_ND_BTT) += nd_btt.o
 obj-$(CONFIG_ND_BLK) += nd_blk.o
 obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o
+obj-$(CONFIG_OF_PMEM) += of_pmem.o
 
 nd_pmem-y := pmem.o
 
diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c
index d58925295aa7..795ad4ff35ca 100644
--- a/drivers/nvdimm/btt_devs.c
+++ b/drivers/nvdimm/btt_devs.c
@@ -26,7 +26,7 @@ static void nd_btt_release(struct device *dev)
 	struct nd_region *nd_region = to_nd_region(dev->parent);
 	struct nd_btt *nd_btt = to_nd_btt(dev);
 
-	dev_dbg(dev, "%s\n", __func__);
+	dev_dbg(dev, "trace\n");
 	nd_detach_ndns(&nd_btt->dev, &nd_btt->ndns);
 	ida_simple_remove(&nd_region->btt_ida, nd_btt->id);
 	kfree(nd_btt->uuid);
@@ -74,8 +74,8 @@ static ssize_t sector_size_store(struct device *dev,
 	nvdimm_bus_lock(dev);
 	rc = nd_size_select_store(dev, buf, &nd_btt->lbasize,
 			btt_lbasize_supported);
-	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
-			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
+			buf[len - 1] == '\n' ? "" : "\n");
 	nvdimm_bus_unlock(dev);
 	device_unlock(dev);
 
@@ -101,8 +101,8 @@ static ssize_t uuid_store(struct device *dev,
 
 	device_lock(dev);
 	rc = nd_uuid_store(dev, &nd_btt->uuid, buf, len);
-	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
-			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
+			buf[len - 1] == '\n' ? "" : "\n");
 	device_unlock(dev);
 
 	return rc ? rc : len;
@@ -131,8 +131,8 @@ static ssize_t namespace_store(struct device *dev,
 	device_lock(dev);
 	nvdimm_bus_lock(dev);
 	rc = nd_namespace_store(dev, &nd_btt->ndns, buf, len);
-	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
-			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
+			buf[len - 1] == '\n' ? "" : "\n");
 	nvdimm_bus_unlock(dev);
 	device_unlock(dev);
 
@@ -206,8 +206,8 @@ static struct device *__nd_btt_create(struct nd_region *nd_region,
 	dev->groups = nd_btt_attribute_groups;
 	device_initialize(&nd_btt->dev);
 	if (ndns && !__nd_attach_ndns(&nd_btt->dev, ndns, &nd_btt->ndns)) {
-		dev_dbg(&ndns->dev, "%s failed, already claimed by %s\n",
-				__func__, dev_name(ndns->claim));
+		dev_dbg(&ndns->dev, "failed, already claimed by %s\n",
+				dev_name(ndns->claim));
 		put_device(dev);
 		return NULL;
 	}
@@ -346,8 +346,7 @@ int nd_btt_probe(struct device *dev, struct nd_namespace_common *ndns)
 		return -ENOMEM;
 	btt_sb = devm_kzalloc(dev, sizeof(*btt_sb), GFP_KERNEL);
 	rc = __nd_btt_probe(to_nd_btt(btt_dev), ndns, btt_sb);
-	dev_dbg(dev, "%s: btt: %s\n", __func__,
-			rc == 0 ? dev_name(btt_dev) : "<none>");
+	dev_dbg(dev, "btt: %s\n", rc == 0 ? dev_name(btt_dev) : "<none>");
 	if (rc < 0) {
 		struct nd_btt *nd_btt = to_nd_btt(btt_dev);
 
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 78eabc3a1ab1..a64023690cad 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -358,6 +358,7 @@ struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
 	nvdimm_bus->dev.release = nvdimm_bus_release;
 	nvdimm_bus->dev.groups = nd_desc->attr_groups;
 	nvdimm_bus->dev.bus = &nvdimm_bus_type;
+	nvdimm_bus->dev.of_node = nd_desc->of_node;
 	dev_set_name(&nvdimm_bus->dev, "ndbus%d", nvdimm_bus->id);
 	rc = device_register(&nvdimm_bus->dev);
 	if (rc) {
@@ -984,8 +985,8 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
 
 	if (cmd == ND_CMD_CALL) {
 		func = pkg.nd_command;
-		dev_dbg(dev, "%s:%s, idx: %llu, in: %u, out: %u, len %llu\n",
-				__func__, dimm_name, pkg.nd_command,
+		dev_dbg(dev, "%s, idx: %llu, in: %u, out: %u, len %llu\n",
+				dimm_name, pkg.nd_command,
 				in_len, out_len, buf_len);
 	}
 
@@ -996,8 +997,8 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
 		u32 copy;
 
 		if (out_size == UINT_MAX) {
-			dev_dbg(dev, "%s:%s unknown output size cmd: %s field: %d\n",
-					__func__, dimm_name, cmd_name, i);
+			dev_dbg(dev, "%s unknown output size cmd: %s field: %d\n",
+					dimm_name, cmd_name, i);
 			return -EFAULT;
 		}
 		if (out_len < sizeof(out_env))
@@ -1012,9 +1013,8 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
 
 	buf_len = (u64) out_len + (u64) in_len;
 	if (buf_len > ND_IOCTL_MAX_BUFLEN) {
-		dev_dbg(dev, "%s:%s cmd: %s buf_len: %llu > %d\n", __func__,
-				dimm_name, cmd_name, buf_len,
-				ND_IOCTL_MAX_BUFLEN);
+		dev_dbg(dev, "%s cmd: %s buf_len: %llu > %d\n", dimm_name,
+				cmd_name, buf_len, ND_IOCTL_MAX_BUFLEN);
 		return -EINVAL;
 	}
 
diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
index b2fc29b8279b..30852270484f 100644
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -148,7 +148,7 @@ ssize_t nd_namespace_store(struct device *dev,
 	char *name;
 
 	if (dev->driver) {
-		dev_dbg(dev, "%s: -EBUSY\n", __func__);
+		dev_dbg(dev, "namespace already active\n");
 		return -EBUSY;
 	}
 
diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c
index 1dc527660637..acce050856a8 100644
--- a/drivers/nvdimm/core.c
+++ b/drivers/nvdimm/core.c
@@ -134,7 +134,7 @@ static void nvdimm_map_release(struct kref *kref)
 	nvdimm_map = container_of(kref, struct nvdimm_map, kref);
 	nvdimm_bus = nvdimm_map->nvdimm_bus;
 
-	dev_dbg(&nvdimm_bus->dev, "%s: %pa\n", __func__, &nvdimm_map->offset);
+	dev_dbg(&nvdimm_bus->dev, "%pa\n", &nvdimm_map->offset);
 	list_del(&nvdimm_map->list);
 	if (nvdimm_map->flags)
 		memunmap(nvdimm_map->mem);
@@ -230,8 +230,8 @@ static int nd_uuid_parse(struct device *dev, u8 *uuid_out, const char *buf,
 
 	for (i = 0; i < 16; i++) {
 		if (!isxdigit(str[0]) || !isxdigit(str[1])) {
-			dev_dbg(dev, "%s: pos: %d buf[%zd]: %c buf[%zd]: %c\n",
-					__func__, i, str - buf, str[0],
+			dev_dbg(dev, "pos: %d buf[%zd]: %c buf[%zd]: %c\n",
+					i, str - buf, str[0],
 					str + 1 - buf, str[1]);
 			return -EINVAL;
 		}
diff --git a/drivers/nvdimm/dax_devs.c b/drivers/nvdimm/dax_devs.c
index 1bf2bd318371..0453f49dc708 100644
--- a/drivers/nvdimm/dax_devs.c
+++ b/drivers/nvdimm/dax_devs.c
@@ -24,7 +24,7 @@ static void nd_dax_release(struct device *dev)
 	struct nd_dax *nd_dax = to_nd_dax(dev);
 	struct nd_pfn *nd_pfn = &nd_dax->nd_pfn;
 
-	dev_dbg(dev, "%s\n", __func__);
+	dev_dbg(dev, "trace\n");
 	nd_detach_ndns(dev, &nd_pfn->ndns);
 	ida_simple_remove(&nd_region->dax_ida, nd_pfn->id);
 	kfree(nd_pfn->uuid);
@@ -129,8 +129,7 @@ int nd_dax_probe(struct device *dev, struct nd_namespace_common *ndns)
 	pfn_sb = devm_kzalloc(dev, sizeof(*pfn_sb), GFP_KERNEL);
 	nd_pfn->pfn_sb = pfn_sb;
 	rc = nd_pfn_validate(nd_pfn, DAX_SIG);
-	dev_dbg(dev, "%s: dax: %s\n", __func__,
-			rc == 0 ? dev_name(dax_dev) : "<none>");
+	dev_dbg(dev, "dax: %s\n", rc == 0 ? dev_name(dax_dev) : "<none>");
 	if (rc < 0) {
 		nd_detach_ndns(dax_dev, &nd_pfn->ndns);
 		put_device(dax_dev);
diff --git a/drivers/nvdimm/dimm.c b/drivers/nvdimm/dimm.c
index f8913b8124b6..233907889f96 100644
--- a/drivers/nvdimm/dimm.c
+++ b/drivers/nvdimm/dimm.c
@@ -67,9 +67,11 @@ static int nvdimm_probe(struct device *dev)
 	ndd->ns_next = nd_label_next_nsindex(ndd->ns_current);
 	nd_label_copy(ndd, to_next_namespace_index(ndd),
 			to_current_namespace_index(ndd));
-	rc = nd_label_reserve_dpa(ndd);
-	if (ndd->ns_current >= 0)
-		nvdimm_set_aliasing(dev);
+	if (ndd->ns_current >= 0) {
+		rc = nd_label_reserve_dpa(ndd);
+		if (rc == 0)
+			nvdimm_set_aliasing(dev);
+	}
 	nvdimm_clear_locked(dev);
 	nvdimm_bus_unlock(dev);
 
diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c
index 097794d9f786..e00d45522b80 100644
--- a/drivers/nvdimm/dimm_devs.c
+++ b/drivers/nvdimm/dimm_devs.c
@@ -131,7 +131,7 @@ int nvdimm_init_config_data(struct nvdimm_drvdata *ndd)
 		}
 		memcpy(ndd->data + offset, cmd->out_buf, cmd->in_length);
 	}
-	dev_dbg(ndd->dev, "%s: len: %zu rc: %d\n", __func__, offset, rc);
+	dev_dbg(ndd->dev, "len: %zu rc: %d\n", offset, rc);
 	kfree(cmd);
 
 	return rc;
@@ -266,8 +266,7 @@ void nvdimm_drvdata_release(struct kref *kref)
 	struct device *dev = ndd->dev;
 	struct resource *res, *_r;
 
-	dev_dbg(dev, "%s\n", __func__);
-
+	dev_dbg(dev, "trace\n");
 	nvdimm_bus_lock(dev);
 	for_each_dpa_resource_safe(ndd, res, _r)
 		nvdimm_free_dpa(ndd, res);
@@ -660,7 +659,7 @@ int nvdimm_bus_check_dimm_count(struct nvdimm_bus *nvdimm_bus, int dimm_count)
 	nd_synchronize();
 
 	device_for_each_child(&nvdimm_bus->dev, &count, count_dimms);
-	dev_dbg(&nvdimm_bus->dev, "%s: count: %d\n", __func__, count);
+	dev_dbg(&nvdimm_bus->dev, "count: %d\n", count);
 	if (count != dimm_count)
 		return -ENXIO;
 	return 0;
diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c
index de66c02f6140..1d28cd656536 100644
--- a/drivers/nvdimm/label.c
+++ b/drivers/nvdimm/label.c
@@ -45,9 +45,27 @@ unsigned sizeof_namespace_label(struct nvdimm_drvdata *ndd)
 	return ndd->nslabel_size;
 }
 
+static size_t __sizeof_namespace_index(u32 nslot)
+{
+	return ALIGN(sizeof(struct nd_namespace_index) + DIV_ROUND_UP(nslot, 8),
+			NSINDEX_ALIGN);	/* struct + ceil(nslot / 8) bytes */
+}
+
+/* Labels that fit once two index blocks of @index_size bytes are set aside */
+static int __nvdimm_num_label_slots(struct nvdimm_drvdata *ndd,
+		size_t index_size)
+{
+	return (ndd->nsarea.config_size - 2 * index_size) /
+			sizeof_namespace_label(ndd);
+}
+
 int nvdimm_num_label_slots(struct nvdimm_drvdata *ndd)
 {
-	return ndd->nsarea.config_size / (sizeof_namespace_label(ndd) + 1);
+	/* Size the index for an upper bound on labels, then recount the fit */
+	u32 nslot_max = ndd->nsarea.config_size / sizeof_namespace_label(ndd);
+	u32 nblk = __sizeof_namespace_index(nslot_max) / NSINDEX_ALIGN;
+
+	return __nvdimm_num_label_slots(ndd, NSINDEX_ALIGN * nblk);
 }
 
 size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd)
@@ -55,18 +73,14 @@ size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd)
 	u32 nslot, space, size;
 
 	/*
-	 * The minimum index space is 512 bytes, with that amount of
-	 * index we can describe ~1400 labels which is less than a byte
-	 * of overhead per label.  Round up to a byte of overhead per
-	 * label and determine the size of the index region.  Yes, this
-	 * starts to waste space at larger config_sizes, but it's
-	 * unlikely we'll ever see anything but 128K.
+	 * Per UEFI 2.7, the minimum size of the Label Storage Area is large
+	 * enough to hold 2 index blocks and 2 labels.  The minimum index
+	 * block size is 256 bytes, and the minimum label size is 256 bytes.
 	 */
 	nslot = nvdimm_num_label_slots(ndd);
 	space = ndd->nsarea.config_size - nslot * sizeof_namespace_label(ndd);
-	size = ALIGN(sizeof(struct nd_namespace_index) + DIV_ROUND_UP(nslot, 8),
-			NSINDEX_ALIGN) * 2;
-	if (size <= space)
+	size = __sizeof_namespace_index(nslot) * 2;
+	if (size <= space && nslot >= 2)
 		return size / 2;
 
 	dev_err(ndd->dev, "label area (%d) too small to host (%d byte) labels\n",
@@ -121,8 +135,7 @@ static int __nd_label_validate(struct nvdimm_drvdata *ndd)
 
 		memcpy(sig, nsindex[i]->sig, NSINDEX_SIG_LEN);
 		if (memcmp(sig, NSINDEX_SIGNATURE, NSINDEX_SIG_LEN) != 0) {
-			dev_dbg(dev, "%s: nsindex%d signature invalid\n",
-					__func__, i);
+			dev_dbg(dev, "nsindex%d signature invalid\n", i);
 			continue;
 		}
 
@@ -135,8 +148,8 @@ static int __nd_label_validate(struct nvdimm_drvdata *ndd)
 			labelsize = 128;
 
 		if (labelsize != sizeof_namespace_label(ndd)) {
-			dev_dbg(dev, "%s: nsindex%d labelsize %d invalid\n",
-					__func__, i, nsindex[i]->labelsize);
+			dev_dbg(dev, "nsindex%d labelsize %d invalid\n",
+					i, nsindex[i]->labelsize);
 			continue;
 		}
 
@@ -145,30 +158,28 @@ static int __nd_label_validate(struct nvdimm_drvdata *ndd)
 		sum = nd_fletcher64(nsindex[i], sizeof_namespace_index(ndd), 1);
 		nsindex[i]->checksum = __cpu_to_le64(sum_save);
 		if (sum != sum_save) {
-			dev_dbg(dev, "%s: nsindex%d checksum invalid\n",
-					__func__, i);
+			dev_dbg(dev, "nsindex%d checksum invalid\n", i);
 			continue;
 		}
 
 		seq = __le32_to_cpu(nsindex[i]->seq);
 		if ((seq & NSINDEX_SEQ_MASK) == 0) {
-			dev_dbg(dev, "%s: nsindex%d sequence: %#x invalid\n",
-					__func__, i, seq);
+			dev_dbg(dev, "nsindex%d sequence: %#x invalid\n", i, seq);
 			continue;
 		}
 
 		/* sanity check the index against expected values */
 		if (__le64_to_cpu(nsindex[i]->myoff)
 				!= i * sizeof_namespace_index(ndd)) {
-			dev_dbg(dev, "%s: nsindex%d myoff: %#llx invalid\n",
-					__func__, i, (unsigned long long)
+			dev_dbg(dev, "nsindex%d myoff: %#llx invalid\n",
+					i, (unsigned long long)
 					__le64_to_cpu(nsindex[i]->myoff));
 			continue;
 		}
 		if (__le64_to_cpu(nsindex[i]->otheroff)
 				!= (!i) * sizeof_namespace_index(ndd)) {
-			dev_dbg(dev, "%s: nsindex%d otheroff: %#llx invalid\n",
-					__func__, i, (unsigned long long)
+			dev_dbg(dev, "nsindex%d otheroff: %#llx invalid\n",
+					i, (unsigned long long)
 					__le64_to_cpu(nsindex[i]->otheroff));
 			continue;
 		}
@@ -176,8 +187,7 @@ static int __nd_label_validate(struct nvdimm_drvdata *ndd)
 		size = __le64_to_cpu(nsindex[i]->mysize);
 		if (size > sizeof_namespace_index(ndd)
 				|| size < sizeof(struct nd_namespace_index)) {
-			dev_dbg(dev, "%s: nsindex%d mysize: %#llx invalid\n",
-					__func__, i, size);
+			dev_dbg(dev, "nsindex%d mysize: %#llx invalid\n", i, size);
 			continue;
 		}
 
@@ -185,9 +195,8 @@ static int __nd_label_validate(struct nvdimm_drvdata *ndd)
 		if (nslot * sizeof_namespace_label(ndd)
 				+ 2 * sizeof_namespace_index(ndd)
 				> ndd->nsarea.config_size) {
-			dev_dbg(dev, "%s: nsindex%d nslot: %u invalid, config_size: %#x\n",
-					__func__, i, nslot,
-					ndd->nsarea.config_size);
+			dev_dbg(dev, "nsindex%d nslot: %u invalid, config_size: %#x\n",
+					i, nslot, ndd->nsarea.config_size);
 			continue;
 		}
 		valid[i] = true;
@@ -356,8 +365,8 @@ static bool slot_valid(struct nvdimm_drvdata *ndd,
 		sum = nd_fletcher64(nd_label, sizeof_namespace_label(ndd), 1);
 		nd_label->checksum = __cpu_to_le64(sum_save);
 		if (sum != sum_save) {
-			dev_dbg(ndd->dev, "%s fail checksum. slot: %d expect: %#llx\n",
-				__func__, slot, sum);
+			dev_dbg(ndd->dev, "fail checksum. slot: %d expect: %#llx\n",
+				slot, sum);
 			return false;
 		}
 	}
@@ -422,8 +431,8 @@ int nd_label_active_count(struct nvdimm_drvdata *ndd)
 			u64 dpa = __le64_to_cpu(nd_label->dpa);
 
 			dev_dbg(ndd->dev,
-				"%s: slot%d invalid slot: %d dpa: %llx size: %llx\n",
-					__func__, slot, label_slot, dpa, size);
+				"slot%d invalid slot: %d dpa: %llx size: %llx\n",
+					slot, label_slot, dpa, size);
 			continue;
 		}
 		count++;
@@ -650,7 +659,7 @@ static int __pmem_label_update(struct nd_region *nd_region,
 	slot = nd_label_alloc_slot(ndd);
 	if (slot == UINT_MAX)
 		return -ENXIO;
-	dev_dbg(ndd->dev, "%s: allocated: %d\n", __func__, slot);
+	dev_dbg(ndd->dev, "allocated: %d\n", slot);
 
 	nd_label = to_label(ndd, slot);
 	memset(nd_label, 0, sizeof_namespace_label(ndd));
@@ -678,7 +687,7 @@ static int __pmem_label_update(struct nd_region *nd_region,
 		sum = nd_fletcher64(nd_label, sizeof_namespace_label(ndd), 1);
 		nd_label->checksum = __cpu_to_le64(sum);
 	}
-	nd_dbg_dpa(nd_region, ndd, res, "%s\n", __func__);
+	nd_dbg_dpa(nd_region, ndd, res, "\n");
 
 	/* update label */
 	offset = nd_label_offset(ndd, nd_label);
@@ -700,7 +709,7 @@ static int __pmem_label_update(struct nd_region *nd_region,
 		break;
 	}
 	if (victim) {
-		dev_dbg(ndd->dev, "%s: free: %d\n", __func__, slot);
+		dev_dbg(ndd->dev, "free: %d\n", slot);
 		slot = to_slot(ndd, victim->label);
 		nd_label_free_slot(ndd, slot);
 		victim->label = NULL;
@@ -868,7 +877,7 @@ static int __blk_label_update(struct nd_region *nd_region,
 		slot = nd_label_alloc_slot(ndd);
 		if (slot == UINT_MAX)
 			goto abort;
-		dev_dbg(ndd->dev, "%s: allocated: %d\n", __func__, slot);
+		dev_dbg(ndd->dev, "allocated: %d\n", slot);
 
 		nd_label = to_label(ndd, slot);
 		memset(nd_label, 0, sizeof_namespace_label(ndd));
@@ -928,7 +937,7 @@ static int __blk_label_update(struct nd_region *nd_region,
 
 	/* free up now unused slots in the new index */
 	for_each_set_bit(slot, victim_map, victim_map ? nslot : 0) {
-		dev_dbg(ndd->dev, "%s: free: %d\n", __func__, slot);
+		dev_dbg(ndd->dev, "free: %d\n", slot);
 		nd_label_free_slot(ndd, slot);
 	}
 
@@ -1092,7 +1101,7 @@ static int del_labels(struct nd_mapping *nd_mapping, u8 *uuid)
 		active--;
 		slot = to_slot(ndd, nd_label);
 		nd_label_free_slot(ndd, slot);
-		dev_dbg(ndd->dev, "%s: free: %d\n", __func__, slot);
+		dev_dbg(ndd->dev, "free: %d\n", slot);
 		list_move_tail(&label_ent->list, &list);
 		label_ent->label = NULL;
 	}
@@ -1100,7 +1109,7 @@ static int del_labels(struct nd_mapping *nd_mapping, u8 *uuid)
 
 	if (active == 0) {
 		nd_mapping_free_labels(nd_mapping);
-		dev_dbg(ndd->dev, "%s: no more active labels\n", __func__);
+		dev_dbg(ndd->dev, "no more active labels\n");
 	}
 	mutex_unlock(&nd_mapping->lock);
 
diff --git a/drivers/nvdimm/label.h b/drivers/nvdimm/label.h
index 1ebf4d3d01ba..18bbe183b3a9 100644
--- a/drivers/nvdimm/label.h
+++ b/drivers/nvdimm/label.h
@@ -33,7 +33,7 @@ enum {
 	BTTINFO_UUID_LEN = 16,
 	BTTINFO_FLAG_ERROR = 0x1,    /* error state (read-only) */
 	BTTINFO_MAJOR_VERSION = 1,
-	ND_LABEL_MIN_SIZE = 512 * 129, /* see sizeof_namespace_index() */
+	ND_LABEL_MIN_SIZE = 256 * 4, /* see sizeof_namespace_index() */
 	ND_LABEL_ID_SIZE = 50,
 	ND_NSINDEX_INIT = 0x1,
 };
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index 658ada497be0..28afdd668905 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -421,7 +421,7 @@ static ssize_t alt_name_store(struct device *dev,
 	rc = __alt_name_store(dev, buf, len);
 	if (rc >= 0)
 		rc = nd_namespace_label_update(nd_region, dev);
-	dev_dbg(dev, "%s: %s(%zd)\n", __func__, rc < 0 ? "fail " : "", rc);
+	dev_dbg(dev, "%s(%zd)\n", rc < 0 ? "fail " : "", rc);
 	nvdimm_bus_unlock(dev);
 	device_unlock(dev);
 
@@ -1007,7 +1007,7 @@ static ssize_t __size_store(struct device *dev, unsigned long long val)
 	if (uuid_not_set(uuid, dev, __func__))
 		return -ENXIO;
 	if (nd_region->ndr_mappings == 0) {
-		dev_dbg(dev, "%s: not associated with dimm(s)\n", __func__);
+		dev_dbg(dev, "not associated with dimm(s)\n");
 		return -ENXIO;
 	}
 
@@ -1105,8 +1105,7 @@ static ssize_t size_store(struct device *dev,
 		*uuid = NULL;
 	}
 
-	dev_dbg(dev, "%s: %llx %s (%d)\n", __func__, val, rc < 0
-			? "fail" : "success", rc);
+	dev_dbg(dev, "%llx %s (%d)\n", val, rc < 0 ? "fail" : "success", rc);
 
 	nvdimm_bus_unlock(dev);
 	device_unlock(dev);
@@ -1270,8 +1269,8 @@ static ssize_t uuid_store(struct device *dev,
 		rc = nd_namespace_label_update(nd_region, dev);
 	else
 		kfree(uuid);
-	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
-			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
+			buf[len - 1] == '\n' ? "" : "\n");
 	nvdimm_bus_unlock(dev);
 	device_unlock(dev);
 
@@ -1355,9 +1354,8 @@ static ssize_t sector_size_store(struct device *dev,
 		rc = nd_size_select_store(dev, buf, lbasize, supported);
 	if (rc >= 0)
 		rc = nd_namespace_label_update(nd_region, dev);
-	dev_dbg(dev, "%s: result: %zd %s: %s%s", __func__,
-			rc, rc < 0 ? "tried" : "wrote", buf,
-			buf[len - 1] == '\n' ? "" : "\n");
+	dev_dbg(dev, "result: %zd %s: %s%s", rc, rc < 0 ? "tried" : "wrote",
+			buf, buf[len - 1] == '\n' ? "" : "\n");
 	nvdimm_bus_unlock(dev);
 	device_unlock(dev);
 
@@ -1519,7 +1517,7 @@ static ssize_t holder_class_store(struct device *dev,
 	rc = __holder_class_store(dev, buf);
 	if (rc >= 0)
 		rc = nd_namespace_label_update(nd_region, dev);
-	dev_dbg(dev, "%s: %s(%zd)\n", __func__, rc < 0 ? "fail " : "", rc);
+	dev_dbg(dev, "%s(%zd)\n", rc < 0 ? "fail " : "", rc);
 	nvdimm_bus_unlock(dev);
 	device_unlock(dev);
 
@@ -1717,8 +1715,7 @@ struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev)
 		if (uuid_not_set(nsblk->uuid, &ndns->dev, __func__))
 			return ERR_PTR(-ENODEV);
 		if (!nsblk->lbasize) {
-			dev_dbg(&ndns->dev, "%s: sector size not set\n",
-				__func__);
+			dev_dbg(&ndns->dev, "sector size not set\n");
 			return ERR_PTR(-ENODEV);
 		}
 		if (!nd_namespace_blk_validate(nsblk))
@@ -1798,9 +1795,7 @@ static bool has_uuid_at_pos(struct nd_region *nd_region, u8 *uuid,
 			}
 
 			if (found_uuid) {
-				dev_dbg(ndd->dev,
-						"%s duplicate entry for uuid\n",
-						__func__);
+				dev_dbg(ndd->dev, "duplicate entry for uuid\n");
 				return false;
 			}
 			found_uuid = true;
@@ -1926,7 +1921,7 @@ static struct device *create_namespace_pmem(struct nd_region *nd_region,
 	}
 
 	if (i < nd_region->ndr_mappings) {
-		struct nvdimm_drvdata *ndd = to_ndd(&nd_region->mapping[i]);
+		struct nvdimm *nvdimm = nd_region->mapping[i].nvdimm;
 
 		/*
 		 * Give up if we don't find an instance of a uuid at each
@@ -1934,7 +1929,7 @@ static struct device *create_namespace_pmem(struct nd_region *nd_region,
 		 * find a dimm with two instances of the same uuid.
 		 */
 		dev_err(&nd_region->dev, "%s missing label for %pUb\n",
-				dev_name(ndd->dev), nd_label->uuid);
+				nvdimm_name(nvdimm), nd_label->uuid);
 		rc = -EINVAL;
 		goto err;
 	}
@@ -1994,14 +1989,13 @@ static struct device *create_namespace_pmem(struct nd_region *nd_region,
 	namespace_pmem_release(dev);
 	switch (rc) {
 	case -EINVAL:
-		dev_dbg(&nd_region->dev, "%s: invalid label(s)\n", __func__);
+		dev_dbg(&nd_region->dev, "invalid label(s)\n");
 		break;
 	case -ENODEV:
-		dev_dbg(&nd_region->dev, "%s: label not found\n", __func__);
+		dev_dbg(&nd_region->dev, "label not found\n");
 		break;
 	default:
-		dev_dbg(&nd_region->dev, "%s: unexpected err: %d\n",
-				__func__, rc);
+		dev_dbg(&nd_region->dev, "unexpected err: %d\n", rc);
 		break;
 	}
 	return ERR_PTR(rc);
@@ -2334,8 +2328,8 @@ static struct device **scan_labels(struct nd_region *nd_region)
 
 	}
 
-	dev_dbg(&nd_region->dev, "%s: discovered %d %s namespace%s\n",
-			__func__, count, is_nd_blk(&nd_region->dev)
+	dev_dbg(&nd_region->dev, "discovered %d %s namespace%s\n",
+			count, is_nd_blk(&nd_region->dev)
 			? "blk" : "pmem", count == 1 ? "" : "s");
 
 	if (count == 0) {
@@ -2467,7 +2461,7 @@ static int init_active_labels(struct nd_region *nd_region)
 		get_ndd(ndd);
 
 		count = nd_label_active_count(ndd);
-		dev_dbg(ndd->dev, "%s: %d\n", __func__, count);
+		dev_dbg(ndd->dev, "count: %d\n", count);
 		if (!count)
 			continue;
 		for (j = 0; j < count; j++) {
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index 184e070d50a2..32e0364b48b9 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -340,7 +340,6 @@ static inline struct device *nd_dax_create(struct nd_region *nd_region)
 }
 #endif
 
-struct nd_region *to_nd_region(struct device *dev);
 int nd_region_to_nstype(struct nd_region *nd_region);
 int nd_region_register_namespaces(struct nd_region *nd_region, int *err);
 u64 nd_region_interleave_set_cookie(struct nd_region *nd_region,
diff --git a/drivers/nvdimm/of_pmem.c b/drivers/nvdimm/of_pmem.c
new file mode 100644
index 000000000000..85013bad35de
--- /dev/null
+++ b/drivers/nvdimm/of_pmem.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#define pr_fmt(fmt) "of_pmem: " fmt
+
+#include <linux/of_platform.h>
+#include <linux/of_address.h>
+#include <linux/libnvdimm.h>
+#include <linux/module.h>
+#include <linux/ioport.h>
+#include <linux/slab.h>
+
+static const struct attribute_group *region_attr_groups[] = {
+	&nd_region_attribute_group,
+	&nd_device_attribute_group,
+	NULL,
+};
+
+static const struct attribute_group *bus_attr_groups[] = {
+	&nvdimm_bus_attribute_group,
+	NULL,
+};
+
+struct of_pmem_private {
+	struct nvdimm_bus_descriptor bus_desc;
+	struct nvdimm_bus *bus;
+};
+
+static int of_pmem_region_probe(struct platform_device *pdev)
+{
+	struct of_pmem_private *priv;
+	struct device_node *np;
+	struct nvdimm_bus *bus;
+	bool is_volatile;
+	int i;
+
+	np = dev_of_node(&pdev->dev);
+	if (!np)
+		return -ENXIO;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->bus_desc.attr_groups = bus_attr_groups;
+	priv->bus_desc.provider_name = "of_pmem";
+	priv->bus_desc.module = THIS_MODULE;
+	priv->bus_desc.of_node = np;
+
+	priv->bus = bus = nvdimm_bus_register(&pdev->dev, &priv->bus_desc);
+	if (!bus) {
+		kfree(priv);
+		return -ENODEV;
+	}
+	platform_set_drvdata(pdev, priv);
+
+	is_volatile = !!of_find_property(np, "volatile", NULL);
+	dev_dbg(&pdev->dev, "Registering %s regions from %pOF\n",
+			is_volatile ? "volatile" : "non-volatile",  np);
+
+	for (i = 0; i < pdev->num_resources; i++) {
+		struct nd_region_desc ndr_desc;
+		struct nd_region *region;
+
+		/*
+		 * NB: libnvdimm copies the data from ndr_desc into its own
+		 * structures so passing a stack pointer is fine.
+		 */
+		memset(&ndr_desc, 0, sizeof(ndr_desc));
+		ndr_desc.attr_groups = region_attr_groups;
+		ndr_desc.numa_node = of_node_to_nid(np);
+		ndr_desc.res = &pdev->resource[i];
+		ndr_desc.of_node = np;
+		set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
+
+		if (is_volatile)
+			region = nvdimm_volatile_region_create(bus, &ndr_desc);
+		else
+			region = nvdimm_pmem_region_create(bus, &ndr_desc);
+
+		if (!region)
+			dev_warn(&pdev->dev, "Unable to register region %pR from %pOF\n",
+					ndr_desc.res, np);
+		else
+			dev_dbg(&pdev->dev, "Registered region %pR from %pOF\n",
+					ndr_desc.res, np);
+	}
+
+	return 0;
+}
+
+static int of_pmem_region_remove(struct platform_device *pdev)
+{
+	struct of_pmem_private *priv = platform_get_drvdata(pdev);
+
+	nvdimm_bus_unregister(priv->bus);
+	kfree(priv);
+
+	return 0;
+}
+
+static const struct of_device_id of_pmem_region_match[] = {
+	{ .compatible = "pmem-region" },
+	{ },
+};
+
+static struct platform_driver of_pmem_region_driver = {
+	.probe = of_pmem_region_probe,
+	.remove = of_pmem_region_remove,
+	.driver = {
+		.name = "of_pmem",
+		.owner = THIS_MODULE,
+		.of_match_table = of_pmem_region_match,
+	},
+};
+
+module_platform_driver(of_pmem_region_driver);
+MODULE_DEVICE_TABLE(of, of_pmem_region_match);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("IBM Corporation");
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index 2f4d18752c97..30b08791597d 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -27,7 +27,7 @@ static void nd_pfn_release(struct device *dev)
 	struct nd_region *nd_region = to_nd_region(dev->parent);
 	struct nd_pfn *nd_pfn = to_nd_pfn(dev);
 
-	dev_dbg(dev, "%s\n", __func__);
+	dev_dbg(dev, "trace\n");
 	nd_detach_ndns(&nd_pfn->dev, &nd_pfn->ndns);
 	ida_simple_remove(&nd_region->pfn_ida, nd_pfn->id);
 	kfree(nd_pfn->uuid);
@@ -94,8 +94,8 @@ static ssize_t mode_store(struct device *dev,
 		else
 			rc = -EINVAL;
 	}
-	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
-			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
+			buf[len - 1] == '\n' ? "" : "\n");
 	nvdimm_bus_unlock(dev);
 	device_unlock(dev);
 
@@ -144,8 +144,8 @@ static ssize_t align_store(struct device *dev,
 	nvdimm_bus_lock(dev);
 	rc = nd_size_select_store(dev, buf, &nd_pfn->align,
 			nd_pfn_supported_alignments());
-	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
-			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
+			buf[len - 1] == '\n' ? "" : "\n");
 	nvdimm_bus_unlock(dev);
 	device_unlock(dev);
 
@@ -171,8 +171,8 @@ static ssize_t uuid_store(struct device *dev,
 
 	device_lock(dev);
 	rc = nd_uuid_store(dev, &nd_pfn->uuid, buf, len);
-	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
-			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
+			buf[len - 1] == '\n' ? "" : "\n");
 	device_unlock(dev);
 
 	return rc ? rc : len;
@@ -201,8 +201,8 @@ static ssize_t namespace_store(struct device *dev,
 	device_lock(dev);
 	nvdimm_bus_lock(dev);
 	rc = nd_namespace_store(dev, &nd_pfn->ndns, buf, len);
-	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
-			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
+			buf[len - 1] == '\n' ? "" : "\n");
 	nvdimm_bus_unlock(dev);
 	device_unlock(dev);
 
@@ -314,8 +314,8 @@ struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn,
 	dev = &nd_pfn->dev;
 	device_initialize(&nd_pfn->dev);
 	if (ndns && !__nd_attach_ndns(&nd_pfn->dev, ndns, &nd_pfn->ndns)) {
-		dev_dbg(&ndns->dev, "%s failed, already claimed by %s\n",
-				__func__, dev_name(ndns->claim));
+		dev_dbg(&ndns->dev, "failed, already claimed by %s\n",
+				dev_name(ndns->claim));
 		put_device(dev);
 		return NULL;
 	}
@@ -510,8 +510,7 @@ int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns)
 	nd_pfn = to_nd_pfn(pfn_dev);
 	nd_pfn->pfn_sb = pfn_sb;
 	rc = nd_pfn_validate(nd_pfn, PFN_SIG);
-	dev_dbg(dev, "%s: pfn: %s\n", __func__,
-			rc == 0 ? dev_name(pfn_dev) : "<none>");
+	dev_dbg(dev, "pfn: %s\n", rc == 0 ? dev_name(pfn_dev) : "<none>");
 	if (rc < 0) {
 		nd_detach_ndns(pfn_dev, &nd_pfn->ndns);
 		put_device(pfn_dev);
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 5a96d30c294a..9d714926ecf5 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -66,7 +66,7 @@ static blk_status_t pmem_clear_poison(struct pmem_device *pmem,
 		rc = BLK_STS_IOERR;
 	if (cleared > 0 && cleared / 512) {
 		cleared /= 512;
-		dev_dbg(dev, "%s: %#llx clear %ld sector%s\n", __func__,
+		dev_dbg(dev, "%#llx clear %ld sector%s\n",
 				(unsigned long long) sector, cleared,
 				cleared > 1 ? "s" : "");
 		badblocks_clear(&pmem->bb, sector, cleared);
@@ -547,17 +547,7 @@ static struct nd_device_driver nd_pmem_driver = {
 	.type = ND_DRIVER_NAMESPACE_IO | ND_DRIVER_NAMESPACE_PMEM,
 };
 
-static int __init pmem_init(void)
-{
-	return nd_driver_register(&nd_pmem_driver);
-}
-module_init(pmem_init);
-
-static void pmem_exit(void)
-{
-	driver_unregister(&nd_pmem_driver.drv);
-}
-module_exit(pmem_exit);
+module_nd_driver(nd_pmem_driver);
 
 MODULE_AUTHOR("Ross Zwisler <ross.zwisler@linux.intel.com>");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c
index 034f0a07d627..b9ca0033cc99 100644
--- a/drivers/nvdimm/region.c
+++ b/drivers/nvdimm/region.c
@@ -27,10 +27,10 @@ static int nd_region_probe(struct device *dev)
 	if (nd_region->num_lanes > num_online_cpus()
 			&& nd_region->num_lanes < num_possible_cpus()
 			&& !test_and_set_bit(0, &once)) {
-		dev_info(dev, "online cpus (%d) < concurrent i/o lanes (%d) < possible cpus (%d)\n",
+		dev_dbg(dev, "online cpus (%d) < concurrent i/o lanes (%d) < possible cpus (%d)\n",
 				num_online_cpus(), nd_region->num_lanes,
 				num_possible_cpus());
-		dev_info(dev, "setting nr_cpus=%d may yield better libnvdimm device performance\n",
+		dev_dbg(dev, "setting nr_cpus=%d may yield better libnvdimm device performance\n",
 				nd_region->num_lanes);
 	}
 
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index 1593e1806b16..a612be6f019d 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -182,6 +182,14 @@ struct nd_region *to_nd_region(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(to_nd_region);
 
+struct device *nd_region_dev(struct nd_region *nd_region)
+{
+	if (!nd_region)
+		return NULL;
+	return &nd_region->dev;
+}
+EXPORT_SYMBOL_GPL(nd_region_dev);
+
 struct nd_blk_region *to_nd_blk_region(struct device *dev)
 {
 	struct nd_region *nd_region = to_nd_region(dev);
@@ -1014,6 +1022,7 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
 	dev->parent = &nvdimm_bus->dev;
 	dev->type = dev_type;
 	dev->groups = ndr_desc->attr_groups;
+	dev->of_node = ndr_desc->of_node;
 	nd_region->ndr_size = resource_size(ndr_desc->res);
 	nd_region->ndr_start = ndr_desc->res->start;
 	nd_device_register(dev);
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 197a6ba9700f..9df4f71e58ca 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -376,6 +376,15 @@ static void nvme_put_ns(struct nvme_ns *ns)
 	kref_put(&ns->kref, nvme_free_ns);
 }
 
+static inline void nvme_clear_nvme_request(struct request *req)
+{
+	if (!(req->rq_flags & RQF_DONTPREP)) {
+		nvme_req(req)->retries = 0;
+		nvme_req(req)->flags = 0;
+		req->rq_flags |= RQF_DONTPREP;
+	}
+}
+
 struct request *nvme_alloc_request(struct request_queue *q,
 		struct nvme_command *cmd, blk_mq_req_flags_t flags, int qid)
 {
@@ -392,6 +401,7 @@ struct request *nvme_alloc_request(struct request_queue *q,
 		return req;
 
 	req->cmd_flags |= REQ_FAILFAST_DRIVER;
+	nvme_clear_nvme_request(req);
 	nvme_req(req)->cmd = cmd;
 
 	return req;
@@ -608,11 +618,7 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
 {
 	blk_status_t ret = BLK_STS_OK;
 
-	if (!(req->rq_flags & RQF_DONTPREP)) {
-		nvme_req(req)->retries = 0;
-		nvme_req(req)->flags = 0;
-		req->rq_flags |= RQF_DONTPREP;
-	}
+	nvme_clear_nvme_request(req);
 
 	switch (req_op(req)) {
 	case REQ_OP_DRV_IN:
@@ -742,6 +748,7 @@ static int nvme_submit_user_cmd(struct request_queue *q,
 		return PTR_ERR(req);
 
 	req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
+	nvme_req(req)->flags |= NVME_REQ_USERCMD;
 
 	if (ubuffer && bufflen) {
 		ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen,
@@ -826,7 +833,7 @@ static void nvme_keep_alive_work(struct work_struct *work)
 	}
 }
 
-void nvme_start_keep_alive(struct nvme_ctrl *ctrl)
+static void nvme_start_keep_alive(struct nvme_ctrl *ctrl)
 {
 	if (unlikely(ctrl->kato == 0))
 		return;
@@ -836,7 +843,6 @@ void nvme_start_keep_alive(struct nvme_ctrl *ctrl)
 	ctrl->ka_cmd.common.opcode = nvme_admin_keep_alive;
 	schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
 }
-EXPORT_SYMBOL_GPL(nvme_start_keep_alive);
 
 void nvme_stop_keep_alive(struct nvme_ctrl *ctrl)
 {
@@ -1103,7 +1109,7 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 	}
 
 	if (ctrl->effects)
-		effects = le32_to_cpu(ctrl->effects->iocs[opcode]);
+		effects = le32_to_cpu(ctrl->effects->acs[opcode]);
 	else
 		effects = nvme_known_admin_effects(opcode);
 
@@ -2220,7 +2226,7 @@ out_unlock:
 
 int nvme_get_log_ext(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 		     u8 log_page, void *log,
-		     size_t size, size_t offset)
+		     size_t size, u64 offset)
 {
 	struct nvme_command c = { };
 	unsigned long dwlen = size / 4 - 1;
@@ -2235,8 +2241,8 @@ int nvme_get_log_ext(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 	c.get_log_page.lid = log_page;
 	c.get_log_page.numdl = cpu_to_le16(dwlen & ((1 << 16) - 1));
 	c.get_log_page.numdu = cpu_to_le16(dwlen >> 16);
-	c.get_log_page.lpol = cpu_to_le32(offset & ((1ULL << 32) - 1));
-	c.get_log_page.lpou = cpu_to_le32(offset >> 32ULL);
+	c.get_log_page.lpol = cpu_to_le32(lower_32_bits(offset));
+	c.get_log_page.lpou = cpu_to_le32(upper_32_bits(offset));
 
 	return nvme_submit_sync_cmd(ctrl->admin_q, &c, log, size);
 }
@@ -2833,7 +2839,9 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
 		goto out_free_head;
 	head->instance = ret;
 	INIT_LIST_HEAD(&head->list);
-	init_srcu_struct(&head->srcu);
+	ret = init_srcu_struct(&head->srcu);
+	if (ret)
+		goto out_ida_remove;
 	head->subsys = ctrl->subsys;
 	head->ns_id = nsid;
 	kref_init(&head->ref);
@@ -2855,6 +2863,7 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
 	return head;
 out_cleanup_srcu:
 	cleanup_srcu_struct(&head->srcu);
+out_ida_remove:
 	ida_simple_remove(&ctrl->subsys->ns_ida, head->instance);
 out_free_head:
 	kfree(head);
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 8f0f34d06d46..124c458806df 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -536,6 +536,85 @@ static struct nvmf_transport_ops *nvmf_lookup_transport(
 	return NULL;
 }
 
+blk_status_t nvmf_check_if_ready(struct nvme_ctrl *ctrl, struct request *rq,
+		bool queue_live, bool is_connected)
+{
+	struct nvme_command *cmd = nvme_req(rq)->cmd;
+
+	if (likely(ctrl->state == NVME_CTRL_LIVE && is_connected))
+		return BLK_STS_OK;
+
+	switch (ctrl->state) {
+	case NVME_CTRL_DELETING:
+		goto reject_io;
+
+	case NVME_CTRL_NEW:
+	case NVME_CTRL_CONNECTING:
+		if (!is_connected)
+			/*
+			 * This is the case of starting a new
+			 * association but connectivity was lost
+			 * before it was fully created. We need to
+			 * error the commands used to initialize the
+			 * controller so the reconnect can go into a
+			 * retry attempt. The commands should all be
+			 * marked REQ_FAILFAST_DRIVER, which will hit
+			 * the reject path below. Anything else will
+			 * be queued while the state settles.
+			 */
+			goto reject_or_queue_io;
+
+		if ((queue_live &&
+		     !(nvme_req(rq)->flags & NVME_REQ_USERCMD)) ||
+		    (!queue_live && blk_rq_is_passthrough(rq) &&
+		     cmd->common.opcode == nvme_fabrics_command &&
+		     cmd->fabrics.fctype == nvme_fabrics_type_connect))
+			/*
+			 * If queue is live, allow only commands that
+			 * are internally generated to pass through. These
+			 * are commands on the admin queue to initialize
+			 * the controller. This will reject any ioctl
+			 * admin cmds received while initializing.
+			 *
+			 * If the queue is not live, allow only a
+			 * connect command. This will reject any ioctl
+			 * admin cmd as well as initialization commands
+			 * if the controller reverted the queue to non-live.
+			 */
+			return BLK_STS_OK;
+
+		/*
+		 * fall-thru to the reject_or_queue_io clause
+		 */
+		break;
+
+	/* these cases fall-thru
+	 * case NVME_CTRL_LIVE:
+	 * case NVME_CTRL_RESETTING:
+	 */
+	default:
+		break;
+	}
+
+reject_or_queue_io:
+	/*
+	 * Any other new io is something we're not in a state to send
+	 * to the device. Default action is to busy it and retry it
+	 * after the controller state is recovered. However, anything
+	 * marked for failfast or nvme multipath is immediately failed.
+	 * Note: commands used to initialize the controller will be
+	 *  marked for failfast.
+	 * Note: nvme cli/ioctl commands are marked for failfast.
+	 */
+	if (!blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
+		return BLK_STS_RESOURCE;
+
+reject_io:
+	nvme_req(rq)->status = NVME_SC_ABORT_REQ;
+	return BLK_STS_IOERR;
+}
+EXPORT_SYMBOL_GPL(nvmf_check_if_ready);
+
 static const match_table_t opt_tokens = {
 	{ NVMF_OPT_TRANSPORT,		"transport=%s"		},
 	{ NVMF_OPT_TRADDR,		"traddr=%s"		},
@@ -608,8 +687,10 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
 			opts->discovery_nqn =
 				!(strcmp(opts->subsysnqn,
 					 NVME_DISC_SUBSYS_NAME));
-			if (opts->discovery_nqn)
+			if (opts->discovery_nqn) {
+				opts->kato = 0;
 				opts->nr_io_queues = 0;
+			}
 			break;
 		case NVMF_OPT_TRADDR:
 			p = match_strdup(args);
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index a3145d90c1d2..ef46c915b7b5 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -157,36 +157,7 @@ void nvmf_unregister_transport(struct nvmf_transport_ops *ops);
 void nvmf_free_options(struct nvmf_ctrl_options *opts);
 int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size);
 bool nvmf_should_reconnect(struct nvme_ctrl *ctrl);
-
-static inline blk_status_t nvmf_check_init_req(struct nvme_ctrl *ctrl,
-		struct request *rq)
-{
-	struct nvme_command *cmd = nvme_req(rq)->cmd;
-
-	/*
-	 * We cannot accept any other command until the connect command has
-	 * completed, so only allow connect to pass.
-	 */
-	if (!blk_rq_is_passthrough(rq) ||
-	    cmd->common.opcode != nvme_fabrics_command ||
-	    cmd->fabrics.fctype != nvme_fabrics_type_connect) {
-		/*
-		 * Connecting state means transport disruption or initial
-		 * establishment, which can take a long time and even might
-		 * fail permanently, fail fast to give upper layers a chance
-		 * to failover.
-		 * Deleting state means that the ctrl will never accept commands
-		 * again, fail it permanently.
-		 */
-		if (ctrl->state == NVME_CTRL_CONNECTING ||
-		    ctrl->state == NVME_CTRL_DELETING) {
-			nvme_req(rq)->status = NVME_SC_ABORT_REQ;
-			return BLK_STS_IOERR;
-		}
-		return BLK_STS_RESOURCE; /* try again later */
-	}
-
-	return BLK_STS_OK;
-}
+blk_status_t nvmf_check_if_ready(struct nvme_ctrl *ctrl,
+	struct request *rq, bool queue_live, bool is_connected);
 
 #endif /* _NVME_FABRICS_H */
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index c6e719b2f3ca..6cb26bcf6ec0 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2277,14 +2277,6 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
 	return BLK_STS_OK;
 }
 
-static inline blk_status_t nvme_fc_is_ready(struct nvme_fc_queue *queue,
-		struct request *rq)
-{
-	if (unlikely(!test_bit(NVME_FC_Q_LIVE, &queue->flags)))
-		return nvmf_check_init_req(&queue->ctrl->ctrl, rq);
-	return BLK_STS_OK;
-}
-
 static blk_status_t
 nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
 			const struct blk_mq_queue_data *bd)
@@ -2300,7 +2292,9 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
 	u32 data_len;
 	blk_status_t ret;
 
-	ret = nvme_fc_is_ready(queue, rq);
+	ret = nvmf_check_if_ready(&queue->ctrl->ctrl, rq,
+		test_bit(NVME_FC_Q_LIVE, &queue->flags),
+		ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE);
 	if (unlikely(ret))
 		return ret;
 
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index cf93690b3ffc..061fecfd44f5 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -105,6 +105,7 @@ struct nvme_request {
 
 enum {
 	NVME_REQ_CANCELLED		= (1 << 0),
+	NVME_REQ_USERCMD		= (1 << 1),
 };
 
 static inline struct nvme_request *nvme_req(struct request *req)
@@ -422,7 +423,6 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 		unsigned timeout, int qid, int at_head,
 		blk_mq_req_flags_t flags);
 int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count);
-void nvme_start_keep_alive(struct nvme_ctrl *ctrl);
 void nvme_stop_keep_alive(struct nvme_ctrl *ctrl);
 int nvme_reset_ctrl(struct nvme_ctrl *ctrl);
 int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl);
@@ -430,7 +430,7 @@ int nvme_delete_ctrl(struct nvme_ctrl *ctrl);
 int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl);
 
 int nvme_get_log_ext(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
-		u8 log_page, void *log, size_t size, size_t offset);
+		u8 log_page, void *log, size_t size, u64 offset);
 
 extern const struct attribute_group nvme_ns_id_attr_group;
 extern const struct block_device_operations nvme_ns_head_ops;
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 295fbec1e5f2..fbc71fac6f1e 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -84,6 +84,7 @@ struct nvme_dev {
 	struct dma_pool *prp_small_pool;
 	unsigned online_queues;
 	unsigned max_qid;
+	unsigned int num_vecs;
 	int q_depth;
 	u32 db_stride;
 	void __iomem *bar;
@@ -414,7 +415,8 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set)
 {
 	struct nvme_dev *dev = set->driver_data;
 
-	return blk_mq_pci_map_queues(set, to_pci_dev(dev->dev), 0);
+	return blk_mq_pci_map_queues(set, to_pci_dev(dev->dev),
+			dev->num_vecs > 1 ? 1 /* admin queue */ : 0);
 }
 
 /**
@@ -1380,8 +1382,7 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
 	return 0;
 }
 
-static int nvme_alloc_queue(struct nvme_dev *dev, int qid,
-		int depth, int node)
+static int nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth)
 {
 	struct nvme_queue *nvmeq = &dev->queues[qid];
 
@@ -1457,7 +1458,11 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
 		nvmeq->sq_cmds_io = dev->cmb + offset;
 	}
 
-	nvmeq->cq_vector = qid - 1;
+	/*
+	 * A queue's vector matches the queue identifier unless the controller
+	 * has only one vector available.
+	 */
+	nvmeq->cq_vector = dev->num_vecs == 1 ? 0 : qid;
 	result = adapter_alloc_cq(dev, qid, nvmeq);
 	if (result < 0)
 		goto release_vector;
@@ -1596,8 +1601,7 @@ static int nvme_pci_configure_admin_queue(struct nvme_dev *dev)
 	if (result < 0)
 		return result;
 
-	result = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH,
-			dev_to_node(dev->dev));
+	result = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH);
 	if (result)
 		return result;
 
@@ -1630,9 +1634,7 @@ static int nvme_create_io_queues(struct nvme_dev *dev)
 	int ret = 0;
 
 	for (i = dev->ctrl.queue_count; i <= dev->max_qid; i++) {
-		/* vector == qid - 1, match nvme_create_queue */
-		if (nvme_alloc_queue(dev, i, dev->q_depth,
-		     pci_irq_get_node(to_pci_dev(dev->dev), i - 1))) {
+		if (nvme_alloc_queue(dev, i, dev->q_depth)) {
 			ret = -ENOMEM;
 			break;
 		}
@@ -1914,6 +1916,10 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 	int result, nr_io_queues;
 	unsigned long size;
 
+	struct irq_affinity affd = {
+		.pre_vectors = 1
+	};
+
 	nr_io_queues = num_possible_cpus();
 	result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues);
 	if (result < 0)
@@ -1949,11 +1955,12 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 	 * setting up the full range we need.
 	 */
 	pci_free_irq_vectors(pdev);
-	nr_io_queues = pci_alloc_irq_vectors(pdev, 1, nr_io_queues,
-			PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY);
-	if (nr_io_queues <= 0)
+	result = pci_alloc_irq_vectors_affinity(pdev, 1, nr_io_queues + 1,
+			PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY, &affd);
+	if (result <= 0)
 		return -EIO;
-	dev->max_qid = nr_io_queues;
+	dev->num_vecs = result;
+	dev->max_qid = max(result - 1, 1);
 
 	/*
 	 * Should investigate if there's a performance win from allocating
@@ -2201,7 +2208,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 
 	nvme_stop_queues(&dev->ctrl);
 
-	if (!dead) {
+	if (!dead && dev->ctrl.queue_count > 0) {
 		/*
 		 * If the controller is still alive tell it to stop using the
 		 * host memory buffer.  In theory the shutdown / reset should
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 758537e9ba07..1eb4438a8763 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1601,17 +1601,6 @@ nvme_rdma_timeout(struct request *rq, bool reserved)
 	return BLK_EH_HANDLED;
 }
 
-/*
- * We cannot accept any other command until the Connect command has completed.
- */
-static inline blk_status_t
-nvme_rdma_is_ready(struct nvme_rdma_queue *queue, struct request *rq)
-{
-	if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags)))
-		return nvmf_check_init_req(&queue->ctrl->ctrl, rq);
-	return BLK_STS_OK;
-}
-
 static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 		const struct blk_mq_queue_data *bd)
 {
@@ -1627,7 +1616,8 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 	WARN_ON_ONCE(rq->tag < 0);
 
-	ret = nvme_rdma_is_ready(queue, rq);
+	ret = nvmf_check_if_ready(&queue->ctrl->ctrl, rq,
+		test_bit(NVME_RDMA_Q_LIVE, &queue->flags), true);
 	if (unlikely(ret))
 		return ret;
 
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 90dcdc40ac71..5e0e9fcc0d4d 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -178,6 +178,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
 	id->vid = 0;
 	id->ssvid = 0;
 
+	memset(id->sn, ' ', sizeof(id->sn));
 	bin2hex(id->sn, &ctrl->subsys->serial,
 		min(sizeof(ctrl->subsys->serial), sizeof(id->sn) / 2));
 	memcpy_and_pad(id->mn, sizeof(id->mn), model, sizeof(model) - 1, ' ');
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c
index a72425d8bce0..231e04e0a496 100644
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -59,7 +59,7 @@ static void nvmet_format_discovery_entry(struct nvmf_disc_rsp_page_hdr *hdr,
 	memcpy(e->trsvcid, port->disc_addr.trsvcid, NVMF_TRSVCID_SIZE);
 	memcpy(e->traddr, traddr, NVMF_TRADDR_SIZE);
 	memcpy(e->tsas.common, port->disc_addr.tsas.common, NVMF_TSAS_SIZE);
-	memcpy(e->subnqn, subsys_nqn, NVMF_NQN_SIZE);
+	strncpy(e->subnqn, subsys_nqn, NVMF_NQN_SIZE);
 }
 
 /*
diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c
index 28bbdff4a88b..cd2344179673 100644
--- a/drivers/nvme/target/io-cmd.c
+++ b/drivers/nvme/target/io-cmd.c
@@ -173,8 +173,8 @@ static void nvmet_execute_write_zeroes(struct nvmet_req *req)
 
 	sector = le64_to_cpu(write_zeroes->slba) <<
 		(req->ns->blksize_shift - 9);
-	nr_sector = (((sector_t)le16_to_cpu(write_zeroes->length)) <<
-		(req->ns->blksize_shift - 9)) + 1;
+	nr_sector = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) <<
+		(req->ns->blksize_shift - 9));
 
 	if (__blkdev_issue_zeroout(req->ns->bdev, sector, nr_sector,
 				GFP_KERNEL, &bio, 0))
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index a350765d2d5c..31fdfba556a8 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -149,14 +149,6 @@ nvme_loop_timeout(struct request *rq, bool reserved)
 	return BLK_EH_HANDLED;
 }
 
-static inline blk_status_t nvme_loop_is_ready(struct nvme_loop_queue *queue,
-		struct request *rq)
-{
-	if (unlikely(!test_bit(NVME_LOOP_Q_LIVE, &queue->flags)))
-		return nvmf_check_init_req(&queue->ctrl->ctrl, rq);
-	return BLK_STS_OK;
-}
-
 static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 		const struct blk_mq_queue_data *bd)
 {
@@ -166,7 +158,8 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 	struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req);
 	blk_status_t ret;
 
-	ret = nvme_loop_is_ready(queue, req);
+	ret = nvmf_check_if_ready(&queue->ctrl->ctrl, req,
+		test_bit(NVME_LOOP_Q_LIVE, &queue->flags), true);
 	if (unlikely(ret))
 		return ret;
 
@@ -174,15 +167,12 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 	if (ret)
 		return ret;
 
+	blk_mq_start_request(req);
 	iod->cmd.common.flags |= NVME_CMD_SGL_METABUF;
 	iod->req.port = nvmet_loop_port;
 	if (!nvmet_req_init(&iod->req, &queue->nvme_cq,
-			&queue->nvme_sq, &nvme_loop_ops)) {
-		nvme_cleanup_cmd(req);
-		blk_mq_start_request(req);
-		nvme_loop_queue_response(&iod->req);
+			&queue->nvme_sq, &nvme_loop_ops))
 		return BLK_STS_OK;
-	}
 
 	if (blk_rq_payload_bytes(req)) {
 		iod->sg_table.sgl = iod->first_sgl;
@@ -196,8 +186,6 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 		iod->req.transfer_len = blk_rq_payload_bytes(req);
 	}
 
-	blk_mq_start_request(req);
-
 	schedule_work(&iod->work);
 	return BLK_STS_OK;
 }
diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c
index 02c5984ab09b..6bb37c18292a 100644
--- a/drivers/of/unittest.c
+++ b/drivers/of/unittest.c
@@ -295,7 +295,7 @@ static void __init of_unittest_printf(void)
 		return;
 	}
 
-	num_to_str(phandle_str, sizeof(phandle_str), np->phandle);
+	num_to_str(phandle_str, sizeof(phandle_str), np->phandle, 0);
 
 	of_unittest_printf_one(np, "%pOF",  full_name);
 	of_unittest_printf_one(np, "%pOFf", full_name);
diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c
index 23da3046f160..d44100687dfe 100644
--- a/drivers/pci/hotplug/pnv_php.c
+++ b/drivers/pci/hotplug/pnv_php.c
@@ -919,8 +919,8 @@ static void pnv_php_unregister_one(struct device_node *dn)
 		return;
 
 	php_slot->state = PNV_PHP_STATE_OFFLINE;
-	pnv_php_put_slot(php_slot);
 	pci_hp_deregister(&php_slot->slot);
+	pnv_php_put_slot(php_slot);
 }
 
 static void pnv_php_unregister(struct device_node *dn)
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index aa86e904f93c..e597655a5643 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -5628,7 +5628,6 @@ void pci_reassigndev_resource_alignment(struct pci_dev *dev)
 		return;
 	}
 
-	pci_info(dev, "Disabling memory decoding and releasing memory resources\n");
 	pci_read_config_word(dev, PCI_COMMAND, &command);
 	command &= ~PCI_COMMAND_MEMORY;
 	pci_write_config_word(dev, PCI_COMMAND, command);
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 26141b1946ee..2990ad1e7c99 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -4673,9 +4673,13 @@ static void quirk_no_ext_tags(struct pci_dev *pdev)
 
 	pci_walk_bus(bridge->bus, pci_configure_extended_tags, NULL);
 }
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0132, quirk_no_ext_tags);
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0140, quirk_no_ext_tags);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0141, quirk_no_ext_tags);
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0142, quirk_no_ext_tags);
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0144, quirk_no_ext_tags);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0420, quirk_no_ext_tags);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0422, quirk_no_ext_tags);
 
 #ifdef CONFIG_PCI_ATS
 /*
diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c
index 0cd83a4bd4c8..d8ca40a97693 100644
--- a/drivers/pci/setup-res.c
+++ b/drivers/pci/setup-res.c
@@ -168,8 +168,6 @@ EXPORT_SYMBOL(pci_claim_resource);
 
 void pci_disable_bridge_window(struct pci_dev *dev)
 {
-	pci_info(dev, "disabling bridge mem windows\n");
-
 	/* MMIO Base/Limit */
 	pci_write_config_dword(dev, PCI_MEMORY_BASE, 0x0000fff0);
 
diff --git a/drivers/pcmcia/Makefile b/drivers/pcmcia/Makefile
index f1f89ddb1bfd..28502bd159e0 100644
--- a/drivers/pcmcia/Makefile
+++ b/drivers/pcmcia/Makefile
@@ -43,13 +43,9 @@ sa1111_cs-$(CONFIG_SA1100_JORNADA720)		+= sa1111_jornada720.o
 sa1111_cs-$(CONFIG_ARCH_LUBBOCK)		+= sa1111_lubbock.o
 
 sa1100_cs-y					+= sa1100_generic.o
-sa1100_cs-$(CONFIG_SA1100_ASSABET)		+= sa1100_assabet.o
-sa1100_cs-$(CONFIG_SA1100_CERF)			+= sa1100_cerf.o
 sa1100_cs-$(CONFIG_SA1100_COLLIE)		+= pxa2xx_sharpsl.o
 sa1100_cs-$(CONFIG_SA1100_H3100)		+= sa1100_h3600.o
 sa1100_cs-$(CONFIG_SA1100_H3600)		+= sa1100_h3600.o
-sa1100_cs-$(CONFIG_SA1100_NANOENGINE)		+= sa1100_nanoengine.o
-sa1100_cs-$(CONFIG_SA1100_SHANNON)		+= sa1100_shannon.o
 sa1100_cs-$(CONFIG_SA1100_SIMPAD)		+= sa1100_simpad.o
 
 pxa2xx_cm_x2xx_cs-y				+= pxa2xx_cm_x2xx.o pxa2xx_cm_x255.o pxa2xx_cm_x270.o
diff --git a/drivers/pcmcia/sa1100_assabet.c b/drivers/pcmcia/sa1100_assabet.c
deleted file mode 100644
index 78ad2bba76db..000000000000
--- a/drivers/pcmcia/sa1100_assabet.c
+++ /dev/null
@@ -1,100 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * drivers/pcmcia/sa1100_assabet.c
- *
- * PCMCIA implementation routines for Assabet
- *
- */
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/device.h>
-#include <linux/init.h>
-#include <linux/gpio.h>
-
-#include <asm/mach-types.h>
-#include <mach/assabet.h>
-
-#include "sa1100_generic.h"
-
-static int assabet_pcmcia_hw_init(struct soc_pcmcia_socket *skt)
-{
-	skt->stat[SOC_STAT_CD].gpio = ASSABET_GPIO_CF_CD;
-	skt->stat[SOC_STAT_CD].name = "CF CD";
-	skt->stat[SOC_STAT_BVD1].gpio = ASSABET_GPIO_CF_BVD1;
-	skt->stat[SOC_STAT_BVD1].name = "CF BVD1";
-	skt->stat[SOC_STAT_BVD2].gpio = ASSABET_GPIO_CF_BVD2;
-	skt->stat[SOC_STAT_BVD2].name = "CF BVD2";
-	skt->stat[SOC_STAT_RDY].gpio = ASSABET_GPIO_CF_IRQ;
-	skt->stat[SOC_STAT_RDY].name = "CF RDY";
-
-	return 0;
-}
-
-static int
-assabet_pcmcia_configure_socket(struct soc_pcmcia_socket *skt, const socket_state_t *state)
-{
-	unsigned int mask;
-
-	switch (state->Vcc) {
-	case 0:
-		mask = 0;
-		break;
-
-	case 50:
-		printk(KERN_WARNING "%s(): CS asked for 5V, applying 3.3V...\n",
-			__func__);
-
-	case 33:  /* Can only apply 3.3V to the CF slot. */
-		mask = ASSABET_BCR_CF_PWR;
-		break;
-
-	default:
-		printk(KERN_ERR "%s(): unrecognized Vcc %u\n", __func__,
-			state->Vcc);
-		return -1;
-	}
-
-	/* Silently ignore Vpp, speaker enable. */
-
-	if (state->flags & SS_RESET)
-		mask |= ASSABET_BCR_CF_RST;
-	if (!(state->flags & SS_OUTPUT_ENA))
-		mask |= ASSABET_BCR_CF_BUS_OFF;
-
-	ASSABET_BCR_frob(ASSABET_BCR_CF_RST | ASSABET_BCR_CF_PWR |
-			ASSABET_BCR_CF_BUS_OFF, mask);
-
-	return 0;
-}
-
-/*
- * Disable card status IRQs on suspend.
- */
-static void assabet_pcmcia_socket_suspend(struct soc_pcmcia_socket *skt)
-{
-	/*
-	 * Tristate the CF bus signals.  Also assert CF
-	 * reset as per user guide page 4-11.
-	 */
-	ASSABET_BCR_set(ASSABET_BCR_CF_BUS_OFF | ASSABET_BCR_CF_RST);
-}
-
-static struct pcmcia_low_level assabet_pcmcia_ops = { 
-	.owner			= THIS_MODULE,
-	.hw_init		= assabet_pcmcia_hw_init,
-	.socket_state		= soc_common_cf_socket_state,
-	.configure_socket	= assabet_pcmcia_configure_socket,
-	.socket_suspend		= assabet_pcmcia_socket_suspend,
-};
-
-int pcmcia_assabet_init(struct device *dev)
-{
-	int ret = -ENODEV;
-
-	if (machine_is_assabet() && !machine_has_neponset())
-		ret = sa11xx_drv_pcmcia_probe(dev, &assabet_pcmcia_ops, 1, 1);
-
-	return ret;
-}
diff --git a/drivers/pcmcia/sa1100_cerf.c b/drivers/pcmcia/sa1100_cerf.c
deleted file mode 100644
index 2a54081d161d..000000000000
--- a/drivers/pcmcia/sa1100_cerf.c
+++ /dev/null
@@ -1,86 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * drivers/pcmcia/sa1100_cerf.c
- *
- * PCMCIA implementation routines for CerfBoard
- * Based off the Assabet.
- *
- */
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/device.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/gpio.h>
-
-#include <mach/hardware.h>
-#include <asm/mach-types.h>
-#include <asm/irq.h>
-#include <mach/cerf.h>
-#include "sa1100_generic.h"
-
-#define CERF_SOCKET	1
-
-static int cerf_pcmcia_hw_init(struct soc_pcmcia_socket *skt)
-{
-	int ret;
-
-	ret = gpio_request_one(CERF_GPIO_CF_RESET, GPIOF_OUT_INIT_LOW, "CF_RESET");
-	if (ret)
-		return ret;
-
-	skt->stat[SOC_STAT_CD].gpio = CERF_GPIO_CF_CD;
-	skt->stat[SOC_STAT_CD].name = "CF_CD";
-	skt->stat[SOC_STAT_BVD1].gpio = CERF_GPIO_CF_BVD1;
-	skt->stat[SOC_STAT_BVD1].name = "CF_BVD1";
-	skt->stat[SOC_STAT_BVD2].gpio = CERF_GPIO_CF_BVD2;
-	skt->stat[SOC_STAT_BVD2].name = "CF_BVD2";
-	skt->stat[SOC_STAT_RDY].gpio = CERF_GPIO_CF_IRQ;
-	skt->stat[SOC_STAT_RDY].name = "CF_IRQ";
-
-	return 0;
-}
-
-static void cerf_pcmcia_hw_shutdown(struct soc_pcmcia_socket *skt)
-{
-	gpio_free(CERF_GPIO_CF_RESET);
-}
-
-static int
-cerf_pcmcia_configure_socket(struct soc_pcmcia_socket *skt,
-			     const socket_state_t *state)
-{
-	switch (state->Vcc) {
-	case 0:
-	case 50:
-	case 33:
-		break;
-
-	default:
-		printk(KERN_ERR "%s(): unrecognized Vcc %u\n",
-			__func__, state->Vcc);
-		return -1;
-	}
-
-	gpio_set_value(CERF_GPIO_CF_RESET, !!(state->flags & SS_RESET));
-
-	return 0;
-}
-
-static struct pcmcia_low_level cerf_pcmcia_ops = { 
-	.owner			= THIS_MODULE,
-	.hw_init		= cerf_pcmcia_hw_init,
-	.hw_shutdown		= cerf_pcmcia_hw_shutdown,
-	.socket_state		= soc_common_cf_socket_state,
-	.configure_socket	= cerf_pcmcia_configure_socket,
-};
-
-int pcmcia_cerf_init(struct device *dev)
-{
-	int ret = -ENODEV;
-
-	if (machine_is_cerf())
-		ret = sa11xx_drv_pcmcia_probe(dev, &cerf_pcmcia_ops, CERF_SOCKET, 1);
-
-	return ret;
-}
diff --git a/drivers/pcmcia/sa1100_generic.c b/drivers/pcmcia/sa1100_generic.c
index 66acdc85727c..47b060c57418 100644
--- a/drivers/pcmcia/sa1100_generic.c
+++ b/drivers/pcmcia/sa1100_generic.c
@@ -31,7 +31,9 @@
 ======================================================================*/
 
 #include <linux/module.h>
+#include <linux/gpio/consumer.h>
 #include <linux/init.h>
+#include <linux/regulator/consumer.h>
 #include <linux/slab.h>
 #include <linux/platform_device.h>
 
@@ -41,24 +43,81 @@
 
 #include "sa1100_generic.h"
 
+/* GPIO consumer names for the CF status signals, indexed by SOC_STAT_*. */
+static const char *sa11x0_cf_gpio_names[] = {
+	[SOC_STAT_CD] = "detect",
+	[SOC_STAT_BVD1] = "bvd1",
+	[SOC_STAT_BVD2] = "bvd2",
+	[SOC_STAT_RDY] = "ready",
+};
+
+/*
+ * Acquire the resources of a generic SA11x0 CF socket from its parent device:
+ * the mandatory "reset" GPIO, the optional "bus-enable" GPIO, the optional
+ * "vcc" regulator and one optional input GPIO per sa11x0_cf_gpio_names[]
+ * entry.  All lookups are devm-managed.
+ *
+ * Returns 0 on success or the negative errno of the failing lookup.
+ */
+static int sa11x0_cf_hw_init(struct soc_pcmcia_socket *skt)
+{
+	struct device *dev = skt->socket.dev.parent;
+	int i;
+
+	skt->gpio_reset = devm_gpiod_get(dev, "reset", GPIOD_OUT_HIGH);
+	if (IS_ERR(skt->gpio_reset))
+		return PTR_ERR(skt->gpio_reset);
+
+	skt->gpio_bus_enable = devm_gpiod_get_optional(dev, "bus-enable",
+						       GPIOD_OUT_HIGH);
+	if (IS_ERR(skt->gpio_bus_enable))
+		return PTR_ERR(skt->gpio_bus_enable);
+
+	/*
+	 * devm_regulator_get_optional() reports an absent supply as
+	 * ERR_PTR(-ENODEV), never NULL.  Treat that as "no regulator" so the
+	 * socket still probes; any other error (e.g. -EPROBE_DEFER) is fatal.
+	 */
+	skt->vcc.reg = devm_regulator_get_optional(dev, "vcc");
+	if (PTR_ERR(skt->vcc.reg) == -ENODEV) {
+		skt->vcc.reg = NULL;
+		dev_warn(dev,
+			 "no Vcc regulator provided, ignoring Vcc controls\n");
+	} else if (IS_ERR(skt->vcc.reg)) {
+		return PTR_ERR(skt->vcc.reg);
+	}
+
+	for (i = 0; i < ARRAY_SIZE(sa11x0_cf_gpio_names); i++) {
+		skt->stat[i].name = sa11x0_cf_gpio_names[i];
+		skt->stat[i].desc = devm_gpiod_get_optional(dev,
+					sa11x0_cf_gpio_names[i], GPIOD_IN);
+		if (IS_ERR(skt->stat[i].desc))
+			return PTR_ERR(skt->stat[i].desc);
+	}
+	return 0;
+}
+
+/* Forward the requested Vcc to soc_pcmcia_regulator_set() for skt->vcc. */
+static int sa11x0_cf_configure_socket(struct soc_pcmcia_socket *skt,
+	const socket_state_t *state)
+{
+	return soc_pcmcia_regulator_set(skt, &skt->vcc, state->Vcc);
+}
+
+/* Low-level ops used for sockets probed through sa11x0_cf_hw_init(). */
+static struct pcmcia_low_level sa11x0_cf_ops = {
+	.owner = THIS_MODULE,
+	.hw_init = sa11x0_cf_hw_init,
+	.socket_state = soc_common_cf_socket_state,
+	.configure_socket = sa11x0_cf_configure_socket,
+};
+
 int __init pcmcia_collie_init(struct device *dev);
 
-static int (*sa11x0_pcmcia_hw_init[])(struct device *dev) = {
-#ifdef CONFIG_SA1100_ASSABET
-	pcmcia_assabet_init,
-#endif
-#ifdef CONFIG_SA1100_CERF
-	pcmcia_cerf_init,
-#endif
+static int (*sa11x0_pcmcia_legacy_hw_init[])(struct device *dev) = {
 #if defined(CONFIG_SA1100_H3100) || defined(CONFIG_SA1100_H3600)
 	pcmcia_h3600_init,
 #endif
-#ifdef CONFIG_SA1100_NANOENGINE
-	pcmcia_nanoengine_init,
-#endif
-#ifdef CONFIG_SA1100_SHANNON
-	pcmcia_shannon_init,
-#endif
 #ifdef CONFIG_SA1100_SIMPAD
 	pcmcia_simpad_init,
 #endif
@@ -67,15 +109,15 @@ static int (*sa11x0_pcmcia_hw_init[])(struct device *dev) = {
 #endif
 };
 
-static int sa11x0_drv_pcmcia_probe(struct platform_device *dev)
+static int sa11x0_drv_pcmcia_legacy_probe(struct platform_device *dev)
 {
 	int i, ret = -ENODEV;
 
 	/*
 	 * Initialise any "on-board" PCMCIA sockets.
 	 */
-	for (i = 0; i < ARRAY_SIZE(sa11x0_pcmcia_hw_init); i++) {
-		ret = sa11x0_pcmcia_hw_init[i](&dev->dev);
+	for (i = 0; i < ARRAY_SIZE(sa11x0_pcmcia_legacy_hw_init); i++) {
+		ret = sa11x0_pcmcia_legacy_hw_init[i](&dev->dev);
 		if (ret == 0)
 			break;
 	}
@@ -83,7 +125,7 @@ static int sa11x0_drv_pcmcia_probe(struct platform_device *dev)
 	return ret;
 }
 
-static int sa11x0_drv_pcmcia_remove(struct platform_device *dev)
+static int sa11x0_drv_pcmcia_legacy_remove(struct platform_device *dev)
 {
 	struct skt_dev_info *sinfo = platform_get_drvdata(dev);
 	int i;
@@ -96,6 +138,45 @@ static int sa11x0_drv_pcmcia_remove(struct platform_device *dev)
 	return 0;
 }
 
+static int sa11x0_drv_pcmcia_probe(struct platform_device *pdev)
+{
+	struct soc_pcmcia_socket *skt;
+	struct device *dev = &pdev->dev;
+
+	if (pdev->id == -1)	/* id -1: hand over to the legacy board probe */
+		return sa11x0_drv_pcmcia_legacy_probe(pdev);
+
+	skt = devm_kzalloc(dev, sizeof(*skt), GFP_KERNEL);
+	if (!skt)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, skt);
+
+	skt->nr = pdev->id;	/* platform device id doubles as socket number */
+	skt->clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(skt->clk))
+		return PTR_ERR(skt->clk);
+
+	sa11xx_drv_pcmcia_ops(&sa11x0_cf_ops);
+	soc_pcmcia_init_one(skt, &sa11x0_cf_ops, dev);
+
+	return sa11xx_drv_pcmcia_add_one(skt);
+}
+
+static int sa11x0_drv_pcmcia_remove(struct platform_device *dev)
+{
+	struct soc_pcmcia_socket *skt;
+
+	if (dev->id == -1)
+		return sa11x0_drv_pcmcia_legacy_remove(dev);
+
+	skt = platform_get_drvdata(dev);
+
+	soc_pcmcia_remove_one(skt);
+
+	return 0;
+}
+
 static struct platform_driver sa11x0_pcmcia_driver = {
 	.driver = {
 		.name		= "sa11x0-pcmcia",
diff --git a/drivers/pcmcia/sa1100_generic.h b/drivers/pcmcia/sa1100_generic.h
index a5f1f1dd63cb..7b7cdcd20187 100644
--- a/drivers/pcmcia/sa1100_generic.h
+++ b/drivers/pcmcia/sa1100_generic.h
@@ -6,18 +6,14 @@
  * Declaration for all machine specific init/exit functions.
  */
 extern int pcmcia_adsbitsy_init(struct device *);
-extern int pcmcia_assabet_init(struct device *);
 extern int pcmcia_badge4_init(struct device *);
-extern int pcmcia_cerf_init(struct device *);
 extern int pcmcia_flexanet_init(struct device *);
 extern int pcmcia_freebird_init(struct device *);
 extern int pcmcia_gcplus_init(struct device *);
 extern int pcmcia_graphicsmaster_init(struct device *);
 extern int pcmcia_h3600_init(struct device *);
-extern int pcmcia_nanoengine_init(struct device *);
 extern int pcmcia_pangolin_init(struct device *);
 extern int pcmcia_pfs168_init(struct device *);
-extern int pcmcia_shannon_init(struct device *);
 extern int pcmcia_simpad_init(struct device *);
 extern int pcmcia_stork_init(struct device *);
 extern int pcmcia_system3_init(struct device *);
diff --git a/drivers/pcmcia/sa1100_h3600.c b/drivers/pcmcia/sa1100_h3600.c
index aebf9a66fdde..a91222bc3824 100644
--- a/drivers/pcmcia/sa1100_h3600.c
+++ b/drivers/pcmcia/sa1100_h3600.c
@@ -24,13 +24,15 @@ static int h3600_pcmcia_hw_init(struct soc_pcmcia_socket *skt)
 {
 	int err;
 
+	skt->stat[SOC_STAT_CD].name = skt->nr ? "pcmcia1-detect" : "pcmcia0-detect";
+	skt->stat[SOC_STAT_RDY].name = skt->nr ? "pcmcia1-ready" : "pcmcia0-ready";
+
+	err = soc_pcmcia_request_gpiods(skt);
+	if (err)
+		return err;
+
 	switch (skt->nr) {
 	case 0:
-		skt->stat[SOC_STAT_CD].gpio = H3XXX_GPIO_PCMCIA_CD0;
-		skt->stat[SOC_STAT_CD].name = "PCMCIA CD0";
-		skt->stat[SOC_STAT_RDY].gpio = H3XXX_GPIO_PCMCIA_IRQ0;
-		skt->stat[SOC_STAT_RDY].name = "PCMCIA IRQ0";
-
 		err = gpio_request(H3XXX_EGPIO_OPT_NVRAM_ON, "OPT NVRAM ON");
 		if (err)
 			goto err01;
@@ -57,10 +59,6 @@ static int h3600_pcmcia_hw_init(struct soc_pcmcia_socket *skt)
 			goto err06;
 		break;
 	case 1:
-		skt->stat[SOC_STAT_CD].gpio = H3XXX_GPIO_PCMCIA_CD1;
-		skt->stat[SOC_STAT_CD].name = "PCMCIA CD1";
-		skt->stat[SOC_STAT_RDY].gpio = H3XXX_GPIO_PCMCIA_IRQ1;
-		skt->stat[SOC_STAT_RDY].name = "PCMCIA IRQ1";
 		break;
 	}
 	return 0;
diff --git a/drivers/pcmcia/sa1100_nanoengine.c b/drivers/pcmcia/sa1100_nanoengine.c
deleted file mode 100644
index 35c30ff41e81..000000000000
--- a/drivers/pcmcia/sa1100_nanoengine.c
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * drivers/pcmcia/sa1100_nanoengine.c
- *
- * PCMCIA implementation routines for BSI nanoEngine.
- *
- * In order to have a fully functional pcmcia subsystem in a BSE nanoEngine
- * board you should carefully read this:
- * http://cambuca.ldhs.cetuc.puc-rio.br/nanoengine/
- *
- * Copyright (C) 2010 Marcelo Roberto Jimenez <mroberto@cpti.cetuc.puc-rio.br>
- *
- * Based on original work for kernel 2.4 by
- * Miguel Freitas <miguel@cpti.cetuc.puc-rio.br>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-#include <linux/device.h>
-#include <linux/errno.h>
-#include <linux/gpio.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/signal.h>
-
-#include <asm/mach-types.h>
-#include <asm/irq.h>
-
-#include <mach/hardware.h>
-#include <mach/nanoengine.h>
-
-#include "sa1100_generic.h"
-
-struct nanoengine_pins {
-	unsigned output_pins;
-	unsigned clear_outputs;
-	int gpio_rst;
-	int gpio_cd;
-	int gpio_rdy;
-};
-
-static struct nanoengine_pins nano_skts[] = {
-	{
-		.gpio_rst		= GPIO_PC_RESET0,
-		.gpio_cd		= GPIO_PC_CD0,
-		.gpio_rdy		= GPIO_PC_READY0,
-	}, {
-		.gpio_rst		= GPIO_PC_RESET1,
-		.gpio_cd		= GPIO_PC_CD1,
-		.gpio_rdy		= GPIO_PC_READY1,
-	}
-};
-
-unsigned num_nano_pcmcia_sockets = ARRAY_SIZE(nano_skts);
-
-static int nanoengine_pcmcia_hw_init(struct soc_pcmcia_socket *skt)
-{
-	unsigned i = skt->nr;
-	int ret;
-
-	if (i >= num_nano_pcmcia_sockets)
-		return -ENXIO;
-
-	ret = gpio_request_one(nano_skts[i].gpio_rst, GPIOF_OUT_INIT_LOW,
-		i ? "PC RST1" : "PC RST0");
-	if (ret)
-		return ret;
-
-	skt->stat[SOC_STAT_CD].gpio = nano_skts[i].gpio_cd;
-	skt->stat[SOC_STAT_CD].name = i ? "PC CD1" : "PC CD0";
-	skt->stat[SOC_STAT_RDY].gpio = nano_skts[i].gpio_rdy;
-	skt->stat[SOC_STAT_RDY].name = i ? "PC RDY1" : "PC RDY0";
-
-	return 0;
-}
-
-static void nanoengine_pcmcia_hw_shutdown(struct soc_pcmcia_socket *skt)
-{
-	gpio_free(nano_skts[skt->nr].gpio_rst);
-}
-
-static int nanoengine_pcmcia_configure_socket(
-	struct soc_pcmcia_socket *skt, const socket_state_t *state)
-{
-	unsigned i = skt->nr;
-
-	if (i >= num_nano_pcmcia_sockets)
-		return -ENXIO;
-
-	gpio_set_value(nano_skts[skt->nr].gpio_rst, !!(state->flags & SS_RESET));
-
-	return 0;
-}
-
-static void nanoengine_pcmcia_socket_state(
-	struct soc_pcmcia_socket *skt, struct pcmcia_state *state)
-{
-	unsigned i = skt->nr;
-
-	if (i >= num_nano_pcmcia_sockets)
-		return;
-
-	state->bvd1 = 1;
-	state->bvd2 = 1;
-	state->vs_3v = 1; /* Can only apply 3.3V */
-	state->vs_Xv = 0;
-}
-
-static struct pcmcia_low_level nanoengine_pcmcia_ops = {
-	.owner			= THIS_MODULE,
-
-	.hw_init		= nanoengine_pcmcia_hw_init,
-	.hw_shutdown		= nanoengine_pcmcia_hw_shutdown,
-
-	.configure_socket	= nanoengine_pcmcia_configure_socket,
-	.socket_state		= nanoengine_pcmcia_socket_state,
-};
-
-int pcmcia_nanoengine_init(struct device *dev)
-{
-	int ret = -ENODEV;
-
-	if (machine_is_nanoengine())
-		ret = sa11xx_drv_pcmcia_probe(
-			dev, &nanoengine_pcmcia_ops, 0, 2);
-
-	return ret;
-}
-
diff --git a/drivers/pcmcia/sa1100_shannon.c b/drivers/pcmcia/sa1100_shannon.c
deleted file mode 100644
index 0e52a575986e..000000000000
--- a/drivers/pcmcia/sa1100_shannon.c
+++ /dev/null
@@ -1,104 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * drivers/pcmcia/sa1100_shannon.c
- *
- * PCMCIA implementation routines for Shannon
- *
- */
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/device.h>
-#include <linux/init.h>
-#include <linux/io.h>
-
-#include <mach/hardware.h>
-#include <asm/mach-types.h>
-#include <mach/shannon.h>
-#include <asm/irq.h>
-#include "sa1100_generic.h"
-
-static int shannon_pcmcia_hw_init(struct soc_pcmcia_socket *skt)
-{
-	/* All those are inputs */
-	GAFR &= ~(GPIO_GPIO(SHANNON_GPIO_EJECT_0) |
-		  GPIO_GPIO(SHANNON_GPIO_EJECT_1) |
-		  GPIO_GPIO(SHANNON_GPIO_RDY_0) |
-		  GPIO_GPIO(SHANNON_GPIO_RDY_1));
-
-	if (skt->nr == 0) {
-		skt->stat[SOC_STAT_CD].gpio = SHANNON_GPIO_EJECT_0;
-		skt->stat[SOC_STAT_CD].name = "PCMCIA_CD_0";
-		skt->stat[SOC_STAT_RDY].gpio = SHANNON_GPIO_RDY_0;
-		skt->stat[SOC_STAT_RDY].name = "PCMCIA_RDY_0";
-	} else {
-		skt->stat[SOC_STAT_CD].gpio = SHANNON_GPIO_EJECT_1;
-		skt->stat[SOC_STAT_CD].name = "PCMCIA_CD_1";
-		skt->stat[SOC_STAT_RDY].gpio = SHANNON_GPIO_RDY_1;
-		skt->stat[SOC_STAT_RDY].name = "PCMCIA_RDY_1";
-	}
-
-	return 0;
-}
-
-static void
-shannon_pcmcia_socket_state(struct soc_pcmcia_socket *skt,
-			    struct pcmcia_state *state)
-{
-	switch (skt->nr) {
-	case 0:
-		state->bvd1   = 1; 
-		state->bvd2   = 1; 
-		state->vs_3v  = 1; /* FIXME Can only apply 3.3V on Shannon. */
-		state->vs_Xv  = 0;
-		break;
-
-	case 1:
-		state->bvd1   = 1; 
-		state->bvd2   = 1; 
-		state->vs_3v  = 1; /* FIXME Can only apply 3.3V on Shannon. */
-		state->vs_Xv  = 0;
-		break;
-	}
-}
-
-static int
-shannon_pcmcia_configure_socket(struct soc_pcmcia_socket *skt,
-				const socket_state_t *state)
-{
-	switch (state->Vcc) {
-	case 0:	/* power off */
-		printk(KERN_WARNING "%s(): CS asked for 0V, still applying 3.3V..\n", __func__);
-		break;
-	case 50:
-		printk(KERN_WARNING "%s(): CS asked for 5V, applying 3.3V..\n", __func__);
-	case 33:
-		break;
-	default:
-		printk(KERN_ERR "%s(): unrecognized Vcc %u\n",
-		       __func__, state->Vcc);
-		return -1;
-	}
-
-	printk(KERN_WARNING "%s(): Warning, Can't perform reset\n", __func__);
-	
-	/* Silently ignore Vpp, output enable, speaker enable. */
-
-	return 0;
-}
-
-static struct pcmcia_low_level shannon_pcmcia_ops = {
-	.owner			= THIS_MODULE,
-	.hw_init		= shannon_pcmcia_hw_init,
-	.socket_state		= shannon_pcmcia_socket_state,
-	.configure_socket	= shannon_pcmcia_configure_socket,
-};
-
-int pcmcia_shannon_init(struct device *dev)
-{
-	int ret = -ENODEV;
-
-	if (machine_is_shannon())
-		ret = sa11xx_drv_pcmcia_probe(dev, &shannon_pcmcia_ops, 0, 2);
-
-	return ret;
-}
diff --git a/drivers/pcmcia/sa1100_simpad.c b/drivers/pcmcia/sa1100_simpad.c
index 7ce65bb23a8e..e235ee14eaa6 100644
--- a/drivers/pcmcia/sa1100_simpad.c
+++ b/drivers/pcmcia/sa1100_simpad.c
@@ -12,7 +12,6 @@
 
 #include <mach/hardware.h>
 #include <asm/mach-types.h>
-#include <asm/irq.h>
 #include <mach/simpad.h>
 #include "sa1100_generic.h"
  
@@ -21,12 +20,10 @@ static int simpad_pcmcia_hw_init(struct soc_pcmcia_socket *skt)
 
 	simpad_clear_cs3_bit(VCC_3V_EN|VCC_5V_EN|EN0|EN1);
 
-	skt->stat[SOC_STAT_CD].gpio = GPIO_CF_CD;
-	skt->stat[SOC_STAT_CD].name = "CF_CD";
-	skt->stat[SOC_STAT_RDY].gpio = GPIO_CF_IRQ;
-	skt->stat[SOC_STAT_RDY].name = "CF_RDY";
+	skt->stat[SOC_STAT_CD].name = "cf-detect";
+	skt->stat[SOC_STAT_RDY].name = "cf-ready";
 
-	return 0;
+	return soc_pcmcia_request_gpiods(skt);
 }
 
 static void simpad_pcmcia_hw_shutdown(struct soc_pcmcia_socket *skt)
@@ -42,9 +39,6 @@ simpad_pcmcia_socket_state(struct soc_pcmcia_socket *skt,
 {
 	long cs3reg = simpad_get_cs3_ro();
 
-	/* the detect signal is inverted - fix that up here */
-	state->detect = !state->detect;
-
 	state->bvd1 = 1; /* Might be cs3reg & PCMCIA_BVD1 */
 	state->bvd2 = 1; /* Might be cs3reg & PCMCIA_BVD2 */
 
diff --git a/drivers/platform/chrome/chromeos_laptop.c b/drivers/platform/chrome/chromeos_laptop.c
index d8599736a41a..5c47f451e43b 100644
--- a/drivers/platform/chrome/chromeos_laptop.c
+++ b/drivers/platform/chrome/chromeos_laptop.c
@@ -1,33 +1,20 @@
-/*
- *  chromeos_laptop.c - Driver to instantiate Chromebook i2c/smbus devices.
- *
- *  Author : Benson Leung <bleung@chromium.org>
- *
- *  Copyright (C) 2012 Google, Inc.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- *
- */
+// SPDX-License-Identifier: GPL-2.0+
+// Driver to instantiate Chromebook i2c/smbus devices.
+//
+// Copyright (C) 2012 Google, Inc.
+// Author: Benson Leung <bleung@chromium.org>
+
+#define pr_fmt(fmt)		KBUILD_MODNAME ": " fmt
 
 #include <linux/dmi.h>
 #include <linux/i2c.h>
-#include <linux/platform_data/atmel_mxt_ts.h>
 #include <linux/input.h>
 #include <linux/interrupt.h>
+#include <linux/ioport.h>
 #include <linux/module.h>
+#include <linux/pci.h>
 #include <linux/platform_device.h>
+#include <linux/property.h>
 
 #define ATMEL_TP_I2C_ADDR	0x4b
 #define ATMEL_TP_I2C_BL_ADDR	0x25
@@ -38,18 +25,11 @@
 #define ISL_ALS_I2C_ADDR	0x44
 #define TAOS_ALS_I2C_ADDR	0x29
 
-#define MAX_I2C_DEVICE_DEFERRALS	5
-
-static struct i2c_client *als;
-static struct i2c_client *tp;
-static struct i2c_client *ts;
-
 static const char *i2c_adapter_names[] = {
 	"SMBus I801 adapter",
 	"i915 gmbus vga",
 	"i915 gmbus panel",
 	"Synopsys DesignWare I2C adapter",
-	"Synopsys DesignWare I2C adapter",
 };
 
 /* Keep this enum consistent with i2c_adapter_names */
@@ -57,126 +37,41 @@ enum i2c_adapter_type {
 	I2C_ADAPTER_SMBUS = 0,
 	I2C_ADAPTER_VGADDC,
 	I2C_ADAPTER_PANEL,
-	I2C_ADAPTER_DESIGNWARE_0,
-	I2C_ADAPTER_DESIGNWARE_1,
-};
-
-enum i2c_peripheral_state {
-	UNPROBED = 0,
-	PROBED,
-	TIMEDOUT,
+	I2C_ADAPTER_DESIGNWARE,
 };
 
 struct i2c_peripheral {
-	int (*add)(enum i2c_adapter_type type);
-	enum i2c_adapter_type type;
-	enum i2c_peripheral_state state;
-	int tries;
-};
-
-#define MAX_I2C_PERIPHERALS 4
+	struct i2c_board_info board_info;
+	unsigned short alt_addr;
 
-struct chromeos_laptop {
-	struct i2c_peripheral i2c_peripherals[MAX_I2C_PERIPHERALS];
-};
-
-static struct chromeos_laptop *cros_laptop;
-
-static struct i2c_board_info cyapa_device = {
-	I2C_BOARD_INFO("cyapa", CYAPA_TP_I2C_ADDR),
-	.flags		= I2C_CLIENT_WAKE,
-};
+	const char *dmi_name;
+	unsigned long irqflags;
+	struct resource irq_resource;
 
-static struct i2c_board_info elantech_device = {
-	I2C_BOARD_INFO("elan_i2c", ELAN_TP_I2C_ADDR),
-	.flags		= I2C_CLIENT_WAKE,
-};
-
-static struct i2c_board_info isl_als_device = {
-	I2C_BOARD_INFO("isl29018", ISL_ALS_I2C_ADDR),
-};
-
-static struct i2c_board_info tsl2583_als_device = {
-	I2C_BOARD_INFO("tsl2583", TAOS_ALS_I2C_ADDR),
-};
-
-static struct i2c_board_info tsl2563_als_device = {
-	I2C_BOARD_INFO("tsl2563", TAOS_ALS_I2C_ADDR),
-};
-
-static int mxt_t19_keys[] = {
-	KEY_RESERVED,
-	KEY_RESERVED,
-	KEY_RESERVED,
-	KEY_RESERVED,
-	KEY_RESERVED,
-	BTN_LEFT
-};
-
-static struct mxt_platform_data atmel_224s_tp_platform_data = {
-	.irqflags		= IRQF_TRIGGER_FALLING,
-	.t19_num_keys		= ARRAY_SIZE(mxt_t19_keys),
-	.t19_keymap		= mxt_t19_keys,
-	.suspend_mode		= MXT_SUSPEND_T9_CTRL,
-};
+	enum i2c_adapter_type type;
+	u32 pci_devid;
 
-static struct i2c_board_info atmel_224s_tp_device = {
-	I2C_BOARD_INFO("atmel_mxt_tp", ATMEL_TP_I2C_ADDR),
-	.platform_data = &atmel_224s_tp_platform_data,
-	.flags		= I2C_CLIENT_WAKE,
+	struct i2c_client *client;
 };
 
-static struct mxt_platform_data atmel_1664s_platform_data = {
-	.irqflags		= IRQF_TRIGGER_FALLING,
-	.suspend_mode		= MXT_SUSPEND_T9_CTRL,
+struct chromeos_laptop {
+	/*
+	 * Note that we can't mark this pointer as const because
+	 * i2c_new_probed_device() modifies the I2C board info passed to it.
+	 */
+	struct i2c_peripheral *i2c_peripherals;
+	unsigned int num_i2c_peripherals;
 };
 
-static struct i2c_board_info atmel_1664s_device = {
-	I2C_BOARD_INFO("atmel_mxt_ts", ATMEL_TS_I2C_ADDR),
-	.platform_data = &atmel_1664s_platform_data,
-	.flags		= I2C_CLIENT_WAKE,
-};
+static const struct chromeos_laptop *cros_laptop;
 
-static struct i2c_client *__add_probed_i2c_device(
-		const char *name,
-		int bus,
-		struct i2c_board_info *info,
-		const unsigned short *alt_addr_list)
+static struct i2c_client *
+chromes_laptop_instantiate_i2c_device(struct i2c_adapter *adapter,
+				      struct i2c_board_info *info,
+				      unsigned short alt_addr)
 {
-	const struct dmi_device *dmi_dev;
-	const struct dmi_dev_onboard *dev_data;
-	struct i2c_adapter *adapter;
-	struct i2c_client *client = NULL;
 	const unsigned short addr_list[] = { info->addr, I2C_CLIENT_END };
-
-	if (bus < 0)
-		return NULL;
-	/*
-	 * If a name is specified, look for irq platform information stashed
-	 * in DMI_DEV_TYPE_DEV_ONBOARD by the Chrome OS custom system firmware.
-	 */
-	if (name) {
-		dmi_dev = dmi_find_device(DMI_DEV_TYPE_DEV_ONBOARD, name, NULL);
-		if (!dmi_dev) {
-			pr_err("%s failed to dmi find device %s.\n",
-			       __func__,
-			       name);
-			return NULL;
-		}
-		dev_data = (struct dmi_dev_onboard *)dmi_dev->device_data;
-		if (!dev_data) {
-			pr_err("%s failed to get data from dmi for %s.\n",
-			       __func__, name);
-			return NULL;
-		}
-		info->irq = dev_data->instance;
-	}
-
-	adapter = i2c_get_adapter(bus);
-	if (!adapter) {
-		pr_err("%s failed to get i2c adapter %d.\n", __func__, bus);
-		return NULL;
-	}
+	struct i2c_client *client;
 
 	/*
 	 * Add the i2c device. If we can't detect it at the primary
@@ -184,339 +79,345 @@ static struct i2c_client *__add_probed_i2c_device(
 	 * structure gets assigned primary address.
 	 */
 	client = i2c_new_probed_device(adapter, info, addr_list, NULL);
-	if (!client && alt_addr_list) {
+	if (!client && alt_addr) {
 		struct i2c_board_info dummy_info = {
 			I2C_BOARD_INFO("dummy", info->addr),
 		};
+		const unsigned short alt_addr_list[] = {
+			alt_addr, I2C_CLIENT_END
+		};
 		struct i2c_client *dummy;
 
 		dummy = i2c_new_probed_device(adapter, &dummy_info,
 					      alt_addr_list, NULL);
 		if (dummy) {
-			pr_debug("%s %d-%02x is probed at %02x\n",
-				  __func__, bus, info->addr, dummy->addr);
+			pr_debug("%d-%02x is probed at %02x\n",
+				 adapter->nr, info->addr, dummy->addr);
 			i2c_unregister_device(dummy);
 			client = i2c_new_device(adapter, info);
 		}
 	}
 
 	if (!client)
-		pr_notice("%s failed to register device %d-%02x\n",
-			  __func__, bus, info->addr);
+		pr_debug("failed to register device %d-%02x\n",
+			 adapter->nr, info->addr);
 	else
-		pr_debug("%s added i2c device %d-%02x\n",
-			 __func__, bus, info->addr);
+		pr_debug("added i2c device %d-%02x\n",
+			 adapter->nr, info->addr);
 
-	i2c_put_adapter(adapter);
 	return client;
 }
 
-struct i2c_lookup {
-	const char *name;
-	int instance;
-	int n;
-};
-
-static int __find_i2c_adap(struct device *dev, void *data)
-{
-	struct i2c_lookup *lookup = data;
-	static const char *prefix = "i2c-";
-	struct i2c_adapter *adapter;
-
-	if (strncmp(dev_name(dev), prefix, strlen(prefix)) != 0)
-		return 0;
-	adapter = to_i2c_adapter(dev);
-	if (strncmp(adapter->name, lookup->name, strlen(lookup->name)) == 0 &&
-	    lookup->n++ == lookup->instance)
-		return 1;
-	return 0;
-}
-
-static int find_i2c_adapter_num(enum i2c_adapter_type type)
-{
-	struct device *dev = NULL;
-	struct i2c_adapter *adapter;
-	struct i2c_lookup lookup;
-
-	memset(&lookup, 0, sizeof(lookup));
-	lookup.name = i2c_adapter_names[type];
-	lookup.instance = (type == I2C_ADAPTER_DESIGNWARE_1) ? 1 : 0;
-
-	/* find the adapter by name */
-	dev = bus_find_device(&i2c_bus_type, NULL, &lookup, __find_i2c_adap);
-	if (!dev) {
-		/* Adapters may appear later. Deferred probing will retry */
-		pr_notice("%s: i2c adapter %s not found on system.\n", __func__,
-			  lookup.name);
-		return -ENODEV;
-	}
-	adapter = to_i2c_adapter(dev);
-	return adapter->nr;
-}
-
-/*
- * Takes a list of addresses in addrs as such :
- * { addr1, ... , addrn, I2C_CLIENT_END };
- * add_probed_i2c_device will use i2c_new_probed_device
- * and probe for devices at all of the addresses listed.
- * Returns NULL if no devices found.
- * See Documentation/i2c/instantiating-devices for more information.
- */
-static struct i2c_client *add_probed_i2c_device(
-		const char *name,
-		enum i2c_adapter_type type,
-		struct i2c_board_info *info,
-		const unsigned short *addrs)
+static bool chromeos_laptop_match_adapter_devid(struct device *dev, u32 devid)
 {
-	return __add_probed_i2c_device(name,
-				       find_i2c_adapter_num(type),
-				       info,
-				       addrs);
-}
+	struct pci_dev *pdev;
 
-/*
- * Probes for a device at a single address, the one provided by
- * info->addr.
- * Returns NULL if no device found.
- */
-static struct i2c_client *add_i2c_device(const char *name,
-						enum i2c_adapter_type type,
-						struct i2c_board_info *info)
-{
-	return __add_probed_i2c_device(name,
-				       find_i2c_adapter_num(type),
-				       info,
-				       NULL);
-}
-
-static int setup_cyapa_tp(enum i2c_adapter_type type)
-{
-	if (tp)
-		return 0;
+	if (!dev_is_pci(dev))
+		return false;
 
-	/* add cyapa touchpad */
-	tp = add_i2c_device("trackpad", type, &cyapa_device);
-	return (!tp) ? -EAGAIN : 0;
+	pdev = to_pci_dev(dev);
+	return devid == PCI_DEVID(pdev->bus->number, pdev->devfn);
 }
 
-static int setup_atmel_224s_tp(enum i2c_adapter_type type)
+static void chromeos_laptop_check_adapter(struct i2c_adapter *adapter)
 {
-	const unsigned short addr_list[] = { ATMEL_TP_I2C_BL_ADDR,
-					     I2C_CLIENT_END };
-	if (tp)
-		return 0;
-
-	/* add atmel mxt touchpad */
-	tp = add_probed_i2c_device("trackpad", type,
-				   &atmel_224s_tp_device, addr_list);
-	return (!tp) ? -EAGAIN : 0;
-}
+	struct i2c_peripheral *i2c_dev;
+	int i;
 
-static int setup_elantech_tp(enum i2c_adapter_type type)
-{
-	if (tp)
-		return 0;
+	for (i = 0; i < cros_laptop->num_i2c_peripherals; i++) {
+		i2c_dev = &cros_laptop->i2c_peripherals[i];
 
-	/* add elantech touchpad */
-	tp = add_i2c_device("trackpad", type, &elantech_device);
-	return (!tp) ? -EAGAIN : 0;
-}
+		/* Skip devices already created */
+		if (i2c_dev->client)
+			continue;
 
-static int setup_atmel_1664s_ts(enum i2c_adapter_type type)
-{
-	const unsigned short addr_list[] = { ATMEL_TS_I2C_BL_ADDR,
-					     I2C_CLIENT_END };
-	if (ts)
-		return 0;
-
-	/* add atmel mxt touch device */
-	ts = add_probed_i2c_device("touchscreen", type,
-				   &atmel_1664s_device, addr_list);
-	return (!ts) ? -EAGAIN : 0;
-}
+		if (strncmp(adapter->name, i2c_adapter_names[i2c_dev->type],
+			    strlen(i2c_adapter_names[i2c_dev->type])))
+			continue;
 
-static int setup_isl29018_als(enum i2c_adapter_type type)
-{
-	if (als)
-		return 0;
+		if (i2c_dev->pci_devid &&
+		    !chromeos_laptop_match_adapter_devid(adapter->dev.parent,
+							 i2c_dev->pci_devid)) {
+			continue;
+		}
 
-	/* add isl29018 light sensor */
-	als = add_i2c_device("lightsensor", type, &isl_als_device);
-	return (!als) ? -EAGAIN : 0;
+		i2c_dev->client =
+			chromes_laptop_instantiate_i2c_device(adapter,
+							&i2c_dev->board_info,
+							i2c_dev->alt_addr);
+	}
 }
 
-static int setup_tsl2583_als(enum i2c_adapter_type type)
+static void chromeos_laptop_detach_i2c_client(struct i2c_client *client)
 {
-	if (als)
-		return 0;
-
-	/* add tsl2583 light sensor */
-	als = add_i2c_device(NULL, type, &tsl2583_als_device);
-	return (!als) ? -EAGAIN : 0;
-}
+	struct i2c_peripheral *i2c_dev;
+	int i;
 
-static int setup_tsl2563_als(enum i2c_adapter_type type)
-{
-	if (als)
-		return 0;
+	for (i = 0; i < cros_laptop->num_i2c_peripherals; i++) {
+		i2c_dev = &cros_laptop->i2c_peripherals[i];
 
-	/* add tsl2563 light sensor */
-	als = add_i2c_device(NULL, type, &tsl2563_als_device);
-	return (!als) ? -EAGAIN : 0;
+		if (i2c_dev->client == client)
+			i2c_dev->client = NULL;
+	}
 }
 
-static int __init chromeos_laptop_dmi_matched(const struct dmi_system_id *id)
+static int chromeos_laptop_i2c_notifier_call(struct notifier_block *nb,
+					     unsigned long action, void *data)
 {
-	cros_laptop = (void *)id->driver_data;
-	pr_debug("DMI Matched %s.\n", id->ident);
+	struct device *dev = data;
+
+	switch (action) {
+	case BUS_NOTIFY_ADD_DEVICE:
+		if (dev->type == &i2c_adapter_type)
+			chromeos_laptop_check_adapter(to_i2c_adapter(dev));
+		break;
+
+	case BUS_NOTIFY_REMOVED_DEVICE:
+		if (dev->type == &i2c_client_type)
+			chromeos_laptop_detach_i2c_client(to_i2c_client(dev));
+		break;
+	}
 
-	/* Indicate to dmi_scan that processing is done. */
-	return 1;
+	return 0;
 }
 
-static int chromeos_laptop_probe(struct platform_device *pdev)
-{
-	int i;
-	int ret = 0;
-
-	for (i = 0; i < MAX_I2C_PERIPHERALS; i++) {
-		struct i2c_peripheral *i2c_dev;
-
-		i2c_dev = &cros_laptop->i2c_peripherals[i];
-
-		/* No more peripherals. */
-		if (i2c_dev->add == NULL)
-			break;
-
-		if (i2c_dev->state == TIMEDOUT || i2c_dev->state == PROBED)
-			continue;
-
-		/*
-		 * Check that the i2c adapter is present.
-		 * -EPROBE_DEFER if missing as the adapter may appear much
-		 * later.
-		 */
-		if (find_i2c_adapter_num(i2c_dev->type) == -ENODEV) {
-			ret = -EPROBE_DEFER;
-			continue;
-		}
-
-		/* Add the device. */
-		if (i2c_dev->add(i2c_dev->type) == -EAGAIN) {
-			/*
-			 * Set -EPROBE_DEFER a limited num of times
-			 * if device is not successfully added.
-			 */
-			if (++i2c_dev->tries < MAX_I2C_DEVICE_DEFERRALS) {
-				ret = -EPROBE_DEFER;
-			} else {
-				/* Ran out of tries. */
-				pr_notice("%s: Ran out of tries for device.\n",
-					  __func__);
-				i2c_dev->state = TIMEDOUT;
-			}
-		} else {
-			i2c_dev->state = PROBED;
-		}
-	}
+static struct notifier_block chromeos_laptop_i2c_notifier = {
+	.notifier_call = chromeos_laptop_i2c_notifier_call,
+};
 
-	return ret;
+#define DECLARE_CROS_LAPTOP(_name)					\
+static const struct chromeos_laptop _name __initconst = {		\
+	.i2c_peripherals	= _name##_peripherals,			\
+	.num_i2c_peripherals	= ARRAY_SIZE(_name##_peripherals),	\
 }
 
-static struct chromeos_laptop samsung_series_5_550 = {
-	.i2c_peripherals = {
-		/* Touchpad. */
-		{ .add = setup_cyapa_tp, I2C_ADAPTER_SMBUS },
-		/* Light Sensor. */
-		{ .add = setup_isl29018_als, I2C_ADAPTER_SMBUS },
+static struct i2c_peripheral samsung_series_5_550_peripherals[] __initdata = {
+	/* Touchpad. */
+	{
+		.board_info	= {
+			I2C_BOARD_INFO("cyapa", CYAPA_TP_I2C_ADDR),
+			.flags		= I2C_CLIENT_WAKE,
+		},
+		.dmi_name	= "trackpad",
+		.type		= I2C_ADAPTER_SMBUS,
+	},
+	/* Light Sensor. */
+	{
+		.board_info	= {
+			I2C_BOARD_INFO("isl29018", ISL_ALS_I2C_ADDR),
+		},
+		.dmi_name	= "lightsensor",
+		.type		= I2C_ADAPTER_SMBUS,
 	},
 };
+DECLARE_CROS_LAPTOP(samsung_series_5_550);
 
-static struct chromeos_laptop samsung_series_5 = {
-	.i2c_peripherals = {
-		/* Light Sensor. */
-		{ .add = setup_tsl2583_als, I2C_ADAPTER_SMBUS },
+static struct i2c_peripheral samsung_series_5_peripherals[] __initdata = {
+	/* Light Sensor. */
+	{
+		.board_info	= {
+			I2C_BOARD_INFO("tsl2583", TAOS_ALS_I2C_ADDR),
+		},
+		.type		= I2C_ADAPTER_SMBUS,
 	},
 };
+DECLARE_CROS_LAPTOP(samsung_series_5);
 
-static struct chromeos_laptop chromebook_pixel = {
-	.i2c_peripherals = {
-		/* Touch Screen. */
-		{ .add = setup_atmel_1664s_ts, I2C_ADAPTER_PANEL },
-		/* Touchpad. */
-		{ .add = setup_atmel_224s_tp, I2C_ADAPTER_VGADDC },
-		/* Light Sensor. */
-		{ .add = setup_isl29018_als, I2C_ADAPTER_PANEL },
-	},
+static const int chromebook_pixel_tp_keys[] __initconst = {
+	KEY_RESERVED,
+	KEY_RESERVED,
+	KEY_RESERVED,
+	KEY_RESERVED,
+	KEY_RESERVED,
+	BTN_LEFT
 };
 
-static struct chromeos_laptop hp_chromebook_14 = {
-	.i2c_peripherals = {
-		/* Touchpad. */
-		{ .add = setup_cyapa_tp, I2C_ADAPTER_DESIGNWARE_0 },
+static const struct property_entry
+chromebook_pixel_trackpad_props[] __initconst = {
+	PROPERTY_ENTRY_U32_ARRAY("linux,gpio-keymap", chromebook_pixel_tp_keys),
+	{ }
+};
+
+static struct i2c_peripheral chromebook_pixel_peripherals[] __initdata = {
+	/* Touch Screen. */
+	{
+		.board_info	= {
+			I2C_BOARD_INFO("atmel_mxt_ts",
+					ATMEL_TS_I2C_ADDR),
+			.flags		= I2C_CLIENT_WAKE,
+		},
+		.dmi_name	= "touchscreen",
+		.irqflags	= IRQF_TRIGGER_FALLING,
+		.type		= I2C_ADAPTER_PANEL,
+		.alt_addr	= ATMEL_TS_I2C_BL_ADDR,
+	},
+	/* Touchpad. */
+	{
+		.board_info	= {
+			I2C_BOARD_INFO("atmel_mxt_tp",
+					ATMEL_TP_I2C_ADDR),
+			.properties	=
+				chromebook_pixel_trackpad_props,
+			.flags		= I2C_CLIENT_WAKE,
+		},
+		.dmi_name	= "trackpad",
+		.irqflags	= IRQF_TRIGGER_FALLING,
+		.type		= I2C_ADAPTER_VGADDC,
+		.alt_addr	= ATMEL_TP_I2C_BL_ADDR,
+	},
+	/* Light Sensor. */
+	{
+		.board_info	= {
+			I2C_BOARD_INFO("isl29018", ISL_ALS_I2C_ADDR),
+		},
+		.dmi_name	= "lightsensor",
+		.type		= I2C_ADAPTER_PANEL,
 	},
 };
+DECLARE_CROS_LAPTOP(chromebook_pixel);
 
-static struct chromeos_laptop dell_chromebook_11 = {
-	.i2c_peripherals = {
-		/* Touchpad. */
-		{ .add = setup_cyapa_tp, I2C_ADAPTER_DESIGNWARE_0 },
-		/* Elan Touchpad option. */
-		{ .add = setup_elantech_tp, I2C_ADAPTER_DESIGNWARE_0 },
+static struct i2c_peripheral hp_chromebook_14_peripherals[] __initdata = {
+	/* Touchpad. */
+	{
+		.board_info	= {
+			I2C_BOARD_INFO("cyapa", CYAPA_TP_I2C_ADDR),
+			.flags		= I2C_CLIENT_WAKE,
+		},
+		.dmi_name	= "trackpad",
+		.type		= I2C_ADAPTER_DESIGNWARE,
 	},
 };
+DECLARE_CROS_LAPTOP(hp_chromebook_14);
 
-static struct chromeos_laptop toshiba_cb35 = {
-	.i2c_peripherals = {
-		/* Touchpad. */
-		{ .add = setup_cyapa_tp, I2C_ADAPTER_DESIGNWARE_0 },
+static struct i2c_peripheral dell_chromebook_11_peripherals[] __initdata = {
+	/* Touchpad. */
+	{
+		.board_info	= {
+			I2C_BOARD_INFO("cyapa", CYAPA_TP_I2C_ADDR),
+			.flags		= I2C_CLIENT_WAKE,
+		},
+		.dmi_name	= "trackpad",
+		.type		= I2C_ADAPTER_DESIGNWARE,
+	},
+	/* Elan Touchpad option. */
+	{
+		.board_info	= {
+			I2C_BOARD_INFO("elan_i2c", ELAN_TP_I2C_ADDR),
+			.flags		= I2C_CLIENT_WAKE,
+		},
+		.dmi_name	= "trackpad",
+		.type		= I2C_ADAPTER_DESIGNWARE,
 	},
 };
+DECLARE_CROS_LAPTOP(dell_chromebook_11);
 
-static struct chromeos_laptop acer_c7_chromebook = {
-	.i2c_peripherals = {
-		/* Touchpad. */
-		{ .add = setup_cyapa_tp, I2C_ADAPTER_SMBUS },
+static struct i2c_peripheral toshiba_cb35_peripherals[] __initdata = {
+	/* Touchpad. */
+	{
+		.board_info	= {
+			I2C_BOARD_INFO("cyapa", CYAPA_TP_I2C_ADDR),
+			.flags		= I2C_CLIENT_WAKE,
+		},
+		.dmi_name	= "trackpad",
+		.type		= I2C_ADAPTER_DESIGNWARE,
 	},
 };
+DECLARE_CROS_LAPTOP(toshiba_cb35);
 
-static struct chromeos_laptop acer_ac700 = {
-	.i2c_peripherals = {
-		/* Light Sensor. */
-		{ .add = setup_tsl2563_als, I2C_ADAPTER_SMBUS },
+static struct i2c_peripheral acer_c7_chromebook_peripherals[] __initdata = {
+	/* Touchpad. */
+	{
+		.board_info	= {
+			I2C_BOARD_INFO("cyapa", CYAPA_TP_I2C_ADDR),
+			.flags		= I2C_CLIENT_WAKE,
+		},
+		.dmi_name	= "trackpad",
+		.type		= I2C_ADAPTER_SMBUS,
 	},
 };
+DECLARE_CROS_LAPTOP(acer_c7_chromebook);
 
-static struct chromeos_laptop acer_c720 = {
-	.i2c_peripherals = {
-		/* Touchscreen. */
-		{ .add = setup_atmel_1664s_ts, I2C_ADAPTER_DESIGNWARE_1 },
-		/* Touchpad. */
-		{ .add = setup_cyapa_tp, I2C_ADAPTER_DESIGNWARE_0 },
-		/* Elan Touchpad option. */
-		{ .add = setup_elantech_tp, I2C_ADAPTER_DESIGNWARE_0 },
-		/* Light Sensor. */
-		{ .add = setup_isl29018_als, I2C_ADAPTER_DESIGNWARE_1 },
+static struct i2c_peripheral acer_ac700_peripherals[] __initdata = {
+	/* Light Sensor. */
+	{
+		.board_info	= {
+			I2C_BOARD_INFO("tsl2583", TAOS_ALS_I2C_ADDR),
+		},
+		.type		= I2C_ADAPTER_SMBUS,
 	},
 };
+DECLARE_CROS_LAPTOP(acer_ac700);
 
-static struct chromeos_laptop hp_pavilion_14_chromebook = {
-	.i2c_peripherals = {
-		/* Touchpad. */
-		{ .add = setup_cyapa_tp, I2C_ADAPTER_SMBUS },
+static struct i2c_peripheral acer_c720_peripherals[] __initdata = {
+	/* Touchscreen. */
+	{
+		.board_info	= {
+			I2C_BOARD_INFO("atmel_mxt_ts",
+					ATMEL_TS_I2C_ADDR),
+			.flags		= I2C_CLIENT_WAKE,
+		},
+		.dmi_name	= "touchscreen",
+		.irqflags	= IRQF_TRIGGER_FALLING,
+		.type		= I2C_ADAPTER_DESIGNWARE,
+		.pci_devid	= PCI_DEVID(0, PCI_DEVFN(0x15, 0x2)),
+		.alt_addr	= ATMEL_TS_I2C_BL_ADDR,
+	},
+	/* Touchpad. */
+	{
+		.board_info	= {
+			I2C_BOARD_INFO("cyapa", CYAPA_TP_I2C_ADDR),
+			.flags		= I2C_CLIENT_WAKE,
+		},
+		.dmi_name	= "trackpad",
+		.type		= I2C_ADAPTER_DESIGNWARE,
+		.pci_devid	= PCI_DEVID(0, PCI_DEVFN(0x15, 0x1)),
+	},
+	/* Elan Touchpad option. */
+	{
+		.board_info	= {
+			I2C_BOARD_INFO("elan_i2c", ELAN_TP_I2C_ADDR),
+			.flags		= I2C_CLIENT_WAKE,
+		},
+		.dmi_name	= "trackpad",
+		.type		= I2C_ADAPTER_DESIGNWARE,
+		.pci_devid	= PCI_DEVID(0, PCI_DEVFN(0x15, 0x1)),
+	},
+	/* Light Sensor. */
+	{
+		.board_info	= {
+			I2C_BOARD_INFO("isl29018", ISL_ALS_I2C_ADDR),
+		},
+		.dmi_name	= "lightsensor",
+		.type		= I2C_ADAPTER_DESIGNWARE,
+		.pci_devid	= PCI_DEVID(0, PCI_DEVFN(0x15, 0x2)),
 	},
 };
+DECLARE_CROS_LAPTOP(acer_c720);
 
-static struct chromeos_laptop cr48 = {
-	.i2c_peripherals = {
-		/* Light Sensor. */
-		{ .add = setup_tsl2563_als, I2C_ADAPTER_SMBUS },
+static struct i2c_peripheral
+hp_pavilion_14_chromebook_peripherals[] __initdata = {
+	/* Touchpad. */
+	{
+		.board_info	= {
+			I2C_BOARD_INFO("cyapa", CYAPA_TP_I2C_ADDR),
+			.flags		= I2C_CLIENT_WAKE,
+		},
+		.dmi_name	= "trackpad",
+		.type		= I2C_ADAPTER_SMBUS,
 	},
 };
+DECLARE_CROS_LAPTOP(hp_pavilion_14_chromebook);
 
-#define _CBDD(board_) \
-	.callback = chromeos_laptop_dmi_matched, \
-	.driver_data = (void *)&board_
+static struct i2c_peripheral cr48_peripherals[] __initdata = {
+	/* Light Sensor. */
+	{
+		.board_info	= {
+			I2C_BOARD_INFO("tsl2563", TAOS_ALS_I2C_ADDR),
+		},
+		.type		= I2C_ADAPTER_SMBUS,
+	},
+};
+DECLARE_CROS_LAPTOP(cr48);
 
 static const struct dmi_system_id chromeos_laptop_dmi_table[] __initconst = {
 	{
@@ -525,14 +426,14 @@ static const struct dmi_system_id chromeos_laptop_dmi_table[] __initconst = {
 			DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG"),
 			DMI_MATCH(DMI_PRODUCT_NAME, "Lumpy"),
 		},
-		_CBDD(samsung_series_5_550),
+		.driver_data = (void *)&samsung_series_5_550,
 	},
 	{
 		.ident = "Samsung Series 5",
 		.matches = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "Alex"),
 		},
-		_CBDD(samsung_series_5),
+		.driver_data = (void *)&samsung_series_5,
 	},
 	{
 		.ident = "Chromebook Pixel",
@@ -540,7 +441,7 @@ static const struct dmi_system_id chromeos_laptop_dmi_table[] __initconst = {
 			DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"),
 			DMI_MATCH(DMI_PRODUCT_NAME, "Link"),
 		},
-		_CBDD(chromebook_pixel),
+		.driver_data = (void *)&chromebook_pixel,
 	},
 	{
 		.ident = "Wolf",
@@ -548,7 +449,7 @@ static const struct dmi_system_id chromeos_laptop_dmi_table[] __initconst = {
 			DMI_MATCH(DMI_BIOS_VENDOR, "coreboot"),
 			DMI_MATCH(DMI_PRODUCT_NAME, "Wolf"),
 		},
-		_CBDD(dell_chromebook_11),
+		.driver_data = (void *)&dell_chromebook_11,
 	},
 	{
 		.ident = "HP Chromebook 14",
@@ -556,7 +457,7 @@ static const struct dmi_system_id chromeos_laptop_dmi_table[] __initconst = {
 			DMI_MATCH(DMI_BIOS_VENDOR, "coreboot"),
 			DMI_MATCH(DMI_PRODUCT_NAME, "Falco"),
 		},
-		_CBDD(hp_chromebook_14),
+		.driver_data = (void *)&hp_chromebook_14,
 	},
 	{
 		.ident = "Toshiba CB35",
@@ -564,99 +465,214 @@ static const struct dmi_system_id chromeos_laptop_dmi_table[] __initconst = {
 			DMI_MATCH(DMI_BIOS_VENDOR, "coreboot"),
 			DMI_MATCH(DMI_PRODUCT_NAME, "Leon"),
 		},
-		_CBDD(toshiba_cb35),
+		.driver_data = (void *)&toshiba_cb35,
 	},
 	{
 		.ident = "Acer C7 Chromebook",
 		.matches = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "Parrot"),
 		},
-		_CBDD(acer_c7_chromebook),
+		.driver_data = (void *)&acer_c7_chromebook,
 	},
 	{
 		.ident = "Acer AC700",
 		.matches = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "ZGB"),
 		},
-		_CBDD(acer_ac700),
+		.driver_data = (void *)&acer_ac700,
 	},
 	{
 		.ident = "Acer C720",
 		.matches = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "Peppy"),
 		},
-		_CBDD(acer_c720),
+		.driver_data = (void *)&acer_c720,
 	},
 	{
 		.ident = "HP Pavilion 14 Chromebook",
 		.matches = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "Butterfly"),
 		},
-		_CBDD(hp_pavilion_14_chromebook),
+		.driver_data = (void *)&hp_pavilion_14_chromebook,
 	},
 	{
 		.ident = "Cr-48",
 		.matches = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "Mario"),
 		},
-		_CBDD(cr48),
+		.driver_data = (void *)&cr48,
 	},
 	{ }
 };
 MODULE_DEVICE_TABLE(dmi, chromeos_laptop_dmi_table);
 
-static struct platform_device *cros_platform_device;
+static int __init chromeos_laptop_scan_adapter(struct device *dev, void *data)
+{
+	struct i2c_adapter *adapter;
 
-static struct platform_driver cros_platform_driver = {
-	.driver = {
-		.name = "chromeos_laptop",
-	},
-	.probe = chromeos_laptop_probe,
-};
+	adapter = i2c_verify_adapter(dev);
+	if (adapter)
+		chromeos_laptop_check_adapter(adapter);
+
+	return 0;
+}
+
+static int __init chromeos_laptop_get_irq_from_dmi(const char *dmi_name)
+{
+	const struct dmi_device *dmi_dev;
+	const struct dmi_dev_onboard *dev_data;
+
+	dmi_dev = dmi_find_device(DMI_DEV_TYPE_DEV_ONBOARD, dmi_name, NULL);
+	if (!dmi_dev) {
+		pr_err("failed to find DMI device '%s'\n", dmi_name);
+		return -ENOENT;
+	}
+
+	dev_data = dmi_dev->device_data;
+	if (!dev_data) {
+		pr_err("failed to get data from DMI for '%s'\n", dmi_name);
+		return -EINVAL;
+	}
+
+	return dev_data->instance;
+}
+
+static int __init chromeos_laptop_setup_irq(struct i2c_peripheral *i2c_dev)
+{
+	int irq;
+
+	if (i2c_dev->dmi_name) {
+		irq = chromeos_laptop_get_irq_from_dmi(i2c_dev->dmi_name);
+		if (irq < 0)
+			return irq;
+
+		i2c_dev->irq_resource  = (struct resource)
+			DEFINE_RES_NAMED(irq, 1, NULL,
+					 IORESOURCE_IRQ | i2c_dev->irqflags);
+		i2c_dev->board_info.resources = &i2c_dev->irq_resource;
+		i2c_dev->board_info.num_resources = 1;
+	}
+
+	return 0;
+}
+
+static struct chromeos_laptop * __init
+chromeos_laptop_prepare(const struct chromeos_laptop *src)
+{
+	struct chromeos_laptop *cros_laptop;
+	struct i2c_peripheral *i2c_dev;
+	struct i2c_board_info *info;
+	int error;
+	int i;
+
+	cros_laptop = kzalloc(sizeof(*cros_laptop), GFP_KERNEL);
+	if (!cros_laptop)
+		return ERR_PTR(-ENOMEM);
+
+	cros_laptop->i2c_peripherals = kmemdup(src->i2c_peripherals,
+					       src->num_i2c_peripherals *
+						sizeof(*src->i2c_peripherals),
+					       GFP_KERNEL);
+	if (!cros_laptop->i2c_peripherals) {
+		error = -ENOMEM;
+		goto err_free_cros_laptop;
+	}
+
+	cros_laptop->num_i2c_peripherals = src->num_i2c_peripherals;
+
+	for (i = 0; i < cros_laptop->num_i2c_peripherals; i++) {
+		i2c_dev = &cros_laptop->i2c_peripherals[i];
+		info = &i2c_dev->board_info;
+
+		error = chromeos_laptop_setup_irq(i2c_dev);
+		if (error)
+			goto err_destroy_cros_peripherals;
+
+		/* We need to deep-copy properties */
+		if (info->properties) {
+			info->properties =
+				property_entries_dup(info->properties);
+			if (IS_ERR(info->properties)) {
+				error = PTR_ERR(info->properties);
+				goto err_destroy_cros_peripherals;
+			}
+		}
+	}
+
+	return cros_laptop;
+
+err_destroy_cros_peripherals:
+	while (--i >= 0) {
+		i2c_dev = &cros_laptop->i2c_peripherals[i];
+		info = &i2c_dev->board_info;
+		if (info->properties)
+			property_entries_free(info->properties);
+	}
+	kfree(cros_laptop->i2c_peripherals);
+err_free_cros_laptop:
+	kfree(cros_laptop);
+	return ERR_PTR(error);
+}
+
+static void chromeos_laptop_destroy(const struct chromeos_laptop *cros_laptop)
+{
+	struct i2c_peripheral *i2c_dev;
+	struct i2c_board_info *info;
+	int i;
+
+	for (i = 0; i < cros_laptop->num_i2c_peripherals; i++) {
+		i2c_dev = &cros_laptop->i2c_peripherals[i];
+		info = &i2c_dev->board_info;
+
+		if (i2c_dev->client)
+			i2c_unregister_device(i2c_dev->client);
+
+		if (info->properties)
+			property_entries_free(info->properties);
+	}
+
+	kfree(cros_laptop->i2c_peripherals);
+	kfree(cros_laptop);
+}
 
 static int __init chromeos_laptop_init(void)
 {
-	int ret;
+	const struct dmi_system_id *dmi_id;
+	int error;
 
-	if (!dmi_check_system(chromeos_laptop_dmi_table)) {
-		pr_debug("%s unsupported system.\n", __func__);
+	dmi_id = dmi_first_match(chromeos_laptop_dmi_table);
+	if (!dmi_id) {
+		pr_debug("unsupported system\n");
 		return -ENODEV;
 	}
 
-	ret = platform_driver_register(&cros_platform_driver);
-	if (ret)
-		return ret;
+	pr_debug("DMI Matched %s\n", dmi_id->ident);
+
+	cros_laptop = chromeos_laptop_prepare((void *)dmi_id->driver_data);
+	if (IS_ERR(cros_laptop))
+		return PTR_ERR(cros_laptop);
 
-	cros_platform_device = platform_device_alloc("chromeos_laptop", -1);
-	if (!cros_platform_device) {
-		ret = -ENOMEM;
-		goto fail_platform_device1;
+	error = bus_register_notifier(&i2c_bus_type,
+				      &chromeos_laptop_i2c_notifier);
+	if (error) {
+		pr_err("failed to register i2c bus notifier: %d\n", error);
+		chromeos_laptop_destroy(cros_laptop);
+		return error;
 	}
 
-	ret = platform_device_add(cros_platform_device);
-	if (ret)
-		goto fail_platform_device2;
+	/*
+	 * Scan adapters that have been registered before we installed
+	 * the notifier to make sure we do not miss any devices.
+	 */
+	i2c_for_each_dev(NULL, chromeos_laptop_scan_adapter);
 
 	return 0;
-
-fail_platform_device2:
-	platform_device_put(cros_platform_device);
-fail_platform_device1:
-	platform_driver_unregister(&cros_platform_driver);
-	return ret;
 }
 
 static void __exit chromeos_laptop_exit(void)
 {
-	if (als)
-		i2c_unregister_device(als);
-	if (tp)
-		i2c_unregister_device(tp);
-	if (ts)
-		i2c_unregister_device(ts);
-
-	platform_device_unregister(cros_platform_device);
-	platform_driver_unregister(&cros_platform_driver);
+	bus_unregister_notifier(&i2c_bus_type, &chromeos_laptop_i2c_notifier);
+	chromeos_laptop_destroy(cros_laptop);
 }
 
 module_init(chromeos_laptop_init);
diff --git a/drivers/platform/chrome/cros_ec_debugfs.c b/drivers/platform/chrome/cros_ec_debugfs.c
index 0e88e18362c1..cc265ed8deb7 100644
--- a/drivers/platform/chrome/cros_ec_debugfs.c
+++ b/drivers/platform/chrome/cros_ec_debugfs.c
@@ -211,6 +211,58 @@ static int cros_ec_console_log_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
+static ssize_t cros_ec_pdinfo_read(struct file *file,
+				   char __user *user_buf,
+				   size_t count,
+				   loff_t *ppos)
+{
+	char read_buf[EC_USB_PD_MAX_PORTS * 40], *p = read_buf;
+	struct cros_ec_debugfs *debug_info = file->private_data;
+	struct cros_ec_device *ec_dev = debug_info->ec->ec_dev;
+	struct {
+		struct cros_ec_command msg;
+		union {
+			struct ec_response_usb_pd_control_v1 resp;
+			struct ec_params_usb_pd_control params;
+		};
+	} __packed ec_buf;
+	struct cros_ec_command *msg;
+	struct ec_response_usb_pd_control_v1 *resp;
+	struct ec_params_usb_pd_control *params;
+	int i;
+
+	msg = &ec_buf.msg;
+	params = (struct ec_params_usb_pd_control *)msg->data;
+	resp = (struct ec_response_usb_pd_control_v1 *)msg->data;
+
+	msg->command = EC_CMD_USB_PD_CONTROL;
+	msg->version = 1;
+	msg->insize = sizeof(*resp);
+	msg->outsize = sizeof(*params);
+
+	/*
+	 * Read status from all PD ports until failure, typically caused
+	 * by attempting to read status on a port that doesn't exist.
+	 */
+	for (i = 0; i < EC_USB_PD_MAX_PORTS; ++i) {
+		params->port = i;
+		params->role = 0;
+		params->mux = 0;
+		params->swap = 0;
+
+		if (cros_ec_cmd_xfer_status(ec_dev, msg) < 0)
+			break;
+
+		p += scnprintf(p, sizeof(read_buf) + read_buf - p,
+			       "p%d: %s en:%.2x role:%.2x pol:%.2x\n", i,
+			       resp->state, resp->enabled, resp->role,
+			       resp->polarity);
+	}
+
+	return simple_read_from_buffer(user_buf, count, ppos,
+				       read_buf, p - read_buf);
+}
+
 const struct file_operations cros_ec_console_log_fops = {
 	.owner = THIS_MODULE,
 	.open = cros_ec_console_log_open,
@@ -220,6 +272,13 @@ const struct file_operations cros_ec_console_log_fops = {
 	.release = cros_ec_console_log_release,
 };
 
+const struct file_operations cros_ec_pdinfo_fops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read = cros_ec_pdinfo_read,
+	.llseek = default_llseek,
+};
+
 static int ec_read_version_supported(struct cros_ec_dev *ec)
 {
 	struct ec_params_get_cmd_versions_v1 *params;
@@ -288,7 +347,7 @@ static int cros_ec_create_console_log(struct cros_ec_debugfs *debug_info)
 	init_waitqueue_head(&debug_info->log_wq);
 
 	if (!debugfs_create_file("console_log",
-				 S_IFREG | S_IRUGO,
+				 S_IFREG | 0444,
 				 debug_info->dir,
 				 debug_info,
 				 &cros_ec_console_log_fops))
@@ -341,7 +400,7 @@ static int cros_ec_create_panicinfo(struct cros_ec_debugfs *debug_info)
 	debug_info->panicinfo_blob.size = ret;
 
 	if (!debugfs_create_blob("panicinfo",
-				 S_IFREG | S_IRUGO,
+				 S_IFREG | 0444,
 				 debug_info->dir,
 				 &debug_info->panicinfo_blob)) {
 		ret = -ENOMEM;
@@ -355,6 +414,15 @@ free:
 	return ret;
 }
 
+static int cros_ec_create_pdinfo(struct cros_ec_debugfs *debug_info)
+{
+	if (!debugfs_create_file("pdinfo", 0444, debug_info->dir, debug_info,
+				 &cros_ec_pdinfo_fops))
+		return -ENOMEM;
+
+	return 0;
+}
+
 int cros_ec_debugfs_init(struct cros_ec_dev *ec)
 {
 	struct cros_ec_platform *ec_platform = dev_get_platdata(ec->dev);
@@ -379,6 +447,10 @@ int cros_ec_debugfs_init(struct cros_ec_dev *ec)
 	if (ret)
 		goto remove_debugfs;
 
+	ret = cros_ec_create_pdinfo(debug_info);
+	if (ret)
+		goto remove_debugfs;
+
 	ec->debug_info = debug_info;
 
 	return 0;
diff --git a/drivers/platform/chrome/cros_ec_lpc.c b/drivers/platform/chrome/cros_ec_lpc.c
index af89e82eecd2..3682e1539251 100644
--- a/drivers/platform/chrome/cros_ec_lpc.c
+++ b/drivers/platform/chrome/cros_ec_lpc.c
@@ -31,6 +31,7 @@
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/printk.h>
+#include <linux/suspend.h>
 
 #define DRV_NAME "cros_ec_lpcs"
 #define ACPI_DRV_NAME "GOOG0004"
@@ -235,6 +236,9 @@ static void cros_ec_lpc_acpi_notify(acpi_handle device, u32 value, void *data)
 	    cros_ec_get_next_event(ec_dev, NULL) > 0)
 		blocking_notifier_call_chain(&ec_dev->event_notifier, 0,
 					     ec_dev);
+
+	if (value == ACPI_NOTIFY_DEVICE_WAKE)
+		pm_system_wakeup();
 }
 
 static int cros_ec_lpc_probe(struct platform_device *pdev)
@@ -342,6 +346,18 @@ static const struct dmi_system_id cros_ec_lpc_dmi_table[] __initconst = {
 		},
 	},
 	{
+		/*
+		 * If the box is running custom coreboot firmware then the
+		 * DMI BIOS version string will not be matched by "Google_",
+		 * but the system vendor string will still be matched by
+		 * "GOOGLE".
+		 */
+		.matches = {
+			DMI_MATCH(DMI_BIOS_VENDOR, "coreboot"),
+			DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"),
+		},
+	},
+	{
 		/* x86-link, the Chromebook Pixel. */
 		.matches = {
 			DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"),
diff --git a/drivers/platform/chrome/cros_ec_sysfs.c b/drivers/platform/chrome/cros_ec_sysfs.c
index da0a719d32f7..5a6db3fe213a 100644
--- a/drivers/platform/chrome/cros_ec_sysfs.c
+++ b/drivers/platform/chrome/cros_ec_sysfs.c
@@ -34,10 +34,12 @@
 #include <linux/types.h>
 #include <linux/uaccess.h>
 
+#define to_cros_ec_dev(dev)  container_of(dev, struct cros_ec_dev, class_dev)
+
 /* Accessor functions */
 
-static ssize_t show_ec_reboot(struct device *dev,
-			      struct device_attribute *attr, char *buf)
+static ssize_t reboot_show(struct device *dev,
+			   struct device_attribute *attr, char *buf)
 {
 	int count = 0;
 
@@ -48,9 +50,9 @@ static ssize_t show_ec_reboot(struct device *dev,
 	return count;
 }
 
-static ssize_t store_ec_reboot(struct device *dev,
-			       struct device_attribute *attr,
-			       const char *buf, size_t count)
+static ssize_t reboot_store(struct device *dev,
+			    struct device_attribute *attr,
+			    const char *buf, size_t count)
 {
 	static const struct {
 		const char * const str;
@@ -70,8 +72,7 @@ static ssize_t store_ec_reboot(struct device *dev,
 	int got_cmd = 0, offset = 0;
 	int i;
 	int ret;
-	struct cros_ec_dev *ec = container_of(dev,
-					      struct cros_ec_dev, class_dev);
+	struct cros_ec_dev *ec = to_cros_ec_dev(dev);
 
 	msg = kmalloc(sizeof(*msg) + sizeof(*param), GFP_KERNEL);
 	if (!msg)
@@ -114,22 +115,16 @@ static ssize_t store_ec_reboot(struct device *dev,
 	msg->command = EC_CMD_REBOOT_EC + ec->cmd_offset;
 	msg->outsize = sizeof(*param);
 	msg->insize = 0;
-	ret = cros_ec_cmd_xfer(ec->ec_dev, msg);
-	if (ret < 0) {
+	ret = cros_ec_cmd_xfer_status(ec->ec_dev, msg);
+	if (ret < 0)
 		count = ret;
-		goto exit;
-	}
-	if (msg->result != EC_RES_SUCCESS) {
-		dev_dbg(ec->dev, "EC result %d\n", msg->result);
-		count = -EINVAL;
-	}
 exit:
 	kfree(msg);
 	return count;
 }
 
-static ssize_t show_ec_version(struct device *dev,
-			       struct device_attribute *attr, char *buf)
+static ssize_t version_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
 {
 	static const char * const image_names[] = {"unknown", "RO", "RW"};
 	struct ec_response_get_version *r_ver;
@@ -138,8 +133,7 @@ static ssize_t show_ec_version(struct device *dev,
 	struct cros_ec_command *msg;
 	int ret;
 	int count = 0;
-	struct cros_ec_dev *ec = container_of(dev,
-					      struct cros_ec_dev, class_dev);
+	struct cros_ec_dev *ec = to_cros_ec_dev(dev);
 
 	msg = kmalloc(sizeof(*msg) + EC_HOST_PARAM_SIZE, GFP_KERNEL);
 	if (!msg)
@@ -150,17 +144,11 @@ static ssize_t show_ec_version(struct device *dev,
 	msg->command = EC_CMD_GET_VERSION + ec->cmd_offset;
 	msg->insize = sizeof(*r_ver);
 	msg->outsize = 0;
-	ret = cros_ec_cmd_xfer(ec->ec_dev, msg);
+	ret = cros_ec_cmd_xfer_status(ec->ec_dev, msg);
 	if (ret < 0) {
 		count = ret;
 		goto exit;
 	}
-	if (msg->result != EC_RES_SUCCESS) {
-		count = scnprintf(buf, PAGE_SIZE,
-				  "ERROR: EC returned %d\n", msg->result);
-		goto exit;
-	}
-
 	r_ver = (struct ec_response_get_version *)msg->data;
 	/* Strings should be null-terminated, but let's be sure. */
 	r_ver->version_string_ro[sizeof(r_ver->version_string_ro) - 1] = '\0';
@@ -237,14 +225,13 @@ exit:
 	return count;
 }
 
-static ssize_t show_ec_flashinfo(struct device *dev,
-				 struct device_attribute *attr, char *buf)
+static ssize_t flashinfo_show(struct device *dev,
+			      struct device_attribute *attr, char *buf)
 {
 	struct ec_response_flash_info *resp;
 	struct cros_ec_command *msg;
 	int ret;
-	struct cros_ec_dev *ec = container_of(dev,
-					      struct cros_ec_dev, class_dev);
+	struct cros_ec_dev *ec = to_cros_ec_dev(dev);
 
 	msg = kmalloc(sizeof(*msg) + sizeof(*resp), GFP_KERNEL);
 	if (!msg)
@@ -255,14 +242,9 @@ static ssize_t show_ec_flashinfo(struct device *dev,
 	msg->command = EC_CMD_FLASH_INFO + ec->cmd_offset;
 	msg->insize = sizeof(*resp);
 	msg->outsize = 0;
-	ret = cros_ec_cmd_xfer(ec->ec_dev, msg);
+	ret = cros_ec_cmd_xfer_status(ec->ec_dev, msg);
 	if (ret < 0)
 		goto exit;
-	if (msg->result != EC_RES_SUCCESS) {
-		ret = scnprintf(buf, PAGE_SIZE,
-				"ERROR: EC returned %d\n", msg->result);
-		goto exit;
-	}
 
 	resp = (struct ec_response_flash_info *)msg->data;
 
@@ -276,21 +258,102 @@ exit:
 	return ret;
 }
 
+/* Keyboard wake angle control */
+static ssize_t kb_wake_angle_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	struct cros_ec_dev *ec = to_cros_ec_dev(dev);
+	struct ec_response_motion_sense *resp;
+	struct ec_params_motion_sense *param;
+	struct cros_ec_command *msg;
+	int ret;
+
+	msg = kmalloc(sizeof(*msg) + EC_HOST_PARAM_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	param = (struct ec_params_motion_sense *)msg->data;
+	msg->command = EC_CMD_MOTION_SENSE_CMD + ec->cmd_offset;
+	msg->version = 2;
+	param->cmd = MOTIONSENSE_CMD_KB_WAKE_ANGLE;
+	param->kb_wake_angle.data = EC_MOTION_SENSE_NO_VALUE;
+	msg->outsize = sizeof(*param);
+	msg->insize = sizeof(*resp);
+
+	ret = cros_ec_cmd_xfer_status(ec->ec_dev, msg);
+	if (ret < 0)
+		goto exit;
+
+	resp = (struct ec_response_motion_sense *)msg->data;
+	ret = scnprintf(buf, PAGE_SIZE, "%d\n", resp->kb_wake_angle.ret);
+exit:
+	kfree(msg);
+	return ret;
+}
+
+static ssize_t kb_wake_angle_store(struct device *dev,
+				   struct device_attribute *attr,
+				   const char *buf, size_t count)
+{
+	struct cros_ec_dev *ec = to_cros_ec_dev(dev);
+	struct ec_params_motion_sense *param;
+	struct cros_ec_command *msg;
+	u16 angle;
+	int ret;
+
+	ret = kstrtou16(buf, 0, &angle);
+	if (ret)
+		return ret;
+
+	msg = kmalloc(sizeof(*msg) + EC_HOST_PARAM_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	param = (struct ec_params_motion_sense *)msg->data;
+	msg->command = EC_CMD_MOTION_SENSE_CMD + ec->cmd_offset;
+	msg->version = 2;
+	param->cmd = MOTIONSENSE_CMD_KB_WAKE_ANGLE;
+	param->kb_wake_angle.data = angle;
+	msg->outsize = sizeof(*param);
+	msg->insize = sizeof(struct ec_response_motion_sense);
+
+	ret = cros_ec_cmd_xfer_status(ec->ec_dev, msg);
+	kfree(msg);
+	if (ret < 0)
+		return ret;
+	return count;
+}
+
 /* Module initialization */
 
-static DEVICE_ATTR(reboot, S_IWUSR | S_IRUGO, show_ec_reboot, store_ec_reboot);
-static DEVICE_ATTR(version, S_IRUGO, show_ec_version, NULL);
-static DEVICE_ATTR(flashinfo, S_IRUGO, show_ec_flashinfo, NULL);
+static DEVICE_ATTR_RW(reboot);
+static DEVICE_ATTR_RO(version);
+static DEVICE_ATTR_RO(flashinfo);
+static DEVICE_ATTR_RW(kb_wake_angle);
 
 static struct attribute *__ec_attrs[] = {
+	&dev_attr_kb_wake_angle.attr,
 	&dev_attr_reboot.attr,
 	&dev_attr_version.attr,
 	&dev_attr_flashinfo.attr,
 	NULL,
 };
 
+static umode_t cros_ec_ctrl_visible(struct kobject *kobj,
+				    struct attribute *a, int n)
+{
+	struct device *dev = container_of(kobj, struct device, kobj);
+	struct cros_ec_dev *ec = to_cros_ec_dev(dev);
+
+	if (a == &dev_attr_kb_wake_angle.attr && !ec->has_kb_wake_angle)
+		return 0;
+
+	return a->mode;
+}
+
 struct attribute_group cros_ec_attr_group = {
 	.attrs = __ec_attrs,
+	.is_visible = cros_ec_ctrl_visible,
 };
 EXPORT_SYMBOL(cros_ec_attr_group);
 
diff --git a/drivers/platform/mellanox/mlxreg-hotplug.c b/drivers/platform/mellanox/mlxreg-hotplug.c
index 313cf8ad77bf..ea9e7f4479ca 100644
--- a/drivers/platform/mellanox/mlxreg-hotplug.c
+++ b/drivers/platform/mellanox/mlxreg-hotplug.c
@@ -93,9 +93,11 @@ struct mlxreg_hotplug_priv_data {
 	bool after_probe;
 };
 
-static int mlxreg_hotplug_device_create(struct device *dev,
+static int mlxreg_hotplug_device_create(struct mlxreg_hotplug_priv_data *priv,
 					struct mlxreg_core_data *data)
 {
+	struct mlxreg_core_hotplug_platform_data *pdata;
+
 	/*
 	 * Return if adapter number is negative. It could be in case hotplug
 	 * event is not associated with hotplug device.
@@ -103,19 +105,21 @@ static int mlxreg_hotplug_device_create(struct device *dev,
 	if (data->hpdev.nr < 0)
 		return 0;
 
-	data->hpdev.adapter = i2c_get_adapter(data->hpdev.nr);
+	pdata = dev_get_platdata(&priv->pdev->dev);
+	data->hpdev.adapter = i2c_get_adapter(data->hpdev.nr +
+					      pdata->shift_nr);
 	if (!data->hpdev.adapter) {
-		dev_err(dev, "Failed to get adapter for bus %d\n",
-			data->hpdev.nr);
+		dev_err(priv->dev, "Failed to get adapter for bus %d\n",
+			data->hpdev.nr + pdata->shift_nr);
 		return -EFAULT;
 	}
 
 	data->hpdev.client = i2c_new_device(data->hpdev.adapter,
 					    data->hpdev.brdinfo);
 	if (!data->hpdev.client) {
-		dev_err(dev, "Failed to create client %s at bus %d at addr 0x%02x\n",
-			data->hpdev.brdinfo->type, data->hpdev.nr,
-			data->hpdev.brdinfo->addr);
+		dev_err(priv->dev, "Failed to create client %s at bus %d at addr 0x%02x\n",
+			data->hpdev.brdinfo->type, data->hpdev.nr +
+			pdata->shift_nr, data->hpdev.brdinfo->addr);
 
 		i2c_put_adapter(data->hpdev.adapter);
 		data->hpdev.adapter = NULL;
@@ -270,10 +274,10 @@ mlxreg_hotplug_work_helper(struct mlxreg_hotplug_priv_data *priv,
 			if (item->inversed)
 				mlxreg_hotplug_device_destroy(data);
 			else
-				mlxreg_hotplug_device_create(priv->dev, data);
+				mlxreg_hotplug_device_create(priv, data);
 		} else {
 			if (item->inversed)
-				mlxreg_hotplug_device_create(priv->dev, data);
+				mlxreg_hotplug_device_create(priv, data);
 			else
 				mlxreg_hotplug_device_destroy(data);
 		}
@@ -319,7 +323,7 @@ mlxreg_hotplug_health_work_helper(struct mlxreg_hotplug_priv_data *priv,
 		if (regval == MLXREG_HOTPLUG_HEALTH_MASK) {
 			if ((data->health_cntr++ == MLXREG_HOTPLUG_RST_CNTR) ||
 			    !priv->after_probe) {
-				mlxreg_hotplug_device_create(priv->dev, data);
+				mlxreg_hotplug_device_create(priv, data);
 				data->attached = true;
 			}
 		} else {
@@ -550,6 +554,7 @@ static int mlxreg_hotplug_probe(struct platform_device *pdev)
 {
 	struct mlxreg_core_hotplug_platform_data *pdata;
 	struct mlxreg_hotplug_priv_data *priv;
+	struct i2c_adapter *deferred_adap;
 	int err;
 
 	pdata = dev_get_platdata(&pdev->dev);
@@ -558,6 +563,12 @@ static int mlxreg_hotplug_probe(struct platform_device *pdev)
 		return -EINVAL;
 	}
 
+	/* Defer probing if the necessary adapter is not configured yet. */
+	deferred_adap = i2c_get_adapter(pdata->deferred_nr);
+	if (!deferred_adap)
+		return -EPROBE_DEFER;
+	i2c_put_adapter(deferred_adap);
+
 	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
 	if (!priv)
 		return -ENOMEM;
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index ef016e46544a..39d06dd1f63a 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -757,6 +757,8 @@ config TOPSTAR_LAPTOP
 	depends on ACPI
 	depends on INPUT
 	select INPUT_SPARSEKMAP
+	select LEDS_CLASS
+	select NEW_LEDS
 	---help---
 	  This driver adds support for hotkeys found on Topstar laptops.
 
@@ -1174,6 +1176,7 @@ config INTEL_TELEMETRY
 
 config MLX_PLATFORM
 	tristate "Mellanox Technologies platform support"
+	depends on I2C && REGMAP
 	---help---
 	  This option enables system support for the Mellanox Technologies
 	  platform. The Mellanox systems provide data center networking
diff --git a/drivers/platform/x86/dell-smbios-base.c b/drivers/platform/x86/dell-smbios-base.c
index 2485c80a9fdd..33fb2a20458a 100644
--- a/drivers/platform/x86/dell-smbios-base.c
+++ b/drivers/platform/x86/dell-smbios-base.c
@@ -514,7 +514,7 @@ static int build_tokens_sysfs(struct platform_device *dev)
 		continue;
 
 loop_fail_create_value:
-		kfree(value_name);
+		kfree(location_name);
 		goto out_unwind_strings;
 	}
 	smbios_attribute_group.attrs = token_attrs;
@@ -525,7 +525,7 @@ loop_fail_create_value:
 	return 0;
 
 out_unwind_strings:
-	for (i = i-1; i > 0; i--) {
+	while (i--) {
 		kfree(token_location_attrs[i].attr.name);
 		kfree(token_value_attrs[i].attr.name);
 	}
diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c
index 2cfbd3fa5136..cd95b6f3a064 100644
--- a/drivers/platform/x86/fujitsu-laptop.c
+++ b/drivers/platform/x86/fujitsu-laptop.c
@@ -53,6 +53,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/acpi.h>
+#include <linux/bitops.h>
 #include <linux/dmi.h>
 #include <linux/backlight.h>
 #include <linux/fb.h>
@@ -61,12 +62,11 @@
 #include <linux/kfifo.h>
 #include <linux/leds.h>
 #include <linux/platform_device.h>
-#include <linux/slab.h>
 #include <acpi/video.h>
 
-#define FUJITSU_DRIVER_VERSION "0.6.0"
+#define FUJITSU_DRIVER_VERSION		"0.6.0"
 
-#define FUJITSU_LCD_N_LEVELS 8
+#define FUJITSU_LCD_N_LEVELS		8
 
 #define ACPI_FUJITSU_CLASS		"fujitsu"
 #define ACPI_FUJITSU_BL_HID		"FUJ02B1"
@@ -76,41 +76,51 @@
 #define ACPI_FUJITSU_LAPTOP_DRIVER_NAME	"Fujitsu laptop FUJ02E3 ACPI hotkeys driver"
 #define ACPI_FUJITSU_LAPTOP_DEVICE_NAME	"Fujitsu FUJ02E3"
 
-#define ACPI_FUJITSU_NOTIFY_CODE1     0x80
+#define ACPI_FUJITSU_NOTIFY_CODE	0x80
 
 /* FUNC interface - command values */
-#define FUNC_FLAGS	0x1000
-#define FUNC_LEDS	0x1001
-#define FUNC_BUTTONS	0x1002
-#define FUNC_BACKLIGHT  0x1004
+#define FUNC_FLAGS			BIT(12)
+#define FUNC_LEDS			(BIT(12) | BIT(0))
+#define FUNC_BUTTONS			(BIT(12) | BIT(1))
+#define FUNC_BACKLIGHT			(BIT(12) | BIT(2))
 
 /* FUNC interface - responses */
-#define UNSUPPORTED_CMD 0x80000000
+#define UNSUPPORTED_CMD			0x80000000
 
 /* FUNC interface - status flags */
-#define FLAG_RFKILL	0x020
-#define FLAG_LID	0x100
-#define FLAG_DOCK	0x200
+#define FLAG_RFKILL			BIT(5)
+#define FLAG_LID			BIT(8)
+#define FLAG_DOCK			BIT(9)
 
 /* FUNC interface - LED control */
-#define FUNC_LED_OFF	0x1
-#define FUNC_LED_ON	0x30001
-#define KEYBOARD_LAMPS	0x100
-#define LOGOLAMP_POWERON 0x2000
-#define LOGOLAMP_ALWAYS  0x4000
-#define RADIO_LED_ON	0x20
-#define ECO_LED	0x10000
-#define ECO_LED_ON	0x80000
-
-/* Hotkey details */
-#define KEY1_CODE	0x410	/* codes for the keys in the GIRB register */
-#define KEY2_CODE	0x411
-#define KEY3_CODE	0x412
-#define KEY4_CODE	0x413
-#define KEY5_CODE	0x420
-
-#define MAX_HOTKEY_RINGBUFFER_SIZE 100
-#define RINGBUFFERSIZE 40
+#define FUNC_LED_OFF			BIT(0)
+#define FUNC_LED_ON			(BIT(0) | BIT(16) | BIT(17))
+#define LOGOLAMP_POWERON		BIT(13)
+#define LOGOLAMP_ALWAYS			BIT(14)
+#define KEYBOARD_LAMPS			BIT(8)
+#define RADIO_LED_ON			BIT(5)
+#define ECO_LED				BIT(16)
+#define ECO_LED_ON			BIT(19)
+
+/* FUNC interface - backlight power control */
+#define BACKLIGHT_PARAM_POWER		BIT(2)
+#define BACKLIGHT_OFF			(BIT(0) | BIT(1))
+#define BACKLIGHT_ON			0
+
+/* Scancodes read from the GIRB register */
+#define KEY1_CODE			0x410
+#define KEY2_CODE			0x411
+#define KEY3_CODE			0x412
+#define KEY4_CODE			0x413
+#define KEY5_CODE			0x420
+
+/* Hotkey ringbuffer limits */
+#define MAX_HOTKEY_RINGBUFFER_SIZE	100
+#define RINGBUFFERSIZE			40
+
+/* Module parameters */
+static int use_alt_lcd_levels = -1;
+static bool disable_brightness_adjust;
 
 /* Device controlling the backlight and associated keys */
 struct fujitsu_bl {
@@ -122,8 +132,6 @@ struct fujitsu_bl {
 };
 
 static struct fujitsu_bl *fujitsu_bl;
-static int use_alt_lcd_levels = -1;
-static bool disable_brightness_adjust;
 
 /* Device used to access hotkeys and other features on the laptop */
 struct fujitsu_laptop {
@@ -256,9 +264,11 @@ static int bl_update_status(struct backlight_device *b)
 
 	if (fext) {
 		if (b->props.power == FB_BLANK_POWERDOWN)
-			call_fext_func(fext, FUNC_BACKLIGHT, 0x1, 0x4, 0x3);
+			call_fext_func(fext, FUNC_BACKLIGHT, 0x1,
+				       BACKLIGHT_PARAM_POWER, BACKLIGHT_OFF);
 		else
-			call_fext_func(fext, FUNC_BACKLIGHT, 0x1, 0x4, 0x0);
+			call_fext_func(fext, FUNC_BACKLIGHT, 0x1,
+				       BACKLIGHT_PARAM_POWER, BACKLIGHT_ON);
 	}
 
 	return set_lcd_level(device, b->props.brightness);
@@ -385,7 +395,7 @@ static int fujitsu_backlight_register(struct acpi_device *device)
 static int acpi_fujitsu_bl_add(struct acpi_device *device)
 {
 	struct fujitsu_bl *priv;
-	int error;
+	int ret;
 
 	if (acpi_video_get_backlight_type() != acpi_backlight_vendor)
 		return -ENODEV;
@@ -399,10 +409,6 @@ static int acpi_fujitsu_bl_add(struct acpi_device *device)
 	strcpy(acpi_device_class(device), ACPI_FUJITSU_CLASS);
 	device->driver_data = priv;
 
-	error = acpi_fujitsu_bl_input_setup(device);
-	if (error)
-		return error;
-
 	pr_info("ACPI: %s [%s]\n",
 		acpi_device_name(device), acpi_device_bid(device));
 
@@ -410,11 +416,11 @@ static int acpi_fujitsu_bl_add(struct acpi_device *device)
 		priv->max_brightness = FUJITSU_LCD_N_LEVELS;
 	get_lcd_level(device);
 
-	error = fujitsu_backlight_register(device);
-	if (error)
-		return error;
+	ret = acpi_fujitsu_bl_input_setup(device);
+	if (ret)
+		return ret;
 
-	return 0;
+	return fujitsu_backlight_register(device);
 }
 
 /* Brightness notify */
@@ -424,7 +430,7 @@ static void acpi_fujitsu_bl_notify(struct acpi_device *device, u32 event)
 	struct fujitsu_bl *priv = acpi_driver_data(device);
 	int oldb, newb;
 
-	if (event != ACPI_FUJITSU_NOTIFY_CODE1) {
+	if (event != ACPI_FUJITSU_NOTIFY_CODE) {
 		acpi_handle_info(device->handle, "unsupported event [0x%x]\n",
 				 event);
 		sparse_keymap_report_event(priv->input, -1, 1, true);
@@ -455,7 +461,9 @@ static const struct key_entry keymap_default[] = {
 	{ KE_KEY, KEY3_CODE, { KEY_PROG3 } },
 	{ KE_KEY, KEY4_CODE, { KEY_PROG4 } },
 	{ KE_KEY, KEY5_CODE, { KEY_RFKILL } },
+	{ KE_KEY, BIT(5),    { KEY_RFKILL } },
 	{ KE_KEY, BIT(26),   { KEY_TOUCHPAD_TOGGLE } },
+	{ KE_KEY, BIT(29),   { KEY_MICMUTE } },
 	{ KE_END, 0 }
 };
 
@@ -693,7 +701,7 @@ static int acpi_fujitsu_laptop_leds_register(struct acpi_device *device)
 {
 	struct fujitsu_laptop *priv = acpi_driver_data(device);
 	struct led_classdev *led;
-	int result;
+	int ret;
 
 	if (call_fext_func(device,
 			   FUNC_LEDS, 0x0, 0x0, 0x0) & LOGOLAMP_POWERON) {
@@ -704,9 +712,9 @@ static int acpi_fujitsu_laptop_leds_register(struct acpi_device *device)
 		led->name = "fujitsu::logolamp";
 		led->brightness_set_blocking = logolamp_set;
 		led->brightness_get = logolamp_get;
-		result = devm_led_classdev_register(&device->dev, led);
-		if (result)
-			return result;
+		ret = devm_led_classdev_register(&device->dev, led);
+		if (ret)
+			return ret;
 	}
 
 	if ((call_fext_func(device,
@@ -719,9 +727,9 @@ static int acpi_fujitsu_laptop_leds_register(struct acpi_device *device)
 		led->name = "fujitsu::kblamps";
 		led->brightness_set_blocking = kblamps_set;
 		led->brightness_get = kblamps_get;
-		result = devm_led_classdev_register(&device->dev, led);
-		if (result)
-			return result;
+		ret = devm_led_classdev_register(&device->dev, led);
+		if (ret)
+			return ret;
 	}
 
 	/*
@@ -742,9 +750,9 @@ static int acpi_fujitsu_laptop_leds_register(struct acpi_device *device)
 		led->brightness_set_blocking = radio_led_set;
 		led->brightness_get = radio_led_get;
 		led->default_trigger = "rfkill-any";
-		result = devm_led_classdev_register(&device->dev, led);
-		if (result)
-			return result;
+		ret = devm_led_classdev_register(&device->dev, led);
+		if (ret)
+			return ret;
 	}
 
 	/* Support for eco led is not always signaled in bit corresponding
@@ -762,9 +770,9 @@ static int acpi_fujitsu_laptop_leds_register(struct acpi_device *device)
 		led->name = "fujitsu::eco_led";
 		led->brightness_set_blocking = eco_led_set;
 		led->brightness_get = eco_led_get;
-		result = devm_led_classdev_register(&device->dev, led);
-		if (result)
-			return result;
+		ret = devm_led_classdev_register(&device->dev, led);
+		if (ret)
+			return ret;
 	}
 
 	return 0;
@@ -773,8 +781,7 @@ static int acpi_fujitsu_laptop_leds_register(struct acpi_device *device)
 static int acpi_fujitsu_laptop_add(struct acpi_device *device)
 {
 	struct fujitsu_laptop *priv;
-	int error;
-	int i;
+	int ret, i = 0;
 
 	priv = devm_kzalloc(&device->dev, sizeof(*priv), GFP_KERNEL);
 	if (!priv)
@@ -789,23 +796,16 @@ static int acpi_fujitsu_laptop_add(struct acpi_device *device)
 
 	/* kfifo */
 	spin_lock_init(&priv->fifo_lock);
-	error = kfifo_alloc(&priv->fifo, RINGBUFFERSIZE * sizeof(int),
-			    GFP_KERNEL);
-	if (error) {
-		pr_err("kfifo_alloc failed\n");
-		goto err_stop;
-	}
-
-	error = acpi_fujitsu_laptop_input_setup(device);
-	if (error)
-		goto err_free_fifo;
+	ret = kfifo_alloc(&priv->fifo, RINGBUFFERSIZE * sizeof(int),
+			  GFP_KERNEL);
+	if (ret)
+		return ret;
 
 	pr_info("ACPI: %s [%s]\n",
 		acpi_device_name(device), acpi_device_bid(device));
 
-	i = 0;
-	while (call_fext_func(device, FUNC_BUTTONS, 0x1, 0x0, 0x0) != 0
-		&& (i++) < MAX_HOTKEY_RINGBUFFER_SIZE)
+	while (call_fext_func(device, FUNC_BUTTONS, 0x1, 0x0, 0x0) != 0 &&
+	       i++ < MAX_HOTKEY_RINGBUFFER_SIZE)
 		; /* No action, result is discarded */
 	acpi_handle_debug(device->handle, "Discarded %i ringbuffer entries\n",
 			  i);
@@ -829,26 +829,31 @@ static int acpi_fujitsu_laptop_add(struct acpi_device *device)
 	/* Sync backlight power status */
 	if (fujitsu_bl && fujitsu_bl->bl_device &&
 	    acpi_video_get_backlight_type() == acpi_backlight_vendor) {
-		if (call_fext_func(fext, FUNC_BACKLIGHT, 0x2, 0x4, 0x0) == 3)
+		if (call_fext_func(fext, FUNC_BACKLIGHT, 0x2,
+				   BACKLIGHT_PARAM_POWER, 0x0) == BACKLIGHT_OFF)
 			fujitsu_bl->bl_device->props.power = FB_BLANK_POWERDOWN;
 		else
 			fujitsu_bl->bl_device->props.power = FB_BLANK_UNBLANK;
 	}
 
-	error = acpi_fujitsu_laptop_leds_register(device);
-	if (error)
+	ret = acpi_fujitsu_laptop_input_setup(device);
+	if (ret)
+		goto err_free_fifo;
+
+	ret = acpi_fujitsu_laptop_leds_register(device);
+	if (ret)
 		goto err_free_fifo;
 
-	error = fujitsu_laptop_platform_add(device);
-	if (error)
+	ret = fujitsu_laptop_platform_add(device);
+	if (ret)
 		goto err_free_fifo;
 
 	return 0;
 
 err_free_fifo:
 	kfifo_free(&priv->fifo);
-err_stop:
-	return error;
+
+	return ret;
 }
 
 static int acpi_fujitsu_laptop_remove(struct acpi_device *device)
@@ -865,11 +870,11 @@ static int acpi_fujitsu_laptop_remove(struct acpi_device *device)
 static void acpi_fujitsu_laptop_press(struct acpi_device *device, int scancode)
 {
 	struct fujitsu_laptop *priv = acpi_driver_data(device);
-	int status;
+	int ret;
 
-	status = kfifo_in_locked(&priv->fifo, (unsigned char *)&scancode,
-				 sizeof(scancode), &priv->fifo_lock);
-	if (status != sizeof(scancode)) {
+	ret = kfifo_in_locked(&priv->fifo, (unsigned char *)&scancode,
+			      sizeof(scancode), &priv->fifo_lock);
+	if (ret != sizeof(scancode)) {
 		dev_info(&priv->input->dev, "Could not push scancode [0x%x]\n",
 			 scancode);
 		return;
@@ -882,13 +887,12 @@ static void acpi_fujitsu_laptop_press(struct acpi_device *device, int scancode)
 static void acpi_fujitsu_laptop_release(struct acpi_device *device)
 {
 	struct fujitsu_laptop *priv = acpi_driver_data(device);
-	int scancode, status;
+	int scancode, ret;
 
 	while (true) {
-		status = kfifo_out_locked(&priv->fifo,
-					  (unsigned char *)&scancode,
-					  sizeof(scancode), &priv->fifo_lock);
-		if (status != sizeof(scancode))
+		ret = kfifo_out_locked(&priv->fifo, (unsigned char *)&scancode,
+				       sizeof(scancode), &priv->fifo_lock);
+		if (ret != sizeof(scancode))
 			return;
 		sparse_keymap_report_event(priv->input, scancode, 0, false);
 		dev_dbg(&priv->input->dev,
@@ -899,10 +903,10 @@ static void acpi_fujitsu_laptop_release(struct acpi_device *device)
 static void acpi_fujitsu_laptop_notify(struct acpi_device *device, u32 event)
 {
 	struct fujitsu_laptop *priv = acpi_driver_data(device);
-	int scancode, i = 0;
+	int scancode, i = 0, ret;
 	unsigned int irb;
 
-	if (event != ACPI_FUJITSU_NOTIFY_CODE1) {
+	if (event != ACPI_FUJITSU_NOTIFY_CODE) {
 		acpi_handle_info(device->handle, "Unsupported event [0x%x]\n",
 				 event);
 		sparse_keymap_report_event(priv->input, -1, 1, true);
@@ -930,9 +934,18 @@ static void acpi_fujitsu_laptop_notify(struct acpi_device *device, u32 event)
 	 * E736/E746/E756), the touchpad toggle hotkey (Fn+F4) is
 	 * handled in software; its state is queried using FUNC_FLAGS
 	 */
-	if ((priv->flags_supported & BIT(26)) &&
-	    (call_fext_func(device, FUNC_FLAGS, 0x1, 0x0, 0x0) & BIT(26)))
-		sparse_keymap_report_event(priv->input, BIT(26), 1, true);
+	if (priv->flags_supported & (BIT(5) | BIT(26) | BIT(29))) {
+		ret = call_fext_func(device, FUNC_FLAGS, 0x1, 0x0, 0x0);
+		if (ret & BIT(5))
+			sparse_keymap_report_event(priv->input,
+						   BIT(5), 1, true);
+		if (ret & BIT(26))
+			sparse_keymap_report_event(priv->input,
+						   BIT(26), 1, true);
+		if (ret & BIT(29))
+			sparse_keymap_report_event(priv->input,
+						   BIT(29), 1, true);
+	}
 }
 
 /* Initialization */
diff --git a/drivers/platform/x86/gpd-pocket-fan.c b/drivers/platform/x86/gpd-pocket-fan.c
index 2d645c505f81..be85ed966bf3 100644
--- a/drivers/platform/x86/gpd-pocket-fan.c
+++ b/drivers/platform/x86/gpd-pocket-fan.c
@@ -19,12 +19,12 @@
 static int temp_limits[3] = { 55000, 60000, 65000 };
 module_param_array(temp_limits, int, NULL, 0444);
 MODULE_PARM_DESC(temp_limits,
-		 "Milli-celcius values above which the fan speed increases");
+		 "Millicelsius values above which the fan speed increases");
 
 static int hysteresis = 3000;
 module_param(hysteresis, int, 0444);
 MODULE_PARM_DESC(hysteresis,
-		 "Hysteresis in milli-celcius before lowering the fan speed");
+		 "Hysteresis in millicelsius before lowering the fan speed");
 
 static int speed_on_ac = 2;
 module_param(speed_on_ac, int, 0444);
diff --git a/drivers/platform/x86/intel-hid.c b/drivers/platform/x86/intel-hid.c
index 5e3df194723e..b5adba227783 100644
--- a/drivers/platform/x86/intel-hid.c
+++ b/drivers/platform/x86/intel-hid.c
@@ -16,16 +16,14 @@
  *
  */
 
+#include <linux/acpi.h>
+#include <linux/dmi.h>
+#include <linux/input.h>
+#include <linux/input/sparse-keymap.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/init.h>
-#include <linux/input.h>
 #include <linux/platform_device.h>
-#include <linux/input/sparse-keymap.h>
-#include <linux/acpi.h>
 #include <linux/suspend.h>
-#include <acpi/acpi_bus.h>
-#include <linux/dmi.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Alex Hung");
@@ -67,8 +65,8 @@ static const struct key_entry intel_array_keymap[] = {
 	{ KE_IGNORE, 0xC5, { KEY_VOLUMEUP } },                /* Release */
 	{ KE_KEY,    0xC6, { KEY_VOLUMEDOWN } },              /* Press */
 	{ KE_IGNORE, 0xC7, { KEY_VOLUMEDOWN } },              /* Release */
-	{ KE_SW,     0xC8, { .sw = { SW_ROTATE_LOCK, 1 } } }, /* Press */
-	{ KE_SW,     0xC9, { .sw = { SW_ROTATE_LOCK, 0 } } }, /* Release */
+	{ KE_KEY,    0xC8, { KEY_ROTATE_LOCK_TOGGLE } },      /* Press */
+	{ KE_IGNORE, 0xC9, { KEY_ROTATE_LOCK_TOGGLE } },      /* Release */
 	{ KE_KEY,    0xCE, { KEY_POWER } },                   /* Press */
 	{ KE_IGNORE, 0xCF, { KEY_POWER } },                   /* Release */
 	{ KE_END },
diff --git a/drivers/platform/x86/intel_turbo_max_3.c b/drivers/platform/x86/intel_turbo_max_3.c
index d4ea01805879..a6d5aa0c3c47 100644
--- a/drivers/platform/x86/intel_turbo_max_3.c
+++ b/drivers/platform/x86/intel_turbo_max_3.c
@@ -138,9 +138,6 @@ static int __init itmt_legacy_init(void)
 	if (!id)
 		return -ENODEV;
 
-	if (boot_cpu_has(X86_FEATURE_HWP))
-		return -ENODEV;
-
 	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
 				"platform/x86/turbo_max_3:online",
 				itmt_legacy_cpu_online,	NULL);
diff --git a/drivers/platform/x86/mlx-platform.c b/drivers/platform/x86/mlx-platform.c
index 454e14f02285..7a0bd24c1ae2 100644
--- a/drivers/platform/x86/mlx-platform.c
+++ b/drivers/platform/x86/mlx-platform.c
@@ -85,6 +85,15 @@
 #define MLXPLAT_CPLD_FAN_MASK		GENMASK(3, 0)
 #define MLXPLAT_CPLD_FAN_NG_MASK	GENMASK(5, 0)
 
+/* Default I2C parent bus number */
+#define MLXPLAT_CPLD_PHYS_ADAPTER_DEF_NR	1
+
+/* Maximum number of possible physical buses equipped on system */
+#define MLXPLAT_CPLD_MAX_PHYS_ADAPTER_NUM	16
+
+/* Number of channels in group */
+#define MLXPLAT_CPLD_GRP_CHNL_NUM		8
+
 /* Start channel numbers */
 #define MLXPLAT_CPLD_CH1			2
 #define MLXPLAT_CPLD_CH2			10
@@ -124,7 +133,7 @@ static const struct resource mlxplat_lpc_resources[] = {
 };
 
 /* Platform default channels */
-static const int mlxplat_default_channels[][8] = {
+static const int mlxplat_default_channels[][MLXPLAT_CPLD_GRP_CHNL_NUM] = {
 	{
 		MLXPLAT_CPLD_CH1, MLXPLAT_CPLD_CH1 + 1, MLXPLAT_CPLD_CH1 + 2,
 		MLXPLAT_CPLD_CH1 + 3, MLXPLAT_CPLD_CH1 + 4, MLXPLAT_CPLD_CH1 +
@@ -694,6 +703,8 @@ static int __init mlxplat_dmi_default_matched(const struct dmi_system_id *dmi)
 				ARRAY_SIZE(mlxplat_default_channels[i]);
 	}
 	mlxplat_hotplug = &mlxplat_mlxcpld_default_data;
+	mlxplat_hotplug->deferred_nr =
+		mlxplat_default_channels[i - 1][MLXPLAT_CPLD_GRP_CHNL_NUM - 1];
 
 	return 1;
 };
@@ -708,6 +719,8 @@ static int __init mlxplat_dmi_msn21xx_matched(const struct dmi_system_id *dmi)
 				ARRAY_SIZE(mlxplat_msn21xx_channels);
 	}
 	mlxplat_hotplug = &mlxplat_mlxcpld_msn21xx_data;
+	mlxplat_hotplug->deferred_nr =
+		mlxplat_msn21xx_channels[MLXPLAT_CPLD_GRP_CHNL_NUM - 1];
 
 	return 1;
 };
@@ -722,6 +735,8 @@ static int __init mlxplat_dmi_msn274x_matched(const struct dmi_system_id *dmi)
 				ARRAY_SIZE(mlxplat_msn21xx_channels);
 	}
 	mlxplat_hotplug = &mlxplat_mlxcpld_msn274x_data;
+	mlxplat_hotplug->deferred_nr =
+		mlxplat_msn21xx_channels[MLXPLAT_CPLD_GRP_CHNL_NUM - 1];
 
 	return 1;
 };
@@ -736,6 +751,8 @@ static int __init mlxplat_dmi_msn201x_matched(const struct dmi_system_id *dmi)
 				ARRAY_SIZE(mlxplat_msn21xx_channels);
 	}
 	mlxplat_hotplug = &mlxplat_mlxcpld_msn201x_data;
+	mlxplat_hotplug->deferred_nr =
+		mlxplat_default_channels[i - 1][MLXPLAT_CPLD_GRP_CHNL_NUM - 1];
 
 	return 1;
 };
@@ -750,6 +767,8 @@ static int __init mlxplat_dmi_qmb7xx_matched(const struct dmi_system_id *dmi)
 				ARRAY_SIZE(mlxplat_msn21xx_channels);
 	}
 	mlxplat_hotplug = &mlxplat_mlxcpld_default_ng_data;
+	mlxplat_hotplug->deferred_nr =
+		mlxplat_msn21xx_channels[MLXPLAT_CPLD_GRP_CHNL_NUM - 1];
 
 	return 1;
 };
@@ -830,10 +849,48 @@ static const struct dmi_system_id mlxplat_dmi_table[] __initconst = {
 
 MODULE_DEVICE_TABLE(dmi, mlxplat_dmi_table);
 
+static int mlxplat_mlxcpld_verify_bus_topology(int *nr)
+{
+	struct i2c_adapter *search_adap;
+	int shift, i;
+
+	/* Scan adapters from expected id to verify it is free. */
+	*nr = MLXPLAT_CPLD_PHYS_ADAPTER_DEF_NR;
+	for (i = MLXPLAT_CPLD_PHYS_ADAPTER_DEF_NR; i <
+	     MLXPLAT_CPLD_MAX_PHYS_ADAPTER_NUM; i++) {
+		search_adap = i2c_get_adapter(i);
+		if (search_adap) {
+			i2c_put_adapter(search_adap);
+			continue;
+		}
+
+		/* Return if expected parent adapter is free. */
+		if (i == MLXPLAT_CPLD_PHYS_ADAPTER_DEF_NR)
+			return 0;
+		break;
+	}
+
+	/* Return with error if free id for adapter is not found. */
+	if (i == MLXPLAT_CPLD_MAX_PHYS_ADAPTER_NUM)
+		return -ENODEV;
+
+	/* Shift adapter ids, since expected parent adapter is not free. */
+	*nr = i;
+	for (i = 0; i < ARRAY_SIZE(mlxplat_mux_data); i++) {
+		shift = *nr - mlxplat_mux_data[i].parent;
+		mlxplat_mux_data[i].parent = *nr;
+		mlxplat_mux_data[i].base_nr += shift;
+		if (shift > 0)
+			mlxplat_hotplug->shift_nr = shift;
+	}
+
+	return 0;
+}
+
 static int __init mlxplat_init(void)
 {
 	struct mlxplat_priv *priv;
-	int i, err;
+	int i, nr, err;
 
 	if (!dmi_check_system(mlxplat_dmi_table))
 		return -ENODEV;
@@ -853,7 +910,13 @@ static int __init mlxplat_init(void)
 	}
 	platform_set_drvdata(mlxplat_dev, priv);
 
-	priv->pdev_i2c = platform_device_register_simple("i2c_mlxcpld", -1,
+	/* Abort if no free id is left for the parent I2C adapter. */
+	err = mlxplat_mlxcpld_verify_bus_topology(&nr);
+	if (err)
+		goto fail_alloc;
+
+	nr = (nr == MLXPLAT_CPLD_MAX_PHYS_ADAPTER_NUM) ? -1 : nr;
+	priv->pdev_i2c = platform_device_register_simple("i2c_mlxcpld", nr,
 							 NULL, 0);
 	if (IS_ERR(priv->pdev_i2c)) {
 		err = PTR_ERR(priv->pdev_i2c);
diff --git a/drivers/platform/x86/silead_dmi.c b/drivers/platform/x86/silead_dmi.c
index 3a624090191d..452aacabaa8e 100644
--- a/drivers/platform/x86/silead_dmi.c
+++ b/drivers/platform/x86/silead_dmi.c
@@ -446,6 +446,23 @@ static const struct dmi_system_id silead_ts_dmi_table[] = {
 			DMI_MATCH(DMI_BOARD_NAME, "X3 Plus"),
 		},
 	},
+	{
+		/* I.T.Works TW701 */
+		.driver_data = (void *)&surftab_wintron70_st70416_6_data,
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Insyde"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "i71c"),
+			DMI_MATCH(DMI_BIOS_VERSION, "itWORKS.G.WI71C.JGBMRB"),
+		},
+	},
+	{
+		/* Yours Y8W81, same case and touchscreen as Chuwi Vi8 */
+		.driver_data = (void *)&chuwi_vi8_data,
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "YOURS"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Y8W81"),
+		},
+	},
 	{ },
 };
 
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index 1c57ee2b6d19..da1ca4856ea1 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -8703,16 +8703,24 @@ static const struct attribute_group fan_attr_group = {
 	  .ec = TPID(__id1, __id2),		\
 	  .quirks = __quirks }
 
+#define TPACPI_FAN_QB(__id1, __id2, __quirks)	\
+	{ .vendor = PCI_VENDOR_ID_LENOVO,	\
+	  .bios = TPID(__id1, __id2),		\
+	  .ec = TPACPI_MATCH_ANY,		\
+	  .quirks = __quirks }
+
 static const struct tpacpi_quirk fan_quirk_table[] __initconst = {
 	TPACPI_FAN_QI('1', 'Y', TPACPI_FAN_Q1),
 	TPACPI_FAN_QI('7', '8', TPACPI_FAN_Q1),
 	TPACPI_FAN_QI('7', '6', TPACPI_FAN_Q1),
 	TPACPI_FAN_QI('7', '0', TPACPI_FAN_Q1),
 	TPACPI_FAN_QL('7', 'M', TPACPI_FAN_2FAN),
+	TPACPI_FAN_QB('N', '1', TPACPI_FAN_2FAN),
 };
 
 #undef TPACPI_FAN_QL
 #undef TPACPI_FAN_QI
+#undef TPACPI_FAN_QB
 
 static int __init fan_init(struct ibm_init_struct *iibm)
 {
diff --git a/drivers/platform/x86/topstar-laptop.c b/drivers/platform/x86/topstar-laptop.c
index 1032c00b907b..f7761d98c0fd 100644
--- a/drivers/platform/x86/topstar-laptop.c
+++ b/drivers/platform/x86/topstar-laptop.c
@@ -1,14 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
- * ACPI driver for Topstar notebooks (hotkeys support only)
+ * Topstar Laptop ACPI Extras driver
  *
  * Copyright (c) 2009 Herton Ronaldo Krzesinski <herton@mandriva.com.br>
+ * Copyright (c) 2018 Guillaume Douézan-Grard
  *
  * Implementation inspired by existing x86 platform drivers, in special
- * asus/eepc/fujitsu-laptop, thanks to their authors
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
+ * asus/eepc/fujitsu-laptop, thanks to their authors.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -18,15 +16,93 @@
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/acpi.h>
+#include <linux/dmi.h>
 #include <linux/input.h>
 #include <linux/input/sparse-keymap.h>
+#include <linux/leds.h>
+#include <linux/platform_device.h>
 
-#define ACPI_TOPSTAR_CLASS "topstar"
+#define TOPSTAR_LAPTOP_CLASS "topstar"
 
-struct topstar_hkey {
-	struct input_dev *inputdev;
+struct topstar_laptop {
+	struct acpi_device *device;
+	struct platform_device *platform;
+	struct input_dev *input;
+	struct led_classdev led;
 };
 
+/*
+ * LED
+ */
+
+static enum led_brightness topstar_led_get(struct led_classdev *led)
+{
+	return led->brightness;
+}
+
+/* Set the WLAN LED state; returns 0, or -EIO if an ACPI call fails. */
+static int topstar_led_set(struct led_classdev *led,
+		enum led_brightness state)
+{
+	struct topstar_laptop *topstar = container_of(led,
+			struct topstar_laptop, led);
+	struct acpi_object_list params;
+	union acpi_object in_obj;
+	unsigned long long int ret;
+	acpi_status status;
+
+	params.count = 1;
+	params.pointer = &in_obj;
+	in_obj.type = ACPI_TYPE_INTEGER;
+	in_obj.integer.value = 0x83;
+
+	/*
+	 * Topstar ACPI returns 0x30001 when the LED is ON and 0x30000 when it
+	 * is OFF.
+	 */
+	status = acpi_evaluate_integer(topstar->device->handle,
+			"GETX", &params, &ret);
+	if (ACPI_FAILURE(status))
+		return -EIO;
+
+	/*
+	 * FNCX(0x83) toggles the LED (more precisely, it is supposed to
+	 * act as a hardware switch and disconnect the WLAN adapter but
+	 * it seems to be faulty on some models like the Topstar U931
+	 * Notebook).
+	 */
+	if ((ret == 0x30001 && state == LED_OFF)
+			|| (ret == 0x30000 && state != LED_OFF)) {
+		status = acpi_execute_simple_method(topstar->device->handle,
+				"FNCX", 0x83);
+		if (ACPI_FAILURE(status))
+			return -EIO;
+	}
+
+	return 0;
+}
+
+static int topstar_led_init(struct topstar_laptop *topstar)
+{
+	topstar->led = (struct led_classdev) {
+		.default_trigger = "rfkill0",
+		.brightness_get = topstar_led_get,
+		.brightness_set_blocking = topstar_led_set,
+		.name = TOPSTAR_LAPTOP_CLASS "::wlan",
+	};
+
+	return led_classdev_register(&topstar->platform->dev, &topstar->led);
+}
+
+static void topstar_led_exit(struct topstar_laptop *topstar)
+{
+	led_classdev_unregister(&topstar->led);
+}
+
+/*
+ * Input
+ */
+
 static const struct key_entry topstar_keymap[] = {
 	{ KE_KEY, 0x80, { KEY_BRIGHTNESSUP } },
 	{ KE_KEY, 0x81, { KEY_BRIGHTNESSDOWN } },
@@ -57,107 +133,217 @@ static const struct key_entry topstar_keymap[] = {
 	{ KE_END, 0 }
 };
 
-static void acpi_topstar_notify(struct acpi_device *device, u32 event)
+static void topstar_input_notify(struct topstar_laptop *topstar, int event)
 {
-	static bool dup_evnt[2];
-	bool *dup;
-	struct topstar_hkey *hkey = acpi_driver_data(device);
-
-	/* 0x83 and 0x84 key events comes duplicated... */
-	if (event == 0x83 || event == 0x84) {
-		dup = &dup_evnt[event - 0x83];
-		if (*dup) {
-			*dup = false;
-			return;
-		}
-		*dup = true;
-	}
-
-	if (!sparse_keymap_report_event(hkey->inputdev, event, 1, true))
+	if (!sparse_keymap_report_event(topstar->input, event, 1, true))
 		pr_info("unknown event = 0x%02x\n", event);
 }
 
-static int acpi_topstar_fncx_switch(struct acpi_device *device, bool state)
-{
-	acpi_status status;
-
-	status = acpi_execute_simple_method(device->handle, "FNCX",
-						state ? 0x86 : 0x87);
-	if (ACPI_FAILURE(status)) {
-		pr_err("Unable to switch FNCX notifications\n");
-		return -ENODEV;
-	}
-
-	return 0;
-}
-
-static int acpi_topstar_init_hkey(struct topstar_hkey *hkey)
+static int topstar_input_init(struct topstar_laptop *topstar)
 {
 	struct input_dev *input;
-	int error;
+	int err;
 
 	input = input_allocate_device();
 	if (!input)
 		return -ENOMEM;
 
 	input->name = "Topstar Laptop extra buttons";
-	input->phys = "topstar/input0";
+	input->phys = TOPSTAR_LAPTOP_CLASS "/input0";
 	input->id.bustype = BUS_HOST;
+	input->dev.parent = &topstar->platform->dev;
 
-	error = sparse_keymap_setup(input, topstar_keymap, NULL);
-	if (error) {
+	err = sparse_keymap_setup(input, topstar_keymap, NULL);
+	if (err) {
 		pr_err("Unable to setup input device keymap\n");
 		goto err_free_dev;
 	}
 
-	error = input_register_device(input);
-	if (error) {
+	err = input_register_device(input);
+	if (err) {
 		pr_err("Unable to register input device\n");
 		goto err_free_dev;
 	}
 
-	hkey->inputdev = input;
+	topstar->input = input;
 	return 0;
 
- err_free_dev:
+err_free_dev:
 	input_free_device(input);
-	return error;
+	return err;
 }
 
-static int acpi_topstar_add(struct acpi_device *device)
+static void topstar_input_exit(struct topstar_laptop *topstar)
 {
-	struct topstar_hkey *tps_hkey;
+	input_unregister_device(topstar->input);
+}
 
-	tps_hkey = kzalloc(sizeof(struct topstar_hkey), GFP_KERNEL);
-	if (!tps_hkey)
+/*
+ * Platform
+ */
+
+static struct platform_driver topstar_platform_driver = {
+	.driver = {
+		.name = TOPSTAR_LAPTOP_CLASS,
+	},
+};
+
+static int topstar_platform_init(struct topstar_laptop *topstar)
+{
+	int err;
+
+	topstar->platform = platform_device_alloc(TOPSTAR_LAPTOP_CLASS, -1);
+	if (!topstar->platform)
 		return -ENOMEM;
 
-	strcpy(acpi_device_name(device), "Topstar TPSACPI");
-	strcpy(acpi_device_class(device), ACPI_TOPSTAR_CLASS);
+	platform_set_drvdata(topstar->platform, topstar);
+
+	err = platform_device_add(topstar->platform);
+	if (err)
+		goto err_device_put;
+
+	return 0;
 
-	if (acpi_topstar_fncx_switch(device, true))
-		goto add_err;
+err_device_put:
+	platform_device_put(topstar->platform);
+	return err;
+}
+
+static void topstar_platform_exit(struct topstar_laptop *topstar)
+{
+	platform_device_unregister(topstar->platform);
+}
+
+/*
+ * ACPI
+ */
+
+static int topstar_acpi_fncx_switch(struct acpi_device *device, bool state)
+{
+	acpi_status status;
+	u64 arg = state ? 0x86 : 0x87;
 
-	if (acpi_topstar_init_hkey(tps_hkey))
-		goto add_err;
+	status = acpi_execute_simple_method(device->handle, "FNCX", arg);
+	if (ACPI_FAILURE(status)) {
+		pr_err("Unable to switch FNCX notifications\n");
+		return -ENODEV;
+	}
 
-	device->driver_data = tps_hkey;
 	return 0;
+}
 
-add_err:
-	kfree(tps_hkey);
-	return -ENODEV;
+static void topstar_acpi_notify(struct acpi_device *device, u32 event)
+{
+	struct topstar_laptop *topstar = acpi_driver_data(device);
+	static bool dup_evnt[2];
+	bool *dup;
+
+	/* 0x83 and 0x84 key events come duplicated; drop every second one */
+	if (event == 0x83 || event == 0x84) {
+		dup = &dup_evnt[event - 0x83];
+		if (*dup) {
+			*dup = false;
+			return;
+		}
+		*dup = true;
+	}
+
+	topstar_input_notify(topstar, event);
+}
 
-static int acpi_topstar_remove(struct acpi_device *device)
+static int topstar_acpi_init(struct topstar_laptop *topstar)
 {
-	struct topstar_hkey *tps_hkey = acpi_driver_data(device);
+	return topstar_acpi_fncx_switch(topstar->device, true);
+}
 
-	acpi_topstar_fncx_switch(device, false);
+static void topstar_acpi_exit(struct topstar_laptop *topstar)
+{
+	topstar_acpi_fncx_switch(topstar->device, false);
+}
 
-	input_unregister_device(tps_hkey->inputdev);
-	kfree(tps_hkey);
+/*
+ * Enable software-based WLAN LED control on systems with defective
+ * hardware switch.
+ */
+static bool led_workaround;
 
+static int dmi_led_workaround(const struct dmi_system_id *id)
+{
+	led_workaround = true;
+	return 0;
+}
+
+static const struct dmi_system_id topstar_dmi_ids[] = {
+	{
+		.callback = dmi_led_workaround,
+		.ident = "Topstar U931/RVP7",
+		.matches = {
+			DMI_MATCH(DMI_BOARD_NAME, "U931"),
+			DMI_MATCH(DMI_BOARD_VERSION, "RVP7"),
+		},
+	},
+	{}
+};
+
+static int topstar_acpi_add(struct acpi_device *device)
+{
+	struct topstar_laptop *topstar;
+	int err;
+
+	dmi_check_system(topstar_dmi_ids);
+
+	topstar = kzalloc(sizeof(struct topstar_laptop), GFP_KERNEL);
+	if (!topstar)
+		return -ENOMEM;
+
+	strcpy(acpi_device_name(device), "Topstar TPSACPI");
+	strcpy(acpi_device_class(device), TOPSTAR_LAPTOP_CLASS);
+	device->driver_data = topstar;
+	topstar->device = device;
+
+	err = topstar_acpi_init(topstar);
+	if (err)
+		goto err_free;
+
+	err = topstar_platform_init(topstar);
+	if (err)
+		goto err_acpi_exit;
+
+	err = topstar_input_init(topstar);
+	if (err)
+		goto err_platform_exit;
+
+	if (led_workaround) {
+		err = topstar_led_init(topstar);
+		if (err)
+			goto err_input_exit;
+	}
+
+	return 0;
+
+err_input_exit:
+	topstar_input_exit(topstar);
+err_platform_exit:
+	topstar_platform_exit(topstar);
+err_acpi_exit:
+	topstar_acpi_exit(topstar);
+err_free:
+	kfree(topstar);
+	return err;
+}
+
+static int topstar_acpi_remove(struct acpi_device *device)
+{
+	struct topstar_laptop *topstar = acpi_driver_data(device);
+
+	if (led_workaround)
+		topstar_led_exit(topstar);
+
+	topstar_input_exit(topstar);
+	topstar_platform_exit(topstar);
+	topstar_acpi_exit(topstar);
+
+	kfree(topstar);
 	return 0;
 }
 
@@ -168,18 +354,47 @@ static const struct acpi_device_id topstar_device_ids[] = {
 };
 MODULE_DEVICE_TABLE(acpi, topstar_device_ids);
 
-static struct acpi_driver acpi_topstar_driver = {
+static struct acpi_driver topstar_acpi_driver = {
 	.name = "Topstar laptop ACPI driver",
-	.class = ACPI_TOPSTAR_CLASS,
+	.class = TOPSTAR_LAPTOP_CLASS,
 	.ids = topstar_device_ids,
 	.ops = {
-		.add = acpi_topstar_add,
-		.remove = acpi_topstar_remove,
-		.notify = acpi_topstar_notify,
+		.add = topstar_acpi_add,
+		.remove = topstar_acpi_remove,
+		.notify = topstar_acpi_notify,
 	},
 };
-module_acpi_driver(acpi_topstar_driver);
+
+static int __init topstar_laptop_init(void)
+{
+	int ret;
+
+	ret = platform_driver_register(&topstar_platform_driver);
+	if (ret < 0)
+		return ret;
+
+	ret = acpi_bus_register_driver(&topstar_acpi_driver);
+	if (ret < 0)
+		goto err_driver_unreg;
+
+	pr_info("ACPI extras driver loaded\n");
+	return 0;
+
+err_driver_unreg:
+	platform_driver_unregister(&topstar_platform_driver);
+	return ret;
+}
+
+static void __exit topstar_laptop_exit(void)
+{
+	acpi_bus_unregister_driver(&topstar_acpi_driver);
+	platform_driver_unregister(&topstar_platform_driver);
+}
+
+module_init(topstar_laptop_init);
+module_exit(topstar_laptop_exit);
 
 MODULE_AUTHOR("Herton Ronaldo Krzesinski");
+MODULE_AUTHOR("Guillaume Douézan-Grard");
 MODULE_DESCRIPTION("Topstar Laptop ACPI Extras driver");
 MODULE_LICENSE("GPL");
diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c
index 8796211ef24a..8e3d0146ff8c 100644
--- a/drivers/platform/x86/wmi.c
+++ b/drivers/platform/x86/wmi.c
@@ -130,13 +130,11 @@ static bool find_guid(const char *guid_string, struct wmi_block **out)
 	uuid_le guid_input;
 	struct wmi_block *wblock;
 	struct guid_block *block;
-	struct list_head *p;
 
 	if (uuid_le_to_bin(guid_string, &guid_input))
 		return false;
 
-	list_for_each(p, &wmi_block_list) {
-		wblock = list_entry(p, struct wmi_block, list);
+	list_for_each_entry(wblock, &wmi_block_list, list) {
 		block = &wblock->gblock;
 
 		if (memcmp(block->guid, &guid_input, 16) == 0) {
@@ -519,7 +517,6 @@ wmi_notify_handler handler, void *data)
 	struct wmi_block *block;
 	acpi_status status = AE_NOT_EXIST;
 	uuid_le guid_input;
-	struct list_head *p;
 
 	if (!guid || !handler)
 		return AE_BAD_PARAMETER;
@@ -527,9 +524,8 @@ wmi_notify_handler handler, void *data)
 	if (uuid_le_to_bin(guid, &guid_input))
 		return AE_BAD_PARAMETER;
 
-	list_for_each(p, &wmi_block_list) {
+	list_for_each_entry(block, &wmi_block_list, list) {
 		acpi_status wmi_status;
-		block = list_entry(p, struct wmi_block, list);
 
 		if (memcmp(block->gblock.guid, &guid_input, 16) == 0) {
 			if (block->handler &&
@@ -560,7 +556,6 @@ acpi_status wmi_remove_notify_handler(const char *guid)
 	struct wmi_block *block;
 	acpi_status status = AE_NOT_EXIST;
 	uuid_le guid_input;
-	struct list_head *p;
 
 	if (!guid)
 		return AE_BAD_PARAMETER;
@@ -568,9 +563,8 @@ acpi_status wmi_remove_notify_handler(const char *guid)
 	if (uuid_le_to_bin(guid, &guid_input))
 		return AE_BAD_PARAMETER;
 
-	list_for_each(p, &wmi_block_list) {
+	list_for_each_entry(block, &wmi_block_list, list) {
 		acpi_status wmi_status;
-		block = list_entry(p, struct wmi_block, list);
 
 		if (memcmp(block->gblock.guid, &guid_input, 16) == 0) {
 			if (!block->handler ||
@@ -610,15 +604,13 @@ acpi_status wmi_get_event_data(u32 event, struct acpi_buffer *out)
 	union acpi_object params[1];
 	struct guid_block *gblock;
 	struct wmi_block *wblock;
-	struct list_head *p;
 
 	input.count = 1;
 	input.pointer = params;
 	params[0].type = ACPI_TYPE_INTEGER;
 	params[0].integer.value = event;
 
-	list_for_each(p, &wmi_block_list) {
-		wblock = list_entry(p, struct wmi_block, list);
+	list_for_each_entry(wblock, &wmi_block_list, list) {
 		gblock = &wblock->gblock;
 
 		if ((gblock->flags & ACPI_WMI_EVENT) &&
@@ -933,12 +925,11 @@ static int wmi_dev_probe(struct device *dev)
 			goto probe_failure;
 		}
 
-		buf = kmalloc(strlen(wdriver->driver.name) + 5, GFP_KERNEL);
+		buf = kasprintf(GFP_KERNEL, "wmi/%s", wdriver->driver.name);
 		if (!buf) {
 			ret = -ENOMEM;
 			goto probe_string_failure;
 		}
-		sprintf(buf, "wmi/%s", wdriver->driver.name);
 		wblock->char_dev.minor = MISC_DYNAMIC_MINOR;
 		wblock->char_dev.name = buf;
 		wblock->char_dev.fops = &wmi_fops;
@@ -1261,11 +1252,9 @@ static void acpi_wmi_notify_handler(acpi_handle handle, u32 event,
 {
 	struct guid_block *block;
 	struct wmi_block *wblock;
-	struct list_head *p;
 	bool found_it = false;
 
-	list_for_each(p, &wmi_block_list) {
-		wblock = list_entry(p, struct wmi_block, list);
+	list_for_each_entry(wblock, &wmi_block_list, list) {
 		block = &wblock->gblock;
 
 		if (wblock->acpi_device->handle == handle &&
diff --git a/drivers/pwm/Kconfig b/drivers/pwm/Kconfig
index 38d49dbbf9b7..4635cb35008c 100644
--- a/drivers/pwm/Kconfig
+++ b/drivers/pwm/Kconfig
@@ -200,10 +200,10 @@ config PWM_IMX
 	  will be called pwm-imx.
 
 config PWM_JZ4740
-	tristate "Ingenic JZ4740 PWM support"
-	depends on MACH_JZ4740
+	tristate "Ingenic JZ47xx PWM support"
+	depends on MACH_INGENIC
 	help
-	  Generic PWM framework driver for Ingenic JZ4740 based
+	  Generic PWM framework driver for Ingenic JZ47xx based
 	  machines.
 
 	  To compile this driver as a module, choose M here: the module
diff --git a/drivers/pwm/pwm-atmel-tcb.c b/drivers/pwm/pwm-atmel-tcb.c
index acd3ce8ecf3f..4fb1be246c44 100644
--- a/drivers/pwm/pwm-atmel-tcb.c
+++ b/drivers/pwm/pwm-atmel-tcb.c
@@ -401,7 +401,6 @@ static int atmel_tcb_pwm_probe(struct platform_device *pdev)
 	tcbpwm = devm_kzalloc(&pdev->dev, sizeof(*tcbpwm), GFP_KERNEL);
 	if (tcbpwm == NULL) {
 		err = -ENOMEM;
-		dev_err(&pdev->dev, "failed to allocate memory\n");
 		goto err_free_tc;
 	}
 
diff --git a/drivers/pwm/pwm-imx.c b/drivers/pwm/pwm-imx.c
index 2ba5c3a398ff..08cbe8120588 100644
--- a/drivers/pwm/pwm-imx.c
+++ b/drivers/pwm/pwm-imx.c
@@ -35,6 +35,7 @@
 #define MX3_PWMSAR			0x0C    /* PWM Sample Register */
 #define MX3_PWMPR			0x10    /* PWM Period Register */
 #define MX3_PWMCR_PRESCALER(x)		((((x) - 1) & 0xFFF) << 4)
+#define MX3_PWMCR_STOPEN		(1 << 25)
 #define MX3_PWMCR_DOZEEN		(1 << 24)
 #define MX3_PWMCR_WAITEN		(1 << 23)
 #define MX3_PWMCR_DBGEN			(1 << 22)
@@ -210,7 +211,7 @@ static int imx_pwm_apply_v2(struct pwm_chip *chip, struct pwm_device *pwm,
 		writel(period_cycles, imx->mmio_base + MX3_PWMPR);
 
 		cr = MX3_PWMCR_PRESCALER(prescale) |
-		     MX3_PWMCR_DOZEEN | MX3_PWMCR_WAITEN |
+		     MX3_PWMCR_STOPEN | MX3_PWMCR_DOZEEN | MX3_PWMCR_WAITEN |
 		     MX3_PWMCR_DBGEN | MX3_PWMCR_CLKSRC_IPG_HIGH |
 		     MX3_PWMCR_EN;
 
diff --git a/drivers/pwm/pwm-jz4740.c b/drivers/pwm/pwm-jz4740.c
index a75ff3622450..a7b134af5e04 100644
--- a/drivers/pwm/pwm-jz4740.c
+++ b/drivers/pwm/pwm-jz4740.c
@@ -18,6 +18,7 @@
 #include <linux/gpio.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pwm.h>
 
@@ -71,9 +72,15 @@ static void jz4740_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
 {
 	uint32_t ctrl = jz4740_timer_get_ctrl(pwm->hwpwm);
 
+	/* Disable PWM output.
+	 * In TCU2 mode (channel 1/2 on JZ4750+), this must be done before the
+	 * counter is stopped, while in TCU1 mode the order does not matter.
+	 */
 	ctrl &= ~JZ_TIMER_CTRL_PWM_ENABLE;
-	jz4740_timer_disable(pwm->hwpwm);
 	jz4740_timer_set_ctrl(pwm->hwpwm, ctrl);
+
+	/* Stop counter */
+	jz4740_timer_disable(pwm->hwpwm);
 }
 
 static int jz4740_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
@@ -124,10 +131,29 @@ static int jz4740_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
 	return 0;
 }
 
+static int jz4740_pwm_set_polarity(struct pwm_chip *chip,
+		struct pwm_device *pwm, enum pwm_polarity polarity)
+{
+	uint32_t ctrl = jz4740_timer_get_ctrl(pwm->hwpwm);
+
+	switch (polarity) {
+	case PWM_POLARITY_NORMAL:
+		ctrl &= ~JZ_TIMER_CTRL_PWM_ACTIVE_LOW;
+		break;
+	case PWM_POLARITY_INVERSED:
+		ctrl |= JZ_TIMER_CTRL_PWM_ACTIVE_LOW;
+		break;
+	}
+	/* Timer channels are addressed by the per-chip hwpwm index. */
+	jz4740_timer_set_ctrl(pwm->hwpwm, ctrl);
+	return 0;
+}
+
 static const struct pwm_ops jz4740_pwm_ops = {
 	.request = jz4740_pwm_request,
 	.free = jz4740_pwm_free,
 	.config = jz4740_pwm_config,
+	.set_polarity = jz4740_pwm_set_polarity,
 	.enable = jz4740_pwm_enable,
 	.disable = jz4740_pwm_disable,
 	.owner = THIS_MODULE,
@@ -149,6 +175,8 @@ static int jz4740_pwm_probe(struct platform_device *pdev)
 	jz4740->chip.ops = &jz4740_pwm_ops;
 	jz4740->chip.npwm = NUM_PWM;
 	jz4740->chip.base = -1;
+	jz4740->chip.of_xlate = of_pwm_xlate_with_flags;
+	jz4740->chip.of_pwm_n_cells = 3;
 
 	platform_set_drvdata(pdev, jz4740);
 
@@ -162,9 +190,20 @@ static int jz4740_pwm_remove(struct platform_device *pdev)
 	return pwmchip_remove(&jz4740->chip);
 }
 
+#ifdef CONFIG_OF
+static const struct of_device_id jz4740_pwm_dt_ids[] = {
+	{ .compatible = "ingenic,jz4740-pwm", },
+	{ .compatible = "ingenic,jz4770-pwm", },
+	{ .compatible = "ingenic,jz4780-pwm", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, jz4740_pwm_dt_ids);
+#endif
+
 static struct platform_driver jz4740_pwm_driver = {
 	.driver = {
 		.name = "jz4740-pwm",
+		.of_match_table = of_match_ptr(jz4740_pwm_dt_ids),
 	},
 	.probe = jz4740_pwm_probe,
 	.remove = jz4740_pwm_remove,
diff --git a/drivers/pwm/pwm-mediatek.c b/drivers/pwm/pwm-mediatek.c
index f5d97e0ad52b..328c124773b2 100644
--- a/drivers/pwm/pwm-mediatek.c
+++ b/drivers/pwm/pwm-mediatek.c
@@ -29,7 +29,9 @@
 #define PWMGDUR			0x0c
 #define PWMWAVENUM		0x28
 #define PWMDWIDTH		0x2c
+#define PWM45DWIDTH_FIXUP	0x30
 #define PWMTHRES		0x30
+#define PWM45THRES_FIXUP	0x34
 
 #define PWM_CLK_DIV_MAX		7
 
@@ -54,6 +56,7 @@ static const char * const mtk_pwm_clk_name[MTK_CLK_MAX] = {
 
 struct mtk_pwm_platform_data {
 	unsigned int num_pwms;
+	bool pwm45_fixup;
 };
 
 /**
@@ -66,6 +69,7 @@ struct mtk_pwm_chip {
 	struct pwm_chip chip;
 	void __iomem *regs;
 	struct clk *clks[MTK_CLK_MAX];
+	const struct mtk_pwm_platform_data *soc;
 };
 
 static const unsigned int mtk_pwm_reg_offset[] = {
@@ -131,18 +135,25 @@ static int mtk_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
 {
 	struct mtk_pwm_chip *pc = to_mtk_pwm_chip(chip);
 	struct clk *clk = pc->clks[MTK_CLK_PWM1 + pwm->hwpwm];
-	u32 resolution, clkdiv = 0;
+	u32 clkdiv = 0, cnt_period, cnt_duty, reg_width = PWMDWIDTH,
+	    reg_thres = PWMTHRES;
+	u64 resolution;
 	int ret;
 
 	ret = mtk_pwm_clk_enable(chip, pwm);
 	if (ret < 0)
 		return ret;
 
-	resolution = NSEC_PER_SEC / clk_get_rate(clk);
+	/* Use a picosecond resolution to improve accuracy */
+	resolution = (u64)NSEC_PER_SEC * 1000;
+	do_div(resolution, clk_get_rate(clk));
 
-	while (period_ns / resolution > 8191) {
+	cnt_period = DIV_ROUND_CLOSEST_ULL((u64)period_ns * 1000, resolution);
+	while (cnt_period > 8191) {
 		resolution *= 2;
 		clkdiv++;
+		cnt_period = DIV_ROUND_CLOSEST_ULL((u64)period_ns * 1000,
+						   resolution);
 	}
 
 	if (clkdiv > PWM_CLK_DIV_MAX) {
@@ -151,9 +162,19 @@ static int mtk_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
 		return -EINVAL;
 	}
 
+	if (pc->soc->pwm45_fixup && pwm->hwpwm > 2) {
+		/*
+		 * On MT7623, PWM4 and PWM5 use different register offsets
+		 * for PWMDWIDTH and PWMTHRES than the other PWMs.
+		 */
+		reg_width = PWM45DWIDTH_FIXUP;
+		reg_thres = PWM45THRES_FIXUP;
+	}
+
+	cnt_duty = DIV_ROUND_CLOSEST_ULL((u64)duty_ns * 1000, resolution);
 	mtk_pwm_writel(pc, pwm->hwpwm, PWMCON, BIT(15) | clkdiv);
-	mtk_pwm_writel(pc, pwm->hwpwm, PWMDWIDTH, period_ns / resolution);
-	mtk_pwm_writel(pc, pwm->hwpwm, PWMTHRES, duty_ns / resolution);
+	mtk_pwm_writel(pc, pwm->hwpwm, reg_width, cnt_period);
+	mtk_pwm_writel(pc, pwm->hwpwm, reg_thres, cnt_duty);
 
 	mtk_pwm_clk_disable(chip, pwm);
 
@@ -211,6 +232,7 @@ static int mtk_pwm_probe(struct platform_device *pdev)
 	data = of_device_get_match_data(&pdev->dev);
 	if (data == NULL)
 		return -EINVAL;
+	pc->soc = data;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	pc->regs = devm_ioremap_resource(&pdev->dev, res);
@@ -251,14 +273,17 @@ static int mtk_pwm_remove(struct platform_device *pdev)
 
 static const struct mtk_pwm_platform_data mt2712_pwm_data = {
 	.num_pwms = 8,
+	.pwm45_fixup = false,
 };
 
 static const struct mtk_pwm_platform_data mt7622_pwm_data = {
 	.num_pwms = 6,
+	.pwm45_fixup = false,
 };
 
 static const struct mtk_pwm_platform_data mt7623_pwm_data = {
 	.num_pwms = 5,
+	.pwm45_fixup = true,
 };
 
 static const struct of_device_id mtk_pwm_of_match[] = {
@@ -280,5 +305,4 @@ static struct platform_driver mtk_pwm_driver = {
 module_platform_driver(mtk_pwm_driver);
 
 MODULE_AUTHOR("John Crispin <blogic@openwrt.org>");
-MODULE_ALIAS("platform:mtk-pwm");
 MODULE_LICENSE("GPL");
diff --git a/drivers/pwm/pwm-puv3.c b/drivers/pwm/pwm-puv3.c
index ed6007b27585..754fd9a98f6b 100644
--- a/drivers/pwm/pwm-puv3.c
+++ b/drivers/pwm/pwm-puv3.c
@@ -107,10 +107,8 @@ static int pwm_probe(struct platform_device *pdev)
 	int ret;
 
 	puv3 = devm_kzalloc(&pdev->dev, sizeof(*puv3), GFP_KERNEL);
-	if (puv3 == NULL) {
-		dev_err(&pdev->dev, "failed to allocate memory\n");
+	if (!puv3)
 		return -ENOMEM;
-	}
 
 	puv3->clk = devm_clk_get(&pdev->dev, "OST_CLK");
 	if (IS_ERR(puv3->clk))
diff --git a/drivers/pwm/pwm-rcar.c b/drivers/pwm/pwm-rcar.c
index 1c85ecc9e7ac..91d11f2e2fef 100644
--- a/drivers/pwm/pwm-rcar.c
+++ b/drivers/pwm/pwm-rcar.c
@@ -134,16 +134,12 @@ static int rcar_pwm_set_counter(struct rcar_pwm_chip *rp, int div, int duty_ns,
 
 static int rcar_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm)
 {
-	struct rcar_pwm_chip *rp = to_rcar_pwm_chip(chip);
-
-	return clk_prepare_enable(rp->clk);
+	return pm_runtime_get_sync(chip->dev);
 }
 
 static void rcar_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm)
 {
-	struct rcar_pwm_chip *rp = to_rcar_pwm_chip(chip);
-
-	clk_disable_unprepare(rp->clk);
+	pm_runtime_put(chip->dev);
 }
 
 static int rcar_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
@@ -156,8 +152,12 @@ static int rcar_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
 	if (div < 0)
 		return div;
 
-	/* Let the core driver set pwm->period if disabled and duty_ns == 0 */
-	if (!pwm_is_enabled(pwm) && !duty_ns)
+	/*
+	 * Let the core driver set pwm->period if disabled and duty_ns == 0.
+	 * However, skip the hardware update only when the current duty_cycle
+	 * is also unset; otherwise the new (zero) duty must be programmed.
+	 */
+	if (!pwm_is_enabled(pwm) && !duty_ns && !pwm->state.duty_cycle)
 		return 0;
 
 	rcar_pwm_update(rp, RCAR_PWMCR_SYNC, RCAR_PWMCR_SYNC, RCAR_PWMCR);
@@ -258,11 +258,53 @@ static const struct of_device_id rcar_pwm_of_table[] = {
 };
 MODULE_DEVICE_TABLE(of, rcar_pwm_of_table);
 
+#ifdef CONFIG_PM_SLEEP
+static struct pwm_device *rcar_pwm_dev_to_pwm_dev(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct rcar_pwm_chip *rcar_pwm = platform_get_drvdata(pdev);
+	struct pwm_chip *chip = &rcar_pwm->chip;
+
+	return &chip->pwms[0];
+}
+
+static int rcar_pwm_suspend(struct device *dev)
+{
+	struct pwm_device *pwm = rcar_pwm_dev_to_pwm_dev(dev);
+
+	if (!test_bit(PWMF_REQUESTED, &pwm->flags))
+		return 0;
+
+	pm_runtime_put(dev);
+
+	return 0;
+}
+
+static int rcar_pwm_resume(struct device *dev)
+{
+	struct pwm_device *pwm = rcar_pwm_dev_to_pwm_dev(dev);
+
+	if (!test_bit(PWMF_REQUESTED, &pwm->flags))
+		return 0;
+
+	pm_runtime_get_sync(dev);
+
+	rcar_pwm_config(pwm->chip, pwm, pwm->state.duty_cycle,
+			pwm->state.period);
+	if (pwm_is_enabled(pwm))
+		rcar_pwm_enable(pwm->chip, pwm);
+
+	return 0;
+}
+#endif /* CONFIG_PM_SLEEP */
+static SIMPLE_DEV_PM_OPS(rcar_pwm_pm_ops, rcar_pwm_suspend, rcar_pwm_resume);
+
 static struct platform_driver rcar_pwm_driver = {
 	.probe = rcar_pwm_probe,
 	.remove = rcar_pwm_remove,
 	.driver = {
 		.name = "pwm-rcar",
+		.pm	= &rcar_pwm_pm_ops,
 		.of_match_table = of_match_ptr(rcar_pwm_of_table),
 	}
 };
diff --git a/drivers/pwm/pwm-stm32-lp.c b/drivers/pwm/pwm-stm32-lp.c
index 1ac9e4384142..7c13e2505080 100644
--- a/drivers/pwm/pwm-stm32-lp.c
+++ b/drivers/pwm/pwm-stm32-lp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * STM32 Low-Power Timer PWM driver
  *
@@ -5,8 +6,6 @@
  *
  * Author: Gerald Baeza <gerald.baeza@st.com>
  *
- * License terms: GNU General Public License (GPL), version 2
- *
  * Inspired by Gerald Baeza's pwm-stm32 driver
  */
 
@@ -203,6 +202,8 @@ static int stm32_pwm_lp_probe(struct platform_device *pdev)
 	priv->chip.dev = &pdev->dev;
 	priv->chip.ops = &stm32_pwm_lp_ops;
 	priv->chip.npwm = 1;
+	priv->chip.of_xlate = of_pwm_xlate_with_flags;
+	priv->chip.of_pwm_n_cells = 3;
 
 	ret = pwmchip_add(&priv->chip);
 	if (ret < 0)
diff --git a/drivers/pwm/pwm-stm32.c b/drivers/pwm/pwm-stm32.c
index 6139512aab7b..2708212933f7 100644
--- a/drivers/pwm/pwm-stm32.c
+++ b/drivers/pwm/pwm-stm32.c
@@ -1,10 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) STMicroelectronics 2016
  *
  * Author: Gerald Baeza <gerald.baeza@st.com>
  *
- * License terms: GNU General Public License (GPL), version 2
- *
  * Inspired by timer-stm32.c from Maxime Coquelin
  *             pwm-atmel.c from Bo Shen
  */
@@ -21,7 +20,7 @@
 
 struct stm32_pwm {
 	struct pwm_chip chip;
-	struct device *dev;
+	struct mutex lock; /* protect pwm config/enable */
 	struct clk *clk;
 	struct regmap *regmap;
 	u32 max_arr;
@@ -214,9 +213,23 @@ static int stm32_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
 	return ret;
 }
 
+static int stm32_pwm_apply_locked(struct pwm_chip *chip, struct pwm_device *pwm,
+				  struct pwm_state *state)
+{
+	struct stm32_pwm *priv = to_stm32_pwm_dev(chip);
+	int ret;
+
+	/* protect common prescaler for all active channels */
+	mutex_lock(&priv->lock);
+	ret = stm32_pwm_apply(chip, pwm, state);
+	mutex_unlock(&priv->lock);
+
+	return ret;
+}
+
 static const struct pwm_ops stm32pwm_ops = {
 	.owner = THIS_MODULE,
-	.apply = stm32_pwm_apply,
+	.apply = stm32_pwm_apply_locked,
 };
 
 static int stm32_pwm_set_breakinput(struct stm32_pwm *priv,
@@ -336,6 +349,7 @@ static int stm32_pwm_probe(struct platform_device *pdev)
 	if (!priv)
 		return -ENOMEM;
 
+	mutex_init(&priv->lock);
 	priv->regmap = ddata->regmap;
 	priv->clk = ddata->clk;
 	priv->max_arr = ddata->max_arr;
diff --git a/drivers/pwm/pwm-sun4i.c b/drivers/pwm/pwm-sun4i.c
index 334199c58f1d..470d4f71e7eb 100644
--- a/drivers/pwm/pwm-sun4i.c
+++ b/drivers/pwm/pwm-sun4i.c
@@ -73,7 +73,6 @@ static const u32 prescaler_table[] = {
 
 struct sun4i_pwm_data {
 	bool has_prescaler_bypass;
-	bool has_rdy;
 	unsigned int npwm;
 };
 
@@ -117,7 +116,8 @@ static void sun4i_pwm_get_state(struct pwm_chip *chip,
 
 	val = sun4i_pwm_readl(sun4i_pwm, PWM_CTRL_REG);
 
-	if ((val == PWM_PRESCAL_MASK) && sun4i_pwm->data->has_prescaler_bypass)
+	if ((PWM_REG_PRESCAL(val, pwm->hwpwm) == PWM_PRESCAL_MASK) &&
+	    sun4i_pwm->data->has_prescaler_bypass)
 		prescaler = 1;
 	else
 		prescaler = prescaler_table[PWM_REG_PRESCAL(val, pwm->hwpwm)];
@@ -130,7 +130,8 @@ static void sun4i_pwm_get_state(struct pwm_chip *chip,
 	else
 		state->polarity = PWM_POLARITY_INVERSED;
 
-	if (val & BIT_CH(PWM_CLK_GATING | PWM_EN, pwm->hwpwm))
+	if ((val & BIT_CH(PWM_CLK_GATING | PWM_EN, pwm->hwpwm)) ==
+	    BIT_CH(PWM_CLK_GATING | PWM_EN, pwm->hwpwm))
 		state->enabled = true;
 	else
 		state->enabled = false;
@@ -311,52 +312,37 @@ static const struct pwm_ops sun4i_pwm_ops = {
 	.owner = THIS_MODULE,
 };
 
-static const struct sun4i_pwm_data sun4i_pwm_data_a10 = {
+static const struct sun4i_pwm_data sun4i_pwm_dual_nobypass = {
 	.has_prescaler_bypass = false,
-	.has_rdy = false,
 	.npwm = 2,
 };
 
-static const struct sun4i_pwm_data sun4i_pwm_data_a10s = {
+static const struct sun4i_pwm_data sun4i_pwm_dual_bypass = {
 	.has_prescaler_bypass = true,
-	.has_rdy = true,
 	.npwm = 2,
 };
 
-static const struct sun4i_pwm_data sun4i_pwm_data_a13 = {
+static const struct sun4i_pwm_data sun4i_pwm_single_bypass = {
 	.has_prescaler_bypass = true,
-	.has_rdy = true,
-	.npwm = 1,
-};
-
-static const struct sun4i_pwm_data sun4i_pwm_data_a20 = {
-	.has_prescaler_bypass = true,
-	.has_rdy = true,
-	.npwm = 2,
-};
-
-static const struct sun4i_pwm_data sun4i_pwm_data_h3 = {
-	.has_prescaler_bypass = true,
-	.has_rdy = true,
 	.npwm = 1,
 };
 
 static const struct of_device_id sun4i_pwm_dt_ids[] = {
 	{
 		.compatible = "allwinner,sun4i-a10-pwm",
-		.data = &sun4i_pwm_data_a10,
+		.data = &sun4i_pwm_dual_nobypass,
 	}, {
 		.compatible = "allwinner,sun5i-a10s-pwm",
-		.data = &sun4i_pwm_data_a10s,
+		.data = &sun4i_pwm_dual_bypass,
 	}, {
 		.compatible = "allwinner,sun5i-a13-pwm",
-		.data = &sun4i_pwm_data_a13,
+		.data = &sun4i_pwm_single_bypass,
 	}, {
 		.compatible = "allwinner,sun7i-a20-pwm",
-		.data = &sun4i_pwm_data_a20,
+		.data = &sun4i_pwm_dual_bypass,
 	}, {
 		.compatible = "allwinner,sun8i-h3-pwm",
-		.data = &sun4i_pwm_data_h3,
+		.data = &sun4i_pwm_single_bypass,
 	}, {
 		/* sentinel */
 	},
diff --git a/drivers/pwm/sysfs.c b/drivers/pwm/sysfs.c
index 83f2b0b15712..7c71cdb8a9d8 100644
--- a/drivers/pwm/sysfs.c
+++ b/drivers/pwm/sysfs.c
@@ -273,7 +273,8 @@ static int pwm_export_child(struct device *parent, struct pwm_device *pwm)
 	ret = device_register(&export->child);
 	if (ret) {
 		clear_bit(PWMF_EXPORTED, &pwm->flags);
-		kfree(export);
+		put_device(&export->child);
+		export = NULL;
 		return ret;
 	}
 
diff --git a/drivers/rapidio/devices/rio_mport_cdev.c b/drivers/rapidio/devices/rio_mport_cdev.c
index cfb54e01d758..9d27016c899e 100644
--- a/drivers/rapidio/devices/rio_mport_cdev.c
+++ b/drivers/rapidio/devices/rio_mport_cdev.c
@@ -212,7 +212,6 @@ struct mport_cdev_priv {
 #ifdef CONFIG_RAPIDIO_DMA_ENGINE
 	struct dma_chan		*dmach;
 	struct list_head	async_list;
-	struct list_head	pend_list;
 	spinlock_t              req_lock;
 	struct mutex		dma_lock;
 	struct kref		dma_ref;
@@ -258,8 +257,6 @@ static DECLARE_WAIT_QUEUE_HEAD(mport_cdev_wait);
 static struct class *dev_class;
 static dev_t dev_number;
 
-static struct workqueue_struct *dma_wq;
-
 static void mport_release_mapping(struct kref *ref);
 
 static int rio_mport_maint_rd(struct mport_cdev_priv *priv, void __user *arg,
@@ -539,6 +536,7 @@ static int maint_comptag_set(struct mport_cdev_priv *priv, void __user *arg)
 #ifdef CONFIG_RAPIDIO_DMA_ENGINE
 
 struct mport_dma_req {
+	struct kref refcount;
 	struct list_head node;
 	struct file *filp;
 	struct mport_cdev_priv *priv;
@@ -554,11 +552,6 @@ struct mport_dma_req {
 	struct completion req_comp;
 };
 
-struct mport_faf_work {
-	struct work_struct work;
-	struct mport_dma_req *req;
-};
-
 static void mport_release_def_dma(struct kref *dma_ref)
 {
 	struct mport_dev *md =
@@ -578,8 +571,10 @@ static void mport_release_dma(struct kref *dma_ref)
 	complete(&priv->comp);
 }
 
-static void dma_req_free(struct mport_dma_req *req)
+static void dma_req_free(struct kref *ref)
 {
+	struct mport_dma_req *req = container_of(ref, struct mport_dma_req,
+			refcount);
 	struct mport_cdev_priv *priv = req->priv;
 	unsigned int i;
 
@@ -611,30 +606,7 @@ static void dma_xfer_callback(void *param)
 	req->status = dma_async_is_tx_complete(priv->dmach, req->cookie,
 					       NULL, NULL);
 	complete(&req->req_comp);
-}
-
-static void dma_faf_cleanup(struct work_struct *_work)
-{
-	struct mport_faf_work *work = container_of(_work,
-						struct mport_faf_work, work);
-	struct mport_dma_req *req = work->req;
-
-	dma_req_free(req);
-	kfree(work);
-}
-
-static void dma_faf_callback(void *param)
-{
-	struct mport_dma_req *req = (struct mport_dma_req *)param;
-	struct mport_faf_work *work;
-
-	work = kmalloc(sizeof(*work), GFP_ATOMIC);
-	if (!work)
-		return;
-
-	INIT_WORK(&work->work, dma_faf_cleanup);
-	work->req = req;
-	queue_work(dma_wq, &work->work);
+	kref_put(&req->refcount, dma_req_free);
 }
 
 /*
@@ -765,16 +737,14 @@ static int do_dma_request(struct mport_dma_req *req,
 		goto err_out;
 	}
 
-	if (sync == RIO_TRANSFER_FAF)
-		tx->callback = dma_faf_callback;
-	else
-		tx->callback = dma_xfer_callback;
+	tx->callback = dma_xfer_callback;
 	tx->callback_param = req;
 
 	req->dmach = chan;
 	req->sync = sync;
 	req->status = DMA_IN_PROGRESS;
 	init_completion(&req->req_comp);
+	kref_get(&req->refcount);
 
 	cookie = dmaengine_submit(tx);
 	req->cookie = cookie;
@@ -785,6 +755,7 @@ static int do_dma_request(struct mport_dma_req *req,
 	if (dma_submit_error(cookie)) {
 		rmcd_error("submit err=%d (addr:0x%llx len:0x%llx)",
 			   cookie, xfer->rio_addr, xfer->length);
+		kref_put(&req->refcount, dma_req_free);
 		ret = -EIO;
 		goto err_out;
 	}
@@ -860,6 +831,8 @@ rio_dma_transfer(struct file *filp, u32 transfer_mode,
 	if (!req)
 		return -ENOMEM;
 
+	kref_init(&req->refcount);
+
 	ret = get_dma_channel(priv);
 	if (ret) {
 		kfree(req);
@@ -968,42 +941,20 @@ rio_dma_transfer(struct file *filp, u32 transfer_mode,
 	ret = do_dma_request(req, xfer, sync, nents);
 
 	if (ret >= 0) {
-		if (sync == RIO_TRANSFER_SYNC)
-			goto sync_out;
-		return ret; /* return ASYNC cookie */
-	}
-
-	if (ret == -ETIMEDOUT || ret == -EINTR) {
-		/*
-		 * This can happen only in case of SYNC transfer.
-		 * Do not free unfinished request structure immediately.
-		 * Place it into pending list and deal with it later
-		 */
-		spin_lock(&priv->req_lock);
-		list_add_tail(&req->node, &priv->pend_list);
-		spin_unlock(&priv->req_lock);
-		return ret;
+		if (sync == RIO_TRANSFER_ASYNC)
+			return ret; /* return ASYNC cookie */
+	} else {
+		rmcd_debug(DMA, "do_dma_request failed with err=%d", ret);
 	}
 
-
-	rmcd_debug(DMA, "do_dma_request failed with err=%d", ret);
-sync_out:
-	dma_unmap_sg(chan->device->dev, req->sgt.sgl, req->sgt.nents, dir);
-	sg_free_table(&req->sgt);
 err_pg:
-	if (page_list) {
+	if (!req->page_list) {
 		for (i = 0; i < nr_pages; i++)
 			put_page(page_list[i]);
 		kfree(page_list);
 	}
 err_req:
-	if (req->map) {
-		mutex_lock(&md->buf_mutex);
-		kref_put(&req->map->ref, mport_release_mapping);
-		mutex_unlock(&md->buf_mutex);
-	}
-	put_dma_channel(priv);
-	kfree(req);
+	kref_put(&req->refcount, dma_req_free);
 	return ret;
 }
 
@@ -1121,7 +1072,7 @@ static int rio_mport_wait_for_async_dma(struct file *filp, void __user *arg)
 		ret = 0;
 
 	if (req->status != DMA_IN_PROGRESS && req->status != DMA_PAUSED)
-		dma_req_free(req);
+		kref_put(&req->refcount, dma_req_free);
 
 	return ret;
 
@@ -1966,7 +1917,6 @@ static int mport_cdev_open(struct inode *inode, struct file *filp)
 
 #ifdef CONFIG_RAPIDIO_DMA_ENGINE
 	INIT_LIST_HEAD(&priv->async_list);
-	INIT_LIST_HEAD(&priv->pend_list);
 	spin_lock_init(&priv->req_lock);
 	mutex_init(&priv->dma_lock);
 #endif
@@ -2006,8 +1956,6 @@ static void mport_cdev_release_dma(struct file *filp)
 
 	md = priv->md;
 
-	flush_workqueue(dma_wq);
-
 	spin_lock(&priv->req_lock);
 	if (!list_empty(&priv->async_list)) {
 		rmcd_debug(EXIT, "async list not empty filp=%p %s(%d)",
@@ -2023,20 +1971,7 @@ static void mport_cdev_release_dma(struct file *filp)
 				   req->filp, req->cookie,
 				   completion_done(&req->req_comp)?"yes":"no");
 			list_del(&req->node);
-			dma_req_free(req);
-		}
-	}
-
-	if (!list_empty(&priv->pend_list)) {
-		rmcd_debug(EXIT, "Free pending DMA requests for filp=%p %s(%d)",
-			   filp, current->comm, task_pid_nr(current));
-		list_for_each_entry_safe(req,
-					 req_next, &priv->pend_list, node) {
-			rmcd_debug(EXIT, "free req->filp=%p cookie=%d compl=%s",
-				   req->filp, req->cookie,
-				   completion_done(&req->req_comp)?"yes":"no");
-			list_del(&req->node);
-			dma_req_free(req);
+			kref_put(&req->refcount, dma_req_free);
 		}
 	}
 
@@ -2048,15 +1983,6 @@ static void mport_cdev_release_dma(struct file *filp)
 			current->comm, task_pid_nr(current), wret);
 	}
 
-	spin_lock(&priv->req_lock);
-
-	if (!list_empty(&priv->pend_list)) {
-		rmcd_debug(EXIT, "ATTN: pending DMA requests, filp=%p %s(%d)",
-			   filp, current->comm, task_pid_nr(current));
-	}
-
-	spin_unlock(&priv->req_lock);
-
 	if (priv->dmach != priv->md->dma_chan) {
 		rmcd_debug(EXIT, "Release DMA channel for filp=%p %s(%d)",
 			   filp, current->comm, task_pid_nr(current));
@@ -2573,8 +2499,6 @@ static void mport_cdev_remove(struct mport_dev *md)
 	cdev_device_del(&md->cdev, &md->dev);
 	mport_cdev_kill_fasync(md);
 
-	flush_workqueue(dma_wq);
-
 	/* TODO: do we need to give clients some time to close file
 	 * descriptors? Simple wait for XX, or kref?
 	 */
@@ -2691,17 +2615,8 @@ static int __init mport_init(void)
 		goto err_cli;
 	}
 
-	dma_wq = create_singlethread_workqueue("dma_wq");
-	if (!dma_wq) {
-		rmcd_error("failed to create DMA work queue");
-		ret = -ENOMEM;
-		goto err_wq;
-	}
-
 	return 0;
 
-err_wq:
-	class_interface_unregister(&rio_mport_interface);
 err_cli:
 	unregister_chrdev_region(dev_number, RIO_MAX_MPORTS);
 err_chr:
@@ -2717,7 +2632,6 @@ static void __exit mport_exit(void)
 	class_interface_unregister(&rio_mport_interface);
 	class_destroy(dev_class);
 	unregister_chrdev_region(dev_number, RIO_MAX_MPORTS);
-	destroy_workqueue(dma_wq);
 }
 
 module_init(mport_init);
diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c
index 23429bdaca84..161b927d9de1 100644
--- a/drivers/rapidio/rio-scan.c
+++ b/drivers/rapidio/rio-scan.c
@@ -76,7 +76,7 @@ static u16 rio_destid_alloc(struct rio_net *net)
 }
 
 /**
- * rio_destid_reserve - Reserve the specivied destID
+ * rio_destid_reserve - Reserve the specified destID
  * @net: RIO network
  * @destid: destID to reserve
  *
@@ -885,7 +885,7 @@ static struct rio_net *rio_scan_alloc_net(struct rio_mport *mport,
  *
  * For each enumerated device, ensure that each switch in a system
  * has correct routing entries. Add routes for devices that where
- * unknown dirung the first enumeration pass through the switch.
+ * unknown during the first enumeration pass through the switch.
  */
 static void rio_update_route_tables(struct rio_net *net)
 {
@@ -983,7 +983,7 @@ static int rio_enum_mport(struct rio_mport *mport, u32 flags)
 		/* reserve mport destID in new net */
 		rio_destid_reserve(net, mport->host_deviceid);
 
-		/* Enable Input Output Port (transmitter reviever) */
+		/* Enable Input Output Port (transmitter receiver) */
 		rio_enable_rx_tx_port(mport, 1, 0, 0, 0);
 
 		/* Set component tag for host */
diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig
index b609e1d3654b..027274008b08 100644
--- a/drivers/remoteproc/Kconfig
+++ b/drivers/remoteproc/Kconfig
@@ -6,6 +6,7 @@ config REMOTEPROC
 	select CRC32
 	select FW_LOADER
 	select VIRTIO
+	select WANT_DEV_COREDUMP
 	help
 	  Support for remote processors (such as DSP coprocessors). These
 	  are mainly used on embedded systems.
@@ -90,6 +91,7 @@ config QCOM_ADSP_PIL
 	depends on QCOM_SMEM
 	depends on RPMSG_QCOM_SMD || (COMPILE_TEST && RPMSG_QCOM_SMD=n)
 	depends on RPMSG_QCOM_GLINK_SMEM || RPMSG_QCOM_GLINK_SMEM=n
+	depends on QCOM_SYSMON || QCOM_SYSMON=n
 	select MFD_SYSCON
 	select QCOM_MDT_LOADER
 	select QCOM_RPROC_COMMON
@@ -107,6 +109,7 @@ config QCOM_Q6V5_PIL
 	depends on QCOM_SMEM
 	depends on RPMSG_QCOM_SMD || (COMPILE_TEST && RPMSG_QCOM_SMD=n)
 	depends on RPMSG_QCOM_GLINK_SMEM || RPMSG_QCOM_GLINK_SMEM=n
+	depends on QCOM_SYSMON || QCOM_SYSMON=n
 	select MFD_SYSCON
 	select QCOM_RPROC_COMMON
 	select QCOM_SCM
@@ -114,12 +117,28 @@ config QCOM_Q6V5_PIL
 	  Say y here to support the Qualcomm Peripherial Image Loader for the
 	  Hexagon V5 based remote processors.
 
+config QCOM_SYSMON
+	tristate "Qualcomm sysmon driver"
+	depends on RPMSG
+	depends on ARCH_QCOM
+	depends on NET
+	select QCOM_QMI_HELPERS
+	help
+	  The sysmon driver implements a sysmon QMI client and a handler for
+	  the sys_mon SMD and GLINK channel, which are used for graceful
+	  shutdown, retrieving failure information and propagating information
+	  about other subsystems being shut down.
+
+	  Say y here if your system runs firmware on any other subsystems, e.g.
+	  modem or DSP.
+
 config QCOM_WCNSS_PIL
 	tristate "Qualcomm WCNSS Peripheral Image Loader"
 	depends on OF && ARCH_QCOM
 	depends on RPMSG_QCOM_SMD || (COMPILE_TEST && RPMSG_QCOM_SMD=n)
 	depends on RPMSG_QCOM_GLINK_SMEM || RPMSG_QCOM_GLINK_SMEM=n
 	depends on QCOM_SMEM
+	depends on QCOM_SYSMON || QCOM_SYSMON=n
 	select QCOM_MDT_LOADER
 	select QCOM_RPROC_COMMON
 	select QCOM_SCM
diff --git a/drivers/remoteproc/Makefile b/drivers/remoteproc/Makefile
index 6e16450ce11f..02627ede8d4a 100644
--- a/drivers/remoteproc/Makefile
+++ b/drivers/remoteproc/Makefile
@@ -17,6 +17,7 @@ obj-$(CONFIG_KEYSTONE_REMOTEPROC)	+= keystone_remoteproc.o
 obj-$(CONFIG_QCOM_ADSP_PIL)		+= qcom_adsp_pil.o
 obj-$(CONFIG_QCOM_RPROC_COMMON)		+= qcom_common.o
 obj-$(CONFIG_QCOM_Q6V5_PIL)		+= qcom_q6v5_pil.o
+obj-$(CONFIG_QCOM_SYSMON)		+= qcom_sysmon.o
 obj-$(CONFIG_QCOM_WCNSS_PIL)		+= qcom_wcnss_pil.o
 qcom_wcnss_pil-y			+= qcom_wcnss.o
 qcom_wcnss_pil-y			+= qcom_wcnss_iris.o
diff --git a/drivers/remoteproc/imx_rproc.c b/drivers/remoteproc/imx_rproc.c
index 633268e9d550..54c07fd3f204 100644
--- a/drivers/remoteproc/imx_rproc.c
+++ b/drivers/remoteproc/imx_rproc.c
@@ -333,14 +333,14 @@ static int imx_rproc_probe(struct platform_device *pdev)
 	/* set some other name then imx */
 	rproc = rproc_alloc(dev, "imx-rproc", &imx_rproc_ops,
 			    NULL, sizeof(*priv));
-	if (!rproc) {
-		ret = -ENOMEM;
-		goto err;
-	}
+	if (!rproc)
+		return -ENOMEM;
 
 	dcfg = of_device_get_match_data(dev);
-	if (!dcfg)
-		return -EINVAL;
+	if (!dcfg) {
+		ret = -EINVAL;
+		goto err_put_rproc;
+	}
 
 	priv = rproc->priv;
 	priv->rproc = rproc;
@@ -359,8 +359,8 @@ static int imx_rproc_probe(struct platform_device *pdev)
 	priv->clk = devm_clk_get(dev, NULL);
 	if (IS_ERR(priv->clk)) {
 		dev_err(dev, "Failed to get clock\n");
-		rproc_free(rproc);
-		return PTR_ERR(priv->clk);
+		ret = PTR_ERR(priv->clk);
+		goto err_put_rproc;
 	}
 
 	/*
@@ -370,8 +370,7 @@ static int imx_rproc_probe(struct platform_device *pdev)
 	ret = clk_prepare_enable(priv->clk);
 	if (ret) {
 		dev_err(&rproc->dev, "Failed to enable clock\n");
-		rproc_free(rproc);
-		return ret;
+		goto err_put_rproc;
 	}
 
 	ret = rproc_add(rproc);
@@ -380,13 +379,13 @@ static int imx_rproc_probe(struct platform_device *pdev)
 		goto err_put_clk;
 	}
 
-	return ret;
+	return 0;
 
 err_put_clk:
 	clk_disable_unprepare(priv->clk);
 err_put_rproc:
 	rproc_free(rproc);
-err:
+
 	return ret;
 }
 
diff --git a/drivers/remoteproc/qcom_adsp_pil.c b/drivers/remoteproc/qcom_adsp_pil.c
index 373c167892d7..89a86ce07f99 100644
--- a/drivers/remoteproc/qcom_adsp_pil.c
+++ b/drivers/remoteproc/qcom_adsp_pil.c
@@ -38,7 +38,10 @@ struct adsp_data {
 	const char *firmware_name;
 	int pas_id;
 	bool has_aggre2_clk;
+
 	const char *ssr_name;
+	const char *sysmon_name;
+	int ssctl_id;
 };
 
 struct qcom_adsp {
@@ -75,6 +78,7 @@ struct qcom_adsp {
 	struct qcom_rproc_glink glink_subdev;
 	struct qcom_rproc_subdev smd_subdev;
 	struct qcom_rproc_ssr ssr_subdev;
+	struct qcom_sysmon *sysmon;
 };
 
 static int adsp_load(struct rproc *rproc, const struct firmware *fw)
@@ -82,7 +86,9 @@ static int adsp_load(struct rproc *rproc, const struct firmware *fw)
 	struct qcom_adsp *adsp = (struct qcom_adsp *)rproc->priv;
 
 	return qcom_mdt_load(adsp->dev, fw, rproc->firmware, adsp->pas_id,
-			     adsp->mem_region, adsp->mem_phys, adsp->mem_size);
+			     adsp->mem_region, adsp->mem_phys, adsp->mem_size,
+			     &adsp->mem_reloc);
+
 }
 
 static int adsp_start(struct rproc *rproc)
@@ -177,6 +183,7 @@ static const struct rproc_ops adsp_ops = {
 	.start = adsp_start,
 	.stop = adsp_stop,
 	.da_to_va = adsp_da_to_va,
+	.parse_fw = qcom_register_dump_segments,
 	.load = adsp_load,
 };
 
@@ -201,9 +208,6 @@ static irqreturn_t adsp_fatal_interrupt(int irq, void *dev)
 
 	rproc_report_crash(adsp->rproc, RPROC_FATAL_ERROR);
 
-	if (!IS_ERR(msg))
-		msg[0] = '\0';
-
 	return IRQ_HANDLED;
 }
 
@@ -398,6 +402,9 @@ static int adsp_probe(struct platform_device *pdev)
 	qcom_add_glink_subdev(rproc, &adsp->glink_subdev);
 	qcom_add_smd_subdev(rproc, &adsp->smd_subdev);
 	qcom_add_ssr_subdev(rproc, &adsp->ssr_subdev, desc->ssr_name);
+	adsp->sysmon = qcom_add_sysmon_subdev(rproc,
+					      desc->sysmon_name,
+					      desc->ssctl_id);
 
 	ret = rproc_add(rproc);
 	if (ret)
@@ -419,6 +426,7 @@ static int adsp_remove(struct platform_device *pdev)
 	rproc_del(adsp->rproc);
 
 	qcom_remove_glink_subdev(adsp->rproc, &adsp->glink_subdev);
+	qcom_remove_sysmon_subdev(adsp->sysmon);
 	qcom_remove_smd_subdev(adsp->rproc, &adsp->smd_subdev);
 	qcom_remove_ssr_subdev(adsp->rproc, &adsp->ssr_subdev);
 	rproc_free(adsp->rproc);
@@ -432,6 +440,8 @@ static const struct adsp_data adsp_resource_init = {
 		.pas_id = 1,
 		.has_aggre2_clk = false,
 		.ssr_name = "lpass",
+		.sysmon_name = "adsp",
+		.ssctl_id = 0x14,
 };
 
 static const struct adsp_data slpi_resource_init = {
@@ -440,6 +450,8 @@ static const struct adsp_data slpi_resource_init = {
 		.pas_id = 12,
 		.has_aggre2_clk = true,
 		.ssr_name = "dsps",
+		.sysmon_name = "slpi",
+		.ssctl_id = 0x16,
 };
 
 static const struct of_device_id adsp_of_match[] = {
diff --git a/drivers/remoteproc/qcom_common.c b/drivers/remoteproc/qcom_common.c
index 00602499713f..acfc99f82fb8 100644
--- a/drivers/remoteproc/qcom_common.c
+++ b/drivers/remoteproc/qcom_common.c
@@ -22,6 +22,7 @@
 #include <linux/remoteproc.h>
 #include <linux/rpmsg/qcom_glink.h>
 #include <linux/rpmsg/qcom_smd.h>
+#include <linux/soc/qcom/mdt_loader.h>
 
 #include "remoteproc_internal.h"
 #include "qcom_common.h"
@@ -41,7 +42,7 @@ static int glink_subdev_probe(struct rproc_subdev *subdev)
 	return PTR_ERR_OR_ZERO(glink->edge);
 }
 
-static void glink_subdev_remove(struct rproc_subdev *subdev)
+static void glink_subdev_remove(struct rproc_subdev *subdev, bool crashed)
 {
 	struct qcom_rproc_glink *glink = to_glink_subdev(subdev);
 
@@ -74,11 +75,57 @@ EXPORT_SYMBOL_GPL(qcom_add_glink_subdev);
  */
 void qcom_remove_glink_subdev(struct rproc *rproc, struct qcom_rproc_glink *glink)
 {
+	if (!glink->node)
+		return;
+
 	rproc_remove_subdev(rproc, &glink->subdev);
 	of_node_put(glink->node);
 }
 EXPORT_SYMBOL_GPL(qcom_remove_glink_subdev);
 
+/**
+ * qcom_register_dump_segments() - register segments for coredump
+ * @rproc:	remoteproc handle
+ * @fw:		firmware header
+ *
+ * Register the non-empty, non-hash PT_LOAD segments of the ELF for coredump
+ *
+ * Return: 0 on success, negative errno on failure.
+ */
+int qcom_register_dump_segments(struct rproc *rproc,
+				const struct firmware *fw)
+{
+	const struct elf32_phdr *phdrs;
+	const struct elf32_phdr *phdr;
+	const struct elf32_hdr *ehdr;
+	int ret;
+	int i;
+
+	ehdr = (struct elf32_hdr *)fw->data;
+	phdrs = (struct elf32_phdr *)(ehdr + 1);
+
+	for (i = 0; i < ehdr->e_phnum; i++) {
+		phdr = &phdrs[i];
+
+		if (phdr->p_type != PT_LOAD)
+			continue;
+
+		if ((phdr->p_flags & QCOM_MDT_TYPE_MASK) == QCOM_MDT_TYPE_HASH)
+			continue;
+
+		if (!phdr->p_memsz)
+			continue;
+
+		ret = rproc_coredump_add_segment(rproc, phdr->p_paddr,
+						 phdr->p_memsz);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(qcom_register_dump_segments);
+
 static int smd_subdev_probe(struct rproc_subdev *subdev)
 {
 	struct qcom_rproc_subdev *smd = to_smd_subdev(subdev);
@@ -88,7 +135,7 @@ static int smd_subdev_probe(struct rproc_subdev *subdev)
 	return PTR_ERR_OR_ZERO(smd->edge);
 }
 
-static void smd_subdev_remove(struct rproc_subdev *subdev)
+static void smd_subdev_remove(struct rproc_subdev *subdev, bool crashed)
 {
 	struct qcom_rproc_subdev *smd = to_smd_subdev(subdev);
 
@@ -121,6 +168,9 @@ EXPORT_SYMBOL_GPL(qcom_add_smd_subdev);
  */
 void qcom_remove_smd_subdev(struct rproc *rproc, struct qcom_rproc_subdev *smd)
 {
+	if (!smd->node)
+		return;
+
 	rproc_remove_subdev(rproc, &smd->subdev);
 	of_node_put(smd->node);
 }
@@ -157,7 +207,7 @@ static int ssr_notify_start(struct rproc_subdev *subdev)
 	return  0;
 }
 
-static void ssr_notify_stop(struct rproc_subdev *subdev)
+static void ssr_notify_stop(struct rproc_subdev *subdev, bool crashed)
 {
 	struct qcom_rproc_ssr *ssr = to_ssr_subdev(subdev);
 
diff --git a/drivers/remoteproc/qcom_common.h b/drivers/remoteproc/qcom_common.h
index 728be9834d8b..58de71e4781c 100644
--- a/drivers/remoteproc/qcom_common.h
+++ b/drivers/remoteproc/qcom_common.h
@@ -4,6 +4,9 @@
 
 #include <linux/remoteproc.h>
 #include "remoteproc_internal.h"
+#include <linux/soc/qcom/qmi.h>
+
+struct qcom_sysmon;
 
 struct qcom_rproc_glink {
 	struct rproc_subdev subdev;
@@ -30,6 +33,8 @@ struct qcom_rproc_ssr {
 void qcom_add_glink_subdev(struct rproc *rproc, struct qcom_rproc_glink *glink);
 void qcom_remove_glink_subdev(struct rproc *rproc, struct qcom_rproc_glink *glink);
 
+int qcom_register_dump_segments(struct rproc *rproc, const struct firmware *fw);
+
 void qcom_add_smd_subdev(struct rproc *rproc, struct qcom_rproc_subdev *smd);
 void qcom_remove_smd_subdev(struct rproc *rproc, struct qcom_rproc_subdev *smd);
 
@@ -37,4 +42,22 @@ void qcom_add_ssr_subdev(struct rproc *rproc, struct qcom_rproc_ssr *ssr,
 			 const char *ssr_name);
 void qcom_remove_ssr_subdev(struct rproc *rproc, struct qcom_rproc_ssr *ssr);
 
+#if IS_ENABLED(CONFIG_QCOM_SYSMON)
+struct qcom_sysmon *qcom_add_sysmon_subdev(struct rproc *rproc,
+					   const char *name,
+					   int ssctl_instance);
+void qcom_remove_sysmon_subdev(struct qcom_sysmon *sysmon);
+#else
+static inline struct qcom_sysmon *qcom_add_sysmon_subdev(struct rproc *rproc,
+							 const char *name,
+							 int ssctl_instance)
+{
+	return NULL;
+}
+
+static inline void qcom_remove_sysmon_subdev(struct qcom_sysmon *sysmon)
+{
+}
+#endif
+
 #endif
diff --git a/drivers/remoteproc/qcom_q6v5_pil.c b/drivers/remoteproc/qcom_q6v5_pil.c
index b4e5e725848d..8e70a627e0bb 100644
--- a/drivers/remoteproc/qcom_q6v5_pil.c
+++ b/drivers/remoteproc/qcom_q6v5_pil.c
@@ -168,6 +168,7 @@ struct q6v5 {
 
 	struct qcom_rproc_subdev smd_subdev;
 	struct qcom_rproc_ssr ssr_subdev;
+	struct qcom_sysmon *sysmon;
 	bool need_mem_protection;
 	int mpss_perm;
 	int mba_perm;
@@ -939,9 +940,6 @@ static irqreturn_t q6v5_wdog_interrupt(int irq, void *dev)
 
 	rproc_report_crash(qproc->rproc, RPROC_WATCHDOG);
 
-	if (!IS_ERR(msg))
-		msg[0] = '\0';
-
 	return IRQ_HANDLED;
 }
 
@@ -959,9 +957,6 @@ static irqreturn_t q6v5_fatal_interrupt(int irq, void *dev)
 
 	rproc_report_crash(qproc->rproc, RPROC_FATAL_ERROR);
 
-	if (!IS_ERR(msg))
-		msg[0] = '\0';
-
 	return IRQ_HANDLED;
 }
 
@@ -1215,6 +1210,7 @@ static int q6v5_probe(struct platform_device *pdev)
 	qproc->mba_perm = BIT(QCOM_SCM_VMID_HLOS);
 	qcom_add_smd_subdev(rproc, &qproc->smd_subdev);
 	qcom_add_ssr_subdev(rproc, &qproc->ssr_subdev, "mpss");
+	qproc->sysmon = qcom_add_sysmon_subdev(rproc, "modem", 0x12);
 
 	ret = rproc_add(rproc);
 	if (ret)
@@ -1234,6 +1230,7 @@ static int q6v5_remove(struct platform_device *pdev)
 
 	rproc_del(qproc->rproc);
 
+	qcom_remove_sysmon_subdev(qproc->sysmon);
 	qcom_remove_smd_subdev(qproc->rproc, &qproc->smd_subdev);
 	qcom_remove_ssr_subdev(qproc->rproc, &qproc->ssr_subdev);
 	rproc_free(qproc->rproc);
diff --git a/drivers/remoteproc/qcom_sysmon.c b/drivers/remoteproc/qcom_sysmon.c
new file mode 100644
index 000000000000..f085545d7da5
--- /dev/null
+++ b/drivers/remoteproc/qcom_sysmon.c
@@ -0,0 +1,579 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2017, Linaro Ltd.
+ */
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/notifier.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/remoteproc/qcom_rproc.h>
+#include <linux/rpmsg.h>
+
+#include "qcom_common.h"
+
+static BLOCKING_NOTIFIER_HEAD(sysmon_notifiers);
+
+struct qcom_sysmon {
+	struct rproc_subdev subdev;
+	struct rproc *rproc;
+
+	struct list_head node;
+
+	const char *name;
+
+	int ssctl_version;
+	int ssctl_instance;
+
+	struct notifier_block nb;
+
+	struct device *dev;
+
+	struct rpmsg_endpoint *ept;
+	struct completion comp;
+	struct mutex lock;
+
+	bool ssr_ack;
+
+	struct qmi_handle qmi;
+	struct sockaddr_qrtr ssctl;
+};
+
+static DEFINE_MUTEX(sysmon_lock);
+static LIST_HEAD(sysmon_list);
+
+/**
+ * sysmon_send_event() - send notification of other remote's SSR event
+ * @sysmon:	sysmon context
+ * @name:	other remote's name
+ */
+static void sysmon_send_event(struct qcom_sysmon *sysmon, const char *name)
+{
+	char req[50];
+	int len;
+	int ret;
+
+	len = snprintf(req, sizeof(req), "ssr:%s:before_shutdown", name);
+	if (len >= sizeof(req))
+		return;
+
+	mutex_lock(&sysmon->lock);
+	reinit_completion(&sysmon->comp);
+	sysmon->ssr_ack = false;
+
+	ret = rpmsg_send(sysmon->ept, req, len);
+	if (ret < 0) {
+		dev_err(sysmon->dev, "failed to send sysmon event\n");
+		goto out_unlock;
+	}
+
+	ret = wait_for_completion_timeout(&sysmon->comp,
+					  msecs_to_jiffies(5000));
+	if (!ret) {
+		dev_err(sysmon->dev, "timeout waiting for sysmon ack\n");
+		goto out_unlock;
+	}
+
+	if (!sysmon->ssr_ack)
+		dev_err(sysmon->dev, "unexpected response to sysmon event\n");
+
+out_unlock:
+	mutex_unlock(&sysmon->lock);
+}
+
+/**
+ * sysmon_request_shutdown() - request graceful shutdown of remote
+ * @sysmon:	sysmon context
+ */
+static void sysmon_request_shutdown(struct qcom_sysmon *sysmon)
+{
+	char *req = "ssr:shutdown";
+	int ret;
+
+	mutex_lock(&sysmon->lock);
+	reinit_completion(&sysmon->comp);
+	sysmon->ssr_ack = false;
+
+	ret = rpmsg_send(sysmon->ept, req, strlen(req) + 1);
+	if (ret < 0) {
+		dev_err(sysmon->dev, "send sysmon shutdown request failed\n");
+		goto out_unlock;
+	}
+
+	ret = wait_for_completion_timeout(&sysmon->comp,
+					  msecs_to_jiffies(5000));
+	if (!ret) {
+		dev_err(sysmon->dev, "timeout waiting for sysmon ack\n");
+		goto out_unlock;
+	}
+
+	if (!sysmon->ssr_ack)
+		dev_err(sysmon->dev,
+			"unexpected response to sysmon shutdown request\n");
+
+out_unlock:
+	mutex_unlock(&sysmon->lock);
+}
+
+static int sysmon_callback(struct rpmsg_device *rpdev, void *data, int count,
+			   void *priv, u32 addr)
+{
+	struct qcom_sysmon *sysmon = priv;
+	const char *ssr_ack = "ssr:ack";
+	const int ssr_ack_len = strlen(ssr_ack) + 1;
+
+	if (!sysmon)
+		return -EINVAL;
+
+	if (count >= ssr_ack_len && !memcmp(data, ssr_ack, ssr_ack_len))
+		sysmon->ssr_ack = true;
+
+	complete(&sysmon->comp);
+
+	return 0;
+}
+
+#define SSCTL_SHUTDOWN_REQ		0x21
+#define SSCTL_SUBSYS_EVENT_REQ		0x23
+
+#define SSCTL_MAX_MSG_LEN		7
+
+#define SSCTL_SUBSYS_NAME_LENGTH	15
+
+enum {
+	SSCTL_SSR_EVENT_BEFORE_POWERUP,
+	SSCTL_SSR_EVENT_AFTER_POWERUP,
+	SSCTL_SSR_EVENT_BEFORE_SHUTDOWN,
+	SSCTL_SSR_EVENT_AFTER_SHUTDOWN,
+};
+
+enum {
+	SSCTL_SSR_EVENT_FORCED,
+	SSCTL_SSR_EVENT_GRACEFUL,
+};
+
+struct ssctl_shutdown_resp {
+	struct qmi_response_type_v01 resp;
+};
+
+static struct qmi_elem_info ssctl_shutdown_resp_ei[] = {
+	{
+		.data_type	= QMI_STRUCT,
+		.elem_len	= 1,
+		.elem_size	= sizeof(struct qmi_response_type_v01),
+		.array_type	= NO_ARRAY,
+		.tlv_type	= 0x02,
+		.offset		= offsetof(struct ssctl_shutdown_resp, resp),
+		.ei_array	= qmi_response_type_v01_ei,
+	},
+	{}
+};
+
+struct ssctl_subsys_event_req {
+	u8 subsys_name_len;
+	char subsys_name[SSCTL_SUBSYS_NAME_LENGTH];
+	u32 event;
+	u8 evt_driven_valid;
+	u32 evt_driven;
+};
+
+static struct qmi_elem_info ssctl_subsys_event_req_ei[] = {
+	{
+		.data_type	= QMI_DATA_LEN,
+		.elem_len	= 1,
+		.elem_size	= sizeof(uint8_t),
+		.array_type	= NO_ARRAY,
+		.tlv_type	= 0x01,
+		.offset		= offsetof(struct ssctl_subsys_event_req,
+					   subsys_name_len),
+		.ei_array	= NULL,
+	},
+	{
+		.data_type	= QMI_UNSIGNED_1_BYTE,
+		.elem_len	= SSCTL_SUBSYS_NAME_LENGTH,
+		.elem_size	= sizeof(char),
+		.array_type	= VAR_LEN_ARRAY,
+		.tlv_type	= 0x01,
+		.offset		= offsetof(struct ssctl_subsys_event_req,
+					   subsys_name),
+		.ei_array	= NULL,
+	},
+	{
+		.data_type	= QMI_SIGNED_4_BYTE_ENUM,
+		.elem_len	= 1,
+		.elem_size	= sizeof(uint32_t),
+		.array_type	= NO_ARRAY,
+		.tlv_type	= 0x02,
+		.offset		= offsetof(struct ssctl_subsys_event_req,
+					   event),
+		.ei_array	= NULL,
+	},
+	{
+		.data_type	= QMI_OPT_FLAG,
+		.elem_len	= 1,
+		.elem_size	= sizeof(uint8_t),
+		.array_type	= NO_ARRAY,
+		.tlv_type	= 0x10,
+		.offset		= offsetof(struct ssctl_subsys_event_req,
+					   evt_driven_valid),
+		.ei_array	= NULL,
+	},
+	{
+		.data_type	= QMI_SIGNED_4_BYTE_ENUM,
+		.elem_len	= 1,
+		.elem_size	= sizeof(uint32_t),
+		.array_type	= NO_ARRAY,
+		.tlv_type	= 0x10,
+		.offset		= offsetof(struct ssctl_subsys_event_req,
+					   evt_driven),
+		.ei_array	= NULL,
+	},
+	{}
+};
+
+struct ssctl_subsys_event_resp {
+	struct qmi_response_type_v01 resp;
+};
+
+static struct qmi_elem_info ssctl_subsys_event_resp_ei[] = {
+	{
+		.data_type	= QMI_STRUCT,
+		.elem_len	= 1,
+		.elem_size	= sizeof(struct qmi_response_type_v01),
+		.array_type	= NO_ARRAY,
+		.tlv_type	= 0x02,
+		.offset		= offsetof(struct ssctl_subsys_event_resp,
+					   resp),
+		.ei_array	= qmi_response_type_v01_ei,
+	},
+	{}
+};
+
+/**
+ * ssctl_request_shutdown() - request shutdown via SSCTL QMI service
+ * @sysmon:	sysmon context
+ */
+static void ssctl_request_shutdown(struct qcom_sysmon *sysmon)
+{
+	struct ssctl_shutdown_resp resp = {};
+	struct qmi_txn txn;
+	int ret;
+
+	ret = qmi_txn_init(&sysmon->qmi, &txn, ssctl_shutdown_resp_ei, &resp);
+	if (ret < 0) {
+		dev_err(sysmon->dev, "failed to allocate QMI txn\n");
+		return;
+	}
+
+	ret = qmi_send_request(&sysmon->qmi, &sysmon->ssctl, &txn,
+			       SSCTL_SHUTDOWN_REQ, 0, NULL, NULL);
+	if (ret < 0) {
+		dev_err(sysmon->dev, "failed to send shutdown request\n");
+		qmi_txn_cancel(&txn);
+		return;
+	}
+
+	ret = qmi_txn_wait(&txn, 5 * HZ);
+	if (ret < 0)
+		dev_err(sysmon->dev, "failed receiving QMI response\n");
+	else if (resp.resp.result)
+		dev_err(sysmon->dev, "shutdown request failed\n");
+	else
+		dev_dbg(sysmon->dev, "shutdown request completed\n");
+}
+
+/**
+ * ssctl_send_event() - send notification of other remote's SSR event
+ * @sysmon:	sysmon context
+ * @name:	other remote's name
+ */
+static void ssctl_send_event(struct qcom_sysmon *sysmon, const char *name)
+{
+	struct ssctl_subsys_event_resp resp;
+	struct ssctl_subsys_event_req req;
+	struct qmi_txn txn;
+	int ret;
+
+	memset(&resp, 0, sizeof(resp));
+	ret = qmi_txn_init(&sysmon->qmi, &txn, ssctl_subsys_event_resp_ei, &resp);
+	if (ret < 0) {
+		dev_err(sysmon->dev, "failed to allocate QMI txn\n");
+		return;
+	}
+
+	memset(&req, 0, sizeof(req));
+	strlcpy(req.subsys_name, name, sizeof(req.subsys_name));
+	req.subsys_name_len = strlen(req.subsys_name);
+	req.event = SSCTL_SSR_EVENT_BEFORE_SHUTDOWN;
+	req.evt_driven_valid = true;
+	req.evt_driven = SSCTL_SSR_EVENT_FORCED;
+
+	ret = qmi_send_request(&sysmon->qmi, &sysmon->ssctl, &txn,
+			       SSCTL_SUBSYS_EVENT_REQ, 40,
+			       ssctl_subsys_event_req_ei, &req);
+	if (ret < 0) {
+		dev_err(sysmon->dev, "failed to send subsystem event\n");
+		qmi_txn_cancel(&txn);
+		return;
+	}
+
+	ret = qmi_txn_wait(&txn, 5 * HZ);
+	if (ret < 0)
+		dev_err(sysmon->dev, "failed receiving QMI response\n");
+	else if (resp.resp.result)
+		dev_err(sysmon->dev, "ssr event send failed\n");
+	else
+		dev_dbg(sysmon->dev, "ssr event send completed\n");
+}
+
+/**
+ * ssctl_new_server() - QMI callback indicating a new service
+ * @qmi:	QMI handle
+ * @svc:	service information
+ *
+ * Return: 0 if we're interested in this service, -EINVAL otherwise.
+ */
+static int ssctl_new_server(struct qmi_handle *qmi, struct qmi_service *svc)
+{
+	struct qcom_sysmon *sysmon = container_of(qmi, struct qcom_sysmon, qmi);
+
+	switch (svc->version) {
+	case 1:
+		if (svc->instance != 0)
+			return -EINVAL;
+		if (strcmp(sysmon->name, "modem"))
+			return -EINVAL;
+		break;
+	case 2:
+		if (svc->instance != sysmon->ssctl_instance)
+			return -EINVAL;
+		break;
+	default:
+		return -EINVAL;
+	};
+
+	sysmon->ssctl_version = svc->version;
+
+	sysmon->ssctl.sq_family = AF_QIPCRTR;
+	sysmon->ssctl.sq_node = svc->node;
+	sysmon->ssctl.sq_port = svc->port;
+
+	svc->priv = sysmon;
+
+	return 0;
+}
+
+/**
+ * ssctl_del_server() - QMI callback indicating that @svc is removed
+ * @qmi:	QMI handle
+ * @svc:	service information
+ */
+static void ssctl_del_server(struct qmi_handle *qmi, struct qmi_service *svc)
+{
+	struct qcom_sysmon *sysmon = svc->priv;
+
+	sysmon->ssctl_version = 0;
+}
+
+static const struct qmi_ops ssctl_ops = {
+	.new_server = ssctl_new_server,
+	.del_server = ssctl_del_server,
+};
+
+static int sysmon_start(struct rproc_subdev *subdev)
+{
+	return 0;
+}
+
+static void sysmon_stop(struct rproc_subdev *subdev, bool crashed)
+{
+	struct qcom_sysmon *sysmon = container_of(subdev, struct qcom_sysmon, subdev);
+
+	blocking_notifier_call_chain(&sysmon_notifiers, 0, (void *)sysmon->name);
+
+	/* Don't request graceful shutdown if we've crashed */
+	if (crashed)
+		return;
+
+	if (sysmon->ssctl_version)
+		ssctl_request_shutdown(sysmon);
+	else if (sysmon->ept)
+		sysmon_request_shutdown(sysmon);
+}
+
+/**
+ * sysmon_notify() - notify sysmon target of another's SSR
+ * @nb:		notifier_block associated with sysmon instance
+ * @event:	unused
+ * @data:	SSR identifier of the remote that is going down
+ */
+static int sysmon_notify(struct notifier_block *nb, unsigned long event,
+			 void *data)
+{
+	struct qcom_sysmon *sysmon = container_of(nb, struct qcom_sysmon, nb);
+	struct rproc *rproc = sysmon->rproc;
+	const char *ssr_name = data;
+
+	/* Skip non-running rprocs and the originating instance */
+	if (rproc->state != RPROC_RUNNING || !strcmp(data, sysmon->name)) {
+		dev_dbg(sysmon->dev, "not notifying %s\n", sysmon->name);
+		return NOTIFY_DONE;
+	}
+
+	/* Only SSCTL version 2 supports SSR events */
+	if (sysmon->ssctl_version == 2)
+		ssctl_send_event(sysmon, ssr_name);
+	else if (sysmon->ept)
+		sysmon_send_event(sysmon, ssr_name);
+
+	return NOTIFY_DONE;
+}
+
+/**
+ * qcom_add_sysmon_subdev() - create a sysmon subdev for the given remoteproc
+ * @rproc:	rproc context to associate the subdev with
+ * @name:	name of this subdev, to use in SSR
+ * @ssctl_instance: instance id of the ssctl QMI service
+ *
+ * Return: A new qcom_sysmon object, or NULL on failure
+ */
+struct qcom_sysmon *qcom_add_sysmon_subdev(struct rproc *rproc,
+					   const char *name,
+					   int ssctl_instance)
+{
+	struct qcom_sysmon *sysmon;
+	int ret;
+
+	sysmon = kzalloc(sizeof(*sysmon), GFP_KERNEL);
+	if (!sysmon)
+		return NULL;
+
+	sysmon->dev = rproc->dev.parent;
+	sysmon->rproc = rproc;
+
+	sysmon->name = name;
+	sysmon->ssctl_instance = ssctl_instance;
+
+	init_completion(&sysmon->comp);
+	mutex_init(&sysmon->lock);
+
+	ret = qmi_handle_init(&sysmon->qmi, SSCTL_MAX_MSG_LEN, &ssctl_ops, NULL);
+	if (ret < 0) {
+		dev_err(sysmon->dev, "failed to initialize qmi handle\n");
+		kfree(sysmon);
+		return NULL;
+	}
+
+	qmi_add_lookup(&sysmon->qmi, 43, 0, 0);
+
+	rproc_add_subdev(rproc, &sysmon->subdev, sysmon_start, sysmon_stop);
+
+	sysmon->nb.notifier_call = sysmon_notify;
+	blocking_notifier_chain_register(&sysmon_notifiers, &sysmon->nb);
+
+	mutex_lock(&sysmon_lock);
+	list_add(&sysmon->node, &sysmon_list);
+	mutex_unlock(&sysmon_lock);
+
+	return sysmon;
+}
+EXPORT_SYMBOL_GPL(qcom_add_sysmon_subdev);
+
+/**
+ * qcom_remove_sysmon_subdev() - release a qcom_sysmon
+ * @sysmon:	sysmon context, as retrieved by qcom_add_sysmon_subdev()
+ */
+void qcom_remove_sysmon_subdev(struct qcom_sysmon *sysmon)
+{
+	if (!sysmon)
+		return;
+
+	mutex_lock(&sysmon_lock);
+	list_del(&sysmon->node);
+	mutex_unlock(&sysmon_lock);
+
+	blocking_notifier_chain_unregister(&sysmon_notifiers, &sysmon->nb);
+
+	rproc_remove_subdev(sysmon->rproc, &sysmon->subdev);
+
+	qmi_handle_release(&sysmon->qmi);
+
+	kfree(sysmon);
+}
+EXPORT_SYMBOL_GPL(qcom_remove_sysmon_subdev);
+
+/**
+ * sysmon_probe() - probe sys_mon channel
+ * @rpdev:	rpmsg device handle
+ *
+ * Find the sysmon context associated with the ancestor remoteproc and
+ * associate this rpmsg device with said sysmon context.
+ *
+ * Return: 0 on success, negative errno on failure.
+ */
+static int sysmon_probe(struct rpmsg_device *rpdev)
+{
+	struct qcom_sysmon *sysmon;
+	struct rproc *rproc;
+
+	rproc = rproc_get_by_child(&rpdev->dev);
+	if (!rproc) {
+		dev_err(&rpdev->dev, "sysmon device not child of rproc\n");
+		return -EINVAL;
+	}
+
+	mutex_lock(&sysmon_lock);
+	list_for_each_entry(sysmon, &sysmon_list, node) {
+		if (sysmon->rproc == rproc)
+			goto found;
+	}
+	mutex_unlock(&sysmon_lock);
+
+	dev_err(&rpdev->dev, "no sysmon associated with parent rproc\n");
+
+	return -EINVAL;
+
+found:
+	mutex_unlock(&sysmon_lock);
+
+	rpdev->ept->priv = sysmon;
+	sysmon->ept = rpdev->ept;
+
+	return 0;
+}
+
+/**
+ * sysmon_remove() - sys_mon channel remove handler
+ * @rpdev:	rpmsg device handle
+ *
+ * Disassociate the rpmsg device from the sysmon instance.
+ */
+static void sysmon_remove(struct rpmsg_device *rpdev)
+{
+	struct qcom_sysmon *sysmon = rpdev->ept->priv;
+
+	sysmon->ept = NULL;
+}
+
+static const struct rpmsg_device_id sysmon_match[] = {
+	{ "sys_mon" },
+	{}
+};
+
+static struct rpmsg_driver sysmon_driver = {
+	.probe = sysmon_probe,
+	.remove = sysmon_remove,
+	.callback = sysmon_callback,
+	.id_table = sysmon_match,
+	.drv = {
+		.name = "qcom_sysmon",
+	},
+};
+
+module_rpmsg_driver(sysmon_driver);
+
+MODULE_DESCRIPTION("Qualcomm sysmon driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/remoteproc/qcom_wcnss.c b/drivers/remoteproc/qcom_wcnss.c
index 3f0609236a76..b0e07e9f42d5 100644
--- a/drivers/remoteproc/qcom_wcnss.c
+++ b/drivers/remoteproc/qcom_wcnss.c
@@ -40,6 +40,7 @@
 #define WCNSS_CRASH_REASON_SMEM		422
 #define WCNSS_FIRMWARE_NAME		"wcnss.mdt"
 #define WCNSS_PAS_ID			6
+#define WCNSS_SSCTL_ID			0x13
 
 #define WCNSS_SPARE_NVBIN_DLND		BIT(25)
 
@@ -98,6 +99,7 @@ struct qcom_wcnss {
 	size_t mem_size;
 
 	struct qcom_rproc_subdev smd_subdev;
+	struct qcom_sysmon *sysmon;
 };
 
 static const struct wcnss_data riva_data = {
@@ -153,7 +155,8 @@ static int wcnss_load(struct rproc *rproc, const struct firmware *fw)
 	struct qcom_wcnss *wcnss = (struct qcom_wcnss *)rproc->priv;
 
 	return qcom_mdt_load(wcnss->dev, fw, rproc->firmware, WCNSS_PAS_ID,
-			     wcnss->mem_region, wcnss->mem_phys, wcnss->mem_size);
+			     wcnss->mem_region, wcnss->mem_phys,
+			     wcnss->mem_size, &wcnss->mem_reloc);
 }
 
 static void wcnss_indicate_nv_download(struct qcom_wcnss *wcnss)
@@ -308,6 +311,7 @@ static const struct rproc_ops wcnss_ops = {
 	.start = wcnss_start,
 	.stop = wcnss_stop,
 	.da_to_va = wcnss_da_to_va,
+	.parse_fw = qcom_register_dump_segments,
 	.load = wcnss_load,
 };
 
@@ -332,9 +336,6 @@ static irqreturn_t wcnss_fatal_interrupt(int irq, void *dev)
 
 	rproc_report_crash(wcnss->rproc, RPROC_FATAL_ERROR);
 
-	if (!IS_ERR(msg))
-		msg[0] = '\0';
-
 	return IRQ_HANDLED;
 }
 
@@ -551,6 +552,7 @@ static int wcnss_probe(struct platform_device *pdev)
 	}
 
 	qcom_add_smd_subdev(rproc, &wcnss->smd_subdev);
+	wcnss->sysmon = qcom_add_sysmon_subdev(rproc, "wcnss", WCNSS_SSCTL_ID);
 
 	ret = rproc_add(rproc);
 	if (ret)
@@ -573,6 +575,7 @@ static int wcnss_remove(struct platform_device *pdev)
 	qcom_smem_state_put(wcnss->state);
 	rproc_del(wcnss->rproc);
 
+	qcom_remove_sysmon_subdev(wcnss->sysmon);
 	qcom_remove_smd_subdev(wcnss->rproc, &wcnss->smd_subdev);
 	rproc_free(wcnss->rproc);
 
diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index 4170dfbd93bd..6d9c5832ce47 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -33,6 +33,7 @@
 #include <linux/firmware.h>
 #include <linux/string.h>
 #include <linux/debugfs.h>
+#include <linux/devcoredump.h>
 #include <linux/remoteproc.h>
 #include <linux/iommu.h>
 #include <linux/idr.h>
@@ -307,7 +308,7 @@ static int rproc_vdev_do_probe(struct rproc_subdev *subdev)
 	return rproc_add_virtio_dev(rvdev, rvdev->id);
 }
 
-static void rproc_vdev_do_remove(struct rproc_subdev *subdev)
+static void rproc_vdev_do_remove(struct rproc_subdev *subdev, bool crashed)
 {
 	struct rproc_vdev *rvdev = container_of(subdev, struct rproc_vdev, subdev);
 
@@ -788,17 +789,31 @@ static int rproc_probe_subdevices(struct rproc *rproc)
 
 unroll_registration:
 	list_for_each_entry_continue_reverse(subdev, &rproc->subdevs, node)
-		subdev->remove(subdev);
+		subdev->remove(subdev, true);
 
 	return ret;
 }
 
-static void rproc_remove_subdevices(struct rproc *rproc)
+static void rproc_remove_subdevices(struct rproc *rproc, bool crashed)
 {
 	struct rproc_subdev *subdev;
 
 	list_for_each_entry_reverse(subdev, &rproc->subdevs, node)
-		subdev->remove(subdev);
+		subdev->remove(subdev, crashed);
+}
+
+/**
+ * rproc_coredump_cleanup() - clean up dump_segments list
+ * @rproc: the remote processor handle
+ */
+static void rproc_coredump_cleanup(struct rproc *rproc)
+{
+	struct rproc_dump_segment *entry, *tmp;
+
+	list_for_each_entry_safe(entry, tmp, &rproc->dump_segments, node) {
+		list_del(&entry->node);
+		kfree(entry);
+	}
 }
 
 /**
@@ -848,6 +863,8 @@ static void rproc_resource_cleanup(struct rproc *rproc)
 	/* clean up remote vdev entries */
 	list_for_each_entry_safe(rvdev, rvtmp, &rproc->rvdevs, node)
 		kref_put(&rvdev->refcount, rproc_vdev_release);
+
+	rproc_coredump_cleanup(rproc);
 }
 
 static int rproc_start(struct rproc *rproc, const struct firmware *fw)
@@ -927,8 +944,8 @@ static int rproc_fw_boot(struct rproc *rproc, const struct firmware *fw)
 
 	rproc->bootaddr = rproc_get_boot_addr(rproc, fw);
 
-	/* load resource table */
-	ret = rproc_load_rsc_table(rproc, fw);
+	/* Load resource table, core dump segment list etc from the firmware */
+	ret = rproc_parse_fw(rproc, fw);
 	if (ret)
 		goto disable_iommu;
 
@@ -992,13 +1009,13 @@ static int rproc_trigger_auto_boot(struct rproc *rproc)
 	return ret;
 }
 
-static int rproc_stop(struct rproc *rproc)
+static int rproc_stop(struct rproc *rproc, bool crashed)
 {
 	struct device *dev = &rproc->dev;
 	int ret;
 
 	/* remove any subdevices for the remote processor */
-	rproc_remove_subdevices(rproc);
+	rproc_remove_subdevices(rproc, crashed);
 
 	/* the installed resource table is no longer accessible */
 	rproc->table_ptr = rproc->cached_table;
@@ -1018,6 +1035,113 @@ static int rproc_stop(struct rproc *rproc)
 }
 
 /**
+ * rproc_coredump_add_segment() - add segment of device memory to coredump
+ * @rproc:	handle of a remote processor
+ * @da:		device address
+ * @size:	size of segment
+ *
+ * Add device memory to the list of segments to be included in a coredump for
+ * the remoteproc.
+ *
+ * Return: 0 on success, negative errno on error.
+ */
+int rproc_coredump_add_segment(struct rproc *rproc, dma_addr_t da, size_t size)
+{
+	struct rproc_dump_segment *segment;
+
+	segment = kzalloc(sizeof(*segment), GFP_KERNEL);
+	if (!segment)
+		return -ENOMEM;
+
+	segment->da = da;
+	segment->size = size;
+
+	list_add_tail(&segment->node, &rproc->dump_segments);
+
+	return 0;
+}
+EXPORT_SYMBOL(rproc_coredump_add_segment);
+
+/**
+ * rproc_coredump() - perform coredump
+ * @rproc:	rproc handle
+ *
+ * This function will generate an ELF header for the registered segments
+ * and create a devcoredump device associated with rproc.
+ */
+static void rproc_coredump(struct rproc *rproc)
+{
+	struct rproc_dump_segment *segment;
+	struct elf32_phdr *phdr;
+	struct elf32_hdr *ehdr;
+	size_t data_size;
+	size_t offset;
+	void *data;
+	void *ptr;
+	int phnum = 0;
+
+	if (list_empty(&rproc->dump_segments))
+		return;
+
+	data_size = sizeof(*ehdr);
+	list_for_each_entry(segment, &rproc->dump_segments, node) {
+		data_size += sizeof(*phdr) + segment->size;
+
+		phnum++;
+	}
+
+	data = vmalloc(data_size);
+	if (!data)
+		return;
+
+	ehdr = data;
+
+	memset(ehdr, 0, sizeof(*ehdr));
+	memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
+	ehdr->e_ident[EI_CLASS] = ELFCLASS32;
+	ehdr->e_ident[EI_DATA] = ELFDATA2LSB;
+	ehdr->e_ident[EI_VERSION] = EV_CURRENT;
+	ehdr->e_ident[EI_OSABI] = ELFOSABI_NONE;
+	ehdr->e_type = ET_CORE;
+	ehdr->e_machine = EM_NONE;
+	ehdr->e_version = EV_CURRENT;
+	ehdr->e_entry = rproc->bootaddr;
+	ehdr->e_phoff = sizeof(*ehdr);
+	ehdr->e_ehsize = sizeof(*ehdr);
+	ehdr->e_phentsize = sizeof(*phdr);
+	ehdr->e_phnum = phnum;
+
+	phdr = data + ehdr->e_phoff;
+	offset = ehdr->e_phoff + sizeof(*phdr) * ehdr->e_phnum;
+	list_for_each_entry(segment, &rproc->dump_segments, node) {
+		memset(phdr, 0, sizeof(*phdr));
+		phdr->p_type = PT_LOAD;
+		phdr->p_offset = offset;
+		phdr->p_vaddr = segment->da;
+		phdr->p_paddr = segment->da;
+		phdr->p_filesz = segment->size;
+		phdr->p_memsz = segment->size;
+		phdr->p_flags = PF_R | PF_W | PF_X;
+		phdr->p_align = 0;
+
+		ptr = rproc_da_to_va(rproc, segment->da, segment->size);
+		if (!ptr) {
+			dev_err(&rproc->dev,
+				"invalid coredump segment (%pad, %zu)\n",
+				&segment->da, segment->size);
+			memset(data + offset, 0xff, segment->size);
+		} else {
+			memcpy(data + offset, ptr, segment->size);
+		}
+
+		offset += phdr->p_filesz;
+		phdr++;
+	}
+
+	dev_coredumpv(&rproc->dev, data, data_size, GFP_KERNEL);
+}
+
+/**
  * rproc_trigger_recovery() - recover a remoteproc
  * @rproc: the remote processor
  *
@@ -1039,10 +1163,13 @@ int rproc_trigger_recovery(struct rproc *rproc)
 	if (ret)
 		return ret;
 
-	ret = rproc_stop(rproc);
+	ret = rproc_stop(rproc, false);
 	if (ret)
 		goto unlock_mutex;
 
+	/* generate coredump */
+	rproc_coredump(rproc);
+
 	/* load firmware */
 	ret = request_firmware(&firmware_p, rproc->firmware, dev);
 	if (ret < 0) {
@@ -1189,7 +1316,7 @@ void rproc_shutdown(struct rproc *rproc)
 	if (!atomic_dec_and_test(&rproc->power))
 		goto out;
 
-	ret = rproc_stop(rproc);
+	ret = rproc_stop(rproc, true);
 	if (ret) {
 		atomic_inc(&rproc->power);
 		goto out;
@@ -1428,7 +1555,7 @@ struct rproc *rproc_alloc(struct device *dev, const char *name,
 	/* Default to ELF loader if no load function is specified */
 	if (!rproc->ops->load) {
 		rproc->ops->load = rproc_elf_load_segments;
-		rproc->ops->load_rsc_table = rproc_elf_load_rsc_table;
+		rproc->ops->parse_fw = rproc_elf_load_rsc_table;
 		rproc->ops->find_loaded_rsc_table = rproc_elf_find_loaded_rsc_table;
 		rproc->ops->sanity_check = rproc_elf_sanity_check;
 		rproc->ops->get_boot_addr = rproc_elf_get_boot_addr;
@@ -1443,6 +1570,7 @@ struct rproc *rproc_alloc(struct device *dev, const char *name,
 	INIT_LIST_HEAD(&rproc->traces);
 	INIT_LIST_HEAD(&rproc->rvdevs);
 	INIT_LIST_HEAD(&rproc->subdevs);
+	INIT_LIST_HEAD(&rproc->dump_segments);
 
 	INIT_WORK(&rproc->crash_handler, rproc_crash_handler_work);
 
@@ -1535,7 +1663,7 @@ EXPORT_SYMBOL(rproc_del);
 void rproc_add_subdev(struct rproc *rproc,
 		      struct rproc_subdev *subdev,
 		      int (*probe)(struct rproc_subdev *subdev),
-		      void (*remove)(struct rproc_subdev *subdev))
+		      void (*remove)(struct rproc_subdev *subdev, bool crashed))
 {
 	subdev->probe = probe;
 	subdev->remove = remove;
diff --git a/drivers/remoteproc/remoteproc_internal.h b/drivers/remoteproc/remoteproc_internal.h
index 55a2950c5cb7..7570beb035b5 100644
--- a/drivers/remoteproc/remoteproc_internal.h
+++ b/drivers/remoteproc/remoteproc_internal.h
@@ -88,11 +88,10 @@ int rproc_load_segments(struct rproc *rproc, const struct firmware *fw)
 	return -EINVAL;
 }
 
-static inline int rproc_load_rsc_table(struct rproc *rproc,
-				       const struct firmware *fw)
+static inline int rproc_parse_fw(struct rproc *rproc, const struct firmware *fw)
 {
-	if (rproc->ops->load_rsc_table)
-		return rproc->ops->load_rsc_table(rproc, fw);
+	if (rproc->ops->parse_fw)
+		return rproc->ops->parse_fw(rproc, fw);
 
 	return 0;
 }
diff --git a/drivers/rpmsg/qcom_glink_native.c b/drivers/rpmsg/qcom_glink_native.c
index e0f31ed096a5..768ef542a841 100644
--- a/drivers/rpmsg/qcom_glink_native.c
+++ b/drivers/rpmsg/qcom_glink_native.c
@@ -113,7 +113,7 @@ struct qcom_glink {
 	spinlock_t rx_lock;
 	struct list_head rx_queue;
 
-	struct mutex tx_lock;
+	spinlock_t tx_lock;
 
 	spinlock_t idr_lock;
 	struct idr lcids;
@@ -288,15 +288,14 @@ static int qcom_glink_tx(struct qcom_glink *glink,
 			 const void *data, size_t dlen, bool wait)
 {
 	unsigned int tlen = hlen + dlen;
-	int ret;
+	unsigned long flags;
+	int ret = 0;
 
 	/* Reject packets that are too big */
 	if (tlen >= glink->tx_pipe->length)
 		return -EINVAL;
 
-	ret = mutex_lock_interruptible(&glink->tx_lock);
-	if (ret)
-		return ret;
+	spin_lock_irqsave(&glink->tx_lock, flags);
 
 	while (qcom_glink_tx_avail(glink) < tlen) {
 		if (!wait) {
@@ -304,7 +303,12 @@ static int qcom_glink_tx(struct qcom_glink *glink,
 			goto out;
 		}
 
+		/* Wait without holding the tx_lock */
+		spin_unlock_irqrestore(&glink->tx_lock, flags);
+
 		usleep_range(10000, 15000);
+
+		spin_lock_irqsave(&glink->tx_lock, flags);
 	}
 
 	qcom_glink_tx_write(glink, hdr, hlen, data, dlen);
@@ -313,7 +317,7 @@ static int qcom_glink_tx(struct qcom_glink *glink,
 	mbox_client_txdone(glink->mbox_chan, 0);
 
 out:
-	mutex_unlock(&glink->tx_lock);
+	spin_unlock_irqrestore(&glink->tx_lock, flags);
 
 	return ret;
 }
@@ -1567,7 +1571,7 @@ struct qcom_glink *qcom_glink_native_probe(struct device *dev,
 	glink->features = features;
 	glink->intentless = intentless;
 
-	mutex_init(&glink->tx_lock);
+	spin_lock_init(&glink->tx_lock);
 	spin_lock_init(&glink->rx_lock);
 	INIT_LIST_HEAD(&glink->rx_queue);
 	INIT_WORK(&glink->rx_work, qcom_glink_work);
diff --git a/drivers/rpmsg/qcom_glink_smem.c b/drivers/rpmsg/qcom_glink_smem.c
index 892f2b92a4d8..3fa9d43e2c87 100644
--- a/drivers/rpmsg/qcom_glink_smem.c
+++ b/drivers/rpmsg/qcom_glink_smem.c
@@ -217,6 +217,7 @@ struct qcom_glink *qcom_glink_smem_register(struct device *parent,
 	ret = device_register(dev);
 	if (ret) {
 		pr_err("failed to register glink edge\n");
+		put_device(dev);
 		return ERR_PTR(ret);
 	}
 
@@ -299,7 +300,7 @@ struct qcom_glink *qcom_glink_smem_register(struct device *parent,
 	return glink;
 
 err_put_dev:
-	put_device(dev);
+	device_unregister(dev);
 
 	return ERR_PTR(ret);
 }
diff --git a/drivers/rpmsg/qcom_smd.c b/drivers/rpmsg/qcom_smd.c
index 92d0c6a7a837..5ce9bf7b897d 100644
--- a/drivers/rpmsg/qcom_smd.c
+++ b/drivers/rpmsg/qcom_smd.c
@@ -167,9 +167,9 @@ struct qcom_smd_endpoint {
 	struct qcom_smd_channel *qsch;
 };
 
-#define to_smd_device(_rpdev)	container_of(_rpdev, struct qcom_smd_device, rpdev)
+#define to_smd_device(r)	container_of(r, struct qcom_smd_device, rpdev)
 #define to_smd_edge(d)		container_of(d, struct qcom_smd_edge, dev)
-#define to_smd_endpoint(ept)	container_of(ept, struct qcom_smd_endpoint, ept)
+#define to_smd_endpoint(e)	container_of(e, struct qcom_smd_endpoint, ept)
 
 /**
  * struct qcom_smd_channel - smd channel struct
@@ -205,7 +205,7 @@ struct qcom_smd_channel {
 	struct smd_channel_info_pair *info;
 	struct smd_channel_info_word_pair *info_word;
 
-	struct mutex tx_lock;
+	spinlock_t tx_lock;
 	wait_queue_head_t fblockread_event;
 
 	void *tx_fifo;
@@ -729,6 +729,7 @@ static int __qcom_smd_send(struct qcom_smd_channel *channel, const void *data,
 {
 	__le32 hdr[5] = { cpu_to_le32(len), };
 	int tlen = sizeof(hdr) + len;
+	unsigned long flags;
 	int ret;
 
 	/* Word aligned channels only accept word size aligned data */
@@ -739,9 +740,11 @@ static int __qcom_smd_send(struct qcom_smd_channel *channel, const void *data,
 	if (tlen >= channel->fifo_size)
 		return -EINVAL;
 
-	ret = mutex_lock_interruptible(&channel->tx_lock);
-	if (ret)
-		return ret;
+	/* Highlight the fact that if we enter the loop below we might sleep */
+	if (wait)
+		might_sleep();
+
+	spin_lock_irqsave(&channel->tx_lock, flags);
 
 	while (qcom_smd_get_tx_avail(channel) < tlen &&
 	       channel->state == SMD_CHANNEL_OPENED) {
@@ -753,7 +756,7 @@ static int __qcom_smd_send(struct qcom_smd_channel *channel, const void *data,
 		SET_TX_CHANNEL_FLAG(channel, fBLOCKREADINTR, 0);
 
 		/* Wait without holding the tx_lock */
-		mutex_unlock(&channel->tx_lock);
+		spin_unlock_irqrestore(&channel->tx_lock, flags);
 
 		ret = wait_event_interruptible(channel->fblockread_event,
 				       qcom_smd_get_tx_avail(channel) >= tlen ||
@@ -761,9 +764,7 @@ static int __qcom_smd_send(struct qcom_smd_channel *channel, const void *data,
 		if (ret)
 			return ret;
 
-		ret = mutex_lock_interruptible(&channel->tx_lock);
-		if (ret)
-			return ret;
+		spin_lock_irqsave(&channel->tx_lock, flags);
 
 		SET_TX_CHANNEL_FLAG(channel, fBLOCKREADINTR, 1);
 	}
@@ -787,7 +788,7 @@ static int __qcom_smd_send(struct qcom_smd_channel *channel, const void *data,
 	qcom_smd_signal_channel(channel);
 
 out_unlock:
-	mutex_unlock(&channel->tx_lock);
+	spin_unlock_irqrestore(&channel->tx_lock, flags);
 
 	return ret;
 }
@@ -996,8 +997,26 @@ static struct device_node *qcom_smd_match_channel(struct device_node *edge_node,
 	return NULL;
 }
 
+static int qcom_smd_announce_create(struct rpmsg_device *rpdev)
+{
+	struct qcom_smd_endpoint *qept = to_smd_endpoint(rpdev->ept);
+	struct qcom_smd_channel *channel = qept->qsch;
+	unsigned long flags;
+	bool kick_state;
+
+	spin_lock_irqsave(&channel->recv_lock, flags);
+	kick_state = qcom_smd_channel_intr(channel);
+	spin_unlock_irqrestore(&channel->recv_lock, flags);
+	/* Only kick the edge's state worker when the call above asked for it */
+	if (kick_state)
+		schedule_work(&channel->edge->state_work);
+
+	return 0;
+}
+
 static const struct rpmsg_device_ops qcom_smd_device_ops = {
 	.create_ept = qcom_smd_create_ept,
+	.announce_create = qcom_smd_announce_create,
 };
 
 static const struct rpmsg_endpoint_ops qcom_smd_endpoint_ops = {
@@ -1090,7 +1109,7 @@ static struct qcom_smd_channel *qcom_smd_create_channel(struct qcom_smd_edge *ed
 	if (!channel->name)
 		return ERR_PTR(-ENOMEM);
 
-	mutex_init(&channel->tx_lock);
+	spin_lock_init(&channel->tx_lock);
 	spin_lock_init(&channel->recv_lock);
 	init_waitqueue_head(&channel->fblockread_event);
 	init_waitqueue_head(&channel->state_change_event);
@@ -1234,6 +1253,11 @@ static void qcom_channel_state_worker(struct work_struct *work)
 		if (channel->state != SMD_CHANNEL_CLOSED)
 			continue;
 
+		remote_state = GET_RX_CHANNEL_INFO(channel, state);
+		if (remote_state != SMD_CHANNEL_OPENING &&
+		    remote_state != SMD_CHANNEL_OPENED)
+			continue;
+
 		if (channel->registered)
 			continue;
 
@@ -1408,6 +1432,7 @@ struct qcom_smd_edge *qcom_smd_register_edge(struct device *parent,
 	ret = device_register(&edge->dev);
 	if (ret) {
 		pr_err("failed to register smd edge\n");
+		put_device(&edge->dev);
 		return ERR_PTR(ret);
 	}
 
@@ -1428,7 +1453,7 @@ struct qcom_smd_edge *qcom_smd_register_edge(struct device *parent,
 	return edge;
 
 unregister_dev:
-	put_device(&edge->dev);
+	device_unregister(&edge->dev);
 	return ERR_PTR(ret);
 }
 EXPORT_SYMBOL(qcom_smd_register_edge);
diff --git a/drivers/rpmsg/rpmsg_core.c b/drivers/rpmsg/rpmsg_core.c
index 5a081762afcc..920a02f0462c 100644
--- a/drivers/rpmsg/rpmsg_core.c
+++ b/drivers/rpmsg/rpmsg_core.c
@@ -442,7 +442,7 @@ static int rpmsg_dev_probe(struct device *dev)
 		goto out;
 	}
 
-	if (rpdev->ops->announce_create)
+	if (ept && rpdev->ops->announce_create)
 		err = rpdev->ops->announce_create(rpdev);
 out:
 	return err;
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 319e3c8976d5..59e6dede3db3 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -407,6 +407,16 @@ config RTC_DRV_ISL12022
 	  This driver can also be built as a module. If so, the module
 	  will be called rtc-isl12022.
 
+config RTC_DRV_ISL12026
+	tristate "Intersil ISL12026"
+	depends on OF || COMPILE_TEST
+	help
+	  If you say yes here you get support for the
+	  Intersil ISL12026 RTC chip.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called rtc-isl12026.
+
 config RTC_DRV_X1205
 	tristate "Xicor/Intersil X1205"
 	help
@@ -1413,6 +1423,7 @@ config RTC_DRV_AT91RM9200
 config RTC_DRV_AT91SAM9
 	tristate "AT91SAM9 RTT as RTC"
 	depends on ARCH_AT91 || COMPILE_TEST
+	depends on HAS_IOMEM
 	select MFD_SYSCON
 	help
 	  Some AT91SAM9 SoCs provide an RTT (Real Time Timer) block which
@@ -1502,7 +1513,7 @@ config RTC_DRV_STARFIRE
 
 config RTC_DRV_TX4939
 	tristate "TX4939 SoC"
-	depends on SOC_TX4939
+	depends on SOC_TX4939 || COMPILE_TEST
 	help
 	  Driver for the internal RTC (Realtime Clock) module found on
 	  Toshiba TX4939 SoC.
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index ee0206becd9f..5ff2fc0c361a 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -75,6 +75,7 @@ obj-$(CONFIG_RTC_DRV_HID_SENSOR_TIME) += rtc-hid-sensor-time.o
 obj-$(CONFIG_RTC_DRV_HYM8563)	+= rtc-hym8563.o
 obj-$(CONFIG_RTC_DRV_IMXDI)	+= rtc-imxdi.o
 obj-$(CONFIG_RTC_DRV_ISL12022)	+= rtc-isl12022.o
+obj-$(CONFIG_RTC_DRV_ISL12026)	+= rtc-isl12026.o
 obj-$(CONFIG_RTC_DRV_ISL1208)	+= rtc-isl1208.o
 obj-$(CONFIG_RTC_DRV_JZ4740)	+= rtc-jz4740.o
 obj-$(CONFIG_RTC_DRV_LP8788)	+= rtc-lp8788.o
diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c
index 722d683e0b0f..d37588f08055 100644
--- a/drivers/rtc/class.c
+++ b/drivers/rtc/class.c
@@ -211,6 +211,73 @@ static int rtc_device_get_id(struct device *dev)
 	return id;
 }
 
+static void rtc_device_get_offset(struct rtc_device *rtc)
+{
+	time64_t range_secs;
+	u32 start_year;
+	int ret;
+
+	/*
+	 * If the RTC driver did not set the range of the RTC hardware
+	 * (range_min == range_max), we cannot expand the RTC range by
+	 * adding or subtracting an offset.
+	 */
+	if (rtc->range_min == rtc->range_max)
+		return;
+
+	ret = device_property_read_u32(rtc->dev.parent, "start-year",
+				       &start_year);
+	if (!ret) {
+		rtc->start_secs = mktime64(start_year, 1, 1, 0, 0, 0);
+		rtc->set_start_time = true;
+	}
+
+	/*
+	 * If no start time was set, either beforehand or through the
+	 * "start-year" device property, there is no need to expand the range.
+	 */
+	if (!rtc->set_start_time)
+		return;
+
+	range_secs = rtc->range_max - rtc->range_min + 1;
+
+	/*
+	 * If start_secs is larger than the maximum seconds (rtc->range_max)
+	 * supported by the RTC hardware, or the end of the new expanded
+	 * range (start_secs + rtc->range_max - rtc->range_min) is less than
+	 * rtc->range_min, the two ranges do not overlap. The minimum seconds
+	 * (rtc->range_min) of the hardware is then mapped to start_secs by
+	 * adding one offset, so the offset is calculated as:
+	 * rtc->offset_secs = rtc->start_secs - rtc->range_min;
+	 *
+	 * If start_secs is larger than the minimum seconds (rtc->range_min)
+	 * supported by the RTC hardware, the original hardware range and the
+	 * new expanded range overlap. The overlapping region does not need
+	 * to be remapped because it is already valid for the RTC device.
+	 * Only the rest of the hardware range is shifted, so that the
+	 * hardware minimum (rtc->range_min) is mapped to
+	 * rtc->range_max + 1, which gives:
+	 * rtc->offset_secs = rtc->range_max - rtc->range_min + 1;
+	 *
+	 * If start_secs is less than the minimum seconds (rtc->range_min),
+	 * the ranges overlap as in the previous case, but the rest of the
+	 * hardware range has to be shifted in the opposite direction by the
+	 * same amount, which gives:
+	 * rtc->offset_secs = -(rtc->range_max - rtc->range_min + 1);
+	 *
+	 * Otherwise the offset seconds should be 0.
+	 */
+	if (rtc->start_secs > rtc->range_max ||
+	    rtc->start_secs + range_secs - 1 < rtc->range_min)
+		rtc->offset_secs = rtc->start_secs - rtc->range_min;
+	else if (rtc->start_secs > rtc->range_min)
+		rtc->offset_secs = range_secs;
+	else if (rtc->start_secs < rtc->range_min)
+		rtc->offset_secs = -range_secs;
+	else
+		rtc->offset_secs = 0;
+}
+
 /**
  * rtc_device_register - register w/ RTC class
  * @dev: the device to register
@@ -247,6 +314,8 @@ struct rtc_device *rtc_device_register(const char *name, struct device *dev,
 
 	dev_set_name(&rtc->dev, "rtc%d", id);
 
+	rtc_device_get_offset(rtc);
+
 	/* Check to see if there is an ALARM already set in hw */
 	err = __rtc_read_alarm(rtc, &alrm);
 
@@ -293,8 +362,6 @@ EXPORT_SYMBOL_GPL(rtc_device_register);
  */
 void rtc_device_unregister(struct rtc_device *rtc)
 {
-	rtc_nvmem_unregister(rtc);
-
 	mutex_lock(&rtc->ops_lock);
 	/*
 	 * Remove innards of this RTC, then disable it, before
@@ -312,6 +379,7 @@ static void devm_rtc_device_release(struct device *dev, void *res)
 {
 	struct rtc_device *rtc = *(struct rtc_device **)res;
 
+	rtc_nvmem_unregister(rtc);
 	rtc_device_unregister(rtc);
 }
 
@@ -382,6 +450,8 @@ static void devm_rtc_release_device(struct device *dev, void *res)
 {
 	struct rtc_device *rtc = *(struct rtc_device **)res;
 
+	rtc_nvmem_unregister(rtc);
+
 	if (rtc->registered)
 		rtc_device_unregister(rtc);
 	else
@@ -435,6 +505,7 @@ int __rtc_register_device(struct module *owner, struct rtc_device *rtc)
 		return -EINVAL;
 
 	rtc->owner = owner;
+	rtc_device_get_offset(rtc);
 
 	/* Check to see if there is an ALARM already set in hw */
 	err = __rtc_read_alarm(rtc, &alrm);
@@ -453,8 +524,6 @@ int __rtc_register_device(struct module *owner, struct rtc_device *rtc)
 
 	rtc_proc_add_device(rtc);
 
-	rtc_nvmem_register(rtc);
-
 	rtc->registered = true;
 	dev_info(rtc->dev.parent, "registered as %s\n",
 		 dev_name(&rtc->dev));
diff --git a/drivers/rtc/hctosys.c b/drivers/rtc/hctosys.c
index e1cfa06810ef..e79f2a181ad2 100644
--- a/drivers/rtc/hctosys.c
+++ b/drivers/rtc/hctosys.c
@@ -49,6 +49,14 @@ static int __init rtc_hctosys(void)
 
 	tv64.tv_sec = rtc_tm_to_time64(&tm);
 
+#if BITS_PER_LONG == 32
+	/* Seconds value too large on 32-bit: report a range error */
+	if (tv64.tv_sec > INT_MAX) {
+		err = -ERANGE;
+		goto err_read;
+	}
+#endif
+
 	err = do_settimeofday64(&tv64);
 
 	dev_info(rtc->dev.parent,
diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index 672b192f8153..7cbdc9228dd5 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -17,9 +17,73 @@
 #include <linux/log2.h>
 #include <linux/workqueue.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/rtc.h>
+
 static int rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer);
 static void rtc_timer_remove(struct rtc_device *rtc, struct rtc_timer *timer);
 
+static void rtc_add_offset(struct rtc_device *rtc, struct rtc_time *tm)
+{
+	time64_t secs;
+
+	if (!rtc->offset_secs)
+		return;
+
+	secs = rtc_tm_to_time64(tm);
+
+	/*
+	 * Time values read from the RTC device are always within the
+	 * original hardware range. Values that fall in the region where the
+	 * expanded range overlaps the original range are already correct,
+	 * so return them without adding the offset.
+	 */
+	if ((rtc->start_secs > rtc->range_min && secs >= rtc->start_secs) ||
+	    (rtc->start_secs < rtc->range_min &&
+	     secs <= (rtc->start_secs + rtc->range_max - rtc->range_min)))
+		return;
+
+	rtc_time64_to_tm(secs + rtc->offset_secs, tm);
+}
+
+static void rtc_subtract_offset(struct rtc_device *rtc, struct rtc_time *tm)
+{
+	time64_t secs;
+
+	if (!rtc->offset_secs)
+		return;
+
+	secs = rtc_tm_to_time64(tm);
+
+	/*
+	 * If the time to set is within the valid range of the RTC hardware
+	 * device, there is no need to subtract the offset before programming
+	 * it. Otherwise subtract the offset so that the value becomes valid
+	 * for the RTC hardware device.
+	 */
+	if (secs >= rtc->range_min && secs <= rtc->range_max)
+		return;
+
+	rtc_time64_to_tm(secs - rtc->offset_secs, tm);
+}
+
+static int rtc_valid_range(struct rtc_device *rtc, struct rtc_time *tm)
+{
+	if (rtc->range_min != rtc->range_max) { /* a range was declared */
+		time64_t time = rtc_tm_to_time64(tm);
+		time64_t range_min = rtc->set_start_time ? rtc->start_secs :
+			rtc->range_min;
+		time64_t range_max = rtc->set_start_time ?
+			(rtc->start_secs + rtc->range_max - rtc->range_min) :
+			rtc->range_max;
+
+		if (time < range_min || time > range_max)
+			return -ERANGE;
+	}
+
+	return 0;
+}
+
 static int __rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm)
 {
 	int err;
@@ -36,6 +100,8 @@ static int __rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm)
 			return err;
 		}
 
+		rtc_add_offset(rtc, tm);
+
 		err = rtc_valid_tm(tm);
 		if (err < 0)
 			dev_dbg(&rtc->dev, "read_time: rtc_time isn't valid\n");
@@ -53,6 +119,8 @@ int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm)
 
 	err = __rtc_read_time(rtc, tm);
 	mutex_unlock(&rtc->ops_lock);
+
+	trace_rtc_read_time(rtc_tm_to_time64(tm), err);
 	return err;
 }
 EXPORT_SYMBOL_GPL(rtc_read_time);
@@ -65,6 +133,12 @@ int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm)
 	if (err != 0)
 		return err;
 
+	err = rtc_valid_range(rtc, tm);
+	if (err)
+		return err;
+
+	rtc_subtract_offset(rtc, tm);
+
 	err = mutex_lock_interruptible(&rtc->ops_lock);
 	if (err)
 		return err;
@@ -87,6 +161,8 @@ int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm)
 	mutex_unlock(&rtc->ops_lock);
 	/* A timer might have just expired */
 	schedule_work(&rtc->irqwork);
+
+	trace_rtc_set_time(rtc_tm_to_time64(tm), err);
 	return err;
 }
 EXPORT_SYMBOL_GPL(rtc_set_time);
@@ -119,6 +195,8 @@ static int rtc_read_alarm_internal(struct rtc_device *rtc, struct rtc_wkalrm *al
 	}
 
 	mutex_unlock(&rtc->ops_lock);
+
+	trace_rtc_read_alarm(rtc_tm_to_time64(&alarm->time), err);
 	return err;
 }
 
@@ -316,6 +394,7 @@ int rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
 	}
 	mutex_unlock(&rtc->ops_lock);
 
+	trace_rtc_read_alarm(rtc_tm_to_time64(&alarm->time), err);
 	return err;
 }
 EXPORT_SYMBOL_GPL(rtc_read_alarm);
@@ -329,6 +408,8 @@ static int __rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
 	err = rtc_valid_tm(&alarm->time);
 	if (err)
 		return err;
+
+	rtc_subtract_offset(rtc, &alarm->time);
 	scheduled = rtc_tm_to_time64(&alarm->time);
 
 	/* Make sure we're not setting alarms in the past */
@@ -352,6 +433,7 @@ static int __rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
 	else
 		err = rtc->ops->set_alarm(rtc->dev.parent, alarm);
 
+	trace_rtc_set_alarm(rtc_tm_to_time64(&alarm->time), err);
 	return err;
 }
 
@@ -363,6 +445,10 @@ int rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
 	if (err != 0)
 		return err;
 
+	err = rtc_valid_range(rtc, &alarm->time);
+	if (err)
+		return err;
+
 	err = mutex_lock_interruptible(&rtc->ops_lock);
 	if (err)
 		return err;
@@ -375,6 +461,8 @@ int rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
 		err = rtc_timer_enqueue(rtc, &rtc->aie_timer);
 
 	mutex_unlock(&rtc->ops_lock);
+
+	rtc_add_offset(rtc, &alarm->time);
 	return err;
 }
 EXPORT_SYMBOL_GPL(rtc_set_alarm);
@@ -406,6 +494,7 @@ int rtc_initialize_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
 
 		rtc->aie_timer.enabled = 1;
 		timerqueue_add(&rtc->timerqueue, &rtc->aie_timer.node);
+		trace_rtc_timer_enqueue(&rtc->aie_timer);
 	}
 	mutex_unlock(&rtc->ops_lock);
 	return err;
@@ -435,6 +524,8 @@ int rtc_alarm_irq_enable(struct rtc_device *rtc, unsigned int enabled)
 		err = rtc->ops->alarm_irq_enable(rtc->dev.parent, enabled);
 
 	mutex_unlock(&rtc->ops_lock);
+
+	trace_rtc_alarm_irq_enable(enabled, err);
 	return err;
 }
 EXPORT_SYMBOL_GPL(rtc_alarm_irq_enable);
@@ -709,6 +800,8 @@ retry:
 		rtc->pie_enabled = enabled;
 	}
 	spin_unlock_irqrestore(&rtc->irq_task_lock, flags);
+
+	trace_rtc_irq_set_state(enabled, err);
 	return err;
 }
 EXPORT_SYMBOL_GPL(rtc_irq_set_state);
@@ -745,6 +838,8 @@ retry:
 		}
 	}
 	spin_unlock_irqrestore(&rtc->irq_task_lock, flags);
+
+	trace_rtc_irq_set_freq(freq, err);
 	return err;
 }
 EXPORT_SYMBOL_GPL(rtc_irq_set_freq);
@@ -779,6 +874,7 @@ static int rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer)
 	}
 
 	timerqueue_add(&rtc->timerqueue, &timer->node);
+	trace_rtc_timer_enqueue(timer);
 	if (!next || ktime_before(timer->node.expires, next->expires)) {
 		struct rtc_wkalrm alarm;
 		int err;
@@ -790,6 +886,7 @@ static int rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer)
 			schedule_work(&rtc->irqwork);
 		} else if (err) {
 			timerqueue_del(&rtc->timerqueue, &timer->node);
+			trace_rtc_timer_dequeue(timer);
 			timer->enabled = 0;
 			return err;
 		}
@@ -803,6 +900,7 @@ static void rtc_alarm_disable(struct rtc_device *rtc)
 		return;
 
 	rtc->ops->alarm_irq_enable(rtc->dev.parent, false);
+	trace_rtc_alarm_irq_enable(0, 0);
 }
 
 /**
@@ -821,6 +919,7 @@ static void rtc_timer_remove(struct rtc_device *rtc, struct rtc_timer *timer)
 {
 	struct timerqueue_node *next = timerqueue_getnext(&rtc->timerqueue);
 	timerqueue_del(&rtc->timerqueue, &timer->node);
+	trace_rtc_timer_dequeue(timer);
 	timer->enabled = 0;
 	if (next == &timer->node) {
 		struct rtc_wkalrm alarm;
@@ -871,16 +970,19 @@ again:
 		/* expire timer */
 		timer = container_of(next, struct rtc_timer, node);
 		timerqueue_del(&rtc->timerqueue, &timer->node);
+		trace_rtc_timer_dequeue(timer);
 		timer->enabled = 0;
 		if (timer->task.func)
 			timer->task.func(timer->task.private_data);
 
+		trace_rtc_timer_fired(timer);
 		/* Re-add/fwd periodic timers */
 		if (ktime_to_ns(timer->period)) {
 			timer->node.expires = ktime_add(timer->node.expires,
 							timer->period);
 			timer->enabled = 1;
 			timerqueue_add(&rtc->timerqueue, &timer->node);
+			trace_rtc_timer_enqueue(timer);
 		}
 	}
 
@@ -902,6 +1004,7 @@ reprogram:
 
 			timer = container_of(next, struct rtc_timer, node);
 			timerqueue_del(&rtc->timerqueue, &timer->node);
+			trace_rtc_timer_dequeue(timer);
 			timer->enabled = 0;
 			dev_err(&rtc->dev, "__rtc_set_alarm: err=%d\n", err);
 			goto again;
@@ -992,6 +1095,8 @@ int rtc_read_offset(struct rtc_device *rtc, long *offset)
 	mutex_lock(&rtc->ops_lock);
 	ret = rtc->ops->read_offset(rtc->dev.parent, offset);
 	mutex_unlock(&rtc->ops_lock);
+
+	trace_rtc_read_offset(*offset, ret);
 	return ret;
 }
 
@@ -1025,5 +1130,7 @@ int rtc_set_offset(struct rtc_device *rtc, long offset)
 	mutex_lock(&rtc->ops_lock);
 	ret = rtc->ops->set_offset(rtc->dev.parent, offset);
 	mutex_unlock(&rtc->ops_lock);
+
+	trace_rtc_set_offset(offset, ret);
 	return ret;
 }
diff --git a/drivers/rtc/nvmem.c b/drivers/rtc/nvmem.c
index 8567b4ed9ac6..17ec4c8d0fad 100644
--- a/drivers/rtc/nvmem.c
+++ b/drivers/rtc/nvmem.c
@@ -14,8 +14,6 @@
 #include <linux/rtc.h>
 #include <linux/sysfs.h>
 
-#include "rtc-core.h"
-
 /*
  * Deprecated ABI compatibility, this should be removed at some point
  */
@@ -46,7 +44,7 @@ rtc_nvram_write(struct file *filp, struct kobject *kobj,
 	return nvmem_device_write(rtc->nvmem, off, count, buf);
 }
 
-static int rtc_nvram_register(struct rtc_device *rtc)
+static int rtc_nvram_register(struct rtc_device *rtc, size_t size)
 {
 	int err;
 
@@ -64,7 +62,7 @@ static int rtc_nvram_register(struct rtc_device *rtc)
 
 	rtc->nvram->read = rtc_nvram_read;
 	rtc->nvram->write = rtc_nvram_write;
-	rtc->nvram->size = rtc->nvmem_config->size;
+	rtc->nvram->size = size;
 
 	err = sysfs_create_bin_file(&rtc->dev.parent->kobj,
 				    rtc->nvram);
@@ -84,21 +82,28 @@ static void rtc_nvram_unregister(struct rtc_device *rtc)
 /*
  * New ABI, uses nvmem
  */
-void rtc_nvmem_register(struct rtc_device *rtc)
+int rtc_nvmem_register(struct rtc_device *rtc,
+		       struct nvmem_config *nvmem_config)
 {
-	if (!rtc->nvmem_config)
-		return;
+	if (!IS_ERR_OR_NULL(rtc->nvmem))
+		return -EBUSY;
+
+	if (!nvmem_config)
+		return -ENODEV;
 
-	rtc->nvmem_config->dev = &rtc->dev;
-	rtc->nvmem_config->owner = rtc->owner;
-	rtc->nvmem = nvmem_register(rtc->nvmem_config);
+	nvmem_config->dev = rtc->dev.parent;
+	nvmem_config->owner = rtc->owner;
+	rtc->nvmem = nvmem_register(nvmem_config);
 	if (IS_ERR_OR_NULL(rtc->nvmem))
-		return;
+		return PTR_ERR(rtc->nvmem);
 
 	/* Register the old ABI */
 	if (rtc->nvram_old_abi)
-		rtc_nvram_register(rtc);
+		rtc_nvram_register(rtc, nvmem_config->size);
+
+	return 0;
 }
+EXPORT_SYMBOL_GPL(rtc_nvmem_register);
 
 void rtc_nvmem_unregister(struct rtc_device *rtc)
 {
diff --git a/drivers/rtc/rtc-88pm80x.c b/drivers/rtc/rtc-88pm80x.c
index 466bf7f9a285..6cbafefa80a2 100644
--- a/drivers/rtc/rtc-88pm80x.c
+++ b/drivers/rtc/rtc-88pm80x.c
@@ -134,9 +134,9 @@ static int pm80x_rtc_set_time(struct device *dev, struct rtc_time *tm)
 	struct pm80x_rtc_info *info = dev_get_drvdata(dev);
 	unsigned char buf[4];
 	unsigned long ticks, base, data;
-	if ((tm->tm_year < 70) || (tm->tm_year > 138)) {
+	if (tm->tm_year > 206) {
 		dev_dbg(info->dev,
-			"Set time %d out of range. Please set time between 1970 to 2038.\n",
+			"Set time %d out of range. Please set time between 1970 to 2106.\n",
 			1900 + tm->tm_year);
 		return -EINVAL;
 	}
diff --git a/drivers/rtc/rtc-88pm860x.c b/drivers/rtc/rtc-88pm860x.c
index 19e53b3b8e00..01ffc0ef8033 100644
--- a/drivers/rtc/rtc-88pm860x.c
+++ b/drivers/rtc/rtc-88pm860x.c
@@ -135,9 +135,9 @@ static int pm860x_rtc_set_time(struct device *dev, struct rtc_time *tm)
 	unsigned char buf[4];
 	unsigned long ticks, base, data;
 
-	if ((tm->tm_year < 70) || (tm->tm_year > 138)) {
+	if (tm->tm_year > 206) {
 		dev_dbg(info->dev, "Set time %d out of range. "
-			"Please set time between 1970 to 2038.\n",
+			"Please set time between 1970 to 2106.\n",
 			1900 + tm->tm_year);
 		return -EINVAL;
 	}
diff --git a/drivers/rtc/rtc-ab-b5ze-s3.c b/drivers/rtc/rtc-ab-b5ze-s3.c
index ef5c16dfabfa..8dc451932446 100644
--- a/drivers/rtc/rtc-ab-b5ze-s3.c
+++ b/drivers/rtc/rtc-ab-b5ze-s3.c
@@ -217,7 +217,7 @@ static int _abb5zes3_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
 	struct abb5zes3_rtc_data *data = dev_get_drvdata(dev);
 	u8 regs[ABB5ZES3_REG_RTC_SC + ABB5ZES3_RTC_SEC_LEN];
-	int ret;
+	int ret = 0;
 
 	/*
 	 * As we need to read CTRL1 register anyway to access 24/12h
@@ -255,8 +255,6 @@ static int _abb5zes3_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	tm->tm_mon  = bcd2bin(regs[ABB5ZES3_REG_RTC_MO]) - 1; /* starts at 1 */
 	tm->tm_year = bcd2bin(regs[ABB5ZES3_REG_RTC_YR]) + 100;
 
-	ret = rtc_valid_tm(tm);
-
 err:
 	return ret;
 }
diff --git a/drivers/rtc/rtc-ab3100.c b/drivers/rtc/rtc-ab3100.c
index 9b725c553058..821ff52a2222 100644
--- a/drivers/rtc/rtc-ab3100.c
+++ b/drivers/rtc/rtc-ab3100.c
@@ -106,7 +106,7 @@ static int ab3100_rtc_read_time(struct device *dev, struct rtc_time *tm)
 
 	rtc_time64_to_tm(time, tm);
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int ab3100_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm)
diff --git a/drivers/rtc/rtc-ab8500.c b/drivers/rtc/rtc-ab8500.c
index 24a0af650a1b..e28f4401fd35 100644
--- a/drivers/rtc/rtc-ab8500.c
+++ b/drivers/rtc/rtc-ab8500.c
@@ -36,10 +36,6 @@
 #define AB8500_RTC_FORCE_BKUP_REG	0x0D
 #define AB8500_RTC_CALIB_REG		0x0E
 #define AB8500_RTC_SWITCH_STAT_REG	0x0F
-#define AB8540_RTC_ALRM_SEC		0x22
-#define AB8540_RTC_ALRM_MIN_LOW_REG	0x23
-#define AB8540_RTC_ALRM_MIN_MID_REG	0x24
-#define AB8540_RTC_ALRM_MIN_HI_REG	0x25
 
 /* RtcReadRequest bits */
 #define RTC_READ_REQUEST		0x01
@@ -63,11 +59,6 @@ static const u8 ab8500_rtc_alarm_regs[] = {
 	AB8500_RTC_ALRM_MIN_LOW_REG
 };
 
-static const u8 ab8540_rtc_alarm_regs[] = {
-	AB8540_RTC_ALRM_MIN_HI_REG, AB8540_RTC_ALRM_MIN_MID_REG,
-	AB8540_RTC_ALRM_MIN_LOW_REG, AB8540_RTC_ALRM_SEC
-};
-
 /* Calculate the seconds from 1970 to 01-01-2000 00:00:00 */
 static unsigned long get_elapsed_seconds(int year)
 {
@@ -131,7 +122,7 @@ static int ab8500_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	secs += get_elapsed_seconds(AB8500_RTC_EPOCH);
 
 	rtc_time_to_tm(secs, tm);
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int ab8500_rtc_set_time(struct device *dev, struct rtc_time *tm)
@@ -277,43 +268,6 @@ static int ab8500_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm)
 	return ab8500_rtc_irq_enable(dev, alarm->enabled);
 }
 
-static int ab8540_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm)
-{
-	int retval, i;
-	unsigned char buf[ARRAY_SIZE(ab8540_rtc_alarm_regs)];
-	unsigned long mins, secs = 0;
-
-	if (alarm->time.tm_year < (AB8500_RTC_EPOCH - 1900)) {
-		dev_dbg(dev, "year should be equal to or greater than %d\n",
-				AB8500_RTC_EPOCH);
-		return -EINVAL;
-	}
-
-	/* Get the number of seconds since 1970 */
-	rtc_tm_to_time(&alarm->time, &secs);
-
-	/*
-	 * Convert it to the number of seconds since 01-01-2000 00:00:00
-	 */
-	secs -= get_elapsed_seconds(AB8500_RTC_EPOCH);
-	mins = secs / 60;
-
-	buf[3] = secs % 60;
-	buf[2] = mins & 0xFF;
-	buf[1] = (mins >> 8) & 0xFF;
-	buf[0] = (mins >> 16) & 0xFF;
-
-	/* Set the alarm time */
-	for (i = 0; i < ARRAY_SIZE(ab8540_rtc_alarm_regs); i++) {
-		retval = abx500_set_register_interruptible(dev, AB8500_RTC,
-			ab8540_rtc_alarm_regs[i], buf[i]);
-		if (retval < 0)
-			return retval;
-	}
-
-	return ab8500_rtc_irq_enable(dev, alarm->enabled);
-}
-
 static int ab8500_rtc_set_calibration(struct device *dev, int calibration)
 {
 	int retval;
@@ -435,17 +389,8 @@ static const struct rtc_class_ops ab8500_rtc_ops = {
 	.alarm_irq_enable	= ab8500_rtc_irq_enable,
 };
 
-static const struct rtc_class_ops ab8540_rtc_ops = {
-	.read_time		= ab8500_rtc_read_time,
-	.set_time		= ab8500_rtc_set_time,
-	.read_alarm		= ab8500_rtc_read_alarm,
-	.set_alarm		= ab8540_rtc_set_alarm,
-	.alarm_irq_enable	= ab8500_rtc_irq_enable,
-};
-
 static const struct platform_device_id ab85xx_rtc_ids[] = {
 	{ "ab8500-rtc", (kernel_ulong_t)&ab8500_rtc_ops, },
-	{ "ab8540-rtc", (kernel_ulong_t)&ab8540_rtc_ops, },
 	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(platform, ab85xx_rtc_ids);
diff --git a/drivers/rtc/rtc-abx80x.c b/drivers/rtc/rtc-abx80x.c
index b033bc556f5d..2cefa67a1132 100644
--- a/drivers/rtc/rtc-abx80x.c
+++ b/drivers/rtc/rtc-abx80x.c
@@ -172,11 +172,7 @@ static int abx80x_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	tm->tm_mon = bcd2bin(buf[ABX8XX_REG_MO] & 0x1F) - 1;
 	tm->tm_year = bcd2bin(buf[ABX8XX_REG_YR]) + 100;
 
-	err = rtc_valid_tm(tm);
-	if (err < 0)
-		dev_err(&client->dev, "retrieved date/time is not valid.\n");
-
-	return err;
+	return 0;
 }
 
 static int abx80x_rtc_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-ac100.c b/drivers/rtc/rtc-ac100.c
index 8ff9dc3fe5bf..3fe576fdd45e 100644
--- a/drivers/rtc/rtc-ac100.c
+++ b/drivers/rtc/rtc-ac100.c
@@ -183,7 +183,29 @@ static int ac100_clkout_determine_rate(struct clk_hw *hw,
 
 	for (i = 0; i < num_parents; i++) {
 		struct clk_hw *parent = clk_hw_get_parent_by_index(hw, i);
-		unsigned long tmp, prate = clk_hw_get_rate(parent);
+		unsigned long tmp, prate;
+
+		/*
+		 * The clock has two parents, one is a fixed clock which is
+		 * internally registered by the ac100 driver. The other parent
+		 * is a clock from the codec side of the chip, which we
+		 * properly declare and reference in the devicetree and is
+		 * not implemented in any driver right now.
+		 * If the clock core looks for the parent of that second
+		 * missing clock, it can't find one that is registered and
+		 * returns NULL.
+		 * So we end up in a situation where clk_hw_get_num_parents
+		 * returns the number of clocks we can be parented to, but
+		 * clk_hw_get_parent_by_index will not return the orphan
+		 * clocks.
+		 * Thus we need to check if the parent exists before
+		 * we get the parent rate, so we could use the RTC
+		 * without waiting for the codec to be supported.
+		 */
+		if (!parent)
+			continue;
+
+		prate = clk_hw_get_rate(parent);
 
 		tmp = ac100_clkout_round_rate(hw, req->rate, prate);
 
@@ -387,7 +409,7 @@ static int ac100_rtc_get_time(struct device *dev, struct rtc_time *rtc_tm)
 	rtc_tm->tm_year = bcd2bin(reg[6] & AC100_RTC_YEA_MASK) +
 			  AC100_YEAR_OFF;
 
-	return rtc_valid_tm(rtc_tm);
+	return 0;
 }
 
 static int ac100_rtc_set_time(struct device *dev, struct rtc_time *rtc_tm)
diff --git a/drivers/rtc/rtc-at91sam9.c b/drivers/rtc/rtc-at91sam9.c
index 7418a763ce52..ee71e647fd43 100644
--- a/drivers/rtc/rtc-at91sam9.c
+++ b/drivers/rtc/rtc-at91sam9.c
@@ -349,6 +349,7 @@ static const struct rtc_class_ops at91_rtc_ops = {
 };
 
 static const struct regmap_config gpbr_regmap_config = {
+	.name = "gpbr",
 	.reg_bits = 32,
 	.val_bits = 32,
 	.reg_stride = 4,
diff --git a/drivers/rtc/rtc-au1xxx.c b/drivers/rtc/rtc-au1xxx.c
index 2ba44ccb9c3a..7c5530c71285 100644
--- a/drivers/rtc/rtc-au1xxx.c
+++ b/drivers/rtc/rtc-au1xxx.c
@@ -36,7 +36,7 @@ static int au1xtoy_rtc_read_time(struct device *dev, struct rtc_time *tm)
 
 	rtc_time_to_tm(t, tm);
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int au1xtoy_rtc_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-bq32k.c b/drivers/rtc/rtc-bq32k.c
index 98ac8d5c7901..ef52741000a8 100644
--- a/drivers/rtc/rtc-bq32k.c
+++ b/drivers/rtc/rtc-bq32k.c
@@ -36,6 +36,10 @@
 #define BQ32K_CFG2		0x09	/* Trickle charger control */
 #define BQ32K_TCFE		BIT(6)	/* Trickle charge FET bypass */
 
+#define MAX_LEN			10	/* Maximum number of consecutive
+					 * registers for this particular RTC.
+					 */
+
 struct bq32k_regs {
 	uint8_t		seconds;
 	uint8_t		minutes;
@@ -74,7 +78,7 @@ static int bq32k_read(struct device *dev, void *data, uint8_t off, uint8_t len)
 static int bq32k_write(struct device *dev, void *data, uint8_t off, uint8_t len)
 {
 	struct i2c_client *client = to_i2c_client(dev);
-	uint8_t buffer[len + 1];
+	uint8_t buffer[MAX_LEN + 1];
 
 	buffer[0] = off;
 	memcpy(&buffer[1], data, len);
@@ -110,7 +114,7 @@ static int bq32k_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	tm->tm_year = bcd2bin(regs.years) +
 				((regs.cent_hours & BQ32K_CENT) ? 100 : 0);
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int bq32k_rtc_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-brcmstb-waketimer.c b/drivers/rtc/rtc-brcmstb-waketimer.c
index 6cee61201c30..bdd6674a1054 100644
--- a/drivers/rtc/rtc-brcmstb-waketimer.c
+++ b/drivers/rtc/rtc-brcmstb-waketimer.c
@@ -60,6 +60,9 @@ static void brcmstb_waketmr_set_alarm(struct brcmstb_waketmr *timer,
 {
 	brcmstb_waketmr_clear_alarm(timer);
 
+	/* Make sure we are actually counting in seconds */
+	writel_relaxed(timer->rate, timer->base + BRCMSTB_WKTMR_PRESCALER);
+
 	writel_relaxed(secs + 1, timer->base + BRCMSTB_WKTMR_ALARM);
 }
 
diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index f7c0f72abb56..1b3738a11702 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -541,11 +541,10 @@ static const struct rtc_class_ops cmos_rtc_ops = {
 
 #define NVRAM_OFFSET	(RTC_REG_D + 1)
 
-static ssize_t
-cmos_nvram_read(struct file *filp, struct kobject *kobj,
-		struct bin_attribute *attr,
-		char *buf, loff_t off, size_t count)
+static int cmos_nvram_read(void *priv, unsigned int off, void *val,
+			   size_t count)
 {
+	unsigned char *buf = val;
 	int	retval;
 
 	off += NVRAM_OFFSET;
@@ -563,16 +562,13 @@ cmos_nvram_read(struct file *filp, struct kobject *kobj,
 	return retval;
 }
 
-static ssize_t
-cmos_nvram_write(struct file *filp, struct kobject *kobj,
-		struct bin_attribute *attr,
-		char *buf, loff_t off, size_t count)
+static int cmos_nvram_write(void *priv, unsigned int off, void *val,
+			    size_t count)
 {
-	struct cmos_rtc	*cmos;
+	struct cmos_rtc	*cmos = priv;
+	unsigned char	*buf = val;
 	int		retval;
 
-	cmos = dev_get_drvdata(container_of(kobj, struct device, kobj));
-
 	/* NOTE:  on at least PCs and Ataris, the boot firmware uses a
 	 * checksum on part of the NVRAM data.  That's currently ignored
 	 * here.  If userspace is smart enough to know what fields of
@@ -598,17 +594,6 @@ cmos_nvram_write(struct file *filp, struct kobject *kobj,
 	return retval;
 }
 
-static struct bin_attribute nvram = {
-	.attr = {
-		.name	= "nvram",
-		.mode	= S_IRUGO | S_IWUSR,
-	},
-
-	.read	= cmos_nvram_read,
-	.write	= cmos_nvram_write,
-	/* size gets set up later */
-};
-
 /*----------------------------------------------------------------*/
 
 static struct cmos_rtc	cmos_rtc;
@@ -675,6 +660,14 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
 	unsigned char			rtc_control;
 	unsigned			address_space;
 	u32				flags = 0;
+	struct nvmem_config nvmem_cfg = {
+		.name = "cmos_nvram",
+		.word_size = 1,
+		.stride = 1,
+		.reg_read = cmos_nvram_read,
+		.reg_write = cmos_nvram_write,
+		.priv = &cmos_rtc,
+	};
 
 	/* there can be only one ... */
 	if (cmos_rtc.dev)
@@ -751,8 +744,7 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
 	cmos_rtc.dev = dev;
 	dev_set_drvdata(dev, &cmos_rtc);
 
-	cmos_rtc.rtc = rtc_device_register(driver_name, dev,
-				&cmos_rtc_ops, THIS_MODULE);
+	cmos_rtc.rtc = devm_rtc_allocate_device(dev);
 	if (IS_ERR(cmos_rtc.rtc)) {
 		retval = PTR_ERR(cmos_rtc.rtc);
 		goto cleanup0;
@@ -814,22 +806,25 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
 		}
 	}
 
-	/* export at least the first block of NVRAM */
-	nvram.size = address_space - NVRAM_OFFSET;
-	retval = sysfs_create_bin_file(&dev->kobj, &nvram);
-	if (retval < 0) {
-		dev_dbg(dev, "can't create nvram file? %d\n", retval);
+	cmos_rtc.rtc->ops = &cmos_rtc_ops;
+	cmos_rtc.rtc->nvram_old_abi = true;
+	retval = rtc_register_device(cmos_rtc.rtc);
+	if (retval)
 		goto cleanup2;
-	}
 
-	dev_info(dev, "%s%s, %zd bytes nvram%s\n",
-		!is_valid_irq(rtc_irq) ? "no alarms" :
-			cmos_rtc.mon_alrm ? "alarms up to one year" :
-			cmos_rtc.day_alrm ? "alarms up to one month" :
-			"alarms up to one day",
-		cmos_rtc.century ? ", y3k" : "",
-		nvram.size,
-		is_hpet_enabled() ? ", hpet irqs" : "");
+	/* export at least the first block of NVRAM */
+	nvmem_cfg.size = address_space - NVRAM_OFFSET;
+	if (rtc_nvmem_register(cmos_rtc.rtc, &nvmem_cfg))
+		dev_err(dev, "nvmem registration failed\n");
+
+	dev_info(dev, "%s%s, %d bytes nvram%s\n",
+		 !is_valid_irq(rtc_irq) ? "no alarms" :
+		 cmos_rtc.mon_alrm ? "alarms up to one year" :
+		 cmos_rtc.day_alrm ? "alarms up to one month" :
+		 "alarms up to one day",
+		 cmos_rtc.century ? ", y3k" : "",
+		 nvmem_cfg.size,
+		 is_hpet_enabled() ? ", hpet irqs" : "");
 
 	return 0;
 
@@ -838,7 +833,6 @@ cleanup2:
 		free_irq(rtc_irq, cmos_rtc.rtc);
 cleanup1:
 	cmos_rtc.dev = NULL;
-	rtc_device_unregister(cmos_rtc.rtc);
 cleanup0:
 	if (RTC_IOMAPPED)
 		release_region(ports->start, resource_size(ports));
@@ -862,14 +856,11 @@ static void cmos_do_remove(struct device *dev)
 
 	cmos_do_shutdown(cmos->irq);
 
-	sysfs_remove_bin_file(&dev->kobj, &nvram);
-
 	if (is_valid_irq(cmos->irq)) {
 		free_irq(cmos->irq, cmos->rtc);
 		hpet_unregister_irq_handler(cmos_interrupt);
 	}
 
-	rtc_device_unregister(cmos->rtc);
 	cmos->rtc = NULL;
 
 	ports = cmos->iomem;
@@ -1271,8 +1262,6 @@ MODULE_DEVICE_TABLE(of, of_cmos_match);
 static __init void cmos_of_init(struct platform_device *pdev)
 {
 	struct device_node *node = pdev->dev.of_node;
-	struct rtc_time time;
-	int ret;
 	const __be32 *val;
 
 	if (!node)
@@ -1285,16 +1274,6 @@ static __init void cmos_of_init(struct platform_device *pdev)
 	val = of_get_property(node, "freq-reg", NULL);
 	if (val)
 		CMOS_WRITE(be32_to_cpup(val), RTC_FREQ_SELECT);
-
-	cmos_read_time(&pdev->dev, &time);
-	ret = rtc_valid_tm(&time);
-	if (ret) {
-		struct rtc_time def_time = {
-			.tm_year = 1,
-			.tm_mday = 1,
-		};
-		cmos_set_time(&pdev->dev, &def_time);
-	}
 }
 #else
 static inline void cmos_of_init(struct platform_device *pdev) {}
diff --git a/drivers/rtc/rtc-coh901331.c b/drivers/rtc/rtc-coh901331.c
index cfc4141d99cd..2fc517498a5d 100644
--- a/drivers/rtc/rtc-coh901331.c
+++ b/drivers/rtc/rtc-coh901331.c
@@ -82,7 +82,7 @@ static int coh901331_read_time(struct device *dev, struct rtc_time *tm)
 	if (readl(rtap->virtbase + COH901331_VALID)) {
 		rtc_time_to_tm(readl(rtap->virtbase + COH901331_CUR_TIME), tm);
 		clk_disable(rtap->clk);
-		return rtc_valid_tm(tm);
+		return 0;
 	}
 	clk_disable(rtap->clk);
 	return -EINVAL;
diff --git a/drivers/rtc/rtc-core.h b/drivers/rtc/rtc-core.h
index 513b9bedd2c8..0abf98983e13 100644
--- a/drivers/rtc/rtc-core.h
+++ b/drivers/rtc/rtc-core.h
@@ -46,11 +46,3 @@ static inline const struct attribute_group **rtc_get_dev_attribute_groups(void)
 	return NULL;
 }
 #endif
-
-#ifdef CONFIG_RTC_NVMEM
-void rtc_nvmem_register(struct rtc_device *rtc);
-void rtc_nvmem_unregister(struct rtc_device *rtc);
-#else
-static inline void rtc_nvmem_register(struct rtc_device *rtc) {}
-static inline void rtc_nvmem_unregister(struct rtc_device *rtc) {}
-#endif
diff --git a/drivers/rtc/rtc-cpcap.c b/drivers/rtc/rtc-cpcap.c
index 3a0333e1f21a..a8856f2b9bc2 100644
--- a/drivers/rtc/rtc-cpcap.c
+++ b/drivers/rtc/rtc-cpcap.c
@@ -119,7 +119,7 @@ static int cpcap_rtc_read_time(struct device *dev, struct rtc_time *tm)
 
 	cpcap2rtc_time(tm, &cpcap_tm);
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int cpcap_rtc_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-cros-ec.c b/drivers/rtc/rtc-cros-ec.c
index f0ea6899c731..bf7ced095c94 100644
--- a/drivers/rtc/rtc-cros-ec.c
+++ b/drivers/rtc/rtc-cros-ec.c
@@ -197,10 +197,10 @@ static int cros_ec_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 		cros_ec_rtc->saved_alarm = (u32)alarm_time;
 	} else {
 		/* Don't set an alarm in the past. */
-		if ((u32)alarm_time < current_time)
-			alarm_offset = EC_RTC_ALARM_CLEAR;
-		else
-			alarm_offset = (u32)alarm_time - current_time;
+		if ((u32)alarm_time <= current_time)
+			return -ETIME;
+
+		alarm_offset = (u32)alarm_time - current_time;
 	}
 
 	ret = cros_ec_rtc_set(cros_ec, EC_CMD_RTC_SET_ALARM, alarm_offset);
diff --git a/drivers/rtc/rtc-da9052.c b/drivers/rtc/rtc-da9052.c
index 4273377562ec..03044e1bc497 100644
--- a/drivers/rtc/rtc-da9052.c
+++ b/drivers/rtc/rtc-da9052.c
@@ -187,8 +187,7 @@ static int da9052_rtc_read_time(struct device *dev, struct rtc_time *rtc_tm)
 			rtc_tm->tm_min  = v[0][1] & DA9052_RTC_MIN;
 			rtc_tm->tm_sec  = v[0][0] & DA9052_RTC_SEC;
 
-			ret = rtc_valid_tm(rtc_tm);
-			return ret;
+			return 0;
 		}
 
 		idx = (1-idx);
diff --git a/drivers/rtc/rtc-da9055.c b/drivers/rtc/rtc-da9055.c
index 678af8648c45..e08cd8130c23 100644
--- a/drivers/rtc/rtc-da9055.c
+++ b/drivers/rtc/rtc-da9055.c
@@ -158,7 +158,7 @@ static int da9055_rtc_read_time(struct device *dev, struct rtc_time *rtc_tm)
 	rtc_tm->tm_min  = v[1] & DA9055_RTC_MIN;
 	rtc_tm->tm_sec  = v[0] & DA9055_RTC_SEC;
 
-	return rtc_valid_tm(rtc_tm);
+	return 0;
 }
 
 static int da9055_rtc_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-da9063.c b/drivers/rtc/rtc-da9063.c
index f85cae240f12..b4e054c64bad 100644
--- a/drivers/rtc/rtc-da9063.c
+++ b/drivers/rtc/rtc-da9063.c
@@ -256,7 +256,7 @@ static int da9063_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	else
 		rtc->rtc_sync = false;
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int da9063_rtc_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-ds1216.c b/drivers/rtc/rtc-ds1216.c
index 9c82b1da2d45..5f158715fb4c 100644
--- a/drivers/rtc/rtc-ds1216.c
+++ b/drivers/rtc/rtc-ds1216.c
@@ -99,7 +99,7 @@ static int ds1216_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	if (tm->tm_year < 70)
 		tm->tm_year += 100;
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int ds1216_rtc_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-ds1286.c b/drivers/rtc/rtc-ds1286.c
index ef75c349dff9..0744916b79c5 100644
--- a/drivers/rtc/rtc-ds1286.c
+++ b/drivers/rtc/rtc-ds1286.c
@@ -211,7 +211,7 @@ static int ds1286_read_time(struct device *dev, struct rtc_time *tm)
 
 	tm->tm_mon--;
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int ds1286_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-ds1302.c b/drivers/rtc/rtc-ds1302.c
index 0ec4be62322b..2a881150d51c 100644
--- a/drivers/rtc/rtc-ds1302.c
+++ b/drivers/rtc/rtc-ds1302.c
@@ -43,7 +43,7 @@ static int ds1302_rtc_set_time(struct device *dev, struct rtc_time *time)
 {
 	struct spi_device	*spi = dev_get_drvdata(dev);
 	u8		buf[1 + RTC_CLCK_LEN];
-	u8		*bp = buf;
+	u8		*bp;
 	int		status;
 
 	/* Enable writing */
@@ -98,8 +98,7 @@ static int ds1302_rtc_get_time(struct device *dev, struct rtc_time *time)
 	time->tm_mon = bcd2bin(buf[RTC_ADDR_MON]) - 1;
 	time->tm_year = bcd2bin(buf[RTC_ADDR_YEAR]) + 100;
 
-	/* Time may not be set */
-	return rtc_valid_tm(time);
+	return 0;
 }
 
 static const struct rtc_class_ops ds1302_rtc_ops = {
@@ -112,7 +111,7 @@ static int ds1302_probe(struct spi_device *spi)
 	struct rtc_device	*rtc;
 	u8		addr;
 	u8		buf[4];
-	u8		*bp = buf;
+	u8		*bp;
 	int		status;
 
 	/* Sanity check board setup data.  This may be hooked up
diff --git a/drivers/rtc/rtc-ds1305.c b/drivers/rtc/rtc-ds1305.c
index d8df2e9e14ad..2d502fc85698 100644
--- a/drivers/rtc/rtc-ds1305.c
+++ b/drivers/rtc/rtc-ds1305.c
@@ -203,8 +203,7 @@ static int ds1305_get_time(struct device *dev, struct rtc_time *time)
 		time->tm_hour, time->tm_mday,
 		time->tm_mon, time->tm_year, time->tm_wday);
 
-	/* Time may not be set */
-	return rtc_valid_tm(time);
+	return 0;
 }
 
 static int ds1305_set_time(struct device *dev, struct rtc_time *time)
@@ -544,15 +543,6 @@ static int ds1305_nvram_write(void *priv, unsigned int off, void *buf,
 	return spi_sync(spi, &m);
 }
 
-static struct nvmem_config ds1305_nvmem_cfg = {
-	.name = "ds1305_nvram",
-	.word_size = 1,
-	.stride = 1,
-	.size = DS1305_NVRAM_LEN,
-	.reg_read = ds1305_nvram_read,
-	.reg_write = ds1305_nvram_write,
-};
-
 /*----------------------------------------------------------------------*/
 
 /*
@@ -566,6 +556,14 @@ static int ds1305_probe(struct spi_device *spi)
 	u8				addr, value;
 	struct ds1305_platform_data	*pdata = dev_get_platdata(&spi->dev);
 	bool				write_ctrl = false;
+	struct nvmem_config ds1305_nvmem_cfg = {
+		.name = "ds1305_nvram",
+		.word_size = 1,
+		.stride = 1,
+		.size = DS1305_NVRAM_LEN,
+		.reg_read = ds1305_nvram_read,
+		.reg_write = ds1305_nvram_write,
+	};
 
 	/* Sanity check board setup data.  This may be hooked up
 	 * in 3wire mode, but we don't care.  Note that unless
@@ -703,15 +701,15 @@ static int ds1305_probe(struct spi_device *spi)
 	ds1305->rtc->ops = &ds1305_ops;
 
 	ds1305_nvmem_cfg.priv = ds1305;
-	ds1305->rtc->nvmem_config = &ds1305_nvmem_cfg;
 	ds1305->rtc->nvram_old_abi = true;
-
 	status = rtc_register_device(ds1305->rtc);
 	if (status) {
 		dev_dbg(&spi->dev, "register rtc --> %d\n", status);
 		return status;
 	}
 
+	rtc_nvmem_register(ds1305->rtc, &ds1305_nvmem_cfg);
+
 	/* Maybe set up alarm IRQ; be ready to handle it triggering right
 	 * away.  NOTE that we don't share this.  The signal is active low,
 	 * and we can't ack it before a SPI message delay.  We temporarily
diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c
index 923dde912f60..a13e59edff53 100644
--- a/drivers/rtc/rtc-ds1307.c
+++ b/drivers/rtc/rtc-ds1307.c
@@ -114,7 +114,6 @@ enum ds_type {
 #	define RX8025_BIT_XST		0x20
 
 struct ds1307 {
-	struct nvmem_config	nvmem_cfg;
 	enum ds_type		type;
 	unsigned long		flags;
 #define HAS_NVRAM	0		/* bit 0 == sysfs file active */
@@ -438,8 +437,7 @@ static int ds1307_get_time(struct device *dev, struct rtc_time *t)
 		t->tm_hour, t->tm_mday,
 		t->tm_mon, t->tm_year, t->tm_wday);
 
-	/* initial clock setting can be undefined */
-	return rtc_valid_tm(t);
+	return 0;
 }
 
 static int ds1307_set_time(struct device *dev, struct rtc_time *t)
@@ -1696,24 +1694,26 @@ read_rtc:
 		}
 	}
 
-	if (chip->nvram_size) {
-		ds1307->nvmem_cfg.name = "ds1307_nvram";
-		ds1307->nvmem_cfg.word_size = 1;
-		ds1307->nvmem_cfg.stride = 1;
-		ds1307->nvmem_cfg.size = chip->nvram_size;
-		ds1307->nvmem_cfg.reg_read = ds1307_nvram_read;
-		ds1307->nvmem_cfg.reg_write = ds1307_nvram_write;
-		ds1307->nvmem_cfg.priv = ds1307;
-
-		ds1307->rtc->nvmem_config = &ds1307->nvmem_cfg;
-		ds1307->rtc->nvram_old_abi = true;
-	}
-
 	ds1307->rtc->ops = chip->rtc_ops ?: &ds13xx_rtc_ops;
 	err = rtc_register_device(ds1307->rtc);
 	if (err)
 		return err;
 
+	if (chip->nvram_size) {
+		struct nvmem_config nvmem_cfg = {
+			.name = "ds1307_nvram",
+			.word_size = 1,
+			.stride = 1,
+			.size = chip->nvram_size,
+			.reg_read = ds1307_nvram_read,
+			.reg_write = ds1307_nvram_write,
+			.priv = ds1307,
+		};
+
+		ds1307->rtc->nvram_old_abi = true;
+		rtc_nvmem_register(ds1307->rtc, &nvmem_cfg);
+	}
+
 	ds1307_hwmon_register(ds1307);
 	ds1307_clks_register(ds1307);
 
diff --git a/drivers/rtc/rtc-ds1343.c b/drivers/rtc/rtc-ds1343.c
index 895fbeeb47fe..5208da4cf94a 100644
--- a/drivers/rtc/rtc-ds1343.c
+++ b/drivers/rtc/rtc-ds1343.c
@@ -153,120 +153,22 @@ static ssize_t ds1343_store_glitchfilter(struct device *dev,
 static DEVICE_ATTR(glitch_filter, S_IRUGO | S_IWUSR, ds1343_show_glitchfilter,
 			ds1343_store_glitchfilter);
 
-static ssize_t ds1343_nvram_write(struct file *filp, struct kobject *kobj,
-			struct bin_attribute *attr,
-			char *buf, loff_t off, size_t count)
+static int ds1343_nvram_write(void *priv, unsigned int off, void *val,
+			      size_t bytes)
 {
-	int ret;
-	unsigned char address;
-	struct device *dev = kobj_to_dev(kobj);
-	struct ds1343_priv *priv = dev_get_drvdata(dev);
-
-	address = DS1343_NVRAM + off;
-
-	ret = regmap_bulk_write(priv->map, address, buf, count);
-	if (ret < 0)
-		dev_err(&priv->spi->dev, "Error in nvram write %d", ret);
+	struct ds1343_priv *ds1343 = priv;
 
-	return (ret < 0) ? ret : count;
+	return regmap_bulk_write(ds1343->map, DS1343_NVRAM + off, val, bytes);
 }
 
-
-static ssize_t ds1343_nvram_read(struct file *filp, struct kobject *kobj,
-				struct bin_attribute *attr,
-				char *buf, loff_t off, size_t count)
+static int ds1343_nvram_read(void *priv, unsigned int off, void *val,
+			     size_t bytes)
 {
-	int ret;
-	unsigned char address;
-	struct device *dev = kobj_to_dev(kobj);
-	struct ds1343_priv *priv = dev_get_drvdata(dev);
+	struct ds1343_priv *ds1343 = priv;
 
-	address = DS1343_NVRAM + off;
-
-	ret = regmap_bulk_read(priv->map, address, buf, count);
-	if (ret < 0)
-		dev_err(&priv->spi->dev, "Error in nvram read %d\n", ret);
-
-	return (ret < 0) ? ret : count;
+	return regmap_bulk_read(ds1343->map, DS1343_NVRAM + off, val, bytes);
 }
 
-
-static struct bin_attribute nvram_attr = {
-	.attr.name	= "nvram",
-	.attr.mode	= S_IRUGO | S_IWUSR,
-	.read		= ds1343_nvram_read,
-	.write		= ds1343_nvram_write,
-	.size		= DS1343_NVRAM_LEN,
-};
-
-static ssize_t ds1343_show_alarmstatus(struct device *dev,
-				struct device_attribute *attr, char *buf)
-{
-	struct ds1343_priv *priv = dev_get_drvdata(dev);
-	int alarmstatus, data;
-
-	regmap_read(priv->map, DS1343_CONTROL_REG, &data);
-
-	alarmstatus = !!(data & DS1343_A0IE);
-
-	if (alarmstatus)
-		return sprintf(buf, "enabled\n");
-	else
-		return sprintf(buf, "disabled\n");
-}
-
-static DEVICE_ATTR(alarm_status, S_IRUGO, ds1343_show_alarmstatus, NULL);
-
-static ssize_t ds1343_show_alarmmode(struct device *dev,
-				struct device_attribute *attr, char *buf)
-{
-	struct ds1343_priv *priv = dev_get_drvdata(dev);
-	int alarm_mode, data;
-	char *alarm_str;
-
-	regmap_read(priv->map, DS1343_ALM0_SEC_REG, &data);
-	alarm_mode = (data & 0x80) >> 4;
-
-	regmap_read(priv->map, DS1343_ALM0_MIN_REG, &data);
-	alarm_mode |= (data & 0x80) >> 5;
-
-	regmap_read(priv->map, DS1343_ALM0_HOUR_REG, &data);
-	alarm_mode |= (data & 0x80) >> 6;
-
-	regmap_read(priv->map, DS1343_ALM0_DAY_REG, &data);
-	alarm_mode |= (data & 0x80) >> 7;
-
-	switch (alarm_mode) {
-	case 15:
-		alarm_str = "each second";
-		break;
-
-	case 7:
-		alarm_str = "seconds match";
-		break;
-
-	case 3:
-		alarm_str = "minutes and seconds match";
-		break;
-
-	case 1:
-		alarm_str = "hours, minutes and seconds match";
-		break;
-
-	case 0:
-		alarm_str = "day, hours, minutes and seconds match";
-		break;
-
-	default:
-		alarm_str = "invalid";
-		break;
-	}
-
-	return sprintf(buf, "%s\n", alarm_str);
-}
-
-static DEVICE_ATTR(alarm_mode, S_IRUGO, ds1343_show_alarmmode, NULL);
-
 static ssize_t ds1343_show_tricklecharger(struct device *dev,
 				struct device_attribute *attr, char *buf)
 {
@@ -313,7 +215,6 @@ static DEVICE_ATTR(trickle_charger, S_IRUGO, ds1343_show_tricklecharger, NULL);
 
 static int ds1343_sysfs_register(struct device *dev)
 {
-	struct ds1343_priv *priv = dev_get_drvdata(dev);
 	int err;
 
 	err = device_create_file(dev, &dev_attr_glitch_filter);
@@ -321,33 +222,9 @@ static int ds1343_sysfs_register(struct device *dev)
 		return err;
 
 	err = device_create_file(dev, &dev_attr_trickle_charger);
-	if (err)
-		goto error1;
-
-	err = device_create_bin_file(dev, &nvram_attr);
-	if (err)
-		goto error2;
-
-	if (priv->irq <= 0)
-		return err;
-
-	err = device_create_file(dev, &dev_attr_alarm_mode);
-	if (err)
-		goto error3;
-
-	err = device_create_file(dev, &dev_attr_alarm_status);
 	if (!err)
-		return err;
+		return 0;
 
-	device_remove_file(dev, &dev_attr_alarm_mode);
-
-error3:
-	device_remove_bin_file(dev, &nvram_attr);
-
-error2:
-	device_remove_file(dev, &dev_attr_trickle_charger);
-
-error1:
 	device_remove_file(dev, &dev_attr_glitch_filter);
 
 	return err;
@@ -355,17 +232,8 @@ error1:
 
 static void ds1343_sysfs_unregister(struct device *dev)
 {
-	struct ds1343_priv *priv = dev_get_drvdata(dev);
-
 	device_remove_file(dev, &dev_attr_glitch_filter);
 	device_remove_file(dev, &dev_attr_trickle_charger);
-	device_remove_bin_file(dev, &nvram_attr);
-
-	if (priv->irq <= 0)
-		return;
-
-	device_remove_file(dev, &dev_attr_alarm_status);
-	device_remove_file(dev, &dev_attr_alarm_mode);
 }
 
 static int ds1343_read_time(struct device *dev, struct rtc_time *dt)
@@ -386,7 +254,7 @@ static int ds1343_read_time(struct device *dev, struct rtc_time *dt)
 	dt->tm_mon	= bcd2bin(buf[5] & 0x1F) - 1;
 	dt->tm_year	= bcd2bin(buf[6]) + 100; /* year offset from 1900 */
 
-	return rtc_valid_tm(dt);
+	return 0;
 }
 
 static int ds1343_set_time(struct device *dev, struct rtc_time *dt)
@@ -599,14 +467,18 @@ static const struct rtc_class_ops ds1343_rtc_ops = {
 static int ds1343_probe(struct spi_device *spi)
 {
 	struct ds1343_priv *priv;
-	struct regmap_config config;
+	struct regmap_config config = { .reg_bits = 8, .val_bits = 8,
+					.write_flag_mask = 0x80, };
 	unsigned int data;
 	int res;
-
-	memset(&config, 0, sizeof(config));
-	config.reg_bits = 8;
-	config.val_bits = 8;
-	config.write_flag_mask = 0x80;
+	struct nvmem_config nvmem_cfg = {
+		.name = "ds1343-",
+		.word_size = 1,
+		.stride = 1,
+		.size = DS1343_NVRAM_LEN,
+		.reg_read = ds1343_nvram_read,
+		.reg_write = ds1343_nvram_write,
+	};
 
 	priv = devm_kzalloc(&spi->dev, sizeof(struct ds1343_priv), GFP_KERNEL);
 	if (!priv)
@@ -646,12 +518,19 @@ static int ds1343_probe(struct spi_device *spi)
 	data &= ~(DS1343_OSF | DS1343_IRQF1 | DS1343_IRQF0);
 	regmap_write(priv->map, DS1343_STATUS_REG, data);
 
-	priv->rtc = devm_rtc_device_register(&spi->dev, "ds1343",
-					&ds1343_rtc_ops, THIS_MODULE);
-	if (IS_ERR(priv->rtc)) {
-		dev_err(&spi->dev, "unable to register rtc ds1343\n");
+	priv->rtc = devm_rtc_allocate_device(&spi->dev);
+	if (IS_ERR(priv->rtc))
 		return PTR_ERR(priv->rtc);
-	}
+
+	priv->rtc->nvram_old_abi = true;
+	priv->rtc->ops = &ds1343_rtc_ops;
+
+	res = rtc_register_device(priv->rtc);
+	if (res)
+		return res;
+
+	nvmem_cfg.priv = priv;
+	rtc_nvmem_register(priv->rtc, &nvmem_cfg);
 
 	priv->irq = spi->irq;
 
diff --git a/drivers/rtc/rtc-ds1347.c b/drivers/rtc/rtc-ds1347.c
index ccfc9d43eb1e..938512c676ee 100644
--- a/drivers/rtc/rtc-ds1347.c
+++ b/drivers/rtc/rtc-ds1347.c
@@ -66,7 +66,7 @@ static int ds1347_read_time(struct device *dev, struct rtc_time *dt)
 	dt->tm_wday = bcd2bin(buf[5]) - 1;
 	dt->tm_year = bcd2bin(buf[6]) + 100;
 
-	return rtc_valid_tm(dt);
+	return 0;
 }
 
 static int ds1347_set_time(struct device *dev, struct rtc_time *dt)
diff --git a/drivers/rtc/rtc-ds1390.c b/drivers/rtc/rtc-ds1390.c
index 4d5b007d7fc6..3b095401f848 100644
--- a/drivers/rtc/rtc-ds1390.c
+++ b/drivers/rtc/rtc-ds1390.c
@@ -153,7 +153,7 @@ static int ds1390_read_time(struct device *dev, struct rtc_time *dt)
 	/* adjust for century bit */
 	dt->tm_year = bcd2bin(chip->txrx_buf[6]) + ((chip->txrx_buf[5] & 0x80) ? 100 : 0);
 
-	return rtc_valid_tm(dt);
+	return 0;
 }
 
 static int ds1390_set_time(struct device *dev, struct rtc_time *dt)
diff --git a/drivers/rtc/rtc-ds1511.c b/drivers/rtc/rtc-ds1511.c
index 1e95312a6f2e..a7d5ca428d68 100644
--- a/drivers/rtc/rtc-ds1511.c
+++ b/drivers/rtc/rtc-ds1511.c
@@ -277,10 +277,6 @@ static int ds1511_rtc_read_time(struct device *dev, struct rtc_time *rtc_tm)
 
 	rtc_tm->tm_mon--;
 
-	if (rtc_valid_tm(rtc_tm) < 0) {
-		dev_err(dev, "retrieved date/time is not valid.\n");
-		rtc_time_to_tm(0, rtc_tm);
-	}
 	return 0;
 }
 
@@ -422,20 +418,20 @@ static int ds1511_nvram_write(void *priv, unsigned int pos, void *buf,
 	return 0;
 }
 
-static struct nvmem_config ds1511_nvmem_cfg = {
-	.name = "ds1511_nvram",
-	.word_size = 1,
-	.stride = 1,
-	.size = DS1511_RAM_MAX,
-	.reg_read = ds1511_nvram_read,
-	.reg_write = ds1511_nvram_write,
-};
-
 static int ds1511_rtc_probe(struct platform_device *pdev)
 {
 	struct resource *res;
 	struct rtc_plat_data *pdata;
 	int ret = 0;
+	struct nvmem_config ds1511_nvmem_cfg = {
+		.name = "ds1511_nvram",
+		.word_size = 1,
+		.stride = 1,
+		.size = DS1511_RAM_MAX,
+		.reg_read = ds1511_nvram_read,
+		.reg_write = ds1511_nvram_write,
+		.priv = &pdev->dev,
+	};
 
 	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
 	if (!pdata)
@@ -478,14 +474,14 @@ static int ds1511_rtc_probe(struct platform_device *pdev)
 
 	pdata->rtc->ops = &ds1511_rtc_ops;
 
-	ds1511_nvmem_cfg.priv = &pdev->dev;
-	pdata->rtc->nvmem_config = &ds1511_nvmem_cfg;
 	pdata->rtc->nvram_old_abi = true;
 
 	ret = rtc_register_device(pdata->rtc);
 	if (ret)
 		return ret;
 
+	rtc_nvmem_register(pdata->rtc, &ds1511_nvmem_cfg);
+
 	/*
 	 * if the platform has an interrupt in mind for this device,
 	 * then by all means, set it
diff --git a/drivers/rtc/rtc-ds1553.c b/drivers/rtc/rtc-ds1553.c
index 9961ec646fd2..2441b9a2b366 100644
--- a/drivers/rtc/rtc-ds1553.c
+++ b/drivers/rtc/rtc-ds1553.c
@@ -127,10 +127,6 @@ static int ds1553_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	/* year is 1900 + tm->tm_year */
 	tm->tm_year = bcd2bin(year) + bcd2bin(century) * 100 - 1900;
 
-	if (rtc_valid_tm(tm) < 0) {
-		dev_err(dev, "retrieved date/time is not valid.\n");
-		rtc_time_to_tm(0, tm);
-	}
 	return 0;
 }
 
@@ -233,46 +229,32 @@ static const struct rtc_class_ops ds1553_rtc_ops = {
 	.alarm_irq_enable	= ds1553_rtc_alarm_irq_enable,
 };
 
-static ssize_t ds1553_nvram_read(struct file *filp, struct kobject *kobj,
-				 struct bin_attribute *bin_attr,
-				 char *buf, loff_t pos, size_t size)
+static int ds1553_nvram_read(void *priv, unsigned int pos, void *val,
+			     size_t bytes)
 {
-	struct device *dev = container_of(kobj, struct device, kobj);
-	struct platform_device *pdev = to_platform_device(dev);
+	struct platform_device *pdev = priv;
 	struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
 	void __iomem *ioaddr = pdata->ioaddr;
-	ssize_t count;
+	u8 *buf = val;
 
-	for (count = 0; count < size; count++)
+	for (; bytes; bytes--)
 		*buf++ = readb(ioaddr + pos++);
-	return count;
+	return 0;
 }
 
-static ssize_t ds1553_nvram_write(struct file *filp, struct kobject *kobj,
-				  struct bin_attribute *bin_attr,
-				  char *buf, loff_t pos, size_t size)
+static int ds1553_nvram_write(void *priv, unsigned int pos, void *val,
+			      size_t bytes)
 {
-	struct device *dev = container_of(kobj, struct device, kobj);
-	struct platform_device *pdev = to_platform_device(dev);
+	struct platform_device *pdev = priv;
 	struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
 	void __iomem *ioaddr = pdata->ioaddr;
-	ssize_t count;
+	u8 *buf = val;
 
-	for (count = 0; count < size; count++)
+	for (; bytes; bytes--)
 		writeb(*buf++, ioaddr + pos++);
-	return count;
+	return 0;
 }
 
-static struct bin_attribute ds1553_nvram_attr = {
-	.attr = {
-		.name = "nvram",
-		.mode = S_IRUGO | S_IWUSR,
-	},
-	.size = RTC_OFFSET,
-	.read = ds1553_nvram_read,
-	.write = ds1553_nvram_write,
-};
-
 static int ds1553_rtc_probe(struct platform_device *pdev)
 {
 	struct resource *res;
@@ -280,6 +262,15 @@ static int ds1553_rtc_probe(struct platform_device *pdev)
 	struct rtc_plat_data *pdata;
 	void __iomem *ioaddr;
 	int ret = 0;
+	struct nvmem_config nvmem_cfg = {
+		.name = "ds1553_nvram",
+		.word_size = 1,
+		.stride = 1,
+		.size = RTC_OFFSET,
+		.reg_read = ds1553_nvram_read,
+		.reg_write = ds1553_nvram_write,
+		.priv = pdev,
+	};
 
 	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
 	if (!pdata)
@@ -308,11 +299,17 @@ static int ds1553_rtc_probe(struct platform_device *pdev)
 	pdata->last_jiffies = jiffies;
 	platform_set_drvdata(pdev, pdata);
 
-	pdata->rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
-				  &ds1553_rtc_ops, THIS_MODULE);
+	pdata->rtc = devm_rtc_allocate_device(&pdev->dev);
 	if (IS_ERR(pdata->rtc))
 		return PTR_ERR(pdata->rtc);
 
+	pdata->rtc->ops = &ds1553_rtc_ops;
+	pdata->rtc->nvram_old_abi = true;
+
+	ret = rtc_register_device(pdata->rtc);
+	if (ret)
+		return ret;
+
 	if (pdata->irq > 0) {
 		writeb(0, ioaddr + RTC_INTERRUPTS);
 		if (devm_request_irq(&pdev->dev, pdata->irq,
@@ -323,21 +320,9 @@ static int ds1553_rtc_probe(struct platform_device *pdev)
 		}
 	}
 
-	ret = sysfs_create_bin_file(&pdev->dev.kobj, &ds1553_nvram_attr);
-	if (ret)
-		dev_err(&pdev->dev, "unable to create sysfs file: %s\n",
-			ds1553_nvram_attr.attr.name);
-
-	return 0;
-}
-
-static int ds1553_rtc_remove(struct platform_device *pdev)
-{
-	struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
+	if (rtc_nvmem_register(pdata->rtc, &nvmem_cfg))
+		dev_err(&pdev->dev, "unable to register nvmem\n");
 
-	sysfs_remove_bin_file(&pdev->dev.kobj, &ds1553_nvram_attr);
-	if (pdata->irq > 0)
-		writeb(0, pdata->ioaddr + RTC_INTERRUPTS);
 	return 0;
 }
 
@@ -346,7 +331,6 @@ MODULE_ALIAS("platform:rtc-ds1553");
 
 static struct platform_driver ds1553_rtc_driver = {
 	.probe		= ds1553_rtc_probe,
-	.remove		= ds1553_rtc_remove,
 	.driver		= {
 		.name	= "rtc-ds1553",
 	},
diff --git a/drivers/rtc/rtc-ds1685.c b/drivers/rtc/rtc-ds1685.c
index ed43b4311660..1a39829d2b40 100644
--- a/drivers/rtc/rtc-ds1685.c
+++ b/drivers/rtc/rtc-ds1685.c
@@ -306,7 +306,7 @@ ds1685_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	tm->tm_yday  = rtc_year_days(tm->tm_mday, tm->tm_mon, tm->tm_year);
 	tm->tm_isdst = 0; /* RTC has hardcoded timezone, so don't use. */
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 /**
diff --git a/drivers/rtc/rtc-ds1742.c b/drivers/rtc/rtc-ds1742.c
index 3abf1cbfb8ce..2d781180e968 100644
--- a/drivers/rtc/rtc-ds1742.c
+++ b/drivers/rtc/rtc-ds1742.c
@@ -53,9 +53,7 @@
 struct rtc_plat_data {
 	void __iomem *ioaddr_nvram;
 	void __iomem *ioaddr_rtc;
-	size_t size_nvram;
 	unsigned long last_jiffies;
-	struct bin_attribute nvram_attr;
 };
 
 static int ds1742_rtc_set_time(struct device *dev, struct rtc_time *tm)
@@ -114,7 +112,7 @@ static int ds1742_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	/* year is 1900 + tm->tm_year */
 	tm->tm_year = bcd2bin(year) + bcd2bin(century) * 100 - 1900;
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static const struct rtc_class_ops ds1742_rtc_ops = {
@@ -122,34 +120,28 @@ static const struct rtc_class_ops ds1742_rtc_ops = {
 	.set_time	= ds1742_rtc_set_time,
 };
 
-static ssize_t ds1742_nvram_read(struct file *filp, struct kobject *kobj,
-				 struct bin_attribute *bin_attr,
-				 char *buf, loff_t pos, size_t size)
+static int ds1742_nvram_read(void *priv, unsigned int pos, void *val,
+			     size_t bytes)
 {
-	struct device *dev = container_of(kobj, struct device, kobj);
-	struct platform_device *pdev = to_platform_device(dev);
-	struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
+	struct rtc_plat_data *pdata = priv;
 	void __iomem *ioaddr = pdata->ioaddr_nvram;
-	ssize_t count;
+	u8 *buf = val;
 
-	for (count = 0; count < size; count++)
+	for (; bytes; bytes--)
 		*buf++ = readb(ioaddr + pos++);
-	return count;
+	return 0;
 }
 
-static ssize_t ds1742_nvram_write(struct file *filp, struct kobject *kobj,
-				  struct bin_attribute *bin_attr,
-				  char *buf, loff_t pos, size_t size)
+static int ds1742_nvram_write(void *priv, unsigned int pos, void *val,
+			      size_t bytes)
 {
-	struct device *dev = container_of(kobj, struct device, kobj);
-	struct platform_device *pdev = to_platform_device(dev);
-	struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
+	struct rtc_plat_data *pdata = priv;
 	void __iomem *ioaddr = pdata->ioaddr_nvram;
-	ssize_t count;
+	u8 *buf = val;
 
-	for (count = 0; count < size; count++)
+	for (; bytes; bytes--)
 		writeb(*buf++, ioaddr + pos++);
-	return count;
+	return 0;
 }
 
 static int ds1742_rtc_probe(struct platform_device *pdev)
@@ -160,6 +152,14 @@ static int ds1742_rtc_probe(struct platform_device *pdev)
 	struct rtc_plat_data *pdata;
 	void __iomem *ioaddr;
 	int ret = 0;
+	struct nvmem_config nvmem_cfg = {
+		.name = "ds1742_nvram",
+		.word_size = 1,
+		.stride = 1,
+		.reg_read = ds1742_nvram_read,
+		.reg_write = ds1742_nvram_write,
+	};
+
 
 	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
 	if (!pdata)
@@ -171,15 +171,10 @@ static int ds1742_rtc_probe(struct platform_device *pdev)
 		return PTR_ERR(ioaddr);
 
 	pdata->ioaddr_nvram = ioaddr;
-	pdata->size_nvram = resource_size(res) - RTC_SIZE;
-	pdata->ioaddr_rtc = ioaddr + pdata->size_nvram;
+	pdata->ioaddr_rtc = ioaddr + resource_size(res) - RTC_SIZE;
 
-	sysfs_bin_attr_init(&pdata->nvram_attr);
-	pdata->nvram_attr.attr.name = "nvram";
-	pdata->nvram_attr.attr.mode = S_IRUGO | S_IWUSR;
-	pdata->nvram_attr.read = ds1742_nvram_read;
-	pdata->nvram_attr.write = ds1742_nvram_write;
-	pdata->nvram_attr.size = pdata->size_nvram;
+	nvmem_cfg.size = resource_size(res) - RTC_SIZE;
+	nvmem_cfg.priv = pdata;
 
 	/* turn RTC on if it was not on */
 	ioaddr = pdata->ioaddr_rtc;
@@ -196,24 +191,21 @@ static int ds1742_rtc_probe(struct platform_device *pdev)
 
 	pdata->last_jiffies = jiffies;
 	platform_set_drvdata(pdev, pdata);
-	rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
-				  &ds1742_rtc_ops, THIS_MODULE);
+
+	rtc = devm_rtc_allocate_device(&pdev->dev);
 	if (IS_ERR(rtc))
 		return PTR_ERR(rtc);
 
-	ret = sysfs_create_bin_file(&pdev->dev.kobj, &pdata->nvram_attr);
-	if (ret)
-		dev_err(&pdev->dev, "Unable to create sysfs entry: %s\n",
-			pdata->nvram_attr.attr.name);
+	rtc->ops = &ds1742_rtc_ops;
+	rtc->nvram_old_abi = true;
 
-	return 0;
-}
+	ret = rtc_register_device(rtc);
+	if (ret)
+		return ret;
 
-static int ds1742_rtc_remove(struct platform_device *pdev)
-{
-	struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
+	if (rtc_nvmem_register(rtc, &nvmem_cfg))
+		dev_err(&pdev->dev, "Unable to register nvmem\n");
 
-	sysfs_remove_bin_file(&pdev->dev.kobj, &pdata->nvram_attr);
 	return 0;
 }
 
@@ -225,7 +217,6 @@ MODULE_DEVICE_TABLE(of, ds1742_rtc_of_match);
 
 static struct platform_driver ds1742_rtc_driver = {
 	.probe		= ds1742_rtc_probe,
-	.remove		= ds1742_rtc_remove,
 	.driver		= {
 		.name	= "rtc-ds1742",
 		.of_match_table = of_match_ptr(ds1742_rtc_of_match),
diff --git a/drivers/rtc/rtc-ds2404.c b/drivers/rtc/rtc-ds2404.c
index 9a1582ed7070..b886b6a5c178 100644
--- a/drivers/rtc/rtc-ds2404.c
+++ b/drivers/rtc/rtc-ds2404.c
@@ -207,7 +207,7 @@ static int ds2404_read_time(struct device *dev, struct rtc_time *dt)
 	time = le32_to_cpu(time);
 
 	rtc_time_to_tm(time, dt);
-	return rtc_valid_tm(dt);
+	return 0;
 }
 
 static int ds2404_set_mmss(struct device *dev, unsigned long secs)
diff --git a/drivers/rtc/rtc-ds3232.c b/drivers/rtc/rtc-ds3232.c
index 0550f7ba464f..7184e5145f12 100644
--- a/drivers/rtc/rtc-ds3232.c
+++ b/drivers/rtc/rtc-ds3232.c
@@ -145,7 +145,7 @@ static int ds3232_read_time(struct device *dev, struct rtc_time *time)
 
 	time->tm_year = bcd2bin(year) + add_century;
 
-	return rtc_valid_tm(time);
+	return 0;
 }
 
 static int ds3232_set_time(struct device *dev, struct rtc_time *time)
diff --git a/drivers/rtc/rtc-efi.c b/drivers/rtc/rtc-efi.c
index 0130afd7fe88..3454e7814524 100644
--- a/drivers/rtc/rtc-efi.c
+++ b/drivers/rtc/rtc-efi.c
@@ -176,7 +176,7 @@ static int efi_read_time(struct device *dev, struct rtc_time *tm)
 	if (!convert_from_efi_time(&eft, tm))
 		return -EIO;
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int efi_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-fm3130.c b/drivers/rtc/rtc-fm3130.c
index 576eadbba296..e1137670d4d2 100644
--- a/drivers/rtc/rtc-fm3130.c
+++ b/drivers/rtc/rtc-fm3130.c
@@ -136,8 +136,7 @@ static int fm3130_get_time(struct device *dev, struct rtc_time *t)
 		t->tm_hour, t->tm_mday,
 		t->tm_mon, t->tm_year, t->tm_wday);
 
-	/* initial clock setting can be undefined */
-	return rtc_valid_tm(t);
+	return 0;
 }
 
 
diff --git a/drivers/rtc/rtc-goldfish.c b/drivers/rtc/rtc-goldfish.c
index d67769265185..a1c44d0c8557 100644
--- a/drivers/rtc/rtc-goldfish.c
+++ b/drivers/rtc/rtc-goldfish.c
@@ -235,3 +235,5 @@ static struct platform_driver goldfish_rtc = {
 };
 
 module_platform_driver(goldfish_rtc);
+
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/rtc/rtc-isl12022.c b/drivers/rtc/rtc-isl12022.c
index 38586a024ee8..890ccfc9e5aa 100644
--- a/drivers/rtc/rtc-isl12022.c
+++ b/drivers/rtc/rtc-isl12022.c
@@ -104,8 +104,9 @@ static int isl12022_write_reg(struct i2c_client *client,
  * In the routines that deal directly with the isl12022 hardware, we use
  * rtc_time -- month 0-11, hour 0-23, yr = calendar year-epoch.
  */
-static int isl12022_get_datetime(struct i2c_client *client, struct rtc_time *tm)
+static int isl12022_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
+	struct i2c_client *client = to_i2c_client(dev);
 	uint8_t buf[ISL12022_REG_INT + 1];
 	int ret;
 
@@ -149,11 +150,12 @@ static int isl12022_get_datetime(struct i2c_client *client, struct rtc_time *tm)
 		tm->tm_sec, tm->tm_min, tm->tm_hour,
 		tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
-static int isl12022_set_datetime(struct i2c_client *client, struct rtc_time *tm)
+static int isl12022_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
+	struct i2c_client *client = to_i2c_client(dev);
 	struct isl12022 *isl12022 = i2c_get_clientdata(client);
 	size_t i;
 	int ret;
@@ -199,7 +201,7 @@ static int isl12022_set_datetime(struct i2c_client *client, struct rtc_time *tm)
 				return ret;
 		}
 
-		isl12022->write_enabled = 1;
+		isl12022->write_enabled = true;
 	}
 
 	/* hours, minutes and seconds */
@@ -228,16 +230,6 @@ static int isl12022_set_datetime(struct i2c_client *client, struct rtc_time *tm)
 	return 0;
 }
 
-static int isl12022_rtc_read_time(struct device *dev, struct rtc_time *tm)
-{
-	return isl12022_get_datetime(to_i2c_client(dev), tm);
-}
-
-static int isl12022_rtc_set_time(struct device *dev, struct rtc_time *tm)
-{
-	return isl12022_set_datetime(to_i2c_client(dev), tm);
-}
-
 static const struct rtc_class_ops isl12022_rtc_ops = {
 	.read_time	= isl12022_rtc_read_time,
 	.set_time	= isl12022_rtc_set_time,
diff --git a/drivers/rtc/rtc-isl12026.c b/drivers/rtc/rtc-isl12026.c
new file mode 100644
index 000000000000..97f594f9667c
--- /dev/null
+++ b/drivers/rtc/rtc-isl12026.c
@@ -0,0 +1,501 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * An I2C driver for the Intersil ISL 12026
+ *
+ * Copyright (c) 2018 Cavium, Inc.
+ */
+#include <linux/bcd.h>
+#include <linux/delay.h>
+#include <linux/i2c.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/nvmem-provider.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/rtc.h>
+#include <linux/slab.h>
+
+/* register offsets */
+#define ISL12026_REG_PWR	0x14
+# define ISL12026_REG_PWR_BSW	BIT(6)
+# define ISL12026_REG_PWR_SBIB	BIT(7)
+#define ISL12026_REG_SC		0x30
+#define ISL12026_REG_HR		0x32
+# define ISL12026_REG_HR_MIL	BIT(7)	/* military or 24 hour time */
+#define ISL12026_REG_SR		0x3f
+# define ISL12026_REG_SR_RTCF	BIT(0)
+# define ISL12026_REG_SR_WEL	BIT(1)
+# define ISL12026_REG_SR_RWEL	BIT(2)
+# define ISL12026_REG_SR_MBZ	BIT(3)
+# define ISL12026_REG_SR_OSCF	BIT(4)
+
+/* The EEPROM array responds at i2c address 0x57 */
+#define ISL12026_EEPROM_ADDR	0x57
+
+#define ISL12026_PAGESIZE 16
+#define ISL12026_NVMEM_WRITE_TIME 20
+
+struct isl12026 {
+	struct rtc_device *rtc;		/* RTC class device */
+	struct i2c_client *nvm_client;	/* EEPROM dummy client */
+};
+
+/* Read one register; returns its value (0..255) or a negative errno. */
+static int isl12026_read_reg(struct i2c_client *client, int reg)
+{
+	u8 reg_addr[] = {0, reg};
+	u8 data;
+	int nxfer;
+	struct i2c_msg xfer[] = {
+		{
+			.addr	= client->addr,
+			.flags	= 0,
+			.len	= sizeof(reg_addr),
+			.buf	= reg_addr
+		}, {
+			.addr	= client->addr,
+			.flags	= I2C_M_RD,
+			.len	= 1,
+			.buf	= &data
+		}
+	};
+
+	nxfer = i2c_transfer(client->adapter, xfer, ARRAY_SIZE(xfer));
+	if (nxfer == ARRAY_SIZE(xfer))
+		return data;
+
+	/* A short transfer has no errno of its own; report it as -EIO. */
+	dev_err(&client->dev, "read reg error, ret=%d\n", nxfer);
+	if (nxfer < 0)
+		return nxfer;
+	return -EIO;
+}
+
+/*
+ * Arm the device for a register or time write.
+ *
+ * Two transfers are issued in order: the first sets SR.WEL, the second
+ * sets SR.WEL | SR.RWEL.  Each is a 3-byte message: two address bytes
+ * followed by the new status register value.
+ *
+ * Returns 0 on success or a negative errno; a short transfer is -EIO.
+ */
+static int isl12026_arm_write(struct i2c_client *client)
+{
+	int nxfer;
+	u8 cmd[3] = {0, ISL12026_REG_SR, ISL12026_REG_SR_WEL};
+	struct i2c_msg xfer = {
+		.addr	= client->addr,
+		.flags	= 0,
+		.len	= sizeof(cmd),
+		.buf	= cmd
+	};
+
+	/* Set SR.WEL */
+	nxfer = i2c_transfer(client->adapter, &xfer, 1);
+	if (nxfer != 1) {
+		dev_err(&client->dev, "write error SR.WEL, ret=%d\n", nxfer);
+		return nxfer < 0 ? nxfer : -EIO;
+	}
+
+	/* Set SR.WEL and SR.RWEL */
+	cmd[2] = ISL12026_REG_SR_WEL | ISL12026_REG_SR_RWEL;
+	nxfer = i2c_transfer(client->adapter, &xfer, 1);
+	if (nxfer != 1) {
+		dev_err(&client->dev,
+			"write error SR.WEL|SR.RWEL, ret=%d\n", nxfer);
+		return nxfer < 0 ? nxfer : -EIO;
+	}
+
+	return 0;
+}
+
+/* Write 0 to SR, clearing the bits set by isl12026_arm_write(). */
+static int isl12026_disarm_write(struct i2c_client *client)
+{
+	int nxfer;
+	u8 cmd[3] = {0, ISL12026_REG_SR, 0};
+	struct i2c_msg xfer = {
+		.addr	= client->addr,
+		.flags	= 0,
+		.len	= sizeof(cmd),
+		.buf	= cmd
+	};
+
+	nxfer = i2c_transfer(client->adapter, &xfer, 1);
+	if (nxfer == 1)
+		return 0;
+
+	dev_err(&client->dev, "write error SR, ret=%d\n", nxfer);
+	/* A short transfer has no errno of its own; map it to -EIO. */
+	if (nxfer < 0)
+		return nxfer;
+	return -EIO;
+}
+
+/*
+ * Write one register: arm, send the value, sleep for
+ * ISL12026_NVMEM_WRITE_TIME ms, then disarm.  0 or a negative errno.
+ */
+static int isl12026_write_reg(struct i2c_client *client, int reg, u8 val)
+{
+	int rc;
+	u8 cmd[3] = {0, reg, val};
+	struct i2c_msg xfer = {
+		.addr	= client->addr,
+		.flags	= 0,
+		.len	= sizeof(cmd),
+		.buf	= cmd
+	};
+
+	rc = isl12026_arm_write(client);
+	if (rc)
+		return rc;
+
+	rc = i2c_transfer(client->adapter, &xfer, 1);
+	if (rc != 1) {
+		dev_err(&client->dev, "write error CCR, ret=%d\n", rc);
+		return rc < 0 ? rc : -EIO;
+	}
+
+	msleep(ISL12026_NVMEM_WRITE_TIME);
+	return isl12026_disarm_write(client);
+}
+
+/*
+ * rtc_class_ops.set_time: arm, write SC..Y2K in one transfer, disarm.
+ */
+static int isl12026_rtc_set_time(struct device *dev, struct rtc_time *tm)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	int rc;
+	u8 buf[10];
+	struct i2c_msg xfer = {
+		.addr	= client->addr,
+		.flags	= 0,
+		.len	= sizeof(buf),
+		.buf	= buf
+	};
+
+	rc = isl12026_arm_write(client);
+	if (rc)
+		return rc;
+
+	/* Set the CCR registers */
+	buf[0] = 0;
+	buf[1] = ISL12026_REG_SC;
+	buf[2] = bin2bcd(tm->tm_sec); /* SC */
+	buf[3] = bin2bcd(tm->tm_min); /* MN */
+	buf[4] = bin2bcd(tm->tm_hour) | ISL12026_REG_HR_MIL; /* HR */
+	buf[5] = bin2bcd(tm->tm_mday); /* DT */
+	buf[6] = bin2bcd(tm->tm_mon + 1); /* MO */
+	buf[7] = bin2bcd(tm->tm_year % 100); /* YR */
+	buf[8] = bin2bcd(tm->tm_wday & 7); /* DW */
+	buf[9] = bin2bcd(tm->tm_year >= 100 ? 20 : 19); /* Y2K */
+	rc = i2c_transfer(client->adapter, &xfer, 1);
+	if (rc != 1) {
+		dev_err(&client->dev, "write error CCR, ret=%d\n", rc);
+		return rc < 0 ? rc : -EIO;
+	}
+
+	return isl12026_disarm_write(client);
+}
+
+/*
+ * rtc_class_ops.read_time: warn on SR.RTCF/OSCF, then decode SC..Y2K.
+ */
+static int isl12026_rtc_read_time(struct device *dev, struct rtc_time *tm)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	u8 ccr[8];
+	u8 reg_addr[2], status;
+	int nxfer;
+	struct i2c_msg xfer[] = {
+		{
+			.addr	= client->addr,
+			.flags	= 0,
+			.len	= sizeof(reg_addr),
+			.buf	= reg_addr
+		}, {
+			.addr	= client->addr,
+			.flags	= I2C_M_RD,
+		}
+	};
+
+	/* First, read SR */
+	reg_addr[0] = 0;
+	reg_addr[1] = ISL12026_REG_SR;
+	xfer[1].len = 1;
+	xfer[1].buf = &status;
+
+	nxfer = i2c_transfer(client->adapter, xfer, ARRAY_SIZE(xfer));
+	if (nxfer != ARRAY_SIZE(xfer)) {
+		dev_err(&client->dev, "read error, ret=%d\n", nxfer);
+		return nxfer < 0 ? nxfer : -EIO;
+	}
+
+	if (status & ISL12026_REG_SR_RTCF)
+		dev_warn(&client->dev, "Real-Time Clock Failure on read\n");
+	if (status & ISL12026_REG_SR_OSCF)
+		dev_warn(&client->dev, "Oscillator Failure on read\n");
+
+	/* Second, CCR regs */
+	reg_addr[0] = 0;
+	reg_addr[1] = ISL12026_REG_SC;
+	xfer[1].len = sizeof(ccr);
+	xfer[1].buf = ccr;
+
+	nxfer = i2c_transfer(client->adapter, xfer, ARRAY_SIZE(xfer));
+	if (nxfer != ARRAY_SIZE(xfer)) {
+		dev_err(&client->dev, "read error, ret=%d\n", nxfer);
+		return nxfer < 0 ? nxfer : -EIO;
+	}
+
+	tm->tm_sec = bcd2bin(ccr[0] & 0x7F);
+	tm->tm_min = bcd2bin(ccr[1] & 0x7F);
+	if (ccr[2] & ISL12026_REG_HR_MIL) {
+		tm->tm_hour = bcd2bin(ccr[2] & 0x3F);
+	} else {
+		tm->tm_hour = bcd2bin(ccr[2] & 0x1F);
+		if (ccr[2] & 0x20)
+			tm->tm_hour += 12;	/* bit 5 set: add 12 hours */
+	}
+	tm->tm_mday = bcd2bin(ccr[3] & 0x3F);
+	tm->tm_mon = bcd2bin(ccr[4] & 0x1F) - 1;
+	tm->tm_wday = ccr[6] & 0x07;
+	tm->tm_year = bcd2bin(ccr[5]);
+	if (bcd2bin(ccr[7]) == 20)
+		tm->tm_year += 100;
+
+	return 0;
+}
+
+static const struct rtc_class_ops isl12026_rtc_ops = {
+	.read_time	= isl12026_rtc_read_time, /* SR check, then CCR read */
+	.set_time	= isl12026_rtc_set_time, /* arm, CCR write, disarm */
+};
+
+/*
+ * nvmem reg_read: read @bytes at @offset from the EEPROM under ops_lock.
+ */
+static int isl12026_nvm_read(void *p, unsigned int offset,
+			     void *val, size_t bytes)
+{
+	struct isl12026 *priv = p;
+	struct i2c_client *nvm = priv->nvm_client;
+	int rc;
+	/* 2 bytes of address, most significant first */
+	u8 ee_addr[2] = { offset >> 8, offset };
+	struct i2c_msg xfer[] = {
+		{
+			.addr	= nvm->addr,
+			.flags	= 0,
+			.len	= sizeof(ee_addr),
+			.buf	= ee_addr
+		}, {
+			.addr	= nvm->addr,
+			.flags	= I2C_M_RD,
+			.len	= bytes,
+			.buf	= val
+		}
+	};
+
+	/*
+	 * offset and bytes checked and limited by nvmem core, so
+	 * proceed without further checks.
+	 */
+	rc = mutex_lock_interruptible(&priv->rtc->ops_lock);
+	if (rc)
+		return rc;
+
+	rc = i2c_transfer(nvm->adapter, xfer, ARRAY_SIZE(xfer));
+
+	mutex_unlock(&priv->rtc->ops_lock);
+
+	if (rc == ARRAY_SIZE(xfer))
+		return 0;
+
+	dev_err(&nvm->dev, "nvmem read error, ret=%d\n", rc);
+	return rc < 0 ? rc : -EIO;
+}
+
+/*
+ * nvmem reg_write callback: write @bytes bytes from @val to the EEPROM
+ * client starting at @offset.  The data is sent in chunks that never
+ * cross an ISL12026_PAGESIZE boundary, sleeping
+ * ISL12026_NVMEM_WRITE_TIME ms after each chunk, all under the rtc
+ * ops_lock.  Returns 0 or a negative errno.
+ */
+static int isl12026_nvm_write(void *p, unsigned int offset,
+			      void *val, size_t bytes)
+{
+	struct isl12026 *priv = p;
+	struct i2c_client *nvm = priv->nvm_client;
+	const u8 *src = val;
+	size_t room, len;
+	int rc;
+	u8 frame[ISL12026_PAGESIZE + 2]; /* page + 2 address bytes */
+	struct i2c_msg xfer = {
+		.addr	= nvm->addr,
+		.flags	= 0,
+		.buf	= frame
+	};
+
+	/*
+	 * offset and bytes checked and limited by nvmem core, so
+	 * proceed without further checks.
+	 */
+	rc = mutex_lock_interruptible(&priv->rtc->ops_lock);
+	if (rc)
+		return rc;
+
+	for (; bytes; bytes -= len, offset += len, src += len) {
+		/* Bytes left before the next page boundary */
+		room = ISL12026_PAGESIZE - offset % ISL12026_PAGESIZE;
+		len = min(bytes, room);
+		/*
+		 * 2 bytes of address, most significant first, followed
+		 * by page data bytes
+		 */
+		frame[0] = offset >> 8;
+		frame[1] = offset;
+		memcpy(frame + 2, src, len);
+		xfer.len = len + 2;
+		rc = i2c_transfer(nvm->adapter, &xfer, 1);
+		if (rc != 1) {
+			dev_err(&nvm->dev, "nvmem write error, ret=%d\n", rc);
+			rc = rc < 0 ? rc : -EIO;
+			break;
+		}
+		rc = 0;
+		msleep(ISL12026_NVMEM_WRITE_TIME);
+	}
+
+	mutex_unlock(&priv->rtc->ops_lock);
+
+	return rc;
+}
+
+/*
+ * Apply the optional "isil,pwr-bsw" / "isil,pwr-sbib" device tree
+ * properties to the PWR register.  A property that cannot be read
+ * leaves its bit untouched; PWR is only written when the value changes.
+ * Failures are logged or ignored, never propagated.
+ */
+static void isl12026_force_power_modes(struct i2c_client *client)
+{
+	struct device_node *np = client->dev.of_node;
+	u32 bsw_val, sbib_val;
+	bool have_bsw, have_sbib;
+	int cur, want;
+
+	/*
+	 * If we can read the of_property, set the specified value.
+	 * If there is an error reading the of_property (likely
+	 * because it does not exist), keep the current value.
+	 */
+	have_bsw = !of_property_read_u32(np, "isil,pwr-bsw", &bsw_val);
+	have_sbib = !of_property_read_u32(np, "isil,pwr-sbib", &sbib_val);
+
+	/* Check if PWR.BSW and/or PWR.SBIB need specified values */
+	if (!(have_bsw || have_sbib))
+		return;
+
+	cur = isl12026_read_reg(client, ISL12026_REG_PWR);
+	if (cur < 0) {
+		dev_warn(&client->dev, "Error: Failed to read PWR %d\n", cur);
+		return;
+	}
+
+	want = cur;
+
+	if (have_bsw) {
+		want &= ~ISL12026_REG_PWR_BSW;
+		if (bsw_val)
+			want |= ISL12026_REG_PWR_BSW;
+	} /* else keep current BSW */
+
+	if (have_sbib) {
+		want &= ~ISL12026_REG_PWR_SBIB;
+		if (sbib_val)
+			want |= ISL12026_REG_PWR_SBIB;
+	} /* else keep current SBIB */
+
+	if (want == cur)
+		return;
+
+	dev_dbg(&client->dev, "PWR: %02x\n", cur);
+	dev_dbg(&client->dev, "Updating PWR to: %02x\n", want);
+	isl12026_write_reg(client, ISL12026_REG_PWR, want);
+}
+
+/* Probe: create the EEPROM dummy client, then the RTC and its nvmem. */
+static int isl12026_probe_new(struct i2c_client *client)
+{
+	struct isl12026 *priv;
+	int ret;
+	struct nvmem_config nvm_cfg = {
+		.name = "isl12026-",
+		.base_dev = &client->dev,
+		.stride = 1,
+		.word_size = 1,
+		.size = 512,
+		.reg_read = isl12026_nvm_read,
+		.reg_write = isl12026_nvm_write,
+	};
+
+	if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C))
+		return -ENODEV;
+
+	priv = devm_kzalloc(&client->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+	i2c_set_clientdata(client, priv);
+	isl12026_force_power_modes(client);
+
+	priv->nvm_client = i2c_new_dummy(client->adapter, ISL12026_EEPROM_ADDR);
+	if (!priv->nvm_client)
+		return -ENOMEM;
+
+	/* The dummy client is not devm-managed: drop it on any failure. */
+	priv->rtc = devm_rtc_allocate_device(&client->dev);
+	ret = PTR_ERR_OR_ZERO(priv->rtc);
+	if (!ret) {
+		priv->rtc->ops = &isl12026_rtc_ops;
+		nvm_cfg.priv = priv;
+		ret = rtc_nvmem_register(priv->rtc, &nvm_cfg);
+	}
+	if (!ret)
+		ret = rtc_register_device(priv->rtc);
+	if (ret)
+		i2c_unregister_device(priv->nvm_client);
+	return ret;
+}
+
+static int isl12026_remove(struct i2c_client *client)
+{
+	struct isl12026 *priv = i2c_get_clientdata(client);
+
+	i2c_unregister_device(priv->nvm_client); /* the probe-time dummy */
+	return 0;
+}
+
+static const struct of_device_id isl12026_dt_match[] = {
+	{ .compatible = "isil,isl12026" },
+	{ }	/* empty terminating entry */
+};
+MODULE_DEVICE_TABLE(of, isl12026_dt_match);
+
+static struct i2c_driver isl12026_driver = {
+	.driver		= {
+		.name	= "rtc-isl12026",
+		.of_match_table = isl12026_dt_match,
+	},
+	.probe_new	= isl12026_probe_new,
+	.remove		= isl12026_remove, /* unregisters nvm_client */
+};
+
+module_i2c_driver(isl12026_driver);
+
+MODULE_DESCRIPTION("ISL 12026 RTC driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/rtc/rtc-isl1208.c b/drivers/rtc/rtc-isl1208.c
index 8dd299c6a1f3..1a2c38cc0178 100644
--- a/drivers/rtc/rtc-isl1208.c
+++ b/drivers/rtc/rtc-isl1208.c
@@ -459,6 +459,11 @@ isl1208_i2c_set_time(struct i2c_client *client, struct rtc_time const *tm)
 	}
 
 	/* clear WRTC again */
+	sr = isl1208_i2c_get_sr(client);
+	if (sr < 0) {
+		dev_err(&client->dev, "%s: reading SR failed\n", __func__);
+		return sr;
+	}
 	sr = i2c_smbus_write_byte_data(client, ISL1208_REG_SR,
 				       sr & ~ISL1208_REG_SR_WRTC);
 	if (sr < 0) {
@@ -630,29 +635,12 @@ isl1208_probe(struct i2c_client *client, const struct i2c_device_id *id)
 	if (isl1208_i2c_validate_client(client) < 0)
 		return -ENODEV;
 
-	if (client->irq > 0) {
-		rc = devm_request_threaded_irq(&client->dev, client->irq, NULL,
-					       isl1208_rtc_interrupt,
-					       IRQF_SHARED | IRQF_ONESHOT,
-					       isl1208_driver.driver.name,
-					       client);
-		if (!rc) {
-			device_init_wakeup(&client->dev, 1);
-			enable_irq_wake(client->irq);
-		} else {
-			dev_err(&client->dev,
-				"Unable to request irq %d, no alarm support\n",
-				client->irq);
-			client->irq = 0;
-		}
-	}
-
-	rtc = devm_rtc_device_register(&client->dev, isl1208_driver.driver.name,
-				  &isl1208_rtc_ops,
-				  THIS_MODULE);
+	rtc = devm_rtc_allocate_device(&client->dev);
 	if (IS_ERR(rtc))
 		return PTR_ERR(rtc);
 
+	rtc->ops = &isl1208_rtc_ops;
+
 	i2c_set_clientdata(client, rtc);
 
 	rc = isl1208_i2c_get_sr(client);
@@ -669,7 +657,24 @@ isl1208_probe(struct i2c_client *client, const struct i2c_device_id *id)
 	if (rc)
 		return rc;
 
-	return 0;
+	if (client->irq > 0) {
+		rc = devm_request_threaded_irq(&client->dev, client->irq, NULL,
+					       isl1208_rtc_interrupt,
+					       IRQF_SHARED | IRQF_ONESHOT,
+					       isl1208_driver.driver.name,
+					       client);
+		if (!rc) {
+			device_init_wakeup(&client->dev, 1);
+			enable_irq_wake(client->irq);
+		} else {
+			dev_err(&client->dev,
+				"Unable to request irq %d, no alarm support\n",
+				client->irq);
+			client->irq = 0;
+		}
+	}
+
+	return rtc_register_device(rtc);
 }
 
 static int
diff --git a/drivers/rtc/rtc-jz4740.c b/drivers/rtc/rtc-jz4740.c
index ff65a7d2b9c9..d0a891777f44 100644
--- a/drivers/rtc/rtc-jz4740.c
+++ b/drivers/rtc/rtc-jz4740.c
@@ -173,7 +173,7 @@ static int jz4740_rtc_read_time(struct device *dev, struct rtc_time *time)
 
 	rtc_time_to_tm(secs, time);
 
-	return rtc_valid_tm(time);
+	return 0;
 }
 
 static int jz4740_rtc_set_mmss(struct device *dev, unsigned long secs)
diff --git a/drivers/rtc/rtc-lib.c b/drivers/rtc/rtc-lib.c
index 1ae7da5cfc60..4a3c0f3aab14 100644
--- a/drivers/rtc/rtc-lib.c
+++ b/drivers/rtc/rtc-lib.c
@@ -52,13 +52,11 @@ EXPORT_SYMBOL(rtc_year_days);
  */
 void rtc_time64_to_tm(time64_t time, struct rtc_time *tm)
 {
-	unsigned int month, year;
-	unsigned long secs;
+	unsigned int month, year, secs;
 	int days;
 
 	/* time must be positive */
-	days = div_s64(time, 86400);
-	secs = time - (unsigned int) days * 86400;
+	days = div_s64_rem(time, 86400, &secs);
 
 	/* day of the week, 1970-01-01 was a Thursday */
 	tm->tm_wday = (days + 4) % 7;
@@ -67,7 +65,7 @@ void rtc_time64_to_tm(time64_t time, struct rtc_time *tm)
 	days -= (year - 1970) * 365
 		+ LEAPS_THRU_END_OF(year - 1)
 		- LEAPS_THRU_END_OF(1970 - 1);
-	if (days < 0) {
+	while (days < 0) {
 		year -= 1;
 		days += 365 + is_leap_year(year);
 	}
diff --git a/drivers/rtc/rtc-lpc24xx.c b/drivers/rtc/rtc-lpc24xx.c
index 59d99596fdeb..14dc7b04fae0 100644
--- a/drivers/rtc/rtc-lpc24xx.c
+++ b/drivers/rtc/rtc-lpc24xx.c
@@ -110,7 +110,7 @@ static int lpc24xx_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	tm->tm_year = CT1_YEAR(ct1);
 	tm->tm_yday = CT2_DOY(ct2);
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int lpc24xx_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *wkalrm)
diff --git a/drivers/rtc/rtc-lpc32xx.c b/drivers/rtc/rtc-lpc32xx.c
index 887871c3d526..3ba87239aacc 100644
--- a/drivers/rtc/rtc-lpc32xx.c
+++ b/drivers/rtc/rtc-lpc32xx.c
@@ -70,7 +70,7 @@ static int lpc32xx_rtc_read_time(struct device *dev, struct rtc_time *time)
 	elapsed_sec = rtc_readl(rtc, LPC32XX_RTC_UCOUNT);
 	rtc_time_to_tm(elapsed_sec, time);
 
-	return rtc_valid_tm(time);
+	return 0;
 }
 
 static int lpc32xx_rtc_set_mmss(struct device *dev, unsigned long secs)
diff --git a/drivers/rtc/rtc-ls1x.c b/drivers/rtc/rtc-ls1x.c
index e04ca54f21e2..045af1135e48 100644
--- a/drivers/rtc/rtc-ls1x.c
+++ b/drivers/rtc/rtc-ls1x.c
@@ -98,7 +98,7 @@ static int ls1x_rtc_read_time(struct device *dev, struct rtc_time *rtm)
 			ls1x_get_min(v), ls1x_get_sec(v));
 	rtc_time_to_tm(t, rtm);
 
-	return rtc_valid_tm(rtm);
+	return 0;
 }
 
 static int ls1x_rtc_set_time(struct device *dev, struct  rtc_time *rtm)
diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c
index c90fba3ed861..ad03e2f12f5d 100644
--- a/drivers/rtc/rtc-m41t80.c
+++ b/drivers/rtc/rtc-m41t80.c
@@ -73,7 +73,6 @@
 #define M41T80_FEATURE_WD	BIT(3)	/* Extra watchdog resolution */
 #define M41T80_FEATURE_SQ_ALT	BIT(4)	/* RSx bits are in reg 4 */
 
-static DEFINE_MUTEX(m41t80_rtc_mutex);
 static const struct i2c_device_id m41t80_id[] = {
 	{ "m41t62", M41T80_FEATURE_SQ | M41T80_FEATURE_SQ_ALT },
 	{ "m41t65", M41T80_FEATURE_HT | M41T80_FEATURE_WD },
@@ -199,9 +198,9 @@ static irqreturn_t m41t80_handle_irq(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-static int m41t80_get_datetime(struct i2c_client *client,
-			       struct rtc_time *tm)
+static int m41t80_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
+	struct i2c_client *client = to_i2c_client(dev);
 	unsigned char buf[8];
 	int err, flags;
 
@@ -230,12 +229,12 @@ static int m41t80_get_datetime(struct i2c_client *client,
 
 	/* assume 20YY not 19YY, and ignore the Century Bit */
 	tm->tm_year = bcd2bin(buf[M41T80_REG_YEAR]) + 100;
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
-/* Sets the given date and time to the real time clock. */
-static int m41t80_set_datetime(struct i2c_client *client, struct rtc_time *tm)
+static int m41t80_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
+	struct i2c_client *client = to_i2c_client(dev);
 	struct m41t80_data *clientdata = i2c_get_clientdata(client);
 	unsigned char buf[8];
 	int err, flags;
@@ -298,16 +297,6 @@ static int m41t80_rtc_proc(struct device *dev, struct seq_file *seq)
 	return 0;
 }
 
-static int m41t80_rtc_read_time(struct device *dev, struct rtc_time *tm)
-{
-	return m41t80_get_datetime(to_i2c_client(dev), tm);
-}
-
-static int m41t80_rtc_set_time(struct device *dev, struct rtc_time *tm)
-{
-	return m41t80_set_datetime(to_i2c_client(dev), tm);
-}
-
 static int m41t80_alarm_irq_enable(struct device *dev, unsigned int enabled)
 {
 	struct i2c_client *client = to_i2c_client(dev);
@@ -598,6 +587,7 @@ static struct clk *m41t80_sqw_register_clk(struct m41t80_data *m41t80)
  *
  *****************************************************************************
  */
+static DEFINE_MUTEX(m41t80_rtc_mutex);
 static struct i2c_client *save_client;
 
 /* Default margin */
@@ -885,7 +875,6 @@ static int m41t80_probe(struct i2c_client *client,
 {
 	struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
 	int rc = 0;
-	struct rtc_device *rtc = NULL;
 	struct rtc_time tm;
 	struct m41t80_data *m41t80_data = NULL;
 	bool wakeup_source = false;
@@ -909,6 +898,10 @@ static int m41t80_probe(struct i2c_client *client,
 		m41t80_data->features = id->driver_data;
 	i2c_set_clientdata(client, m41t80_data);
 
+	m41t80_data->rtc =  devm_rtc_allocate_device(&client->dev);
+	if (IS_ERR(m41t80_data->rtc))
+		return PTR_ERR(m41t80_data->rtc);
+
 #ifdef CONFIG_OF
 	wakeup_source = of_property_read_bool(client->dev.of_node,
 					      "wakeup-source");
@@ -932,15 +925,11 @@ static int m41t80_probe(struct i2c_client *client,
 		device_init_wakeup(&client->dev, true);
 	}
 
-	rtc = devm_rtc_device_register(&client->dev, client->name,
-				       &m41t80_rtc_ops, THIS_MODULE);
-	if (IS_ERR(rtc))
-		return PTR_ERR(rtc);
+	m41t80_data->rtc->ops = &m41t80_rtc_ops;
 
-	m41t80_data->rtc = rtc;
 	if (client->irq <= 0) {
 		/* We cannot support UIE mode if we do not have an IRQ line */
-		rtc->uie_unsupported = 1;
+		m41t80_data->rtc->uie_unsupported = 1;
 	}
 
 	/* Make sure HT (Halt Update) bit is cleared */
@@ -948,7 +937,7 @@ static int m41t80_probe(struct i2c_client *client,
 
 	if (rc >= 0 && rc & M41T80_ALHOUR_HT) {
 		if (m41t80_data->features & M41T80_FEATURE_HT) {
-			m41t80_get_datetime(client, &tm);
+			m41t80_rtc_read_time(&client->dev, &tm);
 			dev_info(&client->dev, "HT bit was set!\n");
 			dev_info(&client->dev,
 				 "Power Down at %04i-%02i-%02i %02i:%02i:%02i\n",
@@ -993,6 +982,11 @@ static int m41t80_probe(struct i2c_client *client,
 	if (m41t80_data->features & M41T80_FEATURE_SQ)
 		m41t80_sqw_register_clk(m41t80_data);
 #endif
+
+	rc = rtc_register_device(m41t80_data->rtc);
+	if (rc)
+		return rc;
+
 	return 0;
 }
 
diff --git a/drivers/rtc/rtc-m41t93.c b/drivers/rtc/rtc-m41t93.c
index 5ac45fc1a787..4a08a9dabc82 100644
--- a/drivers/rtc/rtc-m41t93.c
+++ b/drivers/rtc/rtc-m41t93.c
@@ -159,7 +159,7 @@ static int m41t93_get_time(struct device *dev, struct rtc_time *tm)
 		tm->tm_hour, tm->tm_mday,
 		tm->tm_mon, tm->tm_year, tm->tm_wday);
 
-	return ret < 0 ? ret : rtc_valid_tm(tm);
+	return ret;
 }
 
 
diff --git a/drivers/rtc/rtc-m41t94.c b/drivers/rtc/rtc-m41t94.c
index 1f0eb79e69f9..bab82b4be356 100644
--- a/drivers/rtc/rtc-m41t94.c
+++ b/drivers/rtc/rtc-m41t94.c
@@ -99,8 +99,7 @@ static int m41t94_read_time(struct device *dev, struct rtc_time *tm)
 		tm->tm_hour, tm->tm_mday,
 		tm->tm_mon, tm->tm_year, tm->tm_wday);
 
-	/* initial clock setting can be undefined */
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static const struct rtc_class_ops m41t94_rtc_ops = {
diff --git a/drivers/rtc/rtc-m48t35.c b/drivers/rtc/rtc-m48t35.c
index 810f4ea481e4..0cf6507de3c7 100644
--- a/drivers/rtc/rtc-m48t35.c
+++ b/drivers/rtc/rtc-m48t35.c
@@ -84,7 +84,7 @@ static int m48t35_read_time(struct device *dev, struct rtc_time *tm)
 		tm->tm_year += 100;
 
 	tm->tm_mon--;
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int m48t35_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-m48t59.c b/drivers/rtc/rtc-m48t59.c
index d99a705bec07..216fac62c888 100644
--- a/drivers/rtc/rtc-m48t59.c
+++ b/drivers/rtc/rtc-m48t59.c
@@ -105,7 +105,7 @@ static int m48t59_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	dev_dbg(dev, "RTC read time %04d-%02d-%02d %02d/%02d/%02d\n",
 		tm->tm_year + 1900, tm->tm_mon, tm->tm_mday,
 		tm->tm_hour, tm->tm_min, tm->tm_sec);
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int m48t59_rtc_set_time(struct device *dev, struct rtc_time *tm)
@@ -334,16 +334,16 @@ static const struct rtc_class_ops m48t02_rtc_ops = {
 	.set_time	= m48t59_rtc_set_time,
 };
 
-static ssize_t m48t59_nvram_read(struct file *filp, struct kobject *kobj,
-				struct bin_attribute *bin_attr,
-				char *buf, loff_t pos, size_t size)
+static int m48t59_nvram_read(void *priv, unsigned int offset, void *val,
+			     size_t size)
 {
-	struct device *dev = container_of(kobj, struct device, kobj);
-	struct platform_device *pdev = to_platform_device(dev);
+	struct platform_device *pdev = priv;
+	struct device *dev = &pdev->dev;
 	struct m48t59_plat_data *pdata = dev_get_platdata(&pdev->dev);
 	struct m48t59_private *m48t59 = platform_get_drvdata(pdev);
 	ssize_t cnt = 0;
 	unsigned long flags;
+	u8 *buf = val;
 
 	spin_lock_irqsave(&m48t59->lock, flags);
 
@@ -352,19 +352,19 @@ static ssize_t m48t59_nvram_read(struct file *filp, struct kobject *kobj,
 
 	spin_unlock_irqrestore(&m48t59->lock, flags);
 
-	return cnt;
+	return 0;
 }
 
-static ssize_t m48t59_nvram_write(struct file *filp, struct kobject *kobj,
-				struct bin_attribute *bin_attr,
-				char *buf, loff_t pos, size_t size)
+static int m48t59_nvram_write(void *priv, unsigned int offset, void *val,
+			      size_t size)
 {
-	struct device *dev = container_of(kobj, struct device, kobj);
-	struct platform_device *pdev = to_platform_device(dev);
+	struct platform_device *pdev = priv;
+	struct device *dev = &pdev->dev;
 	struct m48t59_plat_data *pdata = dev_get_platdata(&pdev->dev);
 	struct m48t59_private *m48t59 = platform_get_drvdata(pdev);
 	ssize_t cnt = 0;
 	unsigned long flags;
+	u8 *buf = val;
 
 	spin_lock_irqsave(&m48t59->lock, flags);
 
@@ -373,18 +373,9 @@ static ssize_t m48t59_nvram_write(struct file *filp, struct kobject *kobj,
 
 	spin_unlock_irqrestore(&m48t59->lock, flags);
 
-	return cnt;
+	return 0;
 }
 
-static struct bin_attribute m48t59_nvram_attr = {
-	.attr = {
-		.name = "nvram",
-		.mode = S_IRUGO | S_IWUSR,
-	},
-	.read = m48t59_nvram_read,
-	.write = m48t59_nvram_write,
-};
-
 static int m48t59_rtc_probe(struct platform_device *pdev)
 {
 	struct m48t59_plat_data *pdata = dev_get_platdata(&pdev->dev);
@@ -393,6 +384,14 @@ static int m48t59_rtc_probe(struct platform_device *pdev)
 	int ret = -ENOMEM;
 	char *name;
 	const struct rtc_class_ops *ops;
+	struct nvmem_config nvmem_cfg = {
+		.name = "m48t59-",
+		.word_size = 1,
+		.stride = 1,
+		.reg_read = m48t59_nvram_read,
+		.reg_write = m48t59_nvram_write,
+		.priv = pdev,
+	};
 
 	/* This chip could be memory-mapped or I/O-mapped */
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -480,23 +479,22 @@ static int m48t59_rtc_probe(struct platform_device *pdev)
 	spin_lock_init(&m48t59->lock);
 	platform_set_drvdata(pdev, m48t59);
 
-	m48t59->rtc = devm_rtc_device_register(&pdev->dev, name, ops,
-						THIS_MODULE);
+	m48t59->rtc = devm_rtc_allocate_device(&pdev->dev);
 	if (IS_ERR(m48t59->rtc))
 		return PTR_ERR(m48t59->rtc);
 
-	m48t59_nvram_attr.size = pdata->offset;
+	m48t59->rtc->nvram_old_abi = true;
+	m48t59->rtc->ops = ops;
 
-	ret = sysfs_create_bin_file(&pdev->dev.kobj, &m48t59_nvram_attr);
+	nvmem_cfg.size = pdata->offset;
+	ret = rtc_nvmem_register(m48t59->rtc, &nvmem_cfg);
 	if (ret)
 		return ret;
 
-	return 0;
-}
+	ret = rtc_register_device(m48t59->rtc);
+	if (ret)
+		return ret;
 
-static int m48t59_rtc_remove(struct platform_device *pdev)
-{
-	sysfs_remove_bin_file(&pdev->dev.kobj, &m48t59_nvram_attr);
 	return 0;
 }
 
@@ -508,7 +506,6 @@ static struct platform_driver m48t59_rtc_driver = {
 		.name	= "rtc-m48t59",
 	},
 	.probe		= m48t59_rtc_probe,
-	.remove		= m48t59_rtc_remove,
 };
 
 module_platform_driver(m48t59_rtc_driver);
diff --git a/drivers/rtc/rtc-m48t86.c b/drivers/rtc/rtc-m48t86.c
index d9aea9b6d9cd..a9533535c3b7 100644
--- a/drivers/rtc/rtc-m48t86.c
+++ b/drivers/rtc/rtc-m48t86.c
@@ -100,7 +100,7 @@ static int m48t86_rtc_read_time(struct device *dev, struct rtc_time *tm)
 		if (m48t86_readb(dev, M48T86_HOUR) & 0x80)
 			tm->tm_hour += 12;
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int m48t86_rtc_set_time(struct device *dev, struct rtc_time *tm)
@@ -218,21 +218,21 @@ static bool m48t86_verify_chip(struct platform_device *pdev)
 	return false;
 }
 
-static struct nvmem_config m48t86_nvmem_cfg = {
-	.name = "m48t86_nvram",
-	.word_size = 1,
-	.stride = 1,
-	.size = M48T86_NVRAM_LEN,
-	.reg_read = m48t86_nvram_read,
-	.reg_write = m48t86_nvram_write,
-};
-
 static int m48t86_rtc_probe(struct platform_device *pdev)
 {
 	struct m48t86_rtc_info *info;
 	struct resource *res;
 	unsigned char reg;
 	int err;
+	struct nvmem_config m48t86_nvmem_cfg = {
+		.name = "m48t86_nvram",
+		.word_size = 1,
+		.stride = 1,
+		.size = M48T86_NVRAM_LEN,
+		.reg_read = m48t86_nvram_read,
+		.reg_write = m48t86_nvram_write,
+		.priv = &pdev->dev,
+	};
 
 	info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL);
 	if (!info)
@@ -264,15 +264,14 @@ static int m48t86_rtc_probe(struct platform_device *pdev)
 		return PTR_ERR(info->rtc);
 
 	info->rtc->ops = &m48t86_rtc_ops;
-
-	m48t86_nvmem_cfg.priv = &pdev->dev;
-	info->rtc->nvmem_config = &m48t86_nvmem_cfg;
 	info->rtc->nvram_old_abi = true;
 
 	err = rtc_register_device(info->rtc);
 	if (err)
 		return err;
 
+	rtc_nvmem_register(info->rtc, &m48t86_nvmem_cfg);
+
 	/* read battery status */
 	reg = m48t86_readb(&pdev->dev, M48T86_D);
 	dev_info(&pdev->dev, "battery %s\n",
diff --git a/drivers/rtc/rtc-max6900.c b/drivers/rtc/rtc-max6900.c
index cbdc86a560ba..ab60f13fa3ef 100644
--- a/drivers/rtc/rtc-max6900.c
+++ b/drivers/rtc/rtc-max6900.c
@@ -139,8 +139,9 @@ static int max6900_i2c_write_regs(struct i2c_client *client, u8 const *buf)
 	return -EIO;
 }
 
-static int max6900_i2c_read_time(struct i2c_client *client, struct rtc_time *tm)
+static int max6900_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
+	struct i2c_client *client = to_i2c_client(dev);
 	int rc;
 	u8 regs[MAX6900_REG_LEN];
 
@@ -157,7 +158,7 @@ static int max6900_i2c_read_time(struct i2c_client *client, struct rtc_time *tm)
 		      bcd2bin(regs[MAX6900_REG_CENTURY]) * 100 - 1900;
 	tm->tm_wday = bcd2bin(regs[MAX6900_REG_DW]);
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int max6900_i2c_clear_write_protect(struct i2c_client *client)
@@ -165,9 +166,9 @@ static int max6900_i2c_clear_write_protect(struct i2c_client *client)
 	return i2c_smbus_write_byte_data(client, MAX6900_REG_CONTROL_WRITE, 0);
 }
 
-static int
-max6900_i2c_set_time(struct i2c_client *client, struct rtc_time const *tm)
+static int max6900_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
+	struct i2c_client *client = to_i2c_client(dev);
 	u8 regs[MAX6900_REG_LEN];
 	int rc;
 
@@ -193,16 +194,6 @@ max6900_i2c_set_time(struct i2c_client *client, struct rtc_time const *tm)
 	return 0;
 }
 
-static int max6900_rtc_read_time(struct device *dev, struct rtc_time *tm)
-{
-	return max6900_i2c_read_time(to_i2c_client(dev), tm);
-}
-
-static int max6900_rtc_set_time(struct device *dev, struct rtc_time *tm)
-{
-	return max6900_i2c_set_time(to_i2c_client(dev), tm);
-}
-
 static const struct rtc_class_ops max6900_rtc_ops = {
 	.read_time = max6900_rtc_read_time,
 	.set_time = max6900_rtc_set_time,
diff --git a/drivers/rtc/rtc-max6902.c b/drivers/rtc/rtc-max6902.c
index 315d09e0f2c1..745827463367 100644
--- a/drivers/rtc/rtc-max6902.c
+++ b/drivers/rtc/rtc-max6902.c
@@ -85,7 +85,7 @@ static int max6902_read_time(struct device *dev, struct rtc_time *dt)
 	dt->tm_year += century;
 	dt->tm_year -= 1900;
 
-	return rtc_valid_tm(dt);
+	return 0;
 }
 
 static int max6902_set_time(struct device *dev, struct rtc_time *dt)
diff --git a/drivers/rtc/rtc-max6916.c b/drivers/rtc/rtc-max6916.c
index 623ab27b2757..7e908a490cf6 100644
--- a/drivers/rtc/rtc-max6916.c
+++ b/drivers/rtc/rtc-max6916.c
@@ -75,7 +75,7 @@ static int max6916_read_time(struct device *dev, struct rtc_time *dt)
 	dt->tm_wday = bcd2bin(buf[5]) - 1;
 	dt->tm_year = bcd2bin(buf[6]) + 100;
 
-	return rtc_valid_tm(dt);
+	return 0;
 }
 
 static int max6916_set_time(struct device *dev, struct rtc_time *dt)
diff --git a/drivers/rtc/rtc-max77686.c b/drivers/rtc/rtc-max77686.c
index 182fdd00e290..cefde273fae6 100644
--- a/drivers/rtc/rtc-max77686.c
+++ b/drivers/rtc/rtc-max77686.c
@@ -364,11 +364,9 @@ static int max77686_rtc_read_time(struct device *dev, struct rtc_time *tm)
 
 	max77686_rtc_data_to_tm(data, tm, info);
 
-	ret = rtc_valid_tm(tm);
-
 out:
 	mutex_unlock(&info->lock);
 	return ret;
 }
 
 static int max77686_rtc_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-max8997.c b/drivers/rtc/rtc-max8997.c
index db984d4bf952..e8cee123e8aa 100644
--- a/drivers/rtc/rtc-max8997.c
+++ b/drivers/rtc/rtc-max8997.c
@@ -153,7 +153,7 @@ static int max8997_rtc_read_time(struct device *dev, struct rtc_time *tm)
 
 	max8997_rtc_data_to_tm(data, tm, info->rtc_24hr_mode);
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int max8997_rtc_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-max8998.c b/drivers/rtc/rtc-max8998.c
index 30804b00985e..d8c0f9b3f87d 100644
--- a/drivers/rtc/rtc-max8998.c
+++ b/drivers/rtc/rtc-max8998.c
@@ -120,7 +120,7 @@ static int max8998_rtc_read_time(struct device *dev, struct rtc_time *tm)
 
 	max8998_data_to_tm(data, tm);
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int max8998_rtc_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-mc13xxx.c b/drivers/rtc/rtc-mc13xxx.c
index 30b8ef6a3676..1f892b238ddb 100644
--- a/drivers/rtc/rtc-mc13xxx.c
+++ b/drivers/rtc/rtc-mc13xxx.c
@@ -85,7 +85,7 @@ static int mc13xxx_rtc_read_time(struct device *dev, struct rtc_time *tm)
 
 	rtc_time64_to_tm((time64_t)days1 * SEC_PER_DAY + seconds, tm);
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int mc13xxx_rtc_set_mmss(struct device *dev, time64_t secs)
diff --git a/drivers/rtc/rtc-mcp795.c b/drivers/rtc/rtc-mcp795.c
index 77f21331ae21..00e11c1b2186 100644
--- a/drivers/rtc/rtc-mcp795.c
+++ b/drivers/rtc/rtc-mcp795.c
@@ -82,7 +82,7 @@ static int mcp795_rtcc_write(struct device *dev, u8 addr, u8 *data, u8 count)
 {
 	struct spi_device *spi = to_spi_device(dev);
 	int ret;
-	u8 tx[2 + count];
+	u8 tx[257];
 
 	tx[0] = MCP795_WRITE;
 	tx[1] = addr;
@@ -262,7 +262,7 @@ static int mcp795_read_time(struct device *dev, struct rtc_time *tim)
 			tim->tm_year + 1900, tim->tm_mon, tim->tm_mday,
 			tim->tm_wday, tim->tm_hour, tim->tm_min, tim->tm_sec);
 
-	return rtc_valid_tm(tim);
+	return 0;
 }
 
 static int mcp795_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
diff --git a/drivers/rtc/rtc-mpc5121.c b/drivers/rtc/rtc-mpc5121.c
index 4ca4daa0b8f3..dd0364293bc0 100644
--- a/drivers/rtc/rtc-mpc5121.c
+++ b/drivers/rtc/rtc-mpc5121.c
@@ -122,7 +122,7 @@ static int mpc5121_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	 */
 	mpc5121_rtc_update_smh(regs, tm);
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int mpc5121_rtc_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-mrst.c b/drivers/rtc/rtc-mrst.c
index 7334c44fa7c3..fcb9de5218b2 100644
--- a/drivers/rtc/rtc-mrst.c
+++ b/drivers/rtc/rtc-mrst.c
@@ -105,7 +105,7 @@ static int mrst_read_time(struct device *dev, struct rtc_time *time)
 	/* Adjust for the 1972/1900 */
 	time->tm_year += 72;
 	time->tm_mon--;
-	return rtc_valid_tm(time);
+	return 0;
 }
 
 static int mrst_set_time(struct device *dev, struct rtc_time *time)
@@ -122,7 +122,7 @@ static int mrst_set_time(struct device *dev, struct rtc_time *time)
 	min = time->tm_min;
 	sec = time->tm_sec;
 
-	if (yrs < 72 || yrs > 138)
+	if (yrs < 72 || yrs > 172)
 		return -EINVAL;
 	yrs -= 72;
 
diff --git a/drivers/rtc/rtc-msm6242.c b/drivers/rtc/rtc-msm6242.c
index c1c5c4e3b3b4..0c72a2e8ec67 100644
--- a/drivers/rtc/rtc-msm6242.c
+++ b/drivers/rtc/rtc-msm6242.c
@@ -155,7 +155,7 @@ static int msm6242_read_time(struct device *dev, struct rtc_time *tm)
 
 	msm6242_unlock(priv);
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int msm6242_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-mt7622.c b/drivers/rtc/rtc-mt7622.c
index d79b9ae4d237..fd0cea722286 100644
--- a/drivers/rtc/rtc-mt7622.c
+++ b/drivers/rtc/rtc-mt7622.c
@@ -232,7 +232,7 @@ static int mtk_rtc_gettime(struct device *dev, struct rtc_time *tm)
 
 	mtk_rtc_get_alarm_or_time(hw, tm, MTK_TC);
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int mtk_rtc_settime(struct device *dev, struct rtc_time *tm)
@@ -307,6 +307,7 @@ static const struct of_device_id mtk_rtc_match[] = {
 	{ .compatible = "mediatek,soc-rtc" },
 	{},
 };
+MODULE_DEVICE_TABLE(of, mtk_rtc_match);
 
 static int mtk_rtc_probe(struct platform_device *pdev)
 {
diff --git a/drivers/rtc/rtc-mv.c b/drivers/rtc/rtc-mv.c
index 79bb28617d45..bc52dbb0c0e2 100644
--- a/drivers/rtc/rtc-mv.c
+++ b/drivers/rtc/rtc-mv.c
@@ -94,7 +94,7 @@ static int mv_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	/* hw counts from year 2000, but tm_year is relative to 1900 */
 	tm->tm_year = bcd2bin(year) + 100;
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int mv_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alm)
@@ -223,7 +223,6 @@ static int __init mv_rtc_probe(struct platform_device *pdev)
 	struct resource *res;
 	struct rtc_plat_data *pdata;
 	u32 rtc_time;
-	u32 rtc_date;
 	int ret = 0;
 
 	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
@@ -259,17 +258,6 @@ static int __init mv_rtc_probe(struct platform_device *pdev)
 		}
 	}
 
-	/*
-	 * A date after January 19th, 2038 does not fit on 32 bits and
-	 * will confuse the kernel and userspace. Reset to a sane date
-	 * (January 1st, 2013) if we're after 2038.
-	 */
-	rtc_date = readl(pdata->ioaddr + RTC_DATE_REG_OFFS);
-	if (bcd2bin((rtc_date >> RTC_YEAR_OFFS) & 0xff) >= 38) {
-		dev_info(&pdev->dev, "invalid RTC date, resetting to January 1st, 2013\n");
-		writel(0x130101, pdata->ioaddr + RTC_DATE_REG_OFFS);
-	}
-
 	pdata->irq = platform_get_irq(pdev, 0);
 
 	platform_set_drvdata(pdev, pdata);
diff --git a/drivers/rtc/rtc-mxc_v2.c b/drivers/rtc/rtc-mxc_v2.c
index 784221dfc9c7..9e14efb990b2 100644
--- a/drivers/rtc/rtc-mxc_v2.c
+++ b/drivers/rtc/rtc-mxc_v2.c
@@ -273,7 +273,7 @@ static const struct rtc_class_ops mxc_rtc_ops = {
 	.alarm_irq_enable = mxc_rtc_alarm_irq_enable,
 };
 
-static int mxc_rtc_wait_for_flag(void *__iomem ioaddr, int flag)
+static int mxc_rtc_wait_for_flag(void __iomem *ioaddr, int flag)
 {
 	unsigned int timeout = REG_READ_TIMEOUT;
 
diff --git a/drivers/rtc/rtc-nuc900.c b/drivers/rtc/rtc-nuc900.c
index 4ed81117cf5f..7da664a77181 100644
--- a/drivers/rtc/rtc-nuc900.c
+++ b/drivers/rtc/rtc-nuc900.c
@@ -102,8 +102,8 @@ static int *check_rtc_access_enable(struct nuc900_rtc *nuc900_rtc)
 	return NULL;
 }
 
-static int nuc900_rtc_bcd2bin(unsigned int timereg,
-				unsigned int calreg, struct rtc_time *tm)
+static void nuc900_rtc_bcd2bin(unsigned int timereg,
+			       unsigned int calreg, struct rtc_time *tm)
 {
 	tm->tm_mday	= bcd2bin(calreg >> 0);
 	tm->tm_mon	= bcd2bin(calreg >> 8);
@@ -112,8 +112,6 @@ static int nuc900_rtc_bcd2bin(unsigned int timereg,
 	tm->tm_sec	= bcd2bin(timereg >> 0);
 	tm->tm_min	= bcd2bin(timereg >> 8);
 	tm->tm_hour	= bcd2bin(timereg >> 16);
-
-	return rtc_valid_tm(tm);
 }
 
 static void nuc900_rtc_bin2bcd(struct device *dev, struct rtc_time *settm,
@@ -156,7 +154,9 @@ static int nuc900_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	timeval = __raw_readl(rtc->rtc_reg + REG_RTC_TLR);
 	clrval	= __raw_readl(rtc->rtc_reg + REG_RTC_CLR);
 
-	return nuc900_rtc_bcd2bin(timeval, clrval, tm);
+	nuc900_rtc_bcd2bin(timeval, clrval, tm);
+
+	return 0;
 }
 
 static int nuc900_rtc_set_time(struct device *dev, struct rtc_time *tm)
@@ -189,7 +189,9 @@ static int nuc900_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 	timeval = __raw_readl(rtc->rtc_reg + REG_RTC_TAR);
 	carval	= __raw_readl(rtc->rtc_reg + REG_RTC_CAR);
 
-	return nuc900_rtc_bcd2bin(timeval, carval, &alrm->time);
+	nuc900_rtc_bcd2bin(timeval, carval, &alrm->time);
+
+	return rtc_valid_tm(&alrm->time);
 }
 
 static int nuc900_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c
index 09ef802d6e54..39086398833e 100644
--- a/drivers/rtc/rtc-omap.c
+++ b/drivers/rtc/rtc-omap.c
@@ -273,9 +273,6 @@ static int omap_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
 /* this hardware doesn't support "don't care" alarm fields */
 static int tm2bcd(struct rtc_time *tm)
 {
-	if (rtc_valid_tm(tm) != 0)
-		return -EINVAL;
-
 	tm->tm_sec = bin2bcd(tm->tm_sec);
 	tm->tm_min = bin2bcd(tm->tm_min);
 	tm->tm_hour = bin2bcd(tm->tm_hour);
@@ -850,7 +847,6 @@ static int omap_rtc_probe(struct platform_device *pdev)
 
 	rtc->rtc->ops = &omap_rtc_ops;
 	omap_rtc_nvmem_config.priv = rtc;
-	rtc->rtc->nvmem_config = &omap_rtc_nvmem_config;
 
 	/* handle periodic and alarm irqs */
 	ret = devm_request_irq(&pdev->dev, rtc->irq_timer, rtc_irq, 0,
@@ -886,6 +882,8 @@ static int omap_rtc_probe(struct platform_device *pdev)
 	if (ret)
 		goto err;
 
+	rtc_nvmem_register(rtc->rtc, &omap_rtc_nvmem_config);
+
 	return 0;
 
 err:
diff --git a/drivers/rtc/rtc-pcap.c b/drivers/rtc/rtc-pcap.c
index c4433240d8a9..c05f524ba9af 100644
--- a/drivers/rtc/rtc-pcap.c
+++ b/drivers/rtc/rtc-pcap.c
@@ -95,7 +95,7 @@ static int pcap_rtc_read_time(struct device *dev, struct rtc_time *tm)
 
 	rtc_time_to_tm(secs, tm);
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int pcap_rtc_set_mmss(struct device *dev, unsigned long secs)
diff --git a/drivers/rtc/rtc-pcf2123.c b/drivers/rtc/rtc-pcf2123.c
index 8895f77726e8..e5222c5d8223 100644
--- a/drivers/rtc/rtc-pcf2123.c
+++ b/drivers/rtc/rtc-pcf2123.c
@@ -289,7 +289,7 @@ static int pcf2123_rtc_read_time(struct device *dev, struct rtc_time *tm)
 			tm->tm_sec, tm->tm_min, tm->tm_hour,
 			tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int pcf2123_rtc_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-pcf2127.c b/drivers/rtc/rtc-pcf2127.c
index f33447c5db85..e83be1852c2f 100644
--- a/drivers/rtc/rtc-pcf2127.c
+++ b/drivers/rtc/rtc-pcf2127.c
@@ -111,7 +111,7 @@ static int pcf2127_rtc_read_time(struct device *dev, struct rtc_time *tm)
 		tm->tm_sec, tm->tm_min, tm->tm_hour,
 		tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int pcf2127_rtc_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-pcf50633.c b/drivers/rtc/rtc-pcf50633.c
index 00c31c91b245..ef72b0c389d7 100644
--- a/drivers/rtc/rtc-pcf50633.c
+++ b/drivers/rtc/rtc-pcf50633.c
@@ -135,7 +135,7 @@ static int pcf50633_rtc_read_time(struct device *dev, struct rtc_time *tm)
 		tm->tm_mday, tm->tm_mon, tm->tm_year,
 		tm->tm_hour, tm->tm_min, tm->tm_sec);
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int pcf50633_rtc_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-pcf85063.c b/drivers/rtc/rtc-pcf85063.c
index a06dff994c83..49bcbb3d4a69 100644
--- a/drivers/rtc/rtc-pcf85063.c
+++ b/drivers/rtc/rtc-pcf85063.c
@@ -70,7 +70,7 @@ static int pcf85063_start_clock(struct i2c_client *client, u8 ctrl1)
 	s32 ret;
 
 	/* start the clock */
-	ctrl1 &= PCF85063_REG_CTRL1_STOP;
+	ctrl1 &= ~PCF85063_REG_CTRL1_STOP;
 
 	ret = i2c_smbus_write_byte_data(client, PCF85063_REG_CTRL1, ctrl1);
 	if (ret < 0) {
@@ -81,8 +81,9 @@ static int pcf85063_start_clock(struct i2c_client *client, u8 ctrl1)
 	return 0;
 }
 
-static int pcf85063_get_datetime(struct i2c_client *client, struct rtc_time *tm)
+static int pcf85063_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
+	struct i2c_client *client = to_i2c_client(dev);
 	int rc;
 	u8 regs[7];
 
@@ -114,11 +115,12 @@ static int pcf85063_get_datetime(struct i2c_client *client, struct rtc_time *tm)
 	tm->tm_year = bcd2bin(regs[6]);
 	tm->tm_year += 100;
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
-static int pcf85063_set_datetime(struct i2c_client *client, struct rtc_time *tm)
+static int pcf85063_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
+	struct i2c_client *client = to_i2c_client(dev);
 	int rc;
 	u8 regs[7];
 	u8 ctrl1;
@@ -172,16 +174,6 @@ static int pcf85063_set_datetime(struct i2c_client *client, struct rtc_time *tm)
 	return 0;
 }
 
-static int pcf85063_rtc_read_time(struct device *dev, struct rtc_time *tm)
-{
-	return pcf85063_get_datetime(to_i2c_client(dev), tm);
-}
-
-static int pcf85063_rtc_set_time(struct device *dev, struct rtc_time *tm)
-{
-	return pcf85063_set_datetime(to_i2c_client(dev), tm);
-}
-
 static const struct rtc_class_ops pcf85063_rtc_ops = {
 	.read_time	= pcf85063_rtc_read_time,
 	.set_time	= pcf85063_rtc_set_time
diff --git a/drivers/rtc/rtc-pcf8523.c b/drivers/rtc/rtc-pcf8523.c
index c312af0db729..453615f8ac9a 100644
--- a/drivers/rtc/rtc-pcf8523.c
+++ b/drivers/rtc/rtc-pcf8523.c
@@ -192,7 +192,7 @@ static int pcf8523_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	tm->tm_mon = bcd2bin(regs[5] & 0x1f) - 1;
 	tm->tm_year = bcd2bin(regs[6]) + 100;
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int pcf8523_rtc_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-pcf85363.c b/drivers/rtc/rtc-pcf85363.c
index ea04e9f0930b..c04a1edcd571 100644
--- a/drivers/rtc/rtc-pcf85363.c
+++ b/drivers/rtc/rtc-pcf85363.c
@@ -73,6 +73,43 @@
 #define CTRL_RESETS	0x2f
 #define CTRL_RAM	0x40
 
+#define ALRM_SEC_A1E	BIT(0)
+#define ALRM_MIN_A1E	BIT(1)
+#define ALRM_HR_A1E	BIT(2)
+#define ALRM_DAY_A1E	BIT(3)
+#define ALRM_MON_A1E	BIT(4)
+#define ALRM_MIN_A2E	BIT(5)
+#define ALRM_HR_A2E	BIT(6)
+#define ALRM_DAY_A2E	BIT(7)
+
+#define INT_WDIE	BIT(0)
+#define INT_BSIE	BIT(1)
+#define INT_TSRIE	BIT(2)
+#define INT_A2IE	BIT(3)
+#define INT_A1IE	BIT(4)
+#define INT_OIE		BIT(5)
+#define INT_PIE		BIT(6)
+#define INT_ILP		BIT(7)
+
+#define FLAGS_TSR1F	BIT(0)
+#define FLAGS_TSR2F	BIT(1)
+#define FLAGS_TSR3F	BIT(2)
+#define FLAGS_BSF	BIT(3)
+#define FLAGS_WDF	BIT(4)
+#define FLAGS_A1F	BIT(5)
+#define FLAGS_A2F	BIT(6)
+#define FLAGS_PIF	BIT(7)
+
+#define PIN_IO_INTAPM	GENMASK(1, 0)
+#define PIN_IO_INTA_CLK	0
+#define PIN_IO_INTA_BAT	1
+#define PIN_IO_INTA_OUT	2
+#define PIN_IO_INTA_HIZ	3
+
+#define STOP_EN_STOP	BIT(0)
+
+#define RESET_CPR	0xa4
+
 #define NVRAM_SIZE	0x40
 
 static struct i2c_driver pcf85363_driver;
@@ -80,7 +117,6 @@ static struct i2c_driver pcf85363_driver;
 struct pcf85363 {
 	struct device		*dev;
 	struct rtc_device	*rtc;
-	struct nvmem_config	nvmem_cfg;
 	struct regmap		*regmap;
 };
 
@@ -116,8 +152,12 @@ static int pcf85363_rtc_read_time(struct device *dev, struct rtc_time *tm)
 static int pcf85363_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
 	struct pcf85363 *pcf85363 = dev_get_drvdata(dev);
-	unsigned char buf[DT_YEARS + 1];
-	int len = sizeof(buf);
+	unsigned char tmp[2 + DT_YEARS + 1]; /* stop/reset bytes + DT regs */
+	unsigned char *buf = &tmp[2];
+	int ret;
+
+	tmp[0] = STOP_EN_STOP;
+	tmp[1] = RESET_CPR;
 
 	buf[DT_100THS] = 0;
 	buf[DT_SECS] = bin2bcd(tm->tm_sec);
@@ -128,8 +168,116 @@ static int pcf85363_rtc_set_time(struct device *dev, struct rtc_time *tm)
 	buf[DT_MONTHS] = bin2bcd(tm->tm_mon + 1);
 	buf[DT_YEARS] = bin2bcd(tm->tm_year % 100);
 
-	return regmap_bulk_write(pcf85363->regmap, DT_100THS,
-				 buf, len);
+	ret = regmap_bulk_write(pcf85363->regmap, CTRL_STOP_EN,
+				tmp, sizeof(tmp));
+	if (ret)
+		return ret;
+
+	return regmap_write(pcf85363->regmap, CTRL_STOP_EN, 0);
+}
+
+static int pcf85363_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+{
+	struct pcf85363 *pcf85363 = dev_get_drvdata(dev);
+	unsigned char buf[DT_MONTH_ALM1 - DT_SECOND_ALM1 + 1];
+	unsigned int val;
+	int ret;
+
+	ret = regmap_bulk_read(pcf85363->regmap, DT_SECOND_ALM1, buf,
+			       sizeof(buf));
+	if (ret)
+		return ret;
+
+	alrm->time.tm_sec = bcd2bin(buf[0]);
+	alrm->time.tm_min = bcd2bin(buf[1]);
+	alrm->time.tm_hour = bcd2bin(buf[2]);
+	alrm->time.tm_mday = bcd2bin(buf[3]);
+	alrm->time.tm_mon = bcd2bin(buf[4]) - 1;
+
+	ret = regmap_read(pcf85363->regmap, CTRL_INTA_EN, &val);
+	if (ret)
+		return ret;
+
+	alrm->enabled =  !!(val & INT_A1IE);
+
+	return 0;
+}
+
+static int _pcf85363_rtc_alarm_irq_enable(struct pcf85363 *pcf85363, unsigned
+					  int enabled)
+{
+	unsigned int alarm_flags = ALRM_SEC_A1E | ALRM_MIN_A1E | ALRM_HR_A1E |
+				   ALRM_DAY_A1E | ALRM_MON_A1E;
+	int ret;
+
+	ret = regmap_update_bits(pcf85363->regmap, DT_ALARM_EN, alarm_flags,
+				 enabled ? alarm_flags : 0);
+	if (ret)
+		return ret;
+
+	ret = regmap_update_bits(pcf85363->regmap, CTRL_INTA_EN,
+				 INT_A1IE, enabled ? INT_A1IE : 0);
+
+	if (ret || enabled)
+		return ret;
+
+	/* clear current flags */
+	return regmap_update_bits(pcf85363->regmap, CTRL_FLAGS, FLAGS_A1F, 0);
+}
+
+static int pcf85363_rtc_alarm_irq_enable(struct device *dev,
+					 unsigned int enabled)
+{
+	struct pcf85363 *pcf85363 = dev_get_drvdata(dev);
+
+	return _pcf85363_rtc_alarm_irq_enable(pcf85363, enabled);
+}
+
+static int pcf85363_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+{
+	struct pcf85363 *pcf85363 = dev_get_drvdata(dev);
+	unsigned char buf[DT_MONTH_ALM1 - DT_SECOND_ALM1 + 1];
+	int ret;
+
+	buf[0] = bin2bcd(alrm->time.tm_sec);
+	buf[1] = bin2bcd(alrm->time.tm_min);
+	buf[2] = bin2bcd(alrm->time.tm_hour);
+	buf[3] = bin2bcd(alrm->time.tm_mday);
+	buf[4] = bin2bcd(alrm->time.tm_mon + 1);
+
+	/*
+	 * Disable the alarm interrupt before changing the value to avoid
+	 * spurious interrupts
+	 */
+	ret = _pcf85363_rtc_alarm_irq_enable(pcf85363, 0);
+	if (ret)
+		return ret;
+
+	ret = regmap_bulk_write(pcf85363->regmap, DT_SECOND_ALM1, buf,
+				sizeof(buf));
+	if (ret)
+		return ret;
+
+	return _pcf85363_rtc_alarm_irq_enable(pcf85363, alrm->enabled);
+}
+
+static irqreturn_t pcf85363_rtc_handle_irq(int irq, void *dev_id)
+{
+	struct pcf85363 *pcf85363 = i2c_get_clientdata(dev_id);
+	unsigned int flags;
+	int err;
+
+	err = regmap_read(pcf85363->regmap, CTRL_FLAGS, &flags);
+	if (err)
+		return IRQ_NONE;
+
+	if (flags & FLAGS_A1F) {
+		rtc_update_irq(pcf85363->rtc, 1, RTC_IRQF | RTC_AF);
+		regmap_update_bits(pcf85363->regmap, CTRL_FLAGS, FLAGS_A1F, 0);
+		return IRQ_HANDLED;
+	}
+
+	return IRQ_NONE;
 }
 
 static const struct rtc_class_ops rtc_ops = {
@@ -137,6 +285,14 @@ static const struct rtc_class_ops rtc_ops = {
 	.set_time	= pcf85363_rtc_set_time,
 };
 
+static const struct rtc_class_ops rtc_ops_alarm = {
+	.read_time	= pcf85363_rtc_read_time,
+	.set_time	= pcf85363_rtc_set_time,
+	.read_alarm	= pcf85363_rtc_read_alarm,
+	.set_alarm	= pcf85363_rtc_set_alarm,
+	.alarm_irq_enable = pcf85363_rtc_alarm_irq_enable,
+};
+
 static int pcf85363_nvram_read(void *priv, unsigned int offset, void *val,
 			       size_t bytes)
 {
@@ -158,12 +314,22 @@ static int pcf85363_nvram_write(void *priv, unsigned int offset, void *val,
 static const struct regmap_config regmap_config = {
 	.reg_bits = 8,
 	.val_bits = 8,
+	.max_register = 0x7f,
 };
 
 static int pcf85363_probe(struct i2c_client *client,
 			  const struct i2c_device_id *id)
 {
 	struct pcf85363 *pcf85363;
+	struct nvmem_config nvmem_cfg = {
+		.name = "pcf85363-",
+		.word_size = 1,
+		.stride = 1,
+		.size = NVRAM_SIZE,
+		.reg_read = pcf85363_nvram_read,
+		.reg_write = pcf85363_nvram_write,
+	};
+	int ret;
 
 	if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C))
 		return -ENODEV;
@@ -186,17 +352,28 @@ static int pcf85363_probe(struct i2c_client *client,
 	if (IS_ERR(pcf85363->rtc))
 		return PTR_ERR(pcf85363->rtc);
 
-	pcf85363->nvmem_cfg.name = "pcf85363-";
-	pcf85363->nvmem_cfg.word_size = 1;
-	pcf85363->nvmem_cfg.stride = 1;
-	pcf85363->nvmem_cfg.size = NVRAM_SIZE;
-	pcf85363->nvmem_cfg.reg_read = pcf85363_nvram_read;
-	pcf85363->nvmem_cfg.reg_write = pcf85363_nvram_write;
-	pcf85363->nvmem_cfg.priv = pcf85363;
-	pcf85363->rtc->nvmem_config = &pcf85363->nvmem_cfg;
 	pcf85363->rtc->ops = &rtc_ops;
 
-	return rtc_register_device(pcf85363->rtc);
+	if (client->irq > 0) {
+		regmap_write(pcf85363->regmap, CTRL_FLAGS, 0);
+		regmap_update_bits(pcf85363->regmap, CTRL_PIN_IO,
+				   PIN_IO_INTAPM, PIN_IO_INTA_OUT); /* mask, val */
+		ret = devm_request_threaded_irq(pcf85363->dev, client->irq,
+						NULL, pcf85363_rtc_handle_irq,
+						IRQF_TRIGGER_LOW | IRQF_ONESHOT,
+						"pcf85363", client);
+		if (ret)
+			dev_warn(&client->dev, "unable to request IRQ, alarms disabled\n");
+		else
+			pcf85363->rtc->ops = &rtc_ops_alarm;
+	}
+
+	ret = rtc_register_device(pcf85363->rtc);
+
+	nvmem_cfg.priv = pcf85363;
+	rtc_nvmem_register(pcf85363->rtc, &nvmem_cfg);
+
+	return ret;
 }
 
 static const struct of_device_id dev_ids[] = {
diff --git a/drivers/rtc/rtc-pic32.c b/drivers/rtc/rtc-pic32.c
index 5cfb6df5c430..3c08eab4f1a8 100644
--- a/drivers/rtc/rtc-pic32.c
+++ b/drivers/rtc/rtc-pic32.c
@@ -175,7 +175,7 @@ static int pic32_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm)
 		rtc_tm->tm_hour, rtc_tm->tm_min, rtc_tm->tm_sec);
 
 	clk_disable(pdata->clk);
-	return rtc_valid_tm(rtc_tm);
+	return 0;
 }
 
 static int pic32_rtc_settime(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-pm8xxx.c b/drivers/rtc/rtc-pm8xxx.c
index fac835530671..29358a045925 100644
--- a/drivers/rtc/rtc-pm8xxx.c
+++ b/drivers/rtc/rtc-pm8xxx.c
@@ -74,16 +74,18 @@ struct pm8xxx_rtc {
 /*
  * Steps to write the RTC registers.
  * 1. Disable alarm if enabled.
- * 2. Write 0x00 to LSB.
- * 3. Write Byte[1], Byte[2], Byte[3] then Byte[0].
- * 4. Enable alarm if disabled in step 1.
+ * 2. Disable rtc if enabled.
+ * 3. Write 0x00 to LSB.
+ * 4. Write Byte[1], Byte[2], Byte[3] then Byte[0].
+ * 5. Enable rtc if disabled in step 2.
+ * 6. Enable alarm if disabled in step 1.
  */
 static int pm8xxx_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
 	int rc, i;
 	unsigned long secs, irq_flags;
-	u8 value[NUM_8_BIT_RTC_REGS], alarm_enabled = 0;
-	unsigned int ctrl_reg;
+	u8 value[NUM_8_BIT_RTC_REGS], alarm_enabled = 0, rtc_disabled = 0;
+	unsigned int ctrl_reg, rtc_ctrl_reg;
 	struct pm8xxx_rtc *rtc_dd = dev_get_drvdata(dev);
 	const struct pm8xxx_rtc_regs *regs = rtc_dd->regs;
 
@@ -92,23 +94,38 @@ static int pm8xxx_rtc_set_time(struct device *dev, struct rtc_time *tm)
 
 	rtc_tm_to_time(tm, &secs);
 
+	dev_dbg(dev, "Seconds value to be written to RTC = %lu\n", secs);
+
 	for (i = 0; i < NUM_8_BIT_RTC_REGS; i++) {
 		value[i] = secs & 0xFF;
 		secs >>= 8;
 	}
 
-	dev_dbg(dev, "Seconds value to be written to RTC = %lu\n", secs);
-
 	spin_lock_irqsave(&rtc_dd->ctrl_reg_lock, irq_flags);
 
-	rc = regmap_read(rtc_dd->regmap, regs->ctrl, &ctrl_reg);
+	rc = regmap_read(rtc_dd->regmap, regs->alarm_ctrl, &ctrl_reg);
 	if (rc)
 		goto rtc_rw_fail;
 
 	if (ctrl_reg & regs->alarm_en) {
 		alarm_enabled = 1;
 		ctrl_reg &= ~regs->alarm_en;
-		rc = regmap_write(rtc_dd->regmap, regs->ctrl, ctrl_reg);
+		rc = regmap_write(rtc_dd->regmap, regs->alarm_ctrl, ctrl_reg);
+		if (rc) {
+			dev_err(dev, "Write to RTC Alarm control register failed\n");
+			goto rtc_rw_fail;
+		}
+	}
+
+	/* Disable RTC H/w before writing on RTC register */
+	rc = regmap_read(rtc_dd->regmap, regs->ctrl, &rtc_ctrl_reg);
+	if (rc)
+		goto rtc_rw_fail;
+
+	if (rtc_ctrl_reg & PM8xxx_RTC_ENABLE) {
+		rtc_disabled = 1;
+		rtc_ctrl_reg &= ~PM8xxx_RTC_ENABLE;
+		rc = regmap_write(rtc_dd->regmap, regs->ctrl, rtc_ctrl_reg);
 		if (rc) {
 			dev_err(dev, "Write to RTC control register failed\n");
 			goto rtc_rw_fail;
@@ -137,11 +154,21 @@ static int pm8xxx_rtc_set_time(struct device *dev, struct rtc_time *tm)
 		goto rtc_rw_fail;
 	}
 
+	/* Enable RTC H/w after writing on RTC register */
+	if (rtc_disabled) {
+		rtc_ctrl_reg |= PM8xxx_RTC_ENABLE;
+		rc = regmap_write(rtc_dd->regmap, regs->ctrl, rtc_ctrl_reg);
+		if (rc) {
+			dev_err(dev, "Write to RTC control register failed\n");
+			goto rtc_rw_fail;
+		}
+	}
+
 	if (alarm_enabled) {
 		ctrl_reg |= regs->alarm_en;
-		rc = regmap_write(rtc_dd->regmap, regs->ctrl, ctrl_reg);
+		rc = regmap_write(rtc_dd->regmap, regs->alarm_ctrl, ctrl_reg);
 		if (rc) {
-			dev_err(dev, "Write to RTC control register failed\n");
+			dev_err(dev, "Write to RTC Alarm control register failed\n");
 			goto rtc_rw_fail;
 		}
 	}
@@ -190,12 +217,6 @@ static int pm8xxx_rtc_read_time(struct device *dev, struct rtc_time *tm)
 
 	rtc_time_to_tm(secs, tm);
 
-	rc = rtc_valid_tm(tm);
-	if (rc < 0) {
-		dev_err(dev, "Invalid time read from RTC\n");
-		return rc;
-	}
-
 	dev_dbg(dev, "secs = %lu, h:m:s == %d:%d:%d, d/m/y = %d/%d/%d\n",
 		secs, tm->tm_hour, tm->tm_min, tm->tm_sec,
 		tm->tm_mday, tm->tm_mon, tm->tm_year);
diff --git a/drivers/rtc/rtc-ps3.c b/drivers/rtc/rtc-ps3.c
index 6a8f5d758eac..347288bff438 100644
--- a/drivers/rtc/rtc-ps3.c
+++ b/drivers/rtc/rtc-ps3.c
@@ -41,7 +41,7 @@ static u64 read_rtc(void)
 static int ps3_get_time(struct device *dev, struct rtc_time *tm)
 {
 	rtc_time_to_tm(read_rtc() + ps3_os_area_get_rtc_diff(), tm);
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int ps3_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-r7301.c b/drivers/rtc/rtc-r7301.c
index 500e8c8a2605..169704b2ce13 100644
--- a/drivers/rtc/rtc-r7301.c
+++ b/drivers/rtc/rtc-r7301.c
@@ -224,7 +224,7 @@ static int rtc7301_read_time(struct device *dev, struct rtc_time *tm)
 
 	spin_unlock_irqrestore(&priv->lock, flags);
 
-	return err ? err : rtc_valid_tm(tm);
+	return err;
 }
 
 static int rtc7301_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-r9701.c b/drivers/rtc/rtc-r9701.c
index b6c5eb97051c..a39ccd1cf6e8 100644
--- a/drivers/rtc/rtc-r9701.c
+++ b/drivers/rtc/rtc-r9701.c
@@ -92,7 +92,7 @@ static int r9701_get_datetime(struct device *dev, struct rtc_time *dt)
 	 * according to the data sheet. make sure they are valid.
 	 */
 
-	return rtc_valid_tm(dt);
+	return 0;
 }
 
 static int r9701_set_datetime(struct device *dev, struct rtc_time *dt)
diff --git a/drivers/rtc/rtc-rk808.c b/drivers/rtc/rtc-rk808.c
index 35c9aada07c8..739c0d42e835 100644
--- a/drivers/rtc/rtc-rk808.c
+++ b/drivers/rtc/rtc-rk808.c
@@ -375,7 +375,6 @@ static int rk808_rtc_probe(struct platform_device *pdev)
 {
 	struct rk808 *rk808 = dev_get_drvdata(pdev->dev.parent);
 	struct rk808_rtc *rk808_rtc;
-	struct rtc_time tm;
 	int ret;
 
 	rk808_rtc = devm_kzalloc(&pdev->dev, sizeof(*rk808_rtc), GFP_KERNEL);
@@ -404,24 +403,13 @@ static int rk808_rtc_probe(struct platform_device *pdev)
 			return ret;
 	}
 
-	/* set init time */
-	ret = rk808_rtc_readtime(&pdev->dev, &tm);
-	if (ret) {
-		dev_err(&pdev->dev, "Failed to read RTC time\n");
-		return ret;
-	}
-	ret = rtc_valid_tm(&tm);
-	if (ret)
-		dev_warn(&pdev->dev, "invalid date/time\n");
-
 	device_init_wakeup(&pdev->dev, 1);
 
-	rk808_rtc->rtc = devm_rtc_device_register(&pdev->dev, "rk808-rtc",
-						  &rk808_rtc_ops, THIS_MODULE);
-	if (IS_ERR(rk808_rtc->rtc)) {
-		ret = PTR_ERR(rk808_rtc->rtc);
-		return ret;
-	}
+	rk808_rtc->rtc = devm_rtc_allocate_device(&pdev->dev);
+	if (IS_ERR(rk808_rtc->rtc))
+		return PTR_ERR(rk808_rtc->rtc);
+
+	rk808_rtc->rtc->ops = &rk808_rtc_ops;
 
 	rk808_rtc->irq = platform_get_irq(pdev, 0);
 	if (rk808_rtc->irq < 0) {
@@ -438,9 +426,10 @@ static int rk808_rtc_probe(struct platform_device *pdev)
 	if (ret) {
 		dev_err(&pdev->dev, "Failed to request alarm IRQ %d: %d\n",
 			rk808_rtc->irq, ret);
+		return ret;
 	}
 
-	return ret;
+	return rtc_register_device(rk808_rtc->rtc);
 }
 
 static struct platform_driver rk808_rtc_driver = {
diff --git a/drivers/rtc/rtc-rp5c01.c b/drivers/rtc/rtc-rp5c01.c
index 026035373ae6..f1c160fe7d37 100644
--- a/drivers/rtc/rtc-rp5c01.c
+++ b/drivers/rtc/rtc-rp5c01.c
@@ -64,7 +64,6 @@ struct rp5c01_priv {
 	u32 __iomem *regs;
 	struct rtc_device *rtc;
 	spinlock_t lock;	/* against concurrent RTC/NVRAM access */
-	struct bin_attribute nvram_attr;
 };
 
 static inline unsigned int rp5c01_read(struct rp5c01_priv *priv,
@@ -116,7 +115,7 @@ static int rp5c01_read_time(struct device *dev, struct rtc_time *tm)
 	rp5c01_unlock(priv);
 	spin_unlock_irq(&priv->lock);
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int rp5c01_set_time(struct device *dev, struct rtc_time *tm)
@@ -160,17 +159,15 @@ static const struct rtc_class_ops rp5c01_rtc_ops = {
  * byte is stored in BLOCK10, the low nibble in BLOCK11.
  */
 
-static ssize_t rp5c01_nvram_read(struct file *filp, struct kobject *kobj,
-				 struct bin_attribute *bin_attr,
-				 char *buf, loff_t pos, size_t size)
+static int rp5c01_nvram_read(void *_priv, unsigned int pos, void *val,
+			     size_t bytes)
 {
-	struct device *dev = container_of(kobj, struct device, kobj);
-	struct rp5c01_priv *priv = dev_get_drvdata(dev);
-	ssize_t count;
+	struct rp5c01_priv *priv = _priv;
+	u8 *buf = val;
 
 	spin_lock_irq(&priv->lock);
 
-	for (count = 0; count < size; count++) {
+	for (; bytes; bytes--) {
 		u8 data;
 
 		rp5c01_write(priv,
@@ -187,20 +184,18 @@ static ssize_t rp5c01_nvram_read(struct file *filp, struct kobject *kobj,
 	}
 
 	spin_unlock_irq(&priv->lock);
-	return count;
+	return 0;
 }
 
-static ssize_t rp5c01_nvram_write(struct file *filp, struct kobject *kobj,
-				  struct bin_attribute *bin_attr,
-				  char *buf, loff_t pos, size_t size)
+static int rp5c01_nvram_write(void *_priv, unsigned int pos, void *val,
+			      size_t bytes)
 {
-	struct device *dev = container_of(kobj, struct device, kobj);
-	struct rp5c01_priv *priv = dev_get_drvdata(dev);
-	ssize_t count;
+	struct rp5c01_priv *priv = _priv;
+	u8 *buf = val;
 
 	spin_lock_irq(&priv->lock);
 
-	for (count = 0; count < size; count++) {
+	for (; bytes; bytes--) {
 		u8 data = *buf++;
 
 		rp5c01_write(priv,
@@ -216,7 +211,7 @@ static ssize_t rp5c01_nvram_write(struct file *filp, struct kobject *kobj,
 	}
 
 	spin_unlock_irq(&priv->lock);
-	return count;
+	return 0;
 }
 
 static int __init rp5c01_rtc_probe(struct platform_device *dev)
@@ -225,6 +220,14 @@ static int __init rp5c01_rtc_probe(struct platform_device *dev)
 	struct rp5c01_priv *priv;
 	struct rtc_device *rtc;
 	int error;
+	struct nvmem_config nvmem_cfg = {
+		.name = "rp5c01_nvram",
+		.word_size = 1,
+		.stride = 1,
+		.size = RP5C01_MODE,
+		.reg_read = rp5c01_nvram_read,
+		.reg_write = rp5c01_nvram_write,
+	};
 
 	res = platform_get_resource(dev, IORESOURCE_MEM, 0);
 	if (!res)
@@ -238,43 +241,31 @@ static int __init rp5c01_rtc_probe(struct platform_device *dev)
 	if (!priv->regs)
 		return -ENOMEM;
 
-	sysfs_bin_attr_init(&priv->nvram_attr);
-	priv->nvram_attr.attr.name = "nvram";
-	priv->nvram_attr.attr.mode = S_IRUGO | S_IWUSR;
-	priv->nvram_attr.read = rp5c01_nvram_read;
-	priv->nvram_attr.write = rp5c01_nvram_write;
-	priv->nvram_attr.size = RP5C01_MODE;
-
 	spin_lock_init(&priv->lock);
 
 	platform_set_drvdata(dev, priv);
 
-	rtc = devm_rtc_device_register(&dev->dev, "rtc-rp5c01", &rp5c01_rtc_ops,
-				  THIS_MODULE);
+	rtc = devm_rtc_allocate_device(&dev->dev);
 	if (IS_ERR(rtc))
 		return PTR_ERR(rtc);
+
+	rtc->ops = &rp5c01_rtc_ops;
+	rtc->nvram_old_abi = true;
+
 	priv->rtc = rtc;
 
-	error = sysfs_create_bin_file(&dev->dev.kobj, &priv->nvram_attr);
+	nvmem_cfg.priv = priv;
+	error = rtc_nvmem_register(rtc, &nvmem_cfg);
 	if (error)
 		return error;
 
-	return 0;
-}
-
-static int __exit rp5c01_rtc_remove(struct platform_device *dev)
-{
-	struct rp5c01_priv *priv = platform_get_drvdata(dev);
-
-	sysfs_remove_bin_file(&dev->dev.kobj, &priv->nvram_attr);
-	return 0;
+	return rtc_register_device(rtc);
 }
 
 static struct platform_driver rp5c01_rtc_driver = {
 	.driver	= {
 		.name	= "rtc-rp5c01",
 	},
-	.remove	= __exit_p(rp5c01_rtc_remove),
 };
 
 module_platform_driver_probe(rp5c01_rtc_driver, rp5c01_rtc_probe);
diff --git a/drivers/rtc/rtc-rs5c348.c b/drivers/rtc/rtc-rs5c348.c
index 9a306983aaba..f2de8b17e7e3 100644
--- a/drivers/rtc/rtc-rs5c348.c
+++ b/drivers/rtc/rtc-rs5c348.c
@@ -135,11 +135,6 @@ rs5c348_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	tm->tm_year = bcd2bin(rxbuf[RS5C348_REG_YEAR]) +
 		((rxbuf[RS5C348_REG_MONTH] & RS5C348_BIT_Y2K) ? 100 : 0);
 
-	if (rtc_valid_tm(tm) < 0) {
-		dev_err(&spi->dev, "retrieved date/time is not valid.\n");
-		rtc_time_to_tm(0, tm);
-	}
-
 	return 0;
 }
 
diff --git a/drivers/rtc/rtc-rs5c372.c b/drivers/rtc/rtc-rs5c372.c
index d4eff8d7131f..c5038329058c 100644
--- a/drivers/rtc/rtc-rs5c372.c
+++ b/drivers/rtc/rtc-rs5c372.c
@@ -207,8 +207,9 @@ static unsigned rs5c_hr2reg(struct rs5c372 *rs5c, unsigned hour)
 	return bin2bcd(hour);
 }
 
-static int rs5c372_get_datetime(struct i2c_client *client, struct rtc_time *tm)
+static int rs5c372_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
+	struct i2c_client *client = to_i2c_client(dev);
 	struct rs5c372	*rs5c = i2c_get_clientdata(client);
 	int		status = rs5c_get_regs(rs5c);
 
@@ -234,12 +235,12 @@ static int rs5c372_get_datetime(struct i2c_client *client, struct rtc_time *tm)
 		tm->tm_sec, tm->tm_min, tm->tm_hour,
 		tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
 
-	/* rtc might need initialization */
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
-static int rs5c372_set_datetime(struct i2c_client *client, struct rtc_time *tm)
+static int rs5c372_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
+	struct i2c_client *client = to_i2c_client(dev);
 	struct rs5c372	*rs5c = i2c_get_clientdata(client);
 	unsigned char	buf[7];
 	int		addr;
@@ -305,17 +306,6 @@ static int rs5c372_get_trim(struct i2c_client *client, int *osc, int *trim)
 }
 #endif
 
-static int rs5c372_rtc_read_time(struct device *dev, struct rtc_time *tm)
-{
-	return rs5c372_get_datetime(to_i2c_client(dev), tm);
-}
-
-static int rs5c372_rtc_set_time(struct device *dev, struct rtc_time *tm)
-{
-	return rs5c372_set_datetime(to_i2c_client(dev), tm);
-}
-
-
 static int rs5c_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
 {
 	struct i2c_client	*client = to_i2c_client(dev);
@@ -581,7 +571,6 @@ static int rs5c372_probe(struct i2c_client *client,
 	int err = 0;
 	int smbus_mode = 0;
 	struct rs5c372 *rs5c372;
-	struct rtc_time tm;
 
 	dev_dbg(&client->dev, "%s\n", __func__);
 
@@ -662,9 +651,6 @@ static int rs5c372_probe(struct i2c_client *client,
 		goto exit;
 	}
 
-	if (rs5c372_get_datetime(client, &tm) < 0)
-		dev_warn(&client->dev, "clock needs to be set\n");
-
 	dev_info(&client->dev, "%s found, %s\n",
 			({ char *s; switch (rs5c372->type) {
 			case rtc_r2025sd:	s = "r2025sd"; break;
diff --git a/drivers/rtc/rtc-rv8803.c b/drivers/rtc/rtc-rv8803.c
index aae2576741a6..29fc3d210392 100644
--- a/drivers/rtc/rtc-rv8803.c
+++ b/drivers/rtc/rtc-rv8803.c
@@ -68,7 +68,6 @@ struct rv8803_data {
 	struct mutex flags_lock;
 	u8 ctrl;
 	enum rv8803_type type;
-	struct nvmem_config nvmem_cfg;
 };
 
 static int rv8803_read_reg(const struct i2c_client *client, u8 reg)
@@ -528,6 +527,15 @@ static int rv8803_probe(struct i2c_client *client,
 	struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
 	struct rv8803_data *rv8803;
 	int err, flags;
+	struct nvmem_config nvmem_cfg = {
+		.name = "rv8803_nvram",
+		.word_size = 1,
+		.stride = 1,
+		.size = 1,
+		.reg_read = rv8803_nvram_read,
+		.reg_write = rv8803_nvram_write,
+		.priv = client,
+	};
 
 	if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA |
 				     I2C_FUNC_SMBUS_I2C_BLOCK)) {
@@ -582,21 +590,6 @@ static int rv8803_probe(struct i2c_client *client,
 		}
 	}
 
-	rv8803->nvmem_cfg.name = "rv8803_nvram",
-	rv8803->nvmem_cfg.word_size = 1,
-	rv8803->nvmem_cfg.stride = 1,
-	rv8803->nvmem_cfg.size = 1,
-	rv8803->nvmem_cfg.reg_read = rv8803_nvram_read,
-	rv8803->nvmem_cfg.reg_write = rv8803_nvram_write,
-	rv8803->nvmem_cfg.priv = client;
-
-	rv8803->rtc->ops = &rv8803_rtc_ops;
-	rv8803->rtc->nvmem_config = &rv8803->nvmem_cfg;
-	rv8803->rtc->nvram_old_abi = true;
-	err = rtc_register_device(rv8803->rtc);
-	if (err)
-		return err;
-
 	err = rv8803_write_reg(rv8803->client, RV8803_EXT, RV8803_EXT_WADA);
 	if (err)
 		return err;
@@ -607,6 +600,14 @@ static int rv8803_probe(struct i2c_client *client,
 		return err;
 	}
 
+	rv8803->rtc->ops = &rv8803_rtc_ops;
+	rv8803->rtc->nvram_old_abi = true;
+	err = rtc_register_device(rv8803->rtc);
+	if (err)
+		return err;
+
+	rtc_nvmem_register(rv8803->rtc, &nvmem_cfg);
+
 	rv8803->rtc->max_user_freq = 1;
 
 	return 0;
diff --git a/drivers/rtc/rtc-rx4581.c b/drivers/rtc/rtc-rx4581.c
index de3fe4f8d133..c59a218bdd87 100644
--- a/drivers/rtc/rtc-rx4581.c
+++ b/drivers/rtc/rtc-rx4581.c
@@ -172,11 +172,7 @@ static int rx4581_get_datetime(struct device *dev, struct rtc_time *tm)
 		tm->tm_sec, tm->tm_min, tm->tm_hour,
 		tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
 
-	err = rtc_valid_tm(tm);
-	if (err < 0)
-		dev_err(dev, "retrieved date/time is not valid.\n");
-
-	return err;
+	return 0;
 }
 
 static int rx4581_set_datetime(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-rx6110.c b/drivers/rtc/rtc-rx6110.c
index 7c9c08eab5e5..8e322d884cc2 100644
--- a/drivers/rtc/rtc-rx6110.c
+++ b/drivers/rtc/rtc-rx6110.c
@@ -252,7 +252,7 @@ static int rx6110_get_time(struct device *dev, struct rtc_time *tm)
 		tm->tm_sec, tm->tm_min, tm->tm_hour,
 		tm->tm_mday, tm->tm_mon, tm->tm_year);
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static const struct reg_sequence rx6110_default_regs[] = {
diff --git a/drivers/rtc/rtc-rx8010.c b/drivers/rtc/rtc-rx8010.c
index 5c5938ab3d86..7ddc22eb5b0f 100644
--- a/drivers/rtc/rtc-rx8010.c
+++ b/drivers/rtc/rtc-rx8010.c
@@ -138,7 +138,7 @@ static int rx8010_get_time(struct device *dev, struct rtc_time *dt)
 	dt->tm_year = bcd2bin(date[RX8010_YEAR - RX8010_SEC]) + 100;
 	dt->tm_wday = ffs(date[RX8010_WDAY - RX8010_SEC] & 0x7f);
 
-	return rtc_valid_tm(dt);
+	return 0;
 }
 
 static int rx8010_set_time(struct device *dev, struct rtc_time *dt)
diff --git a/drivers/rtc/rtc-rx8025.c b/drivers/rtc/rtc-rx8025.c
index 91857d8d2df8..41127adf5765 100644
--- a/drivers/rtc/rtc-rx8025.c
+++ b/drivers/rtc/rtc-rx8025.c
@@ -214,7 +214,7 @@ static int rx8025_get_time(struct device *dev, struct rtc_time *dt)
 		dt->tm_sec, dt->tm_min, dt->tm_hour,
 		dt->tm_mday, dt->tm_mon, dt->tm_year);
 
-	return rtc_valid_tm(dt);
+	return 0;
 }
 
 static int rx8025_set_time(struct device *dev, struct rtc_time *dt)
diff --git a/drivers/rtc/rtc-rx8581.c b/drivers/rtc/rtc-rx8581.c
index 9998d7937688..32caadf912ca 100644
--- a/drivers/rtc/rtc-rx8581.c
+++ b/drivers/rtc/rtc-rx8581.c
@@ -164,11 +164,7 @@ static int rx8581_get_datetime(struct i2c_client *client, struct rtc_time *tm)
 		tm->tm_sec, tm->tm_min, tm->tm_hour,
 		tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
 
-	err = rtc_valid_tm(tm);
-	if (err < 0)
-		dev_err(&client->dev, "retrieved date/time is not valid.\n");
-
-	return err;
+	return 0;
 }
 
 static int rx8581_set_datetime(struct i2c_client *client, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-s35390a.c b/drivers/rtc/rtc-s35390a.c
index 7067bca5c20d..77feb603cd4c 100644
--- a/drivers/rtc/rtc-s35390a.c
+++ b/drivers/rtc/rtc-s35390a.c
@@ -210,8 +210,9 @@ static int s35390a_reg2hr(struct s35390a *s35390a, char reg)
 	return hour;
 }
 
-static int s35390a_set_datetime(struct i2c_client *client, struct rtc_time *tm)
+static int s35390a_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
+	struct i2c_client *client = to_i2c_client(dev);
 	struct s35390a	*s35390a = i2c_get_clientdata(client);
 	int i, err;
 	char buf[7], status;
@@ -241,8 +242,9 @@ static int s35390a_set_datetime(struct i2c_client *client, struct rtc_time *tm)
 	return err;
 }
 
-static int s35390a_get_datetime(struct i2c_client *client, struct rtc_time *tm)
+static int s35390a_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
+	struct i2c_client *client = to_i2c_client(dev);
 	struct s35390a *s35390a = i2c_get_clientdata(client);
 	char buf[7], status;
 	int i, err;
@@ -271,11 +273,12 @@ static int s35390a_get_datetime(struct i2c_client *client, struct rtc_time *tm)
 		tm->tm_min, tm->tm_hour, tm->tm_mday, tm->tm_mon, tm->tm_year,
 		tm->tm_wday);
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
-static int s35390a_set_alarm(struct i2c_client *client, struct rtc_wkalrm *alm)
+static int s35390a_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
 {
+	struct i2c_client *client = to_i2c_client(dev);
 	struct s35390a *s35390a = i2c_get_clientdata(client);
 	char buf[3], sts = 0;
 	int err, i;
@@ -329,8 +332,9 @@ static int s35390a_set_alarm(struct i2c_client *client, struct rtc_wkalrm *alm)
 	return err;
 }
 
-static int s35390a_read_alarm(struct i2c_client *client, struct rtc_wkalrm *alm)
+static int s35390a_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alm)
 {
+	struct i2c_client *client = to_i2c_client(dev);
 	struct s35390a *s35390a = i2c_get_clientdata(client);
 	char buf[3], sts;
 	int i, err;
@@ -384,26 +388,6 @@ static int s35390a_read_alarm(struct i2c_client *client, struct rtc_wkalrm *alm)
 	return 0;
 }
 
-static int s35390a_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alm)
-{
-	return s35390a_read_alarm(to_i2c_client(dev), alm);
-}
-
-static int s35390a_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
-{
-	return s35390a_set_alarm(to_i2c_client(dev), alm);
-}
-
-static int s35390a_rtc_read_time(struct device *dev, struct rtc_time *tm)
-{
-	return s35390a_get_datetime(to_i2c_client(dev), tm);
-}
-
-static int s35390a_rtc_set_time(struct device *dev, struct rtc_time *tm)
-{
-	return s35390a_set_datetime(to_i2c_client(dev), tm);
-}
-
 static int s35390a_rtc_ioctl(struct device *dev, unsigned int cmd,
 			     unsigned long arg)
 {
@@ -450,7 +434,6 @@ static int s35390a_probe(struct i2c_client *client,
 	int err, err_read;
 	unsigned int i;
 	struct s35390a *s35390a;
-	struct rtc_time tm;
 	char buf, status1;
 
 	if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
@@ -508,9 +491,6 @@ static int s35390a_probe(struct i2c_client *client,
 		}
 	}
 
-	if (err_read > 0 || s35390a_get_datetime(client, &tm) < 0)
-		dev_warn(&client->dev, "clock needs to be set\n");
-
 	device_set_wakeup_capable(&client->dev, 1);
 
 	s35390a->rtc = devm_rtc_device_register(&client->dev,
diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c
index a8992c227f61..75c8c5033e08 100644
--- a/drivers/rtc/rtc-s3c.c
+++ b/drivers/rtc/rtc-s3c.c
@@ -232,7 +232,7 @@ retry_get_time:
 
 	rtc_tm->tm_mon -= 1;
 
-	return rtc_valid_tm(rtc_tm);
+	return 0;
 }
 
 static int s3c_rtc_settime(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-s5m.c b/drivers/rtc/rtc-s5m.c
index 0477678d968f..8428455432ca 100644
--- a/drivers/rtc/rtc-s5m.c
+++ b/drivers/rtc/rtc-s5m.c
@@ -38,6 +38,19 @@
  */
 #define UDR_READ_RETRY_CNT	5
 
+enum {
+	RTC_SEC = 0,
+	RTC_MIN,
+	RTC_HOUR,
+	RTC_WEEKDAY,
+	RTC_DATE,
+	RTC_MONTH,
+	RTC_YEAR1,
+	RTC_YEAR2,
+	/* Make sure this is always the last enum name. */
+	RTC_MAX_NUM_TIME_REGS
+};
+
 /*
  * Registers used by the driver which are different between chipsets.
  *
@@ -367,7 +380,7 @@ static void s5m8763_tm_to_data(struct rtc_time *tm, u8 *data)
 static int s5m_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
 	struct s5m_rtc_info *info = dev_get_drvdata(dev);
-	u8 data[info->regs->regs_count];
+	u8 data[RTC_MAX_NUM_TIME_REGS];
 	int ret;
 
 	if (info->regs->read_time_udr_mask) {
@@ -407,13 +420,13 @@ static int s5m_rtc_read_time(struct device *dev, struct rtc_time *tm)
 		1900 + tm->tm_year, 1 + tm->tm_mon, tm->tm_mday,
 		tm->tm_hour, tm->tm_min, tm->tm_sec, tm->tm_wday);
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int s5m_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
 	struct s5m_rtc_info *info = dev_get_drvdata(dev);
-	u8 data[info->regs->regs_count];
+	u8 data[RTC_MAX_NUM_TIME_REGS];
 	int ret = 0;
 
 	switch (info->device_type) {
@@ -450,7 +463,7 @@ static int s5m_rtc_set_time(struct device *dev, struct rtc_time *tm)
 static int s5m_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 {
 	struct s5m_rtc_info *info = dev_get_drvdata(dev);
-	u8 data[info->regs->regs_count];
+	u8 data[RTC_MAX_NUM_TIME_REGS];
 	unsigned int val;
 	int ret, i;
 
@@ -500,7 +513,7 @@ static int s5m_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 
 static int s5m_rtc_stop_alarm(struct s5m_rtc_info *info)
 {
-	u8 data[info->regs->regs_count];
+	u8 data[RTC_MAX_NUM_TIME_REGS];
 	int ret, i;
 	struct rtc_time tm;
 
@@ -545,7 +558,7 @@ static int s5m_rtc_stop_alarm(struct s5m_rtc_info *info)
 static int s5m_rtc_start_alarm(struct s5m_rtc_info *info)
 {
 	int ret;
-	u8 data[info->regs->regs_count];
+	u8 data[RTC_MAX_NUM_TIME_REGS];
 	u8 alarm0_conf;
 	struct rtc_time tm;
 
@@ -598,7 +611,7 @@ static int s5m_rtc_start_alarm(struct s5m_rtc_info *info)
 static int s5m_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 {
 	struct s5m_rtc_info *info = dev_get_drvdata(dev);
-	u8 data[info->regs->regs_count];
+	u8 data[RTC_MAX_NUM_TIME_REGS];
 	int ret;
 
 	switch (info->device_type) {
diff --git a/drivers/rtc/rtc-sc27xx.c b/drivers/rtc/rtc-sc27xx.c
index d544d5268757..00d87d138984 100644
--- a/drivers/rtc/rtc-sc27xx.c
+++ b/drivers/rtc/rtc-sc27xx.c
@@ -376,7 +376,7 @@ static int sprd_rtc_read_time(struct device *dev, struct rtc_time *tm)
 		return ret;
 
 	rtc_time64_to_tm(secs, tm);
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int sprd_rtc_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-sh.c b/drivers/rtc/rtc-sh.c
index 6c2d3989f967..4e8ab370ce63 100644
--- a/drivers/rtc/rtc-sh.c
+++ b/drivers/rtc/rtc-sh.c
@@ -414,7 +414,7 @@ static int sh_rtc_read_time(struct device *dev, struct rtc_time *tm)
 		tm->tm_sec, tm->tm_min, tm->tm_hour,
 		tm->tm_mday, tm->tm_mon + 1, tm->tm_year, tm->tm_wday);
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int sh_rtc_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-sirfsoc.c b/drivers/rtc/rtc-sirfsoc.c
index 7367f617145c..2a9e151cae99 100644
--- a/drivers/rtc/rtc-sirfsoc.c
+++ b/drivers/rtc/rtc-sirfsoc.c
@@ -204,23 +204,6 @@ static int sirfsoc_rtc_set_time(struct device *dev,
 	return 0;
 }
 
-static int sirfsoc_rtc_ioctl(struct device *dev, unsigned int cmd,
-		unsigned long arg)
-{
-	switch (cmd) {
-	case RTC_PIE_ON:
-	case RTC_PIE_OFF:
-	case RTC_UIE_ON:
-	case RTC_UIE_OFF:
-	case RTC_AIE_ON:
-	case RTC_AIE_OFF:
-		return 0;
-
-	default:
-		return -ENOIOCTLCMD;
-	}
-}
-
 static int sirfsoc_rtc_alarm_irq_enable(struct device *dev,
 		unsigned int enabled)
 {
@@ -250,7 +233,6 @@ static const struct rtc_class_ops sirfsoc_rtc_ops = {
 	.set_time = sirfsoc_rtc_set_time,
 	.read_alarm = sirfsoc_rtc_read_alarm,
 	.set_alarm = sirfsoc_rtc_set_alarm,
-	.ioctl = sirfsoc_rtc_ioctl,
 	.alarm_irq_enable = sirfsoc_rtc_alarm_irq_enable
 };
 
diff --git a/drivers/rtc/rtc-snvs.c b/drivers/rtc/rtc-snvs.c
index d8ef9e052c4f..9af591d5223c 100644
--- a/drivers/rtc/rtc-snvs.c
+++ b/drivers/rtc/rtc-snvs.c
@@ -132,20 +132,23 @@ static int snvs_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
 	struct snvs_rtc_data *data = dev_get_drvdata(dev);
 	unsigned long time;
+	int ret;
 
 	rtc_tm_to_time(tm, &time);
 
 	/* Disable RTC first */
-	snvs_rtc_enable(data, false);
+	ret = snvs_rtc_enable(data, false);
+	if (ret)
+		return ret;
 
 	/* Write 32-bit time to 47-bit timer, leaving 15 LSBs blank */
 	regmap_write(data->regmap, data->offset + SNVS_LPSRTCLR, time << CNTR_TO_SECS_SH);
 	regmap_write(data->regmap, data->offset + SNVS_LPSRTCMR, time >> (32 - CNTR_TO_SECS_SH));
 
 	/* Enable RTC again */
-	snvs_rtc_enable(data, true);
+	ret = snvs_rtc_enable(data, true);
 
-	return 0;
+	return ret;
 }
 
 static int snvs_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
@@ -288,7 +291,11 @@ static int snvs_rtc_probe(struct platform_device *pdev)
 	regmap_write(data->regmap, data->offset + SNVS_LPSR, 0xffffffff);
 
 	/* Enable RTC */
-	snvs_rtc_enable(data, true);
+	ret = snvs_rtc_enable(data, true);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to enable rtc %d\n", ret);
+		goto error_rtc_device_register;
+	}
 
 	device_init_wakeup(&pdev->dev, true);
 
diff --git a/drivers/rtc/rtc-spear.c b/drivers/rtc/rtc-spear.c
index e377f42abae7..0567944fd4f8 100644
--- a/drivers/rtc/rtc-spear.c
+++ b/drivers/rtc/rtc-spear.c
@@ -170,18 +170,14 @@ static irqreturn_t spear_rtc_irq(int irq, void *dev_id)
 
 }
 
-static int tm2bcd(struct rtc_time *tm)
+static void tm2bcd(struct rtc_time *tm)
 {
-	if (rtc_valid_tm(tm) != 0)
-		return -EINVAL;
 	tm->tm_sec = bin2bcd(tm->tm_sec);
 	tm->tm_min = bin2bcd(tm->tm_min);
 	tm->tm_hour = bin2bcd(tm->tm_hour);
 	tm->tm_mday = bin2bcd(tm->tm_mday);
 	tm->tm_mon = bin2bcd(tm->tm_mon + 1);
 	tm->tm_year = bin2bcd(tm->tm_year);
-
-	return 0;
 }
 
 static void bcd2tm(struct rtc_time *tm)
@@ -237,8 +233,7 @@ static int spear_rtc_set_time(struct device *dev, struct rtc_time *tm)
 	struct spear_rtc_config *config = dev_get_drvdata(dev);
 	unsigned int time, date;
 
-	if (tm2bcd(tm) < 0)
-		return -EINVAL;
+	tm2bcd(tm);
 
 	rtc_wait_not_busy(config);
 	time = (tm->tm_sec << SECOND_SHIFT) | (tm->tm_min << MINUTE_SHIFT) |
@@ -295,8 +290,7 @@ static int spear_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
 	unsigned int time, date;
 	int err;
 
-	if (tm2bcd(&alm->time) < 0)
-		return -EINVAL;
+	tm2bcd(&alm->time);
 
 	rtc_wait_not_busy(config);
 
diff --git a/drivers/rtc/rtc-st-lpc.c b/drivers/rtc/rtc-st-lpc.c
index 82b0af159a28..d5222667f892 100644
--- a/drivers/rtc/rtc-st-lpc.c
+++ b/drivers/rtc/rtc-st-lpc.c
@@ -195,7 +195,6 @@ static int st_rtc_probe(struct platform_device *pdev)
 	struct device_node *np = pdev->dev.of_node;
 	struct st_rtc *rtc;
 	struct resource *res;
-	struct rtc_time tm_check;
 	uint32_t mode;
 	int ret = 0;
 
@@ -254,21 +253,6 @@ static int st_rtc_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, rtc);
 
-	/*
-	 * The RTC-LPC is able to manage date.year > 2038
-	 * but currently the kernel can not manage this date!
-	 * If the RTC-LPC has a date.year > 2038 then
-	 * it's set to the epoch "Jan 1st 2000"
-	 */
-	st_rtc_read_time(&pdev->dev, &tm_check);
-
-	if (tm_check.tm_year >=  (2038 - 1900)) {
-		memset(&tm_check, 0, sizeof(tm_check));
-		tm_check.tm_year = 100;
-		tm_check.tm_mday = 1;
-		st_rtc_set_time(&pdev->dev, &tm_check);
-	}
-
 	rtc->rtc_dev = rtc_device_register("st-lpc-rtc", &pdev->dev,
 					   &st_rtc_ops, THIS_MODULE);
 	if (IS_ERR(rtc->rtc_dev)) {
diff --git a/drivers/rtc/rtc-starfire.c b/drivers/rtc/rtc-starfire.c
index 7fc36973fa33..a7d49329d626 100644
--- a/drivers/rtc/rtc-starfire.c
+++ b/drivers/rtc/rtc-starfire.c
@@ -28,7 +28,7 @@ static u32 starfire_get_time(void)
 static int starfire_read_time(struct device *dev, struct rtc_time *tm)
 {
 	rtc_time_to_tm(starfire_get_time(), tm);
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static const struct rtc_class_ops starfire_rtc_ops = {
diff --git a/drivers/rtc/rtc-stk17ta8.c b/drivers/rtc/rtc-stk17ta8.c
index a456cb6177ea..e70b78d17a98 100644
--- a/drivers/rtc/rtc-stk17ta8.c
+++ b/drivers/rtc/rtc-stk17ta8.c
@@ -129,10 +129,6 @@ static int stk17ta8_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	/* year is 1900 + tm->tm_year */
 	tm->tm_year = bcd2bin(year) + bcd2bin(century) * 100 - 1900;
 
-	if (rtc_valid_tm(tm) < 0) {
-		dev_err(dev, "retrieved date/time is not valid.\n");
-		rtc_time_to_tm(0, tm);
-	}
 	return 0;
 }
 
@@ -242,46 +238,30 @@ static const struct rtc_class_ops stk17ta8_rtc_ops = {
 	.alarm_irq_enable	= stk17ta8_rtc_alarm_irq_enable,
 };
 
-static ssize_t stk17ta8_nvram_read(struct file *filp, struct kobject *kobj,
-				 struct bin_attribute *attr, char *buf,
-				 loff_t pos, size_t size)
+static int stk17ta8_nvram_read(void *priv, unsigned int pos, void *val,
+			       size_t bytes)
 {
-	struct device *dev = container_of(kobj, struct device, kobj);
-	struct platform_device *pdev = to_platform_device(dev);
-	struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
+	struct rtc_plat_data *pdata = priv;
 	void __iomem *ioaddr = pdata->ioaddr;
-	ssize_t count;
+	u8 *buf = val;
 
-	for (count = 0; count < size; count++)
+	for (; bytes; bytes--)
 		*buf++ = readb(ioaddr + pos++);
-	return count;
+	return 0;
 }
 
-static ssize_t stk17ta8_nvram_write(struct file *filp, struct kobject *kobj,
-				  struct bin_attribute *attr, char *buf,
-				  loff_t pos, size_t size)
+static int stk17ta8_nvram_write(void *priv, unsigned int pos, void *val,
+				size_t bytes)
 {
-	struct device *dev = container_of(kobj, struct device, kobj);
-	struct platform_device *pdev = to_platform_device(dev);
-	struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
+	struct rtc_plat_data *pdata = priv;
 	void __iomem *ioaddr = pdata->ioaddr;
-	ssize_t count;
+	u8 *buf = val;
 
-	for (count = 0; count < size; count++)
+	for (; bytes; bytes--)
 		writeb(*buf++, ioaddr + pos++);
-	return count;
+	return 0;
 }
 
-static struct bin_attribute stk17ta8_nvram_attr = {
-	.attr = {
-		.name = "nvram",
-		.mode = S_IRUGO | S_IWUSR,
-	},
-	.size = RTC_OFFSET,
-	.read = stk17ta8_nvram_read,
-	.write = stk17ta8_nvram_write,
-};
-
 static int stk17ta8_rtc_probe(struct platform_device *pdev)
 {
 	struct resource *res;
@@ -290,6 +270,14 @@ static int stk17ta8_rtc_probe(struct platform_device *pdev)
 	struct rtc_plat_data *pdata;
 	void __iomem *ioaddr;
 	int ret = 0;
+	struct nvmem_config nvmem_cfg = {
+		.name = "stk17ta8_nvram",
+		.word_size = 1,
+		.stride = 1,
+		.size = RTC_OFFSET,
+		.reg_read = stk17ta8_nvram_read,
+		.reg_write = stk17ta8_nvram_write,
+	};
 
 	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
 	if (!pdata)
@@ -328,24 +316,19 @@ static int stk17ta8_rtc_probe(struct platform_device *pdev)
 		}
 	}
 
-	pdata->rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
-				  &stk17ta8_rtc_ops, THIS_MODULE);
+	pdata->rtc = devm_rtc_allocate_device(&pdev->dev);
 	if (IS_ERR(pdata->rtc))
 		return PTR_ERR(pdata->rtc);
 
-	ret = sysfs_create_bin_file(&pdev->dev.kobj, &stk17ta8_nvram_attr);
+	pdata->rtc->ops = &stk17ta8_rtc_ops;
+	pdata->rtc->nvram_old_abi = true;
 
-	return ret;
-}
+	nvmem_cfg.priv = pdata;
+	ret = rtc_nvmem_register(pdata->rtc, &nvmem_cfg);
+	if (ret)
+		return ret;
 
-static int stk17ta8_rtc_remove(struct platform_device *pdev)
-{
-	struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
-
-	sysfs_remove_bin_file(&pdev->dev.kobj, &stk17ta8_nvram_attr);
-	if (pdata->irq > 0)
-		writeb(0, pdata->ioaddr + RTC_INTERRUPTS);
-	return 0;
+	return rtc_register_device(pdata->rtc);
 }
 
 /* work with hotplug and coldplug */
@@ -353,7 +336,6 @@ MODULE_ALIAS("platform:stk17ta8");
 
 static struct platform_driver stk17ta8_rtc_driver = {
 	.probe		= stk17ta8_rtc_probe,
-	.remove		= stk17ta8_rtc_remove,
 	.driver		= {
 		.name	= "stk17ta8",
 	},
diff --git a/drivers/rtc/rtc-sun6i.c b/drivers/rtc/rtc-sun6i.c
index 5bc28eed1adf..2e6fb275acc8 100644
--- a/drivers/rtc/rtc-sun6i.c
+++ b/drivers/rtc/rtc-sun6i.c
@@ -349,7 +349,7 @@ static int sun6i_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm)
 	 */
 	rtc_tm->tm_year += SUN6I_YEAR_OFF;
 
-	return rtc_valid_tm(rtc_tm);
+	return 0;
 }
 
 static int sun6i_rtc_getalarm(struct device *dev, struct rtc_wkalrm *wkalrm)
diff --git a/drivers/rtc/rtc-sunxi.c b/drivers/rtc/rtc-sunxi.c
index abada609ddc7..dadbf8b324ad 100644
--- a/drivers/rtc/rtc-sunxi.c
+++ b/drivers/rtc/rtc-sunxi.c
@@ -261,7 +261,7 @@ static int sunxi_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm)
 	 */
 	rtc_tm->tm_year += SUNXI_YEAR_OFF(chip->data_year);
 
-	return rtc_valid_tm(rtc_tm);
+	return 0;
 }
 
 static int sunxi_rtc_setalarm(struct device *dev, struct rtc_wkalrm *wkalrm)
diff --git a/drivers/rtc/rtc-sysfs.c b/drivers/rtc/rtc-sysfs.c
index 92ff2edb86a6..454da38c6012 100644
--- a/drivers/rtc/rtc-sysfs.c
+++ b/drivers/rtc/rtc-sysfs.c
@@ -248,6 +248,14 @@ offset_store(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR_RW(offset);
 
+static ssize_t
+range_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "[%lld,%llu]\n", to_rtc_device(dev)->range_min,
+		       to_rtc_device(dev)->range_max);
+}
+static DEVICE_ATTR_RO(range);
+
 static struct attribute *rtc_attrs[] = {
 	&dev_attr_name.attr,
 	&dev_attr_date.attr,
@@ -257,6 +265,7 @@ static struct attribute *rtc_attrs[] = {
 	&dev_attr_hctosys.attr,
 	&dev_attr_wakealarm.attr,
 	&dev_attr_offset.attr,
+	&dev_attr_range.attr,
 	NULL,
 };
 
@@ -286,6 +295,9 @@ static umode_t rtc_attr_is_visible(struct kobject *kobj,
 	} else if (attr == &dev_attr_offset.attr) {
 		if (!rtc->ops->set_offset)
 			mode = 0;
+	} else if (attr == &dev_attr_range.attr) {
+		if (!(rtc->range_max - rtc->range_min))
+			mode = 0;
 	}
 
 	return mode;
diff --git a/drivers/rtc/rtc-tegra.c b/drivers/rtc/rtc-tegra.c
index d30d57b048d3..66efff60c4d5 100644
--- a/drivers/rtc/rtc-tegra.c
+++ b/drivers/rtc/rtc-tegra.c
@@ -144,10 +144,6 @@ static int tegra_rtc_set_time(struct device *dev, struct rtc_time *tm)
 	int ret;
 
 	/* convert tm to seconds. */
-	ret = rtc_valid_tm(tm);
-	if (ret)
-		return ret;
-
 	rtc_tm_to_time(tm, &sec);
 
 	dev_vdbg(dev, "time set to %lu. %d/%d/%d %d:%02u:%02u\n",
diff --git a/drivers/rtc/rtc-tps6586x.c b/drivers/rtc/rtc-tps6586x.c
index a3418a8a3796..d7785ae0a2b4 100644
--- a/drivers/rtc/rtc-tps6586x.c
+++ b/drivers/rtc/rtc-tps6586x.c
@@ -90,7 +90,7 @@ static int tps6586x_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	seconds = ticks >> 10;
 	seconds += rtc->epoch_start;
 	rtc_time_to_tm(seconds, tm);
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int tps6586x_rtc_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-tx4939.c b/drivers/rtc/rtc-tx4939.c
index 560d9a5e0225..08dbefc79520 100644
--- a/drivers/rtc/rtc-tx4939.c
+++ b/drivers/rtc/rtc-tx4939.c
@@ -14,7 +14,30 @@
 #include <linux/module.h>
 #include <linux/io.h>
 #include <linux/gfp.h>
-#include <asm/txx9/tx4939.h>
+
+#define TX4939_RTCCTL_ALME	0x00000080
+#define TX4939_RTCCTL_ALMD	0x00000040
+#define TX4939_RTCCTL_BUSY	0x00000020
+
+#define TX4939_RTCCTL_COMMAND	0x00000007
+#define TX4939_RTCCTL_COMMAND_NOP	0x00000000
+#define TX4939_RTCCTL_COMMAND_GETTIME	0x00000001
+#define TX4939_RTCCTL_COMMAND_SETTIME	0x00000002
+#define TX4939_RTCCTL_COMMAND_GETALARM	0x00000003
+#define TX4939_RTCCTL_COMMAND_SETALARM	0x00000004
+
+#define TX4939_RTCTBC_PM	0x00000080
+#define TX4939_RTCTBC_COMP	0x0000007f
+
+#define TX4939_RTC_REG_RAMSIZE	0x00000100
+#define TX4939_RTC_REG_RWBSIZE	0x00000006
+
+struct tx4939_rtc_reg {
+	__u32 ctl;
+	__u32 adr;
+	__u32 dat;
+	__u32 tbc;
+};
 
 struct tx4939rtc_plat_data {
 	struct rtc_device *rtc;
@@ -86,9 +109,10 @@ static int tx4939_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	for (i = 2; i < 6; i++)
 		buf[i] = __raw_readl(&rtcreg->dat);
 	spin_unlock_irq(&pdata->lock);
-	sec = (buf[5] << 24) | (buf[4] << 16) | (buf[3] << 8) | buf[2];
+	sec = ((unsigned long)buf[5] << 24) | (buf[4] << 16) |
+		(buf[3] << 8) | buf[2];
 	rtc_time_to_tm(sec, tm);
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int tx4939_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
@@ -147,7 +171,8 @@ static int tx4939_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 	alrm->enabled = (ctl & TX4939_RTCCTL_ALME) ? 1 : 0;
 	alrm->pending = (ctl & TX4939_RTCCTL_ALMD) ? 1 : 0;
 	spin_unlock_irq(&pdata->lock);
-	sec = (buf[5] << 24) | (buf[4] << 16) | (buf[3] << 8) | buf[2];
+	sec = ((unsigned long)buf[5] << 24) | (buf[4] << 16) |
+		(buf[3] << 8) | buf[2];
 	rtc_time_to_tm(sec, &alrm->time);
 	return rtc_valid_tm(&alrm->time);
 }
@@ -189,58 +214,52 @@ static const struct rtc_class_ops tx4939_rtc_ops = {
 	.alarm_irq_enable	= tx4939_rtc_alarm_irq_enable,
 };
 
-static ssize_t tx4939_rtc_nvram_read(struct file *filp, struct kobject *kobj,
-				     struct bin_attribute *bin_attr,
-				     char *buf, loff_t pos, size_t size)
+static int tx4939_nvram_read(void *priv, unsigned int pos, void *val,
+			     size_t bytes)
 {
-	struct device *dev = container_of(kobj, struct device, kobj);
-	struct tx4939rtc_plat_data *pdata = get_tx4939rtc_plat_data(dev);
+	struct tx4939rtc_plat_data *pdata = priv;
 	struct tx4939_rtc_reg __iomem *rtcreg = pdata->rtcreg;
-	ssize_t count;
+	u8 *buf = val;
 
 	spin_lock_irq(&pdata->lock);
-	for (count = 0; count < size; count++) {
+	for (; bytes; bytes--) {
 		__raw_writel(pos++, &rtcreg->adr);
 		*buf++ = __raw_readl(&rtcreg->dat);
 	}
 	spin_unlock_irq(&pdata->lock);
-	return count;
+	return 0;
 }
 
-static ssize_t tx4939_rtc_nvram_write(struct file *filp, struct kobject *kobj,
-				      struct bin_attribute *bin_attr,
-				      char *buf, loff_t pos, size_t size)
+static int tx4939_nvram_write(void *priv, unsigned int pos, void *val,
+			      size_t bytes)
 {
-	struct device *dev = container_of(kobj, struct device, kobj);
-	struct tx4939rtc_plat_data *pdata = get_tx4939rtc_plat_data(dev);
+	struct tx4939rtc_plat_data *pdata = priv;
 	struct tx4939_rtc_reg __iomem *rtcreg = pdata->rtcreg;
-	ssize_t count;
+	u8 *buf = val;
 
 	spin_lock_irq(&pdata->lock);
-	for (count = 0; count < size; count++) {
+	for (; bytes; bytes--) {
 		__raw_writel(pos++, &rtcreg->adr);
 		__raw_writel(*buf++, &rtcreg->dat);
 	}
 	spin_unlock_irq(&pdata->lock);
-	return count;
+	return 0;
 }
 
-static struct bin_attribute tx4939_rtc_nvram_attr = {
-	.attr = {
-		.name = "nvram",
-		.mode = S_IRUGO | S_IWUSR,
-	},
-	.size = TX4939_RTC_REG_RAMSIZE,
-	.read = tx4939_rtc_nvram_read,
-	.write = tx4939_rtc_nvram_write,
-};
-
 static int __init tx4939_rtc_probe(struct platform_device *pdev)
 {
 	struct rtc_device *rtc;
 	struct tx4939rtc_plat_data *pdata;
 	struct resource *res;
 	int irq, ret;
+	struct nvmem_config nvmem_cfg = {
+		.name = "tx4939_nvram",
+		.word_size = 4,	/* NOTE(review): accessors are byte-wise; confirm */
+		.stride = 4,
+		.size = TX4939_RTC_REG_RAMSIZE,
+		.reg_read = tx4939_nvram_read,
+		.reg_write = tx4939_nvram_write,
+	};
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0)
@@ -260,21 +279,27 @@ static int __init tx4939_rtc_probe(struct platform_device *pdev)
 	if (devm_request_irq(&pdev->dev, irq, tx4939_rtc_interrupt,
 			     0, pdev->name, &pdev->dev) < 0)
 		return -EBUSY;
-	rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
-				  &tx4939_rtc_ops, THIS_MODULE);
+	rtc = devm_rtc_allocate_device(&pdev->dev);
 	if (IS_ERR(rtc))
 		return PTR_ERR(rtc);
+
+	rtc->ops = &tx4939_rtc_ops;
+	rtc->nvram_old_abi = true;
+
 	pdata->rtc = rtc;
-	ret = sysfs_create_bin_file(&pdev->dev.kobj, &tx4939_rtc_nvram_attr);
 
-	return ret;
+	nvmem_cfg.priv = pdata;
+	ret = rtc_nvmem_register(rtc, &nvmem_cfg);
+	if (ret)
+		return ret;
+
+	return rtc_register_device(rtc);
 }
 
 static int __exit tx4939_rtc_remove(struct platform_device *pdev)
 {
 	struct tx4939rtc_plat_data *pdata = platform_get_drvdata(pdev);
 
-	sysfs_remove_bin_file(&pdev->dev.kobj, &tx4939_rtc_nvram_attr);
 	spin_lock_irq(&pdata->lock);
 	tx4939_rtc_cmd(pdata->rtcreg, TX4939_RTCCTL_COMMAND_NOP);
 	spin_unlock_irq(&pdata->lock);
diff --git a/drivers/rtc/rtc-wm831x.c b/drivers/rtc/rtc-wm831x.c
index 75aea4c4d334..7b824dabf104 100644
--- a/drivers/rtc/rtc-wm831x.c
+++ b/drivers/rtc/rtc-wm831x.c
@@ -156,7 +156,7 @@ static int wm831x_rtc_readtime(struct device *dev, struct rtc_time *tm)
 			u32 time = (time1[0] << 16) | time1[1];
 
 			rtc_time_to_tm(time, tm);
-			return rtc_valid_tm(tm);
+			return 0;
 		}
 
 	} while (++count < WM831X_GET_TIME_RETRIES);
diff --git a/drivers/rtc/rtc-xgene.c b/drivers/rtc/rtc-xgene.c
index 0c34d3b81279..153820876a82 100644
--- a/drivers/rtc/rtc-xgene.c
+++ b/drivers/rtc/rtc-xgene.c
@@ -60,7 +60,7 @@ static int xgene_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	struct xgene_rtc_dev *pdata = dev_get_drvdata(dev);
 
 	rtc_time_to_tm(readl(pdata->csr_base + RTC_CCVR), tm);
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int xgene_rtc_set_mmss(struct device *dev, unsigned long secs)
diff --git a/drivers/rtc/rtc-zynqmp.c b/drivers/rtc/rtc-zynqmp.c
index da18a8ae3c1d..fba994dc31eb 100644
--- a/drivers/rtc/rtc-zynqmp.c
+++ b/drivers/rtc/rtc-zynqmp.c
@@ -122,7 +122,7 @@ static int xlnx_rtc_read_time(struct device *dev, struct rtc_time *tm)
 		rtc_time64_to_tm(read_time, tm);
 	}
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int xlnx_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
diff --git a/drivers/rtc/systohc.c b/drivers/rtc/systohc.c
index 0c177647ea6c..718293d72426 100644
--- a/drivers/rtc/systohc.c
+++ b/drivers/rtc/systohc.c
@@ -20,7 +20,7 @@
  * cases.
  *
  * -EPROTO is returned if now.tv_nsec is not close enough to *target_nsec.
- (
+ *
  * If temporary failure is indicated the caller should try again 'soon'
  */
 int rtc_set_ntp_time(struct timespec64 now, unsigned long *target_nsec)
diff --git a/drivers/s390/block/Kconfig b/drivers/s390/block/Kconfig
index 1444333210c7..9ac7574e3cfb 100644
--- a/drivers/s390/block/Kconfig
+++ b/drivers/s390/block/Kconfig
@@ -15,8 +15,8 @@ config BLK_DEV_XPRAM
 
 config DCSSBLK
 	def_tristate m
-	select DAX
 	select FS_DAX_LIMITED
+	select DAX_DRIVER
 	prompt "DCSSBLK support"
 	depends on S390 && BLOCK
 	help
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index b5692a284bd8..04143c08bd6e 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -3918,8 +3918,13 @@ static int dasd_generic_requeue_all_requests(struct dasd_device *device)
 			cqr = refers;
 		}
 
-		if (cqr->block)
-			list_del_init(&cqr->blocklist);
+		/*
+		 * _dasd_requeue_request already checked for a valid
+		 * blockdevice, so there is no need to check again.
+		 * All ERP requests (cqr->refers) carry a copy of the
+		 * cqr->block pointer from the original cqr.
+		 */
+		list_del_init(&cqr->blocklist);
 		cqr->block->base->discipline->free_cp(
 			cqr, (struct request *) cqr->callback_data);
 	}
diff --git a/drivers/s390/block/dasd_3990_erp.c b/drivers/s390/block/dasd_3990_erp.c
index ee14d8e45c97..ee73b0607e47 100644
--- a/drivers/s390/block/dasd_3990_erp.c
+++ b/drivers/s390/block/dasd_3990_erp.c
@@ -2214,15 +2214,28 @@ static void dasd_3990_erp_disable_path(struct dasd_device *device, __u8 lpum)
 {
 	int pos = pathmask_to_pos(lpum);
 
+	if (!(device->features & DASD_FEATURE_PATH_AUTODISABLE)) {
+		dev_err(&device->cdev->dev,
+			"Path %x.%02x (pathmask %02x) is operational despite excessive IFCCs\n",
+			device->path[pos].cssid, device->path[pos].chpid, lpum);
+		goto out;
+	}
+
 	/* no remaining path, cannot disable */
-	if (!(dasd_path_get_opm(device) & ~lpum))
-		return;
+	if (!(dasd_path_get_opm(device) & ~lpum)) {
+		dev_err(&device->cdev->dev,
+			"Last path %x.%02x (pathmask %02x) is operational despite excessive IFCCs\n",
+			device->path[pos].cssid, device->path[pos].chpid, lpum);
+		goto out;
+	}
 
 	dev_err(&device->cdev->dev,
 		"Path %x.%02x (pathmask %02x) is disabled - IFCC threshold exceeded\n",
 		device->path[pos].cssid, device->path[pos].chpid, lpum);
 	dasd_path_remove_opm(device, lpum);
 	dasd_path_add_ifccpm(device, lpum);
+
+out:
 	device->path[pos].errorclk = 0;
 	atomic_set(&device->path[pos].error_count, 0);
 }
diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c
index e7cd28ff1984..b9ebb565ee2c 100644
--- a/drivers/s390/block/dasd_devmap.c
+++ b/drivers/s390/block/dasd_devmap.c
@@ -1550,9 +1550,49 @@ dasd_path_threshold_store(struct device *dev, struct device_attribute *attr,
 	dasd_put_device(device);
 	return count;
 }
-
 static DEVICE_ATTR(path_threshold, 0644, dasd_path_threshold_show,
 		   dasd_path_threshold_store);
+
+/*
+ * configure whether a path is disabled once the IFCC/CCC error
+ * threshold is exceeded
+ */
+static ssize_t
+dasd_path_autodisable_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct dasd_devmap *devmap;
+	int flag;
+
+	devmap = dasd_find_busid(dev_name(dev));
+	if (!IS_ERR(devmap))
+		flag = (devmap->features & DASD_FEATURE_PATH_AUTODISABLE) != 0;
+	else
+		flag = (DASD_FEATURE_DEFAULT &
+			DASD_FEATURE_PATH_AUTODISABLE) != 0;
+	return snprintf(buf, PAGE_SIZE, flag ? "1\n" : "0\n");
+}
+
+static ssize_t
+dasd_path_autodisable_store(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t count)
+{
+	unsigned int val;
+	int rc;
+
+	if (kstrtouint(buf, 0, &val) || val > 1)
+		return -EINVAL;
+
+	rc = dasd_set_feature(to_ccwdev(dev),
+			      DASD_FEATURE_PATH_AUTODISABLE, val);
+
+	return rc ? : count;
+}
+
+static DEVICE_ATTR(path_autodisable, 0644,
+		   dasd_path_autodisable_show,
+		   dasd_path_autodisable_store);
 /*
  * interval for IFCC/CCC checks
  * meaning time with no IFCC/CCC error before the error counter
@@ -1623,6 +1663,7 @@ static struct attribute * dasd_attrs[] = {
 	&dev_attr_host_access_count.attr,
 	&dev_attr_path_masks.attr,
 	&dev_attr_path_threshold.attr,
+	&dev_attr_path_autodisable.attr,
 	&dev_attr_path_interval.attr,
 	&dev_attr_path_reset.attr,
 	&dev_attr_hpf.attr,
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index 29397a9dba68..be208e7adcb4 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -214,24 +214,25 @@ static void set_ch_t(struct ch_t *geo, __u32 cyl, __u8 head)
 	geo->head |= head;
 }
 
-static int check_XRC(struct ccw1 *ccw, struct DE_eckd_data *data,
+static int set_timestamp(struct ccw1 *ccw, struct DE_eckd_data *data,
 		     struct dasd_device *device)
 {
 	struct dasd_eckd_private *private = device->private;
 	int rc;
 
-	if (!private->rdc_data.facilities.XRC_supported)
+	rc = get_phys_clock(&data->ep_sys_time);
+	/*
+	 * Ignore return code if XRC is not supported or
+	 * sync clock is switched off
+	 */
+	if ((rc && !private->rdc_data.facilities.XRC_supported) ||
+	    rc == -EOPNOTSUPP || rc == -EACCES)
 		return 0;
 
 	/* switch on System Time Stamp - needed for XRC Support */
 	data->ga_extended |= 0x08; /* switch on 'Time Stamp Valid'   */
 	data->ga_extended |= 0x02; /* switch on 'Extended Parameter' */
 
-	rc = get_phys_clock(&data->ep_sys_time);
-	/* Ignore return code if sync clock is switched off. */
-	if (rc == -EOPNOTSUPP || rc == -EACCES)
-		rc = 0;
-
 	if (ccw) {
 		ccw->count = sizeof(struct DE_eckd_data);
 		ccw->flags |= CCW_FLAG_SLI;
@@ -286,12 +287,12 @@ define_extent(struct ccw1 *ccw, struct DE_eckd_data *data, unsigned int trk,
 	case DASD_ECKD_CCW_WRITE_KD_MT:
 		data->mask.perm = 0x02;
 		data->attributes.operation = private->attrib.operation;
-		rc = check_XRC(ccw, data, device);
+		rc = set_timestamp(ccw, data, device);
 		break;
 	case DASD_ECKD_CCW_WRITE_CKD:
 	case DASD_ECKD_CCW_WRITE_CKD_MT:
 		data->attributes.operation = DASD_BYPASS_CACHE;
-		rc = check_XRC(ccw, data, device);
+		rc = set_timestamp(ccw, data, device);
 		break;
 	case DASD_ECKD_CCW_ERASE:
 	case DASD_ECKD_CCW_WRITE_HOME_ADDRESS:
@@ -299,7 +300,7 @@ define_extent(struct ccw1 *ccw, struct DE_eckd_data *data, unsigned int trk,
 		data->mask.perm = 0x3;
 		data->mask.auth = 0x1;
 		data->attributes.operation = DASD_BYPASS_CACHE;
-		rc = check_XRC(ccw, data, device);
+		rc = set_timestamp(ccw, data, device);
 		break;
 	case DASD_ECKD_CCW_WRITE_FULL_TRACK:
 		data->mask.perm = 0x03;
@@ -310,7 +311,7 @@ define_extent(struct ccw1 *ccw, struct DE_eckd_data *data, unsigned int trk,
 		data->mask.perm = 0x02;
 		data->attributes.operation = private->attrib.operation;
 		data->blk_size = blksize;
-		rc = check_XRC(ccw, data, device);
+		rc = set_timestamp(ccw, data, device);
 		break;
 	default:
 		dev_err(&device->cdev->dev,
@@ -993,7 +994,7 @@ static int dasd_eckd_read_conf(struct dasd_device *device)
 	struct dasd_eckd_private *private, path_private;
 	struct dasd_uid *uid;
 	char print_path_uid[60], print_device_uid[60];
-	struct channel_path_desc *chp_desc;
+	struct channel_path_desc_fmt0 *chp_desc;
 	struct subchannel_id sch_id;
 
 	private = device->private;
@@ -3440,7 +3441,7 @@ static int prepare_itcw(struct itcw *itcw,
 		dedata->mask.perm = 0x02;
 		dedata->attributes.operation = basepriv->attrib.operation;
 		dedata->blk_size = blksize;
-		rc = check_XRC(NULL, dedata, basedev);
+		rc = set_timestamp(NULL, dedata, basedev);
 		dedata->ga_extended |= 0x42;
 		lredata->operation.orientation = 0x0;
 		lredata->operation.operation = 0x3F;
diff --git a/drivers/s390/char/Makefile b/drivers/s390/char/Makefile
index a2b33a22c82a..d049e2d74484 100644
--- a/drivers/s390/char/Makefile
+++ b/drivers/s390/char/Makefile
@@ -23,7 +23,7 @@ CFLAGS_REMOVE_sclp_early_core.o	+= $(CC_FLAGS_EXPOLINE)
 
 obj-y += ctrlchar.o keyboard.o defkeymap.o sclp.o sclp_rw.o sclp_quiesce.o \
 	 sclp_cmd.o sclp_config.o sclp_cpi_sys.o sclp_ocf.o sclp_ctl.o \
-	 sclp_early.o sclp_early_core.o
+	 sclp_early.o sclp_early_core.o sclp_sd.o
 
 obj-$(CONFIG_TN3270) += raw3270.o
 obj-$(CONFIG_TN3270_CONSOLE) += con3270.o
diff --git a/drivers/s390/char/defkeymap.c b/drivers/s390/char/defkeymap.c
index 98a5c459a1bf..60845d467a1b 100644
--- a/drivers/s390/char/defkeymap.c
+++ b/drivers/s390/char/defkeymap.c
@@ -9,7 +9,9 @@
 #include <linux/kbd_kern.h>
 #include <linux/kbd_diacr.h>
 
-u_short plain_map[NR_KEYS] = {
+#include "keyboard.h"
+
+u_short ebc_plain_map[NR_KEYS] = {
 	0xf000,	0xf000,	0xf000,	0xf000,	0xf000,	0xf000,	0xf000,	0xf000,
 	0xf000,	0xf000,	0xf000,	0xf000,	0xf000,	0xf000,	0xf000,	0xf000,
 	0xf000,	0xf000,	0xf000,	0xf000,	0xf000,	0xf000,	0xf000,	0xf000,
@@ -85,12 +87,12 @@ static u_short shift_ctrl_map[NR_KEYS] = {
 	0xf20a,	0xf108,	0xf200,	0xf200,	0xf200,	0xf200,	0xf200,	0xf200,
 };
 
-ushort *key_maps[MAX_NR_KEYMAPS] = {
-	plain_map, shift_map, NULL, NULL,
+ushort *ebc_key_maps[MAX_NR_KEYMAPS] = {
+	ebc_plain_map, shift_map, NULL, NULL,
 	ctrl_map, shift_ctrl_map, NULL,
 };
 
-unsigned int keymap_count = 4;
+unsigned int ebc_keymap_count = 4;
 
 
 /*
@@ -99,7 +101,7 @@ unsigned int keymap_count = 4;
  * the default and allocate dynamically in chunks of 512 bytes.
  */
 
-char func_buf[] = {
+char ebc_func_buf[] = {
 	'\033', '[', '[', 'A', 0, 
 	'\033', '[', '[', 'B', 0, 
 	'\033', '[', '[', 'C', 0, 
@@ -123,37 +125,37 @@ char func_buf[] = {
 };
 
 
-char *funcbufptr = func_buf;
-int funcbufsize = sizeof(func_buf);
-int funcbufleft = 0;          /* space left */
-
-char *func_table[MAX_NR_FUNC] = {
-	func_buf + 0,
-	func_buf + 5,
-	func_buf + 10,
-	func_buf + 15,
-	func_buf + 20,
-	func_buf + 25,
-	func_buf + 31,
-	func_buf + 37,
-	func_buf + 43,
-	func_buf + 49,
-	func_buf + 55,
-	func_buf + 61,
-	func_buf + 67,
-	func_buf + 73,
-	func_buf + 79,
-	func_buf + 85,
-	func_buf + 91,
-	func_buf + 97,
-	func_buf + 103,
-	func_buf + 109,
+char *ebc_funcbufptr = ebc_func_buf;
+int ebc_funcbufsize = sizeof(ebc_func_buf);
+int ebc_funcbufleft;		/* space left */
+
+char *ebc_func_table[MAX_NR_FUNC] = {
+	ebc_func_buf + 0,
+	ebc_func_buf + 5,
+	ebc_func_buf + 10,
+	ebc_func_buf + 15,
+	ebc_func_buf + 20,
+	ebc_func_buf + 25,
+	ebc_func_buf + 31,
+	ebc_func_buf + 37,
+	ebc_func_buf + 43,
+	ebc_func_buf + 49,
+	ebc_func_buf + 55,
+	ebc_func_buf + 61,
+	ebc_func_buf + 67,
+	ebc_func_buf + 73,
+	ebc_func_buf + 79,
+	ebc_func_buf + 85,
+	ebc_func_buf + 91,
+	ebc_func_buf + 97,
+	ebc_func_buf + 103,
+	ebc_func_buf + 109,
 	NULL,
 };
 
-struct kbdiacruc accent_table[MAX_DIACR] = {
+struct kbdiacruc ebc_accent_table[MAX_DIACR] = {
 	{'^', 'c', 0003},	{'^', 'd', 0004},
 	{'^', 'z', 0032},	{'^', 0012, 0000},
 };
 
-unsigned int accent_table_size = 4;
+unsigned int ebc_accent_table_size = 4;
diff --git a/drivers/s390/char/keyboard.c b/drivers/s390/char/keyboard.c
index 5b505fdaedec..db1fbf9b00b5 100644
--- a/drivers/s390/char/keyboard.c
+++ b/drivers/s390/char/keyboard.c
@@ -54,24 +54,24 @@ kbd_alloc(void) {
 	kbd = kzalloc(sizeof(struct kbd_data), GFP_KERNEL);
 	if (!kbd)
 		goto out;
-	kbd->key_maps = kzalloc(sizeof(key_maps), GFP_KERNEL);
+	kbd->key_maps = kzalloc(sizeof(ebc_key_maps), GFP_KERNEL);
 	if (!kbd->key_maps)
 		goto out_kbd;
-	for (i = 0; i < ARRAY_SIZE(key_maps); i++) {
-		if (key_maps[i]) {
-			kbd->key_maps[i] = kmemdup(key_maps[i],
+	for (i = 0; i < ARRAY_SIZE(ebc_key_maps); i++) {
+		if (ebc_key_maps[i]) {
+			kbd->key_maps[i] = kmemdup(ebc_key_maps[i],
 						   sizeof(u_short) * NR_KEYS,
 						   GFP_KERNEL);
 			if (!kbd->key_maps[i])
 				goto out_maps;
 		}
 	}
-	kbd->func_table = kzalloc(sizeof(func_table), GFP_KERNEL);
+	kbd->func_table = kzalloc(sizeof(ebc_func_table), GFP_KERNEL);
 	if (!kbd->func_table)
 		goto out_maps;
-	for (i = 0; i < ARRAY_SIZE(func_table); i++) {
-		if (func_table[i]) {
-			kbd->func_table[i] = kstrdup(func_table[i],
+	for (i = 0; i < ARRAY_SIZE(ebc_func_table); i++) {
+		if (ebc_func_table[i]) {
+			kbd->func_table[i] = kstrdup(ebc_func_table[i],
 						     GFP_KERNEL);
 			if (!kbd->func_table[i])
 				goto out_func;
@@ -81,22 +81,22 @@ kbd_alloc(void) {
 		kzalloc(sizeof(fn_handler_fn *) * NR_FN_HANDLER, GFP_KERNEL);
 	if (!kbd->fn_handler)
 		goto out_func;
-	kbd->accent_table = kmemdup(accent_table,
+	kbd->accent_table = kmemdup(ebc_accent_table,
 				    sizeof(struct kbdiacruc) * MAX_DIACR,
 				    GFP_KERNEL);
 	if (!kbd->accent_table)
 		goto out_fn_handler;
-	kbd->accent_table_size = accent_table_size;
+	kbd->accent_table_size = ebc_accent_table_size;
 	return kbd;
 
 out_fn_handler:
 	kfree(kbd->fn_handler);
 out_func:
-	for (i = 0; i < ARRAY_SIZE(func_table); i++)
+	for (i = 0; i < ARRAY_SIZE(ebc_func_table); i++)
 		kfree(kbd->func_table[i]);
 	kfree(kbd->func_table);
 out_maps:
-	for (i = 0; i < ARRAY_SIZE(key_maps); i++)
+	for (i = 0; i < ARRAY_SIZE(ebc_key_maps); i++)
 		kfree(kbd->key_maps[i]);
 	kfree(kbd->key_maps);
 out_kbd:
@@ -112,10 +112,10 @@ kbd_free(struct kbd_data *kbd)
 
 	kfree(kbd->accent_table);
 	kfree(kbd->fn_handler);
-	for (i = 0; i < ARRAY_SIZE(func_table); i++)
+	for (i = 0; i < ARRAY_SIZE(ebc_func_table); i++)
 		kfree(kbd->func_table[i]);
 	kfree(kbd->func_table);
-	for (i = 0; i < ARRAY_SIZE(key_maps); i++)
+	for (i = 0; i < ARRAY_SIZE(ebc_key_maps); i++)
 		kfree(kbd->key_maps[i]);
 	kfree(kbd->key_maps);
 	kfree(kbd);
@@ -131,7 +131,7 @@ kbd_ascebc(struct kbd_data *kbd, unsigned char *ascebc)
 	int i, j, k;
 
 	memset(ascebc, 0x40, 256);
-	for (i = 0; i < ARRAY_SIZE(key_maps); i++) {
+	for (i = 0; i < ARRAY_SIZE(ebc_key_maps); i++) {
 		keymap = kbd->key_maps[i];
 		if (!keymap)
 			continue;
@@ -158,7 +158,7 @@ kbd_ebcasc(struct kbd_data *kbd, unsigned char *ebcasc)
 	int i, j, k;
 
 	memset(ebcasc, ' ', 256);
-	for (i = 0; i < ARRAY_SIZE(key_maps); i++) {
+	for (i = 0; i < ARRAY_SIZE(ebc_key_maps); i++) {
 		keymap = kbd->key_maps[i];
 		if (!keymap)
 			continue;
diff --git a/drivers/s390/char/keyboard.h b/drivers/s390/char/keyboard.h
index a074d9711628..c467589c7f45 100644
--- a/drivers/s390/char/keyboard.h
+++ b/drivers/s390/char/keyboard.h
@@ -14,6 +14,17 @@
 
 struct kbd_data;
 
+extern int ebc_funcbufsize, ebc_funcbufleft;
+extern char *ebc_func_table[MAX_NR_FUNC];
+extern char ebc_func_buf[];
+extern char *ebc_funcbufptr;
+extern unsigned int ebc_keymap_count;
+
+extern struct kbdiacruc ebc_accent_table[];
+extern unsigned int ebc_accent_table_size;
+extern unsigned short *ebc_key_maps[MAX_NR_KEYMAPS];
+extern unsigned short ebc_plain_map[NR_KEYS];
+
 typedef void (fn_handler_fn)(struct kbd_data *);
 
 /*
diff --git a/drivers/s390/char/sclp.c b/drivers/s390/char/sclp.c
index e4e2df7a478e..e9aa71cdfc44 100644
--- a/drivers/s390/char/sclp.c
+++ b/drivers/s390/char/sclp.c
@@ -417,7 +417,7 @@ sclp_dispatch_evbufs(struct sccb_header *sccb)
 		reg = NULL;
 		list_for_each(l, &sclp_reg_list) {
 			reg = list_entry(l, struct sclp_register, list);
-			if (reg->receive_mask & (1 << (32 - evbuf->type)))
+			if (reg->receive_mask & SCLP_EVTYP_MASK(evbuf->type))
 				break;
 			else
 				reg = NULL;
@@ -618,9 +618,12 @@ struct sclp_statechangebuf {
 	u16		_zeros : 12;
 	u16		mask_length;
 	u64		sclp_active_facility_mask;
-	sccb_mask_t	sclp_receive_mask;
-	sccb_mask_t	sclp_send_mask;
-	u32		read_data_function_mask;
+	u8		masks[2 * 1021 + 4];	/* variable length */
+	/*
+	 * u8		sclp_receive_mask[mask_length];
+	 * u8		sclp_send_mask[mask_length];
+	 * u32		read_data_function_mask;
+	 */
 } __attribute__((packed));
 
 
@@ -631,14 +634,14 @@ sclp_state_change_cb(struct evbuf_header *evbuf)
 	unsigned long flags;
 	struct sclp_statechangebuf *scbuf;
 
+	BUILD_BUG_ON(sizeof(struct sclp_statechangebuf) > PAGE_SIZE);
+
 	scbuf = (struct sclp_statechangebuf *) evbuf;
-	if (scbuf->mask_length != sizeof(sccb_mask_t))
-		return;
 	spin_lock_irqsave(&sclp_lock, flags);
 	if (scbuf->validity_sclp_receive_mask)
-		sclp_receive_mask = scbuf->sclp_receive_mask;
+		sclp_receive_mask = sccb_get_recv_mask(scbuf);
 	if (scbuf->validity_sclp_send_mask)
-		sclp_send_mask = scbuf->sclp_send_mask;
+		sclp_send_mask = sccb_get_send_mask(scbuf);
 	spin_unlock_irqrestore(&sclp_lock, flags);
 	if (scbuf->validity_sclp_active_facility_mask)
 		sclp.facilities = scbuf->sclp_active_facility_mask;
@@ -748,7 +751,7 @@ EXPORT_SYMBOL(sclp_remove_processed);
 
 /* Prepare init mask request. Called while sclp_lock is locked. */
 static inline void
-__sclp_make_init_req(u32 receive_mask, u32 send_mask)
+__sclp_make_init_req(sccb_mask_t receive_mask, sccb_mask_t send_mask)
 {
 	struct init_sccb *sccb;
 
@@ -761,12 +764,15 @@ __sclp_make_init_req(u32 receive_mask, u32 send_mask)
 	sclp_init_req.callback = NULL;
 	sclp_init_req.callback_data = NULL;
 	sclp_init_req.sccb = sccb;
-	sccb->header.length = sizeof(struct init_sccb);
-	sccb->mask_length = sizeof(sccb_mask_t);
-	sccb->receive_mask = receive_mask;
-	sccb->send_mask = send_mask;
-	sccb->sclp_receive_mask = 0;
-	sccb->sclp_send_mask = 0;
+	sccb->header.length = sizeof(*sccb);
+	if (sclp_mask_compat_mode)
+		sccb->mask_length = SCLP_MASK_SIZE_COMPAT;
+	else
+		sccb->mask_length = sizeof(sccb_mask_t);
+	sccb_set_recv_mask(sccb, receive_mask);
+	sccb_set_send_mask(sccb, send_mask);
+	sccb_set_sclp_recv_mask(sccb, 0);
+	sccb_set_sclp_send_mask(sccb, 0);
 }
 
 /* Start init mask request. If calculate is non-zero, calculate the mask as
@@ -822,8 +828,8 @@ sclp_init_mask(int calculate)
 		    sccb->header.response_code == 0x20) {
 			/* Successful request */
 			if (calculate) {
-				sclp_receive_mask = sccb->sclp_receive_mask;
-				sclp_send_mask = sccb->sclp_send_mask;
+				sclp_receive_mask = sccb_get_sclp_recv_mask(sccb);
+				sclp_send_mask = sccb_get_sclp_send_mask(sccb);
 			} else {
 				sclp_receive_mask = 0;
 				sclp_send_mask = 0;
@@ -974,12 +980,18 @@ sclp_check_interface(void)
 		irq_subclass_unregister(IRQ_SUBCLASS_SERVICE_SIGNAL);
 		spin_lock_irqsave(&sclp_lock, flags);
 		del_timer(&sclp_request_timer);
-		if (sclp_init_req.status == SCLP_REQ_DONE &&
-		    sccb->header.response_code == 0x20) {
-			rc = 0;
-			break;
-		} else
-			rc = -EBUSY;
+		rc = -EBUSY;
+		if (sclp_init_req.status == SCLP_REQ_DONE) {
+			if (sccb->header.response_code == 0x20) {
+				rc = 0;
+				break;
+			} else if (sccb->header.response_code == 0x74f0) {
+				if (!sclp_mask_compat_mode) {
+					sclp_mask_compat_mode = true;
+					retry = 0;
+				}
+			}
+		}
 	}
 	unregister_external_irq(EXT_IRQ_SERVICE_SIG, sclp_check_handler);
 	spin_unlock_irqrestore(&sclp_lock, flags);
diff --git a/drivers/s390/char/sclp.h b/drivers/s390/char/sclp.h
index f41f6e2ca063..1fe4918088e7 100644
--- a/drivers/s390/char/sclp.h
+++ b/drivers/s390/char/sclp.h
@@ -18,7 +18,7 @@
 #define MAX_KMEM_PAGES (sizeof(unsigned long) << 3)
 #define SCLP_CONSOLE_PAGES	6
 
-#define SCLP_EVTYP_MASK(T)	(1U << (32 - (T)))
+#define SCLP_EVTYP_MASK(T) (1UL << (sizeof(sccb_mask_t) * BITS_PER_BYTE - (T)))
 
 #define EVTYP_OPCMD		0x01
 #define EVTYP_MSG		0x02
@@ -28,6 +28,7 @@
 #define EVTYP_PMSGCMD		0x09
 #define EVTYP_ASYNC		0x0A
 #define EVTYP_CTLPROGIDENT	0x0B
+#define EVTYP_STORE_DATA	0x0C
 #define EVTYP_ERRNOTIFY		0x18
 #define EVTYP_VT220MSG		0x1A
 #define EVTYP_SDIAS		0x1C
@@ -42,6 +43,7 @@
 #define EVTYP_PMSGCMD_MASK	SCLP_EVTYP_MASK(EVTYP_PMSGCMD)
 #define EVTYP_ASYNC_MASK	SCLP_EVTYP_MASK(EVTYP_ASYNC)
 #define EVTYP_CTLPROGIDENT_MASK	SCLP_EVTYP_MASK(EVTYP_CTLPROGIDENT)
+#define EVTYP_STORE_DATA_MASK	SCLP_EVTYP_MASK(EVTYP_STORE_DATA)
 #define EVTYP_ERRNOTIFY_MASK	SCLP_EVTYP_MASK(EVTYP_ERRNOTIFY)
 #define EVTYP_VT220MSG_MASK	SCLP_EVTYP_MASK(EVTYP_VT220MSG)
 #define EVTYP_SDIAS_MASK	SCLP_EVTYP_MASK(EVTYP_SDIAS)
@@ -85,7 +87,7 @@ enum sclp_pm_event {
 #define SCLP_PANIC_PRIO		1
 #define SCLP_PANIC_PRIO_CLIENT	0
 
-typedef u32 sccb_mask_t;	/* ATTENTION: assumes 32bit mask !!! */
+typedef u64 sccb_mask_t;
 
 struct sccb_header {
 	u16	length;
@@ -98,12 +100,53 @@ struct init_sccb {
 	struct sccb_header header;
 	u16 _reserved;
 	u16 mask_length;
-	sccb_mask_t receive_mask;
-	sccb_mask_t send_mask;
-	sccb_mask_t sclp_receive_mask;
-	sccb_mask_t sclp_send_mask;
+	u8 masks[4 * 1021];	/* variable length */
+	/*
+	 * u8 receive_mask[mask_length];
+	 * u8 send_mask[mask_length];
+	 * u8 sclp_receive_mask[mask_length];
+	 * u8 sclp_send_mask[mask_length];
+	 */
 } __attribute__((packed));
 
+#define SCLP_MASK_SIZE_COMPAT 4
+
+static inline sccb_mask_t sccb_get_mask(u8 *masks, size_t len, int i)
+{
+	sccb_mask_t res = 0;
+
+	memcpy(&res, masks + i * len, min(sizeof(res), len));
+	return res;
+}
+
+static inline void sccb_set_mask(u8 *masks, size_t len, int i, sccb_mask_t val)
+{
+	memset(masks + i * len, 0, len);
+	memcpy(masks + i * len, &val, min(sizeof(val), len));
+}
+
+#define sccb_get_generic_mask(sccb, i)					\
+({									\
+	__typeof__(sccb) __sccb = sccb;					\
+									\
+	sccb_get_mask(__sccb->masks, __sccb->mask_length, i);		\
+})
+#define sccb_get_recv_mask(sccb)	sccb_get_generic_mask(sccb, 0)
+#define sccb_get_send_mask(sccb)	sccb_get_generic_mask(sccb, 1)
+#define sccb_get_sclp_recv_mask(sccb)	sccb_get_generic_mask(sccb, 2)
+#define sccb_get_sclp_send_mask(sccb)	sccb_get_generic_mask(sccb, 3)
+
+#define sccb_set_generic_mask(sccb, i, val)				\
+({									\
+	__typeof__(sccb) __sccb = sccb;					\
+									\
+	sccb_set_mask(__sccb->masks, __sccb->mask_length, i, val);	\
+})
+#define sccb_set_recv_mask(sccb, val)	    sccb_set_generic_mask(sccb, 0, val)
+#define sccb_set_send_mask(sccb, val)	    sccb_set_generic_mask(sccb, 1, val)
+#define sccb_set_sclp_recv_mask(sccb, val)  sccb_set_generic_mask(sccb, 2, val)
+#define sccb_set_sclp_send_mask(sccb, val)  sccb_set_generic_mask(sccb, 3, val)
+
 struct read_cpu_info_sccb {
 	struct	sccb_header header;
 	u16	nr_configured;
@@ -221,15 +264,17 @@ extern int sclp_init_state;
 extern int sclp_console_pages;
 extern int sclp_console_drop;
 extern unsigned long sclp_console_full;
+extern bool sclp_mask_compat_mode;
 
 extern char sclp_early_sccb[PAGE_SIZE];
 
 void sclp_early_wait_irq(void);
 int sclp_early_cmd(sclp_cmdw_t cmd, void *sccb);
 unsigned int sclp_early_con_check_linemode(struct init_sccb *sccb);
+unsigned int sclp_early_con_check_vt220(struct init_sccb *sccb);
 int sclp_early_set_event_mask(struct init_sccb *sccb,
-			      unsigned long receive_mask,
-			      unsigned long send_mask);
+			      sccb_mask_t receive_mask,
+			      sccb_mask_t send_mask);
 
 /* useful inlines */
 
diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c
index 6b1891539c84..9a74abb9224d 100644
--- a/drivers/s390/char/sclp_early.c
+++ b/drivers/s390/char/sclp_early.c
@@ -249,7 +249,7 @@ static void __init sclp_early_console_detect(struct init_sccb *sccb)
 	if (sccb->header.response_code != 0x20)
 		return;
 
-	if (sccb->sclp_send_mask & EVTYP_VT220MSG_MASK)
+	if (sclp_early_con_check_vt220(sccb))
 		sclp.has_vt220 = 1;
 
 	if (sclp_early_con_check_linemode(sccb))
diff --git a/drivers/s390/char/sclp_early_core.c b/drivers/s390/char/sclp_early_core.c
index 17b0c67f3e8d..5f8d9ea69ebd 100644
--- a/drivers/s390/char/sclp_early_core.c
+++ b/drivers/s390/char/sclp_early_core.c
@@ -14,6 +14,11 @@
 
 char sclp_early_sccb[PAGE_SIZE] __aligned(PAGE_SIZE) __section(.data);
 int sclp_init_state __section(.data) = sclp_init_state_uninitialized;
+/*
+ * Tracks the size of the event masks. QEMU until version 2.11 only
+ * supports 4-byte masks and needs a workaround (compat mode).
+ */
+bool sclp_mask_compat_mode;
 
 void sclp_early_wait_irq(void)
 {
@@ -142,16 +147,24 @@ static void sclp_early_print_vt220(const char *str, unsigned int len)
 }
 
 int sclp_early_set_event_mask(struct init_sccb *sccb,
-			      unsigned long receive_mask,
-			      unsigned long send_mask)
+			      sccb_mask_t receive_mask,
+			      sccb_mask_t send_mask)
 {
+retry:
 	memset(sccb, 0, sizeof(*sccb));
 	sccb->header.length = sizeof(*sccb);
-	sccb->mask_length = sizeof(sccb_mask_t);
-	sccb->receive_mask = receive_mask;
-	sccb->send_mask = send_mask;
+	if (sclp_mask_compat_mode)
+		sccb->mask_length = SCLP_MASK_SIZE_COMPAT;
+	else
+		sccb->mask_length = sizeof(sccb_mask_t);
+	sccb_set_recv_mask(sccb, receive_mask);
+	sccb_set_send_mask(sccb, send_mask);
 	if (sclp_early_cmd(SCLP_CMDW_WRITE_EVENT_MASK, sccb))
 		return -EIO;
+	if ((sccb->header.response_code == 0x74f0) && !sclp_mask_compat_mode) {
+		sclp_mask_compat_mode = true;
+		goto retry;
+	}
 	if (sccb->header.response_code != 0x20)
 		return -EIO;
 	return 0;
@@ -159,19 +172,28 @@ int sclp_early_set_event_mask(struct init_sccb *sccb,
 
 unsigned int sclp_early_con_check_linemode(struct init_sccb *sccb)
 {
-	if (!(sccb->sclp_send_mask & EVTYP_OPCMD_MASK))
+	if (!(sccb_get_sclp_send_mask(sccb) & EVTYP_OPCMD_MASK))
 		return 0;
-	if (!(sccb->sclp_receive_mask & (EVTYP_MSG_MASK | EVTYP_PMSGCMD_MASK)))
+	if (!(sccb_get_sclp_recv_mask(sccb) & (EVTYP_MSG_MASK | EVTYP_PMSGCMD_MASK)))
 		return 0;
 	return 1;
 }
 
+/*
+ * Report whether the SCLP send mask stored in @sccb has the VT220 message
+ * event type set. Returns 1 if it does, 0 otherwise.
+ */
+unsigned int sclp_early_con_check_vt220(struct init_sccb *sccb)
+{
+	return !!(sccb_get_sclp_send_mask(sccb) & EVTYP_VT220MSG_MASK);
+}
+
 static int sclp_early_setup(int disable, int *have_linemode, int *have_vt220)
 {
 	unsigned long receive_mask, send_mask;
 	struct init_sccb *sccb;
 	int rc;
 
+	BUILD_BUG_ON(sizeof(struct init_sccb) > PAGE_SIZE);
+
 	*have_linemode = *have_vt220 = 0;
 	sccb = (struct init_sccb *) &sclp_early_sccb;
 	receive_mask = disable ? 0 : EVTYP_OPCMD_MASK;
@@ -180,7 +202,7 @@ static int sclp_early_setup(int disable, int *have_linemode, int *have_vt220)
 	if (rc)
 		return rc;
 	*have_linemode = sclp_early_con_check_linemode(sccb);
-	*have_vt220 = sccb->send_mask & EVTYP_VT220MSG_MASK;
+	*have_vt220 = !!(sccb_get_send_mask(sccb) & EVTYP_VT220MSG_MASK);
 	return rc;
 }
 
diff --git a/drivers/s390/char/sclp_sd.c b/drivers/s390/char/sclp_sd.c
new file mode 100644
index 000000000000..99f41db5123b
--- /dev/null
+++ b/drivers/s390/char/sclp_sd.c
@@ -0,0 +1,569 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * SCLP Store Data support and sysfs interface
+ *
+ * Copyright IBM Corp. 2017
+ */
+
+#define KMSG_COMPONENT "sclp_sd"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/completion.h>
+#include <linux/kobject.h>
+#include <linux/list.h>
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/async.h>
+#include <linux/export.h>
+#include <linux/mutex.h>
+
+#include <asm/pgalloc.h>
+
+#include "sclp.h"
+
+#define SD_EQ_STORE_DATA	0
+#define SD_EQ_HALT		1
+#define SD_EQ_SIZE		2
+
+#define SD_DI_CONFIG		3
+
+/*
+ * Event buffer used for Store Data requests and responses. The unnamed
+ * bit-fields are padding.
+ */
+struct sclp_sd_evbuf {
+	struct evbuf_header hdr;
+	u8 eq;		/* request type, one of the SD_EQ_* values */
+	u8 di;		/* entity selector, e.g. SD_DI_CONFIG */
+	u8 rflags;	/* 0x80 set: sclp_sd_sync() evaluates the SCCB in place */
+	u64 :56;
+	u32 id;		/* used to match a response to its listener */
+	u16 :16;
+	u8 fmt;		/* set to 1 by sclp_sd_sync() */
+	u8 status;	/* 0: success, 3: mapped to -ENOENT, other: -EIO */
+	u64 sat;	/* translation table (ASCE) for the target buffer */
+	u64 sa;		/* address of the target buffer */
+	u32 esize;	/* result size, in units of PAGE_SIZE */
+	u32 dsize;	/* data size, in units of PAGE_SIZE */
+} __packed;
+
+/*
+ * Page-aligned SCCB consisting of the SCCB header followed by a single
+ * Store Data event buffer.
+ */
+struct sclp_sd_sccb {
+	struct sccb_header hdr;
+	struct sclp_sd_evbuf evbuf;
+} __packed __aligned(PAGE_SIZE);
+
+/**
+ * struct sclp_sd_data - Result of a Store Data request
+ * @esize_bytes: Resulting esize in bytes (reported esize times PAGE_SIZE)
+ * @dsize_bytes: Resulting dsize in bytes (reported dsize times PAGE_SIZE)
+ * @data: Pointer to data - must be released using vfree(). %NULL if the
+ *	  entity reported a dsize of zero.
+ */
+struct sclp_sd_data {
+	size_t esize_bytes;
+	size_t dsize_bytes;
+	void *data;
+};
+
+/**
+ * struct sclp_sd_listener - Listener for asynchronous Store Data response
+ * @list: For enqueueing this struct on sclp_sd_queue
+ * @id: Event ID of response to listen for
+ * @completion: Can be used to wait for response
+ * @evbuf: Contains the resulting Store Data response after completion; it is
+ *	   filled in by sclp_sd_receiver() before @completion is signalled
+ */
+struct sclp_sd_listener {
+	struct list_head list;
+	u32 id;
+	struct completion completion;
+	struct sclp_sd_evbuf evbuf;
+};
+
+/**
+ * struct sclp_sd_file - Sysfs representation of a Store Data entity
+ * @kobj: Kobject
+ * @data_attr: Attribute for accessing data contents
+ * @data_mutex: Mutex to serialize access and updates to @data
+ * @data: Data associated with this entity
+ * @di: DI value associated with this entity
+ */
+struct sclp_sd_file {
+	struct kobject kobj;
+	struct bin_attribute data_attr;
+	struct mutex data_mutex;
+	struct sclp_sd_data data;
+	u8 di;
+};
+/* Map an embedded kobject pointer back to its containing sclp_sd_file */
+#define to_sd_file(x) container_of(x, struct sclp_sd_file, kobj)
+
+/* Kset "sclp_sd" below the firmware kobject; created in sclp_sd_init() */
+static struct kset *sclp_sd_kset;
+/* Sysfs object for the "config" entity (SD_DI_CONFIG) */
+static struct sclp_sd_file *config_file;
+
+/* Listeners waiting for Store Data responses; guarded by sclp_sd_queue_lock */
+static LIST_HEAD(sclp_sd_queue);
+static DEFINE_SPINLOCK(sclp_sd_queue_lock);
+
+/**
+ * sclp_sd_listener_add() - Add listener for Store Data responses
+ * @listener: Listener to add
+ *
+ * Append @listener to sclp_sd_queue while holding sclp_sd_queue_lock with
+ * interrupts disabled.
+ */
+static void sclp_sd_listener_add(struct sclp_sd_listener *listener)
+{
+	spin_lock_irq(&sclp_sd_queue_lock);
+	list_add_tail(&listener->list, &sclp_sd_queue);
+	spin_unlock_irq(&sclp_sd_queue_lock);
+}
+
+/**
+ * sclp_sd_listener_remove() - Remove listener for Store Data responses
+ * @listener: Listener to remove
+ *
+ * Unlink @listener from sclp_sd_queue while holding sclp_sd_queue_lock with
+ * interrupts disabled. @listener must currently be on the queue.
+ */
+static void sclp_sd_listener_remove(struct sclp_sd_listener *listener)
+{
+	spin_lock_irq(&sclp_sd_queue_lock);
+	list_del(&listener->list);
+	spin_unlock_irq(&sclp_sd_queue_lock);
+}
+
+/**
+ * sclp_sd_listener_init() - Initialize a Store Data response listener
+ * @listener: Listener to initialize; its previous contents are cleared
+ * @id: Event ID to listen for
+ *
+ * Initialize a listener for asynchronous Store Data responses. This listener
+ * can afterwards be used to wait for a specific response and to retrieve
+ * the associated response data.
+ */
+static void sclp_sd_listener_init(struct sclp_sd_listener *listener, u32 id)
+{
+	memset(listener, 0, sizeof(*listener));
+	listener->id = id;
+	init_completion(&listener->completion);
+}
+
+/**
+ * sclp_sd_receiver() - Receiver for Store Data events
+ * @evbuf_hdr: Header of received events
+ *
+ * Process Store Data events and complete listeners with matching event IDs.
+ * Only the first queued listener whose ID matches is completed; events
+ * without a matching listener are only reported via pr_debug().
+ */
+static void sclp_sd_receiver(struct evbuf_header *evbuf_hdr)
+{
+	struct sclp_sd_evbuf *evbuf = (struct sclp_sd_evbuf *) evbuf_hdr;
+	struct sclp_sd_listener *listener;
+	int found = 0;
+
+	pr_debug("received event (id=0x%08x)\n", evbuf->id);
+	spin_lock(&sclp_sd_queue_lock);
+	list_for_each_entry(listener, &sclp_sd_queue, list) {
+		if (listener->id != evbuf->id)
+			continue;
+
+		/* Hand a copy of the response to the waiter, then wake it */
+		listener->evbuf = *evbuf;
+		complete(&listener->completion);
+		found = 1;
+		break;
+	}
+	spin_unlock(&sclp_sd_queue_lock);
+
+	if (!found)
+		pr_debug("unsolicited event (id=0x%08x)\n", evbuf->id);
+}
+
+/*
+ * SCLP registration for Store Data events in both directions; received
+ * events are delivered to sclp_sd_receiver().
+ */
+static struct sclp_register sclp_sd_register = {
+	.send_mask = EVTYP_STORE_DATA_MASK,
+	.receive_mask = EVTYP_STORE_DATA_MASK,
+	.receiver_fn = sclp_sd_receiver,
+};
+
+/**
+ * sclp_sd_sync() - Perform Store Data request synchronously
+ * @page: Address of work page - must be below 2GB
+ * @eq: Input EQ value
+ * @di: Input DI value
+ * @sat: Input SAT value
+ * @sa: Input SA value used to specify the address of the target buffer
+ * @dsize_ptr: Optional pointer to input and output DSIZE value
+ * @esize_ptr: Optional pointer to output ESIZE value
+ *
+ * Perform Store Data request with specified parameters and wait for completion.
+ * The work page is cleared and used as SCCB; its address, truncated to 32
+ * bits, serves as the event ID that identifies the response.
+ *
+ * Return %0 on success and store resulting DSIZE and ESIZE values in
+ * @dsize_ptr and @esize_ptr (if provided). Return non-zero on error:
+ * %-ENOENT if the response status is 3, %-EIO for other failed responses,
+ * or the error returned by sclp_sync_request() or by the interruptible wait.
+ * @dsize_ptr and @esize_ptr are left untouched on error.
+ */
+static int sclp_sd_sync(unsigned long page, u8 eq, u8 di, u64 sat, u64 sa,
+			u32 *dsize_ptr, u32 *esize_ptr)
+{
+	struct sclp_sd_sccb *sccb = (void *) page;
+	struct sclp_sd_listener listener;
+	struct sclp_sd_evbuf *evbuf;
+	int rc;
+
+	/* The listener is queued before the request is issued */
+	sclp_sd_listener_init(&listener, (u32) (addr_t) sccb);
+	sclp_sd_listener_add(&listener);
+
+	/* Prepare SCCB */
+	memset(sccb, 0, PAGE_SIZE);
+	sccb->hdr.length = sizeof(sccb->hdr) + sizeof(sccb->evbuf);
+	evbuf = &sccb->evbuf;
+	evbuf->hdr.length = sizeof(*evbuf);
+	evbuf->hdr.type = EVTYP_STORE_DATA;
+	evbuf->eq = eq;
+	evbuf->di = di;
+	evbuf->id = listener.id;
+	evbuf->fmt = 1;
+	evbuf->sat = sat;
+	evbuf->sa = sa;
+	if (dsize_ptr)
+		evbuf->dsize = *dsize_ptr;
+
+	/* Perform command */
+	pr_debug("request (eq=%d, di=%d, id=0x%08x)\n", eq, di, listener.id);
+	rc = sclp_sync_request(SCLP_CMDW_WRITE_EVENT_DATA, sccb);
+	pr_debug("request done (rc=%d)\n", rc);
+	if (rc)
+		goto out;
+
+	/* Evaluate response */
+	if (sccb->hdr.response_code == 0x73f0) {
+		/* Fails with -EIO but bypasses the warning issued at "out" */
+		pr_debug("event not supported\n");
+		rc = -EIO;
+		goto out_remove;
+	}
+	/* NOTE(review): flag 0x80 presumably marks the buffer as processed */
+	if (sccb->hdr.response_code != 0x0020 || !(evbuf->hdr.flags & 0x80)) {
+		rc = -EIO;
+		goto out;
+	}
+	/*
+	 * If rflags bit 0x80 is clear, wait for the response delivered via
+	 * sclp_sd_receiver() and evaluate the listener's copy instead of the
+	 * event buffer in the SCCB.
+	 */
+	if (!(evbuf->rflags & 0x80)) {
+		rc = wait_for_completion_interruptible(&listener.completion);
+		if (rc)
+			goto out;
+		evbuf = &listener.evbuf;
+	}
+	switch (evbuf->status) {
+	case 0:
+		if (dsize_ptr)
+			*dsize_ptr = evbuf->dsize;
+		if (esize_ptr)
+			*esize_ptr = evbuf->esize;
+		pr_debug("success (dsize=%u, esize=%u)\n", evbuf->dsize,
+			 evbuf->esize);
+		break;
+	case 3:
+		rc = -ENOENT;
+		break;
+	default:
+		rc = -EIO;
+		break;
+
+	}
+
+out:
+	/* -ENOENT is not warned about here */
+	if (rc && rc != -ENOENT) {
+		/* Provide some information about what went wrong */
+		pr_warn("Store Data request failed (eq=%d, di=%d, "
+			"response=0x%04x, flags=0x%02x, status=%d, rc=%d)\n",
+			eq, di, sccb->hdr.response_code, evbuf->hdr.flags,
+			evbuf->status, rc);
+	}
+
+out_remove:
+	sclp_sd_listener_remove(&listener);
+
+	return rc;
+}
+
+/**
+ * sclp_sd_store_data() - Obtain data for specified Store Data entity
+ * @result: Resulting data
+ * @di: DI value associated with this entity
+ *
+ * Perform a series of Store Data requests to obtain the size and contents of
+ * the specified Store Data entity.
+ *
+ * If the data request is interrupted and the subsequent halt request fails,
+ * the target buffer and its translation table are intentionally leaked,
+ * because the request that could not be stopped may still store into them.
+ *
+ * Return:
+ *   %0:       Success - result is stored in @result. @result->data must be
+ *	       released using vfree() after use.
+ *   %-ENOENT: No data available for this entity
+ *   %<0:      Other error
+ */
+static int sclp_sd_store_data(struct sclp_sd_data *result, u8 di)
+{
+	u32 dsize = 0, esize = 0;
+	unsigned long page, asce = 0;
+	void *data = NULL;
+	int rc;
+
+	/* Work page for the SCCB */
+	page = __get_free_page(GFP_KERNEL | GFP_DMA);
+	if (!page)
+		return -ENOMEM;
+
+	/* Get size */
+	rc = sclp_sd_sync(page, SD_EQ_SIZE, di, 0, 0, &dsize, &esize);
+	if (rc)
+		goto out;
+	/* Nothing to fetch - report an empty result with a NULL data pointer */
+	if (dsize == 0)
+		goto out_result;
+
+	/* Allocate memory */
+	data = vzalloc((size_t) dsize * PAGE_SIZE);
+	if (!data) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	/* Get translation table for buffer */
+	asce = base_asce_alloc((unsigned long) data, dsize);
+	if (!asce) {
+		vfree(data);
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	/* Get data */
+	rc = sclp_sd_sync(page, SD_EQ_STORE_DATA, di, asce, (u64) data, &dsize,
+			  &esize);
+	if (rc) {
+		/* Cancel running request if interrupted */
+		if (rc == -ERESTARTSYS &&
+		    sclp_sd_sync(page, SD_EQ_HALT, di, 0, 0, NULL, NULL)) {
+			/*
+			 * The request could not be stopped and may still be
+			 * storing into the buffer: leak the buffer and its
+			 * translation table rather than free memory that is
+			 * potentially still in use.
+			 */
+			pr_warn("Could not stop Store Data request - leaking at least %zu bytes\n",
+				(size_t) dsize * PAGE_SIZE);
+			data = NULL;
+			asce = 0;
+		}
+		vfree(data);
+		goto out;
+	}
+
+out_result:
+	result->esize_bytes = (size_t) esize * PAGE_SIZE;
+	result->dsize_bytes = (size_t) dsize * PAGE_SIZE;
+	result->data = data;
+
+out:
+	/* asce is 0 on the paths that never allocated a translation table */
+	base_asce_free(asce);
+	free_page(page);
+
+	return rc;
+}
+
+/**
+ * sclp_sd_data_reset() - Reset Store Data result buffer
+ * @data: Data buffer to reset
+ *
+ * Reset @data to initial state and release associated memory. Afterwards
+ * the data pointer is %NULL and both sizes are zero.
+ */
+static void sclp_sd_data_reset(struct sclp_sd_data *data)
+{
+	vfree(data->data);
+	data->data = NULL;
+	data->dsize_bytes = 0;
+	data->esize_bytes = 0;
+}
+
+/**
+ * sclp_sd_file_release() - Release function for sclp_sd_file object
+ * @kobj: Kobject embedded in sclp_sd_file object
+ *
+ * Release the data held by the object and free the object itself.
+ */
+static void sclp_sd_file_release(struct kobject *kobj)
+{
+	struct sclp_sd_file *sd_file = to_sd_file(kobj);
+
+	sclp_sd_data_reset(&sd_file->data);
+	kfree(sd_file);
+}
+
+/**
+ * sclp_sd_file_update() - Update contents of sclp_sd_file object
+ * @sd_file: Object to update
+ *
+ * Obtain the current version of data associated with the Store Data entity
+ * @sd_file.
+ *
+ * On success, return %0 and generate a KOBJ_CHANGE event to indicate that the
+ * data may have changed. Return non-zero otherwise; in that case the
+ * previously stored data is left in place.
+ */
+static int sclp_sd_file_update(struct sclp_sd_file *sd_file)
+{
+	const char *name = kobject_name(&sd_file->kobj);
+	struct sclp_sd_data data;
+	int rc;
+
+	/* The request itself runs without holding data_mutex */
+	rc = sclp_sd_store_data(&data, sd_file->di);
+	if (rc) {
+		if (rc == -ENOENT) {
+			pr_info("No data is available for the %s data entity\n",
+				 name);
+		}
+		return rc;
+	}
+
+	/* Only the swap of old and new data is done under the mutex */
+	mutex_lock(&sd_file->data_mutex);
+	sclp_sd_data_reset(&sd_file->data);
+	sd_file->data = data;
+	mutex_unlock(&sd_file->data_mutex);
+
+	pr_info("A %zu-byte %s data entity was retrieved\n", data.dsize_bytes,
+		name);
+	kobject_uevent(&sd_file->kobj, KOBJ_CHANGE);
+
+	return 0;
+}
+
+/**
+ * sclp_sd_file_update_async() - Wrapper for asynchronous update call
+ * @data: Object to update
+ * @cookie: Async cookie (unused)
+ *
+ * The result of the update is not evaluated.
+ */
+static void sclp_sd_file_update_async(void *data, async_cookie_t cookie)
+{
+	struct sclp_sd_file *sd_file = data;
+
+	sclp_sd_file_update(sd_file);
+}
+
+/**
+ * reload_store() - Store function for "reload" sysfs attribute
+ * @kobj: Kobject of sclp_sd_file object
+ * @attr: Attribute being written (unused)
+ * @buf: Data written by the user (ignored)
+ * @count: Number of bytes written
+ *
+ * Initiate a reload of the data associated with an sclp_sd_file object.
+ * Always return @count; the result of the update is not reported to the
+ * writer.
+ */
+static ssize_t reload_store(struct kobject *kobj, struct kobj_attribute *attr,
+			    const char *buf, size_t count)
+{
+	struct sclp_sd_file *sd_file = to_sd_file(kobj);
+
+	sclp_sd_file_update(sd_file);
+
+	return count;
+}
+
+/* Write-only "reload" attribute, handled by reload_store() */
+static struct kobj_attribute reload_attr = __ATTR_WO(reload);
+
+/* Attributes listed as defaults in sclp_sd_file_ktype */
+static struct attribute *sclp_sd_file_default_attrs[] = {
+	&reload_attr.attr,
+	NULL,
+};
+
+/* Kobject type of sclp_sd_file objects; release frees the whole object */
+static struct kobj_type sclp_sd_file_ktype = {
+	.sysfs_ops = &kobj_sysfs_ops,
+	.release = sclp_sd_file_release,
+	.default_attrs = sclp_sd_file_default_attrs,
+};
+
+/**
+ * data_read() - Read function for "data" sysfs attribute
+ * @file: Open file (unused)
+ * @kobj: Kobject of sclp_sd_file object
+ * @attr: Binary attribute being read (unused)
+ * @buffer: Target buffer
+ * @off: Requested file offset
+ * @size: Requested number of bytes
+ *
+ * Copy the part of the Store Data entity contents selected by @off and @size
+ * into @buffer while holding the data mutex. Return the number of bytes
+ * copied, or %0 if no data is present or @off is at or beyond the end.
+ */
+static ssize_t data_read(struct file *file, struct kobject *kobj,
+			 struct bin_attribute *attr, char *buffer,
+			 loff_t off, size_t size)
+{
+	struct sclp_sd_file *sd_file = to_sd_file(kobj);
+	size_t total;
+	char *src;
+
+	mutex_lock(&sd_file->data_mutex);
+
+	src = sd_file->data.data;
+	total = sd_file->data.dsize_bytes;
+	if (src && off < total) {
+		/* Clamp the request to the end of the stored data */
+		if (off + size > total)
+			size = total - off;
+		memcpy(buffer, src + off, size);
+	} else {
+		size = 0;
+	}
+
+	mutex_unlock(&sd_file->data_mutex);
+
+	return size;
+}
+
+/**
+ * sclp_sd_file_create() - Add a sysfs file representing a Store Data entity
+ * @name: Name of file
+ * @di: DI value associated with this entity
+ *
+ * Create a sysfs directory with the given @name located under
+ *
+ *   /sys/firmware/sclp_sd/
+ *
+ * The files in this directory can be used to access the contents of the Store
+ * Data entity associated with @DI. The initial retrieval of the entity data
+ * is scheduled asynchronously, so the data may not yet be available when this
+ * function returns.
+ *
+ * Return pointer to resulting sclp_sd_file object on success, %NULL otherwise.
+ * The object must be freed by calling kobject_put() on the embedded kobject
+ * pointer after use.
+ */
+static __init struct sclp_sd_file *sclp_sd_file_create(const char *name, u8 di)
+{
+	struct sclp_sd_file *sd_file;
+	int rc;
+
+	sd_file = kzalloc(sizeof(*sd_file), GFP_KERNEL);
+	if (!sd_file)
+		return NULL;
+	sd_file->di = di;
+	mutex_init(&sd_file->data_mutex);
+
+	/* Create kobject located under /sys/firmware/sclp_sd/ */
+	sd_file->kobj.kset = sclp_sd_kset;
+	rc = kobject_init_and_add(&sd_file->kobj, &sclp_sd_file_ktype, NULL,
+				  "%s", name);
+	if (rc) {
+		/* sd_file is freed through the ktype's release function */
+		kobject_put(&sd_file->kobj);
+		return NULL;
+	}
+
+	/* Read-only binary attribute "data", served by data_read() */
+	sysfs_bin_attr_init(&sd_file->data_attr);
+	sd_file->data_attr.attr.name = "data";
+	sd_file->data_attr.attr.mode = 0444;
+	sd_file->data_attr.read = data_read;
+
+	rc = sysfs_create_bin_file(&sd_file->kobj, &sd_file->data_attr);
+	if (rc) {
+		kobject_put(&sd_file->kobj);
+		return NULL;
+	}
+
+	/*
+	 * For completeness only - users interested in entity data should listen
+	 * for KOBJ_CHANGE instead.
+	 */
+	kobject_uevent(&sd_file->kobj, KOBJ_ADD);
+
+	/* Don't let a slow Store Data request delay further initialization */
+	async_schedule(sclp_sd_file_update_async, sd_file);
+
+	return sd_file;
+}
+
+/**
+ * sclp_sd_init() - Initialize sclp_sd support and register sysfs files
+ *
+ * Register for Store Data events, create the "sclp_sd" kset and the "config"
+ * entity below it. Steps that already succeeded are undone on failure.
+ *
+ * Return %0 on success, the error returned by sclp_register(), %-ENOMEM if
+ * the kset could not be created, or %-EINVAL if the "config" object could not
+ * be created.
+ */
+static __init int sclp_sd_init(void)
+{
+	int rc;
+
+	rc = sclp_register(&sclp_sd_register);
+	if (rc)
+		return rc;
+
+	/* Create kset named "sclp_sd" located under /sys/firmware/ */
+	rc = -ENOMEM;
+	sclp_sd_kset = kset_create_and_add("sclp_sd", NULL, firmware_kobj);
+	if (!sclp_sd_kset)
+		goto err_kset;
+
+	rc = -EINVAL;
+	config_file = sclp_sd_file_create("config", SD_DI_CONFIG);
+	if (!config_file)
+		goto err_config;
+
+	return 0;
+
+err_config:
+	kset_unregister(sclp_sd_kset);
+err_kset:
+	sclp_unregister(&sclp_sd_register);
+
+	return rc;
+}
+device_initcall(sclp_sd_init);
diff --git a/drivers/s390/char/sclp_tty.c b/drivers/s390/char/sclp_tty.c
index 9f7b87d6d434..5aff8b684eb2 100644
--- a/drivers/s390/char/sclp_tty.c
+++ b/drivers/s390/char/sclp_tty.c
@@ -502,7 +502,10 @@ sclp_tty_init(void)
 	int i;
 	int rc;
 
-	if (!CONSOLE_IS_SCLP)
+	/* z/VM multiplexes the line mode output on the 32xx screen */
+	if (MACHINE_IS_VM && !CONSOLE_IS_SCLP)
+		return 0;
+	if (!sclp.has_linemode)
 		return 0;
 	driver = alloc_tty_driver(1);
 	if (!driver)
diff --git a/drivers/s390/cio/ccwgroup.c b/drivers/s390/cio/ccwgroup.c
index bfec1485ca23..5535312602af 100644
--- a/drivers/s390/cio/ccwgroup.c
+++ b/drivers/s390/cio/ccwgroup.c
@@ -323,6 +323,9 @@ int ccwgroup_create_dev(struct device *parent, struct ccwgroup_driver *gdrv,
 	struct ccw_dev_id dev_id;
 	int rc, i;
 
+	if (num_devices < 1)
+		return -EINVAL;
+
 	gdev = kzalloc(sizeof(*gdev) + num_devices * sizeof(gdev->cdev[0]),
 		       GFP_KERNEL);
 	if (!gdev)
@@ -375,7 +378,7 @@ int ccwgroup_create_dev(struct device *parent, struct ccwgroup_driver *gdrv,
 		goto error;
 	}
 	/* Check if the devices are bound to the required ccw driver. */
-	if (gdev->count && gdrv && gdrv->ccw_driver &&
+	if (gdrv && gdrv->ccw_driver &&
 	    gdev->cdev[0]->drv != gdrv->ccw_driver) {
 		rc = -EINVAL;
 		goto error;
diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c
index f95b452b8bbc..afbdee74147d 100644
--- a/drivers/s390/cio/chp.c
+++ b/drivers/s390/cio/chp.c
@@ -384,6 +384,28 @@ static ssize_t chp_chid_external_show(struct device *dev,
 }
 static DEVICE_ATTR(chid_external, 0444, chp_chid_external_show, NULL);
 
+/*
+ * Read handler for the binary "util_string" attribute: copy the window of
+ * the channel path's format-3 util_str selected by @off and @count into @buf
+ * while holding chp->lock. Returns the result of memory_read_from_buffer().
+ */
+static ssize_t util_string_read(struct file *filp, struct kobject *kobj,
+				struct bin_attribute *attr, char *buf,
+				loff_t off, size_t count)
+{
+	struct channel_path *chp = to_channelpath(kobj_to_dev(kobj));
+	ssize_t rc;
+
+	mutex_lock(&chp->lock);
+	rc = memory_read_from_buffer(buf, count, &off, chp->desc_fmt3.util_str,
+				     sizeof(chp->desc_fmt3.util_str));
+	mutex_unlock(&chp->lock);
+
+	return rc;
+}
+/* The attribute size equals the size of the util_str field */
+static BIN_ATTR_RO(util_string,
+		   sizeof(((struct channel_path_desc_fmt3 *)0)->util_str));
+
+static struct bin_attribute *chp_bin_attrs[] = {
+	&bin_attr_util_string,
+	NULL,
+};
+
 static struct attribute *chp_attrs[] = {
 	&dev_attr_status.attr,
 	&dev_attr_configure.attr,
@@ -396,6 +418,7 @@ static struct attribute *chp_attrs[] = {
 };
 static struct attribute_group chp_attr_group = {
 	.attrs = chp_attrs,
+	.bin_attrs = chp_bin_attrs,
 };
 static const struct attribute_group *chp_attr_groups[] = {
 	&chp_attr_group,
@@ -422,7 +445,7 @@ int chp_update_desc(struct channel_path *chp)
 {
 	int rc;
 
-	rc = chsc_determine_base_channel_path_desc(chp->chpid, &chp->desc);
+	rc = chsc_determine_fmt0_channel_path_desc(chp->chpid, &chp->desc);
 	if (rc)
 		return rc;
 
@@ -431,6 +454,7 @@ int chp_update_desc(struct channel_path *chp)
 	 * hypervisors implement the required chsc commands.
 	 */
 	chsc_determine_fmt1_channel_path_desc(chp->chpid, &chp->desc_fmt1);
+	chsc_determine_fmt3_channel_path_desc(chp->chpid, &chp->desc_fmt3);
 	chsc_get_channel_measurement_chars(chp);
 
 	return 0;
@@ -506,20 +530,20 @@ out:
  * On success return a newly allocated copy of the channel-path description
  * data associated with the given channel-path ID. Return %NULL on error.
  */
-struct channel_path_desc *chp_get_chp_desc(struct chp_id chpid)
+struct channel_path_desc_fmt0 *chp_get_chp_desc(struct chp_id chpid)
 {
 	struct channel_path *chp;
-	struct channel_path_desc *desc;
+	struct channel_path_desc_fmt0 *desc;
 
 	chp = chpid_to_chp(chpid);
 	if (!chp)
 		return NULL;
-	desc = kmalloc(sizeof(struct channel_path_desc), GFP_KERNEL);
+	desc = kmalloc(sizeof(*desc), GFP_KERNEL);
 	if (!desc)
 		return NULL;
 
 	mutex_lock(&chp->lock);
-	memcpy(desc, &chp->desc, sizeof(struct channel_path_desc));
+	memcpy(desc, &chp->desc, sizeof(*desc));
 	mutex_unlock(&chp->lock);
 	return desc;
 }
diff --git a/drivers/s390/cio/chp.h b/drivers/s390/cio/chp.h
index 7e80323cd261..20259f3fbf45 100644
--- a/drivers/s390/cio/chp.h
+++ b/drivers/s390/cio/chp.h
@@ -44,8 +44,9 @@ struct channel_path {
 	struct chp_id chpid;
 	struct mutex lock; /* Serialize access to below members. */
 	int state;
-	struct channel_path_desc desc;
+	struct channel_path_desc_fmt0 desc;
 	struct channel_path_desc_fmt1 desc_fmt1;
+	struct channel_path_desc_fmt3 desc_fmt3;
 	/* Channel-measurement related stuff: */
 	int cmg;
 	int shared;
@@ -61,7 +62,7 @@ static inline struct channel_path *chpid_to_chp(struct chp_id chpid)
 int chp_get_status(struct chp_id chpid);
 u8 chp_get_sch_opm(struct subchannel *sch);
 int chp_is_registered(struct chp_id chpid);
-struct channel_path_desc *chp_get_chp_desc(struct chp_id chpid);
+struct channel_path_desc_fmt0 *chp_get_chp_desc(struct chp_id chpid);
 void chp_remove_cmg_attr(struct channel_path *chp);
 int chp_add_cmg_attr(struct channel_path *chp);
 int chp_update_desc(struct channel_path *chp);
diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c
index c08fc5a8df0c..6652a49a49b1 100644
--- a/drivers/s390/cio/chsc.c
+++ b/drivers/s390/cio/chsc.c
@@ -915,6 +915,8 @@ int chsc_determine_channel_path_desc(struct chp_id chpid, int fmt, int rfmt,
 		return -EINVAL;
 	if ((rfmt == 2) && !css_general_characteristics.cib)
 		return -EINVAL;
+	if ((rfmt == 3) && !css_general_characteristics.util_str)
+		return -EINVAL;
 
 	memset(page, 0, PAGE_SIZE);
 	scpd_area = page;
@@ -940,43 +942,30 @@ int chsc_determine_channel_path_desc(struct chp_id chpid, int fmt, int rfmt,
 }
 EXPORT_SYMBOL_GPL(chsc_determine_channel_path_desc);
 
-int chsc_determine_base_channel_path_desc(struct chp_id chpid,
-					  struct channel_path_desc *desc)
-{
-	struct chsc_scpd *scpd_area;
-	unsigned long flags;
-	int ret;
-
-	spin_lock_irqsave(&chsc_page_lock, flags);
-	scpd_area = chsc_page;
-	ret = chsc_determine_channel_path_desc(chpid, 0, 0, 0, 0, scpd_area);
-	if (ret)
-		goto out;
-
-	memcpy(desc, scpd_area->data, sizeof(*desc));
-out:
-	spin_unlock_irqrestore(&chsc_page_lock, flags);
-	return ret;
+#define chsc_det_chp_desc(FMT, c)					\
+int chsc_determine_fmt##FMT##_channel_path_desc(			\
+	struct chp_id chpid, struct channel_path_desc_fmt##FMT *desc)	\
+{									\
+	struct chsc_scpd *scpd_area;					\
+	unsigned long flags;						\
+	int ret;							\
+									\
+	spin_lock_irqsave(&chsc_page_lock, flags);			\
+	scpd_area = chsc_page;						\
+	ret = chsc_determine_channel_path_desc(chpid, 0, FMT, c, 0,	\
+					       scpd_area);		\
+	if (ret)							\
+		goto out;						\
+									\
+	memcpy(desc, scpd_area->data, sizeof(*desc));			\
+out:									\
+	spin_unlock_irqrestore(&chsc_page_lock, flags);			\
+	return ret;							\
 }
 
-int chsc_determine_fmt1_channel_path_desc(struct chp_id chpid,
-					  struct channel_path_desc_fmt1 *desc)
-{
-	struct chsc_scpd *scpd_area;
-	unsigned long flags;
-	int ret;
-
-	spin_lock_irqsave(&chsc_page_lock, flags);
-	scpd_area = chsc_page;
-	ret = chsc_determine_channel_path_desc(chpid, 0, 1, 1, 0, scpd_area);
-	if (ret)
-		goto out;
-
-	memcpy(desc, scpd_area->data, sizeof(*desc));
-out:
-	spin_unlock_irqrestore(&chsc_page_lock, flags);
-	return ret;
-}
+chsc_det_chp_desc(0, 0)
+chsc_det_chp_desc(1, 1)
+chsc_det_chp_desc(3, 0)
 
 static void
 chsc_initialize_cmg_chars(struct channel_path *chp, u8 cmcv,
diff --git a/drivers/s390/cio/chsc.h b/drivers/s390/cio/chsc.h
index dda5953534b7..5c9f0dd33f4e 100644
--- a/drivers/s390/cio/chsc.h
+++ b/drivers/s390/cio/chsc.h
@@ -40,6 +40,11 @@ struct channel_path_desc_fmt1 {
 	u32 zeros[2];
 } __attribute__ ((packed));
 
+/* Format-3 descriptor: a format-1 descriptor followed by a 64-byte util_str */
+struct channel_path_desc_fmt3 {
+	struct channel_path_desc_fmt1 fmt1_desc;
+	u8 util_str[64];
+};
+
 struct channel_path;
 
 struct css_chsc_char {
@@ -147,10 +152,12 @@ int __chsc_do_secm(struct channel_subsystem *css, int enable);
 int chsc_chp_vary(struct chp_id chpid, int on);
 int chsc_determine_channel_path_desc(struct chp_id chpid, int fmt, int rfmt,
 				     int c, int m, void *page);
-int chsc_determine_base_channel_path_desc(struct chp_id chpid,
-					  struct channel_path_desc *desc);
+int chsc_determine_fmt0_channel_path_desc(struct chp_id chpid,
+					  struct channel_path_desc_fmt0 *desc);
 int chsc_determine_fmt1_channel_path_desc(struct chp_id chpid,
 					  struct channel_path_desc_fmt1 *desc);
+int chsc_determine_fmt3_channel_path_desc(struct chp_id chpid,
+					  struct channel_path_desc_fmt3 *desc);
 void chsc_chp_online(struct chp_id chpid);
 void chsc_chp_offline(struct chp_id chpid);
 int chsc_get_channel_measurement_chars(struct channel_path *chp);
diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c
index 6886b3d34cf8..5130d7c67239 100644
--- a/drivers/s390/cio/cio.c
+++ b/drivers/s390/cio/cio.c
@@ -25,7 +25,6 @@
 #include <asm/irq.h>
 #include <asm/irq_regs.h>
 #include <asm/setup.h>
-#include <asm/reset.h>
 #include <asm/ipl.h>
 #include <asm/chpid.h>
 #include <asm/airq.h>
@@ -767,262 +766,6 @@ void cio_register_early_subchannels(void)
 }
 #endif /* CONFIG_CCW_CONSOLE */
 
-static int
-__disable_subchannel_easy(struct subchannel_id schid, struct schib *schib)
-{
-	int retry, cc;
-
-	cc = 0;
-	for (retry=0;retry<3;retry++) {
-		schib->pmcw.ena = 0;
-		cc = msch(schid, schib);
-		if (cc)
-			return (cc==3?-ENODEV:-EBUSY);
-		if (stsch(schid, schib) || !css_sch_is_valid(schib))
-			return -ENODEV;
-		if (!schib->pmcw.ena)
-			return 0;
-	}
-	return -EBUSY; /* uhm... */
-}
-
-static int
-__clear_io_subchannel_easy(struct subchannel_id schid)
-{
-	int retry;
-
-	if (csch(schid))
-		return -ENODEV;
-	for (retry=0;retry<20;retry++) {
-		struct tpi_info ti;
-
-		if (tpi(&ti)) {
-			tsch(ti.schid, this_cpu_ptr(&cio_irb));
-			if (schid_equal(&ti.schid, &schid))
-				return 0;
-		}
-		udelay_simple(100);
-	}
-	return -EBUSY;
-}
-
-static void __clear_chsc_subchannel_easy(void)
-{
-	/* It seems we can only wait for a bit here :/ */
-	udelay_simple(100);
-}
-
-static int pgm_check_occured;
-
-static void cio_reset_pgm_check_handler(void)
-{
-	pgm_check_occured = 1;
-}
-
-static int stsch_reset(struct subchannel_id schid, struct schib *addr)
-{
-	int rc;
-
-	pgm_check_occured = 0;
-	s390_base_pgm_handler_fn = cio_reset_pgm_check_handler;
-	rc = stsch(schid, addr);
-	s390_base_pgm_handler_fn = NULL;
-
-	/* The program check handler could have changed pgm_check_occured. */
-	barrier();
-
-	if (pgm_check_occured)
-		return -EIO;
-	else
-		return rc;
-}
-
-static int __shutdown_subchannel_easy(struct subchannel_id schid, void *data)
-{
-	struct schib schib;
-
-	if (stsch_reset(schid, &schib))
-		return -ENXIO;
-	if (!schib.pmcw.ena)
-		return 0;
-	switch(__disable_subchannel_easy(schid, &schib)) {
-	case 0:
-	case -ENODEV:
-		break;
-	default: /* -EBUSY */
-		switch (schib.pmcw.st) {
-		case SUBCHANNEL_TYPE_IO:
-			if (__clear_io_subchannel_easy(schid))
-				goto out; /* give up... */
-			break;
-		case SUBCHANNEL_TYPE_CHSC:
-			__clear_chsc_subchannel_easy();
-			break;
-		default:
-			/* No default clear strategy */
-			break;
-		}
-		stsch(schid, &schib);
-		__disable_subchannel_easy(schid, &schib);
-	}
-out:
-	return 0;
-}
-
-static atomic_t chpid_reset_count;
-
-static void s390_reset_chpids_mcck_handler(void)
-{
-	struct crw crw;
-	union mci mci;
-
-	/* Check for pending channel report word. */
-	mci.val = S390_lowcore.mcck_interruption_code;
-	if (!mci.cp)
-		return;
-	/* Process channel report words. */
-	while (stcrw(&crw) == 0) {
-		/* Check for responses to RCHP. */
-		if (crw.slct && crw.rsc == CRW_RSC_CPATH)
-			atomic_dec(&chpid_reset_count);
-	}
-}
-
-#define RCHP_TIMEOUT (30 * USEC_PER_SEC)
-static void css_reset(void)
-{
-	int i, ret;
-	unsigned long long timeout;
-	struct chp_id chpid;
-
-	/* Reset subchannels. */
-	for_each_subchannel(__shutdown_subchannel_easy,  NULL);
-	/* Reset channel paths. */
-	s390_base_mcck_handler_fn = s390_reset_chpids_mcck_handler;
-	/* Enable channel report machine checks. */
-	__ctl_set_bit(14, 28);
-	/* Temporarily reenable machine checks. */
-	local_mcck_enable();
-	chp_id_init(&chpid);
-	for (i = 0; i <= __MAX_CHPID; i++) {
-		chpid.id = i;
-		ret = rchp(chpid);
-		if ((ret == 0) || (ret == 2))
-			/*
-			 * rchp either succeeded, or another rchp is already
-			 * in progress. In either case, we'll get a crw.
-			 */
-			atomic_inc(&chpid_reset_count);
-	}
-	/* Wait for machine check for all channel paths. */
-	timeout = get_tod_clock_fast() + (RCHP_TIMEOUT << 12);
-	while (atomic_read(&chpid_reset_count) != 0) {
-		if (get_tod_clock_fast() > timeout)
-			break;
-		cpu_relax();
-	}
-	/* Disable machine checks again. */
-	local_mcck_disable();
-	/* Disable channel report machine checks. */
-	__ctl_clear_bit(14, 28);
-	s390_base_mcck_handler_fn = NULL;
-}
-
-static struct reset_call css_reset_call = {
-	.fn = css_reset,
-};
-
-static int __init init_css_reset_call(void)
-{
-	atomic_set(&chpid_reset_count, 0);
-	register_reset_call(&css_reset_call);
-	return 0;
-}
-
-arch_initcall(init_css_reset_call);
-
-struct sch_match_id {
-	struct subchannel_id schid;
-	struct ccw_dev_id devid;
-	int rc;
-};
-
-static int __reipl_subchannel_match(struct subchannel_id schid, void *data)
-{
-	struct schib schib;
-	struct sch_match_id *match_id = data;
-
-	if (stsch_reset(schid, &schib))
-		return -ENXIO;
-	if ((schib.pmcw.st == SUBCHANNEL_TYPE_IO) && schib.pmcw.dnv &&
-	    (schib.pmcw.dev == match_id->devid.devno) &&
-	    (schid.ssid == match_id->devid.ssid)) {
-		match_id->schid = schid;
-		match_id->rc = 0;
-		return 1;
-	}
-	return 0;
-}
-
-static int reipl_find_schid(struct ccw_dev_id *devid,
-			    struct subchannel_id *schid)
-{
-	struct sch_match_id match_id;
-
-	match_id.devid = *devid;
-	match_id.rc = -ENODEV;
-	for_each_subchannel(__reipl_subchannel_match, &match_id);
-	if (match_id.rc == 0)
-		*schid = match_id.schid;
-	return match_id.rc;
-}
-
-extern void do_reipl_asm(__u32 schid);
-
-/* Make sure all subchannels are quiet before we re-ipl an lpar. */
-void reipl_ccw_dev(struct ccw_dev_id *devid)
-{
-	struct subchannel_id uninitialized_var(schid);
-
-	s390_reset_system();
-	if (reipl_find_schid(devid, &schid) != 0)
-		panic("IPL Device not found\n");
-	do_reipl_asm(*((__u32*)&schid));
-}
-
-int __init cio_get_iplinfo(struct cio_iplinfo *iplinfo)
-{
-	static struct chsc_sda_area sda_area __initdata;
-	struct subchannel_id schid;
-	struct schib schib;
-
-	schid = *(struct subchannel_id *)&S390_lowcore.subchannel_id;
-	if (!schid.one)
-		return -ENODEV;
-
-	if (schid.ssid) {
-		/*
-		 * Firmware should have already enabled MSS but whoever started
-		 * the kernel might have initiated a channel subsystem reset.
-		 * Ensure that MSS is enabled.
-		 */
-		memset(&sda_area, 0, sizeof(sda_area));
-		if (__chsc_enable_facility(&sda_area, CHSC_SDA_OC_MSS))
-			return -ENODEV;
-	}
-	if (stsch(schid, &schib))
-		return -ENODEV;
-	if (schib.pmcw.st != SUBCHANNEL_TYPE_IO)
-		return -ENODEV;
-	if (!schib.pmcw.dnv)
-		return -ENODEV;
-
-	iplinfo->ssid = schid.ssid;
-	iplinfo->devno = schib.pmcw.dev;
-	iplinfo->is_qdio = schib.pmcw.qf;
-	return 0;
-}
-
 /**
  * cio_tm_start_key - perform start function
  * @sch: subchannel on which to perform the start function
diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c
index f50ea035aa9b..1540229a37bb 100644
--- a/drivers/s390/cio/device.c
+++ b/drivers/s390/cio/device.c
@@ -1073,8 +1073,7 @@ out_schedule:
 	return 0;
 }
 
-static int
-io_subchannel_remove (struct subchannel *sch)
+static int io_subchannel_remove(struct subchannel *sch)
 {
 	struct io_subchannel_private *io_priv = to_io_private(sch);
 	struct ccw_device *cdev;
@@ -1082,14 +1081,12 @@ io_subchannel_remove (struct subchannel *sch)
 	cdev = sch_get_cdev(sch);
 	if (!cdev)
 		goto out_free;
-	io_subchannel_quiesce(sch);
-	/* Set ccw device to not operational and drop reference. */
-	spin_lock_irq(cdev->ccwlock);
+
+	ccw_device_unregister(cdev);
+	spin_lock_irq(sch->lock);
 	sch_set_cdev(sch, NULL);
 	set_io_private(sch, NULL);
-	cdev->private->state = DEV_STATE_NOT_OPER;
-	spin_unlock_irq(cdev->ccwlock);
-	ccw_device_unregister(cdev);
+	spin_unlock_irq(sch->lock);
 out_free:
 	kfree(io_priv);
 	sysfs_remove_group(&sch->dev.kobj, &io_subchannel_attr_group);
@@ -1721,6 +1718,7 @@ static int ccw_device_remove(struct device *dev)
 {
 	struct ccw_device *cdev = to_ccwdev(dev);
 	struct ccw_driver *cdrv = cdev->drv;
+	struct subchannel *sch;
 	int ret;
 
 	if (cdrv->remove)
@@ -1746,7 +1744,9 @@ static int ccw_device_remove(struct device *dev)
 	ccw_device_set_timeout(cdev, 0);
 	cdev->drv = NULL;
 	cdev->private->int_class = IRQIO_CIO;
+	sch = to_subchannel(cdev->dev.parent);
 	spin_unlock_irq(cdev->ccwlock);
+	io_subchannel_quiesce(sch);
 	__disable_cmf(cdev);
 
 	return 0;
diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c
index 75ce12a24dc2..aecfebb74157 100644
--- a/drivers/s390/cio/device_ops.c
+++ b/drivers/s390/cio/device_ops.c
@@ -460,8 +460,8 @@ __u8 ccw_device_get_path_mask(struct ccw_device *cdev)
  * On success return a newly allocated copy of the channel-path description
  * data associated with the given channel path. Return %NULL on error.
  */
-struct channel_path_desc *ccw_device_get_chp_desc(struct ccw_device *cdev,
-						  int chp_idx)
+struct channel_path_desc_fmt0 *ccw_device_get_chp_desc(struct ccw_device *cdev,
+						       int chp_idx)
 {
 	struct subchannel *sch;
 	struct chp_id chpid;
diff --git a/drivers/s390/cio/ioasm.c b/drivers/s390/cio/ioasm.c
index 4fa9ee1d09fa..14d328338ce2 100644
--- a/drivers/s390/cio/ioasm.c
+++ b/drivers/s390/cio/ioasm.c
@@ -183,30 +183,6 @@ int chsc(void *chsc_area)
 }
 EXPORT_SYMBOL(chsc);
 
-static inline int __rchp(struct chp_id chpid)
-{
-	register struct chp_id reg1 asm ("1") = chpid;
-	int ccode;
-
-	asm volatile(
-		"	lr	1,%1\n"
-		"	rchp\n"
-		"	ipm	%0\n"
-		"	srl	%0,28"
-		: "=d" (ccode) : "d" (reg1) : "cc");
-	return ccode;
-}
-
-int rchp(struct chp_id chpid)
-{
-	int ccode;
-
-	ccode = __rchp(chpid);
-	trace_s390_cio_rchp(chpid, ccode);
-
-	return ccode;
-}
-
 static inline int __rsch(struct subchannel_id schid)
 {
 	register struct subchannel_id reg1 asm("1") = schid;
diff --git a/drivers/s390/cio/ioasm.h b/drivers/s390/cio/ioasm.h
index 35ad4ddd61e0..4be539cb9adc 100644
--- a/drivers/s390/cio/ioasm.h
+++ b/drivers/s390/cio/ioasm.h
@@ -20,7 +20,6 @@ int ssch(struct subchannel_id schid, union orb *addr);
 int csch(struct subchannel_id schid);
 int tpi(struct tpi_info *addr);
 int chsc(void *chsc_area);
-int rchp(struct chp_id chpid);
 int rsch(struct subchannel_id schid);
 int hsch(struct subchannel_id schid);
 int xsch(struct subchannel_id schid);
diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c
index d5b02de02a3a..f4ca72dd862f 100644
--- a/drivers/s390/cio/qdio_main.c
+++ b/drivers/s390/cio/qdio_main.c
@@ -98,22 +98,6 @@ static inline int do_siga_output(unsigned long schid, unsigned long mask,
 	return cc;
 }
 
-static inline int qdio_check_ccq(struct qdio_q *q, unsigned int ccq)
-{
-	/* all done or next buffer state different */
-	if (ccq == 0 || ccq == 32)
-		return 0;
-	/* no buffer processed */
-	if (ccq == 97)
-		return 1;
-	/* not all buffers processed */
-	if (ccq == 96)
-		return 2;
-	/* notify devices immediately */
-	DBF_ERROR("%4x ccq:%3d", SCH_NO(q), ccq);
-	return -EIO;
-}
-
 /**
  * qdio_do_eqbs - extract buffer states for QEBSM
  * @q: queue to manipulate
@@ -128,7 +112,7 @@ static inline int qdio_check_ccq(struct qdio_q *q, unsigned int ccq)
 static int qdio_do_eqbs(struct qdio_q *q, unsigned char *state,
 			int start, int count, int auto_ack)
 {
-	int rc, tmp_count = count, tmp_start = start, nr = q->nr, retried = 0;
+	int tmp_count = count, tmp_start = start, nr = q->nr;
 	unsigned int ccq = 0;
 
 	qperf_inc(q, eqbs);
@@ -138,34 +122,30 @@ static int qdio_do_eqbs(struct qdio_q *q, unsigned char *state,
 again:
 	ccq = do_eqbs(q->irq_ptr->sch_token, state, nr, &tmp_start, &tmp_count,
 		      auto_ack);
-	rc = qdio_check_ccq(q, ccq);
-	if (!rc)
-		return count - tmp_count;
 
-	if (rc == 1) {
-		DBF_DEV_EVENT(DBF_WARN, q->irq_ptr, "EQBS again:%2d", ccq);
-		goto again;
-	}
-
-	if (rc == 2) {
+	switch (ccq) {
+	case 0:
+	case 32:
+		/* all done, or next buffer state different */
+		return count - tmp_count;
+	case 96:
+		/* not all buffers processed */
 		qperf_inc(q, eqbs_partial);
 		DBF_DEV_EVENT(DBF_WARN, q->irq_ptr, "EQBS part:%02x",
 			tmp_count);
-		/*
-		 * Retry once, if that fails bail out and process the
-		 * extracted buffers before trying again.
-		 */
-		if (!retried++)
-			goto again;
-		else
-			return count - tmp_count;
+		return count - tmp_count;
+	case 97:
+		/* no buffer processed */
+		DBF_DEV_EVENT(DBF_WARN, q->irq_ptr, "EQBS again:%2d", ccq);
+		goto again;
+	default:
+		DBF_ERROR("%4x ccq:%3d", SCH_NO(q), ccq);
+		DBF_ERROR("%4x EQBS ERROR", SCH_NO(q));
+		DBF_ERROR("%3d%3d%2d", count, tmp_count, nr);
+		q->handler(q->irq_ptr->cdev, QDIO_ERROR_GET_BUF_STATE, q->nr,
+			   q->first_to_kick, count, q->irq_ptr->int_parm);
+		return 0;
 	}
-
-	DBF_ERROR("%4x EQBS ERROR", SCH_NO(q));
-	DBF_ERROR("%3d%3d%2d", count, tmp_count, nr);
-	q->handler(q->irq_ptr->cdev, QDIO_ERROR_GET_BUF_STATE,
-		   q->nr, q->first_to_kick, count, q->irq_ptr->int_parm);
-	return 0;
 }
 
 /**
@@ -185,7 +165,6 @@ static int qdio_do_sqbs(struct qdio_q *q, unsigned char state, int start,
 	unsigned int ccq = 0;
 	int tmp_count = count, tmp_start = start;
 	int nr = q->nr;
-	int rc;
 
 	if (!count)
 		return 0;
@@ -195,26 +174,32 @@ static int qdio_do_sqbs(struct qdio_q *q, unsigned char state, int start,
 		nr += q->irq_ptr->nr_input_qs;
 again:
 	ccq = do_sqbs(q->irq_ptr->sch_token, state, nr, &tmp_start, &tmp_count);
-	rc = qdio_check_ccq(q, ccq);
-	if (!rc) {
+
+	switch (ccq) {
+	case 0:
+	case 32:
+		/* all done, or active buffer adapter-owned */
 		WARN_ON_ONCE(tmp_count);
 		return count - tmp_count;
-	}
-
-	if (rc == 1 || rc == 2) {
+	case 96:
+		/* not all buffers processed */
 		DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "SQBS again:%2d", ccq);
 		qperf_inc(q, sqbs_partial);
 		goto again;
+	default:
+		DBF_ERROR("%4x ccq:%3d", SCH_NO(q), ccq);
+		DBF_ERROR("%4x SQBS ERROR", SCH_NO(q));
+		DBF_ERROR("%3d%3d%2d", count, tmp_count, nr);
+		q->handler(q->irq_ptr->cdev, QDIO_ERROR_SET_BUF_STATE, q->nr,
+			   q->first_to_kick, count, q->irq_ptr->int_parm);
+		return 0;
 	}
-
-	DBF_ERROR("%4x SQBS ERROR", SCH_NO(q));
-	DBF_ERROR("%3d%3d%2d", count, tmp_count, nr);
-	q->handler(q->irq_ptr->cdev, QDIO_ERROR_SET_BUF_STATE,
-		   q->nr, q->first_to_kick, count, q->irq_ptr->int_parm);
-	return 0;
 }
 
-/* returns number of examined buffers and their common state in *state */
+/*
+ * Returns number of examined buffers and their common state in *state.
+ * Requested number of buffers-to-examine must be > 0.
+ */
 static inline int get_buf_states(struct qdio_q *q, unsigned int bufnr,
 				 unsigned char *state, unsigned int count,
 				 int auto_ack, int merge_pending)
@@ -225,17 +210,23 @@ static inline int get_buf_states(struct qdio_q *q, unsigned int bufnr,
 	if (is_qebsm(q))
 		return qdio_do_eqbs(q, state, bufnr, count, auto_ack);
 
-	for (i = 0; i < count; i++) {
-		if (!__state) {
-			__state = q->slsb.val[bufnr];
-			if (merge_pending && __state == SLSB_P_OUTPUT_PENDING)
-				__state = SLSB_P_OUTPUT_EMPTY;
-		} else if (merge_pending) {
-			if ((q->slsb.val[bufnr] & __state) != __state)
-				break;
-		} else if (q->slsb.val[bufnr] != __state)
-			break;
+	/* get initial state: */
+	__state = q->slsb.val[bufnr];
+	if (merge_pending && __state == SLSB_P_OUTPUT_PENDING)
+		__state = SLSB_P_OUTPUT_EMPTY;
+
+	for (i = 1; i < count; i++) {
 		bufnr = next_buf(bufnr);
+
+		/* merge PENDING into EMPTY: */
+		if (merge_pending &&
+		    q->slsb.val[bufnr] == SLSB_P_OUTPUT_PENDING &&
+		    __state == SLSB_P_OUTPUT_EMPTY)
+			continue;
+
+		/* stop if next state differs from initial state: */
+		if (q->slsb.val[bufnr] != __state)
+			break;
 	}
 	*state = __state;
 	return i;
@@ -502,8 +493,8 @@ static inline void inbound_primed(struct qdio_q *q, int count)
 
 static int get_inbound_buffer_frontier(struct qdio_q *q)
 {
-	int count, stop;
 	unsigned char state = 0;
+	int count;
 
 	q->timestamp = get_tod_clock_fast();
 
@@ -512,9 +503,7 @@ static int get_inbound_buffer_frontier(struct qdio_q *q)
 	 * would return 0.
 	 */
 	count = min(atomic_read(&q->nr_buf_used), QDIO_MAX_BUFFERS_MASK);
-	stop = add_buf(q->first_to_check, count);
-
-	if (q->first_to_check == stop)
+	if (!count)
 		goto out;
 
 	/*
@@ -734,8 +723,8 @@ void qdio_inbound_processing(unsigned long data)
 
 static int get_outbound_buffer_frontier(struct qdio_q *q)
 {
-	int count, stop;
 	unsigned char state = 0;
+	int count;
 
 	q->timestamp = get_tod_clock_fast();
 
@@ -751,11 +740,11 @@ static int get_outbound_buffer_frontier(struct qdio_q *q)
 	 * would return 0.
 	 */
 	count = min(atomic_read(&q->nr_buf_used), QDIO_MAX_BUFFERS_MASK);
-	stop = add_buf(q->first_to_check, count);
-	if (q->first_to_check == stop)
+	if (!count)
 		goto out;
 
-	count = get_buf_states(q, q->first_to_check, &state, count, 0, 1);
+	count = get_buf_states(q, q->first_to_check, &state, count, 0,
+			       q->u.out.use_cq);
 	if (!count)
 		goto out;
 
@@ -1218,8 +1207,10 @@ no_cleanup:
 	qdio_shutdown_thinint(irq_ptr);
 
 	/* restore interrupt handler */
-	if ((void *)cdev->handler == (void *)qdio_int_handler)
+	if ((void *)cdev->handler == (void *)qdio_int_handler) {
 		cdev->handler = irq_ptr->orig_handler;
+		cdev->private->intparm = 0;
+	}
 	spin_unlock_irq(get_ccwdev_lock(cdev));
 
 	qdio_set_state(irq_ptr, QDIO_IRQ_STATE_INACTIVE);
diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c
index 98f3cfdc0d02..439991d71b14 100644
--- a/drivers/s390/cio/qdio_setup.c
+++ b/drivers/s390/cio/qdio_setup.c
@@ -507,8 +507,10 @@ int qdio_setup_irq(struct qdio_initialize *init_data)
 	irq_ptr->aqueue = *ciw;
 
 	/* set new interrupt handler */
+	spin_lock_irq(get_ccwdev_lock(irq_ptr->cdev));
 	irq_ptr->orig_handler = init_data->cdev->handler;
 	init_data->cdev->handler = qdio_int_handler;
+	spin_unlock_irq(get_ccwdev_lock(irq_ptr->cdev));
 	return 0;
 out_err:
 	qdio_release_memory(irq_ptr);
diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c
index c30420c517b1..ff6963ad6e39 100644
--- a/drivers/s390/cio/vfio_ccw_fsm.c
+++ b/drivers/s390/cio/vfio_ccw_fsm.c
@@ -124,6 +124,11 @@ static void fsm_io_request(struct vfio_ccw_private *private,
 	if (scsw->cmd.fctl & SCSW_FCTL_START_FUNC) {
 		orb = (union orb *)io_region->orb_area;
 
+		/* Don't try to build a cp if transport mode is specified. */
+		if (orb->tm.b) {
+			io_region->ret_code = -EOPNOTSUPP;
+			goto err_out;
+		}
 		io_region->ret_code = cp_init(&private->cp, mdev_dev(mdev),
 					      orb);
 		if (io_region->ret_code)
diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c
index 48d55dc9e986..35a0c2b52f82 100644
--- a/drivers/s390/crypto/ap_bus.c
+++ b/drivers/s390/crypto/ap_bus.c
@@ -25,7 +25,6 @@
 #include <linux/kthread.h>
 #include <linux/mutex.h>
 #include <linux/suspend.h>
-#include <asm/reset.h>
 #include <asm/airq.h>
 #include <linux/atomic.h>
 #include <asm/isc.h>
@@ -1197,26 +1196,7 @@ static void ap_config_timeout(struct timer_list *unused)
 	queue_work(system_long_wq, &ap_scan_work);
 }
 
-static void ap_reset_all(void)
-{
-	int i, j;
-
-	for (i = 0; i < AP_DOMAINS; i++) {
-		if (!ap_test_config_domain(i))
-			continue;
-		for (j = 0; j < AP_DEVICES; j++) {
-			if (!ap_test_config_card_id(j))
-				continue;
-			ap_rapq(AP_MKQID(j, i));
-		}
-	}
-}
-
-static struct reset_call ap_reset_call = {
-	.fn = ap_reset_all,
-};
-
-int __init ap_debug_init(void)
+static int __init ap_debug_init(void)
 {
 	ap_dbf_info = debug_register("ap", 1, 1,
 				     DBF_MAX_SPRINTF_ARGS * sizeof(long));
@@ -1226,17 +1206,12 @@ int __init ap_debug_init(void)
 	return 0;
 }
 
-void ap_debug_exit(void)
-{
-	debug_unregister(ap_dbf_info);
-}
-
 /**
  * ap_module_init(): The module initialization code.
  *
  * Initializes the module.
  */
-int __init ap_module_init(void)
+static int __init ap_module_init(void)
 {
 	int max_domain_id;
 	int rc, i;
@@ -1274,8 +1249,6 @@ int __init ap_module_init(void)
 		ap_airq_flag = (rc == 0);
 	}
 
-	register_reset_call(&ap_reset_call);
-
 	/* Create /sys/bus/ap. */
 	rc = bus_register(&ap_bus_type);
 	if (rc)
@@ -1331,7 +1304,6 @@ out_bus:
 		bus_remove_file(&ap_bus_type, ap_bus_attrs[i]);
 	bus_unregister(&ap_bus_type);
 out:
-	unregister_reset_call(&ap_reset_call);
 	if (ap_using_interrupts())
 		unregister_adapter_interrupt(&ap_airq);
 	kfree(ap_configuration);
diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h
index e0827eaa42f1..02184cf35834 100644
--- a/drivers/s390/crypto/ap_bus.h
+++ b/drivers/s390/crypto/ap_bus.h
@@ -17,7 +17,7 @@
 #include <linux/types.h>
 #include <asm/ap.h>
 
-#define AP_DEVICES 64		/* Number of AP devices. */
+#define AP_DEVICES 256		/* Number of AP devices. */
 #define AP_DOMAINS 256		/* Number of AP domains. */
 #define AP_RESET_TIMEOUT (HZ*0.7)	/* Time in ticks for reset timeouts. */
 #define AP_CONFIG_TIME 30	/* Time in seconds between AP bus rescans. */
@@ -240,7 +240,4 @@ void ap_queue_resume(struct ap_device *ap_dev);
 struct ap_card *ap_card_create(int id, int queue_depth, int raw_device_type,
 			       int comp_device_type, unsigned int functions);
 
-int ap_module_init(void);
-void ap_module_exit(void);
-
 #endif /* _AP_BUS_H_ */
diff --git a/drivers/s390/crypto/ap_debug.h b/drivers/s390/crypto/ap_debug.h
index 6a9d77c75ec3..dc675eb5aef6 100644
--- a/drivers/s390/crypto/ap_debug.h
+++ b/drivers/s390/crypto/ap_debug.h
@@ -23,7 +23,4 @@
 
 extern debug_info_t *ap_dbf_info;
 
-int ap_debug_init(void);
-void ap_debug_exit(void);
-
 #endif /* AP_DEBUG_H */
diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c
index e7c2e4f9529a..ed80d00cdb6f 100644
--- a/drivers/s390/crypto/pkey_api.c
+++ b/drivers/s390/crypto/pkey_api.c
@@ -889,7 +889,7 @@ int pkey_findcard(const struct pkey_seckey *seckey,
 		  u16 *pcardnr, u16 *pdomain, int verify)
 {
 	struct secaeskeytoken *t = (struct secaeskeytoken *) seckey;
-	struct zcrypt_device_matrix *device_matrix;
+	struct zcrypt_device_status_ext *device_status;
 	u16 card, dom;
 	u64 mkvp[2];
 	int i, rc, oi = -1;
@@ -899,18 +899,19 @@ int pkey_findcard(const struct pkey_seckey *seckey,
 		return -EINVAL;
 
 	/* fetch status of all crypto cards */
-	device_matrix = kmalloc(sizeof(struct zcrypt_device_matrix),
+	device_status = kmalloc(MAX_ZDEV_ENTRIES_EXT
+				* sizeof(struct zcrypt_device_status_ext),
 				GFP_KERNEL);
-	if (!device_matrix)
+	if (!device_status)
 		return -ENOMEM;
-	zcrypt_device_status_mask(device_matrix);
+	zcrypt_device_status_mask_ext(device_status);
 
 	/* walk through all crypto cards */
-	for (i = 0; i < MAX_ZDEV_ENTRIES; i++) {
-		card = AP_QID_CARD(device_matrix->device[i].qid);
-		dom = AP_QID_QUEUE(device_matrix->device[i].qid);
-		if (device_matrix->device[i].online &&
-		    device_matrix->device[i].functions & 0x04) {
+	for (i = 0; i < MAX_ZDEV_ENTRIES_EXT; i++) {
+		card = AP_QID_CARD(device_status[i].qid);
+		dom = AP_QID_QUEUE(device_status[i].qid);
+		if (device_status[i].online &&
+		    device_status[i].functions & 0x04) {
 			/* an enabled CCA Coprocessor card */
 			/* try cached mkvp */
 			if (mkvp_cache_fetch(card, dom, mkvp) == 0 &&
@@ -930,14 +931,14 @@ int pkey_findcard(const struct pkey_seckey *seckey,
 			mkvp_cache_scrub(card, dom);
 		}
 	}
-	if (i >= MAX_ZDEV_ENTRIES) {
+	if (i >= MAX_ZDEV_ENTRIES_EXT) {
 		/* nothing found, so this time without cache */
-		for (i = 0; i < MAX_ZDEV_ENTRIES; i++) {
-			if (!(device_matrix->device[i].online &&
-			      device_matrix->device[i].functions & 0x04))
+		for (i = 0; i < MAX_ZDEV_ENTRIES_EXT; i++) {
+			if (!(device_status[i].online &&
+			      device_status[i].functions & 0x04))
 				continue;
-			card = AP_QID_CARD(device_matrix->device[i].qid);
-			dom = AP_QID_QUEUE(device_matrix->device[i].qid);
+			card = AP_QID_CARD(device_status[i].qid);
+			dom = AP_QID_QUEUE(device_status[i].qid);
 			/* fresh fetch mkvp from adapter */
 			if (fetch_mkvp(card, dom, mkvp) == 0) {
 				mkvp_cache_update(card, dom, mkvp);
@@ -947,13 +948,13 @@ int pkey_findcard(const struct pkey_seckey *seckey,
 					oi = i;
 			}
 		}
-		if (i >= MAX_ZDEV_ENTRIES && oi >= 0) {
+		if (i >= MAX_ZDEV_ENTRIES_EXT && oi >= 0) {
 			/* old mkvp matched, use this card then */
-			card = AP_QID_CARD(device_matrix->device[oi].qid);
-			dom = AP_QID_QUEUE(device_matrix->device[oi].qid);
+			card = AP_QID_CARD(device_status[oi].qid);
+			dom = AP_QID_QUEUE(device_status[oi].qid);
 		}
 	}
-	if (i < MAX_ZDEV_ENTRIES || oi >= 0) {
+	if (i < MAX_ZDEV_ENTRIES_EXT || oi >= 0) {
 		if (pcardnr)
 			*pcardnr = card;
 		if (pdomain)
@@ -962,7 +963,7 @@ int pkey_findcard(const struct pkey_seckey *seckey,
 	} else
 		rc = -ENODEV;
 
-	kfree(device_matrix);
+	kfree(device_status);
 	return rc;
 }
 EXPORT_SYMBOL(pkey_findcard);
diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c
index ce15f101ee28..5efd84862ccb 100644
--- a/drivers/s390/crypto/zcrypt_api.c
+++ b/drivers/s390/crypto/zcrypt_api.c
@@ -18,8 +18,6 @@
 #include <linux/interrupt.h>
 #include <linux/miscdevice.h>
 #include <linux/fs.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
 #include <linux/compat.h>
 #include <linux/slab.h>
 #include <linux/atomic.h>
@@ -607,19 +605,24 @@ out:
 	return rc;
 }
 
-void zcrypt_device_status_mask(struct zcrypt_device_matrix *matrix)
+static void zcrypt_device_status_mask(struct zcrypt_device_status *devstatus)
 {
 	struct zcrypt_card *zc;
 	struct zcrypt_queue *zq;
 	struct zcrypt_device_status *stat;
+	int card, queue;
+
+	memset(devstatus, 0, MAX_ZDEV_ENTRIES
+	       * sizeof(struct zcrypt_device_status));
 
-	memset(matrix, 0, sizeof(*matrix));
 	spin_lock(&zcrypt_list_lock);
 	for_each_zcrypt_card(zc) {
 		for_each_zcrypt_queue(zq, zc) {
-			stat = matrix->device;
-			stat += AP_QID_CARD(zq->queue->qid) * MAX_ZDEV_DOMAINS;
-			stat += AP_QID_QUEUE(zq->queue->qid);
+			card = AP_QID_CARD(zq->queue->qid);
+			if (card >= MAX_ZDEV_CARDIDS)
+				continue;
+			queue = AP_QID_QUEUE(zq->queue->qid);
+			stat = &devstatus[card * AP_DOMAINS + queue];
 			stat->hwtype = zc->card->ap_dev.device_type;
 			stat->functions = zc->card->functions >> 26;
 			stat->qid = zq->queue->qid;
@@ -628,40 +631,70 @@ void zcrypt_device_status_mask(struct zcrypt_device_matrix *matrix)
 	}
 	spin_unlock(&zcrypt_list_lock);
 }
-EXPORT_SYMBOL(zcrypt_device_status_mask);
 
-static void zcrypt_status_mask(char status[AP_DEVICES])
+void zcrypt_device_status_mask_ext(struct zcrypt_device_status_ext *devstatus)
 {
 	struct zcrypt_card *zc;
 	struct zcrypt_queue *zq;
+	struct zcrypt_device_status_ext *stat;
+	int card, queue;
+
+	memset(devstatus, 0, MAX_ZDEV_ENTRIES_EXT
+	       * sizeof(struct zcrypt_device_status_ext));
 
-	memset(status, 0, sizeof(char) * AP_DEVICES);
 	spin_lock(&zcrypt_list_lock);
 	for_each_zcrypt_card(zc) {
 		for_each_zcrypt_queue(zq, zc) {
-			if (AP_QID_QUEUE(zq->queue->qid) != ap_domain_index)
+			card = AP_QID_CARD(zq->queue->qid);
+			queue = AP_QID_QUEUE(zq->queue->qid);
+			stat = &devstatus[card * AP_DOMAINS + queue];
+			stat->hwtype = zc->card->ap_dev.device_type;
+			stat->functions = zc->card->functions >> 26;
+			stat->qid = zq->queue->qid;
+			stat->online = zq->online ? 0x01 : 0x00;
+		}
+	}
+	spin_unlock(&zcrypt_list_lock);
+}
+EXPORT_SYMBOL(zcrypt_device_status_mask_ext);
+
+static void zcrypt_status_mask(char status[], size_t max_adapters)
+{
+	struct zcrypt_card *zc;
+	struct zcrypt_queue *zq;
+	int card;
+
+	memset(status, 0, max_adapters);
+	spin_lock(&zcrypt_list_lock);
+	for_each_zcrypt_card(zc) {
+		for_each_zcrypt_queue(zq, zc) {
+			card = AP_QID_CARD(zq->queue->qid);
+			if (AP_QID_QUEUE(zq->queue->qid) != ap_domain_index
+			    || card >= max_adapters)
 				continue;
-			status[AP_QID_CARD(zq->queue->qid)] =
-				zc->online ? zc->user_space_type : 0x0d;
+			status[card] = zc->online ? zc->user_space_type : 0x0d;
 		}
 	}
 	spin_unlock(&zcrypt_list_lock);
 }
 
-static void zcrypt_qdepth_mask(char qdepth[AP_DEVICES])
+static void zcrypt_qdepth_mask(char qdepth[], size_t max_adapters)
 {
 	struct zcrypt_card *zc;
 	struct zcrypt_queue *zq;
+	int card;
 
-	memset(qdepth, 0, sizeof(char)	* AP_DEVICES);
+	memset(qdepth, 0, max_adapters);
 	spin_lock(&zcrypt_list_lock);
 	local_bh_disable();
 	for_each_zcrypt_card(zc) {
 		for_each_zcrypt_queue(zq, zc) {
-			if (AP_QID_QUEUE(zq->queue->qid) != ap_domain_index)
+			card = AP_QID_CARD(zq->queue->qid);
+			if (AP_QID_QUEUE(zq->queue->qid) != ap_domain_index
+			    || card >= max_adapters)
 				continue;
 			spin_lock(&zq->queue->lock);
-			qdepth[AP_QID_CARD(zq->queue->qid)] =
+			qdepth[card] =
 				zq->queue->pendingq_count +
 				zq->queue->requestq_count;
 			spin_unlock(&zq->queue->lock);
@@ -671,21 +704,23 @@ static void zcrypt_qdepth_mask(char qdepth[AP_DEVICES])
 	spin_unlock(&zcrypt_list_lock);
 }
 
-static void zcrypt_perdev_reqcnt(int reqcnt[AP_DEVICES])
+static void zcrypt_perdev_reqcnt(int reqcnt[], size_t max_adapters)
 {
 	struct zcrypt_card *zc;
 	struct zcrypt_queue *zq;
+	int card;
 
-	memset(reqcnt, 0, sizeof(int) * AP_DEVICES);
+	memset(reqcnt, 0, sizeof(int) * max_adapters);
 	spin_lock(&zcrypt_list_lock);
 	local_bh_disable();
 	for_each_zcrypt_card(zc) {
 		for_each_zcrypt_queue(zq, zc) {
-			if (AP_QID_QUEUE(zq->queue->qid) != ap_domain_index)
+			card = AP_QID_CARD(zq->queue->qid);
+			if (AP_QID_QUEUE(zq->queue->qid) != ap_domain_index
+			    || card >= max_adapters)
 				continue;
 			spin_lock(&zq->queue->lock);
-			reqcnt[AP_QID_CARD(zq->queue->qid)] =
-				zq->queue->total_request_count;
+			reqcnt[card] = zq->queue->total_request_count;
 			spin_unlock(&zq->queue->lock);
 		}
 	}
@@ -739,60 +774,10 @@ static int zcrypt_requestq_count(void)
 	return requestq_count;
 }
 
-static int zcrypt_count_type(int type)
-{
-	struct zcrypt_card *zc;
-	struct zcrypt_queue *zq;
-	int device_count;
-
-	device_count = 0;
-	spin_lock(&zcrypt_list_lock);
-	for_each_zcrypt_card(zc) {
-		if (zc->card->id != type)
-			continue;
-		for_each_zcrypt_queue(zq, zc) {
-			if (AP_QID_QUEUE(zq->queue->qid) != ap_domain_index)
-				continue;
-			device_count++;
-		}
-	}
-	spin_unlock(&zcrypt_list_lock);
-	return device_count;
-}
-
-/**
- * zcrypt_ica_status(): Old, depracted combi status call.
- *
- * Old, deprecated combi status call.
- */
-static long zcrypt_ica_status(struct file *filp, unsigned long arg)
-{
-	struct ica_z90_status *pstat;
-	int ret;
-
-	pstat = kzalloc(sizeof(*pstat), GFP_KERNEL);
-	if (!pstat)
-		return -ENOMEM;
-	pstat->totalcount = zcrypt_device_count;
-	pstat->leedslitecount = zcrypt_count_type(ZCRYPT_PCICA);
-	pstat->leeds2count = zcrypt_count_type(ZCRYPT_PCICC);
-	pstat->requestqWaitCount = zcrypt_requestq_count();
-	pstat->pendingqWaitCount = zcrypt_pendingq_count();
-	pstat->totalOpenCount = atomic_read(&zcrypt_open_count);
-	pstat->cryptoDomain = ap_domain_index;
-	zcrypt_status_mask(pstat->status);
-	zcrypt_qdepth_mask(pstat->qdepth);
-	ret = 0;
-	if (copy_to_user((void __user *) arg, pstat, sizeof(*pstat)))
-		ret = -EFAULT;
-	kfree(pstat);
-	return ret;
-}
-
 static long zcrypt_unlocked_ioctl(struct file *filp, unsigned int cmd,
 				  unsigned long arg)
 {
-	int rc;
+	int rc = 0;
 
 	switch (cmd) {
 	case ICARSAMODEXPO: {
@@ -871,48 +856,48 @@ static long zcrypt_unlocked_ioctl(struct file *filp, unsigned int cmd,
 			return -EFAULT;
 		return rc;
 	}
-	case ZDEVICESTATUS: {
-		struct zcrypt_device_matrix *device_status;
+	case ZCRYPT_DEVICE_STATUS: {
+		struct zcrypt_device_status_ext *device_status;
+		size_t total_size = MAX_ZDEV_ENTRIES_EXT
+			* sizeof(struct zcrypt_device_status_ext);
 
-		device_status = kzalloc(sizeof(struct zcrypt_device_matrix),
-					GFP_KERNEL);
+		device_status = kzalloc(total_size, GFP_KERNEL);
 		if (!device_status)
 			return -ENOMEM;
-
-		zcrypt_device_status_mask(device_status);
-
+		zcrypt_device_status_mask_ext(device_status);
 		if (copy_to_user((char __user *) arg, device_status,
-				 sizeof(struct zcrypt_device_matrix))) {
-			kfree(device_status);
-			return -EFAULT;
-		}
-
+				 total_size))
+			rc = -EFAULT;
 		kfree(device_status);
-		return 0;
+		return rc;
 	}
-	case Z90STAT_STATUS_MASK: {
+	case ZCRYPT_STATUS_MASK: {
 		char status[AP_DEVICES];
-		zcrypt_status_mask(status);
-		if (copy_to_user((char __user *) arg, status,
-				 sizeof(char) * AP_DEVICES))
+
+		zcrypt_status_mask(status, AP_DEVICES);
+		if (copy_to_user((char __user *) arg, status, sizeof(status)))
 			return -EFAULT;
 		return 0;
 	}
-	case Z90STAT_QDEPTH_MASK: {
+	case ZCRYPT_QDEPTH_MASK: {
 		char qdepth[AP_DEVICES];
-		zcrypt_qdepth_mask(qdepth);
-		if (copy_to_user((char __user *) arg, qdepth,
-				 sizeof(char) * AP_DEVICES))
+
+		zcrypt_qdepth_mask(qdepth, AP_DEVICES);
+		if (copy_to_user((char __user *) arg, qdepth, sizeof(qdepth)))
 			return -EFAULT;
 		return 0;
 	}
-	case Z90STAT_PERDEV_REQCNT: {
-		int reqcnt[AP_DEVICES];
-		zcrypt_perdev_reqcnt(reqcnt);
-		if (copy_to_user((int __user *) arg, reqcnt,
-				 sizeof(int) * AP_DEVICES))
-			return -EFAULT;
-		return 0;
+	case ZCRYPT_PERDEV_REQCNT: { /* reqcnt is heap: no sizeof(reqcnt) */
+		int *reqcnt = kcalloc(AP_DEVICES, sizeof(*reqcnt), GFP_KERNEL);
+
+		if (!reqcnt)
+			return -ENOMEM;
+		zcrypt_perdev_reqcnt(reqcnt, AP_DEVICES);
+		if (copy_to_user((int __user *) arg, reqcnt,
+				 AP_DEVICES * sizeof(*reqcnt)))
+			rc = -EFAULT;
+		kfree(reqcnt);
+		return rc;
+	}
 	case Z90STAT_REQUESTQ_COUNT:
 		return put_user(zcrypt_requestq_count(), (int __user *) arg);
@@ -924,38 +909,54 @@ static long zcrypt_unlocked_ioctl(struct file *filp, unsigned int cmd,
 	case Z90STAT_DOMAIN_INDEX:
 		return put_user(ap_domain_index, (int __user *) arg);
 	/*
-	 * Deprecated ioctls. Don't add another device count ioctl,
-	 * you can count them yourself in the user space with the
-	 * output of the Z90STAT_STATUS_MASK ioctl.
+	 * Deprecated ioctls
 	 */
-	case ICAZ90STATUS:
-		return zcrypt_ica_status(filp, arg);
-	case Z90STAT_TOTALCOUNT:
-		return put_user(zcrypt_device_count, (int __user *) arg);
-	case Z90STAT_PCICACOUNT:
-		return put_user(zcrypt_count_type(ZCRYPT_PCICA),
-				(int __user *) arg);
-	case Z90STAT_PCICCCOUNT:
-		return put_user(zcrypt_count_type(ZCRYPT_PCICC),
-				(int __user *) arg);
-	case Z90STAT_PCIXCCMCL2COUNT:
-		return put_user(zcrypt_count_type(ZCRYPT_PCIXCC_MCL2),
-				(int __user *) arg);
-	case Z90STAT_PCIXCCMCL3COUNT:
-		return put_user(zcrypt_count_type(ZCRYPT_PCIXCC_MCL3),
-				(int __user *) arg);
-	case Z90STAT_PCIXCCCOUNT:
-		return put_user(zcrypt_count_type(ZCRYPT_PCIXCC_MCL2) +
-				zcrypt_count_type(ZCRYPT_PCIXCC_MCL3),
-				(int __user *) arg);
-	case Z90STAT_CEX2CCOUNT:
-		return put_user(zcrypt_count_type(ZCRYPT_CEX2C),
-				(int __user *) arg);
-	case Z90STAT_CEX2ACOUNT:
-		return put_user(zcrypt_count_type(ZCRYPT_CEX2A),
-				(int __user *) arg);
+	case ZDEVICESTATUS: {
+		/* the old ioctl supports only 64 adapters */
+		struct zcrypt_device_status *device_status;
+		size_t total_size = MAX_ZDEV_ENTRIES
+			* sizeof(struct zcrypt_device_status);
+
+		device_status = kzalloc(total_size, GFP_KERNEL);
+		if (!device_status)
+			return -ENOMEM;
+		zcrypt_device_status_mask(device_status);
+		if (copy_to_user((char __user *) arg, device_status,
+				 total_size))
+			rc = -EFAULT;
+		kfree(device_status);
+		return rc;
+	}
+	case Z90STAT_STATUS_MASK: {
+		/* the old ioctl supports only 64 adapters */
+		char status[MAX_ZDEV_CARDIDS];
+
+		zcrypt_status_mask(status, MAX_ZDEV_CARDIDS);
+		if (copy_to_user((char __user *) arg, status, sizeof(status)))
+			return -EFAULT;
+		return 0;
+	}
+	case Z90STAT_QDEPTH_MASK: {
+		/* the old ioctl supports only 64 adapters */
+		char qdepth[MAX_ZDEV_CARDIDS];
+
+		zcrypt_qdepth_mask(qdepth, MAX_ZDEV_CARDIDS);
+		if (copy_to_user((char __user *) arg, qdepth, sizeof(qdepth)))
+			return -EFAULT;
+		return 0;
+	}
+	case Z90STAT_PERDEV_REQCNT: {
+		/* the old ioctl supports only 64 adapters */
+		int reqcnt[MAX_ZDEV_CARDIDS];
+
+		zcrypt_perdev_reqcnt(reqcnt, MAX_ZDEV_CARDIDS);
+		if (copy_to_user((int __user *) arg, reqcnt, sizeof(reqcnt)))
+			return -EFAULT;
+		return 0;
+	}
+	/* unknown ioctl number */
 	default:
-		/* unknown ioctl number */
+		ZCRYPT_DBF(DBF_DEBUG, "unknown ioctl 0x%08x\n", cmd);
 		return -ENOIOCTLCMD;
 	}
 }
@@ -1152,201 +1153,6 @@ static struct miscdevice zcrypt_misc_device = {
 	.fops	    = &zcrypt_fops,
 };
 
-/*
- * Deprecated /proc entry support.
- */
-static struct proc_dir_entry *zcrypt_entry;
-
-static void sprintcl(struct seq_file *m, unsigned char *addr, unsigned int len)
-{
-	int i;
-
-	for (i = 0; i < len; i++)
-		seq_printf(m, "%01x", (unsigned int) addr[i]);
-	seq_putc(m, ' ');
-}
-
-static void sprintrw(struct seq_file *m, unsigned char *addr, unsigned int len)
-{
-	int inl, c, cx;
-
-	seq_printf(m, "	   ");
-	inl = 0;
-	for (c = 0; c < (len / 16); c++) {
-		sprintcl(m, addr+inl, 16);
-		inl += 16;
-	}
-	cx = len%16;
-	if (cx) {
-		sprintcl(m, addr+inl, cx);
-		inl += cx;
-	}
-	seq_putc(m, '\n');
-}
-
-static void sprinthx(unsigned char *title, struct seq_file *m,
-		     unsigned char *addr, unsigned int len)
-{
-	int inl, r, rx;
-
-	seq_printf(m, "\n%s\n", title);
-	inl = 0;
-	for (r = 0; r < (len / 64); r++) {
-		sprintrw(m, addr+inl, 64);
-		inl += 64;
-	}
-	rx = len % 64;
-	if (rx) {
-		sprintrw(m, addr+inl, rx);
-		inl += rx;
-	}
-	seq_putc(m, '\n');
-}
-
-static void sprinthx4(unsigned char *title, struct seq_file *m,
-		      unsigned int *array, unsigned int len)
-{
-	seq_printf(m, "\n%s\n", title);
-	seq_hex_dump(m, "    ", DUMP_PREFIX_NONE, 32, 4, array, len, false);
-	seq_putc(m, '\n');
-}
-
-static int zcrypt_proc_show(struct seq_file *m, void *v)
-{
-	char workarea[sizeof(int) * AP_DEVICES];
-
-	seq_printf(m, "\nzcrypt version: %d.%d.%d\n",
-		   ZCRYPT_VERSION, ZCRYPT_RELEASE, ZCRYPT_VARIANT);
-	seq_printf(m, "Cryptographic domain: %d\n", ap_domain_index);
-	seq_printf(m, "Total device count: %d\n", zcrypt_device_count);
-	seq_printf(m, "PCICA count: %d\n", zcrypt_count_type(ZCRYPT_PCICA));
-	seq_printf(m, "PCICC count: %d\n", zcrypt_count_type(ZCRYPT_PCICC));
-	seq_printf(m, "PCIXCC MCL2 count: %d\n",
-		   zcrypt_count_type(ZCRYPT_PCIXCC_MCL2));
-	seq_printf(m, "PCIXCC MCL3 count: %d\n",
-		   zcrypt_count_type(ZCRYPT_PCIXCC_MCL3));
-	seq_printf(m, "CEX2C count: %d\n", zcrypt_count_type(ZCRYPT_CEX2C));
-	seq_printf(m, "CEX2A count: %d\n", zcrypt_count_type(ZCRYPT_CEX2A));
-	seq_printf(m, "CEX3C count: %d\n", zcrypt_count_type(ZCRYPT_CEX3C));
-	seq_printf(m, "CEX3A count: %d\n", zcrypt_count_type(ZCRYPT_CEX3A));
-	seq_printf(m, "requestq count: %d\n", zcrypt_requestq_count());
-	seq_printf(m, "pendingq count: %d\n", zcrypt_pendingq_count());
-	seq_printf(m, "Total open handles: %d\n\n",
-		   atomic_read(&zcrypt_open_count));
-	zcrypt_status_mask(workarea);
-	sprinthx("Online devices: 1=PCICA 2=PCICC 3=PCIXCC(MCL2) "
-		 "4=PCIXCC(MCL3) 5=CEX2C 6=CEX2A 7=CEX3C 8=CEX3A",
-		 m, workarea, AP_DEVICES);
-	zcrypt_qdepth_mask(workarea);
-	sprinthx("Waiting work element counts", m, workarea, AP_DEVICES);
-	zcrypt_perdev_reqcnt((int *) workarea);
-	sprinthx4("Per-device successfully completed request counts",
-		  m, (unsigned int *) workarea, AP_DEVICES);
-	return 0;
-}
-
-static int zcrypt_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, zcrypt_proc_show, NULL);
-}
-
-static void zcrypt_disable_card(int index)
-{
-	struct zcrypt_card *zc;
-	struct zcrypt_queue *zq;
-
-	spin_lock(&zcrypt_list_lock);
-	for_each_zcrypt_card(zc) {
-		for_each_zcrypt_queue(zq, zc) {
-			if (AP_QID_QUEUE(zq->queue->qid) != ap_domain_index)
-				continue;
-			zq->online = 0;
-			ap_flush_queue(zq->queue);
-		}
-	}
-	spin_unlock(&zcrypt_list_lock);
-}
-
-static void zcrypt_enable_card(int index)
-{
-	struct zcrypt_card *zc;
-	struct zcrypt_queue *zq;
-
-	spin_lock(&zcrypt_list_lock);
-	for_each_zcrypt_card(zc) {
-		for_each_zcrypt_queue(zq, zc) {
-			if (AP_QID_QUEUE(zq->queue->qid) != ap_domain_index)
-				continue;
-			zq->online = 1;
-			ap_flush_queue(zq->queue);
-		}
-	}
-	spin_unlock(&zcrypt_list_lock);
-}
-
-static ssize_t zcrypt_proc_write(struct file *file, const char __user *buffer,
-				 size_t count, loff_t *pos)
-{
-	unsigned char *lbuf, *ptr;
-	size_t local_count;
-	int j;
-
-	if (count <= 0)
-		return 0;
-
-#define LBUFSIZE 1200UL
-	lbuf = kmalloc(LBUFSIZE, GFP_KERNEL);
-	if (!lbuf)
-		return 0;
-
-	local_count = min(LBUFSIZE - 1, count);
-	if (copy_from_user(lbuf, buffer, local_count) != 0) {
-		kfree(lbuf);
-		return -EFAULT;
-	}
-	lbuf[local_count] = '\0';
-
-	ptr = strstr(lbuf, "Online devices");
-	if (!ptr)
-		goto out;
-	ptr = strstr(ptr, "\n");
-	if (!ptr)
-		goto out;
-	ptr++;
-
-	if (strstr(ptr, "Waiting work element counts") == NULL)
-		goto out;
-
-	for (j = 0; j < 64 && *ptr; ptr++) {
-		/*
-		 * '0' for no device, '1' for PCICA, '2' for PCICC,
-		 * '3' for PCIXCC_MCL2, '4' for PCIXCC_MCL3,
-		 * '5' for CEX2C and '6' for CEX2A'
-		 * '7' for CEX3C and '8' for CEX3A
-		 */
-		if (*ptr >= '0' && *ptr <= '8')
-			j++;
-		else if (*ptr == 'd' || *ptr == 'D')
-			zcrypt_disable_card(j++);
-		else if (*ptr == 'e' || *ptr == 'E')
-			zcrypt_enable_card(j++);
-		else if (*ptr != ' ' && *ptr != '\t')
-			break;
-	}
-out:
-	kfree(lbuf);
-	return count;
-}
-
-static const struct file_operations zcrypt_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= zcrypt_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-	.write		= zcrypt_proc_write,
-};
-
 static int zcrypt_rng_device_count;
 static u32 *zcrypt_rng_buffer;
 static int zcrypt_rng_buffer_index;
@@ -1448,27 +1254,15 @@ int __init zcrypt_api_init(void)
 	if (rc)
 		goto out;
 
-	atomic_set(&zcrypt_rescan_req, 0);
-
 	/* Register the request sprayer. */
 	rc = misc_register(&zcrypt_misc_device);
 	if (rc < 0)
 		goto out;
 
-	/* Set up the proc file system */
-	zcrypt_entry = proc_create("driver/z90crypt", 0644, NULL,
-				   &zcrypt_proc_fops);
-	if (!zcrypt_entry) {
-		rc = -ENOMEM;
-		goto out_misc;
-	}
-
 	zcrypt_msgtype6_init();
 	zcrypt_msgtype50_init();
 	return 0;
 
-out_misc:
-	misc_deregister(&zcrypt_misc_device);
 out:
 	return rc;
 }
@@ -1480,7 +1274,6 @@ out:
  */
 void __exit zcrypt_api_exit(void)
 {
-	remove_proc_entry("driver/z90crypt", NULL);
 	misc_deregister(&zcrypt_misc_device);
 	zcrypt_msgtype6_exit();
 	zcrypt_msgtype50_exit();
diff --git a/drivers/s390/crypto/zcrypt_api.h b/drivers/s390/crypto/zcrypt_api.h
index 9fff8912f6e3..f149a8fee60d 100644
--- a/drivers/s390/crypto/zcrypt_api.h
+++ b/drivers/s390/crypto/zcrypt_api.h
@@ -21,30 +21,6 @@
 #include <asm/zcrypt.h>
 #include "ap_bus.h"
 
-/* deprecated status calls */
-#define ICAZ90STATUS		_IOR(ZCRYPT_IOCTL_MAGIC, 0x10, struct ica_z90_status)
-#define Z90STAT_PCIXCCCOUNT	_IOR(ZCRYPT_IOCTL_MAGIC, 0x43, int)
-
-/**
- * This structure is deprecated and the corresponding ioctl() has been
- * replaced with individual ioctl()s for each piece of data!
- */
-struct ica_z90_status {
-	int totalcount;
-	int leedslitecount; // PCICA
-	int leeds2count;    // PCICC
-	// int PCIXCCCount; is not in struct for backward compatibility
-	int requestqWaitCount;
-	int pendingqWaitCount;
-	int totalOpenCount;
-	int cryptoDomain;
-	// status: 0=not there, 1=PCICA, 2=PCICC, 3=PCIXCC_MCL2, 4=PCIXCC_MCL3,
-	//	   5=CEX2C
-	unsigned char status[64];
-	// qdepth: # work elements waiting for each device
-	unsigned char qdepth[64];
-};
-
 /**
  * device type for an actual device is either PCICA, PCICC, PCIXCC_MCL2,
  * PCIXCC_MCL3, CEX2C, or CEX2A
@@ -179,6 +155,6 @@ struct zcrypt_ops *zcrypt_msgtype(unsigned char *, int);
 int zcrypt_api_init(void);
 void zcrypt_api_exit(void);
 long zcrypt_send_cprb(struct ica_xcRB *xcRB);
-void zcrypt_device_status_mask(struct zcrypt_device_matrix *devstatus);
+void zcrypt_device_status_mask_ext(struct zcrypt_device_status_ext *devstatus);
 
 #endif /* _ZCRYPT_API_H_ */
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 19203340f879..04fefa5bb08d 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -1369,7 +1369,7 @@ static void qeth_set_multiple_write_queues(struct qeth_card *card)
 static void qeth_update_from_chp_desc(struct qeth_card *card)
 {
 	struct ccw_device *ccwdev;
-	struct channel_path_desc *chp_dsc;
+	struct channel_path_desc_fmt0 *chp_dsc;
 
 	QETH_DBF_TEXT(SETUP, 2, "chp_desc");
 
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 0cf25d789d05..3f3cb72e0c0c 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -587,18 +587,28 @@ out:
 static unsigned int sr_block_check_events(struct gendisk *disk,
 					  unsigned int clearing)
 {
-	struct scsi_cd *cd = scsi_cd(disk);
+	unsigned int ret = 0;
+	struct scsi_cd *cd;
 
-	if (atomic_read(&cd->device->disk_events_disable_depth))
+	cd = scsi_cd_get(disk);
+	if (!cd)
 		return 0;
 
-	return cdrom_check_events(&cd->cdi, clearing);
+	if (!atomic_read(&cd->device->disk_events_disable_depth))
+		ret = cdrom_check_events(&cd->cdi, clearing);
+
+	scsi_cd_put(cd);
+	return ret;
 }
 
 static int sr_block_revalidate_disk(struct gendisk *disk)
 {
-	struct scsi_cd *cd = scsi_cd(disk);
 	struct scsi_sense_hdr sshdr;
+	struct scsi_cd *cd;
+
+	cd = scsi_cd_get(disk);
+	if (!cd)
+		return -ENXIO;
 
 	/* if the unit is not ready, nothing more to do */
 	if (scsi_test_unit_ready(cd->device, SR_TIMEOUT, MAX_RETRIES, &sshdr))
@@ -607,6 +617,7 @@ static int sr_block_revalidate_disk(struct gendisk *disk)
 	sr_cd_check(&cd->cdi);
 	get_sectorsize(cd);
 out:
+	scsi_cd_put(cd);
 	return 0;
 }
 
diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig
index a993d19fa562..5c4535b545cc 100644
--- a/drivers/soc/qcom/Kconfig
+++ b/drivers/soc/qcom/Kconfig
@@ -37,7 +37,7 @@ config QCOM_PM
 
 config QCOM_QMI_HELPERS
 	tristate
-	depends on ARCH_QCOM
+	depends on ARCH_QCOM && NET
 	help
 	  Helper library for handling QMI encoded messages.  QMI encoded
 	  messages are used in communication between the majority of QRTR
diff --git a/drivers/soc/qcom/mdt_loader.c b/drivers/soc/qcom/mdt_loader.c
index 08bd8549242a..17b314d9a148 100644
--- a/drivers/soc/qcom/mdt_loader.c
+++ b/drivers/soc/qcom/mdt_loader.c
@@ -83,12 +83,14 @@ EXPORT_SYMBOL_GPL(qcom_mdt_get_size);
  * @mem_region:	allocated memory region to load firmware into
  * @mem_phys:	physical address of allocated memory region
  * @mem_size:	size of the allocated memory region
+ * @reloc_base:	adjusted physical address after relocation
  *
  * Returns 0 on success, negative errno otherwise.
  */
 int qcom_mdt_load(struct device *dev, const struct firmware *fw,
 		  const char *firmware, int pas_id, void *mem_region,
-		  phys_addr_t mem_phys, size_t mem_size)
+		  phys_addr_t mem_phys, size_t mem_size,
+		  phys_addr_t *reloc_base)
 {
 	const struct elf32_phdr *phdrs;
 	const struct elf32_phdr *phdr;
@@ -192,6 +194,9 @@ int qcom_mdt_load(struct device *dev, const struct firmware *fw,
 			memset(ptr + phdr->p_filesz, 0, phdr->p_memsz - phdr->p_filesz);
 	}
 
+	if (reloc_base)
+		*reloc_base = mem_reloc;
+
 out:
 	kfree(fw_name);
 
diff --git a/drivers/soc/samsung/pm_domains.c b/drivers/soc/samsung/pm_domains.c
index b6a436594a19..caf45cf7aa8e 100644
--- a/drivers/soc/samsung/pm_domains.c
+++ b/drivers/soc/samsung/pm_domains.c
@@ -147,6 +147,12 @@ static __init const char *exynos_get_domain_name(struct device_node *node)
 	return kstrdup_const(name, GFP_KERNEL);
 }
 
+static const char *soc_force_no_clk[] = {
+	"samsung,exynos5250-clock",
+	"samsung,exynos5420-clock",
+	"samsung,exynos5800-clock",
+};
+
 static __init int exynos4_pm_init_power_domain(void)
 {
 	struct device_node *np;
@@ -183,6 +189,19 @@ static __init int exynos4_pm_init_power_domain(void)
 		pd->pd.power_on = exynos_pd_power_on;
 		pd->local_pwr_cfg = pm_domain_cfg->local_pwr_cfg;
 
+		/* Any node matching soc_force_no_clk[] sends us to no_clk. */
+		for (i = 0; i < ARRAY_SIZE(soc_force_no_clk); i++) {
+			struct device_node *clk_np;
+
+			clk_np = of_find_compatible_node(NULL, NULL,
+							 soc_force_no_clk[i]);
+			if (clk_np) {
+				/* drop the reference taken by the lookup */
+				of_node_put(clk_np);
+				goto no_clk;
+			}
+		}
+
 		for (i = 0; i < MAX_CLK_PER_DOMAIN; i++) {
 			char clk_name[8];
 
diff --git a/drivers/staging/lustre/lustre/llite/glimpse.c b/drivers/staging/lustre/lustre/llite/glimpse.c
index c43ac574274c..3075358f3f08 100644
--- a/drivers/staging/lustre/lustre/llite/glimpse.c
+++ b/drivers/staging/lustre/lustre/llite/glimpse.c
@@ -69,7 +69,7 @@ blkcnt_t dirty_cnt(struct inode *inode)
 	void	      *results[1];
 
 	if (inode->i_mapping)
-		cnt += radix_tree_gang_lookup_tag(&inode->i_mapping->page_tree,
+		cnt += radix_tree_gang_lookup_tag(&inode->i_mapping->i_pages,
 						  results, 0, 1,
 						  PAGECACHE_TAG_DIRTY);
 	if (cnt == 0 && atomic_read(&vob->vob_mmap_cnt) > 0)
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_request.c b/drivers/staging/lustre/lustre/mdc/mdc_request.c
index 3b1c8e5a3053..8ee7b4d273b2 100644
--- a/drivers/staging/lustre/lustre/mdc/mdc_request.c
+++ b/drivers/staging/lustre/lustre/mdc/mdc_request.c
@@ -934,14 +934,14 @@ static struct page *mdc_page_locate(struct address_space *mapping, __u64 *hash,
 	struct page *page;
 	int found;
 
-	spin_lock_irq(&mapping->tree_lock);
-	found = radix_tree_gang_lookup(&mapping->page_tree,
+	xa_lock_irq(&mapping->i_pages);
+	found = radix_tree_gang_lookup(&mapping->i_pages,
 				       (void **)&page, offset, 1);
 	if (found > 0 && !radix_tree_exceptional_entry(page)) {
 		struct lu_dirpage *dp;
 
 		get_page(page);
-		spin_unlock_irq(&mapping->tree_lock);
+		xa_unlock_irq(&mapping->i_pages);
 		/*
 		 * In contrast to find_lock_page() we are sure that directory
 		 * page cannot be truncated (while DLM lock is held) and,
@@ -989,7 +989,7 @@ static struct page *mdc_page_locate(struct address_space *mapping, __u64 *hash,
 			page = ERR_PTR(-EIO);
 		}
 	} else {
-		spin_unlock_irq(&mapping->tree_lock);
+		xa_unlock_irq(&mapping->i_pages);
 		page = NULL;
 	}
 	return page;
diff --git a/drivers/staging/media/atomisp/i2c/atomisp-gc0310.c b/drivers/staging/media/atomisp/i2c/atomisp-gc0310.c
index 93753cb96180..512fa87fa11b 100644
--- a/drivers/staging/media/atomisp/i2c/atomisp-gc0310.c
+++ b/drivers/staging/media/atomisp/i2c/atomisp-gc0310.c
@@ -619,7 +619,7 @@ static const struct v4l2_ctrl_ops ctrl_ops = {
 	.g_volatile_ctrl = gc0310_g_volatile_ctrl
 };
 
-struct v4l2_ctrl_config gc0310_controls[] = {
+static const struct v4l2_ctrl_config gc0310_controls[] = {
 	{
 	 .ops = &ctrl_ops,
 	 .id = V4L2_CID_EXPOSURE_ABSOLUTE,
diff --git a/drivers/staging/media/atomisp/i2c/atomisp-mt9m114.c b/drivers/staging/media/atomisp/i2c/atomisp-mt9m114.c
index 834fba8c4fa0..44db9f9f1fc5 100644
--- a/drivers/staging/media/atomisp/i2c/atomisp-mt9m114.c
+++ b/drivers/staging/media/atomisp/i2c/atomisp-mt9m114.c
@@ -107,7 +107,7 @@ mt9m114_write_reg(struct i2c_client *client, u16 data_length, u16 reg, u32 val)
 	int num_msg;
 	struct i2c_msg msg;
 	unsigned char data[6] = {0};
-	u16 *wreg;
+	__be16 *wreg;
 	int retry = 0;
 
 	if (!client->adapter) {
@@ -130,18 +130,20 @@ again:
 	msg.buf = data;
 
 	/* high byte goes out first */
-	wreg = (u16 *)data;
+	wreg = (void *)data;
 	*wreg = cpu_to_be16(reg);
 
 	if (data_length == MISENSOR_8BIT) {
 		data[2] = (u8)(val);
 	} else if (data_length == MISENSOR_16BIT) {
-		u16 *wdata = (u16 *)&data[2];
-		*wdata = be16_to_cpu((u16)val);
+		__be16 *wdata = (void *)&data[2];
+
+		*wdata = cpu_to_be16((u16)val);
 	} else {
 		/* MISENSOR_32BIT */
-		u32 *wdata = (u32 *)&data[2];
-		*wdata = be32_to_cpu(val);
+		__be32 *wdata = (void *)&data[2];
+
+		*wdata = cpu_to_be32(val);
 	}
 
 	num_msg = i2c_transfer(client->adapter, &msg, 1);
@@ -245,6 +247,7 @@ static int __mt9m114_flush_reg_array(struct i2c_client *client,
 	const int num_msg = 1;
 	int ret;
 	int retry = 0;
+	__be16 *data16 = (void *)&ctrl->buffer.addr;
 
 	if (ctrl->index == 0)
 		return 0;
@@ -253,7 +256,7 @@ again:
 	msg.addr = client->addr;
 	msg.flags = 0;
 	msg.len = 2 + ctrl->index;
-	ctrl->buffer.addr = cpu_to_be16(ctrl->buffer.addr);
+	*data16 = cpu_to_be16(ctrl->buffer.addr);
 	msg.buf = (u8 *)&ctrl->buffer;
 
 	ret = i2c_transfer(client->adapter, &msg, num_msg);
@@ -282,8 +285,8 @@ static int __mt9m114_buf_reg_array(struct i2c_client *client,
 				   struct mt9m114_write_ctrl *ctrl,
 				   const struct misensor_reg *next)
 {
-	u16 *data16;
-	u32 *data32;
+	__be16 *data16;
+	__be32 *data32;
 	int err;
 
 	/* Insufficient buffer? Let's flush and get more free space. */
@@ -298,11 +301,11 @@ static int __mt9m114_buf_reg_array(struct i2c_client *client,
 		ctrl->buffer.data[ctrl->index] = (u8)next->val;
 		break;
 	case MISENSOR_16BIT:
-		data16 = (u16 *)&ctrl->buffer.data[ctrl->index];
+		data16 = (__be16 *)&ctrl->buffer.data[ctrl->index];
 		*data16 = cpu_to_be16((u16)next->val);
 		break;
 	case MISENSOR_32BIT:
-		data32 = (u32 *)&ctrl->buffer.data[ctrl->index];
+		data32 = (__be32 *)&ctrl->buffer.data[ctrl->index];
 		*data32 = cpu_to_be32(next->val);
 		break;
 	default:
diff --git a/drivers/staging/media/atomisp/i2c/atomisp-ov2680.c b/drivers/staging/media/atomisp/i2c/atomisp-ov2680.c
index 11412061c40e..c0849299d592 100644
--- a/drivers/staging/media/atomisp/i2c/atomisp-ov2680.c
+++ b/drivers/staging/media/atomisp/i2c/atomisp-ov2680.c
@@ -94,9 +94,9 @@ static int ov2680_read_reg(struct i2c_client *client,
 	if (data_length == OV2680_8BIT)
 		*val = (u8)data[0];
 	else if (data_length == OV2680_16BIT)
-		*val = be16_to_cpu(*(u16 *)&data[0]);
+		*val = be16_to_cpu(*(__be16 *)&data[0]);
 	else
-		*val = be32_to_cpu(*(u32 *)&data[0]);
+		*val = be32_to_cpu(*(__be32 *)&data[0]);
 	//dev_dbg(&client->dev,  "++++i2c read adr%x = %x\n", reg,*val);
 	return 0;
 }
@@ -121,7 +121,7 @@ static int ov2680_write_reg(struct i2c_client *client, u16 data_length,
 {
 	int ret;
 	unsigned char data[4] = {0};
-	u16 *wreg = (u16 *)data;
+	__be16 *wreg = (void *)data;
 	const u16 len = data_length + sizeof(u16); /* 16-bit address + data */
 
 	if (data_length != OV2680_8BIT && data_length != OV2680_16BIT) {
@@ -137,7 +137,8 @@ static int ov2680_write_reg(struct i2c_client *client, u16 data_length,
 		data[2] = (u8)(val);
 	} else {
 		/* OV2680_16BIT */
-		u16 *wdata = (u16 *)&data[2];
+		__be16 *wdata = (void *)&data[2];
+
 		*wdata = cpu_to_be16(val);
 	}
 
@@ -169,12 +170,13 @@ static int __ov2680_flush_reg_array(struct i2c_client *client,
 				    struct ov2680_write_ctrl *ctrl)
 {
 	u16 size;
+	__be16 *data16 = (void *)&ctrl->buffer.addr;
 
 	if (ctrl->index == 0)
 		return 0;
 
 	size = sizeof(u16) + ctrl->index; /* 16-bit address + data */
-	ctrl->buffer.addr = cpu_to_be16(ctrl->buffer.addr);
+	*data16 = cpu_to_be16(ctrl->buffer.addr);
 	ctrl->index = 0;
 
 	return ov2680_i2c_write(client, size, (u8 *)&ctrl->buffer);
@@ -185,7 +187,7 @@ static int __ov2680_buf_reg_array(struct i2c_client *client,
 				  const struct ov2680_reg *next)
 {
 	int size;
-	u16 *data16;
+	__be16 *data16;
 
 	switch (next->type) {
 	case OV2680_8BIT:
@@ -194,7 +196,7 @@ static int __ov2680_buf_reg_array(struct i2c_client *client,
 		break;
 	case OV2680_16BIT:
 		size = 2;
-		data16 = (u16 *)&ctrl->buffer.data[ctrl->index];
+		data16 = (void *)&ctrl->buffer.data[ctrl->index];
 		*data16 = cpu_to_be16((u16)next->val);
 		break;
 	default:
@@ -722,7 +724,7 @@ static const struct v4l2_ctrl_ops ctrl_ops = {
 	.g_volatile_ctrl = ov2680_g_volatile_ctrl
 };
 
-struct v4l2_ctrl_config ov2680_controls[] = {
+static const struct v4l2_ctrl_config ov2680_controls[] = {
 	{
 	 .ops = &ctrl_ops,
 	 .id = V4L2_CID_EXPOSURE_ABSOLUTE,
diff --git a/drivers/staging/media/atomisp/i2c/atomisp-ov2722.c b/drivers/staging/media/atomisp/i2c/atomisp-ov2722.c
index e59358ac89ce..a362eebd882f 100644
--- a/drivers/staging/media/atomisp/i2c/atomisp-ov2722.c
+++ b/drivers/staging/media/atomisp/i2c/atomisp-ov2722.c
@@ -85,9 +85,9 @@ static int ov2722_read_reg(struct i2c_client *client,
 	if (data_length == OV2722_8BIT)
 		*val = (u8)data[0];
 	else if (data_length == OV2722_16BIT)
-		*val = be16_to_cpu(*(u16 *)&data[0]);
+		*val = be16_to_cpu(*(__be16 *)&data[0]);
 	else
-		*val = be32_to_cpu(*(u32 *)&data[0]);
+		*val = be32_to_cpu(*(__be32 *)&data[0]);
 
 	return 0;
 }
@@ -112,7 +112,7 @@ static int ov2722_write_reg(struct i2c_client *client, u16 data_length,
 {
 	int ret;
 	unsigned char data[4] = {0};
-	u16 *wreg = (u16 *)data;
+	__be16 *wreg = (__be16 *)data;
 	const u16 len = data_length + sizeof(u16); /* 16-bit address + data */
 
 	if (data_length != OV2722_8BIT && data_length != OV2722_16BIT) {
@@ -128,7 +128,8 @@ static int ov2722_write_reg(struct i2c_client *client, u16 data_length,
 		data[2] = (u8)(val);
 	} else {
 		/* OV2722_16BIT */
-		u16 *wdata = (u16 *)&data[2];
+		__be16 *wdata = (__be16 *)&data[2];
+
 		*wdata = cpu_to_be16(val);
 	}
 
@@ -160,12 +161,13 @@ static int __ov2722_flush_reg_array(struct i2c_client *client,
 				    struct ov2722_write_ctrl *ctrl)
 {
 	u16 size;
+	__be16 *data16 = (void *)&ctrl->buffer.addr;
 
 	if (ctrl->index == 0)
 		return 0;
 
 	size = sizeof(u16) + ctrl->index; /* 16-bit address + data */
-	ctrl->buffer.addr = cpu_to_be16(ctrl->buffer.addr);
+	*data16 = cpu_to_be16(ctrl->buffer.addr);
 	ctrl->index = 0;
 
 	return ov2722_i2c_write(client, size, (u8 *)&ctrl->buffer);
@@ -176,7 +178,7 @@ static int __ov2722_buf_reg_array(struct i2c_client *client,
 				  const struct ov2722_reg *next)
 {
 	int size;
-	u16 *data16;
+	__be16 *data16;
 
 	switch (next->type) {
 	case OV2722_8BIT:
@@ -185,7 +187,7 @@ static int __ov2722_buf_reg_array(struct i2c_client *client,
 		break;
 	case OV2722_16BIT:
 		size = 2;
-		data16 = (u16 *)&ctrl->buffer.data[ctrl->index];
+		data16 = (void *)&ctrl->buffer.data[ctrl->index];
 		*data16 = cpu_to_be16((u16)next->val);
 		break;
 	default:
@@ -569,7 +571,7 @@ static const struct v4l2_ctrl_ops ctrl_ops = {
 	.g_volatile_ctrl = ov2722_g_volatile_ctrl
 };
 
-struct v4l2_ctrl_config ov2722_controls[] = {
+static const struct v4l2_ctrl_config ov2722_controls[] = {
 	{
 	 .ops = &ctrl_ops,
 	 .id = V4L2_CID_EXPOSURE_ABSOLUTE,
diff --git a/drivers/staging/media/atomisp/i2c/gc0310.h b/drivers/staging/media/atomisp/i2c/gc0310.h
index af6b11f6e5e7..70c252c5163c 100644
--- a/drivers/staging/media/atomisp/i2c/gc0310.h
+++ b/drivers/staging/media/atomisp/i2c/gc0310.h
@@ -377,8 +377,7 @@ static struct gc0310_reg const gc0310_VGA_30fps[] = {
 	{GC0310_TOK_TERM, 0, 0},
 };
 
-
-struct gc0310_resolution gc0310_res_preview[] = {
+static struct gc0310_resolution gc0310_res_preview[] = {
 	{
 		.desc = "gc0310_VGA_30fps",
 		.width = 656, // 648,
diff --git a/drivers/staging/media/atomisp/i2c/ov2722.h b/drivers/staging/media/atomisp/i2c/ov2722.h
index 028b04aaaa8f..757b37613ccc 100644
--- a/drivers/staging/media/atomisp/i2c/ov2722.h
+++ b/drivers/staging/media/atomisp/i2c/ov2722.h
@@ -1096,7 +1096,7 @@ static struct ov2722_reg const ov2722_720p_30fps[] = {
 	{OV2722_TOK_TERM, 0, 0},
 };
 
-struct ov2722_resolution ov2722_res_preview[] = {
+static struct ov2722_resolution ov2722_res_preview[] = {
 	{
 		.desc = "ov2722_1632_1092_30fps",
 		.width = 1632,
diff --git a/drivers/staging/media/atomisp/i2c/ov5693/atomisp-ov5693.c b/drivers/staging/media/atomisp/i2c/ov5693/atomisp-ov5693.c
index 30a735e59e54..714297c36b3e 100644
--- a/drivers/staging/media/atomisp/i2c/ov5693/atomisp-ov5693.c
+++ b/drivers/staging/media/atomisp/i2c/ov5693/atomisp-ov5693.c
@@ -173,9 +173,9 @@ static int ov5693_read_reg(struct i2c_client *client,
 	if (data_length == OV5693_8BIT)
 		*val = (u8)data[0];
 	else if (data_length == OV5693_16BIT)
-		*val = be16_to_cpu(*(u16 *)&data[0]);
+		*val = be16_to_cpu(*(__be16 *)&data[0]);
 	else
-		*val = be32_to_cpu(*(u32 *)&data[0]);
+		*val = be32_to_cpu(*(__be32 *)&data[0]);
 
 	return 0;
 }
@@ -200,13 +200,13 @@ static int vcm_dw_i2c_write(struct i2c_client *client, u16 data)
 	struct i2c_msg msg;
 	const int num_msg = 1;
 	int ret;
-	u16 val;
+	__be16 val;
 
 	val = cpu_to_be16(data);
 	msg.addr = VCM_ADDR;
 	msg.flags = 0;
 	msg.len = OV5693_16BIT;
-	msg.buf = (u8 *)&val;
+	msg.buf = (void *)&val;
 
 	ret = i2c_transfer(client->adapter, &msg, 1);
 
@@ -263,7 +263,7 @@ static int ov5693_write_reg(struct i2c_client *client, u16 data_length,
 {
 	int ret;
 	unsigned char data[4] = {0};
-	u16 *wreg = (u16 *)data;
+	__be16 *wreg = (void *)data;
 	const u16 len = data_length + sizeof(u16); /* 16-bit address + data */
 
 	if (data_length != OV5693_8BIT && data_length != OV5693_16BIT) {
@@ -279,7 +279,8 @@ static int ov5693_write_reg(struct i2c_client *client, u16 data_length,
 		data[2] = (u8)(val);
 	} else {
 		/* OV5693_16BIT */
-		u16 *wdata = (u16 *)&data[2];
+		__be16 *wdata = (void *)&data[2];
+
 		*wdata = cpu_to_be16(val);
 	}
 
@@ -311,15 +312,17 @@ static int __ov5693_flush_reg_array(struct i2c_client *client,
 				    struct ov5693_write_ctrl *ctrl)
 {
 	u16 size;
+	__be16 *reg = (void *)&ctrl->buffer.addr;
 
 	if (ctrl->index == 0)
 		return 0;
 
 	size = sizeof(u16) + ctrl->index; /* 16-bit address + data */
-	ctrl->buffer.addr = cpu_to_be16(ctrl->buffer.addr);
+
+	*reg = cpu_to_be16(ctrl->buffer.addr);
 	ctrl->index = 0;
 
-	return ov5693_i2c_write(client, size, (u8 *)&ctrl->buffer);
+	return ov5693_i2c_write(client, size, (u8 *)reg);
 }
 
 static int __ov5693_buf_reg_array(struct i2c_client *client,
@@ -327,7 +330,7 @@ static int __ov5693_buf_reg_array(struct i2c_client *client,
 				  const struct ov5693_reg *next)
 {
 	int size;
-	u16 *data16;
+	__be16 *data16;
 
 	switch (next->type) {
 	case OV5693_8BIT:
@@ -336,7 +339,8 @@ static int __ov5693_buf_reg_array(struct i2c_client *client,
 		break;
 	case OV5693_16BIT:
 		size = 2;
-		data16 = (u16 *)&ctrl->buffer.data[ctrl->index];
+
+		data16 = (void *)&ctrl->buffer.data[ctrl->index];
 		*data16 = cpu_to_be16((u16)next->val);
 		break;
 	default:
@@ -951,7 +955,7 @@ static int ad5823_t_focus_vcm(struct v4l2_subdev *sd, u16 val)
 	return ret;
 }
 
-int ad5823_t_focus_abs(struct v4l2_subdev *sd, s32 value)
+static int ad5823_t_focus_abs(struct v4l2_subdev *sd, s32 value)
 {
 	value = min(value, AD5823_MAX_FOCUS_POS);
 	return ad5823_t_focus_vcm(sd, value);
@@ -1132,7 +1136,7 @@ static const struct v4l2_ctrl_ops ctrl_ops = {
 	.g_volatile_ctrl = ov5693_g_volatile_ctrl
 };
 
-struct v4l2_ctrl_config ov5693_controls[] = {
+static const struct v4l2_ctrl_config ov5693_controls[] = {
 	{
 	 .ops = &ctrl_ops,
 	 .id = V4L2_CID_EXPOSURE_ABSOLUTE,
diff --git a/drivers/staging/media/atomisp/i2c/ov5693/ov5693.h b/drivers/staging/media/atomisp/i2c/ov5693/ov5693.h
index 6d27dd849a62..9058a82455a6 100644
--- a/drivers/staging/media/atomisp/i2c/ov5693/ov5693.h
+++ b/drivers/staging/media/atomisp/i2c/ov5693/ov5693.h
@@ -1087,7 +1087,7 @@ static struct ov5693_reg const ov5693_2576x1936_30fps[] = {
 	{OV5693_TOK_TERM, 0, 0}
 };
 
-struct ov5693_resolution ov5693_res_preview[] = {
+static struct ov5693_resolution ov5693_res_preview[] = {
 	{
 		.desc = "ov5693_736x496_30fps",
 		.width = 736,
diff --git a/drivers/staging/media/atomisp/include/linux/atomisp_platform.h b/drivers/staging/media/atomisp/include/linux/atomisp_platform.h
index e0f0c379e7ce..aa5e294e7b7d 100644
--- a/drivers/staging/media/atomisp/include/linux/atomisp_platform.h
+++ b/drivers/staging/media/atomisp/include/linux/atomisp_platform.h
@@ -104,6 +104,10 @@ enum atomisp_input_format {
 	ATOMISP_INPUT_FORMAT_USER_DEF8,  /* User defined 8-bit data type 8 */
 };
 
+#define N_ATOMISP_INPUT_FORMAT (ATOMISP_INPUT_FORMAT_USER_DEF8 + 1)
+
+
+
 enum intel_v4l2_subdev_type {
 	RAW_CAMERA = 1,
 	SOC_CAMERA = 2,
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/Makefile b/drivers/staging/media/atomisp/pci/atomisp2/Makefile
index 83f816faba1b..7fead5fc9a7d 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/Makefile
+++ b/drivers/staging/media/atomisp/pci/atomisp2/Makefile
@@ -59,17 +59,14 @@ atomisp-objs += \
 	css2400/isp/kernels/bnr/bnr_1.0/ia_css_bnr.host.o \
 	css2400/isp/kernels/bnr/bnr2_2/ia_css_bnr2_2.host.o \
 	css2400/isp/kernels/dpc2/ia_css_dpc2.host.o \
-	css2400/isp/kernels/dpc2/ia_css_dpc2_default.host.o \
 	css2400/isp/kernels/fc/fc_1.0/ia_css_formats.host.o \
 	css2400/isp/kernels/ctc/ctc_1.0/ia_css_ctc.host.o \
 	css2400/isp/kernels/ctc/ctc_1.0/ia_css_ctc_table.host.o \
 	css2400/isp/kernels/ctc/ctc2/ia_css_ctc2.host.o \
 	css2400/isp/kernels/ctc/ctc1_5/ia_css_ctc1_5.host.o \
 	css2400/isp/kernels/bh/bh_2/ia_css_bh.host.o \
-	css2400/isp/kernels/bnlm/ia_css_bnlm_default.host.o \
 	css2400/isp/kernels/bnlm/ia_css_bnlm.host.o \
 	css2400/isp/kernels/tdf/tdf_1.0/ia_css_tdf.host.o \
-	css2400/isp/kernels/tdf/tdf_1.0/ia_css_tdf_default.host.o \
 	css2400/isp/kernels/dvs/dvs_1.0/ia_css_dvs.host.o \
 	css2400/isp/kernels/anr/anr_1.0/ia_css_anr.host.o \
 	css2400/isp/kernels/anr/anr_2/ia_css_anr2_table.host.o \
@@ -96,7 +93,6 @@ atomisp-objs += \
 	css2400/isp/kernels/ob/ob2/ia_css_ob2.host.o \
 	css2400/isp/kernels/iterator/iterator_1.0/ia_css_iterator.host.o \
 	css2400/isp/kernels/wb/wb_1.0/ia_css_wb.host.o \
-	css2400/isp/kernels/eed1_8/ia_css_eed1_8_default.host.o \
 	css2400/isp/kernels/eed1_8/ia_css_eed1_8.host.o \
 	css2400/isp/kernels/sc/sc_1.0/ia_css_sc.host.o \
 	css2400/isp/kernels/ipu2_io_ls/bayer_io_ls/ia_css_bayer_io.host.o \
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_cmd.c b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_cmd.c
index 22f2dbcecc15..fa6ea506f8b1 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_cmd.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_cmd.c
@@ -437,7 +437,7 @@ static void atomisp_reset_event(struct atomisp_sub_device *asd)
 }
 
 
-static void print_csi_rx_errors(enum ia_css_csi2_port port,
+static void print_csi_rx_errors(enum mipi_port_id port,
 				struct atomisp_device *isp)
 {
 	u32 infos = 0;
@@ -481,7 +481,7 @@ static void clear_irq_reg(struct atomisp_device *isp)
 }
 
 static struct atomisp_sub_device *
-__get_asd_from_port(struct atomisp_device *isp, mipi_port_ID_t port)
+__get_asd_from_port(struct atomisp_device *isp, enum mipi_port_id port)
 {
 	int i;
 
@@ -515,7 +515,7 @@ irqreturn_t atomisp_isr(int irq, void *dev)
 
 	spin_lock_irqsave(&isp->lock, flags);
 	if (isp->sw_contex.power_state != ATOM_ISP_POWER_UP ||
-	    isp->css_initialized == false) {
+	    !isp->css_initialized) {
 		spin_unlock_irqrestore(&isp->lock, flags);
 		return IRQ_HANDLED;
 	}
@@ -570,9 +570,9 @@ irqreturn_t atomisp_isr(int irq, void *dev)
 	    (irq_infos & CSS_IRQ_INFO_IF_ERROR)) {
 		/* handle mipi receiver error */
 		u32 rx_infos;
-		enum ia_css_csi2_port port;
+		enum mipi_port_id port;
 
-		for (port = IA_CSS_CSI2_PORT0; port <= IA_CSS_CSI2_PORT2;
+		for (port = MIPI_PORT0_ID; port <= MIPI_PORT2_ID;
 		     port++) {
 			print_csi_rx_errors(port, isp);
 			atomisp_css_rx_get_irq_info(port, &rx_infos);
@@ -4603,7 +4603,7 @@ int atomisp_fixed_pattern(struct atomisp_sub_device *asd, int flag,
 	}
 
 	if (*value == 0) {
-		asd->params.fpn_en = 0;
+		asd->params.fpn_en = false;
 		return 0;
 	}
 
@@ -5028,7 +5028,7 @@ atomisp_try_fmt_file(struct atomisp_device *isp, struct v4l2_format *f)
 	return 0;
 }
 
-mipi_port_ID_t __get_mipi_port(struct atomisp_device *isp,
+enum mipi_port_id __get_mipi_port(struct atomisp_device *isp,
 				enum atomisp_camera_port port)
 {
 	switch (port) {
@@ -5162,22 +5162,22 @@ static int __enable_continuous_mode(struct atomisp_sub_device *asd,
 	return atomisp_update_run_mode(asd);
 }
 
-int configure_pp_input_nop(struct atomisp_sub_device *asd,
-			   unsigned int width, unsigned int height)
+static int configure_pp_input_nop(struct atomisp_sub_device *asd,
+				  unsigned int width, unsigned int height)
 {
 	return 0;
 }
 
-int configure_output_nop(struct atomisp_sub_device *asd,
-			 unsigned int width, unsigned int height,
-			 unsigned int min_width,
-			 enum atomisp_css_frame_format sh_fmt)
+static int configure_output_nop(struct atomisp_sub_device *asd,
+				unsigned int width, unsigned int height,
+				unsigned int min_width,
+				enum atomisp_css_frame_format sh_fmt)
 {
 	return 0;
 }
 
-int get_frame_info_nop(struct atomisp_sub_device *asd,
-		       struct atomisp_css_frame_info *finfo)
+static int get_frame_info_nop(struct atomisp_sub_device *asd,
+			      struct atomisp_css_frame_info *finfo)
 {
 	return 0;
 }
@@ -5524,7 +5524,7 @@ static void atomisp_get_dis_envelop(struct atomisp_sub_device *asd,
 
 	/* if subdev type is SOC camera,we do not need to set DVS */
 	if (isp->inputs[asd->input_curr].type == SOC_CAMERA)
-		asd->params.video_dis_en = 0;
+		asd->params.video_dis_en = false;
 
 	if (asd->params.video_dis_en &&
 	    asd->run_mode->val == ATOMISP_RUN_MODE_VIDEO) {
@@ -5624,7 +5624,7 @@ static int atomisp_set_fmt_to_snr(struct video_device *vdev,
 			ffmt = req_ffmt;
 			dev_warn(isp->dev,
 			  "can not enable video dis due to sensor limitation.");
-			asd->params.video_dis_en = 0;
+			asd->params.video_dis_en = false;
 		}
 	}
 	dev_dbg(isp->dev, "sensor width: %d, height: %d\n",
@@ -5649,7 +5649,7 @@ static int atomisp_set_fmt_to_snr(struct video_device *vdev,
 	    (ffmt->width < req_ffmt->width || ffmt->height < req_ffmt->height)) {
 		dev_warn(isp->dev,
 			 "can not enable video dis due to sensor limitation.");
-		asd->params.video_dis_en = 0;
+		asd->params.video_dis_en = false;
 	}
 
 	atomisp_subdev_set_ffmt(&asd->subdev, fh.pad,
@@ -6152,7 +6152,7 @@ int atomisp_set_shading_table(struct atomisp_sub_device *asd,
 
 	if (!user_shading_table->enable) {
 		atomisp_css_set_shading_table(asd, NULL);
-		asd->params.sc_en = 0;
+		asd->params.sc_en = false;
 		return 0;
 	}
 
@@ -6190,7 +6190,7 @@ int atomisp_set_shading_table(struct atomisp_sub_device *asd,
 	free_table = asd->params.css_param.shading_table;
 	asd->params.css_param.shading_table = shading_table;
 	atomisp_css_set_shading_table(asd, shading_table);
-	asd->params.sc_en = 1;
+	asd->params.sc_en = true;
 
 out:
 	if (free_table != NULL)
@@ -6627,7 +6627,7 @@ int atomisp_inject_a_fake_event(struct atomisp_sub_device *asd, int *event)
 	return 0;
 }
 
-int atomisp_get_pipe_id(struct atomisp_video_pipe *pipe)
+static int atomisp_get_pipe_id(struct atomisp_video_pipe *pipe)
 {
 	struct atomisp_sub_device *asd = pipe->asd;
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_cmd.h b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_cmd.h
index bdc73862fb79..79d493dba403 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_cmd.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_cmd.h
@@ -389,7 +389,7 @@ int atomisp_source_pad_to_stream_id(struct atomisp_sub_device *asd,
  */
 void atomisp_eof_event(struct atomisp_sub_device *asd, uint8_t exp_id);
 
-mipi_port_ID_t __get_mipi_port(struct atomisp_device *isp,
+enum mipi_port_id __get_mipi_port(struct atomisp_device *isp,
 				enum atomisp_camera_port port);
 
 bool atomisp_is_vf_pipe(struct atomisp_video_pipe *pipe);
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat.h b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat.h
index 3ef850cd25bd..6c829d0a1e4c 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat.h
@@ -148,10 +148,10 @@ void atomisp_css_init_struct(struct atomisp_sub_device *asd);
 int atomisp_css_irq_translate(struct atomisp_device *isp,
 			      unsigned int *infos);
 
-void atomisp_css_rx_get_irq_info(enum ia_css_csi2_port port,
+void atomisp_css_rx_get_irq_info(enum mipi_port_id port,
 					unsigned int *infos);
 
-void atomisp_css_rx_clear_irq_info(enum ia_css_csi2_port port,
+void atomisp_css_rx_clear_irq_info(enum mipi_port_id port,
 					unsigned int infos);
 
 int atomisp_css_irq_enable(struct atomisp_device *isp,
@@ -182,8 +182,6 @@ void atomisp_css_mmu_invalidate_cache(void);
 
 void atomisp_css_mmu_invalidate_tlb(void);
 
-void atomisp_css_mmu_set_page_table_base_index(unsigned long base_index);
-
 int atomisp_css_start(struct atomisp_sub_device *asd,
 		      enum atomisp_css_pipe_id pipe_id, bool in_reset);
 
@@ -255,7 +253,7 @@ void atomisp_css_isys_set_valid(struct atomisp_sub_device *asd,
 
 void atomisp_css_isys_set_format(struct atomisp_sub_device *asd,
 				 enum atomisp_input_stream_id stream_id,
-				 enum atomisp_css_stream_format format,
+				 enum atomisp_input_format format,
 				 int isys_stream);
 
 int atomisp_css_set_default_isys_config(struct atomisp_sub_device *asd,
@@ -264,18 +262,18 @@ int atomisp_css_set_default_isys_config(struct atomisp_sub_device *asd,
 
 int atomisp_css_isys_two_stream_cfg(struct atomisp_sub_device *asd,
 				    enum atomisp_input_stream_id stream_id,
-				    enum atomisp_css_stream_format input_format);
+				    enum atomisp_input_format input_format);
 
 void atomisp_css_isys_two_stream_cfg_update_stream1(
 				    struct atomisp_sub_device *asd,
 				    enum atomisp_input_stream_id stream_id,
-				    enum atomisp_css_stream_format input_format,
+				    enum atomisp_input_format input_format,
 				    unsigned int width, unsigned int height);
 
 void atomisp_css_isys_two_stream_cfg_update_stream2(
 				    struct atomisp_sub_device *asd,
 				    enum atomisp_input_stream_id stream_id,
-				    enum atomisp_css_stream_format input_format,
+				    enum atomisp_input_format input_format,
 				    unsigned int width, unsigned int height);
 
 int atomisp_css_input_set_resolution(struct atomisp_sub_device *asd,
@@ -292,7 +290,7 @@ void atomisp_css_input_set_bayer_order(struct atomisp_sub_device *asd,
 
 void atomisp_css_input_set_format(struct atomisp_sub_device *asd,
 				enum atomisp_input_stream_id stream_id,
-				enum atomisp_css_stream_format format);
+				enum atomisp_input_format format);
 
 int atomisp_css_input_set_effective_resolution(
 					struct atomisp_sub_device *asd,
@@ -334,11 +332,11 @@ void atomisp_css_enable_cvf(struct atomisp_sub_device *asd,
 							bool enable);
 
 int atomisp_css_input_configure_port(struct atomisp_sub_device *asd,
-				mipi_port_ID_t port,
+				enum mipi_port_id port,
 				unsigned int num_lanes,
 				unsigned int timeout,
 				unsigned int mipi_freq,
-				enum atomisp_css_stream_format metadata_format,
+				enum atomisp_input_format metadata_format,
 				unsigned int metadata_width,
 				unsigned int metadata_height);
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_css20.c b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_css20.c
index 7621b4537147..f668c68dc33a 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_css20.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_css20.c
@@ -88,7 +88,7 @@ unsigned int atomisp_css_debug_get_dtrace_level(void)
 	return ia_css_debug_trace_level;
 }
 
-void atomisp_css2_hw_store_8(hrt_address addr, uint8_t data)
+static void atomisp_css2_hw_store_8(hrt_address addr, uint8_t data)
 {
 	unsigned long flags;
 
@@ -126,7 +126,7 @@ static uint8_t atomisp_css2_hw_load_8(hrt_address addr)
 	return ret;
 }
 
-uint16_t atomisp_css2_hw_load_16(hrt_address addr)
+static uint16_t atomisp_css2_hw_load_16(hrt_address addr)
 {
 	unsigned long flags;
 	uint16_t ret;
@@ -136,7 +136,8 @@ uint16_t atomisp_css2_hw_load_16(hrt_address addr)
 	spin_unlock_irqrestore(&mmio_lock, flags);
 	return ret;
 }
-uint32_t atomisp_css2_hw_load_32(hrt_address addr)
+
+static uint32_t atomisp_css2_hw_load_32(hrt_address addr)
 {
 	unsigned long flags;
 	uint32_t ret;
@@ -1019,7 +1020,7 @@ int atomisp_css_irq_translate(struct atomisp_device *isp,
 	return 0;
 }
 
-void atomisp_css_rx_get_irq_info(enum ia_css_csi2_port port,
+void atomisp_css_rx_get_irq_info(enum mipi_port_id port,
 					unsigned int *infos)
 {
 #ifndef ISP2401_NEW_INPUT_SYSTEM
@@ -1029,7 +1030,7 @@ void atomisp_css_rx_get_irq_info(enum ia_css_csi2_port port,
 #endif
 }
 
-void atomisp_css_rx_clear_irq_info(enum ia_css_csi2_port port,
+void atomisp_css_rx_clear_irq_info(enum mipi_port_id port,
 					unsigned int infos)
 {
 #ifndef ISP2401_NEW_INPUT_SYSTEM
@@ -1159,31 +1160,6 @@ void atomisp_css_mmu_invalidate_tlb(void)
 	ia_css_mmu_invalidate_cache();
 }
 
-void atomisp_css_mmu_set_page_table_base_index(unsigned long base_index)
-{
-}
-
-/*
- * Check whether currently running MIPI buffer size fulfill
- * the requirement of the stream to be run
- */
-bool __need_realloc_mipi_buffer(struct atomisp_device *isp)
-{
-	unsigned int i;
-
-	for (i = 0; i < isp->num_of_streams; i++) {
-		struct atomisp_sub_device *asd = &isp->asd[i];
-
-		if (asd->streaming !=
-				ATOMISP_DEVICE_STREAMING_ENABLED)
-			continue;
-		if (asd->mipi_frame_size < isp->mipi_frame_size)
-			return true;
-	}
-
-	return false;
-}
-
 int atomisp_css_start(struct atomisp_sub_device *asd,
 			enum atomisp_css_pipe_id pipe_id, bool in_reset)
 {
@@ -1808,7 +1784,7 @@ void atomisp_css_isys_set_valid(struct atomisp_sub_device *asd,
 
 void atomisp_css_isys_set_format(struct atomisp_sub_device *asd,
 				 enum atomisp_input_stream_id stream_id,
-				 enum atomisp_css_stream_format format,
+				 enum atomisp_input_format format,
 				 int isys_stream)
 {
 
@@ -1820,7 +1796,7 @@ void atomisp_css_isys_set_format(struct atomisp_sub_device *asd,
 
 void atomisp_css_input_set_format(struct atomisp_sub_device *asd,
 					enum atomisp_input_stream_id stream_id,
-					enum atomisp_css_stream_format format)
+					enum atomisp_input_format format)
 {
 
 	struct ia_css_stream_config *s_config =
@@ -1859,7 +1835,7 @@ int atomisp_css_set_default_isys_config(struct atomisp_sub_device *asd,
 
 int atomisp_css_isys_two_stream_cfg(struct atomisp_sub_device *asd,
 				    enum atomisp_input_stream_id stream_id,
-				    enum atomisp_css_stream_format input_format)
+				    enum atomisp_input_format input_format)
 {
 	struct ia_css_stream_config *s_config =
 		&asd->stream_env[stream_id].stream_config;
@@ -1873,9 +1849,9 @@ int atomisp_css_isys_two_stream_cfg(struct atomisp_sub_device *asd,
 	s_config->isys_config[IA_CSS_STREAM_ISYS_STREAM_1].linked_isys_stream_id
 		= IA_CSS_STREAM_ISYS_STREAM_0;
 	s_config->isys_config[IA_CSS_STREAM_ISYS_STREAM_0].format =
-		IA_CSS_STREAM_FORMAT_USER_DEF1;
+		ATOMISP_INPUT_FORMAT_USER_DEF1;
 	s_config->isys_config[IA_CSS_STREAM_ISYS_STREAM_1].format =
-		IA_CSS_STREAM_FORMAT_USER_DEF2;
+		ATOMISP_INPUT_FORMAT_USER_DEF2;
 	s_config->isys_config[IA_CSS_STREAM_ISYS_STREAM_1].valid = true;
 	return 0;
 }
@@ -1883,7 +1859,7 @@ int atomisp_css_isys_two_stream_cfg(struct atomisp_sub_device *asd,
 void atomisp_css_isys_two_stream_cfg_update_stream1(
 				    struct atomisp_sub_device *asd,
 				    enum atomisp_input_stream_id stream_id,
-				    enum atomisp_css_stream_format input_format,
+				    enum atomisp_input_format input_format,
 				    unsigned int width, unsigned int height)
 {
 	struct ia_css_stream_config *s_config =
@@ -1901,7 +1877,7 @@ void atomisp_css_isys_two_stream_cfg_update_stream1(
 void atomisp_css_isys_two_stream_cfg_update_stream2(
 				    struct atomisp_sub_device *asd,
 				    enum atomisp_input_stream_id stream_id,
-				    enum atomisp_css_stream_format input_format,
+				    enum atomisp_input_format input_format,
 				    unsigned int width, unsigned int height)
 {
 	struct ia_css_stream_config *s_config =
@@ -2142,11 +2118,11 @@ void atomisp_css_enable_cvf(struct atomisp_sub_device *asd,
 
 int atomisp_css_input_configure_port(
 		struct atomisp_sub_device *asd,
-		mipi_port_ID_t port,
+		enum mipi_port_id port,
 		unsigned int num_lanes,
 		unsigned int timeout,
 		unsigned int mipi_freq,
-		enum atomisp_css_stream_format metadata_format,
+		enum atomisp_input_format metadata_format,
 		unsigned int metadata_width,
 		unsigned int metadata_height)
 {
@@ -2890,8 +2866,8 @@ stream_err:
 	return -EINVAL;
 }
 
-unsigned int atomisp_get_pipe_index(struct atomisp_sub_device *asd,
-					uint16_t source_pad)
+static unsigned int atomisp_get_pipe_index(struct atomisp_sub_device *asd,
+					   uint16_t source_pad)
 {
 	struct atomisp_device *isp = asd->isp;
 	/*
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_css20.h b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_css20.h
index b03711668eda..a06c5b6e8027 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_css20.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_css20.h
@@ -37,7 +37,6 @@
 #define atomisp_css_irq_info  ia_css_irq_info
 #define atomisp_css_isp_config ia_css_isp_config
 #define atomisp_css_bayer_order ia_css_bayer_order
-#define atomisp_css_stream_format ia_css_stream_format
 #define atomisp_css_capture_mode ia_css_capture_mode
 #define atomisp_css_input_mode ia_css_input_mode
 #define atomisp_css_frame ia_css_frame
@@ -117,7 +116,7 @@
  */
 #define CSS_ID(val)	(IA_ ## val)
 #define CSS_EVENT(val)	(IA_CSS_EVENT_TYPE_ ## val)
-#define CSS_FORMAT(val)	(IA_CSS_STREAM_FORMAT_ ## val)
+#define CSS_FORMAT(val)	(ATOMISP_INPUT_FORMAT_ ## val)
 
 #define CSS_EVENT_PORT_EOF	CSS_EVENT(PORT_EOF)
 #define CSS_EVENT_FRAME_TAGGED	CSS_EVENT(FRAME_TAGGED)
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_drvfs.c b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_drvfs.c
index ceedb82b6beb..a815c768bda9 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_drvfs.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_drvfs.c
@@ -22,6 +22,7 @@
 #include "atomisp_compat.h"
 #include "atomisp_internal.h"
 #include "atomisp_ioctl.h"
+#include "atomisp_drvfs.h"
 #include "hmm/hmm.h"
 
 /*
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_fops.c b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_fops.c
index 545ef024841d..709137f25700 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_fops.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_fops.c
@@ -689,7 +689,7 @@ static void atomisp_dev_init_struct(struct atomisp_device *isp)
 {
 	unsigned int i;
 
-	isp->sw_contex.file_input = 0;
+	isp->sw_contex.file_input = false;
 	isp->need_gfx_throttle = true;
 	isp->isp_fatal_error = false;
 	isp->mipi_frame_size = 0;
@@ -708,12 +708,12 @@ static void atomisp_subdev_init_struct(struct atomisp_sub_device *asd)
 	v4l2_ctrl_s_ctrl(asd->run_mode, ATOMISP_RUN_MODE_STILL_CAPTURE);
 	memset(&asd->params.css_param, 0, sizeof(asd->params.css_param));
 	asd->params.color_effect = V4L2_COLORFX_NONE;
-	asd->params.bad_pixel_en = 1;
-	asd->params.gdc_cac_en = 0;
-	asd->params.video_dis_en = 0;
-	asd->params.sc_en = 0;
-	asd->params.fpn_en = 0;
-	asd->params.xnr_en = 0;
+	asd->params.bad_pixel_en = true;
+	asd->params.gdc_cac_en = false;
+	asd->params.video_dis_en = false;
+	asd->params.sc_en = false;
+	asd->params.fpn_en = false;
+	asd->params.xnr_en = false;
 	asd->params.false_color = 0;
 	asd->params.online_process = 1;
 	asd->params.yuv_ds_en = 0;
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_ioctl.c b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_ioctl.c
index 5c84dd63778e..61bd550dafb9 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_ioctl.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_ioctl.c
@@ -1607,10 +1607,12 @@ int atomisp_stream_on_master_slave_sensor(struct atomisp_device *isp,
 
 /* FIXME! */
 #ifndef ISP2401
-void __wdt_on_master_slave_sensor(struct atomisp_device *isp, unsigned int wdt_duration)
+static void __wdt_on_master_slave_sensor(struct atomisp_device *isp,
+					 unsigned int wdt_duration)
 #else
-void __wdt_on_master_slave_sensor(struct atomisp_video_pipe *pipe,
-				unsigned int wdt_duration, bool enable)
+static void __wdt_on_master_slave_sensor(struct atomisp_video_pipe *pipe,
+					 unsigned int wdt_duration,
+					 bool enable)
 #endif
 {
 #ifndef ISP2401
@@ -2731,7 +2733,7 @@ static int atomisp_s_parm_file(struct file *file, void *fh,
 	}
 
 	rt_mutex_lock(&isp->mutex);
-	isp->sw_contex.file_input = 1;
+	isp->sw_contex.file_input = true;
 	rt_mutex_unlock(&isp->mutex);
 
 	return 0;
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_subdev.c b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_subdev.c
index b78276ac22da..49a9973b4289 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_subdev.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_subdev.c
@@ -42,17 +42,17 @@ const struct atomisp_in_fmt_conv atomisp_in_fmt_conv[] = {
 	{ MEDIA_BUS_FMT_SGBRG12_1X12, 12, 12, ATOMISP_INPUT_FORMAT_RAW_12, CSS_BAYER_ORDER_GBRG, CSS_FORMAT_RAW_12 },
 	{ MEDIA_BUS_FMT_SGRBG12_1X12, 12, 12, ATOMISP_INPUT_FORMAT_RAW_12, CSS_BAYER_ORDER_GRBG, CSS_FORMAT_RAW_12 },
 	{ MEDIA_BUS_FMT_SRGGB12_1X12, 12, 12, ATOMISP_INPUT_FORMAT_RAW_12, CSS_BAYER_ORDER_RGGB, CSS_FORMAT_RAW_12 },
-	{ MEDIA_BUS_FMT_UYVY8_1X16, 8, 8, ATOMISP_INPUT_FORMAT_YUV422_8, 0, IA_CSS_STREAM_FORMAT_YUV422_8 },
-	{ MEDIA_BUS_FMT_YUYV8_1X16, 8, 8, ATOMISP_INPUT_FORMAT_YUV422_8, 0, IA_CSS_STREAM_FORMAT_YUV422_8 },
-	{ MEDIA_BUS_FMT_JPEG_1X8, 8, 8, CSS_FRAME_FORMAT_BINARY_8, 0, IA_CSS_STREAM_FORMAT_BINARY_8 },
+	{ MEDIA_BUS_FMT_UYVY8_1X16, 8, 8, ATOMISP_INPUT_FORMAT_YUV422_8, 0, ATOMISP_INPUT_FORMAT_YUV422_8 },
+	{ MEDIA_BUS_FMT_YUYV8_1X16, 8, 8, ATOMISP_INPUT_FORMAT_YUV422_8, 0, ATOMISP_INPUT_FORMAT_YUV422_8 },
+	{ MEDIA_BUS_FMT_JPEG_1X8, 8, 8, CSS_FRAME_FORMAT_BINARY_8, 0, ATOMISP_INPUT_FORMAT_BINARY_8 },
 	{ V4L2_MBUS_FMT_CUSTOM_NV12, 12, 12, CSS_FRAME_FORMAT_NV12, 0, CSS_FRAME_FORMAT_NV12 },
 	{ V4L2_MBUS_FMT_CUSTOM_NV21, 12, 12, CSS_FRAME_FORMAT_NV21, 0, CSS_FRAME_FORMAT_NV21 },
-	{ V4L2_MBUS_FMT_CUSTOM_YUV420, 12, 12, ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY, 0, IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY },
+	{ V4L2_MBUS_FMT_CUSTOM_YUV420, 12, 12, ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY, 0, ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY },
 #if 0
-	{ V4L2_MBUS_FMT_CUSTOM_M10MO_RAW, 8, 8, CSS_FRAME_FORMAT_BINARY_8, 0, IA_CSS_STREAM_FORMAT_BINARY_8 },
+	{ V4L2_MBUS_FMT_CUSTOM_M10MO_RAW, 8, 8, CSS_FRAME_FORMAT_BINARY_8, 0, ATOMISP_INPUT_FORMAT_BINARY_8 },
 #endif
 	/* no valid V4L2 MBUS code for metadata format, so leave it 0. */
-	{ 0, 0, 0, ATOMISP_INPUT_FORMAT_EMBEDDED, 0, IA_CSS_STREAM_FORMAT_EMBEDDED },
+	{ 0, 0, 0, ATOMISP_INPUT_FORMAT_EMBEDDED, 0, ATOMISP_INPUT_FORMAT_EMBEDDED },
 	{}
 };
 
@@ -101,7 +101,7 @@ const struct atomisp_in_fmt_conv *atomisp_find_in_fmt_conv(u32 code)
 }
 
 const struct atomisp_in_fmt_conv *atomisp_find_in_fmt_conv_by_atomisp_in_fmt(
-	enum atomisp_css_stream_format atomisp_in_fmt)
+	enum atomisp_input_format atomisp_in_fmt)
 {
 	int i;
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_subdev.h b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_subdev.h
index c3eba675da06..59ff8723c182 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_subdev.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_subdev.h
@@ -58,9 +58,9 @@ struct atomisp_in_fmt_conv {
 	u32     code;
 	uint8_t bpp; /* bits per pixel */
 	uint8_t depth; /* uncompressed */
-	enum atomisp_css_stream_format atomisp_in_fmt;
+	enum atomisp_input_format atomisp_in_fmt;
 	enum atomisp_css_bayer_order bayer_order;
-	enum ia_css_stream_format css_stream_fmt;
+	enum atomisp_input_format css_stream_fmt;
 };
 
 struct atomisp_sub_device;
@@ -424,10 +424,10 @@ bool atomisp_subdev_is_compressed(u32 code);
 const struct atomisp_in_fmt_conv *atomisp_find_in_fmt_conv(u32 code);
 #ifndef ISP2401
 const struct atomisp_in_fmt_conv *atomisp_find_in_fmt_conv_by_atomisp_in_fmt(
-	enum atomisp_css_stream_format atomisp_in_fmt);
+	enum atomisp_input_format atomisp_in_fmt);
 #else
 const struct atomisp_in_fmt_conv
-    *atomisp_find_in_fmt_conv_by_atomisp_in_fmt(enum atomisp_css_stream_format
+    *atomisp_find_in_fmt_conv_by_atomisp_in_fmt(enum atomisp_input_format
 						atomisp_in_fmt);
 #endif
 const struct atomisp_in_fmt_conv *atomisp_find_in_fmt_conv_compressed(u32 code);
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/camera/util/interface/ia_css_util.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/camera/util/interface/ia_css_util.h
index a8c27676a38b..5ab48f346790 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/camera/util/interface/ia_css_util.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/camera/util/interface/ia_css_util.h
@@ -116,7 +116,7 @@ extern bool ia_css_util_resolution_is_even(
  *
  */
 extern unsigned int ia_css_util_input_format_bpp(
-	enum ia_css_stream_format stream_format,
+	enum atomisp_input_format stream_format,
 	bool two_ppc);
 
 /* @brief check if input format it raw
@@ -126,7 +126,7 @@ extern unsigned int ia_css_util_input_format_bpp(
  *
  */
 extern bool ia_css_util_is_input_format_raw(
-	enum ia_css_stream_format stream_format);
+	enum atomisp_input_format stream_format);
 
 /* @brief check if input format it yuv
  *
@@ -135,7 +135,7 @@ extern bool ia_css_util_is_input_format_raw(
  *
  */
 extern bool ia_css_util_is_input_format_yuv(
-	enum ia_css_stream_format stream_format);
+	enum atomisp_input_format stream_format);
 
 #endif /* __IA_CSS_UTIL_H__ */
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/camera/util/src/util.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/camera/util/src/util.c
index 54193789a809..91e586112332 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/camera/util/src/util.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/camera/util/src/util.c
@@ -52,55 +52,55 @@ enum ia_css_err ia_css_convert_errno(
 
 /* MW: Table look-up ??? */
 unsigned int ia_css_util_input_format_bpp(
-	enum ia_css_stream_format format,
+	enum atomisp_input_format format,
 	bool two_ppc)
 {
 	unsigned int rval = 0;
 	switch (format) {
-	case IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY:
-	case IA_CSS_STREAM_FORMAT_YUV420_8:
-	case IA_CSS_STREAM_FORMAT_YUV422_8:
-	case IA_CSS_STREAM_FORMAT_RGB_888:
-	case IA_CSS_STREAM_FORMAT_RAW_8:
-	case IA_CSS_STREAM_FORMAT_BINARY_8:
-	case IA_CSS_STREAM_FORMAT_EMBEDDED:
+	case ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY:
+	case ATOMISP_INPUT_FORMAT_YUV420_8:
+	case ATOMISP_INPUT_FORMAT_YUV422_8:
+	case ATOMISP_INPUT_FORMAT_RGB_888:
+	case ATOMISP_INPUT_FORMAT_RAW_8:
+	case ATOMISP_INPUT_FORMAT_BINARY_8:
+	case ATOMISP_INPUT_FORMAT_EMBEDDED:
 		rval = 8;
 		break;
-	case IA_CSS_STREAM_FORMAT_YUV420_10:
-	case IA_CSS_STREAM_FORMAT_YUV422_10:
-	case IA_CSS_STREAM_FORMAT_RAW_10:
+	case ATOMISP_INPUT_FORMAT_YUV420_10:
+	case ATOMISP_INPUT_FORMAT_YUV422_10:
+	case ATOMISP_INPUT_FORMAT_RAW_10:
 		rval = 10;
 		break;
-	case IA_CSS_STREAM_FORMAT_YUV420_16:
-	case IA_CSS_STREAM_FORMAT_YUV422_16:
+	case ATOMISP_INPUT_FORMAT_YUV420_16:
+	case ATOMISP_INPUT_FORMAT_YUV422_16:
 		rval = 16;
 		break;
-	case IA_CSS_STREAM_FORMAT_RGB_444:
+	case ATOMISP_INPUT_FORMAT_RGB_444:
 		rval = 4;
 		break;
-	case IA_CSS_STREAM_FORMAT_RGB_555:
+	case ATOMISP_INPUT_FORMAT_RGB_555:
 		rval = 5;
 		break;
-	case IA_CSS_STREAM_FORMAT_RGB_565:
+	case ATOMISP_INPUT_FORMAT_RGB_565:
 		rval = 65;
 		break;
-	case IA_CSS_STREAM_FORMAT_RGB_666:
-	case IA_CSS_STREAM_FORMAT_RAW_6:
+	case ATOMISP_INPUT_FORMAT_RGB_666:
+	case ATOMISP_INPUT_FORMAT_RAW_6:
 		rval = 6;
 		break;
-	case IA_CSS_STREAM_FORMAT_RAW_7:
+	case ATOMISP_INPUT_FORMAT_RAW_7:
 		rval = 7;
 		break;
-	case IA_CSS_STREAM_FORMAT_RAW_12:
+	case ATOMISP_INPUT_FORMAT_RAW_12:
 		rval = 12;
 		break;
-	case IA_CSS_STREAM_FORMAT_RAW_14:
+	case ATOMISP_INPUT_FORMAT_RAW_14:
 		if (two_ppc)
 			rval = 14;
 		else
 			rval = 12;
 		break;
-	case IA_CSS_STREAM_FORMAT_RAW_16:
+	case ATOMISP_INPUT_FORMAT_RAW_16:
 		if (two_ppc)
 			rval = 16;
 		else
@@ -175,28 +175,28 @@ bool ia_css_util_resolution_is_even(const struct ia_css_resolution resolution)
 }
 
 #endif
-bool ia_css_util_is_input_format_raw(enum ia_css_stream_format format)
+bool ia_css_util_is_input_format_raw(enum atomisp_input_format format)
 {
-	return ((format == IA_CSS_STREAM_FORMAT_RAW_6) ||
-		(format == IA_CSS_STREAM_FORMAT_RAW_7) ||
-		(format == IA_CSS_STREAM_FORMAT_RAW_8) ||
-		(format == IA_CSS_STREAM_FORMAT_RAW_10) ||
-		(format == IA_CSS_STREAM_FORMAT_RAW_12));
+	return ((format == ATOMISP_INPUT_FORMAT_RAW_6) ||
+		(format == ATOMISP_INPUT_FORMAT_RAW_7) ||
+		(format == ATOMISP_INPUT_FORMAT_RAW_8) ||
+		(format == ATOMISP_INPUT_FORMAT_RAW_10) ||
+		(format == ATOMISP_INPUT_FORMAT_RAW_12));
 	/* raw_14 and raw_16 are not supported as input formats to the ISP.
 	 * They can only be copied to a frame in memory using the
 	 * copy binary.
 	 */
 }
 
-bool ia_css_util_is_input_format_yuv(enum ia_css_stream_format format)
+bool ia_css_util_is_input_format_yuv(enum atomisp_input_format format)
 {
-	return format == IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY ||
-	    format == IA_CSS_STREAM_FORMAT_YUV420_8  ||
-	    format == IA_CSS_STREAM_FORMAT_YUV420_10 ||
-	    format == IA_CSS_STREAM_FORMAT_YUV420_16 ||
-	    format == IA_CSS_STREAM_FORMAT_YUV422_8  ||
-	    format == IA_CSS_STREAM_FORMAT_YUV422_10 ||
-	    format == IA_CSS_STREAM_FORMAT_YUV422_16;
+	return format == ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY ||
+	    format == ATOMISP_INPUT_FORMAT_YUV420_8  ||
+	    format == ATOMISP_INPUT_FORMAT_YUV420_10 ||
+	    format == ATOMISP_INPUT_FORMAT_YUV420_16 ||
+	    format == ATOMISP_INPUT_FORMAT_YUV422_8  ||
+	    format == ATOMISP_INPUT_FORMAT_YUV422_10 ||
+	    format == ATOMISP_INPUT_FORMAT_YUV422_16;
 }
 
 enum ia_css_err ia_css_util_check_input(
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/css_2401_csi2p_system/system_global.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/css_2401_csi2p_system/system_global.h
index d2e3a2deea2e..7907f0ff6d6c 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/css_2401_csi2p_system/system_global.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/css_2401_csi2p_system/system_global.h
@@ -284,12 +284,12 @@ typedef enum {
 	N_RX_ID
 } rx_ID_t;
 
-typedef enum {
+enum mipi_port_id {
 	MIPI_PORT0_ID = 0,
 	MIPI_PORT1_ID,
 	MIPI_PORT2_ID,
 	N_MIPI_PORT_ID
-} mipi_port_ID_t;
+};
 
 #define	N_RX_CHANNEL_ID		4
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/debug.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/debug.c
index c412810887b3..dcb9a3127cfe 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/debug.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/debug.c
@@ -29,7 +29,7 @@
 hrt_address	debug_buffer_address = (hrt_address)-1;
 hrt_vaddress	debug_buffer_ddr_address = (hrt_vaddress)-1;
 /* The local copy */
-debug_data_t		debug_data;
+static debug_data_t		debug_data;
 debug_data_t		*debug_data_ptr = &debug_data;
 
 void debug_buffer_init(const hrt_address addr)
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/gp_timer.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/gp_timer.c
index bcfd443f5202..b6b1344786b1 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/gp_timer.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/gp_timer.c
@@ -29,7 +29,7 @@ gp_timer_reg_load(uint32_t reg);
 static void
 gp_timer_reg_store(uint32_t reg, uint32_t value);
 
-uint32_t
+static uint32_t
 gp_timer_reg_load(uint32_t reg)
 {
 	return ia_css_device_load_uint32(
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/input_formatter.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/input_formatter.c
index a8997e45738e..0e1ca995fb06 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/input_formatter.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/input_formatter.c
@@ -45,8 +45,9 @@ const uint8_t HIVE_IF_SWITCH_CODE[N_INPUT_FORMATTER_ID] = {
 	HIVE_INPUT_SWITCH_SELECT_STR_TO_MEM};
 
 /* MW Should be part of system_global.h, where we have the main enumeration */
-const bool HIVE_IF_BIN_COPY[N_INPUT_FORMATTER_ID] = {
-	false, false, false, true};
+static const bool HIVE_IF_BIN_COPY[N_INPUT_FORMATTER_ID] = {
+	false, false, false, true
+};
 
 void input_formatter_rst(
 	const input_formatter_ID_t		ID)
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/input_system.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/input_system.c
index bd6821e436b2..2515e162828f 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/input_system.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/input_system.c
@@ -29,7 +29,7 @@
 #define ZERO (0x0)
 #define ONE  (1U)
 
-const ib_buffer_t   IB_BUFFER_NULL = {0 ,0, 0 };
+static const ib_buffer_t   IB_BUFFER_NULL = {0 ,0, 0 };
 
 static input_system_error_t input_system_configure_channel(
 	const channel_cfg_t		channel);
@@ -98,7 +98,7 @@ static inline void ctrl_unit_get_state(
 
 static inline void mipi_port_get_state(
 	const rx_ID_t					ID,
-	const mipi_port_ID_t			port_ID,
+	const enum mipi_port_id			port_ID,
 	mipi_port_state_t				*state);
 
 static inline void rx_channel_get_state(
@@ -180,7 +180,7 @@ void receiver_get_state(
 	const rx_ID_t				ID,
 	receiver_state_t			*state)
 {
-	mipi_port_ID_t	port_id;
+	enum mipi_port_id	port_id;
 	unsigned int	ch_id;
 
 	assert(ID < N_RX_ID);
@@ -209,7 +209,7 @@ void receiver_get_state(
 	state->raw16 = (uint16_t)receiver_reg_load(ID,
 		_HRT_CSS_RECEIVER_RAW16_REG_IDX);
 
-	for (port_id = (mipi_port_ID_t)0; port_id < N_MIPI_PORT_ID; port_id++) {
+	for (port_id = (enum mipi_port_id)0; port_id < N_MIPI_PORT_ID; port_id++) {
 		mipi_port_get_state(ID, port_id,
 			&(state->mipi_port_state[port_id]));
 	}
@@ -305,7 +305,7 @@ void receiver_set_compression(
 
 void receiver_port_enable(
 	const rx_ID_t			ID,
-	const mipi_port_ID_t		port_ID,
+	const enum mipi_port_id		port_ID,
 	const bool			cnd)
 {
 	hrt_data	reg = receiver_port_reg_load(ID, port_ID,
@@ -324,7 +324,7 @@ void receiver_port_enable(
 
 bool is_receiver_port_enabled(
 	const rx_ID_t			ID,
-	const mipi_port_ID_t		port_ID)
+	const enum mipi_port_id		port_ID)
 {
 	hrt_data	reg = receiver_port_reg_load(ID, port_ID,
 		_HRT_CSS_RECEIVER_DEVICE_READY_REG_IDX);
@@ -333,7 +333,7 @@ bool is_receiver_port_enabled(
 
 void receiver_irq_enable(
 	const rx_ID_t			ID,
-	const mipi_port_ID_t		port_ID,
+	const enum mipi_port_id		port_ID,
 	const rx_irq_info_t		irq_info)
 {
 	receiver_port_reg_store(ID,
@@ -343,7 +343,7 @@ void receiver_irq_enable(
 
 rx_irq_info_t receiver_get_irq_info(
 	const rx_ID_t			ID,
-	const mipi_port_ID_t		port_ID)
+	const enum mipi_port_id		port_ID)
 {
 	return receiver_port_reg_load(ID,
 	port_ID, _HRT_CSS_RECEIVER_IRQ_STATUS_REG_IDX);
@@ -351,7 +351,7 @@ rx_irq_info_t receiver_get_irq_info(
 
 void receiver_irq_clear(
 	const rx_ID_t			ID,
-	const mipi_port_ID_t		port_ID,
+	const enum mipi_port_id		port_ID,
 	const rx_irq_info_t		irq_info)
 {
 	receiver_port_reg_store(ID,
@@ -556,7 +556,7 @@ static inline void ctrl_unit_get_state(
 
 static inline void mipi_port_get_state(
 	const rx_ID_t				ID,
-	const mipi_port_ID_t			port_ID,
+	const enum mipi_port_id			port_ID,
 	mipi_port_state_t			*state)
 {
 	int	i;
@@ -644,12 +644,12 @@ static inline void rx_channel_get_state(
 }
 
 // MW: "2400" in the name is not good, but this is to avoid a naming conflict
-input_system_cfg2400_t config;
+static input_system_cfg2400_t config;
 
 static void receiver_rst(
 	const rx_ID_t				ID)
 {
-	mipi_port_ID_t		port_id;
+	enum mipi_port_id		port_id;
 
 	assert(ID < N_RX_ID);
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/input_system_local.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/input_system_local.h
index 3e8bd00082dc..bf9230fd08f2 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/input_system_local.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/input_system_local.h
@@ -353,7 +353,7 @@ typedef struct rx_cfg_s		rx_cfg_t;
  */
 struct rx_cfg_s {
 	rx_mode_t			mode;	/* The HW config */
-	mipi_port_ID_t		port;	/* The port ID to apply the control on */
+	enum mipi_port_id		port;	/* The port ID to apply the control on */
 	unsigned int		timeout;
 	unsigned int		initcount;
 	unsigned int		synccount;
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/input_system_private.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/input_system_private.h
index 118185eb86e9..48876bb08b70 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/input_system_private.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/input_system_private.h
@@ -63,7 +63,7 @@ STORAGE_CLASS_INPUT_SYSTEM_C hrt_data receiver_reg_load(
 
 STORAGE_CLASS_INPUT_SYSTEM_C void receiver_port_reg_store(
 	const rx_ID_t				ID,
-	const mipi_port_ID_t			port_ID,
+	const enum mipi_port_id			port_ID,
 	const hrt_address			reg,
 	const hrt_data				value)
 {
@@ -77,7 +77,7 @@ STORAGE_CLASS_INPUT_SYSTEM_C void receiver_port_reg_store(
 
 STORAGE_CLASS_INPUT_SYSTEM_C hrt_data receiver_port_reg_load(
 	const rx_ID_t				ID,
-	const mipi_port_ID_t			port_ID,
+	const enum mipi_port_id			port_ID,
 	const hrt_address			reg)
 {
 	assert(ID < N_RX_ID);
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/system_global.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/system_global.h
index d803efd7400a..6f63962a54e8 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/system_global.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/system_global.h
@@ -266,12 +266,12 @@ typedef enum {
 	N_RX_ID
 } rx_ID_t;
 
-typedef enum {
+enum mipi_port_id {
 	MIPI_PORT0_ID = 0,
 	MIPI_PORT1_ID,
 	MIPI_PORT2_ID,
 	N_MIPI_PORT_ID
-} mipi_port_ID_t;
+};
 
 #define	N_RX_CHANNEL_ID		4
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_include/host/input_system_public.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_include/host/input_system_public.h
index 1596757fe9ef..6e37ff0fe0f9 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_include/host/input_system_public.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_include/host/input_system_public.h
@@ -83,7 +83,7 @@ extern void receiver_set_compression(
  */
 extern void receiver_port_enable(
 	const rx_ID_t				ID,
-	const mipi_port_ID_t		port_ID,
+	const enum mipi_port_id		port_ID,
 	const bool					cnd);
 
 /*! Flag if PORT[port_ID] of RECEIVER[ID] is enabled
@@ -95,7 +95,7 @@ extern void receiver_port_enable(
  */
 extern bool is_receiver_port_enabled(
 	const rx_ID_t				ID,
-	const mipi_port_ID_t		port_ID);
+	const enum mipi_port_id		port_ID);
 
 /*! Enable the IRQ channels of PORT[port_ID] of RECEIVER[ID]
 
@@ -107,7 +107,7 @@ extern bool is_receiver_port_enabled(
  */
 extern void receiver_irq_enable(
 	const rx_ID_t				ID,
-	const mipi_port_ID_t		port_ID,
+	const enum mipi_port_id		port_ID,
 	const rx_irq_info_t			irq_info);
 
 /*! Return the IRQ status of PORT[port_ID] of RECEIVER[ID]
@@ -119,7 +119,7 @@ extern void receiver_irq_enable(
  */
 extern rx_irq_info_t receiver_get_irq_info(
 	const rx_ID_t				ID,
-	const mipi_port_ID_t		port_ID);
+	const enum mipi_port_id		port_ID);
 
 /*! Clear the IRQ status of PORT[port_ID] of RECEIVER[ID]
 
@@ -131,7 +131,7 @@ extern rx_irq_info_t receiver_get_irq_info(
  */
 extern void receiver_irq_clear(
 	const rx_ID_t				ID,
-	const mipi_port_ID_t			port_ID,
+	const enum mipi_port_id			port_ID,
 	const rx_irq_info_t			irq_info);
 
 /*! Write to a control register of INPUT_SYSTEM[ID]
@@ -195,7 +195,7 @@ STORAGE_CLASS_INPUT_SYSTEM_H hrt_data receiver_reg_load(
  */
 STORAGE_CLASS_INPUT_SYSTEM_H void receiver_port_reg_store(
 	const rx_ID_t				ID,
-	const mipi_port_ID_t			port_ID,
+	const enum mipi_port_id			port_ID,
 	const hrt_address			reg,
 	const hrt_data				value);
 
@@ -210,7 +210,7 @@ STORAGE_CLASS_INPUT_SYSTEM_H void receiver_port_reg_store(
  */
 STORAGE_CLASS_INPUT_SYSTEM_H hrt_data receiver_port_reg_load(
 	const rx_ID_t				ID,
-	const mipi_port_ID_t		port_ID,
+	const enum mipi_port_id		port_ID,
 	const hrt_address			reg);
 
 /*! Write to a control register of SUB_SYSTEM[sub_ID] of INPUT_SYSTEM[ID]
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_input_port.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_input_port.h
index f415570a3da9..ad9ca5449369 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_input_port.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_input_port.h
@@ -12,6 +12,9 @@
  * more details.
  */
 
+/* For MIPI_PORT0_ID to MIPI_PORT2_ID */
+#include "system_global.h"
+
 #ifndef __IA_CSS_INPUT_PORT_H
 #define __IA_CSS_INPUT_PORT_H
 
@@ -19,21 +22,12 @@
  * This file contains information about the possible input ports for CSS
  */
 
-/* Enumeration of the physical input ports on the CSS hardware.
- *  There are 3 MIPI CSI-2 ports.
- */
-enum ia_css_csi2_port {
-	IA_CSS_CSI2_PORT0, /* Implicitly map to MIPI_PORT0_ID */
-	IA_CSS_CSI2_PORT1, /* Implicitly map to MIPI_PORT1_ID */
-	IA_CSS_CSI2_PORT2  /* Implicitly map to MIPI_PORT2_ID */
-};
-
 /* Backward compatible for CSS API 2.0 only
  *  TO BE REMOVED when all drivers move to CSS	API 2.1
  */
-#define	IA_CSS_CSI2_PORT_4LANE IA_CSS_CSI2_PORT0
-#define	IA_CSS_CSI2_PORT_1LANE IA_CSS_CSI2_PORT1
-#define	IA_CSS_CSI2_PORT_2LANE IA_CSS_CSI2_PORT2
+#define	IA_CSS_CSI2_PORT_4LANE MIPI_PORT0_ID
+#define	IA_CSS_CSI2_PORT_1LANE MIPI_PORT1_ID
+#define	IA_CSS_CSI2_PORT_2LANE MIPI_PORT2_ID
 
 /* The CSI2 interface supports 2 types of compression or can
  *  be run without compression.
@@ -56,7 +50,7 @@ struct ia_css_csi2_compression {
 /* Input port structure.
  */
 struct ia_css_input_port {
-	enum ia_css_csi2_port port; /** Physical CSI-2 port */
+	enum mipi_port_id port; /** Physical CSI-2 port */
 	unsigned int num_lanes; /** Number of lanes used (4-lane port only) */
 	unsigned int timeout;   /** Timeout value */
 	unsigned int rxcount;   /** Register value, should include all lanes */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_irq.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_irq.h
index 10ef61178bb2..c8840138899a 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_irq.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_irq.h
@@ -186,7 +186,7 @@ ia_css_rx_get_irq_info(unsigned int *irq_bits);
  * that occurred.
  */
 void
-ia_css_rx_port_get_irq_info(enum ia_css_csi2_port port, unsigned int *irq_bits);
+ia_css_rx_port_get_irq_info(enum mipi_port_id port, unsigned int *irq_bits);
 
 /* @brief Clear CSI receiver error info.
  *
@@ -218,7 +218,7 @@ ia_css_rx_clear_irq_info(unsigned int irq_bits);
  * error bits get overwritten.
  */
 void
-ia_css_rx_port_clear_irq_info(enum ia_css_csi2_port port, unsigned int irq_bits);
+ia_css_rx_port_clear_irq_info(enum mipi_port_id port, unsigned int irq_bits);
 
 /* @brief Enable or disable specific interrupts.
  *
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_metadata.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_metadata.h
index 8b674c98224c..ed0b6ab371da 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_metadata.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_metadata.h
@@ -27,8 +27,8 @@
  *  to process sensor metadata.
  */
 struct ia_css_metadata_config {
-	enum ia_css_stream_format data_type; /** Data type of CSI-2 embedded
-			data. The default value is IA_CSS_STREAM_FORMAT_EMBEDDED. For
+	enum atomisp_input_format data_type; /** Data type of CSI-2 embedded
+			data. The default value is ATOMISP_INPUT_FORMAT_EMBEDDED. For
 			certain sensors, user can choose non-default data type for embedded
 			data. */
 	struct ia_css_resolution  resolution; /** Resolution */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_mipi.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_mipi.h
index f9c9cd76be97..367b2aafa5e8 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_mipi.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_mipi.h
@@ -55,7 +55,7 @@ ia_css_mipi_frame_specify(const unsigned int	size_mem_words,
  *
  */
 enum ia_css_err
-ia_css_mipi_frame_enable_check_on_size(const enum ia_css_csi2_port port,
+ia_css_mipi_frame_enable_check_on_size(const enum mipi_port_id port,
 				const unsigned int	size_mem_words);
 #endif
 
@@ -74,7 +74,7 @@ ia_css_mipi_frame_enable_check_on_size(const enum ia_css_csi2_port port,
 enum ia_css_err
 ia_css_mipi_frame_calculate_size(const unsigned int width,
 				const unsigned int height,
-				const enum ia_css_stream_format format,
+				const enum atomisp_input_format format,
 				const bool hasSOLandEOL,
 				const unsigned int embedded_data_size_words,
 				unsigned int *size_mem_words);
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_stream_format.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_stream_format.h
index f7e9020a86e1..f97b9eb2b19c 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_stream_format.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_stream_format.h
@@ -20,75 +20,10 @@
  */
 
 #include <type_support.h> /* bool */
-
-/* The ISP streaming input interface supports the following formats.
- *  These match the corresponding MIPI formats.
- */
-enum ia_css_stream_format {
-	IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY,    /** 8 bits per subpixel */
-	IA_CSS_STREAM_FORMAT_YUV420_8,  /** 8 bits per subpixel */
-	IA_CSS_STREAM_FORMAT_YUV420_10, /** 10 bits per subpixel */
-	IA_CSS_STREAM_FORMAT_YUV420_16, /** 16 bits per subpixel */
-	IA_CSS_STREAM_FORMAT_YUV422_8,  /** UYVY..UYVY, 8 bits per subpixel */
-	IA_CSS_STREAM_FORMAT_YUV422_10, /** UYVY..UYVY, 10 bits per subpixel */
-	IA_CSS_STREAM_FORMAT_YUV422_16, /** UYVY..UYVY, 16 bits per subpixel */
-	IA_CSS_STREAM_FORMAT_RGB_444,  /** BGR..BGR, 4 bits per subpixel */
-	IA_CSS_STREAM_FORMAT_RGB_555,  /** BGR..BGR, 5 bits per subpixel */
-	IA_CSS_STREAM_FORMAT_RGB_565,  /** BGR..BGR, 5 bits B and R, 6 bits G */
-	IA_CSS_STREAM_FORMAT_RGB_666,  /** BGR..BGR, 6 bits per subpixel */
-	IA_CSS_STREAM_FORMAT_RGB_888,  /** BGR..BGR, 8 bits per subpixel */
-	IA_CSS_STREAM_FORMAT_RAW_6,    /** RAW data, 6 bits per pixel */
-	IA_CSS_STREAM_FORMAT_RAW_7,    /** RAW data, 7 bits per pixel */
-	IA_CSS_STREAM_FORMAT_RAW_8,    /** RAW data, 8 bits per pixel */
-	IA_CSS_STREAM_FORMAT_RAW_10,   /** RAW data, 10 bits per pixel */
-	IA_CSS_STREAM_FORMAT_RAW_12,   /** RAW data, 12 bits per pixel */
-	IA_CSS_STREAM_FORMAT_RAW_14,   /** RAW data, 14 bits per pixel */
-	IA_CSS_STREAM_FORMAT_RAW_16,   /** RAW data, 16 bits per pixel, which is
-					    not specified in CSI-MIPI standard*/
-	IA_CSS_STREAM_FORMAT_BINARY_8, /** Binary byte stream, which is target at
-					    JPEG. */
-
-	/* CSI2-MIPI specific format: Generic short packet data. It is used to
-	 *  keep the timing information for the opening/closing of shutters,
-	 *  triggering of flashes and etc.
-	 */
-	IA_CSS_STREAM_FORMAT_GENERIC_SHORT1,  /** Generic Short Packet Code 1 */
-	IA_CSS_STREAM_FORMAT_GENERIC_SHORT2,  /** Generic Short Packet Code 2 */
-	IA_CSS_STREAM_FORMAT_GENERIC_SHORT3,  /** Generic Short Packet Code 3 */
-	IA_CSS_STREAM_FORMAT_GENERIC_SHORT4,  /** Generic Short Packet Code 4 */
-	IA_CSS_STREAM_FORMAT_GENERIC_SHORT5,  /** Generic Short Packet Code 5 */
-	IA_CSS_STREAM_FORMAT_GENERIC_SHORT6,  /** Generic Short Packet Code 6 */
-	IA_CSS_STREAM_FORMAT_GENERIC_SHORT7,  /** Generic Short Packet Code 7 */
-	IA_CSS_STREAM_FORMAT_GENERIC_SHORT8,  /** Generic Short Packet Code 8 */
-
-	/* CSI2-MIPI specific format: YUV data.
-	 */
-	IA_CSS_STREAM_FORMAT_YUV420_8_SHIFT,  /** YUV420 8-bit (Chroma Shifted Pixel Sampling) */
-	IA_CSS_STREAM_FORMAT_YUV420_10_SHIFT, /** YUV420 8-bit (Chroma Shifted Pixel Sampling) */
-
-	/* CSI2-MIPI specific format: Generic long packet data
-	 */
-	IA_CSS_STREAM_FORMAT_EMBEDDED, /** Embedded 8-bit non Image Data */
-
-	/* CSI2-MIPI specific format: User defined byte-based data. For example,
-	 *  the data transmitter (e.g. the SoC sensor) can keep the JPEG data as
-	 *  the User Defined Data Type 4 and the MPEG data as the
-	 *  User Defined Data Type 7.
-	 */
-	IA_CSS_STREAM_FORMAT_USER_DEF1,  /** User defined 8-bit data type 1 */
-	IA_CSS_STREAM_FORMAT_USER_DEF2,  /** User defined 8-bit data type 2 */
-	IA_CSS_STREAM_FORMAT_USER_DEF3,  /** User defined 8-bit data type 3 */
-	IA_CSS_STREAM_FORMAT_USER_DEF4,  /** User defined 8-bit data type 4 */
-	IA_CSS_STREAM_FORMAT_USER_DEF5,  /** User defined 8-bit data type 5 */
-	IA_CSS_STREAM_FORMAT_USER_DEF6,  /** User defined 8-bit data type 6 */
-	IA_CSS_STREAM_FORMAT_USER_DEF7,  /** User defined 8-bit data type 7 */
-	IA_CSS_STREAM_FORMAT_USER_DEF8,  /** User defined 8-bit data type 8 */
-};
-
-#define	IA_CSS_STREAM_FORMAT_NUM	IA_CSS_STREAM_FORMAT_USER_DEF8
+#include "../../../include/linux/atomisp_platform.h"
 
 unsigned int ia_css_util_input_format_bpp(
-	enum ia_css_stream_format format,
+	enum atomisp_input_format format,
 	bool two_ppc);
 
-#endif /* __IA_CSS_STREAM_FORMAT_H */
+#endif /* __IA_CSS_STREAM_FORMAT_H */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_stream_public.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_stream_public.h
index ca3203357ff5..ddefad330db7 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_stream_public.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_stream_public.h
@@ -62,7 +62,7 @@ enum {
  */
 struct ia_css_stream_isys_stream_config {
 	struct ia_css_resolution  input_res; /** Resolution of input data */
-	enum ia_css_stream_format format; /** Format of input stream. This data
+	enum atomisp_input_format format; /** Format of input stream. This data
 					       format will be mapped to MIPI data
 					       type internally. */
 	int linked_isys_stream_id; /** default value is -1, other value means
@@ -77,7 +77,7 @@ struct ia_css_stream_input_config {
 							Used for CSS 2400/1 System and deprecated for other
 							systems (replaced by input_effective_res in
 							ia_css_pipe_config) */
-	enum ia_css_stream_format format; /** Format of input stream. This data
+	enum atomisp_input_format format; /** Format of input stream. This data
 					       format will be mapped to MIPI data
 					       type internally. */
 	enum ia_css_bayer_order bayer_order; /** Bayer order for RAW streams */
@@ -257,7 +257,7 @@ ia_css_stream_unload(struct ia_css_stream *stream);
  *
  * This function will return the stream format.
  */
-enum ia_css_stream_format
+enum atomisp_input_format
 ia_css_stream_get_format(const struct ia_css_stream *stream);
 
 /* @brief Check if the stream is configured for 2 pixels per clock
@@ -453,7 +453,7 @@ ia_css_stream_send_input_line(const struct ia_css_stream *stream,
  */
 void
 ia_css_stream_send_input_embedded_line(const struct ia_css_stream *stream,
-			      enum ia_css_stream_format format,
+			      enum atomisp_input_format format,
 			      const unsigned short *data,
 			      unsigned int width);
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/bnlm/ia_css_bnlm.host.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/bnlm/ia_css_bnlm.host.h
index b99c0644ab38..675f6e539b3f 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/bnlm/ia_css_bnlm.host.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/bnlm/ia_css_bnlm.host.h
@@ -17,7 +17,6 @@
 
 #include "ia_css_bnlm_types.h"
 #include "ia_css_bnlm_param.h"
-#include "ia_css_bnlm_default.host.h"
 
 void
 ia_css_bnlm_vmem_encode(
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/bnlm/ia_css_bnlm_default.host.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/bnlm/ia_css_bnlm_default.host.c
deleted file mode 100644
index e2eb88c0f123..000000000000
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/bnlm/ia_css_bnlm_default.host.c
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Support for Intel Camera Imaging ISP subsystem.
- * Copyright (c) 2015, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- */
-
-#include "ia_css_bnlm_types.h"
-
-const struct ia_css_bnlm_config default_bnlm_config = {
-
-	.rad_enable = true,
-	.rad_x_origin = 0,
-	.rad_y_origin = 0,
-	.avg_min_th = 127,
-	.max_min_th = 2047,
-
-	.exp_coeff_a = 6048,
-	.exp_coeff_b = 7828,
-	.exp_coeff_c = 0,
-	.exp_exponent = 3,
-
-	.nl_th = {2252, 2251, 2250},
-	.match_quality_max_idx = {2, 3, 3, 1},
-
-	.mu_root_lut_thr = {
-		26, 56, 128, 216, 462, 626, 932, 1108, 1480, 1564, 1824, 1896, 2368, 3428, 4560},
-	.mu_root_lut_val = {
-		384, 320, 320, 264, 248, 240, 224, 192, 192, 160, 160, 160, 136, 130, 96, 80},
-	.sad_norm_lut_thr = {
-		236, 328, 470, 774, 964, 1486, 2294, 3244, 4844, 6524, 6524, 6524, 6524, 6524, 6524},
-	.sad_norm_lut_val = {
-		8064, 7680, 7168, 6144, 5120, 3840, 2560, 2304, 1984, 1792, 1792, 1792, 1792, 1792, 1792, 1792},
-	.sig_detail_lut_thr = {
-		2936, 3354, 3943, 4896, 5230, 5682, 5996, 7299, 7299, 7299, 7299, 7299, 7299, 7299, 7299},
-	.sig_detail_lut_val = {
-		8191, 7680, 7168, 6144, 5120, 4608, 4224, 4032, 4032, 4032, 4032, 4032, 4032, 4032, 4032, 4032},
-	.sig_rad_lut_thr = {
-		18, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20},
-	.sig_rad_lut_val = {
-		2560, 7168, 8188, 8188, 8188, 8188, 8188, 8188, 8188, 8188, 8188, 8188, 8188, 8188, 8188, 8188},
-	.rad_pow_lut_thr = {
-		0, 7013, 7013, 7013, 7013, 7013, 7013, 7013, 7013, 7013, 7013, 7013, 7013, 7013, 7013},
-	.rad_pow_lut_val = {
-		8191, 8191, 8191, 8191, 8191, 8191, 8191, 8191, 8191, 8191, 8191, 8191, 8191, 8191, 8191, 8191},
-	.nl_0_lut_thr = {
-		1072, 7000, 8000, 8000, 8000, 8000, 8000, 8000, 8000, 8000, 8000, 8000, 8000, 8000, 8000},
-	.nl_0_lut_val = {
-		2560, 3072, 5120, 5120, 5120, 5120, 5120, 5120, 5120, 5120, 5120, 5120, 5120, 5120, 5120, 5120},
-	.nl_1_lut_thr = {
-		624, 3224, 3392, 7424, 7424, 7424, 7424, 7424, 7424, 7424, 7424, 7424, 7424, 7424, 7424},
-	.nl_1_lut_val = {
-		3584, 4608, 5120, 6144, 6144, 6144, 6144, 6144, 6144, 6144, 6144, 6144, 6144, 6144, 6144, 6144},
-	.nl_2_lut_thr = {
-		745, 2896, 3720, 6535, 7696, 8040, 8040, 8040, 8040, 8040, 8040, 8040, 8040, 8040, 8040},
-	.nl_2_lut_val = {
-		3584, 4608, 6144, 7168, 7936, 8191, 8191, 8191, 8191, 8191, 8191, 8191, 8191, 8191, 8191, 8191},
-	.nl_3_lut_thr = {
-		4848, 4984, 5872, 6000, 6517, 6960, 7944, 8088, 8161, 8161, 8161, 8161, 8161, 8161, 8161},
-	.nl_3_lut_val = {
-		3072, 4104, 4608, 5120, 6144, 7168, 7680, 8128, 8191, 8191, 8191, 8191, 8191, 8191, 8191, 8191},
-
-};
-
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/bnlm/ia_css_bnlm_default.host.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/bnlm/ia_css_bnlm_default.host.h
deleted file mode 100644
index f18c8070abba..000000000000
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/bnlm/ia_css_bnlm_default.host.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Support for Intel Camera Imaging ISP subsystem.
- * Copyright (c) 2015, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- */
-
-#ifndef __IA_CSS_BNLM_DEFAULT_HOST_H
-#define __IA_CSS_BNLM_DEFAULT_HOST_H
-
-#include "ia_css_bnlm_types.h"
-extern const struct ia_css_bnlm_config default_bnlm_config;
-
-#endif /* __IA_CSS_BNLM_DEFAULT_HOST_H */
-
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/dpc2/ia_css_dpc2.host.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/dpc2/ia_css_dpc2.host.h
index 641564b4af8e..38d10a5237c6 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/dpc2/ia_css_dpc2.host.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/dpc2/ia_css_dpc2.host.h
@@ -17,7 +17,6 @@
 
 #include "ia_css_dpc2_types.h"
 #include "ia_css_dpc2_param.h"
-#include "ia_css_dpc2_default.host.h"
 
 void
 ia_css_dpc2_encode(
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/dpc2/ia_css_dpc2_default.host.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/dpc2/ia_css_dpc2_default.host.c
deleted file mode 100644
index c102601cc635..000000000000
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/dpc2/ia_css_dpc2_default.host.c
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Support for Intel Camera Imaging ISP subsystem.
- * Copyright (c) 2015, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- */
-
-#include "ia_css_dpc2_types.h"
-
-const struct ia_css_dpc2_config default_dpc2_config = {
-	.metric1 = 1638,
-	.metric2 =  128,
-	.metric3 = 1638,
-	.wb_gain_gr = 512,
-	.wb_gain_r  = 512,
-	.wb_gain_b  = 512,
-	.wb_gain_gb = 512
-};
-
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/dpc2/ia_css_dpc2_default.host.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/dpc2/ia_css_dpc2_default.host.h
deleted file mode 100644
index a1527ce3eddc..000000000000
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/dpc2/ia_css_dpc2_default.host.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Support for Intel Camera Imaging ISP subsystem.
- * Copyright (c) 2015, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- */
-
-#ifndef __IA_CSS_DPC2_DEFAULT_HOST_H
-#define __IA_CSS_DPC2_DEFAULT_HOST_H
-
-#include "ia_css_dpc2_types.h"
-
-extern const struct ia_css_dpc2_config default_dpc2_config;
-
-#endif /* __IA_CSS_DPC2_DEFAULT_HOST_H */
-
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/eed1_8/ia_css_eed1_8.host.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/eed1_8/ia_css_eed1_8.host.h
index 355ff13273b0..fff932c1364e 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/eed1_8/ia_css_eed1_8.host.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/eed1_8/ia_css_eed1_8.host.h
@@ -17,7 +17,6 @@
 
 #include "ia_css_eed1_8_types.h"
 #include "ia_css_eed1_8_param.h"
-#include "ia_css_eed1_8_default.host.h"
 
 void
 ia_css_eed1_8_vmem_encode(
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/eed1_8/ia_css_eed1_8_default.host.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/eed1_8/ia_css_eed1_8_default.host.c
deleted file mode 100644
index 3622719dafa5..000000000000
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/eed1_8/ia_css_eed1_8_default.host.c
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Support for Intel Camera Imaging ISP subsystem.
- * Copyright (c) 2015, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- */
-
-#include "ia_css_eed1_8_types.h"
-
-/* The default values for the kernel parameters are based on
- * ISP261 CSS API public parameter list_all.xlsx from 12-09-2014
- * The parameter list is available on the ISP261 sharepoint
- */
-
-/* Default kernel parameters. */
-const struct ia_css_eed1_8_config default_eed1_8_config = {
-	.rbzp_strength = 5489,
-	.fcstrength = 6554,
-	.fcthres_0 = 0,
-	.fcthres_1 = 0,
-	.fc_sat_coef = 8191,
-	.fc_coring_prm = 128,
-	.aerel_thres0 = 0,
-	.aerel_gain0 = 8191,
-	.aerel_thres1 = 16,
-	.aerel_gain1 = 20,
-	.derel_thres0 = 1229,
-	.derel_gain0 = 1,
-	.derel_thres1 = 819,
-	.derel_gain1 = 1,
-	.coring_pos0 = 0,
-	.coring_pos1 = 0,
-	.coring_neg0 = 0,
-	.coring_neg1 = 0,
-	.gain_exp = 2,
-	.gain_pos0 = 6144,
-	.gain_pos1 = 2048,
-	.gain_neg0 = 2048,
-	.gain_neg1 = 6144,
-	.pos_margin0 = 1475,
-	.pos_margin1 = 1475,
-	.neg_margin0 = 1475,
-	.neg_margin1 = 1475,
-	.dew_enhance_seg_x = {
-		0,
-		64,
-		272,
-		688,
-		1376,
-		2400,
-		3840,
-		5744,
-		8191
-		},
-	.dew_enhance_seg_y = {
-		0,
-		144,
-		480,
-		1040,
-		1852,
-		2945,
-		4357,
-		6094,
-		8191
-		},
-	.dew_enhance_seg_slope = {
-		4608,
-		3308,
-		2757,
-		2417,
-		2186,
-		8033,
-		7473,
-		7020
-		},
-	.dew_enhance_seg_exp = {
-		2,
-		2,
-		2,
-		2,
-		2,
-		0,
-		0,
-		0
-		},
-	.dedgew_max = 6144
-};
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/eed1_8/ia_css_eed1_8_default.host.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/eed1_8/ia_css_eed1_8_default.host.h
deleted file mode 100644
index 782f739ca8b5..000000000000
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/eed1_8/ia_css_eed1_8_default.host.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Support for Intel Camera Imaging ISP subsystem.
- * Copyright (c) 2015, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- */
-
-#ifndef __IA_CSS_EED1_8_DEFAULT_HOST_H
-#define __IA_CSS_EED1_8_DEFAULT_HOST_H
-
-#include "ia_css_eed1_8_types.h"
-
-extern const struct ia_css_eed1_8_config default_eed1_8_config;
-
-#endif /* __IA_CSS_EED1_8_DEFAULT_HOST_H */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/output/output_1.0/ia_css_output.host.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/output/output_1.0/ia_css_output.host.c
index 8fdf47c9310c..9efe5e5e4e06 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/output/output_1.0/ia_css_output.host.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/output/output_1.0/ia_css_output.host.c
@@ -60,7 +60,7 @@ ia_css_output_config(
 	(void)size;
 	ia_css_dma_configure_from_info(&to->port_b, from->info);
 	to->width_a_over_b = elems_a / to->port_b.elems;
-	to->height = from->info->res.height;
+	to->height = from->info ? from->info->res.height : 0;
 	to->enable = from->info != NULL;
 	ia_css_frame_info_to_frame_sp_info(&to->info, from->info);
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/raw/raw_1.0/ia_css_raw.host.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/raw/raw_1.0/ia_css_raw.host.c
index 68a27f0cfba0..fa9ce0fedf23 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/raw/raw_1.0/ia_css_raw.host.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/raw/raw_1.0/ia_css_raw.host.c
@@ -37,34 +37,34 @@ sh_css_elems_bytes_from_info (unsigned raw_bit_depth)
 
 /* MW: These areMIPI / ISYS properties, not camera function properties */
 static enum sh_stream_format
-css2isp_stream_format(enum ia_css_stream_format from)
+css2isp_stream_format(enum atomisp_input_format from)
 {
 	switch (from) {
-	case IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY:
+	case ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY:
 		return sh_stream_format_yuv420_legacy;
-	case IA_CSS_STREAM_FORMAT_YUV420_8:
-	case IA_CSS_STREAM_FORMAT_YUV420_10:
-	case IA_CSS_STREAM_FORMAT_YUV420_16:
+	case ATOMISP_INPUT_FORMAT_YUV420_8:
+	case ATOMISP_INPUT_FORMAT_YUV420_10:
+	case ATOMISP_INPUT_FORMAT_YUV420_16:
 		return sh_stream_format_yuv420;
-	case IA_CSS_STREAM_FORMAT_YUV422_8:
-	case IA_CSS_STREAM_FORMAT_YUV422_10:
-	case IA_CSS_STREAM_FORMAT_YUV422_16:
+	case ATOMISP_INPUT_FORMAT_YUV422_8:
+	case ATOMISP_INPUT_FORMAT_YUV422_10:
+	case ATOMISP_INPUT_FORMAT_YUV422_16:
 		return sh_stream_format_yuv422;
-	case IA_CSS_STREAM_FORMAT_RGB_444:
-	case IA_CSS_STREAM_FORMAT_RGB_555:
-	case IA_CSS_STREAM_FORMAT_RGB_565:
-	case IA_CSS_STREAM_FORMAT_RGB_666:
-	case IA_CSS_STREAM_FORMAT_RGB_888:
+	case ATOMISP_INPUT_FORMAT_RGB_444:
+	case ATOMISP_INPUT_FORMAT_RGB_555:
+	case ATOMISP_INPUT_FORMAT_RGB_565:
+	case ATOMISP_INPUT_FORMAT_RGB_666:
+	case ATOMISP_INPUT_FORMAT_RGB_888:
 		return sh_stream_format_rgb;
-	case IA_CSS_STREAM_FORMAT_RAW_6:
-	case IA_CSS_STREAM_FORMAT_RAW_7:
-	case IA_CSS_STREAM_FORMAT_RAW_8:
-	case IA_CSS_STREAM_FORMAT_RAW_10:
-	case IA_CSS_STREAM_FORMAT_RAW_12:
-	case IA_CSS_STREAM_FORMAT_RAW_14:
-	case IA_CSS_STREAM_FORMAT_RAW_16:
+	case ATOMISP_INPUT_FORMAT_RAW_6:
+	case ATOMISP_INPUT_FORMAT_RAW_7:
+	case ATOMISP_INPUT_FORMAT_RAW_8:
+	case ATOMISP_INPUT_FORMAT_RAW_10:
+	case ATOMISP_INPUT_FORMAT_RAW_12:
+	case ATOMISP_INPUT_FORMAT_RAW_14:
+	case ATOMISP_INPUT_FORMAT_RAW_16:
 		return sh_stream_format_raw;
-	case IA_CSS_STREAM_FORMAT_BINARY_8:
+	case ATOMISP_INPUT_FORMAT_BINARY_8:
 	default:
 		return sh_stream_format_raw;
 	}
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/raw/raw_1.0/ia_css_raw_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/raw/raw_1.0/ia_css_raw_types.h
index 5c0b8febd79a..ae868eb5e10f 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/raw/raw_1.0/ia_css_raw_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/raw/raw_1.0/ia_css_raw_types.h
@@ -28,7 +28,7 @@ struct ia_css_raw_configuration {
 	const struct ia_css_frame_info  *in_info;
 	const struct ia_css_frame_info  *internal_info;
 	bool two_ppc;
-	enum ia_css_stream_format stream_format;
+	enum atomisp_input_format stream_format;
 	bool deinterleaved;
 	uint8_t enable_left_padding;
 };
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tdf/tdf_1.0/ia_css_tdf.host.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tdf/tdf_1.0/ia_css_tdf.host.c
index e775af51c0c0..78a113bfe8f1 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tdf/tdf_1.0/ia_css_tdf.host.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tdf/tdf_1.0/ia_css_tdf.host.c
@@ -15,7 +15,7 @@
 #include "ia_css_debug.h"
 #include "ia_css_tdf.host.h"
 
-const int16_t g_pyramid[8][8] = {
+static const int16_t g_pyramid[8][8] = {
 {128, 384, 640, 896, 896, 640, 384, 128},
 {384, 1152, 1920, 2688, 2688, 1920, 1152, 384},
 {640, 1920, 3200, 4480, 4480, 3200, 1920, 640},
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tdf/tdf_1.0/ia_css_tdf.host.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tdf/tdf_1.0/ia_css_tdf.host.h
index 1b3e759e41a3..bd628a18e839 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tdf/tdf_1.0/ia_css_tdf.host.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tdf/tdf_1.0/ia_css_tdf.host.h
@@ -17,7 +17,6 @@
 
 #include "ia_css_tdf_types.h"
 #include "ia_css_tdf_param.h"
-#include "ia_css_tdf_default.host.h"
 
 void
 ia_css_tdf_vmem_encode(
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tdf/tdf_1.0/ia_css_tdf_default.host.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tdf/tdf_1.0/ia_css_tdf_default.host.c
deleted file mode 100644
index 9bb42daf070d..000000000000
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tdf/tdf_1.0/ia_css_tdf_default.host.c
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Support for Intel Camera Imaging ISP subsystem.
- * Copyright (c) 2015, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- */
-
-#include "ia_css_tdf_types.h"
-
-const struct ia_css_tdf_config default_tdf_config = {
-	.thres_flat_table = {0},
-	.thres_detail_table = {0},
-	.epsilon_0 = 4095,
-	.epsilon_1 = 5733,
-	.eps_scale_text = 409,
-	.eps_scale_edge = 3686,
-	.sepa_flat = 1294,
-	.sepa_edge = 4095,
-	.blend_flat = 819,
-	.blend_text = 819,
-	.blend_edge = 8191,
-	.shading_gain = 1024,
-	.shading_base_gain = 8191,
-	.local_y_gain = 0,
-	.local_y_base_gain = 2047,
-	.rad_x_origin = 0,
-	.rad_y_origin = 0
-};
-
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tdf/tdf_1.0/ia_css_tdf_default.host.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tdf/tdf_1.0/ia_css_tdf_default.host.h
deleted file mode 100644
index cd8fb70e5a87..000000000000
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tdf/tdf_1.0/ia_css_tdf_default.host.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Support for Intel Camera Imaging ISP subsystem.
- * Copyright (c) 2015, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- */
-
-#ifndef __IA_CSS_TDF_DEFAULT_HOST_H
-#define __IA_CSS_TDF_DEFAULT_HOST_H
-
-#include "ia_css_tdf_types.h"
-
-extern const struct ia_css_tdf_config default_tdf_config;
-
-#endif /* __IA_CSS_TDF_DEFAULT_HOST_H */
-
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/vf/vf_1.0/ia_css_vf.host.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/vf/vf_1.0/ia_css_vf.host.c
index 5610833ed595..c2076e412410 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/vf/vf_1.0/ia_css_vf.host.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/vf/vf_1.0/ia_css_vf.host.c
@@ -130,11 +130,11 @@ ia_css_vf_configure(
 
 	err = configure_kernel(info, out_info, vf_info, downscale_log2, &config);
 	configure_dma(&config, vf_info);
-	if (binary) {
-		if (vf_info)
-			vf_info->raw_bit_depth = info->dma.vfdec_bits_per_pixel;
-		ia_css_configure_vf (binary, &config);
-	}
+
+	if (vf_info)
+		vf_info->raw_bit_depth = info->dma.vfdec_bits_per_pixel;
+	ia_css_configure_vf (binary, &config);
+
 	return IA_CSS_SUCCESS;
 }
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/binary/interface/ia_css_binary.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/binary/interface/ia_css_binary.h
index 732e49a241eb..b62c4d321a4e 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/binary/interface/ia_css_binary.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/binary/interface/ia_css_binary.h
@@ -113,7 +113,7 @@ struct ia_css_binary_descr {
 #endif
 	bool enable_capture_pp_bli;
 	struct ia_css_resolution dvs_env;
-	enum ia_css_stream_format stream_format;
+	enum atomisp_input_format stream_format;
 	struct ia_css_frame_info *in_info;		/* the info of the input-frame with the
 							   ISP required resolution. */
 	struct ia_css_frame_info *bds_out_info;
@@ -126,7 +126,7 @@ struct ia_css_binary_descr {
 
 struct ia_css_binary {
 	const struct ia_css_binary_xinfo *info;
-	enum ia_css_stream_format input_format;
+	enum atomisp_input_format input_format;
 	struct ia_css_frame_info in_frame_info;
 	struct ia_css_frame_info internal_frame_info;
 	struct ia_css_frame_info out_frame_info[IA_CSS_BINARY_MAX_OUTPUT_PORTS];
@@ -162,7 +162,7 @@ struct ia_css_binary {
 
 #define IA_CSS_BINARY_DEFAULT_SETTINGS \
 (struct ia_css_binary) { \
-	.input_format		= IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY, \
+	.input_format		= ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY, \
 	.in_frame_info		= IA_CSS_BINARY_DEFAULT_FRAME_INFO, \
 	.internal_frame_info	= IA_CSS_BINARY_DEFAULT_FRAME_INFO, \
 	.out_frame_info		= {IA_CSS_BINARY_DEFAULT_FRAME_INFO}, \
@@ -179,7 +179,7 @@ enum ia_css_err
 ia_css_binary_fill_info(const struct ia_css_binary_xinfo *xinfo,
 		 bool online,
 		 bool two_ppc,
-		 enum ia_css_stream_format stream_format,
+		 enum atomisp_input_format stream_format,
 		 const struct ia_css_frame_info *in_info,
 		 const struct ia_css_frame_info *bds_out_info,
 		 const struct ia_css_frame_info *out_info[],
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/binary/src/binary.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/binary/src/binary.c
index a0f0e9062c4c..0cd6e1da43cf 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/binary/src/binary.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/binary/src/binary.c
@@ -861,7 +861,7 @@ binary_supports_output_format(const struct ia_css_binary_xinfo *info,
 #ifdef ISP2401
 static bool
 binary_supports_input_format(const struct ia_css_binary_xinfo *info,
-			     enum ia_css_stream_format format)
+			     enum atomisp_input_format format)
 {
 
 	assert(info != NULL);
@@ -1088,7 +1088,7 @@ enum ia_css_err
 ia_css_binary_fill_info(const struct ia_css_binary_xinfo *xinfo,
 		 bool online,
 		 bool two_ppc,
-		 enum ia_css_stream_format stream_format,
+		 enum atomisp_input_format stream_format,
 		 const struct ia_css_frame_info *in_info, /* can be NULL */
 		 const struct ia_css_frame_info *bds_out_info, /* can be NULL */
 		 const struct ia_css_frame_info *out_info[], /* can be NULL */
@@ -1382,7 +1382,7 @@ ia_css_binary_find(struct ia_css_binary_descr *descr,
 	int mode;
 	bool online;
 	bool two_ppc;
-	enum ia_css_stream_format stream_format;
+	enum atomisp_input_format stream_format;
 	const struct ia_css_frame_info *req_in_info,
 				       *req_bds_out_info,
 				       *req_out_info[IA_CSS_BINARY_MAX_OUTPUT_PORTS],
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/bufq/src/bufq.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/bufq/src/bufq.c
index e50d9f2e2609..ffbcdd80d934 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/bufq/src/bufq.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/bufq/src/bufq.c
@@ -90,12 +90,11 @@ struct sh_css_queues {
 
 #endif
 
-struct sh_css_queues  css_queues;
-
-
 /*******************************************************
 *** Static variables
 ********************************************************/
+static struct sh_css_queues css_queues;
+
 static int buffer_type_to_queue_id_map[SH_CSS_MAX_SP_THREADS][IA_CSS_NUM_DYNAMIC_BUFFER_TYPE];
 static bool queue_availability[SH_CSS_MAX_SP_THREADS][SH_CSS_MAX_NUM_QUEUES];
 
@@ -207,7 +206,7 @@ static void map_buffer_type_to_queue_id(
 	}
 
 	for (i = SH_CSS_QUEUE_C_ID; i < SH_CSS_MAX_NUM_QUEUES; i++) {
-		if (queue_availability[thread_id][i] == true) {
+		if (queue_availability[thread_id][i]) {
 			queue_availability[thread_id][i] = false;
 			buffer_type_to_queue_id_map[thread_id][buf_type] = i;
 			break;
@@ -266,7 +265,7 @@ static ia_css_queue_t *bufq_get_qhandle(
 	case sh_css_sp2host_isys_event_queue:
 		q = &css_queues.sp2host_isys_event_queue_handle;
 		break;
-#endif		
+#endif
 	case sh_css_host2sp_tag_cmd_queue:
 		q = &css_queues.host2sp_tag_cmd_queue_handle;
 		break;
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/debug/src/ia_css_debug.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/debug/src/ia_css_debug.c
index 60395904f89a..4607a76dc78a 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/debug/src/ia_css_debug.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/debug/src/ia_css_debug.c
@@ -110,9 +110,6 @@
 /* Global variable to store the dtrace verbosity level */
 unsigned int ia_css_debug_trace_level = IA_CSS_DEBUG_WARNING;
 
-/* Assumes that IA_CSS_STREAM_FORMAT_BINARY_8 is last */
-#define N_IA_CSS_STREAM_FORMAT (IA_CSS_STREAM_FORMAT_BINARY_8+1)
-
 #define DPG_START "ia_css_debug_pipe_graph_dump_start "
 #define DPG_END   " ia_css_debug_pipe_graph_dump_end\n"
 
@@ -141,8 +138,8 @@ static struct pipe_graph_class {
 	int width;
 	int eff_height;
 	int eff_width;
-	enum ia_css_stream_format stream_format;
-} pg_inst = {true, 0, 0, 0, 0, N_IA_CSS_STREAM_FORMAT};
+	enum atomisp_input_format stream_format;
+} pg_inst = {true, 0, 0, 0, 0, N_ATOMISP_INPUT_FORMAT};
 
 static const char * const queue_id_to_str[] = {
 	/* [SH_CSS_QUEUE_A_ID]     =*/ "queue_A",
@@ -261,86 +258,86 @@ unsigned int ia_css_debug_get_dtrace_level(void)
 	return ia_css_debug_trace_level;
 }
 
-static const char *debug_stream_format2str(const enum ia_css_stream_format stream_format)
+static const char *debug_stream_format2str(const enum atomisp_input_format stream_format)
 {
 	switch (stream_format) {
-	case IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY:
+	case ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY:
 		return "yuv420-8-legacy";
-	case IA_CSS_STREAM_FORMAT_YUV420_8:
+	case ATOMISP_INPUT_FORMAT_YUV420_8:
 		return "yuv420-8";
-	case IA_CSS_STREAM_FORMAT_YUV420_10:
+	case ATOMISP_INPUT_FORMAT_YUV420_10:
 		return "yuv420-10";
-	case IA_CSS_STREAM_FORMAT_YUV420_16:
+	case ATOMISP_INPUT_FORMAT_YUV420_16:
 		return "yuv420-16";
-	case IA_CSS_STREAM_FORMAT_YUV422_8:
+	case ATOMISP_INPUT_FORMAT_YUV422_8:
 		return "yuv422-8";
-	case IA_CSS_STREAM_FORMAT_YUV422_10:
+	case ATOMISP_INPUT_FORMAT_YUV422_10:
 		return "yuv422-10";
-	case IA_CSS_STREAM_FORMAT_YUV422_16:
+	case ATOMISP_INPUT_FORMAT_YUV422_16:
 		return "yuv422-16";
-	case IA_CSS_STREAM_FORMAT_RGB_444:
+	case ATOMISP_INPUT_FORMAT_RGB_444:
 		return "rgb444";
-	case IA_CSS_STREAM_FORMAT_RGB_555:
+	case ATOMISP_INPUT_FORMAT_RGB_555:
 		return "rgb555";
-	case IA_CSS_STREAM_FORMAT_RGB_565:
+	case ATOMISP_INPUT_FORMAT_RGB_565:
 		return "rgb565";
-	case IA_CSS_STREAM_FORMAT_RGB_666:
+	case ATOMISP_INPUT_FORMAT_RGB_666:
 		return "rgb666";
-	case IA_CSS_STREAM_FORMAT_RGB_888:
+	case ATOMISP_INPUT_FORMAT_RGB_888:
 		return "rgb888";
-	case IA_CSS_STREAM_FORMAT_RAW_6:
+	case ATOMISP_INPUT_FORMAT_RAW_6:
 		return "raw6";
-	case IA_CSS_STREAM_FORMAT_RAW_7:
+	case ATOMISP_INPUT_FORMAT_RAW_7:
 		return "raw7";
-	case IA_CSS_STREAM_FORMAT_RAW_8:
+	case ATOMISP_INPUT_FORMAT_RAW_8:
 		return "raw8";
-	case IA_CSS_STREAM_FORMAT_RAW_10:
+	case ATOMISP_INPUT_FORMAT_RAW_10:
 		return "raw10";
-	case IA_CSS_STREAM_FORMAT_RAW_12:
+	case ATOMISP_INPUT_FORMAT_RAW_12:
 		return "raw12";
-	case IA_CSS_STREAM_FORMAT_RAW_14:
+	case ATOMISP_INPUT_FORMAT_RAW_14:
 		return "raw14";
-	case IA_CSS_STREAM_FORMAT_RAW_16:
+	case ATOMISP_INPUT_FORMAT_RAW_16:
 		return "raw16";
-	case IA_CSS_STREAM_FORMAT_BINARY_8:
+	case ATOMISP_INPUT_FORMAT_BINARY_8:
 		return "binary8";
-	case IA_CSS_STREAM_FORMAT_GENERIC_SHORT1:
+	case ATOMISP_INPUT_FORMAT_GENERIC_SHORT1:
 		return "generic-short1";
-	case IA_CSS_STREAM_FORMAT_GENERIC_SHORT2:
+	case ATOMISP_INPUT_FORMAT_GENERIC_SHORT2:
 		return "generic-short2";
-	case IA_CSS_STREAM_FORMAT_GENERIC_SHORT3:
+	case ATOMISP_INPUT_FORMAT_GENERIC_SHORT3:
 		return "generic-short3";
-	case IA_CSS_STREAM_FORMAT_GENERIC_SHORT4:
+	case ATOMISP_INPUT_FORMAT_GENERIC_SHORT4:
 		return "generic-short4";
-	case IA_CSS_STREAM_FORMAT_GENERIC_SHORT5:
+	case ATOMISP_INPUT_FORMAT_GENERIC_SHORT5:
 		return "generic-short5";
-	case IA_CSS_STREAM_FORMAT_GENERIC_SHORT6:
+	case ATOMISP_INPUT_FORMAT_GENERIC_SHORT6:
 		return "generic-short6";
-	case IA_CSS_STREAM_FORMAT_GENERIC_SHORT7:
+	case ATOMISP_INPUT_FORMAT_GENERIC_SHORT7:
 		return "generic-short7";
-	case IA_CSS_STREAM_FORMAT_GENERIC_SHORT8:
+	case ATOMISP_INPUT_FORMAT_GENERIC_SHORT8:
 		return "generic-short8";
-	case IA_CSS_STREAM_FORMAT_YUV420_8_SHIFT:
+	case ATOMISP_INPUT_FORMAT_YUV420_8_SHIFT:
 		return "yuv420-8-shift";
-	case IA_CSS_STREAM_FORMAT_YUV420_10_SHIFT:
+	case ATOMISP_INPUT_FORMAT_YUV420_10_SHIFT:
 		return "yuv420-10-shift";
-	case IA_CSS_STREAM_FORMAT_EMBEDDED:
+	case ATOMISP_INPUT_FORMAT_EMBEDDED:
 		return "embedded-8";
-	case IA_CSS_STREAM_FORMAT_USER_DEF1:
+	case ATOMISP_INPUT_FORMAT_USER_DEF1:
 		return "user-def-8-type-1";
-	case IA_CSS_STREAM_FORMAT_USER_DEF2:
+	case ATOMISP_INPUT_FORMAT_USER_DEF2:
 		return "user-def-8-type-2";
-	case IA_CSS_STREAM_FORMAT_USER_DEF3:
+	case ATOMISP_INPUT_FORMAT_USER_DEF3:
 		return "user-def-8-type-3";
-	case IA_CSS_STREAM_FORMAT_USER_DEF4:
+	case ATOMISP_INPUT_FORMAT_USER_DEF4:
 		return "user-def-8-type-4";
-	case IA_CSS_STREAM_FORMAT_USER_DEF5:
+	case ATOMISP_INPUT_FORMAT_USER_DEF5:
 		return "user-def-8-type-5";
-	case IA_CSS_STREAM_FORMAT_USER_DEF6:
+	case ATOMISP_INPUT_FORMAT_USER_DEF6:
 		return "user-def-8-type-6";
-	case IA_CSS_STREAM_FORMAT_USER_DEF7:
+	case ATOMISP_INPUT_FORMAT_USER_DEF7:
 		return "user-def-8-type-7";
-	case IA_CSS_STREAM_FORMAT_USER_DEF8:
+	case ATOMISP_INPUT_FORMAT_USER_DEF8:
 		return "user-def-8-type-8";
 
 	default:
@@ -2679,9 +2676,9 @@ ia_css_debug_pipe_graph_dump_frame(
 	}
 	dtrace_dot(
 		"node [shape = box, "
-		"fixedsize=true, width=2, height=0.7]; \"0x%08lx\" "
+		"fixedsize=true, width=2, height=0.7]; \"%p\" "
 		"[label = \"%s\\n%d(%d) x %d, %dbpp\\n%s\"];",
-		HOST_ADDRESS(frame),
+		frame,
 		debug_frame_format2str(frame->info.format),
 		frame->info.res.width,
 		frame->info.padded_width,
@@ -2691,16 +2688,16 @@ ia_css_debug_pipe_graph_dump_frame(
 
 	if (in_frame) {
 		dtrace_dot(
-			"\"0x%08lx\"->\"%s(pipe%d)\" "
+			"\"%p\"->\"%s(pipe%d)\" "
 			"[label = %s_frame];",
-			HOST_ADDRESS(frame),
+			frame,
 			blob_name, id, frame_name);
 	} else {
 		dtrace_dot(
-			"\"%s(pipe%d)\"->\"0x%08lx\" "
+			"\"%s(pipe%d)\"->\"%p\" "
 			"[label = %s_frame];",
 			blob_name, id,
-			HOST_ADDRESS(frame),
+			frame,
 			frame_name);
 	}
 }
@@ -2730,7 +2727,7 @@ void ia_css_debug_pipe_graph_dump_epilogue(void)
 	}
 
 
-	if (pg_inst.stream_format != N_IA_CSS_STREAM_FORMAT) {
+	if (pg_inst.stream_format != N_ATOMISP_INPUT_FORMAT) {
 		/* An input stream format has been set so assume we have
 		 * an input system and sensor
 		 */
@@ -2770,7 +2767,7 @@ void ia_css_debug_pipe_graph_dump_epilogue(void)
 	pg_inst.height = 0;
 	pg_inst.eff_width = 0;
 	pg_inst.eff_height = 0;
-	pg_inst.stream_format = N_IA_CSS_STREAM_FORMAT;
+	pg_inst.stream_format = N_ATOMISP_INPUT_FORMAT;
 }
 
 void
@@ -3011,9 +3008,9 @@ ia_css_debug_pipe_graph_dump_sp_raw_copy(
 
 	snprintf(ring_buffer, sizeof(ring_buffer),
 		"node [shape = box, "
-		"fixedsize=true, width=2, height=0.7]; \"0x%08lx\" "
+		"fixedsize=true, width=2, height=0.7]; \"%p\" "
 		"[label = \"%s\\n%d(%d) x %d\\nRingbuffer\"];",
-		HOST_ADDRESS(out_frame),
+		out_frame,
 		debug_frame_format2str(out_frame->info.format),
 		out_frame->info.res.width,
 		out_frame->info.padded_width,
@@ -3022,9 +3019,9 @@ ia_css_debug_pipe_graph_dump_sp_raw_copy(
 	dtrace_dot(ring_buffer);
 
 	dtrace_dot(
-		"\"%s(pipe%d)\"->\"0x%08lx\" "
+		"\"%s(pipe%d)\"->\"%p\" "
 		"[label = out_frame];",
-		"sp_raw_copy", 1, HOST_ADDRESS(out_frame));
+		"sp_raw_copy", 1, out_frame);
 
 	snprintf(dot_id_input_bin, sizeof(dot_id_input_bin), "%s(pipe%d)", "sp_raw_copy", 1);
 }
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/ifmtr/src/ifmtr.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/ifmtr/src/ifmtr.c
index adefa57820a4..1bed027435fd 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/ifmtr/src/ifmtr.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/ifmtr/src/ifmtr.c
@@ -112,13 +112,13 @@ enum ia_css_err ia_css_ifmtr_configure(struct ia_css_stream_config *config,
 	    width_b_factor = 1, start_column_b,
 	    left_padding = 0;
 	input_formatter_cfg_t if_a_config, if_b_config;
-	enum ia_css_stream_format input_format;
+	enum atomisp_input_format input_format;
 	enum ia_css_err err = IA_CSS_SUCCESS;
 	uint8_t if_config_index;
 
 	/* Determine which input formatter config set is targeted. */
 	/* Index is equal to the CSI-2 port used. */
-	enum ia_css_csi2_port port;
+	enum mipi_port_id port;
 
 	if (binary) {
 		cropped_height = binary->in_frame_info.res.height;
@@ -141,7 +141,7 @@ enum ia_css_err ia_css_ifmtr_configure(struct ia_css_stream_config *config,
 	if (config->mode == IA_CSS_INPUT_MODE_SENSOR
 	    || config->mode == IA_CSS_INPUT_MODE_BUFFERED_SENSOR) {
 		port = config->source.port.port;
-		if_config_index = (uint8_t) (port - IA_CSS_CSI2_PORT0);
+		if_config_index = (uint8_t) (port - MIPI_PORT0_ID);
 	} else if (config->mode == IA_CSS_INPUT_MODE_MEMORY) {
 		if_config_index = SH_CSS_IF_CONFIG_NOT_NEEDED;
 	} else {
@@ -189,7 +189,7 @@ enum ia_css_err ia_css_ifmtr_configure(struct ia_css_stream_config *config,
 	bits_per_pixel = input_formatter_get_alignment(INPUT_FORMATTER0_ID)
 	    * 8 / ISP_VEC_NELEMS;
 	switch (input_format) {
-	case IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY:
+	case ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY:
 		if (two_ppc) {
 			vmem_increment = 1;
 			deinterleaving = 1;
@@ -219,9 +219,9 @@ enum ia_css_err ia_css_ifmtr_configure(struct ia_css_stream_config *config,
 			start_column = start_column * deinterleaving / 2;
 		}
 		break;
-	case IA_CSS_STREAM_FORMAT_YUV420_8:
-	case IA_CSS_STREAM_FORMAT_YUV420_10:
-	case IA_CSS_STREAM_FORMAT_YUV420_16:
+	case ATOMISP_INPUT_FORMAT_YUV420_8:
+	case ATOMISP_INPUT_FORMAT_YUV420_10:
+	case ATOMISP_INPUT_FORMAT_YUV420_16:
 		if (two_ppc) {
 			vmem_increment = 1;
 			deinterleaving = 1;
@@ -246,9 +246,9 @@ enum ia_css_err ia_css_ifmtr_configure(struct ia_css_stream_config *config,
 			start_column *= deinterleaving;
 		}
 		break;
-	case IA_CSS_STREAM_FORMAT_YUV422_8:
-	case IA_CSS_STREAM_FORMAT_YUV422_10:
-	case IA_CSS_STREAM_FORMAT_YUV422_16:
+	case ATOMISP_INPUT_FORMAT_YUV422_8:
+	case ATOMISP_INPUT_FORMAT_YUV422_10:
+	case ATOMISP_INPUT_FORMAT_YUV422_16:
 		if (two_ppc) {
 			vmem_increment = 1;
 			deinterleaving = 1;
@@ -267,11 +267,11 @@ enum ia_css_err ia_css_ifmtr_configure(struct ia_css_stream_config *config,
 			start_column *= deinterleaving;
 		}
 		break;
-	case IA_CSS_STREAM_FORMAT_RGB_444:
-	case IA_CSS_STREAM_FORMAT_RGB_555:
-	case IA_CSS_STREAM_FORMAT_RGB_565:
-	case IA_CSS_STREAM_FORMAT_RGB_666:
-	case IA_CSS_STREAM_FORMAT_RGB_888:
+	case ATOMISP_INPUT_FORMAT_RGB_444:
+	case ATOMISP_INPUT_FORMAT_RGB_555:
+	case ATOMISP_INPUT_FORMAT_RGB_565:
+	case ATOMISP_INPUT_FORMAT_RGB_666:
+	case ATOMISP_INPUT_FORMAT_RGB_888:
 		num_vectors *= 2;
 		if (two_ppc) {
 			deinterleaving = 2;	/* BR in if_a, G in if_b */
@@ -293,11 +293,11 @@ enum ia_css_err ia_css_ifmtr_configure(struct ia_css_stream_config *config,
 		num_vectors = num_vectors / 2 * deinterleaving;
 		buf_offset_b = buffer_width / 2 / ISP_VEC_NELEMS;
 		break;
-	case IA_CSS_STREAM_FORMAT_RAW_6:
-	case IA_CSS_STREAM_FORMAT_RAW_7:
-	case IA_CSS_STREAM_FORMAT_RAW_8:
-	case IA_CSS_STREAM_FORMAT_RAW_10:
-	case IA_CSS_STREAM_FORMAT_RAW_12:
+	case ATOMISP_INPUT_FORMAT_RAW_6:
+	case ATOMISP_INPUT_FORMAT_RAW_7:
+	case ATOMISP_INPUT_FORMAT_RAW_8:
+	case ATOMISP_INPUT_FORMAT_RAW_10:
+	case ATOMISP_INPUT_FORMAT_RAW_12:
 		if (two_ppc) {
 			int crop_col = (start_column % 2) == 1;
 			vmem_increment = 2;
@@ -332,8 +332,8 @@ enum ia_css_err ia_css_ifmtr_configure(struct ia_css_stream_config *config,
 		vectors_per_line = CEIL_DIV(cropped_width, ISP_VEC_NELEMS);
 		vectors_per_line = CEIL_MUL(vectors_per_line, deinterleaving);
 		break;
-	case IA_CSS_STREAM_FORMAT_RAW_14:
-	case IA_CSS_STREAM_FORMAT_RAW_16:
+	case ATOMISP_INPUT_FORMAT_RAW_14:
+	case ATOMISP_INPUT_FORMAT_RAW_16:
 		if (two_ppc) {
 			num_vectors *= 2;
 			vmem_increment = 1;
@@ -350,26 +350,26 @@ enum ia_css_err ia_css_ifmtr_configure(struct ia_css_stream_config *config,
 		}
 		buffer_height *= 2;
 		break;
-	case IA_CSS_STREAM_FORMAT_BINARY_8:
-	case IA_CSS_STREAM_FORMAT_GENERIC_SHORT1:
-	case IA_CSS_STREAM_FORMAT_GENERIC_SHORT2:
-	case IA_CSS_STREAM_FORMAT_GENERIC_SHORT3:
-	case IA_CSS_STREAM_FORMAT_GENERIC_SHORT4:
-	case IA_CSS_STREAM_FORMAT_GENERIC_SHORT5:
-	case IA_CSS_STREAM_FORMAT_GENERIC_SHORT6:
-	case IA_CSS_STREAM_FORMAT_GENERIC_SHORT7:
-	case IA_CSS_STREAM_FORMAT_GENERIC_SHORT8:
-	case IA_CSS_STREAM_FORMAT_YUV420_8_SHIFT:
-	case IA_CSS_STREAM_FORMAT_YUV420_10_SHIFT:
-	case IA_CSS_STREAM_FORMAT_EMBEDDED:
-	case IA_CSS_STREAM_FORMAT_USER_DEF1:
-	case IA_CSS_STREAM_FORMAT_USER_DEF2:
-	case IA_CSS_STREAM_FORMAT_USER_DEF3:
-	case IA_CSS_STREAM_FORMAT_USER_DEF4:
-	case IA_CSS_STREAM_FORMAT_USER_DEF5:
-	case IA_CSS_STREAM_FORMAT_USER_DEF6:
-	case IA_CSS_STREAM_FORMAT_USER_DEF7:
-	case IA_CSS_STREAM_FORMAT_USER_DEF8:
+	case ATOMISP_INPUT_FORMAT_BINARY_8:
+	case ATOMISP_INPUT_FORMAT_GENERIC_SHORT1:
+	case ATOMISP_INPUT_FORMAT_GENERIC_SHORT2:
+	case ATOMISP_INPUT_FORMAT_GENERIC_SHORT3:
+	case ATOMISP_INPUT_FORMAT_GENERIC_SHORT4:
+	case ATOMISP_INPUT_FORMAT_GENERIC_SHORT5:
+	case ATOMISP_INPUT_FORMAT_GENERIC_SHORT6:
+	case ATOMISP_INPUT_FORMAT_GENERIC_SHORT7:
+	case ATOMISP_INPUT_FORMAT_GENERIC_SHORT8:
+	case ATOMISP_INPUT_FORMAT_YUV420_8_SHIFT:
+	case ATOMISP_INPUT_FORMAT_YUV420_10_SHIFT:
+	case ATOMISP_INPUT_FORMAT_EMBEDDED:
+	case ATOMISP_INPUT_FORMAT_USER_DEF1:
+	case ATOMISP_INPUT_FORMAT_USER_DEF2:
+	case ATOMISP_INPUT_FORMAT_USER_DEF3:
+	case ATOMISP_INPUT_FORMAT_USER_DEF4:
+	case ATOMISP_INPUT_FORMAT_USER_DEF5:
+	case ATOMISP_INPUT_FORMAT_USER_DEF6:
+	case ATOMISP_INPUT_FORMAT_USER_DEF7:
+	case ATOMISP_INPUT_FORMAT_USER_DEF8:
 		break;
 	}
 	if (width_a == 0)
@@ -420,9 +420,9 @@ enum ia_css_err ia_css_ifmtr_configure(struct ia_css_stream_config *config,
 	if_a_config.buf_eol_offset =
 	    buffer_width * bits_per_pixel / 8 - line_width;
 	if_a_config.is_yuv420_format =
-	    (input_format == IA_CSS_STREAM_FORMAT_YUV420_8)
-	    || (input_format == IA_CSS_STREAM_FORMAT_YUV420_10)
-	    || (input_format == IA_CSS_STREAM_FORMAT_YUV420_16);
+	    (input_format == ATOMISP_INPUT_FORMAT_YUV420_8)
+	    || (input_format == ATOMISP_INPUT_FORMAT_YUV420_10)
+	    || (input_format == ATOMISP_INPUT_FORMAT_YUV420_16);
 	if_a_config.block_no_reqs = (config->mode != IA_CSS_INPUT_MODE_SENSOR);
 
 	if (two_ppc) {
@@ -449,9 +449,9 @@ enum ia_css_err ia_css_ifmtr_configure(struct ia_css_stream_config *config,
 		if_b_config.buf_eol_offset =
 		    buffer_width * bits_per_pixel / 8 - line_width;
 		if_b_config.is_yuv420_format =
-		    input_format == IA_CSS_STREAM_FORMAT_YUV420_8
-		    || input_format == IA_CSS_STREAM_FORMAT_YUV420_10
-		    || input_format == IA_CSS_STREAM_FORMAT_YUV420_16;
+		    input_format == ATOMISP_INPUT_FORMAT_YUV420_8
+		    || input_format == ATOMISP_INPUT_FORMAT_YUV420_10
+		    || input_format == ATOMISP_INPUT_FORMAT_YUV420_16;
 		if_b_config.block_no_reqs =
 		    (config->mode != IA_CSS_INPUT_MODE_SENSOR);
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/inputfifo/interface/ia_css_inputfifo.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/inputfifo/interface/ia_css_inputfifo.h
index 47d0f7e53f47..545f9e2da59e 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/inputfifo/interface/ia_css_inputfifo.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/inputfifo/interface/ia_css_inputfifo.h
@@ -42,12 +42,12 @@ void ia_css_inputfifo_send_input_frame(
 	unsigned int	width,
 	unsigned int	height,
 	unsigned int	ch_id,
-	enum ia_css_stream_format	input_format,
+	enum atomisp_input_format	input_format,
 	bool			two_ppc);
 
 void ia_css_inputfifo_start_frame(
 	unsigned int	ch_id,
-	enum ia_css_stream_format	input_format,
+	enum atomisp_input_format	input_format,
 	bool			two_ppc);
 
 void ia_css_inputfifo_send_line(
@@ -59,7 +59,7 @@ void ia_css_inputfifo_send_line(
 
 void ia_css_inputfifo_send_embedded_line(
 	unsigned int	ch_id,
-	enum ia_css_stream_format	data_type,
+	enum atomisp_input_format	data_type,
 	const unsigned short	*data,
 	unsigned int	width);
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/inputfifo/src/inputfifo.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/inputfifo/src/inputfifo.c
index 8dc74927e9a2..24ca4aaf8df1 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/inputfifo/src/inputfifo.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/inputfifo/src/inputfifo.c
@@ -86,7 +86,7 @@ static unsigned int inputfifo_curr_ch_id, inputfifo_curr_fmt_type;
 #endif
 struct inputfifo_instance {
 	unsigned int				ch_id;
-	enum ia_css_stream_format	input_format;
+	enum atomisp_input_format	input_format;
 	bool						two_ppc;
 	bool						streaming;
 	unsigned int				hblank_cycles;
@@ -466,21 +466,21 @@ static void inputfifo_send_frame(
 
 
 static enum inputfifo_mipi_data_type inputfifo_determine_type(
-	enum ia_css_stream_format input_format)
+	enum atomisp_input_format input_format)
 {
 	enum inputfifo_mipi_data_type type;
 
 	type = inputfifo_mipi_data_type_regular;
-	if (input_format == IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY) {
+	if (input_format == ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY) {
 		type =
 			inputfifo_mipi_data_type_yuv420_legacy;
-	} else if (input_format == IA_CSS_STREAM_FORMAT_YUV420_8  ||
-		   input_format == IA_CSS_STREAM_FORMAT_YUV420_10 ||
-		   input_format == IA_CSS_STREAM_FORMAT_YUV420_16) {
+	} else if (input_format == ATOMISP_INPUT_FORMAT_YUV420_8  ||
+		   input_format == ATOMISP_INPUT_FORMAT_YUV420_10 ||
+		   input_format == ATOMISP_INPUT_FORMAT_YUV420_16) {
 		type =
 			inputfifo_mipi_data_type_yuv420;
-	} else if (input_format >= IA_CSS_STREAM_FORMAT_RGB_444 &&
-		   input_format <= IA_CSS_STREAM_FORMAT_RGB_888) {
+	} else if (input_format >= ATOMISP_INPUT_FORMAT_RGB_444 &&
+		   input_format <= ATOMISP_INPUT_FORMAT_RGB_888) {
 		type =
 			inputfifo_mipi_data_type_rgb;
 	}
@@ -500,7 +500,7 @@ void ia_css_inputfifo_send_input_frame(
 	unsigned int width,
 	unsigned int height,
 	unsigned int ch_id,
-	enum ia_css_stream_format input_format,
+	enum atomisp_input_format input_format,
 	bool two_ppc)
 {
 	unsigned int fmt_type, hblank_cycles, marker_cycles;
@@ -524,7 +524,7 @@ void ia_css_inputfifo_send_input_frame(
 
 void ia_css_inputfifo_start_frame(
 	unsigned int ch_id,
-	enum ia_css_stream_format input_format,
+	enum atomisp_input_format input_format,
 	bool two_ppc)
 {
 	struct inputfifo_instance *s2mi;
@@ -574,7 +574,7 @@ void ia_css_inputfifo_send_line(
 
 void ia_css_inputfifo_send_embedded_line(
 	unsigned int	ch_id,
-	enum ia_css_stream_format	data_type,
+	enum atomisp_input_format	data_type,
 	const unsigned short	*data,
 	unsigned int	width)
 {
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/interface/ia_css_isys.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/interface/ia_css_isys.h
index 4cf2defe9ef0..8c005db9766e 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/interface/ia_css_isys.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/interface/ia_css_isys.h
@@ -50,8 +50,8 @@ typedef input_system_cfg_t	ia_css_isys_descr_t;
 #if defined(USE_INPUT_SYSTEM_VERSION_2) || defined(USE_INPUT_SYSTEM_VERSION_2401)
 input_system_error_t ia_css_isys_init(void);
 void ia_css_isys_uninit(void);
-mipi_port_ID_t ia_css_isys_port_to_mipi_port(
-	enum ia_css_csi2_port api_port);
+enum mipi_port_id ia_css_isys_port_to_mipi_port(
+	enum mipi_port_id api_port);
 #endif
 
 #if defined(USE_INPUT_SYSTEM_VERSION_2401)
@@ -68,7 +68,7 @@ mipi_port_ID_t ia_css_isys_port_to_mipi_port(
  *				there is already a stream registered with the same handle
  */
 enum ia_css_err ia_css_isys_csi_rx_register_stream(
-	enum ia_css_csi2_port port,
+	enum mipi_port_id port,
 	uint32_t isys_stream_id);
 
 /**
@@ -83,14 +83,14 @@ enum ia_css_err ia_css_isys_csi_rx_register_stream(
  *				there is no stream registered with that handle
  */
 enum ia_css_err ia_css_isys_csi_rx_unregister_stream(
-	enum ia_css_csi2_port port,
+	enum mipi_port_id port,
 	uint32_t isys_stream_id);
 
 enum ia_css_err ia_css_isys_convert_compressed_format(
 		struct ia_css_csi2_compression *comp,
 		struct input_system_cfg_s *cfg);
 unsigned int ia_css_csi2_calculate_input_system_alignment(
-	enum ia_css_stream_format fmt_type);
+	enum atomisp_input_format fmt_type);
 #endif
 
 #if !defined(USE_INPUT_SYSTEM_VERSION_2401)
@@ -101,12 +101,12 @@ void ia_css_isys_rx_configure(
 
 void ia_css_isys_rx_disable(void);
 
-void ia_css_isys_rx_enable_all_interrupts(mipi_port_ID_t port);
+void ia_css_isys_rx_enable_all_interrupts(enum mipi_port_id port);
 
-unsigned int ia_css_isys_rx_get_interrupt_reg(mipi_port_ID_t port);
-void ia_css_isys_rx_get_irq_info(mipi_port_ID_t port,
+unsigned int ia_css_isys_rx_get_interrupt_reg(enum mipi_port_id port);
+void ia_css_isys_rx_get_irq_info(enum mipi_port_id port,
 				 unsigned int *irq_infos);
-void ia_css_isys_rx_clear_irq_info(mipi_port_ID_t port,
+void ia_css_isys_rx_clear_irq_info(enum mipi_port_id port,
 				   unsigned int irq_infos);
 unsigned int ia_css_isys_rx_translate_irq_infos(unsigned int bits);
 
@@ -124,7 +124,7 @@ unsigned int ia_css_isys_rx_translate_irq_infos(unsigned int bits);
  * format type must be sumitted correctly by the application.
  */
 enum ia_css_err ia_css_isys_convert_stream_format_to_mipi_format(
-		enum ia_css_stream_format input_format,
+		enum atomisp_input_format input_format,
 		mipi_predictor_t compression,
 		unsigned int *fmt_type);
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/csi_rx_rmgr.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/csi_rx_rmgr.c
index 3b04dc51335a..a914ce5532ec 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/csi_rx_rmgr.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/csi_rx_rmgr.c
@@ -141,7 +141,7 @@ void ia_css_isys_csi_rx_lut_rmgr_release(
 }
 
 enum ia_css_err ia_css_isys_csi_rx_register_stream(
-	enum ia_css_csi2_port port,
+	enum mipi_port_id port,
 	uint32_t isys_stream_id)
 {
 	enum ia_css_err retval = IA_CSS_ERR_INTERNAL_ERROR;
@@ -160,7 +160,7 @@ enum ia_css_err ia_css_isys_csi_rx_register_stream(
 }
 
 enum ia_css_err ia_css_isys_csi_rx_unregister_stream(
-	enum ia_css_csi2_port port,
+	enum mipi_port_id port,
 	uint32_t isys_stream_id)
 {
 	enum ia_css_err retval = IA_CSS_ERR_INTERNAL_ERROR;
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/isys_init.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/isys_init.c
index 4122084fd237..2ae5e59d5e31 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/isys_init.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/isys_init.c
@@ -105,8 +105,6 @@ input_system_error_t ia_css_isys_init(void)
 #elif defined(USE_INPUT_SYSTEM_VERSION_2401)
 input_system_error_t ia_css_isys_init(void)
 {
-	input_system_error_t error = INPUT_SYSTEM_ERR_NO_ERROR;
-
 	ia_css_isys_csi_rx_lut_rmgr_init();
 	ia_css_isys_ibuf_rmgr_init();
 	ia_css_isys_dma_channel_rmgr_init();
@@ -120,7 +118,7 @@ input_system_error_t ia_css_isys_init(void)
 	isys_irqc_status_enable(ISYS_IRQ1_ID);
 	isys_irqc_status_enable(ISYS_IRQ2_ID);
 
-	return error;
+	return INPUT_SYSTEM_ERR_NO_ERROR;
 }
 #endif
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/rx.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/rx.c
index 70f6cb5e5918..425bd3cc3f34 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/rx.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/rx.c
@@ -36,7 +36,7 @@ more details.
 #include "sh_css_internal.h"
 
 #if !defined(USE_INPUT_SYSTEM_VERSION_2401)
-void ia_css_isys_rx_enable_all_interrupts(mipi_port_ID_t port)
+void ia_css_isys_rx_enable_all_interrupts(enum mipi_port_id port)
 {
 	hrt_data bits = receiver_port_reg_load(RX0_ID,
 				port,
@@ -80,22 +80,22 @@ void ia_css_isys_rx_enable_all_interrupts(mipi_port_ID_t port)
  * initializers in Windows. Without that there is no easy way to guarantee
  * that the array values would be in the correct order.
  * */
-mipi_port_ID_t ia_css_isys_port_to_mipi_port(enum ia_css_csi2_port api_port)
+enum mipi_port_id ia_css_isys_port_to_mipi_port(enum mipi_port_id api_port)
 {
 	/* In this module the validity of the inptu variable should
 	 * have been checked already, so we do not check for erroneous
 	 * values. */
-	mipi_port_ID_t port = MIPI_PORT0_ID;
+	enum mipi_port_id port = MIPI_PORT0_ID;
 
-	if (api_port == IA_CSS_CSI2_PORT1)
+	if (api_port == MIPI_PORT1_ID)
 		port = MIPI_PORT1_ID;
-	else if (api_port == IA_CSS_CSI2_PORT2)
+	else if (api_port == MIPI_PORT2_ID)
 		port = MIPI_PORT2_ID;
 
 	return port;
 }
 
-unsigned int ia_css_isys_rx_get_interrupt_reg(mipi_port_ID_t port)
+unsigned int ia_css_isys_rx_get_interrupt_reg(enum mipi_port_id port)
 {
 	return receiver_port_reg_load(RX0_ID,
 				      port,
@@ -104,17 +104,17 @@ unsigned int ia_css_isys_rx_get_interrupt_reg(mipi_port_ID_t port)
 
 void ia_css_rx_get_irq_info(unsigned int *irq_infos)
 {
-	ia_css_rx_port_get_irq_info(IA_CSS_CSI2_PORT1, irq_infos);
+	ia_css_rx_port_get_irq_info(MIPI_PORT1_ID, irq_infos);
 }
 
-void ia_css_rx_port_get_irq_info(enum ia_css_csi2_port api_port,
+void ia_css_rx_port_get_irq_info(enum mipi_port_id api_port,
 				 unsigned int *irq_infos)
 {
-	mipi_port_ID_t port = ia_css_isys_port_to_mipi_port(api_port);
+	enum mipi_port_id port = ia_css_isys_port_to_mipi_port(api_port);
 	ia_css_isys_rx_get_irq_info(port, irq_infos);
 }
 
-void ia_css_isys_rx_get_irq_info(mipi_port_ID_t port,
+void ia_css_isys_rx_get_irq_info(enum mipi_port_id port,
 				 unsigned int *irq_infos)
 {
 	unsigned int bits;
@@ -169,16 +169,16 @@ unsigned int ia_css_isys_rx_translate_irq_infos(unsigned int bits)
 
 void ia_css_rx_clear_irq_info(unsigned int irq_infos)
 {
-	ia_css_rx_port_clear_irq_info(IA_CSS_CSI2_PORT1, irq_infos);
+	ia_css_rx_port_clear_irq_info(MIPI_PORT1_ID, irq_infos);
 }
 
-void ia_css_rx_port_clear_irq_info(enum ia_css_csi2_port api_port, unsigned int irq_infos)
+void ia_css_rx_port_clear_irq_info(enum mipi_port_id api_port, unsigned int irq_infos)
 {
-	mipi_port_ID_t port = ia_css_isys_port_to_mipi_port(api_port);
+	enum mipi_port_id port = ia_css_isys_port_to_mipi_port(api_port);
 	ia_css_isys_rx_clear_irq_info(port, irq_infos);
 }
 
-void ia_css_isys_rx_clear_irq_info(mipi_port_ID_t port, unsigned int irq_infos)
+void ia_css_isys_rx_clear_irq_info(enum mipi_port_id port, unsigned int irq_infos)
 {
 	hrt_data bits = receiver_port_reg_load(RX0_ID,
 				port,
@@ -229,7 +229,7 @@ void ia_css_isys_rx_clear_irq_info(mipi_port_ID_t port, unsigned int irq_infos)
 #endif /* #if !defined(USE_INPUT_SYSTEM_VERSION_2401) */
 
 enum ia_css_err ia_css_isys_convert_stream_format_to_mipi_format(
-		enum ia_css_stream_format input_format,
+		enum atomisp_input_format input_format,
 		mipi_predictor_t compression,
 		unsigned int *fmt_type)
 {
@@ -244,25 +244,25 @@ enum ia_css_err ia_css_isys_convert_stream_format_to_mipi_format(
 	 */
 	if (compression != MIPI_PREDICTOR_NONE) {
 		switch (input_format) {
-		case IA_CSS_STREAM_FORMAT_RAW_6:
+		case ATOMISP_INPUT_FORMAT_RAW_6:
 			*fmt_type = 6;
 			break;
-		case IA_CSS_STREAM_FORMAT_RAW_7:
+		case ATOMISP_INPUT_FORMAT_RAW_7:
 			*fmt_type = 7;
 			break;
-		case IA_CSS_STREAM_FORMAT_RAW_8:
+		case ATOMISP_INPUT_FORMAT_RAW_8:
 			*fmt_type = 8;
 			break;
-		case IA_CSS_STREAM_FORMAT_RAW_10:
+		case ATOMISP_INPUT_FORMAT_RAW_10:
 			*fmt_type = 10;
 			break;
-		case IA_CSS_STREAM_FORMAT_RAW_12:
+		case ATOMISP_INPUT_FORMAT_RAW_12:
 			*fmt_type = 12;
 			break;
-		case IA_CSS_STREAM_FORMAT_RAW_14:
+		case ATOMISP_INPUT_FORMAT_RAW_14:
 			*fmt_type = 14;
 			break;
-		case IA_CSS_STREAM_FORMAT_RAW_16:
+		case ATOMISP_INPUT_FORMAT_RAW_16:
 			*fmt_type = 16;
 			break;
 		default:
@@ -277,96 +277,96 @@ enum ia_css_err ia_css_isys_convert_stream_format_to_mipi_format(
 	 * MW: For some reason the mapping is not 1-to-1
 	 */
 	switch (input_format) {
-	case IA_CSS_STREAM_FORMAT_RGB_888:
+	case ATOMISP_INPUT_FORMAT_RGB_888:
 		*fmt_type = MIPI_FORMAT_RGB888;
 		break;
-	case IA_CSS_STREAM_FORMAT_RGB_555:
+	case ATOMISP_INPUT_FORMAT_RGB_555:
 		*fmt_type = MIPI_FORMAT_RGB555;
 		break;
-	case IA_CSS_STREAM_FORMAT_RGB_444:
+	case ATOMISP_INPUT_FORMAT_RGB_444:
 		*fmt_type = MIPI_FORMAT_RGB444;
 		break;
-	case IA_CSS_STREAM_FORMAT_RGB_565:
+	case ATOMISP_INPUT_FORMAT_RGB_565:
 		*fmt_type = MIPI_FORMAT_RGB565;
 		break;
-	case IA_CSS_STREAM_FORMAT_RGB_666:
+	case ATOMISP_INPUT_FORMAT_RGB_666:
 		*fmt_type = MIPI_FORMAT_RGB666;
 		break;
-	case IA_CSS_STREAM_FORMAT_RAW_8:
+	case ATOMISP_INPUT_FORMAT_RAW_8:
 		*fmt_type = MIPI_FORMAT_RAW8;
 		break;
-	case IA_CSS_STREAM_FORMAT_RAW_10:
+	case ATOMISP_INPUT_FORMAT_RAW_10:
 		*fmt_type = MIPI_FORMAT_RAW10;
 		break;
-	case IA_CSS_STREAM_FORMAT_RAW_6:
+	case ATOMISP_INPUT_FORMAT_RAW_6:
 		*fmt_type = MIPI_FORMAT_RAW6;
 		break;
-	case IA_CSS_STREAM_FORMAT_RAW_7:
+	case ATOMISP_INPUT_FORMAT_RAW_7:
 		*fmt_type = MIPI_FORMAT_RAW7;
 		break;
-	case IA_CSS_STREAM_FORMAT_RAW_12:
+	case ATOMISP_INPUT_FORMAT_RAW_12:
 		*fmt_type = MIPI_FORMAT_RAW12;
 		break;
-	case IA_CSS_STREAM_FORMAT_RAW_14:
+	case ATOMISP_INPUT_FORMAT_RAW_14:
 		*fmt_type = MIPI_FORMAT_RAW14;
 		break;
-	case IA_CSS_STREAM_FORMAT_YUV420_8:
+	case ATOMISP_INPUT_FORMAT_YUV420_8:
 		*fmt_type = MIPI_FORMAT_YUV420_8;
 		break;
-	case IA_CSS_STREAM_FORMAT_YUV420_10:
+	case ATOMISP_INPUT_FORMAT_YUV420_10:
 		*fmt_type = MIPI_FORMAT_YUV420_10;
 		break;
-	case IA_CSS_STREAM_FORMAT_YUV422_8:
+	case ATOMISP_INPUT_FORMAT_YUV422_8:
 		*fmt_type = MIPI_FORMAT_YUV422_8;
 		break;
-	case IA_CSS_STREAM_FORMAT_YUV422_10:
+	case ATOMISP_INPUT_FORMAT_YUV422_10:
 		*fmt_type = MIPI_FORMAT_YUV422_10;
 		break;
-	case IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY:
+	case ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY:
 		*fmt_type = MIPI_FORMAT_YUV420_8_LEGACY;
 		break;
-	case IA_CSS_STREAM_FORMAT_EMBEDDED:
+	case ATOMISP_INPUT_FORMAT_EMBEDDED:
 		*fmt_type = MIPI_FORMAT_EMBEDDED;
 		break;
 #ifndef USE_INPUT_SYSTEM_VERSION_2401
-	case IA_CSS_STREAM_FORMAT_RAW_16:
+	case ATOMISP_INPUT_FORMAT_RAW_16:
 		/* This is not specified by Arasan, so we use
 		 * 17 for now.
 		 */
 		*fmt_type = MIPI_FORMAT_RAW16;
 		break;
-	case IA_CSS_STREAM_FORMAT_BINARY_8:
+	case ATOMISP_INPUT_FORMAT_BINARY_8:
 		*fmt_type = MIPI_FORMAT_BINARY_8;
 		break;
 #else
-	case IA_CSS_STREAM_FORMAT_USER_DEF1:
+	case ATOMISP_INPUT_FORMAT_USER_DEF1:
 		*fmt_type = MIPI_FORMAT_CUSTOM0;
 		break;
-	case IA_CSS_STREAM_FORMAT_USER_DEF2:
+	case ATOMISP_INPUT_FORMAT_USER_DEF2:
 		*fmt_type = MIPI_FORMAT_CUSTOM1;
 		break;
-	case IA_CSS_STREAM_FORMAT_USER_DEF3:
+	case ATOMISP_INPUT_FORMAT_USER_DEF3:
 		*fmt_type = MIPI_FORMAT_CUSTOM2;
 		break;
-	case IA_CSS_STREAM_FORMAT_USER_DEF4:
+	case ATOMISP_INPUT_FORMAT_USER_DEF4:
 		*fmt_type = MIPI_FORMAT_CUSTOM3;
 		break;
-	case IA_CSS_STREAM_FORMAT_USER_DEF5:
+	case ATOMISP_INPUT_FORMAT_USER_DEF5:
 		*fmt_type = MIPI_FORMAT_CUSTOM4;
 		break;
-	case IA_CSS_STREAM_FORMAT_USER_DEF6:
+	case ATOMISP_INPUT_FORMAT_USER_DEF6:
 		*fmt_type = MIPI_FORMAT_CUSTOM5;
 		break;
-	case IA_CSS_STREAM_FORMAT_USER_DEF7:
+	case ATOMISP_INPUT_FORMAT_USER_DEF7:
 		*fmt_type = MIPI_FORMAT_CUSTOM6;
 		break;
-	case IA_CSS_STREAM_FORMAT_USER_DEF8:
+	case ATOMISP_INPUT_FORMAT_USER_DEF8:
 		*fmt_type = MIPI_FORMAT_CUSTOM7;
 		break;
 #endif
 
-	case IA_CSS_STREAM_FORMAT_YUV420_16:
-	case IA_CSS_STREAM_FORMAT_YUV422_16:
+	case ATOMISP_INPUT_FORMAT_YUV420_16:
+	case ATOMISP_INPUT_FORMAT_YUV422_16:
 	default:
 		return IA_CSS_ERR_INTERNAL_ERROR;
 	}
@@ -448,34 +448,34 @@ enum ia_css_err ia_css_isys_convert_compressed_format(
 }
 
 unsigned int ia_css_csi2_calculate_input_system_alignment(
-	enum ia_css_stream_format fmt_type)
+	enum atomisp_input_format fmt_type)
 {
 	unsigned int memory_alignment_in_bytes = HIVE_ISP_DDR_WORD_BYTES;
 
 	switch (fmt_type) {
-	case IA_CSS_STREAM_FORMAT_RAW_6:
-	case IA_CSS_STREAM_FORMAT_RAW_7:
-	case IA_CSS_STREAM_FORMAT_RAW_8:
-	case IA_CSS_STREAM_FORMAT_RAW_10:
-	case IA_CSS_STREAM_FORMAT_RAW_12:
-	case IA_CSS_STREAM_FORMAT_RAW_14:
+	case ATOMISP_INPUT_FORMAT_RAW_6:
+	case ATOMISP_INPUT_FORMAT_RAW_7:
+	case ATOMISP_INPUT_FORMAT_RAW_8:
+	case ATOMISP_INPUT_FORMAT_RAW_10:
+	case ATOMISP_INPUT_FORMAT_RAW_12:
+	case ATOMISP_INPUT_FORMAT_RAW_14:
 		memory_alignment_in_bytes = 2 * ISP_VEC_NELEMS;
 		break;
-	case IA_CSS_STREAM_FORMAT_YUV420_8:
-	case IA_CSS_STREAM_FORMAT_YUV422_8:
-	case IA_CSS_STREAM_FORMAT_USER_DEF1:
-	case IA_CSS_STREAM_FORMAT_USER_DEF2:
-	case IA_CSS_STREAM_FORMAT_USER_DEF3:
-	case IA_CSS_STREAM_FORMAT_USER_DEF4:
-	case IA_CSS_STREAM_FORMAT_USER_DEF5:
-	case IA_CSS_STREAM_FORMAT_USER_DEF6:
-	case IA_CSS_STREAM_FORMAT_USER_DEF7:
-	case IA_CSS_STREAM_FORMAT_USER_DEF8:
+	case ATOMISP_INPUT_FORMAT_YUV420_8:
+	case ATOMISP_INPUT_FORMAT_YUV422_8:
+	case ATOMISP_INPUT_FORMAT_USER_DEF1:
+	case ATOMISP_INPUT_FORMAT_USER_DEF2:
+	case ATOMISP_INPUT_FORMAT_USER_DEF3:
+	case ATOMISP_INPUT_FORMAT_USER_DEF4:
+	case ATOMISP_INPUT_FORMAT_USER_DEF5:
+	case ATOMISP_INPUT_FORMAT_USER_DEF6:
+	case ATOMISP_INPUT_FORMAT_USER_DEF7:
+	case ATOMISP_INPUT_FORMAT_USER_DEF8:
 		/* Planar YUV formats need to have all planes aligned, this means
 		 * double the alignment for the Y plane if the horizontal decimation is 2. */
 		memory_alignment_in_bytes = 2 * HIVE_ISP_DDR_WORD_BYTES;
 		break;
-	case IA_CSS_STREAM_FORMAT_EMBEDDED:
+	case ATOMISP_INPUT_FORMAT_EMBEDDED:
 	default:
 		memory_alignment_in_bytes = HIVE_ISP_DDR_WORD_BYTES;
 		break;
@@ -492,7 +492,7 @@ void ia_css_isys_rx_configure(const rx_cfg_t *config,
 #if defined(HAS_RX_VERSION_2)
 	bool port_enabled[N_MIPI_PORT_ID];
 	bool any_port_enabled = false;
-	mipi_port_ID_t port;
+	enum mipi_port_id port;
 
 	if ((config == NULL)
 		|| (config->mode >= N_RX_MODE)
@@ -500,7 +500,7 @@ void ia_css_isys_rx_configure(const rx_cfg_t *config,
 		assert(0);
 		return;
 	}
-	for (port = (mipi_port_ID_t) 0; port < N_MIPI_PORT_ID; port++) {
+	for (port = (enum mipi_port_id) 0; port < N_MIPI_PORT_ID; port++) {
 		if (is_receiver_port_enabled(RX0_ID, port))
 			any_port_enabled = true;
 	}
@@ -595,8 +595,8 @@ void ia_css_isys_rx_configure(const rx_cfg_t *config,
 
 void ia_css_isys_rx_disable(void)
 {
-	mipi_port_ID_t port;
-	for (port = (mipi_port_ID_t) 0; port < N_MIPI_PORT_ID; port++) {
+	enum mipi_port_id port;
+	for (port = (enum mipi_port_id) 0; port < N_MIPI_PORT_ID; port++) {
 		receiver_port_reg_store(RX0_ID, port,
 					_HRT_CSS_RECEIVER_DEVICE_READY_REG_IDX,
 					false);
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/virtual_isys.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/virtual_isys.c
index 90922a7acefd..2484949453b7 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/virtual_isys.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/virtual_isys.c
@@ -331,7 +331,7 @@ static bool create_input_system_channel(
 		break;
 	}
 
-	if (rc == false)
+	if (!rc)
 		return false;
 
 	if (!acquire_sid(me->stream2mmio_id, &(me->stream2mmio_sid_id))) {
@@ -474,7 +474,7 @@ static bool calculate_input_system_channel_cfg(
 
 	rc = calculate_stream2mmio_cfg(isys_cfg, metadata,
 			&(channel_cfg->stream2mmio_cfg));
-	if (rc == false)
+	if (!rc)
 		return false;
 
 	rc = calculate_ibuf_ctrl_cfg(
@@ -482,7 +482,7 @@ static bool calculate_input_system_channel_cfg(
 			input_port,
 			isys_cfg,
 			&(channel_cfg->ibuf_ctrl_cfg));
-	if (rc == false)
+	if (!rc)
 		return false;
 	if (metadata)
 		channel_cfg->ibuf_ctrl_cfg.stores_per_frame = isys_cfg->metadata.lines_per_frame;
@@ -491,7 +491,7 @@ static bool calculate_input_system_channel_cfg(
 			channel,
 			isys_cfg,
 			&(channel_cfg->dma_cfg));
-	if (rc == false)
+	if (!rc)
 		return false;
 
 	rc = calculate_isys2401_dma_port_cfg(
@@ -499,7 +499,7 @@ static bool calculate_input_system_channel_cfg(
 			false,
 			metadata,
 			&(channel_cfg->dma_src_port_cfg));
-	if (rc == false)
+	if (!rc)
 		return false;
 
 	rc = calculate_isys2401_dma_port_cfg(
@@ -507,7 +507,7 @@ static bool calculate_input_system_channel_cfg(
 			isys_cfg->raw_packed,
 			metadata,
 			&(channel_cfg->dma_dest_port_cfg));
-	if (rc == false)
+	if (!rc)
 		return false;
 
 	return true;
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/pipeline/src/pipeline.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/pipeline/src/pipeline.c
index 81a50c73ad0b..4746620ca212 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/pipeline/src/pipeline.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/pipeline/src/pipeline.c
@@ -161,9 +161,9 @@ void ia_css_pipeline_start(enum ia_css_pipe_id pipe_id,
 #endif
 #if !defined(HAS_NO_INPUT_SYSTEM)
 #ifndef ISP2401
-				, (mipi_port_ID_t) 0
+				, (enum mipi_port_id) 0
 #else
-				(mipi_port_ID_t) 0,
+				(enum mipi_port_id) 0,
 #endif
 #endif
 #ifndef ISP2401
@@ -574,7 +574,7 @@ static void pipeline_map_num_to_sp_thread(unsigned int pipe_num)
 
 		But the below is more descriptive.
 	*/
-	assert(found_sp_thread != false);
+	assert(found_sp_thread);
 }
 
 static void pipeline_unmap_num_to_sp_thread(unsigned int pipe_num)
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/rmgr/src/rmgr_vbuf.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/rmgr/src/rmgr_vbuf.c
index 54239ac9d7c9..a4d8a48f95ba 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/rmgr/src/rmgr_vbuf.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/rmgr/src/rmgr_vbuf.c
@@ -24,12 +24,12 @@
  * @brief VBUF resource handles
  */
 #define NUM_HANDLES 1000
-struct ia_css_rmgr_vbuf_handle handle_table[NUM_HANDLES];
+static struct ia_css_rmgr_vbuf_handle handle_table[NUM_HANDLES];
 
 /*
  * @brief VBUF resource pool - refpool
  */
-struct ia_css_rmgr_vbuf_pool refpool = {
+static struct ia_css_rmgr_vbuf_pool refpool = {
 	false,			/* copy_on_write */
 	false,			/* recycle */
 	0,			/* size */
@@ -40,7 +40,7 @@ struct ia_css_rmgr_vbuf_pool refpool = {
 /*
  * @brief VBUF resource pool - writepool
  */
-struct ia_css_rmgr_vbuf_pool writepool = {
+static struct ia_css_rmgr_vbuf_pool writepool = {
 	true,			/* copy_on_write */
 	false,			/* recycle */
 	0,			/* size */
@@ -51,7 +51,7 @@ struct ia_css_rmgr_vbuf_pool writepool = {
 /*
  * @brief VBUF resource pool - hmmbufferpool
  */
-struct ia_css_rmgr_vbuf_pool hmmbufferpool = {
+static struct ia_css_rmgr_vbuf_pool hmmbufferpool = {
 	true,			/* copy_on_write */
 	true,			/* recycle */
 	32,			/* size */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css.c
index 37116faab631..c771e4b910f3 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css.c
@@ -462,46 +462,46 @@ verify_copy_out_frame_format(struct ia_css_pipe *pipe)
 	assert(pipe->stream != NULL);
 
 	switch (pipe->stream->config.input_config.format) {
-	case IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY:
-	case IA_CSS_STREAM_FORMAT_YUV420_8:
+	case ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY:
+	case ATOMISP_INPUT_FORMAT_YUV420_8:
 		for (i=0; i<ARRAY_SIZE(yuv420_copy_formats) && !found; i++)
 			found = (out_fmt == yuv420_copy_formats[i]);
 		break;
-	case IA_CSS_STREAM_FORMAT_YUV420_10:
-	case IA_CSS_STREAM_FORMAT_YUV420_16:
+	case ATOMISP_INPUT_FORMAT_YUV420_10:
+	case ATOMISP_INPUT_FORMAT_YUV420_16:
 		found = (out_fmt == IA_CSS_FRAME_FORMAT_YUV420_16);
 		break;
-	case IA_CSS_STREAM_FORMAT_YUV422_8:
+	case ATOMISP_INPUT_FORMAT_YUV422_8:
 		for (i=0; i<ARRAY_SIZE(yuv422_copy_formats) && !found; i++)
 			found = (out_fmt == yuv422_copy_formats[i]);
 		break;
-	case IA_CSS_STREAM_FORMAT_YUV422_10:
-	case IA_CSS_STREAM_FORMAT_YUV422_16:
+	case ATOMISP_INPUT_FORMAT_YUV422_10:
+	case ATOMISP_INPUT_FORMAT_YUV422_16:
 		found = (out_fmt == IA_CSS_FRAME_FORMAT_YUV422_16 ||
 			 out_fmt == IA_CSS_FRAME_FORMAT_YUV420_16);
 		break;
-	case IA_CSS_STREAM_FORMAT_RGB_444:
-	case IA_CSS_STREAM_FORMAT_RGB_555:
-	case IA_CSS_STREAM_FORMAT_RGB_565:
+	case ATOMISP_INPUT_FORMAT_RGB_444:
+	case ATOMISP_INPUT_FORMAT_RGB_555:
+	case ATOMISP_INPUT_FORMAT_RGB_565:
 		found = (out_fmt == IA_CSS_FRAME_FORMAT_RGBA888 ||
 			 out_fmt == IA_CSS_FRAME_FORMAT_RGB565);
 		break;
-	case IA_CSS_STREAM_FORMAT_RGB_666:
-	case IA_CSS_STREAM_FORMAT_RGB_888:
+	case ATOMISP_INPUT_FORMAT_RGB_666:
+	case ATOMISP_INPUT_FORMAT_RGB_888:
 		found = (out_fmt == IA_CSS_FRAME_FORMAT_RGBA888 ||
 			 out_fmt == IA_CSS_FRAME_FORMAT_YUV420);
 		break;
-	case IA_CSS_STREAM_FORMAT_RAW_6:
-	case IA_CSS_STREAM_FORMAT_RAW_7:
-	case IA_CSS_STREAM_FORMAT_RAW_8:
-	case IA_CSS_STREAM_FORMAT_RAW_10:
-	case IA_CSS_STREAM_FORMAT_RAW_12:
-	case IA_CSS_STREAM_FORMAT_RAW_14:
-	case IA_CSS_STREAM_FORMAT_RAW_16:
+	case ATOMISP_INPUT_FORMAT_RAW_6:
+	case ATOMISP_INPUT_FORMAT_RAW_7:
+	case ATOMISP_INPUT_FORMAT_RAW_8:
+	case ATOMISP_INPUT_FORMAT_RAW_10:
+	case ATOMISP_INPUT_FORMAT_RAW_12:
+	case ATOMISP_INPUT_FORMAT_RAW_14:
+	case ATOMISP_INPUT_FORMAT_RAW_16:
 		found = (out_fmt == IA_CSS_FRAME_FORMAT_RAW) ||
 			(out_fmt == IA_CSS_FRAME_FORMAT_RAW_PACKED);
 		break;
-	case IA_CSS_STREAM_FORMAT_BINARY_8:
+	case ATOMISP_INPUT_FORMAT_BINARY_8:
 		found = (out_fmt == IA_CSS_FRAME_FORMAT_BINARY_8);
 		break;
 	default:
@@ -586,13 +586,13 @@ sh_css_config_input_network(struct ia_css_stream *stream)
 }
 #elif !defined(HAS_NO_INPUT_SYSTEM) && defined(USE_INPUT_SYSTEM_VERSION_2401)
 static unsigned int csi2_protocol_calculate_max_subpixels_per_line(
-		enum ia_css_stream_format	format,
+		enum atomisp_input_format	format,
 		unsigned int			pixels_per_line)
 {
 	unsigned int rval;
 
 	switch (format) {
-	case IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY:
+	case ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY:
 		/*
 		 * The frame format layout is shown below.
 		 *
@@ -611,9 +611,9 @@ static unsigned int csi2_protocol_calculate_max_subpixels_per_line(
 		 */
 		rval = pixels_per_line * 2;
 		break;
-	case IA_CSS_STREAM_FORMAT_YUV420_8:
-	case IA_CSS_STREAM_FORMAT_YUV420_10:
-	case IA_CSS_STREAM_FORMAT_YUV420_16:
+	case ATOMISP_INPUT_FORMAT_YUV420_8:
+	case ATOMISP_INPUT_FORMAT_YUV420_10:
+	case ATOMISP_INPUT_FORMAT_YUV420_16:
 		/*
 		 * The frame format layout is shown below.
 		 *
@@ -630,9 +630,9 @@ static unsigned int csi2_protocol_calculate_max_subpixels_per_line(
 		 */
 		rval = pixels_per_line * 2;
 		break;
-	case IA_CSS_STREAM_FORMAT_YUV422_8:
-	case IA_CSS_STREAM_FORMAT_YUV422_10:
-	case IA_CSS_STREAM_FORMAT_YUV422_16:
+	case ATOMISP_INPUT_FORMAT_YUV422_8:
+	case ATOMISP_INPUT_FORMAT_YUV422_10:
+	case ATOMISP_INPUT_FORMAT_YUV422_16:
 		/*
 		 * The frame format layout is shown below.
 		 *
@@ -649,11 +649,11 @@ static unsigned int csi2_protocol_calculate_max_subpixels_per_line(
 		 */
 		rval = pixels_per_line * 2;
 		break;
-	case IA_CSS_STREAM_FORMAT_RGB_444:
-	case IA_CSS_STREAM_FORMAT_RGB_555:
-	case IA_CSS_STREAM_FORMAT_RGB_565:
-	case IA_CSS_STREAM_FORMAT_RGB_666:
-	case IA_CSS_STREAM_FORMAT_RGB_888:
+	case ATOMISP_INPUT_FORMAT_RGB_444:
+	case ATOMISP_INPUT_FORMAT_RGB_555:
+	case ATOMISP_INPUT_FORMAT_RGB_565:
+	case ATOMISP_INPUT_FORMAT_RGB_666:
+	case ATOMISP_INPUT_FORMAT_RGB_888:
 		/*
 		 * The frame format layout is shown below.
 		 *
@@ -670,22 +670,22 @@ static unsigned int csi2_protocol_calculate_max_subpixels_per_line(
 		 */
 		rval = pixels_per_line * 4;
 		break;
-	case IA_CSS_STREAM_FORMAT_RAW_6:
-	case IA_CSS_STREAM_FORMAT_RAW_7:
-	case IA_CSS_STREAM_FORMAT_RAW_8:
-	case IA_CSS_STREAM_FORMAT_RAW_10:
-	case IA_CSS_STREAM_FORMAT_RAW_12:
-	case IA_CSS_STREAM_FORMAT_RAW_14:
-	case IA_CSS_STREAM_FORMAT_RAW_16:
-	case IA_CSS_STREAM_FORMAT_BINARY_8:
-	case IA_CSS_STREAM_FORMAT_USER_DEF1:
-	case IA_CSS_STREAM_FORMAT_USER_DEF2:
-	case IA_CSS_STREAM_FORMAT_USER_DEF3:
-	case IA_CSS_STREAM_FORMAT_USER_DEF4:
-	case IA_CSS_STREAM_FORMAT_USER_DEF5:
-	case IA_CSS_STREAM_FORMAT_USER_DEF6:
-	case IA_CSS_STREAM_FORMAT_USER_DEF7:
-	case IA_CSS_STREAM_FORMAT_USER_DEF8:
+	case ATOMISP_INPUT_FORMAT_RAW_6:
+	case ATOMISP_INPUT_FORMAT_RAW_7:
+	case ATOMISP_INPUT_FORMAT_RAW_8:
+	case ATOMISP_INPUT_FORMAT_RAW_10:
+	case ATOMISP_INPUT_FORMAT_RAW_12:
+	case ATOMISP_INPUT_FORMAT_RAW_14:
+	case ATOMISP_INPUT_FORMAT_RAW_16:
+	case ATOMISP_INPUT_FORMAT_BINARY_8:
+	case ATOMISP_INPUT_FORMAT_USER_DEF1:
+	case ATOMISP_INPUT_FORMAT_USER_DEF2:
+	case ATOMISP_INPUT_FORMAT_USER_DEF3:
+	case ATOMISP_INPUT_FORMAT_USER_DEF4:
+	case ATOMISP_INPUT_FORMAT_USER_DEF5:
+	case ATOMISP_INPUT_FORMAT_USER_DEF6:
+	case ATOMISP_INPUT_FORMAT_USER_DEF7:
+	case ATOMISP_INPUT_FORMAT_USER_DEF8:
 		/*
 		 * The frame format layout is shown below.
 		 *
@@ -742,11 +742,11 @@ static bool sh_css_translate_stream_cfg_to_input_system_input_port_id(
 		break;
 	case IA_CSS_INPUT_MODE_BUFFERED_SENSOR:
 
-		if (stream_cfg->source.port.port == IA_CSS_CSI2_PORT0) {
+		if (stream_cfg->source.port.port == MIPI_PORT0_ID) {
 			isys_stream_descr->input_port_id = INPUT_SYSTEM_CSI_PORT0_ID;
-		} else if (stream_cfg->source.port.port == IA_CSS_CSI2_PORT1) {
+		} else if (stream_cfg->source.port.port == MIPI_PORT1_ID) {
 			isys_stream_descr->input_port_id = INPUT_SYSTEM_CSI_PORT1_ID;
-		} else if (stream_cfg->source.port.port == IA_CSS_CSI2_PORT2) {
+		} else if (stream_cfg->source.port.port == MIPI_PORT2_ID) {
 			isys_stream_descr->input_port_id = INPUT_SYSTEM_CSI_PORT2_ID;
 		}
 
@@ -927,7 +927,7 @@ static bool sh_css_translate_stream_cfg_to_input_system_input_port_resolution(
 	unsigned int max_subpixels_per_line;
 	unsigned int lines_per_frame;
 	unsigned int align_req_in_bytes;
-	enum ia_css_stream_format fmt_type;
+	enum atomisp_input_format fmt_type;
 
 	fmt_type = stream_cfg->isys_config[isys_stream_idx].format;
 	if ((stream_cfg->mode == IA_CSS_INPUT_MODE_SENSOR ||
@@ -936,11 +936,11 @@ static bool sh_css_translate_stream_cfg_to_input_system_input_port_resolution(
 
 		if (stream_cfg->source.port.compression.uncompressed_bits_per_pixel ==
 			UNCOMPRESSED_BITS_PER_PIXEL_10) {
-				fmt_type = IA_CSS_STREAM_FORMAT_RAW_10;
+				fmt_type = ATOMISP_INPUT_FORMAT_RAW_10;
 		}
 		else if (stream_cfg->source.port.compression.uncompressed_bits_per_pixel ==
 			UNCOMPRESSED_BITS_PER_PIXEL_12) {
-				fmt_type = IA_CSS_STREAM_FORMAT_RAW_12;
+				fmt_type = ATOMISP_INPUT_FORMAT_RAW_12;
 		}
 		else
 			return false;
@@ -1082,7 +1082,7 @@ sh_css_config_input_network(struct ia_css_stream *stream)
 
 	/* get the SP thread id */
 	rc = ia_css_pipeline_get_sp_thread_id(ia_css_pipe_get_pipe_num(pipe), &sp_thread_id);
-	if (rc != true)
+	if (!rc)
 		return IA_CSS_ERR_INTERNAL_ERROR;
 	/* get the target input terminal */
 	sp_pipeline_input_terminal = &(sh_css_sp_group.pipe_io[sp_thread_id].input);
@@ -1108,7 +1108,7 @@ sh_css_config_input_network(struct ia_css_stream *stream)
 					&(isys_stream_descr));
 		}
 
-		if (rc != true)
+		if (!rc)
 			return IA_CSS_ERR_INTERNAL_ERROR;
 
 		isys_stream_id = ia_css_isys_generate_stream_id(sp_thread_id, i);
@@ -1118,7 +1118,7 @@ sh_css_config_input_network(struct ia_css_stream *stream)
 				&(isys_stream_descr),
 				&(sp_pipeline_input_terminal->context.virtual_input_system_stream[i]),
 				isys_stream_id);
-		if (rc != true)
+		if (!rc)
 			return IA_CSS_ERR_INTERNAL_ERROR;
 
 		/* calculate the configuration of the virtual Input System (2401) */
@@ -1126,7 +1126,7 @@ sh_css_config_input_network(struct ia_css_stream *stream)
 				&(sp_pipeline_input_terminal->context.virtual_input_system_stream[i]),
 				&(isys_stream_descr),
 				&(sp_pipeline_input_terminal->ctrl.virtual_input_system_stream_cfg[i]));
-		if (rc != true) {
+		if (!rc) {
 			ia_css_isys_stream_destroy(&(sp_pipeline_input_terminal->context.virtual_input_system_stream[i]));
 			return IA_CSS_ERR_INTERNAL_ERROR;
 		}
@@ -1195,7 +1195,7 @@ static inline struct ia_css_pipe *stream_get_target_pipe(
 
 static enum ia_css_err stream_csi_rx_helper(
 	struct ia_css_stream *stream,
-	enum ia_css_err (*func)(enum ia_css_csi2_port, uint32_t))
+	enum ia_css_err (*func)(enum mipi_port_id, uint32_t))
 {
 	enum ia_css_err retval = IA_CSS_ERR_INTERNAL_ERROR;
 	uint32_t sp_thread_id, stream_id;
@@ -1391,7 +1391,7 @@ start_copy_on_sp(struct ia_css_pipe *pipe,
 		ia_css_isys_rx_disable();
 #endif
 
-	if (pipe->stream->config.input_config.format != IA_CSS_STREAM_FORMAT_BINARY_8)
+	if (pipe->stream->config.input_config.format != ATOMISP_INPUT_FORMAT_BINARY_8)
 		return IA_CSS_ERR_INTERNAL_ERROR;
 	sh_css_sp_start_binary_copy(ia_css_pipe_get_pipe_num(pipe), out_frame, pipe->stream->config.pixels_per_clock == 2);
 
@@ -1454,7 +1454,7 @@ static void start_pipe(
 				&me->stream->info.metadata_info
 #if !defined(HAS_NO_INPUT_SYSTEM)
 				,(input_mode==IA_CSS_INPUT_MODE_MEMORY) ?
-					(mipi_port_ID_t)0 :
+					(enum mipi_port_id)0 :
 					me->stream->config.source.port.port
 #endif
 #ifdef ISP2401
@@ -1497,7 +1497,7 @@ static void
 enable_interrupts(enum ia_css_irq_type irq_type)
 {
 #ifdef USE_INPUT_SYSTEM_VERSION_2
-	mipi_port_ID_t port;
+	enum mipi_port_id port;
 #endif
 	bool enable_pulse = irq_type != IA_CSS_IRQ_TYPE_EDGE;
 	IA_CSS_ENTER_PRIVATE("");
@@ -2562,7 +2562,7 @@ ia_css_uninit(void)
 	ifmtr_set_if_blocking_mode_reset = true;
 #endif
 
-	if (fw_explicitly_loaded == false) {
+	if (!fw_explicitly_loaded) {
 		ia_css_unload_firmware();
 	}
 	ia_css_spctrl_unload_fw(SP0_ID);
@@ -4074,9 +4074,9 @@ preview_start(struct ia_css_pipe *pipe)
 #endif
 #if !defined(HAS_NO_INPUT_SYSTEM)
 #ifndef ISP2401
-			, (mipi_port_ID_t)0
+			, (enum mipi_port_id)0
 #else
-			(mipi_port_ID_t)0,
+			(enum mipi_port_id)0,
 #endif
 #endif
 #ifndef ISP2401
@@ -4106,9 +4106,9 @@ preview_start(struct ia_css_pipe *pipe)
 #endif
 #if !defined(HAS_NO_INPUT_SYSTEM)
 #ifndef ISP2401
-			, (mipi_port_ID_t) 0
+			, (enum mipi_port_id) 0
 #else
-			(mipi_port_ID_t) 0,
+			(enum mipi_port_id) 0,
 #endif
 #endif
 #ifndef ISP2401
@@ -4673,7 +4673,7 @@ ia_css_dequeue_psys_event(struct ia_css_event *event)
 	event->type = convert_event_sp_to_host_domain[payload[0]];
 	/* Some sane default values since not all events use all fields. */
 	event->pipe = NULL;
-	event->port = IA_CSS_CSI2_PORT0;
+	event->port = MIPI_PORT0_ID;
 	event->exp_id = 0;
 	event->fw_warning = IA_CSS_FW_WARNING_NONE;
 	event->fw_handle = 0;
@@ -4719,7 +4719,7 @@ ia_css_dequeue_psys_event(struct ia_css_event *event)
 		}
 	}
 	if (event->type == IA_CSS_EVENT_TYPE_PORT_EOF) {
-		event->port = (enum ia_css_csi2_port)payload[1];
+		event->port = (enum mipi_port_id)payload[1];
 		event->exp_id = payload[3];
 	} else if (event->type == IA_CSS_EVENT_TYPE_FW_WARNING) {
 		event->fw_warning = (enum ia_css_fw_warning)payload[1];
@@ -5949,9 +5949,9 @@ static enum ia_css_err video_start(struct ia_css_pipe *pipe)
 #endif
 #if !defined(HAS_NO_INPUT_SYSTEM)
 #ifndef ISP2401
-			, (mipi_port_ID_t)0
+			, (enum mipi_port_id)0
 #else
-			(mipi_port_ID_t)0,
+			(enum mipi_port_id)0,
 #endif
 #endif
 #ifndef ISP2401
@@ -6784,7 +6784,7 @@ static bool copy_on_sp(struct ia_css_pipe *pipe)
 
 	rval &= (pipe->config.default_capture_config.mode == IA_CSS_CAPTURE_MODE_RAW);
 
-	rval &= ((pipe->stream->config.input_config.format == IA_CSS_STREAM_FORMAT_BINARY_8) ||
+	rval &= ((pipe->stream->config.input_config.format == ATOMISP_INPUT_FORMAT_BINARY_8) ||
 		(pipe->config.mode == IA_CSS_PIPE_MODE_COPY));
 
 	return rval;
@@ -6817,7 +6817,7 @@ static enum ia_css_err load_capture_binaries(
 		return err;
 	}
 	if (copy_on_sp(pipe) &&
-	    pipe->stream->config.input_config.format == IA_CSS_STREAM_FORMAT_BINARY_8) {
+	    pipe->stream->config.input_config.format == ATOMISP_INPUT_FORMAT_BINARY_8) {
 		ia_css_frame_info_init(
 			&pipe->output_info[0],
 			JPEG_BYTES,
@@ -6915,7 +6915,7 @@ need_yuv_scaler_stage(const struct ia_css_pipe *pipe)
 
 	/* TODO: make generic function */
 	need_format_conversion =
-		((pipe->stream->config.input_config.format == IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY) &&
+		((pipe->stream->config.input_config.format == ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY) &&
 		(pipe->output_info[0].format != IA_CSS_FRAME_FORMAT_CSI_MIPI_LEGACY_YUV420_8));
 
 	in_res = pipe->config.input_effective_res;
@@ -7304,7 +7304,7 @@ load_yuvpp_binaries(struct ia_css_pipe *pipe)
 	/*
 	 * NOTES
 	 * - Why does the "yuvpp" pipe needs "isp_copy_binary" (i.e. ISP Copy) when
-	 *   its input is "IA_CSS_STREAM_FORMAT_YUV422_8"?
+	 *   its input is "ATOMISP_INPUT_FORMAT_YUV422_8"?
 	 *
 	 *   In most use cases, the first stage in the "yuvpp" pipe is the "yuv_scale_
 	 *   binary". However, the "yuv_scale_binary" does NOT support the input-frame
@@ -7319,7 +7319,7 @@ load_yuvpp_binaries(struct ia_css_pipe *pipe)
 	 *   "yuv_scale_binary".
 	 */
 	need_isp_copy_binary =
-		(pipe->stream->config.input_config.format == IA_CSS_STREAM_FORMAT_YUV422_8);
+		(pipe->stream->config.input_config.format == ATOMISP_INPUT_FORMAT_YUV422_8);
 #else  /* !USE_INPUT_SYSTEM_VERSION_2401 */
 	need_isp_copy_binary = true;
 #endif /*  USE_INPUT_SYSTEM_VERSION_2401 */
@@ -7627,11 +7627,11 @@ create_host_yuvpp_pipeline(struct ia_css_pipe *pipe)
 		 * Bayer-Quad RAW.
 		 */
 		int in_frame_format;
-		if (pipe->stream->config.input_config.format == IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY) {
+		if (pipe->stream->config.input_config.format == ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY) {
 			in_frame_format = IA_CSS_FRAME_FORMAT_CSI_MIPI_LEGACY_YUV420_8;
-		} else if (pipe->stream->config.input_config.format == IA_CSS_STREAM_FORMAT_YUV422_8) {
+		} else if (pipe->stream->config.input_config.format == ATOMISP_INPUT_FORMAT_YUV422_8) {
 			/*
-			 * When the sensor output frame format is "IA_CSS_STREAM_FORMAT_YUV422_8",
+			 * When the sensor output frame format is "ATOMISP_INPUT_FORMAT_YUV422_8",
 			 * the "isp_copy_var" binary is selected as the first stage in the yuvpp
 			 * pipe.
 			 *
@@ -7739,7 +7739,7 @@ create_host_yuvpp_pipeline(struct ia_css_pipe *pipe)
 
 		for (i = 0, j = 0; i < num_stage; i++) {
 			assert(j < num_output_stage);
-			if (pipe->pipe_settings.yuvpp.is_output_stage[i] == true) {
+			if (pipe->pipe_settings.yuvpp.is_output_stage[i]) {
 				tmp_out_frame = out_frame[j];
 				tmp_vf_frame = vf_frame[j];
 			} else {
@@ -7758,7 +7758,7 @@ create_host_yuvpp_pipeline(struct ia_css_pipe *pipe)
 			}
 			/* we use output port 1 as internal output port */
 			tmp_in_frame = yuv_scaler_stage->args.out_frame[1];
-			if (pipe->pipe_settings.yuvpp.is_output_stage[i] == true) {
+			if (pipe->pipe_settings.yuvpp.is_output_stage[i]) {
 				if (tmp_vf_frame && (tmp_vf_frame->info.res.width != 0)) {
 					in_frame = yuv_scaler_stage->args.out_vf_frame;
 					err = add_vf_pp_stage(pipe, in_frame, tmp_vf_frame, &vf_pp_binary[j],
@@ -7812,7 +7812,7 @@ create_host_copy_pipeline(struct ia_css_pipe *pipe,
 	out_frame->flash_state = IA_CSS_FRAME_FLASH_STATE_NONE;
 
 	if (copy_on_sp(pipe) &&
-	    pipe->stream->config.input_config.format == IA_CSS_STREAM_FORMAT_BINARY_8) {
+	    pipe->stream->config.input_config.format == ATOMISP_INPUT_FORMAT_BINARY_8) {
 		ia_css_frame_info_init(
 			&out_frame->info,
 			JPEG_BYTES,
@@ -8044,7 +8044,6 @@ create_host_regular_capture_pipeline(struct ia_css_pipe *pipe)
 	}
 
 	if (mode == IA_CSS_CAPTURE_MODE_PRIMARY) {
-		unsigned int frm;
 		struct ia_css_frame *local_in_frame = NULL;
 		struct ia_css_frame *local_out_frame = NULL;
 
@@ -8082,7 +8081,6 @@ create_host_regular_capture_pipeline(struct ia_css_pipe *pipe)
 				return err;
 			}
 		}
-		(void)frm;
 		/* If we use copy iso primary,
 		   the input must be yuv iso raw */
 		current_stage->args.copy_vf =
@@ -8321,8 +8319,6 @@ sh_css_pipe_get_output_frame_info(struct ia_css_pipe *pipe,
 				  struct ia_css_frame_info *info,
 				  unsigned int idx)
 {
-	enum ia_css_err err = IA_CSS_SUCCESS;
-
 	assert(pipe != NULL);
 	assert(info != NULL);
 
@@ -8331,7 +8327,7 @@ sh_css_pipe_get_output_frame_info(struct ia_css_pipe *pipe,
 
 	*info = pipe->output_info[idx];
 	if (copy_on_sp(pipe) &&
-	    pipe->stream->config.input_config.format == IA_CSS_STREAM_FORMAT_BINARY_8) {
+	    pipe->stream->config.input_config.format == ATOMISP_INPUT_FORMAT_BINARY_8) {
 		ia_css_frame_info_init(
 			info,
 			JPEG_BYTES,
@@ -8347,7 +8343,7 @@ sh_css_pipe_get_output_frame_info(struct ia_css_pipe *pipe,
 
 	ia_css_debug_dtrace(IA_CSS_DEBUG_TRACE_PRIVATE,
 						"sh_css_pipe_get_output_frame_info() leave:\n");
-	return err;
+	return IA_CSS_SUCCESS;
 }
 
 #if !defined(HAS_NO_INPUT_SYSTEM)
@@ -8392,7 +8388,7 @@ ia_css_stream_send_input_line(const struct ia_css_stream *stream,
 
 void
 ia_css_stream_send_input_embedded_line(const struct ia_css_stream *stream,
-		enum ia_css_stream_format format,
+		enum atomisp_input_format format,
 		const unsigned short *data,
 		unsigned int width)
 {
@@ -9176,7 +9172,7 @@ ia_css_stream_configure_rx(struct ia_css_stream *stream)
 	else if (config->num_lanes != 0)
 		return IA_CSS_ERR_INVALID_ARGUMENTS;
 
-	if (config->port > IA_CSS_CSI2_PORT2)
+	if (config->port > MIPI_PORT2_ID)
 		return IA_CSS_ERR_INVALID_ARGUMENTS;
 	stream->csi_rx_config.port =
 		ia_css_isys_port_to_mipi_port(config->port);
@@ -9363,7 +9359,7 @@ ia_css_stream_create(const struct ia_css_stream_config *stream_config,
 
 #if defined(USE_INPUT_SYSTEM_VERSION_2)
 	/* We don't support metadata for JPEG stream, since they both use str2mem */
-	if (stream_config->input_config.format == IA_CSS_STREAM_FORMAT_BINARY_8 &&
+	if (stream_config->input_config.format == ATOMISP_INPUT_FORMAT_BINARY_8 &&
 	    stream_config->metadata_config.resolution.height > 0) {
 		err = IA_CSS_ERR_INVALID_ARGUMENTS;
 		IA_CSS_LEAVE_ERR(err);
@@ -10142,7 +10138,7 @@ ia_css_temp_pipe_to_pipe_id(const struct ia_css_pipe *pipe, enum ia_css_pipe_id
 	return IA_CSS_SUCCESS;
 }
 
-enum ia_css_stream_format
+enum atomisp_input_format
 ia_css_stream_get_format(const struct ia_css_stream *stream)
 {
 	return stream->config.input_config.format;
@@ -10218,8 +10214,6 @@ ia_css_stream_get_3a_binary(const struct ia_css_stream *stream)
 enum ia_css_err
 ia_css_stream_set_output_padded_width(struct ia_css_stream *stream, unsigned int output_padded_width)
 {
-	enum ia_css_err err = IA_CSS_SUCCESS;
-
 	struct ia_css_pipe *pipe;
 
 	assert(stream != NULL);
@@ -10232,7 +10226,7 @@ ia_css_stream_set_output_padded_width(struct ia_css_stream *stream, unsigned int
 	pipe->config.output_info[IA_CSS_PIPE_OUTPUT_STAGE_0].padded_width = output_padded_width;
 	pipe->output_info[IA_CSS_PIPE_OUTPUT_STAGE_0].padded_width = output_padded_width;
 
-	return err;
+	return IA_CSS_SUCCESS;
 }
 
 static struct ia_css_binary *
@@ -10734,7 +10728,7 @@ ia_css_pipe_set_qos_ext_state(struct ia_css_pipe *pipe, uint32_t fw_handle, bool
 				(uint8_t) IA_CSS_PSYS_SW_EVENT_STAGE_ENABLE_DISABLE,
 				(uint8_t) thread_id,
 				(uint8_t) stage->stage_num,
-				(enable == true) ? 1 : 0);
+				enable ? 1 : 0);
 			if (err == IA_CSS_SUCCESS) {
 				if(enable)
 					SH_CSS_QOS_STAGE_ENABLE(&(sh_css_sp_group.pipe[thread_id]),stage->stage_num);
@@ -11059,7 +11053,7 @@ static struct sh_css_hmm_buffer_record
 
 	buffer_record = &hmm_buffer_record[0];
 	for (i = 0; i < MAX_HMM_BUFFER_NUM; i++) {
-		if (buffer_record->in_use == false) {
+		if (!buffer_record->in_use) {
 			buffer_record->in_use = true;
 			buffer_record->type = type;
 			buffer_record->h_vbuf = h_vbuf;
@@ -11083,7 +11077,7 @@ static struct sh_css_hmm_buffer_record
 
 	buffer_record = &hmm_buffer_record[0];
 	for (i = 0; i < MAX_HMM_BUFFER_NUM; i++) {
-		if ((buffer_record->in_use == true) &&
+		if ((buffer_record->in_use) &&
 		    (buffer_record->type == type) &&
 		    (buffer_record->h_vbuf != NULL) &&
 		    (buffer_record->h_vbuf->vptr == ddr_buffer_addr)) {
@@ -11093,7 +11087,7 @@ static struct sh_css_hmm_buffer_record
 		buffer_record++;
 	}
 
-	if (found_record == true)
+	if (found_record)
 		return buffer_record;
 	else
 		return NULL;
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_mipi.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_mipi.c
index 883474e90c81..a6a00024bae8 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_mipi.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_mipi.c
@@ -104,7 +104,7 @@ static bool ia_css_mipi_is_source_port_valid(struct ia_css_pipe *pipe,
 enum ia_css_err
 ia_css_mipi_frame_calculate_size(const unsigned int width,
 				const unsigned int height,
-				const enum ia_css_stream_format format,
+				const enum atomisp_input_format format,
 				const bool hasSOLandEOL,
 				const unsigned int embedded_data_size_words,
 				unsigned int *size_mem_words)
@@ -136,16 +136,16 @@ ia_css_mipi_frame_calculate_size(const unsigned int width,
 		     width_padded, height, format, hasSOLandEOL, embedded_data_size_words);
 
 	switch (format) {
-	case IA_CSS_STREAM_FORMAT_RAW_6:		/* 4p, 3B, 24bits */
+	case ATOMISP_INPUT_FORMAT_RAW_6:		/* 4p, 3B, 24bits */
 		bits_per_pixel = 6;	break;
-	case IA_CSS_STREAM_FORMAT_RAW_7:		/* 8p, 7B, 56bits */
+	case ATOMISP_INPUT_FORMAT_RAW_7:		/* 8p, 7B, 56bits */
 		bits_per_pixel = 7;		break;
-	case IA_CSS_STREAM_FORMAT_RAW_8:		/* 1p, 1B, 8bits */
-	case IA_CSS_STREAM_FORMAT_BINARY_8:		/*  8bits, TODO: check. */
-	case IA_CSS_STREAM_FORMAT_YUV420_8:		/* odd 2p, 2B, 16bits, even 2p, 4B, 32bits */
+	case ATOMISP_INPUT_FORMAT_RAW_8:		/* 1p, 1B, 8bits */
+	case ATOMISP_INPUT_FORMAT_BINARY_8:		/*  8bits, TODO: check. */
+	case ATOMISP_INPUT_FORMAT_YUV420_8:		/* odd 2p, 2B, 16bits, even 2p, 4B, 32bits */
 		bits_per_pixel = 8;		break;
-	case IA_CSS_STREAM_FORMAT_YUV420_10:		/* odd 4p, 5B, 40bits, even 4p, 10B, 80bits */
-	case IA_CSS_STREAM_FORMAT_RAW_10:		/* 4p, 5B, 40bits */
+	case ATOMISP_INPUT_FORMAT_YUV420_10:		/* odd 4p, 5B, 40bits, even 4p, 10B, 80bits */
+	case ATOMISP_INPUT_FORMAT_RAW_10:		/* 4p, 5B, 40bits */
 #if !defined(HAS_NO_PACKED_RAW_PIXELS)
 		/* The changes will be reverted as soon as RAW
 		 * Buffers are deployed by the 2401 Input System
@@ -156,26 +156,26 @@ ia_css_mipi_frame_calculate_size(const unsigned int width,
 		bits_per_pixel = 16;
 #endif
 		break;
-	case IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY:	/* 2p, 3B, 24bits */
-	case IA_CSS_STREAM_FORMAT_RAW_12:		/* 2p, 3B, 24bits */
+	case ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY:	/* 2p, 3B, 24bits */
+	case ATOMISP_INPUT_FORMAT_RAW_12:		/* 2p, 3B, 24bits */
 		bits_per_pixel = 12;	break;
-	case IA_CSS_STREAM_FORMAT_RAW_14:		/* 4p, 7B, 56bits */
+	case ATOMISP_INPUT_FORMAT_RAW_14:		/* 4p, 7B, 56bits */
 		bits_per_pixel = 14;	break;
-	case IA_CSS_STREAM_FORMAT_RGB_444:		/* 1p, 2B, 16bits */
-	case IA_CSS_STREAM_FORMAT_RGB_555:		/* 1p, 2B, 16bits */
-	case IA_CSS_STREAM_FORMAT_RGB_565:		/* 1p, 2B, 16bits */
-	case IA_CSS_STREAM_FORMAT_YUV422_8:		/* 2p, 4B, 32bits */
+	case ATOMISP_INPUT_FORMAT_RGB_444:		/* 1p, 2B, 16bits */
+	case ATOMISP_INPUT_FORMAT_RGB_555:		/* 1p, 2B, 16bits */
+	case ATOMISP_INPUT_FORMAT_RGB_565:		/* 1p, 2B, 16bits */
+	case ATOMISP_INPUT_FORMAT_YUV422_8:		/* 2p, 4B, 32bits */
 		bits_per_pixel = 16;	break;
-	case IA_CSS_STREAM_FORMAT_RGB_666:		/* 4p, 9B, 72bits */
+	case ATOMISP_INPUT_FORMAT_RGB_666:		/* 4p, 9B, 72bits */
 		bits_per_pixel = 18;	break;
-	case IA_CSS_STREAM_FORMAT_YUV422_10:		/* 2p, 5B, 40bits */
+	case ATOMISP_INPUT_FORMAT_YUV422_10:		/* 2p, 5B, 40bits */
 		bits_per_pixel = 20;	break;
-	case IA_CSS_STREAM_FORMAT_RGB_888:		/* 1p, 3B, 24bits */
+	case ATOMISP_INPUT_FORMAT_RGB_888:		/* 1p, 3B, 24bits */
 		bits_per_pixel = 24;	break;
 
-	case IA_CSS_STREAM_FORMAT_YUV420_16:		/* Not supported */
-	case IA_CSS_STREAM_FORMAT_YUV422_16:		/* Not supported */
-	case IA_CSS_STREAM_FORMAT_RAW_16:		/* TODO: not specified in MIPI SPEC, check */
+	case ATOMISP_INPUT_FORMAT_YUV420_16:		/* Not supported */
+	case ATOMISP_INPUT_FORMAT_YUV422_16:		/* Not supported */
+	case ATOMISP_INPUT_FORMAT_RAW_16:		/* TODO: not specified in MIPI SPEC, check */
 	default:
 		return IA_CSS_ERR_INVALID_ARGUMENTS;
 	}
@@ -183,9 +183,9 @@ ia_css_mipi_frame_calculate_size(const unsigned int width,
 	odd_line_bytes = (width_padded * bits_per_pixel + 7) >> 3; /* ceil ( bits per line / 8) */
 
 	/* Even lines for YUV420 formats are double in bits_per_pixel. */
-	if (format == IA_CSS_STREAM_FORMAT_YUV420_8
-			|| format == IA_CSS_STREAM_FORMAT_YUV420_10
-			|| format == IA_CSS_STREAM_FORMAT_YUV420_16) {
+	if (format == ATOMISP_INPUT_FORMAT_YUV420_8
+			|| format == ATOMISP_INPUT_FORMAT_YUV420_10
+			|| format == ATOMISP_INPUT_FORMAT_YUV420_16) {
 		even_line_bytes = (width_padded * 2 * bits_per_pixel + 7) >> 3; /* ceil ( bits per line / 8) */
 	} else {
 		even_line_bytes = odd_line_bytes;
@@ -239,7 +239,7 @@ ia_css_mipi_frame_calculate_size(const unsigned int width,
 
 #if !defined(HAS_NO_INPUT_SYSTEM) && defined(USE_INPUT_SYSTEM_VERSION_2)
 enum ia_css_err
-ia_css_mipi_frame_enable_check_on_size(const enum ia_css_csi2_port port,
+ia_css_mipi_frame_enable_check_on_size(const enum mipi_port_id port,
 				const unsigned int	size_mem_words)
 {
 	uint32_t idx;
@@ -285,7 +285,7 @@ calculate_mipi_buff_size(
 #else
 	unsigned int width;
 	unsigned int height;
-	enum ia_css_stream_format format;
+	enum atomisp_input_format format;
 	bool pack_raw_pixels;
 
 	unsigned int width_padded;
@@ -348,15 +348,15 @@ calculate_mipi_buff_size(
 
 	bits_per_pixel = sh_css_stream_format_2_bits_per_subpixel(format);
 	bits_per_pixel =
-		(format == IA_CSS_STREAM_FORMAT_RAW_10 && pack_raw_pixels) ? bits_per_pixel : 16;
+		(format == ATOMISP_INPUT_FORMAT_RAW_10 && pack_raw_pixels) ? bits_per_pixel : 16;
 	if (bits_per_pixel == 0)
 		return IA_CSS_ERR_INTERNAL_ERROR;
 
 	odd_line_bytes = (width_padded * bits_per_pixel + 7) >> 3; /* ceil ( bits per line / 8) */
 
 	/* Even lines for YUV420 formats are double in bits_per_pixel. */
-	if (format == IA_CSS_STREAM_FORMAT_YUV420_8
-		|| format == IA_CSS_STREAM_FORMAT_YUV420_10) {
+	if (format == ATOMISP_INPUT_FORMAT_YUV420_8
+		|| format == ATOMISP_INPUT_FORMAT_YUV420_10) {
 		even_line_bytes = (width_padded * 2 * bits_per_pixel + 7) >> 3; /* ceil ( bits per line / 8) */
 	} else {
 		even_line_bytes = odd_line_bytes;
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_params.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_params.c
index fbb36112fe3c..43529b1605c3 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_params.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_params.c
@@ -110,7 +110,7 @@
 #define FPNTBL_BYTES(binary) \
 	(sizeof(char) * (binary)->in_frame_info.res.height * \
 	 (binary)->in_frame_info.padded_width)
-	 
+
 #ifndef ISP2401
 
 #define SCTBL_BYTES(binary) \
@@ -1741,7 +1741,7 @@ ia_css_process_zoom_and_motion(
 				out_infos[0] = &args->out_frame[0]->info;
 			info = &stage->firmware->info.isp;
 			ia_css_binary_fill_info(info, false, false,
-				IA_CSS_STREAM_FORMAT_RAW_10,
+				ATOMISP_INPUT_FORMAT_RAW_10,
 				args->in_frame  ? &args->in_frame->info  : NULL,
 				NULL,
 				out_infos,
@@ -2891,8 +2891,8 @@ ia_css_metadata_free_multiple(unsigned int num_bufs, struct ia_css_metadata **bu
 	}
 }
 
-unsigned g_param_buffer_dequeue_count = 0;
-unsigned g_param_buffer_enqueue_count = 0;
+static unsigned g_param_buffer_dequeue_count = 0;
+static unsigned g_param_buffer_enqueue_count = 0;
 
 enum ia_css_err
 ia_css_stream_isp_parameters_init(struct ia_css_stream *stream)
@@ -3805,7 +3805,6 @@ sh_css_param_update_isp_params(struct ia_css_pipe *curr_pipe,
 
 		enum sh_css_queue_id queue_id;
 
-		(void)stage;
 		pipe = curr_pipe->stream->pipes[i];
 		pipeline = ia_css_pipe_get_pipeline(pipe);
 		pipe_num = ia_css_pipe_get_pipe_num(pipe);
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_sp.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_sp.c
index 6fc00fc402b1..85263725540d 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_sp.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_sp.c
@@ -71,7 +71,7 @@
 struct sh_css_sp_group		sh_css_sp_group;
 struct sh_css_sp_stage		sh_css_sp_stage;
 struct sh_css_isp_stage		sh_css_isp_stage;
-struct sh_css_sp_output		sh_css_sp_output;
+static struct sh_css_sp_output		sh_css_sp_output;
 static struct sh_css_sp_per_frame_data per_frame_data;
 
 /* true if SP supports frame loop and host2sp_commands */
@@ -117,9 +117,9 @@ copy_isp_stage_to_sp_stage(void)
 	*/
 	sh_css_sp_stage.enable.sdis = sh_css_isp_stage.binary_info.enable.dis;
 	sh_css_sp_stage.enable.s3a = sh_css_isp_stage.binary_info.enable.s3a;
-#ifdef ISP2401	
+#ifdef ISP2401
 	sh_css_sp_stage.enable.lace_stats = sh_css_isp_stage.binary_info.enable.lace_stats;
-#endif	
+#endif
 }
 
 void
@@ -754,7 +754,7 @@ sh_css_sp_write_frame_pointers(const struct sh_css_binary_args *args)
 
 static void
 sh_css_sp_init_group(bool two_ppc,
-		     enum ia_css_stream_format input_format,
+		     enum atomisp_input_format input_format,
 		     bool no_isp_sync,
 		     uint8_t if_config_index)
 {
@@ -817,7 +817,6 @@ configure_isp_from_args(
 	bool two_ppc,
 	bool deinterleaved)
 {
-	enum ia_css_err err = IA_CSS_SUCCESS;
 #ifdef ISP2401
 	struct ia_css_pipe *pipe = find_pipe_by_num(pipeline->pipe_num);
 	const struct ia_css_resolution *res;
@@ -841,7 +840,7 @@ configure_isp_from_args(
 	ia_css_ref_configure(binary, (const struct ia_css_frame **)args->delay_frames, pipeline->dvs_frame_delay);
 	ia_css_tnr_configure(binary, (const struct ia_css_frame **)args->tnr_frames);
 	ia_css_bayer_io_config(binary, args);
-	return err;
+	return IA_CSS_SUCCESS;
 }
 
 static void
@@ -1118,7 +1117,7 @@ sp_init_stage(struct ia_css_pipeline_stage *stage,
 			out_infos[0] = &args->out_frame[0]->info;
 		info = &firmware->info.isp;
 		ia_css_binary_fill_info(info, false, false,
-			    IA_CSS_STREAM_FORMAT_RAW_10,
+			    ATOMISP_INPUT_FORMAT_RAW_10,
 			    args->in_frame  ? &args->in_frame->info  : NULL,
 			    NULL,
 				out_infos,
@@ -1197,7 +1196,7 @@ sh_css_sp_init_pipeline(struct ia_css_pipeline *me,
 			const struct ia_css_metadata_config *md_config,
 			const struct ia_css_metadata_info *md_info,
 #if !defined(HAS_NO_INPUT_SYSTEM)
-			const mipi_port_ID_t port_id
+			const enum mipi_port_id port_id
 #endif
 #ifdef ISP2401
 			,
@@ -1442,8 +1441,6 @@ sh_css_update_host2sp_offline_frame(
 	unsigned int HIVE_ADDR_host_sp_com;
 	unsigned int offset;
 
-	(void)HIVE_ADDR_host_sp_com; /* Suppres warnings in CRUN */
-
 	assert(frame_num < NUM_CONTINUOUS_FRAMES);
 
 	/* Write new frame data into SP DMEM */
@@ -1473,8 +1470,6 @@ sh_css_update_host2sp_mipi_frame(
 	unsigned int HIVE_ADDR_host_sp_com;
 	unsigned int offset;
 
-	(void)HIVE_ADDR_host_sp_com; /* Suppres warnings in CRUN */
-
 	/* MIPI buffers are dedicated to port, so now there are more of them. */
 	assert(frame_num < (N_CSI_PORTS * NUM_MIPI_FRAMES_PER_STREAM));
 
@@ -1500,8 +1495,6 @@ sh_css_update_host2sp_mipi_metadata(
 	unsigned int HIVE_ADDR_host_sp_com;
 	unsigned int o;
 
-	(void)HIVE_ADDR_host_sp_com; /* Suppres warnings in CRUN */
-
 	/* MIPI buffers are dedicated to port, so now there are more of them. */
 	assert(frame_num < (N_CSI_PORTS * NUM_MIPI_FRAMES_PER_STREAM));
 
@@ -1520,8 +1513,6 @@ sh_css_update_host2sp_num_mipi_frames(unsigned num_frames)
 	unsigned int HIVE_ADDR_host_sp_com;
 	unsigned int offset;
 
-	(void)HIVE_ADDR_host_sp_com; /* Suppres warnings in CRUN */
-
 	/* Write new frame data into SP DMEM */
 	HIVE_ADDR_host_sp_com = sh_css_sp_fw.info.sp.host_sp_com;
 	offset = (unsigned int)offsetof(struct host_sp_communication, host2sp_num_mipi_frames)
@@ -1539,8 +1530,6 @@ sh_css_update_host2sp_cont_num_raw_frames(unsigned num_frames, bool set_avail)
 	unsigned int extra_num_frames, avail_num_frames;
 	unsigned int offset, offset_extra;
 
-	(void)HIVE_ADDR_host_sp_com; /* Suppres warnings in CRUN */
-
 	/* Write new frame data into SP DMEM */
 	fw = &sh_css_sp_fw;
 	HIVE_ADDR_host_sp_com = fw->info.sp.host_sp_com;
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_sp.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_sp.h
index 98444a3cc3e4..3c41e997de79 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_sp.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_sp.h
@@ -64,7 +64,7 @@ sh_css_sp_init_pipeline(struct ia_css_pipeline *me,
 			const struct ia_css_metadata_config *md_config,
 			const struct ia_css_metadata_info *md_info,
 #if !defined(HAS_NO_INPUT_SYSTEM)
-			const mipi_port_ID_t port_id
+			const enum mipi_port_id port_id
 #endif
 #ifdef ISP2401
 			,
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_stream_format.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_stream_format.c
index 52d0a6471597..77f135e7dc3c 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_stream_format.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_stream_format.c
@@ -16,55 +16,55 @@
 #include <ia_css_stream_format.h>
 
 unsigned int sh_css_stream_format_2_bits_per_subpixel(
-		enum ia_css_stream_format format)
+		enum atomisp_input_format format)
 {
 	unsigned int rval;
 
 	switch (format) {
-	case IA_CSS_STREAM_FORMAT_RGB_444:
+	case ATOMISP_INPUT_FORMAT_RGB_444:
 		rval = 4;
 		break;
-	case IA_CSS_STREAM_FORMAT_RGB_555:
+	case ATOMISP_INPUT_FORMAT_RGB_555:
 		rval = 5;
 		break;
-	case IA_CSS_STREAM_FORMAT_RGB_565:
-	case IA_CSS_STREAM_FORMAT_RGB_666:
-	case IA_CSS_STREAM_FORMAT_RAW_6:
+	case ATOMISP_INPUT_FORMAT_RGB_565:
+	case ATOMISP_INPUT_FORMAT_RGB_666:
+	case ATOMISP_INPUT_FORMAT_RAW_6:
 		rval = 6;
 		break;
-	case IA_CSS_STREAM_FORMAT_RAW_7:
+	case ATOMISP_INPUT_FORMAT_RAW_7:
 		rval = 7;
 		break;
-	case IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY:
-	case IA_CSS_STREAM_FORMAT_YUV420_8:
-	case IA_CSS_STREAM_FORMAT_YUV422_8:
-	case IA_CSS_STREAM_FORMAT_RGB_888:
-	case IA_CSS_STREAM_FORMAT_RAW_8:
-	case IA_CSS_STREAM_FORMAT_BINARY_8:
-	case IA_CSS_STREAM_FORMAT_USER_DEF1:
-	case IA_CSS_STREAM_FORMAT_USER_DEF2:
-	case IA_CSS_STREAM_FORMAT_USER_DEF3:
-	case IA_CSS_STREAM_FORMAT_USER_DEF4:
-	case IA_CSS_STREAM_FORMAT_USER_DEF5:
-	case IA_CSS_STREAM_FORMAT_USER_DEF6:
-	case IA_CSS_STREAM_FORMAT_USER_DEF7:
-	case IA_CSS_STREAM_FORMAT_USER_DEF8:
+	case ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY:
+	case ATOMISP_INPUT_FORMAT_YUV420_8:
+	case ATOMISP_INPUT_FORMAT_YUV422_8:
+	case ATOMISP_INPUT_FORMAT_RGB_888:
+	case ATOMISP_INPUT_FORMAT_RAW_8:
+	case ATOMISP_INPUT_FORMAT_BINARY_8:
+	case ATOMISP_INPUT_FORMAT_USER_DEF1:
+	case ATOMISP_INPUT_FORMAT_USER_DEF2:
+	case ATOMISP_INPUT_FORMAT_USER_DEF3:
+	case ATOMISP_INPUT_FORMAT_USER_DEF4:
+	case ATOMISP_INPUT_FORMAT_USER_DEF5:
+	case ATOMISP_INPUT_FORMAT_USER_DEF6:
+	case ATOMISP_INPUT_FORMAT_USER_DEF7:
+	case ATOMISP_INPUT_FORMAT_USER_DEF8:
 		rval = 8;
 		break;
-	case IA_CSS_STREAM_FORMAT_YUV420_10:
-	case IA_CSS_STREAM_FORMAT_YUV422_10:
-	case IA_CSS_STREAM_FORMAT_RAW_10:
+	case ATOMISP_INPUT_FORMAT_YUV420_10:
+	case ATOMISP_INPUT_FORMAT_YUV422_10:
+	case ATOMISP_INPUT_FORMAT_RAW_10:
 		rval = 10;
 		break;
-	case IA_CSS_STREAM_FORMAT_RAW_12:
+	case ATOMISP_INPUT_FORMAT_RAW_12:
 		rval = 12;
 		break;
-	case IA_CSS_STREAM_FORMAT_RAW_14:
+	case ATOMISP_INPUT_FORMAT_RAW_14:
 		rval = 14;
 		break;
-	case IA_CSS_STREAM_FORMAT_RAW_16:
-	case IA_CSS_STREAM_FORMAT_YUV420_16:
-	case IA_CSS_STREAM_FORMAT_YUV422_16:
+	case ATOMISP_INPUT_FORMAT_RAW_16:
+	case ATOMISP_INPUT_FORMAT_YUV420_16:
+	case ATOMISP_INPUT_FORMAT_YUV422_16:
 		rval = 16;
 		break;
 	default:
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_stream_format.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_stream_format.h
index aab2b6207051..b699f538e0dd 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_stream_format.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_stream_format.h
@@ -18,6 +18,6 @@
 #include <ia_css_stream_format.h>
 
 unsigned int sh_css_stream_format_2_bits_per_subpixel(
-		enum ia_css_stream_format format);
+		enum atomisp_input_format format);
 
 #endif /* __SH_CSS_STREAM_FORMAT_H */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/include/mmu/isp_mmu.h b/drivers/staging/media/atomisp/pci/atomisp2/include/mmu/isp_mmu.h
index 560014add005..4b2d94a37ea1 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/include/mmu/isp_mmu.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/include/mmu/isp_mmu.h
@@ -80,12 +80,10 @@ struct isp_mmu_client {
 	unsigned int null_pte;
 
 	/*
-	 * set/get page directory base address (physical address).
+	 * get page directory base address (physical address).
 	 *
 	 * must be provided.
 	 */
-	int (*set_pd_base) (struct isp_mmu *mmu,
-			phys_addr_t pd_base);
 	unsigned int (*get_pd_base) (struct isp_mmu *mmu, phys_addr_t pd_base);
 	/*
 	 * callback to flush tlb.
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/mmu/isp_mmu.c b/drivers/staging/media/atomisp/pci/atomisp2/mmu/isp_mmu.c
index f21075c1e503..198f29f4a324 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/mmu/isp_mmu.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/mmu/isp_mmu.c
@@ -344,13 +344,6 @@ static int mmu_map(struct isp_mmu *mmu, unsigned int isp_virt,
 		/*
 		 * setup L1 page table physical addr to MMU
 		 */
-		ret = mmu->driver->set_pd_base(mmu, l1_pt);
-		if (ret) {
-			dev_err(atomisp_dev,
-				 "set page directory base address fail.\n");
-			mutex_unlock(&mmu->pt_mutex);
-			return ret;
-		}
 		mmu->base_address = l1_pt;
 		mmu->l1_pte = isp_pgaddr_to_pte_valid(mmu, l1_pt);
 		memset(mmu->l2_pgt_refcount, 0, sizeof(int) * ISP_L1PT_PTES);
@@ -531,10 +524,8 @@ int isp_mmu_init(struct isp_mmu *mmu, struct isp_mmu_client *driver)
 
 	mmu->driver = driver;
 
-	if (!driver->set_pd_base || !driver->tlb_flush_all) {
-		dev_err(atomisp_dev,
-			    "set_pd_base or tlb_flush_all operation "
-			     "not provided.\n");
+	if (!driver->tlb_flush_all) {
+		dev_err(atomisp_dev, "tlb_flush_all operation not provided.\n");
 		return -EINVAL;
 	}
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/mmu/sh_mmu_mrfld.c b/drivers/staging/media/atomisp/pci/atomisp2/mmu/sh_mmu_mrfld.c
index c59bcc982966..c0212564b7c8 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/mmu/sh_mmu_mrfld.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/mmu/sh_mmu_mrfld.c
@@ -18,6 +18,7 @@
  */
 #include "type_support.h"
 #include "mmu/isp_mmu.h"
+#include "mmu/sh_mmu_mrfld.h"
 #include "memory_access/memory_access.h"
 #include "atomisp_compat.h"
 
@@ -40,20 +41,6 @@ static phys_addr_t sh_pte_to_phys(struct isp_mmu *mmu,
 	return (phys_addr_t)((pte & ~mask) << ISP_PAGE_OFFSET);
 }
 
-/*
- * set page directory base address (physical address).
- *
- * must be provided.
- */
-static int sh_set_pd_base(struct isp_mmu *mmu,
-			  phys_addr_t phys)
-{
-	unsigned int pte = sh_phys_to_pte(mmu, phys);
-	/*mmgr_set_base_address(HOST_ADDRESS(pte));*/
-	atomisp_css_mmu_set_page_table_base_index(HOST_ADDRESS(pte));
-	return 0;
-}
-
 static unsigned int sh_get_pd_base(struct isp_mmu *mmu,
 				   phys_addr_t phys)
 {
@@ -81,7 +68,6 @@ struct isp_mmu_client sh_mmu_mrfld = {
 	.name = "Silicon Hive ISP3000 MMU",
 	.pte_valid_mask = MERR_VALID_PTE_MASK,
 	.null_pte = ~MERR_VALID_PTE_MASK,
-	.set_pd_base = sh_set_pd_base,
 	.get_pd_base = sh_get_pd_base,
 	.tlb_flush_all = sh_tlb_flush,
 	.phys_to_pte = sh_phys_to_pte,
diff --git a/drivers/staging/media/atomisp/platform/intel-mid/atomisp_gmin_platform.c b/drivers/staging/media/atomisp/platform/intel-mid/atomisp_gmin_platform.c
index d8b7183db252..3283c1b05d6a 100644
--- a/drivers/staging/media/atomisp/platform/intel-mid/atomisp_gmin_platform.c
+++ b/drivers/staging/media/atomisp/platform/intel-mid/atomisp_gmin_platform.c
@@ -441,7 +441,7 @@ static int gmin_v1p2_ctrl(struct v4l2_subdev *subdev, int on)
 {
 	struct gmin_subdev *gs = find_gmin_subdev(subdev);
 
-	if (gs && gs->v1p2_on == on)
+	if (!gs || gs->v1p2_on == on)
 		return 0;
 	gs->v1p2_on = on;
 
@@ -475,7 +475,7 @@ static int gmin_v1p8_ctrl(struct v4l2_subdev *subdev, int on)
 		}
 	}
 
-	if (gs && gs->v1p8_on == on)
+	if (!gs || gs->v1p8_on == on)
 		return 0;
 	gs->v1p8_on = on;
 
@@ -511,7 +511,7 @@ static int gmin_v2p8_ctrl(struct v4l2_subdev *subdev, int on)
 		}
 	}
 
-	if (gs && gs->v2p8_on == on)
+	if (!gs || gs->v2p8_on == on)
 		return 0;
 	gs->v2p8_on = on;
 
@@ -693,9 +693,11 @@ static int gmin_get_config_var(struct device *dev, const char *var,
 	for (i = 0; i < sizeof(var8) && var8[i]; i++)
 		var16[i] = var8[i];
 
+#ifdef CONFIG_64BIT
-	/* To avoid owerflows when calling the efivar API */
+	/* To avoid overflows when calling the efivar API */
 	if (*out_len > ULONG_MAX)
 		return -EINVAL;
+#endif
 
 	/* Not sure this API usage is kosher; efivar_entry_get()'s
 	 * implementation simply uses VariableName and VendorGuid from
diff --git a/drivers/staging/media/davinci_vpfe/dm365_resizer.c b/drivers/staging/media/davinci_vpfe/dm365_resizer.c
index 857b0e847c5e..1ee216d71d42 100644
--- a/drivers/staging/media/davinci_vpfe/dm365_resizer.c
+++ b/drivers/staging/media/davinci_vpfe/dm365_resizer.c
@@ -480,7 +480,7 @@ resizer_configure_common_in_params(struct vpfe_resizer_device *resizer)
 	return 0;
 }
 static int
-resizer_configure_in_continious_mode(struct vpfe_resizer_device *resizer)
+resizer_configure_in_continuous_mode(struct vpfe_resizer_device *resizer)
 {
 	struct device *dev = resizer->crop_resizer.subdev.v4l2_dev->dev;
 	struct resizer_params *param = &resizer->config;
@@ -1242,7 +1242,7 @@ static int resizer_do_hw_setup(struct vpfe_resizer_device *resizer)
 		    ipipeif_source == IPIPEIF_OUTPUT_RESIZER)
 			ret = resizer_configure_in_single_shot_mode(resizer);
 		else
-			ret =  resizer_configure_in_continious_mode(resizer);
+			ret =  resizer_configure_in_continuous_mode(resizer);
 		if (ret)
 			return ret;
 		ret = config_rsz_hw(resizer, param);
diff --git a/drivers/staging/media/imx/imx-media-csi.c b/drivers/staging/media/imx/imx-media-csi.c
index 1aa2be891704..16cab40156ca 100644
--- a/drivers/staging/media/imx/imx-media-csi.c
+++ b/drivers/staging/media/imx/imx-media-csi.c
@@ -1005,7 +1005,7 @@ static int csi_link_validate(struct v4l2_subdev *sd,
 			     struct v4l2_subdev_format *sink_fmt)
 {
 	struct csi_priv *priv = v4l2_get_subdevdata(sd);
-	struct v4l2_fwnode_endpoint upstream_ep;
+	struct v4l2_fwnode_endpoint upstream_ep = {};
 	const struct imx_media_pixfmt *incc;
 	bool is_csi2;
 	int ret;
@@ -1800,7 +1800,10 @@ static int imx_csi_probe(struct platform_device *pdev)
 	pinctrl = devm_pinctrl_get_select_default(priv->dev);
 	if (IS_ERR(pinctrl)) {
-		ret = PTR_ERR(priv->vdev);
-		goto free;
+		ret = PTR_ERR(pinctrl);
+		dev_dbg(priv->dev,
+			"devm_pinctrl_get_select_default() failed: %d\n", ret);
+		if (ret != -ENODEV)	/* only -ENODEV is tolerated */
+			goto free;
 	}
 
 	ret = v4l2_async_register_subdev(&priv->sd);
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index b6adc54b96f1..82979880f985 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -15,6 +15,13 @@ menuconfig THERMAL
 
 if THERMAL
 
+config THERMAL_STATISTICS
+	bool "Thermal state transition statistics"
+	help
+	  Export thermal state transition statistics information through sysfs.
+
+	  If in doubt, say N.
+
 config THERMAL_EMERGENCY_POWEROFF_DELAY_MS
 	int "Emergency poweroff delay in milli-seconds"
 	depends on THERMAL
diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c
index a67781b7a0b2..ee3a215b333a 100644
--- a/drivers/thermal/imx_thermal.c
+++ b/drivers/thermal/imx_thermal.c
@@ -637,6 +637,9 @@ static int imx_thermal_probe(struct platform_device *pdev)
 	regmap_write(map, TEMPSENSE0 + REG_CLR, TEMPSENSE0_POWER_DOWN);
 	regmap_write(map, TEMPSENSE0 + REG_SET, TEMPSENSE0_MEASURE_TEMP);
 
+	data->irq_enabled = true;
+	data->mode = THERMAL_DEVICE_ENABLED;
+
 	ret = devm_request_threaded_irq(&pdev->dev, data->irq,
 			imx_thermal_alarm_irq, imx_thermal_alarm_irq_thread,
 			0, "imx_thermal", data);
@@ -649,9 +652,6 @@ static int imx_thermal_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	data->irq_enabled = true;
-	data->mode = THERMAL_DEVICE_ENABLED;
-
 	return 0;
 }
 
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 2b1b0ba393a4..d64325e078db 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -972,8 +972,8 @@ __thermal_cooling_device_register(struct device_node *np,
 	cdev->ops = ops;
 	cdev->updated = false;
 	cdev->device.class = &thermal_class;
-	thermal_cooling_device_setup_sysfs(cdev);
 	cdev->devdata = devdata;
+	thermal_cooling_device_setup_sysfs(cdev);
 	dev_set_name(&cdev->device, "cooling_device%d", cdev->id);
 	result = device_register(&cdev->device);
 	if (result) {
@@ -1106,6 +1106,7 @@ void thermal_cooling_device_unregister(struct thermal_cooling_device *cdev)
 
 	ida_simple_remove(&thermal_cdev_ida, cdev->id);
 	device_unregister(&cdev->device);
+	thermal_cooling_device_destroy_sysfs(cdev);
 }
 EXPORT_SYMBOL_GPL(thermal_cooling_device_unregister);
 
diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h
index 27e3b1df7360..5e4150261500 100644
--- a/drivers/thermal/thermal_core.h
+++ b/drivers/thermal/thermal_core.h
@@ -73,6 +73,7 @@ int thermal_build_list_of_policies(char *buf);
 int thermal_zone_create_device_groups(struct thermal_zone_device *, int);
 void thermal_zone_destroy_device_groups(struct thermal_zone_device *);
 void thermal_cooling_device_setup_sysfs(struct thermal_cooling_device *);
+void thermal_cooling_device_destroy_sysfs(struct thermal_cooling_device *cdev);
 /* used only at binding time */
 ssize_t
 thermal_cooling_device_trip_point_show(struct device *,
@@ -84,6 +85,15 @@ ssize_t thermal_cooling_device_weight_store(struct device *,
 					    struct device_attribute *,
 					    const char *, size_t);
 
+#ifdef CONFIG_THERMAL_STATISTICS
+void thermal_cooling_device_stats_update(struct thermal_cooling_device *cdev,
+					 unsigned long new_state);
+#else /* !CONFIG_THERMAL_STATISTICS: empty stub, callers need no #ifdef */
+static inline void
+thermal_cooling_device_stats_update(struct thermal_cooling_device *cdev,
+				    unsigned long new_state) {}
+#endif /* CONFIG_THERMAL_STATISTICS */
+
 #ifdef CONFIG_THERMAL_GOV_STEP_WISE
 int thermal_gov_step_wise_register(void);
 void thermal_gov_step_wise_unregister(void);
diff --git a/drivers/thermal/thermal_helpers.c b/drivers/thermal/thermal_helpers.c
index 8cdf75adcce1..eb03d7e099bb 100644
--- a/drivers/thermal/thermal_helpers.c
+++ b/drivers/thermal/thermal_helpers.c
@@ -187,7 +187,10 @@ void thermal_cdev_update(struct thermal_cooling_device *cdev)
 		if (instance->target > target)
 			target = instance->target;
 	}
-	cdev->ops->set_cur_state(cdev, target);
+
+	if (!cdev->ops->set_cur_state(cdev, target))
+		thermal_cooling_device_stats_update(cdev, target);
+
 	cdev->updated = true;
 	mutex_unlock(&cdev->lock);
 	trace_cdev_update(cdev, target);
diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c
index ba81c9080f6e..23b5e0a709b0 100644
--- a/drivers/thermal/thermal_sysfs.c
+++ b/drivers/thermal/thermal_sysfs.c
@@ -20,6 +20,7 @@
 #include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/string.h>
+#include <linux/jiffies.h>
 
 #include "thermal_core.h"
 
@@ -721,6 +722,7 @@ thermal_cooling_device_cur_state_store(struct device *dev,
 	result = cdev->ops->set_cur_state(cdev, state);
 	if (result)
 		return result;
+	thermal_cooling_device_stats_update(cdev, state);
 	return count;
 }
 
@@ -745,14 +747,237 @@ static const struct attribute_group cooling_device_attr_group = {
 
 static const struct attribute_group *cooling_device_attr_groups[] = {
 	&cooling_device_attr_group,
+	NULL, /* Space allocated for cooling_device_stats_attr_group */
 	NULL,
 };
 
+#ifdef CONFIG_THERMAL_STATISTICS
+struct cooling_dev_stats {
+	spinlock_t lock;		/* serializes statistics updates */
+	unsigned int total_trans;	/* state changes since last reset */
+	unsigned long state;		/* state currently being timed */
+	unsigned long max_states;	/* number of states (max state + 1) */
+	ktime_t last_time;		/* when time was last accounted */
+	ktime_t *time_in_state;		/* [max_states] accumulated time */
+	unsigned int *trans_table;	/* [from * max_states + to] counts */
+};
+
+static void update_time_in_state(struct cooling_dev_stats *stats)
+{
+	/* Charge the time elapsed since last_time to the current state. */
+	ktime_t now = ktime_get();
+	ktime_t *slot = &stats->time_in_state[stats->state];
+
+	*slot = ktime_add(*slot, ktime_sub(now, stats->last_time));
+	stats->last_time = now;
+}
+
+void thermal_cooling_device_stats_update(struct thermal_cooling_device *cdev,
+					 unsigned long new_state)
+{
+	struct cooling_dev_stats *stats = cdev->stats;
+
+	/* No stats if setup failed; a bad state would overrun trans_table. */
+	if (!stats || new_state >= stats->max_states)
+		return;
+
+	spin_lock(&stats->lock);
+	if (stats->state != new_state) {
+		update_time_in_state(stats);
+		stats->trans_table[stats->state * stats->max_states + new_state]++;
+		stats->state = new_state;
+		stats->total_trans++;
+	}
+	spin_unlock(&stats->lock);
+}
+
+static ssize_t
+thermal_cooling_device_total_trans_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct cooling_dev_stats *stats = to_cooling_device(dev)->stats;
+	unsigned int total;
+
+	/* Snapshot the counter under the lock, format it afterwards. */
+	spin_lock(&stats->lock);
+	total = stats->total_trans;
+	spin_unlock(&stats->lock);
+
+	return sprintf(buf, "%u\n", total);
+}
+
+static ssize_t
+thermal_cooling_device_time_in_state_show(struct device *dev,
+					  struct device_attribute *attr,
+					  char *buf)
+{
+	struct thermal_cooling_device *cdev = to_cooling_device(dev);
+	struct cooling_dev_stats *stats = cdev->stats;
+	ssize_t len = 0;
+	unsigned int i;
+
+	spin_lock(&stats->lock);
+	update_time_in_state(stats);	/* account the still-running state */
+
+	/* scnprintf() keeps the output inside the PAGE_SIZE sysfs buffer. */
+	for (i = 0; i < stats->max_states; i++)
+		len += scnprintf(buf + len, PAGE_SIZE - len, "state%u\t%llu\n",
+				 i, ktime_to_ms(stats->time_in_state[i]));
+	spin_unlock(&stats->lock);
+
+	return len;
+}
+
+static ssize_t
+thermal_cooling_device_reset_store(struct device *dev,
+				   struct device_attribute *attr,
+				   const char *buf, size_t count)
+{
+	struct cooling_dev_stats *stats = to_cooling_device(dev)->stats;
+	int nr = stats->max_states;
+	int idx;
+
+	/* Any write clears all counters; the written data is ignored. */
+	spin_lock(&stats->lock);
+
+	for (idx = 0; idx < stats->max_states; idx++)
+		stats->time_in_state[idx] = ktime_set(0, 0);
+
+	memset(stats->trans_table, 0, nr * nr * sizeof(*stats->trans_table));
+	stats->last_time = ktime_get();
+	stats->total_trans = 0;
+
+	spin_unlock(&stats->lock);
+
+	return count;
+}
+
+static ssize_t
+thermal_cooling_device_trans_table_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct thermal_cooling_device *cdev = to_cooling_device(dev);
+	struct cooling_dev_stats *stats = cdev->stats;
+	ssize_t len = 0;
+	int i, j;
+
+	/* snprintf() returns the untruncated size: len >= PAGE_SIZE = overflow */
+	len += snprintf(buf + len, PAGE_SIZE - len, " From  :    To\n");
+	len += snprintf(buf + len, PAGE_SIZE - len, "       : ");
+	for (i = 0; i < stats->max_states; i++) {
+		if (len >= PAGE_SIZE)
+			break;
+		len += snprintf(buf + len, PAGE_SIZE - len, "state%2u  ", i);
+	}
+	if (len >= PAGE_SIZE)
+		return PAGE_SIZE;
+	len += snprintf(buf + len, PAGE_SIZE - len, "\n");
+
+	/* Lock out updates and resets while the counters are dumped. */
+	spin_lock(&stats->lock);
+	for (i = 0; i < stats->max_states; i++) {
+		if (len >= PAGE_SIZE)
+			break;
+		len += snprintf(buf + len, PAGE_SIZE - len, "state%2u:", i);
+		for (j = 0; j < stats->max_states; j++) {
+			if (len >= PAGE_SIZE)
+				break;
+			len += snprintf(buf + len, PAGE_SIZE - len, "%8u ",
+				stats->trans_table[i * stats->max_states + j]);
+		}
+		if (len >= PAGE_SIZE)
+			break;
+		len += snprintf(buf + len, PAGE_SIZE - len, "\n");
+	}
+	spin_unlock(&stats->lock);
+	if (len >= PAGE_SIZE) {
+		pr_warn_once("Thermal transition table exceeds PAGE_SIZE. Disabling\n");
+		return -EFBIG;
+	}
+	return len;
+}
+
+static DEVICE_ATTR(total_trans, 0444, thermal_cooling_device_total_trans_show,
+		   NULL);
+static DEVICE_ATTR(time_in_state_ms, 0444,
+		   thermal_cooling_device_time_in_state_show, NULL);
+static DEVICE_ATTR(reset, 0200, NULL, thermal_cooling_device_reset_store);
+static DEVICE_ATTR(trans_table, 0444,
+		   thermal_cooling_device_trans_table_show, NULL);
+/* Members of the "stats" group; "reset" is write-only, the rest read-only. */
+static struct attribute *cooling_device_stats_attrs[] = {
+	&dev_attr_total_trans.attr,
+	&dev_attr_time_in_state_ms.attr,
+	&dev_attr_reset.attr,
+	&dev_attr_trans_table.attr,
+	NULL	/* terminator */
+};
+
+static const struct attribute_group cooling_device_stats_attr_group = {
+	.attrs = cooling_device_stats_attrs,
+	.name = "stats"
+};
+
+/* Allocate per-cdev statistics; expose the "stats" group only on success. */
+static void cooling_device_stats_setup(struct thermal_cooling_device *cdev)
+{
+	const struct attribute_group *group = NULL;
+	struct cooling_dev_stats *stats;
+	unsigned long states;
+	size_t size;
+
+	if (cdev->ops->get_max_state(cdev, &states))
+		goto out;
+
+	states++; /* Total number of states is highest state + 1 */
+	size = sizeof(*stats) + sizeof(*stats->time_in_state) * states +
+	       sizeof(*stats->trans_table) * states * states;
+	stats = kzalloc(size, GFP_KERNEL);
+	if (!stats)
+		goto out;
+
+	/* time_in_state[] and trans_table[] live right behind the struct. */
+	stats->time_in_state = (ktime_t *)(stats + 1);
+	stats->trans_table = (unsigned int *)(stats->time_in_state + states);
+	stats->last_time = ktime_get();
+	stats->max_states = states;
+	spin_lock_init(&stats->lock);
+	cdev->stats = stats;
+	group = &cooling_device_stats_attr_group;
+out:
+	/* Slot is shared by all cdevs: clear it when this one has no stats. */
+	cooling_device_attr_groups[ARRAY_SIZE(cooling_device_attr_groups) - 2] =
+		group;
+}
+
+static void cooling_device_stats_destroy(struct thermal_cooling_device *cdev)
+{
+	kfree(cdev->stats);	/* one allocation holds struct and arrays */
+	cdev->stats = NULL;	/* leave no dangling pointer behind */
+}
+
+#else
+/* Without CONFIG_THERMAL_STATISTICS, setup and destroy are empty stubs. */
+static inline void
+cooling_device_stats_setup(struct thermal_cooling_device *cdev) {}
+static inline void
+cooling_device_stats_destroy(struct thermal_cooling_device *cdev) {}
+
+#endif /* CONFIG_THERMAL_STATISTICS */
+
 void thermal_cooling_device_setup_sysfs(struct thermal_cooling_device *cdev)
 {
+	cooling_device_stats_setup(cdev);	/* may add the stats group */
 	cdev->device.groups = cooling_device_attr_groups;
 }
 
+void thermal_cooling_device_destroy_sysfs(struct thermal_cooling_device *cdev)
+{
+	cooling_device_stats_destroy(cdev);	/* drops cdev->stats, if any */
+}
+
 /* these helper will be used only at the time of bindig */
 ssize_t
 thermal_cooling_device_trip_point_show(struct device *dev,
diff --git a/drivers/tty/Kconfig b/drivers/tty/Kconfig
index e5041c605fd0..0840d27381ea 100644
--- a/drivers/tty/Kconfig
+++ b/drivers/tty/Kconfig
@@ -11,7 +11,7 @@ if TTY
 
 config VT
 	bool "Virtual terminal" if EXPERT
-	depends on !S390 && !UML
+	depends on !UML
 	select INPUT
 	default y
 	---help---
diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
index d526595bc959..76e16c5251b9 100644
--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -65,7 +65,6 @@ struct usb_dev_state {
 	const struct cred *cred;
 	void __user *disccontext;
 	unsigned long ifclaimed;
-	u32 secid;
 	u32 disabled_bulk_eps;
 	bool privileges_dropped;
 	unsigned long interface_allowed_mask;
@@ -95,7 +94,6 @@ struct async {
 	struct usb_memory *usbm;
 	unsigned int mem_usage;
 	int status;
-	u32 secid;
 	u8 bulk_addr;
 	u8 bulk_status;
 };
@@ -586,7 +584,6 @@ static void async_completed(struct urb *urb)
 	struct usb_dev_state *ps = as->ps;
 	struct siginfo sinfo;
 	struct pid *pid = NULL;
-	u32 secid = 0;
 	const struct cred *cred = NULL;
 	int signr;
 
@@ -602,7 +599,6 @@ static void async_completed(struct urb *urb)
 		sinfo.si_addr = as->userurb;
 		pid = get_pid(as->pid);
 		cred = get_cred(as->cred);
-		secid = as->secid;
 	}
 	snoop(&urb->dev->dev, "urb complete\n");
 	snoop_urb(urb->dev, as->userurb, urb->pipe, urb->actual_length,
@@ -618,7 +614,7 @@ static void async_completed(struct urb *urb)
 	spin_unlock(&ps->lock);
 
 	if (signr) {
-		kill_pid_info_as_cred(sinfo.si_signo, &sinfo, pid, cred, secid);
+		kill_pid_info_as_cred(sinfo.si_signo, &sinfo, pid, cred);
 		put_pid(pid);
 		put_cred(cred);
 	}
@@ -1013,7 +1009,6 @@ static int usbdev_open(struct inode *inode, struct file *file)
 	init_waitqueue_head(&ps->wait);
 	ps->disc_pid = get_pid(task_pid(current));
 	ps->cred = get_current_cred();
-	security_task_getsecid(current, &ps->secid);
 	smp_wmb();
 	list_add_tail(&ps->list, &dev->filelist);
 	file->private_data = ps;
@@ -1727,7 +1722,6 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb
 	as->ifnum = ifnum;
 	as->pid = get_pid(task_pid(current));
 	as->cred = get_current_cred();
-	security_task_getsecid(current, &as->secid);
 	snoop_urb(ps->dev, as->userurb, as->urb->pipe,
 			as->urb->transfer_buffer_length, 0, SUBMIT,
 			NULL, 0);
@@ -2617,7 +2611,7 @@ static void usbdev_remove(struct usb_device *udev)
 			sinfo.si_code = SI_ASYNCIO;
 			sinfo.si_addr = ps->disccontext;
 			kill_pid_info_as_cred(ps->discsignr, &sinfo,
-					ps->disc_pid, ps->cred, ps->secid);
+					ps->disc_pid, ps->cred);
 		}
 	}
 }
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index edc6fec9ad84..986058a57917 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -44,6 +44,10 @@ MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;"
  * Using this limit prevents one virtqueue from starving others. */
 #define VHOST_NET_WEIGHT 0x80000
 
+/* Max number of packets transferred before requeueing the job.
+ * Using this limit prevents one virtqueue from starving rx. */
+#define VHOST_NET_PKT_WEIGHT(vq) ((vq)->num * 2)
+
 /* MAX number of TX used buffers for outstanding zerocopy */
 #define VHOST_MAX_PEND 128
 #define VHOST_GOODCOPY_LEN 256
@@ -473,6 +477,7 @@ static void handle_tx(struct vhost_net *net)
 	struct socket *sock;
 	struct vhost_net_ubuf_ref *uninitialized_var(ubufs);
 	bool zcopy, zcopy_used;
+	int sent_pkts = 0;
 
 	mutex_lock(&vq->mutex);
 	sock = vq->private_data;
@@ -580,7 +585,8 @@ static void handle_tx(struct vhost_net *net)
 		else
 			vhost_zerocopy_signal_used(net, vq);
 		vhost_net_tx_packet(net);
-		if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
+		if (unlikely(total_len >= VHOST_NET_WEIGHT) ||
+		    unlikely(++sent_pkts >= VHOST_NET_PKT_WEIGHT(vq))) {
 			vhost_poll_queue(&vq->poll);
 			break;
 		}
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index bec722e41f58..f3bd8e941224 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -641,14 +641,14 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
 }
 EXPORT_SYMBOL_GPL(vhost_dev_cleanup);
 
-static int log_access_ok(void __user *log_base, u64 addr, unsigned long sz)
+static bool log_access_ok(void __user *log_base, u64 addr, unsigned long sz)
 {
 	u64 a = addr / VHOST_PAGE_SIZE / 8;
 
 	/* Make sure 64 bit math will not overflow. */
 	if (a > ULONG_MAX - (unsigned long)log_base ||
 	    a + (unsigned long)log_base > ULONG_MAX)
-		return 0;
+		return false;
 
 	return access_ok(VERIFY_WRITE, log_base + a,
 			 (sz + VHOST_PAGE_SIZE * 8 - 1) / VHOST_PAGE_SIZE / 8);
@@ -661,30 +661,30 @@ static bool vhost_overflow(u64 uaddr, u64 size)
 }
 
 /* Caller should have vq mutex and device mutex. */
-static int vq_memory_access_ok(void __user *log_base, struct vhost_umem *umem,
-			       int log_all)
+static bool vq_memory_access_ok(void __user *log_base, struct vhost_umem *umem,
+				int log_all)
 {
 	struct vhost_umem_node *node;
 
 	if (!umem)
-		return 0;
+		return false;
 
 	list_for_each_entry(node, &umem->umem_list, link) {
 		unsigned long a = node->userspace_addr;
 
 		if (vhost_overflow(node->userspace_addr, node->size))
-			return 0;
+			return false;
 
 
 		if (!access_ok(VERIFY_WRITE, (void __user *)a,
 				    node->size))
-			return 0;
+			return false;
 		else if (log_all && !log_access_ok(log_base,
 						   node->start,
 						   node->size))
-			return 0;
+			return false;
 	}
-	return 1;
+	return true;
 }
 
 static inline void __user *vhost_vq_meta_fetch(struct vhost_virtqueue *vq,
@@ -701,13 +701,13 @@ static inline void __user *vhost_vq_meta_fetch(struct vhost_virtqueue *vq,
 
 /* Can we switch to this memory table? */
 /* Caller should have device mutex but not vq mutex */
-static int memory_access_ok(struct vhost_dev *d, struct vhost_umem *umem,
-			    int log_all)
+static bool memory_access_ok(struct vhost_dev *d, struct vhost_umem *umem,
+			     int log_all)
 {
 	int i;
 
 	for (i = 0; i < d->nvqs; ++i) {
-		int ok;
+		bool ok;
 		bool log;
 
 		mutex_lock(&d->vqs[i]->mutex);
@@ -717,12 +717,12 @@ static int memory_access_ok(struct vhost_dev *d, struct vhost_umem *umem,
 			ok = vq_memory_access_ok(d->vqs[i]->log_base,
 						 umem, log);
 		else
-			ok = 1;
+			ok = true;
 		mutex_unlock(&d->vqs[i]->mutex);
 		if (!ok)
-			return 0;
+			return false;
 	}
-	return 1;
+	return true;
 }
 
 static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len,
@@ -744,7 +744,7 @@ static int vhost_copy_to_user(struct vhost_virtqueue *vq, void __user *to,
 		struct iov_iter t;
 		void __user *uaddr = vhost_vq_meta_fetch(vq,
 				     (u64)(uintptr_t)to, size,
-				     VHOST_ADDR_DESC);
+				     VHOST_ADDR_USED);
 
 		if (uaddr)
 			return __copy_to_user(uaddr, from, size);
@@ -959,21 +959,21 @@ static void vhost_iotlb_notify_vq(struct vhost_dev *d,
 	spin_unlock(&d->iotlb_lock);
 }
 
-static int umem_access_ok(u64 uaddr, u64 size, int access)
+static bool umem_access_ok(u64 uaddr, u64 size, int access)
 {
 	unsigned long a = uaddr;
 
 	/* Make sure 64 bit math will not overflow. */
 	if (vhost_overflow(uaddr, size))
-		return -EFAULT;
+		return false;
 
 	if ((access & VHOST_ACCESS_RO) &&
 	    !access_ok(VERIFY_READ, (void __user *)a, size))
-		return -EFAULT;
+		return false;
 	if ((access & VHOST_ACCESS_WO) &&
 	    !access_ok(VERIFY_WRITE, (void __user *)a, size))
-		return -EFAULT;
-	return 0;
+		return false;
+	return true;
 }
 
 static int vhost_process_iotlb_msg(struct vhost_dev *dev,
@@ -988,7 +988,7 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev,
 			ret = -EFAULT;
 			break;
 		}
-		if (umem_access_ok(msg->uaddr, msg->size, msg->perm)) {
+		if (!umem_access_ok(msg->uaddr, msg->size, msg->perm)) {
 			ret = -EFAULT;
 			break;
 		}
@@ -1135,10 +1135,10 @@ static int vhost_iotlb_miss(struct vhost_virtqueue *vq, u64 iova, int access)
 	return 0;
 }
 
-static int vq_access_ok(struct vhost_virtqueue *vq, unsigned int num,
-			struct vring_desc __user *desc,
-			struct vring_avail __user *avail,
-			struct vring_used __user *used)
+static bool vq_access_ok(struct vhost_virtqueue *vq, unsigned int num,
+			 struct vring_desc __user *desc,
+			 struct vring_avail __user *avail,
+			 struct vring_used __user *used)
 
 {
 	size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
@@ -1161,8 +1161,8 @@ static void vhost_vq_meta_update(struct vhost_virtqueue *vq,
 		vq->meta_iotlb[type] = node;
 }
 
-static int iotlb_access_ok(struct vhost_virtqueue *vq,
-			   int access, u64 addr, u64 len, int type)
+static bool iotlb_access_ok(struct vhost_virtqueue *vq,
+			    int access, u64 addr, u64 len, int type)
 {
 	const struct vhost_umem_node *node;
 	struct vhost_umem *umem = vq->iotlb;
@@ -1220,7 +1220,7 @@ EXPORT_SYMBOL_GPL(vq_iotlb_prefetch);
 
 /* Can we log writes? */
 /* Caller should have device mutex but not vq mutex */
-int vhost_log_access_ok(struct vhost_dev *dev)
+bool vhost_log_access_ok(struct vhost_dev *dev)
 {
 	return memory_access_ok(dev, dev->umem, 1);
 }
@@ -1228,8 +1228,8 @@ EXPORT_SYMBOL_GPL(vhost_log_access_ok);
 
 /* Verify access for write logging. */
 /* Caller should have vq mutex and device mutex */
-static int vq_log_access_ok(struct vhost_virtqueue *vq,
-			    void __user *log_base)
+static bool vq_log_access_ok(struct vhost_virtqueue *vq,
+			     void __user *log_base)
 {
 	size_t s = vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
 
@@ -1242,12 +1242,14 @@ static int vq_log_access_ok(struct vhost_virtqueue *vq,
 
 /* Can we start vq? */
 /* Caller should have vq mutex and device mutex */
-int vhost_vq_access_ok(struct vhost_virtqueue *vq)
+bool vhost_vq_access_ok(struct vhost_virtqueue *vq)
 {
-	int ret = vq_log_access_ok(vq, vq->log_base);
+	if (!vq_log_access_ok(vq, vq->log_base))
+		return false;
 
-	if (ret || vq->iotlb)
-		return ret;
+	/* Access validation occurs at prefetch time with IOTLB */
+	if (vq->iotlb)
+		return true;
 
 	return vq_access_ok(vq, vq->num, vq->desc, vq->avail, vq->used);
 }
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index d8ee85ae8fdc..6c844b90a168 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -178,8 +178,8 @@ void vhost_dev_cleanup(struct vhost_dev *);
 void vhost_dev_stop(struct vhost_dev *);
 long vhost_dev_ioctl(struct vhost_dev *, unsigned int ioctl, void __user *argp);
 long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp);
-int vhost_vq_access_ok(struct vhost_virtqueue *vq);
-int vhost_log_access_ok(struct vhost_dev *);
+bool vhost_vq_access_ok(struct vhost_virtqueue *vq);
+bool vhost_log_access_ok(struct vhost_dev *);
 
 int vhost_get_vq_desc(struct vhost_virtqueue *,
 		      struct iovec iov[], unsigned int iov_count,
diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index 3c20af999893..83d3d271ca15 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -3,12 +3,10 @@
 #
 
 menu "Graphics support"
-	depends on HAS_IOMEM
 
-config HAVE_FB_ATMEL
-	bool
+if HAS_IOMEM
 
-config SH_LCD_MIPI_DSI
+config HAVE_FB_ATMEL
 	bool
 
 source "drivers/char/agp/Kconfig"
@@ -36,6 +34,8 @@ config VIDEOMODE_HELPERS
 config HDMI
 	bool
 
+endif # HAS_IOMEM
+
 if VT
 	source "drivers/video/console/Kconfig"
 endif
diff --git a/drivers/video/console/Kconfig b/drivers/video/console/Kconfig
index 198574b7dbef..4110ba7d7ca9 100644
--- a/drivers/video/console/Kconfig
+++ b/drivers/video/console/Kconfig
@@ -8,7 +8,7 @@ config VGA_CONSOLE
 	bool "VGA text console" if EXPERT || !X86
 	depends on !4xx && !PPC_8xx && !SPARC && !M68K && !PARISC &&  !SUPERH && \
 		(!ARM || ARCH_FOOTBRIDGE || ARCH_INTEGRATOR || ARCH_NETWINDER) && \
-		!ARM64 && !ARC && !MICROBLAZE && !OPENRISC && !NDS32
+		!ARM64 && !ARC && !MICROBLAZE && !OPENRISC && !NDS32 && !S390
 	default y
 	help
 	  Saying Y here will allow you to use Linux in text mode through a
@@ -84,7 +84,7 @@ config MDA_CONSOLE
 
 config SGI_NEWPORT_CONSOLE
         tristate "SGI Newport Console support"
-        depends on SGI_IP22 
+	depends on SGI_IP22 && HAS_IOMEM
         select FONT_SUPPORT
         help
           Say Y here if you want the console on the Newport aka XL graphics
@@ -152,7 +152,7 @@ config FRAMEBUFFER_CONSOLE_ROTATION
 
 config STI_CONSOLE
         bool "STI text console"
-        depends on PARISC
+	depends on PARISC && HAS_IOMEM
         select FONT_SUPPORT
         default y
         help
diff --git a/drivers/video/console/sticore.c b/drivers/video/console/sticore.c
index d1d3796773aa..08b822656846 100644
--- a/drivers/video/console/sticore.c
+++ b/drivers/video/console/sticore.c
@@ -827,10 +827,8 @@ static struct sti_struct *sti_try_rom_generic(unsigned long address,
 	}
 	
 	sti = kzalloc(sizeof(*sti), GFP_KERNEL);
-	if (!sti) {
-		printk(KERN_ERR "Not enough memory !\n");
+	if (!sti)
 		return NULL;
-	}
 
 	spin_lock_init(&sti->lock);
 
diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig
index 399573742487..d94254263ea5 100644
--- a/drivers/video/fbdev/Kconfig
+++ b/drivers/video/fbdev/Kconfig
@@ -1053,6 +1053,11 @@ config FB_I810_I2C
 	bool "Enable DDC Support"
 	depends on FB_I810 && FB_I810_GTF
 	select FB_DDC
+	help
+	  Add DDC/I2C support for i810fb.  This will allow the driver to get
+	  display information, especially for monitors with fickle timings.
+
+	  If unsure, say Y.
 
 config FB_LE80578
 	tristate "Intel LE80578 (Vermilion) support"
@@ -1917,8 +1922,7 @@ config FB_TMIO_ACCELL
 
 config FB_S3C
 	tristate "Samsung S3C framebuffer support"
-	depends on FB && (CPU_S3C2416 || ARCH_S3C64XX || \
-		ARCH_S5PV210 || ARCH_EXYNOS)
+	depends on FB && (CPU_S3C2416 || ARCH_S3C64XX)
 	select FB_CFB_FILLRECT
 	select FB_CFB_COPYAREA
 	select FB_CFB_IMAGEBLIT
diff --git a/drivers/video/fbdev/amba-clcd.c b/drivers/video/fbdev/amba-clcd.c
index 36d25190b48c..38c1f324ce15 100644
--- a/drivers/video/fbdev/amba-clcd.c
+++ b/drivers/video/fbdev/amba-clcd.c
@@ -967,9 +967,8 @@ static int clcdfb_probe(struct amba_device *dev, const struct amba_id *id)
 		goto out;
 	}
 
-	fb = kzalloc(sizeof(struct clcd_fb), GFP_KERNEL);
+	fb = kzalloc(sizeof(*fb), GFP_KERNEL);
 	if (!fb) {
-		printk(KERN_INFO "CLCD: could not allocate new clcd_fb struct\n");
 		ret = -ENOMEM;
 		goto free_region;
 	}
diff --git a/drivers/video/fbdev/atmel_lcdfb.c b/drivers/video/fbdev/atmel_lcdfb.c
index 3dee267d7c75..076d24afbd72 100644
--- a/drivers/video/fbdev/atmel_lcdfb.c
+++ b/drivers/video/fbdev/atmel_lcdfb.c
@@ -18,10 +18,10 @@
 #include <linux/delay.h>
 #include <linux/backlight.h>
 #include <linux/gfp.h>
+#include <linux/gpio/consumer.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
-#include <linux/of_gpio.h>
 #include <video/of_display_timing.h>
 #include <linux/regulator/consumer.h>
 #include <video/videomode.h>
@@ -61,8 +61,7 @@ struct atmel_lcdfb_info {
 };
 
 struct atmel_lcdfb_power_ctrl_gpio {
-	int gpio;
-	int active_low;
+	struct gpio_desc *gpiod;
 
 	struct list_head list;
 };
@@ -1018,7 +1017,7 @@ static void atmel_lcdfb_power_control_gpio(struct atmel_lcdfb_pdata *pdata, int
 	struct atmel_lcdfb_power_ctrl_gpio *og;
 
 	list_for_each_entry(og, &pdata->pwr_gpios, list)
-		gpio_set_value(og->gpio, on);
+		gpiod_set_value(og->gpiod, on);
 }
 
 static int atmel_lcdfb_of_init(struct atmel_lcdfb_info *sinfo)
@@ -1031,11 +1030,11 @@ static int atmel_lcdfb_of_init(struct atmel_lcdfb_info *sinfo)
 	struct device_node *display_np;
 	struct device_node *timings_np;
 	struct display_timings *timings;
-	enum of_gpio_flags flags;
 	struct atmel_lcdfb_power_ctrl_gpio *og;
 	bool is_gpio_power = false;
+	struct gpio_desc *gpiod;
 	int ret = -ENOENT;
-	int i, gpio;
+	int i;
 
 	sinfo->config = (struct atmel_lcdfb_config*)
 		of_match_device(atmel_lcdfb_dt_ids, dev)->data;
@@ -1072,28 +1071,22 @@ static int atmel_lcdfb_of_init(struct atmel_lcdfb_info *sinfo)
 
 	INIT_LIST_HEAD(&pdata->pwr_gpios);
 	ret = -ENOMEM;
-	for (i = 0; i < of_gpio_named_count(display_np, "atmel,power-control-gpio"); i++) {
-		gpio = of_get_named_gpio_flags(display_np, "atmel,power-control-gpio",
-					       i, &flags);
-		if (gpio < 0)
+	for (i = 0; i < gpiod_count(dev, "atmel,power-control"); i++) {
+		gpiod = devm_gpiod_get_index(dev, "atmel,power-control",
+					     i, GPIOD_ASIS);
+		if (IS_ERR(gpiod))
 			continue;
 
 		og = devm_kzalloc(dev, sizeof(*og), GFP_KERNEL);
 		if (!og)
 			goto put_display_node;
 
-		og->gpio = gpio;
-		og->active_low = flags & OF_GPIO_ACTIVE_LOW;
+		og->gpiod = gpiod;
 		is_gpio_power = true;
-		ret = devm_gpio_request(dev, gpio, "lcd-power-control-gpio");
-		if (ret) {
-			dev_err(dev, "request gpio %d failed\n", gpio);
-			goto put_display_node;
-		}
 
-		ret = gpio_direction_output(gpio, og->active_low);
+		ret = gpiod_direction_output(gpiod, gpiod_is_active_low(gpiod));
 		if (ret) {
-			dev_err(dev, "set direction output gpio %d failed\n", gpio);
+			dev_err(dev, "set direction output gpio atmel,power-control[%d] failed\n", i);
 			goto put_display_node;
 		}
 		list_add(&og->list, &pdata->pwr_gpios);
diff --git a/drivers/video/fbdev/aty/aty128fb.c b/drivers/video/fbdev/aty/aty128fb.c
index db18474607c9..09b0e558dce8 100644
--- a/drivers/video/fbdev/aty/aty128fb.c
+++ b/drivers/video/fbdev/aty/aty128fb.c
@@ -1716,7 +1716,7 @@ static int aty128fb_setup(char *options)
 			continue;
 		}
 		if(!strncmp(this_opt, "nomtrr", 6)) {
-			mtrr = 0;
+			mtrr = false;
 			continue;
 		}
 #ifdef CONFIG_PPC_PMAC
diff --git a/drivers/video/fbdev/aty/mach64_ct.c b/drivers/video/fbdev/aty/mach64_ct.c
index 7d3bd723d3d5..74a62aa193c0 100644
--- a/drivers/video/fbdev/aty/mach64_ct.c
+++ b/drivers/video/fbdev/aty/mach64_ct.c
@@ -180,7 +180,7 @@ static int aty_dsp_gt(const struct fb_info *info, u32 bpp, struct pll_ct *pll)
 		dsp_on = ((multiplier << vshift) + divider) / divider;
 		tmp = ((ras_multiplier << xshift) + ras_divider) / ras_divider;
 		if (dsp_on < tmp)
-		dsp_on = tmp;
+			dsp_on = tmp;
 		dsp_on = dsp_on + (tmp * 2) + (pll->xclkpagefaultdelay << xshift);
 	}
 
diff --git a/drivers/video/fbdev/aty/radeon_base.c b/drivers/video/fbdev/aty/radeon_base.c
index 87608c0b2351..e8594bbaea60 100644
--- a/drivers/video/fbdev/aty/radeon_base.c
+++ b/drivers/video/fbdev/aty/radeon_base.c
@@ -2255,6 +2255,23 @@ static const struct bin_attribute edid2_attr = {
 	.read	= radeon_show_edid2,
 };
 
+static int radeon_kick_out_firmware_fb(struct pci_dev *pdev)
+{
+	/* Remove framebuffers conflicting with the PCI resource 0 aperture. */
+	struct apertures_struct *ap = alloc_apertures(1);
+
+	if (!ap)
+		return -ENOMEM;
+
+	ap->ranges[0].size = pci_resource_len(pdev, 0);
+	ap->ranges[0].base = pci_resource_start(pdev, 0);
+
+	remove_conflicting_framebuffers(ap, KBUILD_MODNAME, false);
+
+	kfree(ap);
+
+	return 0;
+}
 
 static int radeonfb_pci_register(struct pci_dev *pdev,
 				 const struct pci_device_id *ent)
@@ -2308,6 +2325,10 @@ static int radeonfb_pci_register(struct pci_dev *pdev,
 	rinfo->fb_base_phys = pci_resource_start (pdev, 0);
 	rinfo->mmio_base_phys = pci_resource_start (pdev, 2);
 
+	ret = radeon_kick_out_firmware_fb(pdev);
+	if (ret)
+		return ret;
+
 	/* request the mem regions */
 	ret = pci_request_region(pdev, 0, "radeonfb framebuffer");
 	if (ret < 0) {
diff --git a/drivers/video/fbdev/au1100fb.c b/drivers/video/fbdev/au1100fb.c
index 8de42f617d16..7c9a672e9811 100644
--- a/drivers/video/fbdev/au1100fb.c
+++ b/drivers/video/fbdev/au1100fb.c
@@ -410,18 +410,15 @@ static int au1100fb_setup(struct au1100fb_device *fbdev)
 
 static int au1100fb_drv_probe(struct platform_device *dev)
 {
-	struct au1100fb_device *fbdev = NULL;
+	struct au1100fb_device *fbdev;
 	struct resource *regs_res;
 	unsigned long page;
 	struct clk *c;
 
 	/* Allocate new device private */
-	fbdev = devm_kzalloc(&dev->dev, sizeof(struct au1100fb_device),
-			     GFP_KERNEL);
-	if (!fbdev) {
-		print_err("fail to allocate device private record");
+	fbdev = devm_kzalloc(&dev->dev, sizeof(*fbdev), GFP_KERNEL);
+	if (!fbdev)
 		return -ENOMEM;
-	}
 
 	if (au1100fb_setup(fbdev))
 		goto failed;
diff --git a/drivers/video/fbdev/fsl-diu-fb.c b/drivers/video/fbdev/fsl-diu-fb.c
index 25abbcf38913..1bfd13cbd4e3 100644
--- a/drivers/video/fbdev/fsl-diu-fb.c
+++ b/drivers/video/fbdev/fsl-diu-fb.c
@@ -1960,12 +1960,8 @@ static int __init fsl_diu_init(void)
 
 	of_node_put(np);
 	coherence_data = vmalloc(coherence_data_size);
-	if (!coherence_data) {
-		pr_err("fsl-diu-fb: could not allocate coherence data "
-		       "(size=%zu)\n", coherence_data_size);
+	if (!coherence_data)
 		return -ENOMEM;
-	}
-
 #endif
 
 	ret = platform_driver_register(&fsl_diu_driver);
diff --git a/drivers/video/fbdev/matrox/matroxfb_crtc2.c b/drivers/video/fbdev/matrox/matroxfb_crtc2.c
index 02796a4317a9..f64e1d55d7a1 100644
--- a/drivers/video/fbdev/matrox/matroxfb_crtc2.c
+++ b/drivers/video/fbdev/matrox/matroxfb_crtc2.c
@@ -696,10 +696,9 @@ static void* matroxfb_crtc2_probe(struct matrox_fb_info* minfo) {
 	if (!minfo->devflags.crtc2)
 		return NULL;
 	m2info = kzalloc(sizeof(*m2info), GFP_KERNEL);
-	if (!m2info) {
-		printk(KERN_ERR "matroxfb_crtc2: Not enough memory for CRTC2 control structs\n");
+	if (!m2info)
 		return NULL;
-	}
+
 	m2info->primary_dev = minfo;
 	if (matroxfb_dh_registerfb(m2info)) {
 		kfree(m2info);
diff --git a/drivers/video/fbdev/offb.c b/drivers/video/fbdev/offb.c
index 90d38de34479..77c0a2f45b3b 100644
--- a/drivers/video/fbdev/offb.c
+++ b/drivers/video/fbdev/offb.c
@@ -280,6 +280,7 @@ static void offb_destroy(struct fb_info *info)
 	if (info->screen_base)
 		iounmap(info->screen_base);
 	release_mem_region(info->apertures->ranges[0].base, info->apertures->ranges[0].size);
+	fb_dealloc_cmap(&info->cmap);
 	framebuffer_release(info);
 }
 
@@ -518,6 +519,7 @@ static void __init offb_init_fb(const char *name,
 	return;
 
 out_err:
+	fb_dealloc_cmap(&info->cmap);
 	iounmap(info->screen_base);
 out_aper:
 	iounmap(par->cmap_adr);
diff --git a/drivers/video/fbdev/s3c-fb.c b/drivers/video/fbdev/s3c-fb.c
index 5f4f696c2ecf..9ec85ccd0ce9 100644
--- a/drivers/video/fbdev/s3c-fb.c
+++ b/drivers/video/fbdev/s3c-fb.c
@@ -1383,11 +1383,9 @@ static int s3c_fb_probe(struct platform_device *pdev)
 		return -EINVAL;
 	}
 
-	sfb = devm_kzalloc(dev, sizeof(struct s3c_fb), GFP_KERNEL);
-	if (!sfb) {
-		dev_err(dev, "no memory for framebuffers\n");
+	sfb = devm_kzalloc(dev, sizeof(*sfb), GFP_KERNEL);
+	if (!sfb)
 		return -ENOMEM;
-	}
 
 	dev_dbg(dev, "allocate new framebuffer %p\n", sfb);
 
@@ -1716,63 +1714,6 @@ static struct s3c_fb_win_variant s3c_fb_data_64xx_wins[] = {
 	},
 };
 
-static struct s3c_fb_win_variant s3c_fb_data_s5p_wins[] = {
-	[0] = {
-		.has_osd_c	= 1,
-		.osd_size_off	= 0x8,
-		.palette_sz	= 256,
-		.valid_bpp	= (VALID_BPP1248 | VALID_BPP(13) |
-				   VALID_BPP(15) | VALID_BPP(16) |
-				   VALID_BPP(18) | VALID_BPP(19) |
-				   VALID_BPP(24) | VALID_BPP(25) |
-				   VALID_BPP(32)),
-	},
-	[1] = {
-		.has_osd_c	= 1,
-		.has_osd_d	= 1,
-		.osd_size_off	= 0xc,
-		.has_osd_alpha	= 1,
-		.palette_sz	= 256,
-		.valid_bpp	= (VALID_BPP1248 | VALID_BPP(13) |
-				   VALID_BPP(15) | VALID_BPP(16) |
-				   VALID_BPP(18) | VALID_BPP(19) |
-				   VALID_BPP(24) | VALID_BPP(25) |
-				   VALID_BPP(32)),
-	},
-	[2] = {
-		.has_osd_c	= 1,
-		.has_osd_d	= 1,
-		.osd_size_off	= 0xc,
-		.has_osd_alpha	= 1,
-		.palette_sz	= 256,
-		.valid_bpp	= (VALID_BPP1248 | VALID_BPP(13) |
-				   VALID_BPP(15) | VALID_BPP(16) |
-				   VALID_BPP(18) | VALID_BPP(19) |
-				   VALID_BPP(24) | VALID_BPP(25) |
-				   VALID_BPP(32)),
-	},
-	[3] = {
-		.has_osd_c	= 1,
-		.has_osd_alpha	= 1,
-		.palette_sz	= 256,
-		.valid_bpp	= (VALID_BPP1248 | VALID_BPP(13) |
-				   VALID_BPP(15) | VALID_BPP(16) |
-				   VALID_BPP(18) | VALID_BPP(19) |
-				   VALID_BPP(24) | VALID_BPP(25) |
-				   VALID_BPP(32)),
-	},
-	[4] = {
-		.has_osd_c	= 1,
-		.has_osd_alpha	= 1,
-		.palette_sz	= 256,
-		.valid_bpp	= (VALID_BPP1248 | VALID_BPP(13) |
-				   VALID_BPP(15) | VALID_BPP(16) |
-				   VALID_BPP(18) | VALID_BPP(19) |
-				   VALID_BPP(24) | VALID_BPP(25) |
-				   VALID_BPP(32)),
-	},
-};
-
 static struct s3c_fb_driverdata s3c_fb_data_64xx = {
 	.variant = {
 		.nr_windows	= 5,
@@ -1804,102 +1745,6 @@ static struct s3c_fb_driverdata s3c_fb_data_64xx = {
 	.win[4]	= &s3c_fb_data_64xx_wins[4],
 };
 
-static struct s3c_fb_driverdata s3c_fb_data_s5pv210 = {
-	.variant = {
-		.nr_windows	= 5,
-		.vidtcon	= VIDTCON0,
-		.wincon		= WINCON(0),
-		.winmap		= WINxMAP(0),
-		.keycon		= WKEYCON,
-		.osd		= VIDOSD_BASE,
-		.osd_stride	= 16,
-		.buf_start	= VIDW_BUF_START(0),
-		.buf_size	= VIDW_BUF_SIZE(0),
-		.buf_end	= VIDW_BUF_END(0),
-
-		.palette = {
-			[0] = 0x2400,
-			[1] = 0x2800,
-			[2] = 0x2c00,
-			[3] = 0x3000,
-			[4] = 0x3400,
-		},
-
-		.has_shadowcon	= 1,
-		.has_blendcon	= 1,
-		.has_clksel	= 1,
-		.has_fixvclk	= 1,
-	},
-	.win[0]	= &s3c_fb_data_s5p_wins[0],
-	.win[1]	= &s3c_fb_data_s5p_wins[1],
-	.win[2]	= &s3c_fb_data_s5p_wins[2],
-	.win[3]	= &s3c_fb_data_s5p_wins[3],
-	.win[4]	= &s3c_fb_data_s5p_wins[4],
-};
-
-static struct s3c_fb_driverdata s3c_fb_data_exynos4 = {
-	.variant = {
-		.nr_windows	= 5,
-		.vidtcon	= VIDTCON0,
-		.wincon		= WINCON(0),
-		.winmap		= WINxMAP(0),
-		.keycon		= WKEYCON,
-		.osd		= VIDOSD_BASE,
-		.osd_stride	= 16,
-		.buf_start	= VIDW_BUF_START(0),
-		.buf_size	= VIDW_BUF_SIZE(0),
-		.buf_end	= VIDW_BUF_END(0),
-
-		.palette = {
-			[0] = 0x2400,
-			[1] = 0x2800,
-			[2] = 0x2c00,
-			[3] = 0x3000,
-			[4] = 0x3400,
-		},
-
-		.has_shadowcon	= 1,
-		.has_blendcon	= 1,
-		.has_fixvclk	= 1,
-	},
-	.win[0]	= &s3c_fb_data_s5p_wins[0],
-	.win[1]	= &s3c_fb_data_s5p_wins[1],
-	.win[2]	= &s3c_fb_data_s5p_wins[2],
-	.win[3]	= &s3c_fb_data_s5p_wins[3],
-	.win[4]	= &s3c_fb_data_s5p_wins[4],
-};
-
-static struct s3c_fb_driverdata s3c_fb_data_exynos5 = {
-	.variant = {
-		.nr_windows	= 5,
-		.vidtcon	= FIMD_V8_VIDTCON0,
-		.wincon		= WINCON(0),
-		.winmap		= WINxMAP(0),
-		.keycon		= WKEYCON,
-		.osd		= VIDOSD_BASE,
-		.osd_stride	= 16,
-		.buf_start	= VIDW_BUF_START(0),
-		.buf_size	= VIDW_BUF_SIZE(0),
-		.buf_end	= VIDW_BUF_END(0),
-
-		.palette = {
-			[0] = 0x2400,
-			[1] = 0x2800,
-			[2] = 0x2c00,
-			[3] = 0x3000,
-			[4] = 0x3400,
-		},
-		.has_shadowcon	= 1,
-		.has_blendcon	= 1,
-		.has_fixvclk	= 1,
-	},
-	.win[0]	= &s3c_fb_data_s5p_wins[0],
-	.win[1]	= &s3c_fb_data_s5p_wins[1],
-	.win[2]	= &s3c_fb_data_s5p_wins[2],
-	.win[3]	= &s3c_fb_data_s5p_wins[3],
-	.win[4]	= &s3c_fb_data_s5p_wins[4],
-};
-
 /* S3C2443/S3C2416 style hardware */
 static struct s3c_fb_driverdata s3c_fb_data_s3c2443 = {
 	.variant = {
@@ -1942,15 +1787,6 @@ static const struct platform_device_id s3c_fb_driver_ids[] = {
 		.name		= "s3c-fb",
 		.driver_data	= (unsigned long)&s3c_fb_data_64xx,
 	}, {
-		.name		= "s5pv210-fb",
-		.driver_data	= (unsigned long)&s3c_fb_data_s5pv210,
-	}, {
-		.name		= "exynos4-fb",
-		.driver_data	= (unsigned long)&s3c_fb_data_exynos4,
-	}, {
-		.name		= "exynos5-fb",
-		.driver_data	= (unsigned long)&s3c_fb_data_exynos5,
-	}, {
 		.name		= "s3c2443-fb",
 		.driver_data	= (unsigned long)&s3c_fb_data_s3c2443,
 	},
diff --git a/drivers/video/fbdev/sis/init.h b/drivers/video/fbdev/sis/init.h
index 85d6738b6c64..400b0e5681b2 100644
--- a/drivers/video/fbdev/sis/init.h
+++ b/drivers/video/fbdev/sis/init.h
@@ -1461,81 +1461,5 @@ static const struct SiS_LVDSCRT1Data SiS_LVDSCRT1640x480_1_H[] =
    0x00}}
 };
 
-bool		SiSInitPtr(struct SiS_Private *SiS_Pr);
-unsigned short	SiS_GetModeID_LCD(int VGAEngine, unsigned int VBFlags, int HDisplay,
-				int VDisplay, int Depth, bool FSTN,
-				unsigned short CustomT, int LCDwith, int LCDheight,
-				unsigned int VBFlags2);
-unsigned short	SiS_GetModeID_TV(int VGAEngine, unsigned int VBFlags, int HDisplay,
-				int VDisplay, int Depth, unsigned int VBFlags2);
-unsigned short	SiS_GetModeID_VGA2(int VGAEngine, unsigned int VBFlags, int HDisplay,
-				int VDisplay, int Depth, unsigned int VBFlags2);
-
-void		SiS_DisplayOn(struct SiS_Private *SiS_Pr);
-void		SiS_DisplayOff(struct SiS_Private *SiS_Pr);
-void		SiSRegInit(struct SiS_Private *SiS_Pr, SISIOADDRESS BaseAddr);
-void		SiS_SetEnableDstn(struct SiS_Private *SiS_Pr, int enable);
-void		SiS_SetEnableFstn(struct SiS_Private *SiS_Pr, int enable);
-unsigned short	SiS_GetModeFlag(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
-				unsigned short ModeIdIndex);
-bool		SiSDetermineROMLayout661(struct SiS_Private *SiS_Pr);
-
-bool		SiS_SearchModeID(struct SiS_Private *SiS_Pr, unsigned short *ModeNo,
-				unsigned short *ModeIdIndex);
-unsigned short	SiS_GetModePtr(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
-				unsigned short ModeIdIndex);
-unsigned short  SiS_GetRefCRTVCLK(struct SiS_Private *SiS_Pr, unsigned short Index, int UseWide);
-unsigned short  SiS_GetRefCRT1CRTC(struct SiS_Private *SiS_Pr, unsigned short Index, int UseWide);
-unsigned short	SiS_GetColorDepth(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
-				unsigned short ModeIdIndex);
-unsigned short	SiS_GetOffset(struct SiS_Private *SiS_Pr,unsigned short ModeNo,
-				unsigned short ModeIdIndex, unsigned short RRTI);
-#ifdef CONFIG_FB_SIS_300
-void		SiS_GetFIFOThresholdIndex300(struct SiS_Private *SiS_Pr, unsigned short *idx1,
-				unsigned short *idx2);
-unsigned short	SiS_GetFIFOThresholdB300(unsigned short idx1, unsigned short idx2);
-unsigned short	SiS_GetLatencyFactor630(struct SiS_Private *SiS_Pr, unsigned short index);
-#endif
-void		SiS_LoadDAC(struct SiS_Private *SiS_Pr, unsigned short ModeNo, unsigned short ModeIdIndex);
-bool		SiSSetMode(struct SiS_Private *SiS_Pr, unsigned short ModeNo);
-void		SiS_CalcCRRegisters(struct SiS_Private *SiS_Pr, int depth);
-void		SiS_CalcLCDACRT1Timing(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
-				unsigned short ModeIdIndex);
-void		SiS_Generic_ConvertCRData(struct SiS_Private *SiS_Pr, unsigned char *crdata, int xres,
-				int yres, struct fb_var_screeninfo *var, bool writeres);
-
-/* From init301.c: */
-extern void		SiS_GetVBInfo(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
-				unsigned short ModeIdIndex, int chkcrt2mode);
-extern void		SiS_GetLCDResInfo(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
-				unsigned short ModeIdIndex);
-extern void		SiS_SetYPbPr(struct SiS_Private *SiS_Pr);
-extern void		SiS_SetTVMode(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
-				unsigned short ModeIdIndex);
-extern void		SiS_UnLockCRT2(struct SiS_Private *SiS_Pr);
-extern void		SiS_DisableBridge(struct SiS_Private *);
-extern bool		SiS_SetCRT2Group(struct SiS_Private *, unsigned short);
-extern unsigned short	SiS_GetRatePtr(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
-				unsigned short ModeIdIndex);
-extern void		SiS_WaitRetrace1(struct SiS_Private *SiS_Pr);
-extern unsigned short	SiS_GetResInfo(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
-				unsigned short ModeIdIndex);
-extern unsigned short	SiS_GetCH700x(struct SiS_Private *SiS_Pr, unsigned short tempax);
-extern unsigned short	SiS_GetVCLK2Ptr(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
-				unsigned short ModeIdIndex, unsigned short RRTI);
-extern bool		SiS_IsVAMode(struct SiS_Private *);
-extern bool		SiS_IsDualEdge(struct SiS_Private *);
-
-#ifdef CONFIG_FB_SIS_300
-extern unsigned int	sisfb_read_nbridge_pci_dword(struct SiS_Private *SiS_Pr, int reg);
-extern void		sisfb_write_nbridge_pci_dword(struct SiS_Private *SiS_Pr, int reg,
-				unsigned int val);
-#endif
-#ifdef CONFIG_FB_SIS_315
-extern void		sisfb_write_nbridge_pci_byte(struct SiS_Private *SiS_Pr, int reg,
-				unsigned char val);
-extern unsigned int	sisfb_read_mio_pci_word(struct SiS_Private *SiS_Pr, int reg);
-#endif
-
 #endif
 
diff --git a/drivers/video/fbdev/sis/init301.c b/drivers/video/fbdev/sis/init301.c
index 02ee752d5000..27a2b72e50e8 100644
--- a/drivers/video/fbdev/sis/init301.c
+++ b/drivers/video/fbdev/sis/init301.c
@@ -82,6 +82,332 @@
 #define SiS_I2CDELAY      1000
 #define SiS_I2CDELAYSHORT  150
 
+static const unsigned char SiS_YPbPrTable[3][64] = {
+  {
+    0x17,0x1d,0x03,0x09,0x05,0x06,0x0c,0x0c,
+    0x94,0x49,0x01,0x0a,0x06,0x0d,0x04,0x0a,
+    0x06,0x14,0x0d,0x04,0x0a,0x00,0x85,0x1b,
+    0x0c,0x50,0x00,0x97,0x00,0xda,0x4a,0x17,
+    0x7d,0x05,0x4b,0x00,0x00,0xe2,0x00,0x02,
+    0x03,0x0a,0x65,0x9d /*0x8d*/,0x08,0x92,0x8f,0x40,
+    0x60,0x80,0x14,0x90,0x8c,0x60,0x14,0x53 /*0x50*/,
+    0x00,0x40,0x44,0x00,0xdb,0x02,0x3b,0x00
+  },
+  {
+    0x33,0x06,0x06,0x09,0x0b,0x0c,0x0c,0x0c,
+    0x98,0x0a,0x01,0x0d,0x06,0x0d,0x04,0x0a,
+    0x06,0x14,0x0d,0x04,0x0a,0x00,0x85,0x3f,
+    0x0c,0x50,0xb2,0x9f,0x16,0x59,0x4f,0x13,
+    0xad,0x11,0xad,0x1d,0x40,0x8a,0x3d,0xb8,
+    0x51,0x5e,0x60,0x49,0x7d,0x92,0x0f,0x40,
+    0x60,0x80,0x14,0x90,0x8c,0x60,0x14,0x4e,
+    0x43,0x41,0x11,0x00,0xfc,0xff,0x32,0x00
+  },
+  {
+#if 0 /* OK, but sticks to left edge */
+    0x13,0x1d,0xe8,0x09,0x09,0xed,0x0c,0x0c,
+    0x98,0x0a,0x01,0x0c,0x06,0x0d,0x04,0x0a,
+    0x06,0x14,0x0d,0x04,0x0a,0x00,0x85,0x3f,
+    0xed,0x50,0x70,0x9f,0x16,0x59,0x21 /*0x2b*/,0x13,
+    0x27,0x0b,0x27,0xfc,0x30,0x27,0x1c,0xb0,
+    0x4b,0x4b,0x65 /*0x6f*/,0x2f,0x63,0x92,0x0f,0x40,
+    0x60,0x80,0x14,0x90,0x8c,0x60,0x14,0x27,
+    0x00,0x40,0x11,0x00,0xfc,0xff,0x32,0x00
+#endif
+#if 1 /* Perfect */
+    0x23,0x2d,0xe8,0x09,0x09,0xed,0x0c,0x0c,
+    0x98,0x0a,0x01,0x0c,0x06,0x0d,0x04,0x0a,
+    0x06,0x14,0x0d,0x04,0x0a,0x00,0x85,0x3f,
+    0xed,0x50,0x70,0x9f,0x16,0x59,0x60,0x13,
+    0x27,0x0b,0x27,0xfc,0x30,0x27,0x1c,0xb0,
+    0x4b,0x4b,0x6f,0x2f,0x63,0x92,0x0f,0x40,
+    0x60,0x80,0x14,0x90,0x8c,0x60,0x14,0x73,
+    0x00,0x40,0x11,0x00,0xfc,0xff,0x32,0x00
+#endif
+  }
+};
+
+static const unsigned char SiS_TVPhase[] =
+{
+	0x21,0xED,0xBA,0x08,	/* 0x00 SiS_NTSCPhase */
+	0x2A,0x05,0xE3,0x00,	/* 0x01 SiS_PALPhase */
+	0x21,0xE4,0x2E,0x9B,	/* 0x02 SiS_PALMPhase */
+	0x21,0xF4,0x3E,0xBA,	/* 0x03 SiS_PALNPhase */
+	0x1E,0x8B,0xA2,0xA7,
+	0x1E,0x83,0x0A,0xE0,	/* 0x05 SiS_SpecialPhaseM */
+	0x00,0x00,0x00,0x00,
+	0x00,0x00,0x00,0x00,
+	0x21,0xF0,0x7B,0xD6,	/* 0x08 SiS_NTSCPhase2 */
+	0x2A,0x09,0x86,0xE9,	/* 0x09 SiS_PALPhase2 */
+	0x21,0xE6,0xEF,0xA4,	/* 0x0a SiS_PALMPhase2 */
+	0x21,0xF6,0x94,0x46,	/* 0x0b SiS_PALNPhase2 */
+	0x1E,0x8B,0xA2,0xA7,
+	0x1E,0x83,0x0A,0xE0,	/* 0x0d SiS_SpecialPhaseM */
+	0x00,0x00,0x00,0x00,
+	0x00,0x00,0x00,0x00,
+	0x1e,0x8c,0x5c,0x7a,	/* 0x10 SiS_SpecialPhase */
+	0x25,0xd4,0xfd,0x5e	/* 0x11 SiS_SpecialPhaseJ */
+};
+
+static const unsigned char SiS_HiTVGroup3_1[] = {
+    0x00, 0x14, 0x15, 0x25, 0x55, 0x15, 0x0b, 0x13,
+    0xb1, 0x41, 0x62, 0x62, 0xff, 0xf4, 0x45, 0xa6,
+    0x25, 0x2f, 0x67, 0xf6, 0xbf, 0xff, 0x8e, 0x20,
+    0xac, 0xda, 0x60, 0xfe, 0x6a, 0x9a, 0x06, 0x10,
+    0xd1, 0x04, 0x18, 0x0a, 0xff, 0x80, 0x00, 0x80,
+    0x3b, 0x77, 0x00, 0xef, 0xe0, 0x10, 0xb0, 0xe0,
+    0x10, 0x4f, 0x0f, 0x0f, 0x05, 0x0f, 0x08, 0x6e,
+    0x1a, 0x1f, 0x25, 0x2a, 0x4c, 0xaa, 0x01
+};
+
+static const unsigned char SiS_HiTVGroup3_2[] = {
+    0x00, 0x14, 0x15, 0x25, 0x55, 0x15, 0x0b, 0x7a,
+    0x54, 0x41, 0xe7, 0xe7, 0xff, 0xf4, 0x45, 0xa6,
+    0x25, 0x2f, 0x67, 0xf6, 0xbf, 0xff, 0x8e, 0x20,
+    0xac, 0x6a, 0x60, 0x2b, 0x52, 0xcd, 0x61, 0x10,
+    0x51, 0x04, 0x18, 0x0a, 0x1f, 0x80, 0x00, 0x80,
+    0xff, 0xa4, 0x04, 0x2b, 0x94, 0x21, 0x72, 0x94,
+    0x26, 0x05, 0x01, 0x0f, 0xed, 0x0f, 0x0a, 0x64,
+    0x18, 0x1d, 0x23, 0x28, 0x4c, 0xaa, 0x01
+};
+
+/* 301C / 302ELV extended Part2 TV registers (4 tap scaler) */
+
+static const unsigned char SiS_Part2CLVX_1[] = {
+    0x00,0x00,
+    0x00,0x20,0x00,0x00,0x7F,0x20,0x02,0x7F,0x7D,0x20,0x04,0x7F,0x7D,0x1F,0x06,0x7E,
+    0x7C,0x1D,0x09,0x7E,0x7C,0x1B,0x0B,0x7E,0x7C,0x19,0x0E,0x7D,0x7C,0x17,0x11,0x7C,
+    0x7C,0x14,0x14,0x7C,0x7C,0x11,0x17,0x7C,0x7D,0x0E,0x19,0x7C,0x7E,0x0B,0x1B,0x7C,
+    0x7E,0x09,0x1D,0x7C,0x7F,0x06,0x1F,0x7C,0x7F,0x04,0x20,0x7D,0x00,0x02,0x20,0x7E
+};
+
+static const unsigned char SiS_Part2CLVX_2[] = {
+    0x00,0x00,
+    0x00,0x20,0x00,0x00,0x7F,0x20,0x02,0x7F,0x7D,0x20,0x04,0x7F,0x7D,0x1F,0x06,0x7E,
+    0x7C,0x1D,0x09,0x7E,0x7C,0x1B,0x0B,0x7E,0x7C,0x19,0x0E,0x7D,0x7C,0x17,0x11,0x7C,
+    0x7C,0x14,0x14,0x7C,0x7C,0x11,0x17,0x7C,0x7D,0x0E,0x19,0x7C,0x7E,0x0B,0x1B,0x7C,
+    0x7E,0x09,0x1D,0x7C,0x7F,0x06,0x1F,0x7C,0x7F,0x04,0x20,0x7D,0x00,0x02,0x20,0x7E
+};
+
+static const unsigned char SiS_Part2CLVX_3[] = {  /* NTSC, 525i, 525p */
+    0xE0,0x01,
+    0x04,0x1A,0x04,0x7E,0x03,0x1A,0x06,0x7D,0x01,0x1A,0x08,0x7D,0x00,0x19,0x0A,0x7D,
+    0x7F,0x19,0x0C,0x7C,0x7E,0x18,0x0E,0x7C,0x7E,0x17,0x10,0x7B,0x7D,0x15,0x12,0x7C,
+    0x7D,0x13,0x13,0x7D,0x7C,0x12,0x15,0x7D,0x7C,0x10,0x17,0x7D,0x7C,0x0E,0x18,0x7E,
+    0x7D,0x0C,0x19,0x7E,0x7D,0x0A,0x19,0x00,0x7D,0x08,0x1A,0x01,0x7E,0x06,0x1A,0x02,
+    0x58,0x02,
+    0x07,0x14,0x07,0x7E,0x06,0x14,0x09,0x7D,0x05,0x14,0x0A,0x7D,0x04,0x13,0x0B,0x7E,
+    0x03,0x13,0x0C,0x7E,0x02,0x12,0x0D,0x7F,0x01,0x12,0x0E,0x7F,0x01,0x11,0x0F,0x7F,
+    0x00,0x10,0x10,0x00,0x7F,0x0F,0x11,0x01,0x7F,0x0E,0x12,0x01,0x7E,0x0D,0x12,0x03,
+    0x7E,0x0C,0x13,0x03,0x7E,0x0B,0x13,0x04,0x7E,0x0A,0x14,0x04,0x7D,0x09,0x14,0x06,
+    0x00,0x03,
+    0x09,0x0F,0x09,0x7F,0x08,0x0F,0x09,0x00,0x07,0x0F,0x0A,0x00,0x06,0x0F,0x0A,0x01,
+    0x06,0x0E,0x0B,0x01,0x05,0x0E,0x0B,0x02,0x04,0x0E,0x0C,0x02,0x04,0x0D,0x0C,0x03,
+    0x03,0x0D,0x0D,0x03,0x02,0x0C,0x0D,0x05,0x02,0x0C,0x0E,0x04,0x01,0x0B,0x0E,0x06,
+    0x01,0x0B,0x0E,0x06,0x00,0x0A,0x0F,0x07,0x00,0x0A,0x0F,0x07,0x00,0x09,0x0F,0x08,
+    0xFF,0xFF
+};
+
+static const unsigned char SiS_Part2CLVX_4[] = {   /* PAL */
+    0x58,0x02,
+    0x05,0x19,0x05,0x7D,0x03,0x19,0x06,0x7E,0x02,0x19,0x08,0x7D,0x01,0x18,0x0A,0x7D,
+    0x00,0x18,0x0C,0x7C,0x7F,0x17,0x0E,0x7C,0x7E,0x16,0x0F,0x7D,0x7E,0x14,0x11,0x7D,
+    0x7D,0x13,0x13,0x7D,0x7D,0x11,0x14,0x7E,0x7D,0x0F,0x16,0x7E,0x7D,0x0E,0x17,0x7E,
+    0x7D,0x0C,0x18,0x7F,0x7D,0x0A,0x18,0x01,0x7D,0x08,0x19,0x02,0x7D,0x06,0x19,0x04,
+    0x00,0x03,
+    0x08,0x12,0x08,0x7E,0x07,0x12,0x09,0x7E,0x06,0x12,0x0A,0x7E,0x05,0x11,0x0B,0x7F,
+    0x04,0x11,0x0C,0x7F,0x03,0x11,0x0C,0x00,0x03,0x10,0x0D,0x00,0x02,0x0F,0x0E,0x01,
+    0x01,0x0F,0x0F,0x01,0x01,0x0E,0x0F,0x02,0x00,0x0D,0x10,0x03,0x7F,0x0C,0x11,0x04,
+    0x7F,0x0C,0x11,0x04,0x7F,0x0B,0x11,0x05,0x7E,0x0A,0x12,0x06,0x7E,0x09,0x12,0x07,
+    0x40,0x02,
+    0x04,0x1A,0x04,0x7E,0x02,0x1B,0x05,0x7E,0x01,0x1A,0x07,0x7E,0x00,0x1A,0x09,0x7D,
+    0x7F,0x19,0x0B,0x7D,0x7E,0x18,0x0D,0x7D,0x7D,0x17,0x10,0x7C,0x7D,0x15,0x12,0x7C,
+    0x7C,0x14,0x14,0x7C,0x7C,0x12,0x15,0x7D,0x7C,0x10,0x17,0x7D,0x7C,0x0D,0x18,0x7F,
+    0x7D,0x0B,0x19,0x7F,0x7D,0x09,0x1A,0x00,0x7D,0x07,0x1A,0x02,0x7E,0x05,0x1B,0x02,
+    0xFF,0xFF
+};
+
+static const unsigned char SiS_Part2CLVX_5[] = {   /* 750p */
+    0x00,0x03,
+    0x05,0x19,0x05,0x7D,0x03,0x19,0x06,0x7E,0x02,0x19,0x08,0x7D,0x01,0x18,0x0A,0x7D,
+    0x00,0x18,0x0C,0x7C,0x7F,0x17,0x0E,0x7C,0x7E,0x16,0x0F,0x7D,0x7E,0x14,0x11,0x7D,
+    0x7D,0x13,0x13,0x7D,0x7D,0x11,0x14,0x7E,0x7D,0x0F,0x16,0x7E,0x7D,0x0E,0x17,0x7E,
+    0x7D,0x0C,0x18,0x7F,0x7D,0x0A,0x18,0x01,0x7D,0x08,0x19,0x02,0x7D,0x06,0x19,0x04,
+    0xFF,0xFF
+};
+
+static const unsigned char SiS_Part2CLVX_6[] = {   /* 1080i */
+    0x00,0x04,
+    0x04,0x1A,0x04,0x7E,0x02,0x1B,0x05,0x7E,0x01,0x1A,0x07,0x7E,0x00,0x1A,0x09,0x7D,
+    0x7F,0x19,0x0B,0x7D,0x7E,0x18,0x0D,0x7D,0x7D,0x17,0x10,0x7C,0x7D,0x15,0x12,0x7C,
+    0x7C,0x14,0x14,0x7C,0x7C,0x12,0x15,0x7D,0x7C,0x10,0x17,0x7D,0x7C,0x0D,0x18,0x7F,
+    0x7D,0x0B,0x19,0x7F,0x7D,0x09,0x1A,0x00,0x7D,0x07,0x1A,0x02,0x7E,0x05,0x1B,0x02,
+    0xFF,0xFF,
+};
+
+#ifdef CONFIG_FB_SIS_315
+/* 661 et al LCD data structure (2.03.00) */
+static const unsigned char SiS_LCDStruct661[] = {
+    /* 1024x768 */
+/*  type|CR37|   HDE   |   VDE   |    HT   |    VT   |   hss    | hse   */
+    0x02,0xC0,0x00,0x04,0x00,0x03,0x40,0x05,0x26,0x03,0x10,0x00,0x88,
+    0x00,0x02,0x00,0x06,0x00,0x41,0x5A,0x64,0x00,0x00,0x00,0x00,0x04,
+    /*  | vss     |    vse  |clck|  clock  |CRT2DataP|CRT2DataP|idx     */
+    /*					      VESA    non-VESA  noscale */
+    /* 1280x1024 */
+    0x03,0xC0,0x00,0x05,0x00,0x04,0x98,0x06,0x2A,0x04,0x30,0x00,0x70,
+    0x00,0x01,0x00,0x03,0x00,0x6C,0xF8,0x2F,0x00,0x00,0x00,0x00,0x08,
+    /* 1400x1050 */
+    0x09,0x20,0x78,0x05,0x1A,0x04,0x98,0x06,0x2A,0x04,0x18,0x00,0x38,
+    0x00,0x01,0x00,0x03,0x00,0x6C,0xF8,0x2F,0x00,0x00,0x00,0x00,0x09,
+    /* 1600x1200 */
+    0x0B,0xE0,0x40,0x06,0xB0,0x04,0x70,0x08,0xE2,0x04,0x40,0x00,0xC0,
+    0x00,0x01,0x00,0x03,0x00,0xA2,0x70,0x24,0x00,0x00,0x00,0x00,0x0A,
+    /* 1280x768 (_2) */
+    0x0A,0xE0,0x00,0x05,0x00,0x03,0x7C,0x06,0x26,0x03,0x30,0x00,0x70,
+    0x00,0x03,0x00,0x06,0x00,0x4D,0xC8,0x48,0x00,0x00,0x00,0x00,0x06,
+    /* 1280x720 */
+    0x0E,0xE0,0x00,0x05,0xD0,0x02,0x80,0x05,0x26,0x03,0x10,0x00,0x20,
+    0x00,0x01,0x00,0x06,0x00,0x45,0x9C,0x62,0x00,0x00,0x00,0x00,0x05,
+    /* 1280x800 (_2) */
+    0x0C,0xE0,0x00,0x05,0x20,0x03,0x10,0x06,0x2C,0x03,0x30,0x00,0x70,
+    0x00,0x04,0x00,0x03,0x00,0x49,0xCE,0x1E,0x00,0x00,0x00,0x00,0x09,
+    /* 1680x1050 */
+    0x0D,0xE0,0x90,0x06,0x1A,0x04,0x6C,0x07,0x2A,0x04,0x1A,0x00,0x4C,
+    0x00,0x03,0x00,0x06,0x00,0x79,0xBE,0x44,0x00,0x00,0x00,0x00,0x06,
+    /* 1280x800_3 */
+    0x0C,0xE0,0x00,0x05,0x20,0x03,0xAA,0x05,0x2E,0x03,0x30,0x00,0x50,
+    0x00,0x04,0x00,0x03,0x00,0x47,0xA9,0x10,0x00,0x00,0x00,0x00,0x07,
+    /* 800x600 */
+    0x01,0xC0,0x20,0x03,0x58,0x02,0x20,0x04,0x74,0x02,0x2A,0x00,0x80,
+    0x00,0x06,0x00,0x04,0x00,0x28,0x63,0x4B,0x00,0x00,0x00,0x00,0x00,
+    /* 1280x854 */
+    0x08,0xE0,0x00,0x05,0x56,0x03,0x80,0x06,0x5d,0x03,0x10,0x00,0x70,
+    0x00,0x01,0x00,0x03,0x00,0x54,0x75,0x13,0x00,0x00,0x00,0x00,0x08
+};
+#endif
+
+#ifdef CONFIG_FB_SIS_300
+static unsigned char SiS300_TrumpionData[14][80] = {
+  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x0D,0x00,0x0D,0x10,0x7F,0x00,0x80,0x02,
+    0x20,0x03,0x0B,0x00,0x90,0x01,0xC1,0x01,0x60,0x0C,0x30,0x10,0x00,0x00,0x04,0x23,
+    0x00,0x00,0x03,0x28,0x03,0x10,0x05,0x08,0x40,0x10,0x00,0x10,0x04,0x23,0x00,0x23,
+    0x03,0x11,0x60,0xBC,0x01,0xFF,0x03,0xFF,0x19,0x01,0x00,0x05,0x09,0x04,0x04,0x05,
+    0x04,0x0C,0x09,0x05,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x5A,0x01,0xBE,0x01,0x00 },
+  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x0D,0x00,0x0D,0x10,0x27,0x00,0x80,0x02,
+    0x20,0x03,0x07,0x00,0x5E,0x01,0x0D,0x02,0x60,0x0C,0x30,0x11,0x00,0x00,0x04,0x23,
+    0x00,0x00,0x03,0x80,0x03,0x28,0x06,0x08,0x40,0x11,0x00,0x11,0x04,0x23,0x00,0x23,
+    0x03,0x11,0x60,0x90,0x01,0xFF,0x0F,0xF4,0x19,0x01,0x00,0x05,0x01,0x00,0x04,0x05,
+    0x04,0x0C,0x02,0x01,0x02,0xB0,0x00,0x00,0x02,0xBA,0xEC,0x57,0x01,0xBE,0x01,0x00 },
+  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x0D,0x00,0x0D,0x10,0x8A,0x00,0xD8,0x02,
+    0x84,0x03,0x16,0x00,0x90,0x01,0xC1,0x01,0x60,0x0C,0x30,0x1C,0x00,0x20,0x04,0x23,
+    0x00,0x01,0x03,0x53,0x03,0x28,0x06,0x08,0x40,0x1C,0x00,0x16,0x04,0x23,0x00,0x23,
+    0x03,0x11,0x60,0xD9,0x01,0xFF,0x0F,0xF4,0x18,0x07,0x05,0x05,0x13,0x04,0x04,0x05,
+    0x01,0x0B,0x13,0x0A,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x59,0x01,0xBE,0x01,0x00 },
+  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x0D,0x00,0x0D,0x10,0x72,0x00,0xD8,0x02,
+    0x84,0x03,0x16,0x00,0x90,0x01,0xC1,0x01,0x60,0x0C,0x30,0x1C,0x00,0x20,0x04,0x23,
+    0x00,0x01,0x03,0x53,0x03,0x28,0x06,0x08,0x40,0x1C,0x00,0x16,0x04,0x23,0x00,0x23,
+    0x03,0x11,0x60,0xDA,0x01,0xFF,0x0F,0xF4,0x18,0x07,0x05,0x05,0x13,0x04,0x04,0x05,
+    0x01,0x0B,0x13,0x0A,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x55,0x01,0xBE,0x01,0x00 },
+  { 0x02,0x0A,0x02,0x00,0x04,0x01,0x00,0x03,0x0D,0x00,0x0D,0x10,0x7F,0x00,0x80,0x02,
+    0x20,0x03,0x16,0x00,0xE0,0x01,0x0D,0x02,0x60,0x0C,0x30,0x98,0x00,0x00,0x04,0x23,
+    0x00,0x01,0x03,0x45,0x03,0x48,0x06,0x08,0x40,0x98,0x00,0x98,0x04,0x23,0x00,0x23,
+    0x03,0x11,0x60,0xF4,0x01,0xFF,0x0F,0xF4,0x18,0x01,0x00,0x05,0x01,0x00,0x05,0x05,
+    0x04,0x0C,0x08,0x05,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x5B,0x01,0xBE,0x01,0x00 },
+  { 0x02,0x0A,0x02,0x01,0x04,0x01,0x00,0x03,0x0D,0x00,0x0D,0x10,0xBF,0x00,0x20,0x03,
+    0x20,0x04,0x0D,0x00,0x58,0x02,0x71,0x02,0x80,0x0C,0x30,0x9A,0x00,0xFA,0x03,0x1D,
+    0x00,0x01,0x03,0x22,0x03,0x28,0x06,0x08,0x40,0x98,0x00,0x98,0x04,0x1D,0x00,0x1D,
+    0x03,0x11,0x60,0x39,0x03,0x40,0x05,0xF4,0x18,0x07,0x02,0x06,0x04,0x01,0x06,0x0B,
+    0x02,0x0A,0x20,0x19,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x5B,0x01,0xBE,0x01,0x00 },
+  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x0D,0x00,0x0D,0x10,0xEF,0x00,0x00,0x04,
+    0x40,0x05,0x13,0x00,0x00,0x03,0x26,0x03,0x88,0x0C,0x30,0x90,0x00,0x00,0x04,0x23,
+    0x00,0x01,0x03,0x24,0x03,0x28,0x06,0x08,0x40,0x90,0x00,0x90,0x04,0x23,0x00,0x23,
+    0x03,0x11,0x60,0x40,0x05,0xFF,0x0F,0xF4,0x18,0x01,0x00,0x08,0x01,0x00,0x08,0x01,
+    0x00,0x08,0x01,0x01,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x5B,0x01,0xBE,0x01,0x00 },
+  /* variant 2 */
+  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x11,0x00,0x0D,0x10,0x7F,0x00,0x80,0x02,
+    0x20,0x03,0x15,0x00,0x90,0x01,0xC1,0x01,0x60,0x0C,0x30,0x18,0x00,0x00,0x04,0x23,
+    0x00,0x01,0x03,0x44,0x03,0x28,0x06,0x08,0x40,0x18,0x00,0x18,0x04,0x23,0x00,0x23,
+    0x03,0x11,0x60,0xA6,0x01,0xFF,0x03,0xFF,0x19,0x01,0x00,0x05,0x13,0x04,0x04,0x05,
+    0x04,0x0C,0x13,0x0A,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x55,0x01,0xBE,0x01,0x00 },
+  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x11,0x00,0x0D,0x10,0x7F,0x00,0x80,0x02,
+    0x20,0x03,0x15,0x00,0x90,0x01,0xC1,0x01,0x60,0x0C,0x30,0x18,0x00,0x00,0x04,0x23,
+    0x00,0x01,0x03,0x44,0x03,0x28,0x06,0x08,0x40,0x18,0x00,0x18,0x04,0x23,0x00,0x23,
+    0x03,0x11,0x60,0xA6,0x01,0xFF,0x03,0xFF,0x19,0x01,0x00,0x05,0x13,0x04,0x04,0x05,
+    0x04,0x0C,0x13,0x0A,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x55,0x01,0xBE,0x01,0x00 },
+  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x11,0x00,0x0D,0x10,0x8A,0x00,0xD8,0x02,
+    0x84,0x03,0x16,0x00,0x90,0x01,0xC1,0x01,0x60,0x0C,0x30,0x1C,0x00,0x20,0x04,0x23,
+    0x00,0x01,0x03,0x53,0x03,0x28,0x06,0x08,0x40,0x1C,0x00,0x16,0x04,0x23,0x00,0x23,
+    0x03,0x11,0x60,0xDA,0x01,0xFF,0x0F,0xF4,0x18,0x07,0x05,0x05,0x13,0x04,0x04,0x05,
+    0x01,0x0B,0x13,0x0A,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x55,0x01,0xBE,0x01,0x00 },
+  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x11,0x00,0x0D,0x10,0x72,0x00,0xD8,0x02,
+    0x84,0x03,0x16,0x00,0x90,0x01,0xC1,0x01,0x60,0x0C,0x30,0x1C,0x00,0x20,0x04,0x23,
+    0x00,0x01,0x03,0x53,0x03,0x28,0x06,0x08,0x40,0x1C,0x00,0x16,0x04,0x23,0x00,0x23,
+    0x03,0x11,0x60,0xDA,0x01,0xFF,0x0F,0xF4,0x18,0x07,0x05,0x05,0x13,0x04,0x04,0x05,
+    0x01,0x0B,0x13,0x0A,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x55,0x01,0xBE,0x01,0x00 },
+  { 0x02,0x0A,0x02,0x00,0x04,0x01,0x00,0x03,0x11,0x00,0x0D,0x10,0x7F,0x00,0x80,0x02,
+    0x20,0x03,0x16,0x00,0xE0,0x01,0x0D,0x02,0x60,0x0C,0x30,0x98,0x00,0x00,0x04,0x23,
+    0x00,0x01,0x03,0x45,0x03,0x48,0x06,0x08,0x40,0x98,0x00,0x98,0x04,0x23,0x00,0x23,
+    0x03,0x11,0x60,0xF4,0x01,0xFF,0x0F,0xF4,0x18,0x01,0x00,0x05,0x01,0x00,0x05,0x05,
+    0x04,0x0C,0x08,0x05,0x02,0xB0,0x00,0x00,0x02,0xBA,0xEA,0x58,0x01,0xBE,0x01,0x00 },
+  { 0x02,0x0A,0x02,0x01,0x04,0x01,0x00,0x03,0x11,0x00,0x0D,0x10,0xBF,0x00,0x20,0x03,
+    0x20,0x04,0x0D,0x00,0x58,0x02,0x71,0x02,0x80,0x0C,0x30,0x9A,0x00,0xFA,0x03,0x1D,
+    0x00,0x01,0x03,0x22,0x03,0x28,0x06,0x08,0x40,0x98,0x00,0x98,0x04,0x1D,0x00,0x1D,
+    0x03,0x11,0x60,0x39,0x03,0x40,0x05,0xF4,0x18,0x07,0x02,0x06,0x04,0x01,0x06,0x0B,
+    0x02,0x0A,0x20,0x19,0x02,0xB0,0x00,0x00,0x02,0xBA,0xEA,0x58,0x01,0xBE,0x01,0x00 },
+  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x11,0x00,0x0D,0x10,0xEF,0x00,0x00,0x04,
+    0x40,0x05,0x13,0x00,0x00,0x03,0x26,0x03,0x88,0x0C,0x30,0x90,0x00,0x00,0x04,0x23,
+    0x00,0x01,0x03,0x24,0x03,0x28,0x06,0x08,0x40,0x90,0x00,0x90,0x04,0x23,0x00,0x23,
+    0x03,0x11,0x60,0x40,0x05,0xFF,0x0F,0xF4,0x18,0x01,0x00,0x08,0x01,0x00,0x08,0x01,
+    0x00,0x08,0x01,0x01,0x02,0xB0,0x00,0x00,0x02,0xBA,0xEA,0x58,0x01,0xBE,0x01,0x00 }
+};
+#endif
+
+#ifdef CONFIG_FB_SIS_315
+static void	SiS_Chrontel701xOn(struct SiS_Private *SiS_Pr);
+static void	SiS_Chrontel701xOff(struct SiS_Private *SiS_Pr);
+static void	SiS_ChrontelInitTVVSync(struct SiS_Private *SiS_Pr);
+static void	SiS_ChrontelDoSomething1(struct SiS_Private *SiS_Pr);
+#endif /* 315 */
+
+#ifdef CONFIG_FB_SIS_300
+static  bool	SiS_SetTrumpionBlock(struct SiS_Private *SiS_Pr, unsigned char *dataptr);
+#endif
+
+static unsigned short	SiS_InitDDCRegs(struct SiS_Private *SiS_Pr, unsigned int VBFlags,
+				int VGAEngine, unsigned short adaptnum, unsigned short DDCdatatype,
+				bool checkcr32, unsigned int VBFlags2);
+static unsigned short	SiS_ProbeDDC(struct SiS_Private *SiS_Pr);
+static unsigned short	SiS_ReadDDC(struct SiS_Private *SiS_Pr, unsigned short DDCdatatype,
+				unsigned char *buffer);
+static void		SiS_SetSwitchDDC2(struct SiS_Private *SiS_Pr);
+static unsigned short	SiS_SetStart(struct SiS_Private *SiS_Pr);
+static unsigned short	SiS_SetStop(struct SiS_Private *SiS_Pr);
+static unsigned short	SiS_SetSCLKLow(struct SiS_Private *SiS_Pr);
+static unsigned short	SiS_SetSCLKHigh(struct SiS_Private *SiS_Pr);
+static unsigned short	SiS_ReadDDC2Data(struct SiS_Private *SiS_Pr);
+static unsigned short	SiS_WriteDDC2Data(struct SiS_Private *SiS_Pr, unsigned short tempax);
+static unsigned short	SiS_CheckACK(struct SiS_Private *SiS_Pr);
+static unsigned short	SiS_WriteDABDDC(struct SiS_Private *SiS_Pr);
+static unsigned short	SiS_PrepareReadDDC(struct SiS_Private *SiS_Pr);
+static unsigned short	SiS_PrepareDDC(struct SiS_Private *SiS_Pr);
+static void		SiS_SendACK(struct SiS_Private *SiS_Pr, unsigned short yesno);
+static unsigned short	SiS_DoProbeDDC(struct SiS_Private *SiS_Pr);
+
+#ifdef CONFIG_FB_SIS_300
+static void		SiS_OEM300Setting(struct SiS_Private *SiS_Pr,
+				unsigned short ModeNo, unsigned short ModeIdIndex, unsigned short RefTabindex);
+static void		SetOEMLCDData2(struct SiS_Private *SiS_Pr,
+				unsigned short ModeNo, unsigned short ModeIdIndex,unsigned short RefTableIndex);
+#endif
+#ifdef CONFIG_FB_SIS_315
+static void		SiS_OEM310Setting(struct SiS_Private *SiS_Pr,
+				unsigned short ModeNo,unsigned short ModeIdIndex, unsigned short RRTI);
+static void		SiS_OEM661Setting(struct SiS_Private *SiS_Pr,
+				unsigned short ModeNo,unsigned short ModeIdIndex, unsigned short RRTI);
+static void		SiS_FinalizeLCD(struct SiS_Private *, unsigned short, unsigned short);
+#endif
+
 static unsigned short	SiS_GetBIOSLCDResInfo(struct SiS_Private *SiS_Pr);
 static void		SiS_SetCH70xx(struct SiS_Private *SiS_Pr, unsigned short reg, unsigned char val);
 
diff --git a/drivers/video/fbdev/sis/init301.h b/drivers/video/fbdev/sis/init301.h
index 2112d6d7feda..6e5cf14c4ce4 100644
--- a/drivers/video/fbdev/sis/init301.h
+++ b/drivers/video/fbdev/sis/init301.h
@@ -66,287 +66,6 @@
 #include "sis.h"
 #include <video/sisfb.h>
 
-static const unsigned char SiS_YPbPrTable[3][64] = {
-  {
-    0x17,0x1d,0x03,0x09,0x05,0x06,0x0c,0x0c,
-    0x94,0x49,0x01,0x0a,0x06,0x0d,0x04,0x0a,
-    0x06,0x14,0x0d,0x04,0x0a,0x00,0x85,0x1b,
-    0x0c,0x50,0x00,0x97,0x00,0xda,0x4a,0x17,
-    0x7d,0x05,0x4b,0x00,0x00,0xe2,0x00,0x02,
-    0x03,0x0a,0x65,0x9d /*0x8d*/,0x08,0x92,0x8f,0x40,
-    0x60,0x80,0x14,0x90,0x8c,0x60,0x14,0x53 /*0x50*/,
-    0x00,0x40,0x44,0x00,0xdb,0x02,0x3b,0x00
-  },
-  {
-    0x33,0x06,0x06,0x09,0x0b,0x0c,0x0c,0x0c,
-    0x98,0x0a,0x01,0x0d,0x06,0x0d,0x04,0x0a,
-    0x06,0x14,0x0d,0x04,0x0a,0x00,0x85,0x3f,
-    0x0c,0x50,0xb2,0x9f,0x16,0x59,0x4f,0x13,
-    0xad,0x11,0xad,0x1d,0x40,0x8a,0x3d,0xb8,
-    0x51,0x5e,0x60,0x49,0x7d,0x92,0x0f,0x40,
-    0x60,0x80,0x14,0x90,0x8c,0x60,0x14,0x4e,
-    0x43,0x41,0x11,0x00,0xfc,0xff,0x32,0x00
-  },
-  {
-#if 0 /* OK, but sticks to left edge */
-    0x13,0x1d,0xe8,0x09,0x09,0xed,0x0c,0x0c,
-    0x98,0x0a,0x01,0x0c,0x06,0x0d,0x04,0x0a,
-    0x06,0x14,0x0d,0x04,0x0a,0x00,0x85,0x3f,
-    0xed,0x50,0x70,0x9f,0x16,0x59,0x21 /*0x2b*/,0x13,
-    0x27,0x0b,0x27,0xfc,0x30,0x27,0x1c,0xb0,
-    0x4b,0x4b,0x65 /*0x6f*/,0x2f,0x63,0x92,0x0f,0x40,
-    0x60,0x80,0x14,0x90,0x8c,0x60,0x14,0x27,
-    0x00,0x40,0x11,0x00,0xfc,0xff,0x32,0x00
-#endif
-#if 1 /* Perfect */
-    0x23,0x2d,0xe8,0x09,0x09,0xed,0x0c,0x0c,
-    0x98,0x0a,0x01,0x0c,0x06,0x0d,0x04,0x0a,
-    0x06,0x14,0x0d,0x04,0x0a,0x00,0x85,0x3f,
-    0xed,0x50,0x70,0x9f,0x16,0x59,0x60,0x13,
-    0x27,0x0b,0x27,0xfc,0x30,0x27,0x1c,0xb0,
-    0x4b,0x4b,0x6f,0x2f,0x63,0x92,0x0f,0x40,
-    0x60,0x80,0x14,0x90,0x8c,0x60,0x14,0x73,
-    0x00,0x40,0x11,0x00,0xfc,0xff,0x32,0x00
-#endif
-  }
-};
-
-static const unsigned char SiS_TVPhase[] =
-{
-	0x21,0xED,0xBA,0x08,	/* 0x00 SiS_NTSCPhase */
-	0x2A,0x05,0xE3,0x00,	/* 0x01 SiS_PALPhase */
-	0x21,0xE4,0x2E,0x9B,	/* 0x02 SiS_PALMPhase */
-	0x21,0xF4,0x3E,0xBA,	/* 0x03 SiS_PALNPhase */
-	0x1E,0x8B,0xA2,0xA7,
-	0x1E,0x83,0x0A,0xE0,	/* 0x05 SiS_SpecialPhaseM */
-	0x00,0x00,0x00,0x00,
-	0x00,0x00,0x00,0x00,
-	0x21,0xF0,0x7B,0xD6,	/* 0x08 SiS_NTSCPhase2 */
-	0x2A,0x09,0x86,0xE9,	/* 0x09 SiS_PALPhase2 */
-	0x21,0xE6,0xEF,0xA4,	/* 0x0a SiS_PALMPhase2 */
-	0x21,0xF6,0x94,0x46,	/* 0x0b SiS_PALNPhase2 */
-	0x1E,0x8B,0xA2,0xA7,
-	0x1E,0x83,0x0A,0xE0,	/* 0x0d SiS_SpecialPhaseM */
-	0x00,0x00,0x00,0x00,
-	0x00,0x00,0x00,0x00,
-	0x1e,0x8c,0x5c,0x7a,	/* 0x10 SiS_SpecialPhase */
-	0x25,0xd4,0xfd,0x5e	/* 0x11 SiS_SpecialPhaseJ */
-};
-
-static const unsigned char SiS_HiTVGroup3_1[] = {
-    0x00, 0x14, 0x15, 0x25, 0x55, 0x15, 0x0b, 0x13,
-    0xb1, 0x41, 0x62, 0x62, 0xff, 0xf4, 0x45, 0xa6,
-    0x25, 0x2f, 0x67, 0xf6, 0xbf, 0xff, 0x8e, 0x20,
-    0xac, 0xda, 0x60, 0xfe, 0x6a, 0x9a, 0x06, 0x10,
-    0xd1, 0x04, 0x18, 0x0a, 0xff, 0x80, 0x00, 0x80,
-    0x3b, 0x77, 0x00, 0xef, 0xe0, 0x10, 0xb0, 0xe0,
-    0x10, 0x4f, 0x0f, 0x0f, 0x05, 0x0f, 0x08, 0x6e,
-    0x1a, 0x1f, 0x25, 0x2a, 0x4c, 0xaa, 0x01
-};
-
-static const unsigned char SiS_HiTVGroup3_2[] = {
-    0x00, 0x14, 0x15, 0x25, 0x55, 0x15, 0x0b, 0x7a,
-    0x54, 0x41, 0xe7, 0xe7, 0xff, 0xf4, 0x45, 0xa6,
-    0x25, 0x2f, 0x67, 0xf6, 0xbf, 0xff, 0x8e, 0x20,
-    0xac, 0x6a, 0x60, 0x2b, 0x52, 0xcd, 0x61, 0x10,
-    0x51, 0x04, 0x18, 0x0a, 0x1f, 0x80, 0x00, 0x80,
-    0xff, 0xa4, 0x04, 0x2b, 0x94, 0x21, 0x72, 0x94,
-    0x26, 0x05, 0x01, 0x0f, 0xed, 0x0f, 0x0a, 0x64,
-    0x18, 0x1d, 0x23, 0x28, 0x4c, 0xaa, 0x01
-};
-
-/* 301C / 302ELV extended Part2 TV registers (4 tap scaler) */
-
-static const unsigned char SiS_Part2CLVX_1[] = {
-    0x00,0x00,
-    0x00,0x20,0x00,0x00,0x7F,0x20,0x02,0x7F,0x7D,0x20,0x04,0x7F,0x7D,0x1F,0x06,0x7E,
-    0x7C,0x1D,0x09,0x7E,0x7C,0x1B,0x0B,0x7E,0x7C,0x19,0x0E,0x7D,0x7C,0x17,0x11,0x7C,
-    0x7C,0x14,0x14,0x7C,0x7C,0x11,0x17,0x7C,0x7D,0x0E,0x19,0x7C,0x7E,0x0B,0x1B,0x7C,
-    0x7E,0x09,0x1D,0x7C,0x7F,0x06,0x1F,0x7C,0x7F,0x04,0x20,0x7D,0x00,0x02,0x20,0x7E
-};
-
-static const unsigned char SiS_Part2CLVX_2[] = {
-    0x00,0x00,
-    0x00,0x20,0x00,0x00,0x7F,0x20,0x02,0x7F,0x7D,0x20,0x04,0x7F,0x7D,0x1F,0x06,0x7E,
-    0x7C,0x1D,0x09,0x7E,0x7C,0x1B,0x0B,0x7E,0x7C,0x19,0x0E,0x7D,0x7C,0x17,0x11,0x7C,
-    0x7C,0x14,0x14,0x7C,0x7C,0x11,0x17,0x7C,0x7D,0x0E,0x19,0x7C,0x7E,0x0B,0x1B,0x7C,
-    0x7E,0x09,0x1D,0x7C,0x7F,0x06,0x1F,0x7C,0x7F,0x04,0x20,0x7D,0x00,0x02,0x20,0x7E
-};
-
-static const unsigned char SiS_Part2CLVX_3[] = {  /* NTSC, 525i, 525p */
-    0xE0,0x01,
-    0x04,0x1A,0x04,0x7E,0x03,0x1A,0x06,0x7D,0x01,0x1A,0x08,0x7D,0x00,0x19,0x0A,0x7D,
-    0x7F,0x19,0x0C,0x7C,0x7E,0x18,0x0E,0x7C,0x7E,0x17,0x10,0x7B,0x7D,0x15,0x12,0x7C,
-    0x7D,0x13,0x13,0x7D,0x7C,0x12,0x15,0x7D,0x7C,0x10,0x17,0x7D,0x7C,0x0E,0x18,0x7E,
-    0x7D,0x0C,0x19,0x7E,0x7D,0x0A,0x19,0x00,0x7D,0x08,0x1A,0x01,0x7E,0x06,0x1A,0x02,
-    0x58,0x02,
-    0x07,0x14,0x07,0x7E,0x06,0x14,0x09,0x7D,0x05,0x14,0x0A,0x7D,0x04,0x13,0x0B,0x7E,
-    0x03,0x13,0x0C,0x7E,0x02,0x12,0x0D,0x7F,0x01,0x12,0x0E,0x7F,0x01,0x11,0x0F,0x7F,
-    0x00,0x10,0x10,0x00,0x7F,0x0F,0x11,0x01,0x7F,0x0E,0x12,0x01,0x7E,0x0D,0x12,0x03,
-    0x7E,0x0C,0x13,0x03,0x7E,0x0B,0x13,0x04,0x7E,0x0A,0x14,0x04,0x7D,0x09,0x14,0x06,
-    0x00,0x03,
-    0x09,0x0F,0x09,0x7F,0x08,0x0F,0x09,0x00,0x07,0x0F,0x0A,0x00,0x06,0x0F,0x0A,0x01,
-    0x06,0x0E,0x0B,0x01,0x05,0x0E,0x0B,0x02,0x04,0x0E,0x0C,0x02,0x04,0x0D,0x0C,0x03,
-    0x03,0x0D,0x0D,0x03,0x02,0x0C,0x0D,0x05,0x02,0x0C,0x0E,0x04,0x01,0x0B,0x0E,0x06,
-    0x01,0x0B,0x0E,0x06,0x00,0x0A,0x0F,0x07,0x00,0x0A,0x0F,0x07,0x00,0x09,0x0F,0x08,
-    0xFF,0xFF
-};
-
-static const unsigned char SiS_Part2CLVX_4[] = {   /* PAL */
-    0x58,0x02,
-    0x05,0x19,0x05,0x7D,0x03,0x19,0x06,0x7E,0x02,0x19,0x08,0x7D,0x01,0x18,0x0A,0x7D,
-    0x00,0x18,0x0C,0x7C,0x7F,0x17,0x0E,0x7C,0x7E,0x16,0x0F,0x7D,0x7E,0x14,0x11,0x7D,
-    0x7D,0x13,0x13,0x7D,0x7D,0x11,0x14,0x7E,0x7D,0x0F,0x16,0x7E,0x7D,0x0E,0x17,0x7E,
-    0x7D,0x0C,0x18,0x7F,0x7D,0x0A,0x18,0x01,0x7D,0x08,0x19,0x02,0x7D,0x06,0x19,0x04,
-    0x00,0x03,
-    0x08,0x12,0x08,0x7E,0x07,0x12,0x09,0x7E,0x06,0x12,0x0A,0x7E,0x05,0x11,0x0B,0x7F,
-    0x04,0x11,0x0C,0x7F,0x03,0x11,0x0C,0x00,0x03,0x10,0x0D,0x00,0x02,0x0F,0x0E,0x01,
-    0x01,0x0F,0x0F,0x01,0x01,0x0E,0x0F,0x02,0x00,0x0D,0x10,0x03,0x7F,0x0C,0x11,0x04,
-    0x7F,0x0C,0x11,0x04,0x7F,0x0B,0x11,0x05,0x7E,0x0A,0x12,0x06,0x7E,0x09,0x12,0x07,
-    0x40,0x02,
-    0x04,0x1A,0x04,0x7E,0x02,0x1B,0x05,0x7E,0x01,0x1A,0x07,0x7E,0x00,0x1A,0x09,0x7D,
-    0x7F,0x19,0x0B,0x7D,0x7E,0x18,0x0D,0x7D,0x7D,0x17,0x10,0x7C,0x7D,0x15,0x12,0x7C,
-    0x7C,0x14,0x14,0x7C,0x7C,0x12,0x15,0x7D,0x7C,0x10,0x17,0x7D,0x7C,0x0D,0x18,0x7F,
-    0x7D,0x0B,0x19,0x7F,0x7D,0x09,0x1A,0x00,0x7D,0x07,0x1A,0x02,0x7E,0x05,0x1B,0x02,
-    0xFF,0xFF
-};
-
-static const unsigned char SiS_Part2CLVX_5[] = {   /* 750p */
-    0x00,0x03,
-    0x05,0x19,0x05,0x7D,0x03,0x19,0x06,0x7E,0x02,0x19,0x08,0x7D,0x01,0x18,0x0A,0x7D,
-    0x00,0x18,0x0C,0x7C,0x7F,0x17,0x0E,0x7C,0x7E,0x16,0x0F,0x7D,0x7E,0x14,0x11,0x7D,
-    0x7D,0x13,0x13,0x7D,0x7D,0x11,0x14,0x7E,0x7D,0x0F,0x16,0x7E,0x7D,0x0E,0x17,0x7E,
-    0x7D,0x0C,0x18,0x7F,0x7D,0x0A,0x18,0x01,0x7D,0x08,0x19,0x02,0x7D,0x06,0x19,0x04,
-    0xFF,0xFF
-};
-
-static const unsigned char SiS_Part2CLVX_6[] = {   /* 1080i */
-    0x00,0x04,
-    0x04,0x1A,0x04,0x7E,0x02,0x1B,0x05,0x7E,0x01,0x1A,0x07,0x7E,0x00,0x1A,0x09,0x7D,
-    0x7F,0x19,0x0B,0x7D,0x7E,0x18,0x0D,0x7D,0x7D,0x17,0x10,0x7C,0x7D,0x15,0x12,0x7C,
-    0x7C,0x14,0x14,0x7C,0x7C,0x12,0x15,0x7D,0x7C,0x10,0x17,0x7D,0x7C,0x0D,0x18,0x7F,
-    0x7D,0x0B,0x19,0x7F,0x7D,0x09,0x1A,0x00,0x7D,0x07,0x1A,0x02,0x7E,0x05,0x1B,0x02,
-    0xFF,0xFF,
-};
-
-#ifdef CONFIG_FB_SIS_315
-/* 661 et al LCD data structure (2.03.00) */
-static const unsigned char SiS_LCDStruct661[] = {
-    /* 1024x768 */
-/*  type|CR37|   HDE   |   VDE   |    HT   |    VT   |   hss    | hse   */
-    0x02,0xC0,0x00,0x04,0x00,0x03,0x40,0x05,0x26,0x03,0x10,0x00,0x88,
-    0x00,0x02,0x00,0x06,0x00,0x41,0x5A,0x64,0x00,0x00,0x00,0x00,0x04,
-    /*  | vss     |    vse  |clck|  clock  |CRT2DataP|CRT2DataP|idx     */
-    /*					      VESA    non-VESA  noscale */
-    /* 1280x1024 */
-    0x03,0xC0,0x00,0x05,0x00,0x04,0x98,0x06,0x2A,0x04,0x30,0x00,0x70,
-    0x00,0x01,0x00,0x03,0x00,0x6C,0xF8,0x2F,0x00,0x00,0x00,0x00,0x08,
-    /* 1400x1050 */
-    0x09,0x20,0x78,0x05,0x1A,0x04,0x98,0x06,0x2A,0x04,0x18,0x00,0x38,
-    0x00,0x01,0x00,0x03,0x00,0x6C,0xF8,0x2F,0x00,0x00,0x00,0x00,0x09,
-    /* 1600x1200 */
-    0x0B,0xE0,0x40,0x06,0xB0,0x04,0x70,0x08,0xE2,0x04,0x40,0x00,0xC0,
-    0x00,0x01,0x00,0x03,0x00,0xA2,0x70,0x24,0x00,0x00,0x00,0x00,0x0A,
-    /* 1280x768 (_2) */
-    0x0A,0xE0,0x00,0x05,0x00,0x03,0x7C,0x06,0x26,0x03,0x30,0x00,0x70,
-    0x00,0x03,0x00,0x06,0x00,0x4D,0xC8,0x48,0x00,0x00,0x00,0x00,0x06,
-    /* 1280x720 */
-    0x0E,0xE0,0x00,0x05,0xD0,0x02,0x80,0x05,0x26,0x03,0x10,0x00,0x20,
-    0x00,0x01,0x00,0x06,0x00,0x45,0x9C,0x62,0x00,0x00,0x00,0x00,0x05,
-    /* 1280x800 (_2) */
-    0x0C,0xE0,0x00,0x05,0x20,0x03,0x10,0x06,0x2C,0x03,0x30,0x00,0x70,
-    0x00,0x04,0x00,0x03,0x00,0x49,0xCE,0x1E,0x00,0x00,0x00,0x00,0x09,
-    /* 1680x1050 */
-    0x0D,0xE0,0x90,0x06,0x1A,0x04,0x6C,0x07,0x2A,0x04,0x1A,0x00,0x4C,
-    0x00,0x03,0x00,0x06,0x00,0x79,0xBE,0x44,0x00,0x00,0x00,0x00,0x06,
-    /* 1280x800_3 */
-    0x0C,0xE0,0x00,0x05,0x20,0x03,0xAA,0x05,0x2E,0x03,0x30,0x00,0x50,
-    0x00,0x04,0x00,0x03,0x00,0x47,0xA9,0x10,0x00,0x00,0x00,0x00,0x07,
-    /* 800x600 */
-    0x01,0xC0,0x20,0x03,0x58,0x02,0x20,0x04,0x74,0x02,0x2A,0x00,0x80,
-    0x00,0x06,0x00,0x04,0x00,0x28,0x63,0x4B,0x00,0x00,0x00,0x00,0x00,
-    /* 1280x854 */
-    0x08,0xE0,0x00,0x05,0x56,0x03,0x80,0x06,0x5d,0x03,0x10,0x00,0x70,
-    0x00,0x01,0x00,0x03,0x00,0x54,0x75,0x13,0x00,0x00,0x00,0x00,0x08
-};
-#endif
-
-#ifdef CONFIG_FB_SIS_300
-static unsigned char SiS300_TrumpionData[14][80] = {
-  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x0D,0x00,0x0D,0x10,0x7F,0x00,0x80,0x02,
-    0x20,0x03,0x0B,0x00,0x90,0x01,0xC1,0x01,0x60,0x0C,0x30,0x10,0x00,0x00,0x04,0x23,
-    0x00,0x00,0x03,0x28,0x03,0x10,0x05,0x08,0x40,0x10,0x00,0x10,0x04,0x23,0x00,0x23,
-    0x03,0x11,0x60,0xBC,0x01,0xFF,0x03,0xFF,0x19,0x01,0x00,0x05,0x09,0x04,0x04,0x05,
-    0x04,0x0C,0x09,0x05,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x5A,0x01,0xBE,0x01,0x00 },
-  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x0D,0x00,0x0D,0x10,0x27,0x00,0x80,0x02,
-    0x20,0x03,0x07,0x00,0x5E,0x01,0x0D,0x02,0x60,0x0C,0x30,0x11,0x00,0x00,0x04,0x23,
-    0x00,0x00,0x03,0x80,0x03,0x28,0x06,0x08,0x40,0x11,0x00,0x11,0x04,0x23,0x00,0x23,
-    0x03,0x11,0x60,0x90,0x01,0xFF,0x0F,0xF4,0x19,0x01,0x00,0x05,0x01,0x00,0x04,0x05,
-    0x04,0x0C,0x02,0x01,0x02,0xB0,0x00,0x00,0x02,0xBA,0xEC,0x57,0x01,0xBE,0x01,0x00 },
-  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x0D,0x00,0x0D,0x10,0x8A,0x00,0xD8,0x02,
-    0x84,0x03,0x16,0x00,0x90,0x01,0xC1,0x01,0x60,0x0C,0x30,0x1C,0x00,0x20,0x04,0x23,
-    0x00,0x01,0x03,0x53,0x03,0x28,0x06,0x08,0x40,0x1C,0x00,0x16,0x04,0x23,0x00,0x23,
-    0x03,0x11,0x60,0xD9,0x01,0xFF,0x0F,0xF4,0x18,0x07,0x05,0x05,0x13,0x04,0x04,0x05,
-    0x01,0x0B,0x13,0x0A,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x59,0x01,0xBE,0x01,0x00 },
-  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x0D,0x00,0x0D,0x10,0x72,0x00,0xD8,0x02,
-    0x84,0x03,0x16,0x00,0x90,0x01,0xC1,0x01,0x60,0x0C,0x30,0x1C,0x00,0x20,0x04,0x23,
-    0x00,0x01,0x03,0x53,0x03,0x28,0x06,0x08,0x40,0x1C,0x00,0x16,0x04,0x23,0x00,0x23,
-    0x03,0x11,0x60,0xDA,0x01,0xFF,0x0F,0xF4,0x18,0x07,0x05,0x05,0x13,0x04,0x04,0x05,
-    0x01,0x0B,0x13,0x0A,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x55,0x01,0xBE,0x01,0x00 },
-  { 0x02,0x0A,0x02,0x00,0x04,0x01,0x00,0x03,0x0D,0x00,0x0D,0x10,0x7F,0x00,0x80,0x02,
-    0x20,0x03,0x16,0x00,0xE0,0x01,0x0D,0x02,0x60,0x0C,0x30,0x98,0x00,0x00,0x04,0x23,
-    0x00,0x01,0x03,0x45,0x03,0x48,0x06,0x08,0x40,0x98,0x00,0x98,0x04,0x23,0x00,0x23,
-    0x03,0x11,0x60,0xF4,0x01,0xFF,0x0F,0xF4,0x18,0x01,0x00,0x05,0x01,0x00,0x05,0x05,
-    0x04,0x0C,0x08,0x05,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x5B,0x01,0xBE,0x01,0x00 },
-  { 0x02,0x0A,0x02,0x01,0x04,0x01,0x00,0x03,0x0D,0x00,0x0D,0x10,0xBF,0x00,0x20,0x03,
-    0x20,0x04,0x0D,0x00,0x58,0x02,0x71,0x02,0x80,0x0C,0x30,0x9A,0x00,0xFA,0x03,0x1D,
-    0x00,0x01,0x03,0x22,0x03,0x28,0x06,0x08,0x40,0x98,0x00,0x98,0x04,0x1D,0x00,0x1D,
-    0x03,0x11,0x60,0x39,0x03,0x40,0x05,0xF4,0x18,0x07,0x02,0x06,0x04,0x01,0x06,0x0B,
-    0x02,0x0A,0x20,0x19,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x5B,0x01,0xBE,0x01,0x00 },
-  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x0D,0x00,0x0D,0x10,0xEF,0x00,0x00,0x04,
-    0x40,0x05,0x13,0x00,0x00,0x03,0x26,0x03,0x88,0x0C,0x30,0x90,0x00,0x00,0x04,0x23,
-    0x00,0x01,0x03,0x24,0x03,0x28,0x06,0x08,0x40,0x90,0x00,0x90,0x04,0x23,0x00,0x23,
-    0x03,0x11,0x60,0x40,0x05,0xFF,0x0F,0xF4,0x18,0x01,0x00,0x08,0x01,0x00,0x08,0x01,
-    0x00,0x08,0x01,0x01,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x5B,0x01,0xBE,0x01,0x00 },
-  /* variant 2 */
-  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x11,0x00,0x0D,0x10,0x7F,0x00,0x80,0x02,
-    0x20,0x03,0x15,0x00,0x90,0x01,0xC1,0x01,0x60,0x0C,0x30,0x18,0x00,0x00,0x04,0x23,
-    0x00,0x01,0x03,0x44,0x03,0x28,0x06,0x08,0x40,0x18,0x00,0x18,0x04,0x23,0x00,0x23,
-    0x03,0x11,0x60,0xA6,0x01,0xFF,0x03,0xFF,0x19,0x01,0x00,0x05,0x13,0x04,0x04,0x05,
-    0x04,0x0C,0x13,0x0A,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x55,0x01,0xBE,0x01,0x00 },
-  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x11,0x00,0x0D,0x10,0x7F,0x00,0x80,0x02,
-    0x20,0x03,0x15,0x00,0x90,0x01,0xC1,0x01,0x60,0x0C,0x30,0x18,0x00,0x00,0x04,0x23,
-    0x00,0x01,0x03,0x44,0x03,0x28,0x06,0x08,0x40,0x18,0x00,0x18,0x04,0x23,0x00,0x23,
-    0x03,0x11,0x60,0xA6,0x01,0xFF,0x03,0xFF,0x19,0x01,0x00,0x05,0x13,0x04,0x04,0x05,
-    0x04,0x0C,0x13,0x0A,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x55,0x01,0xBE,0x01,0x00 },
-  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x11,0x00,0x0D,0x10,0x8A,0x00,0xD8,0x02,
-    0x84,0x03,0x16,0x00,0x90,0x01,0xC1,0x01,0x60,0x0C,0x30,0x1C,0x00,0x20,0x04,0x23,
-    0x00,0x01,0x03,0x53,0x03,0x28,0x06,0x08,0x40,0x1C,0x00,0x16,0x04,0x23,0x00,0x23,
-    0x03,0x11,0x60,0xDA,0x01,0xFF,0x0F,0xF4,0x18,0x07,0x05,0x05,0x13,0x04,0x04,0x05,
-    0x01,0x0B,0x13,0x0A,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x55,0x01,0xBE,0x01,0x00 },
-  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x11,0x00,0x0D,0x10,0x72,0x00,0xD8,0x02,
-    0x84,0x03,0x16,0x00,0x90,0x01,0xC1,0x01,0x60,0x0C,0x30,0x1C,0x00,0x20,0x04,0x23,
-    0x00,0x01,0x03,0x53,0x03,0x28,0x06,0x08,0x40,0x1C,0x00,0x16,0x04,0x23,0x00,0x23,
-    0x03,0x11,0x60,0xDA,0x01,0xFF,0x0F,0xF4,0x18,0x07,0x05,0x05,0x13,0x04,0x04,0x05,
-    0x01,0x0B,0x13,0x0A,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x55,0x01,0xBE,0x01,0x00 },
-  { 0x02,0x0A,0x02,0x00,0x04,0x01,0x00,0x03,0x11,0x00,0x0D,0x10,0x7F,0x00,0x80,0x02,
-    0x20,0x03,0x16,0x00,0xE0,0x01,0x0D,0x02,0x60,0x0C,0x30,0x98,0x00,0x00,0x04,0x23,
-    0x00,0x01,0x03,0x45,0x03,0x48,0x06,0x08,0x40,0x98,0x00,0x98,0x04,0x23,0x00,0x23,
-    0x03,0x11,0x60,0xF4,0x01,0xFF,0x0F,0xF4,0x18,0x01,0x00,0x05,0x01,0x00,0x05,0x05,
-    0x04,0x0C,0x08,0x05,0x02,0xB0,0x00,0x00,0x02,0xBA,0xEA,0x58,0x01,0xBE,0x01,0x00 },
-  { 0x02,0x0A,0x02,0x01,0x04,0x01,0x00,0x03,0x11,0x00,0x0D,0x10,0xBF,0x00,0x20,0x03,
-    0x20,0x04,0x0D,0x00,0x58,0x02,0x71,0x02,0x80,0x0C,0x30,0x9A,0x00,0xFA,0x03,0x1D,
-    0x00,0x01,0x03,0x22,0x03,0x28,0x06,0x08,0x40,0x98,0x00,0x98,0x04,0x1D,0x00,0x1D,
-    0x03,0x11,0x60,0x39,0x03,0x40,0x05,0xF4,0x18,0x07,0x02,0x06,0x04,0x01,0x06,0x0B,
-    0x02,0x0A,0x20,0x19,0x02,0xB0,0x00,0x00,0x02,0xBA,0xEA,0x58,0x01,0xBE,0x01,0x00 },
-  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x11,0x00,0x0D,0x10,0xEF,0x00,0x00,0x04,
-    0x40,0x05,0x13,0x00,0x00,0x03,0x26,0x03,0x88,0x0C,0x30,0x90,0x00,0x00,0x04,0x23,
-    0x00,0x01,0x03,0x24,0x03,0x28,0x06,0x08,0x40,0x90,0x00,0x90,0x04,0x23,0x00,0x23,
-    0x03,0x11,0x60,0x40,0x05,0xFF,0x0F,0xF4,0x18,0x01,0x00,0x08,0x01,0x00,0x08,0x01,
-    0x00,0x08,0x01,0x01,0x02,0xB0,0x00,0x00,0x02,0xBA,0xEA,0x58,0x01,0xBE,0x01,0x00 }
-};
-#endif
-
 void		SiS_UnLockCRT2(struct SiS_Private *SiS_Pr);
 void		SiS_EnableCRT2(struct SiS_Private *SiS_Pr);
 unsigned short	SiS_GetRatePtr(struct SiS_Private *SiS_Pr, unsigned short ModeNo, unsigned short ModeIdIndex);
@@ -375,16 +94,11 @@ unsigned short	SiS_GetCH701x(struct SiS_Private *SiS_Pr, unsigned short tempax);
 void		SiS_SetCH70xxANDOR(struct SiS_Private *SiS_Pr, unsigned short reg,
 			unsigned char orval,unsigned short andval);
 #ifdef CONFIG_FB_SIS_315
-static void	SiS_Chrontel701xOn(struct SiS_Private *SiS_Pr);
-static void	SiS_Chrontel701xOff(struct SiS_Private *SiS_Pr);
-static void	SiS_ChrontelInitTVVSync(struct SiS_Private *SiS_Pr);
-static void	SiS_ChrontelDoSomething1(struct SiS_Private *SiS_Pr);
 void		SiS_Chrontel701xBLOn(struct SiS_Private *SiS_Pr);
 void		SiS_Chrontel701xBLOff(struct SiS_Private *SiS_Pr);
 #endif /* 315 */
 
 #ifdef CONFIG_FB_SIS_300
-static  bool	SiS_SetTrumpionBlock(struct SiS_Private *SiS_Pr, unsigned char *dataptr);
 void		SiS_SetChrontelGPIO(struct SiS_Private *SiS_Pr, unsigned short myvbinfo);
 #endif
 
@@ -394,40 +108,6 @@ unsigned short	SiS_HandleDDC(struct SiS_Private *SiS_Pr, unsigned int VBFlags, i
 			unsigned short adaptnum, unsigned short DDCdatatype,
 			unsigned char *buffer, unsigned int VBFlags2);
 
-static unsigned short	SiS_InitDDCRegs(struct SiS_Private *SiS_Pr, unsigned int VBFlags,
-				int VGAEngine, unsigned short adaptnum, unsigned short DDCdatatype,
-				bool checkcr32, unsigned int VBFlags2);
-static unsigned short	SiS_ProbeDDC(struct SiS_Private *SiS_Pr);
-static unsigned short	SiS_ReadDDC(struct SiS_Private *SiS_Pr, unsigned short DDCdatatype,
-				unsigned char *buffer);
-static void		SiS_SetSwitchDDC2(struct SiS_Private *SiS_Pr);
-static unsigned short	SiS_SetStart(struct SiS_Private *SiS_Pr);
-static unsigned short	SiS_SetStop(struct SiS_Private *SiS_Pr);
-static unsigned short	SiS_SetSCLKLow(struct SiS_Private *SiS_Pr);
-static unsigned short	SiS_SetSCLKHigh(struct SiS_Private *SiS_Pr);
-static unsigned short	SiS_ReadDDC2Data(struct SiS_Private *SiS_Pr);
-static unsigned short	SiS_WriteDDC2Data(struct SiS_Private *SiS_Pr, unsigned short tempax);
-static unsigned short	SiS_CheckACK(struct SiS_Private *SiS_Pr);
-static unsigned short	SiS_WriteDABDDC(struct SiS_Private *SiS_Pr);
-static unsigned short	SiS_PrepareReadDDC(struct SiS_Private *SiS_Pr);
-static unsigned short	SiS_PrepareDDC(struct SiS_Private *SiS_Pr);
-static void		SiS_SendACK(struct SiS_Private *SiS_Pr, unsigned short yesno);
-static unsigned short	SiS_DoProbeDDC(struct SiS_Private *SiS_Pr);
-
-#ifdef CONFIG_FB_SIS_300
-static void		SiS_OEM300Setting(struct SiS_Private *SiS_Pr,
-				unsigned short ModeNo, unsigned short ModeIdIndex, unsigned short RefTabindex);
-static void		SetOEMLCDData2(struct SiS_Private *SiS_Pr,
-				unsigned short ModeNo, unsigned short ModeIdIndex,unsigned short RefTableIndex);
-#endif
-#ifdef CONFIG_FB_SIS_315
-static void		SiS_OEM310Setting(struct SiS_Private *SiS_Pr,
-				unsigned short ModeNo,unsigned short ModeIdIndex, unsigned short RRTI);
-static void		SiS_OEM661Setting(struct SiS_Private *SiS_Pr,
-				unsigned short ModeNo,unsigned short ModeIdIndex, unsigned short RRTI);
-static void		SiS_FinalizeLCD(struct SiS_Private *, unsigned short, unsigned short);
-#endif
-
 extern void		SiS_DisplayOff(struct SiS_Private *SiS_Pr);
 extern void		SiS_DisplayOn(struct SiS_Private *SiS_Pr);
 extern bool		SiS_SearchModeID(struct SiS_Private *, unsigned short *, unsigned short *);
diff --git a/drivers/video/fbdev/sis/sis.h b/drivers/video/fbdev/sis/sis.h
index ea1d1c9640bf..d04982b0cd6f 100644
--- a/drivers/video/fbdev/sis/sis.h
+++ b/drivers/video/fbdev/sis/sis.h
@@ -28,6 +28,7 @@
 
 #include "vgatypes.h"
 #include "vstruct.h"
+#include "init.h"
 
 #define VER_MAJOR		1
 #define VER_MINOR		8
@@ -321,6 +322,85 @@ u8 SiS_GetRegByte(SISIOADDRESS);
 u16 SiS_GetRegShort(SISIOADDRESS);
 u32 SiS_GetRegLong(SISIOADDRESS);
 
+/* Chrontel TV, DDC and DPMS functions */
+/* from init.c */
+bool		SiSInitPtr(struct SiS_Private *SiS_Pr);
+unsigned short	SiS_GetModeID_LCD(int VGAEngine, unsigned int VBFlags, int HDisplay,
+				int VDisplay, int Depth, bool FSTN,
+				unsigned short CustomT, int LCDwith, int LCDheight,
+				unsigned int VBFlags2);
+unsigned short	SiS_GetModeID_TV(int VGAEngine, unsigned int VBFlags, int HDisplay,
+				int VDisplay, int Depth, unsigned int VBFlags2);
+unsigned short	SiS_GetModeID_VGA2(int VGAEngine, unsigned int VBFlags, int HDisplay,
+				int VDisplay, int Depth, unsigned int VBFlags2);
+
+void		SiS_DisplayOn(struct SiS_Private *SiS_Pr);
+void		SiS_DisplayOff(struct SiS_Private *SiS_Pr);
+void		SiSRegInit(struct SiS_Private *SiS_Pr, SISIOADDRESS BaseAddr);
+void		SiS_SetEnableDstn(struct SiS_Private *SiS_Pr, int enable);
+void		SiS_SetEnableFstn(struct SiS_Private *SiS_Pr, int enable);
+unsigned short	SiS_GetModeFlag(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
+				unsigned short ModeIdIndex);
+bool		SiSDetermineROMLayout661(struct SiS_Private *SiS_Pr);
+
+bool		SiS_SearchModeID(struct SiS_Private *SiS_Pr, unsigned short *ModeNo,
+				unsigned short *ModeIdIndex);
+unsigned short	SiS_GetModePtr(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
+				unsigned short ModeIdIndex);
+unsigned short  SiS_GetRefCRTVCLK(struct SiS_Private *SiS_Pr, unsigned short Index, int UseWide);
+unsigned short  SiS_GetRefCRT1CRTC(struct SiS_Private *SiS_Pr, unsigned short Index, int UseWide);
+unsigned short	SiS_GetColorDepth(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
+				unsigned short ModeIdIndex);
+unsigned short	SiS_GetOffset(struct SiS_Private *SiS_Pr,unsigned short ModeNo,
+				unsigned short ModeIdIndex, unsigned short RRTI);
+#ifdef CONFIG_FB_SIS_300
+void		SiS_GetFIFOThresholdIndex300(struct SiS_Private *SiS_Pr, unsigned short *idx1,
+				unsigned short *idx2);
+unsigned short	SiS_GetFIFOThresholdB300(unsigned short idx1, unsigned short idx2);
+unsigned short	SiS_GetLatencyFactor630(struct SiS_Private *SiS_Pr, unsigned short index);
+#endif
+void		SiS_LoadDAC(struct SiS_Private *SiS_Pr, unsigned short ModeNo, unsigned short ModeIdIndex);
+bool		SiSSetMode(struct SiS_Private *SiS_Pr, unsigned short ModeNo);
+void		SiS_CalcCRRegisters(struct SiS_Private *SiS_Pr, int depth);
+void		SiS_CalcLCDACRT1Timing(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
+				unsigned short ModeIdIndex);
+void		SiS_Generic_ConvertCRData(struct SiS_Private *SiS_Pr, unsigned char *crdata, int xres,
+				int yres, struct fb_var_screeninfo *var, bool writeres);
+
+/* From init301.c: */
+extern void		SiS_GetVBInfo(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
+				unsigned short ModeIdIndex, int chkcrt2mode);
+extern void		SiS_GetLCDResInfo(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
+				unsigned short ModeIdIndex);
+extern void		SiS_SetYPbPr(struct SiS_Private *SiS_Pr);
+extern void		SiS_SetTVMode(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
+				unsigned short ModeIdIndex);
+extern void		SiS_UnLockCRT2(struct SiS_Private *SiS_Pr);
+extern void		SiS_DisableBridge(struct SiS_Private *);
+extern bool		SiS_SetCRT2Group(struct SiS_Private *, unsigned short);
+extern unsigned short	SiS_GetRatePtr(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
+				unsigned short ModeIdIndex);
+extern void		SiS_WaitRetrace1(struct SiS_Private *SiS_Pr);
+extern unsigned short	SiS_GetResInfo(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
+				unsigned short ModeIdIndex);
+extern unsigned short	SiS_GetCH700x(struct SiS_Private *SiS_Pr, unsigned short tempax);
+extern unsigned short	SiS_GetVCLK2Ptr(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
+				unsigned short ModeIdIndex, unsigned short RRTI);
+extern bool		SiS_IsVAMode(struct SiS_Private *);
+extern bool		SiS_IsDualEdge(struct SiS_Private *);
+
+#ifdef CONFIG_FB_SIS_300
+extern unsigned int	sisfb_read_nbridge_pci_dword(struct SiS_Private *SiS_Pr, int reg);
+extern void		sisfb_write_nbridge_pci_dword(struct SiS_Private *SiS_Pr, int reg,
+				unsigned int val);
+#endif
+#ifdef CONFIG_FB_SIS_315
+extern void		sisfb_write_nbridge_pci_byte(struct SiS_Private *SiS_Pr, int reg,
+				unsigned char val);
+extern unsigned int	sisfb_read_mio_pci_word(struct SiS_Private *SiS_Pr, int reg);
+#endif
+
+
 /* MMIO access macros */
 #define MMIO_IN8(base, offset)  readb((base+offset))
 #define MMIO_IN16(base, offset) readw((base+offset))
@@ -583,4 +663,55 @@ struct sis_video_info {
 	struct sis_video_info *next;
 };
 
+/* from sis_accel.c */
+extern void	fbcon_sis_fillrect(struct fb_info *info,
+				const struct fb_fillrect *rect);
+extern void	fbcon_sis_copyarea(struct fb_info *info,
+				const struct fb_copyarea *area);
+extern int	fbcon_sis_sync(struct fb_info *info);
+
+/* Internal 2D accelerator functions */
+extern int	sisfb_initaccel(struct sis_video_info *ivideo);
+extern void	sisfb_syncaccel(struct sis_video_info *ivideo);
+
+/* Internal general routines */
+#ifdef CONFIG_FB_SIS_300
+unsigned int	sisfb_read_nbridge_pci_dword(struct SiS_Private *SiS_Pr, int reg);
+void		sisfb_write_nbridge_pci_dword(struct SiS_Private *SiS_Pr, int reg, unsigned int val);
+unsigned int	sisfb_read_lpc_pci_dword(struct SiS_Private *SiS_Pr, int reg);
+#endif
+#ifdef CONFIG_FB_SIS_315
+void		sisfb_write_nbridge_pci_byte(struct SiS_Private *SiS_Pr, int reg, unsigned char val);
+unsigned int	sisfb_read_mio_pci_word(struct SiS_Private *SiS_Pr, int reg);
+#endif
+
+/* SiS-specific exported functions */
+void			sis_malloc(struct sis_memreq *req);
+void			sis_malloc_new(struct pci_dev *pdev, struct sis_memreq *req);
+void			sis_free(u32 base);
+void			sis_free_new(struct pci_dev *pdev, u32 base);
+
+/* Routines from init.c/init301.c */
+extern unsigned short	SiS_GetModeID_LCD(int VGAEngine, unsigned int VBFlags, int HDisplay,
+				int VDisplay, int Depth, bool FSTN, unsigned short CustomT,
+				int LCDwith, int LCDheight, unsigned int VBFlags2);
+extern unsigned short	SiS_GetModeID_TV(int VGAEngine, unsigned int VBFlags, int HDisplay,
+				int VDisplay, int Depth, unsigned int VBFlags2);
+extern unsigned short	SiS_GetModeID_VGA2(int VGAEngine, unsigned int VBFlags, int HDisplay,
+				int VDisplay, int Depth, unsigned int VBFlags2);
+extern void		SiSRegInit(struct SiS_Private *SiS_Pr, SISIOADDRESS BaseAddr);
+extern bool		SiSSetMode(struct SiS_Private *SiS_Pr, unsigned short ModeNo);
+extern void		SiS_SetEnableDstn(struct SiS_Private *SiS_Pr, int enable);
+extern void		SiS_SetEnableFstn(struct SiS_Private *SiS_Pr, int enable);
+
+extern bool		SiSDetermineROMLayout661(struct SiS_Private *SiS_Pr);
+
+extern bool		sisfb_gettotalfrommode(struct SiS_Private *SiS_Pr, unsigned char modeno,
+				int *htotal, int *vtotal, unsigned char rateindex);
+extern int		sisfb_mode_rate_to_dclock(struct SiS_Private *SiS_Pr,
+				unsigned char modeno, unsigned char rateindex);
+extern int		sisfb_mode_rate_to_ddata(struct SiS_Private *SiS_Pr, unsigned char modeno,
+				unsigned char rateindex, struct fb_var_screeninfo *var);
+
+
 #endif
diff --git a/drivers/video/fbdev/sis/sis_main.c b/drivers/video/fbdev/sis/sis_main.c
index ecdd054d8951..20aff9005978 100644
--- a/drivers/video/fbdev/sis/sis_main.c
+++ b/drivers/video/fbdev/sis/sis_main.c
@@ -56,15 +56,66 @@
 
 #include "sis.h"
 #include "sis_main.h"
+#include "init301.h"
 
 #if !defined(CONFIG_FB_SIS_300) && !defined(CONFIG_FB_SIS_315)
 #warning Neither CONFIG_FB_SIS_300 nor CONFIG_FB_SIS_315 is set
 #warning sisfb will not work!
 #endif
 
+/* ---------------------- Prototypes ------------------------- */
+
+/* Interface used by the world */
+#ifndef MODULE
+static int sisfb_setup(char *options);
+#endif
+
+/* Interface to the low level console driver */
+static int sisfb_init(void);
+
+/* fbdev routines */
+static int	sisfb_get_fix(struct fb_fix_screeninfo *fix, int con,
+				struct fb_info *info);
+
+static int	sisfb_ioctl(struct fb_info *info, unsigned int cmd,
+			    unsigned long arg);
+static int	sisfb_set_par(struct fb_info *info);
+static int	sisfb_blank(int blank,
+				struct fb_info *info);
+
 static void sisfb_handle_command(struct sis_video_info *ivideo,
 				 struct sisfb_cmd *sisfb_command);
 
+static void	sisfb_search_mode(char *name, bool quiet);
+static int	sisfb_validate_mode(struct sis_video_info *ivideo, int modeindex, u32 vbflags);
+static u8	sisfb_search_refresh_rate(struct sis_video_info *ivideo, unsigned int rate,
+				int index);
+static int	sisfb_setcolreg(unsigned regno, unsigned red, unsigned green,
+				unsigned blue, unsigned transp,
+				struct fb_info *fb_info);
+static int	sisfb_do_set_var(struct fb_var_screeninfo *var, int isactive,
+				struct fb_info *info);
+static void	sisfb_pre_setmode(struct sis_video_info *ivideo);
+static void	sisfb_post_setmode(struct sis_video_info *ivideo);
+static bool	sisfb_CheckVBRetrace(struct sis_video_info *ivideo);
+static bool	sisfbcheckvretracecrt2(struct sis_video_info *ivideo);
+static bool	sisfbcheckvretracecrt1(struct sis_video_info *ivideo);
+static bool	sisfb_bridgeisslave(struct sis_video_info *ivideo);
+static void	sisfb_detect_VB_connect(struct sis_video_info *ivideo);
+static void	sisfb_get_VB_type(struct sis_video_info *ivideo);
+static void	sisfb_set_TVxposoffset(struct sis_video_info *ivideo, int val);
+static void	sisfb_set_TVyposoffset(struct sis_video_info *ivideo, int val);
+
+/* Internal heap routines */
+static int		sisfb_heap_init(struct sis_video_info *ivideo);
+static struct SIS_OH *	sisfb_poh_new_node(struct SIS_HEAP *memheap);
+static struct SIS_OH *	sisfb_poh_allocate(struct SIS_HEAP *memheap, u32 size);
+static void		sisfb_delete_node(struct SIS_OH *poh);
+static void		sisfb_insert_node(struct SIS_OH *pohList, struct SIS_OH *poh);
+static struct SIS_OH *	sisfb_poh_free(struct SIS_HEAP *memheap, u32 base);
+static void		sisfb_free_node(struct SIS_HEAP *memheap, struct SIS_OH *poh);
+
+
 /* ------------------ Internal helper routines ----------------- */
 
 static void __init
diff --git a/drivers/video/fbdev/sis/sis_main.h b/drivers/video/fbdev/sis/sis_main.h
index 32e23c209430..d8ba07061f1e 100644
--- a/drivers/video/fbdev/sis/sis_main.h
+++ b/drivers/video/fbdev/sis/sis_main.h
@@ -661,121 +661,4 @@ static struct _customttable {
 	}
 };
 
-/* ---------------------- Prototypes ------------------------- */
-
-/* Interface used by the world */
-#ifndef MODULE
-static int sisfb_setup(char *options);
 #endif
-
-/* Interface to the low level console driver */
-static int sisfb_init(void);
-
-/* fbdev routines */
-static int	sisfb_get_fix(struct fb_fix_screeninfo *fix, int con,
-				struct fb_info *info);
-
-static int	sisfb_ioctl(struct fb_info *info, unsigned int cmd,
-			    unsigned long arg);
-static int	sisfb_set_par(struct fb_info *info);
-static int	sisfb_blank(int blank,
-				struct fb_info *info);
-extern void	fbcon_sis_fillrect(struct fb_info *info,
-				const struct fb_fillrect *rect);
-extern void	fbcon_sis_copyarea(struct fb_info *info,
-				const struct fb_copyarea *area);
-extern int	fbcon_sis_sync(struct fb_info *info);
-
-/* Internal 2D accelerator functions */
-extern int	sisfb_initaccel(struct sis_video_info *ivideo);
-extern void	sisfb_syncaccel(struct sis_video_info *ivideo);
-
-/* Internal general routines */
-static void	sisfb_search_mode(char *name, bool quiet);
-static int	sisfb_validate_mode(struct sis_video_info *ivideo, int modeindex, u32 vbflags);
-static u8	sisfb_search_refresh_rate(struct sis_video_info *ivideo, unsigned int rate,
-				int index);
-static int	sisfb_setcolreg(unsigned regno, unsigned red, unsigned green,
-				unsigned blue, unsigned transp,
-				struct fb_info *fb_info);
-static int	sisfb_do_set_var(struct fb_var_screeninfo *var, int isactive,
-				struct fb_info *info);
-static void	sisfb_pre_setmode(struct sis_video_info *ivideo);
-static void	sisfb_post_setmode(struct sis_video_info *ivideo);
-static bool	sisfb_CheckVBRetrace(struct sis_video_info *ivideo);
-static bool	sisfbcheckvretracecrt2(struct sis_video_info *ivideo);
-static bool	sisfbcheckvretracecrt1(struct sis_video_info *ivideo);
-static bool	sisfb_bridgeisslave(struct sis_video_info *ivideo);
-static void	sisfb_detect_VB_connect(struct sis_video_info *ivideo);
-static void	sisfb_get_VB_type(struct sis_video_info *ivideo);
-static void	sisfb_set_TVxposoffset(struct sis_video_info *ivideo, int val);
-static void	sisfb_set_TVyposoffset(struct sis_video_info *ivideo, int val);
-#ifdef CONFIG_FB_SIS_300
-unsigned int	sisfb_read_nbridge_pci_dword(struct SiS_Private *SiS_Pr, int reg);
-void		sisfb_write_nbridge_pci_dword(struct SiS_Private *SiS_Pr, int reg, unsigned int val);
-unsigned int	sisfb_read_lpc_pci_dword(struct SiS_Private *SiS_Pr, int reg);
-#endif
-#ifdef CONFIG_FB_SIS_315
-void		sisfb_write_nbridge_pci_byte(struct SiS_Private *SiS_Pr, int reg, unsigned char val);
-unsigned int	sisfb_read_mio_pci_word(struct SiS_Private *SiS_Pr, int reg);
-#endif
-
-/* SiS-specific exported functions */
-void			sis_malloc(struct sis_memreq *req);
-void			sis_malloc_new(struct pci_dev *pdev, struct sis_memreq *req);
-void			sis_free(u32 base);
-void			sis_free_new(struct pci_dev *pdev, u32 base);
-
-/* Internal heap routines */
-static int		sisfb_heap_init(struct sis_video_info *ivideo);
-static struct SIS_OH *	sisfb_poh_new_node(struct SIS_HEAP *memheap);
-static struct SIS_OH *	sisfb_poh_allocate(struct SIS_HEAP *memheap, u32 size);
-static void		sisfb_delete_node(struct SIS_OH *poh);
-static void		sisfb_insert_node(struct SIS_OH *pohList, struct SIS_OH *poh);
-static struct SIS_OH *	sisfb_poh_free(struct SIS_HEAP *memheap, u32 base);
-static void		sisfb_free_node(struct SIS_HEAP *memheap, struct SIS_OH *poh);
-
-/* Routines from init.c/init301.c */
-extern unsigned short	SiS_GetModeID_LCD(int VGAEngine, unsigned int VBFlags, int HDisplay,
-				int VDisplay, int Depth, bool FSTN, unsigned short CustomT,
-				int LCDwith, int LCDheight, unsigned int VBFlags2);
-extern unsigned short	SiS_GetModeID_TV(int VGAEngine, unsigned int VBFlags, int HDisplay,
-				int VDisplay, int Depth, unsigned int VBFlags2);
-extern unsigned short	SiS_GetModeID_VGA2(int VGAEngine, unsigned int VBFlags, int HDisplay,
-				int VDisplay, int Depth, unsigned int VBFlags2);
-extern void		SiSRegInit(struct SiS_Private *SiS_Pr, SISIOADDRESS BaseAddr);
-extern bool		SiSSetMode(struct SiS_Private *SiS_Pr, unsigned short ModeNo);
-extern void		SiS_SetEnableDstn(struct SiS_Private *SiS_Pr, int enable);
-extern void		SiS_SetEnableFstn(struct SiS_Private *SiS_Pr, int enable);
-
-extern bool		SiSDetermineROMLayout661(struct SiS_Private *SiS_Pr);
-
-extern bool		sisfb_gettotalfrommode(struct SiS_Private *SiS_Pr, unsigned char modeno,
-				int *htotal, int *vtotal, unsigned char rateindex);
-extern int		sisfb_mode_rate_to_dclock(struct SiS_Private *SiS_Pr,
-				unsigned char modeno, unsigned char rateindex);
-extern int		sisfb_mode_rate_to_ddata(struct SiS_Private *SiS_Pr, unsigned char modeno,
-				unsigned char rateindex, struct fb_var_screeninfo *var);
-
-/* Chrontel TV, DDC and DPMS functions */
-extern unsigned short	SiS_GetCH700x(struct SiS_Private *SiS_Pr, unsigned short reg);
-extern void		SiS_SetCH700x(struct SiS_Private *SiS_Pr, unsigned short reg, unsigned char val);
-extern unsigned short	SiS_GetCH701x(struct SiS_Private *SiS_Pr, unsigned short reg);
-extern void		SiS_SetCH701x(struct SiS_Private *SiS_Pr, unsigned short reg, unsigned char val);
-extern void		SiS_SetCH70xxANDOR(struct SiS_Private *SiS_Pr, unsigned short reg,
-				unsigned char myor, unsigned char myand);
-extern void		SiS_DDC2Delay(struct SiS_Private *SiS_Pr, unsigned int delaytime);
-extern void		SiS_SetChrontelGPIO(struct SiS_Private *SiS_Pr, unsigned short myvbinfo);
-extern unsigned short	SiS_HandleDDC(struct SiS_Private *SiS_Pr, unsigned int VBFlags, int VGAEngine,
-				unsigned short adaptnum, unsigned short DDCdatatype, unsigned char *buffer,
-				unsigned int VBFlags2);
-extern unsigned short	SiS_ReadDDC1Bit(struct SiS_Private *SiS_Pr);
-#ifdef CONFIG_FB_SIS_315
-extern void		SiS_Chrontel701xBLOn(struct SiS_Private *SiS_Pr);
-extern void		SiS_Chrontel701xBLOff(struct SiS_Private *SiS_Pr);
-#endif
-extern void		SiS_SiS30xBLOn(struct SiS_Private *SiS_Pr);
-extern void		SiS_SiS30xBLOff(struct SiS_Private *SiS_Pr);
-#endif
-
-
diff --git a/drivers/video/fbdev/smscufx.c b/drivers/video/fbdev/smscufx.c
index 8db7085e5d1a..22b606af0a87 100644
--- a/drivers/video/fbdev/smscufx.c
+++ b/drivers/video/fbdev/smscufx.c
@@ -1293,7 +1293,6 @@ static struct fb_ops ufx_ops = {
  * Assumes no active clients have framebuffer open */
 static int ufx_realloc_framebuffer(struct ufx_data *dev, struct fb_info *info)
 {
-	int retval = -ENOMEM;
 	int old_len = info->fix.smem_len;
 	int new_len;
 	unsigned char *old_fb = info->screen_base;
@@ -1308,10 +1307,8 @@ static int ufx_realloc_framebuffer(struct ufx_data *dev, struct fb_info *info)
 		 * Alloc system memory for virtual framebuffer
 		 */
 		new_fb = vmalloc(new_len);
-		if (!new_fb) {
-			pr_err("Virtual framebuffer alloc failed");
-			goto error;
-		}
+		if (!new_fb)
+			return -ENOMEM;
 
 		if (info->screen_base) {
 			memcpy(new_fb, old_fb, old_len);
@@ -1323,11 +1320,7 @@ static int ufx_realloc_framebuffer(struct ufx_data *dev, struct fb_info *info)
 		info->fix.smem_start = (unsigned long) new_fb;
 		info->flags = smscufx_info_flags;
 	}
-
-	retval = 0;
-
-error:
-	return retval;
+	return 0;
 }
 
 /* sets up I2C Controller for 100 Kbps, std. speed, 7-bit addr, master,
@@ -1620,8 +1613,8 @@ static int ufx_usb_probe(struct usb_interface *interface,
 {
 	struct usb_device *usbdev;
 	struct ufx_data *dev;
-	struct fb_info *info = NULL;
-	int retval = -ENOMEM;
+	struct fb_info *info;
+	int retval;
 	u32 id_rev, fpga_rev;
 
 	/* usb initialization */
@@ -1631,7 +1624,7 @@ static int ufx_usb_probe(struct usb_interface *interface,
 	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
 	if (dev == NULL) {
 		dev_err(&usbdev->dev, "ufx_usb_probe: failed alloc of dev struct\n");
-		goto error;
+		return -ENOMEM;
 	}
 
 	/* we need to wait for both usb and fbdev to spin down on disconnect */
@@ -1652,9 +1645,8 @@ static int ufx_usb_probe(struct usb_interface *interface,
 	dev_dbg(dev->gdev, "fb_defio enable=%d\n", fb_defio);
 
 	if (!ufx_alloc_urb_list(dev, WRITES_IN_FLIGHT, MAX_TRANSFER)) {
-		retval = -ENOMEM;
 		dev_err(dev->gdev, "ufx_alloc_urb_list failed\n");
-		goto error;
+		goto e_nomem;
 	}
 
 	/* We don't register a new USB class. Our client interface is fbdev */
@@ -1662,9 +1654,8 @@ static int ufx_usb_probe(struct usb_interface *interface,
 	/* allocates framebuffer driver structure, not framebuffer memory */
 	info = framebuffer_alloc(0, &usbdev->dev);
 	if (!info) {
-		retval = -ENOMEM;
 		dev_err(dev->gdev, "framebuffer_alloc failed\n");
-		goto error;
+		goto e_nomem;
 	}
 
 	dev->info = info;
@@ -1675,7 +1666,7 @@ static int ufx_usb_probe(struct usb_interface *interface,
 	retval = fb_alloc_cmap(&info->cmap, 256, 0);
 	if (retval < 0) {
 		dev_err(dev->gdev, "fb_alloc_cmap failed %x\n", retval);
-		goto error;
+		goto destroy_modedb;
 	}
 
 	INIT_DELAYED_WORK(&dev->free_framebuffer_work,
@@ -1736,26 +1727,20 @@ static int ufx_usb_probe(struct usb_interface *interface,
 	return 0;
 
 error:
-	if (dev) {
-		if (info) {
-			if (info->cmap.len != 0)
-				fb_dealloc_cmap(&info->cmap);
-			if (info->monspecs.modedb)
-				fb_destroy_modedb(info->monspecs.modedb);
-			vfree(info->screen_base);
-
-			fb_destroy_modelist(&info->modelist);
-
-			framebuffer_release(info);
-		}
-
-		kref_put(&dev->kref, ufx_free); /* ref for framebuffer */
-		kref_put(&dev->kref, ufx_free); /* last ref from kref_init */
-
-		/* dev has been deallocated. Do not dereference */
-	}
-
+	fb_dealloc_cmap(&info->cmap);
+destroy_modedb:
+	fb_destroy_modedb(info->monspecs.modedb);
+	vfree(info->screen_base);
+	fb_destroy_modelist(&info->modelist);
+	framebuffer_release(info);
+put_ref:
+	kref_put(&dev->kref, ufx_free); /* ref for framebuffer */
+	kref_put(&dev->kref, ufx_free); /* last ref from kref_init */
 	return retval;
+
+e_nomem:
+	retval = -ENOMEM;
+	goto put_ref;
 }
 
 static void ufx_usb_disconnect(struct usb_interface *interface)
diff --git a/drivers/video/fbdev/ssd1307fb.c b/drivers/video/fbdev/ssd1307fb.c
index f599520374dd..6439231f2db2 100644
--- a/drivers/video/fbdev/ssd1307fb.c
+++ b/drivers/video/fbdev/ssd1307fb.c
@@ -628,7 +628,8 @@ static int ssd1307fb_probe(struct i2c_client *client,
 		goto fb_alloc_error;
 	}
 
-	ssd1307fb_defio = devm_kzalloc(&client->dev, sizeof(struct fb_deferred_io), GFP_KERNEL);
+	ssd1307fb_defio = devm_kzalloc(&client->dev, sizeof(*ssd1307fb_defio),
+				       GFP_KERNEL);
 	if (!ssd1307fb_defio) {
 		dev_err(&client->dev, "Couldn't allocate deferred io.\n");
 		ret = -ENOMEM;
diff --git a/drivers/video/fbdev/stifb.c b/drivers/video/fbdev/stifb.c
index 3c2e4cabc08f..045e8afe398b 100644
--- a/drivers/video/fbdev/stifb.c
+++ b/drivers/video/fbdev/stifb.c
@@ -1126,10 +1126,8 @@ static int __init stifb_init_fb(struct sti_struct *sti, int bpp_pref)
 	int bpp, xres, yres;
 
 	fb = kzalloc(sizeof(*fb), GFP_ATOMIC);
-	if (!fb) {
-		printk(KERN_ERR "stifb: Could not allocate stifb structure\n");
-		return -ENODEV;
-	}
+	if (!fb)
+		return -ENOMEM;
 	
 	info = &fb->info;
 
diff --git a/drivers/video/fbdev/udlfb.c b/drivers/video/fbdev/udlfb.c
index 452a4207ac1b..f365d4862015 100644
--- a/drivers/video/fbdev/udlfb.c
+++ b/drivers/video/fbdev/udlfb.c
@@ -428,7 +428,6 @@ static void dlfb_compress_hline(
 	const uint16_t *pixel = *pixel_start_ptr;
 	uint32_t dev_addr  = *device_address_ptr;
 	uint8_t *cmd = *command_buffer_ptr;
-	const int bpp = 2;
 
 	while ((pixel_end > pixel) &&
 	       (cmd_buffer_end - MIN_RLX_CMD_BYTES > cmd)) {
@@ -441,9 +440,9 @@ static void dlfb_compress_hline(
 
 		*cmd++ = 0xAF;
 		*cmd++ = 0x6B;
-		*cmd++ = (uint8_t) ((dev_addr >> 16) & 0xFF);
-		*cmd++ = (uint8_t) ((dev_addr >> 8) & 0xFF);
-		*cmd++ = (uint8_t) ((dev_addr) & 0xFF);
+		*cmd++ = dev_addr >> 16;
+		*cmd++ = dev_addr >> 8;
+		*cmd++ = dev_addr;
 
 		cmd_pixels_count_byte = cmd++; /*  we'll know this later */
 		cmd_pixel_start = pixel;
@@ -453,15 +452,15 @@ static void dlfb_compress_hline(
 
 		cmd_pixel_end = pixel + min(MAX_CMD_PIXELS + 1,
 			min((int)(pixel_end - pixel),
-			    (int)(cmd_buffer_end - cmd) / bpp));
+			    (int)(cmd_buffer_end - cmd) / BPP));
 
-		prefetch_range((void *) pixel, (cmd_pixel_end - pixel) * bpp);
+		prefetch_range((void *) pixel, (cmd_pixel_end - pixel) * BPP);
 
 		while (pixel < cmd_pixel_end) {
 			const uint16_t * const repeating_pixel = pixel;
 
-			*(uint16_t *)cmd = cpu_to_be16p(pixel);
-			cmd += 2;
+			*cmd++ = *pixel >> 8;
+			*cmd++ = *pixel;
 			pixel++;
 
 			if (unlikely((pixel < cmd_pixel_end) &&
@@ -490,7 +489,7 @@ static void dlfb_compress_hline(
 		}
 
 		*cmd_pixels_count_byte = (pixel - cmd_pixel_start) & 0xFF;
-		dev_addr += (pixel - cmd_pixel_start) * bpp;
+		dev_addr += (pixel - cmd_pixel_start) * BPP;
 	}
 
 	if (cmd_buffer_end <= MIN_RLX_CMD_BYTES + cmd) {
@@ -1136,7 +1135,6 @@ static struct fb_ops dlfb_ops = {
  */
 static int dlfb_realloc_framebuffer(struct dlfb_data *dlfb, struct fb_info *info)
 {
-	int retval = -ENOMEM;
 	int old_len = info->fix.smem_len;
 	int new_len;
 	unsigned char *old_fb = info->screen_base;
@@ -1152,7 +1150,7 @@ static int dlfb_realloc_framebuffer(struct dlfb_data *dlfb, struct fb_info *info
 		new_fb = vmalloc(new_len);
 		if (!new_fb) {
 			dev_err(info->dev, "Virtual framebuffer alloc failed\n");
-			goto error;
+			return -ENOMEM;
 		}
 
 		if (info->screen_base) {
@@ -1181,11 +1179,7 @@ static int dlfb_realloc_framebuffer(struct dlfb_data *dlfb, struct fb_info *info
 			dlfb->backing_buffer = new_back;
 		}
 	}
-
-	retval = 0;
-
-error:
-	return retval;
+	return 0;
 }
 
 /*
@@ -1531,15 +1525,16 @@ static int dlfb_parse_vendor_descriptor(struct dlfb_data *dlfb,
 			u8 length;
 			u16 key;
 
-			key = le16_to_cpu(*((u16 *) desc));
-			desc += sizeof(u16);
-			length = *desc;
-			desc++;
+			key = *desc++;
+			key |= (u16)*desc++ << 8;
+			length = *desc++;
 
 			switch (key) {
 			case 0x0200: { /* max_area */
-				u32 max_area;
-				max_area = le32_to_cpu(*((u32 *)desc));
+				u32 max_area = *desc++;
+				max_area |= (u32)*desc++ << 8;
+				max_area |= (u32)*desc++ << 16;
+				max_area |= (u32)*desc++ << 24;
 				dev_warn(&intf->dev,
 					 "DL chip limited to %d pixel modes\n",
 					 max_area);
diff --git a/drivers/video/fbdev/vermilion/vermilion.c b/drivers/video/fbdev/vermilion/vermilion.c
index 6f8d444eb0e3..5172fa581147 100644
--- a/drivers/video/fbdev/vermilion/vermilion.c
+++ b/drivers/video/fbdev/vermilion/vermilion.c
@@ -651,7 +651,7 @@ static int vmlfb_check_var_locked(struct fb_var_screeninfo *var,
 	}
 
 	pitch = ALIGN((var->xres * var->bits_per_pixel) >> 3, 0x40);
-	mem = pitch * var->yres_virtual;
+	mem = (u64)pitch * var->yres_virtual;
 	if (mem > vinfo->vram_contig_size) {
 		return -ENOMEM;
 	}
diff --git a/drivers/video/fbdev/via/via_aux_sii164.c b/drivers/video/fbdev/via/via_aux_sii164.c
index ca1b35f033b1..c27f62c2c75a 100644
--- a/drivers/video/fbdev/via/via_aux_sii164.c
+++ b/drivers/video/fbdev/via/via_aux_sii164.c
@@ -36,7 +36,7 @@ static void probe(struct via_aux_bus *bus, u8 addr)
 		.name	=	name};
 	/* check vendor id and device id */
 	const u8 id[] = {0x01, 0x00, 0x06, 0x00}, len = ARRAY_SIZE(id);
-	u8 tmp[len];
+	u8 tmp[ARRAY_SIZE(id)];
 
 	if (!via_aux_read(&drv, 0x00, tmp, len) || memcmp(id, tmp, len))
 		return;
diff --git a/drivers/video/fbdev/via/via_aux_vt1631.c b/drivers/video/fbdev/via/via_aux_vt1631.c
index 06e742f1f723..32978a0ccfd7 100644
--- a/drivers/video/fbdev/via/via_aux_vt1631.c
+++ b/drivers/video/fbdev/via/via_aux_vt1631.c
@@ -36,7 +36,7 @@ void via_aux_vt1631_probe(struct via_aux_bus *bus)
 		.name	=	name};
 	/* check vendor id and device id */
 	const u8 id[] = {0x06, 0x11, 0x91, 0x31}, len = ARRAY_SIZE(id);
-	u8 tmp[len];
+	u8 tmp[ARRAY_SIZE(id)];
 
 	if (!via_aux_read(&drv, 0x00, tmp, len) || memcmp(id, tmp, len))
 		return;
diff --git a/drivers/video/fbdev/via/via_aux_vt1632.c b/drivers/video/fbdev/via/via_aux_vt1632.c
index d24f4cd97401..cec8cc43d524 100644
--- a/drivers/video/fbdev/via/via_aux_vt1632.c
+++ b/drivers/video/fbdev/via/via_aux_vt1632.c
@@ -36,7 +36,7 @@ static void probe(struct via_aux_bus *bus, u8 addr)
 		.name	=	name};
 	/* check vendor id and device id */
 	const u8 id[] = {0x06, 0x11, 0x92, 0x31}, len = ARRAY_SIZE(id);
-	u8 tmp[len];
+	u8 tmp[ARRAY_SIZE(id)];
 
 	if (!via_aux_read(&drv, 0x00, tmp, len) || memcmp(id, tmp, len))
 		return;
diff --git a/drivers/video/fbdev/via/via_aux_vt1636.c b/drivers/video/fbdev/via/via_aux_vt1636.c
index 9e015c101d4d..2b10bc21ab79 100644
--- a/drivers/video/fbdev/via/via_aux_vt1636.c
+++ b/drivers/video/fbdev/via/via_aux_vt1636.c
@@ -36,7 +36,7 @@ void via_aux_vt1636_probe(struct via_aux_bus *bus)
 		.name	=	name};
 	/* check vendor id and device id */
 	const u8 id[] = {0x06, 0x11, 0x45, 0x33}, len = ARRAY_SIZE(id);
-	u8 tmp[len];
+	u8 tmp[ARRAY_SIZE(id)];
 
 	if (!via_aux_read(&drv, 0x00, tmp, len) || memcmp(id, tmp, len))
 		return;
diff --git a/drivers/video/of_display_timing.c b/drivers/video/of_display_timing.c
index 8ce0a99bf17c..83b8963c9657 100644
--- a/drivers/video/of_display_timing.c
+++ b/drivers/video/of_display_timing.c
@@ -244,23 +244,3 @@ dispfail:
 	return NULL;
 }
 EXPORT_SYMBOL_GPL(of_get_display_timings);
-
-/**
- * of_display_timings_exist - check if a display-timings node is provided
- * @np: device_node with the timing
- **/
-int of_display_timings_exist(const struct device_node *np)
-{
-	struct device_node *timings_np;
-
-	if (!np)
-		return -EINVAL;
-
-	timings_np = of_parse_phandle(np, "display-timings", 0);
-	if (!timings_np)
-		return -EINVAL;
-
-	of_node_put(timings_np);
-	return 1;
-}
-EXPORT_SYMBOL_GPL(of_display_timings_exist);
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index dfe5684000be..6b237e3f4983 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -272,6 +272,12 @@ static unsigned int update_balloon_stats(struct virtio_balloon *vb)
 				pages_to_bytes(events[PSWPOUT]));
 	update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, events[PGMAJFAULT]);
 	update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]);
+#ifdef CONFIG_HUGETLB_PAGE
+	update_stat(vb, idx++, VIRTIO_BALLOON_S_HTLB_PGALLOC,
+		    events[HTLB_BUDDY_PGALLOC]);
+	update_stat(vb, idx++, VIRTIO_BALLOON_S_HTLB_PGFAIL,
+		    events[HTLB_BUDDY_PGALLOC_FAIL]);
+#endif
 #endif
 	update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMFREE,
 				pages_to_bytes(i.freeram));
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index 66a7f5a2f474..9af07fd92763 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -514,6 +514,17 @@ config COH901327_WATCHDOG
 	  This watchdog is used to reset the system and thus cannot be
 	  compiled as a module.
 
+config NPCM7XX_WATCHDOG
+	bool "Nuvoton NPCM750 watchdog"
+	depends on ARCH_NPCM || COMPILE_TEST
+	default y if ARCH_NPCM750
+	select WATCHDOG_CORE
+	help
+	  Say Y here to include Watchdog timer support for the
+	  watchdog embedded into the NPCM7xx.
+	  This watchdog is used to reset the system and thus cannot be
+	  compiled as a module.
+
 config TWL4030_WATCHDOG
 	tristate "TWL4030 Watchdog"
 	depends on TWL4030_CORE
@@ -1102,6 +1113,7 @@ config IT87_WDT
 
 config HP_WATCHDOG
 	tristate "HP ProLiant iLO2+ Hardware Watchdog Timer"
+	select WATCHDOG_CORE
 	depends on X86 && PCI
 	help
 	  A software monitoring watchdog and NMI sourcing driver. This driver
diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
index e4dd91f5585a..1d3c6b094fe5 100644
--- a/drivers/watchdog/Makefile
+++ b/drivers/watchdog/Makefile
@@ -61,6 +61,7 @@ obj-$(CONFIG_ORION_WATCHDOG) += orion_wdt.o
 obj-$(CONFIG_SUNXI_WATCHDOG) += sunxi_wdt.o
 obj-$(CONFIG_RN5T618_WATCHDOG) += rn5t618_wdt.o
 obj-$(CONFIG_COH901327_WATCHDOG) += coh901327_wdt.o
+obj-$(CONFIG_NPCM7XX_WATCHDOG) += npcm_wdt.o
 obj-$(CONFIG_STMP3XXX_RTC_WATCHDOG) += stmp3xxx_rtc_wdt.o
 obj-$(CONFIG_NUC900_WATCHDOG) += nuc900_wdt.o
 obj-$(CONFIG_TS4800_WATCHDOG) += ts4800_wdt.o
diff --git a/drivers/watchdog/ar7_wdt.c b/drivers/watchdog/ar7_wdt.c
index 6d5ae251e309..ee1ab12ab04f 100644
--- a/drivers/watchdog/ar7_wdt.c
+++ b/drivers/watchdog/ar7_wdt.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * drivers/watchdog/ar7_wdt.c
  *
@@ -8,19 +9,6 @@
  * National Semiconductor SCx200 Watchdog support
  * Copyright (c) 2001,2002 Christer Weinigel <wingel@nano-system.com>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/drivers/watchdog/asm9260_wdt.c b/drivers/watchdog/asm9260_wdt.c
index 7dd0da644a7f..2cf56b459d84 100644
--- a/drivers/watchdog/asm9260_wdt.c
+++ b/drivers/watchdog/asm9260_wdt.c
@@ -292,14 +292,14 @@ static int asm9260_wdt_probe(struct platform_device *pdev)
 	if (IS_ERR(priv->iobase))
 		return PTR_ERR(priv->iobase);
 
-	ret = asm9260_wdt_get_dt_clks(priv);
-	if (ret)
-		return ret;
-
 	priv->rst = devm_reset_control_get_exclusive(&pdev->dev, "wdt_rst");
 	if (IS_ERR(priv->rst))
 		return PTR_ERR(priv->rst);
 
+	ret = asm9260_wdt_get_dt_clks(priv);
+	if (ret)
+		return ret;
+
 	wdd = &priv->wdd;
 	wdd->info = &asm9260_wdt_ident;
 	wdd->ops = &asm9260_wdt_ops;
diff --git a/drivers/watchdog/aspeed_wdt.c b/drivers/watchdog/aspeed_wdt.c
index ca5b91e2eb92..a5b8eb21201f 100644
--- a/drivers/watchdog/aspeed_wdt.c
+++ b/drivers/watchdog/aspeed_wdt.c
@@ -46,6 +46,7 @@ MODULE_DEVICE_TABLE(of, aspeed_wdt_of_table);
 #define WDT_RELOAD_VALUE	0x04
 #define WDT_RESTART		0x08
 #define WDT_CTRL		0x0C
+#define   WDT_CTRL_BOOT_SECONDARY	BIT(7)
 #define   WDT_CTRL_RESET_MODE_SOC	(0x00 << 5)
 #define   WDT_CTRL_RESET_MODE_FULL_CHIP	(0x01 << 5)
 #define   WDT_CTRL_RESET_MODE_ARM_CPU	(0x10 << 5)
@@ -158,6 +159,7 @@ static int aspeed_wdt_restart(struct watchdog_device *wdd,
 {
 	struct aspeed_wdt *wdt = to_aspeed_wdt(wdd);
 
+	wdt->ctrl &= ~WDT_CTRL_BOOT_SECONDARY;
 	aspeed_wdt_enable(wdt, 128 * WDT_RATE_1MHZ / 1000);
 
 	mdelay(1000);
@@ -232,16 +234,21 @@ static int aspeed_wdt_probe(struct platform_device *pdev)
 		wdt->ctrl |= WDT_CTRL_RESET_MODE_SOC | WDT_CTRL_RESET_SYSTEM;
 	} else {
 		if (!strcmp(reset_type, "cpu"))
-			wdt->ctrl |= WDT_CTRL_RESET_MODE_ARM_CPU;
+			wdt->ctrl |= WDT_CTRL_RESET_MODE_ARM_CPU |
+				     WDT_CTRL_RESET_SYSTEM;
 		else if (!strcmp(reset_type, "soc"))
-			wdt->ctrl |= WDT_CTRL_RESET_MODE_SOC;
+			wdt->ctrl |= WDT_CTRL_RESET_MODE_SOC |
+				     WDT_CTRL_RESET_SYSTEM;
 		else if (!strcmp(reset_type, "system"))
-			wdt->ctrl |= WDT_CTRL_RESET_SYSTEM;
+			wdt->ctrl |= WDT_CTRL_RESET_MODE_FULL_CHIP |
+				     WDT_CTRL_RESET_SYSTEM;
 		else if (strcmp(reset_type, "none"))
 			return -EINVAL;
 	}
 	if (of_property_read_bool(np, "aspeed,external-signal"))
 		wdt->ctrl |= WDT_CTRL_WDT_EXT;
+	if (of_property_read_bool(np, "aspeed,alt-boot"))
+		wdt->ctrl |= WDT_CTRL_BOOT_SECONDARY;
 
 	if (readl(wdt->base + WDT_CTRL) & WDT_CTRL_ENABLE)  {
 		/*
diff --git a/drivers/watchdog/at91rm9200_wdt.c b/drivers/watchdog/at91rm9200_wdt.c
index e12a797cb820..b45fc0aee667 100644
--- a/drivers/watchdog/at91rm9200_wdt.c
+++ b/drivers/watchdog/at91rm9200_wdt.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * Watchdog driver for Atmel AT91RM9200 (Thunder)
  *
  *  Copyright (C) 2003 SAN People (Pty) Ltd
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/drivers/watchdog/at91sam9_wdt.c b/drivers/watchdog/at91sam9_wdt.c
index 88c05d0448b2..f4050a229eb5 100644
--- a/drivers/watchdog/at91sam9_wdt.c
+++ b/drivers/watchdog/at91sam9_wdt.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * Watchdog driver for Atmel AT91SAM9x processors.
  *
  * Copyright (C) 2008 Renaud CERRATO r.cerrato@til-technologies.fr
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 /*
diff --git a/drivers/watchdog/at91sam9_wdt.h b/drivers/watchdog/at91sam9_wdt.h
index b79a83b467ce..390941c65eee 100644
--- a/drivers/watchdog/at91sam9_wdt.h
+++ b/drivers/watchdog/at91sam9_wdt.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * drivers/watchdog/at91sam9_wdt.h
  *
@@ -7,10 +8,6 @@
  * Watchdog Timer (WDT) - System peripherals regsters.
  * Based on AT91SAM9261 datasheet revision D.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #ifndef AT91_WDT_H
diff --git a/drivers/watchdog/bcm2835_wdt.c b/drivers/watchdog/bcm2835_wdt.c
index b339e0e67b4c..ed05514cc2dc 100644
--- a/drivers/watchdog/bcm2835_wdt.c
+++ b/drivers/watchdog/bcm2835_wdt.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * Watchdog driver for Broadcom BCM2835
  *
@@ -7,10 +8,6 @@
  *
  * Copyright (C) 2013 Lubomir Rintel <lkundrak@v3.sk>
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
  */
 
 #include <linux/delay.h>
diff --git a/drivers/watchdog/bcm47xx_wdt.c b/drivers/watchdog/bcm47xx_wdt.c
index f41b756d6dd5..05425c1dfd4c 100644
--- a/drivers/watchdog/bcm47xx_wdt.c
+++ b/drivers/watchdog/bcm47xx_wdt.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  *  Watchdog driver for Broadcom BCM47XX
  *
@@ -5,10 +6,6 @@
  *  Copyright (C) 2009 Matthieu CASTET <castet.matthieu@free.fr>
  *  Copyright (C) 2012-2013 Hauke Mehrtens <hauke@hauke-m.de>
  *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/drivers/watchdog/bcm63xx_wdt.c b/drivers/watchdog/bcm63xx_wdt.c
index 8555afc70f9b..d3c1113e774c 100644
--- a/drivers/watchdog/bcm63xx_wdt.c
+++ b/drivers/watchdog/bcm63xx_wdt.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  *  Broadcom BCM63xx SoC watchdog driver
  *
  *  Copyright (C) 2007, Miguel Gaio <miguel.gaio@efixo.com>
  *  Copyright (C) 2008, Florian Fainelli <florian@openwrt.org>
  *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/drivers/watchdog/bcm7038_wdt.c b/drivers/watchdog/bcm7038_wdt.c
index f88f546e8050..ce3f646e8077 100644
--- a/drivers/watchdog/bcm7038_wdt.c
+++ b/drivers/watchdog/bcm7038_wdt.c
@@ -1,15 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * Copyright (C) 2015 Broadcom Corporation
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/clk.h>
@@ -235,6 +227,6 @@ module_platform_driver(bcm7038_wdt_driver);
 module_param(nowayout, bool, 0);
 MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
 	__MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
-MODULE_LICENSE("GPL v2");
+MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Driver for Broadcom 7038 SoCs Watchdog");
 MODULE_AUTHOR("Justin Chen");
diff --git a/drivers/watchdog/bcm_kona_wdt.c b/drivers/watchdog/bcm_kona_wdt.c
index a5775dfd8d5f..1462be9e6fc5 100644
--- a/drivers/watchdog/bcm_kona_wdt.c
+++ b/drivers/watchdog/bcm_kona_wdt.c
@@ -1,14 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2013 Broadcom Corporation
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation version 2.
- *
- * This program is distributed "as is" WITHOUT ANY WARRANTY of any
- * kind, whether express or implied; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/debugfs.h>
diff --git a/drivers/watchdog/cadence_wdt.c b/drivers/watchdog/cadence_wdt.c
index 064cf7b6c1c5..3ec1f418837d 100644
--- a/drivers/watchdog/cadence_wdt.c
+++ b/drivers/watchdog/cadence_wdt.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * Cadence WDT driver - Used by Xilinx Zynq
  *
  * Copyright (C) 2010 - 2014 Xilinx, Inc.
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #include <linux/clk.h>
diff --git a/drivers/watchdog/coh901327_wdt.c b/drivers/watchdog/coh901327_wdt.c
index 4410337f4f7f..e3a78f927f83 100644
--- a/drivers/watchdog/coh901327_wdt.c
+++ b/drivers/watchdog/coh901327_wdt.c
@@ -1,8 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * coh901327_wdt.c
  *
  * Copyright (C) 2008-2009 ST-Ericsson AB
- * License terms: GNU General Public License (GPL) version 2
  * Watchdog driver for the ST-Ericsson AB COH 901 327 IP core
  * Author: Linus Walleij <linus.walleij@stericsson.com>
  */
@@ -67,7 +67,9 @@
 #define U300_WDOG_IFR_WILL_BARK_IRQ_FORCE_ENABLE			0x0001U
 
 /* Default timeout in seconds = 1 minute */
-static unsigned int margin = 60;
+#define U300_WDOG_DEFAULT_TIMEOUT					60
+
+static unsigned int margin;
 static int irq;
 static void __iomem *virtbase;
 static struct device *parent;
@@ -235,8 +237,9 @@ static struct watchdog_device coh901327_wdt = {
 	 * timeout register is max
 	 * 0x7FFF = 327670ms ~= 327s.
 	 */
-	.min_timeout = 0,
+	.min_timeout = 1,
 	.max_timeout = 327,
+	.timeout = U300_WDOG_DEFAULT_TIMEOUT,
 };
 
 static int __exit coh901327_remove(struct platform_device *pdev)
@@ -315,16 +318,15 @@ static int __init coh901327_probe(struct platform_device *pdev)
 		goto out_no_irq;
 	}
 
-	ret = watchdog_init_timeout(&coh901327_wdt, margin, dev);
-	if (ret < 0)
-		coh901327_wdt.timeout = 60;
+	watchdog_init_timeout(&coh901327_wdt, margin, dev);
 
 	coh901327_wdt.parent = dev;
 	ret = watchdog_register_device(&coh901327_wdt);
 	if (ret)
 		goto out_no_wdog;
 
-	dev_info(dev, "initialized. timer margin=%d sec\n", margin);
+	dev_info(dev, "initialized. (timeout=%d sec)\n",
+			coh901327_wdt.timeout);
 	return 0;
 
 out_no_wdog:
@@ -419,5 +421,5 @@ MODULE_DESCRIPTION("COH 901 327 Watchdog");
 module_param(margin, uint, 0);
 MODULE_PARM_DESC(margin, "Watchdog margin in seconds (default 60s)");
 
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
 MODULE_ALIAS("platform:coh901327-watchdog");
diff --git a/drivers/watchdog/da9052_wdt.c b/drivers/watchdog/da9052_wdt.c
index d6d5006efa71..e263bad99574 100644
--- a/drivers/watchdog/da9052_wdt.c
+++ b/drivers/watchdog/da9052_wdt.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * System monitoring driver for DA9052 PMICs.
  *
@@ -5,11 +6,6 @@
  *
  * Author: Anthony Olech <Anthony.Olech@diasemi.com>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
  */
 
 #include <linux/module.h>
diff --git a/drivers/watchdog/da9055_wdt.c b/drivers/watchdog/da9055_wdt.c
index 50bdd1022186..26a5b2984094 100644
--- a/drivers/watchdog/da9055_wdt.c
+++ b/drivers/watchdog/da9055_wdt.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * System monitoring driver for DA9055 PMICs.
  *
@@ -5,11 +6,6 @@
  *
  * Author: David Dajun Chen <dchen@diasemi.com>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
  */
 
 #include <linux/module.h>
diff --git a/drivers/watchdog/da9062_wdt.c b/drivers/watchdog/da9062_wdt.c
index 814dff6045a4..a001782bbfdb 100644
--- a/drivers/watchdog/da9062_wdt.c
+++ b/drivers/watchdog/da9062_wdt.c
@@ -1,16 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * Watchdog device driver for DA9062 and DA9061 PMICs
  * Copyright (C) 2015  Dialog Semiconductor Ltd.
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/kernel.h>
diff --git a/drivers/watchdog/da9063_wdt.c b/drivers/watchdog/da9063_wdt.c
index 2a20fc163ed0..b17ac1bb1f28 100644
--- a/drivers/watchdog/da9063_wdt.c
+++ b/drivers/watchdog/da9063_wdt.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * Watchdog driver for DA9063 PMICs.
  *
@@ -5,10 +6,6 @@
  *
  * Author: Mariusz Wojtasik <mariusz.wojtasik@diasemi.com>
  *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
  */
 
 #include <linux/kernel.h>
diff --git a/drivers/watchdog/davinci_wdt.c b/drivers/watchdog/davinci_wdt.c
index 3e4c592c239f..6c6594261cb7 100644
--- a/drivers/watchdog/davinci_wdt.c
+++ b/drivers/watchdog/davinci_wdt.c
@@ -236,15 +236,22 @@ static int davinci_wdt_probe(struct platform_device *pdev)
 
 	wdt_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	davinci_wdt->base = devm_ioremap_resource(dev, wdt_mem);
-	if (IS_ERR(davinci_wdt->base))
-		return PTR_ERR(davinci_wdt->base);
+	if (IS_ERR(davinci_wdt->base)) {
+		ret = PTR_ERR(davinci_wdt->base);
+		goto err_clk_disable;
+	}
 
 	ret = watchdog_register_device(wdd);
-	if (ret < 0) {
-		clk_disable_unprepare(davinci_wdt->clk);
+	if (ret) {
 		dev_err(dev, "cannot register watchdog device\n");
+		goto err_clk_disable;
 	}
 
+	return 0;
+
+err_clk_disable:
+	clk_disable_unprepare(davinci_wdt->clk);
+
 	return ret;
 }
 
diff --git a/drivers/watchdog/digicolor_wdt.c b/drivers/watchdog/digicolor_wdt.c
index 5e4ef93caa02..a9e11df155b8 100644
--- a/drivers/watchdog/digicolor_wdt.c
+++ b/drivers/watchdog/digicolor_wdt.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * Watchdog driver for Conexant Digicolor
  *
  * Copyright (C) 2015 Paradox Innovation Ltd.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
  */
 
 #include <linux/types.h>
diff --git a/drivers/watchdog/dw_wdt.c b/drivers/watchdog/dw_wdt.c
index c2f4ff516230..501aebb5b81f 100644
--- a/drivers/watchdog/dw_wdt.c
+++ b/drivers/watchdog/dw_wdt.c
@@ -34,6 +34,7 @@
 
 #define WDOG_CONTROL_REG_OFFSET		    0x00
 #define WDOG_CONTROL_REG_WDT_EN_MASK	    0x01
+#define WDOG_CONTROL_REG_RESP_MODE_MASK	    0x02
 #define WDOG_TIMEOUT_RANGE_REG_OFFSET	    0x04
 #define WDOG_TIMEOUT_RANGE_TOPINIT_SHIFT    4
 #define WDOG_CURRENT_COUNT_REG_OFFSET	    0x08
@@ -56,6 +57,9 @@ struct dw_wdt {
 	unsigned long		rate;
 	struct watchdog_device	wdd;
 	struct reset_control	*rst;
+	/* Save/restore */
+	u32			control;
+	u32			timeout;
 };
 
 #define to_dw_wdt(wdd)	container_of(wdd, struct dw_wdt, wdd)
@@ -121,14 +125,23 @@ static int dw_wdt_set_timeout(struct watchdog_device *wdd, unsigned int top_s)
 	return 0;
 }
 
+static void dw_wdt_arm_system_reset(struct dw_wdt *dw_wdt)
+{
+	u32 val = readl(dw_wdt->regs + WDOG_CONTROL_REG_OFFSET);
+
+	/* Disable interrupt mode; always perform system reset. */
+	val &= ~WDOG_CONTROL_REG_RESP_MODE_MASK;
+	/* Enable watchdog. */
+	val |= WDOG_CONTROL_REG_WDT_EN_MASK;
+	writel(val, dw_wdt->regs + WDOG_CONTROL_REG_OFFSET);
+}
+
 static int dw_wdt_start(struct watchdog_device *wdd)
 {
 	struct dw_wdt *dw_wdt = to_dw_wdt(wdd);
 
 	dw_wdt_set_timeout(wdd, wdd->timeout);
-
-	writel(WDOG_CONTROL_REG_WDT_EN_MASK,
-	       dw_wdt->regs + WDOG_CONTROL_REG_OFFSET);
+	dw_wdt_arm_system_reset(dw_wdt);
 
 	return 0;
 }
@@ -152,16 +165,13 @@ static int dw_wdt_restart(struct watchdog_device *wdd,
 			  unsigned long action, void *data)
 {
 	struct dw_wdt *dw_wdt = to_dw_wdt(wdd);
-	u32 val;
 
 	writel(0, dw_wdt->regs + WDOG_TIMEOUT_RANGE_REG_OFFSET);
-	val = readl(dw_wdt->regs + WDOG_CONTROL_REG_OFFSET);
-	if (val & WDOG_CONTROL_REG_WDT_EN_MASK)
+	if (dw_wdt_is_enabled(dw_wdt))
 		writel(WDOG_COUNTER_RESTART_KICK_VALUE,
 		       dw_wdt->regs + WDOG_COUNTER_RESTART_REG_OFFSET);
 	else
-		writel(WDOG_CONTROL_REG_WDT_EN_MASK,
-		       dw_wdt->regs + WDOG_CONTROL_REG_OFFSET);
+		dw_wdt_arm_system_reset(dw_wdt);
 
 	/* wait for reset to assert... */
 	mdelay(500);
@@ -198,6 +208,9 @@ static int dw_wdt_suspend(struct device *dev)
 {
 	struct dw_wdt *dw_wdt = dev_get_drvdata(dev);
 
+	dw_wdt->control = readl(dw_wdt->regs + WDOG_CONTROL_REG_OFFSET);
+	dw_wdt->timeout = readl(dw_wdt->regs + WDOG_TIMEOUT_RANGE_REG_OFFSET);
+
 	clk_disable_unprepare(dw_wdt->clk);
 
 	return 0;
@@ -211,6 +224,9 @@ static int dw_wdt_resume(struct device *dev)
 	if (err)
 		return err;
 
+	writel(dw_wdt->timeout, dw_wdt->regs + WDOG_TIMEOUT_RANGE_REG_OFFSET);
+	writel(dw_wdt->control, dw_wdt->regs + WDOG_CONTROL_REG_OFFSET);
+
 	dw_wdt_ping(&dw_wdt->wdd);
 
 	return 0;
diff --git a/drivers/watchdog/ebc-c384_wdt.c b/drivers/watchdog/ebc-c384_wdt.c
index 2170b275ea01..4c4c8ce78021 100644
--- a/drivers/watchdog/ebc-c384_wdt.c
+++ b/drivers/watchdog/ebc-c384_wdt.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Watchdog timer driver for the WinSystems EBC-C384
  * Copyright (C) 2016 William Breathitt Gray
diff --git a/drivers/watchdog/f71808e_wdt.c b/drivers/watchdog/f71808e_wdt.c
index 3a33c5344bd5..9a1c761258ce 100644
--- a/drivers/watchdog/f71808e_wdt.c
+++ b/drivers/watchdog/f71808e_wdt.c
@@ -496,7 +496,7 @@ static bool watchdog_is_running(void)
 
 	is_running = (superio_inb(watchdog.sioaddr, SIO_REG_ENABLE) & BIT(0))
 		&& (superio_inb(watchdog.sioaddr, F71808FG_REG_WDT_CONF)
-			& F71808FG_FLAG_WD_EN);
+			& BIT(F71808FG_FLAG_WD_EN));
 
 	superio_exit(watchdog.sioaddr);
 
diff --git a/drivers/watchdog/gpio_wdt.c b/drivers/watchdog/gpio_wdt.c
index 3ade28190341..ea77cae03c9d 100644
--- a/drivers/watchdog/gpio_wdt.c
+++ b/drivers/watchdog/gpio_wdt.c
@@ -152,9 +152,9 @@ static int gpio_wdt_probe(struct platform_device *pdev)
 	priv->wdd.min_timeout	= SOFT_TIMEOUT_MIN;
 	priv->wdd.max_hw_heartbeat_ms = hw_margin;
 	priv->wdd.parent	= dev;
+	priv->wdd.timeout	= SOFT_TIMEOUT_DEF;
 
-	if (watchdog_init_timeout(&priv->wdd, 0, dev) < 0)
-		priv->wdd.timeout = SOFT_TIMEOUT_DEF;
+	watchdog_init_timeout(&priv->wdd, 0, &pdev->dev);
 
 	watchdog_stop_on_reboot(&priv->wdd);
 
diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c
index b0a158073abd..a43ab2cecca2 100644
--- a/drivers/watchdog/hpwdt.c
+++ b/drivers/watchdog/hpwdt.c
@@ -4,7 +4,7 @@
  *
  *	SoftDog	0.05:	A Software Watchdog Device
  *
- *	(c) Copyright 2015 Hewlett Packard Enterprise Development LP
+ *	(c) Copyright 2018 Hewlett Packard Enterprise Development LP
  *	Thomas Mingarelli <thomas.mingarelli@hpe.com>
  *
  *	This program is free software; you can redistribute it and/or
@@ -16,34 +16,27 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/device.h>
-#include <linux/fs.h>
 #include <linux/io.h>
-#include <linux/bitops.h>
 #include <linux/kernel.h>
-#include <linux/miscdevice.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/pci.h>
 #include <linux/pci_ids.h>
 #include <linux/types.h>
-#include <linux/uaccess.h>
 #include <linux/watchdog.h>
 #include <asm/nmi.h>
 
-#define HPWDT_VERSION			"1.4.0"
+#define HPWDT_VERSION			"2.0.0"
 #define SECS_TO_TICKS(secs)		((secs) * 1000 / 128)
 #define TICKS_TO_SECS(ticks)		((ticks) * 128 / 1000)
 #define HPWDT_MAX_TIMER			TICKS_TO_SECS(65535)
 #define DEFAULT_MARGIN			30
+#define PRETIMEOUT_SEC			9
 
+static bool ilo5;
 static unsigned int soft_margin = DEFAULT_MARGIN;	/* in seconds */
-static unsigned int reload;			/* the computed soft_margin */
 static bool nowayout = WATCHDOG_NOWAYOUT;
-#ifdef CONFIG_HPWDT_NMI_DECODING
-static unsigned int allow_kdump = 1;
-#endif
-static char expect_release;
-static unsigned long hpwdt_is_open;
+static bool pretimeout = IS_ENABLED(CONFIG_HPWDT_NMI_DECODING);
 
 static void __iomem *pci_mem_addr;		/* the PCI-memory address */
 static unsigned long __iomem *hpwdt_nmistat;
@@ -61,48 +54,92 @@ MODULE_DEVICE_TABLE(pci, hpwdt_devices);
 /*
  *	Watchdog operations
  */
-static void hpwdt_start(void)
+static int hpwdt_start(struct watchdog_device *wdd)
 {
-	reload = SECS_TO_TICKS(soft_margin);
+	int control = 0x81 | (pretimeout ? 0x4 : 0);
+	int reload = SECS_TO_TICKS(wdd->timeout);
+
+	dev_dbg(wdd->parent, "start watchdog 0x%08x:0x%02x\n", reload, control);
 	iowrite16(reload, hpwdt_timer_reg);
-	iowrite8(0x85, hpwdt_timer_con);
+	iowrite8(control, hpwdt_timer_con);
+
+	return 0;
 }
 
 static void hpwdt_stop(void)
 {
 	unsigned long data;
 
+	pr_debug("stop  watchdog\n");
+
 	data = ioread8(hpwdt_timer_con);
 	data &= 0xFE;
 	iowrite8(data, hpwdt_timer_con);
 }
 
-static void hpwdt_ping(void)
+static int hpwdt_stop_core(struct watchdog_device *wdd)
 {
-	iowrite16(reload, hpwdt_timer_reg);
+	hpwdt_stop();
+
+	return 0;
 }
 
-static int hpwdt_change_timer(int new_margin)
+static int hpwdt_ping(struct watchdog_device *wdd)
 {
-	if (new_margin < 1 || new_margin > HPWDT_MAX_TIMER) {
-		pr_warn("New value passed in is invalid: %d seconds\n",
-			new_margin);
-		return -EINVAL;
-	}
+	int reload = SECS_TO_TICKS(wdd->timeout);
 
-	soft_margin = new_margin;
-	pr_debug("New timer passed in is %d seconds\n", new_margin);
-	reload = SECS_TO_TICKS(soft_margin);
+	dev_dbg(wdd->parent, "ping  watchdog 0x%08x\n", reload);
+	iowrite16(reload, hpwdt_timer_reg);
 
 	return 0;
 }
 
-static int hpwdt_time_left(void)
+static unsigned int hpwdt_gettimeleft(struct watchdog_device *wdd)
 {
 	return TICKS_TO_SECS(ioread16(hpwdt_timer_reg));
 }
 
+static int hpwdt_settimeout(struct watchdog_device *wdd, unsigned int val)
+{
+	dev_dbg(wdd->parent, "set_timeout = %d\n", val);
+
+	wdd->timeout = val;
+	if (val <= wdd->pretimeout) {
+		dev_dbg(wdd->parent, "pretimeout < timeout. Setting to zero\n");
+		wdd->pretimeout = 0;
+		pretimeout = 0;
+		if (watchdog_active(wdd))
+			hpwdt_start(wdd);
+	}
+	hpwdt_ping(wdd);
+
+	return 0;
+}
+
 #ifdef CONFIG_HPWDT_NMI_DECODING
+static int hpwdt_set_pretimeout(struct watchdog_device *wdd, unsigned int req)
+{
+	unsigned int val = 0;
+
+	dev_dbg(wdd->parent, "set_pretimeout = %d\n", req);
+	if (req) {
+		val = PRETIMEOUT_SEC;
+		if (val >= wdd->timeout)
+			return -EINVAL;
+	}
+
+	if (val != req)
+		dev_dbg(wdd->parent, "Rounding pretimeout to: %d\n", val);
+
+	wdd->pretimeout = val;
+	pretimeout = !!val;
+
+	if (watchdog_active(wdd))
+		hpwdt_start(wdd);
+
+	return 0;
+}
+
 static int hpwdt_my_nmi(void)
 {
 	return ioread8(hpwdt_nmistat) & 0x6;
@@ -113,178 +150,71 @@ static int hpwdt_my_nmi(void)
  */
 static int hpwdt_pretimeout(unsigned int ulReason, struct pt_regs *regs)
 {
-	if ((ulReason == NMI_UNKNOWN) && !hpwdt_my_nmi())
-		return NMI_DONE;
-
-	if (allow_kdump)
-		hpwdt_stop();
-
-	nmi_panic(regs, "An NMI occurred. Depending on your system the reason "
-		"for the NMI is logged in any one of the following "
-		"resources:\n"
+	unsigned int mynmi = hpwdt_my_nmi();
+	static char panic_msg[] =
+		"00: An NMI occurred. Depending on your system the reason "
+		"for the NMI is logged in any one of the following resources:\n"
 		"1. Integrated Management Log (IML)\n"
 		"2. OA Syslog\n"
 		"3. OA Forward Progress Log\n"
-		"4. iLO Event Log");
-
-	return NMI_HANDLED;
-}
-#endif /* CONFIG_HPWDT_NMI_DECODING */
-
-/*
- *	/dev/watchdog handling
- */
-static int hpwdt_open(struct inode *inode, struct file *file)
-{
-	/* /dev/watchdog can only be opened once */
-	if (test_and_set_bit(0, &hpwdt_is_open))
-		return -EBUSY;
+		"4. iLO Event Log";
 
-	/* Start the watchdog */
-	hpwdt_start();
-	hpwdt_ping();
-
-	return nonseekable_open(inode, file);
-}
+	if (ilo5 && ulReason == NMI_UNKNOWN && !mynmi)
+		return NMI_DONE;	/* hpwdt_nmistat bits clear: not ours */
 
-static int hpwdt_release(struct inode *inode, struct file *file)
-{
-	/* Stop the watchdog */
-	if (expect_release == 42) {
-		hpwdt_stop();
-	} else {
-		pr_crit("Unexpected close, not stopping watchdog!\n");
-		hpwdt_ping();
-	}
+	if (ilo5 && !pretimeout)
+		return NMI_DONE;
 
-	expect_release = 0;
+	hpwdt_stop();
 
-	/* /dev/watchdog is being closed, make sure it can be re-opened */
-	clear_bit(0, &hpwdt_is_open);
+	hex_byte_pack(panic_msg, mynmi);
+	nmi_panic(regs, panic_msg);
 
-	return 0;
+	return NMI_HANDLED;
 }
+#endif /* CONFIG_HPWDT_NMI_DECODING */
 
-static ssize_t hpwdt_write(struct file *file, const char __user *data,
-	size_t len, loff_t *ppos)
-{
-	/* See if we got the magic character 'V' and reload the timer */
-	if (len) {
-		if (!nowayout) {
-			size_t i;
-
-			/* note: just in case someone wrote the magic character
-			 * five months ago... */
-			expect_release = 0;
-
-			/* scan to see whether or not we got the magic char. */
-			for (i = 0; i != len; i++) {
-				char c;
-				if (get_user(c, data + i))
-					return -EFAULT;
-				if (c == 'V')
-					expect_release = 42;
-			}
-		}
-
-		/* someone wrote to us, we should reload the timer */
-		hpwdt_ping();
-	}
-
-	return len;
-}
 
 static const struct watchdog_info ident = {
-	.options = WDIOF_SETTIMEOUT |
+	.options = WDIOF_PRETIMEOUT    |
+		   WDIOF_SETTIMEOUT    |
 		   WDIOF_KEEPALIVEPING |
 		   WDIOF_MAGICCLOSE,
 	.identity = "HPE iLO2+ HW Watchdog Timer",
 };
 
-static long hpwdt_ioctl(struct file *file, unsigned int cmd,
-	unsigned long arg)
-{
-	void __user *argp = (void __user *)arg;
-	int __user *p = argp;
-	int new_margin, options;
-	int ret = -ENOTTY;
-
-	switch (cmd) {
-	case WDIOC_GETSUPPORT:
-		ret = 0;
-		if (copy_to_user(argp, &ident, sizeof(ident)))
-			ret = -EFAULT;
-		break;
-
-	case WDIOC_GETSTATUS:
-	case WDIOC_GETBOOTSTATUS:
-		ret = put_user(0, p);
-		break;
-
-	case WDIOC_KEEPALIVE:
-		hpwdt_ping();
-		ret = 0;
-		break;
-
-	case WDIOC_SETOPTIONS:
-		ret = get_user(options, p);
-		if (ret)
-			break;
-
-		if (options & WDIOS_DISABLECARD)
-			hpwdt_stop();
-
-		if (options & WDIOS_ENABLECARD) {
-			hpwdt_start();
-			hpwdt_ping();
-		}
-		break;
-
-	case WDIOC_SETTIMEOUT:
-		ret = get_user(new_margin, p);
-		if (ret)
-			break;
-
-		ret = hpwdt_change_timer(new_margin);
-		if (ret)
-			break;
-
-		hpwdt_ping();
-		/* Fall */
-	case WDIOC_GETTIMEOUT:
-		ret = put_user(soft_margin, p);
-		break;
-
-	case WDIOC_GETTIMELEFT:
-		ret = put_user(hpwdt_time_left(), p);
-		break;
-	}
-	return ret;
-}
-
 /*
  *	Kernel interfaces
  */
-static const struct file_operations hpwdt_fops = {
-	.owner = THIS_MODULE,
-	.llseek = no_llseek,
-	.write = hpwdt_write,
-	.unlocked_ioctl = hpwdt_ioctl,
-	.open = hpwdt_open,
-	.release = hpwdt_release,
+
+static const struct watchdog_ops hpwdt_ops = {
+	.owner		= THIS_MODULE,
+	.start		= hpwdt_start,
+	.stop		= hpwdt_stop_core,
+	.ping		= hpwdt_ping,
+	.set_timeout	= hpwdt_settimeout,
+	.get_timeleft	= hpwdt_gettimeleft,
+#ifdef CONFIG_HPWDT_NMI_DECODING
+	.set_pretimeout	= hpwdt_set_pretimeout,
+#endif
 };
 
-static struct miscdevice hpwdt_miscdev = {
-	.minor = WATCHDOG_MINOR,
-	.name = "watchdog",
-	.fops = &hpwdt_fops,
+static struct watchdog_device hpwdt_dev = {
+	.info		= &ident,
+	.ops		= &hpwdt_ops,
+	.min_timeout	= 1,
+	.max_timeout	= HPWDT_MAX_TIMER,
+	.timeout	= DEFAULT_MARGIN,
+#ifdef CONFIG_HPWDT_NMI_DECODING
+	.pretimeout	= PRETIMEOUT_SEC,
+#endif
 };
 
+
 /*
  *	Init & Exit
  */
 
-
 static int hpwdt_init_nmi_decoding(struct pci_dev *dev)
 {
 #ifdef CONFIG_HPWDT_NMI_DECODING
@@ -303,9 +233,8 @@ static int hpwdt_init_nmi_decoding(struct pci_dev *dev)
 		goto error2;
 
 	dev_info(&dev->dev,
-			"HPE Watchdog Timer Driver: NMI decoding initialized"
-			", allow kernel dump: %s (default = 1/ON)\n",
-			(allow_kdump == 0) ? "OFF" : "ON");
+		"HPE Watchdog Timer Driver: NMI decoding initialized\n");
+
 	return 0;
 
 error2:
@@ -375,29 +304,32 @@ static int hpwdt_init_one(struct pci_dev *dev,
 	/* Make sure that timer is disabled until /dev/watchdog is opened */
 	hpwdt_stop();
 
-	/* Make sure that we have a valid soft_margin */
-	if (hpwdt_change_timer(soft_margin))
-		hpwdt_change_timer(DEFAULT_MARGIN);
-
 	/* Initialize NMI Decoding functionality */
 	retval = hpwdt_init_nmi_decoding(dev);
 	if (retval != 0)
 		goto error_init_nmi_decoding;
 
-	retval = misc_register(&hpwdt_miscdev);
+	watchdog_set_nowayout(&hpwdt_dev, nowayout);
+	if (watchdog_init_timeout(&hpwdt_dev, soft_margin, NULL))
+		dev_warn(&dev->dev, "Invalid soft_margin: %d.\n", soft_margin);
+
+	hpwdt_dev.parent = &dev->dev;
+	retval = watchdog_register_device(&hpwdt_dev);
 	if (retval < 0) {
-		dev_warn(&dev->dev,
-			"Unable to register miscdev on minor=%d (err=%d).\n",
-			WATCHDOG_MINOR, retval);
-		goto error_misc_register;
+		dev_err(&dev->dev, "watchdog register failed: %d.\n", retval);
+		goto error_wd_register;
 	}
 
 	dev_info(&dev->dev, "HPE Watchdog Timer Driver: %s"
 			", timer margin: %d seconds (nowayout=%d).\n",
-			HPWDT_VERSION, soft_margin, nowayout);
+			HPWDT_VERSION, hpwdt_dev.timeout, nowayout);
+
+	if (dev->subsystem_vendor == PCI_VENDOR_ID_HP_3PAR)
+		ilo5 = true;
+
 	return 0;
 
-error_misc_register:
+error_wd_register:
 	hpwdt_exit_nmi_decoding();
 error_init_nmi_decoding:
 	pci_iounmap(dev, pci_mem_addr);
@@ -411,7 +343,7 @@ static void hpwdt_exit(struct pci_dev *dev)
 	if (!nowayout)
 		hpwdt_stop();
 
-	misc_deregister(&hpwdt_miscdev);
+	watchdog_unregister_device(&hpwdt_dev);
 	hpwdt_exit_nmi_decoding();
 	pci_iounmap(dev, pci_mem_addr);
 	pci_disable_device(dev);
@@ -425,7 +357,7 @@ static struct pci_driver hpwdt_driver = {
 };
 
 MODULE_AUTHOR("Tom Mingarelli");
-MODULE_DESCRIPTION("hp watchdog driver");
+MODULE_DESCRIPTION("hpe watchdog driver");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(HPWDT_VERSION);
 
@@ -437,8 +369,8 @@ MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
 		__MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
 
 #ifdef CONFIG_HPWDT_NMI_DECODING
-module_param(allow_kdump, int, 0);
-MODULE_PARM_DESC(allow_kdump, "Start a kernel dump after NMI occurs");
-#endif /* CONFIG_HPWDT_NMI_DECODING */
+module_param(pretimeout, bool, 0);
+MODULE_PARM_DESC(pretimeout, "Watchdog pretimeout enabled");
+#endif
 
 module_pci_driver(hpwdt_driver);
diff --git a/drivers/watchdog/imx2_wdt.c b/drivers/watchdog/imx2_wdt.c
index 518dfa1047cb..f07850d2c977 100644
--- a/drivers/watchdog/imx2_wdt.c
+++ b/drivers/watchdog/imx2_wdt.c
@@ -76,7 +76,7 @@ MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
 				__MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
 
 
-static unsigned timeout = IMX2_WDT_DEFAULT_TIME;
+static unsigned timeout;
 module_param(timeout, uint, 0);
 MODULE_PARM_DESC(timeout, "Watchdog timeout in seconds (default="
 				__MODULE_STRING(IMX2_WDT_DEFAULT_TIME) ")");
@@ -281,6 +281,7 @@ static int __init imx2_wdt_probe(struct platform_device *pdev)
 	wdog->info		= &imx2_wdt_info;
 	wdog->ops		= &imx2_wdt_ops;
 	wdog->min_timeout	= 1;
+	wdog->timeout		= IMX2_WDT_DEFAULT_TIME;
 	wdog->max_hw_heartbeat_ms = IMX2_WDT_MAX_TIME * 1000;
 	wdog->parent		= &pdev->dev;
 
@@ -299,11 +300,6 @@ static int __init imx2_wdt_probe(struct platform_device *pdev)
 
 	wdev->ext_reset = of_property_read_bool(pdev->dev.of_node,
 						"fsl,ext-reset-output");
-	wdog->timeout = clamp_t(unsigned, timeout, 1, IMX2_WDT_MAX_TIME);
-	if (wdog->timeout != timeout)
-		dev_warn(&pdev->dev, "Initial timeout out of range! Clamped from %u to %u\n",
-			 timeout, wdog->timeout);
-
 	platform_set_drvdata(pdev, wdog);
 	watchdog_set_drvdata(wdog, wdev);
 	watchdog_set_nowayout(wdog, nowayout);
diff --git a/drivers/watchdog/lpc18xx_wdt.c b/drivers/watchdog/lpc18xx_wdt.c
index b4221f43cd94..331cadb459ac 100644
--- a/drivers/watchdog/lpc18xx_wdt.c
+++ b/drivers/watchdog/lpc18xx_wdt.c
@@ -265,7 +265,7 @@ static int lpc18xx_wdt_probe(struct platform_device *pdev)
 	lpc18xx_wdt->wdt_dev.parent = dev;
 	watchdog_set_drvdata(&lpc18xx_wdt->wdt_dev, lpc18xx_wdt);
 
-	ret = watchdog_init_timeout(&lpc18xx_wdt->wdt_dev, heartbeat, dev);
+	watchdog_init_timeout(&lpc18xx_wdt->wdt_dev, heartbeat, dev);
 
 	__lpc18xx_wdt_set_timeout(lpc18xx_wdt);
 
diff --git a/drivers/watchdog/mei_wdt.c b/drivers/watchdog/mei_wdt.c
index b8194b02abe0..8023cf28657a 100644
--- a/drivers/watchdog/mei_wdt.c
+++ b/drivers/watchdog/mei_wdt.c
@@ -1,15 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Intel Management Engine Interface (Intel MEI) Linux driver
  * Copyright (c) 2015, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
  */
 
 #include <linux/module.h>
@@ -687,5 +679,5 @@ static struct mei_cl_driver mei_wdt_driver = {
 module_mei_cl_driver(mei_wdt_driver);
 
 MODULE_AUTHOR("Intel Corporation");
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
 MODULE_DESCRIPTION("Device driver for Intel MEI iAMT watchdog");
diff --git a/drivers/watchdog/mena21_wdt.c b/drivers/watchdog/mena21_wdt.c
index 045201a6fdb3..25d5d2b8cfbe 100644
--- a/drivers/watchdog/mena21_wdt.c
+++ b/drivers/watchdog/mena21_wdt.c
@@ -1,11 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * Watchdog driver for the A21 VME CPU Boards
  *
  * Copyright (C) 2013 MEN Mikro Elektronik Nuernberg GmbH
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation
  */
 #include <linux/module.h>
 #include <linux/moduleparam.h>
diff --git a/drivers/watchdog/meson_gxbb_wdt.c b/drivers/watchdog/meson_gxbb_wdt.c
index 69a5a57f1446..69adeab3fde7 100644
--- a/drivers/watchdog/meson_gxbb_wdt.c
+++ b/drivers/watchdog/meson_gxbb_wdt.c
@@ -1,56 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 /*
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
  * Copyright (c) 2016 BayLibre, SAS.
  * Author: Neil Armstrong <narmstrong@baylibre.com>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- * The full GNU General Public License is included in this distribution
- * in the file called COPYING.
- *
- * BSD LICENSE
- *
- * Copyright (c) 2016 BayLibre, SAS.
- * Author: Neil Armstrong <narmstrong@baylibre.com>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *   * Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in
- *     the documentation and/or other materials provided with the
- *     distribution.
- *   * Neither the name of Intel Corporation nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 #include <linux/clk.h>
 #include <linux/err.h>
diff --git a/drivers/watchdog/meson_wdt.c b/drivers/watchdog/meson_wdt.c
index 304274c67735..cd0275a6cdac 100644
--- a/drivers/watchdog/meson_wdt.c
+++ b/drivers/watchdog/meson_wdt.c
@@ -36,7 +36,7 @@
 #define MESON_SEC_TO_TC(s, c)	((s) * (c))
 
 static bool nowayout = WATCHDOG_NOWAYOUT;
-static unsigned int timeout = MESON_WDT_TIMEOUT;
+static unsigned int timeout;
 
 struct meson_wdt_data {
 	unsigned int enable;
diff --git a/drivers/watchdog/mtk_wdt.c b/drivers/watchdog/mtk_wdt.c
index 7ed417a765c7..4baf64f21aa1 100644
--- a/drivers/watchdog/mtk_wdt.c
+++ b/drivers/watchdog/mtk_wdt.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * Mediatek Watchdog Driver
  *
@@ -5,16 +6,6 @@
  *
  * Matthias Brugger <matthias.bgg@gmail.com>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Based on sunxi_wdt.c
  */
 
@@ -57,7 +48,7 @@
 #define DRV_VERSION		"1.0"
 
 static bool nowayout = WATCHDOG_NOWAYOUT;
-static unsigned int timeout = WDT_MAX_TIMEOUT;
+static unsigned int timeout;
 
 struct mtk_wdt_dev {
 	struct watchdog_device wdt_dev;
diff --git a/drivers/watchdog/mtx-1_wdt.c b/drivers/watchdog/mtx-1_wdt.c
index ca360d204548..1fa7d2b32494 100644
--- a/drivers/watchdog/mtx-1_wdt.c
+++ b/drivers/watchdog/mtx-1_wdt.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  *      Driver for the MTX-1 Watchdog.
  *
@@ -6,16 +7,6 @@
  *                              http://www.4g-systems.biz
  *
  *	(C) Copyright 2007 OpenWrt.org, Florian Fainelli <florian@openwrt.org>
- *
- *      This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
- *
- *      Neither Michael Stickel nor 4G Systems admit liability nor provide
- *      warranty for any of this software. This material is provided
- *      "AS-IS" and at no charge.
- *
  *      (c) Copyright 2005    4G Systems <info@4g-systems.biz>
  *
  *      Release 0.01.
diff --git a/drivers/watchdog/npcm_wdt.c b/drivers/watchdog/npcm_wdt.c
new file mode 100644
index 000000000000..0d4213652ecc
--- /dev/null
+++ b/drivers/watchdog/npcm_wdt.c
@@ -0,0 +1,254 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Nuvoton Technology corporation.
+// Copyright (c) 2018 IBM Corp.
+
+#include <linux/bitops.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/watchdog.h>
+
+#define NPCM_WTCR	0x1C
+
+#define NPCM_WTCLK	(BIT(10) | BIT(11))	/* Clock divider */
+#define NPCM_WTE	BIT(7)			/* Enable */
+#define NPCM_WTIE	BIT(6)			/* Enable irq */
+#define NPCM_WTIS	(BIT(4) | BIT(5))	/* Interval selection */
+#define NPCM_WTIF	BIT(3)			/* Interrupt flag */
+#define NPCM_WTRF	BIT(2)			/* Reset flag */
+#define NPCM_WTRE	BIT(1)			/* Reset enable */
+#define NPCM_WTR	BIT(0)			/* Reset counter */
+
+/*
+ * Watchdog timeouts
+ *
+ * 170     msec:    WTCLK=01 WTIS=00     VAL= 0x400
+ * 670     msec:    WTCLK=01 WTIS=01     VAL= 0x410
+ * 1360    msec:    WTCLK=10 WTIS=00     VAL= 0x800
+ * 2700    msec:    WTCLK=01 WTIS=10     VAL= 0x420
+ * 5360    msec:    WTCLK=10 WTIS=01     VAL= 0x810
+ * 10700   msec:    WTCLK=01 WTIS=11     VAL= 0x430
+ * 21600   msec:    WTCLK=10 WTIS=10     VAL= 0x820
+ * 43000   msec:    WTCLK=11 WTIS=00     VAL= 0xC00
+ * 85600   msec:    WTCLK=10 WTIS=11     VAL= 0x830
+ * 172000  msec:    WTCLK=11 WTIS=01     VAL= 0xC10
+ * 687000  msec:    WTCLK=11 WTIS=10     VAL= 0xC20
+ * 2750000 msec:    WTCLK=11 WTIS=11     VAL= 0xC30
+ */
+
+struct npcm_wdt {
+	struct watchdog_device  wdd;
+	void __iomem		*reg;
+};
+
+static inline struct npcm_wdt *to_npcm_wdt(struct watchdog_device *wdd)
+{
+	return container_of(wdd, struct npcm_wdt, wdd);
+}
+
+static int npcm_wdt_ping(struct watchdog_device *wdd)
+{
+	struct npcm_wdt *wdt = to_npcm_wdt(wdd);
+	u32 val;
+
+	val = readl(wdt->reg);
+	writel(val | NPCM_WTR, wdt->reg);
+
+	return 0;
+}
+
+static int npcm_wdt_start(struct watchdog_device *wdd)
+{
+	struct npcm_wdt *wdt = to_npcm_wdt(wdd);
+	u32 val;
+
+	if (wdd->timeout < 2)
+		val = 0x800;
+	else if (wdd->timeout < 3)
+		val = 0x420;
+	else if (wdd->timeout < 6)
+		val = 0x810;
+	else if (wdd->timeout < 11)
+		val = 0x430;
+	else if (wdd->timeout < 22)
+		val = 0x820;
+	else if (wdd->timeout < 44)
+		val = 0xC00;
+	else if (wdd->timeout < 87)
+		val = 0x830;
+	else if (wdd->timeout < 173)
+		val = 0xC10;
+	else if (wdd->timeout < 688)
+		val = 0xC20;
+	else
+		val = 0xC30;
+
+	val |= NPCM_WTRE | NPCM_WTE | NPCM_WTR | NPCM_WTIE;
+
+	writel(val, wdt->reg);
+
+	return 0;
+}
+
+static int npcm_wdt_stop(struct watchdog_device *wdd)
+{
+	struct npcm_wdt *wdt = to_npcm_wdt(wdd);
+
+	writel(0, wdt->reg);
+
+	return 0;
+}
+
+
+static int npcm_wdt_set_timeout(struct watchdog_device *wdd,
+				unsigned int timeout)
+{
+	if (timeout < 2)
+		wdd->timeout = 1;
+	else if (timeout < 3)
+		wdd->timeout = 2;
+	else if (timeout < 6)
+		wdd->timeout = 5;
+	else if (timeout < 11)
+		wdd->timeout = 10;
+	else if (timeout < 22)
+		wdd->timeout = 21;
+	else if (timeout < 44)
+		wdd->timeout = 43;
+	else if (timeout < 87)
+		wdd->timeout = 86;
+	else if (timeout < 173)
+		wdd->timeout = 172;
+	else if (timeout < 688)
+		wdd->timeout = 687;
+	else
+		wdd->timeout = 2750;
+
+	if (watchdog_active(wdd))
+		npcm_wdt_start(wdd);
+
+	return 0;
+}
+
+static irqreturn_t npcm_wdt_interrupt(int irq, void *data)
+{
+	struct npcm_wdt *wdt = data;
+
+	watchdog_notify_pretimeout(&wdt->wdd);
+
+	return IRQ_HANDLED;
+}
+
+static int npcm_wdt_restart(struct watchdog_device *wdd,
+			    unsigned long action, void *data)
+{
+	struct npcm_wdt *wdt = to_npcm_wdt(wdd);
+
+	writel(NPCM_WTR | NPCM_WTRE | NPCM_WTE, wdt->reg);
+	udelay(1000);
+
+	return 0;
+}
+
+static bool npcm_is_running(struct watchdog_device *wdd)
+{
+	struct npcm_wdt *wdt = to_npcm_wdt(wdd);
+
+	return readl(wdt->reg) & NPCM_WTE;
+}
+
+static const struct watchdog_info npcm_wdt_info = {
+	.identity	= KBUILD_MODNAME,
+	.options	= WDIOF_SETTIMEOUT
+			| WDIOF_KEEPALIVEPING
+			| WDIOF_MAGICCLOSE,
+};
+
+static const struct watchdog_ops npcm_wdt_ops = {
+	.owner = THIS_MODULE,
+	.start = npcm_wdt_start,
+	.stop = npcm_wdt_stop,
+	.ping = npcm_wdt_ping,
+	.set_timeout = npcm_wdt_set_timeout,
+	.restart = npcm_wdt_restart,
+};
+
+static int npcm_wdt_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct npcm_wdt *wdt;
+	struct resource *res;
+	int irq;
+	int ret;
+
+	wdt = devm_kzalloc(&pdev->dev, sizeof(*wdt), GFP_KERNEL);
+	if (!wdt)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	wdt->reg = devm_ioremap_resource(dev, res);
+	if (IS_ERR(wdt->reg))
+		return PTR_ERR(wdt->reg);
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+
+	wdt->wdd.info = &npcm_wdt_info;
+	wdt->wdd.ops = &npcm_wdt_ops;
+	wdt->wdd.min_timeout = 1;
+	wdt->wdd.max_timeout = 2750;
+	wdt->wdd.parent = dev;
+
+	wdt->wdd.timeout = 86;
+	watchdog_init_timeout(&wdt->wdd, 0, dev);
+
+	/* Ensure timeout is able to be represented by the hardware */
+	npcm_wdt_set_timeout(&wdt->wdd, wdt->wdd.timeout);
+
+	if (npcm_is_running(&wdt->wdd)) {
+		/* Restart with the default or device-tree specified timeout */
+		npcm_wdt_start(&wdt->wdd);
+		set_bit(WDOG_HW_RUNNING, &wdt->wdd.status);
+	}
+
+	ret = devm_request_irq(dev, irq, npcm_wdt_interrupt, 0,
+			       "watchdog", wdt);
+	if (ret)
+		return ret;
+
+	ret = devm_watchdog_register_device(dev, &wdt->wdd);
+	if (ret) {
+		dev_err(dev, "failed to register watchdog\n");
+		return ret;
+	}
+
+	dev_info(dev, "NPCM watchdog driver enabled\n");
+
+	return 0;
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id npcm_wdt_match[] = {
+	{.compatible = "nuvoton,npcm750-wdt"},
+	{},
+};
+MODULE_DEVICE_TABLE(of, npcm_wdt_match);
+#endif
+
+static struct platform_driver npcm_wdt_driver = {
+	.probe		= npcm_wdt_probe,
+	.driver		= {
+		.name	= "npcm-wdt",
+		.of_match_table = of_match_ptr(npcm_wdt_match),
+	},
+};
+module_platform_driver(npcm_wdt_driver);
+
+MODULE_AUTHOR("Joel Stanley");
+MODULE_DESCRIPTION("Watchdog driver for NPCM");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/watchdog/of_xilinx_wdt.c b/drivers/watchdog/of_xilinx_wdt.c
index 1cf286945b7a..4acbe05e27bb 100644
--- a/drivers/watchdog/of_xilinx_wdt.c
+++ b/drivers/watchdog/of_xilinx_wdt.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * Watchdog Device Driver for Xilinx axi/xps_timebase_wdt
  *
  * (C) Copyright 2013 - 2014 Xilinx, Inc.
  * (C) Copyright 2011 (Alejandro Cabrera <aldaya@gmail.com>)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
  */
 
 #include <linux/clk.h>
@@ -323,4 +319,4 @@ module_platform_driver(xwdt_driver);
 
 MODULE_AUTHOR("Alejandro Cabrera <aldaya@gmail.com>");
 MODULE_DESCRIPTION("Xilinx Watchdog driver");
-MODULE_LICENSE("GPL v2");
+MODULE_LICENSE("GPL");
diff --git a/drivers/watchdog/omap_wdt.c b/drivers/watchdog/omap_wdt.c
index 1b02bfa81b29..ae77112ce97f 100644
--- a/drivers/watchdog/omap_wdt.c
+++ b/drivers/watchdog/omap_wdt.c
@@ -253,10 +253,10 @@ static int omap_wdt_probe(struct platform_device *pdev)
 	wdev->wdog.ops = &omap_wdt_ops;
 	wdev->wdog.min_timeout = TIMER_MARGIN_MIN;
 	wdev->wdog.max_timeout = TIMER_MARGIN_MAX;
+	wdev->wdog.timeout = TIMER_MARGIN_DEFAULT;
 	wdev->wdog.parent = &pdev->dev;
 
-	if (watchdog_init_timeout(&wdev->wdog, timer_margin, &pdev->dev) < 0)
-		wdev->wdog.timeout = TIMER_MARGIN_DEFAULT;
+	watchdog_init_timeout(&wdev->wdog, timer_margin, &pdev->dev);
 
 	watchdog_set_nowayout(&wdev->wdog, nowayout);
 
diff --git a/drivers/watchdog/pnx4008_wdt.c b/drivers/watchdog/pnx4008_wdt.c
index 0529aed158a4..8e261799c84e 100644
--- a/drivers/watchdog/pnx4008_wdt.c
+++ b/drivers/watchdog/pnx4008_wdt.c
@@ -78,7 +78,7 @@
 #define WDOG_COUNTER_RATE 13000000	/*the counter clock is 13 MHz fixed */
 
 static bool nowayout = WATCHDOG_NOWAYOUT;
-static unsigned int heartbeat = DEFAULT_HEARTBEAT;
+static unsigned int heartbeat;
 
 static DEFINE_SPINLOCK(io_lock);
 static void __iomem	*wdt_base;
diff --git a/drivers/watchdog/renesas_wdt.c b/drivers/watchdog/renesas_wdt.c
index 831ef83f6de1..6b8c6ddfe30b 100644
--- a/drivers/watchdog/renesas_wdt.c
+++ b/drivers/watchdog/renesas_wdt.c
@@ -16,6 +16,8 @@
 #include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
+#include <linux/smp.h>
+#include <linux/sys_soc.h>
 #include <linux/watchdog.h>
 
 #define RWTCNT		0
@@ -49,6 +51,7 @@ struct rwdt_priv {
 	void __iomem *base;
 	struct watchdog_device wdev;
 	unsigned long clk_rate;
+	u16 time_left;
 	u8 cks;
 };
 
@@ -107,6 +110,16 @@ static unsigned int rwdt_get_timeleft(struct watchdog_device *wdev)
 	return DIV_BY_CLKS_PER_SEC(priv, 65536 - val);
 }
 
+static int rwdt_restart(struct watchdog_device *wdev, unsigned long action,
+			void *data)
+{
+	struct rwdt_priv *priv = watchdog_get_drvdata(wdev);
+
+	rwdt_start(wdev);
+	rwdt_write(priv, 0xffff, RWTCNT);
+	return 0;
+}
+
 static const struct watchdog_info rwdt_ident = {
 	.options = WDIOF_MAGICCLOSE | WDIOF_KEEPALIVEPING | WDIOF_SETTIMEOUT,
 	.identity = "Renesas WDT Watchdog",
@@ -118,8 +131,47 @@ static const struct watchdog_ops rwdt_ops = {
 	.stop = rwdt_stop,
 	.ping = rwdt_init_timeout,
 	.get_timeleft = rwdt_get_timeleft,
+	.restart = rwdt_restart,
 };
 
+#if defined(CONFIG_ARCH_RCAR_GEN2) && defined(CONFIG_SMP)
+/*
+ * Watchdog-reset integration is broken on early revisions of R-Car Gen2 SoCs
+ */
+static const struct soc_device_attribute rwdt_quirks_match[] = {
+	{
+		.soc_id = "r8a7790",
+		.revision = "ES1.*",
+		.data = (void *)1,	/* needs single CPU */
+	}, {
+		.soc_id = "r8a7791",
+		.revision = "ES[12].*",
+		.data = (void *)1,	/* needs single CPU */
+	}, {
+		.soc_id = "r8a7792",
+		.revision = "*",
+		.data = (void *)0,	/* needs SMP disabled */
+	},
+	{ /* sentinel */ }
+};
+
+static bool rwdt_blacklisted(struct device *dev)
+{
+	const struct soc_device_attribute *attr;
+
+	attr = soc_device_match(rwdt_quirks_match);
+	if (attr && setup_max_cpus > (uintptr_t)attr->data) {
+		dev_info(dev, "Watchdog blacklisted on %s %s\n", attr->soc_id,
+			 attr->revision);
+		return true;
+	}
+
+	return false;
+}
+#else /* !CONFIG_ARCH_RCAR_GEN2 || !CONFIG_SMP */
+static inline bool rwdt_blacklisted(struct device *dev) { return false; }
+#endif /* !CONFIG_ARCH_RCAR_GEN2 || !CONFIG_SMP */
+
 static int rwdt_probe(struct platform_device *pdev)
 {
 	struct rwdt_priv *priv;
@@ -128,6 +180,9 @@ static int rwdt_probe(struct platform_device *pdev)
 	unsigned long clks_per_sec;
 	int ret, i;
 
+	if (rwdt_blacklisted(&pdev->dev))
+		return -ENODEV;
+
 	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
 	if (!priv)
 		return -ENOMEM;
@@ -176,6 +231,7 @@ static int rwdt_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, priv);
 	watchdog_set_drvdata(&priv->wdev, priv);
 	watchdog_set_nowayout(&priv->wdev, nowayout);
+	watchdog_set_restart_priority(&priv->wdev, 0);
 
 	/* This overrides the default timeout only if DT configuration was found */
 	ret = watchdog_init_timeout(&priv->wdev, 0, &pdev->dev);
@@ -203,12 +259,32 @@ static int rwdt_remove(struct platform_device *pdev)
 	return 0;
 }
 
-/*
- * This driver does also fit for R-Car Gen2 (r8a779[0-4]) WDT. However, for SMP
- * to work there, one also needs a RESET (RST) driver which does not exist yet
- * due to HW issues. This needs to be solved before adding compatibles here.
- */
+static int __maybe_unused rwdt_suspend(struct device *dev)
+{
+	struct rwdt_priv *priv = dev_get_drvdata(dev);
+
+	if (watchdog_active(&priv->wdev)) {
+		priv->time_left = readw(priv->base + RWTCNT);
+		rwdt_stop(&priv->wdev);
+	}
+	return 0;
+}
+
+static int __maybe_unused rwdt_resume(struct device *dev)
+{
+	struct rwdt_priv *priv = dev_get_drvdata(dev);
+
+	if (watchdog_active(&priv->wdev)) {
+		rwdt_start(&priv->wdev);
+		rwdt_write(priv, priv->time_left, RWTCNT);
+	}
+	return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(rwdt_pm_ops, rwdt_suspend, rwdt_resume);
+
 static const struct of_device_id rwdt_ids[] = {
+	{ .compatible = "renesas,rcar-gen2-wdt", },
 	{ .compatible = "renesas,rcar-gen3-wdt", },
 	{ /* sentinel */ }
 };
@@ -218,6 +294,7 @@ static struct platform_driver rwdt_driver = {
 	.driver = {
 		.name = "renesas_wdt",
 		.of_match_table = rwdt_ids,
+		.pm = &rwdt_pm_ops,
 	},
 	.probe = rwdt_probe,
 	.remove = rwdt_remove,
diff --git a/drivers/watchdog/sama5d4_wdt.c b/drivers/watchdog/sama5d4_wdt.c
index 0ae947c3d7bc..255169916dbb 100644
--- a/drivers/watchdog/sama5d4_wdt.c
+++ b/drivers/watchdog/sama5d4_wdt.c
@@ -33,7 +33,7 @@ struct sama5d4_wdt {
 	unsigned long		last_ping;
 };
 
-static int wdt_timeout = WDT_DEFAULT_TIMEOUT;
+static int wdt_timeout;
 static bool nowayout = WATCHDOG_NOWAYOUT;
 
 module_param(wdt_timeout, int, 0);
@@ -212,7 +212,7 @@ static int sama5d4_wdt_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	wdd = &wdt->wdd;
-	wdd->timeout = wdt_timeout;
+	wdd->timeout = WDT_DEFAULT_TIMEOUT;
 	wdd->info = &sama5d4_wdt_info;
 	wdd->ops = &sama5d4_wdt_ops;
 	wdd->min_timeout = MIN_WDT_TIMEOUT;
@@ -273,7 +273,7 @@ static int sama5d4_wdt_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, wdt);
 
 	dev_info(&pdev->dev, "initialized (timeout = %d sec, nowayout = %d)\n",
-		 wdt_timeout, nowayout);
+		 wdd->timeout, nowayout);
 
 	return 0;
 }
diff --git a/drivers/watchdog/sirfsoc_wdt.c b/drivers/watchdog/sirfsoc_wdt.c
index 4eea351e09b0..ac0c9d2c4aee 100644
--- a/drivers/watchdog/sirfsoc_wdt.c
+++ b/drivers/watchdog/sirfsoc_wdt.c
@@ -29,7 +29,7 @@
 #define SIRFSOC_WDT_MAX_TIMEOUT		(10 * 60)	/* 10 mins */
 #define SIRFSOC_WDT_DEFAULT_TIMEOUT	30		/* 30 secs */
 
-static unsigned int timeout = SIRFSOC_WDT_DEFAULT_TIMEOUT;
+static unsigned int timeout;
 static bool nowayout = WATCHDOG_NOWAYOUT;
 
 module_param(timeout, uint, 0);
diff --git a/drivers/watchdog/sprd_wdt.c b/drivers/watchdog/sprd_wdt.c
index a8b280ff33e0..b4d484a42b70 100644
--- a/drivers/watchdog/sprd_wdt.c
+++ b/drivers/watchdog/sprd_wdt.c
@@ -154,8 +154,10 @@ static int sprd_wdt_enable(struct sprd_wdt *wdt)
 	if (ret)
 		return ret;
 	ret = clk_prepare_enable(wdt->rtc_enable);
-	if (ret)
+	if (ret) {
+		clk_disable_unprepare(wdt->enable);
 		return ret;
+	}
 
 	sprd_wdt_unlock(wdt->base);
 	val = readl_relaxed(wdt->base + SPRD_WDT_CTRL);
diff --git a/drivers/watchdog/st_lpc_wdt.c b/drivers/watchdog/st_lpc_wdt.c
index e6100e447dd8..177829b379da 100644
--- a/drivers/watchdog/st_lpc_wdt.c
+++ b/drivers/watchdog/st_lpc_wdt.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * ST's LPC Watchdog
  *
@@ -5,11 +6,6 @@
  *
  * Author: David Paris <david.paris@st.com> for STMicroelectronics
  *         Lee Jones <lee.jones@linaro.org> for STMicroelectronics
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
  */
 
 #include <linux/clk.h>
diff --git a/drivers/watchdog/sunxi_wdt.c b/drivers/watchdog/sunxi_wdt.c
index 802e31b1416d..c6c73656997e 100644
--- a/drivers/watchdog/sunxi_wdt.c
+++ b/drivers/watchdog/sunxi_wdt.c
@@ -39,7 +39,7 @@
 #define DRV_VERSION		"1.0"
 
 static bool nowayout = WATCHDOG_NOWAYOUT;
-static unsigned int timeout = WDT_MAX_TIMEOUT;
+static unsigned int timeout;
 
 /*
  * This structure stores the register offsets for different variants
diff --git a/drivers/watchdog/tangox_wdt.c b/drivers/watchdog/tangox_wdt.c
index d5fcce062920..b1de8297fa40 100644
--- a/drivers/watchdog/tangox_wdt.c
+++ b/drivers/watchdog/tangox_wdt.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  *  Copyright (C) 2015 Mans Rullgard <mans@mansr.com>
  *  SMP86xx/SMP87xx Watchdog driver
- *
- *  This program is free software; you can redistribute it and/or modify it
- *  under  the terms of the GNU General  Public License as published by the
- *  Free Software Foundation;  either version 2 of the License, or (at your
- *  option) any later version.
  */
 
 #include <linux/bitops.h>
diff --git a/drivers/watchdog/tegra_wdt.c b/drivers/watchdog/tegra_wdt.c
index 9403c08816e3..877dd39bd41f 100644
--- a/drivers/watchdog/tegra_wdt.c
+++ b/drivers/watchdog/tegra_wdt.c
@@ -1,14 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
  */
 
 #include <linux/kernel.h>
diff --git a/drivers/watchdog/uniphier_wdt.c b/drivers/watchdog/uniphier_wdt.c
index 0ea2339d9702..e20a7a459d69 100644
--- a/drivers/watchdog/uniphier_wdt.c
+++ b/drivers/watchdog/uniphier_wdt.c
@@ -1,18 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Watchdog driver for the UniPhier watchdog timer
  *
  * (c) Copyright 2014 Panasonic Corporation
  * (c) Copyright 2016 Socionext Inc.
  * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/bitops.h>
@@ -212,11 +204,10 @@ static int uniphier_wdt_probe(struct platform_device *pdev)
 	wdev->wdt_dev.ops = &uniphier_wdt_ops;
 	wdev->wdt_dev.max_timeout = WDT_PERIOD_MAX;
 	wdev->wdt_dev.min_timeout = WDT_PERIOD_MIN;
+	wdev->wdt_dev.timeout = WDT_DEFAULT_TIMEOUT;
 	wdev->wdt_dev.parent = dev;
 
-	if (watchdog_init_timeout(&wdev->wdt_dev, timeout, dev) < 0) {
-		wdev->wdt_dev.timeout = WDT_DEFAULT_TIMEOUT;
-	}
+	watchdog_init_timeout(&wdev->wdt_dev, timeout, dev);
 	watchdog_set_nowayout(&wdev->wdt_dev, nowayout);
 	watchdog_stop_on_reboot(&wdev->wdt_dev);
 
diff --git a/drivers/watchdog/wm831x_wdt.c b/drivers/watchdog/wm831x_wdt.c
index 1ddc1f742cd4..116c2f47b463 100644
--- a/drivers/watchdog/wm831x_wdt.c
+++ b/drivers/watchdog/wm831x_wdt.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * Watchdog driver for the wm831x PMICs
  *
  * Copyright (C) 2009 Wolfson Microelectronics
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation
  */
 
 #include <linux/module.h>
diff --git a/drivers/watchdog/wm8350_wdt.c b/drivers/watchdog/wm8350_wdt.c
index 4ab4b8347d45..33c62d51f00a 100644
--- a/drivers/watchdog/wm8350_wdt.c
+++ b/drivers/watchdog/wm8350_wdt.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * Watchdog driver for the wm8350
  *
  * Copyright (C) 2007, 2008 Wolfson Microelectronics <linux@wolfsonmicro.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/drivers/xen/xen-acpi-processor.c b/drivers/xen/xen-acpi-processor.c
index 23e391d3ec01..b29f4e40851f 100644
--- a/drivers/xen/xen-acpi-processor.c
+++ b/drivers/xen/xen-acpi-processor.c
@@ -53,6 +53,8 @@ static unsigned long *acpi_ids_done;
 static unsigned long *acpi_id_present;
 /* And if there is an _CST definition (or a PBLK) for the ACPI IDs */
 static unsigned long *acpi_id_cst_present;
+/* Which ACPI P-State dependencies for an enumerated processor */
+static struct acpi_psd_package *acpi_psd;
 
 static int push_cxx_to_hypervisor(struct acpi_processor *_pr)
 {
@@ -362,9 +364,9 @@ read_acpi_id(acpi_handle handle, u32 lvl, void *context, void **rv)
 	}
 	/* There are more ACPI Processor objects than in x2APIC or MADT.
 	 * This can happen with incorrect ACPI SSDT declerations. */
-	if (acpi_id > nr_acpi_bits) {
-		pr_debug("We only have %u, trying to set %u\n",
-			 nr_acpi_bits, acpi_id);
+	if (acpi_id >= nr_acpi_bits) {
+		pr_debug("max acpi id %u, trying to set %u\n",
+			 nr_acpi_bits - 1, acpi_id);
 		return AE_OK;
 	}
 	/* OK, There is a ACPI Processor object */
@@ -372,6 +374,13 @@ read_acpi_id(acpi_handle handle, u32 lvl, void *context, void **rv)
 
 	pr_debug("ACPI CPU%u w/ PBLK:0x%lx\n", acpi_id, (unsigned long)pblk);
 
+	/* Fetch its P-state dependencies (_PSD), if any */
+	if (!acpi_processor_get_psd(handle, &acpi_psd[acpi_id])) {
+		pr_debug("ACPI CPU%u w/ PST:coord_type = %llu domain = %llu\n",
+			 acpi_id, acpi_psd[acpi_id].coord_type,
+			 acpi_psd[acpi_id].domain);
+	}
+
 	status = acpi_evaluate_object(handle, "_CST", NULL, &buffer);
 	if (ACPI_FAILURE(status)) {
 		if (!pblk)
@@ -405,6 +414,14 @@ static int check_acpi_ids(struct acpi_processor *pr_backup)
 		return -ENOMEM;
 	}
 
+	acpi_psd = kcalloc(nr_acpi_bits, sizeof(struct acpi_psd_package),
+			   GFP_KERNEL);
+	if (!acpi_psd) {
+		kfree(acpi_id_present);
+		kfree(acpi_id_cst_present);
+		return -ENOMEM;
+	}
+
 	acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
 			    ACPI_UINT32_MAX,
 			    read_acpi_id, NULL, NULL, NULL);
@@ -417,6 +434,12 @@ upload:
 			pr_backup->acpi_id = i;
 			/* Mask out C-states if there are no _CST or PBLK */
 			pr_backup->flags.power = test_bit(i, acpi_id_cst_present);
+			/* num_entries is non-zero if we evaluated _PSD */
+			if (acpi_psd[i].num_entries) {
+				memcpy(&pr_backup->performance->domain_info,
+				       &acpi_psd[i],
+				       sizeof(struct acpi_psd_package));
+			}
 			(void)upload_pm_data(pr_backup);
 		}
 	}
@@ -566,6 +589,7 @@ static void __exit xen_acpi_processor_exit(void)
 	kfree(acpi_ids_done);
 	kfree(acpi_id_present);
 	kfree(acpi_id_cst_present);
+	kfree(acpi_psd);
 	for_each_possible_cpu(i)
 		acpi_processor_unregister_performance(i);
 
diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c
index a493e99bed21..0d6d9264d6a9 100644
--- a/drivers/xen/xenbus/xenbus_dev_frontend.c
+++ b/drivers/xen/xenbus/xenbus_dev_frontend.c
@@ -365,7 +365,7 @@ void xenbus_dev_queue_reply(struct xb_req_data *req)
 			if (WARN_ON(rc))
 				goto out;
 		}
-	} else if (req->msg.type == XS_TRANSACTION_END) {
+	} else if (req->type == XS_TRANSACTION_END) {
 		trans = xenbus_get_transaction(u, req->msg.tx_id);
 		if (WARN_ON(!trans))
 			goto out;
@@ -429,6 +429,10 @@ static int xenbus_write_transaction(unsigned msg_type,
 {
 	int rc;
 	struct xenbus_transaction_holder *trans = NULL;
+	struct {
+		struct xsd_sockmsg hdr;
+		char body[];
+	} *msg = (void *)u->u.buffer;
 
 	if (msg_type == XS_TRANSACTION_START) {
 		trans = kzalloc(sizeof(*trans), GFP_KERNEL);
@@ -437,11 +441,15 @@ static int xenbus_write_transaction(unsigned msg_type,
 			goto out;
 		}
 		list_add(&trans->list, &u->transactions);
-	} else if (u->u.msg.tx_id != 0 &&
-		   !xenbus_get_transaction(u, u->u.msg.tx_id))
+	} else if (msg->hdr.tx_id != 0 &&
+		   !xenbus_get_transaction(u, msg->hdr.tx_id))
 		return xenbus_command_reply(u, XS_ERROR, "ENOENT");
+	else if (msg_type == XS_TRANSACTION_END &&
+		 !(msg->hdr.len == 2 &&
+		   (!strcmp(msg->body, "T") || !strcmp(msg->body, "F"))))
+		return xenbus_command_reply(u, XS_ERROR, "EINVAL");
 
-	rc = xenbus_dev_request_and_reply(&u->u.msg, u);
+	rc = xenbus_dev_request_and_reply(&msg->hdr, u);
 	if (rc && trans) {
 		list_del(&trans->list);
 		kfree(trans);
diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c
index 3f3b29398ab8..49a3874ae6bb 100644
--- a/drivers/xen/xenbus/xenbus_xs.c
+++ b/drivers/xen/xenbus/xenbus_xs.c
@@ -140,7 +140,9 @@ void xs_request_exit(struct xb_req_data *req)
 	spin_lock(&xs_state_lock);
 	xs_state_users--;
 	if ((req->type == XS_TRANSACTION_START && req->msg.type == XS_ERROR) ||
-	    req->type == XS_TRANSACTION_END)
+	    (req->type == XS_TRANSACTION_END &&
+	     !WARN_ON_ONCE(req->msg.type == XS_ERROR &&
+			   !strcmp(req->body, "ENOENT"))))
 		xs_state_users--;
 	spin_unlock(&xs_state_lock);
 
diff --git a/fs/9p/cache.c b/fs/9p/cache.c
index 64c58eb26159..9eb34701a566 100644
--- a/fs/9p/cache.c
+++ b/fs/9p/cache.c
@@ -55,42 +55,27 @@ int v9fs_random_cachetag(struct v9fs_session_info *v9ses)
 	return scnprintf(v9ses->cachetag, CACHETAG_LEN, "%lu", jiffies);
 }
 
-static uint16_t v9fs_cache_session_get_key(const void *cookie_netfs_data,
-					   void *buffer, uint16_t bufmax)
-{
-	struct v9fs_session_info *v9ses;
-	uint16_t klen = 0;
-
-	v9ses = (struct v9fs_session_info *)cookie_netfs_data;
-	p9_debug(P9_DEBUG_FSC, "session %p buf %p size %u\n",
-		 v9ses, buffer, bufmax);
-
-	if (v9ses->cachetag)
-		klen = strlen(v9ses->cachetag);
-
-	if (klen > bufmax)
-		return 0;
-
-	memcpy(buffer, v9ses->cachetag, klen);
-	p9_debug(P9_DEBUG_FSC, "cache session tag %s\n", v9ses->cachetag);
-	return klen;
-}
-
 const struct fscache_cookie_def v9fs_cache_session_index_def = {
 	.name		= "9P.session",
 	.type		= FSCACHE_COOKIE_TYPE_INDEX,
-	.get_key	= v9fs_cache_session_get_key,
 };
 
 void v9fs_cache_session_get_cookie(struct v9fs_session_info *v9ses)
 {
 	/* If no cache session tag was specified, we generate a random one. */
-	if (!v9ses->cachetag)
-		v9fs_random_cachetag(v9ses);
+	if (!v9ses->cachetag) {
+		if (v9fs_random_cachetag(v9ses) < 0) {
+			v9ses->fscache = NULL;
+			return;
+		}
+	}
 
 	v9ses->fscache = fscache_acquire_cookie(v9fs_cache_netfs.primary_index,
 						&v9fs_cache_session_index_def,
-						v9ses, true);
+						v9ses->cachetag,
+						strlen(v9ses->cachetag),
+						NULL, 0,
+						v9ses, 0, true);
 	p9_debug(P9_DEBUG_FSC, "session %p get cookie %p\n",
 		 v9ses, v9ses->fscache);
 }
@@ -99,45 +84,15 @@ void v9fs_cache_session_put_cookie(struct v9fs_session_info *v9ses)
 {
 	p9_debug(P9_DEBUG_FSC, "session %p put cookie %p\n",
 		 v9ses, v9ses->fscache);
-	fscache_relinquish_cookie(v9ses->fscache, 0);
+	fscache_relinquish_cookie(v9ses->fscache, NULL, false);
 	v9ses->fscache = NULL;
 }
 
-
-static uint16_t v9fs_cache_inode_get_key(const void *cookie_netfs_data,
-					 void *buffer, uint16_t bufmax)
-{
-	const struct v9fs_inode *v9inode = cookie_netfs_data;
-	memcpy(buffer, &v9inode->qid.path, sizeof(v9inode->qid.path));
-	p9_debug(P9_DEBUG_FSC, "inode %p get key %llu\n",
-		 &v9inode->vfs_inode, v9inode->qid.path);
-	return sizeof(v9inode->qid.path);
-}
-
-static void v9fs_cache_inode_get_attr(const void *cookie_netfs_data,
-				      uint64_t *size)
-{
-	const struct v9fs_inode *v9inode = cookie_netfs_data;
-	*size = i_size_read(&v9inode->vfs_inode);
-
-	p9_debug(P9_DEBUG_FSC, "inode %p get attr %llu\n",
-		 &v9inode->vfs_inode, *size);
-}
-
-static uint16_t v9fs_cache_inode_get_aux(const void *cookie_netfs_data,
-					 void *buffer, uint16_t buflen)
-{
-	const struct v9fs_inode *v9inode = cookie_netfs_data;
-	memcpy(buffer, &v9inode->qid.version, sizeof(v9inode->qid.version));
-	p9_debug(P9_DEBUG_FSC, "inode %p get aux %u\n",
-		 &v9inode->vfs_inode, v9inode->qid.version);
-	return sizeof(v9inode->qid.version);
-}
-
 static enum
 fscache_checkaux v9fs_cache_inode_check_aux(void *cookie_netfs_data,
 					    const void *buffer,
-					    uint16_t buflen)
+					    uint16_t buflen,
+					    loff_t object_size)
 {
 	const struct v9fs_inode *v9inode = cookie_netfs_data;
 
@@ -154,9 +109,6 @@ fscache_checkaux v9fs_cache_inode_check_aux(void *cookie_netfs_data,
 const struct fscache_cookie_def v9fs_cache_inode_index_def = {
 	.name		= "9p.inode",
 	.type		= FSCACHE_COOKIE_TYPE_DATAFILE,
-	.get_key	= v9fs_cache_inode_get_key,
-	.get_attr	= v9fs_cache_inode_get_attr,
-	.get_aux	= v9fs_cache_inode_get_aux,
 	.check_aux	= v9fs_cache_inode_check_aux,
 };
 
@@ -175,7 +127,13 @@ void v9fs_cache_inode_get_cookie(struct inode *inode)
 	v9ses = v9fs_inode2v9ses(inode);
 	v9inode->fscache = fscache_acquire_cookie(v9ses->fscache,
 						  &v9fs_cache_inode_index_def,
-						  v9inode, true);
+						  &v9inode->qid.path,
+						  sizeof(v9inode->qid.path),
+						  &v9inode->qid.version,
+						  sizeof(v9inode->qid.version),
+						  v9inode,
+						  i_size_read(&v9inode->vfs_inode),
+						  true);
 
 	p9_debug(P9_DEBUG_FSC, "inode %p get cookie %p\n",
 		 inode, v9inode->fscache);
@@ -190,7 +148,8 @@ void v9fs_cache_inode_put_cookie(struct inode *inode)
 	p9_debug(P9_DEBUG_FSC, "inode %p put cookie %p\n",
 		 inode, v9inode->fscache);
 
-	fscache_relinquish_cookie(v9inode->fscache, 0);
+	fscache_relinquish_cookie(v9inode->fscache, &v9inode->qid.version,
+				  false);
 	v9inode->fscache = NULL;
 }
 
@@ -203,7 +162,7 @@ void v9fs_cache_inode_flush_cookie(struct inode *inode)
 	p9_debug(P9_DEBUG_FSC, "inode %p flush cookie %p\n",
 		 inode, v9inode->fscache);
 
-	fscache_relinquish_cookie(v9inode->fscache, 1);
+	fscache_relinquish_cookie(v9inode->fscache, NULL, true);
 	v9inode->fscache = NULL;
 }
 
@@ -236,12 +195,18 @@ void v9fs_cache_inode_reset_cookie(struct inode *inode)
 	old = v9inode->fscache;
 
 	mutex_lock(&v9inode->fscache_lock);
-	fscache_relinquish_cookie(v9inode->fscache, 1);
+	fscache_relinquish_cookie(v9inode->fscache, NULL, true);
 
 	v9ses = v9fs_inode2v9ses(inode);
 	v9inode->fscache = fscache_acquire_cookie(v9ses->fscache,
 						  &v9fs_cache_inode_index_def,
-						  v9inode, true);
+						  &v9inode->qid.path,
+						  sizeof(v9inode->qid.path),
+						  &v9inode->qid.version,
+						  sizeof(v9inode->qid.version),
+						  v9inode,
+						  i_size_read(&v9inode->vfs_inode),
+						  true);
 	p9_debug(P9_DEBUG_FSC, "inode %p revalidating cookie old %p new %p\n",
 		 inode, old, v9inode->fscache);
 
@@ -367,7 +332,8 @@ void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page)
 	const struct v9fs_inode *v9inode = V9FS_I(inode);
 
 	p9_debug(P9_DEBUG_FSC, "inode %p page %p\n", inode, page);
-	ret = fscache_write_page(v9inode->fscache, page, GFP_KERNEL);
+	ret = fscache_write_page(v9inode->fscache, page,
+				 i_size_read(&v9inode->vfs_inode), GFP_KERNEL);
 	p9_debug(P9_DEBUG_FSC, "ret =  %d\n", ret);
 	if (ret != 0)
 		v9fs_uncache_page(inode, page);
diff --git a/fs/afs/Makefile b/fs/afs/Makefile
index 45b7fc405fa6..532acae25453 100644
--- a/fs/afs/Makefile
+++ b/fs/afs/Makefile
@@ -12,6 +12,8 @@ kafs-objs := \
 	cell.o \
 	cmservice.o \
 	dir.o \
+	dir_edit.o \
+	dynroot.o \
 	file.o \
 	flock.o \
 	fsclient.o \
diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c
index fd9f28b8a933..3bedfed608a2 100644
--- a/fs/afs/addr_list.c
+++ b/fs/afs/addr_list.c
@@ -243,9 +243,9 @@ void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr, u16 port)
 		    xport == a->sin6_port)
 			return;
 		if (xdr == a->sin6_addr.s6_addr32[3] &&
-		    xport < a->sin6_port)
+		    (u16 __force)xport < (u16 __force)a->sin6_port)
 			break;
-		if (xdr < a->sin6_addr.s6_addr32[3])
+		if ((u32 __force)xdr < (u32 __force)a->sin6_addr.s6_addr32[3])
 			break;
 	}
 
@@ -280,7 +280,7 @@ void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port)
 		    xport == a->sin6_port)
 			return;
 		if (diff == 0 &&
-		    xport < a->sin6_port)
+		    (u16 __force)xport < (u16 __force)a->sin6_port)
 			break;
 		if (diff < 0)
 			break;
diff --git a/fs/afs/afs.h b/fs/afs/afs.h
index b94d0edc2b78..b4ff1f7ae4ab 100644
--- a/fs/afs/afs.h
+++ b/fs/afs/afs.h
@@ -67,10 +67,14 @@ typedef enum {
 } afs_callback_type_t;
 
 struct afs_callback {
-	struct afs_fid		fid;		/* file identifier */
-	unsigned		version;	/* callback version */
-	unsigned		expiry;		/* time at which expires */
-	afs_callback_type_t	type;		/* type of callback */
+	unsigned		version;	/* Callback version */
+	unsigned		expiry;		/* Time at which expires */
+	afs_callback_type_t	type;		/* Type of callback */
+};
+
+struct afs_callback_break {
+	struct afs_fid		fid;		/* File identifier */
+	struct afs_callback	cb;		/* Callback details */
 };
 
 #define AFSCBMAX 50	/* maximum callbacks transferred per bulk op */
@@ -123,21 +127,20 @@ typedef u32 afs_access_t;
  * AFS file status information
  */
 struct afs_file_status {
-	unsigned		if_version;	/* interface version */
-#define AFS_FSTATUS_VERSION	1
+	u64			size;		/* file size */
+	afs_dataversion_t	data_version;	/* current data version */
+	time_t			mtime_client;	/* last time client changed data */
+	time_t			mtime_server;	/* last time server changed data */
+	unsigned		abort_code;	/* Abort if bulk-fetching this failed */
 
 	afs_file_type_t		type;		/* file type */
 	unsigned		nlink;		/* link count */
-	u64			size;		/* file size */
-	afs_dataversion_t	data_version;	/* current data version */
 	u32			author;		/* author ID */
-	kuid_t			owner;		/* owner ID */
-	kgid_t			group;		/* group ID */
+	u32			owner;		/* owner ID */
+	u32			group;		/* group ID */
 	afs_access_t		caller_access;	/* access rights for authenticated caller */
 	afs_access_t		anon_access;	/* access rights for unauthenticated caller */
 	umode_t			mode;		/* UNIX mode */
-	time_t			mtime_client;	/* last time client changed data */
-	time_t			mtime_server;	/* last time server changed data */
 	s32			lock_count;	/* file lock count (0=UNLK -1=WRLCK +ve=#RDLCK */
 };
 
diff --git a/fs/afs/afs_fs.h b/fs/afs/afs_fs.h
index d47b6d01e4c0..ddfa88a7a9c0 100644
--- a/fs/afs/afs_fs.h
+++ b/fs/afs/afs_fs.h
@@ -31,10 +31,12 @@ enum AFS_FS_Operations {
 	FSGETVOLUMEINFO		= 148,	/* AFS Get information about a volume */
 	FSGETVOLUMESTATUS	= 149,	/* AFS Get volume status information */
 	FSGETROOTVOLUME		= 151,	/* AFS Get root volume name */
+	FSBULKSTATUS		= 155,	/* AFS Fetch multiple file statuses */
 	FSSETLOCK		= 156,	/* AFS Request a file lock */
 	FSEXTENDLOCK		= 157,	/* AFS Extend a file lock */
 	FSRELEASELOCK		= 158,	/* AFS Release a file lock */
 	FSLOOKUP		= 161,	/* AFS lookup file in directory */
+	FSINLINEBULKSTATUS	= 65536, /* AFS Fetch multiple file statuses with inline errors */
 	FSFETCHDATA64		= 65537, /* AFS Fetch file data */
 	FSSTOREDATA64		= 65538, /* AFS Store file data */
 	FSGIVEUPALLCALLBACKS	= 65539, /* AFS Give up all outstanding callbacks on a server */
diff --git a/fs/afs/cache.c b/fs/afs/cache.c
index f62ff71d28c9..b1c31ec4523a 100644
--- a/fs/afs/cache.c
+++ b/fs/afs/cache.c
@@ -12,167 +12,39 @@
 #include <linux/sched.h>
 #include "internal.h"
 
-static uint16_t afs_cell_cache_get_key(const void *cookie_netfs_data,
-				       void *buffer, uint16_t buflen);
-static uint16_t afs_volume_cache_get_key(const void *cookie_netfs_data,
-					 void *buffer, uint16_t buflen);
-
-static uint16_t afs_vnode_cache_get_key(const void *cookie_netfs_data,
-					void *buffer, uint16_t buflen);
-static void afs_vnode_cache_get_attr(const void *cookie_netfs_data,
-				     uint64_t *size);
-static uint16_t afs_vnode_cache_get_aux(const void *cookie_netfs_data,
-					void *buffer, uint16_t buflen);
 static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data,
 						       const void *buffer,
-						       uint16_t buflen);
+						       uint16_t buflen,
+						       loff_t object_size);
 
 struct fscache_netfs afs_cache_netfs = {
 	.name			= "afs",
-	.version		= 1,
+	.version		= 2,
 };
 
 struct fscache_cookie_def afs_cell_cache_index_def = {
 	.name		= "AFS.cell",
 	.type		= FSCACHE_COOKIE_TYPE_INDEX,
-	.get_key	= afs_cell_cache_get_key,
 };
 
 struct fscache_cookie_def afs_volume_cache_index_def = {
 	.name		= "AFS.volume",
 	.type		= FSCACHE_COOKIE_TYPE_INDEX,
-	.get_key	= afs_volume_cache_get_key,
 };
 
 struct fscache_cookie_def afs_vnode_cache_index_def = {
-	.name			= "AFS.vnode",
-	.type			= FSCACHE_COOKIE_TYPE_DATAFILE,
-	.get_key		= afs_vnode_cache_get_key,
-	.get_attr		= afs_vnode_cache_get_attr,
-	.get_aux		= afs_vnode_cache_get_aux,
-	.check_aux		= afs_vnode_cache_check_aux,
+	.name		= "AFS.vnode",
+	.type		= FSCACHE_COOKIE_TYPE_DATAFILE,
+	.check_aux	= afs_vnode_cache_check_aux,
 };
 
 /*
- * set the key for the index entry
- */
-static uint16_t afs_cell_cache_get_key(const void *cookie_netfs_data,
-				       void *buffer, uint16_t bufmax)
-{
-	const struct afs_cell *cell = cookie_netfs_data;
-	uint16_t klen;
-
-	_enter("%p,%p,%u", cell, buffer, bufmax);
-
-	klen = strlen(cell->name);
-	if (klen > bufmax)
-		return 0;
-
-	memcpy(buffer, cell->name, klen);
-	return klen;
-}
-
-/*****************************************************************************/
-/*
- * set the key for the volume index entry
- */
-static uint16_t afs_volume_cache_get_key(const void *cookie_netfs_data,
-					 void *buffer, uint16_t bufmax)
-{
-	const struct afs_volume *volume = cookie_netfs_data;
-	struct {
-		u64 volid;
-	} __packed key;
-
-	_enter("{%u},%p,%u", volume->type, buffer, bufmax);
-
-	if (bufmax < sizeof(key))
-		return 0;
-
-	key.volid = volume->vid;
-	memcpy(buffer, &key, sizeof(key));
-	return sizeof(key);
-}
-
-/*****************************************************************************/
-/*
- * set the key for the index entry
- */
-static uint16_t afs_vnode_cache_get_key(const void *cookie_netfs_data,
-					void *buffer, uint16_t bufmax)
-{
-	const struct afs_vnode *vnode = cookie_netfs_data;
-	struct {
-		u32 vnode_id[3];
-	} __packed key;
-
-	_enter("{%x,%x,%llx},%p,%u",
-	       vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version,
-	       buffer, bufmax);
-
-	/* Allow for a 96-bit key */
-	memset(&key, 0, sizeof(key));
-	key.vnode_id[0] = vnode->fid.vnode;
-	key.vnode_id[1] = 0;
-	key.vnode_id[2] = 0;
-
-	if (sizeof(key) > bufmax)
-		return 0;
-
-	memcpy(buffer, &key, sizeof(key));
-	return sizeof(key);
-}
-
-/*
- * provide updated file attributes
- */
-static void afs_vnode_cache_get_attr(const void *cookie_netfs_data,
-				     uint64_t *size)
-{
-	const struct afs_vnode *vnode = cookie_netfs_data;
-
-	_enter("{%x,%x,%llx},",
-	       vnode->fid.vnode, vnode->fid.unique,
-	       vnode->status.data_version);
-
-	*size = vnode->status.size;
-}
-
-struct afs_vnode_cache_aux {
-	u64 data_version;
-	u32 fid_unique;
-} __packed;
-
-/*
- * provide new auxiliary cache data
- */
-static uint16_t afs_vnode_cache_get_aux(const void *cookie_netfs_data,
-					void *buffer, uint16_t bufmax)
-{
-	const struct afs_vnode *vnode = cookie_netfs_data;
-	struct afs_vnode_cache_aux aux;
-
-	_enter("{%x,%x,%Lx},%p,%u",
-	       vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version,
-	       buffer, bufmax);
-
-	memset(&aux, 0, sizeof(aux));
-	aux.data_version = vnode->status.data_version;
-	aux.fid_unique = vnode->fid.unique;
-
-	if (bufmax < sizeof(aux))
-		return 0;
-
-	memcpy(buffer, &aux, sizeof(aux));
-	return sizeof(aux);
-}
-
-/*
  * check that the auxiliary data indicates that the entry is still valid
  */
 static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data,
 						       const void *buffer,
-						       uint16_t buflen)
+						       uint16_t buflen,
+						       loff_t object_size)
 {
 	struct afs_vnode *vnode = cookie_netfs_data;
 	struct afs_vnode_cache_aux aux;
@@ -189,12 +61,6 @@ static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data,
 		return FSCACHE_CHECKAUX_OBSOLETE;
 	}
 
-	if (vnode->fid.unique != aux.fid_unique) {
-		_leave(" = OBSOLETE [uniq %x != %x]",
-		       aux.fid_unique, vnode->fid.unique);
-		return FSCACHE_CHECKAUX_OBSOLETE;
-	}
-
 	if (vnode->status.data_version != aux.data_version) {
 		_leave(" = OBSOLETE [vers %llx != %llx]",
 		       aux.data_version, vnode->status.data_version);
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index f4291b576054..abd9a84f4e88 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -97,26 +97,6 @@ again:
 }
 
 /*
- * Set a vnode's interest on a server.
- */
-void afs_set_cb_interest(struct afs_vnode *vnode, struct afs_cb_interest *cbi)
-{
-	struct afs_cb_interest *old_cbi = NULL;
-
-	if (vnode->cb_interest == cbi)
-		return;
-
-	write_seqlock(&vnode->cb_lock);
-	if (vnode->cb_interest != cbi) {
-		afs_get_cb_interest(cbi);
-		old_cbi = vnode->cb_interest;
-		vnode->cb_interest = cbi;
-	}
-	write_sequnlock(&vnode->cb_lock);
-	afs_put_cb_interest(afs_v2net(vnode), cbi);
-}
-
-/*
  * Remove an interest on a server.
  */
 void afs_put_cb_interest(struct afs_net *net, struct afs_cb_interest *cbi)
@@ -150,6 +130,7 @@ void afs_break_callback(struct afs_vnode *vnode)
 
 	write_seqlock(&vnode->cb_lock);
 
+	clear_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags);
 	if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
 		vnode->cb_break++;
 		afs_clear_permits(vnode);
@@ -207,7 +188,7 @@ static void afs_break_one_callback(struct afs_server *server,
  * allow the fileserver to break callback promises
  */
 void afs_break_callbacks(struct afs_server *server, size_t count,
-			 struct afs_callback callbacks[])
+			 struct afs_callback_break *callbacks)
 {
 	_enter("%p,%zu,", server, count);
 
@@ -219,9 +200,9 @@ void afs_break_callbacks(struct afs_server *server, size_t count,
 		       callbacks->fid.vid,
 		       callbacks->fid.vnode,
 		       callbacks->fid.unique,
-		       callbacks->version,
-		       callbacks->expiry,
-		       callbacks->type
+		       callbacks->cb.version,
+		       callbacks->cb.expiry,
+		       callbacks->cb.type
 		       );
 		afs_break_one_callback(server, &callbacks->fid);
 	}
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index 3d2c5e0e854e..fdf4c36cff79 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -18,7 +18,7 @@
 #include <keys/rxrpc-type.h>
 #include "internal.h"
 
-unsigned __read_mostly afs_cell_gc_delay = 10;
+static unsigned __read_mostly afs_cell_gc_delay = 10;
 
 static void afs_manage_cell(struct work_struct *);
 
@@ -75,7 +75,7 @@ struct afs_cell *afs_lookup_cell_rcu(struct afs_net *net,
 			cell = rcu_dereference_raw(net->ws_cell);
 			if (cell) {
 				afs_get_cell(cell);
-				continue;
+				break;
 			}
 			ret = -EDESTADDRREQ;
 			continue;
@@ -130,6 +130,8 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net,
 		_leave(" = -ENAMETOOLONG");
 		return ERR_PTR(-ENAMETOOLONG);
 	}
+	if (namelen == 5 && memcmp(name, "@cell", 5) == 0)
+		return ERR_PTR(-EINVAL);
 
 	_enter("%*.*s,%s", namelen, namelen, name, vllist);
 
@@ -334,8 +336,8 @@ int afs_cell_init(struct afs_net *net, const char *rootcell)
 		return PTR_ERR(new_root);
 	}
 
-	set_bit(AFS_CELL_FL_NO_GC, &new_root->flags);
-	afs_get_cell(new_root);
+	if (!test_and_set_bit(AFS_CELL_FL_NO_GC, &new_root->flags))
+		afs_get_cell(new_root);
 
 	/* install the new cell */
 	write_seqlock(&net->cells_lock);
@@ -411,7 +413,7 @@ static void afs_cell_destroy(struct rcu_head *rcu)
 
 	ASSERTCMP(atomic_read(&cell->usage), ==, 0);
 
-	afs_put_addrlist(cell->vl_addrs);
+	afs_put_addrlist(rcu_access_pointer(cell->vl_addrs));
 	key_put(cell->anonymous_key);
 	kfree(cell);
 
@@ -522,7 +524,9 @@ static int afs_activate_cell(struct afs_net *net, struct afs_cell *cell)
 #ifdef CONFIG_AFS_FSCACHE
 	cell->cache = fscache_acquire_cookie(afs_cache_netfs.primary_index,
 					     &afs_cell_cache_index_def,
-					     cell, true);
+					     cell->name, strlen(cell->name),
+					     NULL, 0,
+					     cell, 0, true);
 #endif
 	ret = afs_proc_cell_setup(net, cell);
 	if (ret < 0)
@@ -547,7 +551,7 @@ static void afs_deactivate_cell(struct afs_net *net, struct afs_cell *cell)
 	spin_unlock(&net->proc_cells_lock);
 
 #ifdef CONFIG_AFS_FSCACHE
-	fscache_relinquish_cookie(cell->cache, 0);
+	fscache_relinquish_cookie(cell->cache, NULL, false);
 	cell->cache = NULL;
 #endif
 
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 41e277f57b20..357de908df3a 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -178,8 +178,8 @@ static void SRXAFSCB_CallBack(struct work_struct *work)
  */
 static int afs_deliver_cb_callback(struct afs_call *call)
 {
+	struct afs_callback_break *cb;
 	struct sockaddr_rxrpc srx;
-	struct afs_callback *cb;
 	struct afs_server *server;
 	__be32 *bp;
 	int ret, loop;
@@ -201,7 +201,7 @@ static int afs_deliver_cb_callback(struct afs_call *call)
 		call->count = ntohl(call->tmp);
 		_debug("FID count: %u", call->count);
 		if (call->count > AFSCBMAX)
-			return -EBADMSG;
+			return afs_protocol_error(call, -EBADMSG);
 
 		call->buffer = kmalloc(call->count * 3 * 4, GFP_KERNEL);
 		if (!call->buffer)
@@ -218,7 +218,7 @@ static int afs_deliver_cb_callback(struct afs_call *call)
 
 		_debug("unmarshall FID array");
 		call->request = kcalloc(call->count,
-					sizeof(struct afs_callback),
+					sizeof(struct afs_callback_break),
 					GFP_KERNEL);
 		if (!call->request)
 			return -ENOMEM;
@@ -229,7 +229,7 @@ static int afs_deliver_cb_callback(struct afs_call *call)
 			cb->fid.vid	= ntohl(*bp++);
 			cb->fid.vnode	= ntohl(*bp++);
 			cb->fid.unique	= ntohl(*bp++);
-			cb->type	= AFSCM_CB_UNTYPED;
+			cb->cb.type	= AFSCM_CB_UNTYPED;
 		}
 
 		call->offset = 0;
@@ -245,7 +245,7 @@ static int afs_deliver_cb_callback(struct afs_call *call)
 		call->count2 = ntohl(call->tmp);
 		_debug("CB count: %u", call->count2);
 		if (call->count2 != call->count && call->count2 != 0)
-			return -EBADMSG;
+			return afs_protocol_error(call, -EBADMSG);
 		call->offset = 0;
 		call->unmarshall++;
 
@@ -260,9 +260,9 @@ static int afs_deliver_cb_callback(struct afs_call *call)
 		cb = call->request;
 		bp = call->buffer;
 		for (loop = call->count2; loop > 0; loop--, cb++) {
-			cb->version	= ntohl(*bp++);
-			cb->expiry	= ntohl(*bp++);
-			cb->type	= ntohl(*bp++);
+			cb->cb.version	= ntohl(*bp++);
+			cb->cb.expiry	= ntohl(*bp++);
+			cb->cb.type	= ntohl(*bp++);
 		}
 
 		call->offset = 0;
@@ -500,9 +500,9 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call)
 
 		b = call->buffer;
 		r = call->request;
-		r->time_low			= ntohl(b[0]);
-		r->time_mid			= ntohl(b[1]);
-		r->time_hi_and_version		= ntohl(b[2]);
+		r->time_low			= b[0];
+		r->time_mid			= htons(ntohl(b[1]));
+		r->time_hi_and_version		= htons(ntohl(b[2]));
 		r->clock_seq_hi_and_reserved 	= ntohl(b[3]);
 		r->clock_seq_low		= ntohl(b[4]);
 
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index ba2b458b36d1..5889f70d4d27 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -1,6 +1,6 @@
 /* dir.c: AFS filesystem directory handling
  *
- * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2002, 2018 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
  *
  * This program is free software; you can redistribute it and/or
@@ -10,27 +10,26 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
 #include <linux/fs.h>
 #include <linux/namei.h>
 #include <linux/pagemap.h>
+#include <linux/swap.h>
 #include <linux/ctype.h>
 #include <linux/sched.h>
-#include <linux/dns_resolver.h>
+#include <linux/task_io_accounting_ops.h>
 #include "internal.h"
+#include "xdr_fs.h"
 
 static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
 				 unsigned int flags);
-static struct dentry *afs_dynroot_lookup(struct inode *dir, struct dentry *dentry,
-					 unsigned int flags);
 static int afs_dir_open(struct inode *inode, struct file *file);
 static int afs_readdir(struct file *file, struct dir_context *ctx);
 static int afs_d_revalidate(struct dentry *dentry, unsigned int flags);
 static int afs_d_delete(const struct dentry *dentry);
-static void afs_d_release(struct dentry *dentry);
-static int afs_lookup_filldir(struct dir_context *ctx, const char *name, int nlen,
+static int afs_lookup_one_filldir(struct dir_context *ctx, const char *name, int nlen,
 				  loff_t fpos, u64 ino, unsigned dtype);
+static int afs_lookup_filldir(struct dir_context *ctx, const char *name, int nlen,
+			      loff_t fpos, u64 ino, unsigned dtype);
 static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 		      bool excl);
 static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
@@ -43,6 +42,14 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry,
 static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		      struct inode *new_dir, struct dentry *new_dentry,
 		      unsigned int flags);
+static int afs_dir_releasepage(struct page *page, gfp_t gfp_flags);
+static void afs_dir_invalidatepage(struct page *page, unsigned int offset,
+				   unsigned int length);
+
+static int afs_dir_set_page_dirty(struct page *page)
+{
+	BUG(); /* This should never happen. */
+}
 
 const struct file_operations afs_dir_file_operations = {
 	.open		= afs_dir_open,
@@ -67,15 +74,10 @@ const struct inode_operations afs_dir_inode_operations = {
 	.listxattr	= afs_listxattr,
 };
 
-const struct file_operations afs_dynroot_file_operations = {
-	.open		= dcache_dir_open,
-	.release	= dcache_dir_close,
-	.iterate_shared	= dcache_readdir,
-	.llseek		= dcache_dir_lseek,
-};
-
-const struct inode_operations afs_dynroot_inode_operations = {
-	.lookup		= afs_dynroot_lookup,
+const struct address_space_operations afs_dir_aops = {
+	.set_page_dirty	= afs_dir_set_page_dirty,
+	.releasepage	= afs_dir_releasepage,
+	.invalidatepage	= afs_dir_invalidatepage,
 };
 
 const struct dentry_operations afs_fs_dentry_operations = {
@@ -85,91 +87,38 @@ const struct dentry_operations afs_fs_dentry_operations = {
 	.d_automount	= afs_d_automount,
 };
 
-#define AFS_DIR_HASHTBL_SIZE	128
-#define AFS_DIR_DIRENT_SIZE	32
-#define AFS_DIRENT_PER_BLOCK	64
-
-union afs_dirent {
-	struct {
-		uint8_t		valid;
-		uint8_t		unused[1];
-		__be16		hash_next;
-		__be32		vnode;
-		__be32		unique;
-		uint8_t		name[16];
-		uint8_t		overflow[4];	/* if any char of the name (inc
-						 * NUL) reaches here, consume
-						 * the next dirent too */
-	} u;
-	uint8_t	extended_name[32];
-};
-
-/* AFS directory page header (one at the beginning of every 2048-byte chunk) */
-struct afs_dir_pagehdr {
-	__be16		npages;
-	__be16		magic;
-#define AFS_DIR_MAGIC htons(1234)
-	uint8_t		nentries;
-	uint8_t		bitmap[8];
-	uint8_t		pad[19];
-};
-
-/* directory block layout */
-union afs_dir_block {
-
-	struct afs_dir_pagehdr pagehdr;
-
-	struct {
-		struct afs_dir_pagehdr	pagehdr;
-		uint8_t			alloc_ctrs[128];
-		/* dir hash table */
-		uint16_t		hashtable[AFS_DIR_HASHTBL_SIZE];
-	} hdr;
-
-	union afs_dirent dirents[AFS_DIRENT_PER_BLOCK];
-};
-
-/* layout on a linux VM page */
-struct afs_dir_page {
-	union afs_dir_block blocks[PAGE_SIZE / sizeof(union afs_dir_block)];
+struct afs_lookup_one_cookie {
+	struct dir_context	ctx;
+	struct qstr		name;
+	bool			found;
+	struct afs_fid		fid;
 };
 
 struct afs_lookup_cookie {
-	struct dir_context ctx;
-	struct afs_fid	fid;
-	struct qstr name;
-	int		found;
+	struct dir_context	ctx;
+	struct qstr		name;
+	bool			found;
+	bool			one_only;
+	unsigned short		nr_fids;
+	struct afs_file_status	*statuses;
+	struct afs_callback	*callbacks;
+	struct afs_fid		fids[50];
 };
 
 /*
  * check that a directory page is valid
  */
-bool afs_dir_check_page(struct inode *dir, struct page *page)
+static bool afs_dir_check_page(struct afs_vnode *dvnode, struct page *page,
+			       loff_t i_size)
 {
-	struct afs_dir_page *dbuf;
-	struct afs_vnode *vnode = AFS_FS_I(dir);
-	loff_t latter, i_size, off;
+	struct afs_xdr_dir_page *dbuf;
+	loff_t latter, off;
 	int tmp, qty;
 
-#if 0
-	/* check the page count */
-	qty = desc.size / sizeof(dbuf->blocks[0]);
-	if (qty == 0)
-		goto error;
-
-	if (page->index == 0 && qty != ntohs(dbuf->blocks[0].pagehdr.npages)) {
-		printk("kAFS: %s(%lu): wrong number of dir blocks %d!=%hu\n",
-		       __func__, dir->i_ino, qty,
-		       ntohs(dbuf->blocks[0].pagehdr.npages));
-		goto error;
-	}
-#endif
-
 	/* Determine how many magic numbers there should be in this page, but
 	 * we must take care because the directory may change size under us.
 	 */
 	off = page_offset(page);
-	i_size = i_size_read(dir);
 	if (i_size <= off)
 		goto checked;
 
@@ -178,112 +127,225 @@ bool afs_dir_check_page(struct inode *dir, struct page *page)
 		qty = PAGE_SIZE;
 	else
 		qty = latter;
-	qty /= sizeof(union afs_dir_block);
+	qty /= sizeof(union afs_xdr_dir_block);
 
 	/* check them */
-	dbuf = page_address(page);
+	dbuf = kmap(page);
 	for (tmp = 0; tmp < qty; tmp++) {
-		if (dbuf->blocks[tmp].pagehdr.magic != AFS_DIR_MAGIC) {
+		if (dbuf->blocks[tmp].hdr.magic != AFS_DIR_MAGIC) {
 			printk("kAFS: %s(%lx): bad magic %d/%d is %04hx\n",
-			       __func__, dir->i_ino, tmp, qty,
-			       ntohs(dbuf->blocks[tmp].pagehdr.magic));
-			trace_afs_dir_check_failed(vnode, off, i_size);
+			       __func__, dvnode->vfs_inode.i_ino, tmp, qty,
+			       ntohs(dbuf->blocks[tmp].hdr.magic));
+			trace_afs_dir_check_failed(dvnode, off, i_size);
+			kunmap(page);
 			goto error;
 		}
+
+		/* Make sure each block is NUL terminated so we can reasonably
+		 * use string functions on it.  The filenames in the page
+		 * *should* be NUL-terminated anyway.
+		 */
+		((u8 *)&dbuf->blocks[tmp])[AFS_DIR_BLOCK_SIZE - 1] = 0;
 	}
 
+	kunmap(page);
+
 checked:
-	SetPageChecked(page);
+	afs_stat_v(dvnode, n_read_dir);
 	return true;
 
 error:
-	SetPageError(page);
 	return false;
 }
 
 /*
- * discard a page cached in the pagecache
+ * open an AFS directory file
  */
-static inline void afs_dir_put_page(struct page *page)
+static int afs_dir_open(struct inode *inode, struct file *file)
 {
-	kunmap(page);
-	unlock_page(page);
-	put_page(page);
+	_enter("{%lu}", inode->i_ino);
+
+	BUILD_BUG_ON(sizeof(union afs_xdr_dir_block) != 2048);
+	BUILD_BUG_ON(sizeof(union afs_xdr_dirent) != 32);
+
+	if (test_bit(AFS_VNODE_DELETED, &AFS_FS_I(inode)->flags))
+		return -ENOENT;
+
+	return afs_open(inode, file);
 }
 
 /*
- * get a page into the pagecache
+ * Read the directory into the pagecache in one go, scrubbing the previous
+ * contents.  The list of pages is returned, pinning them so that they don't
+ * get reclaimed during the iteration.
  */
-static struct page *afs_dir_get_page(struct inode *dir, unsigned long index,
-				     struct key *key)
+static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
 {
-	struct page *page;
-	_enter("{%lu},%lu", dir->i_ino, index);
-
-	page = read_cache_page(dir->i_mapping, index, afs_page_filler, key);
-	if (!IS_ERR(page)) {
-		lock_page(page);
-		kmap(page);
-		if (unlikely(!PageChecked(page))) {
-			if (PageError(page))
-				goto fail;
-		}
+	struct afs_read *req;
+	loff_t i_size;
+	int nr_pages, nr_inline, i, n;
+	int ret = -ENOMEM;
+
+retry:
+	i_size = i_size_read(&dvnode->vfs_inode);
+	if (i_size < 2048)
+		return ERR_PTR(-EIO);
+	if (i_size > 2048 * 1024)
+		return ERR_PTR(-EFBIG);
+
+	_enter("%llu", i_size);
+
+	/* Get a request record to hold the page list.  We want to hold it
+	 * inline if we can, but we don't want to make an order 1 allocation.
+	 */
+	nr_pages = (i_size + PAGE_SIZE - 1) / PAGE_SIZE;
+	nr_inline = nr_pages;
+	if (nr_inline > (PAGE_SIZE - sizeof(*req)) / sizeof(struct page *))
+		nr_inline = 0;
+
+	req = kzalloc(sizeof(*req) + sizeof(struct page *) * nr_inline,
+		      GFP_KERNEL);
+	if (!req)
+		return ERR_PTR(-ENOMEM);
+
+	refcount_set(&req->usage, 1);
+	req->nr_pages = nr_pages;
+	req->actual_len = i_size; /* May change */
+	req->len = nr_pages * PAGE_SIZE; /* We can ask for more than there is */
+	req->data_version = dvnode->status.data_version; /* May change */
+	if (nr_inline > 0) {
+		req->pages = req->array;
+	} else {
+		req->pages = kcalloc(nr_pages, sizeof(struct page *),
+				     GFP_KERNEL);
+		if (!req->pages)
+			goto error;
 	}
-	return page;
 
-fail:
-	afs_dir_put_page(page);
-	_leave(" = -EIO");
-	return ERR_PTR(-EIO);
-}
+	/* Get a list of all the pages that hold or will hold the directory
+	 * content.  We need to fill in any gaps that we might find where the
+	 * memory reclaimer has been at work.  If there are any gaps, we will
+	 * need to reread the entire directory contents.
+	 */
+	i = 0;
+	do {
+		n = find_get_pages_contig(dvnode->vfs_inode.i_mapping, i,
+					  req->nr_pages - i,
+					  req->pages + i);
+		_debug("find %u at %u/%u", n, i, req->nr_pages);
+		if (n == 0) {
+			gfp_t gfp = dvnode->vfs_inode.i_mapping->gfp_mask;
+
+			if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
+				afs_stat_v(dvnode, n_inval);
+
+			ret = -ENOMEM;
+			req->pages[i] = __page_cache_alloc(gfp);
+			if (!req->pages[i])
+				goto error;
+			ret = add_to_page_cache_lru(req->pages[i],
+						    dvnode->vfs_inode.i_mapping,
+						    i, gfp);
+			if (ret < 0)
+				goto error;
+
+			set_page_private(req->pages[i], 1);
+			SetPagePrivate(req->pages[i]);
+			unlock_page(req->pages[i]);
+			i++;
+		} else {
+			i += n;
+		}
+	} while (i < req->nr_pages);
 
-/*
- * open an AFS directory file
- */
-static int afs_dir_open(struct inode *inode, struct file *file)
-{
-	_enter("{%lu}", inode->i_ino);
+	/* If we're going to reload, we need to lock all the pages to prevent
+	 * races.
+	 */
+	if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) {
+		ret = -ERESTARTSYS;
+		for (i = 0; i < req->nr_pages; i++)
+			if (lock_page_killable(req->pages[i]) < 0)
+				goto error_unlock;
 
-	BUILD_BUG_ON(sizeof(union afs_dir_block) != 2048);
-	BUILD_BUG_ON(sizeof(union afs_dirent) != 32);
+		if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
+			goto success;
 
-	if (test_bit(AFS_VNODE_DELETED, &AFS_FS_I(inode)->flags))
-		return -ENOENT;
+		ret = afs_fetch_data(dvnode, key, req);
+		if (ret < 0)
+			goto error_unlock_all;
 
-	return afs_open(inode, file);
+		task_io_account_read(PAGE_SIZE * req->nr_pages);
+
+		if (req->len < req->file_size)
+			goto content_has_grown;
+
+		/* Validate the data we just read. */
+		ret = -EIO;
+		for (i = 0; i < req->nr_pages; i++)
+			if (!afs_dir_check_page(dvnode, req->pages[i],
+						req->actual_len))
+				goto error_unlock_all;
+
+		// TODO: Trim excess pages
+
+		set_bit(AFS_VNODE_DIR_VALID, &dvnode->flags);
+	}
+
+success:
+	i = req->nr_pages;
+	while (i > 0)
+		unlock_page(req->pages[--i]);
+	return req;
+
+error_unlock_all:
+	i = req->nr_pages;
+error_unlock:
+	while (i > 0)
+		unlock_page(req->pages[--i]);
+error:
+	afs_put_read(req);
+	_leave(" = %d", ret);
+	return ERR_PTR(ret);
+
+content_has_grown:
+	i = req->nr_pages;
+	while (i > 0)
+		unlock_page(req->pages[--i]);
+	afs_put_read(req);
+	goto retry;
 }
 
 /*
  * deal with one block in an AFS directory
  */
 static int afs_dir_iterate_block(struct dir_context *ctx,
-				 union afs_dir_block *block,
+				 union afs_xdr_dir_block *block,
 				 unsigned blkoff)
 {
-	union afs_dirent *dire;
+	union afs_xdr_dirent *dire;
 	unsigned offset, next, curr;
 	size_t nlen;
 	int tmp;
 
 	_enter("%u,%x,%p,,",(unsigned)ctx->pos,blkoff,block);
 
-	curr = (ctx->pos - blkoff) / sizeof(union afs_dirent);
+	curr = (ctx->pos - blkoff) / sizeof(union afs_xdr_dirent);
 
 	/* walk through the block, an entry at a time */
-	for (offset = AFS_DIRENT_PER_BLOCK - block->pagehdr.nentries;
-	     offset < AFS_DIRENT_PER_BLOCK;
+	for (offset = (blkoff == 0 ? AFS_DIR_RESV_BLOCKS0 : AFS_DIR_RESV_BLOCKS);
+	     offset < AFS_DIR_SLOTS_PER_BLOCK;
 	     offset = next
 	     ) {
 		next = offset + 1;
 
 		/* skip entries marked unused in the bitmap */
-		if (!(block->pagehdr.bitmap[offset / 8] &
+		if (!(block->hdr.bitmap[offset / 8] &
 		      (1 << (offset % 8)))) {
 			_debug("ENT[%zu.%u]: unused",
-			       blkoff / sizeof(union afs_dir_block), offset);
+			       blkoff / sizeof(union afs_xdr_dir_block), offset);
 			if (offset >= curr)
 				ctx->pos = blkoff +
-					next * sizeof(union afs_dirent);
+					next * sizeof(union afs_xdr_dirent);
 			continue;
 		}
 
@@ -291,34 +353,34 @@ static int afs_dir_iterate_block(struct dir_context *ctx,
 		dire = &block->dirents[offset];
 		nlen = strnlen(dire->u.name,
 			       sizeof(*block) -
-			       offset * sizeof(union afs_dirent));
+			       offset * sizeof(union afs_xdr_dirent));
 
 		_debug("ENT[%zu.%u]: %s %zu \"%s\"",
-		       blkoff / sizeof(union afs_dir_block), offset,
+		       blkoff / sizeof(union afs_xdr_dir_block), offset,
 		       (offset < curr ? "skip" : "fill"),
 		       nlen, dire->u.name);
 
 		/* work out where the next possible entry is */
-		for (tmp = nlen; tmp > 15; tmp -= sizeof(union afs_dirent)) {
-			if (next >= AFS_DIRENT_PER_BLOCK) {
+		for (tmp = nlen; tmp > 15; tmp -= sizeof(union afs_xdr_dirent)) {
+			if (next >= AFS_DIR_SLOTS_PER_BLOCK) {
 				_debug("ENT[%zu.%u]:"
 				       " %u travelled beyond end dir block"
 				       " (len %u/%zu)",
-				       blkoff / sizeof(union afs_dir_block),
+				       blkoff / sizeof(union afs_xdr_dir_block),
 				       offset, next, tmp, nlen);
 				return -EIO;
 			}
-			if (!(block->pagehdr.bitmap[next / 8] &
+			if (!(block->hdr.bitmap[next / 8] &
 			      (1 << (next % 8)))) {
 				_debug("ENT[%zu.%u]:"
 				       " %u unmarked extension (len %u/%zu)",
-				       blkoff / sizeof(union afs_dir_block),
+				       blkoff / sizeof(union afs_xdr_dir_block),
 				       offset, next, tmp, nlen);
 				return -EIO;
 			}
 
 			_debug("ENT[%zu.%u]: ext %u/%zu",
-			       blkoff / sizeof(union afs_dir_block),
+			       blkoff / sizeof(union afs_xdr_dir_block),
 			       next, tmp, nlen);
 			next++;
 		}
@@ -330,13 +392,14 @@ static int afs_dir_iterate_block(struct dir_context *ctx,
 		/* found the next entry */
 		if (!dir_emit(ctx, dire->u.name, nlen,
 			      ntohl(dire->u.vnode),
-			      ctx->actor == afs_lookup_filldir ?
+			      (ctx->actor == afs_lookup_filldir ||
+			       ctx->actor == afs_lookup_one_filldir)?
 			      ntohl(dire->u.unique) : DT_UNKNOWN)) {
 			_leave(" = 0 [full]");
 			return 0;
 		}
 
-		ctx->pos = blkoff + next * sizeof(union afs_dirent);
+		ctx->pos = blkoff + next * sizeof(union afs_xdr_dirent);
 	}
 
 	_leave(" = 1 [more]");
@@ -349,8 +412,10 @@ static int afs_dir_iterate_block(struct dir_context *ctx,
 static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
 			   struct key *key)
 {
-	union afs_dir_block *dblock;
-	struct afs_dir_page *dbuf;
+	struct afs_vnode *dvnode = AFS_FS_I(dir);
+	struct afs_xdr_dir_page *dbuf;
+	union afs_xdr_dir_block *dblock;
+	struct afs_read *req;
 	struct page *page;
 	unsigned blkoff, limit;
 	int ret;
@@ -362,45 +427,53 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
 		return -ESTALE;
 	}
 
+	req = afs_read_dir(dvnode, key);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
 	/* round the file position up to the next entry boundary */
-	ctx->pos += sizeof(union afs_dirent) - 1;
-	ctx->pos &= ~(sizeof(union afs_dirent) - 1);
+	ctx->pos += sizeof(union afs_xdr_dirent) - 1;
+	ctx->pos &= ~(sizeof(union afs_xdr_dirent) - 1);
 
 	/* walk through the blocks in sequence */
 	ret = 0;
-	while (ctx->pos < dir->i_size) {
-		blkoff = ctx->pos & ~(sizeof(union afs_dir_block) - 1);
+	while (ctx->pos < req->actual_len) {
+		blkoff = ctx->pos & ~(sizeof(union afs_xdr_dir_block) - 1);
 
-		/* fetch the appropriate page from the directory */
-		page = afs_dir_get_page(dir, blkoff / PAGE_SIZE, key);
-		if (IS_ERR(page)) {
-			ret = PTR_ERR(page);
+		/* Fetch the appropriate page from the directory and re-add it
+		 * to the LRU.
+		 */
+		page = req->pages[blkoff / PAGE_SIZE];
+		if (!page) {
+			ret = -EIO;
 			break;
 		}
+		mark_page_accessed(page);
 
 		limit = blkoff & ~(PAGE_SIZE - 1);
 
-		dbuf = page_address(page);
+		dbuf = kmap(page);
 
 		/* deal with the individual blocks stashed on this page */
 		do {
 			dblock = &dbuf->blocks[(blkoff % PAGE_SIZE) /
-					       sizeof(union afs_dir_block)];
+					       sizeof(union afs_xdr_dir_block)];
 			ret = afs_dir_iterate_block(ctx, dblock, blkoff);
 			if (ret != 1) {
-				afs_dir_put_page(page);
+				kunmap(page);
 				goto out;
 			}
 
-			blkoff += sizeof(union afs_dir_block);
+			blkoff += sizeof(union afs_xdr_dir_block);
 
 		} while (ctx->pos < dir->i_size && blkoff < limit);
 
-		afs_dir_put_page(page);
+		kunmap(page);
 		ret = 0;
 	}
 
 out:
+	afs_put_read(req);
 	_leave(" = %d", ret);
 	return ret;
 }
@@ -414,23 +487,23 @@ static int afs_readdir(struct file *file, struct dir_context *ctx)
 }
 
 /*
- * search the directory for a name
+ * Search the directory for a single name
  * - if afs_dir_iterate_block() spots this function, it'll pass the FID
  *   uniquifier through dtype
  */
-static int afs_lookup_filldir(struct dir_context *ctx, const char *name,
-			      int nlen, loff_t fpos, u64 ino, unsigned dtype)
+static int afs_lookup_one_filldir(struct dir_context *ctx, const char *name,
+				  int nlen, loff_t fpos, u64 ino, unsigned dtype)
 {
-	struct afs_lookup_cookie *cookie =
-		container_of(ctx, struct afs_lookup_cookie, ctx);
+	struct afs_lookup_one_cookie *cookie =
+		container_of(ctx, struct afs_lookup_one_cookie, ctx);
 
 	_enter("{%s,%u},%s,%u,,%llu,%u",
 	       cookie->name.name, cookie->name.len, name, nlen,
 	       (unsigned long long) ino, dtype);
 
 	/* insanity checks first */
-	BUILD_BUG_ON(sizeof(union afs_dir_block) != 2048);
-	BUILD_BUG_ON(sizeof(union afs_dirent) != 32);
+	BUILD_BUG_ON(sizeof(union afs_xdr_dir_block) != 2048);
+	BUILD_BUG_ON(sizeof(union afs_xdr_dirent) != 32);
 
 	if (cookie->name.len != nlen ||
 	    memcmp(cookie->name.name, name, nlen) != 0) {
@@ -447,15 +520,15 @@ static int afs_lookup_filldir(struct dir_context *ctx, const char *name,
 }
 
 /*
- * do a lookup in a directory
+ * Do a lookup of a single name in a directory
  * - just returns the FID the dentry name maps to if found
  */
-static int afs_do_lookup(struct inode *dir, struct dentry *dentry,
-			 struct afs_fid *fid, struct key *key)
+static int afs_do_lookup_one(struct inode *dir, struct dentry *dentry,
+			     struct afs_fid *fid, struct key *key)
 {
 	struct afs_super_info *as = dir->i_sb->s_fs_info;
-	struct afs_lookup_cookie cookie = {
-		.ctx.actor = afs_lookup_filldir,
+	struct afs_lookup_one_cookie cookie = {
+		.ctx.actor = afs_lookup_one_filldir,
 		.name = dentry->d_name,
 		.fid.vid = as->volume->vid
 	};
@@ -482,70 +555,265 @@ static int afs_do_lookup(struct inode *dir, struct dentry *dentry,
 }
 
 /*
- * Probe to see if a cell may exist.  This prevents positive dentries from
- * being created unnecessarily.
+ * Search the directory for a name, also noting the FIDs of later entries
+ * - if afs_dir_iterate_block() spots this function, it'll pass the FID
+ *   uniquifier through dtype
  */
-static int afs_probe_cell_name(struct dentry *dentry)
+static int afs_lookup_filldir(struct dir_context *ctx, const char *name,
+			      int nlen, loff_t fpos, u64 ino, unsigned dtype)
 {
-	struct afs_cell *cell;
-	const char *name = dentry->d_name.name;
-	size_t len = dentry->d_name.len;
+	struct afs_lookup_cookie *cookie =
+		container_of(ctx, struct afs_lookup_cookie, ctx);
 	int ret;
 
-	/* Names prefixed with a dot are R/W mounts. */
-	if (name[0] == '.') {
-		if (len == 1)
-			return -EINVAL;
-		name++;
-		len--;
-	}
+	_enter("{%s,%u},%s,%u,,%llu,%u",
+	       cookie->name.name, cookie->name.len, name, nlen,
+	       (unsigned long long) ino, dtype);
 
-	cell = afs_lookup_cell_rcu(afs_d2net(dentry), name, len);
-	if (!IS_ERR(cell)) {
-		afs_put_cell(afs_d2net(dentry), cell);
-		return 0;
+	/* insanity checks first */
+	BUILD_BUG_ON(sizeof(union afs_xdr_dir_block) != 2048);
+	BUILD_BUG_ON(sizeof(union afs_xdr_dirent) != 32);
+
+	if (cookie->found) {
+		if (cookie->nr_fids < 50) {
+			cookie->fids[cookie->nr_fids].vnode	= ino;
+			cookie->fids[cookie->nr_fids].unique	= dtype;
+			cookie->nr_fids++;
+		}
+	} else if (cookie->name.len == nlen &&
+		   memcmp(cookie->name.name, name, nlen) == 0) {
+		cookie->fids[0].vnode	= ino;
+		cookie->fids[0].unique	= dtype;
+		cookie->found = 1;
+		if (cookie->one_only)
+			return -1;
 	}
 
-	ret = dns_query("afsdb", name, len, "ipv4", NULL, NULL);
-	if (ret == -ENODATA)
-		ret = -EDESTADDRREQ;
+	ret = cookie->nr_fids >= 50 ? -1 : 0;
+	_leave(" = %d", ret);
 	return ret;
 }
 
 /*
- * Try to auto mount the mountpoint with pseudo directory, if the autocell
- * operation is setted.
+ * Do a lookup in a directory.  We make use of bulk lookup to query a slew of
+ * files in one go and create inodes for them.  The inode of the file we were
+ * asked for is returned.
  */
-static struct inode *afs_try_auto_mntpt(struct dentry *dentry,
-					struct inode *dir, struct afs_fid *fid)
+static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry,
+				   struct key *key)
 {
-	struct afs_vnode *vnode = AFS_FS_I(dir);
-	struct inode *inode;
-	int ret = -ENOENT;
+	struct afs_lookup_cookie *cookie;
+	struct afs_cb_interest *cbi = NULL;
+	struct afs_super_info *as = dir->i_sb->s_fs_info;
+	struct afs_iget_data data;
+	struct afs_fs_cursor fc;
+	struct afs_vnode *dvnode = AFS_FS_I(dir);
+	struct inode *inode = NULL;
+	int ret, i;
 
-	_enter("%p{%pd}, {%x:%u}",
-	       dentry, dentry, vnode->fid.vid, vnode->fid.vnode);
+	_enter("{%lu},%p{%pd},", dir->i_ino, dentry, dentry);
+
+	cookie = kzalloc(sizeof(struct afs_lookup_cookie), GFP_KERNEL);
+	if (!cookie)
+		return ERR_PTR(-ENOMEM);
+
+	cookie->ctx.actor = afs_lookup_filldir;
+	cookie->name = dentry->d_name;
+	cookie->nr_fids = 1; /* slot 0 is saved for the fid we actually want */
+
+	read_seqlock_excl(&dvnode->cb_lock);
+	if (dvnode->cb_interest &&
+	    dvnode->cb_interest->server &&
+	    test_bit(AFS_SERVER_FL_NO_IBULK, &dvnode->cb_interest->server->flags))
+		cookie->one_only = true;
+	read_sequnlock_excl(&dvnode->cb_lock);
+
+	for (i = 0; i < 50; i++)
+		cookie->fids[i].vid = as->volume->vid;
+
+	/* search the directory */
+	ret = afs_dir_iterate(dir, &cookie->ctx, key);
+	if (ret < 0) {
+		inode = ERR_PTR(ret);
+		goto out;
+	}
 
-	if (!test_bit(AFS_VNODE_AUTOCELL, &vnode->flags))
+	inode = ERR_PTR(-ENOENT);
+	if (!cookie->found)
 		goto out;
 
-	ret = afs_probe_cell_name(dentry);
-	if (ret < 0)
+	/* Check to see if we already have an inode for the primary fid. */
+	data.volume = dvnode->volume;
+	data.fid = cookie->fids[0];
+	inode = ilookup5(dir->i_sb, cookie->fids[0].vnode, afs_iget5_test, &data);
+	if (inode)
 		goto out;
 
-	inode = afs_iget_pseudo_dir(dir->i_sb, false);
-	if (IS_ERR(inode)) {
-		ret = PTR_ERR(inode);
+	/* Need space for examining all the selected files */
+	inode = ERR_PTR(-ENOMEM);
+	cookie->statuses = kcalloc(cookie->nr_fids, sizeof(struct afs_file_status),
+				   GFP_KERNEL);
+	if (!cookie->statuses)
 		goto out;
+
+	cookie->callbacks = kcalloc(cookie->nr_fids, sizeof(struct afs_callback),
+				    GFP_KERNEL);
+	if (!cookie->callbacks)
+		goto out_s;
+
+	/* Try FS.InlineBulkStatus first.  Abort codes for the individual
+	 * lookups contained therein are stored in the reply without aborting
+	 * the whole operation.
+	 */
+	if (cookie->one_only)
+		goto no_inline_bulk_status;
+
+	inode = ERR_PTR(-ERESTARTSYS);
+	if (afs_begin_vnode_operation(&fc, dvnode, key)) {
+		while (afs_select_fileserver(&fc)) {
+			if (test_bit(AFS_SERVER_FL_NO_IBULK,
+				      &fc.cbi->server->flags)) {
+				fc.ac.abort_code = RX_INVALID_OPERATION;
+				fc.ac.error = -ECONNABORTED;
+				break;
+			}
+			afs_fs_inline_bulk_status(&fc,
+						  afs_v2net(dvnode),
+						  cookie->fids,
+						  cookie->statuses,
+						  cookie->callbacks,
+						  cookie->nr_fids, NULL);
+		}
+
+		if (fc.ac.error == 0)
+			cbi = afs_get_cb_interest(fc.cbi);
+		if (fc.ac.abort_code == RX_INVALID_OPERATION)
+			set_bit(AFS_SERVER_FL_NO_IBULK, &fc.cbi->server->flags);
+		inode = ERR_PTR(afs_end_vnode_operation(&fc));
 	}
 
-	*fid = AFS_FS_I(inode)->fid;
-	_leave("= %p", inode);
-	return inode;
+	if (!IS_ERR(inode))
+		goto success;
+	if (fc.ac.abort_code != RX_INVALID_OPERATION)
+		goto out_c;
+
+no_inline_bulk_status:
+	/* We could try FS.BulkStatus next, but this aborts the entire op if
+	 * any of the lookups fails - so, for the moment, revert to
+	 * FS.FetchStatus for just the primary fid.
+	 */
+	cookie->nr_fids = 1;
+	inode = ERR_PTR(-ERESTARTSYS);
+	if (afs_begin_vnode_operation(&fc, dvnode, key)) {
+		while (afs_select_fileserver(&fc)) {
+			afs_fs_fetch_status(&fc,
+					    afs_v2net(dvnode),
+					    cookie->fids,
+					    cookie->statuses,
+					    cookie->callbacks,
+					    NULL);
+		}
+
+		if (fc.ac.error == 0)
+			cbi = afs_get_cb_interest(fc.cbi);
+		inode = ERR_PTR(afs_end_vnode_operation(&fc));
+	}
 
+	if (IS_ERR(inode))
+		goto out_c;
+
+	for (i = 0; i < cookie->nr_fids; i++)
+		cookie->statuses[i].abort_code = 0;
+
+success:
+	/* Turn all the files into inodes and save the first one - which is the
+	 * one we actually want.
+	 */
+	if (cookie->statuses[0].abort_code != 0)
+		inode = ERR_PTR(afs_abort_to_error(cookie->statuses[0].abort_code));
+
+	for (i = 0; i < cookie->nr_fids; i++) {
+		struct inode *ti;
+
+		if (cookie->statuses[i].abort_code != 0)
+			continue;
+
+		ti = afs_iget(dir->i_sb, key, &cookie->fids[i],
+			      &cookie->statuses[i],
+			      &cookie->callbacks[i],
+			      cbi);
+		if (i == 0) {
+			inode = ti;
+		} else {
+			if (!IS_ERR(ti))
+				iput(ti);
+		}
+	}
+
+out_c:
+	afs_put_cb_interest(afs_v2net(dvnode), cbi);
+	kfree(cookie->callbacks);
+out_s:
+	kfree(cookie->statuses);
 out:
-	_leave("= %d", ret);
-	return ERR_PTR(ret);
+	kfree(cookie);
+	return inode;
+}
+
+/*
+ * Look up an entry in a directory with @sys substitution.
+ */
+static struct dentry *afs_lookup_atsys(struct inode *dir, struct dentry *dentry,
+				       struct key *key)
+{
+	struct afs_sysnames *subs;
+	struct afs_net *net = afs_i2net(dir);
+	struct dentry *ret;
+	char *buf, *p, *name;
+	int len, i;
+
+	_enter("");
+
+	ret = ERR_PTR(-ENOMEM);
+	p = buf = kmalloc(AFSNAMEMAX, GFP_KERNEL);
+	if (!buf)
+		goto out_p;
+	if (dentry->d_name.len > 4) {
+		memcpy(p, dentry->d_name.name, dentry->d_name.len - 4);
+		p += dentry->d_name.len - 4;
+	}
+
+	/* There is an ordered list of substitutes that we have to try. */
+	read_lock(&net->sysnames_lock);
+	subs = net->sysnames;
+	refcount_inc(&subs->usage);
+	read_unlock(&net->sysnames_lock);
+
+	for (i = 0; i < subs->nr; i++) {
+		name = subs->subs[i];
+		len = dentry->d_name.len - 4 + strlen(name);
+		if (len >= AFSNAMEMAX) {
+			ret = ERR_PTR(-ENAMETOOLONG);
+			goto out_s;
+		}
+
+		strcpy(p, name);
+		ret = lookup_one_len(buf, dentry->d_parent, len);
+		if (IS_ERR(ret) || d_is_positive(ret))
+			goto out_s;
+		dput(ret);
+	}
+
+	/* We don't want to d_add() the @sys dentry here as we don't want
+	 * the cached dentry to hide changes to the sysnames list.
+	 */
+	ret = NULL;
+out_s:
+	afs_put_sysnames(subs);
+	kfree(buf);
+out_p:
+	key_put(key);
+	return ret;
 }
 
 /*
@@ -554,16 +822,13 @@ out:
 static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
 				 unsigned int flags)
 {
-	struct afs_vnode *vnode;
-	struct afs_fid fid;
+	struct afs_vnode *dvnode = AFS_FS_I(dir);
 	struct inode *inode;
 	struct key *key;
 	int ret;
 
-	vnode = AFS_FS_I(dir);
-
 	_enter("{%x:%u},%p{%pd},",
-	       vnode->fid.vid, vnode->fid.vnode, dentry, dentry);
+	       dvnode->fid.vid, dvnode->fid.vnode, dentry, dentry);
 
 	ASSERTCMP(d_inode(dentry), ==, NULL);
 
@@ -572,28 +837,37 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
 		return ERR_PTR(-ENAMETOOLONG);
 	}
 
-	if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
+	if (test_bit(AFS_VNODE_DELETED, &dvnode->flags)) {
 		_leave(" = -ESTALE");
 		return ERR_PTR(-ESTALE);
 	}
 
-	key = afs_request_key(vnode->volume->cell);
+	key = afs_request_key(dvnode->volume->cell);
 	if (IS_ERR(key)) {
 		_leave(" = %ld [key]", PTR_ERR(key));
 		return ERR_CAST(key);
 	}
 
-	ret = afs_validate(vnode, key);
+	ret = afs_validate(dvnode, key);
 	if (ret < 0) {
 		key_put(key);
 		_leave(" = %d [val]", ret);
 		return ERR_PTR(ret);
 	}
 
-	ret = afs_do_lookup(dir, dentry, &fid, key);
-	if (ret < 0) {
+	if (dentry->d_name.len >= 4 &&
+	    dentry->d_name.name[dentry->d_name.len - 4] == '@' &&
+	    dentry->d_name.name[dentry->d_name.len - 3] == 's' &&
+	    dentry->d_name.name[dentry->d_name.len - 2] == 'y' &&
+	    dentry->d_name.name[dentry->d_name.len - 1] == 's')
+		return afs_lookup_atsys(dir, dentry, key);
+
+	afs_stat_v(dvnode, n_lookup);
+	inode = afs_do_lookup(dir, dentry, key);
+	if (IS_ERR(inode)) {
+		ret = PTR_ERR(inode);
 		if (ret == -ENOENT) {
-			inode = afs_try_auto_mntpt(dentry, dir, &fid);
+			inode = afs_try_auto_mntpt(dentry, dir);
 			if (!IS_ERR(inode)) {
 				key_put(key);
 				goto success;
@@ -611,10 +885,9 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
 		_leave(" = %d [do]", ret);
 		return ERR_PTR(ret);
 	}
-	dentry->d_fsdata = (void *)(unsigned long) vnode->status.data_version;
+	dentry->d_fsdata = (void *)(unsigned long)dvnode->status.data_version;
 
 	/* instantiate the dentry */
-	inode = afs_iget(dir->i_sb, key, &fid, NULL, NULL, NULL);
 	key_put(key);
 	if (IS_ERR(inode)) {
 		_leave(" = %ld", PTR_ERR(inode));
@@ -623,9 +896,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
 
 success:
 	d_add(dentry, inode);
-	_leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%u }",
-	       fid.vnode,
-	       fid.unique,
+	_leave(" = 0 { ino=%lu v=%u }",
 	       d_inode(dentry)->i_ino,
 	       d_inode(dentry)->i_generation);
 
@@ -633,67 +904,23 @@ success:
 }
 
 /*
- * Look up an entry in a dynroot directory.
- */
-static struct dentry *afs_dynroot_lookup(struct inode *dir, struct dentry *dentry,
-					 unsigned int flags)
-{
-	struct afs_vnode *vnode;
-	struct afs_fid fid;
-	struct inode *inode;
-	int ret;
-
-	vnode = AFS_FS_I(dir);
-
-	_enter("%pd", dentry);
-
-	ASSERTCMP(d_inode(dentry), ==, NULL);
-
-	if (dentry->d_name.len >= AFSNAMEMAX) {
-		_leave(" = -ENAMETOOLONG");
-		return ERR_PTR(-ENAMETOOLONG);
-	}
-
-	inode = afs_try_auto_mntpt(dentry, dir, &fid);
-	if (IS_ERR(inode)) {
-		ret = PTR_ERR(inode);
-		if (ret == -ENOENT) {
-			d_add(dentry, NULL);
-			_leave(" = NULL [negative]");
-			return NULL;
-		}
-		_leave(" = %d [do]", ret);
-		return ERR_PTR(ret);
-	}
-
-	d_add(dentry, inode);
-	_leave(" = 0 { ino=%lu v=%u }",
-	       d_inode(dentry)->i_ino, d_inode(dentry)->i_generation);
-	return NULL;
-}
-
-/*
  * check that a dentry lookup hit has found a valid entry
  * - NOTE! the hit can be a negative hit too, so we can't assume we have an
  *   inode
  */
 static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
 {
-	struct afs_super_info *as = dentry->d_sb->s_fs_info;
 	struct afs_vnode *vnode, *dir;
 	struct afs_fid uninitialized_var(fid);
 	struct dentry *parent;
 	struct inode *inode;
 	struct key *key;
-	void *dir_version;
+	long dir_version, de_version;
 	int ret;
 
 	if (flags & LOOKUP_RCU)
 		return -ECHILD;
 
-	if (as->dyn_root)
-		return 1;
-
 	if (d_really_is_positive(dentry)) {
 		vnode = AFS_FS_I(d_inode(dentry));
 		_enter("{v={%x:%u} n=%pd fl=%lx},",
@@ -729,14 +956,25 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
 		goto out_bad_parent;
 	}
 
-	dir_version = (void *) (unsigned long) dir->status.data_version;
-	if (dentry->d_fsdata == dir_version)
-		goto out_valid; /* the dir contents are unchanged */
+	/* We only need to invalidate a dentry if the server's copy changed
+	 * behind our back.  If we made the change, it's no problem.  Note that
+	 * on a 32-bit system, we only have 32 bits in the dentry to store the
+	 * version.
+	 */
+	dir_version = (long)dir->status.data_version;
+	de_version = (long)dentry->d_fsdata;
+	if (de_version == dir_version)
+		goto out_valid;
+
+	dir_version = (long)dir->invalid_before;
+	if (de_version - dir_version >= 0)
+		goto out_valid;
 
 	_debug("dir modified");
+	afs_stat_v(dir, n_reval);
 
 	/* search the directory for this vnode */
-	ret = afs_do_lookup(&dir->vfs_inode, dentry, &fid, key);
+	ret = afs_do_lookup_one(&dir->vfs_inode, dentry, &fid, key);
 	switch (ret) {
 	case 0:
 		/* the filename maps to something */
@@ -789,7 +1027,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
 	}
 
 out_valid:
-	dentry->d_fsdata = dir_version;
+	dentry->d_fsdata = (void *)dir_version;
 	dput(parent);
 	key_put(key);
 	_leave(" = 1 [valid]");
@@ -840,7 +1078,7 @@ zap:
 /*
  * handle dentry release
  */
-static void afs_d_release(struct dentry *dentry)
+void afs_d_release(struct dentry *dentry)
 {
 	_enter("%pd", dentry);
 }
@@ -854,6 +1092,7 @@ static void afs_vnode_new_inode(struct afs_fs_cursor *fc,
 				struct afs_file_status *newstatus,
 				struct afs_callback *newcb)
 {
+	struct afs_vnode *vnode;
 	struct inode *inode;
 
 	if (fc->ac.error < 0)
@@ -871,6 +1110,8 @@ static void afs_vnode_new_inode(struct afs_fs_cursor *fc,
 		return;
 	}
 
+	vnode = AFS_FS_I(inode);
+	set_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags);
 	d_add(new_dentry, inode);
 }
 
@@ -885,6 +1126,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 	struct afs_vnode *dvnode = AFS_FS_I(dir);
 	struct afs_fid newfid;
 	struct key *key;
+	u64 data_version = dvnode->status.data_version;
 	int ret;
 
 	mode |= S_IFDIR;
@@ -902,7 +1144,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 	if (afs_begin_vnode_operation(&fc, dvnode, key)) {
 		while (afs_select_fileserver(&fc)) {
 			fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
-			afs_fs_create(&fc, dentry->d_name.name, mode,
+			afs_fs_create(&fc, dentry->d_name.name, mode, data_version,
 				      &newfid, &newstatus, &newcb);
 		}
 
@@ -916,6 +1158,11 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 		goto error_key;
 	}
 
+	if (ret == 0 &&
+	    test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
+		afs_edit_dir_add(dvnode, &dentry->d_name, &newfid,
+				 afs_edit_dir_for_create);
+
 	key_put(key);
 	_leave(" = 0");
 	return 0;
@@ -939,6 +1186,7 @@ static void afs_dir_remove_subdir(struct dentry *dentry)
 		clear_nlink(&vnode->vfs_inode);
 		set_bit(AFS_VNODE_DELETED, &vnode->flags);
 		clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+		clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
 	}
 }
 
@@ -950,6 +1198,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
 	struct afs_fs_cursor fc;
 	struct afs_vnode *dvnode = AFS_FS_I(dir);
 	struct key *key;
+	u64 data_version = dvnode->status.data_version;
 	int ret;
 
 	_enter("{%x:%u},{%pd}",
@@ -965,13 +1214,18 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
 	if (afs_begin_vnode_operation(&fc, dvnode, key)) {
 		while (afs_select_fileserver(&fc)) {
 			fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
-			afs_fs_remove(&fc, dentry->d_name.name, true);
+			afs_fs_remove(&fc, dentry->d_name.name, true,
+				      data_version);
 		}
 
 		afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
 		ret = afs_end_vnode_operation(&fc);
-		if (ret == 0)
+		if (ret == 0) {
 			afs_dir_remove_subdir(dentry);
+			if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
+				afs_edit_dir_remove(dvnode, &dentry->d_name,
+						    afs_edit_dir_for_rmdir);
+		}
 	}
 
 	key_put(key);
@@ -1036,6 +1290,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
 	struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode;
 	struct key *key;
 	unsigned long d_version = (unsigned long)dentry->d_fsdata;
+	u64 data_version = dvnode->status.data_version;
 	int ret;
 
 	_enter("{%x:%u},{%pd}",
@@ -1062,7 +1317,8 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
 	if (afs_begin_vnode_operation(&fc, dvnode, key)) {
 		while (afs_select_fileserver(&fc)) {
 			fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
-			afs_fs_remove(&fc, dentry->d_name.name, false);
+			afs_fs_remove(&fc, dentry->d_name.name, false,
+				      data_version);
 		}
 
 		afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
@@ -1071,6 +1327,10 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
 			ret = afs_dir_remove_link(
 				dentry, key, d_version,
 				(unsigned long)dvnode->status.data_version);
+		if (ret == 0 &&
+		    test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
+			afs_edit_dir_remove(dvnode, &dentry->d_name,
+					    afs_edit_dir_for_unlink);
 	}
 
 error_key:
@@ -1092,6 +1352,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 	struct afs_vnode *dvnode = AFS_FS_I(dir);
 	struct afs_fid newfid;
 	struct key *key;
+	u64 data_version = dvnode->status.data_version;
 	int ret;
 
 	mode |= S_IFREG;
@@ -1113,7 +1374,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 	if (afs_begin_vnode_operation(&fc, dvnode, key)) {
 		while (afs_select_fileserver(&fc)) {
 			fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
-			afs_fs_create(&fc, dentry->d_name.name, mode,
+			afs_fs_create(&fc, dentry->d_name.name, mode, data_version,
 				      &newfid, &newstatus, &newcb);
 		}
 
@@ -1127,6 +1388,10 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 		goto error_key;
 	}
 
+	if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
+		afs_edit_dir_add(dvnode, &dentry->d_name, &newfid,
+				 afs_edit_dir_for_create);
+
 	key_put(key);
 	_leave(" = 0");
 	return 0;
@@ -1148,10 +1413,12 @@ static int afs_link(struct dentry *from, struct inode *dir,
 	struct afs_fs_cursor fc;
 	struct afs_vnode *dvnode, *vnode;
 	struct key *key;
+	u64 data_version;
 	int ret;
 
 	vnode = AFS_FS_I(d_inode(from));
 	dvnode = AFS_FS_I(dir);
+	data_version = dvnode->status.data_version;
 
 	_enter("{%x:%u},{%x:%u},{%pd}",
 	       vnode->fid.vid, vnode->fid.vnode,
@@ -1178,7 +1445,7 @@ static int afs_link(struct dentry *from, struct inode *dir,
 		while (afs_select_fileserver(&fc)) {
 			fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
 			fc.cb_break_2 = vnode->cb_break + vnode->cb_s_break;
-			afs_fs_link(&fc, vnode, dentry->d_name.name);
+			afs_fs_link(&fc, vnode, dentry->d_name.name, data_version);
 		}
 
 		afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
@@ -1194,6 +1461,10 @@ static int afs_link(struct dentry *from, struct inode *dir,
 		goto error_key;
 	}
 
+	if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
+		afs_edit_dir_add(dvnode, &dentry->d_name, &vnode->fid,
+				 afs_edit_dir_for_link);
+
 	key_put(key);
 	_leave(" = 0");
 	return 0;
@@ -1217,6 +1488,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry,
 	struct afs_vnode *dvnode = AFS_FS_I(dir);
 	struct afs_fid newfid;
 	struct key *key;
+	u64 data_version = dvnode->status.data_version;
 	int ret;
 
 	_enter("{%x:%u},{%pd},%s",
@@ -1241,7 +1513,8 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry,
 	if (afs_begin_vnode_operation(&fc, dvnode, key)) {
 		while (afs_select_fileserver(&fc)) {
 			fc.cb_break = dvnode->cb_break + dvnode->cb_s_break;
-			afs_fs_symlink(&fc, dentry->d_name.name, content,
+			afs_fs_symlink(&fc, dentry->d_name.name,
+				       content, data_version,
 				       &newfid, &newstatus);
 		}
 
@@ -1255,6 +1528,10 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry,
 		goto error_key;
 	}
 
+	if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
+		afs_edit_dir_add(dvnode, &dentry->d_name, &newfid,
+				 afs_edit_dir_for_symlink);
+
 	key_put(key);
 	_leave(" = 0");
 	return 0;
@@ -1277,6 +1554,8 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	struct afs_fs_cursor fc;
 	struct afs_vnode *orig_dvnode, *new_dvnode, *vnode;
 	struct key *key;
+	u64 orig_data_version, new_data_version;
+	bool new_negative = d_is_negative(new_dentry);
 	int ret;
 
 	if (flags)
@@ -1285,6 +1564,8 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	vnode = AFS_FS_I(d_inode(old_dentry));
 	orig_dvnode = AFS_FS_I(old_dir);
 	new_dvnode = AFS_FS_I(new_dir);
+	orig_data_version = orig_dvnode->status.data_version;
+	new_data_version = new_dvnode->status.data_version;
 
 	_enter("{%x:%u},{%x:%u},{%x:%u},{%pd}",
 	       orig_dvnode->fid.vid, orig_dvnode->fid.vnode,
@@ -1310,7 +1591,8 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
 			fc.cb_break = orig_dvnode->cb_break + orig_dvnode->cb_s_break;
 			fc.cb_break_2 = new_dvnode->cb_break + new_dvnode->cb_s_break;
 			afs_fs_rename(&fc, old_dentry->d_name.name,
-				      new_dvnode, new_dentry->d_name.name);
+				      new_dvnode, new_dentry->d_name.name,
+				      orig_data_version, new_data_version);
 		}
 
 		afs_vnode_commit_status(&fc, orig_dvnode, fc.cb_break);
@@ -1322,9 +1604,68 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
 			goto error_key;
 	}
 
+	if (ret == 0) {
+		if (test_bit(AFS_VNODE_DIR_VALID, &orig_dvnode->flags))
+		    afs_edit_dir_remove(orig_dvnode, &old_dentry->d_name,
+					afs_edit_dir_for_rename);
+
+		if (!new_negative &&
+		    test_bit(AFS_VNODE_DIR_VALID, &new_dvnode->flags))
+			afs_edit_dir_remove(new_dvnode, &new_dentry->d_name,
+					    afs_edit_dir_for_rename);
+
+		if (test_bit(AFS_VNODE_DIR_VALID, &new_dvnode->flags))
+			afs_edit_dir_add(new_dvnode, &new_dentry->d_name,
+					 &vnode->fid,  afs_edit_dir_for_rename);
+	}
+
 error_key:
 	key_put(key);
 error:
 	_leave(" = %d", ret);
 	return ret;
 }
+
+/*
+ * Release a directory page: drop its private state and clear
+ * AFS_VNODE_DIR_VALID on the directory.  Always returns 1; gfp_flags is unused.
+ */
+static int afs_dir_releasepage(struct page *page, gfp_t gfp_flags)
+{
+	struct afs_vnode *dvnode = AFS_FS_I(page->mapping->host);
+
+	_enter("{{%x:%u}[%lu]}", dvnode->fid.vid, dvnode->fid.vnode, page->index);
+
+	set_page_private(page, 0);
+	ClearPagePrivate(page);
+
+	/* The directory will need reloading; count it if it was still valid. */
+	if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
+		afs_stat_v(dvnode, n_relpg);
+	return 1;
+}
+
+/*
+ * Invalidate part or all of a directory page.  Any invalidation clears
+ * AFS_VNODE_DIR_VALID; the page's private state is only cleaned up when the
+ * whole page (offset 0, length PAGE_SIZE) is being invalidated.
+ */
+static void afs_dir_invalidatepage(struct page *page, unsigned int offset,
+				   unsigned int length)
+{
+	struct afs_vnode *dvnode = AFS_FS_I(page->mapping->host);
+
+	_enter("{%lu},%u,%u", page->index, offset, length);
+
+	BUG_ON(!PageLocked(page));
+
+	/* The directory will need reloading. */
+	if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
+		afs_stat_v(dvnode, n_inval);
+
+	/* we clean up only if the entire page is being invalidated */
+	if (offset == 0 && length == PAGE_SIZE) {
+		set_page_private(page, 0);
+		ClearPagePrivate(page);
+	}
+}
diff --git a/fs/afs/dir_edit.c b/fs/afs/dir_edit.c
new file mode 100644
index 000000000000..8b400f5aead5
--- /dev/null
+++ b/fs/afs/dir_edit.c
@@ -0,0 +1,505 @@
+/* AFS filesystem directory editing
+ *
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/pagemap.h>
+#include <linux/iversion.h>
+#include "internal.h"
+#include "xdr_fs.h"
+
+/*
+ * Find a number of contiguous clear bits in a directory block bitmask.
+ *
+ * There are 64 slots, which means we can load the entire bitmap into a
+ * variable.  The first bit doesn't count as it corresponds to the block header
+ * slot.  nr_slots is between 1 and 9.  Returns the first slot or -1 if none.
+ */
+static int afs_find_contig_bits(union afs_xdr_dir_block *block, unsigned int nr_slots)
+{
+	u64 bitmap;
+	u32 mask;
+	int bit, n;
+
+	bitmap  = (u64)block->hdr.bitmap[0] << 0 * 8;
+	bitmap |= (u64)block->hdr.bitmap[1] << 1 * 8;
+	bitmap |= (u64)block->hdr.bitmap[2] << 2 * 8;
+	bitmap |= (u64)block->hdr.bitmap[3] << 3 * 8;
+	bitmap |= (u64)block->hdr.bitmap[4] << 4 * 8;
+	bitmap |= (u64)block->hdr.bitmap[5] << 5 * 8;
+	bitmap |= (u64)block->hdr.bitmap[6] << 6 * 8;
+	bitmap |= (u64)block->hdr.bitmap[7] << 7 * 8;
+	bitmap >>= 1; /* The first entry is metadata */
+	bit = 1;
+	mask = (1 << nr_slots) - 1;
+
+	do {
+		if (sizeof(unsigned long) == 8)
+			n = ffz(bitmap);
+		else /* 32-bit: use the low word only if it has a clear bit */
+			n = ((u32)bitmap) != 0xffffffff ?
+				ffz((u32)bitmap) :
+				ffz((u32)(bitmap >> 32)) + 32;
+		bitmap >>= n;
+		bit += n;
+
+		if ((bitmap & mask) == 0) {
+			if (bit > 64 - nr_slots)
+				return -1;
+			return bit;
+		}
+
+		n = __ffs(bitmap);
+		bitmap >>= n;
+		bit += n;
+	} while (bitmap);
+
+	return -1;
+}
+
+/*
+ * Set a number of contiguous bits in the directory block bitmap.
+ */
+static void afs_set_contig_bits(union afs_xdr_dir_block *block,
+				int bit, unsigned int nr_slots)
+{
+	u64 mask;
+
+	/* Build a run of nr_slots bits starting at the first slot claimed. */
+	mask = (1 << nr_slots) - 1;
+	mask <<= bit;
+
+	/* The bitmap is kept as eight individual bytes, least significant
+	 * first, so merge the mask in a byte at a time.
+	 */
+	block->hdr.bitmap[0] |= (u8)(mask >> 0 * 8);
+	block->hdr.bitmap[1] |= (u8)(mask >> 1 * 8);
+	block->hdr.bitmap[2] |= (u8)(mask >> 2 * 8);
+	block->hdr.bitmap[3] |= (u8)(mask >> 3 * 8);
+	block->hdr.bitmap[4] |= (u8)(mask >> 4 * 8);
+	block->hdr.bitmap[5] |= (u8)(mask >> 5 * 8);
+	block->hdr.bitmap[6] |= (u8)(mask >> 6 * 8);
+	block->hdr.bitmap[7] |= (u8)(mask >> 7 * 8);
+}
+
+/*
+ * Clear a number of contiguous bits in the directory block bitmap.
+ */
+static void afs_clear_contig_bits(union afs_xdr_dir_block *block,
+				  int bit, unsigned int nr_slots)
+{
+	u64 mask;
+
+	/* Build a run of nr_slots bits starting at the first slot freed. */
+	mask = (1 << nr_slots) - 1;
+	mask <<= bit;
+
+	/* The bitmap is kept as eight individual bytes, least significant
+	 * first, so knock the mask out a byte at a time.
+	 */
+	block->hdr.bitmap[0] &= ~(u8)(mask >> 0 * 8);
+	block->hdr.bitmap[1] &= ~(u8)(mask >> 1 * 8);
+	block->hdr.bitmap[2] &= ~(u8)(mask >> 2 * 8);
+	block->hdr.bitmap[3] &= ~(u8)(mask >> 3 * 8);
+	block->hdr.bitmap[4] &= ~(u8)(mask >> 4 * 8);
+	block->hdr.bitmap[5] &= ~(u8)(mask >> 5 * 8);
+	block->hdr.bitmap[6] &= ~(u8)(mask >> 6 * 8);
+	block->hdr.bitmap[7] &= ~(u8)(mask >> 7 * 8);
+}
+
+/*
+ * Scan a directory block for a dirent called @name; return its slot or -1.
+ */
+static int afs_dir_scan_block(union afs_xdr_dir_block *block, struct qstr *name,
+			      unsigned int blocknum)
+{
+	union afs_xdr_dirent *de;
+	u64 bitmap;
+	int d, len, n;
+
+	_enter("");
+	/* Assemble the eight bitmap bytes into one word, byte 0 lowest. */
+	bitmap  = (u64)block->hdr.bitmap[0] << 0 * 8;
+	bitmap |= (u64)block->hdr.bitmap[1] << 1 * 8;
+	bitmap |= (u64)block->hdr.bitmap[2] << 2 * 8;
+	bitmap |= (u64)block->hdr.bitmap[3] << 3 * 8;
+	bitmap |= (u64)block->hdr.bitmap[4] << 4 * 8;
+	bitmap |= (u64)block->hdr.bitmap[5] << 5 * 8;
+	bitmap |= (u64)block->hdr.bitmap[6] << 6 * 8;
+	bitmap |= (u64)block->hdr.bitmap[7] << 7 * 8;
+	/* Start after the reserved slots; block 0 reserves a different number. */
+	for (d = (blocknum == 0 ? AFS_DIR_RESV_BLOCKS0 : AFS_DIR_RESV_BLOCKS);
+	     d < AFS_DIR_SLOTS_PER_BLOCK;
+	     d++) {
+		if (!((bitmap >> d) & 1))
+			continue;
+		de = &block->dirents[d];
+		if (de->u.valid != 1)
+			continue;
+
+		/* The block was NUL-terminated by afs_dir_check_page(). */
+		len = strlen(de->u.name);
+		if (len == name->len &&
+		    memcmp(de->u.name, name->name, name->len) == 0)
+			return d;
+
+		n = round_up(12 + len + 1 + 4, AFS_DIR_DIRENT_SIZE);
+		n /= AFS_DIR_DIRENT_SIZE;
+		d += n - 1; /* with the d++ above, steps over all n slots */
+	}
+
+	return -1;
+}
+
+/*
+ * Initialise @block as empty block @block_num and set its free-slot counter in
+ * @meta (block 0 as passed by the caller).  Block 0 reserves extra slots.
+ */
+static void afs_edit_init_block(union afs_xdr_dir_block *meta,
+				union afs_xdr_dir_block *block, int block_num)
+{
+	memset(block, 0, sizeof(*block));
+	block->hdr.npages = htons(1);
+	block->hdr.magic = AFS_DIR_MAGIC;
+	block->hdr.bitmap[0] = 1;
+
+	if (block_num == 0) {
+		block->hdr.bitmap[0] = 0xff;
+		block->hdr.bitmap[1] = 0x1f;
+		memset(block->meta.alloc_ctrs,
+		       AFS_DIR_SLOTS_PER_BLOCK,
+		       sizeof(block->meta.alloc_ctrs));
+		meta->meta.alloc_ctrs[0] =
+			AFS_DIR_SLOTS_PER_BLOCK - AFS_DIR_RESV_BLOCKS0;
+	}
+	/* NOTE(review): this also runs for block 0 and replaces the value above. */
+	if (block_num < AFS_DIR_BLOCKS_WITH_CTR)
+		meta->meta.alloc_ctrs[block_num] =
+			AFS_DIR_SLOTS_PER_BLOCK - AFS_DIR_RESV_BLOCKS;
+}
+
+/*
+ * Edit a directory's file data to add a new directory entry.  Doing this after
+ * create, mkdir, symlink, link or rename if the data version number is
+ * incremented by exactly one avoids the need to re-download the entire
+ * directory contents.
+ *
+ * The caller must hold the inode locked.
+ */
+void afs_edit_dir_add(struct afs_vnode *vnode,
+		      struct qstr *name, struct afs_fid *new_fid,
+		      enum afs_edit_dir_reason why)
+{
+	union afs_xdr_dir_block *meta, *block;
+	struct afs_xdr_dir_page *meta_page, *dir_page;
+	union afs_xdr_dirent *de;
+	struct page *page0, *page;
+	unsigned int need_slots, nr_blocks, b;
+	pgoff_t index;
+	loff_t i_size;
+	gfp_t gfp;
+	int slot;
+
+	_enter(",,{%d,%s},", name->len, name->name);
+
+	i_size = i_size_read(&vnode->vfs_inode);
+	if (i_size > AFS_DIR_BLOCK_SIZE * AFS_DIR_MAX_BLOCKS ||
+	    (i_size & (AFS_DIR_BLOCK_SIZE - 1))) {
+		clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+		return;
+	}
+
+	gfp = vnode->vfs_inode.i_mapping->gfp_mask;
+	page0 = find_or_create_page(vnode->vfs_inode.i_mapping, 0, gfp);
+	if (!page0) {
+		clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+		_leave(" [fgp]");
+		return;
+	}
+
+	/* Work out how many slots we're going to need. */
+	need_slots = round_up(12 + name->len + 1 + 4, AFS_DIR_DIRENT_SIZE);
+	need_slots /= AFS_DIR_DIRENT_SIZE;
+
+	meta_page = kmap(page0);
+	meta = &meta_page->blocks[0];
+	if (i_size == 0)
+		goto new_directory;
+	nr_blocks = i_size / AFS_DIR_BLOCK_SIZE;
+
+	/* Find a block that has sufficient slots available.  Each VM page
+	 * contains two or more directory blocks.
+	 */
+	for (b = 0; b < nr_blocks + 1; b++) {
+		/* If the directory extended into a new page, then we need to
+		 * tack a new page on the end.
+		 */
+		index = b / AFS_DIR_BLOCKS_PER_PAGE;
+		if (index == 0) {
+			page = page0;
+			dir_page = meta_page;
+		} else {
+			if (nr_blocks >= AFS_DIR_MAX_BLOCKS)
+				goto error;
+			page = find_or_create_page(vnode->vfs_inode.i_mapping,
+						   index, gfp);
+			if (!page)
+				goto error;
+			if (!PagePrivate(page)) {
+				set_page_private(page, 1);
+				SetPagePrivate(page);
+			}
+			dir_page = kmap(page);
+		}
+
+		/* Abandon the edit if we got a callback break. */
+		if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
+			goto invalidated;
+
+		block = &dir_page->blocks[b % AFS_DIR_BLOCKS_PER_PAGE];
+
+		_debug("block %u: %2u %3u %u",
+		       b,
+		       (b < AFS_DIR_BLOCKS_WITH_CTR) ? meta->meta.alloc_ctrs[b] : 99,
+		       ntohs(block->hdr.npages),
+		       ntohs(block->hdr.magic));
+
+		/* Initialise the block if necessary. */
+		if (b == nr_blocks) {
+			_debug("init %u", b);
+			afs_edit_init_block(meta, block, b);
+			i_size_write(&vnode->vfs_inode, (b + 1) * AFS_DIR_BLOCK_SIZE);
+		}
+
+		/* Only lower dir pages have a counter in the header. */
+		if (b >= AFS_DIR_BLOCKS_WITH_CTR ||
+		    meta->meta.alloc_ctrs[b] >= need_slots) {
+			/* We need to try and find one or more consecutive
+			 * slots to hold the entry.
+			 */
+			slot = afs_find_contig_bits(block, need_slots);
+			if (slot >= 0) {
+				_debug("slot %u", slot);
+				goto found_space;
+			}
+		}
+
+		if (page != page0) {
+			unlock_page(page);
+			kunmap(page);
+			put_page(page);
+		}
+	}
+
+	/* There are no spare slots of sufficient size, yet the operation
+	 * succeeded.  Download the directory again.
+	 */
+	trace_afs_edit_dir(vnode, why, afs_edit_dir_create_nospc, 0, 0, 0, 0, name->name);
+	clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+	goto out_unmap;
+
+new_directory:
+	afs_edit_init_block(meta, meta, 0);
+	i_size = AFS_DIR_BLOCK_SIZE;
+	i_size_write(&vnode->vfs_inode, i_size);
+	slot = AFS_DIR_RESV_BLOCKS0;
+	page = page0;
+	block = meta;
+	nr_blocks = 1;
+	b = 0;
+
+found_space:
+	/* Set the dirent slot. */
+	trace_afs_edit_dir(vnode, why, afs_edit_dir_create, b, slot,
+			   new_fid->vnode, new_fid->unique, name->name);
+	de = &block->dirents[slot];
+	de->u.valid	= 1;
+	de->u.unused[0]	= 0;
+	de->u.hash_next	= 0; // TODO: Really need to maintain this
+	de->u.vnode	= htonl(new_fid->vnode);
+	de->u.unique	= htonl(new_fid->unique);
+	memcpy(de->u.name, name->name, name->len + 1);
+	de->u.name[name->len] = 0;
+
+	/* Adjust the bitmap. */
+	afs_set_contig_bits(block, slot, need_slots);
+	if (page != page0) {
+		unlock_page(page);
+		kunmap(page);
+		put_page(page);
+	}
+
+	/* Adjust the allocation counter. */
+	if (b < AFS_DIR_BLOCKS_WITH_CTR)
+		meta->meta.alloc_ctrs[b] -= need_slots;
+
+	inode_inc_iversion_raw(&vnode->vfs_inode);
+	afs_stat_v(vnode, n_dir_cr);
+	_debug("Insert %s in %u[%u]", name->name, b, slot);
+
+out_unmap:
+	unlock_page(page0);
+	kunmap(page0);
+	put_page(page0);
+	_leave("");
+	return;
+
+invalidated:
+	trace_afs_edit_dir(vnode, why, afs_edit_dir_create_inval, 0, 0, 0, 0, name->name);
+	clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+	if (page != page0) {
+		unlock_page(page); /* find_or_create_page() returned it locked */
+		kunmap(page);
+		put_page(page);
+	}
+	goto out_unmap;
+
+error:
+	trace_afs_edit_dir(vnode, why, afs_edit_dir_create_error, 0, 0, 0, 0, name->name);
+	clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+	goto out_unmap;
+}
+
+/*
+ * Edit a directory's file data to remove a directory entry.  Doing this
+ * after unlink, rmdir or rename if the data version number is incremented by
+ * exactly one avoids the need to re-download the entire directory contents.
+ *
+ * The caller must hold the inode locked.
+ */
+void afs_edit_dir_remove(struct afs_vnode *vnode,
+			 struct qstr *name, enum afs_edit_dir_reason why)
+{
+	struct afs_xdr_dir_page *meta_page, *dir_page;
+	union afs_xdr_dir_block *meta, *block;
+	union afs_xdr_dirent *de;
+	struct page *page0, *page;
+	unsigned int need_slots, nr_blocks, b;
+	pgoff_t index;
+	loff_t i_size;
+	int slot;
+
+	_enter(",,{%d,%s},", name->len, name->name);
+
+	i_size = i_size_read(&vnode->vfs_inode);
+	if (i_size < AFS_DIR_BLOCK_SIZE ||
+	    i_size > AFS_DIR_BLOCK_SIZE * AFS_DIR_MAX_BLOCKS ||
+	    (i_size & (AFS_DIR_BLOCK_SIZE - 1))) {
+		clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+		return;
+	}
+	nr_blocks = i_size / AFS_DIR_BLOCK_SIZE;
+
+	page0 = find_lock_page(vnode->vfs_inode.i_mapping, 0);
+	if (!page0) {
+		clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+		_leave(" [fgp]");
+		return;
+	}
+
+	/* Work out how many slots we're going to discard. */
+	need_slots = round_up(12 + name->len + 1 + 4, AFS_DIR_DIRENT_SIZE);
+	need_slots /= AFS_DIR_DIRENT_SIZE;
+
+	meta_page = kmap(page0);
+	meta = &meta_page->blocks[0];
+
+	/* Find the block that contains the dirent.  Each VM page contains two
+	 * or more directory blocks.
+	 */
+	for (b = 0; b < nr_blocks; b++) {
+		index = b / AFS_DIR_BLOCKS_PER_PAGE;
+		if (index != 0) {
+			page = find_lock_page(vnode->vfs_inode.i_mapping, index);
+			if (!page)
+				goto error;
+			dir_page = kmap(page);
+		} else {
+			page = page0;
+			dir_page = meta_page;
+		}
+
+		/* Abandon the edit if we got a callback break. */
+		if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
+			goto invalidated;
+
+		block = &dir_page->blocks[b % AFS_DIR_BLOCKS_PER_PAGE];
+
+		if (b >= AFS_DIR_BLOCKS_WITH_CTR ||
+		    meta->meta.alloc_ctrs[b] <= AFS_DIR_SLOTS_PER_BLOCK - 1 - need_slots) {
+			slot = afs_dir_scan_block(block, name, b);
+			if (slot >= 0)
+				goto found_dirent;
+		}
+
+		if (page != page0) {
+			unlock_page(page);
+			kunmap(page);
+			put_page(page);
+		}
+	}
+
+	/* Didn't find the dirent to clobber.  Download the directory again. */
+	trace_afs_edit_dir(vnode, why, afs_edit_dir_delete_noent,
+			   0, 0, 0, 0, name->name);
+	clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+	goto out_unmap;
+
+found_dirent:
+	de = &block->dirents[slot];
+
+	trace_afs_edit_dir(vnode, why, afs_edit_dir_delete, b, slot,
+			   ntohl(de->u.vnode), ntohl(de->u.unique),
+			   name->name);
+
+	memset(de, 0, sizeof(*de) * need_slots);
+
+	/* Adjust the bitmap. */
+	afs_clear_contig_bits(block, slot, need_slots);
+	if (page != page0) {
+		unlock_page(page);
+		kunmap(page);
+		put_page(page);
+	}
+
+	/* Adjust the allocation counter. */
+	if (b < AFS_DIR_BLOCKS_WITH_CTR)
+		meta->meta.alloc_ctrs[b] += need_slots;
+
+	inode_set_iversion_raw(&vnode->vfs_inode, vnode->status.data_version);
+	afs_stat_v(vnode, n_dir_rm);
+	_debug("Remove %s from %u[%u]", name->name, b, slot);
+
+out_unmap:
+	unlock_page(page0);
+	kunmap(page0);
+	put_page(page0);
+	_leave("");
+	return;
+
+invalidated:
+	trace_afs_edit_dir(vnode, why, afs_edit_dir_delete_inval,
+			   0, 0, 0, 0, name->name);
+	clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+	if (page != page0) {
+		unlock_page(page);
+		kunmap(page);
+		put_page(page);
+	}
+	goto out_unmap;
+
+error:
+	trace_afs_edit_dir(vnode, why, afs_edit_dir_delete_error,
+			   0, 0, 0, 0, name->name);
+	clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+	goto out_unmap;
+}
diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c
new file mode 100644
index 000000000000..983f3946ab57
--- /dev/null
+++ b/fs/afs/dynroot.c
@@ -0,0 +1,209 @@
+/* AFS dynamic root handling
+ *
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/dns_resolver.h>
+#include "internal.h"
+
+const struct file_operations afs_dynroot_file_operations = {
+	.open		= dcache_dir_open,
+	.release	= dcache_dir_close,
+	.iterate_shared	= dcache_readdir,
+	.llseek		= dcache_dir_lseek,
+};
+
+/*
+ * Probe to see if a cell may exist so that positive dentries aren't created
+ * needlessly.  Returns 0 if the cell lookup succeeds, else the DNS result.
+ */
+static int afs_probe_cell_name(struct dentry *dentry)
+{
+	struct afs_cell *cell;
+	const char *name = dentry->d_name.name;
+	size_t len = dentry->d_name.len;
+	int ret;
+
+	/* Names prefixed with a dot are R/W mounts. */
+	if (name[0] == '.') {
+		if (len == 1)
+			return -EINVAL;
+		name++;
+		len--;
+	}
+
+	cell = afs_lookup_cell_rcu(afs_d2net(dentry), name, len);
+	if (!IS_ERR(cell)) {
+		afs_put_cell(afs_d2net(dentry), cell);
+		return 0;
+	}
+	/* Cell lookup failed: fall back to an "afsdb" DNS query for the name. */
+	ret = dns_query("afsdb", name, len, "ipv4", NULL, NULL);
+	if (ret == -ENODATA)
+		ret = -EDESTADDRREQ;
+	return ret;
+}
+
+/*
+ * Try to auto mount the mountpoint with a pseudo directory, if the autocell
+ * flag is set on @dir.  Returns the inode or an ERR_PTR (-ENOENT if unset).
+ */
+struct inode *afs_try_auto_mntpt(struct dentry *dentry, struct inode *dir)
+{
+	struct afs_vnode *vnode = AFS_FS_I(dir);
+	struct inode *inode;
+	int ret = -ENOENT;
+
+	_enter("%p{%pd}, {%x:%u}",
+	       dentry, dentry, vnode->fid.vid, vnode->fid.vnode);
+
+	if (!test_bit(AFS_VNODE_AUTOCELL, &vnode->flags))
+		goto out;
+
+	ret = afs_probe_cell_name(dentry);
+	if (ret < 0)
+		goto out;
+
+	inode = afs_iget_pseudo_dir(dir->i_sb, false);
+	if (IS_ERR(inode)) {
+		ret = PTR_ERR(inode);
+		goto out;
+	}
+
+	_leave("= %p", inode);
+	return inode;
+
+out:
+	_leave("= %d", ret);
+	return ERR_PTR(ret);
+}
+
+/*
+ * Look up @cell in a dynroot directory by substituting the net namespace's
+ * local cell name and looking that name up in the parent directory instead.
+ */
+static struct dentry *afs_lookup_atcell(struct dentry *dentry)
+{
+	struct afs_cell *cell;
+	struct afs_net *net = afs_d2net(dentry);
+	struct dentry *ret;
+	unsigned int seq = 0;
+	char *name;
+	int len;
+
+	if (!net->ws_cell)
+		return ERR_PTR(-ENOENT);
+
+	ret = ERR_PTR(-ENOMEM);
+	name = kmalloc(AFS_MAXCELLNAME + 1, GFP_KERNEL);
+	if (!name)
+		goto out_p;
+
+	rcu_read_lock();
+	do {
+		read_seqbegin_or_lock(&net->cells_lock, &seq);
+		cell = rcu_dereference_raw(net->ws_cell);
+		if (cell) {
+			len = cell->name_len;
+			memcpy(name, cell->name, len + 1);
+		}
+	} while (need_seqretry(&net->cells_lock, seq));
+	done_seqretry(&net->cells_lock, seq);
+	rcu_read_unlock();
+
+	ret = ERR_PTR(-ENOENT);
+	if (!cell)
+		goto out_n;
+
+	ret = lookup_one_len(name, dentry->d_parent, len);
+
+	/* We don't want to d_add() the @cell dentry here as we don't want
+	 * the cached dentry to hide changes to the local cell name.
+	 */
+
+out_n:
+	kfree(name);
+out_p:
+	return ret;
+}
+
+/*
+ * Look up an entry in a dynroot directory ("@cell" or an automounted cell).
+ */
+static struct dentry *afs_dynroot_lookup(struct inode *dir, struct dentry *dentry,
+					 unsigned int flags)
+{
+	struct afs_vnode *vnode;
+	struct inode *inode;
+	int ret;
+
+	vnode = AFS_FS_I(dir); /* NOTE(review): vnode is not used below */
+
+	_enter("%pd", dentry);
+
+	ASSERTCMP(d_inode(dentry), ==, NULL);
+
+	if (dentry->d_name.len >= AFSNAMEMAX) {
+		_leave(" = -ENAMETOOLONG");
+		return ERR_PTR(-ENAMETOOLONG);
+	}
+
+	if (dentry->d_name.len == 5 &&
+	    memcmp(dentry->d_name.name, "@cell", 5) == 0)
+		return afs_lookup_atcell(dentry);
+
+	inode = afs_try_auto_mntpt(dentry, dir);
+	if (IS_ERR(inode)) {
+		ret = PTR_ERR(inode);
+		if (ret == -ENOENT) { /* no such cell: cache a negative dentry */
+			d_add(dentry, NULL);
+			_leave(" = NULL [negative]");
+			return NULL;
+		}
+		_leave(" = %d [do]", ret);
+		return ERR_PTR(ret);
+	}
+
+	d_add(dentry, inode);
+	_leave(" = 0 { ino=%lu v=%u }",
+	       d_inode(dentry)->i_ino, d_inode(dentry)->i_generation);
+	return NULL;
+}
+
+const struct inode_operations afs_dynroot_inode_operations = {
+	.lookup		= afs_dynroot_lookup,
+};
+
+/*
+ * Dirs in the dynamic root don't need revalidation: always report valid (1).
+ */
+static int afs_dynroot_d_revalidate(struct dentry *dentry, unsigned int flags)
+{
+	return 1;
+}
+
+/*
+ * Allow the VFS to enquire as to whether a dentry should be unhashed (mustn't
+ * sleep)
+ * - called from dput() when d_count is going to 0.
+ * - returns non-zero (unhash) for positive dentries, 0 for negative ones
+ */
+static int afs_dynroot_d_delete(const struct dentry *dentry)
+{
+	return d_really_is_positive(dentry);
+}
+
+const struct dentry_operations afs_dynroot_dentry_operations = {
+	.d_revalidate	= afs_dynroot_d_revalidate,
+	.d_delete	= afs_dynroot_d_delete,
+	.d_release	= afs_d_release,
+	.d_automount	= afs_d_automount,
+};
diff --git a/fs/afs/file.c b/fs/afs/file.c
index a39192ced99e..c24c08016dd9 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -30,7 +30,6 @@ static int afs_readpages(struct file *filp, struct address_space *mapping,
 
 const struct file_operations afs_file_operations = {
 	.open		= afs_open,
-	.flush		= afs_flush,
 	.release	= afs_release,
 	.llseek		= generic_file_llseek,
 	.read_iter	= generic_file_read_iter,
@@ -146,6 +145,9 @@ int afs_open(struct inode *inode, struct file *file)
 		if (ret < 0)
 			goto error_af;
 	}
+
+	if (file->f_flags & O_TRUNC)
+		set_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags);
 	
 	file->private_data = af;
 	_leave(" = 0");
@@ -170,6 +172,9 @@ int afs_release(struct inode *inode, struct file *file)
 
 	_enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode);
 
+	if ((file->f_mode & FMODE_WRITE))
+		return vfs_fsync(file, 0);
+
 	file->private_data = NULL;
 	if (af->wb)
 		afs_put_wb_key(af->wb);
@@ -187,10 +192,12 @@ void afs_put_read(struct afs_read *req)
 {
 	int i;
 
-	if (atomic_dec_and_test(&req->usage)) {
+	if (refcount_dec_and_test(&req->usage)) {
 		for (i = 0; i < req->nr_pages; i++)
 			if (req->pages[i])
 				put_page(req->pages[i]);
+		if (req->pages != req->array)
+			kfree(req->pages);
 		kfree(req);
 	}
 }
@@ -240,6 +247,12 @@ int afs_fetch_data(struct afs_vnode *vnode, struct key *key, struct afs_read *de
 		ret = afs_end_vnode_operation(&fc);
 	}
 
+	if (ret == 0) {
+		afs_stat_v(vnode, n_fetches);
+		atomic_long_add(desc->actual_len,
+				&afs_v2net(vnode)->n_fetch_bytes);
+	}
+
 	_leave(" = %d", ret);
 	return ret;
 }
@@ -297,10 +310,11 @@ int afs_page_filler(void *data, struct page *page)
 		 * end of the file, the server will return a short read and the
 		 * unmarshalling code will clear the unfilled space.
 		 */
-		atomic_set(&req->usage, 1);
+		refcount_set(&req->usage, 1);
 		req->pos = (loff_t)page->index << PAGE_SHIFT;
 		req->len = PAGE_SIZE;
 		req->nr_pages = 1;
+		req->pages = req->array;
 		req->pages[0] = page;
 		get_page(page);
 
@@ -309,10 +323,6 @@ int afs_page_filler(void *data, struct page *page)
 		ret = afs_fetch_data(vnode, key, req);
 		afs_put_read(req);
 
-		if (ret >= 0 && S_ISDIR(inode->i_mode) &&
-		    !afs_dir_check_page(inode, page))
-			ret = -EIO;
-
 		if (ret < 0) {
 			if (ret == -ENOENT) {
 				_debug("got NOENT from server"
@@ -339,7 +349,8 @@ int afs_page_filler(void *data, struct page *page)
 		/* send the page to the cache */
 #ifdef CONFIG_AFS_FSCACHE
 		if (PageFsCache(page) &&
-		    fscache_write_page(vnode->cache, page, GFP_KERNEL) != 0) {
+		    fscache_write_page(vnode->cache, page, vnode->status.size,
+				       GFP_KERNEL) != 0) {
 			fscache_uncache_page(vnode->cache, page);
 			BUG_ON(PageFsCache(page));
 		}
@@ -403,7 +414,8 @@ static void afs_readpages_page_done(struct afs_call *call, struct afs_read *req)
 	/* send the page to the cache */
 #ifdef CONFIG_AFS_FSCACHE
 	if (PageFsCache(page) &&
-	    fscache_write_page(vnode->cache, page, GFP_KERNEL) != 0) {
+	    fscache_write_page(vnode->cache, page, vnode->status.size,
+			       GFP_KERNEL) != 0) {
 		fscache_uncache_page(vnode->cache, page);
 		BUG_ON(PageFsCache(page));
 	}
@@ -445,10 +457,11 @@ static int afs_readpages_one(struct file *file, struct address_space *mapping,
 	if (!req)
 		return -ENOMEM;
 
-	atomic_set(&req->usage, 1);
+	refcount_set(&req->usage, 1);
 	req->page_done = afs_readpages_page_done;
 	req->pos = first->index;
 	req->pos <<= PAGE_SHIFT;
+	req->pages = req->array;
 
 	/* Transfer the pages to the request.  We add them in until one fails
 	 * to add to the LRU and then we stop (as that'll make a hole in the
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index c40ba2fe3cbe..7a0e017070ec 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -613,7 +613,7 @@ static int afs_do_getlk(struct file *file, struct file_lock *fl)
 	posix_test_lock(file, fl);
 	if (fl->fl_type == F_UNLCK) {
 		/* no local locks; consult the server */
-		ret = afs_fetch_status(vnode, key);
+		ret = afs_fetch_status(vnode, key, false);
 		if (ret < 0)
 			goto error;
 
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 88ec38c2d83c..efacdb7c1dee 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -16,6 +16,7 @@
 #include <linux/iversion.h>
 #include "internal.h"
 #include "afs_fs.h"
+#include "xdr_fs.h"
 
 static const struct afs_fid afs_zero_fid;
 
@@ -44,109 +45,194 @@ static void xdr_decode_AFSFid(const __be32 **_bp, struct afs_fid *fid)
 }
 
 /*
- * decode an AFSFetchStatus block
+ * Dump a bad file status record.
  */
-static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
-				      struct afs_file_status *status,
-				      struct afs_vnode *vnode,
-				      afs_dataversion_t *store_version)
+static void xdr_dump_bad(const __be32 *bp)
 {
-	afs_dataversion_t expected_version;
-	const __be32 *bp = *_bp;
+	__be32 x[4];
+	int i;
+
+	pr_notice("AFS XDR: Bad status record\n");
+	for (i = 0; i < 5 * 4 * 4; i += 16) {
+		memcpy(x, bp, 16);
+		bp += 4;
+		pr_notice("%03x: %08x %08x %08x %08x\n",
+			  i, ntohl(x[0]), ntohl(x[1]), ntohl(x[2]), ntohl(x[3]));
+	}
+
+	memcpy(x, bp, 4);
+	pr_notice("0x50: %08x\n", ntohl(x[0]));
+}
+
+/*
+ * Update the core inode struct from a returned status record.
+ */
+void afs_update_inode_from_status(struct afs_vnode *vnode,
+				  struct afs_file_status *status,
+				  const afs_dataversion_t *expected_version,
+				  u8 flags)
+{
+	struct timespec t;
 	umode_t mode;
+
+	t.tv_sec = status->mtime_client;
+	t.tv_nsec = 0;
+	vnode->vfs_inode.i_ctime = t;
+	vnode->vfs_inode.i_mtime = t;
+	vnode->vfs_inode.i_atime = t;
+
+	if (flags & (AFS_VNODE_META_CHANGED | AFS_VNODE_NOT_YET_SET)) {
+		vnode->vfs_inode.i_uid = make_kuid(&init_user_ns, status->owner);
+		vnode->vfs_inode.i_gid = make_kgid(&init_user_ns, status->group);
+		set_nlink(&vnode->vfs_inode, status->nlink);
+
+		mode = vnode->vfs_inode.i_mode;
+		mode &= ~S_IALLUGO;
+		mode |= status->mode;
+		barrier();
+		vnode->vfs_inode.i_mode = mode;
+	}
+
+	if (!(flags & AFS_VNODE_NOT_YET_SET)) {
+		if (expected_version &&
+		    *expected_version != status->data_version) {
+			_debug("vnode modified %llx on {%x:%u} [exp %llx]",
+			       (unsigned long long) status->data_version,
+			       vnode->fid.vid, vnode->fid.vnode,
+			       (unsigned long long) *expected_version);
+			vnode->invalid_before = status->data_version;
+			if (vnode->status.type == AFS_FTYPE_DIR) {
+				if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
+					afs_stat_v(vnode, n_inval);
+			} else {
+				set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags);
+			}
+		} else if (vnode->status.type == AFS_FTYPE_DIR) {
+			/* Expected directory change is handled elsewhere so
+			 * that we can locally edit the directory and save on a
+			 * download.
+			 */
+			if (test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
+				flags &= ~AFS_VNODE_DATA_CHANGED;
+		}
+	}
+
+	if (flags & (AFS_VNODE_DATA_CHANGED | AFS_VNODE_NOT_YET_SET)) {
+		inode_set_iversion_raw(&vnode->vfs_inode, status->data_version);
+		i_size_write(&vnode->vfs_inode, status->size);
+	}
+}
+
+/*
+ * decode an AFSFetchStatus block
+ */
+static int xdr_decode_AFSFetchStatus(struct afs_call *call,
+				     const __be32 **_bp,
+				     struct afs_file_status *status,
+				     struct afs_vnode *vnode,
+				     const afs_dataversion_t *expected_version,
+				     struct afs_read *read_req)
+{
+	const struct afs_xdr_AFSFetchStatus *xdr = (const void *)*_bp;
 	u64 data_version, size;
-	bool changed = false;
-	kuid_t owner;
-	kgid_t group;
+	u32 type, abort_code;
+	u8 flags = 0;
+	int ret;
 
 	if (vnode)
 		write_seqlock(&vnode->cb_lock);
 
-#define EXTRACT(DST)				\
-	do {					\
-		u32 x = ntohl(*bp++);		\
-		if (DST != x)			\
-			changed |= true;	\
-		DST = x;			\
-	} while (0)
-
-	status->if_version = ntohl(*bp++);
-	EXTRACT(status->type);
-	EXTRACT(status->nlink);
-	size = ntohl(*bp++);
-	data_version = ntohl(*bp++);
-	EXTRACT(status->author);
-	owner = make_kuid(&init_user_ns, ntohl(*bp++));
-	changed |= !uid_eq(owner, status->owner);
-	status->owner = owner;
-	EXTRACT(status->caller_access); /* call ticket dependent */
-	EXTRACT(status->anon_access);
-	EXTRACT(status->mode);
-	bp++; /* parent.vnode */
-	bp++; /* parent.unique */
-	bp++; /* seg size */
-	status->mtime_client = ntohl(*bp++);
-	status->mtime_server = ntohl(*bp++);
-	group = make_kgid(&init_user_ns, ntohl(*bp++));
-	changed |= !gid_eq(group, status->group);
-	status->group = group;
-	bp++; /* sync counter */
-	data_version |= (u64) ntohl(*bp++) << 32;
-	EXTRACT(status->lock_count);
-	size |= (u64) ntohl(*bp++) << 32;
-	bp++; /* spare 4 */
-	*_bp = bp;
+	if (xdr->if_version != htonl(AFS_FSTATUS_VERSION)) {
+		pr_warn("Unknown AFSFetchStatus version %u\n", ntohl(xdr->if_version));
+		goto bad;
+	}
 
-	if (size != status->size) {
-		status->size = size;
-		changed |= true;
+	type = ntohl(xdr->type);
+	abort_code = ntohl(xdr->abort_code);
+	switch (type) {
+	case AFS_FTYPE_FILE:
+	case AFS_FTYPE_DIR:
+	case AFS_FTYPE_SYMLINK:
+		if (type != status->type &&
+		    vnode &&
+		    !test_bit(AFS_VNODE_UNSET, &vnode->flags)) {
+			pr_warning("Vnode %x:%x:%x changed type %u to %u\n",
+				   vnode->fid.vid,
+				   vnode->fid.vnode,
+				   vnode->fid.unique,
+				   status->type, type);
+			goto bad;
+		}
+		status->type = type;
+		break;
+	case AFS_FTYPE_INVALID:
+		if (abort_code != 0) {
+			status->abort_code = abort_code;
+			ret = 0;
+			goto out;
+		}
+		/* Fall through */
+	default:
+		goto bad;
 	}
-	status->mode &= S_IALLUGO;
 
-	_debug("vnode time %lx, %lx",
-	       status->mtime_client, status->mtime_server);
+#define EXTRACT_M(FIELD)					\
+	do {							\
+		u32 x = ntohl(xdr->FIELD);			\
+		if (status->FIELD != x) {			\
+			flags |= AFS_VNODE_META_CHANGED;	\
+			status->FIELD = x;			\
+		}						\
+	} while (0)
 
-	if (vnode) {
-		if (changed && !test_bit(AFS_VNODE_UNSET, &vnode->flags)) {
-			_debug("vnode changed");
-			i_size_write(&vnode->vfs_inode, size);
-			vnode->vfs_inode.i_uid = status->owner;
-			vnode->vfs_inode.i_gid = status->group;
-			vnode->vfs_inode.i_generation = vnode->fid.unique;
-			set_nlink(&vnode->vfs_inode, status->nlink);
-
-			mode = vnode->vfs_inode.i_mode;
-			mode &= ~S_IALLUGO;
-			mode |= status->mode;
-			barrier();
-			vnode->vfs_inode.i_mode = mode;
-		}
+	EXTRACT_M(nlink);
+	EXTRACT_M(author);
+	EXTRACT_M(owner);
+	EXTRACT_M(caller_access); /* call ticket dependent */
+	EXTRACT_M(anon_access);
+	EXTRACT_M(mode);
+	EXTRACT_M(group);
+
+	status->mtime_client = ntohl(xdr->mtime_client);
+	status->mtime_server = ntohl(xdr->mtime_server);
+	status->lock_count   = ntohl(xdr->lock_count);
+
+	size  = (u64)ntohl(xdr->size_lo);
+	size |= (u64)ntohl(xdr->size_hi) << 32;
+	status->size = size;
+
+	data_version  = (u64)ntohl(xdr->data_version_lo);
+	data_version |= (u64)ntohl(xdr->data_version_hi) << 32;
+	if (data_version != status->data_version) {
+		status->data_version = data_version;
+		flags |= AFS_VNODE_DATA_CHANGED;
+	}
 
-		vnode->vfs_inode.i_ctime.tv_sec	= status->mtime_client;
-		vnode->vfs_inode.i_mtime	= vnode->vfs_inode.i_ctime;
-		vnode->vfs_inode.i_atime	= vnode->vfs_inode.i_ctime;
-		inode_set_iversion_raw(&vnode->vfs_inode, data_version);
+	if (read_req) {
+		read_req->data_version = data_version;
+		read_req->file_size = size;
 	}
 
-	expected_version = status->data_version;
-	if (store_version)
-		expected_version = *store_version;
+	*_bp = (const void *)*_bp + sizeof(*xdr);
 
-	if (expected_version != data_version) {
-		status->data_version = data_version;
-		if (vnode && !test_bit(AFS_VNODE_UNSET, &vnode->flags)) {
-			_debug("vnode modified %llx on {%x:%u}",
-			       (unsigned long long) data_version,
-			       vnode->fid.vid, vnode->fid.vnode);
-			set_bit(AFS_VNODE_DIR_MODIFIED, &vnode->flags);
-			set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags);
-		}
-	} else if (store_version) {
-		status->data_version = data_version;
+	if (vnode) {
+		if (test_bit(AFS_VNODE_UNSET, &vnode->flags))
+			flags |= AFS_VNODE_NOT_YET_SET;
+		afs_update_inode_from_status(vnode, status, expected_version,
+					     flags);
 	}
 
+	ret = 0;
+
+out:
 	if (vnode)
 		write_sequnlock(&vnode->cb_lock);
+	return ret;
+
+bad:
+	xdr_dump_bad(*_bp);
+	ret = afs_protocol_error(call, -EBADMSG);
+	goto out;
 }
 
 /*
@@ -274,7 +360,7 @@ static void xdr_decode_AFSFetchVolumeStatus(const __be32 **_bp,
 /*
  * deliver reply data to an FS.FetchStatus
  */
-static int afs_deliver_fs_fetch_status(struct afs_call *call)
+static int afs_deliver_fs_fetch_status_vnode(struct afs_call *call)
 {
 	struct afs_vnode *vnode = call->reply[0];
 	const __be32 *bp;
@@ -288,7 +374,9 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call)
 
 	/* unmarshall the reply once we've received all of it */
 	bp = call->buffer;
-	xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
+	if (xdr_decode_AFSFetchStatus(call, &bp, &vnode->status, vnode,
+				      &call->expected_version, NULL) < 0)
+		return afs_protocol_error(call, -EBADMSG);
 	xdr_decode_AFSCallBack(call, vnode, &bp);
 	if (call->reply[1])
 		xdr_decode_AFSVolSync(&bp, call->reply[1]);
@@ -300,17 +388,18 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call)
 /*
  * FS.FetchStatus operation type
  */
-static const struct afs_call_type afs_RXFSFetchStatus = {
-	.name		= "FS.FetchStatus",
+static const struct afs_call_type afs_RXFSFetchStatus_vnode = {
+	.name		= "FS.FetchStatus(vnode)",
 	.op		= afs_FS_FetchStatus,
-	.deliver	= afs_deliver_fs_fetch_status,
+	.deliver	= afs_deliver_fs_fetch_status_vnode,
 	.destructor	= afs_flat_call_destructor,
 };
 
 /*
  * fetch the status information for a file
  */
-int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsync)
+int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsync,
+			     bool new_inode)
 {
 	struct afs_vnode *vnode = fc->vnode;
 	struct afs_call *call;
@@ -320,7 +409,8 @@ int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsy
 	_enter(",%x,{%x:%u},,",
 	       key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
 
-	call = afs_alloc_flat_call(net, &afs_RXFSFetchStatus, 16, (21 + 3 + 6) * 4);
+	call = afs_alloc_flat_call(net, &afs_RXFSFetchStatus_vnode,
+				   16, (21 + 3 + 6) * 4);
 	if (!call) {
 		fc->ac.error = -ENOMEM;
 		return -ENOMEM;
@@ -329,6 +419,7 @@ int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsy
 	call->key = fc->key;
 	call->reply[0] = vnode;
 	call->reply[1] = volsync;
+	call->expected_version = new_inode ? 1 : vnode->status.data_version;
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -464,7 +555,9 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
 			return ret;
 
 		bp = call->buffer;
-		xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
+		if (xdr_decode_AFSFetchStatus(call, &bp, &vnode->status, vnode,
+					      &vnode->status.data_version, req) < 0)
+			return afs_protocol_error(call, -EBADMSG);
 		xdr_decode_AFSCallBack(call, vnode, &bp);
 		if (call->reply[1])
 			xdr_decode_AFSVolSync(&bp, call->reply[1]);
@@ -534,6 +627,7 @@ static int afs_fs_fetch_data64(struct afs_fs_cursor *fc, struct afs_read *req)
 	call->reply[0] = vnode;
 	call->reply[1] = NULL; /* volsync */
 	call->reply[2] = req;
+	call->expected_version = vnode->status.data_version;
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -546,7 +640,7 @@ static int afs_fs_fetch_data64(struct afs_fs_cursor *fc, struct afs_read *req)
 	bp[6] = 0;
 	bp[7] = htonl(lower_32_bits(req->len));
 
-	atomic_inc(&req->usage);
+	refcount_inc(&req->usage);
 	call->cb_break = fc->cb_break;
 	afs_use_fs_server(call, fc->cbi);
 	trace_afs_make_fs_call(call, &vnode->fid);
@@ -578,6 +672,7 @@ int afs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req)
 	call->reply[0] = vnode;
 	call->reply[1] = NULL; /* volsync */
 	call->reply[2] = req;
+	call->expected_version = vnode->status.data_version;
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -588,7 +683,7 @@ int afs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req)
 	bp[4] = htonl(lower_32_bits(req->pos));
 	bp[5] = htonl(lower_32_bits(req->len));
 
-	atomic_inc(&req->usage);
+	refcount_inc(&req->usage);
 	call->cb_break = fc->cb_break;
 	afs_use_fs_server(call, fc->cbi);
 	trace_afs_make_fs_call(call, &vnode->fid);
@@ -613,8 +708,10 @@ static int afs_deliver_fs_create_vnode(struct afs_call *call)
 	/* unmarshall the reply once we've received all of it */
 	bp = call->buffer;
 	xdr_decode_AFSFid(&bp, call->reply[1]);
-	xdr_decode_AFSFetchStatus(&bp, call->reply[2], NULL, NULL);
-	xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
+	if (xdr_decode_AFSFetchStatus(call, &bp, call->reply[2], NULL, NULL, NULL) < 0 ||
+	    xdr_decode_AFSFetchStatus(call, &bp, &vnode->status, vnode,
+				      &call->expected_version, NULL) < 0)
+		return afs_protocol_error(call, -EBADMSG);
 	xdr_decode_AFSCallBack_raw(&bp, call->reply[3]);
 	/* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
 
@@ -645,6 +742,7 @@ static const struct afs_call_type afs_RXFSMakeDir = {
 int afs_fs_create(struct afs_fs_cursor *fc,
 		  const char *name,
 		  umode_t mode,
+		  u64 current_data_version,
 		  struct afs_fid *newfid,
 		  struct afs_file_status *newstatus,
 		  struct afs_callback *newcb)
@@ -672,6 +770,7 @@ int afs_fs_create(struct afs_fs_cursor *fc,
 	call->reply[1] = newfid;
 	call->reply[2] = newstatus;
 	call->reply[3] = newcb;
+	call->expected_version = current_data_version + 1;
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -715,7 +814,9 @@ static int afs_deliver_fs_remove(struct afs_call *call)
 
 	/* unmarshall the reply once we've received all of it */
 	bp = call->buffer;
-	xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
+	if (xdr_decode_AFSFetchStatus(call, &bp, &vnode->status, vnode,
+				      &call->expected_version, NULL) < 0)
+		return afs_protocol_error(call, -EBADMSG);
 	/* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
 
 	_leave(" = 0 [done]");
@@ -742,7 +843,8 @@ static const struct afs_call_type afs_RXFSRemoveDir = {
 /*
  * remove a file or directory
  */
-int afs_fs_remove(struct afs_fs_cursor *fc, const char *name, bool isdir)
+int afs_fs_remove(struct afs_fs_cursor *fc, const char *name, bool isdir,
+		  u64 current_data_version)
 {
 	struct afs_vnode *vnode = fc->vnode;
 	struct afs_call *call;
@@ -764,6 +866,7 @@ int afs_fs_remove(struct afs_fs_cursor *fc, const char *name, bool isdir)
 
 	call->key = fc->key;
 	call->reply[0] = vnode;
+	call->expected_version = current_data_version + 1;
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -801,8 +904,10 @@ static int afs_deliver_fs_link(struct afs_call *call)
 
 	/* unmarshall the reply once we've received all of it */
 	bp = call->buffer;
-	xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
-	xdr_decode_AFSFetchStatus(&bp, &dvnode->status, dvnode, NULL);
+	if (xdr_decode_AFSFetchStatus(call, &bp, &vnode->status, vnode, NULL, NULL) < 0 ||
+	    xdr_decode_AFSFetchStatus(call, &bp, &dvnode->status, dvnode,
+				      &call->expected_version, NULL) < 0)
+		return afs_protocol_error(call, -EBADMSG);
 	/* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
 
 	_leave(" = 0 [done]");
@@ -823,7 +928,7 @@ static const struct afs_call_type afs_RXFSLink = {
  * make a hard link
  */
 int afs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
-		const char *name)
+		const char *name, u64 current_data_version)
 {
 	struct afs_vnode *dvnode = fc->vnode;
 	struct afs_call *call;
@@ -844,6 +949,7 @@ int afs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
 	call->key = fc->key;
 	call->reply[0] = dvnode;
 	call->reply[1] = vnode;
+	call->expected_version = current_data_version + 1;
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -885,8 +991,10 @@ static int afs_deliver_fs_symlink(struct afs_call *call)
 	/* unmarshall the reply once we've received all of it */
 	bp = call->buffer;
 	xdr_decode_AFSFid(&bp, call->reply[1]);
-	xdr_decode_AFSFetchStatus(&bp, call->reply[2], NULL, NULL);
-	xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
+	if (xdr_decode_AFSFetchStatus(call, &bp, call->reply[2], NULL, NULL, NULL) < 0 ||
+	    xdr_decode_AFSFetchStatus(call, &bp, &vnode->status, vnode,
+				      &call->expected_version, NULL) < 0)
+		return afs_protocol_error(call, -EBADMSG);
 	/* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
 
 	_leave(" = 0 [done]");
@@ -909,6 +1017,7 @@ static const struct afs_call_type afs_RXFSSymlink = {
 int afs_fs_symlink(struct afs_fs_cursor *fc,
 		   const char *name,
 		   const char *contents,
+		   u64 current_data_version,
 		   struct afs_fid *newfid,
 		   struct afs_file_status *newstatus)
 {
@@ -937,6 +1046,7 @@ int afs_fs_symlink(struct afs_fs_cursor *fc,
 	call->reply[0] = vnode;
 	call->reply[1] = newfid;
 	call->reply[2] = newstatus;
+	call->expected_version = current_data_version + 1;
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -987,10 +1097,13 @@ static int afs_deliver_fs_rename(struct afs_call *call)
 
 	/* unmarshall the reply once we've received all of it */
 	bp = call->buffer;
-	xdr_decode_AFSFetchStatus(&bp, &orig_dvnode->status, orig_dvnode, NULL);
-	if (new_dvnode != orig_dvnode)
-		xdr_decode_AFSFetchStatus(&bp, &new_dvnode->status, new_dvnode,
-					  NULL);
+	if (xdr_decode_AFSFetchStatus(call, &bp, &orig_dvnode->status, orig_dvnode,
+				      &call->expected_version, NULL) < 0)
+		return afs_protocol_error(call, -EBADMSG);
+	if (new_dvnode != orig_dvnode &&
+	    xdr_decode_AFSFetchStatus(call, &bp, &new_dvnode->status, new_dvnode,
+				      &call->expected_version_2, NULL) < 0)
+		return afs_protocol_error(call, -EBADMSG);
 	/* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
 
 	_leave(" = 0 [done]");
@@ -1013,7 +1126,9 @@ static const struct afs_call_type afs_RXFSRename = {
 int afs_fs_rename(struct afs_fs_cursor *fc,
 		  const char *orig_name,
 		  struct afs_vnode *new_dvnode,
-		  const char *new_name)
+		  const char *new_name,
+		  u64 current_orig_data_version,
+		  u64 current_new_data_version)
 {
 	struct afs_vnode *orig_dvnode = fc->vnode;
 	struct afs_call *call;
@@ -1041,6 +1156,8 @@ int afs_fs_rename(struct afs_fs_cursor *fc,
 	call->key = fc->key;
 	call->reply[0] = orig_dvnode;
 	call->reply[1] = new_dvnode;
+	call->expected_version = current_orig_data_version + 1;
+	call->expected_version_2 = current_new_data_version + 1;
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -1089,8 +1206,9 @@ static int afs_deliver_fs_store_data(struct afs_call *call)
 
 	/* unmarshall the reply once we've received all of it */
 	bp = call->buffer;
-	xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode,
-				  &call->store_version);
+	if (xdr_decode_AFSFetchStatus(call, &bp, &vnode->status, vnode,
+				      &call->expected_version, NULL) < 0)
+		return afs_protocol_error(call, -EBADMSG);
 	/* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
 
 	afs_pages_written_back(vnode, call);
@@ -1147,7 +1265,7 @@ static int afs_fs_store_data64(struct afs_fs_cursor *fc,
 	call->first_offset = offset;
 	call->last_to = to;
 	call->send_pages = true;
-	call->store_version = vnode->status.data_version + 1;
+	call->expected_version = vnode->status.data_version + 1;
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -1222,7 +1340,7 @@ int afs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping,
 	call->first_offset = offset;
 	call->last_to = to;
 	call->send_pages = true;
-	call->store_version = vnode->status.data_version + 1;
+	call->expected_version = vnode->status.data_version + 1;
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -1252,7 +1370,6 @@ int afs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping,
  */
 static int afs_deliver_fs_store_status(struct afs_call *call)
 {
-	afs_dataversion_t *store_version;
 	struct afs_vnode *vnode = call->reply[0];
 	const __be32 *bp;
 	int ret;
@@ -1264,12 +1381,10 @@ static int afs_deliver_fs_store_status(struct afs_call *call)
 		return ret;
 
 	/* unmarshall the reply once we've received all of it */
-	store_version = NULL;
-	if (call->operation_ID == FSSTOREDATA)
-		store_version = &call->store_version;
-
 	bp = call->buffer;
-	xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, store_version);
+	if (xdr_decode_AFSFetchStatus(call, &bp, &vnode->status, vnode,
+				      &call->expected_version, NULL) < 0)
+		return afs_protocol_error(call, -EBADMSG);
 	/* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
 
 	_leave(" = 0 [done]");
@@ -1324,7 +1439,7 @@ static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct iattr *attr)
 
 	call->key = fc->key;
 	call->reply[0] = vnode;
-	call->store_version = vnode->status.data_version + 1;
+	call->expected_version = vnode->status.data_version + 1;
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -1373,7 +1488,7 @@ static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr)
 
 	call->key = fc->key;
 	call->reply[0] = vnode;
-	call->store_version = vnode->status.data_version + 1;
+	call->expected_version = vnode->status.data_version + 1;
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -1418,6 +1533,7 @@ int afs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr)
 
 	call->key = fc->key;
 	call->reply[0] = vnode;
+	call->expected_version = vnode->status.data_version;
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -1471,7 +1587,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
 		call->count = ntohl(call->tmp);
 		_debug("volname length: %u", call->count);
 		if (call->count >= AFSNAMEMAX)
-			return -EBADMSG;
+			return afs_protocol_error(call, -EBADMSG);
 		call->offset = 0;
 		call->unmarshall++;
 
@@ -1518,7 +1634,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
 		call->count = ntohl(call->tmp);
 		_debug("offline msg length: %u", call->count);
 		if (call->count >= AFSNAMEMAX)
-			return -EBADMSG;
+			return afs_protocol_error(call, -EBADMSG);
 		call->offset = 0;
 		call->unmarshall++;
 
@@ -1565,7 +1681,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
 		call->count = ntohl(call->tmp);
 		_debug("motd length: %u", call->count);
 		if (call->count >= AFSNAMEMAX)
-			return -EBADMSG;
+			return afs_protocol_error(call, -EBADMSG);
 		call->offset = 0;
 		call->unmarshall++;
 
@@ -1947,3 +2063,265 @@ int afs_fs_get_capabilities(struct afs_net *net,
 	trace_afs_make_fs_call(call, NULL);
 	return afs_make_call(ac, call, GFP_NOFS, false);
 }
+
+/*
+ * Deliver reply data to an FS.FetchStatus with no vnode.
+ */
+static int afs_deliver_fs_fetch_status(struct afs_call *call)
+{
+	struct afs_file_status *status = call->reply[1];
+	struct afs_callback *callback = call->reply[2];
+	struct afs_volsync *volsync = call->reply[3];
+	struct afs_vnode *vnode = call->reply[0];
+	const __be32 *bp;
+	int ret;
+
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
+
+	_enter("");	/* reply[0] may be NULL, so don't trace vnode->fid */
+	/* Unmarshall the reply; a bad status leaves bp unadvanced, so bail. */
+	bp = call->buffer;
+	if (xdr_decode_AFSFetchStatus(call, &bp, status, vnode,
+				      &call->expected_version, NULL) < 0)
+		return afs_protocol_error(call, -EBADMSG);
+	callback[call->count].version	= ntohl(bp[0]);
+	callback[call->count].expiry	= ntohl(bp[1]);
+	callback[call->count].type	= ntohl(bp[2]);
+	if (vnode)
+		xdr_decode_AFSCallBack(call, vnode, &bp);
+	else
+		bp += 3;
+	if (volsync)
+		xdr_decode_AFSVolSync(&bp, volsync);
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * FS.FetchStatus operation type
+ */
+static const struct afs_call_type afs_RXFSFetchStatus = {
+	.name		= "FS.FetchStatus",
+	.op		= afs_FS_FetchStatus,
+	.deliver	= afs_deliver_fs_fetch_status,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Fetch the status information for a fid without needing a vnode handle.
+ */
+int afs_fs_fetch_status(struct afs_fs_cursor *fc,
+			struct afs_net *net,
+			struct afs_fid *fid,
+			struct afs_file_status *status,
+			struct afs_callback *callback,
+			struct afs_volsync *volsync)
+{
+	struct afs_call *call;
+	__be32 *bp;
+
+	_enter(",%x,{%x:%u},,",
+	       key_serial(fc->key), fid->vid, fid->vnode);
+
+	call = afs_alloc_flat_call(net, &afs_RXFSFetchStatus, 16, (21 + 3 + 6) * 4);
+	if (!call) {
+		fc->ac.error = -ENOMEM;
+		return -ENOMEM;
+	}
+
+	call->key = fc->key;
+	call->reply[0] = NULL; /* vnode for fid[0] */
+	call->reply[1] = status;
+	call->reply[2] = callback;
+	call->reply[3] = volsync;
+	call->expected_version = 1; /* vnode->status.data_version */
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp[0] = htonl(FSFETCHSTATUS);
+	bp[1] = htonl(fid->vid);
+	bp[2] = htonl(fid->vnode);
+	bp[3] = htonl(fid->unique);
+
+	call->cb_break = fc->cb_break;
+	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, fid);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * Deliver reply data to an FS.InlineBulkStatus call
+ */
+static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
+{
+	struct afs_file_status *statuses;
+	struct afs_callback *callbacks;
+	struct afs_vnode *vnode = call->reply[0];
+	const __be32 *bp;
+	u32 tmp;
+	int ret;
+
+	_enter("{%u}", call->unmarshall);
+
+	switch (call->unmarshall) {
+	case 0:
+		call->offset = 0;
+		call->unmarshall++;
+
+		/* Extract the file status count and array in two steps */
+	case 1:
+		_debug("extract status count");
+		ret = afs_extract_data(call, &call->tmp, 4, true);
+		if (ret < 0)
+			return ret;
+
+		tmp = ntohl(call->tmp);
+		_debug("status count: %u/%u", tmp, call->count2);
+		if (tmp != call->count2)
+			return afs_protocol_error(call, -EBADMSG);
+
+		call->count = 0;
+		call->unmarshall++;
+	more_counts:
+		call->offset = 0;
+
+	case 2:
+		_debug("extract status array %u", call->count);
+		ret = afs_extract_data(call, call->buffer, 21 * 4, true);
+		if (ret < 0)
+			return ret;
+
+		bp = call->buffer;
+		statuses = call->reply[1];
+		if (xdr_decode_AFSFetchStatus(call, &bp, &statuses[call->count],
+					      call->count == 0 ? vnode : NULL,
+					      NULL, NULL) < 0)
+			return afs_protocol_error(call, -EBADMSG);
+
+		call->count++;
+		if (call->count < call->count2)
+			goto more_counts;
+
+		call->count = 0;
+		call->unmarshall++;
+		call->offset = 0;
+
+		/* Extract the callback count and array in two steps */
+	case 3:
+		_debug("extract CB count");
+		ret = afs_extract_data(call, &call->tmp, 4, true);
+		if (ret < 0)
+			return ret;
+
+		tmp = ntohl(call->tmp);
+		_debug("CB count: %u", tmp);
+		if (tmp != call->count2)
+			return afs_protocol_error(call, -EBADMSG);
+		call->count = 0;
+		call->unmarshall++;
+	more_cbs:
+		call->offset = 0;
+
+	case 4:
+		_debug("extract CB array");
+		ret = afs_extract_data(call, call->buffer, 3 * 4, true);
+		if (ret < 0)
+			return ret;
+
+		_debug("unmarshall CB array");
+		bp = call->buffer;
+		callbacks = call->reply[2];
+		callbacks[call->count].version	= ntohl(bp[0]);
+		callbacks[call->count].expiry	= ntohl(bp[1]);
+		callbacks[call->count].type	= ntohl(bp[2]);
+		statuses = call->reply[1];
+		if (call->count == 0 && vnode && statuses[0].abort_code == 0)
+			xdr_decode_AFSCallBack(call, vnode, &bp);
+		call->count++;
+		if (call->count < call->count2)
+			goto more_cbs;
+
+		call->offset = 0;
+		call->unmarshall++;
+
+	case 5:
+		ret = afs_extract_data(call, call->buffer, 6 * 4, false);
+		if (ret < 0)
+			return ret;
+
+		bp = call->buffer;
+		if (call->reply[3])
+			xdr_decode_AFSVolSync(&bp, call->reply[3]);
+
+		call->offset = 0;
+		call->unmarshall++;
+
+	case 6:
+		break;
+	}
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * FS.InlineBulkStatus operation type
+ */
+static const struct afs_call_type afs_RXFSInlineBulkStatus = {
+	.name		= "FS.InlineBulkStatus",
+	.op		= afs_FS_InlineBulkStatus,
+	.deliver	= afs_deliver_fs_inline_bulk_status,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Fetch the status information for up to 50 files
+ */
+int afs_fs_inline_bulk_status(struct afs_fs_cursor *fc,
+			      struct afs_net *net,
+			      struct afs_fid *fids,
+			      struct afs_file_status *statuses,
+			      struct afs_callback *callbacks,
+			      unsigned int nr_fids,
+			      struct afs_volsync *volsync)
+{
+	struct afs_call *call;
+	__be32 *bp;
+	int i;
+
+	_enter(",%x,{%x:%u},%u",
+	       key_serial(fc->key), fids[0].vid, fids[0].vnode, nr_fids);
+
+	call = afs_alloc_flat_call(net, &afs_RXFSInlineBulkStatus,
+				   (2 + nr_fids * 3) * 4,
+				   21 * 4);
+	if (!call) {
+		fc->ac.error = -ENOMEM;
+		return -ENOMEM;
+	}
+
+	call->key = fc->key;
+	call->reply[0] = NULL; /* vnode for fid[0] */
+	call->reply[1] = statuses;
+	call->reply[2] = callbacks;
+	call->reply[3] = volsync;
+	call->count2 = nr_fids;
+
+	/* marshall: opcode, fid count, then a vid/vnode/unique triple per fid */
+	bp = call->request;
+	*bp++ = htonl(FSINLINEBULKSTATUS);
+	*bp++ = htonl(nr_fids);
+	for (i = 0; i < nr_fids; i++) {
+		*bp++ = htonl(fids[i].vid);
+		*bp++ = htonl(fids[i].vnode);
+		*bp++ = htonl(fids[i].unique);
+	}
+
+	call->cb_break = fc->cb_break;
+	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &fids[0]);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 6b39d0255b72..06194cfe9724 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -30,12 +30,11 @@ static const struct inode_operations afs_symlink_inode_operations = {
 };
 
 /*
- * map the AFS file status to the inode member variables
+ * Initialise an inode from the vnode status.
  */
-static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
+static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key)
 {
 	struct inode *inode = AFS_VNODE_TO_I(vnode);
-	bool changed;
 
 	_debug("FS: ft=%d lk=%d sz=%llu ver=%Lu mod=%hu",
 	       vnode->status.type,
@@ -46,16 +45,21 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
 
 	read_seqlock_excl(&vnode->cb_lock);
 
+	afs_update_inode_from_status(vnode, &vnode->status, NULL,
+				     AFS_VNODE_NOT_YET_SET);
+
 	switch (vnode->status.type) {
 	case AFS_FTYPE_FILE:
 		inode->i_mode	= S_IFREG | vnode->status.mode;
 		inode->i_op	= &afs_file_inode_operations;
 		inode->i_fop	= &afs_file_operations;
+		inode->i_mapping->a_ops	= &afs_fs_aops;
 		break;
 	case AFS_FTYPE_DIR:
 		inode->i_mode	= S_IFDIR | vnode->status.mode;
 		inode->i_op	= &afs_dir_inode_operations;
 		inode->i_fop	= &afs_dir_file_operations;
+		inode->i_mapping->a_ops	= &afs_dir_aops;
 		break;
 	case AFS_FTYPE_SYMLINK:
 		/* Symlinks with a mode of 0644 are actually mountpoints. */
@@ -67,45 +71,31 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
 			inode->i_mode	= S_IFDIR | 0555;
 			inode->i_op	= &afs_mntpt_inode_operations;
 			inode->i_fop	= &afs_mntpt_file_operations;
+			inode->i_mapping->a_ops	= &afs_fs_aops;
 		} else {
 			inode->i_mode	= S_IFLNK | vnode->status.mode;
 			inode->i_op	= &afs_symlink_inode_operations;
+			inode->i_mapping->a_ops	= &afs_fs_aops;
 		}
 		inode_nohighmem(inode);
 		break;
 	default:
 		printk("kAFS: AFS vnode with undefined type\n");
 		read_sequnlock_excl(&vnode->cb_lock);
-		return -EBADMSG;
+		return afs_protocol_error(NULL, -EBADMSG);
 	}
 
-	changed = (vnode->status.size != inode->i_size);
-
-	set_nlink(inode, vnode->status.nlink);
-	inode->i_uid		= vnode->status.owner;
-	inode->i_gid            = vnode->status.group;
-	inode->i_size		= vnode->status.size;
-	inode->i_ctime.tv_sec	= vnode->status.mtime_client;
-	inode->i_ctime.tv_nsec	= 0;
-	inode->i_atime		= inode->i_mtime = inode->i_ctime;
 	inode->i_blocks		= 0;
-	inode->i_generation	= vnode->fid.unique;
-	inode_set_iversion_raw(inode, vnode->status.data_version);
-	inode->i_mapping->a_ops	= &afs_fs_aops;
+	vnode->invalid_before	= vnode->status.data_version;
 
 	read_sequnlock_excl(&vnode->cb_lock);
-
-#ifdef CONFIG_AFS_FSCACHE
-	if (changed)
-		fscache_attr_changed(vnode->cache);
-#endif
 	return 0;
 }
 
 /*
  * Fetch file status from the volume.
  */
-int afs_fetch_status(struct afs_vnode *vnode, struct key *key)
+int afs_fetch_status(struct afs_vnode *vnode, struct key *key, bool new_inode)
 {
 	struct afs_fs_cursor fc;
 	int ret;
@@ -119,7 +109,7 @@ int afs_fetch_status(struct afs_vnode *vnode, struct key *key)
 	if (afs_begin_vnode_operation(&fc, vnode, key)) {
 		while (afs_select_fileserver(&fc)) {
 			fc.cb_break = vnode->cb_break + vnode->cb_s_break;
-			afs_fs_fetch_file_status(&fc, NULL);
+			afs_fs_fetch_file_status(&fc, NULL, new_inode);
 		}
 
 		afs_check_for_remote_deletion(&fc, fc.vnode);
@@ -243,6 +233,38 @@ struct inode *afs_iget_pseudo_dir(struct super_block *sb, bool root)
 }
 
 /*
+ * Get a cache cookie for a non-directory inode; a no-op without AFS_FSCACHE.
+ */
+static void afs_get_inode_cache(struct afs_vnode *vnode)
+{
+#ifdef CONFIG_AFS_FSCACHE
+	struct {
+		u32 vnode_id;
+		u32 unique;
+		u32 vnode_id_ext[2];	/* Allow for a 96-bit key */
+	} __packed key;
+	struct afs_vnode_cache_aux aux;
+
+	if (vnode->status.type == AFS_FTYPE_DIR) {
+		vnode->cache = NULL;	/* No cookie is acquired for directories */
+		return;
+	}
+
+	key.vnode_id		= vnode->fid.vnode;
+	key.unique		= vnode->fid.unique;
+	key.vnode_id_ext[0]	= 0;	/* Extension words are always zero here */
+	key.vnode_id_ext[1]	= 0;
+	aux.data_version	= vnode->status.data_version;
+
+	vnode->cache = fscache_acquire_cookie(vnode->volume->cache,
+					      &afs_vnode_cache_index_def,
+					      &key, sizeof(key),
+					      &aux, sizeof(aux),
+					      vnode, vnode->status.size, true);
+#endif
+}
+
+/*
  * inode retrieval
  */
 struct inode *afs_iget(struct super_block *sb, struct key *key,
@@ -280,7 +302,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
 
 	if (!status) {
 		/* it's a remotely extant inode */
-		ret = afs_fetch_status(vnode, key);
+		ret = afs_fetch_status(vnode, key, true);
 		if (ret < 0)
 			goto bad_inode;
 	} else {
@@ -304,19 +326,12 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
 		vnode->cb_expires_at += ktime_get_real_seconds();
 	}
 
-	/* set up caching before mapping the status, as map-status reads the
-	 * first page of symlinks to see if they're really mountpoints */
-	inode->i_size = vnode->status.size;
-#ifdef CONFIG_AFS_FSCACHE
-	vnode->cache = fscache_acquire_cookie(vnode->volume->cache,
-					      &afs_vnode_cache_index_def,
-					      vnode, true);
-#endif
-
-	ret = afs_inode_map_status(vnode, key);
+	ret = afs_inode_init_from_status(vnode, key);
 	if (ret < 0)
 		goto bad_inode;
 
+	afs_get_inode_cache(vnode);
+
 	/* success */
 	clear_bit(AFS_VNODE_UNSET, &vnode->flags);
 	inode->i_flags |= S_NOATIME;
@@ -326,10 +341,6 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
 
 	/* failure */
 bad_inode:
-#ifdef CONFIG_AFS_FSCACHE
-	fscache_relinquish_cookie(vnode->cache, 0);
-	vnode->cache = NULL;
-#endif
 	iget_failed(inode);
 	_leave(" = %d [bad]", ret);
 	return ERR_PTR(ret);
@@ -343,6 +354,10 @@ void afs_zap_data(struct afs_vnode *vnode)
 {
 	_enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode);
 
+#ifdef CONFIG_AFS_FSCACHE
+	fscache_invalidate(vnode->cache);
+#endif
+
 	/* nuke all the non-dirty pages that aren't locked, mapped or being
 	 * written back in a regular file and completely discard the pages in a
 	 * directory or symlink */
@@ -380,8 +395,11 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
 	if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
 		if (vnode->cb_s_break != vnode->cb_interest->server->cb_s_break) {
 			vnode->cb_s_break = vnode->cb_interest->server->cb_s_break;
-		} else if (!test_bit(AFS_VNODE_DIR_MODIFIED, &vnode->flags) &&
-			   !test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags) &&
+		} else if (vnode->status.type == AFS_FTYPE_DIR &&
+			   test_bit(AFS_VNODE_DIR_VALID, &vnode->flags) &&
+			   vnode->cb_expires_at - 10 > now) {
+				valid = true;
+		} else if (!test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags) &&
 			   vnode->cb_expires_at - 10 > now) {
 				valid = true;
 		}
@@ -405,7 +423,7 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
 	 * access */
 	if (!test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
 		_debug("not promised");
-		ret = afs_fetch_status(vnode, key);
+		ret = afs_fetch_status(vnode, key, false);
 		if (ret < 0) {
 			if (ret == -ENOENT) {
 				set_bit(AFS_VNODE_DELETED, &vnode->flags);
@@ -426,8 +444,6 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
 	 * different */
 	if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags))
 		afs_zap_data(vnode);
-
-	clear_bit(AFS_VNODE_DIR_MODIFIED, &vnode->flags);
 	mutex_unlock(&vnode->validate_lock);
 valid:
 	_leave(" = 0");
@@ -507,11 +523,17 @@ void afs_evict_inode(struct inode *inode)
 	}
 
 #ifdef CONFIG_AFS_FSCACHE
-	fscache_relinquish_cookie(vnode->cache, 0);
-	vnode->cache = NULL;
+	{
+		struct afs_vnode_cache_aux aux;
+
+		aux.data_version = vnode->status.data_version;
+		fscache_relinquish_cookie(vnode->cache, &aux,
+					  test_bit(AFS_VNODE_DELETED, &vnode->flags));
+		vnode->cache = NULL;
+	}
 #endif
 
-	afs_put_permits(vnode->permit_cache);
+	afs_put_permits(rcu_access_pointer(vnode->permit_cache));
 	_leave("");
 }
 
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 72217170b155..f8086ec95e24 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -122,7 +122,8 @@ struct afs_call {
 	u32			operation_ID;	/* operation ID for an incoming call */
 	u32			count;		/* count for use in unmarshalling */
 	__be32			tmp;		/* place to extract temporary data */
-	afs_dataversion_t	store_version;	/* updated version expected from store */
+	afs_dataversion_t	expected_version; /* Updated version expected from store */
+	afs_dataversion_t	expected_version_2; /* 2nd updated version expected from store */
 };
 
 struct afs_call_type {
@@ -173,11 +174,14 @@ struct afs_read {
 	loff_t			len;		/* How much we're asking for */
 	loff_t			actual_len;	/* How much we're actually getting */
 	loff_t			remain;		/* Amount remaining */
-	atomic_t		usage;
+	loff_t			file_size;	/* File size returned by server */
+	afs_dataversion_t	data_version;	/* Version number returned by server */
+	refcount_t		usage;
 	unsigned int		index;		/* Which page we're reading into */
 	unsigned int		nr_pages;
 	void (*page_done)(struct afs_call *, struct afs_read *);
-	struct page		*pages[];
+	struct page		**pages;
+	struct page		*array[];
 };
 
 /*
@@ -199,6 +203,18 @@ static inline struct afs_super_info *AFS_FS_S(struct super_block *sb)
 extern struct file_system_type afs_fs_type;
 
 /*
+ * Set of substitutes for @sys.
+ */
+struct afs_sysnames {
+#define AFS_NR_SYSNAME 16			/* Capacity of subs[] */
+	char			*subs[AFS_NR_SYSNAME];	/* Substitution strings */
+	refcount_t		usage;		/* Dropped by afs_put_sysnames() */
+	unsigned short		nr;		/* Number of entries used in subs[] */
+	short			error;		/* Error latched by a failed write */
+	char			blank[1];	/* Empty name used if none were given */
+};
+
+/*
  * AFS network namespace record.
  */
 struct afs_net {
@@ -245,9 +261,25 @@ struct afs_net {
 	struct mutex		lock_manager_mutex;
 
 	/* Misc */
-	struct proc_dir_entry	*proc_afs;		/* /proc/net/afs directory */
+	struct proc_dir_entry	*proc_afs;	/* /proc/net/afs directory */
+	struct afs_sysnames	*sysnames;
+	rwlock_t		sysnames_lock;
+
+	/* Statistics counters */
+	atomic_t		n_lookup;	/* Number of lookups done */
+	atomic_t		n_reval;	/* Number of dentries needing revalidation */
+	atomic_t		n_inval;	/* Number of invalidations by the server */
+	atomic_t		n_relpg;	/* Number of invalidations by releasepage */
+	atomic_t		n_read_dir;	/* Number of directory pages read */
+	atomic_t		n_dir_cr;	/* Number of directory entry creation edits */
+	atomic_t		n_dir_rm;	/* Number of directory entry removal edits */
+	atomic_t		n_stores;	/* Number of store ops */
+	atomic_long_t		n_store_bytes;	/* Number of bytes stored */
+	atomic_long_t		n_fetch_bytes;	/* Number of bytes fetched */
+	atomic_t		n_fetches;	/* Number of data fetch ops */
 };
 
+extern const char afs_init_sysname[];
 extern struct afs_net __afs_net;// Dummy AFS network namespace; TODO: replace with real netns
 
 enum afs_cell_state {
@@ -363,6 +395,7 @@ struct afs_server {
 #define AFS_SERVER_FL_UPDATING	4
 #define AFS_SERVER_FL_PROBED	5		/* The fileserver has been probed */
 #define AFS_SERVER_FL_PROBING	6		/* Fileserver is being probed */
+#define AFS_SERVER_FL_NO_IBULK	7		/* Fileserver doesn't support FS.InlineBulkStatus */
 	atomic_t		usage;
 	u32			addr_version;	/* Address list version */
 
@@ -455,10 +488,11 @@ struct afs_vnode {
 	struct afs_volume	*volume;	/* volume on which vnode resides */
 	struct afs_fid		fid;		/* the file identifier for this inode */
 	struct afs_file_status	status;		/* AFS status info for this file */
+	afs_dataversion_t	invalid_before;	/* Child dentries are invalid before this */
 #ifdef CONFIG_AFS_FSCACHE
 	struct fscache_cookie	*cache;		/* caching cookie */
 #endif
-	struct afs_permits	*permit_cache;	/* cache of permits so far obtained */
+	struct afs_permits __rcu *permit_cache;	/* cache of permits so far obtained */
 	struct mutex		io_lock;	/* Lock for serialising I/O on this mutex */
 	struct mutex		validate_lock;	/* lock for validating this vnode */
 	spinlock_t		wb_lock;	/* lock for wb_keys */
@@ -466,12 +500,13 @@ struct afs_vnode {
 	unsigned long		flags;
 #define AFS_VNODE_CB_PROMISED	0		/* Set if vnode has a callback promise */
 #define AFS_VNODE_UNSET		1		/* set if vnode attributes not yet set */
-#define AFS_VNODE_DIR_MODIFIED	2		/* set if dir vnode's data modified */
+#define AFS_VNODE_DIR_VALID	2		/* Set if dir contents are valid */
 #define AFS_VNODE_ZAP_DATA	3		/* set if vnode's data should be invalidated */
 #define AFS_VNODE_DELETED	4		/* set if vnode deleted on server */
 #define AFS_VNODE_MOUNTPOINT	5		/* set if vnode is a mountpoint symlink */
 #define AFS_VNODE_AUTOCELL	6		/* set if Vnode is an auto mount point */
 #define AFS_VNODE_PSEUDODIR	7 		/* set if Vnode is a pseudo directory */
+#define AFS_VNODE_NEW_CONTENT	8		/* Set if file has new content (create/trunc-0) */
 
 	struct list_head	wb_keys;	/* List of keys available for writeback */
 	struct list_head	pending_locks;	/* locks waiting to be granted */
@@ -559,6 +594,13 @@ struct afs_fs_cursor {
 #define AFS_FS_CURSOR_NO_VSLEEP	0x0020		/* Set to prevent sleep on VBUSY, VOFFLINE, ... */
 };
 
+/*
+ * Auxiliary data handed to fscache along with a vnode's cache cookie.
+ */
+struct afs_vnode_cache_aux {
+	u64			data_version;	/* From vnode->status.data_version */
+} __packed;
+
 #include <trace/events/afs.h>
 
 /*****************************************************************************/
@@ -604,7 +646,7 @@ extern struct fscache_cookie_def afs_vnode_cache_index_def;
  */
 extern void afs_init_callback_state(struct afs_server *);
 extern void afs_break_callback(struct afs_vnode *);
-extern void afs_break_callbacks(struct afs_server *, size_t,struct afs_callback[]);
+extern void afs_break_callbacks(struct afs_server *, size_t, struct afs_callback_break*);
 
 extern int afs_register_server_cb_interest(struct afs_vnode *, struct afs_server_entry *);
 extern void afs_put_cb_interest(struct afs_net *, struct afs_cb_interest *);
@@ -639,11 +681,26 @@ extern bool afs_cm_incoming_call(struct afs_call *);
  */
 extern const struct file_operations afs_dir_file_operations;
 extern const struct inode_operations afs_dir_inode_operations;
+extern const struct address_space_operations afs_dir_aops;
+extern const struct dentry_operations afs_fs_dentry_operations;
+
+extern void afs_d_release(struct dentry *);
+
+/*
+ * dir_edit.c
+ */
+extern void afs_edit_dir_add(struct afs_vnode *, struct qstr *, struct afs_fid *,
+			     enum afs_edit_dir_reason);
+extern void afs_edit_dir_remove(struct afs_vnode *, struct qstr *, enum afs_edit_dir_reason);
+
+/*
+ * dynroot.c
+ */
 extern const struct file_operations afs_dynroot_file_operations;
 extern const struct inode_operations afs_dynroot_inode_operations;
-extern const struct dentry_operations afs_fs_dentry_operations;
+extern const struct dentry_operations afs_dynroot_dentry_operations;
 
-extern bool afs_dir_check_page(struct inode *, struct page *);
+extern struct inode *afs_try_auto_mntpt(struct dentry *, struct inode *);
 
 /*
  * file.c
@@ -673,17 +730,23 @@ extern int afs_flock(struct file *, int, struct file_lock *);
 /*
  * fsclient.c
  */
-extern int afs_fs_fetch_file_status(struct afs_fs_cursor *, struct afs_volsync *);
+#define AFS_VNODE_NOT_YET_SET	0x01
+#define AFS_VNODE_META_CHANGED	0x02
+#define AFS_VNODE_DATA_CHANGED	0x04
+extern void afs_update_inode_from_status(struct afs_vnode *, struct afs_file_status *,
+					 const afs_dataversion_t *, u8);
+
+extern int afs_fs_fetch_file_status(struct afs_fs_cursor *, struct afs_volsync *, bool);
 extern int afs_fs_give_up_callbacks(struct afs_net *, struct afs_server *);
 extern int afs_fs_fetch_data(struct afs_fs_cursor *, struct afs_read *);
-extern int afs_fs_create(struct afs_fs_cursor *, const char *, umode_t,
+extern int afs_fs_create(struct afs_fs_cursor *, const char *, umode_t, u64,
 			 struct afs_fid *, struct afs_file_status *, struct afs_callback *);
-extern int afs_fs_remove(struct afs_fs_cursor *, const char *, bool);
-extern int afs_fs_link(struct afs_fs_cursor *, struct afs_vnode *, const char *);
-extern int afs_fs_symlink(struct afs_fs_cursor *, const char *, const char *,
+extern int afs_fs_remove(struct afs_fs_cursor *, const char *, bool, u64);
+extern int afs_fs_link(struct afs_fs_cursor *, struct afs_vnode *, const char *, u64);
+extern int afs_fs_symlink(struct afs_fs_cursor *, const char *, const char *, u64,
 			  struct afs_fid *, struct afs_file_status *);
 extern int afs_fs_rename(struct afs_fs_cursor *, const char *,
-			 struct afs_vnode *, const char *);
+			 struct afs_vnode *, const char *, u64, u64);
 extern int afs_fs_store_data(struct afs_fs_cursor *, struct address_space *,
 			     pgoff_t, pgoff_t, unsigned, unsigned);
 extern int afs_fs_setattr(struct afs_fs_cursor *, struct iattr *);
@@ -695,11 +758,18 @@ extern int afs_fs_give_up_all_callbacks(struct afs_net *, struct afs_server *,
 					struct afs_addr_cursor *, struct key *);
 extern int afs_fs_get_capabilities(struct afs_net *, struct afs_server *,
 				   struct afs_addr_cursor *, struct key *);
+extern int afs_fs_inline_bulk_status(struct afs_fs_cursor *, struct afs_net *,
+				     struct afs_fid *, struct afs_file_status *,
+				     struct afs_callback *, unsigned int,
+				     struct afs_volsync *);
+extern int afs_fs_fetch_status(struct afs_fs_cursor *, struct afs_net *,
+			       struct afs_fid *, struct afs_file_status *,
+			       struct afs_callback *, struct afs_volsync *);
 
 /*
  * inode.c
  */
-extern int afs_fetch_status(struct afs_vnode *, struct key *);
+extern int afs_fetch_status(struct afs_vnode *, struct key *, bool);
 extern int afs_iget5_test(struct inode *, void *);
 extern struct inode *afs_iget_pseudo_dir(struct super_block *, bool);
 extern struct inode *afs_iget(struct super_block *, struct key *,
@@ -747,6 +817,13 @@ static inline void afs_put_net(struct afs_net *net)
 {
 }
 
+static inline void __afs_stat(atomic_t *s)
+{
+	atomic_inc(s);	/* Count one more event in the given statistics counter */
+}
+
+#define afs_stat_v(vnode, n) __afs_stat(&afs_v2net(vnode)->n)
+
 /*
  * misc.c
  */
@@ -774,6 +851,7 @@ extern int __net_init afs_proc_init(struct afs_net *);
 extern void __net_exit afs_proc_cleanup(struct afs_net *);
 extern int afs_proc_cell_setup(struct afs_net *, struct afs_cell *);
 extern void afs_proc_cell_remove(struct afs_net *, struct afs_cell *);
+extern void afs_put_sysnames(struct afs_sysnames *);
 
 /*
  * rotate.c
@@ -802,6 +880,7 @@ extern void afs_flat_call_destructor(struct afs_call *);
 extern void afs_send_empty_reply(struct afs_call *);
 extern void afs_send_simple_reply(struct afs_call *, const void *, size_t);
 extern int afs_extract_data(struct afs_call *, void *, size_t, bool);
+extern int afs_protocol_error(struct afs_call *, int);
 
 static inline int afs_transfer_reply(struct afs_call *call)
 {
@@ -948,7 +1027,6 @@ extern int afs_writepage(struct page *, struct writeback_control *);
 extern int afs_writepages(struct address_space *, struct writeback_control *);
 extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *);
 extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *);
-extern int afs_flush(struct file *, fl_owner_t);
 extern int afs_fsync(struct file *, loff_t, loff_t, int);
 extern int afs_page_mkwrite(struct vm_fault *);
 extern void afs_prune_wb_keys(struct afs_vnode *);
diff --git a/fs/afs/main.c b/fs/afs/main.c
index 15a02a05ff40..d7560168b3bf 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -34,11 +34,42 @@ MODULE_PARM_DESC(rootcell, "root AFS cell name and VL server IP addr list");
 struct workqueue_struct *afs_wq;
 struct afs_net __afs_net;
 
+/* Initial @sys substitution for the architecture being built; afs_net_init()
+ * installs it as the first entry of a namespace's sysname list. */
+#if defined(CONFIG_ALPHA)
+const char afs_init_sysname[] = "alpha_linux26";
+#elif defined(CONFIG_X86_64)
+const char afs_init_sysname[] = "amd64_linux26";
+#elif defined(CONFIG_ARM)
+const char afs_init_sysname[] = "arm_linux26";
+#elif defined(CONFIG_ARM64)
+const char afs_init_sysname[] = "aarch64_linux26";
+#elif defined(CONFIG_X86_32)
+const char afs_init_sysname[] = "i386_linux26";
+#elif defined(CONFIG_IA64)
+const char afs_init_sysname[] = "ia64_linux26";
+#elif defined(CONFIG_PPC64)
+const char afs_init_sysname[] = "ppc64_linux26";
+#elif defined(CONFIG_PPC32)
+const char afs_init_sysname[] = "ppc_linux26";
+#elif defined(CONFIG_S390) && defined(CONFIG_64BIT)
+const char afs_init_sysname[] = "s390x_linux26";
+#elif defined(CONFIG_S390)
+const char afs_init_sysname[] = "s390_linux26";
+#elif defined(CONFIG_SPARC64)
+const char afs_init_sysname[] = "sparc64_linux26";
+#elif defined(CONFIG_SPARC32)
+const char afs_init_sysname[] = "sparc_linux26";
+#else
+const char afs_init_sysname[] = "unknown_linux26";
+#endif
+
 /*
  * Initialise an AFS network namespace record.
  */
 static int __net_init afs_net_init(struct afs_net *net)
 {
+	struct afs_sysnames *sysnames;
 	int ret;
 
 	net->live = true;
@@ -67,6 +98,16 @@ static int __net_init afs_net_init(struct afs_net *net)
 	INIT_WORK(&net->fs_manager, afs_manage_servers);
 	timer_setup(&net->fs_timer, afs_servers_timer, 0);
 
+	ret = -ENOMEM;
+	sysnames = kzalloc(sizeof(*sysnames), GFP_KERNEL);
+	if (!sysnames)
+		goto error_sysnames;
+	sysnames->subs[0] = (char *)&afs_init_sysname;
+	sysnames->nr = 1;
+	refcount_set(&sysnames->usage, 1);
+	net->sysnames = sysnames;
+	rwlock_init(&net->sysnames_lock);
+
 	/* Register the /proc stuff */
 	ret = afs_proc_init(net);
 	if (ret < 0)
@@ -92,6 +133,8 @@ error_cell_init:
 	net->live = false;
 	afs_proc_cleanup(net);
 error_proc:
+	afs_put_sysnames(net->sysnames);
+error_sysnames:
 	net->live = false;
 	return ret;
 }
@@ -106,6 +149,7 @@ static void __net_exit afs_net_exit(struct afs_net *net)
 	afs_purge_servers(net);
 	afs_close_socket(net);
 	afs_proc_cleanup(net);
+	afs_put_sysnames(net->sysnames);
 }
 
 /*
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 4508dd54f789..839a22280606 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -126,6 +126,34 @@ static const struct file_operations afs_proc_servers_fops = {
 	.release	= seq_release,
 };
 
+static int afs_proc_sysname_open(struct inode *inode, struct file *file);
+static int afs_proc_sysname_release(struct inode *inode, struct file *file);
+static void *afs_proc_sysname_start(struct seq_file *p, loff_t *pos);
+static void *afs_proc_sysname_next(struct seq_file *p, void *v,
+					loff_t *pos);
+static void afs_proc_sysname_stop(struct seq_file *p, void *v);
+static int afs_proc_sysname_show(struct seq_file *m, void *v);
+static ssize_t afs_proc_sysname_write(struct file *file,
+				      const char __user *buf,
+				      size_t size, loff_t *_pos);
+
+static const struct seq_operations afs_proc_sysname_ops = {
+	.start	= afs_proc_sysname_start,	/* takes sysnames_lock */
+	.next	= afs_proc_sysname_next,
+	.stop	= afs_proc_sysname_stop,	/* drops sysnames_lock */
+	.show	= afs_proc_sysname_show,
+};
+
+static const struct file_operations afs_proc_sysname_fops = {
+	.open		= afs_proc_sysname_open, /* new list for writers */
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= afs_proc_sysname_release, /* installs the new list */
+	.write		= afs_proc_sysname_write, /* adds names to the list */
+};
+
+static const struct file_operations afs_proc_stats_fops; /* defined below */
+
 /*
  * initialise the /proc/fs/afs/ directory
  */
@@ -139,7 +167,9 @@ int afs_proc_init(struct afs_net *net)
 
 	if (!proc_create("cells", 0644, net->proc_afs, &afs_proc_cells_fops) ||
 	    !proc_create("rootcell", 0644, net->proc_afs, &afs_proc_rootcell_fops) ||
-	    !proc_create("servers", 0644, net->proc_afs, &afs_proc_servers_fops))
+	    !proc_create("servers", 0644, net->proc_afs, &afs_proc_servers_fops) ||
+	    !proc_create("stats", 0644, net->proc_afs, &afs_proc_stats_fops) ||
+	    !proc_create("sysname", 0644, net->proc_afs, &afs_proc_sysname_fops))
 		goto error_tree;
 
 	_leave(" = 0");
@@ -183,6 +213,7 @@ static int afs_proc_cells_open(struct inode *inode, struct file *file)
  * first item
  */
 static void *afs_proc_cells_start(struct seq_file *m, loff_t *_pos)
+	__acquires(rcu)
 {
 	struct afs_net *net = afs_seq2net(m);
 
@@ -204,6 +235,7 @@ static void *afs_proc_cells_next(struct seq_file *m, void *v, loff_t *pos)
  * clean up after reading from the cells list
  */
 static void afs_proc_cells_stop(struct seq_file *m, void *v)
+	__releases(rcu)
 {
 	rcu_read_unlock();
 }
@@ -282,7 +314,8 @@ static ssize_t afs_proc_cells_write(struct file *file, const char __user *buf,
 			goto done;
 		}
 
-		set_bit(AFS_CELL_FL_NO_GC, &cell->flags);
+		if (test_and_set_bit(AFS_CELL_FL_NO_GC, &cell->flags))
+			afs_put_cell(net, cell);
 		printk("kAFS: Added new cell '%s'\n", name);
 	} else {
 		goto inval;
@@ -304,7 +337,40 @@ inval:
 static ssize_t afs_proc_rootcell_read(struct file *file, char __user *buf,
 				      size_t size, loff_t *_pos)
 {
-	return 0;
+	struct afs_cell *cell;
+	struct afs_net *net = afs_proc2net(file);
+	unsigned int seq = 0;
+	char name[AFS_MAXCELLNAME + 1];	/* +1 for the newline appended below */
+	int len;
+
+	if (*_pos > 0)		/* The name was returned by an earlier read */
+		return 0;
+	if (!net->ws_cell)
+		return 0;
+
+	rcu_read_lock();
+	do {
+		read_seqbegin_or_lock(&net->cells_lock, &seq);
+		len = 0;
+		cell = rcu_dereference_raw(net->ws_cell);
+		if (cell) {
+			len = cell->name_len;
+			memcpy(name, cell->name, len);	/* Copy to the stack */
+		}
+	} while (need_seqretry(&net->cells_lock, seq));
+	done_seqretry(&net->cells_lock, seq);
+	rcu_read_unlock();
+
+	if (!len)
+		return 0;
+
+	name[len++] = '\n';
+	if (len > size)		/* Truncate to the caller's buffer */
+		len = size;
+	if (copy_to_user(buf, name, len) != 0)
+		return -EFAULT;
+	*_pos = 1;		/* Make any subsequent read return 0 */
+	return len;
 }
 
 /*
@@ -327,6 +393,12 @@ static ssize_t afs_proc_rootcell_write(struct file *file,
 	if (IS_ERR(kbuf))
 		return PTR_ERR(kbuf);
 
+	ret = -EINVAL;
+	if (kbuf[0] == '.')
+		goto out;
+	if (memchr(kbuf, '/', size))
+		goto out;
+
 	/* trim to first NL */
 	s = memchr(kbuf, '\n', size);
 	if (s)
@@ -339,6 +411,7 @@ static ssize_t afs_proc_rootcell_write(struct file *file,
 	if (ret >= 0)
 		ret = size;	/* consume everything, always */
 
+out:
 	kfree(kbuf);
 	_leave(" = %d", ret);
 	return ret;
@@ -413,6 +486,7 @@ static int afs_proc_cell_volumes_open(struct inode *inode, struct file *file)
  * first item
  */
 static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos)
+	__acquires(cell->proc_lock)
 {
 	struct afs_cell *cell = m->private;
 
@@ -438,6 +512,7 @@ static void *afs_proc_cell_volumes_next(struct seq_file *p, void *v,
  * clean up after reading from the cells list
  */
 static void afs_proc_cell_volumes_stop(struct seq_file *p, void *v)
+	__releases(cell->proc_lock)
 {
 	struct afs_cell *cell = p->private;
 
@@ -500,6 +575,7 @@ static int afs_proc_cell_vlservers_open(struct inode *inode, struct file *file)
  * first item
  */
 static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos)
+	__acquires(rcu)
 {
 	struct afs_addr_list *alist;
 	struct afs_cell *cell = m->private;
@@ -544,6 +620,7 @@ static void *afs_proc_cell_vlservers_next(struct seq_file *p, void *v,
  * clean up after reading from the cells list
  */
 static void afs_proc_cell_vlservers_stop(struct seq_file *p, void *v)
+	__releases(rcu)
 {
 	rcu_read_unlock();
 }
@@ -580,6 +657,7 @@ static int afs_proc_servers_open(struct inode *inode, struct file *file)
  * first item.
  */
 static void *afs_proc_servers_start(struct seq_file *m, loff_t *_pos)
+	__acquires(rcu)
 {
 	struct afs_net *net = afs_seq2net(m);
 
@@ -601,6 +679,7 @@ static void *afs_proc_servers_next(struct seq_file *m, void *v, loff_t *_pos)
  * clean up after reading from the cells list
  */
 static void afs_proc_servers_stop(struct seq_file *p, void *v)
+	__releases(rcu)
 {
 	rcu_read_unlock();
 }
@@ -626,3 +705,264 @@ static int afs_proc_servers_show(struct seq_file *m, void *v)
 		   &alist->addrs[alist->index].transport);
 	return 0;
 }
+
+/*
+ * Drop a ref on a sysname list.  When the last ref goes, free every name that
+ * was allocated (the built-in default and the list's own blank entry were
+ * not) and then the list itself.
+ */
+void afs_put_sysnames(struct afs_sysnames *sysnames)
+{
+	int i;
+
+	if (sysnames && refcount_dec_and_test(&sysnames->usage)) {
+		for (i = 0; i < sysnames->nr; i++)
+			if (sysnames->subs[i] != afs_init_sysname &&
+			    sysnames->subs[i] != sysnames->blank)
+				kfree(sysnames->subs[i]);
+		kfree(sysnames);
+	}
+}
+
+/*
+ * Handle opening of /proc/fs/afs/sysname.  If it is opened for writing, we
+ * assume the caller wants to change the substitution list and we allocate a
+ * buffer to hold the list.
+ */
+static int afs_proc_sysname_open(struct inode *inode, struct file *file)
+{
+	struct afs_sysnames *sysnames;
+	struct seq_file *m;
+	int ret;
+
+	ret = seq_open(file, &afs_proc_sysname_ops);
+	if (ret < 0)
+		return ret;
+
+	if (file->f_mode & FMODE_WRITE) {
+		sysnames = kzalloc(sizeof(*sysnames), GFP_KERNEL);
+		if (!sysnames) {
+			seq_release(inode, file);
+			return -ENOMEM;
+		}
+
+		refcount_set(&sysnames->usage, 1);
+		m = file->private_data;
+		m->private = sysnames;
+	}
+
+	return 0;
+}
+
+/*
+ * Handle writes to /proc/fs/afs/sysname to set the @sys substitution.  Names
+ * are whitespace-separated and accumulate in the list allocated at open time.
+ * A failure whilst building the list is latched in ->error so that further
+ * writes are refused and the list is discarded rather than installed on close.
+ */
+static ssize_t afs_proc_sysname_write(struct file *file,
+				      const char __user *buf,
+				      size_t size, loff_t *_pos)
+{
+	struct afs_sysnames *sysnames;
+	struct seq_file *m = file->private_data;
+	char *kbuf = NULL, *s, *p, *sub;
+	int ret, len;
+
+	sysnames = m->private;
+	if (!sysnames)
+		return -EINVAL;
+	if (sysnames->error)
+		return sysnames->error;
+
+	if (size >= PAGE_SIZE - 1) {
+		sysnames->error = -EINVAL;
+		return -EINVAL;
+	}
+	if (size == 0)
+		return 0;
+
+	kbuf = memdup_user_nul(buf, size);
+	if (IS_ERR(kbuf))
+		return PTR_ERR(kbuf);
+
+	inode_lock(file_inode(file));
+
+	p = kbuf;
+	while ((s = strsep(&p, " \t\n"))) {
+		len = strlen(s);
+		if (len == 0)
+			continue;
+		ret = -ENAMETOOLONG;
+		if (len >= AFSNAMEMAX)
+			goto error;
+
+		if (len >= 4 &&
+		    s[len - 4] == '@' &&
+		    s[len - 3] == 's' &&
+		    s[len - 2] == 'y' &&
+		    s[len - 1] == 's')
+			/* Protect against recursion */
+			goto invalid;
+
+		if (s[0] == '.' &&
+		    (len < 2 || (len == 2 && s[1] == '.')))
+			goto invalid;
+
+		if (memchr(s, '/', len))
+			goto invalid;
+
+		ret = -EFBIG;
+		if (sysnames->nr >= AFS_NR_SYSNAME)
+			goto error;
+
+		if (strcmp(s, afs_init_sysname) == 0) {
+			sub = (char *)afs_init_sysname;
+		} else {
+			ret = -ENOMEM;
+			sub = kmemdup(s, len + 1, GFP_KERNEL);
+			if (!sub)
+				goto error;
+		}
+
+		sysnames->subs[sysnames->nr] = sub;
+		sysnames->nr++;
+	}
+
+	ret = size;	/* consume everything, always */
+out:
+	inode_unlock(file_inode(file));
+	kfree(kbuf);
+	return ret;
+
+invalid:
+	ret = -EINVAL;
+error:
+	sysnames->error = ret;
+	goto out;
+}
+
+/*
+ * Handle release of /proc/fs/afs/sysname.  If a list was built up by a writer
+ * without error, install it in place of the namespace's current list and drop
+ * the old one; otherwise drop the list that was being built.
+ */
+static int afs_proc_sysname_release(struct inode *inode, struct file *file)
+{
+	struct afs_sysnames *sysnames, *kill = NULL;
+	struct seq_file *m = file->private_data;
+	struct afs_net *net = afs_seq2net(m);
+
+	sysnames = m->private;
+	if (sysnames) {
+		/* Unless it gets installed, it's the new list that's dropped */
+		kill = sysnames;
+		if (!sysnames->error) {
+			if (sysnames->nr == 0) {
+				sysnames->subs[0] = sysnames->blank;
+				sysnames->nr++;
+			}
+			write_lock(&net->sysnames_lock);
+			kill = net->sysnames;
+			net->sysnames = sysnames;
+			write_unlock(&net->sysnames_lock);
+		}
+		afs_put_sysnames(kill);
+	}
+
+	return seq_release(inode, file);
+}
+
+/*
+ * Begin reading the sysname list.  The list pointer is only sampled once the
+ * lock is held as a writer's release may replace and free the list.
+ */
+static void *afs_proc_sysname_start(struct seq_file *m, loff_t *pos)
+	__acquires(&net->sysnames_lock)
+{
+	struct afs_net *net = afs_seq2net(m);
+	struct afs_sysnames *names;
+
+	read_lock(&net->sysnames_lock);
+
+	names = net->sysnames;
+	if (*pos >= names->nr)
+		return NULL;
+	return (void *)(unsigned long)(*pos + 1);
+}
+
+static void *afs_proc_sysname_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct afs_net *net = afs_seq2net(m);
+	struct afs_sysnames *names = net->sysnames;
+
+	*pos += 1;
+	if (*pos >= names->nr)
+		return NULL;
+	return (void *)(unsigned long)(*pos + 1);
+}
+
+static void afs_proc_sysname_stop(struct seq_file *m, void *v)
+	__releases(&net->sysnames_lock)
+{
+	struct afs_net *net = afs_seq2net(m);
+
+	read_unlock(&net->sysnames_lock);
+}
+
+static int afs_proc_sysname_show(struct seq_file *m, void *v)
+{
+	struct afs_net *net = afs_seq2net(m);
+	struct afs_sysnames *sysnames = net->sysnames;
+	unsigned int i = (unsigned long)v - 1;
+
+	if (i < sysnames->nr)
+		seq_printf(m, "%s\n", sysnames->subs[i]);
+	return 0;
+}
+
+/*
+ * Display general per-net namespace statistics
+ */
+static int afs_proc_stats_show(struct seq_file *m, void *v)
+{
+	struct afs_net *net = afs_seq2net(m);
+
+	seq_puts(m, "kAFS statistics\n");
+
+	seq_printf(m, "dir-mgmt: look=%u reval=%u inval=%u relpg=%u\n",
+		   atomic_read(&net->n_lookup),
+		   atomic_read(&net->n_reval),
+		   atomic_read(&net->n_inval),
+		   atomic_read(&net->n_relpg));
+
+	seq_printf(m, "dir-data: rdpg=%u\n",
+		   atomic_read(&net->n_read_dir));
+
+	seq_printf(m, "dir-edit: cr=%u rm=%u\n",
+		   atomic_read(&net->n_dir_cr),
+		   atomic_read(&net->n_dir_rm));
+
+	seq_printf(m, "file-rd : n=%u nb=%lu\n",
+		   atomic_read(&net->n_fetches),
+		   atomic_long_read(&net->n_fetch_bytes));
+	seq_printf(m, "file-wr : n=%u nb=%lu\n",
+		   atomic_read(&net->n_stores),
+		   atomic_long_read(&net->n_store_bytes));
+	return 0;
+}
+
+/*
+ * Open "/proc/fs/afs/stats" to allow reading of the stat counters.
+ */
+static int afs_proc_stats_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, afs_proc_stats_show, NULL);
+}
+
+static const struct file_operations afs_proc_stats_fops = {
+	.open		= afs_proc_stats_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release        = single_release,
+};
diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c
index ad1328d85526..ac0feac9d746 100644
--- a/fs/afs/rotate.c
+++ b/fs/afs/rotate.c
@@ -21,7 +21,7 @@
 /*
  * Initialise a filesystem server cursor for iterating over FS servers.
  */
-void afs_init_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode)
+static void afs_init_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode)
 {
 	memset(fc, 0, sizeof(*fc));
 }
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index f7ae54b6a393..5c6263972ec9 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -926,3 +926,12 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count,
 	afs_set_call_complete(call, ret, remote_abort);
 	return ret;
 }
+
+/*
+ * Log protocol error production: trace the call, error and caller's address.
+ */
+noinline int afs_protocol_error(struct afs_call *call, int error)
+{
+	trace_afs_protocol_error(call, error, __builtin_return_address(0));
+	return error;	/* handed back unchanged for "return afs_protocol_error()" */
+}
diff --git a/fs/afs/security.c b/fs/afs/security.c
index b88b7d45fdaa..cea2fff313dc 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -178,18 +178,14 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key,
 		}
 	}
 
-	if (cb_break != (vnode->cb_break + vnode->cb_interest->server->cb_s_break)) {
-		rcu_read_unlock();
+	if (cb_break != (vnode->cb_break + vnode->cb_interest->server->cb_s_break))
 		goto someone_else_changed_it;
-	}
 
 	/* We need a ref on any permits list we want to copy as we'll have to
 	 * drop the lock to do memory allocation.
 	 */
-	if (permits && !refcount_inc_not_zero(&permits->usage)) {
-		rcu_read_unlock();
+	if (permits && !refcount_inc_not_zero(&permits->usage))
 		goto someone_else_changed_it;
-	}
 
 	rcu_read_unlock();
 
@@ -278,6 +274,7 @@ someone_else_changed_it:
 	/* Someone else changed the cache under us - don't recheck at this
 	 * time.
 	 */
+	rcu_read_unlock();
 	return;
 }
 
@@ -296,8 +293,6 @@ int afs_check_permit(struct afs_vnode *vnode, struct key *key,
 	_enter("{%x:%u},%x",
 	       vnode->fid.vid, vnode->fid.vnode, key_serial(key));
 
-	permits = vnode->permit_cache;
-
 	/* check the permits to see if we've got one yet */
 	if (key == vnode->volume->cell->anonymous_key) {
 		_debug("anon");
@@ -327,7 +322,7 @@ int afs_check_permit(struct afs_vnode *vnode, struct key *key,
 		 */
 		_debug("no valid permit");
 
-		ret = afs_fetch_status(vnode, key);
+		ret = afs_fetch_status(vnode, key, false);
 		if (ret < 0) {
 			*_access = 0;
 			_leave(" = %d", ret);
diff --git a/fs/afs/server.c b/fs/afs/server.c
index a43ef77dabae..e23be63998a8 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -59,7 +59,8 @@ struct afs_server *afs_find_server(struct afs_net *net,
 				alist = rcu_dereference(server->addresses);
 				for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) {
 					b = &alist->addrs[i].transport.sin6;
-					diff = (u16)a->sin6_port - (u16)b->sin6_port;
+					diff = ((u16 __force)a->sin6_port -
+						(u16 __force)b->sin6_port);
 					if (diff == 0)
 						diff = memcmp(&a->sin6_addr,
 							      &b->sin6_addr,
@@ -79,10 +80,11 @@ struct afs_server *afs_find_server(struct afs_net *net,
 				alist = rcu_dereference(server->addresses);
 				for (i = 0; i < alist->nr_ipv4; i++) {
 					b = &alist->addrs[i].transport.sin6;
-					diff = (u16)a->sin6_port - (u16)b->sin6_port;
+					diff = ((u16 __force)a->sin6_port -
+						(u16 __force)b->sin6_port);
 					if (diff == 0)
-						diff = ((u32)a->sin6_addr.s6_addr32[3] -
-							(u32)b->sin6_addr.s6_addr32[3]);
+						diff = ((u32 __force)a->sin6_addr.s6_addr32[3] -
+							(u32 __force)b->sin6_addr.s6_addr32[3]);
 					if (diff == 0)
 						goto found;
 					if (diff < 0) {
@@ -381,7 +383,7 @@ static void afs_server_rcu(struct rcu_head *rcu)
 {
 	struct afs_server *server = container_of(rcu, struct afs_server, rcu);
 
-	afs_put_addrlist(server->addresses);
+	afs_put_addrlist(rcu_access_pointer(server->addresses));
 	kfree(server);
 }
 
@@ -390,7 +392,7 @@ static void afs_server_rcu(struct rcu_head *rcu)
  */
 static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
 {
-	struct afs_addr_list *alist = server->addresses;
+	struct afs_addr_list *alist = rcu_access_pointer(server->addresses);
 	struct afs_addr_cursor ac = {
 		.alist	= alist,
 		.addr	= &alist->addrs[0],
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 3623c952b6ff..65081ec3c36e 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -154,7 +154,7 @@ static int afs_show_devname(struct seq_file *m, struct dentry *root)
 		seq_puts(m, "none");
 		return 0;
 	}
-	
+
 	switch (volume->type) {
 	case AFSVL_RWVOL:
 		break;
@@ -269,7 +269,7 @@ static int afs_parse_device_name(struct afs_mount_params *params,
 	int cellnamesz;
 
 	_enter(",%s", name);
-	
+
 	if (!name) {
 		printk(KERN_ERR "kAFS: no volume name specified\n");
 		return -EINVAL;
@@ -418,7 +418,10 @@ static int afs_fill_super(struct super_block *sb,
 	if (!sb->s_root)
 		goto error;
 
-	sb->s_d_op = &afs_fs_dentry_operations;
+	if (params->dyn_root)
+		sb->s_d_op = &afs_dynroot_dentry_operations;
+	else
+		sb->s_d_op = &afs_fs_dentry_operations;
 
 	_leave(" = 0");
 	return 0;
@@ -676,7 +679,7 @@ static int afs_statfs(struct dentry *dentry, struct kstatfs *buf)
 		buf->f_bfree	= 0;
 		return 0;
 	}
-	
+
 	key = afs_request_key(vnode->volume->cell);
 	if (IS_ERR(key))
 		return PTR_ERR(key);
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index 5d8562f1ad4a..1ed7e2fd2f35 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -303,7 +303,7 @@ struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *net,
 	r->uuid.clock_seq_hi_and_reserved 	= htonl(u->clock_seq_hi_and_reserved);
 	r->uuid.clock_seq_low			= htonl(u->clock_seq_low);
 	for (i = 0; i < 6; i++)
-		r->uuid.node[i] = ntohl(u->node[i]);
+		r->uuid.node[i] = htonl(u->node[i]);
 
 	trace_afs_make_vl_call(call);
 	return (struct afs_addr_list *)afs_make_call(ac, call, GFP_KERNEL, false);
@@ -450,7 +450,7 @@ again:
 		call->count2	= ntohl(*bp); /* Type or next count */
 
 		if (call->count > YFS_MAXENDPOINTS)
-			return -EBADMSG;
+			return afs_protocol_error(call, -EBADMSG);
 
 		alist = afs_alloc_addrlist(call->count, FS_SERVICE, AFS_FS_PORT);
 		if (!alist)
@@ -474,7 +474,7 @@ again:
 			size = sizeof(__be32) * (1 + 4 + 1);
 			break;
 		default:
-			return -EBADMSG;
+			return afs_protocol_error(call, -EBADMSG);
 		}
 
 		size += sizeof(__be32);
@@ -487,24 +487,24 @@ again:
 		switch (call->count2) {
 		case YFS_ENDPOINT_IPV4:
 			if (ntohl(bp[0]) != sizeof(__be32) * 2)
-				return -EBADMSG;
+				return afs_protocol_error(call, -EBADMSG);
 			afs_merge_fs_addr4(alist, bp[1], ntohl(bp[2]));
 			bp += 3;
 			break;
 		case YFS_ENDPOINT_IPV6:
 			if (ntohl(bp[0]) != sizeof(__be32) * 5)
-				return -EBADMSG;
+				return afs_protocol_error(call, -EBADMSG);
 			afs_merge_fs_addr6(alist, bp + 1, ntohl(bp[5]));
 			bp += 6;
 			break;
 		default:
-			return -EBADMSG;
+			return afs_protocol_error(call, -EBADMSG);
 		}
 
 		/* Got either the type of the next entry or the count of
 		 * volEndpoints if no more fsEndpoints.
 		 */
-		call->count2 = htonl(*bp++);
+		call->count2 = ntohl(*bp++);
 
 		call->offset = 0;
 		call->count--;
@@ -517,7 +517,7 @@ again:
 		if (!call->count)
 			goto end;
 		if (call->count > YFS_MAXENDPOINTS)
-			return -EBADMSG;
+			return afs_protocol_error(call, -EBADMSG);
 
 		call->unmarshall = 3;
 
@@ -531,7 +531,7 @@ again:
 			return ret;
 
 		bp = call->buffer;
-		call->count2 = htonl(*bp++);
+		call->count2 = ntohl(*bp++);
 		call->offset = 0;
 		call->unmarshall = 4;
 
@@ -545,7 +545,7 @@ again:
 			size = sizeof(__be32) * (1 + 4 + 1);
 			break;
 		default:
-			return -EBADMSG;
+			return afs_protocol_error(call, -EBADMSG);
 		}
 
 		if (call->count > 1)
@@ -558,16 +558,16 @@ again:
 		switch (call->count2) {
 		case YFS_ENDPOINT_IPV4:
 			if (ntohl(bp[0]) != sizeof(__be32) * 2)
-				return -EBADMSG;
+				return afs_protocol_error(call, -EBADMSG);
 			bp += 3;
 			break;
 		case YFS_ENDPOINT_IPV6:
 			if (ntohl(bp[0]) != sizeof(__be32) * 5)
-				return -EBADMSG;
+				return afs_protocol_error(call, -EBADMSG);
 			bp += 6;
 			break;
 		default:
-			return -EBADMSG;
+			return afs_protocol_error(call, -EBADMSG);
 		}
 
 		/* Got either the type of the next entry or the count of
@@ -576,7 +576,7 @@ again:
 		call->offset = 0;
 		call->count--;
 		if (call->count > 0) {
-			call->count2 = htonl(*bp++);
+			call->count2 = ntohl(*bp++);
 			goto again;
 		}
 
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index b517a588781f..3037bd01f617 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -225,7 +225,9 @@ void afs_activate_volume(struct afs_volume *volume)
 #ifdef CONFIG_AFS_FSCACHE
 	volume->cache = fscache_acquire_cookie(volume->cell->cache,
 					       &afs_volume_cache_index_def,
-					       volume, true);
+					       &volume->vid, sizeof(volume->vid),
+					       NULL, 0,
+					       volume, 0, true);
 #endif
 
 	write_lock(&volume->cell->proc_lock);
@@ -245,7 +247,7 @@ void afs_deactivate_volume(struct afs_volume *volume)
 	write_unlock(&volume->cell->proc_lock);
 
 #ifdef CONFIG_AFS_FSCACHE
-	fscache_relinquish_cookie(volume->cache,
+	fscache_relinquish_cookie(volume->cache, NULL,
 				  test_bit(AFS_VOLUME_DELETED, &volume->flags));
 	volume->cache = NULL;
 #endif
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 9370e2feb999..c164698dc304 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -42,10 +42,11 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
 	if (!req)
 		return -ENOMEM;
 
-	atomic_set(&req->usage, 1);
+	refcount_set(&req->usage, 1);
 	req->pos = pos;
 	req->len = len;
 	req->nr_pages = 1;
+	req->pages = req->array;
 	req->pages[0] = page;
 	get_page(page);
 
@@ -124,7 +125,12 @@ try_again:
 					     page->index, priv);
 			goto flush_conflicting_write;
 		}
-		if (to < f || from > t)
+		/* If the file is being filled locally, allow inter-write
+		 * spaces to be merged into writes.  If it's not, only write
+		 * back what the user gives us.
+		 */
+		if (!test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags) &&
+		    (to < f || from > t))
 			goto flush_conflicting_write;
 		if (from < f)
 			f = from;
@@ -355,6 +361,12 @@ found_key:
 	}
 
 	switch (ret) {
+	case 0:
+		afs_stat_v(vnode, n_stores);
+		atomic_long_add((last * PAGE_SIZE + to) -
+				(first * PAGE_SIZE + offset),
+				&afs_v2net(vnode)->n_store_bytes);
+		break;
 	case -EACCES:
 	case -EPERM:
 	case -ENOKEY:
@@ -412,7 +424,8 @@ static int afs_write_back_from_locked_page(struct address_space *mapping,
 		trace_afs_page_dirty(vnode, tracepoint_string("WARN"),
 				     primary_page->index, priv);
 
-	if (start >= final_page || to < PAGE_SIZE)
+	if (start >= final_page ||
+	    (to < PAGE_SIZE && !test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags)))
 		goto no_more;
 
 	start++;
@@ -433,9 +446,10 @@ static int afs_write_back_from_locked_page(struct address_space *mapping,
 		}
 
 		for (loop = 0; loop < n; loop++) {
-			if (to != PAGE_SIZE)
-				break;
 			page = pages[loop];
+			if (to != PAGE_SIZE &&
+			    !test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags))
+				break;
 			if (page->index > final_page)
 				break;
 			if (!trylock_page(page))
@@ -448,7 +462,8 @@ static int afs_write_back_from_locked_page(struct address_space *mapping,
 			priv = page_private(page);
 			f = priv & AFS_PRIV_MAX;
 			t = priv >> AFS_PRIV_SHIFT;
-			if (f != 0) {
+			if (f != 0 &&
+			    !test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags)) {
 				unlock_page(page);
 				break;
 			}
@@ -570,10 +585,11 @@ static int afs_writepages_region(struct address_space *mapping,
 
 		_debug("wback %lx", page->index);
 
-		/* at this point we hold neither mapping->tree_lock nor lock on
-		 * the page itself: the page may be truncated or invalidated
-		 * (changing page->mapping to NULL), or even swizzled back from
-		 * swapper_space to tmpfs file mapping
+		/*
+		 * at this point we hold neither the i_pages lock nor the
+		 * page lock: the page may be truncated or invalidated
+		 * (changing page->mapping to NULL), or even swizzled
+		 * back from swapper_space to tmpfs file mapping
 		 */
 		ret = lock_page_killable(page);
 		if (ret < 0) {
@@ -734,20 +750,6 @@ int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 }
 
 /*
- * Flush out all outstanding writes on a file opened for writing when it is
- * closed.
- */
-int afs_flush(struct file *file, fl_owner_t id)
-{
-	_enter("");
-
-	if ((file->f_mode & FMODE_WRITE) == 0)
-		return 0;
-
-	return vfs_fsync(file, 0);
-}
-
-/*
  * notification that a previously read-only page is about to become writable
  * - if it returns an error, the caller will deliver a bus error signal
  */
diff --git a/fs/afs/xdr_fs.h b/fs/afs/xdr_fs.h
new file mode 100644
index 000000000000..aa21f3068d52
--- /dev/null
+++ b/fs/afs/xdr_fs.h
@@ -0,0 +1,103 @@
+/* AFS fileserver XDR types
+ *
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef XDR_FS_H
+#define XDR_FS_H
+
+struct afs_xdr_AFSFetchStatus {	/* 21 packed big-endian 32-bit words */
+	__be32	if_version;
+#define AFS_FSTATUS_VERSION	1
+	__be32	type;
+	__be32	nlink;
+	__be32	size_lo;	/* presumably pairs with size_hi - TODO confirm */
+	__be32	data_version_lo; /* presumably pairs with data_version_hi - TODO confirm */
+	__be32	author;
+	__be32	owner;
+	__be32	caller_access;
+	__be32	anon_access;
+	__be32	mode;
+	__be32	parent_vnode;
+	__be32	parent_unique;
+	__be32	seg_size;
+	__be32	mtime_client;
+	__be32	mtime_server;
+	__be32	group;
+	__be32	sync_counter;
+	__be32	data_version_hi;
+	__be32	lock_count;
+	__be32	size_hi;
+	__be32	abort_code;
+} __packed;
+
+#define AFS_DIR_HASHTBL_SIZE	128	/* entries in meta.hashtable[] */
+#define AFS_DIR_DIRENT_SIZE	32	/* bytes in one union afs_xdr_dirent */
+#define AFS_DIR_SLOTS_PER_BLOCK	64	/* dirents[] elements per block */
+#define AFS_DIR_BLOCK_SIZE	2048	/* == 64 slots * 32 bytes per dirent */
+#define AFS_DIR_BLOCKS_PER_PAGE	(PAGE_SIZE / AFS_DIR_BLOCK_SIZE)
+#define AFS_DIR_MAX_SLOTS	65536
+#define AFS_DIR_BLOCKS_WITH_CTR	128
+#define AFS_DIR_MAX_BLOCKS	1023
+#define AFS_DIR_RESV_BLOCKS	1	/* NOTE(review): looks like a slot count - confirm */
+#define AFS_DIR_RESV_BLOCKS0	13	/* NOTE(review): looks like a slot count - confirm */
+
+/*
+ * Directory entry structure: one 32-byte slot, viewed two ways.
+ */
+union afs_xdr_dirent {
+	struct {
+		u8		valid;
+		u8		unused[1];
+		__be16		hash_next;
+		__be32		vnode;
+		__be32		unique;
+		u8		name[16];
+		u8		overflow[4];	/* if any char of the name (inc
+						 * NUL) reaches here, consume
+						 * the next dirent too */
+	} u;	/* 1 + 1 + 2 + 4 + 4 + 16 + 4 = 32 bytes */
+	u8			extended_name[32]; /* the same 32 bytes as raw bytes */
+} __packed;
+
+/*
+ * Directory block header (one at the beginning of every 2048-byte block).
+ */
+struct afs_xdr_dir_hdr {
+	__be16		npages;
+	__be16		magic;
+#define AFS_DIR_MAGIC htons(1234)	/* compared in network byte order */
+	u8		reserved;
+	u8		bitmap[8];	/* 64 bits; presumably one per slot - TODO confirm */
+	u8		pad[19];	/* rounds the header up to 32 bytes */
+} __packed;
+
+/*
+ * Directory block layout: 2048 bytes viewed as a header, meta area or slots.
+ */
+union afs_xdr_dir_block {
+	struct afs_xdr_dir_hdr		hdr;	/* first 32 bytes of the block */
+
+	struct {	/* 32 + 128 + 256 = 416 bytes = 13 dirent slots */
+		struct afs_xdr_dir_hdr	hdr;
+		u8			alloc_ctrs[AFS_DIR_BLOCKS_WITH_CTR];
+		__be16			hashtable[AFS_DIR_HASHTBL_SIZE];
+	} meta;
+
+	union afs_xdr_dirent	dirents[AFS_DIR_SLOTS_PER_BLOCK];
+} __packed;
+
+/*
+ * Directory layout on a linux VM page: PAGE_SIZE / 2048 consecutive blocks.
+ */
+struct afs_xdr_dir_page {
+	union afs_xdr_dir_block	blocks[AFS_DIR_BLOCKS_PER_PAGE];
+};
+
+#endif /* XDR_FS_H */
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index a0c57c37fa21..be9c3dc048ab 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -19,9 +19,6 @@
  */
 static autofs_wqt_t autofs4_next_wait_queue = 1;
 
-/* These are the signals we allow interrupting a pending mount */
-#define SHUTDOWN_SIGS	(sigmask(SIGKILL) | sigmask(SIGINT) | sigmask(SIGQUIT))
-
 void autofs4_catatonic_mode(struct autofs_sb_info *sbi)
 {
 	struct autofs_wait_queue *wq, *nwq;
@@ -486,29 +483,7 @@ int autofs4_wait(struct autofs_sb_info *sbi,
 	 * wq->name.name is NULL iff the lock is already released
 	 * or the mount has been made catatonic.
 	 */
-	if (wq->name.name) {
-		/* Block all but "shutdown" signals while waiting */
-		unsigned long shutdown_sigs_mask;
-		unsigned long irqflags;
-		sigset_t oldset;
-
-		spin_lock_irqsave(&current->sighand->siglock, irqflags);
-		oldset = current->blocked;
-		shutdown_sigs_mask = SHUTDOWN_SIGS & ~oldset.sig[0];
-		siginitsetinv(&current->blocked, shutdown_sigs_mask);
-		recalc_sigpending();
-		spin_unlock_irqrestore(&current->sighand->siglock, irqflags);
-
-		wait_event_interruptible(wq->queue, wq->name.name == NULL);
-
-		spin_lock_irqsave(&current->sighand->siglock, irqflags);
-		current->blocked = oldset;
-		recalc_sigpending();
-		spin_unlock_irqrestore(&current->sighand->siglock, irqflags);
-	} else {
-		pr_debug("skipped sleeping\n");
-	}
-
+	wait_event_killable(wq->queue, wq->name.name == NULL);
 	status = wq->status;
 
 	/*
@@ -574,7 +549,7 @@ int autofs4_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_tok
 	kfree(wq->name.name);
 	wq->name.name = NULL;	/* Do not wait on this queue */
 	wq->status = status;
-	wake_up_interruptible(&wq->queue);
+	wake_up(&wq->queue);
 	if (!--wq->wait_ctr)
 		kfree(wq);
 	mutex_unlock(&sbi->wq_mutex);
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index ce1824f47ba6..c3deb2e35f20 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -330,6 +330,7 @@ beyond_if:
 #ifdef __alpha__
 	regs->gp = ex.a_gpvalue;
 #endif
+	finalize_exec(bprm);
 	start_thread(regs, ex.a_entry, current->mm->start_stack);
 	return 0;
 }
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index bdb201230bae..41e04183e4ce 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -377,6 +377,11 @@ static unsigned long elf_map(struct file *filep, unsigned long addr,
 	} else
 		map_addr = vm_mmap(filep, addr, size, prot, type, off);
 
+	if ((type & MAP_FIXED_NOREPLACE) && BAD_ADDR(map_addr))
+		pr_info("%d (%s): Uhuuh, elf segment at %p requested but the memory is mapped already\n",
+				task_pid_nr(current), current->comm,
+				(void *)addr);
+
 	return(map_addr);
 }
 
@@ -575,7 +580,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
 				elf_prot |= PROT_EXEC;
 			vaddr = eppnt->p_vaddr;
 			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
-				elf_type |= MAP_FIXED;
+				elf_type |= MAP_FIXED_NOREPLACE;
 			else if (no_base && interp_elf_ex->e_type == ET_DYN)
 				load_addr = -vaddr;
 
@@ -890,7 +895,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
 	   the correct location in memory. */
 	for(i = 0, elf_ppnt = elf_phdata;
 	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
-		int elf_prot = 0, elf_flags;
+		int elf_prot = 0, elf_flags, elf_fixed = MAP_FIXED_NOREPLACE;
 		unsigned long k, vaddr;
 		unsigned long total_size = 0;
 
@@ -922,6 +927,13 @@ static int load_elf_binary(struct linux_binprm *bprm)
 					 */
 				}
 			}
+
+			/*
+			 * Some binaries have overlapping elf segments and then
+			 * we have to forcefully map over an existing mapping
+			 * e.g. over this newly established brk mapping.
+			 */
+			elf_fixed = MAP_FIXED;
 		}
 
 		if (elf_ppnt->p_flags & PF_R)
@@ -939,7 +951,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
 		 * the ET_DYN load_addr calculations, proceed normally.
 		 */
 		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
-			elf_flags |= MAP_FIXED;
+			elf_flags |= elf_fixed;
 		} else if (loc->elf_ex.e_type == ET_DYN) {
 			/*
 			 * This logic is run once for the first LOAD Program
@@ -975,7 +987,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
 				load_bias = ELF_ET_DYN_BASE;
 				if (current->flags & PF_RANDOMIZE)
 					load_bias += arch_mmap_rnd();
-				elf_flags |= MAP_FIXED;
+				elf_flags |= elf_fixed;
 			} else
 				load_bias = 0;
 
@@ -1155,6 +1167,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
 	ELF_PLAT_INIT(regs, reloc_func_desc);
 #endif
 
+	finalize_exec(bprm);
 	start_thread(regs, elf_entry, bprm->p);
 	retval = 0;
 out:
@@ -1234,7 +1247,7 @@ static int load_elf_library(struct file *file)
 			(eppnt->p_filesz +
 			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
 			PROT_READ | PROT_WRITE | PROT_EXEC,
-			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
+			MAP_FIXED_NOREPLACE | MAP_PRIVATE | MAP_DENYWRITE,
 			(eppnt->p_offset -
 			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
 	if (error != ELF_PAGESTART(eppnt->p_vaddr))
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 429326b6e2e7..d90993adeffa 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -463,6 +463,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm)
 			    dynaddr);
 #endif
 
+	finalize_exec(bprm);
 	/* everything is now ready... get the userspace context ready to roll */
 	entryaddr = interp_params.entry_addr ?: exec_params.entry_addr;
 	start_thread(regs, entryaddr, current->mm->start_stack);
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 5d6b94475f27..82a48e830018 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -994,6 +994,7 @@ static int load_flat_binary(struct linux_binprm *bprm)
 	FLAT_PLAT_INIT(regs);
 #endif
 
+	finalize_exec(bprm);
 	pr_debug("start_thread(regs=0x%p, entry=0x%lx, start_stack=0x%lx)\n",
 		 regs, start_addr, current->mm->start_stack);
 	start_thread(regs, start_addr, current->mm->start_stack);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 7a506c55a993..7ec920e27065 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1948,11 +1948,6 @@ static int blkdev_releasepage(struct page *page, gfp_t wait)
 static int blkdev_writepages(struct address_space *mapping,
 			     struct writeback_control *wbc)
 {
-	if (dax_mapping(mapping)) {
-		struct block_device *bdev = I_BDEV(mapping->host);
-
-		return dax_writeback_mapping_range(mapping, bdev, wbc);
-	}
 	return generic_writepages(mapping, wbc);
 }
 
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 562c3e633403..578181cd96b5 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -458,7 +458,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
 			break;
 
 		rcu_read_lock();
-		page = radix_tree_lookup(&mapping->page_tree, pg_index);
+		page = radix_tree_lookup(&mapping->i_pages, pg_index);
 		rcu_read_unlock();
 		if (page && !radix_tree_exceptional_entry(page)) {
 			misses++;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 47a8fe9d22e8..cf87976e389d 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3963,11 +3963,11 @@ retry:
 
 			done_index = page->index;
 			/*
-			 * At this point we hold neither mapping->tree_lock nor
-			 * lock on the page itself: the page may be truncated or
-			 * invalidated (changing page->mapping to NULL), or even
-			 * swizzled back from swapper_space to tmpfs file
-			 * mapping
+			 * At this point we hold neither the i_pages lock nor
+			 * the page lock: the page may be truncated or
+			 * invalidated (changing page->mapping to NULL),
+			 * or even swizzled back from swapper_space to
+			 * tmpfs file mapping
 			 */
 			if (!trylock_page(page)) {
 				flush_write_bio(epd);
@@ -5174,13 +5174,13 @@ void clear_extent_buffer_dirty(struct extent_buffer *eb)
 		WARN_ON(!PagePrivate(page));
 
 		clear_page_dirty_for_io(page);
-		spin_lock_irq(&page->mapping->tree_lock);
+		xa_lock_irq(&page->mapping->i_pages);
 		if (!PageDirty(page)) {
-			radix_tree_tag_clear(&page->mapping->page_tree,
+			radix_tree_tag_clear(&page->mapping->i_pages,
 						page_index(page),
 						PAGECACHE_TAG_DIRTY);
 		}
-		spin_unlock_irq(&page->mapping->tree_lock);
+		xa_unlock_irq(&page->mapping->i_pages);
 		ClearPageError(page);
 		unlock_page(page);
 	}
diff --git a/fs/buffer.c b/fs/buffer.c
index ec5dd39071e6..249b83fafe48 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -185,10 +185,9 @@ EXPORT_SYMBOL(end_buffer_write_sync);
  * we get exclusion from try_to_free_buffers with the blockdev mapping's
  * private_lock.
  *
- * Hack idea: for the blockdev mapping, i_bufferlist_lock contention
+ * Hack idea: for the blockdev mapping, private_lock contention
  * may be quite high.  This code could TryLock the page, and if that
- * succeeds, there is no need to take private_lock. (But if
- * private_lock is contended then so is mapping->tree_lock).
+ * succeeds, there is no need to take private_lock.
  */
 static struct buffer_head *
 __find_get_block_slow(struct block_device *bdev, sector_t block)
@@ -495,35 +494,12 @@ repeat:
 	return err;
 }
 
-static void do_thaw_one(struct super_block *sb, void *unused)
+void emergency_thaw_bdev(struct super_block *sb)
 {
 	while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb))
 		printk(KERN_WARNING "Emergency Thaw on %pg\n", sb->s_bdev);
 }
 
-static void do_thaw_all(struct work_struct *work)
-{
-	iterate_supers(do_thaw_one, NULL);
-	kfree(work);
-	printk(KERN_WARNING "Emergency Thaw complete\n");
-}
-
-/**
- * emergency_thaw_all -- forcibly thaw every frozen filesystem
- *
- * Used for emergency unfreeze of all filesystems via SysRq
- */
-void emergency_thaw_all(void)
-{
-	struct work_struct *work;
-
-	work = kmalloc(sizeof(*work), GFP_ATOMIC);
-	if (work) {
-		INIT_WORK(work, do_thaw_all);
-		schedule_work(work);
-	}
-}
-
 /**
  * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers
  * @mapping: the mapping which wants those buffers written
@@ -594,20 +570,21 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode);
  *
  * The caller must hold lock_page_memcg().
  */
-static void __set_page_dirty(struct page *page, struct address_space *mapping,
+void __set_page_dirty(struct page *page, struct address_space *mapping,
 			     int warn)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&mapping->tree_lock, flags);
+	xa_lock_irqsave(&mapping->i_pages, flags);
 	if (page->mapping) {	/* Race with truncate? */
 		WARN_ON_ONCE(warn && !PageUptodate(page));
 		account_page_dirtied(page, mapping);
-		radix_tree_tag_set(&mapping->page_tree,
+		radix_tree_tag_set(&mapping->i_pages,
 				page_index(page), PAGECACHE_TAG_DIRTY);
 	}
-	spin_unlock_irqrestore(&mapping->tree_lock, flags);
+	xa_unlock_irqrestore(&mapping->i_pages, flags);
 }
+EXPORT_SYMBOL_GPL(__set_page_dirty);
 
 /*
  * Add a page to the dirty page list.
@@ -1095,7 +1072,7 @@ __getblk_slow(struct block_device *bdev, sector_t block,
  * inode list.
  *
  * mark_buffer_dirty() is atomic.  It takes bh->b_page->mapping->private_lock,
- * mapping->tree_lock and mapping->host->i_lock.
+ * i_pages lock and mapping->host->i_lock.
  */
 void mark_buffer_dirty(struct buffer_head *bh)
 {
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
index e7f16a77a22a..222bc5d8b62c 100644
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -32,7 +32,7 @@ static struct fscache_object *cachefiles_alloc_object(
 	struct cachefiles_cache *cache;
 	struct cachefiles_xattr *auxdata;
 	unsigned keylen, auxlen;
-	void *buffer;
+	void *buffer, *p;
 	char *key;
 
 	cache = container_of(_cache, struct cachefiles_cache, cache);
@@ -65,8 +65,12 @@ static struct fscache_object *cachefiles_alloc_object(
 	if (!buffer)
 		goto nomem_buffer;
 
-	keylen = cookie->def->get_key(cookie->netfs_data, buffer + 2, 512);
-	ASSERTCMP(keylen, <, 512);
+	keylen = cookie->key_len;
+	if (keylen <= sizeof(cookie->inline_key))
+		p = cookie->inline_key;
+	else
+		p = cookie->key;
+	memcpy(buffer + 2, p, keylen);
 
 	*(uint16_t *)buffer = keylen;
 	((char *)buffer)[keylen + 2] = 0;
@@ -80,15 +84,17 @@ static struct fscache_object *cachefiles_alloc_object(
 
 	/* get hold of the auxiliary data and prepend the object type */
 	auxdata = buffer;
-	auxlen = 0;
-	if (cookie->def->get_aux) {
-		auxlen = cookie->def->get_aux(cookie->netfs_data,
-					      auxdata->data, 511);
-		ASSERTCMP(auxlen, <, 511);
+	auxlen = cookie->aux_len;
+	if (auxlen) {
+		if (auxlen <= sizeof(cookie->inline_aux))
+			p = cookie->inline_aux;
+		else
+			p = cookie->aux;
+		memcpy(auxdata->data, p, auxlen);
 	}
 
 	auxdata->len = auxlen + 1;
-	auxdata->type = cookie->def->type;
+	auxdata->type = cookie->type;
 
 	lookup_data->auxdata = auxdata;
 	lookup_data->key = key;
@@ -177,10 +183,12 @@ static void cachefiles_lookup_complete(struct fscache_object *_object)
  * increment the usage count on an inode object (may fail if unmounting)
  */
 static
-struct fscache_object *cachefiles_grab_object(struct fscache_object *_object)
+struct fscache_object *cachefiles_grab_object(struct fscache_object *_object,
+					      enum fscache_obj_ref_trace why)
 {
 	struct cachefiles_object *object =
 		container_of(_object, struct cachefiles_object, fscache);
+	int u;
 
 	_enter("{OBJ%x,%d}", _object->debug_id, atomic_read(&object->usage));
 
@@ -188,7 +196,9 @@ struct fscache_object *cachefiles_grab_object(struct fscache_object *_object)
 	ASSERT((atomic_read(&object->usage) & 0xffff0000) != 0x6b6b0000);
 #endif
 
-	atomic_inc(&object->usage);
+	u = atomic_inc_return(&object->usage);
+	trace_cachefiles_ref(object, _object->cookie,
+			     (enum cachefiles_obj_ref_trace)why, u);
 	return &object->fscache;
 }
 
@@ -202,6 +212,7 @@ static void cachefiles_update_object(struct fscache_object *_object)
 	struct cachefiles_cache *cache;
 	struct fscache_cookie *cookie;
 	const struct cred *saved_cred;
+	const void *aux;
 	unsigned auxlen;
 
 	_enter("{OBJ%x}", _object->debug_id);
@@ -216,26 +227,29 @@ static void cachefiles_update_object(struct fscache_object *_object)
 	}
 
 	cookie = object->fscache.cookie;
+	auxlen = cookie->aux_len;
 
-	if (!cookie->def->get_aux) {
+	if (!auxlen) {
 		fscache_unuse_cookie(_object);
 		_leave(" [no aux]");
 		return;
 	}
 
-	auxdata = kmalloc(2 + 512 + 3, cachefiles_gfp);
+	auxdata = kmalloc(2 + auxlen + 3, cachefiles_gfp);
 	if (!auxdata) {
 		fscache_unuse_cookie(_object);
 		_leave(" [nomem]");
 		return;
 	}
 
-	auxlen = cookie->def->get_aux(cookie->netfs_data, auxdata->data, 511);
+	aux = (auxlen <= sizeof(cookie->inline_aux)) ?
+		cookie->inline_aux : cookie->aux;
+
+	memcpy(auxdata->data, aux, auxlen);
 	fscache_unuse_cookie(_object);
-	ASSERTCMP(auxlen, <, 511);
 
 	auxdata->len = auxlen + 1;
-	auxdata->type = cookie->def->type;
+	auxdata->type = cookie->type;
 
 	cachefiles_begin_secure(cache, &saved_cred);
 	cachefiles_update_object_xattr(object, auxdata);
@@ -309,10 +323,12 @@ static void cachefiles_drop_object(struct fscache_object *_object)
 /*
  * dispose of a reference to an object
  */
-static void cachefiles_put_object(struct fscache_object *_object)
+static void cachefiles_put_object(struct fscache_object *_object,
+				  enum fscache_obj_ref_trace why)
 {
 	struct cachefiles_object *object;
 	struct fscache_cache *cache;
+	int u;
 
 	ASSERT(_object);
 
@@ -328,7 +344,11 @@ static void cachefiles_put_object(struct fscache_object *_object)
 	ASSERTIFCMP(object->fscache.parent,
 		    object->fscache.parent->n_children, >, 0);
 
-	if (atomic_dec_and_test(&object->usage)) {
+	u = atomic_dec_return(&object->usage);
+	trace_cachefiles_ref(object, _object->cookie,
+			     (enum cachefiles_obj_ref_trace)why, u);
+	ASSERTCMP(u, !=, -1);
+	if (u == 0) {
 		_debug("- kill object OBJ%x", object->fscache.debug_id);
 
 		ASSERT(!test_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags));
@@ -421,7 +441,7 @@ static int cachefiles_attr_changed(struct fscache_object *_object)
 	loff_t oi_size;
 	int ret;
 
-	_object->cookie->def->get_attr(_object->cookie->netfs_data, &ni_size);
+	ni_size = _object->store_limit_l;
 
 	_enter("{OBJ%x},[%llu]",
 	       _object->debug_id, (unsigned long long) ni_size);
@@ -493,8 +513,7 @@ static void cachefiles_invalidate_object(struct fscache_operation *op)
 	cache = container_of(object->fscache.cache,
 			     struct cachefiles_cache, cache);
 
-	op->object->cookie->def->get_attr(op->object->cookie->netfs_data,
-					  &ni_size);
+	ni_size = op->object->store_limit_l;
 
 	_enter("{OBJ%x},[%llu]",
 	       op->object->debug_id, (unsigned long long)ni_size);
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index bb3a02ca9da4..d2f6f996e65a 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -124,6 +124,8 @@ struct cachefiles_xattr {
 	uint8_t				data[];
 };
 
+#include <trace/events/cachefiles.h>
+
 /*
  * note change of state for daemon
  */
diff --git a/fs/cachefiles/main.c b/fs/cachefiles/main.c
index 711f13d8c2de..f54d3f5b2e40 100644
--- a/fs/cachefiles/main.c
+++ b/fs/cachefiles/main.c
@@ -22,6 +22,7 @@
 #include <linux/statfs.h>
 #include <linux/sysctl.h>
 #include <linux/miscdevice.h>
+#define CREATE_TRACE_POINTS
 #include "internal.h"
 
 unsigned cachefiles_debug;
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 3978b324cbca..0daa1e3fe0df 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -30,11 +30,11 @@
  */
 static noinline
 void __cachefiles_printk_object(struct cachefiles_object *object,
-				const char *prefix,
-				u8 *keybuf)
+				const char *prefix)
 {
 	struct fscache_cookie *cookie;
-	unsigned keylen, loop;
+	const u8 *k;
+	unsigned loop;
 
 	pr_err("%sobject: OBJ%x\n", prefix, object->fscache.debug_id);
 	pr_err("%sobjstate=%s fl=%lx wbusy=%x ev=%lx[%lx]\n",
@@ -56,23 +56,16 @@ void __cachefiles_printk_object(struct cachefiles_object *object,
 		       object->fscache.cookie->parent,
 		       object->fscache.cookie->netfs_data,
 		       object->fscache.cookie->flags);
-		if (keybuf && cookie->def)
-			keylen = cookie->def->get_key(cookie->netfs_data, keybuf,
-						      CACHEFILES_KEYBUF_SIZE);
-		else
-			keylen = 0;
+		pr_err("%skey=[%u] '", prefix, cookie->key_len);
+		k = (cookie->key_len <= sizeof(cookie->inline_key)) ?
+			cookie->inline_key : cookie->key;
+		for (loop = 0; loop < cookie->key_len; loop++)
+			pr_cont("%02x", k[loop]);
+		pr_cont("'\n");
 	} else {
 		pr_err("%scookie=NULL\n", prefix);
-		keylen = 0;
 	}
 	spin_unlock(&object->fscache.lock);
-
-	if (keylen) {
-		pr_err("%skey=[%u] '", prefix, keylen);
-		for (loop = 0; loop < keylen; loop++)
-			pr_cont("%02x", keybuf[loop]);
-		pr_cont("'\n");
-	}
 }
 
 /*
@@ -81,14 +74,10 @@ void __cachefiles_printk_object(struct cachefiles_object *object,
 static noinline void cachefiles_printk_object(struct cachefiles_object *object,
 					      struct cachefiles_object *xobject)
 {
-	u8 *keybuf;
-
-	keybuf = kmalloc(CACHEFILES_KEYBUF_SIZE, GFP_NOIO);
 	if (object)
-		__cachefiles_printk_object(object, "", keybuf);
+		__cachefiles_printk_object(object, "");
 	if (xobject)
-		__cachefiles_printk_object(xobject, "x", keybuf);
-	kfree(keybuf);
+		__cachefiles_printk_object(xobject, "x");
 }
 
 /*
@@ -120,6 +109,7 @@ static void cachefiles_mark_object_buried(struct cachefiles_cache *cache,
 	}
 
 	write_unlock(&cache->active_lock);
+	trace_cachefiles_mark_buried(NULL, dentry, why);
 	_leave(" [no owner]");
 	return;
 
@@ -130,6 +120,8 @@ found_dentry:
 	       object->fscache.state->name,
 	       dentry);
 
+	trace_cachefiles_mark_buried(object, dentry, why);
+
 	if (fscache_object_is_live(&object->fscache)) {
 		pr_err("\n");
 		pr_err("Error: Can't preemptively bury live object\n");
@@ -158,13 +150,15 @@ static int cachefiles_mark_object_active(struct cachefiles_cache *cache,
 try_again:
 	write_lock(&cache->active_lock);
 
+	dentry = object->dentry;
+	trace_cachefiles_mark_active(object, dentry);
+
 	if (test_and_set_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags)) {
 		pr_err("Error: Object already active\n");
 		cachefiles_printk_object(object, NULL);
 		BUG();
 	}
 
-	dentry = object->dentry;
 	_p = &cache->active_nodes.rb_node;
 	while (*_p) {
 		_parent = *_p;
@@ -191,6 +185,8 @@ try_again:
 	/* an old object from a previous incarnation is hogging the slot - we
 	 * need to wait for it to be destroyed */
 wait_for_old_object:
+	trace_cachefiles_wait_active(object, dentry, xobject);
+
 	if (fscache_object_is_live(&xobject->fscache)) {
 		pr_err("\n");
 		pr_err("Error: Unexpected object collision\n");
@@ -248,12 +244,12 @@ wait_for_old_object:
 
 	ASSERT(!test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags));
 
-	cache->cache.ops->put_object(&xobject->fscache);
+	cache->cache.ops->put_object(&xobject->fscache, cachefiles_obj_put_wait_retry);
 	goto try_again;
 
 requeue:
 	clear_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags);
-	cache->cache.ops->put_object(&xobject->fscache);
+	cache->cache.ops->put_object(&xobject->fscache, cachefiles_obj_put_wait_timeo);
 	_leave(" = -ETIMEDOUT");
 	return -ETIMEDOUT;
 }
@@ -265,6 +261,11 @@ void cachefiles_mark_object_inactive(struct cachefiles_cache *cache,
 				     struct cachefiles_object *object,
 				     blkcnt_t i_blocks)
 {
+	struct dentry *dentry = object->dentry;
+	struct inode *inode = d_backing_inode(dentry);
+
+	trace_cachefiles_mark_inactive(object, dentry, inode);
+
 	write_lock(&cache->active_lock);
 	rb_erase(&object->active_node, &cache->active_nodes);
 	clear_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags);
@@ -288,6 +289,7 @@ void cachefiles_mark_object_inactive(struct cachefiles_cache *cache,
  * - unlocks the directory mutex
  */
 static int cachefiles_bury_object(struct cachefiles_cache *cache,
+				  struct cachefiles_object *object,
 				  struct dentry *dir,
 				  struct dentry *rep,
 				  bool preemptive,
@@ -312,6 +314,7 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
 		if (ret < 0) {
 			cachefiles_io_error(cache, "Unlink security error");
 		} else {
+			trace_cachefiles_unlink(object, rep, why);
 			ret = vfs_unlink(d_inode(dir), rep, NULL);
 
 			if (preemptive)
@@ -413,6 +416,7 @@ try_again:
 	if (ret < 0) {
 		cachefiles_io_error(cache, "Rename security error %d", ret);
 	} else {
+		trace_cachefiles_rename(object, rep, grave, why);
 		ret = vfs_rename(d_inode(dir), rep,
 				 d_inode(cache->graveyard), grave, NULL, 0);
 		if (ret != 0 && ret != -ENOMEM)
@@ -458,7 +462,7 @@ int cachefiles_delete_object(struct cachefiles_cache *cache,
 		/* we need to check that our parent is _still_ our parent - it
 		 * may have been renamed */
 		if (dir == object->dentry->d_parent) {
-			ret = cachefiles_bury_object(cache, dir,
+			ret = cachefiles_bury_object(cache, object, dir,
 						     object->dentry, false,
 						     FSCACHE_OBJECT_WAS_RETIRED);
 		} else {
@@ -486,6 +490,7 @@ int cachefiles_walk_to_object(struct cachefiles_object *parent,
 {
 	struct cachefiles_cache *cache;
 	struct dentry *dir, *next = NULL;
+	struct inode *inode;
 	struct path path;
 	unsigned long start;
 	const char *name;
@@ -529,13 +534,17 @@ lookup_again:
 	start = jiffies;
 	next = lookup_one_len(name, dir, nlen);
 	cachefiles_hist(cachefiles_lookup_histogram, start);
-	if (IS_ERR(next))
+	if (IS_ERR(next)) {
+		trace_cachefiles_lookup(object, next, NULL);
 		goto lookup_error;
+	}
 
-	_debug("next -> %p %s", next, d_backing_inode(next) ? "positive" : "negative");
+	inode = d_backing_inode(next);
+	trace_cachefiles_lookup(object, next, inode);
+	_debug("next -> %p %s", next, inode ? "positive" : "negative");
 
 	if (!key)
-		object->new = !d_backing_inode(next);
+		object->new = !inode;
 
 	/* if this element of the path doesn't exist, then the lookup phase
 	 * failed, and we can release any readers in the certain knowledge that
@@ -558,6 +567,8 @@ lookup_again:
 			start = jiffies;
 			ret = vfs_mkdir(d_inode(dir), next, 0);
 			cachefiles_hist(cachefiles_mkdir_histogram, start);
+			if (!key)
+				trace_cachefiles_mkdir(object, next, ret);
 			if (ret < 0)
 				goto create_error;
 
@@ -587,6 +598,7 @@ lookup_again:
 			start = jiffies;
 			ret = vfs_create(d_inode(dir), next, S_IFREG, true);
 			cachefiles_hist(cachefiles_create_histogram, start);
+			trace_cachefiles_create(object, next, ret);
 			if (ret < 0)
 				goto create_error;
 
@@ -629,7 +641,8 @@ lookup_again:
 			 * mutex) */
 			object->dentry = NULL;
 
-			ret = cachefiles_bury_object(cache, dir, next, true,
+			ret = cachefiles_bury_object(cache, object, dir, next,
+						     true,
 						     FSCACHE_OBJECT_IS_STALE);
 			dput(next);
 			next = NULL;
@@ -955,7 +968,7 @@ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir,
 	/*  actually remove the victim (drops the dir mutex) */
 	_debug("bury");
 
-	ret = cachefiles_bury_object(cache, dir, victim, false,
+	ret = cachefiles_bury_object(cache, NULL, dir, victim, false,
 				     FSCACHE_OBJECT_WAS_CULLED);
 	if (ret < 0)
 		goto error;
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index 883bc7bb12c5..5082c8a49686 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -952,6 +952,7 @@ error:
  * - cache withdrawal is prevented by the caller
  */
 void cachefiles_uncache_page(struct fscache_object *_object, struct page *page)
+	__releases(&object->fscache.cookie->lock)
 {
 	struct cachefiles_object *object;
 	struct cachefiles_cache *cache;
diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c
index d31c1a72d8a5..0a29a00aed2e 100644
--- a/fs/cachefiles/xattr.c
+++ b/fs/cachefiles/xattr.c
@@ -113,6 +113,7 @@ int cachefiles_set_object_xattr(struct cachefiles_object *object,
 	/* attempt to install the cache metadata directly */
 	_debug("SET #%u", auxdata->len);
 
+	clear_bit(FSCACHE_COOKIE_AUX_UPDATED, &object->fscache.cookie->flags);
 	ret = vfs_setxattr(dentry, cachefiles_xattr_cache,
 			   &auxdata->type, auxdata->len,
 			   XATTR_CREATE);
@@ -141,6 +142,7 @@ int cachefiles_update_object_xattr(struct cachefiles_object *object,
 	/* attempt to install the cache metadata directly */
 	_debug("SET #%u", auxdata->len);
 
+	clear_bit(FSCACHE_COOKIE_AUX_UPDATED, &object->fscache.cookie->flags);
 	ret = vfs_setxattr(dentry, cachefiles_xattr_cache,
 			   &auxdata->type, auxdata->len,
 			   XATTR_REPLACE);
@@ -180,7 +182,8 @@ int cachefiles_check_auxdata(struct cachefiles_object *object)
 		goto error;
 
 	xlen--;
-	validity = fscache_check_aux(&object->fscache, &auxbuf->data, xlen);
+	validity = fscache_check_aux(&object->fscache, &auxbuf->data, xlen,
+				     i_size_read(d_backing_inode(dentry)));
 	if (validity != FSCACHE_CHECKAUX_OKAY)
 		goto error;
 
@@ -249,7 +252,8 @@ int cachefiles_check_object_xattr(struct cachefiles_object *object,
 		       object->fscache.cookie->def->name, dlen);
 
 		result = fscache_check_aux(&object->fscache,
-					   &auxbuf->data, dlen);
+					   &auxbuf->data, dlen,
+					   i_size_read(d_backing_inode(dentry)));
 
 		switch (result) {
 			/* entry okay as is */
diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile
index 174f5709e508..a699e320393f 100644
--- a/fs/ceph/Makefile
+++ b/fs/ceph/Makefile
@@ -6,7 +6,7 @@
 obj-$(CONFIG_CEPH_FS) += ceph.o
 
 ceph-y := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \
-	export.o caps.o snap.o xattr.o \
+	export.o caps.o snap.o xattr.o quota.o \
 	mds_client.o mdsmap.o strings.o ceph_frag.o \
 	debugfs.o
 
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index b4336b42ce3b..5f7ad3d0df2e 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -15,6 +15,7 @@
 #include "mds_client.h"
 #include "cache.h"
 #include <linux/ceph/osd_client.h>
+#include <linux/ceph/striper.h>
 
 /*
  * Ceph address space ops.
@@ -438,7 +439,7 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
 {
 	struct inode *inode = file_inode(file);
 	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
-	struct ceph_file_info *ci = file->private_data;
+	struct ceph_file_info *fi = file->private_data;
 	struct ceph_rw_context *rw_ctx;
 	int rc = 0;
 	int max = 0;
@@ -452,7 +453,7 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
 	if (rc == 0)
 		goto out;
 
-	rw_ctx = ceph_find_rw_context(ci);
+	rw_ctx = ceph_find_rw_context(fi);
 	max = fsc->mount_options->rsize >> PAGE_SHIFT;
 	dout("readpages %p file %p ctx %p nr_pages %d max %d\n",
 	     inode, file, rw_ctx, nr_pages, max);
@@ -800,7 +801,7 @@ static int ceph_writepages_start(struct address_space *mapping,
 	struct ceph_osd_request *req = NULL;
 	struct ceph_writeback_ctl ceph_wbc;
 	bool should_loop, range_whole = false;
-	bool stop, done = false;
+	bool done = false;
 
 	dout("writepages_start %p (mode=%s)\n", inode,
 	     wbc->sync_mode == WB_SYNC_NONE ? "NONE" :
@@ -856,7 +857,7 @@ retry:
 		 * in that range can be associated with newer snapc.
 		 * They are not writeable until we write all dirty pages
 		 * associated with 'snapc' get written */
-		if (index > 0 || wbc->sync_mode != WB_SYNC_NONE)
+		if (index > 0)
 			should_loop = true;
 		dout(" non-head snapc, range whole\n");
 	}
@@ -864,8 +865,7 @@ retry:
 	ceph_put_snap_context(last_snapc);
 	last_snapc = snapc;
 
-	stop = false;
-	while (!stop && index <= end) {
+	while (!done && index <= end) {
 		int num_ops = 0, op_idx;
 		unsigned i, pvec_pages, max_pages, locked_pages = 0;
 		struct page **pages = NULL, **data_pages;
@@ -898,16 +898,30 @@ get_more_pages:
 				unlock_page(page);
 				continue;
 			}
-			if (strip_unit_end && (page->index > strip_unit_end)) {
-				dout("end of strip unit %p\n", page);
+			/* only if matching snap context */
+			pgsnapc = page_snap_context(page);
+			if (pgsnapc != snapc) {
+				dout("page snapc %p %lld != oldest %p %lld\n",
+				     pgsnapc, pgsnapc->seq, snapc, snapc->seq);
+				if (!should_loop &&
+				    !ceph_wbc.head_snapc &&
+				    wbc->sync_mode != WB_SYNC_NONE)
+					should_loop = true;
 				unlock_page(page);
-				break;
+				continue;
 			}
 			if (page_offset(page) >= ceph_wbc.i_size) {
 				dout("%p page eof %llu\n",
 				     page, ceph_wbc.i_size);
-				/* not done if range_cyclic */
-				stop = true;
+				if (ceph_wbc.size_stable ||
+				    page_offset(page) >= i_size_read(inode))
+					mapping->a_ops->invalidatepage(page,
+								0, PAGE_SIZE);
+				unlock_page(page);
+				continue;
+			}
+			if (strip_unit_end && (page->index > strip_unit_end)) {
+				dout("end of strip unit %p\n", page);
 				unlock_page(page);
 				break;
 			}
@@ -921,15 +935,6 @@ get_more_pages:
 				wait_on_page_writeback(page);
 			}
 
-			/* only if matching snap context */
-			pgsnapc = page_snap_context(page);
-			if (pgsnapc != snapc) {
-				dout("page snapc %p %lld != oldest %p %lld\n",
-				     pgsnapc, pgsnapc->seq, snapc, snapc->seq);
-				unlock_page(page);
-				continue;
-			}
-
 			if (!clear_page_dirty_for_io(page)) {
 				dout("%p !clear_page_dirty_for_io\n", page);
 				unlock_page(page);
@@ -945,19 +950,15 @@ get_more_pages:
 			if (locked_pages == 0) {
 				u64 objnum;
 				u64 objoff;
+				u32 xlen;
 
 				/* prepare async write request */
 				offset = (u64)page_offset(page);
-				len = wsize;
-
-				rc = ceph_calc_file_object_mapping(&ci->i_layout,
-								offset, len,
-								&objnum, &objoff,
-								&len);
-				if (rc < 0) {
-					unlock_page(page);
-					break;
-				}
+				ceph_calc_file_object_mapping(&ci->i_layout,
+							      offset, wsize,
+							      &objnum, &objoff,
+							      &xlen);
+				len = xlen;
 
 				num_ops = 1;
 				strip_unit_end = page->index +
@@ -1146,7 +1147,7 @@ new_request:
 		 * we tagged for writeback prior to entering this loop.
 		 */
 		if (wbc->nr_to_write <= 0 && wbc->sync_mode == WB_SYNC_NONE)
-			done = stop = true;
+			done = true;
 
 release_pvec_pages:
 		dout("pagevec_release on %d pages (%p)\n", (int)pvec.nr,
diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c
index a3ab265d3215..bb524c880b1e 100644
--- a/fs/ceph/cache.c
+++ b/fs/ceph/cache.c
@@ -27,7 +27,6 @@
 struct ceph_aux_inode {
 	u64 		version;
 	struct timespec	mtime;
-	loff_t          size;
 };
 
 struct fscache_netfs ceph_cache_netfs = {
@@ -41,37 +40,18 @@ static LIST_HEAD(ceph_fscache_list);
 struct ceph_fscache_entry {
 	struct list_head list;
 	struct fscache_cookie *fscache;
-	struct ceph_fsid fsid;
 	size_t uniq_len;
+	/* The following members must be last */
+	struct ceph_fsid fsid;
 	char uniquifier[0];
 };
 
-static uint16_t ceph_fscache_session_get_key(const void *cookie_netfs_data,
-					     void *buffer, uint16_t maxbuf)
-{
-	const struct ceph_fs_client* fsc = cookie_netfs_data;
-	const char *fscache_uniq = fsc->mount_options->fscache_uniq;
-	uint16_t fsid_len, uniq_len;
-
-	fsid_len = sizeof(fsc->client->fsid);
-	uniq_len = fscache_uniq ? strlen(fscache_uniq) : 0;
-	if (fsid_len + uniq_len > maxbuf)
-		return 0;
-
-	memcpy(buffer, &fsc->client->fsid, fsid_len);
-	if (uniq_len)
-		memcpy(buffer + fsid_len, fscache_uniq, uniq_len);
-
-	return fsid_len + uniq_len;
-}
-
 static const struct fscache_cookie_def ceph_fscache_fsid_object_def = {
 	.name		= "CEPH.fsid",
 	.type		= FSCACHE_COOKIE_TYPE_INDEX,
-	.get_key	= ceph_fscache_session_get_key,
 };
 
-int ceph_fscache_register(void)
+int __init ceph_fscache_register(void)
 {
 	return fscache_register_netfs(&ceph_cache_netfs);
 }
@@ -110,16 +90,19 @@ int ceph_fscache_register_fs(struct ceph_fs_client* fsc)
 		goto out_unlock;
 	}
 
+	memcpy(&ent->fsid, fsid, sizeof(*fsid));
+	if (uniq_len > 0) {
+		memcpy(&ent->uniquifier, fscache_uniq, uniq_len);
+		ent->uniq_len = uniq_len;
+	}
+
 	fsc->fscache = fscache_acquire_cookie(ceph_cache_netfs.primary_index,
 					      &ceph_fscache_fsid_object_def,
-					      fsc, true);
+					      &ent->fsid, sizeof(ent->fsid) + uniq_len,
+					      NULL, 0,
+					      fsc, 0, true);
 
 	if (fsc->fscache) {
-		memcpy(&ent->fsid, fsid, sizeof(*fsid));
-		if (uniq_len > 0) {
-			memcpy(&ent->uniquifier, fscache_uniq, uniq_len);
-			ent->uniq_len = uniq_len;
-		}
 		ent->fscache = fsc->fscache;
 		list_add_tail(&ent->list, &ceph_fscache_list);
 	} else {
@@ -133,73 +116,32 @@ out_unlock:
 	return err;
 }
 
-static uint16_t ceph_fscache_inode_get_key(const void *cookie_netfs_data,
-					   void *buffer, uint16_t maxbuf)
-{
-	const struct ceph_inode_info* ci = cookie_netfs_data;
-	uint16_t klen;
-
-	/* use ceph virtual inode (id + snapshot) */
-	klen = sizeof(ci->i_vino);
-	if (klen > maxbuf)
-		return 0;
-
-	memcpy(buffer, &ci->i_vino, klen);
-	return klen;
-}
-
-static uint16_t ceph_fscache_inode_get_aux(const void *cookie_netfs_data,
-					   void *buffer, uint16_t bufmax)
-{
-	struct ceph_aux_inode aux;
-	const struct ceph_inode_info* ci = cookie_netfs_data;
-	const struct inode* inode = &ci->vfs_inode;
-
-	memset(&aux, 0, sizeof(aux));
-	aux.version = ci->i_version;
-	aux.mtime = inode->i_mtime;
-	aux.size = i_size_read(inode);
-
-	memcpy(buffer, &aux, sizeof(aux));
-
-	return sizeof(aux);
-}
-
-static void ceph_fscache_inode_get_attr(const void *cookie_netfs_data,
-					uint64_t *size)
-{
-	const struct ceph_inode_info* ci = cookie_netfs_data;
-	*size = i_size_read(&ci->vfs_inode);
-}
-
 static enum fscache_checkaux ceph_fscache_inode_check_aux(
-	void *cookie_netfs_data, const void *data, uint16_t dlen)
+	void *cookie_netfs_data, const void *data, uint16_t dlen,
+	loff_t object_size)
 {
 	struct ceph_aux_inode aux;
 	struct ceph_inode_info* ci = cookie_netfs_data;
 	struct inode* inode = &ci->vfs_inode;
 
-	if (dlen != sizeof(aux))
+	if (dlen != sizeof(aux) ||
+	    i_size_read(inode) != object_size)
 		return FSCACHE_CHECKAUX_OBSOLETE;
 
 	memset(&aux, 0, sizeof(aux));
 	aux.version = ci->i_version;
 	aux.mtime = inode->i_mtime;
-	aux.size = i_size_read(inode);
 
 	if (memcmp(data, &aux, sizeof(aux)) != 0)
 		return FSCACHE_CHECKAUX_OBSOLETE;
 
-	dout("ceph inode 0x%p cached okay", ci);
+	dout("ceph inode 0x%p cached okay\n", ci);
 	return FSCACHE_CHECKAUX_OKAY;
 }
 
 static const struct fscache_cookie_def ceph_fscache_inode_object_def = {
 	.name		= "CEPH.inode",
 	.type		= FSCACHE_COOKIE_TYPE_DATAFILE,
-	.get_key	= ceph_fscache_inode_get_key,
-	.get_attr	= ceph_fscache_inode_get_attr,
-	.get_aux	= ceph_fscache_inode_get_aux,
 	.check_aux	= ceph_fscache_inode_check_aux,
 };
 
@@ -207,6 +149,7 @@ void ceph_fscache_register_inode_cookie(struct inode *inode)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+	struct ceph_aux_inode aux;
 
 	/* No caching for filesystem */
 	if (!fsc->fscache)
@@ -218,9 +161,14 @@ void ceph_fscache_register_inode_cookie(struct inode *inode)
 
 	inode_lock_nested(inode, I_MUTEX_CHILD);
 	if (!ci->fscache) {
+		memset(&aux, 0, sizeof(aux));
+		aux.version = ci->i_version;
+		aux.mtime = inode->i_mtime;
 		ci->fscache = fscache_acquire_cookie(fsc->fscache,
-					&ceph_fscache_inode_object_def,
-					ci, false);
+						     &ceph_fscache_inode_object_def,
+						     &ci->i_vino, sizeof(ci->i_vino),
+						     &aux, sizeof(aux),
+						     ci, i_size_read(inode), false);
 	}
 	inode_unlock(inode);
 }
@@ -235,7 +183,7 @@ void ceph_fscache_unregister_inode_cookie(struct ceph_inode_info* ci)
 	ci->fscache = NULL;
 
 	fscache_uncache_all_inode_pages(cookie, &ci->vfs_inode);
-	fscache_relinquish_cookie(cookie, 0);
+	fscache_relinquish_cookie(cookie, &ci->i_vino, false);
 }
 
 static bool ceph_fscache_can_enable(void *data)
@@ -254,11 +202,11 @@ void ceph_fscache_file_set_cookie(struct inode *inode, struct file *filp)
 	if (inode_is_open_for_write(inode)) {
 		dout("fscache_file_set_cookie %p %p disabling cache\n",
 		     inode, filp);
-		fscache_disable_cookie(ci->fscache, false);
+		fscache_disable_cookie(ci->fscache, &ci->i_vino, false);
 		fscache_uncache_all_inode_pages(ci->fscache, inode);
 	} else {
-		fscache_enable_cookie(ci->fscache, ceph_fscache_can_enable,
-				inode);
+		fscache_enable_cookie(ci->fscache, &ci->i_vino, i_size_read(inode),
+				      ceph_fscache_can_enable, inode);
 		if (fscache_cookie_enabled(ci->fscache)) {
 			dout("fscache_file_set_cookie %p %p enabling cache\n",
 			     inode, filp);
@@ -351,7 +299,8 @@ void ceph_readpage_to_fscache(struct inode *inode, struct page *page)
 	if (!cache_valid(ci))
 		return;
 
-	ret = fscache_write_page(ci->fscache, page, GFP_KERNEL);
+	ret = fscache_write_page(ci->fscache, page, i_size_read(inode),
+				 GFP_KERNEL);
 	if (ret)
 		 fscache_uncache_page(ci->fscache, page);
 }
@@ -385,7 +334,7 @@ void ceph_fscache_unregister_fs(struct ceph_fs_client* fsc)
 		WARN_ON_ONCE(!found);
 		mutex_unlock(&ceph_fscache_lock);
 
-		__fscache_relinquish_cookie(fsc->fscache, 0);
+		__fscache_relinquish_cookie(fsc->fscache, NULL, false);
 	}
 	fsc->fscache = NULL;
 }
@@ -402,7 +351,7 @@ void ceph_fscache_revalidate_cookie(struct ceph_inode_info *ci)
 	 * truncate while the caller holds CEPH_CAP_FILE_RD */
 	mutex_lock(&ci->i_truncate_mutex);
 	if (!cache_valid(ci)) {
-		if (fscache_check_consistency(ci->fscache))
+		if (fscache_check_consistency(ci->fscache, &ci->i_vino))
 			fscache_invalidate(ci->fscache);
 		spin_lock(&ci->i_ceph_lock);
 		ci->i_fscache_gen = ci->i_rdcache_gen;
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 0e5bd3e3344e..23dbfae16156 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -184,36 +184,54 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc,
 					 mdsc->caps_avail_count);
 	spin_unlock(&mdsc->caps_list_lock);
 
-	for (i = have; i < need; i++) {
-retry:
+	for (i = have; i < need; ) {
 		cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS);
-		if (!cap) {
-			if (!trimmed) {
-				for (j = 0; j < mdsc->max_sessions; j++) {
-					s = __ceph_lookup_mds_session(mdsc, j);
-					if (!s)
-						continue;
-					mutex_unlock(&mdsc->mutex);
+		if (cap) {
+			list_add(&cap->caps_item, &newcaps);
+			alloc++;
+			i++;
+			continue;
+		}
 
-					mutex_lock(&s->s_mutex);
-					max_caps = s->s_nr_caps - (need - i);
-					ceph_trim_caps(mdsc, s, max_caps);
-					mutex_unlock(&s->s_mutex);
+		if (!trimmed) {
+			for (j = 0; j < mdsc->max_sessions; j++) {
+				s = __ceph_lookup_mds_session(mdsc, j);
+				if (!s)
+					continue;
+				mutex_unlock(&mdsc->mutex);
 
-					ceph_put_mds_session(s);
-					mutex_lock(&mdsc->mutex);
-				}
-				trimmed = true;
-				goto retry;
-			} else {
-				pr_warn("reserve caps ctx=%p ENOMEM "
-					"need=%d got=%d\n",
-					ctx, need, have + alloc);
-				goto out_nomem;
+				mutex_lock(&s->s_mutex);
+				max_caps = s->s_nr_caps - (need - i);
+				ceph_trim_caps(mdsc, s, max_caps);
+				mutex_unlock(&s->s_mutex);
+
+				ceph_put_mds_session(s);
+				mutex_lock(&mdsc->mutex);
 			}
+			trimmed = true;
+
+			spin_lock(&mdsc->caps_list_lock);
+			if (mdsc->caps_avail_count) {
+				int more_have;
+				if (mdsc->caps_avail_count >= need - i)
+					more_have = need - i;
+				else
+					more_have = mdsc->caps_avail_count;
+
+				i += more_have;
+				have += more_have;
+				mdsc->caps_avail_count -= more_have;
+				mdsc->caps_reserve_count += more_have;
+
+			}
+			spin_unlock(&mdsc->caps_list_lock);
+
+			continue;
 		}
-		list_add(&cap->caps_item, &newcaps);
-		alloc++;
+
+		pr_warn("reserve caps ctx=%p ENOMEM need=%d got=%d\n",
+			ctx, need, have + alloc);
+		goto out_nomem;
 	}
 	BUG_ON(have + alloc != need);
 
@@ -234,16 +252,28 @@ retry:
 	return 0;
 
 out_nomem:
+
+	spin_lock(&mdsc->caps_list_lock);
+	mdsc->caps_avail_count += have;
+	mdsc->caps_reserve_count -= have;
+
 	while (!list_empty(&newcaps)) {
 		cap = list_first_entry(&newcaps,
 				struct ceph_cap, caps_item);
 		list_del(&cap->caps_item);
-		kmem_cache_free(ceph_cap_cachep, cap);
+
+		/* Keep some preallocated caps around (caps_min_count), to
+		 * avoid lots of free/alloc churn. */
+		if (mdsc->caps_avail_count >=
+		    mdsc->caps_reserve_count + mdsc->caps_min_count) {
+			kmem_cache_free(ceph_cap_cachep, cap);
+		} else {
+			mdsc->caps_avail_count++;
+			mdsc->caps_total_count++;
+			list_add(&cap->caps_item, &mdsc->caps_list);
+		}
 	}
 
-	spin_lock(&mdsc->caps_list_lock);
-	mdsc->caps_avail_count += have;
-	mdsc->caps_reserve_count -= have;
 	BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
 					 mdsc->caps_reserve_count +
 					 mdsc->caps_avail_count);
@@ -254,12 +284,26 @@ out_nomem:
 int ceph_unreserve_caps(struct ceph_mds_client *mdsc,
 			struct ceph_cap_reservation *ctx)
 {
+	int i;
+	struct ceph_cap *cap;
+
 	dout("unreserve caps ctx=%p count=%d\n", ctx, ctx->count);
 	if (ctx->count) {
 		spin_lock(&mdsc->caps_list_lock);
 		BUG_ON(mdsc->caps_reserve_count < ctx->count);
 		mdsc->caps_reserve_count -= ctx->count;
-		mdsc->caps_avail_count += ctx->count;
+		if (mdsc->caps_avail_count >=
+		    mdsc->caps_reserve_count + mdsc->caps_min_count) {
+			mdsc->caps_total_count -= ctx->count;
+			for (i = 0; i < ctx->count; i++) {
+				cap = list_first_entry(&mdsc->caps_list,
+					struct ceph_cap, caps_item);
+				list_del(&cap->caps_item);
+				kmem_cache_free(ceph_cap_cachep, cap);
+			}
+		} else {
+			mdsc->caps_avail_count += ctx->count;
+		}
 		ctx->count = 0;
 		dout("unreserve caps %d = %d used + %d resv + %d avail\n",
 		     mdsc->caps_total_count, mdsc->caps_use_count,
@@ -285,7 +329,23 @@ struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc,
 			mdsc->caps_use_count++;
 			mdsc->caps_total_count++;
 			spin_unlock(&mdsc->caps_list_lock);
+		} else {
+			spin_lock(&mdsc->caps_list_lock);
+			if (mdsc->caps_avail_count) {
+				BUG_ON(list_empty(&mdsc->caps_list));
+
+				mdsc->caps_avail_count--;
+				mdsc->caps_use_count++;
+				cap = list_first_entry(&mdsc->caps_list,
+						struct ceph_cap, caps_item);
+				list_del(&cap->caps_item);
+
+				BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
+				       mdsc->caps_reserve_count + mdsc->caps_avail_count);
+			}
+			spin_unlock(&mdsc->caps_list_lock);
 		}
+
 		return cap;
 	}
 
@@ -341,6 +401,8 @@ void ceph_reservation_status(struct ceph_fs_client *fsc,
 {
 	struct ceph_mds_client *mdsc = fsc->mdsc;
 
+	spin_lock(&mdsc->caps_list_lock);
+
 	if (total)
 		*total = mdsc->caps_total_count;
 	if (avail)
@@ -351,6 +413,8 @@ void ceph_reservation_status(struct ceph_fs_client *fsc,
 		*reserved = mdsc->caps_reserve_count;
 	if (min)
 		*min = mdsc->caps_min_count;
+
+	spin_unlock(&mdsc->caps_list_lock);
 }
 
 /*
@@ -639,9 +703,11 @@ void ceph_add_cap(struct inode *inode,
 			}
 
 			spin_lock(&realm->inodes_with_caps_lock);
-			ci->i_snap_realm = realm;
 			list_add(&ci->i_snap_realm_item,
 				 &realm->inodes_with_caps);
+			ci->i_snap_realm = realm;
+			if (realm->ino == ci->i_vino.ino)
+				realm->inode = inode;
 			spin_unlock(&realm->inodes_with_caps_lock);
 
 			if (oldrealm)
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 644def813754..abdf98deeec4 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -260,7 +260,7 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
 		goto out;
 
 	fsc->debugfs_mdsmap = debugfs_create_file("mdsmap",
-					0600,
+					0400,
 					fsc->client->debugfs_dir,
 					fsc,
 					&mdsmap_show_fops);
@@ -268,7 +268,7 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
 		goto out;
 
 	fsc->debugfs_mds_sessions = debugfs_create_file("mds_sessions",
-					0600,
+					0400,
 					fsc->client->debugfs_dir,
 					fsc,
 					&mds_sessions_show_fops);
@@ -276,7 +276,7 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
 		goto out;
 
 	fsc->debugfs_mdsc = debugfs_create_file("mdsc",
-						0600,
+						0400,
 						fsc->client->debugfs_dir,
 						fsc,
 						&mdsc_show_fops);
@@ -292,7 +292,7 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
 		goto out;
 
 	fsc->debugfs_dentry_lru = debugfs_create_file("dentry_lru",
-					0600,
+					0400,
 					fsc->client->debugfs_dir,
 					fsc,
 					&dentry_lru_show_fops);
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 2bdd561c4c68..1a78dd6f8bf2 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -101,18 +101,18 @@ static int fpos_cmp(loff_t l, loff_t r)
  * regardless of what dir changes take place on the
  * server.
  */
-static int note_last_dentry(struct ceph_file_info *fi, const char *name,
+static int note_last_dentry(struct ceph_dir_file_info *dfi, const char *name,
 		            int len, unsigned next_offset)
 {
 	char *buf = kmalloc(len+1, GFP_KERNEL);
 	if (!buf)
 		return -ENOMEM;
-	kfree(fi->last_name);
-	fi->last_name = buf;
-	memcpy(fi->last_name, name, len);
-	fi->last_name[len] = 0;
-	fi->next_offset = next_offset;
-	dout("note_last_dentry '%s'\n", fi->last_name);
+	kfree(dfi->last_name);
+	dfi->last_name = buf;
+	memcpy(dfi->last_name, name, len);
+	dfi->last_name[len] = 0;
+	dfi->next_offset = next_offset;
+	dout("note_last_dentry '%s'\n", dfi->last_name);
 	return 0;
 }
 
@@ -174,7 +174,7 @@ __dcache_find_get_entry(struct dentry *parent, u64 idx,
 static int __dcache_readdir(struct file *file,  struct dir_context *ctx,
 			    int shared_gen)
 {
-	struct ceph_file_info *fi = file->private_data;
+	struct ceph_dir_file_info *dfi = file->private_data;
 	struct dentry *parent = file->f_path.dentry;
 	struct inode *dir = d_inode(parent);
 	struct dentry *dentry, *last = NULL;
@@ -221,7 +221,7 @@ static int __dcache_readdir(struct file *file,  struct dir_context *ctx,
 		bool emit_dentry = false;
 		dentry = __dcache_find_get_entry(parent, idx++, &cache_ctl);
 		if (!dentry) {
-			fi->flags |= CEPH_F_ATEND;
+			dfi->file_info.flags |= CEPH_F_ATEND;
 			err = 0;
 			break;
 		}
@@ -272,33 +272,33 @@ out:
 	if (last) {
 		int ret;
 		di = ceph_dentry(last);
-		ret = note_last_dentry(fi, last->d_name.name, last->d_name.len,
+		ret = note_last_dentry(dfi, last->d_name.name, last->d_name.len,
 				       fpos_off(di->offset) + 1);
 		if (ret < 0)
 			err = ret;
 		dput(last);
 		/* last_name no longer match cache index */
-		if (fi->readdir_cache_idx >= 0) {
-			fi->readdir_cache_idx = -1;
-			fi->dir_release_count = 0;
+		if (dfi->readdir_cache_idx >= 0) {
+			dfi->readdir_cache_idx = -1;
+			dfi->dir_release_count = 0;
 		}
 	}
 	return err;
 }
 
-static bool need_send_readdir(struct ceph_file_info *fi, loff_t pos)
+static bool need_send_readdir(struct ceph_dir_file_info *dfi, loff_t pos)
 {
-	if (!fi->last_readdir)
+	if (!dfi->last_readdir)
 		return true;
 	if (is_hash_order(pos))
-		return !ceph_frag_contains_value(fi->frag, fpos_hash(pos));
+		return !ceph_frag_contains_value(dfi->frag, fpos_hash(pos));
 	else
-		return fi->frag != fpos_frag(pos);
+		return dfi->frag != fpos_frag(pos);
 }
 
 static int ceph_readdir(struct file *file, struct dir_context *ctx)
 {
-	struct ceph_file_info *fi = file->private_data;
+	struct ceph_dir_file_info *dfi = file->private_data;
 	struct inode *inode = file_inode(file);
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
@@ -309,7 +309,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
 	struct ceph_mds_reply_info_parsed *rinfo;
 
 	dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos);
-	if (fi->flags & CEPH_F_ATEND)
+	if (dfi->file_info.flags & CEPH_F_ATEND)
 		return 0;
 
 	/* always start with . and .. */
@@ -350,15 +350,15 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
 	/* proceed with a normal readdir */
 more:
 	/* do we have the correct frag content buffered? */
-	if (need_send_readdir(fi, ctx->pos)) {
+	if (need_send_readdir(dfi, ctx->pos)) {
 		struct ceph_mds_request *req;
 		int op = ceph_snap(inode) == CEPH_SNAPDIR ?
 			CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR;
 
 		/* discard old result, if any */
-		if (fi->last_readdir) {
-			ceph_mdsc_put_request(fi->last_readdir);
-			fi->last_readdir = NULL;
+		if (dfi->last_readdir) {
+			ceph_mdsc_put_request(dfi->last_readdir);
+			dfi->last_readdir = NULL;
 		}
 
 		if (is_hash_order(ctx->pos)) {
@@ -372,7 +372,7 @@ more:
 		}
 
 		dout("readdir fetching %llx.%llx frag %x offset '%s'\n",
-		     ceph_vinop(inode), frag, fi->last_name);
+		     ceph_vinop(inode), frag, dfi->last_name);
 		req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
 		if (IS_ERR(req))
 			return PTR_ERR(req);
@@ -388,8 +388,8 @@ more:
 			__set_bit(CEPH_MDS_R_DIRECT_IS_HASH, &req->r_req_flags);
 			req->r_inode_drop = CEPH_CAP_FILE_EXCL;
 		}
-		if (fi->last_name) {
-			req->r_path2 = kstrdup(fi->last_name, GFP_KERNEL);
+		if (dfi->last_name) {
+			req->r_path2 = kstrdup(dfi->last_name, GFP_KERNEL);
 			if (!req->r_path2) {
 				ceph_mdsc_put_request(req);
 				return -ENOMEM;
@@ -399,10 +399,10 @@ more:
 				cpu_to_le32(fpos_hash(ctx->pos));
 		}
 
-		req->r_dir_release_cnt = fi->dir_release_count;
-		req->r_dir_ordered_cnt = fi->dir_ordered_count;
-		req->r_readdir_cache_idx = fi->readdir_cache_idx;
-		req->r_readdir_offset = fi->next_offset;
+		req->r_dir_release_cnt = dfi->dir_release_count;
+		req->r_dir_ordered_cnt = dfi->dir_ordered_count;
+		req->r_readdir_cache_idx = dfi->readdir_cache_idx;
+		req->r_readdir_offset = dfi->next_offset;
 		req->r_args.readdir.frag = cpu_to_le32(frag);
 		req->r_args.readdir.flags =
 				cpu_to_le16(CEPH_READDIR_REPLY_BITFLAGS);
@@ -426,35 +426,35 @@ more:
 		if (le32_to_cpu(rinfo->dir_dir->frag) != frag) {
 			frag = le32_to_cpu(rinfo->dir_dir->frag);
 			if (!rinfo->hash_order) {
-				fi->next_offset = req->r_readdir_offset;
+				dfi->next_offset = req->r_readdir_offset;
 				/* adjust ctx->pos to beginning of frag */
 				ctx->pos = ceph_make_fpos(frag,
-							  fi->next_offset,
+							  dfi->next_offset,
 							  false);
 			}
 		}
 
-		fi->frag = frag;
-		fi->last_readdir = req;
+		dfi->frag = frag;
+		dfi->last_readdir = req;
 
 		if (test_bit(CEPH_MDS_R_DID_PREPOPULATE, &req->r_req_flags)) {
-			fi->readdir_cache_idx = req->r_readdir_cache_idx;
-			if (fi->readdir_cache_idx < 0) {
+			dfi->readdir_cache_idx = req->r_readdir_cache_idx;
+			if (dfi->readdir_cache_idx < 0) {
 				/* preclude from marking dir ordered */
-				fi->dir_ordered_count = 0;
+				dfi->dir_ordered_count = 0;
 			} else if (ceph_frag_is_leftmost(frag) &&
-				   fi->next_offset == 2) {
+				   dfi->next_offset == 2) {
 				/* note dir version at start of readdir so
 				 * we can tell if any dentries get dropped */
-				fi->dir_release_count = req->r_dir_release_cnt;
-				fi->dir_ordered_count = req->r_dir_ordered_cnt;
+				dfi->dir_release_count = req->r_dir_release_cnt;
+				dfi->dir_ordered_count = req->r_dir_ordered_cnt;
 			}
 		} else {
-			dout("readdir !did_prepopulate");
+			dout("readdir !did_prepopulate\n");
 			/* disable readdir cache */
-			fi->readdir_cache_idx = -1;
+			dfi->readdir_cache_idx = -1;
 			/* preclude from marking dir complete */
-			fi->dir_release_count = 0;
+			dfi->dir_release_count = 0;
 		}
 
 		/* note next offset and last dentry name */
@@ -463,19 +463,19 @@ more:
 					rinfo->dir_entries + (rinfo->dir_nr-1);
 			unsigned next_offset = req->r_reply_info.dir_end ?
 					2 : (fpos_off(rde->offset) + 1);
-			err = note_last_dentry(fi, rde->name, rde->name_len,
+			err = note_last_dentry(dfi, rde->name, rde->name_len,
 					       next_offset);
 			if (err)
 				return err;
 		} else if (req->r_reply_info.dir_end) {
-			fi->next_offset = 2;
+			dfi->next_offset = 2;
 			/* keep last name */
 		}
 	}
 
-	rinfo = &fi->last_readdir->r_reply_info;
+	rinfo = &dfi->last_readdir->r_reply_info;
 	dout("readdir frag %x num %d pos %llx chunk first %llx\n",
-	     fi->frag, rinfo->dir_nr, ctx->pos,
+	     dfi->frag, rinfo->dir_nr, ctx->pos,
 	     rinfo->dir_nr ? rinfo->dir_entries[0].offset : 0LL);
 
 	i = 0;
@@ -519,52 +519,55 @@ more:
 		ctx->pos++;
 	}
 
-	ceph_mdsc_put_request(fi->last_readdir);
-	fi->last_readdir = NULL;
+	ceph_mdsc_put_request(dfi->last_readdir);
+	dfi->last_readdir = NULL;
 
-	if (fi->next_offset > 2) {
-		frag = fi->frag;
+	if (dfi->next_offset > 2) {
+		frag = dfi->frag;
 		goto more;
 	}
 
 	/* more frags? */
-	if (!ceph_frag_is_rightmost(fi->frag)) {
-		frag = ceph_frag_next(fi->frag);
+	if (!ceph_frag_is_rightmost(dfi->frag)) {
+		frag = ceph_frag_next(dfi->frag);
 		if (is_hash_order(ctx->pos)) {
 			loff_t new_pos = ceph_make_fpos(ceph_frag_value(frag),
-							fi->next_offset, true);
+							dfi->next_offset, true);
 			if (new_pos > ctx->pos)
 				ctx->pos = new_pos;
 			/* keep last_name */
 		} else {
-			ctx->pos = ceph_make_fpos(frag, fi->next_offset, false);
-			kfree(fi->last_name);
-			fi->last_name = NULL;
+			ctx->pos = ceph_make_fpos(frag, dfi->next_offset,
+							false);
+			kfree(dfi->last_name);
+			dfi->last_name = NULL;
 		}
 		dout("readdir next frag is %x\n", frag);
 		goto more;
 	}
-	fi->flags |= CEPH_F_ATEND;
+	dfi->file_info.flags |= CEPH_F_ATEND;
 
 	/*
 	 * if dir_release_count still matches the dir, no dentries
 	 * were released during the whole readdir, and we should have
 	 * the complete dir contents in our cache.
 	 */
-	if (atomic64_read(&ci->i_release_count) == fi->dir_release_count) {
+	if (atomic64_read(&ci->i_release_count) ==
+					dfi->dir_release_count) {
 		spin_lock(&ci->i_ceph_lock);
-		if (fi->dir_ordered_count == atomic64_read(&ci->i_ordered_count)) {
+		if (dfi->dir_ordered_count ==
+				atomic64_read(&ci->i_ordered_count)) {
 			dout(" marking %p complete and ordered\n", inode);
 			/* use i_size to track number of entries in
 			 * readdir cache */
-			BUG_ON(fi->readdir_cache_idx < 0);
-			i_size_write(inode, fi->readdir_cache_idx *
+			BUG_ON(dfi->readdir_cache_idx < 0);
+			i_size_write(inode, dfi->readdir_cache_idx *
 				     sizeof(struct dentry*));
 		} else {
 			dout(" marking %p complete\n", inode);
 		}
-		__ceph_dir_set_complete(ci, fi->dir_release_count,
-					fi->dir_ordered_count);
+		__ceph_dir_set_complete(ci, dfi->dir_release_count,
+					dfi->dir_ordered_count);
 		spin_unlock(&ci->i_ceph_lock);
 	}
 
@@ -572,25 +575,25 @@ more:
 	return 0;
 }
 
-static void reset_readdir(struct ceph_file_info *fi)
+static void reset_readdir(struct ceph_dir_file_info *dfi)
 {
-	if (fi->last_readdir) {
-		ceph_mdsc_put_request(fi->last_readdir);
-		fi->last_readdir = NULL;
+	if (dfi->last_readdir) {
+		ceph_mdsc_put_request(dfi->last_readdir);
+		dfi->last_readdir = NULL;
 	}
-	kfree(fi->last_name);
-	fi->last_name = NULL;
-	fi->dir_release_count = 0;
-	fi->readdir_cache_idx = -1;
-	fi->next_offset = 2;  /* compensate for . and .. */
-	fi->flags &= ~CEPH_F_ATEND;
+	kfree(dfi->last_name);
+	dfi->last_name = NULL;
+	dfi->dir_release_count = 0;
+	dfi->readdir_cache_idx = -1;
+	dfi->next_offset = 2;  /* compensate for . and .. */
+	dfi->file_info.flags &= ~CEPH_F_ATEND;
 }
 
 /*
  * discard buffered readdir content on seekdir(0), or seek to new frag,
  * or seek prior to current chunk
  */
-static bool need_reset_readdir(struct ceph_file_info *fi, loff_t new_pos)
+static bool need_reset_readdir(struct ceph_dir_file_info *dfi, loff_t new_pos)
 {
 	struct ceph_mds_reply_info_parsed *rinfo;
 	loff_t chunk_offset;
@@ -599,10 +602,10 @@ static bool need_reset_readdir(struct ceph_file_info *fi, loff_t new_pos)
 	if (is_hash_order(new_pos)) {
 		/* no need to reset last_name for a forward seek when
 		 * dentries are sotred in hash order */
-	} else if (fi->frag != fpos_frag(new_pos)) {
+	} else if (dfi->frag != fpos_frag(new_pos)) {
 		return true;
 	}
-	rinfo = fi->last_readdir ? &fi->last_readdir->r_reply_info : NULL;
+	rinfo = dfi->last_readdir ? &dfi->last_readdir->r_reply_info : NULL;
 	if (!rinfo || !rinfo->dir_nr)
 		return true;
 	chunk_offset = rinfo->dir_entries[0].offset;
@@ -612,7 +615,7 @@ static bool need_reset_readdir(struct ceph_file_info *fi, loff_t new_pos)
 
 static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
 {
-	struct ceph_file_info *fi = file->private_data;
+	struct ceph_dir_file_info *dfi = file->private_data;
 	struct inode *inode = file->f_mapping->host;
 	loff_t retval;
 
@@ -630,20 +633,20 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
 	}
 
 	if (offset >= 0) {
-		if (need_reset_readdir(fi, offset)) {
+		if (need_reset_readdir(dfi, offset)) {
 			dout("dir_llseek dropping %p content\n", file);
-			reset_readdir(fi);
+			reset_readdir(dfi);
 		} else if (is_hash_order(offset) && offset > file->f_pos) {
 			/* for hash offset, we don't know if a forward seek
 			 * is within same frag */
-			fi->dir_release_count = 0;
-			fi->readdir_cache_idx = -1;
+			dfi->dir_release_count = 0;
+			dfi->readdir_cache_idx = -1;
 		}
 
 		if (offset != file->f_pos) {
 			file->f_pos = offset;
 			file->f_version = 0;
-			fi->flags &= ~CEPH_F_ATEND;
+			dfi->file_info.flags &= ~CEPH_F_ATEND;
 		}
 		retval = offset;
 	}
@@ -824,6 +827,9 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry,
 	if (ceph_snap(dir) != CEPH_NOSNAP)
 		return -EROFS;
 
+	if (ceph_quota_is_max_files_exceeded(dir))
+		return -EDQUOT;
+
 	err = ceph_pre_init_acls(dir, &mode, &acls);
 	if (err < 0)
 		return err;
@@ -877,6 +883,9 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry,
 	if (ceph_snap(dir) != CEPH_NOSNAP)
 		return -EROFS;
 
+	if (ceph_quota_is_max_files_exceeded(dir))
+		return -EDQUOT;
+
 	dout("symlink in dir %p dentry %p to '%s'\n", dir, dentry, dest);
 	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SYMLINK, USE_AUTH_MDS);
 	if (IS_ERR(req)) {
@@ -926,6 +935,12 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 		goto out;
 	}
 
+	if (op == CEPH_MDS_OP_MKDIR &&
+	    ceph_quota_is_max_files_exceeded(dir)) {
+		err = -EDQUOT;
+		goto out;
+	}
+
 	mode |= S_IFDIR;
 	err = ceph_pre_init_acls(dir, &mode, &acls);
 	if (err < 0)
@@ -1065,6 +1080,11 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
 		else
 			return -EROFS;
 	}
+	/* don't allow cross-quota renames */
+	if ((old_dir != new_dir) &&
+	    (!ceph_quota_is_same_realm(old_dir, new_dir)))
+		return -EXDEV;
+
 	dout("rename dir %p dentry %p to dir %p dentry %p\n",
 	     old_dir, old_dentry, new_dir, new_dentry);
 	req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
@@ -1351,7 +1371,7 @@ static void ceph_d_prune(struct dentry *dentry)
 static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
 			     loff_t *ppos)
 {
-	struct ceph_file_info *cf = file->private_data;
+	struct ceph_dir_file_info *dfi = file->private_data;
 	struct inode *inode = file_inode(file);
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	int left;
@@ -1360,12 +1380,12 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
 	if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT))
 		return -EISDIR;
 
-	if (!cf->dir_info) {
-		cf->dir_info = kmalloc(bufsize, GFP_KERNEL);
-		if (!cf->dir_info)
+	if (!dfi->dir_info) {
+		dfi->dir_info = kmalloc(bufsize, GFP_KERNEL);
+		if (!dfi->dir_info)
 			return -ENOMEM;
-		cf->dir_info_len =
-			snprintf(cf->dir_info, bufsize,
+		dfi->dir_info_len =
+			snprintf(dfi->dir_info, bufsize,
 				"entries:   %20lld\n"
 				" files:    %20lld\n"
 				" subdirs:  %20lld\n"
@@ -1385,10 +1405,10 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
 				(long)ci->i_rctime.tv_nsec);
 	}
 
-	if (*ppos >= cf->dir_info_len)
+	if (*ppos >= dfi->dir_info_len)
 		return 0;
-	size = min_t(unsigned, size, cf->dir_info_len-*ppos);
-	left = copy_to_user(buf, cf->dir_info + *ppos, size);
+	size = min_t(unsigned, size, dfi->dir_info_len-*ppos);
+	left = copy_to_user(buf, dfi->dir_info + *ppos, size);
 	if (left == size)
 		return -EFAULT;
 	*ppos += (size - left);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index b67eec3532a1..f85040d73e3d 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -30,6 +30,8 @@ static __le32 ceph_flags_sys2wire(u32 flags)
 		break;
 	}
 
+	flags &= ~O_ACCMODE;
+
 #define ceph_sys2wire(a) if (flags & a) { wire_flags |= CEPH_##a; flags &= ~a; }
 
 	ceph_sys2wire(O_CREAT);
@@ -41,7 +43,7 @@ static __le32 ceph_flags_sys2wire(u32 flags)
 #undef ceph_sys2wire
 
 	if (flags)
-		dout("unused open flags: %x", flags);
+		dout("unused open flags: %x\n", flags);
 
 	return cpu_to_le32(wire_flags);
 }
@@ -159,13 +161,50 @@ out:
 	return req;
 }
 
+static int ceph_init_file_info(struct inode *inode, struct file *file,
+					int fmode, bool isdir)
+{
+	struct ceph_file_info *fi;
+
+	dout("%s %p %p 0%o (%s)\n", __func__, inode, file,
+			inode->i_mode, isdir ? "dir" : "regular");
+	BUG_ON(inode->i_fop->release != ceph_release);
+
+	if (isdir) {
+		struct ceph_dir_file_info *dfi =
+			kmem_cache_zalloc(ceph_dir_file_cachep, GFP_KERNEL);
+		if (!dfi) {
+			ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */
+			return -ENOMEM;
+		}
+
+		file->private_data = dfi;
+		fi = &dfi->file_info;
+		dfi->next_offset = 2;
+		dfi->readdir_cache_idx = -1;
+	} else {
+		fi = kmem_cache_zalloc(ceph_file_cachep, GFP_KERNEL);
+		if (!fi) {
+			ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */
+			return -ENOMEM;
+		}
+
+		file->private_data = fi;
+	}
+
+	fi->fmode = fmode;
+	spin_lock_init(&fi->rw_contexts_lock);
+	INIT_LIST_HEAD(&fi->rw_contexts);
+
+	return 0;
+}
+
 /*
  * initialize private struct file data.
  * if we fail, clean up by dropping fmode reference on the ceph_inode
  */
 static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
 {
-	struct ceph_file_info *cf;
 	int ret = 0;
 
 	switch (inode->i_mode & S_IFMT) {
@@ -173,22 +212,10 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
 		ceph_fscache_register_inode_cookie(inode);
 		ceph_fscache_file_set_cookie(inode, file);
 	case S_IFDIR:
-		dout("init_file %p %p 0%o (regular)\n", inode, file,
-		     inode->i_mode);
-		cf = kmem_cache_zalloc(ceph_file_cachep, GFP_KERNEL);
-		if (!cf) {
-			ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */
-			return -ENOMEM;
-		}
-		cf->fmode = fmode;
-
-		spin_lock_init(&cf->rw_contexts_lock);
-		INIT_LIST_HEAD(&cf->rw_contexts);
-
-		cf->next_offset = 2;
-		cf->readdir_cache_idx = -1;
-		file->private_data = cf;
-		BUG_ON(inode->i_fop->release != ceph_release);
+		ret = ceph_init_file_info(inode, file, fmode,
+						S_ISDIR(inode->i_mode));
+		if (ret)
+			return ret;
 		break;
 
 	case S_IFLNK:
@@ -278,11 +305,11 @@ int ceph_open(struct inode *inode, struct file *file)
 	struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
 	struct ceph_mds_client *mdsc = fsc->mdsc;
 	struct ceph_mds_request *req;
-	struct ceph_file_info *cf = file->private_data;
+	struct ceph_file_info *fi = file->private_data;
 	int err;
 	int flags, fmode, wanted;
 
-	if (cf) {
+	if (fi) {
 		dout("open file %p is already opened\n", file);
 		return 0;
 	}
@@ -375,7 +402,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
 	struct ceph_mds_request *req;
 	struct dentry *dn;
 	struct ceph_acls_info acls = {};
-       int mask;
+	int mask;
 	int err;
 
 	dout("atomic_open %p dentry %p '%pd' %s flags %d mode 0%o\n",
@@ -386,6 +413,8 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
 		return -ENAMETOOLONG;
 
 	if (flags & O_CREAT) {
+		if (ceph_quota_is_max_files_exceeded(dir))
+			return -EDQUOT;
 		err = ceph_pre_init_acls(dir, &mode, &acls);
 		if (err < 0)
 			return err;
@@ -460,16 +489,27 @@ out_acl:
 int ceph_release(struct inode *inode, struct file *file)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_file_info *cf = file->private_data;
 
-	dout("release inode %p file %p\n", inode, file);
-	ceph_put_fmode(ci, cf->fmode);
-	if (cf->last_readdir)
-		ceph_mdsc_put_request(cf->last_readdir);
-	kfree(cf->last_name);
-	kfree(cf->dir_info);
-	WARN_ON(!list_empty(&cf->rw_contexts));
-	kmem_cache_free(ceph_file_cachep, cf);
+	if (S_ISDIR(inode->i_mode)) {
+		struct ceph_dir_file_info *dfi = file->private_data;
+		dout("release inode %p dir file %p\n", inode, file);
+		WARN_ON(!list_empty(&dfi->file_info.rw_contexts));
+
+		ceph_put_fmode(ci, dfi->file_info.fmode);
+
+		if (dfi->last_readdir)
+			ceph_mdsc_put_request(dfi->last_readdir);
+		kfree(dfi->last_name);
+		kfree(dfi->dir_info);
+		kmem_cache_free(ceph_dir_file_cachep, dfi);
+	} else {
+		struct ceph_file_info *fi = file->private_data;
+		dout("release inode %p regular file %p\n", inode, file);
+		WARN_ON(!list_empty(&fi->rw_contexts));
+
+		ceph_put_fmode(ci, fi->fmode);
+		kmem_cache_free(ceph_file_cachep, fi);
+	}
 
 	/* wake up anyone waiting for caps on this inode */
 	wake_up_all(&ci->i_cap_wq);
@@ -1338,6 +1378,11 @@ retry_snap:
 
 	pos = iocb->ki_pos;
 	count = iov_iter_count(from);
+	if (ceph_quota_is_max_bytes_exceeded(inode, pos + count)) {
+		err = -EDQUOT;
+		goto out;
+	}
+
 	err = file_remove_privs(file);
 	if (err)
 		goto out;
@@ -1419,6 +1464,7 @@ retry_snap:
 
 	if (written >= 0) {
 		int dirty;
+
 		spin_lock(&ci->i_ceph_lock);
 		ci->i_inline_version = CEPH_INLINE_NONE;
 		dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR,
@@ -1426,6 +1472,8 @@ retry_snap:
 		spin_unlock(&ci->i_ceph_lock);
 		if (dirty)
 			__mark_inode_dirty(inode, dirty);
+		if (ceph_quota_is_max_bytes_approaching(inode, iocb->ki_pos))
+			ceph_check_caps(ci, CHECK_CAPS_NODELAY, NULL);
 	}
 
 	dout("aio_write %p %llx.%llx %llu~%u  dropping cap refs on %s\n",
@@ -1668,6 +1716,12 @@ static long ceph_fallocate(struct file *file, int mode,
 		goto unlock;
 	}
 
+	if (!(mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE)) &&
+	    ceph_quota_is_max_bytes_exceeded(inode, offset + length)) {
+		ret = -EDQUOT;
+		goto unlock;
+	}
+
 	if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) &&
 	    !(mode & FALLOC_FL_PUNCH_HOLE)) {
 		ret = -ENOSPC;
@@ -1716,6 +1770,9 @@ static long ceph_fallocate(struct file *file, int mode,
 		spin_unlock(&ci->i_ceph_lock);
 		if (dirty)
 			__mark_inode_dirty(inode, dirty);
+		if ((endoff > size) &&
+		    ceph_quota_is_max_bytes_approaching(inode, endoff))
+			ceph_check_caps(ci, CHECK_CAPS_NODELAY, NULL);
 	}
 
 	ceph_put_cap_refs(ci, got);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index c6ec5aa46100..8bf60250309e 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -441,6 +441,9 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
 	atomic64_set(&ci->i_complete_seq[1], 0);
 	ci->i_symlink = NULL;
 
+	ci->i_max_bytes = 0;
+	ci->i_max_files = 0;
+
 	memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout));
 	RCU_INIT_POINTER(ci->i_layout.pool_ns, NULL);
 
@@ -536,6 +539,9 @@ void ceph_destroy_inode(struct inode *inode)
 
 	ceph_queue_caps_release(inode);
 
+	if (__ceph_has_any_quota(ci))
+		ceph_adjust_quota_realms_count(inode, false);
+
 	/*
 	 * we may still have a snap_realm reference if there are stray
 	 * caps in i_snap_caps.
@@ -548,6 +554,9 @@ void ceph_destroy_inode(struct inode *inode)
 		dout(" dropping residual ref to snap realm %p\n", realm);
 		spin_lock(&realm->inodes_with_caps_lock);
 		list_del_init(&ci->i_snap_realm_item);
+		ci->i_snap_realm = NULL;
+		if (realm->ino == ci->i_vino.ino)
+			realm->inode = NULL;
 		spin_unlock(&realm->inodes_with_caps_lock);
 		ceph_put_snap_realm(mdsc, realm);
 	}
@@ -790,6 +799,8 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
 	inode->i_rdev = le32_to_cpu(info->rdev);
 	inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
 
+	__ceph_update_quota(ci, iinfo->max_bytes, iinfo->max_files);
+
 	if ((new_version || (new_issued & CEPH_CAP_AUTH_SHARED)) &&
 	    (issued & CEPH_CAP_AUTH_EXCL) == 0) {
 		inode->i_mode = le32_to_cpu(info->mode);
@@ -1867,20 +1878,9 @@ retry:
 	 * possibly truncate them.. so write AND block!
 	 */
 	if (ci->i_wrbuffer_ref_head < ci->i_wrbuffer_ref) {
-		struct ceph_cap_snap *capsnap;
-		to = ci->i_truncate_size;
-		list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
-			// MDS should have revoked Frw caps
-			WARN_ON_ONCE(capsnap->writing);
-			if (capsnap->dirty_pages && capsnap->size > to)
-				to = capsnap->size;
-		}
 		spin_unlock(&ci->i_ceph_lock);
 		dout("__do_pending_vmtruncate %p flushing snaps first\n",
 		     inode);
-
-		truncate_pagecache(inode, to);
-
 		filemap_write_and_wait_range(&inode->i_data, 0,
 					     inode->i_sb->s_maxbytes);
 		goto retry;
@@ -2152,6 +2152,10 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
 	if (err != 0)
 		return err;
 
+	if ((attr->ia_valid & ATTR_SIZE) &&
+	    ceph_quota_is_max_bytes_exceeded(inode, attr->ia_size))
+		return -EDQUOT;
+
 	err = __ceph_setattr(inode, attr);
 
 	if (err >= 0 && (attr->ia_valid & ATTR_MODE))
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index 851aa69ec8f0..c90f03beb15d 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -5,7 +5,7 @@
 #include "super.h"
 #include "mds_client.h"
 #include "ioctl.h"
-
+#include <linux/ceph/striper.h>
 
 /*
  * ioctls
@@ -185,7 +185,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
 		&ceph_sb_to_client(inode->i_sb)->client->osdc;
 	struct ceph_object_locator oloc;
 	CEPH_DEFINE_OID_ONSTACK(oid);
-	u64 len = 1, olen;
+	u32 xlen;
 	u64 tmp;
 	struct ceph_pg pgid;
 	int r;
@@ -195,13 +195,8 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
 		return -EFAULT;
 
 	down_read(&osdc->lock);
-	r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, len,
-					  &dl.object_no, &dl.object_offset,
-					  &olen);
-	if (r < 0) {
-		up_read(&osdc->lock);
-		return -EIO;
-	}
+	ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, 1,
+				      &dl.object_no, &dl.object_offset, &xlen);
 	dl.file_offset -= dl.object_offset;
 	dl.object_size = ci->i_layout.object_size;
 	dl.block_size = ci->i_layout.stripe_unit;
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index 9e66f69ee8a5..9dae2ec7e1fa 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -95,7 +95,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode,
 	owner = secure_addr(fl->fl_owner);
 
 	dout("ceph_lock_message: rule: %d, op: %d, owner: %llx, pid: %llu, "
-	     "start: %llu, length: %llu, wait: %d, type: %d", (int)lock_type,
+	     "start: %llu, length: %llu, wait: %d, type: %d\n", (int)lock_type,
 	     (int)operation, owner, (u64)fl->fl_pid, fl->fl_start, length,
 	     wait, fl->fl_type);
 
@@ -132,7 +132,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode,
 	}
 	ceph_mdsc_put_request(req);
 	dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, "
-	     "length: %llu, wait: %d, type: %d, err code %d", (int)lock_type,
+	     "length: %llu, wait: %d, type: %d, err code %d\n", (int)lock_type,
 	     (int)operation, (u64)fl->fl_pid, fl->fl_start,
 	     length, wait, fl->fl_type, err);
 	return err;
@@ -226,7 +226,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
 	if (__mandatory_lock(file->f_mapping->host) && fl->fl_type != F_UNLCK)
 		return -ENOLCK;
 
-	dout("ceph_lock, fl_owner: %p", fl->fl_owner);
+	dout("ceph_lock, fl_owner: %p\n", fl->fl_owner);
 
 	/* set wait bit as appropriate, then make command as Ceph expects it*/
 	if (IS_GETLK(cmd))
@@ -264,7 +264,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
 	err = ceph_lock_message(CEPH_LOCK_FCNTL, op, inode, lock_cmd, wait, fl);
 	if (!err) {
 		if (op == CEPH_MDS_OP_SETFILELOCK) {
-			dout("mds locked, locking locally");
+			dout("mds locked, locking locally\n");
 			err = posix_lock_file(file, fl, NULL);
 			if (err) {
 				/* undo! This should only happen if
@@ -272,7 +272,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
 				 * deadlock. */
 				ceph_lock_message(CEPH_LOCK_FCNTL, op, inode,
 						  CEPH_LOCK_UNLOCK, 0, fl);
-				dout("got %d on posix_lock_file, undid lock",
+				dout("got %d on posix_lock_file, undid lock\n",
 				     err);
 			}
 		}
@@ -294,7 +294,7 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
 	if (fl->fl_type & LOCK_MAND)
 		return -EOPNOTSUPP;
 
-	dout("ceph_flock, fl_file: %p", fl->fl_file);
+	dout("ceph_flock, fl_file: %p\n", fl->fl_file);
 
 	spin_lock(&ci->i_ceph_lock);
 	if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) {
@@ -329,7 +329,7 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
 			ceph_lock_message(CEPH_LOCK_FLOCK,
 					  CEPH_MDS_OP_SETFILELOCK,
 					  inode, CEPH_LOCK_UNLOCK, 0, fl);
-			dout("got %d on locks_lock_file_wait, undid lock", err);
+			dout("got %d on locks_lock_file_wait, undid lock\n", err);
 		}
 	}
 	return err;
@@ -356,7 +356,7 @@ void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count)
 			++(*flock_count);
 		spin_unlock(&ctx->flc_lock);
 	}
-	dout("counted %d flock locks and %d fcntl locks",
+	dout("counted %d flock locks and %d fcntl locks\n",
 	     *flock_count, *fcntl_count);
 }
 
@@ -384,7 +384,7 @@ static int lock_to_ceph_filelock(struct file_lock *lock,
 		cephlock->type = CEPH_LOCK_UNLOCK;
 		break;
 	default:
-		dout("Have unknown lock type %d", lock->fl_type);
+		dout("Have unknown lock type %d\n", lock->fl_type);
 		err = -EINVAL;
 	}
 
@@ -407,7 +407,7 @@ int ceph_encode_locks_to_buffer(struct inode *inode,
 	int seen_flock = 0;
 	int l = 0;
 
-	dout("encoding %d flock and %d fcntl locks", num_flock_locks,
+	dout("encoding %d flock and %d fcntl locks\n", num_flock_locks,
 	     num_fcntl_locks);
 
 	if (!ctx)
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 2e8f90f96540..5ece2e6ad154 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -100,6 +100,26 @@ static int parse_reply_info_in(void **p, void *end,
 	} else
 		info->inline_version = CEPH_INLINE_NONE;
 
+	if (features & CEPH_FEATURE_MDS_QUOTA) {
+		u8 struct_v, struct_compat;
+		u32 struct_len;
+
+		/*
+		 * both struct_v and struct_compat are expected to be >= 1
+		 */
+		ceph_decode_8_safe(p, end, struct_v, bad);
+		ceph_decode_8_safe(p, end, struct_compat, bad);
+		if (!struct_v || !struct_compat)
+			goto bad;
+		ceph_decode_32_safe(p, end, struct_len, bad);
+		ceph_decode_need(p, end, struct_len, bad);
+		ceph_decode_64_safe(p, end, info->max_bytes, bad);
+		ceph_decode_64_safe(p, end, info->max_files, bad);
+	} else {
+		info->max_bytes = 0;
+		info->max_files = 0;
+	}
+
 	info->pool_ns_len = 0;
 	info->pool_ns_data = NULL;
 	if (features & CEPH_FEATURE_FS_FILE_LAYOUT_V2) {
@@ -384,7 +404,7 @@ static struct ceph_mds_session *get_session(struct ceph_mds_session *s)
 		     refcount_read(&s->s_ref)-1, refcount_read(&s->s_ref));
 		return s;
 	} else {
-		dout("mdsc get_session %p 0 -- FAIL", s);
+		dout("mdsc get_session %p 0 -- FAIL\n", s);
 		return NULL;
 	}
 }
@@ -419,9 +439,10 @@ struct ceph_mds_session *__ceph_lookup_mds_session(struct ceph_mds_client *mdsc,
 
 static bool __have_session(struct ceph_mds_client *mdsc, int mds)
 {
-	if (mds >= mdsc->max_sessions)
+	if (mds >= mdsc->max_sessions || !mdsc->sessions[mds])
 		return false;
-	return mdsc->sessions[mds];
+	else
+		return true;
 }
 
 static int __verify_registered_session(struct ceph_mds_client *mdsc,
@@ -448,6 +469,25 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
 	s = kzalloc(sizeof(*s), GFP_NOFS);
 	if (!s)
 		return ERR_PTR(-ENOMEM);
+
+	if (mds >= mdsc->max_sessions) {
+		int newmax = 1 << get_count_order(mds + 1);
+		struct ceph_mds_session **sa;
+
+		dout("%s: realloc to %d\n", __func__, newmax);
+		sa = kcalloc(newmax, sizeof(void *), GFP_NOFS);
+		if (!sa)
+			goto fail_realloc;
+		if (mdsc->sessions) {
+			memcpy(sa, mdsc->sessions,
+			       mdsc->max_sessions * sizeof(void *));
+			kfree(mdsc->sessions);
+		}
+		mdsc->sessions = sa;
+		mdsc->max_sessions = newmax;
+	}
+
+	dout("%s: mds%d\n", __func__, mds);
 	s->s_mdsc = mdsc;
 	s->s_mds = mds;
 	s->s_state = CEPH_MDS_SESSION_NEW;
@@ -476,23 +516,6 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
 	INIT_LIST_HEAD(&s->s_cap_releases);
 	INIT_LIST_HEAD(&s->s_cap_flushing);
 
-	dout("register_session mds%d\n", mds);
-	if (mds >= mdsc->max_sessions) {
-		int newmax = 1 << get_count_order(mds+1);
-		struct ceph_mds_session **sa;
-
-		dout("register_session realloc to %d\n", newmax);
-		sa = kcalloc(newmax, sizeof(void *), GFP_NOFS);
-		if (!sa)
-			goto fail_realloc;
-		if (mdsc->sessions) {
-			memcpy(sa, mdsc->sessions,
-			       mdsc->max_sessions * sizeof(void *));
-			kfree(mdsc->sessions);
-		}
-		mdsc->sessions = sa;
-		mdsc->max_sessions = newmax;
-	}
 	mdsc->sessions[mds] = s;
 	atomic_inc(&mdsc->num_sessions);
 	refcount_inc(&s->s_ref);  /* one ref to sessions[], one to caller */
@@ -2531,10 +2554,10 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
 	 * Otherwise we just have to return an ESTALE
 	 */
 	if (result == -ESTALE) {
-		dout("got ESTALE on request %llu", req->r_tid);
+		dout("got ESTALE on request %llu\n", req->r_tid);
 		req->r_resend_mds = -1;
 		if (req->r_direct_mode != USE_AUTH_MDS) {
-			dout("not using auth, setting for that now");
+			dout("not using auth, setting for that now\n");
 			req->r_direct_mode = USE_AUTH_MDS;
 			__do_request(mdsc, req);
 			mutex_unlock(&mdsc->mutex);
@@ -2542,13 +2565,13 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
 		} else  {
 			int mds = __choose_mds(mdsc, req);
 			if (mds >= 0 && mds != req->r_session->s_mds) {
-				dout("but auth changed, so resending");
+				dout("but auth changed, so resending\n");
 				__do_request(mdsc, req);
 				mutex_unlock(&mdsc->mutex);
 				goto out;
 			}
 		}
-		dout("have to return ESTALE on request %llu", req->r_tid);
+		dout("have to return ESTALE on request %llu\n", req->r_tid);
 	}
 
 
@@ -3470,13 +3493,12 @@ void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session,
 }
 
 /*
- * drop all leases (and dentry refs) in preparation for umount
+ * lock unlock sessions, to wait ongoing session activities
  */
-static void drop_leases(struct ceph_mds_client *mdsc)
+static void lock_unlock_sessions(struct ceph_mds_client *mdsc)
 {
 	int i;
 
-	dout("drop_leases\n");
 	mutex_lock(&mdsc->mutex);
 	for (i = 0; i < mdsc->max_sessions; i++) {
 		struct ceph_mds_session *s = __ceph_lookup_mds_session(mdsc, i);
@@ -3572,7 +3594,6 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
 	if (!mdsc)
 		return -ENOMEM;
 	mdsc->fsc = fsc;
-	fsc->mdsc = mdsc;
 	mutex_init(&mdsc->mutex);
 	mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS);
 	if (!mdsc->mdsmap) {
@@ -3580,6 +3601,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
 		return -ENOMEM;
 	}
 
+	fsc->mdsc = mdsc;
 	init_completion(&mdsc->safe_umount_waiters);
 	init_waitqueue_head(&mdsc->session_close_wq);
 	INIT_LIST_HEAD(&mdsc->waiting_for_map);
@@ -3587,6 +3609,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
 	atomic_set(&mdsc->num_sessions, 0);
 	mdsc->max_sessions = 0;
 	mdsc->stopping = 0;
+	atomic64_set(&mdsc->quotarealms_count, 0);
 	mdsc->last_snap_seq = 0;
 	init_rwsem(&mdsc->snap_rwsem);
 	mdsc->snap_realms = RB_ROOT;
@@ -3660,7 +3683,7 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
 	dout("pre_umount\n");
 	mdsc->stopping = 1;
 
-	drop_leases(mdsc);
+	lock_unlock_sessions(mdsc);
 	ceph_flush_dirty_caps(mdsc);
 	wait_requests(mdsc);
 
@@ -3858,6 +3881,9 @@ void ceph_mdsc_destroy(struct ceph_fs_client *fsc)
 	struct ceph_mds_client *mdsc = fsc->mdsc;
 	dout("mdsc_destroy %p\n", mdsc);
 
+	if (!mdsc)
+		return;
+
 	/* flush out any connection work with references to us */
 	ceph_msgr_flush();
 
@@ -4077,6 +4103,9 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
 	case CEPH_MSG_CLIENT_LEASE:
 		handle_lease(mdsc, s, msg);
 		break;
+	case CEPH_MSG_CLIENT_QUOTA:
+		ceph_handle_quota(mdsc, s, msg);
+		break;
 
 	default:
 		pr_err("received unknown message type %d %s\n", type,
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 71e3b783ee6f..2ec3b5b35067 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -49,6 +49,8 @@ struct ceph_mds_reply_info_in {
 	char *inline_data;
 	u32 pool_ns_len;
 	char *pool_ns_data;
+	u64 max_bytes;
+	u64 max_files;
 };
 
 struct ceph_mds_reply_dir_entry {
@@ -312,6 +314,8 @@ struct ceph_mds_client {
 	int                     max_sessions;  /* len of s_mds_sessions */
 	int                     stopping;      /* true if shutting down */
 
+	atomic64_t		quotarealms_count; /* # realms with quota */
+
 	/*
 	 * snap_rwsem will cover cap linkage into snaprealms, and
 	 * realm snap contexts.  (later, we can do per-realm snap
diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c
new file mode 100644
index 000000000000..242bfa5c0539
--- /dev/null
+++ b/fs/ceph/quota.c
@@ -0,0 +1,361 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * quota.c - CephFS quota
+ *
+ * Copyright (C) 2017-2018 SUSE
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/statfs.h>
+
+#include "super.h"
+#include "mds_client.h"
+
+void ceph_adjust_quota_realms_count(struct inode *inode, bool inc)
+{
+	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
+	if (inc)
+		atomic64_inc(&mdsc->quotarealms_count);
+	else
+		atomic64_dec(&mdsc->quotarealms_count);
+}
+
+static inline bool ceph_has_realms_with_quotas(struct inode *inode)
+{
+	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
+	return atomic64_read(&mdsc->quotarealms_count) > 0;
+}
+
+void ceph_handle_quota(struct ceph_mds_client *mdsc,
+		       struct ceph_mds_session *session,
+		       struct ceph_msg *msg)
+{
+	struct super_block *sb = mdsc->fsc->sb;
+	struct ceph_mds_quota *h = msg->front.iov_base;
+	struct ceph_vino vino;
+	struct inode *inode;
+	struct ceph_inode_info *ci;
+
+	if (msg->front.iov_len != sizeof(*h)) {
+		pr_err("%s corrupt message mds%d len %d\n", __func__,
+		       session->s_mds, (int)msg->front.iov_len);
+		ceph_msg_dump(msg);
+		return;
+	}
+
+	/* increment msg sequence number */
+	mutex_lock(&session->s_mutex);
+	session->s_seq++;
+	mutex_unlock(&session->s_mutex);
+
+	/* lookup inode */
+	vino.ino = le64_to_cpu(h->ino);
+	vino.snap = CEPH_NOSNAP;
+	inode = ceph_find_inode(sb, vino);
+	if (!inode) {
+		pr_warn("Failed to find inode %llu\n", vino.ino);
+		return;
+	}
+	ci = ceph_inode(inode);
+
+	spin_lock(&ci->i_ceph_lock);
+	ci->i_rbytes = le64_to_cpu(h->rbytes);
+	ci->i_rfiles = le64_to_cpu(h->rfiles);
+	ci->i_rsubdirs = le64_to_cpu(h->rsubdirs);
+	__ceph_update_quota(ci, le64_to_cpu(h->max_bytes),
+		            le64_to_cpu(h->max_files));
+	spin_unlock(&ci->i_ceph_lock);
+
+	iput(inode);
+}
+
+/*
+ * This function walks through the snaprealm for an inode and returns the
+ * ceph_snap_realm for the first snaprealm that has quotas set (either max_files
+ * or max_bytes).  If the root is reached, return the root ceph_snap_realm
+ * instead.
+ *
+ * Note that the caller is responsible for calling ceph_put_snap_realm() on the
+ * returned realm.
+ */
+static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc,
+					       struct inode *inode)
+{
+	struct ceph_inode_info *ci = NULL;
+	struct ceph_snap_realm *realm, *next;
+	struct inode *in;
+	bool has_quota;
+
+	if (ceph_snap(inode) != CEPH_NOSNAP)
+		return NULL;
+
+	realm = ceph_inode(inode)->i_snap_realm;
+	if (realm)
+		ceph_get_snap_realm(mdsc, realm);
+	else
+		pr_err_ratelimited("get_quota_realm: ino (%llx.%llx) "
+				   "null i_snap_realm\n", ceph_vinop(inode));
+	while (realm) {
+		spin_lock(&realm->inodes_with_caps_lock);
+		in = realm->inode ? igrab(realm->inode) : NULL;
+		spin_unlock(&realm->inodes_with_caps_lock);
+		if (!in)
+			break;
+
+		ci = ceph_inode(in);
+		has_quota = __ceph_has_any_quota(ci);
+		iput(in);
+
+		next = realm->parent;
+		if (has_quota || !next)
+		       return realm;
+
+		ceph_get_snap_realm(mdsc, next);
+		ceph_put_snap_realm(mdsc, realm);
+		realm = next;
+	}
+	if (realm)
+		ceph_put_snap_realm(mdsc, realm);
+
+	return NULL;
+}
+
+bool ceph_quota_is_same_realm(struct inode *old, struct inode *new)
+{
+	struct ceph_mds_client *mdsc = ceph_inode_to_client(old)->mdsc;
+	struct ceph_snap_realm *old_realm, *new_realm;
+	bool is_same;
+
+	down_read(&mdsc->snap_rwsem);
+	old_realm = get_quota_realm(mdsc, old);
+	new_realm = get_quota_realm(mdsc, new);
+	is_same = (old_realm == new_realm);
+	up_read(&mdsc->snap_rwsem);
+
+	if (old_realm)
+		ceph_put_snap_realm(mdsc, old_realm);
+	if (new_realm)
+		ceph_put_snap_realm(mdsc, new_realm);
+
+	return is_same;
+}
+
+enum quota_check_op {
+	QUOTA_CHECK_MAX_FILES_OP,	/* check quota max_files limit */
+	QUOTA_CHECK_MAX_BYTES_OP,	/* check quota max_bytes limit */
+	QUOTA_CHECK_MAX_BYTES_APPROACHING_OP	/* check if quota max_bytes
+						   limit is approaching */
+};
+
+/*
+ * check_quota_exceeded() will walk up the snaprealm hierarchy and, for each
+ * realm, it will execute quota check operation defined by the 'op' parameter.
+ * The snaprealm walk is interrupted if the quota check detects that the quota
+ * is exceeded or if the root inode is reached.
+ */
+static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op,
+				 loff_t delta)
+{
+	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
+	struct ceph_inode_info *ci;
+	struct ceph_snap_realm *realm, *next;
+	struct inode *in;
+	u64 max, rvalue;
+	bool exceeded = false;
+
+	if (ceph_snap(inode) != CEPH_NOSNAP)
+		return false;
+
+	down_read(&mdsc->snap_rwsem);
+	realm = ceph_inode(inode)->i_snap_realm;
+	if (realm)
+		ceph_get_snap_realm(mdsc, realm);
+	else
+		pr_err_ratelimited("check_quota_exceeded: ino (%llx.%llx) "
+				   "null i_snap_realm\n", ceph_vinop(inode));
+	while (realm) {
+		spin_lock(&realm->inodes_with_caps_lock);
+		in = realm->inode ? igrab(realm->inode) : NULL;
+		spin_unlock(&realm->inodes_with_caps_lock);
+		if (!in)
+			break;
+
+		ci = ceph_inode(in);
+		spin_lock(&ci->i_ceph_lock);
+		if (op == QUOTA_CHECK_MAX_FILES_OP) {
+			max = ci->i_max_files;
+			rvalue = ci->i_rfiles + ci->i_rsubdirs;
+		} else {
+			max = ci->i_max_bytes;
+			rvalue = ci->i_rbytes;
+		}
+		spin_unlock(&ci->i_ceph_lock);
+		switch (op) {
+		case QUOTA_CHECK_MAX_FILES_OP:
+			exceeded = (max && (rvalue >= max));
+			break;
+		case QUOTA_CHECK_MAX_BYTES_OP:
+			exceeded = (max && (rvalue + delta > max));
+			break;
+		case QUOTA_CHECK_MAX_BYTES_APPROACHING_OP:
+			if (max) {
+				if (rvalue >= max)
+					exceeded = true;
+				else {
+					/*
+					 * when we're writing more than 1/16th
+					 * of the available space
+					 */
+					exceeded =
+						(((max - rvalue) >> 4) < delta);
+				}
+			}
+			break;
+		default:
+			/* Shouldn't happen */
+			pr_warn("Invalid quota check op (%d)\n", op);
+			exceeded = true; /* Just break the loop */
+		}
+		iput(in);
+
+		next = realm->parent;
+		if (exceeded || !next)
+			break;
+		ceph_get_snap_realm(mdsc, next);
+		ceph_put_snap_realm(mdsc, realm);
+		realm = next;
+	}
+	ceph_put_snap_realm(mdsc, realm);
+	up_read(&mdsc->snap_rwsem);
+
+	return exceeded;
+}
+
+/*
+ * ceph_quota_is_max_files_exceeded - check if we can create a new file
+ * @inode:	directory where a new file is being created
+ *
+ * This function returns true if the max_files quota does not allow a new file
+ * to be created.  It is necessary to walk through the snaprealm hierarchy
+ * (until the FS root) to check all realms with quotas set.
+ */
+bool ceph_quota_is_max_files_exceeded(struct inode *inode)
+{
+	if (!ceph_has_realms_with_quotas(inode))
+		return false;
+
+	WARN_ON(!S_ISDIR(inode->i_mode));
+
+	return check_quota_exceeded(inode, QUOTA_CHECK_MAX_FILES_OP, 0);
+}
+
+/*
+ * ceph_quota_is_max_bytes_exceeded - check if we can write to a file
+ * @inode:	inode being written
+ * @newsize:	new size if write succeeds
+ *
+ * This function returns true if the max_bytes quota does not allow the file
+ * size to reach @newsize; it returns false otherwise.
+ */
+bool ceph_quota_is_max_bytes_exceeded(struct inode *inode, loff_t newsize)
+{
+	loff_t size = i_size_read(inode);
+
+	if (!ceph_has_realms_with_quotas(inode))
+		return false;
+
+	/* return immediately if we're decreasing file size */
+	if (newsize <= size)
+		return false;
+
+	return check_quota_exceeded(inode, QUOTA_CHECK_MAX_BYTES_OP, (newsize - size));
+}
+
+/*
+ * ceph_quota_is_max_bytes_approaching - check if we're reaching max_bytes
+ * @inode:	inode being written
+ * @newsize:	new size if write succeeds
+ *
+ * This function returns true if the new file size @newsize will be consuming
+ * more than 1/16th of the available quota space; it returns false otherwise.
+ */
+bool ceph_quota_is_max_bytes_approaching(struct inode *inode, loff_t newsize)
+{
+	loff_t size = ceph_inode(inode)->i_reported_size;
+
+	if (!ceph_has_realms_with_quotas(inode))
+		return false;
+
+	/* return immediately if we're decreasing file size */
+	if (newsize <= size)
+		return false;
+
+	return check_quota_exceeded(inode, QUOTA_CHECK_MAX_BYTES_APPROACHING_OP,
+				    (newsize - size));
+}
+
+/*
+ * ceph_quota_update_statfs - if root has quota update statfs with quota status
+ * @fsc:	filesystem client instance
+ * @buf:	statfs to update
+ *
+ * If the mounted filesystem root has max_bytes quota set, update the filesystem
+ * statistics with the quota status.
+ *
+ * This function returns true if the stats have been updated, false otherwise.
+ */
+bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf)
+{
+	struct ceph_mds_client *mdsc = fsc->mdsc;
+	struct ceph_inode_info *ci;
+	struct ceph_snap_realm *realm;
+	struct inode *in;
+	u64 total = 0, used, free;
+	bool is_updated = false;
+
+	down_read(&mdsc->snap_rwsem);
+	realm = get_quota_realm(mdsc, d_inode(fsc->sb->s_root));
+	up_read(&mdsc->snap_rwsem);
+	if (!realm)
+		return false;
+
+	spin_lock(&realm->inodes_with_caps_lock);
+	in = realm->inode ? igrab(realm->inode) : NULL;
+	spin_unlock(&realm->inodes_with_caps_lock);
+	if (in) {
+		ci = ceph_inode(in);
+		spin_lock(&ci->i_ceph_lock);
+		if (ci->i_max_bytes) {
+			total = ci->i_max_bytes >> CEPH_BLOCK_SHIFT;
+			used = ci->i_rbytes >> CEPH_BLOCK_SHIFT;
+			/* It is possible for a quota to be exceeded.
+			 * Report 'zero' in that case
+			 */
+			free = total > used ? total - used : 0;
+		}
+		spin_unlock(&ci->i_ceph_lock);
+		if (total) {
+			buf->f_blocks = total;
+			buf->f_bfree = free;
+			buf->f_bavail = free;
+			is_updated = true;
+		}
+		iput(in);
+	}
+	ceph_put_snap_realm(mdsc, realm);
+
+	return is_updated;
+}
+
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 07cf95e6413d..041c27ea8de1 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -931,6 +931,8 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
 			list_add(&ci->i_snap_realm_item,
 				 &realm->inodes_with_caps);
 			ci->i_snap_realm = realm;
+			if (realm->ino == ci->i_vino.ino)
+                                realm->inode = inode;
 			spin_unlock(&realm->inodes_with_caps_lock);
 
 			spin_unlock(&ci->i_ceph_lock);
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index fb2bc9c15a23..b33082e6878f 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -76,9 +76,18 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
 	 */
 	buf->f_bsize = 1 << CEPH_BLOCK_SHIFT;
 	buf->f_frsize = 1 << CEPH_BLOCK_SHIFT;
-	buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10);
-	buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
-	buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
+
+	/*
+	 * By default use root quota for stats; fallback to overall filesystem
+	 * usage if using 'noquotadf' mount option or if the root dir doesn't
+	 * have max_bytes quota set.
+	 */
+	if (ceph_test_mount_opt(fsc, NOQUOTADF) ||
+	    !ceph_quota_update_statfs(fsc, buf)) {
+		buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10);
+		buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
+		buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
+	}
 
 	buf->f_files = le64_to_cpu(st.num_objects);
 	buf->f_ffree = -1;
@@ -151,6 +160,8 @@ enum {
 	Opt_acl,
 #endif
 	Opt_noacl,
+	Opt_quotadf,
+	Opt_noquotadf,
 };
 
 static match_table_t fsopt_tokens = {
@@ -187,6 +198,8 @@ static match_table_t fsopt_tokens = {
 	{Opt_acl, "acl"},
 #endif
 	{Opt_noacl, "noacl"},
+	{Opt_quotadf, "quotadf"},
+	{Opt_noquotadf, "noquotadf"},
 	{-1, NULL}
 };
 
@@ -314,13 +327,16 @@ static int parse_fsopt_token(char *c, void *private)
 		break;
 	case Opt_fscache:
 		fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE;
+		kfree(fsopt->fscache_uniq);
+		fsopt->fscache_uniq = NULL;
 		break;
 	case Opt_nofscache:
 		fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE;
+		kfree(fsopt->fscache_uniq);
+		fsopt->fscache_uniq = NULL;
 		break;
 	case Opt_poolperm:
 		fsopt->flags &= ~CEPH_MOUNT_OPT_NOPOOLPERM;
-		printk ("pool perm");
 		break;
 	case Opt_nopoolperm:
 		fsopt->flags |= CEPH_MOUNT_OPT_NOPOOLPERM;
@@ -331,6 +347,12 @@ static int parse_fsopt_token(char *c, void *private)
 	case Opt_norequire_active_mds:
 		fsopt->flags |= CEPH_MOUNT_OPT_MOUNTWAIT;
 		break;
+	case Opt_quotadf:
+		fsopt->flags &= ~CEPH_MOUNT_OPT_NOQUOTADF;
+		break;
+	case Opt_noquotadf:
+		fsopt->flags |= CEPH_MOUNT_OPT_NOQUOTADF;
+		break;
 #ifdef CONFIG_CEPH_FS_POSIX_ACL
 	case Opt_acl:
 		fsopt->sb_flags |= SB_POSIXACL;
@@ -513,13 +535,12 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
 	if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0)
 		seq_puts(m, ",nodcache");
 	if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) {
-		if (fsopt->fscache_uniq)
-			seq_printf(m, ",fsc=%s", fsopt->fscache_uniq);
-		else
-			seq_puts(m, ",fsc");
+		seq_show_option(m, "fsc", fsopt->fscache_uniq);
 	}
 	if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM)
 		seq_puts(m, ",nopoolperm");
+	if (fsopt->flags & CEPH_MOUNT_OPT_NOQUOTADF)
+		seq_puts(m, ",noquotadf");
 
 #ifdef CONFIG_CEPH_FS_POSIX_ACL
 	if (fsopt->sb_flags & SB_POSIXACL)
@@ -529,7 +550,7 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
 #endif
 
 	if (fsopt->mds_namespace)
-		seq_printf(m, ",mds_namespace=%s", fsopt->mds_namespace);
+		seq_show_option(m, "mds_namespace", fsopt->mds_namespace);
 	if (fsopt->wsize)
 		seq_printf(m, ",wsize=%d", fsopt->wsize);
 	if (fsopt->rsize != CEPH_MAX_READ_SIZE)
@@ -679,6 +700,7 @@ struct kmem_cache *ceph_cap_cachep;
 struct kmem_cache *ceph_cap_flush_cachep;
 struct kmem_cache *ceph_dentry_cachep;
 struct kmem_cache *ceph_file_cachep;
+struct kmem_cache *ceph_dir_file_cachep;
 
 static void ceph_inode_init_once(void *foo)
 {
@@ -698,8 +720,7 @@ static int __init init_caches(void)
 	if (!ceph_inode_cachep)
 		return -ENOMEM;
 
-	ceph_cap_cachep = KMEM_CACHE(ceph_cap,
-				     SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
+	ceph_cap_cachep = KMEM_CACHE(ceph_cap, SLAB_MEM_SPREAD);
 	if (!ceph_cap_cachep)
 		goto bad_cap;
 	ceph_cap_flush_cachep = KMEM_CACHE(ceph_cap_flush,
@@ -716,6 +737,10 @@ static int __init init_caches(void)
 	if (!ceph_file_cachep)
 		goto bad_file;
 
+	ceph_dir_file_cachep = KMEM_CACHE(ceph_dir_file_info, SLAB_MEM_SPREAD);
+	if (!ceph_dir_file_cachep)
+		goto bad_dir_file;
+
 	error = ceph_fscache_register();
 	if (error)
 		goto bad_fscache;
@@ -723,6 +748,8 @@ static int __init init_caches(void)
 	return 0;
 
 bad_fscache:
+	kmem_cache_destroy(ceph_dir_file_cachep);
+bad_dir_file:
 	kmem_cache_destroy(ceph_file_cachep);
 bad_file:
 	kmem_cache_destroy(ceph_dentry_cachep);
@@ -748,6 +775,7 @@ static void destroy_caches(void)
 	kmem_cache_destroy(ceph_cap_flush_cachep);
 	kmem_cache_destroy(ceph_dentry_cachep);
 	kmem_cache_destroy(ceph_file_cachep);
+	kmem_cache_destroy(ceph_dir_file_cachep);
 
 	ceph_fscache_unregister();
 }
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 1c2086e0fec2..a7077a0c989f 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -39,6 +39,7 @@
 #define CEPH_MOUNT_OPT_FSCACHE         (1<<10) /* use fscache */
 #define CEPH_MOUNT_OPT_NOPOOLPERM      (1<<11) /* no pool permission check */
 #define CEPH_MOUNT_OPT_MOUNTWAIT       (1<<12) /* mount waits if no mds is up */
+#define CEPH_MOUNT_OPT_NOQUOTADF       (1<<13) /* no root dir quota in statfs */
 
 #define CEPH_MOUNT_OPT_DEFAULT    CEPH_MOUNT_OPT_DCACHE
 
@@ -310,6 +311,9 @@ struct ceph_inode_info {
 	u64 i_rbytes, i_rfiles, i_rsubdirs;
 	u64 i_files, i_subdirs;
 
+	/* quotas */
+	u64 i_max_bytes, i_max_files;
+
 	struct rb_root i_fragtree;
 	int i_fragtree_nsplits;
 	struct mutex i_fragtree_mutex;
@@ -671,6 +675,10 @@ struct ceph_file_info {
 
 	spinlock_t rw_contexts_lock;
 	struct list_head rw_contexts;
+};
+
+struct ceph_dir_file_info {
+	struct ceph_file_info file_info;
 
 	/* readdir: position within the dir */
 	u32 frag;
@@ -748,6 +756,7 @@ struct ceph_readdir_cache_control {
  */
 struct ceph_snap_realm {
 	u64 ino;
+	struct inode *inode;
 	atomic_t nref;
 	struct rb_node node;
 
@@ -1066,4 +1075,37 @@ extern int ceph_locks_to_pagelist(struct ceph_filelock *flocks,
 extern int ceph_fs_debugfs_init(struct ceph_fs_client *client);
 extern void ceph_fs_debugfs_cleanup(struct ceph_fs_client *client);
 
+/* quota.c */
+static inline bool __ceph_has_any_quota(struct ceph_inode_info *ci)
+{
+	return ci->i_max_files || ci->i_max_bytes;
+}
+
+extern void ceph_adjust_quota_realms_count(struct inode *inode, bool inc);
+
+static inline void __ceph_update_quota(struct ceph_inode_info *ci,
+				       u64 max_bytes, u64 max_files)
+{
+	bool had_quota, has_quota;
+	had_quota = __ceph_has_any_quota(ci);
+	ci->i_max_bytes = max_bytes;
+	ci->i_max_files = max_files;
+	has_quota = __ceph_has_any_quota(ci);
+
+	if (had_quota != has_quota)
+		ceph_adjust_quota_realms_count(&ci->vfs_inode, has_quota);
+}
+
+extern void ceph_handle_quota(struct ceph_mds_client *mdsc,
+			      struct ceph_mds_session *session,
+			      struct ceph_msg *msg);
+extern bool ceph_quota_is_max_files_exceeded(struct inode *inode);
+extern bool ceph_quota_is_same_realm(struct inode *old, struct inode *new);
+extern bool ceph_quota_is_max_bytes_exceeded(struct inode *inode,
+					     loff_t newlen);
+extern bool ceph_quota_is_max_bytes_approaching(struct inode *inode,
+						loff_t newlen);
+extern bool ceph_quota_update_statfs(struct ceph_fs_client *fsc,
+				     struct kstatfs *buf);
+
 #endif /* _FS_CEPH_SUPER_H */
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index e1c4e0b12b4c..7e72348639e4 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -224,6 +224,31 @@ static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val,
 			(long)ci->i_rctime.tv_nsec);
 }
 
+/* quotas */
+
+static bool ceph_vxattrcb_quota_exists(struct ceph_inode_info *ci)
+{
+	return (ci->i_max_files || ci->i_max_bytes);
+}
+
+static size_t ceph_vxattrcb_quota(struct ceph_inode_info *ci, char *val,
+				  size_t size)
+{
+	return snprintf(val, size, "max_bytes=%llu max_files=%llu",
+			ci->i_max_bytes, ci->i_max_files);
+}
+
+static size_t ceph_vxattrcb_quota_max_bytes(struct ceph_inode_info *ci,
+					    char *val, size_t size)
+{
+	return snprintf(val, size, "%llu", ci->i_max_bytes);
+}
+
+static size_t ceph_vxattrcb_quota_max_files(struct ceph_inode_info *ci,
+					    char *val, size_t size)
+{
+	return snprintf(val, size, "%llu", ci->i_max_files);
+}
 
 #define CEPH_XATTR_NAME(_type, _name)	XATTR_CEPH_PREFIX #_type "." #_name
 #define CEPH_XATTR_NAME2(_type, _name, _name2)	\
@@ -247,6 +272,15 @@ static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val,
 		.hidden = true,			\
 		.exists_cb = ceph_vxattrcb_layout_exists,	\
 	}
+#define XATTR_QUOTA_FIELD(_type, _name)					\
+	{								\
+		.name = CEPH_XATTR_NAME(_type, _name),			\
+		.name_size = sizeof(CEPH_XATTR_NAME(_type, _name)),	\
+		.getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name,	\
+		.readonly = false,					\
+		.hidden = true,						\
+		.exists_cb = ceph_vxattrcb_quota_exists,		\
+	}
 
 static struct ceph_vxattr ceph_dir_vxattrs[] = {
 	{
@@ -270,6 +304,16 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = {
 	XATTR_NAME_CEPH(dir, rsubdirs),
 	XATTR_NAME_CEPH(dir, rbytes),
 	XATTR_NAME_CEPH(dir, rctime),
+	{
+		.name = "ceph.quota",
+		.name_size = sizeof("ceph.quota"),
+		.getxattr_cb = ceph_vxattrcb_quota,
+		.readonly = false,
+		.hidden = true,
+		.exists_cb = ceph_vxattrcb_quota_exists,
+	},
+	XATTR_QUOTA_FIELD(quota, max_bytes),
+	XATTR_QUOTA_FIELD(quota, max_files),
 	{ .name = NULL, 0 }	/* Required table terminator */
 };
 static size_t ceph_dir_vxattrs_name_size;	/* total size of all names */
diff --git a/fs/cifs/cache.c b/fs/cifs/cache.c
index 2c14020e5e1d..edf5f40898bf 100644
--- a/fs/cifs/cache.c
+++ b/fs/cifs/cache.c
@@ -46,67 +46,11 @@ void cifs_fscache_unregister(void)
 }
 
 /*
- * Key layout of CIFS server cache index object
- */
-struct cifs_server_key {
-	uint16_t	family;		/* address family */
-	__be16		port;		/* IP port */
-	union {
-		struct in_addr	ipv4_addr;
-		struct in6_addr	ipv6_addr;
-	} addr[0];
-};
-
-/*
- * Server object keyed by {IPaddress,port,family} tuple
- */
-static uint16_t cifs_server_get_key(const void *cookie_netfs_data,
-				   void *buffer, uint16_t maxbuf)
-{
-	const struct TCP_Server_Info *server = cookie_netfs_data;
-	const struct sockaddr *sa = (struct sockaddr *) &server->dstaddr;
-	const struct sockaddr_in *addr = (struct sockaddr_in *) sa;
-	const struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *) sa;
-	struct cifs_server_key *key = buffer;
-	uint16_t key_len = sizeof(struct cifs_server_key);
-
-	memset(key, 0, key_len);
-
-	/*
-	 * Should not be a problem as sin_family/sin6_family overlays
-	 * sa_family field
-	 */
-	switch (sa->sa_family) {
-	case AF_INET:
-		key->family = sa->sa_family;
-		key->port = addr->sin_port;
-		key->addr[0].ipv4_addr = addr->sin_addr;
-		key_len += sizeof(key->addr[0].ipv4_addr);
-		break;
-
-	case AF_INET6:
-		key->family = sa->sa_family;
-		key->port = addr6->sin6_port;
-		key->addr[0].ipv6_addr = addr6->sin6_addr;
-		key_len += sizeof(key->addr[0].ipv6_addr);
-		break;
-
-	default:
-		cifs_dbg(VFS, "Unknown network family '%d'\n", sa->sa_family);
-		key_len = 0;
-		break;
-	}
-
-	return key_len;
-}
-
-/*
  * Server object for FS-Cache
  */
 const struct fscache_cookie_def cifs_fscache_server_index_def = {
 	.name = "CIFS.server",
 	.type = FSCACHE_COOKIE_TYPE_INDEX,
-	.get_key = cifs_server_get_key,
 };
 
 /*
@@ -116,7 +60,7 @@ struct cifs_fscache_super_auxdata {
 	u64	resource_id;		/* unique server resource id */
 };
 
-static char *extract_sharename(const char *treename)
+char *extract_sharename(const char *treename)
 {
 	const char *src;
 	char *delim, *dst;
@@ -140,56 +84,11 @@ static char *extract_sharename(const char *treename)
 	return dst;
 }
 
-/*
- * Superblock object currently keyed by share name
- */
-static uint16_t cifs_super_get_key(const void *cookie_netfs_data, void *buffer,
-				   uint16_t maxbuf)
-{
-	const struct cifs_tcon *tcon = cookie_netfs_data;
-	char *sharename;
-	uint16_t len;
-
-	sharename = extract_sharename(tcon->treeName);
-	if (IS_ERR(sharename)) {
-		cifs_dbg(FYI, "%s: couldn't extract sharename\n", __func__);
-		sharename = NULL;
-		return 0;
-	}
-
-	len = strlen(sharename);
-	if (len > maxbuf)
-		return 0;
-
-	memcpy(buffer, sharename, len);
-
-	kfree(sharename);
-
-	return len;
-}
-
-static uint16_t
-cifs_fscache_super_get_aux(const void *cookie_netfs_data, void *buffer,
-			   uint16_t maxbuf)
-{
-	struct cifs_fscache_super_auxdata auxdata;
-	const struct cifs_tcon *tcon = cookie_netfs_data;
-
-	memset(&auxdata, 0, sizeof(auxdata));
-	auxdata.resource_id = tcon->resource_id;
-
-	if (maxbuf > sizeof(auxdata))
-		maxbuf = sizeof(auxdata);
-
-	memcpy(buffer, &auxdata, maxbuf);
-
-	return maxbuf;
-}
-
 static enum
 fscache_checkaux cifs_fscache_super_check_aux(void *cookie_netfs_data,
 					      const void *data,
-					      uint16_t datalen)
+					      uint16_t datalen,
+					      loff_t object_size)
 {
 	struct cifs_fscache_super_auxdata auxdata;
 	const struct cifs_tcon *tcon = cookie_netfs_data;
@@ -212,68 +111,14 @@ fscache_checkaux cifs_fscache_super_check_aux(void *cookie_netfs_data,
 const struct fscache_cookie_def cifs_fscache_super_index_def = {
 	.name = "CIFS.super",
 	.type = FSCACHE_COOKIE_TYPE_INDEX,
-	.get_key = cifs_super_get_key,
-	.get_aux = cifs_fscache_super_get_aux,
 	.check_aux = cifs_fscache_super_check_aux,
 };
 
-/*
- * Auxiliary data attached to CIFS inode within the cache
- */
-struct cifs_fscache_inode_auxdata {
-	struct timespec	last_write_time;
-	struct timespec	last_change_time;
-	u64		eof;
-};
-
-static uint16_t cifs_fscache_inode_get_key(const void *cookie_netfs_data,
-					   void *buffer, uint16_t maxbuf)
-{
-	const struct cifsInodeInfo *cifsi = cookie_netfs_data;
-	uint16_t keylen;
-
-	/* use the UniqueId as the key */
-	keylen = sizeof(cifsi->uniqueid);
-	if (keylen > maxbuf)
-		keylen = 0;
-	else
-		memcpy(buffer, &cifsi->uniqueid, keylen);
-
-	return keylen;
-}
-
-static void
-cifs_fscache_inode_get_attr(const void *cookie_netfs_data, uint64_t *size)
-{
-	const struct cifsInodeInfo *cifsi = cookie_netfs_data;
-
-	*size = cifsi->vfs_inode.i_size;
-}
-
-static uint16_t
-cifs_fscache_inode_get_aux(const void *cookie_netfs_data, void *buffer,
-			   uint16_t maxbuf)
-{
-	struct cifs_fscache_inode_auxdata auxdata;
-	const struct cifsInodeInfo *cifsi = cookie_netfs_data;
-
-	memset(&auxdata, 0, sizeof(auxdata));
-	auxdata.eof = cifsi->server_eof;
-	auxdata.last_write_time = cifsi->vfs_inode.i_mtime;
-	auxdata.last_change_time = cifsi->vfs_inode.i_ctime;
-
-	if (maxbuf > sizeof(auxdata))
-		maxbuf = sizeof(auxdata);
-
-	memcpy(buffer, &auxdata, maxbuf);
-
-	return maxbuf;
-}
-
 static enum
 fscache_checkaux cifs_fscache_inode_check_aux(void *cookie_netfs_data,
 					      const void *data,
-					      uint16_t datalen)
+					      uint16_t datalen,
+					      loff_t object_size)
 {
 	struct cifs_fscache_inode_auxdata auxdata;
 	struct cifsInodeInfo *cifsi = cookie_netfs_data;
@@ -295,8 +140,5 @@ fscache_checkaux cifs_fscache_inode_check_aux(void *cookie_netfs_data,
 const struct fscache_cookie_def cifs_fscache_inode_object_def = {
 	.name		= "CIFS.uniqueid",
 	.type		= FSCACHE_COOKIE_TYPE_DATAFILE,
-	.get_key	= cifs_fscache_inode_get_key,
-	.get_attr	= cifs_fscache_inode_get_attr,
-	.get_aux	= cifs_fscache_inode_get_aux,
 	.check_aux	= cifs_fscache_inode_check_aux,
 };
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 7cee97b93a61..4bcd4e838b47 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1987,11 +1987,10 @@ wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
 	for (i = 0; i < found_pages; i++) {
 		page = wdata->pages[i];
 		/*
-		 * At this point we hold neither mapping->tree_lock nor
-		 * lock on the page itself: the page may be truncated or
-		 * invalidated (changing page->mapping to NULL), or even
-		 * swizzled back from swapper_space to tmpfs file
-		 * mapping
+		 * At this point we hold neither the i_pages lock nor the
+		 * page lock: the page may be truncated or invalidated
+		 * (changing page->mapping to NULL), or even swizzled
+		 * back from swapper_space to tmpfs file mapping
 		 */
 
 		if (nr_pages == 0)
diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c
index 8d4b7bc8ae91..25d3f66b2d50 100644
--- a/fs/cifs/fscache.c
+++ b/fs/cifs/fscache.c
@@ -23,11 +23,63 @@
 #include "cifs_debug.h"
 #include "cifs_fs_sb.h"
 
+/*
+ * Key layout of CIFS server cache index object
+ */
+struct cifs_server_key {
+	struct {
+		uint16_t	family;		/* address family */
+		__be16		port;		/* IP port */
+	} hdr;
+	union {
+		struct in_addr	ipv4_addr;
+		struct in6_addr	ipv6_addr;
+	};
+} __packed;
+
+/*
+ * Get a cookie for a server object keyed by {IPaddress,port,family} tuple
+ */
 void cifs_fscache_get_client_cookie(struct TCP_Server_Info *server)
 {
+	const struct sockaddr *sa = (struct sockaddr *) &server->dstaddr;
+	const struct sockaddr_in *addr = (struct sockaddr_in *) sa;
+	const struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *) sa;
+	struct cifs_server_key key;
+	uint16_t key_len = sizeof(key.hdr);
+
+	memset(&key, 0, sizeof(key));
+
+	/*
+	 * Should not be a problem as sin_family/sin6_family overlays
+	 * sa_family field
+	 */
+	key.hdr.family = sa->sa_family;
+	switch (sa->sa_family) {
+	case AF_INET:
+		key.hdr.port = addr->sin_port;
+		key.ipv4_addr = addr->sin_addr;
+		key_len += sizeof(key.ipv4_addr);
+		break;
+
+	case AF_INET6:
+		key.hdr.port = addr6->sin6_port;
+		key.ipv6_addr = addr6->sin6_addr;
+		key_len += sizeof(key.ipv6_addr);
+		break;
+
+	default:
+		cifs_dbg(VFS, "Unknown network family '%d'\n", sa->sa_family);
+		server->fscache = NULL;
+		return;
+	}
+
 	server->fscache =
 		fscache_acquire_cookie(cifs_fscache_netfs.primary_index,
-				&cifs_fscache_server_index_def, server, true);
+				       &cifs_fscache_server_index_def,
+				       &key, key_len,
+				       NULL, 0,
+				       server, 0, true);
 	cifs_dbg(FYI, "%s: (0x%p/0x%p)\n",
 		 __func__, server, server->fscache);
 }
@@ -36,17 +88,29 @@ void cifs_fscache_release_client_cookie(struct TCP_Server_Info *server)
 {
 	cifs_dbg(FYI, "%s: (0x%p/0x%p)\n",
 		 __func__, server, server->fscache);
-	fscache_relinquish_cookie(server->fscache, 0);
+	fscache_relinquish_cookie(server->fscache, NULL, false);
 	server->fscache = NULL;
 }
 
 void cifs_fscache_get_super_cookie(struct cifs_tcon *tcon)
 {
 	struct TCP_Server_Info *server = tcon->ses->server;
+	char *sharename;
+
+	sharename = extract_sharename(tcon->treeName);
+	if (IS_ERR(sharename)) {
+		cifs_dbg(FYI, "%s: couldn't extract sharename\n", __func__);
+		tcon->fscache = NULL;
+		return;
+	}
 
 	tcon->fscache =
 		fscache_acquire_cookie(server->fscache,
-				&cifs_fscache_super_index_def, tcon, true);
+				       &cifs_fscache_super_index_def,
+				       sharename, strlen(sharename),
+				       &tcon->resource_id, sizeof(tcon->resource_id),
+				       tcon, 0, true);
+	kfree(sharename);
 	cifs_dbg(FYI, "%s: (0x%p/0x%p)\n",
 		 __func__, server->fscache, tcon->fscache);
 }
@@ -54,10 +118,28 @@ void cifs_fscache_get_super_cookie(struct cifs_tcon *tcon)
 void cifs_fscache_release_super_cookie(struct cifs_tcon *tcon)
 {
 	cifs_dbg(FYI, "%s: (0x%p)\n", __func__, tcon->fscache);
-	fscache_relinquish_cookie(tcon->fscache, 0);
+	fscache_relinquish_cookie(tcon->fscache, &tcon->resource_id, false);
 	tcon->fscache = NULL;
 }
 
+static void cifs_fscache_acquire_inode_cookie(struct cifsInodeInfo *cifsi,
+					      struct cifs_tcon *tcon)
+{
+	struct cifs_fscache_inode_auxdata auxdata;
+
+	memset(&auxdata, 0, sizeof(auxdata));
+	auxdata.eof = cifsi->server_eof;
+	auxdata.last_write_time = cifsi->vfs_inode.i_mtime;
+	auxdata.last_change_time = cifsi->vfs_inode.i_ctime;
+
+	cifsi->fscache =
+		fscache_acquire_cookie(tcon->fscache,
+				       &cifs_fscache_inode_object_def,
+				       &cifsi->uniqueid, sizeof(cifsi->uniqueid),
+				       &auxdata, sizeof(auxdata),
+				       cifsi, cifsi->vfs_inode.i_size, true);
+}
+
 static void cifs_fscache_enable_inode_cookie(struct inode *inode)
 {
 	struct cifsInodeInfo *cifsi = CIFS_I(inode);
@@ -67,21 +149,28 @@ static void cifs_fscache_enable_inode_cookie(struct inode *inode)
 	if (cifsi->fscache)
 		return;
 
-	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE) {
-		cifsi->fscache = fscache_acquire_cookie(tcon->fscache,
-				&cifs_fscache_inode_object_def, cifsi, true);
-		cifs_dbg(FYI, "%s: got FH cookie (0x%p/0x%p)\n",
-			 __func__, tcon->fscache, cifsi->fscache);
-	}
+	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE))
+		return;
+
+	cifs_fscache_acquire_inode_cookie(cifsi, tcon);
+
+	cifs_dbg(FYI, "%s: got FH cookie (0x%p/0x%p)\n",
+		 __func__, tcon->fscache, cifsi->fscache);
 }
 
 void cifs_fscache_release_inode_cookie(struct inode *inode)
 {
+	struct cifs_fscache_inode_auxdata auxdata;
 	struct cifsInodeInfo *cifsi = CIFS_I(inode);
 
 	if (cifsi->fscache) {
+		memset(&auxdata, 0, sizeof(auxdata));
+		auxdata.eof = cifsi->server_eof;
+		auxdata.last_write_time = cifsi->vfs_inode.i_mtime;
+		auxdata.last_change_time = cifsi->vfs_inode.i_ctime;
+
 		cifs_dbg(FYI, "%s: (0x%p)\n", __func__, cifsi->fscache);
-		fscache_relinquish_cookie(cifsi->fscache, 0);
+		fscache_relinquish_cookie(cifsi->fscache, &auxdata, false);
 		cifsi->fscache = NULL;
 	}
 }
@@ -93,7 +182,7 @@ static void cifs_fscache_disable_inode_cookie(struct inode *inode)
 	if (cifsi->fscache) {
 		cifs_dbg(FYI, "%s: (0x%p)\n", __func__, cifsi->fscache);
 		fscache_uncache_all_inode_pages(cifsi->fscache, inode);
-		fscache_relinquish_cookie(cifsi->fscache, 1);
+		fscache_relinquish_cookie(cifsi->fscache, NULL, true);
 		cifsi->fscache = NULL;
 	}
 }
@@ -110,16 +199,14 @@ void cifs_fscache_reset_inode_cookie(struct inode *inode)
 {
 	struct cifsInodeInfo *cifsi = CIFS_I(inode);
 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+	struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
 	struct fscache_cookie *old = cifsi->fscache;
 
 	if (cifsi->fscache) {
 		/* retire the current fscache cache and get a new one */
-		fscache_relinquish_cookie(cifsi->fscache, 1);
+		fscache_relinquish_cookie(cifsi->fscache, NULL, true);
 
-		cifsi->fscache = fscache_acquire_cookie(
-					cifs_sb_master_tcon(cifs_sb)->fscache,
-					&cifs_fscache_inode_object_def,
-					cifsi, true);
+		cifs_fscache_acquire_inode_cookie(cifsi, tcon);
 		cifs_dbg(FYI, "%s: new cookie 0x%p oldcookie 0x%p\n",
 			 __func__, cifsi->fscache, old);
 	}
@@ -214,13 +301,15 @@ int __cifs_readpages_from_fscache(struct inode *inode,
 
 void __cifs_readpage_to_fscache(struct inode *inode, struct page *page)
 {
+	struct cifsInodeInfo *cifsi = CIFS_I(inode);
 	int ret;
 
 	cifs_dbg(FYI, "%s: (fsc: %p, p: %p, i: %p)\n",
-		 __func__, CIFS_I(inode)->fscache, page, inode);
-	ret = fscache_write_page(CIFS_I(inode)->fscache, page, GFP_KERNEL);
+		 __func__, cifsi->fscache, page, inode);
+	ret = fscache_write_page(cifsi->fscache, page,
+				 cifsi->vfs_inode.i_size, GFP_KERNEL);
 	if (ret != 0)
-		fscache_uncache_page(CIFS_I(inode)->fscache, page);
+		fscache_uncache_page(cifsi->fscache, page);
 }
 
 void __cifs_fscache_readpages_cancel(struct inode *inode, struct list_head *pages)
@@ -239,4 +328,3 @@ void __cifs_fscache_invalidate_page(struct page *page, struct inode *inode)
 	fscache_wait_on_page_write(cookie, page);
 	fscache_uncache_page(cookie, page);
 }
-
diff --git a/fs/cifs/fscache.h b/fs/cifs/fscache.h
index 24794b6cd8ec..c7e3ac251e16 100644
--- a/fs/cifs/fscache.h
+++ b/fs/cifs/fscache.h
@@ -27,6 +27,18 @@
 
 #ifdef CONFIG_CIFS_FSCACHE
 
+/*
+ * Auxiliary data attached to CIFS inode within the cache
+ */
+struct cifs_fscache_inode_auxdata {
+	struct timespec	last_write_time;
+	struct timespec	last_change_time;
+	u64		eof;
+};
+
+/*
+ * cache.c
+ */
 extern struct fscache_netfs cifs_fscache_netfs;
 extern const struct fscache_cookie_def cifs_fscache_server_index_def;
 extern const struct fscache_cookie_def cifs_fscache_super_index_def;
@@ -34,6 +46,7 @@ extern const struct fscache_cookie_def cifs_fscache_inode_object_def;
 
 extern int cifs_fscache_register(void);
 extern void cifs_fscache_unregister(void);
+extern char *extract_sharename(const char *);
 
 /*
  * fscache.c
diff --git a/fs/dax.c b/fs/dax.c
index 0276df90e86c..aaec72ded1b6 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -73,16 +73,15 @@ fs_initcall(init_dax_wait_table);
 #define RADIX_DAX_ZERO_PAGE	(1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 2))
 #define RADIX_DAX_EMPTY		(1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 3))
 
-static unsigned long dax_radix_sector(void *entry)
+static unsigned long dax_radix_pfn(void *entry)
 {
 	return (unsigned long)entry >> RADIX_DAX_SHIFT;
 }
 
-static void *dax_radix_locked_entry(sector_t sector, unsigned long flags)
+static void *dax_radix_locked_entry(unsigned long pfn, unsigned long flags)
 {
 	return (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY | flags |
-			((unsigned long)sector << RADIX_DAX_SHIFT) |
-			RADIX_DAX_ENTRY_LOCK);
+			(pfn << RADIX_DAX_SHIFT) | RADIX_DAX_ENTRY_LOCK);
 }
 
 static unsigned int dax_radix_order(void *entry)
@@ -159,11 +158,9 @@ static int wake_exceptional_entry_func(wait_queue_entry_t *wait, unsigned int mo
 }
 
 /*
- * We do not necessarily hold the mapping->tree_lock when we call this
- * function so it is possible that 'entry' is no longer a valid item in the
- * radix tree.  This is okay because all we really need to do is to find the
- * correct waitqueue where tasks might be waiting for that old 'entry' and
- * wake them.
+ * @entry may no longer be the entry at the index in the mapping.
+ * The important information it's conveying is whether the entry at
+ * this index used to be a PMD entry.
  */
 static void dax_wake_mapping_entry_waiter(struct address_space *mapping,
 		pgoff_t index, void *entry, bool wake_all)
@@ -175,7 +172,7 @@ static void dax_wake_mapping_entry_waiter(struct address_space *mapping,
 
 	/*
 	 * Checking for locked entry and prepare_to_wait_exclusive() happens
-	 * under mapping->tree_lock, ditto for entry handling in our callers.
+	 * under the i_pages lock, ditto for entry handling in our callers.
 	 * So at this point all tasks that could have seen our entry locked
 	 * must be in the waitqueue and the following check will see them.
 	 */
@@ -184,41 +181,39 @@ static void dax_wake_mapping_entry_waiter(struct address_space *mapping,
 }
 
 /*
- * Check whether the given slot is locked. The function must be called with
- * mapping->tree_lock held
+ * Check whether the given slot is locked.  Must be called with the i_pages
+ * lock held.
  */
 static inline int slot_locked(struct address_space *mapping, void **slot)
 {
 	unsigned long entry = (unsigned long)
-		radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
+		radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock);
 	return entry & RADIX_DAX_ENTRY_LOCK;
 }
 
 /*
- * Mark the given slot is locked. The function must be called with
- * mapping->tree_lock held
+ * Mark the given slot as locked.  Must be called with the i_pages lock held.
  */
 static inline void *lock_slot(struct address_space *mapping, void **slot)
 {
 	unsigned long entry = (unsigned long)
-		radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
+		radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock);
 
 	entry |= RADIX_DAX_ENTRY_LOCK;
-	radix_tree_replace_slot(&mapping->page_tree, slot, (void *)entry);
+	radix_tree_replace_slot(&mapping->i_pages, slot, (void *)entry);
 	return (void *)entry;
 }
 
 /*
- * Mark the given slot is unlocked. The function must be called with
- * mapping->tree_lock held
+ * Mark the given slot as unlocked.  Must be called with the i_pages lock held.
  */
 static inline void *unlock_slot(struct address_space *mapping, void **slot)
 {
 	unsigned long entry = (unsigned long)
-		radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
+		radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock);
 
 	entry &= ~(unsigned long)RADIX_DAX_ENTRY_LOCK;
-	radix_tree_replace_slot(&mapping->page_tree, slot, (void *)entry);
+	radix_tree_replace_slot(&mapping->i_pages, slot, (void *)entry);
 	return (void *)entry;
 }
 
@@ -229,7 +224,7 @@ static inline void *unlock_slot(struct address_space *mapping, void **slot)
  * put_locked_mapping_entry() when he locked the entry and now wants to
  * unlock it.
  *
- * The function must be called with mapping->tree_lock held.
+ * Must be called with the i_pages lock held.
  */
 static void *get_unlocked_mapping_entry(struct address_space *mapping,
 					pgoff_t index, void ***slotp)
@@ -242,7 +237,7 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping,
 	ewait.wait.func = wake_exceptional_entry_func;
 
 	for (;;) {
-		entry = __radix_tree_lookup(&mapping->page_tree, index, NULL,
+		entry = __radix_tree_lookup(&mapping->i_pages, index, NULL,
 					  &slot);
 		if (!entry ||
 		    WARN_ON_ONCE(!radix_tree_exceptional_entry(entry)) ||
@@ -255,10 +250,10 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping,
 		wq = dax_entry_waitqueue(mapping, index, entry, &ewait.key);
 		prepare_to_wait_exclusive(wq, &ewait.wait,
 					  TASK_UNINTERRUPTIBLE);
-		spin_unlock_irq(&mapping->tree_lock);
+		xa_unlock_irq(&mapping->i_pages);
 		schedule();
 		finish_wait(wq, &ewait.wait);
-		spin_lock_irq(&mapping->tree_lock);
+		xa_lock_irq(&mapping->i_pages);
 	}
 }
 
@@ -267,15 +262,15 @@ static void dax_unlock_mapping_entry(struct address_space *mapping,
 {
 	void *entry, **slot;
 
-	spin_lock_irq(&mapping->tree_lock);
-	entry = __radix_tree_lookup(&mapping->page_tree, index, NULL, &slot);
+	xa_lock_irq(&mapping->i_pages);
+	entry = __radix_tree_lookup(&mapping->i_pages, index, NULL, &slot);
 	if (WARN_ON_ONCE(!entry || !radix_tree_exceptional_entry(entry) ||
 			 !slot_locked(mapping, slot))) {
-		spin_unlock_irq(&mapping->tree_lock);
+		xa_unlock_irq(&mapping->i_pages);
 		return;
 	}
 	unlock_slot(mapping, slot);
-	spin_unlock_irq(&mapping->tree_lock);
+	xa_unlock_irq(&mapping->i_pages);
 	dax_wake_mapping_entry_waiter(mapping, index, entry, false);
 }
 
@@ -299,6 +294,63 @@ static void put_unlocked_mapping_entry(struct address_space *mapping,
 	dax_wake_mapping_entry_waiter(mapping, index, entry, false);
 }
 
+static unsigned long dax_entry_size(void *entry)
+{
+	if (dax_is_zero_entry(entry))
+		return 0;
+	else if (dax_is_empty_entry(entry))
+		return 0;
+	else if (dax_is_pmd_entry(entry))
+		return PMD_SIZE;
+	else
+		return PAGE_SIZE;
+}
+
+static unsigned long dax_radix_end_pfn(void *entry)
+{
+	return dax_radix_pfn(entry) + dax_entry_size(entry) / PAGE_SIZE;
+}
+
+/*
+ * Iterate through all mapped pfns represented by an entry, i.e. skip
+ * 'empty' and 'zero' entries.
+ */
+#define for_each_mapped_pfn(entry, pfn) \
+	for (pfn = dax_radix_pfn(entry); \
+			pfn < dax_radix_end_pfn(entry); pfn++)
+
+static void dax_associate_entry(void *entry, struct address_space *mapping)
+{
+	unsigned long pfn;
+
+	if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
+		return;
+
+	for_each_mapped_pfn(entry, pfn) {
+		struct page *page = pfn_to_page(pfn);
+
+		WARN_ON_ONCE(page->mapping);
+		page->mapping = mapping;
+	}
+}
+
+static void dax_disassociate_entry(void *entry, struct address_space *mapping,
+		bool trunc)
+{
+	unsigned long pfn;
+
+	if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
+		return;
+
+	for_each_mapped_pfn(entry, pfn) {
+		struct page *page = pfn_to_page(pfn);
+
+		WARN_ON_ONCE(trunc && page_ref_count(page) > 1);
+		WARN_ON_ONCE(page->mapping && page->mapping != mapping);
+		page->mapping = NULL;
+	}
+}
+
 /*
  * Find radix tree entry at given index. If it points to an exceptional entry,
  * return it with the radix tree entry locked. If the radix tree doesn't
@@ -332,7 +384,7 @@ static void *grab_mapping_entry(struct address_space *mapping, pgoff_t index,
 	void *entry, **slot;
 
 restart:
-	spin_lock_irq(&mapping->tree_lock);
+	xa_lock_irq(&mapping->i_pages);
 	entry = get_unlocked_mapping_entry(mapping, index, &slot);
 
 	if (WARN_ON_ONCE(entry && !radix_tree_exceptional_entry(entry))) {
@@ -364,12 +416,12 @@ restart:
 		if (pmd_downgrade) {
 			/*
 			 * Make sure 'entry' remains valid while we drop
-			 * mapping->tree_lock.
+			 * the i_pages lock.
 			 */
 			entry = lock_slot(mapping, slot);
 		}
 
-		spin_unlock_irq(&mapping->tree_lock);
+		xa_unlock_irq(&mapping->i_pages);
 		/*
 		 * Besides huge zero pages the only other thing that gets
 		 * downgraded are empty entries which don't need to be
@@ -386,26 +438,27 @@ restart:
 				put_locked_mapping_entry(mapping, index);
 			return ERR_PTR(err);
 		}
-		spin_lock_irq(&mapping->tree_lock);
+		xa_lock_irq(&mapping->i_pages);
 
 		if (!entry) {
 			/*
-			 * We needed to drop the page_tree lock while calling
+			 * We needed to drop the i_pages lock while calling
 			 * radix_tree_preload() and we didn't have an entry to
 			 * lock.  See if another thread inserted an entry at
 			 * our index during this time.
 			 */
-			entry = __radix_tree_lookup(&mapping->page_tree, index,
+			entry = __radix_tree_lookup(&mapping->i_pages, index,
 					NULL, &slot);
 			if (entry) {
 				radix_tree_preload_end();
-				spin_unlock_irq(&mapping->tree_lock);
+				xa_unlock_irq(&mapping->i_pages);
 				goto restart;
 			}
 		}
 
 		if (pmd_downgrade) {
-			radix_tree_delete(&mapping->page_tree, index);
+			dax_disassociate_entry(entry, mapping, false);
+			radix_tree_delete(&mapping->i_pages, index);
 			mapping->nrexceptional--;
 			dax_wake_mapping_entry_waiter(mapping, index, entry,
 					true);
@@ -413,11 +466,11 @@ restart:
 
 		entry = dax_radix_locked_entry(0, size_flag | RADIX_DAX_EMPTY);
 
-		err = __radix_tree_insert(&mapping->page_tree, index,
+		err = __radix_tree_insert(&mapping->i_pages, index,
 				dax_radix_order(entry), entry);
 		radix_tree_preload_end();
 		if (err) {
-			spin_unlock_irq(&mapping->tree_lock);
+			xa_unlock_irq(&mapping->i_pages);
 			/*
 			 * Our insertion of a DAX entry failed, most likely
 			 * because we were inserting a PMD entry and it
@@ -430,12 +483,12 @@ restart:
 		}
 		/* Good, we have inserted empty locked entry into the tree. */
 		mapping->nrexceptional++;
-		spin_unlock_irq(&mapping->tree_lock);
+		xa_unlock_irq(&mapping->i_pages);
 		return entry;
 	}
 	entry = lock_slot(mapping, slot);
  out_unlock:
-	spin_unlock_irq(&mapping->tree_lock);
+	xa_unlock_irq(&mapping->i_pages);
 	return entry;
 }
 
@@ -444,22 +497,23 @@ static int __dax_invalidate_mapping_entry(struct address_space *mapping,
 {
 	int ret = 0;
 	void *entry;
-	struct radix_tree_root *page_tree = &mapping->page_tree;
+	struct radix_tree_root *pages = &mapping->i_pages;
 
-	spin_lock_irq(&mapping->tree_lock);
+	xa_lock_irq(pages);
 	entry = get_unlocked_mapping_entry(mapping, index, NULL);
 	if (!entry || WARN_ON_ONCE(!radix_tree_exceptional_entry(entry)))
 		goto out;
 	if (!trunc &&
-	    (radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_DIRTY) ||
-	     radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE)))
+	    (radix_tree_tag_get(pages, index, PAGECACHE_TAG_DIRTY) ||
+	     radix_tree_tag_get(pages, index, PAGECACHE_TAG_TOWRITE)))
 		goto out;
-	radix_tree_delete(page_tree, index);
+	dax_disassociate_entry(entry, mapping, trunc);
+	radix_tree_delete(pages, index);
 	mapping->nrexceptional--;
 	ret = 1;
 out:
 	put_unlocked_mapping_entry(mapping, index, entry);
-	spin_unlock_irq(&mapping->tree_lock);
+	xa_unlock_irq(pages);
 	return ret;
 }
 /*
@@ -526,12 +580,13 @@ static int copy_user_dax(struct block_device *bdev, struct dax_device *dax_dev,
  */
 static void *dax_insert_mapping_entry(struct address_space *mapping,
 				      struct vm_fault *vmf,
-				      void *entry, sector_t sector,
+				      void *entry, pfn_t pfn_t,
 				      unsigned long flags, bool dirty)
 {
-	struct radix_tree_root *page_tree = &mapping->page_tree;
-	void *new_entry;
+	struct radix_tree_root *pages = &mapping->i_pages;
+	unsigned long pfn = pfn_t_to_pfn(pfn_t);
 	pgoff_t index = vmf->pgoff;
+	void *new_entry;
 
 	if (dirty)
 		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
@@ -545,8 +600,12 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
 			unmap_mapping_pages(mapping, vmf->pgoff, 1, false);
 	}
 
-	spin_lock_irq(&mapping->tree_lock);
-	new_entry = dax_radix_locked_entry(sector, flags);
+	xa_lock_irq(pages);
+	new_entry = dax_radix_locked_entry(pfn, flags);
+	if (dax_entry_size(entry) != dax_entry_size(new_entry)) {
+		dax_disassociate_entry(entry, mapping, false);
+		dax_associate_entry(new_entry, mapping);
+	}
 
 	if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
 		/*
@@ -561,17 +620,17 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
 		void **slot;
 		void *ret;
 
-		ret = __radix_tree_lookup(page_tree, index, &node, &slot);
+		ret = __radix_tree_lookup(pages, index, &node, &slot);
 		WARN_ON_ONCE(ret != entry);
-		__radix_tree_replace(page_tree, node, slot,
+		__radix_tree_replace(pages, node, slot,
 				     new_entry, NULL);
 		entry = new_entry;
 	}
 
 	if (dirty)
-		radix_tree_tag_set(page_tree, index, PAGECACHE_TAG_DIRTY);
+		radix_tree_tag_set(pages, index, PAGECACHE_TAG_DIRTY);
 
-	spin_unlock_irq(&mapping->tree_lock);
+	xa_unlock_irq(pages);
 	return entry;
 }
 
@@ -657,17 +716,14 @@ unlock_pte:
 	i_mmap_unlock_read(mapping);
 }
 
-static int dax_writeback_one(struct block_device *bdev,
-		struct dax_device *dax_dev, struct address_space *mapping,
-		pgoff_t index, void *entry)
+static int dax_writeback_one(struct dax_device *dax_dev,
+		struct address_space *mapping, pgoff_t index, void *entry)
 {
-	struct radix_tree_root *page_tree = &mapping->page_tree;
-	void *entry2, **slot, *kaddr;
-	long ret = 0, id;
-	sector_t sector;
-	pgoff_t pgoff;
+	struct radix_tree_root *pages = &mapping->i_pages;
+	void *entry2, **slot;
+	unsigned long pfn;
+	long ret = 0;
 	size_t size;
-	pfn_t pfn;
 
 	/*
 	 * A page got tagged dirty in DAX mapping? Something is seriously
@@ -676,17 +732,17 @@ static int dax_writeback_one(struct block_device *bdev,
 	if (WARN_ON(!radix_tree_exceptional_entry(entry)))
 		return -EIO;
 
-	spin_lock_irq(&mapping->tree_lock);
+	xa_lock_irq(pages);
 	entry2 = get_unlocked_mapping_entry(mapping, index, &slot);
 	/* Entry got punched out / reallocated? */
 	if (!entry2 || WARN_ON_ONCE(!radix_tree_exceptional_entry(entry2)))
 		goto put_unlocked;
 	/*
 	 * Entry got reallocated elsewhere? No need to writeback. We have to
-	 * compare sectors as we must not bail out due to difference in lockbit
+	 * compare pfns as we must not bail out due to difference in lockbit
 	 * or entry type.
 	 */
-	if (dax_radix_sector(entry2) != dax_radix_sector(entry))
+	if (dax_radix_pfn(entry2) != dax_radix_pfn(entry))
 		goto put_unlocked;
 	if (WARN_ON_ONCE(dax_is_empty_entry(entry) ||
 				dax_is_zero_entry(entry))) {
@@ -695,7 +751,7 @@ static int dax_writeback_one(struct block_device *bdev,
 	}
 
 	/* Another fsync thread may have already written back this entry */
-	if (!radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))
+	if (!radix_tree_tag_get(pages, index, PAGECACHE_TAG_TOWRITE))
 		goto put_unlocked;
 	/* Lock the entry to serialize with page faults */
 	entry = lock_slot(mapping, slot);
@@ -703,60 +759,40 @@ static int dax_writeback_one(struct block_device *bdev,
 	 * We can clear the tag now but we have to be careful so that concurrent
 	 * dax_writeback_one() calls for the same index cannot finish before we
 	 * actually flush the caches. This is achieved as the calls will look
-	 * at the entry only under tree_lock and once they do that they will
-	 * see the entry locked and wait for it to unlock.
+	 * at the entry only under the i_pages lock and once they do that
+	 * they will see the entry locked and wait for it to unlock.
 	 */
-	radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_TOWRITE);
-	spin_unlock_irq(&mapping->tree_lock);
+	radix_tree_tag_clear(pages, index, PAGECACHE_TAG_TOWRITE);
+	xa_unlock_irq(pages);
 
 	/*
 	 * Even if dax_writeback_mapping_range() was given a wbc->range_start
 	 * in the middle of a PMD, the 'index' we are given will be aligned to
-	 * the start index of the PMD, as will the sector we pull from
-	 * 'entry'.  This allows us to flush for PMD_SIZE and not have to
-	 * worry about partial PMD writebacks.
+	 * the start index of the PMD, as will the pfn we pull from 'entry'.
+	 * This allows us to flush for PMD_SIZE and not have to worry about
+	 * partial PMD writebacks.
 	 */
-	sector = dax_radix_sector(entry);
+	pfn = dax_radix_pfn(entry);
 	size = PAGE_SIZE << dax_radix_order(entry);
 
-	id = dax_read_lock();
-	ret = bdev_dax_pgoff(bdev, sector, size, &pgoff);
-	if (ret)
-		goto dax_unlock;
-
-	/*
-	 * dax_direct_access() may sleep, so cannot hold tree_lock over
-	 * its invocation.
-	 */
-	ret = dax_direct_access(dax_dev, pgoff, size / PAGE_SIZE, &kaddr, &pfn);
-	if (ret < 0)
-		goto dax_unlock;
-
-	if (WARN_ON_ONCE(ret < size / PAGE_SIZE)) {
-		ret = -EIO;
-		goto dax_unlock;
-	}
-
-	dax_mapping_entry_mkclean(mapping, index, pfn_t_to_pfn(pfn));
-	dax_flush(dax_dev, kaddr, size);
+	dax_mapping_entry_mkclean(mapping, index, pfn);
+	dax_flush(dax_dev, page_address(pfn_to_page(pfn)), size);
 	/*
 	 * After we have flushed the cache, we can clear the dirty tag. There
 	 * cannot be new dirty data in the pfn after the flush has completed as
 	 * the pfn mappings are writeprotected and fault waits for mapping
 	 * entry lock.
 	 */
-	spin_lock_irq(&mapping->tree_lock);
-	radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_DIRTY);
-	spin_unlock_irq(&mapping->tree_lock);
+	xa_lock_irq(pages);
+	radix_tree_tag_clear(pages, index, PAGECACHE_TAG_DIRTY);
+	xa_unlock_irq(pages);
 	trace_dax_writeback_one(mapping->host, index, size >> PAGE_SHIFT);
- dax_unlock:
-	dax_read_unlock(id);
 	put_locked_mapping_entry(mapping, index);
 	return ret;
 
  put_unlocked:
 	put_unlocked_mapping_entry(mapping, index, entry2);
-	spin_unlock_irq(&mapping->tree_lock);
+	xa_unlock_irq(pages);
 	return ret;
 }
 
@@ -808,8 +844,8 @@ int dax_writeback_mapping_range(struct address_space *mapping,
 				break;
 			}
 
-			ret = dax_writeback_one(bdev, dax_dev, mapping,
-					indices[i], pvec.pages[i]);
+			ret = dax_writeback_one(dax_dev, mapping, indices[i],
+					pvec.pages[i]);
 			if (ret < 0) {
 				mapping_set_error(mapping, ret);
 				goto out;
@@ -877,6 +913,7 @@ static int dax_load_hole(struct address_space *mapping, void *entry,
 	int ret = VM_FAULT_NOPAGE;
 	struct page *zero_page;
 	void *entry2;
+	pfn_t pfn;
 
 	zero_page = ZERO_PAGE(0);
 	if (unlikely(!zero_page)) {
@@ -884,14 +921,15 @@ static int dax_load_hole(struct address_space *mapping, void *entry,
 		goto out;
 	}
 
-	entry2 = dax_insert_mapping_entry(mapping, vmf, entry, 0,
+	pfn = page_to_pfn_t(zero_page);
+	entry2 = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
 			RADIX_DAX_ZERO_PAGE, false);
 	if (IS_ERR(entry2)) {
 		ret = VM_FAULT_SIGBUS;
 		goto out;
 	}
 
-	vm_insert_mixed(vmf->vma, vaddr, page_to_pfn_t(zero_page));
+	vm_insert_mixed(vmf->vma, vaddr, pfn);
 out:
 	trace_dax_load_hole(inode, vmf, ret);
 	return ret;
@@ -1200,8 +1238,7 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
 		if (error < 0)
 			goto error_finish_iomap;
 
-		entry = dax_insert_mapping_entry(mapping, vmf, entry,
-						 dax_iomap_sector(&iomap, pos),
+		entry = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
 						 0, write && !sync);
 		if (IS_ERR(entry)) {
 			error = PTR_ERR(entry);
@@ -1280,13 +1317,15 @@ static int dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap,
 	void *ret = NULL;
 	spinlock_t *ptl;
 	pmd_t pmd_entry;
+	pfn_t pfn;
 
 	zero_page = mm_get_huge_zero_page(vmf->vma->vm_mm);
 
 	if (unlikely(!zero_page))
 		goto fallback;
 
-	ret = dax_insert_mapping_entry(mapping, vmf, entry, 0,
+	pfn = page_to_pfn_t(zero_page);
+	ret = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
 			RADIX_DAX_PMD | RADIX_DAX_ZERO_PAGE, false);
 	if (IS_ERR(ret))
 		goto fallback;
@@ -1409,8 +1448,7 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
 		if (error < 0)
 			goto finish_iomap;
 
-		entry = dax_insert_mapping_entry(mapping, vmf, entry,
-						dax_iomap_sector(&iomap, pos),
+		entry = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
 						RADIX_DAX_PMD, write && !sync);
 		if (IS_ERR(entry))
 			goto finish_iomap;
@@ -1524,21 +1562,21 @@ static int dax_insert_pfn_mkwrite(struct vm_fault *vmf,
 	pgoff_t index = vmf->pgoff;
 	int vmf_ret, error;
 
-	spin_lock_irq(&mapping->tree_lock);
+	xa_lock_irq(&mapping->i_pages);
 	entry = get_unlocked_mapping_entry(mapping, index, &slot);
 	/* Did we race with someone splitting entry or so? */
 	if (!entry ||
 	    (pe_size == PE_SIZE_PTE && !dax_is_pte_entry(entry)) ||
 	    (pe_size == PE_SIZE_PMD && !dax_is_pmd_entry(entry))) {
 		put_unlocked_mapping_entry(mapping, index, entry);
-		spin_unlock_irq(&mapping->tree_lock);
+		xa_unlock_irq(&mapping->i_pages);
 		trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf,
 						      VM_FAULT_NOPAGE);
 		return VM_FAULT_NOPAGE;
 	}
-	radix_tree_tag_set(&mapping->page_tree, index, PAGECACHE_TAG_DIRTY);
+	radix_tree_tag_set(&mapping->i_pages, index, PAGECACHE_TAG_DIRTY);
 	entry = lock_slot(mapping, slot);
-	spin_unlock_irq(&mapping->tree_lock);
+	xa_unlock_irq(&mapping->i_pages);
 	switch (pe_size) {
 	case PE_SIZE_PTE:
 		error = vm_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn);
diff --git a/fs/dcache.c b/fs/dcache.c
index 593079176123..86d2de63461e 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -257,11 +257,25 @@ static void __d_free(struct rcu_head *head)
 	kmem_cache_free(dentry_cache, dentry); 
 }
 
+static void __d_free_external_name(struct rcu_head *head)
+{
+	struct external_name *name = container_of(head, struct external_name,
+						  u.head);
+
+	mod_node_page_state(page_pgdat(virt_to_page(name)),
+			    NR_INDIRECTLY_RECLAIMABLE_BYTES,
+			    -ksize(name));
+
+	kfree(name);
+}
+
 static void __d_free_external(struct rcu_head *head)
 {
 	struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
-	kfree(external_name(dentry));
-	kmem_cache_free(dentry_cache, dentry); 
+
+	__d_free_external_name(&external_name(dentry)->u.head);
+
+	kmem_cache_free(dentry_cache, dentry);
 }
 
 static inline int dname_external(const struct dentry *dentry)
@@ -291,7 +305,7 @@ void release_dentry_name_snapshot(struct name_snapshot *name)
 		struct external_name *p;
 		p = container_of(name->name, struct external_name, name[0]);
 		if (unlikely(atomic_dec_and_test(&p->u.count)))
-			kfree_rcu(p, u.head);
+			call_rcu(&p->u.head, __d_free_external_name);
 	}
 }
 EXPORT_SYMBOL(release_dentry_name_snapshot);
@@ -1038,6 +1052,8 @@ static void shrink_dentry_list(struct list_head *list)
 	while (!list_empty(list)) {
 		struct dentry *dentry, *parent;
 
+		cond_resched();
+
 		dentry = list_entry(list->prev, struct dentry, d_lru);
 		spin_lock(&dentry->d_lock);
 		rcu_read_lock();
@@ -1191,7 +1207,6 @@ void shrink_dcache_sb(struct super_block *sb)
 
 		this_cpu_sub(nr_dentry_unused, freed);
 		shrink_dentry_list(&dispose);
-		cond_resched();
 	} while (list_lru_count(&sb->s_dentry_lru) > 0);
 }
 EXPORT_SYMBOL(shrink_dcache_sb);
@@ -1473,7 +1488,6 @@ void shrink_dcache_parent(struct dentry *parent)
 			break;
 
 		shrink_dentry_list(&data.dispose);
-		cond_resched();
 	}
 }
 EXPORT_SYMBOL(shrink_dcache_parent);
@@ -1600,7 +1614,6 @@ void d_invalidate(struct dentry *dentry)
 			detach_mounts(data.mountpoint);
 			dput(data.mountpoint);
 		}
-		cond_resched();
 	}
 }
 EXPORT_SYMBOL(d_invalidate);
@@ -1617,6 +1630,7 @@ EXPORT_SYMBOL(d_invalidate);
  
 struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
 {
+	struct external_name *ext = NULL;
 	struct dentry *dentry;
 	char *dname;
 	int err;
@@ -1637,14 +1651,14 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
 		dname = dentry->d_iname;
 	} else if (name->len > DNAME_INLINE_LEN-1) {
 		size_t size = offsetof(struct external_name, name[1]);
-		struct external_name *p = kmalloc(size + name->len,
-						  GFP_KERNEL_ACCOUNT);
-		if (!p) {
+
+		ext = kmalloc(size + name->len, GFP_KERNEL_ACCOUNT);
+		if (!ext) {
 			kmem_cache_free(dentry_cache, dentry); 
 			return NULL;
 		}
-		atomic_set(&p->u.count, 1);
-		dname = p->name;
+		atomic_set(&ext->u.count, 1);
+		dname = ext->name;
 	} else  {
 		dname = dentry->d_iname;
 	}	
@@ -1683,6 +1697,12 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
 		}
 	}
 
+	if (unlikely(ext)) {
+		pg_data_t *pgdat = page_pgdat(virt_to_page(ext));
+		mod_node_page_state(pgdat, NR_INDIRECTLY_RECLAIMABLE_BYTES,
+				    ksize(ext));
+	}
+
 	this_cpu_inc(nr_dentry);
 
 	return dentry;
@@ -2770,7 +2790,7 @@ static void copy_name(struct dentry *dentry, struct dentry *target)
 		dentry->d_name.hash_len = target->d_name.hash_len;
 	}
 	if (old_name && likely(atomic_dec_and_test(&old_name->u.count)))
-		kfree_rcu(old_name, u.head);
+		call_rcu(&old_name->u.head, __d_free_external_name);
 }
 
 /*
diff --git a/fs/exec.c b/fs/exec.c
index 7eb8d21bcab9..183059c427b9 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -257,7 +257,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
 		 *    to work from.
 		 */
 		limit = _STK_LIM / 4 * 3;
-		limit = min(limit, rlimit(RLIMIT_STACK) / 4);
+		limit = min(limit, bprm->rlim_stack.rlim_cur / 4);
 		if (size > limit)
 			goto fail;
 	}
@@ -411,6 +411,11 @@ static int bprm_mm_init(struct linux_binprm *bprm)
 	if (!mm)
 		goto err;
 
+	/* Save current stack limit for all calculations made during exec. */
+	task_lock(current->group_leader);
+	bprm->rlim_stack = current->signal->rlim[RLIMIT_STACK];
+	task_unlock(current->group_leader);
+
 	err = __bprm_mm_init(bprm);
 	if (err)
 		goto err;
@@ -697,7 +702,7 @@ int setup_arg_pages(struct linux_binprm *bprm,
 
 #ifdef CONFIG_STACK_GROWSUP
 	/* Limit stack size */
-	stack_base = rlimit_max(RLIMIT_STACK);
+	stack_base = bprm->rlim_stack.rlim_max;
 	if (stack_base > STACK_SIZE_MAX)
 		stack_base = STACK_SIZE_MAX;
 
@@ -770,7 +775,7 @@ int setup_arg_pages(struct linux_binprm *bprm,
 	 * Align this down to a page boundary as expand_stack
 	 * will align it up.
 	 */
-	rlim_stack = rlimit(RLIMIT_STACK) & PAGE_MASK;
+	rlim_stack = bprm->rlim_stack.rlim_cur & PAGE_MASK;
 #ifdef CONFIG_STACK_GROWSUP
 	if (stack_size + stack_expand > rlim_stack)
 		stack_base = vma->vm_start + rlim_stack;
@@ -895,13 +900,13 @@ int kernel_read_file(struct file *file, void **buf, loff_t *size,
 	if (!S_ISREG(file_inode(file)->i_mode) || max_size < 0)
 		return -EINVAL;
 
-	ret = security_kernel_read_file(file, id);
+	ret = deny_write_access(file);
 	if (ret)
 		return ret;
 
-	ret = deny_write_access(file);
+	ret = security_kernel_read_file(file, id);
 	if (ret)
-		return ret;
+		goto out;
 
 	i_size = i_size_read(file_inode(file));
 	if (max_size > 0 && i_size > max_size) {
@@ -1341,11 +1346,11 @@ void setup_new_exec(struct linux_binprm * bprm)
 		 * RLIMIT_STACK, but after the point of no return to avoid
 		 * needing to clean up the change on failure.
 		 */
-		if (current->signal->rlim[RLIMIT_STACK].rlim_cur > _STK_LIM)
-			current->signal->rlim[RLIMIT_STACK].rlim_cur = _STK_LIM;
+		if (bprm->rlim_stack.rlim_cur > _STK_LIM)
+			bprm->rlim_stack.rlim_cur = _STK_LIM;
 	}
 
-	arch_pick_mmap_layout(current->mm);
+	arch_pick_mmap_layout(current->mm, &bprm->rlim_stack);
 
 	current->sas_ss_sp = current->sas_ss_size = 0;
 
@@ -1378,6 +1383,16 @@ void setup_new_exec(struct linux_binprm * bprm)
 }
 EXPORT_SYMBOL(setup_new_exec);
 
+/* Runs immediately before start_thread() takes over. */
+void finalize_exec(struct linux_binprm *bprm)
+{
+	/* Store any stack rlimit changes before starting thread. */
+	task_lock(current->group_leader);
+	current->signal->rlim[RLIMIT_STACK] = bprm->rlim_stack;
+	task_unlock(current->group_leader);
+}
+EXPORT_SYMBOL(finalize_exec);
+
 /*
  * Prepare credentials and lock ->cred_guard_mutex.
  * install_exec_creds() commits the new creds and drops the lock.
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 329a5d103846..645158dc33f1 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -435,6 +435,15 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
 	if (IS_ERR_OR_NULL(result))
 		return ERR_PTR(-ESTALE);
 
+	/*
+	 * If no acceptance criterion was specified by the caller, a disconnected
+	 * dentry is also acceptable. Callers may use this mode to query if
+	 * file handle is stale or to get a reference to an inode without
+	 * risking the high overhead caused by directory reconnect.
+	 */
+	if (!acceptable)
+		return result;
+
 	if (d_is_dir(result)) {
 		/*
 		 * This request is for a directory.
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 032295e1d386..cc40802ddfa8 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -814,6 +814,7 @@ extern const struct inode_operations ext2_file_inode_operations;
 extern const struct file_operations ext2_file_operations;
 
 /* inode.c */
+extern void ext2_set_file_ops(struct inode *inode);
 extern const struct address_space_operations ext2_aops;
 extern const struct address_space_operations ext2_nobh_aops;
 extern const struct iomap_ops ext2_iomap_ops;
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 9b2ac55ac34f..1e01fabef130 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -940,9 +940,6 @@ ext2_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 	loff_t offset = iocb->ki_pos;
 	ssize_t ret;
 
-	if (WARN_ON_ONCE(IS_DAX(inode)))
-		return -EIO;
-
 	ret = blockdev_direct_IO(iocb, inode, iter, ext2_get_block);
 	if (ret < 0 && iov_iter_rw(iter) == WRITE)
 		ext2_write_failed(mapping, offset + count);
@@ -952,17 +949,16 @@ ext2_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 static int
 ext2_writepages(struct address_space *mapping, struct writeback_control *wbc)
 {
-#ifdef CONFIG_FS_DAX
-	if (dax_mapping(mapping)) {
-		return dax_writeback_mapping_range(mapping,
-						   mapping->host->i_sb->s_bdev,
-						   wbc);
-	}
-#endif
-
 	return mpage_writepages(mapping, wbc, ext2_get_block);
 }
 
+static int
+ext2_dax_writepages(struct address_space *mapping, struct writeback_control *wbc)
+{	/* ->writepages of ext2_dax_aops: hand off to dax_writeback_mapping_range() */
+	return dax_writeback_mapping_range(mapping,
+			mapping->host->i_sb->s_bdev, wbc);
+}
+
 const struct address_space_operations ext2_aops = {
 	.readpage		= ext2_readpage,
 	.readpages		= ext2_readpages,
@@ -990,6 +986,13 @@ const struct address_space_operations ext2_nobh_aops = {
 	.error_remove_page	= generic_error_remove_page,
 };
 
+static const struct address_space_operations ext2_dax_aops = {	/* installed by ext2_set_file_ops() when IS_DAX(inode) */
+	.writepages		= ext2_dax_writepages,
+	.direct_IO		= noop_direct_IO,
+	.set_page_dirty		= noop_set_page_dirty,
+	.invalidatepage		= noop_invalidatepage,
+};
+
 /*
  * Probably it should be a library function... search for first non-zero word
  * or memcmp with zero_page, whatever is better for particular architecture.
@@ -1388,6 +1391,18 @@ void ext2_set_inode_flags(struct inode *inode)
 		inode->i_flags |= S_DAX;
 }
 
+void ext2_set_file_ops(struct inode *inode)
+{	/* set the regular-file i_op/i_fop and choose the mapping's a_ops */
+	inode->i_op = &ext2_file_inode_operations;
+	inode->i_fop = &ext2_file_operations;
+	if (IS_DAX(inode))	/* tested first, so DAX takes precedence over the NOBH option */
+		inode->i_mapping->a_ops = &ext2_dax_aops;
+	else if (test_opt(inode->i_sb, NOBH))
+		inode->i_mapping->a_ops = &ext2_nobh_aops;
+	else
+		inode->i_mapping->a_ops = &ext2_aops;
+}
+
 struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
 {
 	struct ext2_inode_info *ei;
@@ -1480,14 +1495,7 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
 		ei->i_data[n] = raw_inode->i_block[n];
 
 	if (S_ISREG(inode->i_mode)) {
-		inode->i_op = &ext2_file_inode_operations;
-		if (test_opt(inode->i_sb, NOBH)) {
-			inode->i_mapping->a_ops = &ext2_nobh_aops;
-			inode->i_fop = &ext2_file_operations;
-		} else {
-			inode->i_mapping->a_ops = &ext2_aops;
-			inode->i_fop = &ext2_file_operations;
-		}
+		ext2_set_file_ops(inode);
 	} else if (S_ISDIR(inode->i_mode)) {
 		inode->i_op = &ext2_dir_inode_operations;
 		inode->i_fop = &ext2_dir_operations;
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index e078075dc66f..55f7caadb093 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -107,14 +107,7 @@ static int ext2_create (struct inode * dir, struct dentry * dentry, umode_t mode
 	if (IS_ERR(inode))
 		return PTR_ERR(inode);
 
-	inode->i_op = &ext2_file_inode_operations;
-	if (test_opt(inode->i_sb, NOBH)) {
-		inode->i_mapping->a_ops = &ext2_nobh_aops;
-		inode->i_fop = &ext2_file_operations;
-	} else {
-		inode->i_mapping->a_ops = &ext2_aops;
-		inode->i_fop = &ext2_file_operations;
-	}
+	ext2_set_file_ops(inode);
 	mark_inode_dirty(inode);
 	return ext2_add_nondir(dentry, inode);
 }
@@ -125,14 +118,7 @@ static int ext2_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
 	if (IS_ERR(inode))
 		return PTR_ERR(inode);
 
-	inode->i_op = &ext2_file_inode_operations;
-	if (test_opt(inode->i_sb, NOBH)) {
-		inode->i_mapping->a_ops = &ext2_nobh_aops;
-		inode->i_fop = &ext2_file_operations;
-	} else {
-		inode->i_mapping->a_ops = &ext2_aops;
-		inode->i_fop = &ext2_file_operations;
-	}
+	ext2_set_file_ops(inode);
 	mark_inode_dirty(inode);
 	d_tmpfile(dentry, inode);
 	unlock_new_inode(inode);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 129205028300..1e50c5efae67 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2716,12 +2716,6 @@ static int ext4_writepages(struct address_space *mapping,
 	percpu_down_read(&sbi->s_journal_flag_rwsem);
 	trace_ext4_writepages(inode, wbc);
 
-	if (dax_mapping(mapping)) {
-		ret = dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev,
-						  wbc);
-		goto out_writepages;
-	}
-
 	/*
 	 * No pages to write? This is mainly a kludge to avoid starting
 	 * a transaction for special inodes like journal inode on last iput()
@@ -2942,6 +2936,27 @@ out_writepages:
 	return ret;
 }
 
+static int ext4_dax_writepages(struct address_space *mapping,
+			       struct writeback_control *wbc)
+{	/* ->writepages of ext4_dax_aops; returns -EIO after a forced shutdown */
+	int ret;
+	long nr_to_write = wbc->nr_to_write;	/* initial value; the delta is passed to the result tracepoint */
+	struct inode *inode = mapping->host;
+	struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
+
+	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))	/* same superblock info as sbi */
+		return -EIO;
+
+	percpu_down_read(&sbi->s_journal_flag_rwsem);	/* held across the whole writeback call */
+	trace_ext4_writepages(inode, wbc);
+
+	ret = dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev, wbc);
+	trace_ext4_writepages_result(inode, wbc, ret,
+				     nr_to_write - wbc->nr_to_write);
+	percpu_up_read(&sbi->s_journal_flag_rwsem);
+	return ret;
+}
+
 static int ext4_nonda_switch(struct super_block *sb)
 {
 	s64 free_clusters, dirty_clusters;
@@ -3845,10 +3860,6 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 	if (ext4_has_inline_data(inode))
 		return 0;
 
-	/* DAX uses iomap path now */
-	if (WARN_ON_ONCE(IS_DAX(inode)))
-		return 0;
-
 	trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
 	if (iov_iter_rw(iter) == READ)
 		ret = ext4_direct_IO_read(iocb, iter);
@@ -3934,6 +3945,13 @@ static const struct address_space_operations ext4_da_aops = {
 	.error_remove_page	= generic_error_remove_page,
 };
 
+static const struct address_space_operations ext4_dax_aops = {	/* chosen by ext4_set_aops() when IS_DAX(inode) */
+	.writepages		= ext4_dax_writepages,
+	.direct_IO		= noop_direct_IO,
+	.set_page_dirty		= noop_set_page_dirty,
+	.invalidatepage		= noop_invalidatepage,
+};
+
 void ext4_set_aops(struct inode *inode)
 {
 	switch (ext4_inode_journal_mode(inode)) {
@@ -3946,7 +3964,9 @@ void ext4_set_aops(struct inode *inode)
 	default:
 		BUG();
 	}
-	if (test_opt(inode->i_sb, DELALLOC))
+	if (IS_DAX(inode))
+		inode->i_mapping->a_ops = &ext4_dax_aops;
+	else if (test_opt(inode->i_sb, DELALLOC))
 		inode->i_mapping->a_ops = &ext4_da_aops;
 	else
 		inode->i_mapping->a_ops = &ext4_aops;
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index db50686f5096..02237d4d91f5 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2424,12 +2424,12 @@ void f2fs_set_page_dirty_nobuffers(struct page *page)
 	SetPageDirty(page);
 	spin_unlock(&mapping->private_lock);
 
-	spin_lock_irqsave(&mapping->tree_lock, flags);
+	xa_lock_irqsave(&mapping->i_pages, flags);
 	WARN_ON_ONCE(!PageUptodate(page));
 	account_page_dirtied(page, mapping);
-	radix_tree_tag_set(&mapping->page_tree,
+	radix_tree_tag_set(&mapping->i_pages,
 			page_index(page), PAGECACHE_TAG_DIRTY);
-	spin_unlock_irqrestore(&mapping->tree_lock, flags);
+	xa_unlock_irqrestore(&mapping->i_pages, flags);
 	unlock_page_memcg(page);
 
 	__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index fe661274ff10..8c9c2f31b253 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -732,10 +732,10 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
 
 	if (bit_pos == NR_DENTRY_IN_BLOCK &&
 			!truncate_hole(dir, page->index, page->index + 1)) {
-		spin_lock_irqsave(&mapping->tree_lock, flags);
-		radix_tree_tag_clear(&mapping->page_tree, page_index(page),
+		xa_lock_irqsave(&mapping->i_pages, flags);
+		radix_tree_tag_clear(&mapping->i_pages, page_index(page),
 				     PAGECACHE_TAG_DIRTY);
-		spin_unlock_irqrestore(&mapping->tree_lock, flags);
+		xa_unlock_irqrestore(&mapping->i_pages, flags);
 
 		clear_page_dirty_for_io(page);
 		ClearPagePrivate(page);
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index bfb7a4a3a929..9327411fd93b 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -1015,7 +1015,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
 	unsigned int init_segno = segno;
 	struct gc_inode_list gc_list = {
 		.ilist = LIST_HEAD_INIT(gc_list.ilist),
-		.iroot = RADIX_TREE_INIT(GFP_NOFS),
+		.iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
 	};
 
 	trace_f2fs_gc_begin(sbi->sb, sync, background,
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 3b77d6421218..265da200daa8 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -226,10 +226,10 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page)
 	kunmap_atomic(src_addr);
 	set_page_dirty(dn.inode_page);
 
-	spin_lock_irqsave(&mapping->tree_lock, flags);
-	radix_tree_tag_clear(&mapping->page_tree, page_index(page),
+	xa_lock_irqsave(&mapping->i_pages, flags);
+	radix_tree_tag_clear(&mapping->i_pages, page_index(page),
 			     PAGECACHE_TAG_DIRTY);
-	spin_unlock_irqrestore(&mapping->tree_lock, flags);
+	xa_unlock_irqrestore(&mapping->i_pages, flags);
 
 	set_inode_flag(inode, FI_APPEND_WRITE);
 	set_inode_flag(inode, FI_DATA_EXIST);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 9a99243054ba..f202398e20ea 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -91,11 +91,11 @@ static void clear_node_page_dirty(struct page *page)
 	unsigned int long flags;
 
 	if (PageDirty(page)) {
-		spin_lock_irqsave(&mapping->tree_lock, flags);
-		radix_tree_tag_clear(&mapping->page_tree,
+		xa_lock_irqsave(&mapping->i_pages, flags);
+		radix_tree_tag_clear(&mapping->i_pages,
 				page_index(page),
 				PAGECACHE_TAG_DIRTY);
-		spin_unlock_irqrestore(&mapping->tree_lock, flags);
+		xa_unlock_irqrestore(&mapping->i_pages, flags);
 
 		clear_page_dirty_for_io(page);
 		dec_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_NODES);
@@ -1161,7 +1161,7 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
 	f2fs_bug_on(sbi, check_nid_range(sbi, nid));
 
 	rcu_read_lock();
-	apage = radix_tree_lookup(&NODE_MAPPING(sbi)->page_tree, nid);
+	apage = radix_tree_lookup(&NODE_MAPPING(sbi)->i_pages, nid);
 	rcu_read_unlock();
 	if (apage)
 		return;
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 1280f915079b..4b12ba70a895 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -347,9 +347,9 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
 	 * By the time control reaches here, RCU grace period has passed
 	 * since I_WB_SWITCH assertion and all wb stat update transactions
 	 * between unlocked_inode_to_wb_begin/end() are guaranteed to be
-	 * synchronizing against mapping->tree_lock.
+	 * synchronizing against the i_pages lock.
 	 *
-	 * Grabbing old_wb->list_lock, inode->i_lock and mapping->tree_lock
+	 * Grabbing old_wb->list_lock, inode->i_lock and the i_pages lock
 	 * gives us exclusion against all wb related operations on @inode
 	 * including IO list manipulations and stat updates.
 	 */
@@ -361,7 +361,7 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
 		spin_lock_nested(&old_wb->list_lock, SINGLE_DEPTH_NESTING);
 	}
 	spin_lock(&inode->i_lock);
-	spin_lock_irq(&mapping->tree_lock);
+	xa_lock_irq(&mapping->i_pages);
 
 	/*
 	 * Once I_FREEING is visible under i_lock, the eviction path owns
@@ -373,22 +373,22 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
 	/*
 	 * Count and transfer stats.  Note that PAGECACHE_TAG_DIRTY points
 	 * to possibly dirty pages while PAGECACHE_TAG_WRITEBACK points to
-	 * pages actually under underwriteback.
+	 * pages actually under writeback.
 	 */
-	radix_tree_for_each_tagged(slot, &mapping->page_tree, &iter, 0,
+	radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, 0,
 				   PAGECACHE_TAG_DIRTY) {
 		struct page *page = radix_tree_deref_slot_protected(slot,
-							&mapping->tree_lock);
+						&mapping->i_pages.xa_lock);
 		if (likely(page) && PageDirty(page)) {
 			dec_wb_stat(old_wb, WB_RECLAIMABLE);
 			inc_wb_stat(new_wb, WB_RECLAIMABLE);
 		}
 	}
 
-	radix_tree_for_each_tagged(slot, &mapping->page_tree, &iter, 0,
+	radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, 0,
 				   PAGECACHE_TAG_WRITEBACK) {
 		struct page *page = radix_tree_deref_slot_protected(slot,
-							&mapping->tree_lock);
+						&mapping->i_pages.xa_lock);
 		if (likely(page)) {
 			WARN_ON_ONCE(!PageWriteback(page));
 			dec_wb_stat(old_wb, WB_WRITEBACK);
@@ -430,7 +430,7 @@ skip_switch:
 	 */
 	smp_store_release(&inode->i_state, inode->i_state & ~I_WB_SWITCH);
 
-	spin_unlock_irq(&mapping->tree_lock);
+	xa_unlock_irq(&mapping->i_pages);
 	spin_unlock(&inode->i_lock);
 	spin_unlock(&new_wb->list_lock);
 	spin_unlock(&old_wb->list_lock);
@@ -506,8 +506,8 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
 
 	/*
 	 * In addition to synchronizing among switchers, I_WB_SWITCH tells
-	 * the RCU protected stat update paths to grab the mapping's
-	 * tree_lock so that stat transfer can synchronize against them.
+	 * the RCU protected stat update paths to grab the i_pages
+	 * lock so that stat transfer can synchronize against them.
 	 * Let's continue after I_WB_SWITCH is guaranteed to be visible.
 	 */
 	call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn);
diff --git a/fs/fscache/cache.c b/fs/fscache/cache.c
index 56cce7fdd39e..c184c5a356ff 100644
--- a/fs/fscache/cache.c
+++ b/fs/fscache/cache.c
@@ -125,7 +125,7 @@ struct fscache_cache *fscache_select_cache_for_object(
 	}
 
 	/* the parent is unbacked */
-	if (cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) {
+	if (cookie->type != FSCACHE_COOKIE_TYPE_INDEX) {
 		/* cookie not an index and is unbacked */
 		spin_unlock(&cookie->lock);
 		_leave(" = NULL [cookie ub,ni]");
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index d705125665f0..97137d7ec5ee 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -21,12 +21,54 @@ struct kmem_cache *fscache_cookie_jar;
 
 static atomic_t fscache_object_debug_id = ATOMIC_INIT(0);
 
-static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie);
+#define fscache_cookie_hash_shift 15
+static struct hlist_bl_head fscache_cookie_hash[1 << fscache_cookie_hash_shift];
+
+static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie,
+					    loff_t object_size);
 static int fscache_alloc_object(struct fscache_cache *cache,
 				struct fscache_cookie *cookie);
 static int fscache_attach_object(struct fscache_cookie *cookie,
 				 struct fscache_object *object);
 
+static void fscache_print_cookie(struct fscache_cookie *cookie, char prefix)
+{	/* dump a cookie's fields and its key (as hex) via pr_err(); @prefix starts each line */
+	struct hlist_node *object;
+	const u8 *k;
+	unsigned loop;
+
+	pr_err("%c-cookie c=%p [p=%p fl=%lx nc=%u na=%u]\n",
+	       prefix, cookie, cookie->parent, cookie->flags,
+	       atomic_read(&cookie->n_children),
+	       atomic_read(&cookie->n_active));
+	pr_err("%c-cookie d=%p n=%p\n",
+	       prefix, cookie->def, cookie->netfs_data);
+
+	object = READ_ONCE(cookie->backing_objects.first);	/* only the first backing object is reported */
+	if (object)
+		pr_err("%c-cookie o=%p\n",
+		       prefix, hlist_entry(object, struct fscache_object, cookie_link));
+
+	pr_err("%c-key=[%u] '", prefix, cookie->key_len);
+	k = (cookie->key_len <= sizeof(cookie->inline_key)) ?	/* short keys are read from the inline array */
+		cookie->inline_key : cookie->key;
+	for (loop = 0; loop < cookie->key_len; loop++)
+		pr_cont("%02x", k[loop]);
+	pr_cont("'\n");
+}
+
+void fscache_free_cookie(struct fscache_cookie *cookie)
+{	/* free a cookie plus any out-of-line key/aux buffer; a NULL cookie is a no-op */
+	if (cookie) {
+		BUG_ON(!hlist_empty(&cookie->backing_objects));	/* must have no backing objects left */
+		if (cookie->aux_len > sizeof(cookie->inline_aux))	/* aux is a separate buffer only when too big to inline */
+			kfree(cookie->aux);
+		if (cookie->key_len > sizeof(cookie->inline_key))	/* likewise for the key */
+			kfree(cookie->key);
+		kmem_cache_free(fscache_cookie_jar, cookie);
+	}
+}
+
 /*
  * initialise an cookie jar slab element prior to any use
  */
@@ -41,6 +83,170 @@ void fscache_cookie_init_once(void *_cookie)
 }
 
 /*
+ * Set the index key in a cookie.  A key that fits in cookie->inline_key is
+ * stored there; a longer one goes in a kzalloc'd buffer hung off cookie->key,
+ * sized up to a whole number of u32 words because the hash below is summed a
+ * word at a time.  Returns 0 on success or -ENOMEM if the allocation fails.
+ */
+static int fscache_set_key(struct fscache_cookie *cookie,
+			   const void *index_key, size_t index_key_len)
+{
+	unsigned long long h;
+	u32 *buf;
+	int i;
+
+	cookie->key_len = index_key_len;
+
+	if (index_key_len > sizeof(cookie->inline_key)) {
+		buf = kzalloc(round_up(index_key_len, sizeof(u32)), GFP_KERNEL);
+		if (!buf)
+			return -ENOMEM;
+		cookie->key = buf;
+	} else {
+		/* Clear the whole inline area, not just the first three words,
+		 * so the padding in the last word hashed is always zero. */
+		memset(cookie->inline_key, 0, sizeof(cookie->inline_key));
+		buf = (u32 *)cookie->inline_key;
+	}
+
+	memcpy(buf, index_key, index_key_len);
+
+	/* Sum the parent pointer, key length, cookie type and each u32 word of
+	 * the padded key, then fold the top half of the sum into the bottom.
+	 * NOTE(review): needs cookie->parent and ->type set by the caller. */
+	h = (unsigned long)cookie->parent;
+	h += index_key_len + cookie->type;
+	for (i = 0; i < (index_key_len + sizeof(u32) - 1) / sizeof(u32); i++)
+		h += buf[i];
+
+	cookie->key_hash = h ^ (h >> 32);
+	return 0;
+}
+
+static long fscache_compare_cookie(const struct fscache_cookie *a,
+				   const struct fscache_cookie *b)
+{	/* returns 0 only when hash, parent, key length, type and key bytes all match */
+	const void *ka, *kb;
+
+	if (a->key_hash != b->key_hash)	/* cheapest test first */
+		return (long)a->key_hash - (long)b->key_hash;
+	if (a->parent != b->parent)
+		return (long)a->parent - (long)b->parent;
+	if (a->key_len != b->key_len)
+		return (long)a->key_len - (long)b->key_len;
+	if (a->type != b->type)
+		return (long)a->type - (long)b->type;
+
+	if (a->key_len <= sizeof(a->inline_key)) {	/* key_len is equal for a and b by this point */
+		ka = &a->inline_key;
+		kb = &b->inline_key;
+	} else {
+		ka = a->key;
+		kb = b->key;
+	}
+	return memcmp(ka, kb, a->key_len);
+}
+
+/*
+ * Allocate a cookie and initialise it from the key, aux data and definition.
+ */
+struct fscache_cookie *fscache_alloc_cookie(
+	struct fscache_cookie *parent,
+	const struct fscache_cookie_def *def,
+	const void *index_key, size_t index_key_len,
+	const void *aux_data, size_t aux_data_len,
+	void *netfs_data,
+	loff_t object_size)
+{
+	struct fscache_cookie *cookie;
+
+	cookie = kmem_cache_alloc(fscache_cookie_jar, GFP_KERNEL);
+	if (!cookie)
+		return NULL;
+
+	/* Not zeroed by the allocator: never let nomem kfree() a stale pointer. */
+	cookie->key		= NULL;
+	cookie->aux		= NULL;
+	cookie->aux_len		= aux_data_len;
+
+	/* fscache_set_key() hashes parent and type (and sets key_len): set first. */
+	cookie->parent		= parent;
+	cookie->type		= def->type;
+	if (fscache_set_key(cookie, index_key, index_key_len) < 0)
+		goto nomem;
+
+	if (cookie->aux_len <= sizeof(cookie->inline_aux)) {
+		memcpy(cookie->inline_aux, aux_data, cookie->aux_len);
+	} else {
+		cookie->aux = kmemdup(aux_data, cookie->aux_len, GFP_KERNEL);
+		if (!cookie->aux)
+			goto nomem;
+	}
+
+	atomic_set(&cookie->usage, 1);
+	atomic_set(&cookie->n_children, 0);
+
+	/* We keep the active count elevated until relinquishment to prevent an
+	 * attempt to wake up every time the object operations queue quiesces. */
+	atomic_set(&cookie->n_active, 1);
+
+	cookie->def		= def;
+	cookie->netfs_data	= netfs_data;
+	cookie->flags		= (1 << FSCACHE_COOKIE_NO_DATA_YET);
+	/* radix tree insertion won't use the preallocation pool unless it's
+	 * told it may not wait */
+	INIT_RADIX_TREE(&cookie->stores, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
+	return cookie;
+
+nomem:
+	fscache_free_cookie(cookie);
+	return NULL;
+}
+
+/*
+ * Attempt to insert the new cookie into the hash.  If there's a collision, we
+ * return the old cookie if it wasn't already marked acquired, else NULL.
+ */
+struct fscache_cookie *fscache_hash_cookie(struct fscache_cookie *candidate)
+{
+	struct fscache_cookie *cursor;
+	struct hlist_bl_head *h;
+	struct hlist_bl_node *p;
+	unsigned int bucket;
+
+	bucket = candidate->key_hash & (ARRAY_SIZE(fscache_cookie_hash) - 1);
+	h = &fscache_cookie_hash[bucket];
+
+	hlist_bl_lock(h);
+	hlist_bl_for_each_entry(cursor, p, h, hash_link) {
+		if (fscache_compare_cookie(candidate, cursor) == 0)
+			goto collision;
+	}
+
+	__set_bit(FSCACHE_COOKIE_ACQUIRED, &candidate->flags);	/* no match found: the candidate goes in */
+	fscache_cookie_get(candidate->parent, fscache_cookie_get_acquire_parent);
+	atomic_inc(&candidate->parent->n_children);	/* NOTE(review): parent is dereferenced unchecked - confirm callers never pass a parentless cookie */
+	hlist_bl_add_head(&candidate->hash_link, h);
+	hlist_bl_unlock(h);
+	return candidate;
+
+collision:	/* reached with the bucket lock still held */
+	if (test_and_set_bit(FSCACHE_COOKIE_ACQUIRED, &cursor->flags)) {
+		trace_fscache_cookie(cursor, fscache_cookie_collision,
+				     atomic_read(&cursor->usage));
+		pr_err("Duplicate cookie detected\n");
+		fscache_print_cookie(cursor, 'O');
+		fscache_print_cookie(candidate, 'N');
+		hlist_bl_unlock(h);
+		return NULL;
+	}
+
+	fscache_cookie_get(cursor, fscache_cookie_get_reacquire);
+	hlist_bl_unlock(h);
+	return cursor;
+}
+
+/*
  * request a cookie to represent an object (index, datafile, xattr, etc)
  * - parent specifies the parent object
  *   - the top level index cookie for each netfs is stored in the fscache_netfs
@@ -58,10 +264,13 @@ void fscache_cookie_init_once(void *_cookie)
 struct fscache_cookie *__fscache_acquire_cookie(
 	struct fscache_cookie *parent,
 	const struct fscache_cookie_def *def,
+	const void *index_key, size_t index_key_len,
+	const void *aux_data, size_t aux_data_len,
 	void *netfs_data,
+	loff_t object_size,
 	bool enable)
 {
-	struct fscache_cookie *cookie;
+	struct fscache_cookie *candidate, *cookie;
 
 	BUG_ON(!def);
 
@@ -69,6 +278,13 @@ struct fscache_cookie *__fscache_acquire_cookie(
 	       parent ? (char *) parent->def->name : "<no-parent>",
 	       def->name, netfs_data, enable);
 
+	if (!index_key || !index_key_len || index_key_len > 255 || aux_data_len > 255)
+		return NULL;
+	if (!aux_data || !aux_data_len) {
+		aux_data = NULL;
+		aux_data_len = 0;
+	}
+
 	fscache_stat(&fscache_n_acquires);
 
 	/* if there's no parent cookie, then we don't create one here either */
@@ -79,41 +295,31 @@ struct fscache_cookie *__fscache_acquire_cookie(
 	}
 
 	/* validate the definition */
-	BUG_ON(!def->get_key);
 	BUG_ON(!def->name[0]);
 
 	BUG_ON(def->type == FSCACHE_COOKIE_TYPE_INDEX &&
-	       parent->def->type != FSCACHE_COOKIE_TYPE_INDEX);
+	       parent->type != FSCACHE_COOKIE_TYPE_INDEX);
 
-	/* allocate and initialise a cookie */
-	cookie = kmem_cache_alloc(fscache_cookie_jar, GFP_KERNEL);
-	if (!cookie) {
+	candidate = fscache_alloc_cookie(parent, def,
+					 index_key, index_key_len,
+					 aux_data, aux_data_len,
+					 netfs_data, object_size);
+	if (!candidate) {
 		fscache_stat(&fscache_n_acquires_oom);
 		_leave(" [ENOMEM]");
 		return NULL;
 	}
 
-	atomic_set(&cookie->usage, 1);
-	atomic_set(&cookie->n_children, 0);
-
-	/* We keep the active count elevated until relinquishment to prevent an
-	 * attempt to wake up every time the object operations queue quiesces.
-	 */
-	atomic_set(&cookie->n_active, 1);
-
-	atomic_inc(&parent->usage);
-	atomic_inc(&parent->n_children);
+	cookie = fscache_hash_cookie(candidate);
+	if (!cookie) {
+		trace_fscache_cookie(candidate, fscache_cookie_discard, 1);
+		goto out;
+	}
 
-	cookie->def		= def;
-	cookie->parent		= parent;
-	cookie->netfs_data	= netfs_data;
-	cookie->flags		= (1 << FSCACHE_COOKIE_NO_DATA_YET);
+	if (cookie == candidate)
+		candidate = NULL;
 
-	/* radix tree insertion won't use the preallocation pool unless it's
-	 * told it may not wait */
-	INIT_RADIX_TREE(&cookie->stores, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
-
-	switch (cookie->def->type) {
+	switch (cookie->type) {
 	case FSCACHE_COOKIE_TYPE_INDEX:
 		fscache_stat(&fscache_n_cookie_index);
 		break;
@@ -125,16 +331,19 @@ struct fscache_cookie *__fscache_acquire_cookie(
 		break;
 	}
 
+	trace_fscache_acquire(cookie);
+
 	if (enable) {
 		/* if the object is an index then we need do nothing more here
 		 * - we create indices on disk when we need them as an index
 		 * may exist in multiple caches */
-		if (cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) {
-			if (fscache_acquire_non_index_cookie(cookie) == 0) {
+		if (cookie->type != FSCACHE_COOKIE_TYPE_INDEX) {
+			if (fscache_acquire_non_index_cookie(cookie, object_size) == 0) {
 				set_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags);
 			} else {
 				atomic_dec(&parent->n_children);
-				__fscache_cookie_put(cookie);
+				fscache_cookie_put(cookie,
+						   fscache_cookie_put_acquire_nobufs);
 				fscache_stat(&fscache_n_acquires_nobufs);
 				_leave(" = NULL");
 				return NULL;
@@ -145,7 +354,9 @@ struct fscache_cookie *__fscache_acquire_cookie(
 	}
 
 	fscache_stat(&fscache_n_acquires_ok);
-	_leave(" = %p", cookie);
+
+out:
+	fscache_free_cookie(candidate);
 	return cookie;
 }
 EXPORT_SYMBOL(__fscache_acquire_cookie);
@@ -154,24 +365,30 @@ EXPORT_SYMBOL(__fscache_acquire_cookie);
  * Enable a cookie to permit it to accept new operations.
  */
 void __fscache_enable_cookie(struct fscache_cookie *cookie,
+			     const void *aux_data,
+			     loff_t object_size,
 			     bool (*can_enable)(void *data),
 			     void *data)
 {
 	_enter("%p", cookie);
 
+	trace_fscache_enable(cookie);
+
 	wait_on_bit_lock(&cookie->flags, FSCACHE_COOKIE_ENABLEMENT_LOCK,
 			 TASK_UNINTERRUPTIBLE);
 
+	fscache_update_aux(cookie, aux_data);
+
 	if (test_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags))
 		goto out_unlock;
 
 	if (can_enable && !can_enable(data)) {
 		/* The netfs decided it didn't want to enable after all */
-	} else if (cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) {
+	} else if (cookie->type != FSCACHE_COOKIE_TYPE_INDEX) {
 		/* Wait for outstanding disablement to complete */
 		__fscache_wait_on_invalidate(cookie);
 
-		if (fscache_acquire_non_index_cookie(cookie) == 0)
+		if (fscache_acquire_non_index_cookie(cookie, object_size) == 0)
 			set_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags);
 	} else {
 		set_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags);
@@ -188,11 +405,11 @@ EXPORT_SYMBOL(__fscache_enable_cookie);
  * - this must make sure the index chain is instantiated and instantiate the
  *   object representation too
  */
-static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie)
+static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie,
+					    loff_t object_size)
 {
 	struct fscache_object *object;
 	struct fscache_cache *cache;
-	uint64_t i_size;
 	int ret;
 
 	_enter("");
@@ -231,9 +448,6 @@ static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie)
 		return ret;
 	}
 
-	/* pass on how big the object we're caching is supposed to be */
-	cookie->def->get_attr(cookie->netfs_data, &i_size);
-
 	spin_lock(&cookie->lock);
 	if (hlist_empty(&cookie->backing_objects)) {
 		spin_unlock(&cookie->lock);
@@ -243,7 +457,7 @@ static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie)
 	object = hlist_entry(cookie->backing_objects.first,
 			     struct fscache_object, cookie_link);
 
-	fscache_set_store_limit(object, i_size);
+	fscache_set_store_limit(object, object_size);
 
 	/* initiate the process of looking up all the objects in the chain
 	 * (done by fscache_initialise_object()) */
@@ -318,7 +532,7 @@ static int fscache_alloc_object(struct fscache_cache *cache,
 	 * attached to the cookie */
 	if (fscache_attach_object(cookie, object) < 0) {
 		fscache_stat(&fscache_n_cop_put_object);
-		cache->ops->put_object(object);
+		cache->ops->put_object(object, fscache_obj_put_attach_fail);
 		fscache_stat_d(&fscache_n_cop_put_object);
 	}
 
@@ -338,7 +552,7 @@ object_already_extant:
 
 error_put:
 	fscache_stat(&fscache_n_cop_put_object);
-	cache->ops->put_object(object);
+	cache->ops->put_object(object, fscache_obj_put_alloc_fail);
 	fscache_stat_d(&fscache_n_cop_put_object);
 error:
 	_leave(" = %d", ret);
@@ -398,7 +612,7 @@ static int fscache_attach_object(struct fscache_cookie *cookie,
 
 	/* attach to the cookie */
 	object->cookie = cookie;
-	atomic_inc(&cookie->usage);
+	fscache_cookie_get(cookie, fscache_cookie_get_attach_object);
 	hlist_add_head(&object->cookie_link, &cookie->backing_objects);
 
 	fscache_objlist_add(object);
@@ -426,10 +640,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie)
 	 * there, and if it's doing that, it may as well just retire the
 	 * cookie.
 	 */
-	ASSERTCMP(cookie->def->type, ==, FSCACHE_COOKIE_TYPE_DATAFILE);
-
-	/* We will be updating the cookie too. */
-	BUG_ON(!cookie->def->get_aux);
+	ASSERTCMP(cookie->type, ==, FSCACHE_COOKIE_TYPE_DATAFILE);
 
 	/* If there's an object, we tell the object state machine to handle the
 	 * invalidation on our behalf, otherwise there's nothing to do.
@@ -473,7 +684,7 @@ EXPORT_SYMBOL(__fscache_wait_on_invalidate);
 /*
  * update the index entries backing a cookie
  */
-void __fscache_update_cookie(struct fscache_cookie *cookie)
+void __fscache_update_cookie(struct fscache_cookie *cookie, const void *aux_data)
 {
 	struct fscache_object *object;
 
@@ -487,10 +698,10 @@ void __fscache_update_cookie(struct fscache_cookie *cookie)
 
 	_enter("{%s}", cookie->def->name);
 
-	BUG_ON(!cookie->def->get_aux);
-
 	spin_lock(&cookie->lock);
 
+	fscache_update_aux(cookie, aux_data);
+
 	if (fscache_cookie_enabled(cookie)) {
 		/* update the index entry on disk in each cache backing this
 		 * cookie.
@@ -509,13 +720,17 @@ EXPORT_SYMBOL(__fscache_update_cookie);
 /*
  * Disable a cookie to stop it from accepting new requests from the netfs.
  */
-void __fscache_disable_cookie(struct fscache_cookie *cookie, bool invalidate)
+void __fscache_disable_cookie(struct fscache_cookie *cookie,
+			      const void *aux_data,
+			      bool invalidate)
 {
 	struct fscache_object *object;
 	bool awaken = false;
 
 	_enter("%p,%u", cookie, invalidate);
 
+	trace_fscache_disable(cookie);
+
 	ASSERTCMP(atomic_read(&cookie->n_active), >, 0);
 
 	if (atomic_read(&cookie->n_children) != 0) {
@@ -526,6 +741,9 @@ void __fscache_disable_cookie(struct fscache_cookie *cookie, bool invalidate)
 
 	wait_on_bit_lock(&cookie->flags, FSCACHE_COOKIE_ENABLEMENT_LOCK,
 			 TASK_UNINTERRUPTIBLE);
+
+	fscache_update_aux(cookie, aux_data);
+
 	if (!test_and_clear_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags))
 		goto out_unlock_enable;
 
@@ -563,7 +781,7 @@ void __fscache_disable_cookie(struct fscache_cookie *cookie, bool invalidate)
 	}
 
 	/* Make sure any pending writes are cancelled. */
-	if (cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX)
+	if (cookie->type != FSCACHE_COOKIE_TYPE_INDEX)
 		fscache_invalidate_writes(cookie);
 
 	/* Reset the cookie state if it wasn't relinquished */
@@ -585,7 +803,9 @@ EXPORT_SYMBOL(__fscache_disable_cookie);
  * - all dependents of this cookie must have already been unregistered
  *   (indices/files/pages)
  */
-void __fscache_relinquish_cookie(struct fscache_cookie *cookie, bool retire)
+void __fscache_relinquish_cookie(struct fscache_cookie *cookie,
+				 const void *aux_data,
+				 bool retire)
 {
 	fscache_stat(&fscache_n_relinquishes);
 	if (retire)
@@ -601,15 +821,18 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie, bool retire)
 	       cookie, cookie->def->name, cookie->netfs_data,
 	       atomic_read(&cookie->n_active), retire);
 
+	trace_fscache_relinquish(cookie, retire);
+
 	/* No further netfs-accessing operations on this cookie permitted */
-	set_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags);
+	if (test_and_set_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags))
+		BUG();
 
-	__fscache_disable_cookie(cookie, retire);
+	__fscache_disable_cookie(cookie, aux_data, retire);
 
 	/* Clear pointers back to the netfs */
 	cookie->netfs_data	= NULL;
 	cookie->def		= NULL;
-	BUG_ON(cookie->stores.rnode);
+	BUG_ON(!radix_tree_empty(&cookie->stores));
 
 	if (cookie->parent) {
 		ASSERTCMP(atomic_read(&cookie->parent->usage), >, 0);
@@ -619,35 +842,54 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie, bool retire)
 
 	/* Dispose of the netfs's link to the cookie */
 	ASSERTCMP(atomic_read(&cookie->usage), >, 0);
-	fscache_cookie_put(cookie);
+	fscache_cookie_put(cookie, fscache_cookie_put_relinquish);
 
 	_leave("");
 }
 EXPORT_SYMBOL(__fscache_relinquish_cookie);
 
 /*
- * destroy a cookie
+ * Remove a cookie from the hash table.
  */
-void __fscache_cookie_put(struct fscache_cookie *cookie)
+static void fscache_unhash_cookie(struct fscache_cookie *cookie)
+{
+	struct hlist_bl_head *h;
+	unsigned int bucket;
+
+	bucket = cookie->key_hash & (ARRAY_SIZE(fscache_cookie_hash) - 1);
+	h = &fscache_cookie_hash[bucket];
+	/* Bucket = masked key_hash; unlink under that bucket's lock. */
+	hlist_bl_lock(h);
+	hlist_bl_del(&cookie->hash_link);
+	hlist_bl_unlock(h);
+}
+
+/*
+ * Drop a ref on a cookie; the last ref frees it and then puts its parent.
+ */
+void fscache_cookie_put(struct fscache_cookie *cookie,
+			enum fscache_cookie_trace where)
 {
 	struct fscache_cookie *parent;
+	int usage;
 
 	_enter("%p", cookie);
 
-	for (;;) {
-		_debug("FREE COOKIE %p", cookie);
-		parent = cookie->parent;
-		BUG_ON(!hlist_empty(&cookie->backing_objects));
-		kmem_cache_free(fscache_cookie_jar, cookie);
+	do {
+		usage = atomic_dec_return(&cookie->usage);
+		trace_fscache_cookie(cookie, where, usage);
 
-		if (!parent)
-			break;
+		if (usage > 0)
+			return;	/* other refs remain */
+		BUG_ON(usage < 0);
+		/* Last ref gone: unhash, free, then repeat on the parent. */
+		parent = cookie->parent;
+		fscache_unhash_cookie(cookie);
+		fscache_free_cookie(cookie);
 
 		cookie = parent;
-		BUG_ON(atomic_read(&cookie->usage) <= 0);
-		if (!atomic_dec_and_test(&cookie->usage))
-			break;
-	}
+		where = fscache_cookie_put_parent;	/* for tracing */
+	} while (cookie);
 
 	_leave("");
 }
@@ -657,7 +899,8 @@ void __fscache_cookie_put(struct fscache_cookie *cookie)
  *
  * NOTE: it only serves no-index type
  */
-int __fscache_check_consistency(struct fscache_cookie *cookie)
+int __fscache_check_consistency(struct fscache_cookie *cookie,
+				const void *aux_data)
 {
 	struct fscache_operation *op;
 	struct fscache_object *object;
@@ -666,7 +909,7 @@ int __fscache_check_consistency(struct fscache_cookie *cookie)
 
 	_enter("%p,", cookie);
 
-	ASSERTCMP(cookie->def->type, ==, FSCACHE_COOKIE_TYPE_DATAFILE);
+	ASSERTCMP(cookie->type, ==, FSCACHE_COOKIE_TYPE_DATAFILE);
 
 	if (fscache_wait_for_deferred_lookup(cookie) < 0)
 		return -ERESTARTSYS;
@@ -678,13 +921,16 @@ int __fscache_check_consistency(struct fscache_cookie *cookie)
 	if (!op)
 		return -ENOMEM;
 
-	fscache_operation_init(op, NULL, NULL, NULL);
+	fscache_operation_init(cookie, op, NULL, NULL, NULL);
 	op->flags = FSCACHE_OP_MYTHREAD |
 		(1 << FSCACHE_OP_WAITING) |
 		(1 << FSCACHE_OP_UNUSE_COOKIE);
+	trace_fscache_page_op(cookie, NULL, op, fscache_page_op_check_consistency);
 
 	spin_lock(&cookie->lock);
 
+	fscache_update_aux(cookie, aux_data);
+
 	if (!fscache_cookie_enabled(cookie) ||
 	    hlist_empty(&cookie->backing_objects))
 		goto inconsistent;
diff --git a/fs/fscache/fsdef.c b/fs/fscache/fsdef.c
index 5a117df2a9ef..aa46e48d8c75 100644
--- a/fs/fscache/fsdef.c
+++ b/fs/fscache/fsdef.c
@@ -13,16 +13,11 @@
 #include <linux/module.h>
 #include "internal.h"
 
-static uint16_t fscache_fsdef_netfs_get_key(const void *cookie_netfs_data,
-					    void *buffer, uint16_t bufmax);
-
-static uint16_t fscache_fsdef_netfs_get_aux(const void *cookie_netfs_data,
-					    void *buffer, uint16_t bufmax);
-
 static
 enum fscache_checkaux fscache_fsdef_netfs_check_aux(void *cookie_netfs_data,
 						    const void *data,
-						    uint16_t datalen);
+						    uint16_t datalen,
+						    loff_t object_size);
 
 /*
  * The root index is owned by FS-Cache itself.
@@ -60,6 +55,7 @@ struct fscache_cookie fscache_fsdef_index = {
 	.backing_objects = HLIST_HEAD_INIT,
 	.def		= &fscache_fsdef_index_def,
 	.flags		= 1 << FSCACHE_COOKIE_ENABLED,
+	.type		= FSCACHE_COOKIE_TYPE_INDEX,
 };
 EXPORT_SYMBOL(fscache_fsdef_index);
 
@@ -71,59 +67,18 @@ EXPORT_SYMBOL(fscache_fsdef_index);
 struct fscache_cookie_def fscache_fsdef_netfs_def = {
 	.name		= "FSDEF.netfs",
 	.type		= FSCACHE_COOKIE_TYPE_INDEX,
-	.get_key	= fscache_fsdef_netfs_get_key,
-	.get_aux	= fscache_fsdef_netfs_get_aux,
 	.check_aux	= fscache_fsdef_netfs_check_aux,
 };
 
 /*
- * get the key data for an FSDEF index record - this is the name of the netfs
- * for which this entry is created
- */
-static uint16_t fscache_fsdef_netfs_get_key(const void *cookie_netfs_data,
-					    void *buffer, uint16_t bufmax)
-{
-	const struct fscache_netfs *netfs = cookie_netfs_data;
-	unsigned klen;
-
-	_enter("{%s.%u},", netfs->name, netfs->version);
-
-	klen = strlen(netfs->name);
-	if (klen > bufmax)
-		return 0;
-
-	memcpy(buffer, netfs->name, klen);
-	return klen;
-}
-
-/*
- * get the auxiliary data for an FSDEF index record - this is the index
- * structure version number of the netfs for which this version is created
- */
-static uint16_t fscache_fsdef_netfs_get_aux(const void *cookie_netfs_data,
-					    void *buffer, uint16_t bufmax)
-{
-	const struct fscache_netfs *netfs = cookie_netfs_data;
-	unsigned dlen;
-
-	_enter("{%s.%u},", netfs->name, netfs->version);
-
-	dlen = sizeof(uint32_t);
-	if (dlen > bufmax)
-		return 0;
-
-	memcpy(buffer, &netfs->version, dlen);
-	return dlen;
-}
-
-/*
  * check that the index structure version number stored in the auxiliary data
  * matches the one the netfs gave us
  */
 static enum fscache_checkaux fscache_fsdef_netfs_check_aux(
 	void *cookie_netfs_data,
 	const void *data,
-	uint16_t datalen)
+	uint16_t datalen,
+	loff_t object_size)
 {
 	struct fscache_netfs *netfs = cookie_netfs_data;
 	uint32_t version;
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
index 0ff4b49a0037..500650f938fe 100644
--- a/fs/fscache/internal.h
+++ b/fs/fscache/internal.h
@@ -29,6 +29,7 @@
 #define pr_fmt(fmt) "FS-Cache: " fmt
 
 #include <linux/fscache-cache.h>
+#include <trace/events/fscache.h>
 #include <linux/sched.h>
 
 #define FSCACHE_MIN_THREADS	4
@@ -48,8 +49,16 @@ extern struct fscache_cache *fscache_select_cache_for_object(
  */
 extern struct kmem_cache *fscache_cookie_jar;
 
+extern void fscache_free_cookie(struct fscache_cookie *);
 extern void fscache_cookie_init_once(void *);
-extern void __fscache_cookie_put(struct fscache_cookie *);
+extern struct fscache_cookie *fscache_alloc_cookie(struct fscache_cookie *,
+						   const struct fscache_cookie_def *,
+						   const void *, size_t,
+						   const void *, size_t,
+						   void *, loff_t);
+extern struct fscache_cookie *fscache_hash_cookie(struct fscache_cookie *);
+extern void fscache_cookie_put(struct fscache_cookie *,
+			       enum fscache_cookie_trace);
 
 /*
  * fsdef.c
@@ -311,14 +320,12 @@ static inline void fscache_raise_event(struct fscache_object *object,
 		fscache_enqueue_object(object);
 }
 
-/*
- * drop a reference to a cookie
- */
-static inline void fscache_cookie_put(struct fscache_cookie *cookie)
+static inline void fscache_cookie_get(struct fscache_cookie *cookie,
+				      enum fscache_cookie_trace where)
 {
-	BUG_ON(atomic_read(&cookie->usage) <= 0);
-	if (atomic_dec_and_test(&cookie->usage))
-		__fscache_cookie_put(cookie);
+	int usage = atomic_inc_return(&cookie->usage);	/* take a ref */
+
+	trace_fscache_cookie(cookie, where, usage); /* post-increment count */
 }
 
 /*
@@ -342,6 +349,27 @@ void fscache_put_context(struct fscache_cookie *cookie, void *context)
 		cookie->def->put_context(cookie->netfs_data, context);
 }
 
+/*
+ * Refresh a cookie's aux data from aux_data (aux_len bytes); NULL is a no-op.
+ */
+static inline
+void fscache_update_aux(struct fscache_cookie *cookie, const void *aux_data)
+{
+	void *p;
+
+	if (!aux_data)
+		return;
+	if (cookie->aux_len <= sizeof(cookie->inline_aux))
+		p = cookie->inline_aux;
+	else
+		p = cookie->aux;
+	/* Only copy and set AUX_UPDATED when the stored bytes differ. */
+	if (memcmp(p, aux_data, cookie->aux_len) != 0) {
+		memcpy(p, aux_data, cookie->aux_len);
+		set_bit(FSCACHE_COOKIE_AUX_UPDATED, &cookie->flags);
+	}
+}
+
 /*****************************************************************************/
 /*
  * debug tracing
diff --git a/fs/fscache/main.c b/fs/fscache/main.c
index 249968dcbf5c..7dce110bf17d 100644
--- a/fs/fscache/main.c
+++ b/fs/fscache/main.c
@@ -16,6 +16,7 @@
 #include <linux/completion.h>
 #include <linux/slab.h>
 #include <linux/seq_file.h>
+#define CREATE_TRACE_POINTS
 #include "internal.h"
 
 MODULE_DESCRIPTION("FS Cache Manager");
diff --git a/fs/fscache/netfs.c b/fs/fscache/netfs.c
index a8aa00be4444..c2f605483cc5 100644
--- a/fs/fscache/netfs.c
+++ b/fs/fscache/netfs.c
@@ -14,69 +14,51 @@
 #include <linux/slab.h>
 #include "internal.h"
 
-static LIST_HEAD(fscache_netfs_list);
-
 /*
  * register a network filesystem for caching
  */
 int __fscache_register_netfs(struct fscache_netfs *netfs)
 {
-	struct fscache_netfs *ptr;
-	struct fscache_cookie *cookie;
-	int ret;
+	struct fscache_cookie *candidate, *cookie;
 
 	_enter("{%s}", netfs->name);
 
-	INIT_LIST_HEAD(&netfs->link);
-
 	/* allocate a cookie for the primary index */
-	cookie = kmem_cache_zalloc(fscache_cookie_jar, GFP_KERNEL);
-
-	if (!cookie) {
+	candidate = fscache_alloc_cookie(&fscache_fsdef_index,
+					 &fscache_fsdef_netfs_def,
+					 netfs->name, strlen(netfs->name),
+					 &netfs->version, sizeof(netfs->version),
+					 netfs, 0);
+	if (!candidate) {
 		_leave(" = -ENOMEM");
 		return -ENOMEM;
 	}
 
-	/* initialise the primary index cookie */
-	atomic_set(&cookie->usage, 1);
-	atomic_set(&cookie->n_children, 0);
-	atomic_set(&cookie->n_active, 1);
-
-	cookie->def		= &fscache_fsdef_netfs_def;
-	cookie->parent		= &fscache_fsdef_index;
-	cookie->netfs_data	= netfs;
-	cookie->flags		= 1 << FSCACHE_COOKIE_ENABLED;
-
-	spin_lock_init(&cookie->lock);
-	spin_lock_init(&cookie->stores_lock);
-	INIT_HLIST_HEAD(&cookie->backing_objects);
+	candidate->flags = 1 << FSCACHE_COOKIE_ENABLED;
 
 	/* check the netfs type is not already present */
-	down_write(&fscache_addremove_sem);
-
-	ret = -EEXIST;
-	list_for_each_entry(ptr, &fscache_netfs_list, link) {
-		if (strcmp(ptr->name, netfs->name) == 0)
-			goto already_registered;
+	cookie = fscache_hash_cookie(candidate);
+	if (!cookie)
+		goto already_registered;
+	if (cookie != candidate) {
+		trace_fscache_cookie(candidate, fscache_cookie_discard, 1);
+		fscache_free_cookie(candidate);
 	}
 
-	atomic_inc(&cookie->parent->usage);
+	fscache_cookie_get(cookie->parent, fscache_cookie_get_register_netfs);
 	atomic_inc(&cookie->parent->n_children);
 
 	netfs->primary_index = cookie;
-	list_add(&netfs->link, &fscache_netfs_list);
-	ret = 0;
 
 	pr_notice("Netfs '%s' registered for caching\n", netfs->name);
+	trace_fscache_netfs(netfs);
+	_leave(" = 0");
+	return 0;
 
 already_registered:
-	up_write(&fscache_addremove_sem);
-
-	if (ret < 0)
-		kmem_cache_free(fscache_cookie_jar, cookie);
-
-	_leave(" = %d", ret);
-	return ret;
+	fscache_cookie_put(candidate, fscache_cookie_put_dup_netfs);
+	_leave(" = -EEXIST");
+	return -EEXIST;
 }
 EXPORT_SYMBOL(__fscache_register_netfs);
 
@@ -88,15 +70,8 @@ void __fscache_unregister_netfs(struct fscache_netfs *netfs)
 {
 	_enter("{%s.%u}", netfs->name, netfs->version);
 
-	down_write(&fscache_addremove_sem);
-
-	list_del(&netfs->link);
-	fscache_relinquish_cookie(netfs->primary_index, 0);
-
-	up_write(&fscache_addremove_sem);
-
-	pr_notice("Netfs '%s' unregistered from caching\n",
-		  netfs->name);
+	fscache_relinquish_cookie(netfs->primary_index, NULL, false);
+	pr_notice("Netfs '%s' unregistered from caching\n", netfs->name);
 
 	_leave("");
 }
diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c
index 0438d4cd91ef..43e6e28c164f 100644
--- a/fs/fscache/object-list.c
+++ b/fs/fscache/object-list.c
@@ -36,8 +36,6 @@ struct fscache_objlist_data {
 #define FSCACHE_OBJLIST_CONFIG_NOEVENTS	0x00000800	/* show objects without no events */
 #define FSCACHE_OBJLIST_CONFIG_WORK	0x00001000	/* show objects with work */
 #define FSCACHE_OBJLIST_CONFIG_NOWORK	0x00002000	/* show objects without work */
-
-	u8		buf[512];	/* key and aux data buffer */
 };
 
 /*
@@ -170,7 +168,7 @@ static int fscache_objlist_show(struct seq_file *m, void *v)
 	struct fscache_cookie *cookie;
 	unsigned long config = data->config;
 	char _type[3], *type;
-	u8 *buf = data->buf, *p;
+	u8 *p;
 
 	if ((unsigned long) v == 1) {
 		seq_puts(m, "OBJECT   PARENT   STAT CHLDN OPS OOP IPR EX READS"
@@ -254,7 +252,7 @@ static int fscache_objlist_show(struct seq_file *m, void *v)
 	if (fscache_use_cookie(obj)) {
 		uint16_t keylen = 0, auxlen = 0;
 
-		switch (cookie->def->type) {
+		switch (cookie->type) {
 		case 0:
 			type = "IX";
 			break;
@@ -263,7 +261,7 @@ static int fscache_objlist_show(struct seq_file *m, void *v)
 			break;
 		default:
 			snprintf(_type, sizeof(_type), "%02u",
-				 cookie->def->type);
+				 cookie->type);
 			type = _type;
 			break;
 		}
@@ -274,30 +272,30 @@ static int fscache_objlist_show(struct seq_file *m, void *v)
 			   cookie->flags,
 			   cookie->netfs_data);
 
-		if (cookie->def->get_key &&
-		    config & FSCACHE_OBJLIST_CONFIG_KEY)
-			keylen = cookie->def->get_key(cookie->netfs_data,
-						      buf, 400);
+		if (config & FSCACHE_OBJLIST_CONFIG_KEY)
+			keylen = cookie->key_len;
 
-		if (cookie->def->get_aux &&
-		    config & FSCACHE_OBJLIST_CONFIG_AUX)
-			auxlen = cookie->def->get_aux(cookie->netfs_data,
-						      buf + keylen, 512 - keylen);
-		fscache_unuse_cookie(obj);
+		if (config & FSCACHE_OBJLIST_CONFIG_AUX)
+			auxlen = cookie->aux_len;
 
 		if (keylen > 0 || auxlen > 0) {
 			seq_puts(m, " ");
-			for (p = buf; keylen > 0; keylen--)
+			p = keylen <= sizeof(cookie->inline_key) ?
+				cookie->inline_key : cookie->key;
+			for (; keylen > 0; keylen--)
 				seq_printf(m, "%02x", *p++);
 			if (auxlen > 0) {
 				if (config & FSCACHE_OBJLIST_CONFIG_KEY)
 					seq_puts(m, ", ");
+				p = auxlen <= sizeof(cookie->inline_aux) ?
+					cookie->inline_aux : cookie->aux;
 				for (; auxlen > 0; auxlen--)
 					seq_printf(m, "%02x", *p++);
 			}
 		}
 
 		seq_puts(m, "\n");
+		fscache_unuse_cookie(obj);
 	} else {
 		seq_puts(m, "<no_netfs>\n");
 	}
diff --git a/fs/fscache/object.c b/fs/fscache/object.c
index 7a182c87f378..20e0d0a4dc8c 100644
--- a/fs/fscache/object.c
+++ b/fs/fscache/object.c
@@ -138,10 +138,13 @@ static const struct fscache_transition fscache_osm_run_oob[] = {
 	   { 0, NULL }
 };
 
-static int  fscache_get_object(struct fscache_object *);
-static void fscache_put_object(struct fscache_object *);
+static int  fscache_get_object(struct fscache_object *,
+			       enum fscache_obj_ref_trace);
+static void fscache_put_object(struct fscache_object *,
+			       enum fscache_obj_ref_trace);
 static bool fscache_enqueue_dependents(struct fscache_object *, int);
 static void fscache_dequeue_object(struct fscache_object *);
+static void fscache_update_aux_data(struct fscache_object *);
 
 /*
  * we need to notify the parent when an op completes that we had outstanding
@@ -170,6 +173,7 @@ static void fscache_object_sm_dispatcher(struct fscache_object *object)
 	const struct fscache_transition *t;
 	const struct fscache_state *state, *new_state;
 	unsigned long events, event_mask;
+	bool oob;
 	int event = -1;
 
 	ASSERT(object != NULL);
@@ -188,6 +192,7 @@ restart_masked:
 	if (events & object->oob_event_mask) {
 		_debug("{OBJ%x} oob %lx",
 		       object->debug_id, events & object->oob_event_mask);
+		oob = true;
 		for (t = object->oob_table; t->events; t++) {
 			if (events & t->events) {
 				state = t->transit_to;
@@ -199,6 +204,7 @@ restart_masked:
 			}
 		}
 	}
+	oob = false;
 
 	/* Wait states are just transition tables */
 	if (!state->work) {
@@ -207,6 +213,8 @@ restart_masked:
 				if (events & t->events) {
 					new_state = t->transit_to;
 					event = fls(events & t->events) - 1;
+					trace_fscache_osm(object, state,
+							  true, false, event);
 					clear_bit(event, &object->events);
 					_debug("{OBJ%x} ev %d: %s -> %s",
 					       object->debug_id, event,
@@ -226,6 +234,7 @@ restart_masked:
 execute_work_state:
 	_debug("{OBJ%x} exec %s", object->debug_id, state->name);
 
+	trace_fscache_osm(object, state, false, oob, event);
 	new_state = state->work(object, event);
 	event = -1;
 	if (new_state == NO_TRANSIT) {
@@ -279,7 +288,7 @@ static void fscache_object_work_func(struct work_struct *work)
 	start = jiffies;
 	fscache_object_sm_dispatcher(object);
 	fscache_hist(fscache_objs_histogram, start);
-	fscache_put_object(object);
+	fscache_put_object(object, fscache_obj_put_work);
 }
 
 /**
@@ -397,7 +406,7 @@ static const struct fscache_state *fscache_initialise_object(struct fscache_obje
 	fscache_stat(&fscache_n_cop_grab_object);
 	success = false;
 	if (fscache_object_is_live(parent) &&
-	    object->cache->ops->grab_object(object)) {
+	    object->cache->ops->grab_object(object, fscache_obj_get_add_to_deps)) {
 		list_add(&object->dep_link, &parent->dependents);
 		success = true;
 	}
@@ -703,6 +712,11 @@ static const struct fscache_state *fscache_drop_object(struct fscache_object *ob
 	ASSERT(cookie != NULL);
 	ASSERT(!hlist_unhashed(&object->cookie_link));
 
+	if (test_bit(FSCACHE_COOKIE_AUX_UPDATED, &cookie->flags)) {
+		_debug("final update");
+		fscache_update_aux_data(object);
+	}
+
 	/* Make sure the cookie no longer points here and that the netfs isn't
 	 * waiting for us.
 	 */
@@ -745,7 +759,7 @@ static const struct fscache_state *fscache_drop_object(struct fscache_object *ob
 	}
 
 	/* this just shifts the object release to the work processor */
-	fscache_put_object(object);
+	fscache_put_object(object, fscache_obj_put_drop_obj);
 	fscache_stat(&fscache_n_object_dead);
 
 	_leave("");
@@ -755,12 +769,13 @@ static const struct fscache_state *fscache_drop_object(struct fscache_object *ob
 /*
  * get a ref on an object
  */
-static int fscache_get_object(struct fscache_object *object)
+static int fscache_get_object(struct fscache_object *object,
+			      enum fscache_obj_ref_trace why)
 {
 	int ret;
 
 	fscache_stat(&fscache_n_cop_grab_object);
-	ret = object->cache->ops->grab_object(object) ? 0 : -EAGAIN;
+	ret = object->cache->ops->grab_object(object, why) ? 0 : -EAGAIN;
 	fscache_stat_d(&fscache_n_cop_grab_object);
 	return ret;
 }
@@ -768,10 +783,11 @@ static int fscache_get_object(struct fscache_object *object)
 /*
  * Discard a ref on an object
  */
-static void fscache_put_object(struct fscache_object *object)
+static void fscache_put_object(struct fscache_object *object,
+			       enum fscache_obj_ref_trace why)
 {
 	fscache_stat(&fscache_n_cop_put_object);
-	object->cache->ops->put_object(object);
+	object->cache->ops->put_object(object, why);
 	fscache_stat_d(&fscache_n_cop_put_object);
 }
 
@@ -786,7 +802,7 @@ void fscache_object_destroy(struct fscache_object *object)
 	fscache_objlist_remove(object);
 
 	/* We can get rid of the cookie now */
-	fscache_cookie_put(object->cookie);
+	fscache_cookie_put(object->cookie, fscache_cookie_put_object);
 	object->cookie = NULL;
 }
 EXPORT_SYMBOL(fscache_object_destroy);
@@ -798,7 +814,7 @@ void fscache_enqueue_object(struct fscache_object *object)
 {
 	_enter("{OBJ%x}", object->debug_id);
 
-	if (fscache_get_object(object) >= 0) {
+	if (fscache_get_object(object, fscache_obj_get_queue) >= 0) {
 		wait_queue_head_t *cong_wq =
 			&get_cpu_var(fscache_object_cong_wait);
 
@@ -806,7 +822,7 @@ void fscache_enqueue_object(struct fscache_object *object)
 			if (fscache_object_congested())
 				wake_up(cong_wq);
 		} else
-			fscache_put_object(object);
+			fscache_put_object(object, fscache_obj_put_queue);
 
 		put_cpu_var(fscache_object_cong_wait);
 	}
@@ -866,7 +882,7 @@ static bool fscache_enqueue_dependents(struct fscache_object *object, int event)
 		list_del_init(&dep->dep_link);
 
 		fscache_raise_event(dep, event);
-		fscache_put_object(dep);
+		fscache_put_object(dep, fscache_obj_put_enq_dep);
 
 		if (!list_empty(&object->dependents) && need_resched()) {
 			ret = false;
@@ -906,7 +922,8 @@ static void fscache_dequeue_object(struct fscache_object *object)
  * and creation).
  */
 enum fscache_checkaux fscache_check_aux(struct fscache_object *object,
-					const void *data, uint16_t datalen)
+					const void *data, uint16_t datalen,
+					loff_t object_size)
 {
 	enum fscache_checkaux result;
 
@@ -916,7 +933,7 @@ enum fscache_checkaux fscache_check_aux(struct fscache_object *object,
 	}
 
 	result = object->cookie->def->check_aux(object->cookie->netfs_data,
-						data, datalen);
+						data, datalen, object_size);
 	switch (result) {
 		/* entry okay as is */
 	case FSCACHE_CHECKAUX_OKAY:
@@ -956,7 +973,7 @@ static const struct fscache_state *_fscache_invalidate_object(struct fscache_obj
 	 * retire the object instead.
 	 */
 	if (!fscache_use_cookie(object)) {
-		ASSERT(object->cookie->stores.rnode == NULL);
+		ASSERT(radix_tree_empty(&object->cookie->stores));
 		set_bit(FSCACHE_OBJECT_RETIRED, &object->flags);
 		_leave(" [no cookie]");
 		return transit_to(KILL_OBJECT);
@@ -972,11 +989,12 @@ static const struct fscache_state *_fscache_invalidate_object(struct fscache_obj
 	if (!op)
 		goto nomem;
 
-	fscache_operation_init(op, object->cache->ops->invalidate_object,
+	fscache_operation_init(cookie, op, object->cache->ops->invalidate_object,
 			       NULL, NULL);
 	op->flags = FSCACHE_OP_ASYNC |
 		(1 << FSCACHE_OP_EXCLUSIVE) |
 		(1 << FSCACHE_OP_UNUSE_COOKIE);
+	trace_fscache_page_op(cookie, NULL, op, fscache_page_op_invalidate);
 
 	spin_lock(&cookie->lock);
 	if (fscache_submit_exclusive_op(object, op) < 0)
@@ -1026,6 +1044,17 @@ static const struct fscache_state *fscache_invalidate_object(struct fscache_obje
 }
 
 /*
+ * Push updated aux data to the cache via its update_object op (with stats).
+ */
+static void fscache_update_aux_data(struct fscache_object *object)
+{
+	fscache_stat(&fscache_n_updates_run);
+	fscache_stat(&fscache_n_cop_update_object);
+	object->cache->ops->update_object(object);
+	fscache_stat_d(&fscache_n_cop_update_object);
+}
+
+/*
  * Asynchronously update an object.
  */
 static const struct fscache_state *fscache_update_object(struct fscache_object *object,
@@ -1033,10 +1062,7 @@ static const struct fscache_state *fscache_update_object(struct fscache_object *
 {
 	_enter("{OBJ%x},%d", object->debug_id, event);
 
-	fscache_stat(&fscache_n_updates_run);
-	fscache_stat(&fscache_n_cop_update_object);
-	object->cache->ops->update_object(object);
-	fscache_stat_d(&fscache_n_cop_update_object);
+	fscache_update_aux_data(object);
 
 	_leave("");
 	return transit_to(WAIT_FOR_CMD);
diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c
index de67745e1cd7..e30c5975ea58 100644
--- a/fs/fscache/operation.c
+++ b/fs/fscache/operation.c
@@ -32,7 +32,8 @@ static void fscache_operation_dummy_cancel(struct fscache_operation *op)
  * Do basic initialisation of an operation.  The caller must still set flags,
  * object and processor if needed.
  */
-void fscache_operation_init(struct fscache_operation *op,
+void fscache_operation_init(struct fscache_cookie *cookie,
+			    struct fscache_operation *op,
 			    fscache_operation_processor_t processor,
 			    fscache_operation_cancel_t cancel,
 			    fscache_operation_release_t release)
@@ -46,6 +47,7 @@ void fscache_operation_init(struct fscache_operation *op,
 	op->release = release;
 	INIT_LIST_HEAD(&op->pend_link);
 	fscache_stat(&fscache_n_op_initialised);
+	trace_fscache_op(cookie, op, fscache_op_init);
 }
 EXPORT_SYMBOL(fscache_operation_init);
 
@@ -59,6 +61,8 @@ EXPORT_SYMBOL(fscache_operation_init);
  */
 void fscache_enqueue_operation(struct fscache_operation *op)
 {
+	struct fscache_cookie *cookie = op->object->cookie;
+
 	_enter("{OBJ%x OP%x,%u}",
 	       op->object->debug_id, op->debug_id, atomic_read(&op->usage));
 
@@ -71,12 +75,14 @@ void fscache_enqueue_operation(struct fscache_operation *op)
 	fscache_stat(&fscache_n_op_enqueue);
 	switch (op->flags & FSCACHE_OP_TYPE) {
 	case FSCACHE_OP_ASYNC:
+		trace_fscache_op(cookie, op, fscache_op_enqueue_async);
 		_debug("queue async");
 		atomic_inc(&op->usage);
 		if (!queue_work(fscache_op_wq, &op->work))
 			fscache_put_operation(op);
 		break;
 	case FSCACHE_OP_MYTHREAD:
+		trace_fscache_op(cookie, op, fscache_op_enqueue_mythread);
 		_debug("queue for caller's attention");
 		break;
 	default:
@@ -101,6 +107,8 @@ static void fscache_run_op(struct fscache_object *object,
 		wake_up_bit(&op->flags, FSCACHE_OP_WAITING);
 	if (op->processor)
 		fscache_enqueue_operation(op);
+	else
+		trace_fscache_op(object->cookie, op, fscache_op_run);
 	fscache_stat(&fscache_n_op_run);
 }
 
@@ -155,6 +163,8 @@ int fscache_submit_exclusive_op(struct fscache_object *object,
 
 	_enter("{OBJ%x OP%x},", object->debug_id, op->debug_id);
 
+	trace_fscache_op(object->cookie, op, fscache_op_submit_ex);
+
 	ASSERTCMP(op->state, ==, FSCACHE_OP_ST_INITIALISED);
 	ASSERTCMP(atomic_read(&op->usage), >, 0);
 
@@ -240,6 +250,8 @@ int fscache_submit_op(struct fscache_object *object,
 	_enter("{OBJ%x OP%x},{%u}",
 	       object->debug_id, op->debug_id, atomic_read(&op->usage));
 
+	trace_fscache_op(object->cookie, op, fscache_op_submit);
+
 	ASSERTCMP(op->state, ==, FSCACHE_OP_ST_INITIALISED);
 	ASSERTCMP(atomic_read(&op->usage), >, 0);
 
@@ -357,6 +369,8 @@ int fscache_cancel_op(struct fscache_operation *op,
 
 	_enter("OBJ%x OP%x}", op->object->debug_id, op->debug_id);
 
+	trace_fscache_op(object->cookie, op, fscache_op_cancel);
+
 	ASSERTCMP(op->state, >=, FSCACHE_OP_ST_PENDING);
 	ASSERTCMP(op->state, !=, FSCACHE_OP_ST_CANCELLED);
 	ASSERTCMP(atomic_read(&op->usage), >, 0);
@@ -419,6 +433,8 @@ void fscache_cancel_all_ops(struct fscache_object *object)
 		fscache_stat(&fscache_n_op_cancelled);
 		list_del_init(&op->pend_link);
 
+		trace_fscache_op(object->cookie, op, fscache_op_cancel_all);
+
 		ASSERTCMP(op->state, ==, FSCACHE_OP_ST_PENDING);
 		op->cancel(op);
 		op->state = FSCACHE_OP_ST_CANCELLED;
@@ -454,9 +470,11 @@ void fscache_op_complete(struct fscache_operation *op, bool cancelled)
 	spin_lock(&object->lock);
 
 	if (!cancelled) {
+		trace_fscache_op(object->cookie, op, fscache_op_completed);
 		op->state = FSCACHE_OP_ST_COMPLETE;
 	} else {
 		op->cancel(op);
+		trace_fscache_op(object->cookie, op, fscache_op_cancelled);
 		op->state = FSCACHE_OP_ST_CANCELLED;
 	}
 
@@ -488,6 +506,8 @@ void fscache_put_operation(struct fscache_operation *op)
 	if (!atomic_dec_and_test(&op->usage))
 		return;
 
+	trace_fscache_op(op->object ? op->object->cookie : NULL, op, fscache_op_put);
+
 	_debug("PUT OP");
 	ASSERTIFCMP(op->state != FSCACHE_OP_ST_INITIALISED &&
 		    op->state != FSCACHE_OP_ST_COMPLETE,
@@ -563,6 +583,8 @@ void fscache_operation_gc(struct work_struct *work)
 		spin_unlock(&cache->op_gc_list_lock);
 
 		object = op->object;
+		trace_fscache_op(object->cookie, op, fscache_op_gc);
+
 		spin_lock(&object->lock);
 
 		_debug("GC DEFERRED REL OBJ%x OP%x",
@@ -601,6 +623,8 @@ void fscache_op_work_func(struct work_struct *work)
 	_enter("{OBJ%x OP%x,%d}",
 	       op->object->debug_id, op->debug_id, atomic_read(&op->usage));
 
+	trace_fscache_op(op->object->cookie, op, fscache_op_work);
+
 	ASSERT(op->processor != NULL);
 	start = jiffies;
 	op->processor(op);
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index 961029e04027..111349f67d98 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -27,6 +27,7 @@ bool __fscache_check_page_write(struct fscache_cookie *cookie, struct page *page
 	rcu_read_lock();
 	val = radix_tree_lookup(&cookie->stores, page->index);
 	rcu_read_unlock();
+	trace_fscache_check_page(cookie, page, val, 0);
 
 	return val != NULL;
 }
@@ -39,6 +40,8 @@ void __fscache_wait_on_page_write(struct fscache_cookie *cookie, struct page *pa
 {
 	wait_queue_head_t *wq = bit_waitqueue(&cookie->flags, 0);
 
+	trace_fscache_page(cookie, page, fscache_page_write_wait);
+
 	wait_event(*wq, !__fscache_check_page_write(cookie, page));
 }
 EXPORT_SYMBOL(__fscache_wait_on_page_write);
@@ -69,6 +72,8 @@ bool __fscache_maybe_release_page(struct fscache_cookie *cookie,
 
 	_enter("%p,%p,%x", cookie, page, gfp);
 
+	trace_fscache_page(cookie, page, fscache_page_maybe_release);
+
 try_again:
 	rcu_read_lock();
 	val = radix_tree_lookup(&cookie->stores, page->index);
@@ -101,6 +106,7 @@ try_again:
 	}
 
 	xpage = radix_tree_delete(&cookie->stores, page->index);
+	trace_fscache_page(cookie, page, fscache_page_radix_delete);
 	spin_unlock(&cookie->stores_lock);
 
 	if (xpage) {
@@ -112,6 +118,7 @@ try_again:
 	}
 
 	wake_up_bit(&cookie->flags, 0);
+	trace_fscache_wake_cookie(cookie);
 	if (xpage)
 		put_page(xpage);
 	__fscache_uncache_page(cookie, page);
@@ -144,7 +151,7 @@ static void fscache_end_page_write(struct fscache_object *object,
 				   struct page *page)
 {
 	struct fscache_cookie *cookie;
-	struct page *xpage = NULL;
+	struct page *xpage = NULL, *val;
 
 	spin_lock(&object->lock);
 	cookie = object->cookie;
@@ -154,13 +161,24 @@ static void fscache_end_page_write(struct fscache_object *object,
 		spin_lock(&cookie->stores_lock);
 		radix_tree_tag_clear(&cookie->stores, page->index,
 				     FSCACHE_COOKIE_STORING_TAG);
+		trace_fscache_page(cookie, page, fscache_page_radix_clear_store);
 		if (!radix_tree_tag_get(&cookie->stores, page->index,
 					FSCACHE_COOKIE_PENDING_TAG)) {
 			fscache_stat(&fscache_n_store_radix_deletes);
 			xpage = radix_tree_delete(&cookie->stores, page->index);
+			trace_fscache_page(cookie, page, fscache_page_radix_delete);
+			trace_fscache_page(cookie, page, fscache_page_write_end);
+
+			val = radix_tree_lookup(&cookie->stores, page->index);
+			trace_fscache_check_page(cookie, page, val, 1);
+		} else {
+			trace_fscache_page(cookie, page, fscache_page_write_end_pend);
 		}
 		spin_unlock(&cookie->stores_lock);
 		wake_up_bit(&cookie->flags, 0);
+		trace_fscache_wake_cookie(cookie);
+	} else {
+		trace_fscache_page(cookie, page, fscache_page_write_end_noc);
 	}
 	spin_unlock(&object->lock);
 	if (xpage)
@@ -185,9 +203,11 @@ static void fscache_attr_changed_op(struct fscache_operation *op)
 		fscache_stat_d(&fscache_n_cop_attr_changed);
 		if (ret < 0)
 			fscache_abort_object(object);
+		fscache_op_complete(op, ret < 0);
+	} else {
+		fscache_op_complete(op, true);
 	}
 
-	fscache_op_complete(op, true);
 	_leave("");
 }
 
@@ -213,7 +233,8 @@ int __fscache_attr_changed(struct fscache_cookie *cookie)
 		return -ENOMEM;
 	}
 
-	fscache_operation_init(op, fscache_attr_changed_op, NULL, NULL);
+	fscache_operation_init(cookie, op, fscache_attr_changed_op, NULL, NULL);
+	trace_fscache_page_op(cookie, NULL, op, fscache_page_op_attr_changed);
 	op->flags = FSCACHE_OP_ASYNC |
 		(1 << FSCACHE_OP_EXCLUSIVE) |
 		(1 << FSCACHE_OP_UNUSE_COOKIE);
@@ -297,7 +318,7 @@ static struct fscache_retrieval *fscache_alloc_retrieval(
 		return NULL;
 	}
 
-	fscache_operation_init(&op->op, NULL,
+	fscache_operation_init(cookie, &op->op, NULL,
 			       fscache_do_cancel_retrieval,
 			       fscache_release_retrieval_op);
 	op->op.flags	= FSCACHE_OP_MYTHREAD |
@@ -368,6 +389,7 @@ int fscache_wait_for_operation_activation(struct fscache_object *object,
 		fscache_stat(stat_op_waits);
 	if (wait_on_bit(&op->flags, FSCACHE_OP_WAITING,
 			TASK_INTERRUPTIBLE) != 0) {
+		trace_fscache_op(object->cookie, op, fscache_op_signal);
 		ret = fscache_cancel_op(op, false);
 		if (ret == 0)
 			return -ERESTARTSYS;
@@ -389,6 +411,7 @@ check_if_dead:
 	if (unlikely(fscache_object_is_dying(object) ||
 		     fscache_cache_is_broken(object))) {
 		enum fscache_operation_state state = op->state;
+		trace_fscache_op(object->cookie, op, fscache_op_signal);
 		fscache_cancel_op(op, true);
 		if (stat_object_dead)
 			fscache_stat(stat_object_dead);
@@ -443,6 +466,7 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie,
 		return -ENOMEM;
 	}
 	atomic_set(&op->n_pages, 1);
+	trace_fscache_page_op(cookie, page, &op->op, fscache_page_op_retr_one);
 
 	spin_lock(&cookie->lock);
 
@@ -571,6 +595,7 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie,
 	if (!op)
 		return -ENOMEM;
 	atomic_set(&op->n_pages, *nr_pages);
+	trace_fscache_page_op(cookie, NULL, &op->op, fscache_page_op_retr_multi);
 
 	spin_lock(&cookie->lock);
 
@@ -682,6 +707,7 @@ int __fscache_alloc_page(struct fscache_cookie *cookie,
 	if (!op)
 		return -ENOMEM;
 	atomic_set(&op->n_pages, 1);
+	trace_fscache_page_op(cookie, page, &op->op, fscache_page_op_alloc_one);
 
 	spin_lock(&cookie->lock);
 
@@ -776,15 +802,17 @@ static void fscache_write_op(struct fscache_operation *_op)
 
 	_enter("{OP%x,%d}", op->op.debug_id, atomic_read(&op->op.usage));
 
+again:
 	spin_lock(&object->lock);
 	cookie = object->cookie;
 
 	if (!fscache_object_is_active(object)) {
-		/* If we get here, then the on-disk cache object likely longer
-		 * exists, so we should just cancel this write operation.
+		/* If we get here, then the on-disk cache object likely no
+		 * longer exists, so we should just cancel this write
+		 * operation.
 		 */
 		spin_unlock(&object->lock);
-		fscache_op_complete(&op->op, false);
+		fscache_op_complete(&op->op, true);
 		_leave(" [inactive]");
 		return;
 	}
@@ -797,7 +825,7 @@ static void fscache_write_op(struct fscache_operation *_op)
 		 * cancel this write operation.
 		 */
 		spin_unlock(&object->lock);
-		fscache_op_complete(&op->op, false);
+		fscache_op_complete(&op->op, true);
 		_leave(" [cancel] op{f=%lx s=%u} obj{s=%s f=%lx}",
 		       _op->flags, _op->state, object->state->short_name,
 		       object->flags);
@@ -809,30 +837,33 @@ static void fscache_write_op(struct fscache_operation *_op)
 	fscache_stat(&fscache_n_store_calls);
 
 	/* find a page to store */
+	results[0] = NULL;
 	page = NULL;
 	n = radix_tree_gang_lookup_tag(&cookie->stores, results, 0, 1,
 				       FSCACHE_COOKIE_PENDING_TAG);
+	trace_fscache_gang_lookup(cookie, &op->op, results, n, op->store_limit);
 	if (n != 1)
 		goto superseded;
 	page = results[0];
 	_debug("gang %d [%lx]", n, page->index);
-	if (page->index >= op->store_limit) {
-		fscache_stat(&fscache_n_store_pages_over_limit);
-		goto superseded;
-	}
 
 	radix_tree_tag_set(&cookie->stores, page->index,
 			   FSCACHE_COOKIE_STORING_TAG);
 	radix_tree_tag_clear(&cookie->stores, page->index,
 			     FSCACHE_COOKIE_PENDING_TAG);
+	trace_fscache_page(cookie, page, fscache_page_radix_pend2store);
 
 	spin_unlock(&cookie->stores_lock);
 	spin_unlock(&object->lock);
 
+	if (page->index >= op->store_limit)
+		goto discard_page;
+
 	fscache_stat(&fscache_n_store_pages);
 	fscache_stat(&fscache_n_cop_write_page);
 	ret = object->cache->ops->write_page(op, page);
 	fscache_stat_d(&fscache_n_cop_write_page);
+	trace_fscache_wrote_page(cookie, page, &op->op, ret);
 	fscache_end_page_write(object, page);
 	if (ret < 0) {
 		fscache_abort_object(object);
@@ -844,6 +875,12 @@ static void fscache_write_op(struct fscache_operation *_op)
 	_leave("");
 	return;
 
+discard_page:
+	fscache_stat(&fscache_n_store_pages_over_limit);
+	trace_fscache_wrote_page(cookie, page, &op->op, -ENOBUFS);
+	fscache_end_page_write(object, page);
+	goto again;
+
 superseded:
 	/* this writer is going away and there aren't any more things to
 	 * write */
@@ -851,7 +888,7 @@ superseded:
 	spin_unlock(&cookie->stores_lock);
 	clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags);
 	spin_unlock(&object->lock);
-	fscache_op_complete(&op->op, true);
+	fscache_op_complete(&op->op, false);
 	_leave("");
 }
 
@@ -879,6 +916,8 @@ void fscache_invalidate_writes(struct fscache_cookie *cookie)
 		for (i = n - 1; i >= 0; i--) {
 			page = results[i];
 			radix_tree_delete(&cookie->stores, page->index);
+			trace_fscache_page(cookie, page, fscache_page_radix_delete);
+			trace_fscache_page(cookie, page, fscache_page_inval);
 		}
 
 		spin_unlock(&cookie->stores_lock);
@@ -888,6 +927,7 @@ void fscache_invalidate_writes(struct fscache_cookie *cookie)
 	}
 
 	wake_up_bit(&cookie->flags, 0);
+	trace_fscache_wake_cookie(cookie);
 
 	_leave("");
 }
@@ -923,6 +963,7 @@ void fscache_invalidate_writes(struct fscache_cookie *cookie)
  */
 int __fscache_write_page(struct fscache_cookie *cookie,
 			 struct page *page,
+			 loff_t object_size,
 			 gfp_t gfp)
 {
 	struct fscache_storage *op;
@@ -946,7 +987,7 @@ int __fscache_write_page(struct fscache_cookie *cookie,
 	if (!op)
 		goto nomem;
 
-	fscache_operation_init(&op->op, fscache_write_op, NULL,
+	fscache_operation_init(cookie, &op->op, fscache_write_op, NULL,
 			       fscache_release_write_op);
 	op->op.flags = FSCACHE_OP_ASYNC |
 		(1 << FSCACHE_OP_WAITING) |
@@ -956,6 +997,8 @@ int __fscache_write_page(struct fscache_cookie *cookie,
 	if (ret < 0)
 		goto nomem_free;
 
+	trace_fscache_page_op(cookie, page, &op->op, fscache_page_op_write_one);
+
 	ret = -ENOBUFS;
 	spin_lock(&cookie->lock);
 
@@ -967,9 +1010,15 @@ int __fscache_write_page(struct fscache_cookie *cookie,
 	if (test_bit(FSCACHE_IOERROR, &object->cache->flags))
 		goto nobufs;
 
+	trace_fscache_page(cookie, page, fscache_page_write);
+
 	/* add the page to the pending-storage radix tree on the backing
 	 * object */
 	spin_lock(&object->lock);
+
+	if (object->store_limit_l != object_size)
+		fscache_set_store_limit(object, object_size);
+
 	spin_lock(&cookie->stores_lock);
 
 	_debug("store limit %llx", (unsigned long long) object->store_limit);
@@ -982,8 +1031,10 @@ int __fscache_write_page(struct fscache_cookie *cookie,
 		goto nobufs_unlock_obj;
 	}
 
+	trace_fscache_page(cookie, page, fscache_page_radix_insert);
 	radix_tree_tag_set(&cookie->stores, page->index,
 			   FSCACHE_COOKIE_PENDING_TAG);
+	trace_fscache_page(cookie, page, fscache_page_radix_set_pend);
 	get_page(page);
 
 	/* we only want one writer at a time, but we do need to queue new
@@ -1026,6 +1077,7 @@ already_pending:
 submit_failed:
 	spin_lock(&cookie->stores_lock);
 	radix_tree_delete(&cookie->stores, page->index);
+	trace_fscache_page(cookie, page, fscache_page_radix_delete);
 	spin_unlock(&cookie->stores_lock);
 	wake_cookie = __fscache_unuse_cookie(cookie);
 	put_page(page);
@@ -1072,6 +1124,8 @@ void __fscache_uncache_page(struct fscache_cookie *cookie, struct page *page)
 	if (!PageFsCache(page))
 		goto done;
 
+	trace_fscache_page(cookie, page, fscache_page_uncache);
+
 	/* get the object */
 	spin_lock(&cookie->lock);
 
@@ -1120,6 +1174,8 @@ void fscache_mark_page_cached(struct fscache_retrieval *op, struct page *page)
 	atomic_inc(&fscache_n_marks);
 #endif
 
+	trace_fscache_page(cookie, page, fscache_page_cached);
+
 	_debug("- mark %p{%lx}", page, page->index);
 	if (TestSetPageFsCache(page)) {
 		static bool once_only;
diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c
index 7ac6e839b065..fcc8c2f2690e 100644
--- a/fs/fscache/stats.c
+++ b/fs/fscache/stats.c
@@ -21,7 +21,6 @@
 atomic_t fscache_n_op_pend;
 atomic_t fscache_n_op_run;
 atomic_t fscache_n_op_enqueue;
-atomic_t fscache_n_op_requeue;
 atomic_t fscache_n_op_deferred_release;
 atomic_t fscache_n_op_initialised;
 atomic_t fscache_n_op_release;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 624f18bbfd2b..ef309958e060 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1080,6 +1080,9 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_maxbytes = MAX_LFS_FILESIZE;
 	sb->s_time_gran = 1;
 	sb->s_export_op = &fuse_export_operations;
+	sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE;
+	if (sb->s_user_ns != &init_user_ns)
+		sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER;
 
 	file = fget(d.fd);
 	err = -EINVAL;
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 685c305cbeb6..278ed0869c3c 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1744,7 +1744,7 @@ do_grow_qunlock:
  * @newsize: the size to make the file
  *
  * The file size can grow, shrink, or stay the same size. This
- * is called holding i_mutex and an exclusive glock on the inode
+ * is called holding i_rwsem and an exclusive glock on the inode
  * in question.
  *
  * Returns: errno
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 82fb5583445c..097bd3c0f270 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1923,28 +1923,37 @@ void gfs2_glock_exit(void)
 
 static void gfs2_glock_iter_next(struct gfs2_glock_iter *gi, loff_t n)
 {
-	if (n == 0)
-		gi->gl = rhashtable_walk_peek(&gi->hti);
-	else {
-		gi->gl = rhashtable_walk_next(&gi->hti);
-		n--;
+	struct gfs2_glock *gl = gi->gl;
+
+	if (gl) {
+		if (n == 0)
+			return;
+		if (!lockref_put_not_zero(&gl->gl_lockref))
+			gfs2_glock_queue_put(gl);
 	}
 	for (;;) {
-		if (IS_ERR_OR_NULL(gi->gl)) {
-			if (!gi->gl)
-				return;
-			if (PTR_ERR(gi->gl) != -EAGAIN) {
-				gi->gl = NULL;
-				return;
+		gl = rhashtable_walk_next(&gi->hti);
+		if (IS_ERR_OR_NULL(gl)) {
+			if (gl == ERR_PTR(-EAGAIN)) {
+				n = 1;
+				continue;
 			}
-			n = 0;
-		} else if (gi->sdp == gi->gl->gl_name.ln_sbd &&
-			   !__lockref_is_dead(&gi->gl->gl_lockref)) {
-			if (!n--)
-				break;
+			gl = NULL;
+			break;
+		}
+		if (gl->gl_name.ln_sbd != gi->sdp)
+			continue;
+		if (n <= 1) {
+			if (!lockref_get_not_dead(&gl->gl_lockref))
+				continue;
+			break;
+		} else {
+			if (__lockref_is_dead(&gl->gl_lockref))
+				continue;
+			n--;
 		}
-		gi->gl = rhashtable_walk_next(&gi->hti);
 	}
+	gi->gl = gl;
 }
 
 static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
@@ -1988,7 +1997,6 @@ static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr)
 {
 	struct gfs2_glock_iter *gi = seq->private;
 
-	gi->gl = NULL;
 	rhashtable_walk_stop(&gi->hti);
 }
 
@@ -2076,7 +2084,8 @@ static int gfs2_glocks_release(struct inode *inode, struct file *file)
 	struct seq_file *seq = file->private_data;
 	struct gfs2_glock_iter *gi = seq->private;
 
-	gi->gl = NULL;
+	if (gi->gl)
+		gfs2_glock_put(gi->gl);
 	rhashtable_walk_exit(&gi->hti);
 	return seq_release_private(inode, file);
 }
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index e6a0a8a89ea7..3ba3f167641c 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -825,7 +825,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo)
 		goto fail_rindex;
 	}
 	/*
-	 * i_mutex on quota files is special. Since this inode is hidden system
+	 * i_rwsem on quota files is special. Since this inode is hidden system
 	 * file, we are safe to define locking ourselves.
 	 */
 	lockdep_set_class(&sdp->sd_quota_inode->i_rwsem,
diff --git a/fs/inode.c b/fs/inode.c
index b153aeaa61ea..13ceb98c3bd3 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -348,8 +348,7 @@ EXPORT_SYMBOL(inc_nlink);
 
 static void __address_space_init_once(struct address_space *mapping)
 {
-	INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC | __GFP_ACCOUNT);
-	spin_lock_init(&mapping->tree_lock);
+	INIT_RADIX_TREE(&mapping->i_pages, GFP_ATOMIC | __GFP_ACCOUNT);
 	init_rwsem(&mapping->i_mmap_rwsem);
 	INIT_LIST_HEAD(&mapping->private_list);
 	spin_lock_init(&mapping->private_lock);
@@ -504,14 +503,14 @@ EXPORT_SYMBOL(__remove_inode_hash);
 void clear_inode(struct inode *inode)
 {
 	/*
-	 * We have to cycle tree_lock here because reclaim can be still in the
+	 * We have to cycle the i_pages lock here because reclaim can be in the
 	 * process of removing the last page (in __delete_from_page_cache())
-	 * and we must not free mapping under it.
+	 * and we must not free the mapping under it.
 	 */
-	spin_lock_irq(&inode->i_data.tree_lock);
+	xa_lock_irq(&inode->i_data.i_pages);
 	BUG_ON(inode->i_data.nrpages);
 	BUG_ON(inode->i_data.nrexceptional);
-	spin_unlock_irq(&inode->i_data.tree_lock);
+	xa_unlock_irq(&inode->i_data.i_pages);
 	BUG_ON(!list_empty(&inode->i_data.private_list));
 	BUG_ON(!(inode->i_state & I_FREEING));
 	BUG_ON(inode->i_state & I_CLEAR);
diff --git a/fs/libfs.c b/fs/libfs.c
index 7ff3cb904acd..0fb590d79f30 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -1060,6 +1060,45 @@ int noop_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 }
 EXPORT_SYMBOL(noop_fsync);
 
+int noop_set_page_dirty(struct page *page)
+{
+	/*
+	 * Unlike __set_page_dirty_no_writeback, which handles dirty page
+	 * tracking in the page object, dax does all dirty tracking in
+	 * the inode address_space in response to mkwrite faults. In the
+	 * dax case we only need to worry about potentially dirty CPU
+	 * caches, not dirty page cache pages to write back.
+	 *
+	 * This callback is defined to prevent fallback to
+	 * __set_page_dirty_buffers() in set_page_dirty().
+	 */
+	return 0;
+}
+EXPORT_SYMBOL_GPL(noop_set_page_dirty);
+
+void noop_invalidatepage(struct page *page, unsigned int offset,
+		unsigned int length)
+{
+	/*
+	 * There is no page cache to invalidate in the dax case, however
+	 * we need this callback defined to prevent falling back to
+	 * block_invalidatepage() in do_invalidatepage().
+	 */
+}
+EXPORT_SYMBOL_GPL(noop_invalidatepage);
+
+ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
+{
+	/*
+	 * iomap-based filesystems support direct I/O without needing
+	 * this callback. However, it still needs to be set in
+	 * inode->a_ops so that open/fcntl know that direct I/O is
+	 * generally supported.
+	 */
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(noop_direct_IO);
+
 /* Because kfree isn't assignment-compatible with void(void*) ;-/ */
 void kfree_link(void *p)
 {
diff --git a/fs/namei.c b/fs/namei.c
index a66ed5a1622a..186bd2464fd5 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -224,9 +224,10 @@ getname_kernel(const char * filename)
 	if (len <= EMBEDDED_NAME_MAX) {
 		result->name = (char *)result->iname;
 	} else if (len <= PATH_MAX) {
+		const size_t size = offsetof(struct filename, iname[1]);
 		struct filename *tmp;
 
-		tmp = kmalloc(sizeof(*tmp), GFP_KERNEL);
+		tmp = kmalloc(size, GFP_KERNEL);
 		if (unlikely(!tmp)) {
 			__putname(result);
 			return ERR_PTR(-ENOMEM);
@@ -1597,22 +1598,21 @@ static int lookup_fast(struct nameidata *nd,
 }
 
 /* Fast lookup failed, do it the slow way */
-static struct dentry *lookup_slow(const struct qstr *name,
-				  struct dentry *dir,
-				  unsigned int flags)
+static struct dentry *__lookup_slow(const struct qstr *name,
+				    struct dentry *dir,
+				    unsigned int flags)
 {
-	struct dentry *dentry = ERR_PTR(-ENOENT), *old;
+	struct dentry *dentry, *old;
 	struct inode *inode = dir->d_inode;
 	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
 
-	inode_lock_shared(inode);
 	/* Don't go there if it's already dead */
 	if (unlikely(IS_DEADDIR(inode)))
-		goto out;
+		return ERR_PTR(-ENOENT);
 again:
 	dentry = d_alloc_parallel(dir, name, &wq);
 	if (IS_ERR(dentry))
-		goto out;
+		return dentry;
 	if (unlikely(!d_in_lookup(dentry))) {
 		if (!(flags & LOOKUP_NO_REVAL)) {
 			int error = d_revalidate(dentry, flags);
@@ -1634,11 +1634,21 @@ again:
 			dentry = old;
 		}
 	}
-out:
-	inode_unlock_shared(inode);
 	return dentry;
 }
 
+static struct dentry *lookup_slow(const struct qstr *name,
+				  struct dentry *dir,
+				  unsigned int flags)
+{
+	struct inode *inode = dir->d_inode;
+	struct dentry *res;
+	inode_lock_shared(inode);
+	res = __lookup_slow(name, dir, flags);
+	inode_unlock_shared(inode);
+	return res;
+}
+
 static inline int may_lookup(struct nameidata *nd)
 {
 	if (nd->flags & LOOKUP_RCU) {
@@ -2421,56 +2431,63 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
 }
 EXPORT_SYMBOL(vfs_path_lookup);
 
-/**
- * lookup_one_len - filesystem helper to lookup single pathname component
- * @name:	pathname component to lookup
- * @base:	base directory to lookup from
- * @len:	maximum length @len should be interpreted to
- *
- * Note that this routine is purely a helper for filesystem usage and should
- * not be called by generic code.
- *
- * The caller must hold base->i_mutex.
- */
-struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
+static int lookup_one_len_common(const char *name, struct dentry *base,
+				 int len, struct qstr *this)
 {
-	struct qstr this;
-	unsigned int c;
-	int err;
-
-	WARN_ON_ONCE(!inode_is_locked(base->d_inode));
-
-	this.name = name;
-	this.len = len;
-	this.hash = full_name_hash(base, name, len);
+	this->name = name;
+	this->len = len;
+	this->hash = full_name_hash(base, name, len);
 	if (!len)
-		return ERR_PTR(-EACCES);
+		return -EACCES;
 
 	if (unlikely(name[0] == '.')) {
 		if (len < 2 || (len == 2 && name[1] == '.'))
-			return ERR_PTR(-EACCES);
+			return -EACCES;
 	}
 
 	while (len--) {
-		c = *(const unsigned char *)name++;
+		unsigned int c = *(const unsigned char *)name++;
 		if (c == '/' || c == '\0')
-			return ERR_PTR(-EACCES);
+			return -EACCES;
 	}
 	/*
 	 * See if the low-level filesystem might want
 	 * to use its own hash..
 	 */
 	if (base->d_flags & DCACHE_OP_HASH) {
-		int err = base->d_op->d_hash(base, &this);
+		int err = base->d_op->d_hash(base, this);
 		if (err < 0)
-			return ERR_PTR(err);
+			return err;
 	}
 
-	err = inode_permission(base->d_inode, MAY_EXEC);
+	return inode_permission(base->d_inode, MAY_EXEC);
+}
+
+/**
+ * lookup_one_len - filesystem helper to lookup single pathname component
+ * @name:	pathname component to lookup
+ * @base:	base directory to lookup from
+ * @len:	number of bytes of @name to interpret as the component
+ *
+ * Note that this routine is purely a helper for filesystem usage and should
+ * not be called by generic code.
+ *
+ * The caller must hold base->d_inode->i_rwsem.
+ */
+struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
+{
+	struct dentry *dentry;
+	struct qstr this;
+	int err;
+
+	WARN_ON_ONCE(!inode_is_locked(base->d_inode));
+
+	err = lookup_one_len_common(name, base, len, &this);
 	if (err)
 		return ERR_PTR(err);
 
-	return __lookup_hash(&this, base, 0);
+	dentry = lookup_dcache(&this, base, 0);
+	return dentry ? dentry : __lookup_slow(&this, base, 0);
 }
 EXPORT_SYMBOL(lookup_one_len);
 
@@ -2490,37 +2507,10 @@ struct dentry *lookup_one_len_unlocked(const char *name,
 				       struct dentry *base, int len)
 {
 	struct qstr this;
-	unsigned int c;
 	int err;
 	struct dentry *ret;
 
-	this.name = name;
-	this.len = len;
-	this.hash = full_name_hash(base, name, len);
-	if (!len)
-		return ERR_PTR(-EACCES);
-
-	if (unlikely(name[0] == '.')) {
-		if (len < 2 || (len == 2 && name[1] == '.'))
-			return ERR_PTR(-EACCES);
-	}
-
-	while (len--) {
-		c = *(const unsigned char *)name++;
-		if (c == '/' || c == '\0')
-			return ERR_PTR(-EACCES);
-	}
-	/*
-	 * See if the low-level filesystem might want
-	 * to use its own hash..
-	 */
-	if (base->d_flags & DCACHE_OP_HASH) {
-		int err = base->d_op->d_hash(base, &this);
-		if (err < 0)
-			return ERR_PTR(err);
-	}
-
-	err = inode_permission(base->d_inode, MAY_EXEC);
+	err = lookup_one_len_common(name, base, len, &this);
 	if (err)
 		return ERR_PTR(err);
 
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 123c069429a7..a813979b5be0 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -535,35 +535,10 @@ static __be32 encode_string(struct xdr_stream *xdr, unsigned int len, const char
 	return 0;
 }
 
-#define CB_SUPPORTED_ATTR0 (FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE)
-#define CB_SUPPORTED_ATTR1 (FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY)
-static __be32 encode_attr_bitmap(struct xdr_stream *xdr, const uint32_t *bitmap, __be32 **savep)
+static __be32 encode_attr_bitmap(struct xdr_stream *xdr, const uint32_t *bitmap, size_t sz)
 {
-	__be32 bm[2];
-	__be32 *p;
-
-	bm[0] = htonl(bitmap[0] & CB_SUPPORTED_ATTR0);
-	bm[1] = htonl(bitmap[1] & CB_SUPPORTED_ATTR1);
-	if (bm[1] != 0) {
-		p = xdr_reserve_space(xdr, 16);
-		if (unlikely(p == NULL))
-			return htonl(NFS4ERR_RESOURCE);
-		*p++ = htonl(2);
-		*p++ = bm[0];
-		*p++ = bm[1];
-	} else if (bm[0] != 0) {
-		p = xdr_reserve_space(xdr, 12);
-		if (unlikely(p == NULL))
-			return htonl(NFS4ERR_RESOURCE);
-		*p++ = htonl(1);
-		*p++ = bm[0];
-	} else {
-		p = xdr_reserve_space(xdr, 8);
-		if (unlikely(p == NULL))
-			return htonl(NFS4ERR_RESOURCE);
-		*p++ = htonl(0);
-	}
-	*savep = p;
+	if (xdr_stream_encode_uint32_array(xdr, bitmap, sz) < 0)
+		return cpu_to_be32(NFS4ERR_RESOURCE);
 	return 0;
 }
 
@@ -656,9 +631,13 @@ static __be32 encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr,
 	
 	if (unlikely(status != 0))
 		goto out;
-	status = encode_attr_bitmap(xdr, res->bitmap, &savep);
+	status = encode_attr_bitmap(xdr, res->bitmap, ARRAY_SIZE(res->bitmap));
 	if (unlikely(status != 0))
 		goto out;
+	status = cpu_to_be32(NFS4ERR_RESOURCE);
+	savep = xdr_reserve_space(xdr, sizeof(*savep));
+	if (unlikely(!savep))
+		goto out;
 	status = encode_attr_change(xdr, res->bitmap, res->change_attr);
 	if (unlikely(status != 0))
 		goto out;
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index d8b47624fee2..1819d0d0ba4b 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -19,6 +19,7 @@
 #include <linux/nfs_xdr.h>
 
 #include "nfs4_fs.h"
+#include "nfs4session.h"
 #include "delegation.h"
 #include "internal.h"
 #include "nfs4trace.h"
@@ -171,11 +172,15 @@ again:
  * nfs_inode_reclaim_delegation - process a delegation reclaim request
  * @inode: inode to process
  * @cred: credential to use for request
- * @res: new delegation state from server
+ * @type: delegation type
+ * @stateid: delegation stateid
+ * @pagemod_limit: write delegation "space_limit"
  *
  */
 void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred,
-				  struct nfs_openres *res)
+				  fmode_t type,
+				  const nfs4_stateid *stateid,
+				  unsigned long pagemod_limit)
 {
 	struct nfs_delegation *delegation;
 	struct rpc_cred *oldcred = NULL;
@@ -185,9 +190,9 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred,
 	if (delegation != NULL) {
 		spin_lock(&delegation->lock);
 		if (delegation->inode != NULL) {
-			nfs4_stateid_copy(&delegation->stateid, &res->delegation);
-			delegation->type = res->delegation_type;
-			delegation->pagemod_limit = res->pagemod_limit;
+			nfs4_stateid_copy(&delegation->stateid, stateid);
+			delegation->type = type;
+			delegation->pagemod_limit = pagemod_limit;
 			oldcred = delegation->cred;
 			delegation->cred = get_rpccred(cred);
 			clear_bit(NFS_DELEGATION_NEED_RECLAIM,
@@ -195,14 +200,14 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred,
 			spin_unlock(&delegation->lock);
 			rcu_read_unlock();
 			put_rpccred(oldcred);
-			trace_nfs4_reclaim_delegation(inode, res->delegation_type);
+			trace_nfs4_reclaim_delegation(inode, type);
 			return;
 		}
 		/* We appear to have raced with a delegation return. */
 		spin_unlock(&delegation->lock);
 	}
 	rcu_read_unlock();
-	nfs_inode_set_delegation(inode, cred, res);
+	nfs_inode_set_delegation(inode, cred, type, stateid, pagemod_limit);
 }
 
 static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync)
@@ -329,11 +334,16 @@ nfs_update_inplace_delegation(struct nfs_delegation *delegation,
  * nfs_inode_set_delegation - set up a delegation on an inode
  * @inode: inode to which delegation applies
  * @cred: cred to use for subsequent delegation processing
- * @res: new delegation state from server
+ * @type: delegation type
+ * @stateid: delegation stateid
+ * @pagemod_limit: write delegation "space_limit"
  *
  * Returns zero on success, or a negative errno value.
  */
-int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
+int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred,
+				  fmode_t type,
+				  const nfs4_stateid *stateid,
+				  unsigned long pagemod_limit)
 {
 	struct nfs_server *server = NFS_SERVER(inode);
 	struct nfs_client *clp = server->nfs_client;
@@ -345,9 +355,9 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
 	delegation = kmalloc(sizeof(*delegation), GFP_NOFS);
 	if (delegation == NULL)
 		return -ENOMEM;
-	nfs4_stateid_copy(&delegation->stateid, &res->delegation);
-	delegation->type = res->delegation_type;
-	delegation->pagemod_limit = res->pagemod_limit;
+	nfs4_stateid_copy(&delegation->stateid, stateid);
+	delegation->type = type;
+	delegation->pagemod_limit = pagemod_limit;
 	delegation->change_attr = inode_peek_iversion_raw(inode);
 	delegation->cred = get_rpccred(cred);
 	delegation->inode = inode;
@@ -392,7 +402,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
 	rcu_assign_pointer(nfsi->delegation, delegation);
 	delegation = NULL;
 
-	trace_nfs4_set_delegation(inode, res->delegation_type);
+	trace_nfs4_set_delegation(inode, type);
 
 out:
 	spin_unlock(&clp->cl_lock);
@@ -547,6 +557,22 @@ int nfs4_inode_return_delegation(struct inode *inode)
 	return err;
 }
 
+/**
+ * nfs4_inode_make_writeable - return the delegation if needed for writes
+ * @inode: pointer to inode
+ *
+ * Make the inode writeable by returning the delegation if necessary
+ *
+ * Returns zero on success, or a negative errno value.
+ */
+int nfs4_inode_make_writeable(struct inode *inode)
+{
+	if (!nfs4_has_session(NFS_SERVER(inode)->nfs_client) ||
+	    !nfs4_check_delegation(inode, FMODE_WRITE))
+		return nfs4_inode_return_delegation(inode);
+	return 0;
+}
+
 static void nfs_mark_return_if_closed_delegation(struct nfs_server *server,
 		struct nfs_delegation *delegation)
 {
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 185a09f37a89..bb1ef8c37af4 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -36,8 +36,10 @@ enum {
 	NFS_DELEGATION_TEST_EXPIRED,
 };
 
-int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
-void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
+int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred,
+		fmode_t type, const nfs4_stateid *stateid, unsigned long pagemod_limit);
+void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred,
+		fmode_t type, const nfs4_stateid *stateid, unsigned long pagemod_limit);
 int nfs4_inode_return_delegation(struct inode *inode);
 int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
 void nfs_inode_return_delegation_noreclaim(struct inode *inode);
@@ -70,6 +72,7 @@ int nfs4_check_delegation(struct inode *inode, fmode_t flags);
 bool nfs4_delegation_flush_on_close(const struct inode *inode);
 void nfs_inode_find_delegation_state_and_recover(struct inode *inode,
 		const nfs4_stateid *stateid);
+int nfs4_inode_make_writeable(struct inode *inode);
 
 #endif
 
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 2f3f86726f5b..73f8b43d988c 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1272,7 +1272,9 @@ static void nfs_drop_nlink(struct inode *inode)
 	/* drop the inode if we're reasonably sure this is the last link */
 	if (inode->i_nlink == 1)
 		clear_nlink(inode);
-	NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATTR;
+	NFS_I(inode)->cache_validity |= NFS_INO_INVALID_CHANGE
+		| NFS_INO_INVALID_CTIME
+		| NFS_INO_INVALID_OTHER;
 	spin_unlock(&inode->i_lock);
 }
 
@@ -1798,12 +1800,11 @@ static int nfs_safe_remove(struct dentry *dentry)
 
 	trace_nfs_remove_enter(dir, dentry);
 	if (inode != NULL) {
-		NFS_PROTO(inode)->return_delegation(inode);
-		error = NFS_PROTO(dir)->remove(dir, &dentry->d_name);
+		error = NFS_PROTO(dir)->remove(dir, dentry);
 		if (error == 0)
 			nfs_drop_nlink(inode);
 	} else
-		error = NFS_PROTO(dir)->remove(dir, &dentry->d_name);
+		error = NFS_PROTO(dir)->remove(dir, dentry);
 	if (error == -ENOENT)
 		nfs_dentry_handle_enoent(dentry);
 	trace_nfs_remove_exit(dir, dentry, error);
@@ -1932,8 +1933,6 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
 		old_dentry, dentry);
 
 	trace_nfs_link_enter(inode, dir, dentry);
-	NFS_PROTO(inode)->return_delegation(inode);
-
 	d_drop(dentry);
 	error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name);
 	if (error == 0) {
@@ -2023,10 +2022,6 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		}
 	}
 
-	NFS_PROTO(old_inode)->return_delegation(old_inode);
-	if (new_inode != NULL)
-		NFS_PROTO(new_inode)->return_delegation(new_inode);
-
 	task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL);
 	if (IS_ERR(task)) {
 		error = PTR_ERR(task);
diff --git a/fs/nfs/fscache-index.c b/fs/nfs/fscache-index.c
index 0ee4b93d36ea..1c5d8d31fc0a 100644
--- a/fs/nfs/fscache-index.c
+++ b/fs/nfs/fscache-index.c
@@ -50,59 +50,6 @@ void nfs_fscache_unregister(void)
 }
 
 /*
- * Layout of the key for an NFS server cache object.
- */
-struct nfs_server_key {
-	uint16_t	nfsversion;		/* NFS protocol version */
-	uint16_t	family;			/* address family */
-	uint16_t	port;			/* IP port */
-	union {
-		struct in_addr	ipv4_addr;	/* IPv4 address */
-		struct in6_addr ipv6_addr;	/* IPv6 address */
-	} addr[0];
-};
-
-/*
- * Generate a key to describe a server in the main NFS index
- * - We return the length of the key, or 0 if we can't generate one
- */
-static uint16_t nfs_server_get_key(const void *cookie_netfs_data,
-				   void *buffer, uint16_t bufmax)
-{
-	const struct nfs_client *clp = cookie_netfs_data;
-	const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) &clp->cl_addr;
-	const struct sockaddr_in *sin = (struct sockaddr_in *) &clp->cl_addr;
-	struct nfs_server_key *key = buffer;
-	uint16_t len = sizeof(struct nfs_server_key);
-
-	memset(key, 0, len);
-	key->nfsversion = clp->rpc_ops->version;
-	key->family = clp->cl_addr.ss_family;
-
-	switch (clp->cl_addr.ss_family) {
-	case AF_INET:
-		key->port = sin->sin_port;
-		key->addr[0].ipv4_addr = sin->sin_addr;
-		len += sizeof(key->addr[0].ipv4_addr);
-		break;
-
-	case AF_INET6:
-		key->port = sin6->sin6_port;
-		key->addr[0].ipv6_addr = sin6->sin6_addr;
-		len += sizeof(key->addr[0].ipv6_addr);
-		break;
-
-	default:
-		printk(KERN_WARNING "NFS: Unknown network family '%d'\n",
-		       clp->cl_addr.ss_family);
-		len = 0;
-		break;
-	}
-
-	return len;
-}
-
-/*
  * Define the server object for FS-Cache.  This is used to describe a server
  * object to fscache_acquire_cookie().  It is keyed by the NFS protocol and
  * server address parameters.
@@ -110,33 +57,9 @@ static uint16_t nfs_server_get_key(const void *cookie_netfs_data,
 const struct fscache_cookie_def nfs_fscache_server_index_def = {
 	.name		= "NFS.server",
 	.type 		= FSCACHE_COOKIE_TYPE_INDEX,
-	.get_key	= nfs_server_get_key,
 };
 
 /*
- * Generate a key to describe a superblock key in the main NFS index
- */
-static uint16_t nfs_super_get_key(const void *cookie_netfs_data,
-				  void *buffer, uint16_t bufmax)
-{
-	const struct nfs_fscache_key *key;
-	const struct nfs_server *nfss = cookie_netfs_data;
-	uint16_t len;
-
-	key = nfss->fscache_key;
-	len = sizeof(key->key) + key->key.uniq_len;
-	if (len > bufmax) {
-		len = 0;
-	} else {
-		memcpy(buffer, &key->key, sizeof(key->key));
-		memcpy(buffer + sizeof(key->key),
-		       key->key.uniquifier, key->key.uniq_len);
-	}
-
-	return len;
-}
-
-/*
  * Define the superblock object for FS-Cache.  This is used to describe a
  * superblock object to fscache_acquire_cookie().  It is keyed by all the NFS
  * parameters that might cause a separate superblock.
@@ -144,84 +67,9 @@ static uint16_t nfs_super_get_key(const void *cookie_netfs_data,
 const struct fscache_cookie_def nfs_fscache_super_index_def = {
 	.name		= "NFS.super",
 	.type 		= FSCACHE_COOKIE_TYPE_INDEX,
-	.get_key	= nfs_super_get_key,
 };
 
 /*
- * Definition of the auxiliary data attached to NFS inode storage objects
- * within the cache.
- *
- * The contents of this struct are recorded in the on-disk local cache in the
- * auxiliary data attached to the data storage object backing an inode.  This
- * permits coherency to be managed when a new inode binds to an already extant
- * cache object.
- */
-struct nfs_fscache_inode_auxdata {
-	struct timespec	mtime;
-	struct timespec	ctime;
-	loff_t		size;
-	u64		change_attr;
-};
-
-/*
- * Generate a key to describe an NFS inode in an NFS server's index
- */
-static uint16_t nfs_fscache_inode_get_key(const void *cookie_netfs_data,
-					  void *buffer, uint16_t bufmax)
-{
-	const struct nfs_inode *nfsi = cookie_netfs_data;
-	uint16_t nsize;
-
-	/* use the inode's NFS filehandle as the key */
-	nsize = nfsi->fh.size;
-	memcpy(buffer, nfsi->fh.data, nsize);
-	return nsize;
-}
-
-/*
- * Get certain file attributes from the netfs data
- * - This function can be absent for an index
- * - Not permitted to return an error
- * - The netfs data from the cookie being used as the source is presented
- */
-static void nfs_fscache_inode_get_attr(const void *cookie_netfs_data,
-				       uint64_t *size)
-{
-	const struct nfs_inode *nfsi = cookie_netfs_data;
-
-	*size = nfsi->vfs_inode.i_size;
-}
-
-/*
- * Get the auxiliary data from netfs data
- * - This function can be absent if the index carries no state data
- * - Should store the auxiliary data in the buffer
- * - Should return the amount of amount stored
- * - Not permitted to return an error
- * - The netfs data from the cookie being used as the source is presented
- */
-static uint16_t nfs_fscache_inode_get_aux(const void *cookie_netfs_data,
-					  void *buffer, uint16_t bufmax)
-{
-	struct nfs_fscache_inode_auxdata auxdata;
-	const struct nfs_inode *nfsi = cookie_netfs_data;
-
-	memset(&auxdata, 0, sizeof(auxdata));
-	auxdata.size = nfsi->vfs_inode.i_size;
-	auxdata.mtime = nfsi->vfs_inode.i_mtime;
-	auxdata.ctime = nfsi->vfs_inode.i_ctime;
-
-	if (NFS_SERVER(&nfsi->vfs_inode)->nfs_client->rpc_ops->version == 4)
-		auxdata.change_attr = inode_peek_iversion_raw(&nfsi->vfs_inode);
-
-	if (bufmax > sizeof(auxdata))
-		bufmax = sizeof(auxdata);
-
-	memcpy(buffer, &auxdata, bufmax);
-	return bufmax;
-}
-
-/*
  * Consult the netfs about the state of an object
  * - This function can be absent if the index carries no state data
  * - The netfs data from the cookie being used as the target is
@@ -230,7 +78,8 @@ static uint16_t nfs_fscache_inode_get_aux(const void *cookie_netfs_data,
 static
 enum fscache_checkaux nfs_fscache_inode_check_aux(void *cookie_netfs_data,
 						  const void *data,
-						  uint16_t datalen)
+						  uint16_t datalen,
+						  loff_t object_size)
 {
 	struct nfs_fscache_inode_auxdata auxdata;
 	struct nfs_inode *nfsi = cookie_netfs_data;
@@ -239,7 +88,6 @@ enum fscache_checkaux nfs_fscache_inode_check_aux(void *cookie_netfs_data,
 		return FSCACHE_CHECKAUX_OBSOLETE;
 
 	memset(&auxdata, 0, sizeof(auxdata));
-	auxdata.size = nfsi->vfs_inode.i_size;
 	auxdata.mtime = nfsi->vfs_inode.i_mtime;
 	auxdata.ctime = nfsi->vfs_inode.i_ctime;
 
@@ -288,9 +136,6 @@ static void nfs_fh_put_context(void *cookie_netfs_data, void *context)
 const struct fscache_cookie_def nfs_fscache_inode_object_def = {
 	.name		= "NFS.fh",
 	.type		= FSCACHE_COOKIE_TYPE_DATAFILE,
-	.get_key	= nfs_fscache_inode_get_key,
-	.get_attr	= nfs_fscache_inode_get_attr,
-	.get_aux	= nfs_fscache_inode_get_aux,
 	.check_aux	= nfs_fscache_inode_check_aux,
 	.get_context	= nfs_fh_get_context,
 	.put_context	= nfs_fh_put_context,
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index d63bea8bbfbb..b55fc7920c3b 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -18,6 +18,7 @@
 #include <linux/in6.h>
 #include <linux/seq_file.h>
 #include <linux/slab.h>
+#include <linux/iversion.h>
 
 #include "internal.h"
 #include "iostat.h"
@@ -29,6 +30,21 @@ static struct rb_root nfs_fscache_keys = RB_ROOT;
 static DEFINE_SPINLOCK(nfs_fscache_keys_lock);
 
 /*
+ * Layout of the key for an NFS server cache object.
+ */
+struct nfs_server_key {
+	struct {
+		uint16_t	nfsversion;		/* NFS protocol version */
+		uint16_t	family;			/* address family */
+		__be16		port;			/* IP port */
+	} hdr;
+	union {
+		struct in_addr	ipv4_addr;	/* IPv4 address */
+		struct in6_addr ipv6_addr;	/* IPv6 address */
+	};
+} __packed;
+
+/*
  * Get the per-client index cookie for an NFS client if the appropriate mount
  * flag was set
  * - We always try and get an index cookie for the client, but get filehandle
@@ -36,10 +52,41 @@ static DEFINE_SPINLOCK(nfs_fscache_keys_lock);
  */
 void nfs_fscache_get_client_cookie(struct nfs_client *clp)
 {
+	const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) &clp->cl_addr;
+	const struct sockaddr_in *sin = (struct sockaddr_in *) &clp->cl_addr;
+	struct nfs_server_key key;
+	uint16_t len = sizeof(key.hdr);
+
+	memset(&key, 0, sizeof(key));
+	key.hdr.nfsversion = clp->rpc_ops->version;
+	key.hdr.family = clp->cl_addr.ss_family;
+
+	switch (clp->cl_addr.ss_family) {
+	case AF_INET:
+		key.hdr.port = sin->sin_port;
+		key.ipv4_addr = sin->sin_addr;
+		len += sizeof(key.ipv4_addr);
+		break;
+
+	case AF_INET6:
+		key.hdr.port = sin6->sin6_port;
+		key.ipv6_addr = sin6->sin6_addr;
+		len += sizeof(key.ipv6_addr);
+		break;
+
+	default:
+		printk(KERN_WARNING "NFS: Unknown network family '%d'\n",
+		       clp->cl_addr.ss_family);
+		clp->fscache = NULL;
+		return;
+	}
+
 	/* create a cache index for looking up filehandles */
 	clp->fscache = fscache_acquire_cookie(nfs_fscache_netfs.primary_index,
 					      &nfs_fscache_server_index_def,
-					      clp, true);
+					      &key, len,
+					      NULL, 0,
+					      clp, 0, true);
 	dfprintk(FSCACHE, "NFS: get client cookie (0x%p/0x%p)\n",
 		 clp, clp->fscache);
 }
@@ -52,7 +99,7 @@ void nfs_fscache_release_client_cookie(struct nfs_client *clp)
 	dfprintk(FSCACHE, "NFS: releasing client cookie (0x%p/0x%p)\n",
 		 clp, clp->fscache);
 
-	fscache_relinquish_cookie(clp->fscache, 0);
+	fscache_relinquish_cookie(clp->fscache, NULL, false);
 	clp->fscache = NULL;
 }
 
@@ -139,7 +186,9 @@ void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq, int
 	/* create a cache index for looking up filehandles */
 	nfss->fscache = fscache_acquire_cookie(nfss->nfs_client->fscache,
 					       &nfs_fscache_super_index_def,
-					       nfss, true);
+					       key, sizeof(*key) + ulen,
+					       NULL, 0,
+					       nfss, 0, true);
 	dfprintk(FSCACHE, "NFS: get superblock cookie (0x%p/0x%p)\n",
 		 nfss, nfss->fscache);
 	return;
@@ -163,7 +212,7 @@ void nfs_fscache_release_super_cookie(struct super_block *sb)
 	dfprintk(FSCACHE, "NFS: releasing superblock cookie (0x%p/0x%p)\n",
 		 nfss, nfss->fscache);
 
-	fscache_relinquish_cookie(nfss->fscache, 0);
+	fscache_relinquish_cookie(nfss->fscache, NULL, false);
 	nfss->fscache = NULL;
 
 	if (nfss->fscache_key) {
@@ -180,14 +229,25 @@ void nfs_fscache_release_super_cookie(struct super_block *sb)
  */
 void nfs_fscache_init_inode(struct inode *inode)
 {
+	struct nfs_fscache_inode_auxdata auxdata;
 	struct nfs_inode *nfsi = NFS_I(inode);
 
 	nfsi->fscache = NULL;
 	if (!S_ISREG(inode->i_mode))
 		return;
+
+	memset(&auxdata, 0, sizeof(auxdata));
+	auxdata.mtime = nfsi->vfs_inode.i_mtime;
+	auxdata.ctime = nfsi->vfs_inode.i_ctime;
+
+	if (NFS_SERVER(&nfsi->vfs_inode)->nfs_client->rpc_ops->version == 4)
+		auxdata.change_attr = inode_peek_iversion_raw(&nfsi->vfs_inode);
+
 	nfsi->fscache = fscache_acquire_cookie(NFS_SB(inode->i_sb)->fscache,
 					       &nfs_fscache_inode_object_def,
-					       nfsi, false);
+					       nfsi->fh.data, nfsi->fh.size,
+					       &auxdata, sizeof(auxdata),
+					       nfsi, nfsi->vfs_inode.i_size, false);
 }
 
 /*
@@ -195,12 +255,16 @@ void nfs_fscache_init_inode(struct inode *inode)
  */
 void nfs_fscache_clear_inode(struct inode *inode)
 {
+	struct nfs_fscache_inode_auxdata auxdata;
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct fscache_cookie *cookie = nfs_i_fscache(inode);
 
 	dfprintk(FSCACHE, "NFS: clear cookie (0x%p/0x%p)\n", nfsi, cookie);
 
-	fscache_relinquish_cookie(cookie, false);
+	memset(&auxdata, 0, sizeof(auxdata));
+	auxdata.mtime = nfsi->vfs_inode.i_mtime;
+	auxdata.ctime = nfsi->vfs_inode.i_ctime;
+	fscache_relinquish_cookie(cookie, &auxdata, false);
 	nfsi->fscache = NULL;
 }
 
@@ -232,20 +296,26 @@ static bool nfs_fscache_can_enable(void *data)
  */
 void nfs_fscache_open_file(struct inode *inode, struct file *filp)
 {
+	struct nfs_fscache_inode_auxdata auxdata;
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct fscache_cookie *cookie = nfs_i_fscache(inode);
 
 	if (!fscache_cookie_valid(cookie))
 		return;
 
+	memset(&auxdata, 0, sizeof(auxdata));
+	auxdata.mtime = nfsi->vfs_inode.i_mtime;
+	auxdata.ctime = nfsi->vfs_inode.i_ctime;
+
 	if (inode_is_open_for_write(inode)) {
 		dfprintk(FSCACHE, "NFS: nfsi 0x%p disabling cache\n", nfsi);
 		clear_bit(NFS_INO_FSCACHE, &nfsi->flags);
-		fscache_disable_cookie(cookie, true);
+		fscache_disable_cookie(cookie, &auxdata, true);
 		fscache_uncache_all_inode_pages(cookie, inode);
 	} else {
 		dfprintk(FSCACHE, "NFS: nfsi 0x%p enabling cache\n", nfsi);
-		fscache_enable_cookie(cookie, nfs_fscache_can_enable, inode);
+		fscache_enable_cookie(cookie, &auxdata, nfsi->vfs_inode.i_size,
+				      nfs_fscache_can_enable, inode);
 		if (fscache_cookie_enabled(cookie))
 			set_bit(NFS_INO_FSCACHE, &NFS_I(inode)->flags);
 	}
@@ -422,7 +492,8 @@ void __nfs_readpage_to_fscache(struct inode *inode, struct page *page, int sync)
 		 "NFS: readpage_to_fscache(fsc:%p/p:%p(i:%lx f:%lx)/%d)\n",
 		 nfs_i_fscache(inode), page, page->index, page->flags, sync);
 
-	ret = fscache_write_page(nfs_i_fscache(inode), page, GFP_KERNEL);
+	ret = fscache_write_page(nfs_i_fscache(inode), page,
+				 inode->i_size, GFP_KERNEL);
 	dfprintk(FSCACHE,
 		 "NFS:     readpage_to_fscache: p:%p(i:%lu f:%lx) ret %d\n",
 		 page, page->index, page->flags, ret);
diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h
index d7fe3e799f2f..161ba2edb9d0 100644
--- a/fs/nfs/fscache.h
+++ b/fs/nfs/fscache.h
@@ -57,6 +57,21 @@ struct nfs_fscache_key {
 };
 
 /*
+ * Definition of the auxiliary data attached to NFS inode storage objects
+ * within the cache.
+ *
+ * The contents of this struct are recorded in the on-disk local cache in the
+ * auxiliary data attached to the data storage object backing an inode.  This
+ * permits coherency to be managed when a new inode binds to an already extant
+ * cache object.
+ */
+struct nfs_fscache_inode_auxdata {
+	struct timespec	mtime;
+	struct timespec	ctime;
+	u64		change_attr;
+};
+
+/*
  * fscache-index.c
  */
 extern struct fscache_netfs nfs_fscache_netfs;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index d17a90c4fa37..bd15d0b57626 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -195,7 +195,10 @@ bool nfs_check_cache_invalid(struct inode *inode, unsigned long flags)
 static void nfs_set_cache_invalid(struct inode *inode, unsigned long flags)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
+	bool have_delegation = nfs_have_delegated_attributes(inode);
 
+	if (have_delegation)
+		flags &= ~(NFS_INO_INVALID_CHANGE|NFS_INO_REVAL_PAGECACHE);
 	if (inode->i_mapping->nrpages == 0)
 		flags &= ~NFS_INO_INVALID_DATA;
 	nfsi->cache_validity |= flags;
@@ -447,7 +450,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
 		inode->i_mode = fattr->mode;
 		if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0
 				&& nfs_server_capable(inode, NFS_CAP_MODE))
-			nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
+			nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER);
 		/* Why so? Because we want revalidate for devices/FIFOs, and
 		 * that's precisely what we have in nfs_file_inode_operations.
 		 */
@@ -493,37 +496,35 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
 		if (fattr->valid & NFS_ATTR_FATTR_ATIME)
 			inode->i_atime = fattr->atime;
 		else if (nfs_server_capable(inode, NFS_CAP_ATIME))
-			nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
+			nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME);
 		if (fattr->valid & NFS_ATTR_FATTR_MTIME)
 			inode->i_mtime = fattr->mtime;
 		else if (nfs_server_capable(inode, NFS_CAP_MTIME))
-			nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
+			nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME);
 		if (fattr->valid & NFS_ATTR_FATTR_CTIME)
 			inode->i_ctime = fattr->ctime;
 		else if (nfs_server_capable(inode, NFS_CAP_CTIME))
-			nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
+			nfs_set_cache_invalid(inode, NFS_INO_INVALID_CTIME);
 		if (fattr->valid & NFS_ATTR_FATTR_CHANGE)
 			inode_set_iversion_raw(inode, fattr->change_attr);
 		else
-			nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR
-				| NFS_INO_REVAL_PAGECACHE);
+			nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE);
 		if (fattr->valid & NFS_ATTR_FATTR_SIZE)
 			inode->i_size = nfs_size_to_loff_t(fattr->size);
 		else
-			nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR
-				| NFS_INO_REVAL_PAGECACHE);
+			nfs_set_cache_invalid(inode, NFS_INO_INVALID_SIZE);
 		if (fattr->valid & NFS_ATTR_FATTR_NLINK)
 			set_nlink(inode, fattr->nlink);
 		else if (nfs_server_capable(inode, NFS_CAP_NLINK))
-			nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
+			nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER);
 		if (fattr->valid & NFS_ATTR_FATTR_OWNER)
 			inode->i_uid = fattr->uid;
 		else if (nfs_server_capable(inode, NFS_CAP_OWNER))
-			nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
+			nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER);
 		if (fattr->valid & NFS_ATTR_FATTR_GROUP)
 			inode->i_gid = fattr->gid;
 		else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP))
-			nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR);
+			nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER);
 		if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
 			inode->i_blocks = fattr->du.nfs2.blocks;
 		if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
@@ -608,11 +609,6 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
 		goto out;
 	}
 
-	/*
-	 * Return any delegations if we're going to change ACLs
-	 */
-	if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0)
-		NFS_PROTO(inode)->return_delegation(inode);
 	error = NFS_PROTO(inode)->setattr(dentry, fattr, attr);
 	if (error == 0)
 		error = nfs_refresh_inode(inode, fattr);
@@ -645,6 +641,7 @@ static int nfs_vmtruncate(struct inode * inode, loff_t offset)
 	/* Optimisation */
 	if (offset == 0)
 		NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_DATA;
+	NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_SIZE;
 
 	spin_unlock(&inode->i_lock);
 	truncate_pagecache(inode, offset);
@@ -657,6 +654,7 @@ out:
  * nfs_setattr_update_inode - Update inode metadata after a setattr call.
  * @inode: pointer to struct inode
  * @attr: pointer to struct iattr
+ * @fattr: pointer to struct nfs_fattr
  *
  * Note: we do this in the *proc.c in order to ensure that
  *       it works for things like exclusive creates too.
@@ -669,6 +667,8 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr,
 
 	spin_lock(&inode->i_lock);
 	NFS_I(inode)->attr_gencount = fattr->gencount;
+	nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE
+			| NFS_INO_INVALID_CTIME);
 	if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) {
 		if ((attr->ia_valid & ATTR_MODE) != 0) {
 			int mode = attr->ia_mode & S_IALLUGO;
@@ -683,13 +683,12 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr,
 				| NFS_INO_INVALID_ACL);
 	}
 	if ((attr->ia_valid & ATTR_SIZE) != 0) {
+		nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME);
 		nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC);
 		nfs_vmtruncate(inode, attr->ia_size);
 	}
 	if (fattr->valid)
 		nfs_update_inode(inode, fattr);
-	else
-		NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATTR;
 	spin_unlock(&inode->i_lock);
 }
 EXPORT_SYMBOL_GPL(nfs_setattr_update_inode);
@@ -1303,24 +1302,20 @@ static bool nfs_file_has_buffered_writers(struct nfs_inode *nfsi)
 	return nfs_file_has_writers(nfsi) && nfs_file_io_is_buffered(nfsi);
 }
 
-static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
+static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 {
-	unsigned long ret = 0;
-
 	if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE)
 			&& (fattr->valid & NFS_ATTR_FATTR_CHANGE)
 			&& inode_eq_iversion_raw(inode, fattr->pre_change_attr)) {
 		inode_set_iversion_raw(inode, fattr->change_attr);
 		if (S_ISDIR(inode->i_mode))
 			nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA);
-		ret |= NFS_INO_INVALID_ATTR;
 	}
 	/* If we have atomic WCC data, we may update some attributes */
 	if ((fattr->valid & NFS_ATTR_FATTR_PRECTIME)
 			&& (fattr->valid & NFS_ATTR_FATTR_CTIME)
 			&& timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) {
 		memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
-		ret |= NFS_INO_INVALID_ATTR;
 	}
 
 	if ((fattr->valid & NFS_ATTR_FATTR_PREMTIME)
@@ -1329,17 +1324,13 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr
 		memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
 		if (S_ISDIR(inode->i_mode))
 			nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA);
-		ret |= NFS_INO_INVALID_ATTR;
 	}
 	if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE)
 			&& (fattr->valid & NFS_ATTR_FATTR_SIZE)
 			&& i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size)
 			&& !nfs_have_writebacks(inode)) {
 		i_size_write(inode, nfs_size_to_loff_t(fattr->size));
-		ret |= NFS_INO_INVALID_ATTR;
 	}
-
-	return ret;
 }
 
 /**
@@ -1369,33 +1360,41 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
 	if (!nfs_file_has_buffered_writers(nfsi)) {
 		/* Verify a few of the more important attributes */
 		if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && !inode_eq_iversion_raw(inode, fattr->change_attr))
-			invalid |= NFS_INO_INVALID_ATTR | NFS_INO_REVAL_PAGECACHE;
+			invalid |= NFS_INO_INVALID_CHANGE
+				| NFS_INO_REVAL_PAGECACHE;
 
 		if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec_equal(&inode->i_mtime, &fattr->mtime))
-			invalid |= NFS_INO_INVALID_ATTR;
+			invalid |= NFS_INO_INVALID_MTIME;
 
 		if ((fattr->valid & NFS_ATTR_FATTR_CTIME) && !timespec_equal(&inode->i_ctime, &fattr->ctime))
-			invalid |= NFS_INO_INVALID_ATTR;
+			invalid |= NFS_INO_INVALID_CTIME;
 
 		if (fattr->valid & NFS_ATTR_FATTR_SIZE) {
 			cur_size = i_size_read(inode);
 			new_isize = nfs_size_to_loff_t(fattr->size);
 			if (cur_size != new_isize)
-				invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
+				invalid |= NFS_INO_INVALID_SIZE
+					| NFS_INO_REVAL_PAGECACHE;
 		}
 	}
 
 	/* Have any file permissions changed? */
 	if ((fattr->valid & NFS_ATTR_FATTR_MODE) && (inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO))
-		invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
+		invalid |= NFS_INO_INVALID_ACCESS
+			| NFS_INO_INVALID_ACL
+			| NFS_INO_INVALID_OTHER;
 	if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && !uid_eq(inode->i_uid, fattr->uid))
-		invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
+		invalid |= NFS_INO_INVALID_ACCESS
+			| NFS_INO_INVALID_ACL
+			| NFS_INO_INVALID_OTHER;
 	if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && !gid_eq(inode->i_gid, fattr->gid))
-		invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
+		invalid |= NFS_INO_INVALID_ACCESS
+			| NFS_INO_INVALID_ACL
+			| NFS_INO_INVALID_OTHER;
 
 	/* Has the link count changed? */
 	if ((fattr->valid & NFS_ATTR_FATTR_NLINK) && inode->i_nlink != fattr->nlink)
-		invalid |= NFS_INO_INVALID_ATTR;
+		invalid |= NFS_INO_INVALID_OTHER;
 
 	if ((fattr->valid & NFS_ATTR_FATTR_ATIME) && !timespec_equal(&inode->i_atime, &fattr->atime))
 		invalid |= NFS_INO_INVALID_ATIME;
@@ -1597,10 +1596,9 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
 }
 EXPORT_SYMBOL_GPL(nfs_refresh_inode);
 
-static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr *fattr)
+static int nfs_post_op_update_inode_locked(struct inode *inode,
+		struct nfs_fattr *fattr, unsigned int invalid)
 {
-	unsigned long invalid = NFS_INO_INVALID_ATTR;
-
 	if (S_ISDIR(inode->i_mode))
 		invalid |= NFS_INO_INVALID_DATA;
 	nfs_set_cache_invalid(inode, invalid);
@@ -1629,7 +1627,9 @@ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 
 	spin_lock(&inode->i_lock);
 	nfs_fattr_set_barrier(fattr);
-	status = nfs_post_op_update_inode_locked(inode, fattr);
+	status = nfs_post_op_update_inode_locked(inode, fattr,
+			NFS_INO_INVALID_CHANGE
+			| NFS_INO_INVALID_CTIME);
 	spin_unlock(&inode->i_lock);
 
 	return status;
@@ -1681,7 +1681,10 @@ int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fa
 		fattr->valid |= NFS_ATTR_FATTR_PRESIZE;
 	}
 out_noforce:
-	status = nfs_post_op_update_inode_locked(inode, fattr);
+	status = nfs_post_op_update_inode_locked(inode, fattr,
+			NFS_INO_INVALID_CHANGE
+			| NFS_INO_INVALID_CTIME
+			| NFS_INO_INVALID_MTIME);
 	return status;
 }
 
@@ -1789,7 +1792,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 			| NFS_INO_REVAL_PAGECACHE);
 
 	/* Do atomic weak cache consistency updates */
-	invalid |= nfs_wcc_update_inode(inode, fattr);
+	nfs_wcc_update_inode(inode, fattr);
 
 	if (pnfs_layoutcommit_outstanding(inode)) {
 		nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_ATTR;
@@ -1803,17 +1806,25 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 					inode->i_sb->s_id, inode->i_ino);
 			/* Could it be a race with writeback? */
 			if (!have_writers) {
-				invalid |= NFS_INO_INVALID_ATTR
+				invalid |= NFS_INO_INVALID_CHANGE
 					| NFS_INO_INVALID_DATA
 					| NFS_INO_INVALID_ACCESS
 					| NFS_INO_INVALID_ACL;
+				/* Force revalidate of all attributes */
+				save_cache_validity |= NFS_INO_INVALID_CTIME
+					| NFS_INO_INVALID_MTIME
+					| NFS_INO_INVALID_SIZE
+					| NFS_INO_INVALID_OTHER;
 				if (S_ISDIR(inode->i_mode))
 					nfs_force_lookup_revalidate(inode);
 			}
 			inode_set_iversion_raw(inode, fattr->change_attr);
 		}
 	} else {
-		nfsi->cache_validity |= save_cache_validity;
+		nfsi->cache_validity |= save_cache_validity &
+				(NFS_INO_INVALID_CHANGE
+				| NFS_INO_REVAL_PAGECACHE
+				| NFS_INO_REVAL_FORCED);
 		cache_revalidated = false;
 	}
 
@@ -1821,7 +1832,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 		memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
 	} else if (server->caps & NFS_CAP_MTIME) {
 		nfsi->cache_validity |= save_cache_validity &
-				(NFS_INO_INVALID_ATTR
+				(NFS_INO_INVALID_MTIME
 				| NFS_INO_REVAL_FORCED);
 		cache_revalidated = false;
 	}
@@ -1830,7 +1841,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 		memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
 	} else if (server->caps & NFS_CAP_CTIME) {
 		nfsi->cache_validity |= save_cache_validity &
-				(NFS_INO_INVALID_ATTR
+				(NFS_INO_INVALID_CTIME
 				| NFS_INO_REVAL_FORCED);
 		cache_revalidated = false;
 	}
@@ -1845,7 +1856,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 			if (!nfs_have_writebacks(inode) || new_isize > cur_isize) {
 				i_size_write(inode, new_isize);
 				if (!have_writers)
-					invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
+					invalid |= NFS_INO_INVALID_DATA;
 			}
 			dprintk("NFS: isize change on server for file %s/%ld "
 					"(%Ld to %Ld)\n",
@@ -1856,7 +1867,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 		}
 	} else {
 		nfsi->cache_validity |= save_cache_validity &
-				(NFS_INO_INVALID_ATTR
+				(NFS_INO_INVALID_SIZE
 				| NFS_INO_REVAL_PAGECACHE
 				| NFS_INO_REVAL_FORCED);
 		cache_revalidated = false;
@@ -1877,55 +1888,61 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 			umode_t newmode = inode->i_mode & S_IFMT;
 			newmode |= fattr->mode & S_IALLUGO;
 			inode->i_mode = newmode;
-			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
+			invalid |= NFS_INO_INVALID_ACCESS
+				| NFS_INO_INVALID_ACL
+				| NFS_INO_INVALID_OTHER;
 		}
 	} else if (server->caps & NFS_CAP_MODE) {
 		nfsi->cache_validity |= save_cache_validity &
-				(NFS_INO_INVALID_ATTR
-				| NFS_INO_INVALID_ACCESS
+				(NFS_INO_INVALID_ACCESS
 				| NFS_INO_INVALID_ACL
+				| NFS_INO_INVALID_OTHER
 				| NFS_INO_REVAL_FORCED);
 		cache_revalidated = false;
 	}
 
 	if (fattr->valid & NFS_ATTR_FATTR_OWNER) {
 		if (!uid_eq(inode->i_uid, fattr->uid)) {
-			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
+			invalid |= NFS_INO_INVALID_ACCESS
+				| NFS_INO_INVALID_ACL
+				| NFS_INO_INVALID_OTHER;
 			inode->i_uid = fattr->uid;
 		}
 	} else if (server->caps & NFS_CAP_OWNER) {
 		nfsi->cache_validity |= save_cache_validity &
-				(NFS_INO_INVALID_ATTR
-				| NFS_INO_INVALID_ACCESS
+				(NFS_INO_INVALID_ACCESS
 				| NFS_INO_INVALID_ACL
+				| NFS_INO_INVALID_OTHER
 				| NFS_INO_REVAL_FORCED);
 		cache_revalidated = false;
 	}
 
 	if (fattr->valid & NFS_ATTR_FATTR_GROUP) {
 		if (!gid_eq(inode->i_gid, fattr->gid)) {
-			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
+			invalid |= NFS_INO_INVALID_ACCESS
+				| NFS_INO_INVALID_ACL
+				| NFS_INO_INVALID_OTHER;
 			inode->i_gid = fattr->gid;
 		}
 	} else if (server->caps & NFS_CAP_OWNER_GROUP) {
 		nfsi->cache_validity |= save_cache_validity &
-				(NFS_INO_INVALID_ATTR
-				| NFS_INO_INVALID_ACCESS
+				(NFS_INO_INVALID_ACCESS
 				| NFS_INO_INVALID_ACL
+				| NFS_INO_INVALID_OTHER
 				| NFS_INO_REVAL_FORCED);
 		cache_revalidated = false;
 	}
 
 	if (fattr->valid & NFS_ATTR_FATTR_NLINK) {
 		if (inode->i_nlink != fattr->nlink) {
-			invalid |= NFS_INO_INVALID_ATTR;
+			invalid |= NFS_INO_INVALID_OTHER;
 			if (S_ISDIR(inode->i_mode))
 				invalid |= NFS_INO_INVALID_DATA;
 			set_nlink(inode, fattr->nlink);
 		}
 	} else if (server->caps & NFS_CAP_NLINK) {
 		nfsi->cache_validity |= save_cache_validity &
-				(NFS_INO_INVALID_ATTR
+				(NFS_INO_INVALID_OTHER
 				| NFS_INO_REVAL_FORCED);
 		cache_revalidated = false;
 	}
@@ -1942,6 +1959,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 
 	/* Update attrtimeo value if we're out of the unstable period */
 	if (invalid & NFS_INO_INVALID_ATTR) {
+		invalid &= ~NFS_INO_INVALID_ATTR;
 		nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
 		nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
 		nfsi->attrtimeo_timestamp = now;
@@ -1962,10 +1980,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 			nfsi->attr_gencount = fattr->gencount;
 	}
 
-	/* Don't declare attrcache up to date if there were no attrs! */
-	if (cache_revalidated)
-		invalid &= ~NFS_INO_INVALID_ATTR;
-
 	/* Don't invalidate the data if we were to blame */
 	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
 				|| S_ISLNK(inode->i_mode)))
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 7327930ad970..eadf1ab31d16 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -138,8 +138,11 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 		msg.rpc_cred = nfs_file_cred(sattr->ia_file);
 	nfs_fattr_init(fattr);
 	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
-	if (status == 0)
+	if (status == 0) {
+		if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL)
+			nfs_zap_acl_cache(inode);
 		nfs_setattr_update_inode(inode, sattr, fattr);
+	}
 	dprintk("NFS reply setattr: %d\n", status);
 	return status;
 }
@@ -383,11 +386,11 @@ out:
 }
 
 static int
-nfs3_proc_remove(struct inode *dir, const struct qstr *name)
+nfs3_proc_remove(struct inode *dir, struct dentry *dentry)
 {
 	struct nfs_removeargs arg = {
 		.fh = NFS_FH(dir),
-		.name = *name,
+		.name = dentry->d_name,
 	};
 	struct nfs_removeres res;
 	struct rpc_message msg = {
@@ -397,7 +400,7 @@ nfs3_proc_remove(struct inode *dir, const struct qstr *name)
 	};
 	int status = -ENOMEM;
 
-	dprintk("NFS call  remove %s\n", name->name);
+	dprintk("NFS call  remove %pd2\n", dentry);
 	res.dir_attr = nfs_alloc_fattr();
 	if (res.dir_attr == NULL)
 		goto out;
@@ -411,7 +414,7 @@ out:
 }
 
 static void
-nfs3_proc_unlink_setup(struct rpc_message *msg, struct inode *dir)
+nfs3_proc_unlink_setup(struct rpc_message *msg, struct dentry *dentry)
 {
 	msg->rpc_proc = &nfs3_procedures[NFS3PROC_REMOVE];
 }
@@ -433,7 +436,9 @@ nfs3_proc_unlink_done(struct rpc_task *task, struct inode *dir)
 }
 
 static void
-nfs3_proc_rename_setup(struct rpc_message *msg, struct inode *dir)
+nfs3_proc_rename_setup(struct rpc_message *msg,
+		struct dentry *old_dentry,
+		struct dentry *new_dentry)
 {
 	msg->rpc_proc = &nfs3_procedures[NFS3PROC_RENAME];
 }
@@ -908,12 +913,6 @@ static int nfs3_have_delegation(struct inode *inode, fmode_t flags)
 	return 0;
 }
 
-static int nfs3_return_delegation(struct inode *inode)
-{
-	nfs_wb_all(inode);
-	return 0;
-}
-
 static const struct inode_operations nfs3_dir_inode_operations = {
 	.create		= nfs_create,
 	.lookup		= nfs_lookup,
@@ -990,7 +989,6 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
 	.clear_acl_cache = forget_all_cached_acls,
 	.close_context	= nfs_close_context,
 	.have_delegation = nfs3_have_delegation,
-	.return_delegation = nfs3_return_delegation,
 	.alloc_client	= nfs_alloc_client,
 	.init_client	= nfs_init_client,
 	.free_client	= nfs_free_client,
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 6cd33bd5da87..09ee36dd8426 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -1997,6 +1997,7 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
 	struct nfs_entry old = *entry;
 	__be32 *p;
 	int error;
+	u64 new_cookie;
 
 	p = xdr_inline_decode(xdr, 4);
 	if (unlikely(p == NULL))
@@ -2019,8 +2020,7 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
 	if (unlikely(error))
 		return error;
 
-	entry->prev_cookie = entry->cookie;
-	error = decode_cookie3(xdr, &entry->cookie);
+	error = decode_cookie3(xdr, &new_cookie);
 	if (unlikely(error))
 		return error;
 
@@ -2054,6 +2054,9 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
 			zero_nfs_fh3(entry->fh);
 	}
 
+	entry->prev_cookie = entry->cookie;
+	entry->cookie = new_cookie;
+
 	return 0;
 
 out_overflow:
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 47f3c273245e..b71757e85066 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1045,7 +1045,9 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo,
 	struct nfs_inode *nfsi = NFS_I(dir);
 
 	spin_lock(&dir->i_lock);
-	nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
+	nfsi->cache_validity |= NFS_INO_INVALID_CTIME
+		| NFS_INO_INVALID_MTIME
+		| NFS_INO_INVALID_DATA;
 	if (cinfo->atomic && cinfo->before == inode_peek_iversion_raw(dir)) {
 		nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE;
 		nfsi->attrtimeo_timestamp = jiffies;
@@ -1669,6 +1671,7 @@ static void nfs4_return_incompatible_delegation(struct inode *inode, fmode_t fmo
 {
 	struct nfs_delegation *delegation;
 
+	fmode &= FMODE_READ|FMODE_WRITE;
 	rcu_read_lock();
 	delegation = rcu_dereference(NFS_I(inode)->delegation);
 	if (delegation == NULL || (delegation->type & fmode) == fmode) {
@@ -1751,12 +1754,16 @@ nfs4_opendata_check_deleg(struct nfs4_opendata *data, struct nfs4_state *state)
 	}
 	if ((delegation_flags & 1UL<<NFS_DELEGATION_NEED_RECLAIM) == 0)
 		nfs_inode_set_delegation(state->inode,
-					 data->owner->so_cred,
-					 &data->o_res);
+				data->owner->so_cred,
+				data->o_res.delegation_type,
+				&data->o_res.delegation,
+				data->o_res.pagemod_limit);
 	else
 		nfs_inode_reclaim_delegation(state->inode,
-					     data->owner->so_cred,
-					     &data->o_res);
+				data->owner->so_cred,
+				data->o_res.delegation_type,
+				&data->o_res.delegation,
+				data->o_res.pagemod_limit);
 }
 
 /*
@@ -2743,27 +2750,40 @@ static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st
  * fields corresponding to attributes that were used to store the verifier.
  * Make sure we clobber those fields in the later setattr call
  */
-static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata,
+static unsigned nfs4_exclusive_attrset(struct nfs4_opendata *opendata,
 				struct iattr *sattr, struct nfs4_label **label)
 {
-	const u32 *attrset = opendata->o_res.attrset;
+	const __u32 *bitmask = opendata->o_arg.server->exclcreat_bitmask;
+	__u32 attrset[3];
+	unsigned ret;
+	unsigned i;
 
-	if ((attrset[1] & FATTR4_WORD1_TIME_ACCESS) &&
-	    !(sattr->ia_valid & ATTR_ATIME_SET))
-		sattr->ia_valid |= ATTR_ATIME;
+	for (i = 0; i < ARRAY_SIZE(attrset); i++) {
+		attrset[i] = opendata->o_res.attrset[i];
+		if (opendata->o_arg.createmode == NFS4_CREATE_EXCLUSIVE4_1)
+			attrset[i] &= ~bitmask[i];
+	}
+
+	ret = (opendata->o_arg.createmode == NFS4_CREATE_EXCLUSIVE) ?
+		sattr->ia_valid : 0;
 
-	if ((attrset[1] & FATTR4_WORD1_TIME_MODIFY) &&
-	    !(sattr->ia_valid & ATTR_MTIME_SET))
-		sattr->ia_valid |= ATTR_MTIME;
+	if ((attrset[1] & (FATTR4_WORD1_TIME_ACCESS|FATTR4_WORD1_TIME_ACCESS_SET))) {
+		if (sattr->ia_valid & ATTR_ATIME_SET)
+			ret |= ATTR_ATIME_SET;
+		else
+			ret |= ATTR_ATIME;
+	}
 
-	/* Except MODE, it seems harmless of setting twice. */
-	if (opendata->o_arg.createmode != NFS4_CREATE_EXCLUSIVE &&
-		(attrset[1] & FATTR4_WORD1_MODE ||
-		 attrset[2] & FATTR4_WORD2_MODE_UMASK))
-		sattr->ia_valid &= ~ATTR_MODE;
+	if ((attrset[1] & (FATTR4_WORD1_TIME_MODIFY|FATTR4_WORD1_TIME_MODIFY_SET))) {
+		if (sattr->ia_valid & ATTR_MTIME_SET)
+			ret |= ATTR_MTIME_SET;
+		else
+			ret |= ATTR_MTIME;
+	}
 
-	if (attrset[2] & FATTR4_WORD2_SECURITY_LABEL)
+	if (!(attrset[2] & FATTR4_WORD2_SECURITY_LABEL))
 		*label = NULL;
+	return ret;
 }
 
 static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
@@ -2892,12 +2912,15 @@ static int _nfs4_do_open(struct inode *dir,
 
 	if ((opendata->o_arg.open_flags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL) &&
 	    (opendata->o_arg.createmode != NFS4_CREATE_GUARDED)) {
-		nfs4_exclusive_attrset(opendata, sattr, &label);
+		unsigned attrs = nfs4_exclusive_attrset(opendata, sattr, &label);
 		/*
 		 * send create attributes which was not set by open
 		 * with an extra setattr.
 		 */
-		if (sattr->ia_valid & NFS4_VALID_ATTRS) {
+		if (attrs || label) {
+			unsigned ia_old = sattr->ia_valid;
+
+			sattr->ia_valid = attrs;
 			nfs_fattr_init(opendata->o_res.f_attr);
 			status = nfs4_do_setattr(state->inode, cred,
 					opendata->o_res.f_attr, sattr,
@@ -2907,6 +2930,7 @@ static int _nfs4_do_open(struct inode *dir,
 						opendata->o_res.f_attr);
 				nfs_setsecurity(state->inode, opendata->o_res.f_attr, olabel);
 			}
+			sattr->ia_valid = ia_old;
 		}
 	}
 	if (opened && opendata->file_created)
@@ -3874,6 +3898,10 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 	if (IS_ERR(label))
 		return PTR_ERR(label);
 
+	/* Return any delegations if we're going to change ACLs */
+	if ((sattr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0)
+		nfs4_inode_make_writeable(inode);
+
 	status = nfs4_do_setattr(inode, cred, fattr, sattr, ctx, NULL, label);
 	if (status == 0) {
 		nfs_setattr_update_inode(inode, sattr, fattr);
@@ -4048,7 +4076,6 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry
 	struct nfs_server *server = NFS_SERVER(inode);
 	struct nfs4_accessargs args = {
 		.fh = NFS_FH(inode),
-		.bitmask = server->cache_consistency_bitmask,
 		.access = entry->mask,
 	};
 	struct nfs4_accessres res = {
@@ -4062,14 +4089,18 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry
 	};
 	int status = 0;
 
-	res.fattr = nfs_alloc_fattr();
-	if (res.fattr == NULL)
-		return -ENOMEM;
+	if (!nfs_have_delegated_attributes(inode)) {
+		res.fattr = nfs_alloc_fattr();
+		if (res.fattr == NULL)
+			return -ENOMEM;
+		args.bitmask = server->cache_consistency_bitmask;
+	}
 
 	status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
 	if (!status) {
 		nfs_access_set_mask(entry, res.access);
-		nfs_refresh_inode(inode, res.fattr);
+		if (res.fattr)
+			nfs_refresh_inode(inode, res.fattr);
 	}
 	nfs_free_fattr(res.fattr);
 	return status;
@@ -4199,10 +4230,44 @@ static int _nfs4_proc_remove(struct inode *dir, const struct qstr *name)
 	return status;
 }
 
-static int nfs4_proc_remove(struct inode *dir, const struct qstr *name)
+/*
+ * Remove the directory entry @dentry from @dir, retrying for as long as
+ * nfs4_handle_exception() sets exception.retry.
+ *
+ * If @dentry has an inode, its delegation is dealt with first: returned
+ * when i_nlink == 1, otherwise passed to nfs4_inode_make_writeable().
+ * NOTE(review): presumably the last link going away makes the delegation
+ * useless, while other links only need it not to block the change -
+ * confirm against the delegation code.
+ */
+static int nfs4_proc_remove(struct inode *dir, struct dentry *dentry)
+{
+	struct nfs4_exception exception = { };
+	struct inode *inode = d_inode(dentry);
+	int err;
+
+	if (inode && inode->i_nlink == 1)
+		nfs4_inode_return_delegation(inode);
+	else if (inode)
+		nfs4_inode_make_writeable(inode);
+	do {
+		err = _nfs4_proc_remove(dir, &dentry->d_name);
+		trace_nfs4_remove(dir, &dentry->d_name, err);
+		err = nfs4_handle_exception(NFS_SERVER(dir), err,
+				&exception);
+	} while (exception.retry);
+	return err;
+}
+
+/*
+ * Remove the directory @name from @dir. Same retry loop as
+ * nfs4_proc_remove(), but no delegation handling is done.
+ */
+static int nfs4_proc_rmdir(struct inode *dir, const struct qstr *name)
 {
 	struct nfs4_exception exception = { };
 	int err;
+
 	do {
 		err = _nfs4_proc_remove(dir, name);
 		trace_nfs4_remove(dir, name, err);
@@ -4212,17 +4265,20 @@ static int nfs4_proc_remove(struct inode *dir, const struct qstr *name)
 	return err;
 }
 
-static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir)
+static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct dentry *dentry)
 {
-	struct nfs_server *server = NFS_SERVER(dir);
 	struct nfs_removeargs *args = msg->rpc_argp;
 	struct nfs_removeres *res = msg->rpc_resp;
+	struct inode *inode = d_inode(dentry);
 
-	res->server = server;
+	res->server = NFS_SB(dentry->d_sb);
 	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE];
 	nfs4_init_sequence(&args->seq_args, &res->seq_res, 1);
 
 	nfs_fattr_init(res->dir_attr);
+
+	if (inode)
+		nfs4_inode_return_delegation(inode);
 }
 
 static void nfs4_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data)
@@ -4248,14 +4304,21 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir)
 	return 1;
 }
 
-static void nfs4_proc_rename_setup(struct rpc_message *msg, struct inode *dir)
+static void nfs4_proc_rename_setup(struct rpc_message *msg,
+		struct dentry *old_dentry,
+		struct dentry *new_dentry)
 {
-	struct nfs_server *server = NFS_SERVER(dir);
 	struct nfs_renameargs *arg = msg->rpc_argp;
 	struct nfs_renameres *res = msg->rpc_resp;
+	struct inode *old_inode = d_inode(old_dentry);
+	struct inode *new_inode = d_inode(new_dentry);
 
+	if (old_inode)
+		nfs4_inode_make_writeable(old_inode);
+	if (new_inode)
+		nfs4_inode_return_delegation(new_inode);
 	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME];
-	res->server = server;
+	res->server = NFS_SB(old_dentry->d_sb);
 	nfs4_init_sequence(&arg->seq_args, &res->seq_res, 1);
 }
 
@@ -4317,6 +4380,8 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, const struct
 	}
 	arg.bitmask = nfs4_bitmask(server, res.label);
 
+	nfs4_inode_make_writeable(inode);
+
 	status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
 	if (!status) {
 		update_changeattr(dir, &res.cinfo, res.fattr->time_start);
@@ -5310,7 +5375,7 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
 	i = buf_to_pages_noslab(buf, buflen, arg.acl_pages);
 	if (i < 0)
 		return i;
-	nfs4_inode_return_delegation(inode);
+	nfs4_inode_make_writeable(inode);
 	ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
 
 	/*
@@ -5325,7 +5390,8 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
 	 * so mark the attribute cache invalid.
 	 */
 	spin_lock(&inode->i_lock);
-	NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATTR;
+	NFS_I(inode)->cache_validity |= NFS_INO_INVALID_CHANGE
+		| NFS_INO_INVALID_CTIME;
 	spin_unlock(&inode->i_lock);
 	nfs_access_zap_cache(inode);
 	nfs_zap_acl_cache(inode);
@@ -6621,22 +6687,24 @@ static int
 nfs4_wake_lock_waiter(wait_queue_entry_t *wait, unsigned int mode, int flags, void *key)
 {
 	int ret;
-	struct cb_notify_lock_args *cbnl = key;
 	struct nfs4_lock_waiter	*waiter	= wait->private;
-	struct nfs_lowner	*lowner = &cbnl->cbnl_owner,
-				*wowner = waiter->owner;
 
-	/* Only wake if the callback was for the same owner */
-	if (lowner->clientid != wowner->clientid ||
-	    lowner->id != wowner->id		 ||
-	    lowner->s_dev != wowner->s_dev)
-		return 0;
+	/* NULL key means to wake up everyone */
+	if (key) {
+		struct cb_notify_lock_args	*cbnl = key;
+		struct nfs_lowner		*lowner = &cbnl->cbnl_owner,
+						*wowner = waiter->owner;
 
-	/* Make sure it's for the right inode */
-	if (nfs_compare_fh(NFS_FH(waiter->inode), &cbnl->cbnl_fh))
-		return 0;
+		/* Only wake if the callback was for the same owner. */
+		if (lowner->id != wowner->id || lowner->s_dev != wowner->s_dev)
+			return 0;
 
-	waiter->notified = true;
+		/* Make sure it's for the right inode */
+		if (nfs_compare_fh(NFS_FH(waiter->inode), &cbnl->cbnl_fh))
+			return 0;
+
+		waiter->notified = true;
+	}
 
 	/* override "private" so we can use default_wake_function */
 	wait->private = waiter->task;
@@ -6673,6 +6741,7 @@ nfs4_retry_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
 	add_wait_queue(q, &wait);
 
 	while(!signalled()) {
+		waiter.notified = false;
 		status = nfs4_proc_setlk(state, cmd, request);
 		if ((status != -EAGAIN) || IS_SETLK(cmd))
 			break;
@@ -8414,6 +8483,8 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf
 {
 	switch(task->tk_status) {
 	case 0:
+		wake_up_all(&clp->cl_lock_waitq);
+		/* Fallthrough */
 	case -NFS4ERR_COMPLETE_ALREADY:
 	case -NFS4ERR_WRONG_CRED: /* What to do here? */
 		break;
@@ -9593,7 +9664,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
 	.link		= nfs4_proc_link,
 	.symlink	= nfs4_proc_symlink,
 	.mkdir		= nfs4_proc_mkdir,
-	.rmdir		= nfs4_proc_remove,
+	.rmdir		= nfs4_proc_rmdir,
 	.readdir	= nfs4_proc_readdir,
 	.mknod		= nfs4_proc_mknod,
 	.statfs		= nfs4_proc_statfs,
@@ -9614,7 +9685,6 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
 	.close_context  = nfs4_close_context,
 	.open_context	= nfs4_atomic_open,
 	.have_delegation = nfs4_have_delegation,
-	.return_delegation = nfs4_inode_return_delegation,
 	.alloc_client	= nfs4_alloc_client,
 	.init_client	= nfs4_init_client,
 	.free_client	= nfs4_free_client,
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 91a4d4eeb235..c10a422efe6f 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -428,7 +428,6 @@ nfs4_insert_state_owner_locked(struct nfs4_state_owner *new)
 	struct rb_node **p = &server->state_owners.rb_node,
 		       *parent = NULL;
 	struct nfs4_state_owner *sp;
-	int err;
 
 	while (*p != NULL) {
 		parent = *p;
@@ -445,9 +444,6 @@ nfs4_insert_state_owner_locked(struct nfs4_state_owner *new)
 			return sp;
 		}
 	}
-	err = ida_get_new(&server->openowner_id, &new->so_seqid.owner_id);
-	if (err)
-		return ERR_PTR(err);
 	rb_link_node(&new->so_server_node, parent, p);
 	rb_insert_color(&new->so_server_node, &server->state_owners);
 	return new;
@@ -460,7 +456,6 @@ nfs4_remove_state_owner_locked(struct nfs4_state_owner *sp)
 
 	if (!RB_EMPTY_NODE(&sp->so_server_node))
 		rb_erase(&sp->so_server_node, &server->state_owners);
-	ida_remove(&server->openowner_id, sp->so_seqid.owner_id);
 }
 
 static void
@@ -495,6 +490,12 @@ nfs4_alloc_state_owner(struct nfs_server *server,
 	sp = kzalloc(sizeof(*sp), gfp_flags);
 	if (!sp)
 		return NULL;
+	sp->so_seqid.owner_id = ida_simple_get(&server->openowner_id, 0, 0,
+						gfp_flags);
+	if (sp->so_seqid.owner_id < 0) {
+		kfree(sp);
+		return NULL;
+	}
 	sp->so_server = server;
 	sp->so_cred = get_rpccred(cred);
 	spin_lock_init(&sp->so_lock);
@@ -526,6 +527,7 @@ static void nfs4_free_state_owner(struct nfs4_state_owner *sp)
 {
 	nfs4_destroy_seqid_counter(&sp->so_seqid);
 	put_rpccred(sp->so_cred);
+	ida_simple_remove(&sp->so_server->openowner_id, sp->so_seqid.owner_id);
 	kfree(sp);
 }
 
@@ -576,13 +578,9 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server,
 	new = nfs4_alloc_state_owner(server, cred, gfp_flags);
 	if (new == NULL)
 		goto out;
-	do {
-		if (ida_pre_get(&server->openowner_id, gfp_flags) == 0)
-			break;
-		spin_lock(&clp->cl_lock);
-		sp = nfs4_insert_state_owner_locked(new);
-		spin_unlock(&clp->cl_lock);
-	} while (sp == ERR_PTR(-EAGAIN));
+	spin_lock(&clp->cl_lock);
+	sp = nfs4_insert_state_owner_locked(new);
+	spin_unlock(&clp->cl_lock);
 	if (sp != new)
 		nfs4_free_state_owner(new);
 out:
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index b993ad282de2..9b7392032321 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -98,6 +98,7 @@ static int nfs4_stat_to_errno(int);
 				((3+NFS4_FHSIZE) >> 2))
 #define nfs4_fattr_bitmap_maxsz 4
 #define encode_getattr_maxsz    (op_encode_hdr_maxsz + nfs4_fattr_bitmap_maxsz)
+#define nfstime4_maxsz		(3)
 #define nfs4_name_maxsz		(1 + ((3 + NFS4_MAXNAMLEN) >> 2))
 #define nfs4_path_maxsz		(1 + ((3 + NFS4_MAXPATHLEN) >> 2))
 #define nfs4_owner_maxsz	(1 + XDR_QUADLEN(IDMAP_NAMESZ))
@@ -112,7 +113,8 @@ static int nfs4_stat_to_errno(int);
 #define decode_mdsthreshold_maxsz (1 + 1 + nfs4_fattr_bitmap_maxsz + 1 + 8)
 /* This is based on getfattr, which uses the most attributes: */
 #define nfs4_fattr_value_maxsz	(1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \
-				3 + 3 + 3 + nfs4_owner_maxsz + \
+				3*nfstime4_maxsz + \
+				nfs4_owner_maxsz + \
 				nfs4_group_maxsz + nfs4_label_maxsz + \
 				 decode_mdsthreshold_maxsz))
 #define nfs4_fattr_maxsz	(nfs4_fattr_bitmap_maxsz + \
@@ -123,7 +125,8 @@ static int nfs4_stat_to_errno(int);
 				nfs4_owner_maxsz + \
 				nfs4_group_maxsz + \
 				nfs4_label_maxsz + \
-				4 + 4)
+				1 + nfstime4_maxsz + \
+				1 + nfstime4_maxsz)
 #define encode_savefh_maxsz     (op_encode_hdr_maxsz)
 #define decode_savefh_maxsz     (op_decode_hdr_maxsz)
 #define encode_restorefh_maxsz  (op_encode_hdr_maxsz)
@@ -957,6 +960,47 @@ static void encode_uint64(struct xdr_stream *xdr, u64 n)
 	WARN_ON_ONCE(xdr_stream_encode_u64(xdr, n) < 0);
 }
 
+/*
+ * Encode an NFSv4 bitmap4: trailing all-zero words are dropped before
+ * the array is handed to xdr_stream_encode_uint32_array().
+ *
+ * Returns the number of words encoded, or the negative value returned
+ * by xdr_stream_encode_uint32_array() (after warning once).
+ */
+static ssize_t xdr_encode_bitmap4(struct xdr_stream *xdr,
+		const __u32 *bitmap, size_t len)
+{
+	ssize_t ret;
+
+	for (; len > 0; len--) {
+		if (bitmap[len - 1] != 0)
+			break;
+	}
+	ret = xdr_stream_encode_uint32_array(xdr, bitmap, len);
+	if (WARN_ON_ONCE(ret < 0))
+		return ret;
+	return len;
+}
+
+/*
+ * Store bitmap[i] & mask[i] into res[i] for every word below the
+ * returned length. Trailing words in which either input is zero are
+ * skipped: they are neither written to @res nor counted.
+ *
+ * Returns the number of words of @res that were written.
+ */
+static size_t mask_bitmap4(const __u32 *bitmap, const __u32 *mask,
+		__u32 *res, size_t len)
+{
+	size_t i;
+
+	while (len > 0 && !(bitmap[len - 1] && mask[len - 1]))
+		len--;
+	for (i = 0; i < len; i++)
+		res[i] = bitmap[i] & mask[i];
+	return len;
+}
+
 static void encode_nfs4_seqid(struct xdr_stream *xdr,
 		const struct nfs_seqid *seqid)
 {
@@ -1011,6 +1043,22 @@ static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *ve
 	encode_opaque_fixed(xdr, verf->data, NFS4_VERIFIER_SIZE);
 }
 
+/*
+ * Write an nfstime4 at @p: tv_sec as a 64-bit hyper followed by tv_nsec
+ * as a 32-bit word.
+ *
+ * No space is reserved here; @p must already point at room for the
+ * encoded time. Returns the position just past it.
+ */
+static __be32 *
+xdr_encode_nfstime4(__be32 *p, const struct timespec *t)
+{
+	__be32 *nsec_p = xdr_encode_hyper(p, (__s64)t->tv_sec);
+
+	*nsec_p = cpu_to_be32(t->tv_nsec);
+	return nsec_p + 1;
+}
+
 static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
 				const struct nfs4_label *label,
 				const umode_t *umask,
@@ -1022,9 +1062,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
 	int owner_namelen = 0;
 	int owner_grouplen = 0;
 	__be32 *p;
-	unsigned i;
 	uint32_t len = 0;
-	uint32_t bmval_len;
 	uint32_t bmval[3] = { 0 };
 
 	/*
@@ -1072,7 +1110,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
 	if (attrmask[1] & FATTR4_WORD1_TIME_ACCESS_SET) {
 		if (iap->ia_valid & ATTR_ATIME_SET) {
 			bmval[1] |= FATTR4_WORD1_TIME_ACCESS_SET;
-			len += 16;
+			len += 4 + (nfstime4_maxsz << 2);
 		} else if (iap->ia_valid & ATTR_ATIME) {
 			bmval[1] |= FATTR4_WORD1_TIME_ACCESS_SET;
 			len += 4;
@@ -1081,7 +1119,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
 	if (attrmask[1] & FATTR4_WORD1_TIME_MODIFY_SET) {
 		if (iap->ia_valid & ATTR_MTIME_SET) {
 			bmval[1] |= FATTR4_WORD1_TIME_MODIFY_SET;
-			len += 16;
+			len += 4 + (nfstime4_maxsz << 2);
 		} else if (iap->ia_valid & ATTR_MTIME) {
 			bmval[1] |= FATTR4_WORD1_TIME_MODIFY_SET;
 			len += 4;
@@ -1093,19 +1131,8 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
 		bmval[2] |= FATTR4_WORD2_SECURITY_LABEL;
 	}
 
-	if (bmval[2] != 0)
-		bmval_len = 3;
-	else if (bmval[1] != 0)
-		bmval_len = 2;
-	else
-		bmval_len = 1;
-
-	p = reserve_space(xdr, 4 + (bmval_len << 2) + 4 + len);
-
-	*p++ = cpu_to_be32(bmval_len);
-	for (i = 0; i < bmval_len; i++)
-		*p++ = cpu_to_be32(bmval[i]);
-	*p++ = cpu_to_be32(len);
+	xdr_encode_bitmap4(xdr, bmval, ARRAY_SIZE(bmval));
+	xdr_stream_encode_opaque_inline(xdr, (void **)&p, len);
 
 	if (bmval[0] & FATTR4_WORD0_SIZE)
 		p = xdr_encode_hyper(p, iap->ia_size);
@@ -1118,16 +1145,14 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
 	if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) {
 		if (iap->ia_valid & ATTR_ATIME_SET) {
 			*p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME);
-			p = xdr_encode_hyper(p, (s64)iap->ia_atime.tv_sec);
-			*p++ = cpu_to_be32(iap->ia_atime.tv_nsec);
+			p = xdr_encode_nfstime4(p, &iap->ia_atime);
 		} else
 			*p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME);
 	}
 	if (bmval[1] & FATTR4_WORD1_TIME_MODIFY_SET) {
 		if (iap->ia_valid & ATTR_MTIME_SET) {
 			*p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME);
-			p = xdr_encode_hyper(p, (s64)iap->ia_mtime.tv_sec);
-			*p++ = cpu_to_be32(iap->ia_mtime.tv_nsec);
+			p = xdr_encode_nfstime4(p, &iap->ia_mtime);
 		} else
 			*p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME);
 	}
@@ -1199,85 +1224,45 @@ static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *
 			create->server, create->server->attr_bitmask);
 }
 
-static void encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap, struct compound_hdr *hdr)
-{
-	__be32 *p;
-
-	encode_op_hdr(xdr, OP_GETATTR, decode_getattr_maxsz, hdr);
-	p = reserve_space(xdr, 8);
-	*p++ = cpu_to_be32(1);
-	*p = cpu_to_be32(bitmap);
-}
-
-static void encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm1, struct compound_hdr *hdr)
-{
-	__be32 *p;
-
-	encode_op_hdr(xdr, OP_GETATTR, decode_getattr_maxsz, hdr);
-	p = reserve_space(xdr, 12);
-	*p++ = cpu_to_be32(2);
-	*p++ = cpu_to_be32(bm0);
-	*p = cpu_to_be32(bm1);
-}
-
-static void
-encode_getattr_three(struct xdr_stream *xdr,
-		     uint32_t bm0, uint32_t bm1, uint32_t bm2,
-		     struct compound_hdr *hdr)
+static void encode_getattr(struct xdr_stream *xdr,
+		const __u32 *bitmap, const __u32 *mask, size_t len,
+		struct compound_hdr *hdr)
 {
-	__be32 *p;
+	__u32 masked_bitmap[nfs4_fattr_bitmap_maxsz];
 
 	encode_op_hdr(xdr, OP_GETATTR, decode_getattr_maxsz, hdr);
-	if (bm2) {
-		p = reserve_space(xdr, 16);
-		*p++ = cpu_to_be32(3);
-		*p++ = cpu_to_be32(bm0);
-		*p++ = cpu_to_be32(bm1);
-		*p = cpu_to_be32(bm2);
-	} else if (bm1) {
-		p = reserve_space(xdr, 12);
-		*p++ = cpu_to_be32(2);
-		*p++ = cpu_to_be32(bm0);
-		*p = cpu_to_be32(bm1);
-	} else {
-		p = reserve_space(xdr, 8);
-		*p++ = cpu_to_be32(1);
-		*p = cpu_to_be32(bm0);
+	if (mask) {
+		if (WARN_ON_ONCE(len > ARRAY_SIZE(masked_bitmap)))
+			len = ARRAY_SIZE(masked_bitmap);
+		len = mask_bitmap4(bitmap, mask, masked_bitmap, len);
+		bitmap = masked_bitmap;
 	}
+	xdr_encode_bitmap4(xdr, bitmap, len);
 }
 
 static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr)
 {
-	encode_getattr_three(xdr, bitmask[0] & nfs4_fattr_bitmap[0],
-			   bitmask[1] & nfs4_fattr_bitmap[1],
-			   bitmask[2] & nfs4_fattr_bitmap[2],
-			   hdr);
+	encode_getattr(xdr, nfs4_fattr_bitmap, bitmask,
+			ARRAY_SIZE(nfs4_fattr_bitmap), hdr);
 }
 
 static void encode_getfattr_open(struct xdr_stream *xdr, const u32 *bitmask,
 				 const u32 *open_bitmap,
 				 struct compound_hdr *hdr)
 {
-	encode_getattr_three(xdr,
-			     bitmask[0] & open_bitmap[0],
-			     bitmask[1] & open_bitmap[1],
-			     bitmask[2] & open_bitmap[2],
-			     hdr);
+	encode_getattr(xdr, open_bitmap, bitmask, 3, hdr);
 }
 
 static void encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr)
 {
-	encode_getattr_three(xdr,
-			     bitmask[0] & nfs4_fsinfo_bitmap[0],
-			     bitmask[1] & nfs4_fsinfo_bitmap[1],
-			     bitmask[2] & nfs4_fsinfo_bitmap[2],
-			     hdr);
+	encode_getattr(xdr, nfs4_fsinfo_bitmap, bitmask,
+			ARRAY_SIZE(nfs4_fsinfo_bitmap), hdr);
 }
 
 static void encode_fs_locations(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr)
 {
-	encode_getattr_two(xdr, bitmask[0] & nfs4_fs_locations_bitmap[0],
-			   bitmask[1] & nfs4_fs_locations_bitmap[1], hdr);
+	encode_getattr(xdr, nfs4_fs_locations_bitmap, bitmask,
+			ARRAY_SIZE(nfs4_fs_locations_bitmap), hdr);
 }
 
 static void encode_getfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
@@ -2116,7 +2101,8 @@ static void nfs4_xdr_enc_access(struct rpc_rqst *req, struct xdr_stream *xdr,
 	encode_sequence(xdr, &args->seq_args, &hdr);
 	encode_putfh(xdr, args->fh, &hdr);
 	encode_access(xdr, args->access, &hdr);
-	encode_getfattr(xdr, args->bitmask, &hdr);
+	if (args->bitmask)
+		encode_getfattr(xdr, args->bitmask, &hdr);
 	encode_nops(&hdr);
 }
 
@@ -2558,13 +2544,17 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr,
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
+	const __u32 nfs4_acl_bitmap[1] = {
+		[0] = FATTR4_WORD0_ACL,
+	};
 	uint32_t replen;
 
 	encode_compound_hdr(xdr, req, &hdr);
 	encode_sequence(xdr, &args->seq_args, &hdr);
 	encode_putfh(xdr, args->fh, &hdr);
 	replen = hdr.replen + op_decode_hdr_maxsz;
-	encode_getattr_two(xdr, FATTR4_WORD0_ACL, 0, &hdr);
+	encode_getattr(xdr, nfs4_acl_bitmap, NULL,
+			ARRAY_SIZE(nfs4_acl_bitmap), &hdr);
 
 	xdr_inline_pages(&req->rq_rcv_buf, replen << 2,
 		args->acl_pages, 0, args->acl_len);
@@ -2643,8 +2633,8 @@ static void nfs4_xdr_enc_pathconf(struct rpc_rqst *req, struct xdr_stream *xdr,
 	encode_compound_hdr(xdr, req, &hdr);
 	encode_sequence(xdr, &args->seq_args, &hdr);
 	encode_putfh(xdr, args->fh, &hdr);
-	encode_getattr_one(xdr, args->bitmask[0] & nfs4_pathconf_bitmap[0],
-			   &hdr);
+	encode_getattr(xdr, nfs4_pathconf_bitmap, args->bitmask,
+			ARRAY_SIZE(nfs4_pathconf_bitmap), &hdr);
 	encode_nops(&hdr);
 }
 
@@ -2662,8 +2652,8 @@ static void nfs4_xdr_enc_statfs(struct rpc_rqst *req, struct xdr_stream *xdr,
 	encode_compound_hdr(xdr, req, &hdr);
 	encode_sequence(xdr, &args->seq_args, &hdr);
 	encode_putfh(xdr, args->fh, &hdr);
-	encode_getattr_two(xdr, args->bitmask[0] & nfs4_statfs_bitmap[0],
-			   args->bitmask[1] & nfs4_statfs_bitmap[1], &hdr);
+	encode_getattr(xdr, nfs4_statfs_bitmap, args->bitmask,
+			ARRAY_SIZE(nfs4_statfs_bitmap), &hdr);
 	encode_nops(&hdr);
 }
 
@@ -2683,7 +2673,7 @@ static void nfs4_xdr_enc_server_caps(struct rpc_rqst *req,
 	encode_compound_hdr(xdr, req, &hdr);
 	encode_sequence(xdr, &args->seq_args, &hdr);
 	encode_putfh(xdr, args->fhandle, &hdr);
-	encode_getattr_three(xdr, bitmask[0], bitmask[1], bitmask[2], &hdr);
+	encode_getattr(xdr, bitmask, NULL, 3, &hdr);
 	encode_nops(&hdr);
 }
 
@@ -3217,34 +3207,41 @@ static int decode_ace(struct xdr_stream *xdr, void *ace)
 	return -EIO;
 }
 
-static int decode_attr_bitmap(struct xdr_stream *xdr, uint32_t *bitmap)
+/*
+ * Decode a bitmap4 from @xdr into @bitmap, which has room for @sz words.
+ *
+ * Returns the non-negative result of xdr_stream_decode_uint32_array(),
+ * @sz if that helper reported -EMSGSIZE, or -EIO (after logging the
+ * overflow) for any other failure.
+ * NOTE(review): -EMSGSIZE presumably means the encoded bitmap held more
+ * than @sz words - confirm against xdr_stream_decode_uint32_array().
+ */
+static ssize_t
+decode_bitmap4(struct xdr_stream *xdr, uint32_t *bitmap, size_t sz)
 {
-	uint32_t bmlen;
-	__be32 *p;
-
-	p = xdr_inline_decode(xdr, 4);
-	if (unlikely(!p))
-		goto out_overflow;
-	bmlen = be32_to_cpup(p);
+	ssize_t ret;
 
-	bitmap[0] = bitmap[1] = bitmap[2] = 0;
-	p = xdr_inline_decode(xdr, (bmlen << 2));
-	if (unlikely(!p))
-		goto out_overflow;
-	if (bmlen > 0) {
-		bitmap[0] = be32_to_cpup(p++);
-		if (bmlen > 1) {
-			bitmap[1] = be32_to_cpup(p++);
-			if (bmlen > 2)
-				bitmap[2] = be32_to_cpup(p);
-		}
-	}
-	return 0;
-out_overflow:
+	ret = xdr_stream_decode_uint32_array(xdr, bitmap, sz);
+	if (likely(ret >= 0))
+		return ret;
+	if (ret == -EMSGSIZE)
+		return sz;
 	print_overflow_msg(__func__, xdr);
 	return -EIO;
 }
 
+/*
+ * Decode an attribute bitmap into the three-word array @bitmap.
+ *
+ * Returns 0 on success or the negative value from decode_bitmap4().
+ */
+static int decode_attr_bitmap(struct xdr_stream *xdr, uint32_t *bitmap)
+{
+	ssize_t ret = decode_bitmap4(xdr, bitmap, 3);
+
+	return ret < 0 ? ret : 0;
+}
+
 static int decode_attr_length(struct xdr_stream *xdr, uint32_t *attrlen, unsigned int *savep)
 {
 	__be32 *p;
@@ -3980,7 +3963,7 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap,
 	bitmap[1] &= ~FATTR4_WORD1_OWNER;
 
 	if (owner_name != NULL) {
-		len = decode_nfs4_string(xdr, owner_name, GFP_NOWAIT);
+		len = decode_nfs4_string(xdr, owner_name, GFP_NOIO);
 		if (len <= 0)
 			goto out;
 		dprintk("%s: name=%s\n", __func__, owner_name->data);
@@ -4015,7 +3998,7 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
 	bitmap[1] &= ~FATTR4_WORD1_OWNER_GROUP;
 
 	if (group_name != NULL) {
-		len = decode_nfs4_string(xdr, group_name, GFP_NOWAIT);
+		len = decode_nfs4_string(xdr, group_name, GFP_NOIO);
 		if (len <= 0)
 			goto out;
 		dprintk("%s: name=%s\n", __func__, group_name->data);
@@ -4155,19 +4138,33 @@ out_overflow:
 	return -EIO;
 }
 
+/*
+ * Read an nfstime4 from @p into @t: a 64-bit seconds value followed by
+ * a 32-bit nanoseconds value.
+ *
+ * Performs no bounds checking on @p; decode_attr_time() below obtains
+ * the words from xdr_inline_decode() first. Returns the position just
+ * past the decoded time.
+ */
+static __be32 *
+xdr_decode_nfstime4(__be32 *p, struct timespec *t)
+{
+	__u64 sec;
+
+	p = xdr_decode_hyper(p, &sec);
+	t->tv_sec = (time_t)sec;
+	t->tv_nsec = be32_to_cpup(p++);
+	return p;
+}
+
 static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time)
 {
 	__be32 *p;
-	uint64_t sec;
-	uint32_t nsec;
 
-	p = xdr_inline_decode(xdr, 12);
+	p = xdr_inline_decode(xdr, nfstime4_maxsz << 2);
 	if (unlikely(!p))
 		goto out_overflow;
-	p = xdr_decode_hyper(p, &sec);
-	nsec = be32_to_cpup(p);
-	time->tv_sec = (time_t)sec;
-	time->tv_nsec = (long)nsec;
+	xdr_decode_nfstime4(p, time);
 	return 0;
 out_overflow:
 	print_overflow_msg(__func__, xdr);
@@ -5470,21 +5459,13 @@ decode_savefh(struct xdr_stream *xdr)
 
 static int decode_setattr(struct xdr_stream *xdr)
 {
-	__be32 *p;
-	uint32_t bmlen;
 	int status;
 
 	status = decode_op_hdr(xdr, OP_SETATTR);
 	if (status)
 		return status;
-	p = xdr_inline_decode(xdr, 4);
-	if (unlikely(!p))
-		goto out_overflow;
-	bmlen = be32_to_cpup(p);
-	p = xdr_inline_decode(xdr, bmlen << 2);
-	if (likely(p))
+	if (decode_bitmap4(xdr, NULL, 0) >= 0)
 		return 0;
-out_overflow:
 	print_overflow_msg(__func__, xdr);
 	return -EIO;
 }
@@ -6255,7 +6236,8 @@ static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 	status = decode_access(xdr, &res->supported, &res->access);
 	if (status != 0)
 		goto out;
-	decode_getfattr(xdr, res->fattr, res->server);
+	if (res->fattr)
+		decode_getfattr(xdr, res->fattr, res->server);
 out:
 	return status;
 }
@@ -7535,6 +7517,7 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
 	unsigned int savep;
 	uint32_t bitmap[3] = {0};
 	uint32_t len;
+	uint64_t new_cookie;
 	__be32 *p = xdr_inline_decode(xdr, 4);
 	if (unlikely(!p))
 		goto out_overflow;
@@ -7551,8 +7534,7 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
 	p = xdr_inline_decode(xdr, 12);
 	if (unlikely(!p))
 		goto out_overflow;
-	entry->prev_cookie = entry->cookie;
-	p = xdr_decode_hyper(p, &entry->cookie);
+	p = xdr_decode_hyper(p, &new_cookie);
 	entry->len = be32_to_cpup(p);
 
 	p = xdr_inline_decode(xdr, entry->len);
@@ -7586,6 +7568,9 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
 	if (entry->fattr->valid & NFS_ATTR_FATTR_TYPE)
 		entry->d_type = nfs_umode_to_dtype(entry->fattr->mode);
 
+	entry->prev_cookie = entry->cookie;
+	entry->cookie = new_cookie;
+
 	return 0;
 
 out_overflow:
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index f7fd9192d4bc..4e93d6308733 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -300,11 +300,11 @@ out:
 }
   
 static int
-nfs_proc_remove(struct inode *dir, const struct qstr *name)
+nfs_proc_remove(struct inode *dir, struct dentry *dentry)
 {
 	struct nfs_removeargs arg = {
 		.fh = NFS_FH(dir),
-		.name = *name,
+		.name = dentry->d_name,
 	};
 	struct rpc_message msg = { 
 		.rpc_proc = &nfs_procedures[NFSPROC_REMOVE],
@@ -312,7 +312,7 @@ nfs_proc_remove(struct inode *dir, const struct qstr *name)
 	};
 	int			status;
 
-	dprintk("NFS call  remove %s\n", name->name);
+	dprintk("NFS call  remove %pd2\n",dentry);
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	nfs_mark_for_revalidate(dir);
 
@@ -321,7 +321,7 @@ nfs_proc_remove(struct inode *dir, const struct qstr *name)
 }
 
 static void
-nfs_proc_unlink_setup(struct rpc_message *msg, struct inode *dir)
+nfs_proc_unlink_setup(struct rpc_message *msg, struct dentry *dentry)
 {
 	msg->rpc_proc = &nfs_procedures[NFSPROC_REMOVE];
 }
@@ -338,7 +338,9 @@ static int nfs_proc_unlink_done(struct rpc_task *task, struct inode *dir)
 }
 
 static void
-nfs_proc_rename_setup(struct rpc_message *msg, struct inode *dir)
+nfs_proc_rename_setup(struct rpc_message *msg,
+		struct dentry *old_dentry,
+		struct dentry *new_dentry)
 {
 	msg->rpc_proc = &nfs_procedures[NFSPROC_RENAME];
 }
@@ -671,12 +673,6 @@ static int nfs_have_delegation(struct inode *inode, fmode_t flags)
 	return 0;
 }
 
-static int nfs_return_delegation(struct inode *inode)
-{
-	nfs_wb_all(inode);
-	return 0;
-}
-
 static const struct inode_operations nfs_dir_inode_operations = {
 	.create		= nfs_create,
 	.lookup		= nfs_lookup,
@@ -741,7 +737,6 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
 	.lock_check_bounds = nfs_lock_check_bounds,
 	.close_context	= nfs_close_context,
 	.have_delegation = nfs_have_delegation,
-	.return_delegation = nfs_return_delegation,
 	.alloc_client	= nfs_alloc_client,
 	.init_client	= nfs_init_client,
 	.free_client	= nfs_free_client,
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 630b4a3c1a93..bf54fc9ae135 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -105,7 +105,7 @@ static void nfs_do_call_unlink(struct nfs_unlinkdata *data)
 	data->args.fh = NFS_FH(dir);
 	nfs_fattr_init(data->res.dir_attr);
 
-	NFS_PROTO(dir)->unlink_setup(&msg, dir);
+	NFS_PROTO(dir)->unlink_setup(&msg, data->dentry);
 
 	task_setup_data.rpc_client = NFS_CLIENT(dir);
 	task = rpc_run_task(&task_setup_data);
@@ -386,7 +386,7 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
 
 	nfs_sb_active(old_dir->i_sb);
 
-	NFS_PROTO(data->old_dir)->rename_setup(&msg, old_dir);
+	NFS_PROTO(data->old_dir)->rename_setup(&msg, old_dentry, new_dentry);
 
 	return rpc_run_task(&task_setup_data);
 }
@@ -463,9 +463,6 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
 
 	fileid = NFS_FILEID(d_inode(dentry));
 
-	/* Return delegation in anticipation of the rename */
-	NFS_PROTO(d_inode(dentry))->return_delegation(d_inode(dentry));
-
 	sdentry = NULL;
 	do {
 		int slen;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 6579f3b367bd..0193053bc139 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -231,6 +231,7 @@ static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int c
 	if (i_size >= end)
 		goto out;
 	i_size_write(inode, end);
+	NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_SIZE;
 	nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
 out:
 	spin_unlock(&inode->i_lock);
@@ -1562,8 +1563,11 @@ static int nfs_writeback_done(struct rpc_task *task,
 	}
 
 	/* Deal with the suid/sgid bit corner case */
-	if (nfs_should_remove_suid(inode))
-		nfs_mark_for_revalidate(inode);
+	if (nfs_should_remove_suid(inode)) {
+		spin_lock(&inode->i_lock);
+		NFS_I(inode)->cache_validity |= NFS_INO_INVALID_OTHER;
+		spin_unlock(&inode->i_lock);
+	}
 	return 0;
 }
 
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index c21e0b4454a6..dec98cab729d 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -193,9 +193,9 @@ retry:
 				       (unsigned long long)oldkey,
 				       (unsigned long long)newkey);
 
-		spin_lock_irq(&btnc->tree_lock);
-		err = radix_tree_insert(&btnc->page_tree, newkey, obh->b_page);
-		spin_unlock_irq(&btnc->tree_lock);
+		xa_lock_irq(&btnc->i_pages);
+		err = radix_tree_insert(&btnc->i_pages, newkey, obh->b_page);
+		xa_unlock_irq(&btnc->i_pages);
 		/*
 		 * Note: page->index will not change to newkey until
 		 * nilfs_btnode_commit_change_key() will be called.
@@ -251,11 +251,11 @@ void nilfs_btnode_commit_change_key(struct address_space *btnc,
 				       (unsigned long long)newkey);
 		mark_buffer_dirty(obh);
 
-		spin_lock_irq(&btnc->tree_lock);
-		radix_tree_delete(&btnc->page_tree, oldkey);
-		radix_tree_tag_set(&btnc->page_tree, newkey,
+		xa_lock_irq(&btnc->i_pages);
+		radix_tree_delete(&btnc->i_pages, oldkey);
+		radix_tree_tag_set(&btnc->i_pages, newkey,
 				   PAGECACHE_TAG_DIRTY);
-		spin_unlock_irq(&btnc->tree_lock);
+		xa_unlock_irq(&btnc->i_pages);
 
 		opage->index = obh->b_blocknr = newkey;
 		unlock_page(opage);
@@ -283,9 +283,9 @@ void nilfs_btnode_abort_change_key(struct address_space *btnc,
 		return;
 
 	if (nbh == NULL) {	/* blocksize == pagesize */
-		spin_lock_irq(&btnc->tree_lock);
-		radix_tree_delete(&btnc->page_tree, newkey);
-		spin_unlock_irq(&btnc->tree_lock);
+		xa_lock_irq(&btnc->i_pages);
+		radix_tree_delete(&btnc->i_pages, newkey);
+		xa_unlock_irq(&btnc->i_pages);
 		unlock_page(ctxt->bh->b_page);
 	} else
 		brelse(nbh);
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 68241512d7c1..4cb850a6f1c2 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -331,15 +331,15 @@ repeat:
 			struct page *page2;
 
 			/* move the page to the destination cache */
-			spin_lock_irq(&smap->tree_lock);
-			page2 = radix_tree_delete(&smap->page_tree, offset);
+			xa_lock_irq(&smap->i_pages);
+			page2 = radix_tree_delete(&smap->i_pages, offset);
 			WARN_ON(page2 != page);
 
 			smap->nrpages--;
-			spin_unlock_irq(&smap->tree_lock);
+			xa_unlock_irq(&smap->i_pages);
 
-			spin_lock_irq(&dmap->tree_lock);
-			err = radix_tree_insert(&dmap->page_tree, offset, page);
+			xa_lock_irq(&dmap->i_pages);
+			err = radix_tree_insert(&dmap->i_pages, offset, page);
 			if (unlikely(err < 0)) {
 				WARN_ON(err == -EEXIST);
 				page->mapping = NULL;
@@ -348,11 +348,11 @@ repeat:
 				page->mapping = dmap;
 				dmap->nrpages++;
 				if (PageDirty(page))
-					radix_tree_tag_set(&dmap->page_tree,
+					radix_tree_tag_set(&dmap->i_pages,
 							   offset,
 							   PAGECACHE_TAG_DIRTY);
 			}
-			spin_unlock_irq(&dmap->tree_lock);
+			xa_unlock_irq(&dmap->i_pages);
 		}
 		unlock_page(page);
 	}
@@ -474,15 +474,15 @@ int __nilfs_clear_page_dirty(struct page *page)
 	struct address_space *mapping = page->mapping;
 
 	if (mapping) {
-		spin_lock_irq(&mapping->tree_lock);
+		xa_lock_irq(&mapping->i_pages);
 		if (test_bit(PG_dirty, &page->flags)) {
-			radix_tree_tag_clear(&mapping->page_tree,
+			radix_tree_tag_clear(&mapping->i_pages,
 					     page_index(page),
 					     PAGECACHE_TAG_DIRTY);
-			spin_unlock_irq(&mapping->tree_lock);
+			xa_unlock_irq(&mapping->i_pages);
 			return clear_page_dirty_for_io(page);
 		}
-		spin_unlock_irq(&mapping->tree_lock);
+		xa_unlock_irq(&mapping->i_pages);
 		return 0;
 	}
 	return TestClearPageDirty(page);
diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c
index b03057afac2a..66369ec90020 100644
--- a/fs/orangefs/devorangefs-req.c
+++ b/fs/orangefs/devorangefs-req.c
@@ -463,11 +463,10 @@ static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb,
 	if (op->downcall.type != ORANGEFS_VFS_OP_READDIR)
 		goto wakeup;
 
-	op->downcall.trailer_buf = vmalloc(op->downcall.trailer_size);
+	op->downcall.trailer_buf = vzalloc(op->downcall.trailer_size);
 	if (!op->downcall.trailer_buf)
 		goto Enomem;
 
-	memset(op->downcall.trailer_buf, 0, op->downcall.trailer_size);
 	if (!copy_from_iter_full(op->downcall.trailer_buf,
 			         op->downcall.trailer_size, iter)) {
 		gossip_err("%s: failed to copy trailer.\n", __func__);
@@ -779,9 +778,35 @@ static long orangefs_devreq_compat_ioctl(struct file *filp, unsigned int cmd,
 
 #endif /* CONFIG_COMPAT is in .config */
 
+static __poll_t orangefs_devreq_poll(struct file *file,
+				      struct poll_table_struct *poll_table)
+{
+	__poll_t poll_revent_mask = 0;
+
+	poll_wait(file, &orangefs_request_list_waitq, poll_table);
+
+	if (!list_empty(&orangefs_request_list))
+		poll_revent_mask |= EPOLLIN;
+	return poll_revent_mask;
+}
+
 /* the assigned character device major number */
 static int orangefs_dev_major;
 
+static const struct file_operations orangefs_devreq_file_operations = {
+	.owner = THIS_MODULE,
+	.read = orangefs_devreq_read,
+	.write_iter = orangefs_devreq_write_iter,
+	.open = orangefs_devreq_open,
+	.release = orangefs_devreq_release,
+	.unlocked_ioctl = orangefs_devreq_ioctl,
+
+#ifdef CONFIG_COMPAT		/* CONFIG_COMPAT is in .config */
+	.compat_ioctl = orangefs_devreq_compat_ioctl,
+#endif
+	.poll = orangefs_devreq_poll
+};
+
 /*
  * Initialize orangefs device specific state:
  * Must be called at module load time only
@@ -814,29 +839,3 @@ void orangefs_dev_cleanup(void)
 		     "*** /dev/%s character device unregistered ***\n",
 		     ORANGEFS_REQDEVICE_NAME);
 }
-
-static __poll_t orangefs_devreq_poll(struct file *file,
-				      struct poll_table_struct *poll_table)
-{
-	__poll_t poll_revent_mask = 0;
-
-	poll_wait(file, &orangefs_request_list_waitq, poll_table);
-
-	if (!list_empty(&orangefs_request_list))
-		poll_revent_mask |= EPOLLIN;
-	return poll_revent_mask;
-}
-
-const struct file_operations orangefs_devreq_file_operations = {
-	.owner = THIS_MODULE,
-	.read = orangefs_devreq_read,
-	.write_iter = orangefs_devreq_write_iter,
-	.open = orangefs_devreq_open,
-	.release = orangefs_devreq_release,
-	.unlocked_ioctl = orangefs_devreq_ioctl,
-
-#ifdef CONFIG_COMPAT		/* CONFIG_COMPAT is in .config */
-	.compat_ioctl = orangefs_devreq_compat_ioctl,
-#endif
-	.poll = orangefs_devreq_poll
-};
diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c
index 0d228cd087e6..26358efbf794 100644
--- a/fs/orangefs/file.c
+++ b/fs/orangefs/file.c
@@ -42,70 +42,6 @@ static int flush_racache(struct inode *inode)
 }
 
 /*
- * Copy to client-core's address space from the buffers specified
- * by the iovec upto total_size bytes.
- * NOTE: the iovector can either contain addresses which
- *       can futher be kernel-space or user-space addresses.
- *       or it can pointers to struct page's
- */
-static int precopy_buffers(int buffer_index,
-			   struct iov_iter *iter,
-			   size_t total_size)
-{
-	int ret = 0;
-	/*
-	 * copy data from application/kernel by pulling it out
-	 * of the iovec.
-	 */
-
-
-	if (total_size) {
-		ret = orangefs_bufmap_copy_from_iovec(iter,
-						      buffer_index,
-						      total_size);
-		if (ret < 0)
-		gossip_err("%s: Failed to copy-in buffers. Please make sure that the pvfs2-client is running. %ld\n",
-			   __func__,
-			   (long)ret);
-	}
-
-	if (ret < 0)
-		gossip_err("%s: Failed to copy-in buffers. Please make sure that the pvfs2-client is running. %ld\n",
-			__func__,
-			(long)ret);
-	return ret;
-}
-
-/*
- * Copy from client-core's address space to the buffers specified
- * by the iovec upto total_size bytes.
- * NOTE: the iovector can either contain addresses which
- *       can futher be kernel-space or user-space addresses.
- *       or it can pointers to struct page's
- */
-static int postcopy_buffers(int buffer_index,
-			    struct iov_iter *iter,
-			    size_t total_size)
-{
-	int ret = 0;
-	/*
-	 * copy data to application/kernel by pushing it out to
-	 * the iovec. NOTE; target buffers can be addresses or
-	 * struct page pointers.
-	 */
-	if (total_size) {
-		ret = orangefs_bufmap_copy_to_iovec(iter,
-						    buffer_index,
-						    total_size);
-		if (ret < 0)
-			gossip_err("%s: Failed to copy-out buffers. Please make sure that the pvfs2-client is running (%ld)\n",
-				__func__,
-				(long)ret);
-	}
-	return ret;
-}
-
-/*
  * Post and wait for the I/O upcall to finish
  */
 static ssize_t wait_for_direct_io(enum ORANGEFS_io_type type, struct inode *inode,
@@ -157,14 +93,15 @@ populate_shared_memory:
 		     total_size);
 	/*
 	 * Stage 1: copy the buffers into client-core's address space
-	 * precopy_buffers only pertains to writes.
 	 */
-	if (type == ORANGEFS_IO_WRITE) {
-		ret = precopy_buffers(buffer_index,
-				      iter,
-				      total_size);
-		if (ret < 0)
+	if (type == ORANGEFS_IO_WRITE && total_size) {
+		ret = orangefs_bufmap_copy_from_iovec(iter, buffer_index,
+		    total_size);
+		if (ret < 0) {
+			gossip_err("%s: Failed to copy-in buffers. Please make sure that the pvfs2-client is running. %ld\n",
+			    __func__, (long)ret);
 			goto out;
+		}
 	}
 
 	gossip_debug(GOSSIP_FILE_DEBUG,
@@ -260,14 +197,20 @@ populate_shared_memory:
 
 	/*
 	 * Stage 3: Post copy buffers from client-core's address space
-	 * postcopy_buffers only pertains to reads.
 	 */
-	if (type == ORANGEFS_IO_READ) {
-		ret = postcopy_buffers(buffer_index,
-				       iter,
-				       new_op->downcall.resp.io.amt_complete);
-		if (ret < 0)
+	if (type == ORANGEFS_IO_READ && new_op->downcall.resp.io.amt_complete) {
+		/*
+		 * NOTE: the iovector can either contain addresses, which
+		 *       can further be kernel-space or user-space addresses,
+		 *       or it can contain pointers to struct pages.
+		 */
+		ret = orangefs_bufmap_copy_to_iovec(iter, buffer_index,
+		    new_op->downcall.resp.io.amt_complete);
+		if (ret < 0) {
+			gossip_err("%s: Failed to copy-out buffers. Please make sure that the pvfs2-client is running (%ld)\n",
+			    __func__, (long)ret);
 			goto out;
+		}
 	}
 	gossip_debug(GOSSIP_FILE_DEBUG,
 	    "%s(%pU): Amount %s, returned by the sys-io call:%d\n",
@@ -585,6 +528,28 @@ static long orangefs_ioctl(struct file *file, unsigned int cmd, unsigned long ar
 	return ret;
 }
 
+/* .fault handler: call orangefs_inode_getattr() for STATX_SIZE, then defer to filemap_fault(). */
+static int orangefs_fault(struct vm_fault *vmf)
+{
+	struct file *file = vmf->vma->vm_file;
+	int rc;
+	rc = orangefs_inode_getattr(file->f_mapping->host, 0, 1, STATX_SIZE);
+	if (rc == -ESTALE)
+		rc = -EIO;
+	if (rc) {
+		gossip_err("%s: orangefs_inode_getattr failed, rc:%d:.\n", __func__, rc);
+		/* Fault handlers return VM_FAULT_* bits, not a negative errno. */
+		return VM_FAULT_SIGBUS;
+	}
+	return filemap_fault(vmf);
+}
+
+const struct vm_operations_struct orangefs_file_vm_ops = {
+	.fault = orangefs_fault,
+	.map_pages = filemap_map_pages,
+	.page_mkwrite = filemap_page_mkwrite,
+};
+
 /*
  * Memory map a region of a file.
  */
@@ -596,12 +561,16 @@ static int orangefs_file_mmap(struct file *file, struct vm_area_struct *vma)
 			(char *)file->f_path.dentry->d_name.name :
 			(char *)"Unknown"));
 
+	if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
+		return -EINVAL;
+
 	/* set the sequential readahead hint */
 	vma->vm_flags |= VM_SEQ_READ;
 	vma->vm_flags &= ~VM_RAND_READ;
 
-	/* Use readonly mmap since we cannot support writable maps. */
-	return generic_file_readonly_mmap(file, vma);
+	file_accessed(file);
+	vma->vm_ops = &orangefs_file_vm_ops;
+	return 0;
 }
 
 #define mapping_nrpages(idata) ((idata)->nrpages)
diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c
index fe1d705ad91f..79c61da8b1bc 100644
--- a/fs/orangefs/inode.c
+++ b/fs/orangefs/inode.c
@@ -138,7 +138,7 @@ static ssize_t orangefs_direct_IO(struct kiocb *iocb,
 }
 
 /** ORANGEFS2 implementation of address space operations */
-const struct address_space_operations orangefs_address_operations = {
+static const struct address_space_operations orangefs_address_operations = {
 	.readpage = orangefs_readpage,
 	.readpages = orangefs_readpages,
 	.invalidatepage = orangefs_invalidatepage,
@@ -307,7 +307,7 @@ int orangefs_update_time(struct inode *inode, struct timespec *time, int flags)
 }
 
 /* ORANGEDS2 implementation of VFS inode operations for files */
-const struct inode_operations orangefs_file_inode_operations = {
+static const struct inode_operations orangefs_file_inode_operations = {
 	.get_acl = orangefs_get_acl,
 	.set_acl = orangefs_set_acl,
 	.setattr = orangefs_setattr,
diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c
index 59f444dced9b..4f927023d095 100644
--- a/fs/orangefs/orangefs-bufmap.c
+++ b/fs/orangefs/orangefs-bufmap.c
@@ -71,9 +71,9 @@ static void put(struct slot_map *m, int slot)
 	spin_lock(&m->q.lock);
 	__clear_bit(slot, m->map);
 	v = ++m->c;
-	if (unlikely(v == 1))	/* no free slots -> one free slot */
+	if (v > 0)
 		wake_up_locked(&m->q);
-	else if (unlikely(v == -1))	/* finished dying */
+	if (unlikely(v == -1))     /* finished dying */
 		wake_up_all_locked(&m->q);
 	spin_unlock(&m->q.lock);
 }
diff --git a/fs/orangefs/orangefs-debug.h b/fs/orangefs/orangefs-debug.h
index c7db56a31b92..6e079d4230d0 100644
--- a/fs/orangefs/orangefs-debug.h
+++ b/fs/orangefs/orangefs-debug.h
@@ -43,12 +43,6 @@
 #define GOSSIP_MAX_NR                 16
 #define GOSSIP_MAX_DEBUG              (((__u64)1 << GOSSIP_MAX_NR) - 1)
 
-/*function prototypes*/
-__u64 ORANGEFS_kmod_eventlog_to_mask(const char *event_logging);
-__u64 ORANGEFS_debug_eventlog_to_mask(const char *event_logging);
-char *ORANGEFS_debug_mask_to_eventlog(__u64 mask);
-char *ORANGEFS_kmod_mask_to_eventlog(__u64 mask);
-
 /* a private internal type */
 struct __keyword_mask_s {
 	const char *keyword;
diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h
index eebbaece85ef..c29bb0ebc6bb 100644
--- a/fs/orangefs/orangefs-kernel.h
+++ b/fs/orangefs/orangefs-kernel.h
@@ -65,11 +65,7 @@
 #define ORANGEFS_REQDEVICE_NAME          "pvfs2-req"
 
 #define ORANGEFS_DEVREQ_MAGIC             0x20030529
-#define ORANGEFS_LINK_MAX                 0x000000FF
 #define ORANGEFS_PURGE_RETRY_COUNT     0x00000005
-#define ORANGEFS_MAX_NUM_OPTIONS          0x00000004
-#define ORANGEFS_MAX_MOUNT_OPT_LEN        0x00000080
-#define ORANGEFS_MAX_FSKEY_LEN            64
 
 #define MAX_DEV_REQ_UPSIZE (2 * sizeof(__s32) +   \
 sizeof(__u64) + sizeof(struct orangefs_upcall_s))
@@ -113,15 +109,6 @@ extern struct posix_acl *orangefs_get_acl(struct inode *inode, int type);
 extern int orangefs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
 
 /*
- * Redefine xtvec structure so that we could move helper functions out of
- * the define
- */
-struct xtvec {
-	__kernel_off_t xtv_off;		/* must be off_t */
-	__kernel_size_t xtv_len;	/* must be size_t */
-};
-
-/*
  * orangefs data structures
  */
 struct orangefs_kernel_op_s {
@@ -224,39 +211,6 @@ struct orangefs_sb_info_s {
 	struct list_head list;
 };
 
-/*
- * structure that holds the state of any async I/O operation issued
- * through the VFS. Needed especially to handle cancellation requests
- * or even completion notification so that the VFS client-side daemon
- * can free up its vfs_request slots.
- */
-struct orangefs_kiocb_s {
-	/* the pointer to the task that initiated the AIO */
-	struct task_struct *tsk;
-
-	/* pointer to the kiocb that kicked this operation */
-	struct kiocb *kiocb;
-
-	/* buffer index that was used for the I/O */
-	struct orangefs_bufmap *bufmap;
-	int buffer_index;
-
-	/* orangefs kernel operation type */
-	struct orangefs_kernel_op_s *op;
-
-	/* set to indicate the type of the operation */
-	int rw;
-
-	/* file offset */
-	loff_t offset;
-
-	/* and the count in bytes */
-	size_t bytes_to_be_copied;
-
-	ssize_t bytes_copied;
-	int needs_cleanup;
-};
-
 struct orangefs_stats {
 	unsigned long cache_hits;
 	unsigned long cache_misses;
@@ -305,21 +259,6 @@ static inline struct orangefs_khandle *get_khandle_from_ino(struct inode *inode)
 	return &(ORANGEFS_I(inode)->refn.khandle);
 }
 
-static inline ino_t get_ino_from_khandle(struct inode *inode)
-{
-	struct orangefs_khandle *khandle;
-	ino_t ino;
-
-	khandle = get_khandle_from_ino(inode);
-	ino = orangefs_khandle_to_ino(khandle);
-	return ino;
-}
-
-static inline ino_t get_parent_ino_from_dentry(struct dentry *dentry)
-{
-	return get_ino_from_khandle(dentry->d_parent->d_inode);
-}
-
 static inline int is_root_handle(struct inode *inode)
 {
 	gossip_debug(GOSSIP_DCACHE_DEBUG,
@@ -391,7 +330,6 @@ void fsid_key_table_finalize(void);
 /*
  * defined in inode.c
  */
-__u32 convert_to_orangefs_mask(unsigned long lite_mask);
 struct inode *orangefs_new_inode(struct super_block *sb,
 			      struct inode *dir,
 			      int mode,
@@ -410,17 +348,6 @@ int orangefs_update_time(struct inode *, struct timespec *, int);
 /*
  * defined in xattr.c
  */
-int orangefs_setxattr(struct dentry *dentry,
-		   const char *name,
-		   const void *value,
-		   size_t size,
-		   int flags);
-
-ssize_t orangefs_getxattr(struct dentry *dentry,
-		       const char *name,
-		       void *buffer,
-		       size_t size);
-
 ssize_t orangefs_listxattr(struct dentry *dentry, char *buffer, size_t size);
 
 /*
@@ -467,8 +394,6 @@ int orangefs_inode_check_changed(struct inode *inode);
 
 int orangefs_inode_setattr(struct inode *inode, struct iattr *iattr);
 
-int orangefs_unmount_sb(struct super_block *sb);
-
 bool orangefs_cancel_op_in_progress(struct orangefs_kernel_op_s *op);
 
 int orangefs_normalize_to_errno(__s32 error_code);
@@ -487,16 +412,11 @@ extern struct list_head *orangefs_htable_ops_in_progress;
 extern spinlock_t orangefs_htable_ops_in_progress_lock;
 extern int hash_table_size;
 
-extern const struct address_space_operations orangefs_address_operations;
-extern const struct inode_operations orangefs_file_inode_operations;
 extern const struct file_operations orangefs_file_operations;
 extern const struct inode_operations orangefs_symlink_inode_operations;
 extern const struct inode_operations orangefs_dir_inode_operations;
 extern const struct file_operations orangefs_dir_operations;
 extern const struct dentry_operations orangefs_dentry_operations;
-extern const struct file_operations orangefs_devreq_file_operations;
-
-extern wait_queue_head_t orangefs_bufmap_init_waitq;
 
 /*
  * misc convenience macros
diff --git a/fs/orangefs/protocol.h b/fs/orangefs/protocol.h
index dc6e3e6269c3..61ee8d64c842 100644
--- a/fs/orangefs/protocol.h
+++ b/fs/orangefs/protocol.h
@@ -5,11 +5,6 @@
 #include <linux/slab.h>
 #include <linux/ioctl.h>
 
-/* pvfs2-config.h ***********************************************************/
-#define ORANGEFS_VERSION_MAJOR 2
-#define ORANGEFS_VERSION_MINOR 9
-#define ORANGEFS_VERSION_SUB 0
-
 /* khandle stuff  ***********************************************************/
 
 /*
@@ -70,16 +65,6 @@ static inline void ORANGEFS_khandle_from(struct orangefs_khandle *kh,
 }
 
 /* pvfs2-types.h ************************************************************/
-typedef __u32 ORANGEFS_uid;
-typedef __u32 ORANGEFS_gid;
-typedef __s32 ORANGEFS_fs_id;
-typedef __u32 ORANGEFS_permissions;
-typedef __u64 ORANGEFS_time;
-typedef __s64 ORANGEFS_size;
-typedef __u64 ORANGEFS_flags;
-typedef __u64 ORANGEFS_ds_position;
-typedef __s32 ORANGEFS_error;
-typedef __s64 ORANGEFS_offset;
 
 #define ORANGEFS_SUPER_MAGIC 0x20030528
 
@@ -145,7 +130,6 @@ typedef __s64 ORANGEFS_offset;
 #define ORANGEFS_APPEND_FL    FS_APPEND_FL
 #define ORANGEFS_NOATIME_FL   FS_NOATIME_FL
 #define ORANGEFS_MIRROR_FL    0x01000000ULL
-#define ORANGEFS_O_EXECUTE (1 << 0)
 #define ORANGEFS_FS_ID_NULL       ((__s32)0)
 #define ORANGEFS_ATTR_SYS_UID                   (1 << 0)
 #define ORANGEFS_ATTR_SYS_GID                   (1 << 1)
@@ -229,35 +213,6 @@ enum orangefs_ds_type {
 	ORANGEFS_TYPE_INTERNAL = (1 << 5)	/* for the server's private use */
 };
 
-/*
- * ORANGEFS_certificate simply stores a buffer with the buffer size.
- * The buffer can be converted to an OpenSSL X509 struct for use.
- */
-struct ORANGEFS_certificate {
-	__u32 buf_size;
-	unsigned char *buf;
-};
-
-/*
- * A credential identifies a user and is signed by the client/user
- * private key.
- */
-struct ORANGEFS_credential {
-	__u32 userid;	/* user id */
-	__u32 num_groups;	/* length of group_array */
-	__u32 *group_array;	/* groups for which the user is a member */
-	char *issuer;		/* alias of the issuing server */
-	__u64 timeout;	/* seconds after epoch to time out */
-	__u32 sig_size;	/* length of the signature in bytes */
-	unsigned char *signature;	/* digital signature */
-	struct ORANGEFS_certificate certificate;	/* user certificate buffer */
-};
-#define extra_size_ORANGEFS_credential (ORANGEFS_REQ_LIMIT_GROUPS	*	\
-				    sizeof(__u32)		+	\
-				    ORANGEFS_REQ_LIMIT_ISSUER	+	\
-				    ORANGEFS_REQ_LIMIT_SIGNATURE	+	\
-				    extra_size_ORANGEFS_certificate)
-
 /* This structure is used by the VFS-client interaction alone */
 struct ORANGEFS_keyval_pair {
 	char key[ORANGEFS_MAX_XATTR_NAMELEN];
diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig
index ce6ff5a0a6e4..17032631c5cf 100644
--- a/fs/overlayfs/Kconfig
+++ b/fs/overlayfs/Kconfig
@@ -86,3 +86,20 @@ config OVERLAY_FS_NFS_EXPORT
 	  case basis with the "nfs_export=on" mount option.
 
 	  Say N unless you fully understand the consequences.
+
+config OVERLAY_FS_XINO_AUTO
+	bool "Overlayfs: auto enable inode number mapping"
+	default n
+	depends on OVERLAY_FS
+	help
+	  If this config option is enabled then overlay filesystems will use
+	  unused high bits in underlying filesystem inode numbers to map all
+	  inodes to a unified address space.  The mapped 64bit inode numbers
+	  might not be compatible with applications that expect 32bit inodes.
+
+	  If compatibility with applications that expect 32bit inodes is not an
+	  issue, then it is safe and recommended to say Y here.
+
+	  For more information, see Documentation/filesystems/overlayfs.txt
+
+	  If unsure, say N.
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index d855f508fa20..8bede0742619 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -232,7 +232,7 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
 	return err;
 }
 
-struct ovl_fh *ovl_encode_fh(struct dentry *real, bool is_upper)
+struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper)
 {
 	struct ovl_fh *fh;
 	int fh_type, fh_len, dwords;
@@ -300,7 +300,7 @@ int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
 	 * up and a pure upper inode.
 	 */
 	if (ovl_can_decode_fh(lower->d_sb)) {
-		fh = ovl_encode_fh(lower, false);
+		fh = ovl_encode_real_fh(lower, false);
 		if (IS_ERR(fh))
 			return PTR_ERR(fh);
 	}
@@ -321,7 +321,7 @@ static int ovl_set_upper_fh(struct dentry *upper, struct dentry *index)
 	const struct ovl_fh *fh;
 	int err;
 
-	fh = ovl_encode_fh(upper, true);
+	fh = ovl_encode_real_fh(upper, true);
 	if (IS_ERR(fh))
 		return PTR_ERR(fh);
 
diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
index 87bd4148f4fb..425a94672300 100644
--- a/fs/overlayfs/export.c
+++ b/fs/overlayfs/export.c
@@ -228,8 +228,8 @@ static int ovl_d_to_fh(struct dentry *dentry, char *buf, int buflen)
 		goto fail;
 
 	/* Encode an upper or lower file handle */
-	fh = ovl_encode_fh(enc_lower ? ovl_dentry_lower(dentry) :
-				       ovl_dentry_upper(dentry), !enc_lower);
+	fh = ovl_encode_real_fh(enc_lower ? ovl_dentry_lower(dentry) :
+				ovl_dentry_upper(dentry), !enc_lower);
 	err = PTR_ERR(fh);
 	if (IS_ERR(fh))
 		goto fail;
@@ -267,8 +267,8 @@ static int ovl_dentry_to_fh(struct dentry *dentry, u32 *fid, int *max_len)
 	return OVL_FILEID;
 }
 
-static int ovl_encode_inode_fh(struct inode *inode, u32 *fid, int *max_len,
-			       struct inode *parent)
+static int ovl_encode_fh(struct inode *inode, u32 *fid, int *max_len,
+			 struct inode *parent)
 {
 	struct dentry *dentry;
 	int type;
@@ -305,15 +305,12 @@ static struct dentry *ovl_obtain_alias(struct super_block *sb,
 	if (d_is_dir(upper ?: lower))
 		return ERR_PTR(-EIO);
 
-	inode = ovl_get_inode(sb, dget(upper), lower, index, !!lower);
+	inode = ovl_get_inode(sb, dget(upper), lowerpath, index, !!lower);
 	if (IS_ERR(inode)) {
 		dput(upper);
 		return ERR_CAST(inode);
 	}
 
-	if (index)
-		ovl_set_flag(OVL_INDEX, inode);
-
 	dentry = d_find_any_alias(inode);
 	if (!dentry) {
 		dentry = d_alloc_anon(inode->i_sb);
@@ -685,7 +682,7 @@ static struct dentry *ovl_upper_fh_to_d(struct super_block *sb,
 	if (!ofs->upper_mnt)
 		return ERR_PTR(-EACCES);
 
-	upper = ovl_decode_fh(fh, ofs->upper_mnt);
+	upper = ovl_decode_real_fh(fh, ofs->upper_mnt, true);
 	if (IS_ERR_OR_NULL(upper))
 		return upper;
 
@@ -703,25 +700,39 @@ static struct dentry *ovl_lower_fh_to_d(struct super_block *sb,
 	struct ovl_path *stack = &origin;
 	struct dentry *dentry = NULL;
 	struct dentry *index = NULL;
-	struct inode *inode = NULL;
-	bool is_deleted = false;
+	struct inode *inode;
 	int err;
 
-	/* First lookup indexed upper by fh */
+	/* First lookup overlay inode in inode cache by origin fh */
+	err = ovl_check_origin_fh(ofs, fh, false, NULL, &stack);
+	if (err)
+		return ERR_PTR(err);
+
+	if (!d_is_dir(origin.dentry) ||
+	    !(origin.dentry->d_flags & DCACHE_DISCONNECTED)) {
+		inode = ovl_lookup_inode(sb, origin.dentry, false);
+		err = PTR_ERR(inode);
+		if (IS_ERR(inode))
+			goto out_err;
+		if (inode) {
+			dentry = d_find_any_alias(inode);
+			iput(inode);
+			if (dentry)
+				goto out;
+		}
+	}
+
+	/* Then lookup indexed upper/whiteout by origin fh */
 	if (ofs->indexdir) {
 		index = ovl_get_index_fh(ofs, fh);
 		err = PTR_ERR(index);
 		if (IS_ERR(index)) {
-			if (err != -ESTALE)
-				return ERR_PTR(err);
-
-			/* Found a whiteout index - treat as deleted inode */
-			is_deleted = true;
 			index = NULL;
+			goto out_err;
 		}
 	}
 
-	/* Then try to get upper dir by index */
+	/* Then try to get a connected upper dir by index */
 	if (index && d_is_dir(index)) {
 		struct dentry *upper = ovl_index_upper(ofs, index);
 
@@ -734,24 +745,19 @@ static struct dentry *ovl_lower_fh_to_d(struct super_block *sb,
 		goto out;
 	}
 
-	/* Then lookup origin by fh */
-	err = ovl_check_origin_fh(ofs, fh, NULL, &stack);
-	if (err) {
-		goto out_err;
-	} else if (index) {
-		err = ovl_verify_origin(index, origin.dentry, false);
+	/* Otherwise, get a connected non-upper dir or disconnected non-dir */
+	if (d_is_dir(origin.dentry) &&
+	    (origin.dentry->d_flags & DCACHE_DISCONNECTED)) {
+		dput(origin.dentry);
+		origin.dentry = NULL;
+		err = ovl_check_origin_fh(ofs, fh, true, NULL, &stack);
 		if (err)
 			goto out_err;
-	} else if (is_deleted) {
-		/* Lookup deleted non-dir by origin inode */
-		if (!d_is_dir(origin.dentry))
-			inode = ovl_lookup_inode(sb, origin.dentry, false);
-		err = -ESTALE;
-		if (!inode || atomic_read(&inode->i_count) == 1)
+	}
+	if (index) {
+		err = ovl_verify_origin(index, origin.dentry, false);
+		if (err)
 			goto out_err;
-
-		/* Deleted but still open? */
-		index = dget(ovl_i_dentry_upper(inode));
 	}
 
 	dentry = ovl_get_dentry(sb, NULL, &origin, index);
@@ -759,7 +765,6 @@ static struct dentry *ovl_lower_fh_to_d(struct super_block *sb,
 out:
 	dput(origin.dentry);
 	dput(index);
-	iput(inode);
 	return dentry;
 
 out_err:
@@ -829,7 +834,7 @@ static struct dentry *ovl_get_parent(struct dentry *dentry)
 }
 
 const struct export_operations ovl_export_operations = {
-	.encode_fh	= ovl_encode_inode_fh,
+	.encode_fh	= ovl_encode_fh,
 	.fh_to_dentry	= ovl_fh_to_dentry,
 	.fh_to_parent	= ovl_fh_to_parent,
 	.get_name	= ovl_get_name,
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 3b1bd469accd..6e3815fb006b 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -16,13 +16,6 @@
 #include "overlayfs.h"
 
 
-static dev_t ovl_get_pseudo_dev(struct dentry *dentry)
-{
-	struct ovl_entry *oe = dentry->d_fsdata;
-
-	return oe->lowerstack[0].layer->pseudo_dev;
-}
-
 int ovl_setattr(struct dentry *dentry, struct iattr *attr)
 {
 	int err;
@@ -66,6 +59,69 @@ out:
 	return err;
 }
 
+static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat,
+			   struct ovl_layer *lower_layer)
+{
+	bool samefs = ovl_same_sb(dentry->d_sb);
+	unsigned int xinobits = ovl_xino_bits(dentry->d_sb);
+
+	if (samefs) {
+		/*
+		 * When all layers are on the same fs, all real inode
+		 * numbers are unique, so we use the overlay st_dev,
+		 * which is friendly to du -x.
+		 */
+		stat->dev = dentry->d_sb->s_dev;
+		return 0;
+	} else if (xinobits) {
+		unsigned int shift = 64 - xinobits;
+		/*
+		 * All inode numbers of underlying fs should not be using the
+		 * high xinobits, so we use high xinobits to partition the
+		 * overlay st_ino address space. The high bits hold the fsid
+		 * (upper fsid is 0). This way overlay inode numbers are unique
+		 * and all inodes use overlay st_dev. Inode numbers are also
+		 * persistent for a given layer configuration.
+		 */
+		if (stat->ino >> shift) {
+			pr_warn_ratelimited("overlayfs: inode number too big (%pd2, ino=%llu, xinobits=%d)\n",
+					    dentry, stat->ino, xinobits);
+		} else {
+			if (lower_layer)
+				stat->ino |= ((u64)lower_layer->fsid) << shift;
+
+			stat->dev = dentry->d_sb->s_dev;
+			return 0;
+		}
+	}
+
+	/* The inode could not be mapped to a unified st_ino address space */
+	if (S_ISDIR(dentry->d_inode->i_mode)) {
+		/*
+		 * Always use the overlay st_dev for directories, so 'find
+		 * -xdev' will scan the entire overlay mount and won't cross the
+		 * overlay mount boundaries.
+		 *
+		 * If not all layers are on the same fs the pair {real st_ino;
+		 * overlay st_dev} is not unique, so use the non persistent
+		 * overlay st_ino for directories.
+		 */
+		stat->dev = dentry->d_sb->s_dev;
+		stat->ino = dentry->d_inode->i_ino;
+	} else if (lower_layer && lower_layer->fsid) {
+		/*
+		 * For non-samefs setup, if we cannot map all layers st_ino
+		 * to a unified address space, we need to make sure that st_dev
+		 * is unique per lower fs. Upper layer uses real st_dev and
+		 * lower layers use the unique anonymous bdev assigned to the
+		 * lower fs.
+		 */
+		stat->dev = lower_layer->fs->pseudo_dev;
+	}
+
+	return 0;
+}
+
 int ovl_getattr(const struct path *path, struct kstat *stat,
 		u32 request_mask, unsigned int flags)
 {
@@ -75,6 +131,7 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
 	const struct cred *old_cred;
 	bool is_dir = S_ISDIR(dentry->d_inode->i_mode);
 	bool samefs = ovl_same_sb(dentry->d_sb);
+	struct ovl_layer *lower_layer = NULL;
 	int err;
 
 	type = ovl_path_real(dentry, &realpath);
@@ -84,14 +141,18 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
 		goto out;
 
 	/*
-	 * For non-dir or same fs, we use st_ino of the copy up origin, if we
-	 * know it. This guaranties constant st_dev/st_ino across copy up.
+	 * For non-dir or same fs, we use st_ino of the copy up origin.
+	 * This guarantees constant st_dev/st_ino across copy up.
+	 * With xino feature and non-samefs, we use st_ino of the copy up
+	 * origin masked with high bits that represent the layer id.
 	 *
-	 * If filesystem supports NFS export ops, this also guaranties
+	 * If lower filesystem supports NFS file handles, this also guarantees
 	 * persistent st_ino across mount cycle.
 	 */
-	if (!is_dir || samefs) {
-		if (OVL_TYPE_ORIGIN(type)) {
+	if (!is_dir || samefs || ovl_xino_bits(dentry->d_sb)) {
+		if (!OVL_TYPE_UPPER(type)) {
+			lower_layer = ovl_layer_lower(dentry);
+		} else if (OVL_TYPE_ORIGIN(type)) {
 			struct kstat lowerstat;
 			u32 lowermask = STATX_INO | (!is_dir ? STATX_NLINK : 0);
 
@@ -118,43 +179,17 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
 			 */
 			if (ovl_test_flag(OVL_INDEX, d_inode(dentry)) ||
 			    (!ovl_verify_lower(dentry->d_sb) &&
-			     (is_dir || lowerstat.nlink == 1)))
+			     (is_dir || lowerstat.nlink == 1))) {
 				stat->ino = lowerstat.ino;
-
-			if (samefs)
-				WARN_ON_ONCE(stat->dev != lowerstat.dev);
-			else
-				stat->dev = ovl_get_pseudo_dev(dentry);
-		}
-		if (samefs) {
-			/*
-			 * When all layers are on the same fs, all real inode
-			 * number are unique, so we use the overlay st_dev,
-			 * which is friendly to du -x.
-			 */
-			stat->dev = dentry->d_sb->s_dev;
-		} else if (!OVL_TYPE_UPPER(type)) {
-			/*
-			 * For non-samefs setup, to make sure that st_dev/st_ino
-			 * pair is unique across the system, we use a unique
-			 * anonymous st_dev for lower layer inode.
-			 */
-			stat->dev = ovl_get_pseudo_dev(dentry);
+				lower_layer = ovl_layer_lower(dentry);
+			}
 		}
-	} else {
-		/*
-		 * Always use the overlay st_dev for directories, so 'find
-		 * -xdev' will scan the entire overlay mount and won't cross the
-		 * overlay mount boundaries.
-		 *
-		 * If not all layers are on the same fs the pair {real st_ino;
-		 * overlay st_dev} is not unique, so use the non persistent
-		 * overlay st_ino for directories.
-		 */
-		stat->dev = dentry->d_sb->s_dev;
-		stat->ino = dentry->d_inode->i_ino;
 	}
 
+	err = ovl_map_dev_ino(dentry, stat, lower_layer);
+	if (err)
+		goto out;
+
 	/*
 	 * It's probably not worth it to count subdirs to get the
 	 * correct link count.  nlink=1 seems to pacify 'find' and
@@ -383,24 +418,18 @@ int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags)
 
 int ovl_update_time(struct inode *inode, struct timespec *ts, int flags)
 {
-	struct dentry *alias;
-	struct path upperpath;
-
-	if (!(flags & S_ATIME))
-		return 0;
-
-	alias = d_find_any_alias(inode);
-	if (!alias)
-		return 0;
-
-	ovl_path_upper(alias, &upperpath);
-	if (upperpath.dentry) {
-		touch_atime(&upperpath);
-		inode->i_atime = d_inode(upperpath.dentry)->i_atime;
+	if (flags & S_ATIME) {
+		struct ovl_fs *ofs = inode->i_sb->s_fs_info;
+		struct path upperpath = {
+			.mnt = ofs->upper_mnt,
+			.dentry = ovl_upperdentry_dereference(OVL_I(inode)),
+		};
+
+		if (upperpath.dentry) {
+			touch_atime(&upperpath);
+			inode->i_atime = d_inode(upperpath.dentry)->i_atime;
+		}
 	}
-
-	dput(alias);
-
 	return 0;
 }
 
@@ -459,9 +488,27 @@ static inline void ovl_lockdep_annotate_inode_mutex_key(struct inode *inode)
 #endif
 }
 
-static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev)
+static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev,
+			   unsigned long ino, int fsid)
 {
-	inode->i_ino = get_next_ino();
+	int xinobits = ovl_xino_bits(inode->i_sb);
+
+	/*
+	 * When NFS export is enabled and d_ino is consistent with st_ino
+	 * (samefs or i_ino has enough bits to encode layer), set the same
+	 * value used for d_ino to i_ino, because nfsd readdirplus compares
+	 * d_ino values to i_ino values of child entries. When called from
+	 * ovl_new_inode(), ino arg is 0, so i_ino will be updated to real
+	 * upper inode i_ino on ovl_inode_init() or ovl_inode_update().
+	 */
+	if (inode->i_sb->s_export_op &&
+	    (ovl_same_sb(inode->i_sb) || xinobits)) {
+		inode->i_ino = ino;
+		if (xinobits && fsid && !(ino >> (64 - xinobits)))
+			inode->i_ino |= (unsigned long)fsid << (64 - xinobits);
+	} else {
+		inode->i_ino = get_next_ino();
+	}
 	inode->i_mode = mode;
 	inode->i_flags |= S_NOCMTIME;
 #ifdef CONFIG_FS_POSIX_ACL
@@ -597,7 +644,7 @@ struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev)
 
 	inode = new_inode(sb);
 	if (inode)
-		ovl_fill_inode(inode, mode, rdev);
+		ovl_fill_inode(inode, mode, rdev, 0, 0);
 
 	return inode;
 }
@@ -703,13 +750,16 @@ static bool ovl_hash_bylower(struct super_block *sb, struct dentry *upper,
 }
 
 struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry,
-			    struct dentry *lowerdentry, struct dentry *index,
+			    struct ovl_path *lowerpath, struct dentry *index,
 			    unsigned int numlower)
 {
 	struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL;
 	struct inode *inode;
+	struct dentry *lowerdentry = lowerpath ? lowerpath->dentry : NULL;
 	bool bylower = ovl_hash_bylower(sb, upperdentry, lowerdentry, index);
+	int fsid = bylower ? lowerpath->layer->fsid : 0;
 	bool is_dir;
+	unsigned long ino = 0;
 
 	if (!realinode)
 		realinode = d_inode(lowerdentry);
@@ -748,18 +798,22 @@ struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry,
 		if (!is_dir)
 			nlink = ovl_get_nlink(lowerdentry, upperdentry, nlink);
 		set_nlink(inode, nlink);
+		ino = key->i_ino;
 	} else {
 		/* Lower hardlink that will be broken on copy up */
 		inode = new_inode(sb);
 		if (!inode)
 			goto out_nomem;
 	}
-	ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev);
+	ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev, ino, fsid);
 	ovl_inode_init(inode, upperdentry, lowerdentry);
 
 	if (upperdentry && ovl_is_impuredir(upperdentry))
 		ovl_set_flag(OVL_IMPURE, inode);
 
+	if (index)
+		ovl_set_flag(OVL_INDEX, inode);
+
 	/* Check for non-merge dir that may have whiteouts */
 	if (is_dir) {
 		if (((upperdentry && lowerdentry) || numlower > 1) ||
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 70fcfcc684cc..2dba29eadde6 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -56,6 +56,15 @@ static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d,
 			if (s == next)
 				goto invalid;
 		}
+		/*
+		 * One of the ancestor path elements in an absolute path
+		 * lookup in ovl_lookup_layer() could have been opaque and
+		 * that will stop further lookup in lower layers (d->stop=true).
+		 * But we have found an absolute redirect in descendant path
+		 * element and that should force continue lookup in lower
+		 * layers (reset d->stop).
+		 */
+		d->stop = false;
 	} else {
 		if (strchr(buf, '/') != NULL)
 			goto invalid;
@@ -171,7 +180,8 @@ invalid:
 	goto out;
 }
 
-struct dentry *ovl_decode_fh(struct ovl_fh *fh, struct vfsmount *mnt)
+struct dentry *ovl_decode_real_fh(struct ovl_fh *fh, struct vfsmount *mnt,
+				  bool connected)
 {
 	struct dentry *real;
 	int bytes;
@@ -186,7 +196,7 @@ struct dentry *ovl_decode_fh(struct ovl_fh *fh, struct vfsmount *mnt)
 	bytes = (fh->len - offsetof(struct ovl_fh, fid));
 	real = exportfs_decode_fh(mnt, (struct fid *)fh->fid,
 				  bytes >> 2, (int)fh->type,
-				  ovl_acceptable, mnt);
+				  connected ? ovl_acceptable : NULL, mnt);
 	if (IS_ERR(real)) {
 		/*
 		 * Treat stale file handle to lower file as "origin unknown".
@@ -220,6 +230,7 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
 {
 	struct dentry *this;
 	int err;
+	bool last_element = !post[0];
 
 	this = lookup_one_len_unlocked(name, base, namelen);
 	if (IS_ERR(this)) {
@@ -245,11 +256,23 @@ static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
 		d->stop = true;
 		if (d->is_dir)
 			goto put_and_out;
+
+		/*
+		 * NB: handle failure to lookup non-last element when non-dir
+		 * redirects become possible
+		 */
+		WARN_ON(!last_element);
 		goto out;
 	}
-	d->is_dir = true;
-	if (!d->last && ovl_is_opaquedir(this)) {
-		d->stop = d->opaque = true;
+	if (last_element)
+		d->is_dir = true;
+	if (d->last)
+		goto out;
+
+	if (ovl_is_opaquedir(this)) {
+		d->stop = true;
+		if (last_element)
+			d->opaque = true;
 		goto out;
 	}
 	err = ovl_check_redirect(this, d, prelen, post);
@@ -310,14 +333,15 @@ static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
 }
 
 
-int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh,
+int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
 			struct dentry *upperdentry, struct ovl_path **stackp)
 {
 	struct dentry *origin = NULL;
 	int i;
 
 	for (i = 0; i < ofs->numlower; i++) {
-		origin = ovl_decode_fh(fh, ofs->lower_layers[i].mnt);
+		origin = ovl_decode_real_fh(fh, ofs->lower_layers[i].mnt,
+					    connected);
 		if (origin)
 			break;
 	}
@@ -361,7 +385,7 @@ static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry,
 	if (IS_ERR_OR_NULL(fh))
 		return PTR_ERR(fh);
 
-	err = ovl_check_origin_fh(ofs, fh, upperdentry, stackp);
+	err = ovl_check_origin_fh(ofs, fh, false, upperdentry, stackp);
 	kfree(fh);
 
 	if (err) {
@@ -415,7 +439,7 @@ int ovl_verify_set_fh(struct dentry *dentry, const char *name,
 	struct ovl_fh *fh;
 	int err;
 
-	fh = ovl_encode_fh(real, is_upper);
+	fh = ovl_encode_real_fh(real, is_upper);
 	err = PTR_ERR(fh);
 	if (IS_ERR(fh))
 		goto fail;
@@ -451,7 +475,7 @@ struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index)
 	if (IS_ERR_OR_NULL(fh))
 		return ERR_CAST(fh);
 
-	upper = ovl_decode_fh(fh, ofs->upper_mnt);
+	upper = ovl_decode_real_fh(fh, ofs->upper_mnt, true);
 	kfree(fh);
 
 	if (IS_ERR_OR_NULL(upper))
@@ -558,7 +582,7 @@ int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index)
 
 	/* Check if non-dir index is orphan and don't warn before cleaning it */
 	if (!d_is_dir(index) && d_inode(index)->i_nlink == 1) {
-		err = ovl_check_origin_fh(ofs, fh, index, &stack);
+		err = ovl_check_origin_fh(ofs, fh, false, index, &stack);
 		if (err)
 			goto fail;
 
@@ -619,7 +643,7 @@ int ovl_get_index_name(struct dentry *origin, struct qstr *name)
 	struct ovl_fh *fh;
 	int err;
 
-	fh = ovl_encode_fh(origin, false);
+	fh = ovl_encode_real_fh(origin, false);
 	if (IS_ERR(fh))
 		return PTR_ERR(fh);
 
@@ -815,7 +839,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 		.is_dir = false,
 		.opaque = false,
 		.stop = false,
-		.last = !poe->numlower,
+		.last = ofs->config.redirect_follow ? false : !poe->numlower,
 		.redirect = NULL,
 	};
 
@@ -873,7 +897,11 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 	for (i = 0; !d.stop && i < poe->numlower; i++) {
 		struct ovl_path lower = poe->lowerstack[i];
 
-		d.last = i == poe->numlower - 1;
+		if (!ofs->config.redirect_follow)
+			d.last = i == poe->numlower - 1;
+		else
+			d.last = lower.layer->idx == roe->numlower;
+
 		err = ovl_lookup_layer(lower.dentry, &d, &this);
 		if (err)
 			goto out_put;
@@ -976,17 +1004,18 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 		upperdentry = dget(index);
 
 	if (upperdentry || ctr) {
-		if (ctr)
-			origin = stack[0].dentry;
-		inode = ovl_get_inode(dentry->d_sb, upperdentry, origin, index,
+		inode = ovl_get_inode(dentry->d_sb, upperdentry, stack, index,
 				      ctr);
 		err = PTR_ERR(inode);
 		if (IS_ERR(inode))
 			goto out_free_oe;
 
+		/*
+		 * NB: handle redirected hard links when non-dir redirects
+		 * become possible
+		 */
+		WARN_ON(OVL_I(inode)->redirect);
 		OVL_I(inode)->redirect = upperredirect;
-		if (index)
-			ovl_set_flag(OVL_INDEX, inode);
 	}
 
 	revert_creds(old_cred);
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 225ff1171147..e0b7de799f6b 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -202,7 +202,7 @@ void ovl_drop_write(struct dentry *dentry);
 struct dentry *ovl_workdir(struct dentry *dentry);
 const struct cred *ovl_override_creds(struct super_block *sb);
 struct super_block *ovl_same_sb(struct super_block *sb);
-bool ovl_can_decode_fh(struct super_block *sb);
+int ovl_can_decode_fh(struct super_block *sb);
 struct dentry *ovl_indexdir(struct super_block *sb);
 bool ovl_index_all(struct super_block *sb);
 bool ovl_verify_lower(struct super_block *sb);
@@ -215,6 +215,7 @@ void ovl_path_lower(struct dentry *dentry, struct path *path);
 enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path);
 struct dentry *ovl_dentry_upper(struct dentry *dentry);
 struct dentry *ovl_dentry_lower(struct dentry *dentry);
+struct ovl_layer *ovl_layer_lower(struct dentry *dentry);
 struct dentry *ovl_dentry_real(struct dentry *dentry);
 struct dentry *ovl_i_dentry_upper(struct inode *inode);
 struct inode *ovl_inode_upper(struct inode *inode);
@@ -263,11 +264,19 @@ static inline bool ovl_is_impuredir(struct dentry *dentry)
 	return ovl_check_dir_xattr(dentry, OVL_XATTR_IMPURE);
 }
 
+static inline unsigned int ovl_xino_bits(struct super_block *sb)
+{
+	struct ovl_fs *ofs = sb->s_fs_info;
+
+	return ofs->xino_bits;
+}
+
 
 /* namei.c */
 int ovl_check_fh_len(struct ovl_fh *fh, int fh_len);
-struct dentry *ovl_decode_fh(struct ovl_fh *fh, struct vfsmount *mnt);
-int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh,
+struct dentry *ovl_decode_real_fh(struct ovl_fh *fh, struct vfsmount *mnt,
+				  bool connected);
+int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
 			struct dentry *upperdentry, struct ovl_path **stackp);
 int ovl_verify_set_fh(struct dentry *dentry, const char *name,
 		      struct dentry *real, bool is_upper, bool set);
@@ -329,7 +338,7 @@ struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev);
 struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real,
 			       bool is_upper);
 struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry,
-			    struct dentry *lowerdentry, struct dentry *index,
+			    struct ovl_path *lowerpath, struct dentry *index,
 			    unsigned int numlower);
 static inline void ovl_copyattr(struct inode *from, struct inode *to)
 {
@@ -361,7 +370,7 @@ int ovl_copy_up(struct dentry *dentry);
 int ovl_copy_up_flags(struct dentry *dentry, int flags);
 int ovl_copy_xattr(struct dentry *old, struct dentry *new);
 int ovl_set_attr(struct dentry *upper, struct kstat *stat);
-struct ovl_fh *ovl_encode_fh(struct dentry *real, bool is_upper);
+struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper);
 int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
 		   struct dentry *upper);
 
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index bfef6edcc111..41655a7d6894 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -18,13 +18,21 @@ struct ovl_config {
 	const char *redirect_mode;
 	bool index;
 	bool nfs_export;
+	int xino;
+};
+
+struct ovl_sb {
+	struct super_block *sb;
+	dev_t pseudo_dev;
 };
 
 struct ovl_layer {
 	struct vfsmount *mnt;
-	dev_t pseudo_dev;
-	/* Index of this layer in fs root (upper == 0) */
+	struct ovl_sb *fs;
+	/* Index of this layer in fs root (upper idx == 0) */
 	int idx;
+	/* One fsid per unique underlying sb (upper fsid == 0) */
+	int fsid;
 };
 
 struct ovl_path {
@@ -35,8 +43,11 @@ struct ovl_path {
 /* private information held for overlayfs's superblock */
 struct ovl_fs {
 	struct vfsmount *upper_mnt;
-	unsigned numlower;
+	unsigned int numlower;
+	/* Number of unique lower sb that differ from upper sb */
+	unsigned int numlowerfs;
 	struct ovl_layer *lower_layers;
+	struct ovl_sb *lower_fs;
 	/* workbasedir is the path at workdir= mount option */
 	struct dentry *workbasedir;
 	/* workdir is the 'work' directory under workbasedir */
@@ -50,11 +61,11 @@ struct ovl_fs {
 	const struct cred *creator_cred;
 	bool tmpfile;
 	bool noxattr;
-	/* sb common to all layers */
-	struct super_block *same_sb;
 	/* Did we take the inuse lock? */
 	bool upperdir_locked;
 	bool workdir_locked;
+	/* Inode numbers in all layers do not use the high xino_bits */
+	unsigned int xino_bits;
 };
 
 /* private information held for every overlayfs dentry */
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index c11f5c0906c3..ef1fe42ff7bb 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -120,6 +120,10 @@ static bool ovl_calc_d_ino(struct ovl_readdir_data *rdd,
 	if (!rdd->dentry)
 		return false;
 
+	/* Always recalc d_ino when remapping lower inode numbers */
+	if (ovl_xino_bits(rdd->dentry->d_sb))
+		return true;
+
 	/* Always recalc d_ino for parent */
 	if (strcmp(p->name, "..") == 0)
 		return true;
@@ -435,6 +439,19 @@ static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry)
 	return cache;
 }
 
+/* Set fsid in the high xinobits of ino; warn and keep ino if already used */
+static u64 ovl_remap_lower_ino(u64 ino, int xinobits, int fsid,
+			       const char *name, int namelen)
+{
+	if (ino >> (64 - xinobits)) {
+		pr_warn_ratelimited("overlayfs: d_ino too big (%.*s, ino=%llu, xinobits=%d)\n",
+				    namelen, name, ino, xinobits);
+		return ino;
+	}
+
+	return ino | ((u64)fsid) << (64 - xinobits);
+}
+
 /*
  * Set d_ino for upper entries. Non-upper entries should always report
  * the uppermost real inode ino and should not call this function.
@@ -452,9 +469,10 @@ static int ovl_cache_update_ino(struct path *path, struct ovl_cache_entry *p)
 	struct dentry *this = NULL;
 	enum ovl_path_type type;
 	u64 ino = p->real_ino;
+	int xinobits = ovl_xino_bits(dir->d_sb);
 	int err = 0;
 
-	if (!ovl_same_sb(dir->d_sb))
+	if (!ovl_same_sb(dir->d_sb) && !xinobits)
 		goto out;
 
 	if (p->name[0] == '.') {
@@ -491,6 +509,10 @@ get:
 
 		WARN_ON_ONCE(dir->d_sb->s_dev != stat.dev);
 		ino = stat.ino;
+	} else if (xinobits && !OVL_TYPE_UPPER(type)) {
+		ino = ovl_remap_lower_ino(ino, xinobits,
+					  ovl_layer_lower(this)->fsid,
+					  p->name, p->len);
 	}
 
 out:
@@ -618,6 +640,8 @@ struct ovl_readdir_translate {
 	struct ovl_dir_cache *cache;
 	struct dir_context ctx;
 	u64 parent_ino;
+	int fsid;
+	int xinobits;
 };
 
 static int ovl_fill_real(struct dir_context *ctx, const char *name,
@@ -628,14 +652,17 @@ static int ovl_fill_real(struct dir_context *ctx, const char *name,
 		container_of(ctx, struct ovl_readdir_translate, ctx);
 	struct dir_context *orig_ctx = rdt->orig_ctx;
 
-	if (rdt->parent_ino && strcmp(name, "..") == 0)
+	if (rdt->parent_ino && strcmp(name, "..") == 0) {
 		ino = rdt->parent_ino;
-	else if (rdt->cache) {
+	} else if (rdt->cache) {
 		struct ovl_cache_entry *p;
 
 		p = ovl_cache_entry_find(&rdt->cache->root, name, namelen);
 		if (p)
 			ino = p->ino;
+	} else if (rdt->xinobits) {
+		ino = ovl_remap_lower_ino(ino, rdt->xinobits, rdt->fsid,
+					  name, namelen);
 	}
 
 	return orig_ctx->actor(orig_ctx, name, namelen, offset, ino, d_type);
@@ -646,11 +673,16 @@ static int ovl_iterate_real(struct file *file, struct dir_context *ctx)
 	int err;
 	struct ovl_dir_file *od = file->private_data;
 	struct dentry *dir = file->f_path.dentry;
+	struct ovl_layer *lower_layer = ovl_layer_lower(dir);
 	struct ovl_readdir_translate rdt = {
 		.ctx.actor = ovl_fill_real,
 		.orig_ctx = ctx,
+		.xinobits = ovl_xino_bits(dir->d_sb),
 	};
 
+	if (rdt.xinobits && lower_layer)
+		rdt.fsid = lower_layer->fsid;
+
 	if (OVL_TYPE_MERGE(ovl_path_type(dir->d_parent))) {
 		struct kstat stat;
 		struct path statpath = file->f_path;
@@ -693,9 +725,10 @@ static int ovl_iterate(struct file *file, struct dir_context *ctx)
 		 * dir is impure then need to adjust d_ino for copied up
 		 * entries.
 		 */
-		if (ovl_same_sb(dentry->d_sb) &&
-		    (ovl_test_flag(OVL_IMPURE, d_inode(dentry)) ||
-		     OVL_TYPE_MERGE(ovl_path_type(dentry->d_parent)))) {
+		if (ovl_xino_bits(dentry->d_sb) ||
+		    (ovl_same_sb(dentry->d_sb) &&
+		     (ovl_test_flag(OVL_IMPURE, d_inode(dentry)) ||
+		      OVL_TYPE_MERGE(ovl_path_type(dentry->d_parent))))) {
 			return ovl_iterate_real(file, ctx);
 		}
 		return iterate_dir(od->realfile, ctx);
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 7c24619ae7fc..e8551c97de51 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -17,6 +17,7 @@
 #include <linux/statfs.h>
 #include <linux/seq_file.h>
 #include <linux/posix_acl_xattr.h>
+#include <linux/exportfs.h>
 #include "overlayfs.h"
 
 MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
@@ -50,6 +51,11 @@ module_param_named(nfs_export, ovl_nfs_export_def, bool, 0644);
 MODULE_PARM_DESC(ovl_nfs_export_def,
 		 "Default to on or off for the NFS export feature");
 
+/* xino default; the description must name the parameter, not the variable */
+static bool ovl_xino_auto_def = IS_ENABLED(CONFIG_OVERLAY_FS_XINO_AUTO);
+module_param_named(xino_auto, ovl_xino_auto_def, bool, 0644);
+MODULE_PARM_DESC(xino_auto, "Auto enable xino feature");
+
 static void ovl_entry_stack_free(struct ovl_entry *oe)
 {
 	unsigned int i;
@@ -236,11 +242,12 @@ static void ovl_free_fs(struct ovl_fs *ofs)
 	if (ofs->upperdir_locked)
 		ovl_inuse_unlock(ofs->upper_mnt->mnt_root);
 	mntput(ofs->upper_mnt);
-	for (i = 0; i < ofs->numlower; i++) {
+	for (i = 0; i < ofs->numlower; i++)
 		mntput(ofs->lower_layers[i].mnt);
-		free_anon_bdev(ofs->lower_layers[i].pseudo_dev);
-	}
+	for (i = 0; i < ofs->numlowerfs; i++)
+		free_anon_bdev(ofs->lower_fs[i].pseudo_dev);
 	kfree(ofs->lower_layers);
+	kfree(ofs->lower_fs);
 
 	kfree(ofs->config.lowerdir);
 	kfree(ofs->config.upperdir);
@@ -325,6 +332,23 @@ static const char *ovl_redirect_mode_def(void)
 	return ovl_redirect_dir_def ? "on" : "off";
 }
 
+enum {
+	OVL_XINO_OFF,
+	OVL_XINO_AUTO,
+	OVL_XINO_ON,
+};
+
+static const char * const ovl_xino_str[] = {
+	"off",
+	"auto",
+	"on",
+};
+
+static inline int ovl_xino_def(void)
+{
+	return ovl_xino_auto_def ? OVL_XINO_AUTO : OVL_XINO_OFF;
+}
+
 /**
  * ovl_show_options
  *
@@ -350,6 +374,8 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
 	if (ofs->config.nfs_export != ovl_nfs_export_def)
 		seq_printf(m, ",nfs_export=%s", ofs->config.nfs_export ?
 						"on" : "off");
+	if (ofs->config.xino != ovl_xino_def())
+		seq_printf(m, ",xino=%s", ovl_xino_str[ofs->config.xino]);
 	return 0;
 }
 
@@ -384,6 +410,9 @@ enum {
 	OPT_INDEX_OFF,
 	OPT_NFS_EXPORT_ON,
 	OPT_NFS_EXPORT_OFF,
+	OPT_XINO_ON,
+	OPT_XINO_OFF,
+	OPT_XINO_AUTO,
 	OPT_ERR,
 };
 
@@ -397,6 +426,9 @@ static const match_table_t ovl_tokens = {
 	{OPT_INDEX_OFF,			"index=off"},
 	{OPT_NFS_EXPORT_ON,		"nfs_export=on"},
 	{OPT_NFS_EXPORT_OFF,		"nfs_export=off"},
+	{OPT_XINO_ON,			"xino=on"},
+	{OPT_XINO_OFF,			"xino=off"},
+	{OPT_XINO_AUTO,			"xino=auto"},
 	{OPT_ERR,			NULL}
 };
 
@@ -511,6 +543,18 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
 			config->nfs_export = false;
 			break;
 
+		case OPT_XINO_ON:
+			config->xino = OVL_XINO_ON;
+			break;
+
+		case OPT_XINO_OFF:
+			config->xino = OVL_XINO_OFF;
+			break;
+
+		case OPT_XINO_AUTO:
+			config->xino = OVL_XINO_AUTO;
+			break;
+
 		default:
 			pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p);
 			return -EINVAL;
@@ -700,6 +744,7 @@ static int ovl_check_namelen(struct path *path, struct ovl_fs *ofs,
 static int ovl_lower_dir(const char *name, struct path *path,
 			 struct ovl_fs *ofs, int *stack_depth, bool *remote)
 {
+	int fh_type;
 	int err;
 
 	err = ovl_mount_dir_noesc(name, path);
@@ -719,15 +764,19 @@ static int ovl_lower_dir(const char *name, struct path *path,
 	 * The inodes index feature and NFS export need to encode and decode
 	 * file handles, so they require that all layers support them.
 	 */
+	fh_type = ovl_can_decode_fh(path->dentry->d_sb);
 	if ((ofs->config.nfs_export ||
-	     (ofs->config.index && ofs->config.upperdir)) &&
-	    !ovl_can_decode_fh(path->dentry->d_sb)) {
+	     (ofs->config.index && ofs->config.upperdir)) && !fh_type) {
 		ofs->config.index = false;
 		ofs->config.nfs_export = false;
 		pr_warn("overlayfs: fs on '%s' does not support file handles, falling back to index=off,nfs_export=off.\n",
 			name);
 	}
 
+	/* Check if lower fs has 32bit inode numbers */
+	if (fh_type != FILEID_INO32_GEN)
+		ofs->xino_bits = 0;
+
 	return 0;
 
 out_put:
@@ -951,6 +1000,7 @@ static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath)
 {
 	struct vfsmount *mnt = ofs->upper_mnt;
 	struct dentry *temp;
+	int fh_type;
 	int err;
 
 	err = mnt_want_write(mnt);
@@ -1000,12 +1050,16 @@ static int ovl_make_workdir(struct ovl_fs *ofs, struct path *workpath)
 	}
 
 	/* Check if upper/work fs supports file handles */
-	if (ofs->config.index &&
-	    !ovl_can_decode_fh(ofs->workdir->d_sb)) {
+	fh_type = ovl_can_decode_fh(ofs->workdir->d_sb);
+	if (ofs->config.index && !fh_type) {
 		ofs->config.index = false;
 		pr_warn("overlayfs: upper fs does not support file handles, falling back to index=off.\n");
 	}
 
+	/* Check if upper fs has 32bit inode numbers */
+	if (fh_type != FILEID_INO32_GEN)
+		ofs->xino_bits = 0;
+
 	/* NFS export of r/w mount depends on index */
 	if (ofs->config.nfs_export && !ofs->config.index) {
 		pr_warn("overlayfs: NFS export requires \"index=on\", falling back to nfs_export=off.\n");
@@ -1108,6 +1162,35 @@ out:
 	return err;
 }
 
+/* fsid of sb: 0 if upper sb, else its lower_fs slot + 1 (added on first use) */
+static int ovl_get_fsid(struct ovl_fs *ofs, struct super_block *sb)
+{
+	unsigned int i;
+	dev_t dev;
+	int err;
+
+	/* fsid 0 is reserved for upper fs even with non upper overlay */
+	if (ofs->upper_mnt && ofs->upper_mnt->mnt_sb == sb)
+		return 0;
+
+	for (i = 0; i < ofs->numlowerfs; i++) {
+		if (ofs->lower_fs[i].sb == sb)
+			return i + 1;
+	}
+
+	err = get_anon_bdev(&dev);
+	if (err) {
+		pr_err("overlayfs: failed to get anonymous bdev for lowerpath\n");
+		return err;
+	}
+
+	ofs->lower_fs[ofs->numlowerfs].sb = sb;
+	ofs->lower_fs[ofs->numlowerfs].pseudo_dev = dev;
+	ofs->numlowerfs++;
+
+	return ofs->numlowerfs;
+}
+
 static int ovl_get_lower_layers(struct ovl_fs *ofs, struct path *stack,
 				unsigned int numlower)
 {
@@ -1119,23 +1202,27 @@ static int ovl_get_lower_layers(struct ovl_fs *ofs, struct path *stack,
 				    GFP_KERNEL);
 	if (ofs->lower_layers == NULL)
 		goto out;
+
+	ofs->lower_fs = kcalloc(numlower, sizeof(struct ovl_sb),
+				GFP_KERNEL);
+	if (ofs->lower_fs == NULL)
+		goto out;
+
 	for (i = 0; i < numlower; i++) {
 		struct vfsmount *mnt;
-		dev_t dev;
+		int fsid;
 
-		err = get_anon_bdev(&dev);
-		if (err) {
-			pr_err("overlayfs: failed to get anonymous bdev for lowerpath\n");
+		err = fsid = ovl_get_fsid(ofs, stack[i].mnt->mnt_sb);
+		if (err < 0)
 			goto out;
-		}
 
 		mnt = clone_private_mount(&stack[i]);
 		err = PTR_ERR(mnt);
 		if (IS_ERR(mnt)) {
 			pr_err("overlayfs: failed to clone lowerpath\n");
-			free_anon_bdev(dev);
 			goto out;
 		}
+
 		/*
 		 * Make lower layers R/O.  That way fchmod/fchown on lower file
 		 * will fail instead of modifying lower fs.
@@ -1143,16 +1230,41 @@ static int ovl_get_lower_layers(struct ovl_fs *ofs, struct path *stack,
 		mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME;
 
 		ofs->lower_layers[ofs->numlower].mnt = mnt;
-		ofs->lower_layers[ofs->numlower].pseudo_dev = dev;
 		ofs->lower_layers[ofs->numlower].idx = i + 1;
+		ofs->lower_layers[ofs->numlower].fsid = fsid;
+		if (fsid) {
+			ofs->lower_layers[ofs->numlower].fs =
+				&ofs->lower_fs[fsid - 1];
+		}
 		ofs->numlower++;
+	}
+
+	/*
+	 * When all layers are on same fs, overlay can use real inode numbers.
+	 * With mount option "xino=on", mounter declares that there are enough
+	 * free high bits in underlying fs to hold the unique fsid.
+	 * If overlayfs does encounter underlying inodes using the high xino
+	 * bits reserved for fsid, it emits a warning and uses the original
+	 * inode number.
+	 */
+	if (!ofs->numlowerfs || (ofs->numlowerfs == 1 && !ofs->upper_mnt)) {
+		ofs->xino_bits = 0;
+		ofs->config.xino = OVL_XINO_OFF;
+	} else if (ofs->config.xino == OVL_XINO_ON && !ofs->xino_bits) {
+		/*
+		 * This is a roundup of number of bits needed for numlowerfs+1
+		 * (i.e. ilog2(numlowerfs+1 - 1) + 1). fsid 0 is reserved for
+		 * upper fs even with non upper overlay.
+		 */
+		BUILD_BUG_ON(ilog2(OVL_MAX_STACK) > 31);
+		ofs->xino_bits = ilog2(ofs->numlowerfs) + 1;
+	}
 
-		/* Check if all lower layers are on same sb */
-		if (i == 0)
-			ofs->same_sb = mnt->mnt_sb;
-		else if (ofs->same_sb != mnt->mnt_sb)
-			ofs->same_sb = NULL;
+	if (ofs->xino_bits) {
+		pr_info("overlayfs: \"xino\" feature enabled using %d upper inode bits.\n",
+			ofs->xino_bits);
 	}
+
 	err = 0;
 out:
 	return err;
@@ -1263,6 +1375,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 
 	ofs->config.index = ovl_index_def;
 	ofs->config.nfs_export = ovl_nfs_export_def;
+	ofs->config.xino = ovl_xino_def();
 	err = ovl_parse_opt((char *) data, &ofs->config);
 	if (err)
 		goto out_err;
@@ -1276,6 +1389,10 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 
 	sb->s_stack_depth = 0;
 	sb->s_maxbytes = MAX_LFS_FILESIZE;
+	/* Assume underlying fs uses 32bit inodes unless proven otherwise */
+	if (ofs->config.xino != OVL_XINO_OFF)
+		ofs->xino_bits = BITS_PER_LONG - 32;
+
 	if (ofs->config.upperdir) {
 		if (!ofs->config.workdir) {
 			pr_err("overlayfs: missing 'workdir'\n");
@@ -1305,8 +1422,6 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	/* If the upper fs is nonexistent, we mark overlayfs r/o too */
 	if (!ofs->upper_mnt)
 		sb->s_flags |= SB_RDONLY;
-	else if (ofs->upper_mnt->mnt_sb != ofs->same_sb)
-		ofs->same_sb = NULL;
 
 	if (!(ovl_force_readonly(ofs)) && ofs->config.index) {
 		err = ovl_get_indexdir(ofs, oe, &upperpath);
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 930784a26623..6f1078028c66 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -47,13 +47,29 @@ struct super_block *ovl_same_sb(struct super_block *sb)
 {
 	struct ovl_fs *ofs = sb->s_fs_info;
 
-	return ofs->same_sb;
+	if (!ofs->numlowerfs)
+		return ofs->upper_mnt->mnt_sb;
+	else if (ofs->numlowerfs == 1 && !ofs->upper_mnt)
+		return ofs->lower_fs[0].sb;
+	else
+		return NULL;
 }
 
-bool ovl_can_decode_fh(struct super_block *sb)
+/*
+ * Check if underlying fs supports file handles and try to determine encoding
+ * type, in order to deduce maximum inode number used by fs.
+ *
+ * Return 0 if file handles are not supported.
+ * Return 1 (FILEID_INO32_GEN) if fs uses the default 32bit inode encoding.
+ * Return -1 if fs uses a non default encoding with unknown inode size.
+ */
+int ovl_can_decode_fh(struct super_block *sb)
 {
-	return (sb->s_export_op && sb->s_export_op->fh_to_dentry &&
-		!uuid_is_null(&sb->s_uuid));
+	if (!sb->s_export_op || !sb->s_export_op->fh_to_dentry ||
+	    uuid_is_null(&sb->s_uuid))
+		return 0;
+
+	return sb->s_export_op->encode_fh ? -1 : FILEID_INO32_GEN;
 }
 
 struct dentry *ovl_indexdir(struct super_block *sb)
@@ -172,6 +188,13 @@ struct dentry *ovl_dentry_lower(struct dentry *dentry)
 	return oe->numlower ? oe->lowerstack[0].dentry : NULL;
 }
 
+struct ovl_layer *ovl_layer_lower(struct dentry *dentry)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+
+	return oe->numlower ? oe->lowerstack[0].layer : NULL;
+}
+
 struct dentry *ovl_dentry_real(struct dentry *dentry)
 {
 	return ovl_dentry_upper(dentry) ?: ovl_dentry_lower(dentry);
@@ -279,12 +302,16 @@ void ovl_dentry_set_redirect(struct dentry *dentry, const char *redirect)
 void ovl_inode_init(struct inode *inode, struct dentry *upperdentry,
 		    struct dentry *lowerdentry)
 {
+	struct inode *realinode = d_inode(upperdentry ?: lowerdentry);
+
 	if (upperdentry)
 		OVL_I(inode)->__upperdentry = upperdentry;
 	if (lowerdentry)
 		OVL_I(inode)->lower = igrab(d_inode(lowerdentry));
 
-	ovl_copyattr(d_inode(upperdentry ?: lowerdentry), inode);
+	ovl_copyattr(realinode, inode);
+	if (!inode->i_ino)
+		inode->i_ino = realinode->i_ino;
 }
 
 void ovl_inode_update(struct inode *inode, struct dentry *upperdentry)
@@ -299,6 +326,8 @@ void ovl_inode_update(struct inode *inode, struct dentry *upperdentry)
 	smp_wmb();
 	OVL_I(inode)->__upperdentry = upperdentry;
 	if (inode_unhashed(inode)) {
+		if (!inode->i_ino)
+			inode->i_ino = upperinode->i_ino;
 		inode->i_private = upperinode;
 		__insert_inode_hash(inode, (unsigned long) upperinode);
 	}
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 598803576e4c..ae2c807fd719 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -141,25 +141,12 @@ static inline const char *get_task_state(struct task_struct *tsk)
 	return task_state_array[task_state_index(tsk)];
 }
 
-static inline int get_task_umask(struct task_struct *tsk)
-{
-	struct fs_struct *fs;
-	int umask = -ENOENT;
-
-	task_lock(tsk);
-	fs = tsk->fs;
-	if (fs)
-		umask = fs->umask;
-	task_unlock(tsk);
-	return umask;
-}
-
 static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
 				struct pid *pid, struct task_struct *p)
 {
 	struct user_namespace *user_ns = seq_user_ns(m);
 	struct group_info *group_info;
-	int g, umask;
+	int g, umask = -1;
 	struct task_struct *tracer;
 	const struct cred *cred;
 	pid_t ppid, tpid = 0, tgid, ngid;
@@ -177,17 +164,18 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
 	ngid = task_numa_group_id(p);
 	cred = get_task_cred(p);
 
-	umask = get_task_umask(p);
-	if (umask >= 0)
-		seq_printf(m, "Umask:\t%#04o\n", umask);
-
 	task_lock(p);
+	if (p->fs)
+		umask = p->fs->umask;
 	if (p->files)
 		max_fds = files_fdtable(p->files)->max_fds;
 	task_unlock(p);
 	rcu_read_unlock();
 
-	seq_printf(m, "State:\t%s", get_task_state(p));
+	if (umask >= 0)
+		seq_printf(m, "Umask:\t%#04o\n", umask);
+	seq_puts(m, "State:\t");
+	seq_puts(m, get_task_state(p));
 
 	seq_put_decimal_ull(m, "\nTgid:\t", tgid);
 	seq_put_decimal_ull(m, "\nNgid:\t", ngid);
@@ -313,8 +301,8 @@ static void render_cap_t(struct seq_file *m, const char *header,
 
 	seq_puts(m, header);
 	CAP_FOR_EACH_U32(__capi) {
-		seq_printf(m, "%08x",
-			   a->cap[CAP_LAST_U32 - __capi]);
+		seq_put_hex_ll(m, NULL,
+			   a->cap[CAP_LAST_U32 - __capi], 8);
 	}
 	seq_putc(m, '\n');
 }
@@ -368,7 +356,8 @@ static void task_cpus_allowed(struct seq_file *m, struct task_struct *task)
 
 static inline void task_core_dumping(struct seq_file *m, struct mm_struct *mm)
 {
-	seq_printf(m, "CoreDumping:\t%d\n", !!mm->core_state);
+	seq_put_decimal_ull(m, "CoreDumping:\t", !!mm->core_state);
+	seq_putc(m, '\n');
 }
 
 int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
@@ -504,7 +493,11 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 	/* convert nsec -> ticks */
 	start_time = nsec_to_clock_t(task->real_start_time);
 
-	seq_printf(m, "%d (%s) %c", pid_nr_ns(pid, ns), tcomm, state);
+	seq_put_decimal_ull(m, "", pid_nr_ns(pid, ns));
+	seq_puts(m, " (");
+	seq_puts(m, tcomm);
+	seq_puts(m, ") ");
+	seq_putc(m, state);
 	seq_put_decimal_ll(m, " ", ppid);
 	seq_put_decimal_ll(m, " ", pgid);
 	seq_put_decimal_ll(m, " ", sid);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index d53246863cfb..eafa39a3a88c 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -388,14 +388,17 @@ static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns,
 	unsigned long wchan;
 	char symname[KSYM_NAME_LEN];
 
-	wchan = get_wchan(task);
+	if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
+		goto print0;
 
-	if (wchan && ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)
-			&& !lookup_symbol_name(wchan, symname))
-		seq_printf(m, "%s", symname);
-	else
-		seq_putc(m, '0');
+	wchan = get_wchan(task);
+	if (wchan && !lookup_symbol_name(wchan, symname)) {
+		seq_puts(m, symname);
+		return 0;
+	}
 
+print0:
+	seq_putc(m, '0');
 	return 0;
 }
 #endif /* CONFIG_KALLSYMS */
@@ -1910,6 +1913,8 @@ static int dname_to_vma_addr(struct dentry *dentry,
 	unsigned long long sval, eval;
 	unsigned int len;
 
+	if (str[0] == '0' && str[1] != '-')
+		return -EINVAL;
 	len = _parse_integer(str, 16, &sval);
 	if (len & KSTRTOX_OVERFLOW)
 		return -EINVAL;
@@ -1921,6 +1926,8 @@ static int dname_to_vma_addr(struct dentry *dentry,
 		return -EINVAL;
 	str++;
 
+	if (str[0] == '0' && str[1])
+		return -EINVAL;
 	len = _parse_integer(str, 16, &eval);
 	if (len & KSTRTOX_OVERFLOW)
 		return -EINVAL;
@@ -2204,6 +2211,7 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx)
 		}
 	}
 	up_read(&mm->mmap_sem);
+	mmput(mm);
 
 	for (i = 0; i < nr_files; i++) {
 		char buf[4 * sizeof(long) + 2];	/* max: %lx-%lx\0 */
@@ -2221,7 +2229,6 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx)
 	}
 	if (fa)
 		flex_array_free(fa);
-	mmput(mm);
 
 out_put_task:
 	put_task_struct(task);
diff --git a/fs/proc/cmdline.c b/fs/proc/cmdline.c
index 403cbb12a6e9..8233e7af9389 100644
--- a/fs/proc/cmdline.c
+++ b/fs/proc/cmdline.c
@@ -6,7 +6,8 @@
 
 static int cmdline_proc_show(struct seq_file *m, void *v)
 {
-	seq_printf(m, "%s\n", saved_command_line);
+	seq_puts(m, saved_command_line);
+	seq_putc(m, '\n');
 	return 0;
 }
 
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 5d709fa8f3a2..2078e70e1595 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -8,12 +8,14 @@
  * Copyright (C) 1997 Theodore Ts'o
  */
 
+#include <linux/cache.h>
 #include <linux/errno.h>
 #include <linux/time.h>
 #include <linux/proc_fs.h>
 #include <linux/stat.h>
 #include <linux/mm.h>
 #include <linux/module.h>
+#include <linux/namei.h>
 #include <linux/slab.h>
 #include <linux/printk.h>
 #include <linux/mount.h>
@@ -28,6 +30,17 @@
 
 static DEFINE_RWLOCK(proc_subdir_lock);
 
+struct kmem_cache *proc_dir_entry_cache __ro_after_init;
+
+void pde_free(struct proc_dir_entry *pde)
+{
+	if (S_ISLNK(pde->mode))
+		kfree(pde->data);
+	if (pde->name != pde->inline_name)
+		kfree(pde->name);
+	kmem_cache_free(proc_dir_entry_cache, pde);
+}
+
 static int proc_match(const char *name, struct proc_dir_entry *de, unsigned int len)
 {
 	if (len < de->namelen)
@@ -40,8 +53,8 @@ static int proc_match(const char *name, struct proc_dir_entry *de, unsigned int
 
 static struct proc_dir_entry *pde_subdir_first(struct proc_dir_entry *dir)
 {
-	return rb_entry_safe(rb_first_cached(&dir->subdir),
-			     struct proc_dir_entry, subdir_node);
+	return rb_entry_safe(rb_first(&dir->subdir), struct proc_dir_entry,
+			     subdir_node);
 }
 
 static struct proc_dir_entry *pde_subdir_next(struct proc_dir_entry *dir)
@@ -54,7 +67,7 @@ static struct proc_dir_entry *pde_subdir_find(struct proc_dir_entry *dir,
 					      const char *name,
 					      unsigned int len)
 {
-	struct rb_node *node = dir->subdir.rb_root.rb_node;
+	struct rb_node *node = dir->subdir.rb_node;
 
 	while (node) {
 		struct proc_dir_entry *de = rb_entry(node,
@@ -75,9 +88,8 @@ static struct proc_dir_entry *pde_subdir_find(struct proc_dir_entry *dir,
 static bool pde_subdir_insert(struct proc_dir_entry *dir,
 			      struct proc_dir_entry *de)
 {
-	struct rb_root_cached *root = &dir->subdir;
-	struct rb_node **new = &root->rb_root.rb_node, *parent = NULL;
-	bool leftmost = true;
+	struct rb_root *root = &dir->subdir;
+	struct rb_node **new = &root->rb_node, *parent = NULL;
 
 	/* Figure out where to put new node */
 	while (*new) {
@@ -89,16 +101,15 @@ static bool pde_subdir_insert(struct proc_dir_entry *dir,
 		parent = *new;
 		if (result < 0)
 			new = &(*new)->rb_left;
-		else if (result > 0) {
+		else if (result > 0)
 			new = &(*new)->rb_right;
-			leftmost = false;
-		} else
+		else
 			return false;
 	}
 
 	/* Add new node and rebalance tree. */
 	rb_link_node(&de->subdir_node, parent, new);
-	rb_insert_color_cached(&de->subdir_node, root, leftmost);
+	rb_insert_color(&de->subdir_node, root);
 	return true;
 }
 
@@ -207,6 +218,26 @@ void proc_free_inum(unsigned int inum)
 	ida_simple_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST);
 }
 
+static int proc_misc_d_revalidate(struct dentry *dentry, unsigned int flags)
+{
+	if (flags & LOOKUP_RCU)
+		return -ECHILD;
+
+	if (atomic_read(&PDE(d_inode(dentry))->in_use) < 0)
+		return 0; /* revalidate */
+	return 1;
+}
+
+static int proc_misc_d_delete(const struct dentry *dentry)
+{
+	return atomic_read(&PDE(d_inode(dentry))->in_use) < 0;
+}
+
+static const struct dentry_operations proc_misc_dentry_ops = {
+	.d_revalidate	= proc_misc_d_revalidate,
+	.d_delete	= proc_misc_d_delete,
+};
+
 /*
  * Don't create negative dentries here, return -ENOENT by hand
  * instead.
@@ -224,7 +255,7 @@ struct dentry *proc_lookup_de(struct inode *dir, struct dentry *dentry,
 		inode = proc_get_inode(dir->i_sb, de);
 		if (!inode)
 			return ERR_PTR(-ENOMEM);
-		d_set_d_op(dentry, &simple_dentry_operations);
+		d_set_d_op(dentry, &proc_misc_dentry_ops);
 		d_add(dentry, inode);
 		return NULL;
 	}
@@ -354,6 +385,14 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
 		WARN(1, "name len %u\n", qstr.len);
 		return NULL;
 	}
+	if (qstr.len == 1 && fn[0] == '.') {
+		WARN(1, "name '.'\n");
+		return NULL;
+	}
+	if (qstr.len == 2 && fn[0] == '.' && fn[1] == '.') {
+		WARN(1, "name '..'\n");
+		return NULL;
+	}
 	if (*parent == &proc_root && name_to_int(&qstr) != ~0U) {
 		WARN(1, "create '/proc/%s' by hand\n", qstr.name);
 		return NULL;
@@ -363,16 +402,26 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
 		return NULL;
 	}
 
-	ent = kzalloc(sizeof(struct proc_dir_entry) + qstr.len + 1, GFP_KERNEL);
+	ent = kmem_cache_zalloc(proc_dir_entry_cache, GFP_KERNEL);
 	if (!ent)
 		goto out;
 
+	if (qstr.len + 1 <= sizeof(ent->inline_name)) {
+		ent->name = ent->inline_name;
+	} else {
+		ent->name = kmalloc(qstr.len + 1, GFP_KERNEL);
+		if (!ent->name) {
+			pde_free(ent);
+			return NULL;
+		}
+	}
+
 	memcpy(ent->name, fn, qstr.len + 1);
 	ent->namelen = qstr.len;
 	ent->mode = mode;
 	ent->nlink = nlink;
-	ent->subdir = RB_ROOT_CACHED;
-	atomic_set(&ent->count, 1);
+	ent->subdir = RB_ROOT;
+	refcount_set(&ent->refcnt, 1);
 	spin_lock_init(&ent->pde_unload_lock);
 	INIT_LIST_HEAD(&ent->pde_openers);
 	proc_set_user(ent, (*parent)->uid, (*parent)->gid);
@@ -395,12 +444,11 @@ struct proc_dir_entry *proc_symlink(const char *name,
 			strcpy((char*)ent->data,dest);
 			ent->proc_iops = &proc_link_inode_operations;
 			if (proc_register(parent, ent) < 0) {
-				kfree(ent->data);
-				kfree(ent);
+				pde_free(ent);
 				ent = NULL;
 			}
 		} else {
-			kfree(ent);
+			pde_free(ent);
 			ent = NULL;
 		}
 	}
@@ -423,7 +471,7 @@ struct proc_dir_entry *proc_mkdir_data(const char *name, umode_t mode,
 		ent->proc_iops = &proc_dir_inode_operations;
 		parent->nlink++;
 		if (proc_register(parent, ent) < 0) {
-			kfree(ent);
+			pde_free(ent);
 			parent->nlink--;
 			ent = NULL;
 		}
@@ -458,7 +506,7 @@ struct proc_dir_entry *proc_create_mount_point(const char *name)
 		ent->proc_iops = NULL;
 		parent->nlink++;
 		if (proc_register(parent, ent) < 0) {
-			kfree(ent);
+			pde_free(ent);
 			parent->nlink--;
 			ent = NULL;
 		}
@@ -495,7 +543,7 @@ struct proc_dir_entry *proc_create_data(const char *name, umode_t mode,
 		goto out_free;
 	return pde;
 out_free:
-	kfree(pde);
+	pde_free(pde);
 out:
 	return NULL;
 }
@@ -522,19 +570,12 @@ void proc_set_user(struct proc_dir_entry *de, kuid_t uid, kgid_t gid)
 }
 EXPORT_SYMBOL(proc_set_user);
 
-static void free_proc_entry(struct proc_dir_entry *de)
-{
-	proc_free_inum(de->low_ino);
-
-	if (S_ISLNK(de->mode))
-		kfree(de->data);
-	kfree(de);
-}
-
 void pde_put(struct proc_dir_entry *pde)
 {
-	if (atomic_dec_and_test(&pde->count))
-		free_proc_entry(pde);
+	if (refcount_dec_and_test(&pde->refcnt)) {
+		proc_free_inum(pde->low_ino);
+		pde_free(pde);
+	}
 }
 
 /*
@@ -555,7 +596,7 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
 
 	de = pde_subdir_find(parent, fn, len);
 	if (de)
-		rb_erase_cached(&de->subdir_node, &parent->subdir);
+		rb_erase(&de->subdir_node, &parent->subdir);
 	write_unlock(&proc_subdir_lock);
 	if (!de) {
 		WARN(1, "name '%s'\n", name);
@@ -592,13 +633,13 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent)
 		write_unlock(&proc_subdir_lock);
 		return -ENOENT;
 	}
-	rb_erase_cached(&root->subdir_node, &parent->subdir);
+	rb_erase(&root->subdir_node, &parent->subdir);
 
 	de = root;
 	while (1) {
 		next = pde_subdir_first(de);
 		if (next) {
-			rb_erase_cached(&next->subdir_node, &de->subdir);
+			rb_erase(&next->subdir_node, &de->subdir);
 			de = next;
 			continue;
 		}
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 6e8724958116..2cf3b74391ca 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -54,6 +54,7 @@ static void proc_evict_inode(struct inode *inode)
 }
 
 static struct kmem_cache *proc_inode_cachep __ro_after_init;
+static struct kmem_cache *pde_opener_cache __ro_after_init;
 
 static struct inode *proc_alloc_inode(struct super_block *sb)
 {
@@ -92,7 +93,7 @@ static void init_once(void *foo)
 	inode_init_once(&ei->vfs_inode);
 }
 
-void __init proc_init_inodecache(void)
+void __init proc_init_kmemcache(void)
 {
 	proc_inode_cachep = kmem_cache_create("proc_inode_cache",
 					     sizeof(struct proc_inode),
@@ -100,6 +101,13 @@ void __init proc_init_inodecache(void)
 						SLAB_MEM_SPREAD|SLAB_ACCOUNT|
 						SLAB_PANIC),
 					     init_once);
+	pde_opener_cache =
+		kmem_cache_create("pde_opener", sizeof(struct pde_opener), 0,
+				  SLAB_ACCOUNT|SLAB_PANIC, NULL);
+	proc_dir_entry_cache = kmem_cache_create_usercopy(
+		"proc_dir_entry", sizeof(struct proc_dir_entry), 0, SLAB_PANIC,
+		offsetof(struct proc_dir_entry, inline_name),
+		sizeof_field(struct proc_dir_entry, inline_name), NULL);
 }
 
 static int proc_show_options(struct seq_file *seq, struct dentry *root)
@@ -138,7 +146,7 @@ static void unuse_pde(struct proc_dir_entry *pde)
 		complete(pde->pde_unload_completion);
 }
 
-/* pde is locked */
+/* pde is locked on entry, unlocked on exit */
 static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo)
 {
 	/*
@@ -157,9 +165,10 @@ static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo)
 		pdeo->c = &c;
 		spin_unlock(&pde->pde_unload_lock);
 		wait_for_completion(&c);
-		spin_lock(&pde->pde_unload_lock);
 	} else {
 		struct file *file;
+		struct completion *c;
+
 		pdeo->closing = true;
 		spin_unlock(&pde->pde_unload_lock);
 		file = pdeo->file;
@@ -167,9 +176,11 @@ static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo)
 		spin_lock(&pde->pde_unload_lock);
 		/* After ->release. */
 		list_del(&pdeo->lh);
-		if (unlikely(pdeo->c))
-			complete(pdeo->c);
-		kfree(pdeo);
+		c = pdeo->c;
+		spin_unlock(&pde->pde_unload_lock);
+		if (unlikely(c))
+			complete(c);
+		kmem_cache_free(pde_opener_cache, pdeo);
 	}
 }
 
@@ -188,6 +199,7 @@ void proc_entry_rundown(struct proc_dir_entry *de)
 		struct pde_opener *pdeo;
 		pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh);
 		close_pdeo(de, pdeo);
+		spin_lock(&de->pde_unload_lock);
 	}
 	spin_unlock(&de->pde_unload_lock);
 }
@@ -338,31 +350,36 @@ static int proc_reg_open(struct inode *inode, struct file *file)
 	 *
 	 * Save every "struct file" with custom ->release hook.
 	 */
-	pdeo = kmalloc(sizeof(struct pde_opener), GFP_KERNEL);
-	if (!pdeo)
-		return -ENOMEM;
-
-	if (!use_pde(pde)) {
-		kfree(pdeo);
+	if (!use_pde(pde))
 		return -ENOENT;
-	}
-	open = pde->proc_fops->open;
+
 	release = pde->proc_fops->release;
+	if (release) {
+		pdeo = kmem_cache_alloc(pde_opener_cache, GFP_KERNEL);
+		if (!pdeo) {
+			rv = -ENOMEM;
+			goto out_unuse;
+		}
+	}
 
+	open = pde->proc_fops->open;
 	if (open)
 		rv = open(inode, file);
 
-	if (rv == 0 && release) {
-		/* To know what to release. */
-		pdeo->file = file;
-		pdeo->closing = false;
-		pdeo->c = NULL;
-		spin_lock(&pde->pde_unload_lock);
-		list_add(&pdeo->lh, &pde->pde_openers);
-		spin_unlock(&pde->pde_unload_lock);
-	} else
-		kfree(pdeo);
+	if (release) {
+		if (rv == 0) {
+			/* To know what to release. */
+			pdeo->file = file;
+			pdeo->closing = false;
+			pdeo->c = NULL;
+			spin_lock(&pde->pde_unload_lock);
+			list_add(&pdeo->lh, &pde->pde_openers);
+			spin_unlock(&pde->pde_unload_lock);
+		} else
+			kmem_cache_free(pde_opener_cache, pdeo);
+	}
 
+out_unuse:
 	unuse_pde(pde);
 	return rv;
 }
@@ -375,7 +392,7 @@ static int proc_reg_release(struct inode *inode, struct file *file)
 	list_for_each_entry(pdeo, &pde->pde_openers, lh) {
 		if (pdeo->file == file) {
 			close_pdeo(pde, pdeo);
-			break;
+			return 0;
 		}
 	}
 	spin_unlock(&pde->pde_unload_lock);
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index d697c8ab0a14..0f1692e63cb6 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -11,6 +11,7 @@
 
 #include <linux/proc_fs.h>
 #include <linux/proc_ns.h>
+#include <linux/refcount.h>
 #include <linux/spinlock.h>
 #include <linux/atomic.h>
 #include <linux/binfmts.h>
@@ -36,7 +37,7 @@ struct proc_dir_entry {
 	 * negative -> it's going away RSN
 	 */
 	atomic_t in_use;
-	atomic_t count;		/* use count */
+	refcount_t refcnt;
 	struct list_head pde_openers;	/* who did ->open, but not ->release */
 	/* protects ->pde_openers and all struct pde_opener instances */
 	spinlock_t pde_unload_lock;
@@ -50,13 +51,22 @@ struct proc_dir_entry {
 	kgid_t gid;
 	loff_t size;
 	struct proc_dir_entry *parent;
-	struct rb_root_cached subdir;
+	struct rb_root subdir;
 	struct rb_node subdir_node;
+	char *name;
 	umode_t mode;
 	u8 namelen;
-	char name[];
+#ifdef CONFIG_64BIT
+#define SIZEOF_PDE_INLINE_NAME	(192-139)
+#else
+#define SIZEOF_PDE_INLINE_NAME	(128-87)
+#endif
+	char inline_name[SIZEOF_PDE_INLINE_NAME];
 } __randomize_layout;
 
+extern struct kmem_cache *proc_dir_entry_cache;
+void pde_free(struct proc_dir_entry *pde);
+
 union proc_op {
 	int (*proc_get_link)(struct dentry *, struct path *);
 	int (*proc_show)(struct seq_file *m,
@@ -159,7 +169,7 @@ int proc_readdir_de(struct file *, struct dir_context *, struct proc_dir_entry *
 
 static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde)
 {
-	atomic_inc(&pde->count);
+	refcount_inc(&pde->refcnt);
 	return pde;
 }
 extern void pde_put(struct proc_dir_entry *);
@@ -177,12 +187,12 @@ struct pde_opener {
 	struct list_head lh;
 	bool closing;
 	struct completion *c;
-};
+} __randomize_layout;
 extern const struct inode_operations proc_link_inode_operations;
 
 extern const struct inode_operations proc_pid_link_inode_operations;
 
-extern void proc_init_inodecache(void);
+void proc_init_kmemcache(void);
 void set_proc_pid_nlink(void);
 extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
 extern int proc_fill_super(struct super_block *, void *data, int flags);
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 6bb20f864259..65a72ab57471 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -26,20 +26,7 @@ void __attribute__((weak)) arch_report_meminfo(struct seq_file *m)
 
 static void show_val_kb(struct seq_file *m, const char *s, unsigned long num)
 {
-	char v[32];
-	static const char blanks[7] = {' ', ' ', ' ', ' ',' ', ' ', ' '};
-	int len;
-
-	len = num_to_str(v, sizeof(v), num << (PAGE_SHIFT - 10));
-
-	seq_write(m, s, 16);
-
-	if (len > 0) {
-		if (len < 8)
-			seq_write(m, blanks, 8 - len);
-
-		seq_write(m, v, len);
-	}
+	seq_put_decimal_ull_width(m, s, num << (PAGE_SHIFT - 10), 8);
 	seq_write(m, " kB\n", 4);
 }
 
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 68c06ae7888c..1763f370489d 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -192,15 +192,16 @@ static __net_init int proc_net_ns_init(struct net *net)
 	int err;
 
 	err = -ENOMEM;
-	netd = kzalloc(sizeof(*netd) + 4, GFP_KERNEL);
+	netd = kmem_cache_zalloc(proc_dir_entry_cache, GFP_KERNEL);
 	if (!netd)
 		goto out;
 
-	netd->subdir = RB_ROOT_CACHED;
+	netd->subdir = RB_ROOT;
 	netd->data = net;
 	netd->nlink = 2;
 	netd->namelen = 3;
 	netd->parent = &proc_root;
+	netd->name = netd->inline_name;
 	memcpy(netd->name, "net", 4);
 
 	uid = make_kuid(net->user_ns, 0);
@@ -223,7 +224,7 @@ static __net_init int proc_net_ns_init(struct net *net)
 	return 0;
 
 free_net:
-	kfree(netd);
+	pde_free(netd);
 out:
 	return err;
 }
@@ -231,7 +232,7 @@ out:
 static __net_exit void proc_net_ns_exit(struct net *net)
 {
 	remove_proc_entry("stat", net->proc_net);
-	kfree(net->proc_net);
+	pde_free(net->proc_net);
 }
 
 static struct pernet_operations __net_initdata proc_net_ns_ops = {
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index c41ab261397d..8989936f2995 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -707,14 +707,14 @@ static bool proc_sys_link_fill_cache(struct file *file,
 				    struct ctl_table *table)
 {
 	bool ret = true;
+
 	head = sysctl_head_grab(head);
+	if (IS_ERR(head))
+		return false;
 
-	if (S_ISLNK(table->mode)) {
-		/* It is not an error if we can not follow the link ignore it */
-		int err = sysctl_follow_link(&head, &table);
-		if (err)
-			goto out;
-	}
+	/* It is not an error if we cannot follow the link; just ignore it. */
+	if (sysctl_follow_link(&head, &table))
+		goto out;
 
 	ret = proc_sys_fill_cache(file, ctx, head, table);
 out:
@@ -1086,7 +1086,7 @@ static int sysctl_check_table_array(const char *path, struct ctl_table *table)
 	if ((table->proc_handler == proc_douintvec) ||
 	    (table->proc_handler == proc_douintvec_minmax)) {
 		if (table->maxlen != sizeof(unsigned int))
-			err |= sysctl_err(path, table, "array now allowed");
+			err |= sysctl_err(path, table, "array not allowed");
 	}
 
 	return err;
diff --git a/fs/proc/root.c b/fs/proc/root.c
index ede8e64974be..61b7340b357a 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -123,23 +123,13 @@ static struct file_system_type proc_fs_type = {
 
 void __init proc_root_init(void)
 {
-	int err;
-
-	proc_init_inodecache();
+	proc_init_kmemcache();
 	set_proc_pid_nlink();
-	err = register_filesystem(&proc_fs_type);
-	if (err)
-		return;
-
 	proc_self_init();
 	proc_thread_self_init();
 	proc_symlink("mounts", NULL, "self/mounts");
 
 	proc_net_init();
-
-#ifdef CONFIG_SYSVIPC
-	proc_mkdir("sysvipc", NULL);
-#endif
 	proc_mkdir("fs", NULL);
 	proc_mkdir("driver", NULL);
 	proc_create_mount_point("fs/nfsd"); /* somewhere for the nfsd filesystem to be mounted */
@@ -150,6 +140,8 @@ void __init proc_root_init(void)
 	proc_tty_init();
 	proc_mkdir("bus", NULL);
 	proc_sys_init();
+
+	register_filesystem(&proc_fs_type);
 }
 
 static int proc_root_getattr(const struct path *path, struct kstat *stat,
@@ -207,12 +199,13 @@ struct proc_dir_entry proc_root = {
 	.namelen	= 5, 
 	.mode		= S_IFDIR | S_IRUGO | S_IXUGO, 
 	.nlink		= 2, 
-	.count		= ATOMIC_INIT(1),
+	.refcnt		= REFCOUNT_INIT(1),
 	.proc_iops	= &proc_root_inode_operations, 
 	.proc_fops	= &proc_root_operations,
 	.parent		= &proc_root,
-	.subdir		= RB_ROOT_CACHED,
-	.name		= "/proc",
+	.subdir		= RB_ROOT,
+	.name		= proc_root.inline_name,
+	.inline_name	= "/proc",
 };
 
 int pid_ns_prepare_proc(struct pid_namespace *ns)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index ec6d2983a5cb..65ae54659833 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -24,6 +24,8 @@
 #include <asm/tlbflush.h>
 #include "internal.h"
 
+#define SEQ_PUT_DEC(str, val) \
+		seq_put_decimal_ull_width(m, str, (val) << (PAGE_SHIFT-10), 8)
 void task_mem(struct seq_file *m, struct mm_struct *mm)
 {
 	unsigned long text, lib, swap, anon, file, shmem;
@@ -53,39 +55,28 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
 	lib = (mm->exec_vm << PAGE_SHIFT) - text;
 
 	swap = get_mm_counter(mm, MM_SWAPENTS);
-	seq_printf(m,
-		"VmPeak:\t%8lu kB\n"
-		"VmSize:\t%8lu kB\n"
-		"VmLck:\t%8lu kB\n"
-		"VmPin:\t%8lu kB\n"
-		"VmHWM:\t%8lu kB\n"
-		"VmRSS:\t%8lu kB\n"
-		"RssAnon:\t%8lu kB\n"
-		"RssFile:\t%8lu kB\n"
-		"RssShmem:\t%8lu kB\n"
-		"VmData:\t%8lu kB\n"
-		"VmStk:\t%8lu kB\n"
-		"VmExe:\t%8lu kB\n"
-		"VmLib:\t%8lu kB\n"
-		"VmPTE:\t%8lu kB\n"
-		"VmSwap:\t%8lu kB\n",
-		hiwater_vm << (PAGE_SHIFT-10),
-		total_vm << (PAGE_SHIFT-10),
-		mm->locked_vm << (PAGE_SHIFT-10),
-		mm->pinned_vm << (PAGE_SHIFT-10),
-		hiwater_rss << (PAGE_SHIFT-10),
-		total_rss << (PAGE_SHIFT-10),
-		anon << (PAGE_SHIFT-10),
-		file << (PAGE_SHIFT-10),
-		shmem << (PAGE_SHIFT-10),
-		mm->data_vm << (PAGE_SHIFT-10),
-		mm->stack_vm << (PAGE_SHIFT-10),
-		text >> 10,
-		lib >> 10,
-		mm_pgtables_bytes(mm) >> 10,
-		swap << (PAGE_SHIFT-10));
+	SEQ_PUT_DEC("VmPeak:\t", hiwater_vm);
+	SEQ_PUT_DEC(" kB\nVmSize:\t", total_vm);
+	SEQ_PUT_DEC(" kB\nVmLck:\t", mm->locked_vm);
+	SEQ_PUT_DEC(" kB\nVmPin:\t", mm->pinned_vm);
+	SEQ_PUT_DEC(" kB\nVmHWM:\t", hiwater_rss);
+	SEQ_PUT_DEC(" kB\nVmRSS:\t", total_rss);
+	SEQ_PUT_DEC(" kB\nRssAnon:\t", anon);
+	SEQ_PUT_DEC(" kB\nRssFile:\t", file);
+	SEQ_PUT_DEC(" kB\nRssShmem:\t", shmem);
+	SEQ_PUT_DEC(" kB\nVmData:\t", mm->data_vm);
+	SEQ_PUT_DEC(" kB\nVmStk:\t", mm->stack_vm);
+	seq_put_decimal_ull_width(m,
+		    " kB\nVmExe:\t", text >> 10, 8);
+	seq_put_decimal_ull_width(m,
+		    " kB\nVmLib:\t", lib >> 10, 8);
+	seq_put_decimal_ull_width(m,
+		    " kB\nVmPTE:\t", mm_pgtables_bytes(mm) >> 10, 8);
+	SEQ_PUT_DEC(" kB\nVmSwap:\t", swap);
+	seq_puts(m, " kB\n");
 	hugetlb_report_usage(m, mm);
 }
+#undef SEQ_PUT_DEC
 
 unsigned long task_vsize(struct mm_struct *mm)
 {
@@ -287,15 +278,18 @@ static void show_vma_header_prefix(struct seq_file *m,
 				   dev_t dev, unsigned long ino)
 {
 	seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
-	seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ",
-		   start,
-		   end,
-		   flags & VM_READ ? 'r' : '-',
-		   flags & VM_WRITE ? 'w' : '-',
-		   flags & VM_EXEC ? 'x' : '-',
-		   flags & VM_MAYSHARE ? 's' : 'p',
-		   pgoff,
-		   MAJOR(dev), MINOR(dev), ino);
+	seq_put_hex_ll(m, NULL, start, 8);
+	seq_put_hex_ll(m, "-", end, 8);
+	seq_putc(m, ' ');
+	seq_putc(m, flags & VM_READ ? 'r' : '-');
+	seq_putc(m, flags & VM_WRITE ? 'w' : '-');
+	seq_putc(m, flags & VM_EXEC ? 'x' : '-');
+	seq_putc(m, flags & VM_MAYSHARE ? 's' : 'p');
+	seq_put_hex_ll(m, " ", pgoff, 8);
+	seq_put_hex_ll(m, " ", MAJOR(dev), 2);
+	seq_put_hex_ll(m, ":", MINOR(dev), 2);
+	seq_put_decimal_ull(m, " ", ino);
+	seq_putc(m, ' ');
 }
 
 static void
@@ -694,8 +688,9 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
 		if (!mnemonics[i][0])
 			continue;
 		if (vma->vm_flags & (1UL << i)) {
-			seq_printf(m, "%c%c ",
-				   mnemonics[i][0], mnemonics[i][1]);
+			seq_putc(m, mnemonics[i][0]);
+			seq_putc(m, mnemonics[i][1]);
+			seq_putc(m, ' ');
 		}
 	}
 	seq_putc(m, '\n');
@@ -736,6 +731,8 @@ void __weak arch_show_smap(struct seq_file *m, struct vm_area_struct *vma)
 {
 }
 
+#define SEQ_PUT_DEC(str, val) \
+		seq_put_decimal_ull_width(m, str, (val) >> 10, 8)
 static int show_smap(struct seq_file *m, void *v, int is_pid)
 {
 	struct proc_maps_private *priv = m->private;
@@ -809,51 +806,34 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
 		ret = SEQ_SKIP;
 	}
 
-	if (!rollup_mode)
-		seq_printf(m,
-			   "Size:           %8lu kB\n"
-			   "KernelPageSize: %8lu kB\n"
-			   "MMUPageSize:    %8lu kB\n",
-			   (vma->vm_end - vma->vm_start) >> 10,
-			   vma_kernel_pagesize(vma) >> 10,
-			   vma_mmu_pagesize(vma) >> 10);
-
-
-	if (!rollup_mode || last_vma)
-		seq_printf(m,
-			   "Rss:            %8lu kB\n"
-			   "Pss:            %8lu kB\n"
-			   "Shared_Clean:   %8lu kB\n"
-			   "Shared_Dirty:   %8lu kB\n"
-			   "Private_Clean:  %8lu kB\n"
-			   "Private_Dirty:  %8lu kB\n"
-			   "Referenced:     %8lu kB\n"
-			   "Anonymous:      %8lu kB\n"
-			   "LazyFree:       %8lu kB\n"
-			   "AnonHugePages:  %8lu kB\n"
-			   "ShmemPmdMapped: %8lu kB\n"
-			   "Shared_Hugetlb: %8lu kB\n"
-			   "Private_Hugetlb: %7lu kB\n"
-			   "Swap:           %8lu kB\n"
-			   "SwapPss:        %8lu kB\n"
-			   "Locked:         %8lu kB\n",
-			   mss->resident >> 10,
-			   (unsigned long)(mss->pss >> (10 + PSS_SHIFT)),
-			   mss->shared_clean  >> 10,
-			   mss->shared_dirty  >> 10,
-			   mss->private_clean >> 10,
-			   mss->private_dirty >> 10,
-			   mss->referenced >> 10,
-			   mss->anonymous >> 10,
-			   mss->lazyfree >> 10,
-			   mss->anonymous_thp >> 10,
-			   mss->shmem_thp >> 10,
-			   mss->shared_hugetlb >> 10,
-			   mss->private_hugetlb >> 10,
-			   mss->swap >> 10,
-			   (unsigned long)(mss->swap_pss >> (10 + PSS_SHIFT)),
-			   (unsigned long)(mss->pss >> (10 + PSS_SHIFT)));
+	if (!rollup_mode) {
+		SEQ_PUT_DEC("Size:           ", vma->vm_end - vma->vm_start);
+		SEQ_PUT_DEC(" kB\nKernelPageSize: ", vma_kernel_pagesize(vma));
+		SEQ_PUT_DEC(" kB\nMMUPageSize:    ", vma_mmu_pagesize(vma));
+		seq_puts(m, " kB\n");
+	}
 
+	if (!rollup_mode || last_vma) {
+		SEQ_PUT_DEC("Rss:            ", mss->resident);
+		SEQ_PUT_DEC(" kB\nPss:            ", mss->pss >> PSS_SHIFT);
+		SEQ_PUT_DEC(" kB\nShared_Clean:   ", mss->shared_clean);
+		SEQ_PUT_DEC(" kB\nShared_Dirty:   ", mss->shared_dirty);
+		SEQ_PUT_DEC(" kB\nPrivate_Clean:  ", mss->private_clean);
+		SEQ_PUT_DEC(" kB\nPrivate_Dirty:  ", mss->private_dirty);
+		SEQ_PUT_DEC(" kB\nReferenced:     ", mss->referenced);
+		SEQ_PUT_DEC(" kB\nAnonymous:      ", mss->anonymous);
+		SEQ_PUT_DEC(" kB\nLazyFree:       ", mss->lazyfree);
+		SEQ_PUT_DEC(" kB\nAnonHugePages:  ", mss->anonymous_thp);
+		SEQ_PUT_DEC(" kB\nShmemPmdMapped: ", mss->shmem_thp);
+		SEQ_PUT_DEC(" kB\nShared_Hugetlb: ", mss->shared_hugetlb);
+		seq_put_decimal_ull_width(m, " kB\nPrivate_Hugetlb: ",
+					  mss->private_hugetlb >> 10, 7);
+		SEQ_PUT_DEC(" kB\nSwap:           ", mss->swap);
+		SEQ_PUT_DEC(" kB\nSwapPss:        ",
+						mss->swap_pss >> PSS_SHIFT);
+		SEQ_PUT_DEC(" kB\nLocked:         ", mss->pss >> PSS_SHIFT);
+		seq_puts(m, " kB\n");
+	}
 	if (!rollup_mode) {
 		arch_show_smap(m, vma);
 		show_smap_vma_flags(m, vma);
@@ -861,6 +841,7 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
 	m_cache_vma(m, vma);
 	return ret;
 }
+#undef SEQ_PUT_DEC
 
 static int show_pid_smap(struct seq_file *m, void *v)
 {
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 1143ef351c58..dc720573fd53 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -258,7 +258,7 @@ static int pstore_decompress(void *in, void *out,
 
 static void allocate_buf_for_compression(void)
 {
-	if (!zbackend)
+	if (!IS_ENABLED(CONFIG_PSTORE_COMPRESS) || !zbackend)
 		return;
 
 	if (!crypto_has_comp(zbackend->name, 0, 0)) {
@@ -287,7 +287,7 @@ static void allocate_buf_for_compression(void)
 
 static void free_buf_for_compression(void)
 {
-	if (!IS_ERR_OR_NULL(tfm))
+	if (IS_ENABLED(CONFIG_PSTORE_COMPRESS) && !IS_ERR_OR_NULL(tfm))
 		crypto_free_comp(tfm);
 	kfree(big_oops_buf);
 	big_oops_buf = NULL;
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 70057359fbaf..23148c3ed675 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2643,7 +2643,7 @@ static int journal_init_dev(struct super_block *super,
 	if (IS_ERR(journal->j_dev_bd)) {
 		result = PTR_ERR(journal->j_dev_bd);
 		journal->j_dev_bd = NULL;
-		reiserfs_warning(super,
+		reiserfs_warning(super, "sh-457",
 				 "journal_init_dev: Cannot open '%s': %i",
 				 jdev_name, result);
 		return result;
diff --git a/fs/seq_file.c b/fs/seq_file.c
index eea09f6d8830..c6c27f1f9c98 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -6,6 +6,7 @@
  * initial implementation -- AV, Oct 2001.
  */
 
+#include <linux/cache.h>
 #include <linux/fs.h>
 #include <linux/export.h>
 #include <linux/seq_file.h>
@@ -19,6 +20,8 @@
 #include <linux/uaccess.h>
 #include <asm/page.h>
 
+static struct kmem_cache *seq_file_cache __ro_after_init;
+
 static void seq_set_overflow(struct seq_file *m)
 {
 	m->count = m->size;
@@ -26,7 +29,7 @@ static void seq_set_overflow(struct seq_file *m)
 
 static void *seq_buf_alloc(unsigned long size)
 {
-	return kvmalloc(size, GFP_KERNEL);
+	return kvmalloc(size, GFP_KERNEL_ACCOUNT);
 }
 
 /**
@@ -51,7 +54,7 @@ int seq_open(struct file *file, const struct seq_operations *op)
 
 	WARN_ON(file->private_data);
 
-	p = kzalloc(sizeof(*p), GFP_KERNEL);
+	p = kmem_cache_zalloc(seq_file_cache, GFP_KERNEL);
 	if (!p)
 		return -ENOMEM;
 
@@ -366,7 +369,7 @@ int seq_release(struct inode *inode, struct file *file)
 {
 	struct seq_file *m = file->private_data;
 	kvfree(m->buf);
-	kfree(m);
+	kmem_cache_free(seq_file_cache, m);
 	return 0;
 }
 EXPORT_SYMBOL(seq_release);
@@ -563,7 +566,7 @@ static void single_stop(struct seq_file *p, void *v)
 int single_open(struct file *file, int (*show)(struct seq_file *, void *),
 		void *data)
 {
-	struct seq_operations *op = kmalloc(sizeof(*op), GFP_KERNEL);
+	struct seq_operations *op = kmalloc(sizeof(*op), GFP_KERNEL_ACCOUNT);
 	int res = -ENOMEM;
 
 	if (op) {
@@ -625,7 +628,7 @@ void *__seq_open_private(struct file *f, const struct seq_operations *ops,
 	void *private;
 	struct seq_file *seq;
 
-	private = kzalloc(psize, GFP_KERNEL);
+	private = kzalloc(psize, GFP_KERNEL_ACCOUNT);
 	if (private == NULL)
 		goto out;
 
@@ -673,29 +676,37 @@ void seq_puts(struct seq_file *m, const char *s)
 }
 EXPORT_SYMBOL(seq_puts);
 
-/*
+/**
  * A helper routine for putting decimal numbers without rich format of printf().
  * only 'unsigned long long' is supported.
- * This routine will put strlen(delimiter) + number into seq_file.
+ * @m: seq_file identifying the buffer to which data should be written
+ * @delimiter: a string which is printed before the number
+ * @num: the number
+ * @width: a minimum field width
+ *
+ * This routine will put strlen(delimiter) + number into seq_file.
  * This routine is very quick when you show lots of numbers.
  * In usual cases, it will be better to use seq_printf(). It's easier to read.
  */
-void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
-			 unsigned long long num)
+void seq_put_decimal_ull_width(struct seq_file *m, const char *delimiter,
+			 unsigned long long num, unsigned int width)
 {
 	int len;
 
 	if (m->count + 2 >= m->size) /* we'll write 2 bytes at least */
 		goto overflow;
 
-	len = strlen(delimiter);
-	if (m->count + len >= m->size)
-		goto overflow;
+	if (delimiter && delimiter[0]) {
+		if (delimiter[1] == 0)
+			seq_putc(m, delimiter[0]);
+		else
+			seq_puts(m, delimiter);
+	}
 
-	memcpy(m->buf + m->count, delimiter, len);
-	m->count += len;
+	if (!width)
+		width = 1;
 
-	if (m->count + 1 >= m->size)
+	if (m->count + width >= m->size)
 		goto overflow;
 
 	if (num < 10) {
@@ -703,7 +714,7 @@ void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
 		return;
 	}
 
-	len = num_to_str(m->buf + m->count, m->size - m->count, num);
+	len = num_to_str(m->buf + m->count, m->size - m->count, num, width);
 	if (!len)
 		goto overflow;
 
@@ -713,8 +724,60 @@ void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
 overflow:
 	seq_set_overflow(m);
 }
+
+void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
+			 unsigned long long num)
+{
+	seq_put_decimal_ull_width(m, delimiter, num, 0); /* 0: no padding */
+}
 EXPORT_SYMBOL(seq_put_decimal_ull);
 
+/**
+ * seq_put_hex_ll - put a number in hexadecimal notation
+ * @m: seq_file identifying the buffer to which data should be written
+ * @delimiter: string printed before the number; NULL or "" prints nothing
+ * @v: the number
+ * @width: minimum field width; shorter numbers are padded on the left
+ *
+ * seq_put_hex_ll(m, "", v, 8) is equal to seq_printf(m, "%08llx", v)
+ *
+ * This routine is very quick when you show lots of numbers.
+ * In usual cases, it will be better to use seq_printf(). It's easier to read.
+ */
+void seq_put_hex_ll(struct seq_file *m, const char *delimiter,
+				unsigned long long v, unsigned int width)
+{
+	unsigned int len;
+	int i;
+
+	if (delimiter && delimiter[0]) {
+		if (delimiter[1] == 0)
+			seq_putc(m, delimiter[0]);
+		else
+			seq_puts(m, delimiter);
+	}
+
+	/* __builtin_clzll(0) is undefined, so v == 0 is special-cased */
+	if (v == 0)
+		len = 1;	/* one digit */
+	else		/* digits = ceil(significant bits / 4) */
+		len = (sizeof(v) * 8 - __builtin_clzll(v) + 3) / 4;
+
+	if (len < width)	/* enforce the minimum field width */
+		len = width;
+
+	if (m->count + len > m->size) {
+		seq_set_overflow(m);	/* digits would not fit in m->buf */
+		return;
+	}
+
+	for (i = len - 1; i >= 0; i--) {	/* fill right to left */
+		m->buf[m->count + i] = hex_asc[0xf & v];
+		v = v >> 4;
+	}
+	m->count += len;
+}
+
 void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num)
 {
 	int len;
@@ -722,12 +785,12 @@ void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num
 	if (m->count + 3 >= m->size) /* we'll write 2 bytes at least */
 		goto overflow;
 
-	len = strlen(delimiter);
-	if (m->count + len >= m->size)
-		goto overflow;
-
-	memcpy(m->buf + m->count, delimiter, len);
-	m->count += len;
+	if (delimiter && delimiter[0]) {
+		if (delimiter[1] == 0)
+			seq_putc(m, delimiter[0]);
+		else
+			seq_puts(m, delimiter);
+	}
 
 	if (m->count + 2 >= m->size)
 		goto overflow;
@@ -742,7 +805,7 @@ void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num
 		return;
 	}
 
-	len = num_to_str(m->buf + m->count, m->size - m->count, num);
+	len = num_to_str(m->buf + m->count, m->size - m->count, num, 0);
 	if (!len)
 		goto overflow;
 
@@ -782,8 +845,14 @@ EXPORT_SYMBOL(seq_write);
 void seq_pad(struct seq_file *m, char c)
 {
 	int size = m->pad_until - m->count;
-	if (size > 0)
-		seq_printf(m, "%*s", size, "");
+	if (size > 0) {
+		if (size + m->count > m->size) {
+			seq_set_overflow(m);
+			return;
+		}
+		memset(m->buf + m->count, ' ', size);
+		m->count += size;
+	}
 	if (c)
 		seq_putc(m, c);
 }
@@ -1040,3 +1109,8 @@ seq_hlist_next_percpu(void *v, struct hlist_head __percpu *head,
 	return NULL;
 }
 EXPORT_SYMBOL(seq_hlist_next_percpu);
+
+void __init seq_file_init(void)	/* sets up the cache seq_open() uses */
+{
+	seq_file_cache = KMEM_CACHE(seq_file, SLAB_ACCOUNT|SLAB_PANIC);
+}
diff --git a/fs/super.c b/fs/super.c
index 672538ca9831..5fa9a8d8d865 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -37,6 +37,7 @@
 #include <linux/user_namespace.h>
 #include "internal.h"
 
+static int thaw_super_locked(struct super_block *sb);
 
 static LIST_HEAD(super_blocks);
 static DEFINE_SPINLOCK(sb_lock);
@@ -574,6 +575,28 @@ void drop_super_exclusive(struct super_block *sb)
 }
 EXPORT_SYMBOL(drop_super_exclusive);
 
+static void __iterate_supers(void (*f)(struct super_block *))
+{
+	struct super_block *sb, *p = NULL;
+
+	spin_lock(&sb_lock);
+	list_for_each_entry(sb, &super_blocks, s_list) {
+		if (hlist_unhashed(&sb->s_instances))
+			continue;
+		sb->s_count++;	/* keep a count on sb while unlocked */
+		spin_unlock(&sb_lock);
+
+		f(sb);		/* called without sb_lock held */
+
+		spin_lock(&sb_lock);
+		if (p)
+			__put_super(p);	/* presumably undoes s_count++ */
+		p = sb;		/* put on the next pass or after the loop */
+	}
+	if (p)
+		__put_super(p);
+	spin_unlock(&sb_lock);
+}
 /**
  *	iterate_supers - call function for all active superblocks
  *	@f: function to call
@@ -881,33 +904,22 @@ cancel_readonly:
 	return retval;
 }
 
-static void do_emergency_remount(struct work_struct *work)
+static void do_emergency_remount_callback(struct super_block *sb)
 {
-	struct super_block *sb, *p = NULL;
-
-	spin_lock(&sb_lock);
-	list_for_each_entry(sb, &super_blocks, s_list) {
-		if (hlist_unhashed(&sb->s_instances))
-			continue;
-		sb->s_count++;
-		spin_unlock(&sb_lock);
-		down_write(&sb->s_umount);
-		if (sb->s_root && sb->s_bdev && (sb->s_flags & SB_BORN) &&
-		    !sb_rdonly(sb)) {
-			/*
-			 * What lock protects sb->s_flags??
-			 */
-			do_remount_sb(sb, SB_RDONLY, NULL, 1);
-		}
-		up_write(&sb->s_umount);
-		spin_lock(&sb_lock);
-		if (p)
-			__put_super(p);
-		p = sb;
+	down_write(&sb->s_umount);
+	if (sb->s_root && sb->s_bdev && (sb->s_flags & SB_BORN) &&
+	    !sb_rdonly(sb)) {
+		/*
+		 * What lock protects sb->s_flags??
+		 */
+		do_remount_sb(sb, SB_RDONLY, NULL, 1);
 	}
-	if (p)
-		__put_super(p);
-	spin_unlock(&sb_lock);
+	up_write(&sb->s_umount);
+}
+
+static void do_emergency_remount(struct work_struct *work)
+{
+	__iterate_supers(do_emergency_remount_callback);
 	kfree(work);
 	printk("Emergency Remount complete\n");
 }
@@ -923,6 +935,40 @@ void emergency_remount(void)
 	}
 }
 
+static void do_thaw_all_callback(struct super_block *sb)
+{
+	down_write(&sb->s_umount);
+	if (sb->s_root && (sb->s_flags & SB_BORN)) {
+		emergency_thaw_bdev(sb);
+		thaw_super_locked(sb);	/* callee does the up_write() */
+	} else {
+		up_write(&sb->s_umount);	/* nothing to thaw */
+	}
+}
+
+static void do_thaw_all(struct work_struct *work)
+{
+	__iterate_supers(do_thaw_all_callback);
+	kfree(work);	/* allocated in emergency_thaw_all() */
+	printk(KERN_WARNING "Emergency Thaw complete\n");
+}
+
+/**
+ * emergency_thaw_all - forcibly thaw every frozen filesystem
+ *
+ * Used for emergency unfreeze of all filesystems via SysRq
+ */
+void emergency_thaw_all(void)
+{
+	struct work_struct *work;
+
+	work = kmalloc(sizeof(*work), GFP_ATOMIC);
+	if (work) {	/* allocation failure is silently ignored */
+		INIT_WORK(work, do_thaw_all);
+		schedule_work(work);	/* do_thaw_all() frees work */
+	}
+}
+
 /*
  * Unnamed block devices are dummy devices used by virtual
  * filesystems which don't use real block-devices.  -- jrs
@@ -1492,11 +1538,10 @@ EXPORT_SYMBOL(freeze_super);
  *
  * Unlocks the filesystem and marks it writeable again after freeze_super().
  */
-int thaw_super(struct super_block *sb)
+static int thaw_super_locked(struct super_block *sb)
 {
 	int error;
 
-	down_write(&sb->s_umount);
 	if (sb->s_writers.frozen != SB_FREEZE_COMPLETE) {
 		up_write(&sb->s_umount);
 		return -EINVAL;
@@ -1527,4 +1572,10 @@ out:
 	deactivate_locked_super(sb);
 	return 0;
 }
+
+int thaw_super(struct super_block *sb)
+{
+	down_write(&sb->s_umount);	/* thaw_super_locked() wants it held */
+	return thaw_super_locked(sb);
+}
 EXPORT_SYMBOL(thaw_super);
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c
index 2dcf3d473fec..9571616b5dda 100644
--- a/fs/ubifs/find.c
+++ b/fs/ubifs/find.c
@@ -632,7 +632,7 @@ static int scan_for_idx_cb(struct ubifs_info *c,
  */
 static const struct ubifs_lprops *scan_for_leb_for_idx(struct ubifs_info *c)
 {
-	struct ubifs_lprops *lprops;
+	const struct ubifs_lprops *lprops;
 	struct scan_data data;
 	int err;
 
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index 6c3a1abd0e22..f5a46844340c 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -244,7 +244,6 @@ static void remove_from_lpt_heap(struct ubifs_info *c,
 /**
  * lpt_heap_replace - replace lprops in a category heap.
  * @c: UBIFS file-system description object
- * @old_lprops: LEB properties to replace
  * @new_lprops: LEB properties with which to replace
  * @cat: LEB category
  *
@@ -254,7 +253,6 @@ static void remove_from_lpt_heap(struct ubifs_info *c,
  * lprops.  This function does that.
  */
 static void lpt_heap_replace(struct ubifs_info *c,
-			     struct ubifs_lprops *old_lprops,
 			     struct ubifs_lprops *new_lprops, int cat)
 {
 	struct ubifs_lpt_heap *heap;
@@ -362,7 +360,7 @@ void ubifs_replace_cat(struct ubifs_info *c, struct ubifs_lprops *old_lprops,
 	case LPROPS_DIRTY:
 	case LPROPS_DIRTY_IDX:
 	case LPROPS_FREE:
-		lpt_heap_replace(c, old_lprops, new_lprops, cat);
+		lpt_heap_replace(c, new_lprops, cat);
 		break;
 	case LPROPS_UNCAT:
 	case LPROPS_EMPTY:
diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c
index aab87340d3de..16f03d9929e5 100644
--- a/fs/ubifs/scan.c
+++ b/fs/ubifs/scan.c
@@ -175,7 +175,6 @@ struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum,
 void ubifs_end_scan(const struct ubifs_info *c, struct ubifs_scan_leb *sleb,
 		    int lnum, int offs)
 {
-	lnum = lnum;
 	dbg_scan("stop scanning LEB %d at offset %d", lnum, offs);
 	ubifs_assert(offs % c->min_io_size == 0);
 
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index b16ef162344a..6c397a389105 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1737,8 +1737,11 @@ static void ubifs_remount_ro(struct ubifs_info *c)
 
 	dbg_save_space_info(c);
 
-	for (i = 0; i < c->jhead_cnt; i++)
-		ubifs_wbuf_sync(&c->jheads[i].wbuf);
+	for (i = 0; i < c->jhead_cnt; i++) {
+		err = ubifs_wbuf_sync(&c->jheads[i].wbuf);
+		if (err)
+			ubifs_ro_mode(c, err);
+	}
 
 	c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY);
 	c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS);
@@ -1804,8 +1807,11 @@ static void ubifs_put_super(struct super_block *sb)
 			int err;
 
 			/* Synchronize write-buffers */
-			for (i = 0; i < c->jhead_cnt; i++)
-				ubifs_wbuf_sync(&c->jheads[i].wbuf);
+			for (i = 0; i < c->jhead_cnt; i++) {
+				err = ubifs_wbuf_sync(&c->jheads[i].wbuf);
+				if (err)
+					ubifs_ro_mode(c, err);
+			}
 
 			/*
 			 * We are being cleanly unmounted which means the
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 39387bdd225d..4bcc095fe44a 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -1947,7 +1947,7 @@ void
 xfs_alloc_compute_maxlevels(
 	xfs_mount_t	*mp)	/* file system mount structure */
 {
-	mp->m_ag_maxlevels = xfs_btree_compute_maxlevels(mp, mp->m_alloc_mnr,
+	mp->m_ag_maxlevels = xfs_btree_compute_maxlevels(mp->m_alloc_mnr,
 			(mp->m_sb.sb_agblocks + 1) / 2);
 }
 
@@ -1959,7 +1959,6 @@ xfs_alloc_compute_maxlevels(
  */
 xfs_extlen_t
 xfs_alloc_longest_free_extent(
-	struct xfs_mount	*mp,
 	struct xfs_perag	*pag,
 	xfs_extlen_t		need,
 	xfs_extlen_t		reserved)
@@ -2038,8 +2037,7 @@ xfs_alloc_space_available(
 
 	/* do we have enough contiguous free space for the allocation? */
 	alloc_len = args->minlen + (args->alignment - 1) + args->minalignslop;
-	longest = xfs_alloc_longest_free_extent(args->mp, pag, min_free,
-			reservation);
+	longest = xfs_alloc_longest_free_extent(pag, min_free, reservation);
 	if (longest < alloc_len)
 		return false;
 
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index a311a2414a6b..cbf789ea5a4e 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -116,9 +116,8 @@ xfs_alloc_allow_busy_reuse(int datatype)
 unsigned int xfs_alloc_set_aside(struct xfs_mount *mp);
 unsigned int xfs_alloc_ag_max_usable(struct xfs_mount *mp);
 
-xfs_extlen_t xfs_alloc_longest_free_extent(struct xfs_mount *mp,
-		struct xfs_perag *pag, xfs_extlen_t need,
-		xfs_extlen_t reserved);
+xfs_extlen_t xfs_alloc_longest_free_extent(struct xfs_perag *pag,
+		xfs_extlen_t need, xfs_extlen_t reserved);
 unsigned int xfs_alloc_min_freelist(struct xfs_mount *mp,
 		struct xfs_perag *pag);
 
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 3b03d886df66..6a7c2f03ea11 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -3225,7 +3225,7 @@ xfs_bmap_longest_free_extent(
 		}
 	}
 
-	longest = xfs_alloc_longest_free_extent(mp, pag,
+	longest = xfs_alloc_longest_free_extent(pag,
 				xfs_alloc_min_freelist(mp, pag),
 				xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE));
 	if (*blen < longest)
@@ -5667,7 +5667,6 @@ xfs_bmap_collapse_extents(
 	xfs_fileoff_t		*next_fsb,
 	xfs_fileoff_t		offset_shift_fsb,
 	bool			*done,
-	xfs_fileoff_t		stop_fsb,
 	xfs_fsblock_t		*firstblock,
 	struct xfs_defer_ops	*dfops)
 {
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index f3be6416260b..2b766b37096d 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -228,7 +228,7 @@ void	xfs_bmap_del_extent_cow(struct xfs_inode *ip,
 uint	xfs_default_attroffset(struct xfs_inode *ip);
 int	xfs_bmap_collapse_extents(struct xfs_trans *tp, struct xfs_inode *ip,
 		xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb,
-		bool *done, xfs_fileoff_t stop_fsb, xfs_fsblock_t *firstblock,
+		bool *done, xfs_fsblock_t *firstblock,
 		struct xfs_defer_ops *dfops);
 int	xfs_bmap_insert_extents(struct xfs_trans *tp, struct xfs_inode *ip,
 		xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb,
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index edc0193358a5..ac7d66427e42 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -4531,7 +4531,6 @@ xfs_btree_sblock_verify(
  */
 uint
 xfs_btree_compute_maxlevels(
-	struct xfs_mount	*mp,
 	uint			*limits,
 	unsigned long		len)
 {
@@ -4839,7 +4838,6 @@ xfs_btree_query_all(
  */
 xfs_extlen_t
 xfs_btree_calc_size(
-	struct xfs_mount	*mp,
 	uint			*limits,
 	unsigned long long	len)
 {
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index 58e30c0975c3..9227159a751e 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -481,10 +481,8 @@ xfs_failaddr_t xfs_btree_lblock_v5hdr_verify(struct xfs_buf *bp,
 xfs_failaddr_t xfs_btree_lblock_verify(struct xfs_buf *bp,
 		unsigned int max_recs);
 
-uint xfs_btree_compute_maxlevels(struct xfs_mount *mp, uint *limits,
-				 unsigned long len);
-xfs_extlen_t xfs_btree_calc_size(struct xfs_mount *mp, uint *limits,
-		unsigned long long len);
+uint xfs_btree_compute_maxlevels(uint *limits, unsigned long len);
+xfs_extlen_t xfs_btree_calc_size(uint *limits, unsigned long long len);
 
 /* return codes */
 #define XFS_BTREE_QUERY_RANGE_CONTINUE	0	/* keep iterating */
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 0e2cf5f0be1f..de627fa19168 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -2406,7 +2406,7 @@ xfs_ialloc_compute_maxlevels(
 	uint		inodes;
 
 	inodes = (1LL << XFS_INO_AGINO_BITS(mp)) >> XFS_INODES_PER_CHUNK_LOG;
-	mp->m_in_maxlevels = xfs_btree_compute_maxlevels(mp, mp->m_inobt_mnr,
+	mp->m_in_maxlevels = xfs_btree_compute_maxlevels(mp->m_inobt_mnr,
 							 inodes);
 }
 
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index a2dd7f4a2719..367e9a0726e6 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -556,7 +556,7 @@ xfs_inobt_max_size(
 	if (mp->m_inobt_mxr[0] == 0)
 		return 0;
 
-	return xfs_btree_calc_size(mp, mp->m_inobt_mnr,
+	return xfs_btree_calc_size(mp->m_inobt_mnr,
 		(uint64_t)mp->m_sb.sb_agblocks * mp->m_sb.sb_inopblock /
 				XFS_INODES_PER_CHUNK);
 }
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index bee68c23d612..560e28473024 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -351,7 +351,6 @@ xfs_refcount_merge_center_extents(
 	struct xfs_refcount_irec	*center,
 	struct xfs_refcount_irec	*right,
 	unsigned long long		extlen,
-	xfs_agblock_t			*agbno,
 	xfs_extlen_t			*aglen)
 {
 	int				error;
@@ -471,7 +470,6 @@ xfs_refcount_merge_right_extent(
 	struct xfs_btree_cur		*cur,
 	struct xfs_refcount_irec	*right,
 	struct xfs_refcount_irec	*cright,
-	xfs_agblock_t			*agbno,
 	xfs_extlen_t			*aglen)
 {
 	int				error;
@@ -749,7 +747,7 @@ xfs_refcount_merge_extents(
 	    ulen < MAXREFCEXTLEN) {
 		*shape_changed = true;
 		return xfs_refcount_merge_center_extents(cur, &left, &cleft,
-				&right, ulen, agbno, aglen);
+				&right, ulen, aglen);
 	}
 
 	/* Try to merge left and cleft. */
@@ -778,7 +776,7 @@ xfs_refcount_merge_extents(
 	    ulen < MAXREFCEXTLEN) {
 		*shape_changed = true;
 		return xfs_refcount_merge_right_extent(cur, &right, &cright,
-				agbno, aglen);
+				aglen);
 	}
 
 	return error;
@@ -1356,9 +1354,7 @@ xfs_refcount_adjust_cow_extents(
 	struct xfs_btree_cur	*cur,
 	xfs_agblock_t		agbno,
 	xfs_extlen_t		aglen,
-	enum xfs_refc_adjust_op	adj,
-	struct xfs_defer_ops	*dfops,
-	struct xfs_owner_info	*oinfo)
+	enum xfs_refc_adjust_op	adj)
 {
 	struct xfs_refcount_irec	ext, tmp;
 	int				error;
@@ -1437,8 +1433,7 @@ xfs_refcount_adjust_cow(
 	struct xfs_btree_cur	*cur,
 	xfs_agblock_t		agbno,
 	xfs_extlen_t		aglen,
-	enum xfs_refc_adjust_op	adj,
-	struct xfs_defer_ops	*dfops)
+	enum xfs_refc_adjust_op	adj)
 {
 	bool			shape_changed;
 	int			error;
@@ -1465,8 +1460,7 @@ xfs_refcount_adjust_cow(
 		goto out_error;
 
 	/* Now that we've taken care of the ends, adjust the middle extents */
-	error = xfs_refcount_adjust_cow_extents(cur, agbno, aglen, adj,
-			dfops, NULL);
+	error = xfs_refcount_adjust_cow_extents(cur, agbno, aglen, adj);
 	if (error)
 		goto out_error;
 
@@ -1493,7 +1487,7 @@ __xfs_refcount_cow_alloc(
 
 	/* Add refcount btree reservation */
 	return xfs_refcount_adjust_cow(rcur, agbno, aglen,
-			XFS_REFCOUNT_ADJUST_COW_ALLOC, dfops);
+			XFS_REFCOUNT_ADJUST_COW_ALLOC);
 }
 
 /*
@@ -1511,7 +1505,7 @@ __xfs_refcount_cow_free(
 
 	/* Remove refcount btree reservation */
 	return xfs_refcount_adjust_cow(rcur, agbno, aglen,
-			XFS_REFCOUNT_ADJUST_COW_FREE, dfops);
+			XFS_REFCOUNT_ADJUST_COW_FREE);
 }
 
 /* Record a CoW staging extent in the refcount btree. */
@@ -1568,7 +1562,7 @@ struct xfs_refcount_recovery {
 /* Stuff an extent on the recovery list. */
 STATIC int
 xfs_refcount_recover_extent(
-	struct xfs_btree_cur		*cur,
+	struct xfs_btree_cur 		*cur,
 	union xfs_btree_rec		*rec,
 	void				*priv)
 {
diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c
index 265fdcefcbae..375abfeb6267 100644
--- a/fs/xfs/libxfs/xfs_refcount_btree.c
+++ b/fs/xfs/libxfs/xfs_refcount_btree.c
@@ -373,7 +373,6 @@ xfs_refcountbt_init_cursor(
  */
 int
 xfs_refcountbt_maxrecs(
-	struct xfs_mount	*mp,
 	int			blocklen,
 	bool			leaf)
 {
@@ -390,7 +389,7 @@ void
 xfs_refcountbt_compute_maxlevels(
 	struct xfs_mount		*mp)
 {
-	mp->m_refc_maxlevels = xfs_btree_compute_maxlevels(mp,
+	mp->m_refc_maxlevels = xfs_btree_compute_maxlevels(
 			mp->m_refc_mnr, mp->m_sb.sb_agblocks);
 }
 
@@ -400,7 +399,7 @@ xfs_refcountbt_calc_size(
 	struct xfs_mount	*mp,
 	unsigned long long	len)
 {
-	return xfs_btree_calc_size(mp, mp->m_refc_mnr, len);
+	return xfs_btree_calc_size(mp->m_refc_mnr, len);
 }
 
 /*
diff --git a/fs/xfs/libxfs/xfs_refcount_btree.h b/fs/xfs/libxfs/xfs_refcount_btree.h
index 9db008b955b7..2bc4694ef146 100644
--- a/fs/xfs/libxfs/xfs_refcount_btree.h
+++ b/fs/xfs/libxfs/xfs_refcount_btree.h
@@ -60,8 +60,7 @@ struct xfs_mount;
 extern struct xfs_btree_cur *xfs_refcountbt_init_cursor(struct xfs_mount *mp,
 		struct xfs_trans *tp, struct xfs_buf *agbp, xfs_agnumber_t agno,
 		struct xfs_defer_ops *dfops);
-extern int xfs_refcountbt_maxrecs(struct xfs_mount *mp, int blocklen,
-		bool leaf);
+extern int xfs_refcountbt_maxrecs(int blocklen, bool leaf);
 extern void xfs_refcountbt_compute_maxlevels(struct xfs_mount *mp);
 
 extern xfs_extlen_t xfs_refcountbt_calc_size(struct xfs_mount *mp,
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index 79822cf6ebe3..fba8d2718017 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -376,7 +376,6 @@ xfs_rmap_free_check_owner(
 	struct xfs_mount	*mp,
 	uint64_t		ltoff,
 	struct xfs_rmap_irec	*rec,
-	xfs_fsblock_t		bno,
 	xfs_filblks_t		len,
 	uint64_t		owner,
 	uint64_t		offset,
@@ -519,7 +518,7 @@ xfs_rmap_unmap(
 			bno + len, out_error);
 
 	/* Check owner information. */
-	error = xfs_rmap_free_check_owner(mp, ltoff, &ltrec, bno, len, owner,
+	error = xfs_rmap_free_check_owner(mp, ltoff, &ltrec, len, owner,
 			offset, flags);
 	if (error)
 		goto out_error;
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c
index 8b0d0de1cd11..d756e0b84abf 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.c
+++ b/fs/xfs/libxfs/xfs_rmap_btree.c
@@ -499,7 +499,6 @@ xfs_rmapbt_init_cursor(
  */
 int
 xfs_rmapbt_maxrecs(
-	struct xfs_mount	*mp,
 	int			blocklen,
 	int			leaf)
 {
@@ -534,7 +533,7 @@ xfs_rmapbt_compute_maxlevels(
 	if (xfs_sb_version_hasreflink(&mp->m_sb))
 		mp->m_rmap_maxlevels = XFS_BTREE_MAXLEVELS;
 	else
-		mp->m_rmap_maxlevels = xfs_btree_compute_maxlevels(mp,
+		mp->m_rmap_maxlevels = xfs_btree_compute_maxlevels(
 				mp->m_rmap_mnr, mp->m_sb.sb_agblocks);
 }
 
@@ -544,7 +543,7 @@ xfs_rmapbt_calc_size(
 	struct xfs_mount	*mp,
 	unsigned long long	len)
 {
-	return xfs_btree_calc_size(mp, mp->m_rmap_mnr, len);
+	return xfs_btree_calc_size(mp->m_rmap_mnr, len);
 }
 
 /*
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.h b/fs/xfs/libxfs/xfs_rmap_btree.h
index 19c08e933049..d68d96eed7ea 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.h
+++ b/fs/xfs/libxfs/xfs_rmap_btree.h
@@ -55,7 +55,7 @@ struct xfs_mount;
 struct xfs_btree_cur *xfs_rmapbt_init_cursor(struct xfs_mount *mp,
 				struct xfs_trans *tp, struct xfs_buf *bp,
 				xfs_agnumber_t agno);
-int xfs_rmapbt_maxrecs(struct xfs_mount *mp, int blocklen, int leaf);
+int xfs_rmapbt_maxrecs(int blocklen, int leaf);
 extern void xfs_rmapbt_compute_maxlevels(struct xfs_mount *mp);
 
 extern xfs_extlen_t xfs_rmapbt_calc_size(struct xfs_mount *mp,
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 53433cc024fd..d9b94bd5f689 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -756,15 +756,13 @@ xfs_sb_mount_common(
 	mp->m_bmap_dmnr[0] = mp->m_bmap_dmxr[0] / 2;
 	mp->m_bmap_dmnr[1] = mp->m_bmap_dmxr[1] / 2;
 
-	mp->m_rmap_mxr[0] = xfs_rmapbt_maxrecs(mp, sbp->sb_blocksize, 1);
-	mp->m_rmap_mxr[1] = xfs_rmapbt_maxrecs(mp, sbp->sb_blocksize, 0);
+	mp->m_rmap_mxr[0] = xfs_rmapbt_maxrecs(sbp->sb_blocksize, 1);
+	mp->m_rmap_mxr[1] = xfs_rmapbt_maxrecs(sbp->sb_blocksize, 0);
 	mp->m_rmap_mnr[0] = mp->m_rmap_mxr[0] / 2;
 	mp->m_rmap_mnr[1] = mp->m_rmap_mxr[1] / 2;
 
-	mp->m_refc_mxr[0] = xfs_refcountbt_maxrecs(mp, sbp->sb_blocksize,
-			true);
-	mp->m_refc_mxr[1] = xfs_refcountbt_maxrecs(mp, sbp->sb_blocksize,
-			false);
+	mp->m_refc_mxr[0] = xfs_refcountbt_maxrecs(sbp->sb_blocksize, true);
+	mp->m_refc_mxr[1] = xfs_refcountbt_maxrecs(sbp->sb_blocksize, false);
 	mp->m_refc_mnr[0] = mp->m_refc_mxr[0] / 2;
 	mp->m_refc_mnr[1] = mp->m_refc_mxr[1] / 2;
 
diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
index 5f17641f040f..3bccdf73e141 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c
@@ -734,8 +734,7 @@ xfs_calc_clear_agi_bucket_reservation(
  *    the xfs_disk_dquot_t: sizeof(struct xfs_disk_dquot)
  */
 STATIC uint
-xfs_calc_qm_setqlim_reservation(
-	struct xfs_mount	*mp)
+xfs_calc_qm_setqlim_reservation(void)
 {
 	return xfs_calc_buf_res(1, sizeof(struct xfs_disk_dquot));
 }
@@ -772,8 +771,7 @@ xfs_calc_qm_quotaoff_reservation(
  *    the xfs_qoff_logitem_t: sizeof(struct xfs_qoff_logitem) * 2
  */
 STATIC uint
-xfs_calc_qm_quotaoff_end_reservation(
-	struct xfs_mount	*mp)
+xfs_calc_qm_quotaoff_end_reservation(void)
 {
 	return sizeof(struct xfs_qoff_logitem) * 2;
 }
@@ -877,14 +875,14 @@ xfs_trans_resv_calc(
 	 * The following transactions are logged in logical format with
 	 * a default log count.
 	 */
-	resp->tr_qm_setqlim.tr_logres = xfs_calc_qm_setqlim_reservation(mp);
+	resp->tr_qm_setqlim.tr_logres = xfs_calc_qm_setqlim_reservation();
 	resp->tr_qm_setqlim.tr_logcount = XFS_DEFAULT_LOG_COUNT;
 
 	resp->tr_qm_quotaoff.tr_logres = xfs_calc_qm_quotaoff_reservation(mp);
 	resp->tr_qm_quotaoff.tr_logcount = XFS_DEFAULT_LOG_COUNT;
 
 	resp->tr_qm_equotaoff.tr_logres =
-		xfs_calc_qm_quotaoff_end_reservation(mp);
+		xfs_calc_qm_quotaoff_end_reservation();
 	resp->tr_qm_equotaoff.tr_logcount = XFS_DEFAULT_LOG_COUNT;
 
 	resp->tr_sb.tr_logres = xfs_calc_sb_reservation(mp);
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 31f1f10eecd1..0ab824f574ed 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1195,16 +1195,22 @@ xfs_vm_writepages(
 	int			ret;
 
 	xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
-	if (dax_mapping(mapping))
-		return dax_writeback_mapping_range(mapping,
-				xfs_find_bdev_for_inode(mapping->host), wbc);
-
 	ret = write_cache_pages(mapping, wbc, xfs_do_writepage, &wpc);
 	if (wpc.ioend)
 		ret = xfs_submit_ioend(wbc, wpc.ioend, ret);
 	return ret;
 }
 
+STATIC int				/* ->writepages of xfs_dax_aops */
+xfs_dax_writepages(
+	struct address_space	*mapping,
+	struct writeback_control *wbc)
+{
+	xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
+	return dax_writeback_mapping_range(mapping,
+			xfs_find_bdev_for_inode(mapping->host), wbc);
+}
+
 /*
  * Called to move a page into cleanable state - and from there
  * to be released. The page should already be clean. We always
@@ -1367,17 +1373,6 @@ out_unlock:
 	return error;
 }
 
-STATIC ssize_t
-xfs_vm_direct_IO(
-	struct kiocb		*iocb,
-	struct iov_iter		*iter)
-{
-	/*
-	 * We just need the method present so that open/fcntl allow direct I/O.
-	 */
-	return -EINVAL;
-}
-
 STATIC sector_t
 xfs_vm_bmap(
 	struct address_space	*mapping,
@@ -1472,19 +1467,8 @@ xfs_vm_set_page_dirty(
 	newly_dirty = !TestSetPageDirty(page);
 	spin_unlock(&mapping->private_lock);
 
-	if (newly_dirty) {
-		/* sigh - __set_page_dirty() is static, so copy it here, too */
-		unsigned long flags;
-
-		spin_lock_irqsave(&mapping->tree_lock, flags);
-		if (page->mapping) {	/* Race with truncate? */
-			WARN_ON_ONCE(!PageUptodate(page));
-			account_page_dirtied(page, mapping);
-			radix_tree_tag_set(&mapping->page_tree,
-					page_index(page), PAGECACHE_TAG_DIRTY);
-		}
-		spin_unlock_irqrestore(&mapping->tree_lock, flags);
-	}
+	if (newly_dirty)
+		__set_page_dirty(page, mapping, 1);
 	unlock_page_memcg(page);
 	if (newly_dirty)
 		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
@@ -1500,8 +1484,15 @@ const struct address_space_operations xfs_address_space_operations = {
 	.releasepage		= xfs_vm_releasepage,
 	.invalidatepage		= xfs_vm_invalidatepage,
 	.bmap			= xfs_vm_bmap,
-	.direct_IO		= xfs_vm_direct_IO,
+	.direct_IO		= noop_direct_IO,
 	.migratepage		= buffer_migrate_page,
 	.is_partially_uptodate  = block_is_partially_uptodate,
 	.error_remove_page	= generic_error_remove_page,
 };
+
+const struct address_space_operations xfs_dax_aops = {
+	.writepages		= xfs_dax_writepages,	/* DAX writeback */
+	.direct_IO		= noop_direct_IO,
+	.set_page_dirty		= noop_set_page_dirty,
+	.invalidatepage		= noop_invalidatepage,
+};
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index 88c85ea63da0..69346d460dfa 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -54,6 +54,7 @@ struct xfs_ioend {
 };
 
 extern const struct address_space_operations xfs_address_space_operations;
+extern const struct address_space_operations xfs_dax_aops;
 
 int	xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size);
 
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index e5fb008d75e8..2203465e63ea 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -53,6 +53,25 @@ xfs_bui_item_free(
 	kmem_zone_free(xfs_bui_zone, buip);
 }
 
+/*
+ * Freeing the BUI requires that we remove it from the AIL if it has already
+ * been placed there. However, the BUI may not yet have been placed in the AIL
+ * when called by xfs_bui_release() from BUD processing due to the ordering of
+ * committed vs unpin operations in bulk insert operations. Hence the reference
+ * count to ensure only the last caller frees the BUI.
+ */
+void
+xfs_bui_release(
+	struct xfs_bui_log_item	*buip)
+{
+	ASSERT(atomic_read(&buip->bui_refcount) > 0);
+	if (atomic_dec_and_test(&buip->bui_refcount)) { /* last reference */
+		xfs_trans_ail_remove(&buip->bui_item, SHUTDOWN_LOG_IO_ERROR);
+		xfs_bui_item_free(buip);
+	}
+}
+
+
 STATIC void
 xfs_bui_item_size(
 	struct xfs_log_item	*lip,
@@ -142,7 +161,7 @@ xfs_bui_item_unlock(
 	struct xfs_log_item	*lip)
 {
 	if (lip->li_flags & XFS_LI_ABORTED)
-		xfs_bui_item_free(BUI_ITEM(lip));
+		xfs_bui_release(BUI_ITEM(lip));
 }
 
 /*
@@ -206,24 +225,6 @@ xfs_bui_init(
 	return buip;
 }
 
-/*
- * Freeing the BUI requires that we remove it from the AIL if it has already
- * been placed there. However, the BUI may not yet have been placed in the AIL
- * when called by xfs_bui_release() from BUD processing due to the ordering of
- * committed vs unpin operations in bulk insert operations. Hence the reference
- * count to ensure only the last caller frees the BUI.
- */
-void
-xfs_bui_release(
-	struct xfs_bui_log_item	*buip)
-{
-	ASSERT(atomic_read(&buip->bui_refcount) > 0);
-	if (atomic_dec_and_test(&buip->bui_refcount)) {
-		xfs_trans_ail_remove(&buip->bui_item, SHUTDOWN_LOG_IO_ERROR);
-		xfs_bui_item_free(buip);
-	}
-}
-
 static inline struct xfs_bud_log_item *BUD_ITEM(struct xfs_log_item *lip)
 {
 	return container_of(lip, struct xfs_bud_log_item, bud_item);
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 05dee8fdd895..8cd8c412f52d 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1326,7 +1326,6 @@ xfs_collapse_file_space(
 	int			error;
 	struct xfs_defer_ops	dfops;
 	xfs_fsblock_t		first_block;
-	xfs_fileoff_t		stop_fsb = XFS_B_TO_FSB(mp, VFS_I(ip)->i_size);
 	xfs_fileoff_t		next_fsb = XFS_B_TO_FSB(mp, offset + len);
 	xfs_fileoff_t		shift_fsb = XFS_B_TO_FSB(mp, len);
 	uint			resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
@@ -1361,7 +1360,7 @@ xfs_collapse_file_space(
 
 		xfs_defer_init(&dfops, &first_block);
 		error = xfs_bmap_collapse_extents(tp, ip, &next_fsb, shift_fsb,
-				&done, stop_fsb, &first_block, &dfops);
+				&done, &first_block, &dfops);
 		if (error)
 			goto out_bmap_cancel;
 
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index ac669a10c62f..55661cbdb51b 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1754,7 +1754,6 @@ xfs_buftarg_shrink_count(
 
 void
 xfs_free_buftarg(
-	struct xfs_mount	*mp,
 	struct xfs_buftarg	*btp)
 {
 	unregister_shrinker(&btp->bt_shrinker);
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 2f4c91452861..edced162a674 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -388,7 +388,7 @@ xfs_buf_update_cksum(struct xfs_buf *bp, unsigned long cksum_offset)
  */
 extern xfs_buftarg_t *xfs_alloc_buftarg(struct xfs_mount *,
 			struct block_device *, struct dax_device *);
-extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *);
+extern void xfs_free_buftarg(struct xfs_buftarg *);
 extern void xfs_wait_buftarg(xfs_buftarg_t *);
 extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int);
 
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index b2cde5426182..7b68e6c9a474 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -50,19 +50,19 @@ xfs_trim_extents(
 
 	pag = xfs_perag_get(mp, agno);
 
-	error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
-	if (error || !agbp)
-		goto out_put_perag;
-
-	cur = xfs_allocbt_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_CNT);
-
 	/*
 	 * Force out the log.  This means any transactions that might have freed
-	 * space before we took the AGF buffer lock are now on disk, and the
+	 * space before we take the AGF buffer lock are now on disk, and the
 	 * volatile disk cache is flushed.
 	 */
 	xfs_log_force(mp, XFS_LOG_SYNC);
 
+	error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
+	if (error || !agbp)
+		goto out_put_perag;
+
+	cur = xfs_allocbt_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_CNT);
+
 	/*
 	 * Look up the longest btree in the AGF and start with it.
 	 */
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 64da90655e95..b5b1e567b9f4 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -51,6 +51,24 @@ xfs_efi_item_free(
 }
 
 /*
+ * Freeing the EFI requires that we remove it from the AIL if it has already
+ * been placed there. However, the EFI may not yet have been placed in the AIL
+ * when called by xfs_efi_release() from EFD processing due to the ordering of
+ * committed vs unpin operations in bulk insert operations. Hence the reference
+ * count to ensure only the last caller frees the EFI.
+ */
+void
+xfs_efi_release(
+	struct xfs_efi_log_item	*efip)
+{
+	ASSERT(atomic_read(&efip->efi_refcount) > 0);
+	if (atomic_dec_and_test(&efip->efi_refcount)) {
+		xfs_trans_ail_remove(&efip->efi_item, SHUTDOWN_LOG_IO_ERROR);
+		xfs_efi_item_free(efip);
+	}
+}
+
+/*
  * This returns the number of iovecs needed to log the given efi item.
  * We only need 1 iovec for an efi item.  It just logs the efi_log_format
  * structure.
@@ -151,7 +169,7 @@ xfs_efi_item_unlock(
 	struct xfs_log_item	*lip)
 {
 	if (lip->li_flags & XFS_LI_ABORTED)
-		xfs_efi_item_free(EFI_ITEM(lip));
+		xfs_efi_release(EFI_ITEM(lip));
 }
 
 /*
@@ -279,24 +297,6 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt)
 	return -EFSCORRUPTED;
 }
 
-/*
- * Freeing the efi requires that we remove it from the AIL if it has already
- * been placed there. However, the EFI may not yet have been placed in the AIL
- * when called by xfs_efi_release() from EFD processing due to the ordering of
- * committed vs unpin operations in bulk insert operations. Hence the reference
- * count to ensure only the last caller frees the EFI.
- */
-void
-xfs_efi_release(
-	struct xfs_efi_log_item	*efip)
-{
-	ASSERT(atomic_read(&efip->efi_refcount) > 0);
-	if (atomic_dec_and_test(&efip->efi_refcount)) {
-		xfs_trans_ail_remove(&efip->efi_item, SHUTDOWN_LOG_IO_ERROR);
-		xfs_efi_item_free(efip);
-	}
-}
-
 static inline struct xfs_efd_log_item *EFD_ITEM(struct xfs_log_item *lip)
 {
 	return container_of(lip, struct xfs_efd_log_item, efd_item);
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 043ca3808ea2..3f8722e51dbe 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -34,7 +34,6 @@
 
 struct xfs_fstrm_item {
 	struct xfs_mru_cache_elem	mru;
-	struct xfs_inode		*ip;
 	xfs_agnumber_t			ag; /* AG in use for this directory */
 };
 
@@ -122,14 +121,15 @@ xfs_filestream_put_ag(
 
 static void
 xfs_fstrm_free_func(
+	void			*data,
 	struct xfs_mru_cache_elem *mru)
 {
+	struct xfs_mount	*mp = data;
 	struct xfs_fstrm_item	*item =
 		container_of(mru, struct xfs_fstrm_item, mru);
 
-	xfs_filestream_put_ag(item->ip->i_mount, item->ag);
-
-	trace_xfs_filestream_free(item->ip, item->ag);
+	xfs_filestream_put_ag(mp, item->ag);
+	trace_xfs_filestream_free(mp, mru->key, item->ag);
 
 	kmem_free(item);
 }
@@ -165,7 +165,7 @@ xfs_filestream_pick_ag(
 	trylock = XFS_ALLOC_FLAG_TRYLOCK;
 
 	for (nscan = 0; 1; nscan++) {
-		trace_xfs_filestream_scan(ip, ag);
+		trace_xfs_filestream_scan(mp, ip->i_ino, ag);
 
 		pag = xfs_perag_get(mp, ag);
 
@@ -198,7 +198,7 @@ xfs_filestream_pick_ag(
 			goto next_ag;
 		}
 
-		longest = xfs_alloc_longest_free_extent(mp, pag,
+		longest = xfs_alloc_longest_free_extent(pag,
 				xfs_alloc_min_freelist(mp, pag),
 				xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE));
 		if (((minlen && longest >= minlen) ||
@@ -265,7 +265,6 @@ next_ag:
 		goto out_put_ag;
 
 	item->ag = *agp;
-	item->ip = ip;
 
 	err = xfs_mru_cache_insert(mp->m_filestream, ip->i_ino, &item->mru);
 	if (err) {
@@ -333,7 +332,7 @@ xfs_filestream_lookup_ag(
 		ag = container_of(mru, struct xfs_fstrm_item, mru)->ag;
 		xfs_mru_cache_done(mp->m_filestream);
 
-		trace_xfs_filestream_lookup(ip, ag);
+		trace_xfs_filestream_lookup(mp, ip->i_ino, ag);
 		goto out;
 	}
 
@@ -399,7 +398,7 @@ xfs_filestream_new_ag(
 	 * Only free the item here so we skip over the old AG earlier.
 	 */
 	if (mru)
-		xfs_fstrm_free_func(mru);
+		xfs_fstrm_free_func(mp, mru);
 
 	IRELE(pip);
 exit:
@@ -426,8 +425,8 @@ xfs_filestream_mount(
 	 * timer tunable to within about 10 percent.  This requires at least 10
 	 * groups.
 	 */
-	return xfs_mru_cache_create(&mp->m_filestream, xfs_fstrm_centisecs * 10,
-				    10, xfs_fstrm_free_func);
+	return xfs_mru_cache_create(&mp->m_filestream, mp,
+			xfs_fstrm_centisecs * 10, 10, xfs_fstrm_free_func);
 }
 
 void
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 3e3aab3888fa..2b70c8b4cee2 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -972,10 +972,8 @@ xfs_dir_ialloc(
 	xfs_nlink_t	nlink,
 	dev_t		rdev,
 	prid_t		prid,		/* project id */
-	xfs_inode_t	**ipp,		/* pointer to inode; it will be
+	xfs_inode_t	**ipp)		/* pointer to inode; it will be
 					   locked. */
-	int		*committed)
-
 {
 	xfs_trans_t	*tp;
 	xfs_inode_t	*ip;
@@ -1050,8 +1048,6 @@ xfs_dir_ialloc(
 		}
 
 		code = xfs_trans_roll(&tp);
-		if (committed != NULL)
-			*committed = 1;
 
 		/*
 		 * Re-attach the quota info that we detached from prev trx.
@@ -1088,9 +1084,6 @@ xfs_dir_ialloc(
 		}
 		ASSERT(!ialloc_context && ip);
 
-	} else {
-		if (committed != NULL)
-			*committed = 0;
 	}
 
 	*ipp = ip;
@@ -1217,8 +1210,7 @@ xfs_create(
 	 * entry pointing to them, but a directory also the "." entry
 	 * pointing to itself.
 	 */
-	error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, prid, &ip,
-			NULL);
+	error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, prid, &ip);
 	if (error)
 		goto out_trans_cancel;
 
@@ -1309,7 +1301,6 @@ xfs_create(
 int
 xfs_create_tmpfile(
 	struct xfs_inode	*dp,
-	struct dentry		*dentry,
 	umode_t			mode,
 	struct xfs_inode	**ipp)
 {
@@ -1351,7 +1342,7 @@ xfs_create_tmpfile(
 	if (error)
 		goto out_trans_cancel;
 
-	error = xfs_dir_ialloc(&tp, dp, mode, 1, 0, prid, &ip, NULL);
+	error = xfs_dir_ialloc(&tp, dp, mode, 1, 0, prid, &ip);
 	if (error)
 		goto out_trans_cancel;
 
@@ -1611,13 +1602,15 @@ xfs_itruncate_extents(
 			goto out;
 	}
 
-	/* Remove all pending CoW reservations. */
-	error = xfs_reflink_cancel_cow_blocks(ip, &tp, first_unmap_block,
-			last_block, true);
-	if (error)
-		goto out;
+	if (whichfork == XFS_DATA_FORK) {
+		/* Remove all pending CoW reservations. */
+		error = xfs_reflink_cancel_cow_blocks(ip, &tp,
+				first_unmap_block, last_block, true);
+		if (error)
+			goto out;
 
-	xfs_itruncate_clear_reflink_flags(ip);
+		xfs_itruncate_clear_reflink_flags(ip);
+	}
 
 	/*
 	 * Always re-log the inode so that our permanent transaction can keep
@@ -2903,7 +2896,7 @@ xfs_rename_alloc_whiteout(
 	struct xfs_inode	*tmpfile;
 	int			error;
 
-	error = xfs_create_tmpfile(dp, NULL, S_IFCHR | WHITEOUT_MODE, &tmpfile);
+	error = xfs_create_tmpfile(dp, S_IFCHR | WHITEOUT_MODE, &tmpfile);
 	if (error)
 		return error;
 
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 132d8aa2afc4..1eebc53df7d7 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -393,8 +393,8 @@ int		xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
 			   struct xfs_inode **ipp, struct xfs_name *ci_name);
 int		xfs_create(struct xfs_inode *dp, struct xfs_name *name,
 			   umode_t mode, dev_t rdev, struct xfs_inode **ipp);
-int		xfs_create_tmpfile(struct xfs_inode *dp, struct dentry *dentry,
-			   umode_t mode, struct xfs_inode **ipp);
+int		xfs_create_tmpfile(struct xfs_inode *dp, umode_t mode,
+			   struct xfs_inode **ipp);
 int		xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
 			   struct xfs_inode *ip);
 int		xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
@@ -431,7 +431,7 @@ xfs_extlen_t	xfs_get_cowextsz_hint(struct xfs_inode *ip);
 
 int		xfs_dir_ialloc(struct xfs_trans **, struct xfs_inode *, umode_t,
 			       xfs_nlink_t, dev_t, prid_t,
-			       struct xfs_inode **, int *);
+			       struct xfs_inode **);
 
 /* from xfs_file.c */
 enum xfs_prealloc_flags {
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index e0307fbff911..a3ed3c811dfa 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -177,7 +177,7 @@ xfs_generic_create(
 	if (!tmpfile) {
 		error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip);
 	} else {
-		error = xfs_create_tmpfile(XFS_I(dir), dentry, mode, &ip);
+		error = xfs_create_tmpfile(XFS_I(dir), mode, &ip);
 	}
 	if (unlikely(error))
 		goto out_free_acl;
@@ -1285,7 +1285,10 @@ xfs_setup_iops(
 	case S_IFREG:
 		inode->i_op = &xfs_inode_operations;
 		inode->i_fop = &xfs_file_operations;
-		inode->i_mapping->a_ops = &xfs_address_space_operations;
+		if (IS_DAX(inode))
+			inode->i_mapping->a_ops = &xfs_dax_aops;
+		else
+			inode->i_mapping->a_ops = &xfs_address_space_operations;
 		break;
 	case S_IFDIR:
 		if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index b9c9c848146b..2fcd9ed5d075 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -560,7 +560,6 @@ xfs_log_done(
  */
 int
 xfs_log_notify(
-	struct xfs_mount	*mp,
 	struct xlog_in_core	*iclog,
 	xfs_log_callback_t	*cb)
 {
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 7e2d62922a16..fa8ad31d587f 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -141,8 +141,7 @@ int	xfs_log_mount_cancel(struct xfs_mount *);
 xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp);
 xfs_lsn_t xlog_assign_tail_lsn_locked(struct xfs_mount *mp);
 void	  xfs_log_space_wake(struct xfs_mount *mp);
-int	  xfs_log_notify(struct xfs_mount	*mp,
-			 struct xlog_in_core	*iclog,
+int	  xfs_log_notify(struct xlog_in_core	*iclog,
 			 struct xfs_log_callback *callback_entry);
 int	  xfs_log_release_iclog(struct xfs_mount *mp,
 			 struct xlog_in_core	 *iclog);
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index cb376ac8a595..4668403b1741 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -848,7 +848,7 @@ restart:
 	/* attach all the transactions w/ busy extents to iclog */
 	ctx->log_cb.cb_func = xlog_cil_committed;
 	ctx->log_cb.cb_arg = ctx;
-	error = xfs_log_notify(log->l_mp, commit_iclog, &ctx->log_cb);
+	error = xfs_log_notify(commit_iclog, &ctx->log_cb);
 	if (error)
 		goto out_abort;
 
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index f8a674d7f092..70eea7ae2876 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -112,6 +112,7 @@ struct xfs_mru_cache {
 	xfs_mru_cache_free_func_t free_func; /* Function pointer for freeing. */
 	struct delayed_work	work;      /* Workqueue data for reaping.   */
 	unsigned int		queued;	   /* work has been queued */
+	void			*data;
 };
 
 static struct workqueue_struct	*xfs_mru_reap_wq;
@@ -259,7 +260,7 @@ _xfs_mru_cache_clear_reap_list(
 
 	list_for_each_entry_safe(elem, next, &tmp, list_node) {
 		list_del_init(&elem->list_node);
-		mru->free_func(elem);
+		mru->free_func(mru->data, elem);
 	}
 
 	spin_lock(&mru->lock);
@@ -326,6 +327,7 @@ xfs_mru_cache_uninit(void)
 int
 xfs_mru_cache_create(
 	struct xfs_mru_cache	**mrup,
+	void			*data,
 	unsigned int		lifetime_ms,
 	unsigned int		grp_count,
 	xfs_mru_cache_free_func_t free_func)
@@ -369,7 +371,7 @@ xfs_mru_cache_create(
 
 	mru->grp_time  = grp_time;
 	mru->free_func = free_func;
-
+	mru->data = data;
 	*mrup = mru;
 
 exit:
@@ -492,7 +494,7 @@ xfs_mru_cache_delete(
 
 	elem = xfs_mru_cache_remove(mru, key);
 	if (elem)
-		mru->free_func(elem);
+		mru->free_func(mru->data, elem);
 }
 
 /*
diff --git a/fs/xfs/xfs_mru_cache.h b/fs/xfs/xfs_mru_cache.h
index fb5245ba5ff7..b3f3fbdfcc47 100644
--- a/fs/xfs/xfs_mru_cache.h
+++ b/fs/xfs/xfs_mru_cache.h
@@ -26,13 +26,13 @@ struct xfs_mru_cache_elem {
 };
 
 /* Function pointer type for callback to free a client's data pointer. */
-typedef void (*xfs_mru_cache_free_func_t)(struct xfs_mru_cache_elem *elem);
+typedef void (*xfs_mru_cache_free_func_t)(void *, struct xfs_mru_cache_elem *);
 
 int xfs_mru_cache_init(void);
 void xfs_mru_cache_uninit(void);
-int xfs_mru_cache_create(struct xfs_mru_cache **mrup, unsigned int lifetime_ms,
-			     unsigned int grp_count,
-			     xfs_mru_cache_free_func_t free_func);
+int xfs_mru_cache_create(struct xfs_mru_cache **mrup, void *data,
+		unsigned int lifetime_ms, unsigned int grp_count,
+		xfs_mru_cache_free_func_t free_func);
 void xfs_mru_cache_destroy(struct xfs_mru_cache *mru);
 int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key,
 		struct xfs_mru_cache_elem *elem);
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 5b848f4b637f..ec39ae274c78 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -748,7 +748,6 @@ xfs_qm_qino_alloc(
 {
 	xfs_trans_t	*tp;
 	int		error;
-	int		committed;
 	bool		need_alloc = true;
 
 	*ip = NULL;
@@ -788,8 +787,7 @@ xfs_qm_qino_alloc(
 		return error;
 
 	if (need_alloc) {
-		error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, ip,
-				&committed);
+		error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, ip);
 		if (error) {
 			xfs_trans_cancel(tp);
 			return error;
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
index 7a39f40645f7..15c9393dd7a7 100644
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -52,6 +52,25 @@ xfs_cui_item_free(
 		kmem_zone_free(xfs_cui_zone, cuip);
 }
 
+/*
+ * Freeing the CUI requires that we remove it from the AIL if it has already
+ * been placed there. However, the CUI may not yet have been placed in the AIL
+ * when called by xfs_cui_release() from CUD processing due to the ordering of
+ * committed vs unpin operations in bulk insert operations. Hence the reference
+ * count to ensure only the last caller frees the CUI.
+ */
+void
+xfs_cui_release(
+	struct xfs_cui_log_item	*cuip)
+{
+	ASSERT(atomic_read(&cuip->cui_refcount) > 0);
+	if (atomic_dec_and_test(&cuip->cui_refcount)) {
+		xfs_trans_ail_remove(&cuip->cui_item, SHUTDOWN_LOG_IO_ERROR);
+		xfs_cui_item_free(cuip);
+	}
+}
+
+
 STATIC void
 xfs_cui_item_size(
 	struct xfs_log_item	*lip,
@@ -141,7 +160,7 @@ xfs_cui_item_unlock(
 	struct xfs_log_item	*lip)
 {
 	if (lip->li_flags & XFS_LI_ABORTED)
-		xfs_cui_item_free(CUI_ITEM(lip));
+		xfs_cui_release(CUI_ITEM(lip));
 }
 
 /*
@@ -211,24 +230,6 @@ xfs_cui_init(
 	return cuip;
 }
 
-/*
- * Freeing the CUI requires that we remove it from the AIL if it has already
- * been placed there. However, the CUI may not yet have been placed in the AIL
- * when called by xfs_cui_release() from CUD processing due to the ordering of
- * committed vs unpin operations in bulk insert operations. Hence the reference
- * count to ensure only the last caller frees the CUI.
- */
-void
-xfs_cui_release(
-	struct xfs_cui_log_item	*cuip)
-{
-	ASSERT(atomic_read(&cuip->cui_refcount) > 0);
-	if (atomic_dec_and_test(&cuip->cui_refcount)) {
-		xfs_trans_ail_remove(&cuip->cui_item, SHUTDOWN_LOG_IO_ERROR);
-		xfs_cui_item_free(cuip);
-	}
-}
-
 static inline struct xfs_cud_log_item *CUD_ITEM(struct xfs_log_item *lip)
 {
 	return container_of(lip, struct xfs_cud_log_item, cud_item);
diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
index 49d3124863a8..06a07846c9b3 100644
--- a/fs/xfs/xfs_rmap_item.c
+++ b/fs/xfs/xfs_rmap_item.c
@@ -52,6 +52,24 @@ xfs_rui_item_free(
 		kmem_zone_free(xfs_rui_zone, ruip);
 }
 
+/*
+ * Freeing the RUI requires that we remove it from the AIL if it has already
+ * been placed there. However, the RUI may not yet have been placed in the AIL
+ * when called by xfs_rui_release() from RUD processing due to the ordering of
+ * committed vs unpin operations in bulk insert operations. Hence the reference
+ * count to ensure only the last caller frees the RUI.
+ */
+void
+xfs_rui_release(
+	struct xfs_rui_log_item	*ruip)
+{
+	ASSERT(atomic_read(&ruip->rui_refcount) > 0);
+	if (atomic_dec_and_test(&ruip->rui_refcount)) {
+		xfs_trans_ail_remove(&ruip->rui_item, SHUTDOWN_LOG_IO_ERROR);
+		xfs_rui_item_free(ruip);
+	}
+}
+
 STATIC void
 xfs_rui_item_size(
 	struct xfs_log_item	*lip,
@@ -141,7 +159,7 @@ xfs_rui_item_unlock(
 	struct xfs_log_item	*lip)
 {
 	if (lip->li_flags & XFS_LI_ABORTED)
-		xfs_rui_item_free(RUI_ITEM(lip));
+		xfs_rui_release(RUI_ITEM(lip));
 }
 
 /*
@@ -233,24 +251,6 @@ xfs_rui_copy_format(
 	return 0;
 }
 
-/*
- * Freeing the RUI requires that we remove it from the AIL if it has already
- * been placed there. However, the RUI may not yet have been placed in the AIL
- * when called by xfs_rui_release() from RUD processing due to the ordering of
- * committed vs unpin operations in bulk insert operations. Hence the reference
- * count to ensure only the last caller frees the RUI.
- */
-void
-xfs_rui_release(
-	struct xfs_rui_log_item	*ruip)
-{
-	ASSERT(atomic_read(&ruip->rui_refcount) > 0);
-	if (atomic_dec_and_test(&ruip->rui_refcount)) {
-		xfs_trans_ail_remove(&ruip->rui_item, SHUTDOWN_LOG_IO_ERROR);
-		xfs_rui_item_free(ruip);
-	}
-}
-
 static inline struct xfs_rud_log_item *RUD_ITEM(struct xfs_log_item *lip)
 {
 	return container_of(lip, struct xfs_rud_log_item, rud_item);
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 612c1d5348b3..d71424052917 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -722,7 +722,7 @@ xfs_close_devices(
 		struct block_device *logdev = mp->m_logdev_targp->bt_bdev;
 		struct dax_device *dax_logdev = mp->m_logdev_targp->bt_daxdev;
 
-		xfs_free_buftarg(mp, mp->m_logdev_targp);
+		xfs_free_buftarg(mp->m_logdev_targp);
 		xfs_blkdev_put(logdev);
 		fs_put_dax(dax_logdev);
 	}
@@ -730,11 +730,11 @@ xfs_close_devices(
 		struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;
 		struct dax_device *dax_rtdev = mp->m_rtdev_targp->bt_daxdev;
 
-		xfs_free_buftarg(mp, mp->m_rtdev_targp);
+		xfs_free_buftarg(mp->m_rtdev_targp);
 		xfs_blkdev_put(rtdev);
 		fs_put_dax(dax_rtdev);
 	}
-	xfs_free_buftarg(mp, mp->m_ddev_targp);
+	xfs_free_buftarg(mp->m_ddev_targp);
 	fs_put_dax(dax_ddev);
 }
 
@@ -808,9 +808,9 @@ xfs_open_devices(
 
  out_free_rtdev_targ:
 	if (mp->m_rtdev_targp)
-		xfs_free_buftarg(mp, mp->m_rtdev_targp);
+		xfs_free_buftarg(mp->m_rtdev_targp);
  out_free_ddev_targ:
-	xfs_free_buftarg(mp, mp->m_ddev_targp);
+	xfs_free_buftarg(mp->m_ddev_targp);
  out_close_rtdev:
 	xfs_blkdev_put(rtdev);
 	fs_put_dax(dax_rtdev);
@@ -1247,7 +1247,6 @@ xfs_quiesce_attr(
 STATIC int
 xfs_test_remount_options(
 	struct super_block	*sb,
-	struct xfs_mount	*mp,
 	char			*options)
 {
 	int			error = 0;
@@ -1278,7 +1277,7 @@ xfs_fs_remount(
 	int			error;
 
 	/* First, check for complete junk; i.e. invalid options */
-	error = xfs_test_remount_options(sb, mp, options);
+	error = xfs_test_remount_options(sb, options);
 	if (error)
 		return error;
 
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 2e9e793a8f9d..5b66ac12913c 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -264,7 +264,7 @@ xfs_symlink(
 	 * Allocate an inode for the symlink.
 	 */
 	error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0,
-			       prid, &ip, NULL);
+			       prid, &ip);
 	if (error)
 		goto out_trans_cancel;
 
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index a982c0b623d0..8955254b900e 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -506,8 +506,8 @@ DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release);
 DEFINE_BUF_ITEM_EVENT(xfs_trans_binval);
 
 DECLARE_EVENT_CLASS(xfs_filestream_class,
-	TP_PROTO(struct xfs_inode *ip, xfs_agnumber_t agno),
-	TP_ARGS(ip, agno),
+	TP_PROTO(struct xfs_mount *mp, xfs_ino_t ino, xfs_agnumber_t agno),
+	TP_ARGS(mp, ino, agno),
 	TP_STRUCT__entry(
 		__field(dev_t, dev)
 		__field(xfs_ino_t, ino)
@@ -515,10 +515,10 @@ DECLARE_EVENT_CLASS(xfs_filestream_class,
 		__field(int, streams)
 	),
 	TP_fast_assign(
-		__entry->dev = VFS_I(ip)->i_sb->s_dev;
-		__entry->ino = ip->i_ino;
+		__entry->dev = mp->m_super->s_dev;
+		__entry->ino = ino;
 		__entry->agno = agno;
-		__entry->streams = xfs_filestream_peek_ag(ip->i_mount, agno);
+		__entry->streams = xfs_filestream_peek_ag(mp, agno);
 	),
 	TP_printk("dev %d:%d ino 0x%llx agno %u streams %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
@@ -528,8 +528,8 @@ DECLARE_EVENT_CLASS(xfs_filestream_class,
 )
 #define DEFINE_FILESTREAM_EVENT(name) \
 DEFINE_EVENT(xfs_filestream_class, name, \
-	TP_PROTO(struct xfs_inode *ip, xfs_agnumber_t agno), \
-	TP_ARGS(ip, agno))
+	TP_PROTO(struct xfs_mount *mp, xfs_ino_t ino, xfs_agnumber_t agno), \
+	TP_ARGS(mp, ino, agno))
 DEFINE_FILESTREAM_EVENT(xfs_filestream_free);
 DEFINE_FILESTREAM_EVENT(xfs_filestream_lookup);
 DEFINE_FILESTREAM_EVENT(xfs_filestream_scan);
diff --git a/include/acpi/processor.h b/include/acpi/processor.h
index d591bb77f592..40a916efd7c0 100644
--- a/include/acpi/processor.h
+++ b/include/acpi/processor.h
@@ -254,6 +254,8 @@ int acpi_processor_pstate_control(void);
 /* note: this locks both the calling module and the processor module
          if a _PPC object exists, rmmod is disallowed then */
 int acpi_processor_notify_smm(struct module *calling_module);
+int acpi_processor_get_psd(acpi_handle handle,
+			   struct acpi_psd_package *pdomain);
 
 /* parsing the _P* objects. */
 extern int acpi_processor_get_performance_info(struct acpi_processor *pr);
diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h
index 04c4cc6fd820..66d1d45fa2e1 100644
--- a/include/asm-generic/io.h
+++ b/include/asm-generic/io.h
@@ -25,6 +25,50 @@
 #define mmiowb() do {} while (0)
 #endif
 
+#ifndef __io_br
+#define __io_br()      barrier()
+#endif
+
+/* prevent prefetching of coherent DMA data ahead of a dma-complete */
+#ifndef __io_ar
+#ifdef rmb
+#define __io_ar()      rmb()
+#else
+#define __io_ar()      barrier()
+#endif
+#endif
+
+/* flush writes to coherent DMA data before possibly triggering a DMA read */
+#ifndef __io_bw
+#ifdef wmb
+#define __io_bw()      wmb()
+#else
+#define __io_bw()      barrier()
+#endif
+#endif
+
+/* serialize device access against a spin_unlock, usually handled there. */
+#ifndef __io_aw
+#define __io_aw()      barrier()
+#endif
+
+#ifndef __io_pbw
+#define __io_pbw()     __io_bw()
+#endif
+
+#ifndef __io_paw
+#define __io_paw()     __io_aw()
+#endif
+
+#ifndef __io_pbr
+#define __io_pbr()     __io_br()
+#endif
+
+#ifndef __io_par
+#define __io_par()     __io_ar()
+#endif
+
+
 /*
  * __raw_{read,write}{b,w,l,q}() access memory in native endianness.
  *
@@ -110,7 +154,12 @@ static inline void __raw_writeq(u64 value, volatile void __iomem *addr)
 #define readb readb
 static inline u8 readb(const volatile void __iomem *addr)
 {
-	return __raw_readb(addr);
+	u8 val;
+
+	__io_br();
+	val = __raw_readb(addr);
+	__io_ar();
+	return val;
 }
 #endif
 
@@ -118,7 +167,12 @@ static inline u8 readb(const volatile void __iomem *addr)
 #define readw readw
 static inline u16 readw(const volatile void __iomem *addr)
 {
-	return __le16_to_cpu(__raw_readw(addr));
+	u16 val;
+
+	__io_br();
+	val = __le16_to_cpu(__raw_readw(addr));
+	__io_ar();
+	return val;
 }
 #endif
 
@@ -126,7 +180,12 @@ static inline u16 readw(const volatile void __iomem *addr)
 #define readl readl
 static inline u32 readl(const volatile void __iomem *addr)
 {
-	return __le32_to_cpu(__raw_readl(addr));
+	u32 val;
+
+	__io_br();
+	val = __le32_to_cpu(__raw_readl(addr));
+	__io_ar();
+	return val;
 }
 #endif
 
@@ -135,7 +194,12 @@ static inline u32 readl(const volatile void __iomem *addr)
 #define readq readq
 static inline u64 readq(const volatile void __iomem *addr)
 {
-	return __le64_to_cpu(__raw_readq(addr));
+	u64 val;
+
+	__io_br();
+	val = __le64_to_cpu(__raw_readq(addr));
+	__io_ar();
+	return val;
 }
 #endif
 #endif /* CONFIG_64BIT */
@@ -144,7 +208,9 @@ static inline u64 readq(const volatile void __iomem *addr)
 #define writeb writeb
 static inline void writeb(u8 value, volatile void __iomem *addr)
 {
+	__io_bw();
 	__raw_writeb(value, addr);
+	__io_aw();
 }
 #endif
 
@@ -152,7 +218,9 @@ static inline void writeb(u8 value, volatile void __iomem *addr)
 #define writew writew
 static inline void writew(u16 value, volatile void __iomem *addr)
 {
+	__io_bw();
 	__raw_writew(cpu_to_le16(value), addr);
+	__io_aw();
 }
 #endif
 
@@ -160,7 +228,9 @@ static inline void writew(u16 value, volatile void __iomem *addr)
 #define writel writel
 static inline void writel(u32 value, volatile void __iomem *addr)
 {
+	__io_bw();
 	__raw_writel(__cpu_to_le32(value), addr);
+	__io_aw();
 }
 #endif
 
@@ -169,7 +239,9 @@ static inline void writel(u32 value, volatile void __iomem *addr)
 #define writeq writeq
 static inline void writeq(u64 value, volatile void __iomem *addr)
 {
+	__io_bw();
 	__raw_writeq(__cpu_to_le64(value), addr);
+	__io_aw();
 }
 #endif
 #endif /* CONFIG_64BIT */
@@ -180,35 +252,67 @@ static inline void writeq(u64 value, volatile void __iomem *addr)
  * accesses.
  */
 #ifndef readb_relaxed
-#define readb_relaxed readb
+#define readb_relaxed readb_relaxed
+static inline u8 readb_relaxed(const volatile void __iomem *addr)
+{
+	return __raw_readb(addr);
+}
 #endif
 
 #ifndef readw_relaxed
-#define readw_relaxed readw
+#define readw_relaxed readw_relaxed
+static inline u16 readw_relaxed(const volatile void __iomem *addr)
+{
+	return __le16_to_cpu(__raw_readw(addr));
+}
 #endif
 
 #ifndef readl_relaxed
-#define readl_relaxed readl
+#define readl_relaxed readl_relaxed
+static inline u32 readl_relaxed(const volatile void __iomem *addr)
+{
+	return __le32_to_cpu(__raw_readl(addr));
+}
 #endif
 
 #if defined(readq) && !defined(readq_relaxed)
-#define readq_relaxed readq
+#define readq_relaxed readq_relaxed
+static inline u64 readq_relaxed(const volatile void __iomem *addr)
+{
+	return __le64_to_cpu(__raw_readq(addr));
+}
 #endif
 
 #ifndef writeb_relaxed
-#define writeb_relaxed writeb
+#define writeb_relaxed writeb_relaxed
+static inline void writeb_relaxed(u8 value, volatile void __iomem *addr)
+{
+	__raw_writeb(value, addr);
+}
 #endif
 
 #ifndef writew_relaxed
-#define writew_relaxed writew
+#define writew_relaxed writew_relaxed
+static inline void writew_relaxed(u16 value, volatile void __iomem *addr)
+{
+	__raw_writew(cpu_to_le16(value), addr);
+}
 #endif
 
 #ifndef writel_relaxed
-#define writel_relaxed writel
+#define writel_relaxed writel_relaxed
+static inline void writel_relaxed(u32 value, volatile void __iomem *addr)
+{
+	__raw_writel(__cpu_to_le32(value), addr);
+}
 #endif
 
 #if defined(writeq) && !defined(writeq_relaxed)
-#define writeq_relaxed writeq
+#define writeq_relaxed writeq_relaxed
+static inline void writeq_relaxed(u64 value, volatile void __iomem *addr)
+{
+	__raw_writeq(__cpu_to_le64(value), addr);
+}
 #endif
 
 /*
@@ -363,7 +467,12 @@ static inline void writesq(volatile void __iomem *addr, const void *buffer,
 #define inb inb
 static inline u8 inb(unsigned long addr)
 {
-	return readb(PCI_IOBASE + addr);
+	u8 val;
+
+	__io_pbr();
+	val = __raw_readb(PCI_IOBASE + addr);
+	__io_par();
+	return val;
 }
 #endif
 
@@ -371,7 +480,12 @@ static inline u8 inb(unsigned long addr)
 #define inw inw
 static inline u16 inw(unsigned long addr)
 {
-	return readw(PCI_IOBASE + addr);
+	u16 val;
+
+	__io_pbr();
+	val = __le16_to_cpu(__raw_readw(PCI_IOBASE + addr));
+	__io_par();
+	return val;
 }
 #endif
 
@@ -379,7 +493,12 @@ static inline u16 inw(unsigned long addr)
 #define inl inl
 static inline u32 inl(unsigned long addr)
 {
-	return readl(PCI_IOBASE + addr);
+	u32 val;
+
+	__io_pbr();
+	val = __le32_to_cpu(__raw_readl(PCI_IOBASE + addr));
+	__io_par();
+	return val;
 }
 #endif
 
@@ -387,7 +506,9 @@ static inline u32 inl(unsigned long addr)
 #define outb outb
 static inline void outb(u8 value, unsigned long addr)
 {
-	writeb(value, PCI_IOBASE + addr);
+	__io_pbw();
+	__raw_writeb(value, PCI_IOBASE + addr);
+	__io_paw();
 }
 #endif
 
@@ -395,7 +516,9 @@ static inline void outb(u8 value, unsigned long addr)
 #define outw outw
 static inline void outw(u16 value, unsigned long addr)
 {
-	writew(value, PCI_IOBASE + addr);
+	__io_pbw();
+	__raw_writew(cpu_to_le16(value), PCI_IOBASE + addr);
+	__io_paw();
 }
 #endif
 
@@ -403,7 +526,9 @@ static inline void outw(u16 value, unsigned long addr)
 #define outl outl
 static inline void outl(u32 value, unsigned long addr)
 {
-	writel(value, PCI_IOBASE + addr);
+	__io_pbw();
+	__raw_writel(cpu_to_le32(value), PCI_IOBASE + addr);
+	__io_paw();
 }
 #endif
 
diff --git a/include/asm-generic/kvm_para.h b/include/asm-generic/kvm_para.h
index 18c6abe81fbd..728e5c5706c4 100644
--- a/include/asm-generic/kvm_para.h
+++ b/include/asm-generic/kvm_para.h
@@ -19,6 +19,11 @@ static inline unsigned int kvm_arch_para_features(void)
 	return 0;
 }
 
+static inline unsigned int kvm_arch_para_hints(void)
+{
+	return 0;
+}
+
 static inline bool kvm_para_available(void)
 {
 	return false;
diff --git a/include/dt-bindings/clock/axg-clkc.h b/include/dt-bindings/clock/axg-clkc.h
index 941ac70e7f30..555937a25504 100644
--- a/include/dt-bindings/clock/axg-clkc.h
+++ b/include/dt-bindings/clock/axg-clkc.h
@@ -67,5 +67,6 @@
 #define CLKID_AO_I2C				58
 #define CLKID_SD_EMMC_B_CLK0			59
 #define CLKID_SD_EMMC_C_CLK0			60
+#define CLKID_HIFI_PLL				69
 
 #endif /* __AXG_CLKC_H */
diff --git a/include/dt-bindings/clock/histb-clock.h b/include/dt-bindings/clock/histb-clock.h
index 067f5e501b0c..fab30b3f78b2 100644
--- a/include/dt-bindings/clock/histb-clock.h
+++ b/include/dt-bindings/clock/histb-clock.h
@@ -22,18 +22,18 @@
 #define HISTB_OSC_CLK			0
 #define HISTB_APB_CLK			1
 #define HISTB_AHB_CLK			2
-#define HISTB_UART1_CLK		3
-#define HISTB_UART2_CLK		4
-#define HISTB_UART3_CLK		5
-#define HISTB_I2C0_CLK		6
-#define HISTB_I2C1_CLK		7
-#define HISTB_I2C2_CLK		8
-#define HISTB_I2C3_CLK		9
-#define HISTB_I2C4_CLK		10
-#define HISTB_I2C5_CLK		11
-#define HISTB_SPI0_CLK		12
-#define HISTB_SPI1_CLK		13
-#define HISTB_SPI2_CLK		14
+#define HISTB_UART1_CLK			3
+#define HISTB_UART2_CLK			4
+#define HISTB_UART3_CLK			5
+#define HISTB_I2C0_CLK			6
+#define HISTB_I2C1_CLK			7
+#define HISTB_I2C2_CLK			8
+#define HISTB_I2C3_CLK			9
+#define HISTB_I2C4_CLK			10
+#define HISTB_I2C5_CLK			11
+#define HISTB_SPI0_CLK			12
+#define HISTB_SPI1_CLK			13
+#define HISTB_SPI2_CLK			14
 #define HISTB_SCI_CLK			15
 #define HISTB_FMC_CLK			16
 #define HISTB_MMC_BIU_CLK		17
@@ -43,7 +43,7 @@
 #define HISTB_SDIO0_BIU_CLK		21
 #define HISTB_SDIO0_CIU_CLK		22
 #define HISTB_SDIO0_DRV_CLK		23
-#define HISTB_SDIO0_SAMPLE_CLK	24
+#define HISTB_SDIO0_SAMPLE_CLK		24
 #define HISTB_PCIE_AUX_CLK		25
 #define HISTB_PCIE_PIPE_CLK		26
 #define HISTB_PCIE_SYS_CLK		27
@@ -53,21 +53,22 @@
 #define HISTB_ETH1_MAC_CLK		31
 #define HISTB_ETH1_MACIF_CLK		32
 #define HISTB_COMBPHY1_CLK		33
-#define HISTB_USB2_BUS_CLK	34
-#define HISTB_USB2_PHY_CLK	35
-#define HISTB_USB2_UTMI_CLK	36
-#define HISTB_USB2_12M_CLK	37
-#define HISTB_USB2_48M_CLK	38
-#define HISTB_USB2_OTG_UTMI_CLK	39
-#define HISTB_USB2_PHY1_REF_CLK	40
-#define HISTB_USB2_PHY2_REF_CLK	41
+#define HISTB_USB2_BUS_CLK		34
+#define HISTB_USB2_PHY_CLK		35
+#define HISTB_USB2_UTMI_CLK		36
+#define HISTB_USB2_12M_CLK		37
+#define HISTB_USB2_48M_CLK		38
+#define HISTB_USB2_OTG_UTMI_CLK		39
+#define HISTB_USB2_PHY1_REF_CLK		40
+#define HISTB_USB2_PHY2_REF_CLK		41
+#define HISTB_COMBPHY0_CLK		42
 
 /* clocks provided by mcu CRG */
-#define HISTB_MCE_CLK	1
-#define HISTB_IR_CLK	2
-#define HISTB_TIMER01_CLK	3
-#define HISTB_LEDC_CLK	4
-#define HISTB_UART0_CLK	5
-#define HISTB_LSADC_CLK	6
+#define HISTB_MCE_CLK			1
+#define HISTB_IR_CLK			2
+#define HISTB_TIMER01_CLK		3
+#define HISTB_LEDC_CLK			4
+#define HISTB_UART0_CLK			5
+#define HISTB_LSADC_CLK			6
 
 #endif	/* __DTS_HISTB_CLOCK_H */
diff --git a/include/dt-bindings/clock/imx6sll-clock.h b/include/dt-bindings/clock/imx6sll-clock.h
new file mode 100644
index 000000000000..151111e68f4f
--- /dev/null
+++ b/include/dt-bindings/clock/imx6sll-clock.h
@@ -0,0 +1,202 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2016 Freescale Semiconductor, Inc.
+ * Copyright 2017-2018 NXP.
+ *
+ */
+
+#ifndef __DT_BINDINGS_CLOCK_IMX6SLL_H
+#define __DT_BINDINGS_CLOCK_IMX6SLL_H
+
+#define IMX6SLL_CLK_DUMMY		0
+#define IMX6SLL_CLK_CKIL		1
+#define IMX6SLL_CLK_OSC			2
+#define IMX6SLL_PLL1_BYPASS_SRC		3
+#define IMX6SLL_PLL2_BYPASS_SRC		4
+#define IMX6SLL_PLL3_BYPASS_SRC		5
+#define IMX6SLL_PLL4_BYPASS_SRC		6
+#define IMX6SLL_PLL5_BYPASS_SRC		7
+#define IMX6SLL_PLL6_BYPASS_SRC		8
+#define IMX6SLL_PLL7_BYPASS_SRC		9
+#define IMX6SLL_CLK_PLL1		10
+#define IMX6SLL_CLK_PLL2		11
+#define IMX6SLL_CLK_PLL3		12
+#define IMX6SLL_CLK_PLL4		13
+#define IMX6SLL_CLK_PLL5		14
+#define IMX6SLL_CLK_PLL6		15
+#define IMX6SLL_CLK_PLL7		16
+#define IMX6SLL_PLL1_BYPASS		17
+#define IMX6SLL_PLL2_BYPASS		18
+#define IMX6SLL_PLL3_BYPASS		19
+#define IMX6SLL_PLL4_BYPASS		20
+#define IMX6SLL_PLL5_BYPASS		21
+#define IMX6SLL_PLL6_BYPASS		22
+#define IMX6SLL_PLL7_BYPASS		23
+#define IMX6SLL_CLK_PLL1_SYS		24
+#define IMX6SLL_CLK_PLL2_BUS		25
+#define IMX6SLL_CLK_PLL3_USB_OTG	26
+#define IMX6SLL_CLK_PLL4_AUDIO		27
+#define IMX6SLL_CLK_PLL5_VIDEO		28
+#define IMX6SLL_CLK_PLL6_ENET		29
+#define IMX6SLL_CLK_PLL7_USB_HOST	30
+#define IMX6SLL_CLK_USBPHY1		31
+#define IMX6SLL_CLK_USBPHY2		32
+#define IMX6SLL_CLK_USBPHY1_GATE	33
+#define IMX6SLL_CLK_USBPHY2_GATE	34
+#define IMX6SLL_CLK_PLL2_PFD0		35
+#define IMX6SLL_CLK_PLL2_PFD1		36
+#define IMX6SLL_CLK_PLL2_PFD2		37
+#define IMX6SLL_CLK_PLL2_PFD3		38
+#define IMX6SLL_CLK_PLL3_PFD0		39
+#define IMX6SLL_CLK_PLL3_PFD1		40
+#define IMX6SLL_CLK_PLL3_PFD2		41
+#define IMX6SLL_CLK_PLL3_PFD3		42
+#define IMX6SLL_CLK_PLL4_POST_DIV	43
+#define IMX6SLL_CLK_PLL4_AUDIO_DIV	44
+#define IMX6SLL_CLK_PLL5_POST_DIV	45
+#define IMX6SLL_CLK_PLL5_VIDEO_DIV	46
+#define IMX6SLL_CLK_PLL2_198M		47
+#define IMX6SLL_CLK_PLL3_120M		48
+#define IMX6SLL_CLK_PLL3_80M		49
+#define IMX6SLL_CLK_PLL3_60M		50
+#define IMX6SLL_CLK_STEP		51
+#define IMX6SLL_CLK_PLL1_SW		52
+#define IMX6SLL_CLK_AXI_ALT_SEL		53
+#define IMX6SLL_CLK_AXI_SEL		54
+#define IMX6SLL_CLK_PERIPH_PRE		55
+#define IMX6SLL_CLK_PERIPH2_PRE		56
+#define IMX6SLL_CLK_PERIPH_CLK2_SEL	57
+#define IMX6SLL_CLK_PERIPH2_CLK2_SEL	58
+#define IMX6SLL_CLK_PERCLK_SEL		59
+#define IMX6SLL_CLK_USDHC1_SEL		60
+#define IMX6SLL_CLK_USDHC2_SEL		61
+#define IMX6SLL_CLK_USDHC3_SEL		62
+#define IMX6SLL_CLK_SSI1_SEL		63
+#define IMX6SLL_CLK_SSI2_SEL		64
+#define IMX6SLL_CLK_SSI3_SEL		65
+#define IMX6SLL_CLK_PXP_SEL		66
+#define IMX6SLL_CLK_LCDIF_PRE_SEL	67
+#define IMX6SLL_CLK_LCDIF_SEL		68
+#define IMX6SLL_CLK_EPDC_PRE_SEL	69
+#define IMX6SLL_CLK_SPDIF_SEL		70
+#define IMX6SLL_CLK_ECSPI_SEL		71
+#define IMX6SLL_CLK_UART_SEL		72
+#define IMX6SLL_CLK_ARM			73
+#define IMX6SLL_CLK_PERIPH		74
+#define IMX6SLL_CLK_PERIPH2		75
+#define IMX6SLL_CLK_PERIPH2_CLK2	76
+#define IMX6SLL_CLK_PERIPH_CLK2		77
+#define IMX6SLL_CLK_MMDC_PODF		78
+#define IMX6SLL_CLK_AXI_PODF		79
+#define IMX6SLL_CLK_AHB			80
+#define IMX6SLL_CLK_IPG			81
+#define IMX6SLL_CLK_PERCLK		82
+#define IMX6SLL_CLK_USDHC1_PODF		83
+#define IMX6SLL_CLK_USDHC2_PODF		84
+#define IMX6SLL_CLK_USDHC3_PODF		85
+#define IMX6SLL_CLK_SSI1_PRED		86
+#define IMX6SLL_CLK_SSI2_PRED		87
+#define IMX6SLL_CLK_SSI3_PRED		88
+#define IMX6SLL_CLK_SSI1_PODF		89
+#define IMX6SLL_CLK_SSI2_PODF		90
+#define IMX6SLL_CLK_SSI3_PODF		91
+#define IMX6SLL_CLK_PXP_PODF		92
+#define IMX6SLL_CLK_LCDIF_PRED		93
+#define IMX6SLL_CLK_LCDIF_PODF		94
+#define IMX6SLL_CLK_EPDC_SEL		95
+#define IMX6SLL_CLK_EPDC_PODF		96
+#define IMX6SLL_CLK_SPDIF_PRED		97
+#define IMX6SLL_CLK_SPDIF_PODF		98
+#define IMX6SLL_CLK_ECSPI_PODF		99
+#define IMX6SLL_CLK_UART_PODF		100
+
+/* CCGR 0 */
+#define IMX6SLL_CLK_AIPSTZ1		101
+#define IMX6SLL_CLK_AIPSTZ2		102
+#define IMX6SLL_CLK_DCP			103
+#define IMX6SLL_CLK_UART2_IPG		104
+#define IMX6SLL_CLK_UART2_SERIAL	105
+
+/* CCGR 1 */
+#define IMX6SLL_CLK_ECSPI1		106
+#define IMX6SLL_CLK_ECSPI2		107
+#define IMX6SLL_CLK_ECSPI3		108
+#define IMX6SLL_CLK_ECSPI4		109
+#define IMX6SLL_CLK_UART3_IPG		110
+#define IMX6SLL_CLK_UART3_SERIAL	111
+#define IMX6SLL_CLK_UART4_IPG		112
+#define IMX6SLL_CLK_UART4_SERIAL	113
+#define IMX6SLL_CLK_EPIT1		114
+#define IMX6SLL_CLK_EPIT2		115
+#define IMX6SLL_CLK_GPT_BUS		116
+#define IMX6SLL_CLK_GPT_SERIAL		117
+
+/* CCGR 2 */
+#define IMX6SLL_CLK_CSI			118
+#define IMX6SLL_CLK_I2C1		119
+#define IMX6SLL_CLK_I2C2		120
+#define IMX6SLL_CLK_I2C3		121
+#define IMX6SLL_CLK_OCOTP		122
+#define IMX6SLL_CLK_LCDIF_APB		123
+#define IMX6SLL_CLK_PXP			124
+
+/* CCGR 3 */
+#define IMX6SLL_CLK_UART5_IPG		125
+#define IMX6SLL_CLK_UART5_SERIAL	126
+#define IMX6SLL_CLK_EPDC_AXI		127
+#define IMX6SLL_CLK_EPDC_PIX		128
+#define IMX6SLL_CLK_LCDIF_PIX		129
+#define IMX6SLL_CLK_WDOG1		130
+#define IMX6SLL_CLK_MMDC_P0_FAST	131
+#define IMX6SLL_CLK_MMDC_P0_IPG		132
+#define IMX6SLL_CLK_OCRAM		133
+
+/* CCGR 4 */
+#define IMX6SLL_CLK_PWM1		134
+#define IMX6SLL_CLK_PWM2		135
+#define IMX6SLL_CLK_PWM3		136
+#define IMX6SLL_CLK_PWM4		137
+
+/* CCGR 5 */
+#define IMX6SLL_CLK_ROM			138
+#define IMX6SLL_CLK_SDMA		139
+#define IMX6SLL_CLK_KPP			140
+#define IMX6SLL_CLK_WDOG2		141
+#define IMX6SLL_CLK_SPBA		142
+#define IMX6SLL_CLK_SPDIF		143
+#define IMX6SLL_CLK_SPDIF_GCLK		144
+#define IMX6SLL_CLK_SSI1		145
+#define IMX6SLL_CLK_SSI1_IPG		146
+#define IMX6SLL_CLK_SSI2		147
+#define IMX6SLL_CLK_SSI2_IPG		148
+#define IMX6SLL_CLK_SSI3		149
+#define IMX6SLL_CLK_SSI3_IPG		150
+#define IMX6SLL_CLK_UART1_IPG		151
+#define IMX6SLL_CLK_UART1_SERIAL	152
+
+/* CCGR 6 */
+#define IMX6SLL_CLK_USBOH3		153
+#define IMX6SLL_CLK_USDHC1		154
+#define IMX6SLL_CLK_USDHC2		155
+#define IMX6SLL_CLK_USDHC3		156
+
+#define IMX6SLL_CLK_IPP_DI0		157
+#define IMX6SLL_CLK_IPP_DI1		158
+#define IMX6SLL_CLK_LDB_DI0_SEL		159
+#define IMX6SLL_CLK_LDB_DI0_DIV_3_5	160
+#define IMX6SLL_CLK_LDB_DI0_DIV_7	161
+#define IMX6SLL_CLK_LDB_DI0_DIV_SEL	162
+#define IMX6SLL_CLK_LDB_DI0		163
+#define IMX6SLL_CLK_LDB_DI1_SEL		164
+#define IMX6SLL_CLK_LDB_DI1_DIV_3_5	165
+#define IMX6SLL_CLK_LDB_DI1_DIV_7	166
+#define IMX6SLL_CLK_LDB_DI1_DIV_SEL	167
+#define IMX6SLL_CLK_LDB_DI1		168
+#define IMX6SLL_CLK_EXTERN_AUDIO_SEL    169
+#define IMX6SLL_CLK_EXTERN_AUDIO_PRED   170
+#define IMX6SLL_CLK_EXTERN_AUDIO_PODF   171
+#define IMX6SLL_CLK_EXTERN_AUDIO        172
+
+#define IMX6SLL_CLK_END			173
+
+#endif /* __DT_BINDINGS_CLOCK_IMX6SLL_H */
diff --git a/include/dt-bindings/clock/mt2701-clk.h b/include/dt-bindings/clock/mt2701-clk.h
index 551f7600ab58..24e93dfcee9f 100644
--- a/include/dt-bindings/clock/mt2701-clk.h
+++ b/include/dt-bindings/clock/mt2701-clk.h
@@ -176,7 +176,8 @@
 #define CLK_TOP_AUD_EXT1			156
 #define CLK_TOP_AUD_EXT2			157
 #define CLK_TOP_NFI1X_PAD			158
-#define CLK_TOP_NR				159
+#define CLK_TOP_AXISEL_D4			159
+#define CLK_TOP_NR				160
 
 /* APMIXEDSYS */
 
diff --git a/include/dt-bindings/clock/mt2712-clk.h b/include/dt-bindings/clock/mt2712-clk.h
index 48a8e797a617..76265836a1e1 100644
--- a/include/dt-bindings/clock/mt2712-clk.h
+++ b/include/dt-bindings/clock/mt2712-clk.h
@@ -222,7 +222,13 @@
 #define CLK_TOP_APLL_DIV_PDN5		183
 #define CLK_TOP_APLL_DIV_PDN6		184
 #define CLK_TOP_APLL_DIV_PDN7		185
-#define CLK_TOP_NR_CLK			186
+#define CLK_TOP_APLL1_D3		186
+#define CLK_TOP_APLL1_REF_SEL		187
+#define CLK_TOP_APLL2_REF_SEL		188
+#define CLK_TOP_NFI2X_EN		189
+#define CLK_TOP_NFIECC_EN		190
+#define CLK_TOP_NFI1X_CK_EN		191
+#define CLK_TOP_NR_CLK			192
 
 /* INFRACFG */
 
@@ -281,7 +287,9 @@
 #define CLK_PERI_MSDC30_3_EN		41
 #define CLK_PERI_MSDC50_0_HCLK_EN	42
 #define CLK_PERI_MSDC50_3_HCLK_EN	43
-#define CLK_PERI_NR_CLK			44
+#define CLK_PERI_MSDC30_0_QTR_EN	44
+#define CLK_PERI_MSDC30_3_QTR_EN	45
+#define CLK_PERI_NR_CLK			46
 
 /* MCUCFG */
 
diff --git a/include/dt-bindings/clock/mt7622-clk.h b/include/dt-bindings/clock/mt7622-clk.h
index 3e514ed51d15..e9d77f0e8bce 100644
--- a/include/dt-bindings/clock/mt7622-clk.h
+++ b/include/dt-bindings/clock/mt7622-clk.h
@@ -235,7 +235,8 @@
 #define CLK_AUDIO_MEM_ASRC3		43
 #define CLK_AUDIO_MEM_ASRC4		44
 #define CLK_AUDIO_MEM_ASRC5		45
-#define CLK_AUDIO_NR_CLK		46
+#define CLK_AUDIO_AFE_CONN		46
+#define CLK_AUDIO_NR_CLK		47
 
 /* SSUSBSYS */
 
diff --git a/include/dt-bindings/clock/qcom,rpmcc.h b/include/dt-bindings/clock/qcom,rpmcc.h
index b8337a5fa347..c585b82b9c05 100644
--- a/include/dt-bindings/clock/qcom,rpmcc.h
+++ b/include/dt-bindings/clock/qcom,rpmcc.h
@@ -40,6 +40,11 @@
 #define RPM_SMI_CLK				22
 #define RPM_SMI_A_CLK				23
 #define RPM_PLL4_CLK				24
+#define RPM_XO_D0				25
+#define RPM_XO_D1				26
+#define RPM_XO_A0				27
+#define RPM_XO_A1				28
+#define RPM_XO_A2				29
 
 /* SMD RPM clocks */
 #define RPM_SMD_XO_CLK_SRC				0
diff --git a/include/dt-bindings/clock/r8a77965-cpg-mssr.h b/include/dt-bindings/clock/r8a77965-cpg-mssr.h
new file mode 100644
index 000000000000..6d3b5a9a6084
--- /dev/null
+++ b/include/dt-bindings/clock/r8a77965-cpg-mssr.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018 Jacopo Mondi <jacopo+renesas@jmondi.org>
+ */
+#ifndef __DT_BINDINGS_CLOCK_R8A77965_CPG_MSSR_H__
+#define __DT_BINDINGS_CLOCK_R8A77965_CPG_MSSR_H__
+
+#include <dt-bindings/clock/renesas-cpg-mssr.h>
+
+/* r8a77965 CPG Core Clocks */
+#define R8A77965_CLK_Z			0
+#define R8A77965_CLK_ZR			1
+#define R8A77965_CLK_ZG			2
+#define R8A77965_CLK_ZTR		3
+#define R8A77965_CLK_ZTRD2		4
+#define R8A77965_CLK_ZT			5
+#define R8A77965_CLK_ZX			6
+#define R8A77965_CLK_S0D1		7
+#define R8A77965_CLK_S0D2		8
+#define R8A77965_CLK_S0D3		9
+#define R8A77965_CLK_S0D4		10
+#define R8A77965_CLK_S0D6		11
+#define R8A77965_CLK_S0D8		12
+#define R8A77965_CLK_S0D12		13
+#define R8A77965_CLK_S1D1		14
+#define R8A77965_CLK_S1D2		15
+#define R8A77965_CLK_S1D4		16
+#define R8A77965_CLK_S2D1		17
+#define R8A77965_CLK_S2D2		18
+#define R8A77965_CLK_S2D4		19
+#define R8A77965_CLK_S3D1		20
+#define R8A77965_CLK_S3D2		21
+#define R8A77965_CLK_S3D4		22
+#define R8A77965_CLK_LB			23
+#define R8A77965_CLK_CL			24
+#define R8A77965_CLK_ZB3		25
+#define R8A77965_CLK_ZB3D2		26
+#define R8A77965_CLK_CR			27
+#define R8A77965_CLK_CRD2		28
+#define R8A77965_CLK_SD0H		29
+#define R8A77965_CLK_SD0		30
+#define R8A77965_CLK_SD1H		31
+#define R8A77965_CLK_SD1		32
+#define R8A77965_CLK_SD2H		33
+#define R8A77965_CLK_SD2		34
+#define R8A77965_CLK_SD3H		35
+#define R8A77965_CLK_SD3		36
+#define R8A77965_CLK_SSP2		37
+#define R8A77965_CLK_SSP1		38
+#define R8A77965_CLK_SSPRS		39
+#define R8A77965_CLK_RPC		40
+#define R8A77965_CLK_RPCD2		41
+#define R8A77965_CLK_MSO		42
+#define R8A77965_CLK_CANFD		43
+#define R8A77965_CLK_HDMI		44
+#define R8A77965_CLK_CSI0		45
+#define R8A77965_CLK_CP			46
+#define R8A77965_CLK_CPEX		47
+#define R8A77965_CLK_R			48
+#define R8A77965_CLK_OSC		49
+
+#endif /* __DT_BINDINGS_CLOCK_R8A77965_CPG_MSSR_H__ */
diff --git a/include/dt-bindings/clock/r8a77980-cpg-mssr.h b/include/dt-bindings/clock/r8a77980-cpg-mssr.h
new file mode 100644
index 000000000000..a4c0d76c392e
--- /dev/null
+++ b/include/dt-bindings/clock/r8a77980-cpg-mssr.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright (C) 2018 Renesas Electronics Corp.
+ * Copyright (C) 2018 Cogent Embedded, Inc.
+ */
+#ifndef __DT_BINDINGS_CLOCK_R8A77980_CPG_MSSR_H__
+#define __DT_BINDINGS_CLOCK_R8A77980_CPG_MSSR_H__
+
+#include <dt-bindings/clock/renesas-cpg-mssr.h>
+
+/* r8a77980 CPG Core Clocks */
+#define R8A77980_CLK_Z2			0
+#define R8A77980_CLK_ZR			1
+#define R8A77980_CLK_ZTR		2
+#define R8A77980_CLK_ZTRD2		3
+#define R8A77980_CLK_ZT			4
+#define R8A77980_CLK_ZX			5
+#define R8A77980_CLK_S0D1		6
+#define R8A77980_CLK_S0D2		7
+#define R8A77980_CLK_S0D3		8
+#define R8A77980_CLK_S0D4		9
+#define R8A77980_CLK_S0D6		10
+#define R8A77980_CLK_S0D12		11
+#define R8A77980_CLK_S0D24		12
+#define R8A77980_CLK_S1D1		13
+#define R8A77980_CLK_S1D2		14
+#define R8A77980_CLK_S1D4		15
+#define R8A77980_CLK_S2D1		16
+#define R8A77980_CLK_S2D2		17
+#define R8A77980_CLK_S2D4		18
+#define R8A77980_CLK_S3D1		19
+#define R8A77980_CLK_S3D2		20
+#define R8A77980_CLK_S3D4		21
+#define R8A77980_CLK_LB			22
+#define R8A77980_CLK_CL			23
+#define R8A77980_CLK_ZB3		24
+#define R8A77980_CLK_ZB3D2		25
+#define R8A77980_CLK_ZB3D4		26
+#define R8A77980_CLK_SD0H		27
+#define R8A77980_CLK_SD0		28
+#define R8A77980_CLK_RPC		29
+#define R8A77980_CLK_RPCD2		30
+#define R8A77980_CLK_MSO		31
+#define R8A77980_CLK_CANFD		32
+#define R8A77980_CLK_CSI0		33
+#define R8A77980_CLK_CP			34
+#define R8A77980_CLK_CPEX		35
+#define R8A77980_CLK_R			36
+#define R8A77980_CLK_OSC		37
+
+#endif /* __DT_BINDINGS_CLOCK_R8A77980_CPG_MSSR_H__ */
diff --git a/include/dt-bindings/clock/rk3328-cru.h b/include/dt-bindings/clock/rk3328-cru.h
index d2b26a4b43eb..a82a0109faff 100644
--- a/include/dt-bindings/clock/rk3328-cru.h
+++ b/include/dt-bindings/clock/rk3328-cru.h
@@ -193,7 +193,6 @@
 #define HCLK_VPU_PRE		324
 #define HCLK_VIO_PRE		325
 #define HCLK_VPU		326
-#define HCLK_VIO		327
 #define HCLK_BUS_PRE		328
 #define HCLK_PERI_PRE		329
 #define HCLK_H264		330
diff --git a/include/dt-bindings/clock/sprd,sc9860-clk.h b/include/dt-bindings/clock/sprd,sc9860-clk.h
index 4cb202f090c2..f2ab4631df0d 100644
--- a/include/dt-bindings/clock/sprd,sc9860-clk.h
+++ b/include/dt-bindings/clock/sprd,sc9860-clk.h
@@ -229,7 +229,26 @@
 #define	CLK_SDIO1_2X_EN		65
 #define	CLK_SDIO2_2X_EN		66
 #define	CLK_EMMC_2X_EN		67
-#define CLK_AON_GATE_NUM	(CLK_EMMC_2X_EN + 1)
+#define	CLK_ARCH_RTC_EB		68
+#define	CLK_KPB_RTC_EB		69
+#define	CLK_AON_SYST_RTC_EB	70
+#define	CLK_AP_SYST_RTC_EB	71
+#define	CLK_AON_TMR_RTC_EB	72
+#define	CLK_AP_TMR0_RTC_EB	73
+#define	CLK_EIC_RTC_EB		74
+#define	CLK_EIC_RTCDV5_EB	75
+#define	CLK_AP_WDG_RTC_EB	76
+#define	CLK_AP_TMR1_RTC_EB	77
+#define	CLK_AP_TMR2_RTC_EB	78
+#define	CLK_DCXO_TMR_RTC_EB	79
+#define	CLK_BB_CAL_RTC_EB	80
+#define	CLK_AVS_BIG_RTC_EB	81
+#define	CLK_AVS_LIT_RTC_EB	82
+#define	CLK_AVS_GPU0_RTC_EB	83
+#define	CLK_AVS_GPU1_RTC_EB	84
+#define	CLK_GPU_TS_EB		85
+#define	CLK_RTCDV10_EB		86
+#define	CLK_AON_GATE_NUM	(CLK_RTCDV10_EB + 1)
 
 #define	CLK_LIT_MCU		0
 #define	CLK_BIG_MCU		1
diff --git a/include/dt-bindings/clock/stm32fx-clock.h b/include/dt-bindings/clock/stm32fx-clock.h
index 49bb3c203e5c..58d8b515be55 100644
--- a/include/dt-bindings/clock/stm32fx-clock.h
+++ b/include/dt-bindings/clock/stm32fx-clock.h
@@ -33,11 +33,12 @@
 #define CLK_SAI2		11
 #define CLK_I2SQ_PDIV		12
 #define CLK_SAIQ_PDIV		13
-
-#define END_PRIMARY_CLK		14
-
 #define CLK_HSI			14
 #define CLK_SYSCLK		15
+#define CLK_F469_DSI		16
+
+#define END_PRIMARY_CLK		17
+
 #define CLK_HDMI_CEC		16
 #define CLK_SPDIF		17
 #define CLK_USART1		18
diff --git a/include/dt-bindings/clock/stm32mp1-clks.h b/include/dt-bindings/clock/stm32mp1-clks.h
new file mode 100644
index 000000000000..86e3ec662ef4
--- /dev/null
+++ b/include/dt-bindings/clock/stm32mp1-clks.h
@@ -0,0 +1,254 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+/*
+ * Copyright (C) STMicroelectronics 2018 - All Rights Reserved
+ * Author: Gabriel Fernandez <gabriel.fernandez@st.com> for STMicroelectronics.
+ */
+
+#ifndef _DT_BINDINGS_STM32MP1_CLKS_H_
+#define _DT_BINDINGS_STM32MP1_CLKS_H_
+
+/* OSCILLATOR clocks */
+#define CK_HSE		0
+#define CK_CSI		1
+#define CK_LSI		2
+#define CK_LSE		3
+#define CK_HSI		4
+#define CK_HSE_DIV2	5
+
+/* Bus clocks */
+#define TIM2		6
+#define TIM3		7
+#define TIM4		8
+#define TIM5		9
+#define TIM6		10
+#define TIM7		11
+#define TIM12		12
+#define TIM13		13
+#define TIM14		14
+#define LPTIM1		15
+#define SPI2		16
+#define SPI3		17
+#define USART2		18
+#define USART3		19
+#define UART4		20
+#define UART5		21
+#define UART7		22
+#define UART8		23
+#define I2C1		24
+#define I2C2		25
+#define I2C3		26
+#define I2C5		27
+#define SPDIF		28
+#define CEC		29
+#define DAC12		30
+#define MDIO		31
+#define TIM1		32
+#define TIM8		33
+#define TIM15		34
+#define TIM16		35
+#define TIM17		36
+#define SPI1		37
+#define SPI4		38
+#define SPI5		39
+#define USART6		40
+#define SAI1		41
+#define SAI2		42
+#define SAI3		43
+#define DFSDM		44
+#define FDCAN		45
+#define LPTIM2		46
+#define LPTIM3		47
+#define LPTIM4		48
+#define LPTIM5		49
+#define SAI4		50
+#define SYSCFG		51
+#define VREF		52
+#define TMPSENS		53
+#define PMBCTRL		54
+#define HDP		55
+#define LTDC		56
+#define DSI		57
+#define IWDG2		58
+#define USBPHY		59
+#define STGENRO		60
+#define SPI6		61
+#define I2C4		62
+#define I2C6		63
+#define USART1		64
+#define RTCAPB		65
+#define TZC		66
+#define TZPC		67
+#define IWDG1		68
+#define BSEC		69
+#define STGEN		70
+#define DMA1		71
+#define DMA2		72
+#define DMAMUX		73
+#define ADC12		74
+#define USBO		75
+#define SDMMC3		76
+#define DCMI		77
+#define CRYP2		78
+#define HASH2		79
+#define RNG2		80
+#define CRC2		81
+#define HSEM		82
+#define IPCC		83
+#define GPIOA		84
+#define GPIOB		85
+#define GPIOC		86
+#define GPIOD		87
+#define GPIOE		88
+#define GPIOF		89
+#define GPIOG		90
+#define GPIOH		91
+#define GPIOI		92
+#define GPIOJ		93
+#define GPIOK		94
+#define GPIOZ		95
+#define CRYP1		96
+#define HASH1		97
+#define RNG1		98
+#define BKPSRAM		99
+#define MDMA		100
+#define GPU		101
+#define ETHCK		102
+#define ETHTX		103
+#define ETHRX		104
+#define ETHMAC		105
+#define FMC		106
+#define QSPI		107
+#define SDMMC1		108
+#define SDMMC2		109
+#define CRC1		110
+#define USBH		111
+#define ETHSTP		112
+
+/* Kernel clocks */
+#define SDMMC1_K	118
+#define SDMMC2_K	119
+#define SDMMC3_K	120
+#define FMC_K		121
+#define QSPI_K		122
+#define ETHCK_K		123
+#define RNG1_K		124
+#define RNG2_K		125
+#define GPU_K		126
+#define USBPHY_K	127
+#define STGEN_K		128
+#define SPDIF_K		129
+#define SPI1_K		130
+#define SPI2_K		131
+#define SPI3_K		132
+#define SPI4_K		133
+#define SPI5_K		134
+#define SPI6_K		135
+#define CEC_K		136
+#define I2C1_K		137
+#define I2C2_K		138
+#define I2C3_K		139
+#define I2C4_K		140
+#define I2C5_K		141
+#define I2C6_K		142
+#define LPTIM1_K	143
+#define LPTIM2_K	144
+#define LPTIM3_K	145
+#define LPTIM4_K	146
+#define LPTIM5_K	147
+#define USART1_K	148
+#define USART2_K	149
+#define USART3_K	150
+#define UART4_K		151
+#define UART5_K		152
+#define USART6_K	153
+#define UART7_K		154
+#define UART8_K		155
+#define DFSDM_K		156
+#define FDCAN_K		157
+#define SAI1_K		158
+#define SAI2_K		159
+#define SAI3_K		160
+#define SAI4_K		161
+#define ADC12_K		162
+#define DSI_K		163
+#define DSI_PX		164
+#define ADFSDM_K	165
+#define USBO_K		166
+#define LTDC_PX		167
+#define DAC12_K		168
+#define ETHPTP_K	169
+
+/* PLL */
+#define PLL1		176
+#define PLL2		177
+#define PLL3		178
+#define PLL4		179
+
+/* ODF */
+#define PLL1_P		180
+#define PLL1_Q		181
+#define PLL1_R		182
+#define PLL2_P		183
+#define PLL2_Q		184
+#define PLL2_R		185
+#define PLL3_P		186
+#define PLL3_Q		187
+#define PLL3_R		188
+#define PLL4_P		189
+#define PLL4_Q		190
+#define PLL4_R		191
+
+/* AUX */
+#define RTC		192
+
+/* MCLK */
+#define CK_PER		193
+#define CK_MPU		194
+#define CK_AXI		195
+#define CK_MCU		196
+
+/* Time base */
+#define TIM2_K		197
+#define TIM3_K		198
+#define TIM4_K		199
+#define TIM5_K		200
+#define TIM6_K		201
+#define TIM7_K		202
+#define TIM12_K		203
+#define TIM13_K		204
+#define TIM14_K		205
+#define TIM1_K		206
+#define TIM8_K		207
+#define TIM15_K		208
+#define TIM16_K		209
+#define TIM17_K		210
+
+/* MCO clocks */
+#define CK_MCO1		211
+#define CK_MCO2		212
+
+/* TRACE & DEBUG clocks */
+#define DBG		213
+#define CK_DBG		214
+#define CK_TRACE	215
+
+/* DDR */
+#define DDRC1		220
+#define DDRC1LP		221
+#define DDRC2		222
+#define DDRC2LP		223
+#define DDRPHYC		224
+#define DDRPHYCLP	225
+#define DDRCAPB		226
+#define DDRCAPBLP	227
+#define AXIDCG		228
+#define DDRPHYCAPB	229
+#define DDRPHYCAPBLP	230
+#define DDRPERFM	231
+
+#define STM32MP1_LAST_CLK 232
+
+#define LTDC_K		LTDC_PX
+#define ETHMAC_K	ETHCK_K
+
+#endif /* _DT_BINDINGS_STM32MP1_CLKS_H_ */
diff --git a/include/dt-bindings/clock/stratix10-clock.h b/include/dt-bindings/clock/stratix10-clock.h
new file mode 100644
index 000000000000..0ac1c90a18bf
--- /dev/null
+++ b/include/dt-bindings/clock/stratix10-clock.h
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2017, Intel Corporation
+ */
+
+#ifndef __STRATIX10_CLOCK_H
+#define __STRATIX10_CLOCK_H
+
+/* fixed rate clocks */
+#define STRATIX10_OSC1			0
+#define STRATIX10_CB_INTOSC_HS_DIV2_CLK	1
+#define STRATIX10_CB_INTOSC_LS_CLK	2
+#define STRATIX10_F2S_FREE_CLK		3
+
+/* fixed factor clocks */
+#define STRATIX10_L4_SYS_FREE_CLK	4
+#define STRATIX10_MPU_PERIPH_CLK	5
+#define STRATIX10_MPU_L2RAM_CLK		6
+#define STRATIX10_SDMMC_CIU_CLK		7
+
+/* PLL clocks */
+#define STRATIX10_MAIN_PLL_CLK		8
+#define STRATIX10_PERIPH_PLL_CLK	9
+#define STRATIX10_BOOT_CLK		10
+
+/* Periph clocks */
+#define STRATIX10_MAIN_MPU_BASE_CLK	11
+#define STRATIX10_MAIN_NOC_BASE_CLK	12
+#define STRATIX10_MAIN_EMACA_CLK	13
+#define STRATIX10_MAIN_EMACB_CLK	14
+#define STRATIX10_MAIN_EMAC_PTP_CLK	15
+#define STRATIX10_MAIN_GPIO_DB_CLK	16
+#define STRATIX10_MAIN_SDMMC_CLK	17
+#define STRATIX10_MAIN_S2F_USR0_CLK	18
+#define STRATIX10_MAIN_S2F_USR1_CLK	19
+#define STRATIX10_MAIN_PSI_REF_CLK	20
+
+#define STRATIX10_PERI_MPU_BASE_CLK	21
+#define STRATIX10_PERI_NOC_BASE_CLK	22
+#define STRATIX10_PERI_EMACA_CLK	23
+#define STRATIX10_PERI_EMACB_CLK	24
+#define STRATIX10_PERI_EMAC_PTP_CLK	25
+#define STRATIX10_PERI_GPIO_DB_CLK	26
+#define STRATIX10_PERI_SDMMC_CLK	27
+#define STRATIX10_PERI_S2F_USR0_CLK	28
+#define STRATIX10_PERI_S2F_USR1_CLK	29
+#define STRATIX10_PERI_PSI_REF_CLK	30
+
+#define STRATIX10_MPU_FREE_CLK		31
+#define STRATIX10_NOC_FREE_CLK		32
+#define STRATIX10_S2F_USR0_CLK		33
+#define STRATIX10_NOC_CLK		34
+#define STRATIX10_EMAC_A_FREE_CLK	35
+#define STRATIX10_EMAC_B_FREE_CLK	36
+#define STRATIX10_EMAC_PTP_FREE_CLK	37
+#define STRATIX10_GPIO_DB_FREE_CLK	38
+#define STRATIX10_SDMMC_FREE_CLK	39
+#define STRATIX10_S2F_USER1_FREE_CLK	40
+#define STRATIX10_PSI_REF_FREE_CLK	41
+
+/* Gate clocks */
+#define STRATIX10_MPU_CLK		42
+#define STRATIX10_L4_MAIN_CLK		43
+#define STRATIX10_L4_MP_CLK		44
+#define STRATIX10_L4_SP_CLK		45
+#define STRATIX10_CS_AT_CLK		46
+#define STRATIX10_CS_TRACE_CLK		47
+#define STRATIX10_CS_PDBG_CLK		48
+#define STRATIX10_CS_TIMER_CLK		49
+#define STRATIX10_S2F_USER0_CLK		50
+#define STRATIX10_S2F_USER1_CLK		51
+#define STRATIX10_EMAC0_CLK		52
+#define STRATIX10_EMAC1_CLK		53
+#define STRATIX10_EMAC2_CLK		54
+#define STRATIX10_EMAC_PTP_CLK		55
+#define STRATIX10_GPIO_DB_CLK		56
+#define STRATIX10_SDMMC_CLK		57
+#define STRATIX10_PSI_REF_CLK		58
+#define STRATIX10_USB_CLK		59
+#define STRATIX10_SPI_M_CLK		60
+#define STRATIX10_NAND_CLK		61
+#define STRATIX10_NUM_CLKS		62
+
+#endif	/* __STRATIX10_CLOCK_H */
diff --git a/include/dt-bindings/clock/sun50i-h6-ccu.h b/include/dt-bindings/clock/sun50i-h6-ccu.h
new file mode 100644
index 000000000000..a1545cd60e75
--- /dev/null
+++ b/include/dt-bindings/clock/sun50i-h6-ccu.h
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR MIT) */
+/*
+ * Copyright (C) 2017 Icenowy Zheng <icenowy@aosc.io>
+ */
+
+#ifndef _DT_BINDINGS_CLK_SUN50I_H6_H_
+#define _DT_BINDINGS_CLK_SUN50I_H6_H_
+
+#define CLK_PLL_PERIPH0		3
+
+#define CLK_CPUX		21
+
+#define CLK_APB1		26
+
+#define CLK_DE			29
+#define CLK_BUS_DE		30
+#define CLK_DEINTERLACE		31
+#define CLK_BUS_DEINTERLACE	32
+#define CLK_GPU			33
+#define CLK_BUS_GPU		34
+#define CLK_CE			35
+#define CLK_BUS_CE		36
+#define CLK_VE			37
+#define CLK_BUS_VE		38
+#define CLK_EMCE		39
+#define CLK_BUS_EMCE		40
+#define CLK_VP9			41
+#define CLK_BUS_VP9		42
+#define CLK_BUS_DMA		43
+#define CLK_BUS_MSGBOX		44
+#define CLK_BUS_SPINLOCK	45
+#define CLK_BUS_HSTIMER		46
+#define CLK_AVS			47
+#define CLK_BUS_DBG		48
+#define CLK_BUS_PSI		49
+#define CLK_BUS_PWM		50
+#define CLK_BUS_IOMMU		51
+
+#define CLK_MBUS_DMA		53
+#define CLK_MBUS_VE		54
+#define CLK_MBUS_CE		55
+#define CLK_MBUS_TS		56
+#define CLK_MBUS_NAND		57
+#define CLK_MBUS_CSI		58
+#define CLK_MBUS_DEINTERLACE	59
+
+#define CLK_NAND0		61
+#define CLK_NAND1		62
+#define CLK_BUS_NAND		63
+#define CLK_MMC0		64
+#define CLK_MMC1		65
+#define CLK_MMC2		66
+#define CLK_BUS_MMC0		67
+#define CLK_BUS_MMC1		68
+#define CLK_BUS_MMC2		69
+#define CLK_BUS_UART0		70
+#define CLK_BUS_UART1		71
+#define CLK_BUS_UART2		72
+#define CLK_BUS_UART3		73
+#define CLK_BUS_I2C0		74
+#define CLK_BUS_I2C1		75
+#define CLK_BUS_I2C2		76
+#define CLK_BUS_I2C3		77
+#define CLK_BUS_SCR0		78
+#define CLK_BUS_SCR1		79
+#define CLK_SPI0		80
+#define CLK_SPI1		81
+#define CLK_BUS_SPI0		82
+#define CLK_BUS_SPI1		83
+#define CLK_BUS_EMAC		84
+#define CLK_TS			85
+#define CLK_BUS_TS		86
+#define CLK_IR_TX		87
+#define CLK_BUS_IR_TX		88
+#define CLK_BUS_THS		89
+#define CLK_I2S3		90
+#define CLK_I2S0		91
+#define CLK_I2S1		92
+#define CLK_I2S2		93
+#define CLK_BUS_I2S0		94
+#define CLK_BUS_I2S1		95
+#define CLK_BUS_I2S2		96
+#define CLK_BUS_I2S3		97
+#define CLK_SPDIF		98
+#define CLK_BUS_SPDIF		99
+#define CLK_DMIC		100
+#define CLK_BUS_DMIC		101
+#define CLK_AUDIO_HUB		102
+#define CLK_BUS_AUDIO_HUB	103
+#define CLK_USB_OHCI0		104
+#define CLK_USB_PHY0		105
+#define CLK_USB_PHY1		106
+#define CLK_USB_OHCI3		107
+#define CLK_USB_PHY3		108
+#define CLK_USB_HSIC_12M	109
+#define CLK_USB_HSIC		110
+#define CLK_BUS_OHCI0		111
+#define CLK_BUS_OHCI3		112
+#define CLK_BUS_EHCI0		113
+#define CLK_BUS_XHCI		114
+#define CLK_BUS_EHCI3		115
+#define CLK_BUS_OTG		116
+#define CLK_PCIE_REF_100M	117
+#define CLK_PCIE_REF		118
+#define CLK_PCIE_REF_OUT	119
+#define CLK_PCIE_MAXI		120
+#define CLK_PCIE_AUX		121
+#define CLK_BUS_PCIE		122
+#define CLK_HDMI		123
+#define CLK_HDMI_SLOW		124
+#define CLK_HDMI_CEC		125
+#define CLK_BUS_HDMI		126
+#define CLK_BUS_TCON_TOP	127
+#define CLK_TCON_LCD0		128
+#define CLK_BUS_TCON_LCD0	129
+#define CLK_TCON_TV0		130
+#define CLK_BUS_TCON_TV0	131
+#define CLK_CSI_CCI		132
+#define CLK_CSI_TOP		133
+#define CLK_CSI_MCLK		134
+#define CLK_BUS_CSI		135
+#define CLK_HDCP		136
+#define CLK_BUS_HDCP		137
+
+#endif /* _DT_BINDINGS_CLK_SUN50I_H6_H_ */
diff --git a/include/dt-bindings/clock/sun8i-h3-ccu.h b/include/dt-bindings/clock/sun8i-h3-ccu.h
index e139fe5c62ec..c5f7e9a70968 100644
--- a/include/dt-bindings/clock/sun8i-h3-ccu.h
+++ b/include/dt-bindings/clock/sun8i-h3-ccu.h
@@ -43,6 +43,8 @@
 #ifndef _DT_BINDINGS_CLK_SUN8I_H3_H_
 #define _DT_BINDINGS_CLK_SUN8I_H3_H_
 
+#define CLK_PLL_VIDEO		6
+
 #define CLK_PLL_PERIPH0		9
 
 #define CLK_CPUX		14
diff --git a/include/dt-bindings/clock/tegra210-car.h b/include/dt-bindings/clock/tegra210-car.h
index 6422314e46eb..6b77e721f6b1 100644
--- a/include/dt-bindings/clock/tegra210-car.h
+++ b/include/dt-bindings/clock/tegra210-car.h
@@ -95,7 +95,7 @@
 #define TEGRA210_CLK_CSITE 73
 /* 74 */
 /* 75 */
-/* 76 */
+#define TEGRA210_CLK_LA 76
 /* 77 */
 #define TEGRA210_CLK_SOC_THERM 78
 #define TEGRA210_CLK_DTV 79
diff --git a/include/dt-bindings/reset/sun50i-h6-ccu.h b/include/dt-bindings/reset/sun50i-h6-ccu.h
new file mode 100644
index 000000000000..81106f455097
--- /dev/null
+++ b/include/dt-bindings/reset/sun50i-h6-ccu.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR MIT) */
+/*
+ * Copyright (C) 2017 Icenowy Zheng <icenowy@aosc.io>
+ */
+
+#ifndef _DT_BINDINGS_RESET_SUN50I_H6_H_
+#define _DT_BINDINGS_RESET_SUN50I_H6_H_
+
+#define RST_MBUS		0
+#define RST_BUS_DE		1
+#define RST_BUS_DEINTERLACE	2
+#define RST_BUS_GPU		3
+#define RST_BUS_CE		4
+#define RST_BUS_VE		5
+#define RST_BUS_EMCE		6
+#define RST_BUS_VP9		7
+#define RST_BUS_DMA		8
+#define RST_BUS_MSGBOX		9
+#define RST_BUS_SPINLOCK	10
+#define RST_BUS_HSTIMER		11
+#define RST_BUS_DBG		12
+#define RST_BUS_PSI		13
+#define RST_BUS_PWM		14
+#define RST_BUS_IOMMU		15
+#define RST_BUS_DRAM		16
+#define RST_BUS_NAND		17
+#define RST_BUS_MMC0		18
+#define RST_BUS_MMC1		19
+#define RST_BUS_MMC2		20
+#define RST_BUS_UART0		21
+#define RST_BUS_UART1		22
+#define RST_BUS_UART2		23
+#define RST_BUS_UART3		24
+#define RST_BUS_I2C0		25
+#define RST_BUS_I2C1		26
+#define RST_BUS_I2C2		27
+#define RST_BUS_I2C3		28
+#define RST_BUS_SCR0		29
+#define RST_BUS_SCR1		30
+#define RST_BUS_SPI0		31
+#define RST_BUS_SPI1		32
+#define RST_BUS_EMAC		33
+#define RST_BUS_TS		34
+#define RST_BUS_IR_TX		35
+#define RST_BUS_THS		36
+#define RST_BUS_I2S0		37
+#define RST_BUS_I2S1		38
+#define RST_BUS_I2S2		39
+#define RST_BUS_I2S3		40
+#define RST_BUS_SPDIF		41
+#define RST_BUS_DMIC		42
+#define RST_BUS_AUDIO_HUB	43
+#define RST_USB_PHY0		44
+#define RST_USB_PHY1		45
+#define RST_USB_PHY3		46
+#define RST_USB_HSIC		47
+#define RST_BUS_OHCI0		48
+#define RST_BUS_OHCI3		49
+#define RST_BUS_EHCI0		50
+#define RST_BUS_XHCI		51
+#define RST_BUS_EHCI3		52
+#define RST_BUS_OTG		53
+#define RST_BUS_PCIE		54
+#define RST_PCIE_POWERUP	55
+#define RST_BUS_HDMI		56
+#define RST_BUS_HDMI_SUB	57
+#define RST_BUS_TCON_TOP	58
+#define RST_BUS_TCON_LCD0	59
+#define RST_BUS_TCON_TV0	60
+#define RST_BUS_CSI		61
+#define RST_BUS_HDCP		62
+
+#endif /* _DT_BINDINGS_RESET_SUN50I_H6_H_ */
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 02924ae2527e..24f03941ada8 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -57,11 +57,15 @@ struct vgic_global {
 	/* Physical address of vgic virtual cpu interface */
 	phys_addr_t		vcpu_base;
 
-	/* GICV mapping */
+	/* GICV mapping, kernel VA */
 	void __iomem		*vcpu_base_va;
+	/* GICV mapping, HYP VA */
+	void __iomem		*vcpu_hyp_va;
 
-	/* virtual control interface mapping */
+	/* virtual control interface mapping, kernel VA */
 	void __iomem		*vctrl_base;
+	/* virtual control interface mapping, HYP VA */
+	void __iomem		*vctrl_hyp;
 
 	/* Number of implemented list registers */
 	int			nr_lr;
@@ -209,10 +213,6 @@ struct vgic_dist {
 
 	int			nr_spis;
 
-	/* TODO: Consider moving to global state */
-	/* Virtual control interface mapping */
-	void __iomem		*vctrl_base;
-
 	/* base addresses in guest physical address space: */
 	gpa_t			vgic_dist_base;		/* distributor */
 	union {
@@ -263,7 +263,6 @@ struct vgic_dist {
 struct vgic_v2_cpu_if {
 	u32		vgic_hcr;
 	u32		vgic_vmcr;
-	u64		vgic_elrsr;	/* Saved only */
 	u32		vgic_apr;
 	u32		vgic_lr[VGIC_V2_MAX_LRS];
 };
@@ -272,7 +271,6 @@ struct vgic_v3_cpu_if {
 	u32		vgic_hcr;
 	u32		vgic_vmcr;
 	u32		vgic_sre;	/* Restored only, change ignored */
-	u32		vgic_elrsr;	/* Saved only */
 	u32		vgic_ap0r[4];
 	u32		vgic_ap1r[4];
 	u64		vgic_lr[VGIC_V3_MAX_LRS];
diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h
index 2f7a29242b87..38cd77b39a64 100644
--- a/include/linux/acpi_iort.h
+++ b/include/linux/acpi_iort.h
@@ -26,7 +26,8 @@
 #define IORT_IRQ_MASK(irq)		(irq & 0xffffffffULL)
 #define IORT_IRQ_TRIGGER_MASK(irq)	((irq >> 32) & 0xffffffffULL)
 
-int iort_register_domain_token(int trans_id, struct fwnode_handle *fw_node);
+int iort_register_domain_token(int trans_id, phys_addr_t base,
+			       struct fwnode_handle *fw_node);
 void iort_deregister_domain_token(int trans_id);
 struct fwnode_handle *iort_find_domain_token(int trans_id);
 #ifdef CONFIG_ACPI_IORT
@@ -38,6 +39,7 @@ int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id);
 /* IOMMU interface */
 void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *size);
 const struct iommu_ops *iort_iommu_configure(struct device *dev);
+int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head);
 #else
 static inline void acpi_iort_init(void) { }
 static inline u32 iort_msi_map_rid(struct device *dev, u32 req_id)
@@ -52,6 +54,9 @@ static inline void iort_dma_setup(struct device *dev, u64 *dma_addr,
 static inline const struct iommu_ops *iort_iommu_configure(
 				      struct device *dev)
 { return NULL; }
+static inline
+int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head)
+{ return 0; }
 #endif
 
 #endif /* __ACPI_IORT_H__ */
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 3e4ce54d84ab..f6be4b0b6c18 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -28,6 +28,7 @@ void bdi_put(struct backing_dev_info *bdi);
 
 __printf(2, 3)
 int bdi_register(struct backing_dev_info *bdi, const char *fmt, ...);
+__printf(2, 0)
 int bdi_register_va(struct backing_dev_info *bdi, const char *fmt,
 		    va_list args);
 int bdi_register_owner(struct backing_dev_info *bdi, struct device *owner);
@@ -175,7 +176,7 @@ static inline int wb_congested(struct bdi_writeback *wb, int cong_bits)
 }
 
 long congestion_wait(int sync, long timeout);
-long wait_iff_congested(struct pglist_data *pgdat, int sync, long timeout);
+long wait_iff_congested(int sync, long timeout);
 
 static inline bool bdi_cap_synchronous_io(struct backing_dev_info *bdi)
 {
@@ -329,7 +330,7 @@ static inline bool inode_to_wb_is_valid(struct inode *inode)
  * @inode: inode of interest
  *
  * Returns the wb @inode is currently associated with.  The caller must be
- * holding either @inode->i_lock, @inode->i_mapping->tree_lock, or the
+ * holding either @inode->i_lock, the i_pages lock, or the
  * associated wb's list_lock.
  */
 static inline struct bdi_writeback *inode_to_wb(const struct inode *inode)
@@ -337,7 +338,7 @@ static inline struct bdi_writeback *inode_to_wb(const struct inode *inode)
 #ifdef CONFIG_LOCKDEP
 	WARN_ON_ONCE(debug_locks &&
 		     (!lockdep_is_held(&inode->i_lock) &&
-		      !lockdep_is_held(&inode->i_mapping->tree_lock) &&
+		      !lockdep_is_held(&inode->i_mapping->i_pages.xa_lock) &&
 		      !lockdep_is_held(&inode->i_wb->list_lock)));
 #endif
 	return inode->i_wb;
@@ -349,7 +350,7 @@ static inline struct bdi_writeback *inode_to_wb(const struct inode *inode)
  * @lockedp: temp bool output param, to be passed to the end function
  *
  * The caller wants to access the wb associated with @inode but isn't
- * holding inode->i_lock, mapping->tree_lock or wb->list_lock.  This
+ * holding inode->i_lock, the i_pages lock or wb->list_lock.  This
  * function determines the wb associated with @inode and ensures that the
  * association doesn't change until the transaction is finished with
  * unlocked_inode_to_wb_end().
@@ -370,11 +371,11 @@ unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp)
 	*lockedp = smp_load_acquire(&inode->i_state) & I_WB_SWITCH;
 
 	if (unlikely(*lockedp))
-		spin_lock_irq(&inode->i_mapping->tree_lock);
+		xa_lock_irq(&inode->i_mapping->i_pages);
 
 	/*
-	 * Protected by either !I_WB_SWITCH + rcu_read_lock() or tree_lock.
-	 * inode_to_wb() will bark.  Deref directly.
+	 * Protected by either !I_WB_SWITCH + rcu_read_lock() or the i_pages
+	 * lock.  inode_to_wb() will bark.  Deref directly.
 	 */
 	return inode->i_wb;
 }
@@ -387,7 +388,7 @@ unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp)
 static inline void unlocked_inode_to_wb_end(struct inode *inode, bool locked)
 {
 	if (unlikely(locked))
-		spin_unlock_irq(&inode->i_mapping->tree_lock);
+		xa_unlock_irq(&inode->i_mapping->i_pages);
 
 	rcu_read_unlock();
 }
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index b0abe21d6cc9..4955e0863b83 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -61,6 +61,8 @@ struct linux_binprm {
 	unsigned interp_flags;
 	unsigned interp_data;
 	unsigned long loader, exec;
+
+	struct rlimit rlim_stack; /* Saved RLIMIT_STACK used during exec. */
 } __randomize_layout;
 
 #define BINPRM_FLAGS_ENFORCE_NONDUMP_BIT 0
@@ -118,6 +120,7 @@ extern int __must_check remove_arg_zero(struct linux_binprm *);
 extern int search_binary_handler(struct linux_binprm *);
 extern int flush_old_exec(struct linux_binprm * bprm);
 extern void setup_new_exec(struct linux_binprm * bprm);
+extern void finalize_exec(struct linux_binprm *bprm);
 extern void would_dump(struct linux_binprm *, struct file *);
 
 extern int suid_dumpable;
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 8efcf49796a3..e3986f4b3461 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -183,7 +183,6 @@ enum {
 	BLK_MQ_S_STOPPED	= 0,
 	BLK_MQ_S_TAG_ACTIVE	= 1,
 	BLK_MQ_S_SCHED_RESTART	= 2,
-	BLK_MQ_S_START_ON_RUN	= 3,
 
 	BLK_MQ_MAX_DEPTH	= 10240,
 
@@ -270,7 +269,6 @@ void blk_mq_unquiesce_queue(struct request_queue *q);
 void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
 bool blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
 void blk_mq_run_hw_queues(struct request_queue *q, bool async);
-void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
 void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
 		busy_tag_iter_fn *fn, void *priv);
 void blk_mq_freeze_queue(struct request_queue *q);
diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index 59042d5ac520..3901927cf6a0 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -204,6 +204,7 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin
 	 CEPH_FEATURE_OSD_PRIMARY_AFFINITY |	\
 	 CEPH_FEATURE_MSGR_KEEPALIVE2 |		\
 	 CEPH_FEATURE_OSD_POOLRESEND |		\
+	 CEPH_FEATURE_MDS_QUOTA |		\
 	 CEPH_FEATURE_CRUSH_V4 |		\
 	 CEPH_FEATURE_NEW_OSDOP_ENCODING |	\
 	 CEPH_FEATURE_SERVER_JEWEL |		\
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index 88dd51381aaf..7ecfc88314d8 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -134,6 +134,7 @@ struct ceph_dir_layout {
 #define CEPH_MSG_CLIENT_LEASE           0x311
 #define CEPH_MSG_CLIENT_SNAP            0x312
 #define CEPH_MSG_CLIENT_CAPRELEASE      0x313
+#define CEPH_MSG_CLIENT_QUOTA           0x314
 
 /* pool ops */
 #define CEPH_MSG_POOLOP_REPLY           48
@@ -807,4 +808,20 @@ struct ceph_mds_snap_realm {
 } __attribute__ ((packed));
 /* followed by my snap list, then prior parent snap list */
 
+/*
+ * quotas
+ */
+struct ceph_mds_quota {
+	__le64 ino;		/* ino */
+	struct ceph_timespec rctime;
+	__le64 rbytes;		/* dir stats */
+	__le64 rfiles;
+	__le64 rsubdirs;
+	__u8 struct_v;		/* compat */
+	__u8 struct_compat;
+	__le32 struct_len;
+	__le64 max_bytes;	/* quota max. bytes */
+	__le64 max_files;	/* quota max. files */
+} __attribute__ ((packed));
+
 #endif
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index c2ec44cf5098..49c93b9308d7 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -262,6 +262,7 @@ extern struct kmem_cache *ceph_cap_cachep;
 extern struct kmem_cache *ceph_cap_flush_cachep;
 extern struct kmem_cache *ceph_dentry_cachep;
 extern struct kmem_cache *ceph_file_cachep;
+extern struct kmem_cache *ceph_dir_file_cachep;
 
 /* ceph_common.c */
 extern bool libceph_compatible(void *data);
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index ead9d85f1c11..c7dfcb8a1fb2 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -76,6 +76,7 @@ enum ceph_msg_data_type {
 #ifdef CONFIG_BLOCK
 	CEPH_MSG_DATA_BIO,	/* data source/destination is a bio list */
 #endif /* CONFIG_BLOCK */
+	CEPH_MSG_DATA_BVECS,	/* data source/destination is a bio_vec array */
 };
 
 static __inline__ bool ceph_msg_data_type_valid(enum ceph_msg_data_type type)
@@ -87,22 +88,106 @@ static __inline__ bool ceph_msg_data_type_valid(enum ceph_msg_data_type type)
 #ifdef CONFIG_BLOCK
 	case CEPH_MSG_DATA_BIO:
 #endif /* CONFIG_BLOCK */
+	case CEPH_MSG_DATA_BVECS:
 		return true;
 	default:
 		return false;
 	}
 }
 
+#ifdef CONFIG_BLOCK
+
+struct ceph_bio_iter {
+	struct bio *bio;
+	struct bvec_iter iter;
+};
+
+#define __ceph_bio_iter_advance_step(it, n, STEP) do {			      \
+	unsigned int __n = (n), __cur_n;				      \
+									      \
+	while (__n) {							      \
+		BUG_ON(!(it)->iter.bi_size);				      \
+		__cur_n = min((it)->iter.bi_size, __n);			      \
+		(void)(STEP);						      \
+		bio_advance_iter((it)->bio, &(it)->iter, __cur_n);	      \
+		if (!(it)->iter.bi_size && (it)->bio->bi_next) {	      \
+			dout("__ceph_bio_iter_advance_step next bio\n");      \
+			(it)->bio = (it)->bio->bi_next;			      \
+			(it)->iter = (it)->bio->bi_iter;		      \
+		}							      \
+		__n -= __cur_n;						      \
+	}								      \
+} while (0)
+
+/*
+ * Advance @it by @n bytes.
+ */
+#define ceph_bio_iter_advance(it, n)					      \
+	__ceph_bio_iter_advance_step(it, n, 0)
+
+/*
+ * Advance @it by @n bytes, executing BVEC_STEP for each bio_vec.
+ */
+#define ceph_bio_iter_advance_step(it, n, BVEC_STEP)			      \
+	__ceph_bio_iter_advance_step(it, n, ({				      \
+		struct bio_vec bv;					      \
+		struct bvec_iter __cur_iter;				      \
+									      \
+		__cur_iter = (it)->iter;				      \
+		__cur_iter.bi_size = __cur_n;				      \
+		__bio_for_each_segment(bv, (it)->bio, __cur_iter, __cur_iter) \
+			(void)(BVEC_STEP);				      \
+	}))
+
+#endif /* CONFIG_BLOCK */
+
+struct ceph_bvec_iter {
+	struct bio_vec *bvecs;
+	struct bvec_iter iter;
+};
+
+#define __ceph_bvec_iter_advance_step(it, n, STEP) do {			      \
+	BUG_ON((n) > (it)->iter.bi_size);				      \
+	(void)(STEP);							      \
+	bvec_iter_advance((it)->bvecs, &(it)->iter, (n));		      \
+} while (0)
+
+/*
+ * Advance @it by @n bytes.
+ */
+#define ceph_bvec_iter_advance(it, n)					      \
+	__ceph_bvec_iter_advance_step(it, n, 0)
+
+/*
+ * Advance @it by @n bytes, executing BVEC_STEP for each bio_vec.
+ */
+#define ceph_bvec_iter_advance_step(it, n, BVEC_STEP)			      \
+	__ceph_bvec_iter_advance_step(it, n, ({				      \
+		struct bio_vec bv;					      \
+		struct bvec_iter __cur_iter;				      \
+									      \
+		__cur_iter = (it)->iter;				      \
+		__cur_iter.bi_size = (n);				      \
+		for_each_bvec(bv, (it)->bvecs, __cur_iter, __cur_iter)	      \
+			(void)(BVEC_STEP);				      \
+	}))
+
+#define ceph_bvec_iter_shorten(it, n) do {				      \
+	BUG_ON((n) > (it)->iter.bi_size);				      \
+	(it)->iter.bi_size = (n);					      \
+} while (0)
+
 struct ceph_msg_data {
 	struct list_head		links;	/* ceph_msg->data */
 	enum ceph_msg_data_type		type;
 	union {
 #ifdef CONFIG_BLOCK
 		struct {
-			struct bio	*bio;
-			size_t		bio_length;
+			struct ceph_bio_iter	bio_pos;
+			u32			bio_length;
 		};
 #endif /* CONFIG_BLOCK */
+		struct ceph_bvec_iter	bvec_pos;
 		struct {
 			struct page	**pages;	/* NOT OWNER. */
 			size_t		length;		/* total # bytes */
@@ -122,11 +207,9 @@ struct ceph_msg_data_cursor {
 	bool			need_crc;	/* crc update needed */
 	union {
 #ifdef CONFIG_BLOCK
-		struct {				/* bio */
-			struct bio	*bio;		/* bio from list */
-			struct bvec_iter bvec_iter;
-		};
+		struct ceph_bio_iter	bio_iter;
 #endif /* CONFIG_BLOCK */
+		struct bvec_iter	bvec_iter;
 		struct {				/* pages */
 			unsigned int	page_offset;	/* offset in page */
 			unsigned short	page_index;	/* index in array */
@@ -290,9 +373,11 @@ extern void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages,
 extern void ceph_msg_data_add_pagelist(struct ceph_msg *msg,
 				struct ceph_pagelist *pagelist);
 #ifdef CONFIG_BLOCK
-extern void ceph_msg_data_add_bio(struct ceph_msg *msg, struct bio *bio,
-				size_t length);
+void ceph_msg_data_add_bio(struct ceph_msg *msg, struct ceph_bio_iter *bio_pos,
+			   u32 length);
 #endif /* CONFIG_BLOCK */
+void ceph_msg_data_add_bvecs(struct ceph_msg *msg,
+			     struct ceph_bvec_iter *bvec_pos);
 
 extern struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
 				     bool can_fail);
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 52fb37d1c2a5..528ccc943cee 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -57,6 +57,7 @@ enum ceph_osd_data_type {
 #ifdef CONFIG_BLOCK
 	CEPH_OSD_DATA_TYPE_BIO,
 #endif /* CONFIG_BLOCK */
+	CEPH_OSD_DATA_TYPE_BVECS,
 };
 
 struct ceph_osd_data {
@@ -72,10 +73,11 @@ struct ceph_osd_data {
 		struct ceph_pagelist	*pagelist;
 #ifdef CONFIG_BLOCK
 		struct {
-			struct bio	*bio;		/* list of bios */
-			size_t		bio_length;	/* total in list */
+			struct ceph_bio_iter	bio_pos;
+			u32			bio_length;
 		};
 #endif /* CONFIG_BLOCK */
+		struct ceph_bvec_iter	bvec_pos;
 	};
 };
 
@@ -405,10 +407,14 @@ extern void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *,
 					unsigned int which,
 					struct ceph_pagelist *pagelist);
 #ifdef CONFIG_BLOCK
-extern void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *,
-					unsigned int which,
-					struct bio *bio, size_t bio_length);
+void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req,
+				    unsigned int which,
+				    struct ceph_bio_iter *bio_pos,
+				    u32 bio_length);
 #endif /* CONFIG_BLOCK */
+void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req,
+					 unsigned int which,
+					 struct ceph_bvec_iter *bvec_pos);
 
 extern void osd_req_op_cls_request_data_pagelist(struct ceph_osd_request *,
 					unsigned int which,
@@ -418,6 +424,9 @@ extern void osd_req_op_cls_request_data_pages(struct ceph_osd_request *,
 					struct page **pages, u64 length,
 					u32 alignment, bool pages_from_pool,
 					bool own_pages);
+void osd_req_op_cls_request_data_bvecs(struct ceph_osd_request *osd_req,
+				       unsigned int which,
+				       struct bio_vec *bvecs, u32 bytes);
 extern void osd_req_op_cls_response_data_pages(struct ceph_osd_request *,
 					unsigned int which,
 					struct page **pages, u64 length,
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index d41fad99c0fa..e71fb222c7c3 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -5,7 +5,6 @@
 #include <linux/rbtree.h>
 #include <linux/ceph/types.h>
 #include <linux/ceph/decode.h>
-#include <linux/ceph/ceph_fs.h>
 #include <linux/crush/crush.h>
 
 /*
@@ -280,11 +279,6 @@ bool ceph_osds_changed(const struct ceph_osds *old_acting,
 		       const struct ceph_osds *new_acting,
 		       bool any_change);
 
-/* calculate mapping of a file extent to an object */
-extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
-					 u64 off, u64 len,
-					 u64 *bno, u64 *oxoff, u64 *oxlen);
-
 int __ceph_object_locator_to_pg(struct ceph_pg_pool_info *pi,
 				const struct ceph_object_id *oid,
 				const struct ceph_object_locator *oloc,
diff --git a/include/linux/ceph/striper.h b/include/linux/ceph/striper.h
new file mode 100644
index 000000000000..cbd0d24b7148
--- /dev/null
+++ b/include/linux/ceph/striper.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_CEPH_STRIPER_H
+#define _LINUX_CEPH_STRIPER_H
+
+#include <linux/list.h>
+#include <linux/types.h>
+
+struct ceph_file_layout;
+
+void ceph_calc_file_object_mapping(struct ceph_file_layout *l,
+				   u64 off, u64 len,
+				   u64 *objno, u64 *objoff, u32 *xlen);
+
+struct ceph_object_extent {
+	struct list_head oe_item;
+	u64 oe_objno;
+	u64 oe_off;
+	u64 oe_len;
+};
+
+static inline void ceph_object_extent_init(struct ceph_object_extent *ex)
+{
+	INIT_LIST_HEAD(&ex->oe_item);
+}
+
+/*
+ * Called for each mapped stripe unit.
+ *
+ * @bytes: number of bytes mapped, i.e. the minimum of the full length
+ *         requested (file extent length) and the remainder of the stripe
+ *         unit within an object
+ */
+typedef void (*ceph_object_extent_fn_t)(struct ceph_object_extent *ex,
+					u32 bytes, void *arg);
+
+int ceph_file_to_extents(struct ceph_file_layout *l, u64 off, u64 len,
+			 struct list_head *object_extents,
+			 struct ceph_object_extent *alloc_fn(void *arg),
+			 void *alloc_arg,
+			 ceph_object_extent_fn_t action_fn,
+			 void *action_arg);
+int ceph_iterate_extents(struct ceph_file_layout *l, u64 off, u64 len,
+			 struct list_head *object_extents,
+			 ceph_object_extent_fn_t action_fn,
+			 void *action_arg);
+
+struct ceph_file_extent {
+	u64 fe_off;
+	u64 fe_len;
+};
+
+static inline u64 ceph_file_extents_bytes(struct ceph_file_extent *file_extents,
+					  u32 num_file_extents)
+{
+	const struct ceph_file_extent *fe = file_extents;
+	u64 total = 0;	/* running sum of fe_len over the array */
+
+	while (num_file_extents--)
+		total += (fe++)->fe_len;
+
+	return total;
+}
+
+int ceph_extent_to_file(struct ceph_file_layout *l,
+			u64 objno, u64 objoff, u64 objlen,
+			struct ceph_file_extent **file_extents,
+			u32 *num_file_extents);
+
+#endif
diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index f711be6e8c44..210a890008f9 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -399,6 +399,7 @@ struct clk_divider {
 	spinlock_t	*lock;
 };
 
+#define clk_div_mask(width)	((1 << (width)) - 1)
 #define to_clk_divider(_hw) container_of(_hw, struct clk_divider, hw)
 
 #define CLK_DIVIDER_ONE_BASED		BIT(0)
@@ -419,6 +420,10 @@ long divider_round_rate_parent(struct clk_hw *hw, struct clk_hw *parent,
 			       unsigned long rate, unsigned long *prate,
 			       const struct clk_div_table *table,
 			       u8 width, unsigned long flags);
+long divider_ro_round_rate_parent(struct clk_hw *hw, struct clk_hw *parent,
+				  unsigned long rate, unsigned long *prate,
+				  const struct clk_div_table *table, u8 width,
+				  unsigned long flags, unsigned int val);
 int divider_get_val(unsigned long rate, unsigned long parent_rate,
 		const struct clk_div_table *table, u8 width,
 		unsigned long flags);
@@ -449,8 +454,9 @@ void clk_hw_unregister_divider(struct clk_hw *hw);
  *
  * @hw:		handle between common and hardware-specific interfaces
  * @reg:	register controlling multiplexer
+ * @table:	array of register values corresponding to the parent index
  * @shift:	shift to multiplexer bit field
- * @width:	width of mutliplexer bit field
+ * @mask:	mask of multiplexer bit field
  * @flags:	hardware-specific flags
  * @lock:	register lock
  *
@@ -510,6 +516,10 @@ struct clk_hw *clk_hw_register_mux_table(struct device *dev, const char *name,
 		void __iomem *reg, u8 shift, u32 mask,
 		u8 clk_mux_flags, u32 *table, spinlock_t *lock);
 
+int clk_mux_val_to_index(struct clk_hw *hw, u32 *table, unsigned int flags,
+			 unsigned int val);
+unsigned int clk_mux_index_to_val(u32 *table, unsigned int flags, u8 index);
+
 void clk_unregister_mux(struct clk *clk);
 void clk_hw_unregister_mux(struct clk_hw *hw);
 
@@ -774,6 +784,17 @@ static inline long divider_round_rate(struct clk_hw *hw, unsigned long rate,
 					 rate, prate, table, width, flags);
 }
 
+static inline long divider_ro_round_rate(struct clk_hw *hw, unsigned long rate,
+					 unsigned long *prate,
+					 const struct clk_div_table *table,
+					 u8 width, unsigned long flags,
+					 unsigned int val)
+{
+	return divider_ro_round_rate_parent(hw, clk_hw_get_parent(hw),
+					    rate, prate, table, width, flags,
+					    val);
+}
+
 /*
  * FIXME clock api without lock protection
  */
diff --git a/include/linux/clk.h b/include/linux/clk.h
index 4c4ef9f34db3..0dbd0885b2c2 100644
--- a/include/linux/clk.h
+++ b/include/linux/clk.h
@@ -209,7 +209,7 @@ static inline int clk_prepare(struct clk *clk)
 	return 0;
 }
 
-static inline int clk_bulk_prepare(int num_clks, struct clk_bulk_data *clks)
+static inline int __must_check clk_bulk_prepare(int num_clks, struct clk_bulk_data *clks)
 {
 	might_sleep();
 	return 0;
@@ -603,8 +603,8 @@ static inline struct clk *clk_get(struct device *dev, const char *id)
 	return NULL;
 }
 
-static inline int clk_bulk_get(struct device *dev, int num_clks,
-			       struct clk_bulk_data *clks)
+static inline int __must_check clk_bulk_get(struct device *dev, int num_clks,
+					    struct clk_bulk_data *clks)
 {
 	return 0;
 }
@@ -614,8 +614,8 @@ static inline struct clk *devm_clk_get(struct device *dev, const char *id)
 	return NULL;
 }
 
-static inline int devm_clk_bulk_get(struct device *dev, int num_clks,
-				    struct clk_bulk_data *clks)
+static inline int __must_check devm_clk_bulk_get(struct device *dev, int num_clks,
+						 struct clk_bulk_data *clks)
 {
 	return 0;
 }
@@ -645,7 +645,7 @@ static inline int clk_enable(struct clk *clk)
 	return 0;
 }
 
-static inline int clk_bulk_enable(int num_clks, struct clk_bulk_data *clks)
+static inline int __must_check clk_bulk_enable(int num_clks, struct clk_bulk_data *clks)
 {
 	return 0;
 }
@@ -719,8 +719,8 @@ static inline void clk_disable_unprepare(struct clk *clk)
 	clk_unprepare(clk);
 }
 
-static inline int clk_bulk_prepare_enable(int num_clks,
-					  struct clk_bulk_data *clks)
+static inline int __must_check clk_bulk_prepare_enable(int num_clks,
+					struct clk_bulk_data *clks)
 {
 	int ret;
 
diff --git a/include/linux/clk/tegra.h b/include/linux/clk/tegra.h
index d23c9cf26993..afb9edfa5d58 100644
--- a/include/linux/clk/tegra.h
+++ b/include/linux/clk/tegra.h
@@ -128,5 +128,6 @@ extern void tegra210_sata_pll_hw_sequence_start(void);
 extern void tegra210_set_sata_pll_seq_sw(bool state);
 extern void tegra210_put_utmipll_in_iddq(void);
 extern void tegra210_put_utmipll_out_iddq(void);
+extern int tegra210_clk_handle_mbist_war(unsigned int id);
 
 #endif /* __LINUX_CLK_TEGRA_H_ */
diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h
index 7e3bceee3489..a8faa38b1ed6 100644
--- a/include/linux/clk/ti.h
+++ b/include/linux/clk/ti.h
@@ -212,6 +212,7 @@ enum {
  * struct ti_clk_ll_ops - low-level ops for clocks
  * @clk_readl: pointer to register read function
  * @clk_writel: pointer to register write function
+ * @clk_rmw: pointer to register read-modify-write function
  * @clkdm_clk_enable: pointer to clockdomain enable function
  * @clkdm_clk_disable: pointer to clockdomain disable function
  * @clkdm_lookup: pointer to clockdomain lookup function
@@ -227,6 +228,7 @@ enum {
 struct ti_clk_ll_ops {
 	u32	(*clk_readl)(const struct clk_omap_reg *reg);
 	void	(*clk_writel)(u32 val, const struct clk_omap_reg *reg);
+	void	(*clk_rmw)(u32 val, u32 mask, const struct clk_omap_reg *reg);
 	int	(*clkdm_clk_enable)(struct clockdomain *clkdm, struct clk *clk);
 	int	(*clkdm_clk_disable)(struct clockdomain *clkdm,
 				     struct clk *clk);
diff --git a/include/linux/compat.h b/include/linux/compat.h
index f188eab10570..081281ad5772 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -24,6 +24,17 @@
 #include <asm/siginfo.h>
 #include <asm/signal.h>
 
+#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
+/*
+ * It may be useful for an architecture to override the definitions of the
+ * COMPAT_SYSCALL_DEFINE0 and COMPAT_SYSCALL_DEFINEx() macros, in particular
+ * to use a different calling convention for syscalls. To allow for that,
+ * the prototypes for the compat_sys_*() functions below will *not* be included
+ * if CONFIG_ARCH_HAS_SYSCALL_WRAPPER is enabled.
+ */
+#include <asm/syscall_wrapper.h>
+#endif /* CONFIG_ARCH_HAS_SYSCALL_WRAPPER */
+
 #ifndef COMPAT_USE_64BIT_TIME
 #define COMPAT_USE_64BIT_TIME 0
 #endif
@@ -32,10 +43,12 @@
 #define __SC_DELOUSE(t,v) ((__force t)(unsigned long)(v))
 #endif
 
+#ifndef COMPAT_SYSCALL_DEFINE0
 #define COMPAT_SYSCALL_DEFINE0(name) \
 	asmlinkage long compat_sys_##name(void); \
 	ALLOW_ERROR_INJECTION(compat_sys_##name, ERRNO); \
 	asmlinkage long compat_sys_##name(void)
+#endif /* COMPAT_SYSCALL_DEFINE0 */
 
 #define COMPAT_SYSCALL_DEFINE1(name, ...) \
         COMPAT_SYSCALL_DEFINEx(1, _##name, __VA_ARGS__)
@@ -50,18 +63,25 @@
 #define COMPAT_SYSCALL_DEFINE6(name, ...) \
 	COMPAT_SYSCALL_DEFINEx(6, _##name, __VA_ARGS__)
 
-#define COMPAT_SYSCALL_DEFINEx(x, name, ...)				\
-	asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\
-	asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))\
-		__attribute__((alias(__stringify(compat_SyS##name))));  \
-	ALLOW_ERROR_INJECTION(compat_sys##name, ERRNO);	\
-	static inline long C_SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__));\
-	asmlinkage long compat_SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__));\
-	asmlinkage long compat_SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__))\
-	{								\
-		return C_SYSC##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__));	\
-	}								\
-	static inline long C_SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__))
+/*
+ * The asmlinkage stub is aliased to a function named __se_compat_sys_*() which
+ * sign-extends 32-bit ints to longs whenever needed. The actual work is
+ * done within __do_compat_sys_*().
+ */
+#ifndef COMPAT_SYSCALL_DEFINEx
+#define COMPAT_SYSCALL_DEFINEx(x, name, ...)					\
+	asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));	\
+	asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))	\
+		__attribute__((alias(__stringify(__se_compat_sys##name))));	\
+	ALLOW_ERROR_INJECTION(compat_sys##name, ERRNO);				\
+	static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\
+	asmlinkage long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__));	\
+	asmlinkage long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__))	\
+	{									\
+		return __do_compat_sys##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__));\
+	}									\
+	static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
+#endif /* COMPAT_SYSCALL_DEFINEx */
 
 #ifndef compat_user_stack_pointer
 #define compat_user_stack_pointer() current_user_stack_pointer()
@@ -519,7 +539,12 @@ int __compat_save_altstack(compat_stack_t __user *, unsigned long);
  * Please note that these prototypes here are only provided for information
  * purposes, for static analysis, and for linking from the syscall table.
  * These functions should not be called elsewhere from kernel code.
+ *
+ * As the syscall calling convention may be different from the default
+ * for architectures overriding the syscall calling convention, do not
+ * include the prototypes if CONFIG_ARCH_HAS_SYSCALL_WRAPPER is enabled.
  */
+#ifndef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
 asmlinkage long compat_sys_io_setup(unsigned nr_reqs, u32 __user *ctx32p);
 asmlinkage long compat_sys_io_submit(compat_aio_context_t ctx_id, int nr,
 				     u32 __user *iocb);
@@ -957,6 +982,8 @@ asmlinkage long compat_sys_stime(compat_time_t __user *tptr);
 /* obsolete: net/socket.c */
 asmlinkage long compat_sys_socketcall(int call, u32 __user *args);
 
+#endif /* CONFIG_ARCH_HAS_SYSCALL_WRAPPER */
+
 
 /*
  * For most but not all architectures, "am I in a compat syscall?" and
diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index d3f264a5b04d..ceb96ecab96e 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -17,9 +17,6 @@
  */
 #define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
 
-#define randomized_struct_fields_start	struct {
-#define randomized_struct_fields_end	};
-
 /* all clang versions usable with the kernel support KASAN ABI version 5 */
 #define KASAN_ABI_VERSION 5
 
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index e2c7f4369eff..b4bf73f5e38f 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -242,6 +242,9 @@
 #if defined(RANDSTRUCT_PLUGIN) && !defined(__CHECKER__)
 #define __randomize_layout __attribute__((randomize_layout))
 #define __no_randomize_layout __attribute__((no_randomize_layout))
+/* This anon struct can add padding, so only enable it under randstruct. */
+#define randomized_struct_fields_start	struct {
+#define randomized_struct_fields_end	} __randomize_layout;
 #endif
 
 #endif /* GCC_VERSION >= 40500 */
@@ -256,15 +259,6 @@
  */
 #define __visible	__attribute__((externally_visible))
 
-/*
- * RANDSTRUCT_PLUGIN wants to use an anonymous struct, but it is only
- * possible since GCC 4.6. To provide as much build testing coverage
- * as possible, this is used for all GCC 4.6+ builds, and not just on
- * RANDSTRUCT_PLUGIN builds.
- */
-#define randomized_struct_fields_start	struct {
-#define randomized_struct_fields_end	} __randomize_layout;
-
 #endif /* GCC_VERSION >= 40600 */
 
 
diff --git a/include/linux/const.h b/include/linux/const.h
new file mode 100644
index 000000000000..7b55a55f5911
--- /dev/null
+++ b/include/linux/const.h
@@ -0,0 +1,9 @@
+#ifndef _LINUX_CONST_H
+#define _LINUX_CONST_H
+
+#include <uapi/linux/const.h>
+
+#define UL(x)		(_UL(x))
+#define ULL(x)		(_ULL(x))
+
+#endif /* _LINUX_CONST_H */
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 1fe49724da9e..87f48dd932eb 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -960,8 +960,6 @@ extern void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq,
 extern struct freq_attr cpufreq_freq_attr_scaling_available_freqs;
 extern struct freq_attr cpufreq_freq_attr_scaling_boost_freqs;
 extern struct freq_attr *cpufreq_generic_attr[];
-int cpufreq_table_validate_and_show(struct cpufreq_policy *policy,
-				      struct cpufreq_frequency_table *table);
 int cpufreq_table_validate_and_sort(struct cpufreq_policy *policy);
 
 unsigned int cpufreq_generic_get(unsigned int cpu);
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index a806e94c482f..1eefabf1621f 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -135,7 +135,8 @@ extern bool cpuidle_not_available(struct cpuidle_driver *drv,
 				  struct cpuidle_device *dev);
 
 extern int cpuidle_select(struct cpuidle_driver *drv,
-			  struct cpuidle_device *dev);
+			  struct cpuidle_device *dev,
+			  bool *stop_tick);
 extern int cpuidle_enter(struct cpuidle_driver *drv,
 			 struct cpuidle_device *dev, int index);
 extern void cpuidle_reflect(struct cpuidle_device *dev, int index);
@@ -167,7 +168,7 @@ static inline bool cpuidle_not_available(struct cpuidle_driver *drv,
 					 struct cpuidle_device *dev)
 {return true; }
 static inline int cpuidle_select(struct cpuidle_driver *drv,
-				 struct cpuidle_device *dev)
+				 struct cpuidle_device *dev, bool *stop_tick)
 {return -ENODEV; }
 static inline int cpuidle_enter(struct cpuidle_driver *drv,
 				struct cpuidle_device *dev, int index)
@@ -250,7 +251,8 @@ struct cpuidle_governor {
 					struct cpuidle_device *dev);
 
 	int  (*select)		(struct cpuidle_driver *drv,
-					struct cpuidle_device *dev);
+					struct cpuidle_device *dev,
+					bool *stop_tick);
 	void (*reflect)		(struct cpuidle_device *dev, int index);
 };
 
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 0185ecdae135..f9eb22ad341e 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -26,18 +26,42 @@ extern struct attribute_group dax_attribute_group;
 
 #if IS_ENABLED(CONFIG_DAX)
 struct dax_device *dax_get_by_host(const char *host);
+struct dax_device *alloc_dax(void *private, const char *host,
+		const struct dax_operations *ops);
 void put_dax(struct dax_device *dax_dev);
+void kill_dax(struct dax_device *dax_dev);
+void dax_write_cache(struct dax_device *dax_dev, bool wc);
+bool dax_write_cache_enabled(struct dax_device *dax_dev);
 #else
 static inline struct dax_device *dax_get_by_host(const char *host)
 {
 	return NULL;
 }
-
+static inline struct dax_device *alloc_dax(void *private, const char *host,
+		const struct dax_operations *ops)
+{
+	/*
+	 * Callers should check IS_ENABLED(CONFIG_DAX) to know if this
+	 * NULL is an error or expected.
+	 */
+	return NULL;
+}
 static inline void put_dax(struct dax_device *dax_dev)
 {
 }
+static inline void kill_dax(struct dax_device *dax_dev)
+{
+}
+static inline void dax_write_cache(struct dax_device *dax_dev, bool wc)
+{
+}
+static inline bool dax_write_cache_enabled(struct dax_device *dax_dev)
+{
+	return false;
+}
 #endif
 
+struct writeback_control;
 int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff);
 #if IS_ENABLED(CONFIG_FS_DAX)
 int __bdev_dax_supported(struct super_block *sb, int blocksize);
@@ -57,6 +81,8 @@ static inline void fs_put_dax(struct dax_device *dax_dev)
 }
 
 struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev);
+int dax_writeback_mapping_range(struct address_space *mapping,
+		struct block_device *bdev, struct writeback_control *wbc);
 #else
 static inline int bdev_dax_supported(struct super_block *sb, int blocksize)
 {
@@ -76,22 +102,23 @@ static inline struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev)
 {
 	return NULL;
 }
+
+static inline int dax_writeback_mapping_range(struct address_space *mapping,
+		struct block_device *bdev, struct writeback_control *wbc)
+{
+	return -EOPNOTSUPP;
+}
 #endif
 
 int dax_read_lock(void);
 void dax_read_unlock(int id);
-struct dax_device *alloc_dax(void *private, const char *host,
-		const struct dax_operations *ops);
 bool dax_alive(struct dax_device *dax_dev);
-void kill_dax(struct dax_device *dax_dev);
 void *dax_get_private(struct dax_device *dax_dev);
 long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
 		void **kaddr, pfn_t *pfn);
 size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
 		size_t bytes, struct iov_iter *i);
 void dax_flush(struct dax_device *dax_dev, void *addr, size_t size);
-void dax_write_cache(struct dax_device *dax_dev, bool wc);
-bool dax_write_cache_enabled(struct dax_device *dax_dev);
 
 ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
 		const struct iomap_ops *ops);
@@ -121,7 +148,4 @@ static inline bool dax_mapping(struct address_space *mapping)
 	return mapping->host && IS_DAX(mapping->host);
 }
 
-struct writeback_control;
-int dax_writeback_mapping_range(struct address_space *mapping,
-		struct block_device *bdev, struct writeback_control *wbc);
 #endif
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index f838764993eb..861be5cab1df 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -470,7 +470,11 @@ typedef void (*dma_async_tx_callback_result)(void *dma_async_param,
 				const struct dmaengine_result *result);
 
 struct dmaengine_unmap_data {
+#if IS_ENABLED(CONFIG_DMA_ENGINE_RAID)
+	u16 map_cnt;
+#else
 	u8 map_cnt;
+#endif
 	u8 to_cnt;
 	u8 from_cnt;
 	u8 bidi_cnt;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0d798052bd85..760d8da1b6c7 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -13,6 +13,7 @@
 #include <linux/list_lru.h>
 #include <linux/llist.h>
 #include <linux/radix-tree.h>
+#include <linux/xarray.h>
 #include <linux/rbtree.h>
 #include <linux/init.h>
 #include <linux/pid.h>
@@ -390,12 +391,11 @@ int pagecache_write_end(struct file *, struct address_space *mapping,
 
 struct address_space {
 	struct inode		*host;		/* owner: inode, block_device */
-	struct radix_tree_root	page_tree;	/* radix tree of all pages */
-	spinlock_t		tree_lock;	/* and lock protecting it */
+	struct radix_tree_root	i_pages;	/* cached pages */
 	atomic_t		i_mmap_writable;/* count VM_SHARED mappings */
 	struct rb_root_cached	i_mmap;		/* tree of private and shared mappings */
 	struct rw_semaphore	i_mmap_rwsem;	/* protect tree, count, list */
-	/* Protected by tree_lock together with the radix tree */
+	/* Protected by the i_pages lock */
 	unsigned long		nrpages;	/* number of total pages */
 	/* number of shadow or DAX exceptional entries */
 	unsigned long		nrexceptional;
@@ -1321,6 +1321,8 @@ extern int send_sigurg(struct fown_struct *fown);
 
 /* sb->s_iflags to limit user namespace mounts */
 #define SB_I_USERNS_VISIBLE		0x00000010 /* fstype already mounted */
+#define SB_I_IMA_UNVERIFIABLE_SIGNATURE	0x00000020
+#define SB_I_UNTRUSTED_MOUNTER		0x00000040
 
 /* Possible states of 'frozen' field */
 enum {
@@ -1665,7 +1667,7 @@ typedef int (*filldir_t)(struct dir_context *, const char *, int, loff_t, u64,
 			 unsigned);
 
 struct dir_context {
-	const filldir_t actor;
+	filldir_t actor;
 	loff_t pos;
 };
 
@@ -1987,7 +1989,7 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
  *
  * I_WB_SWITCH		Cgroup bdi_writeback switching in progress.  Used to
  *			synchronize competing switching instances and to tell
- *			wb stat updates to grab mapping->tree_lock.  See
+ *			wb stat updates to grab the i_pages lock.  See
  *			inode_switch_wb_work_fn() for details.
  *
  * I_OVL_INUSE		Used by overlayfs to get exclusive ownership on upper
@@ -2443,6 +2445,7 @@ extern int sync_blockdev(struct block_device *bdev);
 extern void kill_bdev(struct block_device *);
 extern struct super_block *freeze_bdev(struct block_device *);
 extern void emergency_thaw_all(void);
+extern void emergency_thaw_bdev(struct super_block *sb);
 extern int thaw_bdev(struct block_device *bdev, struct super_block *sb);
 extern int fsync_bdev(struct block_device *);
 
@@ -2468,6 +2471,11 @@ static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb)
 	return 0;
 }
 
+static inline void emergency_thaw_bdev(struct super_block *sb)
+{
+	/* No-op stub; returns void like the extern prototype. */
+}
+
 static inline void iterate_bdevs(void (*f)(struct block_device *, void *), void *arg)
 {
 }
@@ -3125,6 +3133,10 @@ extern int simple_rmdir(struct inode *, struct dentry *);
 extern int simple_rename(struct inode *, struct dentry *,
 			 struct inode *, struct dentry *, unsigned int);
 extern int noop_fsync(struct file *, loff_t, loff_t, int);
+extern int noop_set_page_dirty(struct page *page);
+extern void noop_invalidatepage(struct page *page, unsigned int offset,
+		unsigned int length);
+extern ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter);
 extern int simple_empty(struct dentry *);
 extern int simple_readpage(struct file *file, struct page *page);
 extern int simple_write_begin(struct file *file, struct address_space *mapping,
diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index 3b03e29e2f1a..34cf0fdd7dc7 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -29,6 +29,18 @@ struct fscache_cache_ops;
 struct fscache_object;
 struct fscache_operation;
 
+enum fscache_obj_ref_trace {
+	fscache_obj_get_add_to_deps,
+	fscache_obj_get_queue,
+	fscache_obj_put_alloc_fail,
+	fscache_obj_put_attach_fail,
+	fscache_obj_put_drop_obj,
+	fscache_obj_put_enq_dep,
+	fscache_obj_put_queue,
+	fscache_obj_put_work,
+	fscache_obj_ref__nr_traces
+};
+
 /*
  * cache tag definition
  */
@@ -123,7 +135,8 @@ extern void fscache_op_work_func(struct work_struct *work);
 extern void fscache_enqueue_operation(struct fscache_operation *);
 extern void fscache_op_complete(struct fscache_operation *, bool);
 extern void fscache_put_operation(struct fscache_operation *);
-extern void fscache_operation_init(struct fscache_operation *,
+extern void fscache_operation_init(struct fscache_cookie *,
+				   struct fscache_operation *,
 				   fscache_operation_processor_t,
 				   fscache_operation_cancel_t,
 				   fscache_operation_release_t);
@@ -185,7 +198,7 @@ static inline void fscache_retrieval_complete(struct fscache_retrieval *op,
 {
 	atomic_sub(n_pages, &op->n_pages);
 	if (atomic_read(&op->n_pages) <= 0)
-		fscache_op_complete(&op->op, true);
+		fscache_op_complete(&op->op, false);
 }
 
 /**
@@ -231,7 +244,8 @@ struct fscache_cache_ops {
 	void (*lookup_complete)(struct fscache_object *object);
 
 	/* increment the usage count on this object (may fail if unmounting) */
-	struct fscache_object *(*grab_object)(struct fscache_object *object);
+	struct fscache_object *(*grab_object)(struct fscache_object *object,
+					      enum fscache_obj_ref_trace why);
 
 	/* pin an object in the cache */
 	int (*pin_object)(struct fscache_object *object);
@@ -254,7 +268,8 @@ struct fscache_cache_ops {
 	void (*drop_object)(struct fscache_object *object);
 
 	/* dispose of a reference to an object */
-	void (*put_object)(struct fscache_object *object);
+	void (*put_object)(struct fscache_object *object,
+			   enum fscache_obj_ref_trace why);
 
 	/* sync a cache */
 	void (*sync_cache)(struct fscache_cache *cache);
@@ -538,7 +553,8 @@ extern bool fscache_object_sleep_till_congested(signed long *timeoutp);
 
 extern enum fscache_checkaux fscache_check_aux(struct fscache_object *object,
 					       const void *data,
-					       uint16_t datalen);
+					       uint16_t datalen,
+					       loff_t object_size);
 
 extern void fscache_object_retrying_stale(struct fscache_object *object);
 
diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index fe0c349684fa..84b90a79d75a 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -22,6 +22,7 @@
 #include <linux/list.h>
 #include <linux/pagemap.h>
 #include <linux/pagevec.h>
+#include <linux/list_bl.h>
 
 #if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE)
 #define fscache_available() (1)
@@ -83,45 +84,15 @@ struct fscache_cookie_def {
 		const void *parent_netfs_data,
 		const void *cookie_netfs_data);
 
-	/* get an index key
-	 * - should store the key data in the buffer
-	 * - should return the amount of data stored
-	 * - not permitted to return an error
-	 * - the netfs data from the cookie being used as the source is
-	 *   presented
-	 */
-	uint16_t (*get_key)(const void *cookie_netfs_data,
-			    void *buffer,
-			    uint16_t bufmax);
-
-	/* get certain file attributes from the netfs data
-	 * - this function can be absent for an index
-	 * - not permitted to return an error
-	 * - the netfs data from the cookie being used as the source is
-	 *   presented
-	 */
-	void (*get_attr)(const void *cookie_netfs_data, uint64_t *size);
-
-	/* get the auxiliary data from netfs data
-	 * - this function can be absent if the index carries no state data
-	 * - should store the auxiliary data in the buffer
-	 * - should return the amount of amount stored
-	 * - not permitted to return an error
-	 * - the netfs data from the cookie being used as the source is
-	 *   presented
-	 */
-	uint16_t (*get_aux)(const void *cookie_netfs_data,
-			    void *buffer,
-			    uint16_t bufmax);
-
 	/* consult the netfs about the state of an object
 	 * - this function can be absent if the index carries no state data
 	 * - the netfs data from the cookie being used as the target is
-	 *   presented, as is the auxiliary data
+	 *   presented, as is the auxiliary data and the object size
 	 */
 	enum fscache_checkaux (*check_aux)(void *cookie_netfs_data,
 					   const void *data,
-					   uint16_t datalen);
+					   uint16_t datalen,
+					   loff_t object_size);
 
 	/* get an extra reference on a read context
 	 * - this function can be absent if the completion function doesn't
@@ -154,7 +125,6 @@ struct fscache_netfs {
 	uint32_t			version;	/* indexing version */
 	const char			*name;		/* filesystem name */
 	struct fscache_cookie		*primary_index;
-	struct list_head		link;		/* internal link */
 };
 
 /*
@@ -173,6 +143,7 @@ struct fscache_cookie {
 	struct hlist_head		backing_objects; /* object(s) backing this file/index */
 	const struct fscache_cookie_def	*def;		/* definition */
 	struct fscache_cookie		*parent;	/* parent of this entry */
+	struct hlist_bl_node		hash_link;	/* Link in hash table */
 	void				*netfs_data;	/* back pointer to netfs */
 	struct radix_tree_root		stores;		/* pages to be stored on this cookie */
 #define FSCACHE_COOKIE_PENDING_TAG	0		/* pages tag: pending write to cache */
@@ -186,6 +157,22 @@ struct fscache_cookie {
 #define FSCACHE_COOKIE_RELINQUISHED	4	/* T if cookie has been relinquished */
 #define FSCACHE_COOKIE_ENABLED		5	/* T if cookie is enabled */
 #define FSCACHE_COOKIE_ENABLEMENT_LOCK	6	/* T if cookie is being en/disabled */
+#define FSCACHE_COOKIE_AUX_UPDATED	8	/* T if the auxiliary data was updated */
+#define FSCACHE_COOKIE_ACQUIRED		9	/* T if cookie is in use */
+#define FSCACHE_COOKIE_RELINQUISHING	10	/* T if cookie is being relinquished */
+
+	u8				type;		/* Type of object */
+	u8				key_len;	/* Length of index key */
+	u8				aux_len;	/* Length of auxiliary data */
+	u32				key_hash;	/* Hash of parent, type, key, len */
+	union {
+		void			*key;		/* Index key */
+		u8			inline_key[16];	/* - If the key is short enough */
+	};
+	union {
+		void			*aux;		/* Auxiliary data */
+		u8			inline_aux[8];	/* - If the aux data is short enough */
+	};
 };
 
 static inline bool fscache_cookie_enabled(struct fscache_cookie *cookie)
@@ -208,10 +195,12 @@ extern void __fscache_release_cache_tag(struct fscache_cache_tag *);
 extern struct fscache_cookie *__fscache_acquire_cookie(
 	struct fscache_cookie *,
 	const struct fscache_cookie_def *,
-	void *, bool);
-extern void __fscache_relinquish_cookie(struct fscache_cookie *, bool);
-extern int __fscache_check_consistency(struct fscache_cookie *);
-extern void __fscache_update_cookie(struct fscache_cookie *);
+	const void *, size_t,
+	const void *, size_t,
+	void *, loff_t, bool);
+extern void __fscache_relinquish_cookie(struct fscache_cookie *, const void *, bool);
+extern int __fscache_check_consistency(struct fscache_cookie *, const void *);
+extern void __fscache_update_cookie(struct fscache_cookie *, const void *);
 extern int __fscache_attr_changed(struct fscache_cookie *);
 extern void __fscache_invalidate(struct fscache_cookie *);
 extern void __fscache_wait_on_invalidate(struct fscache_cookie *);
@@ -228,7 +217,7 @@ extern int __fscache_read_or_alloc_pages(struct fscache_cookie *,
 					 void *,
 					 gfp_t);
 extern int __fscache_alloc_page(struct fscache_cookie *, struct page *, gfp_t);
-extern int __fscache_write_page(struct fscache_cookie *, struct page *, gfp_t);
+extern int __fscache_write_page(struct fscache_cookie *, struct page *, loff_t, gfp_t);
 extern void __fscache_uncache_page(struct fscache_cookie *, struct page *);
 extern bool __fscache_check_page_write(struct fscache_cookie *, struct page *);
 extern void __fscache_wait_on_page_write(struct fscache_cookie *, struct page *);
@@ -238,8 +227,8 @@ extern void __fscache_uncache_all_inode_pages(struct fscache_cookie *,
 					      struct inode *);
 extern void __fscache_readpages_cancel(struct fscache_cookie *cookie,
 				       struct list_head *pages);
-extern void __fscache_disable_cookie(struct fscache_cookie *, bool);
-extern void __fscache_enable_cookie(struct fscache_cookie *,
+extern void __fscache_disable_cookie(struct fscache_cookie *, const void *, bool);
+extern void __fscache_enable_cookie(struct fscache_cookie *, const void *, loff_t,
 				    bool (*)(void *), void *);
 
 /**
@@ -317,8 +306,13 @@ void fscache_release_cache_tag(struct fscache_cache_tag *tag)
  * fscache_acquire_cookie - Acquire a cookie to represent a cache object
  * @parent: The cookie that's to be the parent of this one
  * @def: A description of the cache object, including callback operations
+ * @index_key: The index key for this cookie
+ * @index_key_len: Size of the index key
+ * @aux_data: The auxiliary data for the cookie (may be NULL)
+ * @aux_data_len: Size of the auxiliary data buffer
  * @netfs_data: An arbitrary piece of data to be kept in the cookie to
  * represent the cache object to the netfs
+ * @object_size: The initial size of object
  * @enable: Whether or not to enable a data cookie immediately
  *
  * This function is used to inform FS-Cache about part of an index hierarchy
@@ -332,12 +326,19 @@ static inline
 struct fscache_cookie *fscache_acquire_cookie(
 	struct fscache_cookie *parent,
 	const struct fscache_cookie_def *def,
+	const void *index_key,
+	size_t index_key_len,
+	const void *aux_data,
+	size_t aux_data_len,
 	void *netfs_data,
+	loff_t object_size,
 	bool enable)
 {
 	if (fscache_cookie_valid(parent) && fscache_cookie_enabled(parent))
-		return __fscache_acquire_cookie(parent, def, netfs_data,
-						enable);
+		return __fscache_acquire_cookie(parent, def,
+						index_key, index_key_len,
+						aux_data, aux_data_len,
+						netfs_data, object_size, enable);
 	else
 		return NULL;
 }
@@ -346,36 +347,44 @@ struct fscache_cookie *fscache_acquire_cookie(
  * fscache_relinquish_cookie - Return the cookie to the cache, maybe discarding
  * it
  * @cookie: The cookie being returned
+ * @aux_data: The updated auxiliary data for the cookie (may be NULL)
  * @retire: True if the cache object the cookie represents is to be discarded
  *
  * This function returns a cookie to the cache, forcibly discarding the
- * associated cache object if retire is set to true.
+ * associated cache object if retire is set to true.  The opportunity is
+ * provided to update the auxiliary data in the cache before the object is
+ * disconnected.
  *
  * See Documentation/filesystems/caching/netfs-api.txt for a complete
  * description.
  */
 static inline
-void fscache_relinquish_cookie(struct fscache_cookie *cookie, bool retire)
+void fscache_relinquish_cookie(struct fscache_cookie *cookie,
+			       const void *aux_data,
+			       bool retire)
 {
 	if (fscache_cookie_valid(cookie))
-		__fscache_relinquish_cookie(cookie, retire);
+		__fscache_relinquish_cookie(cookie, aux_data, retire);
 }
 
 /**
- * fscache_check_consistency - Request that if the cache is updated
+ * fscache_check_consistency - Request validation of a cache's auxiliary data
  * @cookie: The cookie representing the cache object
+ * @aux_data: The updated auxiliary data for the cookie (may be NULL)
  *
- * Request an consistency check from fscache, which passes the request
- * to the backing cache.
+ * Request a consistency check from fscache, which passes the request to the
+ * backing cache.  The auxiliary data on the cookie will be updated first if
+ * @aux_data is set.
  *
  * Returns 0 if consistent and -ESTALE if inconsistent.  May also
  * return -ENOMEM and -ERESTARTSYS.
  */
 static inline
-int fscache_check_consistency(struct fscache_cookie *cookie)
+int fscache_check_consistency(struct fscache_cookie *cookie,
+			      const void *aux_data)
 {
 	if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie))
-		return __fscache_check_consistency(cookie);
+		return __fscache_check_consistency(cookie, aux_data);
 	else
 		return 0;
 }
@@ -383,18 +392,20 @@ int fscache_check_consistency(struct fscache_cookie *cookie)
 /**
  * fscache_update_cookie - Request that a cache object be updated
  * @cookie: The cookie representing the cache object
+ * @aux_data: The updated auxiliary data for the cookie (may be NULL)
  *
  * Request an update of the index data for the cache object associated with the
- * cookie.
+ * cookie.  The auxiliary data on the cookie will be updated first if @aux_data
+ * is set.
  *
  * See Documentation/filesystems/caching/netfs-api.txt for a complete
  * description.
  */
 static inline
-void fscache_update_cookie(struct fscache_cookie *cookie)
+void fscache_update_cookie(struct fscache_cookie *cookie, const void *aux_data)
 {
 	if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie))
-		__fscache_update_cookie(cookie);
+		__fscache_update_cookie(cookie, aux_data);
 }
 
 /**
@@ -648,6 +659,7 @@ void fscache_readpages_cancel(struct fscache_cookie *cookie,
  * fscache_write_page - Request storage of a page in the cache
  * @cookie: The cookie representing the cache object
  * @page: The netfs page to store
+ * @object_size: Updated size of object
  * @gfp: The conditions under which memory allocation should be made
  *
  * Request the contents of the netfs page be written into the cache.  This
@@ -665,10 +677,11 @@ void fscache_readpages_cancel(struct fscache_cookie *cookie,
 static inline
 int fscache_write_page(struct fscache_cookie *cookie,
 		       struct page *page,
+		       loff_t object_size,
 		       gfp_t gfp)
 {
 	if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie))
-		return __fscache_write_page(cookie, page, gfp);
+		return __fscache_write_page(cookie, page, object_size, gfp);
 	else
 		return -ENOBUFS;
 }
@@ -780,6 +793,7 @@ void fscache_uncache_all_inode_pages(struct fscache_cookie *cookie,
 /**
  * fscache_disable_cookie - Disable a cookie
  * @cookie: The cookie representing the cache object
+ * @aux_data: The updated auxiliary data for the cookie (may be NULL)
  * @invalidate: Invalidate the backing object
  *
  * Disable a cookie from accepting further alloc, read, write, invalidate,
@@ -790,34 +804,44 @@ void fscache_uncache_all_inode_pages(struct fscache_cookie *cookie,
  *
  * If @invalidate is set, then the backing object will be invalidated and
  * detached, otherwise it will just be detached.
+ *
+ * If @aux_data is set, then auxiliary data will be updated from that.
  */
 static inline
-void fscache_disable_cookie(struct fscache_cookie *cookie, bool invalidate)
+void fscache_disable_cookie(struct fscache_cookie *cookie,
+			    const void *aux_data,
+			    bool invalidate)
 {
 	if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie))
-		__fscache_disable_cookie(cookie, invalidate);
+		__fscache_disable_cookie(cookie, aux_data, invalidate);
 }
 
 /**
  * fscache_enable_cookie - Reenable a cookie
  * @cookie: The cookie representing the cache object
+ * @aux_data: The updated auxiliary data for the cookie (may be NULL)
+ * @object_size: Current size of object
  * @can_enable: A function to permit enablement once lock is held
  * @data: Data for can_enable()
  *
  * Reenable a previously disabled cookie, allowing it to accept further alloc,
  * read, write, invalidate, update or acquire operations.  An attempt will be
- * made to immediately reattach the cookie to a backing object.
+ * made to immediately reattach the cookie to a backing object.  If @aux_data
+ * is set, the auxiliary data attached to the cookie will be updated.
  *
  * The can_enable() function is called (if not NULL) once the enablement lock
  * is held to rule on whether enablement is still permitted to go ahead.
  */
 static inline
 void fscache_enable_cookie(struct fscache_cookie *cookie,
+			   const void *aux_data,
+			   loff_t object_size,
 			   bool (*can_enable)(void *data),
 			   void *data)
 {
 	if (fscache_cookie_valid(cookie) && !fscache_cookie_enabled(cookie))
-		__fscache_enable_cookie(cookie, can_enable, data);
+		__fscache_enable_cookie(cookie, aux_data, object_size,
+					can_enable, data);
 }
 
 #endif /* _LINUX_FSCACHE_H */
diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index 325017ad9311..39988924de3a 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -80,76 +80,145 @@
 struct hmm;
 
 /*
- * hmm_pfn_t - HMM uses its own pfn type to keep several flags per page
+ * hmm_pfn_flag_e - HMM flag enums
  *
  * Flags:
- * HMM_PFN_VALID: pfn is valid
- * HMM_PFN_READ:  CPU page table has read permission set
+ * HMM_PFN_VALID: pfn is valid. It has, at least, read permission.
  * HMM_PFN_WRITE: CPU page table has write permission set
+ * HMM_PFN_DEVICE_PRIVATE: private device memory (ZONE_DEVICE)
+ *
+ * The driver provides a flags array. If the driver's valid bit for an entry is
+ * bit 3, i.e. (entry & (1 << 3)) is true when the entry is valid, then it must
+ * provide an array in hmm_range.flags with flags[HMM_PFN_VALID] == 1 << 3.
+ * The same logic applies to all flags. This is the same idea as vm_page_prot
+ * in a vma, except that it is per device driver rather than per architecture.
+ */
+enum hmm_pfn_flag_e {
+	HMM_PFN_VALID = 0,
+	HMM_PFN_WRITE,
+	HMM_PFN_DEVICE_PRIVATE,
+	HMM_PFN_FLAG_MAX
+};
+
+/*
+ * hmm_pfn_value_e - HMM pfn special value
+ *
+ * Values:
  * HMM_PFN_ERROR: corresponding CPU page table entry points to poisoned memory
- * HMM_PFN_EMPTY: corresponding CPU page table entry is pte_none()
+ * HMM_PFN_NONE: corresponding CPU page table entry is pte_none()
  * HMM_PFN_SPECIAL: corresponding CPU page table entry is special; i.e., the
  *      result of vm_insert_pfn() or vm_insert_page(). Therefore, it should not
  *      be mirrored by a device, because the entry will never have HMM_PFN_VALID
  *      set and the pfn value is undefined.
- * HMM_PFN_DEVICE_UNADDRESSABLE: unaddressable device memory (ZONE_DEVICE)
+ *
+ * The driver provides the entry values for none, error and special entries.
+ * It may alias values (e.g. use the same value for error and special), but it
+ * should not alias none with either error or special.
+ *
+ * The HMM pfn value returned by hmm_vma_get_pfns() or hmm_vma_fault() will be:
+ * hmm_range.values[HMM_PFN_ERROR] if the CPU page table entry is poisoned,
+ * hmm_range.values[HMM_PFN_NONE] if there is no CPU page table entry,
+ * hmm_range.values[HMM_PFN_SPECIAL] if the CPU page table entry is special.
  */
-typedef unsigned long hmm_pfn_t;
+enum hmm_pfn_value_e {
+	HMM_PFN_ERROR,
+	HMM_PFN_NONE,
+	HMM_PFN_SPECIAL,
+	HMM_PFN_VALUE_MAX
+};
 
-#define HMM_PFN_VALID (1 << 0)
-#define HMM_PFN_READ (1 << 1)
-#define HMM_PFN_WRITE (1 << 2)
-#define HMM_PFN_ERROR (1 << 3)
-#define HMM_PFN_EMPTY (1 << 4)
-#define HMM_PFN_SPECIAL (1 << 5)
-#define HMM_PFN_DEVICE_UNADDRESSABLE (1 << 6)
-#define HMM_PFN_SHIFT 7
+/*
+ * struct hmm_range - track invalidation lock on virtual address range
+ *
+ * @vma: the vm area struct for the range
+ * @list: all range lock are on a list
+ * @start: range virtual start address (inclusive)
+ * @end: range virtual end address (exclusive)
+ * @pfns: array of pfns (big enough for the range)
+ * @flags: pfn flags to match device driver page table
+ * @values: pfn value for some special case (none, special, error, ...)
+ * @pfn_shift: pfn shift value (should be <= PAGE_SHIFT)
+ * @valid: pfns array did not change since it has been fill by an HMM function
+ */
+struct hmm_range {
+	struct vm_area_struct	*vma;
+	struct list_head	list;
+	unsigned long		start;
+	unsigned long		end;
+	uint64_t		*pfns;
+	const uint64_t		*flags;
+	const uint64_t		*values;
+	uint8_t			pfn_shift;
+	bool			valid;
+};
 
 /*
- * hmm_pfn_t_to_page() - return struct page pointed to by a valid hmm_pfn_t
- * @pfn: hmm_pfn_t to convert to struct page
- * Returns: struct page pointer if pfn is a valid hmm_pfn_t, NULL otherwise
+ * hmm_pfn_to_page() - return struct page pointed to by a valid HMM pfn
+ * @range: range used to decode HMM pfn value
+ * @pfn: HMM pfn value to get corresponding struct page from
+ * Returns: struct page pointer if pfn is a valid HMM pfn, NULL otherwise
  *
- * If the hmm_pfn_t is valid (ie valid flag set) then return the struct page
- * matching the pfn value stored in the hmm_pfn_t. Otherwise return NULL.
+ * If the HMM pfn is valid (ie valid flag set) then return the struct page
+ * matching the pfn value stored in the HMM pfn. Otherwise return NULL.
  */
-static inline struct page *hmm_pfn_t_to_page(hmm_pfn_t pfn)
+static inline struct page *hmm_pfn_to_page(const struct hmm_range *range,
+					   uint64_t pfn)
 {
-	if (!(pfn & HMM_PFN_VALID))
+	if (pfn == range->values[HMM_PFN_NONE])
+		return NULL;
+	if (pfn == range->values[HMM_PFN_ERROR])
 		return NULL;
-	return pfn_to_page(pfn >> HMM_PFN_SHIFT);
+	if (pfn == range->values[HMM_PFN_SPECIAL])
+		return NULL;
+	if (!(pfn & range->flags[HMM_PFN_VALID]))
+		return NULL;
+	return pfn_to_page(pfn >> range->pfn_shift);
 }
 
 /*
- * hmm_pfn_t_to_pfn() - return pfn value store in a hmm_pfn_t
- * @pfn: hmm_pfn_t to extract pfn from
- * Returns: pfn value if hmm_pfn_t is valid, -1UL otherwise
+ * hmm_pfn_to_pfn() - return pfn value stored in an HMM pfn
+ * @range: range used to decode HMM pfn value
+ * @pfn: HMM pfn value to extract pfn from
+ * Returns: pfn value if HMM pfn is valid, -1UL otherwise
  */
-static inline unsigned long hmm_pfn_t_to_pfn(hmm_pfn_t pfn)
+static inline unsigned long hmm_pfn_to_pfn(const struct hmm_range *range,
+					   uint64_t pfn)
 {
-	if (!(pfn & HMM_PFN_VALID))
+	if (pfn == range->values[HMM_PFN_NONE])
+		return -1UL;
+	if (pfn == range->values[HMM_PFN_ERROR])
+		return -1UL;
+	if (pfn == range->values[HMM_PFN_SPECIAL])
+		return -1UL;
+	if (!(pfn & range->flags[HMM_PFN_VALID]))
 		return -1UL;
-	return (pfn >> HMM_PFN_SHIFT);
+	return (pfn >> range->pfn_shift);
 }
 
 /*
- * hmm_pfn_t_from_page() - create a valid hmm_pfn_t value from struct page
- * @page: struct page pointer for which to create the hmm_pfn_t
- * Returns: valid hmm_pfn_t for the page
+ * hmm_pfn_from_page() - create a valid HMM pfn value from struct page
+ * @range: range used to encode HMM pfn value
+ * @page: struct page pointer for which to create the HMM pfn
+ * Returns: valid HMM pfn for the page
  */
-static inline hmm_pfn_t hmm_pfn_t_from_page(struct page *page)
+static inline uint64_t hmm_pfn_from_page(const struct hmm_range *range,
+					 struct page *page)
 {
-	return (page_to_pfn(page) << HMM_PFN_SHIFT) | HMM_PFN_VALID;
+	return (page_to_pfn(page) << range->pfn_shift) |
+		range->flags[HMM_PFN_VALID];
 }
 
 /*
- * hmm_pfn_t_from_pfn() - create a valid hmm_pfn_t value from pfn
- * @pfn: pfn value for which to create the hmm_pfn_t
- * Returns: valid hmm_pfn_t for the pfn
+ * hmm_pfn_from_pfn() - create a valid HMM pfn value from pfn
+ * @range: range used to encode HMM pfn value
+ * @pfn: pfn value for which to create the HMM pfn
+ * Returns: valid HMM pfn for the pfn
  */
-static inline hmm_pfn_t hmm_pfn_t_from_pfn(unsigned long pfn)
+static inline uint64_t hmm_pfn_from_pfn(const struct hmm_range *range,
+					unsigned long pfn)
 {
-	return (pfn << HMM_PFN_SHIFT) | HMM_PFN_VALID;
+	return (pfn << range->pfn_shift) |
+		range->flags[HMM_PFN_VALID];
 }
 
 
@@ -218,6 +287,16 @@ enum hmm_update_type {
  * @update: callback to update range on a device
  */
 struct hmm_mirror_ops {
+	/* release() - release hmm_mirror
+	 *
+	 * @mirror: pointer to struct hmm_mirror
+	 *
+	 * This is called when the mm_struct is being released.
+	 * The callback should make sure no references to the mirror occur
+	 * after the callback returns.
+	 */
+	void (*release)(struct hmm_mirror *mirror);
+
 	/* sync_cpu_device_pagetables() - synchronize page tables
 	 *
 	 * @mirror: pointer to struct hmm_mirror
@@ -262,23 +341,6 @@ void hmm_mirror_unregister(struct hmm_mirror *mirror);
 
 
 /*
- * struct hmm_range - track invalidation lock on virtual address range
- *
- * @list: all range lock are on a list
- * @start: range virtual start address (inclusive)
- * @end: range virtual end address (exclusive)
- * @pfns: array of pfns (big enough for the range)
- * @valid: pfns array did not change since it has been fill by an HMM function
- */
-struct hmm_range {
-	struct list_head	list;
-	unsigned long		start;
-	unsigned long		end;
-	hmm_pfn_t		*pfns;
-	bool			valid;
-};
-
-/*
  * To snapshot the CPU page table, call hmm_vma_get_pfns(), then take a device
  * driver lock that serializes device page table updates, then call
  * hmm_vma_range_done(), to check if the snapshot is still valid. The same
@@ -291,17 +353,13 @@ struct hmm_range {
  *
  * IF YOU DO NOT FOLLOW THE ABOVE RULE THE SNAPSHOT CONTENT MIGHT BE INVALID !
  */
-int hmm_vma_get_pfns(struct vm_area_struct *vma,
-		     struct hmm_range *range,
-		     unsigned long start,
-		     unsigned long end,
-		     hmm_pfn_t *pfns);
-bool hmm_vma_range_done(struct vm_area_struct *vma, struct hmm_range *range);
+int hmm_vma_get_pfns(struct hmm_range *range);
+bool hmm_vma_range_done(struct hmm_range *range);
 
 
 /*
  * Fault memory on behalf of device driver. Unlike handle_mm_fault(), this will
- * not migrate any device memory back to system memory. The hmm_pfn_t array will
+ * not migrate any device memory back to system memory. The HMM pfn array will
  * be updated with the fault result and current snapshot of the CPU page table
  * for the range.
  *
@@ -310,22 +368,26 @@ bool hmm_vma_range_done(struct vm_area_struct *vma, struct hmm_range *range);
  * function returns -EAGAIN.
  *
  * Return value does not reflect if the fault was successful for every single
- * address or not. Therefore, the caller must to inspect the hmm_pfn_t array to
+ * address or not. Therefore, the caller must inspect the HMM pfn array to
  * determine fault status for each address.
  *
  * Trying to fault inside an invalid vma will result in -EINVAL.
  *
  * See the function description in mm/hmm.c for further documentation.
  */
-int hmm_vma_fault(struct vm_area_struct *vma,
-		  struct hmm_range *range,
-		  unsigned long start,
-		  unsigned long end,
-		  hmm_pfn_t *pfns,
-		  bool write,
-		  bool block);
-#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */
+int hmm_vma_fault(struct hmm_range *range, bool block);
 
+/* Below are for HMM internal use only! Not to be used by device driver! */
+void hmm_mm_destroy(struct mm_struct *mm);
+
+static inline void hmm_mm_init(struct mm_struct *mm)
+{
+	mm->hmm = NULL;
+}
+#else /* IS_ENABLED(CONFIG_HMM_MIRROR) */
+static inline void hmm_mm_destroy(struct mm_struct *mm) {}
+static inline void hmm_mm_init(struct mm_struct *mm) {}
+#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */
 
 #if IS_ENABLED(CONFIG_DEVICE_PRIVATE) ||  IS_ENABLED(CONFIG_DEVICE_PUBLIC)
 struct hmm_devmem;
@@ -498,23 +560,9 @@ struct hmm_device {
 struct hmm_device *hmm_device_new(void *drvdata);
 void hmm_device_put(struct hmm_device *hmm_device);
 #endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
-#endif /* IS_ENABLED(CONFIG_HMM) */
-
-/* Below are for HMM internal use only! Not to be used by device driver! */
-#if IS_ENABLED(CONFIG_HMM_MIRROR)
-void hmm_mm_destroy(struct mm_struct *mm);
-
-static inline void hmm_mm_init(struct mm_struct *mm)
-{
-	mm->hmm = NULL;
-}
-#else /* IS_ENABLED(CONFIG_HMM_MIRROR) */
-static inline void hmm_mm_destroy(struct mm_struct *mm) {}
-static inline void hmm_mm_init(struct mm_struct *mm) {}
-#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */
-
-
 #else /* IS_ENABLED(CONFIG_HMM) */
 static inline void hmm_mm_destroy(struct mm_struct *mm) {}
 static inline void hmm_mm_init(struct mm_struct *mm) {}
+#endif /* IS_ENABLED(CONFIG_HMM) */
+
 #endif /* LINUX_HMM_H */
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 78f456fcd242..a2656c3ebe81 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -424,6 +424,7 @@ static inline ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
 }
 
 extern u64 hrtimer_get_next_event(void);
+extern u64 hrtimer_next_event_without(const struct hrtimer *exclude);
 
 extern bool hrtimer_active(const struct hrtimer *timer);
 
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 2048f3c3b68a..192ed8fbc403 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -26,7 +26,6 @@
 #define _HYPERV_H
 
 #include <uapi/linux/hyperv.h>
-#include <uapi/asm/hyperv.h>
 
 #include <linux/types.h>
 #include <linux/scatterlist.h>
diff --git a/include/linux/i2c-pca-platform.h b/include/linux/i2c-pca-platform.h
index 0e5f7c77d1d8..c37329432a8e 100644
--- a/include/linux/i2c-pca-platform.h
+++ b/include/linux/i2c-pca-platform.h
@@ -3,9 +3,6 @@
 #define I2C_PCA9564_PLATFORM_H
 
 struct i2c_pca9564_pf_platform_data {
-	int gpio;		/* pin to reset chip. driver will work when
-				 * not supplied (negative value), but it
-				 * cannot exit some error conditions then */
 	int i2c_clock_speed;	/* values are defined in linux/i2c-algo-pca.h */
 	int timeout;		/* timeout in jiffies */
 };
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 419a38e7c315..44ad14e016b5 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -47,6 +47,7 @@ struct i2c_algorithm;
 struct i2c_adapter;
 struct i2c_client;
 struct i2c_driver;
+struct i2c_device_identity;
 union i2c_smbus_data;
 struct i2c_board_info;
 enum i2c_slave_event;
@@ -186,8 +187,37 @@ extern s32 i2c_smbus_write_i2c_block_data(const struct i2c_client *client,
 extern s32
 i2c_smbus_read_i2c_block_data_or_emulated(const struct i2c_client *client,
 					  u8 command, u8 length, u8 *values);
+int i2c_get_device_id(const struct i2c_client *client,
+		      struct i2c_device_identity *id);
 #endif /* I2C */
 
+/**
+ * struct i2c_device_identity - i2c client device identification
+ * @manufacturer_id: 0 - 4095, database maintained by NXP
+ * @part_id: 0 - 511, according to manufacturer
+ * @die_revision: 0 - 7, according to manufacturer
+ */
+struct i2c_device_identity {
+	u16 manufacturer_id;
+#define I2C_DEVICE_ID_NXP_SEMICONDUCTORS                0
+#define I2C_DEVICE_ID_NXP_SEMICONDUCTORS_1              1
+#define I2C_DEVICE_ID_NXP_SEMICONDUCTORS_2              2
+#define I2C_DEVICE_ID_NXP_SEMICONDUCTORS_3              3
+#define I2C_DEVICE_ID_RAMTRON_INTERNATIONAL             4
+#define I2C_DEVICE_ID_ANALOG_DEVICES                    5
+#define I2C_DEVICE_ID_STMICROELECTRONICS                6
+#define I2C_DEVICE_ID_ON_SEMICONDUCTOR                  7
+#define I2C_DEVICE_ID_SPRINTEK_CORPORATION              8
+#define I2C_DEVICE_ID_ESPROS_PHOTONICS_AG               9
+#define I2C_DEVICE_ID_FUJITSU_SEMICONDUCTOR            10
+#define I2C_DEVICE_ID_FLIR                             11
+#define I2C_DEVICE_ID_O2MICRO                          12
+#define I2C_DEVICE_ID_ATMEL                            13
+#define I2C_DEVICE_ID_NONE                         0xffff
+	u16 part_id;
+	u8 die_revision;
+};
+
 enum i2c_alert_protocol {
 	I2C_PROTOCOL_SMBUS_ALERT,
 	I2C_PROTOCOL_SMBUS_HOST_NOTIFY,
diff --git a/include/linux/idr.h b/include/linux/idr.h
index 7d6a6313f0ab..e856f4e0ab35 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -29,29 +29,31 @@ struct idr {
 #define IDR_FREE	0
 
 /* Set the IDR flag and the IDR_FREE tag */
-#define IDR_RT_MARKER		((__force gfp_t)(3 << __GFP_BITS_SHIFT))
+#define IDR_RT_MARKER	(ROOT_IS_IDR | (__force gfp_t)			\
+					(1 << (ROOT_TAG_SHIFT + IDR_FREE)))
 
-#define IDR_INIT_BASE(base) {						\
-	.idr_rt = RADIX_TREE_INIT(IDR_RT_MARKER),			\
+#define IDR_INIT_BASE(name, base) {					\
+	.idr_rt = RADIX_TREE_INIT(name, IDR_RT_MARKER),			\
 	.idr_base = (base),						\
 	.idr_next = 0,							\
 }
 
 /**
  * IDR_INIT() - Initialise an IDR.
+ * @name: Name of IDR.
  *
  * A freshly-initialised IDR contains no IDs.
  */
-#define IDR_INIT	IDR_INIT_BASE(0)
+#define IDR_INIT(name)	IDR_INIT_BASE(name, 0)
 
 /**
- * DEFINE_IDR() - Define a statically-allocated IDR
- * @name: Name of IDR
+ * DEFINE_IDR() - Define a statically-allocated IDR.
+ * @name: Name of IDR.
  *
  * An IDR defined using this macro is ready for use with no additional
  * initialisation required.  It contains no IDs.
  */
-#define DEFINE_IDR(name)	struct idr name = IDR_INIT
+#define DEFINE_IDR(name)	struct idr name = IDR_INIT(name)
 
 /**
  * idr_get_cursor - Return the current position of the cyclic allocator
@@ -218,10 +220,10 @@ struct ida {
 	struct radix_tree_root	ida_rt;
 };
 
-#define IDA_INIT	{						\
-	.ida_rt = RADIX_TREE_INIT(IDR_RT_MARKER | GFP_NOWAIT),		\
+#define IDA_INIT(name)	{						\
+	.ida_rt = RADIX_TREE_INIT(name, IDR_RT_MARKER | GFP_NOWAIT),	\
 }
-#define DEFINE_IDA(name)	struct ida name = IDA_INIT
+#define DEFINE_IDA(name)	struct ida name = IDA_INIT(name)
 
 int ida_pre_get(struct ida *ida, gfp_t gfp_mask);
 int ida_get_new_above(struct ida *ida, int starting_id, int *p_id);
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 8dad3dd26eae..ef169d67df92 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -209,12 +209,12 @@
 #define DMA_FECTL_IM (((u32)1) << 31)
 
 /* FSTS_REG */
-#define DMA_FSTS_PPF ((u32)2)
-#define DMA_FSTS_PFO ((u32)1)
-#define DMA_FSTS_IQE (1 << 4)
-#define DMA_FSTS_ICE (1 << 5)
-#define DMA_FSTS_ITE (1 << 6)
-#define DMA_FSTS_PRO (1 << 7)
+#define DMA_FSTS_PFO (1 << 0) /* Primary Fault Overflow */
+#define DMA_FSTS_PPF (1 << 1) /* Primary Pending Fault */
+#define DMA_FSTS_IQE (1 << 4) /* Invalidation Queue Error */
+#define DMA_FSTS_ICE (1 << 5) /* Invalidation Completion Error */
+#define DMA_FSTS_ITE (1 << 6) /* Invalidation Time-out Error */
+#define DMA_FSTS_PRO (1 << 7) /* Page Request Overflow */
 #define dma_fsts_fault_record_index(s) (((s) >> 8) & 0xff)
 
 /* FRCD_REG, 32 bits access */
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 41b8c5757859..19938ee6eb31 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -465,23 +465,23 @@ static inline int iommu_map(struct iommu_domain *domain, unsigned long iova,
 	return -ENODEV;
 }
 
-static inline int iommu_unmap(struct iommu_domain *domain, unsigned long iova,
-			      size_t size)
+static inline size_t iommu_unmap(struct iommu_domain *domain,
+				 unsigned long iova, size_t size)
 {
-	return -ENODEV;
+	return 0;
 }
 
-static inline int iommu_unmap_fast(struct iommu_domain *domain, unsigned long iova,
-				   int gfp_order)
+static inline size_t iommu_unmap_fast(struct iommu_domain *domain,
+				      unsigned long iova, int gfp_order)
 {
-	return -ENODEV;
+	return 0;
 }
 
 static inline size_t iommu_map_sg(struct iommu_domain *domain,
 				  unsigned long iova, struct scatterlist *sg,
 				  unsigned int nents, int prot)
 {
-	return -ENODEV;
+	return 0;
 }
 
 static inline void iommu_flush_tlb_all(struct iommu_domain *domain)
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index 9385aa57497b..a27cf6652327 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -62,8 +62,11 @@ extern int register_refined_jiffies(long clock_tick_rate);
 /* TICK_NSEC is the time between ticks in nsec assuming SHIFTED_HZ */
 #define TICK_NSEC ((NSEC_PER_SEC+HZ/2)/HZ)
 
-/* TICK_USEC is the time between ticks in usec assuming fake USER_HZ */
-#define TICK_USEC ((1000000UL + USER_HZ/2) / USER_HZ)
+/* TICK_USEC is the time between ticks in usec assuming SHIFTED_HZ */
+#define TICK_USEC ((USEC_PER_SEC + HZ/2) / HZ)
+
+/* USER_TICK_USEC is the time between ticks in usec assuming fake USER_HZ */
+#define USER_TICK_USEC ((1000000UL + USER_HZ/2) / USER_HZ)
 
 #ifndef __jiffy_arch_data
 #define __jiffy_arch_data
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 4ae1dfd9bf05..6a1eb0b0aad9 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -439,7 +439,8 @@ extern long simple_strtol(const char *,char **,unsigned int);
 extern unsigned long long simple_strtoull(const char *,char **,unsigned int);
 extern long long simple_strtoll(const char *,char **,unsigned int);
 
-extern int num_to_str(char *buf, int size, unsigned long long num);
+extern int num_to_str(char *buf, int size,
+		      unsigned long long num, unsigned int width);
 
 /* lib/printf utilities */
 
@@ -543,6 +544,7 @@ extern enum system_states {
 	SYSTEM_RESTART,
 } system_state;
 
+/* This cannot be an enum because some may be used in assembly source. */
 #define TAINT_PROPRIETARY_MODULE	0
 #define TAINT_FORCED_MODULE		1
 #define TAINT_CPU_OUT_OF_SPEC		2
@@ -560,7 +562,8 @@ extern enum system_states {
 #define TAINT_SOFTLOCKUP		14
 #define TAINT_LIVEPATCH			15
 #define TAINT_AUX			16
-#define TAINT_FLAGS_COUNT		17
+#define TAINT_RANDSTRUCT		17
+#define TAINT_FLAGS_COUNT		18
 
 struct taint_flag {
 	char c_true;	/* character printed when tainted */
@@ -822,14 +825,15 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
 
 #define __cmp(x, y, op)	((x) op (y) ? (x) : (y))
 
-#define __cmp_once(x, y, op) ({		\
-		typeof(x) __x = (x);	\
-		typeof(y) __y = (y);	\
-		__cmp(__x, __y, op); })
+#define __cmp_once(x, y, unique_x, unique_y, op) ({	\
+		typeof(x) unique_x = (x);		\
+		typeof(y) unique_y = (y);		\
+		__cmp(unique_x, unique_y, op); })
 
-#define __careful_cmp(x, y, op)				\
-		__builtin_choose_expr(__safe_cmp(x, y),	\
-				      __cmp(x, y, op), __cmp_once(x, y, op))
+#define __careful_cmp(x, y, op) \
+	__builtin_choose_expr(__safe_cmp(x, y), \
+		__cmp(x, y, op), \
+		__cmp_once(x, y, __UNIQUE_ID(__x), __UNIQUE_ID(__y), op))
 
 /**
  * min - return minimum of two values of the same or compatible types
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 0ebcbeb21056..9e4e638fb505 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -99,21 +99,25 @@ struct compat_kexec_segment {
 
 #ifdef CONFIG_KEXEC_FILE
 struct purgatory_info {
-	/* Pointer to elf header of read only purgatory */
-	Elf_Ehdr *ehdr;
-
-	/* Pointer to purgatory sechdrs which are modifiable */
+	/*
+	 * Pointer to elf header at the beginning of kexec_purgatory.
+	 * Note: kexec_purgatory is read only
+	 */
+	const Elf_Ehdr *ehdr;
+	/*
+	 * Temporary, modifiable buffer for sechdrs used for relocation.
+	 * This memory can be freed post image load.
+	 */
 	Elf_Shdr *sechdrs;
 	/*
-	 * Temporary buffer location where purgatory is loaded and relocated
-	 * This memory can be freed post image load
+	 * Temporary, modifiable buffer for stripped purgatory used for
+	 * relocation. This memory can be freed post image load.
 	 */
 	void *purgatory_buf;
-
-	/* Address where purgatory is finally loaded and is executed from */
-	unsigned long purgatory_load_addr;
 };
 
+struct kimage;
+
 typedef int (kexec_probe_t)(const char *kernel_buf, unsigned long kernel_size);
 typedef void *(kexec_load_t)(struct kimage *image, char *kernel_buf,
 			     unsigned long kernel_len, char *initrd,
@@ -135,6 +139,11 @@ struct kexec_file_ops {
 #endif
 };
 
+extern const struct kexec_file_ops * const kexec_file_loaders[];
+
+int kexec_image_probe_default(struct kimage *image, void *buf,
+			      unsigned long buf_len);
+
 /**
  * struct kexec_buf - parameters for finding a place for a buffer in memory
  * @image:	kexec image in which memory to search.
@@ -159,10 +168,44 @@ struct kexec_buf {
 	bool top_down;
 };
 
+int kexec_load_purgatory(struct kimage *image, struct kexec_buf *kbuf);
+int kexec_purgatory_get_set_symbol(struct kimage *image, const char *name,
+				   void *buf, unsigned int size,
+				   bool get_value);
+void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name);
+
+int __weak arch_kexec_apply_relocations_add(struct purgatory_info *pi,
+					    Elf_Shdr *section,
+					    const Elf_Shdr *relsec,
+					    const Elf_Shdr *symtab);
+int __weak arch_kexec_apply_relocations(struct purgatory_info *pi,
+					Elf_Shdr *section,
+					const Elf_Shdr *relsec,
+					const Elf_Shdr *symtab);
+
 int __weak arch_kexec_walk_mem(struct kexec_buf *kbuf,
 			       int (*func)(struct resource *, void *));
 extern int kexec_add_buffer(struct kexec_buf *kbuf);
 int kexec_locate_mem_hole(struct kexec_buf *kbuf);
+
+/* Alignment required for elf header segment */
+#define ELF_CORE_HEADER_ALIGN   4096
+
+struct crash_mem_range {
+	u64 start, end;
+};
+
+struct crash_mem {
+	unsigned int max_nr_ranges;
+	unsigned int nr_ranges;
+	struct crash_mem_range ranges[0];
+};
+
+extern int crash_exclude_mem_range(struct crash_mem *mem,
+				   unsigned long long mstart,
+				   unsigned long long mend);
+extern int crash_prepare_elf64_headers(struct crash_mem *mem, int kernel_map,
+				       void **addr, unsigned long *sz);
 #endif /* CONFIG_KEXEC_FILE */
 
 struct kimage {
@@ -209,7 +252,7 @@ struct kimage {
 	unsigned long cmdline_buf_len;
 
 	/* File operations provided by image loader */
-	struct kexec_file_ops *fops;
+	const struct kexec_file_ops *fops;
 
 	/* Image loader handling the kernel can store a pointer here */
 	void *image_loader_data;
@@ -226,14 +269,6 @@ extern void machine_kexec_cleanup(struct kimage *image);
 extern int kernel_kexec(void);
 extern struct page *kimage_alloc_control_pages(struct kimage *image,
 						unsigned int order);
-extern int kexec_load_purgatory(struct kimage *image, unsigned long min,
-				unsigned long max, int top_down,
-				unsigned long *load_addr);
-extern int kexec_purgatory_get_set_symbol(struct kimage *image,
-					  const char *name, void *buf,
-					  unsigned int size, bool get_value);
-extern void *kexec_purgatory_get_symbol_addr(struct kimage *image,
-					     const char *name);
 extern void __crash_kexec(struct pt_regs *);
 extern void crash_kexec(struct pt_regs *);
 int kexec_should_crash(struct task_struct *);
@@ -273,16 +308,6 @@ int crash_shrink_memory(unsigned long new_size);
 size_t crash_get_memory_size(void);
 void crash_free_reserved_phys_range(unsigned long begin, unsigned long end);
 
-int __weak arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
-					 unsigned long buf_len);
-void * __weak arch_kexec_kernel_image_load(struct kimage *image);
-int __weak arch_kimage_file_post_load_cleanup(struct kimage *image);
-int __weak arch_kexec_kernel_verify_sig(struct kimage *image, void *buf,
-					unsigned long buf_len);
-int __weak arch_kexec_apply_relocations_add(const Elf_Ehdr *ehdr,
-					Elf_Shdr *sechdrs, unsigned int relsec);
-int __weak arch_kexec_apply_relocations(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
-					unsigned int relsec);
 void arch_kexec_protect_crashkres(void);
 void arch_kexec_unprotect_crashkres(void);
 
diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index e251533a5939..89fc8dc7bf38 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -41,11 +41,11 @@
  */
 
 /*
- * Note about locking : There is no locking required until only * one reader
- * and one writer is using the fifo and no kfifo_reset() will be * called
- *  kfifo_reset_out() can be safely used, until it will be only called
+ * Note about locking: There is no locking required as long as only one reader
+ * and one writer are using the fifo and kfifo_reset() is not called.
+ * kfifo_reset_out() can be safely used, as long as it is only called
  * in the reader thread.
- *  For multiple writer and one reader there is only a need to lock the writer.
+ * For multiple writers and one reader there is only a need to lock the writers.
  * And vice versa for only one writer and multiple reader there is only a need
  * to lock the reader.
  */
diff --git a/include/linux/kvm_para.h b/include/linux/kvm_para.h
index 51f6ef2c2ff4..f23b90b02898 100644
--- a/include/linux/kvm_para.h
+++ b/include/linux/kvm_para.h
@@ -9,4 +9,9 @@ static inline bool kvm_para_has_feature(unsigned int feature)
 {
 	return !!(kvm_arch_para_features() & (1UL << feature));
 }
+
+static inline bool kvm_para_has_hint(unsigned int feature)
+{
+	return !!(kvm_arch_para_hints() & (1UL << feature));
+}
 #endif /* __LINUX_KVM_PARA_H */
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index ff855ed965fb..097072c5a852 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -76,12 +76,14 @@ typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc,
 		struct nvdimm *nvdimm, unsigned int cmd, void *buf,
 		unsigned int buf_len, int *cmd_rc);
 
+struct device_node;
 struct nvdimm_bus_descriptor {
 	const struct attribute_group **attr_groups;
 	unsigned long bus_dsm_mask;
 	unsigned long cmd_mask;
 	struct module *module;
 	char *provider_name;
+	struct device_node *of_node;
 	ndctl_fn ndctl;
 	int (*flush_probe)(struct nvdimm_bus_descriptor *nd_desc);
 	int (*clear_to_send)(struct nvdimm_bus_descriptor *nd_desc,
@@ -123,6 +125,7 @@ struct nd_region_desc {
 	int num_lanes;
 	int numa_node;
 	unsigned long flags;
+	struct device_node *of_node;
 };
 
 struct device;
@@ -164,6 +167,7 @@ void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus);
 struct nvdimm_bus *to_nvdimm_bus(struct device *dev);
 struct nvdimm *to_nvdimm(struct device *dev);
 struct nd_region *to_nd_region(struct device *dev);
+struct device *nd_region_dev(struct nd_region *nd_region);
 struct nd_blk_region *to_nd_blk_region(struct device *dev);
 struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus);
 struct device *to_nvdimm_bus_dev(struct nvdimm_bus *nvdimm_bus);
diff --git a/include/linux/lockref.h b/include/linux/lockref.h
index 2eac32095113..99f17cc8e163 100644
--- a/include/linux/lockref.h
+++ b/include/linux/lockref.h
@@ -37,6 +37,7 @@ struct lockref {
 extern void lockref_get(struct lockref *);
 extern int lockref_put_return(struct lockref *);
 extern int lockref_get_not_zero(struct lockref *);
+extern int lockref_put_not_zero(struct lockref *);
 extern int lockref_get_or_lock(struct lockref *);
 extern int lockref_put_or_lock(struct lockref *);
 
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 6e6951b42be5..9d0b286f3dba 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -554,6 +554,10 @@
  *	@new points to the new credentials.
  *	@old points to the original credentials.
  *	Transfer data from original creds to new creds
+ * @cred_getsecid:
+ *	Retrieve the security identifier of the cred structure @c
+ *	@c contains the credentials, secid will be placed into @secid.
+ *	In case of failure, @secid will be set to zero.
  * @kernel_act_as:
  *	Set the credentials for a kernel service to act as (subjective context).
  *	@new points to the credentials to be modified.
@@ -672,7 +676,8 @@
  *	@p contains the task_struct for process.
  *	@info contains the signal information.
  *	@sig contains the signal value.
- *	@secid contains the sid of the process where the signal originated
+ *	@cred contains the cred of the process where the signal originated, or
+ *	NULL if the current task is the originator.
  *	Return 0 if permission is granted.
  * @task_prctl:
  *	Check permission before performing a process control operation on the
@@ -1568,6 +1573,7 @@ union security_list_options {
 	int (*cred_prepare)(struct cred *new, const struct cred *old,
 				gfp_t gfp);
 	void (*cred_transfer)(struct cred *new, const struct cred *old);
+	void (*cred_getsecid)(const struct cred *c, u32 *secid);
 	int (*kernel_act_as)(struct cred *new, u32 secid);
 	int (*kernel_create_files_as)(struct cred *new, struct inode *inode);
 	int (*kernel_module_request)(char *kmod_name);
@@ -1591,7 +1597,7 @@ union security_list_options {
 	int (*task_getscheduler)(struct task_struct *p);
 	int (*task_movememory)(struct task_struct *p);
 	int (*task_kill)(struct task_struct *p, struct siginfo *info,
-				int sig, u32 secid);
+				int sig, const struct cred *cred);
 	int (*task_prctl)(int option, unsigned long arg2, unsigned long arg3,
 				unsigned long arg4, unsigned long arg5);
 	void (*task_to_inode)(struct task_struct *p, struct inode *inode);
@@ -1763,233 +1769,234 @@ union security_list_options {
 };
 
 struct security_hook_heads {
-	struct list_head binder_set_context_mgr;
-	struct list_head binder_transaction;
-	struct list_head binder_transfer_binder;
-	struct list_head binder_transfer_file;
-	struct list_head ptrace_access_check;
-	struct list_head ptrace_traceme;
-	struct list_head capget;
-	struct list_head capset;
-	struct list_head capable;
-	struct list_head quotactl;
-	struct list_head quota_on;
-	struct list_head syslog;
-	struct list_head settime;
-	struct list_head vm_enough_memory;
-	struct list_head bprm_set_creds;
-	struct list_head bprm_check_security;
-	struct list_head bprm_committing_creds;
-	struct list_head bprm_committed_creds;
-	struct list_head sb_alloc_security;
-	struct list_head sb_free_security;
-	struct list_head sb_copy_data;
-	struct list_head sb_remount;
-	struct list_head sb_kern_mount;
-	struct list_head sb_show_options;
-	struct list_head sb_statfs;
-	struct list_head sb_mount;
-	struct list_head sb_umount;
-	struct list_head sb_pivotroot;
-	struct list_head sb_set_mnt_opts;
-	struct list_head sb_clone_mnt_opts;
-	struct list_head sb_parse_opts_str;
-	struct list_head dentry_init_security;
-	struct list_head dentry_create_files_as;
+	struct hlist_head binder_set_context_mgr;
+	struct hlist_head binder_transaction;
+	struct hlist_head binder_transfer_binder;
+	struct hlist_head binder_transfer_file;
+	struct hlist_head ptrace_access_check;
+	struct hlist_head ptrace_traceme;
+	struct hlist_head capget;
+	struct hlist_head capset;
+	struct hlist_head capable;
+	struct hlist_head quotactl;
+	struct hlist_head quota_on;
+	struct hlist_head syslog;
+	struct hlist_head settime;
+	struct hlist_head vm_enough_memory;
+	struct hlist_head bprm_set_creds;
+	struct hlist_head bprm_check_security;
+	struct hlist_head bprm_committing_creds;
+	struct hlist_head bprm_committed_creds;
+	struct hlist_head sb_alloc_security;
+	struct hlist_head sb_free_security;
+	struct hlist_head sb_copy_data;
+	struct hlist_head sb_remount;
+	struct hlist_head sb_kern_mount;
+	struct hlist_head sb_show_options;
+	struct hlist_head sb_statfs;
+	struct hlist_head sb_mount;
+	struct hlist_head sb_umount;
+	struct hlist_head sb_pivotroot;
+	struct hlist_head sb_set_mnt_opts;
+	struct hlist_head sb_clone_mnt_opts;
+	struct hlist_head sb_parse_opts_str;
+	struct hlist_head dentry_init_security;
+	struct hlist_head dentry_create_files_as;
 #ifdef CONFIG_SECURITY_PATH
-	struct list_head path_unlink;
-	struct list_head path_mkdir;
-	struct list_head path_rmdir;
-	struct list_head path_mknod;
-	struct list_head path_truncate;
-	struct list_head path_symlink;
-	struct list_head path_link;
-	struct list_head path_rename;
-	struct list_head path_chmod;
-	struct list_head path_chown;
-	struct list_head path_chroot;
+	struct hlist_head path_unlink;
+	struct hlist_head path_mkdir;
+	struct hlist_head path_rmdir;
+	struct hlist_head path_mknod;
+	struct hlist_head path_truncate;
+	struct hlist_head path_symlink;
+	struct hlist_head path_link;
+	struct hlist_head path_rename;
+	struct hlist_head path_chmod;
+	struct hlist_head path_chown;
+	struct hlist_head path_chroot;
 #endif
-	struct list_head inode_alloc_security;
-	struct list_head inode_free_security;
-	struct list_head inode_init_security;
-	struct list_head inode_create;
-	struct list_head inode_link;
-	struct list_head inode_unlink;
-	struct list_head inode_symlink;
-	struct list_head inode_mkdir;
-	struct list_head inode_rmdir;
-	struct list_head inode_mknod;
-	struct list_head inode_rename;
-	struct list_head inode_readlink;
-	struct list_head inode_follow_link;
-	struct list_head inode_permission;
-	struct list_head inode_setattr;
-	struct list_head inode_getattr;
-	struct list_head inode_setxattr;
-	struct list_head inode_post_setxattr;
-	struct list_head inode_getxattr;
-	struct list_head inode_listxattr;
-	struct list_head inode_removexattr;
-	struct list_head inode_need_killpriv;
-	struct list_head inode_killpriv;
-	struct list_head inode_getsecurity;
-	struct list_head inode_setsecurity;
-	struct list_head inode_listsecurity;
-	struct list_head inode_getsecid;
-	struct list_head inode_copy_up;
-	struct list_head inode_copy_up_xattr;
-	struct list_head file_permission;
-	struct list_head file_alloc_security;
-	struct list_head file_free_security;
-	struct list_head file_ioctl;
-	struct list_head mmap_addr;
-	struct list_head mmap_file;
-	struct list_head file_mprotect;
-	struct list_head file_lock;
-	struct list_head file_fcntl;
-	struct list_head file_set_fowner;
-	struct list_head file_send_sigiotask;
-	struct list_head file_receive;
-	struct list_head file_open;
-	struct list_head task_alloc;
-	struct list_head task_free;
-	struct list_head cred_alloc_blank;
-	struct list_head cred_free;
-	struct list_head cred_prepare;
-	struct list_head cred_transfer;
-	struct list_head kernel_act_as;
-	struct list_head kernel_create_files_as;
-	struct list_head kernel_read_file;
-	struct list_head kernel_post_read_file;
-	struct list_head kernel_module_request;
-	struct list_head task_fix_setuid;
-	struct list_head task_setpgid;
-	struct list_head task_getpgid;
-	struct list_head task_getsid;
-	struct list_head task_getsecid;
-	struct list_head task_setnice;
-	struct list_head task_setioprio;
-	struct list_head task_getioprio;
-	struct list_head task_prlimit;
-	struct list_head task_setrlimit;
-	struct list_head task_setscheduler;
-	struct list_head task_getscheduler;
-	struct list_head task_movememory;
-	struct list_head task_kill;
-	struct list_head task_prctl;
-	struct list_head task_to_inode;
-	struct list_head ipc_permission;
-	struct list_head ipc_getsecid;
-	struct list_head msg_msg_alloc_security;
-	struct list_head msg_msg_free_security;
-	struct list_head msg_queue_alloc_security;
-	struct list_head msg_queue_free_security;
-	struct list_head msg_queue_associate;
-	struct list_head msg_queue_msgctl;
-	struct list_head msg_queue_msgsnd;
-	struct list_head msg_queue_msgrcv;
-	struct list_head shm_alloc_security;
-	struct list_head shm_free_security;
-	struct list_head shm_associate;
-	struct list_head shm_shmctl;
-	struct list_head shm_shmat;
-	struct list_head sem_alloc_security;
-	struct list_head sem_free_security;
-	struct list_head sem_associate;
-	struct list_head sem_semctl;
-	struct list_head sem_semop;
-	struct list_head netlink_send;
-	struct list_head d_instantiate;
-	struct list_head getprocattr;
-	struct list_head setprocattr;
-	struct list_head ismaclabel;
-	struct list_head secid_to_secctx;
-	struct list_head secctx_to_secid;
-	struct list_head release_secctx;
-	struct list_head inode_invalidate_secctx;
-	struct list_head inode_notifysecctx;
-	struct list_head inode_setsecctx;
-	struct list_head inode_getsecctx;
+	struct hlist_head inode_alloc_security;
+	struct hlist_head inode_free_security;
+	struct hlist_head inode_init_security;
+	struct hlist_head inode_create;
+	struct hlist_head inode_link;
+	struct hlist_head inode_unlink;
+	struct hlist_head inode_symlink;
+	struct hlist_head inode_mkdir;
+	struct hlist_head inode_rmdir;
+	struct hlist_head inode_mknod;
+	struct hlist_head inode_rename;
+	struct hlist_head inode_readlink;
+	struct hlist_head inode_follow_link;
+	struct hlist_head inode_permission;
+	struct hlist_head inode_setattr;
+	struct hlist_head inode_getattr;
+	struct hlist_head inode_setxattr;
+	struct hlist_head inode_post_setxattr;
+	struct hlist_head inode_getxattr;
+	struct hlist_head inode_listxattr;
+	struct hlist_head inode_removexattr;
+	struct hlist_head inode_need_killpriv;
+	struct hlist_head inode_killpriv;
+	struct hlist_head inode_getsecurity;
+	struct hlist_head inode_setsecurity;
+	struct hlist_head inode_listsecurity;
+	struct hlist_head inode_getsecid;
+	struct hlist_head inode_copy_up;
+	struct hlist_head inode_copy_up_xattr;
+	struct hlist_head file_permission;
+	struct hlist_head file_alloc_security;
+	struct hlist_head file_free_security;
+	struct hlist_head file_ioctl;
+	struct hlist_head mmap_addr;
+	struct hlist_head mmap_file;
+	struct hlist_head file_mprotect;
+	struct hlist_head file_lock;
+	struct hlist_head file_fcntl;
+	struct hlist_head file_set_fowner;
+	struct hlist_head file_send_sigiotask;
+	struct hlist_head file_receive;
+	struct hlist_head file_open;
+	struct hlist_head task_alloc;
+	struct hlist_head task_free;
+	struct hlist_head cred_alloc_blank;
+	struct hlist_head cred_free;
+	struct hlist_head cred_prepare;
+	struct hlist_head cred_transfer;
+	struct hlist_head cred_getsecid;
+	struct hlist_head kernel_act_as;
+	struct hlist_head kernel_create_files_as;
+	struct hlist_head kernel_read_file;
+	struct hlist_head kernel_post_read_file;
+	struct hlist_head kernel_module_request;
+	struct hlist_head task_fix_setuid;
+	struct hlist_head task_setpgid;
+	struct hlist_head task_getpgid;
+	struct hlist_head task_getsid;
+	struct hlist_head task_getsecid;
+	struct hlist_head task_setnice;
+	struct hlist_head task_setioprio;
+	struct hlist_head task_getioprio;
+	struct hlist_head task_prlimit;
+	struct hlist_head task_setrlimit;
+	struct hlist_head task_setscheduler;
+	struct hlist_head task_getscheduler;
+	struct hlist_head task_movememory;
+	struct hlist_head task_kill;
+	struct hlist_head task_prctl;
+	struct hlist_head task_to_inode;
+	struct hlist_head ipc_permission;
+	struct hlist_head ipc_getsecid;
+	struct hlist_head msg_msg_alloc_security;
+	struct hlist_head msg_msg_free_security;
+	struct hlist_head msg_queue_alloc_security;
+	struct hlist_head msg_queue_free_security;
+	struct hlist_head msg_queue_associate;
+	struct hlist_head msg_queue_msgctl;
+	struct hlist_head msg_queue_msgsnd;
+	struct hlist_head msg_queue_msgrcv;
+	struct hlist_head shm_alloc_security;
+	struct hlist_head shm_free_security;
+	struct hlist_head shm_associate;
+	struct hlist_head shm_shmctl;
+	struct hlist_head shm_shmat;
+	struct hlist_head sem_alloc_security;
+	struct hlist_head sem_free_security;
+	struct hlist_head sem_associate;
+	struct hlist_head sem_semctl;
+	struct hlist_head sem_semop;
+	struct hlist_head netlink_send;
+	struct hlist_head d_instantiate;
+	struct hlist_head getprocattr;
+	struct hlist_head setprocattr;
+	struct hlist_head ismaclabel;
+	struct hlist_head secid_to_secctx;
+	struct hlist_head secctx_to_secid;
+	struct hlist_head release_secctx;
+	struct hlist_head inode_invalidate_secctx;
+	struct hlist_head inode_notifysecctx;
+	struct hlist_head inode_setsecctx;
+	struct hlist_head inode_getsecctx;
 #ifdef CONFIG_SECURITY_NETWORK
-	struct list_head unix_stream_connect;
-	struct list_head unix_may_send;
-	struct list_head socket_create;
-	struct list_head socket_post_create;
-	struct list_head socket_bind;
-	struct list_head socket_connect;
-	struct list_head socket_listen;
-	struct list_head socket_accept;
-	struct list_head socket_sendmsg;
-	struct list_head socket_recvmsg;
-	struct list_head socket_getsockname;
-	struct list_head socket_getpeername;
-	struct list_head socket_getsockopt;
-	struct list_head socket_setsockopt;
-	struct list_head socket_shutdown;
-	struct list_head socket_sock_rcv_skb;
-	struct list_head socket_getpeersec_stream;
-	struct list_head socket_getpeersec_dgram;
-	struct list_head sk_alloc_security;
-	struct list_head sk_free_security;
-	struct list_head sk_clone_security;
-	struct list_head sk_getsecid;
-	struct list_head sock_graft;
-	struct list_head inet_conn_request;
-	struct list_head inet_csk_clone;
-	struct list_head inet_conn_established;
-	struct list_head secmark_relabel_packet;
-	struct list_head secmark_refcount_inc;
-	struct list_head secmark_refcount_dec;
-	struct list_head req_classify_flow;
-	struct list_head tun_dev_alloc_security;
-	struct list_head tun_dev_free_security;
-	struct list_head tun_dev_create;
-	struct list_head tun_dev_attach_queue;
-	struct list_head tun_dev_attach;
-	struct list_head tun_dev_open;
-	struct list_head sctp_assoc_request;
-	struct list_head sctp_bind_connect;
-	struct list_head sctp_sk_clone;
+	struct hlist_head unix_stream_connect;
+	struct hlist_head unix_may_send;
+	struct hlist_head socket_create;
+	struct hlist_head socket_post_create;
+	struct hlist_head socket_bind;
+	struct hlist_head socket_connect;
+	struct hlist_head socket_listen;
+	struct hlist_head socket_accept;
+	struct hlist_head socket_sendmsg;
+	struct hlist_head socket_recvmsg;
+	struct hlist_head socket_getsockname;
+	struct hlist_head socket_getpeername;
+	struct hlist_head socket_getsockopt;
+	struct hlist_head socket_setsockopt;
+	struct hlist_head socket_shutdown;
+	struct hlist_head socket_sock_rcv_skb;
+	struct hlist_head socket_getpeersec_stream;
+	struct hlist_head socket_getpeersec_dgram;
+	struct hlist_head sk_alloc_security;
+	struct hlist_head sk_free_security;
+	struct hlist_head sk_clone_security;
+	struct hlist_head sk_getsecid;
+	struct hlist_head sock_graft;
+	struct hlist_head inet_conn_request;
+	struct hlist_head inet_csk_clone;
+	struct hlist_head inet_conn_established;
+	struct hlist_head secmark_relabel_packet;
+	struct hlist_head secmark_refcount_inc;
+	struct hlist_head secmark_refcount_dec;
+	struct hlist_head req_classify_flow;
+	struct hlist_head tun_dev_alloc_security;
+	struct hlist_head tun_dev_free_security;
+	struct hlist_head tun_dev_create;
+	struct hlist_head tun_dev_attach_queue;
+	struct hlist_head tun_dev_attach;
+	struct hlist_head tun_dev_open;
+	struct hlist_head sctp_assoc_request;
+	struct hlist_head sctp_bind_connect;
+	struct hlist_head sctp_sk_clone;
 #endif	/* CONFIG_SECURITY_NETWORK */
 #ifdef CONFIG_SECURITY_INFINIBAND
-	struct list_head ib_pkey_access;
-	struct list_head ib_endport_manage_subnet;
-	struct list_head ib_alloc_security;
-	struct list_head ib_free_security;
+	struct hlist_head ib_pkey_access;
+	struct hlist_head ib_endport_manage_subnet;
+	struct hlist_head ib_alloc_security;
+	struct hlist_head ib_free_security;
 #endif	/* CONFIG_SECURITY_INFINIBAND */
 #ifdef CONFIG_SECURITY_NETWORK_XFRM
-	struct list_head xfrm_policy_alloc_security;
-	struct list_head xfrm_policy_clone_security;
-	struct list_head xfrm_policy_free_security;
-	struct list_head xfrm_policy_delete_security;
-	struct list_head xfrm_state_alloc;
-	struct list_head xfrm_state_alloc_acquire;
-	struct list_head xfrm_state_free_security;
-	struct list_head xfrm_state_delete_security;
-	struct list_head xfrm_policy_lookup;
-	struct list_head xfrm_state_pol_flow_match;
-	struct list_head xfrm_decode_session;
+	struct hlist_head xfrm_policy_alloc_security;
+	struct hlist_head xfrm_policy_clone_security;
+	struct hlist_head xfrm_policy_free_security;
+	struct hlist_head xfrm_policy_delete_security;
+	struct hlist_head xfrm_state_alloc;
+	struct hlist_head xfrm_state_alloc_acquire;
+	struct hlist_head xfrm_state_free_security;
+	struct hlist_head xfrm_state_delete_security;
+	struct hlist_head xfrm_policy_lookup;
+	struct hlist_head xfrm_state_pol_flow_match;
+	struct hlist_head xfrm_decode_session;
 #endif	/* CONFIG_SECURITY_NETWORK_XFRM */
 #ifdef CONFIG_KEYS
-	struct list_head key_alloc;
-	struct list_head key_free;
-	struct list_head key_permission;
-	struct list_head key_getsecurity;
+	struct hlist_head key_alloc;
+	struct hlist_head key_free;
+	struct hlist_head key_permission;
+	struct hlist_head key_getsecurity;
 #endif	/* CONFIG_KEYS */
 #ifdef CONFIG_AUDIT
-	struct list_head audit_rule_init;
-	struct list_head audit_rule_known;
-	struct list_head audit_rule_match;
-	struct list_head audit_rule_free;
+	struct hlist_head audit_rule_init;
+	struct hlist_head audit_rule_known;
+	struct hlist_head audit_rule_match;
+	struct hlist_head audit_rule_free;
 #endif /* CONFIG_AUDIT */
 #ifdef CONFIG_BPF_SYSCALL
-	struct list_head bpf;
-	struct list_head bpf_map;
-	struct list_head bpf_prog;
-	struct list_head bpf_map_alloc_security;
-	struct list_head bpf_map_free_security;
-	struct list_head bpf_prog_alloc_security;
-	struct list_head bpf_prog_free_security;
+	struct hlist_head bpf;
+	struct hlist_head bpf_map;
+	struct hlist_head bpf_prog;
+	struct hlist_head bpf_map_alloc_security;
+	struct hlist_head bpf_map_free_security;
+	struct hlist_head bpf_prog_alloc_security;
+	struct hlist_head bpf_prog_free_security;
 #endif /* CONFIG_BPF_SYSCALL */
 } __randomize_layout;
 
@@ -1998,8 +2005,8 @@ struct security_hook_heads {
  * For use with generic list macros for common operations.
  */
 struct security_hook_list {
-	struct list_head		list;
-	struct list_head		*head;
+	struct hlist_node		list;
+	struct hlist_head		*head;
 	union security_list_options	hook;
 	char				*lsm;
 } __randomize_layout;
@@ -2038,7 +2045,7 @@ static inline void security_delete_hooks(struct security_hook_list *hooks,
 	int i;
 
 	for (i = 0; i < count; i++)
-		list_del_rcu(&hooks[i].list);
+		hlist_del_rcu(&hooks[i].list);
 }
 #endif /* CONFIG_SECURITY_SELINUX_DISABLE */
 
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 0257aee7ab4b..ca59883c8364 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -318,6 +318,9 @@ static inline bool memblock_bottom_up(void)
 phys_addr_t __init memblock_alloc_range(phys_addr_t size, phys_addr_t align,
 					phys_addr_t start, phys_addr_t end,
 					ulong flags);
+phys_addr_t memblock_alloc_base_nid(phys_addr_t size,
+					phys_addr_t align, phys_addr_t max_addr,
+					int nid, ulong flags);
 phys_addr_t memblock_alloc_base(phys_addr_t size, phys_addr_t align,
 				phys_addr_t max_addr);
 phys_addr_t __memblock_alloc_base(phys_addr_t size, phys_addr_t align,
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index c46016bb25eb..d99b71bc2c66 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -48,13 +48,12 @@ enum memcg_stat_item {
 	MEMCG_NR_STAT,
 };
 
-/* Cgroup-specific events, on top of universal VM events */
-enum memcg_event_item {
-	MEMCG_LOW = NR_VM_EVENT_ITEMS,
+enum memcg_memory_event {
+	MEMCG_LOW,
 	MEMCG_HIGH,
 	MEMCG_MAX,
 	MEMCG_OOM,
-	MEMCG_NR_EVENTS,
+	MEMCG_NR_MEMORY_EVENTS,
 };
 
 struct mem_cgroup_reclaim_cookie {
@@ -88,7 +87,7 @@ enum mem_cgroup_events_target {
 
 struct mem_cgroup_stat_cpu {
 	long count[MEMCG_NR_STAT];
-	unsigned long events[MEMCG_NR_EVENTS];
+	unsigned long events[NR_VM_EVENT_ITEMS];
 	unsigned long nr_page_events;
 	unsigned long targets[MEM_CGROUP_NTARGETS];
 };
@@ -120,6 +119,9 @@ struct mem_cgroup_per_node {
 	unsigned long		usage_in_excess;/* Set to the value by which */
 						/* the soft limit is exceeded*/
 	bool			on_tree;
+	bool			congested;	/* memcg has many dirty pages */
+						/* backed by a congested BDI */
+
 	struct mem_cgroup	*memcg;		/* Back pointer, we cannot */
 						/* use container_of	   */
 };
@@ -202,7 +204,8 @@ struct mem_cgroup {
 	/* OOM-Killer disable */
 	int		oom_kill_disable;
 
-	/* handle for "memory.events" */
+	/* memory.events */
+	atomic_long_t memory_events[MEMCG_NR_MEMORY_EVENTS];
 	struct cgroup_file events_file;
 
 	/* protect arrays of thresholds */
@@ -231,9 +234,10 @@ struct mem_cgroup {
 	struct task_struct	*move_lock_task;
 	unsigned long		move_lock_flags;
 
+	/* memory.stat */
 	struct mem_cgroup_stat_cpu __percpu *stat_cpu;
 	atomic_long_t		stat[MEMCG_NR_STAT];
-	atomic_long_t		events[MEMCG_NR_EVENTS];
+	atomic_long_t		events[NR_VM_EVENT_ITEMS];
 
 	unsigned long		socket_pressure;
 
@@ -645,9 +649,9 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
 						gfp_t gfp_mask,
 						unsigned long *total_scanned);
 
-/* idx can be of type enum memcg_event_item or vm_event_item */
 static inline void __count_memcg_events(struct mem_cgroup *memcg,
-					int idx, unsigned long count)
+					enum vm_event_item idx,
+					unsigned long count)
 {
 	unsigned long x;
 
@@ -663,7 +667,8 @@ static inline void __count_memcg_events(struct mem_cgroup *memcg,
 }
 
 static inline void count_memcg_events(struct mem_cgroup *memcg,
-				      int idx, unsigned long count)
+				      enum vm_event_item idx,
+				      unsigned long count)
 {
 	unsigned long flags;
 
@@ -672,9 +677,8 @@ static inline void count_memcg_events(struct mem_cgroup *memcg,
 	local_irq_restore(flags);
 }
 
-/* idx can be of type enum memcg_event_item or vm_event_item */
 static inline void count_memcg_page_event(struct page *page,
-					  int idx)
+					  enum vm_event_item idx)
 {
 	if (page->mem_cgroup)
 		count_memcg_events(page->mem_cgroup, idx, 1);
@@ -698,10 +702,10 @@ static inline void count_memcg_event_mm(struct mm_struct *mm,
 	rcu_read_unlock();
 }
 
-static inline void mem_cgroup_event(struct mem_cgroup *memcg,
-				    enum memcg_event_item event)
+static inline void memcg_memory_event(struct mem_cgroup *memcg,
+				      enum memcg_memory_event event)
 {
-	count_memcg_events(memcg, event, 1);
+	atomic_long_inc(&memcg->memory_events[event]);
 	cgroup_file_notify(&memcg->events_file);
 }
 
@@ -721,8 +725,8 @@ static inline bool mem_cgroup_disabled(void)
 	return true;
 }
 
-static inline void mem_cgroup_event(struct mem_cgroup *memcg,
-				    enum memcg_event_item event)
+static inline void memcg_memory_event(struct mem_cgroup *memcg,
+				      enum memcg_memory_event event)
 {
 }
 
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 2b0265265c28..e0e49b5b1ee1 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -216,9 +216,6 @@ void put_online_mems(void);
 void mem_hotplug_begin(void);
 void mem_hotplug_done(void);
 
-extern void set_zone_contiguous(struct zone *zone);
-extern void clear_zone_contiguous(struct zone *zone);
-
 #else /* ! CONFIG_MEMORY_HOTPLUG */
 #define pfn_to_online_page(pfn)			\
 ({						\
diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h
index c61535979b8f..2d4e23c9ea0a 100644
--- a/include/linux/mfd/cros_ec.h
+++ b/include/linux/mfd/cros_ec.h
@@ -183,6 +183,7 @@ struct cros_ec_debugfs;
  * @ec_dev: cros_ec_device structure to talk to the physical device
  * @dev: pointer to the platform device
  * @debug_info: cros_ec_debugfs structure for debugging information
+ * @has_kb_wake_angle: true if at least 2 accelerometers are connected to the EC.
  * @cmd_offset: offset to apply for each command.
  */
 struct cros_ec_dev {
@@ -191,6 +192,7 @@ struct cros_ec_dev {
 	struct cros_ec_device *ec_dev;
 	struct device *dev;
 	struct cros_ec_debugfs *debug_info;
+	bool has_kb_wake_angle;
 	u16 cmd_offset;
 	u32 features[2];
 };
diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h
index 2b96e630e3b6..f2edd9969b40 100644
--- a/include/linux/mfd/cros_ec_commands.h
+++ b/include/linux/mfd/cros_ec_commands.h
@@ -2948,6 +2948,9 @@ struct ec_response_usb_pd_control_v1 {
 
 #define EC_CMD_USB_PD_PORTS 0x102
 
+/* Maximum number of PD ports on a device, num_ports will be <= this */
+#define EC_USB_PD_MAX_PORTS 8
+
 struct ec_response_usb_pd_ports {
 	uint8_t num_ports;
 } __packed;
diff --git a/include/linux/mfd/samsung/rtc.h b/include/linux/mfd/samsung/rtc.h
index 48c3c5be7eb1..9ed2871ea335 100644
--- a/include/linux/mfd/samsung/rtc.h
+++ b/include/linux/mfd/samsung/rtc.h
@@ -141,15 +141,4 @@ enum s2mps_rtc_reg {
 #define WTSR_ENABLE_SHIFT	6
 #define WTSR_ENABLE_MASK	(1 << WTSR_ENABLE_SHIFT)
 
-enum {
-	RTC_SEC = 0,
-	RTC_MIN,
-	RTC_HOUR,
-	RTC_WEEKDAY,
-	RTC_DATE,
-	RTC_MONTH,
-	RTC_YEAR1,
-	RTC_YEAR2,
-};
-
 #endif /*  __LINUX_MFD_SEC_RTC_H */
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index ab45f8a0d288..f2b4abbca55e 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -7,8 +7,7 @@
 #include <linux/migrate_mode.h>
 #include <linux/hugetlb.h>
 
-typedef struct page *new_page_t(struct page *page, unsigned long private,
-				int **reason);
+typedef struct page *new_page_t(struct page *page, unsigned long private);
 typedef void free_page_t(struct page *page, unsigned long private);
 
 /*
@@ -43,9 +42,9 @@ static inline struct page *new_page_nodemask(struct page *page,
 		return alloc_huge_page_nodemask(page_hstate(compound_head(page)),
 				preferred_nid, nodemask);
 
-	if (thp_migration_supported() && PageTransHuge(page)) {
-		order = HPAGE_PMD_ORDER;
+	if (PageTransHuge(page)) {
 		gfp_mask |= GFP_TRANSHUGE;
+		order = HPAGE_PMD_ORDER;
 	}
 
 	if (PageHighMem(page) || (zone_idx(page_zone(page)) == ZONE_MOVABLE))
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 3ad632366973..1ac1f06a4be6 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -747,7 +747,7 @@ int finish_mkwrite_fault(struct vm_fault *vmf);
  * refcount. The each user mapping also has a reference to the page.
  *
  * The pagecache pages are stored in a per-mapping radix tree, which is
- * rooted at mapping->page_tree, and indexed by offset.
+ * rooted at mapping->i_pages, and indexed by offset.
  * Where 2.4 and early 2.6 kernels kept dirty/clean pages in per-address_space
  * lists, we instead now tag pages as dirty/writeback in the radix tree.
  *
@@ -1466,6 +1466,7 @@ extern int try_to_release_page(struct page * page, gfp_t gfp_mask);
 extern void do_invalidatepage(struct page *page, unsigned int offset,
 			      unsigned int length);
 
+void __set_page_dirty(struct page *, struct address_space *, int warn);
 int __set_page_dirty_nobuffers(struct page *page);
 int __set_page_dirty_no_writeback(struct page *page);
 int redirty_page_for_writepage(struct writeback_control *wbc,
@@ -2108,6 +2109,7 @@ extern void setup_per_cpu_pageset(void);
 
 extern void zone_pcp_update(struct zone *zone);
 extern void zone_pcp_reset(struct zone *zone);
+extern void setup_zone_pageset(struct zone *zone);
 
 /* page_alloc.c */
 extern int min_free_kbytes;
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index f11ae29005f1..32699b2dc52a 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -180,6 +180,7 @@ enum node_stat_item {
 	NR_VMSCAN_IMMEDIATE,	/* Prioritise for reclaim when writeback ends */
 	NR_DIRTIED,		/* page dirtyings since bootup */
 	NR_WRITTEN,		/* page writings since bootup */
+	NR_INDIRECTLY_RECLAIMABLE_BYTES, /* measured in bytes */
 	NR_VM_NODE_STAT_ITEMS
 };
 
@@ -884,7 +885,7 @@ int min_free_kbytes_sysctl_handler(struct ctl_table *, int,
 					void __user *, size_t *, loff_t *);
 int watermark_scale_factor_sysctl_handler(struct ctl_table *, int,
 					void __user *, size_t *, loff_t *);
-extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1];
+extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES];
 int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int,
 					void __user *, size_t *, loff_t *);
 int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int,
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index 48fb2b43c35a..7d361be2e24f 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -502,6 +502,7 @@ enum dmi_field {
 	DMI_CHASSIS_SERIAL,
 	DMI_CHASSIS_ASSET_TAG,
 	DMI_STRING_MAX,
+	DMI_OEM_STRING,	/* special case - will not be in dmi_ident */
 };
 
 struct dmi_strmatch {
diff --git a/include/linux/nd.h b/include/linux/nd.h
index 5dc6b695437d..43c181a6add5 100644
--- a/include/linux/nd.h
+++ b/include/linux/nd.h
@@ -180,6 +180,12 @@ struct nd_region;
 void nvdimm_region_notify(struct nd_region *nd_region, enum nvdimm_event event);
 int __must_check __nd_driver_register(struct nd_device_driver *nd_drv,
 		struct module *module, const char *mod_name);
+static inline void nd_driver_unregister(struct nd_device_driver *drv)
+{
+	driver_unregister(&drv->drv);
+}
 #define nd_driver_register(driver) \
 	__nd_driver_register(driver, THIS_MODULE, KBUILD_MODNAME)
+#define module_nd_driver(driver) \
+	module_driver(driver, nd_driver_register, nd_driver_unregister)
 #endif /* __LINUX_ND_H__ */
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 38187c68063d..2f129bbfaae8 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -198,14 +198,24 @@ struct nfs_inode {
 /*
  * Cache validity bit flags
  */
-#define NFS_INO_INVALID_ATTR	0x0001		/* cached attrs are invalid */
-#define NFS_INO_INVALID_DATA	0x0002		/* cached data is invalid */
-#define NFS_INO_INVALID_ATIME	0x0004		/* cached atime is invalid */
-#define NFS_INO_INVALID_ACCESS	0x0008		/* cached access cred invalid */
-#define NFS_INO_INVALID_ACL	0x0010		/* cached acls are invalid */
-#define NFS_INO_REVAL_PAGECACHE	0x0020		/* must revalidate pagecache */
-#define NFS_INO_REVAL_FORCED	0x0040		/* force revalidation ignoring a delegation */
-#define NFS_INO_INVALID_LABEL	0x0080		/* cached label is invalid */
+#define NFS_INO_INVALID_DATA	BIT(1)		/* cached data is invalid */
+#define NFS_INO_INVALID_ATIME	BIT(2)		/* cached atime is invalid */
+#define NFS_INO_INVALID_ACCESS	BIT(3)		/* cached access cred invalid */
+#define NFS_INO_INVALID_ACL	BIT(4)		/* cached acls are invalid */
+#define NFS_INO_REVAL_PAGECACHE	BIT(5)		/* must revalidate pagecache */
+#define NFS_INO_REVAL_FORCED	BIT(6)		/* force revalidation ignoring a delegation */
+#define NFS_INO_INVALID_LABEL	BIT(7)		/* cached label is invalid */
+#define NFS_INO_INVALID_CHANGE	BIT(8)		/* cached change is invalid */
+#define NFS_INO_INVALID_CTIME	BIT(9)		/* cached ctime is invalid */
+#define NFS_INO_INVALID_MTIME	BIT(10)		/* cached mtime is invalid */
+#define NFS_INO_INVALID_SIZE	BIT(11)		/* cached size is invalid */
+#define NFS_INO_INVALID_OTHER	BIT(12)		/* other attrs are invalid */
+
+#define NFS_INO_INVALID_ATTR	(NFS_INO_INVALID_CHANGE \
+		| NFS_INO_INVALID_CTIME \
+		| NFS_INO_INVALID_MTIME \
+		| NFS_INO_INVALID_SIZE \
+		| NFS_INO_INVALID_OTHER)	/* inode metadata is invalid */
 
 /*
  * Bit offsets in flags field
@@ -292,10 +302,11 @@ static inline void nfs_mark_for_revalidate(struct inode *inode)
 	struct nfs_inode *nfsi = NFS_I(inode);
 
 	spin_lock(&inode->i_lock);
-	nfsi->cache_validity |= NFS_INO_INVALID_ATTR |
-				NFS_INO_REVAL_PAGECACHE |
-				NFS_INO_INVALID_ACCESS |
-				NFS_INO_INVALID_ACL;
+	nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE
+		| NFS_INO_INVALID_ACCESS
+		| NFS_INO_INVALID_ACL
+		| NFS_INO_INVALID_CHANGE
+		| NFS_INO_INVALID_CTIME;
 	if (S_ISDIR(inode->i_mode))
 		nfsi->cache_validity |= NFS_INO_INVALID_DATA;
 	spin_unlock(&inode->i_lock);
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 6959968dc36a..34d28564ecf3 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1590,11 +1590,13 @@ struct nfs_rpc_ops {
 			    unsigned int);
 	int	(*create)  (struct inode *, struct dentry *,
 			    struct iattr *, int);
-	int	(*remove)  (struct inode *, const struct qstr *);
-	void	(*unlink_setup)  (struct rpc_message *, struct inode *dir);
+	int	(*remove)  (struct inode *, struct dentry *);
+	void	(*unlink_setup)  (struct rpc_message *, struct dentry *);
 	void	(*unlink_rpc_prepare) (struct rpc_task *, struct nfs_unlinkdata *);
 	int	(*unlink_done) (struct rpc_task *, struct inode *);
-	void	(*rename_setup)  (struct rpc_message *msg, struct inode *dir);
+	void	(*rename_setup)  (struct rpc_message *msg,
+			struct dentry *old_dentry,
+			struct dentry *new_dentry);
 	void	(*rename_rpc_prepare)(struct rpc_task *task, struct nfs_renamedata *);
 	int	(*rename_done) (struct rpc_task *task, struct inode *old_dir, struct inode *new_dir);
 	int	(*link)    (struct inode *, struct inode *, const struct qstr *);
@@ -1633,7 +1635,6 @@ struct nfs_rpc_ops {
 				struct iattr *iattr,
 				int *);
 	int (*have_delegation)(struct inode *, fmode_t);
-	int (*return_delegation)(struct inode *);
 	struct nfs_client *(*alloc_client) (const struct nfs_client_initdata *);
 	struct nfs_client *(*init_client) (struct nfs_client *,
 				const struct nfs_client_initdata *);
diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
index cdad58bbfd8b..4ae347cbc36d 100644
--- a/include/linux/page-isolation.h
+++ b/include/linux/page-isolation.h
@@ -63,7 +63,6 @@ undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
 int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
 			bool skip_hwpoisoned_pages);
 
-struct page *alloc_migrate_target(struct page *page, unsigned long private,
-				int **resultp);
+struct page *alloc_migrate_target(struct page *page, unsigned long private);
 
 #endif
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 34ce3ebf97d5..b1bd2186e6d2 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -144,7 +144,7 @@ void release_pages(struct page **pages, int nr);
  * 3. check the page is still in pagecache (if no, goto 1)
  *
  * Remove-side that cares about stability of _refcount (eg. reclaim) has the
- * following (with tree_lock held for write):
+ * following (with the i_pages lock held):
  * A. atomically check refcount is correct and set it to 0 (atomic_cmpxchg)
  * B. remove page from pagecache
  * C. free the page
@@ -157,7 +157,7 @@ void release_pages(struct page **pages, int nr);
  *
  * It is possible that between 1 and 2, the page is removed then the exact same
  * page is inserted into the same position in pagecache. That's OK: the
- * old find_get_page using tree_lock could equally have run before or after
+ * old find_get_page using a lock could equally have run before or after
  * such a re-insertion, depending on order that locks are granted.
  *
  * Lookups racing against pagecache insertion isn't a big problem: either 1
diff --git a/include/linux/platform_data/atmel_mxt_ts.h b/include/linux/platform_data/atmel_mxt_ts.h
deleted file mode 100644
index 695035a8d7fb..000000000000
--- a/include/linux/platform_data/atmel_mxt_ts.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Atmel maXTouch Touchscreen driver
- *
- * Copyright (C) 2010 Samsung Electronics Co.Ltd
- * Author: Joonyoung Shim <jy0922.shim@samsung.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-
-#ifndef __LINUX_PLATFORM_DATA_ATMEL_MXT_TS_H
-#define __LINUX_PLATFORM_DATA_ATMEL_MXT_TS_H
-
-#include <linux/types.h>
-
-enum mxt_suspend_mode {
-	MXT_SUSPEND_DEEP_SLEEP	= 0,
-	MXT_SUSPEND_T9_CTRL	= 1,
-};
-
-/* The platform data for the Atmel maXTouch touchscreen driver */
-struct mxt_platform_data {
-	unsigned long irqflags;
-	u8 t19_num_keys;
-	const unsigned int *t19_keymap;
-	enum mxt_suspend_mode suspend_mode;
-};
-
-#endif /* __LINUX_PLATFORM_DATA_ATMEL_MXT_TS_H */
diff --git a/include/linux/platform_data/clk-da8xx-cfgchip.h b/include/linux/platform_data/clk-da8xx-cfgchip.h
new file mode 100644
index 000000000000..de0f77d38669
--- /dev/null
+++ b/include/linux/platform_data/clk-da8xx-cfgchip.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * clk-da8xx-cfgchip - TI DaVinci DA8xx CFGCHIP clock driver
+ *
+ * Copyright (C) 2018 David Lechner <david@lechnology.com>
+ */
+
+#ifndef __LINUX_PLATFORM_DATA_CLK_DA8XX_CFGCHIP_H__
+#define __LINUX_PLATFORM_DATA_CLK_DA8XX_CFGCHIP_H__
+
+#include <linux/regmap.h>
+
+/**
+ * struct da8xx_cfgchip_clk_platform_data - CFGCHIP clock platform data
+ * @cfgchip: CFGCHIP syscon regmap
+ */
+struct da8xx_cfgchip_clk_platform_data {
+	struct regmap *cfgchip;
+};
+
+#endif /* __LINUX_PLATFORM_DATA_CLK_DA8XX_CFGCHIP_H__ */
diff --git a/include/linux/platform_data/clk-davinci-pll.h b/include/linux/platform_data/clk-davinci-pll.h
new file mode 100644
index 000000000000..e55dab1d578b
--- /dev/null
+++ b/include/linux/platform_data/clk-davinci-pll.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * PLL clock driver for TI Davinci SoCs
+ *
+ * Copyright (C) 2018 David Lechner <david@lechnology.com>
+ */
+
+#ifndef __LINUX_PLATFORM_DATA_CLK_DAVINCI_PLL_H__
+#define __LINUX_PLATFORM_DATA_CLK_DAVINCI_PLL_H__
+
+#include <linux/regmap.h>
+
+/**
+ * struct davinci_pll_platform_data - PLL clock driver platform data
+ * @cfgchip: CFGCHIP syscon regmap
+ */
+struct davinci_pll_platform_data {
+	struct regmap *cfgchip;
+};
+
+#endif /* __LINUX_PLATFORM_DATA_CLK_DAVINCI_PLL_H__ */
diff --git a/include/linux/platform_data/mlxreg.h b/include/linux/platform_data/mlxreg.h
index fcdc707eab99..2744cff1b297 100644
--- a/include/linux/platform_data/mlxreg.h
+++ b/include/linux/platform_data/mlxreg.h
@@ -129,6 +129,8 @@ struct mlxreg_core_platform_data {
  * @mask: top aggregation interrupt common mask;
  * @cell_low: location of low aggregation interrupt register;
  * @mask_low: low aggregation interrupt common mask;
+ * @deferred_nr: I2C adapter number that must exist prior to probing execution;
+ * @shift_nr: I2C adapter numbers must be incremented by this value;
  */
 struct mlxreg_core_hotplug_platform_data {
 	struct mlxreg_core_item *items;
@@ -139,6 +141,8 @@ struct mlxreg_core_hotplug_platform_data {
 	u32 mask;
 	u32 cell_low;
 	u32 mask_low;
+	int deferred_nr;
+	int shift_nr;
 };
 
 #endif /* __LINUX_PLATFORM_DATA_MLXREG_H */
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index fc55ff31eca7..34149e8b5f73 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -104,25 +104,29 @@ struct radix_tree_node {
 	unsigned long	tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
 };
 
-/* The top bits of gfp_mask are used to store the root tags and the IDR flag */
-#define ROOT_IS_IDR	((__force gfp_t)(1 << __GFP_BITS_SHIFT))
-#define ROOT_TAG_SHIFT	(__GFP_BITS_SHIFT + 1)
+/* The IDR tag is stored in the low bits of the GFP flags */
+#define ROOT_IS_IDR	((__force gfp_t)4)
+/* The top bits of gfp_mask are used to store the root tags */
+#define ROOT_TAG_SHIFT	(__GFP_BITS_SHIFT)
 
 struct radix_tree_root {
+	spinlock_t		xa_lock;
 	gfp_t			gfp_mask;
 	struct radix_tree_node	__rcu *rnode;
 };
 
-#define RADIX_TREE_INIT(mask)	{					\
+#define RADIX_TREE_INIT(name, mask)	{				\
+	.xa_lock = __SPIN_LOCK_UNLOCKED(name.xa_lock),			\
 	.gfp_mask = (mask),						\
 	.rnode = NULL,							\
 }
 
 #define RADIX_TREE(name, mask) \
-	struct radix_tree_root name = RADIX_TREE_INIT(mask)
+	struct radix_tree_root name = RADIX_TREE_INIT(name, mask)
 
 #define INIT_RADIX_TREE(root, mask)					\
 do {									\
+	spin_lock_init(&(root)->xa_lock);				\
 	(root)->gfp_mask = (mask);					\
 	(root)->rnode = NULL;						\
 } while (0)
diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h
index a366cc314479..ea8505204fdf 100644
--- a/include/linux/raid/pq.h
+++ b/include/linux/raid/pq.h
@@ -106,6 +106,10 @@ extern const struct raid6_calls raid6_avx512x1;
 extern const struct raid6_calls raid6_avx512x2;
 extern const struct raid6_calls raid6_avx512x4;
 extern const struct raid6_calls raid6_s390vx8;
+extern const struct raid6_calls raid6_vpermxor1;
+extern const struct raid6_calls raid6_vpermxor2;
+extern const struct raid6_calls raid6_vpermxor4;
+extern const struct raid6_calls raid6_vpermxor8;
 
 struct raid6_recov_calls {
 	void (*data2)(int, size_t, int, int, void **);
diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index 728d421fffe9..d09a9c7af109 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -344,7 +344,7 @@ struct rproc_ops {
 	int (*stop)(struct rproc *rproc);
 	void (*kick)(struct rproc *rproc, int vqid);
 	void * (*da_to_va)(struct rproc *rproc, u64 da, int len);
-	int (*load_rsc_table)(struct rproc *rproc, const struct firmware *fw);
+	int (*parse_fw)(struct rproc *rproc, const struct firmware *fw);
 	struct resource_table *(*find_loaded_rsc_table)(
 				struct rproc *rproc, const struct firmware *fw);
 	int (*load)(struct rproc *rproc, const struct firmware *fw);
@@ -395,6 +395,21 @@ enum rproc_crash_type {
 };
 
 /**
+ * struct rproc_dump_segment - segment info from ELF header
+ * @node:	list node related to the rproc segment list
+ * @da:		device address of the segment
+ * @size:	size of the segment
+ */
+struct rproc_dump_segment {
+	struct list_head node;
+
+	dma_addr_t da;
+	size_t size;
+
+	loff_t offset;
+};
+
+/**
  * struct rproc - represents a physical remote processor device
  * @node: list node of this rproc object
  * @domain: iommu domain
@@ -424,6 +439,7 @@ enum rproc_crash_type {
  * @cached_table: copy of the resource table
  * @table_sz: size of @cached_table
  * @has_iommu: flag to indicate if remote processor is behind an MMU
+ * @dump_segments: list of segments in the firmware
  */
 struct rproc {
 	struct list_head node;
@@ -455,19 +471,21 @@ struct rproc {
 	size_t table_sz;
 	bool has_iommu;
 	bool auto_boot;
+	struct list_head dump_segments;
 };
 
 /**
  * struct rproc_subdev - subdevice tied to a remoteproc
  * @node: list node related to the rproc subdevs list
  * @probe: probe function, called as the rproc is started
- * @remove: remove function, called as the rproc is stopped
+ * @remove: remove function, called as the rproc is being stopped, the @crashed
+ *	    parameter indicates if this originates from a recovery
  */
 struct rproc_subdev {
 	struct list_head node;
 
 	int (*probe)(struct rproc_subdev *subdev);
-	void (*remove)(struct rproc_subdev *subdev);
+	void (*remove)(struct rproc_subdev *subdev, bool crashed);
 };
 
 /* we currently support only two vrings per rvdev */
@@ -534,6 +552,7 @@ void rproc_free(struct rproc *rproc);
 int rproc_boot(struct rproc *rproc);
 void rproc_shutdown(struct rproc *rproc);
 void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type);
+int rproc_coredump_add_segment(struct rproc *rproc, dma_addr_t da, size_t size);
 
 static inline struct rproc_vdev *vdev_to_rvdev(struct virtio_device *vdev)
 {
@@ -550,7 +569,7 @@ static inline struct rproc *vdev_to_rproc(struct virtio_device *vdev)
 void rproc_add_subdev(struct rproc *rproc,
 		      struct rproc_subdev *subdev,
 		      int (*probe)(struct rproc_subdev *subdev),
-		      void (*remove)(struct rproc_subdev *subdev));
+		      void (*remove)(struct rproc_subdev *subdev, bool crashed));
 
 void rproc_remove_subdev(struct rproc *rproc, struct rproc_subdev *subdev);
 
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 7d9eb39fa76a..a0233edc0718 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -34,10 +34,12 @@ struct ring_buffer_event {
  *				 array[0] = time delta (28 .. 59)
  *				 size = 8 bytes
  *
- * @RINGBUF_TYPE_TIME_STAMP:	Sync time stamp with external clock
- *				 array[0]    = tv_nsec
- *				 array[1..2] = tv_sec
- *				 size = 16 bytes
+ * @RINGBUF_TYPE_TIME_STAMP:	Absolute timestamp
+ *				 Same format as TIME_EXTEND except that the
+ *				 value is an absolute timestamp, not a delta
+ *				 event.time_delta contains bottom 27 bits
+ *				 array[0] = top (28 .. 59) bits
+ *				 size = 8 bytes
  *
  * <= @RINGBUF_TYPE_DATA_TYPE_LEN_MAX:
  *				Data record
@@ -54,12 +56,12 @@ enum ring_buffer_type {
 	RINGBUF_TYPE_DATA_TYPE_LEN_MAX = 28,
 	RINGBUF_TYPE_PADDING,
 	RINGBUF_TYPE_TIME_EXTEND,
-	/* FIXME: RINGBUF_TYPE_TIME_STAMP not implemented */
 	RINGBUF_TYPE_TIME_STAMP,
 };
 
 unsigned ring_buffer_event_length(struct ring_buffer_event *event);
 void *ring_buffer_event_data(struct ring_buffer_event *event);
+u64 ring_buffer_event_time_stamp(struct ring_buffer_event *event);
 
 /*
  * ring_buffer_discard_commit will remove an event that has not
@@ -115,6 +117,9 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
 int ring_buffer_write(struct ring_buffer *buffer,
 		      unsigned long length, void *data);
 
+void ring_buffer_nest_start(struct ring_buffer *buffer);
+void ring_buffer_nest_end(struct ring_buffer *buffer);
+
 struct ring_buffer_event *
 ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts,
 		 unsigned long *lost_events);
@@ -178,6 +183,8 @@ void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer,
 				      int cpu, u64 *ts);
 void ring_buffer_set_clock(struct ring_buffer *buffer,
 			   u64 (*clock)(void));
+void ring_buffer_set_time_stamp_abs(struct ring_buffer *buffer, bool abs);
+bool ring_buffer_time_stamp_abs(struct ring_buffer *buffer);
 
 size_t ring_buffer_page_len(void *page);
 
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index fc6c90b57be0..4c007f69082f 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -145,12 +145,17 @@ struct rtc_device {
 
 	bool registered;
 
-	struct nvmem_config *nvmem_config;
 	struct nvmem_device *nvmem;
 	/* Old ABI support */
 	bool nvram_old_abi;
 	struct bin_attribute *nvram;
 
+	time64_t range_min;
+	timeu64_t range_max;
+	time64_t start_secs;
+	time64_t offset_secs;
+	bool set_start_time;
+
 #ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL
 	struct work_struct uie_task;
 	struct timer_list uie_timer;
@@ -164,6 +169,11 @@ struct rtc_device {
 };
 #define to_rtc_device(d) container_of(d, struct rtc_device, dev)
 
+/* useful timestamps, in seconds relative to 1970-01-01 00:00:00 */
+#define RTC_TIMESTAMP_BEGIN_1900	-2208988800LL /* 1900-01-01 00:00:00 */
+#define RTC_TIMESTAMP_BEGIN_2000	946684800LL /* 2000-01-01 00:00:00 */
+#define RTC_TIMESTAMP_END_2099		4102444799LL /* 2099-12-31 23:59:59 */
+
 extern struct rtc_device *rtc_device_register(const char *name,
 					struct device *dev,
 					const struct rtc_class_ops *ops,
@@ -212,10 +222,6 @@ void rtc_aie_update_irq(void *private);
 void rtc_uie_update_irq(void *private);
 enum hrtimer_restart rtc_pie_update_irq(struct hrtimer *timer);
 
-int rtc_register(rtc_task_t *task);
-int rtc_unregister(rtc_task_t *task);
-int rtc_control(rtc_task_t *t, unsigned int cmd, unsigned long arg);
-
 void rtc_timer_init(struct rtc_timer *timer, void (*f)(void *p), void *data);
 int rtc_timer_start(struct rtc_device *rtc, struct rtc_timer *timer,
 		    ktime_t expires, ktime_t period);
@@ -271,4 +277,17 @@ extern int rtc_hctosys_ret;
 #define rtc_hctosys_ret -ENODEV
 #endif
 
+#ifdef CONFIG_RTC_NVMEM
+int rtc_nvmem_register(struct rtc_device *rtc,
+		       struct nvmem_config *nvmem_config);
+void rtc_nvmem_unregister(struct rtc_device *rtc);
+#else
+static inline int rtc_nvmem_register(struct rtc_device *rtc,
+				     struct nvmem_config *nvmem_config)
+{
+	return -ENODEV;
+}
+static inline void rtc_nvmem_unregister(struct rtc_device *rtc) {}
+#endif
+
 #endif /* _LINUX_RTC_H_ */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f228c6033832..b3d697f3b573 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -300,7 +300,7 @@ struct util_est {
 	unsigned int			enqueued;
 	unsigned int			ewma;
 #define UTIL_EST_WEIGHT_SHIFT		2
-};
+} __attribute__((__aligned__(sizeof(u64))));
 
 /*
  * The load_avg/util_avg accumulates an infinite geometric series
@@ -364,7 +364,7 @@ struct sched_avg {
 	unsigned long			runnable_load_avg;
 	unsigned long			util_avg;
 	struct util_est			util_est;
-};
+} ____cacheline_aligned;
 
 struct sched_statistics {
 #ifdef CONFIG_SCHEDSTATS
@@ -435,7 +435,7 @@ struct sched_entity {
 	 * Put into separate cache line so it does not
 	 * collide with read-mostly values above.
 	 */
-	struct sched_avg		avg ____cacheline_aligned_in_smp;
+	struct sched_avg		avg;
 #endif
 };
 
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 9806184bb3d5..2c570cd934af 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -104,7 +104,8 @@ static inline void mm_update_next_owner(struct mm_struct *mm)
 #endif /* CONFIG_MEMCG */
 
 #ifdef CONFIG_MMU
-extern void arch_pick_mmap_layout(struct mm_struct *mm);
+extern void arch_pick_mmap_layout(struct mm_struct *mm,
+				  struct rlimit *rlim_stack);
 extern unsigned long
 arch_get_unmapped_area(struct file *, unsigned long, unsigned long,
 		       unsigned long, unsigned long);
@@ -113,7 +114,8 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
 			  unsigned long len, unsigned long pgoff,
 			  unsigned long flags);
 #else
-static inline void arch_pick_mmap_layout(struct mm_struct *mm) {}
+static inline void arch_pick_mmap_layout(struct mm_struct *mm,
+					 struct rlimit *rlim_stack) {}
 #endif
 
 static inline bool in_vfork(struct task_struct *tsk)
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 23b4f9cb82db..a7ce74c74e49 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -319,7 +319,7 @@ extern int force_sig_info(int, struct siginfo *, struct task_struct *);
 extern int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp);
 extern int kill_pid_info(int sig, struct siginfo *info, struct pid *pid);
 extern int kill_pid_info_as_cred(int, struct siginfo *, struct pid *,
-				const struct cred *, u32);
+				const struct cred *);
 extern int kill_pgrp(struct pid *pid, int sig, int priv);
 extern int kill_pid(struct pid *pid, int sig, int priv);
 extern __must_check bool do_notify_parent(struct task_struct *, int);
diff --git a/include/linux/security.h b/include/linux/security.h
index 17ffd1ef6074..200920f521a1 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -322,6 +322,7 @@ int security_cred_alloc_blank(struct cred *cred, gfp_t gfp);
 void security_cred_free(struct cred *cred);
 int security_prepare_creds(struct cred *new, const struct cred *old, gfp_t gfp);
 void security_transfer_creds(struct cred *new, const struct cred *old);
+void security_cred_getsecid(const struct cred *c, u32 *secid);
 int security_kernel_act_as(struct cred *new, u32 secid);
 int security_kernel_create_files_as(struct cred *new, struct inode *inode);
 int security_kernel_module_request(char *kmod_name);
@@ -345,7 +346,7 @@ int security_task_setscheduler(struct task_struct *p);
 int security_task_getscheduler(struct task_struct *p);
 int security_task_movememory(struct task_struct *p);
 int security_task_kill(struct task_struct *p, struct siginfo *info,
-			int sig, u32 secid);
+			int sig, const struct cred *cred);
 int security_task_prctl(int option, unsigned long arg2, unsigned long arg3,
 			unsigned long arg4, unsigned long arg5);
 void security_task_to_inode(struct task_struct *p, struct inode *inode);
@@ -1008,7 +1009,7 @@ static inline int security_task_movememory(struct task_struct *p)
 
 static inline int security_task_kill(struct task_struct *p,
 				     struct siginfo *info, int sig,
-				     u32 secid)
+				     const struct cred *cred)
 {
 	return 0;
 }
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index ab437dd2e3b9..a121982af0f5 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -118,9 +118,14 @@ __printf(2, 3)
 void seq_printf(struct seq_file *m, const char *fmt, ...);
 void seq_putc(struct seq_file *m, char c);
 void seq_puts(struct seq_file *m, const char *s);
+void seq_put_decimal_ull_width(struct seq_file *m, const char *delimiter,
+			       unsigned long long num, unsigned int width);
 void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
 			 unsigned long long num);
 void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num);
+void seq_put_hex_ll(struct seq_file *m, const char *delimiter,
+		    unsigned long long v, unsigned int width);
+
 void seq_escape(struct seq_file *m, const char *s, const char *esc);
 
 void seq_hex_dump(struct seq_file *m, const char *prefix_str, int prefix_type,
@@ -235,4 +240,5 @@ extern struct hlist_node *seq_hlist_start_percpu(struct hlist_head __percpu *hea
 
 extern struct hlist_node *seq_hlist_next_percpu(void *v, struct hlist_head __percpu *head, int *cpu, loff_t *pos);
 
+void seq_file_init(void);
 #endif
diff --git a/arch/x86/purgatory/sha256.h b/include/linux/sha256.h
index 2867d9825a57..244fe01a65fb 100644
--- a/arch/x86/purgatory/sha256.h
+++ b/include/linux/sha256.h
@@ -13,9 +13,18 @@
 #include <linux/types.h>
 #include <crypto/sha.h>
 
+/*
+ * Stand-alone implementation of the SHA256 algorithm. It is designed to
+ * have as few dependencies as possible so it can be used in the
+ * kexec_file purgatory. In other cases you should use the implementation in
+ * crypto/.
+ *
+ * For details see lib/sha256.c
+ */
+
 extern int sha256_init(struct sha256_state *sctx);
 extern int sha256_update(struct sha256_state *sctx, const u8 *input,
-				unsigned int length);
+			 unsigned int length);
 extern int sha256_final(struct sha256_state *sctx, u8 *hash);
 
 #endif /* SHA256_H */
diff --git a/include/linux/soc/qcom/mdt_loader.h b/include/linux/soc/qcom/mdt_loader.h
index bd8e0864b059..5b98bbdabc25 100644
--- a/include/linux/soc/qcom/mdt_loader.h
+++ b/include/linux/soc/qcom/mdt_loader.h
@@ -14,6 +14,7 @@ struct firmware;
 ssize_t qcom_mdt_get_size(const struct firmware *fw);
 int qcom_mdt_load(struct device *dev, const struct firmware *fw,
 		  const char *fw_name, int pas_id, void *mem_region,
-		  phys_addr_t mem_phys, size_t mem_size);
+		  phys_addr_t mem_phys, size_t mem_size,
+		  phys_addr_t *reloc_base);
 
 #endif
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index ed761f751ecb..9b11b6a0978c 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -217,5 +217,12 @@ void rpc_clnt_xprt_switch_add_xprt(struct rpc_clnt *, struct rpc_xprt *);
 bool rpc_clnt_xprt_switch_has_addr(struct rpc_clnt *clnt,
 			const struct sockaddr *sap);
 void rpc_cleanup_clids(void);
+
+static inline int rpc_reply_expected(struct rpc_task *task)
+{
+	return (task->tk_msg.rpc_proc != NULL) &&
+		(task->tk_msg.rpc_proc->p_decode != NULL);
+}
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_SUNRPC_CLNT_H */
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index d950223c64b1..2bd68177a442 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -253,6 +253,12 @@ xdr_stream_remaining(const struct xdr_stream *xdr)
 	return xdr->nwords << 2;
 }
 
+ssize_t xdr_stream_decode_opaque(struct xdr_stream *xdr, void *ptr,
+		size_t size);
+ssize_t xdr_stream_decode_opaque_dup(struct xdr_stream *xdr, void **ptr,
+		size_t maxlen, gfp_t gfp_flags);
+ssize_t xdr_stream_decode_string(struct xdr_stream *xdr, char *str,
+		size_t size);
 ssize_t xdr_stream_decode_string_dup(struct xdr_stream *xdr, char **str,
 		size_t maxlen, gfp_t gfp_flags);
 /**
@@ -313,6 +319,31 @@ xdr_stream_encode_u64(struct xdr_stream *xdr, __u64 n)
 }
 
 /**
+ * xdr_stream_encode_opaque_inline - Encode opaque xdr data
+ * @xdr: pointer to xdr_stream
+ * @ptr: pointer to void pointer
+ * @len: size of object
+ *
+ * Return values:
+ *   On success, returns length in bytes of XDR buffer consumed
+ *   %-EMSGSIZE on XDR buffer overflow
+ */
+static inline ssize_t
+xdr_stream_encode_opaque_inline(struct xdr_stream *xdr, void **ptr, size_t len)
+{
+	size_t count = sizeof(__u32) + xdr_align_size(len);
+	__be32 *p = xdr_reserve_space(xdr, count);
+
+	if (unlikely(!p)) {
+		*ptr = NULL;
+		return -EMSGSIZE;
+	}
+	xdr_encode_opaque(p, NULL, len);
+	*ptr = ++p;
+	return count;
+}
+
+/**
  * xdr_stream_encode_opaque_fixed - Encode fixed length opaque xdr data
  * @xdr: pointer to xdr_stream
  * @ptr: pointer to opaque data object
@@ -356,6 +387,31 @@ xdr_stream_encode_opaque(struct xdr_stream *xdr, const void *ptr, size_t len)
 }
 
 /**
+ * xdr_stream_encode_uint32_array - Encode variable length array of integers
+ * @xdr: pointer to xdr_stream
+ * @array: array of integers
+ * @array_size: number of elements in @array
+ *
+ * Return values:
+ *   On success, returns length in bytes of XDR buffer consumed
+ *   %-EMSGSIZE on XDR buffer overflow
+ */
+static inline ssize_t
+xdr_stream_encode_uint32_array(struct xdr_stream *xdr,
+		const __u32 *array, size_t array_size)
+{
+	ssize_t ret = (array_size+1) * sizeof(__u32);
+	__be32 *p = xdr_reserve_space(xdr, ret);
+
+	if (unlikely(!p))
+		return -EMSGSIZE;
+	*p++ = cpu_to_be32(array_size);
+	for (; array_size > 0; p++, array++, array_size--)
+		*p = cpu_to_be32p(array);
+	return ret;
+}
+
+/**
  * xdr_stream_decode_u32 - Decode a 32-bit integer
  * @xdr: pointer to xdr_stream
  * @ptr: location to store integer
@@ -432,6 +488,52 @@ xdr_stream_decode_opaque_inline(struct xdr_stream *xdr, void **ptr, size_t maxle
 	}
 	return len;
 }
+
+/**
+ * xdr_stream_decode_uint32_array - Decode variable length array of integers
+ * @xdr: pointer to xdr_stream
+ * @array: location to store the integer array or NULL
+ * @array_size: number of elements to store
+ *
+ * If @array is NULL nothing is stored and the decoded element count is
+ * returned. Trailing elements of @array beyond the decoded count are zeroed.
+ * On %-EMSGSIZE the first @array_size elements are still stored.
+ *
+ * Return values:
+ *   On success, returns number of elements stored in @array
+ *   %-EBADMSG on XDR buffer overflow or if the element count is too large
+ *   %-EMSGSIZE if the size of the array exceeds @array_size
+ */
+static inline ssize_t
+xdr_stream_decode_uint32_array(struct xdr_stream *xdr,
+		__u32 *array, size_t array_size)
+{
+	__be32 *p;
+	__u32 len;
+	ssize_t retval;
+
+	if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0))
+		return -EBADMSG;
+	/* Reject counts whose byte size would wrap size_t (32-bit builds) */
+	if (U32_MAX >= SIZE_MAX / sizeof(*p) && len > SIZE_MAX / sizeof(*p))
+		return -EBADMSG;
+	p = xdr_inline_decode(xdr, len * sizeof(*p));
+	if (unlikely(!p))
+		return -EBADMSG;
+	if (array == NULL)
+		return len;
+	if (len <= array_size) {
+		if (len < array_size)
+			memset(array+len, 0, (array_size-len)*sizeof(*array));
+		array_size = len;
+		retval = len;
+	} else
+		retval = -EMSGSIZE;
+	/* On -EMSGSIZE this still copies the first array_size elements */
+	for (; array_size > 0; p++, array++, array_size--)
+		*array = be32_to_cpup(p);
+	return retval;
+}
 #endif /* __KERNEL__ */
 
 #endif /* _SUNRPC_XDR_H_ */
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 7fad83881ce1..5fea0fb420df 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -197,7 +197,7 @@ struct rpc_xprt {
 	struct list_head	free;		/* free slots */
 	unsigned int		max_reqs;	/* max number of slots */
 	unsigned int		min_reqs;	/* min number of slots */
-	atomic_t		num_reqs;	/* total slots */
+	unsigned int		num_reqs;	/* total slots */
 	unsigned long		state;		/* transport state */
 	unsigned char		resvport   : 1; /* use a reserved port */
 	atomic_t		swapper;	/* we're swapping over this
@@ -373,6 +373,7 @@ void			xprt_wait_for_buffer_space(struct rpc_task *task, rpc_action action);
 void			xprt_write_space(struct rpc_xprt *xprt);
 void			xprt_adjust_cwnd(struct rpc_xprt *xprt, struct rpc_task *task, int result);
 struct rpc_rqst *	xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid);
+void			xprt_update_rtt(struct rpc_task *task);
 void			xprt_complete_rqst(struct rpc_task *task, int copied);
 void			xprt_pin_rqst(struct rpc_rqst *req);
 void			xprt_unpin_rqst(struct rpc_rqst *req);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index b961184f597a..70fcda1a9049 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -81,6 +81,17 @@ union bpf_attr;
 #include <linux/key.h>
 #include <trace/syscall.h>
 
+#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
+/*
+ * It may be useful for an architecture to override the definitions of the
+ * SYSCALL_DEFINE0() and __SYSCALL_DEFINEx() macros, in particular to use a
+ * different calling convention for syscalls. To allow for that, the prototypes
+ * for the sys_*() functions below will *not* be included if
+ * CONFIG_ARCH_HAS_SYSCALL_WRAPPER is enabled.
+ */
+#include <asm/syscall_wrapper.h>
+#endif /* CONFIG_ARCH_HAS_SYSCALL_WRAPPER */
+
 /*
  * __MAP - apply a macro to syscall arguments
  * __MAP(n, m, t1, a1, t2, a2, ..., tn, an) will expand to
@@ -91,7 +102,7 @@ union bpf_attr;
  * for SYSCALL_DEFINE<n>/COMPAT_SYSCALL_DEFINE<n>
  */
 #define __MAP0(m,...)
-#define __MAP1(m,t,a) m(t,a)
+#define __MAP1(m,t,a,...) m(t,a)
 #define __MAP2(m,t,a,...) m(t,a), __MAP1(m,__VA_ARGS__)
 #define __MAP3(m,t,a,...) m(t,a), __MAP2(m,__VA_ARGS__)
 #define __MAP4(m,t,a,...) m(t,a), __MAP3(m,__VA_ARGS__)
@@ -189,11 +200,13 @@ static inline int is_syscall_trace_event(struct trace_event_call *tp_event)
 }
 #endif
 
+#ifndef SYSCALL_DEFINE0
 #define SYSCALL_DEFINE0(sname)					\
 	SYSCALL_METADATA(_##sname, 0);				\
 	asmlinkage long sys_##sname(void);			\
 	ALLOW_ERROR_INJECTION(sys_##sname, ERRNO);		\
 	asmlinkage long sys_##sname(void)
+#endif /* SYSCALL_DEFINE0 */
 
 #define SYSCALL_DEFINE1(name, ...) SYSCALL_DEFINEx(1, _##name, __VA_ARGS__)
 #define SYSCALL_DEFINE2(name, ...) SYSCALL_DEFINEx(2, _##name, __VA_ARGS__)
@@ -209,20 +222,28 @@ static inline int is_syscall_trace_event(struct trace_event_call *tp_event)
 	__SYSCALL_DEFINEx(x, sname, __VA_ARGS__)
 
 #define __PROTECT(...) asmlinkage_protect(__VA_ARGS__)
+
+/*
+ * The asmlinkage stub is aliased to a function named __se_sys_*() which
+ * sign-extends 32-bit ints to longs whenever needed. The actual work is
+ * done within __do_sys_*().
+ */
+#ifndef __SYSCALL_DEFINEx
 #define __SYSCALL_DEFINEx(x, name, ...)					\
 	asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))	\
-		__attribute__((alias(__stringify(SyS##name))));		\
+		__attribute__((alias(__stringify(__se_sys##name))));	\
 	ALLOW_ERROR_INJECTION(sys##name, ERRNO);			\
-	static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__));	\
-	asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__));	\
-	asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__))	\
+	static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\
+	asmlinkage long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__));	\
+	asmlinkage long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__))	\
 	{								\
-		long ret = SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__));	\
+		long ret = __do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__));\
 		__MAP(x,__SC_TEST,__VA_ARGS__);				\
 		__PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__));	\
 		return ret;						\
 	}								\
-	static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__))
+	static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
+#endif /* __SYSCALL_DEFINEx */
 
 /*
  * Called before coming back to user-mode. Returning to user-mode with an
@@ -252,7 +273,12 @@ static inline void addr_limit_user_check(void)
  * Please note that these prototypes here are only provided for information
  * purposes, for static analysis, and for linking from the syscall table.
  * These functions should not be called elsewhere from kernel code.
+ *
+ * As the syscall calling convention may be different from the default
+ * for architectures overriding the syscall calling convention, do not
+ * include the prototypes if CONFIG_ARCH_HAS_SYSCALL_WRAPPER is enabled.
  */
+#ifndef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
 asmlinkage long sys_io_setup(unsigned nr_reqs, aio_context_t __user *ctx);
 asmlinkage long sys_io_destroy(aio_context_t ctx);
 asmlinkage long sys_io_submit(aio_context_t, long,
@@ -1076,6 +1102,8 @@ asmlinkage long sys_old_mmap(struct mmap_arg_struct __user *arg);
  */
 asmlinkage long sys_ni_syscall(void);
 
+#endif /* CONFIG_ARCH_HAS_SYSCALL_WRAPPER */
+
 
 /*
  * Kernel code should not call syscalls (i.e., sys_xyzyyz()) directly.
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 8c5302374eaa..7834be668d80 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -148,6 +148,7 @@ struct thermal_cooling_device {
 	struct device device;
 	struct device_node *np;
 	void *devdata;
+	void *stats;
 	const struct thermal_cooling_device_ops *ops;
 	bool updated; /* true if the cooling device does not need update */
 	struct mutex lock; /* protect thermal_instances list */
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 7f8c9a127f5a..55388ab45fd4 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -115,27 +115,46 @@ enum tick_dep_bits {
 extern bool tick_nohz_enabled;
 extern bool tick_nohz_tick_stopped(void);
 extern bool tick_nohz_tick_stopped_cpu(int cpu);
+extern void tick_nohz_idle_stop_tick(void);
+extern void tick_nohz_idle_retain_tick(void);
+extern void tick_nohz_idle_restart_tick(void);
 extern void tick_nohz_idle_enter(void);
 extern void tick_nohz_idle_exit(void);
 extern void tick_nohz_irq_exit(void);
-extern ktime_t tick_nohz_get_sleep_length(void);
+extern bool tick_nohz_idle_got_tick(void);
+extern ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next);
 extern unsigned long tick_nohz_get_idle_calls(void);
 extern unsigned long tick_nohz_get_idle_calls_cpu(int cpu);
 extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
 extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
+
+static inline void tick_nohz_idle_stop_tick_protected(void)
+{
+	local_irq_disable();
+	tick_nohz_idle_stop_tick();
+	local_irq_enable();
+}
+
 #else /* !CONFIG_NO_HZ_COMMON */
 #define tick_nohz_enabled (0)
 static inline int tick_nohz_tick_stopped(void) { return 0; }
 static inline int tick_nohz_tick_stopped_cpu(int cpu) { return 0; }
+static inline void tick_nohz_idle_stop_tick(void) { }
+static inline void tick_nohz_idle_retain_tick(void) { }
+static inline void tick_nohz_idle_restart_tick(void) { }
 static inline void tick_nohz_idle_enter(void) { }
 static inline void tick_nohz_idle_exit(void) { }
+static inline bool tick_nohz_idle_got_tick(void) { return false; }
 
-static inline ktime_t tick_nohz_get_sleep_length(void)
+static inline ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next)
 {
-	return NSEC_PER_SEC / HZ;
+	*delta_next = TICK_NSEC;
+	return *delta_next;
 }
 static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; }
 static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; }
+
+static inline void tick_nohz_idle_stop_tick_protected(void) { }
 #endif /* !CONFIG_NO_HZ_COMMON */
 
 #ifdef CONFIG_NO_HZ_FULL
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 82c219dfd3bb..9737fbec7019 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -31,6 +31,7 @@ struct timespec64 get_monotonic_coarse64(void);
 extern void getrawmonotonic64(struct timespec64 *ts);
 extern void ktime_get_ts64(struct timespec64 *ts);
 extern time64_t ktime_get_seconds(void);
+extern time64_t __ktime_get_real_seconds(void);
 extern time64_t ktime_get_real_seconds(void);
 extern void ktime_get_active_ts64(struct timespec64 *ts);
 
diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index bcdd3790e94d..06639fb6ab85 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -44,7 +44,7 @@ struct tpm_class_ops {
 	bool (*update_timeouts)(struct tpm_chip *chip,
 				unsigned long *timeout_cap);
 	int (*request_locality)(struct tpm_chip *chip, int loc);
-	void (*relinquish_locality)(struct tpm_chip *chip, int loc);
+	int (*relinquish_locality)(struct tpm_chip *chip, int loc);
 	void (*clk_enable)(struct tpm_chip *chip, bool value);
 };
 
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index e0e98000b665..2bde3eff564c 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -430,11 +430,13 @@ enum event_trigger_type {
 
 extern int filter_match_preds(struct event_filter *filter, void *rec);
 
-extern enum event_trigger_type event_triggers_call(struct trace_event_file *file,
-						   void *rec);
-extern void event_triggers_post_call(struct trace_event_file *file,
-				     enum event_trigger_type tt,
-				     void *rec);
+extern enum event_trigger_type
+event_triggers_call(struct trace_event_file *file, void *rec,
+		    struct ring_buffer_event *event);
+extern void
+event_triggers_post_call(struct trace_event_file *file,
+			 enum event_trigger_type tt,
+			 void *rec, struct ring_buffer_event *event);
 
 bool trace_event_ignore_this_pid(struct trace_event_file *trace_file);
 
@@ -454,7 +456,7 @@ trace_trigger_soft_disabled(struct trace_event_file *file)
 
 	if (!(eflags & EVENT_FILE_FL_TRIGGER_COND)) {
 		if (eflags & EVENT_FILE_FL_TRIGGER_MODE)
-			event_triggers_call(file, NULL);
+			event_triggers_call(file, NULL, NULL);
 		if (eflags & EVENT_FILE_FL_SOFT_DISABLED)
 			return true;
 		if (eflags & EVENT_FILE_FL_PID_FILTER)
diff --git a/include/linux/utsname.h b/include/linux/utsname.h
index c8060c2ecd04..44429d9142ca 100644
--- a/include/linux/utsname.h
+++ b/include/linux/utsname.h
@@ -44,6 +44,8 @@ static inline void put_uts_ns(struct uts_namespace *ns)
 {
 	kref_put(&ns->kref, free_uts_ns);
 }
+
+void uts_ns_init(void);
 #else
 static inline void get_uts_ns(struct uts_namespace *ns)
 {
@@ -61,6 +63,10 @@ static inline struct uts_namespace *copy_utsname(unsigned long flags,
 
 	return old_ns;
 }
+
+static inline void uts_ns_init(void)
+{
+}
 #endif
 
 #ifdef CONFIG_PROC_SYSCTL
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index a4c2317d8b9f..f25cef84b41d 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -20,6 +20,17 @@ extern int sysctl_vm_numa_stat_handler(struct ctl_table *table,
 		int write, void __user *buffer, size_t *length, loff_t *ppos);
 #endif
 
+struct reclaim_stat {
+	unsigned nr_dirty;
+	unsigned nr_unqueued_dirty;
+	unsigned nr_congested;
+	unsigned nr_writeback;
+	unsigned nr_immediate;
+	unsigned nr_activate;
+	unsigned nr_ref_keep;
+	unsigned nr_unmap_fail;
+};
+
 #ifdef CONFIG_VM_EVENT_COUNTERS
 /*
  * Light weight per cpu counter implementation.
diff --git a/include/linux/xarray.h b/include/linux/xarray.h
new file mode 100644
index 000000000000..2dfc8006fe64
--- /dev/null
+++ b/include/linux/xarray.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+#ifndef _LINUX_XARRAY_H
+#define _LINUX_XARRAY_H
+/*
+ * eXtensible Arrays
+ * Copyright (c) 2017 Microsoft Corporation
+ * Author: Matthew Wilcox <mawilcox@microsoft.com>
+ */
+
+#include <linux/spinlock.h>
+
+#define xa_trylock(xa)		spin_trylock(&(xa)->xa_lock)
+#define xa_lock(xa)		spin_lock(&(xa)->xa_lock)
+#define xa_unlock(xa)		spin_unlock(&(xa)->xa_lock)
+#define xa_lock_bh(xa)		spin_lock_bh(&(xa)->xa_lock)
+#define xa_unlock_bh(xa)	spin_unlock_bh(&(xa)->xa_lock)
+#define xa_lock_irq(xa)		spin_lock_irq(&(xa)->xa_lock)
+#define xa_unlock_irq(xa)	spin_unlock_irq(&(xa)->xa_lock)
+#define xa_lock_irqsave(xa, flags) \
+				spin_lock_irqsave(&(xa)->xa_lock, flags)
+#define xa_unlock_irqrestore(xa, flags) \
+				spin_unlock_irqrestore(&(xa)->xa_lock, flags)
+
+#endif /* _LINUX_XARRAY_H */
diff --git a/include/media/v4l2-common.h b/include/media/v4l2-common.h
index 54b689247937..160bca96d524 100644
--- a/include/media/v4l2-common.h
+++ b/include/media/v4l2-common.h
@@ -320,6 +320,7 @@ void v4l_bound_align_image(unsigned int *width, unsigned int wmin,
  *	set of resolutions contained in an array of a driver specific struct.
  *
  * @array: a driver specific array of image sizes
+ * @array_size: the length of the driver specific array of image sizes
  * @width_field: the name of the width field in the driver specific struct
  * @height_field: the name of the height field in the driver specific struct
  * @width: desired width.
@@ -332,13 +333,13 @@ void v4l_bound_align_image(unsigned int *width, unsigned int wmin,
  *
  * Returns the best match or NULL if the length of the array is zero.
  */
-#define v4l2_find_nearest_size(array, width_field, height_field, \
+#define v4l2_find_nearest_size(array, array_size, width_field, height_field, \
 			       width, height)				\
 	({								\
 		BUILD_BUG_ON(sizeof((array)->width_field) != sizeof(u32) || \
 			     sizeof((array)->height_field) != sizeof(u32)); \
 		(typeof(&(*(array))))__v4l2_find_nearest_size(		\
-			(array), ARRAY_SIZE(array), sizeof(*(array)),	\
+			(array), array_size, sizeof(*(array)),		\
 			offsetof(typeof(*(array)), width_field),	\
 			offsetof(typeof(*(array)), height_field),	\
 			width, height);					\
diff --git a/include/media/v4l2-dev.h b/include/media/v4l2-dev.h
index 27634e8d2585..f60cf9cf3b9c 100644
--- a/include/media/v4l2-dev.h
+++ b/include/media/v4l2-dev.h
@@ -33,13 +33,13 @@
  */
 enum vfl_devnode_type {
 	VFL_TYPE_GRABBER	= 0,
-	VFL_TYPE_VBI		= 1,
-	VFL_TYPE_RADIO		= 2,
-	VFL_TYPE_SUBDEV		= 3,
-	VFL_TYPE_SDR		= 4,
-	VFL_TYPE_TOUCH		= 5,
+	VFL_TYPE_VBI,
+	VFL_TYPE_RADIO,
+	VFL_TYPE_SUBDEV,
+	VFL_TYPE_SDR,
+	VFL_TYPE_TOUCH,
+	VFL_TYPE_MAX /* Shall be the last one */
 };
-#define VFL_TYPE_MAX VFL_TYPE_TOUCH
 
 /**
  * enum  vfl_direction - Identifies if a &struct video_device corresponds
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 95ccc1eef558..b619a190ff12 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -895,7 +895,7 @@ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst,
 				     u16 conn_timeout);
 struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
 				u8 dst_type, u8 sec_level, u16 conn_timeout,
-				u8 role);
+				u8 role, bdaddr_t *direct_rpa);
 struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst,
 				 u8 sec_level, u8 auth_type);
 struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst,
diff --git a/include/net/devlink.h b/include/net/devlink.h
index e21d8cadd480..2e4f71e16e95 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -232,14 +232,6 @@ struct devlink_dpipe_headers {
 };
 
 /**
- * struct devlink_resource_ops - resource ops
- * @occ_get: get the occupied size
- */
-struct devlink_resource_ops {
-	u64 (*occ_get)(struct devlink *devlink);
-};
-
-/**
  * struct devlink_resource_size_params - resource's size parameters
  * @size_min: minimum size which can be set
  * @size_max: maximum size which can be set
@@ -265,6 +257,8 @@ devlink_resource_size_params_init(struct devlink_resource_size_params *size_para
 	size_params->unit = unit;
 }
 
+typedef u64 devlink_resource_occ_get_t(void *priv);
+
 /**
  * struct devlink_resource - devlink resource
  * @name: name of the resource
@@ -277,7 +271,6 @@ devlink_resource_size_params_init(struct devlink_resource_size_params *size_para
  * @size_params: size parameters
  * @list: parent list
  * @resource_list: list of child resources
- * @resource_ops: resource ops
  */
 struct devlink_resource {
 	const char *name;
@@ -289,7 +282,8 @@ struct devlink_resource {
 	struct devlink_resource_size_params size_params;
 	struct list_head list;
 	struct list_head resource_list;
-	const struct devlink_resource_ops *resource_ops;
+	devlink_resource_occ_get_t *occ_get;
+	void *occ_get_priv;
 };
 
 #define DEVLINK_RESOURCE_ID_PARENT_TOP 0
@@ -409,8 +403,7 @@ int devlink_resource_register(struct devlink *devlink,
 			      u64 resource_size,
 			      u64 resource_id,
 			      u64 parent_resource_id,
-			      const struct devlink_resource_size_params *size_params,
-			      const struct devlink_resource_ops *resource_ops);
+			      const struct devlink_resource_size_params *size_params);
 void devlink_resources_unregister(struct devlink *devlink,
 				  struct devlink_resource *resource);
 int devlink_resource_size_get(struct devlink *devlink,
@@ -419,6 +412,12 @@ int devlink_resource_size_get(struct devlink *devlink,
 int devlink_dpipe_table_resource_set(struct devlink *devlink,
 				     const char *table_name, u64 resource_id,
 				     u64 resource_units);
+void devlink_resource_occ_get_register(struct devlink *devlink,
+				       u64 resource_id,
+				       devlink_resource_occ_get_t *occ_get,
+				       void *occ_get_priv);
+void devlink_resource_occ_get_unregister(struct devlink *devlink,
+					 u64 resource_id);
 
 #else
 
@@ -562,8 +561,7 @@ devlink_resource_register(struct devlink *devlink,
 			  u64 resource_size,
 			  u64 resource_id,
 			  u64 parent_resource_id,
-			  const struct devlink_resource_size_params *size_params,
-			  const struct devlink_resource_ops *resource_ops)
+			  const struct devlink_resource_size_params *size_params)
 {
 	return 0;
 }
@@ -589,6 +587,20 @@ devlink_dpipe_table_resource_set(struct devlink *devlink,
 	return -EOPNOTSUPP;
 }
 
+static inline void
+devlink_resource_occ_get_register(struct devlink *devlink,
+				  u64 resource_id,
+				  devlink_resource_occ_get_t *occ_get,
+				  void *occ_get_priv)
+{
+}
+
+static inline void
+devlink_resource_occ_get_unregister(struct devlink *devlink,
+				    u64 resource_id)
+{
+}
+
 #endif
 
 #endif /* _NET_DEVLINK_H_ */
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index 899495589a7e..c7be1ca8e562 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -43,6 +43,7 @@ struct inet_timewait_sock {
 #define tw_family		__tw_common.skc_family
 #define tw_state		__tw_common.skc_state
 #define tw_reuse		__tw_common.skc_reuse
+#define tw_reuseport		__tw_common.skc_reuseport
 #define tw_ipv6only		__tw_common.skc_ipv6only
 #define tw_bound_dev_if		__tw_common.skc_bound_dev_if
 #define tw_node			__tw_common.skc_nulls_node
diff --git a/include/net/nexthop.h b/include/net/nexthop.h
index 36bb794f5cd6..902ff382a6dc 100644
--- a/include/net/nexthop.h
+++ b/include/net/nexthop.h
@@ -7,7 +7,7 @@
 
 static inline int rtnh_ok(const struct rtnexthop *rtnh, int remaining)
 {
-	return remaining >= sizeof(*rtnh) &&
+	return remaining >= (int)sizeof(*rtnh) &&
 	       rtnh->rtnh_len >= sizeof(*rtnh) &&
 	       rtnh->rtnh_len <= remaining;
 }
diff --git a/include/net/slhc_vj.h b/include/net/slhc_vj.h
index 8716d5942b65..8fcf8908a694 100644
--- a/include/net/slhc_vj.h
+++ b/include/net/slhc_vj.h
@@ -127,6 +127,7 @@ typedef __u32 int32;
  */
 struct cstate {
 	byte_t	cs_this;	/* connection id number (xmit) */
+	bool	initialized;	/* true if initialized */
 	struct cstate *next;	/* next in ring (xmit) */
 	struct iphdr cs_ip;	/* ip/tcp hdr from most recent packet */
 	struct tcphdr cs_tcp;
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h
index 63815f66b274..f0820554caa9 100644
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -49,6 +49,7 @@ enum afs_fs_operation {
 	afs_FS_ExtendLock		= 157,	/* AFS Extend a file lock */
 	afs_FS_ReleaseLock		= 158,	/* AFS Release a file lock */
 	afs_FS_Lookup			= 161,	/* AFS lookup file in directory */
+	afs_FS_InlineBulkStatus		= 65536, /* AFS Fetch multiple file statuses with errors */
 	afs_FS_FetchData64		= 65537, /* AFS Fetch file data */
 	afs_FS_StoreData64		= 65538, /* AFS Store file data */
 	afs_FS_GiveUpAllCallBacks	= 65539, /* AFS Give up all our callbacks on a server */
@@ -62,6 +63,27 @@ enum afs_vl_operation {
 	afs_VL_GetCapabilities	= 65537,	/* AFS Get VL server capabilities */
 };
 
+enum afs_edit_dir_op {
+	afs_edit_dir_create,
+	afs_edit_dir_create_error,
+	afs_edit_dir_create_inval,
+	afs_edit_dir_create_nospc,
+	afs_edit_dir_delete,
+	afs_edit_dir_delete_error,
+	afs_edit_dir_delete_inval,
+	afs_edit_dir_delete_noent,
+};
+
+enum afs_edit_dir_reason {
+	afs_edit_dir_for_create,
+	afs_edit_dir_for_link,
+	afs_edit_dir_for_mkdir,
+	afs_edit_dir_for_rename,
+	afs_edit_dir_for_rmdir,
+	afs_edit_dir_for_symlink,
+	afs_edit_dir_for_unlink,
+};
+
 #endif /* end __AFS_DECLARE_TRACE_ENUMS_ONCE_ONLY */
 
 /*
@@ -93,6 +115,7 @@ enum afs_vl_operation {
 	EM(afs_FS_ExtendLock,			"FS.ExtendLock") \
 	EM(afs_FS_ReleaseLock,			"FS.ReleaseLock") \
 	EM(afs_FS_Lookup,			"FS.Lookup") \
+	EM(afs_FS_InlineBulkStatus,		"FS.InlineBulkStatus") \
 	EM(afs_FS_FetchData64,			"FS.FetchData64") \
 	EM(afs_FS_StoreData64,			"FS.StoreData64") \
 	EM(afs_FS_GiveUpAllCallBacks,		"FS.GiveUpAllCallBacks") \
@@ -104,6 +127,25 @@ enum afs_vl_operation {
 	EM(afs_YFSVL_GetEndpoints,		"YFSVL.GetEndpoints") \
 	E_(afs_VL_GetCapabilities,		"VL.GetCapabilities")
 
+#define afs_edit_dir_ops				  \
+	EM(afs_edit_dir_create,			"create") \
+	EM(afs_edit_dir_create_error,		"c_fail") \
+	EM(afs_edit_dir_create_inval,		"c_invl") \
+	EM(afs_edit_dir_create_nospc,		"c_nspc") \
+	EM(afs_edit_dir_delete,			"delete") \
+	EM(afs_edit_dir_delete_error,		"d_err ") \
+	EM(afs_edit_dir_delete_inval,		"d_invl") \
+	E_(afs_edit_dir_delete_noent,		"d_nent")
+
+#define afs_edit_dir_reasons				  \
+	EM(afs_edit_dir_for_create,		"Create") \
+	EM(afs_edit_dir_for_link,		"Link  ") \
+	EM(afs_edit_dir_for_mkdir,		"MkDir ") \
+	EM(afs_edit_dir_for_rename,		"Rename") \
+	EM(afs_edit_dir_for_rmdir,		"RmDir ") \
+	EM(afs_edit_dir_for_symlink,		"Symlnk") \
+	E_(afs_edit_dir_for_unlink,		"Unlink")
+
 
 /*
  * Export enum symbols via userspace.
@@ -116,6 +158,8 @@ enum afs_vl_operation {
 afs_call_traces;
 afs_fs_operations;
 afs_vl_operations;
+afs_edit_dir_ops;
+afs_edit_dir_reasons;
 
 /*
  * Now redefine the EM() and E_() macros to map the enums to the strings that
@@ -462,6 +506,75 @@ TRACE_EVENT(afs_call_state,
 		      __entry->ret, __entry->abort)
 	    );
 
+TRACE_EVENT(afs_edit_dir,
+	    TP_PROTO(struct afs_vnode *dvnode,
+		     enum afs_edit_dir_reason why,
+		     enum afs_edit_dir_op op,
+		     unsigned int block,
+		     unsigned int slot,
+		     unsigned int f_vnode,
+		     unsigned int f_unique,
+		     const char *name),
+
+	    TP_ARGS(dvnode, why, op, block, slot, f_vnode, f_unique, name),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int,		vnode		)
+		    __field(unsigned int,		unique		)
+		    __field(enum afs_edit_dir_reason,	why		)
+		    __field(enum afs_edit_dir_op,	op		)
+		    __field(unsigned int,		block		)
+		    __field(unsigned short,		slot		)
+		    __field(unsigned int,		f_vnode		)
+		    __field(unsigned int,		f_unique	)
+		    __array(char,			name, 18	)
+			     ),
+
+	    TP_fast_assign(
+		    int __len = strlen(name);
+		    __len = min(__len, 17);
+		    __entry->vnode	= dvnode->fid.vnode;
+		    __entry->unique	= dvnode->fid.unique;
+		    __entry->why	= why;
+		    __entry->op		= op;
+		    __entry->block	= block;
+		    __entry->slot	= slot;
+		    __entry->f_vnode	= f_vnode;
+		    __entry->f_unique	= f_unique;
+		    memcpy(__entry->name, name, __len);
+		    __entry->name[__len] = 0;
+			   ),
+
+	    TP_printk("d=%x:%x %s %s %u[%u] f=%x:%x %s",
+		      __entry->vnode, __entry->unique,
+		      __print_symbolic(__entry->why, afs_edit_dir_reasons),
+		      __print_symbolic(__entry->op, afs_edit_dir_ops),
+		      __entry->block, __entry->slot,
+		      __entry->f_vnode, __entry->f_unique,
+		      __entry->name)
+	    );
+
+TRACE_EVENT(afs_protocol_error,
+	    TP_PROTO(struct afs_call *call, int error, const void *where),
+
+	    TP_ARGS(call, error, where),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int,	call		)
+		    __field(int,		error		)
+		    __field(const void *,	where		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->call = call ? call->debug_id : 0;
+		    __entry->error = error;
+		    __entry->where = where;
+			   ),
+
+	    TP_printk("c=%08x r=%d sp=%pSR",
+		      __entry->call, __entry->error, __entry->where)
+	    );
+
 #endif /* _TRACE_AFS_H */
 
 /* This part must be outside protection */
diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h
new file mode 100644
index 000000000000..aa86e7dba511
--- /dev/null
+++ b/include/trace/events/cachefiles.h
@@ -0,0 +1,325 @@
+/* CacheFiles tracepoints
+ *
+ * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM cachefiles
+
+#if !defined(_TRACE_CACHEFILES_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_CACHEFILES_H
+
+#include <linux/tracepoint.h>
+
+/*
+ * Define enums for tracing information.
+ */
+#ifndef __CACHEFILES_DECLARE_TRACE_ENUMS_ONCE_ONLY
+#define __CACHEFILES_DECLARE_TRACE_ENUMS_ONCE_ONLY
+
+enum cachefiles_obj_ref_trace {
+	cachefiles_obj_put_wait_retry = fscache_obj_ref__nr_traces,
+	cachefiles_obj_put_wait_timeo,
+	cachefiles_obj_ref__nr_traces
+};
+
+#endif
+
+/*
+ * Define enum -> string mappings for display.
+ */
+#define cachefiles_obj_kill_traces				\
+	EM(FSCACHE_OBJECT_IS_STALE,	"stale")		\
+	EM(FSCACHE_OBJECT_NO_SPACE,	"no_space")		\
+	EM(FSCACHE_OBJECT_WAS_RETIRED,	"was_retired")		\
+	E_(FSCACHE_OBJECT_WAS_CULLED,	"was_culled")
+
+#define cachefiles_obj_ref_traces					\
+	EM(fscache_obj_get_add_to_deps,		"GET add_to_deps")	\
+	EM(fscache_obj_get_queue,		"GET queue")		\
+	EM(fscache_obj_put_alloc_fail,		"PUT alloc_fail")	\
+	EM(fscache_obj_put_attach_fail,		"PUT attach_fail")	\
+	EM(fscache_obj_put_drop_obj,		"PUT drop_obj")		\
+	EM(fscache_obj_put_enq_dep,		"PUT enq_dep")		\
+	EM(fscache_obj_put_queue,		"PUT queue")		\
+	EM(fscache_obj_put_work,		"PUT work")		\
+	EM(cachefiles_obj_put_wait_retry,	"PUT wait_retry")	\
+	E_(cachefiles_obj_put_wait_timeo,	"PUT wait_timeo")
+
+/*
+ * Export enum symbols via userspace.
+ */
+#undef EM
+#undef E_
+#define EM(a, b) TRACE_DEFINE_ENUM(a);
+#define E_(a, b) TRACE_DEFINE_ENUM(a);
+
+cachefiles_obj_kill_traces;
+cachefiles_obj_ref_traces;
+
+/*
+ * Now redefine the EM() and E_() macros to map the enums to the strings that
+ * will be printed in the output.
+ */
+#undef EM
+#undef E_
+#define EM(a, b)	{ a, b },
+#define E_(a, b)	{ a, b }
+
+
+TRACE_EVENT(cachefiles_ref,
+	    TP_PROTO(struct cachefiles_object *obj,
+		     struct fscache_cookie *cookie,
+		     enum cachefiles_obj_ref_trace why,
+		     int usage),
+
+	    TP_ARGS(obj, cookie, why, usage),
+
+	    /* Note that obj may be NULL */
+	    TP_STRUCT__entry(
+		    __field(struct cachefiles_object *,		obj		)
+		    __field(struct fscache_cookie *,		cookie		)
+		    __field(enum cachefiles_obj_ref_trace,	why		)
+		    __field(int,				usage		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->obj	= obj;
+		    __entry->cookie	= cookie;
+		    __entry->usage	= usage;
+		    __entry->why	= why;
+			   ),
+
+	    TP_printk("c=%p o=%p u=%d %s",
+		      __entry->cookie, __entry->obj, __entry->usage,
+		      __print_symbolic(__entry->why, cachefiles_obj_ref_traces))
+	    );
+
+TRACE_EVENT(cachefiles_lookup,
+	    TP_PROTO(struct cachefiles_object *obj,
+		     struct dentry *de,
+		     struct inode *inode),
+
+	    TP_ARGS(obj, de, inode),
+
+	    TP_STRUCT__entry(
+		    __field(struct cachefiles_object *,	obj	)
+		    __field(struct dentry *,		de	)
+		    __field(struct inode *,		inode	)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->obj	= obj;
+		    __entry->de		= de;
+		    __entry->inode	= inode;
+			   ),
+
+	    TP_printk("o=%p d=%p i=%p",
+		      __entry->obj, __entry->de, __entry->inode)
+	    );
+
+TRACE_EVENT(cachefiles_mkdir,
+	    TP_PROTO(struct cachefiles_object *obj,
+		     struct dentry *de, int ret),
+
+	    TP_ARGS(obj, de, ret),
+
+	    TP_STRUCT__entry(
+		    __field(struct cachefiles_object *,	obj	)
+		    __field(struct dentry *,		de	)
+		    __field(int,			ret	)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->obj	= obj;
+		    __entry->de		= de;
+		    __entry->ret	= ret;
+			   ),
+
+	    TP_printk("o=%p d=%p r=%d",
+		      __entry->obj, __entry->de, __entry->ret)
+	    );
+
+TRACE_EVENT(cachefiles_create,
+	    TP_PROTO(struct cachefiles_object *obj,
+		     struct dentry *de, int ret),
+
+	    TP_ARGS(obj, de, ret),
+
+	    TP_STRUCT__entry(
+		    __field(struct cachefiles_object *,	obj	)
+		    __field(struct dentry *,		de	)
+		    __field(int,			ret	)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->obj	= obj;
+		    __entry->de		= de;
+		    __entry->ret	= ret;
+			   ),
+
+	    TP_printk("o=%p d=%p r=%d",
+		      __entry->obj, __entry->de, __entry->ret)
+	    );
+
+TRACE_EVENT(cachefiles_unlink,
+	    TP_PROTO(struct cachefiles_object *obj,
+		     struct dentry *de,
+		     enum fscache_why_object_killed why),
+
+	    TP_ARGS(obj, de, why),
+
+	    /* Note that obj may be NULL */
+	    TP_STRUCT__entry(
+		    __field(struct cachefiles_object *,	obj		)
+		    __field(struct dentry *,		de		)
+		    __field(enum fscache_why_object_killed, why		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->obj	= obj;
+		    __entry->de		= de;
+		    __entry->why	= why;
+			   ),
+
+	    TP_printk("o=%p d=%p w=%s",
+		      __entry->obj, __entry->de,
+		      __print_symbolic(__entry->why, cachefiles_obj_kill_traces))
+	    );
+
+TRACE_EVENT(cachefiles_rename,
+	    TP_PROTO(struct cachefiles_object *obj,
+		     struct dentry *de,
+		     struct dentry *to,
+		     enum fscache_why_object_killed why),
+
+	    TP_ARGS(obj, de, to, why),
+
+	    /* Note that obj may be NULL */
+	    TP_STRUCT__entry(
+		    __field(struct cachefiles_object *,	obj		)
+		    __field(struct dentry *,		de		)
+		    __field(struct dentry *,		to		)
+		    __field(enum fscache_why_object_killed, why		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->obj	= obj;
+		    __entry->de		= de;
+		    __entry->to		= to;
+		    __entry->why	= why;
+			   ),
+
+	    TP_printk("o=%p d=%p t=%p w=%s",
+		      __entry->obj, __entry->de, __entry->to,
+		      __print_symbolic(__entry->why, cachefiles_obj_kill_traces))
+	    );
+
+TRACE_EVENT(cachefiles_mark_active,
+	    TP_PROTO(struct cachefiles_object *obj,
+		     struct dentry *de),
+
+	    TP_ARGS(obj, de),
+
+	    /* Note that obj may be NULL */
+	    TP_STRUCT__entry(
+		    __field(struct cachefiles_object *,	obj		)
+		    __field(struct dentry *,		de		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->obj	= obj;
+		    __entry->de		= de;
+			   ),
+
+	    TP_printk("o=%p d=%p",
+		      __entry->obj, __entry->de)
+	    );
+
+TRACE_EVENT(cachefiles_wait_active,
+	    TP_PROTO(struct cachefiles_object *obj,
+		     struct dentry *de,
+		     struct cachefiles_object *xobj),
+
+	    TP_ARGS(obj, de, xobj),
+
+	    /* Note that obj may be NULL; xobj is dereferenced and must not be */
+	    TP_STRUCT__entry(
+		    __field(struct cachefiles_object *,	obj		)
+		    __field(struct dentry *,		de		)
+		    __field(struct cachefiles_object *,	xobj		)
+		    __field(u16,			flags		)
+		    __field(u16,			fsc_flags	)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->obj	= obj;
+		    __entry->de		= de;
+		    __entry->xobj	= xobj;
+		    __entry->flags	= xobj->flags;
+		    __entry->fsc_flags	= xobj->fscache.flags;
+			   ),
+
+	    TP_printk("o=%p d=%p wo=%p wf=%x wff=%x",
+		      __entry->obj, __entry->de, __entry->xobj,
+		      __entry->flags, __entry->fsc_flags)
+	    );
+
+TRACE_EVENT(cachefiles_mark_inactive,
+	    TP_PROTO(struct cachefiles_object *obj,
+		     struct dentry *de,
+		     struct inode *inode),
+
+	    TP_ARGS(obj, de, inode),
+
+	    /* Note that obj may be NULL */
+	    TP_STRUCT__entry(
+		    __field(struct cachefiles_object *,	obj		)
+		    __field(struct dentry *,		de		)
+		    __field(struct inode *,		inode		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->obj	= obj;
+		    __entry->de		= de;
+		    __entry->inode	= inode;
+			   ),
+
+	    TP_printk("o=%p d=%p i=%p",
+		      __entry->obj, __entry->de, __entry->inode)
+	    );
+
+TRACE_EVENT(cachefiles_mark_buried,
+	    TP_PROTO(struct cachefiles_object *obj,
+		     struct dentry *de,
+		     enum fscache_why_object_killed why),
+
+	    TP_ARGS(obj, de, why),
+
+	    /* Note that obj may be NULL */
+	    TP_STRUCT__entry(
+		    __field(struct cachefiles_object *,	obj		)
+		    __field(struct dentry *,		de		)
+		    __field(enum fscache_why_object_killed, why		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->obj	= obj;
+		    __entry->de		= de;
+		    __entry->why	= why;
+			   ),
+
+	    TP_printk("o=%p d=%p w=%s",
+		      __entry->obj, __entry->de,
+		      __print_symbolic(__entry->why, cachefiles_obj_kill_traces))
+	    );
+
+#endif /* _TRACE_CACHEFILES_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h
new file mode 100644
index 000000000000..686cfe997ed2
--- /dev/null
+++ b/include/trace/events/fscache.h
@@ -0,0 +1,537 @@
+/* FS-Cache tracepoints
+ *
+ * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM fscache
+
+#if !defined(_TRACE_FSCACHE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_FSCACHE_H
+
+#include <linux/fscache.h>
+#include <linux/tracepoint.h>
+
+/*
+ * Define enums for tracing information.
+ */
+#ifndef __FSCACHE_DECLARE_TRACE_ENUMS_ONCE_ONLY
+#define __FSCACHE_DECLARE_TRACE_ENUMS_ONCE_ONLY
+
+enum fscache_cookie_trace {
+	fscache_cookie_collision,
+	fscache_cookie_discard,
+	fscache_cookie_get_acquire_parent,
+	fscache_cookie_get_attach_object,
+	fscache_cookie_get_reacquire,
+	fscache_cookie_get_register_netfs,
+	fscache_cookie_put_acquire_nobufs,
+	fscache_cookie_put_dup_netfs,
+	fscache_cookie_put_relinquish,
+	fscache_cookie_put_object,
+	fscache_cookie_put_parent,
+};
+
+enum fscache_page_trace {
+	fscache_page_cached,
+	fscache_page_inval,
+	fscache_page_maybe_release,
+	fscache_page_radix_clear_store,
+	fscache_page_radix_delete,
+	fscache_page_radix_insert,
+	fscache_page_radix_pend2store,
+	fscache_page_radix_set_pend,
+	fscache_page_uncache,
+	fscache_page_write,
+	fscache_page_write_end,
+	fscache_page_write_end_pend,
+	fscache_page_write_end_noc,
+	fscache_page_write_wait,
+	fscache_page_trace__nr
+};
+
+enum fscache_op_trace {
+	fscache_op_cancel,
+	fscache_op_cancel_all,
+	fscache_op_cancelled,
+	fscache_op_completed,
+	fscache_op_enqueue_async,
+	fscache_op_enqueue_mythread,
+	fscache_op_gc,
+	fscache_op_init,
+	fscache_op_put,
+	fscache_op_run,
+	fscache_op_signal,
+	fscache_op_submit,
+	fscache_op_submit_ex,
+	fscache_op_work,
+	fscache_op_trace__nr
+};
+
+enum fscache_page_op_trace {
+	fscache_page_op_alloc_one,
+	fscache_page_op_attr_changed,
+	fscache_page_op_check_consistency,
+	fscache_page_op_invalidate,
+	fscache_page_op_retr_multi,
+	fscache_page_op_retr_one,
+	fscache_page_op_write_one,
+	fscache_page_op_trace__nr
+};
+
+#endif
+
+/*
+ * Declare tracing information enums and their string mappings for display.
+ */
+#define fscache_cookie_traces						\
+	EM(fscache_cookie_collision,		"*COLLISION*")		\
+	EM(fscache_cookie_discard,		"DISCARD")		\
+	EM(fscache_cookie_get_acquire_parent,	"GET prn")		\
+	EM(fscache_cookie_get_attach_object,	"GET obj")		\
+	EM(fscache_cookie_get_reacquire,	"GET raq")		\
+	EM(fscache_cookie_get_register_netfs,	"GET net")		\
+	EM(fscache_cookie_put_acquire_nobufs,	"PUT nbf")		\
+	EM(fscache_cookie_put_dup_netfs,	"PUT dnt")		\
+	EM(fscache_cookie_put_relinquish,	"PUT rlq")		\
+	EM(fscache_cookie_put_object,		"PUT obj")		\
+	E_(fscache_cookie_put_parent,		"PUT prn")
+
+#define fscache_page_traces						\
+	EM(fscache_page_cached,			"Cached ")		\
+	EM(fscache_page_inval,			"InvalPg")		\
+	EM(fscache_page_maybe_release,		"MayRels")		\
+	EM(fscache_page_uncache,		"Uncache")		\
+	EM(fscache_page_radix_clear_store,	"RxCStr ")		\
+	EM(fscache_page_radix_delete,		"RxDel  ")		\
+	EM(fscache_page_radix_insert,		"RxIns  ")		\
+	EM(fscache_page_radix_pend2store,	"RxP2S  ")		\
+	EM(fscache_page_radix_set_pend,		"RxSPend")		\
+	EM(fscache_page_write,			"WritePg")		\
+	EM(fscache_page_write_end,		"EndPgWr")		\
+	EM(fscache_page_write_end_pend,		"EndPgWP")		\
+	EM(fscache_page_write_end_noc,		"EndPgNC")		\
+	E_(fscache_page_write_wait,		"WtOnWrt")
+
+#define fscache_op_traces						\
+	EM(fscache_op_cancel,			"Cancel1")		\
+	EM(fscache_op_cancel_all,		"CancelA")		\
+	EM(fscache_op_cancelled,		"Canclld")		\
+	EM(fscache_op_completed,		"Complet")		\
+	EM(fscache_op_enqueue_async,		"EnqAsyn")		\
+	EM(fscache_op_enqueue_mythread,		"EnqMyTh")		\
+	EM(fscache_op_gc,			"GC     ")		\
+	EM(fscache_op_init,			"Init   ")		\
+	EM(fscache_op_put,			"Put    ")		\
+	EM(fscache_op_run,			"Run    ")		\
+	EM(fscache_op_signal,			"Signal ")		\
+	EM(fscache_op_submit,			"Submit ")		\
+	EM(fscache_op_submit_ex,		"SubmitX")		\
+	E_(fscache_op_work,			"Work   ")
+
+#define fscache_page_op_traces						\
+	EM(fscache_page_op_alloc_one,		"Alloc1 ")		\
+	EM(fscache_page_op_attr_changed,	"AttrChg")		\
+	EM(fscache_page_op_check_consistency,	"CheckCn")		\
+	EM(fscache_page_op_invalidate,		"Inval  ")		\
+	EM(fscache_page_op_retr_multi,		"RetrMul")		\
+	EM(fscache_page_op_retr_one,		"Retr1  ")		\
+	E_(fscache_page_op_write_one,		"Write1 ")
+
+/*
+ * Export enum symbols via userspace.
+ */
+#undef EM
+#undef E_
+#define EM(a, b) TRACE_DEFINE_ENUM(a);
+#define E_(a, b) TRACE_DEFINE_ENUM(a);
+
+fscache_cookie_traces; fscache_page_traces; fscache_op_traces; fscache_page_op_traces;
+
+/*
+ * Now redefine the EM() and E_() macros to map the enums to the strings that
+ * will be printed in the output.
+ */
+#undef EM
+#undef E_
+#define EM(a, b)	{ a, b },
+#define E_(a, b)	{ a, b }
+
+
+TRACE_EVENT(fscache_cookie,
+	    TP_PROTO(struct fscache_cookie *cookie,
+		     enum fscache_cookie_trace where,
+		     int usage),
+
+	    TP_ARGS(cookie, where, usage),
+
+	    TP_STRUCT__entry(
+		    __field(struct fscache_cookie *,	cookie		)
+		    __field(struct fscache_cookie *,	parent		)
+		    __field(enum fscache_cookie_trace,	where		)
+		    __field(int,			usage		)
+		    __field(int,			n_children	)
+		    __field(int,			n_active	)
+		    __field(u8,				flags		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->cookie	= cookie;
+		    __entry->parent	= cookie->parent;
+		    __entry->where	= where;
+		    __entry->usage	= usage;
+		    __entry->n_children	= atomic_read(&cookie->n_children);
+		    __entry->n_active	= atomic_read(&cookie->n_active);
+		    __entry->flags	= cookie->flags;
+			   ),
+
+	    TP_printk("%s c=%p u=%d p=%p Nc=%d Na=%d f=%02x",
+		      __print_symbolic(__entry->where, fscache_cookie_traces),
+		      __entry->cookie, __entry->usage,
+		      __entry->parent, __entry->n_children, __entry->n_active,
+		      __entry->flags)
+	    );
+
+TRACE_EVENT(fscache_netfs,
+	    TP_PROTO(struct fscache_netfs *netfs),
+
+	    TP_ARGS(netfs),
+
+	    TP_STRUCT__entry(
+		    __field(struct fscache_cookie *,	cookie		)
+		    __array(char,			name, 8		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->cookie		= netfs->primary_index;
+		    strncpy(__entry->name, netfs->name, 8);
+		    __entry->name[7]		= 0;
+			   ),
+
+	    TP_printk("c=%p n=%s",
+		      __entry->cookie, __entry->name)
+	    );
+
+TRACE_EVENT(fscache_acquire,
+	    TP_PROTO(struct fscache_cookie *cookie),
+
+	    TP_ARGS(cookie),
+
+	    TP_STRUCT__entry(
+		    __field(struct fscache_cookie *,	cookie		)
+		    __field(struct fscache_cookie *,	parent		)
+		    __array(char,			name, 8		)
+		    __field(int,			p_usage		)
+		    __field(int,			p_n_children	)
+		    __field(u8,				p_flags		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->cookie		= cookie;
+		    __entry->parent		= cookie->parent;
+		    __entry->p_usage		= atomic_read(&cookie->parent->usage);
+		    __entry->p_n_children	= atomic_read(&cookie->parent->n_children);
+		    __entry->p_flags		= cookie->parent->flags;
+		    memcpy(__entry->name, cookie->def->name, 8);
+		    __entry->name[7]		= 0;
+			   ),
+
+	    TP_printk("c=%p p=%p pu=%d pc=%d pf=%02x n=%s",
+		      __entry->cookie, __entry->parent, __entry->p_usage,
+		      __entry->p_n_children, __entry->p_flags, __entry->name)
+	    );
+
+TRACE_EVENT(fscache_relinquish,
+	    TP_PROTO(struct fscache_cookie *cookie, bool retire),
+
+	    TP_ARGS(cookie, retire),
+
+	    TP_STRUCT__entry(
+		    __field(struct fscache_cookie *,	cookie		)
+		    __field(struct fscache_cookie *,	parent		)
+		    __field(int,			usage		)
+		    __field(int,			n_children	)
+		    __field(int,			n_active	)
+		    __field(u8,				flags		)
+		    __field(bool,			retire		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->cookie	= cookie;
+		    __entry->parent	= cookie->parent;
+		    __entry->usage	= atomic_read(&cookie->usage);
+		    __entry->n_children	= atomic_read(&cookie->n_children);
+		    __entry->n_active	= atomic_read(&cookie->n_active);
+		    __entry->flags	= cookie->flags;
+		    __entry->retire	= retire;
+			   ),
+
+	    TP_printk("c=%p u=%d p=%p Nc=%d Na=%d f=%02x r=%u",
+		      __entry->cookie, __entry->usage,
+		      __entry->parent, __entry->n_children, __entry->n_active,
+		      __entry->flags, __entry->retire)
+	    );
+
+TRACE_EVENT(fscache_enable,
+	    TP_PROTO(struct fscache_cookie *cookie),
+
+	    TP_ARGS(cookie),
+
+	    TP_STRUCT__entry(
+		    __field(struct fscache_cookie *,	cookie		)
+		    __field(int,			usage		)
+		    __field(int,			n_children	)
+		    __field(int,			n_active	)
+		    __field(u8,				flags		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->cookie	= cookie;
+		    __entry->usage	= atomic_read(&cookie->usage);
+		    __entry->n_children	= atomic_read(&cookie->n_children);
+		    __entry->n_active	= atomic_read(&cookie->n_active);
+		    __entry->flags	= cookie->flags;
+			   ),
+
+	    TP_printk("c=%p u=%d Nc=%d Na=%d f=%02x",
+		      __entry->cookie, __entry->usage,
+		      __entry->n_children, __entry->n_active, __entry->flags)
+	    );
+
+TRACE_EVENT(fscache_disable,
+	    TP_PROTO(struct fscache_cookie *cookie),
+
+	    TP_ARGS(cookie),
+
+	    TP_STRUCT__entry(
+		    __field(struct fscache_cookie *,	cookie		)
+		    __field(int,			usage		)
+		    __field(int,			n_children	)
+		    __field(int,			n_active	)
+		    __field(u8,				flags		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->cookie	= cookie;
+		    __entry->usage	= atomic_read(&cookie->usage);
+		    __entry->n_children	= atomic_read(&cookie->n_children);
+		    __entry->n_active	= atomic_read(&cookie->n_active);
+		    __entry->flags	= cookie->flags;
+			   ),
+
+	    TP_printk("c=%p u=%d Nc=%d Na=%d f=%02x",
+		      __entry->cookie, __entry->usage,
+		      __entry->n_children, __entry->n_active, __entry->flags)
+	    );
+
+TRACE_EVENT(fscache_osm,
+	    TP_PROTO(struct fscache_object *object,
+		     const struct fscache_state *state,
+		     bool wait, bool oob, s8 event_num),
+
+	    TP_ARGS(object, state, wait, oob, event_num),
+
+	    TP_STRUCT__entry(
+		    __field(struct fscache_cookie *,	cookie		)
+		    __field(struct fscache_object *,	object		)
+		    __array(char,			state, 8	)
+		    __field(bool,			wait		)
+		    __field(bool,			oob		)
+		    __field(s8,				event_num	)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->cookie		= object->cookie;
+		    __entry->object		= object;
+		    __entry->wait		= wait;
+		    __entry->oob		= oob;
+		    __entry->event_num		= event_num;
+		    memcpy(__entry->state, state->short_name, 8);
+			   ),
+
+	    TP_printk("c=%p o=%p %s %s%sev=%d",
+		      __entry->cookie,
+		      __entry->object,
+		      __entry->state,
+		      __print_symbolic(__entry->wait,
+				       { true,  "WAIT" },
+				       { false, "WORK" }),
+		      __print_symbolic(__entry->oob,
+				       { true,  " OOB " },
+				       { false, " " }),
+		      __entry->event_num)
+	    );
+
+TRACE_EVENT(fscache_page,
+	    TP_PROTO(struct fscache_cookie *cookie, struct page *page,
+		     enum fscache_page_trace why),
+
+	    TP_ARGS(cookie, page, why),
+
+	    TP_STRUCT__entry(
+		    __field(struct fscache_cookie *,	cookie		)
+		    __field(pgoff_t,			page		)
+		    __field(enum fscache_page_trace,	why		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->cookie		= cookie;
+		    __entry->page		= page->index;
+		    __entry->why		= why;
+			   ),
+
+	    TP_printk("c=%p %s pg=%lx",
+		      __entry->cookie,
+		      __print_symbolic(__entry->why, fscache_page_traces),
+		      __entry->page)
+	    );
+
+TRACE_EVENT(fscache_check_page,
+	    TP_PROTO(struct fscache_cookie *cookie, struct page *page,
+		     void *val, int n),
+
+	    TP_ARGS(cookie, page, val, n),
+
+	    TP_STRUCT__entry(
+		    __field(struct fscache_cookie *,	cookie		)
+		    __field(void *,			page		)
+		    __field(void *,			val		)
+		    __field(int,			n		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->cookie		= cookie;
+		    __entry->page		= page;
+		    __entry->val		= val;
+		    __entry->n			= n;
+			   ),
+
+	    TP_printk("c=%p pg=%p val=%p n=%d",
+		      __entry->cookie, __entry->page, __entry->val, __entry->n)
+	    );
+
+TRACE_EVENT(fscache_wake_cookie,
+	    TP_PROTO(struct fscache_cookie *cookie),
+
+	    TP_ARGS(cookie),
+
+	    TP_STRUCT__entry(
+		    __field(struct fscache_cookie *,	cookie		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->cookie		= cookie;
+			   ),
+
+	    TP_printk("c=%p", __entry->cookie)
+	    );
+
+TRACE_EVENT(fscache_op,
+	    TP_PROTO(struct fscache_cookie *cookie, struct fscache_operation *op,
+		     enum fscache_op_trace why),
+
+	    TP_ARGS(cookie, op, why),
+
+	    TP_STRUCT__entry(
+		    __field(struct fscache_cookie *,	cookie		)
+		    __field(struct fscache_operation *,	op		)
+		    __field(enum fscache_op_trace,	why		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->cookie		= cookie;
+		    __entry->op			= op;
+		    __entry->why		= why;
+			   ),
+
+	    TP_printk("c=%p op=%p %s",
+		      __entry->cookie, __entry->op,
+		      __print_symbolic(__entry->why, fscache_op_traces))
+	    );
+
+TRACE_EVENT(fscache_page_op,
+	    TP_PROTO(struct fscache_cookie *cookie, struct page *page,
+		     struct fscache_operation *op, enum fscache_page_op_trace what),
+
+	    TP_ARGS(cookie, page, op, what),
+
+	    TP_STRUCT__entry(
+		    __field(struct fscache_cookie *,	cookie		)
+		    __field(pgoff_t,			page		)
+		    __field(struct fscache_operation *,	op		)
+		    __field(enum fscache_page_op_trace,	what		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->cookie		= cookie;
+		    __entry->page		= page ? page->index : 0;
+		    __entry->op			= op;
+		    __entry->what		= what;
+			   ),
+
+	    TP_printk("c=%p %s pg=%lx op=%p",
+		      __entry->cookie,
+		      __print_symbolic(__entry->what, fscache_page_op_traces),
+		      __entry->page, __entry->op)
+	    );
+
+TRACE_EVENT(fscache_wrote_page,
+	    TP_PROTO(struct fscache_cookie *cookie, struct page *page,
+		     struct fscache_operation *op, int ret),
+
+	    TP_ARGS(cookie, page, op, ret),
+
+	    TP_STRUCT__entry(
+		    __field(struct fscache_cookie *,	cookie		)
+		    __field(pgoff_t,			page		)
+		    __field(struct fscache_operation *,	op		)
+		    __field(int,			ret		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->cookie		= cookie;
+		    __entry->page		= page->index;
+		    __entry->op			= op;
+		    __entry->ret		= ret;
+			   ),
+
+	    TP_printk("c=%p pg=%lx op=%p ret=%d",
+		      __entry->cookie, __entry->page, __entry->op, __entry->ret)
+	    );
+
+TRACE_EVENT(fscache_gang_lookup,
+	    TP_PROTO(struct fscache_cookie *cookie, struct fscache_operation *op,
+		     void **results, int n, pgoff_t store_limit),
+
+	    TP_ARGS(cookie, op, results, n, store_limit),
+
+	    TP_STRUCT__entry(
+		    __field(struct fscache_cookie *,	cookie		)
+		    __field(struct fscache_operation *,	op		)
+		    __field(pgoff_t,			results0	)
+		    __field(int,			n		)
+		    __field(pgoff_t,			store_limit	)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->cookie		= cookie;
+		    __entry->op			= op;
+		    __entry->results0		= results[0] ? ((struct page *)results[0])->index : (pgoff_t)-1;
+		    __entry->n			= n;
+		    __entry->store_limit	= store_limit;
+			   ),
+
+	    TP_printk("c=%p op=%p r0=%lx n=%d sl=%lx",
+		      __entry->cookie, __entry->op, __entry->results0, __entry->n,
+		      __entry->store_limit)
+	    );
+
+#endif /* _TRACE_FSCACHE_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/initcall.h b/include/trace/events/initcall.h
new file mode 100644
index 000000000000..8d6cf10d27c9
--- /dev/null
+++ b/include/trace/events/initcall.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM initcall
+
+#if !defined(_TRACE_INITCALL_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_INITCALL_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(initcall_level,
+	/* Records the name of an initcall level; printed as "level=<name>". */
+	TP_PROTO(const char *level),
+
+	TP_ARGS(level),
+
+	TP_STRUCT__entry(
+		__string(level, level)
+	),
+
+	TP_fast_assign(
+		__assign_str(level, level);
+	),
+
+	TP_printk("level=%s", __get_str(level))
+);
+
+TRACE_EVENT(initcall_start,
+	/* Records the initcall function pointer; printed with %pS. */
+	TP_PROTO(initcall_t func),
+
+	TP_ARGS(func),
+
+	TP_STRUCT__entry(
+		__field(initcall_t, func)
+	),
+
+	TP_fast_assign(
+		__entry->func = func;
+	),
+
+	TP_printk("func=%pS", __entry->func)
+);
+
+TRACE_EVENT(initcall_finish,
+	/* Records the initcall function pointer and the int result passed in. */
+	TP_PROTO(initcall_t func, int ret),
+
+	TP_ARGS(func, ret),
+
+	TP_STRUCT__entry(
+		__field(initcall_t,	func)
+		__field(int,		ret)
+	),
+
+	TP_fast_assign(
+		__entry->func = func;
+		__entry->ret = ret;
+	),
+
+	TP_printk("func=%pS ret=%d", __entry->func, __entry->ret)
+);
+
+#endif /* if !defined(_TRACE_INITCALL_H) || defined(TRACE_HEADER_MULTI_READ) */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/rtc.h b/include/trace/events/rtc.h
new file mode 100644
index 000000000000..621333f1c890
--- /dev/null
+++ b/include/trace/events/rtc.h
@@ -0,0 +1,206 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rtc
+
+#if !defined(_TRACE_RTC_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_RTC_H
+
+#include <linux/rtc.h>
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(rtc_time_alarm_class,
+	/* Shared layout of the rtc_{set,read}_{time,alarm} events: secs and err. */
+	TP_PROTO(time64_t secs, int err),
+
+	TP_ARGS(secs, err),
+
+	TP_STRUCT__entry(
+		__field(time64_t, secs)
+		__field(int, err)
+	),
+
+	TP_fast_assign(
+		__entry->secs = secs;
+		__entry->err = err;
+	),
+	/* Output format: "UTC (<secs>) (<err>)". */
+	TP_printk("UTC (%lld) (%d)",
+		  __entry->secs, __entry->err
+	)
+);
+
+DEFINE_EVENT(rtc_time_alarm_class, rtc_set_time,
+
+	TP_PROTO(time64_t secs, int err),
+
+	TP_ARGS(secs, err)
+);
+
+DEFINE_EVENT(rtc_time_alarm_class, rtc_read_time,
+
+	TP_PROTO(time64_t secs, int err),
+
+	TP_ARGS(secs, err)
+);
+
+DEFINE_EVENT(rtc_time_alarm_class, rtc_set_alarm,
+
+	TP_PROTO(time64_t secs, int err),
+
+	TP_ARGS(secs, err)
+);
+
+DEFINE_EVENT(rtc_time_alarm_class, rtc_read_alarm,
+
+	TP_PROTO(time64_t secs, int err),
+
+	TP_ARGS(secs, err)
+);
+
+TRACE_EVENT(rtc_irq_set_freq,
+	/* Records a periodic IRQ frequency value together with an error code. */
+	TP_PROTO(int freq, int err),
+
+	TP_ARGS(freq, err),
+
+	TP_STRUCT__entry(
+		__field(int, freq)
+		__field(int, err)
+	),
+
+	TP_fast_assign(
+		__entry->freq = freq;
+		__entry->err = err;
+	),
+	/* NOTE(review): freq is declared int but is printed with %u. */
+	TP_printk("set RTC periodic IRQ frequency:%u (%d)",
+		  __entry->freq, __entry->err
+	)
+);
+
+TRACE_EVENT(rtc_irq_set_state,
+	/* Records an enabled flag and err; nonzero enabled prints "enable". */
+	TP_PROTO(int enabled, int err),
+
+	TP_ARGS(enabled, err),
+
+	TP_STRUCT__entry(
+		__field(int, enabled)
+		__field(int, err)
+	),
+
+	TP_fast_assign(
+		__entry->enabled = enabled;
+		__entry->err = err;
+	),
+
+	TP_printk("%s RTC 2^N Hz periodic IRQs (%d)",
+		  __entry->enabled ? "enable" : "disable",
+		  __entry->err
+	)
+);
+
+TRACE_EVENT(rtc_alarm_irq_enable,
+	/* Records an enabled flag and err; nonzero enabled prints "enable". */
+	TP_PROTO(unsigned int enabled, int err),
+
+	TP_ARGS(enabled, err),
+
+	TP_STRUCT__entry(
+		__field(unsigned int, enabled)
+		__field(int, err)
+	),
+
+	TP_fast_assign(
+		__entry->enabled = enabled;
+		__entry->err = err;
+	),
+
+	TP_printk("%s RTC alarm IRQ (%d)",
+		  __entry->enabled ? "enable" : "disable",
+		  __entry->err
+	)
+);
+
+DECLARE_EVENT_CLASS(rtc_offset_class,
+	/* Shared by rtc_set_offset and rtc_read_offset: a long offset and err. */
+	TP_PROTO(long offset, int err),
+
+	TP_ARGS(offset, err),
+
+	TP_STRUCT__entry(
+		__field(long, offset)
+		__field(int, err)
+	),
+
+	TP_fast_assign(
+		__entry->offset = offset;
+		__entry->err = err;
+	),
+
+	TP_printk("RTC offset: %ld (%d)",
+		  __entry->offset, __entry->err
+	)
+);
+
+DEFINE_EVENT(rtc_offset_class, rtc_set_offset,
+
+	TP_PROTO(long offset, int err),
+
+	TP_ARGS(offset, err)
+);
+
+DEFINE_EVENT(rtc_offset_class, rtc_read_offset,
+
+	TP_PROTO(long offset, int err),
+
+	TP_ARGS(offset, err)
+);
+
+DECLARE_EVENT_CLASS(rtc_timer_class,
+	/* Shared by the rtc_timer_enqueue, _dequeue and _fired events. */
+	TP_PROTO(struct rtc_timer *timer),
+
+	TP_ARGS(timer),
+
+	TP_STRUCT__entry(
+		__field(struct rtc_timer *, timer)
+		__field(ktime_t, expires)
+		__field(ktime_t, period)
+	),
+	/* Snapshot node.expires and period so the record holds their values. */
+	TP_fast_assign(
+		__entry->timer = timer;
+		__entry->expires = timer->node.expires;
+		__entry->period = timer->period;
+	),
+
+	TP_printk("RTC timer:(%p) expires:%lld period:%lld",
+		  __entry->timer, __entry->expires, __entry->period
+	)
+);
+
+DEFINE_EVENT(rtc_timer_class, rtc_timer_enqueue,
+
+	TP_PROTO(struct rtc_timer *timer),
+
+	TP_ARGS(timer)
+);
+
+DEFINE_EVENT(rtc_timer_class, rtc_timer_dequeue,
+
+	TP_PROTO(struct rtc_timer *timer),
+
+	TP_ARGS(timer)
+);
+
+DEFINE_EVENT(rtc_timer_class, rtc_timer_fired,
+
+	TP_PROTO(struct rtc_timer *timer),
+
+	TP_ARGS(timer)
+);
+
+#endif /* _TRACE_RTC_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 922cb8968fb2..335d87242439 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -50,9 +50,9 @@ DEFINE_EVENT(rpc_task_status, rpc_bind_status,
 );
 
 TRACE_EVENT(rpc_connect_status,
-	TP_PROTO(struct rpc_task *task, int status),
+	TP_PROTO(const struct rpc_task *task),
 
-	TP_ARGS(task, status),
+	TP_ARGS(task),
 
 	TP_STRUCT__entry(
 		__field(unsigned int, task_id)
@@ -63,7 +63,7 @@ TRACE_EVENT(rpc_connect_status,
 	TP_fast_assign(
 		__entry->task_id = task->tk_pid;
 		__entry->client_id = task->tk_client->cl_clid;
-		__entry->status = status;
+		__entry->status = task->tk_status;
 	),
 
 	TP_printk("task:%u@%u status=%d",
@@ -103,9 +103,9 @@ TRACE_EVENT(rpc_request,
 
 DECLARE_EVENT_CLASS(rpc_task_running,
 
-	TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const void *action),
+	TP_PROTO(const struct rpc_task *task, const void *action),
 
-	TP_ARGS(clnt, task, action),
+	TP_ARGS(task, action),
 
 	TP_STRUCT__entry(
 		__field(unsigned int, task_id)
@@ -117,7 +117,8 @@ DECLARE_EVENT_CLASS(rpc_task_running,
 		),
 
 	TP_fast_assign(
-		__entry->client_id = clnt ? clnt->cl_clid : -1;
+		__entry->client_id = task->tk_client ?
+				     task->tk_client->cl_clid : -1;
 		__entry->task_id = task->tk_pid;
 		__entry->action = action;
 		__entry->runstate = task->tk_runstate;
@@ -136,33 +137,33 @@ DECLARE_EVENT_CLASS(rpc_task_running,
 
 DEFINE_EVENT(rpc_task_running, rpc_task_begin,
 
-	TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const void *action),
+	TP_PROTO(const struct rpc_task *task, const void *action),
 
-	TP_ARGS(clnt, task, action)
+	TP_ARGS(task, action)
 
 );
 
 DEFINE_EVENT(rpc_task_running, rpc_task_run_action,
 
-	TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const void *action),
+	TP_PROTO(const struct rpc_task *task, const void *action),
 
-	TP_ARGS(clnt, task, action)
+	TP_ARGS(task, action)
 
 );
 
 DEFINE_EVENT(rpc_task_running, rpc_task_complete,
 
-	TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const void *action),
+	TP_PROTO(const struct rpc_task *task, const void *action),
 
-	TP_ARGS(clnt, task, action)
+	TP_ARGS(task, action)
 
 );
 
 DECLARE_EVENT_CLASS(rpc_task_queued,
 
-	TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const struct rpc_wait_queue *q),
+	TP_PROTO(const struct rpc_task *task, const struct rpc_wait_queue *q),
 
-	TP_ARGS(clnt, task, q),
+	TP_ARGS(task, q),
 
 	TP_STRUCT__entry(
 		__field(unsigned int, task_id)
@@ -175,7 +176,8 @@ DECLARE_EVENT_CLASS(rpc_task_queued,
 		),
 
 	TP_fast_assign(
-		__entry->client_id = clnt ? clnt->cl_clid : -1;
+		__entry->client_id = task->tk_client ?
+				     task->tk_client->cl_clid : -1;
 		__entry->task_id = task->tk_pid;
 		__entry->timeout = task->tk_timeout;
 		__entry->runstate = task->tk_runstate;
@@ -196,18 +198,63 @@ DECLARE_EVENT_CLASS(rpc_task_queued,
 
 DEFINE_EVENT(rpc_task_queued, rpc_task_sleep,
 
-	TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const struct rpc_wait_queue *q),
+	TP_PROTO(const struct rpc_task *task, const struct rpc_wait_queue *q),
 
-	TP_ARGS(clnt, task, q)
+	TP_ARGS(task, q)
 
 );
 
 DEFINE_EVENT(rpc_task_queued, rpc_task_wakeup,
 
-	TP_PROTO(const struct rpc_clnt *clnt, const struct rpc_task *task, const struct rpc_wait_queue *q),
+	TP_PROTO(const struct rpc_task *task, const struct rpc_wait_queue *q),
+
+	TP_ARGS(task, q)
+
+);
+
+TRACE_EVENT(rpc_stats_latency,
+
+	TP_PROTO(
+		const struct rpc_task *task,
+		ktime_t backlog,
+		ktime_t rtt,
+		ktime_t execute
+	),
 
-	TP_ARGS(clnt, task, q)
+	TP_ARGS(task, backlog, rtt, execute),
 
+	TP_STRUCT__entry(
+		__field(u32, xid)
+		__field(int, version)
+		__string(progname, task->tk_client->cl_program->name)
+		__string(procname, rpc_proc_name(task))
+		__field(unsigned long, backlog)
+		__field(unsigned long, rtt)
+		__field(unsigned long, execute)
+		__string(addr,
+			 task->tk_xprt->address_strings[RPC_DISPLAY_ADDR])
+		__string(port,
+			 task->tk_xprt->address_strings[RPC_DISPLAY_PORT])
+	),
+
+	TP_fast_assign(
+		__entry->xid = be32_to_cpu(task->tk_rqstp->rq_xid);
+		__entry->version = task->tk_client->cl_vers;
+		__assign_str(progname, task->tk_client->cl_program->name);
+		__assign_str(procname, rpc_proc_name(task));
+		__entry->backlog = ktime_to_us(backlog);	/* stored in usecs */
+		__entry->rtt = ktime_to_us(rtt);
+		__entry->execute = ktime_to_us(execute);
+		__assign_str(addr,
+			     task->tk_xprt->address_strings[RPC_DISPLAY_ADDR]);
+		__assign_str(port,
+			     task->tk_xprt->address_strings[RPC_DISPLAY_PORT]);
+	),
+
+	TP_printk("peer=[%s]:%s xid=0x%08x %sv%d %s backlog=%lu rtt=%lu execute=%lu",
+		__get_str(addr), __get_str(port), __entry->xid,
+		__get_str(progname), __entry->version, __get_str(procname),
+		__entry->backlog, __entry->rtt, __entry->execute)
 );
 
 /*
@@ -406,6 +453,27 @@ DEFINE_EVENT(rpc_xprt_event, xprt_complete_rqst,
 	TP_PROTO(struct rpc_xprt *xprt, __be32 xid, int status),
 	TP_ARGS(xprt, xid, status));
 
+TRACE_EVENT(xprt_ping,
+	TP_PROTO(const struct rpc_xprt *xprt, int status),
+	/* Records status plus the transport's address and port strings. */
+	TP_ARGS(xprt, status),
+
+	TP_STRUCT__entry(
+		__field(int, status)
+		__string(addr, xprt->address_strings[RPC_DISPLAY_ADDR])
+		__string(port, xprt->address_strings[RPC_DISPLAY_PORT])
+	),
+
+	TP_fast_assign(
+		__entry->status = status;
+		__assign_str(addr, xprt->address_strings[RPC_DISPLAY_ADDR]);
+		__assign_str(port, xprt->address_strings[RPC_DISPLAY_PORT]);
+	),
+
+	TP_printk("peer=[%s]:%s status=%d",
+			__get_str(addr), __get_str(port), __entry->status)
+);
+
 TRACE_EVENT(xs_tcp_data_ready,
 	TP_PROTO(struct rpc_xprt *xprt, int err, unsigned int total),
 
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index 6570c5b45ba1..a1cb91342231 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -346,15 +346,9 @@ TRACE_EVENT(mm_vmscan_lru_shrink_inactive,
 
 	TP_PROTO(int nid,
 		unsigned long nr_scanned, unsigned long nr_reclaimed,
-		unsigned long nr_dirty, unsigned long nr_writeback,
-		unsigned long nr_congested, unsigned long nr_immediate,
-		unsigned long nr_activate, unsigned long nr_ref_keep,
-		unsigned long nr_unmap_fail,
-		int priority, int file),
+		struct reclaim_stat *stat, int priority, int file),
 
-	TP_ARGS(nid, nr_scanned, nr_reclaimed, nr_dirty, nr_writeback,
-		nr_congested, nr_immediate, nr_activate, nr_ref_keep,
-		nr_unmap_fail, priority, file),
+	TP_ARGS(nid, nr_scanned, nr_reclaimed, stat, priority, file),
 
 	TP_STRUCT__entry(
 		__field(int, nid)
@@ -375,13 +369,13 @@ TRACE_EVENT(mm_vmscan_lru_shrink_inactive,
 		__entry->nid = nid;
 		__entry->nr_scanned = nr_scanned;
 		__entry->nr_reclaimed = nr_reclaimed;
-		__entry->nr_dirty = nr_dirty;
-		__entry->nr_writeback = nr_writeback;
-		__entry->nr_congested = nr_congested;
-		__entry->nr_immediate = nr_immediate;
-		__entry->nr_activate = nr_activate;
-		__entry->nr_ref_keep = nr_ref_keep;
-		__entry->nr_unmap_fail = nr_unmap_fail;
+		__entry->nr_dirty = stat->nr_dirty;
+		__entry->nr_writeback = stat->nr_writeback;
+		__entry->nr_congested = stat->nr_congested;
+		__entry->nr_immediate = stat->nr_immediate;
+		__entry->nr_activate = stat->nr_activate;
+		__entry->nr_ref_keep = stat->nr_ref_keep;
+		__entry->nr_unmap_fail = stat->nr_unmap_fail;
 		__entry->priority = priority;
 		__entry->reclaim_flags = trace_shrink_flags(file);
 	),
diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h
index f8b134f5608f..e7ee32861d51 100644
--- a/include/uapi/asm-generic/mman-common.h
+++ b/include/uapi/asm-generic/mman-common.h
@@ -27,6 +27,9 @@
 # define MAP_UNINITIALIZED 0x0		/* Don't support this flag */
 #endif
 
+/* 0x0100 - 0x80000 flags are defined in asm-generic/mman.h */
+#define MAP_FIXED_NOREPLACE	0x100000	/* MAP_FIXED which doesn't unmap underlying mapping */
+
 /*
  * Flags for mlock
  */
diff --git a/include/uapi/asm-generic/siginfo.h b/include/uapi/asm-generic/siginfo.h
index 544208fd3db1..558b902f18d4 100644
--- a/include/uapi/asm-generic/siginfo.h
+++ b/include/uapi/asm-generic/siginfo.h
@@ -211,7 +211,8 @@ typedef struct siginfo {
 #define __FPE_INVASC	12	/* invalid ASCII digit */
 #define __FPE_INVDEC	13	/* invalid decimal digit */
 #define FPE_FLTUNK	14	/* undiagnosed floating-point exception */
-#define NSIGFPE		14
+#define FPE_CONDTRAP	15	/* trap on condition */
+#define NSIGFPE		15
 
 /*
  * SIGSEGV si_codes
diff --git a/include/uapi/linux/const.h b/include/uapi/linux/const.h
index 92537757590a..5ed721ad5b19 100644
--- a/include/uapi/linux/const.h
+++ b/include/uapi/linux/const.h
@@ -1,8 +1,8 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /* const.h: Macros for dealing with constants.  */
 
-#ifndef _LINUX_CONST_H
-#define _LINUX_CONST_H
+#ifndef _UAPI_LINUX_CONST_H
+#define _UAPI_LINUX_CONST_H
 
 /* Some constant macros are used in both assembler and
  * C code.  Therefore we cannot annotate them always with
@@ -22,7 +22,10 @@
 #define _AT(T,X)	((T)(X))
 #endif
 
-#define _BITUL(x)	(_AC(1,UL) << (x))
-#define _BITULL(x)	(_AC(1,ULL) << (x))
+#define _UL(x)		(_AC(x, UL))
+#define _ULL(x)		(_AC(x, ULL))
 
-#endif /* !(_LINUX_CONST_H) */
+#define _BITUL(x)	(_UL(1) << (x))
+#define _BITULL(x)	(_ULL(1) << (x))
+
+#endif /* _UAPI_LINUX_CONST_H */
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 7b26d4b0b052..1065006c9bf5 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -396,6 +396,10 @@ struct kvm_run {
 		char padding[256];
 	};
 
+	/* 2048 is the size of the char array used to bound/pad the size
+	 * of the union that holds sync regs.
+	 */
+	#define SYNC_REGS_SIZE_BYTES 2048
 	/*
 	 * shared registers between kvm and userspace.
 	 * kvm_valid_regs specifies the register classes set by the host
@@ -407,7 +411,7 @@ struct kvm_run {
 	__u64 kvm_dirty_regs;
 	union {
 		struct kvm_sync_regs regs;
-		char padding[2048];
+		char padding[SYNC_REGS_SIZE_BYTES];
 	} s;
 };
 
@@ -925,7 +929,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_S390_GS 140
 #define KVM_CAP_S390_AIS 141
 #define KVM_CAP_SPAPR_TCE_VFIO 142
-#define KVM_CAP_X86_GUEST_MWAIT 143
+#define KVM_CAP_X86_DISABLE_EXITS 143
 #define KVM_CAP_ARM_USER_IRQ 144
 #define KVM_CAP_S390_CMMA_MIGRATION 145
 #define KVM_CAP_PPC_FWNMI 146
@@ -936,6 +940,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_PPC_GET_CPU_CHAR 151
 #define KVM_CAP_S390_BPB 152
 #define KVM_CAP_GET_MSR_FEATURES 153
+#define KVM_CAP_HYPERV_EVENTFD 154
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1375,6 +1380,10 @@ struct kvm_enc_region {
 #define KVM_MEMORY_ENCRYPT_REG_REGION    _IOR(KVMIO, 0xbb, struct kvm_enc_region)
 #define KVM_MEMORY_ENCRYPT_UNREG_REGION  _IOR(KVMIO, 0xbc, struct kvm_enc_region)
 
+/* Available with KVM_CAP_HYPERV_EVENTFD */
+#define KVM_HYPERV_EVENTFD        _IOW(KVMIO,  0xbd, struct kvm_hyperv_eventfd)
+
+
 /* Secure Encrypted Virtualization command */
 enum sev_cmd_id {
 	/* Guest initialization commands */
@@ -1515,4 +1524,14 @@ struct kvm_assigned_msix_entry {
 #define KVM_ARM_DEV_EL1_PTIMER		(1 << 1)
 #define KVM_ARM_DEV_PMU			(1 << 2)
 
+struct kvm_hyperv_eventfd {
+	__u32 conn_id;
+	__s32 fd;
+	__u32 flags;
+	__u32 padding[3];
+};
+
+#define KVM_HYPERV_CONN_ID_MASK		0x00ffffff
+#define KVM_HYPERV_EVENTFD_DEASSIGN	(1 << 0)
+
 #endif /* __LINUX_KVM_H */
diff --git a/include/uapi/linux/msg.h b/include/uapi/linux/msg.h
index 5d5ab81dc9be..e4a0d9a9a9e8 100644
--- a/include/uapi/linux/msg.h
+++ b/include/uapi/linux/msg.h
@@ -7,6 +7,7 @@
 /* ipcs ctl commands */
 #define MSG_STAT 11
 #define MSG_INFO 12
+#define MSG_STAT_ANY 13
 
 /* msgrcv options */
 #define MSG_NOERROR     010000  /* no error if message is too big */
diff --git a/include/uapi/linux/sem.h b/include/uapi/linux/sem.h
index 9c3e745b0656..39a1876f039e 100644
--- a/include/uapi/linux/sem.h
+++ b/include/uapi/linux/sem.h
@@ -19,6 +19,7 @@
 /* ipcs ctl cmds */
 #define SEM_STAT 18
 #define SEM_INFO 19
+#define SEM_STAT_ANY 20
 
 /* Obsolete, used only for backwards compatibility and libc5 compiles */
 struct semid_ds {
diff --git a/include/uapi/linux/shm.h b/include/uapi/linux/shm.h
index 4de12a39b075..dde1344f047c 100644
--- a/include/uapi/linux/shm.h
+++ b/include/uapi/linux/shm.h
@@ -83,8 +83,9 @@ struct shmid_ds {
 #define SHM_UNLOCK 	12
 
 /* ipcs ctl commands */
-#define SHM_STAT 	13
-#define SHM_INFO 	14
+#define SHM_STAT	13
+#define SHM_INFO	14
+#define SHM_STAT_ANY    15
 
 /* Obsolete, used only for backwards compatibility */
 struct	shminfo {
diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
index 4e8b8304b793..40297a3181ed 100644
--- a/include/uapi/linux/virtio_balloon.h
+++ b/include/uapi/linux/virtio_balloon.h
@@ -53,7 +53,9 @@ struct virtio_balloon_config {
 #define VIRTIO_BALLOON_S_MEMTOT   5   /* Total amount of memory */
 #define VIRTIO_BALLOON_S_AVAIL    6   /* Available memory as in /proc */
 #define VIRTIO_BALLOON_S_CACHES   7   /* Disk caches */
-#define VIRTIO_BALLOON_S_NR       8
+#define VIRTIO_BALLOON_S_HTLB_PGALLOC  8  /* Hugetlb page allocations */
+#define VIRTIO_BALLOON_S_HTLB_PGFAIL   9  /* Hugetlb page allocation failures */
+#define VIRTIO_BALLOON_S_NR       10
 
 /*
  * Memory statistics structure.
diff --git a/include/video/of_display_timing.h b/include/video/of_display_timing.h
index 956455fc9f9a..bb29e5954000 100644
--- a/include/video/of_display_timing.h
+++ b/include/video/of_display_timing.h
@@ -19,7 +19,6 @@ struct display_timings;
 int of_get_display_timing(const struct device_node *np, const char *name,
 		struct display_timing *dt);
 struct display_timings *of_get_display_timings(const struct device_node *np);
-int of_display_timings_exist(const struct device_node *np);
 #else
 static inline int of_get_display_timing(const struct device_node *np,
 		const char *name, struct display_timing *dt)
@@ -31,10 +30,6 @@ of_get_display_timings(const struct device_node *np)
 {
 	return NULL;
 }
-static inline int of_display_timings_exist(const struct device_node *np)
-{
-	return -ENOSYS;
-}
 #endif
 
 #endif
diff --git a/include/xen/interface/features.h b/include/xen/interface/features.h
index 9b0eb574f0d1..6d1384abfbdf 100644
--- a/include/xen/interface/features.h
+++ b/include/xen/interface/features.h
@@ -42,6 +42,9 @@
 /* x86: Does this Xen host support the MMU_PT_UPDATE_PRESERVE_AD hypercall? */
 #define XENFEAT_mmu_pt_update_preserve_ad  5
 
+/* x86: Does this Xen host support the MMU_{CLEAR,COPY}_PAGE hypercall? */
+#define XENFEAT_highmem_assist             6
+
 /*
  * If set, GNTTABOP_map_grant_ref honors flags to be placed into guest kernel
  * available pte bits.
@@ -60,6 +63,26 @@
 /* operation as Dom0 is supported */
 #define XENFEAT_dom0                      11
 
+/* Xen also maps grant references at pfn = mfn.
+ * This feature flag is deprecated and should not be used.
+#define XENFEAT_grant_map_identity        12
+ */
+
+/* Guest can use XENMEMF_vnode to specify virtual node for memory op. */
+#define XENFEAT_memory_op_vnode_supported 13
+
+/* arm: Hypervisor supports ARM SMC calling convention. */
+#define XENFEAT_ARM_SMCCC_supported       14
+
+/*
+ * x86/PVH: If set, ACPI RSDP can be placed at any address. Otherwise RSDP
+ * must be located in lower 1MB, as required by ACPI Specification for IA-PC
+ * systems.
+ * This feature flag is only consulted if XEN_ELFNOTE_GUEST_OS contains
+ * the "linux" string.
+ */
+#define XENFEAT_linux_rsdp_unrestricted   15
+
 #define XENFEAT_NR_SUBMAPS 1
 
 #endif /* __XEN_PUBLIC_FEATURES_H__ */
diff --git a/init/Kconfig b/init/Kconfig
index 9d167a5b4a94..f013afc74b11 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1923,3 +1923,13 @@ source "kernel/Kconfig.locks"
 
 config ARCH_HAS_SYNC_CORE_BEFORE_USERMODE
 	bool
+
+# It may be useful for an architecture to override the definitions of the
+# SYSCALL_DEFINE() and __SYSCALL_DEFINEx() macros in <linux/syscalls.h>
+# and the COMPAT_ variants in <linux/compat.h>, in particular to use a
+# different calling convention for syscalls. They can also override the
+# macros for not-implemented syscalls in kernel/sys_ni.c and
+# kernel/time/posix-stubs.c. All these overrides need to be available in
+# <asm/syscall_wrapper.h>.
+config ARCH_HAS_SYSCALL_WRAPPER
+	def_bool n
diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c
index 12c159824c7b..035a5f0ab26b 100644
--- a/init/do_mounts_rd.c
+++ b/init/do_mounts_rd.c
@@ -255,7 +255,7 @@ int __init rd_load_image(char *from)
 		nblocks, ((nblocks-1)/devblocks)+1, nblocks>devblocks ? "s" : "");
 	for (i = 0, disk = 1; i < nblocks; i++) {
 		if (i && (i % devblocks == 0)) {
-			printk("done disk #%d.\n", disk++);
+			pr_cont("done disk #%d.\n", disk++);
 			rotate = 0;
 			if (ksys_close(in_fd)) {
 				printk("Error closing the disk.\n");
@@ -278,7 +278,7 @@ int __init rd_load_image(char *from)
 		}
 #endif
 	}
-	printk("done.\n");
+	pr_cont("done.\n");
 
 successful_load:
 	res = 1;
diff --git a/init/main.c b/init/main.c
index e4a3160991ea..b795aa341a3a 100644
--- a/init/main.c
+++ b/init/main.c
@@ -51,6 +51,7 @@
 #include <linux/taskstats_kern.h>
 #include <linux/delayacct.h>
 #include <linux/unistd.h>
+#include <linux/utsname.h>
 #include <linux/rmap.h>
 #include <linux/mempolicy.h>
 #include <linux/key.h>
@@ -97,6 +98,9 @@
 #include <asm/sections.h>
 #include <asm/cacheflush.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/initcall.h>
+
 static int kernel_init(void *);
 
 extern void init_IRQ(void);
@@ -491,6 +495,17 @@ void __init __weak thread_stack_cache_init(void)
 
 void __init __weak mem_encrypt_init(void) { }
 
+bool initcall_debug;
+core_param(initcall_debug, initcall_debug, bool, 0644);
+
+#ifdef TRACEPOINTS_ENABLED
+static void __init initcall_debug_enable(void);
+#else
+static inline void initcall_debug_enable(void)
+{
+}
+#endif
+
 /*
  * Set up kernel memory allocators
  */
@@ -612,6 +627,9 @@ asmlinkage __visible void __init start_kernel(void)
 	/* Trace events are available after this */
 	trace_init();
 
+	if (initcall_debug)
+		initcall_debug_enable();
+
 	context_tracking_init();
 	/* init some links before init_ISA_irqs() */
 	early_irq_init();
@@ -689,6 +707,7 @@ asmlinkage __visible void __init start_kernel(void)
 	cred_init();
 	fork_init();
 	proc_caches_init();
+	uts_ns_init();
 	buffer_init();
 	key_init();
 	security_init();
@@ -696,6 +715,7 @@ asmlinkage __visible void __init start_kernel(void)
 	vfs_caches_init();
 	pagecache_init();
 	signals_init();
+	seq_file_init();
 	proc_root_init();
 	nsfs_init();
 	cpuset_init();
@@ -728,9 +748,6 @@ static void __init do_ctors(void)
 #endif
 }
 
-bool initcall_debug;
-core_param(initcall_debug, initcall_debug, bool, 0644);
-
 #ifdef CONFIG_KALLSYMS
 struct blacklist_entry {
 	struct list_head next;
@@ -800,37 +817,71 @@ static bool __init_or_module initcall_blacklisted(initcall_t fn)
 #endif
 __setup("initcall_blacklist=", initcall_blacklist);
 
-static int __init_or_module do_one_initcall_debug(initcall_t fn)
+static __init_or_module void
+trace_initcall_start_cb(void *data, initcall_t fn)
 {
-	ktime_t calltime, delta, rettime;
-	unsigned long long duration;
-	int ret;
+	ktime_t *calltime = (ktime_t *)data;
 
 	printk(KERN_DEBUG "calling  %pF @ %i\n", fn, task_pid_nr(current));
-	calltime = ktime_get();
-	ret = fn();
+	*calltime = ktime_get();
+}
+
+static __init_or_module void
+trace_initcall_finish_cb(void *data, initcall_t fn, int ret)
+{
+	ktime_t *calltime = (ktime_t *)data;
+	ktime_t delta, rettime;
+	unsigned long long duration;
+
 	rettime = ktime_get();
-	delta = ktime_sub(rettime, calltime);
+	delta = ktime_sub(rettime, *calltime);
 	duration = (unsigned long long) ktime_to_ns(delta) >> 10;
 	printk(KERN_DEBUG "initcall %pF returned %d after %lld usecs\n",
 		 fn, ret, duration);
+}
 
-	return ret;
+static ktime_t initcall_calltime;
+
+#ifdef TRACEPOINTS_ENABLED
+static void __init initcall_debug_enable(void)
+{
+	int ret;
+	/* Both probes receive &initcall_calltime; results are OR-ed for WARN(). */
+	ret = register_trace_initcall_start(trace_initcall_start_cb,
+					    &initcall_calltime);
+	ret |= register_trace_initcall_finish(trace_initcall_finish_cb,
+					      &initcall_calltime);
+	WARN(ret, "Failed to register initcall tracepoints\n");
 }
+# define do_trace_initcall_start	trace_initcall_start
+# define do_trace_initcall_finish	trace_initcall_finish
+#else
+static inline void do_trace_initcall_start(initcall_t fn)
+{
+	/* No tracepoints: call the debug callback directly when enabled. */
+	if (initcall_debug)
+		trace_initcall_start_cb(&initcall_calltime, fn);
+}
+static inline void do_trace_initcall_finish(initcall_t fn, int ret)
+{
+	/* No tracepoints: call the debug callback directly when enabled. */
+	if (initcall_debug)
+		trace_initcall_finish_cb(&initcall_calltime, fn, ret);
+}
+#endif /* !TRACEPOINTS_ENABLED */
 
 int __init_or_module do_one_initcall(initcall_t fn)
 {
 	int count = preempt_count();
-	int ret;
 	char msgbuf[64];
+	int ret;
 
 	if (initcall_blacklisted(fn))
 		return -EPERM;
 
-	if (initcall_debug)
-		ret = do_one_initcall_debug(fn);
-	else
-		ret = fn();
+	do_trace_initcall_start(fn);
+	ret = fn();
+	do_trace_initcall_finish(fn, ret);
 
 	msgbuf[0] = 0;
 
@@ -874,7 +925,7 @@ static initcall_t *initcall_levels[] __initdata = {
 
 /* Keep these in sync with initcalls in include/linux/init.h */
 static char *initcall_level_names[] __initdata = {
-	"early",
+	"pure",
 	"core",
 	"postcore",
 	"arch",
@@ -895,6 +946,7 @@ static void __init do_initcall_level(int level)
 		   level, level,
 		   NULL, &repair_env_string);
 
+	trace_initcall_level(initcall_level_names[level]);
 	for (fn = initcall_levels[level]; fn < initcall_levels[level+1]; fn++)
 		do_one_initcall(*fn);
 }
@@ -929,6 +981,7 @@ static void __init do_pre_smp_initcalls(void)
 {
 	initcall_t *fn;
 
+	trace_initcall_level("early");
 	for (fn = __initcall_start; fn < __initcall0_start; fn++)
 		do_one_initcall(*fn);
 }
diff --git a/ipc/msg.c b/ipc/msg.c
index 114a21189613..56fd1c73eedc 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -497,14 +497,14 @@ static int msgctl_stat(struct ipc_namespace *ns, int msqid,
 	memset(p, 0, sizeof(*p));
 
 	rcu_read_lock();
-	if (cmd == MSG_STAT) {
+	if (cmd == MSG_STAT || cmd == MSG_STAT_ANY) {
 		msq = msq_obtain_object(ns, msqid);
 		if (IS_ERR(msq)) {
 			err = PTR_ERR(msq);
 			goto out_unlock;
 		}
 		id = msq->q_perm.id;
-	} else {
+	} else { /* IPC_STAT */
 		msq = msq_obtain_object_check(ns, msqid);
 		if (IS_ERR(msq)) {
 			err = PTR_ERR(msq);
@@ -512,9 +512,14 @@ static int msgctl_stat(struct ipc_namespace *ns, int msqid,
 		}
 	}
 
-	err = -EACCES;
-	if (ipcperms(ns, &msq->q_perm, S_IRUGO))
-		goto out_unlock;
+	/* see comment for SHM_STAT_ANY */
+	if (cmd == MSG_STAT_ANY)
+		audit_ipc_obj(&msq->q_perm);
+	else {
+		err = -EACCES;
+		if (ipcperms(ns, &msq->q_perm, S_IRUGO))
+			goto out_unlock;
+	}
 
 	err = security_msg_queue_msgctl(&msq->q_perm, cmd);
 	if (err)
@@ -572,6 +577,7 @@ long ksys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
 		return err;
 	}
 	case MSG_STAT:	/* msqid is an index rather than a msg queue id */
+	case MSG_STAT_ANY:
 	case IPC_STAT:
 		err = msgctl_stat(ns, msqid, cmd, &msqid64);
 		if (err < 0)
@@ -690,6 +696,7 @@ long compat_ksys_msgctl(int msqid, int cmd, void __user *uptr)
 	}
 	case IPC_STAT:
 	case MSG_STAT:
+	case MSG_STAT_ANY:
 		err = msgctl_stat(ns, msqid, cmd, &msqid64);
 		if (err < 0)
 			return err;
diff --git a/ipc/sem.c b/ipc/sem.c
index 2994da8ccc7f..06be75d9217a 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -1220,14 +1220,14 @@ static int semctl_stat(struct ipc_namespace *ns, int semid,
 	memset(semid64, 0, sizeof(*semid64));
 
 	rcu_read_lock();
-	if (cmd == SEM_STAT) {
+	if (cmd == SEM_STAT || cmd == SEM_STAT_ANY) {
 		sma = sem_obtain_object(ns, semid);
 		if (IS_ERR(sma)) {
 			err = PTR_ERR(sma);
 			goto out_unlock;
 		}
 		id = sma->sem_perm.id;
-	} else {
+	} else { /* IPC_STAT */
 		sma = sem_obtain_object_check(ns, semid);
 		if (IS_ERR(sma)) {
 			err = PTR_ERR(sma);
@@ -1235,9 +1235,14 @@ static int semctl_stat(struct ipc_namespace *ns, int semid,
 		}
 	}
 
-	err = -EACCES;
-	if (ipcperms(ns, &sma->sem_perm, S_IRUGO))
-		goto out_unlock;
+	/* see comment for SHM_STAT_ANY */
+	if (cmd == SEM_STAT_ANY)
+		audit_ipc_obj(&sma->sem_perm);
+	else {
+		err = -EACCES;
+		if (ipcperms(ns, &sma->sem_perm, S_IRUGO))
+			goto out_unlock;
+	}
 
 	err = security_sem_semctl(&sma->sem_perm, cmd);
 	if (err)
@@ -1626,6 +1631,7 @@ long ksys_semctl(int semid, int semnum, int cmd, unsigned long arg)
 		return semctl_info(ns, semid, cmd, p);
 	case IPC_STAT:
 	case SEM_STAT:
+	case SEM_STAT_ANY:
 		err = semctl_stat(ns, semid, cmd, &semid64);
 		if (err < 0)
 			return err;
@@ -1732,6 +1738,7 @@ long compat_ksys_semctl(int semid, int semnum, int cmd, int arg)
 		return semctl_info(ns, semid, cmd, p);
 	case IPC_STAT:
 	case SEM_STAT:
+	case SEM_STAT_ANY:
 		err = semctl_stat(ns, semid, cmd, &semid64);
 		if (err < 0)
 			return err;
diff --git a/ipc/shm.c b/ipc/shm.c
index acefe44fefef..3cf48988d68c 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -225,6 +225,12 @@ static int __shm_open(struct vm_area_struct *vma)
 	if (IS_ERR(shp))
 		return PTR_ERR(shp);
 
+	if (shp->shm_file != sfd->file) {
+		/* ID was reused */
+		shm_unlock(shp);
+		return -EINVAL;
+	}
+
 	shp->shm_atim = ktime_get_real_seconds();
 	ipc_update_pid(&shp->shm_lprid, task_tgid(current));
 	shp->shm_nattch++;
@@ -415,7 +421,7 @@ static int shm_split(struct vm_area_struct *vma, unsigned long addr)
 	struct file *file = vma->vm_file;
 	struct shm_file_data *sfd = shm_file_data(file);
 
-	if (sfd->vm_ops && sfd->vm_ops->split)
+	if (sfd->vm_ops->split)
 		return sfd->vm_ops->split(vma, addr);
 
 	return 0;
@@ -455,8 +461,9 @@ static int shm_mmap(struct file *file, struct vm_area_struct *vma)
 	int ret;
 
 	/*
-	 * In case of remap_file_pages() emulation, the file can represent
-	 * removed IPC ID: propogate shm_lock() error to caller.
+	 * In case of remap_file_pages() emulation, the file can represent an
+	 * IPC ID that was removed, and possibly even reused by another shm
+	 * segment already.  Propagate this case as an error to caller.
 	 */
 	ret = __shm_open(vma);
 	if (ret)
@@ -480,6 +487,7 @@ static int shm_release(struct inode *ino, struct file *file)
 	struct shm_file_data *sfd = shm_file_data(file);
 
 	put_ipc_ns(sfd->ns);
+	fput(sfd->file);
 	shm_file_data(file) = NULL;
 	kfree(sfd);
 	return 0;
@@ -947,14 +955,14 @@ static int shmctl_stat(struct ipc_namespace *ns, int shmid,
 	memset(tbuf, 0, sizeof(*tbuf));
 
 	rcu_read_lock();
-	if (cmd == SHM_STAT) {
+	if (cmd == SHM_STAT || cmd == SHM_STAT_ANY) {
 		shp = shm_obtain_object(ns, shmid);
 		if (IS_ERR(shp)) {
 			err = PTR_ERR(shp);
 			goto out_unlock;
 		}
 		id = shp->shm_perm.id;
-	} else {
+	} else { /* IPC_STAT */
 		shp = shm_obtain_object_check(ns, shmid);
 		if (IS_ERR(shp)) {
 			err = PTR_ERR(shp);
@@ -962,9 +970,20 @@ static int shmctl_stat(struct ipc_namespace *ns, int shmid,
 		}
 	}
 
-	err = -EACCES;
-	if (ipcperms(ns, &shp->shm_perm, S_IRUGO))
-		goto out_unlock;
+	/*
+	 * Semantically SHM_STAT_ANY ought to be identical to
+	 * that functionality provided by the /proc/sysvipc/
+	 * interface. As such, only audit these calls and
+	 * do not do traditional S_IRUGO permission checks on
+	 * the ipc object.
+	 */
+	if (cmd == SHM_STAT_ANY)
+		audit_ipc_obj(&shp->shm_perm);
+	else {
+		err = -EACCES;
+		if (ipcperms(ns, &shp->shm_perm, S_IRUGO))
+			goto out_unlock;
+	}
 
 	err = security_shm_shmctl(&shp->shm_perm, cmd);
 	if (err)
@@ -1104,6 +1123,7 @@ long ksys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf)
 		return err;
 	}
 	case SHM_STAT:
+	case SHM_STAT_ANY:
 	case IPC_STAT: {
 		err = shmctl_stat(ns, shmid, cmd, &sem64);
 		if (err < 0)
@@ -1282,6 +1302,7 @@ long compat_ksys_shmctl(int shmid, int cmd, void __user *uptr)
 		return err;
 	}
 	case IPC_STAT:
+	case SHM_STAT_ANY:
 	case SHM_STAT:
 		err = shmctl_stat(ns, shmid, cmd, &sem64);
 		if (err < 0)
@@ -1432,7 +1453,16 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg,
 	file->f_mapping = shp->shm_file->f_mapping;
 	sfd->id = shp->shm_perm.id;
 	sfd->ns = get_ipc_ns(ns);
-	sfd->file = shp->shm_file;
+	/*
+	 * We need to take a reference to the real shm file to prevent the
+	 * pointer from becoming stale in cases where the lifetime of the outer
+	 * file extends beyond that of the shm segment.  It's not usually
+	 * possible, but it can happen during remap_file_pages() emulation as
+	 * that unmaps the memory, then does ->mmap() via file reference only.
+	 * We'll deny the ->mmap() if the shm segment was since removed, but to
+	 * detect shm ID reuse we need to compare the file pointers.
+	 */
+	sfd->file = get_file(shp->shm_file);
 	sfd->vm_ops = NULL;
 
 	err = security_mmap_file(file, prot, flags);
diff --git a/ipc/util.c b/ipc/util.c
index 3783b7991cc7..4e81182fa0ac 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -89,6 +89,7 @@ static int __init ipc_init(void)
 {
 	int err_sem, err_msg;
 
+	proc_mkdir("sysvipc", NULL);
 	err_sem = sem_init();
 	WARN(err_sem, "ipc: sysv sem_init failed: %d\n", err_sem);
 	err_msg = msg_init();
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index d2bda5aa25d7..8dd9210d7db7 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -182,8 +182,10 @@ static void bpf_tcp_release(struct sock *sk)
 		psock->cork = NULL;
 	}
 
-	sk->sk_prot = psock->sk_proto;
-	psock->sk_proto = NULL;
+	if (psock->sk_proto) {
+		sk->sk_prot = psock->sk_proto;
+		psock->sk_proto = NULL;
+	}
 out:
 	rcu_read_unlock();
 }
@@ -211,6 +213,12 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
 	close_fun = psock->save_close;
 
 	write_lock_bh(&sk->sk_callback_lock);
+	if (psock->cork) {
+		free_start_sg(psock->sock, psock->cork);
+		kfree(psock->cork);
+		psock->cork = NULL;
+	}
+
 	list_for_each_entry_safe(md, mtmp, &psock->ingress, list) {
 		list_del(&md->list);
 		free_start_sg(psock->sock, md);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 0244973ee544..4ca46df19c9a 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1226,18 +1226,6 @@ bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type,
 	}
 }
 
-static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
-					     enum bpf_attach_type attach_type)
-{
-	switch (prog->type) {
-	case BPF_PROG_TYPE_CGROUP_SOCK:
-	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
-		return attach_type == prog->expected_attach_type ? 0 : -EINVAL;
-	default:
-		return 0;
-	}
-}
-
 /* last field in 'union bpf_attr' used by this command */
 #define	BPF_PROG_LOAD_LAST_FIELD expected_attach_type
 
@@ -1465,6 +1453,18 @@ out_free_tp:
 
 #ifdef CONFIG_CGROUP_BPF
 
+static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
+					     enum bpf_attach_type attach_type)
+{
+	switch (prog->type) {
+	case BPF_PROG_TYPE_CGROUP_SOCK:
+	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
+		return attach_type == prog->expected_attach_type ? 0 : -EINVAL;
+	default:
+		return 0;
+	}
+}
+
 #define BPF_PROG_ATTACH_LAST_FIELD attach_flags
 
 static int sockmap_get_from_fd(const union bpf_attr *attr,
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index a93590cdd9e1..f7674d676889 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -454,6 +454,7 @@ static int __init crash_save_vmcoreinfo_init(void)
 	VMCOREINFO_NUMBER(PG_lru);
 	VMCOREINFO_NUMBER(PG_private);
 	VMCOREINFO_NUMBER(PG_swapcache);
+	VMCOREINFO_NUMBER(PG_swapbacked);
 	VMCOREINFO_NUMBER(PG_slab);
 #ifdef CONFIG_MEMORY_FAILURE
 	VMCOREINFO_NUMBER(PG_hwpoison);
diff --git a/kernel/debug/kdb/kdb_bp.c b/kernel/debug/kdb/kdb_bp.c
index 90ff129c88a2..62c301ad0773 100644
--- a/kernel/debug/kdb/kdb_bp.c
+++ b/kernel/debug/kdb/kdb_bp.c
@@ -242,11 +242,11 @@ static void kdb_printbp(kdb_bp_t *bp, int i)
 	kdb_symbol_print(bp->bp_addr, NULL, KDB_SP_DEFAULT);
 
 	if (bp->bp_enabled)
-		kdb_printf("\n    is enabled");
+		kdb_printf("\n    is enabled ");
 	else
 		kdb_printf("\n    is disabled");
 
-	kdb_printf("\taddr at %016lx, hardtype=%d installed=%d\n",
+	kdb_printf("  addr at %016lx, hardtype=%d installed=%d\n",
 		   bp->bp_addr, bp->bp_type, bp->bp_installed);
 
 	kdb_printf("\n");
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index dbb0781a0533..e405677ee08d 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -1150,6 +1150,16 @@ void kdb_set_current_task(struct task_struct *p)
 	kdb_current_regs = NULL;
 }
 
+static void drop_newline(char *buf)
+{
+	size_t len = strlen(buf);
+
+	if (len == 0)
+		return;
+	if (*(buf + len - 1) == '\n')
+		*(buf + len - 1) = '\0';
+}
+
 /*
  * kdb_local - The main code for kdb.  This routine is invoked on a
  *	specific processor, it is not global.  The main kdb() routine
@@ -1327,6 +1337,7 @@ do_full_getstr:
 		cmdptr = cmd_head;
 		diag = kdb_parse(cmdbuf);
 		if (diag == KDB_NOTFOUND) {
+			drop_newline(cmdbuf);
 			kdb_printf("Unknown kdb command: '%s'\n", cmdbuf);
 			diag = 0;
 		}
@@ -1566,6 +1577,7 @@ static int kdb_md(int argc, const char **argv)
 	int symbolic = 0;
 	int valid = 0;
 	int phys = 0;
+	int raw = 0;
 
 	kdbgetintenv("MDCOUNT", &mdcount);
 	kdbgetintenv("RADIX", &radix);
@@ -1575,9 +1587,10 @@ static int kdb_md(int argc, const char **argv)
 	repeat = mdcount * 16 / bytesperword;
 
 	if (strcmp(argv[0], "mdr") == 0) {
-		if (argc != 2)
+		if (argc == 2 || (argc == 0 && last_addr != 0))
+			valid = raw = 1;
+		else
 			return KDB_ARGCOUNT;
-		valid = 1;
 	} else if (isdigit(argv[0][2])) {
 		bytesperword = (int)(argv[0][2] - '0');
 		if (bytesperword == 0) {
@@ -1613,7 +1626,10 @@ static int kdb_md(int argc, const char **argv)
 		radix = last_radix;
 		bytesperword = last_bytesperword;
 		repeat = last_repeat;
-		mdcount = ((repeat * bytesperword) + 15) / 16;
+		if (raw)
+			mdcount = repeat;
+		else
+			mdcount = ((repeat * bytesperword) + 15) / 16;
 	}
 
 	if (argc) {
@@ -1630,7 +1646,10 @@ static int kdb_md(int argc, const char **argv)
 			diag = kdbgetularg(argv[nextarg], &val);
 			if (!diag) {
 				mdcount = (int) val;
-				repeat = mdcount * 16 / bytesperword;
+				if (raw)
+					repeat = mdcount;
+				else
+					repeat = mdcount * 16 / bytesperword;
 			}
 		}
 		if (argc >= nextarg+1) {
@@ -1640,8 +1659,15 @@ static int kdb_md(int argc, const char **argv)
 		}
 	}
 
-	if (strcmp(argv[0], "mdr") == 0)
-		return kdb_mdr(addr, mdcount);
+	if (strcmp(argv[0], "mdr") == 0) {
+		int ret;
+		last_addr = addr;
+		ret = kdb_mdr(addr, mdcount);
+		last_addr += mdcount;
+		last_repeat = mdcount;
+		last_bytesperword = bytesperword; // to make REPEAT happy
+		return ret;
+	}
 
 	switch (radix) {
 	case 10:
@@ -2473,41 +2499,6 @@ static int kdb_kill(int argc, const char **argv)
 	return 0;
 }
 
-struct kdb_tm {
-	int tm_sec;	/* seconds */
-	int tm_min;	/* minutes */
-	int tm_hour;	/* hours */
-	int tm_mday;	/* day of the month */
-	int tm_mon;	/* month */
-	int tm_year;	/* year */
-};
-
-static void kdb_gmtime(struct timespec *tv, struct kdb_tm *tm)
-{
-	/* This will work from 1970-2099, 2100 is not a leap year */
-	static int mon_day[] = { 31, 29, 31, 30, 31, 30, 31,
-				 31, 30, 31, 30, 31 };
-	memset(tm, 0, sizeof(*tm));
-	tm->tm_sec  = tv->tv_sec % (24 * 60 * 60);
-	tm->tm_mday = tv->tv_sec / (24 * 60 * 60) +
-		(2 * 365 + 1); /* shift base from 1970 to 1968 */
-	tm->tm_min =  tm->tm_sec / 60 % 60;
-	tm->tm_hour = tm->tm_sec / 60 / 60;
-	tm->tm_sec =  tm->tm_sec % 60;
-	tm->tm_year = 68 + 4*(tm->tm_mday / (4*365+1));
-	tm->tm_mday %= (4*365+1);
-	mon_day[1] = 29;
-	while (tm->tm_mday >= mon_day[tm->tm_mon]) {
-		tm->tm_mday -= mon_day[tm->tm_mon];
-		if (++tm->tm_mon == 12) {
-			tm->tm_mon = 0;
-			++tm->tm_year;
-			mon_day[1] = 28;
-		}
-	}
-	++tm->tm_mday;
-}
-
 /*
  * Most of this code has been lifted from kernel/timer.c::sys_sysinfo().
  * I cannot call that code directly from kdb, it has an unconditional
@@ -2515,10 +2506,10 @@ static void kdb_gmtime(struct timespec *tv, struct kdb_tm *tm)
  */
 static void kdb_sysinfo(struct sysinfo *val)
 {
-	struct timespec uptime;
-	ktime_get_ts(&uptime);
+	u64 uptime = ktime_get_mono_fast_ns();
+
 	memset(val, 0, sizeof(*val));
-	val->uptime = uptime.tv_sec;
+	val->uptime = div_u64(uptime, NSEC_PER_SEC);
 	val->loads[0] = avenrun[0];
 	val->loads[1] = avenrun[1];
 	val->loads[2] = avenrun[2];
@@ -2533,8 +2524,8 @@ static void kdb_sysinfo(struct sysinfo *val)
  */
 static int kdb_summary(int argc, const char **argv)
 {
-	struct timespec now;
-	struct kdb_tm tm;
+	time64_t now;
+	struct tm tm;
 	struct sysinfo val;
 
 	if (argc)
@@ -2548,9 +2539,9 @@ static int kdb_summary(int argc, const char **argv)
 	kdb_printf("domainname %s\n", init_uts_ns.name.domainname);
 	kdb_printf("ccversion  %s\n", __stringify(CCVERSION));
 
-	now = __current_kernel_time();
-	kdb_gmtime(&now, &tm);
-	kdb_printf("date       %04d-%02d-%02d %02d:%02d:%02d "
+	now = __ktime_get_real_seconds();
+	time64_to_tm(now, 0, &tm);
+	kdb_printf("date       %04ld-%02d-%02d %02d:%02d:%02d "
 		   "tz_minuteswest %d\n",
 		1900+tm.tm_year, tm.tm_mon+1, tm.tm_mday,
 		tm.tm_hour, tm.tm_min, tm.tm_sec,
diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c
index d35cc2d3a4cc..990b3cc526c8 100644
--- a/kernel/debug/kdb/kdb_support.c
+++ b/kernel/debug/kdb/kdb_support.c
@@ -129,13 +129,13 @@ int kdbnearsym(unsigned long addr, kdb_symtab_t *symtab)
 		}
 		if (i >= ARRAY_SIZE(kdb_name_table)) {
 			debug_kfree(kdb_name_table[0]);
-			memcpy(kdb_name_table, kdb_name_table+1,
+			memmove(kdb_name_table, kdb_name_table+1,
 			       sizeof(kdb_name_table[0]) *
 			       (ARRAY_SIZE(kdb_name_table)-1));
 		} else {
 			debug_kfree(knt1);
 			knt1 = kdb_name_table[i];
-			memcpy(kdb_name_table+i, kdb_name_table+i+1,
+			memmove(kdb_name_table+i, kdb_name_table+i+1,
 			       sizeof(kdb_name_table[0]) *
 			       (ARRAY_SIZE(kdb_name_table)-i-1));
 		}
diff --git a/kernel/events/core.c b/kernel/events/core.c
index fc1c330c6bd6..2d5fe26551f8 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4447,6 +4447,9 @@ static void _free_event(struct perf_event *event)
 	if (event->ctx)
 		put_ctx(event->ctx);
 
+	if (event->hw.target)
+		put_task_struct(event->hw.target);
+
 	exclusive_event_destroy(event);
 	module_put(event->pmu->module);
 
@@ -8397,6 +8400,10 @@ static int perf_kprobe_event_init(struct perf_event *event)
 
 	if (event->attr.type != perf_kprobe.type)
 		return -ENOENT;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
 	/*
 	 * no branch sampling for probe events
 	 */
@@ -8434,6 +8441,10 @@ static int perf_uprobe_event_init(struct perf_event *event)
 
 	if (event->attr.type != perf_uprobe.type)
 		return -ENOENT;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
 	/*
 	 * no branch sampling for probe events
 	 */
@@ -9955,6 +9966,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 		 * and we cannot use the ctx information because we need the
 		 * pmu before we get a ctx.
 		 */
+		get_task_struct(task);
 		event->hw.target = task;
 	}
 
@@ -10070,6 +10082,8 @@ err_ns:
 		perf_detach_cgroup(event);
 	if (event->ns)
 		put_pid_ns(event->ns);
+	if (event->hw.target)
+		put_task_struct(event->hw.target);
 	kfree(event);
 
 	return ERR_PTR(err);
diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
index a37a3b4b6342..f4f29b9d90ee 100644
--- a/kernel/irq/affinity.c
+++ b/kernel/irq/affinity.c
@@ -39,7 +39,7 @@ static void irq_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk,
 	}
 }
 
-static cpumask_var_t *alloc_node_to_possible_cpumask(void)
+static cpumask_var_t *alloc_node_to_cpumask(void)
 {
 	cpumask_var_t *masks;
 	int node;
@@ -62,7 +62,7 @@ out_unwind:
 	return NULL;
 }
 
-static void free_node_to_possible_cpumask(cpumask_var_t *masks)
+static void free_node_to_cpumask(cpumask_var_t *masks)
 {
 	int node;
 
@@ -71,7 +71,7 @@ static void free_node_to_possible_cpumask(cpumask_var_t *masks)
 	kfree(masks);
 }
 
-static void build_node_to_possible_cpumask(cpumask_var_t *masks)
+static void build_node_to_cpumask(cpumask_var_t *masks)
 {
 	int cpu;
 
@@ -79,14 +79,14 @@ static void build_node_to_possible_cpumask(cpumask_var_t *masks)
 		cpumask_set_cpu(cpu, masks[cpu_to_node(cpu)]);
 }
 
-static int get_nodes_in_cpumask(cpumask_var_t *node_to_possible_cpumask,
+static int get_nodes_in_cpumask(cpumask_var_t *node_to_cpumask,
 				const struct cpumask *mask, nodemask_t *nodemsk)
 {
 	int n, nodes = 0;
 
 	/* Calculate the number of nodes in the supplied affinity mask */
 	for_each_node(n) {
-		if (cpumask_intersects(mask, node_to_possible_cpumask[n])) {
+		if (cpumask_intersects(mask, node_to_cpumask[n])) {
 			node_set(n, *nodemsk);
 			nodes++;
 		}
@@ -94,73 +94,46 @@ static int get_nodes_in_cpumask(cpumask_var_t *node_to_possible_cpumask,
 	return nodes;
 }
 
-/**
- * irq_create_affinity_masks - Create affinity masks for multiqueue spreading
- * @nvecs:	The total number of vectors
- * @affd:	Description of the affinity requirements
- *
- * Returns the masks pointer or NULL if allocation failed.
- */
-struct cpumask *
-irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
+static int irq_build_affinity_masks(const struct irq_affinity *affd,
+				    int startvec, int numvecs,
+				    cpumask_var_t *node_to_cpumask,
+				    const struct cpumask *cpu_mask,
+				    struct cpumask *nmsk,
+				    struct cpumask *masks)
 {
-	int n, nodes, cpus_per_vec, extra_vecs, curvec;
-	int affv = nvecs - affd->pre_vectors - affd->post_vectors;
-	int last_affv = affv + affd->pre_vectors;
+	int n, nodes, cpus_per_vec, extra_vecs, done = 0;
+	int last_affv = affd->pre_vectors + numvecs;
+	int curvec = startvec;
 	nodemask_t nodemsk = NODE_MASK_NONE;
-	struct cpumask *masks;
-	cpumask_var_t nmsk, *node_to_possible_cpumask;
-
-	/*
-	 * If there aren't any vectors left after applying the pre/post
-	 * vectors don't bother with assigning affinity.
-	 */
-	if (!affv)
-		return NULL;
-
-	if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL))
-		return NULL;
-
-	masks = kcalloc(nvecs, sizeof(*masks), GFP_KERNEL);
-	if (!masks)
-		goto out;
 
-	node_to_possible_cpumask = alloc_node_to_possible_cpumask();
-	if (!node_to_possible_cpumask)
-		goto out;
+	if (!cpumask_weight(cpu_mask))
+		return 0;
 
-	/* Fill out vectors at the beginning that don't need affinity */
-	for (curvec = 0; curvec < affd->pre_vectors; curvec++)
-		cpumask_copy(masks + curvec, irq_default_affinity);
-
-	/* Stabilize the cpumasks */
-	get_online_cpus();
-	build_node_to_possible_cpumask(node_to_possible_cpumask);
-	nodes = get_nodes_in_cpumask(node_to_possible_cpumask, cpu_possible_mask,
-				     &nodemsk);
+	nodes = get_nodes_in_cpumask(node_to_cpumask, cpu_mask, &nodemsk);
 
 	/*
 	 * If the number of nodes in the mask is greater than or equal the
 	 * number of vectors we just spread the vectors across the nodes.
 	 */
-	if (affv <= nodes) {
+	if (numvecs <= nodes) {
 		for_each_node_mask(n, nodemsk) {
-			cpumask_copy(masks + curvec,
-				     node_to_possible_cpumask[n]);
-			if (++curvec == last_affv)
+			cpumask_copy(masks + curvec, node_to_cpumask[n]);
+			if (++done == numvecs)
 				break;
+			if (++curvec == last_affv)
+				curvec = affd->pre_vectors;
 		}
-		goto done;
+		goto out;
 	}
 
 	for_each_node_mask(n, nodemsk) {
 		int ncpus, v, vecs_to_assign, vecs_per_node;
 
 		/* Spread the vectors per node */
-		vecs_per_node = (affv - (curvec - affd->pre_vectors)) / nodes;
+		vecs_per_node = (numvecs - (curvec - affd->pre_vectors)) / nodes;
 
 		/* Get the cpus on this node which are in the mask */
-		cpumask_and(nmsk, cpu_possible_mask, node_to_possible_cpumask[n]);
+		cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]);
 
 		/* Calculate the number of cpus per vector */
 		ncpus = cpumask_weight(nmsk);
@@ -181,19 +154,96 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
 			irq_spread_init_one(masks + curvec, nmsk, cpus_per_vec);
 		}
 
-		if (curvec >= last_affv)
+		done += v;
+		if (done >= numvecs)
 			break;
+		if (curvec >= last_affv)
+			curvec = affd->pre_vectors;
 		--nodes;
 	}
 
-done:
+out:
+	return done;
+}
+
+/**
+ * irq_create_affinity_masks - Create affinity masks for multiqueue spreading
+ * @nvecs:	The total number of vectors
+ * @affd:	Description of the affinity requirements
+ *
+ * Returns the masks pointer or NULL if allocation failed.
+ */
+struct cpumask *
+irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
+{
+	int affvecs = nvecs - affd->pre_vectors - affd->post_vectors;
+	int curvec, usedvecs;
+	cpumask_var_t nmsk, npresmsk, *node_to_cpumask;
+	struct cpumask *masks = NULL;
+
+	/*
+	 * If there aren't any vectors left after applying the pre/post
+	 * vectors don't bother with assigning affinity.
+	 */
+	if (nvecs == affd->pre_vectors + affd->post_vectors)
+		return NULL;
+
+	if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL))
+		return NULL;
+
+	if (!zalloc_cpumask_var(&npresmsk, GFP_KERNEL))
+		goto outcpumsk;
+
+	node_to_cpumask = alloc_node_to_cpumask();
+	if (!node_to_cpumask)
+		goto outnpresmsk;
+
+	masks = kcalloc(nvecs, sizeof(*masks), GFP_KERNEL);
+	if (!masks)
+		goto outnodemsk;
+
+	/* Fill out vectors at the beginning that don't need affinity */
+	for (curvec = 0; curvec < affd->pre_vectors; curvec++)
+		cpumask_copy(masks + curvec, irq_default_affinity);
+
+	/* Stabilize the cpumasks */
+	get_online_cpus();
+	build_node_to_cpumask(node_to_cpumask);
+
+	/* Spread on present CPUs starting from affd->pre_vectors */
+	usedvecs = irq_build_affinity_masks(affd, curvec, affvecs,
+					    node_to_cpumask, cpu_present_mask,
+					    nmsk, masks);
+
+	/*
+	 * Spread on non present CPUs starting from the next vector to be
+	 * handled. If the spreading of present CPUs already exhausted the
+	 * vector space, assign the non present CPUs to the already spread
+	 * out vectors.
+	 */
+	if (usedvecs >= affvecs)
+		curvec = affd->pre_vectors;
+	else
+		curvec = affd->pre_vectors + usedvecs;
+	cpumask_andnot(npresmsk, cpu_possible_mask, cpu_present_mask);
+	usedvecs += irq_build_affinity_masks(affd, curvec, affvecs,
+					     node_to_cpumask, npresmsk,
+					     nmsk, masks);
 	put_online_cpus();
 
 	/* Fill out vectors at the end that don't need affinity */
+	if (usedvecs >= affvecs)
+		curvec = affd->pre_vectors + affvecs;
+	else
+		curvec = affd->pre_vectors + usedvecs;
 	for (; curvec < nvecs; curvec++)
 		cpumask_copy(masks + curvec, irq_default_affinity);
-	free_node_to_possible_cpumask(node_to_possible_cpumask);
-out:
+
+outnodemsk:
+	free_node_to_cpumask(node_to_cpumask);
+outnpresmsk:
+	free_cpumask_var(npresmsk);
+outcpumsk:
 	free_cpumask_var(nmsk);
 	return masks;
 }
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index e5bcd94c1efb..75d8e7cf040e 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -22,50 +22,123 @@
 #include <linux/ima.h>
 #include <crypto/hash.h>
 #include <crypto/sha.h>
+#include <linux/elf.h>
+#include <linux/elfcore.h>
+#include <linux/kernel.h>
+#include <linux/kexec.h>
+#include <linux/slab.h>
 #include <linux/syscalls.h>
 #include <linux/vmalloc.h>
 #include "kexec_internal.h"
 
 static int kexec_calculate_store_digests(struct kimage *image);
 
+/*
+ * Currently this is the only default function that is exported as some
+ * architectures need it to do additional handlings.
+ * In the future, other default functions may be exported too if required.
+ */
+int kexec_image_probe_default(struct kimage *image, void *buf,
+			      unsigned long buf_len)
+{
+	const struct kexec_file_ops * const *fops;
+	int ret = -ENOEXEC;
+
+	for (fops = &kexec_file_loaders[0]; *fops && (*fops)->probe; ++fops) {
+		ret = (*fops)->probe(buf, buf_len);
+		if (!ret) {
+			image->fops = *fops;
+			return ret;
+		}
+	}
+
+	return ret;
+}
+
 /* Architectures can provide this probe function */
 int __weak arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
 					 unsigned long buf_len)
 {
-	return -ENOEXEC;
+	return kexec_image_probe_default(image, buf, buf_len);
+}
+
+static void *kexec_image_load_default(struct kimage *image)
+{
+	if (!image->fops || !image->fops->load)
+		return ERR_PTR(-ENOEXEC);
+
+	return image->fops->load(image, image->kernel_buf,
+				 image->kernel_buf_len, image->initrd_buf,
+				 image->initrd_buf_len, image->cmdline_buf,
+				 image->cmdline_buf_len);
 }
 
 void * __weak arch_kexec_kernel_image_load(struct kimage *image)
 {
-	return ERR_PTR(-ENOEXEC);
+	return kexec_image_load_default(image);
+}
+
+static int kexec_image_post_load_cleanup_default(struct kimage *image)
+{
+	if (!image->fops || !image->fops->cleanup)
+		return 0;
+
+	return image->fops->cleanup(image->image_loader_data);
 }
 
 int __weak arch_kimage_file_post_load_cleanup(struct kimage *image)
 {
-	return -EINVAL;
+	return kexec_image_post_load_cleanup_default(image);
 }
 
 #ifdef CONFIG_KEXEC_VERIFY_SIG
+static int kexec_image_verify_sig_default(struct kimage *image, void *buf,
+					  unsigned long buf_len)
+{
+	if (!image->fops || !image->fops->verify_sig) {
+		pr_debug("kernel loader does not support signature verification.\n");
+		return -EKEYREJECTED;
+	}
+
+	return image->fops->verify_sig(buf, buf_len);
+}
+
 int __weak arch_kexec_kernel_verify_sig(struct kimage *image, void *buf,
 					unsigned long buf_len)
 {
-	return -EKEYREJECTED;
+	return kexec_image_verify_sig_default(image, buf, buf_len);
 }
 #endif
 
-/* Apply relocations of type RELA */
+/*
+ * arch_kexec_apply_relocations_add - apply relocations of type RELA
+ * @pi:		Purgatory to be relocated.
+ * @section:	Section relocations applying to.
+ * @relsec:	Section containing RELAs.
+ * @symtab:	Corresponding symtab.
+ *
+ * Return: 0 on success, negative errno on error.
+ */
 int __weak
-arch_kexec_apply_relocations_add(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
-				 unsigned int relsec)
+arch_kexec_apply_relocations_add(struct purgatory_info *pi, Elf_Shdr *section,
+				 const Elf_Shdr *relsec, const Elf_Shdr *symtab)
 {
 	pr_err("RELA relocation unsupported.\n");
 	return -ENOEXEC;
 }
 
-/* Apply relocations of type REL */
+/*
+ * arch_kexec_apply_relocations - apply relocations of type REL
+ * @pi:		Purgatory to be relocated.
+ * @section:	Section relocations applying to.
+ * @relsec:	Section containing RELs.
+ * @symtab:	Corresponding symtab.
+ *
+ * Return: 0 on success, negative errno on error.
+ */
 int __weak
-arch_kexec_apply_relocations(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
-			     unsigned int relsec)
+arch_kexec_apply_relocations(struct purgatory_info *pi, Elf_Shdr *section,
+			     const Elf_Shdr *relsec, const Elf_Shdr *symtab)
 {
 	pr_err("REL relocation unsupported.\n");
 	return -ENOEXEC;
@@ -532,6 +605,9 @@ static int kexec_calculate_store_digests(struct kimage *image)
 	struct kexec_sha_region *sha_regions;
 	struct purgatory_info *pi = &image->purgatory_info;
 
+	if (!IS_ENABLED(CONFIG_ARCH_HAS_KEXEC_PURGATORY))
+		return 0;
+
 	zero_buf = __va(page_to_pfn(ZERO_PAGE(0)) << PAGE_SHIFT);
 	zero_buf_sz = PAGE_SIZE;
 
@@ -633,87 +709,29 @@ out:
 	return ret;
 }
 
-/* Actually load purgatory. Lot of code taken from kexec-tools */
-static int __kexec_load_purgatory(struct kimage *image, unsigned long min,
-				  unsigned long max, int top_down)
+#ifdef CONFIG_ARCH_HAS_KEXEC_PURGATORY
+/*
+ * kexec_purgatory_setup_kbuf - prepare buffer to load purgatory.
+ * @pi:		Purgatory to be loaded.
+ * @kbuf:	Buffer to setup.
+ *
+ * Allocates the memory needed for the buffer. Caller is responsible to free
+ * the memory after use.
+ *
+ * Return: 0 on success, negative errno on error.
+ */
+static int kexec_purgatory_setup_kbuf(struct purgatory_info *pi,
+				      struct kexec_buf *kbuf)
 {
-	struct purgatory_info *pi = &image->purgatory_info;
-	unsigned long align, bss_align, bss_sz, bss_pad;
-	unsigned long entry, load_addr, curr_load_addr, bss_addr, offset;
-	unsigned char *buf_addr, *src;
-	int i, ret = 0, entry_sidx = -1;
-	const Elf_Shdr *sechdrs_c;
-	Elf_Shdr *sechdrs = NULL;
-	struct kexec_buf kbuf = { .image = image, .bufsz = 0, .buf_align = 1,
-				  .buf_min = min, .buf_max = max,
-				  .top_down = top_down };
-
-	/*
-	 * sechdrs_c points to section headers in purgatory and are read
-	 * only. No modifications allowed.
-	 */
-	sechdrs_c = (void *)pi->ehdr + pi->ehdr->e_shoff;
-
-	/*
-	 * We can not modify sechdrs_c[] and its fields. It is read only.
-	 * Copy it over to a local copy where one can store some temporary
-	 * data and free it at the end. We need to modify ->sh_addr and
-	 * ->sh_offset fields to keep track of permanent and temporary
-	 * locations of sections.
-	 */
-	sechdrs = vzalloc(pi->ehdr->e_shnum * sizeof(Elf_Shdr));
-	if (!sechdrs)
-		return -ENOMEM;
-
-	memcpy(sechdrs, sechdrs_c, pi->ehdr->e_shnum * sizeof(Elf_Shdr));
-
-	/*
-	 * We seem to have multiple copies of sections. First copy is which
-	 * is embedded in kernel in read only section. Some of these sections
-	 * will be copied to a temporary buffer and relocated. And these
-	 * sections will finally be copied to their final destination at
-	 * segment load time.
-	 *
-	 * Use ->sh_offset to reflect section address in memory. It will
-	 * point to original read only copy if section is not allocatable.
-	 * Otherwise it will point to temporary copy which will be relocated.
-	 *
-	 * Use ->sh_addr to contain final address of the section where it
-	 * will go during execution time.
-	 */
-	for (i = 0; i < pi->ehdr->e_shnum; i++) {
-		if (sechdrs[i].sh_type == SHT_NOBITS)
-			continue;
-
-		sechdrs[i].sh_offset = (unsigned long)pi->ehdr +
-						sechdrs[i].sh_offset;
-	}
-
-	/*
-	 * Identify entry point section and make entry relative to section
-	 * start.
-	 */
-	entry = pi->ehdr->e_entry;
-	for (i = 0; i < pi->ehdr->e_shnum; i++) {
-		if (!(sechdrs[i].sh_flags & SHF_ALLOC))
-			continue;
-
-		if (!(sechdrs[i].sh_flags & SHF_EXECINSTR))
-			continue;
-
-		/* Make entry section relative */
-		if (sechdrs[i].sh_addr <= pi->ehdr->e_entry &&
-		    ((sechdrs[i].sh_addr + sechdrs[i].sh_size) >
-		     pi->ehdr->e_entry)) {
-			entry_sidx = i;
-			entry -= sechdrs[i].sh_addr;
-			break;
-		}
-	}
+	const Elf_Shdr *sechdrs;
+	unsigned long bss_align;
+	unsigned long bss_sz;
+	unsigned long align;
+	int i, ret;
 
-	/* Determine how much memory is needed to load relocatable object. */
-	bss_align = 1;
-	bss_sz = 0;
+	sechdrs = (void *)pi->ehdr + pi->ehdr->e_shoff;
+	kbuf->buf_align = bss_align = 1;
+	kbuf->bufsz = bss_sz = 0;
 
 	for (i = 0; i < pi->ehdr->e_shnum; i++) {
 		if (!(sechdrs[i].sh_flags & SHF_ALLOC))
@@ -721,111 +739,124 @@ static int __kexec_load_purgatory(struct kimage *image, unsigned long min,
 
 		align = sechdrs[i].sh_addralign;
 		if (sechdrs[i].sh_type != SHT_NOBITS) {
-			if (kbuf.buf_align < align)
-				kbuf.buf_align = align;
-			kbuf.bufsz = ALIGN(kbuf.bufsz, align);
-			kbuf.bufsz += sechdrs[i].sh_size;
+			if (kbuf->buf_align < align)
+				kbuf->buf_align = align;
+			kbuf->bufsz = ALIGN(kbuf->bufsz, align);
+			kbuf->bufsz += sechdrs[i].sh_size;
 		} else {
-			/* bss section */
 			if (bss_align < align)
 				bss_align = align;
 			bss_sz = ALIGN(bss_sz, align);
 			bss_sz += sechdrs[i].sh_size;
 		}
 	}
+	kbuf->bufsz = ALIGN(kbuf->bufsz, bss_align);
+	kbuf->memsz = kbuf->bufsz + bss_sz;
+	if (kbuf->buf_align < bss_align)
+		kbuf->buf_align = bss_align;
 
-	/* Determine the bss padding required to align bss properly */
-	bss_pad = 0;
-	if (kbuf.bufsz & (bss_align - 1))
-		bss_pad = bss_align - (kbuf.bufsz & (bss_align - 1));
-
-	kbuf.memsz = kbuf.bufsz + bss_pad + bss_sz;
+	kbuf->buffer = vzalloc(kbuf->bufsz);
+	if (!kbuf->buffer)
+		return -ENOMEM;
+	pi->purgatory_buf = kbuf->buffer;
 
-	/* Allocate buffer for purgatory */
-	kbuf.buffer = vzalloc(kbuf.bufsz);
-	if (!kbuf.buffer) {
-		ret = -ENOMEM;
+	ret = kexec_add_buffer(kbuf);
+	if (ret)
 		goto out;
-	}
 
-	if (kbuf.buf_align < bss_align)
-		kbuf.buf_align = bss_align;
+	return 0;
+out:
+	vfree(pi->purgatory_buf);
+	pi->purgatory_buf = NULL;
+	return ret;
+}
 
-	/* Add buffer to segment list */
-	ret = kexec_add_buffer(&kbuf);
-	if (ret)
-		goto out;
-	pi->purgatory_load_addr = kbuf.mem;
+/*
+ * kexec_purgatory_setup_sechdrs - prepares the pi->sechdrs buffer.
+ * @pi:		Purgatory to be loaded.
+ * @kbuf:	Buffer prepared to store purgatory.
+ *
+ * Allocates the memory needed for the buffer. Caller is responsible to free
+ * the memory after use.
+ *
+ * Return: 0 on success, negative errno on error.
+ */
+static int kexec_purgatory_setup_sechdrs(struct purgatory_info *pi,
+					 struct kexec_buf *kbuf)
+{
+	unsigned long bss_addr;
+	unsigned long offset;
+	Elf_Shdr *sechdrs;
+	int i;
+
+	/*
+	 * The section headers in kexec_purgatory are read-only. In order to
+	 * have them modifiable make a temporary copy.
+	 */
+	sechdrs = vzalloc(pi->ehdr->e_shnum * sizeof(Elf_Shdr));
+	if (!sechdrs)
+		return -ENOMEM;
+	memcpy(sechdrs, (void *)pi->ehdr + pi->ehdr->e_shoff,
+	       pi->ehdr->e_shnum * sizeof(Elf_Shdr));
+	pi->sechdrs = sechdrs;
 
-	/* Load SHF_ALLOC sections */
-	buf_addr = kbuf.buffer;
-	load_addr = curr_load_addr = pi->purgatory_load_addr;
-	bss_addr = load_addr + kbuf.bufsz + bss_pad;
+	offset = 0;
+	bss_addr = kbuf->mem + kbuf->bufsz;
+	kbuf->image->start = pi->ehdr->e_entry;
 
 	for (i = 0; i < pi->ehdr->e_shnum; i++) {
+		unsigned long align;
+		void *src, *dst;
+
 		if (!(sechdrs[i].sh_flags & SHF_ALLOC))
 			continue;
 
 		align = sechdrs[i].sh_addralign;
-		if (sechdrs[i].sh_type != SHT_NOBITS) {
-			curr_load_addr = ALIGN(curr_load_addr, align);
-			offset = curr_load_addr - load_addr;
-			/* We already modifed ->sh_offset to keep src addr */
-			src = (char *) sechdrs[i].sh_offset;
-			memcpy(buf_addr + offset, src, sechdrs[i].sh_size);
-
-			/* Store load address and source address of section */
-			sechdrs[i].sh_addr = curr_load_addr;
-
-			/*
-			 * This section got copied to temporary buffer. Update
-			 * ->sh_offset accordingly.
-			 */
-			sechdrs[i].sh_offset = (unsigned long)(buf_addr + offset);
-
-			/* Advance to the next address */
-			curr_load_addr += sechdrs[i].sh_size;
-		} else {
+		if (sechdrs[i].sh_type == SHT_NOBITS) {
 			bss_addr = ALIGN(bss_addr, align);
 			sechdrs[i].sh_addr = bss_addr;
 			bss_addr += sechdrs[i].sh_size;
+			continue;
 		}
-	}
 
-	/* Update entry point based on load address of text section */
-	if (entry_sidx >= 0)
-		entry += sechdrs[entry_sidx].sh_addr;
+		offset = ALIGN(offset, align);
+		if (sechdrs[i].sh_flags & SHF_EXECINSTR &&
+		    pi->ehdr->e_entry >= sechdrs[i].sh_addr &&
+		    pi->ehdr->e_entry < (sechdrs[i].sh_addr
+					 + sechdrs[i].sh_size)) {
+			kbuf->image->start -= sechdrs[i].sh_addr;
+			kbuf->image->start += kbuf->mem + offset;
+		}
 
-	/* Make kernel jump to purgatory after shutdown */
-	image->start = entry;
+		src = (void *)pi->ehdr + sechdrs[i].sh_offset;
+		dst = pi->purgatory_buf + offset;
+		memcpy(dst, src, sechdrs[i].sh_size);
 
-	/* Used later to get/set symbol values */
-	pi->sechdrs = sechdrs;
+		sechdrs[i].sh_addr = kbuf->mem + offset;
+		sechdrs[i].sh_offset = offset;
+		offset += sechdrs[i].sh_size;
+	}
 
-	/*
-	 * Used later to identify which section is purgatory and skip it
-	 * from checksumming.
-	 */
-	pi->purgatory_buf = kbuf.buffer;
-	return ret;
-out:
-	vfree(sechdrs);
-	vfree(kbuf.buffer);
-	return ret;
+	return 0;
 }
 
 static int kexec_apply_relocations(struct kimage *image)
 {
 	int i, ret;
 	struct purgatory_info *pi = &image->purgatory_info;
-	Elf_Shdr *sechdrs = pi->sechdrs;
+	const Elf_Shdr *sechdrs;
+
+	sechdrs = (void *)pi->ehdr + pi->ehdr->e_shoff;
 
-	/* Apply relocations */
 	for (i = 0; i < pi->ehdr->e_shnum; i++) {
-		Elf_Shdr *section, *symtab;
+		const Elf_Shdr *relsec;
+		const Elf_Shdr *symtab;
+		Elf_Shdr *section;
+
+		relsec = sechdrs + i;
 
-		if (sechdrs[i].sh_type != SHT_RELA &&
-		    sechdrs[i].sh_type != SHT_REL)
+		if (relsec->sh_type != SHT_RELA &&
+		    relsec->sh_type != SHT_REL)
 			continue;
 
 		/*
@@ -834,12 +865,12 @@ static int kexec_apply_relocations(struct kimage *image)
 		 * symbol table. And ->sh_info contains section header
 		 * index of section to which relocations apply.
 		 */
-		if (sechdrs[i].sh_info >= pi->ehdr->e_shnum ||
-		    sechdrs[i].sh_link >= pi->ehdr->e_shnum)
+		if (relsec->sh_info >= pi->ehdr->e_shnum ||
+		    relsec->sh_link >= pi->ehdr->e_shnum)
 			return -ENOEXEC;
 
-		section = &sechdrs[sechdrs[i].sh_info];
-		symtab = &sechdrs[sechdrs[i].sh_link];
+		section = pi->sechdrs + relsec->sh_info;
+		symtab = sechdrs + relsec->sh_link;
 
 		if (!(section->sh_flags & SHF_ALLOC))
 			continue;
@@ -856,12 +887,12 @@ static int kexec_apply_relocations(struct kimage *image)
 		 * Respective architecture needs to provide support for applying
 		 * relocations of type SHT_RELA/SHT_REL.
 		 */
-		if (sechdrs[i].sh_type == SHT_RELA)
-			ret = arch_kexec_apply_relocations_add(pi->ehdr,
-							       sechdrs, i);
-		else if (sechdrs[i].sh_type == SHT_REL)
-			ret = arch_kexec_apply_relocations(pi->ehdr,
-							   sechdrs, i);
+		if (relsec->sh_type == SHT_RELA)
+			ret = arch_kexec_apply_relocations_add(pi, section,
+							       relsec, symtab);
+		else if (relsec->sh_type == SHT_REL)
+			ret = arch_kexec_apply_relocations(pi, section,
+							   relsec, symtab);
 		if (ret)
 			return ret;
 	}
@@ -869,10 +900,18 @@ static int kexec_apply_relocations(struct kimage *image)
 	return 0;
 }
 
-/* Load relocatable purgatory object and relocate it appropriately */
-int kexec_load_purgatory(struct kimage *image, unsigned long min,
-			 unsigned long max, int top_down,
-			 unsigned long *load_addr)
+/*
+ * kexec_load_purgatory - Load and relocate the purgatory object.
+ * @image:	Image to add the purgatory to.
+ * @kbuf:	Memory parameters to use.
+ *
+ * Allocates the memory needed for image->purgatory_info.sechdrs and
+ * image->purgatory_info.purgatory_buf/kbuf->buffer. Caller is responsible
+ * to free the memory after use.
+ *
+ * Return: 0 on success, negative errno on error.
+ */
+int kexec_load_purgatory(struct kimage *image, struct kexec_buf *kbuf)
 {
 	struct purgatory_info *pi = &image->purgatory_info;
 	int ret;
@@ -880,55 +919,51 @@ int kexec_load_purgatory(struct kimage *image, unsigned long min,
 	if (kexec_purgatory_size <= 0)
 		return -EINVAL;
 
-	if (kexec_purgatory_size < sizeof(Elf_Ehdr))
-		return -ENOEXEC;
-
-	pi->ehdr = (Elf_Ehdr *)kexec_purgatory;
-
-	if (memcmp(pi->ehdr->e_ident, ELFMAG, SELFMAG) != 0
-	    || pi->ehdr->e_type != ET_REL
-	    || !elf_check_arch(pi->ehdr)
-	    || pi->ehdr->e_shentsize != sizeof(Elf_Shdr))
-		return -ENOEXEC;
-
-	if (pi->ehdr->e_shoff >= kexec_purgatory_size
-	    || (pi->ehdr->e_shnum * sizeof(Elf_Shdr) >
-	    kexec_purgatory_size - pi->ehdr->e_shoff))
-		return -ENOEXEC;
+	pi->ehdr = (const Elf_Ehdr *)kexec_purgatory;
 
-	ret = __kexec_load_purgatory(image, min, max, top_down);
+	ret = kexec_purgatory_setup_kbuf(pi, kbuf);
 	if (ret)
 		return ret;
 
+	ret = kexec_purgatory_setup_sechdrs(pi, kbuf);
+	if (ret)
+		goto out_free_kbuf;
+
 	ret = kexec_apply_relocations(image);
 	if (ret)
 		goto out;
 
-	*load_addr = pi->purgatory_load_addr;
 	return 0;
 out:
 	vfree(pi->sechdrs);
 	pi->sechdrs = NULL;
-
+out_free_kbuf:
 	vfree(pi->purgatory_buf);
 	pi->purgatory_buf = NULL;
 	return ret;
 }
 
-static Elf_Sym *kexec_purgatory_find_symbol(struct purgatory_info *pi,
-					    const char *name)
+/*
+ * kexec_purgatory_find_symbol - find a symbol in the purgatory
+ * @pi:		Purgatory to search in.
+ * @name:	Name of the symbol.
+ *
+ * Return: pointer to symbol in read-only symtab on success, NULL on error.
+ */
+static const Elf_Sym *kexec_purgatory_find_symbol(struct purgatory_info *pi,
+						  const char *name)
 {
-	Elf_Sym *syms;
-	Elf_Shdr *sechdrs;
-	Elf_Ehdr *ehdr;
-	int i, k;
+	const Elf_Shdr *sechdrs;
+	const Elf_Ehdr *ehdr;
+	const Elf_Sym *syms;
 	const char *strtab;
+	int i, k;
 
-	if (!pi->sechdrs || !pi->ehdr)
+	if (!pi->ehdr)
 		return NULL;
 
-	sechdrs = pi->sechdrs;
 	ehdr = pi->ehdr;
+	sechdrs = (void *)ehdr + ehdr->e_shoff;
 
 	for (i = 0; i < ehdr->e_shnum; i++) {
 		if (sechdrs[i].sh_type != SHT_SYMTAB)
@@ -937,8 +972,8 @@ static Elf_Sym *kexec_purgatory_find_symbol(struct purgatory_info *pi,
 		if (sechdrs[i].sh_link >= ehdr->e_shnum)
 			/* Invalid strtab section number */
 			continue;
-		strtab = (char *)sechdrs[sechdrs[i].sh_link].sh_offset;
-		syms = (Elf_Sym *)sechdrs[i].sh_offset;
+		strtab = (void *)ehdr + sechdrs[sechdrs[i].sh_link].sh_offset;
+		syms = (void *)ehdr + sechdrs[i].sh_offset;
 
 		/* Go through symbols for a match */
 		for (k = 0; k < sechdrs[i].sh_size/sizeof(Elf_Sym); k++) {
@@ -966,7 +1001,7 @@ static Elf_Sym *kexec_purgatory_find_symbol(struct purgatory_info *pi,
 void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name)
 {
 	struct purgatory_info *pi = &image->purgatory_info;
-	Elf_Sym *sym;
+	const Elf_Sym *sym;
 	Elf_Shdr *sechdr;
 
 	sym = kexec_purgatory_find_symbol(pi, name);
@@ -989,9 +1024,9 @@ void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name)
 int kexec_purgatory_get_set_symbol(struct kimage *image, const char *name,
 				   void *buf, unsigned int size, bool get_value)
 {
-	Elf_Sym *sym;
-	Elf_Shdr *sechdrs;
 	struct purgatory_info *pi = &image->purgatory_info;
+	const Elf_Sym *sym;
+	Elf_Shdr *sec;
 	char *sym_buf;
 
 	sym = kexec_purgatory_find_symbol(pi, name);
@@ -1004,16 +1039,15 @@ int kexec_purgatory_get_set_symbol(struct kimage *image, const char *name,
 		return -EINVAL;
 	}
 
-	sechdrs = pi->sechdrs;
+	sec = pi->sechdrs + sym->st_shndx;
 
-	if (sechdrs[sym->st_shndx].sh_type == SHT_NOBITS) {
+	if (sec->sh_type == SHT_NOBITS) {
 		pr_err("symbol %s is in a bss section. Cannot %s\n", name,
 		       get_value ? "get" : "set");
 		return -EINVAL;
 	}
 
-	sym_buf = (unsigned char *)sechdrs[sym->st_shndx].sh_offset +
-					sym->st_value;
+	sym_buf = (char *)pi->purgatory_buf + sec->sh_offset + sym->st_value;
 
 	if (get_value)
 		memcpy((void *)buf, sym_buf, size);
@@ -1022,3 +1056,174 @@ int kexec_purgatory_get_set_symbol(struct kimage *image, const char *name,
 
 	return 0;
 }
+#endif /* CONFIG_ARCH_HAS_KEXEC_PURGATORY */
+
+/*
+ * crash_exclude_mem_range - remove [mstart, mend] from the first range of
+ * @mem that it overlaps. Only that one range is trimmed, dropped or split.
+ *
+ * Return: 0 on success (also when nothing overlaps), -ENOMEM when a split
+ * is needed but @mem has no free slot; @mem is left untouched in that case.
+ */
+int crash_exclude_mem_range(struct crash_mem *mem,
+			    unsigned long long mstart, unsigned long long mend)
+{
+	int i, j;
+	unsigned long long start, end;
+	struct crash_mem_range temp_range = {0, 0};
+
+	for (i = 0; i < mem->nr_ranges; i++) {
+		start = mem->ranges[i].start;
+		end = mem->ranges[i].end;
+
+		if (mstart > end || mend < start)
+			continue;
+
+		/* Truncate any area outside of range */
+		if (mstart < start)
+			mstart = start;
+		if (mend > end)
+			mend = end;
+
+		/* Found completely overlapping range */
+		if (mstart == start && mend == end) {
+			mem->ranges[i].start = 0;
+			mem->ranges[i].end = 0;
+			/* Shift rest of the ranges to left */
+			for (j = i; j < mem->nr_ranges - 1; j++)
+				mem->ranges[j] = mem->ranges[j + 1];
+			mem->nr_ranges--;
+			return 0;
+		}
+
+		if (mstart > start && mend < end) {
+			/* Splitting needs a free slot; check before editing */
+			if (mem->nr_ranges >= mem->max_nr_ranges)
+				return -ENOMEM;
+			/* Split original range */
+			mem->ranges[i].end = mstart - 1;
+			temp_range.start = mend + 1;
+			temp_range.end = end;
+		} else if (mstart != start)
+			mem->ranges[i].end = mstart - 1;
+		else
+			mem->ranges[i].start = mend + 1;
+		break;
+	}
+
+	/* Nothing more to do unless a range was split above */
+	if (!temp_range.end)
+		return 0;
+
+	/* Location where new range should go */
+	j = i + 1;
+	if (j < mem->nr_ranges) {
+		/* Move over all ranges one slot towards the end */
+		for (i = mem->nr_ranges - 1; i >= j; i--)
+			mem->ranges[i + 1] = mem->ranges[i];
+	}
+
+	mem->ranges[j].start = temp_range.start;
+	mem->ranges[j].end = temp_range.end;
+	mem->nr_ranges++;
+	return 0;
+}
+
+/*
+ * crash_prepare_elf64_headers - build an ET_CORE ELF64 header for @mem.
+ * On success returns 0 with *@addr = vzalloc()ed buffer (caller frees) and
+ * *@sz = its size; returns -ENOMEM if the buffer cannot be allocated.
+ */
+int crash_prepare_elf64_headers(struct crash_mem *mem, int kernel_map,
+			  void **addr, unsigned long *sz)
+{
+	Elf64_Ehdr *ehdr;
+	Elf64_Phdr *phdr;
+	unsigned long nr_cpus = num_possible_cpus(), nr_phdr, elf_sz;
+	unsigned char *buf;
+	unsigned int cpu, i;
+	unsigned long long notes_addr;
+	unsigned long mstart, mend;
+
+	/* one phdr per cpu, one for the vmcoreinfo elf note, one per range */
+	nr_phdr = nr_cpus + 1 + mem->nr_ranges;
+
+	/*
+	 * kexec-tools adds an extra PT_LOAD phdr for the kernel text mapping
+	 * (e.g. ffffffff80000000 - ffffffffa0000000 on x86_64), apparently for
+	 * tools like gdb; that memory is then also covered via __va(physical).
+	 */
+	nr_phdr++;
+	elf_sz = sizeof(Elf64_Ehdr) + nr_phdr * sizeof(Elf64_Phdr);
+	elf_sz = ALIGN(elf_sz, ELF_CORE_HEADER_ALIGN);
+
+	buf = vzalloc(elf_sz);
+	if (!buf)
+		return -ENOMEM;
+
+	ehdr = (Elf64_Ehdr *)buf;
+	phdr = (Elf64_Phdr *)(ehdr + 1);
+	memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
+	ehdr->e_ident[EI_CLASS] = ELFCLASS64;
+	ehdr->e_ident[EI_DATA] = ELFDATA2LSB;
+	ehdr->e_ident[EI_VERSION] = EV_CURRENT;
+	ehdr->e_ident[EI_OSABI] = ELF_OSABI;
+	memset(ehdr->e_ident + EI_PAD, 0, EI_NIDENT - EI_PAD);
+	ehdr->e_type = ET_CORE;
+	ehdr->e_machine = ELF_ARCH;
+	ehdr->e_version = EV_CURRENT;
+	ehdr->e_phoff = sizeof(Elf64_Ehdr);
+	ehdr->e_ehsize = sizeof(Elf64_Ehdr);
+	ehdr->e_phentsize = sizeof(Elf64_Phdr);
+
+	/* Prepare one phdr of type PT_NOTE for each present cpu */
+	for_each_present_cpu(cpu) {
+		phdr->p_type = PT_NOTE;
+		notes_addr = per_cpu_ptr_to_phys(per_cpu_ptr(crash_notes, cpu));
+		phdr->p_offset = phdr->p_paddr = notes_addr;
+		phdr->p_filesz = phdr->p_memsz = sizeof(note_buf_t);
+		(ehdr->e_phnum)++;
+		phdr++;
+	}
+
+	/* Prepare one PT_NOTE header for vmcoreinfo */
+	phdr->p_type = PT_NOTE;
+	phdr->p_offset = phdr->p_paddr = paddr_vmcoreinfo_note();
+	phdr->p_filesz = phdr->p_memsz = VMCOREINFO_NOTE_SIZE;
+	(ehdr->e_phnum)++;
+	phdr++;
+
+	/* Prepare PT_LOAD type program header for kernel text region */
+	if (kernel_map) {
+		phdr->p_type = PT_LOAD;
+		phdr->p_flags = PF_R|PF_W|PF_X;
+		phdr->p_vaddr = (Elf64_Addr)_text;
+		phdr->p_filesz = phdr->p_memsz = _end - _text;
+		phdr->p_offset = phdr->p_paddr = __pa_symbol(_text);
+		ehdr->e_phnum++;
+		phdr++;
+	}
+
+	/* Go through all the ranges in mem->ranges[] and prepare phdr */
+	for (i = 0; i < mem->nr_ranges; i++) {
+		mstart = mem->ranges[i].start;
+		mend = mem->ranges[i].end;
+
+		phdr->p_type = PT_LOAD;
+		phdr->p_flags = PF_R|PF_W|PF_X;
+		phdr->p_offset  = mstart;
+		phdr->p_paddr = mstart;
+		phdr->p_vaddr = (unsigned long long) __va(mstart);
+		phdr->p_filesz = phdr->p_memsz = mend - mstart + 1;
+		phdr->p_align = 0;
+		ehdr->e_phnum++;
+		pr_debug("Crash PT_LOAD elf header. phdr=%p vaddr=0x%llx, paddr=0x%llx, sz=0x%llx e_phnum=%d p_offset=0x%llx\n",
+			phdr, phdr->p_vaddr, phdr->p_paddr, phdr->p_filesz,
+			ehdr->e_phnum, phdr->p_offset);
+		phdr++;	/* advance only after logging this entry */
+	}
+
+	*addr = buf;
+	*sz = elf_sz;
+	return 0;
+}
diff --git a/kernel/panic.c b/kernel/panic.c
index 9d833d913c84..42e487488554 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -34,7 +34,8 @@
 #define PANIC_BLINK_SPD 18
 
 int panic_on_oops = CONFIG_PANIC_ON_OOPS_VALUE;
-static unsigned long tainted_mask;
+static unsigned long tainted_mask =
+	IS_ENABLED(CONFIG_GCC_PLUGIN_RANDSTRUCT) ? (1 << TAINT_RANDSTRUCT) : 0;
 static int pause_on_oops;
 static int pause_on_oops_flag;
 static DEFINE_SPINLOCK(pause_on_oops_lock);
@@ -308,52 +309,40 @@ EXPORT_SYMBOL(panic);
  * is being removed anyway.
  */
 const struct taint_flag taint_flags[TAINT_FLAGS_COUNT] = {
-	{ 'P', 'G', true },	/* TAINT_PROPRIETARY_MODULE */
-	{ 'F', ' ', true },	/* TAINT_FORCED_MODULE */
-	{ 'S', ' ', false },	/* TAINT_CPU_OUT_OF_SPEC */
-	{ 'R', ' ', false },	/* TAINT_FORCED_RMMOD */
-	{ 'M', ' ', false },	/* TAINT_MACHINE_CHECK */
-	{ 'B', ' ', false },	/* TAINT_BAD_PAGE */
-	{ 'U', ' ', false },	/* TAINT_USER */
-	{ 'D', ' ', false },	/* TAINT_DIE */
-	{ 'A', ' ', false },	/* TAINT_OVERRIDDEN_ACPI_TABLE */
-	{ 'W', ' ', false },	/* TAINT_WARN */
-	{ 'C', ' ', true },	/* TAINT_CRAP */
-	{ 'I', ' ', false },	/* TAINT_FIRMWARE_WORKAROUND */
-	{ 'O', ' ', true },	/* TAINT_OOT_MODULE */
-	{ 'E', ' ', true },	/* TAINT_UNSIGNED_MODULE */
-	{ 'L', ' ', false },	/* TAINT_SOFTLOCKUP */
-	{ 'K', ' ', true },	/* TAINT_LIVEPATCH */
-	{ 'X', ' ', true },	/* TAINT_AUX */
+	[ TAINT_PROPRIETARY_MODULE ]	= { 'P', 'G', true },
+	[ TAINT_FORCED_MODULE ]		= { 'F', ' ', true },
+	[ TAINT_CPU_OUT_OF_SPEC ]	= { 'S', ' ', false },
+	[ TAINT_FORCED_RMMOD ]		= { 'R', ' ', false },
+	[ TAINT_MACHINE_CHECK ]		= { 'M', ' ', false },
+	[ TAINT_BAD_PAGE ]		= { 'B', ' ', false },
+	[ TAINT_USER ]			= { 'U', ' ', false },
+	[ TAINT_DIE ]			= { 'D', ' ', false },
+	[ TAINT_OVERRIDDEN_ACPI_TABLE ]	= { 'A', ' ', false },
+	[ TAINT_WARN ]			= { 'W', ' ', false },
+	[ TAINT_CRAP ]			= { 'C', ' ', true },
+	[ TAINT_FIRMWARE_WORKAROUND ]	= { 'I', ' ', false },
+	[ TAINT_OOT_MODULE ]		= { 'O', ' ', true },
+	[ TAINT_UNSIGNED_MODULE ]	= { 'E', ' ', true },
+	[ TAINT_SOFTLOCKUP ]		= { 'L', ' ', false },
+	[ TAINT_LIVEPATCH ]		= { 'K', ' ', true },
+	[ TAINT_AUX ]			= { 'X', ' ', true },
+	[ TAINT_RANDSTRUCT ]		= { 'T', ' ', true },
 };
 
 /**
- *	print_tainted - return a string to represent the kernel taint state.
+ * print_tainted - return a string to represent the kernel taint state.
  *
- *  'P' - Proprietary module has been loaded.
- *  'F' - Module has been forcibly loaded.
- *  'S' - SMP with CPUs not designed for SMP.
- *  'R' - User forced a module unload.
- *  'M' - System experienced a machine check exception.
- *  'B' - System has hit bad_page.
- *  'U' - Userspace-defined naughtiness.
- *  'D' - Kernel has oopsed before
- *  'A' - ACPI table overridden.
- *  'W' - Taint on warning.
- *  'C' - modules from drivers/staging are loaded.
- *  'I' - Working around severe firmware bug.
- *  'O' - Out-of-tree module has been loaded.
- *  'E' - Unsigned module has been loaded.
- *  'L' - A soft lockup has previously occurred.
- *  'K' - Kernel has been live patched.
- *  'X' - Auxiliary taint, for distros' use.
+ * For individual taint flag meanings, see Documentation/sysctl/kernel.txt
  *
- *	The string is overwritten by the next call to print_tainted().
+ * The string is overwritten by the next call to print_tainted(),
+ * but is always NUL-terminated.
  */
 const char *print_tainted(void)
 {
 	static char buf[TAINT_FLAGS_COUNT + sizeof("Tainted: ")];
 
+	BUILD_BUG_ON(ARRAY_SIZE(taint_flags) != TAINT_FLAGS_COUNT);
+
 	if (tainted_mask) {
 		char *s;
 		int i;
@@ -554,6 +543,8 @@ void __warn(const char *file, int line, void *caller, unsigned taint,
 	else
 		dump_stack();
 
+	print_irqtrace_events(current);
+
 	print_oops_end_marker();
 
 	/* Just a warning, don't kill lockdep. */
diff --git a/kernel/params.c b/kernel/params.c
index cc9108c2a1fd..ce89f757e6da 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -111,8 +111,8 @@ bool parameq(const char *a, const char *b)
 static void param_check_unsafe(const struct kernel_param *kp)
 {
 	if (kp->flags & KERNEL_PARAM_FL_UNSAFE) {
-		pr_warn("Setting dangerous option %s - tainting kernel\n",
-			kp->name);
+		pr_notice("Setting dangerous option %s - tainting kernel\n",
+			  kp->name);
 		add_taint(TAINT_USER, LOCKDEP_STILL_OK);
 	}
 }
diff --git a/kernel/pid.c b/kernel/pid.c
index ed6c343fe50d..157fe4b19971 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -70,7 +70,7 @@ int pid_max_max = PID_MAX_LIMIT;
  */
 struct pid_namespace init_pid_ns = {
 	.kref = KREF_INIT(2),
-	.idr = IDR_INIT,
+	.idr = IDR_INIT(init_pid_ns.idr),
 	.pid_allocated = PIDNS_ADDING,
 	.level = 0,
 	.child_reaper = &init_task,
diff --git a/kernel/power/qos.c b/kernel/power/qos.c
index 9d7503910ce2..fa39092b7aea 100644
--- a/kernel/power/qos.c
+++ b/kernel/power/qos.c
@@ -295,6 +295,7 @@ int pm_qos_update_target(struct pm_qos_constraints *c, struct plist_node *node,
 		 * changed
 		 */
 		plist_del(node, &c->list);
+		/* fall through */
 	case PM_QOS_ADD_REQ:
 		plist_node_init(node, new_value);
 		plist_add(node, &c->list);
@@ -367,6 +368,7 @@ bool pm_qos_update_flags(struct pm_qos_flags *pqf,
 		break;
 	case PM_QOS_UPDATE_REQ:
 		pm_qos_flags_remove_req(pqf, req);
+		/* fall through */
 	case PM_QOS_ADD_REQ:
 		req->flags = val;
 		INIT_LIST_HEAD(&req->node);
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 704e55129c3a..2f4af216bd6e 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -51,6 +51,7 @@
 #include <linux/uaccess.h>
 #include <asm/sections.h>
 
+#include <trace/events/initcall.h>
 #define CREATE_TRACE_POINTS
 #include <trace/events/printk.h>
 
@@ -2780,6 +2781,7 @@ EXPORT_SYMBOL(unregister_console);
  */
 void __init console_init(void)
 {
+	int ret;
 	initcall_t *call;
 
 	/* Setup the default TTY line discipline. */
@@ -2790,8 +2792,11 @@ void __init console_init(void)
 	 * inform about problems etc..
 	 */
 	call = __con_initcall_start;
+	trace_initcall_level("console");
 	while (call < __con_initcall_end) {
-		(*call)();
+		trace_initcall_start((*call));
+		ret = (*call)();
+		trace_initcall_finish((*call), ret);
 		call++;
 	}
 }
diff --git a/kernel/resource.c b/kernel/resource.c
index e270b5048988..2af6c03858b9 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -651,7 +651,8 @@ static int __find_resource(struct resource *root, struct resource *old,
 			alloc.start = constraint->alignf(constraint->alignf_data, &avail,
 					size, constraint->align);
 			alloc.end = alloc.start + size - 1;
-			if (resource_contains(&avail, &alloc)) {
+			if (alloc.start <= alloc.end &&
+			    resource_contains(&avail, &alloc)) {
 				new->start = alloc.start;
 				new->end = alloc.end;
 				return 0;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index e8afd6086f23..5e10aaeebfcc 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -874,7 +874,7 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
 	 * this case, we can save a useless back to back clock update.
 	 */
 	if (task_on_rq_queued(rq->curr) && test_tsk_need_resched(rq->curr))
-		rq_clock_skip_update(rq, true);
+		rq_clock_skip_update(rq);
 }
 
 #ifdef CONFIG_SMP
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 2b124811947d..d2c6083304b4 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -631,10 +631,9 @@ fail:
 
 stop_kthread:
 	sugov_kthread_stop(sg_policy);
-
-free_sg_policy:
 	mutex_unlock(&global_tunables_lock);
 
+free_sg_policy:
 	sugov_policy_free(sg_policy);
 
 disable_fast_switch:
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index d1c7bf7c7e5b..e7b3008b85bb 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1560,7 +1560,7 @@ static void yield_task_dl(struct rq *rq)
 	 * so we don't do microscopic update in schedule()
 	 * and double the fastpath cost.
 	 */
-	rq_clock_skip_update(rq, true);
+	rq_clock_skip_update(rq);
 }
 
 #ifdef CONFIG_SMP
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0951d1c58d2f..54dc31e7ab9b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7089,7 +7089,7 @@ static void yield_task_fair(struct rq *rq)
 		 * so we don't do microscopic update in schedule()
 		 * and double the fastpath cost.
 		 */
-		rq_clock_skip_update(rq, true);
+		rq_clock_skip_update(rq);
 	}
 
 	set_skip_buddy(se);
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 2975f195e1c4..1a3e9bddd17b 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -141,13 +141,15 @@ static void cpuidle_idle_call(void)
 	}
 
 	/*
-	 * Tell the RCU framework we are entering an idle section,
-	 * so no more rcu read side critical sections and one more
+	 * The RCU framework needs to be told that we are entering an idle
+	 * section, so no more rcu read side critical sections and one more
 	 * step to the grace period
 	 */
-	rcu_idle_enter();
 
 	if (cpuidle_not_available(drv, dev)) {
+		tick_nohz_idle_stop_tick();
+		rcu_idle_enter();
+
 		default_idle_call();
 		goto exit_idle;
 	}
@@ -164,20 +166,37 @@ static void cpuidle_idle_call(void)
 
 	if (idle_should_enter_s2idle() || dev->use_deepest_state) {
 		if (idle_should_enter_s2idle()) {
+			rcu_idle_enter();
+
 			entered_state = cpuidle_enter_s2idle(drv, dev);
 			if (entered_state > 0) {
 				local_irq_enable();
 				goto exit_idle;
 			}
+
+			rcu_idle_exit();
 		}
 
+		tick_nohz_idle_stop_tick();
+		rcu_idle_enter();
+
 		next_state = cpuidle_find_deepest_state(drv, dev);
 		call_cpuidle(drv, dev, next_state);
 	} else {
+		bool stop_tick = true;
+
 		/*
 		 * Ask the cpuidle framework to choose a convenient idle state.
 		 */
-		next_state = cpuidle_select(drv, dev);
+		next_state = cpuidle_select(drv, dev, &stop_tick);
+
+		if (stop_tick)
+			tick_nohz_idle_stop_tick();
+		else
+			tick_nohz_idle_retain_tick();
+
+		rcu_idle_enter();
+
 		entered_state = call_cpuidle(drv, dev, next_state);
 		/*
 		 * Give the governor an opportunity to reflect on the outcome
@@ -222,6 +241,7 @@ static void do_idle(void)
 		rmb();
 
 		if (cpu_is_offline(cpu)) {
+			tick_nohz_idle_stop_tick_protected();
 			cpuhp_report_idle_dead();
 			arch_cpu_idle_dead();
 		}
@@ -235,10 +255,12 @@ static void do_idle(void)
 		 * broadcast device expired for us, we don't want to go deep
 		 * idle as we know that the IPI is going to arrive right away.
 		 */
-		if (cpu_idle_force_poll || tick_check_broadcast_expired())
+		if (cpu_idle_force_poll || tick_check_broadcast_expired()) {
+			tick_nohz_idle_restart_tick();
 			cpu_idle_poll();
-		else
+		} else {
 			cpuidle_idle_call();
+		}
 		arch_cpu_idle_exit();
 	}
 
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 86b77987435e..7aef6b4e885a 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -839,6 +839,8 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 			continue;
 
 		raw_spin_lock(&rq->lock);
+		update_rq_clock(rq);
+
 		if (rt_rq->rt_time) {
 			u64 runtime;
 
@@ -859,7 +861,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 				 * 'runtime'.
 				 */
 				if (rt_rq->rt_nr_running && rq->curr == rq->idle)
-					rq_clock_skip_update(rq, false);
+					rq_clock_cancel_skipupdate(rq);
 			}
 			if (rt_rq->rt_time || rt_rq->rt_nr_running)
 				idle = 0;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c3deaee7a7a2..15750c222ca2 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -976,13 +976,20 @@ static inline u64 rq_clock_task(struct rq *rq)
 	return rq->clock_task;
 }
 
-static inline void rq_clock_skip_update(struct rq *rq, bool skip)
+static inline void rq_clock_skip_update(struct rq *rq)
 {
 	lockdep_assert_held(&rq->lock);
-	if (skip)
-		rq->clock_update_flags |= RQCF_REQ_SKIP;
-	else
-		rq->clock_update_flags &= ~RQCF_REQ_SKIP;
+	rq->clock_update_flags |= RQCF_REQ_SKIP;
+}
+
+/*
+ * See rt task throttling, which is the only time a skip
+ * request is cancelled.
+ */
+static inline void rq_clock_cancel_skipupdate(struct rq *rq)
+{
+	lockdep_assert_held(&rq->lock);
+	rq->clock_update_flags &= ~RQCF_REQ_SKIP;
 }
 
 struct rq_flags {
diff --git a/kernel/signal.c b/kernel/signal.c
index 47491aa3e790..d4ccea599692 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -770,7 +770,7 @@ static int check_kill_permission(int sig, struct siginfo *info,
 		}
 	}
 
-	return security_task_kill(t, info, sig, 0);
+	return security_task_kill(t, info, sig, NULL);
 }
 
 /**
@@ -1361,7 +1361,7 @@ static int kill_as_cred_perm(const struct cred *cred,
 
 /* like kill_pid_info(), but doesn't use uid/euid of "current" */
 int kill_pid_info_as_cred(int sig, struct siginfo *info, struct pid *pid,
-			 const struct cred *cred, u32 secid)
+			 const struct cred *cred)
 {
 	int ret = -EINVAL;
 	struct task_struct *p;
@@ -1380,7 +1380,7 @@ int kill_pid_info_as_cred(int sig, struct siginfo *info, struct pid *pid,
 		ret = -EPERM;
 		goto out_unlock;
 	}
-	ret = security_task_kill(p, info, sig, secid);
+	ret = security_task_kill(p, info, sig, cred);
 	if (ret)
 		goto out_unlock;
 
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 6cafc008f6db..9791364925dc 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -5,6 +5,11 @@
 
 #include <asm/unistd.h>
 
+#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
+/* Architectures may override COND_SYSCALL and COND_SYSCALL_COMPAT */
+#include <asm/syscall_wrapper.h>
+#endif /* CONFIG_ARCH_HAS_SYSCALL_WRAPPER */
+
 /*  we can't #include <linux/syscalls.h> here,
     but tell gcc to not warn with -Wmissing-prototypes  */
 asmlinkage long sys_ni_syscall(void);
@@ -17,8 +22,13 @@ asmlinkage long sys_ni_syscall(void)
 	return -ENOSYS;
 }
 
+#ifndef COND_SYSCALL
 #define COND_SYSCALL(name) cond_syscall(sys_##name)
+#endif /* COND_SYSCALL */
+
+#ifndef COND_SYSCALL_COMPAT
 #define COND_SYSCALL_COMPAT(name) cond_syscall(compat_sys_##name)
+#endif /* COND_SYSCALL_COMPAT */
 
 /*
  * This list is kept in the same order as include/uapi/asm-generic/unistd.h.
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index bdf7090b106d..6a78cf70761d 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1340,7 +1340,7 @@ static struct ctl_table vm_table[] = {
 	{
 		.procname	= "dirtytime_expire_seconds",
 		.data		= &dirtytime_expire_interval,
-		.maxlen		= sizeof(dirty_expire_interval),
+		.maxlen		= sizeof(dirtytime_expire_interval),
 		.mode		= 0644,
 		.proc_handler	= dirtytime_interval_handler,
 		.extra1		= &zero,
@@ -2511,6 +2511,15 @@ static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
 }
 #endif
 
+/**
+ * struct do_proc_dointvec_minmax_conv_param - proc_dointvec_minmax() range checking structure
+ * @min: pointer to minimum allowable value
+ * @max: pointer to maximum allowable value
+ *
+ * The do_proc_dointvec_minmax_conv_param structure provides the
+ * minimum and maximum values for doing range checking for those sysctl
+ * parameters that use the proc_dointvec_minmax() handler.
+ */
 struct do_proc_dointvec_minmax_conv_param {
 	int *min;
 	int *max;
@@ -2554,7 +2563,7 @@ static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
  * This routine will ensure the values are within the range specified by
  * table->extra1 (min) and table->extra2 (max).
  *
- * Returns 0 on success.
+ * Returns 0 on success or -EINVAL on write when the range check fails.
  */
 int proc_dointvec_minmax(struct ctl_table *table, int write,
 		  void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -2567,6 +2576,15 @@ int proc_dointvec_minmax(struct ctl_table *table, int write,
 				do_proc_dointvec_minmax_conv, &param);
 }
 
+/**
+ * struct do_proc_douintvec_minmax_conv_param - proc_douintvec_minmax() range checking structure
+ * @min: pointer to minimum allowable value
+ * @max: pointer to maximum allowable value
+ *
+ * The do_proc_douintvec_minmax_conv_param structure provides the
+ * minimum and maximum values for doing range checking for those sysctl
+ * parameters that use the proc_douintvec_minmax() handler.
+ */
 struct do_proc_douintvec_minmax_conv_param {
 	unsigned int *min;
 	unsigned int *max;
@@ -2614,7 +2632,7 @@ static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
  * check for UINT_MAX to avoid having to support wrap around uses from
  * userspace.
  *
- * Returns 0 on success.
+ * Returns 0 on success or -ERANGE on write when the range check fails.
  */
 int proc_douintvec_minmax(struct ctl_table *table, int write,
 			  void __user *buffer, size_t *lenp, loff_t *ppos)
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 9b082ce86325..eda1210ce50f 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -480,6 +480,7 @@ __next_base(struct hrtimer_cpu_base *cpu_base, unsigned int *active)
 	while ((base = __next_base((cpu_base), &(active))))
 
 static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base,
+					 const struct hrtimer *exclude,
 					 unsigned int active,
 					 ktime_t expires_next)
 {
@@ -492,9 +493,22 @@ static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base,
 
 		next = timerqueue_getnext(&base->active);
 		timer = container_of(next, struct hrtimer, node);
+		if (timer == exclude) {
+			/* Get to the next timer in the queue. */
+			next = timerqueue_iterate_next(next);
+			if (!next)
+				continue;
+
+			timer = container_of(next, struct hrtimer, node);
+		}
 		expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
 		if (expires < expires_next) {
 			expires_next = expires;
+
+			/* Skip cpu_base update if a timer is being excluded. */
+			if (exclude)
+				continue;
+
 			if (timer->is_soft)
 				cpu_base->softirq_next_timer = timer;
 			else
@@ -538,7 +552,8 @@ __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_
 	if (!cpu_base->softirq_activated && (active_mask & HRTIMER_ACTIVE_SOFT)) {
 		active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT;
 		cpu_base->softirq_next_timer = NULL;
-		expires_next = __hrtimer_next_event_base(cpu_base, active, KTIME_MAX);
+		expires_next = __hrtimer_next_event_base(cpu_base, NULL,
+							 active, KTIME_MAX);
 
 		next_timer = cpu_base->softirq_next_timer;
 	}
@@ -546,7 +561,8 @@ __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_
 	if (active_mask & HRTIMER_ACTIVE_HARD) {
 		active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD;
 		cpu_base->next_timer = next_timer;
-		expires_next = __hrtimer_next_event_base(cpu_base, active, expires_next);
+		expires_next = __hrtimer_next_event_base(cpu_base, NULL, active,
+							 expires_next);
 	}
 
 	return expires_next;
@@ -1190,6 +1206,39 @@ u64 hrtimer_get_next_event(void)
 
 	return expires;
 }
+
+/**
+ * hrtimer_next_event_without - next expiry time, ignoring one timer
+ * @exclude:	timer to leave out of the search
+ *
+ * Returns the earliest expiry among this CPU's queued timers other than
+ * @exclude, or KTIME_MAX when nothing else is pending or hres is inactive.
+ */
+u64 hrtimer_next_event_without(const struct hrtimer *exclude)
+{
+	struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases);
+	u64 next = KTIME_MAX;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&base->lock, flags);
+
+	if (!__hrtimer_hres_active(base))
+		goto unlock;
+
+	/* Soft bases are only scanned while softirq_activated is clear. */
+	if (!base->softirq_activated)
+		next = __hrtimer_next_event_base(base, exclude,
+				base->active_bases & HRTIMER_ACTIVE_SOFT,
+				KTIME_MAX);
+
+	/* Hard bases are scanned with the soft result as the starting bound. */
+	next = __hrtimer_next_event_base(base, exclude,
+			base->active_bases & HRTIMER_ACTIVE_HARD, next);
+unlock:
+	raw_spin_unlock_irqrestore(&base->lock, flags);
+
+	return next;
+}
 #endif
 
 static inline int hrtimer_clockid_to_base(clockid_t clock_id)
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 8d70da1b9a0d..a09ded765f6c 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -31,7 +31,7 @@
 
 
 /* USER_HZ period (usecs): */
-unsigned long			tick_usec = TICK_USEC;
+unsigned long			tick_usec = USER_TICK_USEC;
 
 /* SHIFTED_HZ period (nsecs): */
 unsigned long			tick_nsec;
diff --git a/kernel/time/posix-stubs.c b/kernel/time/posix-stubs.c
index 6259dbc0191a..e0dbae98db9d 100644
--- a/kernel/time/posix-stubs.c
+++ b/kernel/time/posix-stubs.c
@@ -19,6 +19,11 @@
 #include <linux/posix-timers.h>
 #include <linux/compat.h>
 
+#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
+/* Architectures may override SYS_NI and COMPAT_SYS_NI */
+#include <asm/syscall_wrapper.h>
+#endif
+
 asmlinkage long sys_ni_posix_timers(void)
 {
 	pr_err_once("process %d (%s) attempted a POSIX timer syscall "
@@ -27,8 +32,13 @@ asmlinkage long sys_ni_posix_timers(void)
 	return -ENOSYS;
 }
 
+#ifndef SYS_NI
 #define SYS_NI(name)  SYSCALL_ALIAS(sys_##name, sys_ni_posix_timers)
+#endif
+
+#ifndef COMPAT_SYS_NI
 #define COMPAT_SYS_NI(name)  SYSCALL_ALIAS(compat_sys_##name, sys_ni_posix_timers)
+#endif
 
 SYS_NI(timer_create);
 SYS_NI(timer_gettime);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index f3ab08caa2c3..646645e981f9 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -122,8 +122,7 @@ static ktime_t tick_init_jiffy_update(void)
 	return period;
 }
 
-
-static void tick_sched_do_timer(ktime_t now)
+static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
 {
 	int cpu = smp_processor_id();
 
@@ -143,6 +142,9 @@ static void tick_sched_do_timer(ktime_t now)
 	/* Check, if the jiffies need an update */
 	if (tick_do_timer_cpu == cpu)
 		tick_do_update_jiffies64(now);
+
+	if (ts->inidle)
+		ts->got_idle_tick = 1;
 }
 
 static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
@@ -474,7 +476,9 @@ __setup("nohz=", setup_tick_nohz);
 
 bool tick_nohz_tick_stopped(void)
 {
-	return __this_cpu_read(tick_cpu_sched.tick_stopped);
+	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
+
+	return ts->tick_stopped;
 }
 
 bool tick_nohz_tick_stopped_cpu(int cpu)
@@ -537,14 +541,11 @@ static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now)
 	sched_clock_idle_wakeup_event();
 }
 
-static ktime_t tick_nohz_start_idle(struct tick_sched *ts)
+static void tick_nohz_start_idle(struct tick_sched *ts)
 {
-	ktime_t now = ktime_get();
-
-	ts->idle_entrytime = now;
+	ts->idle_entrytime = ktime_get();
 	ts->idle_active = 1;
 	sched_clock_idle_sleep_event();
-	return now;
 }
 
 /**
@@ -653,13 +654,10 @@ static inline bool local_timer_softirq_pending(void)
 	return local_softirq_pending() & TIMER_SOFTIRQ;
 }
 
-static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
-					 ktime_t now, int cpu)
+static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
 {
-	struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
 	u64 basemono, next_tick, next_tmr, next_rcu, delta, expires;
 	unsigned long seq, basejiff;
-	ktime_t	tick;
 
 	/* Read jiffies and the time when jiffies were updated last */
 	do {
@@ -668,6 +666,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 		basejiff = jiffies;
 	} while (read_seqretry(&jiffies_lock, seq));
 	ts->last_jiffies = basejiff;
+	ts->timer_expires_base = basemono;
 
 	/*
 	 * Keep the periodic tick, when RCU, architecture or irq_work
@@ -712,47 +711,63 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 		 * next period, so no point in stopping it either, bail.
 		 */
 		if (!ts->tick_stopped) {
-			tick = 0;
+			ts->timer_expires = 0;
 			goto out;
 		}
 	}
 
 	/*
+	 * If this CPU is the one which had the do_timer() duty last, we limit
+	 * the sleep time to the timekeeping max_deferment value.
+	 * Otherwise we can sleep as long as we want.
+	 */
+	delta = timekeeping_max_deferment();
+	if (cpu != tick_do_timer_cpu &&
+	    (tick_do_timer_cpu != TICK_DO_TIMER_NONE || !ts->do_timer_last))
+		delta = KTIME_MAX;
+
+	/* Calculate the next expiry time */
+	if (delta < (KTIME_MAX - basemono))
+		expires = basemono + delta;
+	else
+		expires = KTIME_MAX;
+
+	ts->timer_expires = min_t(u64, expires, next_tick);
+
+out:
+	return ts->timer_expires;
+}
+
+static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
+{
+	struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
+	u64 basemono = ts->timer_expires_base;
+	u64 expires = ts->timer_expires;
+	ktime_t tick = expires;
+
+	/* Make sure we won't be trying to stop it twice in a row. */
+	ts->timer_expires_base = 0;
+
+	/*
 	 * If this CPU is the one which updates jiffies, then give up
 	 * the assignment and let it be taken by the CPU which runs
 	 * the tick timer next, which might be this CPU as well. If we
 	 * don't drop this here the jiffies might be stale and
 	 * do_timer() never invoked. Keep track of the fact that it
-	 * was the one which had the do_timer() duty last. If this CPU
-	 * is the one which had the do_timer() duty last, we limit the
-	 * sleep time to the timekeeping max_deferment value.
-	 * Otherwise we can sleep as long as we want.
+	 * was the one which had the do_timer() duty last.
 	 */
-	delta = timekeeping_max_deferment();
 	if (cpu == tick_do_timer_cpu) {
 		tick_do_timer_cpu = TICK_DO_TIMER_NONE;
 		ts->do_timer_last = 1;
 	} else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
-		delta = KTIME_MAX;
 		ts->do_timer_last = 0;
-	} else if (!ts->do_timer_last) {
-		delta = KTIME_MAX;
 	}
 
-	/* Calculate the next expiry time */
-	if (delta < (KTIME_MAX - basemono))
-		expires = basemono + delta;
-	else
-		expires = KTIME_MAX;
-
-	expires = min_t(u64, expires, next_tick);
-	tick = expires;
-
 	/* Skip reprogram of event if its not changed */
 	if (ts->tick_stopped && (expires == ts->next_tick)) {
 		/* Sanity check: make sure clockevent is actually programmed */
 		if (tick == KTIME_MAX || ts->next_tick == hrtimer_get_expires(&ts->sched_timer))
-			goto out;
+			return;
 
 		WARN_ON_ONCE(1);
 		printk_once("basemono: %llu ts->next_tick: %llu dev->next_event: %llu timer->active: %d timer->expires: %llu\n",
@@ -786,7 +801,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 	if (unlikely(expires == KTIME_MAX)) {
 		if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
 			hrtimer_cancel(&ts->sched_timer);
-		goto out;
+		return;
 	}
 
 	hrtimer_set_expires(&ts->sched_timer, tick);
@@ -795,15 +810,23 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 		hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED);
 	else
 		tick_program_event(tick, 1);
-out:
-	/*
-	 * Update the estimated sleep length until the next timer
-	 * (not only the tick).
-	 */
-	ts->sleep_length = ktime_sub(dev->next_event, now);
-	return tick;
 }
 
+static void tick_nohz_retain_tick(struct tick_sched *ts)
+{
+	ts->timer_expires_base = 0;
+}
+
+#ifdef CONFIG_NO_HZ_FULL
+static void tick_nohz_stop_sched_tick(struct tick_sched *ts, int cpu)
+{
+	if (tick_nohz_next_event(ts, cpu))
+		tick_nohz_stop_tick(ts, cpu);
+	else
+		tick_nohz_retain_tick(ts);
+}
+#endif /* CONFIG_NO_HZ_FULL */
+
 static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
 {
 	/* Update jiffies first */
@@ -839,7 +862,7 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts)
 		return;
 
 	if (can_stop_full_tick(cpu, ts))
-		tick_nohz_stop_sched_tick(ts, ktime_get(), cpu);
+		tick_nohz_stop_sched_tick(ts, cpu);
 	else if (ts->tick_stopped)
 		tick_nohz_restart_sched_tick(ts, ktime_get());
 #endif
@@ -865,10 +888,8 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
 		return false;
 	}
 
-	if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) {
-		ts->sleep_length = NSEC_PER_SEC / HZ;
+	if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
 		return false;
-	}
 
 	if (need_resched())
 		return false;
@@ -903,42 +924,65 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
 	return true;
 }
 
-static void __tick_nohz_idle_enter(struct tick_sched *ts)
+static void __tick_nohz_idle_stop_tick(struct tick_sched *ts)
 {
-	ktime_t now, expires;
+	ktime_t expires;
 	int cpu = smp_processor_id();
 
-	now = tick_nohz_start_idle(ts);
+	/*
+	 * If tick_nohz_get_sleep_length() ran tick_nohz_next_event(), the
+	 * tick timer expiration time is known already.
+	 */
+	if (ts->timer_expires_base)
+		expires = ts->timer_expires;
+	else if (can_stop_idle_tick(cpu, ts))
+		expires = tick_nohz_next_event(ts, cpu);
+	else
+		return;
+
+	ts->idle_calls++;
 
-	if (can_stop_idle_tick(cpu, ts)) {
+	if (expires > 0LL) {
 		int was_stopped = ts->tick_stopped;
 
-		ts->idle_calls++;
+		tick_nohz_stop_tick(ts, cpu);
 
-		expires = tick_nohz_stop_sched_tick(ts, now, cpu);
-		if (expires > 0LL) {
-			ts->idle_sleeps++;
-			ts->idle_expires = expires;
-		}
+		ts->idle_sleeps++;
+		ts->idle_expires = expires;
 
 		if (!was_stopped && ts->tick_stopped) {
 			ts->idle_jiffies = ts->last_jiffies;
 			nohz_balance_enter_idle(cpu);
 		}
+	} else {
+		tick_nohz_retain_tick(ts);
 	}
 }
 
 /**
- * tick_nohz_idle_enter - stop the idle tick from the idle task
+ * tick_nohz_idle_stop_tick - stop the idle tick from the idle task
  *
  * When the next event is more than a tick into the future, stop the idle tick
- * Called when we start the idle loop.
- *
- * The arch is responsible of calling:
+ */
+void tick_nohz_idle_stop_tick(void)
+{
+	__tick_nohz_idle_stop_tick(this_cpu_ptr(&tick_cpu_sched));
+}
+
+void tick_nohz_idle_retain_tick(void)
+{
+	tick_nohz_retain_tick(this_cpu_ptr(&tick_cpu_sched));
+	/*
+	 * Undo the effect of get_next_timer_interrupt() called from
+	 * tick_nohz_next_event().
+	 */
+	timer_clear_idle();
+}
+
+/**
+ * tick_nohz_idle_enter - prepare for entering idle on the current CPU
  *
- * - rcu_idle_enter() after its last use of RCU before the CPU is put
- *  to sleep.
- * - rcu_idle_exit() before the first use of RCU after the CPU is woken up.
+ * Called when we start the idle loop.
  */
 void tick_nohz_idle_enter(void)
 {
@@ -949,8 +993,11 @@ void tick_nohz_idle_enter(void)
 	local_irq_disable();
 
 	ts = this_cpu_ptr(&tick_cpu_sched);
+
+	WARN_ON_ONCE(ts->timer_expires_base);
+
 	ts->inidle = 1;
-	__tick_nohz_idle_enter(ts);
+	tick_nohz_start_idle(ts);
 
 	local_irq_enable();
 }
@@ -968,21 +1015,62 @@ void tick_nohz_irq_exit(void)
 	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
 
 	if (ts->inidle)
-		__tick_nohz_idle_enter(ts);
+		tick_nohz_start_idle(ts);
 	else
 		tick_nohz_full_update_tick(ts);
 }
 
 /**
- * tick_nohz_get_sleep_length - return the length of the current sleep
+ * tick_nohz_idle_got_tick - Check whether or not the tick handler has run
+ */
+bool tick_nohz_idle_got_tick(void)
+{
+	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
+
+	if (ts->got_idle_tick) {
+		ts->got_idle_tick = 0;
+		return true;
+	}
+	return false;
+}
+
+/**
+ * tick_nohz_get_sleep_length - return the expected length of the current sleep
+ * @delta_next: duration until the next event if the tick cannot be stopped
  *
  * Called from power state control code with interrupts disabled
  */
-ktime_t tick_nohz_get_sleep_length(void)
+ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next)
 {
+	struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
 	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
+	int cpu = smp_processor_id();
+	/*
+	 * The idle entry time is expected to be a sufficient approximation of
+	 * the current time at this point.
+	 */
+	ktime_t now = ts->idle_entrytime;
+	ktime_t next_event;
+
+	WARN_ON_ONCE(!ts->inidle);
+
+	*delta_next = ktime_sub(dev->next_event, now);
+
+	if (!can_stop_idle_tick(cpu, ts))
+		return *delta_next;
+
+	next_event = tick_nohz_next_event(ts, cpu);
+	if (!next_event)
+		return *delta_next;
+
+	/*
+	 * If the next highres timer to expire is earlier than next_event, the
+	 * idle governor needs to know that.
+	 */
+	next_event = min_t(u64, next_event,
+			   hrtimer_next_event_without(&ts->sched_timer));
 
-	return ts->sleep_length;
+	return ktime_sub(next_event, now);
 }
 
 /**
@@ -1031,6 +1119,20 @@ static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
 #endif
 }
 
+static void __tick_nohz_idle_restart_tick(struct tick_sched *ts, ktime_t now)
+{
+	tick_nohz_restart_sched_tick(ts, now);
+	tick_nohz_account_idle_ticks(ts);
+}
+
+void tick_nohz_idle_restart_tick(void)
+{
+	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
+
+	if (ts->tick_stopped)
+		__tick_nohz_idle_restart_tick(ts, ktime_get());
+}
+
 /**
  * tick_nohz_idle_exit - restart the idle tick from the idle task
  *
@@ -1041,24 +1143,26 @@ static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
 void tick_nohz_idle_exit(void)
 {
 	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
+	bool idle_active, tick_stopped;
 	ktime_t now;
 
 	local_irq_disable();
 
 	WARN_ON_ONCE(!ts->inidle);
+	WARN_ON_ONCE(ts->timer_expires_base);
 
 	ts->inidle = 0;
+	idle_active = ts->idle_active;
+	tick_stopped = ts->tick_stopped;
 
-	if (ts->idle_active || ts->tick_stopped)
+	if (idle_active || tick_stopped)
 		now = ktime_get();
 
-	if (ts->idle_active)
+	if (idle_active)
 		tick_nohz_stop_idle(ts, now);
 
-	if (ts->tick_stopped) {
-		tick_nohz_restart_sched_tick(ts, now);
-		tick_nohz_account_idle_ticks(ts);
-	}
+	if (tick_stopped)
+		__tick_nohz_idle_restart_tick(ts, now);
 
 	local_irq_enable();
 }
@@ -1074,7 +1178,7 @@ static void tick_nohz_handler(struct clock_event_device *dev)
 
 	dev->next_event = KTIME_MAX;
 
-	tick_sched_do_timer(now);
+	tick_sched_do_timer(ts, now);
 	tick_sched_handle(ts, regs);
 
 	/* No need to reprogram if we are running tickless  */
@@ -1169,7 +1273,7 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
 	struct pt_regs *regs = get_irq_regs();
 	ktime_t now = ktime_get();
 
-	tick_sched_do_timer(now);
+	tick_sched_do_timer(ts, now);
 
 	/*
 	 * Do not call, when we are not in irq context and have
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index 954b43dbf21c..6de959a854b2 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -38,31 +38,37 @@ enum tick_nohz_mode {
  * @idle_exittime:	Time when the idle state was left
  * @idle_sleeptime:	Sum of the time slept in idle with sched tick stopped
  * @iowait_sleeptime:	Sum of the time slept in idle with sched tick stopped, with IO outstanding
- * @sleep_length:	Duration of the current idle sleep
+ * @timer_expires:	Anticipated timer expiration time (in case sched tick is stopped)
+ * @timer_expires_base:	Base time clock monotonic for @timer_expires
  * @do_timer_lst:	CPU was the last one doing do_timer before going idle
+ * @got_idle_tick:	Tick timer function has run with @inidle set
  */
 struct tick_sched {
 	struct hrtimer			sched_timer;
 	unsigned long			check_clocks;
 	enum tick_nohz_mode		nohz_mode;
+
+	unsigned int			inidle		: 1;
+	unsigned int			tick_stopped	: 1;
+	unsigned int			idle_active	: 1;
+	unsigned int			do_timer_last	: 1;
+	unsigned int			got_idle_tick	: 1;
+
 	ktime_t				last_tick;
 	ktime_t				next_tick;
-	int				inidle;
-	int				tick_stopped;
 	unsigned long			idle_jiffies;
 	unsigned long			idle_calls;
 	unsigned long			idle_sleeps;
-	int				idle_active;
 	ktime_t				idle_entrytime;
 	ktime_t				idle_waketime;
 	ktime_t				idle_exittime;
 	ktime_t				idle_sleeptime;
 	ktime_t				iowait_sleeptime;
-	ktime_t				sleep_length;
 	unsigned long			last_jiffies;
+	u64				timer_expires;
+	u64				timer_expires_base;
 	u64				next_timer;
 	ktime_t				idle_expires;
-	int				do_timer_last;
 	atomic_t			tick_dep_mask;
 };
 
diff --git a/kernel/time/timekeeping_internal.h b/kernel/time/timekeeping_internal.h
index fdbeeb02dde9..cf5c0828ee31 100644
--- a/kernel/time/timekeeping_internal.h
+++ b/kernel/time/timekeeping_internal.h
@@ -31,6 +31,4 @@ static inline u64 clocksource_delta(u64 now, u64 last, u64 mask)
 }
 #endif
 
-extern time64_t __ktime_get_real_seconds(void);
-
 #endif /* _TIMEKEEPING_INTERNAL_H */
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 0b249e2f0c3c..c4f0f2e4126e 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -606,7 +606,10 @@ config HIST_TRIGGERS
 	  event activity as an initial guide for further investigation
 	  using more advanced tools.
 
-	  See Documentation/trace/events.txt.
+	  Inter-event tracing of quantities such as latencies is also
+	  supported using hist triggers under this option.
+
+	  See Documentation/trace/histogram.txt.
 	  If in doubt, say N.
 
 config MMIOTRACE_TEST
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index eac9ce2c57a2..16bbf062018f 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3902,14 +3902,13 @@ static bool module_exists(const char *module)
 {
 	/* All modules have the symbol __this_module */
 	const char this_mod[] = "__this_module";
-	const int modname_size = MAX_PARAM_PREFIX_LEN + sizeof(this_mod) + 1;
-	char modname[modname_size + 1];
+	char modname[MAX_PARAM_PREFIX_LEN + sizeof(this_mod) + 2];
 	unsigned long val;
 	int n;
 
-	n = snprintf(modname, modname_size + 1, "%s:%s", module, this_mod);
+	n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod);
 
-	if (n > modname_size)
+	if (n > sizeof(modname) - 1)
 		return false;
 
 	val = module_kallsyms_lookup_name(modname);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index dcf1c4dd3efe..c9cb9767d49b 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -22,6 +22,7 @@
 #include <linux/hash.h>
 #include <linux/list.h>
 #include <linux/cpu.h>
+#include <linux/oom.h>
 
 #include <asm/local.h>
 
@@ -41,6 +42,8 @@ int ring_buffer_print_entry_header(struct trace_seq *s)
 			 RINGBUF_TYPE_PADDING);
 	trace_seq_printf(s, "\ttime_extend : type == %d\n",
 			 RINGBUF_TYPE_TIME_EXTEND);
+	trace_seq_printf(s, "\ttime_stamp : type == %d\n",
+			 RINGBUF_TYPE_TIME_STAMP);
 	trace_seq_printf(s, "\tdata max type_len  == %d\n",
 			 RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
 
@@ -140,12 +143,15 @@ int ring_buffer_print_entry_header(struct trace_seq *s)
 
 enum {
 	RB_LEN_TIME_EXTEND = 8,
-	RB_LEN_TIME_STAMP = 16,
+	RB_LEN_TIME_STAMP =  8,
 };
 
 #define skip_time_extend(event) \
 	((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND))
 
+#define extended_time(event) \
+	(event->type_len >= RINGBUF_TYPE_TIME_EXTEND)
+
 static inline int rb_null_event(struct ring_buffer_event *event)
 {
 	return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta;
@@ -209,7 +215,7 @@ rb_event_ts_length(struct ring_buffer_event *event)
 {
 	unsigned len = 0;
 
-	if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
+	if (extended_time(event)) {
 		/* time extends include the data event after it */
 		len = RB_LEN_TIME_EXTEND;
 		event = skip_time_extend(event);
@@ -231,7 +237,7 @@ unsigned ring_buffer_event_length(struct ring_buffer_event *event)
 {
 	unsigned length;
 
-	if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
+	if (extended_time(event))
 		event = skip_time_extend(event);
 
 	length = rb_event_length(event);
@@ -248,7 +254,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_length);
 static __always_inline void *
 rb_event_data(struct ring_buffer_event *event)
 {
-	if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
+	if (extended_time(event))
 		event = skip_time_extend(event);
 	BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
 	/* If length is in len field, then array[0] has the data */
@@ -275,6 +281,27 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data);
 #define TS_MASK		((1ULL << TS_SHIFT) - 1)
 #define TS_DELTA_TEST	(~TS_MASK)
 
+/**
+ * ring_buffer_event_time_stamp - return the event's extended timestamp
+ * @event: the event to get the timestamp of
+ *
+ * Returns the extended timestamp associated with a data event.
+ * An extended time_stamp is a 64-bit timestamp represented
+ * internally in a special way that makes the best use of space
+ * contained within a ring buffer event.  This function decodes
+ * it and maps it to a straight u64 value.
+ */
+u64 ring_buffer_event_time_stamp(struct ring_buffer_event *event)
+{
+	u64 ts;
+
+	ts = event->array[0];
+	ts <<= TS_SHIFT;
+	ts += event->time_delta;
+
+	return ts;
+}
+
 /* Flag when events were overwritten */
 #define RB_MISSED_EVENTS	(1 << 31)
 /* Missed count stored at end */
@@ -451,6 +478,7 @@ struct ring_buffer_per_cpu {
 	struct buffer_page		*reader_page;
 	unsigned long			lost_events;
 	unsigned long			last_overrun;
+	unsigned long			nest;
 	local_t				entries_bytes;
 	local_t				entries;
 	local_t				overrun;
@@ -488,6 +516,7 @@ struct ring_buffer {
 	u64				(*clock)(void);
 
 	struct rb_irq_work		irq_work;
+	bool				time_stamp_abs;
 };
 
 struct ring_buffer_iter {
@@ -1134,30 +1163,60 @@ static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
 static int __rb_allocate_pages(long nr_pages, struct list_head *pages, int cpu)
 {
 	struct buffer_page *bpage, *tmp;
+	bool user_thread = current->mm != NULL;
+	gfp_t mflags;
 	long i;
 
+	/*
+	 * Check if the available memory is there first.
+	 * Note, si_mem_available() only gives us a rough estimate of available
+	 * memory. It may not be accurate. But we don't care, we just want
+	 * to prevent doing any allocation when it is obvious that it is
+	 * not going to succeed.
+	 */
+	i = si_mem_available();
+	if (i < nr_pages)
+		return -ENOMEM;
+
+	/*
+	 * __GFP_RETRY_MAYFAIL flag makes sure that the allocation fails
+	 * gracefully without invoking oom-killer and the system is not
+	 * destabilized.
+	 */
+	mflags = GFP_KERNEL | __GFP_RETRY_MAYFAIL;
+
+	/*
+	 * If a user thread allocates too much while si_mem_available()
+	 * reports there's enough memory even though there is not, make
+	 * sure the OOM killer kills this thread. This can happen
+	 * even with RETRY_MAYFAIL because another task may be doing
+	 * an allocation after this task has taken all memory.
+	 * This is the task the OOM killer needs to take out during this
+	 * loop, even if it was triggered by an allocation somewhere else.
+	 */
+	if (user_thread)
+		set_current_oom_origin();
 	for (i = 0; i < nr_pages; i++) {
 		struct page *page;
-		/*
-		 * __GFP_RETRY_MAYFAIL flag makes sure that the allocation fails
-		 * gracefully without invoking oom-killer and the system is not
-		 * destabilized.
-		 */
+
 		bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
-				    GFP_KERNEL | __GFP_RETRY_MAYFAIL,
-				    cpu_to_node(cpu));
+				    mflags, cpu_to_node(cpu));
 		if (!bpage)
 			goto free_pages;
 
 		list_add(&bpage->list, pages);
 
-		page = alloc_pages_node(cpu_to_node(cpu),
-					GFP_KERNEL | __GFP_RETRY_MAYFAIL, 0);
+		page = alloc_pages_node(cpu_to_node(cpu), mflags, 0);
 		if (!page)
 			goto free_pages;
 		bpage->page = page_address(page);
 		rb_init_page(bpage->page);
+
+		if (user_thread && fatal_signal_pending(current))
+			goto free_pages;
 	}
+	if (user_thread)
+		clear_current_oom_origin();
 
 	return 0;
 
@@ -1166,6 +1225,8 @@ free_pages:
 		list_del_init(&bpage->list);
 		free_buffer_page(bpage);
 	}
+	if (user_thread)
+		clear_current_oom_origin();
 
 	return -ENOMEM;
 }
@@ -1382,6 +1443,16 @@ void ring_buffer_set_clock(struct ring_buffer *buffer,
 	buffer->clock = clock;
 }
 
+void ring_buffer_set_time_stamp_abs(struct ring_buffer *buffer, bool abs)
+{
+	buffer->time_stamp_abs = abs;
+}
+
+bool ring_buffer_time_stamp_abs(struct ring_buffer *buffer)
+{
+	return buffer->time_stamp_abs;
+}
+
 static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
 
 static inline unsigned long rb_page_entries(struct buffer_page *bpage)
@@ -2206,12 +2277,15 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
 
 /* Slow path, do not inline */
 static noinline struct ring_buffer_event *
-rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
+rb_add_time_stamp(struct ring_buffer_event *event, u64 delta, bool abs)
 {
-	event->type_len = RINGBUF_TYPE_TIME_EXTEND;
+	if (abs)
+		event->type_len = RINGBUF_TYPE_TIME_STAMP;
+	else
+		event->type_len = RINGBUF_TYPE_TIME_EXTEND;
 
-	/* Not the first event on the page? */
-	if (rb_event_index(event)) {
+	/* Not the first event on the page, or not delta? */
+	if (abs || rb_event_index(event)) {
 		event->time_delta = delta & TS_MASK;
 		event->array[0] = delta >> TS_SHIFT;
 	} else {
@@ -2254,7 +2328,9 @@ rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
 	 * add it to the start of the resevered space.
 	 */
 	if (unlikely(info->add_timestamp)) {
-		event = rb_add_time_stamp(event, delta);
+		bool abs = ring_buffer_time_stamp_abs(cpu_buffer->buffer);
+
+		event = rb_add_time_stamp(event, info->delta, abs);
 		length -= RB_LEN_TIME_EXTEND;
 		delta = 0;
 	}
@@ -2442,7 +2518,7 @@ static __always_inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer
 
 static inline void rb_event_discard(struct ring_buffer_event *event)
 {
-	if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
+	if (extended_time(event))
 		event = skip_time_extend(event);
 
 	/* array[0] holds the actual length for the discarded event */
@@ -2486,10 +2562,11 @@ rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
 			cpu_buffer->write_stamp =
 				cpu_buffer->commit_page->page->time_stamp;
 		else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
-			delta = event->array[0];
-			delta <<= TS_SHIFT;
-			delta += event->time_delta;
+			delta = ring_buffer_event_time_stamp(event);
 			cpu_buffer->write_stamp += delta;
+		} else if (event->type_len == RINGBUF_TYPE_TIME_STAMP) {
+			delta = ring_buffer_event_time_stamp(event);
+			cpu_buffer->write_stamp = delta;
 		} else
 			cpu_buffer->write_stamp += event->time_delta;
 	}
@@ -2581,10 +2658,10 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
 		bit = pc & NMI_MASK ? RB_CTX_NMI :
 			pc & HARDIRQ_MASK ? RB_CTX_IRQ : RB_CTX_SOFTIRQ;
 
-	if (unlikely(val & (1 << bit)))
+	if (unlikely(val & (1 << (bit + cpu_buffer->nest))))
 		return 1;
 
-	val |= (1 << bit);
+	val |= (1 << (bit + cpu_buffer->nest));
 	cpu_buffer->current_context = val;
 
 	return 0;
@@ -2593,7 +2670,57 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
 static __always_inline void
 trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer)
 {
-	cpu_buffer->current_context &= cpu_buffer->current_context - 1;
+	cpu_buffer->current_context &=
+		cpu_buffer->current_context - (1 << cpu_buffer->nest);
+}
+
+/* The recursive locking above uses 4 bits */
+#define NESTED_BITS 4
+
+/**
+ * ring_buffer_nest_start - Allow to trace while nested
+ * @buffer: The ring buffer to modify
+ *
+ * The ring buffer has a safety mechanism to prevent recursion.
+ * But there may be a case where a trace needs to be done while
+ * tracing something else. In this case, calling this function
+ * will allow this function to nest within a currently active
+ * ring_buffer_lock_reserve().
+ *
+ * Call this function before calling another ring_buffer_lock_reserve() and
+ * call ring_buffer_nest_end() after the nested ring_buffer_unlock_commit().
+ */
+void ring_buffer_nest_start(struct ring_buffer *buffer)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	int cpu;
+
+	/* Preemption is re-enabled by ring_buffer_nest_end() */
+	preempt_disable_notrace();
+	cpu = raw_smp_processor_id();
+	cpu_buffer = buffer->buffers[cpu];
+	/* This is the shift value for the above recursive locking */
+	cpu_buffer->nest += NESTED_BITS;
+}
+
+/**
+ * ring_buffer_nest_end - Stop allowing traces to nest
+ * @buffer: The ring buffer to modify
+ *
+ * Must be called after ring_buffer_nest_start() and after the
+ * ring_buffer_unlock_commit().
+ */
+void ring_buffer_nest_end(struct ring_buffer *buffer)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	int cpu;
+
+	/* Preemption was disabled by ring_buffer_nest_start() */
+	cpu = raw_smp_processor_id();
+	cpu_buffer = buffer->buffers[cpu];
+	/* This is the shift value for the above recursive locking */
+	cpu_buffer->nest -= NESTED_BITS;
+	preempt_enable_notrace();
 }
 
 /**
@@ -2637,7 +2764,8 @@ rb_handle_timestamp(struct ring_buffer_per_cpu *cpu_buffer,
 		  sched_clock_stable() ? "" :
 		  "If you just came from a suspend/resume,\n"
 		  "please switch to the trace global clock:\n"
-		  "  echo global > /sys/kernel/debug/tracing/trace_clock\n");
+		  "  echo global > /sys/kernel/debug/tracing/trace_clock\n"
+		  "or add trace_clock=global to the kernel command line\n");
 	info->add_timestamp = 1;
 }
 
@@ -2669,7 +2797,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 	 * If this is the first commit on the page, then it has the same
 	 * timestamp as the page itself.
 	 */
-	if (!tail)
+	if (!tail && !ring_buffer_time_stamp_abs(cpu_buffer->buffer))
 		info->delta = 0;
 
 	/* See if we shot pass the end of this buffer page */
@@ -2746,8 +2874,11 @@ rb_reserve_next_event(struct ring_buffer *buffer,
 	/* make sure this diff is calculated here */
 	barrier();
 
-	/* Did the write stamp get updated already? */
-	if (likely(info.ts >= cpu_buffer->write_stamp)) {
+	if (ring_buffer_time_stamp_abs(buffer)) {
+		info.delta = info.ts;
+		rb_handle_timestamp(cpu_buffer, &info);
+	} else /* Did the write stamp get updated already? */
+		if (likely(info.ts >= cpu_buffer->write_stamp)) {
 		info.delta = diff;
 		if (unlikely(test_time_stamp(info.delta)))
 			rb_handle_timestamp(cpu_buffer, &info);
@@ -3429,14 +3560,13 @@ rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
 		return;
 
 	case RINGBUF_TYPE_TIME_EXTEND:
-		delta = event->array[0];
-		delta <<= TS_SHIFT;
-		delta += event->time_delta;
+		delta = ring_buffer_event_time_stamp(event);
 		cpu_buffer->read_stamp += delta;
 		return;
 
 	case RINGBUF_TYPE_TIME_STAMP:
-		/* FIXME: not implemented */
+		delta = ring_buffer_event_time_stamp(event);
+		cpu_buffer->read_stamp = delta;
 		return;
 
 	case RINGBUF_TYPE_DATA:
@@ -3460,14 +3590,13 @@ rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
 		return;
 
 	case RINGBUF_TYPE_TIME_EXTEND:
-		delta = event->array[0];
-		delta <<= TS_SHIFT;
-		delta += event->time_delta;
+		delta = ring_buffer_event_time_stamp(event);
 		iter->read_stamp += delta;
 		return;
 
 	case RINGBUF_TYPE_TIME_STAMP:
-		/* FIXME: not implemented */
+		delta = ring_buffer_event_time_stamp(event);
+		iter->read_stamp = delta;
 		return;
 
 	case RINGBUF_TYPE_DATA:
@@ -3691,6 +3820,8 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
 	struct buffer_page *reader;
 	int nr_loops = 0;
 
+	if (ts)
+		*ts = 0;
  again:
 	/*
 	 * We repeat when a time extend is encountered.
@@ -3727,12 +3858,17 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
 		goto again;
 
 	case RINGBUF_TYPE_TIME_STAMP:
-		/* FIXME: not implemented */
+		if (ts) {
+			*ts = ring_buffer_event_time_stamp(event);
+			ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
+							 cpu_buffer->cpu, ts);
+		}
+		/* Internal data, OK to advance */
 		rb_advance_reader(cpu_buffer);
 		goto again;
 
 	case RINGBUF_TYPE_DATA:
-		if (ts) {
+		if (ts && !(*ts)) {
 			*ts = cpu_buffer->read_stamp + event->time_delta;
 			ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
 							 cpu_buffer->cpu, ts);
@@ -3757,6 +3893,9 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 	struct ring_buffer_event *event;
 	int nr_loops = 0;
 
+	if (ts)
+		*ts = 0;
+
 	cpu_buffer = iter->cpu_buffer;
 	buffer = cpu_buffer->buffer;
 
@@ -3809,12 +3948,17 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 		goto again;
 
 	case RINGBUF_TYPE_TIME_STAMP:
-		/* FIXME: not implemented */
+		if (ts) {
+			*ts = ring_buffer_event_time_stamp(event);
+			ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
+							 cpu_buffer->cpu, ts);
+		}
+		/* Internal data, OK to advance */
 		rb_advance_iter(iter);
 		goto again;
 
 	case RINGBUF_TYPE_DATA:
-		if (ts) {
+		if (ts && !(*ts)) {
 			*ts = iter->read_stamp + event->time_delta;
 			ring_buffer_normalize_time_stamp(buffer,
 							 cpu_buffer->cpu, ts);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 5071931eb943..dfbcf9ee1447 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -41,6 +41,7 @@
 #include <linux/nmi.h>
 #include <linux/fs.h>
 #include <linux/trace.h>
+#include <linux/sched/clock.h>
 #include <linux/sched/rt.h>
 
 #include "trace.h"
@@ -1168,6 +1169,14 @@ static struct {
 	ARCH_TRACE_CLOCKS
 };
 
+bool trace_clock_in_ns(struct trace_array *tr)
+{
+	if (trace_clocks[tr->clock_id].in_ns)
+		return true;
+
+	return false;
+}
+
 /*
  * trace_parser_get_init - gets the buffer for trace parser
  */
@@ -2269,7 +2278,7 @@ trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
 
 	*current_rb = trace_file->tr->trace_buffer.buffer;
 
-	if ((trace_file->flags &
+	if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
 	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
 	    (entry = this_cpu_read(trace_buffered_event))) {
 		/* Try to use the per cpu buffer first */
@@ -4515,6 +4524,9 @@ static const char readme_msg[] =
 #ifdef CONFIG_X86_64
 	"     x86-tsc:   TSC cycle counter\n"
 #endif
+	"\n  timestamp_mode\t-view the mode used to timestamp events\n"
+	"       delta:   Delta difference against a buffer-wide timestamp\n"
+	"    absolute:   Absolute (standalone) timestamp\n"
 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
 	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
@@ -4691,8 +4703,9 @@ static const char readme_msg[] =
 	"\t            .sym        display an address as a symbol\n"
 	"\t            .sym-offset display an address as a symbol and offset\n"
 	"\t            .execname   display a common_pid as a program name\n"
-	"\t            .syscall    display a syscall id as a syscall name\n\n"
-	"\t            .log2       display log2 value rather than raw number\n\n"
+	"\t            .syscall    display a syscall id as a syscall name\n"
+	"\t            .log2       display log2 value rather than raw number\n"
+	"\t            .usecs      display a common_timestamp in microseconds\n\n"
 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
 	"\t    trigger or to start a hist trigger but not log any events\n"
 	"\t    until told to do so.  'continue' can be used to start or\n"
@@ -6202,7 +6215,7 @@ static int tracing_clock_show(struct seq_file *m, void *v)
 	return 0;
 }
 
-static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
+int tracing_set_clock(struct trace_array *tr, const char *clockstr)
 {
 	int i;
 
@@ -6282,6 +6295,71 @@ static int tracing_clock_open(struct inode *inode, struct file *file)
 	return ret;
 }
 
+/* Show handler for "timestamp_mode": brackets mark the mode in use */
+static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
+{
+	struct trace_array *tr = m->private;
+	const char *modes = "[delta] absolute\n";
+
+	/* trace_types_lock is held while the mode is read and printed */
+	mutex_lock(&trace_types_lock);
+	if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
+		modes = "delta [absolute]\n";
+	seq_puts(m, modes);
+	mutex_unlock(&trace_types_lock);
+
+	return 0;
+}
+
+/*
+ * Open handler for "timestamp_mode": calls trace_array_get() on the trace
+ * array and undoes it with trace_array_put() if single_open() fails.
+ */
+static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
+{
+	struct trace_array *tr = inode->i_private;
+	int err;
+
+	if (tracing_disabled || trace_array_get(tr))
+		return -ENODEV;
+
+	err = single_open(file, tracing_time_stamp_mode_show, tr);
+	if (err < 0)
+		trace_array_put(tr);
+	return err;
+}
+
+/*
+ * Refcounted enable/disable of absolute time stamps on @tr's ring buffers.
+ */
+int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
+{
+	int ret = 0;
+
+	mutex_lock(&trace_types_lock);
+	if (abs) {
+		/* Only the first enable switches the buffers */
+		if (tr->time_stamp_abs_ref++)
+			goto out;
+	} else {
+		if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
+			ret = -EINVAL;
+			goto out;
+		}
+		/* Only the last disable switches them back */
+		if (--tr->time_stamp_abs_ref)
+			goto out;
+	}
+	ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
+#ifdef CONFIG_TRACER_MAX_TRACE
+	if (tr->max_buffer.buffer)
+		ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
+#endif
+ out:
+	mutex_unlock(&trace_types_lock);
+	return ret;
+}
+
 struct ftrace_buffer_info {
 	struct trace_iterator	iter;
 	void			*spare;
@@ -6529,6 +6607,13 @@ static const struct file_operations trace_clock_fops = {
 	.write		= tracing_clock_write,
 };
 
+static const struct file_operations trace_time_stamp_mode_fops = {	/* "timestamp_mode" file; no .write handler */
+	.open		= tracing_time_stamp_mode_open,	/* single_open() based */
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= tracing_single_release_tr,	/* NOTE(review): presumably undoes open's trace_array_get() - confirm */
+};
+
 #ifdef CONFIG_TRACER_SNAPSHOT
 static const struct file_operations snapshot_fops = {
 	.open		= tracing_snapshot_open,
@@ -7699,6 +7784,7 @@ static int instance_mkdir(const char *name)
 
 	INIT_LIST_HEAD(&tr->systems);
 	INIT_LIST_HEAD(&tr->events);
+	INIT_LIST_HEAD(&tr->hist_vars);
 
 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
 		goto out_free_tr;
@@ -7851,6 +7937,9 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
 	trace_create_file("tracing_on", 0644, d_tracer,
 			  tr, &rb_simple_fops);
 
+	trace_create_file("timestamp_mode", 0444, d_tracer, tr,
+			  &trace_time_stamp_mode_fops);
+
 	create_trace_options_dir(tr);
 
 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
@@ -8446,6 +8535,7 @@ __init static int tracer_alloc_buffers(void)
 
 	INIT_LIST_HEAD(&global_trace.systems);
 	INIT_LIST_HEAD(&global_trace.events);
+	INIT_LIST_HEAD(&global_trace.hist_vars);
 	list_add(&global_trace.list, &ftrace_trace_arrays);
 
 	apply_trace_boot_options();
@@ -8507,3 +8597,21 @@ __init static int clear_boot_tracer(void)
 
 fs_initcall(tracer_init_tracefs);
 late_initcall_sync(clear_boot_tracer);
+
+#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+/* Without trace_boot_clock set, an unstable sched_clock selects "global" */
+__init static int tracing_set_default_clock(void)
+{
+	/* sched_clock_stable() is determined in late_initcall */
+	if (trace_boot_clock || sched_clock_stable())
+		return 0;
+	printk(KERN_WARNING
+	       "Unstable clock detected, switching default tracing clock to \"global\"\n"
+	       "If you want to keep using the local clock, then add:\n"
+	       "  \"trace_clock=local\"\n"
+	       "on the kernel command line\n");
+	tracing_set_clock(&global_trace, "global");
+	return 0;
+}
+late_initcall_sync(tracing_set_default_clock);
+#endif
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 2a6d0325a761..6fb46a06c9dc 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -273,6 +273,8 @@ struct trace_array {
 	/* function tracing enabled */
 	int			function_enabled;
 #endif
+	int			time_stamp_abs_ref;
+	struct list_head	hist_vars;
 };
 
 enum {
@@ -286,6 +288,11 @@ extern struct mutex trace_types_lock;
 extern int trace_array_get(struct trace_array *tr);
 extern void trace_array_put(struct trace_array *tr);
 
+extern int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs);
+extern int tracing_set_clock(struct trace_array *tr, const char *clockstr);
+
+extern bool trace_clock_in_ns(struct trace_array *tr);
+
 /*
  * The global tracer (top) should be the first trace array added,
  * but we check the flag anyway.
@@ -1209,12 +1216,11 @@ struct ftrace_event_field {
 	int			is_signed;
 };
 
+struct prog_entry;
+
 struct event_filter {
-	int			n_preds;	/* Number assigned */
-	int			a_preds;	/* allocated */
-	struct filter_pred __rcu	*preds;
-	struct filter_pred __rcu	*root;
-	char				*filter_string;
+	struct prog_entry __rcu	*prog;
+	char			*filter_string;
 };
 
 struct event_subsystem {
@@ -1291,7 +1297,7 @@ __event_trigger_test_discard(struct trace_event_file *file,
 	unsigned long eflags = file->flags;
 
 	if (eflags & EVENT_FILE_FL_TRIGGER_COND)
-		*tt = event_triggers_call(file, entry);
+		*tt = event_triggers_call(file, entry, event);
 
 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
@@ -1328,7 +1334,7 @@ event_trigger_unlock_commit(struct trace_event_file *file,
 		trace_buffer_unlock_commit(file->tr, buffer, event, irq_flags, pc);
 
 	if (tt)
-		event_triggers_post_call(file, tt, entry);
+		event_triggers_post_call(file, tt, entry, event);
 }
 
 /**
@@ -1361,7 +1367,7 @@ event_trigger_unlock_commit_regs(struct trace_event_file *file,
 						irq_flags, pc, regs);
 
 	if (tt)
-		event_triggers_post_call(file, tt, entry);
+		event_triggers_post_call(file, tt, entry, event);
 }
 
 #define FILTER_PRED_INVALID	((unsigned short)-1)
@@ -1406,12 +1412,8 @@ struct filter_pred {
 	unsigned short		*ops;
 	struct ftrace_event_field *field;
 	int 			offset;
-	int 			not;
+	int			not;
 	int 			op;
-	unsigned short		index;
-	unsigned short		parent;
-	unsigned short		left;
-	unsigned short		right;
 };
 
 static inline bool is_string_field(struct ftrace_event_field *field)
@@ -1543,6 +1545,8 @@ extern void pause_named_trigger(struct event_trigger_data *data);
 extern void unpause_named_trigger(struct event_trigger_data *data);
 extern void set_named_trigger_data(struct event_trigger_data *data,
 				   struct event_trigger_data *named_data);
+extern struct event_trigger_data *
+get_named_trigger_data(struct event_trigger_data *data);
 extern int register_event_command(struct event_command *cmd);
 extern int unregister_event_command(struct event_command *cmd);
 extern int register_trigger_hist_enable_disable_cmds(void);
@@ -1586,7 +1590,8 @@ extern int register_trigger_hist_enable_disable_cmds(void);
  */
 struct event_trigger_ops {
 	void			(*func)(struct event_trigger_data *data,
-					void *rec);
+					void *rec,
+					struct ring_buffer_event *rbe);
 	int			(*init)(struct event_trigger_ops *ops,
 					struct event_trigger_data *data);
 	void			(*free)(struct event_trigger_ops *ops,
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 5fdc779f411d..d8a188e0418a 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -96,7 +96,7 @@ u64 notrace trace_clock_global(void)
 	int this_cpu;
 	u64 now;
 
-	local_irq_save(flags);
+	raw_local_irq_save(flags);
 
 	this_cpu = raw_smp_processor_id();
 	now = sched_clock_cpu(this_cpu);
@@ -122,7 +122,7 @@ u64 notrace trace_clock_global(void)
 	arch_spin_unlock(&trace_clock_struct.lock);
 
  out:
-	local_irq_restore(flags);
+	raw_local_irq_restore(flags);
 
 	return now;
 }
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 2c416509b834..c79193e598f5 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -252,6 +252,8 @@ int perf_kprobe_init(struct perf_event *p_event, bool is_retprobe)
 		ret = strncpy_from_user(
 			func, u64_to_user_ptr(p_event->attr.kprobe_func),
 			KSYM_NAME_LEN);
+		if (ret == KSYM_NAME_LEN)
+			ret = -E2BIG;
 		if (ret < 0)
 			goto out;
 
@@ -300,6 +302,8 @@ int perf_uprobe_init(struct perf_event *p_event, bool is_retprobe)
 		return -ENOMEM;
 	ret = strncpy_from_user(
 		path, u64_to_user_ptr(p_event->attr.uprobe_path), PATH_MAX);
+	if (ret == PATH_MAX)
+		ret = -E2BIG;	/* leave through "out" like every other failure */
 	if (ret < 0)
 		goto out;
 	if (path[0] == '\0') {
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index a764aec3c9a1..9b4716bb8bb0 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -33,163 +33,595 @@
 	"# Only events with the given fields will be affected.\n"	\
 	"# If no events are modified, an error message will be displayed here"
 
-enum filter_op_ids
-{
-	OP_OR,
-	OP_AND,
-	OP_GLOB,
-	OP_NE,
-	OP_EQ,
-	OP_LT,
-	OP_LE,
-	OP_GT,
-	OP_GE,
-	OP_BAND,
-	OP_NOT,
-	OP_NONE,
-	OP_OPEN_PAREN,
-};
+/* Due to token parsing '<=' must be before '<' and '>=' must be before '>' */
+#define OPS					\
+	C( OP_GLOB,	"~"  ),			\
+	C( OP_NE,	"!=" ),			\
+	C( OP_EQ,	"==" ),			\
+	C( OP_LE,	"<=" ),			\
+	C( OP_LT,	"<"  ),			\
+	C( OP_GE,	">=" ),			\
+	C( OP_GT,	">"  ),			\
+	C( OP_BAND,	"&"  ),			\
+	C( OP_MAX,	NULL )
 
-struct filter_op {
-	int id;
-	char *string;
-	int precedence;
-};
+#undef C
+#define C(a, b)	a
 
-/* Order must be the same as enum filter_op_ids above */
-static struct filter_op filter_ops[] = {
-	{ OP_OR,	"||",		1 },
-	{ OP_AND,	"&&",		2 },
-	{ OP_GLOB,	"~",		4 },
-	{ OP_NE,	"!=",		4 },
-	{ OP_EQ,	"==",		4 },
-	{ OP_LT,	"<",		5 },
-	{ OP_LE,	"<=",		5 },
-	{ OP_GT,	">",		5 },
-	{ OP_GE,	">=",		5 },
-	{ OP_BAND,	"&",		6 },
-	{ OP_NOT,	"!",		6 },
-	{ OP_NONE,	"OP_NONE",	0 },
-	{ OP_OPEN_PAREN, "(",		0 },
-};
+enum filter_op_ids { OPS };
 
-enum {
-	FILT_ERR_NONE,
-	FILT_ERR_INVALID_OP,
-	FILT_ERR_UNBALANCED_PAREN,
-	FILT_ERR_TOO_MANY_OPERANDS,
-	FILT_ERR_OPERAND_TOO_LONG,
-	FILT_ERR_FIELD_NOT_FOUND,
-	FILT_ERR_ILLEGAL_FIELD_OP,
-	FILT_ERR_ILLEGAL_INTVAL,
-	FILT_ERR_BAD_SUBSYS_FILTER,
-	FILT_ERR_TOO_MANY_PREDS,
-	FILT_ERR_MISSING_FIELD,
-	FILT_ERR_INVALID_FILTER,
-	FILT_ERR_IP_FIELD_ONLY,
-	FILT_ERR_ILLEGAL_NOT_OP,
-};
+#undef C
+#define C(a, b)	b
 
-static char *err_text[] = {
-	"No error",
-	"Invalid operator",
-	"Unbalanced parens",
-	"Too many operands",
-	"Operand too long",
-	"Field not found",
-	"Illegal operation for field type",
-	"Illegal integer value",
-	"Couldn't find or set field in one of a subsystem's events",
-	"Too many terms in predicate expression",
-	"Missing field name and/or value",
-	"Meaningless filter expression",
-	"Only 'ip' field is supported for function trace",
-	"Illegal use of '!'",
-};
+static const char * ops[] = { OPS };
 
-struct opstack_op {
-	enum filter_op_ids op;
-	struct list_head list;
-};
+/*
+ * pred functions are OP_LE, OP_LT, OP_GE, OP_GT, and OP_BAND
+ * pred_funcs_##type below must match the order of them above.
+ */
+#define PRED_FUNC_START			OP_LE
+#define PRED_FUNC_MAX			(OP_BAND - PRED_FUNC_START)
+
+#define ERRORS								\
+	C(NONE,			"No error"),				\
+	C(INVALID_OP,		"Invalid operator"),			\
+	C(TOO_MANY_OPEN,	"Too many '('"),			\
+	C(TOO_MANY_CLOSE,	"Too few '('"),				\
+	C(MISSING_QUOTE,	"Missing matching quote"),		\
+	C(OPERAND_TOO_LONG,	"Operand too long"),			\
+	C(EXPECT_STRING,	"Expecting string field"),		\
+	C(EXPECT_DIGIT,		"Expecting numeric field"),		\
+	C(ILLEGAL_FIELD_OP,	"Illegal operation for field type"),	\
+	C(FIELD_NOT_FOUND,	"Field not found"),			\
+	C(ILLEGAL_INTVAL,	"Illegal integer value"),		\
+	C(BAD_SUBSYS_FILTER,	"Couldn't find or set field in one of a subsystem's events"), \
+	C(TOO_MANY_PREDS,	"Too many terms in predicate expression"), \
+	C(INVALID_FILTER,	"Meaningless filter expression"),	\
+	C(IP_FIELD_ONLY,	"Only 'ip' field is supported for function trace"), \
+	C(INVALID_VALUE,	"Invalid value (did you forget quotes)?"),
+
+#undef C
+#define C(a, b)		FILT_ERR_##a
+
+enum { ERRORS };
+
+#undef C
+#define C(a, b)		b
+
+static char *err_text[] = { ERRORS };
+
+/* @str is at a '!': "!=" and "!~" are operators, not logical "not"s */
+static bool is_not(const char *str)
+{
+	const char following = str[1];
+
+	if (following == '=' || following == '~')
+		return false;
+
+	return true;
+}
 
-struct postfix_elt {
-	enum filter_op_ids op;
-	char *operand;
-	struct list_head list;
+/**
+ * struct prog_entry - a single entry in the filter program
+ * @target:	     Index to jump to on a branch (actually one minus the index)
+ * @when_to_branch:  The value of the result of the predicate to do a branch
+ * @pred:	     The predicate to execute.
+ */
+struct prog_entry {
+	int			target;
+	int			when_to_branch;
+	struct filter_pred	*pred;
 };
 
-struct filter_parse_state {
-	struct filter_op *ops;
-	struct list_head opstack;
-	struct list_head postfix;
+/**
+ * update_preds - assign a program entry a label target
+ * @prog: The program array
+ * @N: The index of the current entry in @prog
+ * @invert: The when_to_branch value to give the program entry being updated
+ *
+ * The program entry at @N has a target that points to the index of a program
+ * entry that can have its target and when_to_branch fields updated.
+ * Update the current program entry denoted by index @N target field to be
+ * that of the updated entry. This will denote the entry to update if
+ * we are processing an "||" after an "&&"
+ */
+static void update_preds(struct prog_entry *prog, int N, int invert)
+{
+	int t, s;
+
+	t = prog[N].target;		/* entry that gets its label assigned */
+	s = prog[t].target;		/* remember what that entry pointed at */
+	prog[t].when_to_branch = invert;
+	prog[t].target = N;
+	prog[N].target = s;		/* @N inherits the saved target */
+}
+
+struct filter_parse_error {
 	int lasterr;
 	int lasterr_pos;
-
-	struct {
-		char *string;
-		unsigned int cnt;
-		unsigned int tail;
-	} infix;
-
-	struct {
-		char string[MAX_FILTER_STR_VAL];
-		int pos;
-		unsigned int tail;
-	} operand;
 };
 
-struct pred_stack {
-	struct filter_pred	**preds;
-	int			index;
+/* Record error @err and the position @pos it was detected at in @pe */
+static void parse_error(struct filter_parse_error *pe, int err, int pos)
+{
+	*pe = (struct filter_parse_error){ .lasterr = err, .lasterr_pos = pos };
+}
+
+typedef int (*parse_pred_fn)(const char *str, void *data, int pos,
+			     struct filter_parse_error *pe,
+			     struct filter_pred **pred);
+
+enum {
+	INVERT		= 1,	/* op_stack flag: predicates in scope are inverted */
+	PROCESS_AND	= 2,	/* op_stack flag: an "&&" is waiting to be processed */
+	PROCESS_OR	= 4,	/* op_stack flag: an "||" is waiting to be processed */
 };
 
-/* If not of not match is equal to not of not, then it is a match */
+/*
+ * Without going into a formal proof, this explains the method that is used in
+ * parsing the logical expressions.
+ *
+ * For example, if we have: "a && !(!b || (c && g)) || d || e && !f"
+ * The first pass will convert it into the following program:
+ *
+ * n1: r=a;       l1: if (!r) goto l4;
+ * n2: r=b;       l2: if (!r) goto l4;
+ * n3: r=c; r=!r; l3: if (r) goto l4;
+ * n4: r=g; r=!r; l4: if (r) goto l5;
+ * n5: r=d;       l5: if (r) goto T
+ * n6: r=e;       l6: if (!r) goto l7;
+ * n7: r=f; r=!r; l7: if (!r) goto F
+ * T: return TRUE
+ * F: return FALSE
+ *
+ * To do this, we use a data structure to represent each of the above
+ * predicate and conditions that has:
+ *
+ *  predicate, when_to_branch, invert, target
+ *
+ * The "predicate" will hold the function to determine the result "r".
+ * The "when_to_branch" denotes what "r" should be if a branch is to be taken
+ * "&&" would contain "!r" or (0) and "||" would contain "r" or (1).
+ * The "invert" holds whether the value should be reversed before testing.
+ * The "target" contains the label "l#" to jump to.
+ *
+ * A stack is created to hold values when parentheses are used.
+ *
+ * To simplify the logic, the labels will start at 0 and not 1.
+ *
+ * The possible invert values are 1 and 0. The number of "!"s that are in scope
+ * before the predicate determines the invert value, if the number is odd then
+ * the invert value is 1 and 0 otherwise. This means the invert value only
+ * needs to be toggled when a new "!" is introduced compared to what is stored
+ * on the stack, where parentheses were used.
+ *
+ * The top of the stack and "invert" are initialized to zero.
+ *
+ * ** FIRST PASS **
+ *
+ * #1 A loop through all the tokens is done:
+ *
+ * #2 If the token is an "(", the stack is pushed, and the current stack value
+ *    gets the current invert value, and the loop continues to the next token.
+ *    The top of the stack saves the "invert" value to keep track of what
+ *    the current inversion is. As "!(a && !b || c)" would require all
+ *    predicates being affected separately by the "!" before the parentheses.
+ *    And that would end up being equivalent to "(!a || b) && !c"
+ *
+ * #3 If the token is an "!", the current "invert" value gets inverted, and
+ *    the loop continues. Note, if the next token is a predicate, then
+ *    this "invert" value is only valid for the current program entry,
+ *    and does not affect other predicates later on.
+ *
+ * The only other acceptable token is the predicate string.
+ *
+ * #4 A new entry into the program is added saving: the predicate and the
+ *    current value of "invert". The target is currently assigned to the
+ *    previous program index (this will not be its final value).
+ *
+ * #5 We now enter another loop and look at the next token. The only valid
+ *    tokens are ")", "&&", "||" or end of the input string "\0".
+ *
+ * #6 The invert variable is reset to the current value saved on the top of
+ *    the stack.
+ *
+ * #7 The top of the stack holds not only the current invert value, but also
+ *    if a "&&" or "||" needs to be processed. Note, the "&&" takes higher
+ *    precedence than "||". That is "a && b || c && d" is equivalent to
+ *    "(a && b) || (c && d)". Thus the first thing to do is to see if "&&" needs
+ *    to be processed. This is the case if an "&&" was the last token. If it was
+ *    then we call update_preds(). This takes the program, the current index in
+ *    the program, and the current value of "invert".  More will be described
+ *    below about this function.
+ *
+ * #8 If the next token is "&&" then we set a flag in the top of the stack
+ *    that denotes that "&&" needs to be processed, break out of this loop
+ *    and continue with the outer loop.
+ *
+ * #9 Otherwise, if a "||" needs to be processed then update_preds() is called.
+ *    This is called with the program, the current index in the program, but
+ *    this time with an inverted value of "invert" (that is !invert). This is
+ *    because the value taken will become the "when_to_branch" value of the
+ *    program.
+ *    Note, this is called when the next token is not an "&&". As stated before,
+ *    "&&" takes higher precedence, and "||" should not be processed yet if the
+ *    next logical operation is "&&".
+ *
+ * #10 If the next token is "||" then we set a flag in the top of the stack
+ *     that denotes that "||" needs to be processed, break out of this loop
+ *     and continue with the outer loop.
+ *
+ * #11 If this is the end of the input string "\0" then we break out of both
+ *     loops.
+ *
+ * #12 Otherwise, the next token is ")", where we pop the stack and continue
+ *     this inner loop.
+ *
+ * Now to discuss the update_preds() function, as that is key to the setting up
+ * of the program. Remember the "target" of the program is initialized to the
+ * previous index and not the "l" label. The target holds the index into the
+ * program that gets affected by the operand. Thus if we have something like
+ *  "a || b && c", when we process "a" the target will be "-1" (undefined).
+ * When we process "b", its target is "0", which is the index of "a", as that's
+ * the predicate that is affected by "||". But because the next token after "b"
+ * is "&&" we don't call update_preds(). Instead continue to "c". As the
+ * next token after "c" is not "&&" but the end of input, we first process the
+ * "&&" by calling update_preds() for the "&&" then we process the "||" by
+ * calling update_preds() with the values for processing "||".
+ *
+ * What does that mean? What update_preds() does is to first save the "target"
+ * of the program entry indexed by the current program entry's "target"
+ * (remember the "target" is initialized to previous program entry), and then
+ * sets that "target" to the current index which represents the label "l#".
+ * That entry's "when_to_branch" is set to the value passed in (the "invert"
+ * or "!invert"). Then it sets the current program entry's target to the saved
+ * "target" value (the old value of the program that had its "target" updated
+ * to the label).
+ *
+ * Looking back at "a || b && c", we have the following steps:
+ *  "a"  - prog[0] = { "a", X, -1 } // pred, when_to_branch, target
+ *  "||" - flag that we need to process "||"; continue outer loop
+ *  "b"  - prog[1] = { "b", X, 0 }
+ *  "&&" - flag that we need to process "&&"; continue outer loop
+ * (Notice we did not process "||")
+ *  "c"  - prog[2] = { "c", X, 1 }
+ *  update_preds(prog, 2, 0); // invert = 0 as we are processing "&&"
+ *    t = prog[2].target; // t = 1
+ *    s = prog[t].target; // s = 0
+ *    prog[t].target = 2; // Set target to "l2"
+ *    prog[t].when_to_branch = 0;
+ *    prog[2].target = s;
+ * update_preds(prog, 2, 1); // invert = 1 as we are now processing "||"
+ *    t = prog[2].target; // t = 0
+ *    s = prog[t].target; // s = -1
+ *    prog[t].target = 2; // Set target to "l2"
+ *    prog[t].when_to_branch = 1;
+ *    prog[2].target = s;
+ *
+ * #13 Which brings us to the final step of the first pass, which is to set
+ *     the last program entry's when_to_branch and target, which will be
+ *     when_to_branch = 0; target = N; ( the label after the program entry after
+ *     the last program entry processed above).
+ *
+ * If we denote "TRUE" to be the entry after the last program entry processed,
+ * and "FALSE" the program entry after that, we are now done with the first
+ * pass.
+ *
+ * Making the above "a || b && c" have a program of:
+ *  prog[0] = { "a", 1, 2 }
+ *  prog[1] = { "b", 0, 2 }
+ *  prog[2] = { "c", 0, 3 }
+ *
+ * Which translates into:
+ * n0: r = a; l0: if (r) goto l2;
+ * n1: r = b; l1: if (!r) goto l2;
+ * n2: r = c; l2: if (!r) goto l3;  // Which is the same as "goto F;"
+ * T: return TRUE; l3:
+ * F: return FALSE
+ *
+ * Although, after the first pass, the program is correct, it is
+ * inefficient. The simple sample of "a || b && c" could easily have been
+ * converted into:
+ * n0: r = a; if (r) goto T
+ * n1: r = b; if (!r) goto F
+ * n2: r = c; if (!r) goto F
+ * T: return TRUE;
+ * F: return FALSE;
+ *
+ * The First Pass is over the input string. The next two passes are over
+ * the program itself.
+ *
+ * ** SECOND PASS **
+ *
+ * Which brings us to the second pass. If a jump to a label has the
+ * same condition as that label, it can instead jump to its target.
+ * The original example of "a && !(!b || (c && g)) || d || e && !f"
+ * where the first pass gives us:
+ *
+ * n1: r=a;       l1: if (!r) goto l4;
+ * n2: r=b;       l2: if (!r) goto l4;
+ * n3: r=c; r=!r; l3: if (r) goto l4;
+ * n4: r=g; r=!r; l4: if (r) goto l5;
+ * n5: r=d;       l5: if (r) goto T
+ * n6: r=e;       l6: if (!r) goto l7;
+ * n7: r=f; r=!r; l7: if (!r) goto F;
+ * T: return TRUE;
+ * F: return FALSE
+ *
+ * We can see that "l3: if (r) goto l4;" and at l4, we have "if (r) goto l5;".
+ * And "l5: if (r) goto T", we could optimize this by converting l3 and l4
+ * to go directly to T. To accomplish this, we start from the last
+ * entry in the program and work our way back. If the target of the entry
+ * has the same "when_to_branch" then we could use that entry's target.
+ * Doing this, the above would end up as:
+ *
+ * n1: r=a;       l1: if (!r) goto l4;
+ * n2: r=b;       l2: if (!r) goto l4;
+ * n3: r=c; r=!r; l3: if (r) goto T;
+ * n4: r=g; r=!r; l4: if (r) goto T;
+ * n5: r=d;       l5: if (r) goto T;
+ * n6: r=e;       l6: if (!r) goto F;
+ * n7: r=f; r=!r; l7: if (!r) goto F;
+ * T: return TRUE
+ * F: return FALSE
+ *
+ * In that same pass, if the "when_to_branch" doesn't match, we can simply
+ * go to the program entry after the label. That is, "l2: if (!r) goto l4;"
+ * where "l4: if (r) goto T;", then we can convert l2 to be:
+ * "l2: if (!r) goto n5;".
+ *
+ * This will have the second pass give us:
+ * n1: r=a;       l1: if (!r) goto n5;
+ * n2: r=b;       l2: if (!r) goto n5;
+ * n3: r=c; r=!r; l3: if (r) goto T;
+ * n4: r=g; r=!r; l4: if (r) goto T;
+ * n5: r=d;       l5: if (r) goto T
+ * n6: r=e;       l6: if (!r) goto F;
+ * n7: r=f; r=!r; l7: if (!r) goto F
+ * T: return TRUE
+ * F: return FALSE
+ *
+ * Notice, all the "l#" labels are no longer used, and they can now
+ * be discarded.
+ *
+ * ** THIRD PASS **
+ *
+ * For the third pass we deal with the inverts. As they simply just
+ * make the "when_to_branch" get inverted, a simple loop over the
+ * program that does: "when_to_branch ^= invert;" will do the
+ * job, leaving us with:
+ * n1: r=a; if (!r) goto n5;
+ * n2: r=b; if (!r) goto n5;
+ * n3: r=c; if (!r) goto T;
+ * n4: r=g; if (!r) goto T;
+ * n5: r=d; if (r) goto T
+ * n6: r=e; if (!r) goto F;
+ * n7: r=f; if (r) goto F
+ * T: return TRUE
+ * F: return FALSE
+ *
+ * As "r = a; if (!r) goto n5;" is obviously the same as
+ * "if (!a) goto n5;" without doing anything we can interpret the
+ * program as:
+ * n1: if (!a) goto n5;
+ * n2: if (!b) goto n5;
+ * n3: if (!c) goto T;
+ * n4: if (!g) goto T;
+ * n5: if (d) goto T
+ * n6: if (!e) goto F;
+ * n7: if (f) goto F
+ * T: return TRUE
+ * F: return FALSE
+ *
+ * Since the inverts are discarded at the end, there's no reason to store
+ * them in the program array (and waste memory). A separate array to hold
+ * the inverts is used and freed at the end.
+ */
+/*
+ * Compile filter @str into a prog_entry array that ends with the TRUE and
+ * FALSE entries; @nr_parens and @nr_preds size the work arrays.  Returns the
+ * allocated program (kfree() it), or an ERR_PTR() with parse errors in @pe.
+ */
+static struct prog_entry *
+predicate_parse(const char *str, int nr_parens, int nr_preds,
+		parse_pred_fn parse_pred, void *data,
+		struct filter_parse_error *pe)
+{
+	struct prog_entry *prog_stack, *prog;
+	const char *ptr = str;
+	char *inverts = NULL;
+	int *op_stack, *top;
+	int invert = 0;
+	int ret = -ENOMEM;
+	int N = 0;
+	int len, i;
+
+	nr_preds += 2; /* For TRUE and FALSE */
+	op_stack = kmalloc_array(nr_parens, sizeof(*op_stack), GFP_KERNEL);
+	if (!op_stack)
+		return ERR_PTR(-ENOMEM);
+	prog_stack = kmalloc_array(nr_preds, sizeof(*prog_stack), GFP_KERNEL);
+	inverts = kmalloc_array(nr_preds, sizeof(*inverts), GFP_KERNEL);
+	if (!prog_stack || !inverts) {
+		parse_error(pe, -ENOMEM, 0);
+		goto out_free;
+	}
+
+	top = op_stack;
+	prog = prog_stack;
+	*top = 0;
+
+	/* First pass */
+	while (*ptr) {						/* #1 */
+		const char *next = ptr++;
+
+		if (isspace(*next))
+			continue;
+
+		switch (*next) {
+		case '(':					/* #2 */
+			if (top - op_stack > nr_parens) {
+				ret = -EINVAL;
+				goto out_free;
+			}
+			*(++top) = invert;
+			continue;
+		case '!':					/* #3 */
+			if (!is_not(next))
+				break;
+			invert = !invert;
+			continue;
+		}
+
+		if (N >= nr_preds) {
+			parse_error(pe, FILT_ERR_TOO_MANY_PREDS, next - str);
+			goto out_free;
+		}
+
+		inverts[N] = invert;				/* #4 */
+		prog[N].target = N-1;
+
+		len = parse_pred(next, data, ptr - str, pe, &prog[N].pred);
+		if (len < 0) {
+			ret = len;
+			goto out_free;
+		}
+		ptr = next + len;
+		N++;
+		ret = -1;
+		while (1) {					/* #5 */
+			next = ptr++;
+			if (isspace(*next))
+				continue;
+
+			switch (*next) {
+			case ')':
+			case '\0':
+				break;
+			case '&':
+			case '|':
+				if (next[1] == next[0]) {
+					ptr++;
+					break;
+				} /* fall through: a lone '&' or '|' is an error */
+			default:
+				parse_error(pe, FILT_ERR_TOO_MANY_PREDS, next - str);
+				goto out_free;
+			}
+
+			invert = *top & INVERT;
+			if (*top & PROCESS_AND) {		/* #7 */
+				update_preds(prog, N - 1, invert);
+				*top &= ~PROCESS_AND;
+			}
+			if (*next == '&') {			/* #8 */
+				*top |= PROCESS_AND;
+				break;
+			}
+			if (*top & PROCESS_OR) {		/* #9 */
+				update_preds(prog, N - 1, !invert);
+				*top &= ~PROCESS_OR;
+			}
+			if (*next == '|') {			/* #10 */
+				*top |= PROCESS_OR;
+				break;
+			}
+			if (!*next)				/* #11 */
+				goto out;
+
+			if (top == op_stack) {			/* Too few '(' */
+				parse_error(pe, FILT_ERR_TOO_MANY_CLOSE, ptr - str);
+				goto out_free;
+			}
+			top--;					/* #12 */
+		}
+	}
+ out:
+	if (top != op_stack) {					/* Too many '(' */
+		parse_error(pe, FILT_ERR_TOO_MANY_OPEN, ptr - str);
+		goto out_free;
+	}
+	if (!N) {
+		/* No predicate was parsed: prog[N-1] would be out of bounds */
+		parse_error(pe, FILT_ERR_INVALID_FILTER, 0);
+		ret = -EINVAL;
+		goto out_free;
+	}
+
+	prog[N].pred = NULL;					/* #13 */
+	prog[N].target = 1;		/* TRUE */
+	prog[N+1].pred = NULL;
+	prog[N+1].target = 0;		/* FALSE */
+	prog[N-1].target = N;
+	prog[N-1].when_to_branch = false;
+
+	/* Second Pass */
+	for (i = N-1 ; i--; ) {
+		int target = prog[i].target;
+		if (prog[i].when_to_branch == prog[target].when_to_branch)
+			prog[i].target = prog[target].target;
+	}
+
+	/* Third Pass */
+	for (i = 0; i < N; i++) {
+		invert = inverts[i] ^ prog[i].when_to_branch;
+		prog[i].when_to_branch = invert;
+		/* Make sure the program always moves forward */
+		if (WARN_ON(prog[i].target <= i)) {
+			ret = -EINVAL;
+			goto out_free;
+		}
+	}
+
+	kfree(op_stack);	/* the work arrays are not part of the result */
+	kfree(inverts);
+	return prog;
+out_free:
+	kfree(op_stack);
+	kfree(prog_stack);
+	kfree(inverts);
+	return ERR_PTR(ret);
+}
+
 #define DEFINE_COMPARISON_PRED(type)					\
 static int filter_pred_LT_##type(struct filter_pred *pred, void *event)	\
 {									\
 	type *addr = (type *)(event + pred->offset);			\
 	type val = (type)pred->val;					\
-	int match = (*addr < val);					\
-	return !!match == !pred->not;					\
+	return *addr < val;						\
 }									\
 static int filter_pred_LE_##type(struct filter_pred *pred, void *event)	\
 {									\
 	type *addr = (type *)(event + pred->offset);			\
 	type val = (type)pred->val;					\
-	int match = (*addr <= val);					\
-	return !!match == !pred->not;					\
+	return *addr <= val;						\
 }									\
 static int filter_pred_GT_##type(struct filter_pred *pred, void *event)	\
 {									\
 	type *addr = (type *)(event + pred->offset);			\
 	type val = (type)pred->val;					\
-	int match = (*addr > val);					\
-	return !!match == !pred->not;					\
+	return *addr > val;					\
 }									\
 static int filter_pred_GE_##type(struct filter_pred *pred, void *event)	\
 {									\
 	type *addr = (type *)(event + pred->offset);			\
 	type val = (type)pred->val;					\
-	int match = (*addr >= val);					\
-	return !!match == !pred->not;					\
+	return *addr >= val;						\
 }									\
 static int filter_pred_BAND_##type(struct filter_pred *pred, void *event) \
 {									\
 	type *addr = (type *)(event + pred->offset);			\
 	type val = (type)pred->val;					\
-	int match = !!(*addr & val);					\
-	return match == !pred->not;					\
+	return !!(*addr & val);						\
 }									\
 static const filter_pred_fn_t pred_funcs_##type[] = {			\
-	filter_pred_LT_##type,						\
 	filter_pred_LE_##type,						\
-	filter_pred_GT_##type,						\
+	filter_pred_LT_##type,						\
 	filter_pred_GE_##type,						\
+	filter_pred_GT_##type,						\
 	filter_pred_BAND_##type,					\
 };
 
-#define PRED_FUNC_START			OP_LT
-
 #define DEFINE_EQUALITY_PRED(size)					\
 static int filter_pred_##size(struct filter_pred *pred, void *event)	\
 {									\
@@ -272,44 +704,36 @@ static int filter_pred_strloc(struct filter_pred *pred, void *event)
 static int filter_pred_cpu(struct filter_pred *pred, void *event)
 {
 	int cpu, cmp;
-	int match = 0;
 
 	cpu = raw_smp_processor_id();
 	cmp = pred->val;
 
 	switch (pred->op) {
 	case OP_EQ:
-		match = cpu == cmp;
-		break;
+		return cpu == cmp;
+	case OP_NE:
+		return cpu != cmp;
 	case OP_LT:
-		match = cpu < cmp;
-		break;
+		return cpu < cmp;
 	case OP_LE:
-		match = cpu <= cmp;
-		break;
+		return cpu <= cmp;
 	case OP_GT:
-		match = cpu > cmp;
-		break;
+		return cpu > cmp;
 	case OP_GE:
-		match = cpu >= cmp;
-		break;
+		return cpu >= cmp;
 	default:
-		break;
+		return 0;
 	}
-
-	return !!match == !pred->not;
 }
 
 /* Filter predicate for COMM. */
 static int filter_pred_comm(struct filter_pred *pred, void *event)
 {
-	int cmp, match;
+	int cmp;
 
 	cmp = pred->regex.match(current->comm, &pred->regex,
-				pred->regex.field_len);
-	match = cmp ^ pred->not;
-
-	return match;
+				TASK_COMM_LEN);
+	return cmp ^ pred->not;
 }
 
 static int filter_pred_none(struct filter_pred *pred, void *event)
@@ -366,6 +790,7 @@ static int regex_match_glob(char *str, struct regex *r, int len __maybe_unused)
 		return 1;
 	return 0;
 }
+
 /**
  * filter_parse_regex - parse a basic regex
  * @buff:   the raw regex
@@ -426,10 +851,9 @@ static void filter_build_regex(struct filter_pred *pred)
 	struct regex *r = &pred->regex;
 	char *search;
 	enum regex_type type = MATCH_FULL;
-	int not = 0;
 
 	if (pred->op == OP_GLOB) {
-		type = filter_parse_regex(r->pattern, r->len, &search, &not);
+		type = filter_parse_regex(r->pattern, r->len, &search, &pred->not);
 		r->len = strlen(search);
 		memmove(r->pattern, search, r->len+1);
 	}
@@ -451,210 +875,32 @@ static void filter_build_regex(struct filter_pred *pred)
 		r->match = regex_match_glob;
 		break;
 	}
-
-	pred->not ^= not;
-}
-
-enum move_type {
-	MOVE_DOWN,
-	MOVE_UP_FROM_LEFT,
-	MOVE_UP_FROM_RIGHT
-};
-
-static struct filter_pred *
-get_pred_parent(struct filter_pred *pred, struct filter_pred *preds,
-		int index, enum move_type *move)
-{
-	if (pred->parent & FILTER_PRED_IS_RIGHT)
-		*move = MOVE_UP_FROM_RIGHT;
-	else
-		*move = MOVE_UP_FROM_LEFT;
-	pred = &preds[pred->parent & ~FILTER_PRED_IS_RIGHT];
-
-	return pred;
-}
-
-enum walk_return {
-	WALK_PRED_ABORT,
-	WALK_PRED_PARENT,
-	WALK_PRED_DEFAULT,
-};
-
-typedef int (*filter_pred_walkcb_t) (enum move_type move,
-				     struct filter_pred *pred,
-				     int *err, void *data);
-
-static int walk_pred_tree(struct filter_pred *preds,
-			  struct filter_pred *root,
-			  filter_pred_walkcb_t cb, void *data)
-{
-	struct filter_pred *pred = root;
-	enum move_type move = MOVE_DOWN;
-	int done = 0;
-
-	if  (!preds)
-		return -EINVAL;
-
-	do {
-		int err = 0, ret;
-
-		ret = cb(move, pred, &err, data);
-		if (ret == WALK_PRED_ABORT)
-			return err;
-		if (ret == WALK_PRED_PARENT)
-			goto get_parent;
-
-		switch (move) {
-		case MOVE_DOWN:
-			if (pred->left != FILTER_PRED_INVALID) {
-				pred = &preds[pred->left];
-				continue;
-			}
-			goto get_parent;
-		case MOVE_UP_FROM_LEFT:
-			pred = &preds[pred->right];
-			move = MOVE_DOWN;
-			continue;
-		case MOVE_UP_FROM_RIGHT:
- get_parent:
-			if (pred == root)
-				break;
-			pred = get_pred_parent(pred, preds,
-					       pred->parent,
-					       &move);
-			continue;
-		}
-		done = 1;
-	} while (!done);
-
-	/* We are fine. */
-	return 0;
-}
-
-/*
- * A series of AND or ORs where found together. Instead of
- * climbing up and down the tree branches, an array of the
- * ops were made in order of checks. We can just move across
- * the array and short circuit if needed.
- */
-static int process_ops(struct filter_pred *preds,
-		       struct filter_pred *op, void *rec)
-{
-	struct filter_pred *pred;
-	int match = 0;
-	int type;
-	int i;
-
-	/*
-	 * Micro-optimization: We set type to true if op
-	 * is an OR and false otherwise (AND). Then we
-	 * just need to test if the match is equal to
-	 * the type, and if it is, we can short circuit the
-	 * rest of the checks:
-	 *
-	 * if ((match && op->op == OP_OR) ||
-	 *     (!match && op->op == OP_AND))
-	 *	  return match;
-	 */
-	type = op->op == OP_OR;
-
-	for (i = 0; i < op->val; i++) {
-		pred = &preds[op->ops[i]];
-		if (!WARN_ON_ONCE(!pred->fn))
-			match = pred->fn(pred, rec);
-		if (!!match == type)
-			break;
-	}
-	/* If not of not match is equal to not of not, then it is a match */
-	return !!match == !op->not;
-}
-
-struct filter_match_preds_data {
-	struct filter_pred *preds;
-	int match;
-	void *rec;
-};
-
-static int filter_match_preds_cb(enum move_type move, struct filter_pred *pred,
-				 int *err, void *data)
-{
-	struct filter_match_preds_data *d = data;
-
-	*err = 0;
-	switch (move) {
-	case MOVE_DOWN:
-		/* only AND and OR have children */
-		if (pred->left != FILTER_PRED_INVALID) {
-			/* If ops is set, then it was folded. */
-			if (!pred->ops)
-				return WALK_PRED_DEFAULT;
-			/* We can treat folded ops as a leaf node */
-			d->match = process_ops(d->preds, pred, d->rec);
-		} else {
-			if (!WARN_ON_ONCE(!pred->fn))
-				d->match = pred->fn(pred, d->rec);
-		}
-
-		return WALK_PRED_PARENT;
-	case MOVE_UP_FROM_LEFT:
-		/*
-		 * Check for short circuits.
-		 *
-		 * Optimization: !!match == (pred->op == OP_OR)
-		 *   is the same as:
-		 * if ((match && pred->op == OP_OR) ||
-		 *     (!match && pred->op == OP_AND))
-		 */
-		if (!!d->match == (pred->op == OP_OR))
-			return WALK_PRED_PARENT;
-		break;
-	case MOVE_UP_FROM_RIGHT:
-		break;
-	}
-
-	return WALK_PRED_DEFAULT;
 }
 
 /* return 1 if event matches, 0 otherwise (discard) */
 int filter_match_preds(struct event_filter *filter, void *rec)
 {
-	struct filter_pred *preds;
-	struct filter_pred *root;
-	struct filter_match_preds_data data = {
-		/* match is currently meaningless */
-		.match = -1,
-		.rec   = rec,
-	};
-	int n_preds, ret;
+	struct prog_entry *prog;
+	int i;
 
 	/* no filter is considered a match */
 	if (!filter)
 		return 1;
 
-	n_preds = filter->n_preds;
-	if (!n_preds)
+	prog = rcu_dereference_sched(filter->prog);
+	if (!prog)
 		return 1;
 
-	/*
-	 * n_preds, root and filter->preds are protect with preemption disabled.
-	 */
-	root = rcu_dereference_sched(filter->root);
-	if (!root)
-		return 1;
-
-	data.preds = preds = rcu_dereference_sched(filter->preds);
-	ret = walk_pred_tree(preds, root, filter_match_preds_cb, &data);
-	WARN_ON(ret);
-	return data.match;
+	for (i = 0; prog[i].pred; i++) {
+		struct filter_pred *pred = prog[i].pred;
+		int match = pred->fn(pred, rec);
+		if (match == prog[i].when_to_branch)
+			i = prog[i].target;
+	}
+	return prog[i].target;
 }
 EXPORT_SYMBOL_GPL(filter_match_preds);
 
-static void parse_error(struct filter_parse_state *ps, int err, int pos)
-{
-	ps->lasterr = err;
-	ps->lasterr_pos = pos;
-}
-
 static void remove_filter_string(struct event_filter *filter)
 {
 	if (!filter)
@@ -664,57 +910,44 @@ static void remove_filter_string(struct event_filter *filter)
 	filter->filter_string = NULL;
 }
 
-static int replace_filter_string(struct event_filter *filter,
-				 char *filter_string)
-{
-	kfree(filter->filter_string);
-	filter->filter_string = kstrdup(filter_string, GFP_KERNEL);
-	if (!filter->filter_string)
-		return -ENOMEM;
-
-	return 0;
-}
-
-static int append_filter_string(struct event_filter *filter,
-				char *string)
-{
-	int newlen;
-	char *new_filter_string;
-
-	BUG_ON(!filter->filter_string);
-	newlen = strlen(filter->filter_string) + strlen(string) + 1;
-	new_filter_string = kmalloc(newlen, GFP_KERNEL);
-	if (!new_filter_string)
-		return -ENOMEM;
-
-	strcpy(new_filter_string, filter->filter_string);
-	strcat(new_filter_string, string);
-	kfree(filter->filter_string);
-	filter->filter_string = new_filter_string;
-
-	return 0;
-}
-
-static void append_filter_err(struct filter_parse_state *ps,
+static void append_filter_err(struct filter_parse_error *pe,
 			      struct event_filter *filter)
 {
-	int pos = ps->lasterr_pos;
-	char *buf, *pbuf;
+	struct trace_seq *s;
+	int pos = pe->lasterr_pos;
+	char *buf;
+	int len;
 
-	buf = (char *)__get_free_page(GFP_KERNEL);
-	if (!buf)
+	if (WARN_ON(!filter->filter_string))
 		return;
 
-	append_filter_string(filter, "\n");
-	memset(buf, ' ', PAGE_SIZE);
-	if (pos > PAGE_SIZE - 128)
-		pos = 0;
-	buf[pos] = '^';
-	pbuf = &buf[pos] + 1;
+	s = kmalloc(sizeof(*s), GFP_KERNEL);
+	if (!s)
+		return;
+	trace_seq_init(s);
+
+	len = strlen(filter->filter_string);
+	if (pos > len)
+		pos = len;
+
+	/* "%*s" width is 1-based: bump pos so '^' lands under that offset */
+	if (pos)
+		pos++;
 
-	sprintf(pbuf, "\nparse_error: %s\n", err_text[ps->lasterr]);
-	append_filter_string(filter, buf);
-	free_page((unsigned long) buf);
+	trace_seq_puts(s, filter->filter_string);
+	if (pe->lasterr > 0) {
+		trace_seq_printf(s, "\n%*s", pos, "^");
+		trace_seq_printf(s, "\nparse_error: %s\n", err_text[pe->lasterr]);
+	} else {
+		trace_seq_printf(s, "\nError: (%d)\n", pe->lasterr);
+	}
+	trace_seq_putc(s, 0);
+	buf = kmemdup_nul(s->buffer, s->seq.len, GFP_KERNEL);
+	if (buf) {
+		kfree(filter->filter_string);
+		filter->filter_string = buf;
+	}
+	kfree(s);
 }
 
 static inline struct event_filter *event_filter(struct trace_event_file *file)
@@ -747,108 +980,18 @@ void print_subsystem_event_filter(struct event_subsystem *system,
 	mutex_unlock(&event_mutex);
 }
 
-static int __alloc_pred_stack(struct pred_stack *stack, int n_preds)
-{
-	stack->preds = kcalloc(n_preds + 1, sizeof(*stack->preds), GFP_KERNEL);
-	if (!stack->preds)
-		return -ENOMEM;
-	stack->index = n_preds;
-	return 0;
-}
-
-static void __free_pred_stack(struct pred_stack *stack)
-{
-	kfree(stack->preds);
-	stack->index = 0;
-}
-
-static int __push_pred_stack(struct pred_stack *stack,
-			     struct filter_pred *pred)
-{
-	int index = stack->index;
-
-	if (WARN_ON(index == 0))
-		return -ENOSPC;
-
-	stack->preds[--index] = pred;
-	stack->index = index;
-	return 0;
-}
-
-static struct filter_pred *
-__pop_pred_stack(struct pred_stack *stack)
-{
-	struct filter_pred *pred;
-	int index = stack->index;
-
-	pred = stack->preds[index++];
-	if (!pred)
-		return NULL;
-
-	stack->index = index;
-	return pred;
-}
-
-static int filter_set_pred(struct event_filter *filter,
-			   int idx,
-			   struct pred_stack *stack,
-			   struct filter_pred *src)
-{
-	struct filter_pred *dest = &filter->preds[idx];
-	struct filter_pred *left;
-	struct filter_pred *right;
-
-	*dest = *src;
-	dest->index = idx;
-
-	if (dest->op == OP_OR || dest->op == OP_AND) {
-		right = __pop_pred_stack(stack);
-		left = __pop_pred_stack(stack);
-		if (!left || !right)
-			return -EINVAL;
-		/*
-		 * If both children can be folded
-		 * and they are the same op as this op or a leaf,
-		 * then this op can be folded.
-		 */
-		if (left->index & FILTER_PRED_FOLD &&
-		    ((left->op == dest->op && !left->not) ||
-		     left->left == FILTER_PRED_INVALID) &&
-		    right->index & FILTER_PRED_FOLD &&
-		    ((right->op == dest->op && !right->not) ||
-		     right->left == FILTER_PRED_INVALID))
-			dest->index |= FILTER_PRED_FOLD;
-
-		dest->left = left->index & ~FILTER_PRED_FOLD;
-		dest->right = right->index & ~FILTER_PRED_FOLD;
-		left->parent = dest->index & ~FILTER_PRED_FOLD;
-		right->parent = dest->index | FILTER_PRED_IS_RIGHT;
-	} else {
-		/*
-		 * Make dest->left invalid to be used as a quick
-		 * way to know this is a leaf node.
-		 */
-		dest->left = FILTER_PRED_INVALID;
-
-		/* All leafs allow folding the parent ops. */
-		dest->index |= FILTER_PRED_FOLD;
-	}
-
-	return __push_pred_stack(stack, dest);
-}
-
-static void __free_preds(struct event_filter *filter)
+static void free_prog(struct event_filter *filter)
 {
+	struct prog_entry *prog;
 	int i;
 
-	if (filter->preds) {
-		for (i = 0; i < filter->n_preds; i++)
-			kfree(filter->preds[i].ops);
-		kfree(filter->preds);
-		filter->preds = NULL;
-	}
-	filter->a_preds = 0;
-	filter->n_preds = 0;
+	prog = rcu_access_pointer(filter->prog);
+	if (!prog)
+		return;
+
+	for (i = 0; prog[i].pred; i++)
+		kfree(prog[i].pred);
+	kfree(prog);
 }
 
 static void filter_disable(struct trace_event_file *file)
@@ -866,7 +1009,7 @@ static void __free_filter(struct event_filter *filter)
 	if (!filter)
 		return;
 
-	__free_preds(filter);
+	free_prog(filter);
 	kfree(filter->filter_string);
 	kfree(filter);
 }
@@ -876,38 +1019,6 @@ void free_event_filter(struct event_filter *filter)
 	__free_filter(filter);
 }
 
-static struct event_filter *__alloc_filter(void)
-{
-	struct event_filter *filter;
-
-	filter = kzalloc(sizeof(*filter), GFP_KERNEL);
-	return filter;
-}
-
-static int __alloc_preds(struct event_filter *filter, int n_preds)
-{
-	struct filter_pred *pred;
-	int i;
-
-	if (filter->preds)
-		__free_preds(filter);
-
-	filter->preds = kcalloc(n_preds, sizeof(*filter->preds), GFP_KERNEL);
-
-	if (!filter->preds)
-		return -ENOMEM;
-
-	filter->a_preds = n_preds;
-	filter->n_preds = 0;
-
-	for (i = 0; i < n_preds; i++) {
-		pred = &filter->preds[i];
-		pred->fn = filter_pred_none;
-	}
-
-	return 0;
-}
-
 static inline void __remove_filter(struct trace_event_file *file)
 {
 	filter_disable(file);
@@ -944,27 +1055,6 @@ static void filter_free_subsystem_filters(struct trace_subsystem_dir *dir,
 	}
 }
 
-static int filter_add_pred(struct filter_parse_state *ps,
-			   struct event_filter *filter,
-			   struct filter_pred *pred,
-			   struct pred_stack *stack)
-{
-	int err;
-
-	if (WARN_ON(filter->n_preds == filter->a_preds)) {
-		parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
-		return -ENOSPC;
-	}
-
-	err = filter_set_pred(filter, filter->n_preds, stack, pred);
-	if (err)
-		return err;
-
-	filter->n_preds++;
-
-	return 0;
-}
-
 int filter_assign_type(const char *type)
 {
 	if (strstr(type, "__data_loc") && strstr(type, "char"))
@@ -976,761 +1066,449 @@ int filter_assign_type(const char *type)
 	return FILTER_OTHER;
 }
 
-static bool is_legal_op(struct ftrace_event_field *field, enum filter_op_ids op)
-{
-	if (is_string_field(field) &&
-	    (op != OP_EQ && op != OP_NE && op != OP_GLOB))
-		return false;
-	if (!is_string_field(field) && op == OP_GLOB)
-		return false;
-
-	return true;
-}
-
 static filter_pred_fn_t select_comparison_fn(enum filter_op_ids op,
 					    int field_size, int field_is_signed)
 {
 	filter_pred_fn_t fn = NULL;
+	int pred_func_index = -1;
+
+	switch (op) {
+	case OP_EQ:
+	case OP_NE:
+		break;
+	default:
+		if (WARN_ON_ONCE(op < PRED_FUNC_START))
+			return NULL;
+		pred_func_index = op - PRED_FUNC_START;
+		if (WARN_ON_ONCE(pred_func_index > PRED_FUNC_MAX))
+			return NULL;
+	}
 
 	switch (field_size) {
 	case 8:
-		if (op == OP_EQ || op == OP_NE)
+		if (pred_func_index < 0)
 			fn = filter_pred_64;
 		else if (field_is_signed)
-			fn = pred_funcs_s64[op - PRED_FUNC_START];
+			fn = pred_funcs_s64[pred_func_index];
 		else
-			fn = pred_funcs_u64[op - PRED_FUNC_START];
+			fn = pred_funcs_u64[pred_func_index];
 		break;
 	case 4:
-		if (op == OP_EQ || op == OP_NE)
+		if (pred_func_index < 0)
 			fn = filter_pred_32;
 		else if (field_is_signed)
-			fn = pred_funcs_s32[op - PRED_FUNC_START];
+			fn = pred_funcs_s32[pred_func_index];
 		else
-			fn = pred_funcs_u32[op - PRED_FUNC_START];
+			fn = pred_funcs_u32[pred_func_index];
 		break;
 	case 2:
-		if (op == OP_EQ || op == OP_NE)
+		if (pred_func_index < 0)
 			fn = filter_pred_16;
 		else if (field_is_signed)
-			fn = pred_funcs_s16[op - PRED_FUNC_START];
+			fn = pred_funcs_s16[pred_func_index];
 		else
-			fn = pred_funcs_u16[op - PRED_FUNC_START];
+			fn = pred_funcs_u16[pred_func_index];
 		break;
 	case 1:
-		if (op == OP_EQ || op == OP_NE)
+		if (pred_func_index < 0)
 			fn = filter_pred_8;
 		else if (field_is_signed)
-			fn = pred_funcs_s8[op - PRED_FUNC_START];
+			fn = pred_funcs_s8[pred_func_index];
 		else
-			fn = pred_funcs_u8[op - PRED_FUNC_START];
+			fn = pred_funcs_u8[pred_func_index];
 		break;
 	}
 
 	return fn;
 }
 
-static int init_pred(struct filter_parse_state *ps,
-		     struct ftrace_event_field *field,
-		     struct filter_pred *pred)
-
+/* Called when a predicate is encountered by predicate_parse() */
+static int parse_pred(const char *str, void *data,
+		      int pos, struct filter_parse_error *pe,
+		      struct filter_pred **pred_ptr)
 {
-	filter_pred_fn_t fn = filter_pred_none;
-	unsigned long long val;
+	struct trace_event_call *call = data;
+	struct ftrace_event_field *field;
+	struct filter_pred *pred = NULL;
+	char num_buf[24];	/* Big enough to hold an address */
+	char *field_name;
+	char q;
+	u64 val;
+	int len;
 	int ret;
+	int op;
+	int s;
+	int i = 0;
 
-	pred->offset = field->offset;
-
-	if (!is_legal_op(field, pred->op)) {
-		parse_error(ps, FILT_ERR_ILLEGAL_FIELD_OP, 0);
-		return -EINVAL;
-	}
-
-	if (field->filter_type == FILTER_COMM) {
-		filter_build_regex(pred);
-		fn = filter_pred_comm;
-		pred->regex.field_len = TASK_COMM_LEN;
-	} else if (is_string_field(field)) {
-		filter_build_regex(pred);
-
-		if (field->filter_type == FILTER_STATIC_STRING) {
-			fn = filter_pred_string;
-			pred->regex.field_len = field->size;
-		} else if (field->filter_type == FILTER_DYN_STRING)
-			fn = filter_pred_strloc;
-		else
-			fn = filter_pred_pchar;
-	} else if (is_function_field(field)) {
-		if (strcmp(field->name, "ip")) {
-			parse_error(ps, FILT_ERR_IP_FIELD_ONLY, 0);
-			return -EINVAL;
-		}
-	} else {
-		if (field->is_signed)
-			ret = kstrtoll(pred->regex.pattern, 0, &val);
-		else
-			ret = kstrtoull(pred->regex.pattern, 0, &val);
-		if (ret) {
-			parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0);
-			return -EINVAL;
-		}
-		pred->val = val;
-
-		if (field->filter_type == FILTER_CPU)
-			fn = filter_pred_cpu;
-		else
-			fn = select_comparison_fn(pred->op, field->size,
-					  field->is_signed);
-		if (!fn) {
-			parse_error(ps, FILT_ERR_INVALID_OP, 0);
-			return -EINVAL;
-		}
-	}
-
-	if (pred->op == OP_NE)
-		pred->not ^= 1;
-
-	pred->fn = fn;
-	return 0;
-}
-
-static void parse_init(struct filter_parse_state *ps,
-		       struct filter_op *ops,
-		       char *infix_string)
-{
-	memset(ps, '\0', sizeof(*ps));
-
-	ps->infix.string = infix_string;
-	ps->infix.cnt = strlen(infix_string);
-	ps->ops = ops;
-
-	INIT_LIST_HEAD(&ps->opstack);
-	INIT_LIST_HEAD(&ps->postfix);
-}
-
-static char infix_next(struct filter_parse_state *ps)
-{
-	if (!ps->infix.cnt)
-		return 0;
-
-	ps->infix.cnt--;
+	/* First find the field to associate to */
+	while (isspace(str[i]))
+		i++;
+	s = i;
 
-	return ps->infix.string[ps->infix.tail++];
-}
-
-static char infix_peek(struct filter_parse_state *ps)
-{
-	if (ps->infix.tail == strlen(ps->infix.string))
-		return 0;
-
-	return ps->infix.string[ps->infix.tail];
-}
+	while (isalnum(str[i]) || str[i] == '_')
+		i++;
 
-static void infix_advance(struct filter_parse_state *ps)
-{
-	if (!ps->infix.cnt)
-		return;
+	len = i - s;
 
-	ps->infix.cnt--;
-	ps->infix.tail++;
-}
+	if (!len)
+		return -1;
 
-static inline int is_precedence_lower(struct filter_parse_state *ps,
-				      int a, int b)
-{
-	return ps->ops[a].precedence < ps->ops[b].precedence;
-}
+	field_name = kmemdup_nul(str + s, len, GFP_KERNEL);
+	if (!field_name)
+		return -ENOMEM;
 
-static inline int is_op_char(struct filter_parse_state *ps, char c)
-{
-	int i;
+	/* Make sure that the field exists */
 
-	for (i = 0; strcmp(ps->ops[i].string, "OP_NONE"); i++) {
-		if (ps->ops[i].string[0] == c)
-			return 1;
+	field = trace_find_event_field(call, field_name);
+	kfree(field_name);
+	if (!field) {
+		parse_error(pe, FILT_ERR_FIELD_NOT_FOUND, pos + i);
+		return -EINVAL;
 	}
 
-	return 0;
-}
-
-static int infix_get_op(struct filter_parse_state *ps, char firstc)
-{
-	char nextc = infix_peek(ps);
-	char opstr[3];
-	int i;
-
-	opstr[0] = firstc;
-	opstr[1] = nextc;
-	opstr[2] = '\0';
+	while (isspace(str[i]))
+		i++;
 
-	for (i = 0; strcmp(ps->ops[i].string, "OP_NONE"); i++) {
-		if (!strcmp(opstr, ps->ops[i].string)) {
-			infix_advance(ps);
-			return ps->ops[i].id;
-		}
+	/* Make sure this op is supported */
+	for (op = 0; ops[op]; op++) {
+		/* This is why '<=' must come before '<' in ops[] */
+		if (strncmp(str + i, ops[op], strlen(ops[op])) == 0)
+			break;
 	}
 
-	opstr[1] = '\0';
-
-	for (i = 0; strcmp(ps->ops[i].string, "OP_NONE"); i++) {
-		if (!strcmp(opstr, ps->ops[i].string))
-			return ps->ops[i].id;
+	if (!ops[op]) {
+		parse_error(pe, FILT_ERR_INVALID_OP, pos + i);
+		goto err_free;
 	}
 
-	return OP_NONE;
-}
-
-static inline void clear_operand_string(struct filter_parse_state *ps)
-{
-	memset(ps->operand.string, '\0', MAX_FILTER_STR_VAL);
-	ps->operand.tail = 0;
-}
-
-static inline int append_operand_char(struct filter_parse_state *ps, char c)
-{
-	if (ps->operand.tail == MAX_FILTER_STR_VAL - 1)
-		return -EINVAL;
+	i += strlen(ops[op]);
 
-	ps->operand.string[ps->operand.tail++] = c;
+	while (isspace(str[i]))
+		i++;
 
-	return 0;
-}
+	s = i;
 
-static int filter_opstack_push(struct filter_parse_state *ps,
-			       enum filter_op_ids op)
-{
-	struct opstack_op *opstack_op;
-
-	opstack_op = kmalloc(sizeof(*opstack_op), GFP_KERNEL);
-	if (!opstack_op)
+	pred = kzalloc(sizeof(*pred), GFP_KERNEL);
+	if (!pred)
 		return -ENOMEM;
 
-	opstack_op->op = op;
-	list_add(&opstack_op->list, &ps->opstack);
-
-	return 0;
-}
+	pred->field = field;
+	pred->offset = field->offset;
+	pred->op = op;
 
-static int filter_opstack_empty(struct filter_parse_state *ps)
-{
-	return list_empty(&ps->opstack);
-}
+	if (ftrace_event_is_function(call)) {
+		/*
+		 * Perf does things different with function events.
+		 * It only allows an "ip" field, and expects a string.
+		 * But the string does not need to be surrounded by quotes.
+		 * The assigned function is a nop (perf doesn't use it);
+		 * the operand text is only captured into pred->regex.
+		 */
+		if (strcmp(field->name, "ip") != 0) {
+			parse_error(pe, FILT_ERR_IP_FIELD_ONLY, pos + i);
+			goto err_free;
+		}
+		pred->fn = filter_pred_none;
+
+		/*
+		 * Quotes are not required, but if they exist then we need
+		 * to read them till we hit a matching one.
+		 */
+		if (str[i] == '\'' || str[i] == '"')
+			q = str[i];
+		else
+			q = 0;
+		/* Step over the first char, but never past the terminator */
+		for (i += !!str[i]; str[i]; i++) {
+			if (q && str[i] == q)
+				break;
+			if (!q && (str[i] == ')' || str[i] == '&' ||
+				   str[i] == '|'))
+				break;
+		}
+		/* Skip quotes */
+		if (q)
+			s++;
+		len = i - s;
+		if (len >= MAX_FILTER_STR_VAL) {
+			parse_error(pe, FILT_ERR_OPERAND_TOO_LONG, pos + i);
+			goto err_free;
+		}
 
-static int filter_opstack_top(struct filter_parse_state *ps)
-{
-	struct opstack_op *opstack_op;
+		pred->regex.len = len;
+		strncpy(pred->regex.pattern, str + s, len);
+		pred->regex.pattern[len] = 0;
+
+	/* This is either a string, or an integer */
+	} else if (str[i] == '\'' || str[i] == '"') {
+		char q = str[i];
+
+		/* Make sure the op is OK for strings */
+		switch (op) {
+		case OP_NE:
+			pred->not = 1;
+			/* Fall through */
+		case OP_GLOB:
+		case OP_EQ:
+			break;
+		default:
+			parse_error(pe, FILT_ERR_ILLEGAL_FIELD_OP, pos + i);
+			goto err_free;
+		}
 
-	if (filter_opstack_empty(ps))
-		return OP_NONE;
+		/* Make sure the field is OK for strings */
+		if (!is_string_field(field)) {
+			parse_error(pe, FILT_ERR_EXPECT_DIGIT, pos + i);
+			goto err_free;
+		}
 
-	opstack_op = list_first_entry(&ps->opstack, struct opstack_op, list);
+		for (i++; str[i]; i++) {
+			if (str[i] == q)
+				break;
+		}
+		if (!str[i]) {
+			parse_error(pe, FILT_ERR_MISSING_QUOTE, pos + i);
+			goto err_free;
+		}
 
-	return opstack_op->op;
-}
+		/* Skip quotes */
+		s++;
+		len = i - s;
+		if (len >= MAX_FILTER_STR_VAL) {
+			parse_error(pe, FILT_ERR_OPERAND_TOO_LONG, pos + i);
+			goto err_free;
+		}
 
-static int filter_opstack_pop(struct filter_parse_state *ps)
-{
-	struct opstack_op *opstack_op;
-	enum filter_op_ids op;
+		pred->regex.len = len;
+		strncpy(pred->regex.pattern, str + s, len);
+		pred->regex.pattern[len] = 0;
 
-	if (filter_opstack_empty(ps))
-		return OP_NONE;
+		filter_build_regex(pred);
 
-	opstack_op = list_first_entry(&ps->opstack, struct opstack_op, list);
-	op = opstack_op->op;
-	list_del(&opstack_op->list);
+		if (field->filter_type == FILTER_COMM) {
+			pred->fn = filter_pred_comm;
 
-	kfree(opstack_op);
+		} else if (field->filter_type == FILTER_STATIC_STRING) {
+			pred->fn = filter_pred_string;
+			pred->regex.field_len = field->size;
 
-	return op;
-}
+		} else if (field->filter_type == FILTER_DYN_STRING)
+			pred->fn = filter_pred_strloc;
+		else
+			pred->fn = filter_pred_pchar;
+		/* go past the last quote */
+		i++;
 
-static void filter_opstack_clear(struct filter_parse_state *ps)
-{
-	while (!filter_opstack_empty(ps))
-		filter_opstack_pop(ps);
-}
+	} else if (isdigit(str[i]) || str[i] == '-') {
 
-static char *curr_operand(struct filter_parse_state *ps)
-{
-	return ps->operand.string;
-}
+		/* Make sure the field is not a string */
+		if (is_string_field(field)) {
+			parse_error(pe, FILT_ERR_EXPECT_STRING, pos + i);
+			goto err_free;
+		}
 
-static int postfix_append_operand(struct filter_parse_state *ps, char *operand)
-{
-	struct postfix_elt *elt;
+		if (op == OP_GLOB) {
+			parse_error(pe, FILT_ERR_ILLEGAL_FIELD_OP, pos + i);
+			goto err_free;
+		}
 
-	elt = kmalloc(sizeof(*elt), GFP_KERNEL);
-	if (!elt)
-		return -ENOMEM;
+		/* We allow 0xDEADBEEF, and one leading '-' (only at i == s) */
+		while (isalnum(str[i]) || (i == s && str[i] == '-'))
+			i++;
 
-	elt->op = OP_NONE;
-	elt->operand = kstrdup(operand, GFP_KERNEL);
-	if (!elt->operand) {
-		kfree(elt);
-		return -ENOMEM;
-	}
+		len = i - s;
+		/* 0xfeedfacedeadbeef is 18 chars max */
+		if (len >= sizeof(num_buf)) {
+			parse_error(pe, FILT_ERR_OPERAND_TOO_LONG, pos + i);
+			goto err_free;
+		}
 
-	list_add_tail(&elt->list, &ps->postfix);
+		strncpy(num_buf, str + s, len);
+		num_buf[len] = 0;
 
-	return 0;
-}
+		/* Make sure it is a value */
+		if (field->is_signed)
+			ret = kstrtoll(num_buf, 0, &val);
+		else
+			ret = kstrtoull(num_buf, 0, &val);
+		if (ret) {
+			parse_error(pe, FILT_ERR_ILLEGAL_INTVAL, pos + s);
+			goto err_free;
+		}
 
-static int postfix_append_op(struct filter_parse_state *ps, enum filter_op_ids op)
-{
-	struct postfix_elt *elt;
+		pred->val = val;
 
-	elt = kmalloc(sizeof(*elt), GFP_KERNEL);
-	if (!elt)
-		return -ENOMEM;
+		if (field->filter_type == FILTER_CPU)
+			pred->fn = filter_pred_cpu;
+		else {
+			pred->fn = select_comparison_fn(pred->op, field->size,
+							field->is_signed);
+			if (pred->op == OP_NE)
+				pred->not = 1;
+		}
 
-	elt->op = op;
-	elt->operand = NULL;
+	} else {
+		parse_error(pe, FILT_ERR_INVALID_VALUE, pos + i);
+		goto err_free;
+	}
 
-	list_add_tail(&elt->list, &ps->postfix);
+	*pred_ptr = pred;
+	return i;
 
-	return 0;
+err_free:
+	kfree(pred);
+	return -EINVAL;
 }
 
-static void postfix_clear(struct filter_parse_state *ps)
-{
-	struct postfix_elt *elt;
+enum {
+	TOO_MANY_CLOSE		= -1,
+	TOO_MANY_OPEN		= -2,
+	MISSING_QUOTE		= -3,
+};
 
-	while (!list_empty(&ps->postfix)) {
-		elt = list_first_entry(&ps->postfix, struct postfix_elt, list);
-		list_del(&elt->list);
-		kfree(elt->operand);
-		kfree(elt);
-	}
-}
+/*
+ * Read the filter string once to calculate the number of predicates
+ * (*preds) and how deep the parentheses go (*parens, outer "(E)" included).
+ *
+ * Returns (on failure *err is the offset in @str of the offending char):
+ *   0 - everything is fine (*err is 0, *parens and *preds are set)
+ *  -1 - too many ')'
+ *  -2 - too many '('
+ *  -3 - No matching quote
+ */
+static int calc_stack(const char *str, int *parens, int *preds, int *err)
+{
+	bool is_pred = false;
+	int nr_preds = 0;
+	int open = 1; /* Count the expression as "(E)" */
+	int last_quote = 0;
+	int max_open = 1;
+	int quote = 0;
+	int i;
 
-static int filter_parse(struct filter_parse_state *ps)
-{
-	enum filter_op_ids op, top_op;
-	int in_string = 0;
-	char ch;
+	*err = 0;
 
-	while ((ch = infix_next(ps))) {
-		if (ch == '"') {
-			in_string ^= 1;
+	for (i = 0; str[i]; i++) {
+		if (isspace(str[i]))
 			continue;
-		}
-
-		if (in_string)
-			goto parse_operand;
-
-		if (isspace(ch))
+		if (quote) {
+			if (str[i] == quote)
+			       quote = 0;
 			continue;
+		}
 
-		if (is_op_char(ps, ch)) {
-			op = infix_get_op(ps, ch);
-			if (op == OP_NONE) {
-				parse_error(ps, FILT_ERR_INVALID_OP, 0);
-				return -EINVAL;
-			}
-
-			if (strlen(curr_operand(ps))) {
-				postfix_append_operand(ps, curr_operand(ps));
-				clear_operand_string(ps);
-			}
-
-			while (!filter_opstack_empty(ps)) {
-				top_op = filter_opstack_top(ps);
-				if (!is_precedence_lower(ps, top_op, op)) {
-					top_op = filter_opstack_pop(ps);
-					postfix_append_op(ps, top_op);
-					continue;
-				}
+		switch (str[i]) {
+		case '\'':
+		case '"':
+			quote = str[i];
+			last_quote = i;
+			break;
+		case '|':
+		case '&':
+			if (str[i+1] != str[i])
 				break;
-			}
-
-			filter_opstack_push(ps, op);
+			is_pred = false;
 			continue;
-		}
-
-		if (ch == '(') {
-			filter_opstack_push(ps, OP_OPEN_PAREN);
+		case '(':
+			is_pred = false;
+			open++;
+			if (open > max_open)
+				max_open = open;
 			continue;
-		}
-
-		if (ch == ')') {
-			if (strlen(curr_operand(ps))) {
-				postfix_append_operand(ps, curr_operand(ps));
-				clear_operand_string(ps);
-			}
-
-			top_op = filter_opstack_pop(ps);
-			while (top_op != OP_NONE) {
-				if (top_op == OP_OPEN_PAREN)
-					break;
-				postfix_append_op(ps, top_op);
-				top_op = filter_opstack_pop(ps);
-			}
-			if (top_op == OP_NONE) {
-				parse_error(ps, FILT_ERR_UNBALANCED_PAREN, 0);
-				return -EINVAL;
+		case ')':
+			is_pred = false;
+			if (open == 1) {
+				*err = i;
+				return TOO_MANY_CLOSE;
 			}
+			open--;
 			continue;
 		}
-parse_operand:
-		if (append_operand_char(ps, ch)) {
-			parse_error(ps, FILT_ERR_OPERAND_TOO_LONG, 0);
-			return -EINVAL;
-		}
-	}
-
-	if (strlen(curr_operand(ps)))
-		postfix_append_operand(ps, curr_operand(ps));
-
-	while (!filter_opstack_empty(ps)) {
-		top_op = filter_opstack_pop(ps);
-		if (top_op == OP_NONE)
-			break;
-		if (top_op == OP_OPEN_PAREN) {
-			parse_error(ps, FILT_ERR_UNBALANCED_PAREN, 0);
-			return -EINVAL;
+		if (!is_pred) {
+			nr_preds++;
+			is_pred = true;
 		}
-		postfix_append_op(ps, top_op);
 	}
 
-	return 0;
-}
-
-static struct filter_pred *create_pred(struct filter_parse_state *ps,
-				       struct trace_event_call *call,
-				       enum filter_op_ids op,
-				       char *operand1, char *operand2)
-{
-	struct ftrace_event_field *field;
-	static struct filter_pred pred;
-
-	memset(&pred, 0, sizeof(pred));
-	pred.op = op;
-
-	if (op == OP_AND || op == OP_OR)
-		return &pred;
-
-	if (!operand1 || !operand2) {
-		parse_error(ps, FILT_ERR_MISSING_FIELD, 0);
-		return NULL;
-	}
-
-	field = trace_find_event_field(call, operand1);
-	if (!field) {
-		parse_error(ps, FILT_ERR_FIELD_NOT_FOUND, 0);
-		return NULL;
+	if (quote) {
+		*err = last_quote;
+		return MISSING_QUOTE;
 	}
 
-	strcpy(pred.regex.pattern, operand2);
-	pred.regex.len = strlen(pred.regex.pattern);
-	pred.field = field;
-	return init_pred(ps, field, &pred) ? NULL : &pred;
-}
-
-static int check_preds(struct filter_parse_state *ps)
-{
-	int n_normal_preds = 0, n_logical_preds = 0;
-	struct postfix_elt *elt;
-	int cnt = 0;
+	if (open != 1) {
+		int level = open;
 
-	list_for_each_entry(elt, &ps->postfix, list) {
-		if (elt->op == OP_NONE) {
-			cnt++;
-			continue;
-		}
-
-		if (elt->op == OP_AND || elt->op == OP_OR) {
-			n_logical_preds++;
-			cnt--;
-			continue;
+		/* find the bad open */
+		for (i--; i; i--) {
+			if (quote) {
+				if (str[i] == quote)
+					quote = 0;
+				continue;
+			}
+			switch (str[i]) {
+			case '(':
+				if (level == open) {
+					*err = i;
+					return TOO_MANY_OPEN;
+				}
+				level--;
+				break;
+			case ')':
+				level++;
+				break;
+			case '\'':
+			case '"':
+				quote = str[i];
+				break;
+			}
 		}
-		if (elt->op != OP_NOT)
-			cnt--;
-		n_normal_preds++;
-		/* all ops should have operands */
-		if (cnt < 0)
-			break;
-	}
-
-	if (cnt != 1 || !n_normal_preds || n_logical_preds >= n_normal_preds) {
-		parse_error(ps, FILT_ERR_INVALID_FILTER, 0);
-		return -EINVAL;
+		/* First character is the '(' with missing ')' */
+		*err = 0;
+		return TOO_MANY_OPEN;
 	}
 
+	/* Set the size of the required stacks */
+	*parens = max_open;
+	*preds = nr_preds;
 	return 0;
 }
 
-static int count_preds(struct filter_parse_state *ps)
-{
-	struct postfix_elt *elt;
-	int n_preds = 0;
-
-	list_for_each_entry(elt, &ps->postfix, list) {
-		if (elt->op == OP_NONE)
-			continue;
-		n_preds++;
-	}
-
-	return n_preds;
-}
-
-struct check_pred_data {
-	int count;
-	int max;
-};
-
-static int check_pred_tree_cb(enum move_type move, struct filter_pred *pred,
-			      int *err, void *data)
-{
-	struct check_pred_data *d = data;
-
-	if (WARN_ON(d->count++ > d->max)) {
-		*err = -EINVAL;
-		return WALK_PRED_ABORT;
-	}
-	return WALK_PRED_DEFAULT;
-}
-
-/*
- * The tree is walked at filtering of an event. If the tree is not correctly
- * built, it may cause an infinite loop. Check here that the tree does
- * indeed terminate.
- */
-static int check_pred_tree(struct event_filter *filter,
-			   struct filter_pred *root)
-{
-	struct check_pred_data data = {
-		/*
-		 * The max that we can hit a node is three times.
-		 * Once going down, once coming up from left, and
-		 * once coming up from right. This is more than enough
-		 * since leafs are only hit a single time.
-		 */
-		.max   = 3 * filter->n_preds,
-		.count = 0,
-	};
-
-	return walk_pred_tree(filter->preds, root,
-			      check_pred_tree_cb, &data);
-}
-
-static int count_leafs_cb(enum move_type move, struct filter_pred *pred,
-			  int *err, void *data)
-{
-	int *count = data;
-
-	if ((move == MOVE_DOWN) &&
-	    (pred->left == FILTER_PRED_INVALID))
-		(*count)++;
-
-	return WALK_PRED_DEFAULT;
-}
-
-static int count_leafs(struct filter_pred *preds, struct filter_pred *root)
-{
-	int count = 0, ret;
-
-	ret = walk_pred_tree(preds, root, count_leafs_cb, &count);
-	WARN_ON(ret);
-	return count;
-}
-
-struct fold_pred_data {
-	struct filter_pred *root;
-	int count;
-	int children;
-};
-
-static int fold_pred_cb(enum move_type move, struct filter_pred *pred,
-			int *err, void *data)
-{
-	struct fold_pred_data *d = data;
-	struct filter_pred *root = d->root;
-
-	if (move != MOVE_DOWN)
-		return WALK_PRED_DEFAULT;
-	if (pred->left != FILTER_PRED_INVALID)
-		return WALK_PRED_DEFAULT;
-
-	if (WARN_ON(d->count == d->children)) {
-		*err = -EINVAL;
-		return WALK_PRED_ABORT;
-	}
-
-	pred->index &= ~FILTER_PRED_FOLD;
-	root->ops[d->count++] = pred->index;
-	return WALK_PRED_DEFAULT;
-}
-
-static int fold_pred(struct filter_pred *preds, struct filter_pred *root)
-{
-	struct fold_pred_data data = {
-		.root  = root,
-		.count = 0,
-	};
-	int children;
-
-	/* No need to keep the fold flag */
-	root->index &= ~FILTER_PRED_FOLD;
-
-	/* If the root is a leaf then do nothing */
-	if (root->left == FILTER_PRED_INVALID)
-		return 0;
-
-	/* count the children */
-	children = count_leafs(preds, &preds[root->left]);
-	children += count_leafs(preds, &preds[root->right]);
-
-	root->ops = kcalloc(children, sizeof(*root->ops), GFP_KERNEL);
-	if (!root->ops)
-		return -ENOMEM;
-
-	root->val = children;
-	data.children = children;
-	return walk_pred_tree(preds, root, fold_pred_cb, &data);
-}
-
-static int fold_pred_tree_cb(enum move_type move, struct filter_pred *pred,
-			     int *err, void *data)
-{
-	struct filter_pred *preds = data;
-
-	if (move != MOVE_DOWN)
-		return WALK_PRED_DEFAULT;
-	if (!(pred->index & FILTER_PRED_FOLD))
-		return WALK_PRED_DEFAULT;
-
-	*err = fold_pred(preds, pred);
-	if (*err)
-		return WALK_PRED_ABORT;
-
-	/* eveyrhing below is folded, continue with parent */
-	return WALK_PRED_PARENT;
-}
-
-/*
- * To optimize the processing of the ops, if we have several "ors" or
- * "ands" together, we can put them in an array and process them all
- * together speeding up the filter logic.
- */
-static int fold_pred_tree(struct event_filter *filter,
-			   struct filter_pred *root)
-{
-	return walk_pred_tree(filter->preds, root, fold_pred_tree_cb,
-			      filter->preds);
-}
-
-static int replace_preds(struct trace_event_call *call,
+static int process_preds(struct trace_event_call *call,
+			 const char *filter_string,
 			 struct event_filter *filter,
-			 struct filter_parse_state *ps,
-			 bool dry_run)
+			 struct filter_parse_error *pe)
 {
-	char *operand1 = NULL, *operand2 = NULL;
-	struct filter_pred *pred;
-	struct filter_pred *root;
-	struct postfix_elt *elt;
-	struct pred_stack stack = { }; /* init to NULL */
-	int err;
-	int n_preds = 0;
-
-	n_preds = count_preds(ps);
-	if (n_preds >= MAX_FILTER_PRED) {
-		parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
-		return -ENOSPC;
-	}
-
-	err = check_preds(ps);
-	if (err)
-		return err;
-
-	if (!dry_run) {
-		err = __alloc_pred_stack(&stack, n_preds);
-		if (err)
-			return err;
-		err = __alloc_preds(filter, n_preds);
-		if (err)
-			goto fail;
-	}
-
-	n_preds = 0;
-	list_for_each_entry(elt, &ps->postfix, list) {
-		if (elt->op == OP_NONE) {
-			if (!operand1)
-				operand1 = elt->operand;
-			else if (!operand2)
-				operand2 = elt->operand;
-			else {
-				parse_error(ps, FILT_ERR_TOO_MANY_OPERANDS, 0);
-				err = -EINVAL;
-				goto fail;
-			}
-			continue;
-		}
-
-		if (elt->op == OP_NOT) {
-			if (!n_preds || operand1 || operand2) {
-				parse_error(ps, FILT_ERR_ILLEGAL_NOT_OP, 0);
-				err = -EINVAL;
-				goto fail;
-			}
-			if (!dry_run)
-				filter->preds[n_preds - 1].not ^= 1;
-			continue;
-		}
-
-		if (WARN_ON(n_preds++ == MAX_FILTER_PRED)) {
-			parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
-			err = -ENOSPC;
-			goto fail;
-		}
-
-		pred = create_pred(ps, call, elt->op, operand1, operand2);
-		if (!pred) {
-			err = -EINVAL;
-			goto fail;
-		}
+	struct prog_entry *prog;
+	int nr_parens;
+	int nr_preds;
+	int index;
+	int ret;
 
-		if (!dry_run) {
-			err = filter_add_pred(ps, filter, pred, &stack);
-			if (err)
-				goto fail;
+	ret = calc_stack(filter_string, &nr_parens, &nr_preds, &index);
+	if (ret < 0) {
+		switch (ret) {
+		case MISSING_QUOTE:
+			parse_error(pe, FILT_ERR_MISSING_QUOTE, index);
+			break;
+		case TOO_MANY_OPEN:
+			parse_error(pe, FILT_ERR_TOO_MANY_OPEN, index);
+			break;
+		default:
+			parse_error(pe, FILT_ERR_TOO_MANY_CLOSE, index);
 		}
-
-		operand1 = operand2 = NULL;
+		return ret;
 	}
 
-	if (!dry_run) {
-		/* We should have one item left on the stack */
-		pred = __pop_pred_stack(&stack);
-		if (!pred)
-			return -EINVAL;
-		/* This item is where we start from in matching */
-		root = pred;
-		/* Make sure the stack is empty */
-		pred = __pop_pred_stack(&stack);
-		if (WARN_ON(pred)) {
-			err = -EINVAL;
-			filter->root = NULL;
-			goto fail;
-		}
-		err = check_pred_tree(filter, root);
-		if (err)
-			goto fail;
-
-		/* Optimize the tree */
-		err = fold_pred_tree(filter, root);
-		if (err)
-			goto fail;
-
-		/* We don't set root until we know it works */
-		barrier();
-		filter->root = root;
+	if (!nr_preds) {
+		prog = NULL;
+	} else {
+		prog = predicate_parse(filter_string, nr_parens, nr_preds,
+			       parse_pred, call, pe);
+		if (IS_ERR(prog))
+			return PTR_ERR(prog);
 	}
-
-	err = 0;
-fail:
-	__free_pred_stack(&stack);
-	return err;
+	rcu_assign_pointer(filter->prog, prog);
+	return 0;
 }
 
 static inline void event_set_filtered_flag(struct trace_event_file *file)
@@ -1780,72 +1558,53 @@ struct filter_list {
 	struct event_filter	*filter;
 };
 
-static int replace_system_preds(struct trace_subsystem_dir *dir,
+static int process_system_preds(struct trace_subsystem_dir *dir,
 				struct trace_array *tr,
-				struct filter_parse_state *ps,
+				struct filter_parse_error *pe,
 				char *filter_string)
 {
 	struct trace_event_file *file;
 	struct filter_list *filter_item;
+	struct event_filter *filter = NULL;
 	struct filter_list *tmp;
 	LIST_HEAD(filter_list);
 	bool fail = true;
 	int err;
 
 	list_for_each_entry(file, &tr->events, list) {
-		if (file->system != dir)
-			continue;
-
-		/*
-		 * Try to see if the filter can be applied
-		 *  (filter arg is ignored on dry_run)
-		 */
-		err = replace_preds(file->event_call, NULL, ps, true);
-		if (err)
-			event_set_no_set_filter_flag(file);
-		else
-			event_clear_no_set_filter_flag(file);
-	}
-
-	list_for_each_entry(file, &tr->events, list) {
-		struct event_filter *filter;
 
 		if (file->system != dir)
 			continue;
 
-		if (event_no_set_filter_flag(file))
-			continue;
-
-		filter_item = kzalloc(sizeof(*filter_item), GFP_KERNEL);
-		if (!filter_item)
-			goto fail_mem;
-
-		list_add_tail(&filter_item->list, &filter_list);
-
-		filter_item->filter = __alloc_filter();
-		if (!filter_item->filter)
+		filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+		if (!filter)
 			goto fail_mem;
-		filter = filter_item->filter;
 
-		/* Can only fail on no memory */
-		err = replace_filter_string(filter, filter_string);
-		if (err)
+		filter->filter_string = kstrdup(filter_string, GFP_KERNEL);
+		if (!filter->filter_string)
 			goto fail_mem;
 
-		err = replace_preds(file->event_call, filter, ps, false);
+		err = process_preds(file->event_call, filter_string, filter, pe);
 		if (err) {
 			filter_disable(file);
-			parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
-			append_filter_err(ps, filter);
+			parse_error(pe, FILT_ERR_BAD_SUBSYS_FILTER, 0);
+			append_filter_err(pe, filter);
 		} else
 			event_set_filtered_flag(file);
+
+
+		filter_item = kzalloc(sizeof(*filter_item), GFP_KERNEL);
+		if (!filter_item)
+			goto fail_mem;
+
+		list_add_tail(&filter_item->list, &filter_list);
 		/*
 		 * Regardless of if this returned an error, we still
 		 * replace the filter for the call.
 		 */
-		filter = event_filter(file);
-		event_set_filter(file, filter_item->filter);
-		filter_item->filter = filter;
+		filter_item->filter = event_filter(file);
+		event_set_filter(file, filter);
+		filter = NULL;
 
 		fail = false;
 	}
@@ -1871,9 +1630,10 @@ static int replace_system_preds(struct trace_subsystem_dir *dir,
 		list_del(&filter_item->list);
 		kfree(filter_item);
 	}
-	parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
+	parse_error(pe, FILT_ERR_BAD_SUBSYS_FILTER, 0);
 	return -EINVAL;
  fail_mem:
+	__free_filter(filter);	/* filter may already own filter_string */
 	/* If any call succeeded, we still need to sync */
 	if (!fail)
 		synchronize_sched();
@@ -1885,47 +1645,42 @@ static int replace_system_preds(struct trace_subsystem_dir *dir,
 	return -ENOMEM;
 }
 
-static int create_filter_start(char *filter_str, bool set_str,
-			       struct filter_parse_state **psp,
+static int create_filter_start(char *filter_string, bool set_str,
+			       struct filter_parse_error **pse,
 			       struct event_filter **filterp)
 {
 	struct event_filter *filter;
-	struct filter_parse_state *ps = NULL;
+	struct filter_parse_error *pe = NULL;
 	int err = 0;
 
-	WARN_ON_ONCE(*psp || *filterp);
+	if (WARN_ON_ONCE(*pse || *filterp))
+		return -EINVAL;
 
-	/* allocate everything, and if any fails, free all and fail */
-	filter = __alloc_filter();
-	if (filter && set_str)
-		err = replace_filter_string(filter, filter_str);
+	filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+	if (filter && set_str) {
+		filter->filter_string = kstrdup(filter_string, GFP_KERNEL);
+		if (!filter->filter_string)
+			err = -ENOMEM;
+	}
 
-	ps = kzalloc(sizeof(*ps), GFP_KERNEL);
+	pe = kzalloc(sizeof(*pe), GFP_KERNEL);
 
-	if (!filter || !ps || err) {
-		kfree(ps);
+	if (!filter || !pe || err) {
+		kfree(pe);
 		__free_filter(filter);
 		return -ENOMEM;
 	}
 
 	/* we're committed to creating a new filter */
 	*filterp = filter;
-	*psp = ps;
+	*pse = pe;
 
-	parse_init(ps, filter_ops, filter_str);
-	err = filter_parse(ps);
-	if (err && set_str)
-		append_filter_err(ps, filter);
-	return err;
+	return 0;
 }
 
-static void create_filter_finish(struct filter_parse_state *ps)
+static void create_filter_finish(struct filter_parse_error *pe)
 {
-	if (ps) {
-		filter_opstack_clear(ps);
-		postfix_clear(ps);
-		kfree(ps);
-	}
+	kfree(pe);
 }
 
 /**
@@ -1945,26 +1700,20 @@ static void create_filter_finish(struct filter_parse_state *ps)
  * freeing it.
  */
 static int create_filter(struct trace_event_call *call,
-			 char *filter_str, bool set_str,
+			 char *filter_string, bool set_str,
 			 struct event_filter **filterp)
 {
-	struct event_filter *filter = NULL;
-	struct filter_parse_state *ps = NULL;
+	struct filter_parse_error *pe = NULL;
 	int err;
 
-	err = create_filter_start(filter_str, set_str, &ps, &filter);
-	if (!err) {
-		err = replace_preds(call, filter, ps, false);
-		if (err && set_str)
-			append_filter_err(ps, filter);
-	}
-	if (err && !set_str) {
-		free_event_filter(filter);
-		filter = NULL;
-	}
-	create_filter_finish(ps);
+	err = create_filter_start(filter_string, set_str, &pe, filterp);
+	if (err)
+		return err;
+	err = process_preds(call, filter_string, *filterp, pe);
+	if (err && set_str)
+		append_filter_err(pe, *filterp);
+	create_filter_finish(pe);	/* kfree()s pe; nothing else frees it */
 
-	*filterp = filter;
 	return err;
 }
 
@@ -1988,24 +1737,22 @@ static int create_system_filter(struct trace_subsystem_dir *dir,
 				struct trace_array *tr,
 				char *filter_str, struct event_filter **filterp)
 {
-	struct event_filter *filter = NULL;
-	struct filter_parse_state *ps = NULL;
+	struct filter_parse_error *pe = NULL;
 	int err;
 
-	err = create_filter_start(filter_str, true, &ps, &filter);
+	err = create_filter_start(filter_str, true, &pe, filterp);
 	if (!err) {
-		err = replace_system_preds(dir, tr, ps, filter_str);
+		err = process_system_preds(dir, tr, pe, filter_str);
 		if (!err) {
 			/* System filters just show a default message */
-			kfree(filter->filter_string);
-			filter->filter_string = NULL;
+			kfree((*filterp)->filter_string);
+			(*filterp)->filter_string = NULL;
 		} else {
-			append_filter_err(ps, filter);
+			append_filter_err(pe, *filterp);
 		}
 	}
-	create_filter_finish(ps);
+	create_filter_finish(pe);
 
-	*filterp = filter;
 	return err;
 }
 
@@ -2013,7 +1760,7 @@ static int create_system_filter(struct trace_subsystem_dir *dir,
 int apply_event_filter(struct trace_event_file *file, char *filter_string)
 {
 	struct trace_event_call *call = file->event_call;
-	struct event_filter *filter;
+	struct event_filter *filter = NULL;
 	int err;
 
 	if (!strcmp(strstrip(filter_string), "0")) {
@@ -2066,7 +1813,7 @@ int apply_subsystem_event_filter(struct trace_subsystem_dir *dir,
 {
 	struct event_subsystem *system = dir->subsystem;
 	struct trace_array *tr = dir->tr;
-	struct event_filter *filter;
+	struct event_filter *filter = NULL;
 	int err = 0;
 
 	mutex_lock(&event_mutex);
@@ -2186,66 +1933,80 @@ static int __ftrace_function_set_filter(int filter, char *buf, int len,
 	return ret;
 }
 
-static int ftrace_function_check_pred(struct filter_pred *pred, int leaf)
+static int ftrace_function_check_pred(struct filter_pred *pred)
 {
 	struct ftrace_event_field *field = pred->field;
 
-	if (leaf) {
-		/*
-		 * Check the leaf predicate for function trace, verify:
-		 *  - only '==' and '!=' is used
-		 *  - the 'ip' field is used
-		 */
-		if ((pred->op != OP_EQ) && (pred->op != OP_NE))
-			return -EINVAL;
+	/*
+	 * Check the predicate for function trace, verify:
+	 *  - only '==' and '!=' is used
+	 *  - the 'ip' field is used
+	 */
+	if ((pred->op != OP_EQ) && (pred->op != OP_NE))
+		return -EINVAL;
 
-		if (strcmp(field->name, "ip"))
-			return -EINVAL;
-	} else {
-		/*
-		 * Check the non leaf predicate for function trace, verify:
-		 *  - only '||' is used
-		*/
-		if (pred->op != OP_OR)
-			return -EINVAL;
-	}
+	if (strcmp(field->name, "ip"))
+		return -EINVAL;
 
 	return 0;
 }
 
-static int ftrace_function_set_filter_cb(enum move_type move,
-					 struct filter_pred *pred,
-					 int *err, void *data)
+static int ftrace_function_set_filter_pred(struct filter_pred *pred,
+					   struct function_filter_data *data)
 {
+	int ret;
+
 	/* Checking the node is valid for function trace. */
-	if ((move != MOVE_DOWN) ||
-	    (pred->left != FILTER_PRED_INVALID)) {
-		*err = ftrace_function_check_pred(pred, 0);
-	} else {
-		*err = ftrace_function_check_pred(pred, 1);
-		if (*err)
-			return WALK_PRED_ABORT;
-
-		*err = __ftrace_function_set_filter(pred->op == OP_EQ,
-						    pred->regex.pattern,
-						    pred->regex.len,
-						    data);
-	}
+	ret = ftrace_function_check_pred(pred);
+	if (ret)
+		return ret;
 
-	return (*err) ? WALK_PRED_ABORT : WALK_PRED_DEFAULT;
+	return __ftrace_function_set_filter(pred->op == OP_EQ,
+					    pred->regex.pattern,
+					    pred->regex.len,
+					    data);
+}
+
+static bool is_or(struct prog_entry *prog, int i)
+{
+	int target;
+
+	/*
+	 * Only "||" is allowed for function events, thus,
+	 * all true branches should jump to true, and any
+	 * false branch should jump to false.
+	 */
+	target = prog[i].target + 1;
+	/* True and false have NULL preds (all prog entries should jump to one) */
+	if (prog[target].pred)
+		return false;
+
+	/* prog[target].target is 1 for TRUE, 0 for FALSE */
+	return prog[i].when_to_branch == prog[target].target;
 }
 
 static int ftrace_function_set_filter(struct perf_event *event,
 				      struct event_filter *filter)
 {
+	struct prog_entry *prog = rcu_dereference_protected(filter->prog,
+						lockdep_is_held(&event_mutex));
 	struct function_filter_data data = {
 		.first_filter  = 1,
 		.first_notrace = 1,
 		.ops           = &event->ftrace_ops,
 	};
+	int i;
+
+	for (i = 0; prog && prog[i].pred; i++) { /* NULL prog: no preds */
+		struct filter_pred *pred = prog[i].pred;
 
-	return walk_pred_tree(filter->preds, filter->root,
-			      ftrace_function_set_filter_cb, &data);
+		if (!is_or(prog, i))
+			return -EINVAL;
+
+		if (ftrace_function_set_filter_pred(pred, &data) < 0)
+			return -EINVAL;
+	}
+	return 0;
 }
 #else
 static int ftrace_function_set_filter(struct perf_event *event,
@@ -2259,7 +2020,7 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id,
 			      char *filter_str)
 {
 	int err;
-	struct event_filter *filter;
+	struct event_filter *filter = NULL;
 	struct trace_event_call *call;
 
 	mutex_lock(&event_mutex);
@@ -2375,7 +2136,7 @@ static struct test_filter_data_t {
 #undef YES
 #undef NO
 
-#define DATA_CNT (sizeof(test_filter_data)/sizeof(struct test_filter_data_t))
+#define DATA_CNT ARRAY_SIZE(test_filter_data)
 
 static int test_pred_visited;
 
@@ -2388,26 +2149,28 @@ static int test_pred_visited_fn(struct filter_pred *pred, void *event)
 	return 1;
 }
 
-static int test_walk_pred_cb(enum move_type move, struct filter_pred *pred,
-			     int *err, void *data)
+static void update_pred_fn(struct event_filter *filter, char *fields)
 {
-	char *fields = data;
+	struct prog_entry *prog = rcu_dereference_protected(filter->prog,
+						lockdep_is_held(&event_mutex));
+	int i;
 
-	if ((move == MOVE_DOWN) &&
-	    (pred->left == FILTER_PRED_INVALID)) {
+	for (i = 0; prog[i].pred; i++) {
+		struct filter_pred *pred = prog[i].pred;
 		struct ftrace_event_field *field = pred->field;
 
+		WARN_ON_ONCE(!pred->fn);
+
 		if (!field) {
-			WARN(1, "all leafs should have field defined");
-			return WALK_PRED_DEFAULT;
+			WARN_ONCE(1, "all leafs should have field defined %d", i);
+			continue;
 		}
+
 		if (!strchr(fields, *field->name))
-			return WALK_PRED_DEFAULT;
+			continue;
 
-		WARN_ON(!pred->fn);
 		pred->fn = test_pred_visited_fn;
 	}
-	return WALK_PRED_DEFAULT;
 }
 
 static __init int ftrace_test_event_filter(void)
@@ -2431,20 +2194,22 @@ static __init int ftrace_test_event_filter(void)
 			break;
 		}
 
+		/* Needed to dereference filter->prog */
+		mutex_lock(&event_mutex);
 		/*
 		 * The preemption disabling is not really needed for self
 		 * tests, but the rcu dereference will complain without it.
 		 */
 		preempt_disable();
 		if (*d->not_visited)
-			walk_pred_tree(filter->preds, filter->root,
-				       test_walk_pred_cb,
-				       d->not_visited);
+			update_pred_fn(filter, d->not_visited);
 
 		test_pred_visited = 0;
 		err = filter_match_preds(filter, &d->rec);
 		preempt_enable();
 
+		mutex_unlock(&event_mutex);
+
 		__free_filter(filter);
 
 		if (test_pred_visited) {
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 1e1558c99d56..0d7b3ffbecc2 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -20,15 +20,39 @@
 #include <linux/slab.h>
 #include <linux/stacktrace.h>
 #include <linux/rculist.h>
+#include <linux/tracefs.h>
 
 #include "tracing_map.h"
 #include "trace.h"
 
+#define SYNTH_SYSTEM		"synthetic"
+#define SYNTH_FIELDS_MAX	16
+
+#define STR_VAR_LEN_MAX		32 /* must be multiple of sizeof(u64) */
+
 struct hist_field;
 
-typedef u64 (*hist_field_fn_t) (struct hist_field *field, void *event);
+typedef u64 (*hist_field_fn_t) (struct hist_field *field,
+				struct tracing_map_elt *elt,
+				struct ring_buffer_event *rbe,
+				void *event);
 
 #define HIST_FIELD_OPERANDS_MAX	2
+#define HIST_FIELDS_MAX		(TRACING_MAP_FIELDS_MAX + TRACING_MAP_VARS_MAX)
+#define HIST_ACTIONS_MAX	8
+
+enum field_op_id {
+	FIELD_OP_NONE,
+	FIELD_OP_PLUS,
+	FIELD_OP_MINUS,
+	FIELD_OP_UNARY_MINUS,
+};
+
+struct hist_var {
+	char				*name;
+	struct hist_trigger_data	*hist_data;
+	unsigned int			idx;
+};
 
 struct hist_field {
 	struct ftrace_event_field	*field;
@@ -37,27 +61,49 @@ struct hist_field {
 	unsigned int			size;
 	unsigned int			offset;
 	unsigned int                    is_signed;
+	const char			*type;
 	struct hist_field		*operands[HIST_FIELD_OPERANDS_MAX];
+	struct hist_trigger_data	*hist_data;
+	struct hist_var			var;
+	enum field_op_id		operator;
+	char				*system;
+	char				*event_name;
+	char				*name;
+	unsigned int			var_idx;
+	unsigned int			var_ref_idx;
+	bool                            read_once;
 };
 
-static u64 hist_field_none(struct hist_field *field, void *event)
+static u64 hist_field_none(struct hist_field *field,
+			   struct tracing_map_elt *elt,
+			   struct ring_buffer_event *rbe,
+			   void *event)
 {
 	return 0;
 }
 
-static u64 hist_field_counter(struct hist_field *field, void *event)
+static u64 hist_field_counter(struct hist_field *field,
+			      struct tracing_map_elt *elt,
+			      struct ring_buffer_event *rbe,
+			      void *event)
 {
 	return 1;
 }
 
-static u64 hist_field_string(struct hist_field *hist_field, void *event)
+static u64 hist_field_string(struct hist_field *hist_field,
+			     struct tracing_map_elt *elt,
+			     struct ring_buffer_event *rbe,
+			     void *event)
 {
 	char *addr = (char *)(event + hist_field->field->offset);
 
 	return (u64)(unsigned long)addr;
 }
 
-static u64 hist_field_dynstring(struct hist_field *hist_field, void *event)
+static u64 hist_field_dynstring(struct hist_field *hist_field,
+				struct tracing_map_elt *elt,
+				struct ring_buffer_event *rbe,
+				void *event)
 {
 	u32 str_item = *(u32 *)(event + hist_field->field->offset);
 	int str_loc = str_item & 0xffff;
@@ -66,24 +112,74 @@ static u64 hist_field_dynstring(struct hist_field *hist_field, void *event)
 	return (u64)(unsigned long)addr;
 }
 
-static u64 hist_field_pstring(struct hist_field *hist_field, void *event)
+static u64 hist_field_pstring(struct hist_field *hist_field,
+			      struct tracing_map_elt *elt,
+			      struct ring_buffer_event *rbe,
+			      void *event)
 {
 	char **addr = (char **)(event + hist_field->field->offset);
 
 	return (u64)(unsigned long)*addr;
 }
 
-static u64 hist_field_log2(struct hist_field *hist_field, void *event)
+static u64 hist_field_log2(struct hist_field *hist_field,
+			   struct tracing_map_elt *elt,
+			   struct ring_buffer_event *rbe,
+			   void *event)
 {
 	struct hist_field *operand = hist_field->operands[0];
 
-	u64 val = operand->fn(operand, event);
+	u64 val = operand->fn(operand, elt, rbe, event);
 
 	return (u64) ilog2(roundup_pow_of_two(val));
 }
 
+static u64 hist_field_plus(struct hist_field *hist_field,
+			   struct tracing_map_elt *elt,
+			   struct ring_buffer_event *rbe,
+			   void *event)
+{
+	struct hist_field *operand1 = hist_field->operands[0];
+	struct hist_field *operand2 = hist_field->operands[1];
+
+	u64 val1 = operand1->fn(operand1, elt, rbe, event);
+	u64 val2 = operand2->fn(operand2, elt, rbe, event);
+
+	return val1 + val2;
+}
+
+static u64 hist_field_minus(struct hist_field *hist_field,
+			    struct tracing_map_elt *elt,
+			    struct ring_buffer_event *rbe,
+			    void *event)
+{
+	struct hist_field *operand1 = hist_field->operands[0];
+	struct hist_field *operand2 = hist_field->operands[1];
+
+	u64 val1 = operand1->fn(operand1, elt, rbe, event);
+	u64 val2 = operand2->fn(operand2, elt, rbe, event);
+
+	return val1 - val2;
+}
+
+static u64 hist_field_unary_minus(struct hist_field *hist_field,
+				  struct tracing_map_elt *elt,
+				  struct ring_buffer_event *rbe,
+				  void *event)
+{
+	struct hist_field *operand = hist_field->operands[0];
+
+	s64 sval = (s64)operand->fn(operand, elt, rbe, event);
+	u64 val = (u64)-sval;
+
+	return val;
+}
+
 #define DEFINE_HIST_FIELD_FN(type)					\
-static u64 hist_field_##type(struct hist_field *hist_field, void *event)\
+	static u64 hist_field_##type(struct hist_field *hist_field,	\
+				     struct tracing_map_elt *elt,	\
+				     struct ring_buffer_event *rbe,	\
+				     void *event)			\
 {									\
 	type *addr = (type *)(event + hist_field->field->offset);	\
 									\
@@ -126,6 +222,19 @@ enum hist_field_flags {
 	HIST_FIELD_FL_SYSCALL		= 1 << 7,
 	HIST_FIELD_FL_STACKTRACE	= 1 << 8,
 	HIST_FIELD_FL_LOG2		= 1 << 9,
+	HIST_FIELD_FL_TIMESTAMP		= 1 << 10,
+	HIST_FIELD_FL_TIMESTAMP_USECS	= 1 << 11,
+	HIST_FIELD_FL_VAR		= 1 << 12,
+	HIST_FIELD_FL_EXPR		= 1 << 13,
+	HIST_FIELD_FL_VAR_REF		= 1 << 14,
+	HIST_FIELD_FL_CPU		= 1 << 15,
+	HIST_FIELD_FL_ALIAS		= 1 << 16,
+};
+
+struct var_defs {
+	unsigned int	n_vars;
+	char		*name[TRACING_MAP_VARS_MAX];
+	char		*expr[TRACING_MAP_VARS_MAX];
 };
 
 struct hist_trigger_attrs {
@@ -133,25 +242,1437 @@ struct hist_trigger_attrs {
 	char		*vals_str;
 	char		*sort_key_str;
 	char		*name;
+	char		*clock;
 	bool		pause;
 	bool		cont;
 	bool		clear;
+	bool		ts_in_usecs;
 	unsigned int	map_bits;
+
+	char		*assignment_str[TRACING_MAP_VARS_MAX];
+	unsigned int	n_assignments;
+
+	char		*action_str[HIST_ACTIONS_MAX];
+	unsigned int	n_actions;
+
+	struct var_defs	var_defs;
+};
+
+struct field_var {
+	struct hist_field	*var;
+	struct hist_field	*val;
+};
+
+struct field_var_hist {
+	struct hist_trigger_data	*hist_data;
+	char				*cmd;
 };
 
 struct hist_trigger_data {
-	struct hist_field               *fields[TRACING_MAP_FIELDS_MAX];
+	struct hist_field               *fields[HIST_FIELDS_MAX];
 	unsigned int			n_vals;
 	unsigned int			n_keys;
 	unsigned int			n_fields;
+	unsigned int			n_vars;
 	unsigned int			key_size;
 	struct tracing_map_sort_key	sort_keys[TRACING_MAP_SORT_KEYS_MAX];
 	unsigned int			n_sort_keys;
 	struct trace_event_file		*event_file;
 	struct hist_trigger_attrs	*attrs;
 	struct tracing_map		*map;
+	bool				enable_timestamps;
+	bool				remove;
+	struct hist_field               *var_refs[TRACING_MAP_VARS_MAX];
+	unsigned int			n_var_refs;
+
+	struct action_data		*actions[HIST_ACTIONS_MAX];
+	unsigned int			n_actions;
+
+	struct hist_field               *synth_var_refs[SYNTH_FIELDS_MAX];
+	unsigned int                    n_synth_var_refs;
+	struct field_var		*field_vars[SYNTH_FIELDS_MAX];
+	unsigned int			n_field_vars;
+	unsigned int			n_field_var_str;
+	struct field_var_hist		*field_var_hists[SYNTH_FIELDS_MAX];
+	unsigned int			n_field_var_hists;
+
+	struct field_var		*max_vars[SYNTH_FIELDS_MAX];
+	unsigned int			n_max_vars;
+	unsigned int			n_max_var_str;
+};
+
+struct synth_field {
+	char *type;
+	char *name;
+	size_t size;
+	bool is_signed;
+	bool is_string;
+};
+
+struct synth_event {
+	struct list_head			list;
+	int					ref;
+	char					*name;
+	struct synth_field			**fields;
+	unsigned int				n_fields;
+	unsigned int				n_u64;
+	struct trace_event_class		class;
+	struct trace_event_call			call;
+	struct tracepoint			*tp;
+};
+
+struct action_data;
+
+typedef void (*action_fn_t) (struct hist_trigger_data *hist_data,
+			     struct tracing_map_elt *elt, void *rec,
+			     struct ring_buffer_event *rbe,
+			     struct action_data *data, u64 *var_ref_vals);
+
+struct action_data {
+	action_fn_t		fn;
+	unsigned int		n_params;
+	char			*params[SYNTH_FIELDS_MAX];
+
+	union {
+		struct {
+			unsigned int		var_ref_idx;
+			char			*match_event;
+			char			*match_event_system;
+			char			*synth_event_name;
+			struct synth_event	*synth_event;
+		} onmatch;
+
+		struct {
+			char			*var_str;
+			char			*fn_name;
+			unsigned int		max_var_ref_idx;
+			struct hist_field	*max_var;
+			struct hist_field	*var;
+		} onmax;
+	};
+};
+
+
+static char last_hist_cmd[MAX_FILTER_STR_VAL];
+static char hist_err_str[MAX_FILTER_STR_VAL];
+
+static void last_cmd_set(char *str)
+{
+	if (!str)
+		return;
+
+	strncpy(last_hist_cmd, str, MAX_FILTER_STR_VAL - 1);
+}
+
+/* Latch the first hist error as "<str><var>"; later calls are ignored */
+static void hist_err(char *str, char *var)
+{
+	const char *detail = var ? var : "";
+	int room = MAX_FILTER_STR_VAL - 1;
+
+	/* No message given, or an earlier error is still recorded */
+	if (!str || hist_err_str[0] != '\0')
+		return;
+
+	/*
+	 * hist_err_str is empty at this point, so only the new text
+	 * needs to fit; anything that would overflow is dropped whole.
+	 */
+	if (strlen(str) + strlen(detail) > room)
+		return;
+	strcpy(hist_err_str, str);
+	strcat(hist_err_str, detail);
+}
+
+/* Report an error about [system.event.]var via hist_err(); var may be NULL */
+static void hist_err_event(char *str, char *system, char *event, char *var)
+{
+	char err[MAX_FILTER_STR_VAL];
+
+	if (system && var)
+		snprintf(err, MAX_FILTER_STR_VAL, "%s.%s.%s", system, event, var);
+	else if (system)
+		snprintf(err, MAX_FILTER_STR_VAL, "%s.%s", system, event);
+	else	/* unlike strncpy(), always terminated; NULL var -> "" */
+		snprintf(err, MAX_FILTER_STR_VAL, "%s", var ? var : "");
+	hist_err(str, err);
+}
+
+static void hist_err_clear(void)
+{
+	hist_err_str[0] = '\0';
+}
+
+static bool have_hist_err(void)
+{
+	if (strlen(hist_err_str))
+		return true;
+
+	return false;
+}
+
+static LIST_HEAD(synth_event_list);
+static DEFINE_MUTEX(synth_event_mutex);
+
+struct synth_trace_event {
+	struct trace_entry	ent;
+	u64			fields[];
+};
+
+static int synth_event_define_fields(struct trace_event_call *call)
+{
+	struct synth_trace_event trace;
+	int offset = offsetof(typeof(trace), fields);
+	struct synth_event *event = call->data;
+	unsigned int i, size, n_u64;
+	char *name, *type;
+	bool is_signed;
+	int ret = 0;
+
+	for (i = 0, n_u64 = 0; i < event->n_fields; i++) {
+		size = event->fields[i]->size;
+		is_signed = event->fields[i]->is_signed;
+		type = event->fields[i]->type;
+		name = event->fields[i]->name;
+		ret = trace_define_field(call, type, name, offset, size,
+					 is_signed, FILTER_OTHER);
+		if (ret)
+			break;
+
+		if (event->fields[i]->is_string) {
+			offset += STR_VAR_LEN_MAX;
+			n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
+		} else {
+			offset += sizeof(u64);
+			n_u64++;
+		}
+	}
+
+	event->n_u64 = n_u64;
+
+	return ret;
+}
+
+static bool synth_field_signed(char *type)
+{
+	if (strncmp(type, "u", 1) == 0)
+		return false;
+
+	return true;
+}
+
+static int synth_field_is_string(char *type)
+{
+	if (strstr(type, "char[") != NULL)
+		return true;
+
+	return false;
+}
+
+static int synth_field_string_size(char *type)
+{
+	char buf[4], *end, *start;
+	unsigned int len;
+	int size, err;
+
+	start = strstr(type, "char[");
+	if (start == NULL)
+		return -EINVAL;
+	start += strlen("char[");
+
+	end = strchr(type, ']');
+	if (!end || end < start)
+		return -EINVAL;
+
+	len = end - start;
+	if (len > 3)
+		return -EINVAL;
+
+	strncpy(buf, start, len);
+	buf[len] = '\0';
+
+	err = kstrtouint(buf, 0, &size);
+	if (err)
+		return err;
+
+	if (size > STR_VAR_LEN_MAX)
+		return -EINVAL;
+
+	return size;
+}
+
+/*
+ * Return the storage size in bytes of a synthetic field type.  Unknown
+ * type names yield 0; "char[N]" types yield whatever
+ * synth_field_string_size() returns for them.
+ */
+static int synth_field_size(char *type)
+{
+	static const struct {
+		const char *type;
+		int size;
+	} known[] = {
+		{ "s64", sizeof(s64) },
+		{ "u64", sizeof(u64) },
+		{ "s32", sizeof(s32) },
+		{ "u32", sizeof(u32) },
+		{ "s16", sizeof(s16) },
+		{ "u16", sizeof(u16) },
+		{ "s8", sizeof(s8) },
+		{ "u8", sizeof(u8) },
+		{ "char", sizeof(char) },
+		{ "unsigned char", sizeof(unsigned char) },
+		{ "int", sizeof(int) },
+		{ "unsigned int", sizeof(unsigned int) },
+		{ "long", sizeof(long) },
+		{ "unsigned long", sizeof(unsigned long) },
+		{ "pid_t", sizeof(pid_t) },
+	};
+	unsigned int idx;
+
+	for (idx = 0; idx < sizeof(known) / sizeof(known[0]); idx++) {
+		if (strcmp(type, known[idx].type) == 0)
+			return known[idx].size;
+	}
+
+	if (synth_field_is_string(type))
+		return synth_field_string_size(type);
+
+	return 0;
+}
+
+/*
+ * Map a synthetic field type name to the printf conversion used to
+ * display it.  Character arrays print as "%s"; any type name that is not
+ * recognized falls back to "%llu".
+ */
+static const char *synth_field_fmt(char *type)
+{
+	static const struct {
+		const char *type;
+		const char *fmt;
+	} known[] = {
+		{ "s64", "%lld" },
+		{ "u64", "%llu" },
+		{ "s32", "%d" },
+		{ "u32", "%u" },
+		{ "s16", "%d" },
+		{ "u16", "%u" },
+		{ "s8", "%d" },
+		{ "u8", "%u" },
+		{ "char", "%d" },
+		{ "unsigned char", "%u" },
+		{ "int", "%d" },
+		{ "unsigned int", "%u" },
+		{ "long", "%ld" },
+		{ "unsigned long", "%lu" },
+		{ "pid_t", "%d" },
+	};
+	unsigned int idx;
+
+	for (idx = 0; idx < sizeof(known) / sizeof(known[0]); idx++) {
+		if (strcmp(type, known[idx].type) == 0)
+			return known[idx].fmt;
+	}
+
+	if (synth_field_is_string(type))
+		return "%s";
+
+	return "%llu";
+}
+
+static enum print_line_t print_synth_event(struct trace_iterator *iter,
+					   int flags,
+					   struct trace_event *event)
+{
+	struct trace_array *tr = iter->tr;
+	struct trace_seq *s = &iter->seq;
+	struct synth_trace_event *entry;
+	struct synth_event *se;
+	unsigned int i, n_u64;
+	char print_fmt[32];
+	const char *fmt;
+	/* Print one synthetic event record as "<name>: field=value ...". */
+	entry = (struct synth_trace_event *)iter->ent;
+	se = container_of(event, struct synth_event, call.event);
+
+	trace_seq_printf(s, "%s: ", se->name);
+	/* i walks the field descriptors, n_u64 the slots of entry->fields[] */
+	for (i = 0, n_u64 = 0; i < se->n_fields; i++) {
+		if (trace_seq_has_overflowed(s))
+			goto end;
+
+		fmt = synth_field_fmt(se->fields[i]->type);
+
+		/* parameter types: verbose mode prints the conversion string */
+		if (tr->trace_flags & TRACE_ITER_VERBOSE)
+			trace_seq_printf(s, "%s ", fmt);
+		/* per-field format "%s=<conversion>%s": name, value, separator */
+		snprintf(print_fmt, sizeof(print_fmt), "%%s=%s%%s", fmt);
+		/* NOTE(review): confirm vararg width suits narrow fmts like "%d" */
+		/* parameter values; the last field gets no trailing space */
+		if (se->fields[i]->is_string) {
+			trace_seq_printf(s, print_fmt, se->fields[i]->name,
+					 (char *)&entry->fields[n_u64],
+					 i == se->n_fields - 1 ? "" : " ");
+			n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
+		} else {
+			trace_seq_printf(s, print_fmt, se->fields[i]->name,
+					 entry->fields[n_u64],
+					 i == se->n_fields - 1 ? "" : " ");
+			n_u64++;
+		}
+	}
+end:
+	trace_seq_putc(s, '\n');
+
+	return trace_handle_return(s);
+}
+
+static struct trace_event_functions synth_event_funcs = {
+	.trace		= print_synth_event
+};
+
+static notrace void trace_event_raw_event_synth(void *__data,
+						u64 *var_ref_vals,
+						unsigned int var_ref_idx)
+{
+	struct trace_event_file *trace_file = __data;
+	struct synth_trace_event *entry;
+	struct trace_event_buffer fbuffer;
+	struct ring_buffer *buffer;
+	struct synth_event *event;
+	unsigned int i, n_u64;
+	int fields_size = 0;
+	/* Probe: record var_ref_vals[var_ref_idx ...] as one synthetic event. */
+	event = trace_file->event_call->data;
+
+	if (trace_trigger_soft_disabled(trace_file))
+		return;
+	/* Payload size: event->n_u64 slots of sizeof(u64) bytes each. */
+	fields_size = event->n_u64 * sizeof(u64);
+
+	/*
+	 * Avoid ring buffer recursion detection, as this event
+	 * is being performed within another event.
+	 */
+	buffer = trace_file->tr->trace_buffer.buffer;
+	ring_buffer_nest_start(buffer);
+
+	entry = trace_event_buffer_reserve(&fbuffer, trace_file,
+					   sizeof(*entry) + fields_size);
+	if (!entry)
+		goto out;
+	/* Copy each referenced value; a string takes STR_VAR_LEN_MAX bytes. */
+	for (i = 0, n_u64 = 0; i < event->n_fields; i++) {
+		if (event->fields[i]->is_string) {
+			char *str_val = (char *)(long)var_ref_vals[var_ref_idx + i];
+			char *str_field = (char *)&entry->fields[n_u64];
+			/* the u64 variable value is used as a pointer to the string */
+			strscpy(str_field, str_val, STR_VAR_LEN_MAX);
+			n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
+		} else {
+			entry->fields[n_u64] = var_ref_vals[var_ref_idx + i];
+			n_u64++;
+		}
+	}
+
+	trace_event_buffer_commit(&fbuffer);
+out:
+	ring_buffer_nest_end(buffer);
+}
+
+static void free_synth_event_print_fmt(struct trace_event_call *call)
+{
+	if (!call)		/* nothing to release */
+		return;
+	kfree(call->print_fmt);
+	call->print_fmt = NULL;	/* leave no stale pointer behind */
+}
+
+static int __set_synth_event_print_fmt(struct synth_event *event,
+				       char *buf, int len)
+{
+	const char *fmt;
+	int pos = 0;
+	int i;
+	/* Format @event's print_fmt into @buf (size @len); see LEN_OR_ZERO. */
+	/* When len=0, we just calculate the needed length */
+#define LEN_OR_ZERO (len ? len - pos : 0)
+	/* Quoted format part: one "name=conversion" per field, comma separated */
+	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
+	for (i = 0; i < event->n_fields; i++) {
+		fmt = synth_field_fmt(event->fields[i]->type);
+		pos += snprintf(buf + pos, LEN_OR_ZERO, "%s=%s%s",
+				event->fields[i]->name, fmt,
+				i == event->n_fields - 1 ? "" : ", ");
+	}
+	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
+	/* Argument part: one ", REC->name" per field */
+	for (i = 0; i < event->n_fields; i++) {
+		pos += snprintf(buf + pos, LEN_OR_ZERO,
+				", REC->%s", event->fields[i]->name);
+	}
+
+#undef LEN_OR_ZERO
+
+	/* return the length of print_fmt (sum of the snprintf() results) */
+	return pos;
+}
+
+/*
+ * Build and install @call->print_fmt: size it with a dry run, allocate,
+ * then format for real.  Returns 0 or -ENOMEM.
+ */
+static int set_synth_event_print_fmt(struct trace_event_call *call)
+{
+	struct synth_event *se = call->data;
+	char *fmt_buf;
+	int needed;
+
+	needed = __set_synth_event_print_fmt(se, NULL, 0) + 1;
+	fmt_buf = kmalloc(needed, GFP_KERNEL);
+	if (!fmt_buf)
+		return -ENOMEM;
+
+	__set_synth_event_print_fmt(se, fmt_buf, needed);
+	call->print_fmt = fmt_buf;
+	return 0;
+}
+
+static void free_synth_field(struct synth_field *field)
+{
+	kfree(field->type);	/* type string */
+	kfree(field->name);	/* name string */
+	kfree(field);		/* the descriptor; @field itself must not be NULL */
+}
+
+/*
+ * Build a synth_field from a "type name" pair; an array suffix on the name
+ * ("buf[16]") moves to the type.  Returns the field or an ERR_PTR().
+ */
+static struct synth_field *parse_synth_field(char *field_type,
+					     char *field_name)
+{
+	struct synth_field *field;
+	int len, size, ret = 0;
+	char *array;
+
+	if (field_type[0] == ';')
+		field_type++;
+
+	len = strlen(field_name);
+	if (len && field_name[len - 1] == ';')
+		field_name[len - 1] = '\0';
+
+	field = kzalloc(sizeof(*field), GFP_KERNEL);
+	if (!field)
+		return ERR_PTR(-ENOMEM);
+
+	len = strlen(field_type) + 1;
+	array = strchr(field_name, '[');
+	if (array)
+		len += strlen(array);
+	field->type = kzalloc(len, GFP_KERNEL);
+	if (!field->type) {
+		ret = -ENOMEM;
+		goto free;
+	}
+	strcat(field->type, field_type);
+	if (array) {
+		strcat(field->type, array);
+		*array = '\0';
+	}
+
+	/* synth_field_size() may hand back a negative errno, not just 0 */
+	size = synth_field_size(field->type);
+	if (size <= 0) {
+		ret = -EINVAL;
+		goto free;
+	}
+	field->size = size;
+	field->is_string = synth_field_is_string(field->type);
+	field->is_signed = synth_field_signed(field->type);
+	field->name = kstrdup(field_name, GFP_KERNEL);
+	if (!field->name) {
+		ret = -ENOMEM;
+		goto free;
+	}
+	return field;
+ free:
+	free_synth_field(field);
+	return ERR_PTR(ret);
+}
+
+/* Free a tracepoint and its name string; a NULL @tp is ignored. */
+static void free_synth_tracepoint(struct tracepoint *tp)
+{
+	if (tp) {
+		kfree(tp->name);
+		kfree(tp);
+	}
+}
+
+/* Allocate a zeroed tracepoint with a copy of @name, or ERR_PTR(-ENOMEM). */
+static struct tracepoint *alloc_synth_tracepoint(char *name)
+{
+	struct tracepoint *new_tp;
+
+	new_tp = kzalloc(sizeof(*new_tp), GFP_KERNEL);
+	if (!new_tp)
+		return ERR_PTR(-ENOMEM);
+
+	new_tp->name = kstrdup(name, GFP_KERNEL);
+	if (new_tp->name)
+		return new_tp;
+
+	kfree(new_tp);
+	return ERR_PTR(-ENOMEM);
+}
+
+typedef void (*synth_probe_func_t) (void *__data, u64 *var_ref_vals,
+				    unsigned int var_ref_idx);
+
+static inline void trace_synth(struct synth_event *event, u64 *var_ref_vals,
+			       unsigned int var_ref_idx)
+{
+	struct tracepoint *tp = event->tp;
+	/* Call every probe attached to the event's tracepoint, if enabled. */
+	if (unlikely(atomic_read(&tp->key.enabled) > 0)) {
+		struct tracepoint_func *probe_func_ptr;
+		synth_probe_func_t probe_func;
+		void *__data;
+		/* Nothing is fired from a CPU that is not online. */
+		if (!(cpu_online(raw_smp_processor_id())))
+			return;
+		/* Walk the probe array up to the entry whose func is NULL. */
+		probe_func_ptr = rcu_dereference_sched((tp)->funcs);
+		if (probe_func_ptr) {
+			do {
+				probe_func = probe_func_ptr->func;
+				__data = probe_func_ptr->data;
+				probe_func(__data, var_ref_vals, var_ref_idx);
+			} while ((++probe_func_ptr)->func);
+		}
+	}
+}
+
+/* Look up a synthetic event by exact name; NULL when none matches. */
+static struct synth_event *find_synth_event(const char *name)
+{
+	struct synth_event *cursor;
+
+	list_for_each_entry(cursor, &synth_event_list, list)
+		if (!strcmp(cursor->name, name))
+			return cursor;
+
+	return NULL;
+}
+
+/*
+ * Register @event with the trace event core.  On failure nothing of
+ * @event is freed here; that is left to the caller.  Returns 0 or -errno.
+ */
+static int register_synth_event(struct synth_event *event)
+{
+	struct trace_event_call *call = &event->call;
+	int ret;
+
+	event->call.class = &event->class;
+	event->class.system = kstrdup(SYNTH_SYSTEM, GFP_KERNEL);
+	if (!event->class.system)
+		return -ENOMEM;
+
+	event->tp = alloc_synth_tracepoint(event->name);
+	if (IS_ERR(event->tp)) {
+		ret = PTR_ERR(event->tp);
+		event->tp = NULL;
+		return ret;
+	}
+
+	INIT_LIST_HEAD(&call->class->fields);
+	call->event.funcs = &synth_event_funcs;
+	call->class->define_fields = synth_event_define_fields;
+
+	/* a zero return from register_trace_event() is treated as failure */
+	ret = register_trace_event(&call->event);
+	if (!ret)
+		return -ENODEV;
+	call->flags = TRACE_EVENT_FL_TRACEPOINT;
+	call->class->reg = trace_event_reg;
+	call->class->probe = trace_event_raw_event_synth;
+	call->data = event;
+	call->tp = event->tp;
+
+	ret = trace_add_event_call(call);
+	if (ret) {
+		pr_warn("Failed to register synthetic event: %s\n",
+			trace_event_name(call));
+		unregister_trace_event(&call->event);
+		return ret;
+	}
+
+	/*
+	 * trace_remove_event_call() also unregisters call->event, so it
+	 * must not be unregistered a second time on this path.
+	 */
+	ret = set_synth_event_print_fmt(call);
+	if (ret < 0)
+		trace_remove_event_call(call);
+	return ret;
+}
+
+/*
+ * Remove @event's trace_event_call from the trace event core.
+ *
+ * Returns whatever trace_remove_event_call() returns.
+ */
+static int unregister_synth_event(struct synth_event *event)
+{
+	return trace_remove_event_call(&event->call);
+}
+
+static void free_synth_event(struct synth_event *event)
+{
+	unsigned int i;
+	/* Free @event and everything it owns; a NULL @event is ignored. */
+	if (!event)
+		return;
+	/* Fields first, while the array that holds them is still valid. */
+	for (i = 0; i < event->n_fields; i++)
+		free_synth_field(event->fields[i]);
+
+	kfree(event->fields);
+	kfree(event->name);
+	kfree(event->class.system);
+	free_synth_tracepoint(event->tp);
+	free_synth_event_print_fmt(&event->call);
+	kfree(event);
+}
+
+/*
+ * Allocate a synth_event named @event_name that stores the @n_fields
+ * field pointers found in @fields.  Returns it or ERR_PTR(-ENOMEM).
+ */
+static struct synth_event *alloc_synth_event(char *event_name, int n_fields,
+					     struct synth_field **fields)
+{
+	struct synth_event *new_event;
+	unsigned int idx;
+
+	new_event = kzalloc(sizeof(*new_event), GFP_KERNEL);
+	if (!new_event)
+		return ERR_PTR(-ENOMEM);
+
+	new_event->name = kstrdup(event_name, GFP_KERNEL);
+	if (!new_event->name) {
+		kfree(new_event);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	new_event->fields = kcalloc(n_fields, sizeof(*new_event->fields),
+				    GFP_KERNEL);
+	if (!new_event->fields) {
+		/* n_fields is still 0 here, so no field gets freed */
+		free_synth_event(new_event);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	for (idx = 0; idx < n_fields; idx++)
+		new_event->fields[idx] = fields[idx];
+	new_event->n_fields = n_fields;
+	return new_event;
+}
+
+static void action_trace(struct hist_trigger_data *hist_data,
+			 struct tracing_map_elt *elt, void *rec,
+			 struct ring_buffer_event *rbe,
+			 struct action_data *data, u64 *var_ref_vals)
+{
+	struct synth_event *event = data->onmatch.synth_event;
+	/* Fire the synthetic event attached to this onmatch action. */
+	trace_synth(event, var_ref_vals, data->onmatch.var_ref_idx);
+}
+
+struct hist_var_data {
+	struct list_head list;
+	struct hist_trigger_data *hist_data;
+};
+
+static void add_or_delete_synth_event(struct synth_event *event, int delete)
+{
+	if (delete)		/* caller asked for @event to be freed */
+		free_synth_event(event);
+	else {
+		mutex_lock(&synth_event_mutex);
+		if (!find_synth_event(event->name))	/* name not listed yet */
+			list_add(&event->list, &synth_event_list);
+		else			/* name already taken: drop @event */
+			free_synth_event(event);
+		mutex_unlock(&synth_event_mutex);
+	}
+}
+
+static int create_synth_event(int argc, char **argv)
+{
+	struct synth_field *field, *fields[SYNTH_FIELDS_MAX];
+	struct synth_event *event = NULL;
+	bool delete_event = false;
+	int i, n_fields = 0, ret = 0;
+	char *name;
+	/* Create or delete one synthetic event described by argv[]. */
+	mutex_lock(&synth_event_mutex);
+
+	/*
+	 * Argument syntax:
+	 *  - Add synthetic event: <event_name> field[;field] ...
+	 *  - Remove synthetic event: !<event_name> field[;field] ...
+	 *      where 'field' = type field_name
+	 */
+	if (argc < 1) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	name = argv[0];
+	if (name[0] == '!') {
+		delete_event = true;
+		name++;
+	}
+	/* An existing event can only be deleted, and only while unreferenced. */
+	event = find_synth_event(name);
+	if (event) {
+		if (delete_event) {
+			if (event->ref) {
+				event = NULL;
+				ret = -EBUSY;
+				goto out;
+			}
+			list_del(&event->list);
+			goto out;
+		}
+		event = NULL;
+		ret = -EEXIST;
+		goto out;
+	} else if (delete_event)
+		goto out;
+	/* From here on a new event is created; it needs at least one field. */
+	if (argc < 2) {
+		ret = -EINVAL;
+		goto out;
+	}
+	/* Tokens are consumed as (type, name) pairs; lone ";" are skipped. */
+	for (i = 1; i < argc - 1; i++) {
+		if (strcmp(argv[i], ";") == 0)
+			continue;
+		if (n_fields == SYNTH_FIELDS_MAX) {
+			ret = -EINVAL;
+			goto err;
+		}
+
+		field = parse_synth_field(argv[i], argv[i + 1]);
+		if (IS_ERR(field)) {
+			ret = PTR_ERR(field);
+			goto err;
+		}
+		fields[n_fields] = field;
+		i++; n_fields++;
+	}
+	/* A leftover token was not consumed as part of a (type, name) pair. */
+	if (i < argc) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	event = alloc_synth_event(name, n_fields, fields);
+	if (IS_ERR(event)) {
+		ret = PTR_ERR(event);
+		event = NULL;
+		goto err;
+	}
+ out:
+	mutex_unlock(&synth_event_mutex);
+	/* Registration and removal run without synth_event_mutex held. */
+	if (event) {
+		if (delete_event) {
+			ret = unregister_synth_event(event);
+			add_or_delete_synth_event(event, !ret);
+		} else {
+			ret = register_synth_event(event);
+			add_or_delete_synth_event(event, ret);
+		}
+	}
+
+	return ret;
+ err:
+	mutex_unlock(&synth_event_mutex);
+	/* Fields parsed so far are not owned by any event yet. */
+	for (i = 0; i < n_fields; i++)
+		free_synth_field(fields[i]);
+	free_synth_event(event);
+
+	return ret;
+}
+
+/*
+ * Remove every synthetic event, or none if any of them is still
+ * referenced (-EBUSY).  Returns the result of the last unregistration.
+ */
+static int release_all_synth_events(void)
+{
+	struct list_head release_events;
+	struct synth_event *event, *e;
+	int ret = 0;
+
+	INIT_LIST_HEAD(&release_events);
+
+	mutex_lock(&synth_event_mutex);
+	list_for_each_entry(event, &synth_event_list, list) {
+		if (event->ref) {
+			mutex_unlock(&synth_event_mutex);
+			return -EBUSY;
+		}
+	}
+	/* the loop cursor is no valid entry here; splice the real list head */
+	list_splice_init(&synth_event_list, &release_events);
+	mutex_unlock(&synth_event_mutex);
+
+	list_for_each_entry_safe(event, e, &release_events, list) {
+		list_del(&event->list);
+		ret = unregister_synth_event(event);
+		add_or_delete_synth_event(event, !ret);
+	}
+	return ret;
+}
+
+
+static void *synth_events_seq_start(struct seq_file *m, loff_t *pos)
+{
+	mutex_lock(&synth_event_mutex);
+	/* Returns with synth_event_mutex held; this function never drops it. */
+	return seq_list_start(&synth_event_list, *pos);
+}
+
+static void *synth_events_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	return seq_list_next(v, &synth_event_list, pos);
+}
+
+static void synth_events_seq_stop(struct seq_file *m, void *v)
+{
+	mutex_unlock(&synth_event_mutex);
+}
+
+static int synth_events_seq_show(struct seq_file *m, void *v)
+{
+	struct synth_field *field;
+	struct synth_event *event = v;
+	unsigned int i;
+	/* One line per event: the name, a tab, then "type name" per field. */
+	seq_printf(m, "%s\t", event->name);
+
+	for (i = 0; i < event->n_fields; i++) {
+		field = event->fields[i];
+
+		/* "type name", followed by "; " for all but the last field */
+		seq_printf(m, "%s %s%s", field->type, field->name,
+			   i == event->n_fields - 1 ? "" : "; ");
+	}
+
+	seq_putc(m, '\n');
+
+	return 0;
+}
+
+static const struct seq_operations synth_events_seq_op = {
+	.start  = synth_events_seq_start,
+	.next   = synth_events_seq_next,
+	.stop   = synth_events_seq_stop,
+	.show   = synth_events_seq_show
+};
+
+static int synth_events_open(struct inode *inode, struct file *file)
+{
+	int ret;
+	/* Opening for write with O_TRUNC first removes all synthetic events. */
+	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
+		ret = release_all_synth_events();
+		if (ret < 0)
+			return ret;
+	}
+
+	return seq_open(file, &synth_events_seq_op);
+}
+
+static ssize_t synth_events_write(struct file *file,
+				  const char __user *buffer,
+				  size_t count, loff_t *ppos)
+{
+	return trace_parse_run_command(file, buffer, count, ppos,
+				       create_synth_event);
+}
+
+static const struct file_operations synth_events_fops = {
+	.open           = synth_events_open,
+	.write		= synth_events_write,
+	.read           = seq_read,
+	.llseek         = seq_lseek,
+	.release        = seq_release,
+};
+
+static u64 hist_field_timestamp(struct hist_field *hist_field,
+				struct tracing_map_elt *elt,
+				struct ring_buffer_event *rbe,
+				void *event)
+{
+	struct hist_trigger_data *hist_data = hist_field->hist_data;
+	struct trace_array *tr = hist_data->event_file->tr;
+	/* Field value: the time stamp of the ring buffer event @rbe. */
+	u64 ts = ring_buffer_event_time_stamp(rbe);
+	/* Convert to usecs only if requested and the trace clock is in ns. */
+	if (hist_data->attrs->ts_in_usecs && trace_clock_in_ns(tr))
+		ts = ns2usecs(ts);
+
+	return ts;
+}
+
+static u64 hist_field_cpu(struct hist_field *hist_field,
+			  struct tracing_map_elt *elt,
+			  struct ring_buffer_event *rbe,
+			  void *event)
+{
+	int cpu = smp_processor_id();
+	/* Field value: the id of the CPU this is called on; args are unused. */
+	return cpu;
+}
+
+/* Return @hist_field if it references variable @var_idx of @var_data. */
+static struct hist_field *
+check_field_for_var_ref(struct hist_field *hist_field,
+			struct hist_trigger_data *var_data,
+			unsigned int var_idx)
+{
+	if (!hist_field || !(hist_field->flags & HIST_FIELD_FL_VAR_REF))
+		return NULL;
+
+	if (hist_field->var.idx != var_idx)
+		return NULL;
+	if (hist_field->var.hist_data != var_data)
+		return NULL;
+
+	return hist_field;
+}
+
+/*
+ * Recursively search @hist_field and its operands for a reference to
+ * variable @var_idx of @var_data.  The search stops below nesting
+ * level 3.  Returns the referencing field or NULL.
+ */
+static struct hist_field *
+check_field_for_var_refs(struct hist_trigger_data *hist_data,
+			 struct hist_field *hist_field,
+			 struct hist_trigger_data *var_data,
+			 unsigned int var_idx,
+			 unsigned int level)
+{
+	struct hist_field *ref;
+	unsigned int op;
+
+	if (level > 3 || !hist_field)
+		return NULL;
+
+	ref = check_field_for_var_ref(hist_field, var_data, var_idx);
+	if (ref)
+		return ref;
+
+	for (op = 0; op < HIST_FIELD_OPERANDS_MAX; op++) {
+		ref = check_field_for_var_refs(hist_data,
+					       hist_field->operands[op],
+					       var_data, var_idx, level + 1);
+		if (ref)
+			return ref;
+	}
+
+	return NULL;
+}
+
+/* Find a reference to variable @var_idx of @var_data within @hist_data. */
+static struct hist_field *find_var_ref(struct hist_trigger_data *hist_data,
+				       struct hist_trigger_data *var_data,
+				       unsigned int var_idx)
+{
+	struct hist_field *ref;
+	unsigned int i;
+
+	for_each_hist_field(i, hist_data) {
+		ref = check_field_for_var_refs(hist_data, hist_data->fields[i],
+					       var_data, var_idx, 0);
+		if (ref)
+			return ref;
+	}
+
+	for (i = 0; i < hist_data->n_synth_var_refs; i++) {
+		ref = check_field_for_var_refs(hist_data,
+					       hist_data->synth_var_refs[i],
+					       var_data, var_idx, 0);
+		if (ref)
+			return ref;
+	}
+
+	return NULL;
+}
+
+/* Find a reference to @hist_data's variable @var_idx in any other hist data. */
+static struct hist_field *find_any_var_ref(struct hist_trigger_data *hist_data,
+					   unsigned int var_idx)
+{
+	struct trace_array *tr = hist_data->event_file->tr;
+	struct hist_var_data *other;
+	struct hist_field *ref;
+
+	list_for_each_entry(other, &tr->hist_vars, list) {
+		if (other->hist_data == hist_data)
+			continue;
+		ref = find_var_ref(other->hist_data, hist_data, var_idx);
+		if (ref)
+			return ref;
+	}
+	return NULL;
+}
+
+/* True if any variable defined by @hist_data is referenced elsewhere. */
+static bool check_var_refs(struct hist_trigger_data *hist_data)
+{
+	struct hist_field *var;
+	int i;
+
+	for_each_hist_field(i, hist_data) {
+		var = hist_data->fields[i];
+		/* only fields flagged as variables are looked up */
+		if (!var || !(var->flags & HIST_FIELD_FL_VAR))
+			continue;
+
+		if (find_any_var_ref(hist_data, var->var.idx))
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * Return @hist_data's entry on its trace array's hist_vars list, or NULL.
+ */
+static struct hist_var_data *find_hist_vars(struct hist_trigger_data *hist_data)
+{
+	struct trace_array *tr = hist_data->event_file->tr;
+	struct hist_var_data *entry;
+
+	list_for_each_entry(entry, &tr->hist_vars, list) {
+		if (entry->hist_data == hist_data)
+			return entry;
+	}
+	return NULL;
+}
+
+/*
+ * True if @hist_field, or any of its operands down to nesting level 3,
+ * is flagged as a variable or as a variable reference.
+ */
+static bool field_has_hist_vars(struct hist_field *hist_field,
+				unsigned int level)
+{
+	int op;
+
+	if (level > 3)
+		return false;
+
+	if (!hist_field)
+		return false;
+
+	if (hist_field->flags & (HIST_FIELD_FL_VAR | HIST_FIELD_FL_VAR_REF))
+		return true;
+
+	for (op = 0; op < HIST_FIELD_OPERANDS_MAX; op++) {
+		if (field_has_hist_vars(hist_field->operands[op], level + 1))
+			return true;
+	}
+
+	return false;
+}
+
+/* True if any field of @hist_data is, or contains, a variable or var ref. */
+static bool has_hist_vars(struct hist_trigger_data *hist_data)
+{
+	bool has_vars = false;
+	int i;
+
+	for_each_hist_field(i, hist_data) {
+		has_vars = field_has_hist_vars(hist_data->fields[i], 0);
+		if (has_vars)
+			break;
+	}
+	return has_vars;
+}
+
+static int save_hist_vars(struct hist_trigger_data *hist_data)
+{
+	struct trace_array *tr = hist_data->event_file->tr;
+	struct hist_var_data *var_data;
+	/* Put @hist_data on its trace array's hist_vars list, at most once. */
+	var_data = find_hist_vars(hist_data);
+	if (var_data)
+		return 0;
+	/* Take a trace array reference for as long as the entry is listed. */
+	if (trace_array_get(tr) < 0)
+		return -ENODEV;
+
+	var_data = kzalloc(sizeof(*var_data), GFP_KERNEL);
+	if (!var_data) {
+		trace_array_put(tr);
+		return -ENOMEM;
+	}
+
+	var_data->hist_data = hist_data;
+	list_add(&var_data->list, &tr->hist_vars);
+
+	return 0;
+}
+
+static void remove_hist_vars(struct hist_trigger_data *hist_data)
+{
+	struct trace_array *tr = hist_data->event_file->tr;
+	struct hist_var_data *var_data;
+	/* Take @hist_data off the hist_vars list again, if it is on it. */
+	var_data = find_hist_vars(hist_data);
+	if (!var_data)
+		return;
+	/* Warn and keep the entry while our variables are still referenced. */
+	if (WARN_ON(check_var_refs(hist_data)))
+		return;
+
+	list_del(&var_data->list);
+
+	kfree(var_data);
+
+	trace_array_put(tr);
+}
+
+/* Find the variable named @var_name among @hist_data's own fields. */
+static struct hist_field *find_var_field(struct hist_trigger_data *hist_data,
+					 const char *var_name)
+{
+	struct hist_field *var;
+	int i;
+
+	for_each_hist_field(i, hist_data) {
+		var = hist_data->fields[i];
+		if (!var || !(var->flags & HIST_FIELD_FL_VAR))
+			continue;
+		if (strcmp(var->var.name, var_name) == 0)
+			return var;
+	}
+
+	return NULL;
+}
+
+static struct hist_field *find_var(struct hist_trigger_data *hist_data,
+				   struct trace_event_file *file,
+				   const char *var_name)
+{
+	struct hist_trigger_data *test_data;
+	struct event_trigger_data *test;
+	struct hist_field *hist_field;
+	/* Look in @hist_data first, then in every hist trigger on @file. */
+	hist_field = find_var_field(hist_data, var_name);
+	if (hist_field)
+		return hist_field;
+
+	list_for_each_entry_rcu(test, &file->triggers, list) {
+		if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) {
+			test_data = test->private_data;
+			hist_field = find_var_field(test_data, var_name);
+			if (hist_field)
+				return hist_field;
+		}
+	}
+
+	return NULL;
+}
+
+static struct trace_event_file *find_var_file(struct trace_array *tr,
+					      char *system,
+					      char *event_name,
+					      char *var_name)
+{
+	struct hist_trigger_data *var_hist_data;
+	struct hist_var_data *var_data;
+	struct trace_event_file *file, *found = NULL;
+	/* Find the event file a variable belongs to; a given @system wins. */
+	if (system)
+		return find_event_file(tr, system, event_name);
+	/* No system given: @var_name must be defined on exactly one file. */
+	list_for_each_entry(var_data, &tr->hist_vars, list) {
+		var_hist_data = var_data->hist_data;
+		file = var_hist_data->event_file;
+		if (file == found)
+			continue;
+		/* a further definition on the file already found is no conflict */
+		if (find_var_field(var_hist_data, var_name)) {
+			if (found) {
+				hist_err_event("Variable name not unique, need to use fully qualified name (subsys.event.var) for variable: ", system, event_name, var_name);
+				return NULL;
+			}
+
+			found = file;
+		}
+	}
+
+	return found;
+}
+
+static struct hist_field *find_file_var(struct trace_event_file *file,
+					const char *var_name)
+{
+	struct hist_trigger_data *test_data;
+	struct event_trigger_data *test;
+	struct hist_field *hist_field;
+	/* Search every hist trigger attached to @file for @var_name. */
+	list_for_each_entry_rcu(test, &file->triggers, list) {
+		if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) {
+			test_data = test->private_data;
+			hist_field = find_var_field(test_data, var_name);
+			if (hist_field)
+				return hist_field;
+		}
+	}
+
+	return NULL;
+}
+
+static struct hist_field *
+find_match_var(struct hist_trigger_data *hist_data, char *var_name)
+{
+	struct trace_array *tr = hist_data->event_file->tr;
+	struct hist_field *hist_field, *found = NULL;
+	struct trace_event_file *file;
+	unsigned int i;
+	/* Look @var_name up on the match events of this trigger's actions. */
+	for (i = 0; i < hist_data->n_actions; i++) {
+		struct action_data *data = hist_data->actions[i];
+		/* only actions whose handler is action_trace() are considered */
+		if (data->fn == action_trace) {
+			char *system = data->onmatch.match_event_system;
+			char *event_name = data->onmatch.match_event;
+
+			file = find_var_file(tr, system, event_name, var_name);
+			if (!file)
+				continue;
+			hist_field = find_file_var(file, var_name);
+			if (hist_field) {
+				if (found) {
+					hist_err_event("Variable name not unique, need to use fully qualified name (subsys.event.var) for variable: ", system, event_name, var_name);
+					return ERR_PTR(-EINVAL);
+				}
+				/* first hit; a second one would be ambiguous */
+				found = hist_field;
+			}
+		}
+	}
+	return found;
+}
+
+static struct hist_field *find_event_var(struct hist_trigger_data *hist_data,
+					 char *system,
+					 char *event_name,
+					 char *var_name)
+{
+	struct trace_array *tr = hist_data->event_file->tr;
+	struct hist_field *hist_field = NULL;
+	struct trace_event_file *file;
+	/* Without a full system/event pair, try the action match events first. */
+	if (!system || !event_name) {
+		hist_field = find_match_var(hist_data, var_name);
+		if (IS_ERR(hist_field))
+			return NULL;
+		if (hist_field)
+			return hist_field;
+	}
+	/* Otherwise, or if that found nothing, go through the event file. */
+	file = find_var_file(tr, system, event_name, var_name);
+	if (!file)
+		return NULL;
+
+	hist_field = find_file_var(file, var_name);
+
+	return hist_field;
+}
+
+struct hist_elt_data {
+	char *comm;
+	u64 *var_ref_vals;
+	char *field_var_str[SYNTH_FIELDS_MAX];
 };
 
+/*
+ * Value of a variable reference: read from the element's var_ref_vals[]
+ * at the slot given by @hist_field->var_ref_idx.
+ */
+static u64 hist_field_var_ref(struct hist_field *hist_field,
+			      struct tracing_map_elt *elt,
+			      struct ring_buffer_event *rbe,
+			      void *event)
+{
+	struct hist_elt_data *elt_data = elt->private_data;
+
+	return elt_data->var_ref_vals[hist_field->var_ref_idx];
+}
+
+static bool resolve_var_refs(struct hist_trigger_data *hist_data, void *key,
+			     u64 *var_ref_vals, bool self)
+{
+	struct hist_trigger_data *var_data;
+	struct tracing_map_elt *var_elt;
+	struct hist_field *hist_field;
+	unsigned int i, var_idx;
+	bool resolved = true;
+	u64 var_val = 0;
+	/* Fill var_ref_vals[]; stop with false at the first unreadable ref. */
+	for (i = 0; i < hist_data->n_var_refs; i++) {
+		hist_field = hist_data->var_refs[i];
+		var_idx = hist_field->var.idx;
+		var_data = hist_field->var.hist_data;
+
+		if (var_data == NULL) {
+			resolved = false;
+			break;
+		}
+		/* @self selects refs to our own variables, !@self the others */
+		if ((self && var_data != hist_data) ||
+		    (!self && var_data == hist_data))
+			continue;
+		/* the variable is looked up in the defining map under @key */
+		var_elt = tracing_map_lookup(var_data->map, key);
+		if (!var_elt) {
+			resolved = false;
+			break;
+		}
+		/* unresolved if tracing_map_var_set() reports false for it */
+		if (!tracing_map_var_set(var_elt, var_idx)) {
+			resolved = false;
+			break;
+		}
+
+		if (self || !hist_field->read_once)
+			var_val = tracing_map_read_var(var_elt, var_idx);
+		else
+			var_val = tracing_map_read_var_once(var_elt, var_idx);
+
+		var_ref_vals[i] = var_val;
+	}
+
+	return resolved;
+}
+
 static const char *hist_field_name(struct hist_field *field,
 				   unsigned int level)
 {
@@ -162,8 +1683,26 @@ static const char *hist_field_name(struct hist_field *field,
 
 	if (field->field)
 		field_name = field->field->name;
-	else if (field->flags & HIST_FIELD_FL_LOG2)
+	else if (field->flags & HIST_FIELD_FL_LOG2 ||
+		 field->flags & HIST_FIELD_FL_ALIAS)
 		field_name = hist_field_name(field->operands[0], ++level);
+	else if (field->flags & HIST_FIELD_FL_CPU)
+		field_name = "cpu";
+	else if (field->flags & HIST_FIELD_FL_EXPR ||
+		 field->flags & HIST_FIELD_FL_VAR_REF) {
+		if (field->system) {
+			static char full_name[MAX_FILTER_STR_VAL];
+
+			strcat(full_name, field->system);
+			strcat(full_name, ".");
+			strcat(full_name, field->event_name);
+			strcat(full_name, ".");
+			strcat(full_name, field->name);
+			field_name = full_name;
+		} else
+			field_name = field->name;
+	} else if (field->flags & HIST_FIELD_FL_TIMESTAMP)
+		field_name = "common_timestamp";
 
 	if (field_name == NULL)
 		field_name = "";
@@ -232,16 +1771,119 @@ static int parse_map_size(char *str)
 
 static void destroy_hist_trigger_attrs(struct hist_trigger_attrs *attrs)
 {
+	unsigned int i;
+
 	if (!attrs)
 		return;
 
+	for (i = 0; i < attrs->n_assignments; i++)
+		kfree(attrs->assignment_str[i]);
+
+	for (i = 0; i < attrs->n_actions; i++)
+		kfree(attrs->action_str[i]);
+
 	kfree(attrs->name);
 	kfree(attrs->sort_key_str);
 	kfree(attrs->keys_str);
 	kfree(attrs->vals_str);
+	kfree(attrs->clock);
 	kfree(attrs);
 }
 
+/* Save an onmatch()/onmax() action string; -EINVAL for anything else. */
+static int parse_action(char *str, struct hist_trigger_attrs *attrs)
+{
+	char *copy;
+
+	if (attrs->n_actions >= HIST_ACTIONS_MAX)
+		return -EINVAL;
+
+	if (strncmp(str, "onmatch(", strlen("onmatch(")) != 0 &&
+	    strncmp(str, "onmax(", strlen("onmax(")) != 0)
+		return -EINVAL;
+
+	copy = kstrdup(str, GFP_KERNEL);
+	attrs->action_str[attrs->n_actions] = copy;
+	if (!copy)
+		return -ENOMEM;
+
+	attrs->n_actions++;
+	return 0;
+}
+
+/*
+ * Handle one "name=value" token of a hist trigger command.
+ *
+ * Well-known names (key/keys, val/vals/values, sort, name, clock, size)
+ * set the matching attribute.  When such a name is given more than once
+ * the later string replaces the earlier one, which is freed rather than
+ * leaked.  Any other name is stored as a variable assignment.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int parse_assignment(char *str, struct hist_trigger_attrs *attrs)
+{
+	if ((strncmp(str, "key=", strlen("key=")) == 0) ||
+	    (strncmp(str, "keys=", strlen("keys=")) == 0)) {
+		/* drop the string of an earlier, repeated assignment */
+		kfree(attrs->keys_str);
+		attrs->keys_str = kstrdup(str, GFP_KERNEL);
+		if (!attrs->keys_str)
+			return -ENOMEM;
+	} else if ((strncmp(str, "val=", strlen("val=")) == 0) ||
+		 (strncmp(str, "vals=", strlen("vals=")) == 0) ||
+		 (strncmp(str, "values=", strlen("values=")) == 0)) {
+		kfree(attrs->vals_str);
+		attrs->vals_str = kstrdup(str, GFP_KERNEL);
+		if (!attrs->vals_str)
+			return -ENOMEM;
+	} else if (strncmp(str, "sort=", strlen("sort=")) == 0) {
+		kfree(attrs->sort_key_str);
+		attrs->sort_key_str = kstrdup(str, GFP_KERNEL);
+		if (!attrs->sort_key_str)
+			return -ENOMEM;
+	} else if (strncmp(str, "name=", strlen("name=")) == 0) {
+		kfree(attrs->name);
+		attrs->name = kstrdup(str, GFP_KERNEL);
+		if (!attrs->name)
+			return -ENOMEM;
+	} else if (strncmp(str, "clock=", strlen("clock=")) == 0) {
+		/* keep only the value after '=', without surrounding blanks */
+		strsep(&str, "=");
+		if (!str)
+			return -EINVAL;
+
+		str = strstrip(str);
+		kfree(attrs->clock);
+		attrs->clock = kstrdup(str, GFP_KERNEL);
+		if (!attrs->clock)
+			return -ENOMEM;
+	} else if (strncmp(str, "size=", strlen("size=")) == 0) {
+		/* a negative result of parse_map_size() is passed on as is */
+		int map_bits = parse_map_size(str);
+
+		if (map_bits < 0)
+			return map_bits;
+		attrs->map_bits = map_bits;
+	} else {
+		/* any other name is saved as a variable assignment */
+		char *assignment;
+
+		if (attrs->n_assignments == TRACING_MAP_VARS_MAX) {
+			hist_err("Too many variables defined: ", str);
+			return -EINVAL;
+		}
+
+		assignment = kstrdup(str, GFP_KERNEL);
+		if (!assignment)
+			return -ENOMEM;
+
+		attrs->assignment_str[attrs->n_assignments++] = assignment;
+	}
+
+	return 0;
+}
+
 static struct hist_trigger_attrs *parse_hist_trigger_attrs(char *trigger_str)
 {
 	struct hist_trigger_attrs *attrs;
@@ -254,35 +1896,21 @@ static struct hist_trigger_attrs *parse_hist_trigger_attrs(char *trigger_str)
 	while (trigger_str) {
 		char *str = strsep(&trigger_str, ":");
 
-		if ((strncmp(str, "key=", strlen("key=")) == 0) ||
-		    (strncmp(str, "keys=", strlen("keys=")) == 0))
-			attrs->keys_str = kstrdup(str, GFP_KERNEL);
-		else if ((strncmp(str, "val=", strlen("val=")) == 0) ||
-			 (strncmp(str, "vals=", strlen("vals=")) == 0) ||
-			 (strncmp(str, "values=", strlen("values=")) == 0))
-			attrs->vals_str = kstrdup(str, GFP_KERNEL);
-		else if (strncmp(str, "sort=", strlen("sort=")) == 0)
-			attrs->sort_key_str = kstrdup(str, GFP_KERNEL);
-		else if (strncmp(str, "name=", strlen("name=")) == 0)
-			attrs->name = kstrdup(str, GFP_KERNEL);
-		else if (strcmp(str, "pause") == 0)
+		if (strchr(str, '=')) {
+			ret = parse_assignment(str, attrs);
+			if (ret)
+				goto free;
+		} else if (strcmp(str, "pause") == 0)
 			attrs->pause = true;
 		else if ((strcmp(str, "cont") == 0) ||
 			 (strcmp(str, "continue") == 0))
 			attrs->cont = true;
 		else if (strcmp(str, "clear") == 0)
 			attrs->clear = true;
-		else if (strncmp(str, "size=", strlen("size=")) == 0) {
-			int map_bits = parse_map_size(str);
-
-			if (map_bits < 0) {
-				ret = map_bits;
+		else {
+			ret = parse_action(str, attrs);
+			if (ret)
 				goto free;
-			}
-			attrs->map_bits = map_bits;
-		} else {
-			ret = -EINVAL;
-			goto free;
 		}
 	}
 
@@ -291,6 +1919,14 @@ static struct hist_trigger_attrs *parse_hist_trigger_attrs(char *trigger_str)
 		goto free;
 	}
 
+	if (!attrs->clock) {
+		attrs->clock = kstrdup("global", GFP_KERNEL);
+		if (!attrs->clock) {
+			ret = -ENOMEM;
+			goto free;
+		}
+	}
+
 	return attrs;
  free:
 	destroy_hist_trigger_attrs(attrs);
@@ -313,64 +1949,203 @@ static inline void save_comm(char *comm, struct task_struct *task)
 	memcpy(comm, task->comm, TASK_COMM_LEN);
 }
 
-static void hist_trigger_elt_comm_free(struct tracing_map_elt *elt)
+static void hist_elt_data_free(struct hist_elt_data *elt_data)
 {
-	kfree((char *)elt->private_data);
+	unsigned int i;
+
+	for (i = 0; i < SYNTH_FIELDS_MAX; i++)
+		kfree(elt_data->field_var_str[i]);
+
+	kfree(elt_data->comm);
+	kfree(elt_data);
 }
 
-static int hist_trigger_elt_comm_alloc(struct tracing_map_elt *elt)
+static void hist_trigger_elt_data_free(struct tracing_map_elt *elt)
+{
+	struct hist_elt_data *elt_data = elt->private_data;
+
+	hist_elt_data_free(elt_data);
+}
+
+static int hist_trigger_elt_data_alloc(struct tracing_map_elt *elt)
 {
 	struct hist_trigger_data *hist_data = elt->map->private_data;
+	unsigned int size = TASK_COMM_LEN;
+	struct hist_elt_data *elt_data;
 	struct hist_field *key_field;
-	unsigned int i;
+	unsigned int i, n_str;
+
+	elt_data = kzalloc(sizeof(*elt_data), GFP_KERNEL);
+	if (!elt_data)
+		return -ENOMEM;
 
 	for_each_hist_key_field(i, hist_data) {
 		key_field = hist_data->fields[i];
 
 		if (key_field->flags & HIST_FIELD_FL_EXECNAME) {
-			unsigned int size = TASK_COMM_LEN + 1;
-
-			elt->private_data = kzalloc(size, GFP_KERNEL);
-			if (!elt->private_data)
+			elt_data->comm = kzalloc(size, GFP_KERNEL);
+			if (!elt_data->comm) {
+				kfree(elt_data);
 				return -ENOMEM;
+			}
 			break;
 		}
 	}
 
+	n_str = hist_data->n_field_var_str + hist_data->n_max_var_str;
+
+	size = STR_VAR_LEN_MAX;
+
+	for (i = 0; i < n_str; i++) {
+		elt_data->field_var_str[i] = kzalloc(size, GFP_KERNEL);
+		if (!elt_data->field_var_str[i]) {
+			hist_elt_data_free(elt_data);
+			return -ENOMEM;
+		}
+	}
+
+	elt->private_data = elt_data;
+
 	return 0;
 }
 
-static void hist_trigger_elt_comm_copy(struct tracing_map_elt *to,
-				       struct tracing_map_elt *from)
+static void hist_trigger_elt_data_init(struct tracing_map_elt *elt)
+{
+	struct hist_elt_data *elt_data = elt->private_data;
+
+	if (elt_data->comm)
+		save_comm(elt_data->comm, current);
+}
+
+static const struct tracing_map_ops hist_trigger_elt_data_ops = {
+	.elt_alloc	= hist_trigger_elt_data_alloc,
+	.elt_free	= hist_trigger_elt_data_free,
+	.elt_init	= hist_trigger_elt_data_init,
+};
+
+static const char *get_hist_field_flags(struct hist_field *hist_field)
+{
+	const char *flags_str = NULL;
+
+	if (hist_field->flags & HIST_FIELD_FL_HEX)
+		flags_str = "hex";
+	else if (hist_field->flags & HIST_FIELD_FL_SYM)
+		flags_str = "sym";
+	else if (hist_field->flags & HIST_FIELD_FL_SYM_OFFSET)
+		flags_str = "sym-offset";
+	else if (hist_field->flags & HIST_FIELD_FL_EXECNAME)
+		flags_str = "execname";
+	else if (hist_field->flags & HIST_FIELD_FL_SYSCALL)
+		flags_str = "syscall";
+	else if (hist_field->flags & HIST_FIELD_FL_LOG2)
+		flags_str = "log2";
+	else if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP_USECS)
+		flags_str = "usecs";
+
+	return flags_str;
+}
+
+static void expr_field_str(struct hist_field *field, char *expr)
 {
-	char *comm_from = from->private_data;
-	char *comm_to = to->private_data;
+	if (field->flags & HIST_FIELD_FL_VAR_REF)
+		strcat(expr, "$");
+
+	strcat(expr, hist_field_name(field, 0));
 
-	if (comm_from)
-		memcpy(comm_to, comm_from, TASK_COMM_LEN + 1);
+	if (field->flags && !(field->flags & HIST_FIELD_FL_VAR_REF)) {
+		const char *flags_str = get_hist_field_flags(field);
+
+		if (flags_str) {
+			strcat(expr, ".");
+			strcat(expr, flags_str);
+		}
+	}
 }
 
-static void hist_trigger_elt_comm_init(struct tracing_map_elt *elt)
+static char *expr_str(struct hist_field *field, unsigned int level)
 {
-	char *comm = elt->private_data;
+	char *expr;
+
+	if (level > 1)
+		return NULL;
+
+	expr = kzalloc(MAX_FILTER_STR_VAL, GFP_KERNEL);
+	if (!expr)
+		return NULL;
+
+	if (!field->operands[0]) {
+		expr_field_str(field, expr);
+		return expr;
+	}
+
+	if (field->operator == FIELD_OP_UNARY_MINUS) {
+		char *subexpr;
 
-	if (comm)
-		save_comm(comm, current);
+		strcat(expr, "-(");
+		subexpr = expr_str(field->operands[0], ++level);
+		if (!subexpr) {
+			kfree(expr);
+			return NULL;
+		}
+		strcat(expr, subexpr);
+		strcat(expr, ")");
+
+		kfree(subexpr);
+
+		return expr;
+	}
+
+	expr_field_str(field->operands[0], expr);
+
+	switch (field->operator) {
+	case FIELD_OP_MINUS:
+		strcat(expr, "-");
+		break;
+	case FIELD_OP_PLUS:
+		strcat(expr, "+");
+		break;
+	default:
+		kfree(expr);
+		return NULL;
+	}
+
+	expr_field_str(field->operands[1], expr);
+
+	return expr;
 }
 
-static const struct tracing_map_ops hist_trigger_elt_comm_ops = {
-	.elt_alloc	= hist_trigger_elt_comm_alloc,
-	.elt_copy	= hist_trigger_elt_comm_copy,
-	.elt_free	= hist_trigger_elt_comm_free,
-	.elt_init	= hist_trigger_elt_comm_init,
-};
+static int contains_operator(char *str) /* returns a FIELD_OP_* value */
+{
+	enum field_op_id field_op = FIELD_OP_NONE;
+	char *op;
+
+	op = strpbrk(str, "+-");	/* first '+' or '-' in str, if any */
+	if (!op)
+		return FIELD_OP_NONE;
+
+	switch (*op) {
+	case '-':
+		if (*str == '-')	/* str itself begins with '-' */
+			field_op = FIELD_OP_UNARY_MINUS;
+		else
+			field_op = FIELD_OP_MINUS;
+		break;
+	case '+':
+		field_op = FIELD_OP_PLUS;
+		break;
+	default:
+		break;
+	}
+
+	return field_op;
+}
 
 static void destroy_hist_field(struct hist_field *hist_field,
 			       unsigned int level)
 {
 	unsigned int i;
 
-	if (level > 2)
+	if (level > 3)
 		return;
 
 	if (!hist_field)
@@ -379,11 +2154,17 @@ static void destroy_hist_field(struct hist_field *hist_field,
 	for (i = 0; i < HIST_FIELD_OPERANDS_MAX; i++)
 		destroy_hist_field(hist_field->operands[i], level + 1);
 
+	kfree(hist_field->var.name);
+	kfree(hist_field->name);
+	kfree(hist_field->type);
+
 	kfree(hist_field);
 }
 
-static struct hist_field *create_hist_field(struct ftrace_event_field *field,
-					    unsigned long flags)
+static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data,
+					    struct ftrace_event_field *field,
+					    unsigned long flags,
+					    char *var_name)
 {
 	struct hist_field *hist_field;
 
@@ -394,8 +2175,22 @@ static struct hist_field *create_hist_field(struct ftrace_event_field *field,
 	if (!hist_field)
 		return NULL;
 
+	hist_field->hist_data = hist_data;
+
+	if (flags & HIST_FIELD_FL_EXPR || flags & HIST_FIELD_FL_ALIAS)
+		goto out; /* caller will populate */
+
+	if (flags & HIST_FIELD_FL_VAR_REF) {
+		hist_field->fn = hist_field_var_ref;
+		goto out;
+	}
+
 	if (flags & HIST_FIELD_FL_HITCOUNT) {
 		hist_field->fn = hist_field_counter;
+		hist_field->size = sizeof(u64);
+		hist_field->type = kstrdup("u64", GFP_KERNEL);
+		if (!hist_field->type)
+			goto free;
 		goto out;
 	}
 
@@ -407,8 +2202,29 @@ static struct hist_field *create_hist_field(struct ftrace_event_field *field,
 	if (flags & HIST_FIELD_FL_LOG2) {
 		unsigned long fl = flags & ~HIST_FIELD_FL_LOG2;
 		hist_field->fn = hist_field_log2;
-		hist_field->operands[0] = create_hist_field(field, fl);
+		hist_field->operands[0] = create_hist_field(hist_data, field, fl, NULL);
 		hist_field->size = hist_field->operands[0]->size;
+		hist_field->type = kstrdup(hist_field->operands[0]->type, GFP_KERNEL);
+		if (!hist_field->type)
+			goto free;
+		goto out;
+	}
+
+	if (flags & HIST_FIELD_FL_TIMESTAMP) {
+		hist_field->fn = hist_field_timestamp;
+		hist_field->size = sizeof(u64);
+		hist_field->type = kstrdup("u64", GFP_KERNEL);
+		if (!hist_field->type)
+			goto free;
+		goto out;
+	}
+
+	if (flags & HIST_FIELD_FL_CPU) {
+		hist_field->fn = hist_field_cpu;
+		hist_field->size = sizeof(int);
+		hist_field->type = kstrdup("unsigned int", GFP_KERNEL);
+		if (!hist_field->type)
+			goto free;
 		goto out;
 	}
 
@@ -418,6 +2234,11 @@ static struct hist_field *create_hist_field(struct ftrace_event_field *field,
 	if (is_string_field(field)) {
 		flags |= HIST_FIELD_FL_STRING;
 
+		hist_field->size = MAX_FILTER_STR_VAL;
+		hist_field->type = kstrdup(field->type, GFP_KERNEL);
+		if (!hist_field->type)
+			goto free;
+
 		if (field->filter_type == FILTER_STATIC_STRING)
 			hist_field->fn = hist_field_string;
 		else if (field->filter_type == FILTER_DYN_STRING)
@@ -425,6 +2246,12 @@ static struct hist_field *create_hist_field(struct ftrace_event_field *field,
 		else
 			hist_field->fn = hist_field_pstring;
 	} else {
+		hist_field->size = field->size;
+		hist_field->is_signed = field->is_signed;
+		hist_field->type = kstrdup(field->type, GFP_KERNEL);
+		if (!hist_field->type)
+			goto free;
+
 		hist_field->fn = select_value_fn(field->size,
 						 field->is_signed);
 		if (!hist_field->fn) {
@@ -436,14 +2263,23 @@ static struct hist_field *create_hist_field(struct ftrace_event_field *field,
 	hist_field->field = field;
 	hist_field->flags = flags;
 
+	if (var_name) {
+		hist_field->var.name = kstrdup(var_name, GFP_KERNEL);
+		if (!hist_field->var.name)
+			goto free;
+	}
+
 	return hist_field;
+ free:
+	destroy_hist_field(hist_field, 0);
+	return NULL;
 }
 
 static void destroy_hist_fields(struct hist_trigger_data *hist_data)
 {
 	unsigned int i;
 
-	for (i = 0; i < TRACING_MAP_FIELDS_MAX; i++) {
+	for (i = 0; i < HIST_FIELDS_MAX; i++) {
 		if (hist_data->fields[i]) {
 			destroy_hist_field(hist_data->fields[i], 0);
 			hist_data->fields[i] = NULL;
@@ -451,69 +2287,1610 @@ static void destroy_hist_fields(struct hist_trigger_data *hist_data)
 	}
 }
 
-static int create_hitcount_val(struct hist_trigger_data *hist_data)
+static int init_var_ref(struct hist_field *ref_field,
+			struct hist_field *var_field,
+			char *system, char *event_name)
 {
-	hist_data->fields[HITCOUNT_IDX] =
-		create_hist_field(NULL, HIST_FIELD_FL_HITCOUNT);
-	if (!hist_data->fields[HITCOUNT_IDX])
-		return -ENOMEM;
+	int err = 0;
+
+	ref_field->var.idx = var_field->var.idx;
+	ref_field->var.hist_data = var_field->hist_data;
+	ref_field->size = var_field->size;
+	ref_field->is_signed = var_field->is_signed;
+	ref_field->flags |= var_field->flags &
+		(HIST_FIELD_FL_TIMESTAMP | HIST_FIELD_FL_TIMESTAMP_USECS);
+
+	if (system) {
+		ref_field->system = kstrdup(system, GFP_KERNEL);
+		if (!ref_field->system)
+			return -ENOMEM;
+	}
 
-	hist_data->n_vals++;
+	if (event_name) {
+		ref_field->event_name = kstrdup(event_name, GFP_KERNEL);
+		if (!ref_field->event_name) {
+			err = -ENOMEM;
+			goto free;
+		}
+	}
 
-	if (WARN_ON(hist_data->n_vals > TRACING_MAP_VALS_MAX))
+	if (var_field->var.name) {
+		ref_field->name = kstrdup(var_field->var.name, GFP_KERNEL);
+		if (!ref_field->name) {
+			err = -ENOMEM;
+			goto free;
+		}
+	} else if (var_field->name) {
+		ref_field->name = kstrdup(var_field->name, GFP_KERNEL);
+		if (!ref_field->name) {
+			err = -ENOMEM;
+			goto free;
+		}
+	}
+
+	ref_field->type = kstrdup(var_field->type, GFP_KERNEL);
+	if (!ref_field->type) {
+		err = -ENOMEM;
+		goto free;
+	}
+ out:
+	return err;
+ free:	/* callers destroy ref_field next: leave nothing to double free */
+	kfree(ref_field->system);
+	kfree(ref_field->event_name);
+	kfree(ref_field->name);
+	ref_field->system = ref_field->event_name = ref_field->name = NULL;
+	goto out;
+}
+
+static struct hist_field *create_var_ref(struct hist_field *var_field,
+					 char *system, char *event_name)
+{
+	unsigned long flags = HIST_FIELD_FL_VAR_REF;
+	struct hist_field *ref_field;
+
+	ref_field = create_hist_field(var_field->hist_data, NULL, flags, NULL);
+	if (ref_field) {
+		if (init_var_ref(ref_field, var_field, system, event_name)) {
+			destroy_hist_field(ref_field, 0);
+			return NULL;
+		}
+	}
+
+	return ref_field;
+}
+
+static bool is_var_ref(char *var_name)
+{
+	if (!var_name || strlen(var_name) < 2 || var_name[0] != '$')
+		return false;
+
+	return true;
+}
+
+static char *field_name_from_var(struct hist_trigger_data *hist_data,
+				 char *var_name)
+{
+	char *name, *field;
+	unsigned int i;
+
+	for (i = 0; i < hist_data->attrs->var_defs.n_vars; i++) {
+		name = hist_data->attrs->var_defs.name[i];
+
+		if (strcmp(var_name, name) == 0) {
+			field = hist_data->attrs->var_defs.expr[i];
+			if (contains_operator(field) || is_var_ref(field))
+				continue;
+			return field;
+		}
+	}
+
+	return NULL;
+}
+
+static char *local_field_var_ref(struct hist_trigger_data *hist_data,
+				 char *system, char *event_name,
+				 char *var_name)
+{
+	struct trace_event_call *call;
+
+	if (system && event_name) {
+		call = hist_data->event_file->event_call;
+
+		if (strcmp(system, call->class->system) != 0)
+			return NULL;
+
+		if (strcmp(event_name, trace_event_name(call)) != 0)
+			return NULL;
+	}
+
+	if (!!system != !!event_name)
+		return NULL;
+
+	if (!is_var_ref(var_name))
+		return NULL;
+
+	var_name++;
+
+	return field_name_from_var(hist_data, var_name);
+}
+
+static struct hist_field *parse_var_ref(struct hist_trigger_data *hist_data,
+					char *system, char *event_name,
+					char *var_name)
+{
+	struct hist_field *var_field = NULL, *ref_field = NULL;
+
+	if (!is_var_ref(var_name))
+		return NULL;
+
+	var_name++;
+
+	var_field = find_event_var(hist_data, system, event_name, var_name);
+	if (var_field)
+		ref_field = create_var_ref(var_field, system, event_name);
+
+	if (!ref_field)
+		hist_err_event("Couldn't find variable: $",
+			       system, event_name, var_name);
+
+	return ref_field;
+}
+
+static struct ftrace_event_field *
+parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file,
+	    char *field_str, unsigned long *flags)
+{
+	struct ftrace_event_field *field = NULL;
+	char *field_name, *modifier, *str;
+
+	modifier = str = kstrdup(field_str, GFP_KERNEL);
+	if (!modifier)
+		return ERR_PTR(-ENOMEM);
+
+	field_name = strsep(&modifier, ".");
+	if (modifier) {
+		if (strcmp(modifier, "hex") == 0)
+			*flags |= HIST_FIELD_FL_HEX;
+		else if (strcmp(modifier, "sym") == 0)
+			*flags |= HIST_FIELD_FL_SYM;
+		else if (strcmp(modifier, "sym-offset") == 0)
+			*flags |= HIST_FIELD_FL_SYM_OFFSET;
+		else if ((strcmp(modifier, "execname") == 0) &&
+			 (strcmp(field_name, "common_pid") == 0))
+			*flags |= HIST_FIELD_FL_EXECNAME;
+		else if (strcmp(modifier, "syscall") == 0)
+			*flags |= HIST_FIELD_FL_SYSCALL;
+		else if (strcmp(modifier, "log2") == 0)
+			*flags |= HIST_FIELD_FL_LOG2;
+		else if (strcmp(modifier, "usecs") == 0)
+			*flags |= HIST_FIELD_FL_TIMESTAMP_USECS;
+		else {
+			field = ERR_PTR(-EINVAL);
+			goto out;
+		}
+	}
+
+	if (strcmp(field_name, "common_timestamp") == 0) {
+		*flags |= HIST_FIELD_FL_TIMESTAMP;
+		hist_data->enable_timestamps = true;
+		if (*flags & HIST_FIELD_FL_TIMESTAMP_USECS)
+			hist_data->attrs->ts_in_usecs = true;
+	} else if (strcmp(field_name, "cpu") == 0)
+		*flags |= HIST_FIELD_FL_CPU;
+	else {
+		field = trace_find_event_field(file->event_call, field_name);
+		if (!field || !field->size) {
+			field = ERR_PTR(-EINVAL);
+			goto out;
+		}
+	}
+ out:
+	kfree(str);
+
+	return field;
+}
+
+static struct hist_field *create_alias(struct hist_trigger_data *hist_data,
+				       struct hist_field *var_ref,
+				       char *var_name)
+{
+	struct hist_field *alias = NULL;
+	unsigned long flags = HIST_FIELD_FL_ALIAS | HIST_FIELD_FL_VAR;
+
+	alias = create_hist_field(hist_data, NULL, flags, var_name);
+	if (!alias)
+		return NULL;
+
+	alias->fn = var_ref->fn;
+	alias->operands[0] = var_ref;
+
+	if (init_var_ref(alias, var_ref, var_ref->system, var_ref->event_name)) {
+		destroy_hist_field(alias, 0);
+		return NULL;
+	}
+
+	return alias;
+}
+
+static struct hist_field *parse_atom(struct hist_trigger_data *hist_data,
+				     struct trace_event_file *file, char *str,
+				     unsigned long *flags, char *var_name)
+{
+	char *s, *ref_system = NULL, *ref_event = NULL, *ref_var = str;
+	struct ftrace_event_field *field = NULL;
+	struct hist_field *hist_field = NULL;
+	int ret = 0;
+
+	s = strchr(str, '.');
+	if (s) {
+		s = strchr(++s, '.');
+		if (s) {
+			ref_system = strsep(&str, ".");
+			if (!str) {
+				ret = -EINVAL;
+				goto out;
+			}
+			ref_event = strsep(&str, ".");
+			if (!str) {
+				ret = -EINVAL;
+				goto out;
+			}
+			ref_var = str;
+		}
+	}
+
+	s = local_field_var_ref(hist_data, ref_system, ref_event, ref_var);
+	if (!s) {
+		hist_field = parse_var_ref(hist_data, ref_system, ref_event, ref_var);
+		if (hist_field) {
+			hist_data->var_refs[hist_data->n_var_refs] = hist_field;
+			hist_field->var_ref_idx = hist_data->n_var_refs++;
+			if (var_name) {
+				hist_field = create_alias(hist_data, hist_field, var_name);
+				if (!hist_field) {
+					ret = -ENOMEM;
+					goto out;
+				}
+			}
+			return hist_field;
+		}
+	} else
+		str = s;
+
+	field = parse_field(hist_data, file, str, flags);
+	if (IS_ERR(field)) {
+		ret = PTR_ERR(field);
+		goto out;
+	}
+
+	hist_field = create_hist_field(hist_data, field, *flags, var_name);
+	if (!hist_field) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	return hist_field;
+ out:
+	return ERR_PTR(ret);
+}
+
+static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,
+				     struct trace_event_file *file,
+				     char *str, unsigned long flags,
+				     char *var_name, unsigned int level);
+
+static struct hist_field *parse_unary(struct hist_trigger_data *hist_data,
+				      struct trace_event_file *file,
+				      char *str, unsigned long flags,
+				      char *var_name, unsigned int level)
+{
+	struct hist_field *operand1, *expr = NULL;
+	unsigned long operand_flags;
+	int ret = 0;
+	char *s;
+
+	/* we support only -(xxx) i.e. explicit parens required */
+
+	if (level > 3) {
+		hist_err("Too many subexpressions (3 max): ", str);
+		ret = -EINVAL;
+		goto free;
+	}
+
+	str++; /* skip leading '-' */
+
+	s = strchr(str, '(');
+	if (s)
+		str++;
+	else {
+		ret = -EINVAL;
+		goto free;
+	}
+
+	s = strrchr(str, ')');
+	if (s)
+		*s = '\0';
+	else {
+		ret = -EINVAL; /* no closing ')' */
+		goto free;
+	}
+
+	flags |= HIST_FIELD_FL_EXPR;
+	expr = create_hist_field(hist_data, NULL, flags, var_name);
+	if (!expr) {
+		ret = -ENOMEM;
+		goto free;
+	}
+
+	operand_flags = 0;
+	operand1 = parse_expr(hist_data, file, str, operand_flags, NULL, ++level);
+	if (IS_ERR(operand1)) {
+		ret = PTR_ERR(operand1);
+		goto free;
+	}
+
+	expr->flags |= operand1->flags &
+		(HIST_FIELD_FL_TIMESTAMP | HIST_FIELD_FL_TIMESTAMP_USECS);
+	expr->fn = hist_field_unary_minus;
+	expr->operands[0] = operand1;
+	expr->operator = FIELD_OP_UNARY_MINUS;
+	expr->name = expr_str(expr, 0);
+	expr->type = kstrdup(operand1->type, GFP_KERNEL);
+	if (!expr->type) {
+		ret = -ENOMEM;
+		goto free;
+	}
+
+	return expr;
+ free:
+	destroy_hist_field(expr, 0);
+	return ERR_PTR(ret);
+}
+
+static int check_expr_operands(struct hist_field *operand1,
+			       struct hist_field *operand2)
+{
+	unsigned long operand1_flags = operand1->flags;
+	unsigned long operand2_flags = operand2->flags;
+
+	if ((operand1_flags & HIST_FIELD_FL_VAR_REF) ||
+	    (operand1_flags & HIST_FIELD_FL_ALIAS)) {
+		struct hist_field *var;
+
+		var = find_var_field(operand1->var.hist_data, operand1->name);
+		if (!var)
+			return -EINVAL;
+		operand1_flags = var->flags;
+	}
+
+	if ((operand2_flags & HIST_FIELD_FL_VAR_REF) ||
+	    (operand2_flags & HIST_FIELD_FL_ALIAS)) {
+		struct hist_field *var;
+
+		var = find_var_field(operand2->var.hist_data, operand2->name);
+		if (!var)
+			return -EINVAL;
+		operand2_flags = var->flags;
+	}
+
+	if ((operand1_flags & HIST_FIELD_FL_TIMESTAMP_USECS) !=
+	    (operand2_flags & HIST_FIELD_FL_TIMESTAMP_USECS)) {
+		hist_err("Timestamp units in expression don't match", NULL);
 		return -EINVAL;
+	}
 
 	return 0;
 }
 
-static int create_val_field(struct hist_trigger_data *hist_data,
-			    unsigned int val_idx,
-			    struct trace_event_file *file,
-			    char *field_str)
+static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,
+				     struct trace_event_file *file,
+				     char *str, unsigned long flags,
+				     char *var_name, unsigned int level)
 {
-	struct ftrace_event_field *field = NULL;
-	unsigned long flags = 0;
-	char *field_name;
+	struct hist_field *operand1 = NULL, *operand2 = NULL, *expr = NULL;
+	unsigned long operand_flags;
+	int field_op, ret = -EINVAL;
+	char *sep, *operand1_str;
+
+	if (level > 3) {
+		hist_err("Too many subexpressions (3 max): ", str);
+		return ERR_PTR(-EINVAL);
+	}
+
+	field_op = contains_operator(str);
+
+	if (field_op == FIELD_OP_NONE)
+		return parse_atom(hist_data, file, str, &flags, var_name);
+
+	if (field_op == FIELD_OP_UNARY_MINUS)
+		return parse_unary(hist_data, file, str, flags, var_name, ++level);
+
+	switch (field_op) {
+	case FIELD_OP_MINUS:
+		sep = "-";
+		break;
+	case FIELD_OP_PLUS:
+		sep = "+";
+		break;
+	default:
+		goto free;
+	}
+
+	operand1_str = strsep(&str, sep);
+	if (!operand1_str || !str)
+		goto free;
+
+	operand_flags = 0;
+	operand1 = parse_atom(hist_data, file, operand1_str,
+			      &operand_flags, NULL);
+	if (IS_ERR(operand1)) {
+		ret = PTR_ERR(operand1);
+		operand1 = NULL;
+		goto free;
+	}
+
+	/* rest of string could be another expression e.g. b+c in a+b+c */
+	operand_flags = 0;
+	operand2 = parse_expr(hist_data, file, str, operand_flags, NULL, ++level);
+	if (IS_ERR(operand2)) {
+		ret = PTR_ERR(operand2);
+		operand2 = NULL;
+		goto free;
+	}
+
+	ret = check_expr_operands(operand1, operand2);
+	if (ret)
+		goto free;
+
+	flags |= HIST_FIELD_FL_EXPR;
+
+	flags |= operand1->flags &
+		(HIST_FIELD_FL_TIMESTAMP | HIST_FIELD_FL_TIMESTAMP_USECS);
+
+	expr = create_hist_field(hist_data, NULL, flags, var_name);
+	if (!expr) {
+		ret = -ENOMEM;
+		goto free;
+	}
+
+	expr->type = kstrdup(operand1->type, GFP_KERNEL);
+	if (!expr->type) {
+		ret = -ENOMEM;
+		goto free;
+	}
+
+	switch (field_op) {
+	case FIELD_OP_MINUS:
+		expr->fn = hist_field_minus;
+		break;
+	case FIELD_OP_PLUS:
+		expr->fn = hist_field_plus;
+		break;
+	default:
+		ret = -EINVAL;
+		goto free;
+	}
+
+	operand1->read_once = true;
+	operand2->read_once = true;
+	expr->operands[0] = operand1;
+	expr->operands[1] = operand2;
+	expr->operator = field_op;
+	expr->name = expr_str(expr, 0);
+
+	return expr;
+ free:	/* expr->operands[] is still empty here: each field is freed once */
+	destroy_hist_field(operand1, 0);
+	destroy_hist_field(operand2, 0);
+	destroy_hist_field(expr, 0);
+
+	return ERR_PTR(ret);
+}
+
+static char *find_trigger_filter(struct hist_trigger_data *hist_data,
+				 struct trace_event_file *file)
+{
+	struct event_trigger_data *test;
+
+	list_for_each_entry_rcu(test, &file->triggers, list) {
+		if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) {
+			if (test->private_data == hist_data)
+				return test->filter_str;
+		}
+	}
+
+	return NULL;
+}
+
+static struct event_command trigger_hist_cmd;
+static int event_hist_trigger_func(struct event_command *cmd_ops,
+				   struct trace_event_file *file,
+				   char *glob, char *cmd, char *param);
+
+static bool compatible_keys(struct hist_trigger_data *target_hist_data,
+			    struct hist_trigger_data *hist_data,
+			    unsigned int n_keys)
+{
+	struct hist_field *target_hist_field, *hist_field;
+	unsigned int n, i, j;
+
+	if (hist_data->n_fields - hist_data->n_vals != n_keys)
+		return false;
+
+	i = hist_data->n_vals;
+	j = target_hist_data->n_vals;
+
+	for (n = 0; n < n_keys; n++) {
+		hist_field = hist_data->fields[i + n];
+		target_hist_field = target_hist_data->fields[j + n];
+
+		if (strcmp(hist_field->type, target_hist_field->type) != 0)
+			return false;
+		if (hist_field->size != target_hist_field->size)
+			return false;
+		if (hist_field->is_signed != target_hist_field->is_signed)
+			return false;
+	}
+
+	return true;
+}
+
+static struct hist_trigger_data *
+find_compatible_hist(struct hist_trigger_data *target_hist_data,
+		     struct trace_event_file *file)
+{
+	struct hist_trigger_data *hist_data;
+	struct event_trigger_data *test;
+	unsigned int n_keys;
+
+	n_keys = target_hist_data->n_fields - target_hist_data->n_vals;
+
+	list_for_each_entry_rcu(test, &file->triggers, list) {
+		if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) {
+			hist_data = test->private_data;
+
+			if (compatible_keys(target_hist_data, hist_data, n_keys))
+				return hist_data;
+		}
+	}
+
+	return NULL;
+}
+
+static struct trace_event_file *event_file(struct trace_array *tr,
+					   char *system, char *event_name)
+{
+	struct trace_event_file *file;
+
+	file = find_event_file(tr, system, event_name);
+	if (!file)
+		return ERR_PTR(-EINVAL);
+
+	return file;
+}
+
+static struct hist_field *
+find_synthetic_field_var(struct hist_trigger_data *target_hist_data,
+			 char *system, char *event_name, char *field_name)
+{
+	struct hist_field *event_var;
+	char *synthetic_name;
+
+	/*
+	 * Size "synthetic_<field_name>" to fit; field_name is not bounded
+	 * by the MAX_FILTER_STR_VAL buffer that strcat() used to fill.
+	 */
+	synthetic_name = kasprintf(GFP_KERNEL, "synthetic_%s", field_name);
+	if (!synthetic_name)
+		return ERR_PTR(-ENOMEM);
+
+	event_var = find_event_var(target_hist_data, system, event_name, synthetic_name);
+	kfree(synthetic_name);
+
+	return event_var;
+}
+
+/**
+ * create_field_var_hist - Automatically create a histogram and var for a field
+ * @target_hist_data: The target hist trigger
+ * @subsys_name: Optional subsystem name
+ * @event_name: Optional event name
+ * @field_name: The name of the field (and the resulting variable)
+ *
+ * Hist trigger actions fetch data from variables, not directly from
+ * events.  However, for convenience, users are allowed to directly
+ * specify an event field in an action, which will be automatically
+ * converted into a variable on their behalf.
+
+ * If a user specifies a field on an event that isn't the event the
+ * histogram currently being defined (the target event histogram), the
+ * only way that can be accomplished is if a new hist trigger is
+ * created and the field variable defined on that.
+ *
+ * This function creates a new histogram compatible with the target
+ * event (meaning a histogram with the same key as the target
+ * histogram), and creates a variable for the specified field, but
+ * with 'synthetic_' prepended to the variable name in order to avoid
+ * collision with normal field variables.
+ *
+ * Return: The variable created for the field.
+ */
+static struct hist_field *
+create_field_var_hist(struct hist_trigger_data *target_hist_data,
+		      char *subsys_name, char *event_name, char *field_name)
+{
+	struct trace_array *tr = target_hist_data->event_file->tr;
+	struct hist_field *event_var = ERR_PTR(-EINVAL);
+	struct hist_trigger_data *hist_data;
+	unsigned int i, n, first = true;
+	struct field_var_hist *var_hist;
+	struct trace_event_file *file;
+	struct hist_field *key_field;
+	char *saved_filter;
+	char *cmd;
+	int ret;
+
+	if (target_hist_data->n_field_var_hists >= SYNTH_FIELDS_MAX) {
+		hist_err_event("onmatch: Too many field variables defined: ",
+			       subsys_name, event_name, field_name);
+		return ERR_PTR(-EINVAL);
+	}
+
+	file = event_file(tr, subsys_name, event_name);
+
+	if (IS_ERR(file)) {
+		hist_err_event("onmatch: Event file not found: ",
+			       subsys_name, event_name, field_name);
+		ret = PTR_ERR(file);
+		return ERR_PTR(ret);
+	}
+
+	/*
+	 * Look for a histogram compatible with target.  We'll use the
+	 * found histogram specification to create a new matching
+	 * histogram with our variable on it.  target_hist_data is not
+	 * yet a registered histogram so we can't use that.
+	 */
+	hist_data = find_compatible_hist(target_hist_data, file);
+	if (!hist_data) {
+		hist_err_event("onmatch: Matching event histogram not found: ",
+			       subsys_name, event_name, field_name);
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* See if a synthetic field variable has already been created */
+	event_var = find_synthetic_field_var(target_hist_data, subsys_name,
+					     event_name, field_name);
+	if (!IS_ERR_OR_NULL(event_var))
+		return event_var;
+
+	var_hist = kzalloc(sizeof(*var_hist), GFP_KERNEL);
+	if (!var_hist)
+		return ERR_PTR(-ENOMEM);
+
+	cmd = kzalloc(MAX_FILTER_STR_VAL, GFP_KERNEL);
+	if (!cmd) {
+		kfree(var_hist);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	/* Use the same keys as the compatible histogram */
+	strcat(cmd, "keys=");
+
+	for_each_hist_key_field(i, hist_data) {
+		key_field = hist_data->fields[i];
+		if (!first)
+			strcat(cmd, ",");
+		strcat(cmd, key_field->field->name);
+		first = false;
+	}
+
+	/* Create the synthetic field variable specification */
+	strcat(cmd, ":synthetic_");
+	strcat(cmd, field_name);
+	strcat(cmd, "=");
+	strcat(cmd, field_name);
+
+	/* Use the same filter as the compatible histogram */
+	saved_filter = find_trigger_filter(hist_data, file);
+	if (saved_filter) {
+		strcat(cmd, " if ");
+		strcat(cmd, saved_filter);
+	}
+
+	var_hist->cmd = kstrdup(cmd, GFP_KERNEL);
+	if (!var_hist->cmd) {
+		kfree(cmd);
+		kfree(var_hist);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	/* Save the compatible histogram information */
+	var_hist->hist_data = hist_data;
+
+	/* Create the new histogram with our variable */
+	ret = event_hist_trigger_func(&trigger_hist_cmd, file,
+				      "", "hist", cmd);
+	if (ret) {
+		kfree(cmd);
+		kfree(var_hist->cmd);
+		kfree(var_hist);
+		hist_err_event("onmatch: Couldn't create histogram for field: ",
+			       subsys_name, event_name, field_name);
+		return ERR_PTR(ret);
+	}
+
+	kfree(cmd);
+
+	/* If we can't find the variable, something went wrong */
+	event_var = find_synthetic_field_var(target_hist_data, subsys_name,
+					     event_name, field_name);
+	if (IS_ERR_OR_NULL(event_var)) {
+		kfree(var_hist->cmd);
+		kfree(var_hist);
+		hist_err_event("onmatch: Couldn't find synthetic variable: ",
+			       subsys_name, event_name, field_name);
+		return ERR_PTR(-EINVAL);
+	}
+
+	n = target_hist_data->n_field_var_hists;
+	target_hist_data->field_var_hists[n] = var_hist;
+	target_hist_data->n_field_var_hists++;
+
+	return event_var;
+}
+
+static struct hist_field *
+find_target_event_var(struct hist_trigger_data *hist_data,
+		      char *subsys_name, char *event_name, char *var_name)
+{
+	struct trace_event_file *file = hist_data->event_file;
+	struct hist_field *hist_field = NULL;
+
+	if (subsys_name) {
+		struct trace_event_call *call;
+
+		if (!event_name)
+			return NULL;
+
+		call = file->event_call;
+
+		if (strcmp(subsys_name, call->class->system) != 0)
+			return NULL;
+
+		if (strcmp(event_name, trace_event_name(call)) != 0)
+			return NULL;
+	}
+
+	hist_field = find_var_field(hist_data, var_name);
+
+	return hist_field;
+}
+
+static inline void __update_field_vars(struct tracing_map_elt *elt,
+				       struct ring_buffer_event *rbe,
+				       void *rec,
+				       struct field_var **field_vars,
+				       unsigned int n_field_vars,
+				       unsigned int field_var_str_start)
+{
+	struct hist_elt_data *elt_data = elt->private_data;
+	unsigned int i, j, var_idx;
+	u64 var_val;
+
+	for (i = 0, j = field_var_str_start; i < n_field_vars; i++) {
+		struct field_var *field_var = field_vars[i];
+		struct hist_field *var = field_var->var;
+		struct hist_field *val = field_var->val;
+
+		var_val = val->fn(val, elt, rbe, rec);
+		var_idx = var->var.idx;
+
+		if (val->flags & HIST_FIELD_FL_STRING) {
+			char *str = elt_data->field_var_str[j++]; /* next elt buffer */
+			char *val_str = (char *)(uintptr_t)var_val;
+
+			strscpy(str, val_str, STR_VAR_LEN_MAX);
+			var_val = (u64)(uintptr_t)str;	/* var points at the copy */
+		}
+		tracing_map_set_var(elt, var_idx, var_val);
+	}
+}
+
+static void update_field_vars(struct hist_trigger_data *hist_data,
+			      struct tracing_map_elt *elt,
+			      struct ring_buffer_event *rbe,
+			      void *rec)
+{
+	__update_field_vars(elt, rbe, rec, hist_data->field_vars,
+			    hist_data->n_field_vars, 0);
+}
+
+static void update_max_vars(struct hist_trigger_data *hist_data,
+			    struct tracing_map_elt *elt,
+			    struct ring_buffer_event *rbe,
+			    void *rec)
+{
+	__update_field_vars(elt, rbe, rec, hist_data->max_vars,
+			    hist_data->n_max_vars, hist_data->n_field_var_str);
+}
+
+/*
+ * Create a variable named @name of the given @size and @type, backed by a
+ * new tracing_map var slot.  Returns the variable, or an ERR_PTR() if the
+ * name is taken (unless hist_data->remove is set) or a resource runs out.
+ */
+static struct hist_field *create_var(struct hist_trigger_data *hist_data,
+				     struct trace_event_file *file,
+				     char *name, int size, const char *type)
+{
+	struct hist_field *var;
+	int idx;
+
+	if (find_var(hist_data, file, name) && !hist_data->remove)
+		return ERR_PTR(-EINVAL);
+
+	var = kzalloc(sizeof(*var), GFP_KERNEL);
+	if (!var)
+		return ERR_PTR(-ENOMEM);
+
+	idx = tracing_map_add_var(hist_data->map);
+	if (idx < 0) {
+		kfree(var);
+		return ERR_PTR(-EINVAL);
+	}
+
+	var->flags = HIST_FIELD_FL_VAR;
+	var->size = size;
+	var->hist_data = var->var.hist_data = hist_data;
+	var->var.idx = idx;
+	var->var.name = kstrdup(name, GFP_KERNEL);
+	var->type = kstrdup(type, GFP_KERNEL);
+	if (var->var.name && var->type)
+		return var;
+
+	/* A string copy failed; kfree() copes with the NULL one. */
+	kfree(var->var.name);
+	kfree(var->type);
+	kfree(var);
+	return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * Create a variable for event field @field_name, paired with the hist_field
+ * (val) that reads the field.  Returns the pair or an ERR_PTR() on failure.
+ */
+static struct field_var *create_field_var(struct hist_trigger_data *hist_data,
+					  struct trace_event_file *file,
+					  char *field_name)
+{
+	struct hist_field *val = NULL, *var = NULL;
+	unsigned long flags = HIST_FIELD_FL_VAR;
+	struct field_var *field_var;
 	int ret = 0;
 
-	if (WARN_ON(val_idx >= TRACING_MAP_VALS_MAX))
+	if (hist_data->n_field_vars >= SYNTH_FIELDS_MAX) {
+		hist_err("Too many field variables defined: ", field_name);
+		return ERR_PTR(-EINVAL);
+	}
+
+	val = parse_atom(hist_data, file, field_name, &flags, NULL);
+	if (IS_ERR(val)) {
+		hist_err("Couldn't parse field variable: ", field_name);
+		return ERR_CAST(val);
+	}
+
+	var = create_var(hist_data, file, field_name, val->size, val->type);
+	if (IS_ERR(var)) {
+		hist_err("Couldn't create or find variable: ", field_name);
+		ret = PTR_ERR(var);
+		goto free_val;
+	}
+
+	field_var = kzalloc(sizeof(*field_var), GFP_KERNEL);
+	if (!field_var) {
+		ret = -ENOMEM;
+		goto free_var;
+	}
+
+	field_var->var = var;
+	field_var->val = val;
+	return field_var;
+ free_var:
+	destroy_hist_field(var, 0);	/* not kfree(): var owns name and type */
+ free_val:
+	destroy_hist_field(val, 0);
+	return ERR_PTR(ret);
+}
+
+/**
+ * create_target_field_var - Automatically create a variable for a field
+ * @target_hist_data: The target hist trigger
+ * @subsys_name: Optional subsystem name
+ * @event_name: Optional event name
+ * @var_name: The name of the field (and the resulting variable)
+ *
+ * Hist trigger actions fetch data from variables, not directly from
+ * events.  However, for convenience, users are allowed to directly
+ * specify an event field in an action, which will be automatically
+ * converted into a variable on their behalf.
+ *
+ * This function creates a field variable with the name var_name on
+ * the hist trigger currently being defined on the target event.  If
+ * subsys_name and event_name are specified, this function simply
+ * verifies that they do in fact match the target event subsystem and
+ * event name.
+ *
+ * Return: The variable created for the field, NULL if the given
+ * subsystem/event does not name the target event, or an ERR_PTR().
+ */
+static struct field_var *
+create_target_field_var(struct hist_trigger_data *target_hist_data,
+			char *subsys_name, char *event_name, char *var_name)
+{
+	struct trace_event_file *file = target_hist_data->event_file;
+	struct trace_event_call *call;
+
+	/* An unqualified name always refers to the target event. */
+	if (!subsys_name)
+		return create_field_var(target_hist_data, file, var_name);
+
+	/* A subsystem name must come with an event name. */
+	if (!event_name)
+		return NULL;
+
+	call = file->event_call;
+	if (strcmp(subsys_name, call->class->system) ||
+	    strcmp(event_name, trace_event_name(call)))
+		return NULL;
+
+	return create_field_var(target_hist_data, file, var_name);
+}
+
+/*
+ * Print the max value tracked on @elt for onmax() action @data, then each
+ * saved variable; string variables hold a pointer to the saved string.
+ */
+static void onmax_print(struct seq_file *m,
+			struct hist_trigger_data *hist_data,
+			struct tracing_map_elt *elt, struct action_data *data)
+{
+	unsigned int max_idx = data->onmax.max_var->var.idx;
+	unsigned int n;
+
+	seq_printf(m, "\n\tmax: %10llu", tracing_map_read_var(elt, max_idx));
+
+	for (n = 0; n < hist_data->n_max_vars; n++) {
+		struct field_var *saved = hist_data->max_vars[n];
+		const char *name = saved->var->var.name;
+		u64 val = tracing_map_read_var(elt, saved->var->var.idx);
+
+		if (saved->val->flags & HIST_FIELD_FL_STRING)
+			seq_printf(m, "  %s: %-32s", name,
+				   (char *)(uintptr_t)val);
+		else
+			seq_printf(m, "  %s: %10llu", name, val);
+	}
+}
+
+/*
+ * onmax() action handler.  If the tracked variable's current value (taken
+ * from @var_ref_vals) exceeds the max stored on @elt, record it as the
+ * new max and update the saved variables from the current event.
+ */
+static void onmax_save(struct hist_trigger_data *hist_data,
+		       struct tracing_map_elt *elt, void *rec,
+		       struct ring_buffer_event *rbe,
+		       struct action_data *data, u64 *var_ref_vals)
+{
+	unsigned int max_idx = data->onmax.max_var->var.idx;
+	u64 cur = var_ref_vals[data->onmax.max_var_ref_idx];
+	u64 prev_max = tracing_map_read_var(elt, max_idx);
+
+	/* Only a strictly larger value replaces the stored max. */
+	if (cur > prev_max) {
+		tracing_map_set_var(elt, max_idx, cur);
+		update_max_vars(hist_data, elt, rbe, rec);
+	}
+}
+
+/* Free an onmax() action along with everything it owns. */
+static void onmax_destroy(struct action_data *data)
+{
+	unsigned int n = 0;
+
+	destroy_hist_field(data->onmax.max_var, 0);
+	destroy_hist_field(data->onmax.var, 0);
+	kfree(data->onmax.var_str);
+	kfree(data->onmax.fn_name);
+
+	while (n < data->n_params)
+		kfree(data->params[n++]);
+
+	kfree(data);
+}
+
+/*
+ * Set up the onmax($var).save(...) action @data on @hist_data: take a
+ * reference to $var, create the "max" variable that tracks its maximum,
+ * and create a field variable for every save() parameter.
+ */
+static int onmax_create(struct hist_trigger_data *hist_data,
+			struct action_data *data)
+{
+	struct trace_event_file *file = hist_data->event_file;
+	unsigned int n, var_ref_idx = hist_data->n_var_refs;
+	struct hist_field *var_field, *ref_field, *max_var;
+	char *onmax_var_str = data->onmax.var_str;
+
+	if (onmax_var_str[0] != '$') {
+		hist_err("onmax: For onmax(x), x must be a variable: ", onmax_var_str);
 		return -EINVAL;
+	}
+	onmax_var_str++;
 
-	field_name = strsep(&field_str, ".");
-	if (field_str) {
-		if (strcmp(field_str, "hex") == 0)
-			flags |= HIST_FIELD_FL_HEX;
-		else {
+	var_field = find_target_event_var(hist_data, NULL, NULL, onmax_var_str);
+	if (!var_field) {
+		hist_err("onmax: Couldn't find onmax variable: ", onmax_var_str);
+		return -EINVAL;
+	}
+
+	/* onmax_save() later reads the value at this reference's index. */
+	ref_field = create_hist_field(hist_data, NULL, HIST_FIELD_FL_VAR_REF,
+				      NULL);
+	if (!ref_field)
+		return -ENOMEM;
+
+	if (init_var_ref(ref_field, var_field, NULL, NULL)) {
+		destroy_hist_field(ref_field, 0);
+		return -ENOMEM;
+	}
+	hist_data->var_refs[hist_data->n_var_refs] = ref_field;
+	ref_field->var_ref_idx = hist_data->n_var_refs++;
+	data->onmax.var = ref_field;
+
+	data->fn = onmax_save;
+	data->onmax.max_var_ref_idx = var_ref_idx;
+
+	/* "max" holds the largest value seen so far for each entry. */
+	max_var = create_var(hist_data, file, "max", sizeof(u64), "u64");
+	if (IS_ERR(max_var)) {
+		hist_err("onmax: Couldn't create onmax variable: ", "max");
+		return PTR_ERR(max_var);
+	}
+	data->onmax.max_var = max_var;
+
+	for (n = 0; n < data->n_params; n++) {
+		struct field_var *field_var;
+		char *param = kstrdup(data->params[n], GFP_KERNEL);
+
+		if (!param)
+			return -ENOMEM;
+
+		/* The field var code works on a temporary copy of the name. */
+		field_var = create_target_field_var(hist_data, NULL, NULL, param);
+		if (IS_ERR(field_var)) {
+			hist_err("onmax: Couldn't create field variable: ", param);
+			kfree(param);
+			return PTR_ERR(field_var);
+		}
+		kfree(param);
+
+		hist_data->max_vars[hist_data->n_max_vars++] = field_var;
+		if (field_var->val->flags & HIST_FIELD_FL_STRING)
+			hist_data->n_max_var_str++;
+	}
+
+	return 0;
+}
+
+/*
+ * Save a trimmed copy of each comma-separated entry of @params (modified
+ * in place) in @data.  Returns 0, or -errno (including for too many params).
+ */
+static int parse_action_params(char *params, struct action_data *data)
+{
+	char *param, *saved_param;
+	int ret = 0;
+
+	while (params) {	/* non-NULL, so strsep() can't return NULL */
+		if (data->n_params >= SYNTH_FIELDS_MAX) {
+			hist_err("Too many action params: ", params);
 			ret = -EINVAL;
 			goto out;
 		}
+
+		param = strstrip(strsep(&params, ","));
+		if (strlen(param) < 2) {
+			hist_err("Invalid action param: ", param);
+			ret = -EINVAL;
+			goto out;
+		}
+
+		saved_param = kstrdup(param, GFP_KERNEL);
+		if (!saved_param) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		data->params[data->n_params++] = saved_param;
 	}
+ out:
+	return ret;
+}
 
-	field = trace_find_event_field(file->event_call, field_name);
-	if (!field || !field->size) {
+/*
+ * Parse the text following "onmax(", i.e. "$var).save(params)", into a new
+ * action_data.  Returns the action or an ERR_PTR() on error.
+ */
+static struct action_data *onmax_parse(char *str)
+{
+	char *onmax_fn_name, *onmax_var_str, *params;
+	struct action_data *data;
+	int ret = -EINVAL;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return ERR_PTR(-ENOMEM);
+
+	onmax_var_str = strsep(&str, ")");
+	if (!onmax_var_str || !str) {
 		ret = -EINVAL;
-		goto out;
+		goto free;
+	}
+
+	data->onmax.var_str = kstrdup(onmax_var_str, GFP_KERNEL);
+	if (!data->onmax.var_str) {
+		ret = -ENOMEM;
+		goto free;
+	}
+
+	strsep(&str, ".");
+	if (!str)
+		goto free;
+
+	onmax_fn_name = strsep(&str, "(");
+	if (!onmax_fn_name || !str)
+		goto free;
+
+	/* Only function names starting with "save" are accepted. */
+	if (strncmp(onmax_fn_name, "save", strlen("save")) != 0)
+		goto free;
+
+	params = strsep(&str, ")");
+	if (!params)
+		goto free;
+
+	ret = parse_action_params(params, data);
+	if (ret)
+		goto free;
+
+	data->onmax.fn_name = kstrdup(onmax_fn_name, GFP_KERNEL);
+	if (!data->onmax.fn_name) {
+		ret = -ENOMEM;
+		goto free;
+	}
+	return data;
+ free:
+	onmax_destroy(data);
+	return ERR_PTR(ret);
+}
+
+/* Free an onmatch() action, dropping any synthetic event reference held. */
+static void onmatch_destroy(struct action_data *data)
+{
+	struct synth_event *event;
+	unsigned int n = 0;
+
+	mutex_lock(&synth_event_mutex);
+	kfree(data->onmatch.match_event);
+	kfree(data->onmatch.match_event_system);
+	kfree(data->onmatch.synth_event_name);
+
+	while (n < data->n_params)
+		kfree(data->params[n++]);
+
+	event = data->onmatch.synth_event;
+	if (event)
+		event->ref--;
+	kfree(data);
+	mutex_unlock(&synth_event_mutex);
+}
+
+/* Destroy @field_var and both of its hist_fields; NULL is a no-op. */
+static void destroy_field_var(struct field_var *field_var)
+{
+	if (field_var) {
+		destroy_hist_field(field_var->var, 0);
+		destroy_hist_field(field_var->val, 0);
+
+		kfree(field_var);
+	}
+}
+
+static void destroy_field_vars(struct hist_trigger_data *hist_data)
+{
+	unsigned int n = 0;	/* walks hist_data->field_vars */
+
+	while (n < hist_data->n_field_vars)
+		destroy_field_var(hist_data->field_vars[n++]);
+}
+
+/* Add @field_var to @hist_data's field variables, counting string vars. */
+static void save_field_var(struct hist_trigger_data *hist_data,
+			   struct field_var *field_var)
+{
+	hist_data->field_vars[hist_data->n_field_vars++] = field_var;
+	if (field_var->val->flags & HIST_FIELD_FL_STRING)
+		hist_data->n_field_var_str++;
+}
+
+
+static void destroy_synth_var_refs(struct hist_trigger_data *hist_data)
+{
+	unsigned int n = 0;	/* walks hist_data->synth_var_refs */
+
+	while (n < hist_data->n_synth_var_refs)
+		destroy_hist_field(hist_data->synth_var_refs[n++], 0);
+}
+
+/* Save @var_ref in both synth_var_refs[] and var_refs[], noting its index. */
+static void save_synth_var_ref(struct hist_trigger_data *hist_data,
+			       struct hist_field *var_ref)
+{
+	hist_data->var_refs[hist_data->n_var_refs] = var_ref;
+	var_ref->var_ref_idx = hist_data->n_var_refs++;
+	hist_data->synth_var_refs[hist_data->n_synth_var_refs++] = var_ref;
+}
+
+/*
+ * Return 0 if @field_pos is a valid field of @event whose type string
+ * equals that of @hist_field, -EINVAL otherwise.
+ */
+static int check_synth_field(struct synth_event *event,
+			     struct hist_field *hist_field,
+			     unsigned int field_pos)
+{
+	const char *want;
+
+	if (field_pos >= event->n_fields)
+		return -EINVAL;
+
+	want = event->fields[field_pos]->type;
+	return strcmp(want, hist_field->type) ? -EINVAL : 0;
+}
+
+/*
+ * Look up "$name" param @var on the target event, else on @system.@event.
+ */
+static struct hist_field *
+onmatch_find_var(struct hist_trigger_data *hist_data, struct action_data *data,
+		 char *system, char *event, char *var)
+{
+	struct hist_field *found;
+
+	var++; /* skip '$' */
+	found = find_target_event_var(hist_data, system, event, var);
+	if (found)
+		return found;
+
+	if (!system) {	/* default to the onmatch() match event */
+		system = data->onmatch.match_event_system;
+		event = data->onmatch.match_event;
+	}
+	found = find_event_var(hist_data, system, event, var);
+	if (!found)
+		hist_err_event("onmatch: Couldn't find onmatch param: $", system, event, var);
+	return found;
+}
+
+/*
+ * Create a variable for the event field @var named in an onmatch() action.
+ * The field is looked for on the target event first; if it isn't there
+ * (or the @system.@event qualifier names another event), a new hist
+ * trigger carrying the variable is created on the other event instead.
+ *
+ * Returns the variable's hist_field, or NULL on failure.
+ */
+static struct hist_field *
+onmatch_create_field_var(struct hist_trigger_data *hist_data,
+			 struct action_data *data, char *system,
+			 char *event, char *var)
+{
+	struct hist_field *hist_field;
+	struct field_var *field_var;
+
+	/*
+	 * First try to create a field var on the target event (the
+	 * one currently being defined).  This will create a variable
+	 * for unqualified fields on the target event, or if qualified,
+	 * target fields that have qualified names matching the target.
+	 */
+	field_var = create_target_field_var(hist_data, system, event, var);
+	if (field_var && !IS_ERR(field_var)) {
+		save_field_var(hist_data, field_var);
+		return field_var->var;
+	}
+
+	/*
+	 * If no explicit system.event is specified, default to
+	 * looking for fields on the onmatch(system.event.xxx)
+	 * event.
+	 */
+	if (!system) {
+		system = data->onmatch.match_event_system;
+		event = data->onmatch.match_event;
+	}
+
+	/*
+	 * At this point, we're looking at a field on another event.
+	 * Because we can't modify a hist trigger on another event to
+	 * add a variable for a field, we need to create a new trigger
+	 * on that event and create the variable at the same time.
+	 */
+	hist_field = create_field_var_hist(hist_data, system, event, var);
+
+	/* onmatch_create() only tests for NULL, so fold errors into it. */
+	return IS_ERR(hist_field) ? NULL : hist_field;
+}
+
+/*
+ * Bind the onmatch() action @data to its synthetic event.  Each action
+ * parameter is resolved to a variable ("$var") or has a variable created
+ * for it (event field); its type must match the synthetic event field at
+ * the same position, and a reference to it is saved on @hist_data.
+ * Holds a reference on the synthetic event on success.  Returns 0 or -errno.
+ */
+static int onmatch_create(struct hist_trigger_data *hist_data,
+			  struct trace_event_file *file,
+			  struct action_data *data)
+{
+	struct hist_field *hist_field, *var_ref;
+	char *system, *event_name, *param;
+	unsigned int n, var_ref_idx, field_pos = 0;
+	struct synth_event *event;
+	char *copy = NULL;
+	int ret;
+
+	mutex_lock(&synth_event_mutex);
+	event = find_synth_event(data->onmatch.synth_event_name);
+	if (!event) {
+		hist_err("onmatch: Couldn't find synthetic event: ", data->onmatch.synth_event_name);
+		mutex_unlock(&synth_event_mutex);
+		return -EINVAL;
+	}
+	event->ref++;
+	mutex_unlock(&synth_event_mutex);
+
+	var_ref_idx = hist_data->n_var_refs;
+
+	for (n = 0; n < data->n_params; n++) {
+		/* strsep() advances @param, so @copy keeps the start to free. */
+		copy = param = kstrdup(data->params[n], GFP_KERNEL);
+		if (!param) {
+			ret = -ENOMEM;
+			goto err;
+		}
+
+		/* A parameter is either "name" or "system.event.name". */
+		system = strsep(&param, ".");
+		if (!param) {
+			param = system;
+			system = event_name = NULL;
+		} else {
+			event_name = strsep(&param, ".");
+			if (!param) {
+				ret = -EINVAL;
+				goto err;
+			}
+		}
+
+		if (param[0] == '$')
+			hist_field = onmatch_find_var(hist_data, data, system,
+						      event_name, param);
+		else
+			hist_field = onmatch_create_field_var(hist_data, data,
+							      system, event_name,
+							      param);
+		if (!hist_field) {
+			ret = -EINVAL;
+			goto err;
+		}
+
+		if (check_synth_field(event, hist_field, field_pos) != 0) {
+			hist_err_event("onmatch: Param type doesn't match synthetic event field type: ",
+				       system, event_name, param);
+			ret = -EINVAL;
+			goto err;
+		}
+
+		var_ref = create_var_ref(hist_field, system, event_name);
+		if (!var_ref) {
+			ret = -ENOMEM;
+			goto err;
+		}
+
+		save_synth_var_ref(hist_data, var_ref);
+		field_pos++;
+		kfree(copy);
+		copy = NULL;
+	}
+
+	if (field_pos != event->n_fields) {
+		hist_err("onmatch: Param count doesn't match synthetic event field count: ", event->name);
+		ret = -EINVAL;
+		goto err;
+	}
+
+	data->fn = action_trace;
+	data->onmatch.synth_event = event;
+	data->onmatch.var_ref_idx = var_ref_idx;
+	return 0;
+ err:
+	kfree(copy);	/* NULL unless a parameter was being processed */
+	mutex_lock(&synth_event_mutex);
+	event->ref--;
+	mutex_unlock(&synth_event_mutex);
+	return ret;
+}
+
+/*
+ * Parse "system.event).synth_event(params)", the text following "onmatch(",
+ * into a new action_data.  Returns the action or an ERR_PTR() on error.
+ */
+static struct action_data *onmatch_parse(struct trace_array *tr, char *str)
+{
+	char *match_event, *match_event_system;
+	char *synth_event_name, *params;
+	struct action_data *data;
+	int ret = -EINVAL;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return ERR_PTR(-ENOMEM);
+
+	match_event = strsep(&str, ")");
+	if (!match_event || !str) {
+		hist_err("onmatch: Missing closing paren: ", match_event);
+		goto free;
+	}
+
+	match_event_system = strsep(&match_event, ".");
+	if (!match_event) {
+		hist_err("onmatch: Missing subsystem for match event: ", match_event_system);
+		goto free;
+	}
+
+	if (IS_ERR(event_file(tr, match_event_system, match_event))) {
+		hist_err_event("onmatch: Invalid subsystem or event name: ",
+			       match_event_system, match_event, NULL);
+		goto free;
+	}
+
+	data->onmatch.match_event = kstrdup(match_event, GFP_KERNEL);
+	data->onmatch.match_event_system = kstrdup(match_event_system, GFP_KERNEL);
+	if (!data->onmatch.match_event || !data->onmatch.match_event_system) {
+		ret = -ENOMEM;
+		goto free;
+	}
+
+	/* Step over the '.' between onmatch(...) and the synthetic event. */
+	strsep(&str, ".");
+	if (!str) {
+		hist_err("onmatch: Missing . after onmatch(): ", str);
+		goto free;
+	}
+
+	synth_event_name = strsep(&str, "(");
+	if (!synth_event_name || !str) {
+		hist_err("onmatch: Missing opening paramlist paren: ", synth_event_name);
+		goto free;
 	}
 
-	hist_data->fields[val_idx] = create_hist_field(field, flags);
-	if (!hist_data->fields[val_idx]) {
+	data->onmatch.synth_event_name = kstrdup(synth_event_name, GFP_KERNEL);
+	if (!data->onmatch.synth_event_name) {
 		ret = -ENOMEM;
+		goto free;
+	}
+
+	/* Nothing may follow the closing paren of the parameter list. */
+	params = strsep(&str, ")");
+	if (!params || !str || *str) {
+		hist_err("onmatch: Missing closing paramlist paren: ", params);
+		goto free;
+	}
+
+	ret = parse_action_params(params, data);
+	if (ret)
+		goto free;
+
+	return data;
+ free:
+	onmatch_destroy(data);
+	return ERR_PTR(ret);
+}
+
+/* Create the hitcount value field at HITCOUNT_IDX; returns 0 or -errno. */
+static int create_hitcount_val(struct hist_trigger_data *hist_data)
+{
+	struct hist_field *hitcount;
+
+	hitcount = create_hist_field(hist_data, NULL, HIST_FIELD_FL_HITCOUNT,
+				     NULL);
+	hist_data->fields[HITCOUNT_IDX] = hitcount;
+	if (!hitcount)
+		return -ENOMEM;
+
+	hist_data->n_vals++;
+	hist_data->n_fields++;
+	return WARN_ON(hist_data->n_vals > TRACING_MAP_VALS_MAX) ? -EINVAL : 0;
+}
+
+/* Parse @field_str into the value or variable field stored at @val_idx. */
+static int __create_val_field(struct hist_trigger_data *hist_data,
+			      unsigned int val_idx,
+			      struct trace_event_file *file, char *var_name,
+			      char *field_str, unsigned long flags)
+{
+	struct hist_field *hist_field;
+	int ret = 0;
+
+	hist_field = parse_expr(hist_data, file, field_str, flags, var_name, 0);
+	if (IS_ERR(hist_field)) {
+		ret = PTR_ERR(hist_field);
 		goto out;
 	}
 
+	hist_data->fields[val_idx] = hist_field;
 	++hist_data->n_vals;
+	++hist_data->n_fields;
 
-	if (WARN_ON(hist_data->n_vals > TRACING_MAP_VALS_MAX))
+	/* Variables are counted in n_vals too, hence the larger limit. */
+	if (WARN_ON(hist_data->n_vals > TRACING_MAP_VALS_MAX + TRACING_MAP_VARS_MAX))
 		ret = -EINVAL;
  out:
 	return ret;
 }
 
+/* Create the plain value field @field_str in field slot @val_idx. */
+static int create_val_field(struct hist_trigger_data *hist_data,
+			    unsigned int val_idx,
+			    struct trace_event_file *file, char *field_str)
+{
+	if (WARN_ON(val_idx >= TRACING_MAP_VALS_MAX))
+		return -EINVAL;
+
+	return __create_val_field(hist_data, val_idx, file, NULL, field_str, 0);
+}
+
+/*
+ * Create variable @var_name = @expr_str in field slot @val_idx; defining
+ * a name twice is an error unless hist_data->remove is set.
+ */
+static int create_var_field(struct hist_trigger_data *hist_data,
+			    unsigned int val_idx, struct trace_event_file *file,
+			    char *var_name, char *expr_str)
+{
+	if (WARN_ON(val_idx >= TRACING_MAP_VALS_MAX + TRACING_MAP_VARS_MAX))
+		return -EINVAL;
+
+	if (find_var(hist_data, file, var_name) && !hist_data->remove) {
+		hist_err("Variable already defined: ", var_name);
+		return -EINVAL;
+	}
+
+	hist_data->n_vars++;
+	if (WARN_ON(hist_data->n_vars > TRACING_MAP_VARS_MAX))
+		return -EINVAL;
+	return __create_val_field(hist_data, val_idx, file, var_name, expr_str,
+				  HIST_FIELD_FL_VAR);
+}
+
 static int create_val_fields(struct hist_trigger_data *hist_data,
 			     struct trace_event_file *file)
 {
 	char *fields_str, *field_str;
-	unsigned int i, j;
+	unsigned int i, j = 1;
 	int ret;
 
 	ret = create_hitcount_val(hist_data);
@@ -533,12 +3910,15 @@ static int create_val_fields(struct hist_trigger_data *hist_data,
 		field_str = strsep(&fields_str, ",");
 		if (!field_str)
 			break;
+
 		if (strcmp(field_str, "hitcount") == 0)
 			continue;
+
 		ret = create_val_field(hist_data, j++, file, field_str);
 		if (ret)
 			goto out;
 	}
+
 	if (fields_str && (strcmp(fields_str, "hitcount") != 0))
 		ret = -EINVAL;
  out:
@@ -551,12 +3931,13 @@ static int create_key_field(struct hist_trigger_data *hist_data,
 			    struct trace_event_file *file,
 			    char *field_str)
 {
-	struct ftrace_event_field *field = NULL;
+	struct hist_field *hist_field = NULL;
+
 	unsigned long flags = 0;
 	unsigned int key_size;
 	int ret = 0;
 
-	if (WARN_ON(key_idx >= TRACING_MAP_FIELDS_MAX))
+	if (WARN_ON(key_idx >= HIST_FIELDS_MAX))
 		return -EINVAL;
 
 	flags |= HIST_FIELD_FL_KEY;
@@ -564,57 +3945,40 @@ static int create_key_field(struct hist_trigger_data *hist_data,
 	if (strcmp(field_str, "stacktrace") == 0) {
 		flags |= HIST_FIELD_FL_STACKTRACE;
 		key_size = sizeof(unsigned long) * HIST_STACKTRACE_DEPTH;
+		hist_field = create_hist_field(hist_data, NULL, flags, NULL);
 	} else {
-		char *field_name = strsep(&field_str, ".");
-
-		if (field_str) {
-			if (strcmp(field_str, "hex") == 0)
-				flags |= HIST_FIELD_FL_HEX;
-			else if (strcmp(field_str, "sym") == 0)
-				flags |= HIST_FIELD_FL_SYM;
-			else if (strcmp(field_str, "sym-offset") == 0)
-				flags |= HIST_FIELD_FL_SYM_OFFSET;
-			else if ((strcmp(field_str, "execname") == 0) &&
-				 (strcmp(field_name, "common_pid") == 0))
-				flags |= HIST_FIELD_FL_EXECNAME;
-			else if (strcmp(field_str, "syscall") == 0)
-				flags |= HIST_FIELD_FL_SYSCALL;
-			else if (strcmp(field_str, "log2") == 0)
-				flags |= HIST_FIELD_FL_LOG2;
-			else {
-				ret = -EINVAL;
-				goto out;
-			}
+		hist_field = parse_expr(hist_data, file, field_str, flags,
+					NULL, 0);
+		if (IS_ERR(hist_field)) {
+			ret = PTR_ERR(hist_field);
+			goto out;
 		}
 
-		field = trace_find_event_field(file->event_call, field_name);
-		if (!field || !field->size) {
+		if (hist_field->flags & HIST_FIELD_FL_VAR_REF) {
+			hist_err("Using variable references as keys not supported: ", field_str);
+			destroy_hist_field(hist_field, 0);
 			ret = -EINVAL;
 			goto out;
 		}
 
-		if (is_string_field(field))
-			key_size = MAX_FILTER_STR_VAL;
-		else
-			key_size = field->size;
+		key_size = hist_field->size;
 	}
 
-	hist_data->fields[key_idx] = create_hist_field(field, flags);
-	if (!hist_data->fields[key_idx]) {
-		ret = -ENOMEM;
-		goto out;
-	}
+	hist_data->fields[key_idx] = hist_field;
 
 	key_size = ALIGN(key_size, sizeof(u64));
 	hist_data->fields[key_idx]->size = key_size;
 	hist_data->fields[key_idx]->offset = key_offset;
+
 	hist_data->key_size += key_size;
+
 	if (hist_data->key_size > HIST_KEY_SIZE_MAX) {
 		ret = -EINVAL;
 		goto out;
 	}
 
 	hist_data->n_keys++;
+	hist_data->n_fields++;
 
 	if (WARN_ON(hist_data->n_keys > TRACING_MAP_KEYS_MAX))
 		return -EINVAL;
@@ -658,21 +4022,113 @@ static int create_key_fields(struct hist_trigger_data *hist_data,
 	return ret;
 }
 
+/*
+ * Create a variable field for every parsed variable definition.  The
+ * variables take the field slots that follow the value fields created
+ * so far.  Returns 0, or the error from the first definition that fails.
+ */
+static int create_var_fields(struct hist_trigger_data *hist_data,
+			     struct trace_event_file *file)
+{
+	unsigned int n_vars = hist_data->attrs->var_defs.n_vars;
+	unsigned int n, slot = hist_data->n_vals;
+	int ret = 0;
+
+	for (n = 0; n < n_vars && !ret; n++)
+		ret = create_var_field(hist_data, slot++, file,
+				       hist_data->attrs->var_defs.name[n],
+				       hist_data->attrs->var_defs.expr[n]);
+
+	return ret;
+}
+
+/* Free all parsed variable definitions, leaving none recorded. */
+static void free_var_defs(struct hist_trigger_data *hist_data)
+{
+	struct hist_trigger_attrs *attrs = hist_data->attrs;
+
+	while (attrs->var_defs.n_vars) {
+		attrs->var_defs.n_vars--;
+		kfree(attrs->var_defs.name[attrs->var_defs.n_vars]);
+		kfree(attrs->var_defs.expr[attrs->var_defs.n_vars]);
+	}
+}
+
+/*
+ * Split the trigger's "name=expr[,name=expr...]" assignment strings (in
+ * place) into attrs->var_defs.  Returns 0, or -errno with nothing left
+ * recorded; more than TRACING_MAP_VARS_MAX definitions is an error.
+ */
+static int parse_var_defs(struct hist_trigger_data *hist_data)
+{
+	struct hist_trigger_attrs *attrs = hist_data->attrs;
+	char *name, *expr, *str, *var_name, *field_str;
+	unsigned int i, n_vars = 0;
+	int ret = -EINVAL;
+
+	for (i = 0; i < attrs->n_assignments; i++) {
+		str = attrs->assignment_str[i];
+		/*
+		 * Consume the whole string: stopping after a fixed number
+		 * of entries would silently drop the remaining definitions
+		 * instead of reporting that there are too many.
+		 */
+		while ((field_str = strsep(&str, ","))) {
+			var_name = strsep(&field_str, "=");
+			if (!var_name || !field_str) {
+				hist_err("Malformed assignment: ", var_name);
+				goto free;
+			}
+
+			if (n_vars == TRACING_MAP_VARS_MAX) {
+				hist_err("Too many variables defined: ", var_name);
+				goto free;
+			}
+
+			name = kstrdup(var_name, GFP_KERNEL);
+			expr = kstrdup(field_str, GFP_KERNEL);
+			if (!name || !expr) {
+				kfree(name);
+				kfree(expr);
+				ret = -ENOMEM;
+				goto free;
+			}
+
+			attrs->var_defs.name[n_vars] = name;
+			attrs->var_defs.expr[n_vars] = expr;
+			attrs->var_defs.n_vars = ++n_vars;
+		}
+	}
+
+	return 0;
+ free:
+	free_var_defs(hist_data);
+	return ret;
+}
+
+/* Parse the variable definitions, then create the val, var and key fields. */
 static int create_hist_fields(struct hist_trigger_data *hist_data,
 			      struct trace_event_file *file)
 {
 	int ret;
 
+	ret = parse_var_defs(hist_data);
+	if (ret)
+		goto out;
+
 	ret = create_val_fields(hist_data, file);
 	if (ret)
 		goto out;
 
-	ret = create_key_fields(hist_data, file);
+	ret = create_var_fields(hist_data, file);
 	if (ret)
 		goto out;
 
-	hist_data->n_fields = hist_data->n_vals + hist_data->n_keys;
+	ret = create_key_fields(hist_data, file);
+	if (ret)
+		goto out;
  out:
+	free_var_defs(hist_data);	/* freed on success and failure alike */
 	return ret;
 }
 
@@ -695,7 +4151,7 @@ static int create_sort_keys(struct hist_trigger_data *hist_data)
 	char *fields_str = hist_data->attrs->sort_key_str;
 	struct tracing_map_sort_key *sort_key;
 	int descending, ret = 0;
-	unsigned int i, j;
+	unsigned int i, j, k;
 
 	hist_data->n_sort_keys = 1; /* we always have at least one, hitcount */
 
@@ -743,12 +4199,19 @@ static int create_sort_keys(struct hist_trigger_data *hist_data)
 			continue;
 		}
 
-		for (j = 1; j < hist_data->n_fields; j++) {
+		for (j = 1, k = 1; j < hist_data->n_fields; j++) {
+			unsigned int idx;
+
 			hist_field = hist_data->fields[j];
+			if (hist_field->flags & HIST_FIELD_FL_VAR)
+				continue;
+
+			idx = k++;
+
 			test_name = hist_field_name(hist_field, 0);
 
 			if (strcmp(field_name, test_name) == 0) {
-				sort_key->field_idx = j;
+				sort_key->field_idx = idx;
 				descending = is_descending(field_str);
 				if (descending < 0) {
 					ret = descending;
@@ -763,16 +4226,230 @@ static int create_sort_keys(struct hist_trigger_data *hist_data)
 			break;
 		}
 	}
+
 	hist_data->n_sort_keys = i;
  out:
 	return ret;
 }
 
+static void destroy_actions(struct hist_trigger_data *hist_data)
+{
+	unsigned int n;	/* index into hist_data->actions */
+
+	for (n = 0; n < hist_data->n_actions; n++) {
+		struct action_data *data = hist_data->actions[n];
+
+		if (data->fn == onmax_save)
+			onmax_destroy(data);
+		else if (data->fn == action_trace)
+			onmatch_destroy(data);
+		else	/* no kind-specific destructor */
+			kfree(data);
+	}
+}
+
+/*
+ * Parse each action string in the trigger attributes into an action_data
+ * and record it in hist_data->actions.  Only strings starting with
+ * "onmatch(" or "onmax(" are recognized; the rest of the string is handed
+ * to the matching parser.
+ *
+ * Returns 0 on success.  Parsing stops at the first error, which is
+ * returned as -errno; actions parsed before it stay recorded.
+ */
+static int parse_actions(struct hist_trigger_data *hist_data)
+{
+	struct trace_array *tr = hist_data->event_file->tr;
+	struct action_data *data;
+	unsigned int n;
+	char *str;
+
+	for (n = 0; n < hist_data->attrs->n_actions; n++) {
+		str = hist_data->attrs->action_str[n];
+
+		if (strncmp(str, "onmatch(", strlen("onmatch(")) == 0) {
+			data = onmatch_parse(tr, str + strlen("onmatch("));
+			if (IS_ERR(data))
+				return PTR_ERR(data);
+			/* fn also identifies the action type from here on. */
+			data->fn = action_trace;
+		} else if (strncmp(str, "onmax(", strlen("onmax(")) == 0) {
+			data = onmax_parse(str + strlen("onmax("));
+			if (IS_ERR(data))
+				return PTR_ERR(data);
+			data->fn = onmax_save;
+		} else {
+			return -EINVAL;
+		}
+
+		hist_data->actions[hist_data->n_actions++] = data;
+	}
+
+	return 0;
+}
+
+/*
+ * Run the create step of each parsed action, chosen by the handler in
+ * data->fn.  Returns 0, or the first error encountered.
+ */
+static int create_actions(struct hist_trigger_data *hist_data,
+			  struct trace_event_file *file)
+{
+	unsigned int n;
+
+	for (n = 0; n < hist_data->attrs->n_actions; n++) {
+		struct action_data *data = hist_data->actions[n];
+		int ret = 0;
+
+		if (data->fn == action_trace)
+			ret = onmatch_create(hist_data, file, data);
+		else if (data->fn == onmax_save)
+			ret = onmax_create(hist_data, data);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/* Print per-entry action output for @elt; only onmax() actions have any. */
+static void print_actions(struct seq_file *m,
+			  struct hist_trigger_data *hist_data,
+			  struct tracing_map_elt *elt)
+{
+	unsigned int n;
+
+	for (n = 0; n < hist_data->n_actions; n++) {
+		if (hist_data->actions[n]->fn != onmax_save)
+			continue;
+		onmax_print(m, hist_data, elt, hist_data->actions[n]);
+	}
+}
+
+/* Print @data as ":onmax(var).fn(saved,vars)" for the trigger spec. */
+static void print_onmax_spec(struct seq_file *m,
+			     struct hist_trigger_data *hist_data,
+			     struct action_data *data)
+{
+	unsigned int n;
+
+	seq_puts(m, ":onmax(");
+	seq_printf(m, "%s", data->onmax.var_str);
+	seq_printf(m, ").%s(", data->onmax.fn_name);
+	for (n = 0; n < hist_data->n_max_vars; n++) {
+		if (n)
+			seq_puts(m, ",");
+		seq_printf(m, "%s", hist_data->max_vars[n]->var->var.name);
+	}
+	seq_puts(m, ")");
+}
+
+/* Print @data as ":onmatch(system.event).synth_event(params)". */
+static void print_onmatch_spec(struct seq_file *m,
+			       struct hist_trigger_data *hist_data,
+			       struct action_data *data)
+{
+	unsigned int n = 0;
+
+	seq_printf(m, ":onmatch(%s.%s).", data->onmatch.match_event_system,
+		   data->onmatch.match_event);
+	seq_printf(m, "%s(", data->onmatch.synth_event->name);
+
+	while (n < data->n_params) {
+		if (n)
+			seq_puts(m, ",");
+		seq_printf(m, "%s", data->params[n++]);
+	}
+
+	seq_puts(m, ")");
+}
+
+/*
+ * Return true if @hist_data and @hist_data_test have the same actions in
+ * the same order.  Two actions are the same when they share the handler,
+ * the parameter strings and, depending on the kind of action, the
+ * onmatch() event names or the onmax() variable and function names.
+ * Otherwise return false.
+ */
+static bool actions_match(struct hist_trigger_data *hist_data,
+			  struct hist_trigger_data *hist_data_test)
+{
+	unsigned int i, j;
+
+	if (hist_data->n_actions != hist_data_test->n_actions)
+		return false;
+
+	for (i = 0; i < hist_data->n_actions; i++) {
+		struct action_data *a = hist_data->actions[i];
+		struct action_data *b = hist_data_test->actions[i];
+
+		if (a->fn != b->fn || a->n_params != b->n_params)
+			return false;
+
+		for (j = 0; j < a->n_params; j++) {
+			if (strcmp(a->params[j], b->params[j]))
+				return false;
+		}
+
+		/* Then compare what is specific to each kind of action. */
+		if (a->fn == action_trace) {
+			if (strcmp(a->onmatch.synth_event_name,
+				   b->onmatch.synth_event_name) ||
+			    strcmp(a->onmatch.match_event_system,
+				   b->onmatch.match_event_system) ||
+			    strcmp(a->onmatch.match_event,
+				   b->onmatch.match_event))
+				return false;
+		} else if (a->fn == onmax_save) {
+			if (strcmp(a->onmax.var_str, b->onmax.var_str) ||
+			    strcmp(a->onmax.fn_name, b->onmax.fn_name))
+				return false;
+		}
+	}
+
+	return true;
+}
+
+
+/* Print the spec of every action, dispatching on the action's handler. */
+static void print_actions_spec(struct seq_file *m,
+			       struct hist_trigger_data *hist_data)
+{
+	unsigned int n;
+
+	for (n = 0; n < hist_data->n_actions; n++) {
+		if (hist_data->actions[n]->fn == onmax_save)
+			print_onmax_spec(m, hist_data, hist_data->actions[n]);
+		else if (hist_data->actions[n]->fn == action_trace)
+			print_onmatch_spec(m, hist_data,
+					   hist_data->actions[n]);
+	}
+}
+
+static void destroy_field_var_hists(struct hist_trigger_data *hist_data)
+{
+	unsigned int n = hist_data->n_field_var_hists;
+
+	while (n--) {	/* free back to front; order does not matter */
+		kfree(hist_data->field_var_hists[n]->cmd);
+		kfree(hist_data->field_var_hists[n]);
+	}
+}
+
+/* Free @hist_data and everything it owns; a NULL @hist_data is ignored. */
 static void destroy_hist_data(struct hist_trigger_data *hist_data)
 {
+	if (!hist_data)
+		return;
+
 	destroy_hist_trigger_attrs(hist_data->attrs);
 	destroy_hist_fields(hist_data);
 	tracing_map_destroy(hist_data->map);
+
+	destroy_actions(hist_data);
+	destroy_field_vars(hist_data);
+	destroy_field_var_hists(hist_data);
+	destroy_synth_var_refs(hist_data);
 	kfree(hist_data);
 }
 
@@ -781,7 +4458,7 @@ static int create_tracing_map_fields(struct hist_trigger_data *hist_data)
 	struct tracing_map *map = hist_data->map;
 	struct ftrace_event_field *field;
 	struct hist_field *hist_field;
-	int i, idx;
+	int i, idx = 0;
 
 	for_each_hist_field(i, hist_data) {
 		hist_field = hist_data->fields[i];
@@ -792,6 +4469,9 @@ static int create_tracing_map_fields(struct hist_trigger_data *hist_data)
 
 			if (hist_field->flags & HIST_FIELD_FL_STACKTRACE)
 				cmp_fn = tracing_map_cmp_none;
+			else if (!field)
+				cmp_fn = tracing_map_cmp_num(hist_field->size,
+							     hist_field->is_signed);
 			else if (is_string_field(field))
 				cmp_fn = tracing_map_cmp_string;
 			else
@@ -800,36 +4480,29 @@ static int create_tracing_map_fields(struct hist_trigger_data *hist_data)
 			idx = tracing_map_add_key_field(map,
 							hist_field->offset,
 							cmp_fn);
-
-		} else
+		} else if (!(hist_field->flags & HIST_FIELD_FL_VAR))
 			idx = tracing_map_add_sum_field(map);
 
 		if (idx < 0)
 			return idx;
-	}
-
-	return 0;
-}
-
-static bool need_tracing_map_ops(struct hist_trigger_data *hist_data)
-{
-	struct hist_field *key_field;
-	unsigned int i;
-
-	for_each_hist_key_field(i, hist_data) {
-		key_field = hist_data->fields[i];
 
-		if (key_field->flags & HIST_FIELD_FL_EXECNAME)
-			return true;
+		if (hist_field->flags & HIST_FIELD_FL_VAR) {
+			idx = tracing_map_add_var(map);
+			if (idx < 0)
+				return idx;
+			hist_field->var.idx = idx;
+			hist_field->var.hist_data = hist_data;
+		}
 	}
 
-	return false;
+	return 0;
 }
 
 static struct hist_trigger_data *
 create_hist_data(unsigned int map_bits,
 		 struct hist_trigger_attrs *attrs,
-		 struct trace_event_file *file)
+		 struct trace_event_file *file,
+		 bool remove)
 {
 	const struct tracing_map_ops *map_ops = NULL;
 	struct hist_trigger_data *hist_data;
@@ -840,6 +4513,12 @@ create_hist_data(unsigned int map_bits,
 		return ERR_PTR(-ENOMEM);
 
 	hist_data->attrs = attrs;
+	hist_data->remove = remove;
+	hist_data->event_file = file;
+
+	ret = parse_actions(hist_data);
+	if (ret)
+		goto free;
 
 	ret = create_hist_fields(hist_data, file);
 	if (ret)
@@ -849,8 +4528,7 @@ create_hist_data(unsigned int map_bits,
 	if (ret)
 		goto free;
 
-	if (need_tracing_map_ops(hist_data))
-		map_ops = &hist_trigger_elt_comm_ops;
+	map_ops = &hist_trigger_elt_data_ops;
 
 	hist_data->map = tracing_map_create(map_bits, hist_data->key_size,
 					    map_ops, hist_data);
@@ -863,12 +4541,6 @@ create_hist_data(unsigned int map_bits,
 	ret = create_tracing_map_fields(hist_data);
 	if (ret)
 		goto free;
-
-	ret = tracing_map_init(hist_data->map);
-	if (ret)
-		goto free;
-
-	hist_data->event_file = file;
  out:
 	return hist_data;
  free:
@@ -882,18 +4554,39 @@ create_hist_data(unsigned int map_bits,
 }
 
 static void hist_trigger_elt_update(struct hist_trigger_data *hist_data,
-				    struct tracing_map_elt *elt,
-				    void *rec)
+				    struct tracing_map_elt *elt, void *rec,
+				    struct ring_buffer_event *rbe,
+				    u64 *var_ref_vals)
 {
+	struct hist_elt_data *elt_data;
 	struct hist_field *hist_field;
-	unsigned int i;
+	unsigned int i, var_idx;
 	u64 hist_val;
 
+	elt_data = elt->private_data;
+	elt_data->var_ref_vals = var_ref_vals;
+
 	for_each_hist_val_field(i, hist_data) {
 		hist_field = hist_data->fields[i];
-		hist_val = hist_field->fn(hist_field, rec);
+		hist_val = hist_field->fn(hist_field, elt, rbe, rec);
+		if (hist_field->flags & HIST_FIELD_FL_VAR) {
+			var_idx = hist_field->var.idx;
+			tracing_map_set_var(elt, var_idx, hist_val);
+			continue;
+		}
 		tracing_map_update_sum(elt, i, hist_val);
 	}
+
+	for_each_hist_key_field(i, hist_data) {
+		hist_field = hist_data->fields[i];
+		if (hist_field->flags & HIST_FIELD_FL_VAR) {
+			hist_val = hist_field->fn(hist_field, elt, rbe, rec);
+			var_idx = hist_field->var.idx;
+			tracing_map_set_var(elt, var_idx, hist_val);
+		}
+	}
+
+	update_field_vars(hist_data, elt, rbe, rec);
 }
 
 static inline void add_to_key(char *compound_key, void *key,
@@ -920,15 +4613,31 @@ static inline void add_to_key(char *compound_key, void *key,
 	memcpy(compound_key + key_field->offset, key, size);
 }
 
-static void event_hist_trigger(struct event_trigger_data *data, void *rec)
+/* Invoke the ->fn callback of every action in hist_data->actions, in order. */
+static void
+hist_trigger_actions(struct hist_trigger_data *hist_data,
+		     struct tracing_map_elt *elt, void *rec,
+		     struct ring_buffer_event *rbe, u64 *var_ref_vals)
+{
+	unsigned int n;
+
+	/* Each action is also handed to its own callback as the data argument. */
+	for (n = 0; n < hist_data->n_actions; n++)
+		hist_data->actions[n]->fn(hist_data, elt, rec, rbe,
+					  hist_data->actions[n], var_ref_vals);
+}
+
+static void event_hist_trigger(struct event_trigger_data *data, void *rec,
+			       struct ring_buffer_event *rbe)
 {
 	struct hist_trigger_data *hist_data = data->private_data;
 	bool use_compound_key = (hist_data->n_keys > 1);
 	unsigned long entries[HIST_STACKTRACE_DEPTH];
+	u64 var_ref_vals[TRACING_MAP_VARS_MAX];
 	char compound_key[HIST_KEY_SIZE_MAX];
+	struct tracing_map_elt *elt = NULL;
 	struct stack_trace stacktrace;
 	struct hist_field *key_field;
-	struct tracing_map_elt *elt;
 	u64 field_contents;
 	void *key = NULL;
 	unsigned int i;
@@ -949,7 +4658,7 @@ static void event_hist_trigger(struct event_trigger_data *data, void *rec)
 
 			key = entries;
 		} else {
-			field_contents = key_field->fn(key_field, rec);
+			field_contents = key_field->fn(key_field, elt, rbe, rec);
 			if (key_field->flags & HIST_FIELD_FL_STRING) {
 				key = (void *)(unsigned long)field_contents;
 				use_compound_key = true;
@@ -964,9 +4673,18 @@ static void event_hist_trigger(struct event_trigger_data *data, void *rec)
 	if (use_compound_key)
 		key = compound_key;
 
+	if (hist_data->n_var_refs &&
+	    !resolve_var_refs(hist_data, key, var_ref_vals, false))
+		return;
+
 	elt = tracing_map_insert(hist_data->map, key);
-	if (elt)
-		hist_trigger_elt_update(hist_data, elt, rec);
+	if (!elt)
+		return;
+
+	hist_trigger_elt_update(hist_data, elt, rec, rbe, var_ref_vals);
+
+	if (resolve_var_refs(hist_data, key, var_ref_vals, true))
+		hist_trigger_actions(hist_data, elt, rec, rbe, var_ref_vals);
 }
 
 static void hist_trigger_stacktrace_print(struct seq_file *m,
@@ -1023,7 +4741,13 @@ hist_trigger_entry_print(struct seq_file *m,
 			seq_printf(m, "%s: [%llx] %-55s", field_name,
 				   uval, str);
 		} else if (key_field->flags & HIST_FIELD_FL_EXECNAME) {
-			char *comm = elt->private_data;
+			struct hist_elt_data *elt_data = elt->private_data;
+			char *comm;
+
+			if (WARN_ON_ONCE(!elt_data))
+				return;
+
+			comm = elt_data->comm;
 
 			uval = *(u64 *)(key + key_field->offset);
 			seq_printf(m, "%s: %-16s[%10llu]", field_name,
@@ -1067,6 +4791,10 @@ hist_trigger_entry_print(struct seq_file *m,
 	for (i = 1; i < hist_data->n_vals; i++) {
 		field_name = hist_field_name(hist_data->fields[i], 0);
 
+		if (hist_data->fields[i]->flags & HIST_FIELD_FL_VAR ||
+		    hist_data->fields[i]->flags & HIST_FIELD_FL_EXPR)
+			continue;
+
 		if (hist_data->fields[i]->flags & HIST_FIELD_FL_HEX) {
 			seq_printf(m, "  %s: %10llx", field_name,
 				   tracing_map_read_sum(elt, i));
@@ -1076,6 +4804,8 @@ hist_trigger_entry_print(struct seq_file *m,
 		}
 	}
 
+	print_actions(m, hist_data, elt);
+
 	seq_puts(m, "\n");
 }
 
@@ -1144,6 +4874,11 @@ static int hist_show(struct seq_file *m, void *v)
 			hist_trigger_show(m, data, n++);
 	}
 
+	if (have_hist_err()) {
+		seq_printf(m, "\nERROR: %s\n", hist_err_str);
+		seq_printf(m, "  Last command: %s\n", last_hist_cmd);
+	}
+
  out_unlock:
 	mutex_unlock(&event_mutex);
 
@@ -1162,37 +4897,22 @@ const struct file_operations event_hist_fops = {
 	.release = single_release,
 };
 
-static const char *get_hist_field_flags(struct hist_field *hist_field)
-{
-	const char *flags_str = NULL;
-
-	if (hist_field->flags & HIST_FIELD_FL_HEX)
-		flags_str = "hex";
-	else if (hist_field->flags & HIST_FIELD_FL_SYM)
-		flags_str = "sym";
-	else if (hist_field->flags & HIST_FIELD_FL_SYM_OFFSET)
-		flags_str = "sym-offset";
-	else if (hist_field->flags & HIST_FIELD_FL_EXECNAME)
-		flags_str = "execname";
-	else if (hist_field->flags & HIST_FIELD_FL_SYSCALL)
-		flags_str = "syscall";
-	else if (hist_field->flags & HIST_FIELD_FL_LOG2)
-		flags_str = "log2";
-
-	return flags_str;
-}
-
 static void hist_field_print(struct seq_file *m, struct hist_field *hist_field)
 {
 	const char *field_name = hist_field_name(hist_field, 0);
 
-	seq_printf(m, "%s", field_name);
-	if (hist_field->flags) {
-		const char *flags_str = get_hist_field_flags(hist_field);
-
-		if (flags_str)
-			seq_printf(m, ".%s", flags_str);
-	}
+	if (hist_field->var.name)
+		seq_printf(m, "%s=", hist_field->var.name);
+
+	if (hist_field->flags & HIST_FIELD_FL_CPU)
+		seq_puts(m, "cpu");
+	else if (field_name) {
+		if (hist_field->flags & HIST_FIELD_FL_VAR_REF ||
+		    hist_field->flags & HIST_FIELD_FL_ALIAS)
+			seq_putc(m, '$');
+		seq_printf(m, "%s", field_name);
+	} else if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP)
+		seq_puts(m, "common_timestamp");
 }
 
 static int event_hist_trigger_print(struct seq_file *m,
@@ -1200,7 +4920,8 @@ static int event_hist_trigger_print(struct seq_file *m,
 				    struct event_trigger_data *data)
 {
 	struct hist_trigger_data *hist_data = data->private_data;
-	struct hist_field *key_field;
+	struct hist_field *field;
+	bool have_var = false;
 	unsigned int i;
 
 	seq_puts(m, "hist:");
@@ -1211,25 +4932,47 @@ static int event_hist_trigger_print(struct seq_file *m,
 	seq_puts(m, "keys=");
 
 	for_each_hist_key_field(i, hist_data) {
-		key_field = hist_data->fields[i];
+		field = hist_data->fields[i];
 
 		if (i > hist_data->n_vals)
 			seq_puts(m, ",");
 
-		if (key_field->flags & HIST_FIELD_FL_STACKTRACE)
+		if (field->flags & HIST_FIELD_FL_STACKTRACE)
 			seq_puts(m, "stacktrace");
 		else
-			hist_field_print(m, key_field);
+			hist_field_print(m, field);
 	}
 
 	seq_puts(m, ":vals=");
 
 	for_each_hist_val_field(i, hist_data) {
+		field = hist_data->fields[i];
+		if (field->flags & HIST_FIELD_FL_VAR) {
+			have_var = true;
+			continue;
+		}
+
 		if (i == HITCOUNT_IDX)
 			seq_puts(m, "hitcount");
 		else {
 			seq_puts(m, ",");
-			hist_field_print(m, hist_data->fields[i]);
+			hist_field_print(m, field);
+		}
+	}
+
+	if (have_var) {
+		unsigned int n = 0;
+
+		seq_puts(m, ":");
+
+		for_each_hist_val_field(i, hist_data) {
+			field = hist_data->fields[i];
+
+			if (field->flags & HIST_FIELD_FL_VAR) {
+				if (n++)
+					seq_puts(m, ",");
+				hist_field_print(m, field);
+			}
 		}
 	}
 
@@ -1237,28 +4980,36 @@ static int event_hist_trigger_print(struct seq_file *m,
 
 	for (i = 0; i < hist_data->n_sort_keys; i++) {
 		struct tracing_map_sort_key *sort_key;
+		unsigned int idx, first_key_idx;
+
+		/* skip VAR vals */
+		first_key_idx = hist_data->n_vals - hist_data->n_vars;
 
 		sort_key = &hist_data->sort_keys[i];
+		idx = sort_key->field_idx;
+
+		if (WARN_ON(idx >= HIST_FIELDS_MAX))
+			return -EINVAL;
 
 		if (i > 0)
 			seq_puts(m, ",");
 
-		if (sort_key->field_idx == HITCOUNT_IDX)
+		if (idx == HITCOUNT_IDX)
 			seq_puts(m, "hitcount");
 		else {
-			unsigned int idx = sort_key->field_idx;
-
-			if (WARN_ON(idx >= TRACING_MAP_FIELDS_MAX))
-				return -EINVAL;
-
+			if (idx >= first_key_idx)
+				idx += hist_data->n_vars;
 			hist_field_print(m, hist_data->fields[idx]);
 		}
 
 		if (sort_key->descending)
 			seq_puts(m, ".descending");
 	}
-
 	seq_printf(m, ":size=%u", (1 << hist_data->map->map_bits));
+	if (hist_data->enable_timestamps)
+		seq_printf(m, ":clock=%s", hist_data->attrs->clock);
+
+	print_actions_spec(m, hist_data);
 
 	if (data->filter_str)
 		seq_printf(m, " if %s", data->filter_str);
@@ -1286,6 +5037,21 @@ static int event_hist_trigger_init(struct event_trigger_ops *ops,
 	return 0;
 }
 
+/* Remove each field-var hist trigger via "!hist"; warn once on failure. */
+static void unregister_field_var_hists(struct hist_trigger_data *hist_data)
+{
+	struct trace_event_file *file;
+	unsigned int i;
+	char *cmd;
+
+	for (i = 0; i < hist_data->n_field_var_hists; i++) {
+		file = hist_data->field_var_hists[i]->hist_data->event_file;
+		cmd = hist_data->field_var_hists[i]->cmd;
+		WARN_ON_ONCE(event_hist_trigger_func(&trigger_hist_cmd, file,
+						     "!hist", "hist", cmd) < 0);
+	}
+}
+
 static void event_hist_trigger_free(struct event_trigger_ops *ops,
 				    struct event_trigger_data *data)
 {
@@ -1298,7 +5064,13 @@ static void event_hist_trigger_free(struct event_trigger_ops *ops,
 	if (!data->ref) {
 		if (data->name)
 			del_named_trigger(data);
+
 		trigger_data_free(data);
+
+		remove_hist_vars(hist_data);
+
+		unregister_field_var_hists(hist_data);
+
 		destroy_hist_data(hist_data);
 	}
 }
@@ -1425,6 +5197,15 @@ static bool hist_trigger_match(struct event_trigger_data *data,
 			return false;
 		if (key_field->offset != key_field_test->offset)
 			return false;
+		if (key_field->size != key_field_test->size)
+			return false;
+		if (key_field->is_signed != key_field_test->is_signed)
+			return false;
+		if (!!key_field->var.name != !!key_field_test->var.name)
+			return false;
+		if (key_field->var.name &&
+		    strcmp(key_field->var.name, key_field_test->var.name) != 0)
+			return false;
 	}
 
 	for (i = 0; i < hist_data->n_sort_keys; i++) {
@@ -1440,6 +5221,9 @@ static bool hist_trigger_match(struct event_trigger_data *data,
 	    (strcmp(data->filter_str, data_test->filter_str) != 0))
 		return false;
 
+	if (!actions_match(hist_data, hist_data_test))
+		return false;
+
 	return true;
 }
 
@@ -1456,6 +5240,7 @@ static int hist_register_trigger(char *glob, struct event_trigger_ops *ops,
 		if (named_data) {
 			if (!hist_trigger_match(data, named_data, named_data,
 						true)) {
+				hist_err("Named hist trigger doesn't match existing named trigger (includes variables): ", hist_data->attrs->name);
 				ret = -EINVAL;
 				goto out;
 			}
@@ -1475,13 +5260,16 @@ static int hist_register_trigger(char *glob, struct event_trigger_ops *ops,
 				test->paused = false;
 			else if (hist_data->attrs->clear)
 				hist_clear(test);
-			else
+			else {
+				hist_err("Hist trigger already exists", NULL);
 				ret = -EEXIST;
+			}
 			goto out;
 		}
 	}
  new:
 	if (hist_data->attrs->cont || hist_data->attrs->clear) {
+		hist_err("Can't clear or continue a nonexistent hist trigger", NULL);
 		ret = -ENOENT;
 		goto out;
 	}
@@ -1490,7 +5278,6 @@ static int hist_register_trigger(char *glob, struct event_trigger_ops *ops,
 		data->paused = true;
 
 	if (named_data) {
-		destroy_hist_data(data->private_data);
 		data->private_data = named_data->private_data;
 		set_named_trigger_data(data, named_data);
 		data->ops = &event_hist_trigger_named_ops;
@@ -1502,8 +5289,32 @@ static int hist_register_trigger(char *glob, struct event_trigger_ops *ops,
 			goto out;
 	}
 
-	list_add_rcu(&data->list, &file->triggers);
+	if (hist_data->enable_timestamps) {
+		char *clock = hist_data->attrs->clock;
+
+		ret = tracing_set_clock(file->tr, hist_data->attrs->clock);
+		if (ret) {
+			hist_err("Couldn't set trace_clock: ", clock);
+			goto out;
+		}
+
+		tracing_set_time_stamp_abs(file->tr, true);
+	}
+
+	if (named_data)
+		destroy_hist_data(hist_data);
+
 	ret++;
+ out:
+	return ret;
+}
+
+static int hist_trigger_enable(struct event_trigger_data *data,
+			       struct trace_event_file *file)
+{
+	int ret = 0;
+
+	list_add_tail_rcu(&data->list, &file->triggers);
 
 	update_cond_flag(file);
 
@@ -1512,10 +5323,55 @@ static int hist_register_trigger(char *glob, struct event_trigger_ops *ops,
 		update_cond_flag(file);
 		ret--;
 	}
- out:
+
 	return ret;
 }
 
+/*
+ * Return true if hist_trigger_match() accepts any hist trigger already on
+ * @file's trigger list as a match for @data.
+ */
+static bool have_hist_trigger_match(struct event_trigger_data *data,
+				    struct trace_event_file *file)
+{
+	struct hist_trigger_data *hist_data = data->private_data;
+	struct event_trigger_data *named = NULL, *iter;
+
+	if (hist_data->attrs->name)
+		named = find_named_trigger(hist_data->attrs->name);
+
+	list_for_each_entry_rcu(iter, &file->triggers, list) {
+		if (iter->cmd_ops->trigger_type == ETT_EVENT_HIST &&
+		    hist_trigger_match(data, iter, named, false))
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * Locate the first hist trigger on @file that matches @data and return
+ * what check_var_refs() says about it; false when nothing matches.
+ */
+static bool hist_trigger_check_refs(struct event_trigger_data *data,
+				    struct trace_event_file *file)
+{
+	struct hist_trigger_data *hist_data = data->private_data;
+	struct event_trigger_data *named = NULL, *iter;
+
+	if (hist_data->attrs->name)
+		named = find_named_trigger(hist_data->attrs->name);
+
+	list_for_each_entry_rcu(iter, &file->triggers, list) {
+		if (iter->cmd_ops->trigger_type != ETT_EVENT_HIST ||
+		    !hist_trigger_match(data, iter, named, false))
+			continue;
+		return check_var_refs(iter->private_data);
+	}
+
+	return false;
+}
+
 static void hist_unregister_trigger(char *glob, struct event_trigger_ops *ops,
 				    struct event_trigger_data *data,
 				    struct trace_event_file *file)
@@ -1541,17 +5397,55 @@ static void hist_unregister_trigger(char *glob, struct event_trigger_ops *ops,
 
 	if (unregistered && test->ops->free)
 		test->ops->free(test->ops, test);
+
+	if (hist_data->enable_timestamps) {
+		if (!hist_data->remove || unregistered)
+			tracing_set_time_stamp_abs(file->tr, false);
+	}
+}
+
+/* Return true if check_var_refs() flags any hist trigger on @file. */
+static bool hist_file_check_refs(struct trace_event_file *file)
+{
+	struct event_trigger_data *iter;
+
+	list_for_each_entry_rcu(iter, &file->triggers, list) {
+		if (iter->cmd_ops->trigger_type != ETT_EVENT_HIST)
+			continue;
+		/* A hist trigger's private_data is its hist_trigger_data. */
+		if (check_var_refs(iter->private_data))
+			return true;
+	}
+
+	return false;
 }
 
 static void hist_unreg_all(struct trace_event_file *file)
 {
 	struct event_trigger_data *test, *n;
+	struct hist_trigger_data *hist_data;
+	struct synth_event *se;
+	const char *se_name;
+
+	if (hist_file_check_refs(file))
+		return;
 
 	list_for_each_entry_safe(test, n, &file->triggers, list) {
 		if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) {
+			hist_data = test->private_data;
 			list_del_rcu(&test->list);
 			trace_event_trigger_enable_disable(file, 0);
+
+			mutex_lock(&synth_event_mutex);
+			se_name = trace_event_name(file->event_call);
+			se = find_synth_event(se_name);
+			if (se)
+				se->ref--;
+			mutex_unlock(&synth_event_mutex);
+
 			update_cond_flag(file);
+			if (hist_data->enable_timestamps)
+				tracing_set_time_stamp_abs(file->tr, false);
 			if (test->ops->free)
 				test->ops->free(test->ops, test);
 		}
@@ -1567,16 +5461,54 @@ static int event_hist_trigger_func(struct event_command *cmd_ops,
 	struct hist_trigger_attrs *attrs;
 	struct event_trigger_ops *trigger_ops;
 	struct hist_trigger_data *hist_data;
-	char *trigger;
+	struct synth_event *se;
+	const char *se_name;
+	bool remove = false;
+	char *trigger, *p;
 	int ret = 0;
 
+	if (glob && strlen(glob)) {
+		last_cmd_set(param);
+		hist_err_clear();
+	}
+
 	if (!param)
 		return -EINVAL;
 
-	/* separate the trigger from the filter (k:v [if filter]) */
-	trigger = strsep(&param, " \t");
-	if (!trigger)
-		return -EINVAL;
+	if (glob[0] == '!')
+		remove = true;
+
+	/*
+	 * separate the trigger from the filter (k:v [if filter])
+	 * allowing for whitespace in the trigger
+	 */
+	p = trigger = param;
+	do {
+		p = strstr(p, "if");
+		if (!p)
+			break;
+		if (p == param)
+			return -EINVAL;
+		if (*(p - 1) != ' ' && *(p - 1) != '\t') {
+			p++;
+			continue;
+		}
+		if (p >= param + strlen(param) - strlen("if") - 1)
+			return -EINVAL;
+		if (*(p + strlen("if")) != ' ' && *(p + strlen("if")) != '\t') {
+			p++;
+			continue;
+		}
+		break;
+	} while (p);
+
+	if (!p)
+		param = NULL;
+	else {
+		*(p - 1) = '\0';
+		param = strstrip(p);
+		trigger = strstrip(trigger);
+	}
 
 	attrs = parse_hist_trigger_attrs(trigger);
 	if (IS_ERR(attrs))
@@ -1585,7 +5517,7 @@ static int event_hist_trigger_func(struct event_command *cmd_ops,
 	if (attrs->map_bits)
 		hist_trigger_bits = attrs->map_bits;
 
-	hist_data = create_hist_data(hist_trigger_bits, attrs, file);
+	hist_data = create_hist_data(hist_trigger_bits, attrs, file, remove);
 	if (IS_ERR(hist_data)) {
 		destroy_hist_trigger_attrs(attrs);
 		return PTR_ERR(hist_data);
@@ -1593,10 +5525,11 @@ static int event_hist_trigger_func(struct event_command *cmd_ops,
 
 	trigger_ops = cmd_ops->get_trigger_ops(cmd, trigger);
 
-	ret = -ENOMEM;
 	trigger_data = kzalloc(sizeof(*trigger_data), GFP_KERNEL);
-	if (!trigger_data)
+	if (!trigger_data) {
+		ret = -ENOMEM;
 		goto out_free;
+	}
 
 	trigger_data->count = -1;
 	trigger_data->ops = trigger_ops;
@@ -1614,8 +5547,24 @@ static int event_hist_trigger_func(struct event_command *cmd_ops,
 			goto out_free;
 	}
 
-	if (glob[0] == '!') {
+	if (remove) {
+		if (!have_hist_trigger_match(trigger_data, file))
+			goto out_free;
+
+		if (hist_trigger_check_refs(trigger_data, file)) {
+			ret = -EBUSY;
+			goto out_free;
+		}
+
 		cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file);
+
+		mutex_lock(&synth_event_mutex);
+		se_name = trace_event_name(file->event_call);
+		se = find_synth_event(se_name);
+		if (se)
+			se->ref--;
+		mutex_unlock(&synth_event_mutex);
+
 		ret = 0;
 		goto out_free;
 	}
@@ -1632,14 +5581,47 @@ static int event_hist_trigger_func(struct event_command *cmd_ops,
 		goto out_free;
 	} else if (ret < 0)
 		goto out_free;
+
+	if (get_named_trigger_data(trigger_data))
+		goto enable;
+
+	if (has_hist_vars(hist_data))
+		save_hist_vars(hist_data);
+
+	ret = create_actions(hist_data, file);
+	if (ret)
+		goto out_unreg;
+
+	ret = tracing_map_init(hist_data->map);
+	if (ret)
+		goto out_unreg;
+enable:
+	ret = hist_trigger_enable(trigger_data, file);
+	if (ret)
+		goto out_unreg;
+
+	mutex_lock(&synth_event_mutex);
+	se_name = trace_event_name(file->event_call);
+	se = find_synth_event(se_name);
+	if (se)
+		se->ref++;
+	mutex_unlock(&synth_event_mutex);
+
 	/* Just return zero, not the number of registered triggers */
 	ret = 0;
  out:
+	if (ret == 0)
+		hist_err_clear();
+
 	return ret;
+ out_unreg:
+	cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file);
  out_free:
 	if (cmd_ops->set_filter)
 		cmd_ops->set_filter(NULL, trigger_data, NULL);
 
+	remove_hist_vars(hist_data);
+
 	kfree(trigger_data);
 
 	destroy_hist_data(hist_data);
@@ -1669,7 +5651,8 @@ __init int register_trigger_hist_cmd(void)
 }
 
 static void
-hist_enable_trigger(struct event_trigger_data *data, void *rec)
+hist_enable_trigger(struct event_trigger_data *data, void *rec,
+		    struct ring_buffer_event *event)
 {
 	struct enable_trigger_data *enable_data = data->private_data;
 	struct event_trigger_data *test;
@@ -1685,7 +5668,8 @@ hist_enable_trigger(struct event_trigger_data *data, void *rec)
 }
 
 static void
-hist_enable_count_trigger(struct event_trigger_data *data, void *rec)
+hist_enable_count_trigger(struct event_trigger_data *data, void *rec,
+			  struct ring_buffer_event *event)
 {
 	if (!data->count)
 		return;
@@ -1693,7 +5677,7 @@ hist_enable_count_trigger(struct event_trigger_data *data, void *rec)
 	if (data->count != -1)
 		(data->count)--;
 
-	hist_enable_trigger(data, rec);
+	hist_enable_trigger(data, rec, event);
 }
 
 static struct event_trigger_ops hist_enable_trigger_ops = {
@@ -1798,3 +5782,31 @@ __init int register_trigger_hist_enable_disable_cmds(void)
 
 	return ret;
 }
+
+/*
+ * Create the "synthetic_events" file, backed by synth_events_fops, under
+ * the dentry returned by tracing_init_dentry().
+ *
+ * Return: 0 on success; otherwise warns and returns a negative errno.
+ */
+static __init int trace_events_hist_init(void)
+{
+	struct dentry *d_tracer = tracing_init_dentry();
+	int err = -ENODEV;
+
+	if (IS_ERR(d_tracer)) {
+		err = PTR_ERR(d_tracer);
+		goto warn;
+	}
+
+	/* A NULL dentry means the file was not created: report -ENODEV. */
+	if (tracefs_create_file("synthetic_events", 0644, d_tracer,
+				NULL, &synth_events_fops))
+		return 0;
+ warn:
+	pr_warn("Could not create tracefs 'synthetic_events' entry\n");
+
+	return err;
+}
+
+fs_initcall(trace_events_hist_init);
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index 87411482a46f..d251cabcf69a 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -63,7 +63,8 @@ void trigger_data_free(struct event_trigger_data *data)
  * any trigger that should be deferred, ETT_NONE if nothing to defer.
  */
 enum event_trigger_type
-event_triggers_call(struct trace_event_file *file, void *rec)
+event_triggers_call(struct trace_event_file *file, void *rec,
+		    struct ring_buffer_event *event)
 {
 	struct event_trigger_data *data;
 	enum event_trigger_type tt = ETT_NONE;
@@ -76,7 +77,7 @@ event_triggers_call(struct trace_event_file *file, void *rec)
 		if (data->paused)
 			continue;
 		if (!rec) {
-			data->ops->func(data, rec);
+			data->ops->func(data, rec, event);
 			continue;
 		}
 		filter = rcu_dereference_sched(data->filter);
@@ -86,7 +87,7 @@ event_triggers_call(struct trace_event_file *file, void *rec)
 			tt |= data->cmd_ops->trigger_type;
 			continue;
 		}
-		data->ops->func(data, rec);
+		data->ops->func(data, rec, event);
 	}
 	return tt;
 }
@@ -108,7 +109,7 @@ EXPORT_SYMBOL_GPL(event_triggers_call);
 void
 event_triggers_post_call(struct trace_event_file *file,
 			 enum event_trigger_type tt,
-			 void *rec)
+			 void *rec, struct ring_buffer_event *event)
 {
 	struct event_trigger_data *data;
 
@@ -116,7 +117,7 @@ event_triggers_post_call(struct trace_event_file *file,
 		if (data->paused)
 			continue;
 		if (data->cmd_ops->trigger_type & tt)
-			data->ops->func(data, rec);
+			data->ops->func(data, rec, event);
 	}
 }
 EXPORT_SYMBOL_GPL(event_triggers_post_call);
@@ -908,8 +909,15 @@ void set_named_trigger_data(struct event_trigger_data *data,
 	data->named_data = named_data;
 }
 
+struct event_trigger_data *
+get_named_trigger_data(struct event_trigger_data *data)
+{
+	return data->named_data;
+}
+
 static void
-traceon_trigger(struct event_trigger_data *data, void *rec)
+traceon_trigger(struct event_trigger_data *data, void *rec,
+		struct ring_buffer_event *event)
 {
 	if (tracing_is_on())
 		return;
@@ -918,7 +926,8 @@ traceon_trigger(struct event_trigger_data *data, void *rec)
 }
 
 static void
-traceon_count_trigger(struct event_trigger_data *data, void *rec)
+traceon_count_trigger(struct event_trigger_data *data, void *rec,
+		      struct ring_buffer_event *event)
 {
 	if (tracing_is_on())
 		return;
@@ -933,7 +942,8 @@ traceon_count_trigger(struct event_trigger_data *data, void *rec)
 }
 
 static void
-traceoff_trigger(struct event_trigger_data *data, void *rec)
+traceoff_trigger(struct event_trigger_data *data, void *rec,
+		 struct ring_buffer_event *event)
 {
 	if (!tracing_is_on())
 		return;
@@ -942,7 +952,8 @@ traceoff_trigger(struct event_trigger_data *data, void *rec)
 }
 
 static void
-traceoff_count_trigger(struct event_trigger_data *data, void *rec)
+traceoff_count_trigger(struct event_trigger_data *data, void *rec,
+		       struct ring_buffer_event *event)
 {
 	if (!tracing_is_on())
 		return;
@@ -1039,13 +1050,15 @@ static struct event_command trigger_traceoff_cmd = {
 
 #ifdef CONFIG_TRACER_SNAPSHOT
 static void
-snapshot_trigger(struct event_trigger_data *data, void *rec)
+snapshot_trigger(struct event_trigger_data *data, void *rec,
+		 struct ring_buffer_event *event)
 {
 	tracing_snapshot();
 }
 
 static void
-snapshot_count_trigger(struct event_trigger_data *data, void *rec)
+snapshot_count_trigger(struct event_trigger_data *data, void *rec,
+		       struct ring_buffer_event *event)
 {
 	if (!data->count)
 		return;
@@ -1053,7 +1066,7 @@ snapshot_count_trigger(struct event_trigger_data *data, void *rec)
 	if (data->count != -1)
 		(data->count)--;
 
-	snapshot_trigger(data, rec);
+	snapshot_trigger(data, rec, event);
 }
 
 static int
@@ -1141,13 +1154,15 @@ static __init int register_trigger_snapshot_cmd(void) { return 0; }
 #endif
 
 static void
-stacktrace_trigger(struct event_trigger_data *data, void *rec)
+stacktrace_trigger(struct event_trigger_data *data, void *rec,
+		   struct ring_buffer_event *event)
 {
 	trace_dump_stack(STACK_SKIP);
 }
 
 static void
-stacktrace_count_trigger(struct event_trigger_data *data, void *rec)
+stacktrace_count_trigger(struct event_trigger_data *data, void *rec,
+			 struct ring_buffer_event *event)
 {
 	if (!data->count)
 		return;
@@ -1155,7 +1170,7 @@ stacktrace_count_trigger(struct event_trigger_data *data, void *rec)
 	if (data->count != -1)
 		(data->count)--;
 
-	stacktrace_trigger(data, rec);
+	stacktrace_trigger(data, rec, event);
 }
 
 static int
@@ -1217,7 +1232,8 @@ static __init void unregister_trigger_traceon_traceoff_cmds(void)
 }
 
 static void
-event_enable_trigger(struct event_trigger_data *data, void *rec)
+event_enable_trigger(struct event_trigger_data *data, void *rec,
+		     struct ring_buffer_event *event)
 {
 	struct enable_trigger_data *enable_data = data->private_data;
 
@@ -1228,7 +1244,8 @@ event_enable_trigger(struct event_trigger_data *data, void *rec)
 }
 
 static void
-event_enable_count_trigger(struct event_trigger_data *data, void *rec)
+event_enable_count_trigger(struct event_trigger_data *data, void *rec,
+			   struct ring_buffer_event *event)
 {
 	struct enable_trigger_data *enable_data = data->private_data;
 
@@ -1242,7 +1259,7 @@ event_enable_count_trigger(struct event_trigger_data *data, void *rec)
 	if (data->count != -1)
 		(data->count)--;
 
-	event_enable_trigger(data, rec);
+	event_enable_trigger(data, rec, event);
 }
 
 int event_enable_trigger_print(struct seq_file *m,
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 2014f4351ae0..34fd0e0ec51d 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -151,6 +151,8 @@ static void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
 		return;
 
 	ret = strncpy_from_user(dst, src, maxlen);
+	if (ret == maxlen)
+		dst[--ret] = '\0';
 
 	if (ret < 0) {	/* Failed to fetch string */
 		((u8 *)get_rloc_data(dest))[0] = '\0';
@@ -446,7 +448,7 @@ static int create_trace_uprobe(int argc, char **argv)
 	if (ret)
 		goto fail_address_parse;
 
-	inode = igrab(d_inode(path.dentry));
+	inode = igrab(d_real_inode(path.dentry));
 	path_put(&path);
 
 	if (!inode || !S_ISREG(inode->i_mode)) {
@@ -602,24 +604,9 @@ static int probes_seq_show(struct seq_file *m, void *v)
 	char c = is_ret_probe(tu) ? 'r' : 'p';
 	int i;
 
-	seq_printf(m, "%c:%s/%s", c, tu->tp.call.class->system,
-			trace_event_name(&tu->tp.call));
-	seq_printf(m, " %s:", tu->filename);
-
-	/* Don't print "0x  (null)" when offset is 0 */
-	if (tu->offset) {
-		seq_printf(m, "0x%px", (void *)tu->offset);
-	} else {
-		switch (sizeof(void *)) {
-		case 4:
-			seq_printf(m, "0x00000000");
-			break;
-		case 8:
-		default:
-			seq_printf(m, "0x0000000000000000");
-			break;
-		}
-	}
+	seq_printf(m, "%c:%s/%s %s:0x%0*lx", c, tu->tp.call.class->system,
+			trace_event_name(&tu->tp.call), tu->filename,
+			(int)(sizeof(void *) * 2), tu->offset);
 
 	for (i = 0; i < tu->tp.nr_args; i++)
 		seq_printf(m, " %s=%s", tu->tp.args[i].name, tu->tp.args[i].comm);
diff --git a/kernel/trace/tracing_map.c b/kernel/trace/tracing_map.c
index 07e75344725b..5cadb1b8b5fe 100644
--- a/kernel/trace/tracing_map.c
+++ b/kernel/trace/tracing_map.c
@@ -66,6 +66,73 @@ u64 tracing_map_read_sum(struct tracing_map_elt *elt, unsigned int i)
 	return (u64)atomic64_read(&elt->fields[i].sum);
 }
 
+/**
+ * tracing_map_set_var - Assign a tracing_map_elt's variable field
+ * @elt: The tracing_map_elt
+ * @i: The index of the given variable associated with the tracing_map_elt
+ * @n: The value to assign
+ *
+ * Assign n to variable i associated with the specified tracing_map_elt
+ * instance.  The index i is the index returned by the call to
+ * tracing_map_add_var() when the tracing map was set up.
+ */
+void tracing_map_set_var(struct tracing_map_elt *elt, unsigned int i, u64 n)
+{
+	atomic64_set(&elt->vars[i], n);
+	elt->var_set[i] = true;
+}
+
+/**
+ * tracing_map_var_set - Return whether or not a variable has been set
+ * @elt: The tracing_map_elt
+ * @i: The index of the given variable associated with the tracing_map_elt
+ *
+ * Return true if the variable has been set, false otherwise.  The
+ * index i is the index returned by the call to tracing_map_add_var()
+ * when the tracing map was set up.
+ */
+bool tracing_map_var_set(struct tracing_map_elt *elt, unsigned int i)
+{
+	return elt->var_set[i];
+}
+
+/**
+ * tracing_map_read_var - Return the value of a tracing_map_elt's variable field
+ * @elt: The tracing_map_elt
+ * @i: The index of the given variable associated with the tracing_map_elt
+ *
+ * Retrieve the value of the variable i associated with the specified
+ * tracing_map_elt instance.  The index i is the index returned by the
+ * call to tracing_map_add_var() when the tracing map was set
+ * up.
+ *
+ * Return: The variable value associated with field i for elt.
+ */
+u64 tracing_map_read_var(struct tracing_map_elt *elt, unsigned int i)
+{
+	return (u64)atomic64_read(&elt->vars[i]);
+}
+
+/**
+ * tracing_map_read_var_once - Return and reset a tracing_map_elt's variable field
+ * @elt: The tracing_map_elt
+ * @i: The index of the given variable associated with the tracing_map_elt
+ *
+ * Retrieve the value of the variable i associated with the specified
+ * tracing_map_elt instance, and reset the variable to the 'not set'
+ * state.  The index i is the index returned by the call to
+ * tracing_map_add_var() when the tracing map was set up.  The reset
+ * essentially makes the variable a read-once variable if it's only
+ * accessed using this function.
+ *
+ * Return: The variable value associated with field i for elt.
+ */
+u64 tracing_map_read_var_once(struct tracing_map_elt *elt, unsigned int i)
+{
+	elt->var_set[i] = false;
+	return (u64)atomic64_read(&elt->vars[i]);
+}
+
 int tracing_map_cmp_string(void *val_a, void *val_b)
 {
 	char *a = val_a;
@@ -171,6 +238,28 @@ int tracing_map_add_sum_field(struct tracing_map *map)
 }
 
 /**
+ * tracing_map_add_var - Add a field describing a tracing_map var
+ * @map: The tracing_map
+ *
+ * Add a var to the map and return the index identifying it in the map
+ * and associated tracing_map_elts.  This is the index used, for
+ * instance, to set a var for a particular tracing_map_elt using
+ * tracing_map_set_var() or to read it via tracing_map_read_var().
+ *
+ * Return: The index identifying the var in the map and associated
+ * tracing_map_elts, or -EINVAL on error.
+ */
+int tracing_map_add_var(struct tracing_map *map)
+{
+	int ret = -EINVAL;
+
+	if (map->n_vars < TRACING_MAP_VARS_MAX)
+		ret = map->n_vars++;
+
+	return ret;
+}
+
+/**
  * tracing_map_add_key_field - Add a field describing a tracing_map key
  * @map: The tracing_map
  * @offset: The offset within the key
@@ -280,6 +369,11 @@ static void tracing_map_elt_clear(struct tracing_map_elt *elt)
 		if (elt->fields[i].cmp_fn == tracing_map_cmp_atomic64)
 			atomic64_set(&elt->fields[i].sum, 0);
 
+	for (i = 0; i < elt->map->n_vars; i++) {
+		atomic64_set(&elt->vars[i], 0);
+		elt->var_set[i] = false;
+	}
+
 	if (elt->map->ops && elt->map->ops->elt_clear)
 		elt->map->ops->elt_clear(elt);
 }
@@ -306,6 +400,8 @@ static void tracing_map_elt_free(struct tracing_map_elt *elt)
 	if (elt->map->ops && elt->map->ops->elt_free)
 		elt->map->ops->elt_free(elt);
 	kfree(elt->fields);
+	kfree(elt->vars);
+	kfree(elt->var_set);
 	kfree(elt->key);
 	kfree(elt);
 }
@@ -333,6 +429,18 @@ static struct tracing_map_elt *tracing_map_elt_alloc(struct tracing_map *map)
 		goto free;
 	}
 
+	elt->vars = kcalloc(map->n_vars, sizeof(*elt->vars), GFP_KERNEL);
+	if (!elt->vars) {
+		err = -ENOMEM;
+		goto free;
+	}
+
+	elt->var_set = kcalloc(map->n_vars, sizeof(*elt->var_set), GFP_KERNEL);
+	if (!elt->var_set) {
+		err = -ENOMEM;
+		goto free;
+	}
+
 	tracing_map_elt_init_fields(elt);
 
 	if (map->ops && map->ops->elt_alloc) {
@@ -414,7 +522,9 @@ static inline struct tracing_map_elt *
 __tracing_map_insert(struct tracing_map *map, void *key, bool lookup_only)
 {
 	u32 idx, key_hash, test_key;
+	int dup_try = 0;
 	struct tracing_map_entry *entry;
+	struct tracing_map_elt *val;
 
 	key_hash = jhash(key, map->key_size, 0);
 	if (key_hash == 0)
@@ -426,11 +536,33 @@ __tracing_map_insert(struct tracing_map *map, void *key, bool lookup_only)
 		entry = TRACING_MAP_ENTRY(map->map, idx);
 		test_key = entry->key;
 
-		if (test_key && test_key == key_hash && entry->val &&
-		    keys_match(key, entry->val->key, map->key_size)) {
-			if (!lookup_only)
-				atomic64_inc(&map->hits);
-			return entry->val;
+		if (test_key && test_key == key_hash) {
+			val = READ_ONCE(entry->val);
+			if (val &&
+			    keys_match(key, val->key, map->key_size)) {
+				if (!lookup_only)
+					atomic64_inc(&map->hits);
+				return val;
+			} else if (unlikely(!val)) {
+				/*
+				 * The key is present, but val (pointer to elt
+				 * struct) is still NULL, which means some other
+				 * thread is in the process of inserting an
+				 * element.
+				 *
+				 * On top of that, its key_hash is the same as
+				 * the one being inserted right now, so it's
+				 * possible that the element has the same
+				 * key as well.
+				 */
+
+				dup_try++;
+				if (dup_try > map->map_size) {
+					atomic64_inc(&map->drops);
+					break;
+				}
+				continue;
+			}
 		}
 
 		if (!test_key) {
@@ -452,6 +584,13 @@ __tracing_map_insert(struct tracing_map *map, void *key, bool lookup_only)
 				atomic64_inc(&map->hits);
 
 				return entry->val;
+			} else {
+				/*
+				 * cmpxchg() failed. Loop around once
+				 * more to check what key was inserted.
+				 */
+				dup_try++;
+				continue;
 			}
 		}
 
@@ -816,67 +955,15 @@ create_sort_entry(void *key, struct tracing_map_elt *elt)
 	return sort_entry;
 }
 
-static struct tracing_map_elt *copy_elt(struct tracing_map_elt *elt)
-{
-	struct tracing_map_elt *dup_elt;
-	unsigned int i;
-
-	dup_elt = tracing_map_elt_alloc(elt->map);
-	if (IS_ERR(dup_elt))
-		return NULL;
-
-	if (elt->map->ops && elt->map->ops->elt_copy)
-		elt->map->ops->elt_copy(dup_elt, elt);
-
-	dup_elt->private_data = elt->private_data;
-	memcpy(dup_elt->key, elt->key, elt->map->key_size);
-
-	for (i = 0; i < elt->map->n_fields; i++) {
-		atomic64_set(&dup_elt->fields[i].sum,
-			     atomic64_read(&elt->fields[i].sum));
-		dup_elt->fields[i].cmp_fn = elt->fields[i].cmp_fn;
-	}
-
-	return dup_elt;
-}
-
-static int merge_dup(struct tracing_map_sort_entry **sort_entries,
-		     unsigned int target, unsigned int dup)
-{
-	struct tracing_map_elt *target_elt, *elt;
-	bool first_dup = (target - dup) == 1;
-	int i;
-
-	if (first_dup) {
-		elt = sort_entries[target]->elt;
-		target_elt = copy_elt(elt);
-		if (!target_elt)
-			return -ENOMEM;
-		sort_entries[target]->elt = target_elt;
-		sort_entries[target]->elt_copied = true;
-	} else
-		target_elt = sort_entries[target]->elt;
-
-	elt = sort_entries[dup]->elt;
-
-	for (i = 0; i < elt->map->n_fields; i++)
-		atomic64_add(atomic64_read(&elt->fields[i].sum),
-			     &target_elt->fields[i].sum);
-
-	sort_entries[dup]->dup = true;
-
-	return 0;
-}
-
-static int merge_dups(struct tracing_map_sort_entry **sort_entries,
+static void detect_dups(struct tracing_map_sort_entry **sort_entries,
 		      int n_entries, unsigned int key_size)
 {
 	unsigned int dups = 0, total_dups = 0;
-	int err, i, j;
+	int i;
 	void *key;
 
 	if (n_entries < 2)
-		return total_dups;
+		return;
 
 	sort(sort_entries, n_entries, sizeof(struct tracing_map_sort_entry *),
 	     (int (*)(const void *, const void *))cmp_entries_dup, NULL);
@@ -885,30 +972,14 @@ static int merge_dups(struct tracing_map_sort_entry **sort_entries,
 	for (i = 1; i < n_entries; i++) {
 		if (!memcmp(sort_entries[i]->key, key, key_size)) {
 			dups++; total_dups++;
-			err = merge_dup(sort_entries, i - dups, i);
-			if (err)
-				return err;
 			continue;
 		}
 		key = sort_entries[i]->key;
 		dups = 0;
 	}
 
-	if (!total_dups)
-		return total_dups;
-
-	for (i = 0, j = 0; i < n_entries; i++) {
-		if (!sort_entries[i]->dup) {
-			sort_entries[j] = sort_entries[i];
-			if (j++ != i)
-				sort_entries[i] = NULL;
-		} else {
-			destroy_sort_entry(sort_entries[i]);
-			sort_entries[i] = NULL;
-		}
-	}
-
-	return total_dups;
+	WARN_ONCE(total_dups > 0,
+		  "Duplicates detected: %d\n", total_dups);
 }
 
 static bool is_key(struct tracing_map *map, unsigned int field_idx)
@@ -1034,10 +1105,7 @@ int tracing_map_sort_entries(struct tracing_map *map,
 		return 1;
 	}
 
-	ret = merge_dups(entries, n_entries, map->key_size);
-	if (ret < 0)
-		goto free;
-	n_entries -= ret;
+	detect_dups(entries, n_entries, map->key_size);
 
 	if (is_key(map, sort_keys[0].field_idx))
 		cmp_entries_fn = cmp_entries_key;
diff --git a/kernel/trace/tracing_map.h b/kernel/trace/tracing_map.h
index 5b5bbf8ae550..053eb92b2d31 100644
--- a/kernel/trace/tracing_map.h
+++ b/kernel/trace/tracing_map.h
@@ -10,6 +10,7 @@
 #define TRACING_MAP_VALS_MAX		3
 #define TRACING_MAP_FIELDS_MAX		(TRACING_MAP_KEYS_MAX + \
 					 TRACING_MAP_VALS_MAX)
+#define TRACING_MAP_VARS_MAX		16
 #define TRACING_MAP_SORT_KEYS_MAX	2
 
 typedef int (*tracing_map_cmp_fn_t) (void *val_a, void *val_b);
@@ -137,6 +138,8 @@ struct tracing_map_field {
 struct tracing_map_elt {
 	struct tracing_map		*map;
 	struct tracing_map_field	*fields;
+	atomic64_t			*vars;
+	bool				*var_set;
 	void				*key;
 	void				*private_data;
 };
@@ -192,6 +195,7 @@ struct tracing_map {
 	int				key_idx[TRACING_MAP_KEYS_MAX];
 	unsigned int			n_keys;
 	struct tracing_map_sort_key	sort_key;
+	unsigned int			n_vars;
 	atomic64_t			hits;
 	atomic64_t			drops;
 };
@@ -215,11 +219,6 @@ struct tracing_map {
  *	Element allocation occurs before tracing begins, when the
  *	tracing_map_init() call is made by client code.
  *
- * @elt_copy: At certain points in the lifetime of an element, it may
- *	need to be copied.  The copy should include a copy of the
- *	client-allocated data, which can be copied into the 'to'
- *	element from the 'from' element.
- *
  * @elt_free: When a tracing_map_elt is freed, this function is called
  *	and allows client-allocated per-element data to be freed.
  *
@@ -233,8 +232,6 @@ struct tracing_map {
  */
 struct tracing_map_ops {
 	int			(*elt_alloc)(struct tracing_map_elt *elt);
-	void			(*elt_copy)(struct tracing_map_elt *to,
-					    struct tracing_map_elt *from);
 	void			(*elt_free)(struct tracing_map_elt *elt);
 	void			(*elt_clear)(struct tracing_map_elt *elt);
 	void			(*elt_init)(struct tracing_map_elt *elt);
@@ -248,6 +245,7 @@ tracing_map_create(unsigned int map_bits,
 extern int tracing_map_init(struct tracing_map *map);
 
 extern int tracing_map_add_sum_field(struct tracing_map *map);
+extern int tracing_map_add_var(struct tracing_map *map);
 extern int tracing_map_add_key_field(struct tracing_map *map,
 				     unsigned int offset,
 				     tracing_map_cmp_fn_t cmp_fn);
@@ -267,7 +265,13 @@ extern int tracing_map_cmp_none(void *val_a, void *val_b);
 
 extern void tracing_map_update_sum(struct tracing_map_elt *elt,
 				   unsigned int i, u64 n);
+extern void tracing_map_set_var(struct tracing_map_elt *elt,
+				unsigned int i, u64 n);
+extern bool tracing_map_var_set(struct tracing_map_elt *elt, unsigned int i);
 extern u64 tracing_map_read_sum(struct tracing_map_elt *elt, unsigned int i);
+extern u64 tracing_map_read_var(struct tracing_map_elt *elt, unsigned int i);
+extern u64 tracing_map_read_var_once(struct tracing_map_elt *elt, unsigned int i);
+
 extern void tracing_map_set_field_descr(struct tracing_map *map,
 					unsigned int i,
 					unsigned int key_offset,
diff --git a/kernel/utsname.c b/kernel/utsname.c
index 913fe4336d2b..dcd6be1996fe 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -19,6 +19,8 @@
 #include <linux/proc_ns.h>
 #include <linux/sched/task.h>
 
+static struct kmem_cache *uts_ns_cache __ro_after_init;
+
 static struct ucounts *inc_uts_namespaces(struct user_namespace *ns)
 {
 	return inc_ucount(ns, current_euid(), UCOUNT_UTS_NAMESPACES);
@@ -33,7 +35,7 @@ static struct uts_namespace *create_uts_ns(void)
 {
 	struct uts_namespace *uts_ns;
 
-	uts_ns = kmalloc(sizeof(struct uts_namespace), GFP_KERNEL);
+	uts_ns = kmem_cache_alloc(uts_ns_cache, GFP_KERNEL);
 	if (uts_ns)
 		kref_init(&uts_ns->kref);
 	return uts_ns;
@@ -42,7 +44,7 @@ static struct uts_namespace *create_uts_ns(void)
 /*
  * Clone a new ns copying an original utsname, setting refcount to 1
  * @old_ns: namespace to clone
- * Return ERR_PTR(-ENOMEM) on error (failure to kmalloc), new ns otherwise
+ * Return ERR_PTR(-ENOMEM) on error (failure to allocate), new ns otherwise
  */
 static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns,
 					  struct uts_namespace *old_ns)
@@ -75,7 +77,7 @@ static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns,
 	return ns;
 
 fail_free:
-	kfree(ns);
+	kmem_cache_free(uts_ns_cache, ns);
 fail_dec:
 	dec_uts_namespaces(ucounts);
 fail:
@@ -113,7 +115,7 @@ void free_uts_ns(struct kref *kref)
 	dec_uts_namespaces(ns->ucounts);
 	put_user_ns(ns->user_ns);
 	ns_free_inum(&ns->ns);
-	kfree(ns);
+	kmem_cache_free(uts_ns_cache, ns);
 }
 
 static inline struct uts_namespace *to_uts_ns(struct ns_common *ns)
@@ -169,3 +171,13 @@ const struct proc_ns_operations utsns_operations = {
 	.install	= utsns_install,
 	.owner		= utsns_owner,
 };
+
+void __init uts_ns_init(void)
+{
+	uts_ns_cache = kmem_cache_create_usercopy(
+			"uts_namespace", sizeof(struct uts_namespace), 0,
+			SLAB_PANIC|SLAB_ACCOUNT,
+			offsetof(struct uts_namespace, name),
+			sizeof_field(struct uts_namespace, name),
+			NULL);
+}
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 51c6bf0d93c6..c40c7b734cd1 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -800,6 +800,30 @@ config SOFTLOCKUP_DETECTOR
 	  chance to run.  The current stack trace is displayed upon
 	  detection and the system will stay locked up.
 
+config BOOTPARAM_SOFTLOCKUP_PANIC
+	bool "Panic (Reboot) On Soft Lockups"
+	depends on SOFTLOCKUP_DETECTOR
+	help
+	  Say Y here to enable the kernel to panic on "soft lockups",
+	  which are bugs that cause the kernel to loop in kernel
+	  mode for more than 20 seconds (configurable using the watchdog_thresh
+	  sysctl), without giving other tasks a chance to run.
+
+	  The panic can be used in combination with panic_timeout,
+	  to cause the system to reboot automatically after a
+	  lockup has been detected. This feature is useful for
+	  high-availability systems that have uptime guarantees and
+	  where a lockup must be resolved ASAP.
+
+	  Say N if unsure.
+
+config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
+	int
+	depends on SOFTLOCKUP_DETECTOR
+	range 0 1
+	default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC
+	default 1 if BOOTPARAM_SOFTLOCKUP_PANIC
+
 config HARDLOCKUP_DETECTOR_PERF
 	bool
 	select SOFTLOCKUP_DETECTOR
@@ -849,30 +873,6 @@ config BOOTPARAM_HARDLOCKUP_PANIC_VALUE
 	default 0 if !BOOTPARAM_HARDLOCKUP_PANIC
 	default 1 if BOOTPARAM_HARDLOCKUP_PANIC
 
-config BOOTPARAM_SOFTLOCKUP_PANIC
-	bool "Panic (Reboot) On Soft Lockups"
-	depends on SOFTLOCKUP_DETECTOR
-	help
-	  Say Y here to enable the kernel to panic on "soft lockups",
-	  which are bugs that cause the kernel to loop in kernel
-	  mode for more than 20 seconds (configurable using the watchdog_thresh
-	  sysctl), without giving other tasks a chance to run.
-
-	  The panic can be used in combination with panic_timeout,
-	  to cause the system to reboot automatically after a
-	  lockup has been detected. This feature is useful for
-	  high-availability systems that have uptime guarantees and
-	  where a lockup must be resolved ASAP.
-
-	  Say N if unsure.
-
-config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
-	int
-	depends on SOFTLOCKUP_DETECTOR
-	range 0 1
-	default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC
-	default 1 if BOOTPARAM_SOFTLOCKUP_PANIC
-
 config DETECT_HUNG_TASK
 	bool "Detect Hung Tasks"
 	depends on DEBUG_KERNEL
diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan
index a669c193b878..19d42ea75ec2 100644
--- a/lib/Kconfig.ubsan
+++ b/lib/Kconfig.ubsan
@@ -46,3 +46,10 @@ config UBSAN_NULL
 	help
 	  This option enables detection of memory accesses via a
 	  null pointer.
+
+config TEST_UBSAN
+	tristate "Module for testing for undefined behavior detection"
+	depends on m && UBSAN
+	help
+	  This is a test module for UBSAN.
+	  It triggers various kinds of undefined behavior for UBSAN to detect.
diff --git a/lib/Makefile b/lib/Makefile
index 8fc0d3a9b34f..ce20696d5a92 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -53,6 +53,9 @@ obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o
 obj-$(CONFIG_TEST_SYSCTL) += test_sysctl.o
 obj-$(CONFIG_TEST_HASH) += test_hash.o test_siphash.o
 obj-$(CONFIG_TEST_KASAN) += test_kasan.o
+CFLAGS_test_kasan.o += -fno-builtin
+obj-$(CONFIG_TEST_UBSAN) += test_ubsan.o
+UBSAN_SANITIZE_test_ubsan.o := y
 obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o
 obj-$(CONFIG_TEST_LIST_SORT) += test_list_sort.o
 obj-$(CONFIG_TEST_LKM) += test_module.o
diff --git a/lib/list_debug.c b/lib/list_debug.c
index a34db8d27667..5d5424b51b74 100644
--- a/lib/list_debug.c
+++ b/lib/list_debug.c
@@ -21,13 +21,13 @@ bool __list_add_valid(struct list_head *new, struct list_head *prev,
 		      struct list_head *next)
 {
 	if (CHECK_DATA_CORRUPTION(next->prev != prev,
-			"list_add corruption. next->prev should be prev (%p), but was %p. (next=%p).\n",
+			"list_add corruption. next->prev should be prev (%px), but was %px. (next=%px).\n",
 			prev, next->prev, next) ||
 	    CHECK_DATA_CORRUPTION(prev->next != next,
-			"list_add corruption. prev->next should be next (%p), but was %p. (prev=%p).\n",
+			"list_add corruption. prev->next should be next (%px), but was %px. (prev=%px).\n",
 			next, prev->next, prev) ||
 	    CHECK_DATA_CORRUPTION(new == prev || new == next,
-			"list_add double add: new=%p, prev=%p, next=%p.\n",
+			"list_add double add: new=%px, prev=%px, next=%px.\n",
 			new, prev, next))
 		return false;
 
@@ -43,16 +43,16 @@ bool __list_del_entry_valid(struct list_head *entry)
 	next = entry->next;
 
 	if (CHECK_DATA_CORRUPTION(next == LIST_POISON1,
-			"list_del corruption, %p->next is LIST_POISON1 (%p)\n",
+			"list_del corruption, %px->next is LIST_POISON1 (%px)\n",
 			entry, LIST_POISON1) ||
 	    CHECK_DATA_CORRUPTION(prev == LIST_POISON2,
-			"list_del corruption, %p->prev is LIST_POISON2 (%p)\n",
+			"list_del corruption, %px->prev is LIST_POISON2 (%px)\n",
 			entry, LIST_POISON2) ||
 	    CHECK_DATA_CORRUPTION(prev->next != entry,
-			"list_del corruption. prev->next should be %p, but was %p\n",
+			"list_del corruption. prev->next should be %px, but was %px\n",
 			entry, prev->next) ||
 	    CHECK_DATA_CORRUPTION(next->prev != entry,
-			"list_del corruption. next->prev should be %p, but was %p\n",
+			"list_del corruption. next->prev should be %px, but was %px\n",
 			entry, next->prev))
 		return false;
 
diff --git a/lib/lockref.c b/lib/lockref.c
index 47169ed7e964..3d468b53d4c9 100644
--- a/lib/lockref.c
+++ b/lib/lockref.c
@@ -81,6 +81,34 @@ int lockref_get_not_zero(struct lockref *lockref)
 EXPORT_SYMBOL(lockref_get_not_zero);
 
 /**
+ * lockref_put_not_zero - Decrements count unless count <= 1 before decrement
+ * @lockref: pointer to lockref structure
+ * Return: 1 if count updated successfully or 0 if count would become zero
+ */
+int lockref_put_not_zero(struct lockref *lockref)
+{
+	int retval;
+
+	CMPXCHG_LOOP(
+		new.count--;
+		if (old.count <= 1)
+			return 0;
+	,
+		return 1;
+	);
+
+	spin_lock(&lockref->lock);
+	retval = 0;
+	if (lockref->count > 1) {
+		lockref->count--;
+		retval = 1;
+	}
+	spin_unlock(&lockref->lock);
+	return retval;
+}
+EXPORT_SYMBOL(lockref_put_not_zero);
+
+/**
  * lockref_get_or_lock - Increments count unless the count is 0 or dead
  * @lockref: pointer to lockref structure
  * Return: 1 if count updated successfully or 0 if count was zero
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 8e00138d593f..da9e10c827df 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -146,7 +146,7 @@ static unsigned int radix_tree_descend(const struct radix_tree_node *parent,
 
 static inline gfp_t root_gfp_mask(const struct radix_tree_root *root)
 {
-	return root->gfp_mask & __GFP_BITS_MASK;
+	return root->gfp_mask & (__GFP_BITS_MASK & ~GFP_ZONEMASK);
 }
 
 static inline void tag_set(struct radix_tree_node *node, unsigned int tag,
@@ -2285,6 +2285,7 @@ void __init radix_tree_init(void)
 	int ret;
 
 	BUILD_BUG_ON(RADIX_TREE_MAX_TAGS + __GFP_BITS_SHIFT > 32);
+	BUILD_BUG_ON(ROOT_IS_IDR & ~GFP_ZONEMASK);
 	radix_tree_node_cachep = kmem_cache_create("radix_tree_node",
 			sizeof(struct radix_tree_node), 0,
 			SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,
diff --git a/lib/raid6/.gitignore b/lib/raid6/.gitignore
index f01b1cb04f91..3de0d8921286 100644
--- a/lib/raid6/.gitignore
+++ b/lib/raid6/.gitignore
@@ -4,3 +4,4 @@ int*.c
 tables.c
 neon?.c
 s390vx?.c
+vpermxor*.c
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
index 44d6b46df051..2f8b61dfd9b0 100644
--- a/lib/raid6/Makefile
+++ b/lib/raid6/Makefile
@@ -5,7 +5,8 @@ raid6_pq-y	+= algos.o recov.o tables.o int1.o int2.o int4.o \
 		   int8.o int16.o int32.o
 
 raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o avx512.o recov_avx512.o
-raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o
+raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o \
+                              vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
 raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o
 raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o
 
@@ -90,6 +91,30 @@ $(obj)/altivec8.c:   UNROLL := 8
 $(obj)/altivec8.c:   $(src)/altivec.uc $(src)/unroll.awk FORCE
 	$(call if_changed,unroll)
 
+CFLAGS_vpermxor1.o += $(altivec_flags)
+targets += vpermxor1.c
+$(obj)/vpermxor1.c: UNROLL := 1
+$(obj)/vpermxor1.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+	$(call if_changed,unroll)
+
+CFLAGS_vpermxor2.o += $(altivec_flags)
+targets += vpermxor2.c
+$(obj)/vpermxor2.c: UNROLL := 2
+$(obj)/vpermxor2.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+	$(call if_changed,unroll)
+
+CFLAGS_vpermxor4.o += $(altivec_flags)
+targets += vpermxor4.c
+$(obj)/vpermxor4.c: UNROLL := 4
+$(obj)/vpermxor4.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+	$(call if_changed,unroll)
+
+CFLAGS_vpermxor8.o += $(altivec_flags)
+targets += vpermxor8.c
+$(obj)/vpermxor8.c: UNROLL := 8
+$(obj)/vpermxor8.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+	$(call if_changed,unroll)
+
 CFLAGS_neon1.o += $(NEON_FLAGS)
 targets += neon1.c
 $(obj)/neon1.c:   UNROLL := 1
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index c65aa80d67ed..5065b1e7e327 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -74,6 +74,10 @@ const struct raid6_calls * const raid6_algos[] = {
 	&raid6_altivec2,
 	&raid6_altivec4,
 	&raid6_altivec8,
+	&raid6_vpermxor1,
+	&raid6_vpermxor2,
+	&raid6_vpermxor4,
+	&raid6_vpermxor8,
 #endif
 #if defined(CONFIG_S390)
 	&raid6_s390vx8,
diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc
index 682aae8a1fef..d20ed0d11411 100644
--- a/lib/raid6/altivec.uc
+++ b/lib/raid6/altivec.uc
@@ -24,10 +24,13 @@
 
 #include <linux/raid/pq.h>
 
+#ifdef CONFIG_ALTIVEC
+
 #include <altivec.h>
 #ifdef __KERNEL__
 # include <asm/cputable.h>
 # include <asm/switch_to.h>
+#endif /* __KERNEL__ */
 
 /*
  * This is the C data type to use.  We use a vector of
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index fabc477b1417..5d73f5cb4d8a 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -45,10 +45,12 @@ else ifeq ($(HAS_NEON),yes)
         CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1
 else
         HAS_ALTIVEC := $(shell printf '\#include <altivec.h>\nvector int a;\n' |\
-                         gcc -c -x c - >&/dev/null && \
-                         rm ./-.o && echo yes)
+                         gcc -c -x c - >/dev/null && rm ./-.o && echo yes)
         ifeq ($(HAS_ALTIVEC),yes)
-                OBJS += altivec1.o altivec2.o altivec4.o altivec8.o
+                CFLAGS += -I../../../arch/powerpc/include
+                CFLAGS += -DCONFIG_ALTIVEC
+                OBJS += altivec1.o altivec2.o altivec4.o altivec8.o \
+                        vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
         endif
 endif
 
@@ -95,6 +97,18 @@ altivec4.c: altivec.uc ../unroll.awk
 altivec8.c: altivec.uc ../unroll.awk
 	$(AWK) ../unroll.awk -vN=8 < altivec.uc > $@
 
+vpermxor1.c: vpermxor.uc ../unroll.awk
+	$(AWK) ../unroll.awk -vN=1 < vpermxor.uc > $@
+
+vpermxor2.c: vpermxor.uc ../unroll.awk
+	$(AWK) ../unroll.awk -vN=2 < vpermxor.uc > $@
+
+vpermxor4.c: vpermxor.uc ../unroll.awk
+	$(AWK) ../unroll.awk -vN=4 < vpermxor.uc > $@
+
+vpermxor8.c: vpermxor.uc ../unroll.awk
+	$(AWK) ../unroll.awk -vN=8 < vpermxor.uc > $@
+
 int1.c: int.uc ../unroll.awk
 	$(AWK) ../unroll.awk -vN=1 < int.uc > $@
 
@@ -117,7 +131,7 @@ tables.c: mktables
 	./mktables > tables.c
 
 clean:
-	rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c neon*.c tables.c raid6test
+	rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c vpermxor*.c neon*.c tables.c raid6test
 
 spotless: clean
 	rm -f *~
diff --git a/lib/raid6/vpermxor.uc b/lib/raid6/vpermxor.uc
new file mode 100644
index 000000000000..10475dc423c1
--- /dev/null
+++ b/lib/raid6/vpermxor.uc
@@ -0,0 +1,105 @@
+/*
+ * Copyright 2017, Matt Brown, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * vpermxor$#.c
+ *
+ * Based on H. Peter Anvin's paper - The mathematics of RAID-6
+ *
+ * $#-way unrolled Altivec/vpermxor RAID-6 instruction set
+ * This file is postprocessed using unroll.awk
+ *
+ * vpermxor$#.c makes use of the vpermxor instruction to optimise the RAID6 Q
+ * syndrome calculations.
+ * It can run on systems which have both Altivec and the vpermxor instruction.
+ *
+ * This instruction was introduced in POWER8 - ISA v2.07.
+ */
+
+#include <linux/raid/pq.h>
+#ifdef CONFIG_ALTIVEC
+
+#include <altivec.h>
+#ifdef __KERNEL__
+#include <asm/cputable.h>
+#include <asm/ppc-opcode.h>
+#include <asm/switch_to.h>
+#endif
+
+typedef vector unsigned char unative_t;
+#define NSIZE sizeof(unative_t)
+
+static const vector unsigned char gf_low = {0x1e, 0x1c, 0x1a, 0x18, 0x16, 0x14,
+					    0x12, 0x10, 0x0e, 0x0c, 0x0a, 0x08,
+					    0x06, 0x04, 0x02,0x00};
+static const vector unsigned char gf_high = {0xfd, 0xdd, 0xbd, 0x9d, 0x7d, 0x5d,
+					     0x3d, 0x1d, 0xe0, 0xc0, 0xa0, 0x80,
+					     0x60, 0x40, 0x20, 0x00};
+
+static void noinline raid6_vpermxor$#_gen_syndrome_real(int disks, size_t bytes,
+							void **ptrs)
+{
+	u8 **dptr = (u8 **)ptrs;
+	u8 *p, *q;
+	int d, z, z0;
+	unative_t wp$$, wq$$, wd$$;
+
+	z0 = disks - 3;		/* Highest data disk */
+	p = dptr[z0+1];		/* XOR parity */
+	q = dptr[z0+2];		/* RS syndrome */
+
+	for (d = 0; d < bytes; d += NSIZE*$#) {
+		wp$$ = wq$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE];
+
+		for (z = z0-1; z>=0; z--) {
+			wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
+			/* P syndrome */
+			wp$$ = vec_xor(wp$$, wd$$);
+
+			/* Q syndrome */
+			asm(VPERMXOR(%0,%1,%2,%3):"=v"(wq$$):"v"(gf_high), "v"(gf_low), "v"(wq$$));
+			wq$$ = vec_xor(wq$$, wd$$);
+		}
+		*(unative_t *)&p[d+NSIZE*$$] = wp$$;
+		*(unative_t *)&q[d+NSIZE*$$] = wq$$;
+	}
+}
+
+static void raid6_vpermxor$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+	preempt_disable();
+	enable_kernel_altivec();
+
+	raid6_vpermxor$#_gen_syndrome_real(disks, bytes, ptrs);
+
+	disable_kernel_altivec();
+	preempt_enable();
+}
+
+int raid6_have_altivec_vpermxor(void);
+#if $# == 1
+int raid6_have_altivec_vpermxor(void)
+{
+	/* Check if arch has both altivec and the vpermxor instructions */
+# ifdef __KERNEL__
+	return (cpu_has_feature(CPU_FTR_ALTIVEC_COMP) &&
+		cpu_has_feature(CPU_FTR_ARCH_207S));
+# else
+	return 1;
+#endif
+
+}
+#endif
+
+const struct raid6_calls raid6_vpermxor$# = {
+	raid6_vpermxor$#_gen_syndrome,
+	NULL,
+	raid6_have_altivec_vpermxor,
+	"vpermxor$#",
+	0
+};
+#endif
diff --git a/arch/x86/purgatory/sha256.c b/lib/sha256.c
index 548ca675a14a..4400c832e2aa 100644
--- a/arch/x86/purgatory/sha256.c
+++ b/lib/sha256.c
@@ -16,9 +16,9 @@
  */
 
 #include <linux/bitops.h>
+#include <linux/sha256.h>
+#include <linux/string.h>
 #include <asm/byteorder.h>
-#include "sha256.h"
-#include "../boot/string.h"
 
 static inline u32 Ch(u32 x, u32 y, u32 z)
 {
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 47aeb04c1997..fece57566d45 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -719,7 +719,7 @@ swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle,
 		goto out_warn;
 
 	*dma_handle = __phys_to_dma(dev, phys_addr);
-	if (dma_coherent_ok(dev, *dma_handle, size))
+	if (!dma_coherent_ok(dev, *dma_handle, size))
 		goto out_unmap;
 
 	memset(phys_to_virt(phys_addr), 0, size);
@@ -1087,6 +1087,6 @@ const struct dma_map_ops swiotlb_dma_ops = {
 	.unmap_sg		= swiotlb_unmap_sg_attrs,
 	.map_page		= swiotlb_map_page,
 	.unmap_page		= swiotlb_unmap_page,
-	.dma_supported		= swiotlb_dma_supported,
+	.dma_supported		= dma_direct_supported,
 };
 #endif /* CONFIG_DMA_DIRECT_OPS */
diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c
index 413367cf569e..de16f7869fb1 100644
--- a/lib/test_bitmap.c
+++ b/lib/test_bitmap.c
@@ -296,15 +296,17 @@ static void __init test_bitmap_parselist(void)
 	}
 }
 
+#define EXP_BYTES	(sizeof(exp) * 8)	/* despite the name: size of exp in bits */
+
 static void __init test_bitmap_arr32(void)
 {
-	unsigned int nbits, next_bit, len = sizeof(exp) * 8;
+	unsigned int nbits, next_bit;
 	u32 arr[sizeof(exp) / 4];
-	DECLARE_BITMAP(bmap2, len);
+	DECLARE_BITMAP(bmap2, EXP_BYTES);
 
 	memset(arr, 0xa5, sizeof(arr));
 
-	for (nbits = 0; nbits < len; ++nbits) {
+	for (nbits = 0; nbits < EXP_BYTES; ++nbits) {
 		bitmap_to_arr32(arr, exp, nbits);
 		bitmap_from_arr32(bmap2, arr, nbits);
 		expect_eq_bitmap(bmap2, exp, nbits);
@@ -316,7 +318,7 @@ static void __init test_bitmap_arr32(void)
 				" tail is not safely cleared: %d\n",
 				nbits, next_bit);
 
-		if (nbits < len - 32)
+		if (nbits < EXP_BYTES - 32)
 			expect_eq_uint(arr[DIV_ROUND_UP(nbits, 32)],
 								0xa5a5a5a5);
 	}
diff --git a/lib/test_kasan.c b/lib/test_kasan.c
index 98854a64b014..ec657105edbf 100644
--- a/lib/test_kasan.c
+++ b/lib/test_kasan.c
@@ -567,7 +567,15 @@ static noinline void __init kmem_cache_invalid_free(void)
 		return;
 	}
 
+	/* Trigger invalid free, the object doesn't get freed */
 	kmem_cache_free(cache, p + 1);
+
+	/*
+	 * Properly free the object to prevent the "Objects remaining in
+	 * test_cache on __kmem_cache_shutdown" BUG failure.
+	 */
+	kmem_cache_free(cache, p);
+
 	kmem_cache_destroy(cache);
 }
 
diff --git a/lib/test_ubsan.c b/lib/test_ubsan.c
new file mode 100644
index 000000000000..280f4979d00e
--- /dev/null
+++ b/lib/test_ubsan.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+typedef void(*test_ubsan_fp)(void);
+
+static void test_ubsan_add_overflow(void)
+{
+	volatile int val = INT_MAX;
+
+	val += 2;
+}
+
+static void test_ubsan_sub_overflow(void)
+{
+	volatile int val = INT_MIN;
+	volatile int val2 = 2;
+
+	val -= val2;
+}
+
+static void test_ubsan_mul_overflow(void)
+{
+	volatile int val = INT_MAX / 2;
+
+	val *= 3;
+}
+
+static void test_ubsan_negate_overflow(void)
+{
+	volatile int val = INT_MIN;
+
+	val = -val;
+}
+
+static void test_ubsan_divrem_overflow(void)
+{
+	volatile int val = 16;
+	volatile int val2 = 0;
+
+	val /= val2;
+}
+
+static void test_ubsan_vla_bound_not_positive(void)
+{
+	volatile int size = -1;
+	char buf[size];
+
+	(void)buf;
+}
+
+static void test_ubsan_shift_out_of_bounds(void)
+{
+	volatile int val = -1;
+	int val2 = 10;
+
+	val2 <<= val;
+}
+
+static void test_ubsan_out_of_bounds(void)
+{
+	volatile int i = 4, j = 5;
+	volatile int arr[i];
+
+	arr[j] = i;
+}
+
+static void test_ubsan_load_invalid_value(void)
+{
+	volatile char *dst, *src;
+	bool val, val2, *ptr;
+	char c = 4;
+
+	dst = (char *)&val;
+	src = &c;
+	*dst = *src;
+
+	ptr = &val2;
+	val2 = val;
+}
+
+static void test_ubsan_null_ptr_deref(void)
+{
+	volatile int *ptr = NULL;
+	int val;
+
+	val = *ptr;
+}
+
+static void test_ubsan_misaligned_access(void)
+{
+	volatile char arr[5] __aligned(4) = {1, 2, 3, 4, 5};
+	volatile int *ptr, val = 6;
+
+	ptr = (int *)(arr + 1);
+	*ptr = val;
+}
+
+static void test_ubsan_object_size_mismatch(void)
+{
+	/* "__aligned(8)" keeps the long long access from being misaligned. */
+	volatile int val __aligned(8) = 4;
+	volatile long long *ptr, val2;
+
+	ptr = (long long *)&val;
+	val2 = *ptr;
+}
+
+static const test_ubsan_fp test_ubsan_array[] = {
+	test_ubsan_add_overflow,
+	test_ubsan_sub_overflow,
+	test_ubsan_mul_overflow,
+	test_ubsan_negate_overflow,
+	test_ubsan_divrem_overflow,
+	test_ubsan_vla_bound_not_positive,
+	test_ubsan_shift_out_of_bounds,
+	test_ubsan_out_of_bounds,
+	test_ubsan_load_invalid_value,
+	//test_ubsan_null_ptr_deref, /* exclude it because there is a crash */
+	test_ubsan_misaligned_access,
+	test_ubsan_object_size_mismatch,
+};
+
+static int __init test_ubsan_init(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(test_ubsan_array); i++)
+		test_ubsan_array[i]();
+
+	(void)test_ubsan_null_ptr_deref; /* to avoid unused-function warning */
+	return 0;
+}
+module_init(test_ubsan_init);
+
+static void __exit test_ubsan_exit(void)
+{
+	/* do nothing */
+}
+module_exit(test_ubsan_exit);
+
+MODULE_AUTHOR("Jinbum Park <jinb.park7@gmail.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index d7a708f82559..30c0cb8cc9bc 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -336,7 +336,7 @@ char *put_dec(char *buf, unsigned long long n)
  *
  * If speed is not important, use snprintf(). It's easy to read the code.
  */
-int num_to_str(char *buf, int size, unsigned long long num)
+int num_to_str(char *buf, int size, unsigned long long num, unsigned int width)
 {
 	/* put_dec requires 2-byte alignment of the buffer. */
 	char tmp[sizeof(num) * 3] __aligned(2);
@@ -350,11 +350,21 @@ int num_to_str(char *buf, int size, unsigned long long num)
 		len = put_dec(tmp, num) - tmp;
 	}
 
-	if (len > size)
+	if (len > size || width > size)
 		return 0;
+
+	if (width > len) {
+		width = width - len;
+		for (idx = 0; idx < width; idx++)
+			buf[idx] = ' ';
+	} else {
+		width = 0;
+	}
+
 	for (idx = 0; idx < len; ++idx)
-		buf[idx] = tmp[len - idx - 1];
-	return len;
+		buf[idx + width] = tmp[len - idx - 1];
+
+	return len + width;
 }
 
 #define SIGN	1		/* unsigned/signed, must be 1 */
@@ -2591,6 +2601,8 @@ int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args)
 			case 's':
 			case 'F':
 			case 'f':
+			case 'x':
+			case 'K':
 				save_arg(void *);
 				break;
 			default:
@@ -2765,6 +2777,8 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf)
 			case 's':
 			case 'F':
 			case 'f':
+			case 'x':
+			case 'K':
 				process = true;
 				break;
 			default:
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 08b9aab631ab..023190c69dce 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -1020,23 +1020,18 @@ EXPORT_SYMBOL(congestion_wait);
 
 /**
  * wait_iff_congested - Conditionally wait for a backing_dev to become uncongested or a pgdat to complete writes
- * @pgdat: A pgdat to check if it is heavily congested
  * @sync: SYNC or ASYNC IO
  * @timeout: timeout in jiffies
  *
- * In the event of a congested backing_dev (any backing_dev) and the given
- * @pgdat has experienced recent congestion, this waits for up to @timeout
- * jiffies for either a BDI to exit congestion of the given @sync queue
- * or a write to complete.
- *
- * In the absence of pgdat congestion, cond_resched() is called to yield
- * the processor if necessary but otherwise does not sleep.
+ * In the event of a congested backing_dev (any backing_dev) this waits
+ * for up to @timeout jiffies for either a BDI to exit congestion of the
+ * given @sync queue or a write to complete.
  *
  * The return value is 0 if the sleep is for the full timeout. Otherwise,
  * it is the number of jiffies that were still remaining when the function
  * returned. return_value == timeout implies the function did not sleep.
  */
-long wait_iff_congested(struct pglist_data *pgdat, int sync, long timeout)
+long wait_iff_congested(int sync, long timeout)
 {
 	long ret;
 	unsigned long start = jiffies;
@@ -1044,12 +1039,10 @@ long wait_iff_congested(struct pglist_data *pgdat, int sync, long timeout)
 	wait_queue_head_t *wqh = &congestion_wqh[sync];
 
 	/*
-	 * If there is no congestion, or heavy congestion is not being
-	 * encountered in the current pgdat, yield if necessary instead
+	 * If there is no congestion, yield if necessary instead
 	 * of sleeping on the congestion queue
 	 */
-	if (atomic_read(&nr_wb_congested[sync]) == 0 ||
-	    !test_bit(PGDAT_CONGESTED, &pgdat->flags)) {
+	if (atomic_read(&nr_wb_congested[sync]) == 0) {
 		cond_resched();
 
 		/* In case we scheduled, work out time remaining */
diff --git a/mm/cma.c b/mm/cma.c
index 5809bbe360d7..aa40e6c7b042 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -39,6 +39,7 @@
 #include <trace/events/cma.h>
 
 #include "cma.h"
+#include "internal.h"
 
 struct cma cma_areas[MAX_CMA_AREAS];
 unsigned cma_area_count;
@@ -109,23 +110,25 @@ static int __init cma_activate_area(struct cma *cma)
 	if (!cma->bitmap)
 		return -ENOMEM;
 
-	WARN_ON_ONCE(!pfn_valid(pfn));
-	zone = page_zone(pfn_to_page(pfn));
-
 	do {
 		unsigned j;
 
 		base_pfn = pfn;
+		if (!pfn_valid(base_pfn))
+			goto err;
+
+		zone = page_zone(pfn_to_page(base_pfn));
 		for (j = pageblock_nr_pages; j; --j, pfn++) {
-			WARN_ON_ONCE(!pfn_valid(pfn));
+			if (!pfn_valid(pfn))
+				goto err;
+
 			/*
-			 * alloc_contig_range requires the pfn range
-			 * specified to be in the same zone. Make this
-			 * simple by forcing the entire CMA resv range
-			 * to be in the same zone.
+			 * In init_cma_reserved_pageblock(), present_pages
+			 * is adjusted with assumption that all pages in
+			 * the pageblock come from a single zone.
 			 */
 			if (page_zone(pfn_to_page(pfn)) != zone)
-				goto not_in_zone;
+				goto err;
 		}
 		init_cma_reserved_pageblock(pfn_to_page(base_pfn));
 	} while (--i);
@@ -139,7 +142,7 @@ static int __init cma_activate_area(struct cma *cma)
 
 	return 0;
 
-not_in_zone:
+err:
 	pr_err("CMA area %s could not be activated\n", cma->name);
 	kfree(cma->bitmap);
 	cma->count = 0;
@@ -149,6 +152,41 @@ not_in_zone:
 static int __init cma_init_reserved_areas(void)
 {
 	int i;
+	struct zone *zone;
+	pg_data_t *pgdat;
+
+	if (!cma_area_count)
+		return 0;
+
+	for_each_online_pgdat(pgdat) {
+		unsigned long start_pfn = ULONG_MAX, end_pfn = 0;
+
+		zone = &pgdat->node_zones[ZONE_MOVABLE];
+
+		/*
+		 * ZONE_MOVABLE is already populated on this node, so
+		 * its range cannot be adjusted: it now covers the maximum
+		 * node span and the original zone range is not known.
+		 */
+		if (populated_zone(zone))
+			continue;
+
+		for (i = 0; i < cma_area_count; i++) {
+			if (pfn_to_nid(cma_areas[i].base_pfn) !=
+				pgdat->node_id)
+				continue;
+
+			start_pfn = min(start_pfn, cma_areas[i].base_pfn);
+			end_pfn = max(end_pfn, cma_areas[i].base_pfn +
+						cma_areas[i].count);
+		}
+
+		if (!end_pfn)
+			continue;
+
+		zone->zone_start_pfn = start_pfn;
+		zone->spanned_pages = end_pfn - start_pfn;
+	}
 
 	for (i = 0; i < cma_area_count; i++) {
 		int ret = cma_activate_area(&cma_areas[i]);
@@ -157,9 +195,32 @@ static int __init cma_init_reserved_areas(void)
 			return ret;
 	}
 
+	/*
+	 * Reserved pages for ZONE_MOVABLE are now activated and
+	 * this would change ZONE_MOVABLE's managed page counter and
+	 * the other zones' present counter. We need to re-calculate
+	 * various zone information that depends on this initialization.
+	 */
+	build_all_zonelists(NULL);
+	for_each_populated_zone(zone) {
+		if (zone_idx(zone) == ZONE_MOVABLE) {
+			zone_pcp_reset(zone);
+			setup_zone_pageset(zone);
+		} else
+			zone_pcp_update(zone);
+
+		set_zone_contiguous(zone);
+	}
+
+	/*
+	 * The per-zone watermarks must be recomputed as well, but
+	 * init_per_zone_wmark_min() is not called here: it is registered
+	 * as a core_initcall and therefore runs after this function.
+	 */
+
 	return 0;
 }
-core_initcall(cma_init_reserved_areas);
+pure_initcall(cma_init_reserved_areas);
 
 /**
  * cma_init_reserved_mem() - create custom contiguous area from reserved memory
diff --git a/mm/compaction.c b/mm/compaction.c
index 88d01a50a015..028b7210a669 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1166,8 +1166,7 @@ static void isolate_freepages(struct compact_control *cc)
  * from the isolated freelists in the block we are migrating to.
  */
 static struct page *compaction_alloc(struct page *migratepage,
-					unsigned long data,
-					int **result)
+					unsigned long data)
 {
 	struct compact_control *cc = (struct compact_control *)data;
 	struct page *freepage;
@@ -1451,14 +1450,12 @@ static enum compact_result __compaction_suitable(struct zone *zone, int order,
 	 * if compaction succeeds.
 	 * For costly orders, we require low watermark instead of min for
 	 * compaction to proceed to increase its chances.
-	 * ALLOC_CMA is used, as pages in CMA pageblocks are considered
-	 * suitable migration targets
 	 */
 	watermark = (order > PAGE_ALLOC_COSTLY_ORDER) ?
 				low_wmark_pages(zone) : min_wmark_pages(zone);
 	watermark += compact_gap(order);
 	if (!__zone_watermark_ok(zone, 0, watermark, classzone_idx,
-						ALLOC_CMA, wmark_target))
+						0, wmark_target))
 		return COMPACT_SKIPPED;
 
 	return COMPACT_CONTINUE;
diff --git a/mm/filemap.c b/mm/filemap.c
index 693f62212a59..9276bdb2343c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -66,7 +66,7 @@
  *  ->i_mmap_rwsem		(truncate_pagecache)
  *    ->private_lock		(__free_pte->__set_page_dirty_buffers)
  *      ->swap_lock		(exclusive_swap_page, others)
- *        ->mapping->tree_lock
+ *        ->i_pages lock
  *
  *  ->i_mutex
  *    ->i_mmap_rwsem		(truncate->unmap_mapping_range)
@@ -74,7 +74,7 @@
  *  ->mmap_sem
  *    ->i_mmap_rwsem
  *      ->page_table_lock or pte_lock	(various, mainly in memory.c)
- *        ->mapping->tree_lock	(arch-dependent flush_dcache_mmap_lock)
+ *        ->i_pages lock	(arch-dependent flush_dcache_mmap_lock)
  *
  *  ->mmap_sem
  *    ->lock_page		(access_process_vm)
@@ -84,7 +84,7 @@
  *
  *  bdi->wb.list_lock
  *    sb_lock			(fs/fs-writeback.c)
- *    ->mapping->tree_lock	(__sync_single_inode)
+ *    ->i_pages lock		(__sync_single_inode)
  *
  *  ->i_mmap_rwsem
  *    ->anon_vma.lock		(vma_adjust)
@@ -95,11 +95,11 @@
  *  ->page_table_lock or pte_lock
  *    ->swap_lock		(try_to_unmap_one)
  *    ->private_lock		(try_to_unmap_one)
- *    ->tree_lock		(try_to_unmap_one)
+ *    ->i_pages lock		(try_to_unmap_one)
  *    ->zone_lru_lock(zone)	(follow_page->mark_page_accessed)
  *    ->zone_lru_lock(zone)	(check_pte_range->isolate_lru_page)
  *    ->private_lock		(page_remove_rmap->set_page_dirty)
- *    ->tree_lock		(page_remove_rmap->set_page_dirty)
+ *    ->i_pages lock		(page_remove_rmap->set_page_dirty)
  *    bdi.wb->list_lock		(page_remove_rmap->set_page_dirty)
  *    ->inode->i_lock		(page_remove_rmap->set_page_dirty)
  *    ->memcg->move_lock	(page_remove_rmap->lock_page_memcg)
@@ -118,14 +118,15 @@ static int page_cache_tree_insert(struct address_space *mapping,
 	void **slot;
 	int error;
 
-	error = __radix_tree_create(&mapping->page_tree, page->index, 0,
+	error = __radix_tree_create(&mapping->i_pages, page->index, 0,
 				    &node, &slot);
 	if (error)
 		return error;
 	if (*slot) {
 		void *p;
 
-		p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
+		p = radix_tree_deref_slot_protected(slot,
+						    &mapping->i_pages.xa_lock);
 		if (!radix_tree_exceptional_entry(p))
 			return -EEXIST;
 
@@ -133,7 +134,7 @@ static int page_cache_tree_insert(struct address_space *mapping,
 		if (shadowp)
 			*shadowp = p;
 	}
-	__radix_tree_replace(&mapping->page_tree, node, slot, page,
+	__radix_tree_replace(&mapping->i_pages, node, slot, page,
 			     workingset_lookup_update(mapping));
 	mapping->nrpages++;
 	return 0;
@@ -155,13 +156,13 @@ static void page_cache_tree_delete(struct address_space *mapping,
 		struct radix_tree_node *node;
 		void **slot;
 
-		__radix_tree_lookup(&mapping->page_tree, page->index + i,
+		__radix_tree_lookup(&mapping->i_pages, page->index + i,
 				    &node, &slot);
 
 		VM_BUG_ON_PAGE(!node && nr != 1, page);
 
-		radix_tree_clear_tags(&mapping->page_tree, node, slot);
-		__radix_tree_replace(&mapping->page_tree, node, slot, shadow,
+		radix_tree_clear_tags(&mapping->i_pages, node, slot);
+		__radix_tree_replace(&mapping->i_pages, node, slot, shadow,
 				workingset_lookup_update(mapping));
 	}
 
@@ -253,7 +254,7 @@ static void unaccount_page_cache_page(struct address_space *mapping,
 /*
  * Delete a page from the page cache and free it. Caller has to make
  * sure the page is locked and that nobody else uses it - or that usage
- * is safe.  The caller must hold the mapping's tree_lock.
+ * is safe.  The caller must hold the i_pages lock.
  */
 void __delete_from_page_cache(struct page *page, void *shadow)
 {
@@ -296,9 +297,9 @@ void delete_from_page_cache(struct page *page)
 	unsigned long flags;
 
 	BUG_ON(!PageLocked(page));
-	spin_lock_irqsave(&mapping->tree_lock, flags);
+	xa_lock_irqsave(&mapping->i_pages, flags);
 	__delete_from_page_cache(page, NULL);
-	spin_unlock_irqrestore(&mapping->tree_lock, flags);
+	xa_unlock_irqrestore(&mapping->i_pages, flags);
 
 	page_cache_free_page(mapping, page);
 }
@@ -309,14 +310,14 @@ EXPORT_SYMBOL(delete_from_page_cache);
  * @mapping: the mapping to which pages belong
  * @pvec: pagevec with pages to delete
  *
- * The function walks over mapping->page_tree and removes pages passed in @pvec
- * from the radix tree. The function expects @pvec to be sorted by page index.
- * It tolerates holes in @pvec (radix tree entries at those indices are not
+ * The function walks over mapping->i_pages and removes pages passed in @pvec
+ * from the mapping. The function expects @pvec to be sorted by page index.
+ * It tolerates holes in @pvec (mapping entries at those indices are not
  * modified). The function expects only THP head pages to be present in the
- * @pvec and takes care to delete all corresponding tail pages from the radix
- * tree as well.
+ * @pvec and takes care to delete all corresponding tail pages from the
+ * mapping as well.
  *
- * The function expects mapping->tree_lock to be held.
+ * The function expects the i_pages lock to be held.
  */
 static void
 page_cache_tree_delete_batch(struct address_space *mapping,
@@ -330,11 +331,11 @@ page_cache_tree_delete_batch(struct address_space *mapping,
 	pgoff_t start;
 
 	start = pvec->pages[0]->index;
-	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
+	radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
 		if (i >= pagevec_count(pvec) && !tail_pages)
 			break;
 		page = radix_tree_deref_slot_protected(slot,
-						       &mapping->tree_lock);
+						       &mapping->i_pages.xa_lock);
 		if (radix_tree_exceptional_entry(page))
 			continue;
 		if (!tail_pages) {
@@ -357,8 +358,8 @@ page_cache_tree_delete_batch(struct address_space *mapping,
 		} else {
 			tail_pages--;
 		}
-		radix_tree_clear_tags(&mapping->page_tree, iter.node, slot);
-		__radix_tree_replace(&mapping->page_tree, iter.node, slot, NULL,
+		radix_tree_clear_tags(&mapping->i_pages, iter.node, slot);
+		__radix_tree_replace(&mapping->i_pages, iter.node, slot, NULL,
 				workingset_lookup_update(mapping));
 		total_pages++;
 	}
@@ -374,14 +375,14 @@ void delete_from_page_cache_batch(struct address_space *mapping,
 	if (!pagevec_count(pvec))
 		return;
 
-	spin_lock_irqsave(&mapping->tree_lock, flags);
+	xa_lock_irqsave(&mapping->i_pages, flags);
 	for (i = 0; i < pagevec_count(pvec); i++) {
 		trace_mm_filemap_delete_from_page_cache(pvec->pages[i]);
 
 		unaccount_page_cache_page(mapping, pvec->pages[i]);
 	}
 	page_cache_tree_delete_batch(mapping, pvec);
-	spin_unlock_irqrestore(&mapping->tree_lock, flags);
+	xa_unlock_irqrestore(&mapping->i_pages, flags);
 
 	for (i = 0; i < pagevec_count(pvec); i++)
 		page_cache_free_page(mapping, pvec->pages[i]);
@@ -798,7 +799,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
 		new->mapping = mapping;
 		new->index = offset;
 
-		spin_lock_irqsave(&mapping->tree_lock, flags);
+		xa_lock_irqsave(&mapping->i_pages, flags);
 		__delete_from_page_cache(old, NULL);
 		error = page_cache_tree_insert(mapping, new, NULL);
 		BUG_ON(error);
@@ -810,7 +811,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
 			__inc_node_page_state(new, NR_FILE_PAGES);
 		if (PageSwapBacked(new))
 			__inc_node_page_state(new, NR_SHMEM);
-		spin_unlock_irqrestore(&mapping->tree_lock, flags);
+		xa_unlock_irqrestore(&mapping->i_pages, flags);
 		mem_cgroup_migrate(old, new);
 		radix_tree_preload_end();
 		if (freepage)
@@ -852,7 +853,7 @@ static int __add_to_page_cache_locked(struct page *page,
 	page->mapping = mapping;
 	page->index = offset;
 
-	spin_lock_irq(&mapping->tree_lock);
+	xa_lock_irq(&mapping->i_pages);
 	error = page_cache_tree_insert(mapping, page, shadowp);
 	radix_tree_preload_end();
 	if (unlikely(error))
@@ -861,7 +862,7 @@ static int __add_to_page_cache_locked(struct page *page,
 	/* hugetlb pages do not participate in page cache accounting. */
 	if (!huge)
 		__inc_node_page_state(page, NR_FILE_PAGES);
-	spin_unlock_irq(&mapping->tree_lock);
+	xa_unlock_irq(&mapping->i_pages);
 	if (!huge)
 		mem_cgroup_commit_charge(page, memcg, false, false);
 	trace_mm_filemap_add_to_page_cache(page);
@@ -869,7 +870,7 @@ static int __add_to_page_cache_locked(struct page *page,
 err_insert:
 	page->mapping = NULL;
 	/* Leave page->index set: truncation relies upon it */
-	spin_unlock_irq(&mapping->tree_lock);
+	xa_unlock_irq(&mapping->i_pages);
 	if (!huge)
 		mem_cgroup_cancel_charge(page, memcg, false);
 	put_page(page);
@@ -1353,7 +1354,7 @@ pgoff_t page_cache_next_hole(struct address_space *mapping,
 	for (i = 0; i < max_scan; i++) {
 		struct page *page;
 
-		page = radix_tree_lookup(&mapping->page_tree, index);
+		page = radix_tree_lookup(&mapping->i_pages, index);
 		if (!page || radix_tree_exceptional_entry(page))
 			break;
 		index++;
@@ -1394,7 +1395,7 @@ pgoff_t page_cache_prev_hole(struct address_space *mapping,
 	for (i = 0; i < max_scan; i++) {
 		struct page *page;
 
-		page = radix_tree_lookup(&mapping->page_tree, index);
+		page = radix_tree_lookup(&mapping->i_pages, index);
 		if (!page || radix_tree_exceptional_entry(page))
 			break;
 		index--;
@@ -1427,7 +1428,7 @@ struct page *find_get_entry(struct address_space *mapping, pgoff_t offset)
 	rcu_read_lock();
 repeat:
 	page = NULL;
-	pagep = radix_tree_lookup_slot(&mapping->page_tree, offset);
+	pagep = radix_tree_lookup_slot(&mapping->i_pages, offset);
 	if (pagep) {
 		page = radix_tree_deref_slot(pagep);
 		if (unlikely(!page))
@@ -1633,7 +1634,7 @@ unsigned find_get_entries(struct address_space *mapping,
 		return 0;
 
 	rcu_read_lock();
-	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
+	radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
 		struct page *head, *page;
 repeat:
 		page = radix_tree_deref_slot(slot);
@@ -1710,7 +1711,7 @@ unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start,
 		return 0;
 
 	rcu_read_lock();
-	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, *start) {
+	radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, *start) {
 		struct page *head, *page;
 
 		if (iter.index > end)
@@ -1795,7 +1796,7 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
 		return 0;
 
 	rcu_read_lock();
-	radix_tree_for_each_contig(slot, &mapping->page_tree, &iter, index) {
+	radix_tree_for_each_contig(slot, &mapping->i_pages, &iter, index) {
 		struct page *head, *page;
 repeat:
 		page = radix_tree_deref_slot(slot);
@@ -1875,8 +1876,7 @@ unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
 		return 0;
 
 	rcu_read_lock();
-	radix_tree_for_each_tagged(slot, &mapping->page_tree,
-				   &iter, *index, tag) {
+	radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, *index, tag) {
 		struct page *head, *page;
 
 		if (iter.index > end)
@@ -1969,8 +1969,7 @@ unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
 		return 0;
 
 	rcu_read_lock();
-	radix_tree_for_each_tagged(slot, &mapping->page_tree,
-				   &iter, start, tag) {
+	radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, start, tag) {
 		struct page *head, *page;
 repeat:
 		page = radix_tree_deref_slot(slot);
@@ -2624,8 +2623,7 @@ void filemap_map_pages(struct vm_fault *vmf,
 	struct page *head, *page;
 
 	rcu_read_lock();
-	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter,
-			start_pgoff) {
+	radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start_pgoff) {
 		if (iter.index > end_pgoff)
 			break;
 repeat:
@@ -2721,7 +2719,6 @@ out:
 	sb_end_pagefault(inode->i_sb);
 	return ret;
 }
-EXPORT_SYMBOL(filemap_page_mkwrite);
 
 const struct vm_operations_struct generic_file_vm_ops = {
 	.fault		= filemap_fault,
@@ -2752,6 +2749,10 @@ int generic_file_readonly_mmap(struct file *file, struct vm_area_struct *vma)
 	return generic_file_mmap(file, vma);
 }
 #else
+int filemap_page_mkwrite(struct vm_fault *vmf)
+{
+	return -ENOSYS;
+}
 int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
 {
 	return -ENOSYS;
@@ -2762,6 +2763,7 @@ int generic_file_readonly_mmap(struct file * file, struct vm_area_struct * vma)
 }
 #endif /* CONFIG_MMU */
 
+EXPORT_SYMBOL(filemap_page_mkwrite);
 EXPORT_SYMBOL(generic_file_mmap);
 EXPORT_SYMBOL(generic_file_readonly_mmap);
 
diff --git a/mm/gup.c b/mm/gup.c
index f296df6cf666..76af4cfeaf68 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1740,7 +1740,9 @@ bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
 
 /*
  * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to
- * the regular GUP. It will only return non-negative values.
+ * the regular GUP.
+ * Note a difference with get_user_pages_fast: this always returns the
+ * number of pages pinned, 0 if no pages were pinned.
  */
 int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
 			  struct page **pages)
@@ -1806,9 +1808,12 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 	len = (unsigned long) nr_pages << PAGE_SHIFT;
 	end = start + len;
 
+	if (nr_pages <= 0)
+		return 0;
+
 	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
 					(void __user *)start, len)))
-		return 0;
+		return -EFAULT;
 
 	if (gup_fast_permitted(start, nr_pages, write)) {
 		local_irq_disable();
diff --git a/mm/gup_benchmark.c b/mm/gup_benchmark.c
index 5c8e2abeaa15..0f44759486e2 100644
--- a/mm/gup_benchmark.c
+++ b/mm/gup_benchmark.c
@@ -23,7 +23,7 @@ static int __gup_benchmark_ioctl(unsigned int cmd,
 	struct page **pages;
 
 	nr_pages = gup->size / PAGE_SIZE;
-	pages = kvmalloc(sizeof(void *) * nr_pages, GFP_KERNEL);
+	pages = kvzalloc(sizeof(void *) * nr_pages, GFP_KERNEL);
 	if (!pages)
 		return -ENOMEM;
 
@@ -41,6 +41,8 @@ static int __gup_benchmark_ioctl(unsigned int cmd,
 		}
 
 		nr = get_user_pages_fast(addr, nr, gup->flags & 1, pages + i);
+		if (nr <= 0)
+			break;
 		i += nr;
 	}
 	end_time = ktime_get();
diff --git a/mm/hmm.c b/mm/hmm.c
index 320545b98ff5..486dc394a5a3 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -160,6 +160,32 @@ static void hmm_invalidate_range(struct hmm *hmm,
 	up_read(&hmm->mirrors_sem);
 }
 
+static void hmm_release(struct mmu_notifier *mn, struct mm_struct *mm)
+{
+	struct hmm_mirror *mirror;
+	struct hmm *hmm = mm->hmm;
+
+	down_write(&hmm->mirrors_sem);
+	mirror = list_first_entry_or_null(&hmm->mirrors, struct hmm_mirror,
+					  list);
+	while (mirror) {
+		list_del_init(&mirror->list);	/* detach before calling out */
+		if (mirror->ops->release) {
+			/*
+			 * Drop mirrors_sem so the callback can wait on pending
+			 * work that might itself trigger an mmu_notifier
+			 * callback, which would otherwise deadlock with us.
+			 */
+			up_write(&hmm->mirrors_sem);
+			mirror->ops->release(mirror);
+			down_write(&hmm->mirrors_sem);
+		}
+		mirror = list_first_entry_or_null(&hmm->mirrors,
+						  struct hmm_mirror, list);
+	}
+	up_write(&hmm->mirrors_sem);
+}
+
 static void hmm_invalidate_range_start(struct mmu_notifier *mn,
 				       struct mm_struct *mm,
 				       unsigned long start,
@@ -185,6 +211,7 @@ static void hmm_invalidate_range_end(struct mmu_notifier *mn,
 }
 
 static const struct mmu_notifier_ops hmm_mmu_notifier_ops = {
+	.release		= hmm_release,
 	.invalidate_range_start	= hmm_invalidate_range_start,
 	.invalidate_range_end	= hmm_invalidate_range_end,
 };
@@ -206,13 +233,24 @@ int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm)
 	if (!mm || !mirror || !mirror->ops)
 		return -EINVAL;
 
+again:
 	mirror->hmm = hmm_register(mm);
 	if (!mirror->hmm)
 		return -ENOMEM;
 
 	down_write(&mirror->hmm->mirrors_sem);
-	list_add(&mirror->list, &mirror->hmm->mirrors);
-	up_write(&mirror->hmm->mirrors_sem);
+	if (mirror->hmm->mm == NULL) {
+		/*
+		 * A racing hmm_mirror_unregister() is about to destroy the hmm
+		 * struct. Try again to allocate a new one.
+		 */
+		up_write(&mirror->hmm->mirrors_sem);
+		mirror->hmm = NULL;
+		goto again;
+	} else {
+		list_add(&mirror->list, &mirror->hmm->mirrors);
+		up_write(&mirror->hmm->mirrors_sem);
+	}
 
 	return 0;
 }
@@ -227,11 +265,32 @@ EXPORT_SYMBOL(hmm_mirror_register);
  */
 void hmm_mirror_unregister(struct hmm_mirror *mirror)
 {
-	struct hmm *hmm = mirror->hmm;
+	bool should_unregister = false;
+	struct mm_struct *mm;
+	struct hmm *hmm;
 
+	if (mirror->hmm == NULL)
+		return;
+
+	hmm = mirror->hmm;
 	down_write(&hmm->mirrors_sem);
-	list_del(&mirror->list);
+	list_del_init(&mirror->list);
+	should_unregister = list_empty(&hmm->mirrors);
+	mirror->hmm = NULL;
+	mm = hmm->mm;
+	hmm->mm = NULL;
 	up_write(&hmm->mirrors_sem);
+
+	if (!should_unregister || mm == NULL)
+		return;
+
+	spin_lock(&mm->page_table_lock);
+	if (mm->hmm == hmm)
+		mm->hmm = NULL;
+	spin_unlock(&mm->page_table_lock);
+
+	mmu_notifier_unregister_no_release(&hmm->mmu_notifier, mm);
+	kfree(hmm);
 }
 EXPORT_SYMBOL(hmm_mirror_unregister);
 
@@ -240,110 +299,275 @@ struct hmm_vma_walk {
 	unsigned long		last;
 	bool			fault;
 	bool			block;
-	bool			write;
 };
 
-static int hmm_vma_do_fault(struct mm_walk *walk,
-			    unsigned long addr,
-			    hmm_pfn_t *pfn)
+static int hmm_vma_do_fault(struct mm_walk *walk, unsigned long addr,
+			    bool write_fault, uint64_t *pfn)
 {
 	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_REMOTE;
 	struct hmm_vma_walk *hmm_vma_walk = walk->private;
+	struct hmm_range *range = hmm_vma_walk->range;
 	struct vm_area_struct *vma = walk->vma;
 	int r;
 
 	flags |= hmm_vma_walk->block ? 0 : FAULT_FLAG_ALLOW_RETRY;
-	flags |= hmm_vma_walk->write ? FAULT_FLAG_WRITE : 0;
+	flags |= write_fault ? FAULT_FLAG_WRITE : 0;
 	r = handle_mm_fault(vma, addr, flags);
 	if (r & VM_FAULT_RETRY)
 		return -EBUSY;
 	if (r & VM_FAULT_ERROR) {
-		*pfn = HMM_PFN_ERROR;
+		*pfn = range->values[HMM_PFN_ERROR];
 		return -EFAULT;
 	}
 
 	return -EAGAIN;
 }
 
-static void hmm_pfns_special(hmm_pfn_t *pfns,
-			     unsigned long addr,
-			     unsigned long end)
-{
-	for (; addr < end; addr += PAGE_SIZE, pfns++)
-		*pfns = HMM_PFN_SPECIAL;
-}
-
 static int hmm_pfns_bad(unsigned long addr,
 			unsigned long end,
 			struct mm_walk *walk)
 {
-	struct hmm_range *range = walk->private;
-	hmm_pfn_t *pfns = range->pfns;
+	struct hmm_vma_walk *hmm_vma_walk = walk->private;
+	struct hmm_range *range = hmm_vma_walk->range;
+	uint64_t *pfns = range->pfns;
 	unsigned long i;
 
 	i = (addr - range->start) >> PAGE_SHIFT;
 	for (; addr < end; addr += PAGE_SIZE, i++)
-		pfns[i] = HMM_PFN_ERROR;
+		pfns[i] = range->values[HMM_PFN_ERROR];
 
 	return 0;
 }
 
-static void hmm_pfns_clear(hmm_pfn_t *pfns,
-			   unsigned long addr,
-			   unsigned long end)
-{
-	for (; addr < end; addr += PAGE_SIZE, pfns++)
-		*pfns = 0;
-}
-
-static int hmm_vma_walk_hole(unsigned long addr,
-			     unsigned long end,
-			     struct mm_walk *walk)
+/*
+ * hmm_vma_walk_hole_() - handle a range lacking valid pmd or pte(s)
+ * @addr: range virtual start address (inclusive)
+ * @end: range virtual end address (exclusive)
+ * @fault: should we fault or not ?
+ * @write_fault: write fault ?
+ * @walk: mm_walk structure
+ * Returns: 0 on success, -EAGAIN after page fault, or page fault error
+ *
+ * This function will be called whenever pmd_none() or pte_none() returns true,
+ * or whenever there is no page directory covering the virtual address range.
+ */
+static int hmm_vma_walk_hole_(unsigned long addr, unsigned long end,
+			      bool fault, bool write_fault,
+			      struct mm_walk *walk)
 {
 	struct hmm_vma_walk *hmm_vma_walk = walk->private;
 	struct hmm_range *range = hmm_vma_walk->range;
-	hmm_pfn_t *pfns = range->pfns;
+	uint64_t *pfns = range->pfns;
 	unsigned long i;
 
 	hmm_vma_walk->last = addr;
 	i = (addr - range->start) >> PAGE_SHIFT;
 	for (; addr < end; addr += PAGE_SIZE, i++) {
-		pfns[i] = HMM_PFN_EMPTY;
-		if (hmm_vma_walk->fault) {
+		pfns[i] = range->values[HMM_PFN_NONE];
+		if (fault || write_fault) {
 			int ret;
 
-			ret = hmm_vma_do_fault(walk, addr, &pfns[i]);
+			ret = hmm_vma_do_fault(walk, addr, write_fault,
+					       &pfns[i]);
 			if (ret != -EAGAIN)
 				return ret;
 		}
 	}
 
-	return hmm_vma_walk->fault ? -EAGAIN : 0;
+	return (fault || write_fault) ? -EAGAIN : 0;
 }
 
-static int hmm_vma_walk_clear(unsigned long addr,
-			      unsigned long end,
-			      struct mm_walk *walk)
+static inline void hmm_pte_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
+				      uint64_t pfns, uint64_t cpu_flags,
+				      bool *fault, bool *write_fault)
 {
-	struct hmm_vma_walk *hmm_vma_walk = walk->private;
 	struct hmm_range *range = hmm_vma_walk->range;
-	hmm_pfn_t *pfns = range->pfns;
+
+	*fault = *write_fault = false;
+	if (!hmm_vma_walk->fault)
+		return;
+
+	/* We aren't asked to do anything ... */
+	if (!(pfns & range->flags[HMM_PFN_VALID]))
+		return;
+	/* If this is device memory then only fault if explicitly requested */
+	if ((cpu_flags & range->flags[HMM_PFN_DEVICE_PRIVATE])) {
+		/* Do we fault on device memory ? */
+		if (pfns & range->flags[HMM_PFN_DEVICE_PRIVATE]) {
+			*write_fault = pfns & range->flags[HMM_PFN_WRITE];
+			*fault = true;
+		}
+		return;
+	}
+
+	/* If CPU page table is not valid then we need to fault */
+	*fault = !(cpu_flags & range->flags[HMM_PFN_VALID]);
+	/* Need to write fault ? */
+	if ((pfns & range->flags[HMM_PFN_WRITE]) &&
+	    !(cpu_flags & range->flags[HMM_PFN_WRITE])) {
+		*write_fault = true;
+		*fault = true;
+	}
+}
+
+static void hmm_range_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
+				 const uint64_t *pfns, unsigned long npages,
+				 uint64_t cpu_flags, bool *fault,
+				 bool *write_fault)
+{
 	unsigned long i;
 
-	hmm_vma_walk->last = addr;
+	if (!hmm_vma_walk->fault) {
+		*fault = *write_fault = false;
+		return;
+	}
+
+	for (i = 0; i < npages; ++i) {
+		hmm_pte_need_fault(hmm_vma_walk, pfns[i], cpu_flags,
+				   fault, write_fault);
+		if ((*fault) || (*write_fault))
+			return;
+	}
+}
+
+static int hmm_vma_walk_hole(unsigned long addr, unsigned long end,
+			     struct mm_walk *walk)
+{
+	struct hmm_vma_walk *hmm_vma_walk = walk->private;
+	struct hmm_range *range = hmm_vma_walk->range;
+	bool fault, write_fault;
+	unsigned long i, npages;
+	uint64_t *pfns;
+
 	i = (addr - range->start) >> PAGE_SHIFT;
-	for (; addr < end; addr += PAGE_SIZE, i++) {
-		pfns[i] = 0;
-		if (hmm_vma_walk->fault) {
-			int ret;
+	npages = (end - addr) >> PAGE_SHIFT;
+	pfns = &range->pfns[i];
+	hmm_range_need_fault(hmm_vma_walk, pfns, npages,
+			     0, &fault, &write_fault);
+	return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk);
+}
 
-			ret = hmm_vma_do_fault(walk, addr, &pfns[i]);
-			if (ret != -EAGAIN)
-				return ret;
+static inline uint64_t pmd_to_hmm_pfn_flags(struct hmm_range *range, pmd_t pmd)
+{
+	if (pmd_protnone(pmd))
+		return 0;
+	return pmd_write(pmd) ? range->flags[HMM_PFN_VALID] |
+				range->flags[HMM_PFN_WRITE] :
+				range->flags[HMM_PFN_VALID];
+}
+
+static int hmm_vma_handle_pmd(struct mm_walk *walk,
+			      unsigned long addr,
+			      unsigned long end,
+			      uint64_t *pfns,
+			      pmd_t pmd)
+{
+	struct hmm_vma_walk *hmm_vma_walk = walk->private;
+	struct hmm_range *range = hmm_vma_walk->range;
+	unsigned long pfn, npages, i;
+	bool fault, write_fault;
+	uint64_t cpu_flags;
+
+	npages = (end - addr) >> PAGE_SHIFT;
+	cpu_flags = pmd_to_hmm_pfn_flags(range, pmd);
+	hmm_range_need_fault(hmm_vma_walk, pfns, npages, cpu_flags,
+			     &fault, &write_fault);
+
+	if (pmd_protnone(pmd) || fault || write_fault)
+		return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk);
+
+	pfn = pmd_pfn(pmd) + pte_index(addr);
+	for (i = 0; addr < end; addr += PAGE_SIZE, i++, pfn++)
+		pfns[i] = hmm_pfn_from_pfn(range, pfn) | cpu_flags;
+	hmm_vma_walk->last = end;
+	return 0;
+}
+
+static inline uint64_t pte_to_hmm_pfn_flags(struct hmm_range *range, pte_t pte)
+{
+	if (pte_none(pte) || !pte_present(pte))
+		return 0;
+	return pte_write(pte) ? range->flags[HMM_PFN_VALID] |
+				range->flags[HMM_PFN_WRITE] :
+				range->flags[HMM_PFN_VALID];
+}
+
+static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
+			      unsigned long end, pmd_t *pmdp, pte_t *ptep,
+			      uint64_t *pfn)
+{
+	struct hmm_vma_walk *hmm_vma_walk = walk->private;
+	struct hmm_range *range = hmm_vma_walk->range;
+	struct vm_area_struct *vma = walk->vma;
+	bool fault, write_fault;
+	uint64_t cpu_flags;
+	pte_t pte = *ptep;
+	uint64_t orig_pfn = *pfn;
+
+	*pfn = range->values[HMM_PFN_NONE];
+	cpu_flags = pte_to_hmm_pfn_flags(range, pte);
+	hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags,
+			   &fault, &write_fault);
+
+	if (pte_none(pte)) {
+		if (fault || write_fault)
+			goto fault;
+		return 0;
+	}
+
+	if (!pte_present(pte)) {
+		swp_entry_t entry = pte_to_swp_entry(pte);
+
+		if (!non_swap_entry(entry)) {
+			if (fault || write_fault)
+				goto fault;
+			return 0;
 		}
+
+		/*
+		 * This is a special swap entry, ignore migration, use
+		 * device and report anything else as error.
+		 */
+		if (is_device_private_entry(entry)) {
+			cpu_flags = range->flags[HMM_PFN_VALID] |
+				range->flags[HMM_PFN_DEVICE_PRIVATE];
+			cpu_flags |= is_write_device_private_entry(entry) ?
+				range->flags[HMM_PFN_WRITE] : 0;
+			hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags,
+					   &fault, &write_fault);
+			if (fault || write_fault)
+				goto fault;
+			*pfn = hmm_pfn_from_pfn(range, swp_offset(entry));
+			*pfn |= cpu_flags;
+			return 0;
+		}
+
+		if (is_migration_entry(entry)) {
+			if (fault || write_fault) {
+				pte_unmap(ptep);
+				hmm_vma_walk->last = addr;
+				migration_entry_wait(vma->vm_mm,
+						     pmdp, addr);
+				return -EAGAIN;
+			}
+			return 0;
+		}
+
+		/* Report error for everything else */
+		*pfn = range->values[HMM_PFN_ERROR];
+		return -EFAULT;
 	}
 
-	return hmm_vma_walk->fault ? -EAGAIN : 0;
+	if (fault || write_fault)
+		goto fault;
+
+	*pfn = hmm_pfn_from_pfn(range, pte_pfn(pte)) | cpu_flags;
+	return 0;
+
+fault:
+	pte_unmap(ptep);
+	/* Fault any virtual address we were asked to fault */
+	return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk);
 }
 
 static int hmm_vma_walk_pmd(pmd_t *pmdp,
@@ -353,26 +577,20 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
 {
 	struct hmm_vma_walk *hmm_vma_walk = walk->private;
 	struct hmm_range *range = hmm_vma_walk->range;
-	struct vm_area_struct *vma = walk->vma;
-	hmm_pfn_t *pfns = range->pfns;
+	uint64_t *pfns = range->pfns;
 	unsigned long addr = start, i;
-	bool write_fault;
-	hmm_pfn_t flag;
 	pte_t *ptep;
 
 	i = (addr - range->start) >> PAGE_SHIFT;
-	flag = vma->vm_flags & VM_READ ? HMM_PFN_READ : 0;
-	write_fault = hmm_vma_walk->fault & hmm_vma_walk->write;
 
 again:
 	if (pmd_none(*pmdp))
 		return hmm_vma_walk_hole(start, end, walk);
 
-	if (pmd_huge(*pmdp) && vma->vm_flags & VM_HUGETLB)
+	if (pmd_huge(*pmdp) && (range->vma->vm_flags & VM_HUGETLB))
 		return hmm_pfns_bad(start, end, walk);
 
 	if (pmd_devmap(*pmdp) || pmd_trans_huge(*pmdp)) {
-		unsigned long pfn;
 		pmd_t pmd;
 
 		/*
@@ -388,17 +606,8 @@ again:
 		barrier();
 		if (!pmd_devmap(pmd) && !pmd_trans_huge(pmd))
 			goto again;
-		if (pmd_protnone(pmd))
-			return hmm_vma_walk_clear(start, end, walk);
 
-		if (write_fault && !pmd_write(pmd))
-			return hmm_vma_walk_clear(start, end, walk);
-
-		pfn = pmd_pfn(pmd) + pte_index(addr);
-		flag |= pmd_write(pmd) ? HMM_PFN_WRITE : 0;
-		for (; addr < end; addr += PAGE_SIZE, i++, pfn++)
-			pfns[i] = hmm_pfn_t_from_pfn(pfn) | flag;
-		return 0;
+		return hmm_vma_handle_pmd(walk, addr, end, &pfns[i], pmd);
 	}
 
 	if (pmd_bad(*pmdp))
@@ -406,79 +615,43 @@ again:
 
 	ptep = pte_offset_map(pmdp, addr);
 	for (; addr < end; addr += PAGE_SIZE, ptep++, i++) {
-		pte_t pte = *ptep;
-
-		pfns[i] = 0;
+		int r;
 
-		if (pte_none(pte)) {
-			pfns[i] = HMM_PFN_EMPTY;
-			if (hmm_vma_walk->fault)
-				goto fault;
-			continue;
+		r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, &pfns[i]);
+		if (r) {
+			/* hmm_vma_handle_pte() did unmap pte directory */
+			hmm_vma_walk->last = addr;
+			return r;
 		}
-
-		if (!pte_present(pte)) {
-			swp_entry_t entry = pte_to_swp_entry(pte);
-
-			if (!non_swap_entry(entry)) {
-				if (hmm_vma_walk->fault)
-					goto fault;
-				continue;
-			}
-
-			/*
-			 * This is a special swap entry, ignore migration, use
-			 * device and report anything else as error.
-			 */
-			if (is_device_private_entry(entry)) {
-				pfns[i] = hmm_pfn_t_from_pfn(swp_offset(entry));
-				if (is_write_device_private_entry(entry)) {
-					pfns[i] |= HMM_PFN_WRITE;
-				} else if (write_fault)
-					goto fault;
-				pfns[i] |= HMM_PFN_DEVICE_UNADDRESSABLE;
-				pfns[i] |= flag;
-			} else if (is_migration_entry(entry)) {
-				if (hmm_vma_walk->fault) {
-					pte_unmap(ptep);
-					hmm_vma_walk->last = addr;
-					migration_entry_wait(vma->vm_mm,
-							     pmdp, addr);
-					return -EAGAIN;
-				}
-				continue;
-			} else {
-				/* Report error for everything else */
-				pfns[i] = HMM_PFN_ERROR;
-			}
-			continue;
-		}
-
-		if (write_fault && !pte_write(pte))
-			goto fault;
-
-		pfns[i] = hmm_pfn_t_from_pfn(pte_pfn(pte)) | flag;
-		pfns[i] |= pte_write(pte) ? HMM_PFN_WRITE : 0;
-		continue;
-
-fault:
-		pte_unmap(ptep);
-		/* Fault all pages in range */
-		return hmm_vma_walk_clear(start, end, walk);
 	}
 	pte_unmap(ptep - 1);
 
+	hmm_vma_walk->last = addr;
 	return 0;
 }
 
+static void hmm_pfns_clear(struct hmm_range *range,
+			   uint64_t *pfns,
+			   unsigned long addr,
+			   unsigned long end)
+{
+	for (; addr < end; addr += PAGE_SIZE, pfns++)
+		*pfns = range->values[HMM_PFN_NONE];
+}
+
+static void hmm_pfns_special(struct hmm_range *range)
+{
+	unsigned long addr = range->start, i = 0;
+
+	for (; addr < range->end; addr += PAGE_SIZE, i++)
+		range->pfns[i] = range->values[HMM_PFN_SPECIAL];
+}
+
 /*
  * hmm_vma_get_pfns() - snapshot CPU page table for a range of virtual addresses
- * @vma: virtual memory area containing the virtual address range
- * @range: used to track snapshot validity
- * @start: range virtual start address (inclusive)
- * @end: range virtual end address (exclusive)
- * @entries: array of hmm_pfn_t: provided by the caller, filled in by function
- * Returns: -EINVAL if invalid argument, -ENOMEM out of memory, 0 success
+ * @range: range being snapshotted
+ * Returns: -EINVAL if invalid argument, -ENOMEM out of memory, -EPERM invalid
+ *          vma permission, 0 success
  *
  * This snapshots the CPU page table for a range of virtual addresses. Snapshot
  * validity is tracked by range struct. See hmm_vma_range_done() for further
@@ -491,26 +664,17 @@ fault:
  * NOT CALLING hmm_vma_range_done() IF FUNCTION RETURNS 0 WILL LEAD TO SERIOUS
  * MEMORY CORRUPTION ! YOU HAVE BEEN WARNED !
  */
-int hmm_vma_get_pfns(struct vm_area_struct *vma,
-		     struct hmm_range *range,
-		     unsigned long start,
-		     unsigned long end,
-		     hmm_pfn_t *pfns)
+int hmm_vma_get_pfns(struct hmm_range *range)
 {
+	struct vm_area_struct *vma = range->vma;
 	struct hmm_vma_walk hmm_vma_walk;
 	struct mm_walk mm_walk;
 	struct hmm *hmm;
 
-	/* FIXME support hugetlb fs */
-	if (is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL)) {
-		hmm_pfns_special(pfns, start, end);
-		return -EINVAL;
-	}
-
 	/* Sanity check, this really should not happen ! */
-	if (start < vma->vm_start || start >= vma->vm_end)
+	if (range->start < vma->vm_start || range->start >= vma->vm_end)
 		return -EINVAL;
-	if (end < vma->vm_start || end > vma->vm_end)
+	if (range->end < vma->vm_start || range->end > vma->vm_end)
 		return -EINVAL;
 
 	hmm = hmm_register(vma->vm_mm);
@@ -520,10 +684,24 @@ int hmm_vma_get_pfns(struct vm_area_struct *vma,
 	if (!hmm->mmu_notifier.ops)
 		return -EINVAL;
 
+	/* FIXME support hugetlb fs */
+	if (is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL)) {
+		hmm_pfns_special(range);
+		return -EINVAL;
+	}
+
+	if (!(vma->vm_flags & VM_READ)) {
+		/*
+		 * If the vma does not allow read access, then assume that it
+		 * does not allow write access either. Architectures that allow
+		 * write without read access are not supported by HMM, because
+		 * operations such as atomic access would not work.
+		 */
+		hmm_pfns_clear(range, range->pfns, range->start, range->end);
+		return -EPERM;
+	}
+
 	/* Initialize range to track CPU page table update */
-	range->start = start;
-	range->pfns = pfns;
-	range->end = end;
 	spin_lock(&hmm->lock);
 	range->valid = true;
 	list_add_rcu(&range->list, &hmm->ranges);
@@ -541,14 +719,13 @@ int hmm_vma_get_pfns(struct vm_area_struct *vma,
 	mm_walk.pmd_entry = hmm_vma_walk_pmd;
 	mm_walk.pte_hole = hmm_vma_walk_hole;
 
-	walk_page_range(start, end, &mm_walk);
+	walk_page_range(range->start, range->end, &mm_walk);
 	return 0;
 }
 EXPORT_SYMBOL(hmm_vma_get_pfns);
 
 /*
  * hmm_vma_range_done() - stop tracking change to CPU page table over a range
- * @vma: virtual memory area containing the virtual address range
  * @range: range being tracked
  * Returns: false if range data has been invalidated, true otherwise
  *
@@ -568,10 +745,10 @@ EXPORT_SYMBOL(hmm_vma_get_pfns);
  *
  * There are two ways to use this :
  * again:
- *   hmm_vma_get_pfns(vma, range, start, end, pfns); or hmm_vma_fault(...);
+ *   hmm_vma_get_pfns(range); or hmm_vma_fault(...);
  *   trans = device_build_page_table_update_transaction(pfns);
  *   device_page_table_lock();
- *   if (!hmm_vma_range_done(vma, range)) {
+ *   if (!hmm_vma_range_done(range)) {
  *     device_page_table_unlock();
  *     goto again;
  *   }
@@ -579,13 +756,13 @@ EXPORT_SYMBOL(hmm_vma_get_pfns);
  *   device_page_table_unlock();
  *
  * Or:
- *   hmm_vma_get_pfns(vma, range, start, end, pfns); or hmm_vma_fault(...);
+ *   hmm_vma_get_pfns(range); or hmm_vma_fault(...);
  *   device_page_table_lock();
- *   hmm_vma_range_done(vma, range);
- *   device_update_page_table(pfns);
+ *   hmm_vma_range_done(range);
+ *   device_update_page_table(range->pfns);
  *   device_page_table_unlock();
  */
-bool hmm_vma_range_done(struct vm_area_struct *vma, struct hmm_range *range)
+bool hmm_vma_range_done(struct hmm_range *range)
 {
 	unsigned long npages = (range->end - range->start) >> PAGE_SHIFT;
 	struct hmm *hmm;
@@ -595,7 +772,7 @@ bool hmm_vma_range_done(struct vm_area_struct *vma, struct hmm_range *range)
 		return false;
 	}
 
-	hmm = hmm_register(vma->vm_mm);
+	hmm = hmm_register(range->vma->vm_mm);
 	if (!hmm) {
 		memset(range->pfns, 0, sizeof(*range->pfns) * npages);
 		return false;
@@ -611,36 +788,34 @@ EXPORT_SYMBOL(hmm_vma_range_done);
 
 /*
  * hmm_vma_fault() - try to fault some address in a virtual address range
- * @vma: virtual memory area containing the virtual address range
- * @range: use to track pfns array content validity
- * @start: fault range virtual start address (inclusive)
- * @end: fault range virtual end address (exclusive)
- * @pfns: array of hmm_pfn_t, only entry with fault flag set will be faulted
- * @write: is it a write fault
+ * @range: range being faulted
  * @block: allow blocking on fault (if true it sleeps and do not drop mmap_sem)
  * Returns: 0 success, error otherwise (-EAGAIN means mmap_sem have been drop)
  *
  * This is similar to a regular CPU page fault except that it will not trigger
  * any memory migration if the memory being faulted is not accessible by CPUs.
  *
- * On error, for one virtual address in the range, the function will set the
- * hmm_pfn_t error flag for the corresponding pfn entry.
+ * On error, for one virtual address in the range, the function will mark the
+ * corresponding HMM pfn entry with an error flag.
  *
  * Expected use pattern:
  * retry:
  *   down_read(&mm->mmap_sem);
  *   // Find vma and address device wants to fault, initialize hmm_pfn_t
  *   // array accordingly
- *   ret = hmm_vma_fault(vma, start, end, pfns, allow_retry);
+ *   ret = hmm_vma_fault(range, block);
  *   switch (ret) {
  *   case -EAGAIN:
- *     hmm_vma_range_done(vma, range);
+ *     hmm_vma_range_done(range);
  *     // You might want to rate limit or yield to play nicely, you may
  *     // also commit any valid pfn in the array assuming that you are
  *     // getting true from hmm_vma_range_monitor_end()
  *     goto retry;
  *   case 0:
  *     break;
+ *   case -ENOMEM:
+ *   case -EINVAL:
+ *   case -EPERM:
  *   default:
  *     // Handle error !
  *     up_read(&mm->mmap_sem)
@@ -648,7 +823,7 @@ EXPORT_SYMBOL(hmm_vma_range_done);
  *   }
  *   // Take device driver lock that serialize device page table update
  *   driver_lock_device_page_table_update();
- *   hmm_vma_range_done(vma, range);
+ *   hmm_vma_range_done(range);
  *   // Commit pfns we got from hmm_vma_fault()
  *   driver_unlock_device_page_table_update();
  *   up_read(&mm->mmap_sem)
@@ -658,51 +833,54 @@ EXPORT_SYMBOL(hmm_vma_range_done);
  *
  * YOU HAVE BEEN WARNED !
  */
-int hmm_vma_fault(struct vm_area_struct *vma,
-		  struct hmm_range *range,
-		  unsigned long start,
-		  unsigned long end,
-		  hmm_pfn_t *pfns,
-		  bool write,
-		  bool block)
+int hmm_vma_fault(struct hmm_range *range, bool block)
 {
+	struct vm_area_struct *vma = range->vma;
+	unsigned long start = range->start;
 	struct hmm_vma_walk hmm_vma_walk;
 	struct mm_walk mm_walk;
 	struct hmm *hmm;
 	int ret;
 
 	/* Sanity check, this really should not happen ! */
-	if (start < vma->vm_start || start >= vma->vm_end)
+	if (range->start < vma->vm_start || range->start >= vma->vm_end)
 		return -EINVAL;
-	if (end < vma->vm_start || end > vma->vm_end)
+	if (range->end < vma->vm_start || range->end > vma->vm_end)
 		return -EINVAL;
 
 	hmm = hmm_register(vma->vm_mm);
 	if (!hmm) {
-		hmm_pfns_clear(pfns, start, end);
+		hmm_pfns_clear(range, range->pfns, range->start, range->end);
 		return -ENOMEM;
 	}
 	/* Caller must have registered a mirror using hmm_mirror_register() */
 	if (!hmm->mmu_notifier.ops)
 		return -EINVAL;
 
+	/* FIXME support hugetlb fs */
+	if (is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL)) {
+		hmm_pfns_special(range);
+		return -EINVAL;
+	}
+
+	if (!(vma->vm_flags & VM_READ)) {
+		/*
+		 * If the vma does not allow read access, then assume that it
+		 * does not allow write access either. Architectures that allow
+		 * write without read access are not supported by HMM, because
+		 * operations such as atomic access would not work.
+		 */
+		hmm_pfns_clear(range, range->pfns, range->start, range->end);
+		return -EPERM;
+	}
+
 	/* Initialize range to track CPU page table update */
-	range->start = start;
-	range->pfns = pfns;
-	range->end = end;
 	spin_lock(&hmm->lock);
 	range->valid = true;
 	list_add_rcu(&range->list, &hmm->ranges);
 	spin_unlock(&hmm->lock);
 
-	/* FIXME support hugetlb fs */
-	if (is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL)) {
-		hmm_pfns_special(pfns, start, end);
-		return 0;
-	}
-
 	hmm_vma_walk.fault = true;
-	hmm_vma_walk.write = write;
 	hmm_vma_walk.block = block;
 	hmm_vma_walk.range = range;
 	mm_walk.private = &hmm_vma_walk;
@@ -717,7 +895,7 @@ int hmm_vma_fault(struct vm_area_struct *vma,
 	mm_walk.pte_hole = hmm_vma_walk_hole;
 
 	do {
-		ret = walk_page_range(start, end, &mm_walk);
+		ret = walk_page_range(start, range->end, &mm_walk);
 		start = hmm_vma_walk.last;
 	} while (ret == -EAGAIN);
 
@@ -725,8 +903,9 @@ int hmm_vma_fault(struct vm_area_struct *vma,
 		unsigned long i;
 
 		i = (hmm_vma_walk.last - range->start) >> PAGE_SHIFT;
-		hmm_pfns_clear(&pfns[i], hmm_vma_walk.last, end);
-		hmm_vma_range_done(vma, range);
+		hmm_pfns_clear(range, &range->pfns[i], hmm_vma_walk.last,
+			       range->end);
+		hmm_vma_range_done(range);
 	}
 	return ret;
 }
@@ -845,13 +1024,6 @@ static void hmm_devmem_release(struct device *dev, void *data)
 	hmm_devmem_radix_release(resource);
 }
 
-static struct hmm_devmem *hmm_devmem_find(resource_size_t phys)
-{
-	WARN_ON_ONCE(!rcu_read_lock_held());
-
-	return radix_tree_lookup(&hmm_devmem_radix, phys >> PA_SECTION_SHIFT);
-}
-
 static int hmm_devmem_pages_create(struct hmm_devmem *devmem)
 {
 	resource_size_t key, align_start, align_size, align_end;
@@ -892,9 +1064,8 @@ static int hmm_devmem_pages_create(struct hmm_devmem *devmem)
 	for (key = align_start; key <= align_end; key += PA_SECTION_SIZE) {
 		struct hmm_devmem *dup;
 
-		rcu_read_lock();
-		dup = hmm_devmem_find(key);
-		rcu_read_unlock();
+		dup = radix_tree_lookup(&hmm_devmem_radix,
+					key >> PA_SECTION_SHIFT);
 		if (dup) {
 			dev_err(device, "%s: collides with mapping for %s\n",
 				__func__, dev_name(dup->device));
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f0ae8d1d4329..14ed6ee5e02f 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -555,8 +555,7 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
 
 	VM_BUG_ON_PAGE(!PageCompound(page), page);
 
-	if (mem_cgroup_try_charge(page, vma->vm_mm, gfp | __GFP_NORETRY, &memcg,
-				  true)) {
+	if (mem_cgroup_try_charge(page, vma->vm_mm, gfp, &memcg, true)) {
 		put_page(page);
 		count_vm_event(THP_FAULT_FALLBACK);
 		return VM_FAULT_FALLBACK;
@@ -1317,7 +1316,7 @@ alloc:
 	}
 
 	if (unlikely(mem_cgroup_try_charge(new_page, vma->vm_mm,
-				huge_gfp | __GFP_NORETRY, &memcg, true))) {
+					huge_gfp, &memcg, true))) {
 		put_page(new_page);
 		split_huge_pmd(vma, vmf->pmd, vmf->address);
 		if (page)
@@ -2402,6 +2401,12 @@ static void __split_huge_page_tail(struct page *head, int tail,
 
 	page_tail->index = head->index + tail;
 	page_cpupid_xchg_last(page_tail, page_cpupid_last(head));
+
+	/*
+	 * Always add to the tail because some iterators expect new
+	 * pages to show up after the currently processed elements - e.g.
+	 * migrate_pages()
+	 */
 	lru_add_page_tail(head, page_tail, lruvec, list);
 }
 
@@ -2445,7 +2450,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
 	} else {
 		/* Additional pin to radix tree */
 		page_ref_add(head, 2);
-		spin_unlock(&head->mapping->tree_lock);
+		xa_unlock(&head->mapping->i_pages);
 	}
 
 	spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);
@@ -2653,15 +2658,15 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 	if (mapping) {
 		void **pslot;
 
-		spin_lock(&mapping->tree_lock);
-		pslot = radix_tree_lookup_slot(&mapping->page_tree,
+		xa_lock(&mapping->i_pages);
+		pslot = radix_tree_lookup_slot(&mapping->i_pages,
 				page_index(head));
 		/*
 		 * Check if the head page is present in radix tree.
 		 * We assume all tail are present too, if head is there.
 		 */
 		if (radix_tree_deref_slot_protected(pslot,
-					&mapping->tree_lock) != head)
+					&mapping->i_pages.xa_lock) != head)
 			goto fail;
 	}
 
@@ -2695,7 +2700,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 		}
 		spin_unlock(&pgdata->split_queue_lock);
 fail:		if (mapping)
-			spin_unlock(&mapping->tree_lock);
+			xa_unlock(&mapping->i_pages);
 		spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);
 		unfreeze_page(head);
 		ret = -EBUSY;
diff --git a/mm/internal.h b/mm/internal.h
index e6bd35182dae..62d8c34e63d5 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -168,6 +168,9 @@ extern void post_alloc_hook(struct page *page, unsigned int order,
 					gfp_t gfp_flags);
 extern int user_min_free_kbytes;
 
+extern void set_zone_contiguous(struct zone *zone);
+extern void clear_zone_contiguous(struct zone *zone);
+
 #if defined CONFIG_COMPACTION || defined CONFIG_CMA
 
 /*
@@ -495,7 +498,6 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
 #define ALLOC_HARDER		0x10 /* try to alloc harder */
 #define ALLOC_HIGH		0x20 /* __GFP_HIGH set */
 #define ALLOC_CPUSET		0x40 /* check for correct cpuset */
-#define ALLOC_CMA		0x80 /* allow allocations from CMA areas */
 
 enum ttu_flags;
 struct tlbflush_unmap_batch;
@@ -538,4 +540,5 @@ static inline bool is_migrate_highatomic_page(struct page *page)
 }
 
 void setup_zone_pageset(struct zone *zone);
+extern struct page *alloc_new_node_page(struct page *page, unsigned long node);
 #endif	/* __MM_INTERNAL_H */
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index e42568284e06..d7b2a4bf8671 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -965,9 +965,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 		goto out_nolock;
 	}
 
-	/* Do not oom kill for khugepaged charges */
-	if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp | __GFP_NORETRY,
-					   &memcg, true))) {
+	if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg, true))) {
 		result = SCAN_CGROUP_CHARGE_FAIL;
 		goto out_nolock;
 	}
@@ -1326,9 +1324,7 @@ static void collapse_shmem(struct mm_struct *mm,
 		goto out;
 	}
 
-	/* Do not oom kill for khugepaged charges */
-	if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp | __GFP_NORETRY,
-					   &memcg, true))) {
+	if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg, true))) {
 		result = SCAN_CGROUP_CHARGE_FAIL;
 		goto out;
 	}
@@ -1348,8 +1344,8 @@ static void collapse_shmem(struct mm_struct *mm,
 	 */
 
 	index = start;
-	spin_lock_irq(&mapping->tree_lock);
-	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
+	xa_lock_irq(&mapping->i_pages);
+	radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
 		int n = min(iter.index, end) - index;
 
 		/*
@@ -1362,7 +1358,7 @@ static void collapse_shmem(struct mm_struct *mm,
 		}
 		nr_none += n;
 		for (; index < min(iter.index, end); index++) {
-			radix_tree_insert(&mapping->page_tree, index,
+			radix_tree_insert(&mapping->i_pages, index,
 					new_page + (index % HPAGE_PMD_NR));
 		}
 
@@ -1371,16 +1367,16 @@ static void collapse_shmem(struct mm_struct *mm,
 			break;
 
 		page = radix_tree_deref_slot_protected(slot,
-				&mapping->tree_lock);
+				&mapping->i_pages.xa_lock);
 		if (radix_tree_exceptional_entry(page) || !PageUptodate(page)) {
-			spin_unlock_irq(&mapping->tree_lock);
+			xa_unlock_irq(&mapping->i_pages);
 			/* swap in or instantiate fallocated page */
 			if (shmem_getpage(mapping->host, index, &page,
 						SGP_NOHUGE)) {
 				result = SCAN_FAIL;
 				goto tree_unlocked;
 			}
-			spin_lock_irq(&mapping->tree_lock);
+			xa_lock_irq(&mapping->i_pages);
 		} else if (trylock_page(page)) {
 			get_page(page);
 		} else {
@@ -1389,7 +1385,7 @@ static void collapse_shmem(struct mm_struct *mm,
 		}
 
 		/*
-		 * The page must be locked, so we can drop the tree_lock
+		 * The page must be locked, so we can drop the i_pages lock
 		 * without racing with truncate.
 		 */
 		VM_BUG_ON_PAGE(!PageLocked(page), page);
@@ -1400,7 +1396,7 @@ static void collapse_shmem(struct mm_struct *mm,
 			result = SCAN_TRUNCATED;
 			goto out_unlock;
 		}
-		spin_unlock_irq(&mapping->tree_lock);
+		xa_unlock_irq(&mapping->i_pages);
 
 		if (isolate_lru_page(page)) {
 			result = SCAN_DEL_PAGE_LRU;
@@ -1410,11 +1406,11 @@ static void collapse_shmem(struct mm_struct *mm,
 		if (page_mapped(page))
 			unmap_mapping_pages(mapping, index, 1, false);
 
-		spin_lock_irq(&mapping->tree_lock);
+		xa_lock_irq(&mapping->i_pages);
 
-		slot = radix_tree_lookup_slot(&mapping->page_tree, index);
+		slot = radix_tree_lookup_slot(&mapping->i_pages, index);
 		VM_BUG_ON_PAGE(page != radix_tree_deref_slot_protected(slot,
-					&mapping->tree_lock), page);
+					&mapping->i_pages.xa_lock), page);
 		VM_BUG_ON_PAGE(page_mapped(page), page);
 
 		/*
@@ -1435,14 +1431,14 @@ static void collapse_shmem(struct mm_struct *mm,
 		list_add_tail(&page->lru, &pagelist);
 
 		/* Finally, replace with the new page. */
-		radix_tree_replace_slot(&mapping->page_tree, slot,
+		radix_tree_replace_slot(&mapping->i_pages, slot,
 				new_page + (index % HPAGE_PMD_NR));
 
 		slot = radix_tree_iter_resume(slot, &iter);
 		index++;
 		continue;
 out_lru:
-		spin_unlock_irq(&mapping->tree_lock);
+		xa_unlock_irq(&mapping->i_pages);
 		putback_lru_page(page);
 out_isolate_failed:
 		unlock_page(page);
@@ -1468,14 +1464,14 @@ out_unlock:
 		}
 
 		for (; index < end; index++) {
-			radix_tree_insert(&mapping->page_tree, index,
+			radix_tree_insert(&mapping->i_pages, index,
 					new_page + (index % HPAGE_PMD_NR));
 		}
 		nr_none += n;
 	}
 
 tree_locked:
-	spin_unlock_irq(&mapping->tree_lock);
+	xa_unlock_irq(&mapping->i_pages);
 tree_unlocked:
 
 	if (result == SCAN_SUCCEED) {
@@ -1524,9 +1520,8 @@ tree_unlocked:
 	} else {
 		/* Something went wrong: rollback changes to the radix-tree */
 		shmem_uncharge(mapping->host, nr_none);
-		spin_lock_irq(&mapping->tree_lock);
-		radix_tree_for_each_slot(slot, &mapping->page_tree, &iter,
-				start) {
+		xa_lock_irq(&mapping->i_pages);
+		radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
 			if (iter.index >= end)
 				break;
 			page = list_first_entry_or_null(&pagelist,
@@ -1536,8 +1531,7 @@ tree_unlocked:
 					break;
 				nr_none--;
 				/* Put holes back where they were */
-				radix_tree_delete(&mapping->page_tree,
-						  iter.index);
+				radix_tree_delete(&mapping->i_pages, iter.index);
 				continue;
 			}
 
@@ -1546,16 +1540,15 @@ tree_unlocked:
 			/* Unfreeze the page. */
 			list_del(&page->lru);
 			page_ref_unfreeze(page, 2);
-			radix_tree_replace_slot(&mapping->page_tree,
-						slot, page);
+			radix_tree_replace_slot(&mapping->i_pages, slot, page);
 			slot = radix_tree_iter_resume(slot, &iter);
-			spin_unlock_irq(&mapping->tree_lock);
+			xa_unlock_irq(&mapping->i_pages);
 			putback_lru_page(page);
 			unlock_page(page);
-			spin_lock_irq(&mapping->tree_lock);
+			xa_lock_irq(&mapping->i_pages);
 		}
 		VM_BUG_ON(nr_none);
-		spin_unlock_irq(&mapping->tree_lock);
+		xa_unlock_irq(&mapping->i_pages);
 
 		/* Unfreeze new_page, caller would take care about freeing it */
 		page_ref_unfreeze(new_page, 1);
@@ -1583,7 +1576,7 @@ static void khugepaged_scan_shmem(struct mm_struct *mm,
 	swap = 0;
 	memset(khugepaged_node_load, 0, sizeof(khugepaged_node_load));
 	rcu_read_lock();
-	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
+	radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
 		if (iter.index >= start + HPAGE_PMD_NR)
 			break;
 
@@ -1883,8 +1876,16 @@ static void set_recommended_min_free_kbytes(void)
 	int nr_zones = 0;
 	unsigned long recommended_min;
 
-	for_each_populated_zone(zone)
+	for_each_populated_zone(zone) {
+		/*
+		 * We don't need to worry about fragmentation of
+		 * ZONE_MOVABLE since it only has movable pages.
+		 */
+		if (zone_idx(zone) > gfp_zone(GFP_USER))
+			continue;
+
 		nr_zones++;
+	}
 
 	/* Ensure 2 pageblocks are free to assist fragmentation avoidance */
 	recommended_min = pageblock_nr_pages * nr_zones * 2;
diff --git a/mm/ksm.c b/mm/ksm.c
index e8d6c6210b80..e3cbf9a92f3c 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1131,6 +1131,13 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
 	} else {
 		newpte = pte_mkspecial(pfn_pte(page_to_pfn(kpage),
 					       vma->vm_page_prot));
+		/*
+		 * We're replacing an anonymous page with a zero page, which is
+		 * not anonymous. We need to do proper accounting otherwise we
+		 * will get wrong values in /proc, and a BUG message in dmesg
+		 * when tearing down the mm.
+		 */
+		dec_mm_counter(mm, MM_ANONPAGES);
 	}
 
 	flush_cache_page(vma, addr, pte_pfn(*ptep));
diff --git a/mm/memblock.c b/mm/memblock.c
index 9b04568ad42a..5108356ad8aa 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1163,7 +1163,7 @@ phys_addr_t __init memblock_alloc_range(phys_addr_t size, phys_addr_t align,
 					flags);
 }
 
-static phys_addr_t __init memblock_alloc_base_nid(phys_addr_t size,
+phys_addr_t __init memblock_alloc_base_nid(phys_addr_t size,
 					phys_addr_t align, phys_addr_t max_addr,
 					int nid, ulong flags)
 {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9ec024b862ac..e074f7c637aa 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1485,7 +1485,7 @@ static void memcg_oom_recover(struct mem_cgroup *memcg)
 
 static void mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order)
 {
-	if (!current->memcg_may_oom)
+	if (!current->memcg_may_oom || order > PAGE_ALLOC_COSTLY_ORDER)
 		return;
 	/*
 	 * We are in the middle of the charge context here, so we
@@ -1839,7 +1839,7 @@ static int memcg_hotplug_cpu_dead(unsigned int cpu)
 			}
 		}
 
-		for (i = 0; i < MEMCG_NR_EVENTS; i++) {
+		for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
 			long x;
 
 			x = this_cpu_xchg(memcg->stat_cpu->events[i], 0);
@@ -1858,7 +1858,7 @@ static void reclaim_high(struct mem_cgroup *memcg,
 	do {
 		if (page_counter_read(&memcg->memory) <= memcg->high)
 			continue;
-		mem_cgroup_event(memcg, MEMCG_HIGH);
+		memcg_memory_event(memcg, MEMCG_HIGH);
 		try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, true);
 	} while ((memcg = parent_mem_cgroup(memcg)));
 }
@@ -1949,7 +1949,7 @@ retry:
 	if (!gfpflags_allow_blocking(gfp_mask))
 		goto nomem;
 
-	mem_cgroup_event(mem_over_limit, MEMCG_MAX);
+	memcg_memory_event(mem_over_limit, MEMCG_MAX);
 
 	nr_reclaimed = try_to_free_mem_cgroup_pages(mem_over_limit, nr_pages,
 						    gfp_mask, may_swap);
@@ -1992,7 +1992,7 @@ retry:
 	if (fatal_signal_pending(current))
 		goto force;
 
-	mem_cgroup_event(mem_over_limit, MEMCG_OOM);
+	memcg_memory_event(mem_over_limit, MEMCG_OOM);
 
 	mem_cgroup_oom(mem_over_limit, gfp_mask,
 		       get_order(nr_pages * PAGE_SIZE));
@@ -2688,10 +2688,10 @@ static void tree_events(struct mem_cgroup *memcg, unsigned long *events)
 	struct mem_cgroup *iter;
 	int i;
 
-	memset(events, 0, sizeof(*events) * MEMCG_NR_EVENTS);
+	memset(events, 0, sizeof(*events) * NR_VM_EVENT_ITEMS);
 
 	for_each_mem_cgroup_tree(iter, memcg) {
-		for (i = 0; i < MEMCG_NR_EVENTS; i++)
+		for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
 			events[i] += memcg_sum_events(iter, i);
 	}
 }
@@ -4108,6 +4108,9 @@ static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
 {
 	struct mem_cgroup_per_node *pn = memcg->nodeinfo[node];
 
+	if (!pn)
+		return;
+
 	free_percpu(pn->lruvec_stat_cpu);
 	kfree(pn);
 }
@@ -5178,7 +5181,7 @@ static ssize_t memory_max_write(struct kernfs_open_file *of,
 			continue;
 		}
 
-		mem_cgroup_event(memcg, MEMCG_OOM);
+		memcg_memory_event(memcg, MEMCG_OOM);
 		if (!mem_cgroup_out_of_memory(memcg, GFP_KERNEL, 0))
 			break;
 	}
@@ -5191,10 +5194,14 @@ static int memory_events_show(struct seq_file *m, void *v)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
 
-	seq_printf(m, "low %lu\n", memcg_sum_events(memcg, MEMCG_LOW));
-	seq_printf(m, "high %lu\n", memcg_sum_events(memcg, MEMCG_HIGH));
-	seq_printf(m, "max %lu\n", memcg_sum_events(memcg, MEMCG_MAX));
-	seq_printf(m, "oom %lu\n", memcg_sum_events(memcg, MEMCG_OOM));
+	seq_printf(m, "low %lu\n",
+		   atomic_long_read(&memcg->memory_events[MEMCG_LOW]));
+	seq_printf(m, "high %lu\n",
+		   atomic_long_read(&memcg->memory_events[MEMCG_HIGH]));
+	seq_printf(m, "max %lu\n",
+		   atomic_long_read(&memcg->memory_events[MEMCG_MAX]));
+	seq_printf(m, "oom %lu\n",
+		   atomic_long_read(&memcg->memory_events[MEMCG_OOM]));
 	seq_printf(m, "oom_kill %lu\n", memcg_sum_events(memcg, OOM_KILL));
 
 	return 0;
@@ -5204,7 +5211,7 @@ static int memory_stat_show(struct seq_file *m, void *v)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
 	unsigned long stat[MEMCG_NR_STAT];
-	unsigned long events[MEMCG_NR_EVENTS];
+	unsigned long events[NR_VM_EVENT_ITEMS];
 	int i;
 
 	/*
@@ -5967,9 +5974,9 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
 
 	/*
 	 * Interrupts should be disabled here because the caller holds the
-	 * mapping->tree_lock lock which is taken with interrupts-off. It is
+	 * i_pages lock which is taken with interrupts-off. It is
 	 * important here to have the interrupts disabled because it is the
-	 * only synchronisation we have for udpating the per-CPU variables.
+	 * only synchronisation we have for updating the per-CPU variables.
 	 */
 	VM_BUG_ON(!irqs_disabled());
 	mem_cgroup_charge_statistics(memcg, page, PageTransHuge(page),
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 2d4bf647cf01..9d142b9b86dc 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1487,7 +1487,7 @@ int unpoison_memory(unsigned long pfn)
 }
 EXPORT_SYMBOL(unpoison_memory);
 
-static struct page *new_page(struct page *p, unsigned long private, int **x)
+static struct page *new_page(struct page *p, unsigned long private)
 {
 	int nid = page_to_nid(p);
 
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index cc6dfa5832ca..f74826cdceea 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1329,8 +1329,7 @@ static unsigned long scan_movable_pages(unsigned long start, unsigned long end)
 	return 0;
 }
 
-static struct page *new_node_page(struct page *page, unsigned long private,
-		int **result)
+static struct page *new_node_page(struct page *page, unsigned long private)
 {
 	int nid = page_to_nid(page);
 	nodemask_t nmask = node_states[N_MEMORY];
@@ -1373,7 +1372,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 			if (isolate_huge_page(page, &source))
 				move_pages -= 1 << compound_order(head);
 			continue;
-		} else if (thp_migration_supported() && PageTransHuge(page))
+		} else if (PageTransHuge(page))
 			pfn = page_to_pfn(compound_head(page))
 				+ hpage_nr_pages(page) - 1;
 
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 01cbb7078d6c..9ac49ef17b4e 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -446,15 +446,6 @@ static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
 		__split_huge_pmd(walk->vma, pmd, addr, false, NULL);
 		goto out;
 	}
-	if (!thp_migration_supported()) {
-		get_page(page);
-		spin_unlock(ptl);
-		lock_page(page);
-		ret = split_huge_page(page);
-		unlock_page(page);
-		put_page(page);
-		goto out;
-	}
 	if (!queue_pages_required(page, qp)) {
 		ret = 1;
 		goto unlock;
@@ -495,7 +486,7 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
 
 	if (pmd_trans_unstable(pmd))
 		return 0;
-retry:
+
 	pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
 	for (; addr != end; pte++, addr += PAGE_SIZE) {
 		if (!pte_present(*pte))
@@ -511,22 +502,6 @@ retry:
 			continue;
 		if (!queue_pages_required(page, qp))
 			continue;
-		if (PageTransCompound(page) && !thp_migration_supported()) {
-			get_page(page);
-			pte_unmap_unlock(pte, ptl);
-			lock_page(page);
-			ret = split_huge_page(page);
-			unlock_page(page);
-			put_page(page);
-			/* Failed to split -- skip. */
-			if (ret) {
-				pte = pte_offset_map_lock(walk->mm, pmd,
-						addr, &ptl);
-				continue;
-			}
-			goto retry;
-		}
-
 		migrate_page_add(page, qp->pagelist, flags);
 	}
 	pte_unmap_unlock(pte - 1, ptl);
@@ -942,12 +917,13 @@ static void migrate_page_add(struct page *page, struct list_head *pagelist,
 	}
 }
 
-static struct page *new_node_page(struct page *page, unsigned long node, int **x)
+/* page allocation callback for NUMA node migration */
+struct page *alloc_new_node_page(struct page *page, unsigned long node)
 {
 	if (PageHuge(page))
 		return alloc_huge_page_node(page_hstate(compound_head(page)),
 					node);
-	else if (thp_migration_supported() && PageTransHuge(page)) {
+	else if (PageTransHuge(page)) {
 		struct page *thp;
 
 		thp = alloc_pages_node(node,
@@ -986,7 +962,7 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,
 			flags | MPOL_MF_DISCONTIG_OK, &pagelist);
 
 	if (!list_empty(&pagelist)) {
-		err = migrate_pages(&pagelist, new_node_page, NULL, dest,
+		err = migrate_pages(&pagelist, alloc_new_node_page, NULL, dest,
 					MIGRATE_SYNC, MR_SYSCALL);
 		if (err)
 			putback_movable_pages(&pagelist);
@@ -1107,7 +1083,7 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
  * list of pages handed to migrate_pages()--which is how we get here--
  * is in virtual address order.
  */
-static struct page *new_page(struct page *page, unsigned long start, int **x)
+static struct page *new_page(struct page *page, unsigned long start)
 {
 	struct vm_area_struct *vma;
 	unsigned long uninitialized_var(address);
@@ -1123,7 +1099,7 @@ static struct page *new_page(struct page *page, unsigned long start, int **x)
 	if (PageHuge(page)) {
 		return alloc_huge_page_vma(page_hstate(compound_head(page)),
 				vma, address);
-	} else if (thp_migration_supported() && PageTransHuge(page)) {
+	} else if (PageTransHuge(page)) {
 		struct page *thp;
 
 		thp = alloc_hugepage_vma(GFP_TRANSHUGE, vma, address,
@@ -1152,7 +1128,7 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
 	return -ENOSYS;
 }
 
-static struct page *new_page(struct page *page, unsigned long start, int **x)
+static struct page *new_page(struct page *page, unsigned long start)
 {
 	return NULL;
 }
diff --git a/mm/migrate.c b/mm/migrate.c
index 003886606a22..f65dd69e1fd1 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -467,20 +467,21 @@ int migrate_page_move_mapping(struct address_space *mapping,
 	oldzone = page_zone(page);
 	newzone = page_zone(newpage);
 
-	spin_lock_irq(&mapping->tree_lock);
+	xa_lock_irq(&mapping->i_pages);
 
-	pslot = radix_tree_lookup_slot(&mapping->page_tree,
+	pslot = radix_tree_lookup_slot(&mapping->i_pages,
  					page_index(page));
 
 	expected_count += 1 + page_has_private(page);
 	if (page_count(page) != expected_count ||
-		radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) {
-		spin_unlock_irq(&mapping->tree_lock);
+		radix_tree_deref_slot_protected(pslot,
+					&mapping->i_pages.xa_lock) != page) {
+		xa_unlock_irq(&mapping->i_pages);
 		return -EAGAIN;
 	}
 
 	if (!page_ref_freeze(page, expected_count)) {
-		spin_unlock_irq(&mapping->tree_lock);
+		xa_unlock_irq(&mapping->i_pages);
 		return -EAGAIN;
 	}
 
@@ -494,7 +495,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
 	if (mode == MIGRATE_ASYNC && head &&
 			!buffer_migrate_lock_buffers(head, mode)) {
 		page_ref_unfreeze(page, expected_count);
-		spin_unlock_irq(&mapping->tree_lock);
+		xa_unlock_irq(&mapping->i_pages);
 		return -EAGAIN;
 	}
 
@@ -522,7 +523,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
 		SetPageDirty(newpage);
 	}
 
-	radix_tree_replace_slot(&mapping->page_tree, pslot, newpage);
+	radix_tree_replace_slot(&mapping->i_pages, pslot, newpage);
 
 	/*
 	 * Drop cache reference from old page by unfreezing
@@ -531,7 +532,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
 	 */
 	page_ref_unfreeze(page, expected_count - 1);
 
-	spin_unlock(&mapping->tree_lock);
+	xa_unlock(&mapping->i_pages);
 	/* Leave irq disabled to prevent preemption while updating stats */
 
 	/*
@@ -574,20 +575,19 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
 	int expected_count;
 	void **pslot;
 
-	spin_lock_irq(&mapping->tree_lock);
+	xa_lock_irq(&mapping->i_pages);
 
-	pslot = radix_tree_lookup_slot(&mapping->page_tree,
-					page_index(page));
+	pslot = radix_tree_lookup_slot(&mapping->i_pages, page_index(page));
 
 	expected_count = 2 + page_has_private(page);
 	if (page_count(page) != expected_count ||
-		radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) {
-		spin_unlock_irq(&mapping->tree_lock);
+		radix_tree_deref_slot_protected(pslot, &mapping->i_pages.xa_lock) != page) {
+		xa_unlock_irq(&mapping->i_pages);
 		return -EAGAIN;
 	}
 
 	if (!page_ref_freeze(page, expected_count)) {
-		spin_unlock_irq(&mapping->tree_lock);
+		xa_unlock_irq(&mapping->i_pages);
 		return -EAGAIN;
 	}
 
@@ -596,11 +596,11 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
 
 	get_page(newpage);
 
-	radix_tree_replace_slot(&mapping->page_tree, pslot, newpage);
+	radix_tree_replace_slot(&mapping->i_pages, pslot, newpage);
 
 	page_ref_unfreeze(page, expected_count - 1);
 
-	spin_unlock_irq(&mapping->tree_lock);
+	xa_unlock_irq(&mapping->i_pages);
 
 	return MIGRATEPAGE_SUCCESS;
 }
@@ -1137,10 +1137,12 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page,
 				   enum migrate_reason reason)
 {
 	int rc = MIGRATEPAGE_SUCCESS;
-	int *result = NULL;
 	struct page *newpage;
 
-	newpage = get_new_page(page, private, &result);
+	if (!thp_migration_supported() && PageTransHuge(page))
+		return -ENOMEM;
+
+	newpage = get_new_page(page, private);
 	if (!newpage)
 		return -ENOMEM;
 
@@ -1161,14 +1163,6 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page,
 		goto out;
 	}
 
-	if (unlikely(PageTransHuge(page) && !PageTransHuge(newpage))) {
-		lock_page(page);
-		rc = split_huge_page(page);
-		unlock_page(page);
-		if (rc)
-			goto out;
-	}
-
 	rc = __unmap_and_move(page, newpage, force, mode);
 	if (rc == MIGRATEPAGE_SUCCESS)
 		set_page_owner_migrate_reason(newpage, reason);
@@ -1231,12 +1225,6 @@ put_new:
 			put_page(newpage);
 	}
 
-	if (result) {
-		if (rc)
-			*result = rc;
-		else
-			*result = page_to_nid(newpage);
-	}
 	return rc;
 }
 
@@ -1264,7 +1252,6 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
 				enum migrate_mode mode, int reason)
 {
 	int rc = -EAGAIN;
-	int *result = NULL;
 	int page_was_mapped = 0;
 	struct page *new_hpage;
 	struct anon_vma *anon_vma = NULL;
@@ -1281,7 +1268,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
 		return -ENOSYS;
 	}
 
-	new_hpage = get_new_page(hpage, private, &result);
+	new_hpage = get_new_page(hpage, private);
 	if (!new_hpage)
 		return -ENOMEM;
 
@@ -1345,12 +1332,6 @@ out:
 	else
 		putback_active_hugepage(new_hpage);
 
-	if (result) {
-		if (rc)
-			*result = rc;
-		else
-			*result = page_to_nid(new_hpage);
-	}
 	return rc;
 }
 
@@ -1395,6 +1376,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
 		retry = 0;
 
 		list_for_each_entry_safe(page, page2, from, lru) {
+retry:
 			cond_resched();
 
 			if (PageHuge(page))
@@ -1408,6 +1390,26 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
 
 			switch(rc) {
 			case -ENOMEM:
+				/*
+				 * THP migration might be unsupported or the
+				 * allocation could've failed so we should
+				 * retry on the same page with the THP split
+				 * to base pages. hugetlb pages take the
+				 * -ENOMEM path too but must never be split.
+				 *
+				 * Head page is retried immediately and tail
+				 * pages are queued at the tail of the list so
+				 * we meet them after the rest is processed.
+				 */
+				if (PageTransHuge(page) && !PageHuge(page)) {
+					lock_page(page);
+					rc = split_huge_page_to_list(page, from);
+					unlock_page(page);
+					if (!rc) {
+						list_safe_reset_next(page, page2, lru);
+						goto retry;
+					}
+				}
 				nr_failed++;
 				goto out;
 			case -EAGAIN:
@@ -1444,141 +1446,101 @@ out:
 }
 
 #ifdef CONFIG_NUMA
-/*
- * Move a list of individual pages
- */
-struct page_to_node {
-	unsigned long addr;
-	struct page *page;
-	int node;
-	int status;
-};
 
-static struct page *new_page_node(struct page *p, unsigned long private,
-		int **result)
+static int store_status(int __user *status, int start, int value, int nr)
 {
-	struct page_to_node *pm = (struct page_to_node *)private;
-
-	while (pm->node != MAX_NUMNODES && pm->page != p)
-		pm++;
+	while (nr-- > 0) {
+		if (put_user(value, status + start))
+			return -EFAULT;
+		start++;
+	}
 
-	if (pm->node == MAX_NUMNODES)
-		return NULL;
+	return 0;
+}
 
-	*result = &pm->status;
+static int do_move_pages_to_node(struct mm_struct *mm,
+		struct list_head *pagelist, int node)
+{
+	int err;
 
-	if (PageHuge(p))
-		return alloc_huge_page_node(page_hstate(compound_head(p)),
-					pm->node);
-	else if (thp_migration_supported() && PageTransHuge(p)) {
-		struct page *thp;
+	if (list_empty(pagelist))
+		return 0;
 
-		thp = alloc_pages_node(pm->node,
-			(GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_RECLAIM,
-			HPAGE_PMD_ORDER);
-		if (!thp)
-			return NULL;
-		prep_transhuge_page(thp);
-		return thp;
-	} else
-		return __alloc_pages_node(pm->node,
-				GFP_HIGHUSER_MOVABLE | __GFP_THISNODE, 0);
+	err = migrate_pages(pagelist, alloc_new_node_page, NULL, node,
+			MIGRATE_SYNC, MR_SYSCALL);
+	if (err)
+		putback_movable_pages(pagelist);
+	return err;
 }
 
 /*
- * Move a set of pages as indicated in the pm array. The addr
- * field must be set to the virtual address of the page to be moved
- * and the node number must contain a valid target node.
- * The pm array ends with node = MAX_NUMNODES.
+ * Resolves the given address to a struct page, isolates it from the LRU and
+ * puts it to the given pagelist.
+ * Returns -errno if the page cannot be found/isolated or 0 when it has been
+ * queued or the page doesn't need to be migrated because it is already on
+ * the target node
  */
-static int do_move_page_to_node_array(struct mm_struct *mm,
-				      struct page_to_node *pm,
-				      int migrate_all)
+static int add_page_for_migration(struct mm_struct *mm, unsigned long addr,
+		int node, struct list_head *pagelist, bool migrate_all)
 {
+	struct vm_area_struct *vma;
+	struct page *page;
+	unsigned int follflags;
 	int err;
-	struct page_to_node *pp;
-	LIST_HEAD(pagelist);
 
 	down_read(&mm->mmap_sem);
+	err = -EFAULT;
+	vma = find_vma(mm, addr);
+	if (!vma || addr < vma->vm_start || !vma_migratable(vma))
+		goto out;
 
-	/*
-	 * Build a list of pages to migrate
-	 */
-	for (pp = pm; pp->node != MAX_NUMNODES; pp++) {
-		struct vm_area_struct *vma;
-		struct page *page;
-		struct page *head;
-		unsigned int follflags;
-
-		err = -EFAULT;
-		vma = find_vma(mm, pp->addr);
-		if (!vma || pp->addr < vma->vm_start || !vma_migratable(vma))
-			goto set_status;
-
-		/* FOLL_DUMP to ignore special (like zero) pages */
-		follflags = FOLL_GET | FOLL_DUMP;
-		if (!thp_migration_supported())
-			follflags |= FOLL_SPLIT;
-		page = follow_page(vma, pp->addr, follflags);
+	/* FOLL_DUMP to ignore special (like zero) pages */
+	follflags = FOLL_GET | FOLL_DUMP;
+	page = follow_page(vma, addr, follflags);
 
-		err = PTR_ERR(page);
-		if (IS_ERR(page))
-			goto set_status;
+	err = PTR_ERR(page);
+	if (IS_ERR(page))
+		goto out;
 
-		err = -ENOENT;
-		if (!page)
-			goto set_status;
+	err = -ENOENT;
+	if (!page)
+		goto out;
 
-		err = page_to_nid(page);
+	err = 0;
+	if (page_to_nid(page) == node)
+		goto out_putpage;
 
-		if (err == pp->node)
-			/*
-			 * Node already in the right place
-			 */
-			goto put_and_set;
+	err = -EACCES;
+	if (page_mapcount(page) > 1 && !migrate_all)
+		goto out_putpage;
 
-		err = -EACCES;
-		if (page_mapcount(page) > 1 &&
-				!migrate_all)
-			goto put_and_set;
-
-		if (PageHuge(page)) {
-			if (PageHead(page)) {
-				isolate_huge_page(page, &pagelist);
-				err = 0;
-				pp->page = page;
-			}
-			goto put_and_set;
+	if (PageHuge(page)) {
+		if (PageHead(page)) {
+			isolate_huge_page(page, pagelist);
+			err = 0;
 		}
+	} else {
+		struct page *head;
 
-		pp->page = compound_head(page);
 		head = compound_head(page);
 		err = isolate_lru_page(head);
-		if (!err) {
-			list_add_tail(&head->lru, &pagelist);
-			mod_node_page_state(page_pgdat(head),
-				NR_ISOLATED_ANON + page_is_file_cache(head),
-				hpage_nr_pages(head));
-		}
-put_and_set:
-		/*
-		 * Either remove the duplicate refcount from
-		 * isolate_lru_page() or drop the page ref if it was
-		 * not isolated.
-		 */
-		put_page(page);
-set_status:
-		pp->status = err;
-	}
-
-	err = 0;
-	if (!list_empty(&pagelist)) {
-		err = migrate_pages(&pagelist, new_page_node, NULL,
-				(unsigned long)pm, MIGRATE_SYNC, MR_SYSCALL);
 		if (err)
-			putback_movable_pages(&pagelist);
-	}
+			goto out_putpage;
 
+		err = 0;
+		list_add_tail(&head->lru, pagelist);
+		mod_node_page_state(page_pgdat(head),
+			NR_ISOLATED_ANON + page_is_file_cache(head),
+			hpage_nr_pages(head));
+	}
+out_putpage:
+	/*
+	 * Either remove the duplicate refcount from
+	 * isolate_lru_page() or drop the page ref if it was
+	 * not isolated.
+	 */
+	put_page(page);
+out:
 	up_read(&mm->mmap_sem);
 	return err;
 }
@@ -1593,79 +1555,79 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
 			 const int __user *nodes,
 			 int __user *status, int flags)
 {
-	struct page_to_node *pm;
-	unsigned long chunk_nr_pages;
-	unsigned long chunk_start;
-	int err;
-
-	err = -ENOMEM;
-	pm = (struct page_to_node *)__get_free_page(GFP_KERNEL);
-	if (!pm)
-		goto out;
+	int current_node = NUMA_NO_NODE;
+	LIST_HEAD(pagelist);
+	int start, i;
+	int err = 0, err1;
 
 	migrate_prep();
 
-	/*
-	 * Store a chunk of page_to_node array in a page,
-	 * but keep the last one as a marker
-	 */
-	chunk_nr_pages = (PAGE_SIZE / sizeof(struct page_to_node)) - 1;
-
-	for (chunk_start = 0;
-	     chunk_start < nr_pages;
-	     chunk_start += chunk_nr_pages) {
-		int j;
+	for (i = start = 0; i < nr_pages; i++) {
+		const void __user *p;
+		unsigned long addr;
+		int node;
 
-		if (chunk_start + chunk_nr_pages > nr_pages)
-			chunk_nr_pages = nr_pages - chunk_start;
-
-		/* fill the chunk pm with addrs and nodes from user-space */
-		for (j = 0; j < chunk_nr_pages; j++) {
-			const void __user *p;
-			int node;
-
-			err = -EFAULT;
-			if (get_user(p, pages + j + chunk_start))
-				goto out_pm;
-			pm[j].addr = (unsigned long) p;
-
-			if (get_user(node, nodes + j + chunk_start))
-				goto out_pm;
-
-			err = -ENODEV;
-			if (node < 0 || node >= MAX_NUMNODES)
-				goto out_pm;
-
-			if (!node_state(node, N_MEMORY))
-				goto out_pm;
-
-			err = -EACCES;
-			if (!node_isset(node, task_nodes))
-				goto out_pm;
+		err = -EFAULT;
+		if (get_user(p, pages + i))
+			goto out_flush;
+		if (get_user(node, nodes + i))
+			goto out_flush;
+		addr = (unsigned long)p;
+
+		err = -ENODEV;
+		if (node < 0 || node >= MAX_NUMNODES)
+			goto out_flush;
+		if (!node_state(node, N_MEMORY))
+			goto out_flush;
 
-			pm[j].node = node;
+		err = -EACCES;
+		if (!node_isset(node, task_nodes))
+			goto out_flush;
+
+		if (current_node == NUMA_NO_NODE) {
+			current_node = node;
+			start = i;
+		} else if (node != current_node) {
+			err = do_move_pages_to_node(mm, &pagelist, current_node);
+			if (err)
+				goto out;
+			err = store_status(status, start, current_node, i - start);
+			if (err)
+				goto out;
+			start = i;
+			current_node = node;
 		}
 
-		/* End marker for this chunk */
-		pm[chunk_nr_pages].node = MAX_NUMNODES;
-
-		/* Migrate this chunk */
-		err = do_move_page_to_node_array(mm, pm,
-						 flags & MPOL_MF_MOVE_ALL);
-		if (err < 0)
-			goto out_pm;
+		/*
+		 * Errors in the page lookup or isolation are not fatal and we simply
+		 * report them via status
+		 */
+		err = add_page_for_migration(mm, addr, current_node,
+				&pagelist, flags & MPOL_MF_MOVE_ALL);
+		if (!err)
+			continue;
 
-		/* Return status information */
-		for (j = 0; j < chunk_nr_pages; j++)
-			if (put_user(pm[j].status, status + j + chunk_start)) {
-				err = -EFAULT;
-				goto out_pm;
-			}
-	}
-	err = 0;
+		err = store_status(status, i, err, 1);
+		if (err)
+			goto out_flush;
 
-out_pm:
-	free_page((unsigned long)pm);
+		err = do_move_pages_to_node(mm, &pagelist, current_node);
+		if (err)
+			goto out;
+		if (i > start) {
+			err = store_status(status, start, current_node, i - start);
+			if (err)
+				goto out;
+		}
+		current_node = NUMA_NO_NODE;
+	}
+out_flush:
+	/* Make sure we do not overwrite the existing error */
+	err1 = do_move_pages_to_node(mm, &pagelist, current_node);
+	/* No open batch means 'start' is stale: keep the stored status */
+	if (!err1 && current_node != NUMA_NO_NODE)
+		err1 = store_status(status, start, current_node, i - start);
+	err = err ? err : err1;
 out:
 	return err;
 }
@@ -1866,8 +1828,7 @@ static bool migrate_balanced_pgdat(struct pglist_data *pgdat,
 }
 
 static struct page *alloc_misplaced_dst_page(struct page *page,
-					   unsigned long data,
-					   int **result)
+					   unsigned long data)
 {
 	int nid = (int) data;
 	struct page *newpage;
@@ -1987,6 +1948,13 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
 		goto out;
 
 	/*
+	 * Also do not migrate dirty pages as not all filesystems can move
+	 * dirty pages in MIGRATE_ASYNC mode which is a waste of cycles.
+	 */
+	if (page_is_file_cache(page) && PageDirty(page))
+		goto out;
+
+	/*
 	 * Rate-limit the amount of data that is being migrated to a node.
 	 * Optimal placement is no good if the memory bus is saturated and
 	 * all the time is being spent migrating!
@@ -2339,7 +2307,8 @@ again:
 			ptep_get_and_clear(mm, addr, ptep);
 
 			/* Setup special migration page table entry */
-			entry = make_migration_entry(page, pte_write(pte));
+			entry = make_migration_entry(page, mpfn &
+						     MIGRATE_PFN_WRITE);
 			swp_pte = swp_entry_to_pte(entry);
 			if (pte_soft_dirty(pte))
 				swp_pte = pte_swp_mksoft_dirty(swp_pte);
diff --git a/mm/mmap.c b/mm/mmap.c
index f2154fc2548b..188f195883b9 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1342,6 +1342,10 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
 		if (!(file && path_noexec(&file->f_path)))
 			prot |= PROT_EXEC;
 
+	/* force arch specific MAP_FIXED handling in get_unmapped_area */
+	if (flags & MAP_FIXED_NOREPLACE)
+		flags |= MAP_FIXED;
+
 	if (!(flags & MAP_FIXED))
 		addr = round_hint_to_min(addr);
 
@@ -1365,6 +1369,13 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
 	if (offset_in_page(addr))
 		return addr;
 
+	/* MAP_FIXED_NOREPLACE must fail if any part of the range is mapped */
+	if (flags & MAP_FIXED_NOREPLACE) {
+		/* find_vma_intersection() checks [addr, addr + len) */
+		if (find_vma_intersection(mm, addr, addr + len))
+			return -EEXIST;
+	}
+
 	if (prot == PROT_EXEC) {
 		pkey = execute_only_pkey(mm);
 		if (pkey < 0)
diff --git a/mm/mprotect.c b/mm/mprotect.c
index c1d6af7455da..625608bc8962 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -27,6 +27,7 @@
 #include <linux/pkeys.h>
 #include <linux/ksm.h>
 #include <linux/uaccess.h>
+#include <linux/mm_inline.h>
 #include <asm/pgtable.h>
 #include <asm/cacheflush.h>
 #include <asm/mmu_context.h>
@@ -89,6 +90,14 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 				    page_mapcount(page) != 1)
 					continue;
 
+				/*
+				 * While migration can move some dirty pages,
+				 * it cannot move them all from MIGRATE_ASYNC
+				 * context.
+				 */
+				if (page_is_file_cache(page) && PageDirty(page))
+					continue;
+
 				/* Avoid TLB flush if possible */
 				if (pte_protnone(oldpte))
 					continue;
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 586f31261c83..5c1a3279e63f 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2099,7 +2099,8 @@ void __init page_writeback_init(void)
  * so that it can tag pages faster than a dirtying process can create them).
  */
 /*
- * We tag pages in batches of WRITEBACK_TAG_BATCH to reduce tree_lock latency.
+ * We tag pages in batches of WRITEBACK_TAG_BATCH to reduce the i_pages lock
+ * latency.
  */
 void tag_pages_for_writeback(struct address_space *mapping,
 			     pgoff_t start, pgoff_t end)
@@ -2109,22 +2110,22 @@ void tag_pages_for_writeback(struct address_space *mapping,
 	struct radix_tree_iter iter;
 	void **slot;
 
-	spin_lock_irq(&mapping->tree_lock);
-	radix_tree_for_each_tagged(slot, &mapping->page_tree, &iter, start,
+	xa_lock_irq(&mapping->i_pages);
+	radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, start,
 							PAGECACHE_TAG_DIRTY) {
 		if (iter.index > end)
 			break;
-		radix_tree_iter_tag_set(&mapping->page_tree, &iter,
+		radix_tree_iter_tag_set(&mapping->i_pages, &iter,
 							PAGECACHE_TAG_TOWRITE);
 		tagged++;
 		if ((tagged % WRITEBACK_TAG_BATCH) != 0)
 			continue;
 		slot = radix_tree_iter_resume(slot, &iter);
-		spin_unlock_irq(&mapping->tree_lock);
+		xa_unlock_irq(&mapping->i_pages);
 		cond_resched();
-		spin_lock_irq(&mapping->tree_lock);
+		xa_lock_irq(&mapping->i_pages);
 	}
-	spin_unlock_irq(&mapping->tree_lock);
+	xa_unlock_irq(&mapping->i_pages);
 }
 EXPORT_SYMBOL(tag_pages_for_writeback);
 
@@ -2467,13 +2468,13 @@ int __set_page_dirty_nobuffers(struct page *page)
 			return 1;
 		}
 
-		spin_lock_irqsave(&mapping->tree_lock, flags);
+		xa_lock_irqsave(&mapping->i_pages, flags);
 		BUG_ON(page_mapping(page) != mapping);
 		WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page));
 		account_page_dirtied(page, mapping);
-		radix_tree_tag_set(&mapping->page_tree, page_index(page),
+		radix_tree_tag_set(&mapping->i_pages, page_index(page),
 				   PAGECACHE_TAG_DIRTY);
-		spin_unlock_irqrestore(&mapping->tree_lock, flags);
+		xa_unlock_irqrestore(&mapping->i_pages, flags);
 		unlock_page_memcg(page);
 
 		if (mapping->host) {
@@ -2718,11 +2719,10 @@ int test_clear_page_writeback(struct page *page)
 		struct backing_dev_info *bdi = inode_to_bdi(inode);
 		unsigned long flags;
 
-		spin_lock_irqsave(&mapping->tree_lock, flags);
+		xa_lock_irqsave(&mapping->i_pages, flags);
 		ret = TestClearPageWriteback(page);
 		if (ret) {
-			radix_tree_tag_clear(&mapping->page_tree,
-						page_index(page),
+			radix_tree_tag_clear(&mapping->i_pages, page_index(page),
 						PAGECACHE_TAG_WRITEBACK);
 			if (bdi_cap_account_writeback(bdi)) {
 				struct bdi_writeback *wb = inode_to_wb(inode);
@@ -2736,7 +2736,7 @@ int test_clear_page_writeback(struct page *page)
 						     PAGECACHE_TAG_WRITEBACK))
 			sb_clear_inode_writeback(mapping->host);
 
-		spin_unlock_irqrestore(&mapping->tree_lock, flags);
+		xa_unlock_irqrestore(&mapping->i_pages, flags);
 	} else {
 		ret = TestClearPageWriteback(page);
 	}
@@ -2766,7 +2766,7 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
 		struct backing_dev_info *bdi = inode_to_bdi(inode);
 		unsigned long flags;
 
-		spin_lock_irqsave(&mapping->tree_lock, flags);
+		xa_lock_irqsave(&mapping->i_pages, flags);
 		ret = TestSetPageWriteback(page);
 		if (!ret) {
 			bool on_wblist;
@@ -2774,8 +2774,7 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
 			on_wblist = mapping_tagged(mapping,
 						   PAGECACHE_TAG_WRITEBACK);
 
-			radix_tree_tag_set(&mapping->page_tree,
-						page_index(page),
+			radix_tree_tag_set(&mapping->i_pages, page_index(page),
 						PAGECACHE_TAG_WRITEBACK);
 			if (bdi_cap_account_writeback(bdi))
 				inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK);
@@ -2789,14 +2788,12 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
 				sb_mark_inode_writeback(mapping->host);
 		}
 		if (!PageDirty(page))
-			radix_tree_tag_clear(&mapping->page_tree,
-						page_index(page),
+			radix_tree_tag_clear(&mapping->i_pages, page_index(page),
 						PAGECACHE_TAG_DIRTY);
 		if (!keep_write)
-			radix_tree_tag_clear(&mapping->page_tree,
-						page_index(page),
+			radix_tree_tag_clear(&mapping->i_pages, page_index(page),
 						PAGECACHE_TAG_TOWRITE);
-		spin_unlock_irqrestore(&mapping->tree_lock, flags);
+		xa_unlock_irqrestore(&mapping->i_pages, flags);
 	} else {
 		ret = TestSetPageWriteback(page);
 	}
@@ -2816,7 +2813,7 @@ EXPORT_SYMBOL(__test_set_page_writeback);
  */
 int mapping_tagged(struct address_space *mapping, int tag)
 {
-	return radix_tree_tagged(&mapping->page_tree, tag);
+	return radix_tree_tagged(&mapping->i_pages, tag);
 }
 EXPORT_SYMBOL(mapping_tagged);
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0b97b8ece4a9..905db9d7962f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -46,7 +46,6 @@
 #include <linux/stop_machine.h>
 #include <linux/sort.h>
 #include <linux/pfn.h>
-#include <xen/xen.h>
 #include <linux/backing-dev.h>
 #include <linux/fault-inject.h>
 #include <linux/page-isolation.h>
@@ -205,17 +204,18 @@ static void __free_pages_ok(struct page *page, unsigned int order);
  * TBD: should special case ZONE_DMA32 machines here - in those we normally
  * don't need any ZONE_NORMAL reservation
  */
-int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = {
+int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES] = {
 #ifdef CONFIG_ZONE_DMA
-	 256,
+	[ZONE_DMA] = 256,
 #endif
 #ifdef CONFIG_ZONE_DMA32
-	 256,
+	[ZONE_DMA32] = 256,
 #endif
+	[ZONE_NORMAL] = 32,
 #ifdef CONFIG_HIGHMEM
-	 32,
+	[ZONE_HIGHMEM] = 0,
 #endif
-	 32,
+	[ZONE_MOVABLE] = 0,
 };
 
 EXPORT_SYMBOL(totalram_pages);
@@ -316,9 +316,6 @@ static inline bool update_defer_init(pg_data_t *pgdat,
 	/* Always populate low zones for address-constrained allocations */
 	if (zone_end < pgdat_end_pfn(pgdat))
 		return true;
-	/* Xen PV domains need page structures early */
-	if (xen_pv_domain())
-		return true;
 	(*nr_initialised)++;
 	if ((*nr_initialised > pgdat->static_init_pgcnt) &&
 	    (pfn & (PAGES_PER_SECTION - 1)) == 0) {
@@ -1746,16 +1743,38 @@ void __init page_alloc_init_late(void)
 }
 
 #ifdef CONFIG_CMA
+static void __init adjust_present_page_count(struct page *page, long count)
+{
+	struct zone *zone = page_zone(page);
+
+	/* We don't need to hold a lock since it is boot-up process */
+	zone->present_pages += count;
+}
+
 /* Free whole pageblock and set its migration type to MIGRATE_CMA. */
 void __init init_cma_reserved_pageblock(struct page *page)
 {
 	unsigned i = pageblock_nr_pages;
+	unsigned long pfn = page_to_pfn(page);
 	struct page *p = page;
+	int nid = page_to_nid(page);
+
+	/*
+	 * ZONE_MOVABLE will steal present pages from other zones by
+	 * changing page links so page_zone() is changed. Before that,
+	 * we need to adjust previous zone's page count first.
+	 */
+	adjust_present_page_count(page, -pageblock_nr_pages);
 
 	do {
 		__ClearPageReserved(p);
 		set_page_count(p, 0);
-	} while (++p, --i);
+
+		/* Steal pages from other zones */
+		set_page_links(p, ZONE_MOVABLE, nid, pfn);
+	} while (++p, ++pfn, --i);
+
+	adjust_present_page_count(page, pageblock_nr_pages);
 
 	set_pageblock_migratetype(page, MIGRATE_CMA);
 
@@ -2870,7 +2889,7 @@ int __isolate_free_page(struct page *page, unsigned int order)
 		 * exists.
 		 */
 		watermark = min_wmark_pages(zone) + (1UL << order);
-		if (!zone_watermark_ok(zone, 0, watermark, 0, ALLOC_CMA))
+		if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
 			return 0;
 
 		__mod_zone_freepage_state(zone, -(1UL << order), mt);
@@ -3146,12 +3165,6 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
 	}
 
 
-#ifdef CONFIG_CMA
-	/* If allocation can't use CMA areas don't use free CMA pages */
-	if (!(alloc_flags & ALLOC_CMA))
-		free_pages -= zone_page_state(z, NR_FREE_CMA_PAGES);
-#endif
-
 	/*
 	 * Check watermarks for an order-0 allocation request. If these
 	 * are not met, then a high-order request also cannot go ahead
@@ -3178,10 +3191,8 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
 		}
 
 #ifdef CONFIG_CMA
-		if ((alloc_flags & ALLOC_CMA) &&
-		    !list_empty(&area->free_list[MIGRATE_CMA])) {
+		if (!list_empty(&area->free_list[MIGRATE_CMA]))
 			return true;
-		}
 #endif
 		if (alloc_harder &&
 			!list_empty(&area->free_list[MIGRATE_HIGHATOMIC]))
@@ -3201,13 +3212,6 @@ static inline bool zone_watermark_fast(struct zone *z, unsigned int order,
 		unsigned long mark, int classzone_idx, unsigned int alloc_flags)
 {
 	long free_pages = zone_page_state(z, NR_FREE_PAGES);
-	long cma_pages = 0;
-
-#ifdef CONFIG_CMA
-	/* If allocation can't use CMA areas don't use free CMA pages */
-	if (!(alloc_flags & ALLOC_CMA))
-		cma_pages = zone_page_state(z, NR_FREE_CMA_PAGES);
-#endif
 
 	/*
 	 * Fast check for order-0 only. If this fails then the reserves
@@ -3216,7 +3220,7 @@ static inline bool zone_watermark_fast(struct zone *z, unsigned int order,
 	 * the caller is !atomic then it'll uselessly search the free
 	 * list. That corner case is then slower but it is harmless.
 	 */
-	if (!order && (free_pages - cma_pages) > mark + z->lowmem_reserve[classzone_idx])
+	if (!order && free_pages > mark + z->lowmem_reserve[classzone_idx])
 		return true;
 
 	return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
@@ -3852,10 +3856,6 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
 	} else if (unlikely(rt_task(current)) && !in_interrupt())
 		alloc_flags |= ALLOC_HARDER;
 
-#ifdef CONFIG_CMA
-	if (gfpflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
-		alloc_flags |= ALLOC_CMA;
-#endif
 	return alloc_flags;
 }
 
@@ -4322,9 +4322,6 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
 	if (should_fail_alloc_page(gfp_mask, order))
 		return false;
 
-	if (IS_ENABLED(CONFIG_CMA) && ac->migratetype == MIGRATE_MOVABLE)
-		*alloc_flags |= ALLOC_CMA;
-
 	return true;
 }
 
@@ -4734,6 +4731,13 @@ long si_mem_available(void)
 		     min(global_node_page_state(NR_SLAB_RECLAIMABLE) / 2,
 			 wmark_low);
 
+	/*
+	 * Part of the kernel memory, which can be released under memory
+	 * pressure.
+	 */
+	available += global_node_page_state(NR_INDIRECTLY_RECLAIMABLE_BYTES) >>
+		PAGE_SHIFT;
+
 	if (available < 0)
 		available = 0;
 	return available;
@@ -6200,6 +6204,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
 {
 	enum zone_type j;
 	int nid = pgdat->node_id;
+	unsigned long node_end_pfn = 0;
 
 	pgdat_resize_init(pgdat);
 #ifdef CONFIG_NUMA_BALANCING
@@ -6227,9 +6232,13 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
 		struct zone *zone = pgdat->node_zones + j;
 		unsigned long size, realsize, freesize, memmap_pages;
 		unsigned long zone_start_pfn = zone->zone_start_pfn;
+		unsigned long movable_size = 0;
 
 		size = zone->spanned_pages;
 		realsize = freesize = zone->present_pages;
+		if (zone_end_pfn(zone) > node_end_pfn)
+			node_end_pfn = zone_end_pfn(zone);
+
 
 		/*
 		 * Adjust freesize so that it accounts for how much memory
@@ -6278,12 +6287,30 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
 		zone_seqlock_init(zone);
 		zone_pcp_init(zone);
 
-		if (!size)
+		/*
+		 * The size of the CMA area is unknown now so we need to
+		 * prepare the memory for the usemap at maximum.
+		 */
+		if (IS_ENABLED(CONFIG_CMA) && j == ZONE_MOVABLE &&
+			pgdat->node_spanned_pages) {
+			movable_size = node_end_pfn - pgdat->node_start_pfn;
+		}
+
+		if (!size && !movable_size)
 			continue;
 
 		set_pageblock_order();
-		setup_usemap(pgdat, zone, zone_start_pfn, size);
-		init_currently_empty_zone(zone, zone_start_pfn, size);
+		if (movable_size) {
+			zone->zone_start_pfn = pgdat->node_start_pfn;
+			zone->spanned_pages = movable_size;
+			setup_usemap(pgdat, zone,
+				pgdat->node_start_pfn, movable_size);
+			init_currently_empty_zone(zone,
+				pgdat->node_start_pfn, movable_size);
+		} else {
+			setup_usemap(pgdat, zone, zone_start_pfn, size);
+			init_currently_empty_zone(zone, zone_start_pfn, size);
+		}
 		memmap_init(size, nid, j, zone_start_pfn);
 	}
 }
@@ -7125,13 +7152,15 @@ static void setup_per_zone_lowmem_reserve(void)
 				struct zone *lower_zone;
 
 				idx--;
-
-				if (sysctl_lowmem_reserve_ratio[idx] < 1)
-					sysctl_lowmem_reserve_ratio[idx] = 1;
-
 				lower_zone = pgdat->node_zones + idx;
-				lower_zone->lowmem_reserve[j] = managed_pages /
-					sysctl_lowmem_reserve_ratio[idx];
+
+				if (sysctl_lowmem_reserve_ratio[idx] < 1) {
+					sysctl_lowmem_reserve_ratio[idx] = 0;
+					lower_zone->lowmem_reserve[j] = 0;
+				} else {
+					lower_zone->lowmem_reserve[j] =
+						managed_pages / sysctl_lowmem_reserve_ratio[idx];
+				}
 				managed_pages += lower_zone->managed_pages;
 			}
 		}
@@ -7922,7 +7951,7 @@ void free_contig_range(unsigned long pfn, unsigned nr_pages)
 }
 #endif
 
-#ifdef CONFIG_MEMORY_HOTPLUG
+#if defined CONFIG_MEMORY_HOTPLUG || defined CONFIG_CMA
 /*
  * The zone indicated has a new number of managed_pages; batch sizes and percpu
  * page high values need to be recalulated.
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 61dee77bb211..43e085608846 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -309,8 +309,7 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
 	return pfn < end_pfn ? -EBUSY : 0;
 }
 
-struct page *alloc_migrate_target(struct page *page, unsigned long private,
-				  int **resultp)
+struct page *alloc_migrate_target(struct page *page, unsigned long private)
 {
 	return new_page_nodemask(page, numa_node_id(), &node_states[N_MEMORY]);
 }
diff --git a/mm/readahead.c b/mm/readahead.c
index 4d57b4644f98..539bbb6c1fad 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -175,7 +175,7 @@ int __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
 			break;
 
 		rcu_read_lock();
-		page = radix_tree_lookup(&mapping->page_tree, page_offset);
+		page = radix_tree_lookup(&mapping->i_pages, page_offset);
 		rcu_read_unlock();
 		if (page && !radix_tree_exceptional_entry(page))
 			continue;
diff --git a/mm/rmap.c b/mm/rmap.c
index 9122787c4947..f0dd4e4565bc 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -32,11 +32,11 @@
  *                 mmlist_lock (in mmput, drain_mmlist and others)
  *                 mapping->private_lock (in __set_page_dirty_buffers)
  *                   mem_cgroup_{begin,end}_page_stat (memcg->move_lock)
- *                     mapping->tree_lock (widely used)
+ *                     i_pages lock (widely used)
  *                 inode->i_lock (in set_page_dirty's __mark_inode_dirty)
  *                 bdi.wb->list_lock (in set_page_dirty's __mark_inode_dirty)
  *                   sb_lock (within inode_lock in fs/fs-writeback.c)
- *                   mapping->tree_lock (widely used, in set_page_dirty,
+ *                   i_pages lock (widely used, in set_page_dirty,
  *                             in arch-dependent flush_dcache_mmap_lock,
  *                             within bdi.wb->list_lock in __sync_single_inode)
  *
diff --git a/mm/shmem.c b/mm/shmem.c
index 4424fc0c33aa..9d6c7e595415 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -332,12 +332,12 @@ static int shmem_radix_tree_replace(struct address_space *mapping,
 
 	VM_BUG_ON(!expected);
 	VM_BUG_ON(!replacement);
-	item = __radix_tree_lookup(&mapping->page_tree, index, &node, &pslot);
+	item = __radix_tree_lookup(&mapping->i_pages, index, &node, &pslot);
 	if (!item)
 		return -ENOENT;
 	if (item != expected)
 		return -ENOENT;
-	__radix_tree_replace(&mapping->page_tree, node, pslot,
+	__radix_tree_replace(&mapping->i_pages, node, pslot,
 			     replacement, NULL);
 	return 0;
 }
@@ -355,7 +355,7 @@ static bool shmem_confirm_swap(struct address_space *mapping,
 	void *item;
 
 	rcu_read_lock();
-	item = radix_tree_lookup(&mapping->page_tree, index);
+	item = radix_tree_lookup(&mapping->i_pages, index);
 	rcu_read_unlock();
 	return item == swp_to_radix_entry(swap);
 }
@@ -590,14 +590,14 @@ static int shmem_add_to_page_cache(struct page *page,
 	page->mapping = mapping;
 	page->index = index;
 
-	spin_lock_irq(&mapping->tree_lock);
+	xa_lock_irq(&mapping->i_pages);
 	if (PageTransHuge(page)) {
 		void __rcu **results;
 		pgoff_t idx;
 		int i;
 
 		error = 0;
-		if (radix_tree_gang_lookup_slot(&mapping->page_tree,
+		if (radix_tree_gang_lookup_slot(&mapping->i_pages,
 					&results, &idx, index, 1) &&
 				idx < index + HPAGE_PMD_NR) {
 			error = -EEXIST;
@@ -605,14 +605,14 @@ static int shmem_add_to_page_cache(struct page *page,
 
 		if (!error) {
 			for (i = 0; i < HPAGE_PMD_NR; i++) {
-				error = radix_tree_insert(&mapping->page_tree,
+				error = radix_tree_insert(&mapping->i_pages,
 						index + i, page + i);
 				VM_BUG_ON(error);
 			}
 			count_vm_event(THP_FILE_ALLOC);
 		}
 	} else if (!expected) {
-		error = radix_tree_insert(&mapping->page_tree, index, page);
+		error = radix_tree_insert(&mapping->i_pages, index, page);
 	} else {
 		error = shmem_radix_tree_replace(mapping, index, expected,
 								 page);
@@ -624,10 +624,10 @@ static int shmem_add_to_page_cache(struct page *page,
 			__inc_node_page_state(page, NR_SHMEM_THPS);
 		__mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, nr);
 		__mod_node_page_state(page_pgdat(page), NR_SHMEM, nr);
-		spin_unlock_irq(&mapping->tree_lock);
+		xa_unlock_irq(&mapping->i_pages);
 	} else {
 		page->mapping = NULL;
-		spin_unlock_irq(&mapping->tree_lock);
+		xa_unlock_irq(&mapping->i_pages);
 		page_ref_sub(page, nr);
 	}
 	return error;
@@ -643,13 +643,13 @@ static void shmem_delete_from_page_cache(struct page *page, void *radswap)
 
 	VM_BUG_ON_PAGE(PageCompound(page), page);
 
-	spin_lock_irq(&mapping->tree_lock);
+	xa_lock_irq(&mapping->i_pages);
 	error = shmem_radix_tree_replace(mapping, page->index, page, radswap);
 	page->mapping = NULL;
 	mapping->nrpages--;
 	__dec_node_page_state(page, NR_FILE_PAGES);
 	__dec_node_page_state(page, NR_SHMEM);
-	spin_unlock_irq(&mapping->tree_lock);
+	xa_unlock_irq(&mapping->i_pages);
 	put_page(page);
 	BUG_ON(error);
 }
@@ -662,9 +662,9 @@ static int shmem_free_swap(struct address_space *mapping,
 {
 	void *old;
 
-	spin_lock_irq(&mapping->tree_lock);
-	old = radix_tree_delete_item(&mapping->page_tree, index, radswap);
-	spin_unlock_irq(&mapping->tree_lock);
+	xa_lock_irq(&mapping->i_pages);
+	old = radix_tree_delete_item(&mapping->i_pages, index, radswap);
+	xa_unlock_irq(&mapping->i_pages);
 	if (old != radswap)
 		return -ENOENT;
 	free_swap_and_cache(radix_to_swp_entry(radswap));
@@ -675,7 +675,7 @@ static int shmem_free_swap(struct address_space *mapping,
  * Determine (in bytes) how many of the shmem object's pages mapped by the
  * given offsets are swapped out.
  *
- * This is safe to call without i_mutex or mapping->tree_lock thanks to RCU,
+ * This is safe to call without i_mutex or the i_pages lock thanks to RCU,
  * as long as the inode doesn't go away and racy results are not a problem.
  */
 unsigned long shmem_partial_swap_usage(struct address_space *mapping,
@@ -688,7 +688,7 @@ unsigned long shmem_partial_swap_usage(struct address_space *mapping,
 
 	rcu_read_lock();
 
-	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
+	radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
 		if (iter.index >= end)
 			break;
 
@@ -717,7 +717,7 @@ unsigned long shmem_partial_swap_usage(struct address_space *mapping,
  * Determine (in bytes) how many of the shmem object's pages mapped by the
  * given vma is swapped out.
  *
- * This is safe to call without i_mutex or mapping->tree_lock thanks to RCU,
+ * This is safe to call without i_mutex or the i_pages lock thanks to RCU,
  * as long as the inode doesn't go away and racy results are not a problem.
  */
 unsigned long shmem_swap_usage(struct vm_area_struct *vma)
@@ -1132,7 +1132,7 @@ static int shmem_unuse_inode(struct shmem_inode_info *info,
 	int error = 0;
 
 	radswap = swp_to_radix_entry(swap);
-	index = find_swap_entry(&mapping->page_tree, radswap);
+	index = find_swap_entry(&mapping->i_pages, radswap);
 	if (index == -1)
 		return -EAGAIN;	/* tell shmem_unuse we found nothing */
 
@@ -1448,7 +1448,7 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp,
 
 	hindex = round_down(index, HPAGE_PMD_NR);
 	rcu_read_lock();
-	if (radix_tree_gang_lookup_slot(&mapping->page_tree, &results, &idx,
+	if (radix_tree_gang_lookup_slot(&mapping->i_pages, &results, &idx,
 				hindex, 1) && idx < hindex + HPAGE_PMD_NR) {
 		rcu_read_unlock();
 		return NULL;
@@ -1561,14 +1561,14 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
 	 * Our caller will very soon move newpage out of swapcache, but it's
 	 * a nice clean interface for us to replace oldpage by newpage there.
 	 */
-	spin_lock_irq(&swap_mapping->tree_lock);
+	xa_lock_irq(&swap_mapping->i_pages);
 	error = shmem_radix_tree_replace(swap_mapping, swap_index, oldpage,
 								   newpage);
 	if (!error) {
 		__inc_node_page_state(newpage, NR_FILE_PAGES);
 		__dec_node_page_state(oldpage, NR_FILE_PAGES);
 	}
-	spin_unlock_irq(&swap_mapping->tree_lock);
+	xa_unlock_irq(&swap_mapping->i_pages);
 
 	if (unlikely(error)) {
 		/*
@@ -2634,7 +2634,7 @@ static void shmem_tag_pins(struct address_space *mapping)
 	start = 0;
 	rcu_read_lock();
 
-	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
+	radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
 		page = radix_tree_deref_slot(slot);
 		if (!page || radix_tree_exception(page)) {
 			if (radix_tree_deref_retry(page)) {
@@ -2642,10 +2642,10 @@ static void shmem_tag_pins(struct address_space *mapping)
 				continue;
 			}
 		} else if (page_count(page) - page_mapcount(page) > 1) {
-			spin_lock_irq(&mapping->tree_lock);
-			radix_tree_tag_set(&mapping->page_tree, iter.index,
+			xa_lock_irq(&mapping->i_pages);
+			radix_tree_tag_set(&mapping->i_pages, iter.index,
 					   SHMEM_TAG_PINNED);
-			spin_unlock_irq(&mapping->tree_lock);
+			xa_unlock_irq(&mapping->i_pages);
 		}
 
 		if (need_resched()) {
@@ -2677,7 +2677,7 @@ static int shmem_wait_for_pins(struct address_space *mapping)
 
 	error = 0;
 	for (scan = 0; scan <= LAST_SCAN; scan++) {
-		if (!radix_tree_tagged(&mapping->page_tree, SHMEM_TAG_PINNED))
+		if (!radix_tree_tagged(&mapping->i_pages, SHMEM_TAG_PINNED))
 			break;
 
 		if (!scan)
@@ -2687,7 +2687,7 @@ static int shmem_wait_for_pins(struct address_space *mapping)
 
 		start = 0;
 		rcu_read_lock();
-		radix_tree_for_each_tagged(slot, &mapping->page_tree, &iter,
+		radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter,
 					   start, SHMEM_TAG_PINNED) {
 
 			page = radix_tree_deref_slot(slot);
@@ -2713,10 +2713,10 @@ static int shmem_wait_for_pins(struct address_space *mapping)
 				error = -EBUSY;
 			}
 
-			spin_lock_irq(&mapping->tree_lock);
-			radix_tree_tag_clear(&mapping->page_tree,
+			xa_lock_irq(&mapping->i_pages);
+			radix_tree_tag_clear(&mapping->i_pages,
 					     iter.index, SHMEM_TAG_PINNED);
-			spin_unlock_irq(&mapping->tree_lock);
+			xa_unlock_irq(&mapping->i_pages);
 continue_resched:
 			if (need_resched()) {
 				slot = radix_tree_iter_resume(slot, &iter);
diff --git a/mm/slab.c b/mm/slab.c
index e3a9b8e23306..2f308253c3d7 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -4086,7 +4086,8 @@ next:
 	next_reap_node();
 out:
 	/* Set up the next iteration */
-	schedule_delayed_work(work, round_jiffies_relative(REAPTIMEOUT_AC));
+	schedule_delayed_work_on(smp_processor_id(), work,
+				round_jiffies_relative(REAPTIMEOUT_AC));
 }
 
 void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
diff --git a/mm/slub.c b/mm/slub.c
index 4fb037c98782..44aa7847324a 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1363,10 +1363,8 @@ static __always_inline void kfree_hook(void *x)
 	kasan_kfree_large(x, _RET_IP_);
 }
 
-static __always_inline void *slab_free_hook(struct kmem_cache *s, void *x)
+static __always_inline bool slab_free_hook(struct kmem_cache *s, void *x)
 {
-	void *freeptr;
-
 	kmemleak_free_recursive(x, s->flags);
 
 	/*
@@ -1386,17 +1384,12 @@ static __always_inline void *slab_free_hook(struct kmem_cache *s, void *x)
 	if (!(s->flags & SLAB_DEBUG_OBJECTS))
 		debug_check_no_obj_freed(x, s->object_size);
 
-	freeptr = get_freepointer(s, x);
-	/*
-	 * kasan_slab_free() may put x into memory quarantine, delaying its
-	 * reuse. In this case the object's freelist pointer is changed.
-	 */
-	kasan_slab_free(s, x, _RET_IP_);
-	return freeptr;
+	/* KASAN might put x into memory quarantine, delaying its reuse */
+	return kasan_slab_free(s, x, _RET_IP_);
 }
 
-static inline void slab_free_freelist_hook(struct kmem_cache *s,
-					   void *head, void *tail)
+static inline bool slab_free_freelist_hook(struct kmem_cache *s,
+					   void **head, void **tail)
 {
 /*
  * Compiler cannot detect this function can be removed if slab_free_hook()
@@ -1407,13 +1400,33 @@ static inline void slab_free_freelist_hook(struct kmem_cache *s,
 	defined(CONFIG_DEBUG_OBJECTS_FREE) ||	\
 	defined(CONFIG_KASAN)
 
-	void *object = head;
-	void *tail_obj = tail ? : head;
-	void *freeptr;
+	void *object;
+	void *next = *head;
+	void *old_tail = *tail ? *tail : *head;
+
+	/* Head and tail of the reconstructed freelist */
+	*head = NULL;
+	*tail = NULL;
 
 	do {
-		freeptr = slab_free_hook(s, object);
-	} while ((object != tail_obj) && (object = freeptr));
+		object = next;
+		next = get_freepointer(s, object);
+		/* If object's reuse doesn't have to be delayed */
+		if (!slab_free_hook(s, object)) {
+			/* Move object to the new freelist */
+			set_freepointer(s, object, *head);
+			*head = object;
+			if (!*tail)
+				*tail = object;
+		}
+	} while (object != old_tail);
+
+	if (*head == *tail)
+		*tail = NULL;
+
+	return *head != NULL;
+#else
+	return true;
 #endif
 }
 
@@ -2968,14 +2981,12 @@ static __always_inline void slab_free(struct kmem_cache *s, struct page *page,
 				      void *head, void *tail, int cnt,
 				      unsigned long addr)
 {
-	slab_free_freelist_hook(s, head, tail);
 	/*
-	 * slab_free_freelist_hook() could have put the items into quarantine.
-	 * If so, no need to free them.
+	 * With KASAN enabled, slab_free_freelist_hook() modifies the freelist
+	 * to remove objects whose reuse must be delayed.
 	 */
-	if (s->flags & SLAB_KASAN && !(s->flags & SLAB_TYPESAFE_BY_RCU))
-		return;
-	do_slab_free(s, page, head, tail, cnt, addr);
+	if (slab_free_freelist_hook(s, &head, &tail))
+		do_slab_free(s, page, head, tail, cnt, addr);
 }
 
 #ifdef CONFIG_KASAN
diff --git a/mm/swap_state.c b/mm/swap_state.c
index f233dccd3b1b..07f9aa2340c3 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -124,10 +124,10 @@ int __add_to_swap_cache(struct page *page, swp_entry_t entry)
 	SetPageSwapCache(page);
 
 	address_space = swap_address_space(entry);
-	spin_lock_irq(&address_space->tree_lock);
+	xa_lock_irq(&address_space->i_pages);
 	for (i = 0; i < nr; i++) {
 		set_page_private(page + i, entry.val + i);
-		error = radix_tree_insert(&address_space->page_tree,
+		error = radix_tree_insert(&address_space->i_pages,
 					  idx + i, page + i);
 		if (unlikely(error))
 			break;
@@ -145,13 +145,13 @@ int __add_to_swap_cache(struct page *page, swp_entry_t entry)
 		VM_BUG_ON(error == -EEXIST);
 		set_page_private(page + i, 0UL);
 		while (i--) {
-			radix_tree_delete(&address_space->page_tree, idx + i);
+			radix_tree_delete(&address_space->i_pages, idx + i);
 			set_page_private(page + i, 0UL);
 		}
 		ClearPageSwapCache(page);
 		page_ref_sub(page, nr);
 	}
-	spin_unlock_irq(&address_space->tree_lock);
+	xa_unlock_irq(&address_space->i_pages);
 
 	return error;
 }
@@ -188,7 +188,7 @@ void __delete_from_swap_cache(struct page *page)
 	address_space = swap_address_space(entry);
 	idx = swp_offset(entry);
 	for (i = 0; i < nr; i++) {
-		radix_tree_delete(&address_space->page_tree, idx + i);
+		radix_tree_delete(&address_space->i_pages, idx + i);
 		set_page_private(page + i, 0);
 	}
 	ClearPageSwapCache(page);
@@ -272,9 +272,9 @@ void delete_from_swap_cache(struct page *page)
 	entry.val = page_private(page);
 
 	address_space = swap_address_space(entry);
-	spin_lock_irq(&address_space->tree_lock);
+	xa_lock_irq(&address_space->i_pages);
 	__delete_from_swap_cache(page);
-	spin_unlock_irq(&address_space->tree_lock);
+	xa_unlock_irq(&address_space->i_pages);
 
 	put_swap_page(page, entry);
 	page_ref_sub(page, hpage_nr_pages(page));
@@ -628,12 +628,11 @@ int init_swap_address_space(unsigned int type, unsigned long nr_pages)
 		return -ENOMEM;
 	for (i = 0; i < nr; i++) {
 		space = spaces + i;
-		INIT_RADIX_TREE(&space->page_tree, GFP_ATOMIC|__GFP_NOWARN);
+		INIT_RADIX_TREE(&space->i_pages, GFP_ATOMIC|__GFP_NOWARN);
 		atomic_set(&space->i_mmap_writable, 0);
 		space->a_ops = &swap_aops;
 		/* swap cache doesn't use writeback related tags */
 		mapping_set_no_writeback_tags(space);
-		spin_lock_init(&space->tree_lock);
 	}
 	nr_swapper_spaces[type] = nr;
 	rcu_assign_pointer(swapper_spaces[type], spaces);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index c7a33717d079..cc2cf04d9018 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -85,7 +85,7 @@ PLIST_HEAD(swap_active_head);
  * is held and the locking order requires swap_lock to be taken
  * before any swap_info_struct->lock.
  */
-struct plist_head *swap_avail_heads;
+static struct plist_head *swap_avail_heads;
 static DEFINE_SPINLOCK(swap_avail_lock);
 
 struct swap_info_struct *swap_info[MAX_SWAPFILES];
@@ -2961,6 +2961,10 @@ static unsigned long read_swap_header(struct swap_info_struct *p,
 	maxpages = swp_offset(pte_to_swp_entry(
 			swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1;
 	last_page = swap_header->info.last_page;
+	if (!last_page) {
+		pr_warn("Empty swap-file\n");
+		return 0;
+	}
 	if (last_page > maxpages) {
 		pr_warn("Truncating oversized swap area, only using %luk out of %luk\n",
 			maxpages << (PAGE_SHIFT - 10),
diff --git a/mm/truncate.c b/mm/truncate.c
index c34e2fd4f583..1d2fb2dca96f 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -36,11 +36,11 @@ static inline void __clear_shadow_entry(struct address_space *mapping,
 	struct radix_tree_node *node;
 	void **slot;
 
-	if (!__radix_tree_lookup(&mapping->page_tree, index, &node, &slot))
+	if (!__radix_tree_lookup(&mapping->i_pages, index, &node, &slot))
 		return;
 	if (*slot != entry)
 		return;
-	__radix_tree_replace(&mapping->page_tree, node, slot, NULL,
+	__radix_tree_replace(&mapping->i_pages, node, slot, NULL,
 			     workingset_update_node);
 	mapping->nrexceptional--;
 }
@@ -48,9 +48,9 @@ static inline void __clear_shadow_entry(struct address_space *mapping,
 static void clear_shadow_entry(struct address_space *mapping, pgoff_t index,
 			       void *entry)
 {
-	spin_lock_irq(&mapping->tree_lock);
+	xa_lock_irq(&mapping->i_pages);
 	__clear_shadow_entry(mapping, index, entry);
-	spin_unlock_irq(&mapping->tree_lock);
+	xa_unlock_irq(&mapping->i_pages);
 }
 
 /*
@@ -79,7 +79,7 @@ static void truncate_exceptional_pvec_entries(struct address_space *mapping,
 	dax = dax_mapping(mapping);
 	lock = !dax && indices[j] < end;
 	if (lock)
-		spin_lock_irq(&mapping->tree_lock);
+		xa_lock_irq(&mapping->i_pages);
 
 	for (i = j; i < pagevec_count(pvec); i++) {
 		struct page *page = pvec->pages[i];
@@ -102,7 +102,7 @@ static void truncate_exceptional_pvec_entries(struct address_space *mapping,
 	}
 
 	if (lock)
-		spin_unlock_irq(&mapping->tree_lock);
+		xa_unlock_irq(&mapping->i_pages);
 	pvec->nr = j;
 }
 
@@ -518,8 +518,8 @@ void truncate_inode_pages_final(struct address_space *mapping)
 		 * modification that does not see AS_EXITING is
 		 * completed before starting the final truncate.
 		 */
-		spin_lock_irq(&mapping->tree_lock);
-		spin_unlock_irq(&mapping->tree_lock);
+		xa_lock_irq(&mapping->i_pages);
+		xa_unlock_irq(&mapping->i_pages);
 
 		truncate_inode_pages(mapping, 0);
 	}
@@ -627,13 +627,13 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page)
 	if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL))
 		return 0;
 
-	spin_lock_irqsave(&mapping->tree_lock, flags);
+	xa_lock_irqsave(&mapping->i_pages, flags);
 	if (PageDirty(page))
 		goto failed;
 
 	BUG_ON(page_has_private(page));
 	__delete_from_page_cache(page, NULL);
-	spin_unlock_irqrestore(&mapping->tree_lock, flags);
+	xa_unlock_irqrestore(&mapping->i_pages, flags);
 
 	if (mapping->a_ops->freepage)
 		mapping->a_ops->freepage(page);
@@ -641,7 +641,7 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page)
 	put_page(page);	/* pagecache ref */
 	return 1;
 failed:
-	spin_unlock_irqrestore(&mapping->tree_lock, flags);
+	xa_unlock_irqrestore(&mapping->i_pages, flags);
 	return 0;
 }
 
diff --git a/mm/util.c b/mm/util.c
index 029fc2f3b395..45fc3169e7b0 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -287,7 +287,7 @@ int vma_is_stack_for_current(struct vm_area_struct *vma)
 }
 
 #if defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT)
-void arch_pick_mmap_layout(struct mm_struct *mm)
+void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
 {
 	mm->mmap_base = TASK_UNMAPPED_BASE;
 	mm->get_unmapped_area = arch_get_unmapped_area;
@@ -297,8 +297,10 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 /*
  * Like get_user_pages_fast() except its IRQ-safe in that it won't fall
  * back to the regular GUP.
- * If the architecture not support this function, simply return with no
- * page pinned
+ * Note a difference with get_user_pages_fast: this always returns the
+ * number of pages pinned, 0 if no pages were pinned.
+ * If the architecture does not support this function, simply return with no
+ * pages pinned.
  */
 int __weak __get_user_pages_fast(unsigned long start,
 				 int nr_pages, int write, struct page **pages)
@@ -668,6 +670,13 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
 		free += global_node_page_state(NR_SLAB_RECLAIMABLE);
 
 		/*
+		 * Part of the kernel memory, which can be released
+		 * under memory pressure.
+		 */
+		free += global_node_page_state(
+			NR_INDIRECTLY_RECLAIMABLE_BYTES) >> PAGE_SHIFT;
+
+		/*
 		 * Leave reserved pages. The pages are not for anonymous pages.
 		 */
 		if (free <= totalreserve_pages)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 4390a8d5be41..8b920ce3ae02 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -116,6 +116,16 @@ struct scan_control {
 
 	/* Number of pages freed so far during a call to shrink_zones() */
 	unsigned long nr_reclaimed;
+
+	struct {
+		unsigned int dirty;
+		unsigned int unqueued_dirty;
+		unsigned int congested;
+		unsigned int writeback;
+		unsigned int immediate;
+		unsigned int file_taken;
+		unsigned int taken;
+	} nr;
 };
 
 #ifdef ARCH_HAS_PREFETCH
@@ -190,6 +200,29 @@ static bool sane_reclaim(struct scan_control *sc)
 #endif
 	return false;
 }
+
+static void set_memcg_congestion(pg_data_t *pgdat,
+				struct mem_cgroup *memcg,
+				bool congested)
+{
+	struct mem_cgroup_per_node *mn;
+
+	if (!memcg)
+		return;
+
+	mn = mem_cgroup_nodeinfo(memcg, pgdat->node_id);
+	WRITE_ONCE(mn->congested, congested);
+}
+
+static bool memcg_congested(pg_data_t *pgdat,
+			struct mem_cgroup *memcg)
+{
+	struct mem_cgroup_per_node *mn;
+
+	mn = mem_cgroup_nodeinfo(memcg, pgdat->node_id);
+	return READ_ONCE(mn->congested);
+
+}
 #else
 static bool global_reclaim(struct scan_control *sc)
 {
@@ -200,6 +233,18 @@ static bool sane_reclaim(struct scan_control *sc)
 {
 	return true;
 }
+
+static inline void set_memcg_congestion(struct pglist_data *pgdat,
+				struct mem_cgroup *memcg, bool congested)
+{
+}
+
+static inline bool memcg_congested(struct pglist_data *pgdat,
+			struct mem_cgroup *memcg)
+{
+	return false;
+
+}
 #endif
 
 /*
@@ -648,7 +693,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
 	BUG_ON(!PageLocked(page));
 	BUG_ON(mapping != page_mapping(page));
 
-	spin_lock_irqsave(&mapping->tree_lock, flags);
+	xa_lock_irqsave(&mapping->i_pages, flags);
 	/*
 	 * The non racy check for a busy page.
 	 *
@@ -672,7 +717,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
 	 * load is not satisfied before that of page->_refcount.
 	 *
 	 * Note that if SetPageDirty is always performed via set_page_dirty,
-	 * and thus under tree_lock, then this ordering is not required.
+	 * and thus under the i_pages lock, then this ordering is not required.
 	 */
 	if (unlikely(PageTransHuge(page)) && PageSwapCache(page))
 		refcount = 1 + HPAGE_PMD_NR;
@@ -690,7 +735,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
 		swp_entry_t swap = { .val = page_private(page) };
 		mem_cgroup_swapout(page, swap);
 		__delete_from_swap_cache(page);
-		spin_unlock_irqrestore(&mapping->tree_lock, flags);
+		xa_unlock_irqrestore(&mapping->i_pages, flags);
 		put_swap_page(page, swap);
 	} else {
 		void (*freepage)(struct page *);
@@ -711,13 +756,13 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
 		 * only page cache pages found in these are zero pages
 		 * covering holes, and because we don't want to mix DAX
 		 * exceptional entries and shadow exceptional entries in the
-		 * same page_tree.
+		 * same address_space.
 		 */
 		if (reclaimed && page_is_file_cache(page) &&
 		    !mapping_exiting(mapping) && !dax_mapping(mapping))
 			shadow = workingset_eviction(mapping, page);
 		__delete_from_page_cache(page, shadow);
-		spin_unlock_irqrestore(&mapping->tree_lock, flags);
+		xa_unlock_irqrestore(&mapping->i_pages, flags);
 
 		if (freepage != NULL)
 			freepage(page);
@@ -726,7 +771,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
 	return 1;
 
 cannot_free:
-	spin_unlock_irqrestore(&mapping->tree_lock, flags);
+	xa_unlock_irqrestore(&mapping->i_pages, flags);
 	return 0;
 }
 
@@ -857,17 +902,6 @@ static void page_check_dirty_writeback(struct page *page,
 		mapping->a_ops->is_dirty_writeback(page, dirty, writeback);
 }
 
-struct reclaim_stat {
-	unsigned nr_dirty;
-	unsigned nr_unqueued_dirty;
-	unsigned nr_congested;
-	unsigned nr_writeback;
-	unsigned nr_immediate;
-	unsigned nr_activate;
-	unsigned nr_ref_keep;
-	unsigned nr_unmap_fail;
-};
-
 /*
  * shrink_page_list() returns the number of reclaimed pages
  */
@@ -926,7 +960,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 			(PageSwapCache(page) && (sc->gfp_mask & __GFP_IO));
 
 		/*
-		 * The number of dirty pages determines if a zone is marked
+		 * The number of dirty pages determines if a node is marked
 		 * reclaim_congested which affects wait_iff_congested. kswapd
 		 * will stall and start writing pages if the tail of the LRU
 		 * is all dirty unqueued pages.
@@ -1755,23 +1789,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 	free_unref_page_list(&page_list);
 
 	/*
-	 * If reclaim is isolating dirty pages under writeback, it implies
-	 * that the long-lived page allocation rate is exceeding the page
-	 * laundering rate. Either the global limits are not being effective
-	 * at throttling processes due to the page distribution throughout
-	 * zones or there is heavy usage of a slow backing device. The
-	 * only option is to throttle from reclaim context which is not ideal
-	 * as there is no guarantee the dirtying process is throttled in the
-	 * same way balance_dirty_pages() manages.
-	 *
-	 * Once a zone is flagged ZONE_WRITEBACK, kswapd will count the number
-	 * of pages under pages flagged for immediate reclaim and stall if any
-	 * are encountered in the nr_immediate check below.
-	 */
-	if (stat.nr_writeback && stat.nr_writeback == nr_taken)
-		set_bit(PGDAT_WRITEBACK, &pgdat->flags);
-
-	/*
 	 * If dirty pages are scanned that are not queued for IO, it
 	 * implies that flushers are not doing their job. This can
 	 * happen when memory pressure pushes dirty pages to the end of
@@ -1785,48 +1802,17 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 	if (stat.nr_unqueued_dirty == nr_taken)
 		wakeup_flusher_threads(WB_REASON_VMSCAN);
 
-	/*
-	 * Legacy memcg will stall in page writeback so avoid forcibly
-	 * stalling here.
-	 */
-	if (sane_reclaim(sc)) {
-		/*
-		 * Tag a zone as congested if all the dirty pages scanned were
-		 * backed by a congested BDI and wait_iff_congested will stall.
-		 */
-		if (stat.nr_dirty && stat.nr_dirty == stat.nr_congested)
-			set_bit(PGDAT_CONGESTED, &pgdat->flags);
-
-		/* Allow kswapd to start writing pages during reclaim. */
-		if (stat.nr_unqueued_dirty == nr_taken)
-			set_bit(PGDAT_DIRTY, &pgdat->flags);
-
-		/*
-		 * If kswapd scans pages marked marked for immediate
-		 * reclaim and under writeback (nr_immediate), it implies
-		 * that pages are cycling through the LRU faster than
-		 * they are written so also forcibly stall.
-		 */
-		if (stat.nr_immediate && current_may_throttle())
-			congestion_wait(BLK_RW_ASYNC, HZ/10);
-	}
-
-	/*
-	 * Stall direct reclaim for IO completions if underlying BDIs or zone
-	 * is congested. Allow kswapd to continue until it starts encountering
-	 * unqueued dirty pages or cycling through the LRU too quickly.
-	 */
-	if (!sc->hibernation_mode && !current_is_kswapd() &&
-	    current_may_throttle())
-		wait_iff_congested(pgdat, BLK_RW_ASYNC, HZ/10);
+	sc->nr.dirty += stat.nr_dirty;
+	sc->nr.congested += stat.nr_congested;
+	sc->nr.unqueued_dirty += stat.nr_unqueued_dirty;
+	sc->nr.writeback += stat.nr_writeback;
+	sc->nr.immediate += stat.nr_immediate;
+	sc->nr.taken += nr_taken;
+	if (file)
+		sc->nr.file_taken += nr_taken;
 
 	trace_mm_vmscan_lru_shrink_inactive(pgdat->node_id,
-			nr_scanned, nr_reclaimed,
-			stat.nr_dirty,  stat.nr_writeback,
-			stat.nr_congested, stat.nr_immediate,
-			stat.nr_activate, stat.nr_ref_keep,
-			stat.nr_unmap_fail,
-			sc->priority, file);
+			nr_scanned, nr_reclaimed, &stat, sc->priority, file);
 	return nr_reclaimed;
 }
 
@@ -2507,6 +2493,12 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat,
 	return true;
 }
 
+static bool pgdat_memcg_congested(pg_data_t *pgdat, struct mem_cgroup *memcg)
+{
+	return test_bit(PGDAT_CONGESTED, &pgdat->flags) ||
+		(memcg && memcg_congested(pgdat, memcg));
+}
+
 static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
 {
 	struct reclaim_state *reclaim_state = current->reclaim_state;
@@ -2522,6 +2514,8 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
 		unsigned long node_lru_pages = 0;
 		struct mem_cgroup *memcg;
 
+		memset(&sc->nr, 0, sizeof(sc->nr));
+
 		nr_reclaimed = sc->nr_reclaimed;
 		nr_scanned = sc->nr_scanned;
 
@@ -2536,7 +2530,7 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
 					sc->memcg_low_skipped = 1;
 					continue;
 				}
-				mem_cgroup_event(memcg, MEMCG_LOW);
+				memcg_memory_event(memcg, MEMCG_LOW);
 			}
 
 			reclaimed = sc->nr_reclaimed;
@@ -2587,6 +2581,67 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
 		if (sc->nr_reclaimed - nr_reclaimed)
 			reclaimable = true;
 
+		if (current_is_kswapd()) {
+			/*
+			 * If reclaim is isolating dirty pages under writeback,
+			 * it implies that the long-lived page allocation rate
+			 * is exceeding the page laundering rate. Either the
+			 * global limits are not being effective at throttling
+			 * processes due to the page distribution throughout
+			 * zones or there is heavy usage of a slow backing
+			 * device. The only option is to throttle from reclaim
+			 * context which is not ideal as there is no guarantee
+			 * the dirtying process is throttled in the same way
+			 * balance_dirty_pages() manages.
+			 *
+			 * Once a node is flagged PGDAT_WRITEBACK, kswapd will
+			 * count the number of pages under writeback flagged for
+			 * immediate reclaim and stall if any are encountered
+			 * in the nr_immediate check below.
+			 */
+			if (sc->nr.writeback && sc->nr.writeback == sc->nr.taken)
+				set_bit(PGDAT_WRITEBACK, &pgdat->flags);
+
+			/*
+			 * Tag a node as congested if all the dirty pages
+			 * scanned were backed by a congested BDI and
+			 * wait_iff_congested will stall.
+			 */
+			if (sc->nr.dirty && sc->nr.dirty == sc->nr.congested)
+				set_bit(PGDAT_CONGESTED, &pgdat->flags);
+
+			/* Allow kswapd to start writing pages during reclaim.*/
+			if (sc->nr.unqueued_dirty == sc->nr.file_taken)
+				set_bit(PGDAT_DIRTY, &pgdat->flags);
+
+			/*
+			 * If kswapd scans pages marked for immediate
+			 * reclaim and under writeback (nr_immediate), it
+			 * implies that pages are cycling through the LRU
+			 * faster than they are written so also forcibly stall.
+			 */
+			if (sc->nr.immediate)
+				congestion_wait(BLK_RW_ASYNC, HZ/10);
+		}
+
+		/*
+		 * Legacy memcg will stall in page writeback so avoid forcibly
+		 * stalling in wait_iff_congested().
+		 */
+		if (!global_reclaim(sc) && sane_reclaim(sc) &&
+		    sc->nr.dirty && sc->nr.dirty == sc->nr.congested)
+			set_memcg_congestion(pgdat, root, true);
+
+		/*
+		 * Stall direct reclaim for IO completions if underlying BDIs
+		 * and node are congested. Allow kswapd to continue until it
+		 * starts encountering unqueued dirty pages or cycling through
+		 * the LRU too quickly.
+		 */
+		if (!sc->hibernation_mode && !current_is_kswapd() &&
+		   current_may_throttle() && pgdat_memcg_congested(pgdat, root))
+			wait_iff_congested(BLK_RW_ASYNC, HZ/10);
+
 	} while (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed,
 					 sc->nr_scanned - nr_scanned, sc));
 
@@ -2802,6 +2857,7 @@ retry:
 			continue;
 		last_pgdat = zone->zone_pgdat;
 		snapshot_refaults(sc->target_mem_cgroup, zone->zone_pgdat);
+		set_memcg_congestion(last_pgdat, sc->target_mem_cgroup, false);
 	}
 
 	delayacct_freepages_end();
@@ -3808,7 +3864,7 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
 
 	if (node_pagecache_reclaimable(pgdat) > pgdat->min_unmapped_pages) {
 		/*
-		 * Free memory by calling shrink zone with increasing
+		 * Free memory by calling shrink node with increasing
 		 * priorities until we have enough memory freed.
 		 */
 		do {
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 33581be705f0..536332e988b8 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1161,6 +1161,7 @@ const char * const vmstat_text[] = {
 	"nr_vmscan_immediate_reclaim",
 	"nr_dirtied",
 	"nr_written",
+	"nr_indirectly_reclaimable",
 
 	/* enum writeback_stat_item counters */
 	"nr_dirty_threshold",
diff --git a/mm/workingset.c b/mm/workingset.c
index b7d616a3bbbe..40ee02c83978 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -202,7 +202,7 @@ static void unpack_shadow(void *shadow, int *memcgidp, pg_data_t **pgdat,
  * @mapping: address space the page was backing
  * @page: the page being evicted
  *
- * Returns a shadow entry to be stored in @mapping->page_tree in place
+ * Returns a shadow entry to be stored in @mapping->i_pages in place
  * of the evicted @page so that a later refault can be detected.
  */
 void *workingset_eviction(struct address_space *mapping, struct page *page)
@@ -348,7 +348,7 @@ void workingset_update_node(struct radix_tree_node *node)
 	 *
 	 * Avoid acquiring the list_lru lock when the nodes are
 	 * already where they should be. The list_empty() test is safe
-	 * as node->private_list is protected by &mapping->tree_lock.
+	 * as node->private_list is protected by the i_pages lock.
 	 */
 	if (node->count && node->count == node->exceptional) {
 		if (list_empty(&node->private_list))
@@ -366,7 +366,7 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker,
 	unsigned long nodes;
 	unsigned long cache;
 
-	/* list_lru lock nests inside IRQ-safe mapping->tree_lock */
+	/* list_lru lock nests inside the IRQ-safe i_pages lock */
 	local_irq_disable();
 	nodes = list_lru_shrink_count(&shadow_nodes, sc);
 	local_irq_enable();
@@ -419,21 +419,21 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
 
 	/*
 	 * Page cache insertions and deletions synchroneously maintain
-	 * the shadow node LRU under the mapping->tree_lock and the
+	 * the shadow node LRU under the i_pages lock and the
 	 * lru_lock.  Because the page cache tree is emptied before
 	 * the inode can be destroyed, holding the lru_lock pins any
 	 * address_space that has radix tree nodes on the LRU.
 	 *
-	 * We can then safely transition to the mapping->tree_lock to
+	 * We can then safely transition to the i_pages lock to
 	 * pin only the address_space of the particular node we want
 	 * to reclaim, take the node off-LRU, and drop the lru_lock.
 	 */
 
 	node = container_of(item, struct radix_tree_node, private_list);
-	mapping = container_of(node->root, struct address_space, page_tree);
+	mapping = container_of(node->root, struct address_space, i_pages);
 
 	/* Coming from the list, invert the lock order */
-	if (!spin_trylock(&mapping->tree_lock)) {
+	if (!xa_trylock(&mapping->i_pages)) {
 		spin_unlock(lru_lock);
 		ret = LRU_RETRY;
 		goto out;
@@ -468,11 +468,11 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
 	if (WARN_ON_ONCE(node->exceptional))
 		goto out_invalid;
 	inc_lruvec_page_state(virt_to_page(node), WORKINGSET_NODERECLAIM);
-	__radix_tree_delete_node(&mapping->page_tree, node,
+	__radix_tree_delete_node(&mapping->i_pages, node,
 				 workingset_lookup_update(mapping));
 
 out_invalid:
-	spin_unlock(&mapping->tree_lock);
+	xa_unlock(&mapping->i_pages);
 	ret = LRU_REMOVED_RETRY;
 out:
 	local_irq_enable();
@@ -487,7 +487,7 @@ static unsigned long scan_shadow_nodes(struct shrinker *shrinker,
 {
 	unsigned long ret;
 
-	/* list_lru lock nests inside IRQ-safe mapping->tree_lock */
+	/* list_lru lock nests inside the IRQ-safe i_pages lock */
 	local_irq_disable();
 	ret = list_lru_shrink_walk(&shadow_nodes, sc, shadow_lru_isolate, NULL);
 	local_irq_enable();
@@ -503,7 +503,7 @@ static struct shrinker workingset_shadow_shrinker = {
 
 /*
  * Our list_lru->lock is IRQ-safe as it nests inside the IRQ-safe
- * mapping->tree_lock.
+ * i_pages lock.
  */
 static struct lock_class_key shadow_nodes_key;
 
diff --git a/mm/z3fold.c b/mm/z3fold.c
index f579ad4a8100..c0bca6153b95 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -467,6 +467,8 @@ static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp,
 	spin_lock_init(&pool->lock);
 	spin_lock_init(&pool->stale_lock);
 	pool->unbuddied = __alloc_percpu(sizeof(struct list_head)*NCHUNKS, 2);
+	if (!pool->unbuddied)
+		goto out_pool;
 	for_each_possible_cpu(cpu) {
 		struct list_head *unbuddied =
 				per_cpu_ptr(pool->unbuddied, cpu);
@@ -479,7 +481,7 @@ static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp,
 	pool->name = name;
 	pool->compact_wq = create_singlethread_workqueue(pool->name);
 	if (!pool->compact_wq)
-		goto out;
+		goto out_unbuddied;
 	pool->release_wq = create_singlethread_workqueue(pool->name);
 	if (!pool->release_wq)
 		goto out_wq;
@@ -489,8 +491,11 @@ static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp,
 
 out_wq:
 	destroy_workqueue(pool->compact_wq);
-out:
+out_unbuddied:
+	free_percpu(pool->unbuddied);
+out_pool:
 	kfree(pool);
+out:
 	return NULL;
 }
 
@@ -533,7 +538,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
 	struct z3fold_header *zhdr = NULL;
 	struct page *page = NULL;
 	enum buddy bud;
-	bool can_sleep = (gfp & __GFP_RECLAIM) == __GFP_RECLAIM;
+	bool can_sleep = gfpflags_allow_blocking(gfp);
 
 	if (!size || (gfp & __GFP_HIGHMEM))
 		return -EINVAL;
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index a9682534c377..45ff5dc124cc 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -749,18 +749,31 @@ static bool conn_use_rpa(struct hci_conn *conn)
 }
 
 static void hci_req_add_le_create_conn(struct hci_request *req,
-				       struct hci_conn *conn)
+				       struct hci_conn *conn,
+				       bdaddr_t *direct_rpa)
 {
 	struct hci_cp_le_create_conn cp;
 	struct hci_dev *hdev = conn->hdev;
 	u8 own_addr_type;
 
-	/* Update random address, but set require_privacy to false so
-	 * that we never connect with an non-resolvable address.
+	/* If direct address was provided we use it instead of current
+	 * address.
 	 */
-	if (hci_update_random_address(req, false, conn_use_rpa(conn),
-				      &own_addr_type))
-		return;
+	if (direct_rpa) {
+		if (bacmp(&req->hdev->random_addr, direct_rpa))
+			hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6,
+								direct_rpa);
+
+		/* direct address is always RPA */
+		own_addr_type = ADDR_LE_DEV_RANDOM;
+	} else {
+		/* Update random address, but set require_privacy to false so
+		 * that we never connect with a non-resolvable address.
+		 */
+		if (hci_update_random_address(req, false, conn_use_rpa(conn),
+					      &own_addr_type))
+			return;
+	}
 
 	memset(&cp, 0, sizeof(cp));
 
@@ -825,7 +838,7 @@ static void hci_req_directed_advertising(struct hci_request *req,
 
 struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
 				u8 dst_type, u8 sec_level, u16 conn_timeout,
-				u8 role)
+				u8 role, bdaddr_t *direct_rpa)
 {
 	struct hci_conn_params *params;
 	struct hci_conn *conn;
@@ -940,7 +953,7 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
 		hci_dev_set_flag(hdev, HCI_LE_SCAN_INTERRUPTED);
 	}
 
-	hci_req_add_le_create_conn(&req, conn);
+	hci_req_add_le_create_conn(&req, conn, direct_rpa);
 
 create_conn:
 	err = hci_req_run(&req, create_le_conn_complete);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index cd3bbb766c24..139707cd9d35 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -4648,7 +4648,8 @@ static void hci_le_conn_update_complete_evt(struct hci_dev *hdev,
 /* This function requires the caller holds hdev->lock */
 static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev,
 					      bdaddr_t *addr,
-					      u8 addr_type, u8 adv_type)
+					      u8 addr_type, u8 adv_type,
+					      bdaddr_t *direct_rpa)
 {
 	struct hci_conn *conn;
 	struct hci_conn_params *params;
@@ -4699,7 +4700,8 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev,
 	}
 
 	conn = hci_connect_le(hdev, addr, addr_type, BT_SECURITY_LOW,
-			      HCI_LE_AUTOCONN_TIMEOUT, HCI_ROLE_MASTER);
+			      HCI_LE_AUTOCONN_TIMEOUT, HCI_ROLE_MASTER,
+			      direct_rpa);
 	if (!IS_ERR(conn)) {
 		/* If HCI_AUTO_CONN_EXPLICIT is set, conn is already owned
 		 * by higher layer that tried to connect, if no then
@@ -4808,8 +4810,13 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
 		bdaddr_type = irk->addr_type;
 	}
 
-	/* Check if we have been requested to connect to this device */
-	conn = check_pending_le_conn(hdev, bdaddr, bdaddr_type, type);
+	/* Check if we have been requested to connect to this device.
+	 *
+	 * direct_addr is set only for directed advertising reports (it is NULL
+	 * for advertising reports) and is already verified to be RPA above.
+	 */
+	conn = check_pending_le_conn(hdev, bdaddr, bdaddr_type, type,
+								direct_addr);
 	if (conn && type == LE_ADV_IND) {
 		/* Store report for later inclusion by
 		 * mgmt_device_connected
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index fc6615d59165..9b7907ebfa01 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -7156,7 +7156,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,
 			hcon = hci_connect_le(hdev, dst, dst_type,
 					      chan->sec_level,
 					      HCI_LE_CONN_TIMEOUT,
-					      HCI_ROLE_SLAVE);
+					      HCI_ROLE_SLAVE, NULL);
 		else
 			hcon = hci_connect_le_scan(hdev, dst, dst_type,
 						   chan->sec_level,
diff --git a/net/ceph/Makefile b/net/ceph/Makefile
index b4bded4b5396..12bf49772d24 100644
--- a/net/ceph/Makefile
+++ b/net/ceph/Makefile
@@ -8,6 +8,7 @@ libceph-y := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
 	mon_client.o \
 	cls_lock_client.o \
 	osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \
+	striper.o \
 	debugfs.o \
 	auth.o auth_none.o \
 	crypto.o armor.o \
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 4adf07826f4a..584fdbef2088 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -72,6 +72,7 @@ const char *ceph_msg_type_name(int type)
 	case CEPH_MSG_MON_GET_VERSION: return "mon_get_version";
 	case CEPH_MSG_MON_GET_VERSION_REPLY: return "mon_get_version_reply";
 	case CEPH_MSG_MDS_MAP: return "mds_map";
+	case CEPH_MSG_FS_MAP_USER: return "fs_map_user";
 	case CEPH_MSG_CLIENT_SESSION: return "client_session";
 	case CEPH_MSG_CLIENT_RECONNECT: return "client_reconnect";
 	case CEPH_MSG_CLIENT_REQUEST: return "client_request";
@@ -79,8 +80,13 @@ const char *ceph_msg_type_name(int type)
 	case CEPH_MSG_CLIENT_REPLY: return "client_reply";
 	case CEPH_MSG_CLIENT_CAPS: return "client_caps";
 	case CEPH_MSG_CLIENT_CAPRELEASE: return "client_cap_release";
+	case CEPH_MSG_CLIENT_QUOTA: return "client_quota";
 	case CEPH_MSG_CLIENT_SNAP: return "client_snap";
 	case CEPH_MSG_CLIENT_LEASE: return "client_lease";
+	case CEPH_MSG_POOLOP_REPLY: return "poolop_reply";
+	case CEPH_MSG_POOLOP: return "poolop";
+	case CEPH_MSG_MON_COMMAND: return "mon_command";
+	case CEPH_MSG_MON_COMMAND_ACK: return "mon_command_ack";
 	case CEPH_MSG_OSD_MAP: return "osd_map";
 	case CEPH_MSG_OSD_OP: return "osd_op";
 	case CEPH_MSG_OSD_OPREPLY: return "osd_opreply";
@@ -217,7 +223,7 @@ static int parse_fsid(const char *str, struct ceph_fsid *fsid)
 
 	if (i == 16)
 		err = 0;
-	dout("parse_fsid ret %d got fsid %pU", err, fsid);
+	dout("parse_fsid ret %d got fsid %pU\n", err, fsid);
 	return err;
 }
 
diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
index bf9d079cbafd..02172c408ff2 100644
--- a/net/ceph/crypto.c
+++ b/net/ceph/crypto.c
@@ -347,10 +347,12 @@ struct key_type key_type_ceph = {
 	.destroy	= ceph_key_destroy,
 };
 
-int ceph_crypto_init(void) {
+int __init ceph_crypto_init(void)
+{
 	return register_key_type(&key_type_ceph);
 }
 
-void ceph_crypto_shutdown(void) {
+void ceph_crypto_shutdown(void)
+{
 	unregister_key_type(&key_type_ceph);
 }
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index 1eef6806aa1a..02952605d121 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -389,7 +389,7 @@ CEPH_DEFINE_SHOW_FUNC(monc_show)
 CEPH_DEFINE_SHOW_FUNC(osdc_show)
 CEPH_DEFINE_SHOW_FUNC(client_options_show)
 
-int ceph_debugfs_init(void)
+int __init ceph_debugfs_init(void)
 {
 	ceph_debugfs_dir = debugfs_create_dir("ceph", NULL);
 	if (!ceph_debugfs_dir)
@@ -418,7 +418,7 @@ int ceph_debugfs_client_init(struct ceph_client *client)
 		goto out;
 
 	client->monc.debugfs_file = debugfs_create_file("monc",
-						      0600,
+						      0400,
 						      client->debugfs_dir,
 						      client,
 						      &monc_show_fops);
@@ -426,7 +426,7 @@ int ceph_debugfs_client_init(struct ceph_client *client)
 		goto out;
 
 	client->osdc.debugfs_file = debugfs_create_file("osdc",
-						      0600,
+						      0400,
 						      client->debugfs_dir,
 						      client,
 						      &osdc_show_fops);
@@ -434,7 +434,7 @@ int ceph_debugfs_client_init(struct ceph_client *client)
 		goto out;
 
 	client->debugfs_monmap = debugfs_create_file("monmap",
-					0600,
+					0400,
 					client->debugfs_dir,
 					client,
 					&monmap_show_fops);
@@ -442,7 +442,7 @@ int ceph_debugfs_client_init(struct ceph_client *client)
 		goto out;
 
 	client->debugfs_osdmap = debugfs_create_file("osdmap",
-					0600,
+					0400,
 					client->debugfs_dir,
 					client,
 					&osdmap_show_fops);
@@ -450,7 +450,7 @@ int ceph_debugfs_client_init(struct ceph_client *client)
 		goto out;
 
 	client->debugfs_options = debugfs_create_file("client_options",
-					0600,
+					0400,
 					client->debugfs_dir,
 					client,
 					&client_options_show_fops);
@@ -477,7 +477,7 @@ void ceph_debugfs_client_cleanup(struct ceph_client *client)
 
 #else  /* CONFIG_DEBUG_FS */
 
-int ceph_debugfs_init(void)
+int __init ceph_debugfs_init(void)
 {
 	return 0;
 }
@@ -496,6 +496,3 @@ void ceph_debugfs_client_cleanup(struct ceph_client *client)
 }
 
 #endif  /* CONFIG_DEBUG_FS */
-
-EXPORT_SYMBOL(ceph_debugfs_init);
-EXPORT_SYMBOL(ceph_debugfs_cleanup);
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 8a4d3758030b..fcb40c12b1f8 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -277,7 +277,7 @@ static void _ceph_msgr_exit(void)
 	ceph_msgr_slab_exit();
 }
 
-int ceph_msgr_init(void)
+int __init ceph_msgr_init(void)
 {
 	if (ceph_msgr_slab_init())
 		return -ENOMEM;
@@ -299,7 +299,6 @@ int ceph_msgr_init(void)
 
 	return -ENOMEM;
 }
-EXPORT_SYMBOL(ceph_msgr_init);
 
 void ceph_msgr_exit(void)
 {
@@ -307,7 +306,6 @@ void ceph_msgr_exit(void)
 
 	_ceph_msgr_exit();
 }
-EXPORT_SYMBOL(ceph_msgr_exit);
 
 void ceph_msgr_flush(void)
 {
@@ -839,93 +837,112 @@ static void ceph_msg_data_bio_cursor_init(struct ceph_msg_data_cursor *cursor,
 					size_t length)
 {
 	struct ceph_msg_data *data = cursor->data;
-	struct bio *bio;
+	struct ceph_bio_iter *it = &cursor->bio_iter;
 
-	BUG_ON(data->type != CEPH_MSG_DATA_BIO);
+	cursor->resid = min_t(size_t, length, data->bio_length);
+	*it = data->bio_pos;
+	if (cursor->resid < it->iter.bi_size)
+		it->iter.bi_size = cursor->resid;
 
-	bio = data->bio;
-	BUG_ON(!bio);
-
-	cursor->resid = min(length, data->bio_length);
-	cursor->bio = bio;
-	cursor->bvec_iter = bio->bi_iter;
-	cursor->last_piece =
-		cursor->resid <= bio_iter_len(bio, cursor->bvec_iter);
+	BUG_ON(cursor->resid < bio_iter_len(it->bio, it->iter));
+	cursor->last_piece = cursor->resid == bio_iter_len(it->bio, it->iter);
 }
 
 static struct page *ceph_msg_data_bio_next(struct ceph_msg_data_cursor *cursor,
 						size_t *page_offset,
 						size_t *length)
 {
-	struct ceph_msg_data *data = cursor->data;
-	struct bio *bio;
-	struct bio_vec bio_vec;
-
-	BUG_ON(data->type != CEPH_MSG_DATA_BIO);
-
-	bio = cursor->bio;
-	BUG_ON(!bio);
-
-	bio_vec = bio_iter_iovec(bio, cursor->bvec_iter);
-
-	*page_offset = (size_t) bio_vec.bv_offset;
-	BUG_ON(*page_offset >= PAGE_SIZE);
-	if (cursor->last_piece) /* pagelist offset is always 0 */
-		*length = cursor->resid;
-	else
-		*length = (size_t) bio_vec.bv_len;
-	BUG_ON(*length > cursor->resid);
-	BUG_ON(*page_offset + *length > PAGE_SIZE);
+	struct bio_vec bv = bio_iter_iovec(cursor->bio_iter.bio,
+					   cursor->bio_iter.iter);
 
-	return bio_vec.bv_page;
+	*page_offset = bv.bv_offset;
+	*length = bv.bv_len;
+	return bv.bv_page;
 }
 
 static bool ceph_msg_data_bio_advance(struct ceph_msg_data_cursor *cursor,
 					size_t bytes)
 {
-	struct bio *bio;
-	struct bio_vec bio_vec;
+	struct ceph_bio_iter *it = &cursor->bio_iter;
 
-	BUG_ON(cursor->data->type != CEPH_MSG_DATA_BIO);
+	BUG_ON(bytes > cursor->resid);
+	BUG_ON(bytes > bio_iter_len(it->bio, it->iter));
+	cursor->resid -= bytes;
+	bio_advance_iter(it->bio, &it->iter, bytes);
 
-	bio = cursor->bio;
-	BUG_ON(!bio);
+	if (!cursor->resid) {
+		BUG_ON(!cursor->last_piece);
+		return false;   /* no more data */
+	}
 
-	bio_vec = bio_iter_iovec(bio, cursor->bvec_iter);
+	if (!bytes || (it->iter.bi_size && it->iter.bi_bvec_done))
+		return false;	/* more bytes to process in this segment */
 
-	/* Advance the cursor offset */
+	if (!it->iter.bi_size) {
+		it->bio = it->bio->bi_next;
+		it->iter = it->bio->bi_iter;
+		if (cursor->resid < it->iter.bi_size)
+			it->iter.bi_size = cursor->resid;
+	}
 
-	BUG_ON(cursor->resid < bytes);
-	cursor->resid -= bytes;
+	BUG_ON(cursor->last_piece);
+	BUG_ON(cursor->resid < bio_iter_len(it->bio, it->iter));
+	cursor->last_piece = cursor->resid == bio_iter_len(it->bio, it->iter);
+	return true;
+}
+#endif /* CONFIG_BLOCK */
 
-	bio_advance_iter(bio, &cursor->bvec_iter, bytes);
+static void ceph_msg_data_bvecs_cursor_init(struct ceph_msg_data_cursor *cursor,
+					size_t length)
+{
+	struct ceph_msg_data *data = cursor->data;
+	struct bio_vec *bvecs = data->bvec_pos.bvecs;
 
-	if (bytes < bio_vec.bv_len)
-		return false;	/* more bytes to process in this segment */
+	cursor->resid = min_t(size_t, length, data->bvec_pos.iter.bi_size);
+	cursor->bvec_iter = data->bvec_pos.iter;
+	cursor->bvec_iter.bi_size = cursor->resid;
 
-	/* Move on to the next segment, and possibly the next bio */
+	BUG_ON(cursor->resid < bvec_iter_len(bvecs, cursor->bvec_iter));
+	cursor->last_piece =
+	    cursor->resid == bvec_iter_len(bvecs, cursor->bvec_iter);
+}
 
-	if (!cursor->bvec_iter.bi_size) {
-		bio = bio->bi_next;
-		cursor->bio = bio;
-		if (bio)
-			cursor->bvec_iter = bio->bi_iter;
-		else
-			memset(&cursor->bvec_iter, 0,
-			       sizeof(cursor->bvec_iter));
-	}
+static struct page *ceph_msg_data_bvecs_next(struct ceph_msg_data_cursor *cursor,
+						size_t *page_offset,
+						size_t *length)
+{
+	struct bio_vec bv = bvec_iter_bvec(cursor->data->bvec_pos.bvecs,
+					   cursor->bvec_iter);
+
+	*page_offset = bv.bv_offset;
+	*length = bv.bv_len;
+	return bv.bv_page;
+}
+
+static bool ceph_msg_data_bvecs_advance(struct ceph_msg_data_cursor *cursor,
+					size_t bytes)
+{
+	struct bio_vec *bvecs = cursor->data->bvec_pos.bvecs;
+
+	BUG_ON(bytes > cursor->resid);
+	BUG_ON(bytes > bvec_iter_len(bvecs, cursor->bvec_iter));
+	cursor->resid -= bytes;
+	bvec_iter_advance(bvecs, &cursor->bvec_iter, bytes);
 
-	if (!cursor->last_piece) {
-		BUG_ON(!cursor->resid);
-		BUG_ON(!bio);
-		/* A short read is OK, so use <= rather than == */
-		if (cursor->resid <= bio_iter_len(bio, cursor->bvec_iter))
-			cursor->last_piece = true;
+	if (!cursor->resid) {
+		BUG_ON(!cursor->last_piece);
+		return false;   /* no more data */
 	}
 
+	if (!bytes || cursor->bvec_iter.bi_bvec_done)
+		return false;	/* more bytes to process in this segment */
+
+	BUG_ON(cursor->last_piece);
+	BUG_ON(cursor->resid < bvec_iter_len(bvecs, cursor->bvec_iter));
+	cursor->last_piece =
+	    cursor->resid == bvec_iter_len(bvecs, cursor->bvec_iter);
 	return true;
 }
-#endif /* CONFIG_BLOCK */
 
 /*
  * For a page array, a piece comes from the first page in the array
@@ -1110,6 +1127,9 @@ static void __ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor)
 		ceph_msg_data_bio_cursor_init(cursor, length);
 		break;
 #endif /* CONFIG_BLOCK */
+	case CEPH_MSG_DATA_BVECS:
+		ceph_msg_data_bvecs_cursor_init(cursor, length);
+		break;
 	case CEPH_MSG_DATA_NONE:
 	default:
 		/* BUG(); */
@@ -1158,14 +1178,19 @@ static struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor,
 		page = ceph_msg_data_bio_next(cursor, page_offset, length);
 		break;
 #endif /* CONFIG_BLOCK */
+	case CEPH_MSG_DATA_BVECS:
+		page = ceph_msg_data_bvecs_next(cursor, page_offset, length);
+		break;
 	case CEPH_MSG_DATA_NONE:
 	default:
 		page = NULL;
 		break;
 	}
+
 	BUG_ON(!page);
 	BUG_ON(*page_offset + *length > PAGE_SIZE);
 	BUG_ON(!*length);
+	BUG_ON(*length > cursor->resid);
 	if (last_piece)
 		*last_piece = cursor->last_piece;
 
@@ -1194,6 +1219,9 @@ static void ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor,
 		new_piece = ceph_msg_data_bio_advance(cursor, bytes);
 		break;
 #endif /* CONFIG_BLOCK */
+	case CEPH_MSG_DATA_BVECS:
+		new_piece = ceph_msg_data_bvecs_advance(cursor, bytes);
+		break;
 	case CEPH_MSG_DATA_NONE:
 	default:
 		BUG();
@@ -1575,13 +1603,18 @@ static int write_partial_message_data(struct ceph_connection *con)
 	 * been revoked, so use the zero page.
 	 */
 	crc = do_datacrc ? le32_to_cpu(msg->footer.data_crc) : 0;
-	while (cursor->resid) {
+	while (cursor->total_resid) {
 		struct page *page;
 		size_t page_offset;
 		size_t length;
 		bool last_piece;
 		int ret;
 
+		if (!cursor->resid) {
+			ceph_msg_data_advance(cursor, 0);
+			continue;
+		}
+
 		page = ceph_msg_data_next(cursor, &page_offset, &length,
 					  &last_piece);
 		ret = ceph_tcp_sendpage(con->sock, page, page_offset,
@@ -2297,7 +2330,12 @@ static int read_partial_msg_data(struct ceph_connection *con)
 
 	if (do_datacrc)
 		crc = con->in_data_crc;
-	while (cursor->resid) {
+	while (cursor->total_resid) {
+		if (!cursor->resid) {
+			ceph_msg_data_advance(cursor, 0);
+			continue;
+		}
+
 		page = ceph_msg_data_next(cursor, &page_offset, &length, NULL);
 		ret = ceph_tcp_recvpage(con->sock, page, page_offset, length);
 		if (ret <= 0) {
@@ -3262,16 +3300,14 @@ void ceph_msg_data_add_pagelist(struct ceph_msg *msg,
 EXPORT_SYMBOL(ceph_msg_data_add_pagelist);
 
 #ifdef	CONFIG_BLOCK
-void ceph_msg_data_add_bio(struct ceph_msg *msg, struct bio *bio,
-		size_t length)
+void ceph_msg_data_add_bio(struct ceph_msg *msg, struct ceph_bio_iter *bio_pos,
+			   u32 length)
 {
 	struct ceph_msg_data *data;
 
-	BUG_ON(!bio);
-
 	data = ceph_msg_data_create(CEPH_MSG_DATA_BIO);
 	BUG_ON(!data);
-	data->bio = bio;
+	data->bio_pos = *bio_pos;
 	data->bio_length = length;
 
 	list_add_tail(&data->links, &msg->data);
@@ -3280,6 +3316,20 @@ void ceph_msg_data_add_bio(struct ceph_msg *msg, struct bio *bio,
 EXPORT_SYMBOL(ceph_msg_data_add_bio);
 #endif	/* CONFIG_BLOCK */
 
+void ceph_msg_data_add_bvecs(struct ceph_msg *msg,
+			     struct ceph_bvec_iter *bvec_pos)
+{
+	struct ceph_msg_data *data;
+
+	data = ceph_msg_data_create(CEPH_MSG_DATA_BVECS);
+	BUG_ON(!data);
+	data->bvec_pos = *bvec_pos;
+
+	list_add_tail(&data->links, &msg->data);
+	msg->data_length += bvec_pos->iter.bi_size;
+}
+EXPORT_SYMBOL(ceph_msg_data_add_bvecs);
+
 /*
  * construct a new message with given type, size
  * the new msg has a ref count of 1.
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 1547107f4854..b3dac24412d3 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -60,7 +60,7 @@ struct ceph_monmap *ceph_monmap_decode(void *p, void *end)
 	num_mon = ceph_decode_32(&p);
 	ceph_decode_need(&p, end, num_mon*sizeof(m->mon_inst[0]), bad);
 
-	if (num_mon >= CEPH_MAX_MON)
+	if (num_mon > CEPH_MAX_MON)
 		goto bad;
 	m = kmalloc(sizeof(*m) + sizeof(m->mon_inst[0])*num_mon, GFP_NOFS);
 	if (m == NULL)
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 2814dba5902d..ea2a6c9fb7ce 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -20,6 +20,7 @@
 #include <linux/ceph/decode.h>
 #include <linux/ceph/auth.h>
 #include <linux/ceph/pagelist.h>
+#include <linux/ceph/striper.h>
 
 #define OSD_OPREPLY_FRONT_LEN	512
 
@@ -103,13 +104,12 @@ static int calc_layout(struct ceph_file_layout *layout, u64 off, u64 *plen,
 			u64 *objnum, u64 *objoff, u64 *objlen)
 {
 	u64 orig_len = *plen;
-	int r;
+	u32 xlen;
 
 	/* object extent? */
-	r = ceph_calc_file_object_mapping(layout, off, orig_len, objnum,
-					  objoff, objlen);
-	if (r < 0)
-		return r;
+	ceph_calc_file_object_mapping(layout, off, orig_len, objnum,
+					  objoff, &xlen);
+	*objlen = xlen;
 	if (*objlen < orig_len) {
 		*plen = *objlen;
 		dout(" skipping last %llu, final file extent %llu~%llu\n",
@@ -117,7 +117,6 @@ static int calc_layout(struct ceph_file_layout *layout, u64 off, u64 *plen,
 	}
 
 	dout("calc_layout objnum=%llx %llu~%llu\n", *objnum, *objoff, *objlen);
-
 	return 0;
 }
 
@@ -148,14 +147,22 @@ static void ceph_osd_data_pagelist_init(struct ceph_osd_data *osd_data,
 
 #ifdef CONFIG_BLOCK
 static void ceph_osd_data_bio_init(struct ceph_osd_data *osd_data,
-			struct bio *bio, size_t bio_length)
+				   struct ceph_bio_iter *bio_pos,
+				   u32 bio_length)
 {
 	osd_data->type = CEPH_OSD_DATA_TYPE_BIO;
-	osd_data->bio = bio;
+	osd_data->bio_pos = *bio_pos;
 	osd_data->bio_length = bio_length;
 }
 #endif /* CONFIG_BLOCK */
 
+static void ceph_osd_data_bvecs_init(struct ceph_osd_data *osd_data,
+				     struct ceph_bvec_iter *bvec_pos)
+{
+	osd_data->type = CEPH_OSD_DATA_TYPE_BVECS;
+	osd_data->bvec_pos = *bvec_pos;
+}
+
 #define osd_req_op_data(oreq, whch, typ, fld)				\
 ({									\
 	struct ceph_osd_request *__oreq = (oreq);			\
@@ -218,16 +225,29 @@ EXPORT_SYMBOL(osd_req_op_extent_osd_data_pagelist);
 
 #ifdef CONFIG_BLOCK
 void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req,
-			unsigned int which, struct bio *bio, size_t bio_length)
+				    unsigned int which,
+				    struct ceph_bio_iter *bio_pos,
+				    u32 bio_length)
 {
 	struct ceph_osd_data *osd_data;
 
 	osd_data = osd_req_op_data(osd_req, which, extent, osd_data);
-	ceph_osd_data_bio_init(osd_data, bio, bio_length);
+	ceph_osd_data_bio_init(osd_data, bio_pos, bio_length);
 }
 EXPORT_SYMBOL(osd_req_op_extent_osd_data_bio);
 #endif /* CONFIG_BLOCK */
 
+void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req,
+					 unsigned int which,
+					 struct ceph_bvec_iter *bvec_pos)
+{
+	struct ceph_osd_data *osd_data;
+
+	osd_data = osd_req_op_data(osd_req, which, extent, osd_data);
+	ceph_osd_data_bvecs_init(osd_data, bvec_pos);
+}
+EXPORT_SYMBOL(osd_req_op_extent_osd_data_bvec_pos);
+
 static void osd_req_op_cls_request_info_pagelist(
 			struct ceph_osd_request *osd_req,
 			unsigned int which, struct ceph_pagelist *pagelist)
@@ -265,6 +285,23 @@ void osd_req_op_cls_request_data_pages(struct ceph_osd_request *osd_req,
 }
 EXPORT_SYMBOL(osd_req_op_cls_request_data_pages);
 
+void osd_req_op_cls_request_data_bvecs(struct ceph_osd_request *osd_req,
+				       unsigned int which,
+				       struct bio_vec *bvecs, u32 bytes)
+{
+	struct ceph_osd_data *osd_data;
+	struct ceph_bvec_iter it = {
+		.bvecs = bvecs,
+		.iter = { .bi_size = bytes },
+	};
+
+	osd_data = osd_req_op_data(osd_req, which, cls, request_data);
+	ceph_osd_data_bvecs_init(osd_data, &it);
+	osd_req->r_ops[which].cls.indata_len += bytes;
+	osd_req->r_ops[which].indata_len += bytes;
+}
+EXPORT_SYMBOL(osd_req_op_cls_request_data_bvecs);
+
 void osd_req_op_cls_response_data_pages(struct ceph_osd_request *osd_req,
 			unsigned int which, struct page **pages, u64 length,
 			u32 alignment, bool pages_from_pool, bool own_pages)
@@ -290,6 +327,8 @@ static u64 ceph_osd_data_length(struct ceph_osd_data *osd_data)
 	case CEPH_OSD_DATA_TYPE_BIO:
 		return (u64)osd_data->bio_length;
 #endif /* CONFIG_BLOCK */
+	case CEPH_OSD_DATA_TYPE_BVECS:
+		return osd_data->bvec_pos.iter.bi_size;
 	default:
 		WARN(true, "unrecognized data type %d\n", (int)osd_data->type);
 		return 0;
@@ -828,8 +867,10 @@ static void ceph_osdc_msg_data_add(struct ceph_msg *msg,
 		ceph_msg_data_add_pagelist(msg, osd_data->pagelist);
 #ifdef CONFIG_BLOCK
 	} else if (osd_data->type == CEPH_OSD_DATA_TYPE_BIO) {
-		ceph_msg_data_add_bio(msg, osd_data->bio, length);
+		ceph_msg_data_add_bio(msg, &osd_data->bio_pos, length);
 #endif
+	} else if (osd_data->type == CEPH_OSD_DATA_TYPE_BVECS) {
+		ceph_msg_data_add_bvecs(msg, &osd_data->bvec_pos);
 	} else {
 		BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_NONE);
 	}
@@ -5065,7 +5106,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
 }
 EXPORT_SYMBOL(ceph_osdc_writepages);
 
-int ceph_osdc_setup(void)
+int __init ceph_osdc_setup(void)
 {
 	size_t size = sizeof(struct ceph_osd_request) +
 	    CEPH_OSD_SLAB_OPS * sizeof(struct ceph_osd_req_op);
@@ -5076,7 +5117,6 @@ int ceph_osdc_setup(void)
 
 	return ceph_osd_request_cache ? 0 : -ENOMEM;
 }
-EXPORT_SYMBOL(ceph_osdc_setup);
 
 void ceph_osdc_cleanup(void)
 {
@@ -5084,7 +5124,6 @@ void ceph_osdc_cleanup(void)
 	kmem_cache_destroy(ceph_osd_request_cache);
 	ceph_osd_request_cache = NULL;
 }
-EXPORT_SYMBOL(ceph_osdc_cleanup);
 
 /*
  * handle incoming message
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 0da27c66349a..9645ffd6acfb 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -4,7 +4,6 @@
 
 #include <linux/module.h>
 #include <linux/slab.h>
-#include <asm/div64.h>
 
 #include <linux/ceph/libceph.h>
 #include <linux/ceph/osdmap.h>
@@ -2141,76 +2140,6 @@ bool ceph_osds_changed(const struct ceph_osds *old_acting,
 }
 
 /*
- * calculate file layout from given offset, length.
- * fill in correct oid, logical length, and object extent
- * offset, length.
- *
- * for now, we write only a single su, until we can
- * pass a stride back to the caller.
- */
-int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
-				   u64 off, u64 len,
-				   u64 *ono,
-				   u64 *oxoff, u64 *oxlen)
-{
-	u32 osize = layout->object_size;
-	u32 su = layout->stripe_unit;
-	u32 sc = layout->stripe_count;
-	u32 bl, stripeno, stripepos, objsetno;
-	u32 su_per_object;
-	u64 t, su_offset;
-
-	dout("mapping %llu~%llu  osize %u fl_su %u\n", off, len,
-	     osize, su);
-	if (su == 0 || sc == 0)
-		goto invalid;
-	su_per_object = osize / su;
-	if (su_per_object == 0)
-		goto invalid;
-	dout("osize %u / su %u = su_per_object %u\n", osize, su,
-	     su_per_object);
-
-	if ((su & ~PAGE_MASK) != 0)
-		goto invalid;
-
-	/* bl = *off / su; */
-	t = off;
-	do_div(t, su);
-	bl = t;
-	dout("off %llu / su %u = bl %u\n", off, su, bl);
-
-	stripeno = bl / sc;
-	stripepos = bl % sc;
-	objsetno = stripeno / su_per_object;
-
-	*ono = objsetno * sc + stripepos;
-	dout("objset %u * sc %u = ono %u\n", objsetno, sc, (unsigned int)*ono);
-
-	/* *oxoff = *off % layout->fl_stripe_unit;  # offset in su */
-	t = off;
-	su_offset = do_div(t, su);
-	*oxoff = su_offset + (stripeno % su_per_object) * su;
-
-	/*
-	 * Calculate the length of the extent being written to the selected
-	 * object. This is the minimum of the full length requested (len) or
-	 * the remainder of the current stripe being written to.
-	 */
-	*oxlen = min_t(u64, len, su - su_offset);
-
-	dout(" obj extent %llu~%llu\n", *oxoff, *oxlen);
-	return 0;
-
-invalid:
-	dout(" invalid layout\n");
-	*ono = 0;
-	*oxoff = 0;
-	*oxlen = 0;
-	return -EINVAL;
-}
-EXPORT_SYMBOL(ceph_calc_file_object_mapping);
-
-/*
  * Map an object into a PG.
  *
  * Should only be called with target_oid and target_oloc (as opposed to
diff --git a/net/ceph/striper.c b/net/ceph/striper.c
new file mode 100644
index 000000000000..c36462dc86b7
--- /dev/null
+++ b/net/ceph/striper.c
@@ -0,0 +1,261 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/math64.h>
+#include <linux/slab.h>
+
+#include <linux/ceph/striper.h>
+#include <linux/ceph/types.h>
+
+/*
+ * Map a file extent to a stripe unit within an object.
+ * Fill in objno, offset into object, and object extent length (i.e. the
+ * number of bytes mapped, less than or equal to @l->stripe_unit).
+ *
+ * Example for stripe_count = 3, stripes_per_object = 4:
+ *
+ * blockno   |  0  3  6  9 |  1  4  7 10 |  2  5  8 11 | 12 15 18 21 | 13 16 19
+ * stripeno  |  0  1  2  3 |  0  1  2  3 |  0  1  2  3 |  4  5  6  7 |  4  5  6
+ * stripepos |      0      |      1      |      2      |      0      |      1
+ * objno     |      0      |      1      |      2      |      3      |      4
+ * objsetno  |                    0                    |                    1
+ */
+void ceph_calc_file_object_mapping(struct ceph_file_layout *l,
+				   u64 off, u64 len,
+				   u64 *objno, u64 *objoff, u32 *xlen)
+{
+	u32 stripes_per_object = l->object_size / l->stripe_unit;
+	u64 blockno;	/* which su in the file (i.e. globally) */
+	u32 blockoff;	/* offset into su */
+	u64 stripeno;	/* which stripe */
+	u32 stripepos;	/* which su in the stripe,
+			   which object in the object set */
+	u64 objsetno;	/* which object set */
+	u32 objsetpos;	/* which stripe in the object set */
+
+	blockno = div_u64_rem(off, l->stripe_unit, &blockoff);
+	stripeno = div_u64_rem(blockno, l->stripe_count, &stripepos);
+	objsetno = div_u64_rem(stripeno, stripes_per_object, &objsetpos);
+
+	*objno = objsetno * l->stripe_count + stripepos;
+	*objoff = objsetpos * l->stripe_unit + blockoff;
+	*xlen = min_t(u64, len, l->stripe_unit - blockoff);
+}
+EXPORT_SYMBOL(ceph_calc_file_object_mapping);
+
+/*
+ * Return the last extent with given objno (@object_extents is sorted
+ * by objno).  If not found, return NULL and set @add_pos so that the
+ * new extent can be added with list_add(&new_ex->oe_item, add_pos).
+ */
+static struct ceph_object_extent *
+lookup_last(struct list_head *object_extents, u64 objno,
+	    struct list_head **add_pos)
+{
+	struct list_head *pos;
+
+	list_for_each_prev(pos, object_extents) {
+		struct ceph_object_extent *ex =
+		    list_entry(pos, typeof(*ex), oe_item);
+
+		if (ex->oe_objno == objno)
+			return ex;
+
+		if (ex->oe_objno < objno)
+			break;
+	}
+
+	*add_pos = pos;
+	return NULL;
+}
+
+static struct ceph_object_extent *
+lookup_containing(struct list_head *object_extents, u64 objno,
+		  u64 objoff, u32 xlen)
+{
+	struct ceph_object_extent *ex;
+
+	list_for_each_entry(ex, object_extents, oe_item) {
+		if (ex->oe_objno == objno &&
+		    ex->oe_off <= objoff &&
+		    ex->oe_off + ex->oe_len >= objoff + xlen) /* paranoia */
+			return ex;
+
+		if (ex->oe_objno > objno)
+			break;
+	}
+
+	return NULL;
+}
+
+/*
+ * Map a file extent to a sorted list of object extents.
+ *
+ * We want only one (or as few as possible) object extents per object.
+ * Adjacent object extents will be merged together; each returned object
+ * extent may reverse map to multiple different file extents.
+ *
+ * Call @alloc_fn for each new object extent and @action_fn for each
+ * mapped stripe unit, whether it was merged into an already allocated
+ * object extent or started a new object extent.
+ *
+ * Newly allocated object extents are added to @object_extents.
+ * To keep @object_extents sorted, successive calls to this function
+ * must map successive file extents (i.e. the list of file extents that
+ * are mapped using the same @object_extents must be sorted).
+ *
+ * The caller is responsible for @object_extents.
+ */
+int ceph_file_to_extents(struct ceph_file_layout *l, u64 off, u64 len,
+			 struct list_head *object_extents,
+			 struct ceph_object_extent *alloc_fn(void *arg),
+			 void *alloc_arg,
+			 ceph_object_extent_fn_t action_fn,
+			 void *action_arg)
+{
+	struct ceph_object_extent *last_ex, *ex;
+
+	while (len) {
+		struct list_head *add_pos = NULL;
+		u64 objno, objoff;
+		u32 xlen;
+
+		ceph_calc_file_object_mapping(l, off, len, &objno, &objoff,
+					      &xlen);
+
+		last_ex = lookup_last(object_extents, objno, &add_pos);
+		if (!last_ex || last_ex->oe_off + last_ex->oe_len != objoff) {
+			ex = alloc_fn(alloc_arg);
+			if (!ex)
+				return -ENOMEM;
+
+			ex->oe_objno = objno;
+			ex->oe_off = objoff;
+			ex->oe_len = xlen;
+			if (action_fn)
+				action_fn(ex, xlen, action_arg);
+
+			if (!last_ex)
+				list_add(&ex->oe_item, add_pos);
+			else
+				list_add(&ex->oe_item, &last_ex->oe_item);
+		} else {
+			last_ex->oe_len += xlen;
+			if (action_fn)
+				action_fn(last_ex, xlen, action_arg);
+		}
+
+		off += xlen;
+		len -= xlen;
+	}
+
+	for (last_ex = list_first_entry(object_extents, typeof(*ex), oe_item),
+	     ex = list_next_entry(last_ex, oe_item);
+	     &ex->oe_item != object_extents;
+	     last_ex = ex, ex = list_next_entry(ex, oe_item)) {
+		if (last_ex->oe_objno > ex->oe_objno ||
+		    (last_ex->oe_objno == ex->oe_objno &&
+		     last_ex->oe_off + last_ex->oe_len >= ex->oe_off)) {
+			WARN(1, "%s: object_extents list not sorted!\n",
+			     __func__);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(ceph_file_to_extents);
+
+/*
+ * A stripped down, non-allocating version of ceph_file_to_extents(),
+ * for when @object_extents is already populated.
+ */
+int ceph_iterate_extents(struct ceph_file_layout *l, u64 off, u64 len,
+			 struct list_head *object_extents,
+			 ceph_object_extent_fn_t action_fn,
+			 void *action_arg)
+{
+	while (len) {
+		struct ceph_object_extent *ex;
+		u64 objno, objoff;
+		u32 xlen;
+
+		ceph_calc_file_object_mapping(l, off, len, &objno, &objoff,
+					      &xlen);
+
+		ex = lookup_containing(object_extents, objno, objoff, xlen);
+		if (!ex) {
+			WARN(1, "%s: objno %llu %llu~%u not found!\n",
+			     __func__, objno, objoff, xlen);
+			return -EINVAL;
+		}
+
+		action_fn(ex, xlen, action_arg);
+
+		off += xlen;
+		len -= xlen;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(ceph_iterate_extents);
+
+/*
+ * Reverse map an object extent to a sorted list of file extents.
+ *
+ * On success, the caller is responsible for:
+ *
+ *     kfree(file_extents)
+ */
+int ceph_extent_to_file(struct ceph_file_layout *l,
+			u64 objno, u64 objoff, u64 objlen,
+			struct ceph_file_extent **file_extents,
+			u32 *num_file_extents)
+{
+	u32 stripes_per_object = l->object_size / l->stripe_unit;
+	u64 blockno;	/* which su */
+	u32 blockoff;	/* offset into su */
+	u64 stripeno;	/* which stripe */
+	u32 stripepos;	/* which su in the stripe,
+			   which object in the object set */
+	u64 objsetno;	/* which object set */
+	u32 i = 0;
+
+	if (!objlen) {
+		*file_extents = NULL;
+		*num_file_extents = 0;
+		return 0;
+	}
+
+	*num_file_extents = DIV_ROUND_UP_ULL(objoff + objlen, l->stripe_unit) -
+				     DIV_ROUND_DOWN_ULL(objoff, l->stripe_unit);
+	*file_extents = kmalloc_array(*num_file_extents, sizeof(**file_extents),
+				      GFP_NOIO);
+	if (!*file_extents)
+		return -ENOMEM;
+
+	div_u64_rem(objoff, l->stripe_unit, &blockoff);
+	while (objlen) {
+		u64 off, len;
+
+		objsetno = div_u64_rem(objno, l->stripe_count, &stripepos);
+		stripeno = div_u64(objoff, l->stripe_unit) +
+						objsetno * stripes_per_object;
+		blockno = stripeno * l->stripe_count + stripepos;
+		off = blockno * l->stripe_unit + blockoff;
+		len = min_t(u64, objlen, l->stripe_unit - blockoff);
+
+		(*file_extents)[i].fe_off = off;
+		(*file_extents)[i].fe_len = len;
+
+		blockoff = 0;
+		objoff += len;
+		objlen -= len;
+		i++;
+	}
+
+	BUG_ON(i != *num_file_extents);
+	return 0;
+}
+EXPORT_SYMBOL(ceph_extent_to_file);
diff --git a/net/core/dev.c b/net/core/dev.c
index 9b04a9fd1dfd..969462ebb296 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1027,7 +1027,7 @@ bool dev_valid_name(const char *name)
 {
 	if (*name == '\0')
 		return false;
-	if (strlen(name) >= IFNAMSIZ)
+	if (strnlen(name, IFNAMSIZ) == IFNAMSIZ)
 		return false;
 	if (!strcmp(name, ".") || !strcmp(name, ".."))
 		return false;
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index c0548d268e1a..e3e6a3e2ca22 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -57,8 +57,8 @@ static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
 		return -EINVAL;
 
 	list_for_each_entry(ha, &list->list, list) {
-		if (!memcmp(ha->addr, addr, addr_len) &&
-		    ha->type == addr_type) {
+		if (ha->type == addr_type &&
+		    !memcmp(ha->addr, addr, addr_len)) {
 			if (global) {
 				/* check if addr is already used as global */
 				if (ha->global_use)
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 9236e421bd62..ad1317376798 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -2405,6 +2405,16 @@ devlink_resource_size_params_put(struct devlink_resource *resource,
 	return 0;
 }
 
+static int devlink_resource_occ_put(struct devlink_resource *resource,
+				    struct sk_buff *skb)
+{
+	if (!resource->occ_get)
+		return 0;
+	return nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_OCC,
+				 resource->occ_get(resource->occ_get_priv),
+				 DEVLINK_ATTR_PAD);
+}
+
 static int devlink_resource_put(struct devlink *devlink, struct sk_buff *skb,
 				struct devlink_resource *resource)
 {
@@ -2425,11 +2435,8 @@ static int devlink_resource_put(struct devlink *devlink, struct sk_buff *skb,
 	if (resource->size != resource->size_new)
 		nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_NEW,
 				  resource->size_new, DEVLINK_ATTR_PAD);
-	if (resource->resource_ops && resource->resource_ops->occ_get)
-		if (nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_OCC,
-				      resource->resource_ops->occ_get(devlink),
-				      DEVLINK_ATTR_PAD))
-			goto nla_put_failure;
+	if (devlink_resource_occ_put(resource, skb))
+		goto nla_put_failure;
 	if (devlink_resource_size_params_put(resource, skb))
 		goto nla_put_failure;
 	if (list_empty(&resource->resource_list))
@@ -3162,15 +3169,13 @@ EXPORT_SYMBOL_GPL(devlink_dpipe_table_unregister);
  *	@resource_id: resource's id
  *	@parent_reosurce_id: resource's parent id
  *	@size params: size parameters
- *	@resource_ops: resource ops
  */
 int devlink_resource_register(struct devlink *devlink,
 			      const char *resource_name,
 			      u64 resource_size,
 			      u64 resource_id,
 			      u64 parent_resource_id,
-			      const struct devlink_resource_size_params *size_params,
-			      const struct devlink_resource_ops *resource_ops)
+			      const struct devlink_resource_size_params *size_params)
 {
 	struct devlink_resource *resource;
 	struct list_head *resource_list;
@@ -3213,7 +3218,6 @@ int devlink_resource_register(struct devlink *devlink,
 	resource->size = resource_size;
 	resource->size_new = resource_size;
 	resource->id = resource_id;
-	resource->resource_ops = resource_ops;
 	resource->size_valid = true;
 	memcpy(&resource->size_params, size_params,
 	       sizeof(resource->size_params));
@@ -3315,6 +3319,58 @@ out:
 }
 EXPORT_SYMBOL_GPL(devlink_dpipe_table_resource_set);
 
+/**
+ *	devlink_resource_occ_get_register - register occupancy getter
+ *
+ *	@devlink: devlink
+ *	@resource_id: resource id
+ *	@occ_get: occupancy getter callback
+ *	@occ_get_priv: occupancy getter callback priv
+ */
+void devlink_resource_occ_get_register(struct devlink *devlink,
+				       u64 resource_id,
+				       devlink_resource_occ_get_t *occ_get,
+				       void *occ_get_priv)
+{
+	struct devlink_resource *resource;
+
+	mutex_lock(&devlink->lock);
+	resource = devlink_resource_find(devlink, NULL, resource_id);
+	if (WARN_ON(!resource))
+		goto out;
+	WARN_ON(resource->occ_get);
+
+	resource->occ_get = occ_get;
+	resource->occ_get_priv = occ_get_priv;
+out:
+	mutex_unlock(&devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devlink_resource_occ_get_register);
+
+/**
+ *	devlink_resource_occ_get_unregister - unregister occupancy getter
+ *
+ *	@devlink: devlink
+ *	@resource_id: resource id
+ */
+void devlink_resource_occ_get_unregister(struct devlink *devlink,
+					 u64 resource_id)
+{
+	struct devlink_resource *resource;
+
+	mutex_lock(&devlink->lock);
+	resource = devlink_resource_find(devlink, NULL, resource_id);
+	if (WARN_ON(!resource))
+		goto out;
+	WARN_ON(!resource->occ_get);
+
+	resource->occ_get = NULL;
+	resource->occ_get_priv = NULL;
+out:
+	mutex_unlock(&devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devlink_resource_occ_get_unregister);
+
 static int __init devlink_module_init(void)
 {
 	return genl_register_family(&devlink_nl_family);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 1bca1e0fc8f7..345b51837ca8 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -857,6 +857,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
 	n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len;
 	n->cloned = 1;
 	n->nohdr = 0;
+	n->peeked = 0;
 	n->destructor = NULL;
 	C(tail);
 	C(end);
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index e65fcb45c3f6..b08feb219b44 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -614,6 +614,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	ireq = inet_rsk(req);
 	sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
 	sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
+	ireq->ir_mark = inet_request_mark(sk, skb);
 	ireq->ireq_family = AF_INET;
 	ireq->ir_iif = sk->sk_bound_dev_if;
 
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 5df7857fc0f3..6344f1b18a6a 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -351,6 +351,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
 	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
 	ireq->ireq_family = AF_INET6;
+	ireq->ir_mark = inet_request_mark(sk, skb);
 
 	if (ipv6_opt_accepted(sk, skb, IP6CB(skb)) ||
 	    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 70de7895e5b8..053731473c99 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -126,6 +126,7 @@ static inline struct net_device *dsa_master_find_slave(struct net_device *dev,
 	struct dsa_port *cpu_dp = dev->dsa_ptr;
 	struct dsa_switch_tree *dst = cpu_dp->dst;
 	struct dsa_switch *ds;
+	struct dsa_port *slave_port;
 
 	if (device < 0 || device >= DSA_MAX_SWITCHES)
 		return NULL;
@@ -137,7 +138,12 @@ static inline struct net_device *dsa_master_find_slave(struct net_device *dev,
 	if (port < 0 || port >= ds->num_ports)
 		return NULL;
 
-	return ds->ports[port].slave;
+	slave_port = &ds->ports[port];
+
+	if (unlikely(slave_port->type != DSA_PORT_TYPE_USER))
+		return NULL;
+
+	return slave_port->slave;
 }
 
 /* port.c */
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index be4c595edccb..bf6c2d4d4fdc 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -437,7 +437,7 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
 	/*unsigned long now; */
 	struct net *net = dev_net(dev);
 
-	rt = ip_route_output(net, sip, tip, 0, 0);
+	rt = ip_route_output(net, sip, tip, 0, l3mdev_master_ifindex_rcu(dev));
 	if (IS_ERR(rt))
 		return 1;
 	if (rt->dst.dev != dev) {
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index c3ea4906d237..88c5069b5d20 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -178,6 +178,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
 		tw->tw_dport	    = inet->inet_dport;
 		tw->tw_family	    = sk->sk_family;
 		tw->tw_reuse	    = sk->sk_reuse;
+		tw->tw_reuseport    = sk->sk_reuseport;
 		tw->tw_hash	    = sk->sk_hash;
 		tw->tw_ipv6only	    = 0;
 		tw->tw_transparent  = inet->transparent;
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 1f04bd91fc2e..d757b9642d0d 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -211,6 +211,7 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base,
 		p = kmem_cache_alloc(peer_cachep, GFP_ATOMIC);
 		if (p) {
 			p->daddr = *daddr;
+			p->dtime = (__u32)jiffies;
 			refcount_set(&p->refcnt, 2);
 			atomic_set(&p->rid, 0);
 			p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index a8772a978224..9c169bb2444d 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -781,8 +781,14 @@ static void ipgre_link_update(struct net_device *dev, bool set_mtu)
 		    tunnel->encap.type == TUNNEL_ENCAP_NONE) {
 			dev->features |= NETIF_F_GSO_SOFTWARE;
 			dev->hw_features |= NETIF_F_GSO_SOFTWARE;
+		} else {
+			dev->features &= ~NETIF_F_GSO_SOFTWARE;
+			dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
 		}
 		dev->features |= NETIF_F_LLTX;
+	} else {
+		dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
+		dev->features &= ~(NETIF_F_LLTX | NETIF_F_GSO_SOFTWARE);
 	}
 }
 
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index de6d94482fe7..6b0e362cc99b 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -253,13 +253,14 @@ static struct net_device *__ip_tunnel_create(struct net *net,
 	struct net_device *dev;
 	char name[IFNAMSIZ];
 
-	if (parms->name[0])
+	err = -E2BIG;
+	if (parms->name[0]) {
+		if (!dev_valid_name(parms->name))
+			goto failed;
 		strlcpy(name, parms->name, IFNAMSIZ);
-	else {
-		if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
-			err = -E2BIG;
+	} else {
+		if (strlen(ops->kind) > (IFNAMSIZ - 3))
 			goto failed;
-		}
 		strlcpy(name, ops->kind, IFNAMSIZ);
 		strncat(name, "%d", 2);
 	}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 594a1c605c92..ccb25d80f679 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2296,13 +2296,14 @@ struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
 					const struct sk_buff *skb)
 {
 	__u8 tos = RT_FL_TOS(fl4);
-	struct fib_result res;
+	struct fib_result res = {
+		.type		= RTN_UNSPEC,
+		.fi		= NULL,
+		.table		= NULL,
+		.tclassid	= 0,
+	};
 	struct rtable *rth;
 
-	res.tclassid	= 0;
-	res.fi		= NULL;
-	res.table	= NULL;
-
 	fl4->flowi4_iif = LOOPBACK_IFINDEX;
 	fl4->flowi4_tos = tos & IPTOS_RT_MASK;
 	fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index f8a103bdbd60..69727bc168cb 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -335,11 +335,13 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net,
 	if (t || !create)
 		return t;
 
-	if (parms->name[0])
+	if (parms->name[0]) {
+		if (!dev_valid_name(parms->name))
+			return NULL;
 		strlcpy(name, parms->name, IFNAMSIZ);
-	else
+	} else {
 		strcpy(name, "ip6gre%d");
-
+	}
 	dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN,
 			   ip6gre_tunnel_setup);
 	if (!dev)
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index b8ee50e94af3..2e891d2c30ef 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -375,6 +375,11 @@ static int ip6_forward_proxy_check(struct sk_buff *skb)
 static inline int ip6_forward_finish(struct net *net, struct sock *sk,
 				     struct sk_buff *skb)
 {
+	struct dst_entry *dst = skb_dst(skb);
+
+	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
+	__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
+
 	return dst_output(net, sk, skb);
 }
 
@@ -569,8 +574,6 @@ int ip6_forward(struct sk_buff *skb)
 
 	hdr->hop_limit--;
 
-	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
-	__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
 	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
 		       net, NULL, skb, skb->dev, dst->dev,
 		       ip6_forward_finish);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index df4c29f7d59f..da66aaac51ce 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -297,13 +297,16 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
 	struct net_device *dev;
 	struct ip6_tnl *t;
 	char name[IFNAMSIZ];
-	int err = -ENOMEM;
+	int err = -E2BIG;
 
-	if (p->name[0])
+	if (p->name[0]) {
+		if (!dev_valid_name(p->name))
+			goto failed;
 		strlcpy(name, p->name, IFNAMSIZ);
-	else
+	} else {
 		sprintf(name, "ip6tnl%%d");
-
+	}
+	err = -ENOMEM;
 	dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN,
 			   ip6_tnl_dev_setup);
 	if (!dev)
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 6ebb2e8777f4..c214ffec02f0 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -212,10 +212,13 @@ static struct ip6_tnl *vti6_tnl_create(struct net *net, struct __ip6_tnl_parm *p
 	char name[IFNAMSIZ];
 	int err;
 
-	if (p->name[0])
+	if (p->name[0]) {
+		if (!dev_valid_name(p->name))
+			goto failed;
 		strlcpy(name, p->name, IFNAMSIZ);
-	else
+	} else {
 		sprintf(name, "ip6_vti%%d");
+	}
 
 	dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN, vti6_dev_setup);
 	if (!dev)
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 1522bcfd253f..2afce37a7177 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -250,11 +250,13 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net,
 	if (!create)
 		goto failed;
 
-	if (parms->name[0])
+	if (parms->name[0]) {
+		if (!dev_valid_name(parms->name))
+			goto failed;
 		strlcpy(name, parms->name, IFNAMSIZ);
-	else
+	} else {
 		strcpy(name, "sit%d");
-
+	}
 	dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN,
 			   ipip6_tunnel_setup);
 	if (!dev)
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 14b67dfacc4b..0fbd3ee26165 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -335,26 +335,6 @@ err_tlock:
 }
 EXPORT_SYMBOL_GPL(l2tp_session_register);
 
-/* Lookup a tunnel by id
- */
-struct l2tp_tunnel *l2tp_tunnel_find(const struct net *net, u32 tunnel_id)
-{
-	struct l2tp_tunnel *tunnel;
-	struct l2tp_net *pn = l2tp_pernet(net);
-
-	rcu_read_lock_bh();
-	list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
-		if (tunnel->tunnel_id == tunnel_id) {
-			rcu_read_unlock_bh();
-			return tunnel;
-		}
-	}
-	rcu_read_unlock_bh();
-
-	return NULL;
-}
-EXPORT_SYMBOL_GPL(l2tp_tunnel_find);
-
 struct l2tp_tunnel *l2tp_tunnel_find_nth(const struct net *net, int nth)
 {
 	struct l2tp_net *pn = l2tp_pernet(net);
@@ -1436,74 +1416,11 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
 {
 	struct l2tp_tunnel *tunnel = NULL;
 	int err;
-	struct socket *sock = NULL;
-	struct sock *sk = NULL;
-	struct l2tp_net *pn;
 	enum l2tp_encap_type encap = L2TP_ENCAPTYPE_UDP;
 
-	/* Get the tunnel socket from the fd, which was opened by
-	 * the userspace L2TP daemon. If not specified, create a
-	 * kernel socket.
-	 */
-	if (fd < 0) {
-		err = l2tp_tunnel_sock_create(net, tunnel_id, peer_tunnel_id,
-				cfg, &sock);
-		if (err < 0)
-			goto err;
-	} else {
-		sock = sockfd_lookup(fd, &err);
-		if (!sock) {
-			pr_err("tunl %u: sockfd_lookup(fd=%d) returned %d\n",
-			       tunnel_id, fd, err);
-			err = -EBADF;
-			goto err;
-		}
-
-		/* Reject namespace mismatches */
-		if (!net_eq(sock_net(sock->sk), net)) {
-			pr_err("tunl %u: netns mismatch\n", tunnel_id);
-			err = -EINVAL;
-			goto err;
-		}
-	}
-
-	sk = sock->sk;
-
 	if (cfg != NULL)
 		encap = cfg->encap;
 
-	/* Quick sanity checks */
-	err = -EPROTONOSUPPORT;
-	if (sk->sk_type != SOCK_DGRAM) {
-		pr_debug("tunl %hu: fd %d wrong socket type\n",
-			 tunnel_id, fd);
-		goto err;
-	}
-	switch (encap) {
-	case L2TP_ENCAPTYPE_UDP:
-		if (sk->sk_protocol != IPPROTO_UDP) {
-			pr_err("tunl %hu: fd %d wrong protocol, got %d, expected %d\n",
-			       tunnel_id, fd, sk->sk_protocol, IPPROTO_UDP);
-			goto err;
-		}
-		break;
-	case L2TP_ENCAPTYPE_IP:
-		if (sk->sk_protocol != IPPROTO_L2TP) {
-			pr_err("tunl %hu: fd %d wrong protocol, got %d, expected %d\n",
-			       tunnel_id, fd, sk->sk_protocol, IPPROTO_L2TP);
-			goto err;
-		}
-		break;
-	}
-
-	/* Check if this socket has already been prepped */
-	tunnel = l2tp_tunnel(sk);
-	if (tunnel != NULL) {
-		/* This socket has already been prepped */
-		err = -EBUSY;
-		goto err;
-	}
-
 	tunnel = kzalloc(sizeof(struct l2tp_tunnel), GFP_KERNEL);
 	if (tunnel == NULL) {
 		err = -ENOMEM;
@@ -1520,72 +1437,126 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
 	rwlock_init(&tunnel->hlist_lock);
 	tunnel->acpt_newsess = true;
 
-	/* The net we belong to */
-	tunnel->l2tp_net = net;
-	pn = l2tp_pernet(net);
-
 	if (cfg != NULL)
 		tunnel->debug = cfg->debug;
 
-	/* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
 	tunnel->encap = encap;
-	if (encap == L2TP_ENCAPTYPE_UDP) {
-		struct udp_tunnel_sock_cfg udp_cfg = { };
-
-		udp_cfg.sk_user_data = tunnel;
-		udp_cfg.encap_type = UDP_ENCAP_L2TPINUDP;
-		udp_cfg.encap_rcv = l2tp_udp_encap_recv;
-		udp_cfg.encap_destroy = l2tp_udp_encap_destroy;
-
-		setup_udp_tunnel_sock(net, sock, &udp_cfg);
-	} else {
-		sk->sk_user_data = tunnel;
-	}
 
-	/* Bump the reference count. The tunnel context is deleted
-	 * only when this drops to zero. A reference is also held on
-	 * the tunnel socket to ensure that it is not released while
-	 * the tunnel is extant. Must be done before sk_destruct is
-	 * set.
-	 */
 	refcount_set(&tunnel->ref_count, 1);
-	sock_hold(sk);
-	tunnel->sock = sk;
 	tunnel->fd = fd;
 
-	/* Hook on the tunnel socket destructor so that we can cleanup
-	 * if the tunnel socket goes away.
-	 */
-	tunnel->old_sk_destruct = sk->sk_destruct;
-	sk->sk_destruct = &l2tp_tunnel_destruct;
-	lockdep_set_class_and_name(&sk->sk_lock.slock, &l2tp_socket_class, "l2tp_sock");
-
-	sk->sk_allocation = GFP_ATOMIC;
-
 	/* Init delete workqueue struct */
 	INIT_WORK(&tunnel->del_work, l2tp_tunnel_del_work);
 
-	/* Add tunnel to our list */
 	INIT_LIST_HEAD(&tunnel->list);
-	spin_lock_bh(&pn->l2tp_tunnel_list_lock);
-	list_add_rcu(&tunnel->list, &pn->l2tp_tunnel_list);
-	spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
 
 	err = 0;
 err:
 	if (tunnelp)
 		*tunnelp = tunnel;
 
-	/* If tunnel's socket was created by the kernel, it doesn't
-	 *  have a file.
-	 */
-	if (sock && sock->file)
-		sockfd_put(sock);
-
 	return err;
 }
 EXPORT_SYMBOL_GPL(l2tp_tunnel_create);
 
+static int l2tp_validate_socket(const struct sock *sk, const struct net *net,
+				enum l2tp_encap_type encap)
+{
+	if (!net_eq(sock_net(sk), net))
+		return -EINVAL;
+
+	if (sk->sk_type != SOCK_DGRAM)
+		return -EPROTONOSUPPORT;
+
+	if ((encap == L2TP_ENCAPTYPE_UDP && sk->sk_protocol != IPPROTO_UDP) ||
+	    (encap == L2TP_ENCAPTYPE_IP && sk->sk_protocol != IPPROTO_L2TP))
+		return -EPROTONOSUPPORT;
+
+	if (sk->sk_user_data)
+		return -EBUSY;
+
+	return 0;
+}
+
+/* Attach @tunnel to its socket and add it to @net's tunnel list. */
+int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
+			 struct l2tp_tunnel_cfg *cfg)
+{
+	struct l2tp_tunnel *tunnel_walk;
+	struct l2tp_net *pn;
+	struct socket *sock;
+	struct sock *sk;
+	int ret;
+
+	if (tunnel->fd < 0) {
+		ret = l2tp_tunnel_sock_create(net, tunnel->tunnel_id,
+					      tunnel->peer_tunnel_id, cfg,
+					      &sock);
+		if (ret < 0)
+			goto err;
+	} else {
+		sock = sockfd_lookup(tunnel->fd, &ret);
+		if (!sock)
+			goto err;
+
+		ret = l2tp_validate_socket(sock->sk, net, tunnel->encap);
+		if (ret < 0)
+			goto err_sock;
+	}
+
+	sk = sock->sk;
+	sock_hold(sk);
+	tunnel->sock = sk;
+	tunnel->l2tp_net = net;
+	pn = l2tp_pernet(net);
+
+	spin_lock_bh(&pn->l2tp_tunnel_list_lock);
+	list_for_each_entry(tunnel_walk, &pn->l2tp_tunnel_list, list) {
+		if (tunnel_walk->tunnel_id == tunnel->tunnel_id) {
+			spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
+			/* drop the sock_hold() above; err_sock does not */
+			sock_put(sk);
+			ret = -EEXIST;
+			goto err_sock;
+		}
+	}
+	list_add_rcu(&tunnel->list, &pn->l2tp_tunnel_list);
+	spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
+
+	if (tunnel->encap == L2TP_ENCAPTYPE_UDP) {
+		struct udp_tunnel_sock_cfg udp_cfg = {
+			.sk_user_data = tunnel,
+			.encap_type = UDP_ENCAP_L2TPINUDP,
+			.encap_rcv = l2tp_udp_encap_recv,
+			.encap_destroy = l2tp_udp_encap_destroy,
+		};
+
+		setup_udp_tunnel_sock(net, sock, &udp_cfg);
+	} else {
+		sk->sk_user_data = tunnel;
+	}
+
+	tunnel->old_sk_destruct = sk->sk_destruct;
+	sk->sk_destruct = &l2tp_tunnel_destruct;
+	lockdep_set_class_and_name(&sk->sk_lock.slock, &l2tp_socket_class,
+				   "l2tp_sock");
+	sk->sk_allocation = GFP_ATOMIC;
+
+	if (tunnel->fd >= 0)
+		sockfd_put(sock);
+
+	return 0;
+
+err_sock:
+	if (tunnel->fd < 0)
+		sock_release(sock);
+	else
+		sockfd_put(sock);
+err:
+	return ret;
+}
+EXPORT_SYMBOL_GPL(l2tp_tunnel_register);
+
 /* This function is used by the netlink TUNNEL_DELETE command.
  */
 void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel)
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 2718d0b284d0..ba33cbec71eb 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -220,12 +220,14 @@ struct l2tp_session *l2tp_session_get(const struct net *net,
 struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth);
 struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net,
 						const char *ifname);
-struct l2tp_tunnel *l2tp_tunnel_find(const struct net *net, u32 tunnel_id);
 struct l2tp_tunnel *l2tp_tunnel_find_nth(const struct net *net, int nth);
 
 int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id,
 		       u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg,
 		       struct l2tp_tunnel **tunnelp);
+int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
+			 struct l2tp_tunnel_cfg *cfg);
+
 void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel);
 void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel);
 struct l2tp_session *l2tp_session_create(int priv_size,
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index e7ea9c4b89ff..b05dbd9ffcb2 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -236,12 +236,6 @@ static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info
 	if (info->attrs[L2TP_ATTR_DEBUG])
 		cfg.debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]);
 
-	tunnel = l2tp_tunnel_find(net, tunnel_id);
-	if (tunnel != NULL) {
-		ret = -EEXIST;
-		goto out;
-	}
-
 	ret = -EINVAL;
 	switch (cfg.encap) {
 	case L2TP_ENCAPTYPE_UDP:
@@ -251,9 +245,19 @@ static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info
 		break;
 	}
 
-	if (ret >= 0)
-		ret = l2tp_tunnel_notify(&l2tp_nl_family, info,
-					 tunnel, L2TP_CMD_TUNNEL_CREATE);
+	if (ret < 0)
+		goto out;
+
+	l2tp_tunnel_inc_refcount(tunnel);
+	ret = l2tp_tunnel_register(tunnel, net, &cfg);
+	if (ret < 0) {
+		kfree(tunnel);
+		goto out;
+	}
+	ret = l2tp_tunnel_notify(&l2tp_nl_family, info, tunnel,
+				 L2TP_CMD_TUNNEL_CREATE);
+	l2tp_tunnel_dec_refcount(tunnel);
+
 out:
 	return ret;
 }
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index d6deca11da19..896bbca9bdaa 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -698,6 +698,15 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
 			error = l2tp_tunnel_create(sock_net(sk), fd, ver, tunnel_id, peer_tunnel_id, &tcfg, &tunnel);
 			if (error < 0)
 				goto end;
+
+			l2tp_tunnel_inc_refcount(tunnel);
+			error = l2tp_tunnel_register(tunnel, sock_net(sk),
+						     &tcfg);
+			if (error < 0) {
+				kfree(tunnel);
+				goto end;
+			}
+			drop_tunnel = true;
 		}
 	} else {
 		/* Error if we can't find the tunnel */
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index fa556fdef57d..55342c4d5cec 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1844,6 +1844,8 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 
 	if (msg->msg_namelen) {
 		err = -EINVAL;
+		if (msg->msg_namelen < sizeof(struct sockaddr_nl))
+			goto out;
 		if (addr->nl_family != AF_NETLINK)
 			goto out;
 		dst_portid = addr->nl_pid;
diff --git a/net/rds/send.c b/net/rds/send.c
index acad04243b41..94c7f74909be 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 Oracle.  All rights reserved.
+ * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -1017,10 +1017,15 @@ static int rds_send_mprds_hash(struct rds_sock *rs, struct rds_connection *conn)
 	if (conn->c_npaths == 0 && hash != 0) {
 		rds_send_ping(conn, 0);
 
-		if (conn->c_npaths == 0) {
-			wait_event_interruptible(conn->c_hs_waitq,
-						 (conn->c_npaths != 0));
-		}
+		/* The underlying connection is not up yet.  Need to wait
+		 * until it is up to be sure that the non-zero c_path can be
+		 * used.  But if we are interrupted, we have to use the zero
+		 * c_path in case the connection ends up being non-MP capable.
+		 */
+		if (conn->c_npaths == 0)
+			if (wait_event_interruptible(conn->c_hs_waitq,
+						     conn->c_npaths != 0))
+				hash = 0;
 		if (conn->c_npaths == 1)
 			hash = 0;
 	}
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 9092531d45d8..18089c02e557 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -248,10 +248,14 @@ static int tcf_bpf_init_from_efd(struct nlattr **tb, struct tcf_bpf_cfg *cfg)
 
 static void tcf_bpf_cfg_cleanup(const struct tcf_bpf_cfg *cfg)
 {
-	if (cfg->is_ebpf)
-		bpf_prog_put(cfg->filter);
-	else
-		bpf_prog_destroy(cfg->filter);
+	struct bpf_prog *filter = cfg->filter;
+
+	if (filter) {
+		if (cfg->is_ebpf)
+			bpf_prog_put(filter);
+		else
+			bpf_prog_destroy(filter);
+	}
 
 	kfree(cfg->bpf_ops);
 	kfree(cfg->bpf_name);
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index ed8b6a24b9e9..bac47b5d18fd 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -489,6 +489,7 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
 				RCU_INIT_POINTER(*kp, key->next);
 
 				tcf_unbind_filter(tp, &key->res);
+				idr_remove(&ht->handle_idr, key->handle);
 				tcf_exts_get_net(&key->exts);
 				call_rcu(&key->rcu, u32_delete_key_freepf_rcu);
 				return 0;
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 6dd976c8ab61..31083b5035ec 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -757,8 +757,10 @@ static int sctp_v6_addr_to_user(struct sctp_sock *sp, union sctp_addr *addr)
 			sctp_v6_map_v4(addr);
 	}
 
-	if (addr->sa.sa_family == AF_INET)
+	if (addr->sa.sa_family == AF_INET) {
+		memset(addr->v4.sin_zero, 0, sizeof(addr->v4.sin_zero));
 		return sizeof(struct sockaddr_in);
+	}
 	return sizeof(struct sockaddr_in6);
 }
 
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 2a2e094560de..80835ac26d2c 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -357,11 +357,14 @@ static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt,
 	if (!opt->pf->af_supported(addr->sa.sa_family, opt))
 		return NULL;
 
-	/* V4 mapped address are really of AF_INET family */
-	if (addr->sa.sa_family == AF_INET6 &&
-	    ipv6_addr_v4mapped(&addr->v6.sin6_addr) &&
-	    !opt->pf->af_supported(AF_INET, opt))
-		return NULL;
+	if (addr->sa.sa_family == AF_INET6) {
+		if (len < SIN6_LEN_RFC2133)
+			return NULL;
+		/* V4 mapped address are really of AF_INET family */
+		if (ipv6_addr_v4mapped(&addr->v6.sin6_addr) &&
+		    !opt->pf->af_supported(AF_INET, opt))
+			return NULL;
+	}
 
 	/* If we get this far, af is valid. */
 	af = sctp_get_af_specific(addr->sa.sa_family);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 806395687bb6..c2266f387213 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1887,7 +1887,7 @@ call_connect_status(struct rpc_task *task)
 
 	dprint_status(task);
 
-	trace_rpc_connect_status(task, status);
+	trace_rpc_connect_status(task);
 	task->tk_status = 0;
 	switch (status) {
 	case -ECONNREFUSED:
@@ -2014,6 +2014,9 @@ call_transmit_status(struct rpc_task *task)
 	case -EPERM:
 		if (RPC_IS_SOFTCONN(task)) {
 			xprt_end_transmit(task);
+			if (!task->tk_msg.rpc_proc->p_proc)
+				trace_xprt_ping(task->tk_xprt,
+						task->tk_status);
 			rpc_exit(task, task->tk_status);
 			break;
 		}
@@ -2112,6 +2115,9 @@ call_status(struct rpc_task *task)
 	struct rpc_rqst	*req = task->tk_rqstp;
 	int		status;
 
+	if (!task->tk_msg.rpc_proc->p_proc)
+		trace_xprt_ping(task->tk_xprt, task->tk_status);
+
 	if (req->rq_reply_bytes_recvd > 0 && !req->rq_bytes_sent)
 		task->tk_status = req->rq_reply_bytes_recvd;
 
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index d9db2eab3a8d..3fe5d60ab0e2 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -276,7 +276,7 @@ static void rpc_set_active(struct rpc_task *task)
 {
 	rpc_task_set_debuginfo(task);
 	set_bit(RPC_TASK_ACTIVE, &task->tk_runstate);
-	trace_rpc_task_begin(task->tk_client, task, NULL);
+	trace_rpc_task_begin(task, NULL);
 }
 
 /*
@@ -291,7 +291,7 @@ static int rpc_complete_task(struct rpc_task *task)
 	unsigned long flags;
 	int ret;
 
-	trace_rpc_task_complete(task->tk_client, task, NULL);
+	trace_rpc_task_complete(task, NULL);
 
 	spin_lock_irqsave(&wq->lock, flags);
 	clear_bit(RPC_TASK_ACTIVE, &task->tk_runstate);
@@ -358,7 +358,7 @@ static void __rpc_sleep_on_priority(struct rpc_wait_queue *q,
 	dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n",
 			task->tk_pid, rpc_qname(q), jiffies);
 
-	trace_rpc_task_sleep(task->tk_client, task, q);
+	trace_rpc_task_sleep(task, q);
 
 	__rpc_add_wait_queue(q, task, queue_priority);
 
@@ -428,7 +428,7 @@ static void __rpc_do_wake_up_task_on_wq(struct workqueue_struct *wq,
 		return;
 	}
 
-	trace_rpc_task_wakeup(task->tk_client, task, queue);
+	trace_rpc_task_wakeup(task, queue);
 
 	__rpc_remove_wait_queue(queue, task);
 
@@ -780,7 +780,7 @@ static void __rpc_execute(struct rpc_task *task)
 		}
 		if (!do_action)
 			break;
-		trace_rpc_task_run_action(task->tk_client, task, do_action);
+		trace_rpc_task_run_action(task, do_action);
 		do_action(task);
 
 		/*
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 1e671333c3d5..f68aa46c9dd7 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -24,6 +24,8 @@
 #include <linux/sunrpc/metrics.h>
 #include <linux/rcupdate.h>
 
+#include <trace/events/sunrpc.h>
+
 #include "netns.h"
 
 #define RPCDBG_FACILITY	RPCDBG_MISC
@@ -148,7 +150,7 @@ void rpc_count_iostats_metrics(const struct rpc_task *task,
 			       struct rpc_iostats *op_metrics)
 {
 	struct rpc_rqst *req = task->tk_rqstp;
-	ktime_t delta, now;
+	ktime_t backlog, execute, now;
 
 	if (!op_metrics || !req)
 		return;
@@ -164,16 +166,20 @@ void rpc_count_iostats_metrics(const struct rpc_task *task,
 	op_metrics->om_bytes_sent += req->rq_xmit_bytes_sent;
 	op_metrics->om_bytes_recv += req->rq_reply_bytes_recvd;
 
+	backlog = 0;
 	if (ktime_to_ns(req->rq_xtime)) {
-		delta = ktime_sub(req->rq_xtime, task->tk_start);
-		op_metrics->om_queue = ktime_add(op_metrics->om_queue, delta);
+		backlog = ktime_sub(req->rq_xtime, task->tk_start);
+		op_metrics->om_queue = ktime_add(op_metrics->om_queue, backlog);
 	}
+
 	op_metrics->om_rtt = ktime_add(op_metrics->om_rtt, req->rq_rtt);
 
-	delta = ktime_sub(now, task->tk_start);
-	op_metrics->om_execute = ktime_add(op_metrics->om_execute, delta);
+	execute = ktime_sub(now, task->tk_start);
+	op_metrics->om_execute = ktime_add(op_metrics->om_execute, execute);
 
 	spin_unlock(&op_metrics->om_lock);
+
+	trace_rpc_stats_latency(req->rq_task, backlog, req->rq_rtt, execute);
 }
 EXPORT_SYMBOL_GPL(rpc_count_iostats_metrics);
 
diff --git a/net/sunrpc/sunrpc.h b/net/sunrpc/sunrpc.h
index f2b7cb540e61..09a0315ea77b 100644
--- a/net/sunrpc/sunrpc.h
+++ b/net/sunrpc/sunrpc.h
@@ -37,12 +37,6 @@ struct rpc_buffer {
 	char	data[];
 };
 
-static inline int rpc_reply_expected(struct rpc_task *task)
-{
-	return (task->tk_msg.rpc_proc != NULL) &&
-		(task->tk_msg.rpc_proc->p_decode != NULL);
-}
-
 static inline int sock_is_loopback(struct sock *sk)
 {
 	struct dst_entry *dst;
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index e34f4ee7f2b6..30afbd236656 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -1519,6 +1519,88 @@ out:
 EXPORT_SYMBOL_GPL(xdr_process_buf);
 
 /**
+ * xdr_stream_decode_opaque - Decode variable length opaque
+ * @xdr: pointer to xdr_stream
+ * @ptr: caller-supplied buffer that receives a copy of the opaque data
+ * @size: capacity of @ptr in bytes; passed to the inline decoder as its limit
+ *
+ * Return values:
+ *   On success, returns size of object stored in *@ptr (may be zero)
+ *   %-EBADMSG on XDR buffer overflow
+ *   %-EMSGSIZE on overflow of storage buffer @ptr
+ */
+ssize_t xdr_stream_decode_opaque(struct xdr_stream *xdr, void *ptr, size_t size)
+{
+	void *data;
+	ssize_t len;
+
+	len = xdr_stream_decode_opaque_inline(xdr, &data, size);
+	/* Copy out only when a non-empty object was decoded. */
+	if (len > 0)
+		memcpy(ptr, data, len);
+	return len;
+}
+EXPORT_SYMBOL_GPL(xdr_stream_decode_opaque);
+
+/**
+ * xdr_stream_decode_opaque_dup - Decode and duplicate variable length opaque
+ * @xdr: pointer to xdr_stream
+ * @ptr: set to a kmemdup() copy of the opaque data, or to NULL if none is made
+ * @maxlen: maximum acceptable object size
+ * @gfp_flags: GFP mask handed to kmemdup()
+ *
+ * Return values:
+ *   On success, returns size of object stored in *@ptr
+ *   %-EBADMSG on XDR buffer overflow
+ *   %-EMSGSIZE if the size of the object would exceed @maxlen
+ *   %-ENOMEM on memory allocation failure
+ */
+ssize_t xdr_stream_decode_opaque_dup(struct xdr_stream *xdr, void **ptr,
+		size_t maxlen, gfp_t gfp_flags)
+{
+	void *data;
+	ssize_t len;
+
+	len = xdr_stream_decode_opaque_inline(xdr, &data, maxlen);
+	if (len <= 0) {
+		*ptr = NULL;
+		return len;
+	}
+
+	/* kmemdup() yields NULL on failure, so *ptr is NULL on every error */
+	*ptr = kmemdup(data, len, gfp_flags);
+	return *ptr ? len : -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(xdr_stream_decode_opaque_dup);
+
+/**
+ * xdr_stream_decode_string - Decode variable length string
+ * @xdr: pointer to xdr_stream
+ * @str: location to store string
+ * @size: size of storage buffer @str, including the terminating NUL
+ *
+ * Return values:
+ *   On success, returns length of NUL-terminated string stored in *@str
+ *   %-EBADMSG on XDR buffer overflow
+ *   %-EMSGSIZE if the string plus its NUL does not fit in @size bytes
+ */
+ssize_t xdr_stream_decode_string(struct xdr_stream *xdr, char *str, size_t size)
+{
+	ssize_t ret;
+	void *p;
+
+	if (size == 0)	/* no room even for the terminating NUL */
+		return -EMSGSIZE;
+	/* Cap the payload at size - 1 so that str[ret] stays in bounds. */
+	ret = xdr_stream_decode_opaque_inline(xdr, &p, size - 1);
+	if (ret > 0)
+		memcpy(str, p, ret);
+	str[ret > 0 ? ret : 0] = '\0';
+	return ret > 0 ? (ssize_t)strlen(str) : ret;
+}
+EXPORT_SYMBOL_GPL(xdr_stream_decode_string);
+
+/**
  * xdr_stream_decode_string_dup - Decode and duplicate variable length string
  * @xdr: pointer to xdr_stream
  * @str: location to store pointer to string
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 8f0ad4f268da..70f005044f06 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -826,6 +826,7 @@ static void xprt_connect_status(struct rpc_task *task)
  * @xprt: transport on which the original request was transmitted
  * @xid: RPC XID of incoming reply
  *
+ * Caller holds xprt->recv_lock.
  */
 struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid)
 {
@@ -834,6 +835,7 @@ struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid)
 	list_for_each_entry(entry, &xprt->recv, rq_list)
 		if (entry->rq_xid == xid) {
 			trace_xprt_lookup_rqst(xprt, xid, 0);
+			entry->rq_rtt = ktime_sub(ktime_get(), entry->rq_xtime);
 			return entry;
 		}
 
@@ -889,7 +891,13 @@ __must_hold(&req->rq_xprt->recv_lock)
 	}
 }
 
-static void xprt_update_rtt(struct rpc_task *task)
+/**
+ * xprt_update_rtt - Update RPC RTT statistics
+ * @task: RPC request that recently completed
+ *
+ * Caller holds xprt->recv_lock.
+ */
+void xprt_update_rtt(struct rpc_task *task)
 {
 	struct rpc_rqst *req = task->tk_rqstp;
 	struct rpc_rtt *rtt = task->tk_client->cl_rtt;
@@ -902,13 +910,14 @@ static void xprt_update_rtt(struct rpc_task *task)
 		rpc_set_timeo(rtt, timer, req->rq_ntrans - 1);
 	}
 }
+EXPORT_SYMBOL_GPL(xprt_update_rtt);
 
 /**
  * xprt_complete_rqst - called when reply processing is complete
  * @task: RPC request that recently completed
  * @copied: actual number of bytes received from the transport
  *
- * Caller holds transport lock.
+ * Caller holds xprt->recv_lock.
  */
 void xprt_complete_rqst(struct rpc_task *task, int copied)
 {
@@ -920,9 +929,6 @@ void xprt_complete_rqst(struct rpc_task *task, int copied)
 	trace_xprt_complete_rqst(xprt, req->rq_xid, copied);
 
 	xprt->stat.recvs++;
-	req->rq_rtt = ktime_sub(ktime_get(), req->rq_xtime);
-	if (xprt->ops->timer != NULL)
-		xprt_update_rtt(task);
 
 	list_del_init(&req->rq_list);
 	req->rq_private_buf.len = copied;
@@ -1003,7 +1009,7 @@ void xprt_transmit(struct rpc_task *task)
 	struct rpc_rqst	*req = task->tk_rqstp;
 	struct rpc_xprt	*xprt = req->rq_xprt;
 	unsigned int connect_cookie;
-	int status, numreqs;
+	int status;
 
 	dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen);
 
@@ -1027,7 +1033,6 @@ void xprt_transmit(struct rpc_task *task)
 		return;
 
 	connect_cookie = xprt->connect_cookie;
-	req->rq_xtime = ktime_get();
 	status = xprt->ops->send_request(task);
 	trace_xprt_transmit(xprt, req->rq_xid, status);
 	if (status != 0) {
@@ -1042,9 +1047,6 @@ void xprt_transmit(struct rpc_task *task)
 
 	xprt->ops->set_retrans_timeout(task);
 
-	numreqs = atomic_read(&xprt->num_reqs);
-	if (numreqs > xprt->stat.max_slots)
-		xprt->stat.max_slots = numreqs;
 	xprt->stat.sends++;
 	xprt->stat.req_u += xprt->stat.sends - xprt->stat.recvs;
 	xprt->stat.bklog_u += xprt->backlog.qlen;
@@ -1106,14 +1108,15 @@ static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt)
 {
 	struct rpc_rqst *req = ERR_PTR(-EAGAIN);
 
-	if (!atomic_add_unless(&xprt->num_reqs, 1, xprt->max_reqs))
+	if (xprt->num_reqs >= xprt->max_reqs)
 		goto out;
+	++xprt->num_reqs;
 	spin_unlock(&xprt->reserve_lock);
 	req = kzalloc(sizeof(struct rpc_rqst), GFP_NOFS);
 	spin_lock(&xprt->reserve_lock);
 	if (req != NULL)
 		goto out;
-	atomic_dec(&xprt->num_reqs);
+	--xprt->num_reqs;
 	req = ERR_PTR(-ENOMEM);
 out:
 	return req;
@@ -1121,7 +1124,8 @@ out:
 
 static bool xprt_dynamic_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
 {
-	if (atomic_add_unless(&xprt->num_reqs, -1, xprt->min_reqs)) {
+	if (xprt->num_reqs > xprt->min_reqs) {
+		--xprt->num_reqs;
 		kfree(req);
 		return true;
 	}
@@ -1157,6 +1161,8 @@ void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
 	spin_unlock(&xprt->reserve_lock);
 	return;
 out_init_req:
+	xprt->stat.max_slots = max_t(unsigned int, xprt->stat.max_slots,
+				     xprt->num_reqs);
 	task->tk_status = 0;
 	task->tk_rqstp = req;
 	xprt_request_init(task, xprt);
@@ -1224,7 +1230,7 @@ struct rpc_xprt *xprt_alloc(struct net *net, size_t size,
 	else
 		xprt->max_reqs = num_prealloc;
 	xprt->min_reqs = num_prealloc;
-	atomic_set(&xprt->num_reqs, num_prealloc);
+	xprt->num_reqs = num_prealloc;
 
 	return xprt;
 
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index ed1a4a3065ee..47ebac949769 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -44,13 +44,6 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
-	rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE,
-				  DMA_TO_DEVICE, GFP_KERNEL);
-	if (IS_ERR(rb))
-		goto out_fail;
-	req->rl_rdmabuf = rb;
-	xdr_buf_init(&req->rl_hdrbuf, rb->rg_base, rdmab_length(rb));
-
 	size = r_xprt->rx_data.inline_rsize;
 	rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, GFP_KERNEL);
 	if (IS_ERR(rb))
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index d5f95bb39300..5cc68a824f45 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -191,7 +191,7 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 
 	mr = rpcrdma_mr_get(r_xprt);
 	if (!mr)
-		return ERR_PTR(-ENOBUFS);
+		return ERR_PTR(-EAGAIN);
 
 	pageoff = offset_in_page(seg1->mr_offset);
 	seg1->mr_offset -= pageoff;	/* start of page */
@@ -251,6 +251,16 @@ out_maperr:
 	return ERR_PTR(-EIO);
 }
 
+/* Post the Send WR that carries this request's RPC Call message. */
+static int
+fmr_op_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
+{
+	struct ib_send_wr *send_wr = &req->rl_sendctx->sc_wr;
+	struct ib_send_wr *failed_wr;	/* out-parameter; not examined here */
+
+	return ib_post_send(ia->ri_id->qp, send_wr, &failed_wr);
+}
+
 /* Invalidate all memory regions that were registered for "req".
  *
  * Sleeps until it is safe for the host CPU to access the
@@ -305,6 +315,7 @@ out_reset:
 
 const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
 	.ro_map				= fmr_op_map,
+	.ro_send			= fmr_op_send,
 	.ro_unmap_sync			= fmr_op_unmap_sync,
 	.ro_recover_mr			= fmr_op_recover_mr,
 	.ro_open			= fmr_op_open,
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 90f688f19783..c5743a0960be 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -357,8 +357,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 	struct rpcrdma_mr *mr;
 	struct ib_mr *ibmr;
 	struct ib_reg_wr *reg_wr;
-	struct ib_send_wr *bad_wr;
-	int rc, i, n;
+	int i, n;
 	u8 key;
 
 	mr = NULL;
@@ -367,7 +366,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 			rpcrdma_mr_defer_recovery(mr);
 		mr = rpcrdma_mr_get(r_xprt);
 		if (!mr)
-			return ERR_PTR(-ENOBUFS);
+			return ERR_PTR(-EAGAIN);
 	} while (mr->frwr.fr_state != FRWR_IS_INVALID);
 	frwr = &mr->frwr;
 	frwr->fr_state = FRWR_IS_VALID;
@@ -407,22 +406,12 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 	ib_update_fast_reg_key(ibmr, ++key);
 
 	reg_wr = &frwr->fr_regwr;
-	reg_wr->wr.next = NULL;
-	reg_wr->wr.opcode = IB_WR_REG_MR;
-	frwr->fr_cqe.done = frwr_wc_fastreg;
-	reg_wr->wr.wr_cqe = &frwr->fr_cqe;
-	reg_wr->wr.num_sge = 0;
-	reg_wr->wr.send_flags = 0;
 	reg_wr->mr = ibmr;
 	reg_wr->key = ibmr->rkey;
 	reg_wr->access = writing ?
 			 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
 			 IB_ACCESS_REMOTE_READ;
 
-	rc = ib_post_send(ia->ri_id->qp, &reg_wr->wr, &bad_wr);
-	if (rc)
-		goto out_senderr;
-
 	mr->mr_handle = ibmr->rkey;
 	mr->mr_length = ibmr->length;
 	mr->mr_offset = ibmr->iova;
@@ -442,11 +431,40 @@ out_mapmr_err:
 	       frwr->fr_mr, n, mr->mr_nents);
 	rpcrdma_mr_defer_recovery(mr);
 	return ERR_PTR(-EIO);
+}
 
-out_senderr:
-	pr_err("rpcrdma: FRWR registration ib_post_send returned %i\n", rc);
-	rpcrdma_mr_defer_recovery(mr);
-	return ERR_PTR(-ENOTCONN);
+/* Post Send WR containing the RPC Call message.
+ *
+ * Every MR on @req's rl_registered list has its FastReg WR linked
+ * in front of the chain built so far, which starts as the Send WR
+ * alone. One ib_post_send call then posts the whole chain.
+ */
+static int
+frwr_op_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
+{
+	struct ib_send_wr *head = &req->rl_sendctx->sc_wr;
+	struct ib_send_wr *failed_wr;
+	struct rpcrdma_mr *mr;
+
+	list_for_each_entry(mr, &req->rl_registered, mr_list) {
+		struct rpcrdma_frwr *frwr = &mr->frwr;
+		struct ib_send_wr *reg = &frwr->fr_regwr.wr;
+
+		frwr->fr_cqe.done = frwr_wc_fastreg;
+
+		reg->next = head;
+		reg->wr_cqe = &frwr->fr_cqe;
+		reg->num_sge = 0;
+		reg->opcode = IB_WR_REG_MR;
+		reg->send_flags = 0;
+
+		head = reg;
+	}
+
+	/* If ib_post_send fails, the next ->send_request for
+	 * @req will queue these MWs for recovery.
+	 */
+	return ib_post_send(ia->ri_id->qp, head, &failed_wr);
 }
 
 /* Handle a remotely invalidated mr on the @mrs list
@@ -561,6 +579,7 @@ reset_mrs:
 
 const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
 	.ro_map				= frwr_op_map,
+	.ro_send			= frwr_op_send,
 	.ro_reminv			= frwr_op_reminv,
 	.ro_unmap_sync			= frwr_op_unmap_sync,
 	.ro_recover_mr			= frwr_op_recover_mr,
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index f0855a959a27..e8adad33d0bb 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -365,7 +365,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
 		seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
 						   false, &mr);
 		if (IS_ERR(seg))
-			return PTR_ERR(seg);
+			goto out_maperr;
 		rpcrdma_mr_push(mr, &req->rl_registered);
 
 		if (encode_read_segment(xdr, mr, pos) < 0)
@@ -377,6 +377,11 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
 	} while (nsegs);
 
 	return 0;
+
+out_maperr:
+	if (PTR_ERR(seg) == -EAGAIN)
+		xprt_wait_for_buffer_space(rqst->rq_task, NULL);
+	return PTR_ERR(seg);
 }
 
 /* Register and XDR encode the Write list. Supports encoding a list
@@ -423,7 +428,7 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
 		seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
 						   true, &mr);
 		if (IS_ERR(seg))
-			return PTR_ERR(seg);
+			goto out_maperr;
 		rpcrdma_mr_push(mr, &req->rl_registered);
 
 		if (encode_rdma_segment(xdr, mr) < 0)
@@ -440,6 +445,11 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
 	*segcount = cpu_to_be32(nchunks);
 
 	return 0;
+
+out_maperr:
+	if (PTR_ERR(seg) == -EAGAIN)
+		xprt_wait_for_buffer_space(rqst->rq_task, NULL);
+	return PTR_ERR(seg);
 }
 
 /* Register and XDR encode the Reply chunk. Supports encoding an array
@@ -481,7 +491,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
 		seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
 						   true, &mr);
 		if (IS_ERR(seg))
-			return PTR_ERR(seg);
+			goto out_maperr;
 		rpcrdma_mr_push(mr, &req->rl_registered);
 
 		if (encode_rdma_segment(xdr, mr) < 0)
@@ -498,6 +508,11 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
 	*segcount = cpu_to_be32(nchunks);
 
 	return 0;
+
+out_maperr:
+	if (PTR_ERR(seg) == -EAGAIN)
+		xprt_wait_for_buffer_space(rqst->rq_task, NULL);
+	return PTR_ERR(seg);
 }
 
 /**
@@ -724,8 +739,8 @@ rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt,
  * Returns:
  *	%0 if the RPC was sent successfully,
  *	%-ENOTCONN if the connection was lost,
- *	%-EAGAIN if not enough pages are available for on-demand reply buffer,
- *	%-ENOBUFS if no MRs are available to register chunks,
+ *	%-EAGAIN if the caller should call again with the same arguments,
+ *	%-ENOBUFS if the caller should call again after a delay,
  *	%-EMSGSIZE if the transport header is too small,
  *	%-EIO if a permanent problem occurred while marshaling.
  */
@@ -868,10 +883,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
 	return 0;
 
 out_err:
-	if (ret != -ENOBUFS) {
-		pr_err("rpcrdma: header marshaling failed (%d)\n", ret);
-		r_xprt->rx_stats.failed_marshal_count++;
-	}
+	r_xprt->rx_stats.failed_marshal_count++;
 	return ret;
 }
 
@@ -1366,7 +1378,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
 
 	trace_xprtrdma_reply(rqst->rq_task, rep, req, credits);
 
-	queue_work_on(req->rl_cpu, rpcrdma_receive_wq, &rep->rr_work);
+	queue_work(rpcrdma_receive_wq, &rep->rr_work);
 	return;
 
 out_badstatus:
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 4b1ecfe979cf..cc1aad325496 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -52,7 +52,6 @@
 #include <linux/slab.h>
 #include <linux/seq_file.h>
 #include <linux/sunrpc/addr.h>
-#include <linux/smp.h>
 
 #include "xprt_rdma.h"
 
@@ -237,8 +236,6 @@ rpcrdma_connect_worker(struct work_struct *work)
 	struct rpc_xprt *xprt = &r_xprt->rx_xprt;
 
 	spin_lock_bh(&xprt->transport_lock);
-	if (++xprt->connect_cookie == 0)	/* maintain a reserved value */
-		++xprt->connect_cookie;
 	if (ep->rep_connected > 0) {
 		if (!xprt_test_and_set_connected(xprt))
 			xprt_wake_pending_tasks(xprt, 0);
@@ -540,29 +537,6 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
 	}
 }
 
-/* Allocate a fixed-size buffer in which to construct and send the
- * RPC-over-RDMA header for this request.
- */
-static bool
-rpcrdma_get_rdmabuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
-		    gfp_t flags)
-{
-	size_t size = RPCRDMA_HDRBUF_SIZE;
-	struct rpcrdma_regbuf *rb;
-
-	if (req->rl_rdmabuf)
-		return true;
-
-	rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, flags);
-	if (IS_ERR(rb))
-		return false;
-
-	r_xprt->rx_stats.hardway_register_count += size;
-	req->rl_rdmabuf = rb;
-	xdr_buf_init(&req->rl_hdrbuf, rb->rg_base, rdmab_length(rb));
-	return true;
-}
-
 static bool
 rpcrdma_get_sendbuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
 		    size_t size, gfp_t flags)
@@ -644,15 +618,11 @@ xprt_rdma_allocate(struct rpc_task *task)
 	if (RPC_IS_SWAPPER(task))
 		flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
 
-	if (!rpcrdma_get_rdmabuf(r_xprt, req, flags))
-		goto out_fail;
 	if (!rpcrdma_get_sendbuf(r_xprt, req, rqst->rq_callsize, flags))
 		goto out_fail;
 	if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags))
 		goto out_fail;
 
-	req->rl_cpu = smp_processor_id();
-	req->rl_connect_cookie = 0;	/* our reserved value */
 	rpcrdma_set_xprtdata(rqst, req);
 	rqst->rq_buffer = req->rl_sendbuf->rg_base;
 	rqst->rq_rbuffer = req->rl_recvbuf->rg_base;
@@ -694,7 +664,8 @@ xprt_rdma_free(struct rpc_task *task)
  * Returns:
  *	%0 if the RPC message has been sent
  *	%-ENOTCONN if the caller should reconnect and call again
- *	%-ENOBUFS if the caller should call again later
+ *	%-EAGAIN if the caller should call again
+ *	%-ENOBUFS if the caller should call again after a delay
  *	%-EIO if a permanent error occurred and the request was not
  *		sent. Do not try to send this message again.
  */
@@ -723,9 +694,9 @@ xprt_rdma_send_request(struct rpc_task *task)
 		rpcrdma_recv_buffer_get(req);
 
 	/* Must suppress retransmit to maintain credits */
-	if (req->rl_connect_cookie == xprt->connect_cookie)
+	if (rqst->rq_connect_cookie == xprt->connect_cookie)
 		goto drop_connection;
-	req->rl_connect_cookie = xprt->connect_cookie;
+	rqst->rq_xtime = ktime_get();
 
 	__set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
 	if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
@@ -733,6 +704,12 @@ xprt_rdma_send_request(struct rpc_task *task)
 
 	rqst->rq_xmit_bytes_sent += rqst->rq_snd_buf.len;
 	rqst->rq_bytes_sent = 0;
+
+	/* An RPC with no reply will throw off credit accounting,
+	 * so drop the connection to reset the credit grant.
+	 */
+	if (!rpc_reply_expected(task))
+		goto drop_connection;
 	return 0;
 
 failed_marshal:
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index e6f84a6434a0..fe5eaca2d197 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -250,11 +250,11 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
 		wait_for_completion(&ia->ri_remove_done);
 
 		ia->ri_id = NULL;
-		ia->ri_pd = NULL;
 		ia->ri_device = NULL;
 		/* Return 1 to ensure the core destroys the id. */
 		return 1;
 	case RDMA_CM_EVENT_ESTABLISHED:
+		++xprt->rx_xprt.connect_cookie;
 		connstate = 1;
 		rpcrdma_update_connect_private(xprt, &event->param.conn);
 		goto connected;
@@ -273,6 +273,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
 			connstate = -EAGAIN;
 		goto connected;
 	case RDMA_CM_EVENT_DISCONNECTED:
+		++xprt->rx_xprt.connect_cookie;
 		connstate = -ECONNABORTED;
 connected:
 		xprt->rx_buf.rb_credits = 1;
@@ -445,7 +446,9 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia)
 		ia->ri_id->qp = NULL;
 	}
 	ib_free_cq(ep->rep_attr.recv_cq);
+	ep->rep_attr.recv_cq = NULL;
 	ib_free_cq(ep->rep_attr.send_cq);
+	ep->rep_attr.send_cq = NULL;
 
 	/* The ULP is responsible for ensuring all DMA
 	 * mappings and MRs are gone.
@@ -458,6 +461,8 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia)
 		rpcrdma_dma_unmap_regbuf(req->rl_recvbuf);
 	}
 	rpcrdma_mrs_destroy(buf);
+	ib_dealloc_pd(ia->ri_pd);
+	ia->ri_pd = NULL;
 
 	/* Allow waiters to continue */
 	complete(&ia->ri_remove_done);
@@ -589,11 +594,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
 
 	/* Client offers RDMA Read but does not initiate */
 	ep->rep_remote_cma.initiator_depth = 0;
-	if (ia->ri_device->attrs.max_qp_rd_atom > 32)	/* arbitrary but <= 255 */
-		ep->rep_remote_cma.responder_resources = 32;
-	else
-		ep->rep_remote_cma.responder_resources =
-						ia->ri_device->attrs.max_qp_rd_atom;
+	ep->rep_remote_cma.responder_resources =
+		min_t(int, U8_MAX, ia->ri_device->attrs.max_qp_rd_atom);
 
 	/* Limit transport retries so client can detect server
 	 * GID changes quickly. RPC layer handles re-establishing
@@ -628,14 +630,16 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
 {
 	cancel_delayed_work_sync(&ep->rep_connect_worker);
 
-	if (ia->ri_id->qp) {
+	if (ia->ri_id && ia->ri_id->qp) {
 		rpcrdma_ep_disconnect(ep, ia);
 		rdma_destroy_qp(ia->ri_id);
 		ia->ri_id->qp = NULL;
 	}
 
-	ib_free_cq(ep->rep_attr.recv_cq);
-	ib_free_cq(ep->rep_attr.send_cq);
+	if (ep->rep_attr.recv_cq)
+		ib_free_cq(ep->rep_attr.recv_cq);
+	if (ep->rep_attr.send_cq)
+		ib_free_cq(ep->rep_attr.send_cq);
 }
 
 /* Re-establish a connection after a device removal event.
@@ -1024,7 +1028,7 @@ rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt)
 	LIST_HEAD(free);
 	LIST_HEAD(all);
 
-	for (count = 0; count < 32; count++) {
+	for (count = 0; count < 3; count++) {
 		struct rpcrdma_mr *mr;
 		int rc;
 
@@ -1049,8 +1053,9 @@ rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt)
 	list_splice(&all, &buf->rb_all);
 	r_xprt->rx_stats.mrs_allocated += count;
 	spin_unlock(&buf->rb_mrlock);
-
 	trace_xprtrdma_createmrs(r_xprt, count);
+
+	xprt_write_space(&r_xprt->rx_xprt);
 }
 
 static void
@@ -1068,17 +1073,27 @@ struct rpcrdma_req *
 rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
 {
 	struct rpcrdma_buffer *buffer = &r_xprt->rx_buf;
+	struct rpcrdma_regbuf *rb;
 	struct rpcrdma_req *req;
 
 	req = kzalloc(sizeof(*req), GFP_KERNEL);
 	if (req == NULL)
 		return ERR_PTR(-ENOMEM);
 
+	rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE,
+				  DMA_TO_DEVICE, GFP_KERNEL);
+	if (IS_ERR(rb)) {
+		kfree(req);
+		return ERR_PTR(-ENOMEM);
+	}
+	req->rl_rdmabuf = rb;
+	xdr_buf_init(&req->rl_hdrbuf, rb->rg_base, rdmab_length(rb));
+	req->rl_buffer = buffer;
+	INIT_LIST_HEAD(&req->rl_registered);
+
 	spin_lock(&buffer->rb_reqslock);
 	list_add(&req->rl_all, &buffer->rb_allreqs);
 	spin_unlock(&buffer->rb_reqslock);
-	req->rl_buffer = &r_xprt->rx_buf;
-	INIT_LIST_HEAD(&req->rl_registered);
 	return req;
 }
 
@@ -1535,7 +1550,6 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
 		struct rpcrdma_req *req)
 {
 	struct ib_send_wr *send_wr = &req->rl_sendctx->sc_wr;
-	struct ib_send_wr *send_wr_fail;
 	int rc;
 
 	if (req->rl_reply) {
@@ -1554,7 +1568,7 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
 		--ep->rep_send_count;
 	}
 
-	rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail);
+	rc = ia->ri_ops->ro_send(ia, req);
 	trace_xprtrdma_post_send(req, rc);
 	if (rc)
 		return -ENOTCONN;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 69883a960a3f..3d3b423fa9c1 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -334,8 +334,6 @@ enum {
 struct rpcrdma_buffer;
 struct rpcrdma_req {
 	struct list_head	rl_list;
-	int			rl_cpu;
-	unsigned int		rl_connect_cookie;
 	struct rpcrdma_buffer	*rl_buffer;
 	struct rpcrdma_rep	*rl_reply;
 	struct xdr_stream	rl_stream;
@@ -474,6 +472,8 @@ struct rpcrdma_memreg_ops {
 			(*ro_map)(struct rpcrdma_xprt *,
 				  struct rpcrdma_mr_seg *, int, bool,
 				  struct rpcrdma_mr **);
+	int		(*ro_send)(struct rpcrdma_ia *ia,
+				   struct rpcrdma_req *req);
 	void		(*ro_reminv)(struct rpcrdma_rep *rep,
 				     struct list_head *mrs);
 	void		(*ro_unmap_sync)(struct rpcrdma_xprt *,
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 956e29c1438d..c8902f11efdd 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -527,6 +527,7 @@ static int xs_local_send_request(struct rpc_task *task)
 	xs_pktdump("packet data:",
 			req->rq_svec->iov_base, req->rq_svec->iov_len);
 
+	req->rq_xtime = ktime_get();
 	status = xs_sendpages(transport->sock, NULL, 0, xdr, req->rq_bytes_sent,
 			      true, &sent);
 	dprintk("RPC:       %s(%u) = %d\n",
@@ -589,6 +590,7 @@ static int xs_udp_send_request(struct rpc_task *task)
 
 	if (!xprt_bound(xprt))
 		return -ENOTCONN;
+	req->rq_xtime = ktime_get();
 	status = xs_sendpages(transport->sock, xs_addr(xprt), xprt->addrlen,
 			      xdr, req->rq_bytes_sent, true, &sent);
 
@@ -678,6 +680,7 @@ static int xs_tcp_send_request(struct rpc_task *task)
 	/* Continue transmitting the packet/record. We must be careful
 	 * to cope with writespace callbacks arriving _after_ we have
 	 * called sendmsg(). */
+	req->rq_xtime = ktime_get();
 	while (1) {
 		sent = 0;
 		status = xs_sendpages(transport->sock, NULL, 0, xdr,
@@ -1060,6 +1063,7 @@ static void xs_udp_data_read_skb(struct rpc_xprt *xprt,
 	if (!rovr)
 		goto out_unlock;
 	xprt_pin_rqst(rovr);
+	xprt_update_rtt(rovr->rq_task);
 	spin_unlock(&xprt->recv_lock);
 	task = rovr->rq_task;
 
diff --git a/net/tipc/diag.c b/net/tipc/diag.c
index 46d9cd62f781..aaabb0b776dd 100644
--- a/net/tipc/diag.c
+++ b/net/tipc/diag.c
@@ -59,7 +59,7 @@ static int __tipc_add_sock_diag(struct sk_buff *skb,
 	if (!nlh)
 		return -EMSGSIZE;
 
-	err = tipc_sk_fill_sock_diag(skb, tsk, req->tidiag_states,
+	err = tipc_sk_fill_sock_diag(skb, cb, tsk, req->tidiag_states,
 				     __tipc_diag_gen_cookie);
 	if (err)
 		return err;
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index cee6674a3bf4..1fd1c8b5ce03 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -3257,8 +3257,8 @@ out:
 }
 EXPORT_SYMBOL(tipc_nl_sk_walk);
 
-int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct tipc_sock *tsk,
-			   u32 sk_filter_state,
+int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct netlink_callback *cb,
+			   struct tipc_sock *tsk, u32 sk_filter_state,
 			   u64 (*tipc_diag_gen_cookie)(struct sock *sk))
 {
 	struct sock *sk = &tsk->sk;
@@ -3280,7 +3280,7 @@ int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct tipc_sock *tsk,
 	    nla_put_u32(skb, TIPC_NLA_SOCK_TIPC_STATE, (u32)sk->sk_state) ||
 	    nla_put_u32(skb, TIPC_NLA_SOCK_INO, sock_i_ino(sk)) ||
 	    nla_put_u32(skb, TIPC_NLA_SOCK_UID,
-			from_kuid_munged(sk_user_ns(NETLINK_CB(skb).sk),
+			from_kuid_munged(sk_user_ns(NETLINK_CB(cb->skb).sk),
 					 sock_i_uid(sk))) ||
 	    nla_put_u64_64bit(skb, TIPC_NLA_SOCK_COOKIE,
 			      tipc_diag_gen_cookie(sk),
diff --git a/net/tipc/socket.h b/net/tipc/socket.h
index aae3fd4cd06c..aff9b2ae5a1f 100644
--- a/net/tipc/socket.h
+++ b/net/tipc/socket.h
@@ -61,8 +61,8 @@ int tipc_sk_rht_init(struct net *net);
 void tipc_sk_rht_destroy(struct net *net);
 int tipc_nl_sk_dump(struct sk_buff *skb, struct netlink_callback *cb);
 int tipc_nl_publ_dump(struct sk_buff *skb, struct netlink_callback *cb);
-int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct tipc_sock *tsk,
-			   u32 sk_filter_state,
+int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct netlink_callback *cb,
+			   struct tipc_sock *tsk, u32 sk_filter_state,
 			   u64 (*tipc_diag_gen_cookie)(struct sock *sk));
 int tipc_nl_sk_walk(struct sk_buff *skb, struct netlink_callback *cb,
 		    int (*skb_handler)(struct sk_buff *skb,
diff --git a/samples/Kconfig b/samples/Kconfig
index f524f551718e..3db002b9e1d3 100644
--- a/samples/Kconfig
+++ b/samples/Kconfig
@@ -62,6 +62,16 @@ config SAMPLE_KDB
 	  Build an example of how to dynamically add the hello
 	  command to the kdb shell.
 
+config SAMPLE_QMI_CLIENT
+	tristate "Build qmi client sample -- loadable modules only"
+	depends on m
+	depends on ARCH_QCOM
+	depends on NET
+	select QCOM_QMI_HELPERS
+	help
+	  Build a QMI client sample driver, which demonstrates how to
+	  communicate with a remote QRTR service, using QMI encoded messages.
+
 config SAMPLE_RPMSG_CLIENT
 	tristate "Build rpmsg client sample -- loadable modules only"
 	depends on RPMSG && m
diff --git a/samples/Makefile b/samples/Makefile
index 70cf3758dcf2..bd601c038b86 100644
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -3,4 +3,4 @@
 obj-$(CONFIG_SAMPLES)	+= kobject/ kprobes/ trace_events/ livepatch/ \
 			   hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/ \
 			   configfs/ connector/ v4l/ trace_printk/ \
-			   vfio-mdev/ statx/
+			   vfio-mdev/ statx/ qmi/
diff --git a/samples/qmi/Makefile b/samples/qmi/Makefile
new file mode 100644
index 000000000000..2b111d2769df
--- /dev/null
+++ b/samples/qmi/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_SAMPLE_QMI_CLIENT) += qmi_sample_client.o
diff --git a/samples/qmi/qmi_sample_client.c b/samples/qmi/qmi_sample_client.c
new file mode 100644
index 000000000000..c9e7276c3d83
--- /dev/null
+++ b/samples/qmi/qmi_sample_client.c
@@ -0,0 +1,622 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Sample in-kernel QMI client driver
+ *
+ * Copyright (c) 2013-2014, The Linux Foundation. All rights reserved.
+ * Copyright (C) 2017 Linaro Ltd.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/debugfs.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/qrtr.h>
+#include <linux/net.h>
+#include <linux/completion.h>
+#include <linux/idr.h>
+#include <linux/string.h>
+#include <net/sock.h>
+#include <linux/soc/qcom/qmi.h>
+
+#define PING_REQ1_TLV_TYPE		0x1
+#define PING_RESP1_TLV_TYPE		0x2
+#define PING_OPT1_TLV_TYPE		0x10
+#define PING_OPT2_TLV_TYPE		0x11
+
+#define DATA_REQ1_TLV_TYPE		0x1
+#define DATA_RESP1_TLV_TYPE		0x2
+#define DATA_OPT1_TLV_TYPE		0x10
+#define DATA_OPT2_TLV_TYPE		0x11
+
+#define TEST_MED_DATA_SIZE_V01		8192
+#define TEST_MAX_NAME_SIZE_V01		255
+
+#define TEST_PING_REQ_MSG_ID_V01	0x20
+#define TEST_DATA_REQ_MSG_ID_V01	0x21
+
+#define TEST_PING_REQ_MAX_MSG_LEN_V01	266
+#define TEST_DATA_REQ_MAX_MSG_LEN_V01	8456
+
+struct test_name_type_v01 {
+	u32 name_len;
+	char name[TEST_MAX_NAME_SIZE_V01];
+};
+
+static struct qmi_elem_info test_name_type_v01_ei[] = {
+	{
+		.data_type	= QMI_DATA_LEN,
+		.elem_len	= 1,
+		.elem_size	= sizeof(u8),
+		.array_type	= NO_ARRAY,
+		.tlv_type	= QMI_COMMON_TLV_TYPE,
+		.offset		= offsetof(struct test_name_type_v01,
+					   name_len),
+	},
+	{
+		.data_type	= QMI_UNSIGNED_1_BYTE,
+		.elem_len	= TEST_MAX_NAME_SIZE_V01,
+		.elem_size	= sizeof(char),
+		.array_type	= VAR_LEN_ARRAY,
+		.tlv_type	= QMI_COMMON_TLV_TYPE,
+		.offset		= offsetof(struct test_name_type_v01,
+					   name),
+	},
+	{}
+};
+
+struct test_ping_req_msg_v01 {
+	char ping[4];
+
+	u8 client_name_valid;
+	struct test_name_type_v01 client_name;
+};
+
+static struct qmi_elem_info test_ping_req_msg_v01_ei[] = {
+	{
+		.data_type	= QMI_UNSIGNED_1_BYTE,
+		.elem_len	= 4,
+		.elem_size	= sizeof(char),
+		.array_type	= STATIC_ARRAY,
+		.tlv_type	= PING_REQ1_TLV_TYPE,
+		.offset		= offsetof(struct test_ping_req_msg_v01,
+					   ping),
+	},
+	{
+		.data_type	= QMI_OPT_FLAG,
+		.elem_len	= 1,
+		.elem_size	= sizeof(u8),
+		.array_type	= NO_ARRAY,
+		.tlv_type	= PING_OPT1_TLV_TYPE,
+		.offset		= offsetof(struct test_ping_req_msg_v01,
+					   client_name_valid),
+	},
+	{
+		.data_type	= QMI_STRUCT,
+		.elem_len	= 1,
+		.elem_size	= sizeof(struct test_name_type_v01),
+		.array_type	= NO_ARRAY,
+		.tlv_type	= PING_OPT1_TLV_TYPE,
+		.offset		= offsetof(struct test_ping_req_msg_v01,
+					   client_name),
+		.ei_array	= test_name_type_v01_ei,
+	},
+	{}
+};
+
+struct test_ping_resp_msg_v01 {
+	struct qmi_response_type_v01 resp;
+
+	u8 pong_valid;
+	char pong[4];
+
+	u8 service_name_valid;
+	struct test_name_type_v01 service_name;
+};
+
+static struct qmi_elem_info test_ping_resp_msg_v01_ei[] = {
+	{
+		.data_type	= QMI_STRUCT,
+		.elem_len	= 1,
+		.elem_size	= sizeof(struct qmi_response_type_v01),
+		.array_type	= NO_ARRAY,
+		.tlv_type	= PING_RESP1_TLV_TYPE,
+		.offset		= offsetof(struct test_ping_resp_msg_v01,
+					   resp),
+		.ei_array	= qmi_response_type_v01_ei,
+	},
+	{
+		.data_type	= QMI_OPT_FLAG,
+		.elem_len	= 1,
+		.elem_size	= sizeof(u8),
+		.array_type	= NO_ARRAY,
+		.tlv_type	= PING_OPT1_TLV_TYPE,
+		.offset		= offsetof(struct test_ping_resp_msg_v01,
+					   pong_valid),
+	},
+	{
+		.data_type	= QMI_UNSIGNED_1_BYTE,
+		.elem_len	= 4,
+		.elem_size	= sizeof(char),
+		.array_type	= STATIC_ARRAY,
+		.tlv_type	= PING_OPT1_TLV_TYPE,
+		.offset		= offsetof(struct test_ping_resp_msg_v01,
+					   pong),
+	},
+	{
+		.data_type	= QMI_OPT_FLAG,
+		.elem_len	= 1,
+		.elem_size	= sizeof(u8),
+		.array_type	= NO_ARRAY,
+		.tlv_type	= PING_OPT2_TLV_TYPE,
+		.offset		= offsetof(struct test_ping_resp_msg_v01,
+					   service_name_valid),
+	},
+	{
+		.data_type	= QMI_STRUCT,
+		.elem_len	= 1,
+		.elem_size	= sizeof(struct test_name_type_v01),
+		.array_type	= NO_ARRAY,
+		.tlv_type	= PING_OPT2_TLV_TYPE,
+		.offset		= offsetof(struct test_ping_resp_msg_v01,
+					   service_name),
+		.ei_array	= test_name_type_v01_ei,
+	},
+	{}
+};
+
+struct test_data_req_msg_v01 {
+	u32 data_len;
+	u8 data[TEST_MED_DATA_SIZE_V01];
+
+	u8 client_name_valid;
+	struct test_name_type_v01 client_name;
+};
+
+static struct qmi_elem_info test_data_req_msg_v01_ei[] = {
+	{
+		.data_type	= QMI_DATA_LEN,
+		.elem_len	= 1,
+		.elem_size	= sizeof(u32),
+		.array_type	= NO_ARRAY,
+		.tlv_type	= DATA_REQ1_TLV_TYPE,
+		.offset		= offsetof(struct test_data_req_msg_v01,
+					   data_len),
+	},
+	{
+		.data_type	= QMI_UNSIGNED_1_BYTE,
+		.elem_len	= TEST_MED_DATA_SIZE_V01,
+		.elem_size	= sizeof(u8),
+		.array_type	= VAR_LEN_ARRAY,
+		.tlv_type	= DATA_REQ1_TLV_TYPE,
+		.offset		= offsetof(struct test_data_req_msg_v01,
+					   data),
+	},
+	{
+		.data_type	= QMI_OPT_FLAG,
+		.elem_len	= 1,
+		.elem_size	= sizeof(u8),
+		.array_type	= NO_ARRAY,
+		.tlv_type	= DATA_OPT1_TLV_TYPE,
+		.offset		= offsetof(struct test_data_req_msg_v01,
+					   client_name_valid),
+	},
+	{
+		.data_type	= QMI_STRUCT,
+		.elem_len	= 1,
+		.elem_size	= sizeof(struct test_name_type_v01),
+		.array_type	= NO_ARRAY,
+		.tlv_type	= DATA_OPT1_TLV_TYPE,
+		.offset		= offsetof(struct test_data_req_msg_v01,
+					   client_name),
+		.ei_array	= test_name_type_v01_ei,
+	},
+	{}
+};
+
+struct test_data_resp_msg_v01 {
+	struct qmi_response_type_v01 resp;
+
+	u8 data_valid;
+	u32 data_len;
+	u8 data[TEST_MED_DATA_SIZE_V01];
+
+	u8 service_name_valid;
+	struct test_name_type_v01 service_name;
+};
+
+static struct qmi_elem_info test_data_resp_msg_v01_ei[] = {
+	{
+		.data_type	= QMI_STRUCT,
+		.elem_len	= 1,
+		.elem_size	= sizeof(struct qmi_response_type_v01),
+		.array_type	= NO_ARRAY,
+		.tlv_type	= DATA_RESP1_TLV_TYPE,
+		.offset		= offsetof(struct test_data_resp_msg_v01,
+					   resp),
+		.ei_array	= qmi_response_type_v01_ei,
+	},
+	{
+		.data_type	= QMI_OPT_FLAG,
+		.elem_len	= 1,
+		.elem_size	= sizeof(u8),
+		.array_type	= NO_ARRAY,
+		.tlv_type	= DATA_OPT1_TLV_TYPE,
+		.offset		= offsetof(struct test_data_resp_msg_v01,
+					   data_valid),
+	},
+	{
+		.data_type	= QMI_DATA_LEN,
+		.elem_len	= 1,
+		.elem_size	= sizeof(u32),
+		.array_type	= NO_ARRAY,
+		.tlv_type	= DATA_OPT1_TLV_TYPE,
+		.offset		= offsetof(struct test_data_resp_msg_v01,
+					   data_len),
+	},
+	{
+		.data_type	= QMI_UNSIGNED_1_BYTE,
+		.elem_len	= TEST_MED_DATA_SIZE_V01,
+		.elem_size	= sizeof(u8),
+		.array_type	= VAR_LEN_ARRAY,
+		.tlv_type	= DATA_OPT1_TLV_TYPE,
+		.offset		= offsetof(struct test_data_resp_msg_v01,
+					   data),
+	},
+	{
+		.data_type	= QMI_OPT_FLAG,
+		.elem_len	= 1,
+		.elem_size	= sizeof(u8),
+		.array_type	= NO_ARRAY,
+		.tlv_type	= DATA_OPT2_TLV_TYPE,
+		.offset		= offsetof(struct test_data_resp_msg_v01,
+					   service_name_valid),
+	},
+	{
+		.data_type	= QMI_STRUCT,
+		.elem_len	= 1,
+		.elem_size	= sizeof(struct test_name_type_v01),
+		.array_type	= NO_ARRAY,
+		.tlv_type	= DATA_OPT2_TLV_TYPE,
+		.offset		= offsetof(struct test_data_resp_msg_v01,
+					   service_name),
+		.ei_array	= test_name_type_v01_ei,
+	},
+	{}
+};
+
+/*
+ * ping_write() - ping_pong debugfs file write handler
+ * @file:	debugfs file context
+ * @user_buf:	reference to the user data (ignored)
+ * @count:	number of bytes in @user_buf
+ * @ppos:	offset in @file to write
+ *
+ * This function allows user space to send out a ping_pong QMI encoded message
+ * to the associated remote test service and will return with the result of the
+ * transaction. It serves as an example of how to provide a custom response
+ * handler.
+ *
+ * Return: @count, or negative errno on failure.
+ */
+static ssize_t ping_write(struct file *file, const char __user *user_buf,
+			  size_t count, loff_t *ppos)
+{
+	struct qmi_handle *qmi = file->private_data;
+	struct test_ping_req_msg_v01 req = {};
+	struct qmi_txn txn;
+	int ret;
+
+	memcpy(req.ping, "ping", sizeof(req.ping));
+
+	ret = qmi_txn_init(qmi, &txn, NULL, NULL);
+	if (ret < 0)
+		return ret;
+
+	ret = qmi_send_request(qmi, NULL, &txn,
+			       TEST_PING_REQ_MSG_ID_V01,
+			       TEST_PING_REQ_MAX_MSG_LEN_V01,
+			       test_ping_req_msg_v01_ei, &req);
+	if (ret < 0) {
+		qmi_txn_cancel(&txn);
+		return ret;
+	}
+
+	ret = qmi_txn_wait(&txn, 5 * HZ);
+	if (ret < 0)
+		count = ret;
+
+	return count;
+}
+
+static const struct file_operations ping_fops = {
+	.open = simple_open,
+	.write = ping_write,
+};
+
+static void ping_pong_cb(struct qmi_handle *qmi, struct sockaddr_qrtr *sq,
+			 struct qmi_txn *txn, const void *data)
+{
+	const struct test_ping_resp_msg_v01 *resp = data;
+
+	if (!txn) {
+		pr_err("spurious ping response\n");
+		return;
+	}
+
+	if (resp->resp.result == QMI_RESULT_FAILURE_V01)
+		txn->result = -ENXIO;
+	else if (!resp->pong_valid || memcmp(resp->pong, "pong", 4))
+		txn->result = -EINVAL;
+
+	complete(&txn->completion);
+}
+
+/*
+ * data_write() - data debugfs file write handler
+ * @file:	debugfs file context
+ * @user_buf:	reference to the user data
+ * @count:	number of bytes in @user_buf
+ * @ppos:	offset in @file to write
+ *
+ * This function allows user space to send out a data QMI encoded message to
+ * the associated remote test service and will return with the result of the
+ * transaction. It serves as an example of how to have the QMI helpers decode a
+ * transaction response into a provided object automatically.
+ *
+ * Return: @count, or negative errno on failure.
+ */
+static ssize_t data_write(struct file *file, const char __user *user_buf,
+			  size_t count, loff_t *ppos)
+
+{
+	struct qmi_handle *qmi = file->private_data;
+	struct test_data_resp_msg_v01 *resp;
+	struct test_data_req_msg_v01 *req;
+	struct qmi_txn txn;
+	int ret;
+
+	req = kzalloc(sizeof(*req), GFP_KERNEL);
+	if (!req)
+		return -ENOMEM;
+
+	resp = kzalloc(sizeof(*resp), GFP_KERNEL);
+	if (!resp) {
+		kfree(req);
+		return -ENOMEM;
+	}
+
+	req->data_len = min_t(size_t, sizeof(req->data), count);
+	if (copy_from_user(req->data, user_buf, req->data_len)) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	ret = qmi_txn_init(qmi, &txn, test_data_resp_msg_v01_ei, resp);
+	if (ret < 0)
+		goto out;
+
+	ret = qmi_send_request(qmi, NULL, &txn,
+			       TEST_DATA_REQ_MSG_ID_V01,
+			       TEST_DATA_REQ_MAX_MSG_LEN_V01,
+			       test_data_req_msg_v01_ei, req);
+	if (ret < 0) {
+		qmi_txn_cancel(&txn);
+		goto out;
+	}
+
+	ret = qmi_txn_wait(&txn, 5 * HZ);
+	if (ret < 0) {
+		goto out;
+	} else if (!resp->data_valid ||
+		   resp->data_len != req->data_len ||
+		   memcmp(resp->data, req->data, req->data_len)) {
+		pr_err("response data doesn't match expectation\n");
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ret = count;
+
+out:
+	kfree(resp);
+	kfree(req);
+
+	return ret;
+}
+
+static const struct file_operations data_fops = {
+	.open = simple_open,
+	.write = data_write,
+};
+
+static struct qmi_msg_handler qmi_sample_handlers[] = {
+	{
+		.type = QMI_RESPONSE,
+		.msg_id = TEST_PING_REQ_MSG_ID_V01,
+		.ei = test_ping_resp_msg_v01_ei,
+		.decoded_size = sizeof(struct test_ping_resp_msg_v01),
+		.fn = ping_pong_cb
+	},
+	{}
+};
+
+struct qmi_sample {
+	struct qmi_handle qmi;
+
+	struct dentry *de_dir;
+	struct dentry *de_data;
+	struct dentry *de_ping;
+};
+
+static struct dentry *qmi_debug_dir;
+
+static int qmi_sample_probe(struct platform_device *pdev)
+{
+	struct sockaddr_qrtr *sq;
+	struct qmi_sample *sample;
+	char path[20];
+	int ret;
+
+	sample = devm_kzalloc(&pdev->dev, sizeof(*sample), GFP_KERNEL);
+	if (!sample)
+		return -ENOMEM;
+
+	ret = qmi_handle_init(&sample->qmi, TEST_DATA_REQ_MAX_MSG_LEN_V01,
+			      NULL,
+			      qmi_sample_handlers);
+	if (ret < 0)
+		return ret;
+
+	sq = dev_get_platdata(&pdev->dev);
+	ret = kernel_connect(sample->qmi.sock, (struct sockaddr *)sq,
+			     sizeof(*sq), 0);
+	if (ret < 0) {
+		pr_err("failed to connect to remote service port\n");
+		goto err_release_qmi_handle;
+	}
+
+	snprintf(path, sizeof(path), "%d:%d", sq->sq_node, sq->sq_port);
+
+	sample->de_dir = debugfs_create_dir(path, qmi_debug_dir);
+	if (IS_ERR(sample->de_dir)) {
+		ret = PTR_ERR(sample->de_dir);
+		goto err_release_qmi_handle;
+	}
+
+	sample->de_data = debugfs_create_file("data", 0600, sample->de_dir,
+					      sample, &data_fops);
+	if (IS_ERR(sample->de_data)) {
+		ret = PTR_ERR(sample->de_data);
+		goto err_remove_de_dir;
+	}
+
+	sample->de_ping = debugfs_create_file("ping", 0600, sample->de_dir,
+					      sample, &ping_fops);
+	if (IS_ERR(sample->de_ping)) {
+		ret = PTR_ERR(sample->de_ping);
+		goto err_remove_de_data;
+	}
+
+	platform_set_drvdata(pdev, sample);
+
+	return 0;
+
+err_remove_de_data:
+	debugfs_remove(sample->de_data);
+err_remove_de_dir:
+	debugfs_remove(sample->de_dir);
+err_release_qmi_handle:
+	qmi_handle_release(&sample->qmi);
+
+	return ret;
+}
+
+static int qmi_sample_remove(struct platform_device *pdev)
+{
+	struct qmi_sample *sample = platform_get_drvdata(pdev);
+
+	debugfs_remove(sample->de_ping);
+	debugfs_remove(sample->de_data);
+	debugfs_remove(sample->de_dir);
+
+	qmi_handle_release(&sample->qmi);
+
+	return 0;
+}
+
+static struct platform_driver qmi_sample_driver = {
+	.probe = qmi_sample_probe,
+	.remove = qmi_sample_remove,
+	.driver = {
+		.name = "qmi_sample_client",
+	},
+};
+
+static int qmi_sample_new_server(struct qmi_handle *qmi,
+				 struct qmi_service *service)
+{
+	struct platform_device *pdev;
+	struct sockaddr_qrtr sq = { AF_QIPCRTR, service->node, service->port };
+	int ret;
+
+	pdev = platform_device_alloc("qmi_sample_client", PLATFORM_DEVID_AUTO);
+	if (!pdev)
+		return -ENOMEM;
+
+	ret = platform_device_add_data(pdev, &sq, sizeof(sq));
+	if (ret)
+		goto err_put_device;
+
+	ret = platform_device_add(pdev);
+	if (ret)
+		goto err_put_device;
+
+	service->priv = pdev;
+
+	return 0;
+
+err_put_device:
+	platform_device_put(pdev);
+
+	return ret;
+}
+
+static void qmi_sample_del_server(struct qmi_handle *qmi,
+				  struct qmi_service *service)
+{
+	struct platform_device *pdev = service->priv;
+
+	platform_device_unregister(pdev);
+}
+
+static struct qmi_handle lookup_client;
+
+static struct qmi_ops lookup_ops = {
+	.new_server = qmi_sample_new_server,
+	.del_server = qmi_sample_del_server,
+};
+
+static int qmi_sample_init(void)
+{
+	int ret;
+
+	qmi_debug_dir = debugfs_create_dir("qmi_sample", NULL);
+	if (IS_ERR(qmi_debug_dir)) {
+		pr_err("failed to create qmi_sample dir\n");
+		return PTR_ERR(qmi_debug_dir);
+	}
+
+	ret = platform_driver_register(&qmi_sample_driver);
+	if (ret)
+		goto err_remove_debug_dir;
+
+	ret = qmi_handle_init(&lookup_client, 0, &lookup_ops, NULL);
+	if (ret < 0)
+		goto err_unregister_driver;
+
+	qmi_add_lookup(&lookup_client, 15, 0, 0);
+
+	return 0;
+
+err_unregister_driver:
+	platform_driver_unregister(&qmi_sample_driver);
+err_remove_debug_dir:
+	debugfs_remove(qmi_debug_dir);
+
+	return ret;
+}
+
+static void qmi_sample_exit(void)
+{
+	qmi_handle_release(&lookup_client);
+
+	platform_driver_unregister(&qmi_sample_driver);
+
+	debugfs_remove(qmi_debug_dir);
+}
+
+module_init(qmi_sample_init);
+module_exit(qmi_sample_exit);
+
+MODULE_DESCRIPTION("Sample QMI client driver");
+MODULE_LICENSE("GPL v2");
diff --git a/scripts/bloat-o-meter b/scripts/bloat-o-meter
index d84a5674e95e..a923f05edb36 100755
--- a/scripts/bloat-o-meter
+++ b/scripts/bloat-o-meter
@@ -30,8 +30,8 @@ def getsizes(file, format):
             if type in format:
                 # strip generated symbols
                 if name.startswith("__mod_"): continue
-                if name.startswith("SyS_"): continue
-                if name.startswith("compat_SyS_"): continue
+                if name.startswith("__se_sys"): continue
+                if name.startswith("__se_compat_sys"): continue
                 if name == "linux_banner": continue
                 # statics and some other optimizations adds random .NUMBER
                 name = re_NUMBER.sub('', name)
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 764ffd1bb1c5..e16d6713f236 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -791,7 +791,8 @@ our $FuncArg = qr{$Typecast{0,1}($LvalOrFunc|$Constant|$String)};
 our $declaration_macros = qr{(?x:
 	(?:$Storage\s+)?(?:[A-Z_][A-Z0-9]*_){0,2}(?:DEFINE|DECLARE)(?:_[A-Z0-9]+){1,6}\s*\(|
 	(?:$Storage\s+)?[HLP]?LIST_HEAD\s*\(|
-	(?:$Storage\s+)?${Type}\s+uninitialized_var\s*\(
+	(?:$Storage\s+)?${Type}\s+uninitialized_var\s*\(|
+	(?:SKCIPHER_REQUEST|SHASH_DESC|AHASH_REQUEST)_ON_STACK\s*\(
 )};
 
 sub deparenthesize {
@@ -1075,7 +1076,7 @@ sub parse_email {
 	} elsif ($formatted_email =~ /(\S+\@\S+)(.*)$/) {
 		$address = $1;
 		$comment = $2 if defined $2;
-		$formatted_email =~ s/$address.*$//;
+		$formatted_email =~ s/\Q$address\E.*$//;
 		$name = $formatted_email;
 		$name = trim($name);
 		$name =~ s/^\"|\"$//g;
@@ -1217,7 +1218,7 @@ sub sanitise_line {
 	for ($off = 1; $off < length($line); $off++) {
 		$c = substr($line, $off, 1);
 
-		# Comments we are wacking completly including the begin
+		# Comments we are whacking completely including the begin
 		# and end, all to $;.
 		if ($sanitise_quote eq '' && substr($line, $off, 2) eq '/*') {
 			$sanitise_quote = '*/';
@@ -1297,6 +1298,7 @@ sub sanitise_line {
 sub get_quoted_string {
 	my ($line, $rawline) = @_;
 
+	return "" if (!defined($line) || !defined($rawline));
 	return "" if ($line !~ m/($String)/g);
 	return substr($rawline, $-[0], $+[0] - $-[0]);
 }
@@ -1644,6 +1646,28 @@ sub raw_line {
 	return $line;
 }
 
+sub get_stat_real {
+	my ($linenr, $lc) = @_;
+
+	my $stat_real = raw_line($linenr, 0);
+	for (my $count = $linenr + 1; $count <= $lc; $count++) {
+		$stat_real = $stat_real . "\n" . raw_line($count, 0);
+	}
+
+	return $stat_real;
+}
+
+sub get_stat_here {
+	my ($linenr, $cnt, $here) = @_;
+
+	my $herectx = $here . "\n";
+	for (my $n = 0; $n < $cnt; $n++) {
+		$herectx .= raw_line($linenr, $n) . "\n";
+	}
+
+	return $herectx;
+}
+
 sub cat_vet {
 	my ($vet) = @_;
 	my ($res, $coded);
@@ -2257,6 +2281,8 @@ sub process {
 
 	my $camelcase_file_seeded = 0;
 
+	my $checklicenseline = 1;
+
 	sanitise_line_reset();
 	my $line;
 	foreach my $rawline (@rawlines) {
@@ -2448,6 +2474,7 @@ sub process {
 			} else {
 				$check = $check_orig;
 			}
+			$checklicenseline = 1;
 			next;
 		}
 
@@ -2911,6 +2938,30 @@ sub process {
 			}
 		}
 
+# check for using SPDX license tag at beginning of files
+		if ($realline == $checklicenseline) {
+			if ($rawline =~ /^[ \+]\s*\#\!\s*\//) {
+				$checklicenseline = 2;
+			} elsif ($rawline =~ /^\+/) {
+				my $comment = "";
+				if ($realfile =~ /\.(h|s|S)$/) {
+					$comment = '/*';
+				} elsif ($realfile =~ /\.(c|dts|dtsi)$/) {
+					$comment = '//';
+				} elsif (($checklicenseline == 2) || $realfile =~ /\.(sh|pl|py|awk|tc)$/) {
+					$comment = '#';
+				} elsif ($realfile =~ /\.rst$/) {
+					$comment = '..';
+				}
+
+				if ($comment !~ /^$/ &&
+				    $rawline !~ /^\+\Q$comment\E SPDX-License-Identifier: /) {
+					WARN("SPDX_LICENSE_TAG",
+					     "Missing or malformed SPDX-License-Identifier tag in line $checklicenseline\n" . $herecurr);
+				}
+			}
+		}
+
 # check we are in a valid source file if not then ignore this hunk
 		next if ($realfile !~ /\.(h|c|s|S|sh|dtsi|dts)$/);
 
@@ -3011,6 +3062,12 @@ sub process {
 			}
 		}
 
+# check for assignments on the start of a line
+		if ($sline =~ /^\+\s+($Assignment)[^=]/) {
+			CHK("ASSIGNMENT_CONTINUATIONS",
+			    "Assignment operator '$1' should be on the previous line\n" . $hereprev);
+		}
+
 # check for && or || at the start of a line
 		if ($rawline =~ /^\+\s*(&&|\|\|)/) {
 			CHK("LOGICAL_CONTINUATIONS",
@@ -4032,7 +4089,7 @@ sub process {
 			my ($where, $prefix) = ($-[1], $1);
 			if ($prefix !~ /$Type\s+$/ &&
 			    ($where != 0 || $prefix !~ /^.\s+$/) &&
-			    $prefix !~ /[{,]\s+$/) {
+			    $prefix !~ /[{,:]\s+$/) {
 				if (ERROR("BRACKET_SPACE",
 					  "space prohibited before open square bracket '['\n" . $herecurr) &&
 				    $fix) {
@@ -4928,12 +4985,8 @@ sub process {
 			#print "REST<$rest> dstat<$dstat> ctx<$ctx>\n";
 
 			$ctx =~ s/\n*$//;
-			my $herectx = $here . "\n";
 			my $stmt_cnt = statement_rawlines($ctx);
-
-			for (my $n = 0; $n < $stmt_cnt; $n++) {
-				$herectx .= raw_line($linenr, $n) . "\n";
-			}
+			my $herectx = get_stat_here($linenr, $stmt_cnt, $here);
 
 			if ($dstat ne '' &&
 			    $dstat !~ /^(?:$Ident|-?$Constant),$/ &&			# 10, // foo(),
@@ -5005,12 +5058,9 @@ sub process {
 # check for macros with flow control, but without ## concatenation
 # ## concatenation is commonly a macro that defines a function so ignore those
 			if ($has_flow_statement && !$has_arg_concat) {
-				my $herectx = $here . "\n";
 				my $cnt = statement_rawlines($ctx);
+				my $herectx = get_stat_here($linenr, $cnt, $here);
 
-				for (my $n = 0; $n < $cnt; $n++) {
-					$herectx .= raw_line($linenr, $n) . "\n";
-				}
 				WARN("MACRO_WITH_FLOW_CONTROL",
 				     "Macros with flow control statements should be avoided\n" . "$herectx");
 			}
@@ -5050,11 +5100,7 @@ sub process {
 
 				$ctx =~ s/\n*$//;
 				my $cnt = statement_rawlines($ctx);
-				my $herectx = $here . "\n";
-
-				for (my $n = 0; $n < $cnt; $n++) {
-					$herectx .= raw_line($linenr, $n) . "\n";
-				}
+				my $herectx = get_stat_here($linenr, $cnt, $here);
 
 				if (($stmts =~ tr/;/;/) == 1 &&
 				    $stmts !~ /^\s*(if|while|for|switch)\b/) {
@@ -5068,11 +5114,7 @@ sub process {
 			} elsif ($dstat =~ /^\+\s*#\s*define\s+$Ident.*;\s*$/) {
 				$ctx =~ s/\n*$//;
 				my $cnt = statement_rawlines($ctx);
-				my $herectx = $here . "\n";
-
-				for (my $n = 0; $n < $cnt; $n++) {
-					$herectx .= raw_line($linenr, $n) . "\n";
-				}
+				my $herectx = get_stat_here($linenr, $cnt, $here);
 
 				WARN("TRAILING_SEMICOLON",
 				     "macros should not use a trailing semicolon\n" . "$herectx");
@@ -5195,12 +5237,8 @@ sub process {
 				}
 			}
 			if ($level == 0 && $block =~ /^\s*\{/ && !$allowed) {
-				my $herectx = $here . "\n";
 				my $cnt = statement_rawlines($block);
-
-				for (my $n = 0; $n < $cnt; $n++) {
-					$herectx .= raw_line($linenr, $n) . "\n";
-				}
+				my $herectx = get_stat_here($linenr, $cnt, $here);
 
 				WARN("BRACES",
 				     "braces {} are not necessary for single statement blocks\n" . $herectx);
@@ -5776,36 +5814,50 @@ sub process {
 			}
 		}
 
-		# check for vsprintf extension %p<foo> misuses
+# check for vsprintf extension %p<foo> misuses
 		if ($^V && $^V ge 5.10.0 &&
 		    defined $stat &&
 		    $stat =~ /^\+(?![^\{]*\{\s*).*\b(\w+)\s*\(.*$String\s*,/s &&
 		    $1 !~ /^_*volatile_*$/) {
-			my $bad_extension = "";
+			my $specifier;
+			my $extension;
+			my $bad_specifier = "";
+			my $stat_real;
+
 			my $lc = $stat =~ tr@\n@@;
 			$lc = $lc + $linenr;
 		        for (my $count = $linenr; $count <= $lc; $count++) {
 				my $fmt = get_quoted_string($lines[$count - 1], raw_line($count, 0));
 				$fmt =~ s/%%//g;
-				if ($fmt =~ /(\%[\*\d\.]*p(?![\WSsBKRraEhMmIiUDdgVCbGNOx]).)/) {
-					$bad_extension = $1;
-					last;
-				}
-			}
-			if ($bad_extension ne "") {
-				my $stat_real = raw_line($linenr, 0);
-				my $ext_type = "Invalid";
-				my $use = "";
-				for (my $count = $linenr + 1; $count <= $lc; $count++) {
-					$stat_real = $stat_real . "\n" . raw_line($count, 0);
+
+				while ($fmt =~ /(\%[\*\d\.]*p(\w))/g) {
+					$specifier = $1;
+					$extension = $2;
+					if ($extension !~ /[SsBKRraEhMmIiUDdgVCbGNOx]/) {
+						$bad_specifier = $specifier;
+						last;
+					}
+					if ($extension eq "x" && !defined($stat_real)) {
+						if (!defined($stat_real)) {
+							$stat_real = get_stat_real($linenr, $lc);
+						}
+						WARN("VSPRINTF_SPECIFIER_PX",
+						     "Using vsprintf specifier '\%px' potentially exposes the kernel memory layout, if you don't really need the address please consider using '\%p'.\n" . "$here\n$stat_real\n");
+					}
 				}
-				if ($bad_extension =~ /p[Ff]/) {
-					$ext_type = "Deprecated";
-					$use = " - use %pS instead";
-					$use =~ s/pS/ps/ if ($bad_extension =~ /pf/);
+				if ($bad_specifier ne "") {
+					my $stat_real = get_stat_real($linenr, $lc);
+					my $ext_type = "Invalid";
+					my $use = "";
+					if ($bad_specifier =~ /p[Ff]/) {
+						$ext_type = "Deprecated";
+						$use = " - use %pS instead";
+						$use =~ s/pS/ps/ if ($bad_specifier =~ /pf/);
+					}
+
+					WARN("VSPRINTF_POINTER_EXTENSION",
+					     "$ext_type vsprintf pointer extension '$bad_specifier'$use\n" . "$here\n$stat_real\n");
 				}
-				WARN("VSPRINTF_POINTER_EXTENSION",
-				     "$ext_type vsprintf pointer extension '$bad_extension'$use\n" . "$here\n$stat_real\n");
 			}
 		}
 
@@ -5918,10 +5970,7 @@ sub process {
 		     $stat !~ /(?:$Compare)\s*\bsscanf\s*$balanced_parens/)) {
 			my $lc = $stat =~ tr@\n@@;
 			$lc = $lc + $linenr;
-			my $stat_real = raw_line($linenr, 0);
-		        for (my $count = $linenr + 1; $count <= $lc; $count++) {
-				$stat_real = $stat_real . "\n" . raw_line($count, 0);
-			}
+			my $stat_real = get_stat_real($linenr, $lc);
 			WARN("NAKED_SSCANF",
 			     "unchecked sscanf return value\n" . "$here\n$stat_real\n");
 		}
@@ -5932,10 +5981,7 @@ sub process {
 		    $line =~ /\bsscanf\b/) {
 			my $lc = $stat =~ tr@\n@@;
 			$lc = $lc + $linenr;
-			my $stat_real = raw_line($linenr, 0);
-		        for (my $count = $linenr + 1; $count <= $lc; $count++) {
-				$stat_real = $stat_real . "\n" . raw_line($count, 0);
-			}
+			my $stat_real = get_stat_real($linenr, $lc);
 			if ($stat_real =~ /\bsscanf\b\s*\(\s*$FuncArg\s*,\s*("[^"]+")/) {
 				my $format = $6;
 				my $count = $format =~ tr@%@%@;
@@ -6065,12 +6111,9 @@ sub process {
 			}
 			if ($r1 !~ /^sizeof\b/ && $r2 =~ /^sizeof\s*\S/ &&
 			    !($r1 =~ /^$Constant$/ || $r1 =~ /^[A-Z_][A-Z0-9_]*$/)) {
-				my $ctx = '';
-				my $herectx = $here . "\n";
 				my $cnt = statement_rawlines($stat);
-				for (my $n = 0; $n < $cnt; $n++) {
-					$herectx .= raw_line($linenr, $n) . "\n";
-				}
+				my $herectx = get_stat_here($linenr, $cnt, $here);
+
 				if (WARN("ALLOC_WITH_MULTIPLY",
 					 "Prefer $newfunc over $oldfunc with multiply\n" . $herectx) &&
 				    $cnt == 1 &&
@@ -6153,12 +6196,9 @@ sub process {
 		if ($^V && $^V ge 5.10.0 &&
 		    defined $stat &&
 		    $stat =~ /^\+[$;\s]*(?:case[$;\s]+\w+[$;\s]*:[$;\s]*|)*[$;\s]*\bdefault[$;\s]*:[$;\s]*;/g) {
-			my $ctx = '';
-			my $herectx = $here . "\n";
 			my $cnt = statement_rawlines($stat);
-			for (my $n = 0; $n < $cnt; $n++) {
-				$herectx .= raw_line($linenr, $n) . "\n";
-			}
+			my $herectx = get_stat_here($linenr, $cnt, $here);
+
 			WARN("DEFAULT_NO_BREAK",
 			     "switch default: should use break\n" . $herectx);
 		}
@@ -6211,6 +6251,12 @@ sub process {
 			}
 		}
 
+# check for bool bitfields
+		if ($sline =~ /^.\s+bool\s*$Ident\s*:\s*\d+\s*;/) {
+			WARN("BOOL_BITFIELD",
+			     "Avoid using bool as bitfield.  Prefer bool bitfields as unsigned int or u<8|16|32>\n" . $herecurr);
+		}
+
 # check for semaphores initialized locked
 		if ($line =~ /^.\s*sema_init.+,\W?0\W?\)/) {
 			WARN("CONSIDER_COMPLETION",
@@ -6369,10 +6415,7 @@ sub process {
 
 				my $lc = $stat =~ tr@\n@@;
 				$lc = $lc + $linenr;
-				my $stat_real = raw_line($linenr, 0);
-				for (my $count = $linenr + 1; $count <= $lc; $count++) {
-					$stat_real = $stat_real . "\n" . raw_line($count, 0);
-				}
+				my $stat_real = get_stat_real($linenr, $lc);
 
 				my $skip_args = "";
 				if ($arg_pos > 1) {
@@ -6398,7 +6441,7 @@ sub process {
 		}
 
 # check for uses of S_<PERMS> that could be octal for readability
-		if ($line =~ /\b($multi_mode_perms_string_search)\b/) {
+		while ($line =~ m{\b($multi_mode_perms_string_search)\b}g) {
 			my $oval = $1;
 			my $octal = perms_to_octal($oval);
 			if (WARN("SYMBOLIC_PERMS",
diff --git a/scripts/dtc/include-prefixes/cris b/scripts/dtc/include-prefixes/cris
deleted file mode 120000
index 736d998ba506..000000000000
--- a/scripts/dtc/include-prefixes/cris
+++ /dev/null
@@ -1 +0,0 @@
-../../../arch/cris/boot/dts
-\ No newline at end of file
diff --git a/scripts/dtc/include-prefixes/metag b/scripts/dtc/include-prefixes/metag
deleted file mode 120000
index 87a3c847db8f..000000000000
--- a/scripts/dtc/include-prefixes/metag
+++ /dev/null
@@ -1 +0,0 @@
-../../../arch/metag/boot/dts
-\ No newline at end of file
diff --git a/scripts/gcc-plugins/randomize_layout_plugin.c b/scripts/gcc-plugins/randomize_layout_plugin.c
index c4a345c3715b..6d5bbd31db7f 100644
--- a/scripts/gcc-plugins/randomize_layout_plugin.c
+++ b/scripts/gcc-plugins/randomize_layout_plugin.c
@@ -52,8 +52,8 @@ static const struct whitelist_entry whitelist[] = {
 	{ "net/unix/af_unix.c", "unix_skb_parms", "char" },
 	/* big_key payload.data struct splashing */
 	{ "security/keys/big_key.c", "path", "void *" },
-	/* walk struct security_hook_heads as an array of struct list_head */
-	{ "security/security.c", "list_head", "security_hook_heads" },
+	/* walk struct security_hook_heads as an array of struct hlist_head */
+	{ "security/security.c", "hlist_head", "security_hook_heads" },
 	{ }
 };
 
diff --git a/scripts/leaking_addresses.pl b/scripts/leaking_addresses.pl
index bc5788000018..6a897788f5a7 100755
--- a/scripts/leaking_addresses.pl
+++ b/scripts/leaking_addresses.pl
@@ -3,15 +3,20 @@
 # (c) 2017 Tobin C. Harding <me@tobin.cc>
 # Licensed under the terms of the GNU GPL License version 2
 #
-# leaking_addresses.pl: Scan 64 bit kernel for potential leaking addresses.
+# leaking_addresses.pl: Scan the kernel for potential leaking addresses.
 #  - Scans dmesg output.
 #  - Walks directory tree and parses each file (for each directory in @DIRS).
 #
 # Use --debug to output path before parsing, this is useful to find files that
 # cause the script to choke.
+
 #
-# You may like to set kptr_restrict=2 before running script
-# (see Documentation/sysctl/kernel.txt).
+# When the system is idle it is likely that most files under /proc/PID will be
+# identical for various processes.  Scanning _all_ the PIDs under /proc is
+# unnecessary and implies that we are thoroughly scanning /proc.  This is _not_
+# the case because there may be ways userspace can trigger creation of /proc
+# files that leak addresses but were not present during a scan.  For these two
+# reasons we exclude all PID directories under /proc except '1/'
 
 use warnings;
 use strict;
@@ -22,9 +27,10 @@ use Cwd 'abs_path';
 use Term::ANSIColor qw(:constants);
 use Getopt::Long qw(:config no_auto_abbrev);
 use Config;
+use bigint qw/hex/;
+use feature 'state';
 
 my $P = $0;
-my $V = '0.01';
 
 # Directories to scan.
 my @DIRS = ('/proc', '/sys');
@@ -32,10 +38,9 @@ my @DIRS = ('/proc', '/sys');
 # Timer for parsing each file, in seconds.
 my $TIMEOUT = 10;
 
-# Script can only grep for kernel addresses on the following architectures. If
-# your architecture is not listed here and has a grep'able kernel address please
-# consider submitting a patch.
-my @SUPPORTED_ARCHITECTURES = ('x86_64', 'ppc64');
+# Kernel addresses vary by architecture.  We can only auto-detect the following
+# architectures (using `uname -m`).  (flag --32-bit overrides auto-detection.)
+my @SUPPORTED_ARCHITECTURES = ('x86_64', 'ppc64', 'x86');
 
 # Command line options.
 my $help = 0;
@@ -43,46 +48,34 @@ my $debug = 0;
 my $raw = 0;
 my $output_raw = "";	# Write raw results to file.
 my $input_raw = "";	# Read raw results from file instead of scanning.
-
 my $suppress_dmesg = 0;		# Don't show dmesg in output.
 my $squash_by_path = 0;		# Summary report grouped by absolute path.
 my $squash_by_filename = 0;	# Summary report grouped by filename.
-
-# Do not parse these files (absolute path).
-my @skip_parse_files_abs = ('/proc/kmsg',
-			    '/proc/kcore',
-			    '/proc/fs/ext4/sdb1/mb_groups',
-			    '/proc/1/fd/3',
-			    '/sys/firmware/devicetree',
-			    '/proc/device-tree',
-			    '/sys/kernel/debug/tracing/trace_pipe',
-			    '/sys/kernel/security/apparmor/revision');
-
-# Do not parse these files under any subdirectory.
-my @skip_parse_files_any = ('0',
-			    '1',
-			    '2',
-			    'pagemap',
-			    'events',
-			    'access',
-			    'registers',
-			    'snapshot_raw',
-			    'trace_pipe_raw',
-			    'ptmx',
-			    'trace_pipe');
-
-# Do not walk these directories (absolute path).
-my @skip_walk_dirs_abs = ();
-
-# Do not walk these directories under any subdirectory.
-my @skip_walk_dirs_any = ('self',
-			  'thread-self',
-			  'cwd',
-			  'fd',
-			  'usbmon',
-			  'stderr',
-			  'stdin',
-			  'stdout');
+my $kernel_config_file = "";	# Kernel configuration file.
+my $opt_32bit = 0;		# Scan 32-bit kernel.
+my $page_offset_32bit = 0;	# Page offset for 32-bit kernel.
+
+# Skip these absolute paths.
+my @skip_abs = (
+	'/proc/kmsg',
+	'/proc/device-tree',
+	'/proc/1/syscall',
+	'/sys/firmware/devicetree',
+	'/sys/kernel/debug/tracing/trace_pipe',
+	'/sys/kernel/security/apparmor/revision');
+
+# Skip these under any subdirectory.
+my @skip_any = (
+	'pagemap',
+	'events',
+	'access',
+	'registers',
+	'snapshot_raw',
+	'trace_pipe_raw',
+	'ptmx',
+	'trace_pipe',
+	'fd',
+	'usbmon');
 
 sub help
 {
@@ -91,31 +84,22 @@ sub help
 	print << "EOM";
 
 Usage: $P [OPTIONS]
-Version: $V
 
 Options:
 
-	-o, --output-raw=<file>  Save results for future processing.
-	-i, --input-raw=<file>   Read results from file instead of scanning.
-	    --raw                Show raw results (default).
-	    --suppress-dmesg     Do not show dmesg results.
-	    --squash-by-path     Show one result per unique path.
-	    --squash-by-filename Show one result per unique filename.
-	-d, --debug              Display debugging output.
-	-h, --help, --version    Display this help and exit.
-
-Examples:
-
-	# Scan kernel and dump raw results.
-	$0
-
-	# Scan kernel and save results to file.
-	$0 --output-raw scan.out
+	-o, --output-raw=<file>		Save results for future processing.
+	-i, --input-raw=<file>		Read results from file instead of scanning.
+	      --raw			Show raw results (default).
+	      --suppress-dmesg		Do not show dmesg results.
+	      --squash-by-path		Show one result per unique path.
+	      --squash-by-filename	Show one result per unique filename.
+	--kernel-config-file=<file>     Kernel configuration file (e.g /boot/config)
+	--32-bit			Scan 32-bit kernel.
+	--page-offset-32-bit=o		Page offset (for 32-bit kernel 0xABCD1234).
+	-d, --debug			Display debugging output.
+	-h, --help, --version		Display this help and exit.
 
-	# View summary report.
-	$0 --input-raw scan.out --squash-by-filename
-
-Scans the running (64 bit) kernel for potential leaking addresses.
+Scans the running kernel for potential leaking addresses.
 
 EOM
 	exit($exitcode);
@@ -131,6 +115,9 @@ GetOptions(
 	'squash-by-path'        => \$squash_by_path,
 	'squash-by-filename'    => \$squash_by_filename,
 	'raw'                   => \$raw,
+	'kernel-config-file=s'	=> \$kernel_config_file,
+	'32-bit'		=> \$opt_32bit,
+	'page-offset-32-bit=o'	=> \$page_offset_32bit,
 ) or help(1);
 
 help(0) if ($help);
@@ -146,16 +133,19 @@ if (!$input_raw and ($squash_by_path or $squash_by_filename)) {
 	exit(128);
 }
 
-if (!is_supported_architecture()) {
+if (!(is_supported_architecture() or $opt_32bit or $page_offset_32bit)) {
 	printf "\nScript does not support your architecture, sorry.\n";
 	printf "\nCurrently we support: \n\n";
 	foreach(@SUPPORTED_ARCHITECTURES) {
 		printf "\t%s\n", $_;
 	}
+	printf("\n");
+
+	printf("If you are running a 32-bit architecture you may use:\n");
+	printf("\n\t--32-bit or --page-offset-32-bit=<page offset>\n\n");
 
-	my $archname = $Config{archname};
-	printf "\n\$ perl -MConfig -e \'print \"\$Config{archname}\\n\"\'\n";
-	printf "%s\n", $archname;
+	my $archname = `uname -m`;
+	printf("Machine hardware name (`uname -m`): %s\n", $archname);
 
 	exit(129);
 }
@@ -177,49 +167,183 @@ sub dprint
 
 sub is_supported_architecture
 {
-	return (is_x86_64() or is_ppc64());
+	return (is_x86_64() or is_ppc64() or is_ix86_32());
 }
 
-sub is_x86_64
+sub is_32bit
 {
-	my $archname = $Config{archname};
-
-	if ($archname =~ m/x86_64/) {
+	# Allow --32-bit or --page-offset-32-bit to override
+	if ($opt_32bit or $page_offset_32bit) {
 		return 1;
 	}
-	return 0;
+
+	return is_ix86_32();
+}
+
+sub is_ix86_32
+{
+       state $arch = `uname -m`;
+
+       chomp $arch;
+       if ($arch =~ m/i[3456]86/) {
+               return 1;
+       }
+       return 0;
+}
+
+sub is_arch
+{
+       my ($desc) = @_;
+       my $arch = `uname -m`;
+
+       chomp $arch;
+       if ($arch eq $desc) {
+               return 1;
+       }
+       return 0;
+}
+
+sub is_x86_64
+{
+	state $is = is_arch('x86_64');
+	return $is;
 }
 
 sub is_ppc64
 {
-	my $archname = $Config{archname};
+	state $is = is_arch('ppc64');
+	return $is;
+}
 
-	if ($archname =~ m/powerpc/ and $archname =~ m/64/) {
-		return 1;
+# Gets config option value from kernel config file.
+# Returns "" on error or if config option not found.
+sub get_kernel_config_option
+{
+	my ($option) = @_;
+	my $value = "";
+	my $tmp_file = "";
+	my @config_files;
+
+	# Allow --kernel-config-file to override.
+	if ($kernel_config_file ne "") {
+		@config_files = ($kernel_config_file);
+	} elsif (-R "/proc/config.gz") {
+		$tmp_file = "/tmp/tmpkconf";
+
+		if (system("gunzip < /proc/config.gz > $tmp_file")) {
+			dprint "$0: system(gunzip < /proc/config.gz) failed\n";
+			return "";
+		} else {
+			@config_files = ($tmp_file);
+		}
+	} else {
+		my $file = '/boot/config-' . `uname -r`;
+		chomp $file;
+		@config_files = ($file, '/boot/config');
 	}
-	return 0;
+
+	foreach my $file (@config_files) {
+		dprint("parsing config file: %s\n", $file);
+		$value = option_from_file($option, $file);
+		if ($value ne "") {
+			last;
+		}
+	}
+
+	if ($tmp_file ne "") {
+		system("rm -f $tmp_file");
+	}
+
+	return $value;
+}
+
+# Returns the value of $option from $file, or "" if unreadable or not set.
+sub option_from_file
+{
+	my ($option, $file) = @_;
+	my $str = "";
+	my $val = "";
+
+	open(my $fh, "<", $file) or return "";
+	while (my $line = <$fh> ) {
+		if ($line =~ /^\Q$option\E=/) {
+			($str, $val) = split /=/, $line, 2;
+			chomp $val;
+			last;
+		}
+	}
+
+	close $fh;
+	return $val;
+}
 
 sub is_false_positive
 {
 	my ($match) = @_;
 
+	if (is_32bit()) {
+		return is_false_positive_32bit($match);
+	}
+
+	# 64 bit false positives.
+
 	if ($match =~ '\b(0x)?(f|F){16}\b' or
 	    $match =~ '\b(0x)?0{16}\b') {
 		return 1;
 	}
 
-	if (is_x86_64) {
-		# vsyscall memory region, we should probably check against a range here.
-		if ($match =~ '\bf{10}600000\b' or
-		    $match =~ '\bf{10}601000\b') {
-			return 1;
-		}
+	if (is_x86_64() and is_in_vsyscall_memory_region($match)) {
+		return 1;
 	}
 
 	return 0;
 }
 
+sub is_false_positive_32bit
+{
+       my ($match) = @_;
+       state $page_offset = get_page_offset();
+
+       if ($match =~ '\b(0x)?(f|F){8}\b') {
+               return 1;
+       }
+
+       if (hex($match) < $page_offset) {
+               return 1;
+       }
+
+       return 0;
+}
+
+# Returns --page-offset-32-bit if set, else CONFIG_PAGE_OFFSET, else 0xc0000000.
+sub get_page_offset
+{
+       my $page_offset;
+       my $default_offset = 0xc0000000;
+
+       # Allow --page-offset-32-bit to override.
+       if ($page_offset_32bit != 0) {
+               return $page_offset_32bit;
+       }
+
+       $page_offset = get_kernel_config_option('CONFIG_PAGE_OFFSET');
+       if (!$page_offset) {
+	       return $default_offset;
+       }
+       return $page_offset;
+}
+
+sub is_in_vsyscall_memory_region
+{
+	my ($match) = @_;
+
+	my $hex = hex($match);
+	my $region_min = hex("0xffffffffff600000");
+	my $region_max = hex("0xffffffffff601000");
+
+	return ($hex >= $region_min and $hex <= $region_max);
+}
+
 # True if argument potentially contains a kernel address.
 sub may_leak_address
 {
@@ -238,14 +362,8 @@ sub may_leak_address
 		return 0;
 	}
 
-	# One of these is guaranteed to be true.
-	if (is_x86_64()) {
-		$address_re = '\b(0x)?ffff[[:xdigit:]]{12}\b';
-	} elsif (is_ppc64()) {
-		$address_re = '\b(0x)?[89abcdef]00[[:xdigit:]]{13}\b';
-	}
-
-	while (/($address_re)/g) {
+	$address_re = get_address_re();
+	while ($line =~ /($address_re)/g) {
 		if (!is_false_positive($1)) {
 			return 1;
 		}
@@ -254,6 +372,31 @@ sub may_leak_address
 	return 0;
 }
 
+sub get_address_re
+{
+	if (is_ppc64()) {
+		return '\b(0x)?[89abcdef]00[[:xdigit:]]{13}\b';
+	} elsif (is_32bit()) {
+		return '\b(0x)?[[:xdigit:]]{8}\b';
+	}
+
+	return get_x86_64_re();
+}
+
+sub get_x86_64_re
+{
+	# We handle page table levels but only if explicitly configured using
+	# CONFIG_PGTABLE_LEVELS.  If config file parsing fails or config option
+	# is not found we default to using address regular expression suitable
+	# for 4 page table levels.
+	state $ptl = get_kernel_config_option('CONFIG_PGTABLE_LEVELS');
+
+	if ($ptl == 5) {
+		return '\b(0x)?ff[[:xdigit:]]{14}\b';
+	}
+	return '\b(0x)?ffff[[:xdigit:]]{12}\b';
+}
+
 sub parse_dmesg
 {
 	open my $cmd, '-|', 'dmesg';
@@ -268,26 +411,20 @@ sub parse_dmesg
 # True if we should skip this path.
 sub skip
 {
-	my ($path, $paths_abs, $paths_any) = @_;
+	my ($path) = @_;
 
-	foreach (@$paths_abs) {
+	foreach (@skip_abs) {
 		return 1 if (/^$path$/);
 	}
 
 	my($filename, $dirs, $suffix) = fileparse($path);
-	foreach (@$paths_any) {
+	foreach (@skip_any) {
 		return 1 if (/^$filename$/);
 	}
 
 	return 0;
 }
 
-sub skip_parse
-{
-	my ($path) = @_;
-	return skip($path, \@skip_parse_files_abs, \@skip_parse_files_any);
-}
-
 sub timed_parse_file
 {
 	my ($file) = @_;
@@ -313,11 +450,9 @@ sub parse_file
 		return;
 	}
 
-	if (skip_parse($file)) {
-		dprint "skipping file: $file\n";
+	if (! -T $file) {
 		return;
 	}
-	dprint "parsing: $file\n";
 
 	open my $fh, "<", $file or return;
 	while ( <$fh> ) {
@@ -328,12 +463,14 @@ sub parse_file
 	close $fh;
 }
 
-
-# True if we should skip walking this directory.
-sub skip_walk
+# Checks if the actual path name is leaking a kernel address.
+sub check_path_for_leaks
 {
 	my ($path) = @_;
-	return skip($path, \@skip_walk_dirs_abs, \@skip_walk_dirs_any)
+
+	if (may_leak_address($path)) {
+		printf("Path name may contain address: %s\n", $path);
+	}
 }
 
 # Recursively walk directory tree.
@@ -342,7 +479,6 @@ sub walk
 	my @dirs = @_;
 
 	while (my $pwd = shift @dirs) {
-		next if (skip_walk($pwd));
 		next if (!opendir(DIR, $pwd));
 		my @files = readdir(DIR);
 		closedir(DIR);
@@ -353,11 +489,21 @@ sub walk
 			my $path = "$pwd/$file";
 			next if (-l $path);
 
+			# skip /proc/PID except /proc/1
+			next if (($path =~ /^\/proc\/[0-9]+$/) &&
+				 ($path !~ /^\/proc\/1$/));
+
+			next if (skip($path));
+
+			check_path_for_leaks($path);
+
 			if (-d $path) {
 				push @dirs, $path;
-			} else {
-				timed_parse_file($path);
+				next;
 			}
+
+			dprint "parsing: $path\n";
+			timed_parse_file($path);
 		}
 	}
 }
diff --git a/security/apparmor/.gitignore b/security/apparmor/.gitignore
index 9cdec70d72b8..d5b291e94264 100644
--- a/security/apparmor/.gitignore
+++ b/security/apparmor/.gitignore
@@ -1,5 +1,6 @@
 #
 # Generated include files
 #
+net_names.h
 capability_names.h
 rlim_names.h
diff --git a/security/apparmor/Makefile b/security/apparmor/Makefile
index 9a6b4033d52b..ff23fcfefe19 100644
--- a/security/apparmor/Makefile
+++ b/security/apparmor/Makefile
@@ -3,13 +3,46 @@
 #
 obj-$(CONFIG_SECURITY_APPARMOR) += apparmor.o
 
-apparmor-y := apparmorfs.o audit.o capability.o context.o ipc.o lib.o match.o \
+apparmor-y := apparmorfs.o audit.o capability.o task.o ipc.o lib.o match.o \
               path.o domain.o policy.o policy_unpack.o procattr.o lsm.o \
-              resource.o secid.o file.o policy_ns.o label.o mount.o
+              resource.o secid.o file.o policy_ns.o label.o mount.o net.o
 apparmor-$(CONFIG_SECURITY_APPARMOR_HASH) += crypto.o
 
-clean-files := capability_names.h rlim_names.h
+clean-files := capability_names.h rlim_names.h net_names.h
 
+# Build a lower case string table of address family names
+# Transform lines from
+#    #define AF_LOCAL		1	/* POSIX name for AF_UNIX	*/
+#    #define AF_INET		2	/* Internet IP Protocol 	*/
+# to
+#    [1] = "local",
+#    [2] = "inet",
+#
+# and build the securityfs entries for the mapping.
+# Transforms lines from
+#    #define AF_INET		2	/* Internet IP Protocol 	*/
+# to
+#    #define AA_SFS_AF_MASK "local inet"
+quiet_cmd_make-af = GEN     $@
+cmd_make-af = echo "static const char *address_family_names[] = {" > $@ ;\
+	sed $< >>$@ -r -n -e "/AF_MAX/d" -e "/AF_LOCAL/d" -e "/AF_ROUTE/d" -e \
+	 's/^\#define[ \t]+AF_([A-Z0-9_]+)[ \t]+([0-9]+)(.*)/[\2] = "\L\1",/p';\
+	echo "};" >> $@ ;\
+	printf '%s' '\#define AA_SFS_AF_MASK "' >> $@ ;\
+	sed -r -n -e "/AF_MAX/d" -e "/AF_LOCAL/d" -e "/AF_ROUTE/d" -e \
+	 's/^\#define[ \t]+AF_([A-Z0-9_]+)[ \t]+([0-9]+)(.*)/\L\1/p'\
+	 $< | tr '\n' ' ' | sed -e 's/ $$/"\n/' >> $@
+
+# Build a lower case string table of sock type names
+# Transform lines from
+#    SOCK_STREAM	= 1,
+# to
+#    [1] = "stream",
+quiet_cmd_make-sock = GEN     $@
+cmd_make-sock = echo "static const char *sock_type_names[] = {" >> $@ ;\
+	sed $^ >>$@ -r -n \
+	-e 's/^\tSOCK_([A-Z0-9_]+)[\t]+=[ \t]+([0-9]+)(.*)/[\2] = "\L\1",/p';\
+	echo "};" >> $@
 
 # Build a lower case string table of capability names
 # Transforms lines from
@@ -62,6 +95,7 @@ cmd_make-rlim = echo "static const char *const rlim_names[RLIM_NLIMITS] = {" \
 	    tr '\n' ' ' | sed -e 's/ $$/"\n/' >> $@
 
 $(obj)/capability.o : $(obj)/capability_names.h
+$(obj)/net.o : $(obj)/net_names.h
 $(obj)/resource.o : $(obj)/rlim_names.h
 $(obj)/capability_names.h : $(srctree)/include/uapi/linux/capability.h \
 			    $(src)/Makefile
@@ -69,3 +103,8 @@ $(obj)/capability_names.h : $(srctree)/include/uapi/linux/capability.h \
 $(obj)/rlim_names.h : $(srctree)/include/uapi/asm-generic/resource.h \
 		      $(src)/Makefile
 	$(call cmd,make-rlim)
+$(obj)/net_names.h : $(srctree)/include/linux/socket.h \
+		     $(srctree)/include/linux/net.h \
+		     $(src)/Makefile
+	$(call cmd,make-af)
+	$(call cmd,make-sock)
diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index a9428daa69f3..949dd8a48164 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -30,10 +30,9 @@
 #include "include/apparmor.h"
 #include "include/apparmorfs.h"
 #include "include/audit.h"
-#include "include/context.h"
+#include "include/cred.h"
 #include "include/crypto.h"
 #include "include/ipc.h"
-#include "include/policy_ns.h"
 #include "include/label.h"
 #include "include/policy.h"
 #include "include/policy_ns.h"
@@ -120,9 +119,7 @@ static int aafs_count;
 
 static int aafs_show_path(struct seq_file *seq, struct dentry *dentry)
 {
-	struct inode *inode = d_inode(dentry);
-
-	seq_printf(seq, "%s:[%lu]", AAFS_NAME, inode->i_ino);
+	seq_printf(seq, "%s:[%lu]", AAFS_NAME, d_inode(dentry)->i_ino);
 	return 0;
 }
 
@@ -313,6 +310,7 @@ static struct dentry *aafs_create_dir(const char *name, struct dentry *parent)
  * @name: name of dentry to create
  * @parent: parent directory for this dentry
  * @target: if symlink, symlink target string
+ * @private: private data
  * @iops: struct of inode_operations that should be used
  *
  * If @target parameter is %NULL, then the @iops parameter needs to be
@@ -321,17 +319,18 @@ static struct dentry *aafs_create_dir(const char *name, struct dentry *parent)
 static struct dentry *aafs_create_symlink(const char *name,
 					  struct dentry *parent,
 					  const char *target,
+					  void *private,
 					  const struct inode_operations *iops)
 {
 	struct dentry *dent;
 	char *link = NULL;
 
 	if (target) {
 		link = kstrdup(target, GFP_KERNEL);
 		if (!link)
 			return ERR_PTR(-ENOMEM);
 	}
-	dent = aafs_create(name, S_IFLNK | 0444, parent, NULL, link, NULL,
+	dent = aafs_create(name, S_IFLNK | 0444, parent, private, link, NULL,
 			   iops);
 	if (IS_ERR(dent))
 		kfree(link);
 			tmp = aa_compute_fperms(dfa, state, &cond);
 		}
 	} else if (profile->policy.dfa) {
-		if (!PROFILE_MEDIATES_SAFE(profile, *match_str))
+		if (!PROFILE_MEDIATES(profile, *match_str))
 			return;	/* no change to current perms */
 		dfa = profile->policy.dfa;
 		state = aa_dfa_match_len(dfa, profile->policy.start[0],
@@ -1189,9 +1187,7 @@ static int seq_ns_level_show(struct seq_file *seq, void *v)
 static int seq_ns_name_show(struct seq_file *seq, void *v)
 {
 	struct aa_label *label = begin_current_label_crit_section();
-
-	seq_printf(seq, "%s\n", aa_ns_name(labels_ns(label),
-					   labels_ns(label), true));
+	seq_printf(seq, "%s\n", labels_ns(label)->base.name);
 	end_current_label_crit_section(label);
 
 	return 0;
@@ -1484,26 +1480,97 @@ static int profile_depth(struct aa_profile *profile)
 	return depth;
 }
 
-static int gen_symlink_name(char *buffer, size_t bsize, int depth,
-			    const char *dirname, const char *fname)
+static char *gen_symlink_name(int depth, const char *dirname, const char *fname)
 {
+	char *buffer, *s;
 	int error;
+	int size = depth * 6 + strlen(dirname) + strlen(fname) + 11;
+
+	s = buffer = kmalloc(size, GFP_KERNEL);
+	if (!buffer)
+		return ERR_PTR(-ENOMEM);
 
 	for (; depth > 0; depth--) {
-		if (bsize < 7)
-			return -ENAMETOOLONG;
-		strcpy(buffer, "../../");
-		buffer += 6;
-		bsize -= 6;
+		strcpy(s, "../../");
+		s += 6;
+		size -= 6;
 	}
 
-	error = snprintf(buffer, bsize, "raw_data/%s/%s", dirname, fname);
-	if (error >= bsize || error < 0)
-		return -ENAMETOOLONG;
+	error = snprintf(s, size, "raw_data/%s/%s", dirname, fname);
+	if (error >= size || error < 0) {
+		kfree(buffer);
+		return ERR_PTR(-ENAMETOOLONG);
+	}
 
-	return 0;
+	return buffer;
+}
+
+static void rawdata_link_cb(void *arg)
+{
+	kfree(arg);
+}
+
+static const char *rawdata_get_link_base(struct dentry *dentry,
+					 struct inode *inode,
+					 struct delayed_call *done,
+					 const char *name)
+{
+	struct aa_proxy *proxy = inode->i_private;
+	struct aa_label *label;
+	struct aa_profile *profile;
+	char *target;
+	int depth;
+
+	if (!dentry)
+		return ERR_PTR(-ECHILD);
+
+	label = aa_get_label_rcu(&proxy->label);
+	profile = labels_profile(label);
+	depth = profile_depth(profile);
+	target = gen_symlink_name(depth, profile->rawdata->name, name);
+	aa_put_label(label);
+
+	if (IS_ERR(target))
+		return target;
+
+	set_delayed_call(done, rawdata_link_cb, target);
+
+	return target;
+}
+
+static const char *rawdata_get_link_sha1(struct dentry *dentry,
+					 struct inode *inode,
+					 struct delayed_call *done)
+{
+	return rawdata_get_link_base(dentry, inode, done, "sha1");
+}
+
+static const char *rawdata_get_link_abi(struct dentry *dentry,
+					struct inode *inode,
+					struct delayed_call *done)
+{
+	return rawdata_get_link_base(dentry, inode, done, "abi");
+}
+
+static const char *rawdata_get_link_data(struct dentry *dentry,
+					 struct inode *inode,
+					 struct delayed_call *done)
+{
+	return rawdata_get_link_base(dentry, inode, done, "raw_data");
 }
 
+static const struct inode_operations rawdata_link_sha1_iops = {
+	.get_link	= rawdata_get_link_sha1,
+};
+
+static const struct inode_operations rawdata_link_abi_iops = {
+	.get_link	= rawdata_get_link_abi,
+};
+static const struct inode_operations rawdata_link_data_iops = {
+	.get_link	= rawdata_get_link_data,
+};
+
+
 /*
  * Requires: @profile->ns->lock held
  */
@@ -1574,34 +1641,28 @@ int __aafs_profile_mkdir(struct aa_profile *profile, struct dentry *parent)
 	}
 
 	if (profile->rawdata) {
-		char target[64];
-		int depth = profile_depth(profile);
-
-		error = gen_symlink_name(target, sizeof(target), depth,
-					 profile->rawdata->name, "sha1");
-		if (error < 0)
-			goto fail2;
-		dent = aafs_create_symlink("raw_sha1", dir, target, NULL);
+		dent = aafs_create_symlink("raw_sha1", dir, NULL,
+					   profile->label.proxy,
+					   &rawdata_link_sha1_iops);
 		if (IS_ERR(dent))
 			goto fail;
+		aa_get_proxy(profile->label.proxy);
 		profile->dents[AAFS_PROF_RAW_HASH] = dent;
 
-		error = gen_symlink_name(target, sizeof(target), depth,
-					 profile->rawdata->name, "abi");
-		if (error < 0)
-			goto fail2;
-		dent = aafs_create_symlink("raw_abi", dir, target, NULL);
+		dent = aafs_create_symlink("raw_abi", dir, NULL,
+					   profile->label.proxy,
+					   &rawdata_link_abi_iops);
 		if (IS_ERR(dent))
 			goto fail;
+		aa_get_proxy(profile->label.proxy);
 		profile->dents[AAFS_PROF_RAW_ABI] = dent;
 
-		error = gen_symlink_name(target, sizeof(target), depth,
-					 profile->rawdata->name, "raw_data");
-		if (error < 0)
-			goto fail2;
-		dent = aafs_create_symlink("raw_data", dir, target, NULL);
+		dent = aafs_create_symlink("raw_data", dir, NULL,
+					   profile->label.proxy,
+					   &rawdata_link_data_iops);
 		if (IS_ERR(dent))
 			goto fail;
+		aa_get_proxy(profile->label.proxy);
 		profile->dents[AAFS_PROF_RAW_DATA] = dent;
 	}
 
@@ -2152,6 +2213,10 @@ static struct aa_sfs_entry aa_sfs_entry_signal[] = {
 	{ }
 };
 
+static struct aa_sfs_entry aa_sfs_entry_attach[] = {
+	AA_SFS_FILE_BOOLEAN("xattr", 1),
+	{ }
+};
 static struct aa_sfs_entry aa_sfs_entry_domain[] = {
 	AA_SFS_FILE_BOOLEAN("change_hat",	1),
 	AA_SFS_FILE_BOOLEAN("change_hatv",	1),
@@ -2159,6 +2224,9 @@ static struct aa_sfs_entry aa_sfs_entry_domain[] = {
 	AA_SFS_FILE_BOOLEAN("change_profile",	1),
 	AA_SFS_FILE_BOOLEAN("stack",		1),
 	AA_SFS_FILE_BOOLEAN("fix_binfmt_elf_mmap",	1),
+	AA_SFS_FILE_BOOLEAN("post_nnp_subset",	1),
+	AA_SFS_FILE_BOOLEAN("computed_longest_left",	1),
+	AA_SFS_DIR("attach_conditions",		aa_sfs_entry_attach),
 	AA_SFS_FILE_STRING("version", "1.2"),
 	{ }
 };
@@ -2167,6 +2235,7 @@ static struct aa_sfs_entry aa_sfs_entry_versions[] = {
 	AA_SFS_FILE_BOOLEAN("v5",	1),
 	AA_SFS_FILE_BOOLEAN("v6",	1),
 	AA_SFS_FILE_BOOLEAN("v7",	1),
+	AA_SFS_FILE_BOOLEAN("v8",	1),
 	{ }
 };
 
@@ -2202,6 +2271,7 @@ static struct aa_sfs_entry aa_sfs_entry_features[] = {
 	AA_SFS_DIR("policy",			aa_sfs_entry_policy),
 	AA_SFS_DIR("domain",			aa_sfs_entry_domain),
 	AA_SFS_DIR("file",			aa_sfs_entry_file),
+	AA_SFS_DIR("network_v8",		aa_sfs_entry_network),
 	AA_SFS_DIR("mount",			aa_sfs_entry_mount),
 	AA_SFS_DIR("namespaces",		aa_sfs_entry_ns),
 	AA_SFS_FILE_U64("capability",		VFS_CAP_FLAGS_MASK),
@@ -2394,29 +2464,18 @@ static const char *policy_get_link(struct dentry *dentry,
 	return NULL;
 }
 
-static int ns_get_name(char *buf, size_t size, struct aa_ns *ns,
-		       struct inode *inode)
-{
-	int res = snprintf(buf, size, "%s:[%lu]", AAFS_NAME, inode->i_ino);
-
-	if (res < 0 || res >= size)
-		res = -ENOENT;
-
-	return res;
-}
-
 static int policy_readlink(struct dentry *dentry, char __user *buffer,
 			   int buflen)
 {
-	struct aa_ns *ns;
 	char name[32];
 	int res;
 
-	ns = aa_get_current_ns();
-	res = ns_get_name(name, sizeof(name), ns, d_inode(dentry));
-	if (res >= 0)
+	res = snprintf(name, sizeof(name), "%s:[%lu]", AAFS_NAME,
+		       d_inode(dentry)->i_ino);
+	if (res > 0 && res < sizeof(name))
 		res = readlink_copy(buffer, buflen, name);
-	aa_put_ns(ns);
+	else
+		res = -ENOENT;
 
 	return res;
 }
@@ -2460,34 +2519,26 @@ static int __init aa_create_aafs(void)
 
 	dent = securityfs_create_file(".load", 0666, aa_sfs_entry.dentry,
 				      NULL, &aa_fs_profile_load);
-	if (IS_ERR(dent)) {
-		error = PTR_ERR(dent);
-		goto error;
-	}
+	if (IS_ERR(dent))
+		goto dent_error;
 	ns_subload(root_ns) = dent;
 
 	dent = securityfs_create_file(".replace", 0666, aa_sfs_entry.dentry,
 				      NULL, &aa_fs_profile_replace);
-	if (IS_ERR(dent)) {
-		error = PTR_ERR(dent);
-		goto error;
-	}
+	if (IS_ERR(dent))
+		goto dent_error;
 	ns_subreplace(root_ns) = dent;
 
 	dent = securityfs_create_file(".remove", 0666, aa_sfs_entry.dentry,
 				      NULL, &aa_fs_profile_remove);
-	if (IS_ERR(dent)) {
-		error = PTR_ERR(dent);
-		goto error;
-	}
+	if (IS_ERR(dent))
+		goto dent_error;
 	ns_subremove(root_ns) = dent;
 
 	dent = securityfs_create_file("revision", 0444, aa_sfs_entry.dentry,
 				      NULL, &aa_fs_ns_revision_fops);
-	if (IS_ERR(dent)) {
-		error = PTR_ERR(dent);
-		goto error;
-	}
+	if (IS_ERR(dent))
+		goto dent_error;
 	ns_subrevision(root_ns) = dent;
 
 	/* policy tree referenced by magic policy symlink */
@@ -2501,10 +2552,8 @@ static int __init aa_create_aafs(void)
 	/* magic symlink similar to nsfs redirects based on task policy */
 	dent = securityfs_create_symlink("policy", aa_sfs_entry.dentry,
 					 NULL, &policy_link_iops);
-	if (IS_ERR(dent)) {
-		error = PTR_ERR(dent);
-		goto error;
-	}
+	if (IS_ERR(dent))
+		goto dent_error;
 
 	error = aa_mk_null_file(aa_sfs_entry.dentry);
 	if (error)
@@ -2516,6 +2565,8 @@ static int __init aa_create_aafs(void)
 	aa_info_message("AppArmor Filesystem Enabled");
 	return 0;
 
+dent_error:
+	error = PTR_ERR(dent);
 error:
 	aa_destroy_aafs();
 	AA_ERROR("Error creating AppArmor securityfs\n");
diff --git a/security/apparmor/capability.c b/security/apparmor/capability.c
index 67e347192a55..253ef6e9d445 100644
--- a/security/apparmor/capability.c
+++ b/security/apparmor/capability.c
@@ -19,7 +19,7 @@
 
 #include "include/apparmor.h"
 #include "include/capability.h"
-#include "include/context.h"
+#include "include/cred.h"
 #include "include/policy.h"
 #include "include/audit.h"
 
diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c
index 6a54d2ffa840..590b7e8cd21c 100644
--- a/security/apparmor/domain.c
+++ b/security/apparmor/domain.c
@@ -19,10 +19,11 @@
 #include <linux/syscalls.h>
 #include <linux/tracehook.h>
 #include <linux/personality.h>
+#include <linux/xattr.h>
 
 #include "include/audit.h"
 #include "include/apparmorfs.h"
-#include "include/context.h"
+#include "include/cred.h"
 #include "include/domain.h"
 #include "include/file.h"
 #include "include/ipc.h"
@@ -302,7 +303,70 @@ static int change_profile_perms(struct aa_profile *profile,
 }
 
 /**
+ * aa_xattrs_match - check whether a file matches the xattrs defined in profile
+ * @bprm: binprm struct for the process to validate
+ * @profile: profile to match against (NOT NULL)
+ * @state: state to start match in
+ *
+ * Returns: number of extended attributes that matched, or < 0 on error
+ */
+static int aa_xattrs_match(const struct linux_binprm *bprm,
+			   struct aa_profile *profile, unsigned int state)
+{
+	int i;
+	ssize_t size;
+	struct dentry *d;
+	char *value = NULL;
+	int value_size = 0, ret = profile->xattr_count;
+
+	if (!bprm || !profile->xattr_count)
+		return 0;
+
+	/* transition from exec match to xattr set */
+	state = aa_dfa_null_transition(profile->xmatch, state);
+
+	d = bprm->file->f_path.dentry;
+
+	for (i = 0; i < profile->xattr_count; i++) {
+		size = vfs_getxattr_alloc(d, profile->xattrs[i], &value,
+					  value_size, GFP_KERNEL);
+		if (size >= 0) {
+			u32 perm;
+
+			/* Check the xattr value, not just presence */
+			state = aa_dfa_match_len(profile->xmatch, state, value,
+						 size);
+			perm = dfa_user_allow(profile->xmatch, state);
+			if (!(perm & MAY_EXEC)) {
+				ret = -EINVAL;
+				goto out;
+			}
+		}
+		/* transition to next element */
+		state = aa_dfa_null_transition(profile->xmatch, state);
+		if (size < 0) {
+			/*
+			 * No xattr match, so verify if transition to
+			 * next element was valid. IFF so the xattr
+			 * was optional.
+			 */
+			if (!state) {
+				ret = -EINVAL;
+				goto out;
+			}
+			/* don't count missing optional xattr as matched */
+			ret--;
+		}
+	}
+
+out:
+	kfree(value);
+	return ret;
+}
+
+/**
  * __attach_match_ - find an attachment match
+ * @bprm - binprm structure of transitioning task
  * @name - to match against  (NOT NULL)
  * @head - profile list to walk  (NOT NULL)
  * @info - info message if there was an error (NOT NULL)
@@ -316,40 +380,80 @@ static int change_profile_perms(struct aa_profile *profile,
  *
  * Returns: profile or NULL if no match found
  */
-static struct aa_profile *__attach_match(const char *name,
+static struct aa_profile *__attach_match(const struct linux_binprm *bprm,
+					 const char *name,
 					 struct list_head *head,
 					 const char **info)
 {
-	int len = 0;
+	int candidate_len = 0, candidate_xattrs = 0;
 	bool conflict = false;
 	struct aa_profile *profile, *candidate = NULL;
 
+	AA_BUG(!name);
+	AA_BUG(!head);
+
 	list_for_each_entry_rcu(profile, head, base.list) {
 		if (profile->label.flags & FLAG_NULL &&
 		    &profile->label == ns_unconfined(profile->ns))
 			continue;
 
+		/* Find the "best" matching profile. Profiles must
+		 * match the path and extended attributes (if any)
+		 * associated with the file. A more specific path
+		 * match will be preferred over a less specific one,
+		 * and a match with more matching extended attributes
+		 * will be preferred over one with fewer. If the best
+		 * match has both the same level of path specificity
+		 * and the same number of matching extended attributes
+		 * as another profile, signal a conflict and refuse to
+		 * match.
+		 */
 		if (profile->xmatch) {
-			if (profile->xmatch_len >= len) {
-				unsigned int state;
-				u32 perm;
-
-				state = aa_dfa_match(profile->xmatch,
-						     DFA_START, name);
-				perm = dfa_user_allow(profile->xmatch, state);
-				/* any accepting state means a valid match. */
-				if (perm & MAY_EXEC) {
-					if (profile->xmatch_len == len) {
+			unsigned int state, count;
+			u32 perm;
+
+			state = aa_dfa_leftmatch(profile->xmatch, DFA_START,
+						 name, &count);
+			perm = dfa_user_allow(profile->xmatch, state);
+			/* any accepting state means a valid match. */
+			if (perm & MAY_EXEC) {
+				int ret;
+
+				if (count < candidate_len)
+					continue;
+
+				ret = aa_xattrs_match(bprm, profile, state);
+				/* Fail matching if the xattrs don't match */
+				if (ret < 0)
+					continue;
+
+				/*
+				 * TODO: allow for more flexible best match
+				 *
+				 * The new match isn't more specific
+				 * than the current best match
+				 */
+				if (count == candidate_len &&
+				    ret <= candidate_xattrs) {
+					/* Match is equivalent, so conflict */
+					if (ret == candidate_xattrs)
 						conflict = true;
-						continue;
-					}
-					candidate = profile;
-					len = profile->xmatch_len;
-					conflict = false;
+					continue;
 				}
+
+				/* Either the same length with more matching
+				 * xattrs, or a longer match
+				 */
+				candidate = profile;
+				candidate_len = profile->xmatch_len;
+				candidate_xattrs = ret;
+				conflict = false;
 			}
 		} else if (!strcmp(profile->base.name, name))
-			/* exact non-re match, no more searching required */
+			/*
+			 * old exact non-re match, without conditionals such
+			 * as xattrs. no more searching required
+			 */
 			return profile;
 	}
 
@@ -363,6 +467,7 @@ static struct aa_profile *__attach_match(const char *name,
 
 /**
  * find_attach - do attachment search for unconfined processes
+ * @bprm - binprm structure of transitioning task
  * @ns: the current namespace  (NOT NULL)
  * @list: list to search  (NOT NULL)
  * @name: the executable name to match against  (NOT NULL)
@@ -370,13 +475,14 @@ static struct aa_profile *__attach_match(const char *name,
  *
  * Returns: label or NULL if no match found
  */
-static struct aa_label *find_attach(struct aa_ns *ns, struct list_head *list,
+static struct aa_label *find_attach(const struct linux_binprm *bprm,
+				    struct aa_ns *ns, struct list_head *list,
 				    const char *name, const char **info)
 {
 	struct aa_profile *profile;
 
 	rcu_read_lock();
-	profile = aa_get_profile(__attach_match(name, list, info));
+	profile = aa_get_profile(__attach_match(bprm, name, list, info));
 	rcu_read_unlock();
 
 	return profile ? &profile->label : NULL;
@@ -432,6 +538,7 @@ struct aa_label *x_table_lookup(struct aa_profile *profile, u32 xindex,
 /**
  * x_to_label - get target label for a given xindex
  * @profile: current profile  (NOT NULL)
+ * @bprm: binprm structure of transitioning task
  * @name: name to lookup (NOT NULL)
  * @xindex: index into x transition table
  * @lookupname: returns: name used in lookup if one was specified (NOT NULL)
@@ -441,6 +548,7 @@ struct aa_label *x_table_lookup(struct aa_profile *profile, u32 xindex,
  * Returns: refcounted label or NULL if not found available
  */
 static struct aa_label *x_to_label(struct aa_profile *profile,
+				   const struct linux_binprm *bprm,
 				   const char *name, u32 xindex,
 				   const char **lookupname,
 				   const char **info)
@@ -468,11 +576,11 @@ static struct aa_label *x_to_label(struct aa_profile *profile,
 	case AA_X_NAME:
 		if (xindex & AA_X_CHILD)
 			/* released by caller */
-			new = find_attach(ns, &profile->base.profiles,
+			new = find_attach(bprm, ns, &profile->base.profiles,
 					  name, info);
 		else
 			/* released by caller */
-			new = find_attach(ns, &ns->base.profiles,
+			new = find_attach(bprm, ns, &ns->base.profiles,
 					  name, info);
 		*lookupname = name;
 		break;
@@ -512,6 +620,8 @@ static struct aa_label *profile_transition(struct aa_profile *profile,
 					   bool *secure_exec)
 {
 	struct aa_label *new = NULL;
+	struct aa_profile *component;
+	struct label_it i;
 	const char *info = NULL, *name = NULL, *target = NULL;
 	unsigned int state = profile->file.start;
 	struct aa_perms perms = {};
@@ -536,8 +646,8 @@ static struct aa_label *profile_transition(struct aa_profile *profile,
 	}
 
 	if (profile_unconfined(profile)) {
-		new = find_attach(profile->ns, &profile->ns->base.profiles,
-				  name, &info);
+		new = find_attach(bprm, profile->ns,
+				  &profile->ns->base.profiles, name, &info);
 		if (new) {
 			AA_DEBUG("unconfined attached to new label");
 			return new;
@@ -550,7 +660,8 @@ static struct aa_label *profile_transition(struct aa_profile *profile,
 	state = aa_str_perms(profile->file.dfa, state, name, cond, &perms);
 	if (perms.allow & MAY_EXEC) {
 		/* exec permission determine how to transition */
-		new = x_to_label(profile, name, perms.xindex, &target, &info);
+		new = x_to_label(profile, bprm, name, perms.xindex, &target,
+				 &info);
 		if (new && new->proxy == profile->label.proxy && info) {
 			/* hack ix fallback - improve how this is detected */
 			goto audit;
@@ -559,6 +670,21 @@ static struct aa_label *profile_transition(struct aa_profile *profile,
 			info = "profile transition not found";
 			/* remove MAY_EXEC to audit as failure */
 			perms.allow &= ~MAY_EXEC;
+		} else {
+			/* verify that each component's xattr requirements are
+			 * met, and fail execution otherwise
+			 */
+			label_for_each(i, new, component) {
+				if (aa_xattrs_match(bprm, component, state) <
+				    0) {
+					error = -EACCES;
+					info = "required xattrs not present";
+					perms.allow &= ~MAY_EXEC;
+					aa_put_label(new);
+					new = NULL;
+					goto audit;
+				}
+			}
 		}
 	} else if (COMPLAIN_MODE(profile)) {
 		/* no exec permission - learning mode */
@@ -592,22 +718,6 @@ static struct aa_label *profile_transition(struct aa_profile *profile,
 	if (!new)
 		goto audit;
 
-	/* Policy has specified a domain transitions. if no_new_privs and
-	 * confined and not transitioning to the current domain fail.
-	 *
-	 * NOTE: Domain transitions from unconfined and to stritly stacked
-	 * subsets are allowed even when no_new_privs is set because this
-	 * aways results in a further reduction of permissions.
-	 */
-	if ((bprm->unsafe & LSM_UNSAFE_NO_NEW_PRIVS) &&
-	    !profile_unconfined(profile) &&
-	    !aa_label_is_subset(new, &profile->label)) {
-		error = -EPERM;
-		info = "no new privs";
-		nonewprivs = true;
-		perms.allow &= ~MAY_EXEC;
-		goto audit;
-	}
 
 	if (!(perms.xindex & AA_X_UNSAFE)) {
 		if (DEBUG_ON) {
@@ -684,21 +794,6 @@ static int profile_onexec(struct aa_profile *profile, struct aa_label *onexec,
 		perms.allow &= ~AA_MAY_ONEXEC;
 		goto audit;
 	}
-	/* Policy has specified a domain transitions. if no_new_privs and
-	 * confined and not transitioning to the current domain fail.
-	 *
-	 * NOTE: Domain transitions from unconfined and to stritly stacked
-	 * subsets are allowed even when no_new_privs is set because this
-	 * aways results in a further reduction of permissions.
-	 */
-	if ((bprm->unsafe & LSM_UNSAFE_NO_NEW_PRIVS) &&
-	    !profile_unconfined(profile) &&
-	    !aa_label_is_subset(onexec, &profile->label)) {
-		error = -EPERM;
-		info = "no new privs";
-		perms.allow &= ~AA_MAY_ONEXEC;
-		goto audit;
-	}
 
 	if (!(perms.xindex & AA_X_UNSAFE)) {
 		if (DEBUG_ON) {
@@ -794,10 +889,22 @@ int apparmor_bprm_set_creds(struct linux_binprm *bprm)
 	if (bprm->called_set_creds)
 		return 0;
 
-	ctx = cred_ctx(bprm->cred);
+	ctx = task_ctx(current);
+	AA_BUG(!cred_label(bprm->cred));
 	AA_BUG(!ctx);
 
-	label = aa_get_newest_label(ctx->label);
+	label = aa_get_newest_label(cred_label(bprm->cred));
+
+	/*
+	 * Detect no new privs being set, and store the label it
+	 * occurred under. Ideally this would happen when nnp
+	 * is set but there isn't a good way to do that yet.
+	 *
+	 * Testing for unconfined must be done before the subset test
+	 */
+	if ((bprm->unsafe & LSM_UNSAFE_NO_NEW_PRIVS) && !unconfined(label) &&
+	    !ctx->nnp)
+		ctx->nnp = aa_get_label(label);
 
 	/* buffer freed below, name is pointer into buffer */
 	get_buffers(buffer);
@@ -819,7 +926,20 @@ int apparmor_bprm_set_creds(struct linux_binprm *bprm)
 		goto done;
 	}
 
-	/* TODO: Add ns level no_new_privs subset test */
+	/* Policy has specified a domain transition. If no_new_privs and
+	 * confined, ensure the transition is to confinement that is a subset
+	 * of the confinement when the task entered no new privs.
+	 *
+	 * NOTE: Domain transitions from unconfined and to stacked
+	 * subsets are allowed even when no_new_privs is set because this
+	 * always results in a further reduction of permissions.
+	 */
+	if ((bprm->unsafe & LSM_UNSAFE_NO_NEW_PRIVS) &&
+	    !unconfined(label) && !aa_label_is_subset(new, ctx->nnp)) {
+		error = -EPERM;
+		info = "no new privs";
+		goto audit;
+	}
 
 	if (bprm->unsafe & LSM_UNSAFE_SHARE) {
 		/* FIXME: currently don't mediate shared state */
@@ -853,14 +973,11 @@ int apparmor_bprm_set_creds(struct linux_binprm *bprm)
 		}
 		bprm->per_clear |= PER_CLEAR_ON_SETID;
 	}
-	aa_put_label(ctx->label);
-	/* transfer reference, released when ctx is freed */
-	ctx->label = new;
+	aa_put_label(cred_label(bprm->cred));
+	/* transfer reference, released when cred is freed */
+	cred_label(bprm->cred) = new;
 
 done:
-	/* clear out temporary/transitional state from the context */
-	aa_clear_task_ctx_trans(ctx);
-
 	aa_put_label(label);
 	put_buffers(buffer);
 
@@ -1049,30 +1166,28 @@ build:
 int aa_change_hat(const char *hats[], int count, u64 token, int flags)
 {
 	const struct cred *cred;
-	struct aa_task_ctx *ctx;
+	struct aa_task_ctx *ctx = task_ctx(current);
 	struct aa_label *label, *previous, *new = NULL, *target = NULL;
 	struct aa_profile *profile;
 	struct aa_perms perms = {};
 	const char *info = NULL;
 	int error = 0;
 
-	/*
-	 * Fail explicitly requested domain transitions if no_new_privs.
-	 * There is no exception for unconfined as change_hat is not
-	 * available.
-	 */
-	if (task_no_new_privs(current)) {
-		/* not an apparmor denial per se, so don't log it */
-		AA_DEBUG("no_new_privs - change_hat denied");
-		return -EPERM;
-	}
-
 	/* released below */
 	cred = get_current_cred();
-	ctx = cred_ctx(cred);
 	label = aa_get_newest_cred_label(cred);
 	previous = aa_get_newest_label(ctx->previous);
 
+	/*
+	 * Detect no new privs being set, and store the label it
+	 * occurred under. Ideally this would happen when nnp
+	 * is set but there isn't a good way to do that yet.
+	 *
+	 * Testing for unconfined must be done before the subset test
+	 */
+	if (task_no_new_privs(current) && !unconfined(label) && !ctx->nnp)
+		ctx->nnp = aa_get_label(label);
+
 	if (unconfined(label)) {
 		info = "unconfined can not change_hat";
 		error = -EPERM;
@@ -1093,6 +1208,18 @@ int aa_change_hat(const char *hats[], int count, u64 token, int flags)
 		if (error)
 			goto fail;
 
+		/*
+		 * no new privs prevents domain transitions that would
+		 * reduce restrictions.
+		 */
+		if (task_no_new_privs(current) && !unconfined(label) &&
+		    !aa_label_is_subset(new, ctx->nnp)) {
+			/* not an apparmor denial per se, so don't log it */
+			AA_DEBUG("no_new_privs - change_hat denied");
+			error = -EPERM;
+			goto out;
+		}
+
 		if (flags & AA_CHANGE_TEST)
 			goto out;
 
@@ -1102,6 +1229,18 @@ int aa_change_hat(const char *hats[], int count, u64 token, int flags)
 			/* kill task in case of brute force attacks */
 			goto kill;
 	} else if (previous && !(flags & AA_CHANGE_TEST)) {
+		/*
+		 * no new privs prevents domain transitions that would
+		 * reduce restrictions.
+		 */
+		if (task_no_new_privs(current) && !unconfined(label) &&
+		    !aa_label_is_subset(previous, ctx->nnp)) {
+			/* not an apparmor denial per se, so don't log it */
+			AA_DEBUG("no_new_privs - change_hat denied");
+			error = -EPERM;
+			goto out;
+		}
+
 		/* Return to saved label.  Kill task if restore fails
 		 * to avoid brute force attacks
 		 */
@@ -1144,21 +1283,6 @@ static int change_profile_perms_wrapper(const char *op, const char *name,
 	const char *info = NULL;
 	int error = 0;
 
-	/*
-	 * Fail explicitly requested domain transitions when no_new_privs
-	 * and not unconfined OR the transition results in a stack on
-	 * the current label.
-	 * Stacking domain transitions and transitions from unconfined are
-	 * allowed even when no_new_privs is set because this aways results
-	 * in a reduction of permissions.
-	 */
-	if (task_no_new_privs(current) && !stack &&
-	    !profile_unconfined(profile) &&
-	    !aa_label_is_subset(target, &profile->label)) {
-		info = "no new privs";
-		error = -EPERM;
-	}
-
 	if (!error)
 		error = change_profile_perms(profile, target, stack, request,
 					     profile->file.start, perms);
@@ -1192,10 +1316,23 @@ int aa_change_profile(const char *fqname, int flags)
 	const char *info = NULL;
 	const char *auditname = fqname;		/* retain leading & if stack */
 	bool stack = flags & AA_CHANGE_STACK;
+	struct aa_task_ctx *ctx = task_ctx(current);
 	int error = 0;
 	char *op;
 	u32 request;
 
+	label = aa_get_current_label();
+
+	/*
+	 * Detect no new privs being set, and store the label it
+	 * occurred under. Ideally this would happen when nnp
+	 * is set but there isn't a good way to do that yet.
+	 *
+	 * Testing for unconfined must be done before the subset test
+	 */
+	if (task_no_new_privs(current) && !unconfined(label) && !ctx->nnp)
+		ctx->nnp = aa_get_label(label);
+
 	if (!fqname || !*fqname) {
 		AA_DEBUG("no profile name");
 		return -EINVAL;
@@ -1283,14 +1420,28 @@ check:
 	if (flags & AA_CHANGE_TEST)
 		goto out;
 
+	/* stacking is always a subset, so only check the nonstack case */
+	if (!stack) {
+		new = fn_label_build_in_ns(label, profile, GFP_KERNEL,
+					   aa_get_label(target),
+					   aa_get_label(&profile->label));
+		/*
+		 * no new privs prevents domain transitions that would
+		 * reduce restrictions.
+		 */
+		if (task_no_new_privs(current) && !unconfined(label) &&
+		    !aa_label_is_subset(new, ctx->nnp)) {
+			/* not an apparmor denial per se, so don't log it */
+			AA_DEBUG("no_new_privs - change_hat denied");
+			error = -EPERM;
+			goto out;
+		}
+	}
+
 	if (!(flags & AA_CHANGE_ONEXEC)) {
 		/* only transition profiles in the current ns */
 		if (stack)
 			new = aa_label_merge(label, target, GFP_KERNEL);
-		else
-			new = fn_label_build_in_ns(label, profile, GFP_KERNEL,
-					aa_get_label(target),
-					aa_get_label(&profile->label));
 		if (IS_ERR_OR_NULL(new)) {
 			info = "failed to build target label";
 			error = PTR_ERR(new);
@@ -1299,9 +1450,15 @@ check:
 			goto audit;
 		}
 		error = aa_replace_current_label(new);
-	} else
+	} else {
+		if (new) {
+			aa_put_label(new);
+			new = NULL;
+		}
+
 		/* full transition will be built in exec path */
 		error = aa_set_current_onexec(target, stack);
+	}
 
 audit:
 	error = fn_for_each_in_ns(label, profile,
diff --git a/security/apparmor/file.c b/security/apparmor/file.c
index e79bf44396a3..224b2fef93ca 100644
--- a/security/apparmor/file.c
+++ b/security/apparmor/file.c
@@ -18,9 +18,10 @@
 
 #include "include/apparmor.h"
 #include "include/audit.h"
-#include "include/context.h"
+#include "include/cred.h"
 #include "include/file.h"
 #include "include/match.h"
+#include "include/net.h"
 #include "include/path.h"
 #include "include/policy.h"
 #include "include/label.h"
@@ -560,6 +561,32 @@ static int __file_path_perm(const char *op, struct aa_label *label,
 	return error;
 }
 
+static int __file_sock_perm(const char *op, struct aa_label *label,
+			    struct aa_label *flabel, struct file *file,
+			    u32 request, u32 denied)
+{
+	struct socket *sock = (struct socket *) file->private_data;
+	int error;
+
+	AA_BUG(!sock);
+
+	/* revalidation due to label out of date. No revocation at this time */
+	if (!denied && aa_label_is_subset(flabel, label))
+		return 0;
+
+	/* TODO: improve to skip profiles cached in flabel */
+	error = aa_sock_file_perm(label, op, request, sock);
+	if (denied) {
+		/* TODO: improve to skip profiles checked above */
+		/* also check every profile in the file label */
+		last_error(error, aa_sock_file_perm(flabel, op, request, sock));
+	}
+	if (!error)
+		update_file_ctx(file_ctx(file), label, request);
+
+	return error;
+}
+
 /**
  * aa_file_perm - do permission revalidation check & audit for @file
  * @op: operation being checked
@@ -604,6 +631,9 @@ int aa_file_perm(const char *op, struct aa_label *label, struct file *file,
 		error = __file_path_perm(op, label, flabel, file, request,
 					 denied);
 
+	else if (S_ISSOCK(file_inode(file)->i_mode))
+		error = __file_sock_perm(op, label, flabel, file, request,
+					 denied);
 done:
 	rcu_read_unlock();
 
diff --git a/security/apparmor/include/apparmor.h b/security/apparmor/include/apparmor.h
index 829082c35faa..73d63b58d875 100644
--- a/security/apparmor/include/apparmor.h
+++ b/security/apparmor/include/apparmor.h
@@ -24,12 +24,13 @@
 #define AA_CLASS_UNKNOWN	1
 #define AA_CLASS_FILE		2
 #define AA_CLASS_CAP		3
-#define AA_CLASS_NET		4
+#define AA_CLASS_DEPRECATED	4
 #define AA_CLASS_RLIMITS	5
 #define AA_CLASS_DOMAIN		6
 #define AA_CLASS_MOUNT		7
 #define AA_CLASS_PTRACE		9
 #define AA_CLASS_SIGNAL		10
+#define AA_CLASS_NET		14
 #define AA_CLASS_LABEL		16
 
 #define AA_CLASS_LAST		AA_CLASS_LABEL
diff --git a/security/apparmor/include/audit.h b/security/apparmor/include/audit.h
index 4ac095118717..9c9be9c98c15 100644
--- a/security/apparmor/include/audit.h
+++ b/security/apparmor/include/audit.h
@@ -126,7 +126,20 @@ struct apparmor_audit_data {
 					const char *target;
 					kuid_t ouid;
 				} fs;
-				int signal;
+				struct {
+					int rlim;
+					unsigned long max;
+				} rlim;
+				struct {
+					int signal;
+					int unmappedsig;
+				};
+				struct {
+					int type, protocol;
+					struct sock *peer_sk;
+					void *addr;
+					int addrlen;
+				} net;
 			};
 		};
 		struct {
@@ -135,10 +148,6 @@ struct apparmor_audit_data {
 			long pos;
 		} iface;
 		struct {
-			int rlim;
-			unsigned long max;
-		} rlim;
-		struct {
 			const char *src_name;
 			const char *type;
 			const char *trans;
diff --git a/security/apparmor/include/context.h b/security/apparmor/include/cred.h
index 6ae07e9aaa17..e287b7d0d4be 100644
--- a/security/apparmor/include/context.h
+++ b/security/apparmor/include/cred.h
@@ -21,38 +21,9 @@
 
 #include "label.h"
 #include "policy_ns.h"
+#include "task.h"
 
-#define cred_ctx(X) ((X)->security)
-#define current_ctx() cred_ctx(current_cred())
-
-/**
- * struct aa_task_ctx - primary label for confined tasks
- * @label: the current label   (NOT NULL)
- * @exec: label to transition to on next exec  (MAYBE NULL)
- * @previous: label the task may return to     (MAYBE NULL)
- * @token: magic value the task must know for returning to @previous
- *
- * Contains the task's current label (which could change due to
- * change_hat).  Plus the hat_magic needed during change_hat.
- *
- * TODO: make so a task can be confined by a stack of contexts
- */
-struct aa_task_ctx {
-	struct aa_label *label;
-	struct aa_label *onexec;
-	struct aa_label *previous;
-	u64 token;
-};
-
-struct aa_task_ctx *aa_alloc_task_context(gfp_t flags);
-void aa_free_task_context(struct aa_task_ctx *ctx);
-void aa_dup_task_context(struct aa_task_ctx *new,
-			 const struct aa_task_ctx *old);
-int aa_replace_current_label(struct aa_label *label);
-int aa_set_current_onexec(struct aa_label *label, bool stack);
-int aa_set_current_hat(struct aa_label *label, u64 token);
-int aa_restore_previous_label(u64 cookie);
-struct aa_label *aa_get_task_label(struct task_struct *task);
+#define cred_label(X) ((X)->security)
 
 
 /**
@@ -65,10 +36,10 @@ struct aa_label *aa_get_task_label(struct task_struct *task);
  */
 static inline struct aa_label *aa_cred_raw_label(const struct cred *cred)
 {
-	struct aa_task_ctx *ctx = cred_ctx(cred);
+	struct aa_label *label = cred_label(cred);
 
-	AA_BUG(!ctx || !ctx->label);
-	return ctx->label;
+	AA_BUG(!label);
+	return label;
 }
 
 /**
@@ -96,17 +67,6 @@ static inline struct aa_label *__aa_task_raw_label(struct task_struct *task)
 }
 
 /**
- * __aa_task_is_confined - determine if @task has any confinement
- * @task: task to check confinement of  (NOT NULL)
- *
- * If @task != current needs to be called in RCU safe critical section
- */
-static inline bool __aa_task_is_confined(struct task_struct *task)
-{
-	return !unconfined(__aa_task_raw_label(task));
-}
-
-/**
  * aa_current_raw_label - find the current tasks confining label
  *
  * Returns: up to date confining label or the ns unconfined label (NOT NULL)
@@ -213,17 +173,4 @@ static inline struct aa_ns *aa_get_current_ns(void)
 	return ns;
 }
 
-/**
- * aa_clear_task_ctx_trans - clear transition tracking info from the ctx
- * @ctx: task context to clear (NOT NULL)
- */
-static inline void aa_clear_task_ctx_trans(struct aa_task_ctx *ctx)
-{
-	aa_put_label(ctx->previous);
-	aa_put_label(ctx->onexec);
-	ctx->previous = NULL;
-	ctx->onexec = NULL;
-	ctx->token = 0;
-}
-
 #endif /* __AA_CONTEXT_H */
diff --git a/security/apparmor/include/label.h b/security/apparmor/include/label.h
index af22dcbbcb8a..d871e7ff0952 100644
--- a/security/apparmor/include/label.h
+++ b/security/apparmor/include/label.h
@@ -327,9 +327,37 @@ void aa_label_audit(struct audit_buffer *ab, struct aa_label *label, gfp_t gfp);
 void aa_label_seq_print(struct seq_file *f, struct aa_label *label, gfp_t gfp);
 void aa_label_printk(struct aa_label *label, gfp_t gfp);
 
+struct aa_label *aa_label_strn_parse(struct aa_label *base, const char *str,
+				     size_t n, gfp_t gfp, bool create,
+				     bool force_stack);
 struct aa_label *aa_label_parse(struct aa_label *base, const char *str,
 				gfp_t gfp, bool create, bool force_stack);
 
+static inline const char *aa_label_strn_split(const char *str, int n)
+{
+	const char *pos;
+	unsigned int state;
+
+	state = aa_dfa_matchn_until(stacksplitdfa, DFA_START, str, n, &pos);
+	if (!ACCEPT_TABLE(stacksplitdfa)[state])
+		return NULL;
+
+	return pos - 3;
+}
+
+static inline const char *aa_label_str_split(const char *str)
+{
+	const char *pos;
+	unsigned int state;
+
+	state = aa_dfa_match_until(stacksplitdfa, DFA_START, str, &pos);
+	if (!ACCEPT_TABLE(stacksplitdfa)[state])
+		return NULL;
+
+	return pos - 3;
+}
+
+
 
 struct aa_perms;
 int aa_label_match(struct aa_profile *profile, struct aa_label *label,
diff --git a/security/apparmor/include/match.h b/security/apparmor/include/match.h
index add4c6726558..958d2b52a7b7 100644
--- a/security/apparmor/include/match.h
+++ b/security/apparmor/include/match.h
@@ -40,6 +40,7 @@
  */
 
 #define YYTH_MAGIC	0x1B5E783D
+#define YYTH_FLAG_DIFF_ENCODE	1
 
 struct table_set_header {
 	u32 th_magic;		/* YYTH_MAGIC */
@@ -101,6 +102,7 @@ struct aa_dfa {
 };
 
 extern struct aa_dfa *nulldfa;
+extern struct aa_dfa *stacksplitdfa;
 
 #define byte_to_byte(X) (X)
 
@@ -129,9 +131,32 @@ unsigned int aa_dfa_match(struct aa_dfa *dfa, unsigned int start,
 			  const char *str);
 unsigned int aa_dfa_next(struct aa_dfa *dfa, unsigned int state,
 			 const char c);
+unsigned int aa_dfa_match_until(struct aa_dfa *dfa, unsigned int start,
+				const char *str, const char **retpos);
+unsigned int aa_dfa_matchn_until(struct aa_dfa *dfa, unsigned int start,
+				 const char *str, int n, const char **retpos);
 
 void aa_dfa_free_kref(struct kref *kref);
 
+#define WB_HISTORY_SIZE 8
+struct match_workbuf {
+	unsigned int count;
+	unsigned int pos;
+	unsigned int len;
+	unsigned int size;	/* power of 2, same as history size */
+	unsigned int history[WB_HISTORY_SIZE];
+};
+#define DEFINE_MATCH_WB(N)		\
+struct match_workbuf N = {		\
+	.count = 0,			\
+	.pos = 0,			\
+	.len = 0,			\
+	.size = WB_HISTORY_SIZE,			\
+}
+
+unsigned int aa_dfa_leftmatch(struct aa_dfa *dfa, unsigned int start,
+			      const char *str, unsigned int *count);
+
 /**
  * aa_get_dfa - increment refcount on dfa @p
  * @dfa: dfa  (MAYBE NULL)
@@ -159,4 +184,7 @@ static inline void aa_put_dfa(struct aa_dfa *dfa)
 		kref_put(&dfa->count, aa_dfa_free_kref);
 }
 
+#define MATCH_FLAG_DIFF_ENCODE 0x80000000
+#define MARK_DIFF_ENCODE 0x40000000
+
 #endif /* __AA_MATCH_H */
diff --git a/security/apparmor/include/net.h b/security/apparmor/include/net.h
new file mode 100644
index 000000000000..ec7228e857a9
--- /dev/null
+++ b/security/apparmor/include/net.h
@@ -0,0 +1,106 @@
+/*
+ * AppArmor security module
+ *
+ * This file contains AppArmor network mediation definitions.
+ *
+ * Copyright (C) 1998-2008 Novell/SUSE
+ * Copyright 2009-2017 Canonical Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ */
+
+#ifndef __AA_NET_H
+#define __AA_NET_H
+
+#include <net/sock.h>
+#include <linux/path.h>
+
+#include "apparmorfs.h"
+#include "label.h"
+#include "perms.h"
+#include "policy.h"
+
+#define AA_MAY_SEND		AA_MAY_WRITE
+#define AA_MAY_RECEIVE		AA_MAY_READ
+
+#define AA_MAY_SHUTDOWN		AA_MAY_DELETE
+
+#define AA_MAY_CONNECT		AA_MAY_OPEN
+#define AA_MAY_ACCEPT		0x00100000
+
+#define AA_MAY_BIND		0x00200000
+#define AA_MAY_LISTEN		0x00400000
+
+#define AA_MAY_SETOPT		0x01000000
+#define AA_MAY_GETOPT		0x02000000
+
+#define NET_PERMS_MASK (AA_MAY_SEND | AA_MAY_RECEIVE | AA_MAY_CREATE |    \
+			AA_MAY_SHUTDOWN | AA_MAY_BIND | AA_MAY_LISTEN |	  \
+			AA_MAY_CONNECT | AA_MAY_ACCEPT | AA_MAY_SETATTR | \
+			AA_MAY_GETATTR | AA_MAY_SETOPT | AA_MAY_GETOPT)
+
+#define NET_FS_PERMS (AA_MAY_SEND | AA_MAY_RECEIVE | AA_MAY_CREATE |	\
+		      AA_MAY_SHUTDOWN | AA_MAY_CONNECT | AA_MAY_RENAME |\
+		      AA_MAY_SETATTR | AA_MAY_GETATTR | AA_MAY_CHMOD |	\
+		      AA_MAY_CHOWN | AA_MAY_CHGRP | AA_MAY_LOCK |	\
+		      AA_MAY_MPROT)
+
+#define NET_PEER_MASK (AA_MAY_SEND | AA_MAY_RECEIVE | AA_MAY_CONNECT |	\
+		       AA_MAY_ACCEPT)
+struct aa_sk_ctx {
+	struct aa_label *label;
+	struct aa_label *peer;
+};
+
+#define SK_CTX(X) ((X)->sk_security)
+#define SOCK_ctx(X) SOCK_INODE(X)->i_security
+#define DEFINE_AUDIT_NET(NAME, OP, SK, F, T, P)				  \
+	struct lsm_network_audit NAME ## _net = { .sk = (SK),		  \
+						  .family = (F)};	  \
+	DEFINE_AUDIT_DATA(NAME,						  \
+			  ((SK) && (F) != AF_UNIX) ? LSM_AUDIT_DATA_NET : \
+						     LSM_AUDIT_DATA_NONE, \
+			  OP);						  \
+	NAME.u.net = &(NAME ## _net);					  \
+	aad(&NAME)->net.type = (T);					  \
+	aad(&NAME)->net.protocol = (P)
+
+#define DEFINE_AUDIT_SK(NAME, OP, SK)					\
+	DEFINE_AUDIT_NET(NAME, OP, SK, (SK)->sk_family, (SK)->sk_type,	\
+			 (SK)->sk_protocol)
+
+
+#define af_select(FAMILY, FN, DEF_FN)		\
+({						\
+	int __e;				\
+	switch ((FAMILY)) {			\
+	default:				\
+		__e = DEF_FN;			\
+	}					\
+	__e;					\
+})
+
+extern struct aa_sfs_entry aa_sfs_entry_network[];
+
+void audit_net_cb(struct audit_buffer *ab, void *va);
+int aa_profile_af_perm(struct aa_profile *profile, struct common_audit_data *sa,
+		       u32 request, u16 family, int type);
+int aa_af_perm(struct aa_label *label, const char *op, u32 request, u16 family,
+	       int type, int protocol);
+static inline int aa_profile_af_sk_perm(struct aa_profile *profile,
+					struct common_audit_data *sa,
+					u32 request,
+					struct sock *sk)
+{
+	return aa_profile_af_perm(profile, sa, request, sk->sk_family,
+				  sk->sk_type);
+}
+int aa_sk_perm(const char *op, u32 request, struct sock *sk);
+
+int aa_sock_file_perm(struct aa_label *label, const char *op, u32 request,
+		      struct socket *sock);
+
+#endif /* __AA_NET_H */
diff --git a/security/apparmor/include/perms.h b/security/apparmor/include/perms.h
index d7b7e7115160..38aa6247d00f 100644
--- a/security/apparmor/include/perms.h
+++ b/security/apparmor/include/perms.h
@@ -138,9 +138,10 @@ extern struct aa_perms allperms;
 
 
 void aa_perm_mask_to_str(char *str, const char *chrs, u32 mask);
-void aa_audit_perm_names(struct audit_buffer *ab, const char **names, u32 mask);
+void aa_audit_perm_names(struct audit_buffer *ab, const char * const *names,
+			 u32 mask);
 void aa_audit_perm_mask(struct audit_buffer *ab, u32 mask, const char *chrs,
-			u32 chrsmask, const char **names, u32 namesmask);
+			u32 chrsmask, const char * const *names, u32 namesmask);
 void aa_apply_modes_to_perms(struct aa_profile *profile,
 			     struct aa_perms *perms);
 void aa_compute_perms(struct aa_dfa *dfa, unsigned int state,
diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h
index 17fe41a9cac3..ab64c6b5db5a 100644
--- a/security/apparmor/include/policy.h
+++ b/security/apparmor/include/policy.h
@@ -30,6 +30,7 @@
 #include "file.h"
 #include "lib.h"
 #include "label.h"
+#include "net.h"
 #include "perms.h"
 #include "resource.h"
 
@@ -148,6 +149,10 @@ struct aa_profile {
 	struct aa_policydb policy;
 	struct aa_file_rules file;
 	struct aa_caps caps;
+
+	int xattr_count;
+	char **xattrs;
+
 	struct aa_rlimit rlimits;
 
 	struct aa_loaddata *rawdata;
@@ -209,15 +214,15 @@ static inline struct aa_profile *aa_get_newest_profile(struct aa_profile *p)
 	return labels_profile(aa_get_newest_label(&p->label));
 }
 
-#define PROFILE_MEDIATES(P, T)  ((P)->policy.start[(T)])
-/* safe version of POLICY_MEDIATES for full range input */
-static inline unsigned int PROFILE_MEDIATES_SAFE(struct aa_profile *profile,
-						 unsigned char class)
-{
-	if (profile->policy.dfa)
-		return aa_dfa_match_len(profile->policy.dfa,
-					profile->policy.start[0], &class, 1);
-	return 0;
+#define PROFILE_MEDIATES(P, T)  ((P)->policy.start[(unsigned char) (T)])
+static inline unsigned int PROFILE_MEDIATES_AF(struct aa_profile *profile,
+					       u16 AF) {
+	unsigned int state = PROFILE_MEDIATES(profile, AA_CLASS_NET);
+	__be16 be_af = cpu_to_be16(AF);
+
+	if (!state)
+		return 0;
+	return aa_dfa_match_len(profile->policy.dfa, state, (char *) &be_af, 2);
 }
 
 /**
diff --git a/security/apparmor/include/policy_unpack.h b/security/apparmor/include/policy_unpack.h
index be6cd69ac319..8db4ab759e80 100644
--- a/security/apparmor/include/policy_unpack.h
+++ b/security/apparmor/include/policy_unpack.h
@@ -70,7 +70,7 @@ struct aa_loaddata {
 	int abi;
 	unsigned char *hash;
 
-	char data[];
+	char *data;
 };
 
 int aa_unpack(struct aa_loaddata *udata, struct list_head *lh, const char **ns);
diff --git a/security/apparmor/include/sig_names.h b/security/apparmor/include/sig_names.h
index 92e62fe95292..cbf7a997ed84 100644
--- a/security/apparmor/include/sig_names.h
+++ b/security/apparmor/include/sig_names.h
@@ -2,6 +2,9 @@
 
 #define SIGUNKNOWN 0
 #define MAXMAPPED_SIG 35
+#define MAXMAPPED_SIGNAME (MAXMAPPED_SIG + 1)
+#define SIGRT_BASE 128
+
 /* provide a mapping of arch signal to internal signal # for mediation
  * those that are always an alias SIGCLD for SIGCLHD and SIGPOLL for SIGIO
  * map to the same entry those that may/or may not get a separate entry
@@ -56,7 +59,7 @@ static const int sig_map[MAXMAPPED_SIG] = {
 };
 
 /* this table is ordered post sig_map[sig] mapping */
-static const char *const sig_names[MAXMAPPED_SIG + 1] = {
+static const char *const sig_names[MAXMAPPED_SIGNAME] = {
 	"unknown",
 	"hup",
 	"int",
diff --git a/security/apparmor/include/task.h b/security/apparmor/include/task.h
new file mode 100644
index 000000000000..55edaa1d83f8
--- /dev/null
+++ b/security/apparmor/include/task.h
@@ -0,0 +1,94 @@
+/*
+ * AppArmor security module
+ *
+ * This file contains AppArmor task related definitions and mediation
+ *
+ * Copyright 2017 Canonical Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ */
+
+#ifndef __AA_TASK_H
+#define __AA_TASK_H
+
+#define task_ctx(X) ((X)->security)
+
+/*
+ * struct aa_task_ctx - information for current task label change
+ * @nnp: snapshot of label at time of no_new_privs
+ * @onexec: label to transition to on next exec  (MAY BE NULL)
+ * @previous: label the task may return to     (MAY BE NULL)
+ * @token: magic value the task must know for returning to @previous
+ */
+struct aa_task_ctx {
+	struct aa_label *nnp;
+	struct aa_label *onexec;
+	struct aa_label *previous;
+	u64 token;
+};
+
+int aa_replace_current_label(struct aa_label *label);
+int aa_set_current_onexec(struct aa_label *label, bool stack);
+int aa_set_current_hat(struct aa_label *label, u64 token);
+int aa_restore_previous_label(u64 cookie);
+struct aa_label *aa_get_task_label(struct task_struct *task);
+
+/**
+ * aa_alloc_task_ctx - allocate a new task_ctx
+ * @flags: gfp flags for allocation
+ *
+ * Returns: allocated buffer or NULL on failure
+ */
+static inline struct aa_task_ctx *aa_alloc_task_ctx(gfp_t flags)
+{
+	return kzalloc(sizeof(struct aa_task_ctx), flags);
+}
+
+/**
+ * aa_free_task_ctx - put the labels held by a task_ctx and free it
+ * @ctx: task_ctx to free (MAY BE NULL)
+ */
+static inline void aa_free_task_ctx(struct aa_task_ctx *ctx)
+{
+	if (ctx) {
+		aa_put_label(ctx->nnp);
+		aa_put_label(ctx->previous);
+		aa_put_label(ctx->onexec);
+
+		kzfree(ctx);
+	}
+}
+
+/**
+ * aa_dup_task_ctx - duplicate a task context, incrementing reference counts
+ * @new: a blank task context      (NOT NULL)
+ * @old: the task context to copy  (NOT NULL)
+ */
+static inline void aa_dup_task_ctx(struct aa_task_ctx *new,
+				   const struct aa_task_ctx *old)
+{
+	*new = *old;
+	aa_get_label(new->nnp);
+	aa_get_label(new->previous);
+	aa_get_label(new->onexec);
+}
+
+/**
+ * aa_clear_task_ctx_trans - clear transition tracking info from the ctx
+ * @ctx: task context to clear (NOT NULL)
+ */
+static inline void aa_clear_task_ctx_trans(struct aa_task_ctx *ctx)
+{
+	AA_BUG(!ctx);
+
+	aa_put_label(ctx->previous);
+	aa_put_label(ctx->onexec);
+	ctx->previous = NULL;
+	ctx->onexec = NULL;
+	ctx->token = 0;
+}
+
+#endif /* __AA_TASK_H */
diff --git a/security/apparmor/ipc.c b/security/apparmor/ipc.c
index b40678f3c1d5..527ea1557120 100644
--- a/security/apparmor/ipc.c
+++ b/security/apparmor/ipc.c
@@ -17,7 +17,7 @@
 
 #include "include/audit.h"
 #include "include/capability.h"
-#include "include/context.h"
+#include "include/cred.h"
 #include "include/policy.h"
 #include "include/ipc.h"
 #include "include/sig_names.h"
@@ -138,7 +138,7 @@ static inline int map_signal_num(int sig)
 	if (sig > SIGRTMAX)
 		return SIGUNKNOWN;
 	else if (sig >= SIGRTMIN)
-		return sig - SIGRTMIN + 128;	/* rt sigs mapped to 128 */
+		return sig - SIGRTMIN + SIGRT_BASE;
 	else if (sig < MAXMAPPED_SIG)
 		return sig_map[sig];
 	return SIGUNKNOWN;
@@ -174,60 +174,48 @@ static void audit_signal_cb(struct audit_buffer *ab, void *va)
 			audit_signal_mask(ab, aad(sa)->denied);
 		}
 	}
-	if (aad(sa)->signal < MAXMAPPED_SIG)
+	if (aad(sa)->signal == SIGUNKNOWN)
+		audit_log_format(ab, " signal=unknown(%d)",
+				 aad(sa)->unmappedsig);
+	else if (aad(sa)->signal < MAXMAPPED_SIGNAME)
 		audit_log_format(ab, " signal=%s", sig_names[aad(sa)->signal]);
 	else
 		audit_log_format(ab, " signal=rtmin+%d",
-				 aad(sa)->signal - 128);
+				 aad(sa)->signal - SIGRT_BASE);
 	audit_log_format(ab, " peer=");
 	aa_label_xaudit(ab, labels_ns(aad(sa)->label), aad(sa)->peer,
 			FLAGS_NONE, GFP_ATOMIC);
 }
 
-/* TODO: update to handle compound name&name2, conditionals */
-static void profile_match_signal(struct aa_profile *profile, const char *label,
-				 int signal, struct aa_perms *perms)
-{
-	unsigned int state;
-
-	/* TODO: secondary cache check <profile, profile, perm> */
-	state = aa_dfa_next(profile->policy.dfa,
-			    profile->policy.start[AA_CLASS_SIGNAL],
-			    signal);
-	state = aa_dfa_match(profile->policy.dfa, state, label);
-	aa_compute_perms(profile->policy.dfa, state, perms);
-}
-
 static int profile_signal_perm(struct aa_profile *profile,
-			       struct aa_profile *peer, u32 request,
+			       struct aa_label *peer, u32 request,
 			       struct common_audit_data *sa)
 {
 	struct aa_perms perms;
+	unsigned int state;
 
 	if (profile_unconfined(profile) ||
 	    !PROFILE_MEDIATES(profile, AA_CLASS_SIGNAL))
 		return 0;
 
-	aad(sa)->peer = &peer->label;
-	profile_match_signal(profile, peer->base.hname, aad(sa)->signal,
-			     &perms);
+	aad(sa)->peer = peer;
+	/* TODO: secondary cache check <profile, profile, perm> */
+	state = aa_dfa_next(profile->policy.dfa,
+			    profile->policy.start[AA_CLASS_SIGNAL],
+			    aad(sa)->signal);
+	aa_label_match(profile, peer, state, false, request, &perms);
 	aa_apply_modes_to_perms(profile, &perms);
 	return aa_check_perms(profile, &perms, request, sa, audit_signal_cb);
 }
 
-static int aa_signal_cross_perm(struct aa_profile *sender,
-				struct aa_profile *target,
-				struct common_audit_data *sa)
-{
-	return xcheck(profile_signal_perm(sender, target, MAY_WRITE, sa),
-		      profile_signal_perm(target, sender, MAY_READ, sa));
-}
-
 int aa_may_signal(struct aa_label *sender, struct aa_label *target, int sig)
 {
+	struct aa_profile *profile;
 	DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, OP_SIGNAL);
 
 	aad(&sa)->signal = map_signal_num(sig);
-	return xcheck_labels_profiles(sender, target, aa_signal_cross_perm,
-				      &sa);
+	aad(&sa)->unmappedsig = sig;
+	return xcheck_labels(sender, target, profile,
+			profile_signal_perm(profile, target, MAY_WRITE, &sa),
+			profile_signal_perm(profile, sender, MAY_READ, &sa));
 }
diff --git a/security/apparmor/label.c b/security/apparmor/label.c
index 324fe5c60f87..523250e34837 100644
--- a/security/apparmor/label.c
+++ b/security/apparmor/label.c
@@ -16,7 +16,7 @@
 #include <linux/sort.h>
 
 #include "include/apparmor.h"
-#include "include/context.h"
+#include "include/cred.h"
 #include "include/label.h"
 #include "include/policy.h"
 #include "include/secid.h"
@@ -1808,14 +1808,17 @@ void aa_label_printk(struct aa_label *label, gfp_t gfp)
 	aa_put_ns(ns);
 }
 
-static int label_count_str_entries(const char *str)
+static int label_count_strn_entries(const char *str, size_t n)
 {
+	const char *end = str + n;
 	const char *split;
 	int count = 1;
 
 	AA_BUG(!str);
 
-	for (split = strstr(str, "//&"); split; split = strstr(str, "//&")) {
+	for (split = aa_label_strn_split(str, end - str);
+	     split;
+	     split = aa_label_strn_split(str, end - str)) {
 		count++;
 		str = split + 3;
 	}
@@ -1843,9 +1846,10 @@ static struct aa_profile *fqlookupn_profile(struct aa_label *base,
 }
 
 /**
- * aa_label_parse - parse, validate and convert a text string to a label
+ * aa_label_strn_parse - parse, validate and convert a text string to a label
  * @base: base label to use for lookups (NOT NULL)
  * @str: null terminated text string (NOT NULL)
+ * @n: length of str to parse, will stop at \0 if encountered before n
  * @gfp: allocation type
  * @create: true if should create compound labels if they don't exist
  * @force_stack: true if should stack even if no leading &
@@ -1853,19 +1857,24 @@ static struct aa_profile *fqlookupn_profile(struct aa_label *base,
  * Returns: the matching refcounted label if present
  *     else ERRPTR
  */
-struct aa_label *aa_label_parse(struct aa_label *base, const char *str,
-				gfp_t gfp, bool create, bool force_stack)
+struct aa_label *aa_label_strn_parse(struct aa_label *base, const char *str,
+				     size_t n, gfp_t gfp, bool create,
+				     bool force_stack)
 {
 	DEFINE_VEC(profile, vec);
 	struct aa_label *label, *currbase = base;
 	int i, len, stack = 0, error;
-	char *split;
+	const char *end = str + n;
+	const char *split;
 
 	AA_BUG(!base);
 	AA_BUG(!str);
 
-	str = skip_spaces(str);
-	len = label_count_str_entries(str);
+	str = skipn_spaces(str, n);
+	if (str == NULL || (*str == '=' && base != &root_ns->unconfined->label))
+		return ERR_PTR(-EINVAL);
+
+	len = label_count_strn_entries(str, end - str);
 	if (*str == '&' || force_stack) {
 		/* stack on top of base */
 		stack = base->size;
@@ -1873,8 +1882,6 @@ struct aa_label *aa_label_parse(struct aa_label *base, const char *str,
 		if (*str == '&')
 			str++;
 	}
-	if (*str == '=')
-		base = &root_ns->unconfined->label;
 
 	error = vec_setup(profile, vec, len, gfp);
 	if (error)
@@ -1883,7 +1890,8 @@ struct aa_label *aa_label_parse(struct aa_label *base, const char *str,
 	for (i = 0; i < stack; i++)
 		vec[i] = aa_get_profile(base->vec[i]);
 
-	for (split = strstr(str, "//&"), i = stack; split && i < len; i++) {
+	for (split = aa_label_strn_split(str, end - str), i = stack;
+	     split && i < len; i++) {
 		vec[i] = fqlookupn_profile(base, currbase, str, split - str);
 		if (!vec[i])
 			goto fail;
@@ -1894,11 +1902,11 @@ struct aa_label *aa_label_parse(struct aa_label *base, const char *str,
 		if (vec[i]->ns != labels_ns(currbase))
 			currbase = &vec[i]->label;
 		str = split + 3;
-		split = strstr(str, "//&");
+		split = aa_label_strn_split(str, end - str);
 	}
 	/* last element doesn't have a split */
 	if (i < len) {
-		vec[i] = fqlookupn_profile(base, currbase, str, strlen(str));
+		vec[i] = fqlookupn_profile(base, currbase, str, end - str);
 		if (!vec[i])
 			goto fail;
 	}
@@ -1930,6 +1938,12 @@ fail:
 	goto out;
 }
 
+struct aa_label *aa_label_parse(struct aa_label *base, const char *str,
+				gfp_t gfp, bool create, bool force_stack)
+{
+	return aa_label_strn_parse(base, str, strlen(str), gfp, create,
+				   force_stack);
+}
 
 /**
  * aa_labelset_destroy - remove all labels from the label set
diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c
index 4d5e98e49d5e..068a9f471f77 100644
--- a/security/apparmor/lib.c
+++ b/security/apparmor/lib.c
@@ -211,7 +211,8 @@ void aa_perm_mask_to_str(char *str, const char *chrs, u32 mask)
 	*str = '\0';
 }
 
-void aa_audit_perm_names(struct audit_buffer *ab, const char **names, u32 mask)
+void aa_audit_perm_names(struct audit_buffer *ab, const char * const *names,
+			 u32 mask)
 {
 	const char *fmt = "%s";
 	unsigned int i, perm = 1;
@@ -229,7 +230,7 @@ void aa_audit_perm_names(struct audit_buffer *ab, const char **names, u32 mask)
 }
 
 void aa_audit_perm_mask(struct audit_buffer *ab, u32 mask, const char *chrs,
-			u32 chrsmask, const char **names, u32 namesmask)
+			u32 chrsmask, const char * const *names, u32 namesmask)
 {
 	char str[33];
 
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index 6134302c143c..ce2b89e9ad94 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -29,9 +29,10 @@
 #include "include/apparmorfs.h"
 #include "include/audit.h"
 #include "include/capability.h"
-#include "include/context.h"
+#include "include/cred.h"
 #include "include/file.h"
 #include "include/ipc.h"
+#include "include/net.h"
 #include "include/path.h"
 #include "include/label.h"
 #include "include/policy.h"
@@ -50,12 +51,12 @@ DEFINE_PER_CPU(struct aa_buffers, aa_buffers);
  */
 
 /*
- * free the associated aa_task_ctx and put its labels
+ * put the associated labels
  */
 static void apparmor_cred_free(struct cred *cred)
 {
-	aa_free_task_context(cred_ctx(cred));
-	cred_ctx(cred) = NULL;
+	aa_put_label(cred_label(cred));
+	cred_label(cred) = NULL;
 }
 
 /*
@@ -63,30 +64,17 @@ static void apparmor_cred_free(struct cred *cred)
  */
 static int apparmor_cred_alloc_blank(struct cred *cred, gfp_t gfp)
 {
-	/* freed by apparmor_cred_free */
-	struct aa_task_ctx *ctx = aa_alloc_task_context(gfp);
-
-	if (!ctx)
-		return -ENOMEM;
-
-	cred_ctx(cred) = ctx;
+	cred_label(cred) = NULL;
 	return 0;
 }
 
 /*
- * prepare new aa_task_ctx for modification by prepare_cred block
+ * prepare new cred label for modification by prepare_cred block
  */
 static int apparmor_cred_prepare(struct cred *new, const struct cred *old,
 				 gfp_t gfp)
 {
-	/* freed by apparmor_cred_free */
-	struct aa_task_ctx *ctx = aa_alloc_task_context(gfp);
-
-	if (!ctx)
-		return -ENOMEM;
-
-	aa_dup_task_context(ctx, cred_ctx(old));
-	cred_ctx(new) = ctx;
+	cred_label(new) = aa_get_newest_label(cred_label(old));
 	return 0;
 }
 
@@ -95,10 +83,28 @@ static int apparmor_cred_prepare(struct cred *new, const struct cred *old,
  */
 static void apparmor_cred_transfer(struct cred *new, const struct cred *old)
 {
-	const struct aa_task_ctx *old_ctx = cred_ctx(old);
-	struct aa_task_ctx *new_ctx = cred_ctx(new);
+	cred_label(new) = aa_get_newest_label(cred_label(old));
+}
+
+static void apparmor_task_free(struct task_struct *task)
+{
+
+	aa_free_task_ctx(task_ctx(task));
+	task_ctx(task) = NULL;
+}
+
+static int apparmor_task_alloc(struct task_struct *task,
+			       unsigned long clone_flags)
+{
+	struct aa_task_ctx *new = aa_alloc_task_ctx(GFP_KERNEL);
+
+	if (!new)
+		return -ENOMEM;
+
+	aa_dup_task_ctx(new, task_ctx(current));
+	task_ctx(task) = new;
 
-	aa_dup_task_context(new_ctx, old_ctx);
+	return 0;
 }
 
 static int apparmor_ptrace_access_check(struct task_struct *child,
@@ -576,11 +582,11 @@ static int apparmor_getprocattr(struct task_struct *task, char *name,
 	int error = -ENOENT;
 	/* released below */
 	const struct cred *cred = get_task_cred(task);
-	struct aa_task_ctx *ctx = cred_ctx(cred);
+	struct aa_task_ctx *ctx = task_ctx(current);
 	struct aa_label *label = NULL;
 
 	if (strcmp(name, "current") == 0)
-		label = aa_get_newest_label(ctx->label);
+		label = aa_get_newest_label(cred_label(cred));
 	else if (strcmp(name, "prev") == 0  && ctx->previous)
 		label = aa_get_newest_label(ctx->previous);
 	else if (strcmp(name, "exec") == 0 && ctx->onexec)
@@ -677,11 +683,11 @@ fail:
 static void apparmor_bprm_committing_creds(struct linux_binprm *bprm)
 {
 	struct aa_label *label = aa_current_raw_label();
-	struct aa_task_ctx *new_ctx = cred_ctx(bprm->cred);
+	struct aa_label *new_label = cred_label(bprm->cred);
 
 	/* bail out if unconfined or not changing profile */
-	if ((new_ctx->label->proxy == label->proxy) ||
-	    (unconfined(new_ctx->label)))
+	if ((new_label->proxy == label->proxy) ||
+	    (unconfined(new_label)))
 		return;
 
 	aa_inherit_files(bprm->cred, current->files);
@@ -689,7 +695,7 @@ static void apparmor_bprm_committing_creds(struct linux_binprm *bprm)
 	current->pdeath_signal = 0;
 
 	/* reset soft limits and set hard limits for the new label */
-	__aa_transition_rlimits(label, new_ctx->label);
+	__aa_transition_rlimits(label, new_label);
 }
 
 /**
@@ -698,7 +704,9 @@ static void apparmor_bprm_committing_creds(struct linux_binprm *bprm)
  */
 static void apparmor_bprm_committed_creds(struct linux_binprm *bprm)
 {
-	/* TODO: cleanup signals - ipc mediation */
+	/* clear out temporary/transitional state from the context */
+	aa_clear_task_ctx_trans(task_ctx(current));
+
 	return;
 }
 
@@ -716,16 +724,23 @@ static int apparmor_task_setrlimit(struct task_struct *task,
 }
 
 static int apparmor_task_kill(struct task_struct *target, struct siginfo *info,
-			      int sig, u32 secid)
+			      int sig, const struct cred *cred)
 {
 	struct aa_label *cl, *tl;
 	int error;
 
-	if (secid)
-		/* TODO: after secid to label mapping is done.
-		 *  Dealing with USB IO specific behavior
+	if (cred) {
+		/*
+		 * Dealing with USB IO specific behavior
 		 */
-		return 0;
+		cl = aa_get_newest_cred_label(cred);
+		tl = aa_get_task_label(target);
+		error = aa_may_signal(cl, tl, sig);
+		aa_put_label(cl);
+		aa_put_label(tl);
+		return error;
+	}
+
 	cl = __begin_current_label_crit_section();
 	tl = aa_get_task_label(target);
 	error = aa_may_signal(cl, tl, sig);
@@ -735,6 +750,373 @@ static int apparmor_task_kill(struct task_struct *target, struct siginfo *info,
 	return error;
 }
 
+/**
+ * apparmor_sk_alloc_security - allocate and attach the sk_security field
+ */
+static int apparmor_sk_alloc_security(struct sock *sk, int family, gfp_t flags)
+{
+	struct aa_sk_ctx *ctx;
+
+	ctx = kzalloc(sizeof(*ctx), flags);
+	if (!ctx)
+		return -ENOMEM;
+
+	SK_CTX(sk) = ctx;
+
+	return 0;
+}
+
+/**
+ * apparmor_sk_free_security - free the sk_security field
+ */
+static void apparmor_sk_free_security(struct sock *sk)
+{
+	struct aa_sk_ctx *ctx = SK_CTX(sk);
+
+	SK_CTX(sk) = NULL;
+	aa_put_label(ctx->label);
+	aa_put_label(ctx->peer);
+	kfree(ctx);
+}
+
+/**
+ * apparmor_sk_clone_security - clone the sk_security field
+ */
+static void apparmor_sk_clone_security(const struct sock *sk,
+				       struct sock *newsk)
+{
+	struct aa_sk_ctx *ctx = SK_CTX(sk);
+	struct aa_sk_ctx *new = SK_CTX(newsk);
+
+	new->label = aa_get_label(ctx->label);
+	new->peer = aa_get_label(ctx->peer);
+}
+
+/**
+ * apparmor_socket_create - check perms before creating a new socket
+ */
+static int apparmor_socket_create(int family, int type, int protocol, int kern)
+{
+	struct aa_label *label;
+	int error = 0;
+
+	AA_BUG(in_interrupt());
+
+	label = begin_current_label_crit_section();
+	if (!(kern || unconfined(label)))
+		error = af_select(family,
+				  create_perm(label, family, type, protocol),
+				  aa_af_perm(label, OP_CREATE, AA_MAY_CREATE,
+					     family, type, protocol));
+	end_current_label_crit_section(label);
+
+	return error;
+}
+
+/**
+ * apparmor_socket_post_create - setup the per-socket security struct
+ *
+ * Note:
+ * -   kernel sockets currently labeled unconfined but we may want to
+ *     move to a special kernel label
+ * -   socket may not have sk here if created with sock_create_lite or
+ *     sock_alloc. These should be accept cases which will be handled in
+ *     sock_graft.
+ */
+static int apparmor_socket_post_create(struct socket *sock, int family,
+				       int type, int protocol, int kern)
+{
+	struct aa_label *label;
+
+	if (kern) {
+		struct aa_ns *ns = aa_get_current_ns();
+
+		label = aa_get_label(ns_unconfined(ns));
+		aa_put_ns(ns);
+	} else
+		label = aa_get_current_label();
+
+	if (sock->sk) {
+		struct aa_sk_ctx *ctx = SK_CTX(sock->sk);
+
+		aa_put_label(ctx->label);
+		ctx->label = aa_get_label(label);
+	}
+	aa_put_label(label);
+
+	return 0;
+}
+
+/**
+ * apparmor_socket_bind - check perms before bind addr to socket
+ */
+static int apparmor_socket_bind(struct socket *sock,
+				struct sockaddr *address, int addrlen)
+{
+	AA_BUG(!sock);
+	AA_BUG(!sock->sk);
+	AA_BUG(!address);
+	AA_BUG(in_interrupt());
+
+	return af_select(sock->sk->sk_family,
+			 bind_perm(sock, address, addrlen),
+			 aa_sk_perm(OP_BIND, AA_MAY_BIND, sock->sk));
+}
+
+/**
+ * apparmor_socket_connect - check perms before connecting @sock to @address
+ */
+static int apparmor_socket_connect(struct socket *sock,
+				   struct sockaddr *address, int addrlen)
+{
+	AA_BUG(!sock);
+	AA_BUG(!sock->sk);
+	AA_BUG(!address);
+	AA_BUG(in_interrupt());
+
+	return af_select(sock->sk->sk_family,
+			 connect_perm(sock, address, addrlen),
+			 aa_sk_perm(OP_CONNECT, AA_MAY_CONNECT, sock->sk));
+}
+
+/**
+ * apparmor_socket_listen - check perms before allowing listen
+ */
+static int apparmor_socket_listen(struct socket *sock, int backlog)
+{
+	AA_BUG(!sock);
+	AA_BUG(!sock->sk);
+	AA_BUG(in_interrupt());
+
+	return af_select(sock->sk->sk_family,
+			 listen_perm(sock, backlog),
+			 aa_sk_perm(OP_LISTEN, AA_MAY_LISTEN, sock->sk));
+}
+
+/**
+ * apparmor_socket_accept - check perms before accepting a new connection.
+ *
+ * Note: while @newsock is created and has some information, the accept
+ *       has not been done.
+ */
+static int apparmor_socket_accept(struct socket *sock, struct socket *newsock)
+{
+	AA_BUG(!sock);
+	AA_BUG(!sock->sk);
+	AA_BUG(!newsock);
+	AA_BUG(in_interrupt());
+
+	return af_select(sock->sk->sk_family,
+			 accept_perm(sock, newsock),
+			 aa_sk_perm(OP_ACCEPT, AA_MAY_ACCEPT, sock->sk));
+}
+
+static int aa_sock_msg_perm(const char *op, u32 request, struct socket *sock,
+			    struct msghdr *msg, int size)
+{
+	AA_BUG(!sock);
+	AA_BUG(!sock->sk);
+	AA_BUG(!msg);
+	AA_BUG(in_interrupt());
+
+	return af_select(sock->sk->sk_family,
+			 msg_perm(op, request, sock, msg, size),
+			 aa_sk_perm(op, request, sock->sk));
+}
+
+/**
+ * apparmor_socket_sendmsg - check perms before sending msg to another socket
+ */
+static int apparmor_socket_sendmsg(struct socket *sock,
+				   struct msghdr *msg, int size)
+{
+	return aa_sock_msg_perm(OP_SENDMSG, AA_MAY_SEND, sock, msg, size);
+}
+
+/**
+ * apparmor_socket_recvmsg - check perms before receiving a message
+ */
+static int apparmor_socket_recvmsg(struct socket *sock,
+				   struct msghdr *msg, int size, int flags)
+{
+	return aa_sock_msg_perm(OP_RECVMSG, AA_MAY_RECEIVE, sock, msg, size);
+}
+
+/* revalidation, get/set attr, shutdown */
+static int aa_sock_perm(const char *op, u32 request, struct socket *sock)
+{
+	AA_BUG(!sock);
+	AA_BUG(!sock->sk);
+	AA_BUG(in_interrupt());
+
+	return af_select(sock->sk->sk_family,
+			 sock_perm(op, request, sock),
+			 aa_sk_perm(op, request, sock->sk));
+}
+
+/**
+ * apparmor_socket_getsockname - check perms before getting the local address
+ */
+static int apparmor_socket_getsockname(struct socket *sock)
+{
+	return aa_sock_perm(OP_GETSOCKNAME, AA_MAY_GETATTR, sock);
+}
+
+/**
+ * apparmor_socket_getpeername - check perms before getting remote address
+ */
+static int apparmor_socket_getpeername(struct socket *sock)
+{
+	return aa_sock_perm(OP_GETPEERNAME, AA_MAY_GETATTR, sock);
+}
+
+/* revalidation, get/set attr, opt */
+static int aa_sock_opt_perm(const char *op, u32 request, struct socket *sock,
+			    int level, int optname)
+{
+	AA_BUG(!sock);
+	AA_BUG(!sock->sk);
+	AA_BUG(in_interrupt());
+
+	return af_select(sock->sk->sk_family,
+			 opt_perm(op, request, sock, level, optname),
+			 aa_sk_perm(op, request, sock->sk));
+}
+
+/**
+ * apparmor_socket_getsockopt - check perms before getting socket options
+ */
+static int apparmor_socket_getsockopt(struct socket *sock, int level,
+				      int optname)
+{
+	return aa_sock_opt_perm(OP_GETSOCKOPT, AA_MAY_GETOPT, sock,
+				level, optname);
+}
+
+/**
+ * apparmor_socket_setsockopt - check perms before setting socket options
+ */
+static int apparmor_socket_setsockopt(struct socket *sock, int level,
+				      int optname)
+{
+	return aa_sock_opt_perm(OP_SETSOCKOPT, AA_MAY_SETOPT, sock,
+				level, optname);
+}
+
+/**
+ * apparmor_socket_shutdown - check perms before shutting down @sock conn
+ */
+static int apparmor_socket_shutdown(struct socket *sock, int how)
+{
+	return aa_sock_perm(OP_SHUTDOWN, AA_MAY_SHUTDOWN, sock);
+}
+
+/**
+ * apparmor_socket_sock_rcv_skb - check perms before associating skb to sk
+ *
+ * Note: cannot sleep, may be called with locks held
+ *
+ * don't want protocol specific in __skb_recv_datagram()
+ * to deny an incoming connection  socket_sock_rcv_skb()
+ */
+static int apparmor_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
+{
+	return 0;
+}
+
+
+static struct aa_label *sk_peer_label(struct sock *sk)
+{
+	struct aa_sk_ctx *ctx = SK_CTX(sk);
+
+	if (ctx->peer)
+		return ctx->peer;
+
+	return ERR_PTR(-ENOPROTOOPT);
+}
+
+/**
+ * apparmor_socket_getpeersec_stream - get security context of peer
+ *
+ * Note: for tcp only valid if using ipsec or cipso on lan
+ */
+static int apparmor_socket_getpeersec_stream(struct socket *sock,
+					     char __user *optval,
+					     int __user *optlen,
+					     unsigned int len)
+{
+	char *name;
+	int slen, error = 0;
+	struct aa_label *label;
+	struct aa_label *peer;
+
+	label = begin_current_label_crit_section();
+	peer = sk_peer_label(sock->sk);
+	if (IS_ERR(peer)) {
+		error = PTR_ERR(peer);
+		goto done;
+	}
+	slen = aa_label_asxprint(&name, labels_ns(label), peer,
+				 FLAG_SHOW_MODE | FLAG_VIEW_SUBNS |
+				 FLAG_HIDDEN_UNCONFINED, GFP_KERNEL);
+	/* don't include terminating \0 in slen, it breaks some apps */
+	if (slen < 0) {
+		error = -ENOMEM;
+	} else {
+		if (slen > len) {
+			error = -ERANGE;
+		} else if (copy_to_user(optval, name, slen)) {
+			error = -EFAULT;
+			goto out;
+		}
+		if (put_user(slen, optlen))
+			error = -EFAULT;
+out:
+		kfree(name);
+
+	}
+
+done:
+	end_current_label_crit_section(label);
+
+	return error;
+}
+
+/**
+ * apparmor_socket_getpeersec_dgram - get security label of packet
+ * @sock: the peer socket
+ * @skb: packet data
+ * @secid: pointer to where to put the secid of the packet
+ *
+ * Not implemented yet: always returns -ENOPROTOOPT
+ */
+static int apparmor_socket_getpeersec_dgram(struct socket *sock,
+					    struct sk_buff *skb, u32 *secid)
+
+{
+	/* TODO: requires secid support */
+	return -ENOPROTOOPT;
+}
+
+/**
+ * apparmor_sock_graft - Initialize newly created socket
+ * @sk: child sock
+ * @parent: parent socket
+ *
+ * Note: could set off of SOCK_CTX(parent) but need to track inode and we can
+ *       just set sk security information off of current creating process label
+ *       Labeling of sk for accept case - probably should be sock based
+ *       instead of task, because of the case where an implicitly labeled
+ *       socket is shared by different tasks.
+ */
+static void apparmor_sock_graft(struct sock *sk, struct socket *parent)
+{
+	struct aa_sk_ctx *ctx = SK_CTX(sk);
+
+	if (!ctx->label)
+		ctx->label = aa_get_current_label();
+}
+
 static struct security_hook_list apparmor_hooks[] __lsm_ro_after_init = {
 	LSM_HOOK_INIT(ptrace_access_check, apparmor_ptrace_access_check),
 	LSM_HOOK_INIT(ptrace_traceme, apparmor_ptrace_traceme),
@@ -769,6 +1151,30 @@ static struct security_hook_list apparmor_hooks[] __lsm_ro_after_init = {
 	LSM_HOOK_INIT(getprocattr, apparmor_getprocattr),
 	LSM_HOOK_INIT(setprocattr, apparmor_setprocattr),
 
+	LSM_HOOK_INIT(sk_alloc_security, apparmor_sk_alloc_security),
+	LSM_HOOK_INIT(sk_free_security, apparmor_sk_free_security),
+	LSM_HOOK_INIT(sk_clone_security, apparmor_sk_clone_security),
+
+	LSM_HOOK_INIT(socket_create, apparmor_socket_create),
+	LSM_HOOK_INIT(socket_post_create, apparmor_socket_post_create),
+	LSM_HOOK_INIT(socket_bind, apparmor_socket_bind),
+	LSM_HOOK_INIT(socket_connect, apparmor_socket_connect),
+	LSM_HOOK_INIT(socket_listen, apparmor_socket_listen),
+	LSM_HOOK_INIT(socket_accept, apparmor_socket_accept),
+	LSM_HOOK_INIT(socket_sendmsg, apparmor_socket_sendmsg),
+	LSM_HOOK_INIT(socket_recvmsg, apparmor_socket_recvmsg),
+	LSM_HOOK_INIT(socket_getsockname, apparmor_socket_getsockname),
+	LSM_HOOK_INIT(socket_getpeername, apparmor_socket_getpeername),
+	LSM_HOOK_INIT(socket_getsockopt, apparmor_socket_getsockopt),
+	LSM_HOOK_INIT(socket_setsockopt, apparmor_socket_setsockopt),
+	LSM_HOOK_INIT(socket_shutdown, apparmor_socket_shutdown),
+	LSM_HOOK_INIT(socket_sock_rcv_skb, apparmor_socket_sock_rcv_skb),
+	LSM_HOOK_INIT(socket_getpeersec_stream,
+		      apparmor_socket_getpeersec_stream),
+	LSM_HOOK_INIT(socket_getpeersec_dgram,
+		      apparmor_socket_getpeersec_dgram),
+	LSM_HOOK_INIT(sock_graft, apparmor_sock_graft),
+
 	LSM_HOOK_INIT(cred_alloc_blank, apparmor_cred_alloc_blank),
 	LSM_HOOK_INIT(cred_free, apparmor_cred_free),
 	LSM_HOOK_INIT(cred_prepare, apparmor_cred_prepare),
@@ -778,6 +1184,8 @@ static struct security_hook_list apparmor_hooks[] __lsm_ro_after_init = {
 	LSM_HOOK_INIT(bprm_committing_creds, apparmor_bprm_committing_creds),
 	LSM_HOOK_INIT(bprm_committed_creds, apparmor_bprm_committed_creds),
 
+	LSM_HOOK_INIT(task_free, apparmor_task_free),
+	LSM_HOOK_INIT(task_alloc, apparmor_task_alloc),
 	LSM_HOOK_INIT(task_setrlimit, apparmor_task_setrlimit),
 	LSM_HOOK_INIT(task_kill, apparmor_task_kill),
 };
@@ -1025,12 +1433,12 @@ static int __init set_init_ctx(void)
 	struct cred *cred = (struct cred *)current->real_cred;
 	struct aa_task_ctx *ctx;
 
-	ctx = aa_alloc_task_context(GFP_KERNEL);
+	ctx = aa_alloc_task_ctx(GFP_KERNEL);
 	if (!ctx)
 		return -ENOMEM;
 
-	ctx->label = aa_get_label(ns_unconfined(root_ns));
-	cred_ctx(cred) = ctx;
+	cred_label(cred) = aa_get_label(ns_unconfined(root_ns));
+	task_ctx(current) = ctx;
 
 	return 0;
 }
diff --git a/security/apparmor/match.c b/security/apparmor/match.c
index 72c604350e80..280eba082c7b 100644
--- a/security/apparmor/match.c
+++ b/security/apparmor/match.c
@@ -30,6 +30,11 @@ static char nulldfa_src[] = {
 };
 struct aa_dfa *nulldfa;
 
+static char stacksplitdfa_src[] = {
+	#include "stacksplitdfa.in"
+};
+struct aa_dfa *stacksplitdfa;
+
 int aa_setup_dfa_engine(void)
 {
 	int error;
@@ -37,19 +42,31 @@ int aa_setup_dfa_engine(void)
 	nulldfa = aa_dfa_unpack(nulldfa_src, sizeof(nulldfa_src),
 				TO_ACCEPT1_FLAG(YYTD_DATA32) |
 				TO_ACCEPT2_FLAG(YYTD_DATA32));
-	if (!IS_ERR(nulldfa))
-		return 0;
+	if (IS_ERR(nulldfa)) {
+		error = PTR_ERR(nulldfa);
+		nulldfa = NULL;
+		return error;
+	}
 
-	error = PTR_ERR(nulldfa);
-	nulldfa = NULL;
+	stacksplitdfa = aa_dfa_unpack(stacksplitdfa_src,
+				      sizeof(stacksplitdfa_src),
+				      TO_ACCEPT1_FLAG(YYTD_DATA32) |
+				      TO_ACCEPT2_FLAG(YYTD_DATA32));
+	if (IS_ERR(stacksplitdfa)) {
+		aa_put_dfa(nulldfa);
+		nulldfa = NULL;
+		error = PTR_ERR(stacksplitdfa);
+		stacksplitdfa = NULL;
+		return error;
+	}
 
-	return error;
+	return 0;
 }
 
 void aa_teardown_dfa_engine(void)
 {
+	aa_put_dfa(stacksplitdfa);
 	aa_put_dfa(nulldfa);
-	nulldfa = NULL;
 }
 
 /**
@@ -119,8 +136,8 @@ fail:
 }
 
 /**
- * verify_dfa - verify that transitions and states in the tables are in bounds.
- * @dfa: dfa to test  (NOT NULL)
+ * verify_table_headers - verify that the table headers are as expected
+ * @tables: array of dfa tables to check (NOT NULL)
  * @flags: flags controlling what type of accept table are acceptable
  *
  * Assumes dfa has gone through the first pass verification done by unpacking
@@ -128,64 +145,98 @@ fail:
  *
  * Returns: %0 else error code on failure to verify
  */
-static int verify_dfa(struct aa_dfa *dfa, int flags)
+static int verify_table_headers(struct table_header **tables, int flags)
 {
-	size_t i, state_count, trans_count;
+	size_t state_count, trans_count;
 	int error = -EPROTO;
 
 	/* check that required tables exist */
-	if (!(dfa->tables[YYTD_ID_DEF] &&
-	      dfa->tables[YYTD_ID_BASE] &&
-	      dfa->tables[YYTD_ID_NXT] && dfa->tables[YYTD_ID_CHK]))
+	if (!(tables[YYTD_ID_DEF] && tables[YYTD_ID_BASE] &&
+	      tables[YYTD_ID_NXT] && tables[YYTD_ID_CHK]))
 		goto out;
 
 	/* accept.size == default.size == base.size */
-	state_count = dfa->tables[YYTD_ID_BASE]->td_lolen;
+	state_count = tables[YYTD_ID_BASE]->td_lolen;
 	if (ACCEPT1_FLAGS(flags)) {
-		if (!dfa->tables[YYTD_ID_ACCEPT])
+		if (!tables[YYTD_ID_ACCEPT])
 			goto out;
-		if (state_count != dfa->tables[YYTD_ID_ACCEPT]->td_lolen)
+		if (state_count != tables[YYTD_ID_ACCEPT]->td_lolen)
 			goto out;
 	}
 	if (ACCEPT2_FLAGS(flags)) {
-		if (!dfa->tables[YYTD_ID_ACCEPT2])
+		if (!tables[YYTD_ID_ACCEPT2])
 			goto out;
-		if (state_count != dfa->tables[YYTD_ID_ACCEPT2]->td_lolen)
+		if (state_count != tables[YYTD_ID_ACCEPT2]->td_lolen)
 			goto out;
 	}
-	if (state_count != dfa->tables[YYTD_ID_DEF]->td_lolen)
+	if (state_count != tables[YYTD_ID_DEF]->td_lolen)
 		goto out;
 
 	/* next.size == chk.size */
-	trans_count = dfa->tables[YYTD_ID_NXT]->td_lolen;
-	if (trans_count != dfa->tables[YYTD_ID_CHK]->td_lolen)
+	trans_count = tables[YYTD_ID_NXT]->td_lolen;
+	if (trans_count != tables[YYTD_ID_CHK]->td_lolen)
 		goto out;
 
 	/* if equivalence classes then its table size must be 256 */
-	if (dfa->tables[YYTD_ID_EC] &&
-	    dfa->tables[YYTD_ID_EC]->td_lolen != 256)
+	if (tables[YYTD_ID_EC] && tables[YYTD_ID_EC]->td_lolen != 256)
 		goto out;
 
-	if (flags & DFA_FLAG_VERIFY_STATES) {
-		for (i = 0; i < state_count; i++) {
-			if (DEFAULT_TABLE(dfa)[i] >= state_count)
-				goto out;
-			if (base_idx(BASE_TABLE(dfa)[i]) + 255 >= trans_count) {
-				printk(KERN_ERR "AppArmor DFA next/check upper "
-				       "bounds error\n");
-				goto out;
-			}
+	error = 0;
+out:
+	return error;
+}
+
+/**
+ * verify_dfa - verify that transitions and states in the tables are in bounds.
+ * @dfa: dfa to test  (NOT NULL)
+ *
+ * Assumes dfa has gone through the first pass verification done by unpacking
+ * NOTE: this does not validate accept table values
+ *
+ * Returns: %0 else error code on failure to verify
+ */
+static int verify_dfa(struct aa_dfa *dfa)
+{
+	size_t i, state_count, trans_count;
+	int error = -EPROTO;
+
+	state_count = dfa->tables[YYTD_ID_BASE]->td_lolen;
+	trans_count = dfa->tables[YYTD_ID_NXT]->td_lolen;
+	for (i = 0; i < state_count; i++) {
+		/* default entries index states even when diff encoded */
+		if (DEFAULT_TABLE(dfa)[i] >= state_count)
+			goto out;
+		if (base_idx(BASE_TABLE(dfa)[i]) + 255 >= trans_count) {
+			pr_err("AppArmor DFA next/check upper bounds error\n");
+			goto out;
 		}
+	}
 
-		for (i = 0; i < trans_count; i++) {
-			if (NEXT_TABLE(dfa)[i] >= state_count)
-				goto out;
-			if (CHECK_TABLE(dfa)[i] >= state_count)
+	for (i = 0; i < trans_count; i++) {
+		if (NEXT_TABLE(dfa)[i] >= state_count)
+			goto out;
+		if (CHECK_TABLE(dfa)[i] >= state_count)
+			goto out;
+	}
+
+	/* Now that all the other tables are verified, verify diffencoding */
+	for (i = 0; i < state_count; i++) {
+		size_t j, k;
+
+		for (j = i;
+		     (BASE_TABLE(dfa)[j] & MATCH_FLAG_DIFF_ENCODE) &&
+		     !(BASE_TABLE(dfa)[j] & MARK_DIFF_ENCODE);
+		     j = k) {
+			k = DEFAULT_TABLE(dfa)[j];
+			if (j == k)
 				goto out;
+			if (k < j)
+				break;		/* already verified */
+			BASE_TABLE(dfa)[j] |= MARK_DIFF_ENCODE;
 		}
 	}
-
 	error = 0;
+
 out:
 	return error;
 }
@@ -257,6 +308,9 @@ struct aa_dfa *aa_dfa_unpack(void *blob, size_t size, int flags)
 		goto fail;
 
 	dfa->flags = ntohs(*(__be16 *) (data + 12));
+	if (dfa->flags != 0 && dfa->flags != YYTH_FLAG_DIFF_ENCODE)
+		goto fail;
+
 	data += hsize;
 	size -= hsize;
 
@@ -299,11 +353,16 @@ struct aa_dfa *aa_dfa_unpack(void *blob, size_t size, int flags)
 		size -= table_size(table->td_lolen, table->td_flags);
 		table = NULL;
 	}
-
-	error = verify_dfa(dfa, flags);
+	error = verify_table_headers(dfa->tables, flags);
 	if (error)
 		goto fail;
 
+	if (flags & DFA_FLAG_VERIFY_STATES) {
+		error = verify_dfa(dfa);
+		if (error)
+			goto fail;
+	}
+
 	return dfa;
 
 fail:
@@ -312,6 +371,20 @@ fail:
 	return ERR_PTR(error);
 }
 
+#define match_char(state, def, base, next, check, C)	\
+do {							\
+	unsigned int mc_b, mc_pos, mc_c = (C);		\
+	do {	/* mc_c: C is evaluated exactly once */	\
+		mc_b = (base)[(state)];			\
+		mc_pos = base_idx(mc_b) + mc_c;		\
+		if ((check)[mc_pos] == (state)) {	\
+			(state) = (next)[mc_pos];	\
+			break;				\
+		}					\
+		(state) = (def)[(state)];		\
+	} while (mc_b & MATCH_FLAG_DIFF_ENCODE);	\
+} while (0)
+
 /**
  * aa_dfa_match_len - traverse @dfa to find state @str stops at
  * @dfa: the dfa to match @str against  (NOT NULL)
@@ -335,6 +408,118 @@ unsigned int aa_dfa_match_len(struct aa_dfa *dfa, unsigned int start,
 	u32 *base = BASE_TABLE(dfa);
 	u16 *next = NEXT_TABLE(dfa);
 	u16 *check = CHECK_TABLE(dfa);
+	unsigned int state = start;
+
+	if (state == 0)
+		return 0;
+
+	/* current state is <state>, matching character *str */
+	if (dfa->tables[YYTD_ID_EC]) {
+		/* Equivalence class table defined */
+		u8 *equiv = EQUIV_TABLE(dfa);
+		for (; len; len--)
+			match_char(state, def, base, next, check,
+				   equiv[(u8) *str++]);
+	} else {
+		/* default is direct to next state */
+		for (; len; len--)
+			match_char(state, def, base, next, check, (u8) *str++);
+	}
+
+	return state;
+}
+
+/**
+ * aa_dfa_match - traverse @dfa to find state @str stops at
+ * @dfa: the dfa to match @str against  (NOT NULL)
+ * @start: the state of the dfa to start matching in
+ * @str: the null terminated string of bytes to match against the dfa (NOT NULL)
+ *
+ * aa_dfa_match will match @str against the dfa and return the state it
+ * finished matching in. The final state can be used to look up the accepting
+ * label, or as the start state of a continuing match.
+ *
+ * Returns: final state reached after input is consumed
+ */
+unsigned int aa_dfa_match(struct aa_dfa *dfa, unsigned int start,
+			  const char *str)
+{
+	u16 *def = DEFAULT_TABLE(dfa);
+	u32 *base = BASE_TABLE(dfa);
+	u16 *next = NEXT_TABLE(dfa);
+	u16 *check = CHECK_TABLE(dfa);
+	unsigned int state = start;
+
+	if (state == 0)
+		return 0;
+
+	/* current state is <state>, matching character *str */
+	if (dfa->tables[YYTD_ID_EC]) {
+		/* Equivalence class table defined */
+		u8 *equiv = EQUIV_TABLE(dfa);
+		/* default is direct to next state */
+		while (*str)
+			match_char(state, def, base, next, check,
+				   equiv[(u8) *str++]);
+	} else {
+		/* default is direct to next state */
+		while (*str)
+			match_char(state, def, base, next, check, (u8) *str++);
+	}
+
+	return state;
+}
+
+/**
+ * aa_dfa_next - step one character to the next state in the dfa
+ * @dfa: the dfa to traverse (NOT NULL)
+ * @state: the state to start in
+ * @c: the input character to transition on
+ *
+ * aa_dfa_next will step through the dfa by one input character @c
+ *
+ * Returns: state reached after input @c
+ */
+unsigned int aa_dfa_next(struct aa_dfa *dfa, unsigned int state,
+			  const char c)
+{
+	u16 *def = DEFAULT_TABLE(dfa);
+	u32 *base = BASE_TABLE(dfa);
+	u16 *next = NEXT_TABLE(dfa);
+	u16 *check = CHECK_TABLE(dfa);
+
+	/* current state is <state>, matching character *str */
+	if (dfa->tables[YYTD_ID_EC]) {
+		/* Equivalence class table defined */
+		u8 *equiv = EQUIV_TABLE(dfa);
+		match_char(state, def, base, next, check, equiv[(u8) c]);
+	} else
+		match_char(state, def, base, next, check, (u8) c);
+
+	return state;
+}
+
+/**
+ * aa_dfa_match_until - traverse @dfa until accept state or end of input
+ * @dfa: the dfa to match @str against  (NOT NULL)
+ * @start: the state of the dfa to start matching in
+ * @str: the null terminated string of bytes to match against the dfa (NOT NULL)
+ * @retpos: first character in str after match OR end of string
+ *
+ * aa_dfa_match_until will match @str against the dfa until an accepting state
+ * or the end of @str is reached. The final state can be used to look up the
+ * accepting label, or as the start state of a continuing match.
+ *
+ * Returns: final state reached when matching stopped
+ */
+unsigned int aa_dfa_match_until(struct aa_dfa *dfa, unsigned int start,
+				const char *str, const char **retpos)
+{
+	u16 *def = DEFAULT_TABLE(dfa);
+	u32 *base = BASE_TABLE(dfa);
+	u16 *next = NEXT_TABLE(dfa);
+	u16 *check = CHECK_TABLE(dfa);
+	u32 *accept = ACCEPT_TABLE(dfa);
 	unsigned int state = start, pos;
 
 	if (state == 0)
@@ -345,48 +530,60 @@ unsigned int aa_dfa_match_len(struct aa_dfa *dfa, unsigned int start,
 		/* Equivalence class table defined */
 		u8 *equiv = EQUIV_TABLE(dfa);
 		/* default is direct to next state */
-		for (; len; len--) {
+		while (*str) {
 			pos = base_idx(base[state]) + equiv[(u8) *str++];
 			if (check[pos] == state)
 				state = next[pos];
 			else
 				state = def[state];
+			if (accept[state])
+				break;
 		}
 	} else {
 		/* default is direct to next state */
-		for (; len; len--) {
+		while (*str) {
 			pos = base_idx(base[state]) + (u8) *str++;
 			if (check[pos] == state)
 				state = next[pos];
 			else
 				state = def[state];
+			if (accept[state])
+				break;
 		}
 	}
 
+	*retpos = str;
 	return state;
 }
 
 /**
- * aa_dfa_match - traverse @dfa to find state @str stops at
+ * aa_dfa_matchn_until - traverse @dfa until accept or @n bytes consumed
  * @dfa: the dfa to match @str against  (NOT NULL)
  * @start: the state of the dfa to start matching in
- * @str: the null terminated string of bytes to match against the dfa (NOT NULL)
+ * @str: the string of bytes to match against the dfa  (NOT NULL)
+ * @n: length of the string of bytes to match
+ * @retpos: first character in str after match OR str + n
  *
- * aa_dfa_match will match @str against the dfa and return the state it
+ * aa_dfa_matchn_until will match @str against the dfa and return the state it
  * finished matching in. The final state can be used to look up the accepting
  * label, or as the start state of a continuing match.
  *
+ * This function will happily match against the 0 byte and only finishes
+ * when an accepting state is reached or @n input is consumed.
+ *
  * Returns: final state reached after input is consumed
  */
-unsigned int aa_dfa_match(struct aa_dfa *dfa, unsigned int start,
-			  const char *str)
+unsigned int aa_dfa_matchn_until(struct aa_dfa *dfa, unsigned int start,
+				 const char *str, int n, const char **retpos)
 {
 	u16 *def = DEFAULT_TABLE(dfa);
 	u32 *base = BASE_TABLE(dfa);
 	u16 *next = NEXT_TABLE(dfa);
 	u16 *check = CHECK_TABLE(dfa);
+	u32 *accept = ACCEPT_TABLE(dfa);
 	unsigned int state = start, pos;
 
+	*retpos = NULL;
 	if (state == 0)
 		return 0;
 
@@ -395,65 +592,149 @@ unsigned int aa_dfa_match(struct aa_dfa *dfa, unsigned int start,
 		/* Equivalence class table defined */
 		u8 *equiv = EQUIV_TABLE(dfa);
 		/* default is direct to next state */
-		while (*str) {
+		for (; n; n--) {
 			pos = base_idx(base[state]) + equiv[(u8) *str++];
 			if (check[pos] == state)
 				state = next[pos];
 			else
 				state = def[state];
+			if (accept[state])
+				break;
 		}
 	} else {
 		/* default is direct to next state */
-		while (*str) {
+		for (; n; n--) {
 			pos = base_idx(base[state]) + (u8) *str++;
 			if (check[pos] == state)
 				state = next[pos];
 			else
 				state = def[state];
+			if (accept[state])
+				break;
 		}
 	}
 
+	*retpos = str;
 	return state;
 }
 
-/**
- * aa_dfa_next - step one character to the next state in the dfa
- * @dfa: the dfa to tranverse (NOT NULL)
- * @state: the state to start in
- * @c: the input character to transition on
- *
- * aa_dfa_match will step through the dfa by one input character @c
- *
- * Returns: state reach after input @c
- */
-unsigned int aa_dfa_next(struct aa_dfa *dfa, unsigned int state,
-			  const char c)
+#define inc_wb_pos(wb)						\
+do {								\
+	(wb)->pos = ((wb)->pos + 1) & ((wb)->size - 1);		\
+	(wb)->len += ((wb)->len < (wb)->size - 1); /* saturate */	\
+} while (0)
+
+/* For DFAs that don't support extended tagging of states */
+static bool is_loop(struct match_workbuf *wb, unsigned int state,
+		    unsigned int *adjust)
+{
+	unsigned int pos = wb->pos;
+	unsigned int i;
+
+	if (wb->history[pos] < state)
+		return false;
+
+	for (i = 0; i <= wb->len; i++) {
+		if (wb->history[pos] == state) {
+			*adjust = i;
+			return true;
+		}
+		if (pos == 0)
+			pos = wb->size;
+		pos--;
+	}
+
+	/* @state was not seen in the recorded history: not a loop */
+	return false;
+}
+
+static unsigned int leftmatch_fb(struct aa_dfa *dfa, unsigned int start,
+				 const char *str, struct match_workbuf *wb,
+				 unsigned int *count)
 {
 	u16 *def = DEFAULT_TABLE(dfa);
 	u32 *base = BASE_TABLE(dfa);
 	u16 *next = NEXT_TABLE(dfa);
 	u16 *check = CHECK_TABLE(dfa);
-	unsigned int pos;
+	unsigned int state = start, pos;
+
+	AA_BUG(!dfa);
+	AA_BUG(!str);
+	AA_BUG(!wb);
+	AA_BUG(!count);
+
+	*count = 0;
+	if (state == 0)
+		return 0;
 
 	/* current state is <state>, matching character *str */
 	if (dfa->tables[YYTD_ID_EC]) {
 		/* Equivalence class table defined */
 		u8 *equiv = EQUIV_TABLE(dfa);
 		/* default is direct to next state */
+		while (*str) {
+			unsigned int adjust;
 
-		pos = base_idx(base[state]) + equiv[(u8) c];
-		if (check[pos] == state)
-			state = next[pos];
-		else
-			state = def[state];
+			wb->history[wb->pos] = state;
+			pos = base_idx(base[state]) + equiv[(u8) *str++];
+			if (check[pos] == state)
+				state = next[pos];
+			else
+				state = def[state];
+			if (is_loop(wb, state, &adjust)) {
+				state = aa_dfa_match(dfa, state, str);
+				*count -= adjust;
+				goto out;
+			}
+			inc_wb_pos(wb);
+			(*count)++;
+		}
 	} else {
 		/* default is direct to next state */
-		pos = base_idx(base[state]) + (u8) c;
-		if (check[pos] == state)
-			state = next[pos];
-		else
-			state = def[state];
+		while (*str) {
+			unsigned int adjust;
+
+			wb->history[wb->pos] = state;
+			pos = base_idx(base[state]) + (u8) *str++;
+			if (check[pos] == state)
+				state = next[pos];
+			else
+				state = def[state];
+			if (is_loop(wb, state, &adjust)) {
+				state = aa_dfa_match(dfa, state, str);
+				*count -= adjust;
+				goto out;
+			}
+			inc_wb_pos(wb);
+			(*count)++;
+		}
 	}
 
+out:
+	if (!state)
+		*count = 0;
 	return state;
 }
+
+/**
+ * aa_dfa_leftmatch - traverse @dfa to find state @str stops at
+ * @dfa: the dfa to match @str against  (NOT NULL)
+ * @start: the state of the dfa to start matching in
+ * @str: the null terminated string of bytes to match against the dfa (NOT NULL)
+ * @count: current count of longest left.
+ *
+ * aa_dfa_leftmatch will match @str against the dfa and return the state it
+ * finished matching in. The final state can be used to look up the accepting
+ * label, or as the start state of a continuing match.
+ *
+ * Returns: final state reached after input is consumed
+ */
+unsigned int aa_dfa_leftmatch(struct aa_dfa *dfa, unsigned int start,
+			      const char *str, unsigned int *count)
+{
+	DEFINE_MATCH_WB(wb);
+
+	/* TODO: match for extended state dfas */
+
+	return leftmatch_fb(dfa, start, str, &wb, count);
+}
diff --git a/security/apparmor/mount.c b/security/apparmor/mount.c
index 8c558cbce930..6e8c7ac0b33d 100644
--- a/security/apparmor/mount.c
+++ b/security/apparmor/mount.c
@@ -18,7 +18,7 @@
 
 #include "include/apparmor.h"
 #include "include/audit.h"
-#include "include/context.h"
+#include "include/cred.h"
 #include "include/domain.h"
 #include "include/file.h"
 #include "include/match.h"
diff --git a/security/apparmor/net.c b/security/apparmor/net.c
new file mode 100644
index 000000000000..bb24cfa0a164
--- /dev/null
+++ b/security/apparmor/net.c
@@ -0,0 +1,187 @@
+/*
+ * AppArmor security module
+ *
+ * This file contains AppArmor network mediation
+ *
+ * Copyright (C) 1998-2008 Novell/SUSE
+ * Copyright 2009-2017 Canonical Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ */
+
+#include "include/apparmor.h"
+#include "include/audit.h"
+#include "include/cred.h"
+#include "include/label.h"
+#include "include/net.h"
+#include "include/policy.h"
+
+#include "net_names.h"
+
+
+struct aa_sfs_entry aa_sfs_entry_network[] = {
+	AA_SFS_FILE_STRING("af_mask",	AA_SFS_AF_MASK),
+	{ }
+};
+
+static const char * const net_mask_names[] = {
+	"unknown",
+	"send",
+	"receive",
+	"unknown",
+
+	"create",
+	"shutdown",
+	"connect",
+	"unknown",
+
+	"setattr",
+	"getattr",
+	"setcred",
+	"getcred",
+
+	"chmod",
+	"chown",
+	"chgrp",
+	"lock",
+
+	"mmap",
+	"mprot",
+	"unknown",
+	"unknown",
+
+	"accept",
+	"bind",
+	"listen",
+	"unknown",
+
+	"setopt",
+	"getopt",
+	"unknown",
+	"unknown",
+
+	"unknown",
+	"unknown",
+	"unknown",
+	"unknown",
+};
+
+
+/* audit callback for net specific fields */
+void audit_net_cb(struct audit_buffer *ab, void *va)
+{
+	struct common_audit_data *sa = va;
+
+	audit_log_format(ab, " family=");
+	if (address_family_names[sa->u.net->family])
+		audit_log_string(ab, address_family_names[sa->u.net->family]);
+	else
+		audit_log_format(ab, "\"unknown(%d)\"", sa->u.net->family);
+	audit_log_format(ab, " sock_type=");
+	if (sock_type_names[aad(sa)->net.type])
+		audit_log_string(ab, sock_type_names[aad(sa)->net.type]);
+	else
+		audit_log_format(ab, "\"unknown(%d)\"", aad(sa)->net.type);
+	audit_log_format(ab, " protocol=%d", aad(sa)->net.protocol);
+
+	if (aad(sa)->request & NET_PERMS_MASK) {
+		audit_log_format(ab, " requested_mask=");
+		aa_audit_perm_mask(ab, aad(sa)->request, NULL, 0,
+				   net_mask_names, NET_PERMS_MASK);
+
+		if (aad(sa)->denied & NET_PERMS_MASK) {
+			audit_log_format(ab, " denied_mask=");
+			aa_audit_perm_mask(ab, aad(sa)->denied, NULL, 0,
+					   net_mask_names, NET_PERMS_MASK);
+		}
+	}
+	if (aad(sa)->peer) {
+		audit_log_format(ab, " peer=");
+		aa_label_xaudit(ab, labels_ns(aad(sa)->label), aad(sa)->peer,
+				FLAGS_NONE, GFP_ATOMIC);
+	}
+}
+
+/* Generic af perm */
+int aa_profile_af_perm(struct aa_profile *profile, struct common_audit_data *sa,
+		       u32 request, u16 family, int type)
+{
+	struct aa_perms perms = { };
+	unsigned int state;
+	__be16 buffer[2];
+
+	AA_BUG(family >= AF_MAX);
+	AA_BUG(type < 0 || type >= SOCK_MAX);
+
+	if (profile_unconfined(profile))
+		return 0;
+	state = PROFILE_MEDIATES(profile, AA_CLASS_NET);
+	if (!state)
+		return 0;
+
+	buffer[0] = cpu_to_be16(family);
+	buffer[1] = cpu_to_be16((u16) type);
+	state = aa_dfa_match_len(profile->policy.dfa, state, (char *) &buffer,
+				 4);
+	aa_compute_perms(profile->policy.dfa, state, &perms);
+	aa_apply_modes_to_perms(profile, &perms);
+
+	return aa_check_perms(profile, &perms, request, sa, audit_net_cb);
+}
+
+int aa_af_perm(struct aa_label *label, const char *op, u32 request, u16 family,
+	       int type, int protocol)
+{
+	struct aa_profile *profile;
+	DEFINE_AUDIT_NET(sa, op, NULL, family, type, protocol);
+
+	return fn_for_each_confined(label, profile,
+			aa_profile_af_perm(profile, &sa, request, family,
+					   type));
+}
+
+static int aa_label_sk_perm(struct aa_label *label, const char *op, u32 request,
+			    struct sock *sk)
+{
+	struct aa_profile *profile;
+	DEFINE_AUDIT_SK(sa, op, sk);
+
+	AA_BUG(!label);
+	AA_BUG(!sk);
+
+	if (unconfined(label))
+		return 0;
+
+	return fn_for_each_confined(label, profile,
+			aa_profile_af_sk_perm(profile, &sa, request, sk));
+}
+
+int aa_sk_perm(const char *op, u32 request, struct sock *sk)
+{
+	struct aa_label *label;
+	int error;
+
+	AA_BUG(!sk);
+	AA_BUG(in_interrupt());
+
+	/* TODO: switch to begin_current_label ???? */
+	label = begin_current_label_crit_section();
+	error = aa_label_sk_perm(label, op, request, sk);
+	end_current_label_crit_section(label);
+
+	return error;
+}
+
+
+int aa_sock_file_perm(struct aa_label *label, const char *op, u32 request,
+		      struct socket *sock)
+{
+	AA_BUG(!label);
+	AA_BUG(!sock);
+	AA_BUG(!sock->sk);
+
+	return aa_label_sk_perm(label, op, request, sock->sk);
+}
diff --git a/security/apparmor/nulldfa.in b/security/apparmor/nulldfa.in
index 3cb38022902e..095f42a24cbc 100644
--- a/security/apparmor/nulldfa.in
+++ b/security/apparmor/nulldfa.in
@@ -1 +1,107 @@
-0x1B, 0x5E, 0x78, 0x3D, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x04, 0x90, 0x00, 0x00, 0x6E, 0x6F, 0x74, 0x66, 0x6C, 0x65, 0x78, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+0x1B, 0x5E, 0x78, 0x3D, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x04,
+0x90, 0x00, 0x00, 0x6E, 0x6F, 0x74, 0x66, 0x6C, 0x65, 0x78, 0x00,
+0x00, 0x00, 0x00, 0x01, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x04, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x04, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
+0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x08, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x03, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00
diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c
index b0b58848c248..c07493ce2376 100644
--- a/security/apparmor/policy.c
+++ b/security/apparmor/policy.c
@@ -82,7 +82,7 @@
 
 #include "include/apparmor.h"
 #include "include/capability.h"
-#include "include/context.h"
+#include "include/cred.h"
 #include "include/file.h"
 #include "include/ipc.h"
 #include "include/match.h"
@@ -210,6 +210,7 @@ static void aa_free_data(void *ptr, void *arg)
 void aa_free_profile(struct aa_profile *profile)
 {
 	struct rhashtable *rht;
+	int i;
 
 	AA_DEBUG("%s(%p)\n", __func__, profile);
 
@@ -227,6 +228,9 @@ void aa_free_profile(struct aa_profile *profile)
 	aa_free_cap_rules(&profile->caps);
 	aa_free_rlimit_rules(&profile->rlimits);
 
+	for (i = 0; i < profile->xattr_count; i++)
+		kzfree(profile->xattrs[i]);
+	kzfree(profile->xattrs);
 	kzfree(profile->dirname);
 	aa_put_dfa(profile->xmatch);
 	aa_put_dfa(profile->policy.dfa);
@@ -845,8 +849,9 @@ static struct aa_profile *update_to_newest_parent(struct aa_profile *new)
  * @udata: serialized data stream  (NOT NULL)
  *
  * unpack and replace a profile on the profile list and uses of that profile
- * by any aa_task_ctx.  If the profile does not exist on the profile list
- * it is added.
+ * by any task creds via invalidating the old version of the profile, which
+ * tasks will notice to update their own cred.  If the profile does not exist
+ * on the profile list it is added.
  *
  * Returns: size of data consumed else error code on failure.
  */
diff --git a/security/apparmor/policy_ns.c b/security/apparmor/policy_ns.c
index b1e629cba70b..b0f9dc3f765a 100644
--- a/security/apparmor/policy_ns.c
+++ b/security/apparmor/policy_ns.c
@@ -21,7 +21,7 @@
 #include <linux/string.h>
 
 #include "include/apparmor.h"
-#include "include/context.h"
+#include "include/cred.h"
 #include "include/policy_ns.h"
 #include "include/label.h"
 #include "include/policy.h"
diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index 59a1a25b7d43..b9e6b2cafa69 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -23,7 +23,7 @@
 
 #include "include/apparmor.h"
 #include "include/audit.h"
-#include "include/context.h"
+#include "include/cred.h"
 #include "include/crypto.h"
 #include "include/match.h"
 #include "include/path.h"
@@ -37,7 +37,8 @@
 
 #define v5	5	/* base version */
 #define v6	6	/* per entry policydb mediation check */
-#define v7	7	/* full network masking */
+#define v7	7
+#define v8	8	/* full network masking */
 
 /*
  * The AppArmor interface treats data as a type byte followed by the
@@ -164,8 +165,9 @@ static void do_loaddata_free(struct work_struct *work)
 	}
 
 	kzfree(d->hash);
-	kfree(d->name);
-	kvfree(d);
+	kzfree(d->name);
+	kvfree(d->data);
+	kzfree(d);
 }
 
 void aa_loaddata_kref(struct kref *kref)
@@ -180,10 +182,16 @@ void aa_loaddata_kref(struct kref *kref)
 
 struct aa_loaddata *aa_loaddata_alloc(size_t size)
 {
-	struct aa_loaddata *d = kvzalloc(sizeof(*d) + size, GFP_KERNEL);
+	struct aa_loaddata *d;
 
+	d = kzalloc(sizeof(*d), GFP_KERNEL);
 	if (d == NULL)
 		return ERR_PTR(-ENOMEM);
+	d->data = kvzalloc(size, GFP_KERNEL);
+	if (!d->data) {
+		kfree(d);
+		return ERR_PTR(-ENOMEM);
+	}
 	kref_init(&d->count);
 	INIT_LIST_HEAD(&d->list);
 
@@ -196,6 +204,15 @@ static bool inbounds(struct aa_ext *e, size_t size)
 	return (size <= e->end - e->pos);
 }
 
+static void *kvmemdup(const void *src, size_t len)
+{
+	void *p = kvmalloc(len, GFP_KERNEL);
+
+	if (p)
+		memcpy(p, src, len);
+	return p;
+}
+
 /**
  * aa_u16_chunck - test and do bounds checking for a u16 size based chunk
  * @e: serialized data read head (NOT NULL)
@@ -515,6 +532,35 @@ fail:
 	return 0;
 }
 
+static bool unpack_xattrs(struct aa_ext *e, struct aa_profile *profile)
+{
+	void *pos = e->pos;
+
+	if (unpack_nameX(e, AA_STRUCT, "xattrs")) {
+		int i, size;
+
+		size = unpack_array(e, NULL);
+		profile->xattr_count = size;
+		profile->xattrs = kcalloc(size, sizeof(char *), GFP_KERNEL);
+		if (!profile->xattrs)
+			goto fail;
+		for (i = 0; i < size; i++) {
+			if (!unpack_strdup(e, &profile->xattrs[i], NULL))
+				goto fail;
+		}
+		if (!unpack_nameX(e, AA_ARRAYEND, NULL))
+			goto fail;
+		if (!unpack_nameX(e, AA_STRUCTEND, NULL))
+			goto fail;
+	}
+
+	return 1;
+
+fail:
+	e->pos = pos;
+	return 0;
+}
+
 static bool unpack_rlimits(struct aa_ext *e, struct aa_profile *profile)
 {
 	void *pos = e->pos;
@@ -549,15 +595,6 @@ fail:
 	return 0;
 }
 
-static void *kvmemdup(const void *src, size_t len)
-{
-	void *p = kvmalloc(len, GFP_KERNEL);
-
-	if (p)
-		memcpy(p, src, len);
-	return p;
-}
-
 static u32 strhash(const void *data, u32 len, u32 seed)
 {
 	const char * const *key = data;
@@ -712,6 +749,11 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 			goto fail;
 	}
 
+	if (!unpack_xattrs(e, profile)) {
+		info = "failed to unpack profile xattrs";
+		goto fail;
+	}
+
 	if (!unpack_rlimits(e, profile)) {
 		info = "failed to unpack profile rlimits";
 		goto fail;
diff --git a/security/apparmor/procattr.c b/security/apparmor/procattr.c
index d81617379d63..80c34ed373c3 100644
--- a/security/apparmor/procattr.c
+++ b/security/apparmor/procattr.c
@@ -13,7 +13,7 @@
  */
 
 #include "include/apparmor.h"
-#include "include/context.h"
+#include "include/cred.h"
 #include "include/policy.h"
 #include "include/policy_ns.h"
 #include "include/domain.h"
diff --git a/security/apparmor/resource.c b/security/apparmor/resource.c
index cf4d234febe9..d022137143b9 100644
--- a/security/apparmor/resource.c
+++ b/security/apparmor/resource.c
@@ -16,7 +16,7 @@
 #include <linux/security.h>
 
 #include "include/audit.h"
-#include "include/context.h"
+#include "include/cred.h"
 #include "include/resource.h"
 #include "include/policy.h"
 
diff --git a/security/apparmor/stacksplitdfa.in b/security/apparmor/stacksplitdfa.in
new file mode 100644
index 000000000000..4bddd10b62a9
--- /dev/null
+++ b/security/apparmor/stacksplitdfa.in
@@ -0,0 +1,114 @@
+/* 0x1 [^\000]*[^/\000]//& */ 0x1B, 0x5E, 0x78, 0x3D, 0x00, 0x00,
+0x00, 0x18, 0x00, 0x00, 0x04, 0xD8, 0x00, 0x00, 0x6E, 0x6F, 0x74,
+0x66, 0x6C, 0x65, 0x78, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x04,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x07, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x02, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
+0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00,
+0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x02, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x02,
+0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x02, 0x00, 0x08, 0x00,
+0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x05, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x03, 0x00,
+0x04, 0x00, 0x01, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x02, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x01, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00,
+0x02, 0x00, 0x03, 0x00, 0x04, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x04,
+0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00
diff --git a/security/apparmor/context.c b/security/apparmor/task.c
index c95f1ac6190b..c6b78a14da91 100644
--- a/security/apparmor/context.c
+++ b/security/apparmor/task.c
@@ -1,72 +1,23 @@
 /*
  * AppArmor security module
  *
- * This file contains AppArmor functions used to manipulate object security
- * contexts.
+ * This file contains AppArmor task related definitions and mediation
  *
- * Copyright (C) 1998-2008 Novell/SUSE
- * Copyright 2009-2010 Canonical Ltd.
+ * Copyright 2017 Canonical Ltd.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation, version 2 of the
  * License.
  *
- *
- * AppArmor sets confinement on every task, via the the aa_task_ctx and
- * the aa_task_ctx.label, both of which are required and are not allowed
- * to be NULL.  The aa_task_ctx is not reference counted and is unique
- * to each cred (which is reference count).  The label pointed to by
- * the task_ctx is reference counted.
- *
  * TODO
  * If a task uses change_hat it currently does not return to the old
  * cred or task context but instead creates a new one.  Ideally the task
  * should return to the previous cred if it has not been modified.
- *
  */
 
-#include "include/context.h"
-#include "include/policy.h"
-
-/**
- * aa_alloc_task_context - allocate a new task_ctx
- * @flags: gfp flags for allocation
- *
- * Returns: allocated buffer or NULL on failure
- */
-struct aa_task_ctx *aa_alloc_task_context(gfp_t flags)
-{
-	return kzalloc(sizeof(struct aa_task_ctx), flags);
-}
-
-/**
- * aa_free_task_context - free a task_ctx
- * @ctx: task_ctx to free (MAYBE NULL)
- */
-void aa_free_task_context(struct aa_task_ctx *ctx)
-{
-	if (ctx) {
-		aa_put_label(ctx->label);
-		aa_put_label(ctx->previous);
-		aa_put_label(ctx->onexec);
-
-		kzfree(ctx);
-	}
-}
-
-/**
- * aa_dup_task_context - duplicate a task context, incrementing reference counts
- * @new: a blank task context      (NOT NULL)
- * @old: the task context to copy  (NOT NULL)
- */
-void aa_dup_task_context(struct aa_task_ctx *new, const struct aa_task_ctx *old)
-{
-	*new = *old;
-	aa_get_label(new->label);
-	aa_get_label(new->previous);
-	aa_get_label(new->onexec);
-}
+#include "include/cred.h"
+#include "include/task.h"
 
 /**
  * aa_get_task_label - Get another task's label
@@ -93,11 +44,13 @@ struct aa_label *aa_get_task_label(struct task_struct *task)
  */
 int aa_replace_current_label(struct aa_label *label)
 {
-	struct aa_task_ctx *ctx = current_ctx();
+	struct aa_label *old = aa_current_raw_label();
+	struct aa_task_ctx *ctx = task_ctx(current);
 	struct cred *new;
+
 	AA_BUG(!label);
 
-	if (ctx->label == label)
+	if (old == label)
 		return 0;
 
 	if (current_cred() != current_real_cred())
@@ -107,27 +60,34 @@ int aa_replace_current_label(struct aa_label *label)
 	if (!new)
 		return -ENOMEM;
 
-	ctx = cred_ctx(new);
-	if (unconfined(label) || (labels_ns(ctx->label) != labels_ns(label)))
-		/* if switching to unconfined or a different label namespace
+	if (ctx->nnp && label_is_stale(ctx->nnp)) {
+		struct aa_label *tmp = ctx->nnp;
+
+		ctx->nnp = aa_get_newest_label(tmp);
+		aa_put_label(tmp);
+	}
+	if (unconfined(label) || (labels_ns(old) != labels_ns(label)))
+		/*
+		 * if switching to unconfined or a different label namespace
 		 * clear out context state
 		 */
-		aa_clear_task_ctx_trans(ctx);
+		aa_clear_task_ctx_trans(task_ctx(current));
 
 	/*
-	 * be careful switching ctx->profile, when racing replacement it
-	 * is possible that ctx->profile->proxy->profile is the reference
-	 * keeping @profile valid, so make sure to get its reference before
-	 * dropping the reference on ctx->profile
+	 * be careful switching cred label, when racing replacement it
+	 * is possible that the cred label's->proxy->label is the reference
+	 * keeping @label valid, so make sure to get its reference before
+	 * dropping the reference on the cred's label
 	 */
 	aa_get_label(label);
-	aa_put_label(ctx->label);
-	ctx->label = label;
+	aa_put_label(cred_label(new));
+	cred_label(new) = label;
 
 	commit_creds(new);
 	return 0;
 }
 
+
 /**
  * aa_set_current_onexec - set the tasks change_profile to happen onexec
  * @label: system label to set at exec  (MAYBE NULL to clear value)
@@ -136,18 +96,13 @@ int aa_replace_current_label(struct aa_label *label)
  */
 int aa_set_current_onexec(struct aa_label *label, bool stack)
 {
-	struct aa_task_ctx *ctx;
-	struct cred *new = prepare_creds();
-	if (!new)
-		return -ENOMEM;
+	struct aa_task_ctx *ctx = task_ctx(current);
 
-	ctx = cred_ctx(new);
 	aa_get_label(label);
-	aa_clear_task_ctx_trans(ctx);
+	aa_put_label(ctx->onexec);
 	ctx->onexec = label;
 	ctx->token = stack;
 
-	commit_creds(new);
 	return 0;
 }
 
@@ -163,25 +118,27 @@ int aa_set_current_onexec(struct aa_label *label, bool stack)
  */
 int aa_set_current_hat(struct aa_label *label, u64 token)
 {
-	struct aa_task_ctx *ctx;
-	struct cred *new = prepare_creds();
+	struct aa_task_ctx *ctx = task_ctx(current);
+	struct cred *new;
+
+	new = prepare_creds();
 	if (!new)
 		return -ENOMEM;
 	AA_BUG(!label);
 
-	ctx = cred_ctx(new);
 	if (!ctx->previous) {
 		/* transfer refcount */
-		ctx->previous = ctx->label;
+		ctx->previous = cred_label(new);
 		ctx->token = token;
 	} else if (ctx->token == token) {
-		aa_put_label(ctx->label);
+		aa_put_label(cred_label(new));
 	} else {
 		/* previous_profile && ctx->token != token */
 		abort_creds(new);
 		return -EACCES;
 	}
-	ctx->label = aa_get_newest_label(label);
+
+	cred_label(new) = aa_get_newest_label(label);
 	/* clear exec on switching context */
 	aa_put_label(ctx->onexec);
 	ctx->onexec = NULL;
@@ -201,28 +158,26 @@ int aa_set_current_hat(struct aa_label *label, u64 token)
  */
 int aa_restore_previous_label(u64 token)
 {
-	struct aa_task_ctx *ctx;
-	struct cred *new = prepare_creds();
-	if (!new)
-		return -ENOMEM;
+	struct aa_task_ctx *ctx = task_ctx(current);
+	struct cred *new;
 
-	ctx = cred_ctx(new);
-	if (ctx->token != token) {
-		abort_creds(new);
+	if (ctx->token != token)
 		return -EACCES;
-	}
 	/* ignore restores when there is no saved label */
-	if (!ctx->previous) {
-		abort_creds(new);
+	if (!ctx->previous)
 		return 0;
-	}
 
-	aa_put_label(ctx->label);
-	ctx->label = aa_get_newest_label(ctx->previous);
-	AA_BUG(!ctx->label);
+	new = prepare_creds();
+	if (!new)
+		return -ENOMEM;
+
+	aa_put_label(cred_label(new));
+	cred_label(new) = aa_get_newest_label(ctx->previous);
+	AA_BUG(!cred_label(new));
 	/* clear exec && prev information when restoring to previous context */
 	aa_clear_task_ctx_trans(ctx);
 
 	commit_creds(new);
+
 	return 0;
 }
diff --git a/security/integrity/evm/evm.h b/security/integrity/evm/evm.h
index 04825393facb..45c4a89c02ff 100644
--- a/security/integrity/evm/evm.h
+++ b/security/integrity/evm/evm.h
@@ -31,8 +31,6 @@
 		       EVM_ALLOW_METADATA_WRITES)
 
 extern int evm_initialized;
-extern char *evm_hmac;
-extern char *evm_hash;
 
 #define EVM_ATTR_FSUUID		0x0001
 
diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c
index 691f3e09154c..a46fba322340 100644
--- a/security/integrity/evm/evm_crypto.c
+++ b/security/integrity/evm/evm_crypto.c
@@ -37,6 +37,9 @@ static DEFINE_MUTEX(mutex);
 
 static unsigned long evm_set_key_flags;
 
+static char * const evm_hmac = "hmac(sha1)";
+static char * const evm_hash = "sha1";
+
 /**
  * evm_set_key() - set EVM HMAC key from the kernel
  * @key: pointer to a buffer with the key data
diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c
index a8d502827270..9ea9c19a545c 100644
--- a/security/integrity/evm/evm_main.c
+++ b/security/integrity/evm/evm_main.c
@@ -30,11 +30,9 @@
 
 int evm_initialized;
 
-static char *integrity_status_msg[] = {
+static const char * const integrity_status_msg[] = {
 	"pass", "pass_immutable", "fail", "no_label", "no_xattrs", "unknown"
 };
-char *evm_hmac = "hmac(sha1)";
-char *evm_hash = "sha1";
 int evm_hmac_attrs;
 
 char *evm_config_xattrnames[] = {
@@ -126,6 +124,7 @@ static enum integrity_status evm_verify_hmac(struct dentry *dentry,
 	struct evm_ima_xattr_data *xattr_data = NULL;
 	struct evm_ima_xattr_data calc;
 	enum integrity_status evm_status = INTEGRITY_PASS;
+	struct inode *inode;
 	int rc, xattr_len;
 
 	if (iint && (iint->evm_status == INTEGRITY_PASS ||
@@ -180,12 +179,15 @@ static enum integrity_status evm_verify_hmac(struct dentry *dentry,
 					(const char *)xattr_data, xattr_len,
 					calc.digest, sizeof(calc.digest));
 		if (!rc) {
+			inode = d_backing_inode(dentry);
+
 			if (xattr_data->type == EVM_XATTR_PORTABLE_DIGSIG) {
 				if (iint)
 					iint->flags |= EVM_IMMUTABLE_DIGSIG;
 				evm_status = INTEGRITY_PASS_IMMUTABLE;
-			} else if (!IS_RDONLY(d_backing_inode(dentry)) &&
-				   !IS_IMMUTABLE(d_backing_inode(dentry))) {
+			} else if (!IS_RDONLY(inode) &&
+				   !(inode->i_sb->s_readonly_remount) &&
+				   !IS_IMMUTABLE(inode)) {
 				evm_update_evmxattr(dentry, xattr_name,
 						    xattr_value,
 						    xattr_value_len);
diff --git a/security/integrity/iint.c b/security/integrity/iint.c
index 9700e96ab0f0..f266e4b3b7d4 100644
--- a/security/integrity/iint.c
+++ b/security/integrity/iint.c
@@ -79,6 +79,7 @@ static void iint_free(struct integrity_iint_cache *iint)
 	iint->ima_mmap_status = INTEGRITY_UNKNOWN;
 	iint->ima_bprm_status = INTEGRITY_UNKNOWN;
 	iint->ima_read_status = INTEGRITY_UNKNOWN;
+	iint->ima_creds_status = INTEGRITY_UNKNOWN;
 	iint->evm_status = INTEGRITY_UNKNOWN;
 	iint->measured_pcrs = 0;
 	kmem_cache_free(iint_cache, iint);
@@ -158,6 +159,7 @@ static void init_once(void *foo)
 	iint->ima_mmap_status = INTEGRITY_UNKNOWN;
 	iint->ima_bprm_status = INTEGRITY_UNKNOWN;
 	iint->ima_read_status = INTEGRITY_UNKNOWN;
+	iint->ima_creds_status = INTEGRITY_UNKNOWN;
 	iint->evm_status = INTEGRITY_UNKNOWN;
 	mutex_init(&iint->mutex);
 }
diff --git a/security/integrity/ima/Kconfig b/security/integrity/ima/Kconfig
index 35ef69312811..6a8f67714c83 100644
--- a/security/integrity/ima/Kconfig
+++ b/security/integrity/ima/Kconfig
@@ -10,6 +10,7 @@ config IMA
 	select CRYPTO_HASH_INFO
 	select TCG_TPM if HAS_IOMEM && !UML
 	select TCG_TIS if TCG_TPM && X86
+	select TCG_CRB if TCG_TPM && ACPI
 	select TCG_IBMVTPM if TCG_TPM && PPC_PSERIES
 	help
 	  The Trusted Computing Group(TCG) runtime Integrity
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index d52b487ad259..35fe91aa1fc9 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -177,6 +177,7 @@ static inline unsigned long ima_hash_key(u8 *digest)
 	hook(FILE_CHECK)		\
 	hook(MMAP_CHECK)		\
 	hook(BPRM_CHECK)		\
+	hook(CREDS_CHECK)		\
 	hook(POST_SETATTR)		\
 	hook(MODULE_CHECK)		\
 	hook(FIRMWARE_CHECK)		\
@@ -191,8 +192,8 @@ enum ima_hooks {
 };
 
 /* LIM API function definitions */
-int ima_get_action(struct inode *inode, int mask,
-		   enum ima_hooks func, int *pcr);
+int ima_get_action(struct inode *inode, const struct cred *cred, u32 secid,
+		   int mask, enum ima_hooks func, int *pcr);
 int ima_must_measure(struct inode *inode, int mask, enum ima_hooks func);
 int ima_collect_measurement(struct integrity_iint_cache *iint,
 			    struct file *file, void *buf, loff_t size,
@@ -212,8 +213,8 @@ void ima_free_template_entry(struct ima_template_entry *entry);
 const char *ima_d_path(const struct path *path, char **pathbuf, char *filename);
 
 /* IMA policy related functions */
-int ima_match_policy(struct inode *inode, enum ima_hooks func, int mask,
-		     int flags, int *pcr);
+int ima_match_policy(struct inode *inode, const struct cred *cred, u32 secid,
+		     enum ima_hooks func, int mask, int flags, int *pcr);
 void ima_init_policy(void);
 void ima_update_policy(void);
 void ima_update_policy_flag(void);
diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c
index 08fe405338e1..bf88236b7a0b 100644
--- a/security/integrity/ima/ima_api.c
+++ b/security/integrity/ima/ima_api.c
@@ -158,6 +158,8 @@ err_out:
 /**
  * ima_get_action - appraise & measure decision based on policy.
  * @inode: pointer to inode to measure
+ * @cred: pointer to credentials structure to validate
+ * @secid: secid of the task being validated
  * @mask: contains the permission mask (MAY_READ, MAY_WRITE, MAY_EXEC,
  *        MAY_APPEND)
  * @func: caller identifier
@@ -166,20 +168,21 @@ err_out:
  * The policy is defined in terms of keypairs:
  *		subj=, obj=, type=, func=, mask=, fsmagic=
  *	subj,obj, and type: are LSM specific.
- *	func: FILE_CHECK | BPRM_CHECK | MMAP_CHECK | MODULE_CHECK
+ *	func: FILE_CHECK | BPRM_CHECK | CREDS_CHECK | MMAP_CHECK | MODULE_CHECK
  *	mask: contains the permission mask
  *	fsmagic: hex value
  *
  * Returns IMA_MEASURE, IMA_APPRAISE mask.
  *
  */
-int ima_get_action(struct inode *inode, int mask, enum ima_hooks func, int *pcr)
+int ima_get_action(struct inode *inode, const struct cred *cred, u32 secid,
+		   int mask, enum ima_hooks func, int *pcr)
 {
 	int flags = IMA_MEASURE | IMA_AUDIT | IMA_APPRAISE | IMA_HASH;
 
 	flags &= ima_policy_flag;
 
-	return ima_match_policy(inode, func, mask, flags, pcr);
+	return ima_match_policy(inode, cred, secid, func, mask, flags, pcr);
 }
 
 /*
@@ -308,14 +311,17 @@ void ima_audit_measurement(struct integrity_iint_cache *iint,
 			   const unsigned char *filename)
 {
 	struct audit_buffer *ab;
-	char hash[(iint->ima_hash->length * 2) + 1];
+	char *hash;
 	const char *algo_name = hash_algo_name[iint->ima_hash->algo];
-	char algo_hash[sizeof(hash) + strlen(algo_name) + 2];
 	int i;
 
 	if (iint->flags & IMA_AUDITED)
 		return;
 
+	hash = kzalloc((iint->ima_hash->length * 2) + 1, GFP_KERNEL);
+	if (!hash)
+		return;
+
 	for (i = 0; i < iint->ima_hash->length; i++)
 		hex_byte_pack(hash + (i * 2), iint->ima_hash->digest[i]);
 	hash[i * 2] = '\0';
@@ -323,18 +329,19 @@ void ima_audit_measurement(struct integrity_iint_cache *iint,
 	ab = audit_log_start(current->audit_context, GFP_KERNEL,
 			     AUDIT_INTEGRITY_RULE);
 	if (!ab)
-		return;
+		goto out;
 
 	audit_log_format(ab, "file=");
 	audit_log_untrustedstring(ab, filename);
-	audit_log_format(ab, " hash=");
-	snprintf(algo_hash, sizeof(algo_hash), "%s:%s", algo_name, hash);
-	audit_log_untrustedstring(ab, algo_hash);
+	audit_log_format(ab, " hash=\"%s:%s\"", algo_name, hash);
 
 	audit_log_task_info(ab, current);
 	audit_log_end(ab);
 
 	iint->flags |= IMA_AUDITED;
+out:
+	kfree(hash);
+	return;
 }
 
 /*
diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c
index f2803a40ff82..8bd7a0733e51 100644
--- a/security/integrity/ima/ima_appraise.c
+++ b/security/integrity/ima/ima_appraise.c
@@ -50,11 +50,14 @@ bool is_ima_appraise_enabled(void)
  */
 int ima_must_appraise(struct inode *inode, int mask, enum ima_hooks func)
 {
+	u32 secid;
+
 	if (!ima_appraise)
 		return 0;
 
-	return ima_match_policy(inode, func, mask, IMA_APPRAISE | IMA_HASH,
-				NULL);
+	security_task_getsecid(current, &secid);
+	return ima_match_policy(inode, current_cred(), secid, func, mask,
+				IMA_APPRAISE | IMA_HASH, NULL);
 }
 
 static int ima_fix_xattr(struct dentry *dentry,
@@ -87,6 +90,8 @@ enum integrity_status ima_get_cache_status(struct integrity_iint_cache *iint,
 		return iint->ima_mmap_status;
 	case BPRM_CHECK:
 		return iint->ima_bprm_status;
+	case CREDS_CHECK:
+		return iint->ima_creds_status;
 	case FILE_CHECK:
 	case POST_SETATTR:
 		return iint->ima_file_status;
@@ -107,6 +112,9 @@ static void ima_set_cache_status(struct integrity_iint_cache *iint,
 	case BPRM_CHECK:
 		iint->ima_bprm_status = status;
 		break;
+	case CREDS_CHECK:
+		iint->ima_creds_status = status;
+		break;
 	case FILE_CHECK:
 	case POST_SETATTR:
 		iint->ima_file_status = status;
@@ -128,6 +135,9 @@ static void ima_cache_flags(struct integrity_iint_cache *iint,
 	case BPRM_CHECK:
 		iint->flags |= (IMA_BPRM_APPRAISED | IMA_APPRAISED);
 		break;
+	case CREDS_CHECK:
+		iint->flags |= (IMA_CREDS_APPRAISED | IMA_APPRAISED);
+		break;
 	case FILE_CHECK:
 	case POST_SETATTR:
 		iint->flags |= (IMA_FILE_APPRAISED | IMA_APPRAISED);
@@ -205,7 +215,7 @@ int ima_appraise_measurement(enum ima_hooks func,
 			     int xattr_len, int opened)
 {
 	static const char op[] = "appraise_data";
-	char *cause = "unknown";
+	const char *cause = "unknown";
 	struct dentry *dentry = file_dentry(file);
 	struct inode *inode = d_backing_inode(dentry);
 	enum integrity_status status = INTEGRITY_UNKNOWN;
@@ -231,16 +241,22 @@ int ima_appraise_measurement(enum ima_hooks func,
 	}
 
 	status = evm_verifyxattr(dentry, XATTR_NAME_IMA, xattr_value, rc, iint);
-	if ((status != INTEGRITY_PASS) &&
-	    (status != INTEGRITY_PASS_IMMUTABLE) &&
-	    (status != INTEGRITY_UNKNOWN)) {
-		if ((status == INTEGRITY_NOLABEL)
-		    || (status == INTEGRITY_NOXATTRS))
-			cause = "missing-HMAC";
-		else if (status == INTEGRITY_FAIL)
-			cause = "invalid-HMAC";
+	switch (status) {
+	case INTEGRITY_PASS:
+	case INTEGRITY_PASS_IMMUTABLE:
+	case INTEGRITY_UNKNOWN:
+		break;
+	case INTEGRITY_NOXATTRS:	/* No EVM protected xattrs. */
+	case INTEGRITY_NOLABEL:		/* No security.evm xattr. */
+		cause = "missing-HMAC";
 		goto out;
+	case INTEGRITY_FAIL:		/* Invalid HMAC/signature. */
+		cause = "invalid-HMAC";
+		goto out;
+	default:
+		WARN_ONCE(true, "Unexpected integrity status %d\n", status);
 	}
+
 	switch (xattr_value->type) {
 	case IMA_XATTR_DIGEST_NG:
 		/* first byte contains algorithm id */
@@ -292,23 +308,40 @@ int ima_appraise_measurement(enum ima_hooks func,
 	}
 
 out:
-	if (status != INTEGRITY_PASS) {
+	/*
+	 * File signatures on some filesystems can not be properly verified.
+	 * When such filesystems are mounted by an untrusted mounter or on a
+	 * system not willing to accept such a risk, fail the file signature
+	 * verification.
+	 */
+	if ((inode->i_sb->s_iflags & SB_I_IMA_UNVERIFIABLE_SIGNATURE) &&
+	    ((inode->i_sb->s_iflags & SB_I_UNTRUSTED_MOUNTER) ||
+	     (iint->flags & IMA_FAIL_UNVERIFIABLE_SIGS))) {
+		status = INTEGRITY_FAIL;
+		cause = "unverifiable-signature";
+		integrity_audit_msg(AUDIT_INTEGRITY_DATA, inode, filename,
+				    op, cause, rc, 0);
+	} else if (status != INTEGRITY_PASS) {
+		/* Fix mode, but don't replace file signatures. */
 		if ((ima_appraise & IMA_APPRAISE_FIX) &&
 		    (!xattr_value ||
 		     xattr_value->type != EVM_IMA_XATTR_DIGSIG)) {
 			if (!ima_fix_xattr(dentry, iint))
 				status = INTEGRITY_PASS;
-		} else if ((inode->i_size == 0) &&
-			   (iint->flags & IMA_NEW_FILE) &&
-			   (xattr_value &&
-			    xattr_value->type == EVM_IMA_XATTR_DIGSIG)) {
+		}
+
+		/* Permit new files with file signatures, but without data. */
+		if (inode->i_size == 0 && iint->flags & IMA_NEW_FILE &&
+		    xattr_value && xattr_value->type == EVM_IMA_XATTR_DIGSIG) {
 			status = INTEGRITY_PASS;
 		}
+
 		integrity_audit_msg(AUDIT_INTEGRITY_DATA, inode, filename,
 				    op, cause, rc, 0);
 	} else {
 		ima_cache_flags(iint, func);
 	}
+
 	ima_set_cache_status(iint, func, status);
 	return status;
 }
diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c
index 205bc69361ea..4e085a17124f 100644
--- a/security/integrity/ima/ima_crypto.c
+++ b/security/integrity/ima/ima_crypto.c
@@ -73,6 +73,8 @@ int __init ima_init_crypto(void)
 		       hash_algo_name[ima_hash_algo], rc);
 		return rc;
 	}
+	pr_info("Allocated hash algorithm: %s\n",
+		hash_algo_name[ima_hash_algo]);
 	return 0;
 }
 
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index 2cfb0c714967..74d0bd7e76d7 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -16,6 +16,9 @@
  *	implements the IMA hooks: ima_bprm_check, ima_file_mmap,
  *	and ima_file_check.
  */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
 #include <linux/file.h>
 #include <linux/binfmts.h>
@@ -25,6 +28,7 @@
 #include <linux/xattr.h>
 #include <linux/ima.h>
 #include <linux/iversion.h>
+#include <linux/fs.h>
 
 #include "ima.h"
 
@@ -167,8 +171,9 @@ void ima_file_free(struct file *file)
 	ima_check_last_writer(iint, inode, file);
 }
 
-static int process_measurement(struct file *file, char *buf, loff_t size,
-			       int mask, enum ima_hooks func, int opened)
+static int process_measurement(struct file *file, const struct cred *cred,
+			       u32 secid, char *buf, loff_t size, int mask,
+			       enum ima_hooks func, int opened)
 {
 	struct inode *inode = file_inode(file);
 	struct integrity_iint_cache *iint = NULL;
@@ -190,7 +195,7 @@ static int process_measurement(struct file *file, char *buf, loff_t size,
 	 * bitmask based on the appraise/audit/measurement policy.
 	 * Included is the appraise submask.
 	 */
-	action = ima_get_action(inode, mask, func, &pcr);
+	action = ima_get_action(inode, cred, secid, mask, func, &pcr);
 	violation_check = ((func == FILE_CHECK || func == MMAP_CHECK) &&
 			   (ima_policy_flag & IMA_MEASURE));
 	if (!action && !violation_check)
@@ -229,9 +234,18 @@ static int process_measurement(struct file *file, char *buf, loff_t size,
 				 IMA_APPRAISE_SUBMASK | IMA_APPRAISED_SUBMASK |
 				 IMA_ACTION_FLAGS);
 
-	if (test_and_clear_bit(IMA_CHANGE_XATTR, &iint->atomic_flags))
-		/* reset all flags if ima_inode_setxattr was called */
+	/*
+	 * Re-evaluate the file if either the xattr has changed or the
+	 * kernel has no way of detecting file change on the filesystem.
+	 * (Limited to privileged mounted filesystems.)
+	 */
+	if (test_and_clear_bit(IMA_CHANGE_XATTR, &iint->atomic_flags) ||
+	    ((inode->i_sb->s_iflags & SB_I_IMA_UNVERIFIABLE_SIGNATURE) &&
+	     !(inode->i_sb->s_iflags & SB_I_UNTRUSTED_MOUNTER) &&
+	     !(action & IMA_FAIL_UNVERIFIABLE_SIGS))) {
 		iint->flags &= ~IMA_DONE_MASK;
+		iint->measured_pcrs = 0;
+	}
 
 	/* Determine if already appraised/measured based on bitmask
 	 * (IMA_MEASURE, IMA_MEASURED, IMA_XXXX_APPRAISE, IMA_XXXX_APPRAISED,
@@ -324,9 +338,14 @@ out:
  */
 int ima_file_mmap(struct file *file, unsigned long prot)
 {
-	if (file && (prot & PROT_EXEC))
-		return process_measurement(file, NULL, 0, MAY_EXEC,
-					   MMAP_CHECK, 0);
+	u32 secid;
+
+	if (file && (prot & PROT_EXEC)) {
+		security_task_getsecid(current, &secid);
+		return process_measurement(file, current_cred(), secid, NULL,
+					   0, MAY_EXEC, MMAP_CHECK, 0);
+	}
+
 	return 0;
 }
 
@@ -345,8 +364,18 @@ int ima_file_mmap(struct file *file, unsigned long prot)
  */
 int ima_bprm_check(struct linux_binprm *bprm)
 {
-	return process_measurement(bprm->file, NULL, 0, MAY_EXEC,
-				   BPRM_CHECK, 0);
+	int ret;
+	u32 secid;
+
+	security_task_getsecid(current, &secid);
+	ret = process_measurement(bprm->file, current_cred(), secid, NULL, 0,
+				  MAY_EXEC, BPRM_CHECK, 0);
+	if (ret)
+		return ret;
+
+	security_cred_getsecid(bprm->cred, &secid);
+	return process_measurement(bprm->file, bprm->cred, secid, NULL, 0,
+				   MAY_EXEC, CREDS_CHECK, 0);
 }
 
 /**
@@ -361,7 +390,10 @@ int ima_bprm_check(struct linux_binprm *bprm)
  */
 int ima_file_check(struct file *file, int mask, int opened)
 {
-	return process_measurement(file, NULL, 0,
+	u32 secid;
+
+	security_task_getsecid(current, &secid);
+	return process_measurement(file, current_cred(), secid, NULL, 0,
 				   mask & (MAY_READ | MAY_WRITE | MAY_EXEC |
 					   MAY_APPEND), FILE_CHECK, opened);
 }
@@ -440,6 +472,7 @@ int ima_post_read_file(struct file *file, void *buf, loff_t size,
 		       enum kernel_read_file_id read_id)
 {
 	enum ima_hooks func;
+	u32 secid;
 
 	if (!file && read_id == READING_FIRMWARE) {
 		if ((ima_appraise & IMA_APPRAISE_FIRMWARE) &&
@@ -462,7 +495,9 @@ int ima_post_read_file(struct file *file, void *buf, loff_t size,
 	}
 
 	func = read_idmap[read_id] ?: FILE_CHECK;
-	return process_measurement(file, buf, size, MAY_READ, func, 0);
+	security_task_getsecid(current, &secid);
+	return process_measurement(file, current_cred(), secid, buf, size,
+				   MAY_READ, func, 0);
 }
 
 static int __init init_ima(void)
@@ -472,6 +507,16 @@ static int __init init_ima(void)
 	ima_init_template_list();
 	hash_setup(CONFIG_IMA_DEFAULT_HASH);
 	error = ima_init();
+
+	if (error && strcmp(hash_algo_name[ima_hash_algo],
+			    CONFIG_IMA_DEFAULT_HASH) != 0) {
+		pr_info("Allocating %s failed, going to use default hash algorithm %s\n",
+			hash_algo_name[ima_hash_algo], CONFIG_IMA_DEFAULT_HASH);
+		hash_setup_done = 0;
+		hash_setup(CONFIG_IMA_DEFAULT_HASH);
+		error = ima_init();
+	}
+
 	if (!error) {
 		ima_initialized = 1;
 		ima_update_policy_flag();
diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index 915f5572c6ff..d89bebf85421 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -96,6 +96,7 @@ static struct ima_rule_entry dont_measure_rules[] __ro_after_init = {
 	{.action = DONT_MEASURE, .fsmagic = BINFMTFS_MAGIC, .flags = IMA_FSMAGIC},
 	{.action = DONT_MEASURE, .fsmagic = SECURITYFS_MAGIC, .flags = IMA_FSMAGIC},
 	{.action = DONT_MEASURE, .fsmagic = SELINUX_MAGIC, .flags = IMA_FSMAGIC},
+	{.action = DONT_MEASURE, .fsmagic = SMACK_MAGIC, .flags = IMA_FSMAGIC},
 	{.action = DONT_MEASURE, .fsmagic = CGROUP_SUPER_MAGIC,
 	 .flags = IMA_FSMAGIC},
 	{.action = DONT_MEASURE, .fsmagic = CGROUP2_SUPER_MAGIC,
@@ -141,6 +142,7 @@ static struct ima_rule_entry default_appraise_rules[] __ro_after_init = {
 	{.action = DONT_APPRAISE, .fsmagic = BINFMTFS_MAGIC, .flags = IMA_FSMAGIC},
 	{.action = DONT_APPRAISE, .fsmagic = SECURITYFS_MAGIC, .flags = IMA_FSMAGIC},
 	{.action = DONT_APPRAISE, .fsmagic = SELINUX_MAGIC, .flags = IMA_FSMAGIC},
+	{.action = DONT_APPRAISE, .fsmagic = SMACK_MAGIC, .flags = IMA_FSMAGIC},
 	{.action = DONT_APPRAISE, .fsmagic = NSFS_MAGIC, .flags = IMA_FSMAGIC},
 	{.action = DONT_APPRAISE, .fsmagic = CGROUP_SUPER_MAGIC, .flags = IMA_FSMAGIC},
 	{.action = DONT_APPRAISE, .fsmagic = CGROUP2_SUPER_MAGIC, .flags = IMA_FSMAGIC},
@@ -188,6 +190,7 @@ __setup("ima_tcb", default_measure_policy_setup);
 
 static bool ima_use_appraise_tcb __initdata;
 static bool ima_use_secure_boot __initdata;
+static bool ima_fail_unverifiable_sigs __ro_after_init;
 static int __init policy_setup(char *str)
 {
 	char *p;
@@ -201,6 +204,8 @@ static int __init policy_setup(char *str)
 			ima_use_appraise_tcb = true;
 		else if (strcmp(p, "secure_boot") == 0)
 			ima_use_secure_boot = true;
+		else if (strcmp(p, "fail_securely") == 0)
+			ima_fail_unverifiable_sigs = true;
 	}
 
 	return 1;
@@ -243,16 +248,17 @@ static void ima_lsm_update_rules(void)
  * ima_match_rules - determine whether an inode matches the measure rule.
  * @rule: a pointer to a rule
  * @inode: a pointer to an inode
+ * @cred: a pointer to a credentials structure for user validation
+ * @secid: the secid of the task to be validated
  * @func: LIM hook identifier
  * @mask: requested action (MAY_READ | MAY_WRITE | MAY_APPEND | MAY_EXEC)
  *
  * Returns true on rule match, false on failure.
  */
 static bool ima_match_rules(struct ima_rule_entry *rule, struct inode *inode,
+			    const struct cred *cred, u32 secid,
 			    enum ima_hooks func, int mask)
 {
-	struct task_struct *tsk = current;
-	const struct cred *cred = current_cred();
 	int i;
 
 	if ((rule->flags & IMA_FUNC) &&
@@ -287,7 +293,7 @@ static bool ima_match_rules(struct ima_rule_entry *rule, struct inode *inode,
 		return false;
 	for (i = 0; i < MAX_LSM_RULES; i++) {
 		int rc = 0;
-		u32 osid, sid;
+		u32 osid;
 		int retried = 0;
 
 		if (!rule->lsm[i].rule)
@@ -307,8 +313,7 @@ retry:
 		case LSM_SUBJ_USER:
 		case LSM_SUBJ_ROLE:
 		case LSM_SUBJ_TYPE:
-			security_task_getsecid(tsk, &sid);
-			rc = security_filter_rule_match(sid,
+			rc = security_filter_rule_match(secid,
 							rule->lsm[i].type,
 							Audit_equal,
 							rule->lsm[i].rule,
@@ -341,6 +346,8 @@ static int get_subaction(struct ima_rule_entry *rule, enum ima_hooks func)
 		return IMA_MMAP_APPRAISE;
 	case BPRM_CHECK:
 		return IMA_BPRM_APPRAISE;
+	case CREDS_CHECK:
+		return IMA_CREDS_APPRAISE;
 	case FILE_CHECK:
 	case POST_SETATTR:
 		return IMA_FILE_APPRAISE;
@@ -353,6 +360,9 @@ static int get_subaction(struct ima_rule_entry *rule, enum ima_hooks func)
 /**
  * ima_match_policy - decision based on LSM and other conditions
  * @inode: pointer to an inode for which the policy decision is being made
+ * @cred: pointer to a credentials structure for which the policy decision is
+ *        being made
+ * @secid: LSM secid of the task to be validated
  * @func: IMA hook identifier
  * @mask: requested action (MAY_READ | MAY_WRITE | MAY_APPEND | MAY_EXEC)
  * @pcr: set the pcr to extend
@@ -364,8 +374,8 @@ static int get_subaction(struct ima_rule_entry *rule, enum ima_hooks func)
  * list when walking it.  Reads are many orders of magnitude more numerous
  * than writes so ima_match_policy() is classical RCU candidate.
  */
-int ima_match_policy(struct inode *inode, enum ima_hooks func, int mask,
-		     int flags, int *pcr)
+int ima_match_policy(struct inode *inode, const struct cred *cred, u32 secid,
+		     enum ima_hooks func, int mask, int flags, int *pcr)
 {
 	struct ima_rule_entry *entry;
 	int action = 0, actmask = flags | (flags << 1);
@@ -376,7 +386,7 @@ int ima_match_policy(struct inode *inode, enum ima_hooks func, int mask,
 		if (!(entry->action & actmask))
 			continue;
 
-		if (!ima_match_rules(entry, inode, func, mask))
+		if (!ima_match_rules(entry, inode, cred, secid, func, mask))
 			continue;
 
 		action |= entry->flags & IMA_ACTION_FLAGS;
@@ -384,7 +394,9 @@ int ima_match_policy(struct inode *inode, enum ima_hooks func, int mask,
 		action |= entry->action & IMA_DO_MASK;
 		if (entry->action & IMA_APPRAISE) {
 			action |= get_subaction(entry, func);
-			action ^= IMA_HASH;
+			action &= ~IMA_HASH;
+			if (ima_fail_unverifiable_sigs)
+				action |= IMA_FAIL_UNVERIFIABLE_SIGS;
 		}
 
 		if (entry->action & IMA_DO_MASK)
@@ -713,6 +725,8 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry)
 				entry->func = MMAP_CHECK;
 			else if (strcmp(args[0].from, "BPRM_CHECK") == 0)
 				entry->func = BPRM_CHECK;
+			else if (strcmp(args[0].from, "CREDS_CHECK") == 0)
+				entry->func = CREDS_CHECK;
 			else if (strcmp(args[0].from, "KEXEC_KERNEL_CHECK") ==
 				 0)
 				entry->func = KEXEC_KERNEL_CHECK;
diff --git a/security/integrity/ima/ima_template_lib.c b/security/integrity/ima/ima_template_lib.c
index 28af43f63572..5afaa53decc5 100644
--- a/security/integrity/ima/ima_template_lib.c
+++ b/security/integrity/ima/ima_template_lib.c
@@ -378,16 +378,11 @@ int ima_eventname_ng_init(struct ima_event_data *event_data,
 int ima_eventsig_init(struct ima_event_data *event_data,
 		      struct ima_field_data *field_data)
 {
-	enum data_formats fmt = DATA_FMT_HEX;
 	struct evm_ima_xattr_data *xattr_value = event_data->xattr_value;
-	int xattr_len = event_data->xattr_len;
-	int rc = 0;
 
 	if ((!xattr_value) || (xattr_value->type != EVM_IMA_XATTR_DIGSIG))
-		goto out;
+		return 0;
 
-	rc = ima_write_template_field_data(xattr_value, xattr_len, fmt,
-					   field_data);
-out:
-	return rc;
+	return ima_write_template_field_data(xattr_value, event_data->xattr_len,
+					     DATA_FMT_HEX, field_data);
 }
diff --git a/security/integrity/integrity.h b/security/integrity/integrity.h
index 50a8e3365df7..5e58e02ba8dc 100644
--- a/security/integrity/integrity.h
+++ b/security/integrity/integrity.h
@@ -30,11 +30,11 @@
 
 /* iint cache flags */
 #define IMA_ACTION_FLAGS	0xff000000
-#define IMA_ACTION_RULE_FLAGS	0x06000000
 #define IMA_DIGSIG_REQUIRED	0x01000000
 #define IMA_PERMIT_DIRECTIO	0x02000000
 #define IMA_NEW_FILE		0x04000000
 #define EVM_IMMUTABLE_DIGSIG	0x08000000
+#define IMA_FAIL_UNVERIFIABLE_SIGS	0x10000000
 
 #define IMA_DO_MASK		(IMA_MEASURE | IMA_APPRAISE | IMA_AUDIT | \
 				 IMA_HASH | IMA_APPRAISE_SUBMASK)
@@ -51,10 +51,14 @@
 #define IMA_BPRM_APPRAISED	0x00020000
 #define IMA_READ_APPRAISE	0x00040000
 #define IMA_READ_APPRAISED	0x00080000
+#define IMA_CREDS_APPRAISE	0x00100000
+#define IMA_CREDS_APPRAISED	0x00200000
 #define IMA_APPRAISE_SUBMASK	(IMA_FILE_APPRAISE | IMA_MMAP_APPRAISE | \
-				 IMA_BPRM_APPRAISE | IMA_READ_APPRAISE)
+				 IMA_BPRM_APPRAISE | IMA_READ_APPRAISE | \
+				 IMA_CREDS_APPRAISE)
 #define IMA_APPRAISED_SUBMASK	(IMA_FILE_APPRAISED | IMA_MMAP_APPRAISED | \
-				 IMA_BPRM_APPRAISED | IMA_READ_APPRAISED)
+				 IMA_BPRM_APPRAISED | IMA_READ_APPRAISED | \
+				 IMA_CREDS_APPRAISED)
 
 /* iint cache atomic_flags */
 #define IMA_CHANGE_XATTR	0
@@ -121,6 +125,7 @@ struct integrity_iint_cache {
 	enum integrity_status ima_mmap_status:4;
 	enum integrity_status ima_bprm_status:4;
 	enum integrity_status ima_read_status:4;
+	enum integrity_status ima_creds_status:4;
 	enum integrity_status evm_status:4;
 	struct ima_digest_data *ima_hash;
 };
diff --git a/security/security.c b/security/security.c
index af53d2834e20..7bc2fde023a7 100644
--- a/security/security.c
+++ b/security/security.c
@@ -30,6 +30,8 @@
 #include <linux/string.h>
 #include <net/flow.h>
 
+#include <trace/events/initcall.h>
+
 #define MAX_LSM_EVM_XATTR	2
 
 /* Maximum number of letters for an LSM name string */
@@ -45,10 +47,14 @@ static __initdata char chosen_lsm[SECURITY_NAME_MAX + 1] =
 
 static void __init do_security_initcalls(void)
 {
+	int ret;
 	initcall_t *call;
 	call = __security_initcall_start;
+	trace_initcall_level("security");
 	while (call < __security_initcall_end) {
-		(*call) ();
+		trace_initcall_start((*call));
+		ret = (*call) ();
+		trace_initcall_finish((*call), ret);
 		call++;
 	}
 }
@@ -61,11 +67,11 @@ static void __init do_security_initcalls(void)
 int __init security_init(void)
 {
 	int i;
-	struct list_head *list = (struct list_head *) &security_hook_heads;
+	struct hlist_head *list = (struct hlist_head *) &security_hook_heads;
 
-	for (i = 0; i < sizeof(security_hook_heads) / sizeof(struct list_head);
+	for (i = 0; i < sizeof(security_hook_heads) / sizeof(struct hlist_head);
 	     i++)
-		INIT_LIST_HEAD(&list[i]);
+		INIT_HLIST_HEAD(&list[i]);
 	pr_info("Security Framework initialized\n");
 
 	/*
@@ -163,7 +169,7 @@ void __init security_add_hooks(struct security_hook_list *hooks, int count,
 
 	for (i = 0; i < count; i++) {
 		hooks[i].lsm = lsm;
-		list_add_tail_rcu(&hooks[i].list, hooks[i].head);
+		hlist_add_tail_rcu(&hooks[i].list, hooks[i].head);
 	}
 	if (lsm_append(lsm, &lsm_names) < 0)
 		panic("%s - Cannot get early memory.\n", __func__);
@@ -201,7 +207,7 @@ EXPORT_SYMBOL(unregister_lsm_notifier);
 	do {							\
 		struct security_hook_list *P;			\
 								\
-		list_for_each_entry(P, &security_hook_heads.FUNC, list)	\
+		hlist_for_each_entry(P, &security_hook_heads.FUNC, list) \
 			P->hook.FUNC(__VA_ARGS__);		\
 	} while (0)
 
@@ -210,7 +216,7 @@ EXPORT_SYMBOL(unregister_lsm_notifier);
 	do {							\
 		struct security_hook_list *P;			\
 								\
-		list_for_each_entry(P, &security_hook_heads.FUNC, list) { \
+		hlist_for_each_entry(P, &security_hook_heads.FUNC, list) { \
 			RC = P->hook.FUNC(__VA_ARGS__);		\
 			if (RC != 0)				\
 				break;				\
@@ -317,7 +323,7 @@ int security_vm_enough_memory_mm(struct mm_struct *mm, long pages)
 	 * agree that it should be set it will. If any module
 	 * thinks it should not be set it won't.
 	 */
-	list_for_each_entry(hp, &security_hook_heads.vm_enough_memory, list) {
+	hlist_for_each_entry(hp, &security_hook_heads.vm_enough_memory, list) {
 		rc = hp->hook.vm_enough_memory(mm, pages);
 		if (rc <= 0) {
 			cap_sys_admin = 0;
@@ -805,7 +811,7 @@ int security_inode_getsecurity(struct inode *inode, const char *name, void **buf
 	/*
 	 * Only one module will provide an attribute with a given name.
 	 */
-	list_for_each_entry(hp, &security_hook_heads.inode_getsecurity, list) {
+	hlist_for_each_entry(hp, &security_hook_heads.inode_getsecurity, list) {
 		rc = hp->hook.inode_getsecurity(inode, name, buffer, alloc);
 		if (rc != -EOPNOTSUPP)
 			return rc;
@@ -823,7 +829,7 @@ int security_inode_setsecurity(struct inode *inode, const char *name, const void
 	/*
 	 * Only one module will provide an attribute with a given name.
 	 */
-	list_for_each_entry(hp, &security_hook_heads.inode_setsecurity, list) {
+	hlist_for_each_entry(hp, &security_hook_heads.inode_setsecurity, list) {
 		rc = hp->hook.inode_setsecurity(inode, name, value, size,
 								flags);
 		if (rc != -EOPNOTSUPP)
@@ -1005,6 +1011,13 @@ void security_transfer_creds(struct cred *new, const struct cred *old)
 	call_void_hook(cred_transfer, new, old);
 }
 
+void security_cred_getsecid(const struct cred *c, u32 *secid)
+{
+	*secid = 0;
+	call_void_hook(cred_getsecid, c, secid);
+}
+EXPORT_SYMBOL(security_cred_getsecid);
+
 int security_kernel_act_as(struct cred *new, u32 secid)
 {
 	return call_int_hook(kernel_act_as, 0, new, secid);
@@ -1114,9 +1127,9 @@ int security_task_movememory(struct task_struct *p)
 }
 
 int security_task_kill(struct task_struct *p, struct siginfo *info,
-			int sig, u32 secid)
+			int sig, const struct cred *cred)
 {
-	return call_int_hook(task_kill, 0, p, info, sig, secid);
+	return call_int_hook(task_kill, 0, p, info, sig, cred);
 }
 
 int security_task_prctl(int option, unsigned long arg2, unsigned long arg3,
@@ -1126,7 +1139,7 @@ int security_task_prctl(int option, unsigned long arg2, unsigned long arg3,
 	int rc = -ENOSYS;
 	struct security_hook_list *hp;
 
-	list_for_each_entry(hp, &security_hook_heads.task_prctl, list) {
+	hlist_for_each_entry(hp, &security_hook_heads.task_prctl, list) {
 		thisrc = hp->hook.task_prctl(option, arg2, arg3, arg4, arg5);
 		if (thisrc != -ENOSYS) {
 			rc = thisrc;
@@ -1651,7 +1664,7 @@ int security_xfrm_state_pol_flow_match(struct xfrm_state *x,
 	 * For speed optimization, we explicitly break the loop rather than
 	 * using the macro
 	 */
-	list_for_each_entry(hp, &security_hook_heads.xfrm_state_pol_flow_match,
+	hlist_for_each_entry(hp, &security_hook_heads.xfrm_state_pol_flow_match,
 				list) {
 		rc = hp->hook.xfrm_state_pol_flow_match(x, xp, fl);
 		break;
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 0314fc766134..4cafe6a19167 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -3947,6 +3947,11 @@ static void selinux_cred_transfer(struct cred *new, const struct cred *old)
 	*tsec = *old_tsec;
 }
 
+static void selinux_cred_getsecid(const struct cred *c, u32 *secid)
+{
+	*secid = cred_sid(c);
+}
+
 /*
  * set the security data for a kernel service
  * - all the creation contexts are set to unlabelled
@@ -4156,16 +4161,19 @@ static int selinux_task_movememory(struct task_struct *p)
 }
 
 static int selinux_task_kill(struct task_struct *p, struct siginfo *info,
-				int sig, u32 secid)
+				int sig, const struct cred *cred)
 {
+	u32 secid;
 	u32 perm;
 
 	if (!sig)
 		perm = PROCESS__SIGNULL; /* null signal; existence test */
 	else
 		perm = signal_to_av(sig);
-	if (!secid)
+	if (!cred)
 		secid = current_sid();
+	else
+		secid = cred_sid(cred);
 	return avc_has_perm(&selinux_state,
 			    secid, task_sid(p), SECCLASS_PROCESS, perm, NULL);
 }
@@ -5998,6 +6006,7 @@ static int selinux_msg_queue_msgctl(struct kern_ipc_perm *msq, int cmd)
 				    SECCLASS_SYSTEM, SYSTEM__IPC_INFO, NULL);
 	case IPC_STAT:
 	case MSG_STAT:
+	case MSG_STAT_ANY:
 		perms = MSGQ__GETATTR | MSGQ__ASSOCIATE;
 		break;
 	case IPC_SET:
@@ -6149,6 +6158,7 @@ static int selinux_shm_shmctl(struct kern_ipc_perm *shp, int cmd)
 				    SECCLASS_SYSTEM, SYSTEM__IPC_INFO, NULL);
 	case IPC_STAT:
 	case SHM_STAT:
+	case SHM_STAT_ANY:
 		perms = SHM__GETATTR | SHM__ASSOCIATE;
 		break;
 	case IPC_SET:
@@ -6264,6 +6274,7 @@ static int selinux_sem_semctl(struct kern_ipc_perm *sma, int cmd)
 		break;
 	case IPC_STAT:
 	case SEM_STAT:
+	case SEM_STAT_ANY:
 		perms = SEM__GETATTR | SEM__ASSOCIATE;
 		break;
 	default:
@@ -6923,6 +6934,7 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = {
 	LSM_HOOK_INIT(cred_free, selinux_cred_free),
 	LSM_HOOK_INIT(cred_prepare, selinux_cred_prepare),
 	LSM_HOOK_INIT(cred_transfer, selinux_cred_transfer),
+	LSM_HOOK_INIT(cred_getsecid, selinux_cred_getsecid),
 	LSM_HOOK_INIT(kernel_act_as, selinux_kernel_act_as),
 	LSM_HOOK_INIT(kernel_create_files_as, selinux_kernel_create_files_as),
 	LSM_HOOK_INIT(kernel_module_request, selinux_kernel_module_request),
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index 4be683eeba01..245160373dab 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -2061,6 +2061,7 @@ __initcall(init_sel_fs);
 void exit_sel_fs(void)
 {
 	sysfs_remove_mount_point(fs_kobj, "selinux");
+	dput(selinux_null.dentry);
 	kern_unmount(selinuxfs_mount);
 	unregister_filesystem(&sel_fs_type);
 }
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 0735b8db158b..0b414836bebd 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -2050,6 +2050,23 @@ static void smack_cred_transfer(struct cred *new, const struct cred *old)
 }
 
 /**
+ * smack_cred_getsecid - get the secid corresponding to a creds structure
+ * @c: the object creds
+ * @secid: where to put the result
+ *
+ * Sets the secid to contain a u32 version of the smack label.
+ */
+static void smack_cred_getsecid(const struct cred *c, u32 *secid)
+{
+	struct smack_known *skp;
+
+	rcu_read_lock();
+	skp = smk_of_task(c->security);
+	*secid = skp->smk_secid;
+	rcu_read_unlock();
+}
+
+/**
  * smack_kernel_act_as - Set the subjective context in a set of credentials
  * @new: points to the set of credentials to be modified.
  * @secid: specifies the security ID to be set
@@ -2228,15 +2245,13 @@ static int smack_task_movememory(struct task_struct *p)
  * @p: the task object
  * @info: unused
  * @sig: unused
- * @secid: identifies the smack to use in lieu of current's
+ * @cred: identifies the cred to use in lieu of current's
  *
  * Return 0 if write access is permitted
  *
- * The secid behavior is an artifact of an SELinux hack
- * in the USB code. Someday it may go away.
  */
 static int smack_task_kill(struct task_struct *p, struct siginfo *info,
-			   int sig, u32 secid)
+			   int sig, const struct cred *cred)
 {
 	struct smk_audit_info ad;
 	struct smack_known *skp;
@@ -2252,17 +2267,17 @@ static int smack_task_kill(struct task_struct *p, struct siginfo *info,
 	 * Sending a signal requires that the sender
 	 * can write the receiver.
 	 */
-	if (secid == 0) {
+	if (cred == NULL) {
 		rc = smk_curacc(tkp, MAY_DELIVER, &ad);
 		rc = smk_bu_task(p, MAY_DELIVER, rc);
 		return rc;
 	}
 	/*
-	 * If the secid isn't 0 we're dealing with some USB IO
+	 * If the cred isn't NULL we're dealing with some USB IO
 	 * specific behavior. This is not clean. For one thing
 	 * we can't take privilege into account.
 	 */
-	skp = smack_from_secid(secid);
+	skp = smk_of_task(cred->security);
 	rc = smk_access(skp, tkp, MAY_DELIVER, &ad);
 	rc = smk_bu_note("USB signal", skp, tkp, MAY_DELIVER, rc);
 	return rc;
@@ -3031,6 +3046,7 @@ static int smack_shm_shmctl(struct kern_ipc_perm *isp, int cmd)
 	switch (cmd) {
 	case IPC_STAT:
 	case SHM_STAT:
+	case SHM_STAT_ANY:
 		may = MAY_READ;
 		break;
 	case IPC_SET:
@@ -3124,6 +3140,7 @@ static int smack_sem_semctl(struct kern_ipc_perm *isp, int cmd)
 	case GETALL:
 	case IPC_STAT:
 	case SEM_STAT:
+	case SEM_STAT_ANY:
 		may = MAY_READ;
 		break;
 	case SETVAL:
@@ -3213,6 +3230,7 @@ static int smack_msg_queue_msgctl(struct kern_ipc_perm *isp, int cmd)
 	switch (cmd) {
 	case IPC_STAT:
 	case MSG_STAT:
+	case MSG_STAT_ANY:
 		may = MAY_READ;
 		break;
 	case IPC_SET:
@@ -3350,6 +3368,7 @@ static void smack_d_instantiate(struct dentry *opt_dentry, struct inode *inode)
 	if (opt_dentry->d_parent == opt_dentry) {
 		switch (sbp->s_magic) {
 		case CGROUP_SUPER_MAGIC:
+		case CGROUP2_SUPER_MAGIC:
 			/*
 			 * The cgroup filesystem is never mounted,
 			 * so there's no opportunity to set the mount
@@ -3393,6 +3412,7 @@ static void smack_d_instantiate(struct dentry *opt_dentry, struct inode *inode)
 	switch (sbp->s_magic) {
 	case SMACK_MAGIC:
 	case CGROUP_SUPER_MAGIC:
+	case CGROUP2_SUPER_MAGIC:
 		/*
 		 * Casey says that it's a little embarrassing
 		 * that the smack file system doesn't do
@@ -4654,6 +4674,7 @@ static struct security_hook_list smack_hooks[] __lsm_ro_after_init = {
 	LSM_HOOK_INIT(cred_free, smack_cred_free),
 	LSM_HOOK_INIT(cred_prepare, smack_cred_prepare),
 	LSM_HOOK_INIT(cred_transfer, smack_cred_transfer),
+	LSM_HOOK_INIT(cred_getsecid, smack_cred_getsecid),
 	LSM_HOOK_INIT(kernel_act_as, smack_kernel_act_as),
 	LSM_HOOK_INIT(kernel_create_files_as, smack_kernel_create_files_as),
 	LSM_HOOK_INIT(task_setpgid, smack_task_setpgid),
diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c
index 481ab0e94ffa..1980f68246cb 100644
--- a/sound/core/oss/pcm_oss.c
+++ b/sound/core/oss/pcm_oss.c
@@ -1128,13 +1128,14 @@ static int snd_pcm_oss_get_active_substream(struct snd_pcm_oss_file *pcm_oss_fil
 }
 
 /* call with params_lock held */
+/* NOTE: this always calls PREPARE unconditionally no matter whether
+ * runtime->oss.prepare is set or not
+ */
 static int snd_pcm_oss_prepare(struct snd_pcm_substream *substream)
 {
 	int err;
 	struct snd_pcm_runtime *runtime = substream->runtime;
 
-	if (!runtime->oss.prepare)
-		return 0;
 	err = snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_PREPARE, NULL);
 	if (err < 0) {
 		pcm_dbg(substream->pcm,
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index b84554893fab..35ffccea94c3 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -617,7 +617,7 @@ static int snd_pcm_hw_params_choose(struct snd_pcm_substream *pcm,
 			changed = snd_pcm_hw_param_first(pcm, params, *v, NULL);
 		else
 			changed = snd_pcm_hw_param_last(pcm, params, *v, NULL);
-		if (snd_BUG_ON(changed < 0))
+		if (changed < 0)
 			return changed;
 		if (changed == 0)
 			continue;
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 7720e3102bcc..7a111a1b5836 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -987,7 +987,7 @@ static int param_set_xint(const char *val, const struct kernel_param *kp)
 #define azx_del_card_list(chip) /* NOP */
 #endif /* CONFIG_PM */
 
-#if defined(CONFIG_PM_SLEEP) || defined(SUPPORT_VGA_SWITCHEROO)
+#ifdef CONFIG_PM_SLEEP
 /*
  * power management
  */
@@ -1068,9 +1068,7 @@ static int azx_resume(struct device *dev)
 	trace_azx_resume(chip);
 	return 0;
 }
-#endif /* CONFIG_PM_SLEEP || SUPPORT_VGA_SWITCHEROO */
 
-#ifdef CONFIG_PM_SLEEP
 /* put codec down to D3 at hibernation for Intel SKL+;
  * otherwise BIOS may still access the codec and screw up the driver
  */
diff --git a/sound/usb/clock.c b/sound/usb/clock.c
index ab39ccb974c6..0b030d8fe3fa 100644
--- a/sound/usb/clock.c
+++ b/sound/usb/clock.c
@@ -35,105 +35,85 @@
 #include "clock.h"
 #include "quirks.h"
 
-static struct uac_clock_source_descriptor *
-	snd_usb_find_clock_source(struct usb_host_interface *ctrl_iface,
-				  int clock_id)
+static void *find_uac_clock_desc(struct usb_host_interface *iface, int id,
+				 bool (*validator)(void *, int), u8 type)
 {
-	struct uac_clock_source_descriptor *cs = NULL;
+	void *cs = NULL;
 
-	while ((cs = snd_usb_find_csint_desc(ctrl_iface->extra,
-					     ctrl_iface->extralen,
-					     cs, UAC2_CLOCK_SOURCE))) {
-		if (cs->bLength >= sizeof(*cs) && cs->bClockID == clock_id)
+	while ((cs = snd_usb_find_csint_desc(iface->extra, iface->extralen,
+					     cs, type))) {
+		if (validator(cs, id))
 			return cs;
 	}
 
 	return NULL;
 }
 
-static struct uac3_clock_source_descriptor *
-	snd_usb_find_clock_source_v3(struct usb_host_interface *ctrl_iface,
-				  int clock_id)
+static bool validate_clock_source_v2(void *p, int id)
 {
-	struct uac3_clock_source_descriptor *cs = NULL;
-
-	while ((cs = snd_usb_find_csint_desc(ctrl_iface->extra,
-					     ctrl_iface->extralen,
-					     cs, UAC3_CLOCK_SOURCE))) {
-		if (cs->bClockID == clock_id)
-			return cs;
-	}
-
-	return NULL;
+	struct uac_clock_source_descriptor *cs = p;
+	return cs->bLength == sizeof(*cs) && cs->bClockID == id;
 }
 
-static struct uac_clock_selector_descriptor *
-	snd_usb_find_clock_selector(struct usb_host_interface *ctrl_iface,
-				    int clock_id)
+static bool validate_clock_source_v3(void *p, int id)
 {
-	struct uac_clock_selector_descriptor *cs = NULL;
-
-	while ((cs = snd_usb_find_csint_desc(ctrl_iface->extra,
-					     ctrl_iface->extralen,
-					     cs, UAC2_CLOCK_SELECTOR))) {
-		if (cs->bLength >= sizeof(*cs) && cs->bClockID == clock_id) {
-			if (cs->bLength < 5 + cs->bNrInPins)
-				return NULL;
-			return cs;
-		}
-	}
-
-	return NULL;
+	struct uac3_clock_source_descriptor *cs = p;
+	return cs->bLength == sizeof(*cs) && cs->bClockID == id;
 }
 
-static struct uac3_clock_selector_descriptor *
-	snd_usb_find_clock_selector_v3(struct usb_host_interface *ctrl_iface,
-				    int clock_id)
+static bool validate_clock_selector_v2(void *p, int id)
 {
-	struct uac3_clock_selector_descriptor *cs = NULL;
-
-	while ((cs = snd_usb_find_csint_desc(ctrl_iface->extra,
-					     ctrl_iface->extralen,
-					     cs, UAC3_CLOCK_SELECTOR))) {
-		if (cs->bClockID == clock_id)
-			return cs;
-	}
-
-	return NULL;
+	struct uac_clock_selector_descriptor *cs = p;
+	return cs->bLength >= sizeof(*cs) && cs->bClockID == id &&
+		cs->bLength == 7 + cs->bNrInPins;
 }
 
-static struct uac_clock_multiplier_descriptor *
-	snd_usb_find_clock_multiplier(struct usb_host_interface *ctrl_iface,
-				      int clock_id)
+static bool validate_clock_selector_v3(void *p, int id)
 {
-	struct uac_clock_multiplier_descriptor *cs = NULL;
-
-	while ((cs = snd_usb_find_csint_desc(ctrl_iface->extra,
-					     ctrl_iface->extralen,
-					     cs, UAC2_CLOCK_MULTIPLIER))) {
-		if (cs->bLength >= sizeof(*cs) && cs->bClockID == clock_id)
-			return cs;
-	}
-
-	return NULL;
+	struct uac3_clock_selector_descriptor *cs = p;
+	return cs->bLength >= sizeof(*cs) && cs->bClockID == id &&
+		cs->bLength == 11 + cs->bNrInPins;
 }
 
-static struct uac3_clock_multiplier_descriptor *
-	snd_usb_find_clock_multiplier_v3(struct usb_host_interface *ctrl_iface,
-				      int clock_id)
+static bool validate_clock_multiplier_v2(void *p, int id)
 {
-	struct uac3_clock_multiplier_descriptor *cs = NULL;
+	struct uac_clock_multiplier_descriptor *cs = p;
+	return cs->bLength == sizeof(*cs) && cs->bClockID == id;
+}
 
-	while ((cs = snd_usb_find_csint_desc(ctrl_iface->extra,
-					     ctrl_iface->extralen,
-					     cs, UAC3_CLOCK_MULTIPLIER))) {
-		if (cs->bClockID == clock_id)
-			return cs;
-	}
+static bool validate_clock_multiplier_v3(void *p, int id)
+{
+	struct uac3_clock_multiplier_descriptor *cs = p;
+	return cs->bLength == sizeof(*cs) && cs->bClockID == id;
+}
 
-	return NULL;
+#define DEFINE_FIND_HELPER(name, obj, validator, type)		\
+static obj *name(struct usb_host_interface *iface, int id)	\
+{								\
+	return find_uac_clock_desc(iface, id, validator, type);	\
 }
 
+DEFINE_FIND_HELPER(snd_usb_find_clock_source,
+		   struct uac_clock_source_descriptor,
+		   validate_clock_source_v2, UAC2_CLOCK_SOURCE);
+DEFINE_FIND_HELPER(snd_usb_find_clock_source_v3,
+		   struct uac3_clock_source_descriptor,
+		   validate_clock_source_v3, UAC3_CLOCK_SOURCE);
+
+DEFINE_FIND_HELPER(snd_usb_find_clock_selector,
+		   struct uac_clock_selector_descriptor,
+		   validate_clock_selector_v2, UAC2_CLOCK_SELECTOR);
+DEFINE_FIND_HELPER(snd_usb_find_clock_selector_v3,
+		   struct uac3_clock_selector_descriptor,
+		   validate_clock_selector_v3, UAC3_CLOCK_SELECTOR);
+
+DEFINE_FIND_HELPER(snd_usb_find_clock_multiplier,
+		   struct uac_clock_multiplier_descriptor,
+		   validate_clock_multiplier_v2, UAC2_CLOCK_MULTIPLIER);
+DEFINE_FIND_HELPER(snd_usb_find_clock_multiplier_v3,
+		   struct uac3_clock_multiplier_descriptor,
+		   validate_clock_multiplier_v3, UAC3_CLOCK_MULTIPLIER);
+
 static int uac_clock_selector_get_val(struct snd_usb_audio *chip, int selector_id)
 {
 	unsigned char buf;
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index f41079da38c5..d554c11e01ff 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -316,6 +316,7 @@
 #define X86_FEATURE_VPCLMULQDQ		(16*32+10) /* Carry-Less Multiplication Double Quadword */
 #define X86_FEATURE_AVX512_VNNI		(16*32+11) /* Vector Neural Network Instructions */
 #define X86_FEATURE_AVX512_BITALG	(16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */
+#define X86_FEATURE_TME			(16*32+13) /* Intel Total Memory Encryption */
 #define X86_FEATURE_AVX512_VPOPCNTDQ	(16*32+14) /* POPCNT for vectors of DW/QW */
 #define X86_FEATURE_LA57		(16*32+16) /* 5-level page tables */
 #define X86_FEATURE_RDPID		(16*32+22) /* RDPID instruction */
@@ -328,6 +329,7 @@
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
 #define X86_FEATURE_AVX512_4VNNIW	(18*32+ 2) /* AVX-512 Neural Network Instructions */
 #define X86_FEATURE_AVX512_4FMAPS	(18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_PCONFIG		(18*32+18) /* Intel PCONFIG */
 #define X86_FEATURE_SPEC_CTRL		(18*32+26) /* "" Speculation Control (IBRS + IBPB) */
 #define X86_FEATURE_INTEL_STIBP		(18*32+27) /* "" Single Thread Indirect Branch Predictors */
 #define X86_FEATURE_ARCH_CAPABILITIES	(18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
diff --git a/tools/include/linux/spinlock.h b/tools/include/linux/spinlock.h
index 4ed569fcb139..b21b586b9854 100644
--- a/tools/include/linux/spinlock.h
+++ b/tools/include/linux/spinlock.h
@@ -7,6 +7,7 @@
 
 #define spinlock_t		pthread_mutex_t
 #define DEFINE_SPINLOCK(x)	pthread_mutex_t x = PTHREAD_MUTEX_INITIALIZER;
+#define __SPIN_LOCK_UNLOCKED(x)	(pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER
 
 #define spin_lock_irqsave(x, f)		(void)f, pthread_mutex_lock(x)
 #define spin_unlock_irqrestore(x, f)	(void)f, pthread_mutex_unlock(x)
diff --git a/tools/include/tools/config.h b/tools/include/tools/config.h
new file mode 100644
index 000000000000..08ade7df8132
--- /dev/null
+++ b/tools/include/tools/config.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_CONFIG_H
+#define _TOOLS_CONFIG_H
+
+/* Subset of include/linux/kconfig.h */
+
+#define __ARG_PLACEHOLDER_1 0,
+#define __take_second_arg(__ignored, val, ...) val
+
+/*
+ * Helper macros to use CONFIG_ options in C/CPP expressions. Note that
+ * these only work with boolean and tristate options.
+ */
+
+/*
+ * Getting something that works in C and CPP for an arg that may or may
+ * not be defined is tricky.  Here, if we have "#define CONFIG_BOOGER 1"
+ * we match on the placeholder define, insert the "0," for arg1 and generate
+ * the triplet (0, 1, 0).  Then the last step cherry picks the 2nd arg (a one).
+ * When CONFIG_BOOGER is not defined, we generate a (... 1, 0) pair, and when
+ * the last step cherry picks the 2nd arg, we get a zero.
+ */
+#define __is_defined(x)			___is_defined(x)
+#define ___is_defined(val)		____is_defined(__ARG_PLACEHOLDER_##val)
+#define ____is_defined(arg1_or_junk)	__take_second_arg(arg1_or_junk 1, 0)
+
+/*
+ * IS_BUILTIN(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'y', 0
+ * otherwise. For boolean options, this is equivalent to
+ * IS_ENABLED(CONFIG_FOO).
+ */
+#define IS_BUILTIN(option) __is_defined(option)
+
+#endif /* _TOOLS_CONFIG_H */
diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index 536ee4febd74..7f5634ce8e88 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -318,6 +318,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_PERF_OPEN		0x36
 #define DRM_I915_PERF_ADD_CONFIG	0x37
 #define DRM_I915_PERF_REMOVE_CONFIG	0x38
+#define DRM_I915_QUERY			0x39
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -375,6 +376,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_PERF_OPEN	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_OPEN, struct drm_i915_perf_open_param)
 #define DRM_IOCTL_I915_PERF_ADD_CONFIG	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config)
 #define DRM_IOCTL_I915_PERF_REMOVE_CONFIG	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_REMOVE_CONFIG, __u64)
+#define DRM_IOCTL_I915_QUERY			DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_QUERY, struct drm_i915_query)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -1358,7 +1360,9 @@ struct drm_intel_overlay_attrs {
  * active on a given plane.
  */
 
-#define I915_SET_COLORKEY_NONE		(1<<0) /* disable color key matching */
+#define I915_SET_COLORKEY_NONE		(1<<0) /* Deprecated. Instead set
+						* flags==0 to disable colorkeying.
+						*/
 #define I915_SET_COLORKEY_DESTINATION	(1<<1)
 #define I915_SET_COLORKEY_SOURCE	(1<<2)
 struct drm_intel_sprite_colorkey {
@@ -1604,15 +1608,115 @@ struct drm_i915_perf_oa_config {
 	__u32 n_flex_regs;
 
 	/*
-	 * These fields are pointers to tuples of u32 values (register
-	 * address, value). For example the expected length of the buffer
-	 * pointed by mux_regs_ptr is (2 * sizeof(u32) * n_mux_regs).
+	 * These fields are pointers to tuples of u32 values (register address,
+	 * value). For example the expected length of the buffer pointed by
+	 * mux_regs_ptr is (2 * sizeof(u32) * n_mux_regs).
 	 */
 	__u64 mux_regs_ptr;
 	__u64 boolean_regs_ptr;
 	__u64 flex_regs_ptr;
 };
 
+struct drm_i915_query_item {
+	__u64 query_id;
+#define DRM_I915_QUERY_TOPOLOGY_INFO    1
+
+	/*
+	 * When set to zero by userspace, this is filled with the size of the
+	 * data to be written at the data_ptr pointer. The kernel sets this
+	 * value to a negative value to signal an error on a particular query
+	 * item.
+	 */
+	__s32 length;
+
+	/*
+	 * Unused for now. Must be cleared to zero.
+	 */
+	__u32 flags;
+
+	/*
+	 * Data will be written at the location pointed by data_ptr when the
+	 * value of length matches the length of the data to be written by the
+	 * kernel.
+	 */
+	__u64 data_ptr;
+};
+
+struct drm_i915_query {
+	__u32 num_items;
+
+	/*
+	 * Unused for now. Must be cleared to zero.
+	 */
+	__u32 flags;
+
+	/*
+	 * This points to an array of num_items drm_i915_query_item structures.
+	 */
+	__u64 items_ptr;
+};
+
+/*
+ * Data written by the kernel with query DRM_I915_QUERY_TOPOLOGY_INFO :
+ *
+ * data: contains the 3 pieces of information :
+ *
+ * - the slice mask with one bit per slice telling whether a slice is
+ *   available. The availability of slice X can be queried with the following
+ *   formula :
+ *
+ *           (data[X / 8] >> (X % 8)) & 1
+ *
+ * - the subslice mask for each slice with one bit per subslice telling
+ *   whether a subslice is available. The availability of subslice Y in slice
+ *   X can be queried with the following formula :
+ *
+ *           (data[subslice_offset +
+ *                 X * subslice_stride +
+ *                 Y / 8] >> (Y % 8)) & 1
+ *
+ * - the EU mask for each subslice in each slice with one bit per EU telling
+ *   whether an EU is available. The availability of EU Z in subslice Y in
+ *   slice X can be queried with the following formula :
+ *
+ *           (data[eu_offset +
+ *                 (X * max_subslices + Y) * eu_stride +
+ *                 Z / 8] >> (Z % 8)) & 1
+ */
+struct drm_i915_query_topology_info {
+	/*
+	 * Unused for now. Must be cleared to zero.
+	 */
+	__u16 flags;
+
+	__u16 max_slices;
+	__u16 max_subslices;
+	__u16 max_eus_per_subslice;
+
+	/*
+	 * Offset in data[] at which the subslice masks are stored.
+	 */
+	__u16 subslice_offset;
+
+	/*
+	 * Stride at which each of the subslice masks for each slice are
+	 * stored.
+	 */
+	__u16 subslice_stride;
+
+	/*
+	 * Offset in data[] at which the EU masks are stored.
+	 */
+	__u16 eu_offset;
+
+	/*
+	 * Stride at which each of the EU masks for each subslice are stored.
+	 */
+	__u16 eu_stride;
+
+	__u8 data[];
+};
+
 #if defined(__cplusplus)
 }
 #endif
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 7b26d4b0b052..6b89f87db200 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -925,7 +925,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_S390_GS 140
 #define KVM_CAP_S390_AIS 141
 #define KVM_CAP_SPAPR_TCE_VFIO 142
-#define KVM_CAP_X86_GUEST_MWAIT 143
+#define KVM_CAP_X86_DISABLE_EXITS 143
 #define KVM_CAP_ARM_USER_IRQ 144
 #define KVM_CAP_S390_CMMA_MIGRATION 145
 #define KVM_CAP_PPC_FWNMI 146
diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
index 5898c22ba310..56c4b3f8a01b 100755
--- a/tools/kvm/kvm_stat/kvm_stat
+++ b/tools/kvm/kvm_stat/kvm_stat
@@ -1121,9 +1121,6 @@ class Tui(object):
         self.screen.refresh()
 
     def _refresh_body(self, sleeptime):
-        def is_child_field(field):
-            return field.find('(') != -1
-
         def insert_child(sorted_items, child, values, parent):
             num = len(sorted_items)
             for i in range(0, num):
@@ -1134,12 +1131,14 @@ class Tui(object):
         def get_sorted_events(self, stats):
             """ separate parent and child events """
             if self._sorting == SORT_DEFAULT:
-                def sortkey((_k, v)):
+                def sortkey(pair):
                     # sort by (delta value, overall value)
+                    v = pair[1]
                     return (v.delta, v.value)
             else:
-                def sortkey((_k, v)):
+                def sortkey(pair):
                     # sort by overall value
+                    v = pair[1]
                     return v.value
 
             childs = []
@@ -1613,7 +1612,7 @@ def assign_globals():
     global PATH_DEBUGFS_TRACING
 
     debugfs = ''
-    for line in file('/proc/mounts'):
+    for line in open('/proc/mounts'):
         if line.split(' ')[0] == 'debugfs':
             debugfs = line.split(' ')[1]
             break
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index e1a660e60849..917e36fde6d8 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -80,6 +80,7 @@ OPTIONS
 	- comm: command (name) of the task which can be read via /proc/<pid>/comm
 	- pid: command and tid of the task
 	- dso: name of library or module executed at the time of sample
+	- dso_size: size of library or module executed at the time of sample
 	- symbol: name of function executed at the time of sample
 	- symbol_size: size of function executed at the time of sample
 	- parent: name of function matched to the parent regex filter. Unmatched
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index 5a7035c5c523..115db9e06ecd 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -117,6 +117,9 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
 --sched::
 	Accrue thread runtime and provide a summary at the end of the session.
 
+--failure::
+	Show only syscalls that failed, i.e. that returned < 0.
+
 -i::
 --input::
 	Process events from a given perf data file.
diff --git a/tools/perf/Documentation/perf-version.txt b/tools/perf/Documentation/perf-version.txt
new file mode 100644
index 000000000000..e207b7cfca26
--- /dev/null
+++ b/tools/perf/Documentation/perf-version.txt
@@ -0,0 +1,24 @@
+perf-version(1)
+===============
+
+NAME
+----
+perf-version - display the version of the perf binary
+
+SYNOPSIS
+--------
+'perf version' [--build-options]
+
+DESCRIPTION
+-----------
+With no options given, 'perf version' prints the perf version
+on the standard output.
+
+If the option '--build-options' is given, then the status of
+compiled-in libraries is printed on the standard output.
+
+OPTIONS
+-------
+--build-options::
+        Prints the status of compiled-in libraries on the
+        standard output.
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 98ff73648b51..c7abd83a8e19 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -346,12 +346,16 @@ else
       ifneq ($(feature-dwarf_getlocations), 1)
         msg := $(warning Old libdw.h, finding variables at given 'perf probe' point will not work, install elfutils-devel/libdw-dev >= 0.157);
       else
-        CFLAGS += -DHAVE_DWARF_GETLOCATIONS
+        CFLAGS += -DHAVE_DWARF_GETLOCATIONS_SUPPORT
       endif # dwarf_getlocations
     endif # Dwarf support
   endif # libelf support
 endif # NO_LIBELF
 
+ifeq ($(feature-glibc), 1)
+  CFLAGS += -DHAVE_GLIBC_SUPPORT
+endif
+
 ifdef NO_DWARF
   NO_LIBDW_DWARF_UNWIND := 1
 endif
@@ -635,6 +639,7 @@ else
   else
     LDFLAGS += $(PERL_EMBED_LDFLAGS)
     EXTLIBS += $(PERL_EMBED_LIBADD)
+    CFLAGS += -DHAVE_LIBPERL_SUPPORT
     $(call detected,CONFIG_LIBPERL)
   endif
 endif
@@ -671,6 +676,7 @@ else
          LDFLAGS += $(PYTHON_EMBED_LDFLAGS)
          EXTLIBS += $(PYTHON_EMBED_LIBADD)
          LANG_BINDINGS += $(obj-perf)python/perf.so
+         CFLAGS += -DHAVE_LIBPYTHON_SUPPORT
          $(call detected,CONFIG_LIBPYTHON)
       endif
     endif
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index f7517e1b73f8..83e453de36f8 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -364,7 +364,8 @@ LIBS = -Wl,--whole-archive $(PERFLIBS) $(EXTRA_PERFLIBS) -Wl,--no-whole-archive
 
 ifeq ($(USE_CLANG), 1)
   CLANGLIBS_LIST = AST Basic CodeGen Driver Frontend Lex Tooling Edit Sema Analysis Parse Serialization
-  LIBCLANG = $(foreach l,$(CLANGLIBS_LIST),$(wildcard $(shell $(LLVM_CONFIG) --libdir)/libclang$(l).a))
+  CLANGLIBS_NOEXT_LIST = $(foreach l,$(CLANGLIBS_LIST),$(shell $(LLVM_CONFIG) --libdir)/libclang$(l))
+  LIBCLANG = $(foreach l,$(CLANGLIBS_NOEXT_LIST),$(wildcard $(l).a $(l).so))
   LIBS += -Wl,--start-group $(LIBCLANG) -Wl,--end-group
 endif
 
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 87b95c9410b4..3ad17ee89403 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -112,6 +112,7 @@ struct trace {
 	bool			multiple_threads;
 	bool			summary;
 	bool			summary_only;
+	bool			failure_only;
 	bool			show_comm;
 	bool			print_sample;
 	bool			show_tool_stats;
@@ -1565,7 +1566,7 @@ static int trace__printf_interrupted_entry(struct trace *trace)
 	struct thread_trace *ttrace;
 	size_t printed;
 
-	if (trace->current == NULL)
+	if (trace->failure_only || trace->current == NULL)
 		return 0;
 
 	ttrace = thread__priv(trace->current);
@@ -1638,7 +1639,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
 					   args, trace, thread);
 
 	if (sc->is_exit) {
-		if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) {
+		if (!(trace->duration_filter || trace->summary_only || trace->failure_only || trace->min_stack)) {
 			trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output);
 			fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
 		}
@@ -1742,7 +1743,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
 		}
 	}
 
-	if (trace->summary_only)
+	if (trace->summary_only || (ret >= 0 && trace->failure_only))
 		goto out;
 
 	trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output);
@@ -1961,7 +1962,7 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
 				      trace->output);
 	}
 
-	fprintf(trace->output, ")\n");
+	fprintf(trace->output, "\n");
 
 	if (callchain_ret > 0)
 		trace__fprintf_callchain(trace, sample);
@@ -3087,6 +3088,8 @@ int cmd_trace(int argc, const char **argv)
 	OPT_INCR('v', "verbose", &verbose, "be more verbose"),
 	OPT_BOOLEAN('T', "time", &trace.full_time,
 		    "Show full timestamp, not time relative to first start"),
+	OPT_BOOLEAN(0, "failure", &trace.failure_only,
+		    "Show only syscalls that failed"),
 	OPT_BOOLEAN('s', "summary", &trace.summary_only,
 		    "Show only syscall summary with statistics"),
 	OPT_BOOLEAN('S', "with-summary", &trace.summary,
diff --git a/tools/perf/builtin-version.c b/tools/perf/builtin-version.c
index 37019c5d675f..2abe3910d6b6 100644
--- a/tools/perf/builtin-version.c
+++ b/tools/perf/builtin-version.c
@@ -1,11 +1,91 @@
 // SPDX-License-Identifier: GPL-2.0
 #include "builtin.h"
 #include "perf.h"
+#include "color.h"
 #include <linux/compiler.h>
+#include <tools/config.h>
 #include <stdio.h>
+#include <string.h>
+#include <subcmd/parse-options.h>
 
-int cmd_version(int argc __maybe_unused, const char **argv __maybe_unused)
+int version_verbose;
+
+struct version {
+	bool	build_options;
+};
+
+static struct version version;
+
+static struct option version_options[] = {
+	OPT_BOOLEAN(0, "build-options", &version.build_options, "display the build options"),
+	OPT_END(),	/* terminator: parse_options() walks the table until it reaches this entry */
+};
+
+static const char * const version_usage[] = {
+	"perf version [<options>]",
+	NULL
+};
+
+static void on_off_print(const char *status)
+{
+	printf("[ ");
+
+	if (!strcmp(status, "OFF"))
+		color_fprintf(stdout, PERF_COLOR_RED, "%-3s", status);
+	else
+		color_fprintf(stdout, PERF_COLOR_GREEN, "%-3s", status);
+
+	printf(" ]");
+}
+
+static void status_print(const char *name, const char *macro,
+			 const char *status)
 {
+	printf("%22s: ", name);
+	on_off_print(status);
+	printf("  # %s\n", macro);
+}
+
+#define STATUS(__d, __m)				\
+do {							\
+	if (IS_BUILTIN(__d))				\
+		status_print(#__m, #__d, "on");		\
+	else						\
+		status_print(#__m, #__d, "OFF");	\
+} while (0)
+
+static void library_status(void)
+{
+	STATUS(HAVE_DWARF_SUPPORT, dwarf);
+	STATUS(HAVE_DWARF_GETLOCATIONS_SUPPORT, dwarf_getlocations);
+	STATUS(HAVE_GLIBC_SUPPORT, glibc);
+	STATUS(HAVE_GTK2_SUPPORT, gtk2);
+	STATUS(HAVE_LIBAUDIT_SUPPORT, libaudit);
+	STATUS(HAVE_LIBBFD_SUPPORT, libbfd);
+	STATUS(HAVE_LIBELF_SUPPORT, libelf);
+	STATUS(HAVE_LIBNUMA_SUPPORT, libnuma);
+	STATUS(HAVE_LIBNUMA_SUPPORT, numa_num_possible_cpus);
+	STATUS(HAVE_LIBPERL_SUPPORT, libperl);
+	STATUS(HAVE_LIBPYTHON_SUPPORT, libpython);
+	STATUS(HAVE_SLANG_SUPPORT, libslang);
+	STATUS(HAVE_LIBCRYPTO_SUPPORT, libcrypto);
+	STATUS(HAVE_LIBUNWIND_SUPPORT, libunwind);
+	STATUS(HAVE_DWARF_SUPPORT, libdw-dwarf-unwind);
+	STATUS(HAVE_ZLIB_SUPPORT, zlib);
+	STATUS(HAVE_LZMA_SUPPORT, lzma);
+	STATUS(HAVE_AUXTRACE_SUPPORT, get_cpuid);
+	STATUS(HAVE_LIBBPF_SUPPORT, bpf);
+}
+
+int cmd_version(int argc, const char **argv)
+{
+	argc = parse_options(argc, argv, version_options, version_usage,
+			     PARSE_OPT_STOP_AT_NON_OPTION);
+
 	printf("perf version %s\n", perf_version_string);
+
+	if (version.build_options || version_verbose == 1)
+		library_status();
+
 	return 0;
 }
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 1b3fc8ec0fa2..1659029d03fc 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -190,6 +190,12 @@ static int handle_options(const char ***argv, int *argc, int *envchanged)
 			break;
 		}
 
+		if (!strcmp(cmd, "-vv")) {
+			(*argv)[0] = "version";
+			version_verbose = 1;
+			break;
+		}
+
 		/*
 		 * Check remaining flags.
 		 */
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 8fec1abd0f1f..a1a97956136f 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -84,6 +84,7 @@ struct record_opts {
 struct option;
 extern const char * const *record_usage;
 extern struct option *record_options;
+extern int version_verbose;
 
 int record__parse_freq(const struct option *opt, const char *str, int unset);
 #endif
diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c
index 9f6ce29b83b4..4f75561424ed 100644
--- a/tools/perf/ui/browser.c
+++ b/tools/perf/ui/browser.c
@@ -45,11 +45,16 @@ void ui_browser__set_percent_color(struct ui_browser *browser,
 	 ui_browser__set_color(browser, color);
 }
 
-void ui_browser__gotorc(struct ui_browser *browser, int y, int x)
+void ui_browser__gotorc_title(struct ui_browser *browser, int y, int x)
 {
 	SLsmg_gotorc(browser->y + y, browser->x + x);
 }
 
+void ui_browser__gotorc(struct ui_browser *browser, int y, int x)
+{
+	SLsmg_gotorc(browser->y + y + browser->extra_title_lines, browser->x + x);
+}
+
 void ui_browser__write_nstring(struct ui_browser *browser __maybe_unused, const char *msg,
 			       unsigned int width)
 {
@@ -191,6 +196,7 @@ void ui_browser__refresh_dimensions(struct ui_browser *browser)
 {
 	browser->width = SLtt_Screen_Cols - 1;
 	browser->height = browser->rows = SLtt_Screen_Rows - 2;
+	browser->rows -= browser->extra_title_lines;
 	browser->y = 1;
 	browser->x = 0;
 }
@@ -337,8 +343,8 @@ static int __ui_browser__refresh(struct ui_browser *browser)
 	else
 		width += 1;
 
-	SLsmg_fill_region(browser->y + row, browser->x,
-			  browser->height - row, width, ' ');
+	SLsmg_fill_region(browser->y + row + browser->extra_title_lines, browser->x,
+			  browser->rows - row, width, ' ');
 
 	return 0;
 }
diff --git a/tools/perf/ui/browser.h b/tools/perf/ui/browser.h
index 70057178ee34..aa5932e1d62e 100644
--- a/tools/perf/ui/browser.h
+++ b/tools/perf/ui/browser.h
@@ -17,6 +17,7 @@ struct ui_browser {
 	u64	      index, top_idx;
 	void	      *top, *entries;
 	u16	      y, x, width, height, rows, columns, horiz_scroll;
+	u8	      extra_title_lines;
 	int	      current_color;
 	void	      *priv;
 	const char    *title;
@@ -38,6 +39,7 @@ bool ui_browser__is_current_entry(struct ui_browser *browser, unsigned row);
 void ui_browser__refresh_dimensions(struct ui_browser *browser);
 void ui_browser__reset_index(struct ui_browser *browser);
 
+void ui_browser__gotorc_title(struct ui_browser *browser, int y, int x);
 void ui_browser__gotorc(struct ui_browser *browser, int y, int x);
 void ui_browser__write_nstring(struct ui_browser *browser, const char *msg,
 			       unsigned int width);
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index c02fb437ac8e..12c099a87f8b 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -218,7 +218,7 @@ static unsigned int annotate_browser__refresh(struct ui_browser *browser)
 		annotate_browser__draw_current_jump(browser);
 
 	ui_browser__set_color(browser, HE_COLORSET_NORMAL);
-	__ui_browser__vline(browser, pcnt_width, 0, browser->height - 1);
+	__ui_browser__vline(browser, pcnt_width, 0, browser->rows - 1);
 	return ret;
 }
 
@@ -592,21 +592,40 @@ bool annotate_browser__continue_search_reverse(struct annotate_browser *browser,
 	return __annotate_browser__search_reverse(browser);
 }
 
+static int annotate_browser__show(struct ui_browser *browser, char *title, const char *help)
+{
+	struct map_symbol *ms = browser->priv;
+	struct symbol *sym = ms->sym;
+	char symbol_dso[SYM_TITLE_MAX_SIZE];
+
+	if (ui_browser__show(browser, title, help) < 0)
+		return -1;
+
+	sym_title(sym, ms->map, symbol_dso, sizeof(symbol_dso));
+
+	ui_browser__gotorc_title(browser, 0, 0);
+	ui_browser__set_color(browser, HE_COLORSET_ROOT);
+	ui_browser__write_nstring(browser, symbol_dso, browser->width + 1);
+	return 0;
+}
+
 static int annotate_browser__run(struct annotate_browser *browser,
 				 struct perf_evsel *evsel,
 				 struct hist_browser_timer *hbt)
 {
 	struct rb_node *nd = NULL;
+	struct hists *hists = evsel__hists(evsel);
 	struct map_symbol *ms = browser->b.priv;
 	struct symbol *sym = ms->sym;
 	struct annotation *notes = symbol__annotation(ms->sym);
 	const char *help = "Press 'h' for help on key bindings";
 	int delay_secs = hbt ? hbt->refresh : 0;
+	char title[256];
 	int key;
-	char title[SYM_TITLE_MAX_SIZE];
 
-	sym_title(sym, ms->map, title, sizeof(title));
-	if (ui_browser__show(&browser->b, title, help) < 0)
+	annotation__scnprintf_samples_period(notes, title, sizeof(title), evsel);
+
+	if (annotate_browser__show(&browser->b, title, help) < 0)
 		return -1;
 
 	annotate_browser__calc_percent(browser, evsel);
@@ -637,8 +656,11 @@ static int annotate_browser__run(struct annotate_browser *browser,
 			if (hbt)
 				hbt->timer(hbt->arg);
 
-			if (delay_secs != 0)
+			if (delay_secs != 0) {
 				symbol__annotate_decay_histogram(sym, evsel->idx);
+				hists__scnprintf_title(hists, title, sizeof(title));
+				annotate_browser__show(&browser->b, title, help);
+			}
 			continue;
 		case K_TAB:
 			if (nd != NULL) {
@@ -812,6 +834,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map,
 			.seek	 = ui_browser__list_head_seek,
 			.write	 = annotate_browser__write,
 			.filter  = disasm_line__filter,
+			.extra_title_lines = 1, /* for hists__scnprintf_title() */
 			.priv	 = &ms,
 			.use_navkeypressed = true,
 		},
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 8b4e82548f8e..0eec06c105c6 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -32,8 +32,7 @@
 
 extern void hist_browser__init_hpp(void);
 
-static int perf_evsel_browser_title(struct hist_browser *browser,
-				    char *bf, size_t size);
+static int hists_browser__scnprintf_title(struct hist_browser *browser, char *bf, size_t size);
 static void hist_browser__update_nr_entries(struct hist_browser *hb);
 
 static struct rb_node *hists__filter_entries(struct rb_node *nd,
@@ -62,6 +61,15 @@ static int hist_browser__get_folding(struct hist_browser *browser)
 	return unfolded_rows;
 }
 
+static void hist_browser__set_title_space(struct hist_browser *hb)
+{
+	struct ui_browser *browser = &hb->b;
+	struct hists *hists = hb->hists;
+	struct perf_hpp_list *hpp_list = hists->hpp_list;
+
+	browser->extra_title_lines = hb->show_headers ? hpp_list->nr_header_lines : 0;
+}
+
 static u32 hist_browser__nr_entries(struct hist_browser *hb)
 {
 	u32 nr_entries;
@@ -82,10 +90,16 @@ static void hist_browser__update_rows(struct hist_browser *hb)
 	struct ui_browser *browser = &hb->b;
 	struct hists *hists = hb->hists;
 	struct perf_hpp_list *hpp_list = hists->hpp_list;
-	u16 header_offset, index_row;
+	u16 index_row;
 
-	header_offset = hb->show_headers ? hpp_list->nr_header_lines : 0;
-	browser->rows = browser->height - header_offset;
+	if (!hb->show_headers) {
+		browser->rows += browser->extra_title_lines;
+		browser->extra_title_lines = 0;
+		return;
+	}
+
+	browser->extra_title_lines = hpp_list->nr_header_lines;
+	browser->rows -= browser->extra_title_lines;
 	/*
 	 * Verify if we were at the last line and that line isn't
 	 * visibe because we now show the header line(s).
@@ -108,17 +122,6 @@ static void hist_browser__refresh_dimensions(struct ui_browser *browser)
  	 *	  changeset.
  	 */
 	ui_browser__refresh_dimensions(browser);
-	hist_browser__update_rows(hb);
-}
-
-static void hist_browser__gotorc(struct hist_browser *browser, int row, int column)
-{
-	struct hists *hists = browser->hists;
-	struct perf_hpp_list *hpp_list = hists->hpp_list;
-	u16 header_offset;
-
-	header_offset = browser->show_headers ? hpp_list->nr_header_lines : 0;
-	ui_browser__gotorc(&browser->b, row + header_offset, column);
 }
 
 static void hist_browser__reset(struct hist_browser *browser)
@@ -656,9 +659,10 @@ int hist_browser__run(struct hist_browser *browser, const char *help,
 			struct hist_entry *h = rb_entry(browser->b.top,
 							struct hist_entry, rb_node);
 			ui_helpline__pop();
-			ui_helpline__fpush("%d: nr_ent=(%d,%d), rows=%d, idx=%d, fve: idx=%d, row_off=%d, nrows=%d",
+			ui_helpline__fpush("%d: nr_ent=(%d,%d), etl: %d, rows=%d, idx=%d, fve: idx=%d, row_off=%d, nrows=%d",
 					   seq++, browser->b.nr_entries,
 					   browser->hists->nr_entries,
+					   browser->b.extra_title_lines,
 					   browser->b.rows,
 					   browser->b.index,
 					   browser->b.top_idx,
@@ -733,7 +737,7 @@ static void hist_browser__show_callchain_entry(struct hist_browser *browser,
 	}
 
 	ui_browser__set_color(&browser->b, color);
-	hist_browser__gotorc(browser, row, 0);
+	ui_browser__gotorc(&browser->b, row, 0);
 	ui_browser__write_nstring(&browser->b, " ", offset);
 	ui_browser__printf(&browser->b, "%c", folded_sign);
 	ui_browser__write_graph(&browser->b, show_annotated ? SLSMG_RARROW_CHAR : ' ');
@@ -1249,7 +1253,7 @@ static int hist_browser__show_entry(struct hist_browser *browser,
 		};
 		int column = 0;
 
-		hist_browser__gotorc(browser, row, 0);
+		ui_browser__gotorc(&browser->b, row, 0);
 
 		hists__for_each_format(browser->hists, fmt) {
 			char s[2048];
@@ -1358,7 +1362,7 @@ static int hist_browser__show_hierarchy_entry(struct hist_browser *browser,
 		goto show_callchain;
 	}
 
-	hist_browser__gotorc(browser, row, 0);
+	ui_browser__gotorc(&browser->b, row, 0);
 
 	if (current_entry && browser->b.navkeypressed)
 		ui_browser__set_color(&browser->b, HE_COLORSET_SELECTED);
@@ -1507,7 +1511,7 @@ static int hist_browser__show_no_entry(struct hist_browser *browser,
 		browser->selection = NULL;
 	}
 
-	hist_browser__gotorc(browser, row, 0);
+	ui_browser__gotorc(&browser->b, row, 0);
 
 	if (current_entry && browser->b.navkeypressed)
 		ui_browser__set_color(&browser->b, HE_COLORSET_SELECTED);
@@ -1713,7 +1717,7 @@ static void hists_browser__headers(struct hist_browser *browser)
 		hists_browser__scnprintf_headers(browser, headers,
 						 sizeof(headers), line);
 
-		ui_browser__gotorc(&browser->b, line, 0);
+		ui_browser__gotorc_title(&browser->b, line, 0);
 		ui_browser__set_color(&browser->b, HE_COLORSET_ROOT);
 		ui_browser__write_nstring(&browser->b, headers, browser->b.width + 1);
 	}
@@ -1740,17 +1744,11 @@ static void ui_browser__hists_init_top(struct ui_browser *browser)
 static unsigned int hist_browser__refresh(struct ui_browser *browser)
 {
 	unsigned row = 0;
-	u16 header_offset = 0;
 	struct rb_node *nd;
 	struct hist_browser *hb = container_of(browser, struct hist_browser, b);
-	struct hists *hists = hb->hists;
-
-	if (hb->show_headers) {
-		struct perf_hpp_list *hpp_list = hists->hpp_list;
 
+	if (hb->show_headers)
 		hist_browser__show_headers(hb);
-		header_offset = hpp_list->nr_header_lines;
-	}
 
 	ui_browser__hists_init_top(browser);
 	hb->he_selection = NULL;
@@ -1788,7 +1786,7 @@ static unsigned int hist_browser__refresh(struct ui_browser *browser)
 			break;
 	}
 
-	return row + header_offset;
+	return row;
 }
 
 static struct rb_node *hists__filter_entries(struct rb_node *nd,
@@ -2143,6 +2141,7 @@ void hist_browser__init(struct hist_browser *browser,
 	browser->b.seek			= ui_browser__hists_seek;
 	browser->b.use_navkeypressed	= true;
 	browser->show_headers		= symbol_conf.show_hist_headers;
+	hist_browser__set_title_space(browser);
 
 	if (symbol_conf.report_hierarchy) {
 		struct perf_hpp_list_node *fmt_node;
@@ -2183,7 +2182,7 @@ perf_evsel_browser__new(struct perf_evsel *evsel,
 	if (browser) {
 		browser->hbt   = hbt;
 		browser->env   = env;
-		browser->title = perf_evsel_browser_title;
+		browser->title = hists_browser__scnprintf_title;
 	}
 	return browser;
 }
@@ -2209,84 +2208,11 @@ static inline bool is_report_browser(void *timer)
 	return timer == NULL;
 }
 
-static int perf_evsel_browser_title(struct hist_browser *browser,
-				char *bf, size_t size)
+static int hists_browser__scnprintf_title(struct hist_browser *browser, char *bf, size_t size)
 {
 	struct hist_browser_timer *hbt = browser->hbt;
-	struct hists *hists = browser->hists;
-	char unit;
-	int printed;
-	const struct dso *dso = hists->dso_filter;
-	const struct thread *thread = hists->thread_filter;
-	int socket_id = hists->socket_filter;
-	unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE];
-	u64 nr_events = hists->stats.total_period;
-	struct perf_evsel *evsel = hists_to_evsel(hists);
-	const char *ev_name = perf_evsel__name(evsel);
-	char buf[512], sample_freq_str[64] = "";
-	size_t buflen = sizeof(buf);
-	char ref[30] = " show reference callgraph, ";
-	bool enable_ref = false;
+	int printed = __hists__scnprintf_title(browser->hists, bf, size, !is_report_browser(hbt));
 
-	if (symbol_conf.filter_relative) {
-		nr_samples = hists->stats.nr_non_filtered_samples;
-		nr_events = hists->stats.total_non_filtered_period;
-	}
-
-	if (perf_evsel__is_group_event(evsel)) {
-		struct perf_evsel *pos;
-
-		perf_evsel__group_desc(evsel, buf, buflen);
-		ev_name = buf;
-
-		for_each_group_member(pos, evsel) {
-			struct hists *pos_hists = evsel__hists(pos);
-
-			if (symbol_conf.filter_relative) {
-				nr_samples += pos_hists->stats.nr_non_filtered_samples;
-				nr_events += pos_hists->stats.total_non_filtered_period;
-			} else {
-				nr_samples += pos_hists->stats.nr_events[PERF_RECORD_SAMPLE];
-				nr_events += pos_hists->stats.total_period;
-			}
-		}
-	}
-
-	if (symbol_conf.show_ref_callgraph &&
-	    strstr(ev_name, "call-graph=no"))
-		enable_ref = true;
-
-	if (!is_report_browser(hbt))
-		scnprintf(sample_freq_str, sizeof(sample_freq_str), " %d Hz,", evsel->attr.sample_freq);
-
-	nr_samples = convert_unit(nr_samples, &unit);
-	printed = scnprintf(bf, size,
-			   "Samples: %lu%c of event%s '%s',%s%sEvent count (approx.): %" PRIu64,
-			   nr_samples, unit, evsel->nr_members > 1 ? "s" : "",
-			   ev_name, sample_freq_str, enable_ref ? ref : " ", nr_events);
-
-
-	if (hists->uid_filter_str)
-		printed += snprintf(bf + printed, size - printed,
-				    ", UID: %s", hists->uid_filter_str);
-	if (thread) {
-		if (hists__has(hists, thread)) {
-			printed += scnprintf(bf + printed, size - printed,
-				    ", Thread: %s(%d)",
-				     (thread->comm_set ? thread__comm_str(thread) : ""),
-				    thread->tid);
-		} else {
-			printed += scnprintf(bf + printed, size - printed,
-				    ", Thread: %s",
-				     (thread->comm_set ? thread__comm_str(thread) : ""));
-		}
-	}
-	if (dso)
-		printed += scnprintf(bf + printed, size - printed,
-				    ", DSO: %s", dso->short_name);
-	if (socket_id > -1)
-		printed += scnprintf(bf + printed, size - printed,
-				    ", Processor Socket: %d", socket_id);
 	if (!is_report_browser(hbt)) {
 		struct perf_top *top = hbt->arg;
 
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 3a428d7c59b9..fbad8dfbb186 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -17,6 +17,7 @@
 #include "config.h"
 #include "cache.h"
 #include "symbol.h"
+#include "units.h"
 #include "debug.h"
 #include "annotate.h"
 #include "evsel.h"
@@ -2324,7 +2325,7 @@ int symbol__tty_annotate2(struct symbol *sym, struct map *map,
 	struct dso *dso = map->dso;
 	struct rb_root source_line = RB_ROOT;
 	struct annotation_options opts = annotation__default_options;
-	const char *ev_name = perf_evsel__name(evsel);
+	struct annotation *notes = symbol__annotation(sym);
 	char buf[1024];
 
 	if (symbol__annotate2(sym, map, evsel, &opts, NULL) < 0)
@@ -2336,12 +2337,8 @@ int symbol__tty_annotate2(struct symbol *sym, struct map *map,
 		print_summary(&source_line, dso->long_name);
 	}
 
-	if (perf_evsel__is_group_event(evsel)) {
-		perf_evsel__group_desc(evsel, buf, sizeof(buf));
-		ev_name = buf;
-	}
-
-	fprintf(stdout, "%s() %s\nEvent: %s\n\n", sym->name, dso->long_name, ev_name);
+	annotation__scnprintf_samples_period(notes, buf, sizeof(buf), evsel);
+	fprintf(stdout, "%s\n%s() %s\n", buf, sym->name, dso->long_name);
 	symbol__annotate_fprintf2(sym, stdout);
 
 	annotated_source__purge(symbol__annotation(sym)->src);
@@ -2597,6 +2594,46 @@ out_free_offsets:
 	return -1;
 }
 
+int __annotation__scnprintf_samples_period(struct annotation *notes,
+					   char *bf, size_t size,
+					   struct perf_evsel *evsel,
+					   bool show_freq)
+{
+	const char *ev_name = perf_evsel__name(evsel);
+	char buf[1024], ref[30] = " show reference callgraph, ";
+	char sample_freq_str[64] = "";
+	unsigned long nr_samples = 0;
+	int nr_members = 1;
+	bool enable_ref = false;
+	u64 nr_events = 0;
+	char unit;
+	int i;
+
+	if (perf_evsel__is_group_event(evsel)) {	/* title uses the group description */
+		perf_evsel__group_desc(evsel, buf, sizeof(buf));
+		ev_name = buf;
+		nr_members = evsel->nr_members;
+	}
+
+	for (i = 0; i < nr_members; i++) {	/* sum the histograms at evsel->idx + i */
+		struct sym_hist *ah = annotation__histogram(notes, evsel->idx + i);
+
+		nr_samples += ah->nr_samples;
+		nr_events  += ah->period;
+	}
+
+	if (symbol_conf.show_ref_callgraph && strstr(ev_name, "call-graph=no"))
+		enable_ref = true;
+
+	if (show_freq)	/* sample_freq is a 64-bit attr field, so "%d" would be a format mismatch */
+		scnprintf(sample_freq_str, sizeof(sample_freq_str), " %" PRIu64 " Hz,", (u64)evsel->attr.sample_freq);
+
+	nr_samples = convert_unit(nr_samples, &unit);
+	return scnprintf(bf, size, "Samples: %lu%c of event%s '%s',%s%sEvent count (approx.): %" PRIu64,
+			 nr_samples, unit, evsel->nr_members > 1 ? "s" : "",
+			 ev_name, sample_freq_str, enable_ref ? ref : " ", nr_events);
+}
+
 #define ANNOTATION__CFG(n) \
 	{ .name = #n, .value = &annotation__default_options.n, }
 
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index ff7e3df31efa..db8d09bea07e 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -151,6 +151,18 @@ double annotation_line__max_percent(struct annotation_line *al, struct annotatio
 void annotation_line__write(struct annotation_line *al, struct annotation *notes,
 			    struct annotation_write_ops *ops);
 
+int __annotation__scnprintf_samples_period(struct annotation *notes,
+					   char *bf, size_t size,
+					   struct perf_evsel *evsel,
+					   bool show_freq);
+
+static inline int annotation__scnprintf_samples_period(struct annotation *notes,
+						       char *bf, size_t size,
+						       struct perf_evsel *evsel)
+{
+	return __annotation__scnprintf_samples_period(notes, bf, size, evsel, true);	/* wrapper: show_freq is always true */
+}
+
 int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw);
 size_t disasm__fprintf(struct list_head *head, FILE *fp);
 void symbol__calc_percent(struct symbol *sym, struct perf_evsel *evsel);
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index fb357a00dd86..857de69a5361 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -302,13 +302,27 @@ static int auxtrace_queues__split_buffer(struct auxtrace_queues *queues,
 	return 0;
 }
 
+static bool filter_cpu(struct perf_session *session, int cpu)
+{
+	unsigned long *mask = session->itrace_synth_opts->cpu_bitmap;
+
+	return !(!mask || cpu == -1 || test_bit(cpu, mask));	/* true: bitmap set, cpu != -1, and cpu's bit is clear */
+}
+
 static int auxtrace_queues__add_buffer(struct auxtrace_queues *queues,
 				       struct perf_session *session,
 				       unsigned int idx,
 				       struct auxtrace_buffer *buffer,
 				       struct auxtrace_buffer **buffer_ptr)
 {
-	int err;
+	int err = -ENOMEM;
+
+	if (filter_cpu(session, buffer->cpu))
+		return 0;
+
+	buffer = memdup(buffer, sizeof(*buffer));
+	if (!buffer)
+		return -ENOMEM;
 
 	if (session->one_mmap) {
 		buffer->data = buffer->data_offset - session->one_mmap_offset +
@@ -316,31 +330,28 @@ static int auxtrace_queues__add_buffer(struct auxtrace_queues *queues,
 	} else if (perf_data__is_pipe(session->data)) {
 		buffer->data = auxtrace_copy_data(buffer->size, session);
 		if (!buffer->data)
-			return -ENOMEM;
+			goto out_free;
 		buffer->data_needs_freeing = true;
 	} else if (BITS_PER_LONG == 32 &&
 		   buffer->size > BUFFER_LIMIT_FOR_32_BIT) {
 		err = auxtrace_queues__split_buffer(queues, idx, buffer);
 		if (err)
-			return err;
+			goto out_free;
 	}
 
 	err = auxtrace_queues__queue_buffer(queues, idx, buffer);
 	if (err)
-		return err;
+		goto out_free;
 
 	/* FIXME: Doesn't work for split buffer */
 	if (buffer_ptr)
 		*buffer_ptr = buffer;
 
 	return 0;
-}
 
-static bool filter_cpu(struct perf_session *session, int cpu)
-{
-	unsigned long *cpu_bitmap = session->itrace_synth_opts->cpu_bitmap;
-
-	return cpu_bitmap && cpu != -1 && !test_bit(cpu, cpu_bitmap);
+out_free:
+	auxtrace_buffer__free(buffer);
+	return err;
 }
 
 int auxtrace_queues__add_event(struct auxtrace_queues *queues,
@@ -348,36 +359,19 @@ int auxtrace_queues__add_event(struct auxtrace_queues *queues,
 			       union perf_event *event, off_t data_offset,
 			       struct auxtrace_buffer **buffer_ptr)
 {
-	struct auxtrace_buffer *buffer;
-	unsigned int idx;
-	int err;
-
-	if (filter_cpu(session, event->auxtrace.cpu))
-		return 0;
-
-	buffer = zalloc(sizeof(struct auxtrace_buffer));
-	if (!buffer)
-		return -ENOMEM;
-
-	buffer->pid = -1;
-	buffer->tid = event->auxtrace.tid;
-	buffer->cpu = event->auxtrace.cpu;
-	buffer->data_offset = data_offset;
-	buffer->offset = event->auxtrace.offset;
-	buffer->reference = event->auxtrace.reference;
-	buffer->size = event->auxtrace.size;
-	idx = event->auxtrace.idx;
-
-	err = auxtrace_queues__add_buffer(queues, session, idx, buffer,
-					  buffer_ptr);
-	if (err)
-		goto out_err;
-
-	return 0;
+	struct auxtrace_buffer buffer = {
+		.pid = -1,
+		.tid = event->auxtrace.tid,
+		.cpu = event->auxtrace.cpu,
+		.data_offset = data_offset,
+		.offset = event->auxtrace.offset,
+		.reference = event->auxtrace.reference,
+		.size = event->auxtrace.size,
+	};
+	unsigned int idx = event->auxtrace.idx;
 
-out_err:
-	auxtrace_buffer__free(buffer);
-	return err;
+	return auxtrace_queues__add_buffer(queues, session, idx, &buffer,
+					   buffer_ptr);
 }
 
 static int auxtrace_queues__add_indexed_event(struct auxtrace_queues *queues,
diff --git a/tools/perf/util/c++/clang-test.cpp b/tools/perf/util/c++/clang-test.cpp
index a4014d786676..7b042a5ebc68 100644
--- a/tools/perf/util/c++/clang-test.cpp
+++ b/tools/perf/util/c++/clang-test.cpp
@@ -41,7 +41,7 @@ int test__clang_to_IR(void)
 	if (!M)
 		return -1;
 	for (llvm::Function& F : *M)
-		if (F.getName() == "bpf_func__SyS_epoll_wait")
+		if (F.getName() == "bpf_func__SyS_epoll_pwait")
 			return 0;
 	return -1;
 }
diff --git a/tools/perf/util/c++/clang.cpp b/tools/perf/util/c++/clang.cpp
index 1bfc946e37dc..bf31ceab33bd 100644
--- a/tools/perf/util/c++/clang.cpp
+++ b/tools/perf/util/c++/clang.cpp
@@ -9,6 +9,7 @@
  * Copyright (C) 2016 Huawei Inc.
  */
 
+#include "clang/Basic/Version.h"
 #include "clang/CodeGen/CodeGenAction.h"
 #include "clang/Frontend/CompilerInvocation.h"
 #include "clang/Frontend/CompilerInstance.h"
@@ -58,7 +59,8 @@ createCompilerInvocation(llvm::opt::ArgStringList CFlags, StringRef& Path,
 
 	FrontendOptions& Opts = CI->getFrontendOpts();
 	Opts.Inputs.clear();
-	Opts.Inputs.emplace_back(Path, IK_C);
+	Opts.Inputs.emplace_back(Path,
+			FrontendOptions::getInputKindForExtension("c"));
 	return CI;
 }
 
@@ -71,10 +73,17 @@ getModuleFromSource(llvm::opt::ArgStringList CFlags,
 
 	Clang.setVirtualFileSystem(&*VFS);
 
+#if CLANG_VERSION_MAJOR < 4
 	IntrusiveRefCntPtr<CompilerInvocation> CI =
 		createCompilerInvocation(std::move(CFlags), Path,
 					 Clang.getDiagnostics());
 	Clang.setInvocation(&*CI);
+#else
+	std::shared_ptr<CompilerInvocation> CI(
+		createCompilerInvocation(std::move(CFlags), Path,
+					 Clang.getDiagnostics()));
+	Clang.setInvocation(CI);
+#endif
 
 	std::unique_ptr<CodeGenAction> Act(new EmitLLVMOnlyAction(&*LLVMCtx));
 	if (!Clang.ExecuteAction(*Act))
diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
index f5acda13dcfa..7eb7de5aee44 100644
--- a/tools/perf/util/dwarf-aux.c
+++ b/tools/perf/util/dwarf-aux.c
@@ -979,7 +979,7 @@ int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf)
 	return ret < 0 ? ret : strbuf_addf(buf, "\t%s", dwarf_diename(vr_die));
 }
 
-#ifdef HAVE_DWARF_GETLOCATIONS
+#ifdef HAVE_DWARF_GETLOCATIONS_SUPPORT
 /**
  * die_get_var_innermost_scope - Get innermost scope range of given variable DIE
  * @sp_die: a subprogram DIE
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 7d968892ee39..4d602fba40b2 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -6,6 +6,7 @@
 #include "session.h"
 #include "namespaces.h"
 #include "sort.h"
+#include "units.h"
 #include "evlist.h"
 #include "evsel.h"
 #include "annotate.h"
@@ -14,6 +15,7 @@
 #include "ui/progress.h"
 #include <errno.h>
 #include <math.h>
+#include <inttypes.h>
 #include <sys/param.h>
 
 static bool hists__filter_entry_by_dso(struct hists *hists,
@@ -2454,6 +2456,85 @@ u64 hists__total_period(struct hists *hists)
 		hists->stats.total_period;
 }
 
+int __hists__scnprintf_title(struct hists *hists, char *bf, size_t size, bool show_freq)
+{
+	char unit;
+	int printed;
+	const struct dso *dso = hists->dso_filter;
+	const struct thread *thread = hists->thread_filter;
+	int socket_id = hists->socket_filter;
+	unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE];
+	u64 nr_events = hists->stats.total_period;
+	struct perf_evsel *evsel = hists_to_evsel(hists);
+	const char *ev_name = perf_evsel__name(evsel);
+	char buf[512], sample_freq_str[64] = "";
+	size_t buflen = sizeof(buf);
+	char ref[30] = " show reference callgraph, ";
+	bool enable_ref = false;
+
+	if (symbol_conf.filter_relative) {	/* count only the non-filtered samples/period */
+		nr_samples = hists->stats.nr_non_filtered_samples;
+		nr_events = hists->stats.total_non_filtered_period;
+	}
+
+	if (perf_evsel__is_group_event(evsel)) {
+		struct perf_evsel *pos;
+
+		perf_evsel__group_desc(evsel, buf, buflen);
+		ev_name = buf;
+
+		for_each_group_member(pos, evsel) {	/* add each member's totals to the leader's */
+			struct hists *pos_hists = evsel__hists(pos);
+
+			if (symbol_conf.filter_relative) {
+				nr_samples += pos_hists->stats.nr_non_filtered_samples;
+				nr_events += pos_hists->stats.total_non_filtered_period;
+			} else {
+				nr_samples += pos_hists->stats.nr_events[PERF_RECORD_SAMPLE];
+				nr_events += pos_hists->stats.total_period;
+			}
+		}
+	}
+
+	if (symbol_conf.show_ref_callgraph &&
+	    strstr(ev_name, "call-graph=no"))
+		enable_ref = true;
+
+	if (show_freq)	/* sample_freq is a 64-bit attr field, so "%d" would be a format mismatch */
+		scnprintf(sample_freq_str, sizeof(sample_freq_str), " %" PRIu64 " Hz,",
+			  (u64)evsel->attr.sample_freq);
+
+	nr_samples = convert_unit(nr_samples, &unit);
+	printed = scnprintf(bf, size,
+			   "Samples: %lu%c of event%s '%s',%s%sEvent count (approx.): %" PRIu64,
+			   nr_samples, unit, evsel->nr_members > 1 ? "s" : "",
+			   ev_name, sample_freq_str, enable_ref ? ref : " ", nr_events);
+
+	if (hists->uid_filter_str)	/* scnprintf() like the other appends: keeps printed within size */
+		printed += scnprintf(bf + printed, size - printed,
+				    ", UID: %s", hists->uid_filter_str);
+	if (thread) {
+		if (hists__has(hists, thread)) {
+			printed += scnprintf(bf + printed, size - printed,
+				    ", Thread: %s(%d)",
+				     (thread->comm_set ? thread__comm_str(thread) : ""),
+				    thread->tid);
+		} else {
+			printed += scnprintf(bf + printed, size - printed,
+				    ", Thread: %s",
+				     (thread->comm_set ? thread__comm_str(thread) : ""));
+		}
+	}
+	if (dso)
+		printed += scnprintf(bf + printed, size - printed,
+				    ", DSO: %s", dso->short_name);
+	if (socket_id > -1)
+		printed += scnprintf(bf + printed, size - printed,
+				    ", Processor Socket: %d", socket_id);
+
+	return printed;
+}
+
 int parse_filter_percentage(const struct option *opt __maybe_unused,
 			    const char *arg, int unset __maybe_unused)
 {
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index e869cad4d89f..fbabfd8a215d 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -61,6 +61,7 @@ enum hist_column {
 	HISTC_SRCLINE_TO,
 	HISTC_TRACE,
 	HISTC_SYM_SIZE,
+	HISTC_DSO_SIZE,
 	HISTC_NR_COLS, /* Last entry */
 };
 
@@ -503,5 +504,11 @@ int __hpp__slsmg_color_printf(struct perf_hpp *hpp, const char *fmt, ...);
 int __hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp,
 			   struct perf_hpp_list *hpp_list);
 int hists__fprintf_headers(struct hists *hists, FILE *fp);
+int __hists__scnprintf_title(struct hists *hists, char *bf, size_t size, bool show_freq);
+
+static inline int hists__scnprintf_title(struct hists *hists, char *bf, size_t size)
+{
+	return __hists__scnprintf_title(hists, bf, size, true);	/* wrapper: show_freq is always true */
+}
 
 #endif	/* __PERF_HIST_H */
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index edeb7291c8e1..0e9bbe01b0ab 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -103,6 +103,10 @@ static inline u64 identity__map_ip(struct map *map __maybe_unused, u64 ip)
 	return ip;
 }
 
+static inline size_t map__size(const struct map *map)
+{
+	return map->end - map->start;	/* size is the distance between the map's start and end fields */
+}
 
 /* rip/ip <-> addr suitable for passing to `objdump --start-address=` */
 u64 map__rip_2objdump(struct map *map, u64 rip);
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index c71ced7db152..f4a7a437ee87 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1591,7 +1591,7 @@ static void perf_session__warn_about_errors(const struct perf_session *session)
 		drop_rate = (double)stats->total_lost_samples /
 			    (double) (stats->nr_events[PERF_RECORD_SAMPLE] + stats->total_lost_samples);
 		if (drop_rate > 0.05) {
-			ui__warning("Processed %" PRIu64 " samples and lost %3.2f%% samples!\n\n",
+			ui__warning("Processed %" PRIu64 " samples and lost %3.2f%%!\n\n",
 				    stats->nr_events[PERF_RECORD_SAMPLE] + stats->total_lost_samples,
 				    drop_rate * 100.0);
 		}
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index e8514f651865..26a68dfd8a4f 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -1545,6 +1545,46 @@ struct sort_entry sort_sym_size = {
 	.se_width_idx	= HISTC_SYM_SIZE,
 };
 
+/* --sort dso_size */
+
+static int64_t _sort__dso_size_cmp(struct map *map_l, struct map *map_r)
+{
+	int64_t lhs = map_l ? map__size(map_l) : 0;	/* a missing map counts as size 0 */
+	int64_t rhs = map_r ? map__size(map_r) : 0;
+
+	/* three-way result: -1 if lhs < rhs, 0 if equal, 1 if lhs > rhs */
+	return (lhs > rhs) - (lhs < rhs);
+}
+
+static int64_t
+sort__dso_size_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	return _sort__dso_size_cmp(right->ms.map, left->ms.map);	/* note: right's map is passed first, left's second */
+}
+
+static int _hist_entry__dso_size_snprintf(struct map *map, char *bf,
+					  size_t bf_size, unsigned int width)
+{
+	if (map && map->dso)	/* map__size() returns size_t, hence %zu rather than %d */
+		return repsep_snprintf(bf, bf_size, "%*zu", width,
+				       map__size(map));
+
+	return repsep_snprintf(bf, bf_size, "%*s", width, "unknown");	/* no map or no dso */
+}
+
+static int hist_entry__dso_size_snprintf(struct hist_entry *he, char *bf,
+					 size_t size, unsigned int width)
+{
+	return _hist_entry__dso_size_snprintf(he->ms.map, bf, size, width);	/* formats the size of the entry's map */
+}
+
+struct sort_entry sort_dso_size = {	/* registered under the "dso_size" sort key */
+	.se_header	= "DSO size",
+	.se_cmp		= sort__dso_size_cmp,
+	.se_snprintf	= hist_entry__dso_size_snprintf,
+	.se_width_idx	= HISTC_DSO_SIZE,	/* hist_column value added alongside this entry */
+};
+
 
 struct sort_dimension {
 	const char		*name;
@@ -1569,6 +1609,7 @@ static struct sort_dimension common_sort_dimensions[] = {
 	DIM(SORT_TRANSACTION, "transaction", sort_transaction),
 	DIM(SORT_TRACE, "trace", sort_trace),
 	DIM(SORT_SYM_SIZE, "symbol_size", sort_sym_size),
+	DIM(SORT_DSO_SIZE, "dso_size", sort_dso_size),
 	DIM(SORT_CGROUP_ID, "cgroup_id", sort_cgroup_id),
 };
 
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index f5901c10a563..035b62e2c60b 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -220,6 +220,7 @@ enum sort_type {
 	SORT_TRANSACTION,
 	SORT_TRACE,
 	SORT_SYM_SIZE,
+	SORT_DSO_SIZE,
 	SORT_CGROUP_ID,
 
 	/* branch stack specific sort keys */
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 9496365da3d7..c9626c206208 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -11,8 +11,7 @@
 #include <stdlib.h>
 #include <stdarg.h>
 #include <linux/compiler.h>
-#include <linux/types.h>
-#include "namespaces.h"
+#include <sys/types.h>
 
 /* General helper functions */
 void usage(const char *err) __noreturn;
@@ -26,6 +25,7 @@ static inline void *zalloc(size_t size)
 #define zfree(ptr) ({ free(*ptr); *ptr = NULL; })
 
 struct dirent;
+struct nsinfo;
 struct strlist;
 
 int mkdir_p(char *path, mode_t mode);
diff --git a/tools/testing/ktest/config-bisect.pl b/tools/testing/ktest/config-bisect.pl
new file mode 100755
index 000000000000..b28feea7c363
--- /dev/null
+++ b/tools/testing/ktest/config-bisect.pl
@@ -0,0 +1,770 @@
+#!/usr/bin/perl -w
+#
+# Copyright 2015 - Steven Rostedt, Red Hat Inc.
+# Copyright 2017 - Steven Rostedt, VMware, Inc.
+#
+# Licensed under the terms of the GNU GPL License version 2
+#
+
+# usage:
+#  config-bisect.pl [options] good-config bad-config [good|bad]
+#
+
+# Compares a good config to a bad config, then takes half of the diffs
+# and produces a config that is somewhere between the good config and
+# the bad config. That is, the resulting config will start with the
+# good config and will try to make half of the differences between
+# the good and bad configs match the bad config. It can only try: because
+# of dependencies between the configs, it may not be able to change
+# exactly half of the configs that are different between the two config
+# files.
+
+# Here's a normal way to use it:
+#
+#  $ cd /path/to/linux/kernel
+#  $ config-bisect.pl /path/to/good/config /path/to/bad/config
+
+# This will now pull in good config (blowing away .config in that directory
+# so do not make that be one of the good or bad configs), and then
+# build the config with "make oldconfig" to make sure it matches the
+# current kernel. It will then store the configs in that result for
+# the good config. It does the same for the bad config as well.
+# The algorithm will run, merging half of the differences between
+# the two configs and building them with "make oldconfig" to make sure
+# the result changes (dependencies may reset changes the tool had made).
+# It then copies the result of its good config to /path/to/good/config.tmp
+# and the bad config to /path/to/bad/config.tmp (just appends ".tmp" to the
+# files passed in). And the ".config" that you should test will be in
+# that directory.
+
+# After the first run, determine if the result is good or bad then
+# run the same command appending the result
+
+# For good results:
+#  $ config-bisect.pl /path/to/good/config /path/to/bad/config good
+
+# For bad results:
+#  $ config-bisect.pl /path/to/good/config /path/to/bad/config bad
+
+# Do not change the good-config or bad-config, config-bisect.pl will
+# copy the good-config to a temp file with the same name as good-config
+# but with a ".tmp" after it. It will do the same with the bad-config.
+
+# If "good" or "bad" is not stated at the end, it will copy the good and
+# bad configs to the .tmp versions. If a .tmp version already exists, it will
+# warn before writing over them (-r will not warn, and just write over them).
+# If the last config is labeled "good", then it will copy it to the good .tmp
+# version. If the last config is labeled "bad", it will copy it to the bad
+# .tmp version. It will continue this until it can not merge the two any more
+# without the result being equal to either the good or bad .tmp configs.
+
+my $start = 0;
+my $val = "";
+
+my $pwd = `pwd`;
+chomp $pwd;
+my $tree = $pwd;
+my $build;
+
+my $output_config;
+my $reset_bisect;
+
+sub usage {	# print the command-line help text, then terminate the script
+    print << "EOF"
+
+usage: config-bisect.pl [-l linux-tree][-b build-dir] good-config bad-config [good|bad]
+  -l [optional] define location of linux-tree (default is current directory)
+  -b [optional] define location to build (O=build-dir) (default is linux-tree)
+  good-config the config that is considered good
+  bad-config the config that does not work
+  "good" add this if the last run produced a good config
+  "bad" add this if the last run produced a bad config
+  If "good" or "bad" is not specified, then it is the start of a new bisect
+
+  Note, each run will create copy of good and bad configs with ".tmp" appended.
+
+EOF
+;
+
+    exit(-1);	# never returns; non-zero exit status
+}
+
+sub doprint {
+    print @_;	# thin wrapper: prints all arguments as given
+}
+
+sub dodie {
+    doprint "CRITICAL FAILURE... ", @_, "\n";	# report through doprint first
+
+    die @_, "\n";	# trailing newline keeps Perl from appending "at FILE line N"
+}
+
+sub expand_path {
+    my ($file) = @_;
+
+    # paths starting with "/" pass through; others are prefixed with $pwd
+    my $is_absolute = (substr($file, 0, 1) eq "/");
+
+    return $is_absolute ? $file : "$pwd/$file";
+}
+
+sub read_prompt {	# ask on STDIN; returns 1 for "y", 0 for "n", exits the script on "c"
+    my ($cancel, $prompt) = @_;
+
+    my $ans;
+
+    for (;;) {	# repeat until an accepted answer is given
+	if ($cancel) {
+	    print "$prompt [y/n/C] ";
+	} else {
+	    print "$prompt [y/N] ";
+	}
+	$ans = <STDIN>;
+	chomp $ans;
+	if ($ans =~ /^\s*$/) {	# blank answer: use the default ("c" with cancel, else "n")
+	    if ($cancel) {
+		$ans = "c";
+	    } else {
+		$ans = "n";
+	    }
+	}
+	last if ($ans =~ /^y$/i || $ans =~ /^n$/i);
+	if ($cancel) {
+	    last if ($ans =~ /^c$/i);	# "c" is only accepted when cancel is offered
+	    print "Please answer either 'y', 'n' or 'c'.\n";
+	} else {
+	    print "Please answer either 'y' or 'n'.\n";
+	}
+    }
+    if ($ans =~ /^c/i) {
+	exit;
+    }
+    if ($ans !~ /^y$/i) {
+	return 0;
+    }
+    return 1;
+}
+
+sub read_yn {
+    my ($question) = @_;
+
+    return read_prompt(0, $question);	# 0: plain yes/no, no cancel choice
+}
+
+sub read_ync {
+    my ($question) = @_;
+
+    return read_prompt(1, $question);	# 1: yes/no plus a cancel choice
+}
+
+sub run_command {	# run $command through a pipe; returns true when its wait status is 0
+    my ($command, $redirect) = @_;
+    my $start_time;
+    my $end_time;
+    my $dord = 0;	# set to 1 when output is copied to $redirect
+    my $pid;
+
+    $start_time = time;
+
+    doprint("$command ... ");
+
+    $pid = open(CMD, "$command 2>&1 |") or	# stderr is folded into the pipe
+	dodie "unable to exec $command";
+
+    if (defined($redirect)) {
+	open (RD, ">$redirect") or
+	    dodie "failed to write to redirect $redirect";
+	$dord = 1;
+    }
+
+    while (<CMD>) {	# drain the output; it is kept only when redirecting
+	print RD  if ($dord);
+    }
+
+    waitpid($pid, 0);
+    my $failed = $?;	# saved before the close() calls below can change $?
+
+    close(CMD);
+    close(RD)  if ($dord);
+
+    $end_time = time;
+    my $delta = $end_time - $start_time;
+
+    if ($delta == 1) {
+	doprint "[1 second] ";
+    } else {
+	doprint "[$delta seconds] ";
+    }
+
+    if ($failed) {
+	doprint "FAILED!\n";
+    } else {
+	doprint "SUCCESS\n";
+    }
+
+    return !$failed;
+}
+
+###### CONFIG BISECT ######
+
+# config_ignore holds the configs that were set (or unset) for
+# a good config and we will ignore these configs for the rest
+# of a config bisect. These configs stay as they were.
+my %config_ignore;
+
+# config_set holds what all configs were set as.
+my %config_set;
+
+# config_off holds the set of configs that the bad config had disabled.
+# We need to record them and set them in the .config when running
+# olddefconfig, because olddefconfig keeps the defaults.
+my %config_off;
+
+# config_off_tmp holds a set of configs to turn off for now
+my @config_off_tmp;
+
+# config_list is the set of configs that are being tested
+my %config_list;
+my %null_config;
+
+my %dependency;
+
+my $make;
+
+sub make_oldconfig {	# try olddefconfig, then oldnoconfig, then "yes '' | make oldconfig"
+
+    if (!run_command "$make olddefconfig") {
+	# Perhaps olddefconfig doesn't exist in this version of the kernel
+	# try oldnoconfig
+	doprint "olddefconfig failed, trying make oldnoconfig\n";
+	if (!run_command "$make oldnoconfig") {
+	    doprint "oldnoconfig failed, trying yes '' | make oldconfig\n";
+	    # try a yes '' | oldconfig
+	    run_command "yes '' | $make oldconfig" or
+		dodie "failed make config oldconfig";	# all three attempts failed
+	}
+    }
+}
+
+sub assign_configs {	# read $config into %{$hash}: config name => its full line
+    my ($hash, $config) = @_;
+
+    doprint "Reading configs from $config\n";
+
+    open (IN, "<", $config)	# three-arg open: the path is never parsed as a mode
+	or dodie "Failed to read $config";
+
+    while (<IN>) {
+	chomp;
+	if (/^((CONFIG[^\s=]*)=.*)/) {	# name ends at the first '=', even if the value has one
+	    ${$hash}{$2} = $1;
+	} elsif (/^(# (CONFIG\S*) is not set)/) {	# disabled options are recorded too
+	    ${$hash}{$2} = $1;
+	}
+    }
+
+    close(IN);
+}
+
+sub process_config_ignore {
+    my ($ignore_file) = @_;
+
+    assign_configs(\%config_ignore, $ignore_file);	# entries are stored in the global %config_ignore
+}
+
+sub get_dependencies {	# list $config's entries in %dependency plus, recursively, theirs
+    my ($config) = @_;
+
+    my $arr = $dependency{$config};
+    if (!defined($arr)) {
+	return ();	# nothing recorded for this config
+    }
+
+    my @deps = @{$arr};
+
+    foreach my $dep (@{$arr}) {
+	print "ADD DEP $dep\n";
+	@deps = (@deps, get_dependencies $dep);	# NOTE(review): no visited set; a cycle in %dependency would not terminate
+    }
+
+    return @deps;
+}
+
+sub save_config {	# write every value of %{$pc} to $file, one per line
+    my ($pc, $file) = @_;
+
+    my %configs = %{$pc};
+
+    doprint "Saving configs into $file\n";
+
+    open(OUT, ">", $file) or dodie "Can not write to $file";	# three-arg open
+
+    foreach my $config (keys %configs) {
+	print OUT "$configs{$config}\n";
+    }
+    close(OUT) or dodie "Failed to finish writing $file";	# buffered write errors surface at close
+}
+
+sub create_config {	# $name is used only in the progress message
+    my ($name, $pc) = @_;
+
+    doprint "Creating old config from $name configs\n";
+
+    save_config $pc, $output_config;	# write the hash to the global $output_config path
+
+    make_oldconfig;	# then run the make config-update targets
+}
+
+# For every key of A that B also defines with a different value, return
+# B's value for that key (A can be consulted to see the old value).
+sub diff_config_vals {
+    my ($pa, $pb) = @_;
+
+    # read through the two hash references directly
+    my ($lhs, $rhs) = ($pa, $pb);
+
+    my %changed;
+
+    foreach my $name (keys %{$lhs}) {
+	my $theirs = $rhs->{$name};
+
+	next if (!defined($theirs) || $theirs eq $lhs->{$name});
+	$changed{$name} = $theirs;
+    }
+
+    return %changed;
+}
+
+# Collect the entries of B whose key has no defined value in A.
+sub diff_configs {
+    my ($pa, $pb) = @_;
+
+    my %only_in_b;
+
+    # read through the two hash references directly
+    # instead of copying both hashes first
+    my ($lhs, $rhs) = ($pa, $pb);
+
+    foreach my $name (keys %{$rhs}) {
+	next if (defined($lhs->{$name}));
+
+	$only_in_b{$name} = $rhs->{$name};
+    }
+
+    return %only_in_b;
+}
+
+# Report whether two config hashes are equal:
+#  0 if they are equal,
+# +1 if b has an item a lacks, or a and b hold different values for one,
+# -1 if a has an item that b lacks (checked only after the +1 cases).
+sub compare_configs {
+    my ($pa, $pb) = @_;
+
+    my %ret;
+
+    # crappy Perl way to pass in hashes.
+    my %a = %{$pa};
+    my %b = %{$pb};
+
+    foreach my $item (keys %b) {
+	if (!defined($a{$item})) {
+	    return 1;
+	}
+	if ($a{$item} ne $b{$item}) {
+	    return 1;
+	}
+    }
+
+    foreach my $item (keys %a) {
+	if (!defined($b{$item})) {
+	    return -1;
+	}
+    }
+
+    return 0;
+}
+
+sub process_failed {
+    my ($config) = @_;
+
+    doprint("\n\n***************************************\n",
+	    "Found bad config: $config\n",
+	    "***************************************\n\n");
+}
+
+sub process_new_config {	# build a config from %{$tc}; returns 1 if it differs from both good and bad
+    my ($tc, $nc, $gc, $bc) = @_;
+
+    my %tmp_config = %{$tc};
+    my %good_configs = %{$gc};
+    my %bad_configs = %{$bc};
+
+    my %new_configs;
+
+    my $runtest = 1;
+    my $ret;
+
+    create_config "tmp_configs", \%tmp_config;
+    assign_configs \%new_configs, $output_config;	# read back what the make step produced
+
+    $ret = compare_configs \%new_configs, \%bad_configs;
+    if (!$ret) {	# compare_configs returns 0 when the two hashes are equal
+	doprint "New config equals bad config, try next test\n";
+	$runtest = 0;
+    }
+
+    if ($runtest) {
+	$ret = compare_configs \%new_configs, \%good_configs;
+	if (!$ret) {
+	    doprint "New config equals good config, try next test\n";
+	    $runtest = 0;
+	}
+    }
+
+    %{$nc} = %new_configs;	# the resulting config is handed back through $nc
+
+    return $runtest;
+}
+
+sub convert_config {	# shorten a config line for display
+    my ($config) = @_;
+
+    if ($config =~ /^# (.*) is not set/) {	# "# CONFIG_FOO is not set" becomes "CONFIG_FOO=n"
+	$config = "$1=n";
+    }
+
+    $config =~ s/^CONFIG_//;	# drop the leading "CONFIG_" prefix
+    return $config;
+}
+
+sub print_config {
+    my ($sym, $config) = @_;
+
+    # print $sym followed by the shortened form of the config line
+    doprint $sym, convert_config($config), "\n";
+}
+
+sub print_config_compare {	# print " NAME good-value -> bad-value" for one config
+    my ($good_config, $bad_config) = @_;
+
+    $good_config = convert_config $good_config;
+    $bad_config = convert_config $bad_config;
+
+    my $good_value = $good_config;
+    my $bad_value = $bad_config;
+    $good_value =~ s/^([^=]*)=//;	# split at the FIRST '=': values may contain '=' themselves
+    my $config = $1;
+
+    $bad_value =~ s/^[^=]*=//;	# same split for the bad side; only the value is kept
+
+    doprint " $config $good_value -> $bad_value\n";
+}
+
+# Pass in:
+# $phalf: half of the config names you want to add
+# $oconfigs: The original configs to start with
+# $sconfigs: The source to update $oconfigs with (from $phalf)
+# $which: The name of which half that is updating (top / bottom)
+# $type: The name of the source type (good / bad)
+sub make_half {
+    my ($phalf, $oconfigs, $sconfigs, $which, $type) = @_;
+
+    my @half = @{$phalf};
+    my %orig_configs = %{$oconfigs};
+    my %source_configs = %{$sconfigs};
+
+    my %tmp_config = %orig_configs;	# work on a copy; the caller's hash is not modified
+
+    doprint "Settings bisect with $which half of $type configs:\n";
+    foreach my $item (@half) {
+	doprint "Updating $item to $source_configs{$item}\n";
+	$tmp_config{$item} = $source_configs{$item};	# take this item's value from the source configs
+    }
+
+    return %tmp_config;
+}
+
+sub run_config_bisect {
+    my ($pgood, $pbad) = @_;
+    # $pgood / $pbad: references to the good and bad config hashes
+    my %good_configs = %{$pgood};
+    my %bad_configs = %{$pbad};
+
+    my %diff_configs = diff_config_vals \%good_configs, \%bad_configs;
+    my %b_configs = diff_configs \%good_configs, \%bad_configs;
+    my %g_configs = diff_configs \%bad_configs, \%good_configs;
+
+    # diff_arr is what is in both good and bad but are different (y->n)
+    my @diff_arr = keys %diff_configs;
+    my $len_diff = $#diff_arr + 1;
+
+    # b_arr is what is in bad but not in good (has depends)
+    my @b_arr = keys %b_configs;
+    my $len_b = $#b_arr + 1;
+
+    # g_arr is what is in good but not in bad
+    my @g_arr = keys %g_configs;
+    my $len_g = $#g_arr + 1;
+
+    my $runtest = 0;
+    my %new_configs;
+    my $ret;
+
+    # Look at the configs that are different between good and bad.
+    # This does not include those that depend on other configs
+    #  (configs depending on other configs that are not set would
+    #   not show up even as a "# CONFIG_FOO is not set")
+
+
+    doprint "# of configs to check:             $len_diff\n";
+    doprint "# of configs showing only in good: $len_g\n";
+    doprint "# of configs showing only in bad:  $len_b\n";
+
+    if ($len_diff > 0) {
+	# Now test for different values
+
+	doprint "Configs left to check:\n";
+	doprint "  Good Config\t\t\tBad Config\n";
+	doprint "  -----------\t\t\t----------\n";
+	foreach my $item (@diff_arr) {
+	    doprint "  $good_configs{$item}\t$bad_configs{$item}\n";
+	}
+
+	my $half = int($#diff_arr / 2);
+	my @tophalf = @diff_arr[0 .. $half];
+
+	doprint "Set tmp config to be good config with some bad config values\n";
+
+	my %tmp_config = make_half \@tophalf, \%good_configs,
+	    \%bad_configs, "top", "bad";
+
+	$runtest = process_new_config \%tmp_config, \%new_configs,
+			    \%good_configs, \%bad_configs;
+	# If the result matched good or bad, mix the other way around.
+	if (!$runtest) {
+	    doprint "Set tmp config to be bad config with some good config values\n";
+
+	    my %tmp_config = make_half \@tophalf, \%bad_configs,
+		\%good_configs, "top", "good";
+
+	    $runtest = process_new_config \%tmp_config, \%new_configs,
+		\%good_configs, \%bad_configs;
+	}
+    }
+
+    if (!$runtest && $len_diff > 0) {
+	# do the same thing, but this time with bottom half
+
+	my $half = int($#diff_arr / 2);
+	my @bottomhalf = @diff_arr[$half+1 .. $#diff_arr];
+
+	doprint "Set tmp config to be good config with some bad config values\n";
+
+	my %tmp_config = make_half \@bottomhalf, \%good_configs,
+	    \%bad_configs, "bottom", "bad";
+
+	$runtest = process_new_config \%tmp_config, \%new_configs,
+			    \%good_configs, \%bad_configs;
+	# If the result matched good or bad, mix the other way around.
+	if (!$runtest) {
+	    doprint "Set tmp config to be bad config with some good config values\n";
+
+	    my %tmp_config = make_half \@bottomhalf, \%bad_configs,
+		\%good_configs, "bottom", "good";
+
+	    $runtest = process_new_config \%tmp_config, \%new_configs,
+		\%good_configs, \%bad_configs;
+	}
+    }
+    # Returns 0 once a config is ready to be tested, -1 otherwise.
+    if ($runtest) {
+	make_oldconfig;
+	doprint "READY TO TEST .config IN $build\n";
+	return 0;
+    }
+    # Nothing testable was produced: report how good and bad differ.
+    doprint "\n%%%%%%%% FAILED TO FIND SINGLE BAD CONFIG %%%%%%%%\n";
+    doprint "Hmm, can't make any more changes without making good == bad?\n";
+    doprint "Difference between good (+) and bad (-)\n";
+    # "-": configs that are only in bad
+    foreach my $item (keys %bad_configs) {
+	if (!defined($good_configs{$item})) {
+	    print_config "-", $bad_configs{$item};
+	}
+    }
+    # configs that are in both, but with different values
+    foreach my $item (keys %good_configs) {
+	next if (!defined($bad_configs{$item}));
+	if ($good_configs{$item} ne $bad_configs{$item}) {
+	    print_config_compare $good_configs{$item}, $bad_configs{$item};
+	}
+    }
+    # "+": configs that are only in good
+    foreach my $item (keys %good_configs) {
+	if (!defined($bad_configs{$item})) {
+	    print_config "+", $good_configs{$item};
+	}
+    }
+    return -1;
+}
+
+# Run both configs through create_config, hand the results to save_config
+# under the same names, then return the result of run_config_bisect.
+sub config_bisect {
+    my ($good_config, $bad_config) = @_;
+    my (%good_configs, %bad_configs);
+    # One scratch hash per side: good entries must not leak into bad.
+    my (%good_in, %bad_in);
+
+    doprint "Run good configs through make oldconfig\n";
+    assign_configs \%good_in, $good_config;
+    create_config "$good_config", \%good_in;
+    assign_configs \%good_configs, $output_config;
+
+    doprint "Run bad configs through make oldconfig\n";
+    assign_configs \%bad_in, $bad_config;
+    create_config "$bad_config", \%bad_in;
+    assign_configs \%bad_configs, $output_config;
+
+    save_config \%good_configs, $good_config;
+    save_config \%bad_configs, $bad_config;
+
+    return run_config_bisect \%good_configs, \%bad_configs;
+}
+
+while ($#ARGV >= 0) {
+    if ($ARGV[0] !~ m/^-/) {
+	last;
+    }
+    my $opt = shift @ARGV;
+    # -b <value>: stored in $build
+    if ($opt eq "-b") {
+	$val = shift @ARGV;
+	if (!defined($val)) {
+	    die "-b requires value\n";
+	}
+	$build = $val;
+    }
+    # -l <value>: stored in $tree
+    elsif ($opt eq "-l") {
+	$val = shift @ARGV;
+	if (!defined($val)) {
+	    die "-l requires value\n";
+	}
+	$tree = $val;
+    }
+    # -r: sets $reset_bisect
+    elsif ($opt eq "-r") {
+	$reset_bisect = 1;
+    }
+    # -h: calls usage
+    elsif ($opt eq "-h") {
+	usage;
+    }
+    # any other argument starting with '-' is fatal
+    else {
+	die "Unknow option $opt\n";
+    }
+}
+
+$build = $tree if (!defined($build));
+
+$tree = expand_path $tree;
+$build = expand_path $build;
+
+if ( ! -d $tree ) {
+    die "$tree not a directory\n";
+}
+
+if ( ! -d $build ) {
+    die "$build not a directory\n";
+}
+# The good and the bad config must remain as arguments.
+usage if $#ARGV < 1;
+# Two arguments start a new bisect; a third must be "good" or "bad".
+if ($#ARGV == 1) {
+    $start = 1;
+} elsif ($#ARGV == 2) {
+    $val = $ARGV[2];
+    if ($val ne "good" && $val ne "bad") {
+	die "Unknown command '$val', bust be either \"good\" or \"bad\"\n";
+    }
+} else {
+    usage;
+}
+
+my $good_start = expand_path $ARGV[0];
+my $bad_start = expand_path $ARGV[1];
+# Working files carry the names of the originals plus ".tmp".
+my $good = "$good_start.tmp";
+my $bad = "$bad_start.tmp";
+
+$make = "make";
+# Pass O= to make when $build differs from $tree.
+if ($build ne $tree) {
+    $make = "make O=$build"
+}
+
+$output_config = "$build/.config";
+
+if ($start) {
+    if ( ! -f $good_start ) {
+	die "$good_start not found\n";
+    }
+    if ( ! -f $bad_start ) {
+	die "$bad_start not found\n";
+    }
+    if ( -f $good || -f $bad ) {
+	my $p = "";
+	# Name every leftover working file in the prompt.
+	if ( -f $good ) {
+	    $p = "$good exists\n";
+	}
+
+	if ( -f $bad ) {
+	    $p = "$p$bad exists\n";
+	}
+	# -r ($reset_bisect) skips the confirmation.
+	if (!defined($reset_bisect)) {
+	    if (!read_yn "${p}Overwrite and start new bisect anyway?") {
+		exit (-1);
+	    }
+	}
+    }
+    run_command "cp $good_start $good" or die "failed to copy to $good\n";
+    run_command "cp $bad_start $bad" or die "failed to copy to $bad\n";
+} else {
+    if ( ! -f $good ) {
+	die "Can not find file $good\n";
+    }
+    if ( ! -f $bad ) {
+	die "Can not find file $bad\n";
+    }
+    if ($val eq "good") {
+	run_command "cp $output_config $good" or die "failed to copy $output_config to $good\n";
+    } elsif ($val eq "bad") {
+	run_command "cp $output_config $bad" or die "failed to copy $output_config to $bad\n";
+    }
+}
+
+# "or", not "||": "||" binds to $tree, so the die could never run.
+chdir $tree or die "can't change directory to $tree";
+my $ret = config_bisect $good, $bad;
+
+if (!$ret) {
+    exit(0);
+}
+# Exit status from here on: 1 if $ret is positive, otherwise 2.
+if ($ret > 0) {
+    doprint "Cleaning temp files\n";
+    run_command "rm $good";
+    run_command "rm $bad";
+    exit(1);
+} else {
+    doprint "See good and bad configs for details:\n";
+    doprint "good: $good\n";
+    doprint "bad:  $bad\n";
+    doprint "%%%%%%%% FAILED TO FIND SINGLE BAD CONFIG %%%%%%%%\n";
+}
+exit(2);
diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
index 8809f244bb7c..87af8a68ab25 100755
--- a/tools/testing/ktest/ktest.pl
+++ b/tools/testing/ktest/ktest.pl
@@ -10,6 +10,7 @@ use Fcntl qw(F_GETFL F_SETFL O_NONBLOCK);
 use File::Path qw(mkpath);
 use File::Copy qw(cp);
 use FileHandle;
+use FindBin;
 
 my $VERSION = "0.2";
 
@@ -22,6 +23,11 @@ my %evals;
 
 #default opts
 my %default = (
+    "MAILER"			=> "sendmail",  # default mailer
+    "EMAIL_ON_ERROR"		=> 1,
+    "EMAIL_WHEN_FINISHED"	=> 1,
+    "EMAIL_WHEN_CANCELED"	=> 0,
+    "EMAIL_WHEN_STARTED"	=> 0,
     "NUM_TESTS"			=> 1,
     "TEST_TYPE"			=> "build",
     "BUILD_TYPE"		=> "randconfig",
@@ -59,6 +65,7 @@ my %default = (
     "GRUB_REBOOT"		=> "grub2-reboot",
     "SYSLINUX"			=> "extlinux",
     "SYSLINUX_PATH"		=> "/boot/extlinux",
+    "CONNECT_TIMEOUT"		=> 25,
 
 # required, and we will ask users if they don't have them but we keep the default
 # value something that is common.
@@ -163,6 +170,8 @@ my $store_failures;
 my $store_successes;
 my $test_name;
 my $timeout;
+my $connect_timeout;
+my $config_bisect_exec;
 my $booted_timeout;
 my $detect_triplefault;
 my $console;
@@ -204,6 +213,20 @@ my $install_time;
 my $reboot_time;
 my $test_time;
 
+my $pwd;
+my $dirname = $FindBin::Bin;
+
+my $mailto;
+my $mailer;
+my $mail_path;
+my $mail_command;
+my $email_on_error;
+my $email_when_finished;
+my $email_when_started;
+my $email_when_canceled;
+
+my $script_start_time = localtime();
+
 # set when a test is something other that just building or install
 # which would require more options.
 my $buildonly = 1;
@@ -229,6 +252,14 @@ my $no_reboot = 1;
 my $reboot_success = 0;
 
 my %option_map = (
+    "MAILTO"			=> \$mailto,
+    "MAILER"			=> \$mailer,
+    "MAIL_PATH"			=> \$mail_path,
+    "MAIL_COMMAND"		=> \$mail_command,
+    "EMAIL_ON_ERROR"		=> \$email_on_error,
+    "EMAIL_WHEN_FINISHED"	=> \$email_when_finished,
+    "EMAIL_WHEN_STARTED"	=> \$email_when_started,
+    "EMAIL_WHEN_CANCELED"	=> \$email_when_canceled,
     "MACHINE"			=> \$machine,
     "SSH_USER"			=> \$ssh_user,
     "TMP_DIR"			=> \$tmpdir,
@@ -296,6 +327,8 @@ my %option_map = (
     "STORE_SUCCESSES"		=> \$store_successes,
     "TEST_NAME"			=> \$test_name,
     "TIMEOUT"			=> \$timeout,
+    "CONNECT_TIMEOUT"		=> \$connect_timeout,
+    "CONFIG_BISECT_EXEC"	=> \$config_bisect_exec,
     "BOOTED_TIMEOUT"		=> \$booted_timeout,
     "CONSOLE"			=> \$console,
     "CLOSE_CONSOLE_SIGNAL"	=> \$close_console_signal,
@@ -337,6 +370,7 @@ my %used_options;
 
 # default variables that can be used
 chomp ($variable{"PWD"} = `pwd`);
+$pwd = $variable{"PWD"};
 
 $config_help{"MACHINE"} = << "EOF"
  The machine hostname that you will test.
@@ -718,22 +752,14 @@ sub set_value {
 
     my $prvalue = process_variables($rvalue);
 
-    if ($buildonly && $lvalue =~ /^TEST_TYPE(\[.*\])?$/ && $prvalue ne "build") {
+    if ($lvalue =~ /^(TEST|BISECT|CONFIG_BISECT)_TYPE(\[.*\])?$/ &&
+	$prvalue !~ /^(config_|)bisect$/ &&
+	$prvalue !~ /^build$/ &&
+	$buildonly) {
+
 	# Note if a test is something other than build, then we
 	# will need other mandatory options.
 	if ($prvalue ne "install") {
-	    # for bisect, we need to check BISECT_TYPE
-	    if ($prvalue ne "bisect") {
-		$buildonly = 0;
-	    }
-	} else {
-	    # install still limits some mandatory options.
-	    $buildonly = 2;
-	}
-    }
-
-    if ($buildonly && $lvalue =~ /^BISECT_TYPE(\[.*\])?$/ && $prvalue ne "build") {
-	if ($prvalue ne "install") {
 	    $buildonly = 0;
 	} else {
 	    # install still limits some mandatory options.
@@ -1140,7 +1166,8 @@ sub __read_config {
 sub get_test_case {
 	print "What test case would you like to run?\n";
 	print " (build, install or boot)\n";
-	print " Other tests are available but require editing the config file\n";
+	print " Other tests are available but require editing ktest.conf\n";
+	print " (see tools/testing/ktest/sample.conf)\n";
 	my $ans = <STDIN>;
 	chomp $ans;
 	$default{"TEST_TYPE"} = $ans;
@@ -1328,8 +1355,8 @@ sub reboot {
     my ($time) = @_;
     my $powercycle = 0;
 
-    # test if the machine can be connected to within 5 seconds
-    my $stat = run_ssh("echo check machine status", 5);
+    # test if the machine can be connected to within a few seconds
+    my $stat = run_ssh("echo check machine status", $connect_timeout);
     if (!$stat) {
 	doprint("power cycle\n");
 	$powercycle = 1;
@@ -1404,10 +1431,18 @@ sub do_not_reboot {
 
     return $test_type eq "build" || $no_reboot ||
 	($test_type eq "patchcheck" && $opt{"PATCHCHECK_TYPE[$i]"} eq "build") ||
-	($test_type eq "bisect" && $opt{"BISECT_TYPE[$i]"} eq "build");
+	($test_type eq "bisect" && $opt{"BISECT_TYPE[$i]"} eq "build") ||
+	($test_type eq "config_bisect" && $opt{"CONFIG_BISECT_TYPE[$i]"} eq "build");
 }
 
+my $in_die = 0;
+
 sub dodie {
+
+    # avoid recursion
+    return if ($in_die);
+    $in_die = 1;
+
     doprint "CRITICAL FAILURE... ", @_, "\n";
 
     my $i = $iteration;
@@ -1426,6 +1461,11 @@ sub dodie {
 	print " See $opt{LOG_FILE} for more info.\n";
     }
 
+    if ($email_on_error) {
+        send_email("KTEST: critical failure for your [$test_type] test",
+                "Your test started at $script_start_time has failed with:\n@_\n");
+    }
+
     if ($monitor_cnt) {
 	    # restore terminal settings
 	    system("stty $stty_orig");
@@ -1477,7 +1517,7 @@ sub exec_console {
     close($pts);
 
     exec $console or
-	die "Can't open console $console";
+	dodie "Can't open console $console";
 }
 
 sub open_console {
@@ -1515,6 +1555,9 @@ sub close_console {
     doprint "kill child process $pid\n";
     kill $close_console_signal, $pid;
 
+    doprint "wait for child process $pid to exit\n";
+    waitpid($pid, 0);
+
     print "closing!\n";
     close($fp);
 
@@ -1625,7 +1668,7 @@ sub save_logs {
 
 	if (!-d $dir) {
 	    mkpath($dir) or
-		die "can't create $dir";
+		dodie "can't create $dir";
 	}
 
 	my %files = (
@@ -1638,7 +1681,7 @@ sub save_logs {
 	while (my ($name, $source) = each(%files)) {
 		if (-f "$source") {
 			cp "$source", "$dir/$name" or
-				die "failed to copy $source";
+				dodie "failed to copy $source";
 		}
 	}
 
@@ -1692,6 +1735,7 @@ sub run_command {
     my $end_time;
     my $dolog = 0;
     my $dord = 0;
+    my $dostdout = 0;
     my $pid;
 
     $command =~ s/\$SSH_USER/$ssh_user/g;
@@ -1710,9 +1754,15 @@ sub run_command {
     }
 
     if (defined($redirect)) {
-	open (RD, ">$redirect") or
-	    dodie "failed to write to redirect $redirect";
-	$dord = 1;
+	if ($redirect eq 1) {
+	    $dostdout = 1;
+	    # Have the output of the command on its own line
+	    doprint "\n";
+	} else {
+	    open (RD, ">$redirect") or
+		dodie "failed to write to redirect $redirect";
+	    $dord = 1;
+	}
     }
 
     my $hit_timeout = 0;
@@ -1734,6 +1784,7 @@ sub run_command {
 	}
 	print LOG $line if ($dolog);
 	print RD $line if ($dord);
+	print $line if ($dostdout);
     }
 
     waitpid($pid, 0);
@@ -1812,7 +1863,7 @@ sub get_grub2_index {
     $ssh_grub =~ s,\$SSH_COMMAND,cat $grub_file,g;
 
     open(IN, "$ssh_grub |")
-	or die "unable to get $grub_file";
+	or dodie "unable to get $grub_file";
 
     my $found = 0;
 
@@ -1821,13 +1872,13 @@ sub get_grub2_index {
 	    $grub_number++;
 	    $found = 1;
 	    last;
-	} elsif (/^menuentry\s/) {
+	} elsif (/^menuentry\s|^submenu\s/) {
 	    $grub_number++;
 	}
     }
     close(IN);
 
-    die "Could not find '$grub_menu' in $grub_file on $machine"
+    dodie "Could not find '$grub_menu' in $grub_file on $machine"
 	if (!$found);
     doprint "$grub_number\n";
     $last_grub_menu = $grub_menu;
@@ -1855,7 +1906,7 @@ sub get_grub_index {
     $ssh_grub =~ s,\$SSH_COMMAND,cat /boot/grub/menu.lst,g;
 
     open(IN, "$ssh_grub |")
-	or die "unable to get menu.lst";
+	or dodie "unable to get menu.lst";
 
     my $found = 0;
 
@@ -1870,7 +1921,7 @@ sub get_grub_index {
     }
     close(IN);
 
-    die "Could not find '$grub_menu' in /boot/grub/menu on $machine"
+    dodie "Could not find '$grub_menu' in /boot/grub/menu on $machine"
 	if (!$found);
     doprint "$grub_number\n";
     $last_grub_menu = $grub_menu;
@@ -1983,7 +2034,7 @@ sub monitor {
     my $full_line = "";
 
     open(DMESG, "> $dmesg") or
-	die "unable to write to $dmesg";
+	dodie "unable to write to $dmesg";
 
     reboot_to;
 
@@ -2862,7 +2913,7 @@ sub run_bisect {
 sub update_bisect_replay {
     my $tmp_log = "$tmpdir/ktest_bisect_log";
     run_command "git bisect log > $tmp_log" or
-	die "can't create bisect log";
+	dodie "can't create bisect log";
     return $tmp_log;
 }
 
@@ -2871,9 +2922,9 @@ sub bisect {
 
     my $result;
 
-    die "BISECT_GOOD[$i] not defined\n"	if (!defined($bisect_good));
-    die "BISECT_BAD[$i] not defined\n"	if (!defined($bisect_bad));
-    die "BISECT_TYPE[$i] not defined\n"	if (!defined($bisect_type));
+    dodie "BISECT_GOOD[$i] not defined\n"	if (!defined($bisect_good));
+    dodie "BISECT_BAD[$i] not defined\n"	if (!defined($bisect_bad));
+    dodie "BISECT_TYPE[$i] not defined\n"	if (!defined($bisect_type));
 
     my $good = $bisect_good;
     my $bad = $bisect_bad;
@@ -2936,7 +2987,7 @@ sub bisect {
 	if ($check ne "good") {
 	    doprint "TESTING BISECT BAD [$bad]\n";
 	    run_command "git checkout $bad" or
-		die "Failed to checkout $bad";
+		dodie "Failed to checkout $bad";
 
 	    $result = run_bisect $type;
 
@@ -2948,7 +2999,7 @@ sub bisect {
 	if ($check ne "bad") {
 	    doprint "TESTING BISECT GOOD [$good]\n";
 	    run_command "git checkout $good" or
-		die "Failed to checkout $good";
+		dodie "Failed to checkout $good";
 
 	    $result = run_bisect $type;
 
@@ -2959,7 +3010,7 @@ sub bisect {
 
 	# checkout where we started
 	run_command "git checkout $head" or
-	    die "Failed to checkout $head";
+	    dodie "Failed to checkout $head";
     }
 
     run_command "git bisect start$start_files" or
@@ -3092,76 +3143,6 @@ sub create_config {
     make_oldconfig;
 }
 
-# compare two config hashes, and return configs with different vals.
-# It returns B's config values, but you can use A to see what A was.
-sub diff_config_vals {
-    my ($pa, $pb) = @_;
-
-    # crappy Perl way to pass in hashes.
-    my %a = %{$pa};
-    my %b = %{$pb};
-
-    my %ret;
-
-    foreach my $item (keys %a) {
-	if (defined($b{$item}) && $b{$item} ne $a{$item}) {
-	    $ret{$item} = $b{$item};
-	}
-    }
-
-    return %ret;
-}
-
-# compare two config hashes and return the configs in B but not A
-sub diff_configs {
-    my ($pa, $pb) = @_;
-
-    my %ret;
-
-    # crappy Perl way to pass in hashes.
-    my %a = %{$pa};
-    my %b = %{$pb};
-
-    foreach my $item (keys %b) {
-	if (!defined($a{$item})) {
-	    $ret{$item} = $b{$item};
-	}
-    }
-
-    return %ret;
-}
-
-# return if two configs are equal or not
-# 0 is equal +1 b has something a does not
-# +1 if a and b have a different item.
-# -1 if a has something b does not
-sub compare_configs {
-    my ($pa, $pb) = @_;
-
-    my %ret;
-
-    # crappy Perl way to pass in hashes.
-    my %a = %{$pa};
-    my %b = %{$pb};
-
-    foreach my $item (keys %b) {
-	if (!defined($a{$item})) {
-	    return 1;
-	}
-	if ($a{$item} ne $b{$item}) {
-	    return 1;
-	}
-    }
-
-    foreach my $item (keys %a) {
-	if (!defined($b{$item})) {
-	    return -1;
-	}
-    }
-
-    return 0;
-}
-
 sub run_config_bisect_test {
     my ($type) = @_;
 
@@ -3174,166 +3155,57 @@ sub run_config_bisect_test {
     return $ret;
 }
 
-sub process_failed {
-    my ($config) = @_;
+sub config_bisect_end {
+    my ($good, $bad) = @_;
+    my $diffexec = "diff -u";
 
+    if (-f "$builddir/scripts/diffconfig") {
+	$diffexec = "$builddir/scripts/diffconfig";
+    }
     doprint "\n\n***************************************\n";
-    doprint "Found bad config: $config\n";
+    doprint "No more config bisecting possible.\n";
+    run_command "$diffexec $good $bad", 1;
     doprint "***************************************\n\n";
 }
 
-# used for config bisecting
-my $good_config;
-my $bad_config;
-
-sub process_new_config {
-    my ($tc, $nc, $gc, $bc) = @_;
-
-    my %tmp_config = %{$tc};
-    my %good_configs = %{$gc};
-    my %bad_configs = %{$bc};
-
-    my %new_configs;
-
-    my $runtest = 1;
-    my $ret;
-
-    create_config "tmp_configs", \%tmp_config;
-    assign_configs \%new_configs, $output_config;
-
-    $ret = compare_configs \%new_configs, \%bad_configs;
-    if (!$ret) {
-	doprint "New config equals bad config, try next test\n";
-	$runtest = 0;
-    }
-
-    if ($runtest) {
-	$ret = compare_configs \%new_configs, \%good_configs;
-	if (!$ret) {
-	    doprint "New config equals good config, try next test\n";
-	    $runtest = 0;
-	}
-    }
-
-    %{$nc} = %new_configs;
-
-    return $runtest;
-}
-
 sub run_config_bisect {
-    my ($pgood, $pbad) = @_;
-
-    my $type = $config_bisect_type;
-
-    my %good_configs = %{$pgood};
-    my %bad_configs = %{$pbad};
-
-    my %diff_configs = diff_config_vals \%good_configs, \%bad_configs;
-    my %b_configs = diff_configs \%good_configs, \%bad_configs;
-    my %g_configs = diff_configs \%bad_configs, \%good_configs;
-
-    my @diff_arr = keys %diff_configs;
-    my $len_diff = $#diff_arr + 1;
-
-    my @b_arr = keys %b_configs;
-    my $len_b = $#b_arr + 1;
-
-    my @g_arr = keys %g_configs;
-    my $len_g = $#g_arr + 1;
-
-    my $runtest = 1;
-    my %new_configs;
+    my ($good, $bad, $last_result) = @_;
+    my $reset = "";
+    my $cmd;
     my $ret;
 
-    # First, lets get it down to a single subset.
-    # Is the problem with a difference in values?
-    # Is the problem with a missing config?
-    # Is the problem with a config that breaks things?
-
-    # Enable all of one set and see if we get a new bad
-    # or good config.
-
-    # first set the good config to the bad values.
-
-    doprint "d=$len_diff g=$len_g b=$len_b\n";
-
-    # first lets enable things in bad config that are enabled in good config
-
-    if ($len_diff > 0) {
-	if ($len_b > 0 || $len_g > 0) {
-	    my %tmp_config = %bad_configs;
-
-	    doprint "Set tmp config to be bad config with good config values\n";
-	    foreach my $item (@diff_arr) {
-		$tmp_config{$item} = $good_configs{$item};
-	    }
-
-	    $runtest = process_new_config \%tmp_config, \%new_configs,
-			    \%good_configs, \%bad_configs;
-	}
+    if (!length($last_result)) {
+	$reset = "-r";
     }
+    run_command "$config_bisect_exec $reset -b $outputdir $good $bad $last_result", 1;
 
-    if (!$runtest && $len_diff > 0) {
-
-	if ($len_diff == 1) {
-	    process_failed $diff_arr[0];
-	    return 1;
-	}
-	my %tmp_config = %bad_configs;
-
-	my $half = int($#diff_arr / 2);
-	my @tophalf = @diff_arr[0 .. $half];
-
-	doprint "Settings bisect with top half:\n";
-	doprint "Set tmp config to be bad config with some good config values\n";
-	foreach my $item (@tophalf) {
-	    $tmp_config{$item} = $good_configs{$item};
-	}
-
-	$runtest = process_new_config \%tmp_config, \%new_configs,
-			    \%good_configs, \%bad_configs;
-
-	if (!$runtest) {
-	    my %tmp_config = %bad_configs;
-
-	    doprint "Try bottom half\n";
-
-	    my @bottomhalf = @diff_arr[$half+1 .. $#diff_arr];
-
-	    foreach my $item (@bottomhalf) {
-		$tmp_config{$item} = $good_configs{$item};
-	    }
-
-	    $runtest = process_new_config \%tmp_config, \%new_configs,
-			    \%good_configs, \%bad_configs;
-	}
+    # config-bisect returns:
+    #   0 if there is more to bisect
+    #   1 for finding a good config
+    #   2 if it can not find any more configs
+    #  -1 (255) on error
+    if ($run_command_status) {
+	return $run_command_status;
     }
 
-    if ($runtest) {
-	$ret = run_config_bisect_test $type;
-	if ($ret) {
-	    doprint "NEW GOOD CONFIG\n";
-	    %good_configs = %new_configs;
-	    run_command "mv $good_config ${good_config}.last";
-	    save_config \%good_configs, $good_config;
-	    %{$pgood} = %good_configs;
-	} else {
-	    doprint "NEW BAD CONFIG\n";
-	    %bad_configs = %new_configs;
-	    run_command "mv $bad_config ${bad_config}.last";
-	    save_config \%bad_configs, $bad_config;
-	    %{$pbad} = %bad_configs;
-	}
-	return 0;
+    $ret = run_config_bisect_test $config_bisect_type;
+    if ($ret) {
+        doprint "NEW GOOD CONFIG\n";
+	# Return 3 for good config
+	return 3;
+    } else {
+        doprint "NEW BAD CONFIG\n";
+	# Return 4 for bad config
+	return 4;
     }
-
-    fail "Hmm, need to do a mix match?\n";
-    return -1;
 }
 
 sub config_bisect {
     my ($i) = @_;
 
+    my $good_config;
+    my $bad_config;
+
     my $type = $config_bisect_type;
     my $ret;
 
@@ -3353,6 +3225,24 @@ sub config_bisect {
 	$good_config = $output_config;
     }
 
+    if (!defined($config_bisect_exec)) {
+	# Take the first candidate that exists and is executable.
+	my @locations = ( "$pwd/config-bisect.pl",
+			  "$dirname/config-bisect.pl",
+			  "$builddir/tools/testing/ktest/config-bisect.pl" );
+	foreach my $loc (@locations) {
+	    doprint "loc = $loc\n";
+	    next unless (-x $loc);
+	    $config_bisect_exec = $loc;
+	    last;
+	}
+	if (!defined($config_bisect_exec)) {
+	    fail "Could not find an executable config-bisect.pl\n",
+		"  Set CONFIG_BISECT_EXEC to point to config-bisect.pl";
+	    return 1;
+	}
+    }
+
     # we don't want min configs to cause issues here.
     doprint "Disabling 'MIN_CONFIG' for this test\n";
     undef $minconfig;
@@ -3361,21 +3251,31 @@ sub config_bisect {
     my %bad_configs;
     my %tmp_configs;
 
+    if (-f "$tmpdir/good_config.tmp" || -f "$tmpdir/bad_config.tmp") {
+	if (read_yn "Interrupted config-bisect. Continue (n - will start new)?") {
+	    if (-f "$tmpdir/good_config.tmp") {
+		$good_config = "$tmpdir/good_config.tmp";
+	    } else {
+		$good_config = "$tmpdir/good_config";
+	    }
+	    if (-f "$tmpdir/bad_config.tmp") {
+		$bad_config = "$tmpdir/bad_config.tmp";
+	    } else {
+		$bad_config = "$tmpdir/bad_config";
+	    }
+	}
+    }
     doprint "Run good configs through make oldconfig\n";
     assign_configs \%tmp_configs, $good_config;
     create_config "$good_config", \%tmp_configs;
-    assign_configs \%good_configs, $output_config;
+    $good_config = "$tmpdir/good_config";
+    system("cp $output_config $good_config") == 0 or dodie "cp good config";
 
     doprint "Run bad configs through make oldconfig\n";
     assign_configs \%tmp_configs, $bad_config;
     create_config "$bad_config", \%tmp_configs;
-    assign_configs \%bad_configs, $output_config;
-
-    $good_config = "$tmpdir/good_config";
     $bad_config = "$tmpdir/bad_config";
-
-    save_config \%good_configs, $good_config;
-    save_config \%bad_configs, $bad_config;
+    system("cp $output_config $bad_config") == 0 or dodie "cp bad config";
 
     if (defined($config_bisect_check) && $config_bisect_check ne "0") {
 	if ($config_bisect_check ne "good") {
@@ -3398,10 +3298,21 @@ sub config_bisect {
 	}
     }
 
+    my $last_run = "";
+
     do {
-	$ret = run_config_bisect \%good_configs, \%bad_configs;
+	$ret = run_config_bisect $good_config, $bad_config, $last_run;
+	if ($ret == 3) {
+	    $last_run = "good";
+	} elsif ($ret == 4) {
+	    $last_run = "bad";
+	}
 	print_times;
-    } while (!$ret);
+    } while ($ret == 3 || $ret == 4);
+
+    if ($ret == 2) {
+        config_bisect_end "$good_config.tmp", "$bad_config.tmp";
+    }
 
     return $ret if ($ret < 0);
 
@@ -3416,9 +3327,9 @@ sub patchcheck_reboot {
 sub patchcheck {
     my ($i) = @_;
 
-    die "PATCHCHECK_START[$i] not defined\n"
+    dodie "PATCHCHECK_START[$i] not defined\n"
 	if (!defined($patchcheck_start));
-    die "PATCHCHECK_TYPE[$i] not defined\n"
+    dodie "PATCHCHECK_TYPE[$i] not defined\n"
 	if (!defined($patchcheck_type));
 
     my $start = $patchcheck_start;
@@ -3432,7 +3343,7 @@ sub patchcheck {
     if (defined($patchcheck_end)) {
 	$end = $patchcheck_end;
     } elsif ($cherry) {
-	die "PATCHCHECK_END must be defined with PATCHCHECK_CHERRY\n";
+	dodie "PATCHCHECK_END must be defined with PATCHCHECK_CHERRY\n";
     }
 
     # Get the true sha1's since we can use things like HEAD~3
@@ -3496,7 +3407,7 @@ sub patchcheck {
 	doprint "\nProcessing commit \"$item\"\n\n";
 
 	run_command "git checkout $sha1" or
-	    die "Failed to checkout $sha1";
+	    dodie "Failed to checkout $sha1";
 
 	# only clean on the first and last patch
 	if ($item eq $list[0] ||
@@ -3587,7 +3498,7 @@ sub read_kconfig {
     }
 
     open(KIN, "$kconfig")
-	or die "Can't open $kconfig";
+	or dodie "Can't open $kconfig";
     while (<KIN>) {
 	chomp;
 
@@ -3746,7 +3657,7 @@ sub get_depends {
 
 	    $dep =~ s/^[^$valid]*[$valid]+//;
 	} else {
-	    die "this should never happen";
+	    dodie "this should never happen";
 	}
     }
 
@@ -4007,7 +3918,7 @@ sub make_min_config {
 	    # update new ignore configs
 	    if (defined($ignore_config)) {
 		open (OUT, ">$temp_config")
-		    or die "Can't write to $temp_config";
+		    or dodie "Can't write to $temp_config";
 		foreach my $config (keys %save_configs) {
 		    print OUT "$save_configs{$config}\n";
 		}
@@ -4035,7 +3946,7 @@ sub make_min_config {
 
 	    # Save off all the current mandatory configs
 	    open (OUT, ">$temp_config")
-		or die "Can't write to $temp_config";
+		or dodie "Can't write to $temp_config";
 	    foreach my $config (keys %keep_configs) {
 		print OUT "$keep_configs{$config}\n";
 	    }
@@ -4222,6 +4133,74 @@ sub set_test_option {
     return eval_option($name, $option, $i);
 }
 
+# Look for an executable called $mailer in each PATH directory and then
+# in /usr/sbin (where sendmail usually lives).
+# Returns the directory holding it, or undef when none does.
+sub find_mailer {
+    my ($mailer) = @_;
+
+    # /usr/sbin goes last so entries from PATH take precedence.
+    my @dirs = (split(/:/, $ENV{PATH}), "/usr/sbin");
+
+    for my $dir (@dirs) {
+	return $dir if (-x "$dir/$mailer");
+    }
+
+    # explicit undef, as in "nothing found"
+    return undef;
+}
+
+# Send one mail. The $MAILER, $MAIL_PATH, $MAILTO, $SUBJECT and $MESSAGE
+# placeholders of the mail command template are expanded on every call.
+sub do_send_mail {
+    my ($subject, $message) = @_;
+    if (!defined($mail_path)) {
+	# find the mailer
+	$mail_path = find_mailer $mailer;
+	if (!defined($mail_path)) {
+	    die "\nCan not find $mailer in PATH\n";
+	}
+    }
+    if (!defined($mail_command)) {
+	if ($mailer eq "mail" || $mailer eq "mailx") {
+	    $mail_command = "\$MAIL_PATH/\$MAILER -s \'\$SUBJECT\' \$MAILTO <<< \'\$MESSAGE\'";
+	} elsif ($mailer eq "sendmail" ) {
+	    $mail_command =  "echo \'Subject: \$SUBJECT\n\n\$MESSAGE\' | \$MAIL_PATH/\$MAILER -t \$MAILTO";
+	} else {
+	    die "\nYour mailer: $mailer is not supported.\n";
+	}
+    }
+    # Expand a copy: the template must survive for the next mail.
+    my $cmd = $mail_command;
+    $cmd =~ s/\$MAILER/$mailer/g;
+    $cmd =~ s/\$MAIL_PATH/$mail_path/g;
+    $cmd =~ s/\$MAILTO/$mailto/g;
+    $cmd =~ s/\$SUBJECT/$subject/g;
+    $cmd =~ s/\$MESSAGE/$message/g;
+    run_command $cmd;
+}
+
+# Send a notification through do_send_mail, but only when MAILTO is set.
+sub send_email {
+    if (defined($mailto)) {
+	unless (defined($mailer)) {
+	    doprint "No email sent: email or mailer not specified in config.\n";
+	    return;
+	}
+	do_send_mail(@_);
+    }
+}
+
+# SIGINT handler: mail a notice if EMAIL_WHEN_CANCELED is set, then die.
+sub cancel_test {
+    send_email("KTEST: Your [$test_type] test was cancelled",
+	"Your test started at $script_start_time was cancelled: sig int")
+	if ($email_when_canceled);
+    die "\nCaught Sig Int, test interrupted: $!\n";
+}
+
+$SIG{INT} = qw(cancel_test);
+
 # First we need to do is the builds
 for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
 
@@ -4245,11 +4224,11 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
     $outputdir = set_test_option("OUTPUT_DIR", $i);
     $builddir = set_test_option("BUILD_DIR", $i);
 
-    chdir $builddir || die "can't change directory to $builddir";
+    chdir $builddir or dodie "can't change directory to $builddir"; # "||" would bind to $builddir
 
     if (!-d $outputdir) {
 	mkpath($outputdir) or
-	    die "can't create $outputdir";
+	    dodie "can't create $outputdir";
     }
 
     $make = "$makecmd O=$outputdir";
@@ -4262,9 +4241,15 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
     $start_minconfig_defined = 1;
 
     # The first test may override the PRE_KTEST option
-    if (defined($pre_ktest) && $i == 1) {
-	doprint "\n";
-	run_command $pre_ktest;
+    if ($i == 1) {
+        if (defined($pre_ktest)) {
+            doprint "\n";
+            run_command $pre_ktest;
+        }
+        if ($email_when_started) {
+            send_email("KTEST: Your [$test_type] test was started",
+                "Your test was started on $script_start_time");
+        }
     }
 
     # Any test can override the POST_KTEST option
@@ -4280,7 +4265,7 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
 
     if (!-d $tmpdir) {
 	mkpath($tmpdir) or
-	    die "can't create $tmpdir";
+	    dodie "can't create $tmpdir";
     }
 
     $ENV{"SSH_USER"} = $ssh_user;
@@ -4353,7 +4338,7 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
 
     if (defined($checkout)) {
 	run_command "git checkout $checkout" or
-	    die "failed to checkout $checkout";
+	    dodie "failed to checkout $checkout";
     }
 
     $no_reboot = 0;
@@ -4428,4 +4413,8 @@ if ($opt{"POWEROFF_ON_SUCCESS"}) {
 
 doprint "\n    $successes of $opt{NUM_TESTS} tests were successful\n\n";
 
+if ($email_when_finished) {
+    send_email("KTEST: Your [$test_type] test has finished!",
+            "$successes of $opt{NUM_TESTS} tests started at $script_start_time were successful!");
+}
 exit 0;
diff --git a/tools/testing/ktest/sample.conf b/tools/testing/ktest/sample.conf
index 6c58cd8bbbae..6ca6ca0ce695 100644
--- a/tools/testing/ktest/sample.conf
+++ b/tools/testing/ktest/sample.conf
@@ -1,6 +1,11 @@
 #
 # Config file for ktest.pl
 #
+# Place your customized version of this file in the working directory that
+# ktest.pl is run from. By default, ktest.pl will look for a file
+# called "ktest.conf", but you can name it anything you like and specify
+# the name of your config file as the first argument of ktest.pl.
+#
 # Note, all paths must be absolute
 #
 
@@ -396,6 +401,44 @@
 
 #### Optional Config Options (all have defaults) ####
 
+# Email options for receiving notifications. Users must set up
+# the specified mailer prior to using this feature.
+#
+# (default undefined)
+#MAILTO =
+#
+# Supported mailers: sendmail, mail, mailx
+# (default sendmail)
+#MAILER = sendmail
+#
+# The executable to run
+# (default: for sendmail "/usr/sbin/sendmail", otherwise equals ${MAILER})
+#MAIL_EXEC = /usr/sbin/sendmail
+#
+# The command used to send mail, which uses the above options
+# can be modified. By default if the mailer is "sendmail" then
+#  MAIL_COMMAND = echo \'Subject: $SUBJECT\n\n$MESSAGE\' | $MAIL_PATH/$MAILER -t $MAILTO
+# For mail or mailx:
+#  MAIL_COMMAND = $MAIL_PATH/$MAILER -s \'$SUBJECT\' $MAILTO <<< \'$MESSAGE\'
+# ktest.pl will do the substitution for MAIL_PATH, MAILER, MAILTO at the time
+#    it sends the mail if "$FOO" format is used. If "${FOO}" format is used,
+#    then the substitutions will occur at the time the config file is read.
+#    But note, MAIL_PATH and MAILER require being set by the config file if
+#     ${MAIL_PATH} or ${MAILER} are used, but not if $MAIL_PATH or $MAILER are.
+#MAIL_COMMAND = echo \'Subject: $SUBJECT\n\n$MESSAGE\' | $MAIL_PATH/$MAILER -t $MAILTO
+#
+# Errors are defined as those that would terminate the script
+# (default 1)
+#EMAIL_ON_ERROR = 1
+# (default 1)
+#EMAIL_WHEN_FINISHED = 1
+# (default 0)
+#EMAIL_WHEN_STARTED = 1
+#
+# Users can cancel the test with Ctrl-C
+# (default 0)
+#EMAIL_WHEN_CANCELED = 1
+
 # Start a test setup. If you leave this off, all options
 # will be default and the test will run once.
 # This is a label and not really an option (it takes no value).
@@ -725,6 +768,13 @@
 # (default 120)
 #TIMEOUT = 120
 
+# The timeout in seconds used when testing whether the box can be
+# rebooted. Before issuing the reboot command, an ssh connection
+# is attempted to see if the target machine is still active.
+# If the target does not connect within this timeout, a power cycle
+# is issued instead of a reboot.
+# CONNECT_TIMEOUT = 25
+
 # In between tests, a reboot of the box may occur, and this
 # is the time to wait for the console after it stops producing
 # output. Some machines may not produce a large lag on reboot
@@ -1167,6 +1217,16 @@
 #  Set it to "good" to test only the good config and set it
 #  to "bad" to only test the bad config.
 #
+# CONFIG_BISECT_EXEC (optional)
+#  The config bisect is a separate program that comes with ktest.pl.
+#  By default, it will look for:
+#    `pwd`/config-bisect.pl # the location ktest.pl was executed from.
+#  If it does not find it there, it will look for:
+#    `dirname <ktest.pl>`/config-bisect.pl # The directory that holds ktest.pl
+#  If it does not find it there, it will look for:
+#    ${BUILD_DIR}/tools/testing/ktest/config-bisect.pl
+#  Setting CONFIG_BISECT_EXEC will override where it looks.
+#
 # Example:
 #   TEST_START
 #   TEST_TYPE = config_bisect
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index 620fa78b3b1b..cb166be4918d 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -104,7 +104,8 @@ enum {
 	NUM_HINTS = 8,
 	NUM_BDW = NUM_DCR,
 	NUM_SPA = NUM_PM + NUM_DCR + NUM_BDW,
-	NUM_MEM = NUM_DCR + NUM_BDW + 2 /* spa0 iset */ + 4 /* spa1 iset */,
+	NUM_MEM = NUM_DCR + NUM_BDW + 2 /* spa0 iset */
+		+ 4 /* spa1 iset */ + 1 /* spa11 iset */,
 	DIMM_SIZE = SZ_32M,
 	LABEL_SIZE = SZ_128K,
 	SPA_VCD_SIZE = SZ_4M,
@@ -153,6 +154,7 @@ struct nfit_test {
 	void *nfit_buf;
 	dma_addr_t nfit_dma;
 	size_t nfit_size;
+	size_t nfit_filled;
 	int dcr_idx;
 	int num_dcr;
 	int num_pm;
@@ -709,7 +711,9 @@ static void smart_notify(struct device *bus_dev,
 				>= thresh->media_temperature)
 			|| ((thresh->alarm_control & ND_INTEL_SMART_CTEMP_TRIP)
 				&& smart->ctrl_temperature
-				>= thresh->ctrl_temperature)) {
+				>= thresh->ctrl_temperature)
+			|| (smart->health != ND_INTEL_SMART_NON_CRITICAL_HEALTH)
+			|| (smart->shutdown_state != 0)) {
 		device_lock(bus_dev);
 		__acpi_nvdimm_notify(dimm_dev, 0x81);
 		device_unlock(bus_dev);
@@ -735,6 +739,32 @@ static int nfit_test_cmd_smart_set_threshold(
 	return 0;
 }
 
+static int nfit_test_cmd_smart_inject(
+		struct nd_intel_smart_inject *inj,
+		unsigned int buf_len,
+		struct nd_intel_smart_threshold *thresh,
+		struct nd_intel_smart *smart,
+		struct device *bus_dev, struct device *dimm_dev)
+{
+	if (buf_len != sizeof(*inj))
+		return -EINVAL;
+
+	if (inj->mtemp_enable)
+		smart->media_temperature = inj->media_temperature;
+	if (inj->spare_enable)
+		smart->spares = inj->spares;
+	if (inj->fatal_enable)
+		smart->health = ND_INTEL_SMART_FATAL_HEALTH;
+	if (inj->unsafe_shutdown_enable) {
+		smart->shutdown_state = 1;
+		smart->shutdown_count++;
+	}
+	inj->status = 0;
+	smart_notify(bus_dev, dimm_dev, smart, thresh);
+
+	return 0;
+}
+
 static void uc_error_notify(struct work_struct *work)
 {
 	struct nfit_test *t = container_of(work, typeof(*t), work);
@@ -935,6 +965,13 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
 							t->dcr_idx],
 						&t->smart[i - t->dcr_idx],
 						&t->pdev.dev, t->dimm_dev[i]);
+			case ND_INTEL_SMART_INJECT:
+				return nfit_test_cmd_smart_inject(buf,
+						buf_len,
+						&t->smart_threshold[i -
+							t->dcr_idx],
+						&t->smart[i - t->dcr_idx],
+						&t->pdev.dev, t->dimm_dev[i]);
 			default:
 				return -ENOTTY;
 			}
@@ -1222,7 +1259,7 @@ static void smart_init(struct nfit_test *t)
 			| ND_INTEL_SMART_MTEMP_VALID,
 		.health = ND_INTEL_SMART_NON_CRITICAL_HEALTH,
 		.media_temperature = 23 * 16,
-		.ctrl_temperature = 30 * 16,
+		.ctrl_temperature = 25 * 16,
 		.pmic_temperature = 40 * 16,
 		.spares = 75,
 		.alarm_flags = ND_INTEL_SMART_SPARE_TRIP
@@ -1366,7 +1403,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	struct acpi_nfit_data_region *bdw;
 	struct acpi_nfit_flush_address *flush;
 	struct acpi_nfit_capabilities *pcap;
-	unsigned int offset, i;
+	unsigned int offset = 0, i;
 
 	/*
 	 * spa0 (interleave first half of dimm0 and dimm1, note storage
@@ -1380,93 +1417,102 @@ static void nfit_test0_setup(struct nfit_test *t)
 	spa->range_index = 0+1;
 	spa->address = t->spa_set_dma[0];
 	spa->length = SPA0_SIZE;
+	offset += spa->header.length;
 
 	/*
 	 * spa1 (interleave last half of the 4 DIMMS, note storage
 	 * does not actually alias the related block-data-window
 	 * regions)
 	 */
-	spa = nfit_buf + sizeof(*spa);
+	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
 	spa->header.length = sizeof(*spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
 	spa->range_index = 1+1;
 	spa->address = t->spa_set_dma[1];
 	spa->length = SPA1_SIZE;
+	offset += spa->header.length;
 
 	/* spa2 (dcr0) dimm0 */
-	spa = nfit_buf + sizeof(*spa) * 2;
+	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
 	spa->header.length = sizeof(*spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
 	spa->range_index = 2+1;
 	spa->address = t->dcr_dma[0];
 	spa->length = DCR_SIZE;
+	offset += spa->header.length;
 
 	/* spa3 (dcr1) dimm1 */
-	spa = nfit_buf + sizeof(*spa) * 3;
+	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
 	spa->header.length = sizeof(*spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
 	spa->range_index = 3+1;
 	spa->address = t->dcr_dma[1];
 	spa->length = DCR_SIZE;
+	offset += spa->header.length;
 
 	/* spa4 (dcr2) dimm2 */
-	spa = nfit_buf + sizeof(*spa) * 4;
+	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
 	spa->header.length = sizeof(*spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
 	spa->range_index = 4+1;
 	spa->address = t->dcr_dma[2];
 	spa->length = DCR_SIZE;
+	offset += spa->header.length;
 
 	/* spa5 (dcr3) dimm3 */
-	spa = nfit_buf + sizeof(*spa) * 5;
+	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
 	spa->header.length = sizeof(*spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
 	spa->range_index = 5+1;
 	spa->address = t->dcr_dma[3];
 	spa->length = DCR_SIZE;
+	offset += spa->header.length;
 
 	/* spa6 (bdw for dcr0) dimm0 */
-	spa = nfit_buf + sizeof(*spa) * 6;
+	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
 	spa->header.length = sizeof(*spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
 	spa->range_index = 6+1;
 	spa->address = t->dimm_dma[0];
 	spa->length = DIMM_SIZE;
+	offset += spa->header.length;
 
 	/* spa7 (bdw for dcr1) dimm1 */
-	spa = nfit_buf + sizeof(*spa) * 7;
+	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
 	spa->header.length = sizeof(*spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
 	spa->range_index = 7+1;
 	spa->address = t->dimm_dma[1];
 	spa->length = DIMM_SIZE;
+	offset += spa->header.length;
 
 	/* spa8 (bdw for dcr2) dimm2 */
-	spa = nfit_buf + sizeof(*spa) * 8;
+	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
 	spa->header.length = sizeof(*spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
 	spa->range_index = 8+1;
 	spa->address = t->dimm_dma[2];
 	spa->length = DIMM_SIZE;
+	offset += spa->header.length;
 
 	/* spa9 (bdw for dcr3) dimm3 */
-	spa = nfit_buf + sizeof(*spa) * 9;
+	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
 	spa->header.length = sizeof(*spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
 	spa->range_index = 9+1;
 	spa->address = t->dimm_dma[3];
 	spa->length = DIMM_SIZE;
+	offset += spa->header.length;
 
-	offset = sizeof(*spa) * 10;
 	/* mem-region0 (spa0, dimm0) */
 	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
@@ -1481,9 +1527,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->address = 0;
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 2;
+	offset += memdev->header.length;
 
 	/* mem-region1 (spa0, dimm1) */
-	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map);
+	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
 	memdev->header.length = sizeof(*memdev);
 	memdev->device_handle = handle[1];
@@ -1497,9 +1544,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 2;
 	memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;
+	offset += memdev->header.length;
 
 	/* mem-region2 (spa1, dimm0) */
-	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 2;
+	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
 	memdev->header.length = sizeof(*memdev);
 	memdev->device_handle = handle[0];
@@ -1513,9 +1561,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 4;
 	memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;
+	offset += memdev->header.length;
 
 	/* mem-region3 (spa1, dimm1) */
-	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 3;
+	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
 	memdev->header.length = sizeof(*memdev);
 	memdev->device_handle = handle[1];
@@ -1528,9 +1577,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->address = SPA0_SIZE/2;
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 4;
+	offset += memdev->header.length;
 
 	/* mem-region4 (spa1, dimm2) */
-	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 4;
+	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
 	memdev->header.length = sizeof(*memdev);
 	memdev->device_handle = handle[2];
@@ -1544,9 +1594,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 4;
 	memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;
+	offset += memdev->header.length;
 
 	/* mem-region5 (spa1, dimm3) */
-	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 5;
+	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
 	memdev->header.length = sizeof(*memdev);
 	memdev->device_handle = handle[3];
@@ -1559,9 +1610,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->address = SPA0_SIZE/2;
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 4;
+	offset += memdev->header.length;
 
 	/* mem-region6 (spa/dcr0, dimm0) */
-	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 6;
+	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
 	memdev->header.length = sizeof(*memdev);
 	memdev->device_handle = handle[0];
@@ -1574,9 +1626,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->address = 0;
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 1;
+	offset += memdev->header.length;
 
 	/* mem-region7 (spa/dcr1, dimm1) */
-	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 7;
+	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
 	memdev->header.length = sizeof(*memdev);
 	memdev->device_handle = handle[1];
@@ -1589,9 +1642,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->address = 0;
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 1;
+	offset += memdev->header.length;
 
 	/* mem-region8 (spa/dcr2, dimm2) */
-	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 8;
+	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
 	memdev->header.length = sizeof(*memdev);
 	memdev->device_handle = handle[2];
@@ -1604,9 +1658,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->address = 0;
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 1;
+	offset += memdev->header.length;
 
 	/* mem-region9 (spa/dcr3, dimm3) */
-	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 9;
+	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
 	memdev->header.length = sizeof(*memdev);
 	memdev->device_handle = handle[3];
@@ -1619,9 +1674,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->address = 0;
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 1;
+	offset += memdev->header.length;
 
 	/* mem-region10 (spa/bdw0, dimm0) */
-	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 10;
+	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
 	memdev->header.length = sizeof(*memdev);
 	memdev->device_handle = handle[0];
@@ -1634,9 +1690,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->address = 0;
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 1;
+	offset += memdev->header.length;
 
 	/* mem-region11 (spa/bdw1, dimm1) */
-	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 11;
+	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
 	memdev->header.length = sizeof(*memdev);
 	memdev->device_handle = handle[1];
@@ -1649,9 +1706,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->address = 0;
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 1;
+	offset += memdev->header.length;
 
 	/* mem-region12 (spa/bdw2, dimm2) */
-	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 12;
+	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
 	memdev->header.length = sizeof(*memdev);
 	memdev->device_handle = handle[2];
@@ -1664,9 +1722,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->address = 0;
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 1;
+	offset += memdev->header.length;
 
 	/* mem-region13 (spa/dcr3, dimm3) */
-	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 13;
+	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
 	memdev->header.length = sizeof(*memdev);
 	memdev->device_handle = handle[3];
@@ -1680,12 +1739,12 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 1;
 	memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;
+	offset += memdev->header.length;
 
-	offset = offset + sizeof(struct acpi_nfit_memory_map) * 14;
 	/* dcr-descriptor0: blk */
 	dcr = nfit_buf + offset;
 	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
-	dcr->header.length = sizeof(struct acpi_nfit_control_region);
+	dcr->header.length = sizeof(*dcr);
 	dcr->region_index = 0+1;
 	dcr_common_init(dcr);
 	dcr->serial_number = ~handle[0];
@@ -1696,11 +1755,12 @@ static void nfit_test0_setup(struct nfit_test *t)
 	dcr->command_size = 8;
 	dcr->status_offset = 8;
 	dcr->status_size = 4;
+	offset += dcr->header.length;
 
 	/* dcr-descriptor1: blk */
-	dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region);
+	dcr = nfit_buf + offset;
 	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
-	dcr->header.length = sizeof(struct acpi_nfit_control_region);
+	dcr->header.length = sizeof(*dcr);
 	dcr->region_index = 1+1;
 	dcr_common_init(dcr);
 	dcr->serial_number = ~handle[1];
@@ -1711,11 +1771,12 @@ static void nfit_test0_setup(struct nfit_test *t)
 	dcr->command_size = 8;
 	dcr->status_offset = 8;
 	dcr->status_size = 4;
+	offset += dcr->header.length;
 
 	/* dcr-descriptor2: blk */
-	dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region) * 2;
+	dcr = nfit_buf + offset;
 	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
-	dcr->header.length = sizeof(struct acpi_nfit_control_region);
+	dcr->header.length = sizeof(*dcr);
 	dcr->region_index = 2+1;
 	dcr_common_init(dcr);
 	dcr->serial_number = ~handle[2];
@@ -1726,11 +1787,12 @@ static void nfit_test0_setup(struct nfit_test *t)
 	dcr->command_size = 8;
 	dcr->status_offset = 8;
 	dcr->status_size = 4;
+	offset += dcr->header.length;
 
 	/* dcr-descriptor3: blk */
-	dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region) * 3;
+	dcr = nfit_buf + offset;
 	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
-	dcr->header.length = sizeof(struct acpi_nfit_control_region);
+	dcr->header.length = sizeof(*dcr);
 	dcr->region_index = 3+1;
 	dcr_common_init(dcr);
 	dcr->serial_number = ~handle[3];
@@ -1741,8 +1803,8 @@ static void nfit_test0_setup(struct nfit_test *t)
 	dcr->command_size = 8;
 	dcr->status_offset = 8;
 	dcr->status_size = 4;
+	offset += dcr->header.length;
 
-	offset = offset + sizeof(struct acpi_nfit_control_region) * 4;
 	/* dcr-descriptor0: pmem */
 	dcr = nfit_buf + offset;
 	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
@@ -1753,10 +1815,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	dcr->serial_number = ~handle[0];
 	dcr->code = NFIT_FIC_BYTEN;
 	dcr->windows = 0;
+	offset += dcr->header.length;
 
 	/* dcr-descriptor1: pmem */
-	dcr = nfit_buf + offset + offsetof(struct acpi_nfit_control_region,
-			window_size);
+	dcr = nfit_buf + offset;
 	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
 	dcr->header.length = offsetof(struct acpi_nfit_control_region,
 			window_size);
@@ -1765,10 +1827,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	dcr->serial_number = ~handle[1];
 	dcr->code = NFIT_FIC_BYTEN;
 	dcr->windows = 0;
+	offset += dcr->header.length;
 
 	/* dcr-descriptor2: pmem */
-	dcr = nfit_buf + offset + offsetof(struct acpi_nfit_control_region,
-			window_size) * 2;
+	dcr = nfit_buf + offset;
 	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
 	dcr->header.length = offsetof(struct acpi_nfit_control_region,
 			window_size);
@@ -1777,10 +1839,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	dcr->serial_number = ~handle[2];
 	dcr->code = NFIT_FIC_BYTEN;
 	dcr->windows = 0;
+	offset += dcr->header.length;
 
 	/* dcr-descriptor3: pmem */
-	dcr = nfit_buf + offset + offsetof(struct acpi_nfit_control_region,
-			window_size) * 3;
+	dcr = nfit_buf + offset;
 	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
 	dcr->header.length = offsetof(struct acpi_nfit_control_region,
 			window_size);
@@ -1789,54 +1851,56 @@ static void nfit_test0_setup(struct nfit_test *t)
 	dcr->serial_number = ~handle[3];
 	dcr->code = NFIT_FIC_BYTEN;
 	dcr->windows = 0;
+	offset += dcr->header.length;
 
-	offset = offset + offsetof(struct acpi_nfit_control_region,
-			window_size) * 4;
 	/* bdw0 (spa/dcr0, dimm0) */
 	bdw = nfit_buf + offset;
 	bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
-	bdw->header.length = sizeof(struct acpi_nfit_data_region);
+	bdw->header.length = sizeof(*bdw);
 	bdw->region_index = 0+1;
 	bdw->windows = 1;
 	bdw->offset = 0;
 	bdw->size = BDW_SIZE;
 	bdw->capacity = DIMM_SIZE;
 	bdw->start_address = 0;
+	offset += bdw->header.length;
 
 	/* bdw1 (spa/dcr1, dimm1) */
-	bdw = nfit_buf + offset + sizeof(struct acpi_nfit_data_region);
+	bdw = nfit_buf + offset;
 	bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
-	bdw->header.length = sizeof(struct acpi_nfit_data_region);
+	bdw->header.length = sizeof(*bdw);
 	bdw->region_index = 1+1;
 	bdw->windows = 1;
 	bdw->offset = 0;
 	bdw->size = BDW_SIZE;
 	bdw->capacity = DIMM_SIZE;
 	bdw->start_address = 0;
+	offset += bdw->header.length;
 
 	/* bdw2 (spa/dcr2, dimm2) */
-	bdw = nfit_buf + offset + sizeof(struct acpi_nfit_data_region) * 2;
+	bdw = nfit_buf + offset;
 	bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
-	bdw->header.length = sizeof(struct acpi_nfit_data_region);
+	bdw->header.length = sizeof(*bdw);
 	bdw->region_index = 2+1;
 	bdw->windows = 1;
 	bdw->offset = 0;
 	bdw->size = BDW_SIZE;
 	bdw->capacity = DIMM_SIZE;
 	bdw->start_address = 0;
+	offset += bdw->header.length;
 
 	/* bdw3 (spa/dcr3, dimm3) */
-	bdw = nfit_buf + offset + sizeof(struct acpi_nfit_data_region) * 3;
+	bdw = nfit_buf + offset;
 	bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
-	bdw->header.length = sizeof(struct acpi_nfit_data_region);
+	bdw->header.length = sizeof(*bdw);
 	bdw->region_index = 3+1;
 	bdw->windows = 1;
 	bdw->offset = 0;
 	bdw->size = BDW_SIZE;
 	bdw->capacity = DIMM_SIZE;
 	bdw->start_address = 0;
+	offset += bdw->header.length;
 
-	offset = offset + sizeof(struct acpi_nfit_data_region) * 4;
 	/* flush0 (dimm0) */
 	flush = nfit_buf + offset;
 	flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
@@ -1845,48 +1909,52 @@ static void nfit_test0_setup(struct nfit_test *t)
 	flush->hint_count = NUM_HINTS;
 	for (i = 0; i < NUM_HINTS; i++)
 		flush->hint_address[i] = t->flush_dma[0] + i * sizeof(u64);
+	offset += flush->header.length;
 
 	/* flush1 (dimm1) */
-	flush = nfit_buf + offset + flush_hint_size * 1;
+	flush = nfit_buf + offset;
 	flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
 	flush->header.length = flush_hint_size;
 	flush->device_handle = handle[1];
 	flush->hint_count = NUM_HINTS;
 	for (i = 0; i < NUM_HINTS; i++)
 		flush->hint_address[i] = t->flush_dma[1] + i * sizeof(u64);
+	offset += flush->header.length;
 
 	/* flush2 (dimm2) */
-	flush = nfit_buf + offset + flush_hint_size  * 2;
+	flush = nfit_buf + offset;
 	flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
 	flush->header.length = flush_hint_size;
 	flush->device_handle = handle[2];
 	flush->hint_count = NUM_HINTS;
 	for (i = 0; i < NUM_HINTS; i++)
 		flush->hint_address[i] = t->flush_dma[2] + i * sizeof(u64);
+	offset += flush->header.length;
 
 	/* flush3 (dimm3) */
-	flush = nfit_buf + offset + flush_hint_size * 3;
+	flush = nfit_buf + offset;
 	flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
 	flush->header.length = flush_hint_size;
 	flush->device_handle = handle[3];
 	flush->hint_count = NUM_HINTS;
 	for (i = 0; i < NUM_HINTS; i++)
 		flush->hint_address[i] = t->flush_dma[3] + i * sizeof(u64);
+	offset += flush->header.length;
 
 	/* platform capabilities */
-	pcap = nfit_buf + offset + flush_hint_size * 4;
+	pcap = nfit_buf + offset;
 	pcap->header.type = ACPI_NFIT_TYPE_CAPABILITIES;
 	pcap->header.length = sizeof(*pcap);
 	pcap->highest_capability = 1;
 	pcap->capabilities = ACPI_NFIT_CAPABILITY_CACHE_FLUSH |
 		ACPI_NFIT_CAPABILITY_MEM_FLUSH;
+	offset += pcap->header.length;
 
 	if (t->setup_hotplug) {
-		offset = offset + flush_hint_size * 4 + sizeof(*pcap);
 		/* dcr-descriptor4: blk */
 		dcr = nfit_buf + offset;
 		dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
-		dcr->header.length = sizeof(struct acpi_nfit_control_region);
+		dcr->header.length = sizeof(*dcr);
 		dcr->region_index = 8+1;
 		dcr_common_init(dcr);
 		dcr->serial_number = ~handle[4];
@@ -1897,8 +1965,8 @@ static void nfit_test0_setup(struct nfit_test *t)
 		dcr->command_size = 8;
 		dcr->status_offset = 8;
 		dcr->status_size = 4;
+		offset += dcr->header.length;
 
-		offset = offset + sizeof(struct acpi_nfit_control_region);
 		/* dcr-descriptor4: pmem */
 		dcr = nfit_buf + offset;
 		dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
@@ -1909,21 +1977,20 @@ static void nfit_test0_setup(struct nfit_test *t)
 		dcr->serial_number = ~handle[4];
 		dcr->code = NFIT_FIC_BYTEN;
 		dcr->windows = 0;
+		offset += dcr->header.length;
 
-		offset = offset + offsetof(struct acpi_nfit_control_region,
-				window_size);
 		/* bdw4 (spa/dcr4, dimm4) */
 		bdw = nfit_buf + offset;
 		bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
-		bdw->header.length = sizeof(struct acpi_nfit_data_region);
+		bdw->header.length = sizeof(*bdw);
 		bdw->region_index = 8+1;
 		bdw->windows = 1;
 		bdw->offset = 0;
 		bdw->size = BDW_SIZE;
 		bdw->capacity = DIMM_SIZE;
 		bdw->start_address = 0;
+		offset += bdw->header.length;
 
-		offset = offset + sizeof(struct acpi_nfit_data_region);
 		/* spa10 (dcr4) dimm4 */
 		spa = nfit_buf + offset;
 		spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
@@ -1932,30 +1999,32 @@ static void nfit_test0_setup(struct nfit_test *t)
 		spa->range_index = 10+1;
 		spa->address = t->dcr_dma[4];
 		spa->length = DCR_SIZE;
+		offset += spa->header.length;
 
 		/*
 		 * spa11 (single-dimm interleave for hotplug, note storage
 		 * does not actually alias the related block-data-window
 		 * regions)
 		 */
-		spa = nfit_buf + offset + sizeof(*spa);
+		spa = nfit_buf + offset;
 		spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
 		spa->header.length = sizeof(*spa);
 		memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
 		spa->range_index = 11+1;
 		spa->address = t->spa_set_dma[2];
 		spa->length = SPA0_SIZE;
+		offset += spa->header.length;
 
 		/* spa12 (bdw for dcr4) dimm4 */
-		spa = nfit_buf + offset + sizeof(*spa) * 2;
+		spa = nfit_buf + offset;
 		spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
 		spa->header.length = sizeof(*spa);
 		memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
 		spa->range_index = 12+1;
 		spa->address = t->dimm_dma[4];
 		spa->length = DIMM_SIZE;
+		offset += spa->header.length;
 
-		offset = offset + sizeof(*spa) * 3;
 		/* mem-region14 (spa/dcr4, dimm4) */
 		memdev = nfit_buf + offset;
 		memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
@@ -1970,10 +2039,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 		memdev->address = 0;
 		memdev->interleave_index = 0;
 		memdev->interleave_ways = 1;
+		offset += memdev->header.length;
 
-		/* mem-region15 (spa0, dimm4) */
-		memdev = nfit_buf + offset +
-				sizeof(struct acpi_nfit_memory_map);
+		/* mem-region15 (spa11, dimm4) */
+		memdev = nfit_buf + offset;
 		memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
 		memdev->header.length = sizeof(*memdev);
 		memdev->device_handle = handle[4];
@@ -1987,10 +2056,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 		memdev->interleave_index = 0;
 		memdev->interleave_ways = 1;
 		memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;
+		offset += memdev->header.length;
 
 		/* mem-region16 (spa/bdw4, dimm4) */
-		memdev = nfit_buf + offset +
-				sizeof(struct acpi_nfit_memory_map) * 2;
+		memdev = nfit_buf + offset;
 		memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
 		memdev->header.length = sizeof(*memdev);
 		memdev->device_handle = handle[4];
@@ -2003,8 +2072,8 @@ static void nfit_test0_setup(struct nfit_test *t)
 		memdev->address = 0;
 		memdev->interleave_index = 0;
 		memdev->interleave_ways = 1;
+		offset += memdev->header.length;
 
-		offset = offset + sizeof(struct acpi_nfit_memory_map) * 3;
 		/* flush3 (dimm4) */
 		flush = nfit_buf + offset;
 		flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
@@ -2014,8 +2083,14 @@ static void nfit_test0_setup(struct nfit_test *t)
 		for (i = 0; i < NUM_HINTS; i++)
 			flush->hint_address[i] = t->flush_dma[4]
 				+ i * sizeof(u64);
+		offset += flush->header.length;
+
+		/* sanity check to make sure we've filled the buffer */
+		WARN_ON(offset != t->nfit_size);
 	}
 
+	t->nfit_filled = offset;
+
 	post_ars_status(&t->ars_state, &t->badrange, t->spa_set_dma[0],
 			SPA0_SIZE);
 
@@ -2026,6 +2101,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	set_bit(ND_INTEL_SMART, &acpi_desc->dimm_cmd_force_en);
 	set_bit(ND_INTEL_SMART_THRESHOLD, &acpi_desc->dimm_cmd_force_en);
 	set_bit(ND_INTEL_SMART_SET_THRESHOLD, &acpi_desc->dimm_cmd_force_en);
+	set_bit(ND_INTEL_SMART_INJECT, &acpi_desc->dimm_cmd_force_en);
 	set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_cmd_force_en);
 	set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en);
 	set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en);
@@ -2061,17 +2137,18 @@ static void nfit_test1_setup(struct nfit_test *t)
 	spa->range_index = 0+1;
 	spa->address = t->spa_set_dma[0];
 	spa->length = SPA2_SIZE;
+	offset += spa->header.length;
 
 	/* virtual cd region */
-	spa = nfit_buf + sizeof(*spa);
+	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
 	spa->header.length = sizeof(*spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_VCD), 16);
 	spa->range_index = 0;
 	spa->address = t->spa_set_dma[1];
 	spa->length = SPA_VCD_SIZE;
+	offset += spa->header.length;
 
-	offset += sizeof(*spa) * 2;
 	/* mem-region0 (spa0, dimm0) */
 	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
@@ -2089,8 +2166,8 @@ static void nfit_test1_setup(struct nfit_test *t)
 	memdev->flags = ACPI_NFIT_MEM_SAVE_FAILED | ACPI_NFIT_MEM_RESTORE_FAILED
 		| ACPI_NFIT_MEM_FLUSH_FAILED | ACPI_NFIT_MEM_HEALTH_OBSERVED
 		| ACPI_NFIT_MEM_NOT_ARMED;
+	offset += memdev->header.length;
 
-	offset += sizeof(*memdev);
 	/* dcr-descriptor0 */
 	dcr = nfit_buf + offset;
 	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
@@ -2101,8 +2178,8 @@ static void nfit_test1_setup(struct nfit_test *t)
 	dcr->serial_number = ~handle[5];
 	dcr->code = NFIT_FIC_BYTE;
 	dcr->windows = 0;
-
 	offset += dcr->header.length;
+
 	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
 	memdev->header.length = sizeof(*memdev);
@@ -2117,9 +2194,9 @@ static void nfit_test1_setup(struct nfit_test *t)
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 1;
 	memdev->flags = ACPI_NFIT_MEM_MAP_FAILED;
+	offset += memdev->header.length;
 
 	/* dcr-descriptor1 */
-	offset += sizeof(*memdev);
 	dcr = nfit_buf + offset;
 	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
 	dcr->header.length = offsetof(struct acpi_nfit_control_region,
@@ -2129,6 +2206,12 @@ static void nfit_test1_setup(struct nfit_test *t)
 	dcr->serial_number = ~handle[6];
 	dcr->code = NFIT_FIC_BYTE;
 	dcr->windows = 0;
+	offset += dcr->header.length;
+
+	/* sanity check to make sure we've filled the buffer */
+	WARN_ON(offset != t->nfit_size);
+
+	t->nfit_filled = offset;
 
 	post_ars_status(&t->ars_state, &t->badrange, t->spa_set_dma[0],
 			SPA2_SIZE);
@@ -2487,7 +2570,7 @@ static int nfit_test_probe(struct platform_device *pdev)
 	nd_desc->ndctl = nfit_test_ctl;
 
 	rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_buf,
-			nfit_test->nfit_size);
+			nfit_test->nfit_filled);
 	if (rc)
 		return rc;
 
diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h
index 428344519cdf..33752e06ff8d 100644
--- a/tools/testing/nvdimm/test/nfit_test.h
+++ b/tools/testing/nvdimm/test/nfit_test.h
@@ -93,6 +93,7 @@ struct nd_cmd_ars_err_inj_stat {
 #define ND_INTEL_FW_FINISH_UPDATE	15
 #define ND_INTEL_FW_FINISH_QUERY	16
 #define ND_INTEL_SMART_SET_THRESHOLD	17
+#define ND_INTEL_SMART_INJECT		18
 
 #define ND_INTEL_SMART_HEALTH_VALID             (1 << 0)
 #define ND_INTEL_SMART_SPARES_VALID             (1 << 1)
@@ -111,6 +112,10 @@ struct nd_cmd_ars_err_inj_stat {
 #define ND_INTEL_SMART_NON_CRITICAL_HEALTH      (1 << 0)
 #define ND_INTEL_SMART_CRITICAL_HEALTH          (1 << 1)
 #define ND_INTEL_SMART_FATAL_HEALTH             (1 << 2)
+#define ND_INTEL_SMART_INJECT_MTEMP		(1 << 0)
+#define ND_INTEL_SMART_INJECT_SPARE		(1 << 1)
+#define ND_INTEL_SMART_INJECT_FATAL		(1 << 2)
+#define ND_INTEL_SMART_INJECT_SHUTDOWN		(1 << 3)
 
 struct nd_intel_smart {
 	__u32 status;
@@ -158,6 +163,17 @@ struct nd_intel_smart_set_threshold {
 	__u32 status;
 } __packed;
 
+struct nd_intel_smart_inject {
+	__u64 flags;
+	__u8 mtemp_enable;
+	__u16 media_temperature;
+	__u8 spare_enable;
+	__u8 spares;
+	__u8 fatal_enable;
+	__u8 unsafe_shutdown_enable;
+	__u32 status;
+} __packed;
+
 #define INTEL_FW_STORAGE_SIZE		0x100000
 #define INTEL_FW_MAX_SEND_LEN		0xFFEC
 #define INTEL_FW_QUERY_INTERVAL		250000
diff --git a/tools/testing/radix-tree/linux/gfp.h b/tools/testing/radix-tree/linux/gfp.h
index e3201ccf54c3..32159c08a52e 100644
--- a/tools/testing/radix-tree/linux/gfp.h
+++ b/tools/testing/radix-tree/linux/gfp.h
@@ -19,6 +19,7 @@
 
 #define __GFP_RECLAIM	(__GFP_DIRECT_RECLAIM|__GFP_KSWAPD_RECLAIM)
 
+#define GFP_ZONEMASK	0x0fu
 #define GFP_ATOMIC	(__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM)
 #define GFP_KERNEL	(__GFP_RECLAIM | __GFP_IO | __GFP_FS)
 #define GFP_NOWAIT	(__GFP_KSWAPD_RECLAIM)
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index dbda89c9d9b9..32aafa92074c 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -15,6 +15,7 @@ TARGETS += gpio
 TARGETS += intel_pstate
 TARGETS += ipc
 TARGETS += kcmp
+TARGETS += kvm
 TARGETS += lib
 TARGETS += membarrier
 TARGETS += memfd
@@ -24,6 +25,7 @@ TARGETS += mqueue
 TARGETS += net
 TARGETS += nsfs
 TARGETS += powerpc
+TARGETS += proc
 TARGETS += pstore
 TARGETS += ptrace
 TARGETS += seccomp
@@ -67,6 +69,12 @@ ifndef BUILD
   BUILD := $(shell pwd)
 endif
 
+# KSFT_TAP_LEVEL is used by the KSFT framework to prevent nested TAP header
+# printing from tests. Applicable to the run_tests case, where run_tests adds
+# the TAP header prior to running tests, and when a test program invokes another
+# with a system() call. Export it here to cover override RUN_TESTS defines.
+export KSFT_TAP_LEVEL=`echo 1`
+
 export BUILD
 all:
 	@for TARGET in $(TARGETS); do		\
@@ -126,11 +134,14 @@ ifdef INSTALL_PATH
 	echo "else" >> $(ALL_SCRIPT)
 	echo "  OUTPUT=/dev/stdout" >> $(ALL_SCRIPT)
 	echo "fi" >> $(ALL_SCRIPT)
+	echo "export KSFT_TAP_LEVEL=`echo 1`" >> $(ALL_SCRIPT)
 
 	for TARGET in $(TARGETS); do \
 		BUILD_TARGET=$$BUILD/$$TARGET;	\
-		echo "echo ; echo Running tests in $$TARGET" >> $(ALL_SCRIPT); \
+		echo "echo ; echo TAP version 13" >> $(ALL_SCRIPT);	\
+		echo "echo Running tests in $$TARGET" >> $(ALL_SCRIPT); \
 		echo "echo ========================================" >> $(ALL_SCRIPT); \
+		echo "[ -w /dev/kmsg ] && echo \"kselftest: Running tests in $$TARGET\" >> /dev/kmsg" >> $(ALL_SCRIPT); \
 		echo "cd $$TARGET" >> $(ALL_SCRIPT); \
 		make -s --no-print-directory OUTPUT=$$BUILD_TARGET -C $$TARGET emit_tests >> $(ALL_SCRIPT); \
 		echo "cd \$$ROOT" >> $(ALL_SCRIPT); \
diff --git a/tools/testing/selftests/android/ion/.gitignore b/tools/testing/selftests/android/ion/.gitignore
index 67e6f391b2a9..95e8f4561474 100644
--- a/tools/testing/selftests/android/ion/.gitignore
+++ b/tools/testing/selftests/android/ion/.gitignore
@@ -1,2 +1,3 @@
 ionapp_export
 ionapp_import
+ionmap_test
diff --git a/tools/testing/selftests/android/ion/Makefile b/tools/testing/selftests/android/ion/Makefile
index 96e0c448b39d..e03695287f76 100644
--- a/tools/testing/selftests/android/ion/Makefile
+++ b/tools/testing/selftests/android/ion/Makefile
@@ -1,8 +1,8 @@
 
-INCLUDEDIR := -I. -I../../../../../drivers/staging/android/uapi/
+INCLUDEDIR := -I. -I../../../../../drivers/staging/android/uapi/ -I../../../../../usr/include/
 CFLAGS := $(CFLAGS) $(INCLUDEDIR) -Wall -O2 -g
 
-TEST_GEN_FILES := ionapp_export ionapp_import
+TEST_GEN_FILES := ionapp_export ionapp_import ionmap_test
 
 all: $(TEST_GEN_FILES)
 
@@ -14,3 +14,4 @@ include ../../lib.mk
 
 $(OUTPUT)/ionapp_export: ionapp_export.c ipcsocket.c ionutils.c
 $(OUTPUT)/ionapp_import: ionapp_import.c ipcsocket.c ionutils.c
+$(OUTPUT)/ionmap_test: ionmap_test.c ionutils.c
diff --git a/tools/testing/selftests/android/ion/config b/tools/testing/selftests/android/ion/config
index 19db6ca9aa2b..b4ad748a9dd9 100644
--- a/tools/testing/selftests/android/ion/config
+++ b/tools/testing/selftests/android/ion/config
@@ -2,3 +2,4 @@ CONFIG_ANDROID=y
 CONFIG_STAGING=y
 CONFIG_ION=y
 CONFIG_ION_SYSTEM_HEAP=y
+CONFIG_DRM_VGEM=y
diff --git a/tools/testing/selftests/android/ion/ionmap_test.c b/tools/testing/selftests/android/ion/ionmap_test.c
new file mode 100644
index 000000000000..dab36b06b37d
--- /dev/null
+++ b/tools/testing/selftests/android/ion/ionmap_test.c
@@ -0,0 +1,136 @@
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <linux/dma-buf.h>
+
+#include <drm/drm.h>
+
+#include "ion.h"
+#include "ionutils.h"
+
+int check_vgem(int fd)
+{
+	drm_version_t version = { 0 };
+	char name[5] = { 0 };
+	int ret;
+
+	/* Ask for at most 4 name bytes; name[4] stays 0 as the terminator. */
+	version.name_len = 4;
+	version.name = name;
+	ret = ioctl(fd, DRM_IOCTL_VERSION, &version);
+	if (ret)
+		return 1;
+	/* 0 only when the driver behind fd reports the name "vgem". */
+	return strcmp(name, "vgem");
+}
+
+int open_vgem(void)
+{
+	int i, fd;
+	const char *drmstr = "/dev/dri/card";
+
+	/* Probe the first 16 DRM card nodes for the vgem driver. */
+	for (i = 0; i < 16; i++) {
+		char name[80];
+
+		snprintf(name, sizeof(name), "%s%d", drmstr, i);
+
+		fd = open(name, O_RDWR);
+		if (fd < 0)
+			continue;
+
+		if (!check_vgem(fd))
+			return fd;
+
+		/* Not vgem: drop it so a closed fd is never returned. */
+		close(fd);
+	}
+
+	/* No vgem device found (or none could be opened). */
+	return -1;
+}
+
+int import_vgem_fd(int vgem_fd, int dma_buf_fd, uint32_t *handle)
+{
+	/* flags and handle start out as 0; only the source fd is filled in. */
+	struct drm_prime_handle req = {
+		.fd = dma_buf_fd,
+	};
+	int rc = ioctl(vgem_fd, DRM_IOCTL_PRIME_FD_TO_HANDLE, &req);
+
+	/* Leave *handle untouched unless the ioctl succeeded. */
+	if (rc != 0)
+		return rc;
+	*handle = req.handle;
+	return 0;
+}
+
+void close_handle(int vgem_fd, uint32_t handle)
+{
+	/* Best effort: the DRM_IOCTL_GEM_CLOSE result is not checked. */
+	struct drm_gem_close args = { .handle = handle };
+
+	ioctl(vgem_fd, DRM_IOCTL_GEM_CLOSE, &args);
+}
+
+int main()
+{
+	int ret, vgem_fd;
+	struct ion_buffer_info info;
+	uint32_t handle = 0;
+	struct dma_buf_sync sync = { 0 };
+	/* Export an ion buffer, import it into vgem, then write it under sync. */
+	info.heap_type = ION_HEAP_TYPE_SYSTEM;
+	info.heap_size = 4096;
+	info.flag_type = ION_FLAG_CACHED;
+
+	ret = ion_export_buffer_fd(&info);
+	if (ret < 0) {
+		printf("ion buffer alloc failed\n");
+		return -1;
+	}
+
+	vgem_fd = open_vgem();
+	if (vgem_fd < 0) {
+		ret = vgem_fd;
+		printf("Failed to open vgem\n");
+		goto out_ion;
+	}
+
+	ret = import_vgem_fd(vgem_fd, info.buffd, &handle);
+
+	if (ret < 0) {
+		printf("Failed to import buffer\n");
+		goto out_vgem;
+	}
+	/* Bracket the CPU write below with DMA_BUF_IOCTL_SYNC start/end. */
+	sync.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_RW;
+	ret = ioctl(info.buffd, DMA_BUF_IOCTL_SYNC, &sync);
+	if (ret)
+		printf("sync start failed %d\n", errno);
+
+	memset(info.buffer, 0xff, 4096);
+
+	sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_RW;
+	ret = ioctl(info.buffd, DMA_BUF_IOCTL_SYNC, &sync);
+	if (ret)
+		printf("sync end failed %d\n", errno);
+	/* Sync errors above are only printed; ret is reset to 0 below. */
+	close_handle(vgem_fd, handle);
+	ret = 0;
+
+out_vgem:
+	close(vgem_fd);
+out_ion:
+	ion_close_buffer_fd(&info);
+	printf("done.\n");
+	return ret;
+}
diff --git a/tools/testing/selftests/android/ion/ionutils.c b/tools/testing/selftests/android/ion/ionutils.c
index ce69c14f51fa..7d1d37c4ef6a 100644
--- a/tools/testing/selftests/android/ion/ionutils.c
+++ b/tools/testing/selftests/android/ion/ionutils.c
@@ -80,11 +80,6 @@ int ion_export_buffer_fd(struct ion_buffer_info *ion_info)
 	heap_id = MAX_HEAP_COUNT + 1;
 	for (i = 0; i < query.cnt; i++) {
 		if (heap_data[i].type == ion_info->heap_type) {
-			printf("--------------------------------------\n");
-			printf("heap type: %d\n", heap_data[i].type);
-			printf("  heap id: %d\n", heap_data[i].heap_id);
-			printf("heap name: %s\n", heap_data[i].name);
-			printf("--------------------------------------\n");
 			heap_id = heap_data[i].heap_id;
 			break;
 		}
@@ -204,7 +199,6 @@ void ion_close_buffer_fd(struct ion_buffer_info *ion_info)
 		/* Finally, close the client fd */
 		if (ion_info->ionfd > 0)
 			close(ion_info->ionfd);
-		printf("<%s>: buffer release successfully....\n", __func__);
 	}
 }
 
diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions
index df3dd7fe5f9b..2a4f16fc9819 100644
--- a/tools/testing/selftests/ftrace/test.d/functions
+++ b/tools/testing/selftests/ftrace/test.d/functions
@@ -59,6 +59,13 @@ disable_events() {
     echo 0 > events/enable
 }
 
+clear_synthetic_events() { # reset all current synthetic events
+    grep -v ^# synthetic_events |
+    while read line; do
+        echo "!$line" >> synthetic_events
+    done
+}
+
 initialize_ftrace() { # Reset ftrace to initial-state
 # As the initial state, ftrace will be set to nop tracer,
 # no events, no triggers, no filters, no function filters,
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc
new file mode 100644
index 000000000000..786dce7e48be
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc
@@ -0,0 +1,39 @@
+#!/bin/sh
+# description: event trigger - test extended error support
+
+
+do_reset() {
+    reset_trigger
+    echo > set_event
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+    echo "synthetic event is not supported"
+    exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+echo "Test extended error support"
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger
+! echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger 2> /dev/null
+if ! grep -q "ERROR:" events/sched/sched_wakeup/hist; then
+    fail "Failed to generate extended error in histogram"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc
new file mode 100644
index 000000000000..7fd5b4a8f060
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc
@@ -0,0 +1,54 @@
+#!/bin/sh
+# description: event trigger - test field variable support
+
+do_reset() {
+    reset_trigger
+    echo > set_event
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+    echo "synthetic event is not supported"
+    exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test field variable support"
+
+echo 'wakeup_latency u64 lat; pid_t pid; int prio; char comm[16]' > synthetic_events
+echo 'hist:keys=comm:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_waking/trigger
+echo 'hist:keys=next_comm:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid,sched.sched_waking.prio,next_comm) if next_comm=="ping"' > events/sched/sched_switch/trigger
+echo 'hist:keys=pid,prio,comm:vals=lat:sort=pid,prio' > events/synthetic/wakeup_latency/trigger
+
+ping localhost -c 3
+if ! grep -q "ping" events/synthetic/wakeup_latency/hist; then
+    fail "Failed to create inter-event histogram"
+fi
+
+if ! grep -q "synthetic_prio=prio" events/sched/sched_waking/hist; then
+    fail "Failed to create histogram with field variable"
+fi
+
+echo '!hist:keys=next_comm:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid,sched.sched_waking.prio,next_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger
+
+if grep -q "synthetic_prio=prio" events/sched/sched_waking/hist; then
+    fail "Failed to remove histogram with field variable"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc
new file mode 100644
index 000000000000..c93dbe38b5df
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc
@@ -0,0 +1,58 @@
+#!/bin/sh
+# description: event trigger - test inter-event combined histogram trigger
+
+do_reset() {
+    reset_trigger
+    echo > set_event
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+    echo "synthetic event is not supported"
+    exit_unsupported
+fi
+
+reset_tracer
+do_reset
+clear_synthetic_events
+
+echo "Test create synthetic event"
+
+echo 'waking_latency  u64 lat pid_t pid' > synthetic_events
+if [ ! -d events/synthetic/waking_latency ]; then
+    fail "Failed to create waking_latency synthetic event"
+fi
+
+echo "Test combined histogram"
+
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_waking/trigger
+echo 'hist:keys=pid:waking_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).waking_latency($waking_lat,pid) if comm=="ping"' > events/sched/sched_wakeup/trigger
+echo 'hist:keys=pid,lat:sort=pid,lat' > events/synthetic/waking_latency/trigger
+
+echo 'wakeup_latency u64 lat pid_t pid' >> synthetic_events
+echo 'hist:keys=pid:ts1=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger
+echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts1:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid) if next_comm=="ping"' > events/sched/sched_switch/trigger
+
+echo 'waking+wakeup_latency u64 lat; pid_t pid' >> synthetic_events
+echo 'hist:keys=pid,lat:sort=pid,lat:ww_lat=$waking_lat+$wakeup_lat:onmatch(synthetic.wakeup_latency).waking+wakeup_latency($ww_lat,pid)' >> events/synthetic/wakeup_latency/trigger
+echo 'hist:keys=pid,lat:sort=pid,lat' >> events/synthetic/waking+wakeup_latency/trigger
+
+ping localhost -c 3
+if ! grep -q "pid:" events/synthetic/waking+wakeup_latency/hist; then
+    fail "Failed to create combined histogram"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc
new file mode 100644
index 000000000000..e84e7d048566
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc
@@ -0,0 +1,50 @@
+#!/bin/sh
+# description: event trigger - test inter-event histogram trigger onmatch action
+
+do_reset() {
+    reset_trigger
+    echo > set_event
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+    echo "synthetic event is not supported"
+    exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test create synthetic event"
+
+echo 'wakeup_latency  u64 lat pid_t pid char comm[16]' > synthetic_events
+if [ ! -d events/synthetic/wakeup_latency ]; then
+    fail "Failed to create wakeup_latency synthetic event"
+fi
+
+echo "Test create histogram for synthetic event"
+echo "Test histogram variables,simple expression support and onmatch action"
+
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger
+echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid,next_comm) if next_comm=="ping"' > events/sched/sched_switch/trigger
+echo 'hist:keys=comm,pid,lat:wakeup_lat=lat:sort=lat' > events/synthetic/wakeup_latency/trigger
+ping localhost -c 5
+if ! grep -q "ping" events/synthetic/wakeup_latency/hist; then
+    fail "Failed to create onmatch action inter-event histogram"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc
new file mode 100644
index 000000000000..7907d8aacde3
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc
@@ -0,0 +1,50 @@
+#!/bin/sh
+# description: event trigger - test inter-event histogram trigger onmatch-onmax action
+
+do_reset() {
+    reset_trigger
+    echo > set_event
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+    echo "synthetic event is not supported"
+    exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test create synthetic event"
+
+echo 'wakeup_latency  u64 lat pid_t pid char comm[16]' > synthetic_events
+if [ ! -d events/synthetic/wakeup_latency ]; then
+    fail "Failed to create wakeup_latency synthetic event"
+fi
+
+echo "Test create histogram for synthetic event"
+echo "Test histogram variables,simple expression support and onmatch-onmax action"
+
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger
+echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid,next_comm):onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger
+echo 'hist:keys=comm,pid,lat:wakeup_lat=lat:sort=lat' > events/synthetic/wakeup_latency/trigger
+ping localhost -c 5
+if ! grep -q "ping" events/synthetic/wakeup_latency/hist || ! grep -q "max:" events/sched/sched_switch/hist; then
+    fail "Failed to create onmatch-onmax action inter-event histogram"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc
new file mode 100644
index 000000000000..38b7ed6242b2
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc
@@ -0,0 +1,48 @@
+#!/bin/sh
+# description: event trigger - test inter-event histogram trigger onmax action
+
+do_reset() {
+    reset_trigger
+    echo > set_event
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+    echo "synthetic event is not supported"
+    exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test create synthetic event"
+
+echo 'wakeup_latency  u64 lat pid_t pid char comm[16]' > synthetic_events
+if [ ! -d events/synthetic/wakeup_latency ]; then
+    fail "Failed to create wakeup_latency synthetic event"
+fi
+
+echo "Test onmax action"
+
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_waking/trigger
+echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger
+ping localhost -c 3
+if ! grep -q "max:" events/sched/sched_switch/hist; then
+    fail "Failed to create onmax action inter-event histogram"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc
new file mode 100644
index 000000000000..cef11377dcbd
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc
@@ -0,0 +1,54 @@
+#!/bin/sh
+# description: event trigger - test synthetic event create remove
+do_reset() {
+    reset_trigger
+    echo > set_event
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+    echo "synthetic event is not supported"
+    exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test create synthetic event"
+
+echo 'wakeup_latency  u64 lat pid_t pid char comm[16]' > synthetic_events
+if [ ! -d events/synthetic/wakeup_latency ]; then
+    fail "Failed to create wakeup_latency synthetic event"
+fi
+
+reset_trigger
+
+echo "Test create synthetic event with an error"
+! echo 'wakeup_latency  u64 lat pid_t pid char' > synthetic_events 2> /dev/null
+if [ -d events/synthetic/wakeup_latency ]; then
+    fail "Created wakeup_latency synthetic event with an invalid format"
+fi
+
+reset_trigger
+
+echo "Test remove synthetic event"
+echo '!wakeup_latency  u64 lat pid_t pid char comm[16]' > synthetic_events
+if [ -d events/synthetic/wakeup_latency ]; then
+    fail "Failed to delete wakeup_latency synthetic event"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/futex/Makefile b/tools/testing/selftests/futex/Makefile
index a63e8453984d..8497a376ef9d 100644
--- a/tools/testing/selftests/futex/Makefile
+++ b/tools/testing/selftests/futex/Makefile
@@ -18,6 +18,10 @@ all:
 	done
 
 override define RUN_TESTS
+	@export KSFT_TAP_LEVEL=`echo 1`;
+	@echo "TAP version 13";
+	@echo "selftests: futex";
+	@echo "========================================";
 	@cd $(OUTPUT); ./run.sh
 endef
 
diff --git a/tools/testing/selftests/intel_pstate/Makefile b/tools/testing/selftests/intel_pstate/Makefile
index 5a3f7d37e912..7340fd6a9a9f 100644
--- a/tools/testing/selftests/intel_pstate/Makefile
+++ b/tools/testing/selftests/intel_pstate/Makefile
@@ -2,7 +2,10 @@
 CFLAGS := $(CFLAGS) -Wall -D_GNU_SOURCE
 LDLIBS := $(LDLIBS) -lm
 
-ifeq (,$(filter $(ARCH),x86))
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
+
+ifeq (x86,$(ARCH))
 TEST_GEN_FILES := msr aperf
 endif
 
diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h
index 1a52b03962a3..1b9d8ecdebce 100644
--- a/tools/testing/selftests/kselftest.h
+++ b/tools/testing/selftests/kselftest.h
@@ -57,7 +57,8 @@ static inline int ksft_get_error_cnt(void) { return ksft_cnt.ksft_error; }
 
 static inline void ksft_print_header(void)
 {
-	printf("TAP version 13\n");
+	if (!(getenv("KSFT_TAP_LEVEL")))
+		printf("TAP version 13\n");
 }
 
 static inline void ksft_print_cnts(void)
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index e81bd28bdd89..6ae3730c4ee3 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -107,6 +107,27 @@
 			__FILE__, __LINE__, _metadata->name, ##__VA_ARGS__)
 
 /**
+ * XFAIL(statement, fmt, ...)
+ *
+ * @statement: statement to run after reporting XFAIL
+ * @fmt: format string
+ * @...: optional arguments
+ *
+ * This forces a "pass" after reporting a failure with an XFAIL prefix,
+ * and runs "statement", which is usually "return" or "goto skip".
+ */
+#define XFAIL(statement, fmt, ...) do { \
+	if (TH_LOG_ENABLED) { \
+		fprintf(TH_LOG_STREAM, "[  XFAIL!  ] " fmt "\n", \
+			##__VA_ARGS__); \
+	} \
+	/* TODO: find a way to pass xfail to test runner process. */ \
+	_metadata->passed = 1; \
+	_metadata->trigger = 0; \
+	statement; \
+} while (0)
+
+/**
  * TEST(test_name) - Defines the test function and creates the registration
  * stub
  *
@@ -198,7 +219,7 @@
 
 /**
  * FIXTURE_SETUP(fixture_name) - Prepares the setup function for the fixture.
- * *_metadata* is included so that ASSERT_* work as a convenience
+ * *_metadata* is included so that EXPECT_* and ASSERT_* work correctly.
  *
  * @fixture_name: fixture name
  *
@@ -221,6 +242,7 @@
 		FIXTURE_DATA(fixture_name) __attribute__((unused)) *self)
 /**
  * FIXTURE_TEARDOWN(fixture_name)
+ * *_metadata* is included so that EXPECT_* and ASSERT_* work correctly.
  *
  * @fixture_name: fixture name
  *
@@ -253,6 +275,8 @@
  * Defines a test that depends on a fixture (e.g., is part of a test case).
  * Very similar to TEST() except that *self* is the setup instance of fixture's
  * datatype exposed for use by the implementation.
+ *
+ * Warning: use of ASSERT_* here will skip TEARDOWN.
  */
 /* TODO(wad) register fixtures on dedicated test lists. */
 #define TEST_F(fixture_name, test_name) \
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
new file mode 100644
index 000000000000..dc44de904797
--- /dev/null
+++ b/tools/testing/selftests/kvm/Makefile
@@ -0,0 +1,39 @@
+all:
+
+top_srcdir = ../../../../
+UNAME_M := $(shell uname -m)
+
+LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c
+LIBKVM_x86_64 = lib/x86.c
+
+TEST_GEN_PROGS_x86_64 = set_sregs_test
+TEST_GEN_PROGS_x86_64 += sync_regs_test
+
+TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
+LIBKVM += $(LIBKVM_$(UNAME_M))
+
+INSTALL_HDR_PATH = $(top_srcdir)/usr
+LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
+CFLAGS += -O2 -g -I$(LINUX_HDR_PATH) -Iinclude -I$(<D)
+
+# After inclusion, $(OUTPUT) is defined and
+# $(TEST_GEN_PROGS) starts with $(OUTPUT)/
+include ../lib.mk
+
+STATIC_LIBS := $(OUTPUT)/libkvm.a
+LIBKVM_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM))
+EXTRA_CLEAN += $(LIBKVM_OBJ) $(STATIC_LIBS)
+
+x := $(shell mkdir -p $(sort $(dir $(LIBKVM_OBJ))))
+$(LIBKVM_OBJ): $(OUTPUT)/%.o: %.c
+	$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
+
+$(OUTPUT)/libkvm.a: $(LIBKVM_OBJ)
+	$(AR) crs $@ $^
+
+$(LINUX_HDR_PATH):
+	$(MAKE) -C $(top_srcdir) headers_install
+
+all: $(STATIC_LIBS) $(LINUX_HDR_PATH)
+$(TEST_GEN_PROGS): $(STATIC_LIBS)
+$(TEST_GEN_PROGS) $(LIBKVM_OBJ): | $(LINUX_HDR_PATH)
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
new file mode 100644
index 000000000000..57974ad46373
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -0,0 +1,142 @@
+/*
+ * tools/testing/selftests/kvm/include/kvm_util.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ */
+#ifndef SELFTEST_KVM_UTIL_H
+#define SELFTEST_KVM_UTIL_H 1
+
+#include "test_util.h"
+
+#include "asm/kvm.h"
+#include "linux/kvm.h"
+#include <sys/ioctl.h>
+
+#include "sparsebit.h"
+
+/*
+ * Memslots can't cover the gfn starting at this gpa otherwise vCPUs can't be
+ * created. Only applies to VMs using EPT.
+ */
+#define KVM_DEFAULT_IDENTITY_MAP_ADDRESS 0xfffbc000ul
+
+
+/* Callers of kvm_util only have an incomplete/opaque description of the
+ * structure kvm_util is using to maintain the state of a VM.
+ */
+struct kvm_vm;
+
+typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */
+typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
+
+/* Minimum allocated guest virtual and physical addresses */
+#define KVM_UTIL_MIN_VADDR 0x2000
+
+#define DEFAULT_GUEST_PHY_PAGES		512
+#define DEFAULT_GUEST_STACK_VADDR_MIN	0xab6000
+#define DEFAULT_STACK_PGS               5
+
+enum vm_guest_mode {
+	VM_MODE_FLAT48PG,
+};
+
+enum vm_mem_backing_src_type {
+	VM_MEM_SRC_ANONYMOUS,
+	VM_MEM_SRC_ANONYMOUS_THP,
+	VM_MEM_SRC_ANONYMOUS_HUGETLB,
+};
+
+int kvm_check_cap(long cap);
+
+struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
+void kvm_vm_free(struct kvm_vm *vmp);
+
+int kvm_memcmp_hva_gva(void *hva,
+	struct kvm_vm *vm, const vm_vaddr_t gva, size_t len);
+
+void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
+	uint32_t data_memslot, uint32_t pgd_memslot);
+
+void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
+void vcpu_dump(FILE *stream, struct kvm_vm *vm,
+	uint32_t vcpuid, uint8_t indent);
+
+void vm_create_irqchip(struct kvm_vm *vm);
+
+void vm_userspace_mem_region_add(struct kvm_vm *vm,
+	enum vm_mem_backing_src_type src_type,
+	uint64_t guest_paddr, uint32_t slot, uint64_t npages,
+	uint32_t flags);
+
+void vcpu_ioctl(struct kvm_vm *vm,
+	uint32_t vcpuid, unsigned long ioctl, void *arg);
+void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
+void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
+void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid);
+vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
+	uint32_t data_memslot, uint32_t pgd_memslot);
+void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa);
+void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva);
+vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva);
+vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva);
+
+struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid);
+void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
+int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
+void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
+	struct kvm_mp_state *mp_state);
+void vcpu_regs_get(struct kvm_vm *vm,
+	uint32_t vcpuid, struct kvm_regs *regs);
+void vcpu_regs_set(struct kvm_vm *vm,
+	uint32_t vcpuid, struct kvm_regs *regs);
+void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...);
+void vcpu_sregs_get(struct kvm_vm *vm,
+	uint32_t vcpuid, struct kvm_sregs *sregs);
+void vcpu_sregs_set(struct kvm_vm *vm,
+	uint32_t vcpuid, struct kvm_sregs *sregs);
+int _vcpu_sregs_set(struct kvm_vm *vm,
+	uint32_t vcpuid, struct kvm_sregs *sregs);
+void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
+			  struct kvm_vcpu_events *events);
+void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
+			  struct kvm_vcpu_events *events);
+
+const char *exit_reason_str(unsigned int exit_reason);
+
+void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot);
+void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+	uint32_t pgd_memslot);
+vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm,
+	vm_paddr_t paddr_min, uint32_t memslot);
+
+void kvm_get_supported_cpuid(struct kvm_cpuid2 *cpuid);
+void vcpu_set_cpuid(
+	struct kvm_vm *vm, uint32_t vcpuid, struct kvm_cpuid2 *cpuid);
+
+struct kvm_cpuid2 *allocate_kvm_cpuid2(void);
+struct kvm_cpuid_entry2 *
+find_cpuid_index_entry(struct kvm_cpuid2 *cpuid, uint32_t function,
+		       uint32_t index);
+
+static inline struct kvm_cpuid_entry2 *
+find_cpuid_entry(struct kvm_cpuid2 *cpuid, uint32_t function)
+{
+	return find_cpuid_index_entry(cpuid, function, 0);
+}
+
+struct kvm_vm *vm_create_default(uint32_t vcpuid, void *guest_code);
+void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code);
+
+struct kvm_userspace_memory_region *
+kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
+				 uint64_t end);
+
+struct kvm_dirty_log *
+allocate_kvm_dirty_log(struct kvm_userspace_memory_region *region);
+
+int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd);
+
+#endif /* SELFTEST_KVM_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/sparsebit.h b/tools/testing/selftests/kvm/include/sparsebit.h
new file mode 100644
index 000000000000..54cfeb6568d3
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/sparsebit.h
@@ -0,0 +1,75 @@
+/*
+ * tools/testing/selftests/kvm/include/sparsebit.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ *
+ * Header file that describes API to the sparsebit library.
+ * This library provides a memory efficient means of storing
+ * the settings of bits indexed via a uint64_t.  Memory usage
+ * is reasonable, significantly less than (2^64 / 8) bytes, as
+ * long as bits that are mostly set or mostly cleared are close
+ * to each other.  This library is efficient in memory usage
+ * even in the case where most bits are set.
+ */
+
+#ifndef _TEST_SPARSEBIT_H_
+#define _TEST_SPARSEBIT_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct sparsebit;
+typedef uint64_t sparsebit_idx_t;
+typedef uint64_t sparsebit_num_t;
+
+struct sparsebit *sparsebit_alloc(void);
+void sparsebit_free(struct sparsebit **sbitp);
+void sparsebit_copy(struct sparsebit *dstp, struct sparsebit *src);
+
+bool sparsebit_is_set(struct sparsebit *sbit, sparsebit_idx_t idx);
+bool sparsebit_is_set_num(struct sparsebit *sbit,
+			  sparsebit_idx_t idx, sparsebit_num_t num);
+bool sparsebit_is_clear(struct sparsebit *sbit, sparsebit_idx_t idx);
+bool sparsebit_is_clear_num(struct sparsebit *sbit,
+			    sparsebit_idx_t idx, sparsebit_num_t num);
+sparsebit_num_t sparsebit_num_set(struct sparsebit *sbit);
+bool sparsebit_any_set(struct sparsebit *sbit);
+bool sparsebit_any_clear(struct sparsebit *sbit);
+bool sparsebit_all_set(struct sparsebit *sbit);
+bool sparsebit_all_clear(struct sparsebit *sbit);
+sparsebit_idx_t sparsebit_first_set(struct sparsebit *sbit);
+sparsebit_idx_t sparsebit_first_clear(struct sparsebit *sbit);
+sparsebit_idx_t sparsebit_next_set(struct sparsebit *sbit, sparsebit_idx_t prev);
+sparsebit_idx_t sparsebit_next_clear(struct sparsebit *sbit, sparsebit_idx_t prev);
+sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *sbit,
+				       sparsebit_idx_t start, sparsebit_num_t num);
+sparsebit_idx_t sparsebit_next_clear_num(struct sparsebit *sbit,
+					 sparsebit_idx_t start, sparsebit_num_t num);
+
+void sparsebit_set(struct sparsebit *sbitp, sparsebit_idx_t idx);
+void sparsebit_set_num(struct sparsebit *sbitp, sparsebit_idx_t start,
+		       sparsebit_num_t num);
+void sparsebit_set_all(struct sparsebit *sbitp);
+
+void sparsebit_clear(struct sparsebit *sbitp, sparsebit_idx_t idx);
+void sparsebit_clear_num(struct sparsebit *sbitp,
+			 sparsebit_idx_t start, sparsebit_num_t num);
+void sparsebit_clear_all(struct sparsebit *sbitp);
+
+void sparsebit_dump(FILE *stream, struct sparsebit *sbit,
+		    unsigned int indent);
+void sparsebit_validate_internal(struct sparsebit *sbit);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _TEST_SPARSEBIT_H_ */
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
new file mode 100644
index 000000000000..7ab98e41324f
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -0,0 +1,45 @@
+/*
+ * tools/testing/selftests/kvm/include/test_util.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ */
+
+#ifndef TEST_UTIL_H
+#define TEST_UTIL_H 1
+
+#include <stdlib.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+ssize_t test_write(int fd, const void *buf, size_t count);
+ssize_t test_read(int fd, void *buf, size_t count);
+int test_seq_read(const char *path, char **bufp, size_t *sizep);
+
+void test_assert(bool exp, const char *exp_str,
+		 const char *file, unsigned int line, const char *fmt, ...);
+
+#define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0])) /* element count; only meaningful for a true array, not a pointer */
+
+#define TEST_ASSERT(e, fmt, ...) /* passes the condition, its source text, file and line on to test_assert() */ \
+	test_assert((e), #e, __FILE__, __LINE__, fmt, ##__VA_ARGS__)
+
+#define ASSERT_EQ(a, b) do { /* TEST_ASSERT that a == b, passing both expressions and values */ \
+	typeof(a) __a = (a); /* each argument is evaluated exactly once */ \
+	typeof(b) __b = (b); \
+	TEST_ASSERT(__a == __b, \
+		    "ASSERT_EQ(%s, %s) failed.\n" \
+		    "\t%s is %#lx\n" \
+		    "\t%s is %#lx", /* both values are cast to unsigned long for %#lx */ \
+		    #a, #b, #a, (unsigned long) __a, #b, (unsigned long) __b); \
+} while (0)
+
+#endif /* TEST_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/x86.h b/tools/testing/selftests/kvm/include/x86.h
new file mode 100644
index 000000000000..4a5b2c4c1a0f
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86.h
@@ -0,0 +1,1043 @@
+/*
+ * tools/testing/selftests/kvm/include/x86.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ */
+
+#ifndef SELFTEST_KVM_X86_H
+#define SELFTEST_KVM_X86_H
+
+#include <assert.h>
+#include <stdint.h>
+
+#define X86_EFLAGS_FIXED	 (1u << 1)
+
+#define X86_CR4_VME		(1ul << 0)
+#define X86_CR4_PVI		(1ul << 1)
+#define X86_CR4_TSD		(1ul << 2)
+#define X86_CR4_DE		(1ul << 3)
+#define X86_CR4_PSE		(1ul << 4)
+#define X86_CR4_PAE		(1ul << 5)
+#define X86_CR4_MCE		(1ul << 6)
+#define X86_CR4_PGE		(1ul << 7)
+#define X86_CR4_PCE		(1ul << 8)
+#define X86_CR4_OSFXSR		(1ul << 9)
+#define X86_CR4_OSXMMEXCPT	(1ul << 10)
+#define X86_CR4_UMIP		(1ul << 11)
+#define X86_CR4_VMXE		(1ul << 13)
+#define X86_CR4_SMXE		(1ul << 14)
+#define X86_CR4_FSGSBASE	(1ul << 16)
+#define X86_CR4_PCIDE		(1ul << 17)
+#define X86_CR4_OSXSAVE		(1ul << 18)
+#define X86_CR4_SMEP		(1ul << 20)
+#define X86_CR4_SMAP		(1ul << 21)
+#define X86_CR4_PKE		(1ul << 22)
+
+/* The enum values match the instruction encoding of each register */
+enum x86_register {
+	RAX = 0,
+	RCX,
+	RDX,
+	RBX,
+	RSP,
+	RBP,
+	RSI,
+	RDI,
+	R8,
+	R9,
+	R10,
+	R11,
+	R12,
+	R13,
+	R14,
+	R15,
+};
+
+struct desc64 {
+	uint16_t limit0;
+	uint16_t base0;		/* base bits 15:0 */
+	unsigned base1:8, type:5, dpl:2, p:1;		/* base1: base bits 23:16 */
+	unsigned limit1:4, zero0:3, g:1, base2:8;	/* base2: base bits 31:24; promotes to int, so widen before shifting */
+	uint32_t base3;		/* base bits 63:32 */
+	uint32_t zero1;
+} __attribute__((packed));
+
+struct desc_ptr {
+	uint16_t size;
+	uint64_t address;
+} __attribute__((packed));
+
+static inline uint64_t get_desc64_base(const struct desc64 *desc)
+{
+	return ((uint64_t)desc->base3 << 32) | ((uint64_t)desc->base2 << 24) |
+		((uint64_t)desc->base1 << 16) | desc->base0; /* casts keep base2 >= 0x80 from sign-extending over base3 */
+}
+
+static inline uint64_t rdtsc(void)
+{
+	uint32_t lo, hi;
+
+	/*
+	 * Issue lfence first so that (on Intel CPUs) every earlier
+	 * instruction has executed before the counter is sampled.
+	 */
+	__asm__ __volatile__("lfence; rdtsc" : "=a"(lo), "=d"(hi));
+	return lo | ((uint64_t)hi << 32);
+}
+
+static inline uint64_t rdtscp(uint32_t *aux)
+{
+	uint32_t lo, hi;
+
+	__asm__ __volatile__("rdtscp" : "=a"(lo), "=d"(hi), "=c"(*aux));
+	return lo | ((uint64_t)hi << 32); /* EDX:EAX halves -> one 64-bit value */
+}
+
+static inline uint64_t rdmsr(uint32_t msr)
+{
+	uint32_t lo, hi;
+
+	__asm__ __volatile__("rdmsr" : "=a"(lo), "=d"(hi) : "c"(msr) : "memory");
+	/* EAX and EDX each carry one 32-bit half of the 64-bit result. */
+	return ((uint64_t)hi << 32) | lo;
+}
+
+static inline void wrmsr(uint32_t msr, uint64_t value)
+{
+	uint32_t lo = (uint32_t)value;
+	uint32_t hi = (uint32_t)(value >> 32);
+
+	__asm__ __volatile__("wrmsr" :: "a"(lo), "d"(hi), "c"(msr) : "memory");
+}
+
+
+static inline uint16_t inw(uint16_t port)
+{
+	uint16_t data;
+
+	/* 16-bit port read: port number goes in DX, the result comes back in AX. */
+	__asm__ __volatile__("in %%dx, %%ax"
+			     : "=a"(data) : "d"(port));
+
+	return data;
+}
+
+static inline uint16_t get_es(void)
+{
+	uint16_t sel;
+
+	/* Copy the ES selector into a 16-bit register or memory operand. */
+	__asm__ __volatile__("mov %%es, %0" : "=rm"(sel));
+	return sel;
+}
+
+static inline uint16_t get_cs(void)
+{
+	uint16_t sel;
+
+	/* Copy the CS selector into a 16-bit register or memory operand. */
+	__asm__ __volatile__("mov %%cs, %0" : "=rm"(sel));
+	return sel;
+}
+
+static inline uint16_t get_ss(void)
+{
+	uint16_t sel;
+
+	/* Copy the SS selector into a 16-bit register or memory operand. */
+	__asm__ __volatile__("mov %%ss, %0" : "=rm"(sel));
+	return sel;
+}
+
+static inline uint16_t get_ds(void)
+{
+	uint16_t sel;
+
+	/* Copy the DS selector into a 16-bit register or memory operand. */
+	__asm__ __volatile__("mov %%ds, %0" : "=rm"(sel));
+	return sel;
+}
+
+static inline uint16_t get_fs(void)
+{
+	uint16_t sel;
+
+	/* Copy the FS selector into a 16-bit register or memory operand. */
+	__asm__ __volatile__("mov %%fs, %0" : "=rm"(sel));
+	return sel;
+}
+
+static inline uint16_t get_gs(void)
+{
+	uint16_t sel;
+
+	/* Copy the GS selector into a 16-bit register or memory operand. */
+	__asm__ __volatile__("mov %%gs, %0" : "=rm"(sel));
+	return sel;
+}
+
+static inline uint16_t get_tr(void)
+{
+	uint16_t sel;
+
+	/* str writes its 16-bit result to a register or memory operand. */
+	__asm__ __volatile__("str %0" : "=rm"(sel));
+	return sel;
+}
+
+static inline uint64_t get_cr0(void)
+{
+	uint64_t val;
+
+	/* Read CR0 through a general-purpose register ("=r"). */
+	__asm__ __volatile__("mov %%cr0, %0" : "=r"(val));
+	return val;
+}
+
+static inline uint64_t get_cr3(void)
+{
+	uint64_t val;
+
+	/* Read CR3 through a general-purpose register ("=r"). */
+	__asm__ __volatile__("mov %%cr3, %0" : "=r"(val));
+	return val;
+}
+
+static inline uint64_t get_cr4(void)
+{
+	uint64_t val;
+
+	/* Read CR4 through a general-purpose register ("=r"). */
+	__asm__ __volatile__("mov %%cr4, %0" : "=r"(val));
+	return val;
+}
+
+static inline void set_cr4(uint64_t val)
+{
+	__asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory"); /* "memory" clobber: compiler may not cache or reorder memory accesses across the CR4 write */
+}
+
+static inline uint64_t get_gdt_base(void)
+{
+	struct desc_ptr dtr;
+
+	__asm__ __volatile__("sgdt %0" : "=m"(dtr));
+	return dtr.address;
+}
+
+static inline uint64_t get_idt_base(void)
+{
+	struct desc_ptr dtr;
+
+	__asm__ __volatile__("sidt %0" : "=m"(dtr));
+	return dtr.address;
+}
+
+#define SET_XMM(__var, __xmm) /* __asm__, not asm: matches this header and builds under -std=c99/c11 */ \
+	__asm__ __volatile__("movq %0, %%"#__xmm : : "r"(__var) : #__xmm)
+/* Load val into XMM register n (0-7) via movq; any other n is ignored. */
+static inline void set_xmm(int n, unsigned long val)
+{
+	switch (n) {
+	case 0:
+		SET_XMM(val, xmm0);
+		break;
+	case 1:
+		SET_XMM(val, xmm1);
+		break;
+	case 2:
+		SET_XMM(val, xmm2);
+		break;
+	case 3:
+		SET_XMM(val, xmm3);
+		break;
+	case 4:
+		SET_XMM(val, xmm4);
+		break;
+	case 5:
+		SET_XMM(val, xmm5);
+		break;
+	case 6:
+		SET_XMM(val, xmm6);
+		break;
+	case 7:
+		SET_XMM(val, xmm7);
+		break;
+	}
+}
+
+typedef unsigned long v1di __attribute__ ((vector_size (8))); /* no longer used below; kept in case other code relies on it */
+/*
+ * Read the low 64 bits of XMM register n (0-7) with an explicit movq.  GCC
+ * documents explicit-register local variables only as asm operands, so
+ * returning a never-written "register ... __asm__("%xmmN")" variable is fragile.
+ */
+#define GET_XMM(__xmm) ({ unsigned long __val; \
+	__asm__ __volatile__("movq %%"#__xmm", %0" : "=r"(__val)); __val; })
+
+static inline unsigned long get_xmm(int n)
+{
+	assert(n >= 0 && n <= 7);
+
+	switch (n) {
+	case 0:
+		return GET_XMM(xmm0);
+	case 1:
+		return GET_XMM(xmm1);
+	case 2:
+		return GET_XMM(xmm2);
+	case 3:
+		return GET_XMM(xmm3);
+	case 4:
+		return GET_XMM(xmm4);
+	case 5:
+		return GET_XMM(xmm5);
+	case 6:
+		return GET_XMM(xmm6);
+	case 7:
+		return GET_XMM(xmm7);
+	}
+	return 0;
+}
+
+/*
+ * Basic CPU control in CR0
+ */
+#define X86_CR0_PE          (1UL<<0) /* Protection Enable */
+#define X86_CR0_MP          (1UL<<1) /* Monitor Coprocessor */
+#define X86_CR0_EM          (1UL<<2) /* Emulation */
+#define X86_CR0_TS          (1UL<<3) /* Task Switched */
+#define X86_CR0_ET          (1UL<<4) /* Extension Type */
+#define X86_CR0_NE          (1UL<<5) /* Numeric Error */
+#define X86_CR0_WP          (1UL<<16) /* Write Protect */
+#define X86_CR0_AM          (1UL<<18) /* Alignment Mask */
+#define X86_CR0_NW          (1UL<<29) /* Not Write-through */
+#define X86_CR0_CD          (1UL<<30) /* Cache Disable */
+#define X86_CR0_PG          (1UL<<31) /* Paging */
+
+/*
+ * CPU model specific register (MSR) numbers.
+ */
+
+/* x86-64 specific MSRs */
+#define MSR_EFER		0xc0000080 /* extended feature register */
+#define MSR_STAR		0xc0000081 /* legacy mode SYSCALL target */
+#define MSR_LSTAR		0xc0000082 /* long mode SYSCALL target */
+#define MSR_CSTAR		0xc0000083 /* compat mode SYSCALL target */
+#define MSR_SYSCALL_MASK	0xc0000084 /* EFLAGS mask for syscall */
+#define MSR_FS_BASE		0xc0000100 /* 64bit FS base */
+#define MSR_GS_BASE		0xc0000101 /* 64bit GS base */
+#define MSR_KERNEL_GS_BASE	0xc0000102 /* SwapGS GS shadow */
+#define MSR_TSC_AUX		0xc0000103 /* Auxiliary TSC */
+
+/* EFER bits: */
+#define EFER_SCE		(1<<0)  /* SYSCALL/SYSRET */
+#define EFER_LME		(1<<8)  /* Long mode enable */
+#define EFER_LMA		(1<<10) /* Long mode active (read-only) */
+#define EFER_NX			(1<<11) /* No execute enable */
+#define EFER_SVME		(1<<12) /* Enable virtualization */
+#define EFER_LMSLE		(1<<13) /* Long Mode Segment Limit Enable */
+#define EFER_FFXSR		(1<<14) /* Enable Fast FXSAVE/FXRSTOR */
+
+/* Intel MSRs. Some also available on other CPUs */
+
+#define MSR_PPIN_CTL			0x0000004e
+#define MSR_PPIN			0x0000004f
+
+#define MSR_IA32_PERFCTR0		0x000000c1
+#define MSR_IA32_PERFCTR1		0x000000c2
+#define MSR_FSB_FREQ			0x000000cd
+#define MSR_PLATFORM_INFO		0x000000ce
+#define MSR_PLATFORM_INFO_CPUID_FAULT_BIT	31
+#define MSR_PLATFORM_INFO_CPUID_FAULT		BIT_ULL(MSR_PLATFORM_INFO_CPUID_FAULT_BIT)
+
+#define MSR_PKG_CST_CONFIG_CONTROL	0x000000e2
+#define NHM_C3_AUTO_DEMOTE		(1UL << 25)
+#define NHM_C1_AUTO_DEMOTE		(1UL << 26)
+#define ATM_LNC_C6_AUTO_DEMOTE		(1UL << 25)
+#define SNB_C1_AUTO_UNDEMOTE		(1UL << 27)
+#define SNB_C3_AUTO_UNDEMOTE		(1UL << 28)
+
+#define MSR_MTRRcap			0x000000fe
+#define MSR_IA32_BBL_CR_CTL		0x00000119
+#define MSR_IA32_BBL_CR_CTL3		0x0000011e
+
+#define MSR_IA32_SYSENTER_CS		0x00000174
+#define MSR_IA32_SYSENTER_ESP		0x00000175
+#define MSR_IA32_SYSENTER_EIP		0x00000176
+
+#define MSR_IA32_MCG_CAP		0x00000179
+#define MSR_IA32_MCG_STATUS		0x0000017a
+#define MSR_IA32_MCG_CTL		0x0000017b
+#define MSR_IA32_MCG_EXT_CTL		0x000004d0
+
+#define MSR_OFFCORE_RSP_0		0x000001a6
+#define MSR_OFFCORE_RSP_1		0x000001a7
+#define MSR_TURBO_RATIO_LIMIT		0x000001ad
+#define MSR_TURBO_RATIO_LIMIT1		0x000001ae
+#define MSR_TURBO_RATIO_LIMIT2		0x000001af
+
+#define MSR_LBR_SELECT			0x000001c8
+#define MSR_LBR_TOS			0x000001c9
+#define MSR_LBR_NHM_FROM		0x00000680
+#define MSR_LBR_NHM_TO			0x000006c0
+#define MSR_LBR_CORE_FROM		0x00000040
+#define MSR_LBR_CORE_TO			0x00000060
+
+#define MSR_LBR_INFO_0			0x00000dc0 /* ... 0xddf for _31 */
+#define LBR_INFO_MISPRED		BIT_ULL(63)
+#define LBR_INFO_IN_TX			BIT_ULL(62)
+#define LBR_INFO_ABORT			BIT_ULL(61)
+#define LBR_INFO_CYCLES			0xffff
+
+#define MSR_IA32_PEBS_ENABLE		0x000003f1
+#define MSR_IA32_DS_AREA		0x00000600
+#define MSR_IA32_PERF_CAPABILITIES	0x00000345
+#define MSR_PEBS_LD_LAT_THRESHOLD	0x000003f6
+
+#define MSR_IA32_RTIT_CTL		0x00000570
+#define MSR_IA32_RTIT_STATUS		0x00000571
+#define MSR_IA32_RTIT_ADDR0_A		0x00000580
+#define MSR_IA32_RTIT_ADDR0_B		0x00000581
+#define MSR_IA32_RTIT_ADDR1_A		0x00000582
+#define MSR_IA32_RTIT_ADDR1_B		0x00000583
+#define MSR_IA32_RTIT_ADDR2_A		0x00000584
+#define MSR_IA32_RTIT_ADDR2_B		0x00000585
+#define MSR_IA32_RTIT_ADDR3_A		0x00000586
+#define MSR_IA32_RTIT_ADDR3_B		0x00000587
+#define MSR_IA32_RTIT_CR3_MATCH		0x00000572
+#define MSR_IA32_RTIT_OUTPUT_BASE	0x00000560
+#define MSR_IA32_RTIT_OUTPUT_MASK	0x00000561
+
+#define MSR_MTRRfix64K_00000		0x00000250
+#define MSR_MTRRfix16K_80000		0x00000258
+#define MSR_MTRRfix16K_A0000		0x00000259
+#define MSR_MTRRfix4K_C0000		0x00000268
+#define MSR_MTRRfix4K_C8000		0x00000269
+#define MSR_MTRRfix4K_D0000		0x0000026a
+#define MSR_MTRRfix4K_D8000		0x0000026b
+#define MSR_MTRRfix4K_E0000		0x0000026c
+#define MSR_MTRRfix4K_E8000		0x0000026d
+#define MSR_MTRRfix4K_F0000		0x0000026e
+#define MSR_MTRRfix4K_F8000		0x0000026f
+#define MSR_MTRRdefType			0x000002ff
+
+#define MSR_IA32_CR_PAT			0x00000277
+
+#define MSR_IA32_DEBUGCTLMSR		0x000001d9
+#define MSR_IA32_LASTBRANCHFROMIP	0x000001db
+#define MSR_IA32_LASTBRANCHTOIP		0x000001dc
+#define MSR_IA32_LASTINTFROMIP		0x000001dd
+#define MSR_IA32_LASTINTTOIP		0x000001de
+
+/* DEBUGCTLMSR bits (others vary by model): */
+#define DEBUGCTLMSR_LBR			(1UL <<  0) /* last branch recording */
+#define DEBUGCTLMSR_BTF_SHIFT		1
+#define DEBUGCTLMSR_BTF			(1UL <<  1) /* single-step on branches */
+#define DEBUGCTLMSR_TR			(1UL <<  6)
+#define DEBUGCTLMSR_BTS			(1UL <<  7)
+#define DEBUGCTLMSR_BTINT		(1UL <<  8)
+#define DEBUGCTLMSR_BTS_OFF_OS		(1UL <<  9)
+#define DEBUGCTLMSR_BTS_OFF_USR		(1UL << 10)
+#define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI	(1UL << 11)
+#define DEBUGCTLMSR_FREEZE_IN_SMM_BIT	14
+#define DEBUGCTLMSR_FREEZE_IN_SMM	(1UL << DEBUGCTLMSR_FREEZE_IN_SMM_BIT)
+
+#define MSR_PEBS_FRONTEND		0x000003f7
+
+#define MSR_IA32_POWER_CTL		0x000001fc
+
+#define MSR_IA32_MC0_CTL		0x00000400
+#define MSR_IA32_MC0_STATUS		0x00000401
+#define MSR_IA32_MC0_ADDR		0x00000402
+#define MSR_IA32_MC0_MISC		0x00000403
+
+/* C-state Residency Counters */
+#define MSR_PKG_C3_RESIDENCY		0x000003f8
+#define MSR_PKG_C6_RESIDENCY		0x000003f9
+#define MSR_ATOM_PKG_C6_RESIDENCY	0x000003fa
+#define MSR_PKG_C7_RESIDENCY		0x000003fa
+#define MSR_CORE_C3_RESIDENCY		0x000003fc
+#define MSR_CORE_C6_RESIDENCY		0x000003fd
+#define MSR_CORE_C7_RESIDENCY		0x000003fe
+#define MSR_KNL_CORE_C6_RESIDENCY	0x000003ff
+#define MSR_PKG_C2_RESIDENCY		0x0000060d
+#define MSR_PKG_C8_RESIDENCY		0x00000630
+#define MSR_PKG_C9_RESIDENCY		0x00000631
+#define MSR_PKG_C10_RESIDENCY		0x00000632
+
+/* Interrupt Response Limit */
+#define MSR_PKGC3_IRTL			0x0000060a
+#define MSR_PKGC6_IRTL			0x0000060b
+#define MSR_PKGC7_IRTL			0x0000060c
+#define MSR_PKGC8_IRTL			0x00000633
+#define MSR_PKGC9_IRTL			0x00000634
+#define MSR_PKGC10_IRTL			0x00000635
+
+/* Run Time Average Power Limiting (RAPL) Interface */
+
+#define MSR_RAPL_POWER_UNIT		0x00000606
+
+#define MSR_PKG_POWER_LIMIT		0x00000610
+#define MSR_PKG_ENERGY_STATUS		0x00000611
+#define MSR_PKG_PERF_STATUS		0x00000613
+#define MSR_PKG_POWER_INFO		0x00000614
+
+#define MSR_DRAM_POWER_LIMIT		0x00000618
+#define MSR_DRAM_ENERGY_STATUS		0x00000619
+#define MSR_DRAM_PERF_STATUS		0x0000061b
+#define MSR_DRAM_POWER_INFO		0x0000061c
+
+#define MSR_PP0_POWER_LIMIT		0x00000638
+#define MSR_PP0_ENERGY_STATUS		0x00000639
+#define MSR_PP0_POLICY			0x0000063a
+#define MSR_PP0_PERF_STATUS		0x0000063b
+
+#define MSR_PP1_POWER_LIMIT		0x00000640
+#define MSR_PP1_ENERGY_STATUS		0x00000641
+#define MSR_PP1_POLICY			0x00000642
+
+/* Config TDP MSRs */
+#define MSR_CONFIG_TDP_NOMINAL		0x00000648
+#define MSR_CONFIG_TDP_LEVEL_1		0x00000649
+#define MSR_CONFIG_TDP_LEVEL_2		0x0000064A
+#define MSR_CONFIG_TDP_CONTROL		0x0000064B
+#define MSR_TURBO_ACTIVATION_RATIO	0x0000064C
+
+#define MSR_PLATFORM_ENERGY_STATUS	0x0000064D
+
+#define MSR_PKG_WEIGHTED_CORE_C0_RES	0x00000658
+#define MSR_PKG_ANY_CORE_C0_RES		0x00000659
+#define MSR_PKG_ANY_GFXE_C0_RES		0x0000065A
+#define MSR_PKG_BOTH_CORE_GFXE_C0_RES	0x0000065B
+
+#define MSR_CORE_C1_RES			0x00000660
+#define MSR_MODULE_C6_RES_MS		0x00000664
+
+#define MSR_CC6_DEMOTION_POLICY_CONFIG	0x00000668
+#define MSR_MC6_DEMOTION_POLICY_CONFIG	0x00000669
+
+#define MSR_ATOM_CORE_RATIOS		0x0000066a
+#define MSR_ATOM_CORE_VIDS		0x0000066b
+#define MSR_ATOM_CORE_TURBO_RATIOS	0x0000066c
+#define MSR_ATOM_CORE_TURBO_VIDS	0x0000066d
+
+
+#define MSR_CORE_PERF_LIMIT_REASONS	0x00000690
+#define MSR_GFX_PERF_LIMIT_REASONS	0x000006B0
+#define MSR_RING_PERF_LIMIT_REASONS	0x000006B1
+
+/* Hardware P state interface */
+#define MSR_PPERF			0x0000064e
+#define MSR_PERF_LIMIT_REASONS		0x0000064f
+#define MSR_PM_ENABLE			0x00000770
+#define MSR_HWP_CAPABILITIES		0x00000771
+#define MSR_HWP_REQUEST_PKG		0x00000772
+#define MSR_HWP_INTERRUPT		0x00000773
+#define MSR_HWP_REQUEST			0x00000774
+#define MSR_HWP_STATUS			0x00000777
+
+/* CPUID.6.EAX */
+#define HWP_BASE_BIT			(1<<7)
+#define HWP_NOTIFICATIONS_BIT		(1<<8)
+#define HWP_ACTIVITY_WINDOW_BIT		(1<<9)
+#define HWP_ENERGY_PERF_PREFERENCE_BIT	(1<<10)
+#define HWP_PACKAGE_LEVEL_REQUEST_BIT	(1<<11)
+
+/* IA32_HWP_CAPABILITIES */
+#define HWP_HIGHEST_PERF(x)		(((x) >> 0) & 0xff)
+#define HWP_GUARANTEED_PERF(x)		(((x) >> 8) & 0xff)
+#define HWP_MOSTEFFICIENT_PERF(x)	(((x) >> 16) & 0xff)
+#define HWP_LOWEST_PERF(x)		(((x) >> 24) & 0xff)
+
+/* IA32_HWP_REQUEST (arguments are parenthesized so expressions such as a | b expand safely) */
+#define HWP_MIN_PERF(x)			((x) & 0xff)
+#define HWP_MAX_PERF(x)			(((x) & 0xff) << 8)
+#define HWP_DESIRED_PERF(x)		(((x) & 0xff) << 16)
+#define HWP_ENERGY_PERF_PREFERENCE(x)	(((unsigned long long)(x) & 0xff) << 24)
+#define HWP_EPP_PERFORMANCE		0x00
+#define HWP_EPP_BALANCE_PERFORMANCE	0x80
+#define HWP_EPP_BALANCE_POWERSAVE	0xC0
+#define HWP_EPP_POWERSAVE		0xFF
+#define HWP_ACTIVITY_WINDOW(x)		((unsigned long long)((x) & 0xff3) << 32) /* NOTE(review): 0xff3 looks like a typo for 0x3ff - confirm against the SDM field layout */
+#define HWP_PACKAGE_CONTROL(x)		((unsigned long long)((x) & 0x1) << 42)
+
+/* IA32_HWP_STATUS (arguments are parenthesized so compound expressions expand safely) */
+#define HWP_GUARANTEED_CHANGE(x)	((x) & 0x1)
+#define HWP_EXCURSION_TO_MINIMUM(x)	((x) & 0x4)
+
+/* IA32_HWP_INTERRUPT (arguments are parenthesized so compound expressions expand safely) */
+#define HWP_CHANGE_TO_GUARANTEED_INT(x)	((x) & 0x1)
+#define HWP_EXCURSION_TO_MINIMUM_INT(x)	((x) & 0x2)
+
+#define MSR_AMD64_MC0_MASK		0xc0010044
+
+#define MSR_IA32_MCx_CTL(x)		(MSR_IA32_MC0_CTL + 4*(x))
+#define MSR_IA32_MCx_STATUS(x)		(MSR_IA32_MC0_STATUS + 4*(x))
+#define MSR_IA32_MCx_ADDR(x)		(MSR_IA32_MC0_ADDR + 4*(x))
+#define MSR_IA32_MCx_MISC(x)		(MSR_IA32_MC0_MISC + 4*(x))
+
+#define MSR_AMD64_MCx_MASK(x)		(MSR_AMD64_MC0_MASK + (x))
+
+/* These are consecutive and not in the normal 4er MCE bank block */
+#define MSR_IA32_MC0_CTL2		0x00000280
+#define MSR_IA32_MCx_CTL2(x)		(MSR_IA32_MC0_CTL2 + (x))
+
+#define MSR_P6_PERFCTR0			0x000000c1
+#define MSR_P6_PERFCTR1			0x000000c2
+#define MSR_P6_EVNTSEL0			0x00000186
+#define MSR_P6_EVNTSEL1			0x00000187
+
+#define MSR_KNC_PERFCTR0               0x00000020
+#define MSR_KNC_PERFCTR1               0x00000021
+#define MSR_KNC_EVNTSEL0               0x00000028
+#define MSR_KNC_EVNTSEL1               0x00000029
+
+/* Alternative perfctr range with full access. */
+#define MSR_IA32_PMC0			0x000004c1
+
+/* AMD64 MSRs. Not complete. See the architecture manual for a more
+   complete list. */
+
+#define MSR_AMD64_PATCH_LEVEL		0x0000008b
+#define MSR_AMD64_TSC_RATIO		0xc0000104
+#define MSR_AMD64_NB_CFG		0xc001001f
+#define MSR_AMD64_PATCH_LOADER		0xc0010020
+#define MSR_AMD64_OSVW_ID_LENGTH	0xc0010140
+#define MSR_AMD64_OSVW_STATUS		0xc0010141
+#define MSR_AMD64_LS_CFG		0xc0011020
+#define MSR_AMD64_DC_CFG		0xc0011022
+#define MSR_AMD64_BU_CFG2		0xc001102a
+#define MSR_AMD64_IBSFETCHCTL		0xc0011030
+#define MSR_AMD64_IBSFETCHLINAD		0xc0011031
+#define MSR_AMD64_IBSFETCHPHYSAD	0xc0011032
+#define MSR_AMD64_IBSFETCH_REG_COUNT	3
+#define MSR_AMD64_IBSFETCH_REG_MASK	((1UL<<MSR_AMD64_IBSFETCH_REG_COUNT)-1)
+#define MSR_AMD64_IBSOPCTL		0xc0011033
+#define MSR_AMD64_IBSOPRIP		0xc0011034
+#define MSR_AMD64_IBSOPDATA		0xc0011035
+#define MSR_AMD64_IBSOPDATA2		0xc0011036
+#define MSR_AMD64_IBSOPDATA3		0xc0011037
+#define MSR_AMD64_IBSDCLINAD		0xc0011038
+#define MSR_AMD64_IBSDCPHYSAD		0xc0011039
+#define MSR_AMD64_IBSOP_REG_COUNT	7
+#define MSR_AMD64_IBSOP_REG_MASK	((1UL<<MSR_AMD64_IBSOP_REG_COUNT)-1)
+#define MSR_AMD64_IBSCTL		0xc001103a
+#define MSR_AMD64_IBSBRTARGET		0xc001103b
+#define MSR_AMD64_IBSOPDATA4		0xc001103d
+#define MSR_AMD64_IBS_REG_COUNT_MAX	8 /* includes MSR_AMD64_IBSBRTARGET */
+#define MSR_AMD64_SEV			0xc0010131
+#define MSR_AMD64_SEV_ENABLED_BIT	0
+#define MSR_AMD64_SEV_ENABLED		BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT)
+
+/* Fam 17h MSRs */
+#define MSR_F17H_IRPERF			0xc00000e9
+
+/* Fam 16h MSRs */
+#define MSR_F16H_L2I_PERF_CTL		0xc0010230
+#define MSR_F16H_L2I_PERF_CTR		0xc0010231
+#define MSR_F16H_DR1_ADDR_MASK		0xc0011019
+#define MSR_F16H_DR2_ADDR_MASK		0xc001101a
+#define MSR_F16H_DR3_ADDR_MASK		0xc001101b
+#define MSR_F16H_DR0_ADDR_MASK		0xc0011027
+
+/* Fam 15h MSRs */
+#define MSR_F15H_PERF_CTL		0xc0010200
+#define MSR_F15H_PERF_CTR		0xc0010201
+#define MSR_F15H_NB_PERF_CTL		0xc0010240
+#define MSR_F15H_NB_PERF_CTR		0xc0010241
+#define MSR_F15H_PTSC			0xc0010280
+#define MSR_F15H_IC_CFG			0xc0011021
+
+/* Fam 10h MSRs */
+#define MSR_FAM10H_MMIO_CONF_BASE	0xc0010058
+#define FAM10H_MMIO_CONF_ENABLE		(1<<0)
+#define FAM10H_MMIO_CONF_BUSRANGE_MASK	0xf
+#define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2
+#define FAM10H_MMIO_CONF_BASE_MASK	0xfffffffULL
+#define FAM10H_MMIO_CONF_BASE_SHIFT	20
+#define MSR_FAM10H_NODE_ID		0xc001100c
+#define MSR_F10H_DECFG			0xc0011029
+#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT	1
+#define MSR_F10H_DECFG_LFENCE_SERIALIZE		BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT)
+
+/* K8 MSRs */
+#define MSR_K8_TOP_MEM1			0xc001001a
+#define MSR_K8_TOP_MEM2			0xc001001d
+#define MSR_K8_SYSCFG			0xc0010010
+#define MSR_K8_SYSCFG_MEM_ENCRYPT_BIT	23
+#define MSR_K8_SYSCFG_MEM_ENCRYPT	BIT_ULL(MSR_K8_SYSCFG_MEM_ENCRYPT_BIT)
+#define MSR_K8_INT_PENDING_MSG		0xc0010055
+/* C1E active bits in int pending message */
+#define K8_INTP_C1E_ACTIVE_MASK		0x18000000
+#define MSR_K8_TSEG_ADDR		0xc0010112
+#define MSR_K8_TSEG_MASK		0xc0010113
+#define K8_MTRRFIXRANGE_DRAM_ENABLE	0x00040000 /* MtrrFixDramEn bit    */
+#define K8_MTRRFIXRANGE_DRAM_MODIFY	0x00080000 /* MtrrFixDramModEn bit */
+#define K8_MTRR_RDMEM_WRMEM_MASK	0x18181818 /* Mask: RdMem|WrMem    */
+
+/* K7 MSRs */
+#define MSR_K7_EVNTSEL0			0xc0010000
+#define MSR_K7_PERFCTR0			0xc0010004
+#define MSR_K7_EVNTSEL1			0xc0010001
+#define MSR_K7_PERFCTR1			0xc0010005
+#define MSR_K7_EVNTSEL2			0xc0010002
+#define MSR_K7_PERFCTR2			0xc0010006
+#define MSR_K7_EVNTSEL3			0xc0010003
+#define MSR_K7_PERFCTR3			0xc0010007
+#define MSR_K7_CLK_CTL			0xc001001b
+#define MSR_K7_HWCR			0xc0010015
+#define MSR_K7_HWCR_SMMLOCK_BIT		0
+#define MSR_K7_HWCR_SMMLOCK		BIT_ULL(MSR_K7_HWCR_SMMLOCK_BIT)
+#define MSR_K7_FID_VID_CTL		0xc0010041
+#define MSR_K7_FID_VID_STATUS		0xc0010042
+
+/* K6 MSRs */
+#define MSR_K6_WHCR			0xc0000082
+#define MSR_K6_UWCCR			0xc0000085
+#define MSR_K6_EPMR			0xc0000086
+#define MSR_K6_PSOR			0xc0000087
+#define MSR_K6_PFIR			0xc0000088
+
+/* Centaur-Hauls/IDT defined MSRs. */
+#define MSR_IDT_FCR1			0x00000107
+#define MSR_IDT_FCR2			0x00000108
+#define MSR_IDT_FCR3			0x00000109
+#define MSR_IDT_FCR4			0x0000010a
+
+#define MSR_IDT_MCR0			0x00000110
+#define MSR_IDT_MCR1			0x00000111
+#define MSR_IDT_MCR2			0x00000112
+#define MSR_IDT_MCR3			0x00000113
+#define MSR_IDT_MCR4			0x00000114
+#define MSR_IDT_MCR5			0x00000115
+#define MSR_IDT_MCR6			0x00000116
+#define MSR_IDT_MCR7			0x00000117
+#define MSR_IDT_MCR_CTRL		0x00000120
+
+/* VIA Cyrix defined MSRs */
+#define MSR_VIA_FCR			0x00001107
+#define MSR_VIA_LONGHAUL		0x0000110a
+#define MSR_VIA_RNG			0x0000110b
+#define MSR_VIA_BCR2			0x00001147
+
+/* Transmeta defined MSRs */
+#define MSR_TMTA_LONGRUN_CTRL		0x80868010
+#define MSR_TMTA_LONGRUN_FLAGS		0x80868011
+#define MSR_TMTA_LRTI_READOUT		0x80868018
+#define MSR_TMTA_LRTI_VOLT_MHZ		0x8086801a
+
+/* Intel defined MSRs. */
+#define MSR_IA32_P5_MC_ADDR		0x00000000
+#define MSR_IA32_P5_MC_TYPE		0x00000001
+#define MSR_IA32_TSC			0x00000010
+#define MSR_IA32_PLATFORM_ID		0x00000017
+#define MSR_IA32_EBL_CR_POWERON		0x0000002a
+#define MSR_EBC_FREQUENCY_ID		0x0000002c
+#define MSR_SMI_COUNT			0x00000034
+#define MSR_IA32_FEATURE_CONTROL        0x0000003a
+#define MSR_IA32_TSC_ADJUST             0x0000003b
+#define MSR_IA32_BNDCFGS		0x00000d90
+
+#define MSR_IA32_BNDCFGS_RSVD		0x00000ffc
+
+#define MSR_IA32_XSS			0x00000da0
+
+#define FEATURE_CONTROL_LOCKED				(1<<0)
+#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX	(1<<1)
+#define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX	(1<<2)
+#define FEATURE_CONTROL_LMCE				(1<<20)
+
+#define MSR_IA32_APICBASE		0x0000001b
+#define MSR_IA32_APICBASE_BSP		(1<<8)
+#define MSR_IA32_APICBASE_ENABLE	(1<<11)
+#define MSR_IA32_APICBASE_BASE		(0xfffff<<12)
+
+#define MSR_IA32_TSCDEADLINE		0x000006e0
+
+#define MSR_IA32_UCODE_WRITE		0x00000079
+#define MSR_IA32_UCODE_REV		0x0000008b
+
+#define MSR_IA32_SMM_MONITOR_CTL	0x0000009b
+#define MSR_IA32_SMBASE			0x0000009e
+
+#define MSR_IA32_PERF_STATUS		0x00000198
+#define MSR_IA32_PERF_CTL		0x00000199
+#define INTEL_PERF_CTL_MASK		0xffff
+#define MSR_AMD_PSTATE_DEF_BASE		0xc0010064
+#define MSR_AMD_PERF_STATUS		0xc0010063
+#define MSR_AMD_PERF_CTL		0xc0010062
+
+#define MSR_IA32_MPERF			0x000000e7
+#define MSR_IA32_APERF			0x000000e8
+
+#define MSR_IA32_THERM_CONTROL		0x0000019a
+#define MSR_IA32_THERM_INTERRUPT	0x0000019b
+
+#define THERM_INT_HIGH_ENABLE		(1 << 0)
+#define THERM_INT_LOW_ENABLE		(1 << 1)
+#define THERM_INT_PLN_ENABLE		(1 << 24)
+
+#define MSR_IA32_THERM_STATUS		0x0000019c
+
+#define THERM_STATUS_PROCHOT		(1 << 0)
+#define THERM_STATUS_POWER_LIMIT	(1 << 10)
+
+#define MSR_THERM2_CTL			0x0000019d
+
+#define MSR_THERM2_CTL_TM_SELECT	(1ULL << 16)
+
+#define MSR_IA32_MISC_ENABLE		0x000001a0
+
+#define MSR_IA32_TEMPERATURE_TARGET	0x000001a2
+
+#define MSR_MISC_FEATURE_CONTROL	0x000001a4
+#define MSR_MISC_PWR_MGMT		0x000001aa
+
+#define MSR_IA32_ENERGY_PERF_BIAS	0x000001b0
+#define ENERGY_PERF_BIAS_PERFORMANCE		0
+#define ENERGY_PERF_BIAS_BALANCE_PERFORMANCE	4
+#define ENERGY_PERF_BIAS_NORMAL			6
+#define ENERGY_PERF_BIAS_BALANCE_POWERSAVE	8
+#define ENERGY_PERF_BIAS_POWERSAVE		15
+
+#define MSR_IA32_PACKAGE_THERM_STATUS		0x000001b1
+
+#define PACKAGE_THERM_STATUS_PROCHOT		(1 << 0)
+#define PACKAGE_THERM_STATUS_POWER_LIMIT	(1 << 10)
+
+#define MSR_IA32_PACKAGE_THERM_INTERRUPT	0x000001b2
+
+#define PACKAGE_THERM_INT_HIGH_ENABLE		(1 << 0)
+#define PACKAGE_THERM_INT_LOW_ENABLE		(1 << 1)
+#define PACKAGE_THERM_INT_PLN_ENABLE		(1 << 24)
+
+/* Thermal Thresholds Support */
+#define THERM_INT_THRESHOLD0_ENABLE    (1 << 15)
+#define THERM_SHIFT_THRESHOLD0        8
+#define THERM_MASK_THRESHOLD0          (0x7f << THERM_SHIFT_THRESHOLD0)
+#define THERM_INT_THRESHOLD1_ENABLE    (1 << 23)
+#define THERM_SHIFT_THRESHOLD1        16
+#define THERM_MASK_THRESHOLD1          (0x7f << THERM_SHIFT_THRESHOLD1)
+#define THERM_STATUS_THRESHOLD0        (1 << 6)
+#define THERM_LOG_THRESHOLD0           (1 << 7)
+#define THERM_STATUS_THRESHOLD1        (1 << 8)
+#define THERM_LOG_THRESHOLD1           (1 << 9)
+
+/* MISC_ENABLE bits: architectural */
+#define MSR_IA32_MISC_ENABLE_FAST_STRING_BIT		0
+#define MSR_IA32_MISC_ENABLE_FAST_STRING		(1ULL << MSR_IA32_MISC_ENABLE_FAST_STRING_BIT)
+#define MSR_IA32_MISC_ENABLE_TCC_BIT			1
+#define MSR_IA32_MISC_ENABLE_TCC			(1ULL << MSR_IA32_MISC_ENABLE_TCC_BIT)
+#define MSR_IA32_MISC_ENABLE_EMON_BIT			7
+#define MSR_IA32_MISC_ENABLE_EMON			(1ULL << MSR_IA32_MISC_ENABLE_EMON_BIT)
+#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL_BIT		11
+#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL		(1ULL << MSR_IA32_MISC_ENABLE_BTS_UNAVAIL_BIT)
+#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL_BIT		12
+#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL		(1ULL << MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL_BIT)
+#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP_BIT	16
+#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP		(1ULL << MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP_BIT)
+#define MSR_IA32_MISC_ENABLE_MWAIT_BIT			18
+#define MSR_IA32_MISC_ENABLE_MWAIT			(1ULL << MSR_IA32_MISC_ENABLE_MWAIT_BIT)
+#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT		22
+#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID		(1ULL << MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT)
+#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT		23
+#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE		(1ULL << MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT		34
+#define MSR_IA32_MISC_ENABLE_XD_DISABLE			(1ULL << MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT)
+
+/* MISC_ENABLE bits: model-specific, meaning may vary from core to core */
+#define MSR_IA32_MISC_ENABLE_X87_COMPAT_BIT		2
+#define MSR_IA32_MISC_ENABLE_X87_COMPAT			(1ULL << MSR_IA32_MISC_ENABLE_X87_COMPAT_BIT)
+#define MSR_IA32_MISC_ENABLE_TM1_BIT			3
+#define MSR_IA32_MISC_ENABLE_TM1			(1ULL << MSR_IA32_MISC_ENABLE_TM1_BIT)
+#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE_BIT	4
+#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE		(1ULL << MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE_BIT	6
+#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE		(1ULL << MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK_BIT		8
+#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK		(1ULL << MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK_BIT)
+#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT	9
+#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE		(1ULL << MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_FERR_BIT			10
+#define MSR_IA32_MISC_ENABLE_FERR			(1ULL << MSR_IA32_MISC_ENABLE_FERR_BIT)
+#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX_BIT		10
+#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX		(1ULL << MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX_BIT)
+#define MSR_IA32_MISC_ENABLE_TM2_BIT			13
+#define MSR_IA32_MISC_ENABLE_TM2			(1ULL << MSR_IA32_MISC_ENABLE_TM2_BIT)
+#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE_BIT	19
+#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE		(1ULL << MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK_BIT		20
+#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK		(1ULL << MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK_BIT)
+#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT_BIT		24
+#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT		(1ULL << MSR_IA32_MISC_ENABLE_L1D_CONTEXT_BIT)
+#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE_BIT	37
+#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE		(1ULL << MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE_BIT		38
+#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE		(1ULL << MSR_IA32_MISC_ENABLE_TURBO_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT	39
+#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE		(1ULL << MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT)
+
+/* MISC_FEATURES_ENABLES non-architectural features */
+#define MSR_MISC_FEATURES_ENABLES	0x00000140
+
+#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT	0
+#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT		BIT_ULL(MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT)
+#define MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT	1
+
+#define MSR_IA32_TSC_DEADLINE		0x000006E0
+
+/* P4/Xeon+ specific */
+#define MSR_IA32_MCG_EAX		0x00000180
+#define MSR_IA32_MCG_EBX		0x00000181
+#define MSR_IA32_MCG_ECX		0x00000182
+#define MSR_IA32_MCG_EDX		0x00000183
+#define MSR_IA32_MCG_ESI		0x00000184
+#define MSR_IA32_MCG_EDI		0x00000185
+#define MSR_IA32_MCG_EBP		0x00000186
+#define MSR_IA32_MCG_ESP		0x00000187
+#define MSR_IA32_MCG_EFLAGS		0x00000188
+#define MSR_IA32_MCG_EIP		0x00000189
+#define MSR_IA32_MCG_RESERVED		0x0000018a
+
+/* Pentium IV performance counter MSRs */
+#define MSR_P4_BPU_PERFCTR0		0x00000300
+#define MSR_P4_BPU_PERFCTR1		0x00000301
+#define MSR_P4_BPU_PERFCTR2		0x00000302
+#define MSR_P4_BPU_PERFCTR3		0x00000303
+#define MSR_P4_MS_PERFCTR0		0x00000304
+#define MSR_P4_MS_PERFCTR1		0x00000305
+#define MSR_P4_MS_PERFCTR2		0x00000306
+#define MSR_P4_MS_PERFCTR3		0x00000307
+#define MSR_P4_FLAME_PERFCTR0		0x00000308
+#define MSR_P4_FLAME_PERFCTR1		0x00000309
+#define MSR_P4_FLAME_PERFCTR2		0x0000030a
+#define MSR_P4_FLAME_PERFCTR3		0x0000030b
+#define MSR_P4_IQ_PERFCTR0		0x0000030c
+#define MSR_P4_IQ_PERFCTR1		0x0000030d
+#define MSR_P4_IQ_PERFCTR2		0x0000030e
+#define MSR_P4_IQ_PERFCTR3		0x0000030f
+#define MSR_P4_IQ_PERFCTR4		0x00000310
+#define MSR_P4_IQ_PERFCTR5		0x00000311
+#define MSR_P4_BPU_CCCR0		0x00000360
+#define MSR_P4_BPU_CCCR1		0x00000361
+#define MSR_P4_BPU_CCCR2		0x00000362
+#define MSR_P4_BPU_CCCR3		0x00000363
+#define MSR_P4_MS_CCCR0			0x00000364
+#define MSR_P4_MS_CCCR1			0x00000365
+#define MSR_P4_MS_CCCR2			0x00000366
+#define MSR_P4_MS_CCCR3			0x00000367
+#define MSR_P4_FLAME_CCCR0		0x00000368
+#define MSR_P4_FLAME_CCCR1		0x00000369
+#define MSR_P4_FLAME_CCCR2		0x0000036a
+#define MSR_P4_FLAME_CCCR3		0x0000036b
+#define MSR_P4_IQ_CCCR0			0x0000036c
+#define MSR_P4_IQ_CCCR1			0x0000036d
+#define MSR_P4_IQ_CCCR2			0x0000036e
+#define MSR_P4_IQ_CCCR3			0x0000036f
+#define MSR_P4_IQ_CCCR4			0x00000370
+#define MSR_P4_IQ_CCCR5			0x00000371
+#define MSR_P4_ALF_ESCR0		0x000003ca
+#define MSR_P4_ALF_ESCR1		0x000003cb
+#define MSR_P4_BPU_ESCR0		0x000003b2
+#define MSR_P4_BPU_ESCR1		0x000003b3
+#define MSR_P4_BSU_ESCR0		0x000003a0
+#define MSR_P4_BSU_ESCR1		0x000003a1
+#define MSR_P4_CRU_ESCR0		0x000003b8
+#define MSR_P4_CRU_ESCR1		0x000003b9
+#define MSR_P4_CRU_ESCR2		0x000003cc
+#define MSR_P4_CRU_ESCR3		0x000003cd
+#define MSR_P4_CRU_ESCR4		0x000003e0
+#define MSR_P4_CRU_ESCR5		0x000003e1
+#define MSR_P4_DAC_ESCR0		0x000003a8
+#define MSR_P4_DAC_ESCR1		0x000003a9
+#define MSR_P4_FIRM_ESCR0		0x000003a4
+#define MSR_P4_FIRM_ESCR1		0x000003a5
+#define MSR_P4_FLAME_ESCR0		0x000003a6
+#define MSR_P4_FLAME_ESCR1		0x000003a7
+#define MSR_P4_FSB_ESCR0		0x000003a2
+#define MSR_P4_FSB_ESCR1		0x000003a3
+#define MSR_P4_IQ_ESCR0			0x000003ba
+#define MSR_P4_IQ_ESCR1			0x000003bb
+#define MSR_P4_IS_ESCR0			0x000003b4
+#define MSR_P4_IS_ESCR1			0x000003b5
+#define MSR_P4_ITLB_ESCR0		0x000003b6
+#define MSR_P4_ITLB_ESCR1		0x000003b7
+#define MSR_P4_IX_ESCR0			0x000003c8
+#define MSR_P4_IX_ESCR1			0x000003c9
+#define MSR_P4_MOB_ESCR0		0x000003aa
+#define MSR_P4_MOB_ESCR1		0x000003ab
+#define MSR_P4_MS_ESCR0			0x000003c0
+#define MSR_P4_MS_ESCR1			0x000003c1
+#define MSR_P4_PMH_ESCR0		0x000003ac
+#define MSR_P4_PMH_ESCR1		0x000003ad
+#define MSR_P4_RAT_ESCR0		0x000003bc
+#define MSR_P4_RAT_ESCR1		0x000003bd
+#define MSR_P4_SAAT_ESCR0		0x000003ae
+#define MSR_P4_SAAT_ESCR1		0x000003af
+#define MSR_P4_SSU_ESCR0		0x000003be
+#define MSR_P4_SSU_ESCR1		0x000003bf /* guess: not in manual */
+
+#define MSR_P4_TBPU_ESCR0		0x000003c2
+#define MSR_P4_TBPU_ESCR1		0x000003c3
+#define MSR_P4_TC_ESCR0			0x000003c4
+#define MSR_P4_TC_ESCR1			0x000003c5
+#define MSR_P4_U2L_ESCR0		0x000003b0
+#define MSR_P4_U2L_ESCR1		0x000003b1
+
+#define MSR_P4_PEBS_MATRIX_VERT		0x000003f2
+
+/* Intel Core-based CPU performance counters */
+#define MSR_CORE_PERF_FIXED_CTR0	0x00000309
+#define MSR_CORE_PERF_FIXED_CTR1	0x0000030a
+#define MSR_CORE_PERF_FIXED_CTR2	0x0000030b
+#define MSR_CORE_PERF_FIXED_CTR_CTRL	0x0000038d
+#define MSR_CORE_PERF_GLOBAL_STATUS	0x0000038e
+#define MSR_CORE_PERF_GLOBAL_CTRL	0x0000038f
+#define MSR_CORE_PERF_GLOBAL_OVF_CTRL	0x00000390
+
+/* Geode defined MSRs */
+#define MSR_GEODE_BUSCONT_CONF0		0x00001900
+
+/* Intel VT MSRs */
+#define MSR_IA32_VMX_BASIC              0x00000480
+#define MSR_IA32_VMX_PINBASED_CTLS      0x00000481
+#define MSR_IA32_VMX_PROCBASED_CTLS     0x00000482
+#define MSR_IA32_VMX_EXIT_CTLS          0x00000483
+#define MSR_IA32_VMX_ENTRY_CTLS         0x00000484
+#define MSR_IA32_VMX_MISC               0x00000485
+#define MSR_IA32_VMX_CR0_FIXED0         0x00000486
+#define MSR_IA32_VMX_CR0_FIXED1         0x00000487
+#define MSR_IA32_VMX_CR4_FIXED0         0x00000488
+#define MSR_IA32_VMX_CR4_FIXED1         0x00000489
+#define MSR_IA32_VMX_VMCS_ENUM          0x0000048a
+#define MSR_IA32_VMX_PROCBASED_CTLS2    0x0000048b
+#define MSR_IA32_VMX_EPT_VPID_CAP       0x0000048c
+#define MSR_IA32_VMX_TRUE_PINBASED_CTLS  0x0000048d
+#define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x0000048e
+#define MSR_IA32_VMX_TRUE_EXIT_CTLS      0x0000048f
+#define MSR_IA32_VMX_TRUE_ENTRY_CTLS     0x00000490
+#define MSR_IA32_VMX_VMFUNC             0x00000491
+
+/* VMX_BASIC bits and bitmasks */
+#define VMX_BASIC_VMCS_SIZE_SHIFT	32
+#define VMX_BASIC_TRUE_CTLS		(1ULL << 55)
+#define VMX_BASIC_64		0x0001000000000000LLU
+#define VMX_BASIC_MEM_TYPE_SHIFT	50
+#define VMX_BASIC_MEM_TYPE_MASK	0x003c000000000000LLU
+#define VMX_BASIC_MEM_TYPE_WB	6LLU
+#define VMX_BASIC_INOUT		0x0040000000000000LLU
+
+/* MSR_IA32_VMX_MISC bits */
+#define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
+#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE   0x1F
+/* AMD-V MSRs */
+
+#define MSR_VM_CR                       0xc0010114
+#define MSR_VM_IGNNE                    0xc0010115
+#define MSR_VM_HSAVE_PA                 0xc0010117
+
+#endif /* !SELFTEST_KVM_X86_H */
diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c
new file mode 100644
index 000000000000..c9f5b7d4ce38
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/assert.c
@@ -0,0 +1,87 @@
+/*
+ * tools/testing/selftests/kvm/lib/assert.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#define _GNU_SOURCE /* for getline(3) and strchrnul(3)*/
+
+#include "test_util.h"
+
+#include <execinfo.h>
+#include <sys/syscall.h>
+
+/* Dumps the current stack trace to stderr by running addr2line. */
+static void __attribute__((noinline)) test_dump_stack(void);
+static void test_dump_stack(void)
+{
+	/*
+	 * Build and run this command:
+	 *
+	 *	addr2line -s -e /proc/$PPID/exe -fpai {backtrace addresses} | \
+	 *		cat -n 1>&2
+	 *
+	 * Note that the spacing is different and there's no newline.
+	 */
+	size_t i;
+	size_t n = 20;
+	void *stack[n];
+	const char *addr2line = "addr2line -s -e /proc/$PPID/exe -fpai";
+	const char *pipeline = "|cat -n 1>&2";
+	char cmd[strlen(addr2line) + strlen(pipeline) +
+		 /* N bytes per addr * 2 digits per byte + 1 space per addr: */
+		 n * (((sizeof(void *)) * 2) + 1) +
+		 /* Null terminator: */
+		 1];
+	char *c;
+
+	n = backtrace(stack, n);
+	c = &cmd[0];
+	c += sprintf(c, "%s", addr2line);
+	/*
+	 * Start at index 2, i.e. skip the first two recorded entries
+	 * (presumably test_dump_stack and test_assert, both declared
+	 * noinline; TODO confirm). Each address is printed minus 1.
+	 */
+	for (i = 2; i < n; i++)
+		c += sprintf(c, " %lx", ((unsigned long) stack[i]) - 1);
+	c += sprintf(c, "%s", pipeline);
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-result"
+	system(cmd);
+#pragma GCC diagnostic pop
+}
+
+/* Thread ID via syscall; named to avoid clashing with glibc's gettid(). */
+static pid_t test_gettid(void)
+{
+	return syscall(SYS_gettid);
+}
+
+/* When exp is false: report to stderr, dump the stack, then exit(254). */
+void __attribute__((noinline))
+test_assert(bool exp, const char *exp_str,
+	const char *file, unsigned int line, const char *fmt, ...)
+{
+	va_list ap;
+
+	if (exp)
+		return;
+
+	va_start(ap, fmt);
+	fprintf(stderr, "==== Test Assertion Failure ====\n"
+		"  %s:%u: %s\n"
+		"  pid=%d tid=%d\n",
+		file, line, exp_str, getpid(), test_gettid());
+	test_dump_stack();
+	if (fmt) {
+		fputs("  ", stderr);
+		vfprintf(stderr, fmt, ap);
+		fputs("\n", stderr);
+	}
+	va_end(ap);
+
+	exit(254);
+}
diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c
new file mode 100644
index 000000000000..5eb857584aa3
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/elf.c
@@ -0,0 +1,197 @@
+/*
+ * tools/testing/selftests/kvm/lib/elf.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#include "test_util.h"
+
+#include <bits/endian.h>
+#include <linux/elf.h>
+
+#include "kvm_util.h"
+#include "kvm_util_internal.h"
+
+/* Reads and validates the ELF header of filename into *hdrp. */
+static void elfhdr_get(const char *filename, Elf64_Ehdr *hdrp)
+{
+	off_t offset_rv;
+
+	/* Open the ELF file. */
+	int fd = open(filename, O_RDONLY);
+	TEST_ASSERT(fd >= 0, "Failed to open ELF file,\n"
+		"  filename: %s\n"
+		"  rv: %i errno: %i", filename, fd, errno);
+
+	/* Read in and validate ELF Identification Record.
+	 * The identification record is the first EI_NIDENT bytes of the
+	 * ELF header, at the very start of the file.  Only those bytes are
+	 * read here; the rest of the header is read once the identification
+	 * has been validated.
+	 */
+	unsigned char ident[EI_NIDENT];
+	test_read(fd, ident, sizeof(ident));
+	TEST_ASSERT((ident[EI_MAG0] == ELFMAG0) && (ident[EI_MAG1] == ELFMAG1)
+		&& (ident[EI_MAG2] == ELFMAG2) && (ident[EI_MAG3] == ELFMAG3),
+		"ELF MAGIC Mismatch,\n"
+		"  filename: %s\n"
+		"  ident[EI_MAG0 - EI_MAG3]: %02x %02x %02x %02x\n"
+		"  Expected: %02x %02x %02x %02x",
+		filename,
+		ident[EI_MAG0], ident[EI_MAG1], ident[EI_MAG2], ident[EI_MAG3],
+		ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3);
+	TEST_ASSERT(ident[EI_CLASS] == ELFCLASS64,
+		"Current implementation only able to handle ELFCLASS64,\n"
+		"  filename: %s\n"
+		"  ident[EI_CLASS]: %02x\n"
+		"  expected: %02x",
+		filename, ident[EI_CLASS], ELFCLASS64);
+	TEST_ASSERT(((BYTE_ORDER == LITTLE_ENDIAN)
+			&& (ident[EI_DATA] == ELFDATA2LSB))
+		|| ((BYTE_ORDER == BIG_ENDIAN)
+			&& (ident[EI_DATA] == ELFDATA2MSB)), "Current "
+		"implementation only able to handle\n"
+		"cases where the host and ELF file endianness\n"
+		"is the same:\n"
+		"  host BYTE_ORDER: %u\n"
+		"  host LITTLE_ENDIAN: %u\n"
+		"  host BIG_ENDIAN: %u\n"
+		"  ident[EI_DATA]: %u\n"
+		"  ELFDATA2LSB: %u\n"
+		"  ELFDATA2MSB: %u",
+		BYTE_ORDER, LITTLE_ENDIAN, BIG_ENDIAN,
+		ident[EI_DATA], ELFDATA2LSB, ELFDATA2MSB);
+	TEST_ASSERT(ident[EI_VERSION] == EV_CURRENT,
+		"Current implementation only able to handle current "
+		"ELF version,\n"
+		"  filename: %s\n"
+		"  ident[EI_VERSION]: %02x\n"
+		"  expected: %02x",
+		filename, ident[EI_VERSION], EV_CURRENT);
+
+	/* Read in the ELF header.
+	 * With the identification bytes validated (in particular
+	 * EI_VERSION), rewind and read the entire ELF header.
+	 */
+	offset_rv = lseek(fd, 0, SEEK_SET);
+	TEST_ASSERT(offset_rv == 0, "Seek to ELF header failed,\n"
+		"  rv: %jd expected: %i", (intmax_t) offset_rv, 0);
+	test_read(fd, hdrp, sizeof(*hdrp));
+	TEST_ASSERT(hdrp->e_phentsize == sizeof(Elf64_Phdr),
+		"Unexpected program header size,\n"
+		"  hdrp->e_phentsize: %x\n"
+		"  expected: %zx",
+		hdrp->e_phentsize, sizeof(Elf64_Phdr));
+	TEST_ASSERT(hdrp->e_shentsize == sizeof(Elf64_Shdr),
+		"Unexpected section header size,\n"
+		"  hdrp->e_shentsize: %x\n"
+		"  expected: %zx",
+		hdrp->e_shentsize, sizeof(Elf64_Shdr));
+
+	/* The header has been copied out; don't leak the descriptor. */
+	close(fd);
+}
+
+/* VM ELF Load
+ *
+ * Input Args:
+ *   filename - Path to ELF file
+ *   data_memslot, pgd_memslot - Passed through to vm_vaddr_alloc()
+ *
+ * Output Args: None
+ *
+ * Input/Output Args:
+ *   vm - Pointer to opaque type that describes the VM.
+ *
+ * Return: None, TEST_ASSERT failures for all error conditions
+ *
+ * Loads each PT_LOAD segment of the ELF file given by filename into the
+ * virtual address space of the VM pointed to by vm.  On entry the VM must
+ * not be using any of the virtual address space used by the image, and it
+ * needs enough available physical pages to back the loaded pages.
+ */
+void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
+	uint32_t data_memslot, uint32_t pgd_memslot)
+{
+	off_t offset, offset_rv;
+	Elf64_Ehdr hdr;
+
+	/* Open the ELF file. */
+	int fd = open(filename, O_RDONLY);
+	TEST_ASSERT(fd >= 0, "Failed to open ELF file,\n"
+		"  filename: %s\n"
+		"  rv: %i errno: %i", filename, fd, errno);
+
+	/* Read in the ELF header. */
+	elfhdr_get(filename, &hdr);
+
+	/* For each program header.
+	 * The following ELF header members specify the location
+	 * and size of the program headers:
+	 *
+	 *   e_phoff - File offset to start of program headers
+	 *   e_phentsize - Size of each program header
+	 *   e_phnum - Number of program header entries
+	 */
+	for (unsigned int n1 = 0; n1 < hdr.e_phnum; n1++) {
+		/* Seek to the beginning of the program header. */
+		offset = hdr.e_phoff + (n1 * hdr.e_phentsize);
+		offset_rv = lseek(fd, offset, SEEK_SET);
+		TEST_ASSERT(offset_rv == offset,
+			"Failed to seek to begining of program header %u,\n"
+			"  filename: %s\n"
+			"  rv: %jd errno: %i",
+			n1, filename, (intmax_t) offset_rv, errno);
+
+		/* Read in the program header. */
+		Elf64_Phdr phdr;
+		test_read(fd, &phdr, sizeof(phdr));
+
+		/* Skip if this header doesn't describe a loadable segment. */
+		if (phdr.p_type != PT_LOAD)
+			continue;
+
+		/* Allocate memory for this segment within the VM. */
+		TEST_ASSERT(phdr.p_memsz > 0, "Unexpected loadable segment "
+			"memsize of 0,\n"
+			"  phdr index: %u p_memsz: 0x%" PRIx64,
+			n1, (uint64_t) phdr.p_memsz);
+		vm_vaddr_t seg_vstart = phdr.p_vaddr;
+		seg_vstart &= ~(vm_vaddr_t)(vm->page_size - 1);
+		vm_vaddr_t seg_vend = phdr.p_vaddr + phdr.p_memsz - 1;
+		seg_vend |= vm->page_size - 1;
+		size_t seg_size = seg_vend - seg_vstart + 1;
+
+		vm_vaddr_t vaddr = vm_vaddr_alloc(vm, seg_size, seg_vstart,
+			data_memslot, pgd_memslot);
+		TEST_ASSERT(vaddr == seg_vstart, "Unable to allocate "
+			"virtual memory for segment at requested min addr,\n"
+			"  segment idx: %u\n"
+			"  seg_vstart: 0x%lx\n"
+			"  vaddr: 0x%lx",
+			n1, seg_vstart, vaddr);
+		memset(addr_gva2hva(vm, vaddr), 0, seg_size);
+		/* TODO(lhuemill): Set permissions of each memory segment
+		 * based on the least-significant 3 bits of phdr.p_flags.
+		 */
+
+		/* Load the part of the segment that is backed by the file. */
+		if (phdr.p_filesz) {
+			offset_rv = lseek(fd, phdr.p_offset, SEEK_SET);
+			TEST_ASSERT(offset_rv == phdr.p_offset,
+				"Seek to program segment offset failed,\n"
+				"  program header idx: %u errno: %i\n"
+				"  offset_rv: 0x%jx\n"
+				"  expected: 0x%jx\n",
+				n1, errno, (intmax_t) offset_rv,
+				(intmax_t) phdr.p_offset);
+			test_read(fd, addr_gva2hva(vm, phdr.p_vaddr),
+				phdr.p_filesz);
+		}
+	}
+
+	/* All segments are loaded; don't leak the descriptor. */
+	close(fd);
+}
diff --git a/tools/testing/selftests/kvm/lib/io.c b/tools/testing/selftests/kvm/lib/io.c
new file mode 100644
index 000000000000..cff869ffe6ee
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/io.c
@@ -0,0 +1,158 @@
+/*
+ * tools/testing/selftests/kvm/lib/io.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#include "test_util.h"
+
+/* Test Write
+ *
+ * A wrapper for write(2), that automatically handles the following
+ * special conditions:
+ *
+ *   + Interrupted system call (EINTR)
+ *   + Write of less than requested amount
+ *   + Non-block return (EAGAIN)
+ *
+ * For each of the above, an additional write is performed to automatically
+ * continue writing the requested data.
+ * There are also many cases where write(2) can return an unexpected
+ * error (e.g. EIO).  Such errors cause a TEST_ASSERT failure.
+ *
+ * Note, for function signature compatibility with write(2), this function
+ * returns the number of bytes written, but that value will always be equal
+ * to the number of requested bytes.  All other conditions in this and
+ * future enhancements to this function either automatically issue another
+ * write(2) or cause a TEST_ASSERT failure.
+ *
+ * Args:
+ *  fd    - Opened file descriptor to file to be written.
+ *  buf   - Starting address of data to be written.
+ *  count - Number of bytes to write.  Zero is allowed and writes nothing.
+ *
+ * Output: None
+ *
+ * Return:
+ *  On success, number of bytes written.
+ *  On failure, a TEST_ASSERT failure is caused.
+ */
+ssize_t test_write(int fd, const void *buf, size_t count)
+{
+	ssize_t rc;
+	ssize_t num_written = 0;
+	size_t num_left = count;
+	const char *ptr = buf;
+
+	/* Note: A count of zero is allowed.  The loop below is then never
+	 * entered, so write(2) is not called and 0 is returned.  (A
+	 * zero-byte write(2) would typically return 0, which the loop
+	 * would misreport as an unexpected EOF.)
+	 */
+	while (num_left > 0) {
+		rc = write(fd, ptr, num_left);
+
+		switch (rc) {
+		case -1:
+			TEST_ASSERT(errno == EAGAIN || errno == EINTR,
+				    "Unexpected write failure,\n"
+				    "  rc: %zi errno: %i", rc, errno);
+			break;
+
+		case 0:
+			TEST_ASSERT(false, "Unexpected EOF,\n"
+				    "  rc: %zi num_written: %zi num_left: %zu",
+				    rc, num_written, num_left);
+			break;
+
+		default:
+			TEST_ASSERT(rc >= 0, "Unexpected ret from write,\n"
+				"  rc: %zi errno: %i", rc, errno);
+			num_written += rc;
+			num_left -= rc;
+			ptr += rc;
+			break;
+		}
+	}
+
+	return num_written;
+}
+
+/* Test Read
+ *
+ * A wrapper for read(2), that automatically handles the following
+ * special conditions:
+ *
+ *   + Interrupted system call (EINTR)
+ *   + Read of less than requested amount
+ *   + Non-block return (EAGAIN)
+ *
+ * For each of the above, an additional read is performed to automatically
+ * continue reading the requested data.
+ * There are also many cases where read(2) can return an unexpected
+ * error (e.g. EIO).  Such errors cause a TEST_ASSERT failure.  Note,
+ * it is expected that the file opened by fd at the current file position
+ * contains at least the number of requested bytes to be read.  A TEST_ASSERT
+ * failure is produced if an End-Of-File condition occurs, before all the
+ * data is read.  It is the caller's responsibility to ensure that sufficient
+ * data exists.
+ *
+ * Note, for function signature compatibility with read(2), this function
+ * returns the number of bytes read, but that value will always be equal
+ * to the number of requested bytes.  All other conditions in this and
+ * future enhancements to this function either automatically issue another
+ * read(2) or cause a TEST_ASSERT failure.
+ *
+ * Args:
+ *  fd    - Opened file descriptor to file to be read.
+ *  count - Number of bytes to read.  Zero is allowed and reads nothing.
+ *
+ * Output:
+ *  buf   - Starting address of where to write the bytes read.
+ *
+ * Return:
+ *  On success, number of bytes read.
+ *  On failure, a TEST_ASSERT failure is caused.
+ */
+ssize_t test_read(int fd, void *buf, size_t count)
+{
+	ssize_t rc;
+	ssize_t num_read = 0;
+	size_t num_left = count;
+	char *ptr = buf;
+
+	/* Note: A count of zero is allowed.  The loop below is then never
+	 * entered, so read(2) is not called and 0 is returned.  (A
+	 * zero-byte read(2) would typically return 0, which the loop
+	 * would misreport as an unexpected EOF.)
+	 */
+	while (num_left > 0) {
+		rc = read(fd, ptr, num_left);
+
+		switch (rc) {
+		case -1:
+			TEST_ASSERT(errno == EAGAIN || errno == EINTR,
+				    "Unexpected read failure,\n"
+				    "  rc: %zi errno: %i", rc, errno);
+			break;
+
+		case 0:
+			TEST_ASSERT(false, "Unexpected EOF,\n"
+				    "  rc: %zi num_read: %zi num_left: %zu",
+				    rc, num_read, num_left);
+			break;
+
+		default:
+			TEST_ASSERT(rc > 0, "Unexpected ret from read,\n"
+				    "  rc: %zi errno: %i", rc, errno);
+			num_read += rc;
+			num_left -= rc;
+			ptr += rc;
+			break;
+		}
+	}
+
+	return num_read;
+}
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
new file mode 100644
index 000000000000..7ca1bb40c498
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -0,0 +1,1480 @@
+/*
+ * tools/testing/selftests/kvm/lib/kvm_util.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "kvm_util_internal.h"
+
+#include <assert.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#define KVM_DEV_PATH "/dev/kvm"
+
+#define KVM_UTIL_PGS_PER_HUGEPG 512
+#define KVM_UTIL_MIN_PADDR      0x2000
+
+/* Rounds x up to a multiple of size; size has to be a non-zero power of 2. */
+static void *align(void *x, size_t size)
+{
+	size_t low_bits = size - 1;
+	TEST_ASSERT(size != 0 && !(size & (size - 1)),
+		    "size not a power of 2: %lu", size);
+	return (void *) (~low_bits & (low_bits + (size_t) x));
+}
+
+/* Capability
+ *
+ * Input Args:
+ *   cap - Capability to query (KVM_CAP_*)
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   The value reported by the KVM_CHECK_EXTENSION ioctl for cap.  Failure
+ *   to open the KVM device, or an ioctl result of -1, produces a
+ *   TEST_ASSERT failure instead.
+ *
+ * Opens the KVM device just long enough to issue KVM_CHECK_EXTENSION
+ * for cap, then closes it again.
+ */
+int kvm_check_cap(long cap)
+{
+	/* A private, short-lived descriptor for the KVM device. */
+	int kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
+	TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i",
+		KVM_DEV_PATH, kvm_fd, errno);
+
+	/* Ask KVM about the capability. */
+	int ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap);
+	TEST_ASSERT(ret != -1, "KVM_CHECK_EXTENSION IOCTL failed,\n"
+		"  rc: %i errno: %i", ret, errno);
+
+	/* The query is done; the descriptor is no longer needed. */
+	close(kvm_fd);
+
+	return ret;
+}
+
+/* VM Create
+ *
+ * Input Args:
+ *   mode - VM Mode (e.g. VM_MODE_FLAT48PG)
+ *   phy_pages - Physical memory pages
+ *   perm - flags passed to open(2) of the KVM device (e.g. O_RDWR)
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Pointer to opaque structure that describes the created VM.
+ *
+ * Creates a VM with the mode specified by mode (e.g. VM_MODE_FLAT48PG).
+ * When phy_pages is non-zero, an anonymous-memory region of phy_pages
+ * physical pages is added via vm_userspace_mem_region_add().  The KVM
+ * device is opened with perm only to issue KVM_CREATE_VM and is closed
+ * again before returning; any failure is a TEST_ASSERT failure.
+ */
+struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
+{
+	struct kvm_vm *vm;
+	int kvm_fd;
+
+	/* Allocate the zero-filled structure describing the VM. */
+	vm = calloc(1, sizeof(*vm));
+	TEST_ASSERT(vm != NULL, "Insufficent Memory");
+
+	vm->mode = mode;
+	kvm_fd = open(KVM_DEV_PATH, perm);
+	TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i",
+		KVM_DEV_PATH, kvm_fd, errno);
+
+	/* Create VM; the device fd is only needed for this one ioctl. */
+	vm->fd = ioctl(kvm_fd, KVM_CREATE_VM, NULL);
+	TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, "
+		"rc: %i errno: %i", vm->fd, errno);
+
+	close(kvm_fd);
+
+	/* Setup mode specific traits. */
+	switch (vm->mode) {
+	case VM_MODE_FLAT48PG:
+		vm->page_size = 0x1000;
+		vm->page_shift = 12;
+
+		/* Valid pages: bottom and top 2^47 bytes of the address space. */
+		vm->vpages_valid = sparsebit_alloc();
+		sparsebit_set_num(vm->vpages_valid,
+			0, (1ULL << (48 - 1)) >> vm->page_shift);
+		sparsebit_set_num(vm->vpages_valid,
+			(~((1ULL << (48 - 1)) - 1)) >> vm->page_shift,
+			(1ULL << (48 - 1)) >> vm->page_shift);
+
+		/* Limit physical addresses to 52-bits. */
+		vm->max_gfn = ((1ULL << 52) >> vm->page_shift) - 1;
+		break;
+
+	default:
+		TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode);
+	}
+
+	/* Nothing is mapped yet; optionally add the initial guest memory. */
+	vm->vpages_mapped = sparsebit_alloc();
+	if (phy_pages != 0)
+		vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+					    0, 0, phy_pages, 0);
+
+	return vm;
+}
+
+/* Userspace Memory Region Find
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   start - First VM physical address of the range
+ *   end - Last VM physical address of the range, inclusive.
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Pointer to overlapping region, NULL if no such region.
+ *
+ * Walks the VM's list of userspace memory regions and returns the
+ * first one whose guest physical range shares at least one address
+ * with [start, end].  When several regions overlap the range, the one
+ * that comes first in the list is returned.  NULL is returned only when
+ * no region overlaps.
+ */
+static struct userspace_mem_region *userspace_mem_region_find(
+	struct kvm_vm *vm, uint64_t start, uint64_t end)
+{
+	struct userspace_mem_region *cur = vm->userspace_mem_region_head;
+
+	while (cur) {
+		uint64_t first = cur->region.guest_phys_addr;
+		uint64_t last = first + cur->region.memory_size - 1;
+		/* Overlap unless one range ends before the other. */
+		if (!(end < first || last < start))
+			break;
+		cur = cur->next;
+	}
+
+	return cur;
+}
+
+/* KVM Userspace Memory Region Find
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   start - First VM physical address of the range
+ *   end - Last VM physical address of the range, inclusive.
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Pointer to overlapping region, NULL if no such region.
+ *
+ * Public wrapper around userspace_mem_region_find.  Lets tests look up
+ * the memslot description covering a range of guest physical memory.
+ */
+struct kvm_userspace_memory_region *
+kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
+				 uint64_t end)
+{
+	struct userspace_mem_region *match =
+		userspace_mem_region_find(vm, start, end);
+
+	/* Hand back only the embedded KVM memslot description. */
+	if (match)
+		return &match->region;
+	return NULL;
+}
+
+/* VCPU Find
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Pointer to VCPU structure, or NULL
+ *
+ * Searches the VM's VCPU list for the entry whose id equals vcpuid and
+ * returns a pointer to it.  NULL is returned when no VCPU in the VM
+ * has that id.
+ */
+struct vcpu *vcpu_find(struct kvm_vm *vm,
+	uint32_t vcpuid)
+{
+	struct vcpu *cur = vm->vcpu_head;
+
+	/* Walk the list until a matching id or the end (NULL) is hit. */
+	while (cur && cur->id != vcpuid)
+		cur = cur->next;
+
+	/* Either the matching VCPU, or NULL when the list ran out. */
+	return cur;
+}
+
+/* VM VCPU Remove
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *
+ * Output Args: None
+ *
+ * Return: None, TEST_ASSERT failures for all error conditions
+ *
+ * Closes the fd of the VCPU given by vcpuid, unlinks it from vm, frees it.
+ */
+static void vm_vcpu_rm(struct kvm_vm *vm, uint32_t vcpuid)
+{
+	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+	int ret = close(vcpu->fd);
+	TEST_ASSERT(ret == 0, "Close of VCPU fd failed, rc: %i "
+		"errno: %i", ret, errno);
+
+	if (vcpu->next)
+		vcpu->next->prev = vcpu->prev;
+	if (vcpu->prev)
+		vcpu->prev->next = vcpu->next;
+	else
+		vm->vcpu_head = vcpu->next;
+	free(vcpu);
+}
+
+
+/* Destroys and frees the VM pointed to by vmp.  A NULL vmp is a no-op.
+ */
+void kvm_vm_free(struct kvm_vm *vmp)
+{
+	int ret;
+
+	if (vmp == NULL)
+		return;
+
+	/* Free userspace_mem_regions, always taking the list head. */
+	while (vmp->userspace_mem_region_head) {
+		struct userspace_mem_region *region
+			= vmp->userspace_mem_region_head;
+
+		region->region.memory_size = 0;
+		ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION,
+			&region->region);
+		TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed, "
+			"rc: %i errno: %i", ret, errno);
+
+		vmp->userspace_mem_region_head = region->next;
+		sparsebit_free(&region->unused_phy_pages);
+		ret = munmap(region->mmap_start, region->mmap_size);
+		TEST_ASSERT(ret == 0, "munmap failed, rc: %i errno: %i",
+			    ret, errno);
+
+		free(region);
+	}
+
+	/* Free VCPUs; each removal unlinks the current list head. */
+	while (vmp->vcpu_head)
+		vm_vcpu_rm(vmp, vmp->vcpu_head->id);
+
+	/* Free sparsebit arrays. */
+	sparsebit_free(&vmp->vpages_valid);
+	sparsebit_free(&vmp->vpages_mapped);
+
+	/* Close file descriptor for the VM. */
+	ret = close(vmp->fd);
+	TEST_ASSERT(ret == 0, "Close of vm fd failed,\n"
+		"  vmp->fd: %i rc: %i errno: %i", vmp->fd, ret, errno);
+
+	/* Free the structure describing the VM. */
+	free(vmp);
+}
+
+/* Memory Compare, host virtual to guest virtual
+ *
+ * Input Args:
+ *   hva - Starting host virtual address
+ *   vm - Virtual Machine
+ *   gva - Starting guest virtual address
+ *   len - number of bytes to compare
+ *
+ * Output Args: None
+ *
+ * Input/Output Args: None
+ *
+ * Return:
+ *   Returns 0 if the bytes starting at hva for a length of len
+ *   are equal to the guest virtual bytes starting at gva.  Otherwise
+ *   returns the non-zero memcmp() result for the first chunk in
+ *   which the bytes at hva and at gva differ.
+ *
+ * Compares the bytes starting at the host virtual address hva, for
+ * a length of len, to the guest bytes starting at the guest virtual
+ * address given by gva.
+ */
+int kvm_memcmp_hva_gva(void *hva,
+	struct kvm_vm *vm, vm_vaddr_t gva, size_t len)
+{
+	size_t amt;
+
+	/* Compare a batch of bytes until either a mismatch is found
+	 * or all the bytes have been compared.
+	 */
+	for (uintptr_t offset = 0; offset < len; offset += amt) {
+		uintptr_t ptr1 = (uintptr_t)hva + offset;
+
+		/* Determine host address for guest virtual address
+		 * at offset.
+		 */
+		uintptr_t ptr2 = (uintptr_t)addr_gva2hva(vm, gva + offset);
+
+		/* Determine amount to compare on this pass.
+		 * Don't allow the comparison to cross a page boundary.
+		 */
+		amt = len - offset;
+		if ((ptr1 >> vm->page_shift) != ((ptr1 + amt) >> vm->page_shift))
+			amt = vm->page_size - (ptr1 % vm->page_size);
+		if ((ptr2 >> vm->page_shift) != ((ptr2 + amt) >> vm->page_shift))
+			amt = vm->page_size - (ptr2 % vm->page_size);
+
+		assert((ptr1 >> vm->page_shift) == ((ptr1 + amt - 1) >> vm->page_shift));
+		assert((ptr2 >> vm->page_shift) == ((ptr2 + amt - 1) >> vm->page_shift));
+
+		/* Perform the comparison.  If there is a difference
+		 * return that result to the caller, otherwise need
+		 * to continue on looking for a mismatch.
+		 */
+		int ret = memcmp((void *)ptr1, (void *)ptr2, amt);
+		if (ret != 0)
+			return ret;
+	}
+
+	/* No mismatch found.  Let the caller know the two memory
+	 * areas are equal.
+	 */
+	return 0;
+}
+
+/* Allocate an instance of struct kvm_cpuid2
+ *
+ * Input Args: None
+ *
+ * Output Args: None
+ *
+ * Return: A pointer to the allocated struct. The caller is responsible
+ * for freeing this struct.
+ *
+ * struct kvm_cpuid2 ends in a 0-length array so that the size of the
+ * array can be decided at allocation time, which makes allocation
+ * slightly complicated. This function picks a reasonable default length
+ * for the array and performs the appropriate allocation.
+ */
+struct kvm_cpuid2 *allocate_kvm_cpuid2(void)
+{
+	/* Default number of trailing entries to make room for. */
+	const int num_entries = 100;
+	struct kvm_cpuid2 *cpuid;
+
+	/* Fixed header followed by num_entries array elements. */
+	cpuid = malloc(sizeof(*cpuid) +
+		       num_entries * sizeof(struct kvm_cpuid_entry2));
+	if (cpuid == NULL) {
+		perror("malloc");
+		abort();
+	}
+
+	/* Record the capacity; nothing else is initialized here. */
+	cpuid->nent = num_entries;
+	return cpuid;
+}
+
+/* KVM Supported CPUID Get
+ *
+ * Input Args: None
+ *
+ * Output Args:
+ *   cpuid - The supported KVM CPUID
+ *
+ * Return: void
+ *
+ * Get the guest CPUID supported by KVM.
+ */
+void kvm_get_supported_cpuid(struct kvm_cpuid2 *cpuid)
+{
+	int kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
+	int ret;
+
+	TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i",
+		KVM_DEV_PATH, kvm_fd, errno);
+
+	/* The query is issued on the KVM device node itself, so no VM
+	 * has to exist; the descriptor is only needed for this one call.
+	 */
+	ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid);
+	TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n",
+		    ret, errno);
+
+	close(kvm_fd);
+}
+
+/* Locate a cpuid entry.
+ *
+ * Input Args:
+ *   cpuid: The cpuid.
+ *   function: The function of the cpuid entry to find.
+ *   index: The index of the cpuid entry to find.
+ *
+ * Output Args: None
+ *
+ * Return: A pointer to the first entry of cpuid whose function and index
+ * both match.  A TEST_ASSERT failure occurs if there is no such entry.
+ */
+struct kvm_cpuid_entry2 *
+find_cpuid_index_entry(struct kvm_cpuid2 *cpuid, uint32_t function,
+		       uint32_t index)
+{
+	struct kvm_cpuid_entry2 *entry = NULL;
+	uint32_t i;	/* unsigned, like the nent it is compared against */
+
+	for (i = 0; i < cpuid->nent; i++) {
+		if (cpuid->entries[i].function == function &&
+		    cpuid->entries[i].index == index) {
+			entry = &cpuid->entries[i];
+			break;
+		}
+	}
+
+	TEST_ASSERT(entry, "Guest CPUID entry not found: (EAX=%x, ECX=%x).",
+		    function, index);
+	return entry;
+}
+
+/* VM Userspace Memory Region Add
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   src_type - Storage source for this region: plain anonymous memory,
+ *              anonymous memory backed by transparent huge pages, or
+ *              anonymous hugetlb memory.
+ *   guest_paddr - Starting guest physical address
+ *   slot - KVM region slot
+ *   npages - Number of physical pages
+ *   flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES)
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Allocates a memory area of the number of pages specified by npages
+ * and maps it to the VM specified by vm, at a starting physical address
+ * given by guest_paddr.  The region is created with a KVM region slot
+ * given by slot, which must be unique and < KVM_MEM_SLOTS_NUM.  The
+ * region is created with the flags given by flags.  The new region may
+ * directly abut an existing one, but must not share any address with it.
+ */
+void vm_userspace_mem_region_add(struct kvm_vm *vm,
+	enum vm_mem_backing_src_type src_type,
+	uint64_t guest_paddr, uint32_t slot, uint64_t npages,
+	uint32_t flags)
+{
+	int ret;
+	struct userspace_mem_region *region;
+	size_t huge_page_size = KVM_UTIL_PGS_PER_HUGEPG * vm->page_size;
+
+	TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical "
+		"address not on a page boundary.\n"
+		"  guest_paddr: 0x%lx vm->page_size: 0x%x",
+		guest_paddr, vm->page_size);
+	TEST_ASSERT((((guest_paddr >> vm->page_shift) + npages) - 1)
+		<= vm->max_gfn, "Physical range beyond maximum "
+		"supported physical address,\n"
+		"  guest_paddr: 0x%lx npages: 0x%lx\n"
+		"  vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+		guest_paddr, npages, vm->max_gfn, vm->page_size);
+
+	/* Confirm a mem region with an overlapping address doesn't
+	 * already exist.  The new region owns the half-open range
+	 * [guest_paddr, guest_paddr + size), so the last address it
+	 * covers is one below its end; that last address is what gets
+	 * passed as the end of the search range.
+	 * NOTE(review): assumes userspace_mem_region_find() treats its
+	 * end argument as inclusive - confirm against its definition.
+	 */
+	region = (struct userspace_mem_region *) userspace_mem_region_find(
+		vm, guest_paddr, (guest_paddr + npages * vm->page_size) - 1);
+	if (region != NULL)
+		TEST_ASSERT(false, "overlapping userspace_mem_region already "
+			"exists\n"
+			"  requested guest_paddr: 0x%lx npages: 0x%lx "
+			"page_size: 0x%x\n"
+			"  existing guest_paddr: 0x%lx size: 0x%lx",
+			guest_paddr, npages, vm->page_size,
+			(uint64_t) region->region.guest_phys_addr,
+			(uint64_t) region->region.memory_size);
+
+	/* Confirm no region with the requested slot already exists.
+	 * The range test is a second line of defence against overlap;
+	 * both ends are exclusive, so the comparisons are strict and a
+	 * region that only touches the new one is accepted.
+	 */
+	for (region = vm->userspace_mem_region_head; region;
+		region = region->next) {
+		if (region->region.slot == slot)
+			break;
+		if ((guest_paddr < (region->region.guest_phys_addr
+				+ region->region.memory_size))
+			&& ((guest_paddr + npages * vm->page_size)
+				> region->region.guest_phys_addr))
+			break;
+	}
+	if (region != NULL)
+		TEST_ASSERT(false, "A mem region with the requested slot "
+			"or overlapping physical memory range already exists.\n"
+			"  requested slot: %u paddr: 0x%lx npages: 0x%lx\n"
+			"  existing slot: %u paddr: 0x%lx size: 0x%lx",
+			slot, guest_paddr, npages,
+			region->region.slot,
+			(uint64_t) region->region.guest_phys_addr,
+			(uint64_t) region->region.memory_size);
+
+	/* Allocate and initialize new mem region structure. */
+	region = calloc(1, sizeof(*region));
+	TEST_ASSERT(region != NULL, "Insufficient Memory");
+	region->mmap_size = npages * vm->page_size;
+
+	/* Enough memory to align up to a huge page. */
+	if (src_type == VM_MEM_SRC_ANONYMOUS_THP)
+		region->mmap_size += huge_page_size;
+	region->mmap_start = mmap(NULL, region->mmap_size,
+				  PROT_READ | PROT_WRITE,
+				  MAP_PRIVATE | MAP_ANONYMOUS
+				  | (src_type == VM_MEM_SRC_ANONYMOUS_HUGETLB ? MAP_HUGETLB : 0),
+				  -1, 0);
+	TEST_ASSERT(region->mmap_start != MAP_FAILED,
+		    "test_malloc failed, mmap_start: %p errno: %i",
+		    region->mmap_start, errno);
+
+	/* Align THP allocation up to start of a huge page. */
+	region->host_mem = align(region->mmap_start,
+				 src_type == VM_MEM_SRC_ANONYMOUS_THP ?  huge_page_size : 1);
+
+	/* As needed perform madvise */
+	if (src_type == VM_MEM_SRC_ANONYMOUS || src_type == VM_MEM_SRC_ANONYMOUS_THP) {
+		ret = madvise(region->host_mem, npages * vm->page_size,
+			     src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE);
+		TEST_ASSERT(ret == 0, "madvise failed,\n"
+			    "  addr: %p\n"
+			    "  length: 0x%lx\n"
+			    "  src_type: %x",
+			    region->host_mem, npages * vm->page_size, src_type);
+	}
+
+	/* Every guest page frame of the region starts out unused. */
+	region->unused_phy_pages = sparsebit_alloc();
+	sparsebit_set_num(region->unused_phy_pages,
+		guest_paddr >> vm->page_shift, npages);
+	region->region.slot = slot;
+	region->region.flags = flags;
+	region->region.guest_phys_addr = guest_paddr;
+	region->region.memory_size = npages * vm->page_size;
+	region->region.userspace_addr = (uintptr_t) region->host_mem;
+	ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
+	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
+		"  rc: %i errno: %i\n"
+		"  slot: %u flags: 0x%x\n"
+		"  guest_phys_addr: 0x%lx size: 0x%lx",
+		ret, errno, slot, flags,
+		guest_paddr, (uint64_t) region->region.memory_size);
+
+	/* Add to linked-list of memory regions. */
+	if (vm->userspace_mem_region_head)
+		vm->userspace_mem_region_head->prev = region;
+	region->next = vm->userspace_mem_region_head;
+	vm->userspace_mem_region_head = region;
+}
+
+/* Memslot to region
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   memslot - KVM memory slot ID
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Pointer to memory region structure that describe memory region
+ *   using kvm memory slot ID given by memslot.  TEST_ASSERT failure
+ *   on error (e.g. currently no memory region using memslot as a KVM
+ *   memory slot ID).
+ */
+static struct userspace_mem_region *memslot2region(struct kvm_vm *vm,
+	uint32_t memslot)
+{
+	struct userspace_mem_region *region = vm->userspace_mem_region_head;
+
+	/* Walk the list until the slot matches or the list runs out. */
+	while (region != NULL && region->region.slot != memslot)
+		region = region->next;
+
+	if (region != NULL)
+		return region;
+
+	/* Not found: describe the request and the VM before failing. */
+	fprintf(stderr, "No mem region with the requested slot found,\n"
+		"  requested slot: %u\n", memslot);
+	fputs("---- vm dump ----\n", stderr);
+	vm_dump(stderr, vm, 2);
+	TEST_ASSERT(false, "Mem region not found");
+
+	return region;
+}
+
+/* VM Memory Region Flags Set
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   slot - KVM memory slot ID of the region to modify
+ *   flags - New KVM memory region flags
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the flags of the memory region specified by the value of slot,
+ * to the values given by flags.  A TEST_ASSERT failure occurs if KVM
+ * rejects the updated region.
+ */
+void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
+{
+	struct userspace_mem_region *region = memslot2region(vm, slot);
+	int ret;
+
+	/* Record the new flags, then hand the whole region back to KVM. */
+	region->region.flags = flags;
+	ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
+
+	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
+		"  rc: %i errno: %i slot: %u flags: 0x%x",
+		ret, errno, slot, flags);
+}
+
+/* VCPU mmap Size
+ *
+ * Input Args: None
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Size of VCPU state
+ *
+ * Returns the size of the structure pointed to by the return value
+ * of vcpu_state().
+ */
+static int vcpu_mmap_sz(void)
+{
+	int dev_fd, ret;
+
+	dev_fd = open(KVM_DEV_PATH, O_RDONLY);
+	TEST_ASSERT(dev_fd >= 0, "%s open %s failed, rc: %i errno: %i",
+		__func__, KVM_DEV_PATH, dev_fd, errno);
+
+	/* ret is signed while sizeof yields an unsigned value.  Check the
+	 * sign first: comparing directly would convert a -1 failure into
+	 * a huge unsigned number and let it pass as a valid size.
+	 */
+	ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
+	TEST_ASSERT(ret >= 0 && (size_t) ret >= sizeof(struct kvm_run),
+		"%s KVM_GET_VCPU_MMAP_SIZE ioctl failed, rc: %i errno: %i",
+		__func__, ret, errno);
+
+	close(dev_fd);
+
+	return ret;
+}
+
+/* VM VCPU Add
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Creates and adds to the VM specified by vm and virtual CPU with
+ * the ID given by vcpuid.
+ */
+void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid)
+{
+	struct vcpu *vcpu;
+	int mmap_sz;
+
+	/* Confirm a vcpu with the specified id doesn't already exist. */
+	vcpu = vcpu_find(vm, vcpuid);
+	if (vcpu != NULL)
+		TEST_ASSERT(false, "vcpu with the specified id "
+			"already exists,\n"
+			"  requested vcpuid: %u\n"
+			"  existing vcpuid: %u state: %p",
+			vcpuid, vcpu->id, vcpu->state);
+
+	/* Allocate and initialize new vcpu structure. */
+	vcpu = calloc(1, sizeof(*vcpu));
+	TEST_ASSERT(vcpu != NULL, "Insufficient Memory");
+	vcpu->id = vcpuid;
+	vcpu->fd = ioctl(vm->fd, KVM_CREATE_VCPU, vcpuid);
+	TEST_ASSERT(vcpu->fd >= 0, "KVM_CREATE_VCPU failed, rc: %i errno: %i",
+		vcpu->fd, errno);
+
+	/* Query the mmap size once (each query opens the KVM device) and
+	 * check its sign before comparing it with the unsigned sizeof, so
+	 * a negative value can never pass as "large enough".
+	 */
+	mmap_sz = vcpu_mmap_sz();
+	TEST_ASSERT(mmap_sz >= 0 && (size_t) mmap_sz >= sizeof(*vcpu->state),
+		"vcpu mmap size "
+		"smaller than expected, vcpu_mmap_sz: %i expected_min: %zu",
+		mmap_sz, sizeof(*vcpu->state));
+
+	/* Map the run structure that is shared with KVM for this vcpu. */
+	vcpu->state = (struct kvm_run *) mmap(NULL, sizeof(*vcpu->state),
+		PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0);
+	TEST_ASSERT(vcpu->state != MAP_FAILED, "mmap vcpu_state failed, "
+		"vcpu id: %u errno: %i", vcpuid, errno);
+
+	/* Add to linked-list of VCPUs. */
+	if (vm->vcpu_head)
+		vm->vcpu_head->prev = vcpu;
+	vcpu->next = vm->vcpu_head;
+	vm->vcpu_head = vcpu;
+
+	vcpu_setup(vm, vcpuid);
+}
+
+/* VM Virtual Address Unused Gap
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   sz - Size (bytes)
+ *   vaddr_min - Minimum Virtual Address
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Lowest virtual address at or above vaddr_min, with at least
+ *   sz unused bytes.  TEST_ASSERT failure if no area of at least
+ *   size sz is available.
+ *
+ * Within the VM specified by vm, locates the lowest starting virtual
+ * address >= vaddr_min, that has at least sz unallocated bytes.  A
+ * TEST_ASSERT failure occurs for invalid input or no area of at least
+ * sz unallocated bytes >= vaddr_min is available.
+ */
+static vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz,
+	vm_vaddr_t vaddr_min)
+{
+	/* Number of whole pages needed to hold sz bytes. */
+	uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift;
+
+	/* Determine lowest permitted virtual page index. */
+	uint64_t pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift;
+	/* Rounding up wrapped around if the page now starts below vaddr_min. */
+	if ((pgidx_start * vm->page_size) < vaddr_min)
+			goto no_va_found;
+
+	/* Loop over section with enough valid virtual page indexes. */
+	if (!sparsebit_is_set_num(vm->vpages_valid,
+		pgidx_start, pages))
+		pgidx_start = sparsebit_next_set_num(vm->vpages_valid,
+			pgidx_start, pages);
+	do {
+		/*
+		 * Are there enough unused virtual pages available at
+		 * the currently proposed starting virtual page index.
+		 * If not, adjust proposed starting index to next
+		 * possible.
+		 */
+		if (sparsebit_is_clear_num(vm->vpages_mapped,
+			pgidx_start, pages))
+			goto va_found;
+		pgidx_start = sparsebit_next_clear_num(vm->vpages_mapped,
+			pgidx_start, pages);
+		/* An index of 0 is treated here as "no further range". */
+		if (pgidx_start == 0)
+			goto no_va_found;
+
+		/*
+		 * If needed, adjust proposed starting virtual address,
+		 * to next range of valid virtual addresses.
+		 */
+		if (!sparsebit_is_set_num(vm->vpages_valid,
+			pgidx_start, pages)) {
+			pgidx_start = sparsebit_next_set_num(
+				vm->vpages_valid, pgidx_start, pages);
+			if (pgidx_start == 0)
+				goto no_va_found;
+		}
+	} while (pgidx_start != 0);
+
+no_va_found:
+	TEST_ASSERT(false, "No vaddr of specified pages available, "
+		"pages: 0x%lx", pages);
+
+	/* NOT REACHED */
+	return -1;
+
+va_found:
+	/* Re-verify both properties of the chosen range before returning. */
+	TEST_ASSERT(sparsebit_is_set_num(vm->vpages_valid,
+		pgidx_start, pages),
+		"Unexpected, invalid virtual page index range,\n"
+		"  pgidx_start: 0x%lx\n"
+		"  pages: 0x%lx",
+		pgidx_start, pages);
+	TEST_ASSERT(sparsebit_is_clear_num(vm->vpages_mapped,
+		pgidx_start, pages),
+		"Unexpected, pages already mapped,\n"
+		"  pgidx_start: 0x%lx\n"
+		"  pages: 0x%lx",
+		pgidx_start, pages);
+
+	return pgidx_start * vm->page_size;
+}
+
+/* VM Virtual Address Allocate
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   sz - Size in bytes
+ *   vaddr_min - Minimum starting virtual address
+ *   data_memslot - Memory region slot for data pages
+ *   pgd_memslot - Memory region slot for new virtual translation tables
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Starting guest virtual address
+ *
+ * Allocates at least sz bytes within the virtual address space of the
+ * VM given by vm, at a virtual address >= vaddr_min.  sz is rounded up
+ * to whole pages; every page of the range is backed by a physical page
+ * freshly taken from data_memslot, so each allocation uses its own set
+ * of pages.
+ */
+vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
+	uint32_t data_memslot, uint32_t pgd_memslot)
+{
+	uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);
+	vm_vaddr_t vaddr_start;
+	vm_vaddr_t vaddr;
+
+	virt_pgd_alloc(vm, pgd_memslot);
+
+	/* Pick the start of a free virtual range that can hold sz bytes. */
+	vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min);
+
+	/* Back the range one page at a time and mark each page mapped. */
+	vaddr = vaddr_start;
+	while (pages > 0) {
+		vm_paddr_t paddr = vm_phy_page_alloc(vm, KVM_UTIL_MIN_PADDR,
+			data_memslot);
+
+		virt_pg_map(vm, vaddr, paddr, pgd_memslot);
+		sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift);
+
+		pages--;
+		vaddr += vm->page_size;
+	}
+
+	return vaddr_start;
+}
+
+/* Address VM Physical to Host Virtual
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   gpa - VM physical address
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Equivalent host virtual address
+ *
+ * Locates the memory region containing the VM physical address given
+ * by gpa, within the VM given by vm.  When found, the host virtual
+ * address providing the memory to the vm physical address is returned.
+ * A TEST_ASSERT failure occurs if no region containing gpa exists.
+ */
+void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
+{
+	struct userspace_mem_region *region = vm->userspace_mem_region_head;
+
+	while (region != NULL) {
+		/* gpa belongs to this region when it lies between the
+		 * region's first and last guest physical address.
+		 */
+		if ((gpa >= region->region.guest_phys_addr)
+			&& (gpa <= (region->region.guest_phys_addr
+				+ region->region.memory_size - 1))) {
+			/* Same offset into the host backing memory. */
+			return (void *) ((uintptr_t) region->host_mem
+				+ (gpa - region->region.guest_phys_addr));
+		}
+		region = region->next;
+	}
+
+	TEST_ASSERT(false, "No vm physical memory at 0x%lx", gpa);
+	return NULL;
+}
+
+/* Address Host Virtual to VM Physical
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   hva - Host virtual address
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Equivalent VM physical address
+ *
+ * Locates the memory region containing the host virtual address given
+ * by hva, within the VM given by vm.  When found, the equivalent
+ * VM physical address is returned. A TEST_ASSERT failure occurs if no
+ * region containing hva exists.
+ */
+vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
+{
+	struct userspace_mem_region *region;
+	/* All range arithmetic is done on integers: arithmetic on a
+	 * void pointer is only available as a compiler extension.
+	 */
+	uintptr_t addr = (uintptr_t) hva;
+
+	for (region = vm->userspace_mem_region_head; region;
+	     region = region->next) {
+		uintptr_t base = (uintptr_t) region->host_mem;
+
+		/* hva belongs to this region when it lies between the
+		 * first and last byte of the region's host memory; the
+		 * guest physical address is at the same offset.
+		 */
+		if ((addr >= base)
+			&& (addr <= (base + region->region.memory_size - 1)))
+			return (vm_paddr_t) (region->region.guest_phys_addr
+				+ (addr - base));
+	}
+
+	TEST_ASSERT(false, "No mapping to a guest physical address, "
+		"hva: %p", hva);
+	return -1;
+}
+
+/* VM Create IRQ Chip
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Creates an interrupt controller chip for the VM specified by vm.
+ */
+void vm_create_irqchip(struct kvm_vm *vm)
+{
+	/* The ioctl is issued on the VM descriptor and takes no payload. */
+	int ret = ioctl(vm->fd, KVM_CREATE_IRQCHIP, 0);
+
+	TEST_ASSERT(ret == 0, "KVM_CREATE_IRQCHIP IOCTL failed, "
+		"rc: %i errno: %i", ret, errno);
+}
+
+/* VM VCPU State
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Pointer to structure that describes the state of the VCPU.
+ *
+ * Locates and returns a pointer to a structure that describes the
+ * state of the VCPU with the given vcpuid.
+ */
+struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid)
+{
+	struct vcpu *vcpu;
+
+	/* An unknown vcpuid is a test failure, not a NULL return. */
+	vcpu = vcpu_find(vm, vcpuid);
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	return vcpu->state;
+}
+
+/* VM VCPU Run
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Switch to executing the code for the VCPU given by vcpuid, within the VM
+ * given by vm.
+ */
+void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
+{
+	int ret;
+
+	/* Asserting wrapper: any non-zero result fails the test. */
+	ret = _vcpu_run(vm, vcpuid);
+	TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, "
+		"rc: %i errno: %i", ret, errno);
+}
+
+int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
+{
+	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+	int rc;
+
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	/* Re-enter the guest for as long as the ioctl was merely
+	 * interrupted; any other outcome is returned to the caller.
+	 */
+	for (;;) {
+		rc = ioctl(vcpu->fd, KVM_RUN, NULL);
+		if (rc != -1 || errno != EINTR)
+			break;
+	}
+
+	return rc;
+}
+
+/* VM VCPU Set MP State
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *   mp_state - mp_state to be set
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the MP state of the VCPU given by vcpuid, to the state given
+ * by mp_state.
+ */
+void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
+	struct kvm_mp_state *mp_state)
+{
+	struct vcpu *vcpu;
+	int ret;
+
+	vcpu = vcpu_find(vm, vcpuid);
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	/* Hand the requested MP state to KVM. */
+	ret = ioctl(vcpu->fd, KVM_SET_MP_STATE, mp_state);
+	TEST_ASSERT(ret == 0, "KVM_SET_MP_STATE IOCTL failed, "
+		"rc: %i errno: %i", ret, errno);
+}
+
+/* VM VCPU Regs Get
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *
+ * Output Args:
+ *   regs - current state of VCPU regs
+ *
+ * Return: None
+ *
+ * Obtains the current register state for the VCPU specified by vcpuid
+ * and stores it at the location given by regs.
+ */
+void vcpu_regs_get(struct kvm_vm *vm,
+	uint32_t vcpuid, struct kvm_regs *regs)
+{
+	struct vcpu *vcpu;
+	int ret;
+
+	vcpu = vcpu_find(vm, vcpuid);
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	/* Have KVM fill *regs with the vcpu's current registers. */
+	ret = ioctl(vcpu->fd, KVM_GET_REGS, regs);
+	TEST_ASSERT(ret == 0, "KVM_GET_REGS failed, rc: %i errno: %i",
+		ret, errno);
+}
+
+/* VM VCPU Regs Set
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *   regs - Values to set VCPU regs to
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the regs of the VCPU specified by vcpuid to the values
+ * given by regs.
+ */
+void vcpu_regs_set(struct kvm_vm *vm,
+	uint32_t vcpuid, struct kvm_regs *regs)
+{
+	struct vcpu *vcpu;
+	int ret;
+
+	vcpu = vcpu_find(vm, vcpuid);
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	/* Load the vcpu's registers from *regs. */
+	ret = ioctl(vcpu->fd, KVM_SET_REGS, regs);
+	TEST_ASSERT(ret == 0, "KVM_SET_REGS failed, rc: %i errno: %i",
+		ret, errno);
+}
+
+/* VM VCPU Events Get
+ *
+ * Issues KVM_GET_VCPU_EVENTS on the VCPU given by vcpuid and stores the
+ * result at the location given by events.  A TEST_ASSERT failure occurs
+ * if the VCPU does not exist or the ioctl fails.
+ */
+void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
+			  struct kvm_vcpu_events *events)
+{
+	struct vcpu *vcpu;
+	int ret;
+
+	vcpu = vcpu_find(vm, vcpuid);
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	/* Get the events. */
+	ret = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, events);
+	TEST_ASSERT(ret == 0, "KVM_GET_VCPU_EVENTS, failed, rc: %i errno: %i",
+		ret, errno);
+}
+
+/* VM VCPU Events Set
+ *
+ * Issues KVM_SET_VCPU_EVENTS on the VCPU given by vcpuid, passing the
+ * structure given by events.  A TEST_ASSERT failure occurs if the VCPU
+ * does not exist or the ioctl fails.
+ */
+void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
+			  struct kvm_vcpu_events *events)
+{
+	struct vcpu *vcpu;
+	int ret;
+
+	vcpu = vcpu_find(vm, vcpuid);
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	/* Set the events. */
+	ret = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, events);
+	TEST_ASSERT(ret == 0, "KVM_SET_VCPU_EVENTS, failed, rc: %i errno: %i",
+		ret, errno);
+}
+
+/* VM VCPU Args Set
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *   num - number of arguments
+ *   ... - arguments, each of type uint64_t
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the first num function input arguments to the values
+ * given as variable args.  Each of the variable args is expected to
+ * be of type uint64_t.
+ */
+void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
+{
+	va_list ap;
+	struct kvm_regs regs;
+	unsigned int argno;
+
+	TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n"
+		    "  num: %u\n",
+		    num);
+
+	va_start(ap, num);
+	vcpu_regs_get(vm, vcpuid, &regs);
+
+	/* Argument N is stored in the N-th register of the sequence
+	 * rdi, rsi, rdx, rcx, r8, r9; all other registers keep the
+	 * values just read back from the vcpu.
+	 */
+	for (argno = 0; argno < num; argno++) {
+		uint64_t val = va_arg(ap, uint64_t);
+
+		switch (argno) {
+		case 0:
+			regs.rdi = val;
+			break;
+		case 1:
+			regs.rsi = val;
+			break;
+		case 2:
+			regs.rdx = val;
+			break;
+		case 3:
+			regs.rcx = val;
+			break;
+		case 4:
+			regs.r8 = val;
+			break;
+		case 5:
+			regs.r9 = val;
+			break;
+		}
+	}
+
+	vcpu_regs_set(vm, vcpuid, &regs);
+	va_end(ap);
+}
+
+/* VM VCPU System Regs Get
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *
+ * Output Args:
+ *   sregs - current state of VCPU system regs
+ *
+ * Return: None
+ *
+ * Obtains the current system register state for the VCPU specified by
+ * vcpuid and stores it at the location given by sregs.
+ */
+void vcpu_sregs_get(struct kvm_vm *vm,
+	uint32_t vcpuid, struct kvm_sregs *sregs)
+{
+	struct vcpu *vcpu;
+	int ret;
+
+	vcpu = vcpu_find(vm, vcpuid);
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	/* Have KVM fill *sregs with the vcpu's current system registers. */
+	ret = ioctl(vcpu->fd, KVM_GET_SREGS, sregs);
+	TEST_ASSERT(ret == 0, "KVM_GET_SREGS failed, rc: %i errno: %i",
+		ret, errno);
+}
+
+/* VM VCPU System Regs Set
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *   sregs - Values to set VCPU system regs to
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the system regs of the VCPU specified by vcpuid to the values
+ * given by sregs.
+ */
+void vcpu_sregs_set(struct kvm_vm *vm,
+	uint32_t vcpuid, struct kvm_sregs *sregs)
+{
+	/* Asserting wrapper around _vcpu_sregs_set(): any non-zero result
+	 * fails the test, and the message names the ioctl that was
+	 * actually issued.
+	 */
+	int ret = _vcpu_sregs_set(vm, vcpuid, sregs);
+	TEST_ASSERT(ret == 0, "KVM_SET_SREGS IOCTL failed, "
+		"rc: %i errno: %i", ret, errno);
+}
+
+/* Non-asserting variant of vcpu_sregs_set(): issues KVM_SET_SREGS on the
+ * VCPU given by vcpuid and returns the ioctl result to the caller.  The
+ * only TEST_ASSERT failure is for a vcpuid that does not exist.
+ */
+int _vcpu_sregs_set(struct kvm_vm *vm,
+	uint32_t vcpuid, struct kvm_sregs *sregs)
+{
+	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	/* Set the system regs. */
+	return ioctl(vcpu->fd, KVM_SET_SREGS, sregs);
+}
+
+/* VCPU Ioctl
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *   cmd - Ioctl number
+ *   arg - Argument to pass to the ioctl
+ *
+ * Return: None
+ *
+ * Issues an arbitrary ioctl on a VCPU fd.
+ */
+void vcpu_ioctl(struct kvm_vm *vm,
+	uint32_t vcpuid, unsigned long cmd, void *arg)
+{
+	struct vcpu *vcpu;
+	int ret;
+
+	vcpu = vcpu_find(vm, vcpuid);
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	/* Forward cmd and arg unchanged; anything but 0 fails the test. */
+	ret = ioctl(vcpu->fd, cmd, arg);
+	TEST_ASSERT(ret == 0, "vcpu ioctl %lu failed, rc: %i errno: %i (%s)",
+		cmd, ret, errno, strerror(errno));
+}
+
+/* VM Ioctl
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   cmd - Ioctl number
+ *   arg - Argument to pass to the ioctl
+ *
+ * Return: None
+ *
+ * Issues an arbitrary ioctl on a VM fd.
+ */
+void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
+{
+	/* Forward cmd and arg unchanged; anything but 0 fails the test. */
+	int ret = ioctl(vm->fd, cmd, arg);
+
+	TEST_ASSERT(ret == 0, "vm ioctl %lu failed, rc: %i errno: %i (%s)",
+		cmd, ret, errno, strerror(errno));
+}
+
+/* VM Dump
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   indent - Left margin indent amount
+ *
+ * Output Args:
+ *   stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps the current state of the VM given by vm, to the FILE stream
+ * given by stream.
+ */
+void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+	struct userspace_mem_region *region;
+	struct vcpu *vcpu;
+
+	/* Scalar properties of the VM. */
+	fprintf(stream, "%*smode: 0x%x\n", indent, "", vm->mode);
+	fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd);
+	fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size);
+
+	/* One entry per memory region, in list order. */
+	fprintf(stream, "%*sMem Regions:\n", indent, "");
+	region = vm->userspace_mem_region_head;
+	while (region != NULL) {
+		fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx "
+			"host_virt: %p\n", indent + 2, "",
+			(uint64_t) region->region.guest_phys_addr,
+			(uint64_t) region->region.memory_size,
+			region->host_mem);
+		fprintf(stream, "%*sunused_phy_pages: ", indent + 2, "");
+		sparsebit_dump(stream, region->unused_phy_pages, 0);
+		region = region->next;
+	}
+
+	fprintf(stream, "%*sMapped Virtual Pages:\n", indent, "");
+	sparsebit_dump(stream, vm->vpages_mapped, indent + 2);
+
+	/* Translation tables are dumped only once they have been created. */
+	fprintf(stream, "%*spgd_created: %u\n", indent, "",
+		vm->pgd_created);
+	if (vm->pgd_created) {
+		fprintf(stream, "%*sVirtual Translation Tables:\n",
+			indent + 2, "");
+		virt_dump(stream, vm, indent + 4);
+	}
+
+	/* One entry per VCPU, in list order. */
+	fprintf(stream, "%*sVCPUs:\n", indent, "");
+	vcpu = vm->vcpu_head;
+	while (vcpu != NULL) {
+		vcpu_dump(stream, vm, vcpu->id, indent + 2);
+		vcpu = vcpu->next;
+	}
+}
+
+/* VM VCPU Dump
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *   indent - Left margin indent amount
+ *
+ * Output Args:
+ *   stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps the current state of the VCPU specified by vcpuid, within the VM
+ * given by vm, to the FILE stream given by stream.
+ */
+void vcpu_dump(FILE *stream, struct kvm_vm *vm,
+	uint32_t vcpuid, uint8_t indent)
+{
+	struct kvm_sregs sregs;
+	struct kvm_regs regs;
+
+	fprintf(stream, "%*scpuid: %u\n", indent, "", vcpuid);
+
+	/* General registers: heading first, then fetch and print. */
+	fprintf(stream, "%*sregs:\n", indent + 2, "");
+	vcpu_regs_get(vm, vcpuid, &regs);
+	regs_dump(stream, &regs, indent + 4);
+
+	/* System registers, handled the same way. */
+	fprintf(stream, "%*ssregs:\n", indent + 2, "");
+	vcpu_sregs_get(vm, vcpuid, &sregs);
+	sregs_dump(stream, &sregs, indent + 4);
+}
+
+/* Known KVM exit reasons */
+static struct exit_reason {
+	unsigned int reason;
+	const char *name;
+} exit_reasons_known[] = {
+	{KVM_EXIT_UNKNOWN, "UNKNOWN"},
+	{KVM_EXIT_EXCEPTION, "EXCEPTION"},
+	{KVM_EXIT_IO, "IO"},
+	{KVM_EXIT_HYPERCALL, "HYPERCALL"},
+	{KVM_EXIT_DEBUG, "DEBUG"},
+	{KVM_EXIT_HLT, "HLT"},
+	{KVM_EXIT_MMIO, "MMIO"},
+	{KVM_EXIT_IRQ_WINDOW_OPEN, "IRQ_WINDOW_OPEN"},
+	{KVM_EXIT_SHUTDOWN, "SHUTDOWN"},
+	{KVM_EXIT_FAIL_ENTRY, "FAIL_ENTRY"},
+	{KVM_EXIT_INTR, "INTR"},
+	{KVM_EXIT_SET_TPR, "SET_TPR"},
+	{KVM_EXIT_TPR_ACCESS, "TPR_ACCESS"},
+	{KVM_EXIT_S390_SIEIC, "S390_SIEIC"},
+	{KVM_EXIT_S390_RESET, "S390_RESET"},
+	{KVM_EXIT_DCR, "DCR"},
+	{KVM_EXIT_NMI, "NMI"},
+	{KVM_EXIT_INTERNAL_ERROR, "INTERNAL_ERROR"},
+	{KVM_EXIT_OSI, "OSI"},
+	{KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"},
+#ifdef KVM_EXIT_MEMORY_NOT_PRESENT
+	{KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"},
+#endif
+};
+
+/* Exit Reason String
+ *
+ * Input Args:
+ *   exit_reason - Exit reason
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Constant string pointer describing the exit reason.
+ *
+ * Locates and returns a constant string that describes the KVM exit
+ * reason given by exit_reason.  If no such string is found, a constant
+ * string of "Unknown" is returned.
+ */
+const char *exit_reason_str(unsigned int exit_reason)
+{
+	const struct exit_reason *known = exit_reasons_known;
+	const struct exit_reason *end = known + ARRAY_SIZE(exit_reasons_known);
+
+	/* Linear scan of the table; the first matching entry wins. */
+	for (; known < end; known++) {
+		if (known->reason == exit_reason)
+			return known->name;
+	}
+
+	return "Unknown";
+}
+
+/* Physical Page Allocate
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   paddr_min - Physical address minimum
+ *   memslot - Memory region to allocate page from
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Starting physical address
+ *
+ * Within the VM specified by vm, locates an available physical page
+ * at or above paddr_min.  If found, the page is marked as in use
+ * and its address is returned.  A TEST_ASSERT failure occurs if no
+ * page is available at or above paddr_min.
+ */
+vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm,
+	vm_paddr_t paddr_min, uint32_t memslot)
+{
+	struct userspace_mem_region *region;
+	sparsebit_idx_t pg;
+
+	TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address "
+		"not divisable by page size.\n"
+		"  paddr_min: 0x%lx page_size: 0x%x",
+		paddr_min, vm->page_size);
+
+	/* Locate memory region. */
+	region = memslot2region(vm, memslot);
+
+	/* Locate next available physical page at or above paddr_min. */
+	pg = paddr_min >> vm->page_shift;
+
+	if (!sparsebit_is_set(region->unused_phy_pages, pg)) {
+		pg = sparsebit_next_set(region->unused_phy_pages, pg);
+		/* An index of 0 is treated here as "no unused page left". */
+		if (pg == 0) {
+			/* End the message with a newline so the dump
+			 * banner below starts on a line of its own.
+			 */
+			fprintf(stderr, "No guest physical page available, "
+				"paddr_min: 0x%lx page_size: 0x%x memslot: %u\n",
+				paddr_min, vm->page_size, memslot);
+			fputs("---- vm dump ----\n", stderr);
+			vm_dump(stderr, vm, 2);
+			abort();
+		}
+	}
+
+	/* Specify page as in use and return its address. */
+	sparsebit_clear(region->unused_phy_pages, pg);
+
+	return pg * vm->page_size;
+}
+
+/* Address Guest Virtual to Host Virtual
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   gva - VM virtual address
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Equivalent host virtual address
+ */
+void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+	/* Two steps: guest virtual to guest physical, then to host. */
+	vm_paddr_t gpa = addr_gva2gpa(vm, gva);
+
+	return addr_gpa2hva(vm, gpa);
+}
diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
new file mode 100644
index 000000000000..a0bd1980c81c
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
@@ -0,0 +1,67 @@
+/*
+ * tools/testing/selftests/kvm/lib/kvm_util_internal.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#ifndef KVM_UTIL_INTERNAL_H
+#define KVM_UTIL_INTERNAL_H 1
+
+#include "sparsebit.h"
+
+#ifndef BITS_PER_BYTE
+#define BITS_PER_BYTE           8
+#endif
+
+#ifndef BITS_PER_LONG
+#define BITS_PER_LONG (BITS_PER_BYTE * sizeof(long))
+#endif
+
+#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))
+#define BITS_TO_LONGS(nr)       DIV_ROUND_UP(nr, BITS_PER_LONG)
+
+/* Concrete definitions of struct kvm_vm and the list nodes it owns. */
+struct userspace_mem_region {
+	struct userspace_mem_region *next, *prev;	/* list links */
+	struct kvm_userspace_memory_region region;	/* as passed to KVM */
+	struct sparsebit *unused_phy_pages;	/* set bit = page not handed out */
+	int fd;		/* NOTE(review): purpose not visible here - confirm */
+	off_t offset;	/* NOTE(review): purpose not visible here - confirm */
+	void *host_mem;		/* host address given to KVM for the region */
+	void *mmap_start;	/* address returned by mmap() */
+	size_t mmap_size;	/* length passed to mmap() */
+};
+
+struct vcpu {
+	struct vcpu *next, *prev;	/* list links */
+	uint32_t id;			/* id the vcpu was created with */
+	int fd;				/* result of KVM_CREATE_VCPU */
+	struct kvm_run *state;		/* mmap() of fd */
+};
+
+struct kvm_vm {
+	int mode;
+	int fd;				/* target of the VM-level ioctls */
+	unsigned int page_size;		/* bytes per guest page */
+	unsigned int page_shift;	/* shift between address and page index */
+	uint64_t max_gfn;		/* highest guest page frame allowed */
+	struct vcpu *vcpu_head;		/* newest vcpu first */
+	struct userspace_mem_region *userspace_mem_region_head; /* newest first */
+	struct sparsebit *vpages_valid;	/* virtual pages that may be used */
+	struct sparsebit *vpages_mapped; /* virtual pages already mapped */
+	bool pgd_created;
+	vm_paddr_t pgd;	/* presumably only meaningful once pgd_created is set */
+};
+
+struct vcpu *vcpu_find(struct kvm_vm *vm,
+	uint32_t vcpuid);
+void vcpu_setup(struct kvm_vm *vm, int vcpuid);
+void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
+void regs_dump(FILE *stream, struct kvm_regs *regs,
+	uint8_t indent);
+void sregs_dump(FILE *stream, struct kvm_sregs *sregs,
+	uint8_t indent);
+
+#endif
diff --git a/tools/testing/selftests/kvm/lib/sparsebit.c b/tools/testing/selftests/kvm/lib/sparsebit.c
new file mode 100644
index 000000000000..0c5cf3e0cb6f
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/sparsebit.c
@@ -0,0 +1,2087 @@
+/*
+ * Sparse bit array
+ *
+ * Copyright (C) 2018, Google LLC.
+ * Copyright (C) 2018, Red Hat, Inc. (code style cleanup and fuzzing driver)
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * This library provides functions to support a memory efficient bit array,
+ * with an index size of 2^64.  A sparsebit array is allocated through
+ * the use of sparsebit_alloc() and free'd via sparsebit_free(),
+ * such as in the following:
+ *
+ *   struct sparsebit *s;
+ *   s = sparsebit_alloc();
+ *   sparsebit_free(&s);
+ *
+ * The layout of struct sparsebit is private to this file.
+ * Note that, sparsebit_free() takes a pointer to the sparsebit
+ * structure.  This is so that sparsebit_free() is able to poison
+ * the pointer (e.g. set it to NULL) to the struct sparsebit before
+ * returning to the caller.
+ *
+ * Between the return of sparsebit_alloc() and the call of
+ * sparsebit_free(), there are multiple query and modifying operations
+ * that can be performed on the allocated sparsebit array.  All of
+ * these operations take as a parameter the value returned from
+ * sparsebit_alloc() and most also take a bit index.  Frequently
+ * used routines include:
+ *
+ *  ---- Query Operations
+ *  sparsebit_is_set(s, idx)
+ *  sparsebit_is_clear(s, idx)
+ *  sparsebit_any_set(s)
+ *  sparsebit_first_set(s)
+ *  sparsebit_next_set(s, prev_idx)
+ *
+ *  ---- Modifying Operations
+ *  sparsebit_set(s, idx)
+ *  sparsebit_clear(s, idx)
+ *  sparsebit_set_num(s, idx, num);
+ *  sparsebit_clear_num(s, idx, num);
+ *
+ * A common operation is to iterate over all the bits set in a test
+ * sparsebit array.  This can be done via code with the following structure:
+ *
+ *   sparsebit_idx_t idx;
+ *   if (sparsebit_any_set(s)) {
+ *     idx = sparsebit_first_set(s);
+ *     do {
+ *       ...
+ *       idx = sparsebit_next_set(s, idx);
+ *     } while (idx != 0);
+ *   }
+ *
+ * The index of the first bit set needs to be obtained via
+ * sparsebit_first_set(), because sparsebit_next_set(), needs
+ * the index of the previously set bit.  The sparsebit_idx_t type is
+ * unsigned, so there is no previous index before 0 that is available.
+ * Also, the call to sparsebit_first_set() is not made unless there
+ * is at least 1 bit in the array set.  This is because sparsebit_first_set()
+ * aborts if sparsebit_first_set() is called with no bits set.
+ * It is the callers responsibility to assure that the
+ * sparsebit array has at least a single bit set before calling
+ * sparsebit_first_set().
+ *
+ * ==== Implementation Overview ====
+ * For the most part the internal implementation of sparsebit is
+ * opaque to the caller.  One important implementation detail that the
+ * caller may need to be aware of is the spatial complexity of the
+ * implementation.  This implementation of a sparsebit array is not
+ * only sparse, in that it uses memory proportional to the number of bits
+ * set.  It is also efficient in memory usage when most of the bits are
+ * set.
+ *
+ * At a high-level the state of the bit settings are maintained through
+ * the use of a binary-search tree, where each node contains at least
+ * the following members:
+ *
+ *   typedef uint64_t sparsebit_idx_t;
+ *   typedef uint64_t sparsebit_num_t;
+ *
+ *   sparsebit_idx_t idx;
+ *   uint32_t mask;
+ *   sparsebit_num_t num_after;
+ *
+ * The idx member contains the bit index of the first bit described by this
+ * node, while the mask member stores the setting of the first 32-bits.
+ * The setting of the bit at idx + n, where 0 <= n < 32, is located in the
+ * mask member at 1 << n.
+ *
+ * Nodes are sorted by idx and the bits described by two nodes will never
+ * overlap. The idx member is always aligned to the mask size, i.e. a
+ * multiple of 32.
+ *
+ * Beyond a typical implementation, the nodes in this implementation also
+ * contains a member named num_after.  The num_after member holds the
+ * number of bits immediately after the mask bits that are contiguously set.
+ * The use of the num_after member allows this implementation to efficiently
+ * represent cases where most bits are set.  For example, the case of all
+ * but the last two bits set, is represented by the following two nodes:
+ *
+ *   node 0 - idx: 0x0 mask: 0xffffffff num_after: 0xffffffffffffffc0
+ *   node 1 - idx: 0xffffffffffffffe0 mask: 0x3fffffff num_after: 0
+ *
+ * ==== Invariants ====
+ * This implementation uses the following invariants:
+ *
+ *   + Node are only used to represent bits that are set.
+ *     Nodes with a mask of 0 and num_after of 0 are not allowed.
+ *
+ *   + Sum of bits set in all the nodes is equal to the value of
+ *     the struct sparsebit num_set member.
+ *
+ *   + The setting of at least one bit is always described in a nodes
+ *     mask (mask >= 1).
+ *
+ *   + A node with all mask bits set only occurs when the last bit
+ *     described by the previous node is not equal to this nodes
+ *     starting index - 1.  All such occurrences of this condition are
+ *     avoided by moving the setting of the nodes mask bits into
+ *     the previous nodes num_after setting.
+ *
+ *   + Node starting index is evenly divisible by the number of bits
+ *     within a nodes mask member.
+ *
+ *   + Nodes never represent a range of bits that wrap around the
+ *     highest supported index.
+ *
+ *      (idx + MASK_BITS + num_after - 1) <= (((sparsebit_idx_t) 0) - 1)
+ *
+ *     As a consequence of the above, the num_after member of a node
+ *     will always be <=:
+ *
+ *       maximum_index - nodes_starting_index - number_of_mask_bits
+ *
+ *   + Nodes within the binary search tree are sorted based on each
+ *     nodes starting index.
+ *
+ *   + The range of bits described by any two nodes do not overlap.  The
+ *     range of bits described by a single node is:
+ *
+ *       start: node->idx
+ *       end (inclusive): node->idx + MASK_BITS + node->num_after - 1;
+ *
+ * Note, at times these invariants are temporarily violated for a
+ * specific portion of the code.  For example, when setting a mask
+ * bit, there is a small delay between when the mask bit is set and the
+ * value in the struct sparsebit num_set member is updated.  Other
+ * temporary violations occur when node_split() is called with a specified
+ * index and assures that a node where its mask represents the bit
+ * at the specified index exists.  At times to do this node_split()
+ * must split an existing node into two nodes or create a node that
+ * has no bits set.  Such temporary violations must be corrected before
+ * returning to the caller.  These corrections are typically performed
+ * by the local function node_reduce().
+ */
+
+#include "test_util.h"
+#include "sparsebit.h"
+#include <limits.h>
+#include <assert.h>
+
+#define DUMP_LINE_MAX 100 /* Does not include indent amount */
+
+typedef uint32_t mask_t; /* type of the per-node bitmap (struct node mask) */
+#define MASK_BITS (sizeof(mask_t) * CHAR_BIT) /* bits in a mask_t; has type size_t */
+
+struct node {
+	struct node *parent; /* NULL for the root node */
+	struct node *left; /* subtree of nodes with lower idx */
+	struct node *right; /* subtree of nodes with higher idx */
+	sparsebit_idx_t idx; /* index of least-significant bit in mask */
+	sparsebit_num_t num_after; /* num contiguously set after mask */
+	mask_t mask; /* bit n holds the setting of index idx + n */
+};
+
+struct sparsebit {
+	/*
+	 * Root of the binary search tree of nodes.
+	 * Equal to NULL when no bits are set in
+	 * the entire sparsebit array.
+	 */
+	struct node *root;
+
+	/*
+	 * Redundant running count of set bits, kept so that
+	 * sparsebit_num_set() is O(1) and for diagnostics.  The count
+	 * wraps, so 0 means either no bits or every bit is set; root
+	 * being non-NULL distinguishes the two (see sparsebit_all_set()).
+	 */
+	sparsebit_num_t num_set;
+};
+
+/* Counts the set bits a node accounts for: the bits set in its
+ * mask plus the num_after bits that follow the mask.
+ */
+static sparsebit_num_t node_num_set(struct node *nodep)
+{
+	return __builtin_popcount(nodep->mask) + nodep->num_after;
+}
+
+/* Walks left from the root to reach the node with the lowest
+ * starting index.  Yields NULL for an empty tree.
+ */
+static struct node *node_first(struct sparsebit *s)
+{
+	struct node *cur = s->root;
+
+	while (cur && cur->left)
+		cur = cur->left;
+
+	return cur;
+}
+
+/* In-order successor: finds the node with the smallest starting
+ * index greater than that of np.  Yields NULL when np is the
+ * last node.  The s parameter is not consulted.
+ */
+static struct node *node_next(struct sparsebit *s, struct node *np)
+{
+	struct node *cur, *up;
+
+	/* With a right subtree, the successor is its left-most node. */
+	if (np->right) {
+		cur = np->right;
+		while (cur->left)
+			cur = cur->left;
+		return cur;
+	}
+
+	/*
+	 * Otherwise climb for as long as we arrive from a right child.
+	 * The first ancestor reached from its left side is the
+	 * successor; running off the root means there is none.
+	 */
+	cur = np;
+	for (up = cur->parent; up && cur == up->right; up = cur->parent)
+		cur = up;
+
+	return up;
+}
+
+/* In-order predecessor: finds the node with the largest starting
+ * index less than that of np.  Yields NULL when np is the
+ * first node.  The s parameter is not consulted.
+ */
+static struct node *node_prev(struct sparsebit *s, struct node *np)
+{
+	struct node *cur, *up;
+
+	/* With a left subtree, the predecessor is its right-most node. */
+	if (np->left) {
+		cur = np->left;
+		while (cur->right)
+			cur = cur->right;
+		return cur;
+	}
+
+	/*
+	 * Otherwise climb for as long as we arrive from a left child.
+	 * The first ancestor reached from its right side is the
+	 * predecessor; running off the root means there is none.
+	 */
+	cur = np;
+	for (up = cur->parent; up && cur == up->left; up = cur->parent)
+		cur = up;
+
+	return up;
+}
+
+
+/* Deep-copies the tree rooted at subtree.  Every node is freshly
+ * allocated and receives the idx, mask and num_after of its source;
+ * the parent link of the returned root is left NULL.  Aborts on OOM.
+ */
+static struct node *node_copy_subtree(struct node *subtree)
+{
+	struct node *dup, *child;
+
+	dup = calloc(1, sizeof(*dup));
+	if (dup == NULL) {
+		perror("calloc");
+		abort();
+	}
+
+	/* Bit settings first; links are rebuilt below. */
+	dup->num_after = subtree->num_after;
+	dup->mask = subtree->mask;
+	dup->idx = subtree->idx;
+
+	/* Clone whichever children exist and hook them under dup. */
+	if (subtree->left) {
+		child = node_copy_subtree(subtree->left);
+		child->parent = dup;
+		dup->left = child;
+	}
+	if (subtree->right) {
+		child = node_copy_subtree(subtree->right);
+		child->parent = dup;
+		dup->right = child;
+	}
+	return dup;
+}
+
+/* Looks up the node whose range covers idx.  A node covers the
+ * indices from its idx through idx + MASK_BITS + num_after - 1,
+ * i.e. its mask bits plus the contiguous run recorded in num_after.
+ * Yields NULL when no node covers idx.
+ */
+static struct node *node_find(struct sparsebit *s, sparsebit_idx_t idx)
+{
+	struct node *cur = s->root;
+
+	while (cur) {
+		if (cur->idx > idx)
+			cur = cur->left;
+		else if (idx > cur->idx + MASK_BITS + cur->num_after - 1)
+			cur = cur->right;
+		else
+			break;
+	}
+	return cur;
+}
+
+/* Entry Requirements:
+ *   + A node that describes the setting of idx is not already present.
+ *
+ * Adds a new node whose mask covers idx (idx is rounded down to a
+ * MASK_BITS boundary) and returns it.  Aborts if allocation fails.
+ *
+ * TODO(lhuemill): Degenerate cases cause the tree to get unbalanced.
+ */
+static struct node *node_add(struct sparsebit *s, sparsebit_idx_t idx)
+{
+	struct node *nodep, *parentp, *prev;
+
+	/* Allocate and initialize the new node. */
+	nodep = calloc(1, sizeof(*nodep));
+	if (!nodep) {
+		perror("calloc");
+		abort();
+	}
+
+	nodep->idx = idx & -MASK_BITS;
+
+	/* If no nodes, set it up as the root node. */
+	if (!s->root) {
+		s->root = nodep;
+		return nodep;
+	}
+
+	/*
+	 * Find the parent where the new node should be attached
+	 * and add the node there.
+	 */
+	parentp = s->root;
+	while (true) {
+		if (idx < parentp->idx) {
+			if (!parentp->left) {
+				parentp->left = nodep;
+				nodep->parent = parentp;
+				break;
+			}
+			parentp = parentp->left;
+		} else {
+			assert(idx > parentp->idx + MASK_BITS + parentp->num_after - 1);
+			if (!parentp->right) {
+				parentp->right = nodep;
+				nodep->parent = parentp;
+				break;
+			}
+			parentp = parentp->right;
+		}
+	}
+
+	/*
+	 * If num_after bits of the previous node overlap the new node's
+	 * mask, move them one at a time into that mask.  The shift is done
+	 * in mask_t because n1 can be 31 and 1 << 31 overflows an int.
+	 */
+	prev = node_prev(s, nodep);
+	while (prev && prev->idx + MASK_BITS + prev->num_after - 1 >= nodep->idx) {
+		unsigned int n1 = (prev->idx + MASK_BITS + prev->num_after - 1)
+			- nodep->idx;
+		assert(prev->num_after > 0);
+		assert(n1 < MASK_BITS);
+		assert(!(nodep->mask & ((mask_t)1 << n1)));
+		nodep->mask |= ((mask_t)1 << n1);
+		prev->num_after--;
+	}
+
+	return nodep;
+}
+
+/* Reports whether every bit in the sparsebit array is set.  */
+bool sparsebit_all_set(struct sparsebit *s)
+{
+	/*
+	 * A tree with nodes holds at least one set bit, so a num_set of
+	 * zero can then only be the counter having wrapped, which takes
+	 * all 2^64 bits being set.
+	 */
+	return s->num_set == 0 && s->root != NULL;
+}
+
+/* Unlinks the node pointed to by nodep from the tree and frees it,
+ * subtracting the bits it described from s->num_set.
+ */
+static void node_rm(struct sparsebit *s, struct node *nodep)
+{
+	struct node *tmp;
+	sparsebit_num_t num_set;
+
+	num_set = node_num_set(nodep);
+	assert(s->num_set >= num_set || sparsebit_all_set(s));
+	s->num_set -= node_num_set(nodep); /* same value as num_set above */
+
+	/* Both children present: reduce to the right-only case */
+	if (nodep->left && nodep->right) {
+		/*
+		 * Hang the left subtree off the left-most node of the right
+		 * subtree; the right-only case below then unlinks nodep.
+		 */
+		for (tmp = nodep->right; tmp->left; tmp = tmp->left)
+			;
+		tmp->left = nodep->left;
+		nodep->left = NULL;
+		tmp->left->parent = tmp;
+	}
+
+	/* Left only child: it takes nodep's place under nodep's parent */
+	if (nodep->left) {
+		if (!nodep->parent) {
+			s->root = nodep->left;
+			nodep->left->parent = NULL;
+		} else {
+			nodep->left->parent = nodep->parent;
+			if (nodep == nodep->parent->left)
+				nodep->parent->left = nodep->left;
+			else {
+				assert(nodep == nodep->parent->right);
+				nodep->parent->right = nodep->left;
+			}
+		}
+
+		nodep->parent = nodep->left = nodep->right = NULL;
+		free(nodep);
+
+		return;
+	}
+
+
+	/* Right only child: it takes nodep's place under nodep's parent */
+	if (nodep->right) {
+		if (!nodep->parent) {
+			s->root = nodep->right;
+			nodep->right->parent = NULL;
+		} else {
+			nodep->right->parent = nodep->parent;
+			if (nodep == nodep->parent->left)
+				nodep->parent->left = nodep->right;
+			else {
+				assert(nodep == nodep->parent->right);
+				nodep->parent->right = nodep->right;
+			}
+		}
+
+		nodep->parent = nodep->left = nodep->right = NULL;
+		free(nodep);
+
+		return;
+	}
+
+	/* Leaf Node: just detach it from its parent, or empty the tree */
+	if (!nodep->parent) {
+		s->root = NULL;
+	} else {
+		if (nodep->parent->left == nodep)
+			nodep->parent->left = NULL;
+		else {
+			assert(nodep == nodep->parent->right);
+			nodep->parent->right = NULL;
+		}
+	}
+
+	nodep->parent = nodep->left = nodep->right = NULL;
+	free(nodep);
+
+	return;
+}
+
+/* Splits the node containing the bit at idx so that there is a node
+ * that starts at the specified index.  If no node describes idx, a new
+ * node at the specified index is created.  Returns the node that
+ * starts at idx, which may be a node that already existed.
+ * idx must be on a mask boundary.
+ */
+static struct node *node_split(struct sparsebit *s, sparsebit_idx_t idx)
+{
+	struct node *nodep1, *nodep2;
+	sparsebit_idx_t offset;
+	sparsebit_num_t orig_num_after;
+
+	assert(!(idx % MASK_BITS));
+
+	/*
+	 * Is there a node that describes the setting of idx?
+	 * If not, add it.
+	 */
+	nodep1 = node_find(s, idx);
+	if (!nodep1)
+		return node_add(s, idx);
+
+	/*
+	 * All done if the starting index of the node is where the
+	 * split should occur.
+	 */
+	if (nodep1->idx == idx)
+		return nodep1;
+
+	/*
+	 * Split point not at start of mask, so it must be part of
+	 * bits described by num_after.  Calculate offset within
+	 * num_after for where the split is to occur.
+	 */
+	offset = idx - (nodep1->idx + MASK_BITS);
+	orig_num_after = nodep1->num_after;
+
+	/*
+	 * Add a new node to describe the bits starting at
+	 * the split point.
+	 */
+	nodep1->num_after = offset;
+	nodep2 = node_add(s, idx);
+
+	/*
+	 * Move bits after the split point into the new node.  The partial
+	 * mask is built from a mask_t because up to 31 bits can remain
+	 * and 1 << 31 overflows a signed int.
+	 */
+	nodep2->num_after = orig_num_after - offset;
+	if (nodep2->num_after >= MASK_BITS) {
+		nodep2->mask = ~(mask_t) 0;
+		nodep2->num_after -= MASK_BITS;
+	} else {
+		nodep2->mask = ((mask_t)1 << nodep2->num_after) - 1;
+		nodep2->num_after = 0;
+	}
+
+	return nodep2;
+}
+
+/* Iteratively reduces the node pointed to by nodep and its adjacent
+ * nodes into a more compact form.  For example, a node with a mask with
+ * all bits set adjacent to a previous node, will get combined into a
+ * single node with an increased num_after setting.
+ *
+ * After each reduction, a further check is made to see if additional
+ * reductions are possible with the new previous and next nodes.  Note,
+ * a search for a reduction is only done across the nodes nearest nodep
+ * and those that became part of a reduction.  Reductions beyond nodep
+ * and the adjacent nodes that are reduced are not discovered.  It is the
+ * responsibility of the caller to pass a nodep that is within one node
+ * of each possible reduction.
+ *
+ * This function does not fix the temporary violation of all invariants.
+ * For example it does not fix the case where the bit settings described
+ * by two or more nodes overlap.  Such a violation introduces the potential
+ * complication of a bit setting for a specific index having different settings
+ * in different nodes.  This would then introduce the further complication
+ * of which node has the correct setting of the bit and thus such conditions
+ * are not allowed.
+ *
+ * This function is designed to fix invariant violations that are introduced
+ * by node_split() and by changes to the nodes mask or num_after members.
+ * For example, when setting a bit within a nodes mask, the function that
+ * sets the bit doesn't have to worry about whether the setting of that
+ * bit caused the mask to have leading only or trailing only bits set.
+ * Instead, the function can call node_reduce(), with nodep equal to the
+ * node address that it set a mask bit in, and node_reduce() will notice
+ * the cases of leading or trailing only bits and that there is an
+ * adjacent node that the bit settings could be merged into.
+ *
+ * This implementation specifically detects and corrects violation of the
+ * following invariants:
+ *
+ *   + Node are only used to represent bits that are set.
+ *     Nodes with a mask of 0 and num_after of 0 are not allowed.
+ *
+ *   + The setting of at least one bit is always described in a nodes
+ *     mask (mask >= 1).
+ *
+ *   + A node with all mask bits set only occurs when the last bit
+ *     described by the previous node is not equal to this nodes
+ *     starting index - 1.  All such occurrences of this condition are
+ *     avoided by moving the setting of the nodes mask bits into
+ *     the previous nodes num_after setting.
+ */
+static void node_reduce(struct sparsebit *s, struct node *nodep)
+{
+	bool reduction_performed; /* true when the current pass changed something */
+
+	do {
+		reduction_performed = false;
+		struct node *prev, *next, *tmp;
+
+		/* 1) Potential reductions within the current node. */
+
+		/* Nodes with all bits cleared may be removed. */
+		if (nodep->mask == 0 && nodep->num_after == 0) {
+			/*
+			 * About to remove the node pointed to by
+			 * nodep, which normally would cause a problem
+			 * for the next pass through the reduction loop,
+			 * because the node at the starting point no longer
+			 * exists.  This potential problem is handled
+			 * by first remembering the location of the next
+			 * or previous nodes.  Doesn't matter which, because
+			 * once the node at nodep is removed, there will be
+			 * no other nodes between prev and next.
+			 *
+			 * Note, the checks performed on nodep against
+			 * prev and next both check for an adjacent
+			 * node that can be reduced into a single node.  As
+			 * such, after removing the node at nodep, doesn't
+			 * matter whether the nodep for the next pass
+			 * through the loop is equal to the previous pass
+			 * prev or next node.  Either way, on the next pass
+			 * the one not selected will become either the
+			 * prev or next node.
+			 */
+			tmp = node_next(s, nodep);
+			if (!tmp)
+				tmp = node_prev(s, nodep);
+
+			node_rm(s, nodep);
+			nodep = NULL; /* freed by node_rm(); replaced just below */
+
+			nodep = tmp; /* NULL here ends the loop: the tree is empty */
+			reduction_performed = true;
+			continue;
+		}
+
+		/*
+		 * When the mask is 0, can reduce the amount of num_after
+		 * bits by moving the initial num_after bits into the mask.
+		 */
+		if (nodep->mask == 0) {
+			assert(nodep->num_after != 0);
+			assert(nodep->idx + MASK_BITS > nodep->idx);
+
+			nodep->idx += MASK_BITS;
+
+			if (nodep->num_after >= MASK_BITS) {
+				nodep->mask = ~0; /* every mask bit set */
+				nodep->num_after -= MASK_BITS;
+			} else {
+				nodep->mask = (1u << nodep->num_after) - 1;
+				nodep->num_after = 0;
+			}
+
+			reduction_performed = true;
+			continue;
+		}
+
+		/*
+		 * 2) Potential reductions between the current and
+		 * previous nodes.
+		 */
+		prev = node_prev(s, nodep);
+		if (prev) {
+			sparsebit_idx_t prev_highest_bit;
+
+			/* Nodes with no bits set can be removed. */
+			if (prev->mask == 0 && prev->num_after == 0) {
+				node_rm(s, prev);
+
+				reduction_performed = true;
+				continue;
+			}
+
+			/*
+			 * All mask bits set and previous node has
+			 * adjacent index.
+			 */
+			if (nodep->mask + 1 == 0 &&
+			    prev->idx + MASK_BITS == nodep->idx) {
+				prev->num_after += MASK_BITS + nodep->num_after;
+				nodep->mask = 0; /* emptied node is removed on the next pass */
+				nodep->num_after = 0;
+
+				reduction_performed = true;
+				continue;
+			}
+
+			/*
+			 * Is node adjacent to previous node and the node
+			 * contains a single contiguous range of bits
+			 * starting from the beginning of the mask?
+			 */
+			prev_highest_bit = prev->idx + MASK_BITS - 1 + prev->num_after;
+			if (prev_highest_bit + 1 == nodep->idx &&
+			    (nodep->mask | (nodep->mask >> 1)) == nodep->mask) {
+				/*
+				 * How many contiguous bits are there?
+				 * Is equal to the total number of set
+				 * bits, due to an earlier check that
+				 * there is a single contiguous range of
+				 * set bits.
+				 */
+				unsigned int num_contiguous
+					= __builtin_popcount(nodep->mask);
+				assert((num_contiguous > 0) &&
+				       ((1ULL << num_contiguous) - 1) == nodep->mask);
+
+				prev->num_after += num_contiguous;
+				nodep->mask = 0;
+
+				/*
+				 * For predictable performance, handle special
+				 * case where all mask bits are set and there
+				 * is a non-zero num_after setting.  This code
+				 * is functionally correct without the following
+				 * conditionalized statements, but without them
+				 * the value of num_after is only reduced by
+				 * the number of mask bits per pass.  There are
+				 * cases where num_after can be close to 2^64.
+				 * Without this code it could take nearly
+				 * (2^64) / 32 passes to perform the full
+				 * reduction.
+				 */
+				if (num_contiguous == MASK_BITS) {
+					prev->num_after += nodep->num_after;
+					nodep->num_after = 0;
+				}
+
+				reduction_performed = true;
+				continue;
+			}
+		}
+
+		/*
+		 * 3) Potential reductions between the current and
+		 * next nodes.
+		 */
+		next = node_next(s, nodep);
+		if (next) {
+			/* Nodes with no bits set can be removed. */
+			if (next->mask == 0 && next->num_after == 0) {
+				node_rm(s, next);
+				reduction_performed = true;
+				continue;
+			}
+
+			/*
+			 * Is next node index adjacent to current node
+			 * and has a mask with all bits set?
+			 */
+			if (next->idx == nodep->idx + MASK_BITS + nodep->num_after &&
+			    next->mask == ~(mask_t) 0) {
+				nodep->num_after += MASK_BITS;
+				next->mask = 0;
+				nodep->num_after += next->num_after;
+				next->num_after = 0;
+
+				node_rm(s, next);
+				next = NULL; /* freed above; reassigned on the next pass */
+
+				reduction_performed = true;
+				continue;
+			}
+		}
+	} while (nodep && reduction_performed);
+}
+
+/* Returns whether the bit at the index given by idx, within the
+ * sparsebit array pointed to by s, is set.
+ */
+bool sparsebit_is_set(struct sparsebit *s, sparsebit_idx_t idx)
+{
+	struct node *nodep;
+
+	/* Find the node that describes the setting of the bit at idx */
+	nodep = node_find(s, idx);
+
+	/* No node describes idx, so the bit is clear */
+	if (!nodep)
+		return false;
+
+	/* Bit is set if it is any of the bits described by num_after */
+	if (nodep->num_after && idx >= nodep->idx + MASK_BITS)
+		return true;
+
+	/*
+	 * Is the corresponding mask bit set?  Shift a mask_t rather than
+	 * an int: the bit offset can be 31 and 1 << 31 overflows an int.
+	 */
+	assert(idx >= nodep->idx && idx - nodep->idx < MASK_BITS);
+	return !!(nodep->mask & ((mask_t)1 << (idx - nodep->idx)));
+}
+
+/* Within the sparsebit array pointed to by s, sets the bit at the
+ * index given by idx and keeps s->num_set in step.  No-op if already set.
+ */
+static void bit_set(struct sparsebit *s, sparsebit_idx_t idx)
+{
+	struct node *nodep;
+
+	/* Skip bits that are already set */
+	if (sparsebit_is_set(s, idx))
+		return;
+
+	/*
+	 * Get a node where the bit at idx is described by the mask.
+	 * The node_split will also create a node, if there isn't
+	 * already a node that describes the setting of bit.
+	 */
+	nodep = node_split(s, idx & -MASK_BITS);
+
+	/* Set the bit in the node's mask; shift a mask_t, 1 << 31 is UB */
+	assert(idx >= nodep->idx && idx <= nodep->idx + MASK_BITS - 1);
+	assert(!(nodep->mask & ((mask_t)1 << (idx - nodep->idx))));
+	nodep->mask |= (mask_t)1 << (idx - nodep->idx);
+	s->num_set++;
+
+	node_reduce(s, nodep);
+}
+
+/* Within the sparsebit array pointed to by s, clears the bit at the
+ * index given by idx and keeps s->num_set in step.  No-op if already clear.
+ */
+static void bit_clear(struct sparsebit *s, sparsebit_idx_t idx)
+{
+	struct node *nodep;
+
+	/* Skip bits that are already cleared */
+	if (!sparsebit_is_set(s, idx))
+		return;
+
+	/* Is there a node that describes the setting of this bit? */
+	nodep = node_find(s, idx);
+	if (!nodep)
+		return;
+
+	/*
+	 * If a num_after bit, split the node, so that the bit is
+	 * part of a node mask.
+	 */
+	if (idx >= nodep->idx + MASK_BITS)
+		nodep = node_split(s, idx & -MASK_BITS);
+
+	/*
+	 * The bit at idx is now within the mask; clear it.  Shift a mask_t:
+	 * the offset can be 31 and 1 << 31 overflows a signed int.
+	 */
+	assert(idx >= nodep->idx && idx <= nodep->idx + MASK_BITS - 1);
+	assert(nodep->mask & ((mask_t)1 << (idx - nodep->idx)));
+	nodep->mask &= ~((mask_t)1 << (idx - nodep->idx));
+	assert(s->num_set > 0 || sparsebit_all_set(s));
+	s->num_set--;
+
+	node_reduce(s, nodep);
+}
+
+/* Recursively dumps to the FILE stream given by stream the contents
+ * of the sub-tree of nodes pointed to by nodep.  Each line of output
+ * is prefixed by the number of spaces given by indent, which grows by
+ * 2 on each recursion so that deeper nodes are displayed with a
+ * greater indent.
+ */
+static void dump_nodes(FILE *stream, struct node *nodep,
+	unsigned int indent)
+{
+	const char *node_type;
+
+	/* Dump contents of node; %p takes void *, %llx unsigned long long */
+	if (!nodep->parent)
+		node_type = "root";
+	else if (nodep == nodep->parent->left)
+		node_type = "left";
+	else {
+		assert(nodep == nodep->parent->right);
+		node_type = "right";
+	}
+	fprintf(stream, "%*s---- %s nodep: %p\n", indent, "", node_type,
+		(void *)nodep);
+	fprintf(stream, "%*s  parent: %p left: %p right: %p\n", indent, "",
+		(void *)nodep->parent, (void *)nodep->left,
+		(void *)nodep->right);
+	fprintf(stream, "%*s  idx: 0x%llx mask: 0x%x num_after: 0x%llx\n",
+		indent, "", (unsigned long long)nodep->idx, nodep->mask,
+		(unsigned long long)nodep->num_after);
+
+	/* Recurse into whichever children exist, left before right */
+	if (nodep->left)
+		dump_nodes(stream, nodep->left, indent + 2);
+	if (nodep->right)
+		dump_nodes(stream, nodep->right, indent + 2);
+}
+
+/* Lowest set mask bit at or above offset start (one must exist). */
+static inline sparsebit_idx_t node_first_set(struct node *nodep, int start)
+{
+	mask_t at_or_above = -((mask_t)1 << start);
+
+	return nodep->idx + __builtin_ctz(nodep->mask & at_or_above);
+}
+
+/* Lowest clear mask bit at or above offset start (one must exist). */
+static inline sparsebit_idx_t node_first_clear(struct node *nodep, int start)
+{
+	mask_t at_or_above = -((mask_t)1 << start);
+
+	return nodep->idx + __builtin_ctz(~nodep->mask & at_or_above);
+}
+
+/* Dumps to the FILE stream specified by stream the implementation
+ * dependent internal state of s.  Each line of output is prefixed with
+ * the number of spaces given by indent.  The output format is subject
+ * to change and should only be used for diagnostic purposes, e.g. by
+ * test cases that want to capture state after detecting an unexpected
+ * condition.
+ */
+static void sparsebit_dump_internal(FILE *stream, struct sparsebit *s,
+	unsigned int indent)
+{
+	/* Dump the contents of s; casts match the %p and %llx conversions */
+	fprintf(stream, "%*sroot: %p\n", indent, "", (void *)s->root);
+	fprintf(stream, "%*snum_set: 0x%llx\n", indent, "",
+		(unsigned long long)s->num_set);
+
+	if (s->root)
+		dump_nodes(stream, s->root, indent);
+}
+
+/* Creates an empty sparsebit array, i.e. one with every bit
+ * cleared, and hands it back to the caller.  Aborts the process
+ * if the allocation fails.
+ */
+struct sparsebit *sparsebit_alloc(void)
+{
+	struct sparsebit *sbit = calloc(1, sizeof(*sbit));
+
+	/* Zeroed storage gives a NULL root and a num_set of 0. */
+	if (sbit == NULL) {
+		perror("calloc");
+		abort();
+	}
+
+	return sbit;
+}
+
+/* Releases the sparsebit array that *sbitp refers to and then sets
+ * *sbitp to NULL.  Does nothing when *sbitp is already NULL.
+ */
+void sparsebit_free(struct sparsebit **sbitp)
+{
+	struct sparsebit *victim = *sbitp;
+
+	if (victim != NULL) {
+		/* Clear every bit first so that no nodes remain allocated. */
+		sparsebit_clear_all(victim);
+		free(victim);
+		*sbitp = NULL;
+	}
+}
+
+/* Overwrites the sparsebit array d with the bit settings of s.
+ * d has to come from sparsebit_alloc() beforehand; whatever bits
+ * it had set are discarded before the contents of s are
+ * duplicated into it.
+ */
+void sparsebit_copy(struct sparsebit *d, struct sparsebit *s)
+{
+	/* Start from an empty destination. */
+	sparsebit_clear_all(d);
+
+	if (!s->root)
+		return;
+	d->num_set = s->num_set;
+	d->root = node_copy_subtree(s->root);
+}
+
+/* Reports whether the num bits beginning at idx are all set.  */
+bool sparsebit_is_set_num(struct sparsebit *s,
+	sparsebit_idx_t idx, sparsebit_num_t num)
+{
+	sparsebit_idx_t first_gap;
+
+	assert(num > 0);
+	assert(idx + num - 1 >= idx);
+
+	/* A clear bit right at idx settles it. */
+	if (!sparsebit_is_set(s, idx))
+		return false;
+
+	/*
+	 * A result of 0 from sparsebit_next_clear() is taken to mean no
+	 * clear bit follows idx, so the whole range is set.  Otherwise
+	 * the run of set bits must span at least num bits.
+	 */
+	first_gap = sparsebit_next_clear(s, idx);
+	if (first_gap == 0)
+		return true;
+	return first_gap - idx >= num;
+}
+
+/* Reports whether the bit at the index given by idx is cleared.  */
+bool sparsebit_is_clear(struct sparsebit *s,
+	sparsebit_idx_t idx)
+{
+	return sparsebit_is_set(s, idx) ? false : true;
+}
+
+/* Reports whether the num bits beginning at idx are all cleared.  */
+bool sparsebit_is_clear_num(struct sparsebit *s,
+	sparsebit_idx_t idx, sparsebit_num_t num)
+{
+	sparsebit_idx_t first_hit;
+
+	assert(num > 0);
+	assert(idx + num - 1 >= idx);
+
+	/* A set bit right at idx settles it. */
+	if (sparsebit_is_set(s, idx))
+		return false;
+
+	/*
+	 * A result of 0 from sparsebit_next_set() is taken to mean no
+	 * set bit follows idx, so the whole range is clear.  Otherwise
+	 * the run of cleared bits must span at least num bits.
+	 */
+	first_hit = sparsebit_next_set(s, idx);
+	if (first_hit == 0)
+		return true;
+	return first_hit - idx >= num;
+}
+
+/* Reports how many bits are currently set.  The count is kept in a
+ * counter that wraps when every bit is set, so 0 is returned both
+ * for an empty array and for a completely full one.  To test for
+ * emptiness call sparsebit_any_set() rather than comparing this
+ * value against 0.
+ */
+sparsebit_num_t sparsebit_num_set(struct sparsebit *s)
+{
+	return s->num_set;
+}
+
+/* Returns whether any bit is set in the sparsebit array.  */
+bool sparsebit_any_set(struct sparsebit *s)
+{
+	/*
+	 * Nodes only describe set bits, so a tree without any node
+	 * means that no bit is set.
+	 */
+	if (!s->root)
+		return false;
+
+	/*
+	 * Every node should have a non-zero mask; only the root is
+	 * checked.  With nodes present num_set may be 0 only because it
+	 * wrapped: the root then has a full mask and maximal num_after.
+	 */
+	assert(s->root->mask != 0);
+	assert(s->num_set > 0 ||
+	       (s->root->num_after == ((sparsebit_num_t) 0) - MASK_BITS &&
+		s->root->mask == ~(mask_t) 0));
+
+	return true;
+}
+
+/* Returns whether every bit in the sparsebit array is cleared.  */
+bool sparsebit_all_clear(struct sparsebit *s)
+{
+	return !sparsebit_any_set(s);
+}
+
+/* Returns whether any bit in the sparsebit array is cleared.  */
+bool sparsebit_any_clear(struct sparsebit *s)
+{
+	return !sparsebit_all_set(s);
+}
+
+/* Returns the index of the first set bit; assert()s that one exists.
+ */
+sparsebit_idx_t sparsebit_first_set(struct sparsebit *s)
+{
+	struct node *nodep;
+
+	/* Validate at least 1 bit is set */
+	assert(sparsebit_any_set(s));
+
+	nodep = node_first(s);
+	return node_first_set(nodep, 0);
+}
+
+/* Returns the index of the first cleared bit; assert()s that one
+ * exists.
+ */
+sparsebit_idx_t sparsebit_first_clear(struct sparsebit *s)
+{
+	struct node *first, *second;
+	sparsebit_idx_t past_first;
+
+	/* There has to be a cleared bit to report. */
+	assert(sparsebit_any_clear(s));
+
+	/*
+	 * Without any node, or with a first node that starts above
+	 * index 0, bit 0 is not described as set and is the answer.
+	 */
+	first = node_first(s);
+	if (!first)
+		return 0;
+	if (first->idx > 0)
+		return 0;
+
+	/* A hole in the first node's mask is the first cleared bit. */
+	if (first->mask != ~(mask_t) 0)
+		return node_first_clear(first, 0);
+
+	/*
+	 * The first node's mask is full.  past_first is the index just
+	 * beyond everything the node describes (mask plus num_after).
+	 */
+	past_first = first->idx + MASK_BITS + first->num_after;
+
+	second = node_next(s, first);
+	if (!second) {
+		/*
+		 * Nothing follows the first node, so the bit just past
+		 * it is cleared.  It must not have wrapped around to 0.
+		 */
+		assert(first->mask == ~(mask_t) 0);
+		assert(past_first != (sparsebit_idx_t) 0);
+		return past_first;
+	}
+
+	/* A gap before the second node starts with a cleared bit. */
+	if (past_first != second->idx)
+		return past_first;
+
+	/*
+	 * The second node directly follows the first.  A full mask
+	 * there would have been folded into the first node's
+	 * num_after, so its mask is expected to contain a cleared bit.
+	 */
+	return node_first_clear(second, 0);
+}
+
+/* Returns index of next bit set within s after the index given by prev.
+ * Returns 0 if there are no bits after prev that are set.
+ */
+sparsebit_idx_t sparsebit_next_set(struct sparsebit *s,
+	sparsebit_idx_t prev)
+{
+	sparsebit_idx_t lowest_possible = prev + 1;
+	sparsebit_idx_t start;
+	struct node *nodep;
+
+	/* prev was the highest index: there is no bit after it. */
+	if (lowest_possible == 0)
+		return 0;
+
+	/*
+	 * Find the leftmost 'candidate' overlapping or to the right
+	 * of lowest_possible.
+	 */
+	struct node *candidate = NULL;
+
+	/* True iff lowest_possible is within candidate */
+	bool contains = false;
+
+	/*
+	 * Find node that describes setting of bit at lowest_possible.
+	 * If such a node doesn't exist, find the node with the lowest
+	 * starting index that is > lowest_possible.
+	 */
+	for (nodep = s->root; nodep;) {
+		if ((nodep->idx + MASK_BITS + nodep->num_after - 1)
+			>= lowest_possible) {
+			candidate = nodep;
+			if (candidate->idx <= lowest_possible) {
+				contains = true;
+				break;
+			}
+			nodep = nodep->left;
+		} else {
+			nodep = nodep->right;
+		}
+	}
+	if (!candidate)
+		return 0;
+
+	assert(candidate->mask != 0);
+
+	/* Does the candidate node describe the setting of lowest_possible? */
+	if (!contains) {
+		/*
+		 * Candidate doesn't describe setting of bit at lowest_possible.
+		 * Candidate points to the first node with a starting index
+		 * > lowest_possible.
+		 */
+		assert(candidate->idx > lowest_possible);
+
+		return node_first_set(candidate, 0);
+	}
+
+	/*
+	 * Candidate describes setting of bit at lowest_possible, but
+	 * that bit and all later bits described by this node may still
+	 * be 0.  First look for a set mask bit at or above 'start'
+	 * (mask >= 1 << start means some bit from 'start' up is set).
+	 * The shifted 1 is a mask_t so that reaching the top mask bit
+	 * does not overflow a signed int.
+	 */
+	start = lowest_possible - candidate->idx;
+
+	if (start < MASK_BITS && candidate->mask >= ((mask_t) 1 << start))
+		return node_first_set(candidate, start);
+
+	if (candidate->num_after) {
+		sparsebit_idx_t first_num_after_idx = candidate->idx + MASK_BITS;
+
+		return lowest_possible < first_num_after_idx
+			? first_num_after_idx : lowest_possible;
+	}
+
+	/*
+	 * Although candidate node describes setting of bit at
+	 * the index of lowest_possible, all bits at that index and
+	 * later that are described by candidate are cleared.  With
+	 * this, the next bit is the first bit in the next node, if
+	 * such a node exists.  If a next node doesn't exist, then
+	 * there is no next set bit.
+	 */
+	candidate = node_next(s, candidate);
+	if (!candidate)
+		return 0;
+
+	return node_first_set(candidate, 0);
+}
+
+/* Returns index of next bit cleared within s after the index given by prev.
+ * Returns 0 if there are no bits after prev that are cleared.
+ */
+sparsebit_idx_t sparsebit_next_clear(struct sparsebit *s,
+	sparsebit_idx_t prev)
+{
+	sparsebit_idx_t lowest_possible = prev + 1;
+	sparsebit_idx_t idx;
+	struct node *nodep1, *nodep2;
+
+	/* prev was the highest index: there is no bit after it. */
+	if (lowest_possible == 0)
+		return 0;
+
+	/*
+	 * Does a node describing the setting of lowest_possible exist?
+	 * If not, the bit at lowest_possible is cleared.
+	 */
+	nodep1 = node_find(s, lowest_possible);
+	if (!nodep1)
+		return lowest_possible;
+
+	/* Is a cleared bit in node 1's mask?  (mask_t 1: top bit shifts safely) */
+	for (idx = lowest_possible - nodep1->idx; idx < MASK_BITS; idx++)
+		if (!(nodep1->mask & ((mask_t) 1 << idx)))
+			return nodep1->idx + idx;
+
+	/*
+	 * Next cleared bit is not described by node 1.  Without a next
+	 * node it is the bit just past node 1; if node 1 ends at the
+	 * highest index that sum wraps to 0, the "none" value.
+	 */
+	nodep2 = node_next(s, nodep1);
+	if (!nodep2)
+		return nodep1->idx + MASK_BITS + nodep1->num_after;
+
+	/*
+	 * There is a second node.
+	 * If it is not adjacent to the first node, then there is a gap
+	 * of cleared bits between the nodes, and the next cleared bit
+	 * is the first bit within the gap.
+	 */
+	if (nodep1->idx + MASK_BITS + nodep1->num_after != nodep2->idx)
+		return nodep1->idx + MASK_BITS + nodep1->num_after;
+
+	/*
+	 * Second node is adjacent to the first node.
+	 * Because it is adjacent, its mask should be non-zero.  If all
+	 * its mask bits are set, then with it being adjacent, it should
+	 * have had the mask bits moved into the num_after setting of the
+	 * previous node.
+	 */
+	return node_first_clear(nodep2, 0);
+}
+
+/* Searches, beginning at the index 1 greater than start, for the first
+ * sequence of num consecutively set bits and returns its index.
+ * Returns 0 if no such sequence exists.
+ */
+sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *s,
+	sparsebit_idx_t start, sparsebit_num_t num)
+{
+	sparsebit_idx_t run;
+
+	assert(num >= 1);
+
+	/*
+	 * run is the first set bit not yet ruled out.  Stop when none
+	 * is left or num bits from run would wrap past the top index.
+	 */
+	run = sparsebit_next_set(s, start);
+	while (run != 0 && run + num - 1 >= run) {
+		assert(sparsebit_is_set(s, run));
+
+		/* Long enough?  Then this is the sequence looked for. */
+		if (sparsebit_is_set_num(s, run, num))
+			return run;
+
+		/* Too short: step over it to the cleared bit ending it. */
+		run = sparsebit_next_clear(s, run);
+		if (run == 0)
+			return 0;
+
+		/* Continue at the next set bit after that cleared bit. */
+		run = sparsebit_next_set(s, run);
+	}
+
+	return 0;
+}
+
+/* Searches, beginning at the index 1 greater than start, for the first
+ * sequence of num consecutively cleared bits and returns its index.
+ * Returns 0 if no such sequence exists.
+ */
+sparsebit_idx_t sparsebit_next_clear_num(struct sparsebit *s,
+	sparsebit_idx_t start, sparsebit_num_t num)
+{
+	sparsebit_idx_t run;
+
+	assert(num >= 1);
+
+	/*
+	 * run is the first cleared bit not yet ruled out.  Stop when none
+	 * is left or num bits from run would wrap past the top index.
+	 */
+	run = sparsebit_next_clear(s, start);
+	while (run != 0 && run + num - 1 >= run) {
+		assert(sparsebit_is_clear(s, run));
+
+		/* Long enough?  Then this is the sequence looked for. */
+		if (sparsebit_is_clear_num(s, run, num))
+			return run;
+
+		/* Too short: step over it to the set bit ending it. */
+		run = sparsebit_next_set(s, run);
+		if (run == 0)
+			return 0;
+
+		/* Continue at the next cleared bit after that set bit. */
+		run = sparsebit_next_clear(s, run);
+	}
+
+	return 0;
+}
+
+/* Sets the bits in the inclusive range start through start + num - 1.  */
+void sparsebit_set_num(struct sparsebit *s,
+	sparsebit_idx_t start, sparsebit_num_t num)
+{
+	struct node *nodep, *next;
+	unsigned int n1;
+	sparsebit_idx_t idx;
+	sparsebit_num_t n;
+	sparsebit_idx_t middle_start, middle_end;
+
+	assert(num > 0);
+	assert(start + num - 1 >= start);
+
+	/*
+	 * Leading - bits before first mask boundary.
+	 *
+	 * TODO(lhuemill): With some effort it may be possible to
+	 *   replace the following loop with a sequential sequence
+	 *   of statements.  High level sequence would be:
+	 *
+	 *     1. Use node_split() to force node that describes setting
+	 *        of idx to be within the mask portion of a node.
+	 *     2. Form mask of bits to be set.
+	 *     3. Determine number of mask bits already set in the node
+	 *        and store in a local variable named num_already_set.
+	 *     4. Set the appropriate mask bits within the node.
+	 *     5. Increment struct sparsebit_pvt num_set member
+	 *        by the number of bits that were actually set.
+	 *        Exclude from the counts bits that were already set.
+	 *     6. Before returning to the caller, use node_reduce() to
+	 *        handle the multiple corner cases that this method
+	 *        introduces.
+	 */
+	for (idx = start, n = num; n > 0 && idx % MASK_BITS != 0; idx++, n--)
+		bit_set(s, idx);
+
+	/* Middle - bits spanning one or more entire mask */
+	middle_start = idx;
+	middle_end = middle_start + (n & -MASK_BITS) - 1;
+	if (n >= MASK_BITS) {
+		nodep = node_split(s, middle_start);
+
+		/*
+		 * As needed, split just after end of middle bits.
+		 * No split needed if end of middle bits is at highest
+		 * supported bit index.
+		 */
+		if (middle_end + 1 > middle_end)
+			(void) node_split(s, middle_end + 1);
+
+		/* Delete nodes that only describe bits within the middle. */
+		for (next = node_next(s, nodep);
+			next && (next->idx < middle_end);
+			next = node_next(s, nodep)) {
+			assert(next->idx + MASK_BITS + next->num_after - 1 <= middle_end);
+			node_rm(s, next);
+			next = NULL;
+		}
+
+		/* As needed set each mask bit (mask_t 1: top bit shifts safely) */
+		for (n1 = 0; n1 < MASK_BITS; n1++) {
+			if (!(nodep->mask & ((mask_t) 1 << n1))) {
+				nodep->mask |= (mask_t) 1 << n1;
+				s->num_set++;
+			}
+		}
+
+		s->num_set -= nodep->num_after;
+		nodep->num_after = middle_end - middle_start + 1 - MASK_BITS;
+		s->num_set += nodep->num_after;
+
+		node_reduce(s, nodep);
+	}
+	idx = middle_end + 1;
+	n -= middle_end - middle_start + 1;
+
+	/* Trailing - bits at and beyond last mask boundary */
+	assert(n < MASK_BITS);
+	for (; n > 0; idx++, n--)
+		bit_set(s, idx);
+}
+
+/* Clears the bits in the inclusive range start through start + num - 1.  */
+void sparsebit_clear_num(struct sparsebit *s,
+	sparsebit_idx_t start, sparsebit_num_t num)
+{
+	struct node *nodep, *next;
+	unsigned int n1;
+	sparsebit_idx_t idx;
+	sparsebit_num_t n;
+	sparsebit_idx_t middle_start, middle_end;
+
+	assert(num > 0);
+	assert(start + num - 1 >= start);
+
+	/* Leading - bits before first mask boundary */
+	for (idx = start, n = num; n > 0 && idx % MASK_BITS != 0; idx++, n--)
+		bit_clear(s, idx);
+
+	/* Middle - bits spanning one or more entire mask */
+	middle_start = idx;
+	middle_end = middle_start + (n & -MASK_BITS) - 1;
+	if (n >= MASK_BITS) {
+		nodep = node_split(s, middle_start);
+
+		/*
+		 * As needed, split just after end of middle bits.
+		 * No split needed if end of middle bits is at highest
+		 * supported bit index.
+		 */
+		if (middle_end + 1 > middle_end)
+			(void) node_split(s, middle_end + 1);
+
+		/* Delete nodes that only describe bits within the middle. */
+		for (next = node_next(s, nodep);
+			next && (next->idx < middle_end);
+			next = node_next(s, nodep)) {
+			assert(next->idx + MASK_BITS + next->num_after - 1 <= middle_end);
+			node_rm(s, next);
+			next = NULL;
+		}
+
+		/* As needed clear each mask bit (mask_t 1: top bit shifts safely) */
+		for (n1 = 0; n1 < MASK_BITS; n1++) {
+			if (nodep->mask & ((mask_t) 1 << n1)) {
+				nodep->mask &= ~((mask_t) 1 << n1);
+				s->num_set--;
+			}
+		}
+
+		/* Clear any bits described by num_after */
+		s->num_set -= nodep->num_after;
+		nodep->num_after = 0;
+
+		/*
+		 * Delete the node that describes the beginning of
+		 * the middle bits and perform any allowed reductions
+		 * with the nodes prev or next of nodep.
+		 */
+		node_reduce(s, nodep);
+		nodep = NULL;
+	}
+	idx = middle_end + 1;
+	n -= middle_end - middle_start + 1;
+
+	/* Trailing - bits at and beyond last mask boundary */
+	assert(n < MASK_BITS);
+	for (; n > 0; idx++, n--)
+		bit_clear(s, idx);
+}
+
+/* Sets the single bit at index idx, as a one-bit sparsebit_set_num().  */
+void sparsebit_set(struct sparsebit *s, sparsebit_idx_t idx)
+{
+	sparsebit_set_num(s, idx, 1);
+}
+
+/* Clears the single bit at index idx, as a one-bit sparsebit_clear_num().  */
+void sparsebit_clear(struct sparsebit *s, sparsebit_idx_t idx)
+{
+	sparsebit_clear_num(s, idx, 1);
+}
+
+/* Sets the bits in the entire addressable range of the sparsebit array.  */
+void sparsebit_set_all(struct sparsebit *s)
+{
+	sparsebit_set(s, 0);	/* index 0 on its own ... */
+	sparsebit_set_num(s, 1, ~(sparsebit_idx_t) 0);	/* ... then 1 and up */
+	assert(sparsebit_all_set(s));
+}
+
+/* Clears the bits in the entire addressable range of the sparsebit array.  */
+void sparsebit_clear_all(struct sparsebit *s)
+{
+	sparsebit_clear(s, 0);	/* index 0 on its own ... */
+	sparsebit_clear_num(s, 1, ~(sparsebit_idx_t) 0);	/* ... then 1 and up */
+	assert(!sparsebit_any_set(s));
+}
+
+/* Prints "low" or "low:high" in hex to stream; a NULL stream only measures
+ * the text.  Returns the number of characters, or 0 if formatting failed.
+ */
+static size_t display_range(FILE *stream, sparsebit_idx_t low,
+	sparsebit_idx_t high, bool prepend_comma_space)
+{
+	const char *fmt_str;
+	int len;
+
+	/* Determine the printf format string */
+	if (low == high)
+		fmt_str = prepend_comma_space ? ", 0x%lx" : "0x%lx";
+	else
+		fmt_str = prepend_comma_space ? ", 0x%lx:0x%lx" : "0x%lx:0x%lx";
+
+	/* The single-index formats consume only low; high is ignored. */
+	if (!stream)
+		len = snprintf(NULL, 0, fmt_str, low, high);
+	else
+		len = fprintf(stream, fmt_str, low, high);
+	/* A negative (error) result must not become a huge size_t. */
+	return len < 0 ? 0 : (size_t) len;
+}
+
+
+/* Dumps to the FILE stream given by stream, the bit settings
+ * of s.  Each line of output is prefixed with the number of
+ * spaces given by indent.  The length of each line is implementation
+ * dependent and does not depend on the indent amount.  Nothing is
+ * printed when no bit is set.  Example output:
+ *
+ *   0x5, 0x8, 0xa:0xe, 0x12
+ *
+ * This corresponds to a sparsebit whose bits 5, 8, 10, 11, 12, 13, 14, 18
+ * are set.  Note that a ':', instead of a '-' is used to specify a range of
+ * contiguous bits.  This is done because '-' is used to specify command-line
+ * options, and sometimes ranges are specified as command-line arguments.
+ */
+void sparsebit_dump(FILE *stream, struct sparsebit *s,
+	unsigned int indent)
+{
+	size_t current_line_len = 0;
+	size_t sz;
+	struct node *nodep;
+
+	if (!sparsebit_any_set(s))
+		return;
+
+	/* Display initial indent */
+	fprintf(stream, "%*s", indent, "");
+
+	/* For each node */
+	for (nodep = node_first(s); nodep; nodep = node_next(s, nodep)) {
+		unsigned int n1;
+		sparsebit_idx_t low, high;
+
+		/* For each group of set mask bits (mask_t 1: top bit is safe) */
+		for (n1 = 0; n1 < MASK_BITS; n1++) {
+			if (nodep->mask & ((mask_t) 1 << n1)) {
+				low = high = nodep->idx + n1;
+
+				for (; n1 < MASK_BITS; n1++) {
+					if (nodep->mask & ((mask_t) 1 << n1))
+						high = nodep->idx + n1;
+					else
+						break;
+				}
+
+				if ((n1 == MASK_BITS) && nodep->num_after)
+					high += nodep->num_after;
+
+				/*
+				 * How much room will it take to display
+				 * this range.
+				 */
+				sz = display_range(NULL, low, high,
+					current_line_len != 0);
+
+				/*
+				 * If there is not enough room, display
+				 * a newline plus the indent of the next
+				 * line.
+				 */
+				if (current_line_len + sz > DUMP_LINE_MAX) {
+					fputs("\n", stream);
+					fprintf(stream, "%*s", indent, "");
+					current_line_len = 0;
+				}
+
+				/* Display the range */
+				sz = display_range(stream, low, high,
+					current_line_len != 0);
+				current_line_len += sz;
+			}
+		}
+
+		/*
+		 * The mask loop covers num_after only when the top mask
+		 * bit is set; otherwise display that range separately.
+		 */
+		if (!(nodep->mask & ((mask_t) 1 << (MASK_BITS - 1))) &&
+		    nodep->num_after) {
+			low = nodep->idx + MASK_BITS;
+			high = nodep->idx + MASK_BITS + nodep->num_after - 1;
+
+			/*
+			 * How much room will it take to display
+			 * this range.
+			 */
+			sz = display_range(NULL, low, high,
+				current_line_len != 0);
+
+			/*
+			 * If there is not enough room, display
+			 * a newline plus the indent of the next
+			 * line.
+			 */
+			if (current_line_len + sz > DUMP_LINE_MAX) {
+				fputs("\n", stream);
+				fprintf(stream, "%*s", indent, "");
+				current_line_len = 0;
+			}
+
+			/* Display the range */
+			sz = display_range(stream, low, high,
+				current_line_len != 0);
+			current_line_len += sz;
+		}
+	}
+	fputs("\n", stream);
+}
+
+/* Validates the internal state of the sparsebit array given by
+ * s.  On error, diagnostic information is printed to stderr and
+ * abort is called.
+ */
+void sparsebit_validate_internal(struct sparsebit *s)
+{
+	bool error_detected = false;
+	struct node *nodep, *prev = NULL;
+	sparsebit_num_t total_bits_set = 0;
+	unsigned int n1;
+
+	/* For each node */
+	for (nodep = node_first(s); nodep;
+		prev = nodep, nodep = node_next(s, nodep)) {
+
+		/*
+		 * Increase total bits set by the number of bits set in
+		 * this node.  (mask_t 1: the top bit shifts safely.)
+		 */
+		for (n1 = 0; n1 < MASK_BITS; n1++)
+			if (nodep->mask & ((mask_t) 1 << n1))
+				total_bits_set++;
+
+		total_bits_set += nodep->num_after;
+
+		/*
+		 * Arbitrary choice as to whether a mask of 0 is allowed
+		 * or not.  For diagnostic purposes it is beneficial to
+		 * have only one valid means to represent a set of bits.
+		 * To support this an arbitrary choice has been made
+		 * to not allow a mask of zero.
+		 */
+		if (nodep->mask == 0) {
+			fprintf(stderr, "Node mask of zero, "
+				"nodep: %p nodep->mask: 0x%x",
+				nodep, nodep->mask);
+			error_detected = true;
+			break;
+		}
+
+		/*
+		 * Validate num_after is not greater than the max index
+		 * - the number of mask bits.  The num_after member
+		 * uses 0-based indexing and thus has no value that
+		 * represents all bits set.  This limitation is handled
+		 * by requiring a non-zero mask.  With a non-zero mask,
+		 * MASK_BITS worth of bits are described by the mask,
+		 * which makes the largest needed num_after equal to:
+		 *
+		 *    (~(sparsebit_num_t) 0) - MASK_BITS + 1
+		 */
+		if (nodep->num_after
+			> (~(sparsebit_num_t) 0) - MASK_BITS + 1) {
+			fprintf(stderr, "num_after too large, "
+				"nodep: %p nodep->num_after: 0x%lx",
+				nodep, nodep->num_after);
+			error_detected = true;
+			break;
+		}
+
+		/* Validate node index is divisible by the mask size */
+		if (nodep->idx % MASK_BITS) {
+			fprintf(stderr, "Node index not divisable by "
+				"mask size,\n"
+				"  nodep: %p nodep->idx: 0x%lx "
+				"MASK_BITS: %lu\n",
+				nodep, nodep->idx, MASK_BITS);
+			error_detected = true;
+			break;
+		}
+
+		/*
+		 * Validate bits described by node don't wrap beyond the
+		 * highest supported index.
+		 */
+		if ((nodep->idx + MASK_BITS + nodep->num_after - 1) < nodep->idx) {
+			fprintf(stderr, "Bits described by node wrap "
+				"beyond highest supported index,\n"
+				"  nodep: %p nodep->idx: 0x%lx\n"
+				"  MASK_BITS: %lu nodep->num_after: 0x%lx",
+				nodep, nodep->idx, MASK_BITS, nodep->num_after);
+			error_detected = true;
+			break;
+		}
+
+		/* Check parent pointers. */
+		if (nodep->left) {
+			if (nodep->left->parent != nodep) {
+				fprintf(stderr, "Left child parent pointer "
+					"doesn't point to this node,\n"
+					"  nodep: %p nodep->left: %p "
+					"nodep->left->parent: %p",
+					nodep, nodep->left,
+					nodep->left->parent);
+				error_detected = true;
+				break;
+			}
+		}
+
+		if (nodep->right) {
+			if (nodep->right->parent != nodep) {
+				fprintf(stderr, "Right child parent pointer "
+					"doesn't point to this node,\n"
+					"  nodep: %p nodep->right: %p "
+					"nodep->right->parent: %p",
+					nodep, nodep->right,
+					nodep->right->parent);
+				error_detected = true;
+				break;
+			}
+		}
+
+		if (!nodep->parent) {
+			if (s->root != nodep) {
+				fprintf(stderr, "Unexpected root node, "
+					"s->root: %p nodep: %p",
+					s->root, nodep);
+				error_detected = true;
+				break;
+			}
+		}
+
+		if (prev) {
+			/*
+			 * Is index of previous node before index of
+			 * current node?
+			 */
+			if (prev->idx >= nodep->idx) {
+				fprintf(stderr, "Previous node index "
+					">= current node index,\n"
+					"  prev: %p prev->idx: 0x%lx\n"
+					"  nodep: %p nodep->idx: 0x%lx",
+					prev, prev->idx, nodep, nodep->idx);
+				error_detected = true;
+				break;
+			}
+
+			/*
+			 * Nodes occur in ascending order, based on each
+			 * node's starting index, and must not overlap.
+			 */
+			if ((prev->idx + MASK_BITS + prev->num_after - 1)
+				>= nodep->idx) {
+				fprintf(stderr, "Previous node bit range "
+					"overlap with current node bit range,\n"
+					"  prev: %p prev->idx: 0x%lx "
+					"prev->num_after: 0x%lx\n"
+					"  nodep: %p nodep->idx: 0x%lx "
+					"nodep->num_after: 0x%lx\n"
+					"  MASK_BITS: %lu",
+					prev, prev->idx, prev->num_after,
+					nodep, nodep->idx, nodep->num_after,
+					MASK_BITS);
+				error_detected = true;
+				break;
+			}
+
+			/*
+			 * When the node has all mask bits set, it shouldn't
+			 * be adjacent to the last bit described by the
+			 * previous node.
+			 */
+			if (nodep->mask == ~(mask_t) 0 &&
+			    prev->idx + MASK_BITS + prev->num_after == nodep->idx) {
+				fprintf(stderr, "Current node has mask with "
+					"all bits set and is adjacent to the "
+					"previous node,\n"
+					"  prev: %p prev->idx: 0x%lx "
+					"prev->num_after: 0x%lx\n"
+					"  nodep: %p nodep->idx: 0x%lx "
+					"nodep->num_after: 0x%lx\n"
+					"  MASK_BITS: %lu",
+					prev, prev->idx, prev->num_after,
+					nodep, nodep->idx, nodep->num_after,
+					MASK_BITS);
+
+				error_detected = true;
+				break;
+			}
+		}
+	}
+
+	if (!error_detected) {
+		/*
+		 * Is sum of bits set in each node equal to the count
+		 * of total bits set.
+		 */
+		if (s->num_set != total_bits_set) {
+			fprintf(stderr, "Number of bits set missmatch,\n"
+				"  s->num_set: 0x%lx total_bits_set: 0x%lx",
+				s->num_set, total_bits_set);
+
+			error_detected = true;
+		}
+	}
+
+	if (error_detected) {
+		fputs("  dump_internal:\n", stderr);
+		sparsebit_dump_internal(stderr, s, 4);
+		abort();
+	}
+}
+
+
+#ifdef FUZZ
+/* A simple but effective fuzzing driver.  Look for bugs with the help
+ * of some invariants and of a trivial representation of sparsebit.
+ * Just use 512 bytes of /dev/zero and /dev/urandom as inputs, and let
+ * afl-fuzz do the magic. :)
+ */
+
+#include <stdlib.h>
+#include <assert.h>
+
+struct range {	/* one recorded set/clear operation of the reference model */
+	sparsebit_idx_t first, last;	/* inclusive index bounds */
+	bool set;	/* value get_value() reports for indexes in the range */
+};
+
+struct sparsebit *s;	/* sparsebit under test, allocated in main() */
+struct range ranges[1000];	/* recorded operations, oldest first */
+int num_ranges;	/* number of valid entries in ranges[] */
+
+static bool get_value(sparsebit_idx_t idx)
+{
+	int i = num_ranges;
+
+	/* The newest matching range wins, so scan from the last entry. */
+	while (i-- > 0)
+		if (ranges[i].first <= idx && idx <= ranges[i].last)
+			return ranges[i].set;
+	return false;
+}
+
+static void operate(int code, sparsebit_idx_t first, sparsebit_idx_t last)
+{
+	sparsebit_num_t num;
+	sparsebit_idx_t next;
+
+	/* Order the pair.  A whole-range count wraps to 0; *_num() reject it. */
+	num = (first < last ? last - first : first - last) + 1;
+	if (first > last)
+		first = last;
+	last = first + num - 1;
+	if (num == 0 && code >= 7 && code <= 10)
+		return;
+
+	switch (code) {
+	case 0:	/* set one bit */
+		sparsebit_set(s, first);
+		assert(sparsebit_is_set(s, first));
+		assert(!sparsebit_is_clear(s, first));
+		assert(sparsebit_any_set(s));
+		assert(!sparsebit_all_clear(s));
+		if (get_value(first))
+			return;
+		if (num_ranges == 1000)
+			exit(0);
+		ranges[num_ranges++] = (struct range)
+			{ .first = first, .last = first, .set = true };
+		break;
+	case 1:	/* clear one bit */
+		sparsebit_clear(s, first);
+		assert(!sparsebit_is_set(s, first));
+		assert(sparsebit_is_clear(s, first));
+		assert(sparsebit_any_clear(s));
+		assert(!sparsebit_all_set(s));
+		if (!get_value(first))
+			return;
+		if (num_ranges == 1000)
+			exit(0);
+		ranges[num_ranges++] = (struct range)
+			{ .first = first, .last = first, .set = false };
+		break;
+	case 2:	/* compare one bit against the model */
+		assert(sparsebit_is_set(s, first) == get_value(first));
+		assert(sparsebit_is_clear(s, first) == !get_value(first));
+		break;
+	case 3:	/* check first set/clear, then set everything */
+		if (sparsebit_any_set(s))
+			assert(get_value(sparsebit_first_set(s)));
+		if (sparsebit_any_clear(s))
+			assert(!get_value(sparsebit_first_clear(s)));
+		sparsebit_set_all(s);
+		assert(!sparsebit_any_clear(s));
+		assert(sparsebit_all_set(s));
+		num_ranges = 0;
+		ranges[num_ranges++] = (struct range)
+			{ .first = 0, .last = ~(sparsebit_idx_t)0, .set = true };
+		break;
+	case 4:	/* check first set/clear, then clear everything */
+		if (sparsebit_any_set(s))
+			assert(get_value(sparsebit_first_set(s)));
+		if (sparsebit_any_clear(s))
+			assert(!get_value(sparsebit_first_clear(s)));
+		sparsebit_clear_all(s);
+		assert(!sparsebit_any_set(s));
+		assert(sparsebit_all_clear(s));
+		num_ranges = 0;
+		break;
+	case 5:	/* next set bit must lie after first and be set in the model */
+		next = sparsebit_next_set(s, first);
+		assert(next == 0 || next > first);
+		assert(next == 0 || get_value(next));
+		break;
+	case 6:	/* next cleared bit must lie after first and be clear in the model */
+		next = sparsebit_next_clear(s, first);
+		assert(next == 0 || next > first);
+		assert(next == 0 || !get_value(next));
+		break;
+	case 7:	/* is_set_num must agree with next_clear/next_set */
+		next = sparsebit_next_clear(s, first);
+		if (sparsebit_is_set_num(s, first, num)) {
+			assert(next == 0 || next > last);
+			if (first)
+				next = sparsebit_next_set(s, first - 1);
+			else if (sparsebit_any_set(s))
+				next = sparsebit_first_set(s);
+			else
+				return;
+			assert(next == first);
+		} else {
+			assert(sparsebit_is_clear(s, first) || next <= last);
+		}
+		break;
+	case 8:	/* is_clear_num must agree with next_set/next_clear */
+		next = sparsebit_next_set(s, first);
+		if (sparsebit_is_clear_num(s, first, num)) {
+			assert(next == 0 || next > last);
+			if (first)
+				next = sparsebit_next_clear(s, first - 1);
+			else if (sparsebit_any_clear(s))
+				next = sparsebit_first_clear(s);
+			else
+				return;
+			assert(next == first);
+		} else {
+			assert(sparsebit_is_set(s, first) || next <= last);
+		}
+		break;
+	case 9:	/* set a range and record it */
+		sparsebit_set_num(s, first, num);
+		assert(sparsebit_is_set_num(s, first, num));
+		assert(!sparsebit_is_clear_num(s, first, num));
+		assert(sparsebit_any_set(s));
+		assert(!sparsebit_all_clear(s));
+		if (num_ranges == 1000)
+			exit(0);
+		ranges[num_ranges++] = (struct range)
+			{ .first = first, .last = last, .set = true };
+		break;
+	case 10:	/* clear a range and record it */
+		sparsebit_clear_num(s, first, num);
+		assert(!sparsebit_is_set_num(s, first, num));
+		assert(sparsebit_is_clear_num(s, first, num));
+		assert(sparsebit_any_clear(s));
+		assert(!sparsebit_all_set(s));
+		if (num_ranges == 1000)
+			exit(0);
+		ranges[num_ranges++] = (struct range)
+			{ .first = first, .last = last, .set = false };
+		break;
+	case 11:	/* check the tree invariants */
+		sparsebit_validate_internal(s);
+		break;
+	default:
+		break;
+	}
+}
+
+unsigned char get8(void)
+{
+	int ch = getchar();
+
+	/* Running out of input ends the run with a success status. */
+	if (ch != EOF)
+		return ch;
+	exit(0);
+}
+
+uint64_t get64(void)
+{
+	uint64_t value = 0;
+	int i;
+
+	/*
+	 * Assemble eight input bytes, most significant first.  get8()
+	 * ends the process on EOF, so a short read never returns here.
+	 */
+	for (i = 0; i < 8; i++)
+		value = (value << 8) | get8();
+	return value;
+}
+
+int main(void)
+{
+	s = sparsebit_alloc();
+	for (;;) {	/* left only when get8() exits at end of input */
+		uint8_t op = get8() & 0xf;	/* 12-15 are no-ops in operate() */
+		uint64_t first = get64();
+		uint64_t last = get64();
+
+		operate(op, first, last);
+	}
+}
+#endif
diff --git a/tools/testing/selftests/kvm/lib/x86.c b/tools/testing/selftests/kvm/lib/x86.c
new file mode 100644
index 000000000000..2f17675f4275
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/x86.c
@@ -0,0 +1,700 @@
+/*
+ * tools/testing/selftests/kvm/lib/x86.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_name */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "kvm_util_internal.h"
+#include "x86.h"
+
+/* Minimum physical address used for virtual translation tables. */
+#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
+
+/* Virtual translation table structure declarations */
+struct pageMapL4Entry {	/* entry selected by (vaddr >> 39) & 0x1ff */
+	uint64_t present:1;		/* tested before descending a level */
+	uint64_t writable:1;		/* set when virt_pg_map() fills the entry */
+	uint64_t user:1;
+	uint64_t write_through:1;
+	uint64_t cache_disable:1;
+	uint64_t accessed:1;
+	uint64_t ignored_06:1;
+	uint64_t page_size:1;
+	uint64_t ignored_11_08:4;
+	uint64_t address:40;		/* next table's gpa >> vm->page_shift */
+	uint64_t ignored_62_52:11;
+	uint64_t execute_disable:1;	/* printed by virt_dump() */
+};
+
+struct pageDirectoryPointerEntry {	/* selected by (vaddr >> 30) & 0x1ff */
+	uint64_t present:1;		/* tested before descending a level */
+	uint64_t writable:1;		/* set when virt_pg_map() fills the entry */
+	uint64_t user:1;
+	uint64_t write_through:1;
+	uint64_t cache_disable:1;
+	uint64_t accessed:1;
+	uint64_t ignored_06:1;
+	uint64_t page_size:1;
+	uint64_t ignored_11_08:4;
+	uint64_t address:40;		/* next table's gpa >> vm->page_shift */
+	uint64_t ignored_62_52:11;
+	uint64_t execute_disable:1;	/* printed by virt_dump() */
+};
+
+struct pageDirectoryEntry {	/* entry selected by (vaddr >> 21) & 0x1ff */
+	uint64_t present:1;		/* tested before descending a level */
+	uint64_t writable:1;		/* set when virt_pg_map() fills the entry */
+	uint64_t user:1;
+	uint64_t write_through:1;
+	uint64_t cache_disable:1;
+	uint64_t accessed:1;
+	uint64_t ignored_06:1;
+	uint64_t page_size:1;
+	uint64_t ignored_11_08:4;
+	uint64_t address:40;		/* page table's gpa >> vm->page_shift */
+	uint64_t ignored_62_52:11;
+	uint64_t execute_disable:1;	/* printed by virt_dump() */
+};
+
+struct pageTableEntry {	/* entry selected by (vaddr >> 12) & 0x1ff */
+	uint64_t present:1;		/* set by virt_pg_map() once mapped */
+	uint64_t writable:1;		/* set by virt_pg_map() once mapped */
+	uint64_t user:1;
+	uint64_t write_through:1;
+	uint64_t cache_disable:1;
+	uint64_t accessed:1;
+	uint64_t dirty:1;		/* printed by virt_dump() */
+	uint64_t reserved_07:1;
+	uint64_t global:1;
+	uint64_t ignored_11_09:3;
+	uint64_t address:40;		/* mapped page's paddr >> vm->page_shift */
+	uint64_t ignored_62_52:11;
+	uint64_t execute_disable:1;	/* printed by virt_dump() */
+};
+
+/* Register Dump
+ *
+ * Input Args:
+ *   indent - Left margin indent amount
+ *   regs - General purpose register state to print
+ *
+ * Output Args:
+ *   stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps the state of the registers given by regs, to the FILE stream
+ * given by stream, in zero-padded hex, at most four registers per line.
+ */
+void regs_dump(FILE *stream, struct kvm_regs *regs,
+	       uint8_t indent)
+{
+	fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx "
+		"rcx: 0x%.16llx rdx: 0x%.16llx\n",
+		indent, "",
+		regs->rax, regs->rbx, regs->rcx, regs->rdx);
+	fprintf(stream, "%*srsi: 0x%.16llx rdi: 0x%.16llx "
+		"rsp: 0x%.16llx rbp: 0x%.16llx\n",
+		indent, "",
+		regs->rsi, regs->rdi, regs->rsp, regs->rbp);
+	fprintf(stream, "%*sr8:  0x%.16llx r9:  0x%.16llx "
+		"r10: 0x%.16llx r11: 0x%.16llx\n",
+		indent, "",
+		regs->r8, regs->r9, regs->r10, regs->r11);
+	fprintf(stream, "%*sr12: 0x%.16llx r13: 0x%.16llx "
+		"r14: 0x%.16llx r15: 0x%.16llx\n",
+		indent, "",
+		regs->r12, regs->r13, regs->r14, regs->r15);
+	fprintf(stream, "%*srip: 0x%.16llx rfl: 0x%.16llx\n",
+		indent, "",
+		regs->rip, regs->rflags);
+}
+
+/* Segment Dump
+ *
+ * Input Args:
+ *   indent - Left margin indent amount
+ *   segment - KVM segment
+ *
+ * Output Args:
+ *   stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps the state of the KVM segment given by segment, to the FILE stream
+ * given by stream, as three lines that each start with indent spaces.
+ */
+static void segment_dump(FILE *stream, struct kvm_segment *segment,
+			 uint8_t indent)
+{
+	fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.8x "
+		"selector: 0x%.4x type: 0x%.2x\n",
+		indent, "", segment->base, segment->limit,
+		segment->selector, segment->type);
+	fprintf(stream, "%*spresent: 0x%.2x dpl: 0x%.2x "
+		"db: 0x%.2x s: 0x%.2x l: 0x%.2x\n",
+		indent, "", segment->present, segment->dpl,
+		segment->db, segment->s, segment->l);
+	fprintf(stream, "%*sg: 0x%.2x avl: 0x%.2x "
+		"unusable: 0x%.2x padding: 0x%.2x\n",
+		indent, "", segment->g, segment->avl,
+		segment->unusable, segment->padding);
+}
+
+/* dtable Dump
+ *
+ * Input Args:
+ *   indent - Left margin indent amount
+ *   dtable - KVM dtable
+ *
+ * Output Args:
+ *   stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps the state of the KVM dtable given by dtable, to the FILE stream
+ * given by stream, on a single line that starts with indent spaces.
+ */
+static void dtable_dump(FILE *stream, struct kvm_dtable *dtable,
+			uint8_t indent)
+{
+	fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.4x "
+		"padding: 0x%.4x 0x%.4x 0x%.4x\n",
+		indent, "", dtable->base, dtable->limit,
+		dtable->padding[0], dtable->padding[1], dtable->padding[2]);
+}
+
+/* System Register Dump
+ *
+ * Input Args:
+ *   indent - Left margin indent amount
+ *   sregs - System registers
+ *
+ * Output Args:
+ *   stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps the state of the system registers given by sregs, to the FILE stream
+ * given by stream; segment and dtable details are indented two more columns.
+ */
+void sregs_dump(FILE *stream, struct kvm_sregs *sregs,
+		uint8_t indent)
+{
+	unsigned int i;
+
+	fprintf(stream, "%*scs:\n", indent, "");
+	segment_dump(stream, &sregs->cs, indent + 2);
+	fprintf(stream, "%*sds:\n", indent, "");
+	segment_dump(stream, &sregs->ds, indent + 2);
+	fprintf(stream, "%*ses:\n", indent, "");
+	segment_dump(stream, &sregs->es, indent + 2);
+	fprintf(stream, "%*sfs:\n", indent, "");
+	segment_dump(stream, &sregs->fs, indent + 2);
+	fprintf(stream, "%*sgs:\n", indent, "");
+	segment_dump(stream, &sregs->gs, indent + 2);
+	fprintf(stream, "%*sss:\n", indent, "");
+	segment_dump(stream, &sregs->ss, indent + 2);
+	fprintf(stream, "%*str:\n", indent, "");
+	segment_dump(stream, &sregs->tr, indent + 2);
+	fprintf(stream, "%*sldt:\n", indent, "");
+	segment_dump(stream, &sregs->ldt, indent + 2);
+
+	fprintf(stream, "%*sgdt:\n", indent, "");
+	dtable_dump(stream, &sregs->gdt, indent + 2);
+	fprintf(stream, "%*sidt:\n", indent, "");
+	dtable_dump(stream, &sregs->idt, indent + 2);
+
+	fprintf(stream, "%*scr0: 0x%.16llx cr2: 0x%.16llx "
+		"cr3: 0x%.16llx cr4: 0x%.16llx\n",
+		indent, "",
+		sregs->cr0, sregs->cr2, sregs->cr3, sregs->cr4);
+	fprintf(stream, "%*scr8: 0x%.16llx efer: 0x%.16llx "
+		"apic_base: 0x%.16llx\n",
+		indent, "",
+		sregs->cr8, sregs->efer, sregs->apic_base);
+
+	fprintf(stream, "%*sinterrupt_bitmap:\n", indent, "");
+	for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) {
+		fprintf(stream, "%*s%.16llx\n", indent + 2, "",
+			sregs->interrupt_bitmap[i]);
+	}
+}
+
+/* VM Page Map Level 4 Table Allocate
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   pgd_memslot - Memory region slot for the new table
+ *
+ * Allocates the top-level (page map l4) table once per VM, records it in
+ * vm->pgd and loads it into CR3 of every VCPU that already exists.
+ */
+void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
+{
+	TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use "
+		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+
+	/* Nothing to do if the page map l4 table already exists. */
+	if (vm->pgd_created)
+		return;
+
+	vm->pgd = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR,
+				    pgd_memslot);
+
+	/* Point CR3 of every existing VCPU at the new table.  VCPUs
+	 * created later pick the value up as each one is created.
+	 */
+	for (struct vcpu *vcpu = vm->vcpu_head; vcpu; vcpu = vcpu->next) {
+		struct kvm_sregs sregs;
+
+		vcpu_sregs_get(vm, vcpu->id, &sregs);
+		sregs.cr3 = vm->pgd;
+		vcpu_sregs_set(vm, vcpu->id, &sregs);
+	}
+
+	vm->pgd_created = true;
+}
+
+/* VM Virtual Page Map
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vaddr - VM Virtual Address
+ *   paddr - VM Physical Address
+ *   pgd_memslot - Memory region slot for new virtual translation tables
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Within the VM given by vm, maps the page at vaddr to the page at paddr,
+ * allocating any translation tables missing on the walk from vm->pgd.
+ */
+void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+	uint32_t pgd_memslot)
+{
+	uint16_t index[4];
+	struct pageMapL4Entry *pml4e;
+
+	TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use "
+		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+
+	TEST_ASSERT((vaddr % vm->page_size) == 0,
+		"Virtual address not on page boundary,\n"
+		"  vaddr: 0x%lx vm->page_size: 0x%x",
+		vaddr, vm->page_size);
+	TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
+		(vaddr >> vm->page_shift)),
+		"Invalid virtual address, vaddr: 0x%lx",
+		vaddr);
+	TEST_ASSERT((paddr % vm->page_size) == 0,
+		"Physical address not on page boundary,\n"
+		"  paddr: 0x%lx vm->page_size: 0x%x",
+		paddr, vm->page_size);
+	TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
+		"Physical address beyond maximum supported,\n"
+		"  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+		paddr, vm->max_gfn, vm->page_size);
+	/* Split vaddr into one 9-bit table index per translation level. */
+	index[0] = (vaddr >> 12) & 0x1ffu;
+	index[1] = (vaddr >> 21) & 0x1ffu;
+	index[2] = (vaddr >> 30) & 0x1ffu;
+	index[3] = (vaddr >> 39) & 0x1ffu;
+
+	/* Allocate page directory pointer table if not present. */
+	pml4e = addr_gpa2hva(vm, vm->pgd);
+	if (!pml4e[index[3]].present) {
+		pml4e[index[3]].address = vm_phy_page_alloc(vm,
+			KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
+			>> vm->page_shift;
+		pml4e[index[3]].writable = true;
+		pml4e[index[3]].present = true;
+	}
+
+	/* Allocate page directory table if not present. */
+	struct pageDirectoryPointerEntry *pdpe;
+	pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
+	if (!pdpe[index[2]].present) {
+		pdpe[index[2]].address = vm_phy_page_alloc(vm,
+			KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
+			>> vm->page_shift;
+		pdpe[index[2]].writable = true;
+		pdpe[index[2]].present = true;
+	}
+
+	/* Allocate page table if not present. */
+	struct pageDirectoryEntry *pde;
+	pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
+	if (!pde[index[1]].present) {
+		pde[index[1]].address = vm_phy_page_alloc(vm,
+			KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
+			>> vm->page_shift;
+		pde[index[1]].writable = true;
+		pde[index[1]].present = true;
+	}
+
+	/* Fill in page table entry; an existing mapping is overwritten. */
+	struct pageTableEntry *pte;
+	pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
+	pte[index[0]].address = paddr >> vm->page_shift;
+	pte[index[0]].writable = true;
+	pte[index[0]].present = true;
+}
+
+/* Virtual Translation Tables Dump
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   indent - Left margin indent amount
+ *
+ * Output Args:
+ *   stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps to the FILE stream given by stream, the present entries of all the
+ * virtual translation tables for the VM given by vm (nothing if no pgd yet).
+ */
+void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+	struct pageMapL4Entry *pml4e, *pml4e_start;
+	struct pageDirectoryPointerEntry *pdpe, *pdpe_start;
+	struct pageDirectoryEntry *pde, *pde_start;
+	struct pageTableEntry *pte, *pte_start;
+
+	if (!vm->pgd_created)
+		return;
+
+	fprintf(stream, "%*s                                          "
+		"                no\n", indent, "");
+	fprintf(stream, "%*s      index hvaddr         gpaddr         "
+		"addr         w exec dirty\n",
+		indent, "");
+	pml4e_start = (struct pageMapL4Entry *) addr_gpa2hva(vm,
+		vm->pgd);
+	for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {
+		pml4e = &pml4e_start[n1];
+		if (!pml4e->present)
+			continue;
+		fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10lx %u "
+			" %u\n",
+			indent, "",
+			pml4e - pml4e_start, pml4e,
+			addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->address,
+			pml4e->writable, pml4e->execute_disable);
+
+		pdpe_start = addr_gpa2hva(vm, pml4e->address
+			* vm->page_size);
+		for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {
+			pdpe = &pdpe_start[n2];
+			if (!pdpe->present)
+				continue;
+			fprintf(stream, "%*spdpe  0x%-3zx %p 0x%-12lx 0x%-10lx "
+				"%u  %u\n",
+				indent, "",
+				pdpe - pdpe_start, pdpe,
+				addr_hva2gpa(vm, pdpe),
+				(uint64_t) pdpe->address, pdpe->writable,
+				pdpe->execute_disable);
+
+			pde_start = addr_gpa2hva(vm,
+				pdpe->address * vm->page_size);
+			for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {
+				pde = &pde_start[n3];
+				if (!pde->present)
+					continue;
+				fprintf(stream, "%*spde   0x%-3zx %p "
+					"0x%-12lx 0x%-10lx %u  %u\n",
+					indent, "", pde - pde_start, pde,
+					addr_hva2gpa(vm, pde),
+					(uint64_t) pde->address, pde->writable,
+					pde->execute_disable);
+
+				pte_start = addr_gpa2hva(vm,
+					pde->address * vm->page_size);
+				for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {
+					pte = &pte_start[n4];
+					if (!pte->present)
+						continue;
+					fprintf(stream, "%*spte   0x%-3zx %p "
+						"0x%-12lx 0x%-10lx %u  %u "
+						"    %u    0x%-10lx\n",
+						indent, "",
+						pte - pte_start, pte,
+						addr_hva2gpa(vm, pte),
+						(uint64_t) pte->address,
+						pte->writable,
+						pte->execute_disable,
+						pte->dirty,
+						((uint64_t) n1 << 27)
+							| ((uint64_t) n2 << 18)
+							| ((uint64_t) n3 << 9)
+							| ((uint64_t) n4));
+				}
+			}
+		}
+	}
+}
+
+/* Set Unusable Segment
+ *
+ * Input Args: None
+ *
+ * Output Args:
+ *   segp - Pointer to segment register
+ *
+ * Return: None
+ *
+ * Zeroes the segment pointed to by segp and flags it as unusable.
+ */
+static void kvm_seg_set_unusable(struct kvm_segment *segp)
+{
+	memset(segp, 0, sizeof(struct kvm_segment));
+	segp->unusable = 1;
+}
+
+/* Set Long Mode Flat Kernel Code Segment
+ *
+ * Input Args:
+ *   selector - selector value
+ *
+ * Output Args:
+ *   segp - Pointer to KVM segment
+ *
+ * Return: None
+ *
+ * Clears *segp, then describes a present code segment with the long-mode
+ * bit set, the maximum limit and the selector value given by selector.
+ */
+static void kvm_seg_set_kernel_code_64bit(uint16_t selector,
+	struct kvm_segment *segp)
+{
+	const uint8_t code_type = 0x08 | 0x01 | 0x02; /* code|accessed|readable */
+
+	memset(segp, 0, sizeof(struct kvm_segment));
+	segp->present = 1;
+	segp->l = true;
+	segp->g = true;
+	segp->type = code_type;
+	segp->s = 0x1; /* kTypeCodeData */
+	segp->limit = 0xFFFFFFFFu;
+	segp->selector = selector;
+}
+
+/* Set Long Mode Flat Kernel Data Segment
+ *
+ * Input Args:
+ *   selector - selector value
+ *
+ * Output Args:
+ *   segp - Pointer to KVM segment
+ *
+ * Return: None
+ *
+ * Clears *segp, then describes a present data segment with the maximum
+ * limit and the selector value given by selector.
+ */
+static void kvm_seg_set_kernel_data_64bit(uint16_t selector,
+	struct kvm_segment *segp)
+{
+	const uint8_t data_type = 0x00 | 0x01 | 0x02; /* data|accessed|writable */
+
+	memset(segp, 0, sizeof(struct kvm_segment));
+	segp->present = true;
+	segp->g = true;
+	segp->type = data_type;
+	segp->s = 0x1; /* kTypeCodeData */
+	segp->limit = 0xFFFFFFFFu;
+	segp->selector = selector;
+}
+
+/* Address Guest Virtual to Guest Physical
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   gva - VM virtual address
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Equivalent VM physical address
+ *
+ * Walks the four levels of virtual translation tables of the VM given
+ * by vm, using 9 bits of gva per level, and returns the VM physical
+ * address of the mapped page plus the low 12 bits of gva as the offset
+ * within that page.
+ * A TEST_ASSERT failure occurs if the tables have not been created or
+ * any level has no present entry for gva.
+ */
+vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+	uint16_t index[4];
+	struct pageMapL4Entry *pml4e;
+	struct pageDirectoryPointerEntry *pdpe;
+	struct pageDirectoryEntry *pde;
+	struct pageTableEntry *pte;
+
+	TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use "
+		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+
+	index[0] = (gva >> 12) & 0x1ffu;
+	index[1] = (gva >> 21) & 0x1ffu;
+	index[2] = (gva >> 30) & 0x1ffu;
+	index[3] = (gva >> 39) & 0x1ffu;
+
+	if (!vm->pgd_created)
+		goto unmapped_gva;
+	pml4e = addr_gpa2hva(vm, vm->pgd);
+	if (!pml4e[index[3]].present)
+		goto unmapped_gva;
+
+	pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
+	if (!pdpe[index[2]].present)
+		goto unmapped_gva;
+
+	pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
+	if (!pde[index[1]].present)
+		goto unmapped_gva;
+
+	pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
+	if (!pte[index[0]].present)
+		goto unmapped_gva;
+
+	return (pte[index[0]].address * vm->page_size) + (gva & 0xfffu);
+
+unmapped_gva:
+	TEST_ASSERT(false, "No mapping for vm virtual address, "
+		    "gva: 0x%lx", gva);
+	return -1; /* only reached if TEST_ASSERT returns; keeps result defined */
+}
+
+/* Program the sregs of vcpuid for vm->mode (only VM_MODE_FLAT48PG is
+ * handled) and, once the page tables exist, point CR3 at vm->pgd.
+ */
+void vcpu_setup(struct kvm_vm *vm, int vcpuid)
+{
+	struct kvm_sregs sregs;
+
+	/* Set mode specific system register values. */
+	vcpu_sregs_get(vm, vcpuid, &sregs);
+
+	switch (vm->mode) {
+	case VM_MODE_FLAT48PG:
+		sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG;
+		sregs.cr4 |= X86_CR4_PAE;
+		sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);
+
+		kvm_seg_set_unusable(&sregs.ldt);
+		kvm_seg_set_kernel_code_64bit(0x8, &sregs.cs);
+		kvm_seg_set_kernel_data_64bit(0x10, &sregs.ds);
+		kvm_seg_set_kernel_data_64bit(0x10, &sregs.es);
+		break;
+
+	default:
+		TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", vm->mode);
+	}
+	vcpu_sregs_set(vm, vcpuid, &sregs);
+
+	/* If virtual translation table have been setup, set system register
+	 * to point to the tables.  It's okay if they haven't been setup yet,
+	 * in that the code that sets up the virtual translation tables, will
+	 * go back through any VCPUs that have already been created and set
+	 * their values.  The state is re-read so CR3 is the only change.
+	 */
+	if (vm->pgd_created) {
+		vcpu_sregs_get(vm, vcpuid, &sregs);
+		sregs.cr3 = vm->pgd;
+		vcpu_sregs_set(vm, vcpuid, &sregs);
+	}
+}
+/* Add a vCPU with default register state and its own stack
+ *
+ * Input Args:
+ *   vcpuid - Id of the VCPU to create in the VM.
+ *   guest_code - Address loaded into the new vCPU's rip
+ */
+void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
+{
+	struct kvm_mp_state mp_state = { .mp_state = 0 };
+	struct kvm_regs regs;
+	vm_vaddr_t stack_base;
+
+	/* Reserve the guest stack before the VCPU itself is created. */
+	stack_base = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
+				    DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0);
+	vm_vcpu_add(vm, vcpuid);
+
+	/* General purpose registers: entry point in rip, stack pointer at
+	 * the top of the allocation, rflags bit 1 forced on.
+	 */
+	vcpu_regs_get(vm, vcpuid, &regs);
+	regs.rip = (unsigned long) guest_code;
+	regs.rsp = stack_base + (DEFAULT_STACK_PGS * getpagesize());
+	regs.rflags |= 0x2;
+	vcpu_regs_set(vm, vcpuid, &regs);
+	/* Apply the zero MP state initialised above. */
+	vcpu_set_mp_state(vm, vcpuid, &mp_state);
+}
+
+/* VM VCPU CPUID Set
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - Id of the VCPU to update
+ *   cpuid - CPUID table passed to the KVM_SET_CPUID2 ioctl
+ *
+ * Output Args: None
+ *
+ * Return: void
+ *
+ * Looks up the VCPU and installs cpuid on it; asserts on any failure.
+ */
+void vcpu_set_cpuid(struct kvm_vm *vm,
+		uint32_t vcpuid, struct kvm_cpuid2 *cpuid)
+{
+	struct vcpu *vcpu;
+	int rc;
+
+	vcpu = vcpu_find(vm, vcpuid);
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	rc = ioctl(vcpu->fd, KVM_SET_CPUID2, cpuid);
+	TEST_ASSERT(rc == 0, "KVM_SET_CPUID2 failed, rc: %i errno: %i",
+		    rc, errno);
+}
+/* Create a VM with reasonable defaults
+ *
+ * Input Args:
+ *   vcpuid - Id given to the one VCPU that is added to the new VM.
+ *   guest_code - Entry point handed to that VCPU
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Pointer to the opaque structure describing the newly created VM.
+ */
+struct kvm_vm *vm_create_default(uint32_t vcpuid, void *guest_code)
+{
+	struct kvm_vm *vm = vm_create(VM_MODE_FLAT48PG,
+				      DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+
+	/* Guest code comes from the ELF image named by
+	 * program_invocation_name.
+	 */
+	kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+
+	/* The IRQ chip is created before the first vCPU is added. */
+	vm_create_irqchip(vm);
+
+	/* Single vCPU with the default stack and entry point. */
+	vm_vcpu_add_default(vm, vcpuid, guest_code);
+
+	return vm;
+}
diff --git a/tools/testing/selftests/kvm/set_sregs_test.c b/tools/testing/selftests/kvm/set_sregs_test.c
new file mode 100644
index 000000000000..090fd3f19352
--- /dev/null
+++ b/tools/testing/selftests/kvm/set_sregs_test.c
@@ -0,0 +1,54 @@
+/*
+ * KVM_SET_SREGS tests
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * This is a regression test for the bug fixed by the following commit:
+ * d3802286fa0f ("kvm: x86: Disallow illegal IA32_APIC_BASE MSR values")
+ *
+ * That bug allowed a user-mode program that called the KVM_SET_SREGS
+ * ioctl to put a VCPU's local APIC into an invalid state.
+ *
+ */
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "x86.h"
+
+#define VCPU_ID                  5
+
+int main(int argc, char *argv[])
+{
+	struct kvm_sregs sregs;
+	struct kvm_vm *vm;
+	int rc;
+
+	/* Tell stdout not to buffer its content */
+	setbuf(stdout, NULL);
+
+	/* Create VM with a single VCPU (VCPU_ID) and a NULL guest entry point */
+	vm = vm_create_default(VCPU_ID, NULL);
+
+	vcpu_sregs_get(vm, VCPU_ID, &sregs);
+	sregs.apic_base = 1 << 10;	/* the set below is expected to fail */
+	rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs);
+	TEST_ASSERT(rc, "Set IA32_APIC_BASE to %llx (invalid)",
+		    sregs.apic_base);
+	sregs.apic_base = 1 << 11;	/* the set below is expected to succeed */
+	rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs);
+	TEST_ASSERT(!rc, "Couldn't set IA32_APIC_BASE to %llx (valid)",
+		    sregs.apic_base);
+
+	kvm_vm_free(vm);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/sync_regs_test.c b/tools/testing/selftests/kvm/sync_regs_test.c
new file mode 100644
index 000000000000..428e9473f5e2
--- /dev/null
+++ b/tools/testing/selftests/kvm/sync_regs_test.c
@@ -0,0 +1,232 @@
+/*
+ * Test for x86 KVM_CAP_SYNC_REGS
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Verifies expected behavior of x86 KVM_CAP_SYNC_REGS functionality,
+ * including requesting an invalid register set, updates to/from values
+ * in kvm_run.s.regs when kvm_valid_regs and kvm_dirty_regs are toggled.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "x86.h"
+
+#define VCPU_ID 5
+#define PORT_HOST_SYNC 0x1000
+
+static void __exit_to_l0(uint16_t port, uint64_t arg0, uint64_t arg1)
+{	/* one port read from the guest; main() expects KVM_EXIT_IO from it */
+	        __asm__ __volatile__("in %[port], %%al"
+				     :	/* no outputs */
+				     : [port]"d"(port), "D"(arg0), "S"(arg1)
+				     : "rax");	/* clobber list */
+}
+
+#define exit_to_l0(_port, _arg0, _arg1) \
+        __exit_to_l0(_port, (uint64_t) (_arg0), (uint64_t) (_arg1))
+
+#define GUEST_ASSERT(_condition) do { \
+	if (!(_condition)) \
+		exit_to_l0(PORT_ABORT, "Failed guest assert: " #_condition, 0);\
+} while (0)
+
+void guest_code(void)
+{
+	while (1) {	/* each pass: one exit to the host, then r11 += 1 */
+		exit_to_l0(PORT_HOST_SYNC, "hello", 0);
+		asm volatile ("inc %r11");
+	}
+}
+
+static void compare_regs(struct kvm_regs *left, struct kvm_regs *right)
+{
+/* Assert that one named kvm_regs member is equal in left and right. */
+#define EXPECT_SAME(field) \
+	TEST_ASSERT(left->field == right->field, \
+		    "Register " #field " values did not match: 0x%llx, 0x%llx\n", \
+		    left->field, right->field)
+	EXPECT_SAME(rax);
+	EXPECT_SAME(rbx);
+	EXPECT_SAME(rcx);
+	EXPECT_SAME(rdx);
+	EXPECT_SAME(rsi);
+	EXPECT_SAME(rdi);
+	EXPECT_SAME(rsp);
+	EXPECT_SAME(rbp);
+	EXPECT_SAME(r8);
+	EXPECT_SAME(r9);
+	EXPECT_SAME(r10);
+	EXPECT_SAME(r11);
+	EXPECT_SAME(r12);
+	EXPECT_SAME(r13);
+	EXPECT_SAME(r14);
+	EXPECT_SAME(r15);
+	EXPECT_SAME(rip);
+	EXPECT_SAME(rflags);
+#undef EXPECT_SAME
+}
+
+static void compare_sregs(struct kvm_sregs *left, struct kvm_sregs *right)
+{	/* empty body: no sregs fields are compared */
+}
+
+static void compare_vcpu_events(struct kvm_vcpu_events *left,
+				struct kvm_vcpu_events *right)
+{	/* empty body: no vcpu_events fields are compared */
+}
+
+int main(int argc, char *argv[])
+{
+	struct kvm_vm *vm;
+	struct kvm_run *run;
+	struct kvm_regs regs;
+	struct kvm_sregs sregs;
+	struct kvm_vcpu_events events;
+	int rv, cap;
+
+	/* Tell stdout not to buffer its content */
+	setbuf(stdout, NULL);
+
+	cap = kvm_check_cap(KVM_CAP_SYNC_REGS);
+	TEST_ASSERT((unsigned long)cap == KVM_SYNC_X86_VALID_FIELDS,
+		    "KVM_CAP_SYNC_REGS (0x%x) != KVM_SYNC_X86_VALID_FIELDS (0x%lx)\n",
+		    cap, KVM_SYNC_X86_VALID_FIELDS);
+
+	/* Create VM with one VCPU (VCPU_ID) that starts in guest_code() */
+	vm = vm_create_default(VCPU_ID, guest_code);
+
+	run = vcpu_state(vm, VCPU_ID);
+
+	/* Request reading invalid register set from VCPU. */
+	run->kvm_valid_regs = KVM_SYNC_X86_VALID_FIELDS << 1;
+	rv = _vcpu_run(vm, VCPU_ID);
+	TEST_ASSERT(rv < 0 && errno == EINVAL,
+		    "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
+		    rv);
+	vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0;
+
+	/* Request setting invalid register set into VCPU. */
+	run->kvm_dirty_regs = KVM_SYNC_X86_VALID_FIELDS << 1;
+	rv = _vcpu_run(vm, VCPU_ID);
+	TEST_ASSERT(rv < 0 && errno == EINVAL,
+		    "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
+		    rv);
+	vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0;
+
+	/* Request and verify all valid register sets. */
+	/* TODO: BUILD TIME CHECK: TEST_ASSERT(KVM_SYNC_X86_NUM_FIELDS != 3); */
+	run->kvm_valid_regs = KVM_SYNC_X86_VALID_FIELDS;
+	rv = _vcpu_run(vm, VCPU_ID);
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+		    "Unexpected exit reason: %u (%s),\n",
+		    run->exit_reason,
+		    exit_reason_str(run->exit_reason));
+
+	vcpu_regs_get(vm, VCPU_ID, &regs);
+	compare_regs(&regs, &run->s.regs.regs);
+
+	vcpu_sregs_get(vm, VCPU_ID, &sregs);
+	compare_sregs(&sregs, &run->s.regs.sregs);
+
+	vcpu_events_get(vm, VCPU_ID, &events);
+	compare_vcpu_events(&events, &run->s.regs.events);
+
+	/* Set register values via run->s.regs; guest_code() then bumps r11. */
+	run->s.regs.regs.r11 = 0xBAD1DEA;
+	run->s.regs.sregs.apic_base = 1 << 11;
+	/* TODO run->s.regs.events.XYZ = ABC; */
+
+	run->kvm_valid_regs = KVM_SYNC_X86_VALID_FIELDS;
+	run->kvm_dirty_regs = KVM_SYNC_X86_REGS | KVM_SYNC_X86_SREGS;
+	rv = _vcpu_run(vm, VCPU_ID);
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+		    "Unexpected exit reason: %u (%s),\n",
+		    run->exit_reason,
+		    exit_reason_str(run->exit_reason));
+	TEST_ASSERT(run->s.regs.regs.r11 == 0xBAD1DEA + 1,
+		    "r11 sync regs value incorrect 0x%llx.",
+		    run->s.regs.regs.r11);
+	TEST_ASSERT(run->s.regs.sregs.apic_base == 1 << 11,
+		    "apic_base sync regs value incorrect 0x%llx.",
+		    run->s.regs.sregs.apic_base);
+
+	vcpu_regs_get(vm, VCPU_ID, &regs);
+	compare_regs(&regs, &run->s.regs.regs);
+
+	vcpu_sregs_get(vm, VCPU_ID, &sregs);
+	compare_sregs(&sregs, &run->s.regs.sregs);
+
+	vcpu_events_get(vm, VCPU_ID, &events);
+	compare_vcpu_events(&events, &run->s.regs.events);
+
+	/* Clear kvm_dirty_regs bits, verify new s.regs values are
+	 * overwritten with existing guest values.
+	 */
+	run->kvm_valid_regs = KVM_SYNC_X86_VALID_FIELDS;
+	run->kvm_dirty_regs = 0;
+	run->s.regs.regs.r11 = 0xDEADBEEF;
+	rv = _vcpu_run(vm, VCPU_ID);
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+		    "Unexpected exit reason: %u (%s),\n",
+		    run->exit_reason,
+		    exit_reason_str(run->exit_reason));
+	TEST_ASSERT(run->s.regs.regs.r11 != 0xDEADBEEF,
+		    "r11 sync regs value incorrect 0x%llx.",
+		    run->s.regs.regs.r11);
+
+	/* Clear kvm_valid_regs bits and kvm_dirty_regs bits.
+	 * Verify s.regs values are not overwritten with existing guest values
+	 * and that guest values are not overwritten with kvm_sync_regs values.
+	 */
+	run->kvm_valid_regs = 0;
+	run->kvm_dirty_regs = 0;
+	run->s.regs.regs.r11 = 0xAAAA;
+	regs.r11 = 0xBAC0;
+	vcpu_regs_set(vm, VCPU_ID, &regs);
+	rv = _vcpu_run(vm, VCPU_ID);
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+		    "Unexpected exit reason: %u (%s),\n",
+		    run->exit_reason,
+		    exit_reason_str(run->exit_reason));
+	TEST_ASSERT(run->s.regs.regs.r11 == 0xAAAA,
+		    "r11 sync regs value incorrect 0x%llx.",
+		    run->s.regs.regs.r11);
+	vcpu_regs_get(vm, VCPU_ID, &regs);
+	TEST_ASSERT(regs.r11 == 0xBAC0 + 1,
+		    "r11 guest value incorrect 0x%llx.",
+		    regs.r11);
+
+	/* Clear kvm_valid_regs bits. Verify s.regs values are not overwritten
+	 * with existing guest values but that guest values are overwritten
+	 * with kvm_sync_regs values.
+	 */
+	run->kvm_valid_regs = 0;
+	run->kvm_dirty_regs = KVM_SYNC_X86_VALID_FIELDS;
+	run->s.regs.regs.r11 = 0xBBBB;
+	rv = _vcpu_run(vm, VCPU_ID);
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+		    "Unexpected exit reason: %u (%s),\n",
+		    run->exit_reason,
+		    exit_reason_str(run->exit_reason));
+	TEST_ASSERT(run->s.regs.regs.r11 == 0xBBBB,
+		    "r11 sync regs value incorrect 0x%llx.",
+		    run->s.regs.regs.r11);
+	vcpu_regs_get(vm, VCPU_ID, &regs);
+	TEST_ASSERT(regs.r11 == 0xBBBB + 1,
+		    "r11 guest value incorrect 0x%llx.",
+		    regs.r11);
+
+	kvm_vm_free(vm);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index 7de482a0519d..195e9d4739a9 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -20,6 +20,7 @@ all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
 
 .ONESHELL:
 define RUN_TESTS
+	@export KSFT_TAP_LEVEL=`echo 1`;
 	@test_num=`echo 0`;
 	@echo "TAP version 13";
 	@for TEST in $(1); do				\
diff --git a/tools/testing/selftests/powerpc/benchmarks/.gitignore b/tools/testing/selftests/powerpc/benchmarks/.gitignore
index 04dc1e6ef2ce..9161679b1e1a 100644
--- a/tools/testing/selftests/powerpc/benchmarks/.gitignore
+++ b/tools/testing/selftests/powerpc/benchmarks/.gitignore
@@ -1,5 +1,7 @@
 gettimeofday
 context_switch
+fork
+exec_target
 mmap_bench
 futex_bench
 null_syscall
diff --git a/tools/testing/selftests/powerpc/benchmarks/Makefile b/tools/testing/selftests/powerpc/benchmarks/Makefile
index a35058e3766c..b4d7432a0ecd 100644
--- a/tools/testing/selftests/powerpc/benchmarks/Makefile
+++ b/tools/testing/selftests/powerpc/benchmarks/Makefile
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
-TEST_GEN_PROGS := gettimeofday context_switch mmap_bench futex_bench null_syscall
+TEST_GEN_PROGS := gettimeofday context_switch fork mmap_bench futex_bench null_syscall
+TEST_GEN_FILES := exec_target
 
 CFLAGS += -O2
 
@@ -10,3 +11,7 @@ $(TEST_GEN_PROGS): ../harness.c
 $(OUTPUT)/context_switch: ../utils.c
 $(OUTPUT)/context_switch: CFLAGS += -maltivec -mvsx -mabi=altivec
 $(OUTPUT)/context_switch: LDLIBS += -lpthread
+
+$(OUTPUT)/fork: LDLIBS += -lpthread
+
+$(OUTPUT)/exec_target: CFLAGS += -static -nostartfiles
diff --git a/tools/testing/selftests/powerpc/benchmarks/exec_target.c b/tools/testing/selftests/powerpc/benchmarks/exec_target.c
new file mode 100644
index 000000000000..3c9c144192be
--- /dev/null
+++ b/tools/testing/selftests/powerpc/benchmarks/exec_target.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Part of fork context switch microbenchmark.
+ *
+ * Copyright 2018, Anton Blanchard, IBM Corp.
+ */
+
+void _exit(int);	/* declared by hand; this file includes no headers */
+void _start(void)
+{
+	_exit(0);	/* the whole program: exit immediately with status 0 */
+}
diff --git a/tools/testing/selftests/powerpc/benchmarks/fork.c b/tools/testing/selftests/powerpc/benchmarks/fork.c
new file mode 100644
index 000000000000..d312e638cb37
--- /dev/null
+++ b/tools/testing/selftests/powerpc/benchmarks/fork.c
@@ -0,0 +1,325 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Context switch microbenchmark.
+ *
+ * Copyright 2018, Anton Blanchard, IBM Corp.
+ */
+
+#define _GNU_SOURCE
+#include <assert.h>
+#include <errno.h>
+#include <getopt.h>
+#include <limits.h>
+#include <linux/futex.h>
+#include <pthread.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/shm.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+static unsigned int timeout = 30;
+
+static void set_cpu(int cpu)
+{
+	cpu_set_t cpuset;
+
+	if (cpu == -1)	/* -1 means "no CPU given": leave the affinity alone */
+		return;
+
+	CPU_ZERO(&cpuset);
+	CPU_SET(cpu, &cpuset);	/* mask with only @cpu set */
+
+	if (sched_setaffinity(0, sizeof(cpuset), &cpuset)) {	/* pid 0 = caller */
+		perror("sched_setaffinity");
+		exit(1);
+	}
+}
+
+static void start_process_on(void *(*fn)(void *), void *arg, int cpu)
+{
+	int pid;
+
+	pid = fork();
+	if (pid == -1) {
+		perror("fork");
+		exit(1);
+	}
+
+	if (pid)	/* parent: go back to the caller, only the child runs fn */
+		return;
+
+	set_cpu(cpu);	/* child: pin itself before starting the work */
+
+	fn(arg);
+
+	exit(0);	/* child never returns into the caller's code */
+}
+
+static int cpu;
+static int do_fork = 0;
+static int do_vfork = 0;
+static int do_exec = 0;
+static char *exec_file;
+static int exec_target = 0;
+static unsigned long iterations;
+static unsigned long iterations_prev;
+
+static void run_exec(void)
+{
+	char *const argv[] = { "./exec_target", NULL };	/* relative to cwd */
+
+	if (execve("./exec_target", argv, NULL) == -1) {
+		perror("execve");	/* only reached when the exec failed */
+		exit(1);
+	}
+}
+
+static void bench_fork(void)
+{
+	while (1) {	/* no exit condition: runs until a signal handler exits */
+		pid_t pid = fork();
+		if (pid == -1) {
+			perror("fork");
+			exit(1);
+		}
+		if (pid == 0) {	/* child: optionally exec, otherwise exit at once */
+			if (do_exec)
+				run_exec();
+			_exit(0);
+		}
+		pid = waitpid(pid, NULL, 0);	/* reap the child before the next round */
+		if (pid == -1) {
+			perror("waitpid");
+			exit(1);
+		}
+		iterations++;	/* one fork+wait cycle; read by sigalrm_handler() */
+	}
+}
+
+static void bench_vfork(void)
+{
+	while (1) {	/* no exit condition: runs until a signal handler exits */
+		pid_t pid = vfork();
+		if (pid == -1) {
+			perror("vfork");	/* name the call that actually failed */
+			exit(1);
+		}
+		if (pid == 0) {	/* child: optionally exec, otherwise exit at once */
+			if (do_exec)
+				run_exec();
+			_exit(0);
+		}
+		pid = waitpid(pid, NULL, 0);	/* reap the child before the next round */
+		if (pid == -1) {
+			perror("waitpid");
+			exit(1);
+		}
+		iterations++;	/* one vfork+wait cycle; read by sigalrm_handler() */
+	}
+}
+
+static void *null_fn(void *arg)
+{
+	pthread_exit(NULL);
+}
+
+static void bench_thread(void)
+{
+	pthread_t tid;
+	cpu_set_t cpuset;
+	pthread_attr_t attr;
+	int rc;
+
+	rc = pthread_attr_init(&attr);
+	if (rc) {
+		errno = rc;
+		perror("pthread_attr_init");
+		exit(1);
+	}
+
+	if (cpu != -1) {
+		CPU_ZERO(&cpuset);
+		CPU_SET(cpu, &cpuset);
+
+		rc = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset);
+		if (rc) {
+			errno = rc;
+			perror("pthread_attr_setaffinity_np");
+			exit(1);
+		}
+	}
+
+	while (1) {
+		rc = pthread_create(&tid, &attr, null_fn, NULL);
+		if (rc) {
+			errno = rc;
+			perror("pthread_create");
+			exit(1);
+		}
+		rc = pthread_join(tid, NULL);
+		if (rc) {
+			errno = rc;
+			perror("pthread_join");
+			exit(1);
+		}
+		iterations++;
+	}
+}
+
+static void sigalrm_handler(int junk)
+{
+	unsigned long i = iterations;	/* snapshot so print and update agree */
+
+	printf("%lu\n", i - iterations_prev);	/* iterations since the last tick */
+	iterations_prev = i;
+
+	if (--timeout == 0)	/* one tick per second, so timeout counts seconds */
+		kill(0, SIGUSR1);	/* pid 0: signal every process in our group */
+
+	alarm(1);	/* re-arm for the next one-second tick */
+}
+
+static void sigusr1_handler(int junk)
+{
+	exit(0);
+}
+
+static void *bench_proc(void *arg)
+{
+	signal(SIGALRM, sigalrm_handler);
+	alarm(1);	/* first tick in one second; the handler re-arms it */
+
+	if (do_fork)
+		bench_fork();
+	else if (do_vfork)
+		bench_vfork();
+	else	/* neither --fork nor --vfork: pthread create/join loop */
+		bench_thread();
+
+	return NULL;	/* not reached: the bench_*() loops have no exit */
+}
+
+static struct option options[] = {
+	{ "fork", no_argument, &do_fork, 1 },
+	{ "vfork", no_argument, &do_vfork, 1 },
+	{ "exec", no_argument, &do_exec, 1 },
+	{ "timeout", required_argument, 0, 's' },
+	{ "exec-target", no_argument, &exec_target, 1 },
+	{ NULL },
+};
+
+static void usage(void)
+{
+	fprintf(stderr, "Usage: fork <options> CPU\n\n");
+	fprintf(stderr, "\t\t--fork\tUse fork() (default threads)\n");
+	fprintf(stderr, "\t\t--vfork\tUse vfork() (default threads)\n");
+	fprintf(stderr, "\t\t--exec\tAlso exec() (default no exec)\n");
+	fprintf(stderr, "\t\t--timeout=X\tDuration in seconds to run (default 30)\n");
+	fprintf(stderr, "\t\t--exec-target\tInternal option for exec workload\n");
+}
+
+int main(int argc, char *argv[])
+{
+	signed char c;
+
+	while (1) {
+		int option_index = 0;
+
+		c = getopt_long(argc, argv, "", options, &option_index);
+
+		if (c == -1)
+			break;
+
+		switch (c) {
+		case 0:
+			if (options[option_index].flag != 0)
+				break;
+
+			usage();
+			exit(1);
+			break;
+
+		case 's':
+			timeout = atoi(optarg);
+			break;
+
+		default:
+			usage();
+			exit(1);
+		}
+	}
+
+	if (do_fork && do_vfork) {
+		usage();
+		exit(1);
+	}
+	if (do_exec && !do_fork && !do_vfork) {
+		usage();
+		exit(1);
+	}
+
+	if (do_exec) {
+		char *dirname = strdup(argv[0]);
+		int i;
+		i = strlen(dirname) - 1;
+		while (i) {
+			if (dirname[i] == '/') {
+				dirname[i] = '\0';
+				if (chdir(dirname) == -1) {
+					perror("chdir");
+					exit(1);
+				}
+				break;
+			}
+			i--;
+		}
+	}
+
+	if (exec_target) {
+		exit(0);
+	}
+
+	if (((argc - optind) != 1)) {
+		cpu = -1;
+	} else {
+		cpu = atoi(argv[optind++]);
+	}
+
+	if (do_exec)
+		exec_file = argv[0];
+
+	set_cpu(cpu);
+
+	printf("Using ");
+	if (do_fork)
+		printf("fork");
+	else if (do_vfork)
+		printf("vfork");
+	else
+		printf("clone");
+
+	if (do_exec)
+		printf(" + exec");
+
+	printf(" on cpu %d\n", cpu);
+
+	/* Create a new process group so we can signal everyone for exit */
+	setpgid(getpid(), getpid());
+
+	signal(SIGUSR1, sigusr1_handler);
+
+	start_process_on(bench_proc, NULL, cpu);
+
+	while (1)
+		sleep(3600);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile
index ac4a52e19e59..eedce3366f64 100644
--- a/tools/testing/selftests/powerpc/copyloops/Makefile
+++ b/tools/testing/selftests/powerpc/copyloops/Makefile
@@ -5,8 +5,8 @@ CFLAGS += -I$(CURDIR)
 CFLAGS += -D SELFTEST
 CFLAGS += -maltivec
 
-# Use our CFLAGS for the implicit .S rule
-ASFLAGS = $(CFLAGS)
+# Use our CFLAGS for the implicit .S rule & set the asm machine type
+ASFLAGS = $(CFLAGS) -Wa,-mpower4
 
 TEST_GEN_PROGS := copyuser_64 copyuser_power7 memcpy_64 memcpy_power7
 EXTRA_SOURCES := validate.c ../harness.c
diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile
index 5c72ff978f27..c0e45d2dde25 100644
--- a/tools/testing/selftests/powerpc/tm/Makefile
+++ b/tools/testing/selftests/powerpc/tm/Makefile
@@ -4,7 +4,7 @@ SIGNAL_CONTEXT_CHK_TESTS := tm-signal-context-chk-gpr tm-signal-context-chk-fpu
 
 TEST_GEN_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack \
 	tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail tm-unavailable tm-trap \
-	$(SIGNAL_CONTEXT_CHK_TESTS)
+	$(SIGNAL_CONTEXT_CHK_TESTS) tm-sigreturn
 
 include ../../lib.mk
 
diff --git a/tools/testing/selftests/powerpc/tm/tm-sigreturn.c b/tools/testing/selftests/powerpc/tm/tm-sigreturn.c
new file mode 100644
index 000000000000..85d63449243b
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-sigreturn.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2015, Laurent Dufour, IBM Corp.
+ *
+ * Test the kernel's signal returning code to check reclaim is done if the
+ * sigreturn() is called while in a transaction (suspended since active is
+ * already dropped through the system call path).
+ *
+ * The kernel must discard the transaction when entering sigreturn, since
+ * restoring the potential TM SPRs from the signal frame requires not being
+ * in a transaction.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "tm.h"
+#include "utils.h"
+
+
+void handler(int sig)
+{
+	uint64_t ret;
+
+	asm __volatile__(
+		"li             3,1             ;" /* r3 = 1, kept if we branch to 1: */
+		"tbegin.                        ;"
+		"beq            1f              ;" /* skip ahead with r3 still 1 */
+		"li             3,0             ;" /* fall-through path: r3 = 0 */
+		"tsuspend.                      ;"
+		"1:                             ;"
+		"std%X[ret]     3, %[ret]       ;" /* ret = r3 on either path */
+		: [ret] "=m"(ret)
+		:
+		: "memory", "3", "cr0");
+
+	if (ret)	/* non-zero: the branch above was taken, exit with failure */
+		exit(1);
+
+	/*
+	 * We return from the signal handler while in a suspended transaction
+	 */
+}
+
+
+int tm_sigreturn(void)
+{
+	struct sigaction sa;
+	uint64_t ret = 0;
+
+	SKIP_IF(!have_htm());
+
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_handler = handler;
+	sigemptyset(&sa.sa_mask);
+
+	if (sigaction(SIGSEGV, &sa, NULL))	/* handler() runs on the SEGV below */
+		exit(1);
+
+	asm __volatile__(
+		"tbegin.                        ;"
+		"beq            1f              ;"
+		"li             3,0             ;"
+		"std            3,0(3)          ;" /* trigger SEGV */
+		"li             3,1             ;"
+		"std%X[ret]     3,%[ret]        ;" /* straight-line path: ret = 1 */
+		"tend.                          ;"
+		"b              2f              ;"
+		"1:                             ;"
+		"li             3,2             ;"
+		"std%X[ret]     3,%[ret]        ;" /* label 1 path: ret = 2 */
+		"2:                             ;"
+		: [ret] "=m"(ret)
+		:
+		: "memory", "3", "cr0");
+
+	if (ret != 2)	/* only the path through label 1 counts as success */
+		exit(1);
+
+	exit(0);
+}
+
+int main(void)
+{
+	return test_harness(tm_sigreturn, "tm_sigreturn");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-unavailable.c b/tools/testing/selftests/powerpc/tm/tm-unavailable.c
index e6a0fad2bfd0..156c8e750259 100644
--- a/tools/testing/selftests/powerpc/tm/tm-unavailable.c
+++ b/tools/testing/selftests/powerpc/tm/tm-unavailable.c
@@ -80,7 +80,7 @@ bool is_failure(uint64_t condition_reg)
 	return ((condition_reg >> 28) & 0xa) == 0xa;
 }
 
-void *ping(void *input)
+void *tm_una_ping(void *input)
 {
 
 	/*
@@ -280,7 +280,7 @@ void *ping(void *input)
 }
 
 /* Thread to force context switch */
-void *pong(void *not_used)
+void *tm_una_pong(void *not_used)
 {
 	/* Wait thread get its name "pong". */
 	if (DEBUG)
@@ -311,11 +311,11 @@ void test_fp_vec(int fp, int vec, pthread_attr_t *attr)
 	do {
 		int rc;
 
-		/* Bind 'ping' to CPU 0, as specified in 'attr'. */
-		rc = pthread_create(&t0, attr, ping, (void *) &flags);
+		/* Bind to CPU 0, as specified in 'attr'. */
+		rc = pthread_create(&t0, attr, tm_una_ping, (void *) &flags);
 		if (rc)
 			pr_err(rc, "pthread_create()");
-		rc = pthread_setname_np(t0, "ping");
+		rc = pthread_setname_np(t0, "tm_una_ping");
 		if (rc)
 			pr_warn(rc, "pthread_setname_np");
 		rc = pthread_join(t0, &ret_value);
@@ -333,13 +333,15 @@ void test_fp_vec(int fp, int vec, pthread_attr_t *attr)
 	}
 }
 
-int main(int argc, char **argv)
+int tm_unavailable_test(void)
 {
 	int rc, exception; /* FP = 0, VEC = 1, VSX = 2 */
 	pthread_t t1;
 	pthread_attr_t attr;
 	cpu_set_t cpuset;
 
+	SKIP_IF(!have_htm());
+
 	/* Set only CPU 0 in the mask. Both threads will be bound to CPU 0. */
 	CPU_ZERO(&cpuset);
 	CPU_SET(0, &cpuset);
@@ -354,12 +356,12 @@ int main(int argc, char **argv)
 	if (rc)
 		pr_err(rc, "pthread_attr_setaffinity_np()");
 
-	rc = pthread_create(&t1, &attr /* Bind 'pong' to CPU 0 */, pong, NULL);
+	rc = pthread_create(&t1, &attr /* Bind to CPU 0 */, tm_una_pong, NULL);
 	if (rc)
 		pr_err(rc, "pthread_create()");
 
 	/* Name it for systemtap convenience */
-	rc = pthread_setname_np(t1, "pong");
+	rc = pthread_setname_np(t1, "tm_una_pong");
 	if (rc)
 		pr_warn(rc, "pthread_create()");
 
@@ -394,3 +396,9 @@ int main(int argc, char **argv)
 		exit(0);
 	}
 }
+
+int main(int argc, char **argv)
+{
+	test_harness_set_timeout(220);
+	return test_harness(tm_unavailable_test, "tm_unavailable_test");
+}
diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore
new file mode 100644
index 000000000000..6c16f77c722c
--- /dev/null
+++ b/tools/testing/selftests/proc/.gitignore
@@ -0,0 +1,8 @@
+/proc-loadavg-001
+/proc-self-map-files-001
+/proc-self-map-files-002
+/proc-self-syscall
+/proc-self-wchan
+/proc-uptime-001
+/proc-uptime-002
+/read
diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile
new file mode 100644
index 000000000000..dbb87e56264c
--- /dev/null
+++ b/tools/testing/selftests/proc/Makefile
@@ -0,0 +1,13 @@
+CFLAGS += -Wall -O2
+
+TEST_GEN_PROGS :=
+TEST_GEN_PROGS += proc-loadavg-001
+TEST_GEN_PROGS += proc-self-map-files-001
+TEST_GEN_PROGS += proc-self-map-files-002
+TEST_GEN_PROGS += proc-self-syscall
+TEST_GEN_PROGS += proc-self-wchan
+TEST_GEN_PROGS += proc-uptime-001
+TEST_GEN_PROGS += proc-uptime-002
+TEST_GEN_PROGS += read
+
+include ../lib.mk
diff --git a/tools/testing/selftests/proc/config b/tools/testing/selftests/proc/config
new file mode 100644
index 000000000000..68fbd2b35884
--- /dev/null
+++ b/tools/testing/selftests/proc/config
@@ -0,0 +1 @@
+CONFIG_PROC_FS=y
diff --git a/tools/testing/selftests/proc/proc-loadavg-001.c b/tools/testing/selftests/proc/proc-loadavg-001.c
new file mode 100644
index 000000000000..fcff7047000d
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-loadavg-001.c
@@ -0,0 +1,83 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/* Test that /proc/loadavg correctly reports last pid in pid namespace. */
+#define _GNU_SOURCE
+#include <errno.h>
+#include <sched.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/wait.h>
+
+int main(void)
+{
+	pid_t pid;
+	int wstatus;
+
+	if (unshare(CLONE_NEWPID) == -1) {
+		if (errno == ENOSYS || errno == EPERM)
+			return 2;
+		return 1;
+	}
+
+	pid = fork();
+	if (pid == -1)
+		return 1;
+	if (pid == 0) {
+		char buf[128], *p;
+		int fd;
+		ssize_t rv;
+
+		fd = open("/proc/loadavg" , O_RDONLY);
+		if (fd == -1)
+			return 1;
+		rv = read(fd, buf, sizeof(buf));
+		if (rv < 3)
+			return 1;
+		p = buf + rv;
+
+		/* pid 1 */
+		if (!(p[-3] == ' ' && p[-2] == '1' && p[-1] == '\n'))
+			return 1;
+
+		pid = fork();
+		if (pid == -1)
+			return 1;
+		if (pid == 0)
+			return 0;
+		if (waitpid(pid, NULL, 0) == -1)
+			return 1;
+
+		lseek(fd, 0, SEEK_SET);
+		rv = read(fd, buf, sizeof(buf));
+		if (rv < 3)
+			return 1;
+		p = buf + rv;
+
+		/* pid 2 */
+		if (!(p[-3] == ' ' && p[-2] == '2' && p[-1] == '\n'))
+			return 1;
+
+		return 0;
+	}
+
+	if (waitpid(pid, &wstatus, 0) == -1)
+		return 1;
+	if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) == 0)
+		return 0;
+	return 1;
+}
diff --git a/tools/testing/selftests/proc/proc-self-map-files-001.c b/tools/testing/selftests/proc/proc-self-map-files-001.c
new file mode 100644
index 000000000000..4209c64283d6
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-self-map-files-001.c
@@ -0,0 +1,82 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/* Test readlink /proc/self/map_files/... */
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <stdlib.h>
+
+static void pass(const char *fmt, unsigned long a, unsigned long b)
+{
+	char name[64];
+	char buf[64];
+
+	snprintf(name, sizeof(name), fmt, a, b);
+	if (readlink(name, buf, sizeof(buf)) == -1)
+		exit(1);
+}
+
+static void fail(const char *fmt, unsigned long a, unsigned long b)
+{
+	char name[64];
+	char buf[64];
+
+	snprintf(name, sizeof(name), fmt, a, b);
+	if (readlink(name, buf, sizeof(buf)) == -1 && errno == ENOENT)
+		return;
+	exit(1);
+}
+
+int main(void)
+{
+	const unsigned int PAGE_SIZE = sysconf(_SC_PAGESIZE);
+	void *p;
+	int fd;
+	unsigned long a, b;
+
+	fd = open("/dev/zero", O_RDONLY);
+	if (fd == -1)
+		return 1;
+
+	p = mmap(NULL, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_FILE, fd, 0);
+	if (p == MAP_FAILED)
+		return 1;
+
+	a = (unsigned long)p;
+	b = (unsigned long)p + PAGE_SIZE;
+
+	pass("/proc/self/map_files/%lx-%lx", a, b);
+	fail("/proc/self/map_files/ %lx-%lx", a, b);
+	fail("/proc/self/map_files/%lx -%lx", a, b);
+	fail("/proc/self/map_files/%lx- %lx", a, b);
+	fail("/proc/self/map_files/%lx-%lx ", a, b);
+	fail("/proc/self/map_files/0%lx-%lx", a, b);
+	fail("/proc/self/map_files/%lx-0%lx", a, b);
+	if (sizeof(long) == 4) {
+		fail("/proc/self/map_files/100000000%lx-%lx", a, b);
+		fail("/proc/self/map_files/%lx-100000000%lx", a, b);
+	} else if (sizeof(long) == 8) {
+		fail("/proc/self/map_files/10000000000000000%lx-%lx", a, b);
+		fail("/proc/self/map_files/%lx-10000000000000000%lx", a, b);
+	} else
+		return 1;
+
+	return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-self-map-files-002.c b/tools/testing/selftests/proc/proc-self-map-files-002.c
new file mode 100644
index 000000000000..6f1f4a6e1ecb
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-self-map-files-002.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/* Test readlink /proc/self/map_files/... with address 0. */
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <stdlib.h>
+
+static void pass(const char *fmt, unsigned long a, unsigned long b)
+{
+	char name[64];
+	char buf[64];
+
+	snprintf(name, sizeof(name), fmt, a, b);
+	if (readlink(name, buf, sizeof(buf)) == -1)
+		exit(1);
+}
+
+static void fail(const char *fmt, unsigned long a, unsigned long b)
+{
+	char name[64];
+	char buf[64];
+
+	snprintf(name, sizeof(name), fmt, a, b);
+	if (readlink(name, buf, sizeof(buf)) == -1 && errno == ENOENT)
+		return;
+	exit(1);
+}
+
+int main(void)
+{
+	const unsigned int PAGE_SIZE = sysconf(_SC_PAGESIZE);
+	void *p;
+	int fd;
+	unsigned long a, b;
+
+	fd = open("/dev/zero", O_RDONLY);
+	if (fd == -1)
+		return 1;
+
+	p = mmap(NULL, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_FILE|MAP_FIXED, fd, 0);
+	if (p == MAP_FAILED) {
+		if (errno == EPERM)
+			return 2;
+		return 1;
+	}
+
+	a = (unsigned long)p;
+	b = (unsigned long)p + PAGE_SIZE;
+
+	pass("/proc/self/map_files/%lx-%lx", a, b);
+	fail("/proc/self/map_files/ %lx-%lx", a, b);
+	fail("/proc/self/map_files/%lx -%lx", a, b);
+	fail("/proc/self/map_files/%lx- %lx", a, b);
+	fail("/proc/self/map_files/%lx-%lx ", a, b);
+	fail("/proc/self/map_files/0%lx-%lx", a, b);
+	fail("/proc/self/map_files/%lx-0%lx", a, b);
+	if (sizeof(long) == 4) {
+		fail("/proc/self/map_files/100000000%lx-%lx", a, b);
+		fail("/proc/self/map_files/%lx-100000000%lx", a, b);
+	} else if (sizeof(long) == 8) {
+		fail("/proc/self/map_files/10000000000000000%lx-%lx", a, b);
+		fail("/proc/self/map_files/%lx-10000000000000000%lx", a, b);
+	} else
+		return 1;
+
+	return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-self-syscall.c b/tools/testing/selftests/proc/proc-self-syscall.c
new file mode 100644
index 000000000000..5ab5f4810e43
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-self-syscall.c
@@ -0,0 +1,60 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdio.h>
+
+static inline ssize_t sys_read(int fd, void *buf, size_t len)
+{
+	return syscall(SYS_read, fd, buf, len);
+}
+
+int main(void)
+{
+	char buf1[64];
+	char buf2[64];
+	int fd;
+	ssize_t rv;
+
+	fd = open("/proc/self/syscall", O_RDONLY);
+	if (fd == -1) {
+		if (errno == ENOENT)
+			return 2;
+		return 1;
+	}
+
+	/* Do direct system call as libc can wrap anything. */
+	snprintf(buf1, sizeof(buf1), "%ld 0x%lx 0x%lx 0x%lx",
+		 (long)SYS_read, (long)fd, (long)buf2, (long)sizeof(buf2));
+
+	memset(buf2, 0, sizeof(buf2));
+	rv = sys_read(fd, buf2, sizeof(buf2));
+	if (rv < 0)
+		return 1;
+	if (rv < strlen(buf1))
+		return 1;
+	if (strncmp(buf1, buf2, strlen(buf1)) != 0)
+		return 1;
+
+	return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-self-wchan.c b/tools/testing/selftests/proc/proc-self-wchan.c
new file mode 100644
index 000000000000..a38b2fbaa7ad
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-self-wchan.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <unistd.h>
+
+int main(void)
+{
+	char buf[64];
+	int fd;
+
+	fd = open("/proc/self/wchan", O_RDONLY);
+	if (fd == -1) {
+		if (errno == ENOENT)
+			return 2;
+		return 1;
+	}
+
+	buf[0] = '\0';
+	if (read(fd, buf, sizeof(buf)) != 1)
+		return 1;
+	if (buf[0] != '0')
+		return 1;
+	return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-uptime-001.c b/tools/testing/selftests/proc/proc-uptime-001.c
new file mode 100644
index 000000000000..781f7a50fc3f
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-uptime-001.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test that values in /proc/uptime increment monotonically.
+#undef NDEBUG
+#include <assert.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include "proc-uptime.h"
+
+int main(void)
+{
+	uint64_t start, u0, u1, i0, i1;
+	int fd;
+
+	fd = open("/proc/uptime", O_RDONLY);
+	assert(fd >= 0);
+
+	proc_uptime(fd, &u0, &i0);
+	start = u0;
+	do {
+		proc_uptime(fd, &u1, &i1);
+		assert(u1 >= u0);
+		assert(i1 >= i0);
+		u0 = u1;
+		i0 = i1;
+	} while (u1 - start < 100);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-uptime-002.c b/tools/testing/selftests/proc/proc-uptime-002.c
new file mode 100644
index 000000000000..30e2b7849089
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-uptime-002.c
@@ -0,0 +1,79 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test that values in /proc/uptime increment monotonically
+// while shifting across CPUs.
+#define _GNU_SOURCE
+#undef NDEBUG
+#include <assert.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include "proc-uptime.h"
+
+static inline int sys_sched_getaffinity(pid_t pid, unsigned int len, unsigned long *m)
+{
+	return syscall(SYS_sched_getaffinity, pid, len, m);
+}
+
+static inline int sys_sched_setaffinity(pid_t pid, unsigned int len, unsigned long *m)
+{
+	return syscall(SYS_sched_setaffinity, pid, len, m);
+}
+
+int main(void)
+{
+	unsigned int len;
+	unsigned long *m;
+	unsigned int cpu;
+	uint64_t u0, u1, i0, i1;
+	int fd;
+
+	/* find out "nr_cpu_ids"; syscall() fails with -1 and errno, not -errno */
+	m = NULL;
+	len = 0;
+	do {	/* grow the mask one word at a time while its size is rejected */
+		len += sizeof(unsigned long);
+		free(m);
+		m = malloc(len);
+	} while (sys_sched_getaffinity(0, len, m) == -1 && errno == EINVAL);
+
+	fd = open("/proc/uptime", O_RDONLY);
+	assert(fd >= 0);
+
+	proc_uptime(fd, &u0, &i0);
+	for (cpu = 0; cpu < len * 8; cpu++) {	/* one pass per bit in the mask */
+		memset(m, 0, len);
+		m[cpu / (8 * sizeof(unsigned long))] |= 1UL << (cpu % (8 * sizeof(unsigned long)));
+
+		/* CPU might not exist, ignore error */
+		sys_sched_setaffinity(0, len, m);
+
+		proc_uptime(fd, &u1, &i1);
+		assert(u1 >= u0);	/* uptime must not go backwards */
+		assert(i1 >= i0);	/* idle time must not go backwards */
+		u0 = u1;
+		i0 = i1;
+	}
+
+	return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-uptime.h b/tools/testing/selftests/proc/proc-uptime.h
new file mode 100644
index 000000000000..0e464b50e9d9
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-uptime.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#undef NDEBUG
+#include <assert.h>
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+static unsigned long long xstrtoull(const char *p, char **end)
+{
+	if (*p == '0') {	/* a leading '0' is consumed alone and yields 0 */
+		*end = (char *)p + 1;
+		return 0;
+	} else if ('1' <= *p && *p <= '9') {
+		unsigned long long val;
+
+		errno = 0;	/* so the assert below only sees strtoull()'s error */
+		val = strtoull(p, end, 10);
+		assert(errno == 0);
+		return val;
+	} else
+		assert(0);	/* input must start with a decimal digit */
+}
+
+static void proc_uptime(int fd, uint64_t *uptime, uint64_t *idle)
+{
+	uint64_t val1, val2;
+	char buf[64], *p;
+	ssize_t rv;
+
+	/* save "p < end" checks */
+	memset(buf, 0, sizeof(buf));
+	rv = pread(fd, buf, sizeof(buf), 0);	/* offset 0: re-read from the start */
+	assert(0 <= rv && rv <= sizeof(buf));
+	buf[sizeof(buf) - 1] = '\0';
+
+	p = buf;
+
+	val1 = xstrtoull(p, &p);	/* integer part of the first number */
+	assert(p[0] == '.');
+	assert('0' <= p[1] && p[1] <= '9');
+	assert('0' <= p[2] && p[2] <= '9');
+	assert(p[3] == ' ');
+
+	val2 = (p[1] - '0') * 10 + p[2] - '0';	/* the two digits after the '.' */
+	*uptime = val1 * 100 + val2;	/* combined value in hundredths */
+
+	p += 4;	/* skip ".DD " to reach the second number */
+
+	val1 = xstrtoull(p, &p);
+	assert(p[0] == '.');
+	assert('0' <= p[1] && p[1] <= '9');
+	assert('0' <= p[2] && p[2] <= '9');
+	assert(p[3] == '\n');
+
+	val2 = (p[1] - '0') * 10 + p[2] - '0';
+	*idle = val1 * 100 + val2;	/* combined value in hundredths */
+
+	assert(p + 4 == buf + rv);	/* nothing may follow the final newline */
+}
diff --git a/tools/testing/selftests/proc/read.c b/tools/testing/selftests/proc/read.c
new file mode 100644
index 000000000000..1e73c2232097
--- /dev/null
+++ b/tools/testing/selftests/proc/read.c
@@ -0,0 +1,147 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test
+// 1) read of every file in /proc
+// 2) readlink of every symlink in /proc
+// 3) recursively (1) + (2) for every directory in /proc
+// 4) write to /proc/*/clear_refs and /proc/*/task/*/clear_refs
+// 5) write to /proc/sysrq-trigger
+#undef NDEBUG
+#include <assert.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+static inline bool streq(const char *s1, const char *s2)
+{
+	return strcmp(s1, s2) == 0;	/* true iff the two strings compare equal */
+}
+
+static struct dirent *xreaddir(DIR *d)
+{
+	struct dirent *de;
+
+	errno = 0;	/* readdir() returns NULL both at end of directory and on error */
+	de = readdir(d);
+	if (!de && errno != 0) {	/* NULL with errno set: a real error, give up */
+		exit(1);
+	}
+	return de;	/* NULL here means there are no more entries */
+}
+
+static void f_reg(DIR *d, const char *filename)
+{
+	char buf[4096];
+	int fd;
+	ssize_t rv;
+
+	/* read from /proc/kmsg can block, hence O_NONBLOCK */
+	fd = openat(dirfd(d), filename, O_RDONLY|O_NONBLOCK);
+	if (fd == -1)	/* open failed: skip this file */
+		return;
+	rv = read(fd, buf, sizeof(buf));
+	assert((0 <= rv && rv <= sizeof(buf)) || rv == -1);	/* an error is accepted, a bogus length is not */
+	close(fd);
+}
+
+static void f_reg_write(DIR *d, const char *filename, const char *buf, size_t len)
+{
+	int fd;
+	ssize_t rv;
+
+	fd = openat(dirfd(d), filename, O_WRONLY);
+	if (fd == -1)	/* open failed: skip this file */
+		return;
+	rv = write(fd, buf, len);
+	assert((0 <= rv && rv <= len) || rv == -1);	/* an error is accepted, a bogus count is not */
+	close(fd);
+}
+
+static void f_lnk(DIR *d, const char *filename)
+{
+	char buf[4096];
+	ssize_t rv;
+
+	rv = readlinkat(dirfd(d), filename, buf, sizeof(buf));	/* the link target itself is never inspected */
+	assert((0 <= rv && rv <= sizeof(buf)) || rv == -1);	/* only the return value is sanity-checked */
+}
+
+static void f(DIR *d, unsigned int level)
+{
+	struct dirent *de;
+
+	de = xreaddir(d);	/* "." must be the first entry */
+	assert(de && de->d_type == DT_DIR);
+	assert(streq(de->d_name, "."));
+
+	de = xreaddir(d);	/* ".." must be the second entry */
+	assert(de && de->d_type == DT_DIR);
+	assert(streq(de->d_name, ".."));
+	/* Exercise every remaining entry by type; directories recurse with level + 1. */
+	while ((de = xreaddir(d))) {
+		assert(!streq(de->d_name, "."));
+		assert(!streq(de->d_name, ".."));
+
+		switch (de->d_type) {
+			DIR *dd;
+			int fd;
+
+		case DT_REG:
+			if (level == 0 && streq(de->d_name, "sysrq-trigger")) {
+				f_reg_write(d, de->d_name, "h", 1);
+			} else if ((level == 1 || level == 3) && streq(de->d_name, "clear_refs")) {
+				f_reg_write(d, de->d_name, "1", 1);	/* clear_refs one or three directories deep */
+			} else {
+				f_reg(d, de->d_name);
+			}
+			break;
+		case DT_DIR:
+			fd = openat(dirfd(d), de->d_name, O_DIRECTORY|O_RDONLY);
+			if (fd == -1)
+				continue;
+			dd = fdopendir(fd);
+			if (dd) {
+				f(dd, level + 1);
+				closedir(dd);	/* also closes fd */
+			} else {
+				close(fd);	/* fdopendir() failed, so fd is still ours to close */
+			}
+			break;
+		case DT_LNK:
+			f_lnk(d, de->d_name);
+			break;
+		default:
+			assert(0);	/* any other entry type fails the test */
+		}
+	}
+}
+
+int main(void)
+{
+	DIR *d;
+
+	d = opendir("/proc");
+	if (!d)	/* /proc could not be opened: exit with status 2 */
+		return 2;
+	f(d, 0);	/* level 0 is /proc itself */
+	return 0;
+}
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 5df609950a66..168c66d74fc5 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -2860,6 +2860,7 @@ TEST(get_metadata)
 	int pipefd[2];
 	char buf;
 	struct seccomp_metadata md;
+	long ret;
 
 	ASSERT_EQ(0, pipe(pipefd));
 
@@ -2893,16 +2894,26 @@ TEST(get_metadata)
 	ASSERT_EQ(0, ptrace(PTRACE_ATTACH, pid));
 	ASSERT_EQ(pid, waitpid(pid, NULL, 0));
 
+	/* Past here must not use ASSERT or child process is never killed. */
+
 	md.filter_off = 0;
-	ASSERT_EQ(sizeof(md), ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md));
+	errno = 0;
+	ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
+	EXPECT_EQ(sizeof(md), ret) {
+		if (errno == EINVAL)
+			XFAIL(goto skip, "Kernel does not support PTRACE_SECCOMP_GET_METADATA (missing CONFIG_CHECKPOINT_RESTORE?)");
+	}
+
 	EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG);
 	EXPECT_EQ(md.filter_off, 0);
 
 	md.filter_off = 1;
-	ASSERT_EQ(sizeof(md), ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md));
+	ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
+	EXPECT_EQ(sizeof(md), ret);
 	EXPECT_EQ(md.flags, 0);
 	EXPECT_EQ(md.filter_off, 1);
 
+skip:
 	ASSERT_EQ(0, kill(pid, SIGKILL));
 }
 
diff --git a/virt/kvm/arm/aarch32.c b/virt/kvm/arm/aarch32.c
index 8bc479fa37e6..efc84cbe8277 100644
--- a/virt/kvm/arm/aarch32.c
+++ b/virt/kvm/arm/aarch32.c
@@ -178,7 +178,7 @@ static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset)
 	*vcpu_cpsr(vcpu) = cpsr;
 
 	/* Note: These now point to the banked copies */
-	*vcpu_spsr(vcpu) = new_spsr_value;
+	vcpu_write_spsr(vcpu, new_spsr_value);
 	*vcpu_reg32(vcpu, 14) = *vcpu_pc(vcpu) + return_offset;
 
 	/* Branch to exception vector */
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 282389eb204f..bd3d57f40f1b 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -545,9 +545,11 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
 	 * The kernel may decide to run userspace after calling vcpu_put, so
 	 * we reset cntvoff to 0 to ensure a consistent read between user
 	 * accesses to the virtual counter and kernel access to the physical
-	 * counter.
+	 * counter in the non-VHE case. For VHE, the virtual counter uses a
+	 * fixed virtual offset of zero, so CNTVOFF_EL2 need not be zeroed.
 	 */
-	set_cntvoff(0);
+	if (!has_vhe())
+		set_cntvoff(0);
 }
 
 /*
@@ -856,11 +858,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
 		return ret;
 
 no_vgic:
-	preempt_disable();
 	timer->enabled = 1;
-	kvm_timer_vcpu_load(vcpu);
-	preempt_enable();
-
 	return 0;
 }
 
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 53572304843b..dba629c5f8ac 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -362,10 +362,12 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	kvm_arm_set_running_vcpu(vcpu);
 	kvm_vgic_load(vcpu);
 	kvm_timer_vcpu_load(vcpu);
+	kvm_vcpu_load_sysregs(vcpu);
 }
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
+	kvm_vcpu_put_sysregs(vcpu);
 	kvm_timer_vcpu_put(vcpu);
 	kvm_vgic_put(vcpu);
 
@@ -420,7 +422,8 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
  */
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
 {
-	return ((!!v->arch.irq_lines || kvm_vgic_vcpu_pending_irq(v))
+	bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF);
+	return ((irq_lines || kvm_vgic_vcpu_pending_irq(v))
 		&& !v->arch.power_off && !v->arch.pause);
 }
 
@@ -632,27 +635,22 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	if (unlikely(!kvm_vcpu_initialized(vcpu)))
 		return -ENOEXEC;
 
-	vcpu_load(vcpu);
-
 	ret = kvm_vcpu_first_run_init(vcpu);
 	if (ret)
-		goto out;
+		return ret;
 
 	if (run->exit_reason == KVM_EXIT_MMIO) {
 		ret = kvm_handle_mmio_return(vcpu, vcpu->run);
 		if (ret)
-			goto out;
-		if (kvm_arm_handle_step_debug(vcpu, vcpu->run)) {
-			ret = 0;
-			goto out;
-		}
-
+			return ret;
+		if (kvm_arm_handle_step_debug(vcpu, vcpu->run))
+			return 0;
 	}
 
-	if (run->immediate_exit) {
-		ret = -EINTR;
-		goto out;
-	}
+	if (run->immediate_exit)
+		return -EINTR;
+
+	vcpu_load(vcpu);
 
 	kvm_sigset_activate(vcpu);
 
@@ -719,6 +717,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) ||
 		    kvm_request_pending(vcpu)) {
 			vcpu->mode = OUTSIDE_GUEST_MODE;
+			isb(); /* Ensure work in x_flush_hwstate is committed */
 			kvm_pmu_sync_hwstate(vcpu);
 			if (static_branch_unlikely(&userspace_irqchip_in_use))
 				kvm_timer_sync_hwstate(vcpu);
@@ -735,13 +734,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		 */
 		trace_kvm_entry(*vcpu_pc(vcpu));
 		guest_enter_irqoff();
-		if (has_vhe())
-			kvm_arm_vhe_guest_enter();
-
-		ret = kvm_call_hyp(__kvm_vcpu_run, vcpu);
 
-		if (has_vhe())
+		if (has_vhe()) {
+			kvm_arm_vhe_guest_enter();
+			ret = kvm_vcpu_run_vhe(vcpu);
 			kvm_arm_vhe_guest_exit();
+		} else {
+			ret = kvm_call_hyp(__kvm_vcpu_run_nvhe, vcpu);
+		}
+
 		vcpu->mode = OUTSIDE_GUEST_MODE;
 		vcpu->stat.exits++;
 		/*
@@ -811,7 +812,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
 	kvm_sigset_deactivate(vcpu);
 
-out:
 	vcpu_put(vcpu);
 	return ret;
 }
@@ -820,18 +820,18 @@ static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level)
 {
 	int bit_index;
 	bool set;
-	unsigned long *ptr;
+	unsigned long *hcr;
 
 	if (number == KVM_ARM_IRQ_CPU_IRQ)
 		bit_index = __ffs(HCR_VI);
 	else /* KVM_ARM_IRQ_CPU_FIQ */
 		bit_index = __ffs(HCR_VF);
 
-	ptr = (unsigned long *)&vcpu->arch.irq_lines;
+	hcr = vcpu_hcr(vcpu);
 	if (level)
-		set = test_and_set_bit(bit_index, ptr);
+		set = test_and_set_bit(bit_index, hcr);
 	else
-		set = test_and_clear_bit(bit_index, ptr);
+		set = test_and_clear_bit(bit_index, hcr);
 
 	/*
 	 * If we didn't change anything, no need to wake up or kick other CPUs
diff --git a/virt/kvm/arm/hyp/timer-sr.c b/virt/kvm/arm/hyp/timer-sr.c
index f24404b3c8df..77754a62eb0c 100644
--- a/virt/kvm/arm/hyp/timer-sr.c
+++ b/virt/kvm/arm/hyp/timer-sr.c
@@ -27,34 +27,34 @@ void __hyp_text __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high)
 	write_sysreg(cntvoff, cntvoff_el2);
 }
 
+/*
+ * Should only be called on non-VHE systems.
+ * VHE systems use EL2 timers and configure EL1 timers in kvm_timer_init_vhe().
+ */
 void __hyp_text __timer_disable_traps(struct kvm_vcpu *vcpu)
 {
-	/*
-	 * We don't need to do this for VHE since the host kernel runs in EL2
-	 * with HCR_EL2.TGE ==1, which makes those bits have no impact.
-	 */
-	if (!has_vhe()) {
-		u64 val;
+	u64 val;
 
-		/* Allow physical timer/counter access for the host */
-		val = read_sysreg(cnthctl_el2);
-		val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN;
-		write_sysreg(val, cnthctl_el2);
-	}
+	/* Allow physical timer/counter access for the host */
+	val = read_sysreg(cnthctl_el2);
+	val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN;
+	write_sysreg(val, cnthctl_el2);
 }
 
+/*
+ * Should only be called on non-VHE systems.
+ * VHE systems use EL2 timers and configure EL1 timers in kvm_timer_init_vhe().
+ */
 void __hyp_text __timer_enable_traps(struct kvm_vcpu *vcpu)
 {
-	if (!has_vhe()) {
-		u64 val;
+	u64 val;
 
-		/*
-		 * Disallow physical timer access for the guest
-		 * Physical counter access is allowed
-		 */
-		val = read_sysreg(cnthctl_el2);
-		val &= ~CNTHCTL_EL1PCEN;
-		val |= CNTHCTL_EL1PCTEN;
-		write_sysreg(val, cnthctl_el2);
-	}
+	/*
+	 * Disallow physical timer access for the guest
+	 * Physical counter access is allowed
+	 */
+	val = read_sysreg(cnthctl_el2);
+	val &= ~CNTHCTL_EL1PCEN;
+	val |= CNTHCTL_EL1PCTEN;
+	write_sysreg(val, cnthctl_el2);
 }
diff --git a/virt/kvm/arm/hyp/vgic-v2-sr.c b/virt/kvm/arm/hyp/vgic-v2-sr.c
deleted file mode 100644
index 4fe6e797e8b3..000000000000
--- a/virt/kvm/arm/hyp/vgic-v2-sr.c
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * Copyright (C) 2012-2015 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/compiler.h>
-#include <linux/irqchip/arm-gic.h>
-#include <linux/kvm_host.h>
-
-#include <asm/kvm_emulate.h>
-#include <asm/kvm_hyp.h>
-#include <asm/kvm_mmu.h>
-
-static void __hyp_text save_elrsr(struct kvm_vcpu *vcpu, void __iomem *base)
-{
-	struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
-	int nr_lr = (kern_hyp_va(&kvm_vgic_global_state))->nr_lr;
-	u32 elrsr0, elrsr1;
-
-	elrsr0 = readl_relaxed(base + GICH_ELRSR0);
-	if (unlikely(nr_lr > 32))
-		elrsr1 = readl_relaxed(base + GICH_ELRSR1);
-	else
-		elrsr1 = 0;
-
-	cpu_if->vgic_elrsr = ((u64)elrsr1 << 32) | elrsr0;
-}
-
-static void __hyp_text save_lrs(struct kvm_vcpu *vcpu, void __iomem *base)
-{
-	struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
-	int i;
-	u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
-
-	for (i = 0; i < used_lrs; i++) {
-		if (cpu_if->vgic_elrsr & (1UL << i))
-			cpu_if->vgic_lr[i] &= ~GICH_LR_STATE;
-		else
-			cpu_if->vgic_lr[i] = readl_relaxed(base + GICH_LR0 + (i * 4));
-
-		writel_relaxed(0, base + GICH_LR0 + (i * 4));
-	}
-}
-
-/* vcpu is already in the HYP VA space */
-void __hyp_text __vgic_v2_save_state(struct kvm_vcpu *vcpu)
-{
-	struct kvm *kvm = kern_hyp_va(vcpu->kvm);
-	struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
-	struct vgic_dist *vgic = &kvm->arch.vgic;
-	void __iomem *base = kern_hyp_va(vgic->vctrl_base);
-	u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
-
-	if (!base)
-		return;
-
-	if (used_lrs) {
-		cpu_if->vgic_apr = readl_relaxed(base + GICH_APR);
-
-		save_elrsr(vcpu, base);
-		save_lrs(vcpu, base);
-
-		writel_relaxed(0, base + GICH_HCR);
-	} else {
-		cpu_if->vgic_elrsr = ~0UL;
-		cpu_if->vgic_apr = 0;
-	}
-}
-
-/* vcpu is already in the HYP VA space */
-void __hyp_text __vgic_v2_restore_state(struct kvm_vcpu *vcpu)
-{
-	struct kvm *kvm = kern_hyp_va(vcpu->kvm);
-	struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
-	struct vgic_dist *vgic = &kvm->arch.vgic;
-	void __iomem *base = kern_hyp_va(vgic->vctrl_base);
-	int i;
-	u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
-
-	if (!base)
-		return;
-
-	if (used_lrs) {
-		writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR);
-		writel_relaxed(cpu_if->vgic_apr, base + GICH_APR);
-		for (i = 0; i < used_lrs; i++) {
-			writel_relaxed(cpu_if->vgic_lr[i],
-				       base + GICH_LR0 + (i * 4));
-		}
-	}
-}
-
-#ifdef CONFIG_ARM64
-/*
- * __vgic_v2_perform_cpuif_access -- perform a GICV access on behalf of the
- *				     guest.
- *
- * @vcpu: the offending vcpu
- *
- * Returns:
- *  1: GICV access successfully performed
- *  0: Not a GICV access
- * -1: Illegal GICV access
- */
-int __hyp_text __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu)
-{
-	struct kvm *kvm = kern_hyp_va(vcpu->kvm);
-	struct vgic_dist *vgic = &kvm->arch.vgic;
-	phys_addr_t fault_ipa;
-	void __iomem *addr;
-	int rd;
-
-	/* Build the full address */
-	fault_ipa  = kvm_vcpu_get_fault_ipa(vcpu);
-	fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0);
-
-	/* If not for GICV, move on */
-	if (fault_ipa <  vgic->vgic_cpu_base ||
-	    fault_ipa >= (vgic->vgic_cpu_base + KVM_VGIC_V2_CPU_SIZE))
-		return 0;
-
-	/* Reject anything but a 32bit access */
-	if (kvm_vcpu_dabt_get_as(vcpu) != sizeof(u32))
-		return -1;
-
-	/* Not aligned? Don't bother */
-	if (fault_ipa & 3)
-		return -1;
-
-	rd = kvm_vcpu_dabt_get_rd(vcpu);
-	addr  = kern_hyp_va((kern_hyp_va(&kvm_vgic_global_state))->vcpu_base_va);
-	addr += fault_ipa - vgic->vgic_cpu_base;
-
-	if (kvm_vcpu_dabt_iswrite(vcpu)) {
-		u32 data = vcpu_data_guest_to_host(vcpu,
-						   vcpu_get_reg(vcpu, rd),
-						   sizeof(u32));
-		writel_relaxed(data, addr);
-	} else {
-		u32 data = readl_relaxed(addr);
-		vcpu_set_reg(vcpu, rd, vcpu_data_host_to_guest(vcpu, data,
-							       sizeof(u32)));
-	}
-
-	return 1;
-}
-#endif
diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c
index b89ce5432214..616e5a433ab0 100644
--- a/virt/kvm/arm/hyp/vgic-v3-sr.c
+++ b/virt/kvm/arm/hyp/vgic-v3-sr.c
@@ -21,6 +21,7 @@
 
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
 
 #define vtr_to_max_lr_idx(v)		((v) & 0xf)
 #define vtr_to_nr_pre_bits(v)		((((u32)(v) >> 26) & 7) + 1)
@@ -208,89 +209,68 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
 {
 	struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
 	u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
-	u64 val;
 
 	/*
 	 * Make sure stores to the GIC via the memory mapped interface
-	 * are now visible to the system register interface.
+	 * are now visible to the system register interface when reading the
+	 * LRs, and when reading back the VMCR on non-VHE systems.
 	 */
-	if (!cpu_if->vgic_sre) {
-		dsb(sy);
-		isb();
-		cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2);
+	if (used_lrs || !has_vhe()) {
+		if (!cpu_if->vgic_sre) {
+			dsb(sy);
+			isb();
+		}
 	}
 
 	if (used_lrs) {
 		int i;
-		u32 nr_pre_bits;
+		u32 elrsr;
 
-		cpu_if->vgic_elrsr = read_gicreg(ICH_ELSR_EL2);
+		elrsr = read_gicreg(ICH_ELSR_EL2);
 
-		write_gicreg(0, ICH_HCR_EL2);
-		val = read_gicreg(ICH_VTR_EL2);
-		nr_pre_bits = vtr_to_nr_pre_bits(val);
+		write_gicreg(cpu_if->vgic_hcr & ~ICH_HCR_EN, ICH_HCR_EL2);
 
 		for (i = 0; i < used_lrs; i++) {
-			if (cpu_if->vgic_elrsr & (1 << i))
+			if (elrsr & (1 << i))
 				cpu_if->vgic_lr[i] &= ~ICH_LR_STATE;
 			else
 				cpu_if->vgic_lr[i] = __gic_v3_get_lr(i);
 
 			__gic_v3_set_lr(0, i);
 		}
+	}
+}
 
-		switch (nr_pre_bits) {
-		case 7:
-			cpu_if->vgic_ap0r[3] = __vgic_v3_read_ap0rn(3);
-			cpu_if->vgic_ap0r[2] = __vgic_v3_read_ap0rn(2);
-		case 6:
-			cpu_if->vgic_ap0r[1] = __vgic_v3_read_ap0rn(1);
-		default:
-			cpu_if->vgic_ap0r[0] = __vgic_v3_read_ap0rn(0);
-		}
+void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
+{
+	struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
+	u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
+	int i;
 
-		switch (nr_pre_bits) {
-		case 7:
-			cpu_if->vgic_ap1r[3] = __vgic_v3_read_ap1rn(3);
-			cpu_if->vgic_ap1r[2] = __vgic_v3_read_ap1rn(2);
-		case 6:
-			cpu_if->vgic_ap1r[1] = __vgic_v3_read_ap1rn(1);
-		default:
-			cpu_if->vgic_ap1r[0] = __vgic_v3_read_ap1rn(0);
-		}
-	} else {
-		if (static_branch_unlikely(&vgic_v3_cpuif_trap) ||
-		    cpu_if->its_vpe.its_vm)
-			write_gicreg(0, ICH_HCR_EL2);
-
-		cpu_if->vgic_elrsr = 0xffff;
-		cpu_if->vgic_ap0r[0] = 0;
-		cpu_if->vgic_ap0r[1] = 0;
-		cpu_if->vgic_ap0r[2] = 0;
-		cpu_if->vgic_ap0r[3] = 0;
-		cpu_if->vgic_ap1r[0] = 0;
-		cpu_if->vgic_ap1r[1] = 0;
-		cpu_if->vgic_ap1r[2] = 0;
-		cpu_if->vgic_ap1r[3] = 0;
-	}
+	if (used_lrs) {
+		write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
 
-	val = read_gicreg(ICC_SRE_EL2);
-	write_gicreg(val | ICC_SRE_EL2_ENABLE, ICC_SRE_EL2);
+		for (i = 0; i < used_lrs; i++)
+			__gic_v3_set_lr(cpu_if->vgic_lr[i], i);
+	}
 
-	if (!cpu_if->vgic_sre) {
-		/* Make sure ENABLE is set at EL2 before setting SRE at EL1 */
-		isb();
-		write_gicreg(1, ICC_SRE_EL1);
+	/*
+	 * Ensure that writes to the LRs, and on non-VHE systems ensure that
+	 * the write to the VMCR in __vgic_v3_activate_traps(), will have
+	 * reached the (re)distributors. This ensures the guest will read the
+	 * correct values from the memory-mapped interface.
+	 */
+	if (used_lrs || !has_vhe()) {
+		if (!cpu_if->vgic_sre) {
+			isb();
+			dsb(sy);
+		}
 	}
 }
 
-void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
+void __hyp_text __vgic_v3_activate_traps(struct kvm_vcpu *vcpu)
 {
 	struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
-	u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
-	u64 val;
-	u32 nr_pre_bits;
-	int i;
 
 	/*
 	 * VFIQEn is RES1 if ICC_SRE_EL1.SRE is 1. This causes a
@@ -299,70 +279,135 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
 	 * consequences. So we must make sure that ICC_SRE_EL1 has
 	 * been actually programmed with the value we want before
 	 * starting to mess with the rest of the GIC, and VMCR_EL2 in
-	 * particular.
+	 * particular.  This logic must be called before
+	 * __vgic_v3_restore_state().
 	 */
 	if (!cpu_if->vgic_sre) {
 		write_gicreg(0, ICC_SRE_EL1);
 		isb();
 		write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2);
+
+
+		if (has_vhe()) {
+			/*
+			 * Ensure that the write to the VMCR will have reached
+			 * the (re)distributors. This ensure the guest will
+			 * read the correct values from the memory-mapped
+			 * interface.
+			 */
+			isb();
+			dsb(sy);
+		}
 	}
 
-	val = read_gicreg(ICH_VTR_EL2);
-	nr_pre_bits = vtr_to_nr_pre_bits(val);
+	/*
+	 * Prevent the guest from touching the GIC system registers if
+	 * SRE isn't enabled for GICv3 emulation.
+	 */
+	write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE,
+		     ICC_SRE_EL2);
 
-	if (used_lrs) {
+	/*
+	 * If we need to trap system registers, we must write
+	 * ICH_HCR_EL2 anyway, even if no interrupts are being
+	 * injected.
+	 */
+	if (static_branch_unlikely(&vgic_v3_cpuif_trap) ||
+	    cpu_if->its_vpe.its_vm)
 		write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
+}
 
-		switch (nr_pre_bits) {
-		case 7:
-			__vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[3], 3);
-			__vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[2], 2);
-		case 6:
-			__vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[1], 1);
-		default:
-			__vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[0], 0);
-		}
-
-		switch (nr_pre_bits) {
-		case 7:
-			__vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[3], 3);
-			__vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[2], 2);
-		case 6:
-			__vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[1], 1);
-		default:
-			__vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[0], 0);
-		}
+void __hyp_text __vgic_v3_deactivate_traps(struct kvm_vcpu *vcpu)
+{
+	struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
+	u64 val;
 
-		for (i = 0; i < used_lrs; i++)
-			__gic_v3_set_lr(cpu_if->vgic_lr[i], i);
-	} else {
-		/*
-		 * If we need to trap system registers, we must write
-		 * ICH_HCR_EL2 anyway, even if no interrupts are being
-		 * injected. Same thing if GICv4 is used, as VLPI
-		 * delivery is gated by ICH_HCR_EL2.En.
-		 */
-		if (static_branch_unlikely(&vgic_v3_cpuif_trap) ||
-		    cpu_if->its_vpe.its_vm)
-			write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
+	if (!cpu_if->vgic_sre) {
+		cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2);
 	}
 
-	/*
-	 * Ensures that the above will have reached the
-	 * (re)distributors. This ensure the guest will read the
-	 * correct values from the memory-mapped interface.
-	 */
+	val = read_gicreg(ICC_SRE_EL2);
+	write_gicreg(val | ICC_SRE_EL2_ENABLE, ICC_SRE_EL2);
+
 	if (!cpu_if->vgic_sre) {
+		/* Make sure ENABLE is set at EL2 before setting SRE at EL1 */
 		isb();
-		dsb(sy);
+		write_gicreg(1, ICC_SRE_EL1);
 	}
 
 	/*
-	 * Prevent the guest from touching the GIC system registers if
-	 * SRE isn't enabled for GICv3 emulation.
+	 * If we were trapping system registers, we enabled the VGIC even if
+	 * no interrupts were being injected, and we disable it again here.
 	 */
-	write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE,
-		     ICC_SRE_EL2);
+	if (static_branch_unlikely(&vgic_v3_cpuif_trap) ||
+	    cpu_if->its_vpe.its_vm)
+		write_gicreg(0, ICH_HCR_EL2);
+}
+
+void __hyp_text __vgic_v3_save_aprs(struct kvm_vcpu *vcpu)
+{
+	struct vgic_v3_cpu_if *cpu_if;
+	u64 val;
+	u32 nr_pre_bits;
+
+	vcpu = kern_hyp_va(vcpu);
+	cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
+
+	val = read_gicreg(ICH_VTR_EL2);
+	nr_pre_bits = vtr_to_nr_pre_bits(val);
+
+	switch (nr_pre_bits) {	/* 7 -> ap0r[0..3], 6 -> ap0r[0..1], otherwise ap0r[0] only */
+	case 7:
+		cpu_if->vgic_ap0r[3] = __vgic_v3_read_ap0rn(3);
+		cpu_if->vgic_ap0r[2] = __vgic_v3_read_ap0rn(2);	/* fall through */
+	case 6:
+		cpu_if->vgic_ap0r[1] = __vgic_v3_read_ap0rn(1);	/* fall through */
+	default:
+		cpu_if->vgic_ap0r[0] = __vgic_v3_read_ap0rn(0);
+	}
+
+	switch (nr_pre_bits) {	/* same selection for ap1r[] */
+	case 7:
+		cpu_if->vgic_ap1r[3] = __vgic_v3_read_ap1rn(3);
+		cpu_if->vgic_ap1r[2] = __vgic_v3_read_ap1rn(2);	/* fall through */
+	case 6:
+		cpu_if->vgic_ap1r[1] = __vgic_v3_read_ap1rn(1);	/* fall through */
+	default:
+		cpu_if->vgic_ap1r[0] = __vgic_v3_read_ap1rn(0);
+	}
+}
+
+void __hyp_text __vgic_v3_restore_aprs(struct kvm_vcpu *vcpu)
+{
+	struct vgic_v3_cpu_if *cpu_if;
+	u64 val;
+	u32 nr_pre_bits;
+
+	vcpu = kern_hyp_va(vcpu);
+	cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
+
+	val = read_gicreg(ICH_VTR_EL2);
+	nr_pre_bits = vtr_to_nr_pre_bits(val);
+
+	switch (nr_pre_bits) {	/* 7 -> ap0r[0..3], 6 -> ap0r[0..1], otherwise ap0r[0] only */
+	case 7:
+		__vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[3], 3);
+		__vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[2], 2);	/* fall through */
+	case 6:
+		__vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[1], 1);	/* fall through */
+	default:
+		__vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[0], 0);
+	}
+
+	switch (nr_pre_bits) {	/* same selection for ap1r[] */
+	case 7:
+		__vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[3], 3);
+		__vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[2], 2);	/* fall through */
+	case 6:
+		__vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[1], 1);	/* fall through */
+	default:
+		__vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[0], 0);
+	}
 }
 
 void __hyp_text __vgic_v3_init_lrs(void)
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index b960acdd0c05..7f6a944db23d 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -43,6 +43,8 @@ static unsigned long hyp_idmap_start;
 static unsigned long hyp_idmap_end;
 static phys_addr_t hyp_idmap_vector;
 
+static unsigned long io_map_base;
+
 #define S2_PGD_SIZE	(PTRS_PER_S2_PGD * sizeof(pgd_t))
 #define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))
 
@@ -479,7 +481,13 @@ static void unmap_hyp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end)
 		clear_hyp_pgd_entry(pgd);
 }
 
-static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size)
+static unsigned int kvm_pgd_index(unsigned long addr, unsigned int ptrs_per_pgd)
+{
+	return (addr >> PGDIR_SHIFT) & (ptrs_per_pgd - 1);	/* NOTE(review): the mask assumes ptrs_per_pgd is a power of two */
+}
+
+static void __unmap_hyp_range(pgd_t *pgdp, unsigned long ptrs_per_pgd,
+			      phys_addr_t start, u64 size)
 {
 	pgd_t *pgd;
 	phys_addr_t addr = start, end = start + size;
@@ -489,7 +497,7 @@ static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size)
 	 * We don't unmap anything from HYP, except at the hyp tear down.
 	 * Hence, we don't have to invalidate the TLBs here.
 	 */
-	pgd = pgdp + pgd_index(addr);
+	pgd = pgdp + kvm_pgd_index(addr, ptrs_per_pgd);
 	do {
 		next = pgd_addr_end(addr, end);
 		if (!pgd_none(*pgd))
@@ -497,32 +505,50 @@ static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size)
 	} while (pgd++, addr = next, addr != end);
 }
 
+static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size)
+{
+	__unmap_hyp_range(pgdp, PTRS_PER_PGD, start, size);
+}
+
+static void unmap_hyp_idmap_range(pgd_t *pgdp, phys_addr_t start, u64 size)
+{
+	__unmap_hyp_range(pgdp, __kvm_idmap_ptrs_per_pgd(), start, size);
+}
+
 /**
  * free_hyp_pgds - free Hyp-mode page tables
  *
  * Assumes hyp_pgd is a page table used strictly in Hyp-mode and
  * therefore contains either mappings in the kernel memory area (above
- * PAGE_OFFSET), or device mappings in the vmalloc range (from
- * VMALLOC_START to VMALLOC_END).
+ * PAGE_OFFSET), or device mappings in the idmap range.
  *
- * boot_hyp_pgd should only map two pages for the init code.
+ * boot_hyp_pgd should only map the idmap range, and is only used in
+ * the extended idmap case.
  */
 void free_hyp_pgds(void)
 {
+	pgd_t *id_pgd;
+
 	mutex_lock(&kvm_hyp_pgd_mutex);
 
+	id_pgd = boot_hyp_pgd ? boot_hyp_pgd : hyp_pgd;
+
+	if (id_pgd) {
+		/* In case we never called hyp_mmu_init() */
+		if (!io_map_base)
+			io_map_base = hyp_idmap_start;
+		unmap_hyp_idmap_range(id_pgd, io_map_base,
+				      hyp_idmap_start + PAGE_SIZE - io_map_base);
+	}
+
 	if (boot_hyp_pgd) {
-		unmap_hyp_range(boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
 		free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order);
 		boot_hyp_pgd = NULL;
 	}
 
 	if (hyp_pgd) {
-		unmap_hyp_range(hyp_pgd, hyp_idmap_start, PAGE_SIZE);
 		unmap_hyp_range(hyp_pgd, kern_hyp_va(PAGE_OFFSET),
 				(uintptr_t)high_memory - PAGE_OFFSET);
-		unmap_hyp_range(hyp_pgd, kern_hyp_va(VMALLOC_START),
-				VMALLOC_END - VMALLOC_START);
 
 		free_pages((unsigned long)hyp_pgd, hyp_pgd_order);
 		hyp_pgd = NULL;
@@ -634,7 +660,7 @@ static int __create_hyp_mappings(pgd_t *pgdp, unsigned long ptrs_per_pgd,
 	addr = start & PAGE_MASK;
 	end = PAGE_ALIGN(end);
 	do {
-		pgd = pgdp + ((addr >> PGDIR_SHIFT) & (ptrs_per_pgd - 1));
+		pgd = pgdp + kvm_pgd_index(addr, ptrs_per_pgd);
 
 		if (pgd_none(*pgd)) {
 			pud = pud_alloc_one(NULL, addr);
@@ -708,29 +734,115 @@ int create_hyp_mappings(void *from, void *to, pgprot_t prot)
 	return 0;
 }
 
+static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size,
+					unsigned long *haddr, pgprot_t prot)
+{
+	pgd_t *pgd = hyp_pgd;
+	unsigned long base;
+	int ret = 0;
+
+	mutex_lock(&kvm_hyp_pgd_mutex);
+
+	/*
+	 * This assumes that we have enough space below the idmap
+	 * page to allocate our VAs. If not, the check below will
+	 * kick. A potential alternative would be to detect that
+	 * overflow and switch to an allocation above the idmap.
+	 *
+	 * The allocated size is always a multiple of PAGE_SIZE.
+	 */
+	size = PAGE_ALIGN(size + offset_in_page(phys_addr));
+	base = io_map_base - size;
+
+	/*
+	 * Verify that BIT(VA_BITS - 1) hasn't been flipped by
+	 * allocating the new area, as it would indicate we've
+	 * overflowed the idmap/IO address range.
+	 */
+	if ((base ^ io_map_base) & BIT(VA_BITS - 1))
+		ret = -ENOMEM;
+	else
+		io_map_base = base;
+
+	mutex_unlock(&kvm_hyp_pgd_mutex);
+
+	if (ret)
+		goto out;
+
+	if (__kvm_cpu_uses_extended_idmap())
+		pgd = boot_hyp_pgd;
+
+	ret = __create_hyp_mappings(pgd, __kvm_idmap_ptrs_per_pgd(),
+				    base, base + size,
+				    __phys_to_pfn(phys_addr), prot);
+	if (ret)
+		goto out;
+
+	*haddr = base + offset_in_page(phys_addr);
+
+out:
+	return ret;
+}
+
 /**
- * create_hyp_io_mappings - duplicate a kernel IO mapping into Hyp mode
- * @from:	The kernel start VA of the range
- * @to:		The kernel end VA of the range (exclusive)
+ * create_hyp_io_mappings - Map IO into both kernel and HYP
  * @phys_addr:	The physical start address which gets mapped
- *
- * The resulting HYP VA is the same as the kernel VA, modulo
- * HYP_PAGE_OFFSET.
+ * @size:	Size of the region being mapped
+ * @kaddr:	Kernel VA for this mapping
+ * @haddr:	HYP VA for this mapping
  */
-int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
+int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
+			   void __iomem **kaddr,
+			   void __iomem **haddr)
 {
-	unsigned long start = kern_hyp_va((unsigned long)from);
-	unsigned long end = kern_hyp_va((unsigned long)to);
+	unsigned long addr;
+	int ret;
 
-	if (is_kernel_in_hyp_mode())
+	*kaddr = ioremap(phys_addr, size);
+	if (!*kaddr)
+		return -ENOMEM;
+
+	if (is_kernel_in_hyp_mode()) {
+		*haddr = *kaddr;
 		return 0;
+	}
 
-	/* Check for a valid kernel IO mapping */
-	if (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1))
-		return -EINVAL;
+	ret = __create_hyp_private_mapping(phys_addr, size,
+					   &addr, PAGE_HYP_DEVICE);
+	if (ret) {
+		iounmap(*kaddr);
+		*kaddr = NULL;
+		*haddr = NULL;
+		return ret;
+	}
+
+	*haddr = (void __iomem *)addr;
+	return 0;
+}
 
-	return __create_hyp_mappings(hyp_pgd, PTRS_PER_PGD, start, end,
-				     __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE);
+/**
+ * create_hyp_exec_mappings - Map an executable range into HYP
+ * @phys_addr:	The physical start address which gets mapped
+ * @size:	Size of the region being mapped
+ * @haddr:	HYP VA for this mapping
+ */
+int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
+			     void **haddr)
+{
+	unsigned long addr;
+	int ret;
+
+	BUG_ON(is_kernel_in_hyp_mode());
+
+	ret = __create_hyp_private_mapping(phys_addr, size,
+					   &addr, PAGE_HYP_EXEC);
+	if (ret) {
+		*haddr = NULL;
+		return ret;
+	}
+
+	*haddr = (void *)addr;
+	return 0;
 }
 
 /**
@@ -1801,7 +1913,9 @@ int kvm_mmu_init(void)
 	int err;
 
 	hyp_idmap_start = kvm_virt_to_phys(__hyp_idmap_text_start);
+	hyp_idmap_start = ALIGN_DOWN(hyp_idmap_start, PAGE_SIZE);
 	hyp_idmap_end = kvm_virt_to_phys(__hyp_idmap_text_end);
+	hyp_idmap_end = ALIGN(hyp_idmap_end, PAGE_SIZE);
 	hyp_idmap_vector = kvm_virt_to_phys(__kvm_hyp_init);
 
 	/*
@@ -1812,10 +1926,11 @@ int kvm_mmu_init(void)
 
 	kvm_debug("IDMAP page: %lx\n", hyp_idmap_start);
 	kvm_debug("HYP VA range: %lx:%lx\n",
-		  kern_hyp_va(PAGE_OFFSET), kern_hyp_va(~0UL));
+		  kern_hyp_va(PAGE_OFFSET),
+		  kern_hyp_va((unsigned long)high_memory - 1));
 
 	if (hyp_idmap_start >= kern_hyp_va(PAGE_OFFSET) &&
-	    hyp_idmap_start <  kern_hyp_va(~0UL) &&
+	    hyp_idmap_start <  kern_hyp_va((unsigned long)high_memory - 1) &&
 	    hyp_idmap_start != (unsigned long)__hyp_idmap_text_start) {
 		/*
 		 * The idmap page is intersecting with the VA space,
@@ -1859,6 +1974,7 @@ int kvm_mmu_init(void)
 			goto out;
 	}
 
+	io_map_base = hyp_idmap_start;
 	return 0;
 out:
 	free_hyp_pgds();
@@ -2035,7 +2151,7 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
  */
 void kvm_set_way_flush(struct kvm_vcpu *vcpu)
 {
-	unsigned long hcr = vcpu_get_hcr(vcpu);
+	unsigned long hcr = *vcpu_hcr(vcpu);
 
 	/*
 	 * If this is the first time we do a S/W operation
@@ -2050,7 +2166,7 @@ void kvm_set_way_flush(struct kvm_vcpu *vcpu)
 		trace_kvm_set_way_flush(*vcpu_pc(vcpu),
 					vcpu_has_cache_enabled(vcpu));
 		stage2_flush_vm(vcpu->kvm);
-		vcpu_set_hcr(vcpu, hcr | HCR_TVM);
+		*vcpu_hcr(vcpu) = hcr | HCR_TVM;
 	}
 }
 
@@ -2068,7 +2184,7 @@ void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled)
 
 	/* Caches are now on, stop trapping VM ops (until a S/W op) */
 	if (now_enabled)
-		vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) & ~HCR_TVM);
+		*vcpu_hcr(vcpu) &= ~HCR_TVM;
 
 	trace_kvm_toggle_cache(*vcpu_pc(vcpu), was_enabled, now_enabled);
 }
diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
index 8a9c42366db7..1c5b76c46e26 100644
--- a/virt/kvm/arm/pmu.c
+++ b/virt/kvm/arm/pmu.c
@@ -37,7 +37,7 @@ u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
 
 	reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
 	      ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx;
-	counter = vcpu_sys_reg(vcpu, reg);
+	counter = __vcpu_sys_reg(vcpu, reg);
 
 	/* The real counter value is equal to the value of counter register plus
 	 * the value perf event counts.
@@ -61,7 +61,7 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
 
 	reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
 	      ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx;
-	vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx);
+	__vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx);
 }
 
 /**
@@ -78,7 +78,7 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
 		counter = kvm_pmu_get_counter_value(vcpu, pmc->idx);
 		reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
 		       ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;
-		vcpu_sys_reg(vcpu, reg) = counter;
+		__vcpu_sys_reg(vcpu, reg) = counter;
 		perf_event_disable(pmc->perf_event);
 		perf_event_release_kernel(pmc->perf_event);
 		pmc->perf_event = NULL;
@@ -125,7 +125,7 @@ void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
 
 u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
 {
-	u64 val = vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT;
+	u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT;
 
 	val &= ARMV8_PMU_PMCR_N_MASK;
 	if (val == 0)
@@ -147,7 +147,7 @@ void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, u64 val)
 	struct kvm_pmu *pmu = &vcpu->arch.pmu;
 	struct kvm_pmc *pmc;
 
-	if (!(vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val)
+	if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val)
 		return;
 
 	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
@@ -193,10 +193,10 @@ static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
 {
 	u64 reg = 0;
 
-	if ((vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) {
-		reg = vcpu_sys_reg(vcpu, PMOVSSET_EL0);
-		reg &= vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
-		reg &= vcpu_sys_reg(vcpu, PMINTENSET_EL1);
+	if ((__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) {
+		reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0);
+		reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
+		reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1);
 		reg &= kvm_pmu_valid_counter_mask(vcpu);
 	}
 
@@ -295,7 +295,7 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
 	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
 	int idx = pmc->idx;
 
-	vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);
+	__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);
 
 	if (kvm_pmu_overflow_status(vcpu)) {
 		kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
@@ -316,19 +316,19 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
 	if (val == 0)
 		return;
 
-	enable = vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
+	enable = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
 	for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) {
 		if (!(val & BIT(i)))
 			continue;
-		type = vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i)
+		type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i)
 		       & ARMV8_PMU_EVTYPE_EVENT;
 		if ((type == ARMV8_PMUV3_PERFCTR_SW_INCR)
 		    && (enable & BIT(i))) {
-			reg = vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
+			reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
 			reg = lower_32_bits(reg);
-			vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;
+			__vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;
 			if (!reg)
-				vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
+				__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
 		}
 	}
 }
@@ -348,7 +348,7 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
 	mask = kvm_pmu_valid_counter_mask(vcpu);
 	if (val & ARMV8_PMU_PMCR_E) {
 		kvm_pmu_enable_counter(vcpu,
-				vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask);
+		       __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask);
 	} else {
 		kvm_pmu_disable_counter(vcpu, mask);
 	}
@@ -369,8 +369,8 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
 
 static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx)
 {
-	return (vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) &&
-	       (vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx));
+	return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) &&
+	       (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx));
 }
 
 /**
diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
index 743ca5cb05ef..68378fe17a0e 100644
--- a/virt/kvm/arm/vgic/vgic-init.c
+++ b/virt/kvm/arm/vgic/vgic-init.c
@@ -166,12 +166,6 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
 	kvm->arch.vgic.in_kernel = true;
 	kvm->arch.vgic.vgic_model = type;
 
-	/*
-	 * kvm_vgic_global_state.vctrl_base is set on vgic probe (kvm_arch_init)
-	 * it is stored in distributor struct for asm save/restore purpose
-	 */
-	kvm->arch.vgic.vctrl_base = kvm_vgic_global_state.vctrl_base;
-
 	kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
 	kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
 	kvm->arch.vgic.vgic_redist_base = VGIC_ADDR_UNDEF;
@@ -302,17 +296,6 @@ int vgic_init(struct kvm *kvm)
 
 	dist->initialized = true;
 
-	/*
-	 * If we're initializing GICv2 on-demand when first running the VCPU
-	 * then we need to load the VGIC state onto the CPU.  We can detect
-	 * this easily by checking if we are in between vcpu_load and vcpu_put
-	 * when we just initialized the VGIC.
-	 */
-	preempt_disable();
-	vcpu = kvm_arm_get_running_vcpu();
-	if (vcpu)
-		kvm_vgic_load(vcpu);
-	preempt_enable();
 out:
 	return ret;
 }
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 465095355666..a8f07243aa9f 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -316,21 +316,24 @@ static int vgic_copy_lpi_list(struct kvm_vcpu *vcpu, u32 **intid_ptr)
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 	struct vgic_irq *irq;
 	u32 *intids;
-	int irq_count = dist->lpi_list_count, i = 0;
+	int irq_count, i = 0;
 
 	/*
-	 * We use the current value of the list length, which may change
-	 * after the kmalloc. We don't care, because the guest shouldn't
-	 * change anything while the command handling is still running,
-	 * and in the worst case we would miss a new IRQ, which one wouldn't
-	 * expect to be covered by this command anyway.
+	 * There is an obvious race between allocating the array and LPIs
+	 * being mapped/unmapped. If we ended up here as a result of a
+	 * command, we're safe (locks are held, preventing another
+	 * command). If coming from another path (such as enabling LPIs),
+	 * we must be careful not to overrun the array.
 	 */
+	irq_count = READ_ONCE(dist->lpi_list_count);
 	intids = kmalloc_array(irq_count, sizeof(intids[0]), GFP_KERNEL);
 	if (!intids)
 		return -ENOMEM;
 
 	spin_lock(&dist->lpi_list_lock);
 	list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
+		if (i == irq_count)
+			break;
 		/* We don't need to "get" the IRQ, as we hold the list lock. */
 		if (irq->target_vcpu != vcpu)
 			continue;
diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c
index 29556f71b691..45aa433f018f 100644
--- a/virt/kvm/arm/vgic/vgic-v2.c
+++ b/virt/kvm/arm/vgic/vgic-v2.c
@@ -105,12 +105,9 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
 
 		/*
 		 * Clear soft pending state when level irqs have been acked.
-		 * Always regenerate the pending state.
 		 */
-		if (irq->config == VGIC_CONFIG_LEVEL) {
-			if (!(val & GICH_LR_PENDING_BIT))
-				irq->pending_latch = false;
-		}
+		if (irq->config == VGIC_CONFIG_LEVEL && !(val & GICH_LR_STATE))
+			irq->pending_latch = false;
 
 		/*
 		 * Level-triggered mapped IRQs are special because we only
@@ -153,8 +150,35 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
 void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
 {
 	u32 val = irq->intid;
+	bool allow_pending = true;
+
+	if (irq->active)
+		val |= GICH_LR_ACTIVE_BIT;
+
+	if (irq->hw) {
+		val |= GICH_LR_HW;
+		val |= irq->hwintid << GICH_LR_PHYSID_CPUID_SHIFT;
+		/*
+		 * Never set pending+active on a HW interrupt, as the
+		 * pending state is kept at the physical distributor
+		 * level.
+		 */
+		if (irq->active)
+			allow_pending = false;
+	} else {
+		if (irq->config == VGIC_CONFIG_LEVEL) {
+			val |= GICH_LR_EOI;
 
-	if (irq_is_pending(irq)) {
+			/*
+			 * Software resampling doesn't work very well
+			 * if we allow pending+active, so let's not do that.
+			 */
+			if (irq->active)
+				allow_pending = false;
+		}
+	}
+
+	if (allow_pending && irq_is_pending(irq)) {
 		val |= GICH_LR_PENDING_BIT;
 
 		if (irq->config == VGIC_CONFIG_EDGE)
@@ -171,24 +195,6 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
 		}
 	}
 
-	if (irq->active)
-		val |= GICH_LR_ACTIVE_BIT;
-
-	if (irq->hw) {
-		val |= GICH_LR_HW;
-		val |= irq->hwintid << GICH_LR_PHYSID_CPUID_SHIFT;
-		/*
-		 * Never set pending+active on a HW interrupt, as the
-		 * pending state is kept at the physical distributor
-		 * level.
-		 */
-		if (irq->active && irq_is_pending(irq))
-			val &= ~GICH_LR_PENDING_BIT;
-	} else {
-		if (irq->config == VGIC_CONFIG_LEVEL)
-			val |= GICH_LR_EOI;
-	}
-
 	/*
 	 * Level-triggered mapped IRQs are special because we only observe
 	 * rising edges as input to the VGIC.  We therefore lower the line
@@ -272,7 +278,6 @@ void vgic_v2_enable(struct kvm_vcpu *vcpu)
 	 * anyway.
 	 */
 	vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0;
-	vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr = ~0;
 
 	/* Get the show on the road... */
 	vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN;
@@ -368,16 +373,11 @@ int vgic_v2_probe(const struct gic_kvm_info *info)
 	if (!PAGE_ALIGNED(info->vcpu.start) ||
 	    !PAGE_ALIGNED(resource_size(&info->vcpu))) {
 		kvm_info("GICV region size/alignment is unsafe, using trapping (reduced performance)\n");
-		kvm_vgic_global_state.vcpu_base_va = ioremap(info->vcpu.start,
-							     resource_size(&info->vcpu));
-		if (!kvm_vgic_global_state.vcpu_base_va) {
-			kvm_err("Cannot ioremap GICV\n");
-			return -ENOMEM;
-		}
 
-		ret = create_hyp_io_mappings(kvm_vgic_global_state.vcpu_base_va,
-					     kvm_vgic_global_state.vcpu_base_va + resource_size(&info->vcpu),
-					     info->vcpu.start);
+		ret = create_hyp_io_mappings(info->vcpu.start,
+					     resource_size(&info->vcpu),
+					     &kvm_vgic_global_state.vcpu_base_va,
+					     &kvm_vgic_global_state.vcpu_hyp_va);
 		if (ret) {
 			kvm_err("Cannot map GICV into hyp\n");
 			goto out;
@@ -386,26 +386,18 @@ int vgic_v2_probe(const struct gic_kvm_info *info)
 		static_branch_enable(&vgic_v2_cpuif_trap);
 	}
 
-	kvm_vgic_global_state.vctrl_base = ioremap(info->vctrl.start,
-						   resource_size(&info->vctrl));
-	if (!kvm_vgic_global_state.vctrl_base) {
-		kvm_err("Cannot ioremap GICH\n");
-		ret = -ENOMEM;
+	ret = create_hyp_io_mappings(info->vctrl.start,
+				     resource_size(&info->vctrl),
+				     &kvm_vgic_global_state.vctrl_base,
+				     &kvm_vgic_global_state.vctrl_hyp);
+	if (ret) {
+		kvm_err("Cannot map VCTRL into hyp\n");
 		goto out;
 	}
 
 	vtr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VTR);
 	kvm_vgic_global_state.nr_lr = (vtr & 0x3f) + 1;
 
-	ret = create_hyp_io_mappings(kvm_vgic_global_state.vctrl_base,
-				     kvm_vgic_global_state.vctrl_base +
-					 resource_size(&info->vctrl),
-				     info->vctrl.start);
-	if (ret) {
-		kvm_err("Cannot map VCTRL into hyp\n");
-		goto out;
-	}
-
 	ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2);
 	if (ret) {
 		kvm_err("Cannot register GICv2 KVM device\n");
@@ -429,18 +421,94 @@ out:
 	return ret;
 }
 
+/*
+ * Save the in-use list registers from the GICH interface at @base into
+ * the vcpu's vgic_lr[] shadow copy, then zero each hardware LR.
+ *
+ * An LR whose bit is set in GICH_ELRSR0/1 is not read back; only the
+ * GICH_LR_STATE bits of its shadow copy are cleared.
+ */
+static void save_lrs(struct kvm_vcpu *vcpu, void __iomem *base)
+{
+	struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
+	u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
+	u64 elrsr;
+	int i;
+
+	elrsr = readl_relaxed(base + GICH_ELRSR0);
+	if (unlikely(used_lrs > 32))
+		elrsr |= ((u64)readl_relaxed(base + GICH_ELRSR1)) << 32;
+
+	for (i = 0; i < used_lrs; i++) {
+		/*
+		 * elrsr is 64 bits wide: shift a 64-bit constant so the
+		 * test stays defined for i >= 32 when long is 32 bits.
+		 */
+		if (elrsr & (1ULL << i))
+			cpu_if->vgic_lr[i] &= ~GICH_LR_STATE;
+		else
+			cpu_if->vgic_lr[i] = readl_relaxed(base + GICH_LR0 + (i * 4));
+
+		writel_relaxed(0, base + GICH_LR0 + (i * 4));
+	}
+}
+
+/*
+ * Save the in-use list registers and write zero to GICH_HCR. Does
+ * nothing when vctrl_base is NULL or no list register is in use.
+ */
+void vgic_v2_save_state(struct kvm_vcpu *vcpu)
+{
+	void __iomem *base = kvm_vgic_global_state.vctrl_base;
+	u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
+
+	if (!base)
+		return;
+
+	if (used_lrs) {
+		save_lrs(vcpu, base);
+		writel_relaxed(0, base + GICH_HCR);
+	}
+}
+
+/*
+ * Write the saved GICH_HCR value and the in-use list registers back to
+ * the GICH interface. Does nothing when vctrl_base is NULL or no list
+ * register is in use.
+ */
+void vgic_v2_restore_state(struct kvm_vcpu *vcpu)
+{
+	struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
+	void __iomem *base = kvm_vgic_global_state.vctrl_base;
+	u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
+	int i;
+
+	if (!base)
+		return;
+
+	if (used_lrs) {
+		writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR);
+		for (i = 0; i < used_lrs; i++) {
+			writel_relaxed(cpu_if->vgic_lr[i],
+				       base + GICH_LR0 + (i * 4));
+		}
+	}
+}
+
 void vgic_v2_load(struct kvm_vcpu *vcpu)
 {
 	struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
-	struct vgic_dist *vgic = &vcpu->kvm->arch.vgic;
 
-	writel_relaxed(cpu_if->vgic_vmcr, vgic->vctrl_base + GICH_VMCR);
+	writel_relaxed(cpu_if->vgic_vmcr,
+		       kvm_vgic_global_state.vctrl_base + GICH_VMCR);
+	writel_relaxed(cpu_if->vgic_apr,
+		       kvm_vgic_global_state.vctrl_base + GICH_APR);
 }
 
 void vgic_v2_put(struct kvm_vcpu *vcpu)
 {
 	struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
-	struct vgic_dist *vgic = &vcpu->kvm->arch.vgic;
 
-	cpu_if->vgic_vmcr = readl_relaxed(vgic->vctrl_base + GICH_VMCR);
+	cpu_if->vgic_vmcr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VMCR);
+	cpu_if->vgic_apr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_APR);
 }
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index 0ff2006f3781..8195f52ae6f0 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -16,6 +16,7 @@
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
 #include <kvm/arm_vgic.h>
+#include <asm/kvm_hyp.h>
 #include <asm/kvm_mmu.h>
 #include <asm/kvm_asm.h>
 
@@ -96,12 +97,9 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
 
 		/*
 		 * Clear soft pending state when level irqs have been acked.
-		 * Always regenerate the pending state.
 		 */
-		if (irq->config == VGIC_CONFIG_LEVEL) {
-			if (!(val & ICH_LR_PENDING_BIT))
-				irq->pending_latch = false;
-		}
+		if (irq->config == VGIC_CONFIG_LEVEL && !(val & ICH_LR_STATE))
+			irq->pending_latch = false;
 
 		/*
 		 * Level-triggered mapped IRQs are special because we only
@@ -135,8 +133,35 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
 {
 	u32 model = vcpu->kvm->arch.vgic.vgic_model;
 	u64 val = irq->intid;
+	bool allow_pending = true;
+
+	if (irq->active)
+		val |= ICH_LR_ACTIVE_BIT;
+
+	if (irq->hw) {
+		val |= ICH_LR_HW;
+		val |= ((u64)irq->hwintid) << ICH_LR_PHYS_ID_SHIFT;
+		/*
+		 * Never set pending+active on a HW interrupt, as the
+		 * pending state is kept at the physical distributor
+		 * level.
+		 */
+		if (irq->active)
+			allow_pending = false;
+	} else {
+		if (irq->config == VGIC_CONFIG_LEVEL) {
+			val |= ICH_LR_EOI;
+
+			/*
+			 * Software resampling doesn't work very well
+			 * if we allow pending+active, so let's not do that.
+			 */
+			if (irq->active)
+				allow_pending = false;
+		}
+	}
 
-	if (irq_is_pending(irq)) {
+	if (allow_pending && irq_is_pending(irq)) {
 		val |= ICH_LR_PENDING_BIT;
 
 		if (irq->config == VGIC_CONFIG_EDGE)
@@ -154,24 +179,6 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
 		}
 	}
 
-	if (irq->active)
-		val |= ICH_LR_ACTIVE_BIT;
-
-	if (irq->hw) {
-		val |= ICH_LR_HW;
-		val |= ((u64)irq->hwintid) << ICH_LR_PHYS_ID_SHIFT;
-		/*
-		 * Never set pending+active on a HW interrupt, as the
-		 * pending state is kept at the physical distributor
-		 * level.
-		 */
-		if (irq->active && irq_is_pending(irq))
-			val &= ~ICH_LR_PENDING_BIT;
-	} else {
-		if (irq->config == VGIC_CONFIG_LEVEL)
-			val |= ICH_LR_EOI;
-	}
-
 	/*
 	 * Level-triggered mapped IRQs are special because we only observe
 	 * rising edges as input to the VGIC.  We therefore lower the line
@@ -274,7 +281,6 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu)
 	 * anyway.
 	 */
 	vgic_v3->vgic_vmcr = 0;
-	vgic_v3->vgic_elrsr = ~0;
 
 	/*
 	 * If we are emulating a GICv3, we do it in an non-GICv2-compatible
@@ -595,6 +601,11 @@ void vgic_v3_load(struct kvm_vcpu *vcpu)
 	 */
 	if (likely(cpu_if->vgic_sre))
 		kvm_call_hyp(__vgic_v3_write_vmcr, cpu_if->vgic_vmcr);
+
+	kvm_call_hyp(__vgic_v3_restore_aprs, vcpu);
+
+	if (has_vhe())
+		__vgic_v3_activate_traps(vcpu);
 }
 
 void vgic_v3_put(struct kvm_vcpu *vcpu)
@@ -603,4 +614,9 @@ void vgic_v3_put(struct kvm_vcpu *vcpu)
 
 	if (likely(cpu_if->vgic_sre))
 		cpu_if->vgic_vmcr = kvm_call_hyp(__vgic_v3_read_vmcr);
+
+	kvm_call_hyp(__vgic_v3_save_aprs, vcpu);
+
+	if (has_vhe())
+		__vgic_v3_deactivate_traps(vcpu);
 }
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
index 8201899126f6..e74baec76361 100644
--- a/virt/kvm/arm/vgic/vgic.c
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -19,6 +19,7 @@
 #include <linux/list_sort.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
+#include <asm/kvm_hyp.h>
 
 #include "vgic.h"
 
@@ -808,6 +809,24 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
 		vgic_clear_lr(vcpu, count);
 }
 
+static inline bool can_access_vgic_from_kernel(void)
+{
+	/*
+	 * GICv2 can always be accessed from the kernel because it is
+	 * memory-mapped, and VHE systems can access GICv3 EL2 system
+	 * registers.
+	 */
+	return !static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif) || has_vhe();
+}
+
+static inline void vgic_save_state(struct kvm_vcpu *vcpu)
+{
+	if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
+		vgic_v2_save_state(vcpu);
+	else
+		__vgic_v3_save_state(vcpu);
+}
+
 /* Sync back the hardware VGIC state into our emulation after a guest's run. */
 void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 {
@@ -819,11 +838,22 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 	if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))
 		return;
 
+	if (can_access_vgic_from_kernel())
+		vgic_save_state(vcpu);
+
 	if (vgic_cpu->used_lrs)
 		vgic_fold_lr_state(vcpu);
 	vgic_prune_ap_list(vcpu);
 }
 
+static inline void vgic_restore_state(struct kvm_vcpu *vcpu)
+{
+	if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
+		vgic_v2_restore_state(vcpu);
+	else
+		__vgic_v3_restore_state(vcpu);
+}
+
 /* Flush our emulation state into the GIC hardware before entering the guest. */
 void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
 {
@@ -846,6 +876,9 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
 	spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock);
 	vgic_flush_lr_state(vcpu);
 	spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock);
+
+	if (can_access_vgic_from_kernel())
+		vgic_restore_state(vcpu);
 }
 
 void kvm_vgic_load(struct kvm_vcpu *vcpu)
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index f5b8519e5546..830e815748a0 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -178,6 +178,9 @@ void vgic_v2_init_lrs(void);
 void vgic_v2_load(struct kvm_vcpu *vcpu);
 void vgic_v2_put(struct kvm_vcpu *vcpu);
 
+void vgic_v2_save_state(struct kvm_vcpu *vcpu);
+void vgic_v2_restore_state(struct kvm_vcpu *vcpu);
+
 static inline void vgic_get_irq_kref(struct vgic_irq *irq)
 {
 	if (irq->intid < VGIC_MIN_LPI)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 65dea3ffef68..c7b2e927f699 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3398,21 +3398,6 @@ static int kvm_io_bus_sort_cmp(const void *p1, const void *p2)
 	return kvm_io_bus_cmp(p1, p2);
 }
 
-static int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev,
-			  gpa_t addr, int len)
-{
-	bus->range[bus->dev_count++] = (struct kvm_io_range) {
-		.addr = addr,
-		.len = len,
-		.dev = dev,
-	};
-
-	sort(bus->range, bus->dev_count, sizeof(struct kvm_io_range),
-		kvm_io_bus_sort_cmp, NULL);
-
-	return 0;
-}
-
 static int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus,
 			     gpa_t addr, int len)
 {
@@ -3553,7 +3538,9 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
 int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
 			    int len, struct kvm_io_device *dev)
 {
+	int i;
 	struct kvm_io_bus *new_bus, *bus;
+	struct kvm_io_range range;
 
 	bus = kvm_get_bus(kvm, bus_idx);
 	if (!bus)
@@ -3567,9 +3554,22 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
 			  sizeof(struct kvm_io_range)), GFP_KERNEL);
 	if (!new_bus)
 		return -ENOMEM;
-	memcpy(new_bus, bus, sizeof(*bus) + (bus->dev_count *
-	       sizeof(struct kvm_io_range)));
-	kvm_io_bus_insert_dev(new_bus, dev, addr, len);
+
+	range = (struct kvm_io_range) {
+		.addr = addr,
+		.len = len,
+		.dev = dev,
+	};
+
+	for (i = 0; i < bus->dev_count; i++)
+		if (kvm_io_bus_cmp(&bus->range[i], &range) > 0)
+			break;
+
+	memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range));
+	new_bus->dev_count++;
+	new_bus->range[i] = range;
+	memcpy(new_bus->range + i + 1, bus->range + i,
+		(bus->dev_count - i) * sizeof(struct kvm_io_range));
 	rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
 	synchronize_srcu_expedited(&kvm->srcu);
 	kfree(bus);