804 files changed, 32888 insertions, 13809 deletions
diff --git a/.clang-format b/.clang-format
new file mode 100644
index 000000000000..faffc0d5af4e
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,428 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# clang-format configuration file. Intended for clang-format >= 4.
+#
+# For more information, see:
+#
+#   Documentation/process/clang-format.rst
+#   https://clang.llvm.org/docs/ClangFormat.html
+#   https://clang.llvm.org/docs/ClangFormatStyleOptions.html
+#
+---
+AccessModifierOffset: -4
+AlignAfterOpenBracket: Align
+AlignConsecutiveAssignments: false
+AlignConsecutiveDeclarations: false
+#AlignEscapedNewlines: Left # Unknown to clang-format-4.0
+AlignOperands: true
+AlignTrailingComments: false
+AllowAllParametersOfDeclarationOnNextLine: false
+AllowShortBlocksOnASingleLine: false
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: None
+AllowShortIfStatementsOnASingleLine: false
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakAfterReturnType: None
+AlwaysBreakBeforeMultilineStrings: false
+AlwaysBreakTemplateDeclarations: false
+BinPackArguments: true
+BinPackParameters: true
+BraceWrapping:
+  AfterClass: false
+  AfterControlStatement: false
+  AfterEnum: false
+  AfterFunction: true
+  AfterNamespace: true
+  AfterObjCDeclaration: false
+  AfterStruct: false
+  AfterUnion: false
+  #AfterExternBlock: false # Unknown to clang-format-5.0
+  BeforeCatch: false
+  BeforeElse: false
+  IndentBraces: false
+  #SplitEmptyFunction: true # Unknown to clang-format-4.0
+  #SplitEmptyRecord: true # Unknown to clang-format-4.0
+  #SplitEmptyNamespace: true # Unknown to clang-format-4.0
+BreakBeforeBinaryOperators: None
+BreakBeforeBraces: Custom
+#BreakBeforeInheritanceComma: false # Unknown to clang-format-4.0
+BreakBeforeTernaryOperators: false
+BreakConstructorInitializersBeforeComma: false
+#BreakConstructorInitializers: BeforeComma # Unknown to clang-format-4.0
+BreakAfterJavaFieldAnnotations: false
+BreakStringLiterals: false
+ColumnLimit: 80
+CommentPragmas: '^ IWYU pragma:'
+#CompactNamespaces: false # Unknown to clang-format-4.0
+ConstructorInitializerAllOnOneLineOrOnePerLine: false
+ConstructorInitializerIndentWidth: 8
+ContinuationIndentWidth: 8
+Cpp11BracedListStyle: false
+DerivePointerAlignment: false
+DisableFormat: false
+ExperimentalAutoDetectBinPacking: false
+#FixNamespaceComments: false # Unknown to clang-format-4.0
+
+# Taken from:
+#   git grep -h '^#define [^[:space:]]*for_each[^[:space:]]*(' include/ \
+#   | sed "s,^#define \([^[:space:]]*for_each[^[:space:]]*\)(.*$,  - '\1'," \
+#   | sort | uniq
+ForEachMacros:
+  - 'apei_estatus_for_each_section'
+  - 'ata_for_each_dev'
+  - 'ata_for_each_link'
+  - 'ax25_for_each'
+  - 'ax25_uid_for_each'
+  - 'bio_for_each_integrity_vec'
+  - '__bio_for_each_segment'
+  - 'bio_for_each_segment'
+  - 'bio_for_each_segment_all'
+  - 'bio_list_for_each'
+  - 'bip_for_each_vec'
+  - 'blkg_for_each_descendant_post'
+  - 'blkg_for_each_descendant_pre'
+  - 'blk_queue_for_each_rl'
+  - 'bond_for_each_slave'
+  - 'bond_for_each_slave_rcu'
+  - 'btree_for_each_safe128'
+  - 'btree_for_each_safe32'
+  - 'btree_for_each_safe64'
+  - 'btree_for_each_safel'
+  - 'card_for_each_dev'
+  - 'cgroup_taskset_for_each'
+  - 'cgroup_taskset_for_each_leader'
+  - 'cpufreq_for_each_entry'
+  - 'cpufreq_for_each_entry_idx'
+  - 'cpufreq_for_each_valid_entry'
+  - 'cpufreq_for_each_valid_entry_idx'
+  - 'css_for_each_child'
+  - 'css_for_each_descendant_post'
+  - 'css_for_each_descendant_pre'
+  - 'device_for_each_child_node'
+  - 'drm_atomic_crtc_for_each_plane'
+  - 'drm_atomic_crtc_state_for_each_plane'
+  - 'drm_atomic_crtc_state_for_each_plane_state'
+  - 'drm_for_each_connector_iter'
+  - 'drm_for_each_crtc'
+  - 'drm_for_each_encoder'
+  - 'drm_for_each_encoder_mask'
+  - 'drm_for_each_fb'
+  - 'drm_for_each_legacy_plane'
+  - 'drm_for_each_plane'
+  - 'drm_for_each_plane_mask'
+  - 'drm_mm_for_each_hole'
+  - 'drm_mm_for_each_node'
+  - 'drm_mm_for_each_node_in_range'
+  - 'drm_mm_for_each_node_safe'
+  - 'for_each_active_drhd_unit'
+  - 'for_each_active_iommu'
+  - 'for_each_available_child_of_node'
+  - 'for_each_bio'
+  - 'for_each_board_func_rsrc'
+  - 'for_each_bvec'
+  - 'for_each_child_of_node'
+  - 'for_each_clear_bit'
+  - 'for_each_clear_bit_from'
+  - 'for_each_cmsghdr'
+  - 'for_each_compatible_node'
+  - 'for_each_console'
+  - 'for_each_cpu'
+  - 'for_each_cpu_and'
+  - 'for_each_cpu_not'
+  - 'for_each_cpu_wrap'
+  - 'for_each_dev_addr'
+  - 'for_each_dma_cap_mask'
+  - 'for_each_drhd_unit'
+  - 'for_each_dss_dev'
+  - 'for_each_efi_memory_desc'
+  - 'for_each_efi_memory_desc_in_map'
+  - 'for_each_endpoint_of_node'
+  - 'for_each_evictable_lru'
+  - 'for_each_fib6_node_rt_rcu'
+  - 'for_each_fib6_walker_rt'
+  - 'for_each_free_mem_range'
+  - 'for_each_free_mem_range_reverse'
+  - 'for_each_func_rsrc'
+  - 'for_each_hstate'
+  - 'for_each_if'
+  - 'for_each_iommu'
+  - 'for_each_ip_tunnel_rcu'
+  - 'for_each_irq_nr'
+  - 'for_each_lru'
+  - 'for_each_matching_node'
+  - 'for_each_matching_node_and_match'
+  - 'for_each_memblock'
+  - 'for_each_memblock_type'
+  - 'for_each_memcg_cache_index'
+  - 'for_each_mem_pfn_range'
+  - 'for_each_mem_range'
+  - 'for_each_mem_range_rev'
+  - 'for_each_migratetype_order'
+  - 'for_each_msi_entry'
+  - 'for_each_net'
+  - 'for_each_netdev'
+  - 'for_each_netdev_continue'
+  - 'for_each_netdev_continue_rcu'
+  - 'for_each_netdev_feature'
+  - 'for_each_netdev_in_bond_rcu'
+  - 'for_each_netdev_rcu'
+  - 'for_each_netdev_reverse'
+  - 'for_each_netdev_safe'
+  - 'for_each_net_rcu'
+  - 'for_each_new_connector_in_state'
+  - 'for_each_new_crtc_in_state'
+  - 'for_each_new_plane_in_state'
+  - 'for_each_new_private_obj_in_state'
+  - 'for_each_node'
+  - 'for_each_node_by_name'
+  - 'for_each_node_by_type'
+  - 'for_each_node_mask'
+  - 'for_each_node_state'
+  - 'for_each_node_with_cpus'
+  - 'for_each_node_with_property'
+  - 'for_each_of_allnodes'
+  - 'for_each_of_allnodes_from'
+  - 'for_each_of_pci_range'
+  - 'for_each_old_connector_in_state'
+  - 'for_each_old_crtc_in_state'
+  - 'for_each_oldnew_connector_in_state'
+  - 'for_each_oldnew_crtc_in_state'
+  - 'for_each_oldnew_plane_in_state'
+  - 'for_each_oldnew_private_obj_in_state'
+  - 'for_each_old_plane_in_state'
+  - 'for_each_old_private_obj_in_state'
+  - 'for_each_online_cpu'
+  - 'for_each_online_node'
+  - 'for_each_online_pgdat'
+  - 'for_each_pci_bridge'
+  - 'for_each_pci_dev'
+  - 'for_each_pci_msi_entry'
+  - 'for_each_populated_zone'
+  - 'for_each_possible_cpu'
+  - 'for_each_present_cpu'
+  - 'for_each_prime_number'
+  - 'for_each_prime_number_from'
+  - 'for_each_process'
+  - 'for_each_process_thread'
+  - 'for_each_property_of_node'
+  - 'for_each_reserved_mem_region'
+  - 'for_each_resv_unavail_range'
+  - 'for_each_rtdcom'
+  - 'for_each_rtdcom_safe'
+  - 'for_each_set_bit'
+  - 'for_each_set_bit_from'
+  - 'for_each_sg'
+  - 'for_each_sg_page'
+  - '__for_each_thread'
+  - 'for_each_thread'
+  - 'for_each_zone'
+  - 'for_each_zone_zonelist'
+  - 'for_each_zone_zonelist_nodemask'
+  - 'fwnode_for_each_available_child_node'
+  - 'fwnode_for_each_child_node'
+  - 'fwnode_graph_for_each_endpoint'
+  - 'gadget_for_each_ep'
+  - 'hash_for_each'
+  - 'hash_for_each_possible'
+  - 'hash_for_each_possible_rcu'
+  - 'hash_for_each_possible_rcu_notrace'
+  - 'hash_for_each_possible_safe'
+  - 'hash_for_each_rcu'
+  - 'hash_for_each_safe'
+  - 'hctx_for_each_ctx'
+  - 'hlist_bl_for_each_entry'
+  - 'hlist_bl_for_each_entry_rcu'
+  - 'hlist_bl_for_each_entry_safe'
+  - 'hlist_for_each'
+  - 'hlist_for_each_entry'
+  - 'hlist_for_each_entry_continue'
+  - 'hlist_for_each_entry_continue_rcu'
+  - 'hlist_for_each_entry_continue_rcu_bh'
+  - 'hlist_for_each_entry_from'
+  - 'hlist_for_each_entry_from_rcu'
+  - 'hlist_for_each_entry_rcu'
+  - 'hlist_for_each_entry_rcu_bh'
+  - 'hlist_for_each_entry_rcu_notrace'
+  - 'hlist_for_each_entry_safe'
+  - '__hlist_for_each_rcu'
+  - 'hlist_for_each_safe'
+  - 'hlist_nulls_for_each_entry'
+  - 'hlist_nulls_for_each_entry_from'
+  - 'hlist_nulls_for_each_entry_rcu'
+  - 'hlist_nulls_for_each_entry_safe'
+  - 'ide_host_for_each_port'
+  - 'ide_port_for_each_dev'
+  - 'ide_port_for_each_present_dev'
+  - 'idr_for_each_entry'
+  - 'idr_for_each_entry_continue'
+  - 'idr_for_each_entry_ul'
+  - 'inet_bind_bucket_for_each'
+  - 'inet_lhash2_for_each_icsk_rcu'
+  - 'iov_for_each'
+  - 'key_for_each'
+  - 'key_for_each_safe'
+  - 'klp_for_each_func'
+  - 'klp_for_each_object'
+  - 'kvm_for_each_memslot'
+  - 'kvm_for_each_vcpu'
+  - 'list_for_each'
+  - 'list_for_each_entry'
+  - 'list_for_each_entry_continue'
+  - 'list_for_each_entry_continue_rcu'
+  - 'list_for_each_entry_continue_reverse'
+  - 'list_for_each_entry_from'
+  - 'list_for_each_entry_from_reverse'
+  - 'list_for_each_entry_lockless'
+  - 'list_for_each_entry_rcu'
+  - 'list_for_each_entry_reverse'
+  - 'list_for_each_entry_safe'
+  - 'list_for_each_entry_safe_continue'
+  - 'list_for_each_entry_safe_from'
+  - 'list_for_each_entry_safe_reverse'
+  - 'list_for_each_prev'
+  - 'list_for_each_prev_safe'
+  - 'list_for_each_safe'
+  - 'llist_for_each'
+  - 'llist_for_each_entry'
+  - 'llist_for_each_entry_safe'
+  - 'llist_for_each_safe'
+  - 'media_device_for_each_entity'
+  - 'media_device_for_each_intf'
+  - 'media_device_for_each_link'
+  - 'media_device_for_each_pad'
+  - 'netdev_for_each_lower_dev'
+  - 'netdev_for_each_lower_private'
+  - 'netdev_for_each_lower_private_rcu'
+  - 'netdev_for_each_mc_addr'
+  - 'netdev_for_each_uc_addr'
+  - 'netdev_for_each_upper_dev_rcu'
+  - 'netdev_hw_addr_list_for_each'
+  - 'nft_rule_for_each_expr'
+  - 'nla_for_each_attr'
+  - 'nla_for_each_nested'
+  - 'nlmsg_for_each_attr'
+  - 'nlmsg_for_each_msg'
+  - 'nr_neigh_for_each'
+  - 'nr_neigh_for_each_safe'
+  - 'nr_node_for_each'
+  - 'nr_node_for_each_safe'
+  - 'of_for_each_phandle'
+  - 'of_property_for_each_string'
+  - 'of_property_for_each_u32'
+  - 'pci_bus_for_each_resource'
+  - 'ping_portaddr_for_each_entry'
+  - 'plist_for_each'
+  - 'plist_for_each_continue'
+  - 'plist_for_each_entry'
+  - 'plist_for_each_entry_continue'
+  - 'plist_for_each_entry_safe'
+  - 'plist_for_each_safe'
+  - 'pnp_for_each_card'
+  - 'pnp_for_each_dev'
+  - 'protocol_for_each_card'
+  - 'protocol_for_each_dev'
+  - 'queue_for_each_hw_ctx'
+  - 'radix_tree_for_each_contig'
+  - 'radix_tree_for_each_slot'
+  - 'radix_tree_for_each_tagged'
+  - 'rbtree_postorder_for_each_entry_safe'
+  - 'resource_list_for_each_entry'
+  - 'resource_list_for_each_entry_safe'
+  - 'rhl_for_each_entry_rcu'
+  - 'rhl_for_each_rcu'
+  - 'rht_for_each'
+  - 'rht_for_each_continue'
+  - 'rht_for_each_entry'
+  - 'rht_for_each_entry_continue'
+  - 'rht_for_each_entry_rcu'
+  - 'rht_for_each_entry_rcu_continue'
+  - 'rht_for_each_entry_safe'
+  - 'rht_for_each_rcu'
+  - 'rht_for_each_rcu_continue'
+  - '__rq_for_each_bio'
+  - 'rq_for_each_segment'
+  - 'scsi_for_each_prot_sg'
+  - 'scsi_for_each_sg'
+  - 'sctp_for_each_hentry'
+  - 'sctp_skb_for_each'
+  - 'shdma_for_each_chan'
+  - '__shost_for_each_device'
+  - 'shost_for_each_device'
+  - 'sk_for_each'
+  - 'sk_for_each_bound'
+  - 'sk_for_each_entry_offset_rcu'
+  - 'sk_for_each_from'
+  - 'sk_for_each_rcu'
+  - 'sk_for_each_safe'
+  - 'sk_nulls_for_each'
+  - 'sk_nulls_for_each_from'
+  - 'sk_nulls_for_each_rcu'
+  - 'snd_pcm_group_for_each_entry'
+  - 'snd_soc_dapm_widget_for_each_path'
+  - 'snd_soc_dapm_widget_for_each_path_safe'
+  - 'snd_soc_dapm_widget_for_each_sink_path'
+  - 'snd_soc_dapm_widget_for_each_source_path'
+  - 'tb_property_for_each'
+  - 'udp_portaddr_for_each_entry'
+  - 'udp_portaddr_for_each_entry_rcu'
+  - 'usb_hub_for_each_child'
+  - 'v4l2_device_for_each_subdev'
+  - 'v4l2_m2m_for_each_dst_buf'
+  - 'v4l2_m2m_for_each_dst_buf_safe'
+  - 'v4l2_m2m_for_each_src_buf'
+  - 'v4l2_m2m_for_each_src_buf_safe'
+  - 'zorro_for_each_dev'
+
+#IncludeBlocks: Preserve # Unknown to clang-format-5.0
+IncludeCategories:
+  - Regex: '.*'
+    Priority: 1
+IncludeIsMainRegex: '(Test)?$'
+IndentCaseLabels: false
+#IndentPPDirectives: None # Unknown to clang-format-5.0
+IndentWidth: 8
+IndentWrappedFunctionNames: true
+JavaScriptQuotes: Leave
+JavaScriptWrapImports: true
+KeepEmptyLinesAtTheStartOfBlocks: false
+MacroBlockBegin: ''
+MacroBlockEnd: ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: Inner
+#ObjCBinPackProtocolList: Auto # Unknown to clang-format-5.0
+ObjCBlockIndentWidth: 8
+ObjCSpaceAfterProperty: true
+ObjCSpaceBeforeProtocolList: true
+
+# Taken from git's rules
+#PenaltyBreakAssignment: 10 # Unknown to clang-format-4.0
+PenaltyBreakBeforeFirstCallParameter: 30
+PenaltyBreakComment: 10
+PenaltyBreakFirstLessLess: 0
+PenaltyBreakString: 10
+PenaltyExcessCharacter: 100
+PenaltyReturnTypeOnItsOwnLine: 60
+
+PointerAlignment: Right
+ReflowComments: false
+SortIncludes: false
+#SortUsingDeclarations: false # Unknown to clang-format-4.0
+SpaceAfterCStyleCast: false
+SpaceAfterTemplateKeyword: true
+SpaceBeforeAssignmentOperators: true
+#SpaceBeforeCtorInitializerColon: true # Unknown to clang-format-5.0
+#SpaceBeforeInheritanceColon: true # Unknown to clang-format-5.0
+SpaceBeforeParens: ControlStatements
+#SpaceBeforeRangeBasedForLoopColon: true # Unknown to clang-format-5.0
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 1
+SpacesInAngles: false
+SpacesInContainerLiterals: false
+SpacesInCStyleCastParentheses: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+Standard: Cpp03
+TabWidth: 8
+UseTab: Always
+...
diff --git a/.gitignore b/.gitignore
index 85bcc2696442..a1dfd2acd9c3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -81,6 +81,7 @@ modules.builtin
 !.gitignore
 !.mailmap
 !.cocciconfig
+!.clang-format
 
 #
 # Generated include files
diff --git a/Documentation/ABI/testing/sysfs-class-rtc b/Documentation/ABI/testing/sysfs-class-rtc
index cf60412882f0..95984289a4ee 100644
--- a/Documentation/ABI/testing/sysfs-class-rtc
+++ b/Documentation/ABI/testing/sysfs-class-rtc
@@ -43,6 +43,14 @@ Contact:	linux-rtc@vger.kernel.org
 Description:
 		(RO) The name of the RTC corresponding to this sysfs directory
 
+What:		/sys/class/rtc/rtcX/range
+Date:		January 2018
+KernelVersion:	4.16
+Contact:	linux-rtc@vger.kernel.org
+Description:
+		Valid time range for the RTC, as seconds from epoch, formatted
+		as [min, max]
+
 What:		/sys/class/rtc/rtcX/since_epoch
 Date:		March 2006
 KernelVersion:	2.6.17
@@ -57,14 +65,6 @@ Contact:	linux-rtc@vger.kernel.org
 Description:
 		(RO) RTC-provided time in 24-hour notation (hh:mm:ss)
 
-What:		/sys/class/rtc/rtcX/*/nvmem
-Date:		February 2016
-KernelVersion:	4.6
-Contact:	linux-rtc@vger.kernel.org
-Description:
-		(RW) The non volatile storage exported as a raw file, as
-		described in Documentation/nvmem/nvmem.txt
-
 What:		/sys/class/rtc/rtcX/offset
 Date:		February 2016
 KernelVersion:	4.6
diff --git a/Documentation/cgroup-v1/memory.txt b/Documentation/cgroup-v1/memory.txt
index a4af2e124e24..3682e99234c2 100644
--- a/Documentation/cgroup-v1/memory.txt
+++ b/Documentation/cgroup-v1/memory.txt
@@ -262,7 +262,7 @@ When oom event notifier is registered, event will be delivered.
 2.6 Locking
 
    lock_page_cgroup()/unlock_page_cgroup() should not be called under
-   mapping->tree_lock.
+   the i_pages lock.
 
    Other lock order is following:
    PG_locked.
diff --git a/Documentation/cpu-freq/core.txt b/Documentation/cpu-freq/core.txt
index 978463a7c81e..073f128af5a7 100644
--- a/Documentation/cpu-freq/core.txt
+++ b/Documentation/cpu-freq/core.txt
@@ -97,12 +97,10 @@ flags	- flags of the cpufreq driver
 ==================================================================
 For details about OPP, see Documentation/power/opp.txt
 
-dev_pm_opp_init_cpufreq_table - cpufreq framework typically is initialized with
-	cpufreq_table_validate_and_show() which is provided with the list of
-	frequencies that are available for operation. This function provides
-	a ready to use conversion routine to translate the OPP layer's internal
-	information about the available frequencies into a format readily
-	providable to cpufreq.
+dev_pm_opp_init_cpufreq_table -
+	This function provides a ready to use conversion routine to translate
+	the OPP layer's internal information about the available frequencies
+	into a format readily providable to cpufreq.
 
 	WARNING: Do not use this function in interrupt context.
 
@@ -112,7 +110,7 @@ dev_pm_opp_init_cpufreq_table - cpufreq framework typically is initialized with
 		/* Do things */
 		r = dev_pm_opp_init_cpufreq_table(dev, &freq_table);
 		if (!r)
-			cpufreq_table_validate_and_show(policy, freq_table);
+			policy->freq_table = freq_table;
 		/* Do other things */
 	 }
 
diff --git a/Documentation/cpu-freq/cpu-drivers.txt b/Documentation/cpu-freq/cpu-drivers.txt
index 61546ac578d6..6e353d00cdc6 100644
--- a/Documentation/cpu-freq/cpu-drivers.txt
+++ b/Documentation/cpu-freq/cpu-drivers.txt
@@ -259,10 +259,8 @@ CPUFREQ_ENTRY_INVALID. The entries don't need to be in sorted in any
 particular order, but if they are cpufreq core will do DVFS a bit
 quickly for them as search for best match is faster.
 
-By calling cpufreq_table_validate_and_show(), the cpuinfo.min_freq and
-cpuinfo.max_freq values are detected, and policy->min and policy->max
-are set to the same values. This is helpful for the per-CPU
-initialization stage.
+The cpufreq table is verified automatically by the core if the policy contains a
+valid pointer in its policy->freq_table field.
 
 cpufreq_frequency_table_verify() assures that at least one valid
 frequency is within policy->min and policy->max, and all other criteria
diff --git a/Documentation/cpuidle/sysfs.txt b/Documentation/cpuidle/sysfs.txt
index b6f44f490ed7..d1587f434e7b 100644
--- a/Documentation/cpuidle/sysfs.txt
+++ b/Documentation/cpuidle/sysfs.txt
@@ -40,6 +40,7 @@ total 0
 -r--r--r-- 1 root root 4096 Feb  8 10:42 latency
 -r--r--r-- 1 root root 4096 Feb  8 10:42 name
 -r--r--r-- 1 root root 4096 Feb  8 10:42 power
+-r--r--r-- 1 root root 4096 Feb  8 10:42 residency
 -r--r--r-- 1 root root 4096 Feb  8 10:42 time
 -r--r--r-- 1 root root 4096 Feb  8 10:42 usage
 
@@ -50,6 +51,7 @@ total 0
 -r--r--r-- 1 root root 4096 Feb  8 10:42 latency
 -r--r--r-- 1 root root 4096 Feb  8 10:42 name
 -r--r--r-- 1 root root 4096 Feb  8 10:42 power
+-r--r--r-- 1 root root 4096 Feb  8 10:42 residency
 -r--r--r-- 1 root root 4096 Feb  8 10:42 time
 -r--r--r-- 1 root root 4096 Feb  8 10:42 usage
 
@@ -60,6 +62,7 @@ total 0
 -r--r--r-- 1 root root 4096 Feb  8 10:42 latency
 -r--r--r-- 1 root root 4096 Feb  8 10:42 name
 -r--r--r-- 1 root root 4096 Feb  8 10:42 power
+-r--r--r-- 1 root root 4096 Feb  8 10:42 residency
 -r--r--r-- 1 root root 4096 Feb  8 10:42 time
 -r--r--r-- 1 root root 4096 Feb  8 10:42 usage
 
@@ -70,6 +73,7 @@ total 0
 -r--r--r-- 1 root root 4096 Feb  8 10:42 latency
 -r--r--r-- 1 root root 4096 Feb  8 10:42 name
 -r--r--r-- 1 root root 4096 Feb  8 10:42 power
+-r--r--r-- 1 root root 4096 Feb  8 10:42 residency
 -r--r--r-- 1 root root 4096 Feb  8 10:42 time
 -r--r--r-- 1 root root 4096 Feb  8 10:42 usage
 --------------------------------------------------------------------------------
@@ -78,6 +82,8 @@ total 0
 * desc : Small description about the idle state (string)
 * disable : Option to disable this idle state (bool) -> see note below
 * latency : Latency to exit out of this idle state (in microseconds)
+* residency : Time after which a state becomes more effecient than any
+  shallower state (in microseconds)
 * name : Name of the idle state (string)
 * power : Power consumed while in this idle state (in milliwatts)
 * time : Total time spent in this idle state (in microseconds)
diff --git a/Documentation/devicetree/bindings/dma/mtk-hsdma.txt b/Documentation/devicetree/bindings/dma/mtk-hsdma.txt
new file mode 100644
index 000000000000..4bb317359dc6
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/mtk-hsdma.txt
@@ -0,0 +1,33 @@
+MediaTek High-Speed DMA Controller
+==================================
+
+This device follows the generic DMA bindings defined in dma/dma.txt.
+
+Required properties:
+
+- compatible:	Must be one of
+		  "mediatek,mt7622-hsdma": for MT7622 SoC
+		  "mediatek,mt7623-hsdma": for MT7623 SoC
+- reg:		Should contain the register's base address and length.
+- interrupts:	Should contain a reference to the interrupt used by this
+		device.
+- clocks:	Should be the clock specifiers corresponding to the entry in
+		clock-names property.
+- clock-names:	Should contain "hsdma" entries.
+- power-domains: Phandle to the power domain that the device is part of
+- #dma-cells: 	The length of the DMA specifier, must be <1>. This one cell
+		in dmas property of a client device represents the channel
+		number.
+Example:
+
+        hsdma: dma-controller@1b007000 {
+		compatible = "mediatek,mt7623-hsdma";
+		reg = <0 0x1b007000 0 0x1000>;
+		interrupts = <GIC_SPI 98 IRQ_TYPE_LEVEL_LOW>;
+		clocks = <&ethsys CLK_ETHSYS_HSDMA>;
+		clock-names = "hsdma";
+		power-domains = <&scpsys MT2701_POWER_DOMAIN_ETH>;
+		#dma-cells = <1>;
+	};
+
+DMA clients must use the format described in dma/dma.txt file.
diff --git a/Documentation/devicetree/bindings/dma/qcom_bam_dma.txt b/Documentation/devicetree/bindings/dma/qcom_bam_dma.txt
index 9cbf5d9df8fd..cf5b9e44432c 100644
--- a/Documentation/devicetree/bindings/dma/qcom_bam_dma.txt
+++ b/Documentation/devicetree/bindings/dma/qcom_bam_dma.txt
@@ -15,6 +15,10 @@ Required properties:
   the secure world.
 - qcom,controlled-remotely : optional, indicates that the bam is controlled by
   remote proccessor i.e. execution environment.
+- num-channels : optional, indicates supported number of DMA channels in a
+  remotely controlled bam.
+- qcom,num-ees : optional, indicates supported number of Execution Environments
+  in a remotely controlled bam.
 
 Example:
 
diff --git a/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.txt b/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.txt
index 891db41e9420..aadfb236d53a 100644
--- a/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.txt
+++ b/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.txt
@@ -18,6 +18,7 @@ Required Properties:
 	      Examples with soctypes are:
 		- "renesas,dmac-r8a7743" (RZ/G1M)
 		- "renesas,dmac-r8a7745" (RZ/G1E)
+		- "renesas,dmac-r8a77470" (RZ/G1C)
 		- "renesas,dmac-r8a7790" (R-Car H2)
 		- "renesas,dmac-r8a7791" (R-Car M2-W)
 		- "renesas,dmac-r8a7792" (R-Car V2H)
@@ -26,6 +27,7 @@ Required Properties:
 		- "renesas,dmac-r8a7795" (R-Car H3)
 		- "renesas,dmac-r8a7796" (R-Car M3-W)
 		- "renesas,dmac-r8a77970" (R-Car V3M)
+		- "renesas,dmac-r8a77980" (R-Car V3H)
 
 - reg: base address and length of the registers block for the DMAC
 
diff --git a/Documentation/devicetree/bindings/dma/renesas,usb-dmac.txt b/Documentation/devicetree/bindings/dma/renesas,usb-dmac.txt
index f3d1f151ba80..9dc935e24e55 100644
--- a/Documentation/devicetree/bindings/dma/renesas,usb-dmac.txt
+++ b/Documentation/devicetree/bindings/dma/renesas,usb-dmac.txt
@@ -11,6 +11,7 @@ Required Properties:
 	  - "renesas,r8a7794-usb-dmac" (R-Car E2)
 	  - "renesas,r8a7795-usb-dmac" (R-Car H3)
 	  - "renesas,r8a7796-usb-dmac" (R-Car M3-W)
+	  - "renesas,r8a77965-usb-dmac" (R-Car M3-N)
 - reg: base address and length of the registers block for the DMAC
 - interrupts: interrupt specifiers for the DMAC, one for each entry in
   interrupt-names.
diff --git a/Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.txt b/Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.txt
new file mode 100644
index 000000000000..f237b7928283
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.txt
@@ -0,0 +1,41 @@
+Synopsys DesignWare AXI DMA Controller
+
+Required properties:
+- compatible: "snps,axi-dma-1.01a"
+- reg: Address range of the DMAC registers. This should include
+  all of the per-channel registers.
+- interrupt: Should contain the DMAC interrupt number.
+- interrupt-parent: Should be the phandle for the interrupt controller
+  that services interrupts for this device.
+- dma-channels: Number of channels supported by hardware.
+- snps,dma-masters: Number of AXI masters supported by the hardware.
+- snps,data-width: Maximum AXI data width supported by hardware.
+  (0 - 8bits, 1 - 16bits, 2 - 32bits, ..., 6 - 512bits)
+- snps,priority: Priority of channel. Array size is equal to the number of
+  dma-channels. Priority value must be programmed within [0:dma-channels-1]
+  range. (0 - minimum priority)
+- snps,block-size: Maximum block size supported by the controller channel.
+  Array size is equal to the number of dma-channels.
+
+Optional properties:
+- snps,axi-max-burst-len: Restrict master AXI burst length by value specified
+  in this property. If this property is missing the maximum AXI burst length
+  supported by DMAC is used. [1:256]
+
+Example:
+
+dmac: dma-controller@80000 {
+	compatible = "snps,axi-dma-1.01a";
+	reg = <0x80000 0x400>;
+	clocks = <&core_clk>, <&cfgr_clk>;
+	clock-names = "core-clk", "cfgr-clk";
+	interrupt-parent = <&intc>;
+	interrupts = <27>;
+
+	dma-channels = <4>;
+	snps,dma-masters = <2>;
+	snps,data-width = <3>;
+	snps,block-size = <4096 4096 4096 4096>;
+	snps,priority = <0 1 2 3>;
+	snps,axi-max-burst-len = <16>;
+};
diff --git a/Documentation/devicetree/bindings/dma/stm32-dma.txt b/Documentation/devicetree/bindings/dma/stm32-dma.txt
index 0b55718bf889..c5f519097204 100644
--- a/Documentation/devicetree/bindings/dma/stm32-dma.txt
+++ b/Documentation/devicetree/bindings/dma/stm32-dma.txt
@@ -62,14 +62,14 @@ channel: a phandle to the DMA controller plus the following four integer cells:
 	0x1: medium
 	0x2: high
 	0x3: very high
-4. A 32bit mask specifying the DMA FIFO threshold configuration which are device
-   dependent:
- -bit 0-1: Fifo threshold
+4. A 32bit bitfield value specifying DMA features which are device dependent:
+ -bit 0-1: DMA FIFO threshold selection
 	0x0: 1/4 full FIFO
 	0x1: 1/2 full FIFO
 	0x2: 3/4 full FIFO
 	0x3: full FIFO
 
+
 Example:
 
 	usart1: serial@40011000 {
diff --git a/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt b/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt
index 1fd5d69647ca..ffadb7c6f1f3 100644
--- a/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt
+++ b/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt
@@ -11,6 +11,8 @@ Required Properties:
     the device is compatible with the R-Car Gen2 VMSA-compatible IPMMU.
 
     - "renesas,ipmmu-r8a73a4" for the R8A73A4 (R-Mobile APE6) IPMMU.
+    - "renesas,ipmmu-r8a7743" for the R8A7743 (RZ/G1M) IPMMU.
+    - "renesas,ipmmu-r8a7745" for the R8A7745 (RZ/G1E) IPMMU.
     - "renesas,ipmmu-r8a7790" for the R8A7790 (R-Car H2) IPMMU.
     - "renesas,ipmmu-r8a7791" for the R8A7791 (R-Car M2-W) IPMMU.
     - "renesas,ipmmu-r8a7793" for the R8A7793 (R-Car M2-N) IPMMU.
@@ -19,7 +21,8 @@ Required Properties:
     - "renesas,ipmmu-r8a7796" for the R8A7796 (R-Car M3-W) IPMMU.
     - "renesas,ipmmu-r8a77970" for the R8A77970 (R-Car V3M) IPMMU.
     - "renesas,ipmmu-r8a77995" for the R8A77995 (R-Car D3) IPMMU.
-    - "renesas,ipmmu-vmsa" for generic R-Car Gen2 VMSA-compatible IPMMU.
+    - "renesas,ipmmu-vmsa" for generic R-Car Gen2 or RZ/G1 VMSA-compatible
+			   IPMMU.
 
   - reg: Base address and size of the IPMMU registers.
   - interrupts: Specifiers for the MMU fault interrupts. For instances that
diff --git a/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt b/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt
index 2098f7732264..6ecefea1c6f9 100644
--- a/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt
+++ b/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt
@@ -14,6 +14,11 @@ Required properties:
                     "single-master" device, and needs no additional information
                     to associate with its master device.  See:
                     Documentation/devicetree/bindings/iommu/iommu.txt
+- clocks          : A list of clocks required for the IOMMU to be accessible by
+                    the host CPU.
+- clock-names     : Should contain the following:
+	"iface" - Main peripheral bus clock (PCLK/HCL) (required)
+	"aclk"  - AXI bus clock (required)
 
 Optional properties:
 - rockchip,disable-mmu-reset : Don't use the mmu reset operation.
@@ -27,5 +32,7 @@ Example:
 		reg = <0xff940300 0x100>;
 		interrupts = <GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>;
 		interrupt-names = "vopl_mmu";
+		clocks = <&cru ACLK_VOP1>, <&cru HCLK_VOP1>;
+		clock-names = "aclk", "iface";
 		#iommu-cells = <0>;
 	};
diff --git a/Documentation/devicetree/bindings/mips/mscc.txt b/Documentation/devicetree/bindings/mips/mscc.txt
new file mode 100644
index 000000000000..ae15ec333542
--- /dev/null
+++ b/Documentation/devicetree/bindings/mips/mscc.txt
@@ -0,0 +1,43 @@
+* Microsemi MIPS CPUs
+
+Boards with a SoC of the Microsemi MIPS family shall have the following
+properties:
+
+Required properties:
+- compatible: "mscc,ocelot"
+
+
+* Other peripherals:
+
+o CPU chip regs:
+
+The SoC has a few registers (DEVCPU_GCB:CHIP_REGS) handling miscellaneous
+functionalities: chip ID, general purpose register for software use, reset
+controller, hardware status and configuration, efuses.
+
+Required properties:
+- compatible: Should be "mscc,ocelot-chip-regs", "simple-mfd", "syscon"
+- reg : Should contain registers location and length
+
+Example:
+	syscon@71070000 {
+		compatible = "mscc,ocelot-chip-regs", "simple-mfd", "syscon";
+		reg = <0x71070000 0x1c>;
+	};
+
+
+o CPU system control:
+
+The SoC has a few registers (ICPU_CFG:CPU_SYSTEM_CTRL) handling configuration of
+the CPU: 8 general purpose registers, reset control, CPU en/disabling, CPU
+endianness, CPU bus control, CPU status.
+
+Required properties:
+- compatible: Should be "mscc,ocelot-cpu-syscon", "syscon"
+- reg : Should contain registers location and length
+
+Example:
+	syscon@70000000 {
+		compatible = "mscc,ocelot-cpu-syscon", "syscon";
+		reg = <0x70000000 0x2c>;
+	};
diff --git a/Documentation/devicetree/bindings/pmem/pmem-region.txt b/Documentation/devicetree/bindings/pmem/pmem-region.txt
new file mode 100644
index 000000000000..5cfa4f016a00
--- /dev/null
+++ b/Documentation/devicetree/bindings/pmem/pmem-region.txt
@@ -0,0 +1,65 @@
+Device-tree bindings for persistent memory regions
+-----------------------------------------------------
+
+Persistent memory refers to a class of memory devices that are:
+
+	a) Usable as main system memory (i.e. cacheable), and
+	b) Retain their contents across power failure.
+
+Given b) it is best to think of persistent memory as a kind of memory mapped
+storage device. To ensure data integrity the operating system needs to manage
+persistent regions separately to the normal memory pool. To aid with that this
+binding provides a standardised interface for discovering where persistent
+memory regions exist inside the physical address space.
+
+Bindings for the region nodes:
+-----------------------------
+
+Required properties:
+	- compatible = "pmem-region"
+
+	- reg = <base, size>;
+		The reg property should specificy an address range that is
+		translatable to a system physical address range. This address
+		range should be mappable as normal system memory would be
+		(i.e cacheable).
+
+		If the reg property contains multiple address ranges
+		each address range will be treated as though it was specified
+		in a separate device node. Having multiple address ranges in a
+		node implies no special relationship between the two ranges.
+
+Optional properties:
+	- Any relevant NUMA assocativity properties for the target platform.
+
+	- volatile; This property indicates that this region is actually
+	  backed by non-persistent memory. This lets the OS know that it
+	  may skip the cache flushes required to ensure data is made
+	  persistent after a write.
+
+	  If this property is absent then the OS must assume that the region
+	  is backed by non-volatile memory.
+
+Examples:
+--------------------
+
+	/*
+	 * This node specifies one 4KB region spanning from
+	 * 0x5000 to 0x5fff that is backed by non-volatile memory.
+	 */
+	pmem@5000 {
+		compatible = "pmem-region";
+		reg = <0x00005000 0x00001000>;
+	};
+
+	/*
+	 * This node specifies two 4KB regions that are backed by
+	 * volatile (normal) memory.
+	 */
+	pmem@6000 {
+		compatible = "pmem-region";
+		reg = < 0x00006000 0x00001000
+			0x00008000 0x00001000 >;
+		volatile;
+	};
+
diff --git a/Documentation/devicetree/bindings/rtc/isil,isl12026.txt b/Documentation/devicetree/bindings/rtc/isil,isl12026.txt
new file mode 100644
index 000000000000..2e0be45193bb
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/isil,isl12026.txt
@@ -0,0 +1,28 @@
+ISL12026 I2C RTC/EEPROM
+
+ISL12026 is an I2C RTC/EEPROM combination device.  The RTC and control
+registers respond at bus address 0x6f, and the EEPROM array responds
+at bus address 0x57.  The canonical "reg" value will be for the RTC portion.
+
+Required properties supported by the device:
+
+ - "compatible": must be "isil,isl12026"
+ - "reg": I2C bus address of the device (always 0x6f)
+
+Optional properties:
+
+ - "isil,pwr-bsw": If present PWR.BSW bit must be set to the specified
+                   value for proper operation.
+
+ - "isil,pwr-sbib": If present PWR.SBIB bit must be set to the specified
+                    value for proper operation.
+
+
+Example:
+
+	rtc@6f {
+		compatible = "isil,isl12026";
+		reg = <0x6f>;
+		isil,pwr-bsw = <0>;
+		isil,pwr-sbib = <1>;
+	}
diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt
index 12e8b3e576b0..b5f978a4cac6 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.txt
+++ b/Documentation/devicetree/bindings/vendor-prefixes.txt
@@ -225,6 +225,7 @@ motorola	Motorola, Inc.
 moxa	Moxa Inc.
 mpl	MPL AG
 mqmaker	mqmaker Inc.
+mscc	Microsemi Corporation
 msi	Micro-Star International Co. Ltd.
 mti	Imagination Technologies Ltd. (formerly MIPS Technologies Inc.)
 multi-inno	Multi-Inno Technology Co.,Ltd
diff --git a/Documentation/filesystems/ceph.txt b/Documentation/filesystems/ceph.txt
index 0b302a11718a..d7f011ddc150 100644
--- a/Documentation/filesystems/ceph.txt
+++ b/Documentation/filesystems/ceph.txt
@@ -62,6 +62,18 @@ subdirectories, and a summation of all nested file sizes.  This makes
 the identification of large disk space consumers relatively quick, as
 no 'du' or similar recursive scan of the file system is required.
 
+Finally, Ceph also allows quotas to be set on any directory in the system.
+The quota can restrict the number of bytes or the number of files stored
+beneath that point in the directory hierarchy.  Quotas can be set using
+extended attributes 'ceph.quota.max_files' and 'ceph.quota.max_bytes', eg:
+
+ setfattr -n ceph.quota.max_bytes -v 100000000 /some/dir
+ getfattr -n ceph.quota.max_bytes /some/dir
+
+A limitation of the current quotas implementation is that it relies on the
+cooperation of the client mounting the file system to stop writers when a
+limit is reached.  A modified or adversarial client cannot be prevented
+from writing as much data as it needs.
 
 Mount Syntax
 ============
@@ -137,6 +149,10 @@ Mount Options
   noasyncreaddir
 	Do not use the dcache as above for readdir.
 
+  noquotadf
+        Report overall filesystem usage in statfs instead of using the root
+        directory quota.
+
 More Information
 ================
 
diff --git a/Documentation/hwmon/adm1275 b/Documentation/hwmon/adm1275
index 791bc0bd91e6..39033538eb03 100644
--- a/Documentation/hwmon/adm1275
+++ b/Documentation/hwmon/adm1275
@@ -6,6 +6,10 @@ Supported chips:
     Prefix: 'adm1075'
     Addresses scanned: -
     Datasheet: www.analog.com/static/imported-files/data_sheets/ADM1075.pdf
+  * Analog Devices ADM1272
+    Prefix: 'adm1272'
+    Addresses scanned: -
+    Datasheet: www.analog.com/static/imported-files/data_sheets/ADM1272.pdf
   * Analog Devices ADM1275
     Prefix: 'adm1275'
     Addresses scanned: -
@@ -29,11 +33,11 @@ Author: Guenter Roeck <linux@roeck-us.net>
 Description
 -----------
 
-This driver supports hardware monitoring for Analog Devices ADM1075, ADM1275,
-ADM1276, ADM1278, ADM1293, and ADM1294 Hot-Swap Controller and Digital
-Power Monitors.
+This driver supports hardware monitoring for Analog Devices ADM1075, ADM1272,
+ADM1275, ADM1276, ADM1278, ADM1293, and ADM1294 Hot-Swap Controller and
+Digital Power Monitors.
 
-ADM1075, ADM1275, ADM1276, ADM1278, ADM1293, and ADM1294 are hot-swap
+ADM1075, ADM1272, ADM1275, ADM1276, ADM1278, ADM1293, and ADM1294 are hot-swap
 controllers that allow a circuit board to be removed from or inserted into
 a live backplane. They also feature current and voltage readback via an
 integrated 12 bit analog-to-digital converter (ADC), accessed using a
@@ -100,11 +104,10 @@ power1_input_lowest	Lowest observed input power. ADM1293 and ADM1294 only.
 power1_input_highest	Highest observed input power.
 power1_reset_history	Write any value to reset history.
 
-			Power attributes are supported on ADM1075, ADM1276,
-			ADM1293, and ADM1294.
+			Power attributes are supported on ADM1075, ADM1272,
+			ADM1276, ADM1293, and ADM1294.
 
 temp1_input		Chip temperature.
-			Temperature attributes are only available on ADM1278.
 temp1_max		Maximum chip temperature.
 temp1_max_alarm		Temperature alarm.
 temp1_crit		Critical chip temperature.
@@ -112,4 +115,5 @@ temp1_crit_alarm	Critical temperature high alarm.
 temp1_highest		Highest observed temperature.
 temp1_reset_history	Write any value to reset history.
 
-			Temperature attributes are supported on ADM1278.
+			Temperature attributes are supported on ADM1272 and
+			ADM1278.
diff --git a/Documentation/hwmon/lm92 b/Documentation/hwmon/lm92
index 22f68ad032cf..cfa99a353b8c 100644
--- a/Documentation/hwmon/lm92
+++ b/Documentation/hwmon/lm92
@@ -11,10 +11,8 @@ Supported chips:
     Addresses scanned: none, force parameter needed
     Datasheet: http://www.national.com/pf/LM/LM76.html
   * Maxim MAX6633/MAX6634/MAX6635
-    Prefix: 'lm92'
-    Addresses scanned: I2C 0x48 - 0x4b
-    MAX6633 with address in 0x40 - 0x47, 0x4c - 0x4f needs force parameter
-    and MAX6634 with address in 0x4c - 0x4f needs force parameter
+    Prefix: 'max6635'
+    Addresses scanned: none, force parameter needed
     Datasheet: http://www.maxim-ic.com/quick_view2.cfm/qv_pk/3074
 
 Authors:
diff --git a/Documentation/hwmon/nct6775 b/Documentation/hwmon/nct6775
index 76add4c9cd68..bd59834d310f 100644
--- a/Documentation/hwmon/nct6775
+++ b/Documentation/hwmon/nct6775
@@ -36,6 +36,14 @@ Supported chips:
     Prefix: 'nct6793'
     Addresses scanned: ISA address retrieved from Super I/O registers
     Datasheet: Available from Nuvoton upon request
+  * Nuvoton NCT6795D
+    Prefix: 'nct6795'
+    Addresses scanned: ISA address retrieved from Super I/O registers
+    Datasheet: Available from Nuvoton upon request
+  * Nuvoton NCT6796D
+    Prefix: 'nct6796'
+    Addresses scanned: ISA address retrieved from Super I/O registers
+    Datasheet: Available from Nuvoton upon request
 
 Authors:
         Guenter Roeck <linux@roeck-us.net>
@@ -88,10 +96,10 @@ The mode works for fan1-fan5.
 sysfs attributes
 ----------------
 
-pwm[1-5] - this file stores PWM duty cycle or DC value (fan speed) in range:
+pwm[1-7] - this file stores PWM duty cycle or DC value (fan speed) in range:
 	   0 (lowest speed) to 255 (full)
 
-pwm[1-5]_enable - this file controls mode of fan/temperature control:
+pwm[1-7]_enable - this file controls mode of fan/temperature control:
 	* 0 Fan control disabled (fans set to maximum speed)
 	* 1 Manual mode, write to pwm[0-5] any value 0-255
 	* 2 "Thermal Cruise" mode
@@ -99,16 +107,16 @@ pwm[1-5]_enable - this file controls mode of fan/temperature control:
 	* 4 "Smart Fan III" mode (NCT6775F only)
 	* 5 "Smart Fan IV" mode
 
-pwm[1-5]_mode - controls if output is PWM or DC level
+pwm[1-7]_mode - controls if output is PWM or DC level
         * 0 DC output
         * 1 PWM output
 
 Common fan control attributes
 -----------------------------
 
-pwm[1-5]_temp_sel	Temperature source. Value is temperature sensor index.
+pwm[1-7]_temp_sel	Temperature source. Value is temperature sensor index.
 			For example, select '1' for temp1_input.
-pwm[1-5]_weight_temp_sel
+pwm[1-7]_weight_temp_sel
 			Secondary temperature source. Value is temperature
 			sensor index. For example, select '1' for temp1_input.
 			Set to 0 to disable secondary temperature control.
@@ -116,16 +124,16 @@ pwm[1-5]_weight_temp_sel
 If secondary temperature functionality is enabled, it is controlled with the
 following attributes.
 
-pwm[1-5]_weight_duty_step
+pwm[1-7]_weight_duty_step
 			Duty step size.
-pwm[1-5]_weight_temp_step
+pwm[1-7]_weight_temp_step
 			Temperature step size. With each step over
 			temp_step_base, the value of weight_duty_step is added
 			to the current pwm value.
-pwm[1-5]_weight_temp_step_base
+pwm[1-7]_weight_temp_step_base
 			Temperature at which secondary temperature control kicks
 			in.
-pwm[1-5]_weight_temp_step_tol
+pwm[1-7]_weight_temp_step_tol
 			Temperature step tolerance.
 
 Thermal Cruise mode (2)
@@ -133,9 +141,9 @@ Thermal Cruise mode (2)
 
 If the temperature is in the range defined by:
 
-pwm[1-5]_target_temp	Target temperature, unit millidegree Celsius
+pwm[1-7]_target_temp	Target temperature, unit millidegree Celsius
 			(range 0 - 127000)
-pwm[1-5]_temp_tolerance
+pwm[1-7]_temp_tolerance
 			Target temperature tolerance, unit millidegree Celsius
 
 there are no changes to fan speed. Once the temperature leaves the interval, fan
@@ -143,14 +151,14 @@ speed increases (if temperature is higher that desired) or decreases (if
 temperature is lower than desired), using the following limits and time
 intervals.
 
-pwm[1-5]_start		fan pwm start value (range 1 - 255), to start fan
+pwm[1-7]_start		fan pwm start value (range 1 - 255), to start fan
 			when the temperature is above defined range.
-pwm[1-5]_floor		lowest fan pwm (range 0 - 255) if temperature is below
+pwm[1-7]_floor		lowest fan pwm (range 0 - 255) if temperature is below
 			the defined range. If set to 0, the fan is expected to
 			stop if the temperature is below the defined range.
-pwm[1-5]_step_up_time	milliseconds before fan speed is increased
-pwm[1-5]_step_down_time	milliseconds before fan speed is decreased
-pwm[1-5]_stop_time	how many milliseconds must elapse to switch
+pwm[1-7]_step_up_time	milliseconds before fan speed is increased
+pwm[1-7]_step_down_time	milliseconds before fan speed is decreased
+pwm[1-7]_stop_time	how many milliseconds must elapse to switch
 			corresponding fan off (when the temperature was below
 			defined range).
 
@@ -159,8 +167,8 @@ Speed Cruise mode (3)
 
 This modes tries to keep the fan speed constant.
 
-fan[1-5]_target		Target fan speed
-fan[1-5]_tolerance
+fan[1-7]_target		Target fan speed
+fan[1-7]_tolerance
 			Target speed tolerance
 
 
@@ -177,19 +185,19 @@ points should be set to higher temperatures and higher pwm values to achieve
 higher fan speeds with increasing temperature. The last data point reflects
 critical temperature mode, in which the fans should run at full speed.
 
-pwm[1-5]_auto_point[1-7]_pwm
+pwm[1-7]_auto_point[1-7]_pwm
 			pwm value to be set if temperature reaches matching
 			temperature range.
-pwm[1-5]_auto_point[1-7]_temp
+pwm[1-7]_auto_point[1-7]_temp
 			Temperature over which the matching pwm is enabled.
-pwm[1-5]_temp_tolerance
+pwm[1-7]_temp_tolerance
 			Temperature tolerance, unit millidegree Celsius
-pwm[1-5]_crit_temp_tolerance
+pwm[1-7]_crit_temp_tolerance
 			Temperature tolerance for critical temperature,
 			unit millidegree Celsius
 
-pwm[1-5]_step_up_time	milliseconds before fan speed is increased
-pwm[1-5]_step_down_time	milliseconds before fan speed is decreased
+pwm[1-7]_step_up_time	milliseconds before fan speed is increased
+pwm[1-7]_step_down_time	milliseconds before fan speed is decreased
 
 Usage Notes
 -----------
diff --git a/Documentation/hwmon/sht21 b/Documentation/hwmon/sht21
index 47f4765db256..8b3cdda541c1 100644
--- a/Documentation/hwmon/sht21
+++ b/Documentation/hwmon/sht21
@@ -6,13 +6,13 @@ Supported chips:
     Prefix: 'sht21'
     Addresses scanned: none
     Datasheet: Publicly available at the Sensirion website
-    http://www.sensirion.com/en/pdf/product_information/Datasheet-humidity-sensor-SHT21.pdf
+    http://www.sensirion.com/file/datasheet_sht21
 
   * Sensirion SHT25
-    Prefix: 'sht21'
+    Prefix: 'sht25'
     Addresses scanned: none
     Datasheet: Publicly available at the Sensirion website
-    http://www.sensirion.com/en/pdf/product_information/Datasheet-humidity-sensor-SHT25.pdf
+    http://www.sensirion.com/file/datasheet_sht25
 
 Author:
   Urs Fleisch <urs.fleisch@sensirion.com>
diff --git a/Documentation/hwmon/sht3x b/Documentation/hwmon/sht3x
index b0d88184f48e..d9daa6ab1e8e 100644
--- a/Documentation/hwmon/sht3x
+++ b/Documentation/hwmon/sht3x
@@ -5,7 +5,7 @@ Supported chips:
   * Sensirion SHT3x-DIS
     Prefix: 'sht3x'
     Addresses scanned: none
-    Datasheet: http://www.sensirion.com/fileadmin/user_upload/customers/sensirion/Dokumente/Humidity/Sensirion_Humidity_Datasheet_SHT3x_DIS.pdf
+    Datasheet: https://www.sensirion.com/file/datasheet_sht3x_digital
 
 Author:
   David Frey <david.frey@sensirion.com>
diff --git a/Documentation/media/kapi/v4l2-dev.rst b/Documentation/media/kapi/v4l2-dev.rst
index 7bb0505b60f1..eb03ccc41c41 100644
--- a/Documentation/media/kapi/v4l2-dev.rst
+++ b/Documentation/media/kapi/v4l2-dev.rst
@@ -31,7 +31,7 @@ of the video device exits.
 The default :c:func:`video_device_release` callback currently
 just calls ``kfree`` to free the allocated memory.
 
-There is also a ::c:func:`video_device_release_empty` function that does
+There is also a :c:func:`video_device_release_empty` function that does
 nothing (is empty) and should be used if the struct is embedded and there
 is nothing to do when it is released.
 
diff --git a/Documentation/media/uapi/mediactl/media-ioc-enum-entities.rst b/Documentation/media/uapi/mediactl/media-ioc-enum-entities.rst
index 45e76e5bc1ea..582fda488810 100644
--- a/Documentation/media/uapi/mediactl/media-ioc-enum-entities.rst
+++ b/Documentation/media/uapi/mediactl/media-ioc-enum-entities.rst
@@ -89,7 +89,7 @@ id's until they get an error.
 
        -
        -
-       -  Entity type, see :ref:`media-entity-type` for details.
+       -  Entity type, see :ref:`media-entity-functions` for details.
 
     -  .. row 4
 
diff --git a/Documentation/media/uapi/mediactl/media-ioc-g-topology.rst b/Documentation/media/uapi/mediactl/media-ioc-g-topology.rst
index c8f9ea37db2d..c4055ddf070a 100644
--- a/Documentation/media/uapi/mediactl/media-ioc-g-topology.rst
+++ b/Documentation/media/uapi/mediactl/media-ioc-g-topology.rst
@@ -205,13 +205,13 @@ desired arrays with the media graph elements.
 
        -  ``function``
 
-       -  Entity main function, see :ref:`media-entity-type` for details.
+       -  Entity main function, see :ref:`media-entity-functions` for details.
 
     -  .. row 4
 
        -  __u32
 
-       -  ``reserved``\ [12]
+       -  ``reserved``\ [6]
 
        -  Reserved for future extensions. Drivers and applications must set
 	  this array to zero.
@@ -334,7 +334,7 @@ desired arrays with the media graph elements.
 
        -  __u32
 
-       -  ``reserved``\ [9]
+       -  ``reserved``\ [5]
 
        -  Reserved for future extensions. Drivers and applications must set
 	  this array to zero.
@@ -390,7 +390,7 @@ desired arrays with the media graph elements.
 
        -  __u32
 
-       -  ``reserved``\ [5]
+       -  ``reserved``\ [6]
 
        -  Reserved for future extensions. Drivers and applications must set
 	  this array to zero.
diff --git a/Documentation/media/uapi/mediactl/media-types.rst b/Documentation/media/uapi/mediactl/media-types.rst
index f92f10b7ffbd..2dda14bd89b7 100644
--- a/Documentation/media/uapi/mediactl/media-types.rst
+++ b/Documentation/media/uapi/mediactl/media-types.rst
@@ -7,11 +7,11 @@ Types and flags used to represent the media graph elements
 
 ..  tabularcolumns:: |p{8.2cm}|p{10.3cm}|
 
-.. _media-entity-type:
+.. _media-entity-functions:
 
 .. cssclass:: longtable
 
-.. flat-table:: Media entity types
+.. flat-table:: Media entity functions
     :header-rows:  0
     :stub-columns: 0
 
diff --git a/Documentation/media/uapi/v4l/extended-controls.rst b/Documentation/media/uapi/v4l/extended-controls.rst
index d5f3eb6e674a..03931f9b1285 100644
--- a/Documentation/media/uapi/v4l/extended-controls.rst
+++ b/Documentation/media/uapi/v4l/extended-controls.rst
@@ -3565,7 +3565,7 @@ enum v4l2_dv_it_content_type -
     HDMI carries 5V on one of the pins). This is often used to power an
     eeprom which contains EDID information, such that the source can
     read the EDID even if the sink is in standby/power off. Each bit
-    corresponds to an input pad on the transmitter. If an input pad
+    corresponds to an input pad on the receiver. If an input pad
     cannot detect whether power is present, then the bit for that pad
     will be 0. This read-only control is applicable to DVI-D, HDMI and
     DisplayPort connectors.
diff --git a/Documentation/media/uapi/v4l/pixfmt-v4l2-mplane.rst b/Documentation/media/uapi/v4l/pixfmt-v4l2-mplane.rst
index 337e8188caf1..ef52f637d8e9 100644
--- a/Documentation/media/uapi/v4l/pixfmt-v4l2-mplane.rst
+++ b/Documentation/media/uapi/v4l/pixfmt-v4l2-mplane.rst
@@ -55,12 +55,14 @@ describing all planes of that format.
       - ``pixelformat``
       - The pixel format. Both single- and multi-planar four character
 	codes can be used.
-    * - enum :c:type:`v4l2_field`
+    * - __u32
       - ``field``
-      - See struct :c:type:`v4l2_pix_format`.
-    * - enum :c:type:`v4l2_colorspace`
+      - Field order, from enum :c:type:`v4l2_field`.
+        See struct :c:type:`v4l2_pix_format`.
+    * - __u32
       - ``colorspace``
-      - See struct :c:type:`v4l2_pix_format`.
+      - Colorspace encoding, from enum :c:type:`v4l2_colorspace`.
+        See struct :c:type:`v4l2_pix_format`.
     * - struct :c:type:`v4l2_plane_pix_format`
       - ``plane_fmt[VIDEO_MAX_PLANES]``
       - An array of structures describing format of each plane this pixel
@@ -73,24 +75,34 @@ describing all planes of that format.
     * - __u8
       - ``flags``
       - Flags set by the application or driver, see :ref:`format-flags`.
-    * - enum :c:type:`v4l2_ycbcr_encoding`
+    * - union {
+      - (anonymous)
+      -
+    * - __u8
       - ``ycbcr_enc``
-      - This information supplements the ``colorspace`` and must be set by
+      - Y'CbCr encoding, from enum :c:type:`v4l2_ycbcr_encoding`.
+        This information supplements the ``colorspace`` and must be set by
 	the driver for capture streams and by the application for output
 	streams, see :ref:`colorspaces`.
-    * - enum :c:type:`v4l2_hsv_encoding`
+    * - __u8
       - ``hsv_enc``
-      - This information supplements the ``colorspace`` and must be set by
+      - HSV encoding, from enum :c:type:`v4l2_hsv_encoding`.
+        This information supplements the ``colorspace`` and must be set by
 	the driver for capture streams and by the application for output
 	streams, see :ref:`colorspaces`.
-    * - enum :c:type:`v4l2_quantization`
+    * - }
+      -
+      -
+    * - __u8
       - ``quantization``
-      - This information supplements the ``colorspace`` and must be set by
+      - Quantization range, from enum :c:type:`v4l2_quantization`.
+        This information supplements the ``colorspace`` and must be set by
 	the driver for capture streams and by the application for output
 	streams, see :ref:`colorspaces`.
-    * - enum :c:type:`v4l2_xfer_func`
+    * - __u8
       - ``xfer_func``
-      - This information supplements the ``colorspace`` and must be set by
+      - Transfer function, from enum :c:type:`v4l2_xfer_func`.
+        This information supplements the ``colorspace`` and must be set by
 	the driver for capture streams and by the application for output
 	streams, see :ref:`colorspaces`.
     * - __u8
diff --git a/Documentation/media/uapi/v4l/pixfmt-v4l2.rst b/Documentation/media/uapi/v4l/pixfmt-v4l2.rst
index 6622938c1b41..826f2305da01 100644
--- a/Documentation/media/uapi/v4l/pixfmt-v4l2.rst
+++ b/Documentation/media/uapi/v4l/pixfmt-v4l2.rst
@@ -40,9 +40,10 @@ Single-planar format structure
 	RGB formats in :ref:`rgb-formats`, YUV formats in
 	:ref:`yuv-formats`, and reserved codes in
 	:ref:`reserved-formats`
-    * - enum :c:type:`v4l2_field`
+    * - __u32
       - ``field``
-      - Video images are typically interlaced. Applications can request to
+      - Field order, from enum :c:type:`v4l2_field`.
+        Video images are typically interlaced. Applications can request to
 	capture or output only the top or bottom field, or both fields
 	interlaced or sequentially stored in one buffer or alternating in
 	separate buffers. Drivers return the actual field order selected.
@@ -82,9 +83,10 @@ Single-planar format structure
 	driver. Usually this is ``bytesperline`` times ``height``. When
 	the image consists of variable length compressed data this is the
 	maximum number of bytes required to hold an image.
-    * - enum :c:type:`v4l2_colorspace`
+    * - __u32
       - ``colorspace``
-      - This information supplements the ``pixelformat`` and must be set
+      - Image colorspace, from enum :c:type:`v4l2_colorspace`.
+        This information supplements the ``pixelformat`` and must be set
 	by the driver for capture streams and by the application for
 	output streams, see :ref:`colorspaces`.
     * - __u32
@@ -116,23 +118,33 @@ Single-planar format structure
     * - __u32
       - ``flags``
       - Flags set by the application or driver, see :ref:`format-flags`.
-    * - enum :c:type:`v4l2_ycbcr_encoding`
+    * - union {
+      - (anonymous)
+      -
+    * - __u32
       - ``ycbcr_enc``
-      - This information supplements the ``colorspace`` and must be set by
+      - Y'CbCr encoding, from enum :c:type:`v4l2_ycbcr_encoding`.
+        This information supplements the ``colorspace`` and must be set by
 	the driver for capture streams and by the application for output
 	streams, see :ref:`colorspaces`.
-    * - enum :c:type:`v4l2_hsv_encoding`
+    * - __u32
       - ``hsv_enc``
-      - This information supplements the ``colorspace`` and must be set by
+      - HSV encoding, from enum :c:type:`v4l2_hsv_encoding`.
+        This information supplements the ``colorspace`` and must be set by
 	the driver for capture streams and by the application for output
 	streams, see :ref:`colorspaces`.
-    * - enum :c:type:`v4l2_quantization`
+    * - }
+      -
+      -
+    * - __u32
       - ``quantization``
-      - This information supplements the ``colorspace`` and must be set by
+      - Quantization range, from enum :c:type:`v4l2_quantization`.
+        This information supplements the ``colorspace`` and must be set by
 	the driver for capture streams and by the application for output
 	streams, see :ref:`colorspaces`.
-    * - enum :c:type:`v4l2_xfer_func`
+    * - __u32
       - ``xfer_func``
-      - This information supplements the ``colorspace`` and must be set by
+      - Transfer function, from enum :c:type:`v4l2_xfer_func`.
+        This information supplements the ``colorspace`` and must be set by
 	the driver for capture streams and by the application for output
 	streams, see :ref:`colorspaces`.
diff --git a/Documentation/process/4.Coding.rst b/Documentation/process/4.Coding.rst
index 26b106071364..eb4b185d168c 100644
--- a/Documentation/process/4.Coding.rst
+++ b/Documentation/process/4.Coding.rst
@@ -58,6 +58,14 @@ can never be transgressed.  If there is a good reason to go against the
 style (a line which becomes far less readable if split to fit within the
 80-column limit, for example), just do it.
 
+Note that you can also use the ``clang-format`` tool to help you with
+these rules, to quickly re-format parts of your code automatically,
+and to review full files in order to spot coding style mistakes,
+typos and possible improvements. It is also handy for sorting ``#includes``,
+for aligning variables/macros, for reflowing text and other similar tasks.
+See the file :ref:`Documentation/process/clang-format.rst <clangformat>`
+for more details.
+
 
 Abstraction layers
 ******************
diff --git a/Documentation/process/clang-format.rst b/Documentation/process/clang-format.rst
new file mode 100644
index 000000000000..6710c0707721
--- /dev/null
+++ b/Documentation/process/clang-format.rst
@@ -0,0 +1,184 @@
+.. _clangformat:
+
+clang-format
+============
+
+``clang-format`` is a tool to format C/C++/... code according to
+a set of rules and heuristics. Like most tools, it is not perfect
+nor covers every single case, but it is good enough to be helpful.
+
+``clang-format`` can be used for several purposes:
+
+  - Quickly reformat a block of code to the kernel style. Specially useful
+    when moving code around and aligning/sorting. See clangformatreformat_.
+
+  - Spot style mistakes, typos and possible improvements in files
+    you maintain, patches you review, diffs, etc. See clangformatreview_.
+
+  - Help you follow the coding style rules, specially useful for those
+    new to kernel development or working at the same time in several
+    projects with different coding styles.
+
+Its configuration file is ``.clang-format`` in the root of the kernel tree.
+The rules contained there try to approximate the most common kernel
+coding style. They also try to follow :ref:`Documentation/process/coding-style.rst <codingstyle>`
+as much as possible. Since not all the kernel follows the same style,
+it is possible that you may want to tweak the defaults for a particular
+subsystem or folder. To do so, you can override the defaults by writing
+another ``.clang-format`` file in a subfolder.
+
+The tool itself has already been included in the repositories of popular
+Linux distributions for a long time. Search for ``clang-format`` in
+your repositories. Otherwise, you can either download pre-built
+LLVM/clang binaries or build the source code from:
+
+    http://releases.llvm.org/download.html
+
+See more information about the tool at:
+
+    https://clang.llvm.org/docs/ClangFormat.html
+
+    https://clang.llvm.org/docs/ClangFormatStyleOptions.html
+
+
+.. _clangformatreview:
+
+Review files and patches for coding style
+-----------------------------------------
+
+By running the tool in its inline mode, you can review full subsystems,
+folders or individual files for code style mistakes, typos or improvements.
+
+To do so, you can run something like::
+
+    # Make sure your working directory is clean!
+    clang-format -i kernel/*.[ch]
+
+And then take a look at the git diff.
+
+Counting the lines of such a diff is also useful for improving/tweaking
+the style options in the configuration file; as well as testing new
+``clang-format`` features/versions.
+
+``clang-format`` also supports reading unified diffs, so you can review
+patches and git diffs easily. See the documentation at:
+
+    https://clang.llvm.org/docs/ClangFormat.html#script-for-patch-reformatting
+
+To avoid ``clang-format`` formatting some portion of a file, you can do::
+
+    int formatted_code;
+    // clang-format off
+        void    unformatted_code  ;
+    // clang-format on
+    void formatted_code_again;
+
+While it might be tempting to use this to keep a file always in sync with
+``clang-format``, specially if you are writing new files or if you are
+a maintainer, please note that people might be running different
+``clang-format`` versions or not have it available at all. Therefore,
+you should probably refrain yourself from using this in kernel sources;
+at least until we see if ``clang-format`` becomes commonplace.
+
+
+.. _clangformatreformat:
+
+Reformatting blocks of code
+---------------------------
+
+By using an integration with your text editor, you can reformat arbitrary
+blocks (selections) of code with a single keystroke. This is specially
+useful when moving code around, for complex code that is deeply intended,
+for multi-line macros (and aligning their backslashes), etc.
+
+Remember that you can always tweak the changes afterwards in those cases
+where the tool did not do an optimal job. But as a first approximation,
+it can be very useful.
+
+There are integrations for many popular text editors. For some of them,
+like vim, emacs, BBEdit and Visual Studio you can find support built-in.
+For instructions, read the appropiate section at:
+
+    https://clang.llvm.org/docs/ClangFormat.html
+
+For Atom, Eclipse, Sublime Text, Visual Studio Code, XCode and other
+editors and IDEs you should be able to find ready-to-use plugins.
+
+For this use case, consider using a secondary ``.clang-format``
+so that you can tweak a few options. See clangformatextra_.
+
+
+.. _clangformatmissing:
+
+Missing support
+---------------
+
+``clang-format`` is missing support for some things that are common
+in kernel code. They are easy to remember, so if you use the tool
+regularly, you will quickly learn to avoid/ignore those.
+
+In particular, some very common ones you will notice are:
+
+  - Aligned blocks of one-line ``#defines``, e.g.::
+
+        #define TRACING_MAP_BITS_DEFAULT       11
+        #define TRACING_MAP_BITS_MAX           17
+        #define TRACING_MAP_BITS_MIN           7
+
+    vs.::
+
+        #define TRACING_MAP_BITS_DEFAULT 11
+        #define TRACING_MAP_BITS_MAX 17
+        #define TRACING_MAP_BITS_MIN 7
+
+  - Aligned designated initializers, e.g.::
+
+        static const struct file_operations uprobe_events_ops = {
+                .owner          = THIS_MODULE,
+                .open           = probes_open,
+                .read           = seq_read,
+                .llseek         = seq_lseek,
+                .release        = seq_release,
+                .write          = probes_write,
+        };
+
+    vs.::
+
+        static const struct file_operations uprobe_events_ops = {
+                .owner = THIS_MODULE,
+                .open = probes_open,
+                .read = seq_read,
+                .llseek = seq_lseek,
+                .release = seq_release,
+                .write = probes_write,
+        };
+
+
+.. _clangformatextra:
+
+Extra features/options
+----------------------
+
+Some features/style options are not enabled by default in the configuration
+file in order to minimize the differences between the output and the current
+code. In other words, to make the difference as small as possible,
+which makes reviewing full-file style, as well diffs and patches as easy
+as possible.
+
+In other cases (e.g. particular subsystems/folders/files), the kernel style
+might be different and enabling some of these options may approximate
+better the style there.
+
+For instance:
+
+  - Aligning assignments (``AlignConsecutiveAssignments``).
+
+  - Aligning declarations (``AlignConsecutiveDeclarations``).
+
+  - Reflowing text in comments (``ReflowComments``).
+
+  - Sorting ``#includes`` (``SortIncludes``).
+
+They are typically useful for block re-formatting, rather than full-file.
+You might want to create another ``.clang-format`` file and use that one
+from your editor/IDE instead.
diff --git a/Documentation/process/coding-style.rst b/Documentation/process/coding-style.rst
index d98deb62c400..4e7c0a1c427a 100644
--- a/Documentation/process/coding-style.rst
+++ b/Documentation/process/coding-style.rst
@@ -631,6 +631,14 @@ options ``-kr -i8`` (stands for ``K&R, 8 character indents``), or use
 re-formatting you may want to take a look at the man page.  But
 remember: ``indent`` is not a fix for bad programming.
 
+Note that you can also use the ``clang-format`` tool to help you with
+these rules, to quickly re-format parts of your code automatically,
+and to review full files in order to spot coding style mistakes,
+typos and possible improvements. It is also handy for sorting ``#includes``,
+for aligning variables/macros, for reflowing text and other similar tasks.
+See the file :ref:`Documentation/process/clang-format.rst <clangformat>`
+for more details.
+
 
 10) Kconfig configuration files
 -------------------------------
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 412314eebda6..eded671d55eb 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -964,32 +964,34 @@ detect a hard lockup condition.
 
 tainted:
 
-Non-zero if the kernel has been tainted.  Numeric values, which
-can be ORed together:
-
-   1 - A module with a non-GPL license has been loaded, this
-       includes modules with no license.
-       Set by modutils >= 2.4.9 and module-init-tools.
-   2 - A module was force loaded by insmod -f.
-       Set by modutils >= 2.4.9 and module-init-tools.
-   4 - Unsafe SMP processors: SMP with CPUs not designed for SMP.
-   8 - A module was forcibly unloaded from the system by rmmod -f.
-  16 - A hardware machine check error occurred on the system.
-  32 - A bad page was discovered on the system.
-  64 - The user has asked that the system be marked "tainted".  This
-       could be because they are running software that directly modifies
-       the hardware, or for other reasons.
- 128 - The system has died.
- 256 - The ACPI DSDT has been overridden with one supplied by the user
-        instead of using the one provided by the hardware.
- 512 - A kernel warning has occurred.
-1024 - A module from drivers/staging was loaded.
-2048 - The system is working around a severe firmware bug.
-4096 - An out-of-tree module has been loaded.
-8192 - An unsigned module has been loaded in a kernel supporting module
-       signature.
-16384 - A soft lockup has previously occurred on the system.
-32768 - The kernel has been live patched.
+Non-zero if the kernel has been tainted. Numeric values, which can be
+ORed together. The letters are seen in "Tainted" line of Oops reports.
+
+     1 (P):  A module with a non-GPL license has been loaded, this
+             includes modules with no license.
+             Set by modutils >= 2.4.9 and module-init-tools.
+     2 (F): A module was force loaded by insmod -f.
+            Set by modutils >= 2.4.9 and module-init-tools.
+     4 (S): Unsafe SMP processors: SMP with CPUs not designed for SMP.
+     8 (R): A module was forcibly unloaded from the system by rmmod -f.
+    16 (M): A hardware machine check error occurred on the system.
+    32 (B): A bad page was discovered on the system.
+    64 (U): The user has asked that the system be marked "tainted". This
+            could be because they are running software that directly modifies
+            the hardware, or for other reasons.
+   128 (D): The system has died.
+   256 (A): The ACPI DSDT has been overridden with one supplied by the user
+            instead of using the one provided by the hardware.
+   512 (W): A kernel warning has occurred.
+  1024 (C): A module from drivers/staging was loaded.
+  2048 (I): The system is working around a severe firmware bug.
+  4096 (O): An out-of-tree module has been loaded.
+  8192 (E): An unsigned module has been loaded in a kernel supporting module
+            signature.
+ 16384 (L): A soft lockup has previously occurred on the system.
+ 32768 (K): The kernel has been live patched.
+ 65536 (X): Auxiliary taint, defined and used by for distros.
+131072 (T): The kernel was built with the struct randomization plugin.
 
 ==============================================================
 
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index ff234d229cbb..17256f2ad919 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -312,8 +312,6 @@ The lowmem_reserve_ratio is an array. You can see them by reading this file.
 % cat /proc/sys/vm/lowmem_reserve_ratio
 256     256     32
 -
-Note: # of this elements is one fewer than number of zones. Because the highest
-      zone's value is not necessary for following calculation.
 
 But, these values are not used directly. The kernel calculates # of protection
 pages for each zones from them. These are shown as array of protection pages
@@ -364,7 +362,8 @@ As above expression, they are reciprocal number of ratio.
 pages of higher zones on the node.
 
 If you would like to protect more pages, smaller values are effective.
-The minimum value is 1 (1/1 -> 100%).
+The minimum value is 1 (1/1 -> 100%). The value less than 1 completely
+disables protection of the pages.
 
 ==============================================================
 
diff --git a/Documentation/trace/events.rst b/Documentation/trace/events.rst
index bdf1963ba6ba..a5ea2cb0082b 100644
--- a/Documentation/trace/events.rst
+++ b/Documentation/trace/events.rst
@@ -520,1550 +520,4 @@ The following commands are supported:
   totals derived from one or more trace event format fields and/or
   event counts (hitcount).
 
-  The format of a hist trigger is as follows::
-
-        hist:keys=<field1[,field2,...]>[:values=<field1[,field2,...]>]
-          [:sort=<field1[,field2,...]>][:size=#entries][:pause][:continue]
-          [:clear][:name=histname1] [if <filter>]
-
-  When a matching event is hit, an entry is added to a hash table
-  using the key(s) and value(s) named.  Keys and values correspond to
-  fields in the event's format description.  Values must correspond to
-  numeric fields - on an event hit, the value(s) will be added to a
-  sum kept for that field.  The special string 'hitcount' can be used
-  in place of an explicit value field - this is simply a count of
-  event hits.  If 'values' isn't specified, an implicit 'hitcount'
-  value will be automatically created and used as the only value.
-  Keys can be any field, or the special string 'stacktrace', which
-  will use the event's kernel stacktrace as the key.  The keywords
-  'keys' or 'key' can be used to specify keys, and the keywords
-  'values', 'vals', or 'val' can be used to specify values.  Compound
-  keys consisting of up to two fields can be specified by the 'keys'
-  keyword.  Hashing a compound key produces a unique entry in the
-  table for each unique combination of component keys, and can be
-  useful for providing more fine-grained summaries of event data.
-  Additionally, sort keys consisting of up to two fields can be
-  specified by the 'sort' keyword.  If more than one field is
-  specified, the result will be a 'sort within a sort': the first key
-  is taken to be the primary sort key and the second the secondary
-  key.  If a hist trigger is given a name using the 'name' parameter,
-  its histogram data will be shared with other triggers of the same
-  name, and trigger hits will update this common data.  Only triggers
-  with 'compatible' fields can be combined in this way; triggers are
-  'compatible' if the fields named in the trigger share the same
-  number and type of fields and those fields also have the same names.
-  Note that any two events always share the compatible 'hitcount' and
-  'stacktrace' fields and can therefore be combined using those
-  fields, however pointless that may be.
-
-  'hist' triggers add a 'hist' file to each event's subdirectory.
-  Reading the 'hist' file for the event will dump the hash table in
-  its entirety to stdout.  If there are multiple hist triggers
-  attached to an event, there will be a table for each trigger in the
-  output.  The table displayed for a named trigger will be the same as
-  any other instance having the same name. Each printed hash table
-  entry is a simple list of the keys and values comprising the entry;
-  keys are printed first and are delineated by curly braces, and are
-  followed by the set of value fields for the entry.  By default,
-  numeric fields are displayed as base-10 integers.  This can be
-  modified by appending any of the following modifiers to the field
-  name:
-
-        - .hex        display a number as a hex value
-	- .sym        display an address as a symbol
-	- .sym-offset display an address as a symbol and offset
-	- .syscall    display a syscall id as a system call name
-	- .execname   display a common_pid as a program name
-
-  Note that in general the semantics of a given field aren't
-  interpreted when applying a modifier to it, but there are some
-  restrictions to be aware of in this regard:
-
-    - only the 'hex' modifier can be used for values (because values
-      are essentially sums, and the other modifiers don't make sense
-      in that context).
-    - the 'execname' modifier can only be used on a 'common_pid'.  The
-      reason for this is that the execname is simply the 'comm' value
-      saved for the 'current' process when an event was triggered,
-      which is the same as the common_pid value saved by the event
-      tracing code.  Trying to apply that comm value to other pid
-      values wouldn't be correct, and typically events that care save
-      pid-specific comm fields in the event itself.
-
-  A typical usage scenario would be the following to enable a hist
-  trigger, read its current contents, and then turn it off::
-
-	  # echo 'hist:keys=skbaddr.hex:vals=len' > \
-	    /sys/kernel/debug/tracing/events/net/netif_rx/trigger
-
-	  # cat /sys/kernel/debug/tracing/events/net/netif_rx/hist
-
-	  # echo '!hist:keys=skbaddr.hex:vals=len' > \
-	    /sys/kernel/debug/tracing/events/net/netif_rx/trigger
-
-  The trigger file itself can be read to show the details of the
-  currently attached hist trigger.  This information is also displayed
-  at the top of the 'hist' file when read.
-
-  By default, the size of the hash table is 2048 entries.  The 'size'
-  parameter can be used to specify more or fewer than that.  The units
-  are in terms of hashtable entries - if a run uses more entries than
-  specified, the results will show the number of 'drops', the number
-  of hits that were ignored.  The size should be a power of 2 between
-  128 and 131072 (any non- power-of-2 number specified will be rounded
-  up).
-
-  The 'sort' parameter can be used to specify a value field to sort
-  on.  The default if unspecified is 'hitcount' and the default sort
-  order is 'ascending'.  To sort in the opposite direction, append
-  .descending' to the sort key.
-
-  The 'pause' parameter can be used to pause an existing hist trigger
-  or to start a hist trigger but not log any events until told to do
-  so.  'continue' or 'cont' can be used to start or restart a paused
-  hist trigger.
-
-  The 'clear' parameter will clear the contents of a running hist
-  trigger and leave its current paused/active state.
-
-  Note that the 'pause', 'cont', and 'clear' parameters should be
-  applied using 'append' shell operator ('>>') if applied to an
-  existing trigger, rather than via the '>' operator, which will cause
-  the trigger to be removed through truncation.
-
-- enable_hist/disable_hist
-
-  The enable_hist and disable_hist triggers can be used to have one
-  event conditionally start and stop another event's already-attached
-  hist trigger.  Any number of enable_hist and disable_hist triggers
-  can be attached to a given event, allowing that event to kick off
-  and stop aggregations on a host of other events.
-
-  The format is very similar to the enable/disable_event triggers::
-
-      enable_hist:<system>:<event>[:count]
-      disable_hist:<system>:<event>[:count]
-
-  Instead of enabling or disabling the tracing of the target event
-  into the trace buffer as the enable/disable_event triggers do, the
-  enable/disable_hist triggers enable or disable the aggregation of
-  the target event into a hash table.
-
-  A typical usage scenario for the enable_hist/disable_hist triggers
-  would be to first set up a paused hist trigger on some event,
-  followed by an enable_hist/disable_hist pair that turns the hist
-  aggregation on and off when conditions of interest are hit::
-
-	  # echo 'hist:keys=skbaddr.hex:vals=len:pause' > \
-	    /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
-
-	  # echo 'enable_hist:net:netif_receive_skb if filename==/usr/bin/wget' > \
-	    /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger
-
-	  # echo 'disable_hist:net:netif_receive_skb if comm==wget' > \
-	    /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger
-
-  The above sets up an initially paused hist trigger which is unpaused
-  and starts aggregating events when a given program is executed, and
-  which stops aggregating when the process exits and the hist trigger
-  is paused again.
-
-  The examples below provide a more concrete illustration of the
-  concepts and typical usage patterns discussed above.
-
-
-6.2 'hist' trigger examples
----------------------------
-
-  The first set of examples creates aggregations using the kmalloc
-  event.  The fields that can be used for the hist trigger are listed
-  in the kmalloc event's format file::
-
-    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/format
-    name: kmalloc
-    ID: 374
-    format:
-	field:unsigned short common_type;	offset:0;	size:2;	signed:0;
-	field:unsigned char common_flags;	offset:2;	size:1;	signed:0;
-	field:unsigned char common_preempt_count;		offset:3;	size:1;	signed:0;
-	field:int common_pid;					offset:4;	size:4;	signed:1;
-
-	field:unsigned long call_site;				offset:8;	size:8;	signed:0;
-	field:const void * ptr;					offset:16;	size:8;	signed:0;
-	field:size_t bytes_req;					offset:24;	size:8;	signed:0;
-	field:size_t bytes_alloc;				offset:32;	size:8;	signed:0;
-	field:gfp_t gfp_flags;					offset:40;	size:4;	signed:0;
-
-  We'll start by creating a hist trigger that generates a simple table
-  that lists the total number of bytes requested for each function in
-  the kernel that made one or more calls to kmalloc::
-
-    # echo 'hist:key=call_site:val=bytes_req' > \
-            /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
-
-  This tells the tracing system to create a 'hist' trigger using the
-  call_site field of the kmalloc event as the key for the table, which
-  just means that each unique call_site address will have an entry
-  created for it in the table.  The 'val=bytes_req' parameter tells
-  the hist trigger that for each unique entry (call_site) in the
-  table, it should keep a running total of the number of bytes
-  requested by that call_site.
-
-  We'll let it run for awhile and then dump the contents of the 'hist'
-  file in the kmalloc event's subdirectory (for readability, a number
-  of entries have been omitted)::
-
-    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
-    # trigger info: hist:keys=call_site:vals=bytes_req:sort=hitcount:size=2048 [active]
-
-    { call_site: 18446744072106379007 } hitcount:          1  bytes_req:        176
-    { call_site: 18446744071579557049 } hitcount:          1  bytes_req:       1024
-    { call_site: 18446744071580608289 } hitcount:          1  bytes_req:      16384
-    { call_site: 18446744071581827654 } hitcount:          1  bytes_req:         24
-    { call_site: 18446744071580700980 } hitcount:          1  bytes_req:          8
-    { call_site: 18446744071579359876 } hitcount:          1  bytes_req:        152
-    { call_site: 18446744071580795365 } hitcount:          3  bytes_req:        144
-    { call_site: 18446744071581303129 } hitcount:          3  bytes_req:        144
-    { call_site: 18446744071580713234 } hitcount:          4  bytes_req:       2560
-    { call_site: 18446744071580933750 } hitcount:          4  bytes_req:        736
-    .
-    .
-    .
-    { call_site: 18446744072106047046 } hitcount:         69  bytes_req:       5576
-    { call_site: 18446744071582116407 } hitcount:         73  bytes_req:       2336
-    { call_site: 18446744072106054684 } hitcount:        136  bytes_req:     140504
-    { call_site: 18446744072106224230 } hitcount:        136  bytes_req:      19584
-    { call_site: 18446744072106078074 } hitcount:        153  bytes_req:       2448
-    { call_site: 18446744072106062406 } hitcount:        153  bytes_req:      36720
-    { call_site: 18446744071582507929 } hitcount:        153  bytes_req:      37088
-    { call_site: 18446744072102520590 } hitcount:        273  bytes_req:      10920
-    { call_site: 18446744071582143559 } hitcount:        358  bytes_req:        716
-    { call_site: 18446744072106465852 } hitcount:        417  bytes_req:      56712
-    { call_site: 18446744072102523378 } hitcount:        485  bytes_req:      27160
-    { call_site: 18446744072099568646 } hitcount:       1676  bytes_req:      33520
-
-    Totals:
-        Hits: 4610
-        Entries: 45
-        Dropped: 0
-
-  The output displays a line for each entry, beginning with the key
-  specified in the trigger, followed by the value(s) also specified in
-  the trigger.  At the beginning of the output is a line that displays
-  the trigger info, which can also be displayed by reading the
-  'trigger' file::
-
-    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
-    hist:keys=call_site:vals=bytes_req:sort=hitcount:size=2048 [active]
-
-  At the end of the output are a few lines that display the overall
-  totals for the run.  The 'Hits' field shows the total number of
-  times the event trigger was hit, the 'Entries' field shows the total
-  number of used entries in the hash table, and the 'Dropped' field
-  shows the number of hits that were dropped because the number of
-  used entries for the run exceeded the maximum number of entries
-  allowed for the table (normally 0, but if not a hint that you may
-  want to increase the size of the table using the 'size' parameter).
-
-  Notice in the above output that there's an extra field, 'hitcount',
-  which wasn't specified in the trigger.  Also notice that in the
-  trigger info output, there's a parameter, 'sort=hitcount', which
-  wasn't specified in the trigger either.  The reason for that is that
-  every trigger implicitly keeps a count of the total number of hits
-  attributed to a given entry, called the 'hitcount'.  That hitcount
-  information is explicitly displayed in the output, and in the
-  absence of a user-specified sort parameter, is used as the default
-  sort field.
-
-  The value 'hitcount' can be used in place of an explicit value in
-  the 'values' parameter if you don't really need to have any
-  particular field summed and are mainly interested in hit
-  frequencies.
-
-  To turn the hist trigger off, simply call up the trigger in the
-  command history and re-execute it with a '!' prepended::
-
-    # echo '!hist:key=call_site:val=bytes_req' > \
-           /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
-
-  Finally, notice that the call_site as displayed in the output above
-  isn't really very useful.  It's an address, but normally addresses
-  are displayed in hex.  To have a numeric field displayed as a hex
-  value, simply append '.hex' to the field name in the trigger::
-
-    # echo 'hist:key=call_site.hex:val=bytes_req' > \
-           /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
-
-    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
-    # trigger info: hist:keys=call_site.hex:vals=bytes_req:sort=hitcount:size=2048 [active]
-
-    { call_site: ffffffffa026b291 } hitcount:          1  bytes_req:        433
-    { call_site: ffffffffa07186ff } hitcount:          1  bytes_req:        176
-    { call_site: ffffffff811ae721 } hitcount:          1  bytes_req:      16384
-    { call_site: ffffffff811c5134 } hitcount:          1  bytes_req:          8
-    { call_site: ffffffffa04a9ebb } hitcount:          1  bytes_req:        511
-    { call_site: ffffffff8122e0a6 } hitcount:          1  bytes_req:         12
-    { call_site: ffffffff8107da84 } hitcount:          1  bytes_req:        152
-    { call_site: ffffffff812d8246 } hitcount:          1  bytes_req:         24
-    { call_site: ffffffff811dc1e5 } hitcount:          3  bytes_req:        144
-    { call_site: ffffffffa02515e8 } hitcount:          3  bytes_req:        648
-    { call_site: ffffffff81258159 } hitcount:          3  bytes_req:        144
-    { call_site: ffffffff811c80f4 } hitcount:          4  bytes_req:        544
-    .
-    .
-    .
-    { call_site: ffffffffa06c7646 } hitcount:        106  bytes_req:       8024
-    { call_site: ffffffffa06cb246 } hitcount:        132  bytes_req:      31680
-    { call_site: ffffffffa06cef7a } hitcount:        132  bytes_req:       2112
-    { call_site: ffffffff8137e399 } hitcount:        132  bytes_req:      23232
-    { call_site: ffffffffa06c941c } hitcount:        185  bytes_req:     171360
-    { call_site: ffffffffa06f2a66 } hitcount:        185  bytes_req:      26640
-    { call_site: ffffffffa036a70e } hitcount:        265  bytes_req:      10600
-    { call_site: ffffffff81325447 } hitcount:        292  bytes_req:        584
-    { call_site: ffffffffa072da3c } hitcount:        446  bytes_req:      60656
-    { call_site: ffffffffa036b1f2 } hitcount:        526  bytes_req:      29456
-    { call_site: ffffffffa0099c06 } hitcount:       1780  bytes_req:      35600
-
-    Totals:
-        Hits: 4775
-        Entries: 46
-        Dropped: 0
-
-  Even that's only marginally more useful - while hex values do look
-  more like addresses, what users are typically more interested in
-  when looking at text addresses are the corresponding symbols
-  instead.  To have an address displayed as symbolic value instead,
-  simply append '.sym' or '.sym-offset' to the field name in the
-  trigger::
-
-    # echo 'hist:key=call_site.sym:val=bytes_req' > \
-           /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
-
-    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
-    # trigger info: hist:keys=call_site.sym:vals=bytes_req:sort=hitcount:size=2048 [active]
-
-    { call_site: [ffffffff810adcb9] syslog_print_all                              } hitcount:          1  bytes_req:       1024
-    { call_site: [ffffffff8154bc62] usb_control_msg                               } hitcount:          1  bytes_req:          8
-    { call_site: [ffffffffa00bf6fe] hidraw_send_report [hid]                      } hitcount:          1  bytes_req:          7
-    { call_site: [ffffffff8154acbe] usb_alloc_urb                                 } hitcount:          1  bytes_req:        192
-    { call_site: [ffffffffa00bf1ca] hidraw_report_event [hid]                     } hitcount:          1  bytes_req:          7
-    { call_site: [ffffffff811e3a25] __seq_open_private                            } hitcount:          1  bytes_req:         40
-    { call_site: [ffffffff8109524a] alloc_fair_sched_group                        } hitcount:          2  bytes_req:        128
-    { call_site: [ffffffff811febd5] fsnotify_alloc_group                          } hitcount:          2  bytes_req:        528
-    { call_site: [ffffffff81440f58] __tty_buffer_request_room                     } hitcount:          2  bytes_req:       2624
-    { call_site: [ffffffff81200ba6] inotify_new_group                             } hitcount:          2  bytes_req:         96
-    { call_site: [ffffffffa05e19af] ieee80211_start_tx_ba_session [mac80211]      } hitcount:          2  bytes_req:        464
-    { call_site: [ffffffff81672406] tcp_get_metrics                               } hitcount:          2  bytes_req:        304
-    { call_site: [ffffffff81097ec2] alloc_rt_sched_group                          } hitcount:          2  bytes_req:        128
-    { call_site: [ffffffff81089b05] sched_create_group                            } hitcount:          2  bytes_req:       1424
-    .
-    .
-    .
-    { call_site: [ffffffffa04a580c] intel_crtc_page_flip [i915]                   } hitcount:       1185  bytes_req:     123240
-    { call_site: [ffffffffa0287592] drm_mode_page_flip_ioctl [drm]                } hitcount:       1185  bytes_req:     104280
-    { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state [i915]            } hitcount:       1402  bytes_req:     190672
-    { call_site: [ffffffff812891ca] ext4_find_extent                              } hitcount:       1518  bytes_req:     146208
-    { call_site: [ffffffffa029070e] drm_vma_node_allow [drm]                      } hitcount:       1746  bytes_req:      69840
-    { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23 [i915]         } hitcount:       2021  bytes_req:     792312
-    { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc [drm]                   } hitcount:       2592  bytes_req:     145152
-    { call_site: [ffffffffa0489a66] intel_ring_begin [i915]                       } hitcount:       2629  bytes_req:     378576
-    { call_site: [ffffffffa046041c] i915_gem_execbuffer2 [i915]                   } hitcount:       2629  bytes_req:    3783248
-    { call_site: [ffffffff81325607] apparmor_file_alloc_security                  } hitcount:       5192  bytes_req:      10384
-    { call_site: [ffffffffa00b7c06] hid_report_raw_event [hid]                    } hitcount:       5529  bytes_req:     110584
-    { call_site: [ffffffff8131ebf7] aa_alloc_task_context                         } hitcount:      21943  bytes_req:     702176
-    { call_site: [ffffffff8125847d] ext4_htree_store_dirent                       } hitcount:      55759  bytes_req:    5074265
-
-    Totals:
-        Hits: 109928
-        Entries: 71
-        Dropped: 0
-
-  Because the default sort key above is 'hitcount', the above shows a
-  the list of call_sites by increasing hitcount, so that at the bottom
-  we see the functions that made the most kmalloc calls during the
-  run.  If instead we we wanted to see the top kmalloc callers in
-  terms of the number of bytes requested rather than the number of
-  calls, and we wanted the top caller to appear at the top, we can use
-  the 'sort' parameter, along with the 'descending' modifier::
-
-    # echo 'hist:key=call_site.sym:val=bytes_req:sort=bytes_req.descending' > \
-           /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
-
-    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
-    # trigger info: hist:keys=call_site.sym:vals=bytes_req:sort=bytes_req.descending:size=2048 [active]
-
-    { call_site: [ffffffffa046041c] i915_gem_execbuffer2 [i915]                   } hitcount:       2186  bytes_req:    3397464
-    { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23 [i915]         } hitcount:       1790  bytes_req:     712176
-    { call_site: [ffffffff8125847d] ext4_htree_store_dirent                       } hitcount:       8132  bytes_req:     513135
-    { call_site: [ffffffff811e2a1b] seq_buf_alloc                                 } hitcount:        106  bytes_req:     440128
-    { call_site: [ffffffffa0489a66] intel_ring_begin [i915]                       } hitcount:       2186  bytes_req:     314784
-    { call_site: [ffffffff812891ca] ext4_find_extent                              } hitcount:       2174  bytes_req:     208992
-    { call_site: [ffffffff811ae8e1] __kmalloc                                     } hitcount:          8  bytes_req:     131072
-    { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state [i915]            } hitcount:        859  bytes_req:     116824
-    { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc [drm]                   } hitcount:       1834  bytes_req:     102704
-    { call_site: [ffffffffa04a580c] intel_crtc_page_flip [i915]                   } hitcount:        972  bytes_req:     101088
-    { call_site: [ffffffffa0287592] drm_mode_page_flip_ioctl [drm]                } hitcount:        972  bytes_req:      85536
-    { call_site: [ffffffffa00b7c06] hid_report_raw_event [hid]                    } hitcount:       3333  bytes_req:      66664
-    { call_site: [ffffffff8137e559] sg_kmalloc                                    } hitcount:        209  bytes_req:      61632
-    .
-    .
-    .
-    { call_site: [ffffffff81095225] alloc_fair_sched_group                        } hitcount:          2  bytes_req:        128
-    { call_site: [ffffffff81097ec2] alloc_rt_sched_group                          } hitcount:          2  bytes_req:        128
-    { call_site: [ffffffff812d8406] copy_semundo                                  } hitcount:          2  bytes_req:         48
-    { call_site: [ffffffff81200ba6] inotify_new_group                             } hitcount:          1  bytes_req:         48
-    { call_site: [ffffffffa027121a] drm_getmagic [drm]                            } hitcount:          1  bytes_req:         48
-    { call_site: [ffffffff811e3a25] __seq_open_private                            } hitcount:          1  bytes_req:         40
-    { call_site: [ffffffff811c52f4] bprm_change_interp                            } hitcount:          2  bytes_req:         16
-    { call_site: [ffffffff8154bc62] usb_control_msg                               } hitcount:          1  bytes_req:          8
-    { call_site: [ffffffffa00bf1ca] hidraw_report_event [hid]                     } hitcount:          1  bytes_req:          7
-    { call_site: [ffffffffa00bf6fe] hidraw_send_report [hid]                      } hitcount:          1  bytes_req:          7
-
-    Totals:
-        Hits: 32133
-        Entries: 81
-        Dropped: 0
-
-  To display the offset and size information in addition to the symbol
-  name, just use 'sym-offset' instead::
-
-    # echo 'hist:key=call_site.sym-offset:val=bytes_req:sort=bytes_req.descending' > \
-           /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
-
-    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
-    # trigger info: hist:keys=call_site.sym-offset:vals=bytes_req:sort=bytes_req.descending:size=2048 [active]
-
-    { call_site: [ffffffffa046041c] i915_gem_execbuffer2+0x6c/0x2c0 [i915]                  } hitcount:       4569  bytes_req:    3163720
-    { call_site: [ffffffffa0489a66] intel_ring_begin+0xc6/0x1f0 [i915]                      } hitcount:       4569  bytes_req:     657936
-    { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23+0x694/0x1020 [i915]      } hitcount:       1519  bytes_req:     472936
-    { call_site: [ffffffffa045e646] i915_gem_do_execbuffer.isra.23+0x516/0x1020 [i915]      } hitcount:       3050  bytes_req:     211832
-    { call_site: [ffffffff811e2a1b] seq_buf_alloc+0x1b/0x50                                 } hitcount:         34  bytes_req:     148384
-    { call_site: [ffffffffa04a580c] intel_crtc_page_flip+0xbc/0x870 [i915]                  } hitcount:       1385  bytes_req:     144040
-    { call_site: [ffffffff811ae8e1] __kmalloc+0x191/0x1b0                                   } hitcount:          8  bytes_req:     131072
-    { call_site: [ffffffffa0287592] drm_mode_page_flip_ioctl+0x282/0x360 [drm]              } hitcount:       1385  bytes_req:     121880
-    { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc+0x32/0x100 [drm]                  } hitcount:       1848  bytes_req:     103488
-    { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state+0x2c/0xa0 [i915]            } hitcount:        461  bytes_req:      62696
-    { call_site: [ffffffffa029070e] drm_vma_node_allow+0x2e/0xd0 [drm]                      } hitcount:       1541  bytes_req:      61640
-    { call_site: [ffffffff815f8d7b] sk_prot_alloc+0xcb/0x1b0                                } hitcount:         57  bytes_req:      57456
-    .
-    .
-    .
-    { call_site: [ffffffff8109524a] alloc_fair_sched_group+0x5a/0x1a0                       } hitcount:          2  bytes_req:        128
-    { call_site: [ffffffffa027b921] drm_vm_open_locked+0x31/0xa0 [drm]                      } hitcount:          3  bytes_req:         96
-    { call_site: [ffffffff8122e266] proc_self_follow_link+0x76/0xb0                         } hitcount:          8  bytes_req:         96
-    { call_site: [ffffffff81213e80] load_elf_binary+0x240/0x1650                            } hitcount:          3  bytes_req:         84
-    { call_site: [ffffffff8154bc62] usb_control_msg+0x42/0x110                              } hitcount:          1  bytes_req:          8
-    { call_site: [ffffffffa00bf6fe] hidraw_send_report+0x7e/0x1a0 [hid]                     } hitcount:          1  bytes_req:          7
-    { call_site: [ffffffffa00bf1ca] hidraw_report_event+0x8a/0x120 [hid]                    } hitcount:          1  bytes_req:          7
-
-    Totals:
-        Hits: 26098
-        Entries: 64
-        Dropped: 0
-
-  We can also add multiple fields to the 'values' parameter.  For
-  example, we might want to see the total number of bytes allocated
-  alongside bytes requested, and display the result sorted by bytes
-  allocated in a descending order::
-
-    # echo 'hist:keys=call_site.sym:values=bytes_req,bytes_alloc:sort=bytes_alloc.descending' > \
-           /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
-
-    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
-    # trigger info: hist:keys=call_site.sym:vals=bytes_req,bytes_alloc:sort=bytes_alloc.descending:size=2048 [active]
-
-    { call_site: [ffffffffa046041c] i915_gem_execbuffer2 [i915]                   } hitcount:       7403  bytes_req:    4084360  bytes_alloc:    5958016
-    { call_site: [ffffffff811e2a1b] seq_buf_alloc                                 } hitcount:        541  bytes_req:    2213968  bytes_alloc:    2228224
-    { call_site: [ffffffffa0489a66] intel_ring_begin [i915]                       } hitcount:       7404  bytes_req:    1066176  bytes_alloc:    1421568
-    { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23 [i915]         } hitcount:       1565  bytes_req:     557368  bytes_alloc:    1037760
-    { call_site: [ffffffff8125847d] ext4_htree_store_dirent                       } hitcount:       9557  bytes_req:     595778  bytes_alloc:     695744
-    { call_site: [ffffffffa045e646] i915_gem_do_execbuffer.isra.23 [i915]         } hitcount:       5839  bytes_req:     430680  bytes_alloc:     470400
-    { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state [i915]            } hitcount:       2388  bytes_req:     324768  bytes_alloc:     458496
-    { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc [drm]                   } hitcount:       3911  bytes_req:     219016  bytes_alloc:     250304
-    { call_site: [ffffffff815f8d7b] sk_prot_alloc                                 } hitcount:        235  bytes_req:     236880  bytes_alloc:     240640
-    { call_site: [ffffffff8137e559] sg_kmalloc                                    } hitcount:        557  bytes_req:     169024  bytes_alloc:     221760
-    { call_site: [ffffffffa00b7c06] hid_report_raw_event [hid]                    } hitcount:       9378  bytes_req:     187548  bytes_alloc:     206312
-    { call_site: [ffffffffa04a580c] intel_crtc_page_flip [i915]                   } hitcount:       1519  bytes_req:     157976  bytes_alloc:     194432
-    .
-    .
-    .
-    { call_site: [ffffffff8109bd3b] sched_autogroup_create_attach                 } hitcount:          2  bytes_req:        144  bytes_alloc:        192
-    { call_site: [ffffffff81097ee8] alloc_rt_sched_group                          } hitcount:          2  bytes_req:        128  bytes_alloc:        128
-    { call_site: [ffffffff8109524a] alloc_fair_sched_group                        } hitcount:          2  bytes_req:        128  bytes_alloc:        128
-    { call_site: [ffffffff81095225] alloc_fair_sched_group                        } hitcount:          2  bytes_req:        128  bytes_alloc:        128
-    { call_site: [ffffffff81097ec2] alloc_rt_sched_group                          } hitcount:          2  bytes_req:        128  bytes_alloc:        128
-    { call_site: [ffffffff81213e80] load_elf_binary                               } hitcount:          3  bytes_req:         84  bytes_alloc:         96
-    { call_site: [ffffffff81079a2e] kthread_create_on_node                        } hitcount:          1  bytes_req:         56  bytes_alloc:         64
-    { call_site: [ffffffffa00bf6fe] hidraw_send_report [hid]                      } hitcount:          1  bytes_req:          7  bytes_alloc:          8
-    { call_site: [ffffffff8154bc62] usb_control_msg                               } hitcount:          1  bytes_req:          8  bytes_alloc:          8
-    { call_site: [ffffffffa00bf1ca] hidraw_report_event [hid]                     } hitcount:          1  bytes_req:          7  bytes_alloc:          8
-
-    Totals:
-        Hits: 66598
-        Entries: 65
-        Dropped: 0
-
-  Finally, to finish off our kmalloc example, instead of simply having
-  the hist trigger display symbolic call_sites, we can have the hist
-  trigger additionally display the complete set of kernel stack traces
-  that led to each call_site.  To do that, we simply use the special
-  value 'stacktrace' for the key parameter::
-
-    # echo 'hist:keys=stacktrace:values=bytes_req,bytes_alloc:sort=bytes_alloc' > \
-           /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
-
-  The above trigger will use the kernel stack trace in effect when an
-  event is triggered as the key for the hash table.  This allows the
-  enumeration of every kernel callpath that led up to a particular
-  event, along with a running total of any of the event fields for
-  that event.  Here we tally bytes requested and bytes allocated for
-  every callpath in the system that led up to a kmalloc (in this case
-  every callpath to a kmalloc for a kernel compile)::
-
-    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
-    # trigger info: hist:keys=stacktrace:vals=bytes_req,bytes_alloc:sort=bytes_alloc:size=2048 [active]
-
-    { stacktrace:
-         __kmalloc_track_caller+0x10b/0x1a0
-         kmemdup+0x20/0x50
-         hidraw_report_event+0x8a/0x120 [hid]
-         hid_report_raw_event+0x3ea/0x440 [hid]
-         hid_input_report+0x112/0x190 [hid]
-         hid_irq_in+0xc2/0x260 [usbhid]
-         __usb_hcd_giveback_urb+0x72/0x120
-         usb_giveback_urb_bh+0x9e/0xe0
-         tasklet_hi_action+0xf8/0x100
-         __do_softirq+0x114/0x2c0
-         irq_exit+0xa5/0xb0
-         do_IRQ+0x5a/0xf0
-         ret_from_intr+0x0/0x30
-         cpuidle_enter+0x17/0x20
-         cpu_startup_entry+0x315/0x3e0
-         rest_init+0x7c/0x80
-    } hitcount:          3  bytes_req:         21  bytes_alloc:         24
-    { stacktrace:
-         __kmalloc_track_caller+0x10b/0x1a0
-         kmemdup+0x20/0x50
-         hidraw_report_event+0x8a/0x120 [hid]
-         hid_report_raw_event+0x3ea/0x440 [hid]
-         hid_input_report+0x112/0x190 [hid]
-         hid_irq_in+0xc2/0x260 [usbhid]
-         __usb_hcd_giveback_urb+0x72/0x120
-         usb_giveback_urb_bh+0x9e/0xe0
-         tasklet_hi_action+0xf8/0x100
-         __do_softirq+0x114/0x2c0
-         irq_exit+0xa5/0xb0
-         do_IRQ+0x5a/0xf0
-         ret_from_intr+0x0/0x30
-    } hitcount:          3  bytes_req:         21  bytes_alloc:         24
-    { stacktrace:
-         kmem_cache_alloc_trace+0xeb/0x150
-         aa_alloc_task_context+0x27/0x40
-         apparmor_cred_prepare+0x1f/0x50
-         security_prepare_creds+0x16/0x20
-         prepare_creds+0xdf/0x1a0
-         SyS_capset+0xb5/0x200
-         system_call_fastpath+0x12/0x6a
-    } hitcount:          1  bytes_req:         32  bytes_alloc:         32
-    .
-    .
-    .
-    { stacktrace:
-         __kmalloc+0x11b/0x1b0
-         i915_gem_execbuffer2+0x6c/0x2c0 [i915]
-         drm_ioctl+0x349/0x670 [drm]
-         do_vfs_ioctl+0x2f0/0x4f0
-         SyS_ioctl+0x81/0xa0
-         system_call_fastpath+0x12/0x6a
-    } hitcount:      17726  bytes_req:   13944120  bytes_alloc:   19593808
-    { stacktrace:
-         __kmalloc+0x11b/0x1b0
-         load_elf_phdrs+0x76/0xa0
-         load_elf_binary+0x102/0x1650
-         search_binary_handler+0x97/0x1d0
-         do_execveat_common.isra.34+0x551/0x6e0
-         SyS_execve+0x3a/0x50
-         return_from_execve+0x0/0x23
-    } hitcount:      33348  bytes_req:   17152128  bytes_alloc:   20226048
-    { stacktrace:
-         kmem_cache_alloc_trace+0xeb/0x150
-         apparmor_file_alloc_security+0x27/0x40
-         security_file_alloc+0x16/0x20
-         get_empty_filp+0x93/0x1c0
-         path_openat+0x31/0x5f0
-         do_filp_open+0x3a/0x90
-         do_sys_open+0x128/0x220
-         SyS_open+0x1e/0x20
-         system_call_fastpath+0x12/0x6a
-    } hitcount:    4766422  bytes_req:    9532844  bytes_alloc:   38131376
-    { stacktrace:
-         __kmalloc+0x11b/0x1b0
-         seq_buf_alloc+0x1b/0x50
-         seq_read+0x2cc/0x370
-         proc_reg_read+0x3d/0x80
-         __vfs_read+0x28/0xe0
-         vfs_read+0x86/0x140
-         SyS_read+0x46/0xb0
-         system_call_fastpath+0x12/0x6a
-    } hitcount:      19133  bytes_req:   78368768  bytes_alloc:   78368768
-
-    Totals:
-        Hits: 6085872
-        Entries: 253
-        Dropped: 0
-
-  If you key a hist trigger on common_pid, in order for example to
-  gather and display sorted totals for each process, you can use the
-  special .execname modifier to display the executable names for the
-  processes in the table rather than raw pids.  The example below
-  keeps a per-process sum of total bytes read::
-
-    # echo 'hist:key=common_pid.execname:val=count:sort=count.descending' > \
-           /sys/kernel/debug/tracing/events/syscalls/sys_enter_read/trigger
-
-    # cat /sys/kernel/debug/tracing/events/syscalls/sys_enter_read/hist
-    # trigger info: hist:keys=common_pid.execname:vals=count:sort=count.descending:size=2048 [active]
-
-    { common_pid: gnome-terminal  [      3196] } hitcount:        280  count:    1093512
-    { common_pid: Xorg            [      1309] } hitcount:        525  count:     256640
-    { common_pid: compiz          [      2889] } hitcount:         59  count:     254400
-    { common_pid: bash            [      8710] } hitcount:          3  count:      66369
-    { common_pid: dbus-daemon-lau [      8703] } hitcount:         49  count:      47739
-    { common_pid: irqbalance      [      1252] } hitcount:         27  count:      27648
-    { common_pid: 01ifupdown      [      8705] } hitcount:          3  count:      17216
-    { common_pid: dbus-daemon     [       772] } hitcount:         10  count:      12396
-    { common_pid: Socket Thread   [      8342] } hitcount:         11  count:      11264
-    { common_pid: nm-dhcp-client. [      8701] } hitcount:          6  count:       7424
-    { common_pid: gmain           [      1315] } hitcount:         18  count:       6336
-    .
-    .
-    .
-    { common_pid: postgres        [      1892] } hitcount:          2  count:         32
-    { common_pid: postgres        [      1891] } hitcount:          2  count:         32
-    { common_pid: gmain           [      8704] } hitcount:          2  count:         32
-    { common_pid: upstart-dbus-br [      2740] } hitcount:         21  count:         21
-    { common_pid: nm-dispatcher.a [      8696] } hitcount:          1  count:         16
-    { common_pid: indicator-datet [      2904] } hitcount:          1  count:         16
-    { common_pid: gdbus           [      2998] } hitcount:          1  count:         16
-    { common_pid: rtkit-daemon    [      2052] } hitcount:          1  count:          8
-    { common_pid: init            [         1] } hitcount:          2  count:          2
-
-    Totals:
-        Hits: 2116
-        Entries: 51
-        Dropped: 0
-
-  Similarly, if you key a hist trigger on syscall id, for example to
-  gather and display a list of systemwide syscall hits, you can use
-  the special .syscall modifier to display the syscall names rather
-  than raw ids.  The example below keeps a running total of syscall
-  counts for the system during the run::
-
-    # echo 'hist:key=id.syscall:val=hitcount' > \
-           /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger
-
-    # cat /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/hist
-    # trigger info: hist:keys=id.syscall:vals=hitcount:sort=hitcount:size=2048 [active]
-
-    { id: sys_fsync                     [ 74] } hitcount:          1
-    { id: sys_newuname                  [ 63] } hitcount:          1
-    { id: sys_prctl                     [157] } hitcount:          1
-    { id: sys_statfs                    [137] } hitcount:          1
-    { id: sys_symlink                   [ 88] } hitcount:          1
-    { id: sys_sendmmsg                  [307] } hitcount:          1
-    { id: sys_semctl                    [ 66] } hitcount:          1
-    { id: sys_readlink                  [ 89] } hitcount:          3
-    { id: sys_bind                      [ 49] } hitcount:          3
-    { id: sys_getsockname               [ 51] } hitcount:          3
-    { id: sys_unlink                    [ 87] } hitcount:          3
-    { id: sys_rename                    [ 82] } hitcount:          4
-    { id: unknown_syscall               [ 58] } hitcount:          4
-    { id: sys_connect                   [ 42] } hitcount:          4
-    { id: sys_getpid                    [ 39] } hitcount:          4
-    .
-    .
-    .
-    { id: sys_rt_sigprocmask            [ 14] } hitcount:        952
-    { id: sys_futex                     [202] } hitcount:       1534
-    { id: sys_write                     [  1] } hitcount:       2689
-    { id: sys_setitimer                 [ 38] } hitcount:       2797
-    { id: sys_read                      [  0] } hitcount:       3202
-    { id: sys_select                    [ 23] } hitcount:       3773
-    { id: sys_writev                    [ 20] } hitcount:       4531
-    { id: sys_poll                      [  7] } hitcount:       8314
-    { id: sys_recvmsg                   [ 47] } hitcount:      13738
-    { id: sys_ioctl                     [ 16] } hitcount:      21843
-
-    Totals:
-        Hits: 67612
-        Entries: 72
-        Dropped: 0
-
-  The syscall counts above provide a rough overall picture of system
-  call activity on the system; we can see for example that the most
-  popular system call on this system was the 'sys_ioctl' system call.
-
-  We can use 'compound' keys to refine that number and provide some
-  further insight as to which processes exactly contribute to the
-  overall ioctl count.
-
-  The command below keeps a hitcount for every unique combination of
-  system call id and pid - the end result is essentially a table
-  that keeps a per-pid sum of system call hits.  The results are
-  sorted using the system call id as the primary key, and the
-  hitcount sum as the secondary key::
-
-      # echo 'hist:key=id.syscall,common_pid.execname:val=hitcount:sort=id,hitcount' > \
-             /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger
-
-      # cat /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/hist
-      # trigger info: hist:keys=id.syscall,common_pid.execname:vals=hitcount:sort=id.syscall,hitcount:size=2048 [active]
-
-      { id: sys_read                      [  0], common_pid: rtkit-daemon    [      1877] } hitcount:          1
-      { id: sys_read                      [  0], common_pid: gdbus           [      2976] } hitcount:          1
-      { id: sys_read                      [  0], common_pid: console-kit-dae [      3400] } hitcount:          1
-      { id: sys_read                      [  0], common_pid: postgres        [      1865] } hitcount:          1
-      { id: sys_read                      [  0], common_pid: deja-dup-monito [      3543] } hitcount:          2
-      { id: sys_read                      [  0], common_pid: NetworkManager  [       890] } hitcount:          2
-      { id: sys_read                      [  0], common_pid: evolution-calen [      3048] } hitcount:          2
-      { id: sys_read                      [  0], common_pid: postgres        [      1864] } hitcount:          2
-      { id: sys_read                      [  0], common_pid: nm-applet       [      3022] } hitcount:          2
-      { id: sys_read                      [  0], common_pid: whoopsie        [      1212] } hitcount:          2
-      .
-      .
-      .
-      { id: sys_ioctl                     [ 16], common_pid: bash            [      8479] } hitcount:          1
-      { id: sys_ioctl                     [ 16], common_pid: bash            [      3472] } hitcount:         12
-      { id: sys_ioctl                     [ 16], common_pid: gnome-terminal  [      3199] } hitcount:         16
-      { id: sys_ioctl                     [ 16], common_pid: Xorg            [      1267] } hitcount:       1808
-      { id: sys_ioctl                     [ 16], common_pid: compiz          [      2994] } hitcount:       5580
-      .
-      .
-      .
-      { id: sys_waitid                    [247], common_pid: upstart-dbus-br [      2690] } hitcount:          3
-      { id: sys_waitid                    [247], common_pid: upstart-dbus-br [      2688] } hitcount:         16
-      { id: sys_inotify_add_watch         [254], common_pid: gmain           [       975] } hitcount:          2
-      { id: sys_inotify_add_watch         [254], common_pid: gmain           [      3204] } hitcount:          4
-      { id: sys_inotify_add_watch         [254], common_pid: gmain           [      2888] } hitcount:          4
-      { id: sys_inotify_add_watch         [254], common_pid: gmain           [      3003] } hitcount:          4
-      { id: sys_inotify_add_watch         [254], common_pid: gmain           [      2873] } hitcount:          4
-      { id: sys_inotify_add_watch         [254], common_pid: gmain           [      3196] } hitcount:          6
-      { id: sys_openat                    [257], common_pid: java            [      2623] } hitcount:          2
-      { id: sys_eventfd2                  [290], common_pid: ibus-ui-gtk3    [      2760] } hitcount:          4
-      { id: sys_eventfd2                  [290], common_pid: compiz          [      2994] } hitcount:          6
-
-      Totals:
-          Hits: 31536
-          Entries: 323
-          Dropped: 0
-
-  The above list does give us a breakdown of the ioctl syscall by
-  pid, but it also gives us quite a bit more than that, which we
-  don't really care about at the moment.  Since we know the syscall
-  id for sys_ioctl (16, displayed next to the sys_ioctl name), we
-  can use that to filter out all the other syscalls::
-
-      # echo 'hist:key=id.syscall,common_pid.execname:val=hitcount:sort=id,hitcount if id == 16' > \
-             /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger
-
-      # cat /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/hist
-      # trigger info: hist:keys=id.syscall,common_pid.execname:vals=hitcount:sort=id.syscall,hitcount:size=2048 if id == 16 [active]
-
-      { id: sys_ioctl                     [ 16], common_pid: gmain           [      2769] } hitcount:          1
-      { id: sys_ioctl                     [ 16], common_pid: evolution-addre [      8571] } hitcount:          1
-      { id: sys_ioctl                     [ 16], common_pid: gmain           [      3003] } hitcount:          1
-      { id: sys_ioctl                     [ 16], common_pid: gmain           [      2781] } hitcount:          1
-      { id: sys_ioctl                     [ 16], common_pid: gmain           [      2829] } hitcount:          1
-      { id: sys_ioctl                     [ 16], common_pid: bash            [      8726] } hitcount:          1
-      { id: sys_ioctl                     [ 16], common_pid: bash            [      8508] } hitcount:          1
-      { id: sys_ioctl                     [ 16], common_pid: gmain           [      2970] } hitcount:          1
-      { id: sys_ioctl                     [ 16], common_pid: gmain           [      2768] } hitcount:          1
-      .
-      .
-      .
-      { id: sys_ioctl                     [ 16], common_pid: pool            [      8559] } hitcount:         45
-      { id: sys_ioctl                     [ 16], common_pid: pool            [      8555] } hitcount:         48
-      { id: sys_ioctl                     [ 16], common_pid: pool            [      8551] } hitcount:         48
-      { id: sys_ioctl                     [ 16], common_pid: avahi-daemon    [       896] } hitcount:         66
-      { id: sys_ioctl                     [ 16], common_pid: Xorg            [      1267] } hitcount:      26674
-      { id: sys_ioctl                     [ 16], common_pid: compiz          [      2994] } hitcount:      73443
-
-      Totals:
-          Hits: 101162
-          Entries: 103
-          Dropped: 0
-
-  The above output shows that 'compiz' and 'Xorg' are far and away
-  the heaviest ioctl callers (which might lead to questions about
-  whether they really need to be making all those calls and to
-  possible avenues for further investigation.)
-
-  The compound key examples used a key and a sum value (hitcount) to
-  sort the output, but we can just as easily use two keys instead.
-  Here's an example where we use a compound key composed of the the
-  common_pid and size event fields.  Sorting with pid as the primary
-  key and 'size' as the secondary key allows us to display an
-  ordered summary of the recvfrom sizes, with counts, received by
-  each process::
-
-      # echo 'hist:key=common_pid.execname,size:val=hitcount:sort=common_pid,size' > \
-             /sys/kernel/debug/tracing/events/syscalls/sys_enter_recvfrom/trigger
-
-      # cat /sys/kernel/debug/tracing/events/syscalls/sys_enter_recvfrom/hist
-      # trigger info: hist:keys=common_pid.execname,size:vals=hitcount:sort=common_pid.execname,size:size=2048 [active]
-
-      { common_pid: smbd            [       784], size:          4 } hitcount:          1
-      { common_pid: dnsmasq         [      1412], size:       4096 } hitcount:        672
-      { common_pid: postgres        [      1796], size:       1000 } hitcount:          6
-      { common_pid: postgres        [      1867], size:       1000 } hitcount:         10
-      { common_pid: bamfdaemon      [      2787], size:         28 } hitcount:          2
-      { common_pid: bamfdaemon      [      2787], size:      14360 } hitcount:          1
-      { common_pid: compiz          [      2994], size:          8 } hitcount:          1
-      { common_pid: compiz          [      2994], size:         20 } hitcount:         11
-      { common_pid: gnome-terminal  [      3199], size:          4 } hitcount:          2
-      { common_pid: firefox         [      8817], size:          4 } hitcount:          1
-      { common_pid: firefox         [      8817], size:          8 } hitcount:          5
-      { common_pid: firefox         [      8817], size:        588 } hitcount:          2
-      { common_pid: firefox         [      8817], size:        628 } hitcount:          1
-      { common_pid: firefox         [      8817], size:       6944 } hitcount:          1
-      { common_pid: firefox         [      8817], size:     408880 } hitcount:          2
-      { common_pid: firefox         [      8822], size:          8 } hitcount:          2
-      { common_pid: firefox         [      8822], size:        160 } hitcount:          2
-      { common_pid: firefox         [      8822], size:        320 } hitcount:          2
-      { common_pid: firefox         [      8822], size:        352 } hitcount:          1
-      .
-      .
-      .
-      { common_pid: pool            [      8923], size:       1960 } hitcount:         10
-      { common_pid: pool            [      8923], size:       2048 } hitcount:         10
-      { common_pid: pool            [      8924], size:       1960 } hitcount:         10
-      { common_pid: pool            [      8924], size:       2048 } hitcount:         10
-      { common_pid: pool            [      8928], size:       1964 } hitcount:          4
-      { common_pid: pool            [      8928], size:       1965 } hitcount:          2
-      { common_pid: pool            [      8928], size:       2048 } hitcount:          6
-      { common_pid: pool            [      8929], size:       1982 } hitcount:          1
-      { common_pid: pool            [      8929], size:       2048 } hitcount:          1
-
-      Totals:
-          Hits: 2016
-          Entries: 224
-          Dropped: 0
-
-  The above example also illustrates the fact that although a compound
-  key is treated as a single entity for hashing purposes, the sub-keys
-  it's composed of can be accessed independently.
-
-  The next example uses a string field as the hash key and
-  demonstrates how you can manually pause and continue a hist trigger.
-  In this example, we'll aggregate fork counts and don't expect a
-  large number of entries in the hash table, so we'll drop it to a
-  much smaller number, say 256::
-
-    # echo 'hist:key=child_comm:val=hitcount:size=256' > \
-           /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger
-
-    # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist
-    # trigger info: hist:keys=child_comm:vals=hitcount:sort=hitcount:size=256 [active]
-
-    { child_comm: dconf worker                        } hitcount:          1
-    { child_comm: ibus-daemon                         } hitcount:          1
-    { child_comm: whoopsie                            } hitcount:          1
-    { child_comm: smbd                                } hitcount:          1
-    { child_comm: gdbus                               } hitcount:          1
-    { child_comm: kthreadd                            } hitcount:          1
-    { child_comm: dconf worker                        } hitcount:          1
-    { child_comm: evolution-alarm                     } hitcount:          2
-    { child_comm: Socket Thread                       } hitcount:          2
-    { child_comm: postgres                            } hitcount:          2
-    { child_comm: bash                                } hitcount:          3
-    { child_comm: compiz                              } hitcount:          3
-    { child_comm: evolution-sourc                     } hitcount:          4
-    { child_comm: dhclient                            } hitcount:          4
-    { child_comm: pool                                } hitcount:          5
-    { child_comm: nm-dispatcher.a                     } hitcount:          8
-    { child_comm: firefox                             } hitcount:          8
-    { child_comm: dbus-daemon                         } hitcount:          8
-    { child_comm: glib-pacrunner                      } hitcount:         10
-    { child_comm: evolution                           } hitcount:         23
-
-    Totals:
-        Hits: 89
-        Entries: 20
-        Dropped: 0
-
-  If we want to pause the hist trigger, we can simply append :pause to
-  the command that started the trigger.  Notice that the trigger info
-  displays as [paused]::
-
-    # echo 'hist:key=child_comm:val=hitcount:size=256:pause' >> \
-           /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger
-
-    # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist
-    # trigger info: hist:keys=child_comm:vals=hitcount:sort=hitcount:size=256 [paused]
-
-    { child_comm: dconf worker                        } hitcount:          1
-    { child_comm: kthreadd                            } hitcount:          1
-    { child_comm: dconf worker                        } hitcount:          1
-    { child_comm: gdbus                               } hitcount:          1
-    { child_comm: ibus-daemon                         } hitcount:          1
-    { child_comm: Socket Thread                       } hitcount:          2
-    { child_comm: evolution-alarm                     } hitcount:          2
-    { child_comm: smbd                                } hitcount:          2
-    { child_comm: bash                                } hitcount:          3
-    { child_comm: whoopsie                            } hitcount:          3
-    { child_comm: compiz                              } hitcount:          3
-    { child_comm: evolution-sourc                     } hitcount:          4
-    { child_comm: pool                                } hitcount:          5
-    { child_comm: postgres                            } hitcount:          6
-    { child_comm: firefox                             } hitcount:          8
-    { child_comm: dhclient                            } hitcount:         10
-    { child_comm: emacs                               } hitcount:         12
-    { child_comm: dbus-daemon                         } hitcount:         20
-    { child_comm: nm-dispatcher.a                     } hitcount:         20
-    { child_comm: evolution                           } hitcount:         35
-    { child_comm: glib-pacrunner                      } hitcount:         59
-
-    Totals:
-        Hits: 199
-        Entries: 21
-        Dropped: 0
-
-  To manually continue having the trigger aggregate events, append
-  :cont instead.  Notice that the trigger info displays as [active]
-  again, and the data has changed::
-
-    # echo 'hist:key=child_comm:val=hitcount:size=256:cont' >> \
-           /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger
-
-    # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist
-    # trigger info: hist:keys=child_comm:vals=hitcount:sort=hitcount:size=256 [active]
-
-    { child_comm: dconf worker                        } hitcount:          1
-    { child_comm: dconf worker                        } hitcount:          1
-    { child_comm: kthreadd                            } hitcount:          1
-    { child_comm: gdbus                               } hitcount:          1
-    { child_comm: ibus-daemon                         } hitcount:          1
-    { child_comm: Socket Thread                       } hitcount:          2
-    { child_comm: evolution-alarm                     } hitcount:          2
-    { child_comm: smbd                                } hitcount:          2
-    { child_comm: whoopsie                            } hitcount:          3
-    { child_comm: compiz                              } hitcount:          3
-    { child_comm: evolution-sourc                     } hitcount:          4
-    { child_comm: bash                                } hitcount:          5
-    { child_comm: pool                                } hitcount:          5
-    { child_comm: postgres                            } hitcount:          6
-    { child_comm: firefox                             } hitcount:          8
-    { child_comm: dhclient                            } hitcount:         11
-    { child_comm: emacs                               } hitcount:         12
-    { child_comm: dbus-daemon                         } hitcount:         22
-    { child_comm: nm-dispatcher.a                     } hitcount:         22
-    { child_comm: evolution                           } hitcount:         35
-    { child_comm: glib-pacrunner                      } hitcount:         59
-
-    Totals:
-        Hits: 206
-        Entries: 21
-        Dropped: 0
-
-  The previous example showed how to start and stop a hist trigger by
-  appending 'pause' and 'continue' to the hist trigger command.  A
-  hist trigger can also be started in a paused state by initially
-  starting the trigger with ':pause' appended.  This allows you to
-  start the trigger only when you're ready to start collecting data
-  and not before.  For example, you could start the trigger in a
-  paused state, then unpause it and do something you want to measure,
-  then pause the trigger again when done.
-
-  Of course, doing this manually can be difficult and error-prone, but
-  it is possible to automatically start and stop a hist trigger based
-  on some condition, via the enable_hist and disable_hist triggers.
-
-  For example, suppose we wanted to take a look at the relative
-  weights in terms of skb length for each callpath that leads to a
-  netif_receieve_skb event when downloading a decent-sized file using
-  wget.
-
-  First we set up an initially paused stacktrace trigger on the
-  netif_receive_skb event::
-
-    # echo 'hist:key=stacktrace:vals=len:pause' > \
-           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
-
-  Next, we set up an 'enable_hist' trigger on the sched_process_exec
-  event, with an 'if filename==/usr/bin/wget' filter.  The effect of
-  this new trigger is that it will 'unpause' the hist trigger we just
-  set up on netif_receive_skb if and only if it sees a
-  sched_process_exec event with a filename of '/usr/bin/wget'.  When
-  that happens, all netif_receive_skb events are aggregated into a
-  hash table keyed on stacktrace::
-
-    # echo 'enable_hist:net:netif_receive_skb if filename==/usr/bin/wget' > \
-           /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger
-
-  The aggregation continues until the netif_receive_skb is paused
-  again, which is what the following disable_hist event does by
-  creating a similar setup on the sched_process_exit event, using the
-  filter 'comm==wget'::
-
-    # echo 'disable_hist:net:netif_receive_skb if comm==wget' > \
-           /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger
-
-  Whenever a process exits and the comm field of the disable_hist
-  trigger filter matches 'comm==wget', the netif_receive_skb hist
-  trigger is disabled.
-
-  The overall effect is that netif_receive_skb events are aggregated
-  into the hash table for only the duration of the wget.  Executing a
-  wget command and then listing the 'hist' file will display the
-  output generated by the wget command::
-
-    $ wget https://www.kernel.org/pub/linux/kernel/v3.x/patch-3.19.xz
-
-    # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist
-    # trigger info: hist:keys=stacktrace:vals=len:sort=hitcount:size=2048 [paused]
-
-    { stacktrace:
-         __netif_receive_skb_core+0x46d/0x990
-         __netif_receive_skb+0x18/0x60
-         netif_receive_skb_internal+0x23/0x90
-         napi_gro_receive+0xc8/0x100
-         ieee80211_deliver_skb+0xd6/0x270 [mac80211]
-         ieee80211_rx_handlers+0xccf/0x22f0 [mac80211]
-         ieee80211_prepare_and_rx_handle+0x4e7/0xc40 [mac80211]
-         ieee80211_rx+0x31d/0x900 [mac80211]
-         iwlagn_rx_reply_rx+0x3db/0x6f0 [iwldvm]
-         iwl_rx_dispatch+0x8e/0xf0 [iwldvm]
-         iwl_pcie_irq_handler+0xe3c/0x12f0 [iwlwifi]
-         irq_thread_fn+0x20/0x50
-         irq_thread+0x11f/0x150
-         kthread+0xd2/0xf0
-         ret_from_fork+0x42/0x70
-    } hitcount:         85  len:      28884
-    { stacktrace:
-         __netif_receive_skb_core+0x46d/0x990
-         __netif_receive_skb+0x18/0x60
-         netif_receive_skb_internal+0x23/0x90
-         napi_gro_complete+0xa4/0xe0
-         dev_gro_receive+0x23a/0x360
-         napi_gro_receive+0x30/0x100
-         ieee80211_deliver_skb+0xd6/0x270 [mac80211]
-         ieee80211_rx_handlers+0xccf/0x22f0 [mac80211]
-         ieee80211_prepare_and_rx_handle+0x4e7/0xc40 [mac80211]
-         ieee80211_rx+0x31d/0x900 [mac80211]
-         iwlagn_rx_reply_rx+0x3db/0x6f0 [iwldvm]
-         iwl_rx_dispatch+0x8e/0xf0 [iwldvm]
-         iwl_pcie_irq_handler+0xe3c/0x12f0 [iwlwifi]
-         irq_thread_fn+0x20/0x50
-         irq_thread+0x11f/0x150
-         kthread+0xd2/0xf0
-    } hitcount:         98  len:     664329
-    { stacktrace:
-         __netif_receive_skb_core+0x46d/0x990
-         __netif_receive_skb+0x18/0x60
-         process_backlog+0xa8/0x150
-         net_rx_action+0x15d/0x340
-         __do_softirq+0x114/0x2c0
-         do_softirq_own_stack+0x1c/0x30
-         do_softirq+0x65/0x70
-         __local_bh_enable_ip+0xb5/0xc0
-         ip_finish_output+0x1f4/0x840
-         ip_output+0x6b/0xc0
-         ip_local_out_sk+0x31/0x40
-         ip_send_skb+0x1a/0x50
-         udp_send_skb+0x173/0x2a0
-         udp_sendmsg+0x2bf/0x9f0
-         inet_sendmsg+0x64/0xa0
-         sock_sendmsg+0x3d/0x50
-    } hitcount:        115  len:      13030
-    { stacktrace:
-         __netif_receive_skb_core+0x46d/0x990
-         __netif_receive_skb+0x18/0x60
-         netif_receive_skb_internal+0x23/0x90
-         napi_gro_complete+0xa4/0xe0
-         napi_gro_flush+0x6d/0x90
-         iwl_pcie_irq_handler+0x92a/0x12f0 [iwlwifi]
-         irq_thread_fn+0x20/0x50
-         irq_thread+0x11f/0x150
-         kthread+0xd2/0xf0
-         ret_from_fork+0x42/0x70
-    } hitcount:        934  len:    5512212
-
-    Totals:
-        Hits: 1232
-        Entries: 4
-        Dropped: 0
-
-  The above shows all the netif_receive_skb callpaths and their total
-  lengths for the duration of the wget command.
-
-  The 'clear' hist trigger param can be used to clear the hash table.
-  Suppose we wanted to try another run of the previous example but
-  this time also wanted to see the complete list of events that went
-  into the histogram.  In order to avoid having to set everything up
-  again, we can just clear the histogram first::
-
-    # echo 'hist:key=stacktrace:vals=len:clear' >> \
-           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
-
-  Just to verify that it is in fact cleared, here's what we now see in
-  the hist file::
-
-    # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist
-    # trigger info: hist:keys=stacktrace:vals=len:sort=hitcount:size=2048 [paused]
-
-    Totals:
-        Hits: 0
-        Entries: 0
-        Dropped: 0
-
-  Since we want to see the detailed list of every netif_receive_skb
-  event occurring during the new run, which are in fact the same
-  events being aggregated into the hash table, we add some additional
-  'enable_event' events to the triggering sched_process_exec and
-  sched_process_exit events as such::
-
-    # echo 'enable_event:net:netif_receive_skb if filename==/usr/bin/wget' > \
-           /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger
-
-    # echo 'disable_event:net:netif_receive_skb if comm==wget' > \
-           /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger
-
-  If you read the trigger files for the sched_process_exec and
-  sched_process_exit triggers, you should see two triggers for each:
-  one enabling/disabling the hist aggregation and the other
-  enabling/disabling the logging of events::
-
-    # cat /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger
-    enable_event:net:netif_receive_skb:unlimited if filename==/usr/bin/wget
-    enable_hist:net:netif_receive_skb:unlimited if filename==/usr/bin/wget
-
-    # cat /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger
-    enable_event:net:netif_receive_skb:unlimited if comm==wget
-    disable_hist:net:netif_receive_skb:unlimited if comm==wget
-
-  In other words, whenever either of the sched_process_exec or
-  sched_process_exit events is hit and matches 'wget', it enables or
-  disables both the histogram and the event log, and what you end up
-  with is a hash table and set of events just covering the specified
-  duration.  Run the wget command again::
-
-    $ wget https://www.kernel.org/pub/linux/kernel/v3.x/patch-3.19.xz
-
-  Displaying the 'hist' file should show something similar to what you
-  saw in the last run, but this time you should also see the
-  individual events in the trace file::
-
-    # cat /sys/kernel/debug/tracing/trace
-
-    # tracer: nop
-    #
-    # entries-in-buffer/entries-written: 183/1426   #P:4
-    #
-    #                              _-----=> irqs-off
-    #                             / _----=> need-resched
-    #                            | / _---=> hardirq/softirq
-    #                            || / _--=> preempt-depth
-    #                            ||| /     delay
-    #           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION
-    #              | |       |   ||||       |         |
-                wget-15108 [000] ..s1 31769.606929: netif_receive_skb: dev=lo skbaddr=ffff88009c353100 len=60
-                wget-15108 [000] ..s1 31769.606999: netif_receive_skb: dev=lo skbaddr=ffff88009c353200 len=60
-             dnsmasq-1382  [000] ..s1 31769.677652: netif_receive_skb: dev=lo skbaddr=ffff88009c352b00 len=130
-             dnsmasq-1382  [000] ..s1 31769.685917: netif_receive_skb: dev=lo skbaddr=ffff88009c352200 len=138
-    ##### CPU 2 buffer started ####
-      irq/29-iwlwifi-559   [002] ..s. 31772.031529: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d433d00 len=2948
-      irq/29-iwlwifi-559   [002] ..s. 31772.031572: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d432200 len=1500
-      irq/29-iwlwifi-559   [002] ..s. 31772.032196: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d433100 len=2948
-      irq/29-iwlwifi-559   [002] ..s. 31772.032761: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d433000 len=2948
-      irq/29-iwlwifi-559   [002] ..s. 31772.033220: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d432e00 len=1500
-    ....
-
-
-  The following example demonstrates how multiple hist triggers can be
-  attached to a given event.  This capability can be useful for
-  creating a set of different summaries derived from the same set of
-  events, or for comparing the effects of different filters, among
-  other things.
-  ::
-
-    # echo 'hist:keys=skbaddr.hex:vals=len if len < 0' >> \
-           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
-    # echo 'hist:keys=skbaddr.hex:vals=len if len > 4096' >> \
-           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
-    # echo 'hist:keys=skbaddr.hex:vals=len if len == 256' >> \
-           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
-    # echo 'hist:keys=skbaddr.hex:vals=len' >> \
-           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
-    # echo 'hist:keys=len:vals=common_preempt_count' >> \
-           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
-
-  The above set of commands create four triggers differing only in
-  their filters, along with a completely different though fairly
-  nonsensical trigger.  Note that in order to append multiple hist
-  triggers to the same file, you should use the '>>' operator to
-  append them ('>' will also add the new hist trigger, but will remove
-  any existing hist triggers beforehand).
-
-  Displaying the contents of the 'hist' file for the event shows the
-  contents of all five histograms::
-
-    # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist
-
-    # event histogram
-    #
-    # trigger info: hist:keys=len:vals=hitcount,common_preempt_count:sort=hitcount:size=2048 [active]
-    #
-
-    { len:        176 } hitcount:          1  common_preempt_count:          0
-    { len:        223 } hitcount:          1  common_preempt_count:          0
-    { len:       4854 } hitcount:          1  common_preempt_count:          0
-    { len:        395 } hitcount:          1  common_preempt_count:          0
-    { len:        177 } hitcount:          1  common_preempt_count:          0
-    { len:        446 } hitcount:          1  common_preempt_count:          0
-    { len:       1601 } hitcount:          1  common_preempt_count:          0
-    .
-    .
-    .
-    { len:       1280 } hitcount:         66  common_preempt_count:          0
-    { len:        116 } hitcount:         81  common_preempt_count:         40
-    { len:        708 } hitcount:        112  common_preempt_count:          0
-    { len:         46 } hitcount:        221  common_preempt_count:          0
-    { len:       1264 } hitcount:        458  common_preempt_count:          0
-
-    Totals:
-        Hits: 1428
-        Entries: 147
-        Dropped: 0
-
-
-    # event histogram
-    #
-    # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 [active]
-    #
-
-    { skbaddr: ffff8800baee5e00 } hitcount:          1  len:        130
-    { skbaddr: ffff88005f3d5600 } hitcount:          1  len:       1280
-    { skbaddr: ffff88005f3d4900 } hitcount:          1  len:       1280
-    { skbaddr: ffff88009fed6300 } hitcount:          1  len:        115
-    { skbaddr: ffff88009fe0ad00 } hitcount:          1  len:        115
-    { skbaddr: ffff88008cdb1900 } hitcount:          1  len:         46
-    { skbaddr: ffff880064b5ef00 } hitcount:          1  len:        118
-    { skbaddr: ffff880044e3c700 } hitcount:          1  len:         60
-    { skbaddr: ffff880100065900 } hitcount:          1  len:         46
-    { skbaddr: ffff8800d46bd500 } hitcount:          1  len:        116
-    { skbaddr: ffff88005f3d5f00 } hitcount:          1  len:       1280
-    { skbaddr: ffff880100064700 } hitcount:          1  len:        365
-    { skbaddr: ffff8800badb6f00 } hitcount:          1  len:         60
-    .
-    .
-    .
-    { skbaddr: ffff88009fe0be00 } hitcount:         27  len:      24677
-    { skbaddr: ffff88009fe0a400 } hitcount:         27  len:      23052
-    { skbaddr: ffff88009fe0b700 } hitcount:         31  len:      25589
-    { skbaddr: ffff88009fe0b600 } hitcount:         32  len:      27326
-    { skbaddr: ffff88006a462800 } hitcount:         68  len:      71678
-    { skbaddr: ffff88006a463700 } hitcount:         70  len:      72678
-    { skbaddr: ffff88006a462b00 } hitcount:         71  len:      77589
-    { skbaddr: ffff88006a463600 } hitcount:         73  len:      71307
-    { skbaddr: ffff88006a462200 } hitcount:         81  len:      81032
-
-    Totals:
-        Hits: 1451
-        Entries: 318
-        Dropped: 0
-
-
-    # event histogram
-    #
-    # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 if len == 256 [active]
-    #
-
-
-    Totals:
-        Hits: 0
-        Entries: 0
-        Dropped: 0
-
-
-    # event histogram
-    #
-    # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 if len > 4096 [active]
-    #
-
-    { skbaddr: ffff88009fd2c300 } hitcount:          1  len:       7212
-    { skbaddr: ffff8800d2bcce00 } hitcount:          1  len:       7212
-    { skbaddr: ffff8800d2bcd700 } hitcount:          1  len:       7212
-    { skbaddr: ffff8800d2bcda00 } hitcount:          1  len:      21492
-    { skbaddr: ffff8800ae2e2d00 } hitcount:          1  len:       7212
-    { skbaddr: ffff8800d2bcdb00 } hitcount:          1  len:       7212
-    { skbaddr: ffff88006a4df500 } hitcount:          1  len:       4854
-    { skbaddr: ffff88008ce47b00 } hitcount:          1  len:      18636
-    { skbaddr: ffff8800ae2e2200 } hitcount:          1  len:      12924
-    { skbaddr: ffff88005f3e1000 } hitcount:          1  len:       4356
-    { skbaddr: ffff8800d2bcdc00 } hitcount:          2  len:      24420
-    { skbaddr: ffff8800d2bcc200 } hitcount:          2  len:      12996
-
-    Totals:
-        Hits: 14
-        Entries: 12
-        Dropped: 0
-
-
-    # event histogram
-    #
-    # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 if len < 0 [active]
-    #
-
-
-    Totals:
-        Hits: 0
-        Entries: 0
-        Dropped: 0
-
-  Named triggers can be used to have triggers share a common set of
-  histogram data.  This capability is mostly useful for combining the
-  output of events generated by tracepoints contained inside inline
-  functions, but names can be used in a hist trigger on any event.
-  For example, these two triggers when hit will update the same 'len'
-  field in the shared 'foo' histogram data::
-
-    # echo 'hist:name=foo:keys=skbaddr.hex:vals=len' > \
-           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
-    # echo 'hist:name=foo:keys=skbaddr.hex:vals=len' > \
-           /sys/kernel/debug/tracing/events/net/netif_rx/trigger
-
-  You can see that they're updating common histogram data by reading
-  each event's hist files at the same time::
-
-    # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist;
-      cat /sys/kernel/debug/tracing/events/net/netif_rx/hist
-
-    # event histogram
-    #
-    # trigger info: hist:name=foo:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 [active]
-    #
-
-    { skbaddr: ffff88000ad53500 } hitcount:          1  len:         46
-    { skbaddr: ffff8800af5a1500 } hitcount:          1  len:         76
-    { skbaddr: ffff8800d62a1900 } hitcount:          1  len:         46
-    { skbaddr: ffff8800d2bccb00 } hitcount:          1  len:        468
-    { skbaddr: ffff8800d3c69900 } hitcount:          1  len:         46
-    { skbaddr: ffff88009ff09100 } hitcount:          1  len:         52
-    { skbaddr: ffff88010f13ab00 } hitcount:          1  len:        168
-    { skbaddr: ffff88006a54f400 } hitcount:          1  len:         46
-    { skbaddr: ffff8800d2bcc500 } hitcount:          1  len:        260
-    { skbaddr: ffff880064505000 } hitcount:          1  len:         46
-    { skbaddr: ffff8800baf24e00 } hitcount:          1  len:         32
-    { skbaddr: ffff88009fe0ad00 } hitcount:          1  len:         46
-    { skbaddr: ffff8800d3edff00 } hitcount:          1  len:         44
-    { skbaddr: ffff88009fe0b400 } hitcount:          1  len:        168
-    { skbaddr: ffff8800a1c55a00 } hitcount:          1  len:         40
-    { skbaddr: ffff8800d2bcd100 } hitcount:          1  len:         40
-    { skbaddr: ffff880064505f00 } hitcount:          1  len:        174
-    { skbaddr: ffff8800a8bff200 } hitcount:          1  len:        160
-    { skbaddr: ffff880044e3cc00 } hitcount:          1  len:         76
-    { skbaddr: ffff8800a8bfe700 } hitcount:          1  len:         46
-    { skbaddr: ffff8800d2bcdc00 } hitcount:          1  len:         32
-    { skbaddr: ffff8800a1f64800 } hitcount:          1  len:         46
-    { skbaddr: ffff8800d2bcde00 } hitcount:          1  len:        988
-    { skbaddr: ffff88006a5dea00 } hitcount:          1  len:         46
-    { skbaddr: ffff88002e37a200 } hitcount:          1  len:         44
-    { skbaddr: ffff8800a1f32c00 } hitcount:          2  len:        676
-    { skbaddr: ffff88000ad52600 } hitcount:          2  len:        107
-    { skbaddr: ffff8800a1f91e00 } hitcount:          2  len:         92
-    { skbaddr: ffff8800af5a0200 } hitcount:          2  len:        142
-    { skbaddr: ffff8800d2bcc600 } hitcount:          2  len:        220
-    { skbaddr: ffff8800ba36f500 } hitcount:          2  len:         92
-    { skbaddr: ffff8800d021f800 } hitcount:          2  len:         92
-    { skbaddr: ffff8800a1f33600 } hitcount:          2  len:        675
-    { skbaddr: ffff8800a8bfff00 } hitcount:          3  len:        138
-    { skbaddr: ffff8800d62a1300 } hitcount:          3  len:        138
-    { skbaddr: ffff88002e37a100 } hitcount:          4  len:        184
-    { skbaddr: ffff880064504400 } hitcount:          4  len:        184
-    { skbaddr: ffff8800a8bfec00 } hitcount:          4  len:        184
-    { skbaddr: ffff88000ad53700 } hitcount:          5  len:        230
-    { skbaddr: ffff8800d2bcdb00 } hitcount:          5  len:        196
-    { skbaddr: ffff8800a1f90000 } hitcount:          6  len:        276
-    { skbaddr: ffff88006a54f900 } hitcount:          6  len:        276
-
-    Totals:
-        Hits: 81
-        Entries: 42
-        Dropped: 0
-    # event histogram
-    #
-    # trigger info: hist:name=foo:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 [active]
-    #
-
-    { skbaddr: ffff88000ad53500 } hitcount:          1  len:         46
-    { skbaddr: ffff8800af5a1500 } hitcount:          1  len:         76
-    { skbaddr: ffff8800d62a1900 } hitcount:          1  len:         46
-    { skbaddr: ffff8800d2bccb00 } hitcount:          1  len:        468
-    { skbaddr: ffff8800d3c69900 } hitcount:          1  len:         46
-    { skbaddr: ffff88009ff09100 } hitcount:          1  len:         52
-    { skbaddr: ffff88010f13ab00 } hitcount:          1  len:        168
-    { skbaddr: ffff88006a54f400 } hitcount:          1  len:         46
-    { skbaddr: ffff8800d2bcc500 } hitcount:          1  len:        260
-    { skbaddr: ffff880064505000 } hitcount:          1  len:         46
-    { skbaddr: ffff8800baf24e00 } hitcount:          1  len:         32
-    { skbaddr: ffff88009fe0ad00 } hitcount:          1  len:         46
-    { skbaddr: ffff8800d3edff00 } hitcount:          1  len:         44
-    { skbaddr: ffff88009fe0b400 } hitcount:          1  len:        168
-    { skbaddr: ffff8800a1c55a00 } hitcount:          1  len:         40
-    { skbaddr: ffff8800d2bcd100 } hitcount:          1  len:         40
-    { skbaddr: ffff880064505f00 } hitcount:          1  len:        174
-    { skbaddr: ffff8800a8bff200 } hitcount:          1  len:        160
-    { skbaddr: ffff880044e3cc00 } hitcount:          1  len:         76
-    { skbaddr: ffff8800a8bfe700 } hitcount:          1  len:         46
-    { skbaddr: ffff8800d2bcdc00 } hitcount:          1  len:         32
-    { skbaddr: ffff8800a1f64800 } hitcount:          1  len:         46
-    { skbaddr: ffff8800d2bcde00 } hitcount:          1  len:        988
-    { skbaddr: ffff88006a5dea00 } hitcount:          1  len:         46
-    { skbaddr: ffff88002e37a200 } hitcount:          1  len:         44
-    { skbaddr: ffff8800a1f32c00 } hitcount:          2  len:        676
-    { skbaddr: ffff88000ad52600 } hitcount:          2  len:        107
-    { skbaddr: ffff8800a1f91e00 } hitcount:          2  len:         92
-    { skbaddr: ffff8800af5a0200 } hitcount:          2  len:        142
-    { skbaddr: ffff8800d2bcc600 } hitcount:          2  len:        220
-    { skbaddr: ffff8800ba36f500 } hitcount:          2  len:         92
-    { skbaddr: ffff8800d021f800 } hitcount:          2  len:         92
-    { skbaddr: ffff8800a1f33600 } hitcount:          2  len:        675
-    { skbaddr: ffff8800a8bfff00 } hitcount:          3  len:        138
-    { skbaddr: ffff8800d62a1300 } hitcount:          3  len:        138
-    { skbaddr: ffff88002e37a100 } hitcount:          4  len:        184
-    { skbaddr: ffff880064504400 } hitcount:          4  len:        184
-    { skbaddr: ffff8800a8bfec00 } hitcount:          4  len:        184
-    { skbaddr: ffff88000ad53700 } hitcount:          5  len:        230
-    { skbaddr: ffff8800d2bcdb00 } hitcount:          5  len:        196
-    { skbaddr: ffff8800a1f90000 } hitcount:          6  len:        276
-    { skbaddr: ffff88006a54f900 } hitcount:          6  len:        276
-
-    Totals:
-        Hits: 81
-        Entries: 42
-        Dropped: 0
-
-  And here's an example that shows how to combine histogram data from
-  any two events even if they don't share any 'compatible' fields
-  other than 'hitcount' and 'stacktrace'.  These commands create a
-  couple of triggers named 'bar' using those fields::
-
-    # echo 'hist:name=bar:key=stacktrace:val=hitcount' > \
-           /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger
-    # echo 'hist:name=bar:key=stacktrace:val=hitcount' > \
-          /sys/kernel/debug/tracing/events/net/netif_rx/trigger
-
-  And displaying the output of either shows some interesting if
-  somewhat confusing output::
-
-    # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist
-    # cat /sys/kernel/debug/tracing/events/net/netif_rx/hist
-
-    # event histogram
-    #
-    # trigger info: hist:name=bar:keys=stacktrace:vals=hitcount:sort=hitcount:size=2048 [active]
-    #
-
-    { stacktrace:
-             _do_fork+0x18e/0x330
-             kernel_thread+0x29/0x30
-             kthreadd+0x154/0x1b0
-             ret_from_fork+0x3f/0x70
-    } hitcount:          1
-    { stacktrace:
-             netif_rx_internal+0xb2/0xd0
-             netif_rx_ni+0x20/0x70
-             dev_loopback_xmit+0xaa/0xd0
-             ip_mc_output+0x126/0x240
-             ip_local_out_sk+0x31/0x40
-             igmp_send_report+0x1e9/0x230
-             igmp_timer_expire+0xe9/0x120
-             call_timer_fn+0x39/0xf0
-             run_timer_softirq+0x1e1/0x290
-             __do_softirq+0xfd/0x290
-             irq_exit+0x98/0xb0
-             smp_apic_timer_interrupt+0x4a/0x60
-             apic_timer_interrupt+0x6d/0x80
-             cpuidle_enter+0x17/0x20
-             call_cpuidle+0x3b/0x60
-             cpu_startup_entry+0x22d/0x310
-    } hitcount:          1
-    { stacktrace:
-             netif_rx_internal+0xb2/0xd0
-             netif_rx_ni+0x20/0x70
-             dev_loopback_xmit+0xaa/0xd0
-             ip_mc_output+0x17f/0x240
-             ip_local_out_sk+0x31/0x40
-             ip_send_skb+0x1a/0x50
-             udp_send_skb+0x13e/0x270
-             udp_sendmsg+0x2bf/0x980
-             inet_sendmsg+0x67/0xa0
-             sock_sendmsg+0x38/0x50
-             SYSC_sendto+0xef/0x170
-             SyS_sendto+0xe/0x10
-             entry_SYSCALL_64_fastpath+0x12/0x6a
-    } hitcount:          2
-    { stacktrace:
-             netif_rx_internal+0xb2/0xd0
-             netif_rx+0x1c/0x60
-             loopback_xmit+0x6c/0xb0
-             dev_hard_start_xmit+0x219/0x3a0
-             __dev_queue_xmit+0x415/0x4f0
-             dev_queue_xmit_sk+0x13/0x20
-             ip_finish_output2+0x237/0x340
-             ip_finish_output+0x113/0x1d0
-             ip_output+0x66/0xc0
-             ip_local_out_sk+0x31/0x40
-             ip_send_skb+0x1a/0x50
-             udp_send_skb+0x16d/0x270
-             udp_sendmsg+0x2bf/0x980
-             inet_sendmsg+0x67/0xa0
-             sock_sendmsg+0x38/0x50
-             ___sys_sendmsg+0x14e/0x270
-    } hitcount:         76
-    { stacktrace:
-             netif_rx_internal+0xb2/0xd0
-             netif_rx+0x1c/0x60
-             loopback_xmit+0x6c/0xb0
-             dev_hard_start_xmit+0x219/0x3a0
-             __dev_queue_xmit+0x415/0x4f0
-             dev_queue_xmit_sk+0x13/0x20
-             ip_finish_output2+0x237/0x340
-             ip_finish_output+0x113/0x1d0
-             ip_output+0x66/0xc0
-             ip_local_out_sk+0x31/0x40
-             ip_send_skb+0x1a/0x50
-             udp_send_skb+0x16d/0x270
-             udp_sendmsg+0x2bf/0x980
-             inet_sendmsg+0x67/0xa0
-             sock_sendmsg+0x38/0x50
-             ___sys_sendmsg+0x269/0x270
-    } hitcount:         77
-    { stacktrace:
-             netif_rx_internal+0xb2/0xd0
-             netif_rx+0x1c/0x60
-             loopback_xmit+0x6c/0xb0
-             dev_hard_start_xmit+0x219/0x3a0
-             __dev_queue_xmit+0x415/0x4f0
-             dev_queue_xmit_sk+0x13/0x20
-             ip_finish_output2+0x237/0x340
-             ip_finish_output+0x113/0x1d0
-             ip_output+0x66/0xc0
-             ip_local_out_sk+0x31/0x40
-             ip_send_skb+0x1a/0x50
-             udp_send_skb+0x16d/0x270
-             udp_sendmsg+0x2bf/0x980
-             inet_sendmsg+0x67/0xa0
-             sock_sendmsg+0x38/0x50
-             SYSC_sendto+0xef/0x170
-    } hitcount:         88
-    { stacktrace:
-             _do_fork+0x18e/0x330
-             SyS_clone+0x19/0x20
-             entry_SYSCALL_64_fastpath+0x12/0x6a
-    } hitcount:        244
-
-    Totals:
-        Hits: 489
-        Entries: 7
-        Dropped: 0
+  See Documentation/trace/histogram.txt for details and examples.
diff --git a/Documentation/trace/ftrace.rst b/Documentation/trace/ftrace.rst
index fdf5fb54a04c..e45f0786f3f9 100644
--- a/Documentation/trace/ftrace.rst
+++ b/Documentation/trace/ftrace.rst
@@ -543,6 +543,30 @@ of ftrace. Here is a list of some of the key files:
 
 	See events.txt for more information.
 
+  timestamp_mode:
+
+	Certain tracers may change the timestamp mode used when
+	logging trace events into the event buffer.  Events with
+	different modes can coexist within a buffer but the mode in
+	effect when an event is logged determines which timestamp mode
+	is used for that event.  The default timestamp mode is
+	'delta'.
+
+	Usual timestamp modes for tracing:
+
+	  # cat timestamp_mode
+	  [delta] absolute
+
+	  The timestamp mode with the square brackets around it is the
+	  one in effect.
+
+	  delta: Default timestamp mode - timestamp is a delta against
+	         a per-buffer timestamp.
+
+	  absolute: The timestamp is a full timestamp, not a delta
+                 against some other value.  As such it takes up more
+                 space and is less efficient.
+
   hwlat_detector:
 
 	Directory for the Hardware Latency Detector.
diff --git a/Documentation/trace/histogram.txt b/Documentation/trace/histogram.txt
new file mode 100644
index 000000000000..6e05510afc28
--- /dev/null
+++ b/Documentation/trace/histogram.txt
@@ -0,0 +1,1995 @@
+			     Event Histograms
+
+		    Documentation written by Tom Zanussi
+
+1. Introduction
+===============
+
+  Histogram triggers are special event triggers that can be used to
+  aggregate trace event data into histograms.  For information on
+  trace events and event triggers, see Documentation/trace/events.txt.
+
+
+2. Histogram Trigger Command
+============================
+
+  A histogram trigger command is an event trigger command that
+  aggregates event hits into a hash table keyed on one or more trace
+  event format fields (or stacktrace) and a set of running totals
+  derived from one or more trace event format fields and/or event
+  counts (hitcount).
+
+  The format of a hist trigger is as follows:
+
+        hist:keys=<field1[,field2,...]>[:values=<field1[,field2,...]>]
+          [:sort=<field1[,field2,...]>][:size=#entries][:pause][:continue]
+          [:clear][:name=histname1] [if <filter>]
+
+  When a matching event is hit, an entry is added to a hash table
+  using the key(s) and value(s) named.  Keys and values correspond to
+  fields in the event's format description.  Values must correspond to
+  numeric fields - on an event hit, the value(s) will be added to a
+  sum kept for that field.  The special string 'hitcount' can be used
+  in place of an explicit value field - this is simply a count of
+  event hits.  If 'values' isn't specified, an implicit 'hitcount'
+  value will be automatically created and used as the only value.
+  Keys can be any field, or the special string 'stacktrace', which
+  will use the event's kernel stacktrace as the key.  The keywords
+  'keys' or 'key' can be used to specify keys, and the keywords
+  'values', 'vals', or 'val' can be used to specify values.  Compound
+  keys consisting of up to two fields can be specified by the 'keys'
+  keyword.  Hashing a compound key produces a unique entry in the
+  table for each unique combination of component keys, and can be
+  useful for providing more fine-grained summaries of event data.
+  Additionally, sort keys consisting of up to two fields can be
+  specified by the 'sort' keyword.  If more than one field is
+  specified, the result will be a 'sort within a sort': the first key
+  is taken to be the primary sort key and the second the secondary
+  key.  If a hist trigger is given a name using the 'name' parameter,
+  its histogram data will be shared with other triggers of the same
+  name, and trigger hits will update this common data.  Only triggers
+  with 'compatible' fields can be combined in this way; triggers are
+  'compatible' if the fields named in the trigger share the same
+  number and type of fields and those fields also have the same names.
+  Note that any two events always share the compatible 'hitcount' and
+  'stacktrace' fields and can therefore be combined using those
+  fields, however pointless that may be.
+
+  'hist' triggers add a 'hist' file to each event's subdirectory.
+  Reading the 'hist' file for the event will dump the hash table in
+  its entirety to stdout.  If there are multiple hist triggers
+  attached to an event, there will be a table for each trigger in the
+  output.  The table displayed for a named trigger will be the same as
+  any other instance having the same name. Each printed hash table
+  entry is a simple list of the keys and values comprising the entry;
+  keys are printed first and are delineated by curly braces, and are
+  followed by the set of value fields for the entry.  By default,
+  numeric fields are displayed as base-10 integers.  This can be
+  modified by appending any of the following modifiers to the field
+  name:
+
+        .hex        display a number as a hex value
+	.sym        display an address as a symbol
+	.sym-offset display an address as a symbol and offset
+	.syscall    display a syscall id as a system call name
+	.execname   display a common_pid as a program name
+	.log2       display log2 value rather than raw number
+	.usecs      display a common_timestamp in microseconds
+
+  Note that in general the semantics of a given field aren't
+  interpreted when applying a modifier to it, but there are some
+  restrictions to be aware of in this regard:
+
+    - only the 'hex' modifier can be used for values (because values
+      are essentially sums, and the other modifiers don't make sense
+      in that context).
+    - the 'execname' modifier can only be used on a 'common_pid'.  The
+      reason for this is that the execname is simply the 'comm' value
+      saved for the 'current' process when an event was triggered,
+      which is the same as the common_pid value saved by the event
+      tracing code.  Trying to apply that comm value to other pid
+      values wouldn't be correct, and typically events that care save
+      pid-specific comm fields in the event itself.
+
+  A typical usage scenario would be the following to enable a hist
+  trigger, read its current contents, and then turn it off:
+
+  # echo 'hist:keys=skbaddr.hex:vals=len' > \
+    /sys/kernel/debug/tracing/events/net/netif_rx/trigger
+
+  # cat /sys/kernel/debug/tracing/events/net/netif_rx/hist
+
+  # echo '!hist:keys=skbaddr.hex:vals=len' > \
+    /sys/kernel/debug/tracing/events/net/netif_rx/trigger
+
+  The trigger file itself can be read to show the details of the
+  currently attached hist trigger.  This information is also displayed
+  at the top of the 'hist' file when read.
+
+  By default, the size of the hash table is 2048 entries.  The 'size'
+  parameter can be used to specify more or fewer than that.  The units
+  are in terms of hashtable entries - if a run uses more entries than
+  specified, the results will show the number of 'drops', the number
+  of hits that were ignored.  The size should be a power of 2 between
+  128 and 131072 (any non- power-of-2 number specified will be rounded
+  up).
+
+  The 'sort' parameter can be used to specify a value field to sort
+  on.  The default if unspecified is 'hitcount' and the default sort
+  order is 'ascending'.  To sort in the opposite direction, append
+  .descending' to the sort key.
+
+  The 'pause' parameter can be used to pause an existing hist trigger
+  or to start a hist trigger but not log any events until told to do
+  so.  'continue' or 'cont' can be used to start or restart a paused
+  hist trigger.
+
+  The 'clear' parameter will clear the contents of a running hist
+  trigger and leave its current paused/active state.
+
+  Note that the 'pause', 'cont', and 'clear' parameters should be
+  applied using 'append' shell operator ('>>') if applied to an
+  existing trigger, rather than via the '>' operator, which will cause
+  the trigger to be removed through truncation.
+
+- enable_hist/disable_hist
+
+  The enable_hist and disable_hist triggers can be used to have one
+  event conditionally start and stop another event's already-attached
+  hist trigger.  Any number of enable_hist and disable_hist triggers
+  can be attached to a given event, allowing that event to kick off
+  and stop aggregations on a host of other events.
+
+  The format is very similar to the enable/disable_event triggers:
+
+      enable_hist:<system>:<event>[:count]
+      disable_hist:<system>:<event>[:count]
+
+  Instead of enabling or disabling the tracing of the target event
+  into the trace buffer as the enable/disable_event triggers do, the
+  enable/disable_hist triggers enable or disable the aggregation of
+  the target event into a hash table.
+
+  A typical usage scenario for the enable_hist/disable_hist triggers
+  would be to first set up a paused hist trigger on some event,
+  followed by an enable_hist/disable_hist pair that turns the hist
+  aggregation on and off when conditions of interest are hit:
+
+  # echo 'hist:keys=skbaddr.hex:vals=len:pause' > \
+    /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
+
+  # echo 'enable_hist:net:netif_receive_skb if filename==/usr/bin/wget' > \
+    /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger
+
+  # echo 'disable_hist:net:netif_receive_skb if comm==wget' > \
+    /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger
+
+  The above sets up an initially paused hist trigger which is unpaused
+  and starts aggregating events when a given program is executed, and
+  which stops aggregating when the process exits and the hist trigger
+  is paused again.
+
+  The examples below provide a more concrete illustration of the
+  concepts and typical usage patterns discussed above.
+
+  'special' event fields
+  ------------------------
+
+  There are a number of 'special event fields' available for use as
+  keys or values in a hist trigger.  These look like and behave as if
+  they were actual event fields, but aren't really part of the event's
+  field definition or format file.  They are however available for any
+  event, and can be used anywhere an actual event field could be.
+  They are:
+
+    common_timestamp       u64 - timestamp (from ring buffer) associated
+                                 with the event, in nanoseconds.  May be
+				 modified by .usecs to have timestamps
+				 interpreted as microseconds.
+    cpu                    int - the cpu on which the event occurred.
+
+  Extended error information
+  --------------------------
+
+  For some error conditions encountered when invoking a hist trigger
+  command, extended error information is available via the
+  corresponding event's 'hist' file.  Reading the hist file after an
+  error will display more detailed information about what went wrong,
+  if information is available.  This extended error information will
+  be available until the next hist trigger command for that event.
+
+  If available for a given error condition, the extended error
+  information and usage takes the following form:
+
+    # echo xxx > /sys/kernel/debug/tracing/events/sched/sched_wakeup/trigger
+    echo: write error: Invalid argument
+
+    # cat /sys/kernel/debug/tracing/events/sched/sched_wakeup/hist
+    ERROR: Couldn't yyy: zzz
+      Last command: xxx
+
+6.2 'hist' trigger examples
+---------------------------
+
+  The first set of examples creates aggregations using the kmalloc
+  event.  The fields that can be used for the hist trigger are listed
+  in the kmalloc event's format file:
+
+    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/format
+    name: kmalloc
+    ID: 374
+    format:
+	field:unsigned short common_type;	offset:0;	size:2;	signed:0;
+	field:unsigned char common_flags;	offset:2;	size:1;	signed:0;
+	field:unsigned char common_preempt_count;		offset:3;	size:1;	signed:0;
+	field:int common_pid;					offset:4;	size:4;	signed:1;
+
+	field:unsigned long call_site;				offset:8;	size:8;	signed:0;
+	field:const void * ptr;					offset:16;	size:8;	signed:0;
+	field:size_t bytes_req;					offset:24;	size:8;	signed:0;
+	field:size_t bytes_alloc;				offset:32;	size:8;	signed:0;
+	field:gfp_t gfp_flags;					offset:40;	size:4;	signed:0;
+
+  We'll start by creating a hist trigger that generates a simple table
+  that lists the total number of bytes requested for each function in
+  the kernel that made one or more calls to kmalloc:
+
+    # echo 'hist:key=call_site:val=bytes_req' > \
+            /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+
+  This tells the tracing system to create a 'hist' trigger using the
+  call_site field of the kmalloc event as the key for the table, which
+  just means that each unique call_site address will have an entry
+  created for it in the table.  The 'val=bytes_req' parameter tells
+  the hist trigger that for each unique entry (call_site) in the
+  table, it should keep a running total of the number of bytes
+  requested by that call_site.
+
+  We'll let it run for awhile and then dump the contents of the 'hist'
+  file in the kmalloc event's subdirectory (for readability, a number
+  of entries have been omitted):
+
+    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
+    # trigger info: hist:keys=call_site:vals=bytes_req:sort=hitcount:size=2048 [active]
+
+    { call_site: 18446744072106379007 } hitcount:          1  bytes_req:        176
+    { call_site: 18446744071579557049 } hitcount:          1  bytes_req:       1024
+    { call_site: 18446744071580608289 } hitcount:          1  bytes_req:      16384
+    { call_site: 18446744071581827654 } hitcount:          1  bytes_req:         24
+    { call_site: 18446744071580700980 } hitcount:          1  bytes_req:          8
+    { call_site: 18446744071579359876 } hitcount:          1  bytes_req:        152
+    { call_site: 18446744071580795365 } hitcount:          3  bytes_req:        144
+    { call_site: 18446744071581303129 } hitcount:          3  bytes_req:        144
+    { call_site: 18446744071580713234 } hitcount:          4  bytes_req:       2560
+    { call_site: 18446744071580933750 } hitcount:          4  bytes_req:        736
+    .
+    .
+    .
+    { call_site: 18446744072106047046 } hitcount:         69  bytes_req:       5576
+    { call_site: 18446744071582116407 } hitcount:         73  bytes_req:       2336
+    { call_site: 18446744072106054684 } hitcount:        136  bytes_req:     140504
+    { call_site: 18446744072106224230 } hitcount:        136  bytes_req:      19584
+    { call_site: 18446744072106078074 } hitcount:        153  bytes_req:       2448
+    { call_site: 18446744072106062406 } hitcount:        153  bytes_req:      36720
+    { call_site: 18446744071582507929 } hitcount:        153  bytes_req:      37088
+    { call_site: 18446744072102520590 } hitcount:        273  bytes_req:      10920
+    { call_site: 18446744071582143559 } hitcount:        358  bytes_req:        716
+    { call_site: 18446744072106465852 } hitcount:        417  bytes_req:      56712
+    { call_site: 18446744072102523378 } hitcount:        485  bytes_req:      27160
+    { call_site: 18446744072099568646 } hitcount:       1676  bytes_req:      33520
+
+    Totals:
+        Hits: 4610
+        Entries: 45
+        Dropped: 0
+
+  The output displays a line for each entry, beginning with the key
+  specified in the trigger, followed by the value(s) also specified in
+  the trigger.  At the beginning of the output is a line that displays
+  the trigger info, which can also be displayed by reading the
+  'trigger' file:
+
+    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+    hist:keys=call_site:vals=bytes_req:sort=hitcount:size=2048 [active]
+
+  At the end of the output are a few lines that display the overall
+  totals for the run.  The 'Hits' field shows the total number of
+  times the event trigger was hit, the 'Entries' field shows the total
+  number of used entries in the hash table, and the 'Dropped' field
+  shows the number of hits that were dropped because the number of
+  used entries for the run exceeded the maximum number of entries
+  allowed for the table (normally 0, but if not a hint that you may
+  want to increase the size of the table using the 'size' parameter).
+
+  Notice in the above output that there's an extra field, 'hitcount',
+  which wasn't specified in the trigger.  Also notice that in the
+  trigger info output, there's a parameter, 'sort=hitcount', which
+  wasn't specified in the trigger either.  The reason for that is that
+  every trigger implicitly keeps a count of the total number of hits
+  attributed to a given entry, called the 'hitcount'.  That hitcount
+  information is explicitly displayed in the output, and in the
+  absence of a user-specified sort parameter, is used as the default
+  sort field.
+
+  The value 'hitcount' can be used in place of an explicit value in
+  the 'values' parameter if you don't really need to have any
+  particular field summed and are mainly interested in hit
+  frequencies.
+
+  To turn the hist trigger off, simply call up the trigger in the
+  command history and re-execute it with a '!' prepended:
+
+    # echo '!hist:key=call_site:val=bytes_req' > \
+           /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+
+  Finally, notice that the call_site as displayed in the output above
+  isn't really very useful.  It's an address, but normally addresses
+  are displayed in hex.  To have a numeric field displayed as a hex
+  value, simply append '.hex' to the field name in the trigger:
+
+    # echo 'hist:key=call_site.hex:val=bytes_req' > \
+           /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+
+    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
+    # trigger info: hist:keys=call_site.hex:vals=bytes_req:sort=hitcount:size=2048 [active]
+
+    { call_site: ffffffffa026b291 } hitcount:          1  bytes_req:        433
+    { call_site: ffffffffa07186ff } hitcount:          1  bytes_req:        176
+    { call_site: ffffffff811ae721 } hitcount:          1  bytes_req:      16384
+    { call_site: ffffffff811c5134 } hitcount:          1  bytes_req:          8
+    { call_site: ffffffffa04a9ebb } hitcount:          1  bytes_req:        511
+    { call_site: ffffffff8122e0a6 } hitcount:          1  bytes_req:         12
+    { call_site: ffffffff8107da84 } hitcount:          1  bytes_req:        152
+    { call_site: ffffffff812d8246 } hitcount:          1  bytes_req:         24
+    { call_site: ffffffff811dc1e5 } hitcount:          3  bytes_req:        144
+    { call_site: ffffffffa02515e8 } hitcount:          3  bytes_req:        648
+    { call_site: ffffffff81258159 } hitcount:          3  bytes_req:        144
+    { call_site: ffffffff811c80f4 } hitcount:          4  bytes_req:        544
+    .
+    .
+    .
+    { call_site: ffffffffa06c7646 } hitcount:        106  bytes_req:       8024
+    { call_site: ffffffffa06cb246 } hitcount:        132  bytes_req:      31680
+    { call_site: ffffffffa06cef7a } hitcount:        132  bytes_req:       2112
+    { call_site: ffffffff8137e399 } hitcount:        132  bytes_req:      23232
+    { call_site: ffffffffa06c941c } hitcount:        185  bytes_req:     171360
+    { call_site: ffffffffa06f2a66 } hitcount:        185  bytes_req:      26640
+    { call_site: ffffffffa036a70e } hitcount:        265  bytes_req:      10600
+    { call_site: ffffffff81325447 } hitcount:        292  bytes_req:        584
+    { call_site: ffffffffa072da3c } hitcount:        446  bytes_req:      60656
+    { call_site: ffffffffa036b1f2 } hitcount:        526  bytes_req:      29456
+    { call_site: ffffffffa0099c06 } hitcount:       1780  bytes_req:      35600
+
+    Totals:
+        Hits: 4775
+        Entries: 46
+        Dropped: 0
+
+  Even that's only marginally more useful - while hex values do look
+  more like addresses, what users are typically more interested in
+  when looking at text addresses are the corresponding symbols
+  instead.  To have an address displayed as symbolic value instead,
+  simply append '.sym' or '.sym-offset' to the field name in the
+  trigger:
+
+    # echo 'hist:key=call_site.sym:val=bytes_req' > \
+           /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+
+    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
+    # trigger info: hist:keys=call_site.sym:vals=bytes_req:sort=hitcount:size=2048 [active]
+
+    { call_site: [ffffffff810adcb9] syslog_print_all                              } hitcount:          1  bytes_req:       1024
+    { call_site: [ffffffff8154bc62] usb_control_msg                               } hitcount:          1  bytes_req:          8
+    { call_site: [ffffffffa00bf6fe] hidraw_send_report [hid]                      } hitcount:          1  bytes_req:          7
+    { call_site: [ffffffff8154acbe] usb_alloc_urb                                 } hitcount:          1  bytes_req:        192
+    { call_site: [ffffffffa00bf1ca] hidraw_report_event [hid]                     } hitcount:          1  bytes_req:          7
+    { call_site: [ffffffff811e3a25] __seq_open_private                            } hitcount:          1  bytes_req:         40
+    { call_site: [ffffffff8109524a] alloc_fair_sched_group                        } hitcount:          2  bytes_req:        128
+    { call_site: [ffffffff811febd5] fsnotify_alloc_group                          } hitcount:          2  bytes_req:        528
+    { call_site: [ffffffff81440f58] __tty_buffer_request_room                     } hitcount:          2  bytes_req:       2624
+    { call_site: [ffffffff81200ba6] inotify_new_group                             } hitcount:          2  bytes_req:         96
+    { call_site: [ffffffffa05e19af] ieee80211_start_tx_ba_session [mac80211]      } hitcount:          2  bytes_req:        464
+    { call_site: [ffffffff81672406] tcp_get_metrics                               } hitcount:          2  bytes_req:        304
+    { call_site: [ffffffff81097ec2] alloc_rt_sched_group                          } hitcount:          2  bytes_req:        128
+    { call_site: [ffffffff81089b05] sched_create_group                            } hitcount:          2  bytes_req:       1424
+    .
+    .
+    .
+    { call_site: [ffffffffa04a580c] intel_crtc_page_flip [i915]                   } hitcount:       1185  bytes_req:     123240
+    { call_site: [ffffffffa0287592] drm_mode_page_flip_ioctl [drm]                } hitcount:       1185  bytes_req:     104280
+    { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state [i915]            } hitcount:       1402  bytes_req:     190672
+    { call_site: [ffffffff812891ca] ext4_find_extent                              } hitcount:       1518  bytes_req:     146208
+    { call_site: [ffffffffa029070e] drm_vma_node_allow [drm]                      } hitcount:       1746  bytes_req:      69840
+    { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23 [i915]         } hitcount:       2021  bytes_req:     792312
+    { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc [drm]                   } hitcount:       2592  bytes_req:     145152
+    { call_site: [ffffffffa0489a66] intel_ring_begin [i915]                       } hitcount:       2629  bytes_req:     378576
+    { call_site: [ffffffffa046041c] i915_gem_execbuffer2 [i915]                   } hitcount:       2629  bytes_req:    3783248
+    { call_site: [ffffffff81325607] apparmor_file_alloc_security                  } hitcount:       5192  bytes_req:      10384
+    { call_site: [ffffffffa00b7c06] hid_report_raw_event [hid]                    } hitcount:       5529  bytes_req:     110584
+    { call_site: [ffffffff8131ebf7] aa_alloc_task_context                         } hitcount:      21943  bytes_req:     702176
+    { call_site: [ffffffff8125847d] ext4_htree_store_dirent                       } hitcount:      55759  bytes_req:    5074265
+
+    Totals:
+        Hits: 109928
+        Entries: 71
+        Dropped: 0
+
+  Because the default sort key above is 'hitcount', the above shows a
+  the list of call_sites by increasing hitcount, so that at the bottom
+  we see the functions that made the most kmalloc calls during the
+  run.  If instead we we wanted to see the top kmalloc callers in
+  terms of the number of bytes requested rather than the number of
+  calls, and we wanted the top caller to appear at the top, we can use
+  the 'sort' parameter, along with the 'descending' modifier:
+
+    # echo 'hist:key=call_site.sym:val=bytes_req:sort=bytes_req.descending' > \
+           /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+
+    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
+    # trigger info: hist:keys=call_site.sym:vals=bytes_req:sort=bytes_req.descending:size=2048 [active]
+
+    { call_site: [ffffffffa046041c] i915_gem_execbuffer2 [i915]                   } hitcount:       2186  bytes_req:    3397464
+    { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23 [i915]         } hitcount:       1790  bytes_req:     712176
+    { call_site: [ffffffff8125847d] ext4_htree_store_dirent                       } hitcount:       8132  bytes_req:     513135
+    { call_site: [ffffffff811e2a1b] seq_buf_alloc                                 } hitcount:        106  bytes_req:     440128
+    { call_site: [ffffffffa0489a66] intel_ring_begin [i915]                       } hitcount:       2186  bytes_req:     314784
+    { call_site: [ffffffff812891ca] ext4_find_extent                              } hitcount:       2174  bytes_req:     208992
+    { call_site: [ffffffff811ae8e1] __kmalloc                                     } hitcount:          8  bytes_req:     131072
+    { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state [i915]            } hitcount:        859  bytes_req:     116824
+    { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc [drm]                   } hitcount:       1834  bytes_req:     102704
+    { call_site: [ffffffffa04a580c] intel_crtc_page_flip [i915]                   } hitcount:        972  bytes_req:     101088
+    { call_site: [ffffffffa0287592] drm_mode_page_flip_ioctl [drm]                } hitcount:        972  bytes_req:      85536
+    { call_site: [ffffffffa00b7c06] hid_report_raw_event [hid]                    } hitcount:       3333  bytes_req:      66664
+    { call_site: [ffffffff8137e559] sg_kmalloc                                    } hitcount:        209  bytes_req:      61632
+    .
+    .
+    .
+    { call_site: [ffffffff81095225] alloc_fair_sched_group                        } hitcount:          2  bytes_req:        128
+    { call_site: [ffffffff81097ec2] alloc_rt_sched_group                          } hitcount:          2  bytes_req:        128
+    { call_site: [ffffffff812d8406] copy_semundo                                  } hitcount:          2  bytes_req:         48
+    { call_site: [ffffffff81200ba6] inotify_new_group                             } hitcount:          1  bytes_req:         48
+    { call_site: [ffffffffa027121a] drm_getmagic [drm]                            } hitcount:          1  bytes_req:         48
+    { call_site: [ffffffff811e3a25] __seq_open_private                            } hitcount:          1  bytes_req:         40
+    { call_site: [ffffffff811c52f4] bprm_change_interp                            } hitcount:          2  bytes_req:         16
+    { call_site: [ffffffff8154bc62] usb_control_msg                               } hitcount:          1  bytes_req:          8
+    { call_site: [ffffffffa00bf1ca] hidraw_report_event [hid]                     } hitcount:          1  bytes_req:          7
+    { call_site: [ffffffffa00bf6fe] hidraw_send_report [hid]                      } hitcount:          1  bytes_req:          7
+
+    Totals:
+        Hits: 32133
+        Entries: 81
+        Dropped: 0
+
+  To display the offset and size information in addition to the symbol
+  name, just use 'sym-offset' instead:
+
+    # echo 'hist:key=call_site.sym-offset:val=bytes_req:sort=bytes_req.descending' > \
+           /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+
+    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
+    # trigger info: hist:keys=call_site.sym-offset:vals=bytes_req:sort=bytes_req.descending:size=2048 [active]
+
+    { call_site: [ffffffffa046041c] i915_gem_execbuffer2+0x6c/0x2c0 [i915]                  } hitcount:       4569  bytes_req:    3163720
+    { call_site: [ffffffffa0489a66] intel_ring_begin+0xc6/0x1f0 [i915]                      } hitcount:       4569  bytes_req:     657936
+    { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23+0x694/0x1020 [i915]      } hitcount:       1519  bytes_req:     472936
+    { call_site: [ffffffffa045e646] i915_gem_do_execbuffer.isra.23+0x516/0x1020 [i915]      } hitcount:       3050  bytes_req:     211832
+    { call_site: [ffffffff811e2a1b] seq_buf_alloc+0x1b/0x50                                 } hitcount:         34  bytes_req:     148384
+    { call_site: [ffffffffa04a580c] intel_crtc_page_flip+0xbc/0x870 [i915]                  } hitcount:       1385  bytes_req:     144040
+    { call_site: [ffffffff811ae8e1] __kmalloc+0x191/0x1b0                                   } hitcount:          8  bytes_req:     131072
+    { call_site: [ffffffffa0287592] drm_mode_page_flip_ioctl+0x282/0x360 [drm]              } hitcount:       1385  bytes_req:     121880
+    { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc+0x32/0x100 [drm]                  } hitcount:       1848  bytes_req:     103488
+    { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state+0x2c/0xa0 [i915]            } hitcount:        461  bytes_req:      62696
+    { call_site: [ffffffffa029070e] drm_vma_node_allow+0x2e/0xd0 [drm]                      } hitcount:       1541  bytes_req:      61640
+    { call_site: [ffffffff815f8d7b] sk_prot_alloc+0xcb/0x1b0                                } hitcount:         57  bytes_req:      57456
+    .
+    .
+    .
+    { call_site: [ffffffff8109524a] alloc_fair_sched_group+0x5a/0x1a0                       } hitcount:          2  bytes_req:        128
+    { call_site: [ffffffffa027b921] drm_vm_open_locked+0x31/0xa0 [drm]                      } hitcount:          3  bytes_req:         96
+    { call_site: [ffffffff8122e266] proc_self_follow_link+0x76/0xb0                         } hitcount:          8  bytes_req:         96
+    { call_site: [ffffffff81213e80] load_elf_binary+0x240/0x1650                            } hitcount:          3  bytes_req:         84
+    { call_site: [ffffffff8154bc62] usb_control_msg+0x42/0x110                              } hitcount:          1  bytes_req:          8
+    { call_site: [ffffffffa00bf6fe] hidraw_send_report+0x7e/0x1a0 [hid]                     } hitcount:          1  bytes_req:          7
+    { call_site: [ffffffffa00bf1ca] hidraw_report_event+0x8a/0x120 [hid]                    } hitcount:          1  bytes_req:          7
+
+    Totals:
+        Hits: 26098
+        Entries: 64
+        Dropped: 0
+
+  We can also add multiple fields to the 'values' parameter.  For
+  example, we might want to see the total number of bytes allocated
+  alongside bytes requested, and display the result sorted by bytes
+  allocated in a descending order:
+
+    # echo 'hist:keys=call_site.sym:values=bytes_req,bytes_alloc:sort=bytes_alloc.descending' > \
+           /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+
+    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
+    # trigger info: hist:keys=call_site.sym:vals=bytes_req,bytes_alloc:sort=bytes_alloc.descending:size=2048 [active]
+
+    { call_site: [ffffffffa046041c] i915_gem_execbuffer2 [i915]                   } hitcount:       7403  bytes_req:    4084360  bytes_alloc:    5958016
+    { call_site: [ffffffff811e2a1b] seq_buf_alloc                                 } hitcount:        541  bytes_req:    2213968  bytes_alloc:    2228224
+    { call_site: [ffffffffa0489a66] intel_ring_begin [i915]                       } hitcount:       7404  bytes_req:    1066176  bytes_alloc:    1421568
+    { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23 [i915]         } hitcount:       1565  bytes_req:     557368  bytes_alloc:    1037760
+    { call_site: [ffffffff8125847d] ext4_htree_store_dirent                       } hitcount:       9557  bytes_req:     595778  bytes_alloc:     695744
+    { call_site: [ffffffffa045e646] i915_gem_do_execbuffer.isra.23 [i915]         } hitcount:       5839  bytes_req:     430680  bytes_alloc:     470400
+    { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state [i915]            } hitcount:       2388  bytes_req:     324768  bytes_alloc:     458496
+    { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc [drm]                   } hitcount:       3911  bytes_req:     219016  bytes_alloc:     250304
+    { call_site: [ffffffff815f8d7b] sk_prot_alloc                                 } hitcount:        235  bytes_req:     236880  bytes_alloc:     240640
+    { call_site: [ffffffff8137e559] sg_kmalloc                                    } hitcount:        557  bytes_req:     169024  bytes_alloc:     221760
+    { call_site: [ffffffffa00b7c06] hid_report_raw_event [hid]                    } hitcount:       9378  bytes_req:     187548  bytes_alloc:     206312
+    { call_site: [ffffffffa04a580c] intel_crtc_page_flip [i915]                   } hitcount:       1519  bytes_req:     157976  bytes_alloc:     194432
+    .
+    .
+    .
+    { call_site: [ffffffff8109bd3b] sched_autogroup_create_attach                 } hitcount:          2  bytes_req:        144  bytes_alloc:        192
+    { call_site: [ffffffff81097ee8] alloc_rt_sched_group                          } hitcount:          2  bytes_req:        128  bytes_alloc:        128
+    { call_site: [ffffffff8109524a] alloc_fair_sched_group                        } hitcount:          2  bytes_req:        128  bytes_alloc:        128
+    { call_site: [ffffffff81095225] alloc_fair_sched_group                        } hitcount:          2  bytes_req:        128  bytes_alloc:        128
+    { call_site: [ffffffff81097ec2] alloc_rt_sched_group                          } hitcount:          2  bytes_req:        128  bytes_alloc:        128
+    { call_site: [ffffffff81213e80] load_elf_binary                               } hitcount:          3  bytes_req:         84  bytes_alloc:         96
+    { call_site: [ffffffff81079a2e] kthread_create_on_node                        } hitcount:          1  bytes_req:         56  bytes_alloc:         64
+    { call_site: [ffffffffa00bf6fe] hidraw_send_report [hid]                      } hitcount:          1  bytes_req:          7  bytes_alloc:          8
+    { call_site: [ffffffff8154bc62] usb_control_msg                               } hitcount:          1  bytes_req:          8  bytes_alloc:          8
+    { call_site: [ffffffffa00bf1ca] hidraw_report_event [hid]                     } hitcount:          1  bytes_req:          7  bytes_alloc:          8
+
+    Totals:
+        Hits: 66598
+        Entries: 65
+        Dropped: 0
+
+  Finally, to finish off our kmalloc example, instead of simply having
+  the hist trigger display symbolic call_sites, we can have the hist
+  trigger additionally display the complete set of kernel stack traces
+  that led to each call_site.  To do that, we simply use the special
+  value 'stacktrace' for the key parameter:
+
+    # echo 'hist:keys=stacktrace:values=bytes_req,bytes_alloc:sort=bytes_alloc' > \
+           /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+
+  The above trigger will use the kernel stack trace in effect when an
+  event is triggered as the key for the hash table.  This allows the
+  enumeration of every kernel callpath that led up to a particular
+  event, along with a running total of any of the event fields for
+  that event.  Here we tally bytes requested and bytes allocated for
+  every callpath in the system that led up to a kmalloc (in this case
+  every callpath to a kmalloc for a kernel compile):
+
+    # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist
+    # trigger info: hist:keys=stacktrace:vals=bytes_req,bytes_alloc:sort=bytes_alloc:size=2048 [active]
+
+    { stacktrace:
+         __kmalloc_track_caller+0x10b/0x1a0
+         kmemdup+0x20/0x50
+         hidraw_report_event+0x8a/0x120 [hid]
+         hid_report_raw_event+0x3ea/0x440 [hid]
+         hid_input_report+0x112/0x190 [hid]
+         hid_irq_in+0xc2/0x260 [usbhid]
+         __usb_hcd_giveback_urb+0x72/0x120
+         usb_giveback_urb_bh+0x9e/0xe0
+         tasklet_hi_action+0xf8/0x100
+         __do_softirq+0x114/0x2c0
+         irq_exit+0xa5/0xb0
+         do_IRQ+0x5a/0xf0
+         ret_from_intr+0x0/0x30
+         cpuidle_enter+0x17/0x20
+         cpu_startup_entry+0x315/0x3e0
+         rest_init+0x7c/0x80
+    } hitcount:          3  bytes_req:         21  bytes_alloc:         24
+    { stacktrace:
+         __kmalloc_track_caller+0x10b/0x1a0
+         kmemdup+0x20/0x50
+         hidraw_report_event+0x8a/0x120 [hid]
+         hid_report_raw_event+0x3ea/0x440 [hid]
+         hid_input_report+0x112/0x190 [hid]
+         hid_irq_in+0xc2/0x260 [usbhid]
+         __usb_hcd_giveback_urb+0x72/0x120
+         usb_giveback_urb_bh+0x9e/0xe0
+         tasklet_hi_action+0xf8/0x100
+         __do_softirq+0x114/0x2c0
+         irq_exit+0xa5/0xb0
+         do_IRQ+0x5a/0xf0
+         ret_from_intr+0x0/0x30
+    } hitcount:          3  bytes_req:         21  bytes_alloc:         24
+    { stacktrace:
+         kmem_cache_alloc_trace+0xeb/0x150
+         aa_alloc_task_context+0x27/0x40
+         apparmor_cred_prepare+0x1f/0x50
+         security_prepare_creds+0x16/0x20
+         prepare_creds+0xdf/0x1a0
+         SyS_capset+0xb5/0x200
+         system_call_fastpath+0x12/0x6a
+    } hitcount:          1  bytes_req:         32  bytes_alloc:         32
+    .
+    .
+    .
+    { stacktrace:
+         __kmalloc+0x11b/0x1b0
+         i915_gem_execbuffer2+0x6c/0x2c0 [i915]
+         drm_ioctl+0x349/0x670 [drm]
+         do_vfs_ioctl+0x2f0/0x4f0
+         SyS_ioctl+0x81/0xa0
+         system_call_fastpath+0x12/0x6a
+    } hitcount:      17726  bytes_req:   13944120  bytes_alloc:   19593808
+    { stacktrace:
+         __kmalloc+0x11b/0x1b0
+         load_elf_phdrs+0x76/0xa0
+         load_elf_binary+0x102/0x1650
+         search_binary_handler+0x97/0x1d0
+         do_execveat_common.isra.34+0x551/0x6e0
+         SyS_execve+0x3a/0x50
+         return_from_execve+0x0/0x23
+    } hitcount:      33348  bytes_req:   17152128  bytes_alloc:   20226048
+    { stacktrace:
+         kmem_cache_alloc_trace+0xeb/0x150
+         apparmor_file_alloc_security+0x27/0x40
+         security_file_alloc+0x16/0x20
+         get_empty_filp+0x93/0x1c0
+         path_openat+0x31/0x5f0
+         do_filp_open+0x3a/0x90
+         do_sys_open+0x128/0x220
+         SyS_open+0x1e/0x20
+         system_call_fastpath+0x12/0x6a
+    } hitcount:    4766422  bytes_req:    9532844  bytes_alloc:   38131376
+    { stacktrace:
+         __kmalloc+0x11b/0x1b0
+         seq_buf_alloc+0x1b/0x50
+         seq_read+0x2cc/0x370
+         proc_reg_read+0x3d/0x80
+         __vfs_read+0x28/0xe0
+         vfs_read+0x86/0x140
+         SyS_read+0x46/0xb0
+         system_call_fastpath+0x12/0x6a
+    } hitcount:      19133  bytes_req:   78368768  bytes_alloc:   78368768
+
+    Totals:
+        Hits: 6085872
+        Entries: 253
+        Dropped: 0
+
+  If you key a hist trigger on common_pid, in order for example to
+  gather and display sorted totals for each process, you can use the
+  special .execname modifier to display the executable names for the
+  processes in the table rather than raw pids.  The example below
+  keeps a per-process sum of total bytes read:
+
+    # echo 'hist:key=common_pid.execname:val=count:sort=count.descending' > \
+           /sys/kernel/debug/tracing/events/syscalls/sys_enter_read/trigger
+
+    # cat /sys/kernel/debug/tracing/events/syscalls/sys_enter_read/hist
+    # trigger info: hist:keys=common_pid.execname:vals=count:sort=count.descending:size=2048 [active]
+
+    { common_pid: gnome-terminal  [      3196] } hitcount:        280  count:    1093512
+    { common_pid: Xorg            [      1309] } hitcount:        525  count:     256640
+    { common_pid: compiz          [      2889] } hitcount:         59  count:     254400
+    { common_pid: bash            [      8710] } hitcount:          3  count:      66369
+    { common_pid: dbus-daemon-lau [      8703] } hitcount:         49  count:      47739
+    { common_pid: irqbalance      [      1252] } hitcount:         27  count:      27648
+    { common_pid: 01ifupdown      [      8705] } hitcount:          3  count:      17216
+    { common_pid: dbus-daemon     [       772] } hitcount:         10  count:      12396
+    { common_pid: Socket Thread   [      8342] } hitcount:         11  count:      11264
+    { common_pid: nm-dhcp-client. [      8701] } hitcount:          6  count:       7424
+    { common_pid: gmain           [      1315] } hitcount:         18  count:       6336
+    .
+    .
+    .
+    { common_pid: postgres        [      1892] } hitcount:          2  count:         32
+    { common_pid: postgres        [      1891] } hitcount:          2  count:         32
+    { common_pid: gmain           [      8704] } hitcount:          2  count:         32
+    { common_pid: upstart-dbus-br [      2740] } hitcount:         21  count:         21
+    { common_pid: nm-dispatcher.a [      8696] } hitcount:          1  count:         16
+    { common_pid: indicator-datet [      2904] } hitcount:          1  count:         16
+    { common_pid: gdbus           [      2998] } hitcount:          1  count:         16
+    { common_pid: rtkit-daemon    [      2052] } hitcount:          1  count:          8
+    { common_pid: init            [         1] } hitcount:          2  count:          2
+
+    Totals:
+        Hits: 2116
+        Entries: 51
+        Dropped: 0
+
+  Similarly, if you key a hist trigger on syscall id, for example to
+  gather and display a list of systemwide syscall hits, you can use
+  the special .syscall modifier to display the syscall names rather
+  than raw ids.  The example below keeps a running total of syscall
+  counts for the system during the run:
+
+    # echo 'hist:key=id.syscall:val=hitcount' > \
+           /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger
+
+    # cat /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/hist
+    # trigger info: hist:keys=id.syscall:vals=hitcount:sort=hitcount:size=2048 [active]
+
+    { id: sys_fsync                     [ 74] } hitcount:          1
+    { id: sys_newuname                  [ 63] } hitcount:          1
+    { id: sys_prctl                     [157] } hitcount:          1
+    { id: sys_statfs                    [137] } hitcount:          1
+    { id: sys_symlink                   [ 88] } hitcount:          1
+    { id: sys_sendmmsg                  [307] } hitcount:          1
+    { id: sys_semctl                    [ 66] } hitcount:          1
+    { id: sys_readlink                  [ 89] } hitcount:          3
+    { id: sys_bind                      [ 49] } hitcount:          3
+    { id: sys_getsockname               [ 51] } hitcount:          3
+    { id: sys_unlink                    [ 87] } hitcount:          3
+    { id: sys_rename                    [ 82] } hitcount:          4
+    { id: unknown_syscall               [ 58] } hitcount:          4
+    { id: sys_connect                   [ 42] } hitcount:          4
+    { id: sys_getpid                    [ 39] } hitcount:          4
+    .
+    .
+    .
+    { id: sys_rt_sigprocmask            [ 14] } hitcount:        952
+    { id: sys_futex                     [202] } hitcount:       1534
+    { id: sys_write                     [  1] } hitcount:       2689
+    { id: sys_setitimer                 [ 38] } hitcount:       2797
+    { id: sys_read                      [  0] } hitcount:       3202
+    { id: sys_select                    [ 23] } hitcount:       3773
+    { id: sys_writev                    [ 20] } hitcount:       4531
+    { id: sys_poll                      [  7] } hitcount:       8314
+    { id: sys_recvmsg                   [ 47] } hitcount:      13738
+    { id: sys_ioctl                     [ 16] } hitcount:      21843
+
+    Totals:
+        Hits: 67612
+        Entries: 72
+        Dropped: 0
+
+    The syscall counts above provide a rough overall picture of system
+    call activity on the system; we can see for example that the most
+    popular system call on this system was the 'sys_ioctl' system call.
+
+    We can use 'compound' keys to refine that number and provide some
+    further insight as to which processes exactly contribute to the
+    overall ioctl count.
+
+    The command below keeps a hitcount for every unique combination of
+    system call id and pid - the end result is essentially a table
+    that keeps a per-pid sum of system call hits.  The results are
+    sorted using the system call id as the primary key, and the
+    hitcount sum as the secondary key:
+
+    # echo 'hist:key=id.syscall,common_pid.execname:val=hitcount:sort=id,hitcount' > \
+           /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger
+
+    # cat /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/hist
+    # trigger info: hist:keys=id.syscall,common_pid.execname:vals=hitcount:sort=id.syscall,hitcount:size=2048 [active]
+
+    { id: sys_read                      [  0], common_pid: rtkit-daemon    [      1877] } hitcount:          1
+    { id: sys_read                      [  0], common_pid: gdbus           [      2976] } hitcount:          1
+    { id: sys_read                      [  0], common_pid: console-kit-dae [      3400] } hitcount:          1
+    { id: sys_read                      [  0], common_pid: postgres        [      1865] } hitcount:          1
+    { id: sys_read                      [  0], common_pid: deja-dup-monito [      3543] } hitcount:          2
+    { id: sys_read                      [  0], common_pid: NetworkManager  [       890] } hitcount:          2
+    { id: sys_read                      [  0], common_pid: evolution-calen [      3048] } hitcount:          2
+    { id: sys_read                      [  0], common_pid: postgres        [      1864] } hitcount:          2
+    { id: sys_read                      [  0], common_pid: nm-applet       [      3022] } hitcount:          2
+    { id: sys_read                      [  0], common_pid: whoopsie        [      1212] } hitcount:          2
+    .
+    .
+    .
+    { id: sys_ioctl                     [ 16], common_pid: bash            [      8479] } hitcount:          1
+    { id: sys_ioctl                     [ 16], common_pid: bash            [      3472] } hitcount:         12
+    { id: sys_ioctl                     [ 16], common_pid: gnome-terminal  [      3199] } hitcount:         16
+    { id: sys_ioctl                     [ 16], common_pid: Xorg            [      1267] } hitcount:       1808
+    { id: sys_ioctl                     [ 16], common_pid: compiz          [      2994] } hitcount:       5580
+    .
+    .
+    .
+    { id: sys_waitid                    [247], common_pid: upstart-dbus-br [      2690] } hitcount:          3
+    { id: sys_waitid                    [247], common_pid: upstart-dbus-br [      2688] } hitcount:         16
+    { id: sys_inotify_add_watch         [254], common_pid: gmain           [       975] } hitcount:          2
+    { id: sys_inotify_add_watch         [254], common_pid: gmain           [      3204] } hitcount:          4
+    { id: sys_inotify_add_watch         [254], common_pid: gmain           [      2888] } hitcount:          4
+    { id: sys_inotify_add_watch         [254], common_pid: gmain           [      3003] } hitcount:          4
+    { id: sys_inotify_add_watch         [254], common_pid: gmain           [      2873] } hitcount:          4
+    { id: sys_inotify_add_watch         [254], common_pid: gmain           [      3196] } hitcount:          6
+    { id: sys_openat                    [257], common_pid: java            [      2623] } hitcount:          2
+    { id: sys_eventfd2                  [290], common_pid: ibus-ui-gtk3    [      2760] } hitcount:          4
+    { id: sys_eventfd2                  [290], common_pid: compiz          [      2994] } hitcount:          6
+
+    Totals:
+        Hits: 31536
+        Entries: 323
+        Dropped: 0
+
+    The above list does give us a breakdown of the ioctl syscall by
+    pid, but it also gives us quite a bit more than that, which we
+    don't really care about at the moment.  Since we know the syscall
+    id for sys_ioctl (16, displayed next to the sys_ioctl name), we
+    can use that to filter out all the other syscalls:
+
+    # echo 'hist:key=id.syscall,common_pid.execname:val=hitcount:sort=id,hitcount if id == 16' > \
+           /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger
+
+    # cat /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/hist
+    # trigger info: hist:keys=id.syscall,common_pid.execname:vals=hitcount:sort=id.syscall,hitcount:size=2048 if id == 16 [active]
+
+    { id: sys_ioctl                     [ 16], common_pid: gmain           [      2769] } hitcount:          1
+    { id: sys_ioctl                     [ 16], common_pid: evolution-addre [      8571] } hitcount:          1
+    { id: sys_ioctl                     [ 16], common_pid: gmain           [      3003] } hitcount:          1
+    { id: sys_ioctl                     [ 16], common_pid: gmain           [      2781] } hitcount:          1
+    { id: sys_ioctl                     [ 16], common_pid: gmain           [      2829] } hitcount:          1
+    { id: sys_ioctl                     [ 16], common_pid: bash            [      8726] } hitcount:          1
+    { id: sys_ioctl                     [ 16], common_pid: bash            [      8508] } hitcount:          1
+    { id: sys_ioctl                     [ 16], common_pid: gmain           [      2970] } hitcount:          1
+    { id: sys_ioctl                     [ 16], common_pid: gmain           [      2768] } hitcount:          1
+    .
+    .
+    .
+    { id: sys_ioctl                     [ 16], common_pid: pool            [      8559] } hitcount:         45
+    { id: sys_ioctl                     [ 16], common_pid: pool            [      8555] } hitcount:         48
+    { id: sys_ioctl                     [ 16], common_pid: pool            [      8551] } hitcount:         48
+    { id: sys_ioctl                     [ 16], common_pid: avahi-daemon    [       896] } hitcount:         66
+    { id: sys_ioctl                     [ 16], common_pid: Xorg            [      1267] } hitcount:      26674
+    { id: sys_ioctl                     [ 16], common_pid: compiz          [      2994] } hitcount:      73443
+
+    Totals:
+        Hits: 101162
+        Entries: 103
+        Dropped: 0
+
+    The above output shows that 'compiz' and 'Xorg' are far and away
+    the heaviest ioctl callers (which might lead to questions about
+    whether they really need to be making all those calls and to
+    possible avenues for further investigation.)
+
+    The compound key examples used a key and a sum value (hitcount) to
+    sort the output, but we can just as easily use two keys instead.
+    Here's an example where we use a compound key composed of the the
+    common_pid and size event fields.  Sorting with pid as the primary
+    key and 'size' as the secondary key allows us to display an
+    ordered summary of the recvfrom sizes, with counts, received by
+    each process:
+
+    # echo 'hist:key=common_pid.execname,size:val=hitcount:sort=common_pid,size' > \
+           /sys/kernel/debug/tracing/events/syscalls/sys_enter_recvfrom/trigger
+
+    # cat /sys/kernel/debug/tracing/events/syscalls/sys_enter_recvfrom/hist
+    # trigger info: hist:keys=common_pid.execname,size:vals=hitcount:sort=common_pid.execname,size:size=2048 [active]
+
+    { common_pid: smbd            [       784], size:          4 } hitcount:          1
+    { common_pid: dnsmasq         [      1412], size:       4096 } hitcount:        672
+    { common_pid: postgres        [      1796], size:       1000 } hitcount:          6
+    { common_pid: postgres        [      1867], size:       1000 } hitcount:         10
+    { common_pid: bamfdaemon      [      2787], size:         28 } hitcount:          2
+    { common_pid: bamfdaemon      [      2787], size:      14360 } hitcount:          1
+    { common_pid: compiz          [      2994], size:          8 } hitcount:          1
+    { common_pid: compiz          [      2994], size:         20 } hitcount:         11
+    { common_pid: gnome-terminal  [      3199], size:          4 } hitcount:          2
+    { common_pid: firefox         [      8817], size:          4 } hitcount:          1
+    { common_pid: firefox         [      8817], size:          8 } hitcount:          5
+    { common_pid: firefox         [      8817], size:        588 } hitcount:          2
+    { common_pid: firefox         [      8817], size:        628 } hitcount:          1
+    { common_pid: firefox         [      8817], size:       6944 } hitcount:          1
+    { common_pid: firefox         [      8817], size:     408880 } hitcount:          2
+    { common_pid: firefox         [      8822], size:          8 } hitcount:          2
+    { common_pid: firefox         [      8822], size:        160 } hitcount:          2
+    { common_pid: firefox         [      8822], size:        320 } hitcount:          2
+    { common_pid: firefox         [      8822], size:        352 } hitcount:          1
+    .
+    .
+    .
+    { common_pid: pool            [      8923], size:       1960 } hitcount:         10
+    { common_pid: pool            [      8923], size:       2048 } hitcount:         10
+    { common_pid: pool            [      8924], size:       1960 } hitcount:         10
+    { common_pid: pool            [      8924], size:       2048 } hitcount:         10
+    { common_pid: pool            [      8928], size:       1964 } hitcount:          4
+    { common_pid: pool            [      8928], size:       1965 } hitcount:          2
+    { common_pid: pool            [      8928], size:       2048 } hitcount:          6
+    { common_pid: pool            [      8929], size:       1982 } hitcount:          1
+    { common_pid: pool            [      8929], size:       2048 } hitcount:          1
+
+    Totals:
+        Hits: 2016
+        Entries: 224
+        Dropped: 0
+
+  The above example also illustrates the fact that although a compound
+  key is treated as a single entity for hashing purposes, the sub-keys
+  it's composed of can be accessed independently.
+
+  The next example uses a string field as the hash key and
+  demonstrates how you can manually pause and continue a hist trigger.
+  In this example, we'll aggregate fork counts and don't expect a
+  large number of entries in the hash table, so we'll drop it to a
+  much smaller number, say 256:
+
+    # echo 'hist:key=child_comm:val=hitcount:size=256' > \
+           /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger
+
+    # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist
+    # trigger info: hist:keys=child_comm:vals=hitcount:sort=hitcount:size=256 [active]
+
+    { child_comm: dconf worker                        } hitcount:          1
+    { child_comm: ibus-daemon                         } hitcount:          1
+    { child_comm: whoopsie                            } hitcount:          1
+    { child_comm: smbd                                } hitcount:          1
+    { child_comm: gdbus                               } hitcount:          1
+    { child_comm: kthreadd                            } hitcount:          1
+    { child_comm: dconf worker                        } hitcount:          1
+    { child_comm: evolution-alarm                     } hitcount:          2
+    { child_comm: Socket Thread                       } hitcount:          2
+    { child_comm: postgres                            } hitcount:          2
+    { child_comm: bash                                } hitcount:          3
+    { child_comm: compiz                              } hitcount:          3
+    { child_comm: evolution-sourc                     } hitcount:          4
+    { child_comm: dhclient                            } hitcount:          4
+    { child_comm: pool                                } hitcount:          5
+    { child_comm: nm-dispatcher.a                     } hitcount:          8
+    { child_comm: firefox                             } hitcount:          8
+    { child_comm: dbus-daemon                         } hitcount:          8
+    { child_comm: glib-pacrunner                      } hitcount:         10
+    { child_comm: evolution                           } hitcount:         23
+
+    Totals:
+        Hits: 89
+        Entries: 20
+        Dropped: 0
+
+  If we want to pause the hist trigger, we can simply append :pause to
+  the command that started the trigger.  Notice that the trigger info
+  displays as [paused]:
+
+    # echo 'hist:key=child_comm:val=hitcount:size=256:pause' >> \
+           /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger
+
+    # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist
+    # trigger info: hist:keys=child_comm:vals=hitcount:sort=hitcount:size=256 [paused]
+
+    { child_comm: dconf worker                        } hitcount:          1
+    { child_comm: kthreadd                            } hitcount:          1
+    { child_comm: dconf worker                        } hitcount:          1
+    { child_comm: gdbus                               } hitcount:          1
+    { child_comm: ibus-daemon                         } hitcount:          1
+    { child_comm: Socket Thread                       } hitcount:          2
+    { child_comm: evolution-alarm                     } hitcount:          2
+    { child_comm: smbd                                } hitcount:          2
+    { child_comm: bash                                } hitcount:          3
+    { child_comm: whoopsie                            } hitcount:          3
+    { child_comm: compiz                              } hitcount:          3
+    { child_comm: evolution-sourc                     } hitcount:          4
+    { child_comm: pool                                } hitcount:          5
+    { child_comm: postgres                            } hitcount:          6
+    { child_comm: firefox                             } hitcount:          8
+    { child_comm: dhclient                            } hitcount:         10
+    { child_comm: emacs                               } hitcount:         12
+    { child_comm: dbus-daemon                         } hitcount:         20
+    { child_comm: nm-dispatcher.a                     } hitcount:         20
+    { child_comm: evolution                           } hitcount:         35
+    { child_comm: glib-pacrunner                      } hitcount:         59
+
+    Totals:
+        Hits: 199
+        Entries: 21
+        Dropped: 0
+
+  To manually continue having the trigger aggregate events, append
+  :cont instead.  Notice that the trigger info displays as [active]
+  again, and the data has changed:
+
+    # echo 'hist:key=child_comm:val=hitcount:size=256:cont' >> \
+           /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger
+
+    # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist
+    # trigger info: hist:keys=child_comm:vals=hitcount:sort=hitcount:size=256 [active]
+
+    { child_comm: dconf worker                        } hitcount:          1
+    { child_comm: dconf worker                        } hitcount:          1
+    { child_comm: kthreadd                            } hitcount:          1
+    { child_comm: gdbus                               } hitcount:          1
+    { child_comm: ibus-daemon                         } hitcount:          1
+    { child_comm: Socket Thread                       } hitcount:          2
+    { child_comm: evolution-alarm                     } hitcount:          2
+    { child_comm: smbd                                } hitcount:          2
+    { child_comm: whoopsie                            } hitcount:          3
+    { child_comm: compiz                              } hitcount:          3
+    { child_comm: evolution-sourc                     } hitcount:          4
+    { child_comm: bash                                } hitcount:          5
+    { child_comm: pool                                } hitcount:          5
+    { child_comm: postgres                            } hitcount:          6
+    { child_comm: firefox                             } hitcount:          8
+    { child_comm: dhclient                            } hitcount:         11
+    { child_comm: emacs                               } hitcount:         12
+    { child_comm: dbus-daemon                         } hitcount:         22
+    { child_comm: nm-dispatcher.a                     } hitcount:         22
+    { child_comm: evolution                           } hitcount:         35
+    { child_comm: glib-pacrunner                      } hitcount:         59
+
+    Totals:
+        Hits: 206
+        Entries: 21
+        Dropped: 0
+
+  The previous example showed how to start and stop a hist trigger by
+  appending 'pause' and 'continue' to the hist trigger command.  A
+  hist trigger can also be started in a paused state by initially
+  starting the trigger with ':pause' appended.  This allows you to
+  start the trigger only when you're ready to start collecting data
+  and not before.  For example, you could start the trigger in a
+  paused state, then unpause it and do something you want to measure,
+  then pause the trigger again when done.
+
+  Of course, doing this manually can be difficult and error-prone, but
+  it is possible to automatically start and stop a hist trigger based
+  on some condition, via the enable_hist and disable_hist triggers.
+
+  For example, suppose we wanted to take a look at the relative
+  weights in terms of skb length for each callpath that leads to a
+  netif_receieve_skb event when downloading a decent-sized file using
+  wget.
+
+  First we set up an initially paused stacktrace trigger on the
+  netif_receive_skb event:
+
+    # echo 'hist:key=stacktrace:vals=len:pause' > \
+           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
+
+  Next, we set up an 'enable_hist' trigger on the sched_process_exec
+  event, with an 'if filename==/usr/bin/wget' filter.  The effect of
+  this new trigger is that it will 'unpause' the hist trigger we just
+  set up on netif_receive_skb if and only if it sees a
+  sched_process_exec event with a filename of '/usr/bin/wget'.  When
+  that happens, all netif_receive_skb events are aggregated into a
+  hash table keyed on stacktrace:
+
+    # echo 'enable_hist:net:netif_receive_skb if filename==/usr/bin/wget' > \
+           /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger
+
+  The aggregation continues until the netif_receive_skb is paused
+  again, which is what the following disable_hist event does by
+  creating a similar setup on the sched_process_exit event, using the
+  filter 'comm==wget':
+
+    # echo 'disable_hist:net:netif_receive_skb if comm==wget' > \
+           /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger
+
+  Whenever a process exits and the comm field of the disable_hist
+  trigger filter matches 'comm==wget', the netif_receive_skb hist
+  trigger is disabled.
+
+  The overall effect is that netif_receive_skb events are aggregated
+  into the hash table for only the duration of the wget.  Executing a
+  wget command and then listing the 'hist' file will display the
+  output generated by the wget command:
+
+    $ wget https://www.kernel.org/pub/linux/kernel/v3.x/patch-3.19.xz
+
+    # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist
+    # trigger info: hist:keys=stacktrace:vals=len:sort=hitcount:size=2048 [paused]
+
+    { stacktrace:
+         __netif_receive_skb_core+0x46d/0x990
+         __netif_receive_skb+0x18/0x60
+         netif_receive_skb_internal+0x23/0x90
+         napi_gro_receive+0xc8/0x100
+         ieee80211_deliver_skb+0xd6/0x270 [mac80211]
+         ieee80211_rx_handlers+0xccf/0x22f0 [mac80211]
+         ieee80211_prepare_and_rx_handle+0x4e7/0xc40 [mac80211]
+         ieee80211_rx+0x31d/0x900 [mac80211]
+         iwlagn_rx_reply_rx+0x3db/0x6f0 [iwldvm]
+         iwl_rx_dispatch+0x8e/0xf0 [iwldvm]
+         iwl_pcie_irq_handler+0xe3c/0x12f0 [iwlwifi]
+         irq_thread_fn+0x20/0x50
+         irq_thread+0x11f/0x150
+         kthread+0xd2/0xf0
+         ret_from_fork+0x42/0x70
+    } hitcount:         85  len:      28884
+    { stacktrace:
+         __netif_receive_skb_core+0x46d/0x990
+         __netif_receive_skb+0x18/0x60
+         netif_receive_skb_internal+0x23/0x90
+         napi_gro_complete+0xa4/0xe0
+         dev_gro_receive+0x23a/0x360
+         napi_gro_receive+0x30/0x100
+         ieee80211_deliver_skb+0xd6/0x270 [mac80211]
+         ieee80211_rx_handlers+0xccf/0x22f0 [mac80211]
+         ieee80211_prepare_and_rx_handle+0x4e7/0xc40 [mac80211]
+         ieee80211_rx+0x31d/0x900 [mac80211]
+         iwlagn_rx_reply_rx+0x3db/0x6f0 [iwldvm]
+         iwl_rx_dispatch+0x8e/0xf0 [iwldvm]
+         iwl_pcie_irq_handler+0xe3c/0x12f0 [iwlwifi]
+         irq_thread_fn+0x20/0x50
+         irq_thread+0x11f/0x150
+         kthread+0xd2/0xf0
+    } hitcount:         98  len:     664329
+    { stacktrace:
+         __netif_receive_skb_core+0x46d/0x990
+         __netif_receive_skb+0x18/0x60
+         process_backlog+0xa8/0x150
+         net_rx_action+0x15d/0x340
+         __do_softirq+0x114/0x2c0
+         do_softirq_own_stack+0x1c/0x30
+         do_softirq+0x65/0x70
+         __local_bh_enable_ip+0xb5/0xc0
+         ip_finish_output+0x1f4/0x840
+         ip_output+0x6b/0xc0
+         ip_local_out_sk+0x31/0x40
+         ip_send_skb+0x1a/0x50
+         udp_send_skb+0x173/0x2a0
+         udp_sendmsg+0x2bf/0x9f0
+         inet_sendmsg+0x64/0xa0
+         sock_sendmsg+0x3d/0x50
+    } hitcount:        115  len:      13030
+    { stacktrace:
+         __netif_receive_skb_core+0x46d/0x990
+         __netif_receive_skb+0x18/0x60
+         netif_receive_skb_internal+0x23/0x90
+         napi_gro_complete+0xa4/0xe0
+         napi_gro_flush+0x6d/0x90
+         iwl_pcie_irq_handler+0x92a/0x12f0 [iwlwifi]
+         irq_thread_fn+0x20/0x50
+         irq_thread+0x11f/0x150
+         kthread+0xd2/0xf0
+         ret_from_fork+0x42/0x70
+    } hitcount:        934  len:    5512212
+
+    Totals:
+        Hits: 1232
+        Entries: 4
+        Dropped: 0
+
+  The above shows all the netif_receive_skb callpaths and their total
+  lengths for the duration of the wget command.
+
+  The 'clear' hist trigger param can be used to clear the hash table.
+  Suppose we wanted to try another run of the previous example but
+  this time also wanted to see the complete list of events that went
+  into the histogram.  In order to avoid having to set everything up
+  again, we can just clear the histogram first:
+
+    # echo 'hist:key=stacktrace:vals=len:clear' >> \
+           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
+
+  Just to verify that it is in fact cleared, here's what we now see in
+  the hist file:
+
+    # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist
+    # trigger info: hist:keys=stacktrace:vals=len:sort=hitcount:size=2048 [paused]
+
+    Totals:
+        Hits: 0
+        Entries: 0
+        Dropped: 0
+
+  Since we want to see the detailed list of every netif_receive_skb
+  event occurring during the new run, which are in fact the same
+  events being aggregated into the hash table, we add some additional
+  'enable_event' events to the triggering sched_process_exec and
+  sched_process_exit events as such:
+
+    # echo 'enable_event:net:netif_receive_skb if filename==/usr/bin/wget' > \
+           /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger
+
+    # echo 'disable_event:net:netif_receive_skb if comm==wget' > \
+           /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger
+
+  If you read the trigger files for the sched_process_exec and
+  sched_process_exit triggers, you should see two triggers for each:
+  one enabling/disabling the hist aggregation and the other
+  enabling/disabling the logging of events:
+
+    # cat /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger
+    enable_event:net:netif_receive_skb:unlimited if filename==/usr/bin/wget
+    enable_hist:net:netif_receive_skb:unlimited if filename==/usr/bin/wget
+
+    # cat /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger
+    enable_event:net:netif_receive_skb:unlimited if comm==wget
+    disable_hist:net:netif_receive_skb:unlimited if comm==wget
+
+  In other words, whenever either of the sched_process_exec or
+  sched_process_exit events is hit and matches 'wget', it enables or
+  disables both the histogram and the event log, and what you end up
+  with is a hash table and set of events just covering the specified
+  duration.  Run the wget command again:
+
+    $ wget https://www.kernel.org/pub/linux/kernel/v3.x/patch-3.19.xz
+
+  Displaying the 'hist' file should show something similar to what you
+  saw in the last run, but this time you should also see the
+  individual events in the trace file:
+
+    # cat /sys/kernel/debug/tracing/trace
+
+    # tracer: nop
+    #
+    # entries-in-buffer/entries-written: 183/1426   #P:4
+    #
+    #                              _-----=> irqs-off
+    #                             / _----=> need-resched
+    #                            | / _---=> hardirq/softirq
+    #                            || / _--=> preempt-depth
+    #                            ||| /     delay
+    #           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION
+    #              | |       |   ||||       |         |
+                wget-15108 [000] ..s1 31769.606929: netif_receive_skb: dev=lo skbaddr=ffff88009c353100 len=60
+                wget-15108 [000] ..s1 31769.606999: netif_receive_skb: dev=lo skbaddr=ffff88009c353200 len=60
+             dnsmasq-1382  [000] ..s1 31769.677652: netif_receive_skb: dev=lo skbaddr=ffff88009c352b00 len=130
+             dnsmasq-1382  [000] ..s1 31769.685917: netif_receive_skb: dev=lo skbaddr=ffff88009c352200 len=138
+    ##### CPU 2 buffer started ####
+      irq/29-iwlwifi-559   [002] ..s. 31772.031529: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d433d00 len=2948
+      irq/29-iwlwifi-559   [002] ..s. 31772.031572: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d432200 len=1500
+      irq/29-iwlwifi-559   [002] ..s. 31772.032196: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d433100 len=2948
+      irq/29-iwlwifi-559   [002] ..s. 31772.032761: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d433000 len=2948
+      irq/29-iwlwifi-559   [002] ..s. 31772.033220: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d432e00 len=1500
+    .
+    .
+    .
+
+  The following example demonstrates how multiple hist triggers can be
+  attached to a given event.  This capability can be useful for
+  creating a set of different summaries derived from the same set of
+  events, or for comparing the effects of different filters, among
+  other things.
+
+    # echo 'hist:keys=skbaddr.hex:vals=len if len < 0' >> \
+           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
+    # echo 'hist:keys=skbaddr.hex:vals=len if len > 4096' >> \
+           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
+    # echo 'hist:keys=skbaddr.hex:vals=len if len == 256' >> \
+           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
+    # echo 'hist:keys=skbaddr.hex:vals=len' >> \
+           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
+    # echo 'hist:keys=len:vals=common_preempt_count' >> \
+           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
+
+  The above set of commands create four triggers differing only in
+  their filters, along with a completely different though fairly
+  nonsensical trigger.  Note that in order to append multiple hist
+  triggers to the same file, you should use the '>>' operator to
+  append them ('>' will also add the new hist trigger, but will remove
+  any existing hist triggers beforehand).
+
+  Displaying the contents of the 'hist' file for the event shows the
+  contents of all five histograms:
+
+    # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist
+
+    # event histogram
+    #
+    # trigger info: hist:keys=len:vals=hitcount,common_preempt_count:sort=hitcount:size=2048 [active]
+    #
+
+    { len:        176 } hitcount:          1  common_preempt_count:          0
+    { len:        223 } hitcount:          1  common_preempt_count:          0
+    { len:       4854 } hitcount:          1  common_preempt_count:          0
+    { len:        395 } hitcount:          1  common_preempt_count:          0
+    { len:        177 } hitcount:          1  common_preempt_count:          0
+    { len:        446 } hitcount:          1  common_preempt_count:          0
+    { len:       1601 } hitcount:          1  common_preempt_count:          0
+    .
+    .
+    .
+    { len:       1280 } hitcount:         66  common_preempt_count:          0
+    { len:        116 } hitcount:         81  common_preempt_count:         40
+    { len:        708 } hitcount:        112  common_preempt_count:          0
+    { len:         46 } hitcount:        221  common_preempt_count:          0
+    { len:       1264 } hitcount:        458  common_preempt_count:          0
+
+    Totals:
+        Hits: 1428
+        Entries: 147
+        Dropped: 0
+
+
+    # event histogram
+    #
+    # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 [active]
+    #
+
+    { skbaddr: ffff8800baee5e00 } hitcount:          1  len:        130
+    { skbaddr: ffff88005f3d5600 } hitcount:          1  len:       1280
+    { skbaddr: ffff88005f3d4900 } hitcount:          1  len:       1280
+    { skbaddr: ffff88009fed6300 } hitcount:          1  len:        115
+    { skbaddr: ffff88009fe0ad00 } hitcount:          1  len:        115
+    { skbaddr: ffff88008cdb1900 } hitcount:          1  len:         46
+    { skbaddr: ffff880064b5ef00 } hitcount:          1  len:        118
+    { skbaddr: ffff880044e3c700 } hitcount:          1  len:         60
+    { skbaddr: ffff880100065900 } hitcount:          1  len:         46
+    { skbaddr: ffff8800d46bd500 } hitcount:          1  len:        116
+    { skbaddr: ffff88005f3d5f00 } hitcount:          1  len:       1280
+    { skbaddr: ffff880100064700 } hitcount:          1  len:        365
+    { skbaddr: ffff8800badb6f00 } hitcount:          1  len:         60
+    .
+    .
+    .
+    { skbaddr: ffff88009fe0be00 } hitcount:         27  len:      24677
+    { skbaddr: ffff88009fe0a400 } hitcount:         27  len:      23052
+    { skbaddr: ffff88009fe0b700 } hitcount:         31  len:      25589
+    { skbaddr: ffff88009fe0b600 } hitcount:         32  len:      27326
+    { skbaddr: ffff88006a462800 } hitcount:         68  len:      71678
+    { skbaddr: ffff88006a463700 } hitcount:         70  len:      72678
+    { skbaddr: ffff88006a462b00 } hitcount:         71  len:      77589
+    { skbaddr: ffff88006a463600 } hitcount:         73  len:      71307
+    { skbaddr: ffff88006a462200 } hitcount:         81  len:      81032
+
+    Totals:
+        Hits: 1451
+        Entries: 318
+        Dropped: 0
+
+
+    # event histogram
+    #
+    # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 if len == 256 [active]
+    #
+
+
+    Totals:
+        Hits: 0
+        Entries: 0
+        Dropped: 0
+
+
+    # event histogram
+    #
+    # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 if len > 4096 [active]
+    #
+
+    { skbaddr: ffff88009fd2c300 } hitcount:          1  len:       7212
+    { skbaddr: ffff8800d2bcce00 } hitcount:          1  len:       7212
+    { skbaddr: ffff8800d2bcd700 } hitcount:          1  len:       7212
+    { skbaddr: ffff8800d2bcda00 } hitcount:          1  len:      21492
+    { skbaddr: ffff8800ae2e2d00 } hitcount:          1  len:       7212
+    { skbaddr: ffff8800d2bcdb00 } hitcount:          1  len:       7212
+    { skbaddr: ffff88006a4df500 } hitcount:          1  len:       4854
+    { skbaddr: ffff88008ce47b00 } hitcount:          1  len:      18636
+    { skbaddr: ffff8800ae2e2200 } hitcount:          1  len:      12924
+    { skbaddr: ffff88005f3e1000 } hitcount:          1  len:       4356
+    { skbaddr: ffff8800d2bcdc00 } hitcount:          2  len:      24420
+    { skbaddr: ffff8800d2bcc200 } hitcount:          2  len:      12996
+
+    Totals:
+        Hits: 14
+        Entries: 12
+        Dropped: 0
+
+
+    # event histogram
+    #
+    # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 if len < 0 [active]
+    #
+
+
+    Totals:
+        Hits: 0
+        Entries: 0
+        Dropped: 0
+
+  Named triggers can be used to have triggers share a common set of
+  histogram data.  This capability is mostly useful for combining the
+  output of events generated by tracepoints contained inside inline
+  functions, but names can be used in a hist trigger on any event.
+  For example, these two triggers when hit will update the same 'len'
+  field in the shared 'foo' histogram data:
+
+    # echo 'hist:name=foo:keys=skbaddr.hex:vals=len' > \
+           /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger
+    # echo 'hist:name=foo:keys=skbaddr.hex:vals=len' > \
+           /sys/kernel/debug/tracing/events/net/netif_rx/trigger
+
+  You can see that they're updating common histogram data by reading
+  each event's hist files at the same time:
+
+    # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist;
+      cat /sys/kernel/debug/tracing/events/net/netif_rx/hist
+
+    # event histogram
+    #
+    # trigger info: hist:name=foo:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 [active]
+    #
+
+    { skbaddr: ffff88000ad53500 } hitcount:          1  len:         46
+    { skbaddr: ffff8800af5a1500 } hitcount:          1  len:         76
+    { skbaddr: ffff8800d62a1900 } hitcount:          1  len:         46
+    { skbaddr: ffff8800d2bccb00 } hitcount:          1  len:        468
+    { skbaddr: ffff8800d3c69900 } hitcount:          1  len:         46
+    { skbaddr: ffff88009ff09100 } hitcount:          1  len:         52
+    { skbaddr: ffff88010f13ab00 } hitcount:          1  len:        168
+    { skbaddr: ffff88006a54f400 } hitcount:          1  len:         46
+    { skbaddr: ffff8800d2bcc500 } hitcount:          1  len:        260
+    { skbaddr: ffff880064505000 } hitcount:          1  len:         46
+    { skbaddr: ffff8800baf24e00 } hitcount:          1  len:         32
+    { skbaddr: ffff88009fe0ad00 } hitcount:          1  len:         46
+    { skbaddr: ffff8800d3edff00 } hitcount:          1  len:         44
+    { skbaddr: ffff88009fe0b400 } hitcount:          1  len:        168
+    { skbaddr: ffff8800a1c55a00 } hitcount:          1  len:         40
+    { skbaddr: ffff8800d2bcd100 } hitcount:          1  len:         40
+    { skbaddr: ffff880064505f00 } hitcount:          1  len:        174
+    { skbaddr: ffff8800a8bff200 } hitcount:          1  len:        160
+    { skbaddr: ffff880044e3cc00 } hitcount:          1  len:         76
+    { skbaddr: ffff8800a8bfe700 } hitcount:          1  len:         46
+    { skbaddr: ffff8800d2bcdc00 } hitcount:          1  len:         32
+    { skbaddr: ffff8800a1f64800 } hitcount:          1  len:         46
+    { skbaddr: ffff8800d2bcde00 } hitcount:          1  len:        988
+    { skbaddr: ffff88006a5dea00 } hitcount:          1  len:         46
+    { skbaddr: ffff88002e37a200 } hitcount:          1  len:         44
+    { skbaddr: ffff8800a1f32c00 } hitcount:          2  len:        676
+    { skbaddr: ffff88000ad52600 } hitcount:          2  len:        107
+    { skbaddr: ffff8800a1f91e00 } hitcount:          2  len:         92
+    { skbaddr: ffff8800af5a0200 } hitcount:          2  len:        142
+    { skbaddr: ffff8800d2bcc600 } hitcount:          2  len:        220
+    { skbaddr: ffff8800ba36f500 } hitcount:          2  len:         92
+    { skbaddr: ffff8800d021f800 } hitcount:          2  len:         92
+    { skbaddr: ffff8800a1f33600 } hitcount:          2  len:        675
+    { skbaddr: ffff8800a8bfff00 } hitcount:          3  len:        138
+    { skbaddr: ffff8800d62a1300 } hitcount:          3  len:        138
+    { skbaddr: ffff88002e37a100 } hitcount:          4  len:        184
+    { skbaddr: ffff880064504400 } hitcount:          4  len:        184
+    { skbaddr: ffff8800a8bfec00 } hitcount:          4  len:        184
+    { skbaddr: ffff88000ad53700 } hitcount:          5  len:        230
+    { skbaddr: ffff8800d2bcdb00 } hitcount:          5  len:        196
+    { skbaddr: ffff8800a1f90000 } hitcount:          6  len:        276
+    { skbaddr: ffff88006a54f900 } hitcount:          6  len:        276
+
+    Totals:
+        Hits: 81
+        Entries: 42
+        Dropped: 0
+    # event histogram
+    #
+    # trigger info: hist:name=foo:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 [active]
+    #
+
+    { skbaddr: ffff88000ad53500 } hitcount:          1  len:         46
+    { skbaddr: ffff8800af5a1500 } hitcount:          1  len:         76
+    { skbaddr: ffff8800d62a1900 } hitcount:          1  len:         46
+    { skbaddr: ffff8800d2bccb00 } hitcount:          1  len:        468
+    { skbaddr: ffff8800d3c69900 } hitcount:          1  len:         46
+    { skbaddr: ffff88009ff09100 } hitcount:          1  len:         52
+    { skbaddr: ffff88010f13ab00 } hitcount:          1  len:        168
+    { skbaddr: ffff88006a54f400 } hitcount:          1  len:         46
+    { skbaddr: ffff8800d2bcc500 } hitcount:          1  len:        260
+    { skbaddr: ffff880064505000 } hitcount:          1  len:         46
+    { skbaddr: ffff8800baf24e00 } hitcount:          1  len:         32
+    { skbaddr: ffff88009fe0ad00 } hitcount:          1  len:         46
+    { skbaddr: ffff8800d3edff00 } hitcount:          1  len:         44
+    { skbaddr: ffff88009fe0b400 } hitcount:          1  len:        168
+    { skbaddr: ffff8800a1c55a00 } hitcount:          1  len:         40
+    { skbaddr: ffff8800d2bcd100 } hitcount:          1  len:         40
+    { skbaddr: ffff880064505f00 } hitcount:          1  len:        174
+    { skbaddr: ffff8800a8bff200 } hitcount:          1  len:        160
+    { skbaddr: ffff880044e3cc00 } hitcount:          1  len:         76
+    { skbaddr: ffff8800a8bfe700 } hitcount:          1  len:         46
+    { skbaddr: ffff8800d2bcdc00 } hitcount:          1  len:         32
+    { skbaddr: ffff8800a1f64800 } hitcount:          1  len:         46
+    { skbaddr: ffff8800d2bcde00 } hitcount:          1  len:        988
+    { skbaddr: ffff88006a5dea00 } hitcount:          1  len:         46
+    { skbaddr: ffff88002e37a200 } hitcount:          1  len:         44
+    { skbaddr: ffff8800a1f32c00 } hitcount:          2  len:        676
+    { skbaddr: ffff88000ad52600 } hitcount:          2  len:        107
+    { skbaddr: ffff8800a1f91e00 } hitcount:          2  len:         92
+    { skbaddr: ffff8800af5a0200 } hitcount:          2  len:        142
+    { skbaddr: ffff8800d2bcc600 } hitcount:          2  len:        220
+    { skbaddr: ffff8800ba36f500 } hitcount:          2  len:         92
+    { skbaddr: ffff8800d021f800 } hitcount:          2  len:         92
+    { skbaddr: ffff8800a1f33600 } hitcount:          2  len:        675
+    { skbaddr: ffff8800a8bfff00 } hitcount:          3  len:        138
+    { skbaddr: ffff8800d62a1300 } hitcount:          3  len:        138
+    { skbaddr: ffff88002e37a100 } hitcount:          4  len:        184
+    { skbaddr: ffff880064504400 } hitcount:          4  len:        184
+    { skbaddr: ffff8800a8bfec00 } hitcount:          4  len:        184
+    { skbaddr: ffff88000ad53700 } hitcount:          5  len:        230
+    { skbaddr: ffff8800d2bcdb00 } hitcount:          5  len:        196
+    { skbaddr: ffff8800a1f90000 } hitcount:          6  len:        276
+    { skbaddr: ffff88006a54f900 } hitcount:          6  len:        276
+
+    Totals:
+        Hits: 81
+        Entries: 42
+        Dropped: 0
+
+  And here's an example that shows how to combine histogram data from
+  any two events even if they don't share any 'compatible' fields
+  other than 'hitcount' and 'stacktrace'.  These commands create a
+  couple of triggers named 'bar' using those fields:
+
+    # echo 'hist:name=bar:key=stacktrace:val=hitcount' > \
+           /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger
+    # echo 'hist:name=bar:key=stacktrace:val=hitcount' > \
+          /sys/kernel/debug/tracing/events/net/netif_rx/trigger
+
+  And displaying the output of either shows some interesting if
+  somewhat confusing output:
+
+    # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist
+    # cat /sys/kernel/debug/tracing/events/net/netif_rx/hist
+
+    # event histogram
+    #
+    # trigger info: hist:name=bar:keys=stacktrace:vals=hitcount:sort=hitcount:size=2048 [active]
+    #
+
+    { stacktrace:
+             _do_fork+0x18e/0x330
+             kernel_thread+0x29/0x30
+             kthreadd+0x154/0x1b0
+             ret_from_fork+0x3f/0x70
+    } hitcount:          1
+    { stacktrace:
+             netif_rx_internal+0xb2/0xd0
+             netif_rx_ni+0x20/0x70
+             dev_loopback_xmit+0xaa/0xd0
+             ip_mc_output+0x126/0x240
+             ip_local_out_sk+0x31/0x40
+             igmp_send_report+0x1e9/0x230
+             igmp_timer_expire+0xe9/0x120
+             call_timer_fn+0x39/0xf0
+             run_timer_softirq+0x1e1/0x290
+             __do_softirq+0xfd/0x290
+             irq_exit+0x98/0xb0
+             smp_apic_timer_interrupt+0x4a/0x60
+             apic_timer_interrupt+0x6d/0x80
+             cpuidle_enter+0x17/0x20
+             call_cpuidle+0x3b/0x60
+             cpu_startup_entry+0x22d/0x310
+    } hitcount:          1
+    { stacktrace:
+             netif_rx_internal+0xb2/0xd0
+             netif_rx_ni+0x20/0x70
+             dev_loopback_xmit+0xaa/0xd0
+             ip_mc_output+0x17f/0x240
+             ip_local_out_sk+0x31/0x40
+             ip_send_skb+0x1a/0x50
+             udp_send_skb+0x13e/0x270
+             udp_sendmsg+0x2bf/0x980
+             inet_sendmsg+0x67/0xa0
+             sock_sendmsg+0x38/0x50
+             SYSC_sendto+0xef/0x170
+             SyS_sendto+0xe/0x10
+             entry_SYSCALL_64_fastpath+0x12/0x6a
+    } hitcount:          2
+    { stacktrace:
+             netif_rx_internal+0xb2/0xd0
+             netif_rx+0x1c/0x60
+             loopback_xmit+0x6c/0xb0
+             dev_hard_start_xmit+0x219/0x3a0
+             __dev_queue_xmit+0x415/0x4f0
+             dev_queue_xmit_sk+0x13/0x20
+             ip_finish_output2+0x237/0x340
+             ip_finish_output+0x113/0x1d0
+             ip_output+0x66/0xc0
+             ip_local_out_sk+0x31/0x40
+             ip_send_skb+0x1a/0x50
+             udp_send_skb+0x16d/0x270
+             udp_sendmsg+0x2bf/0x980
+             inet_sendmsg+0x67/0xa0
+             sock_sendmsg+0x38/0x50
+             ___sys_sendmsg+0x14e/0x270
+    } hitcount:         76
+    { stacktrace:
+             netif_rx_internal+0xb2/0xd0
+             netif_rx+0x1c/0x60
+             loopback_xmit+0x6c/0xb0
+             dev_hard_start_xmit+0x219/0x3a0
+             __dev_queue_xmit+0x415/0x4f0
+             dev_queue_xmit_sk+0x13/0x20
+             ip_finish_output2+0x237/0x340
+             ip_finish_output+0x113/0x1d0
+             ip_output+0x66/0xc0
+             ip_local_out_sk+0x31/0x40
+             ip_send_skb+0x1a/0x50
+             udp_send_skb+0x16d/0x270
+             udp_sendmsg+0x2bf/0x980
+             inet_sendmsg+0x67/0xa0
+             sock_sendmsg+0x38/0x50
+             ___sys_sendmsg+0x269/0x270
+    } hitcount:         77
+    { stacktrace:
+             netif_rx_internal+0xb2/0xd0
+             netif_rx+0x1c/0x60
+             loopback_xmit+0x6c/0xb0
+             dev_hard_start_xmit+0x219/0x3a0
+             __dev_queue_xmit+0x415/0x4f0
+             dev_queue_xmit_sk+0x13/0x20
+             ip_finish_output2+0x237/0x340
+             ip_finish_output+0x113/0x1d0
+             ip_output+0x66/0xc0
+             ip_local_out_sk+0x31/0x40
+             ip_send_skb+0x1a/0x50
+             udp_send_skb+0x16d/0x270
+             udp_sendmsg+0x2bf/0x980
+             inet_sendmsg+0x67/0xa0
+             sock_sendmsg+0x38/0x50
+             SYSC_sendto+0xef/0x170
+    } hitcount:         88
+    { stacktrace:
+             _do_fork+0x18e/0x330
+             SyS_clone+0x19/0x20
+             entry_SYSCALL_64_fastpath+0x12/0x6a
+    } hitcount:        244
+
+    Totals:
+        Hits: 489
+        Entries: 7
+        Dropped: 0
+
+
+2.2 Inter-event hist triggers
+-----------------------------
+
+Inter-event hist triggers are hist triggers that combine values from
+one or more other events and create a histogram using that data.  Data
+from an inter-event histogram can in turn become the source for
+further combined histograms, thus providing a chain of related
+histograms, which is important for some applications.
+
+The most important example of an inter-event quantity that can be used
+in this manner is latency, which is simply a difference in timestamps
+between two events.  Although latency is the most important
+inter-event quantity, note that because the support is completely
+general across the trace event subsystem, any event field can be used
+in an inter-event quantity.
+
+An example of a histogram that combines data from other histograms
+into a useful chain would be a 'wakeupswitch latency' histogram that
+combines a 'wakeup latency' histogram and a 'switch latency'
+histogram.
+
+Normally, a hist trigger specification consists of a (possibly
+compound) key along with one or more numeric values, which are
+continually updated sums associated with that key.  A histogram
+specification in this case consists of individual key and value
+specifications that refer to trace event fields associated with a
+single event type.
+
+The inter-event hist trigger extension allows fields from multiple
+events to be referenced and combined into a multi-event histogram
+specification.  In support of this overall goal, a few enabling
+features have been added to the hist trigger support:
+
+  - In order to compute an inter-event quantity, a value from one
+    event needs to saved and then referenced from another event.  This
+    requires the introduction of support for histogram 'variables'.
+
+  - The computation of inter-event quantities and their combination
+    require some minimal amount of support for applying simple
+    expressions to variables (+ and -).
+
+  - A histogram consisting of inter-event quantities isn't logically a
+    histogram on either event (so having the 'hist' file for either
+    event host the histogram output doesn't really make sense).  To
+    address the idea that the histogram is associated with a
+    combination of events, support is added allowing the creation of
+    'synthetic' events that are events derived from other events.
+    These synthetic events are full-fledged events just like any other
+    and can be used as such, as for instance to create the
+    'combination' histograms mentioned previously.
+
+  - A set of 'actions' can be associated with histogram entries -
+    these can be used to generate the previously mentioned synthetic
+    events, but can also be used for other purposes, such as for
+    example saving context when a 'max' latency has been hit.
+
+  - Trace events don't have a 'timestamp' associated with them, but
+    there is an implicit timestamp saved along with an event in the
+    underlying ftrace ring buffer.  This timestamp is now exposed as a
+    a synthetic field named 'common_timestamp' which can be used in
+    histograms as if it were any other event field; it isn't an actual
+    field in the trace format but rather is a synthesized value that
+    nonetheless can be used as if it were an actual field.  By default
+    it is in units of nanoseconds; appending '.usecs' to a
+    common_timestamp field changes the units to microseconds.
+
+A note on inter-event timestamps: If common_timestamp is used in a
+histogram, the trace buffer is automatically switched over to using
+absolute timestamps and the "global" trace clock, in order to avoid
+bogus timestamp differences with other clocks that aren't coherent
+across CPUs.  This can be overridden by specifying one of the other
+trace clocks instead, using the "clock=XXX" hist trigger attribute,
+where XXX is any of the clocks listed in the tracing/trace_clock
+pseudo-file.
+
+These features are described in more detail in the following sections.
+
+2.2.1 Histogram Variables
+-------------------------
+
+Variables are simply named locations used for saving and retrieving
+values between matching events.  A 'matching' event is defined as an
+event that has a matching key - if a variable is saved for a histogram
+entry corresponding to that key, any subsequent event with a matching
+key can access that variable.
+
+A variable's value is normally available to any subsequent event until
+it is set to something else by a subsequent event.  The one exception
+to that rule is that any variable used in an expression is essentially
+'read-once' - once it's used by an expression in a subsequent event,
+it's reset to its 'unset' state, which means it can't be used again
+unless it's set again.  This ensures not only that an event doesn't
+use an uninitialized variable in a calculation, but that that variable
+is used only once and not for any unrelated subsequent match.
+
+The basic syntax for saving a variable is to simply prefix a unique
+variable name not corresponding to any keyword along with an '=' sign
+to any event field.
+
+Either keys or values can be saved and retrieved in this way.  This
+creates a variable named 'ts0' for a histogram entry with the key
+'next_pid':
+
+  # echo 'hist:keys=next_pid:vals=$ts0:ts0=common_timestamp ... >> \
+	event/trigger
+
+The ts0 variable can be accessed by any subsequent event having the
+same pid as 'next_pid'.
+
+Variable references are formed by prepending the variable name with
+the '$' sign.  Thus for example, the ts0 variable above would be
+referenced as '$ts0' in expressions.
+
+Because 'vals=' is used, the common_timestamp variable value above
+will also be summed as a normal histogram value would (though for a
+timestamp it makes little sense).
+
+The below shows that a key value can also be saved in the same way:
+
+  # echo 'hist:timer_pid=common_pid:key=timer_pid ...' >> event/trigger
+
+If a variable isn't a key variable or prefixed with 'vals=', the
+associated event field will be saved in a variable but won't be summed
+as a value:
+
+  # echo 'hist:keys=next_pid:ts1=common_timestamp ... >> event/trigger
+
+Multiple variables can be assigned at the same time.  The below would
+result in both ts0 and b being created as variables, with both
+common_timestamp and field1 additionally being summed as values:
+
+  # echo 'hist:keys=pid:vals=$ts0,$b:ts0=common_timestamp,b=field1 ... >> \
+	event/trigger
+
+Note that variable assignments can appear either preceding or
+following their use.  The command below behaves identically to the
+command above:
+
+  # echo 'hist:keys=pid:ts0=common_timestamp,b=field1:vals=$ts0,$b ... >> \
+	event/trigger
+
+Any number of variables not bound to a 'vals=' prefix can also be
+assigned by simply separating them with colons.  Below is the same
+thing but without the values being summed in the histogram:
+
+  # echo 'hist:keys=pid:ts0=common_timestamp:b=field1 ... >> event/trigger
+
+Variables set as above can be referenced and used in expressions on
+another event.
+
+For example, here's how a latency can be calculated:
+
+  # echo 'hist:keys=pid,prio:ts0=common_timestamp ... >> event1/trigger
+  # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp-$ts0 ... >> event2/trigger
+
+In the first line above, the event's timetamp is saved into the
+variable ts0.  In the next line, ts0 is subtracted from the second
+event's timestamp to produce the latency, which is then assigned into
+yet another variable, 'wakeup_lat'.  The hist trigger below in turn
+makes use of the wakeup_lat variable to compute a combined latency
+using the same key and variable from yet another event:
+
+  # echo 'hist:key=pid:wakeupswitch_lat=$wakeup_lat+$switchtime_lat ... >> event3/trigger
+
+2.2.2 Synthetic Events
+----------------------
+
+Synthetic events are user-defined events generated from hist trigger
+variables or fields associated with one or more other events.  Their
+purpose is to provide a mechanism for displaying data spanning
+multiple events consistent with the existing and already familiar
+usage for normal events.
+
+To define a synthetic event, the user writes a simple specification
+consisting of the name of the new event along with one or more
+variables and their types, which can be any valid field type,
+separated by semicolons, to the tracing/synthetic_events file.
+
+For instance, the following creates a new event named 'wakeup_latency'
+with 3 fields: lat, pid, and prio.  Each of those fields is simply a
+variable reference to a variable on another event:
+
+  # echo 'wakeup_latency \
+          u64 lat; \
+          pid_t pid; \
+	  int prio' >> \
+	  /sys/kernel/debug/tracing/synthetic_events
+
+Reading the tracing/synthetic_events file lists all the currently
+defined synthetic events, in this case the event defined above:
+
+  # cat /sys/kernel/debug/tracing/synthetic_events
+    wakeup_latency u64 lat; pid_t pid; int prio
+
+An existing synthetic event definition can be removed by prepending
+the command that defined it with a '!':
+
+  # echo '!wakeup_latency u64 lat pid_t pid int prio' >> \
+    /sys/kernel/debug/tracing/synthetic_events
+
+At this point, there isn't yet an actual 'wakeup_latency' event
+instantiated in the event subsytem - for this to happen, a 'hist
+trigger action' needs to be instantiated and bound to actual fields
+and variables defined on other events (see Section 6.3.3 below).
+
+Once that is done, an event instance is created, and a histogram can
+be defined using it:
+
+  # echo 'hist:keys=pid,prio,lat.log2:sort=pid,lat' >> \
+        /sys/kernel/debug/tracing/events/synthetic/wakeup_latency/trigger
+
+The new event is created under the tracing/events/synthetic/ directory
+and looks and behaves just like any other event:
+
+  # ls /sys/kernel/debug/tracing/events/synthetic/wakeup_latency
+        enable  filter  format  hist  id  trigger
+
+Like any other event, once a histogram is enabled for the event, the
+output can be displayed by reading the event's 'hist' file.
+
+2.2.3 Hist trigger 'actions'
+----------------------------
+
+A hist trigger 'action' is a function that's executed whenever a
+histogram entry is added or updated.
+
+The default 'action' if no special function is explicity specified is
+as it always has been, to simply update the set of values associated
+with an entry.  Some applications, however, may want to perform
+additional actions at that point, such as generate another event, or
+compare and save a maximum.
+
+The following additional actions are available.  To specify an action
+for a given event, simply specify the action between colons in the
+hist trigger specification.
+
+  - onmatch(matching.event).<synthetic_event_name>(param list)
+
+    The 'onmatch(matching.event).<synthetic_event_name>(params)' hist
+    trigger action is invoked whenever an event matches and the
+    histogram entry would be added or updated.  It causes the named
+    synthetic event to be generated with the values given in the
+    'param list'.  The result is the generation of a synthetic event
+    that consists of the values contained in those variables at the
+    time the invoking event was hit.
+
+    The 'param list' consists of one or more parameters which may be
+    either variables or fields defined on either the 'matching.event'
+    or the target event.  The variables or fields specified in the
+    param list may be either fully-qualified or unqualified.  If a
+    variable is specified as unqualified, it must be unique between
+    the two events.  A field name used as a param can be unqualified
+    if it refers to the target event, but must be fully qualified if
+    it refers to the matching event.  A fully-qualified name is of the
+    form 'system.event_name.$var_name' or 'system.event_name.field'.
+
+    The 'matching.event' specification is simply the fully qualified
+    event name of the event that matches the target event for the
+    onmatch() functionality, in the form 'system.event_name'.
+
+    Finally, the number and type of variables/fields in the 'param
+    list' must match the number and types of the fields in the
+    synthetic event being generated.
+
+    As an example the below defines a simple synthetic event and uses
+    a variable defined on the sched_wakeup_new event as a parameter
+    when invoking the synthetic event.  Here we define the synthetic
+    event:
+
+    # echo 'wakeup_new_test pid_t pid' >> \
+           /sys/kernel/debug/tracing/synthetic_events
+
+    # cat /sys/kernel/debug/tracing/synthetic_events
+          wakeup_new_test pid_t pid
+
+    The following hist trigger both defines the missing testpid
+    variable and specifies an onmatch() action that generates a
+    wakeup_new_test synthetic event whenever a sched_wakeup_new event
+    occurs, which because of the 'if comm == "cyclictest"' filter only
+    happens when the executable is cyclictest:
+
+    # echo 'hist:keys=$testpid:testpid=pid:onmatch(sched.sched_wakeup_new).\
+            wakeup_new_test($testpid) if comm=="cyclictest"' >> \
+            /sys/kernel/debug/tracing/events/sched/sched_wakeup_new/trigger
+
+    Creating and displaying a histogram based on those events is now
+    just a matter of using the fields and new synthetic event in the
+    tracing/events/synthetic directory, as usual:
+
+    # echo 'hist:keys=pid:sort=pid' >> \
+           /sys/kernel/debug/tracing/events/synthetic/wakeup_new_test/trigger
+
+    Running 'cyclictest' should cause wakeup_new events to generate
+    wakeup_new_test synthetic events which should result in histogram
+    output in the wakeup_new_test event's hist file:
+
+    # cat /sys/kernel/debug/tracing/events/synthetic/wakeup_new_test/hist
+
+    A more typical usage would be to use two events to calculate a
+    latency.  The following example uses a set of hist triggers to
+    produce a 'wakeup_latency' histogram:
+
+    First, we define a 'wakeup_latency' synthetic event:
+
+    # echo 'wakeup_latency u64 lat; pid_t pid; int prio' >> \
+            /sys/kernel/debug/tracing/synthetic_events
+
+    Next, we specify that whenever we see a sched_waking event for a
+    cyclictest thread, save the timestamp in a 'ts0' variable:
+
+    # echo 'hist:keys=$saved_pid:saved_pid=pid:ts0=common_timestamp.usecs \
+            if comm=="cyclictest"' >> \
+	    /sys/kernel/debug/tracing/events/sched/sched_waking/trigger
+
+    Then, when the corresponding thread is actually scheduled onto the
+    CPU by a sched_switch event, calculate the latency and use that
+    along with another variable and an event field to generate a
+    wakeup_latency synthetic event:
+
+    # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:\
+            onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,\
+	            $saved_pid,next_prio) if next_comm=="cyclictest"' >> \
+	    /sys/kernel/debug/tracing/events/sched/sched_switch/trigger
+
+    We also need to create a histogram on the wakeup_latency synthetic
+    event in order to aggregate the generated synthetic event data:
+
+    # echo 'hist:keys=pid,prio,lat:sort=pid,lat' >> \
+            /sys/kernel/debug/tracing/events/synthetic/wakeup_latency/trigger
+
+    Finally, once we've run cyclictest to actually generate some
+    events, we can see the output by looking at the wakeup_latency
+    synthetic event's hist file:
+
+    # cat /sys/kernel/debug/tracing/events/synthetic/wakeup_latency/hist
+
+  - onmax(var).save(field,..	.)
+
+    The 'onmax(var).save(field,...)' hist trigger action is invoked
+    whenever the value of 'var' associated with a histogram entry
+    exceeds the current maximum contained in that variable.
+
+    The end result is that the trace event fields specified as the
+    onmax.save() params will be saved if 'var' exceeds the current
+    maximum for that hist trigger entry.  This allows context from the
+    event that exhibited the new maximum to be saved for later
+    reference.  When the histogram is displayed, additional fields
+    displaying the saved values will be printed.
+
+    As an example the below defines a couple of hist triggers, one for
+    sched_waking and another for sched_switch, keyed on pid.  Whenever
+    a sched_waking occurs, the timestamp is saved in the entry
+    corresponding to the current pid, and when the scheduler switches
+    back to that pid, the timestamp difference is calculated.  If the
+    resulting latency, stored in wakeup_lat, exceeds the current
+    maximum latency, the values specified in the save() fields are
+    recoreded:
+
+    # echo 'hist:keys=pid:ts0=common_timestamp.usecs \
+            if comm=="cyclictest"' >> \
+            /sys/kernel/debug/tracing/events/sched/sched_waking/trigger
+
+    # echo 'hist:keys=next_pid:\
+            wakeup_lat=common_timestamp.usecs-$ts0:\
+            onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm) \
+            if next_comm=="cyclictest"' >> \
+            /sys/kernel/debug/tracing/events/sched/sched_switch/trigger
+
+    When the histogram is displayed, the max value and the saved
+    values corresponding to the max are displayed following the rest
+    of the fields:
+
+    # cat /sys/kernel/debug/tracing/events/sched/sched_switch/hist
+      { next_pid:       2255 } hitcount:        239
+        common_timestamp-ts0:          0
+        max:         27
+	next_comm: cyclictest
+        prev_pid:          0  prev_prio:        120  prev_comm: swapper/1
+
+      { next_pid:       2256 } hitcount:       2355
+        common_timestamp-ts0: 0
+        max:         49  next_comm: cyclictest
+        prev_pid:          0  prev_prio:        120  prev_comm: swapper/0
+
+      Totals:
+          Hits: 12970
+          Entries: 2
+          Dropped: 0
diff --git a/Documentation/vm/hmm.txt b/Documentation/vm/hmm.txt
index 4d3aac9f4a5d..2d1d6f69e91b 100644
--- a/Documentation/vm/hmm.txt
+++ b/Documentation/vm/hmm.txt
@@ -1,152 +1,160 @@
 Heterogeneous Memory Management (HMM)
 
-Transparently allow any component of a program to use any memory region of said
-program with a device without using device specific memory allocator. This is
-becoming a requirement to simplify the use of advance heterogeneous computing
-where GPU, DSP or FPGA are use to perform various computations.
-
-This document is divided as follow, in the first section i expose the problems
-related to the use of a device specific allocator. The second section i expose
-the hardware limitations that are inherent to many platforms. The third section
-gives an overview of HMM designs. The fourth section explains how CPU page-
-table mirroring works and what is HMM purpose in this context. Fifth section
-deals with how device memory is represented inside the kernel. Finaly the last
-section present the new migration helper that allow to leverage the device DMA
-engine.
-
-
-1) Problems of using device specific memory allocator:
-2) System bus, device memory characteristics
-3) Share address space and migration
+Provide infrastructure and helpers to integrate non-conventional memory (device
+memory like GPU on board memory) into regular kernel path, with the cornerstone
+of this being specialized struct page for such memory (see sections 5 to 7 of
+this document).
+
+HMM also provides optional helpers for SVM (Share Virtual Memory), i.e.,
+allowing a device to transparently access program address coherently with the
+CPU meaning that any valid pointer on the CPU is also a valid pointer for the
+device. This is becoming mandatory to simplify the use of advanced hetero-
+geneous computing where GPU, DSP, or FPGA are used to perform various
+computations on behalf of a process.
+
+This document is divided as follows: in the first section I expose the problems
+related to using device specific memory allocators. In the second section, I
+expose the hardware limitations that are inherent to many platforms. The third
+section gives an overview of the HMM design. The fourth section explains how
+CPU page-table mirroring works and the purpose of HMM in this context. The
+fifth section deals with how device memory is represented inside the kernel.
+Finally, the last section presents a new migration helper that allows lever-
+aging the device DMA engine.
+
+
+1) Problems of using a device specific memory allocator:
+2) I/O bus, device memory characteristics
+3) Shared address space and migration
 4) Address space mirroring implementation and API
 5) Represent and manage device memory from core kernel point of view
-6) Migrate to and from device memory
+6) Migration to and from device memory
 7) Memory cgroup (memcg) and rss accounting
 
 
 -------------------------------------------------------------------------------
 
-1) Problems of using device specific memory allocator:
-
-Device with large amount of on board memory (several giga bytes) like GPU have
-historically manage their memory through dedicated driver specific API. This
-creates a disconnect between memory allocated and managed by device driver and
-regular application memory (private anonymous, share memory or regular file
-back memory). From here on i will refer to this aspect as split address space.
-I use share address space to refer to the opposite situation ie one in which
-any memory region can be use by device transparently.
-
-Split address space because device can only access memory allocated through the
-device specific API. This imply that all memory object in a program are not
-equal from device point of view which complicate large program that rely on a
-wide set of libraries.
-
-Concretly this means that code that wants to leverage device like GPU need to
-copy object between genericly allocated memory (malloc, mmap private/share/)
-and memory allocated through the device driver API (this still end up with an
-mmap but of the device file).
-
-For flat dataset (array, grid, image, ...) this isn't too hard to achieve but
-complex data-set (list, tree, ...) are hard to get right. Duplicating a complex
-data-set need to re-map all the pointer relations between each of its elements.
-This is error prone and program gets harder to debug because of the duplicate
-data-set.
-
-Split address space also means that library can not transparently use data they
-are getting from core program or other library and thus each library might have
-to duplicate its input data-set using specific memory allocator. Large project
-suffer from this and waste resources because of the various memory copy.
-
-Duplicating each library API to accept as input or output memory allocted by
+1) Problems of using a device specific memory allocator:
+
+Devices with a large amount of on board memory (several gigabytes) like GPUs
+have historically managed their memory through dedicated driver specific APIs.
+This creates a disconnect between memory allocated and managed by a device
+driver and regular application memory (private anonymous, shared memory, or
+regular file backed memory). From here on I will refer to this aspect as split
+address space. I use shared address space to refer to the opposite situation:
+i.e., one in which any application memory region can be used by a device
+transparently.
+
+Split address space happens because device can only access memory allocated
+through device specific API. This implies that all memory objects in a program
+are not equal from the device point of view which complicates large programs
+that rely on a wide set of libraries.
+
+Concretely this means that code that wants to leverage devices like GPUs needs
+to copy object between generically allocated memory (malloc, mmap private, mmap
+share) and memory allocated through the device driver API (this still ends up
+with an mmap but of the device file).
+
+For flat data sets (array, grid, image, ...) this isn't too hard to achieve but
+complex data sets (list, tree, ...) are hard to get right. Duplicating a
+complex data set needs to re-map all the pointer relations between each of its
+elements. This is error prone and program gets harder to debug because of the
+duplicate data set and addresses.
+
+Split address space also means that libraries cannot transparently use data
+they are getting from the core program or another library and thus each library
+might have to duplicate its input data set using the device specific memory
+allocator. Large projects suffer from this and waste resources because of the
+various memory copies.
+
+Duplicating each library API to accept as input or output memory allocated by
 each device specific allocator is not a viable option. It would lead to a
-combinatorial explosions in the library entry points.
+combinatorial explosion in the library entry points.
 
-Finaly with the advance of high level language constructs (in C++ but in other
-language too) it is now possible for compiler to leverage GPU or other devices
-without even the programmer knowledge. Some of compiler identified patterns are
-only do-able with a share address. It is as well more reasonable to use a share
-address space for all the other patterns.
+Finally, with the advance of high level language constructs (in C++ but in
+other languages too) it is now possible for the compiler to leverage GPUs and
+other devices without programmer knowledge. Some compiler identified patterns
+are only do-able with a shared address space. It is also more reasonable to use
+a shared address space for all other patterns.
 
 
 -------------------------------------------------------------------------------
 
-2) System bus, device memory characteristics
+2) I/O bus, device memory characteristics
 
-System bus cripple share address due to few limitations. Most system bus only
-allow basic memory access from device to main memory, even cache coherency is
-often optional. Access to device memory from CPU is even more limited, most
-often than not it is not cache coherent.
+I/O buses cripple shared address spaces due to a few limitations. Most I/O
+buses only allow basic memory access from device to main memory; even cache
+coherency is often optional. Access to device memory from CPU is even more
+limited. More often than not, it is not cache coherent.
 
-If we only consider the PCIE bus than device can access main memory (often
-through an IOMMU) and be cache coherent with the CPUs. However it only allows
-a limited set of atomic operation from device on main memory. This is worse
-in the other direction the CPUs can only access a limited range of the device
-memory and can not perform atomic operations on it. Thus device memory can not
-be consider like regular memory from kernel point of view.
+If we only consider the PCIE bus, then a device can access main memory (often
+through an IOMMU) and be cache coherent with the CPUs. However, it only allows
+a limited set of atomic operations from device on main memory. This is worse
+in the other direction: the CPU can only access a limited range of the device
+memory and cannot perform atomic operations on it. Thus device memory cannot
+be considered the same as regular memory from the kernel point of view.
 
 Another crippling factor is the limited bandwidth (~32GBytes/s with PCIE 4.0
-and 16 lanes). This is 33 times less that fastest GPU memory (1 TBytes/s).
-The final limitation is latency, access to main memory from the device has an
-order of magnitude higher latency than when the device access its own memory.
+and 16 lanes). This is 33 times less than the fastest GPU memory (1 TBytes/s).
+The final limitation is latency. Access to main memory from the device has an
+order of magnitude higher latency than when the device accesses its own memory.
 
-Some platform are developing new system bus or additions/modifications to PCIE
-to address some of those limitations (OpenCAPI, CCIX). They mainly allow two
+Some platforms are developing new I/O buses or additions/modifications to PCIE
+to address some of these limitations (OpenCAPI, CCIX). They mainly allow two-
 way cache coherency between CPU and device and allow all atomic operations the
-architecture supports. Saddly not all platform are following this trends and
-some major architecture are left without hardware solutions to those problems.
+architecture supports. Sadly, not all platforms are following this trend and
+some major architectures are left without hardware solutions to these problems.
 
-So for share address space to make sense not only we must allow device to
-access any memory memory but we must also permit any memory to be migrated to
-device memory while device is using it (blocking CPU access while it happens).
+So for shared address space to make sense, not only must we allow devices to
+access any memory but we must also permit any memory to be migrated to device
+memory while device is using it (blocking CPU access while it happens).
 
 
 -------------------------------------------------------------------------------
 
-3) Share address space and migration
+3) Shared address space and migration
 
 HMM intends to provide two main features. First one is to share the address
-space by duplication the CPU page table into the device page table so same
-address point to same memory and this for any valid main memory address in
+space by duplicating the CPU page table in the device page table so the same
+address points to the same physical memory for any valid main memory address in
 the process address space.
 
-To achieve this, HMM offer a set of helpers to populate the device page table
+To achieve this, HMM offers a set of helpers to populate the device page table
 while keeping track of CPU page table updates. Device page table updates are
-not as easy as CPU page table updates. To update the device page table you must
-allow a buffer (or use a pool of pre-allocated buffer) and write GPU specifics
-commands in it to perform the update (unmap, cache invalidations and flush,
-...). This can not be done through common code for all device. Hence why HMM
-provides helpers to factor out everything that can be while leaving the gory
-details to the device driver.
-
-The second mechanism HMM provide is a new kind of ZONE_DEVICE memory that does
-allow to allocate a struct page for each page of the device memory. Those page
-are special because the CPU can not map them. They however allow to migrate
-main memory to device memory using exhisting migration mechanism and everything
-looks like if page was swap out to disk from CPU point of view. Using a struct
-page gives the easiest and cleanest integration with existing mm mechanisms.
-Again here HMM only provide helpers, first to hotplug new ZONE_DEVICE memory
-for the device memory and second to perform migration. Policy decision of what
-and when to migrate things is left to the device driver.
-
-Note that any CPU access to a device page trigger a page fault and a migration
-back to main memory ie when a page backing an given address A is migrated from
-a main memory page to a device page then any CPU access to address A trigger a
-page fault and initiate a migration back to main memory.
-
-
-With this two features, HMM not only allow a device to mirror a process address
-space and keeps both CPU and device page table synchronize, but also allow to
-leverage device memory by migrating part of data-set that is actively use by a
-device.
+not as easy as CPU page table updates. To update the device page table, you must
+allocate a buffer (or use a pool of pre-allocated buffers) and write GPU
+specific commands in it to perform the update (unmap, cache invalidations, and
+flush, ...). This cannot be done through common code for all devices. Hence
+why HMM provides helpers to factor out everything that can be while leaving the
+hardware specific details to the device driver.
+
+The second mechanism HMM provides is a new kind of ZONE_DEVICE memory that
+allows allocating a struct page for each page of the device memory. Those pages
+are special because the CPU cannot map them. However, they allow migrating
+main memory to device memory using existing migration mechanisms and everything
+looks like a page is swapped out to disk from the CPU point of view. Using a
+struct page gives the easiest and cleanest integration with existing mm mech-
+anisms. Here again, HMM only provides helpers, first to hotplug new ZONE_DEVICE
+memory for the device memory and second to perform migration. Policy decisions
+of what and when to migrate things is left to the device driver.
+
+Note that any CPU access to a device page triggers a page fault and a migration
+back to main memory. For example, when a page backing a given CPU address A is
+migrated from a main memory page to a device page, then any CPU access to
+address A triggers a page fault and initiates a migration back to main memory.
+
+With these two features, HMM not only allows a device to mirror process address
+space and keeping both CPU and device page table synchronized, but also lever-
+ages device memory by migrating the part of the data set that is actively being
+used by the device.
 
 
 -------------------------------------------------------------------------------
 
 4) Address space mirroring implementation and API
 
-Address space mirroring main objective is to allow to duplicate range of CPU
-page table into a device page table and HMM helps keeping both synchronize. A
-device driver that want to mirror a process address space must start with the
+Address space mirroring's main objective is to allow duplication of a range of
+CPU page table into a device page table; HMM helps keep both synchronized. A
+device driver that wants to mirror a process address space must start with the
 registration of an hmm_mirror struct:
 
  int hmm_mirror_register(struct hmm_mirror *mirror,
@@ -154,9 +162,9 @@ registration of an hmm_mirror struct:
  int hmm_mirror_register_locked(struct hmm_mirror *mirror,
                                 struct mm_struct *mm);
 
-The locked variant is to be use when the driver is already holding the mmap_sem
-of the mm in write mode. The mirror struct has a set of callback that are use
-to propagate CPU page table:
+The locked variant is to be used when the driver is already holding mmap_sem
+of the mm in write mode. The mirror struct has a set of callbacks that are used
+to propagate CPU page tables:
 
  struct hmm_mirror_ops {
      /* sync_cpu_device_pagetables() - synchronize page tables
@@ -181,13 +189,13 @@ to propagate CPU page table:
                      unsigned long end);
  };
 
-Device driver must perform update to the range following action (turn range
-read only, or fully unmap, ...). Once driver callback returns the device must
-be done with the update.
+The device driver must perform the update action to the range (mark range
+read only, or fully unmap, ...). The device must be done with the update before
+the driver callback returns.
 
 
-When device driver wants to populate a range of virtual address it can use
-either:
+When the device driver wants to populate a range of virtual addresses, it can
+use either:
  int hmm_vma_get_pfns(struct vm_area_struct *vma,
                       struct hmm_range *range,
                       unsigned long start,
@@ -201,17 +209,19 @@ either:
                    bool write,
                    bool block);
 
-First one (hmm_vma_get_pfns()) will only fetch present CPU page table entry and
-will not trigger a page fault on missing or non present entry. The second one
-do trigger page fault on missing or read only entry if write parameter is true.
-Page fault use the generic mm page fault code path just like a CPU page fault.
+The first one (hmm_vma_get_pfns()) will only fetch present CPU page table
+entries and will not trigger a page fault on missing or non-present entries.
+The second one does trigger a page fault on missing or read-only entry if the
+write parameter is true. Page faults use the generic mm page fault code path
+just like a CPU page fault.
 
-Both function copy CPU page table into their pfns array argument. Each entry in
-that array correspond to an address in the virtual range. HMM provide a set of
-flags to help driver identify special CPU page table entries.
+Both functions copy CPU page table entries into their pfns array argument. Each
+entry in that array corresponds to an address in the virtual range. HMM
+provides a set of flags to help the driver identify special CPU page table
+entries.
 
 Locking with the update() callback is the most important aspect the driver must
-respect in order to keep things properly synchronize. The usage pattern is :
+respect in order to keep things properly synchronized. The usage pattern is:
 
  int driver_populate_range(...)
  {
@@ -233,43 +243,44 @@ respect in order to keep things properly synchronize. The usage pattern is :
       return 0;
  }
 
-The driver->update lock is the same lock that driver takes inside its update()
-callback. That lock must be call before hmm_vma_range_done() to avoid any race
-with a concurrent CPU page table update.
+The driver->update lock is the same lock that the driver takes inside its
+update() callback. That lock must be held before hmm_vma_range_done() to avoid
+any race with a concurrent CPU page table update.
 
-HMM implements all this on top of the mmu_notifier API because we wanted to a
-simpler API and also to be able to perform optimization latter own like doing
-concurrent device update in multi-devices scenario.
+HMM implements all this on top of the mmu_notifier API because we wanted a
+simpler API and also to be able to perform optimizations latter on like doing
+concurrent device updates in multi-devices scenario.
 
-HMM also serve as an impedence missmatch between how CPU page table update are
-done (by CPU write to the page table and TLB flushes) from how device update
-their own page table. Device update is a multi-step process, first appropriate
-commands are write to a buffer, then this buffer is schedule for execution on
-the device. It is only once the device has executed commands in the buffer that
-the update is done. Creating and scheduling update command buffer can happen
-concurrently for multiple devices. Waiting for each device to report commands
-as executed is serialize (there is no point in doing this concurrently).
+HMM also serves as an impedance mismatch between how CPU page table updates
+are done (by CPU write to the page table and TLB flushes) and how devices
+update their own page table. Device updates are a multi-step process. First,
+appropriate commands are written to a buffer, then this buffer is scheduled for
+execution on the device. It is only once the device has executed commands in
+the buffer that the update is done. Creating and scheduling the update command
+buffer can happen concurrently for multiple devices. Waiting for each device to
+report commands as executed is serialized (there is no point in doing this
+concurrently).
 
 
 -------------------------------------------------------------------------------
 
 5) Represent and manage device memory from core kernel point of view
 
-Several differents design were try to support device memory. First one use
-device specific data structure to keep information about migrated memory and
-HMM hooked itself in various place of mm code to handle any access to address
-that were back by device memory. It turns out that this ended up replicating
-most of the fields of struct page and also needed many kernel code path to be
-updated to understand this new kind of memory.
+Several different designs were tried to support device memory. First one used
+a device specific data structure to keep information about migrated memory and
+HMM hooked itself in various places of mm code to handle any access to
+addresses that were backed by device memory. It turns out that this ended up
+replicating most of the fields of struct page and also needed many kernel code
+paths to be updated to understand this new kind of memory.
 
-Thing is most kernel code path never try to access the memory behind a page
-but only care about struct page contents. Because of this HMM switchted to
-directly using struct page for device memory which left most kernel code path
-un-aware of the difference. We only need to make sure that no one ever try to
-map those page from the CPU side.
+Most kernel code paths never try to access the memory behind a page
+but only care about struct page contents. Because of this, HMM switched to
+directly using struct page for device memory which left most kernel code paths
+unaware of the difference. We only need to make sure that no one ever tries to
+map those pages from the CPU side.
 
-HMM provide a set of helpers to register and hotplug device memory as a new
-region needing struct page. This is offer through a very simple API:
+HMM provides a set of helpers to register and hotplug device memory as a new
+region needing a struct page. This is offered through a very simple API:
 
  struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
                                    struct device *device,
@@ -289,18 +300,19 @@ The hmm_devmem_ops is where most of the important things are:
  };
 
 The first callback (free()) happens when the last reference on a device page is
-drop. This means the device page is now free and no longer use by anyone. The
-second callback happens whenever CPU try to access a device page which it can
-not do. This second callback must trigger a migration back to system memory.
+dropped. This means the device page is now free and no longer used by anyone.
+The second callback happens whenever the CPU tries to access a device page
+which it cannot do. This second callback must trigger a migration back to
+system memory.
 
 
 -------------------------------------------------------------------------------
 
-6) Migrate to and from device memory
+6) Migration to and from device memory
 
-Because CPU can not access device memory, migration must use device DMA engine
-to perform copy from and to device memory. For this we need a new migration
-helper:
+Because the CPU cannot access device memory, migration must use the device DMA
+engine to perform copy from and to device memory. For this we need a new
+migration helper:
 
  int migrate_vma(const struct migrate_vma_ops *ops,
                  struct vm_area_struct *vma,
@@ -311,15 +323,15 @@ helper:
                  unsigned long *dst,
                  void *private);
 
-Unlike other migration function it works on a range of virtual address, there
-is two reasons for that. First device DMA copy has a high setup overhead cost
+Unlike other migration functions it works on a range of virtual address, there
+are two reasons for that. First, device DMA copy has a high setup overhead cost
 and thus batching multiple pages is needed as otherwise the migration overhead
-make the whole excersie pointless. The second reason is because driver trigger
-such migration base on range of address the device is actively accessing.
+makes the whole exercise pointless. The second reason is because the
+migration might be for a range of addresses the device is actively accessing.
 
-The migrate_vma_ops struct define two callbacks. First one (alloc_and_copy())
-control destination memory allocation and copy operation. Second one is there
-to allow device driver to perform cleanup operation after migration.
+The migrate_vma_ops struct defines two callbacks. First one (alloc_and_copy())
+controls destination memory allocation and copy operation. Second one is there
+to allow the device driver to perform cleanup operations after migration.
 
  struct migrate_vma_ops {
      void (*alloc_and_copy)(struct vm_area_struct *vma,
@@ -336,19 +348,19 @@ to allow device driver to perform cleanup operation after migration.
                               void *private);
  };
 
-It is important to stress that this migration helpers allow for hole in the
+It is important to stress that these migration helpers allow for holes in the
 virtual address range. Some pages in the range might not be migrated for all
-the usual reasons (page is pin, page is lock, ...). This helper does not fail
-but just skip over those pages.
+the usual reasons (page is pinned, page is locked, ...). This helper does not
+fail but just skips over those pages.
 
-The alloc_and_copy() might as well decide to not migrate all pages in the
-range (for reasons under the callback control). For those the callback just
-have to leave the corresponding dst entry empty.
+The alloc_and_copy() might decide to not migrate all pages in the
+range (for reasons under the callback control). For those, the callback just
+has to leave the corresponding dst entry empty.
 
-Finaly the migration of the struct page might fails (for file back page) for
+Finally, the migration of the struct page might fail (for file backed page) for
 various reasons (failure to freeze reference, or update page cache, ...). If
-that happens then the finalize_and_map() can catch any pages that was not
-migrated. Note those page were still copied to new page and thus we wasted
+that happens, then the finalize_and_map() can catch any pages that were not
+migrated. Note those pages were still copied to a new page and thus we wasted
 bandwidth but this is considered as a rare event and a price that we are
 willing to pay to keep all the code simpler.
 
@@ -358,27 +370,27 @@ willing to pay to keep all the code simpler.
 7) Memory cgroup (memcg) and rss accounting
 
 For now device memory is accounted as any regular page in rss counters (either
-anonymous if device page is use for anonymous, file if device page is use for
-file back page or shmem if device page is use for share memory). This is a
-deliberate choice to keep existing application that might start using device
-memory without knowing about it to keep runing unimpacted.
-
-Drawbacks is that OOM killer might kill an application using a lot of device
-memory and not a lot of regular system memory and thus not freeing much system
-memory. We want to gather more real world experience on how application and
-system react under memory pressure in the presence of device memory before
+anonymous if device page is used for anonymous, file if device page is used for
+file backed page or shmem if device page is used for shared memory). This is a
+deliberate choice to keep existing applications, that might start using device
+memory without knowing about it, running unimpacted.
+
+A drawback is that the OOM killer might kill an application using a lot of
+device memory and not a lot of regular system memory and thus not freeing much
+system memory. We want to gather more real world experience on how applications
+and system react under memory pressure in the presence of device memory before
 deciding to account device memory differently.
 
 
-Same decision was made for memory cgroup. Device memory page are accounted
+Same decision was made for memory cgroup. Device memory pages are accounted
 against same memory cgroup a regular page would be accounted to. This does
 simplify migration to and from device memory. This also means that migration
-back from device memory to regular memory can not fail because it would
+back from device memory to regular memory cannot fail because it would
 go above memory cgroup limit. We might revisit this choice latter on once we
-get more experience in how device memory is use and its impact on memory
+get more experience in how device memory is used and its impact on memory
 resource control.
 
 
-Note that device memory can never be pin nor by device driver nor through GUP
+Note that device memory can never be pinned by device driver nor through GUP
 and thus such memory is always free upon process exit. Or when last reference
-is drop in case of share memory or file back memory.
+is dropped in case of shared memory or file backed memory.
diff --git a/Documentation/vm/page_migration b/Documentation/vm/page_migration
index 0478ae2ad44a..496868072e24 100644
--- a/Documentation/vm/page_migration
+++ b/Documentation/vm/page_migration
@@ -90,7 +90,7 @@ Steps:
 
 1. Lock the page to be migrated
 
-2. Insure that writeback is complete.
+2. Ensure that writeback is complete.
 
 3. Lock the new page that we want to move to. It is locked so that accesses to
    this (not yet uptodate) page immediately lock while the move is in progress.
@@ -100,8 +100,8 @@ Steps:
    mapcount is not zero then we do not migrate the page. All user space
    processes that attempt to access the page will now wait on the page lock.
 
-5. The radix tree lock is taken. This will cause all processes trying
-   to access the page via the mapping to block on the radix tree spinlock.
+5. The i_pages lock is taken. This will cause all processes trying
+   to access the page via the mapping to block on the spinlock.
 
 6. The refcount of the page is examined and we back out if references remain
    otherwise we know that we are the only one referencing this page.
@@ -114,12 +114,12 @@ Steps:
 
 9. The radix tree is changed to point to the new page.
 
-10. The reference count of the old page is dropped because the radix tree
+10. The reference count of the old page is dropped because the address space
     reference is gone. A reference to the new page is established because
-    the new page is referenced to by the radix tree.
+    the new page is referenced by the address space.
 
-11. The radix tree lock is dropped. With that lookups in the mapping
-    become possible again. Processes will move from spinning on the tree_lock
+11. The i_pages lock is dropped. With that lookups in the mapping
+    become possible again. Processes will move from spinning on the lock
     to sleeping on the locked new page.
 
 12. The page contents are copied to the new page.
diff --git a/MAINTAINERS b/MAINTAINERS
index 6d296bdce328..b60179d948bb 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1232,10 +1232,15 @@ F:	Documentation/devicetree/bindings/i2c/i2c-aspeed.txt
 
 ARM/ASPEED MACHINE SUPPORT
 M:	Joel Stanley <joel@jms.id.au>
-S:	Maintained
+R:	Andrew Jeffery <andrew@aj.id.au>
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+L:	linux-aspeed@lists.ozlabs.org (moderated for non-subscribers)
+Q:	https://patchwork.ozlabs.org/project/linux-aspeed/list/
+S:	Supported
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/joel/aspeed.git
 F:	arch/arm/mach-aspeed/
 F:	arch/arm/boot/dts/aspeed-*
-F:	drivers/*/*aspeed*
+N:	aspeed
 
 ARM/ATMEL AT91 Clock Support
 M:	Boris Brezillon <boris.brezillon@bootlin.com>
@@ -1743,7 +1748,7 @@ F:	arch/arm/mach-orion5x/ts78xx-*
 ARM/OXNAS platform support
 M:	Neil Armstrong <narmstrong@baylibre.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-L:	linux-oxnas@lists.tuxfamily.org (moderated for non-subscribers)
+L:	linux-oxnas@groups.io (moderated for non-subscribers)
 S:	Maintained
 F:	arch/arm/mach-oxnas/
 F:	arch/arm/boot/dts/ox8*.dts*
@@ -4392,7 +4397,7 @@ S:	Maintained
 F:	drivers/staging/fsl-dpaa2/ethsw
 
 DPT_I2O SCSI RAID DRIVER
-M:	Adaptec OEM Raid Solutions <aacraid@adaptec.com>
+M:	Adaptec OEM Raid Solutions <aacraid@microsemi.com>
 L:	linux-scsi@vger.kernel.org
 W:	http://www.adaptec.com/
 S:	Maintained
@@ -6410,6 +6415,7 @@ L:	linux-mm@kvack.org
 S:	Maintained
 F:	mm/hmm*
 F:	include/linux/hmm*
+F:	Documentation/vm/hmm.txt
 
 HOST AP DRIVER
 M:	Jouni Malinen <j@w1.fi>
@@ -7344,7 +7350,7 @@ F:	include/linux/ipmi*
 F:	include/uapi/linux/ipmi*
 
 IPS SCSI RAID DRIVER
-M:	Adaptec OEM Raid Solutions <aacraid@adaptec.com>
+M:	Adaptec OEM Raid Solutions <aacraid@microsemi.com>
 L:	linux-scsi@vger.kernel.org
 W:	http://www.adaptec.com/
 S:	Maintained
@@ -8048,6 +8054,14 @@ Q:	https://patchwork.kernel.org/project/linux-nvdimm/list/
 S:	Supported
 F:	drivers/nvdimm/pmem*
 
+LIBNVDIMM: DEVICETREE BINDINGS
+M:	Oliver O'Halloran <oohall@gmail.com>
+L:	linux-nvdimm@lists.01.org
+Q:	https://patchwork.kernel.org/project/linux-nvdimm/list/
+S:	Supported
+F:	drivers/nvdimm/of_pmem.c
+F:	Documentation/devicetree/bindings/pmem/pmem-region.txt
+
 LIBNVDIMM: NON-VOLATILE MEMORY DEVICE SUBSYSTEM
 M:	Dan Williams <dan.j.williams@intel.com>
 L:	linux-nvdimm@lists.01.org
@@ -8851,6 +8865,15 @@ M:	Sean Wang <sean.wang@mediatek.com>
 S:	Maintained
 F:	drivers/media/rc/mtk-cir.c
 
+MEDIATEK DMA DRIVER
+M:	Sean Wang <sean.wang@mediatek.com>
+L:	dmaengine@vger.kernel.org
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+L:	linux-mediatek@lists.infradead.org (moderated for non-subscribers)
+S:	Maintained
+F:	Documentation/devicetree/bindings/dma/mtk-*
+F:	drivers/dma/mediatek/
+
 MEDIATEK PMIC LED DRIVER
 M:	Sean Wang <sean.wang@mediatek.com>
 S:	Maintained
@@ -9222,6 +9245,15 @@ S:	Maintained
 F:	drivers/usb/misc/usb251xb.c
 F:	Documentation/devicetree/bindings/usb/usb251xb.txt
 
+MICROSEMI MIPS SOCS
+M:	Alexandre Belloni <alexandre.belloni@bootlin.com>
+L:	linux-mips@linux-mips.org
+S:	Maintained
+F:	arch/mips/generic/board-ocelot.c
+F:	arch/mips/configs/generic/board-ocelot.config
+F:	arch/mips/boot/dts/mscc/
+F:	Documentation/devicetree/bindings/mips/mscc.txt
+
 MICROSEMI SMART ARRAY SMARTPQI DRIVER (smartpqi)
 M:	Don Brace <don.brace@microsemi.com>
 L:	esc.storagedev@microsemi.com
@@ -11736,7 +11768,7 @@ F:	drivers/char/random.c
 
 RAPIDIO SUBSYSTEM
 M:	Matt Porter <mporter@kernel.crashing.org>
-M:	Alexandre Bounine <alexandre.bounine@idt.com>
+M:	Alexandre Bounine <alex.bou9@gmail.com>
 S:	Maintained
 F:	drivers/rapidio/
 
@@ -11810,7 +11842,7 @@ X:	kernel/torture.c
 
 REAL TIME CLOCK (RTC) SUBSYSTEM
 M:	Alessandro Zummo <a.zummo@towertech.it>
-M:	Alexandre Belloni <alexandre.belloni@free-electrons.com>
+M:	Alexandre Belloni <alexandre.belloni@bootlin.com>
 L:	linux-rtc@vger.kernel.org
 Q:	http://patchwork.ozlabs.org/project/rtc-linux/list/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/abelloni/linux.git
@@ -13465,6 +13497,12 @@ S:	Maintained
 F:	drivers/gpio/gpio-dwapb.c
 F:	Documentation/devicetree/bindings/gpio/snps-dwapb-gpio.txt
 
+SYNOPSYS DESIGNWARE AXI DMAC DRIVER
+M:	Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
+S:	Maintained
+F:	drivers/dma/dwi-axi-dmac/
+F:	Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.txt
+
 SYNOPSYS DESIGNWARE DMAC DRIVER
 M:	Viresh Kumar <vireshk@kernel.org>
 R:	Andy Shevchenko <andriy.shevchenko@linux.intel.com>
diff --git a/arch/alpha/include/uapi/asm/mman.h b/arch/alpha/include/uapi/asm/mman.h
index 2dbdf59258d9..f9d4e6b6d4bd 100644
--- a/arch/alpha/include/uapi/asm/mman.h
+++ b/arch/alpha/include/uapi/asm/mman.h
@@ -32,6 +32,7 @@
 #define MAP_NONBLOCK	0x40000		/* do not block on IO */
 #define MAP_STACK	0x80000		/* give out an address that is best suited for process/thread stacks */
 #define MAP_HUGETLB	0x100000	/* create a huge page mapping */
+#define MAP_FIXED_NOREPLACE	0x200000/* MAP_FIXED which doesn't unmap underlying mapping */
 
 #define MS_ASYNC	1		/* sync memory asynchronously */
 #define MS_SYNC		2		/* synchronous memory sync */
diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index 74504b154256..869080bedb89 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -318,10 +318,8 @@ static inline void flush_anon_page(struct vm_area_struct *vma,
 #define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
 extern void flush_kernel_dcache_page(struct page *);
 
-#define flush_dcache_mmap_lock(mapping) \
-	spin_lock_irq(&(mapping)->tree_lock)
-#define flush_dcache_mmap_unlock(mapping) \
-	spin_unlock_irq(&(mapping)->tree_lock)
+#define flush_dcache_mmap_lock(mapping)		xa_lock_irq(&mapping->i_pages)
+#define flush_dcache_mmap_unlock(mapping)	xa_unlock_irq(&mapping->i_pages)
 
 #define flush_icache_user_range(vma,page,addr,len) \
 	flush_dcache_page(page)
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 496667703693..ed8fd0d19a3e 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -22,12 +22,6 @@
 #include <mach/memory.h>
 #endif
 
-/*
- * Allow for constants defined here to be used from assembly code
- * by prepending the UL suffix only with actual C code compilation.
- */
-#define UL(x) _AC(x, UL)
-
 /* PAGE_OFFSET - the virtual address of the start of the kernel image */
 #define PAGE_OFFSET		UL(CONFIG_PAGE_OFFSET)
 
diff --git a/arch/arm/mach-npcm/npcm7xx.c b/arch/arm/mach-npcm/npcm7xx.c
index 5f7cd88103ef..c5f77d854c4f 100644
--- a/arch/arm/mach-npcm/npcm7xx.c
+++ b/arch/arm/mach-npcm/npcm7xx.c
@@ -17,4 +17,6 @@ static const char *const npcm7xx_dt_match[] = {
 DT_MACHINE_START(NPCM7XX_DT, "NPCM7XX Chip family")
 	.atag_offset	= 0x100,
 	.dt_compat	= npcm7xx_dt_match,
+	.l2c_aux_val	= 0x0,
+	.l2c_aux_mask	= ~0x0,
 MACHINE_END
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index ada8eb206a90..8c398fedbbb6 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -466,6 +466,12 @@ void __init dma_contiguous_early_fixup(phys_addr_t base, unsigned long size)
 void __init dma_contiguous_remap(void)
 {
 	int i;
+
+	if (!dma_mmu_remap_num)
+		return;
+
+	/* call flush_cache_all() since CMA area would be large enough */
+	flush_cache_all();
 	for (i = 0; i < dma_mmu_remap_num; i++) {
 		phys_addr_t start = dma_mmu_remap[i].base;
 		phys_addr_t end = start + dma_mmu_remap[i].size;
@@ -498,7 +504,15 @@ void __init dma_contiguous_remap(void)
 		flush_tlb_kernel_range(__phys_to_virt(start),
 				       __phys_to_virt(end));
 
-		iotable_init(&map, 1);
+		/*
+		 * All the memory in CMA region will be on ZONE_MOVABLE.
+		 * If that zone is considered as highmem, the memory in CMA
+		 * region is also considered as highmem even if it's
+		 * physical address belong to lowmem. In this case,
+		 * re-mapping isn't required.
+		 */
+		if (!is_highmem_idx(ZONE_MOVABLE))
+			iotable_init(&map, 1);
 	}
 }
 
diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c
index eb1de66517d5..f866870db749 100644
--- a/arch/arm/mm/mmap.c
+++ b/arch/arm/mm/mmap.c
@@ -21,20 +21,20 @@
 #define MIN_GAP (128*1024*1024UL)
 #define MAX_GAP ((TASK_SIZE)/6*5)
 
-static int mmap_is_legacy(void)
+static int mmap_is_legacy(struct rlimit *rlim_stack)
 {
 	if (current->personality & ADDR_COMPAT_LAYOUT)
 		return 1;
 
-	if (rlimit(RLIMIT_STACK) == RLIM_INFINITY)
+	if (rlim_stack->rlim_cur == RLIM_INFINITY)
 		return 1;
 
 	return sysctl_legacy_va_layout;
 }
 
-static unsigned long mmap_base(unsigned long rnd)
+static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
 {
-	unsigned long gap = rlimit(RLIMIT_STACK);
+	unsigned long gap = rlim_stack->rlim_cur;
 
 	if (gap < MIN_GAP)
 		gap = MIN_GAP;
@@ -180,18 +180,18 @@ unsigned long arch_mmap_rnd(void)
 	return rnd << PAGE_SHIFT;
 }
 
-void arch_pick_mmap_layout(struct mm_struct *mm)
+void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
 {
 	unsigned long random_factor = 0UL;
 
 	if (current->flags & PF_RANDOMIZE)
 		random_factor = arch_mmap_rnd();
 
-	if (mmap_is_legacy()) {
+	if (mmap_is_legacy(rlim_stack)) {
 		mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
 		mm->get_unmapped_area = arch_get_unmapped_area;
 	} else {
-		mm->mmap_base = mmap_base(random_factor);
+		mm->mmap_base = mmap_base(random_factor, rlim_stack);
 		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
 	}
 }
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 7dfcec4700fe..0094c6653b06 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -140,10 +140,8 @@ static inline void __flush_icache_all(void)
 	dsb(ish);
 }
 
-#define flush_dcache_mmap_lock(mapping) \
-	spin_lock_irq(&(mapping)->tree_lock)
-#define flush_dcache_mmap_unlock(mapping) \
-	spin_unlock_irq(&(mapping)->tree_lock)
+#define flush_dcache_mmap_lock(mapping)		do { } while (0)
+#define flush_dcache_mmap_unlock(mapping)	do { } while (0)
 
 /*
  * We don't appear to need to do anything here.  In fact, if we did, we'd
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 50fa96a49792..49d99214f43c 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -29,12 +29,6 @@
 #include <asm/sizes.h>
 
 /*
- * Allow for constants defined here to be used from assembly code
- * by prepending the UL suffix only with actual C code compilation.
- */
-#define UL(x) _AC(x, UL)
-
-/*
  * Size of the PCI I/O space. This must remain a power of two so that
  * IO_SPACE_LIMIT acts as a mask for the low bits of I/O addresses.
  */
diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
index decccffb03ca..842c8a5fcd53 100644
--- a/arch/arm64/mm/mmap.c
+++ b/arch/arm64/mm/mmap.c
@@ -38,12 +38,12 @@
 #define MIN_GAP (SZ_128M)
 #define MAX_GAP	(STACK_TOP/6*5)
 
-static int mmap_is_legacy(void)
+static int mmap_is_legacy(struct rlimit *rlim_stack)
 {
 	if (current->personality & ADDR_COMPAT_LAYOUT)
 		return 1;
 
-	if (rlimit(RLIMIT_STACK) == RLIM_INFINITY)
+	if (rlim_stack->rlim_cur == RLIM_INFINITY)
 		return 1;
 
 	return sysctl_legacy_va_layout;
@@ -62,9 +62,9 @@ unsigned long arch_mmap_rnd(void)
 	return rnd << PAGE_SHIFT;
 }
 
-static unsigned long mmap_base(unsigned long rnd)
+static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
 {
-	unsigned long gap = rlimit(RLIMIT_STACK);
+	unsigned long gap = rlim_stack->rlim_cur;
 	unsigned long pad = (STACK_RND_MASK << PAGE_SHIFT) + stack_guard_gap;
 
 	/* Values close to RLIM_INFINITY can overflow. */
@@ -83,7 +83,7 @@ static unsigned long mmap_base(unsigned long rnd)
  * This function, called very early during the creation of a new process VM
  * image, sets up which VM layout function to use:
  */
-void arch_pick_mmap_layout(struct mm_struct *mm)
+void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
 {
 	unsigned long random_factor = 0UL;
 
@@ -94,11 +94,11 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 	 * Fall back to the standard layout if the personality bit is set, or
 	 * if the expected stack growth is unlimited:
 	 */
-	if (mmap_is_legacy()) {
+	if (mmap_is_legacy(rlim_stack)) {
 		mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
 		mm->get_unmapped_area = arch_get_unmapped_area;
 	} else {
-		mm->mmap_base = mmap_base(random_factor);
+		mm->mmap_base = mmap_base(random_factor, rlim_stack);
 		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
 	}
 }
diff --git a/arch/c6x/Makefile b/arch/c6x/Makefile
index 6f6096ff05a4..6ab942e6c534 100644
--- a/arch/c6x/Makefile
+++ b/arch/c6x/Makefile
@@ -25,6 +25,7 @@ KBUILD_AFLAGS   += -mbig-endian
 LINKFLAGS       += -mbig-endian
 KBUILD_LDFLAGS  += -mbig-endian
 LDFLAGS += -EB
+CHECKFLAGS	+= -D_BIG_ENDIAN
 endif
 
 head-y          := arch/c6x/kernel/head.o
diff --git a/arch/c6x/kernel/asm-offsets.c b/arch/c6x/kernel/asm-offsets.c
index cff57764fcad..0f8fde494875 100644
--- a/arch/c6x/kernel/asm-offsets.c
+++ b/arch/c6x/kernel/asm-offsets.c
@@ -107,7 +107,6 @@ void foo(void)
 	/* These would be unneccessary if we ran asm files
 	 * through the preprocessor.
 	 */
-	DEFINE(KTHREAD_SIZE, THREAD_SIZE);
 	DEFINE(KTHREAD_SHIFT, THREAD_SHIFT);
 	DEFINE(KTHREAD_START_SP, THREAD_START_SP);
 	DEFINE(ENOSYS_, ENOSYS);
diff --git a/arch/c6x/platforms/plldata.c b/arch/c6x/platforms/plldata.c
index e8b6cc6a7b5a..1ef04b5ab93f 100644
--- a/arch/c6x/platforms/plldata.c
+++ b/arch/c6x/platforms/plldata.c
@@ -19,6 +19,7 @@
 
 #include <asm/clock.h>
 #include <asm/setup.h>
+#include <asm/special_insns.h>
 #include <asm/irq.h>
 
 /*
diff --git a/arch/microblaze/include/asm/pci.h b/arch/microblaze/include/asm/pci.h
index 114b93488193..5de871eb4a59 100644
--- a/arch/microblaze/include/asm/pci.h
+++ b/arch/microblaze/include/asm/pci.h
@@ -47,9 +47,10 @@ extern int pci_proc_domain(struct pci_bus *bus);
 
 struct vm_area_struct;
 
-/* Tell drivers/pci/proc.c that we have pci_mmap_page_range() */
-#define HAVE_PCI_MMAP		1
-#define arch_can_pci_mmap_io()	1
+/* Tell PCI code what kind of PCI resource mappings we support */
+#define HAVE_PCI_MMAP			1
+#define ARCH_GENERIC_PCI_MMAP_RESOURCE	1
+#define arch_can_pci_mmap_io()		1
 
 extern int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val,
 			   size_t count);
diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h
index e53b8532353c..db8b1fa83452 100644
--- a/arch/microblaze/include/asm/pgtable.h
+++ b/arch/microblaze/include/asm/pgtable.h
@@ -33,6 +33,8 @@ extern int mem_init_done;
 #define PAGE_KERNEL		__pgprot(0) /* these mean nothing to non MMU */
 
 #define pgprot_noncached(x)	(x)
+#define pgprot_writecombine	pgprot_noncached
+#define pgprot_device		pgprot_noncached
 
 #define __swp_type(x)		(0)
 #define __swp_offset(x)		(0)
diff --git a/arch/microblaze/pci/pci-common.c b/arch/microblaze/pci/pci-common.c
index ae79e8638d50..161f9758c631 100644
--- a/arch/microblaze/pci/pci-common.c
+++ b/arch/microblaze/pci/pci-common.c
@@ -151,72 +151,22 @@ void pcibios_set_master(struct pci_dev *dev)
 }
 
 /*
- * Platform support for /proc/bus/pci/X/Y mmap()s,
- * modelled on the sparc64 implementation by Dave Miller.
- *  -- paulus.
+ * Platform support for /proc/bus/pci/X/Y mmap()s.
  */
 
-/*
- * Adjust vm_pgoff of VMA such that it is the physical page offset
- * corresponding to the 32-bit pci bus offset for DEV requested by the user.
- *
- * Basically, the user finds the base address for his device which he wishes
- * to mmap.  They read the 32-bit value from the config space base register,
- * add whatever PAGE_SIZE multiple offset they wish, and feed this into the
- * offset parameter of mmap on /proc/bus/pci/XXX for that device.
- *
- * Returns negative error code on failure, zero on success.
- */
-static struct resource *__pci_mmap_make_offset(struct pci_dev *dev,
-					       resource_size_t *offset,
-					       enum pci_mmap_state mmap_state)
+int pci_iobar_pfn(struct pci_dev *pdev, int bar, struct vm_area_struct *vma)
 {
-	struct pci_controller *hose = pci_bus_to_host(dev->bus);
-	unsigned long io_offset = 0;
-	int i, res_bit;
+	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+	resource_size_t ioaddr = pci_resource_start(pdev, bar);
 
 	if (!hose)
-		return NULL;		/* should never happen */
-
-	/* If memory, add on the PCI bridge address offset */
-	if (mmap_state == pci_mmap_mem) {
-#if 0 /* See comment in pci_resource_to_user() for why this is disabled */
-		*offset += hose->pci_mem_offset;
-#endif
-		res_bit = IORESOURCE_MEM;
-	} else {
-		io_offset = (unsigned long)hose->io_base_virt - _IO_BASE;
-		*offset += io_offset;
-		res_bit = IORESOURCE_IO;
-	}
-
-	/*
-	 * Check that the offset requested corresponds to one of the
-	 * resources of the device.
-	 */
-	for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
-		struct resource *rp = &dev->resource[i];
-		int flags = rp->flags;
+		return -EINVAL;		/* should never happen */
 
-		/* treat ROM as memory (should be already) */
-		if (i == PCI_ROM_RESOURCE)
-			flags |= IORESOURCE_MEM;
-
-		/* Active and same type? */
-		if ((flags & res_bit) == 0)
-			continue;
-
-		/* In the range of this resource? */
-		if (*offset < (rp->start & PAGE_MASK) || *offset > rp->end)
-			continue;
-
-		/* found it! construct the final physical address */
-		if (mmap_state == pci_mmap_io)
-			*offset += hose->io_base_phys - io_offset;
-		return rp;
-	}
+	/* Convert to an offset within this PCI controller */
+	ioaddr -= (unsigned long)hose->io_base_virt - _IO_BASE;
 
-	return NULL;
+	vma->vm_pgoff += (ioaddr + hose->io_base_phys) >> PAGE_SHIFT;
+	return 0;
 }
 
 /*
@@ -268,37 +218,6 @@ pgprot_t pci_phys_mem_access_prot(struct file *file,
 	return prot;
 }
 
-/*
- * Perform the actual remap of the pages for a PCI device mapping, as
- * appropriate for this architecture.  The region in the process to map
- * is described by vm_start and vm_end members of VMA, the base physical
- * address is found in vm_pgoff.
- * The pci device structure is provided so that architectures may make mapping
- * decisions on a per-device or per-bus basis.
- *
- * Returns a negative error code on failure, zero on success.
- */
-int pci_mmap_page_range(struct pci_dev *dev, int bar, struct vm_area_struct *vma,
-			enum pci_mmap_state mmap_state, int write_combine)
-{
-	resource_size_t offset =
-		((resource_size_t)vma->vm_pgoff) << PAGE_SHIFT;
-	struct resource *rp;
-	int ret;
-
-	rp = __pci_mmap_make_offset(dev, &offset, mmap_state);
-	if (rp == NULL)
-		return -EINVAL;
-
-	vma->vm_pgoff = offset >> PAGE_SHIFT;
-	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-
-	ret = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
-			       vma->vm_end - vma->vm_start, vma->vm_page_prot);
-
-	return ret;
-}
-
 /* This provides legacy IO read access on a bus */
 int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val, size_t size)
 {
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 61e9a24297b7..225c95da23ce 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -2029,6 +2029,7 @@ config CPU_MIPSR6
 	select CPU_HAS_RIXI
 	select HAVE_ARCH_BITREVERSE
 	select MIPS_ASID_BITS_VARIABLE
+	select MIPS_CRC_SUPPORT
 	select MIPS_SPRAM
 
 config EVA
@@ -2502,6 +2503,9 @@ config MIPS_ASID_BITS
 config MIPS_ASID_BITS_VARIABLE
 	bool
 
+config MIPS_CRC_SUPPORT
+	bool
+
 #
 # - Highmem only makes sense for the 32-bit kernel.
 # - The current highmem code will only work properly on physically indexed
@@ -2850,8 +2854,7 @@ config CRASH_DUMP
 
 config PHYSICAL_START
 	hex "Physical address where the kernel is loaded"
-	default "0xffffffff84000000" if 64BIT
-	default "0x84000000" if 32BIT
+	default "0xffffffff84000000"
 	depends on CRASH_DUMP
 	help
 	  This gives the CKSEG0 or KSEG0 address where the kernel is loaded.
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index d1ca839c3981..5e9fce076ab6 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -222,6 +222,8 @@ xpa-cflags-y				:= $(mips-cflags)
 xpa-cflags-$(micromips-ase)		+= -mmicromips -Wa$(comma)-fatal-warnings
 toolchain-xpa				:= $(call cc-option-yn,$(xpa-cflags-y) -mxpa)
 cflags-$(toolchain-xpa)			+= -DTOOLCHAIN_SUPPORTS_XPA
+toolchain-crc				:= $(call cc-option-yn,$(mips-cflags) -Wa$(comma)-mcrc)
+cflags-$(toolchain-crc)			+= -DTOOLCHAIN_SUPPORTS_CRC
 
 #
 # Firmware support
@@ -249,20 +251,12 @@ ifdef CONFIG_PHYSICAL_START
 load-y					= $(CONFIG_PHYSICAL_START)
 endif
 
-entry-noisa-y				= 0x$(shell $(NM) vmlinux 2>/dev/null \
-					| grep "\bkernel_entry\b" | cut -f1 -d \ )
-ifdef CONFIG_CPU_MICROMIPS
-  #
-  # Set the ISA bit, since the kernel_entry symbol in the ELF will have it
-  # clear which would lead to images containing addresses which bootloaders may
-  # jump to as MIPS32 code.
-  #
-  entry-y = $(patsubst %0,%1,$(patsubst %2,%3,$(patsubst %4,%5, \
-              $(patsubst %6,%7,$(patsubst %8,%9,$(patsubst %a,%b, \
-              $(patsubst %c,%d,$(patsubst %e,%f,$(entry-noisa-y)))))))))
-else
-  entry-y = $(entry-noisa-y)
-endif
+# Sign-extend the entry point to 64 bits if retrieved as a 32-bit number.
+entry-y		= $(shell $(OBJDUMP) -f vmlinux 2>/dev/null \
+			| sed -n '/^start address / { \
+				s/^.* //; \
+				s/0x\([0-7].......\)$$/0x00000000\1/; \
+				s/0x\(........\)$$/0xffffffff\1/; p }')
 
 cflags-y			+= -I$(srctree)/arch/mips/include/asm/mach-generic
 drivers-$(CONFIG_PCI)		+= arch/mips/pci/
@@ -330,6 +324,7 @@ libs-y			+= arch/mips/math-emu/
 # See arch/mips/Kbuild for content of core part of the kernel
 core-y += arch/mips/
 
+drivers-$(CONFIG_MIPS_CRC_SUPPORT) += arch/mips/crypto/
 drivers-$(CONFIG_OPROFILE)	+= arch/mips/oprofile/
 
 # suspend and hibernation support
@@ -473,6 +468,21 @@ define archhelp
 	echo
 	echo '  {micro32,32,64}{r1,r2,r6}{el,}_defconfig <BOARDS="list of boards">'
 	echo
+	echo '  Where BOARDS is some subset of the following:'
+	for board in $(sort $(BOARDS)); do echo "    $${board}"; done
+	echo
+	echo '  Specifically the following generic default configurations are'
+	echo '  supported:'
+	echo
+	$(foreach cfg,$(generic_defconfigs),
+	  printf "  %-24s - Build generic kernel for $(call describe_generic_defconfig,$(cfg))\n" $(cfg);)
+	echo
+	echo '  The following legacy default configurations have been converted to'
+	echo '  generic and can still be used:'
+	echo
+	$(foreach cfg,$(sort $(legacy_defconfigs)),
+	  printf "  %-24s - Build $($(cfg)-y)\n" $(cfg);)
+	echo
 	echo '  Otherwise, the following default configurations are available:'
 endef
 
@@ -507,6 +517,10 @@ endef
 $(eval $(call gen_generic_defconfigs,32 64,r1 r2 r6,eb el))
 $(eval $(call gen_generic_defconfigs,micro32,r2,eb el))
 
+define describe_generic_defconfig
+$(subst 32r,MIPS32 r,$(subst 64r,MIPS64 r,$(subst el, little endian,$(patsubst %_defconfig,%,$(1)))))
+endef
+
 .PHONY: $(generic_defconfigs)
 $(generic_defconfigs):
 	$(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh \
@@ -543,14 +557,18 @@ generic_defconfig:
 # now that the boards have been converted to use the generic kernel they are
 # wrappers around the generic rules above.
 #
-.PHONY: sead3_defconfig
-sead3_defconfig:
-	$(Q)$(MAKE) -f $(srctree)/Makefile 32r2el_defconfig BOARDS=sead-3
+legacy_defconfigs		+= ocelot_defconfig
+ocelot_defconfig-y		:= 32r2el_defconfig BOARDS=ocelot
+
+legacy_defconfigs		+= sead3_defconfig
+sead3_defconfig-y		:= 32r2el_defconfig BOARDS=sead-3
+
+legacy_defconfigs		+= sead3micro_defconfig
+sead3micro_defconfig-y		:= micro32r2el_defconfig BOARDS=sead-3
 
-.PHONY: sead3micro_defconfig
-sead3micro_defconfig:
-	$(Q)$(MAKE) -f $(srctree)/Makefile micro32r2el_defconfig BOARDS=sead-3
+legacy_defconfigs		+= xilfpga_defconfig
+xilfpga_defconfig-y		:= 32r2el_defconfig BOARDS=xilfpga
 
-.PHONY: xilfpga_defconfig
-xilfpga_defconfig:
-	$(Q)$(MAKE) -f $(srctree)/Makefile 32r2el_defconfig BOARDS=xilfpga
+.PHONY: $(legacy_defconfigs)
+$(legacy_defconfigs):
+	$(Q)$(MAKE) -f $(srctree)/Makefile $($@-y)
diff --git a/arch/mips/alchemy/board-gpr.c b/arch/mips/alchemy/board-gpr.c
index 328d697e72b4..4e79dbd54a33 100644
--- a/arch/mips/alchemy/board-gpr.c
+++ b/arch/mips/alchemy/board-gpr.c
@@ -190,7 +190,7 @@ static struct platform_device gpr_mtd_device = {
 /*
  * LEDs
  */
-static struct gpio_led gpr_gpio_leds[] = {
+static const struct gpio_led gpr_gpio_leds[] = {
 	{	/* green */
 		.name			= "gpr:green",
 		.gpio			= 4,
diff --git a/arch/mips/alchemy/board-mtx1.c b/arch/mips/alchemy/board-mtx1.c
index 85bb75669b0d..aab55aaf3d62 100644
--- a/arch/mips/alchemy/board-mtx1.c
+++ b/arch/mips/alchemy/board-mtx1.c
@@ -145,7 +145,7 @@ static struct platform_device mtx1_wdt = {
 	.resource = mtx1_wdt_res,
 };
 
-static struct gpio_led default_leds[] = {
+static const struct gpio_led default_leds[] = {
 	{
 		.name	= "mtx1:green",
 		.gpio = 211,
diff --git a/arch/mips/ar7/platform.c b/arch/mips/ar7/platform.c
index e1675c25d5d4..f09262e0a72f 100644
--- a/arch/mips/ar7/platform.c
+++ b/arch/mips/ar7/platform.c
@@ -346,7 +346,7 @@ static struct platform_device ar7_udc = {
 /*****************************************************************************
  * LEDs
  ****************************************************************************/
-static struct gpio_led default_leds[] = {
+static const struct gpio_led default_leds[] = {
 	{
 		.name			= "status",
 		.gpio			= 8,
@@ -354,12 +354,12 @@ static struct gpio_led default_leds[] = {
 	},
 };
 
-static struct gpio_led titan_leds[] = {
+static const struct gpio_led titan_leds[] = {
 	{ .name = "status", .gpio = 8, .active_low = 1, },
 	{ .name = "wifi", .gpio = 13, .active_low = 1, },
 };
 
-static struct gpio_led dsl502t_leds[] = {
+static const struct gpio_led dsl502t_leds[] = {
 	{
 		.name			= "status",
 		.gpio			= 9,
@@ -377,7 +377,7 @@ static struct gpio_led dsl502t_leds[] = {
 	},
 };
 
-static struct gpio_led dg834g_leds[] = {
+static const struct gpio_led dg834g_leds[] = {
 	{
 		.name			= "ppp",
 		.gpio			= 6,
@@ -406,7 +406,7 @@ static struct gpio_led dg834g_leds[] = {
 	},
 };
 
-static struct gpio_led fb_sl_leds[] = {
+static const struct gpio_led fb_sl_leds[] = {
 	{
 		.name			= "1",
 		.gpio			= 7,
@@ -433,7 +433,7 @@ static struct gpio_led fb_sl_leds[] = {
 	},
 };
 
-static struct gpio_led fb_fon_leds[] = {
+static const struct gpio_led fb_fon_leds[] = {
 	{
 		.name			= "1",
 		.gpio			= 8,
@@ -459,7 +459,7 @@ static struct gpio_led fb_fon_leds[] = {
 	},
 };
 
-static struct gpio_led gt701_leds[] = {
+static const struct gpio_led gt701_leds[] = {
 	{
 		.name			= "inet:green",
 		.gpio			= 13,
diff --git a/arch/mips/bcm47xx/buttons.c b/arch/mips/bcm47xx/buttons.c
index 88a8fb2bbc71..88d400d256c4 100644
--- a/arch/mips/bcm47xx/buttons.c
+++ b/arch/mips/bcm47xx/buttons.c
@@ -355,7 +355,7 @@ bcm47xx_buttons_luxul_xwr_600_v1[] = {
 
 static const struct gpio_keys_button
 bcm47xx_buttons_luxul_xwr_1750_v1[] = {
-	BCM47XX_GPIO_KEY(14, BTN_TASK),
+	BCM47XX_GPIO_KEY(14, KEY_RESTART),
 };
 
 /* Microsoft */
diff --git a/arch/mips/bcm47xx/leds.c b/arch/mips/bcm47xx/leds.c
index 8307a8a02667..34a7b3fbdfd9 100644
--- a/arch/mips/bcm47xx/leds.c
+++ b/arch/mips/bcm47xx/leds.c
@@ -409,6 +409,12 @@ bcm47xx_leds_luxul_xap_1500_v1[] __initconst = {
 };
 
 static const struct gpio_led
+bcm47xx_leds_luxul_xap1500_v1_extra[] __initconst = {
+	BCM47XX_GPIO_LED(44, "green", "5ghz", 0, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(76, "green", "2ghz", 0, LEDS_GPIO_DEFSTATE_OFF),
+};
+
+static const struct gpio_led
 bcm47xx_leds_luxul_xbr_4400_v1[] __initconst = {
 	BCM47XX_GPIO_LED(12, "green", "usb", 0, LEDS_GPIO_DEFSTATE_OFF),
 	BCM47XX_GPIO_LED_TRIGGER(15, "green", "status", 0, "timer"),
@@ -435,6 +441,11 @@ bcm47xx_leds_luxul_xwr_1750_v1[] __initconst = {
 	BCM47XX_GPIO_LED(15, "green", "wps", 0, LEDS_GPIO_DEFSTATE_OFF),
 };
 
+static const struct gpio_led
+bcm47xx_leds_luxul_xwr1750_v1_extra[] __initconst = {
+	BCM47XX_GPIO_LED(76, "green", "2ghz", 0, LEDS_GPIO_DEFSTATE_OFF),
+};
+
 /* Microsoft */
 
 static const struct gpio_led
@@ -528,6 +539,12 @@ static struct gpio_led_platform_data bcm47xx_leds_pdata;
 	bcm47xx_leds_pdata.num_leds = ARRAY_SIZE(dev_leds);		\
 } while (0)
 
+static struct gpio_led_platform_data bcm47xx_leds_pdata_extra __initdata = {};
+#define bcm47xx_set_pdata_extra(dev_leds) do {				\
+	bcm47xx_leds_pdata_extra.leds = dev_leds;			\
+	bcm47xx_leds_pdata_extra.num_leds = ARRAY_SIZE(dev_leds);	\
+} while (0)
+
 void __init bcm47xx_leds_register(void)
 {
 	enum bcm47xx_board board = bcm47xx_board_get();
@@ -705,6 +722,7 @@ void __init bcm47xx_leds_register(void)
 		break;
 	case BCM47XX_BOARD_LUXUL_XAP_1500_V1:
 		bcm47xx_set_pdata(bcm47xx_leds_luxul_xap_1500_v1);
+		bcm47xx_set_pdata_extra(bcm47xx_leds_luxul_xap1500_v1_extra);
 		break;
 	case BCM47XX_BOARD_LUXUL_XBR_4400_V1:
 		bcm47xx_set_pdata(bcm47xx_leds_luxul_xbr_4400_v1);
@@ -717,6 +735,7 @@ void __init bcm47xx_leds_register(void)
 		break;
 	case BCM47XX_BOARD_LUXUL_XWR_1750_V1:
 		bcm47xx_set_pdata(bcm47xx_leds_luxul_xwr_1750_v1);
+		bcm47xx_set_pdata_extra(bcm47xx_leds_luxul_xwr1750_v1_extra);
 		break;
 
 	case BCM47XX_BOARD_MICROSOFT_MN700:
@@ -760,4 +779,6 @@ void __init bcm47xx_leds_register(void)
 	}
 
 	gpio_led_register_device(-1, &bcm47xx_leds_pdata);
+	if (bcm47xx_leds_pdata_extra.num_leds)
+		gpio_led_register_device(0, &bcm47xx_leds_pdata_extra);
 }
diff --git a/arch/mips/boot/dts/Makefile b/arch/mips/boot/dts/Makefile
index e2c6f131c8eb..1e79cab8e269 100644
--- a/arch/mips/boot/dts/Makefile
+++ b/arch/mips/boot/dts/Makefile
@@ -4,6 +4,7 @@ subdir-y	+= cavium-octeon
 subdir-y	+= img
 subdir-y	+= ingenic
 subdir-y	+= lantiq
+subdir-y	+= mscc
 subdir-y	+= mti
 subdir-y	+= netlogic
 subdir-y	+= ni
diff --git a/arch/mips/boot/dts/brcm/bcm7125.dtsi b/arch/mips/boot/dts/brcm/bcm7125.dtsi
index 2f9ef565e5d0..5bf77b6fcceb 100644
--- a/arch/mips/boot/dts/brcm/bcm7125.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7125.dtsi
@@ -198,6 +198,13 @@
 			status = "disabled";
 		};
 
+		watchdog: watchdog@4067e8 {
+			clocks = <&upg_clk>;
+			compatible = "brcm,bcm7038-wdt";
+			reg = <0x4067e8 0x14>;
+			status = "disabled";
+		};
+
 		upg_gio: gpio@406700 {
 			compatible = "brcm,brcmstb-gpio";
 			reg = <0x406700 0x80>;
diff --git a/arch/mips/boot/dts/brcm/bcm7346.dtsi b/arch/mips/boot/dts/brcm/bcm7346.dtsi
index 02e426fe6013..2afa0dada575 100644
--- a/arch/mips/boot/dts/brcm/bcm7346.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7346.dtsi
@@ -233,6 +233,13 @@
 			status = "disabled";
 		};
 
+		watchdog: watchdog@4067e8 {
+			clocks = <&upg_clk>;
+			compatible = "brcm,bcm7038-wdt";
+			reg = <0x4067e8 0x14>;
+			status = "disabled";
+		};
+
 		aon_pm_l2_intc: interrupt-controller@408440 {
 			compatible = "brcm,l2-intc";
 			reg = <0x408440 0x30>;
@@ -243,6 +250,17 @@
 			brcm,irq-can-wake;
 		};
 
+		aon_ctrl: syscon@408000 {
+			compatible = "brcm,brcmstb-aon-ctrl";
+			reg = <0x408000 0x100>, <0x408200 0x200>;
+			reg-names = "aon-ctrl", "aon-sram";
+		};
+
+		timers: timer@4067c0 {
+			compatible = "brcm,brcmstb-timers";
+			reg = <0x4067c0 0x40>;
+		};
+
 		upg_gio: gpio@406700 {
 			compatible = "brcm,brcmstb-gpio";
 			reg = <0x406700 0x60>;
@@ -483,5 +501,49 @@
 			interrupt-names = "mspi_done";
 			status = "disabled";
 		};
+
+		waketimer: waketimer@408e80 {
+			compatible = "brcm,brcmstb-waketimer";
+			reg = <0x408e80 0x14>;
+			interrupts = <0x3>;
+			interrupt-parent = <&aon_pm_l2_intc>;
+			interrupt-names = "timer";
+			clocks = <&upg_clk>;
+			status = "disabled";
+		};
+	};
+
+	memory_controllers {
+		compatible = "simple-bus";
+		ranges = <0x0 0x103b0000 0xa000>;
+		#address-cells = <1>;
+		#size-cells = <1>;
+
+		memory-controller@0 {
+			compatible = "brcm,brcmstb-memc", "simple-bus";
+			ranges = <0x0 0x0 0xa000>;
+			#address-cells = <1>;
+			#size-cells = <1>;
+
+			memc-arb@1000 {
+				compatible = "brcm,brcmstb-memc-arb";
+				reg = <0x1000 0x248>;
+			};
+
+			memc-ddr@2000 {
+				compatible = "brcm,brcmstb-memc-ddr";
+				reg = <0x2000 0x300>;
+			};
+
+			ddr-phy@6000 {
+				compatible = "brcm,brcmstb-ddr-phy";
+				reg = <0x6000 0xc8>;
+			};
+
+			shimphy@8000 {
+				compatible = "brcm,brcmstb-ddr-shimphy";
+				reg = <0x8000 0x13c>;
+			};
+		};
 	};
 };
diff --git a/arch/mips/boot/dts/brcm/bcm7358.dtsi b/arch/mips/boot/dts/brcm/bcm7358.dtsi
index 1089d6ebc841..6375fc77f389 100644
--- a/arch/mips/boot/dts/brcm/bcm7358.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7358.dtsi
@@ -217,6 +217,13 @@
 			status = "disabled";
 		};
 
+		watchdog: watchdog@4066a8 {
+			clocks = <&upg_clk>;
+			compatible = "brcm,bcm7038-wdt";
+			reg = <0x4066a8 0x14>;
+			status = "disabled";
+		};
+
 		aon_pm_l2_intc: interrupt-controller@408240 {
 			compatible = "brcm,l2-intc";
 			reg = <0x408240 0x30>;
@@ -362,5 +369,15 @@
 			interrupt-names = "mspi_done";
 			status = "disabled";
 		};
+
+		waketimer: waketimer@408e80 {
+			compatible = "brcm,brcmstb-waketimer";
+			reg = <0x408e80 0x14>;
+			interrupts = <0x3>;
+			interrupt-parent = <&aon_pm_l2_intc>;
+			interrupt-names = "timer";
+			clocks = <&upg_clk>;
+			status = "disabled";
+		};
 	};
 };
diff --git a/arch/mips/boot/dts/brcm/bcm7360.dtsi b/arch/mips/boot/dts/brcm/bcm7360.dtsi
index 4b87ebec407a..a57cacea91cf 100644
--- a/arch/mips/boot/dts/brcm/bcm7360.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7360.dtsi
@@ -209,6 +209,13 @@
 			status = "disabled";
 		};
 
+		watchdog: watchdog@4066a8 {
+			clocks = <&upg_clk>;
+			compatible = "brcm,bcm7038-wdt";
+			reg = <0x4066a8 0x14>;
+			status = "disabled";
+		};
+
 		aon_pm_l2_intc: interrupt-controller@408440 {
 			compatible = "brcm,l2-intc";
 			reg = <0x408440 0x30>;
@@ -219,6 +226,17 @@
 			brcm,irq-can-wake;
 		};
 
+		aon_ctrl: syscon@408000 {
+			compatible = "brcm,brcmstb-aon-ctrl";
+			reg = <0x408000 0x100>, <0x408200 0x200>;
+			reg-names = "aon-ctrl", "aon-sram";
+		};
+
+		timers: timer@406680 {
+			compatible = "brcm,brcmstb-timers";
+			reg = <0x406680 0x40>;
+		};
+
 		upg_gio: gpio@406500 {
 			compatible = "brcm,brcmstb-gpio";
 			reg = <0x406500 0xa0>;
@@ -402,5 +420,49 @@
 			interrupt-names = "mspi_done";
 			status = "disabled";
 		};
+
+		waketimer: waketimer@408e80 {
+			compatible = "brcm,brcmstb-waketimer";
+			reg = <0x408e80 0x14>;
+			interrupts = <0x3>;
+			interrupt-parent = <&aon_pm_l2_intc>;
+			interrupt-names = "timer";
+			clocks = <&upg_clk>;
+			status = "disabled";
+		};
+	};
+
+	memory_controllers {
+		compatible = "simple-bus";
+		ranges = <0x0 0x103b0000 0xa000>;
+		#address-cells = <1>;
+		#size-cells = <1>;
+
+		memory-controller@0 {
+			compatible = "brcm,brcmstb-memc", "simple-bus";
+			ranges = <0x0 0x0 0xa000>;
+			#address-cells = <1>;
+			#size-cells = <1>;
+
+			memc-arb@1000 {
+				compatible = "brcm,brcmstb-memc-arb";
+				reg = <0x1000 0x248>;
+			};
+
+			memc-ddr@2000 {
+				compatible = "brcm,brcmstb-memc-ddr";
+				reg = <0x2000 0x300>;
+			};
+
+			ddr-phy@6000 {
+				compatible = "brcm,brcmstb-ddr-phy";
+				reg = <0x6000 0xc8>;
+			};
+
+			shimphy@8000 {
+				compatible = "brcm,brcmstb-ddr-shimphy";
+				reg = <0x8000 0x13c>;
+			};
+		};
 	};
 };
diff --git a/arch/mips/boot/dts/brcm/bcm7362.dtsi b/arch/mips/boot/dts/brcm/bcm7362.dtsi
index ca657df34b6d..728b9e9f84b8 100644
--- a/arch/mips/boot/dts/brcm/bcm7362.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7362.dtsi
@@ -205,6 +205,13 @@
 			status = "disabled";
 		};
 
+		watchdog: watchdog@4066a8 {
+			clocks = <&upg_clk>;
+			compatible = "brcm,bcm7038-wdt";
+			reg = <0x4066a8 0x14>;
+			status = "disabled";
+		};
+
 		aon_pm_l2_intc: interrupt-controller@408440 {
 			compatible = "brcm,l2-intc";
 			reg = <0x408440 0x30>;
@@ -215,6 +222,17 @@
 			brcm,irq-can-wake;
 		};
 
+		aon_ctrl: syscon@408000 {
+			compatible = "brcm,brcmstb-aon-ctrl";
+			reg = <0x408000 0x100>, <0x408200 0x200>;
+			reg-names = "aon-ctrl", "aon-sram";
+		};
+
+		timers: timer@406680 {
+			compatible = "brcm,brcmstb-timers";
+			reg = <0x406680 0x40>;
+		};
+
 		upg_gio: gpio@406500 {
 			compatible = "brcm,brcmstb-gpio";
 			reg = <0x406500 0xa0>;
@@ -398,5 +416,49 @@
 			interrupt-names = "mspi_done";
 			status = "disabled";
 		};
+
+		waketimer: waketimer@408e80 {
+			compatible = "brcm,brcmstb-waketimer";
+			reg = <0x408e80 0x14>;
+			interrupts = <0x3>;
+			interrupt-parent = <&aon_pm_l2_intc>;
+			interrupt-names = "timer";
+			clocks = <&upg_clk>;
+			status = "disabled";
+		};
+	};
+
+	memory_controllers {
+		compatible = "simple-bus";
+		ranges = <0x0 0x103b0000 0xa000>;
+		#address-cells = <1>;
+		#size-cells = <1>;
+
+		memory-controller@0 {
+			compatible = "brcm,brcmstb-memc", "simple-bus";
+			ranges = <0x0 0x0 0xa000>;
+			#address-cells = <1>;
+			#size-cells = <1>;
+
+			memc-arb@1000 {
+				compatible = "brcm,brcmstb-memc-arb";
+				reg = <0x1000 0x248>;
+			};
+
+			memc-ddr@2000 {
+				compatible = "brcm,brcmstb-memc-ddr";
+				reg = <0x2000 0x300>;
+			};
+
+			ddr-phy@6000 {
+				compatible = "brcm,brcmstb-ddr-phy";
+				reg = <0x6000 0xc8>;
+			};
+
+			shimphy@8000 {
+				compatible = "brcm,brcmstb-ddr-shimphy";
+				reg = <0x8000 0x13c>;
+			};
+		};
 	};
 };
diff --git a/arch/mips/boot/dts/brcm/bcm7420.dtsi b/arch/mips/boot/dts/brcm/bcm7420.dtsi
index d262e11bc3f9..9540c27f12e7 100644
--- a/arch/mips/boot/dts/brcm/bcm7420.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7420.dtsi
@@ -214,6 +214,13 @@
 			status = "disabled";
 		};
 
+		watchdog: watchdog@4067e8 {
+			clocks = <&upg_clk>;
+			compatible = "brcm,bcm7038-wdt";
+			reg = <0x4067e8 0x14>;
+			status = "disabled";
+		};
+
 		upg_gio: gpio@406700 {
 			compatible = "brcm,brcmstb-gpio";
 			reg = <0x406700 0x80>;
diff --git a/arch/mips/boot/dts/brcm/bcm7425.dtsi b/arch/mips/boot/dts/brcm/bcm7425.dtsi
index e4fb9b6e6dce..410e61ebaf9e 100644
--- a/arch/mips/boot/dts/brcm/bcm7425.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7425.dtsi
@@ -232,6 +232,13 @@
 			status = "disabled";
 		};
 
+		watchdog: watchdog@4067e8 {
+			clocks = <&upg_clk>;
+			compatible = "brcm,bcm7038-wdt";
+			reg = <0x4067e8 0x14>;
+			status = "disabled";
+		};
+
 		aon_pm_l2_intc: interrupt-controller@408440 {
 			compatible = "brcm,l2-intc";
 			reg = <0x408440 0x30>;
@@ -242,6 +249,17 @@
 			brcm,irq-can-wake;
 		};
 
+		aon_ctrl: syscon@408000 {
+			compatible = "brcm,brcmstb-aon-ctrl";
+			reg = <0x408000 0x100>, <0x408200 0x200>;
+			reg-names = "aon-ctrl", "aon-sram";
+		};
+
+		timers: timer@4067c0 {
+			compatible = "brcm,brcmstb-timers";
+			reg = <0x4067c0 0x40>;
+		};
+
 		upg_gio: gpio@406700 {
 			compatible = "brcm,brcmstb-gpio";
 			reg = <0x406700 0x80>;
@@ -494,5 +512,76 @@
 			interrupt-names = "mspi_done";
 			status = "disabled";
 		};
+
+		waketimer: waketimer@409580 {
+			compatible = "brcm,brcmstb-waketimer";
+			reg = <0x409580 0x14>;
+			interrupts = <0x3>;
+			interrupt-parent = <&aon_pm_l2_intc>;
+			interrupt-names = "timer";
+			clocks = <&upg_clk>;
+			status = "disabled";
+		};
+	};
+
+	memory_controllers {
+		compatible = "simple-bus";
+		ranges = <0x0 0x103b0000 0x1a000>;
+		#address-cells = <1>;
+		#size-cells = <1>;
+
+		memory-controller@0 {
+			compatible = "brcm,brcmstb-memc", "simple-bus";
+			ranges = <0x0 0x0 0xa000>;
+			#address-cells = <1>;
+			#size-cells = <1>;
+
+			memc-arb@1000 {
+				compatible = "brcm,brcmstb-memc-arb";
+				reg = <0x1000 0x248>;
+			};
+
+			memc-ddr@2000 {
+				compatible = "brcm,brcmstb-memc-ddr";
+				reg = <0x2000 0x300>;
+			};
+
+			ddr-phy@6000 {
+				compatible = "brcm,brcmstb-ddr-phy";
+				reg = <0x6000 0xc8>;
+			};
+
+			shimphy@8000 {
+				compatible = "brcm,brcmstb-ddr-shimphy";
+				reg = <0x8000 0x13c>;
+			};
+		};
+
+		memory-controller@1 {
+			compatible = "brcm,brcmstb-memc", "simple-bus";
+			ranges = <0x0 0x10000 0xa000>;
+			#address-cells = <1>;
+			#size-cells = <1>;
+
+			memc-arb@1000 {
+				compatible = "brcm,brcmstb-memc-arb";
+				reg = <0x1000 0x248>;
+			};
+
+			memc-ddr@2000 {
+				compatible = "brcm,brcmstb-memc-ddr";
+				reg = <0x2000 0x300>;
+			};
+
+			ddr-phy@6000 {
+				compatible = "brcm,brcmstb-ddr-phy";
+				reg = <0x6000 0xc8>;
+			};
+
+			shimphy@8000 {
+				compatible = "brcm,brcmstb-ddr-shimphy";
+				reg = <0x8000 0x13c>;
+			};
+		};
 	};
 };
diff --git a/arch/mips/boot/dts/brcm/bcm7435.dtsi b/arch/mips/boot/dts/brcm/bcm7435.dtsi
index 1484e8990e52..8398b7f68bf4 100644
--- a/arch/mips/boot/dts/brcm/bcm7435.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm7435.dtsi
@@ -247,6 +247,13 @@
 			status = "disabled";
 		};
 
+		watchdog: watchdog@4067e8 {
+			clocks = <&upg_clk>;
+			compatible = "brcm,bcm7038-wdt";
+			reg = <0x4067e8 0x14>;
+			status = "disabled";
+		};
+
 		aon_pm_l2_intc: interrupt-controller@408440 {
 			compatible = "brcm,l2-intc";
 			reg = <0x408440 0x30>;
@@ -257,6 +264,17 @@
 			brcm,irq-can-wake;
 		};
 
+		aon_ctrl: syscon@408000 {
+			compatible = "brcm,brcmstb-aon-ctrl";
+			reg = <0x408000 0x100>, <0x408200 0x200>;
+			reg-names = "aon-ctrl", "aon-sram";
+		};
+
+		timers: timer@4067c0 {
+			compatible = "brcm,brcmstb-timers";
+			reg = <0x4067c0 0x40>;
+		};
+
 		upg_gio: gpio@406700 {
 			compatible = "brcm,brcmstb-gpio";
 			reg = <0x406700 0x80>;
@@ -509,5 +527,76 @@
 			interrupt-names = "mspi_done";
 			status = "disabled";
 		};
+
+		waketimer: waketimer@409580 {
+			compatible = "brcm,brcmstb-waketimer";
+			reg = <0x409580 0x14>;
+			interrupts = <0x3>;
+			interrupt-parent = <&aon_pm_l2_intc>;
+			interrupt-names = "timer";
+			clocks = <&upg_clk>;
+			status = "disabled";
+		};
+	};
+
+	memory_controllers {
+		compatible = "simple-bus";
+		ranges = <0x0 0x103b0000 0x1a000>;
+		#address-cells = <1>;
+		#size-cells = <1>;
+
+		memory-controller@0 {
+			compatible = "brcm,brcmstb-memc", "simple-bus";
+			ranges = <0x0 0x0 0xa000>;
+			#address-cells = <1>;
+			#size-cells = <1>;
+
+			memc-arb@1000 {
+				compatible = "brcm,brcmstb-memc-arb";
+				reg = <0x1000 0x248>;
+			};
+
+			memc-ddr@2000 {
+				compatible = "brcm,brcmstb-memc-ddr";
+				reg = <0x2000 0x300>;
+			};
+
+			ddr-phy@6000 {
+				compatible = "brcm,brcmstb-ddr-phy";
+				reg = <0x6000 0xc8>;
+			};
+
+			shimphy@8000 {
+				compatible = "brcm,brcmstb-ddr-shimphy";
+				reg = <0x8000 0x13c>;
+			};
+		};
+
+		memory-controller@1 {
+			compatible = "brcm,brcmstb-memc", "simple-bus";
+			ranges = <0x0 0x10000 0xa000>;
+			#address-cells = <1>;
+			#size-cells = <1>;
+
+			memc-arb@1000 {
+				compatible = "brcm,brcmstb-memc-arb";
+				reg = <0x1000 0x248>;
+			};
+
+			memc-ddr@2000 {
+				compatible = "brcm,brcmstb-memc-ddr";
+				reg = <0x2000 0x300>;
+			};
+
+			ddr-phy@6000 {
+				compatible = "brcm,brcmstb-ddr-phy";
+				reg = <0x6000 0xc8>;
+			};
+
+			shimphy@8000 {
+				compatible = "brcm,brcmstb-ddr-shimphy";
+				reg = <0x8000 0x13c>;
+			};
+		};
 	};
 };
diff --git a/arch/mips/boot/dts/brcm/bcm97125cbmb.dts b/arch/mips/boot/dts/brcm/bcm97125cbmb.dts
index 7f59ea2ded6c..79e9769f7e00 100644
--- a/arch/mips/boot/dts/brcm/bcm97125cbmb.dts
+++ b/arch/mips/boot/dts/brcm/bcm97125cbmb.dts
@@ -50,6 +50,10 @@
 	status = "okay";
 };
 
+&watchdog {
+	status = "okay";
+};
+
 /* FIXME: USB is wonky; disable it for now */
 &ehci0 {
 	status = "disabled";
diff --git a/arch/mips/boot/dts/brcm/bcm97346dbsmb.dts b/arch/mips/boot/dts/brcm/bcm97346dbsmb.dts
index 9e7d5228f2b7..28370ff77eeb 100644
--- a/arch/mips/boot/dts/brcm/bcm97346dbsmb.dts
+++ b/arch/mips/boot/dts/brcm/bcm97346dbsmb.dts
@@ -59,6 +59,10 @@
 	status = "okay";
 };
 
+&watchdog {
+	status = "okay";
+};
+
 &enet0 {
 	status = "okay";
 };
@@ -114,3 +118,7 @@
 &mspi {
 	status = "okay";
 };
+
+&waketimer {
+	status = "okay";
+};
diff --git a/arch/mips/boot/dts/brcm/bcm97358svmb.dts b/arch/mips/boot/dts/brcm/bcm97358svmb.dts
index 708207a0002d..41c1b510c230 100644
--- a/arch/mips/boot/dts/brcm/bcm97358svmb.dts
+++ b/arch/mips/boot/dts/brcm/bcm97358svmb.dts
@@ -55,6 +55,10 @@
 	status = "okay";
 };
 
+&watchdog {
+	status = "okay";
+};
+
 &enet0 {
 	status = "okay";
 };
@@ -106,3 +110,7 @@
 &mspi {
 	status = "okay";
 };
+
+&waketimer {
+	status = "okay";
+};
diff --git a/arch/mips/boot/dts/brcm/bcm97360svmb.dts b/arch/mips/boot/dts/brcm/bcm97360svmb.dts
index 73c6dc9c8c6d..9f6c6c9b7ea7 100644
--- a/arch/mips/boot/dts/brcm/bcm97360svmb.dts
+++ b/arch/mips/boot/dts/brcm/bcm97360svmb.dts
@@ -50,6 +50,10 @@
 	status = "okay";
 };
 
+&watchdog {
+	status = "okay";
+};
+
 &enet0 {
 	status = "okay";
 };
@@ -109,3 +113,7 @@
 &mspi {
 	status = "okay";
 };
+
+&waketimer {
+	status = "okay";
+};
diff --git a/arch/mips/boot/dts/brcm/bcm97362svmb.dts b/arch/mips/boot/dts/brcm/bcm97362svmb.dts
index 37bacfdcf9d9..df8b755c390f 100644
--- a/arch/mips/boot/dts/brcm/bcm97362svmb.dts
+++ b/arch/mips/boot/dts/brcm/bcm97362svmb.dts
@@ -47,6 +47,10 @@
 	status = "okay";
 };
 
+&watchdog {
+	status = "okay";
+};
+
 &enet0 {
 	status = "okay";
 };
@@ -78,3 +82,7 @@
 &mspi {
 	status = "okay";
 };
+
+&waketimer {
+	status = "okay";
+};
diff --git a/arch/mips/boot/dts/brcm/bcm97420c.dts b/arch/mips/boot/dts/brcm/bcm97420c.dts
index f96241e94874..086faeaa384a 100644
--- a/arch/mips/boot/dts/brcm/bcm97420c.dts
+++ b/arch/mips/boot/dts/brcm/bcm97420c.dts
@@ -60,6 +60,10 @@
 	status = "okay";
 };
 
+&watchdog {
+	status = "okay";
+};
+
 /* FIXME: MAC driver comes up but cannot attach to PHY */
 &enet0 {
 	status = "disabled";
diff --git a/arch/mips/boot/dts/brcm/bcm97425svmb.dts b/arch/mips/boot/dts/brcm/bcm97425svmb.dts
index ce762c7b2e54..0ed22217bf3a 100644
--- a/arch/mips/boot/dts/brcm/bcm97425svmb.dts
+++ b/arch/mips/boot/dts/brcm/bcm97425svmb.dts
@@ -61,6 +61,10 @@
 	status = "okay";
 };
 
+&watchdog {
+	status = "okay";
+};
+
 &enet0 {
 	status = "okay";
 };
@@ -144,3 +148,7 @@
 &mspi {
 	status = "okay";
 };
+
+&waketimer {
+	status = "okay";
+};
diff --git a/arch/mips/boot/dts/brcm/bcm97435svmb.dts b/arch/mips/boot/dts/brcm/bcm97435svmb.dts
index d4dd31a543fd..2c145a883aef 100644
--- a/arch/mips/boot/dts/brcm/bcm97435svmb.dts
+++ b/arch/mips/boot/dts/brcm/bcm97435svmb.dts
@@ -61,6 +61,10 @@
 	status = "okay";
 };
 
+&watchdog {
+	status = "okay";
+};
+
 &enet0 {
 	status = "okay";
 };
@@ -120,3 +124,7 @@
 &mspi {
 	status = "okay";
 };
+
+&waketimer {
+	status = "okay";
+};
diff --git a/arch/mips/boot/dts/img/boston.dts b/arch/mips/boot/dts/img/boston.dts
index 2cd49b60e030..1bd105428f61 100644
--- a/arch/mips/boot/dts/img/boston.dts
+++ b/arch/mips/boot/dts/img/boston.dts
@@ -157,7 +157,7 @@
 					#address-cells = <1>;
 					#size-cells = <0>;
 
-					rtc@0x68 {
+					rtc@68 {
 						compatible = "st,m41t81s";
 						reg = <0x68>;
 					};
diff --git a/arch/mips/boot/dts/ingenic/ci20.dts b/arch/mips/boot/dts/ingenic/ci20.dts
index a4cc52214dbd..38078594cf97 100644
--- a/arch/mips/boot/dts/ingenic/ci20.dts
+++ b/arch/mips/boot/dts/ingenic/ci20.dts
@@ -110,22 +110,22 @@
 					reg = <0x0 0x0 0x0 0x800000>;
 				};
 
-				partition@0x800000 {
+				partition@800000 {
 					label = "u-boot";
 					reg = <0x0 0x800000 0x0 0x200000>;
 				};
 
-				partition@0xa00000 {
+				partition@a00000 {
 					label = "u-boot-env";
 					reg = <0x0 0xa00000 0x0 0x200000>;
 				};
 
-				partition@0xc00000 {
+				partition@c00000 {
 					label = "boot";
 					reg = <0x0 0xc00000 0x0 0x4000000>;
 				};
 
-				partition@0x8c00000 {
+				partition@4c00000 {
 					label = "system";
 					reg = <0x0 0x4c00000 0x1 0xfb400000>;
 				};
diff --git a/arch/mips/boot/dts/mscc/Makefile b/arch/mips/boot/dts/mscc/Makefile
new file mode 100644
index 000000000000..c51164537c02
--- /dev/null
+++ b/arch/mips/boot/dts/mscc/Makefile
@@ -0,0 +1,3 @@
+dtb-$(CONFIG_LEGACY_BOARD_OCELOT)	+= ocelot_pcb123.dtb
+
+obj-y				+= $(patsubst %.dtb, %.dtb.o, $(dtb-y))
diff --git a/arch/mips/boot/dts/mscc/ocelot.dtsi b/arch/mips/boot/dts/mscc/ocelot.dtsi
new file mode 100644
index 000000000000..dd239cab2f9d
--- /dev/null
+++ b/arch/mips/boot/dts/mscc/ocelot.dtsi
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Copyright (c) 2017 Microsemi Corporation */
+
+/ {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	compatible = "mscc,ocelot";
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			compatible = "mips,mips24KEc";
+			device_type = "cpu";
+			clocks = <&cpu_clk>;
+			reg = <0>;
+		};
+	};
+
+	aliases {
+		serial0 = &uart0;
+	};
+
+	cpuintc: interrupt-controller {
+		#address-cells = <0>;
+		#interrupt-cells = <1>;
+		interrupt-controller;
+		compatible = "mti,cpu-interrupt-controller";
+	};
+
+	cpu_clk: cpu-clock {
+		compatible = "fixed-clock";
+		#clock-cells = <0>;
+		clock-frequency = <500000000>;
+	};
+
+	ahb_clk: ahb-clk {
+		compatible = "fixed-factor-clock";
+		#clock-cells = <0>;
+		clocks = <&cpu_clk>;
+		clock-div = <2>;
+		clock-mult = <1>;
+	};
+
+	ahb@70000000 {
+		compatible = "simple-bus";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		ranges = <0 0x70000000 0x2000000>;
+
+		interrupt-parent = <&intc>;
+
+		cpu_ctrl: syscon@0 {
+			compatible = "mscc,ocelot-cpu-syscon", "syscon";
+			reg = <0x0 0x2c>;
+		};
+
+		intc: interrupt-controller@70 {
+			compatible = "mscc,ocelot-icpu-intr";
+			reg = <0x70 0x70>;
+			#interrupt-cells = <1>;
+			interrupt-controller;
+			interrupt-parent = <&cpuintc>;
+			interrupts = <2>;
+		};
+
+		uart0: serial@100000 {
+			pinctrl-0 = <&uart_pins>;
+			pinctrl-names = "default";
+			compatible = "ns16550a";
+			reg = <0x100000 0x20>;
+			interrupts = <6>;
+			clocks = <&ahb_clk>;
+			reg-io-width = <4>;
+			reg-shift = <2>;
+
+			status = "disabled";
+		};
+
+		uart2: serial@100800 {
+			pinctrl-0 = <&uart2_pins>;
+			pinctrl-names = "default";
+			compatible = "ns16550a";
+			reg = <0x100800 0x20>;
+			interrupts = <7>;
+			clocks = <&ahb_clk>;
+			reg-io-width = <4>;
+			reg-shift = <2>;
+
+			status = "disabled";
+		};
+
+		reset@1070008 {
+			compatible = "mscc,ocelot-chip-reset";
+			reg = <0x1070008 0x4>;
+		};
+
+		gpio: pinctrl@1070034 {
+			compatible = "mscc,ocelot-pinctrl";
+			reg = <0x1070034 0x68>;
+			gpio-controller;
+			#gpio-cells = <2>;
+			gpio-ranges = <&gpio 0 0 22>;
+
+			uart_pins: uart-pins {
+				pins = "GPIO_6", "GPIO_7";
+				function = "uart";
+			};
+
+			uart2_pins: uart2-pins {
+				pins = "GPIO_12", "GPIO_13";
+				function = "uart2";
+			};
+		};
+	};
+};
diff --git a/arch/mips/boot/dts/mscc/ocelot_pcb123.dts b/arch/mips/boot/dts/mscc/ocelot_pcb123.dts
new file mode 100644
index 000000000000..29d6414f8886
--- /dev/null
+++ b/arch/mips/boot/dts/mscc/ocelot_pcb123.dts
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Copyright (c) 2017 Microsemi Corporation */
+
+/dts-v1/;
+
+#include "ocelot.dtsi"
+
+/ {
+	compatible = "mscc,ocelot-pcb123", "mscc,ocelot";
+
+	chosen {
+		stdout-path = "serial0:115200n8";
+	};
+
+	memory@0 {
+		device_type = "memory";
+		reg = <0x0 0x0e000000>;
+	};
+};
+
+&uart0 {
+	status = "okay";
+};
+
+&uart2 {
+	status = "okay";
+};
diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c
index d99f5242169e..b3aec101a65d 100644
--- a/arch/mips/cavium-octeon/octeon-irq.c
+++ b/arch/mips/cavium-octeon/octeon-irq.c
@@ -2271,7 +2271,7 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node,
 
 	parent_irq = irq_of_parse_and_map(ciu_node, 0);
 	if (!parent_irq) {
-		pr_err("ERROR: Couldn't acquire parent_irq for %s\n.",
+		pr_err("ERROR: Couldn't acquire parent_irq for %s\n",
 			ciu_node->name);
 		return -EINVAL;
 	}
@@ -2283,7 +2283,7 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node,
 
 	addr = of_get_address(ciu_node, 0, NULL, NULL);
 	if (!addr) {
-		pr_err("ERROR: Couldn't acquire reg(0) %s\n.", ciu_node->name);
+		pr_err("ERROR: Couldn't acquire reg(0) %s\n", ciu_node->name);
 		return -EINVAL;
 	}
 	host_data->raw_reg = (u64)phys_to_virt(
@@ -2291,7 +2291,7 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node,
 
 	addr = of_get_address(ciu_node, 1, NULL, NULL);
 	if (!addr) {
-		pr_err("ERROR: Couldn't acquire reg(1) %s\n.", ciu_node->name);
+		pr_err("ERROR: Couldn't acquire reg(1) %s\n", ciu_node->name);
 		return -EINVAL;
 	}
 	host_data->en_reg = (u64)phys_to_virt(
@@ -2299,7 +2299,7 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node,
 
 	r = of_property_read_u32(ciu_node, "cavium,max-bits", &val);
 	if (r) {
-		pr_err("ERROR: Couldn't read cavium,max-bits from %s\n.",
+		pr_err("ERROR: Couldn't read cavium,max-bits from %s\n",
 			ciu_node->name);
 		return r;
 	}
@@ -2309,7 +2309,7 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node,
 					   &octeon_irq_domain_cib_ops,
 					   host_data);
 	if (!cib_domain) {
-		pr_err("ERROR: Couldn't irq_domain_add_linear()\n.");
+		pr_err("ERROR: Couldn't irq_domain_add_linear()\n");
 		return -ENOMEM;
 	}
 
diff --git a/arch/mips/configs/bmips_stb_defconfig b/arch/mips/configs/bmips_stb_defconfig
index 3cefa6bc01dd..47aecb8750e6 100644
--- a/arch/mips/configs/bmips_stb_defconfig
+++ b/arch/mips/configs/bmips_stb_defconfig
@@ -72,6 +72,7 @@ CONFIG_USB_EHCI_HCD_PLATFORM=y
 CONFIG_USB_OHCI_HCD=y
 CONFIG_USB_OHCI_HCD_PLATFORM=y
 CONFIG_USB_STORAGE=y
+CONFIG_SOC_BRCMSTB=y
 CONFIG_EXT4_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
 CONFIG_EXT4_FS_SECURITY=y
diff --git a/arch/mips/configs/generic/32r6.config b/arch/mips/configs/generic/32r6.config
index ca606e71f4d0..1a5d5ea4ab2b 100644
--- a/arch/mips/configs/generic/32r6.config
+++ b/arch/mips/configs/generic/32r6.config
@@ -1,2 +1,4 @@
 CONFIG_CPU_MIPS32_R6=y
 CONFIG_HIGHMEM=y
+
+CONFIG_CRYPTO_CRC32_MIPS=y
diff --git a/arch/mips/configs/generic/64r6.config b/arch/mips/configs/generic/64r6.config
index 7cac0339c4d5..5dd8e8503e34 100644
--- a/arch/mips/configs/generic/64r6.config
+++ b/arch/mips/configs/generic/64r6.config
@@ -2,3 +2,5 @@ CONFIG_CPU_MIPS64_R6=y
 CONFIG_64BIT=y
 CONFIG_MIPS32_O32=y
 CONFIG_MIPS32_N32=y
+
+CONFIG_CRYPTO_CRC32_MIPS=y
diff --git a/arch/mips/configs/generic/board-ocelot.config b/arch/mips/configs/generic/board-ocelot.config
new file mode 100644
index 000000000000..aa815761d85e
--- /dev/null
+++ b/arch/mips/configs/generic/board-ocelot.config
@@ -0,0 +1,35 @@
+# require CONFIG_CPU_MIPS32_R2=y
+
+CONFIG_LEGACY_BOARD_OCELOT=y
+
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_M25P80=y
+CONFIG_MTD_NAND=y
+CONFIG_MTD_NAND_PLATFORM=y
+CONFIG_MTD_SPI_NOR=y
+CONFIG_MTD_UBI=y
+
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
+
+CONFIG_GPIO_SYSFS=y
+
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_MUX=y
+
+CONFIG_SPI=y
+CONFIG_SPI_BITBANG=y
+CONFIG_SPI_DESIGNWARE=y
+CONFIG_SPI_SPIDEV=y
+
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_OCELOT_RESET=y
+
+CONFIG_MAGIC_SYSRQ=y
diff --git a/arch/mips/crypto/Makefile b/arch/mips/crypto/Makefile
new file mode 100644
index 000000000000..e07aca572c2e
--- /dev/null
+++ b/arch/mips/crypto/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for MIPS crypto files..
+#
+
+obj-$(CONFIG_CRYPTO_CRC32_MIPS) += crc32-mips.o
diff --git a/arch/mips/crypto/crc32-mips.c b/arch/mips/crypto/crc32-mips.c
new file mode 100644
index 000000000000..7d1d2425746f
--- /dev/null
+++ b/arch/mips/crypto/crc32-mips.c
@@ -0,0 +1,348 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * crc32-mips.c - CRC32 and CRC32C using optional MIPSr6 instructions
+ *
+ * Module based on arm64/crypto/crc32-arm.c
+ *
+ * Copyright (C) 2014 Linaro Ltd <yazen.ghannam@linaro.org>
+ * Copyright (C) 2018 MIPS Tech, LLC
+ */
+
+#include <linux/unaligned/access_ok.h>
+#include <linux/cpufeature.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <asm/mipsregs.h>
+
+#include <crypto/internal/hash.h>
+
+enum crc_op_size {
+	b, h, w, d,
+};
+
+enum crc_type {
+	crc32,
+	crc32c,
+};
+
+#ifndef TOOLCHAIN_SUPPORTS_CRC
+#define _ASM_MACRO_CRC32(OP, SZ, TYPE)					  \
+_ASM_MACRO_3R(OP, rt, rs, rt2,						  \
+	".ifnc	\\rt, \\rt2\n\t"					  \
+	".error	\"invalid operands \\\"" #OP " \\rt,\\rs,\\rt2\\\"\"\n\t" \
+	".endif\n\t"							  \
+	_ASM_INSN_IF_MIPS(0x7c00000f | (__rt << 16) | (__rs << 21) |	  \
+			  ((SZ) <<  6) | ((TYPE) << 8))			  \
+	_ASM_INSN32_IF_MM(0x00000030 | (__rs << 16) | (__rt << 21) |	  \
+			  ((SZ) << 14) | ((TYPE) << 3)))
+_ASM_MACRO_CRC32(crc32b,  0, 0);
+_ASM_MACRO_CRC32(crc32h,  1, 0);
+_ASM_MACRO_CRC32(crc32w,  2, 0);
+_ASM_MACRO_CRC32(crc32d,  3, 0);
+_ASM_MACRO_CRC32(crc32cb, 0, 1);
+_ASM_MACRO_CRC32(crc32ch, 1, 1);
+_ASM_MACRO_CRC32(crc32cw, 2, 1);
+_ASM_MACRO_CRC32(crc32cd, 3, 1);
+#define _ASM_SET_CRC ""
+#else /* !TOOLCHAIN_SUPPORTS_CRC */
+#define _ASM_SET_CRC ".set\tcrc\n\t"
+#endif
+
+#define _CRC32(crc, value, size, type)		\
+do {						\
+	__asm__ __volatile__(			\
+		".set	push\n\t"		\
+		_ASM_SET_CRC			\
+		#type #size "	%0, %1, %0\n\t"	\
+		".set	pop"			\
+		: "+r" (crc)			\
+		: "r" (value));			\
+} while (0)
+
+#define CRC32(crc, value, size) \
+	_CRC32(crc, value, size, crc32)
+
+#define CRC32C(crc, value, size) \
+	_CRC32(crc, value, size, crc32c)
+
+static u32 crc32_mips_le_hw(u32 crc_, const u8 *p, unsigned int len)
+{
+	u32 crc = crc_;
+
+#ifdef CONFIG_64BIT
+	while (len >= sizeof(u64)) {
+		u64 value = get_unaligned_le64(p);
+
+		CRC32(crc, value, d);
+		p += sizeof(u64);
+		len -= sizeof(u64);
+	}
+
+	if (len & sizeof(u32)) {
+#else /* !CONFIG_64BIT */
+	while (len >= sizeof(u32)) {
+#endif
+		u32 value = get_unaligned_le32(p);
+
+		CRC32(crc, value, w);
+		p += sizeof(u32);
+		len -= sizeof(u32);
+	}
+
+	if (len & sizeof(u16)) {
+		u16 value = get_unaligned_le16(p);
+
+		CRC32(crc, value, h);
+		p += sizeof(u16);
+	}
+
+	if (len & sizeof(u8)) {
+		u8 value = *p++;
+
+		CRC32(crc, value, b);
+	}
+
+	return crc;
+}
+
+static u32 crc32c_mips_le_hw(u32 crc_, const u8 *p, unsigned int len)
+{
+	u32 crc = crc_;
+
+#ifdef CONFIG_64BIT
+	while (len >= sizeof(u64)) {
+		u64 value = get_unaligned_le64(p);
+
+		CRC32C(crc, value, d);
+		p += sizeof(u64);
+		len -= sizeof(u64);
+	}
+
+	if (len & sizeof(u32)) {
+#else /* !CONFIG_64BIT */
+	while (len >= sizeof(u32)) {
+#endif
+		u32 value = get_unaligned_le32(p);
+
+		CRC32C(crc, value, w);
+		p += sizeof(u32);
+		len -= sizeof(u32);
+	}
+
+	if (len & sizeof(u16)) {
+		u16 value = get_unaligned_le16(p);
+
+		CRC32C(crc, value, h);
+		p += sizeof(u16);
+	}
+
+	if (len & sizeof(u8)) {
+		u8 value = *p++;
+
+		CRC32C(crc, value, b);
+	}
+	return crc;
+}
+
+#define CHKSUM_BLOCK_SIZE	1
+#define CHKSUM_DIGEST_SIZE	4
+
+struct chksum_ctx {
+	u32 key;
+};
+
+struct chksum_desc_ctx {
+	u32 crc;
+};
+
+static int chksum_init(struct shash_desc *desc)
+{
+	struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
+	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	ctx->crc = mctx->key;
+
+	return 0;
+}
+
+/*
+ * Setting the seed allows arbitrary accumulators and flexible XOR policy
+ * If your algorithm starts with ~0, then XOR with ~0 before you set
+ * the seed.
+ */
+static int chksum_setkey(struct crypto_shash *tfm, const u8 *key,
+			 unsigned int keylen)
+{
+	struct chksum_ctx *mctx = crypto_shash_ctx(tfm);
+
+	if (keylen != sizeof(mctx->key)) {
+		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+	mctx->key = get_unaligned_le32(key);
+	return 0;
+}
+
+static int chksum_update(struct shash_desc *desc, const u8 *data,
+			 unsigned int length)
+{
+	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	ctx->crc = crc32_mips_le_hw(ctx->crc, data, length);
+	return 0;
+}
+
+static int chksumc_update(struct shash_desc *desc, const u8 *data,
+			 unsigned int length)
+{
+	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	ctx->crc = crc32c_mips_le_hw(ctx->crc, data, length);
+	return 0;
+}
+
+static int chksum_final(struct shash_desc *desc, u8 *out)
+{
+	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	put_unaligned_le32(ctx->crc, out);
+	return 0;
+}
+
+static int chksumc_final(struct shash_desc *desc, u8 *out)
+{
+	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	put_unaligned_le32(~ctx->crc, out);
+	return 0;
+}
+
+static int __chksum_finup(u32 crc, const u8 *data, unsigned int len, u8 *out)
+{
+	put_unaligned_le32(crc32_mips_le_hw(crc, data, len), out);
+	return 0;
+}
+
+static int __chksumc_finup(u32 crc, const u8 *data, unsigned int len, u8 *out)
+{
+	put_unaligned_le32(~crc32c_mips_le_hw(crc, data, len), out);
+	return 0;
+}
+
+static int chksum_finup(struct shash_desc *desc, const u8 *data,
+			unsigned int len, u8 *out)
+{
+	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	return __chksum_finup(ctx->crc, data, len, out);
+}
+
+static int chksumc_finup(struct shash_desc *desc, const u8 *data,
+			unsigned int len, u8 *out)
+{
+	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	return __chksumc_finup(ctx->crc, data, len, out);
+}
+
+static int chksum_digest(struct shash_desc *desc, const u8 *data,
+			 unsigned int length, u8 *out)
+{
+	struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
+
+	return __chksum_finup(mctx->key, data, length, out);
+}
+
+static int chksumc_digest(struct shash_desc *desc, const u8 *data,
+			 unsigned int length, u8 *out)
+{
+	struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
+
+	return __chksumc_finup(mctx->key, data, length, out);
+}
+
+static int chksum_cra_init(struct crypto_tfm *tfm)
+{
+	struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
+
+	mctx->key = ~0;
+	return 0;
+}
+
+static struct shash_alg crc32_alg = {
+	.digestsize		=	CHKSUM_DIGEST_SIZE,
+	.setkey			=	chksum_setkey,
+	.init			=	chksum_init,
+	.update			=	chksum_update,
+	.final			=	chksum_final,
+	.finup			=	chksum_finup,
+	.digest			=	chksum_digest,
+	.descsize		=	sizeof(struct chksum_desc_ctx),
+	.base			=	{
+		.cra_name		=	"crc32",
+		.cra_driver_name	=	"crc32-mips-hw",
+		.cra_priority		=	300,
+		.cra_flags		=	CRYPTO_ALG_OPTIONAL_KEY,
+		.cra_blocksize		=	CHKSUM_BLOCK_SIZE,
+		.cra_alignmask		=	0,
+		.cra_ctxsize		=	sizeof(struct chksum_ctx),
+		.cra_module		=	THIS_MODULE,
+		.cra_init		=	chksum_cra_init,
+	}
+};
+
+static struct shash_alg crc32c_alg = {
+	.digestsize		=	CHKSUM_DIGEST_SIZE,
+	.setkey			=	chksum_setkey,
+	.init			=	chksum_init,
+	.update			=	chksumc_update,
+	.final			=	chksumc_final,
+	.finup			=	chksumc_finup,
+	.digest			=	chksumc_digest,
+	.descsize		=	sizeof(struct chksum_desc_ctx),
+	.base			=	{
+		.cra_name		=	"crc32c",
+		.cra_driver_name	=	"crc32c-mips-hw",
+		.cra_priority		=	300,
+		.cra_flags		=	CRYPTO_ALG_OPTIONAL_KEY,
+		.cra_blocksize		=	CHKSUM_BLOCK_SIZE,
+		.cra_alignmask		=	0,
+		.cra_ctxsize		=	sizeof(struct chksum_ctx),
+		.cra_module		=	THIS_MODULE,
+		.cra_init		=	chksum_cra_init,
+	}
+};
+
+static int __init crc32_mod_init(void)
+{
+	int err;
+
+	err = crypto_register_shash(&crc32_alg);
+
+	if (err)
+		return err;
+
+	err = crypto_register_shash(&crc32c_alg);
+
+	if (err) {
+		crypto_unregister_shash(&crc32_alg);
+		return err;
+	}
+
+	return 0;
+}
+
+static void __exit crc32_mod_exit(void)
+{
+	crypto_unregister_shash(&crc32_alg);
+	crypto_unregister_shash(&crc32c_alg);
+}
+
+MODULE_AUTHOR("Marcin Nowakowski <marcin.nowakowski@mips.com");
+MODULE_DESCRIPTION("CRC32 and CRC32C using optional MIPS instructions");
+MODULE_LICENSE("GPL v2");
+
+module_cpu_feature_match(MIPS_CRC32, crc32_mod_init);
+module_exit(crc32_mod_exit);
diff --git a/arch/mips/generic/Kconfig b/arch/mips/generic/Kconfig
index 2ff3b17bfab1..ba9b2c8cce68 100644
--- a/arch/mips/generic/Kconfig
+++ b/arch/mips/generic/Kconfig
@@ -27,6 +27,22 @@ config LEGACY_BOARD_SEAD3
 	  Enable this to include support for booting on MIPS SEAD-3 FPGA-based
 	  development boards, which boot using a legacy boot protocol.
 
+comment "MSCC Ocelot doesn't work with SEAD3 enabled"
+	depends on LEGACY_BOARD_SEAD3
+
+config LEGACY_BOARD_OCELOT
+	bool "Support MSCC Ocelot boards"
+	depends on LEGACY_BOARD_SEAD3=n
+	select LEGACY_BOARDS
+	select MSCC_OCELOT
+
+config MSCC_OCELOT
+	bool
+	select GPIOLIB
+	select MSCC_OCELOT_IRQ
+	select SYS_HAS_EARLY_PRINTK
+	select USE_GENERIC_EARLY_PRINTK_8250
+
 comment "FIT/UHI Boards"
 
 config FIT_IMAGE_FDT_BOSTON
diff --git a/arch/mips/generic/Makefile b/arch/mips/generic/Makefile
index 5c31e0c4697d..d03a36f869a4 100644
--- a/arch/mips/generic/Makefile
+++ b/arch/mips/generic/Makefile
@@ -14,5 +14,6 @@ obj-y += proc.o
 
 obj-$(CONFIG_YAMON_DT_SHIM)		+= yamon-dt.o
 obj-$(CONFIG_LEGACY_BOARD_SEAD3)	+= board-sead3.o
+obj-$(CONFIG_LEGACY_BOARD_OCELOT)	+= board-ocelot.o
 obj-$(CONFIG_KEXEC)			+= kexec.o
 obj-$(CONFIG_VIRT_BOARD_RANCHU)		+= board-ranchu.o
diff --git a/arch/mips/generic/board-ocelot.c b/arch/mips/generic/board-ocelot.c
new file mode 100644
index 000000000000..06d92fb37769
--- /dev/null
+++ b/arch/mips/generic/board-ocelot.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/*
+ * Microsemi MIPS SoC support
+ *
+ * Copyright (c) 2017 Microsemi Corporation
+ */
+#include <asm/machine.h>
+#include <asm/prom.h>
+
+#define DEVCPU_GCB_CHIP_REGS_CHIP_ID	0x71070000
+#define CHIP_ID_PART_ID			GENMASK(27, 12)
+
+#define OCELOT_PART_ID			(0x7514 << 12)
+
+#define UART_UART			0x70100000
+
+static __init bool ocelot_detect(void)
+{
+	u32 rev;
+	int idx;
+
+	/* Look for the TLB entry set up by redboot before trying to use it */
+	write_c0_entryhi(DEVCPU_GCB_CHIP_REGS_CHIP_ID);
+	mtc0_tlbw_hazard();
+	tlb_probe();
+	tlb_probe_hazard();
+	idx = read_c0_index();
+	if (idx < 0)
+		return 0;
+
+	/* A TLB entry exists, lets assume its usable and check the CHIP ID */
+	rev = __raw_readl((void __iomem *)DEVCPU_GCB_CHIP_REGS_CHIP_ID);
+
+	if ((rev & CHIP_ID_PART_ID) != OCELOT_PART_ID)
+		return 0;
+
+	/* Copy command line from bootloader early for Initrd detection */
+	if (fw_arg0 < 10 && (fw_arg1 & 0xFFF00000) == 0x80000000) {
+		unsigned int prom_argc = fw_arg0;
+		const char **prom_argv = (const char **)fw_arg1;
+
+		if (prom_argc > 1 && strlen(prom_argv[1]) > 0)
+			/* ignore all built-in args if any f/w args given */
+			strcpy(arcs_cmdline, prom_argv[1]);
+	}
+
+	return 1;
+}
+
+static void __init ocelot_earlyprintk_init(void)
+{
+	void __iomem *uart_base;
+
+	uart_base = ioremap_nocache(UART_UART, 0x20);
+	setup_8250_early_printk_port((unsigned long)uart_base, 2, 50000);
+}
+
+static void __init ocelot_late_init(void)
+{
+	ocelot_earlyprintk_init();
+}
+
+static __init const void *ocelot_fixup_fdt(const void *fdt,
+					   const void *match_data)
+{
+	/* This has to be done so late because ioremap needs to work */
+	late_time_init = ocelot_late_init;
+
+	return fdt;
+}
+
+extern char __dtb_ocelot_pcb123_begin[];
+
+MIPS_MACHINE(ocelot) = {
+	.fdt = __dtb_ocelot_pcb123_begin,
+	.fixup_fdt = ocelot_fixup_fdt,
+	.detect = ocelot_detect,
+};
diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h
index 721b698bfe3c..5f74590e0bea 100644
--- a/arch/mips/include/asm/cpu-features.h
+++ b/arch/mips/include/asm/cpu-features.h
@@ -11,6 +11,7 @@
 
 #include <asm/cpu.h>
 #include <asm/cpu-info.h>
+#include <asm/isa-rev.h>
 #include <cpu-feature-overrides.h>
 
 /*
@@ -493,7 +494,7 @@
 # define cpu_has_perf		(cpu_data[0].options & MIPS_CPU_PERF)
 #endif
 
-#if defined(CONFIG_SMP) && defined(__mips_isa_rev) && (__mips_isa_rev >= 6)
+#if defined(CONFIG_SMP) && (MIPS_ISA_REV >= 6)
 /*
  * Some systems share FTLB RAMs between threads within a core (siblings in
  * kernel parlance). This means that FTLB entries may become invalid at almost
@@ -525,7 +526,7 @@
 #  define cpu_has_shared_ftlb_entries \
 	(current_cpu_data.options & MIPS_CPU_SHARED_FTLB_ENTRIES)
 # endif
-#endif /* SMP && __mips_isa_rev >= 6 */
+#endif /* SMP && MIPS_ISA_REV >= 6 */
 
 #ifndef cpu_has_shared_ftlb_ram
 # define cpu_has_shared_ftlb_ram 0
diff --git a/arch/mips/include/asm/isa-rev.h b/arch/mips/include/asm/isa-rev.h
new file mode 100644
index 000000000000..683ea3454dcb
--- /dev/null
+++ b/arch/mips/include/asm/isa-rev.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018 MIPS Tech, LLC
+ * Author: Matt Redfearn <matt.redfearn@mips.com>
+ */
+
+#ifndef __MIPS_ASM_ISA_REV_H__
+#define __MIPS_ASM_ISA_REV_H__
+
+/*
+ * The ISA revision level. This is 0 for MIPS I to V and N for
+ * MIPS{32,64}rN.
+ */
+
+/* If the compiler has defined __mips_isa_rev, believe it. */
+#ifdef __mips_isa_rev
+#define MIPS_ISA_REV __mips_isa_rev
+#else
+/* The compiler hasn't defined the isa rev so assume it's MIPS I - V (0) */
+#define MIPS_ISA_REV 0
+#endif
+
+
+#endif /* __MIPS_ASM_ISA_REV_H__ */
diff --git a/arch/mips/include/asm/mach-ath79/ar71xx_regs.h b/arch/mips/include/asm/mach-ath79/ar71xx_regs.h
index aa3800c82332..d99ca862dae3 100644
--- a/arch/mips/include/asm/mach-ath79/ar71xx_regs.h
+++ b/arch/mips/include/asm/mach-ath79/ar71xx_regs.h
@@ -167,7 +167,7 @@
 #define AR71XX_AHB_DIV_MASK		0x7
 
 #define AR724X_PLL_REG_CPU_CONFIG	0x00
-#define AR724X_PLL_REG_PCIE_CONFIG	0x18
+#define AR724X_PLL_REG_PCIE_CONFIG	0x10
 
 #define AR724X_PLL_FB_SHIFT		0
 #define AR724X_PLL_FB_MASK		0x3ff
diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
index 858752dac337..f65859784a4c 100644
--- a/arch/mips/include/asm/mipsregs.h
+++ b/arch/mips/include/asm/mipsregs.h
@@ -664,6 +664,7 @@
 #define MIPS_CONF5_FRE		(_ULCAST_(1) << 8)
 #define MIPS_CONF5_UFE		(_ULCAST_(1) << 9)
 #define MIPS_CONF5_CA2		(_ULCAST_(1) << 14)
+#define MIPS_CONF5_CRCP		(_ULCAST_(1) << 18)
 #define MIPS_CONF5_MSAEN	(_ULCAST_(1) << 27)
 #define MIPS_CONF5_EVA		(_ULCAST_(1) << 28)
 #define MIPS_CONF5_CV		(_ULCAST_(1) << 29)
diff --git a/arch/mips/include/uapi/asm/hwcap.h b/arch/mips/include/uapi/asm/hwcap.h
index 600ad8fd6835..a2aba4b059e6 100644
--- a/arch/mips/include/uapi/asm/hwcap.h
+++ b/arch/mips/include/uapi/asm/hwcap.h
@@ -5,5 +5,6 @@
 /* HWCAP flags */
 #define HWCAP_MIPS_R6		(1 << 0)
 #define HWCAP_MIPS_MSA		(1 << 1)
+#define HWCAP_MIPS_CRC32	(1 << 2)
 
 #endif /* _UAPI_ASM_HWCAP_H */
diff --git a/arch/mips/include/uapi/asm/mman.h b/arch/mips/include/uapi/asm/mman.h
index 606e02ca4b6c..3035ca499cd8 100644
--- a/arch/mips/include/uapi/asm/mman.h
+++ b/arch/mips/include/uapi/asm/mman.h
@@ -50,6 +50,7 @@
 #define MAP_NONBLOCK	0x20000		/* do not block on IO */
 #define MAP_STACK	0x40000		/* give out an address that is best suited for process/thread stacks */
 #define MAP_HUGETLB	0x80000		/* create a huge page mapping */
+#define MAP_FIXED_NOREPLACE 0x100000	/* MAP_FIXED which doesn't unmap underlying mapping */
 
 /*
  * Flags for msync
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index cf3fd549e16d..6b07b739f914 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -848,6 +848,9 @@ static inline unsigned int decode_config5(struct cpuinfo_mips *c)
 	if (config5 & MIPS_CONF5_CA2)
 		c->ases |= MIPS_ASE_MIPS16E2;
 
+	if (config5 & MIPS_CONF5_CRCP)
+		elf_hwcap |= HWCAP_MIPS_CRC32;
+
 	return config5 & MIPS_CONF_M;
 }
 
diff --git a/arch/mips/kernel/pm-cps.c b/arch/mips/kernel/pm-cps.c
index 421e06dfee72..55c3fbeb2df6 100644
--- a/arch/mips/kernel/pm-cps.c
+++ b/arch/mips/kernel/pm-cps.c
@@ -12,6 +12,7 @@
 #include <linux/init.h>
 #include <linux/percpu.h>
 #include <linux/slab.h>
+#include <linux/suspend.h>
 
 #include <asm/asm-offsets.h>
 #include <asm/cacheflush.h>
@@ -670,6 +671,34 @@ static int cps_pm_online_cpu(unsigned int cpu)
 	return 0;
 }
 
+static int cps_pm_power_notifier(struct notifier_block *this,
+				 unsigned long event, void *ptr)
+{
+	unsigned int stat;
+
+	switch (event) {
+	case PM_SUSPEND_PREPARE:
+		stat = read_cpc_cl_stat_conf();
+		/*
+		 * If we're attempting to suspend the system and power down all
+		 * of the cores, the JTAG detect bit indicates that the CPC will
+		 * instead put the cores into clock-off state. In this state
+		 * a connected debugger can cause the CPU to attempt
+		 * interactions with the powered down system. At best this will
+		 * fail. At worst, it can hang the NoC, requiring a hard reset.
+		 * To avoid this, just block system suspend if a JTAG probe
+		 * is detected.
+		 */
+		if (stat & CPC_Cx_STAT_CONF_EJTAG_PROBE) {
+			pr_warn("JTAG probe is connected - abort suspend\n");
+			return NOTIFY_BAD;
+		}
+		return NOTIFY_DONE;
+	default:
+		return NOTIFY_DONE;
+	}
+}
+
 static int __init cps_pm_init(void)
 {
 	/* A CM is required for all non-coherent states */
@@ -705,6 +734,8 @@ static int __init cps_pm_init(void)
 		pr_warn("pm-cps: no CPC, clock & power gating unavailable\n");
 	}
 
+	pm_notifier(cps_pm_power_notifier, 0);
+
 	return cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "mips/cps_pm:online",
 				 cps_pm_online_cpu, NULL);
 }
diff --git a/arch/mips/kernel/reset.c b/arch/mips/kernel/reset.c
index 7c746d3458e7..6288780b779e 100644
--- a/arch/mips/kernel/reset.c
+++ b/arch/mips/kernel/reset.c
@@ -13,6 +13,9 @@
 #include <linux/reboot.h>
 #include <linux/delay.h>
 
+#include <asm/compiler.h>
+#include <asm/idle.h>
+#include <asm/mipsregs.h>
 #include <asm/reboot.h>
 
 /*
@@ -26,6 +29,62 @@ void (*pm_power_off)(void);
 
 EXPORT_SYMBOL(pm_power_off);
 
+static void machine_hang(void)
+{
+	/*
+	 * We're hanging the system so we don't want to be interrupted anymore.
+	 * Any interrupt handlers that ran would at best be useless & at worst
+	 * go awry because the system isn't in a functional state.
+	 */
+	local_irq_disable();
+
+	/*
+	 * Mask all interrupts, giving us a better chance of remaining in the
+	 * low power wait state.
+	 */
+	clear_c0_status(ST0_IM);
+
+	while (true) {
+		if (cpu_has_mips_r) {
+			/*
+			 * We know that the wait instruction is supported so
+			 * make use of it directly, leaving interrupts
+			 * disabled.
+			 */
+			asm volatile(
+				".set	push\n\t"
+				".set	" MIPS_ISA_ARCH_LEVEL "\n\t"
+				"wait\n\t"
+				".set	pop");
+		} else if (cpu_wait) {
+			/*
+			 * Try the cpu_wait() callback. This isn't ideal since
+			 * it'll re-enable interrupts, but that ought to be
+			 * harmless given that they're all masked.
+			 */
+			cpu_wait();
+			local_irq_disable();
+		} else {
+			/*
+			 * We're going to burn some power running round the
+			 * loop, but we don't really have a choice. This isn't
+			 * a path we should expect to run for long during
+			 * typical use anyway.
+			 */
+		}
+
+		/*
+		 * In most modern MIPS CPUs interrupts will cause the wait
+		 * instruction to graduate even when disabled, and in some
+		 * cases even when masked. In order to prevent a timer
+		 * interrupt from continuously taking us out of the low power
+		 * wait state, we clear any pending timer interrupt here.
+		 */
+		if (cpu_has_counter)
+			write_c0_compare(0);
+	}
+}
+
 void machine_restart(char *command)
 {
 	if (_machine_restart)
@@ -38,8 +97,7 @@ void machine_restart(char *command)
 	do_kernel_restart(command);
 	mdelay(1000);
 	pr_emerg("Reboot failed -- System halted\n");
-	local_irq_disable();
-	while (1);
+	machine_hang();
 }
 
 void machine_halt(void)
@@ -51,8 +109,7 @@ void machine_halt(void)
 	preempt_disable();
 	smp_send_stop();
 #endif
-	local_irq_disable();
-	while (1);
+	machine_hang();
 }
 
 void machine_power_off(void)
@@ -64,6 +121,5 @@ void machine_power_off(void)
 	preempt_disable();
 	smp_send_stop();
 #endif
-	local_irq_disable();
-	while (1);
+	machine_hang();
 }
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index 5f8b0a9e30b3..563188ac6fa2 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -155,7 +155,8 @@ void __init detect_memory_region(phys_addr_t start, phys_addr_t sz_min, phys_add
 	add_memory_region(start, size, BOOT_MEM_RAM);
 }
 
-bool __init memory_region_available(phys_addr_t start, phys_addr_t size)
+static bool __init __maybe_unused memory_region_available(phys_addr_t start,
+							  phys_addr_t size)
 {
 	int i;
 	bool in_ram = false, free = true;
@@ -453,7 +454,7 @@ static void __init bootmem_init(void)
 		pr_info("Wasting %lu bytes for tracking %lu unused pages\n",
 			(min_low_pfn - ARCH_PFN_OFFSET) * sizeof(struct page),
 			min_low_pfn - ARCH_PFN_OFFSET);
-	} else if (min_low_pfn < ARCH_PFN_OFFSET) {
+	} else if (ARCH_PFN_OFFSET - min_low_pfn > 0UL) {
 		pr_info("%lu free pages won't be used\n",
 			ARCH_PFN_OFFSET - min_low_pfn);
 	}
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 84b7b592b834..400676ce03f4 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -30,7 +30,6 @@
 #include <linux/hardirq.h>
 #include <linux/gfp.h>
 #include <linux/kcore.h>
-#include <linux/export.h>
 #include <linux/initrd.h>
 
 #include <asm/asm-offsets.h>
@@ -46,7 +45,6 @@
 #include <asm/pgalloc.h>
 #include <asm/tlb.h>
 #include <asm/fixmap.h>
-#include <asm/maar.h>
 
 /*
  * We have up to 8 empty zeroed pages so we can map one of the right colour
diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c
index 33d3251ecd37..2f616ebeb7e0 100644
--- a/arch/mips/mm/mmap.c
+++ b/arch/mips/mm/mmap.c
@@ -24,20 +24,20 @@ EXPORT_SYMBOL(shm_align_mask);
 #define MIN_GAP (128*1024*1024UL)
 #define MAX_GAP ((TASK_SIZE)/6*5)
 
-static int mmap_is_legacy(void)
+static int mmap_is_legacy(struct rlimit *rlim_stack)
 {
 	if (current->personality & ADDR_COMPAT_LAYOUT)
 		return 1;
 
-	if (rlimit(RLIMIT_STACK) == RLIM_INFINITY)
+	if (rlim_stack->rlim_cur == RLIM_INFINITY)
 		return 1;
 
 	return sysctl_legacy_va_layout;
 }
 
-static unsigned long mmap_base(unsigned long rnd)
+static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
 {
-	unsigned long gap = rlimit(RLIMIT_STACK);
+	unsigned long gap = rlim_stack->rlim_cur;
 
 	if (gap < MIN_GAP)
 		gap = MIN_GAP;
@@ -158,18 +158,18 @@ unsigned long arch_mmap_rnd(void)
 	return rnd << PAGE_SHIFT;
 }
 
-void arch_pick_mmap_layout(struct mm_struct *mm)
+void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
 {
 	unsigned long random_factor = 0UL;
 
 	if (current->flags & PF_RANDOMIZE)
 		random_factor = arch_mmap_rnd();
 
-	if (mmap_is_legacy()) {
+	if (mmap_is_legacy(rlim_stack)) {
 		mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
 		mm->get_unmapped_area = arch_get_unmapped_area;
 	} else {
-		mm->mmap_base = mmap_base(random_factor);
+		mm->mmap_base = mmap_base(random_factor, rlim_stack);
 		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
 	}
 }
diff --git a/arch/mips/net/bpf_jit_asm.S b/arch/mips/net/bpf_jit_asm.S
index 88a2075305d1..57154c5883b6 100644
--- a/arch/mips/net/bpf_jit_asm.S
+++ b/arch/mips/net/bpf_jit_asm.S
@@ -11,6 +11,7 @@
  */
 
 #include <asm/asm.h>
+#include <asm/isa-rev.h>
 #include <asm/regdef.h>
 #include "bpf_jit.h"
 
@@ -65,7 +66,7 @@ FEXPORT(sk_load_word_positive)
 	lw	$r_A, 0(t1)
 	.set	noreorder
 #ifdef CONFIG_CPU_LITTLE_ENDIAN
-# if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
+# if MIPS_ISA_REV >= 2
 	wsbh	t0, $r_A
 	rotr	$r_A, t0, 16
 # else
@@ -92,7 +93,7 @@ FEXPORT(sk_load_half_positive)
 	PTR_ADDU t1, $r_skb_data, offset
 	lhu	$r_A, 0(t1)
 #ifdef CONFIG_CPU_LITTLE_ENDIAN
-# if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
+# if MIPS_ISA_REV >= 2
 	wsbh	$r_A, $r_A
 # else
 	sll	t0, $r_A, 8
@@ -170,7 +171,7 @@ FEXPORT(sk_load_byte_positive)
 NESTED(bpf_slow_path_word, (6 * SZREG), $r_sp)
 	bpf_slow_path_common(4)
 #ifdef CONFIG_CPU_LITTLE_ENDIAN
-# if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
+# if MIPS_ISA_REV >= 2
 	wsbh	t0, $r_s0
 	jr	$r_ra
 	 rotr	$r_A, t0, 16
@@ -196,7 +197,7 @@ NESTED(bpf_slow_path_word, (6 * SZREG), $r_sp)
 NESTED(bpf_slow_path_half, (6 * SZREG), $r_sp)
 	bpf_slow_path_common(2)
 #ifdef CONFIG_CPU_LITTLE_ENDIAN
-# if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
+# if MIPS_ISA_REV >= 2
 	jr	$r_ra
 	 wsbh	$r_A, $r_s0
 # else
diff --git a/arch/mips/pci/pci-mt7620.c b/arch/mips/pci/pci-mt7620.c
index 407f155f0bb6..f6b77788124a 100644
--- a/arch/mips/pci/pci-mt7620.c
+++ b/arch/mips/pci/pci-mt7620.c
@@ -315,6 +315,7 @@ static int mt7620_pci_probe(struct platform_device *pdev)
 		break;
 
 	case MT762X_SOC_MT7628AN:
+	case MT762X_SOC_MT7688:
 		if (mt7628_pci_hw_init(pdev))
 			return -1;
 		break;
diff --git a/arch/mips/txx9/rbtx4927/setup.c b/arch/mips/txx9/rbtx4927/setup.c
index f5b367e20dff..31955c1d5555 100644
--- a/arch/mips/txx9/rbtx4927/setup.c
+++ b/arch/mips/txx9/rbtx4927/setup.c
@@ -319,7 +319,7 @@ static void __init rbtx4927_mtd_init(void)
 
 static void __init rbtx4927_gpioled_init(void)
 {
-	static struct gpio_led leds[] = {
+	static const struct gpio_led leds[] = {
 		{ .name = "gpioled:green:0", .gpio = 0, .active_low = 1, },
 		{ .name = "gpioled:green:1", .gpio = 1, .active_low = 1, },
 	};
diff --git a/arch/mips/vdso/elf.S b/arch/mips/vdso/elf.S
index be37bbb1f061..428a1917afc6 100644
--- a/arch/mips/vdso/elf.S
+++ b/arch/mips/vdso/elf.S
@@ -10,6 +10,8 @@
 
 #include "vdso.h"
 
+#include <asm/isa-rev.h>
+
 #include <linux/elfnote.h>
 #include <linux/version.h>
 
@@ -40,11 +42,7 @@ __mips_abiflags:
 	.byte	__mips		/* isa_level */
 
 	/* isa_rev */
-#ifdef __mips_isa_rev
-	.byte	__mips_isa_rev
-#else
-	.byte	0
-#endif
+	.byte	MIPS_ISA_REV
 
 	/* gpr_size */
 #ifdef __mips64
@@ -54,7 +52,7 @@ __mips_abiflags:
 #endif
 
 	/* cpr1_size */
-#if (defined(__mips_isa_rev) && __mips_isa_rev >= 6) || defined(__mips64)
+#if (MIPS_ISA_REV >= 6) || defined(__mips64)
 	.byte	2		/* AFL_REG_64 */
 #else
 	.byte	1		/* AFL_REG_32 */
diff --git a/arch/nds32/include/asm/cacheflush.h b/arch/nds32/include/asm/cacheflush.h
index 7b9b20a381cb..1240f148ec0f 100644
--- a/arch/nds32/include/asm/cacheflush.h
+++ b/arch/nds32/include/asm/cacheflush.h
@@ -34,8 +34,8 @@ void flush_anon_page(struct vm_area_struct *vma,
 void flush_kernel_dcache_page(struct page *page);
 void flush_icache_range(unsigned long start, unsigned long end);
 void flush_icache_page(struct vm_area_struct *vma, struct page *page);
-#define flush_dcache_mmap_lock(mapping)   spin_lock_irq(&(mapping)->tree_lock)
-#define flush_dcache_mmap_unlock(mapping) spin_unlock_irq(&(mapping)->tree_lock)
+#define flush_dcache_mmap_lock(mapping)   xa_lock_irq(&(mapping)->i_pages)
+#define flush_dcache_mmap_unlock(mapping) xa_unlock_irq(&(mapping)->i_pages)
 
 #else
 #include <asm-generic/cacheflush.h>
diff --git a/arch/nios2/include/asm/cacheflush.h b/arch/nios2/include/asm/cacheflush.h
index 55e383c173f7..18eb9f69f806 100644
--- a/arch/nios2/include/asm/cacheflush.h
+++ b/arch/nios2/include/asm/cacheflush.h
@@ -46,9 +46,7 @@ extern void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
 extern void flush_dcache_range(unsigned long start, unsigned long end);
 extern void invalidate_dcache_range(unsigned long start, unsigned long end);
 
-#define flush_dcache_mmap_lock(mapping) \
-	spin_lock_irq(&(mapping)->tree_lock)
-#define flush_dcache_mmap_unlock(mapping) \
-	spin_unlock_irq(&(mapping)->tree_lock)
+#define flush_dcache_mmap_lock(mapping)		xa_lock_irq(&mapping->i_pages)
+#define flush_dcache_mmap_unlock(mapping)	xa_unlock_irq(&mapping->i_pages)
 
 #endif /* _ASM_NIOS2_CACHEFLUSH_H */
diff --git a/arch/nios2/kernel/time.c b/arch/nios2/kernel/time.c
index 20e86209ef2e..ab88b6dd4679 100644
--- a/arch/nios2/kernel/time.c
+++ b/arch/nios2/kernel/time.c
@@ -336,9 +336,9 @@ static int __init nios2_time_init(struct device_node *timer)
 	return ret;
 }
 
-void read_persistent_clock(struct timespec *ts)
+void read_persistent_clock64(struct timespec64 *ts)
 {
-	ts->tv_sec = mktime(2007, 1, 1, 0, 0, 0);
+	ts->tv_sec = mktime64(2007, 1, 1, 0, 0, 0);
 	ts->tv_nsec = 0;
 }
 
diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h
index bd5ce31936f5..0c83644bfa5c 100644
--- a/arch/parisc/include/asm/cacheflush.h
+++ b/arch/parisc/include/asm/cacheflush.h
@@ -55,10 +55,8 @@ void invalidate_kernel_vmap_range(void *vaddr, int size);
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 extern void flush_dcache_page(struct page *page);
 
-#define flush_dcache_mmap_lock(mapping) \
-	spin_lock_irq(&(mapping)->tree_lock)
-#define flush_dcache_mmap_unlock(mapping) \
-	spin_unlock_irq(&(mapping)->tree_lock)
+#define flush_dcache_mmap_lock(mapping)		xa_lock_irq(&mapping->i_pages)
+#define flush_dcache_mmap_unlock(mapping)	xa_unlock_irq(&mapping->i_pages)
 
 #define flush_icache_page(vma,page)	do { 		\
 	flush_kernel_dcache_page(page);			\
diff --git a/arch/parisc/include/uapi/asm/mman.h b/arch/parisc/include/uapi/asm/mman.h
index a056a642bb31..870fbf8c7088 100644
--- a/arch/parisc/include/uapi/asm/mman.h
+++ b/arch/parisc/include/uapi/asm/mman.h
@@ -26,6 +26,7 @@
 #define MAP_NONBLOCK	0x20000		/* do not block on IO */
 #define MAP_STACK	0x40000		/* give out an address that is best suited for process/thread stacks */
 #define MAP_HUGETLB	0x80000		/* create a huge page mapping */
+#define MAP_FIXED_NOREPLACE 0x100000	/* MAP_FIXED which doesn't unmap underlying mapping */
 
 #define MS_SYNC		1		/* synchronous memory sync */
 #define MS_ASYNC	2		/* sync memory asynchronously */
diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c
index 8c99ebbe2bac..43b308cfdf53 100644
--- a/arch/parisc/kernel/sys_parisc.c
+++ b/arch/parisc/kernel/sys_parisc.c
@@ -70,12 +70,18 @@ static inline unsigned long COLOR_ALIGN(unsigned long addr,
  * Top of mmap area (just below the process stack).
  */
 
-static unsigned long mmap_upper_limit(void)
+/*
+ * When called from arch_get_unmapped_area(), rlim_stack will be NULL,
+ * indicating that "current" should be used instead of a passed-in
+ * value from the exec bprm as done with arch_pick_mmap_layout().
+ */
+static unsigned long mmap_upper_limit(struct rlimit *rlim_stack)
 {
 	unsigned long stack_base;
 
 	/* Limit stack size - see setup_arg_pages() in fs/exec.c */
-	stack_base = rlimit_max(RLIMIT_STACK);
+	stack_base = rlim_stack ? rlim_stack->rlim_max
+				: rlimit_max(RLIMIT_STACK);
 	if (stack_base > STACK_SIZE_MAX)
 		stack_base = STACK_SIZE_MAX;
 
@@ -127,7 +133,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
 	info.flags = 0;
 	info.length = len;
 	info.low_limit = mm->mmap_legacy_base;
-	info.high_limit = mmap_upper_limit();
+	info.high_limit = mmap_upper_limit(NULL);
 	info.align_mask = last_mmap ? (PAGE_MASK & (SHM_COLOUR - 1)) : 0;
 	info.align_offset = shared_align_offset(last_mmap, pgoff);
 	addr = vm_unmapped_area(&info);
@@ -250,10 +256,10 @@ static unsigned long mmap_legacy_base(void)
  * This function, called very early during the creation of a new
  * process VM image, sets up which VM layout function to use:
  */
-void arch_pick_mmap_layout(struct mm_struct *mm)
+void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
 {
 	mm->mmap_legacy_base = mmap_legacy_base();
-	mm->mmap_base = mmap_upper_limit();
+	mm->mmap_base = mmap_upper_limit(rlim_stack);
 
 	if (mmap_is_legacy()) {
 		mm->mmap_base = mm->mmap_legacy_base;
diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c
index f7e684560186..c3830400ca28 100644
--- a/arch/parisc/kernel/time.c
+++ b/arch/parisc/kernel/time.c
@@ -174,7 +174,7 @@ static int rtc_generic_get_time(struct device *dev, struct rtc_time *tm)
 
 	/* we treat tod_sec as unsigned, so this can work until year 2106 */
 	rtc_time64_to_tm(tod_data.tod_sec, tm);
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int rtc_generic_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c
index d503f344e476..b24ce40acd47 100644
--- a/arch/powerpc/mm/mmap.c
+++ b/arch/powerpc/mm/mmap.c
@@ -39,12 +39,12 @@
 #define MIN_GAP (128*1024*1024)
 #define MAX_GAP (TASK_SIZE/6*5)
 
-static inline int mmap_is_legacy(void)
+static inline int mmap_is_legacy(struct rlimit *rlim_stack)
 {
 	if (current->personality & ADDR_COMPAT_LAYOUT)
 		return 1;
 
-	if (rlimit(RLIMIT_STACK) == RLIM_INFINITY)
+	if (rlim_stack->rlim_cur == RLIM_INFINITY)
 		return 1;
 
 	return sysctl_legacy_va_layout;
@@ -76,9 +76,10 @@ static inline unsigned long stack_maxrandom_size(void)
 		return (1<<30);
 }
 
-static inline unsigned long mmap_base(unsigned long rnd)
+static inline unsigned long mmap_base(unsigned long rnd,
+				      struct rlimit *rlim_stack)
 {
-	unsigned long gap = rlimit(RLIMIT_STACK);
+	unsigned long gap = rlim_stack->rlim_cur;
 	unsigned long pad = stack_maxrandom_size() + stack_guard_gap;
 
 	/* Values close to RLIM_INFINITY can overflow. */
@@ -196,26 +197,28 @@ radix__arch_get_unmapped_area_topdown(struct file *filp,
 }
 
 static void radix__arch_pick_mmap_layout(struct mm_struct *mm,
-					unsigned long random_factor)
+					unsigned long random_factor,
+					struct rlimit *rlim_stack)
 {
-	if (mmap_is_legacy()) {
+	if (mmap_is_legacy(rlim_stack)) {
 		mm->mmap_base = TASK_UNMAPPED_BASE;
 		mm->get_unmapped_area = radix__arch_get_unmapped_area;
 	} else {
-		mm->mmap_base = mmap_base(random_factor);
+		mm->mmap_base = mmap_base(random_factor, rlim_stack);
 		mm->get_unmapped_area = radix__arch_get_unmapped_area_topdown;
 	}
 }
 #else
 /* dummy */
 extern void radix__arch_pick_mmap_layout(struct mm_struct *mm,
-					unsigned long random_factor);
+					unsigned long random_factor,
+					struct rlimit *rlim_stack);
 #endif
 /*
  * This function, called very early during the creation of a new
  * process VM image, sets up which VM layout function to use:
  */
-void arch_pick_mmap_layout(struct mm_struct *mm)
+void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
 {
 	unsigned long random_factor = 0UL;
 
@@ -223,16 +226,17 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 		random_factor = arch_mmap_rnd();
 
 	if (radix_enabled())
-		return radix__arch_pick_mmap_layout(mm, random_factor);
+		return radix__arch_pick_mmap_layout(mm, random_factor,
+						    rlim_stack);
 	/*
 	 * Fall back to the standard layout if the personality
 	 * bit is set, or if the expected stack growth is unlimited:
 	 */
-	if (mmap_is_legacy()) {
+	if (mmap_is_legacy(rlim_stack)) {
 		mm->mmap_base = TASK_UNMAPPED_BASE;
 		mm->get_unmapped_area = arch_get_unmapped_area;
 	} else {
-		mm->mmap_base = mmap_base(random_factor);
+		mm->mmap_base = mmap_base(random_factor, rlim_stack);
 		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
 	}
 }
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
index 9a8a084e4aba..4c615fcb0cf0 100644
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/mmu_context_iommu.c
@@ -75,8 +75,7 @@ EXPORT_SYMBOL_GPL(mm_iommu_preregistered);
 /*
  * Taken from alloc_migrate_target with changes to remove CMA allocations
  */
-struct page *new_iommu_non_cma_page(struct page *page, unsigned long private,
-					int **resultp)
+struct page *new_iommu_non_cma_page(struct page *page, unsigned long private)
 {
 	gfp_t gfp_mask = GFP_USER;
 	struct page *new_page;
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 516e23de5a3d..48fbb41af5d1 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -824,6 +824,9 @@ static int __init opal_init(void)
 	/* Create i2c platform devices */
 	opal_pdev_init("ibm,opal-i2c");
 
+	/* Handle non-volatile memory devices */
+	opal_pdev_init("pmem-region");
+
 	/* Setup a heatbeat thread if requested by OPAL */
 	opal_init_heartbeat();
 
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index 831bdcf407bb..0a7627cdb34e 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -37,11 +37,11 @@ static unsigned long stack_maxrandom_size(void)
 #define MIN_GAP (32*1024*1024)
 #define MAX_GAP (STACK_TOP/6*5)
 
-static inline int mmap_is_legacy(void)
+static inline int mmap_is_legacy(struct rlimit *rlim_stack)
 {
 	if (current->personality & ADDR_COMPAT_LAYOUT)
 		return 1;
-	if (rlimit(RLIMIT_STACK) == RLIM_INFINITY)
+	if (rlim_stack->rlim_cur == RLIM_INFINITY)
 		return 1;
 	return sysctl_legacy_va_layout;
 }
@@ -56,9 +56,10 @@ static unsigned long mmap_base_legacy(unsigned long rnd)
 	return TASK_UNMAPPED_BASE + rnd;
 }
 
-static inline unsigned long mmap_base(unsigned long rnd)
+static inline unsigned long mmap_base(unsigned long rnd,
+				      struct rlimit *rlim_stack)
 {
-	unsigned long gap = rlimit(RLIMIT_STACK);
+	unsigned long gap = rlim_stack->rlim_cur;
 
 	if (gap < MIN_GAP)
 		gap = MIN_GAP;
@@ -184,7 +185,7 @@ check_asce_limit:
  * This function, called very early during the creation of a new
  * process VM image, sets up which VM layout function to use:
  */
-void arch_pick_mmap_layout(struct mm_struct *mm)
+void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
 {
 	unsigned long random_factor = 0UL;
 
@@ -195,11 +196,11 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 	 * Fall back to the standard layout if the personality
 	 * bit is set, or if the expected stack growth is unlimited:
 	 */
-	if (mmap_is_legacy()) {
+	if (mmap_is_legacy(rlim_stack)) {
 		mm->mmap_base = mmap_base_legacy(random_factor);
 		mm->get_unmapped_area = arch_get_unmapped_area;
 	} else {
-		mm->mmap_base = mmap_base(random_factor);
+		mm->mmap_base = mmap_base(random_factor, rlim_stack);
 		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
 	}
 }
diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index 348a17ecdf66..9ef8de63f28b 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -276,7 +276,7 @@ static unsigned long mmap_rnd(void)
 	return rnd << PAGE_SHIFT;
 }
 
-void arch_pick_mmap_layout(struct mm_struct *mm)
+void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
 {
 	unsigned long random_factor = mmap_rnd();
 	unsigned long gap;
@@ -285,7 +285,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 	 * Fall back to the standard layout if the personality
 	 * bit is set, or if the expected stack growth is unlimited:
 	 */
-	gap = rlimit(RLIMIT_STACK);
+	gap = rlim_stack->rlim_cur;
 	if (!test_thread_flag(TIF_32BIT) ||
 	    (current->personality & ADDR_COMPAT_LAYOUT) ||
 	    gap == RLIM_INFINITY ||
diff --git a/arch/um/Kconfig.net b/arch/um/Kconfig.net
index e871af24d9cd..c390f3deb0dc 100644
--- a/arch/um/Kconfig.net
+++ b/arch/um/Kconfig.net
@@ -109,6 +109,17 @@ config UML_NET_DAEMON
         more than one without conflict.  If you don't need UML networking,
         say N.
 
+config UML_NET_VECTOR
+	bool "Vector I/O high performance network devices"
+	depends on UML_NET
+	help
+	This User-Mode Linux network driver uses multi-message send
+	and receive functions. The host running the UML guest must have
+	a linux kernel version above 3.0 and a libc version > 2.13.
+	This driver provides tap, raw, gre and l2tpv3 network transports
+	with up to 4 times higher network throughput than the UML network
+	drivers.
+
 config UML_NET_VDE
 	bool "VDE transport"
 	depends on UML_NET
diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile
index e7582e1d248c..16b3cebddafb 100644
--- a/arch/um/drivers/Makefile
+++ b/arch/um/drivers/Makefile
@@ -9,6 +9,7 @@
 slip-objs := slip_kern.o slip_user.o
 slirp-objs := slirp_kern.o slirp_user.o
 daemon-objs := daemon_kern.o daemon_user.o
+vector-objs := vector_kern.o vector_user.o vector_transports.o
 umcast-objs := umcast_kern.o umcast_user.o
 net-objs := net_kern.o net_user.o
 mconsole-objs := mconsole_kern.o mconsole_user.o
@@ -43,6 +44,7 @@ obj-$(CONFIG_STDERR_CONSOLE) += stderr_console.o
 obj-$(CONFIG_UML_NET_SLIP) += slip.o slip_common.o
 obj-$(CONFIG_UML_NET_SLIRP) += slirp.o slip_common.o
 obj-$(CONFIG_UML_NET_DAEMON) += daemon.o 
+obj-$(CONFIG_UML_NET_VECTOR) += vector.o
 obj-$(CONFIG_UML_NET_VDE) += vde.o
 obj-$(CONFIG_UML_NET_MCAST) += umcast.o
 obj-$(CONFIG_UML_NET_PCAP) += pcap.o
@@ -61,7 +63,7 @@ obj-$(CONFIG_BLK_DEV_COW_COMMON) += cow_user.o
 obj-$(CONFIG_UML_RANDOM) += random.o
 
 # pcap_user.o must be added explicitly.
-USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o vde_user.o
+USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o vde_user.o vector_user.o
 CFLAGS_null.o = -DDEV_NULL=$(DEV_NULL_PATH)
 
 include arch/um/scripts/Makefile.rules
diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c
index acbe6c67afba..05588f9466c7 100644
--- a/arch/um/drivers/chan_kern.c
+++ b/arch/um/drivers/chan_kern.c
@@ -171,56 +171,19 @@ int enable_chan(struct line *line)
 	return err;
 }
 
-/* Items are added in IRQ context, when free_irq can't be called, and
- * removed in process context, when it can.
- * This handles interrupt sources which disappear, and which need to
- * be permanently disabled.  This is discovered in IRQ context, but
- * the freeing of the IRQ must be done later.
- */
-static DEFINE_SPINLOCK(irqs_to_free_lock);
-static LIST_HEAD(irqs_to_free);
-
-void free_irqs(void)
-{
-	struct chan *chan;
-	LIST_HEAD(list);
-	struct list_head *ele;
-	unsigned long flags;
-
-	spin_lock_irqsave(&irqs_to_free_lock, flags);
-	list_splice_init(&irqs_to_free, &list);
-	spin_unlock_irqrestore(&irqs_to_free_lock, flags);
-
-	list_for_each(ele, &list) {
-		chan = list_entry(ele, struct chan, free_list);
-
-		if (chan->input && chan->enabled)
-			um_free_irq(chan->line->driver->read_irq, chan);
-		if (chan->output && chan->enabled)
-			um_free_irq(chan->line->driver->write_irq, chan);
-		chan->enabled = 0;
-	}
-}
-
 static void close_one_chan(struct chan *chan, int delay_free_irq)
 {
-	unsigned long flags;
-
 	if (!chan->opened)
 		return;
 
-	if (delay_free_irq) {
-		spin_lock_irqsave(&irqs_to_free_lock, flags);
-		list_add(&chan->free_list, &irqs_to_free);
-		spin_unlock_irqrestore(&irqs_to_free_lock, flags);
-	}
-	else {
-		if (chan->input && chan->enabled)
-			um_free_irq(chan->line->driver->read_irq, chan);
-		if (chan->output && chan->enabled)
-			um_free_irq(chan->line->driver->write_irq, chan);
-		chan->enabled = 0;
-	}
+    /* we can safely call free now - it will be marked
+     *  as free and freed once the IRQ stopped processing
+     */
+	if (chan->input && chan->enabled)
+		um_free_irq(chan->line->driver->read_irq, chan);
+	if (chan->output && chan->enabled)
+		um_free_irq(chan->line->driver->write_irq, chan);
+	chan->enabled = 0;
 	if (chan->ops->close != NULL)
 		(*chan->ops->close)(chan->fd, chan->data);
 
diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c
index 366e57f5e8d6..8d80b27502e6 100644
--- a/arch/um/drivers/line.c
+++ b/arch/um/drivers/line.c
@@ -284,7 +284,7 @@ int line_setup_irq(int fd, int input, int output, struct line *line, void *data)
 	if (err)
 		return err;
 	if (output)
-		err = um_request_irq(driver->write_irq, fd, IRQ_WRITE,
+		err = um_request_irq(driver->write_irq, fd, IRQ_NONE,
 				     line_write_interrupt, IRQF_SHARED,
 				     driver->write_irq_name, data);
 	return err;
diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c
index b305f8247909..3ef1b48e064a 100644
--- a/arch/um/drivers/net_kern.c
+++ b/arch/um/drivers/net_kern.c
@@ -288,7 +288,7 @@ static void uml_net_user_timer_expire(struct timer_list *t)
 #endif
 }
 
-static void setup_etheraddr(struct net_device *dev, char *str)
+void uml_net_setup_etheraddr(struct net_device *dev, char *str)
 {
 	unsigned char *addr = dev->dev_addr;
 	char *end;
@@ -412,7 +412,7 @@ static void eth_configure(int n, void *init, char *mac,
 	 */
 	snprintf(dev->name, sizeof(dev->name), "eth%d", n);
 
-	setup_etheraddr(dev, mac);
+	uml_net_setup_etheraddr(dev, mac);
 
 	printk(KERN_INFO "Netdevice %d (%pM) : ", n, dev->dev_addr);
 
diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c
index 37c51a6be690..778a0e52d5a5 100644
--- a/arch/um/drivers/random.c
+++ b/arch/um/drivers/random.c
@@ -13,6 +13,7 @@
 #include <linux/miscdevice.h>
 #include <linux/delay.h>
 #include <linux/uaccess.h>
+#include <init.h>
 #include <irq_kern.h>
 #include <os.h>
 
@@ -154,7 +155,14 @@ err_out_cleanup_hw:
 /*
  * rng_cleanup - shutdown RNG module
  */
-static void __exit rng_cleanup (void)
+
+static void cleanup(void)
+{
+	free_irq_by_fd(random_fd);
+	os_close_file(random_fd);
+}
+
+static void __exit rng_cleanup(void)
 {
 	os_close_file(random_fd);
 	misc_deregister (&rng_miscdev);
@@ -162,6 +170,7 @@ static void __exit rng_cleanup (void)
 
 module_init (rng_init);
 module_exit (rng_cleanup);
+__uml_exitcall(cleanup);
 
 MODULE_DESCRIPTION("UML Host Random Number Generator (RNG) driver");
 MODULE_LICENSE("GPL");
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index b55fe9bf5d3e..d4e8c497ae86 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -1587,11 +1587,11 @@ int io_thread(void *arg)
 
 		do {
 			res = os_write_file(kernel_fd, ((char *) io_req_buffer) + written, n);
-			if (res > 0) {
+			if (res >= 0) {
 				written += res;
 			} else {
 				if (res != -EAGAIN) {
-					printk("io_thread - read failed, fd = %d, "
+					printk("io_thread - write failed, fd = %d, "
 					       "err = %d\n", kernel_fd, -n);
 				}
 			}
diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c
new file mode 100644
index 000000000000..02168fe25105
--- /dev/null
+++ b/arch/um/drivers/vector_kern.c
@@ -0,0 +1,1633 @@
+/*
+ * Copyright (C) 2017 - Cambridge Greys Limited
+ * Copyright (C) 2011 - 2014 Cisco Systems Inc
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
+ * James Leu (jleu@mindspring.net).
+ * Copyright (C) 2001 by various other people who didn't put their name here.
+ * Licensed under the GPL.
+ */
+
+#include <linux/version.h>
+#include <linux/bootmem.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/inetdevice.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/platform_device.h>
+#include <linux/rtnetlink.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <init.h>
+#include <irq_kern.h>
+#include <irq_user.h>
+#include <net_kern.h>
+#include <os.h>
+#include "mconsole_kern.h"
+#include "vector_user.h"
+#include "vector_kern.h"
+
+/*
+ * Adapted from network devices with the following major changes:
+ * All transports are static - simplifies the code significantly
+ * Multiple FDs/IRQs per device
+ * Vector IO optionally used for read/write, falling back to legacy
+ * based on configuration and/or availability
+ * Configuration is no longer positional - L2TPv3 and GRE require up to
+ * 10 parameters, passing this as positional is not fit for purpose.
+ * Only socket transports are supported
+ */
+
+
+#define DRIVER_NAME "uml-vector"
+#define DRIVER_VERSION "01"
+struct vector_cmd_line_arg {
+	struct list_head list;
+	int unit;
+	char *arguments;
+};
+
+struct vector_device {
+	struct list_head list;
+	struct net_device *dev;
+	struct platform_device pdev;
+	int unit;
+	int opened;
+};
+
+static LIST_HEAD(vec_cmd_line);
+
+static DEFINE_SPINLOCK(vector_devices_lock);
+static LIST_HEAD(vector_devices);
+
+static int driver_registered;
+
+static void vector_eth_configure(int n, struct arglist *def);
+
+/* Argument accessors to set variables (and/or set default values)
+ * mtu, buffer sizing, default headroom, etc
+ */
+
+#define DEFAULT_HEADROOM 2
+#define SAFETY_MARGIN 32
+#define DEFAULT_VECTOR_SIZE 64
+#define TX_SMALL_PACKET 128
+#define MAX_IOV_SIZE (MAX_SKB_FRAGS + 1)
+
+static const struct {
+	const char string[ETH_GSTRING_LEN];
+} ethtool_stats_keys[] = {
+	{ "rx_queue_max" },
+	{ "rx_queue_running_average" },
+	{ "tx_queue_max" },
+	{ "tx_queue_running_average" },
+	{ "rx_encaps_errors" },
+	{ "tx_timeout_count" },
+	{ "tx_restart_queue" },
+	{ "tx_kicks" },
+	{ "tx_flow_control_xon" },
+	{ "tx_flow_control_xoff" },
+	{ "rx_csum_offload_good" },
+	{ "rx_csum_offload_errors"},
+	{ "sg_ok"},
+	{ "sg_linearized"},
+};
+
+#define VECTOR_NUM_STATS	ARRAY_SIZE(ethtool_stats_keys)
+
+static void vector_reset_stats(struct vector_private *vp)
+{
+	vp->estats.rx_queue_max = 0;
+	vp->estats.rx_queue_running_average = 0;
+	vp->estats.tx_queue_max = 0;
+	vp->estats.tx_queue_running_average = 0;
+	vp->estats.rx_encaps_errors = 0;
+	vp->estats.tx_timeout_count = 0;
+	vp->estats.tx_restart_queue = 0;
+	vp->estats.tx_kicks = 0;
+	vp->estats.tx_flow_control_xon = 0;
+	vp->estats.tx_flow_control_xoff = 0;
+	vp->estats.sg_ok = 0;
+	vp->estats.sg_linearized = 0;
+}
+
+static int get_mtu(struct arglist *def)
+{
+	char *mtu = uml_vector_fetch_arg(def, "mtu");
+	long result;
+
+	if (mtu != NULL) {
+		if (kstrtoul(mtu, 10, &result) == 0)
+			return result;
+	}
+	return ETH_MAX_PACKET;
+}
+
+static int get_depth(struct arglist *def)
+{
+	char *mtu = uml_vector_fetch_arg(def, "depth");
+	long result;
+
+	if (mtu != NULL) {
+		if (kstrtoul(mtu, 10, &result) == 0)
+			return result;
+	}
+	return DEFAULT_VECTOR_SIZE;
+}
+
+static int get_headroom(struct arglist *def)
+{
+	char *mtu = uml_vector_fetch_arg(def, "headroom");
+	long result;
+
+	if (mtu != NULL) {
+		if (kstrtoul(mtu, 10, &result) == 0)
+			return result;
+	}
+	return DEFAULT_HEADROOM;
+}
+
+static int get_req_size(struct arglist *def)
+{
+	char *gro = uml_vector_fetch_arg(def, "gro");
+	long result;
+
+	if (gro != NULL) {
+		if (kstrtoul(gro, 10, &result) == 0) {
+			if (result > 0)
+				return 65536;
+		}
+	}
+	return get_mtu(def) + ETH_HEADER_OTHER +
+		get_headroom(def) + SAFETY_MARGIN;
+}
+
+
+static int get_transport_options(struct arglist *def)
+{
+	char *transport = uml_vector_fetch_arg(def, "transport");
+	char *vector = uml_vector_fetch_arg(def, "vec");
+
+	int vec_rx = VECTOR_RX;
+	int vec_tx = VECTOR_TX;
+	long parsed;
+
+	if (vector != NULL) {
+		if (kstrtoul(vector, 10, &parsed) == 0) {
+			if (parsed == 0) {
+				vec_rx = 0;
+				vec_tx = 0;
+			}
+		}
+	}
+
+
+	if (strncmp(transport, TRANS_TAP, TRANS_TAP_LEN) == 0)
+		return (vec_rx | VECTOR_BPF);
+	if (strncmp(transport, TRANS_RAW, TRANS_RAW_LEN) == 0)
+		return (vec_rx | vec_tx);
+	return (vec_rx | vec_tx);
+}
+
+
+/* A mini-buffer for packet drop read
+ * All of our supported transports are datagram oriented and we always
+ * read using recvmsg or recvmmsg. If we pass a buffer which is smaller
+ * than the packet size it still counts as full packet read and will
+ * clean the incoming stream to keep sigio/epoll happy
+ */
+
+#define DROP_BUFFER_SIZE 32
+
+static char *drop_buffer;
+
+/* Array backed queues optimized for bulk enqueue/dequeue and
+ * 1:N (small values of N) or 1:1 enqueuer/dequeuer ratios.
+ * For more details and full design rationale see
+ * http://foswiki.cambridgegreys.com/Main/EatYourTailAndEnjoyIt
+ */
+
+
+/*
+ * Advance the mmsg queue head by n = advance. Resets the queue to
+ * maximum enqueue/dequeue-at-once capacity if possible. Called by
+ * dequeuers. Caller must hold the head_lock!
+ */
+
+static int vector_advancehead(struct vector_queue *qi, int advance)
+{
+	int queue_depth;
+
+	qi->head =
+		(qi->head + advance)
+			% qi->max_depth;
+
+
+	spin_lock(&qi->tail_lock);
+	qi->queue_depth -= advance;
+
+	/* we are at 0, use this to
+	 * reset head and tail so we can use max size vectors
+	 */
+
+	if (qi->queue_depth == 0) {
+		qi->head = 0;
+		qi->tail = 0;
+	}
+	queue_depth = qi->queue_depth;
+	spin_unlock(&qi->tail_lock);
+	return queue_depth;
+}
+
+/*	Advance the queue tail by n = advance.
+ *	This is called by enqueuers which should hold the
+ *	head lock already
+ */
+
+static int vector_advancetail(struct vector_queue *qi, int advance)
+{
+	int queue_depth;
+
+	qi->tail =
+		(qi->tail + advance)
+			% qi->max_depth;
+	spin_lock(&qi->head_lock);
+	qi->queue_depth += advance;
+	queue_depth = qi->queue_depth;
+	spin_unlock(&qi->head_lock);
+	return queue_depth;
+}
+
+static int prep_msg(struct vector_private *vp,
+	struct sk_buff *skb,
+	struct iovec *iov)
+{
+	int iov_index = 0;
+	int nr_frags, frag;
+	skb_frag_t *skb_frag;
+
+	nr_frags = skb_shinfo(skb)->nr_frags;
+	if (nr_frags > MAX_IOV_SIZE) {
+		if (skb_linearize(skb) != 0)
+			goto drop;
+	}
+	if (vp->header_size > 0) {
+		iov[iov_index].iov_len = vp->header_size;
+		vp->form_header(iov[iov_index].iov_base, skb, vp);
+		iov_index++;
+	}
+	iov[iov_index].iov_base = skb->data;
+	if (nr_frags > 0) {
+		iov[iov_index].iov_len = skb->len - skb->data_len;
+		vp->estats.sg_ok++;
+	} else
+		iov[iov_index].iov_len = skb->len;
+	iov_index++;
+	for (frag = 0; frag < nr_frags; frag++) {
+		skb_frag = &skb_shinfo(skb)->frags[frag];
+		iov[iov_index].iov_base = skb_frag_address_safe(skb_frag);
+		iov[iov_index].iov_len = skb_frag_size(skb_frag);
+		iov_index++;
+	}
+	return iov_index;
+drop:
+	return -1;
+}
+/*
+ * Generic vector enqueue with support for forming headers using transport
+ * specific callback. Allows GRE, L2TPv3, RAW and other transports
+ * to use a common enqueue procedure in vector mode
+ */
+
+static int vector_enqueue(struct vector_queue *qi, struct sk_buff *skb)
+{
+	struct vector_private *vp = netdev_priv(qi->dev);
+	int queue_depth;
+	int packet_len;
+	struct mmsghdr *mmsg_vector = qi->mmsg_vector;
+	int iov_count;
+
+	spin_lock(&qi->tail_lock);
+	spin_lock(&qi->head_lock);
+	queue_depth = qi->queue_depth;
+	spin_unlock(&qi->head_lock);
+
+	if (skb)
+		packet_len = skb->len;
+
+	if (queue_depth < qi->max_depth) {
+
+		*(qi->skbuff_vector + qi->tail) = skb;
+		mmsg_vector += qi->tail;
+		iov_count = prep_msg(
+			vp,
+			skb,
+			mmsg_vector->msg_hdr.msg_iov
+		);
+		if (iov_count < 1)
+			goto drop;
+		mmsg_vector->msg_hdr.msg_iovlen = iov_count;
+		mmsg_vector->msg_hdr.msg_name = vp->fds->remote_addr;
+		mmsg_vector->msg_hdr.msg_namelen = vp->fds->remote_addr_size;
+		queue_depth = vector_advancetail(qi, 1);
+	} else
+		goto drop;
+	spin_unlock(&qi->tail_lock);
+	return queue_depth;
+drop:
+	qi->dev->stats.tx_dropped++;
+	if (skb != NULL) {
+		packet_len = skb->len;
+		dev_consume_skb_any(skb);
+		netdev_completed_queue(qi->dev, 1, packet_len);
+	}
+	spin_unlock(&qi->tail_lock);
+	return queue_depth;
+}
+
+static int consume_vector_skbs(struct vector_queue *qi, int count)
+{
+	struct sk_buff *skb;
+	int skb_index;
+	int bytes_compl = 0;
+
+	for (skb_index = qi->head; skb_index < qi->head + count; skb_index++) {
+		skb = *(qi->skbuff_vector + skb_index);
+		/* mark as empty to ensure correct destruction if
+		 * needed
+		 */
+		bytes_compl += skb->len;
+		*(qi->skbuff_vector + skb_index) = NULL;
+		dev_consume_skb_any(skb);
+	}
+	qi->dev->stats.tx_bytes += bytes_compl;
+	qi->dev->stats.tx_packets += count;
+	netdev_completed_queue(qi->dev, count, bytes_compl);
+	return vector_advancehead(qi, count);
+}
+
+/*
+ * Generic vector deque via sendmmsg with support for forming headers
+ * using transport specific callback. Allows GRE, L2TPv3, RAW and
+ * other transports to use a common dequeue procedure in vector mode
+ */
+
+
+static int vector_send(struct vector_queue *qi)
+{
+	struct vector_private *vp = netdev_priv(qi->dev);
+	struct mmsghdr *send_from;
+	int result = 0, send_len, queue_depth = qi->max_depth;
+
+	if (spin_trylock(&qi->head_lock)) {
+		if (spin_trylock(&qi->tail_lock)) {
+			/* update queue_depth to current value */
+			queue_depth = qi->queue_depth;
+			spin_unlock(&qi->tail_lock);
+			while (queue_depth > 0) {
+				/* Calculate the start of the vector */
+				send_len = queue_depth;
+				send_from = qi->mmsg_vector;
+				send_from += qi->head;
+				/* Adjust vector size if wraparound */
+				if (send_len + qi->head > qi->max_depth)
+					send_len = qi->max_depth - qi->head;
+				/* Try to TX as many packets as possible */
+				if (send_len > 0) {
+					result = uml_vector_sendmmsg(
+						 vp->fds->tx_fd,
+						 send_from,
+						 send_len,
+						 0
+					);
+					vp->in_write_poll =
+						(result != send_len);
+				}
+				/* For some of the sendmmsg error scenarios
+				 * we may end being unsure in the TX success
+				 * for all packets. It is safer to declare
+				 * them all TX-ed and blame the network.
+				 */
+				if (result < 0) {
+					if (net_ratelimit())
+						netdev_err(vp->dev, "sendmmsg err=%i\n",
+							result);
+					result = send_len;
+				}
+				if (result > 0) {
+					queue_depth =
+						consume_vector_skbs(qi, result);
+					/* This is equivalent to an TX IRQ.
+					 * Restart the upper layers to feed us
+					 * more packets.
+					 */
+					if (result > vp->estats.tx_queue_max)
+						vp->estats.tx_queue_max = result;
+					vp->estats.tx_queue_running_average =
+						(vp->estats.tx_queue_running_average + result) >> 1;
+				}
+				netif_trans_update(qi->dev);
+				netif_wake_queue(qi->dev);
+				/* if TX is busy, break out of the send loop,
+				 *  poll write IRQ will reschedule xmit for us
+				 */
+				if (result != send_len) {
+					vp->estats.tx_restart_queue++;
+					break;
+				}
+			}
+		}
+		spin_unlock(&qi->head_lock);
+	} else {
+		tasklet_schedule(&vp->tx_poll);
+	}
+	return queue_depth;
+}
+
+/* Queue destructor. Deliberately stateless so we can use
+ * it in queue cleanup if initialization fails.
+ */
+
+static void destroy_queue(struct vector_queue *qi)
+{
+	int i;
+	struct iovec *iov;
+	struct vector_private *vp = netdev_priv(qi->dev);
+	struct mmsghdr *mmsg_vector;
+
+	if (qi == NULL)
+		return;
+	/* deallocate any skbuffs - we rely on any unused to be
+	 * set to NULL.
+	 */
+	if (qi->skbuff_vector != NULL) {
+		for (i = 0; i < qi->max_depth; i++) {
+			if (*(qi->skbuff_vector + i) != NULL)
+				dev_kfree_skb_any(*(qi->skbuff_vector + i));
+		}
+		kfree(qi->skbuff_vector);
+	}
+	/* deallocate matching IOV structures including header buffs */
+	if (qi->mmsg_vector != NULL) {
+		mmsg_vector = qi->mmsg_vector;
+		for (i = 0; i < qi->max_depth; i++) {
+			iov = mmsg_vector->msg_hdr.msg_iov;
+			if (iov != NULL) {
+				if ((vp->header_size > 0) &&
+					(iov->iov_base != NULL))
+					kfree(iov->iov_base);
+				kfree(iov);
+			}
+			mmsg_vector++;
+		}
+		kfree(qi->mmsg_vector);
+	}
+	kfree(qi);
+}
+
+/*
+ * Queue constructor. Create a queue with a given side.
+ */
+static struct vector_queue *create_queue(
+	struct vector_private *vp,
+	int max_size,
+	int header_size,
+	int num_extra_frags)
+{
+	struct vector_queue *result;
+	int i;
+	struct iovec *iov;
+	struct mmsghdr *mmsg_vector;
+
+	result = kmalloc(sizeof(struct vector_queue), GFP_KERNEL);
+	if (result == NULL)
+		goto out_fail;
+	result->max_depth = max_size;
+	result->dev = vp->dev;
+	result->mmsg_vector = kmalloc(
+		(sizeof(struct mmsghdr) * max_size), GFP_KERNEL);
+	result->skbuff_vector = kmalloc(
+		(sizeof(void *) * max_size), GFP_KERNEL);
+	if (result->mmsg_vector == NULL || result->skbuff_vector == NULL)
+		goto out_fail;
+
+	mmsg_vector = result->mmsg_vector;
+	for (i = 0; i < max_size; i++) {
+		/* Clear all pointers - we use non-NULL as marking on
+		 * what to free on destruction
+		 */
+		*(result->skbuff_vector + i) = NULL;
+		mmsg_vector->msg_hdr.msg_iov = NULL;
+		mmsg_vector++;
+	}
+	mmsg_vector = result->mmsg_vector;
+	result->max_iov_frags = num_extra_frags;
+	for (i = 0; i < max_size; i++) {
+		if (vp->header_size > 0)
+			iov = kmalloc(
+				sizeof(struct iovec) * (3 + num_extra_frags),
+				GFP_KERNEL
+			);
+		else
+			iov = kmalloc(
+				sizeof(struct iovec) * (2 + num_extra_frags),
+				GFP_KERNEL
+			);
+		if (iov == NULL)
+			goto out_fail;
+		mmsg_vector->msg_hdr.msg_iov = iov;
+		mmsg_vector->msg_hdr.msg_iovlen = 1;
+		mmsg_vector->msg_hdr.msg_control = NULL;
+		mmsg_vector->msg_hdr.msg_controllen = 0;
+		mmsg_vector->msg_hdr.msg_flags = MSG_DONTWAIT;
+		mmsg_vector->msg_hdr.msg_name = NULL;
+		mmsg_vector->msg_hdr.msg_namelen = 0;
+		if (vp->header_size > 0) {
+			iov->iov_base = kmalloc(header_size, GFP_KERNEL);
+			if (iov->iov_base == NULL)
+				goto out_fail;
+			iov->iov_len = header_size;
+			mmsg_vector->msg_hdr.msg_iovlen = 2;
+			iov++;
+		}
+		iov->iov_base = NULL;
+		iov->iov_len = 0;
+		mmsg_vector++;
+	}
+	spin_lock_init(&result->head_lock);
+	spin_lock_init(&result->tail_lock);
+	result->queue_depth = 0;
+	result->head = 0;
+	result->tail = 0;
+	return result;
+out_fail:
+	destroy_queue(result);
+	return NULL;
+}
+
+/*
+ * We do not use the RX queue as a proper wraparound queue for now
+ * This is not necessary because the consumption via netif_rx()
+ * happens in-line. While we can try using the return code of
+ * netif_rx() for flow control there are no drivers doing this today.
+ * For this RX specific use we ignore the tail/head locks and
+ * just read into a prepared queue filled with skbuffs.
+ */
+
+static struct sk_buff *prep_skb(
+	struct vector_private *vp,
+	struct user_msghdr *msg)
+{
+	int linear = vp->max_packet + vp->headroom + SAFETY_MARGIN;
+	struct sk_buff *result;
+	int iov_index = 0, len;
+	struct iovec *iov = msg->msg_iov;
+	int err, nr_frags, frag;
+	skb_frag_t *skb_frag;
+
+	if (vp->req_size <= linear)
+		len = linear;
+	else
+		len = vp->req_size;
+	result = alloc_skb_with_frags(
+		linear,
+		len - vp->max_packet,
+		3,
+		&err,
+		GFP_ATOMIC
+	);
+	if (vp->header_size > 0)
+		iov_index++;
+	if (result == NULL) {
+		iov[iov_index].iov_base = NULL;
+		iov[iov_index].iov_len = 0;
+		goto done;
+	}
+	skb_reserve(result, vp->headroom);
+	result->dev = vp->dev;
+	skb_put(result, vp->max_packet);
+	result->data_len = len - vp->max_packet;
+	result->len += len - vp->max_packet;
+	skb_reset_mac_header(result);
+	result->ip_summed = CHECKSUM_NONE;
+	iov[iov_index].iov_base = result->data;
+	iov[iov_index].iov_len = vp->max_packet;
+	iov_index++;
+
+	nr_frags = skb_shinfo(result)->nr_frags;
+	for (frag = 0; frag < nr_frags; frag++) {
+		skb_frag = &skb_shinfo(result)->frags[frag];
+		iov[iov_index].iov_base = skb_frag_address_safe(skb_frag);
+		if (iov[iov_index].iov_base != NULL)
+			iov[iov_index].iov_len = skb_frag_size(skb_frag);
+		else
+			iov[iov_index].iov_len = 0;
+		iov_index++;
+	}
+done:
+	msg->msg_iovlen = iov_index;
+	return result;
+}
+
+
+/* Prepare queue for recvmmsg one-shot rx - fill with fresh sk_buffs*/
+
+static void prep_queue_for_rx(struct vector_queue *qi)
+{
+	struct vector_private *vp = netdev_priv(qi->dev);
+	struct mmsghdr *mmsg_vector = qi->mmsg_vector;
+	void **skbuff_vector = qi->skbuff_vector;
+	int i;
+
+	if (qi->queue_depth == 0)
+		return;
+	for (i = 0; i < qi->queue_depth; i++) {
+		/* it is OK if allocation fails - recvmmsg with NULL data in
+		 * iov argument still performs an RX, just drops the packet
+		 * This allows us stop faffing around with a "drop buffer"
+		 */
+
+		*skbuff_vector = prep_skb(vp, &mmsg_vector->msg_hdr);
+		skbuff_vector++;
+		mmsg_vector++;
+	}
+	qi->queue_depth = 0;
+}
+
+static struct vector_device *find_device(int n)
+{
+	struct vector_device *device;
+	struct list_head *ele;
+
+	spin_lock(&vector_devices_lock);
+	list_for_each(ele, &vector_devices) {
+		device = list_entry(ele, struct vector_device, list);
+		if (device->unit == n)
+			goto out;
+	}
+	device = NULL;
+ out:
+	spin_unlock(&vector_devices_lock);
+	return device;
+}
+
+static int vector_parse(char *str, int *index_out, char **str_out,
+			char **error_out)
+{
+	int n, len, err;
+	char *start = str;
+
+	len = strlen(str);
+
+	while ((*str != ':') && (strlen(str) > 1))
+		str++;
+	if (*str != ':') {
+		*error_out = "Expected ':' after device number";
+		return -EINVAL;
+	}
+	*str = '\0';
+
+	err = kstrtouint(start, 0, &n);
+	if (err < 0) {
+		*error_out = "Bad device number";
+		return err;
+	}
+
+	str++;
+	if (find_device(n)) {
+		*error_out = "Device already configured";
+		return -EINVAL;
+	}
+
+	*index_out = n;
+	*str_out = str;
+	return 0;
+}
+
+static int vector_config(char *str, char **error_out)
+{
+	int err, n;
+	char *params;
+	struct arglist *parsed;
+
+	err = vector_parse(str, &n, &params, error_out);
+	if (err != 0)
+		return err;
+
+	/* This string is broken up and the pieces used by the underlying
+	 * driver. We should copy it to make sure things do not go wrong
+	 * later.
+	 */
+
+	params = kstrdup(params, GFP_KERNEL);
+	if (params == NULL) {
+		*error_out = "vector_config failed to strdup string";
+		return -ENOMEM;
+	}
+
+	parsed = uml_parse_vector_ifspec(params);
+
+	if (parsed == NULL) {
+		*error_out = "vector_config failed to parse parameters";
+		return -EINVAL;
+	}
+
+	vector_eth_configure(n, parsed);
+	return 0;
+}
+
+static int vector_id(char **str, int *start_out, int *end_out)
+{
+	char *end;
+	int n;
+
+	n = simple_strtoul(*str, &end, 0);
+	if ((*end != '\0') || (end == *str))
+		return -1;
+
+	*start_out = n;
+	*end_out = n;
+	*str = end;
+	return n;
+}
+
+static int vector_remove(int n, char **error_out)
+{
+	struct vector_device *vec_d;
+	struct net_device *dev;
+	struct vector_private *vp;
+
+	vec_d = find_device(n);
+	if (vec_d == NULL)
+		return -ENODEV;
+	dev = vec_d->dev;
+	vp = netdev_priv(dev);
+	if (vp->fds != NULL)
+		return -EBUSY;
+	unregister_netdev(dev);
+	platform_device_unregister(&vec_d->pdev);
+	return 0;
+}
+
+/*
+ * There is no shared per-transport initialization code, so
+ * we will just initialize each interface one by one and
+ * add them to a list
+ */
+
+static struct platform_driver uml_net_driver = {
+	.driver = {
+		.name = DRIVER_NAME,
+	},
+};
+
+
+static void vector_device_release(struct device *dev)
+{
+	struct vector_device *device = dev_get_drvdata(dev);
+	struct net_device *netdev = device->dev;
+
+	list_del(&device->list);
+	kfree(device);
+	free_netdev(netdev);
+}
+
+/* Bog standard recv using recvmsg - not used normally unless the user
+ * explicitly specifies not to use recvmmsg vector RX.
+ */
+
+static int vector_legacy_rx(struct vector_private *vp)
+{
+	int pkt_len;
+	struct user_msghdr hdr;
+	struct iovec iov[2 + MAX_IOV_SIZE]; /* header + data use case only */
+	int iovpos = 0;
+	struct sk_buff *skb;
+	int header_check;
+
+	hdr.msg_name = NULL;
+	hdr.msg_namelen = 0;
+	hdr.msg_iov = (struct iovec *) &iov;
+	hdr.msg_control = NULL;
+	hdr.msg_controllen = 0;
+	hdr.msg_flags = 0;
+
+	if (vp->header_size > 0) {
+		iov[0].iov_base = vp->header_rxbuffer;
+		iov[0].iov_len = vp->header_size;
+	}
+
+	skb = prep_skb(vp, &hdr);
+
+	if (skb == NULL) {
+		/* Read a packet into drop_buffer and don't do
+		 * anything with it.
+		 */
+		iov[iovpos].iov_base = drop_buffer;
+		iov[iovpos].iov_len = DROP_BUFFER_SIZE;
+		hdr.msg_iovlen = 1;
+		vp->dev->stats.rx_dropped++;
+	}
+
+	pkt_len = uml_vector_recvmsg(vp->fds->rx_fd, &hdr, 0);
+
+	if (skb != NULL) {
+		if (pkt_len > vp->header_size) {
+			if (vp->header_size > 0) {
+				header_check = vp->verify_header(
+					vp->header_rxbuffer, skb, vp);
+				if (header_check < 0) {
+					dev_kfree_skb_irq(skb);
+					vp->dev->stats.rx_dropped++;
+					vp->estats.rx_encaps_errors++;
+					return 0;
+				}
+				if (header_check > 0) {
+					vp->estats.rx_csum_offload_good++;
+					skb->ip_summed = CHECKSUM_UNNECESSARY;
+				}
+			}
+			pskb_trim(skb, pkt_len - vp->rx_header_size);
+			skb->protocol = eth_type_trans(skb, skb->dev);
+			vp->dev->stats.rx_bytes += skb->len;
+			vp->dev->stats.rx_packets++;
+			netif_rx(skb);
+		} else {
+			dev_kfree_skb_irq(skb);
+		}
+	}
+	return pkt_len;
+}
+
+/*
+ * Packet at a time TX which falls back to vector TX if the
+ * underlying transport is busy.
+ */
+
+
+
+static int writev_tx(struct vector_private *vp, struct sk_buff *skb)
+{
+	struct iovec iov[3 + MAX_IOV_SIZE];
+	int iov_count, pkt_len = 0;
+
+	iov[0].iov_base = vp->header_txbuffer;
+	iov_count = prep_msg(vp, skb, (struct iovec *) &iov);
+
+	if (iov_count < 1)
+		goto drop;
+	pkt_len = uml_vector_writev(
+		vp->fds->tx_fd,
+		(struct iovec *) &iov,
+		iov_count
+	);
+
+	netif_trans_update(vp->dev);
+	netif_wake_queue(vp->dev);
+
+	if (pkt_len > 0) {
+		vp->dev->stats.tx_bytes += skb->len;
+		vp->dev->stats.tx_packets++;
+	} else {
+		vp->dev->stats.tx_dropped++;
+	}
+	consume_skb(skb);
+	return pkt_len;
+drop:
+	vp->dev->stats.tx_dropped++;
+	consume_skb(skb);
+	return pkt_len;
+}
+
+/*
+ * Receive as many messages as we can in one call using the special
+ * mmsg vector matched to an skb vector which we prepared earlier.
+ */
+
+static int vector_mmsg_rx(struct vector_private *vp)
+{
+	int packet_count, i;
+	struct vector_queue *qi = vp->rx_queue;
+	struct sk_buff *skb;
+	struct mmsghdr *mmsg_vector = qi->mmsg_vector;
+	void **skbuff_vector = qi->skbuff_vector;
+	int header_check;
+
+	/* Refresh the vector and make sure it is with new skbs and the
+	 * iovs are updated to point to them.
+	 */
+
+	prep_queue_for_rx(qi);
+
+	/* Fire the Lazy Gun - get as many packets as we can in one go. */
+
+	packet_count = uml_vector_recvmmsg(
+		vp->fds->rx_fd, qi->mmsg_vector, qi->max_depth, 0);
+
+	if (packet_count <= 0)
+		return packet_count;
+
+	/* We treat packet processing as enqueue, buffer refresh as dequeue
+	 * The queue_depth tells us how many buffers have been used and how
+	 * many do we need to prep the next time prep_queue_for_rx() is called.
+	 */
+
+	qi->queue_depth = packet_count;
+
+	for (i = 0; i < packet_count; i++) {
+		skb = (*skbuff_vector);
+		if (mmsg_vector->msg_len > vp->header_size) {
+			if (vp->header_size > 0) {
+				header_check = vp->verify_header(
+					mmsg_vector->msg_hdr.msg_iov->iov_base,
+					skb,
+					vp
+				);
+				if (header_check < 0) {
+				/* Overlay header failed to verify - discard.
+				 * We can actually keep this skb and reuse it,
+				 * but that will make the prep logic too
+				 * complex.
+				 */
+					dev_kfree_skb_irq(skb);
+					vp->estats.rx_encaps_errors++;
+					continue;
+				}
+				if (header_check > 0) {
+					vp->estats.rx_csum_offload_good++;
+					skb->ip_summed = CHECKSUM_UNNECESSARY;
+				}
+			}
+			pskb_trim(skb,
+				mmsg_vector->msg_len - vp->rx_header_size);
+			skb->protocol = eth_type_trans(skb, skb->dev);
+			/*
+			 * We do not need to lock on updating stats here
+			 * The interrupt loop is non-reentrant.
+			 */
+			vp->dev->stats.rx_bytes += skb->len;
+			vp->dev->stats.rx_packets++;
+			netif_rx(skb);
+		} else {
+			/* Overlay header too short to do anything - discard.
+			 * We can actually keep this skb and reuse it,
+			 * but that will make the prep logic too complex.
+			 */
+			if (skb != NULL)
+				dev_kfree_skb_irq(skb);
+		}
+		(*skbuff_vector) = NULL;
+		/* Move to the next buffer element */
+		mmsg_vector++;
+		skbuff_vector++;
+	}
+	if (packet_count > 0) {
+		if (vp->estats.rx_queue_max < packet_count)
+			vp->estats.rx_queue_max = packet_count;
+		vp->estats.rx_queue_running_average =
+			(vp->estats.rx_queue_running_average + packet_count) >> 1;
+	}
+	return packet_count;
+}
+
+static void vector_rx(struct vector_private *vp)
+{
+	int err;
+
+	if ((vp->options & VECTOR_RX) > 0)
+		while ((err = vector_mmsg_rx(vp)) > 0)
+			;
+	else
+		while ((err = vector_legacy_rx(vp)) > 0)
+			;
+	if ((err != 0) && net_ratelimit())
+		netdev_err(vp->dev, "vector_rx: error(%d)\n", err);
+}
+
+static int vector_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct vector_private *vp = netdev_priv(dev);
+	int queue_depth = 0;
+
+	if ((vp->options & VECTOR_TX) == 0) {
+		writev_tx(vp, skb);
+		return NETDEV_TX_OK;
+	}
+
+	/* We do BQL only in the vector path, no point doing it in
+	 * packet at a time mode as there is no device queue
+	 */
+
+	netdev_sent_queue(vp->dev, skb->len);
+	queue_depth = vector_enqueue(vp->tx_queue, skb);
+
+	/* if the device queue is full, stop the upper layers and
+	 * flush it.
+	 */
+
+	if (queue_depth >= vp->tx_queue->max_depth - 1) {
+		vp->estats.tx_kicks++;
+		netif_stop_queue(dev);
+		vector_send(vp->tx_queue);
+		return NETDEV_TX_OK;
+	}
+	if (skb->xmit_more) {
+		mod_timer(&vp->tl, vp->coalesce);
+		return NETDEV_TX_OK;
+	}
+	if (skb->len < TX_SMALL_PACKET) {
+		vp->estats.tx_kicks++;
+		vector_send(vp->tx_queue);
+	} else
+		tasklet_schedule(&vp->tx_poll);
+	return NETDEV_TX_OK;
+}
+
+static irqreturn_t vector_rx_interrupt(int irq, void *dev_id)
+{
+	struct net_device *dev = dev_id;
+	struct vector_private *vp = netdev_priv(dev);
+
+	if (!netif_running(dev))
+		return IRQ_NONE;
+	vector_rx(vp);
+	return IRQ_HANDLED;
+
+}
+
+static irqreturn_t vector_tx_interrupt(int irq, void *dev_id)
+{
+	struct net_device *dev = dev_id;
+	struct vector_private *vp = netdev_priv(dev);
+
+	if (!netif_running(dev))
+		return IRQ_NONE;
+	/* We need to pay attention to it only if we got
+	 * -EAGAIN or -ENOBUFFS from sendmmsg. Otherwise
+	 * we ignore it. In the future, it may be worth
+	 * it to improve the IRQ controller a bit to make
+	 * tweaking the IRQ mask less costly
+	 */
+
+	if (vp->in_write_poll)
+		tasklet_schedule(&vp->tx_poll);
+	return IRQ_HANDLED;
+
+}
+
+static int irq_rr;
+
+static int vector_net_close(struct net_device *dev)
+{
+	struct vector_private *vp = netdev_priv(dev);
+	unsigned long flags;
+
+	netif_stop_queue(dev);
+	del_timer(&vp->tl);
+
+	if (vp->fds == NULL)
+		return 0;
+
+	/* Disable and free all IRQS */
+	if (vp->rx_irq > 0) {
+		um_free_irq(vp->rx_irq, dev);
+		vp->rx_irq = 0;
+	}
+	if (vp->tx_irq > 0) {
+		um_free_irq(vp->tx_irq, dev);
+		vp->tx_irq = 0;
+	}
+	tasklet_kill(&vp->tx_poll);
+	if (vp->fds->rx_fd > 0) {
+		os_close_file(vp->fds->rx_fd);
+		vp->fds->rx_fd = -1;
+	}
+	if (vp->fds->tx_fd > 0) {
+		os_close_file(vp->fds->tx_fd);
+		vp->fds->tx_fd = -1;
+	}
+	if (vp->bpf != NULL)
+		kfree(vp->bpf);
+	if (vp->fds->remote_addr != NULL)
+		kfree(vp->fds->remote_addr);
+	if (vp->transport_data != NULL)
+		kfree(vp->transport_data);
+	if (vp->header_rxbuffer != NULL)
+		kfree(vp->header_rxbuffer);
+	if (vp->header_txbuffer != NULL)
+		kfree(vp->header_txbuffer);
+	if (vp->rx_queue != NULL)
+		destroy_queue(vp->rx_queue);
+	if (vp->tx_queue != NULL)
+		destroy_queue(vp->tx_queue);
+	kfree(vp->fds);
+	vp->fds = NULL;
+	spin_lock_irqsave(&vp->lock, flags);
+	vp->opened = false;
+	spin_unlock_irqrestore(&vp->lock, flags);
+	return 0;
+}
+
+/* TX tasklet */
+
+static void vector_tx_poll(unsigned long data)
+{
+	struct vector_private *vp = (struct vector_private *)data;
+
+	vp->estats.tx_kicks++;
+	vector_send(vp->tx_queue);
+}
+static void vector_reset_tx(struct work_struct *work)
+{
+	struct vector_private *vp =
+		container_of(work, struct vector_private, reset_tx);
+	netdev_reset_queue(vp->dev);
+	netif_start_queue(vp->dev);
+	netif_wake_queue(vp->dev);
+}
+static int vector_net_open(struct net_device *dev)
+{
+	struct vector_private *vp = netdev_priv(dev);
+	unsigned long flags;
+	int err = -EINVAL;
+	struct vector_device *vdevice;
+
+	spin_lock_irqsave(&vp->lock, flags);
+	if (vp->opened) {
+		spin_unlock_irqrestore(&vp->lock, flags);
+		return -ENXIO;
+	}
+	vp->opened = true;
+	spin_unlock_irqrestore(&vp->lock, flags);
+
+	vp->fds = uml_vector_user_open(vp->unit, vp->parsed);
+
+	if (vp->fds == NULL)
+		goto out_close;
+
+	if (build_transport_data(vp) < 0)
+		goto out_close;
+
+	if ((vp->options & VECTOR_RX) > 0) {
+		vp->rx_queue = create_queue(
+			vp,
+			get_depth(vp->parsed),
+			vp->rx_header_size,
+			MAX_IOV_SIZE
+		);
+		vp->rx_queue->queue_depth = get_depth(vp->parsed);
+	} else {
+		vp->header_rxbuffer = kmalloc(
+			vp->rx_header_size,
+			GFP_KERNEL
+		);
+		if (vp->header_rxbuffer == NULL)
+			goto out_close;
+	}
+	if ((vp->options & VECTOR_TX) > 0) {
+		vp->tx_queue = create_queue(
+			vp,
+			get_depth(vp->parsed),
+			vp->header_size,
+			MAX_IOV_SIZE
+		);
+	} else {
+		vp->header_txbuffer = kmalloc(vp->header_size, GFP_KERNEL);
+		if (vp->header_txbuffer == NULL)
+			goto out_close;
+	}
+
+	/* READ IRQ */
+	err = um_request_irq(
+		irq_rr + VECTOR_BASE_IRQ, vp->fds->rx_fd,
+			IRQ_READ, vector_rx_interrupt,
+			IRQF_SHARED, dev->name, dev);
+	if (err != 0) {
+		netdev_err(dev, "vector_open: failed to get rx irq(%d)\n", err);
+		err = -ENETUNREACH;
+		goto out_close;
+	}
+	vp->rx_irq = irq_rr + VECTOR_BASE_IRQ;
+	dev->irq = irq_rr + VECTOR_BASE_IRQ;
+	irq_rr = (irq_rr + 1) % VECTOR_IRQ_SPACE;
+
+	/* WRITE IRQ - we need it only if we have vector TX */
+	if ((vp->options & VECTOR_TX) > 0) {
+		err = um_request_irq(
+			irq_rr + VECTOR_BASE_IRQ, vp->fds->tx_fd,
+				IRQ_WRITE, vector_tx_interrupt,
+				IRQF_SHARED, dev->name, dev);
+		if (err != 0) {
+			netdev_err(dev,
+				"vector_open: failed to get tx irq(%d)\n", err);
+			err = -ENETUNREACH;
+			goto out_close;
+		}
+		vp->tx_irq = irq_rr + VECTOR_BASE_IRQ;
+		irq_rr = (irq_rr + 1) % VECTOR_IRQ_SPACE;
+	}
+
+	if ((vp->options & VECTOR_QDISC_BYPASS) != 0) {
+		if (!uml_raw_enable_qdisc_bypass(vp->fds->rx_fd))
+			vp->options = vp->options | VECTOR_BPF;
+	}
+
+	if ((vp->options & VECTOR_BPF) != 0)
+		vp->bpf = uml_vector_default_bpf(vp->fds->rx_fd, dev->dev_addr);
+
+	netif_start_queue(dev);
+
+	/* clear buffer - it can happen that the host side of the interface
+	 * is full when we get here. In this case, new data is never queued,
+	 * SIGIOs never arrive, and the net never works.
+	 */
+
+	vector_rx(vp);
+
+	vector_reset_stats(vp);
+	vdevice = find_device(vp->unit);
+	vdevice->opened = 1;
+
+	if ((vp->options & VECTOR_TX) != 0)
+		add_timer(&vp->tl);
+	return 0;
+out_close:
+	vector_net_close(dev);
+	return err;
+}
+
+
+static void vector_net_set_multicast_list(struct net_device *dev)
+{
+	/* TODO: - we can do some BPF games here */
+	return;
+}
+
+static void vector_net_tx_timeout(struct net_device *dev)
+{
+	struct vector_private *vp = netdev_priv(dev);
+
+	vp->estats.tx_timeout_count++;
+	netif_trans_update(dev);
+	schedule_work(&vp->reset_tx);
+}
+
+static netdev_features_t vector_fix_features(struct net_device *dev,
+	netdev_features_t features)
+{
+	features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
+	return features;
+}
+
+static int vector_set_features(struct net_device *dev,
+	netdev_features_t features)
+{
+	struct vector_private *vp = netdev_priv(dev);
+	/* Adjust buffer sizes for GSO/GRO. Unfortunately, there is
+	 * no way to negotiate it on raw sockets, so we can change
+	 * only our side.
+	 */
+	if (features & NETIF_F_GRO)
+		/* All new frame buffers will be GRO-sized */
+		vp->req_size = 65536;
+	else
+		/* All new frame buffers will be normal sized */
+		vp->req_size = vp->max_packet + vp->headroom + SAFETY_MARGIN;
+	return 0;
+}
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+static void vector_net_poll_controller(struct net_device *dev)
+{
+	disable_irq(dev->irq);
+	vector_rx_interrupt(dev->irq, dev);
+	enable_irq(dev->irq);
+}
+#endif
+
+static void vector_net_get_drvinfo(struct net_device *dev,
+				struct ethtool_drvinfo *info)
+{
+	strlcpy(info->driver, DRIVER_NAME, sizeof(info->driver));
+	strlcpy(info->version, DRIVER_VERSION, sizeof(info->version));
+}
+
+static void vector_get_ringparam(struct net_device *netdev,
+				struct ethtool_ringparam *ring)
+{
+	struct vector_private *vp = netdev_priv(netdev);
+
+	ring->rx_max_pending = vp->rx_queue->max_depth;
+	ring->tx_max_pending = vp->tx_queue->max_depth;
+	ring->rx_pending = vp->rx_queue->max_depth;
+	ring->tx_pending = vp->tx_queue->max_depth;
+}
+
+static void vector_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
+{
+	switch (stringset) {
+	case ETH_SS_TEST:
+		*buf = '\0';
+		break;
+	case ETH_SS_STATS:
+		memcpy(buf, &ethtool_stats_keys, sizeof(ethtool_stats_keys));
+		break;
+	default:
+		WARN_ON(1);
+		break;
+	}
+}
+
+static int vector_get_sset_count(struct net_device *dev, int sset)
+{
+	switch (sset) {
+	case ETH_SS_TEST:
+		return 0;
+	case ETH_SS_STATS:
+		return VECTOR_NUM_STATS;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static void vector_get_ethtool_stats(struct net_device *dev,
+	struct ethtool_stats *estats,
+	u64 *tmp_stats)
+{
+	struct vector_private *vp = netdev_priv(dev);
+
+	memcpy(tmp_stats, &vp->estats, sizeof(struct vector_estats));
+}
+
+static int vector_get_coalesce(struct net_device *netdev,
+					struct ethtool_coalesce *ec)
+{
+	struct vector_private *vp = netdev_priv(netdev);
+
+	ec->tx_coalesce_usecs = (vp->coalesce * 1000000) / HZ;
+	return 0;
+}
+
+static int vector_set_coalesce(struct net_device *netdev,
+					struct ethtool_coalesce *ec)
+{
+	struct vector_private *vp = netdev_priv(netdev);
+
+	vp->coalesce = (ec->tx_coalesce_usecs * HZ) / 1000000;
+	if (vp->coalesce == 0)
+		vp->coalesce = 1;
+	return 0;
+}
+
+static const struct ethtool_ops vector_net_ethtool_ops = {
+	.get_drvinfo	= vector_net_get_drvinfo,
+	.get_link	= ethtool_op_get_link,
+	.get_ts_info	= ethtool_op_get_ts_info,
+	.get_ringparam	= vector_get_ringparam,
+	.get_strings	= vector_get_strings,
+	.get_sset_count	= vector_get_sset_count,
+	.get_ethtool_stats = vector_get_ethtool_stats,
+	.get_coalesce	= vector_get_coalesce,
+	.set_coalesce	= vector_set_coalesce,
+};
+
+
+static const struct net_device_ops vector_netdev_ops = {
+	.ndo_open		= vector_net_open,
+	.ndo_stop		= vector_net_close,
+	.ndo_start_xmit		= vector_net_start_xmit,
+	.ndo_set_rx_mode	= vector_net_set_multicast_list,
+	.ndo_tx_timeout		= vector_net_tx_timeout,
+	.ndo_set_mac_address	= eth_mac_addr,
+	.ndo_validate_addr	= eth_validate_addr,
+	.ndo_fix_features	= vector_fix_features,
+	.ndo_set_features	= vector_set_features,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	.ndo_poll_controller = vector_net_poll_controller,
+#endif
+};
+
+
+static void vector_timer_expire(struct timer_list *t)
+{
+	struct vector_private *vp = from_timer(vp, t, tl);
+
+	vp->estats.tx_kicks++;
+	vector_send(vp->tx_queue);
+}
+
+static void vector_eth_configure(
+		int n,
+		struct arglist *def
+	)
+{
+	struct vector_device *device;
+	struct net_device *dev;
+	struct vector_private *vp;
+	int err;
+
+	device = kzalloc(sizeof(*device), GFP_KERNEL);
+	if (device == NULL) {
+		printk(KERN_ERR "eth_configure failed to allocate struct "
+				 "vector_device\n");
+		return;
+	}
+	dev = alloc_etherdev(sizeof(struct vector_private));
+	if (dev == NULL) {
+		printk(KERN_ERR "eth_configure: failed to allocate struct "
+				 "net_device for vec%d\n", n);
+		goto out_free_device;
+	}
+
+	dev->mtu = get_mtu(def);
+
+	INIT_LIST_HEAD(&device->list);
+	device->unit = n;
+
+	/* If this name ends up conflicting with an existing registered
+	 * netdevice, that is OK, register_netdev{,ice}() will notice this
+	 * and fail.
+	 */
+	snprintf(dev->name, sizeof(dev->name), "vec%d", n);
+	uml_net_setup_etheraddr(dev, uml_vector_fetch_arg(def, "mac"));
+	vp = netdev_priv(dev);
+
+	/* sysfs register */
+	if (!driver_registered) {
+		platform_driver_register(&uml_net_driver);
+		driver_registered = 1;
+	}
+	device->pdev.id = n;
+	device->pdev.name = DRIVER_NAME;
+	device->pdev.dev.release = vector_device_release;
+	dev_set_drvdata(&device->pdev.dev, device);
+	if (platform_device_register(&device->pdev))
+		goto out_free_netdev;
+	SET_NETDEV_DEV(dev, &device->pdev.dev);
+
+	device->dev = dev;
+
+	*vp = ((struct vector_private)
+		{
+		.list			= LIST_HEAD_INIT(vp->list),
+		.dev			= dev,
+		.unit			= n,
+		.options		= get_transport_options(def),
+		.rx_irq			= 0,
+		.tx_irq			= 0,
+		.parsed			= def,
+		.max_packet		= get_mtu(def) + ETH_HEADER_OTHER,
+		/* TODO - we need to calculate headroom so that ip header
+		 * is 16 byte aligned all the time
+		 */
+		.headroom		= get_headroom(def),
+		.form_header		= NULL,
+		.verify_header		= NULL,
+		.header_rxbuffer	= NULL,
+		.header_txbuffer	= NULL,
+		.header_size		= 0,
+		.rx_header_size		= 0,
+		.rexmit_scheduled	= false,
+		.opened			= false,
+		.transport_data		= NULL,
+		.in_write_poll		= false,
+		.coalesce		= 2,
+		.req_size		= get_req_size(def)
+		});
+
+	dev->features = dev->hw_features = (NETIF_F_SG | NETIF_F_FRAGLIST);
+	tasklet_init(&vp->tx_poll, vector_tx_poll, (unsigned long)vp);
+	INIT_WORK(&vp->reset_tx, vector_reset_tx);
+
+	timer_setup(&vp->tl, vector_timer_expire, 0);
+	spin_lock_init(&vp->lock);
+
+	/* FIXME */
+	dev->netdev_ops = &vector_netdev_ops;
+	dev->ethtool_ops = &vector_net_ethtool_ops;
+	dev->watchdog_timeo = (HZ >> 1);
+	/* primary IRQ - fixme */
+	dev->irq = 0; /* we will adjust this once opened */
+
+	rtnl_lock();
+	err = register_netdevice(dev);
+	rtnl_unlock();
+	if (err)
+		goto out_undo_user_init;
+
+	spin_lock(&vector_devices_lock);
+	list_add(&device->list, &vector_devices);
+	spin_unlock(&vector_devices_lock);
+
+	return;
+
+out_undo_user_init:
+	return;
+out_free_netdev:
+	free_netdev(dev);
+out_free_device:
+	kfree(device);
+}
+
+
+
+
+/*
+ * Invoked late in the init
+ */
+
+static int __init vector_init(void)
+{
+	struct list_head *ele;
+	struct vector_cmd_line_arg *def;
+	struct arglist *parsed;
+
+	list_for_each(ele, &vec_cmd_line) {
+		def = list_entry(ele, struct vector_cmd_line_arg, list);
+		parsed = uml_parse_vector_ifspec(def->arguments);
+		if (parsed != NULL)
+			vector_eth_configure(def->unit, parsed);
+	}
+	return 0;
+}
+
+
+/* Invoked at initial argument parsing, only stores
+ * arguments until a proper vector_init is called
+ * later
+ */
+
+static int __init vector_setup(char *str)
+{
+	char *error;
+	int n, err;
+	struct vector_cmd_line_arg *new;
+
+	err = vector_parse(str, &n, &str, &error);
+	if (err) {
+		printk(KERN_ERR "vector_setup - Couldn't parse '%s' : %s\n",
+				 str, error);
+		return 1;
+	}
+	new = alloc_bootmem(sizeof(*new));
+	INIT_LIST_HEAD(&new->list);
+	new->unit = n;
+	new->arguments = str;
+	list_add_tail(&new->list, &vec_cmd_line);
+	return 1;
+}
+
+__setup("vec", vector_setup);
+__uml_help(vector_setup,
+"vec[0-9]+:<option>=<value>,<option>=<value>\n"
+"	 Configure a vector io network device.\n\n"
+);
+
+late_initcall(vector_init);
+
+static struct mc_device vector_mc = {
+	.list		= LIST_HEAD_INIT(vector_mc.list),
+	.name		= "vec",
+	.config		= vector_config,
+	.get_config	= NULL,
+	.id		= vector_id,
+	.remove		= vector_remove,
+};
+
+#ifdef CONFIG_INET
+static int vector_inetaddr_event(
+	struct notifier_block *this,
+	unsigned long event,
+	void *ptr)
+{
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block vector_inetaddr_notifier = {
+	.notifier_call		= vector_inetaddr_event,
+};
+
+static void inet_register(void)
+{
+	register_inetaddr_notifier(&vector_inetaddr_notifier);
+}
+#else
+static inline void inet_register(void)
+{
+}
+#endif
+
+static int vector_net_init(void)
+{
+	mconsole_register_dev(&vector_mc);
+	inet_register();
+	return 0;
+}
+
+__initcall(vector_net_init);
+
+
+
diff --git a/arch/um/drivers/vector_kern.h b/arch/um/drivers/vector_kern.h
new file mode 100644
index 000000000000..0b0a767b9076
--- /dev/null
+++ b/arch/um/drivers/vector_kern.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright (C) 2002 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __UM_VECTOR_KERN_H
+#define __UM_VECTOR_KERN_H
+
+#include <linux/netdevice.h>
+#include <linux/platform_device.h>
+#include <linux/skbuff.h>
+#include <linux/socket.h>
+#include <linux/list.h>
+#include <linux/ctype.h>
+#include <linux/workqueue.h>
+#include <linux/interrupt.h>
+#include "vector_user.h"
+
+/* Queue structure specially adapted for multiple enqueue/dequeue
+ * in a mmsgrecv/mmsgsend context
+ */
+
+/* Dequeue method */
+
+#define QUEUE_SENDMSG 0
+#define QUEUE_SENDMMSG 1
+
+#define VECTOR_RX 1
+#define VECTOR_TX (1 << 1)
+#define VECTOR_BPF (1 << 2)
+#define VECTOR_QDISC_BYPASS (1 << 3)
+
+#define ETH_MAX_PACKET 1500
+#define ETH_HEADER_OTHER 32 /* just in case someone decides to go mad on QnQ */
+
+struct vector_queue {
+	struct mmsghdr *mmsg_vector;
+	void **skbuff_vector;
+	 /* backlink to device which owns us */
+	struct net_device *dev;
+	spinlock_t head_lock;
+	spinlock_t tail_lock;
+	int queue_depth, head, tail, max_depth, max_iov_frags;
+	short options;
+};
+
+struct vector_estats {
+	uint64_t rx_queue_max;
+	uint64_t rx_queue_running_average;
+	uint64_t tx_queue_max;
+	uint64_t tx_queue_running_average;
+	uint64_t rx_encaps_errors;
+	uint64_t tx_timeout_count;
+	uint64_t tx_restart_queue;
+	uint64_t tx_kicks;
+	uint64_t tx_flow_control_xon;
+	uint64_t tx_flow_control_xoff;
+	uint64_t rx_csum_offload_good;
+	uint64_t rx_csum_offload_errors;
+	uint64_t sg_ok;
+	uint64_t sg_linearized;
+};
+
+#define VERIFY_HEADER_NOK -1
+#define VERIFY_HEADER_OK 0
+#define VERIFY_CSUM_OK 1
+
+struct vector_private {
+	struct list_head list;
+	spinlock_t lock;
+	struct net_device *dev;
+
+	int unit;
+
+	/* Timeout timer in TX */
+
+	struct timer_list tl;
+
+	/* Scheduled "remove device" work */
+	struct work_struct reset_tx;
+	struct vector_fds *fds;
+
+	struct vector_queue *rx_queue;
+	struct vector_queue *tx_queue;
+
+	int rx_irq;
+	int tx_irq;
+
+	struct arglist *parsed;
+
+	void *transport_data; /* transport specific params if needed */
+
+	int max_packet;
+	int req_size; /* different from max packet - used for TSO */
+	int headroom;
+
+	int options;
+
+	/* remote address if any - some transports will leave this as null */
+
+	int header_size;
+	int rx_header_size;
+	int coalesce;
+
+	void *header_rxbuffer;
+	void *header_txbuffer;
+
+	int (*form_header)(uint8_t *header,
+		struct sk_buff *skb, struct vector_private *vp);
+	int (*verify_header)(uint8_t *header,
+		struct sk_buff *skb, struct vector_private *vp);
+
+	spinlock_t stats_lock;
+
+	struct tasklet_struct tx_poll;
+	bool rexmit_scheduled;
+	bool opened;
+	bool in_write_poll;
+
+	/* ethtool stats */
+
+	struct vector_estats estats;
+	void *bpf;
+
+	char user[0];
+};
+
+extern int build_transport_data(struct vector_private *vp);
+
+#endif
diff --git a/arch/um/drivers/vector_transports.c b/arch/um/drivers/vector_transports.c
new file mode 100644
index 000000000000..9065047f844b
--- /dev/null
+++ b/arch/um/drivers/vector_transports.c
@@ -0,0 +1,458 @@
+/*
+ * Copyright (C) 2017 - Cambridge Greys Limited
+ * Copyright (C) 2011 - 2014 Cisco Systems Inc
+ * Licensed under the GPL.
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <asm/byteorder.h>
+#include <uapi/linux/ip.h>
+#include <uapi/linux/virtio_net.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_byteorder.h>
+#include <linux/netdev_features.h>
+#include "vector_user.h"
+#include "vector_kern.h"
+
+#define GOOD_LINEAR 512
+#define GSO_ERROR "Incoming GSO frames and GRO disabled on the interface"
+
+struct gre_minimal_header {
+	uint16_t header;
+	uint16_t arptype;
+};
+
+
+struct uml_gre_data {
+	uint32_t rx_key;
+	uint32_t tx_key;
+	uint32_t sequence;
+
+	bool ipv6;
+	bool has_sequence;
+	bool pin_sequence;
+	bool checksum;
+	bool key;
+	struct gre_minimal_header expected_header;
+
+	uint32_t checksum_offset;
+	uint32_t key_offset;
+	uint32_t sequence_offset;
+
+};
+
+struct uml_l2tpv3_data {
+	uint64_t rx_cookie;
+	uint64_t tx_cookie;
+	uint64_t rx_session;
+	uint64_t tx_session;
+	uint32_t counter;
+
+	bool udp;
+	bool ipv6;
+	bool has_counter;
+	bool pin_counter;
+	bool cookie;
+	bool cookie_is_64;
+
+	uint32_t cookie_offset;
+	uint32_t session_offset;
+	uint32_t counter_offset;
+};
+
+static int l2tpv3_form_header(uint8_t *header,
+	struct sk_buff *skb, struct vector_private *vp)
+{
+	struct uml_l2tpv3_data *td = vp->transport_data;
+	uint32_t *counter;
+
+	if (td->udp)
+		*(uint32_t *) header = cpu_to_be32(L2TPV3_DATA_PACKET);
+	(*(uint32_t *) (header + td->session_offset)) = td->tx_session;
+
+	if (td->cookie) {
+		if (td->cookie_is_64)
+			(*(uint64_t *)(header + td->cookie_offset)) =
+				td->tx_cookie;
+		else
+			(*(uint32_t *)(header + td->cookie_offset)) =
+				td->tx_cookie;
+	}
+	if (td->has_counter) {
+		counter = (uint32_t *)(header + td->counter_offset);
+		if (td->pin_counter) {
+			*counter = 0;
+		} else {
+			td->counter++;
+			*counter = cpu_to_be32(td->counter);
+		}
+	}
+	return 0;
+}
+
+static int gre_form_header(uint8_t *header,
+		struct sk_buff *skb, struct vector_private *vp)
+{
+	struct uml_gre_data *td = vp->transport_data;
+	uint32_t *sequence;
+	*((uint32_t *) header) = *((uint32_t *) &td->expected_header);
+	if (td->key)
+		(*(uint32_t *) (header + td->key_offset)) = td->tx_key;
+	if (td->has_sequence) {
+		sequence = (uint32_t *)(header + td->sequence_offset);
+		if (td->pin_sequence)
+			*sequence = 0;
+		else
+			*sequence = cpu_to_be32(++td->sequence);
+	}
+	return 0;
+}
+
+static int raw_form_header(uint8_t *header,
+		struct sk_buff *skb, struct vector_private *vp)
+{
+	struct virtio_net_hdr *vheader = (struct virtio_net_hdr *) header;
+
+	virtio_net_hdr_from_skb(
+		skb,
+		vheader,
+		virtio_legacy_is_little_endian(),
+		false
+	);
+
+	return 0;
+}
+
+static int l2tpv3_verify_header(
+	uint8_t *header, struct sk_buff *skb, struct vector_private *vp)
+{
+	struct uml_l2tpv3_data *td = vp->transport_data;
+	uint32_t *session;
+	uint64_t cookie;
+
+	if ((!td->udp) && (!td->ipv6))
+		header += sizeof(struct iphdr) /* fix for ipv4 raw */;
+
+	/* we do not do a strict check for "data" packets as per
+	 * the RFC spec because the pure IP spec does not have
+	 * that anyway.
+	 */
+
+	if (td->cookie) {
+		if (td->cookie_is_64)
+			cookie = *(uint64_t *)(header + td->cookie_offset);
+		else
+			cookie = *(uint32_t *)(header + td->cookie_offset);
+		if (cookie != td->rx_cookie) {
+			if (net_ratelimit())
+				netdev_err(vp->dev, "uml_l2tpv3: unknown cookie id");
+			return -1;
+		}
+	}
+	session = (uint32_t *) (header + td->session_offset);
+	if (*session != td->rx_session) {
+		if (net_ratelimit())
+			netdev_err(vp->dev, "uml_l2tpv3: session mismatch");
+		return -1;
+	}
+	return 0;
+}
+
+static int gre_verify_header(
+	uint8_t *header, struct sk_buff *skb, struct vector_private *vp)
+{
+
+	uint32_t key;
+	struct uml_gre_data *td = vp->transport_data;
+
+	if (!td->ipv6)
+		header += sizeof(struct iphdr) /* fix for ipv4 raw */;
+
+	if (*((uint32_t *) header) != *((uint32_t *) &td->expected_header)) {
+		if (net_ratelimit())
+			netdev_err(vp->dev, "header type disagreement, expecting %0x, got %0x",
+				*((uint32_t *) &td->expected_header),
+				*((uint32_t *) header)
+			);
+		return -1;
+	}
+
+	if (td->key) {
+		key = (*(uint32_t *)(header + td->key_offset));
+		if (key != td->rx_key) {
+			if (net_ratelimit())
+				netdev_err(vp->dev, "unknown key id %0x, expecting %0x",
+						key, td->rx_key);
+			return -1;
+		}
+	}
+	return 0;
+}
+
+static int raw_verify_header(
+	uint8_t *header, struct sk_buff *skb, struct vector_private *vp)
+{
+	struct virtio_net_hdr *vheader = (struct virtio_net_hdr *) header;
+
+	if ((vheader->gso_type != VIRTIO_NET_HDR_GSO_NONE) &&
+		(vp->req_size != 65536)) {
+		if (net_ratelimit())
+			netdev_err(
+				vp->dev,
+				GSO_ERROR
+		);
+	}
+	if ((vheader->flags & VIRTIO_NET_HDR_F_DATA_VALID) > 0)
+		return 1;
+
+	virtio_net_hdr_to_skb(skb, vheader, virtio_legacy_is_little_endian());
+	return 0;
+}
+
+static bool get_uint_param(
+	struct arglist *def, char *param, unsigned int *result)
+{
+	char *arg = uml_vector_fetch_arg(def, param);
+
+	if (arg != NULL) {
+		if (kstrtoint(arg, 0, result) == 0)
+			return true;
+	}
+	return false;
+}
+
+static bool get_ulong_param(
+	struct arglist *def, char *param, unsigned long *result)
+{
+	char *arg = uml_vector_fetch_arg(def, param);
+
+	if (arg != NULL) {
+		if (kstrtoul(arg, 0, result) == 0)
+			return true;
+		return true;
+	}
+	return false;
+}
+
+static int build_gre_transport_data(struct vector_private *vp)
+{
+	struct uml_gre_data *td;
+	int temp_int;
+	int temp_rx;
+	int temp_tx;
+
+	vp->transport_data = kmalloc(sizeof(struct uml_gre_data), GFP_KERNEL);
+	if (vp->transport_data == NULL)
+		return -ENOMEM;
+	td = vp->transport_data;
+	td->sequence = 0;
+
+	td->expected_header.arptype = GRE_IRB;
+	td->expected_header.header = 0;
+
+	vp->form_header = &gre_form_header;
+	vp->verify_header = &gre_verify_header;
+	vp->header_size = 4;
+	td->key_offset = 4;
+	td->sequence_offset = 4;
+	td->checksum_offset = 4;
+
+	td->ipv6 = false;
+	if (get_uint_param(vp->parsed, "v6", &temp_int)) {
+		if (temp_int > 0)
+			td->ipv6 = true;
+	}
+	td->key = false;
+	if (get_uint_param(vp->parsed, "rx_key", &temp_rx)) {
+		if (get_uint_param(vp->parsed, "tx_key", &temp_tx)) {
+			td->key = true;
+			td->expected_header.header |= GRE_MODE_KEY;
+			td->rx_key = cpu_to_be32(temp_rx);
+			td->tx_key = cpu_to_be32(temp_tx);
+			vp->header_size += 4;
+			td->sequence_offset += 4;
+		} else {
+			return -EINVAL;
+		}
+	}
+
+	td->sequence = false;
+	if (get_uint_param(vp->parsed, "sequence", &temp_int)) {
+		if (temp_int > 0) {
+			vp->header_size += 4;
+			td->has_sequence = true;
+			td->expected_header.header |= GRE_MODE_SEQUENCE;
+			if (get_uint_param(
+				vp->parsed, "pin_sequence", &temp_int)) {
+				if (temp_int > 0)
+					td->pin_sequence = true;
+			}
+		}
+	}
+	vp->rx_header_size = vp->header_size;
+	if (!td->ipv6)
+		vp->rx_header_size += sizeof(struct iphdr);
+	return 0;
+}
+
+static int build_l2tpv3_transport_data(struct vector_private *vp)
+{
+
+	struct uml_l2tpv3_data *td;
+	int temp_int, temp_rxs, temp_txs;
+	unsigned long temp_rx;
+	unsigned long temp_tx;
+
+	vp->transport_data = kmalloc(
+		sizeof(struct uml_l2tpv3_data), GFP_KERNEL);
+
+	if (vp->transport_data == NULL)
+		return -ENOMEM;
+
+	td = vp->transport_data;
+
+	vp->form_header = &l2tpv3_form_header;
+	vp->verify_header = &l2tpv3_verify_header;
+	td->counter = 0;
+
+	vp->header_size = 4;
+	td->session_offset = 0;
+	td->cookie_offset = 4;
+	td->counter_offset = 4;
+
+
+	td->ipv6 = false;
+	if (get_uint_param(vp->parsed, "v6", &temp_int)) {
+		if (temp_int > 0)
+			td->ipv6 = true;
+	}
+
+	if (get_uint_param(vp->parsed, "rx_session", &temp_rxs)) {
+		if (get_uint_param(vp->parsed, "tx_session", &temp_txs)) {
+			td->tx_session = cpu_to_be32(temp_txs);
+			td->rx_session = cpu_to_be32(temp_rxs);
+		} else {
+			return -EINVAL;
+		}
+	} else {
+		return -EINVAL;
+	}
+
+	td->cookie_is_64  = false;
+	if (get_uint_param(vp->parsed, "cookie64", &temp_int)) {
+		if (temp_int > 0)
+			td->cookie_is_64  = true;
+	}
+	td->cookie = false;
+	if (get_ulong_param(vp->parsed, "rx_cookie", &temp_rx)) {
+		if (get_ulong_param(vp->parsed, "tx_cookie", &temp_tx)) {
+			td->cookie = true;
+			if (td->cookie_is_64) {
+				td->rx_cookie = cpu_to_be64(temp_rx);
+				td->tx_cookie = cpu_to_be64(temp_tx);
+				vp->header_size += 8;
+				td->counter_offset += 8;
+			} else {
+				td->rx_cookie = cpu_to_be32(temp_rx);
+				td->tx_cookie = cpu_to_be32(temp_tx);
+				vp->header_size += 4;
+				td->counter_offset += 4;
+			}
+		} else {
+			return -EINVAL;
+		}
+	}
+
+	td->has_counter = false;
+	if (get_uint_param(vp->parsed, "counter", &temp_int)) {
+		if (temp_int > 0) {
+			td->has_counter = true;
+			vp->header_size += 4;
+			if (get_uint_param(
+				vp->parsed, "pin_counter", &temp_int)) {
+				if (temp_int > 0)
+					td->pin_counter = true;
+			}
+		}
+	}
+
+	if (get_uint_param(vp->parsed, "udp", &temp_int)) {
+		if (temp_int > 0) {
+			td->udp = true;
+			vp->header_size += 4;
+			td->counter_offset += 4;
+			td->session_offset += 4;
+			td->cookie_offset += 4;
+		}
+	}
+
+	vp->rx_header_size = vp->header_size;
+	if ((!td->ipv6) && (!td->udp))
+		vp->rx_header_size += sizeof(struct iphdr);
+
+	return 0;
+}
+
+static int build_raw_transport_data(struct vector_private *vp)
+{
+	if (uml_raw_enable_vnet_headers(vp->fds->rx_fd)) {
+		if (!uml_raw_enable_vnet_headers(vp->fds->tx_fd))
+			return -1;
+		vp->form_header = &raw_form_header;
+		vp->verify_header = &raw_verify_header;
+		vp->header_size = sizeof(struct virtio_net_hdr);
+		vp->rx_header_size = sizeof(struct virtio_net_hdr);
+		vp->dev->hw_features |= (NETIF_F_TSO | NETIF_F_GRO);
+		vp->dev->features |=
+			(NETIF_F_RXCSUM | NETIF_F_HW_CSUM |
+				NETIF_F_TSO | NETIF_F_GRO);
+		netdev_info(
+			vp->dev,
+			"raw: using vnet headers for tso and tx/rx checksum"
+		);
+	}
+	return 0;
+}
+
+static int build_tap_transport_data(struct vector_private *vp)
+{
+	if (uml_raw_enable_vnet_headers(vp->fds->rx_fd)) {
+		vp->form_header = &raw_form_header;
+		vp->verify_header = &raw_verify_header;
+		vp->header_size = sizeof(struct virtio_net_hdr);
+		vp->rx_header_size = sizeof(struct virtio_net_hdr);
+		vp->dev->hw_features |=
+			(NETIF_F_TSO | NETIF_F_GSO | NETIF_F_GRO);
+		vp->dev->features |=
+			(NETIF_F_RXCSUM | NETIF_F_HW_CSUM |
+				NETIF_F_TSO | NETIF_F_GSO | NETIF_F_GRO);
+		netdev_info(
+			vp->dev,
+			"tap/raw: using vnet headers for tso and tx/rx checksum"
+		);
+	} else {
+		return 0; /* do not try to enable tap too if raw failed */
+	}
+	if (uml_tap_enable_vnet_headers(vp->fds->tx_fd))
+		return 0;
+	return -1;
+}
+
+int build_transport_data(struct vector_private *vp)
+{
+	char *transport = uml_vector_fetch_arg(vp->parsed, "transport");
+
+	if (strncmp(transport, TRANS_GRE, TRANS_GRE_LEN) == 0)
+		return build_gre_transport_data(vp);
+	if (strncmp(transport, TRANS_L2TPV3, TRANS_L2TPV3_LEN) == 0)
+		return build_l2tpv3_transport_data(vp);
+	if (strncmp(transport, TRANS_RAW, TRANS_RAW_LEN) == 0)
+		return build_raw_transport_data(vp);
+	if (strncmp(transport, TRANS_TAP, TRANS_TAP_LEN) == 0)
+		return build_tap_transport_data(vp);
+	return 0;
+}
+
diff --git a/arch/um/drivers/vector_user.c b/arch/um/drivers/vector_user.c
new file mode 100644
index 000000000000..4d6a78e31089
--- /dev/null
+++ b/arch/um/drivers/vector_user.c
@@ -0,0 +1,590 @@
+/*
+ * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <stddef.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <net/if.h>
+#include <linux/if_tun.h>
+#include <arpa/inet.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <net/ethernet.h>
+#include <netinet/ip.h>
+#include <netinet/ether.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <sys/socket.h>
+#include <sys/wait.h>
+#include <linux/virtio_net.h>
+#include <netdb.h>
+#include <stdlib.h>
+#include <os.h>
+#include <um_malloc.h>
+#include "vector_user.h"
+
+#define ID_GRE 0
+#define ID_L2TPV3 1
+#define ID_MAX 1
+
+#define TOKEN_IFNAME "ifname"
+
+#define TRANS_RAW "raw"
+#define TRANS_RAW_LEN strlen(TRANS_RAW)
+
+#define VNET_HDR_FAIL "could not enable vnet headers on fd %d"
+#define TUN_GET_F_FAIL "tapraw: TUNGETFEATURES failed: %s"
+#define L2TPV3_BIND_FAIL "l2tpv3_open : could not bind socket err=%i"
+#define BPF_ATTACH_FAIL "Failed to attach filter size %d to %d, err %d\n"
+
+/* This is very ugly and brute force lookup, but it is done
+ * only once at initialization so not worth doing hashes or
+ * anything more intelligent
+ */
+
+char *uml_vector_fetch_arg(struct arglist *ifspec, char *token)
+{
+	int i;
+
+	for (i = 0; i < ifspec->numargs; i++) {
+		if (strcmp(ifspec->tokens[i], token) == 0)
+			return ifspec->values[i];
+	}
+	return NULL;
+
+}
+
+struct arglist *uml_parse_vector_ifspec(char *arg)
+{
+	struct arglist *result;
+	int pos, len;
+	bool parsing_token = true, next_starts = true;
+
+	if (arg == NULL)
+		return NULL;
+	result = uml_kmalloc(sizeof(struct arglist), UM_GFP_KERNEL);
+	if (result == NULL)
+		return NULL;
+	result->numargs = 0;
+	len = strlen(arg);
+	for (pos = 0; pos < len; pos++) {
+		if (next_starts) {
+			if (parsing_token) {
+				result->tokens[result->numargs] = arg + pos;
+			} else {
+				result->values[result->numargs] = arg + pos;
+				result->numargs++;
+			}
+			next_starts = false;
+		}
+		if (*(arg + pos) == '=') {
+			if (parsing_token)
+				parsing_token = false;
+			else
+				goto cleanup;
+			next_starts = true;
+			(*(arg + pos)) = '\0';
+		}
+		if (*(arg + pos) == ',') {
+			parsing_token = true;
+			next_starts = true;
+			(*(arg + pos)) = '\0';
+		}
+	}
+	return result;
+cleanup:
+	printk(UM_KERN_ERR "vector_setup - Couldn't parse '%s'\n", arg);
+	kfree(result);
+	return NULL;
+}
+
+/*
+ * Socket/FD configuration functions. These return an structure
+ * of rx and tx descriptors to cover cases where these are not
+ * the same (f.e. read via raw socket and write via tap).
+ */
+
+#define PATH_NET_TUN "/dev/net/tun"
+
+static struct vector_fds *user_init_tap_fds(struct arglist *ifspec)
+{
+	struct ifreq ifr;
+	int fd = -1;
+	struct sockaddr_ll sock;
+	int err = -ENOMEM, offload;
+	char *iface;
+	struct vector_fds *result = NULL;
+
+	iface = uml_vector_fetch_arg(ifspec, TOKEN_IFNAME);
+	if (iface == NULL) {
+		printk(UM_KERN_ERR "uml_tap: failed to parse interface spec\n");
+		goto tap_cleanup;
+	}
+
+	result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL);
+	if (result == NULL) {
+		printk(UM_KERN_ERR "uml_tap: failed to allocate file descriptors\n");
+		goto tap_cleanup;
+	}
+	result->rx_fd = -1;
+	result->tx_fd = -1;
+	result->remote_addr = NULL;
+	result->remote_addr_size = 0;
+
+	/* TAP */
+
+	fd = open(PATH_NET_TUN, O_RDWR);
+	if (fd < 0) {
+		printk(UM_KERN_ERR "uml_tap: failed to open tun device\n");
+		goto tap_cleanup;
+	}
+	result->tx_fd = fd;
+	memset(&ifr, 0, sizeof(ifr));
+	ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
+	strncpy((char *)&ifr.ifr_name, iface, sizeof(ifr.ifr_name) - 1);
+
+	err = ioctl(fd, TUNSETIFF, (void *) &ifr);
+	if (err != 0) {
+		printk(UM_KERN_ERR "uml_tap: failed to select tap interface\n");
+		goto tap_cleanup;
+	}
+
+	offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6;
+	ioctl(fd, TUNSETOFFLOAD, offload);
+
+	/* RAW */
+
+	fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
+	if (fd == -1) {
+		printk(UM_KERN_ERR
+			"uml_tap: failed to create socket: %i\n", -errno);
+		goto tap_cleanup;
+	}
+	result->rx_fd = fd;
+	memset(&ifr, 0, sizeof(ifr));
+	strncpy((char *)&ifr.ifr_name, iface, sizeof(ifr.ifr_name) - 1);
+	if (ioctl(fd, SIOCGIFINDEX, (void *) &ifr) < 0) {
+		printk(UM_KERN_ERR
+			"uml_tap: failed to set interface: %i\n", -errno);
+		goto tap_cleanup;
+	}
+
+	sock.sll_family = AF_PACKET;
+	sock.sll_protocol = htons(ETH_P_ALL);
+	sock.sll_ifindex = ifr.ifr_ifindex;
+
+	if (bind(fd,
+		(struct sockaddr *) &sock, sizeof(struct sockaddr_ll)) < 0) {
+		printk(UM_KERN_ERR
+			"user_init_tap: failed to bind raw pair, err %d\n",
+				-errno);
+		goto tap_cleanup;
+	}
+	return result;
+tap_cleanup:
+	printk(UM_KERN_ERR "user_init_tap: init failed, error %d", err);
+	if (result != NULL) {
+		if (result->rx_fd >= 0)
+			os_close_file(result->rx_fd);
+		if (result->tx_fd >= 0)
+			os_close_file(result->tx_fd);
+		kfree(result);
+	}
+	return NULL;
+}
+
+
+static struct vector_fds *user_init_raw_fds(struct arglist *ifspec)
+{
+	struct ifreq ifr;
+	int rxfd = -1, txfd = -1;
+	struct sockaddr_ll sock;
+	int err = -ENOMEM;
+	char *iface;
+	struct vector_fds *result = NULL;
+
+	iface = uml_vector_fetch_arg(ifspec, TOKEN_IFNAME);
+	if (iface == NULL)
+		goto cleanup;
+
+	rxfd = socket(AF_PACKET, SOCK_RAW, ETH_P_ALL);
+	if (rxfd == -1) {
+		err = -errno;
+		goto cleanup;
+	}
+	txfd = socket(AF_PACKET, SOCK_RAW, 0); /* Turn off RX on this fd */
+	if (txfd == -1) {
+		err = -errno;
+		goto cleanup;
+	}
+	memset(&ifr, 0, sizeof(ifr));
+	strncpy((char *)&ifr.ifr_name, iface, sizeof(ifr.ifr_name) - 1);
+	if (ioctl(rxfd, SIOCGIFINDEX, (void *) &ifr) < 0) {
+		err = -errno;
+		goto cleanup;
+	}
+
+	sock.sll_family = AF_PACKET;
+	sock.sll_protocol = htons(ETH_P_ALL);
+	sock.sll_ifindex = ifr.ifr_ifindex;
+
+	if (bind(rxfd,
+		(struct sockaddr *) &sock, sizeof(struct sockaddr_ll)) < 0) {
+		err = -errno;
+		goto cleanup;
+	}
+
+	sock.sll_family = AF_PACKET;
+	sock.sll_protocol = htons(ETH_P_IP);
+	sock.sll_ifindex = ifr.ifr_ifindex;
+
+	if (bind(txfd,
+		(struct sockaddr *) &sock, sizeof(struct sockaddr_ll)) < 0) {
+		err = -errno;
+		goto cleanup;
+	}
+
+	result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL);
+	if (result != NULL) {
+		result->rx_fd = rxfd;
+		result->tx_fd = txfd;
+		result->remote_addr = NULL;
+		result->remote_addr_size = 0;
+	}
+	return result;
+cleanup:
+	printk(UM_KERN_ERR "user_init_raw: init failed, error %d", err);
+	if (rxfd >= 0)
+		os_close_file(rxfd);
+	if (txfd >= 0)
+		os_close_file(txfd);
+	if (result != NULL)
+		kfree(result);
+	return NULL;
+}
+
+
+bool uml_raw_enable_qdisc_bypass(int fd)
+{
+	int optval = 1;
+
+	if (setsockopt(fd,
+		SOL_PACKET, PACKET_QDISC_BYPASS,
+		&optval, sizeof(optval)) != 0) {
+		return false;
+	}
+	return true;
+}
+
+bool uml_raw_enable_vnet_headers(int fd)
+{
+	int optval = 1;
+
+	if (setsockopt(fd,
+		SOL_PACKET, PACKET_VNET_HDR,
+		&optval, sizeof(optval)) != 0) {
+		printk(UM_KERN_INFO VNET_HDR_FAIL, fd);
+		return false;
+	}
+	return true;
+}
+bool uml_tap_enable_vnet_headers(int fd)
+{
+	unsigned int features;
+	int len = sizeof(struct virtio_net_hdr);
+
+	if (ioctl(fd, TUNGETFEATURES, &features) == -1) {
+		printk(UM_KERN_INFO TUN_GET_F_FAIL, strerror(errno));
+		return false;
+	}
+	if ((features & IFF_VNET_HDR) == 0) {
+		printk(UM_KERN_INFO "tapraw: No VNET HEADER support");
+		return false;
+	}
+	ioctl(fd, TUNSETVNETHDRSZ, &len);
+	return true;
+}
+
+static struct vector_fds *user_init_socket_fds(struct arglist *ifspec, int id)
+{
+	int err = -ENOMEM;
+	int fd = -1, gairet;
+	struct addrinfo srchints;
+	struct addrinfo dsthints;
+	bool v6, udp;
+	char *value;
+	char *src, *dst, *srcport, *dstport;
+	struct addrinfo *gairesult = NULL;
+	struct vector_fds *result = NULL;
+
+
+	value = uml_vector_fetch_arg(ifspec, "v6");
+	v6 = false;
+	udp = false;
+	if (value != NULL) {
+		if (strtol((const char *) value, NULL, 10) > 0)
+			v6 = true;
+	}
+
+	value = uml_vector_fetch_arg(ifspec, "udp");
+	if (value != NULL) {
+		if (strtol((const char *) value, NULL, 10) > 0)
+			udp = true;
+	}
+	src = uml_vector_fetch_arg(ifspec, "src");
+	dst = uml_vector_fetch_arg(ifspec, "dst");
+	srcport = uml_vector_fetch_arg(ifspec, "srcport");
+	dstport = uml_vector_fetch_arg(ifspec, "dstport");
+
+	memset(&dsthints, 0, sizeof(dsthints));
+
+	if (v6)
+		dsthints.ai_family = AF_INET6;
+	else
+		dsthints.ai_family = AF_INET;
+
+	switch (id) {
+	case ID_GRE:
+		dsthints.ai_socktype = SOCK_RAW;
+		dsthints.ai_protocol = IPPROTO_GRE;
+		break;
+	case ID_L2TPV3:
+		if (udp) {
+			dsthints.ai_socktype = SOCK_DGRAM;
+			dsthints.ai_protocol = 0;
+		} else {
+			dsthints.ai_socktype = SOCK_RAW;
+			dsthints.ai_protocol = IPPROTO_L2TP;
+		}
+		break;
+	default:
+		printk(KERN_ERR "Unsupported socket type\n");
+		return NULL;
+	}
+	memcpy(&srchints, &dsthints, sizeof(struct addrinfo));
+
+	gairet = getaddrinfo(src, srcport, &dsthints, &gairesult);
+	if ((gairet != 0) || (gairesult == NULL)) {
+		printk(UM_KERN_ERR
+			"socket_open : could not resolve src, error = %s",
+			gai_strerror(gairet)
+		);
+		return NULL;
+	}
+	fd = socket(gairesult->ai_family,
+		gairesult->ai_socktype, gairesult->ai_protocol);
+	if (fd == -1) {
+		printk(UM_KERN_ERR
+			"socket_open : could not open socket, error = %d",
+			-errno
+		);
+		goto cleanup;
+	}
+	if (bind(fd,
+		(struct sockaddr *) gairesult->ai_addr,
+		gairesult->ai_addrlen)) {
+		printk(UM_KERN_ERR L2TPV3_BIND_FAIL, errno);
+		goto cleanup;
+	}
+
+	if (gairesult != NULL)
+		freeaddrinfo(gairesult);
+
+	gairesult = NULL;
+
+	gairet = getaddrinfo(dst, dstport, &dsthints, &gairesult);
+	if ((gairet != 0) || (gairesult == NULL)) {
+		printk(UM_KERN_ERR
+			"socket_open : could not resolve dst, error = %s",
+			gai_strerror(gairet)
+		);
+		return NULL;
+	}
+
+	result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL);
+	if (result != NULL) {
+		result->rx_fd = fd;
+		result->tx_fd = fd;
+		result->remote_addr = uml_kmalloc(
+			gairesult->ai_addrlen, UM_GFP_KERNEL);
+		if (result->remote_addr == NULL)
+			goto cleanup;
+		result->remote_addr_size = gairesult->ai_addrlen;
+		memcpy(
+			result->remote_addr,
+			gairesult->ai_addr,
+			gairesult->ai_addrlen
+		);
+	}
+	freeaddrinfo(gairesult);
+	return result;
+cleanup:
+	if (gairesult != NULL)
+		freeaddrinfo(gairesult);
+	printk(UM_KERN_ERR "user_init_socket: init failed, error %d", err);
+	if (fd >= 0)
+		os_close_file(fd);
+	if (result != NULL) {
+		if (result->remote_addr != NULL)
+			kfree(result->remote_addr);
+		kfree(result);
+	}
+	return NULL;
+}
+
+struct vector_fds *uml_vector_user_open(
+	int unit,
+	struct arglist *parsed
+)
+{
+	char *transport;
+
+	if (parsed == NULL) {
+		printk(UM_KERN_ERR "no parsed config for unit %d\n", unit);
+		return NULL;
+	}
+	transport = uml_vector_fetch_arg(parsed, "transport");
+	if (transport == NULL) {
+		printk(UM_KERN_ERR "missing transport for unit %d\n", unit);
+		return NULL;
+	}
+	if (strncmp(transport, TRANS_RAW, TRANS_RAW_LEN) == 0)
+		return user_init_raw_fds(parsed);
+	if (strncmp(transport, TRANS_TAP, TRANS_TAP_LEN) == 0)
+		return user_init_tap_fds(parsed);
+	if (strncmp(transport, TRANS_GRE, TRANS_GRE_LEN) == 0)
+		return user_init_socket_fds(parsed, ID_GRE);
+	if (strncmp(transport, TRANS_L2TPV3, TRANS_L2TPV3_LEN) == 0)
+		return user_init_socket_fds(parsed, ID_L2TPV3);
+	return NULL;
+}
+
+
+int uml_vector_sendmsg(int fd, void *hdr, int flags)
+{
+	int n;
+
+	CATCH_EINTR(n = sendmsg(fd, (struct msghdr *) hdr,  flags));
+	if ((n < 0) && (errno == EAGAIN))
+		return 0;
+	if (n >= 0)
+		return n;
+	else
+		return -errno;
+}
+
+int uml_vector_recvmsg(int fd, void *hdr, int flags)
+{
+	int n;
+
+	CATCH_EINTR(n = recvmsg(fd, (struct msghdr *) hdr,  flags));
+	if ((n < 0) && (errno == EAGAIN))
+		return 0;
+	if (n >= 0)
+		return n;
+	else
+		return -errno;
+}
+
+int uml_vector_writev(int fd, void *hdr, int iovcount)
+{
+	int n;
+
+	CATCH_EINTR(n = writev(fd, (struct iovec *) hdr,  iovcount));
+	if ((n < 0) && (errno == EAGAIN))
+		return 0;
+	if (n >= 0)
+		return n;
+	else
+		return -errno;
+}
+
+int uml_vector_sendmmsg(
+	int fd,
+	void *msgvec,
+	unsigned int vlen,
+	unsigned int flags)
+{
+	int n;
+
+	CATCH_EINTR(n = sendmmsg(fd, (struct mmsghdr *) msgvec, vlen, flags));
+	if ((n < 0) && (errno == EAGAIN))
+		return 0;
+	if (n >= 0)
+		return n;
+	else
+		return -errno;
+}
+
+int uml_vector_recvmmsg(
+	int fd,
+	void *msgvec,
+	unsigned int vlen,
+	unsigned int flags)
+{
+	int n;
+
+	CATCH_EINTR(
+		n = recvmmsg(fd, (struct mmsghdr *) msgvec, vlen, flags, 0));
+	if ((n < 0) && (errno == EAGAIN))
+		return 0;
+	if (n >= 0)
+		return n;
+	else
+		return -errno;
+}
+int uml_vector_attach_bpf(int fd, void *bpf, int bpf_len)
+{
+	int err = setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, bpf, bpf_len);
+
+	if (err < 0)
+		printk(KERN_ERR BPF_ATTACH_FAIL, bpf_len, fd, -errno);
+	return err;
+}
+
+#define DEFAULT_BPF_LEN 6
+
+void *uml_vector_default_bpf(int fd, void *mac)
+{
+	struct sock_filter *bpf;
+	uint32_t *mac1 = (uint32_t *)(mac + 2);
+	uint16_t *mac2 = (uint16_t *) mac;
+	struct sock_fprog bpf_prog = {
+		.len = 6,
+		.filter = NULL,
+	};
+
+	bpf = uml_kmalloc(
+		sizeof(struct sock_filter) * DEFAULT_BPF_LEN, UM_GFP_KERNEL);
+	if (bpf != NULL) {
+		bpf_prog.filter = bpf;
+		/* ld	[8] */
+		bpf[0] = (struct sock_filter){ 0x20, 0, 0, 0x00000008 };
+		/* jeq	#0xMAC[2-6] jt 2 jf 5*/
+		bpf[1] = (struct sock_filter){ 0x15, 0, 3, ntohl(*mac1)};
+		/* ldh	[6] */
+		bpf[2] = (struct sock_filter){ 0x28, 0, 0, 0x00000006 };
+		/* jeq	#0xMAC[0-1] jt 4 jf 5 */
+		bpf[3] = (struct sock_filter){ 0x15, 0, 1, ntohs(*mac2)};
+		/* ret	#0 */
+		bpf[4] = (struct sock_filter){ 0x6, 0, 0, 0x00000000 };
+		/* ret	#0x40000 */
+		bpf[5] = (struct sock_filter){ 0x6, 0, 0, 0x00040000 };
+		if (uml_vector_attach_bpf(
+			fd, &bpf_prog, sizeof(struct sock_fprog)) < 0) {
+			kfree(bpf);
+			bpf = NULL;
+		}
+	}
+	return bpf;
+}
+
diff --git a/arch/um/drivers/vector_user.h b/arch/um/drivers/vector_user.h
new file mode 100644
index 000000000000..d7cbff73b7ff
--- /dev/null
+++ b/arch/um/drivers/vector_user.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2002 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Licensed under the GPL
+ */
+
+#ifndef __UM_VECTOR_USER_H
+#define __UM_VECTOR_USER_H
+
+#define MAXVARGS	20
+
+#define TOKEN_IFNAME "ifname"
+
+#define TRANS_RAW "raw"
+#define TRANS_RAW_LEN strlen(TRANS_RAW)
+
+#define TRANS_TAP "tap"
+#define TRANS_TAP_LEN strlen(TRANS_TAP)
+
+
+#define TRANS_GRE "gre"
+#define TRANS_GRE_LEN strlen(TRANS_RAW)
+
+#define TRANS_L2TPV3 "l2tpv3"
+#define TRANS_L2TPV3_LEN strlen(TRANS_L2TPV3)
+
+#ifndef IPPROTO_GRE
+#define IPPROTO_GRE 0x2F
+#endif
+
+#define GRE_MODE_CHECKSUM	cpu_to_be16(8 << 12)	/* checksum */
+#define GRE_MODE_RESERVED	cpu_to_be16(4 << 12)	/* unused */
+#define GRE_MODE_KEY		cpu_to_be16(2 << 12)	/* KEY present */
+#define GRE_MODE_SEQUENCE	cpu_to_be16(1 << 12)	/* sequence */
+
+#define GRE_IRB cpu_to_be16(0x6558)
+
+#define L2TPV3_DATA_PACKET 0x30000
+
+/* IANA-assigned IP protocol ID for L2TPv3 */
+
+#ifndef IPPROTO_L2TP
+#define IPPROTO_L2TP 0x73
+#endif
+
+struct arglist {
+	int	numargs;
+	char	*tokens[MAXVARGS];
+	char	*values[MAXVARGS];
+};
+
+/* Separating read and write FDs allows us to have different
+ * rx and tx method. Example - read tap via raw socket using
+ * recvmmsg, write using legacy tap write calls
+ */
+
+struct vector_fds {
+	int rx_fd;
+	int tx_fd;
+	void *remote_addr;
+	int remote_addr_size;
+};
+
+#define VECTOR_READ	1
+#define VECTOR_WRITE	(1 < 1)
+#define VECTOR_HEADERS	(1 < 2)
+
+extern struct arglist *uml_parse_vector_ifspec(char *arg);
+
+extern struct vector_fds *uml_vector_user_open(
+	int unit,
+	struct arglist *parsed
+);
+
+extern char *uml_vector_fetch_arg(
+	struct arglist *ifspec,
+	char *token
+);
+
+extern int uml_vector_recvmsg(int fd, void *hdr, int flags);
+extern int uml_vector_sendmsg(int fd, void *hdr, int flags);
+extern int uml_vector_writev(int fd, void *hdr, int iovcount);
+extern int uml_vector_sendmmsg(
+	int fd, void *msgvec,
+	unsigned int vlen,
+	unsigned int flags
+);
+extern int uml_vector_recvmmsg(
+	int fd,
+	void *msgvec,
+	unsigned int vlen,
+	unsigned int flags
+);
+extern void *uml_vector_default_bpf(int fd, void *mac);
+extern int uml_vector_attach_bpf(int fd, void *bpf, int bpf_len);
+extern bool uml_raw_enable_qdisc_bypass(int fd);
+extern bool uml_raw_enable_vnet_headers(int fd);
+extern bool uml_tap_enable_vnet_headers(int fd);
+
+
+#endif
diff --git a/arch/um/include/asm/asm-prototypes.h b/arch/um/include/asm/asm-prototypes.h
new file mode 100644
index 000000000000..5898a26daa0d
--- /dev/null
+++ b/arch/um/include/asm/asm-prototypes.h
@@ -0,0 +1 @@
+#include <asm-generic/asm-prototypes.h>
diff --git a/arch/um/include/asm/irq.h b/arch/um/include/asm/irq.h
index b5cdd3f91157..49ed3e35b35a 100644
--- a/arch/um/include/asm/irq.h
+++ b/arch/um/include/asm/irq.h
@@ -18,7 +18,19 @@
 #define XTERM_IRQ 		13
 #define RANDOM_IRQ 		14
 
+#ifdef CONFIG_UML_NET_VECTOR
+
+#define VECTOR_BASE_IRQ		15
+#define VECTOR_IRQ_SPACE	8
+
+#define LAST_IRQ (VECTOR_IRQ_SPACE + VECTOR_BASE_IRQ)
+
+#else
+
 #define LAST_IRQ RANDOM_IRQ
+
+#endif
+
 #define NR_IRQS (LAST_IRQ + 1)
 
 #endif
diff --git a/arch/um/include/shared/irq_user.h b/arch/um/include/shared/irq_user.h
index df5633053957..a7a6120f19d5 100644
--- a/arch/um/include/shared/irq_user.h
+++ b/arch/um/include/shared/irq_user.h
@@ -7,6 +7,7 @@
 #define __IRQ_USER_H__
 
 #include <sysdep/ptrace.h>
+#include <stdbool.h>
 
 struct irq_fd {
 	struct irq_fd *next;
@@ -15,10 +16,17 @@ struct irq_fd {
 	int type;
 	int irq;
 	int events;
-	int current_events;
+	bool active;
+	bool pending;
+	bool purge;
 };
 
-enum { IRQ_READ, IRQ_WRITE };
+#define IRQ_READ  0
+#define IRQ_WRITE 1
+#define IRQ_NONE 2
+#define MAX_IRQ_TYPE (IRQ_NONE + 1)
+
+
 
 struct siginfo;
 extern void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs);
diff --git a/arch/um/include/shared/net_kern.h b/arch/um/include/shared/net_kern.h
index 012ac87d4900..40442b98b173 100644
--- a/arch/um/include/shared/net_kern.h
+++ b/arch/um/include/shared/net_kern.h
@@ -65,5 +65,7 @@ extern int tap_setup_common(char *str, char *type, char **dev_name,
 			    char **mac_out, char **gate_addr);
 extern void register_transport(struct transport *new);
 extern unsigned short eth_protocol(struct sk_buff *skb);
+extern void uml_net_setup_etheraddr(struct net_device *dev, char *str);
+
 
 #endif
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index d8ddaf9790d2..048ae37eb5aa 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -290,15 +290,16 @@ extern void halt_skas(void);
 extern void reboot_skas(void);
 
 /* irq.c */
-extern int os_waiting_for_events(struct irq_fd *active_fds);
-extern int os_create_pollfd(int fd, int events, void *tmp_pfd, int size_tmpfds);
-extern void os_free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg,
-		struct irq_fd *active_fds, struct irq_fd ***last_irq_ptr2);
-extern void os_free_irq_later(struct irq_fd *active_fds,
-		int irq, void *dev_id);
-extern int os_get_pollfd(int i);
-extern void os_set_pollfd(int i, int fd);
+extern int os_waiting_for_events_epoll(void);
+extern void *os_epoll_get_data_pointer(int index);
+extern int os_epoll_triggered(int index, int events);
+extern int os_event_mask(int irq_type);
+extern int os_setup_epoll(void);
+extern int os_add_epoll_fd(int events, int fd, void *data);
+extern int os_mod_epoll_fd(int events, int fd, void *data);
+extern int os_del_epoll_fd(int fd);
 extern void os_set_ioignore(void);
+extern void os_close_epoll_fd(void);
 
 /* sigio.c */
 extern int add_sigio_fd(int fd);
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 23cb9350d47e..6b7f3827d6e4 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -1,4 +1,6 @@
 /*
+ * Copyright (C) 2017 - Cambridge Greys Ltd
+ * Copyright (C) 2011 - 2014 Cisco Systems Inc
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  * Derived (i.e. mostly copied) from arch/i386/kernel/irq.c:
@@ -16,243 +18,362 @@
 #include <as-layout.h>
 #include <kern_util.h>
 #include <os.h>
+#include <irq_user.h>
 
-/*
- * This list is accessed under irq_lock, except in sigio_handler,
- * where it is safe from being modified.  IRQ handlers won't change it -
- * if an IRQ source has vanished, it will be freed by free_irqs just
- * before returning from sigio_handler.  That will process a separate
- * list of irqs to free, with its own locking, coming back here to
- * remove list elements, taking the irq_lock to do so.
+
+/* When epoll triggers we do not know why it did so
+ * we can also have different IRQs for read and write.
+ * This is why we keep a small irq_fd array for each fd -
+ * one entry per IRQ type
  */
-static struct irq_fd *active_fds = NULL;
-static struct irq_fd **last_irq_ptr = &active_fds;
 
-extern void free_irqs(void);
+struct irq_entry {
+	struct irq_entry *next;
+	int fd;
+	struct irq_fd *irq_array[MAX_IRQ_TYPE + 1];
+};
+
+static struct irq_entry *active_fds;
+
+static DEFINE_SPINLOCK(irq_lock);
+
+static void irq_io_loop(struct irq_fd *irq, struct uml_pt_regs *regs)
+{
+/*
+ * irq->active guards against reentry
+ * irq->pending accumulates pending requests
+ * if pending is raised the irq_handler is re-run
+ * until pending is cleared
+ */
+	if (irq->active) {
+		irq->active = false;
+		do {
+			irq->pending = false;
+			do_IRQ(irq->irq, regs);
+		} while (irq->pending && (!irq->purge));
+		if (!irq->purge)
+			irq->active = true;
+	} else {
+		irq->pending = true;
+	}
+}
 
 void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
 {
-	struct irq_fd *irq_fd;
-	int n;
+	struct irq_entry *irq_entry;
+	struct irq_fd *irq;
+
+	int n, i, j;
 
 	while (1) {
-		n = os_waiting_for_events(active_fds);
+		/* This is now lockless - epoll keeps back-referencesto the irqs
+		 * which have trigger it so there is no need to walk the irq
+		 * list and lock it every time. We avoid locking by turning off
+		 * IO for a specific fd by executing os_del_epoll_fd(fd) before
+		 * we do any changes to the actual data structures
+		 */
+		n = os_waiting_for_events_epoll();
+
 		if (n <= 0) {
 			if (n == -EINTR)
 				continue;
-			else break;
+			else
+				break;
 		}
 
-		for (irq_fd = active_fds; irq_fd != NULL;
-		     irq_fd = irq_fd->next) {
-			if (irq_fd->current_events != 0) {
-				irq_fd->current_events = 0;
-				do_IRQ(irq_fd->irq, regs);
+		for (i = 0; i < n ; i++) {
+			/* Epoll back reference is the entry with 3 irq_fd
+			 * leaves - one for each irq type.
+			 */
+			irq_entry = (struct irq_entry *)
+				os_epoll_get_data_pointer(i);
+			for (j = 0; j < MAX_IRQ_TYPE ; j++) {
+				irq = irq_entry->irq_array[j];
+				if (irq == NULL)
+					continue;
+				if (os_epoll_triggered(i, irq->events) > 0)
+					irq_io_loop(irq, regs);
+				if (irq->purge) {
+					irq_entry->irq_array[j] = NULL;
+					kfree(irq);
+				}
 			}
 		}
 	}
+}
+
+static int assign_epoll_events_to_irq(struct irq_entry *irq_entry)
+{
+	int i;
+	int events = 0;
+	struct irq_fd *irq;
 
-	free_irqs();
+	for (i = 0; i < MAX_IRQ_TYPE ; i++) {
+		irq = irq_entry->irq_array[i];
+		if (irq != NULL)
+			events = irq->events | events;
+	}
+	if (events > 0) {
+	/* os_add_epoll will call os_mod_epoll if this already exists */
+		return os_add_epoll_fd(events, irq_entry->fd, irq_entry);
+	}
+	/* No events - delete */
+	return os_del_epoll_fd(irq_entry->fd);
 }
 
-static DEFINE_SPINLOCK(irq_lock);
+
 
 static int activate_fd(int irq, int fd, int type, void *dev_id)
 {
-	struct pollfd *tmp_pfd;
-	struct irq_fd *new_fd, *irq_fd;
+	struct irq_fd *new_fd;
+	struct irq_entry *irq_entry;
+	int i, err, events;
 	unsigned long flags;
-	int events, err, n;
 
 	err = os_set_fd_async(fd);
 	if (err < 0)
 		goto out;
 
-	err = -ENOMEM;
-	new_fd = kmalloc(sizeof(struct irq_fd), GFP_KERNEL);
-	if (new_fd == NULL)
-		goto out;
+	spin_lock_irqsave(&irq_lock, flags);
 
-	if (type == IRQ_READ)
-		events = UM_POLLIN | UM_POLLPRI;
-	else events = UM_POLLOUT;
-	*new_fd = ((struct irq_fd) { .next  		= NULL,
-				     .id 		= dev_id,
-				     .fd 		= fd,
-				     .type 		= type,
-				     .irq 		= irq,
-				     .events 		= events,
-				     .current_events 	= 0 } );
+	/* Check if we have an entry for this fd */
 
 	err = -EBUSY;
-	spin_lock_irqsave(&irq_lock, flags);
-	for (irq_fd = active_fds; irq_fd != NULL; irq_fd = irq_fd->next) {
-		if ((irq_fd->fd == fd) && (irq_fd->type == type)) {
-			printk(KERN_ERR "Registering fd %d twice\n", fd);
-			printk(KERN_ERR "Irqs : %d, %d\n", irq_fd->irq, irq);
-			printk(KERN_ERR "Ids : 0x%p, 0x%p\n", irq_fd->id,
-			       dev_id);
+	for (irq_entry = active_fds;
+		irq_entry != NULL; irq_entry = irq_entry->next) {
+		if (irq_entry->fd == fd)
+			break;
+	}
+
+	if (irq_entry == NULL) {
+		/* This needs to be atomic as it may be called from an
+		 * IRQ context.
+		 */
+		irq_entry = kmalloc(sizeof(struct irq_entry), GFP_ATOMIC);
+		if (irq_entry == NULL) {
+			printk(KERN_ERR
+				"Failed to allocate new IRQ entry\n");
 			goto out_unlock;
 		}
+		irq_entry->fd = fd;
+		for (i = 0; i < MAX_IRQ_TYPE; i++)
+			irq_entry->irq_array[i] = NULL;
+		irq_entry->next = active_fds;
+		active_fds = irq_entry;
 	}
 
-	if (type == IRQ_WRITE)
-		fd = -1;
-
-	tmp_pfd = NULL;
-	n = 0;
+	/* Check if we are trying to re-register an interrupt for a
+	 * particular fd
+	 */
 
-	while (1) {
-		n = os_create_pollfd(fd, events, tmp_pfd, n);
-		if (n == 0)
-			break;
+	if (irq_entry->irq_array[type] != NULL) {
+		printk(KERN_ERR
+			"Trying to reregister IRQ %d FD %d TYPE %d ID %p\n",
+			irq, fd, type, dev_id
+		);
+		goto out_unlock;
+	} else {
+		/* New entry for this fd */
+
+		err = -ENOMEM;
+		new_fd = kmalloc(sizeof(struct irq_fd), GFP_ATOMIC);
+		if (new_fd == NULL)
+			goto out_unlock;
 
-		/*
-		 * n > 0
-		 * It means we couldn't put new pollfd to current pollfds
-		 * and tmp_fds is NULL or too small for new pollfds array.
-		 * Needed size is equal to n as minimum.
-		 *
-		 * Here we have to drop the lock in order to call
-		 * kmalloc, which might sleep.
-		 * If something else came in and changed the pollfds array
-		 * so we will not be able to put new pollfd struct to pollfds
-		 * then we free the buffer tmp_fds and try again.
+		events = os_event_mask(type);
+
+		*new_fd = ((struct irq_fd) {
+			.id		= dev_id,
+			.irq		= irq,
+			.type		= type,
+			.events		= events,
+			.active		= true,
+			.pending	= false,
+			.purge		= false
+		});
+		/* Turn off any IO on this fd - allows us to
+		 * avoid locking the IRQ loop
 		 */
-		spin_unlock_irqrestore(&irq_lock, flags);
-		kfree(tmp_pfd);
-
-		tmp_pfd = kmalloc(n, GFP_KERNEL);
-		if (tmp_pfd == NULL)
-			goto out_kfree;
-
-		spin_lock_irqsave(&irq_lock, flags);
+		os_del_epoll_fd(irq_entry->fd);
+		irq_entry->irq_array[type] = new_fd;
 	}
 
-	*last_irq_ptr = new_fd;
-	last_irq_ptr = &new_fd->next;
-
+	/* Turn back IO on with the correct (new) IO event mask */
+	assign_epoll_events_to_irq(irq_entry);
 	spin_unlock_irqrestore(&irq_lock, flags);
-
-	/*
-	 * This calls activate_fd, so it has to be outside the critical
-	 * section.
-	 */
-	maybe_sigio_broken(fd, (type == IRQ_READ));
+	maybe_sigio_broken(fd, (type != IRQ_NONE));
 
 	return 0;
-
- out_unlock:
+out_unlock:
 	spin_unlock_irqrestore(&irq_lock, flags);
- out_kfree:
-	kfree(new_fd);
- out:
+out:
 	return err;
 }
 
-static void free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg)
+/*
+ * Walk the IRQ list and dispose of any unused entries.
+ * Should be done under irq_lock.
+ */
+
+static void garbage_collect_irq_entries(void)
 {
-	unsigned long flags;
+	int i;
+	bool reap;
+	struct irq_entry *walk;
+	struct irq_entry *previous = NULL;
+	struct irq_entry *to_free;
 
-	spin_lock_irqsave(&irq_lock, flags);
-	os_free_irq_by_cb(test, arg, active_fds, &last_irq_ptr);
-	spin_unlock_irqrestore(&irq_lock, flags);
+	if (active_fds == NULL)
+		return;
+	walk = active_fds;
+	while (walk != NULL) {
+		reap = true;
+		for (i = 0; i < MAX_IRQ_TYPE ; i++) {
+			if (walk->irq_array[i] != NULL) {
+				reap = false;
+				break;
+			}
+		}
+		if (reap) {
+			if (previous == NULL)
+				active_fds = walk->next;
+			else
+				previous->next = walk->next;
+			to_free = walk;
+		} else {
+			to_free = NULL;
+		}
+		walk = walk->next;
+		if (to_free != NULL)
+			kfree(to_free);
+	}
 }
 
-struct irq_and_dev {
-	int irq;
-	void *dev;
-};
+/*
+ * Walk the IRQ list and get the descriptor for our FD
+ */
 
-static int same_irq_and_dev(struct irq_fd *irq, void *d)
+static struct irq_entry *get_irq_entry_by_fd(int fd)
 {
-	struct irq_and_dev *data = d;
+	struct irq_entry *walk = active_fds;
 
-	return ((irq->irq == data->irq) && (irq->id == data->dev));
+	while (walk != NULL) {
+		if (walk->fd == fd)
+			return walk;
+		walk = walk->next;
+	}
+	return NULL;
 }
 
-static void free_irq_by_irq_and_dev(unsigned int irq, void *dev)
-{
-	struct irq_and_dev data = ((struct irq_and_dev) { .irq  = irq,
-							  .dev  = dev });
 
-	free_irq_by_cb(same_irq_and_dev, &data);
-}
+/*
+ * Walk the IRQ list and dispose of an entry for a specific
+ * device, fd and number. Note - if sharing an IRQ for read
+ * and writefor the same FD it will be disposed in either case.
+ * If this behaviour is undesirable use different IRQ ids.
+ */
 
-static int same_fd(struct irq_fd *irq, void *fd)
-{
-	return (irq->fd == *((int *)fd));
-}
+#define IGNORE_IRQ 1
+#define IGNORE_DEV (1<<1)
 
-void free_irq_by_fd(int fd)
+static void do_free_by_irq_and_dev(
+	struct irq_entry *irq_entry,
+	unsigned int irq,
+	void *dev,
+	int flags
+)
 {
-	free_irq_by_cb(same_fd, &fd);
+	int i;
+	struct irq_fd *to_free;
+
+	for (i = 0; i < MAX_IRQ_TYPE ; i++) {
+		if (irq_entry->irq_array[i] != NULL) {
+			if (
+			((flags & IGNORE_IRQ) ||
+				(irq_entry->irq_array[i]->irq == irq)) &&
+			((flags & IGNORE_DEV) ||
+				(irq_entry->irq_array[i]->id == dev))
+			) {
+				/* Turn off any IO on this fd - allows us to
+				 * avoid locking the IRQ loop
+				 */
+				os_del_epoll_fd(irq_entry->fd);
+				to_free = irq_entry->irq_array[i];
+				irq_entry->irq_array[i] = NULL;
+				assign_epoll_events_to_irq(irq_entry);
+				if (to_free->active)
+					to_free->purge = true;
+				else
+					kfree(to_free);
+			}
+		}
+	}
 }
 
-/* Must be called with irq_lock held */
-static struct irq_fd *find_irq_by_fd(int fd, int irqnum, int *index_out)
+void free_irq_by_fd(int fd)
 {
-	struct irq_fd *irq;
-	int i = 0;
-	int fdi;
+	struct irq_entry *to_free;
+	unsigned long flags;
 
-	for (irq = active_fds; irq != NULL; irq = irq->next) {
-		if ((irq->fd == fd) && (irq->irq == irqnum))
-			break;
-		i++;
-	}
-	if (irq == NULL) {
-		printk(KERN_ERR "find_irq_by_fd doesn't have descriptor %d\n",
-		       fd);
-		goto out;
-	}
-	fdi = os_get_pollfd(i);
-	if ((fdi != -1) && (fdi != fd)) {
-		printk(KERN_ERR "find_irq_by_fd - mismatch between active_fds "
-		       "and pollfds, fd %d vs %d, need %d\n", irq->fd,
-		       fdi, fd);
-		irq = NULL;
-		goto out;
+	spin_lock_irqsave(&irq_lock, flags);
+	to_free = get_irq_entry_by_fd(fd);
+	if (to_free != NULL) {
+		do_free_by_irq_and_dev(
+			to_free,
+			-1,
+			NULL,
+			IGNORE_IRQ | IGNORE_DEV
+		);
 	}
-	*index_out = i;
- out:
-	return irq;
+	garbage_collect_irq_entries();
+	spin_unlock_irqrestore(&irq_lock, flags);
 }
+EXPORT_SYMBOL(free_irq_by_fd);
 
-void reactivate_fd(int fd, int irqnum)
+static void free_irq_by_irq_and_dev(unsigned int irq, void *dev)
 {
-	struct irq_fd *irq;
+	struct irq_entry *to_free;
 	unsigned long flags;
-	int i;
 
 	spin_lock_irqsave(&irq_lock, flags);
-	irq = find_irq_by_fd(fd, irqnum, &i);
-	if (irq == NULL) {
-		spin_unlock_irqrestore(&irq_lock, flags);
-		return;
+	to_free = active_fds;
+	while (to_free != NULL) {
+		do_free_by_irq_and_dev(
+			to_free,
+			irq,
+			dev,
+			0
+		);
+		to_free = to_free->next;
 	}
-	os_set_pollfd(i, irq->fd);
+	garbage_collect_irq_entries();
 	spin_unlock_irqrestore(&irq_lock, flags);
+}
 
-	add_sigio_fd(fd);
+
+void reactivate_fd(int fd, int irqnum)
+{
+	/** NOP - we do auto-EOI now **/
 }
 
 void deactivate_fd(int fd, int irqnum)
 {
-	struct irq_fd *irq;
+	struct irq_entry *to_free;
 	unsigned long flags;
-	int i;
 
+	os_del_epoll_fd(fd);
 	spin_lock_irqsave(&irq_lock, flags);
-	irq = find_irq_by_fd(fd, irqnum, &i);
-	if (irq == NULL) {
-		spin_unlock_irqrestore(&irq_lock, flags);
-		return;
+	to_free = get_irq_entry_by_fd(fd);
+	if (to_free != NULL) {
+		do_free_by_irq_and_dev(
+			to_free,
+			irqnum,
+			NULL,
+			IGNORE_DEV
+		);
 	}
-
-	os_set_pollfd(i, -1);
+	garbage_collect_irq_entries();
 	spin_unlock_irqrestore(&irq_lock, flags);
-
 	ignore_sigio_fd(fd);
 }
 EXPORT_SYMBOL(deactivate_fd);
@@ -265,17 +386,28 @@ EXPORT_SYMBOL(deactivate_fd);
  */
 int deactivate_all_fds(void)
 {
-	struct irq_fd *irq;
-	int err;
+	unsigned long flags;
+	struct irq_entry *to_free;
 
-	for (irq = active_fds; irq != NULL; irq = irq->next) {
-		err = os_clear_fd_async(irq->fd);
-		if (err)
-			return err;
-	}
-	/* If there is a signal already queued, after unblocking ignore it */
+	spin_lock_irqsave(&irq_lock, flags);
+	/* Stop IO. The IRQ loop has no lock so this is our
+	 * only way of making sure we are safe to dispose
+	 * of all IRQ handlers
+	 */
 	os_set_ioignore();
-
+	to_free = active_fds;
+	while (to_free != NULL) {
+		do_free_by_irq_and_dev(
+			to_free,
+			-1,
+			NULL,
+			IGNORE_IRQ | IGNORE_DEV
+		);
+		to_free = to_free->next;
+	}
+	garbage_collect_irq_entries();
+	spin_unlock_irqrestore(&irq_lock, flags);
+	os_close_epoll_fd();
 	return 0;
 }
 
@@ -353,8 +485,11 @@ void __init init_IRQ(void)
 
 	irq_set_chip_and_handler(TIMER_IRQ, &SIGVTALRM_irq_type, handle_edge_irq);
 
+
 	for (i = 1; i < NR_IRQS; i++)
 		irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_irq);
+	/* Initialize EPOLL Loop */
+	os_setup_epoll();
 }
 
 /*
diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
index 7f69d17de354..052de4c8acb2 100644
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -121,12 +121,12 @@ static void __init um_timer_setup(void)
 	clockevents_register_device(&timer_clockevent);
 }
 
-void read_persistent_clock(struct timespec *ts)
+void read_persistent_clock64(struct timespec64 *ts)
 {
 	long long nsecs = os_persistent_clock_emulation();
 
-	set_normalized_timespec(ts, nsecs / NSEC_PER_SEC,
-				nsecs % NSEC_PER_SEC);
+	set_normalized_timespec64(ts, nsecs / NSEC_PER_SEC,
+				  nsecs % NSEC_PER_SEC);
 }
 
 void __init time_init(void)
diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
index 2db18cbbb0ea..c0197097c86e 100644
--- a/arch/um/os-Linux/file.c
+++ b/arch/um/os-Linux/file.c
@@ -12,6 +12,7 @@
 #include <sys/mount.h>
 #include <sys/socket.h>
 #include <sys/stat.h>
+#include <sys/sysmacros.h>
 #include <sys/un.h>
 #include <sys/types.h>
 #include <os.h>
diff --git a/arch/um/os-Linux/irq.c b/arch/um/os-Linux/irq.c
index b9afb74b79ad..365823010346 100644
--- a/arch/um/os-Linux/irq.c
+++ b/arch/um/os-Linux/irq.c
@@ -1,135 +1,147 @@
 /*
+ * Copyright (C) 2017 - Cambridge Greys Ltd
+ * Copyright (C) 2011 - 2014 Cisco Systems Inc
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Licensed under the GPL
  */
 
 #include <stdlib.h>
 #include <errno.h>
-#include <poll.h>
+#include <sys/epoll.h>
 #include <signal.h>
 #include <string.h>
 #include <irq_user.h>
 #include <os.h>
 #include <um_malloc.h>
 
+/* Epoll support */
+
+static int epollfd = -1;
+
+#define MAX_EPOLL_EVENTS 64
+
+static struct epoll_event epoll_events[MAX_EPOLL_EVENTS];
+
+/* Helper to return an Epoll data pointer from an epoll event structure.
+ * We need to keep this one on the userspace side to keep includes separate
+ */
+
+void *os_epoll_get_data_pointer(int index)
+{
+	return epoll_events[index].data.ptr;
+}
+
+/* Helper to compare events versus the events in the epoll structure.
+ * Same as above - needs to be on the userspace side
+ */
+
+
+int os_epoll_triggered(int index, int events)
+{
+	return epoll_events[index].events & events;
+}
+/* Helper to set the event mask.
+ * The event mask is opaque to the kernel side, because it does not have
+ * access to the right includes/defines for EPOLL constants.
+ */
+
+int os_event_mask(int irq_type)
+{
+	if (irq_type == IRQ_READ)
+		return EPOLLIN | EPOLLPRI;
+	if (irq_type == IRQ_WRITE)
+		return EPOLLOUT;
+	return 0;
+}
+
 /*
- * Locked by irq_lock in arch/um/kernel/irq.c.  Changed by os_create_pollfd
- * and os_free_irq_by_cb, which are called under irq_lock.
+ * Initial Epoll Setup
  */
-static struct pollfd *pollfds = NULL;
-static int pollfds_num = 0;
-static int pollfds_size = 0;
+int os_setup_epoll(void)
+{
+	epollfd = epoll_create(MAX_EPOLL_EVENTS);
+	return epollfd;
+}
 
-int os_waiting_for_events(struct irq_fd *active_fds)
+/*
+ * Helper to run the actual epoll_wait
+ */
+int os_waiting_for_events_epoll(void)
 {
-	struct irq_fd *irq_fd;
-	int i, n, err;
+	int n, err;
 
-	n = poll(pollfds, pollfds_num, 0);
+	n = epoll_wait(epollfd,
+		(struct epoll_event *) &epoll_events, MAX_EPOLL_EVENTS, 0);
 	if (n < 0) {
 		err = -errno;
 		if (errno != EINTR)
-			printk(UM_KERN_ERR "os_waiting_for_events:"
-			       " poll returned %d, errno = %d\n", n, errno);
+			printk(
+				UM_KERN_ERR "os_waiting_for_events:"
+				" epoll returned %d, error = %s\n", n,
+				strerror(errno)
+			);
 		return err;
 	}
-
-	if (n == 0)
-		return 0;
-
-	irq_fd = active_fds;
-
-	for (i = 0; i < pollfds_num; i++) {
-		if (pollfds[i].revents != 0) {
-			irq_fd->current_events = pollfds[i].revents;
-			pollfds[i].fd = -1;
-		}
-		irq_fd = irq_fd->next;
-	}
 	return n;
 }
 
-int os_create_pollfd(int fd, int events, void *tmp_pfd, int size_tmpfds)
-{
-	if (pollfds_num == pollfds_size) {
-		if (size_tmpfds <= pollfds_size * sizeof(pollfds[0])) {
-			/* return min size needed for new pollfds area */
-			return (pollfds_size + 1) * sizeof(pollfds[0]);
-		}
-
-		if (pollfds != NULL) {
-			memcpy(tmp_pfd, pollfds,
-			       sizeof(pollfds[0]) * pollfds_size);
-			/* remove old pollfds */
-			kfree(pollfds);
-		}
-		pollfds = tmp_pfd;
-		pollfds_size++;
-	} else
-		kfree(tmp_pfd);	/* remove not used tmp_pfd */
-
-	pollfds[pollfds_num] = ((struct pollfd) { .fd		= fd,
-						  .events	= events,
-						  .revents	= 0 });
-	pollfds_num++;
-
-	return 0;
-}
 
-void os_free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg,
-		struct irq_fd *active_fds, struct irq_fd ***last_irq_ptr2)
+/*
+ * Helper to add a fd to epoll
+ */
+int os_add_epoll_fd(int events, int fd, void *data)
 {
-	struct irq_fd **prev;
-	int i = 0;
-
-	prev = &active_fds;
-	while (*prev != NULL) {
-		if ((*test)(*prev, arg)) {
-			struct irq_fd *old_fd = *prev;
-			if ((pollfds[i].fd != -1) &&
-			    (pollfds[i].fd != (*prev)->fd)) {
-				printk(UM_KERN_ERR "os_free_irq_by_cb - "
-				       "mismatch between active_fds and "
-				       "pollfds, fd %d vs %d\n",
-				       (*prev)->fd, pollfds[i].fd);
-				goto out;
-			}
-
-			pollfds_num--;
-
-			/*
-			 * This moves the *whole* array after pollfds[i]
-			 * (though it doesn't spot as such)!
-			 */
-			memmove(&pollfds[i], &pollfds[i + 1],
-			       (pollfds_num - i) * sizeof(pollfds[0]));
-			if (*last_irq_ptr2 == &old_fd->next)
-				*last_irq_ptr2 = prev;
-
-			*prev = (*prev)->next;
-			if (old_fd->type == IRQ_WRITE)
-				ignore_sigio_fd(old_fd->fd);
-			kfree(old_fd);
-			continue;
-		}
-		prev = &(*prev)->next;
-		i++;
-	}
- out:
-	return;
+	struct epoll_event event;
+	int result;
+
+	event.data.ptr = data;
+	event.events = events | EPOLLET;
+	result = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &event);
+	if ((result) && (errno == EEXIST))
+		result = os_mod_epoll_fd(events, fd, data);
+	if (result)
+		printk("epollctl add err fd %d, %s\n", fd, strerror(errno));
+	return result;
 }
 
-int os_get_pollfd(int i)
+/*
+ * Helper to mod the fd event mask and/or data backreference
+ */
+int os_mod_epoll_fd(int events, int fd, void *data)
 {
-	return pollfds[i].fd;
+	struct epoll_event event;
+	int result;
+
+	event.data.ptr = data;
+	event.events = events;
+	result = epoll_ctl(epollfd, EPOLL_CTL_MOD, fd, &event);
+	if (result)
+		printk(UM_KERN_ERR
+			"epollctl mod err fd %d, %s\n", fd, strerror(errno));
+	return result;
 }
 
-void os_set_pollfd(int i, int fd)
+/*
+ * Helper to delete the epoll fd
+ */
+int os_del_epoll_fd(int fd)
 {
-	pollfds[i].fd = fd;
+	struct epoll_event event;
+	int result;
+	/* This is quiet as we use this as IO ON/OFF - so it is often
+	 * invoked on a non-existent fd
+	 */
+	result = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, &event);
+	return result;
 }
 
 void os_set_ioignore(void)
 {
 	signal(SIGIO, SIG_IGN);
 }
+
+void os_close_epoll_fd(void)
+{
+	/* Needed so we do not leak an fd when rebooting */
+	os_close_file(epollfd);
+}
diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c
index a86d7cc2c2d8..bf0acb8aad8b 100644
--- a/arch/um/os-Linux/signal.c
+++ b/arch/um/os-Linux/signal.c
@@ -16,6 +16,7 @@
 #include <os.h>
 #include <sysdep/mcontext.h>
 #include <um_malloc.h>
+#include <sys/ucontext.h>
 
 void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = {
 	[SIGTRAP]	= relay_signal,
@@ -159,7 +160,7 @@ static void (*handlers[_NSIG])(int sig, struct siginfo *si, mcontext_t *mc) = {
 
 static void hard_handler(int sig, siginfo_t *si, void *p)
 {
-	struct ucontext *uc = p;
+	ucontext_t *uc = p;
 	mcontext_t *mc = &uc->uc_mcontext;
 	unsigned long pending = 1UL << sig;
 
diff --git a/arch/unicore32/include/asm/cacheflush.h b/arch/unicore32/include/asm/cacheflush.h
index a5e08e2d5d6d..1d9132b66039 100644
--- a/arch/unicore32/include/asm/cacheflush.h
+++ b/arch/unicore32/include/asm/cacheflush.h
@@ -170,10 +170,8 @@ extern void flush_cache_page(struct vm_area_struct *vma,
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 extern void flush_dcache_page(struct page *);
 
-#define flush_dcache_mmap_lock(mapping)			\
-	spin_lock_irq(&(mapping)->tree_lock)
-#define flush_dcache_mmap_unlock(mapping)		\
-	spin_unlock_irq(&(mapping)->tree_lock)
+#define flush_dcache_mmap_lock(mapping)		do { } while (0)
+#define flush_dcache_mmap_unlock(mapping)	do { } while (0)
 
 #define flush_icache_user_range(vma, page, addr, len)	\
 	flush_dcache_page(page)
diff --git a/arch/unicore32/include/asm/memory.h b/arch/unicore32/include/asm/memory.h
index 3bb0a29fd2d7..66bb9f6525c0 100644
--- a/arch/unicore32/include/asm/memory.h
+++ b/arch/unicore32/include/asm/memory.h
@@ -20,12 +20,6 @@
 #include <mach/memory.h>
 
 /*
- * Allow for constants defined here to be used from assembly code
- * by prepending the UL suffix only with actual C code compilation.
- */
-#define UL(x) _AC(x, UL)
-
-/*
  * PAGE_OFFSET - the virtual address of the start of the kernel image
  * TASK_SIZE - the maximum size of a user space task.
  * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 199e15bd3ec5..ce8b4da07e35 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -122,12 +122,14 @@ struct x86_init_pci {
  * @guest_late_init:		guest late init
  * @x2apic_available:		X2APIC detection
  * @init_mem_mapping:		setup early mappings during init_mem_mapping()
+ * @init_after_bootmem:		guest init after boot allocator is finished
  */
 struct x86_hyper_init {
 	void (*init_platform)(void);
 	void (*guest_late_init)(void);
 	bool (*x2apic_available)(void);
 	void (*init_mem_mapping)(void);
+	void (*init_after_bootmem)(void);
 };
 
 /**
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index ebda84a91510..3ab867603e81 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -92,6 +92,7 @@ struct x86_init_ops x86_init __initdata = {
 		.guest_late_init	= x86_init_noop,
 		.x2apic_available	= bool_x86_init_noop,
 		.init_mem_mapping	= x86_init_noop,
+		.init_after_bootmem	= x86_init_noop,
 	},
 
 	.acpi = {
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 396e1f0151ac..8008db2bddb3 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -778,6 +778,7 @@ void __init mem_init(void)
 	free_all_bootmem();
 
 	after_bootmem = 1;
+	x86_init.hyper.init_after_bootmem();
 
 	mem_init_print_info(NULL);
 	printk(KERN_INFO "virtual kernel memory layout:\n"
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index dca9abf2b85c..66de40e45f58 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1185,6 +1185,7 @@ void __init mem_init(void)
 	/* this will put all memory onto the freelists */
 	free_all_bootmem();
 	after_bootmem = 1;
+	x86_init.hyper.init_after_bootmem();
 
 	/*
 	 * Must be done after boot memory is put on freelist, because here we
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 155ecbac9e28..48c591251600 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -90,9 +90,10 @@ unsigned long arch_mmap_rnd(void)
 	return arch_rnd(mmap_is_ia32() ? mmap32_rnd_bits : mmap64_rnd_bits);
 }
 
-static unsigned long mmap_base(unsigned long rnd, unsigned long task_size)
+static unsigned long mmap_base(unsigned long rnd, unsigned long task_size,
+			       struct rlimit *rlim_stack)
 {
-	unsigned long gap = rlimit(RLIMIT_STACK);
+	unsigned long gap = rlim_stack->rlim_cur;
 	unsigned long pad = stack_maxrandom_size(task_size) + stack_guard_gap;
 	unsigned long gap_min, gap_max;
 
@@ -126,16 +127,17 @@ static unsigned long mmap_legacy_base(unsigned long rnd,
  * process VM image, sets up which VM layout function to use:
  */
 static void arch_pick_mmap_base(unsigned long *base, unsigned long *legacy_base,
-		unsigned long random_factor, unsigned long task_size)
+		unsigned long random_factor, unsigned long task_size,
+		struct rlimit *rlim_stack)
 {
 	*legacy_base = mmap_legacy_base(random_factor, task_size);
 	if (mmap_is_legacy())
 		*base = *legacy_base;
 	else
-		*base = mmap_base(random_factor, task_size);
+		*base = mmap_base(random_factor, task_size, rlim_stack);
 }
 
-void arch_pick_mmap_layout(struct mm_struct *mm)
+void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
 {
 	if (mmap_is_legacy())
 		mm->get_unmapped_area = arch_get_unmapped_area;
@@ -143,7 +145,8 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
 
 	arch_pick_mmap_base(&mm->mmap_base, &mm->mmap_legacy_base,
-			arch_rnd(mmap64_rnd_bits), task_size_64bit(0));
+			arch_rnd(mmap64_rnd_bits), task_size_64bit(0),
+			rlim_stack);
 
 #ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES
 	/*
@@ -153,7 +156,8 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 	 * mmap_base, the compat syscall uses mmap_compat_base.
 	 */
 	arch_pick_mmap_base(&mm->mmap_compat_base, &mm->mmap_compat_legacy_base,
-			arch_rnd(mmap32_rnd_bits), task_size_32bit());
+			arch_rnd(mmap32_rnd_bits), task_size_32bit(),
+			rlim_stack);
 #endif
 }
 
diff --git a/arch/x86/um/stub_segv.c b/arch/x86/um/stub_segv.c
index 1518d2805ae8..27361cbb7ca9 100644
--- a/arch/x86/um/stub_segv.c
+++ b/arch/x86/um/stub_segv.c
@@ -6,11 +6,12 @@
 #include <sysdep/stub.h>
 #include <sysdep/faultinfo.h>
 #include <sysdep/mcontext.h>
+#include <sys/ucontext.h>
 
 void __attribute__ ((__section__ (".__syscall_stub")))
 stub_segv_handler(int sig, siginfo_t *info, void *p)
 {
-	struct ucontext *uc = p;
+	ucontext_t *uc = p;
 
 	GET_FAULTINFO_FROM_MC(*((struct faultinfo *) STUB_DATA),
 			      &uc->uc_mcontext);
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 3c2c2530737e..c36d23aa6c35 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1259,10 +1259,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
 	 */
 	__userpte_alloc_gfp &= ~__GFP_HIGHMEM;
 
-	/* Work out if we support NX */
-	get_cpu_cap(&boot_cpu_data);
-	x86_configure_nx();
-
 	/* Get mfn list */
 	xen_build_dynamic_phys_to_machine();
 
@@ -1272,6 +1268,10 @@ asmlinkage __visible void __init xen_start_kernel(void)
 	 */
 	xen_setup_gdt(0);
 
+	/* Work out if we support NX */
+	get_cpu_cap(&boot_cpu_data);
+	x86_configure_nx();
+
 	xen_init_irq_ops();
 
 	/* Let's presume PV guests always boot on vCPU with id 0. */
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index d20763472920..486c0a34d00b 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -116,6 +116,8 @@ DEFINE_PER_CPU(unsigned long, xen_current_cr3);	 /* actual vcpu cr3 */
 
 static phys_addr_t xen_pt_base, xen_pt_size __initdata;
 
+static DEFINE_STATIC_KEY_FALSE(xen_struct_pages_ready);
+
 /*
  * Just beyond the highest usermode address.  STACK_TOP_MAX has a
  * redzone above it, so round it up to a PGD boundary.
@@ -155,11 +157,18 @@ void make_lowmem_page_readwrite(void *vaddr)
 }
 
 
+/*
+ * During early boot all page table pages are pinned, but we do not have struct
+ * pages, so return true until struct pages are ready.
+ */
 static bool xen_page_pinned(void *ptr)
 {
-	struct page *page = virt_to_page(ptr);
+	if (static_branch_likely(&xen_struct_pages_ready)) {
+		struct page *page = virt_to_page(ptr);
 
-	return PagePinned(page);
+		return PagePinned(page);
+	}
+	return true;
 }
 
 static void xen_extend_mmu_update(const struct mmu_update *update)
@@ -836,11 +845,6 @@ void xen_mm_pin_all(void)
 	spin_unlock(&pgd_lock);
 }
 
-/*
- * The init_mm pagetable is really pinned as soon as its created, but
- * that's before we have page structures to store the bits.  So do all
- * the book-keeping now.
- */
 static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page,
 				  enum pt_level level)
 {
@@ -848,8 +852,18 @@ static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page,
 	return 0;
 }
 
-static void __init xen_mark_init_mm_pinned(void)
+/*
+ * The init_mm pagetable is really pinned as soon as its created, but
+ * that's before we have page structures to store the bits.  So do all
+ * the book-keeping now once struct pages for allocated pages are
+ * initialized. This happens only after free_all_bootmem() is called.
+ */
+static void __init xen_after_bootmem(void)
 {
+	static_branch_enable(&xen_struct_pages_ready);
+#ifdef CONFIG_X86_64
+	SetPagePinned(virt_to_page(level3_user_vsyscall));
+#endif
 	xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP);
 }
 
@@ -1623,14 +1637,15 @@ static inline void __set_pfn_prot(unsigned long pfn, pgprot_t prot)
 static inline void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn,
 				    unsigned level)
 {
-	bool pinned = PagePinned(virt_to_page(mm->pgd));
+	bool pinned = xen_page_pinned(mm->pgd);
 
 	trace_xen_mmu_alloc_ptpage(mm, pfn, level, pinned);
 
 	if (pinned) {
 		struct page *page = pfn_to_page(pfn);
 
-		SetPagePinned(page);
+		if (static_branch_likely(&xen_struct_pages_ready))
+			SetPagePinned(page);
 
 		if (!PageHighMem(page)) {
 			xen_mc_batch();
@@ -2364,9 +2379,7 @@ static void __init xen_post_allocator_init(void)
 
 #ifdef CONFIG_X86_64
 	pv_mmu_ops.write_cr3 = &xen_write_cr3;
-	SetPagePinned(virt_to_page(level3_user_vsyscall));
 #endif
-	xen_mark_init_mm_pinned();
 }
 
 static void xen_leave_lazy_mmu(void)
@@ -2450,6 +2463,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
 void __init xen_init_mmu_ops(void)
 {
 	x86_init.paging.pagetable_init = xen_pagetable_init;
+	x86_init.hyper.init_after_bootmem = xen_after_bootmem;
 
 	pv_mmu_ops = xen_mmu_ops;
 
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index c0c756c76afe..2e20ae2fa2d6 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -425,6 +425,7 @@ static void xen_pv_play_dead(void) /* used only with HOTPLUG_CPU */
 	 * data back is to call:
 	 */
 	tick_nohz_idle_enter();
+	tick_nohz_idle_stop_tick_protected();
 
 	cpuhp_online_idle(CPUHP_AP_ONLINE_IDLE);
 }
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 96f26e026783..5077ead5e59c 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -89,7 +89,9 @@ END(hypercall_page)
 	ELFNOTE(Xen, XEN_ELFNOTE_FEATURES,
 		.ascii "!writable_page_tables|pae_pgdir_above_4gb")
 	ELFNOTE(Xen, XEN_ELFNOTE_SUPPORTED_FEATURES,
-		.long (1 << XENFEAT_writable_page_tables) | (1 << XENFEAT_dom0))
+		.long (1 << XENFEAT_writable_page_tables) |       \
+		      (1 << XENFEAT_dom0) |                       \
+		      (1 << XENFEAT_linux_rsdp_unrestricted))
 	ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE,       .asciz "yes")
 	ELFNOTE(Xen, XEN_ELFNOTE_LOADER,         .asciz "generic")
 	ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,
diff --git a/arch/xtensa/include/uapi/asm/mman.h b/arch/xtensa/include/uapi/asm/mman.h
index 3e9d01ada81f..58f29a9d895d 100644
--- a/arch/xtensa/include/uapi/asm/mman.h
+++ b/arch/xtensa/include/uapi/asm/mman.h
@@ -57,6 +57,7 @@
 #define MAP_NONBLOCK	0x20000		/* do not block on IO */
 #define MAP_STACK	0x40000		/* give out an address that is best suited for process/thread stacks */
 #define MAP_HUGETLB	0x80000		/* create a huge page mapping */
+#define MAP_FIXED_NOREPLACE 0x100000	/* MAP_FIXED which doesn't unmap underlying mapping */
 #ifdef CONFIG_MMAP_ALLOW_UNINITIALIZED
 # define MAP_UNINITIALIZED 0x4000000	/* For anonymous mmap, memory could be
 					 * uninitialized */
diff --git a/crypto/Kconfig b/crypto/Kconfig
index c0dabed5122e..76e8c88c97b4 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -500,6 +500,15 @@ config CRYPTO_CRC32_PCLMUL
 	  which will enable any routine to use the CRC-32-IEEE 802.3 checksum
 	  and gain better performance as compared with the table implementation.
 
+config CRYPTO_CRC32_MIPS
+	tristate "CRC32c and CRC32 CRC algorithm (MIPS)"
+	depends on MIPS_CRC_SUPPORT
+	select CRYPTO_HASH
+	help
+	  CRC32c and CRC32 CRC algorithms implemented using mips crypto
+	  instructions, when available.
+
+
 config CRYPTO_CRCT10DIF
 	tristate "CRCT10DIF algorithm"
 	select CRYPTO_HASH
diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index 9e702bc4960f..7a3a541046ed 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -34,6 +34,7 @@
 struct iort_its_msi_chip {
 	struct list_head	list;
 	struct fwnode_handle	*fw_node;
+	phys_addr_t		base_addr;
 	u32			translation_id;
 };
 
@@ -156,14 +157,16 @@ static LIST_HEAD(iort_msi_chip_list);
 static DEFINE_SPINLOCK(iort_msi_chip_lock);
 
 /**
- * iort_register_domain_token() - register domain token and related ITS ID
- * to the list from where we can get it back later on.
+ * iort_register_domain_token() - register domain token along with related
+ * ITS ID and base address to the list from where we can get it back later on.
  * @trans_id: ITS ID.
+ * @base: ITS base address.
  * @fw_node: Domain token.
  *
  * Returns: 0 on success, -ENOMEM if no memory when allocating list element
  */
-int iort_register_domain_token(int trans_id, struct fwnode_handle *fw_node)
+int iort_register_domain_token(int trans_id, phys_addr_t base,
+			       struct fwnode_handle *fw_node)
 {
 	struct iort_its_msi_chip *its_msi_chip;
 
@@ -173,6 +176,7 @@ int iort_register_domain_token(int trans_id, struct fwnode_handle *fw_node)
 
 	its_msi_chip->fw_node = fw_node;
 	its_msi_chip->translation_id = trans_id;
+	its_msi_chip->base_addr = base;
 
 	spin_lock(&iort_msi_chip_lock);
 	list_add(&its_msi_chip->list, &iort_msi_chip_list);
@@ -569,6 +573,24 @@ int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id)
 	return -ENODEV;
 }
 
+static int __maybe_unused iort_find_its_base(u32 its_id, phys_addr_t *base)
+{
+	struct iort_its_msi_chip *its_msi_chip;
+	int ret = -ENODEV;
+
+	spin_lock(&iort_msi_chip_lock);
+	list_for_each_entry(its_msi_chip, &iort_msi_chip_list, list) {
+		if (its_msi_chip->translation_id == its_id) {
+			*base = its_msi_chip->base_addr;
+			ret = 0;
+			break;
+		}
+	}
+	spin_unlock(&iort_msi_chip_lock);
+
+	return ret;
+}
+
 /**
  * iort_dev_find_its_id() - Find the ITS identifier for a device
  * @dev: The device.
@@ -754,6 +776,24 @@ static inline bool iort_iommu_driver_enabled(u8 type)
 }
 
 #ifdef CONFIG_IOMMU_API
+static struct acpi_iort_node *iort_get_msi_resv_iommu(struct device *dev)
+{
+	struct acpi_iort_node *iommu;
+	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+
+	iommu = iort_get_iort_node(fwspec->iommu_fwnode);
+
+	if (iommu && (iommu->type == ACPI_IORT_NODE_SMMU_V3)) {
+		struct acpi_iort_smmu_v3 *smmu;
+
+		smmu = (struct acpi_iort_smmu_v3 *)iommu->node_data;
+		if (smmu->model == ACPI_IORT_SMMU_V3_HISILICON_HI161X)
+			return iommu;
+	}
+
+	return NULL;
+}
+
 static inline const struct iommu_ops *iort_fwspec_iommu_ops(
 				struct iommu_fwspec *fwspec)
 {
@@ -770,6 +810,69 @@ static inline int iort_add_device_replay(const struct iommu_ops *ops,
 
 	return err;
 }
+
+/**
+ * iort_iommu_msi_get_resv_regions - Reserved region driver helper
+ * @dev: Device from iommu_get_resv_regions()
+ * @head: Reserved region list from iommu_get_resv_regions()
+ *
+ * Returns: Number of msi reserved regions on success (0 if platform
+ *          doesn't require the reservation or no associated msi regions),
+ *          appropriate error value otherwise. The ITS interrupt translation
+ *          spaces (ITS_base + SZ_64K, SZ_64K) associated with the device
+ *          are the msi reserved regions.
+ */
+int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head)
+{
+	struct acpi_iort_its_group *its;
+	struct acpi_iort_node *iommu_node, *its_node = NULL;
+	int i, resv = 0;
+
+	iommu_node = iort_get_msi_resv_iommu(dev);
+	if (!iommu_node)
+		return 0;
+
+	/*
+	 * Current logic to reserve ITS regions relies on HW topologies
+	 * where a given PCI or named component maps its IDs to only one
+	 * ITS group; if a PCI or named component can map its IDs to
+	 * different ITS groups through IORT mappings this function has
+	 * to be reworked to ensure we reserve regions for all ITS groups
+	 * a given PCI or named component may map IDs to.
+	 */
+
+	for (i = 0; i < dev->iommu_fwspec->num_ids; i++) {
+		its_node = iort_node_map_id(iommu_node,
+					dev->iommu_fwspec->ids[i],
+					NULL, IORT_MSI_TYPE);
+		if (its_node)
+			break;
+	}
+
+	if (!its_node)
+		return 0;
+
+	/* Move to ITS specific data */
+	its = (struct acpi_iort_its_group *)its_node->node_data;
+
+	for (i = 0; i < its->its_count; i++) {
+		phys_addr_t base;
+
+		if (!iort_find_its_base(its->identifiers[i], &base)) {
+			int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
+			struct iommu_resv_region *region;
+
+			region = iommu_alloc_resv_region(base + SZ_64K, SZ_64K,
+							 prot, IOMMU_RESV_MSI);
+			if (region) {
+				list_add_tail(&region->list, head);
+				resv++;
+			}
+		}
+	}
+
+	return (resv == its->its_count) ? resv : -ENODEV;
+}
 #else
 static inline const struct iommu_ops *iort_fwspec_iommu_ops(
 				struct iommu_fwspec *fwspec)
@@ -777,6 +880,8 @@ static inline const struct iommu_ops *iort_fwspec_iommu_ops(
 static inline int iort_add_device_replay(const struct iommu_ops *ops,
 					 struct device *dev)
 { return 0; }
+int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head)
+{ return 0; }
 #endif
 
 static int iort_iommu_xlate(struct device *dev, struct acpi_iort_node *node,
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 22a112b4f4d8..e2235ed3e4be 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -36,16 +36,6 @@ static bool force_enable_dimms;
 module_param(force_enable_dimms, bool, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(force_enable_dimms, "Ignore _STA (ACPI DIMM device) status");
 
-static unsigned int scrub_timeout = NFIT_ARS_TIMEOUT;
-module_param(scrub_timeout, uint, S_IRUGO|S_IWUSR);
-MODULE_PARM_DESC(scrub_timeout, "Initial scrub timeout in seconds");
-
-/* after three payloads of overflow, it's dead jim */
-static unsigned int scrub_overflow_abort = 3;
-module_param(scrub_overflow_abort, uint, S_IRUGO|S_IWUSR);
-MODULE_PARM_DESC(scrub_overflow_abort,
-		"Number of times we overflow ARS results before abort");
-
 static bool disable_vendor_specific;
 module_param(disable_vendor_specific, bool, S_IRUGO);
 MODULE_PARM_DESC(disable_vendor_specific,
@@ -60,6 +50,10 @@ module_param(default_dsm_family, int, S_IRUGO);
 MODULE_PARM_DESC(default_dsm_family,
 		"Try this DSM type first when identifying NVDIMM family");
 
+static bool no_init_ars;
+module_param(no_init_ars, bool, 0644);
+MODULE_PARM_DESC(no_init_ars, "Skip ARS run at nfit init time");
+
 LIST_HEAD(acpi_descs);
 DEFINE_MUTEX(acpi_desc_lock);
 
@@ -197,7 +191,7 @@ static int xlat_nvdimm_status(struct nvdimm *nvdimm, void *buf, unsigned int cmd
 		 * In the _LSI, _LSR, _LSW case the locked status is
 		 * communicated via the read/write commands
 		 */
-		if (nfit_mem->has_lsi)
+		if (nfit_mem->has_lsr)
 			break;
 
 		if (status >> 16 & ND_CONFIG_LOCKED)
@@ -477,14 +471,14 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
 		in_buf.buffer.length = call_pkg->nd_size_in;
 	}
 
-	dev_dbg(dev, "%s:%s cmd: %d: func: %d input length: %d\n",
-			__func__, dimm_name, cmd, func, in_buf.buffer.length);
+	dev_dbg(dev, "%s cmd: %d: func: %d input length: %d\n",
+		dimm_name, cmd, func, in_buf.buffer.length);
 	print_hex_dump_debug("nvdimm in  ", DUMP_PREFIX_OFFSET, 4, 4,
 			in_buf.buffer.pointer,
 			min_t(u32, 256, in_buf.buffer.length), true);
 
 	/* call the BIOS, prefer the named methods over _DSM if available */
-	if (nvdimm && cmd == ND_CMD_GET_CONFIG_SIZE && nfit_mem->has_lsi)
+	if (nvdimm && cmd == ND_CMD_GET_CONFIG_SIZE && nfit_mem->has_lsr)
 		out_obj = acpi_label_info(handle);
 	else if (nvdimm && cmd == ND_CMD_GET_CONFIG_DATA && nfit_mem->has_lsr) {
 		struct nd_cmd_get_config_data_hdr *p = buf;
@@ -507,8 +501,7 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
 	}
 
 	if (!out_obj) {
-		dev_dbg(dev, "%s:%s _DSM failed cmd: %s\n", __func__, dimm_name,
-				cmd_name);
+		dev_dbg(dev, "%s _DSM failed cmd: %s\n", dimm_name, cmd_name);
 		return -EINVAL;
 	}
 
@@ -529,13 +522,13 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
 	}
 
 	if (out_obj->package.type != ACPI_TYPE_BUFFER) {
-		dev_dbg(dev, "%s:%s unexpected output object type cmd: %s type: %d\n",
-				__func__, dimm_name, cmd_name, out_obj->type);
+		dev_dbg(dev, "%s unexpected output object type cmd: %s type: %d\n",
+				dimm_name, cmd_name, out_obj->type);
 		rc = -EINVAL;
 		goto out;
 	}
 
-	dev_dbg(dev, "%s:%s cmd: %s output length: %d\n", __func__, dimm_name,
+	dev_dbg(dev, "%s cmd: %s output length: %d\n", dimm_name,
 			cmd_name, out_obj->buffer.length);
 	print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4, 4,
 			out_obj->buffer.pointer,
@@ -547,14 +540,14 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
 				out_obj->buffer.length - offset);
 
 		if (offset + out_size > out_obj->buffer.length) {
-			dev_dbg(dev, "%s:%s output object underflow cmd: %s field: %d\n",
-					__func__, dimm_name, cmd_name, i);
+			dev_dbg(dev, "%s output object underflow cmd: %s field: %d\n",
+					dimm_name, cmd_name, i);
 			break;
 		}
 
 		if (in_buf.buffer.length + offset + out_size > buf_len) {
-			dev_dbg(dev, "%s:%s output overrun cmd: %s field: %d\n",
-					__func__, dimm_name, cmd_name, i);
+			dev_dbg(dev, "%s output overrun cmd: %s field: %d\n",
+					dimm_name, cmd_name, i);
 			rc = -ENXIO;
 			goto out;
 		}
@@ -656,7 +649,7 @@ static bool add_spa(struct acpi_nfit_desc *acpi_desc,
 	INIT_LIST_HEAD(&nfit_spa->list);
 	memcpy(nfit_spa->spa, spa, sizeof(*spa));
 	list_add_tail(&nfit_spa->list, &acpi_desc->spas);
-	dev_dbg(dev, "%s: spa index: %d type: %s\n", __func__,
+	dev_dbg(dev, "spa index: %d type: %s\n",
 			spa->range_index,
 			spa_type_name(nfit_spa_type(spa)));
 	return true;
@@ -685,8 +678,8 @@ static bool add_memdev(struct acpi_nfit_desc *acpi_desc,
 	INIT_LIST_HEAD(&nfit_memdev->list);
 	memcpy(nfit_memdev->memdev, memdev, sizeof(*memdev));
 	list_add_tail(&nfit_memdev->list, &acpi_desc->memdevs);
-	dev_dbg(dev, "%s: memdev handle: %#x spa: %d dcr: %d flags: %#x\n",
-			__func__, memdev->device_handle, memdev->range_index,
+	dev_dbg(dev, "memdev handle: %#x spa: %d dcr: %d flags: %#x\n",
+			memdev->device_handle, memdev->range_index,
 			memdev->region_index, memdev->flags);
 	return true;
 }
@@ -754,7 +747,7 @@ static bool add_dcr(struct acpi_nfit_desc *acpi_desc,
 	INIT_LIST_HEAD(&nfit_dcr->list);
 	memcpy(nfit_dcr->dcr, dcr, sizeof_dcr(dcr));
 	list_add_tail(&nfit_dcr->list, &acpi_desc->dcrs);
-	dev_dbg(dev, "%s: dcr index: %d windows: %d\n", __func__,
+	dev_dbg(dev, "dcr index: %d windows: %d\n",
 			dcr->region_index, dcr->windows);
 	return true;
 }
@@ -781,7 +774,7 @@ static bool add_bdw(struct acpi_nfit_desc *acpi_desc,
 	INIT_LIST_HEAD(&nfit_bdw->list);
 	memcpy(nfit_bdw->bdw, bdw, sizeof(*bdw));
 	list_add_tail(&nfit_bdw->list, &acpi_desc->bdws);
-	dev_dbg(dev, "%s: bdw dcr: %d windows: %d\n", __func__,
+	dev_dbg(dev, "bdw dcr: %d windows: %d\n",
 			bdw->region_index, bdw->windows);
 	return true;
 }
@@ -820,7 +813,7 @@ static bool add_idt(struct acpi_nfit_desc *acpi_desc,
 	INIT_LIST_HEAD(&nfit_idt->list);
 	memcpy(nfit_idt->idt, idt, sizeof_idt(idt));
 	list_add_tail(&nfit_idt->list, &acpi_desc->idts);
-	dev_dbg(dev, "%s: idt index: %d num_lines: %d\n", __func__,
+	dev_dbg(dev, "idt index: %d num_lines: %d\n",
 			idt->interleave_index, idt->line_count);
 	return true;
 }
@@ -860,7 +853,7 @@ static bool add_flush(struct acpi_nfit_desc *acpi_desc,
 	INIT_LIST_HEAD(&nfit_flush->list);
 	memcpy(nfit_flush->flush, flush, sizeof_flush(flush));
 	list_add_tail(&nfit_flush->list, &acpi_desc->flushes);
-	dev_dbg(dev, "%s: nfit_flush handle: %d hint_count: %d\n", __func__,
+	dev_dbg(dev, "nfit_flush handle: %d hint_count: %d\n",
 			flush->device_handle, flush->hint_count);
 	return true;
 }
@@ -873,7 +866,7 @@ static bool add_platform_cap(struct acpi_nfit_desc *acpi_desc,
 
 	mask = (1 << (pcap->highest_capability + 1)) - 1;
 	acpi_desc->platform_cap = pcap->capabilities & mask;
-	dev_dbg(dev, "%s: cap: %#x\n", __func__, acpi_desc->platform_cap);
+	dev_dbg(dev, "cap: %#x\n", acpi_desc->platform_cap);
 	return true;
 }
 
@@ -920,7 +913,7 @@ static void *add_table(struct acpi_nfit_desc *acpi_desc,
 			return err;
 		break;
 	case ACPI_NFIT_TYPE_SMBIOS:
-		dev_dbg(dev, "%s: smbios\n", __func__);
+		dev_dbg(dev, "smbios\n");
 		break;
 	case ACPI_NFIT_TYPE_CAPABILITIES:
 		if (!add_platform_cap(acpi_desc, table))
@@ -1277,8 +1270,11 @@ static ssize_t scrub_show(struct device *dev,
 	if (nd_desc) {
 		struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
 
+		mutex_lock(&acpi_desc->init_mutex);
 		rc = sprintf(buf, "%d%s", acpi_desc->scrub_count,
-				(work_busy(&acpi_desc->work)) ? "+\n" : "\n");
+				work_busy(&acpi_desc->dwork.work)
+				&& !acpi_desc->cancel ? "+\n" : "\n");
+		mutex_unlock(&acpi_desc->init_mutex);
 	}
 	device_unlock(dev);
 	return rc;
@@ -1648,7 +1644,7 @@ void __acpi_nvdimm_notify(struct device *dev, u32 event)
 	struct nfit_mem *nfit_mem;
 	struct acpi_nfit_desc *acpi_desc;
 
-	dev_dbg(dev->parent, "%s: %s: event: %d\n", dev_name(dev), __func__,
+	dev_dbg(dev->parent, "%s: event: %d\n", dev_name(dev),
 			event);
 
 	if (event != NFIT_NOTIFY_DIMM_HEALTH) {
@@ -1681,12 +1677,23 @@ static void acpi_nvdimm_notify(acpi_handle handle, u32 event, void *data)
 	device_unlock(dev->parent);
 }
 
+static bool acpi_nvdimm_has_method(struct acpi_device *adev, char *method)
+{
+	acpi_handle handle;
+	acpi_status status;
+
+	status = acpi_get_handle(adev->handle, method, &handle);
+
+	if (ACPI_SUCCESS(status))
+		return true;
+	return false;
+}
+
 static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
 		struct nfit_mem *nfit_mem, u32 device_handle)
 {
 	struct acpi_device *adev, *adev_dimm;
 	struct device *dev = acpi_desc->dev;
-	union acpi_object *obj;
 	unsigned long dsm_mask;
 	const guid_t *guid;
 	int i;
@@ -1759,25 +1766,15 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
 					1ULL << i))
 			set_bit(i, &nfit_mem->dsm_mask);
 
-	obj = acpi_label_info(adev_dimm->handle);
-	if (obj) {
-		ACPI_FREE(obj);
-		nfit_mem->has_lsi = 1;
-		dev_dbg(dev, "%s: has _LSI\n", dev_name(&adev_dimm->dev));
-	}
-
-	obj = acpi_label_read(adev_dimm->handle, 0, 0);
-	if (obj) {
-		ACPI_FREE(obj);
-		nfit_mem->has_lsr = 1;
+	if (acpi_nvdimm_has_method(adev_dimm, "_LSI")
+			&& acpi_nvdimm_has_method(adev_dimm, "_LSR")) {
 		dev_dbg(dev, "%s: has _LSR\n", dev_name(&adev_dimm->dev));
+		nfit_mem->has_lsr = true;
 	}
 
-	obj = acpi_label_write(adev_dimm->handle, 0, 0, NULL);
-	if (obj) {
-		ACPI_FREE(obj);
-		nfit_mem->has_lsw = 1;
+	if (nfit_mem->has_lsr && acpi_nvdimm_has_method(adev_dimm, "_LSW")) {
 		dev_dbg(dev, "%s: has _LSW\n", dev_name(&adev_dimm->dev));
+		nfit_mem->has_lsw = true;
 	}
 
 	return 0;
@@ -1866,10 +1863,10 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
 			cmd_mask |= nfit_mem->dsm_mask & NVDIMM_STANDARD_CMDMASK;
 		}
 
-		if (nfit_mem->has_lsi)
+		if (nfit_mem->has_lsr) {
 			set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask);
-		if (nfit_mem->has_lsr)
 			set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask);
+		}
 		if (nfit_mem->has_lsw)
 			set_bit(ND_CMD_SET_CONFIG_DATA, &cmd_mask);
 
@@ -2365,7 +2362,7 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
 	nvdimm = nd_blk_region_to_dimm(ndbr);
 	nfit_mem = nvdimm_provider_data(nvdimm);
 	if (!nfit_mem || !nfit_mem->dcr || !nfit_mem->bdw) {
-		dev_dbg(dev, "%s: missing%s%s%s\n", __func__,
+		dev_dbg(dev, "missing%s%s%s\n",
 				nfit_mem ? "" : " nfit_mem",
 				(nfit_mem && nfit_mem->dcr) ? "" : " dcr",
 				(nfit_mem && nfit_mem->bdw) ? "" : " bdw");
@@ -2384,7 +2381,7 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
 	mmio->addr.base = devm_nvdimm_memremap(dev, nfit_mem->spa_bdw->address,
                         nfit_mem->spa_bdw->length, nd_blk_memremap_flags(ndbr));
 	if (!mmio->addr.base) {
-		dev_dbg(dev, "%s: %s failed to map bdw\n", __func__,
+		dev_dbg(dev, "%s failed to map bdw\n",
 				nvdimm_name(nvdimm));
 		return -ENOMEM;
 	}
@@ -2395,8 +2392,8 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
 	rc = nfit_blk_init_interleave(mmio, nfit_mem->idt_bdw,
 			nfit_mem->memdev_bdw->interleave_ways);
 	if (rc) {
-		dev_dbg(dev, "%s: %s failed to init bdw interleave\n",
-				__func__, nvdimm_name(nvdimm));
+		dev_dbg(dev, "%s failed to init bdw interleave\n",
+				nvdimm_name(nvdimm));
 		return rc;
 	}
 
@@ -2407,7 +2404,7 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
 	mmio->addr.base = devm_nvdimm_ioremap(dev, nfit_mem->spa_dcr->address,
 			nfit_mem->spa_dcr->length);
 	if (!mmio->addr.base) {
-		dev_dbg(dev, "%s: %s failed to map dcr\n", __func__,
+		dev_dbg(dev, "%s failed to map dcr\n",
 				nvdimm_name(nvdimm));
 		return -ENOMEM;
 	}
@@ -2418,15 +2415,15 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
 	rc = nfit_blk_init_interleave(mmio, nfit_mem->idt_dcr,
 			nfit_mem->memdev_dcr->interleave_ways);
 	if (rc) {
-		dev_dbg(dev, "%s: %s failed to init dcr interleave\n",
-				__func__, nvdimm_name(nvdimm));
+		dev_dbg(dev, "%s failed to init dcr interleave\n",
+				nvdimm_name(nvdimm));
 		return rc;
 	}
 
 	rc = acpi_nfit_blk_get_flags(nd_desc, nvdimm, nfit_blk);
 	if (rc < 0) {
-		dev_dbg(dev, "%s: %s failed get DIMM flags\n",
-				__func__, nvdimm_name(nvdimm));
+		dev_dbg(dev, "%s failed get DIMM flags\n",
+				nvdimm_name(nvdimm));
 		return rc;
 	}
 
@@ -2476,7 +2473,8 @@ static int ars_start(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa
 	memset(&ars_start, 0, sizeof(ars_start));
 	ars_start.address = spa->address;
 	ars_start.length = spa->length;
-	ars_start.flags = acpi_desc->ars_start_flags;
+	if (test_bit(ARS_SHORT, &nfit_spa->ars_state))
+		ars_start.flags = ND_ARS_RETURN_PREV_DATA;
 	if (nfit_spa_type(spa) == NFIT_SPA_PM)
 		ars_start.type = ND_ARS_PERSISTENT;
 	else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE)
@@ -2518,16 +2516,62 @@ static int ars_get_status(struct acpi_nfit_desc *acpi_desc)
 	int rc, cmd_rc;
 
 	rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_STATUS, ars_status,
-			acpi_desc->ars_status_size, &cmd_rc);
+			acpi_desc->max_ars, &cmd_rc);
 	if (rc < 0)
 		return rc;
 	return cmd_rc;
 }
 
-static int ars_status_process_records(struct acpi_nfit_desc *acpi_desc,
-		struct nd_cmd_ars_status *ars_status)
+static void ars_complete(struct acpi_nfit_desc *acpi_desc,
+		struct nfit_spa *nfit_spa)
+{
+	struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status;
+	struct acpi_nfit_system_address *spa = nfit_spa->spa;
+	struct nd_region *nd_region = nfit_spa->nd_region;
+	struct device *dev;
+
+	if ((ars_status->address >= spa->address && ars_status->address
+				< spa->address + spa->length)
+			|| (ars_status->address < spa->address)) {
+		/*
+		 * Assume that if a scrub starts at an offset from the
+		 * start of nfit_spa that we are in the continuation
+		 * case.
+		 *
+		 * Otherwise, if the scrub covers the spa range, mark
+		 * any pending request complete.
+		 */
+		if (ars_status->address + ars_status->length
+				>= spa->address + spa->length)
+				/* complete */;
+		else
+			return;
+	} else
+		return;
+
+	if (test_bit(ARS_DONE, &nfit_spa->ars_state))
+		return;
+
+	if (!test_and_clear_bit(ARS_REQ, &nfit_spa->ars_state))
+		return;
+
+	if (nd_region) {
+		dev = nd_region_dev(nd_region);
+		nvdimm_region_notify(nd_region, NVDIMM_REVALIDATE_POISON);
+	} else
+		dev = acpi_desc->dev;
+
+	dev_dbg(dev, "ARS: range %d %s complete\n", spa->range_index,
+			test_bit(ARS_SHORT, &nfit_spa->ars_state)
+			? "short" : "long");
+	clear_bit(ARS_SHORT, &nfit_spa->ars_state);
+	set_bit(ARS_DONE, &nfit_spa->ars_state);
+}
+
+static int ars_status_process_records(struct acpi_nfit_desc *acpi_desc)
 {
 	struct nvdimm_bus *nvdimm_bus = acpi_desc->nvdimm_bus;
+	struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status;
 	int rc;
 	u32 i;
 
@@ -2606,7 +2650,7 @@ static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
 	struct acpi_nfit_system_address *spa = nfit_spa->spa;
 	struct nd_blk_region_desc *ndbr_desc;
 	struct nfit_mem *nfit_mem;
-	int blk_valid = 0, rc;
+	int rc;
 
 	if (!nvdimm) {
 		dev_err(acpi_desc->dev, "spa%d dimm: %#x not found\n",
@@ -2626,15 +2670,14 @@ static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
 		if (!nfit_mem || !nfit_mem->bdw) {
 			dev_dbg(acpi_desc->dev, "spa%d %s missing bdw\n",
 					spa->range_index, nvdimm_name(nvdimm));
-		} else {
-			mapping->size = nfit_mem->bdw->capacity;
-			mapping->start = nfit_mem->bdw->start_address;
-			ndr_desc->num_lanes = nfit_mem->bdw->windows;
-			blk_valid = 1;
+			break;
 		}
 
+		mapping->size = nfit_mem->bdw->capacity;
+		mapping->start = nfit_mem->bdw->start_address;
+		ndr_desc->num_lanes = nfit_mem->bdw->windows;
 		ndr_desc->mapping = mapping;
-		ndr_desc->num_mappings = blk_valid;
+		ndr_desc->num_mappings = 1;
 		ndbr_desc = to_blk_region_desc(ndr_desc);
 		ndbr_desc->enable = acpi_nfit_blk_region_enable;
 		ndbr_desc->do_io = acpi_desc->blk_do_io;
@@ -2682,8 +2725,7 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
 		return 0;
 
 	if (spa->range_index == 0 && !nfit_spa_is_virtual(spa)) {
-		dev_dbg(acpi_desc->dev, "%s: detected invalid spa index\n",
-				__func__);
+		dev_dbg(acpi_desc->dev, "detected invalid spa index\n");
 		return 0;
 	}
 
@@ -2769,301 +2811,243 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
 	return rc;
 }
 
-static int ars_status_alloc(struct acpi_nfit_desc *acpi_desc,
-		u32 max_ars)
+static int ars_status_alloc(struct acpi_nfit_desc *acpi_desc)
 {
 	struct device *dev = acpi_desc->dev;
 	struct nd_cmd_ars_status *ars_status;
 
-	if (acpi_desc->ars_status && acpi_desc->ars_status_size >= max_ars) {
-		memset(acpi_desc->ars_status, 0, acpi_desc->ars_status_size);
+	if (acpi_desc->ars_status) {
+		memset(acpi_desc->ars_status, 0, acpi_desc->max_ars);
 		return 0;
 	}
 
-	if (acpi_desc->ars_status)
-		devm_kfree(dev, acpi_desc->ars_status);
-	acpi_desc->ars_status = NULL;
-	ars_status = devm_kzalloc(dev, max_ars, GFP_KERNEL);
+	ars_status = devm_kzalloc(dev, acpi_desc->max_ars, GFP_KERNEL);
 	if (!ars_status)
 		return -ENOMEM;
 	acpi_desc->ars_status = ars_status;
-	acpi_desc->ars_status_size = max_ars;
 	return 0;
 }
 
-static int acpi_nfit_query_poison(struct acpi_nfit_desc *acpi_desc,
-		struct nfit_spa *nfit_spa)
+static int acpi_nfit_query_poison(struct acpi_nfit_desc *acpi_desc)
 {
-	struct acpi_nfit_system_address *spa = nfit_spa->spa;
 	int rc;
 
-	if (!nfit_spa->max_ars) {
-		struct nd_cmd_ars_cap ars_cap;
-
-		memset(&ars_cap, 0, sizeof(ars_cap));
-		rc = ars_get_cap(acpi_desc, &ars_cap, nfit_spa);
-		if (rc < 0)
-			return rc;
-		nfit_spa->max_ars = ars_cap.max_ars_out;
-		nfit_spa->clear_err_unit = ars_cap.clear_err_unit;
-		/* check that the supported scrub types match the spa type */
-		if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE &&
-				((ars_cap.status >> 16) & ND_ARS_VOLATILE) == 0)
-			return -ENOTTY;
-		else if (nfit_spa_type(spa) == NFIT_SPA_PM &&
-				((ars_cap.status >> 16) & ND_ARS_PERSISTENT) == 0)
-			return -ENOTTY;
-	}
-
-	if (ars_status_alloc(acpi_desc, nfit_spa->max_ars))
+	if (ars_status_alloc(acpi_desc))
 		return -ENOMEM;
 
 	rc = ars_get_status(acpi_desc);
+
 	if (rc < 0 && rc != -ENOSPC)
 		return rc;
 
-	if (ars_status_process_records(acpi_desc, acpi_desc->ars_status))
+	if (ars_status_process_records(acpi_desc))
 		return -ENOMEM;
 
 	return 0;
 }
 
-static void acpi_nfit_async_scrub(struct acpi_nfit_desc *acpi_desc,
-		struct nfit_spa *nfit_spa)
+static int ars_register(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa,
+		int *query_rc)
 {
-	struct acpi_nfit_system_address *spa = nfit_spa->spa;
-	unsigned int overflow_retry = scrub_overflow_abort;
-	u64 init_ars_start = 0, init_ars_len = 0;
-	struct device *dev = acpi_desc->dev;
-	unsigned int tmo = scrub_timeout;
-	int rc;
+	int rc = *query_rc;
 
-	if (!nfit_spa->ars_required || !nfit_spa->nd_region)
-		return;
+	if (no_init_ars)
+		return acpi_nfit_register_region(acpi_desc, nfit_spa);
 
-	rc = ars_start(acpi_desc, nfit_spa);
-	/*
-	 * If we timed out the initial scan we'll still be busy here,
-	 * and will wait another timeout before giving up permanently.
-	 */
-	if (rc < 0 && rc != -EBUSY)
-		return;
-
-	do {
-		u64 ars_start, ars_len;
-
-		if (acpi_desc->cancel)
-			break;
-		rc = acpi_nfit_query_poison(acpi_desc, nfit_spa);
-		if (rc == -ENOTTY)
-			break;
-		if (rc == -EBUSY && !tmo) {
-			dev_warn(dev, "range %d ars timeout, aborting\n",
-					spa->range_index);
-			break;
-		}
+	set_bit(ARS_REQ, &nfit_spa->ars_state);
+	set_bit(ARS_SHORT, &nfit_spa->ars_state);
 
+	switch (rc) {
+	case 0:
+	case -EAGAIN:
+		rc = ars_start(acpi_desc, nfit_spa);
 		if (rc == -EBUSY) {
-			/*
-			 * Note, entries may be appended to the list
-			 * while the lock is dropped, but the workqueue
-			 * being active prevents entries being deleted /
-			 * freed.
-			 */
-			mutex_unlock(&acpi_desc->init_mutex);
-			ssleep(1);
-			tmo--;
-			mutex_lock(&acpi_desc->init_mutex);
-			continue;
-		}
-
-		/* we got some results, but there are more pending... */
-		if (rc == -ENOSPC && overflow_retry--) {
-			if (!init_ars_len) {
-				init_ars_len = acpi_desc->ars_status->length;
-				init_ars_start = acpi_desc->ars_status->address;
-			}
-			rc = ars_continue(acpi_desc);
-		}
-
-		if (rc < 0) {
-			dev_warn(dev, "range %d ars continuation failed\n",
-					spa->range_index);
+			*query_rc = rc;
 			break;
-		}
-
-		if (init_ars_len) {
-			ars_start = init_ars_start;
-			ars_len = init_ars_len;
+		} else if (rc == 0) {
+			rc = acpi_nfit_query_poison(acpi_desc);
 		} else {
-			ars_start = acpi_desc->ars_status->address;
-			ars_len = acpi_desc->ars_status->length;
+			set_bit(ARS_FAILED, &nfit_spa->ars_state);
+			break;
 		}
-		dev_dbg(dev, "spa range: %d ars from %#llx + %#llx complete\n",
-				spa->range_index, ars_start, ars_len);
-		/* notify the region about new poison entries */
-		nvdimm_region_notify(nfit_spa->nd_region,
-				NVDIMM_REVALIDATE_POISON);
+		if (rc == -EAGAIN)
+			clear_bit(ARS_SHORT, &nfit_spa->ars_state);
+		else if (rc == 0)
+			ars_complete(acpi_desc, nfit_spa);
 		break;
-	} while (1);
+	case -EBUSY:
+	case -ENOSPC:
+		break;
+	default:
+		set_bit(ARS_FAILED, &nfit_spa->ars_state);
+		break;
+	}
+
+	if (test_and_clear_bit(ARS_DONE, &nfit_spa->ars_state))
+		set_bit(ARS_REQ, &nfit_spa->ars_state);
+
+	return acpi_nfit_register_region(acpi_desc, nfit_spa);
 }
 
-static void acpi_nfit_scrub(struct work_struct *work)
+static void ars_complete_all(struct acpi_nfit_desc *acpi_desc)
 {
-	struct device *dev;
-	u64 init_scrub_length = 0;
 	struct nfit_spa *nfit_spa;
-	u64 init_scrub_address = 0;
-	bool init_ars_done = false;
-	struct acpi_nfit_desc *acpi_desc;
-	unsigned int tmo = scrub_timeout;
-	unsigned int overflow_retry = scrub_overflow_abort;
-
-	acpi_desc = container_of(work, typeof(*acpi_desc), work);
-	dev = acpi_desc->dev;
-
-	/*
-	 * We scrub in 2 phases.  The first phase waits for any platform
-	 * firmware initiated scrubs to complete and then we go search for the
-	 * affected spa regions to mark them scanned.  In the second phase we
-	 * initiate a directed scrub for every range that was not scrubbed in
-	 * phase 1. If we're called for a 'rescan', we harmlessly pass through
-	 * the first phase, but really only care about running phase 2, where
-	 * regions can be notified of new poison.
-	 */
 
-	/* process platform firmware initiated scrubs */
- retry:
-	mutex_lock(&acpi_desc->init_mutex);
 	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
-		struct nd_cmd_ars_status *ars_status;
-		struct acpi_nfit_system_address *spa;
-		u64 ars_start, ars_len;
-		int rc;
-
-		if (acpi_desc->cancel)
-			break;
-
-		if (nfit_spa->nd_region)
+		if (test_bit(ARS_FAILED, &nfit_spa->ars_state))
 			continue;
+		ars_complete(acpi_desc, nfit_spa);
+	}
+}
 
-		if (init_ars_done) {
-			/*
-			 * No need to re-query, we're now just
-			 * reconciling all the ranges covered by the
-			 * initial scrub
-			 */
-			rc = 0;
-		} else
-			rc = acpi_nfit_query_poison(acpi_desc, nfit_spa);
-
-		if (rc == -ENOTTY) {
-			/* no ars capability, just register spa and move on */
-			acpi_nfit_register_region(acpi_desc, nfit_spa);
-			continue;
-		}
-
-		if (rc == -EBUSY && !tmo) {
-			/* fallthrough to directed scrub in phase 2 */
-			dev_warn(dev, "timeout awaiting ars results, continuing...\n");
-			break;
-		} else if (rc == -EBUSY) {
-			mutex_unlock(&acpi_desc->init_mutex);
-			ssleep(1);
-			tmo--;
-			goto retry;
-		}
-
-		/* we got some results, but there are more pending... */
-		if (rc == -ENOSPC && overflow_retry--) {
-			ars_status = acpi_desc->ars_status;
-			/*
-			 * Record the original scrub range, so that we
-			 * can recall all the ranges impacted by the
-			 * initial scrub.
-			 */
-			if (!init_scrub_length) {
-				init_scrub_length = ars_status->length;
-				init_scrub_address = ars_status->address;
-			}
-			rc = ars_continue(acpi_desc);
-			if (rc == 0) {
-				mutex_unlock(&acpi_desc->init_mutex);
-				goto retry;
-			}
-		}
+static unsigned int __acpi_nfit_scrub(struct acpi_nfit_desc *acpi_desc,
+		int query_rc)
+{
+	unsigned int tmo = acpi_desc->scrub_tmo;
+	struct device *dev = acpi_desc->dev;
+	struct nfit_spa *nfit_spa;
 
-		if (rc < 0) {
-			/*
-			 * Initial scrub failed, we'll give it one more
-			 * try below...
-			 */
-			break;
-		}
+	if (acpi_desc->cancel)
+		return 0;
 
-		/* We got some final results, record completed ranges */
-		ars_status = acpi_desc->ars_status;
-		if (init_scrub_length) {
-			ars_start = init_scrub_address;
-			ars_len = ars_start + init_scrub_length;
-		} else {
-			ars_start = ars_status->address;
-			ars_len = ars_status->length;
-		}
-		spa = nfit_spa->spa;
+	if (query_rc == -EBUSY) {
+		dev_dbg(dev, "ARS: ARS busy\n");
+		return min(30U * 60U, tmo * 2);
+	}
+	if (query_rc == -ENOSPC) {
+		dev_dbg(dev, "ARS: ARS continue\n");
+		ars_continue(acpi_desc);
+		return 1;
+	}
+	if (query_rc && query_rc != -EAGAIN) {
+		unsigned long long addr, end;
 
-		if (!init_ars_done) {
-			init_ars_done = true;
-			dev_dbg(dev, "init scrub %#llx + %#llx complete\n",
-					ars_start, ars_len);
-		}
-		if (ars_start <= spa->address && ars_start + ars_len
-				>= spa->address + spa->length)
-			acpi_nfit_register_region(acpi_desc, nfit_spa);
+		addr = acpi_desc->ars_status->address;
+		end = addr + acpi_desc->ars_status->length;
+		dev_dbg(dev, "ARS: %llx-%llx failed (%d)\n", addr, end,
+				query_rc);
 	}
 
-	/*
-	 * For all the ranges not covered by an initial scrub we still
-	 * want to see if there are errors, but it's ok to discover them
-	 * asynchronously.
-	 */
+	ars_complete_all(acpi_desc);
 	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
-		/*
-		 * Flag all the ranges that still need scrubbing, but
-		 * register them now to make data available.
-		 */
-		if (!nfit_spa->nd_region) {
-			nfit_spa->ars_required = 1;
-			acpi_nfit_register_region(acpi_desc, nfit_spa);
+		if (test_bit(ARS_FAILED, &nfit_spa->ars_state))
+			continue;
+		if (test_bit(ARS_REQ, &nfit_spa->ars_state)) {
+			int rc = ars_start(acpi_desc, nfit_spa);
+
+			clear_bit(ARS_DONE, &nfit_spa->ars_state);
+			dev = nd_region_dev(nfit_spa->nd_region);
+			dev_dbg(dev, "ARS: range %d ARS start (%d)\n",
+					nfit_spa->spa->range_index, rc);
+			if (rc == 0 || rc == -EBUSY)
+				return 1;
+			dev_err(dev, "ARS: range %d ARS failed (%d)\n",
+					nfit_spa->spa->range_index, rc);
+			set_bit(ARS_FAILED, &nfit_spa->ars_state);
 		}
 	}
-	acpi_desc->init_complete = 1;
+	return 0;
+}
 
-	list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
-		acpi_nfit_async_scrub(acpi_desc, nfit_spa);
-	acpi_desc->scrub_count++;
-	acpi_desc->ars_start_flags = 0;
-	if (acpi_desc->scrub_count_state)
-		sysfs_notify_dirent(acpi_desc->scrub_count_state);
+static void acpi_nfit_scrub(struct work_struct *work)
+{
+	struct acpi_nfit_desc *acpi_desc;
+	unsigned int tmo;
+	int query_rc;
+
+	acpi_desc = container_of(work, typeof(*acpi_desc), dwork.work);
+	mutex_lock(&acpi_desc->init_mutex);
+	query_rc = acpi_nfit_query_poison(acpi_desc);
+	tmo = __acpi_nfit_scrub(acpi_desc, query_rc);
+	if (tmo) {
+		queue_delayed_work(nfit_wq, &acpi_desc->dwork, tmo * HZ);
+		acpi_desc->scrub_tmo = tmo;
+	} else {
+		acpi_desc->scrub_count++;
+		if (acpi_desc->scrub_count_state)
+			sysfs_notify_dirent(acpi_desc->scrub_count_state);
+	}
+	memset(acpi_desc->ars_status, 0, acpi_desc->max_ars);
 	mutex_unlock(&acpi_desc->init_mutex);
 }
 
+static void acpi_nfit_init_ars(struct acpi_nfit_desc *acpi_desc,
+		struct nfit_spa *nfit_spa)
+{
+	int type = nfit_spa_type(nfit_spa->spa);
+	struct nd_cmd_ars_cap ars_cap;
+	int rc;
+
+	memset(&ars_cap, 0, sizeof(ars_cap));
+	rc = ars_get_cap(acpi_desc, &ars_cap, nfit_spa);
+	if (rc < 0)
+		return;
+	/* check that the supported scrub types match the spa type */
+	if (type == NFIT_SPA_VOLATILE && ((ars_cap.status >> 16)
+				& ND_ARS_VOLATILE) == 0)
+		return;
+	if (type == NFIT_SPA_PM && ((ars_cap.status >> 16)
+				& ND_ARS_PERSISTENT) == 0)
+		return;
+
+	nfit_spa->max_ars = ars_cap.max_ars_out;
+	nfit_spa->clear_err_unit = ars_cap.clear_err_unit;
+	acpi_desc->max_ars = max(nfit_spa->max_ars, acpi_desc->max_ars);
+	clear_bit(ARS_FAILED, &nfit_spa->ars_state);
+	set_bit(ARS_REQ, &nfit_spa->ars_state);
+}
+
 static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc)
 {
 	struct nfit_spa *nfit_spa;
-	int rc;
+	int rc, query_rc;
+
+	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
+		set_bit(ARS_FAILED, &nfit_spa->ars_state);
+		switch (nfit_spa_type(nfit_spa->spa)) {
+		case NFIT_SPA_VOLATILE:
+		case NFIT_SPA_PM:
+			acpi_nfit_init_ars(acpi_desc, nfit_spa);
+			break;
+		}
+	}
+
+	/*
+	 * Reap any results that might be pending before starting new
+	 * short requests.
+	 */
+	query_rc = acpi_nfit_query_poison(acpi_desc);
+	if (query_rc == 0)
+		ars_complete_all(acpi_desc);
 
 	list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
-		if (nfit_spa_type(nfit_spa->spa) == NFIT_SPA_DCR) {
-			/* BLK regions don't need to wait for ars results */
+		switch (nfit_spa_type(nfit_spa->spa)) {
+		case NFIT_SPA_VOLATILE:
+		case NFIT_SPA_PM:
+			/* register regions and kick off initial ARS run */
+			rc = ars_register(acpi_desc, nfit_spa, &query_rc);
+			if (rc)
+				return rc;
+			break;
+		case NFIT_SPA_BDW:
+			/* nothing to register */
+			break;
+		case NFIT_SPA_DCR:
+		case NFIT_SPA_VDISK:
+		case NFIT_SPA_VCD:
+		case NFIT_SPA_PDISK:
+		case NFIT_SPA_PCD:
+			/* register known regions that don't support ARS */
 			rc = acpi_nfit_register_region(acpi_desc, nfit_spa);
 			if (rc)
 				return rc;
+			break;
+		default:
+			/* don't register unknown regions */
+			break;
 		}
 
-	acpi_desc->ars_start_flags = 0;
-	if (!acpi_desc->cancel)
-		queue_work(nfit_wq, &acpi_desc->work);
+	queue_delayed_work(nfit_wq, &acpi_desc->dwork, 0);
 	return 0;
 }
 
@@ -3173,8 +3157,7 @@ int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *data, acpi_size sz)
 		data = add_table(acpi_desc, &prev, data, end);
 
 	if (IS_ERR(data)) {
-		dev_dbg(dev, "%s: nfit table parsing error: %ld\n", __func__,
-				PTR_ERR(data));
+		dev_dbg(dev, "nfit table parsing error: %ld\n",	PTR_ERR(data));
 		rc = PTR_ERR(data);
 		goto out_unlock;
 	}
@@ -3199,49 +3182,20 @@ int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *data, acpi_size sz)
 }
 EXPORT_SYMBOL_GPL(acpi_nfit_init);
 
-struct acpi_nfit_flush_work {
-	struct work_struct work;
-	struct completion cmp;
-};
-
-static void flush_probe(struct work_struct *work)
-{
-	struct acpi_nfit_flush_work *flush;
-
-	flush = container_of(work, typeof(*flush), work);
-	complete(&flush->cmp);
-}
-
 static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc)
 {
 	struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
 	struct device *dev = acpi_desc->dev;
-	struct acpi_nfit_flush_work flush;
-	int rc;
 
-	/* bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */
+	/* Bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */
 	device_lock(dev);
 	device_unlock(dev);
 
-	/* bounce the init_mutex to make init_complete valid */
+	/* Bounce the init_mutex to complete initial registration */
 	mutex_lock(&acpi_desc->init_mutex);
-	if (acpi_desc->cancel || acpi_desc->init_complete) {
-		mutex_unlock(&acpi_desc->init_mutex);
-		return 0;
-	}
-
-	/*
-	 * Scrub work could take 10s of seconds, userspace may give up so we
-	 * need to be interruptible while waiting.
-	 */
-	INIT_WORK_ONSTACK(&flush.work, flush_probe);
-	init_completion(&flush.cmp);
-	queue_work(nfit_wq, &flush.work);
 	mutex_unlock(&acpi_desc->init_mutex);
 
-	rc = wait_for_completion_interruptible(&flush.cmp);
-	cancel_work_sync(&flush.work);
-	return rc;
+	return 0;
 }
 
 static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
@@ -3260,20 +3214,18 @@ static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
 	 * just needs guarantees that any ars it initiates are not
 	 * interrupted by any intervening start reqeusts from userspace.
 	 */
-	if (work_busy(&acpi_desc->work))
+	if (work_busy(&acpi_desc->dwork.work))
 		return -EBUSY;
 
 	return 0;
 }
 
-int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, u8 flags)
+int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, unsigned long flags)
 {
 	struct device *dev = acpi_desc->dev;
+	int scheduled = 0, busy = 0;
 	struct nfit_spa *nfit_spa;
 
-	if (work_busy(&acpi_desc->work))
-		return -EBUSY;
-
 	mutex_lock(&acpi_desc->init_mutex);
 	if (acpi_desc->cancel) {
 		mutex_unlock(&acpi_desc->init_mutex);
@@ -3281,19 +3233,32 @@ int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, u8 flags)
 	}
 
 	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
-		struct acpi_nfit_system_address *spa = nfit_spa->spa;
+		int type = nfit_spa_type(nfit_spa->spa);
 
-		if (nfit_spa_type(spa) != NFIT_SPA_PM)
+		if (type != NFIT_SPA_PM && type != NFIT_SPA_VOLATILE)
+			continue;
+		if (test_bit(ARS_FAILED, &nfit_spa->ars_state))
 			continue;
 
-		nfit_spa->ars_required = 1;
+		if (test_and_set_bit(ARS_REQ, &nfit_spa->ars_state))
+			busy++;
+		else {
+			if (test_bit(ARS_SHORT, &flags))
+				set_bit(ARS_SHORT, &nfit_spa->ars_state);
+			scheduled++;
+		}
+	}
+	if (scheduled) {
+		queue_delayed_work(nfit_wq, &acpi_desc->dwork, 0);
+		dev_dbg(dev, "ars_scan triggered\n");
 	}
-	acpi_desc->ars_start_flags = flags;
-	queue_work(nfit_wq, &acpi_desc->work);
-	dev_dbg(dev, "%s: ars_scan triggered\n", __func__);
 	mutex_unlock(&acpi_desc->init_mutex);
 
-	return 0;
+	if (scheduled)
+		return 0;
+	if (busy)
+		return -EBUSY;
+	return -ENOTTY;
 }
 
 void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev)
@@ -3320,7 +3285,8 @@ void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev)
 	INIT_LIST_HEAD(&acpi_desc->dimms);
 	INIT_LIST_HEAD(&acpi_desc->list);
 	mutex_init(&acpi_desc->init_mutex);
-	INIT_WORK(&acpi_desc->work, acpi_nfit_scrub);
+	acpi_desc->scrub_tmo = 1;
+	INIT_DELAYED_WORK(&acpi_desc->dwork, acpi_nfit_scrub);
 }
 EXPORT_SYMBOL_GPL(acpi_nfit_desc_init);
 
@@ -3344,6 +3310,7 @@ void acpi_nfit_shutdown(void *data)
 
 	mutex_lock(&acpi_desc->init_mutex);
 	acpi_desc->cancel = 1;
+	cancel_delayed_work_sync(&acpi_desc->dwork);
 	mutex_unlock(&acpi_desc->init_mutex);
 
 	/*
@@ -3397,8 +3364,8 @@ static int acpi_nfit_add(struct acpi_device *adev)
 			rc = acpi_nfit_init(acpi_desc, obj->buffer.pointer,
 					obj->buffer.length);
 		else
-			dev_dbg(dev, "%s invalid type %d, ignoring _FIT\n",
-				 __func__, (int) obj->type);
+			dev_dbg(dev, "invalid type %d, ignoring _FIT\n",
+				(int) obj->type);
 		kfree(buf.pointer);
 	} else
 		/* skip over the lead-in header table */
@@ -3427,7 +3394,7 @@ static void acpi_nfit_update_notify(struct device *dev, acpi_handle handle)
 
 	if (!dev->driver) {
 		/* dev->driver may be null if we're being removed */
-		dev_dbg(dev, "%s: no driver found for dev\n", __func__);
+		dev_dbg(dev, "no driver found for dev\n");
 		return;
 	}
 
@@ -3465,15 +3432,15 @@ static void acpi_nfit_update_notify(struct device *dev, acpi_handle handle)
 static void acpi_nfit_uc_error_notify(struct device *dev, acpi_handle handle)
 {
 	struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(dev);
-	u8 flags = (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON) ?
-			0 : ND_ARS_RETURN_PREV_DATA;
+	unsigned long flags = (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON) ?
+			0 : 1 << ARS_SHORT;
 
 	acpi_nfit_ars_rescan(acpi_desc, flags);
 }
 
 void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event)
 {
-	dev_dbg(dev, "%s: event: 0x%x\n", __func__, event);
+	dev_dbg(dev, "event: 0x%x\n", event);
 
 	switch (event) {
 	case NFIT_NOTIFY_UPDATE:
diff --git a/drivers/acpi/nfit/mce.c b/drivers/acpi/nfit/mce.c
index b92921439657..e9626bf6ca29 100644
--- a/drivers/acpi/nfit/mce.c
+++ b/drivers/acpi/nfit/mce.c
@@ -51,9 +51,8 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
 			if ((spa->address + spa->length - 1) < mce->addr)
 				continue;
 			found_match = 1;
-			dev_dbg(dev, "%s: addr in SPA %d (0x%llx, 0x%llx)\n",
-				__func__, spa->range_index, spa->address,
-				spa->length);
+			dev_dbg(dev, "addr in SPA %d (0x%llx, 0x%llx)\n",
+				spa->range_index, spa->address, spa->length);
 			/*
 			 * We can break at the first match because we're going
 			 * to rescan all the SPA ranges. There shouldn't be any
diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h
index 50d36e166d70..7d15856a739f 100644
--- a/drivers/acpi/nfit/nfit.h
+++ b/drivers/acpi/nfit/nfit.h
@@ -117,10 +117,17 @@ enum nfit_dimm_notifiers {
 	NFIT_NOTIFY_DIMM_HEALTH = 0x81,
 };
 
+enum nfit_ars_state {
+	ARS_REQ,
+	ARS_DONE,
+	ARS_SHORT,
+	ARS_FAILED,
+};
+
 struct nfit_spa {
 	struct list_head list;
 	struct nd_region *nd_region;
-	unsigned int ars_required:1;
+	unsigned long ars_state;
 	u32 clear_err_unit;
 	u32 max_ars;
 	struct acpi_nfit_system_address spa[0];
@@ -171,9 +178,8 @@ struct nfit_mem {
 	struct resource *flush_wpq;
 	unsigned long dsm_mask;
 	int family;
-	u32 has_lsi:1;
-	u32 has_lsr:1;
-	u32 has_lsw:1;
+	bool has_lsr;
+	bool has_lsw;
 };
 
 struct acpi_nfit_desc {
@@ -191,18 +197,18 @@ struct acpi_nfit_desc {
 	struct device *dev;
 	u8 ars_start_flags;
 	struct nd_cmd_ars_status *ars_status;
-	size_t ars_status_size;
-	struct work_struct work;
+	struct delayed_work dwork;
 	struct list_head list;
 	struct kernfs_node *scrub_count_state;
+	unsigned int max_ars;
 	unsigned int scrub_count;
 	unsigned int scrub_mode;
 	unsigned int cancel:1;
-	unsigned int init_complete:1;
 	unsigned long dimm_cmd_force_en;
 	unsigned long bus_cmd_force_en;
 	unsigned long bus_nfit_cmd_force_en;
 	unsigned int platform_cap;
+	unsigned int scrub_tmo;
 	int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
 			void *iobuf, u64 len, int rw);
 };
@@ -244,7 +250,7 @@ struct nfit_blk {
 
 extern struct list_head acpi_descs;
 extern struct mutex acpi_desc_lock;
-int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, u8 flags);
+int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, unsigned long flags);
 
 #ifdef CONFIG_X86_MCE
 void nfit_mce_register(void);
diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
index c7cf48ad5cb9..a651ab3490d8 100644
--- a/drivers/acpi/processor_perflib.c
+++ b/drivers/acpi/processor_perflib.c
@@ -533,7 +533,7 @@ int acpi_processor_notify_smm(struct module *calling_module)
 
 EXPORT_SYMBOL(acpi_processor_notify_smm);
 
-static int acpi_processor_get_psd(struct acpi_processor	*pr)
+int acpi_processor_get_psd(acpi_handle handle, struct acpi_psd_package *pdomain)
 {
 	int result = 0;
 	acpi_status status = AE_OK;
@@ -541,9 +541,8 @@ static int acpi_processor_get_psd(struct acpi_processor	*pr)
 	struct acpi_buffer format = {sizeof("NNNNN"), "NNNNN"};
 	struct acpi_buffer state = {0, NULL};
 	union acpi_object  *psd = NULL;
-	struct acpi_psd_package *pdomain;
 
-	status = acpi_evaluate_object(pr->handle, "_PSD", NULL, &buffer);
+	status = acpi_evaluate_object(handle, "_PSD", NULL, &buffer);
 	if (ACPI_FAILURE(status)) {
 		return -ENODEV;
 	}
@@ -561,8 +560,6 @@ static int acpi_processor_get_psd(struct acpi_processor	*pr)
 		goto end;
 	}
 
-	pdomain = &(pr->performance->domain_info);
-
 	state.length = sizeof(struct acpi_psd_package);
 	state.pointer = pdomain;
 
@@ -597,6 +594,7 @@ end:
 	kfree(buffer.pointer);
 	return result;
 }
+EXPORT_SYMBOL(acpi_processor_get_psd);
 
 int acpi_processor_preregister_performance(
 		struct acpi_processor_performance __percpu *performance)
@@ -645,7 +643,8 @@ int acpi_processor_preregister_performance(
 
 		pr->performance = per_cpu_ptr(performance, i);
 		cpumask_set_cpu(i, pr->performance->shared_cpu_map);
-		if (acpi_processor_get_psd(pr)) {
+		pdomain = &(pr->performance->domain_info);
+		if (acpi_processor_get_psd(pr->handle, pdomain)) {
 			retval = -EINVAL;
 			continue;
 		}
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 79fcd2bae96b..bffe8616bd55 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -837,11 +837,8 @@ int __init memory_dev_init(void)
 	 * during boot and have been initialized
 	 */
 	mutex_lock(&mem_sysfs_mutex);
-	for (i = 0; i < NR_MEM_SECTIONS; i += sections_per_block) {
-		/* Don't iterate over sections we know are !present: */
-		if (i > __highest_present_section_nr)
-			break;
-
+	for (i = 0; i <= __highest_present_section_nr;
+		i += sections_per_block) {
 		err = add_memory_block(i);
 		if (!ret)
 			ret = err;
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 1e03b04819c8..07dc5419bd63 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -32,6 +32,7 @@
 #include <linux/ceph/osd_client.h>
 #include <linux/ceph/mon_client.h>
 #include <linux/ceph/cls_lock_client.h>
+#include <linux/ceph/striper.h>
 #include <linux/ceph/decode.h>
 #include <linux/parser.h>
 #include <linux/bsearch.h>
@@ -200,95 +201,81 @@ struct rbd_client {
 };
 
 struct rbd_img_request;
-typedef void (*rbd_img_callback_t)(struct rbd_img_request *);
-
-#define	BAD_WHICH	U32_MAX		/* Good which or bad which, which? */
-
-struct rbd_obj_request;
-typedef void (*rbd_obj_callback_t)(struct rbd_obj_request *);
 
 enum obj_request_type {
-	OBJ_REQUEST_NODATA, OBJ_REQUEST_BIO, OBJ_REQUEST_PAGES
+	OBJ_REQUEST_NODATA = 1,
+	OBJ_REQUEST_BIO,	/* pointer into provided bio (list) */
+	OBJ_REQUEST_BVECS,	/* pointer into provided bio_vec array */
+	OBJ_REQUEST_OWN_BVECS,	/* private bio_vec array, doesn't own pages */
 };
 
 enum obj_operation_type {
+	OBJ_OP_READ = 1,
 	OBJ_OP_WRITE,
-	OBJ_OP_READ,
 	OBJ_OP_DISCARD,
 };
 
-enum obj_req_flags {
-	OBJ_REQ_DONE,		/* completion flag: not done = 0, done = 1 */
-	OBJ_REQ_IMG_DATA,	/* object usage: standalone = 0, image = 1 */
-	OBJ_REQ_KNOWN,		/* EXISTS flag valid: no = 0, yes = 1 */
-	OBJ_REQ_EXISTS,		/* target exists: no = 0, yes = 1 */
+/*
+ * Writes go through the following state machine to deal with
+ * layering:
+ *
+ *                       need copyup
+ * RBD_OBJ_WRITE_GUARD ---------------> RBD_OBJ_WRITE_COPYUP
+ *        |     ^                              |
+ *        v     \------------------------------/
+ *      done
+ *        ^
+ *        |
+ * RBD_OBJ_WRITE_FLAT
+ *
+ * Writes start in RBD_OBJ_WRITE_GUARD or _FLAT, depending on whether
+ * there is a parent or not.
+ */
+enum rbd_obj_write_state {
+	RBD_OBJ_WRITE_FLAT = 1,
+	RBD_OBJ_WRITE_GUARD,
+	RBD_OBJ_WRITE_COPYUP,
 };
 
 struct rbd_obj_request {
-	u64			object_no;
-	u64			offset;		/* object start byte */
-	u64			length;		/* bytes from offset */
-	unsigned long		flags;
-
-	/*
-	 * An object request associated with an image will have its
-	 * img_data flag set; a standalone object request will not.
-	 *
-	 * A standalone object request will have which == BAD_WHICH
-	 * and a null obj_request pointer.
-	 *
-	 * An object request initiated in support of a layered image
-	 * object (to check for its existence before a write) will
-	 * have which == BAD_WHICH and a non-null obj_request pointer.
-	 *
-	 * Finally, an object request for rbd image data will have
-	 * which != BAD_WHICH, and will have a non-null img_request
-	 * pointer.  The value of which will be in the range
-	 * 0..(img_request->obj_request_count-1).
-	 */
+	struct ceph_object_extent ex;
 	union {
-		struct rbd_obj_request	*obj_request;	/* STAT op */
-		struct {
-			struct rbd_img_request	*img_request;
-			u64			img_offset;
-			/* links for img_request->obj_requests list */
-			struct list_head	links;
-		};
+		bool			tried_parent;	/* for reads */
+		enum rbd_obj_write_state write_state;	/* for writes */
 	};
-	u32			which;		/* posn image request list */
 
-	enum obj_request_type	type;
+	struct rbd_img_request	*img_request;
+	struct ceph_file_extent	*img_extents;
+	u32			num_img_extents;
+
 	union {
-		struct bio	*bio_list;
+		struct ceph_bio_iter	bio_pos;
 		struct {
-			struct page	**pages;
-			u32		page_count;
+			struct ceph_bvec_iter	bvec_pos;
+			u32			bvec_count;
+			u32			bvec_idx;
 		};
 	};
-	struct page		**copyup_pages;
-	u32			copyup_page_count;
+	struct bio_vec		*copyup_bvecs;
+	u32			copyup_bvec_count;
 
 	struct ceph_osd_request	*osd_req;
 
 	u64			xferred;	/* bytes transferred */
 	int			result;
 
-	rbd_obj_callback_t	callback;
-
 	struct kref		kref;
 };
 
 enum img_req_flags {
-	IMG_REQ_WRITE,		/* I/O direction: read = 0, write = 1 */
 	IMG_REQ_CHILD,		/* initiator: block = 0, child image = 1 */
 	IMG_REQ_LAYERED,	/* ENOENT handling: normal = 0, layered = 1 */
-	IMG_REQ_DISCARD,	/* discard: normal = 0, discard request = 1 */
 };
 
 struct rbd_img_request {
 	struct rbd_device	*rbd_dev;
-	u64			offset;	/* starting image byte offset */
-	u64			length;	/* byte count from offset */
+	enum obj_operation_type	op_type;
+	enum obj_request_type	data_type;
 	unsigned long		flags;
 	union {
 		u64			snap_id;	/* for reads */
@@ -298,26 +285,21 @@ struct rbd_img_request {
 		struct request		*rq;		/* block request */
 		struct rbd_obj_request	*obj_request;	/* obj req initiator */
 	};
-	struct page		**copyup_pages;
-	u32			copyup_page_count;
-	spinlock_t		completion_lock;/* protects next_completion */
-	u32			next_completion;
-	rbd_img_callback_t	callback;
+	spinlock_t		completion_lock;
 	u64			xferred;/* aggregate bytes transferred */
 	int			result;	/* first nonzero obj_request result */
 
+	struct list_head	object_extents;	/* obj_req.ex structs */
 	u32			obj_request_count;
-	struct list_head	obj_requests;	/* rbd_obj_request structs */
+	u32			pending_count;
 
 	struct kref		kref;
 };
 
 #define for_each_obj_request(ireq, oreq) \
-	list_for_each_entry(oreq, &(ireq)->obj_requests, links)
-#define for_each_obj_request_from(ireq, oreq) \
-	list_for_each_entry_from(oreq, &(ireq)->obj_requests, links)
+	list_for_each_entry(oreq, &(ireq)->object_extents, ex.oe_item)
 #define for_each_obj_request_safe(ireq, oreq, n) \
-	list_for_each_entry_safe_reverse(oreq, n, &(ireq)->obj_requests, links)
+	list_for_each_entry_safe(oreq, n, &(ireq)->object_extents, ex.oe_item)
 
 enum rbd_watch_state {
 	RBD_WATCH_STATE_UNREGISTERED,
@@ -433,8 +415,6 @@ static DEFINE_SPINLOCK(rbd_client_list_lock);
 static struct kmem_cache	*rbd_img_request_cache;
 static struct kmem_cache	*rbd_obj_request_cache;
 
-static struct bio_set		*rbd_bio_clone;
-
 static int rbd_major;
 static DEFINE_IDA(rbd_dev_id_ida);
 
@@ -447,8 +427,6 @@ static bool single_major = true;
 module_param(single_major, bool, S_IRUGO);
 MODULE_PARM_DESC(single_major, "Use a single major number for all rbd devices (default: true)");
 
-static int rbd_img_request_submit(struct rbd_img_request *img_request);
-
 static ssize_t rbd_add(struct bus_type *bus, const char *buf,
 		       size_t count);
 static ssize_t rbd_remove(struct bus_type *bus, const char *buf,
@@ -458,7 +436,6 @@ static ssize_t rbd_add_single_major(struct bus_type *bus, const char *buf,
 static ssize_t rbd_remove_single_major(struct bus_type *bus, const char *buf,
 				       size_t count);
 static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth);
-static void rbd_spec_put(struct rbd_spec *spec);
 
 static int rbd_dev_id_to_minor(int dev_id)
 {
@@ -577,9 +554,6 @@ void rbd_warn(struct rbd_device *rbd_dev, const char *fmt, ...)
 #  define rbd_assert(expr)	((void) 0)
 #endif /* !RBD_DEBUG */
 
-static void rbd_osd_copyup_callback(struct rbd_obj_request *obj_request);
-static int rbd_img_obj_request_submit(struct rbd_obj_request *obj_request);
-static void rbd_img_parent_read(struct rbd_obj_request *obj_request);
 static void rbd_dev_remove_parent(struct rbd_device *rbd_dev);
 
 static int rbd_dev_refresh(struct rbd_device *rbd_dev);
@@ -857,26 +831,6 @@ static char* obj_op_name(enum obj_operation_type op_type)
 }
 
 /*
- * Get a ceph client with specific addr and configuration, if one does
- * not exist create it.  Either way, ceph_opts is consumed by this
- * function.
- */
-static struct rbd_client *rbd_get_client(struct ceph_options *ceph_opts)
-{
-	struct rbd_client *rbdc;
-
-	mutex_lock_nested(&client_mutex, SINGLE_DEPTH_NESTING);
-	rbdc = rbd_client_find(ceph_opts);
-	if (rbdc)	/* using an existing client */
-		ceph_destroy_options(ceph_opts);
-	else
-		rbdc = rbd_client_create(ceph_opts);
-	mutex_unlock(&client_mutex);
-
-	return rbdc;
-}
-
-/*
  * Destroy ceph client
  *
  * Caller must hold rbd_client_list_lock.
@@ -904,6 +858,56 @@ static void rbd_put_client(struct rbd_client *rbdc)
 		kref_put(&rbdc->kref, rbd_client_release);
 }
 
+static int wait_for_latest_osdmap(struct ceph_client *client)
+{
+	u64 newest_epoch;
+	int ret;
+
+	ret = ceph_monc_get_version(&client->monc, "osdmap", &newest_epoch);
+	if (ret)
+		return ret;
+
+	if (client->osdc.osdmap->epoch >= newest_epoch)
+		return 0;
+
+	ceph_osdc_maybe_request_map(&client->osdc);
+	return ceph_monc_wait_osdmap(&client->monc, newest_epoch,
+				     client->options->mount_timeout);
+}
+
+/*
+ * Get a ceph client with specific addr and configuration, if one does
+ * not exist create it.  Either way, ceph_opts is consumed by this
+ * function.
+ */
+static struct rbd_client *rbd_get_client(struct ceph_options *ceph_opts)
+{
+	struct rbd_client *rbdc;
+	int ret;
+
+	mutex_lock_nested(&client_mutex, SINGLE_DEPTH_NESTING);
+	rbdc = rbd_client_find(ceph_opts);
+	if (rbdc) {
+		ceph_destroy_options(ceph_opts);
+
+		/*
+		 * Using an existing client.  Make sure ->pg_pools is up to
+		 * date before we look up the pool id in do_rbd_add().
+		 */
+		ret = wait_for_latest_osdmap(rbdc->client);
+		if (ret) {
+			rbd_warn(NULL, "failed to get latest osdmap: %d", ret);
+			rbd_put_client(rbdc);
+			rbdc = ERR_PTR(ret);
+		}
+	} else {
+		rbdc = rbd_client_create(ceph_opts);
+	}
+	mutex_unlock(&client_mutex);
+
+	return rbdc;
+}
+
 static bool rbd_image_format_valid(u32 image_format)
 {
 	return image_format == 1 || image_format == 2;
@@ -1223,272 +1227,59 @@ static void rbd_dev_mapping_clear(struct rbd_device *rbd_dev)
 	rbd_dev->mapping.features = 0;
 }
 
-static u64 rbd_segment_offset(struct rbd_device *rbd_dev, u64 offset)
-{
-	u64 segment_size = rbd_obj_bytes(&rbd_dev->header);
-
-	return offset & (segment_size - 1);
-}
-
-static u64 rbd_segment_length(struct rbd_device *rbd_dev,
-				u64 offset, u64 length)
-{
-	u64 segment_size = rbd_obj_bytes(&rbd_dev->header);
-
-	offset &= segment_size - 1;
-
-	rbd_assert(length <= U64_MAX - offset);
-	if (offset + length > segment_size)
-		length = segment_size - offset;
-
-	return length;
-}
-
-/*
- * bio helpers
- */
-
-static void bio_chain_put(struct bio *chain)
-{
-	struct bio *tmp;
-
-	while (chain) {
-		tmp = chain;
-		chain = chain->bi_next;
-		bio_put(tmp);
-	}
-}
-
-/*
- * zeros a bio chain, starting at specific offset
- */
-static void zero_bio_chain(struct bio *chain, int start_ofs)
+static void zero_bvec(struct bio_vec *bv)
 {
-	struct bio_vec bv;
-	struct bvec_iter iter;
-	unsigned long flags;
 	void *buf;
-	int pos = 0;
-
-	while (chain) {
-		bio_for_each_segment(bv, chain, iter) {
-			if (pos + bv.bv_len > start_ofs) {
-				int remainder = max(start_ofs - pos, 0);
-				buf = bvec_kmap_irq(&bv, &flags);
-				memset(buf + remainder, 0,
-				       bv.bv_len - remainder);
-				flush_dcache_page(bv.bv_page);
-				bvec_kunmap_irq(buf, &flags);
-			}
-			pos += bv.bv_len;
-		}
+	unsigned long flags;
 
-		chain = chain->bi_next;
-	}
+	buf = bvec_kmap_irq(bv, &flags);
+	memset(buf, 0, bv->bv_len);
+	flush_dcache_page(bv->bv_page);
+	bvec_kunmap_irq(buf, &flags);
 }
 
-/*
- * similar to zero_bio_chain(), zeros data defined by a page array,
- * starting at the given byte offset from the start of the array and
- * continuing up to the given end offset.  The pages array is
- * assumed to be big enough to hold all bytes up to the end.
- */
-static void zero_pages(struct page **pages, u64 offset, u64 end)
+static void zero_bios(struct ceph_bio_iter *bio_pos, u32 off, u32 bytes)
 {
-	struct page **page = &pages[offset >> PAGE_SHIFT];
+	struct ceph_bio_iter it = *bio_pos;
 
-	rbd_assert(end > offset);
-	rbd_assert(end - offset <= (u64)SIZE_MAX);
-	while (offset < end) {
-		size_t page_offset;
-		size_t length;
-		unsigned long flags;
-		void *kaddr;
-
-		page_offset = offset & ~PAGE_MASK;
-		length = min_t(size_t, PAGE_SIZE - page_offset, end - offset);
-		local_irq_save(flags);
-		kaddr = kmap_atomic(*page);
-		memset(kaddr + page_offset, 0, length);
-		flush_dcache_page(*page);
-		kunmap_atomic(kaddr);
-		local_irq_restore(flags);
-
-		offset += length;
-		page++;
-	}
+	ceph_bio_iter_advance(&it, off);
+	ceph_bio_iter_advance_step(&it, bytes, ({
+		zero_bvec(&bv);
+	}));
 }
 
-/*
- * Clone a portion of a bio, starting at the given byte offset
- * and continuing for the number of bytes indicated.
- */
-static struct bio *bio_clone_range(struct bio *bio_src,
-					unsigned int offset,
-					unsigned int len,
-					gfp_t gfpmask)
+static void zero_bvecs(struct ceph_bvec_iter *bvec_pos, u32 off, u32 bytes)
 {
-	struct bio *bio;
-
-	bio = bio_clone_fast(bio_src, gfpmask, rbd_bio_clone);
-	if (!bio)
-		return NULL;	/* ENOMEM */
+	struct ceph_bvec_iter it = *bvec_pos;
 
-	bio_advance(bio, offset);
-	bio->bi_iter.bi_size = len;
-
-	return bio;
+	ceph_bvec_iter_advance(&it, off);
+	ceph_bvec_iter_advance_step(&it, bytes, ({
+		zero_bvec(&bv);
+	}));
 }
 
 /*
- * Clone a portion of a bio chain, starting at the given byte offset
- * into the first bio in the source chain and continuing for the
- * number of bytes indicated.  The result is another bio chain of
- * exactly the given length, or a null pointer on error.
- *
- * The bio_src and offset parameters are both in-out.  On entry they
- * refer to the first source bio and the offset into that bio where
- * the start of data to be cloned is located.
+ * Zero a range in @obj_req data buffer defined by a bio (list) or
+ * (private) bio_vec array.
  *
- * On return, bio_src is updated to refer to the bio in the source
- * chain that contains first un-cloned byte, and *offset will
- * contain the offset of that byte within that bio.
+ * @off is relative to the start of the data buffer.
  */
-static struct bio *bio_chain_clone_range(struct bio **bio_src,
-					unsigned int *offset,
-					unsigned int len,
-					gfp_t gfpmask)
+static void rbd_obj_zero_range(struct rbd_obj_request *obj_req, u32 off,
+			       u32 bytes)
 {
-	struct bio *bi = *bio_src;
-	unsigned int off = *offset;
-	struct bio *chain = NULL;
-	struct bio **end;
-
-	/* Build up a chain of clone bios up to the limit */
-
-	if (!bi || off >= bi->bi_iter.bi_size || !len)
-		return NULL;		/* Nothing to clone */
-
-	end = &chain;
-	while (len) {
-		unsigned int bi_size;
-		struct bio *bio;
-
-		if (!bi) {
-			rbd_warn(NULL, "bio_chain exhausted with %u left", len);
-			goto out_err;	/* EINVAL; ran out of bio's */
-		}
-		bi_size = min_t(unsigned int, bi->bi_iter.bi_size - off, len);
-		bio = bio_clone_range(bi, off, bi_size, gfpmask);
-		if (!bio)
-			goto out_err;	/* ENOMEM */
-
-		*end = bio;
-		end = &bio->bi_next;
-
-		off += bi_size;
-		if (off == bi->bi_iter.bi_size) {
-			bi = bi->bi_next;
-			off = 0;
-		}
-		len -= bi_size;
-	}
-	*bio_src = bi;
-	*offset = off;
-
-	return chain;
-out_err:
-	bio_chain_put(chain);
-
-	return NULL;
-}
-
-/*
- * The default/initial value for all object request flags is 0.  For
- * each flag, once its value is set to 1 it is never reset to 0
- * again.
- */
-static void obj_request_img_data_set(struct rbd_obj_request *obj_request)
-{
-	if (test_and_set_bit(OBJ_REQ_IMG_DATA, &obj_request->flags)) {
-		struct rbd_device *rbd_dev;
-
-		rbd_dev = obj_request->img_request->rbd_dev;
-		rbd_warn(rbd_dev, "obj_request %p already marked img_data",
-			obj_request);
-	}
-}
-
-static bool obj_request_img_data_test(struct rbd_obj_request *obj_request)
-{
-	smp_mb();
-	return test_bit(OBJ_REQ_IMG_DATA, &obj_request->flags) != 0;
-}
-
-static void obj_request_done_set(struct rbd_obj_request *obj_request)
-{
-	if (test_and_set_bit(OBJ_REQ_DONE, &obj_request->flags)) {
-		struct rbd_device *rbd_dev = NULL;
-
-		if (obj_request_img_data_test(obj_request))
-			rbd_dev = obj_request->img_request->rbd_dev;
-		rbd_warn(rbd_dev, "obj_request %p already marked done",
-			obj_request);
+	switch (obj_req->img_request->data_type) {
+	case OBJ_REQUEST_BIO:
+		zero_bios(&obj_req->bio_pos, off, bytes);
+		break;
+	case OBJ_REQUEST_BVECS:
+	case OBJ_REQUEST_OWN_BVECS:
+		zero_bvecs(&obj_req->bvec_pos, off, bytes);
+		break;
+	default:
+		rbd_assert(0);
 	}
 }
 
-static bool obj_request_done_test(struct rbd_obj_request *obj_request)
-{
-	smp_mb();
-	return test_bit(OBJ_REQ_DONE, &obj_request->flags) != 0;
-}
-
-/*
- * This sets the KNOWN flag after (possibly) setting the EXISTS
- * flag.  The latter is set based on the "exists" value provided.
- *
- * Note that for our purposes once an object exists it never goes
- * away again.  It's possible that the response from two existence
- * checks are separated by the creation of the target object, and
- * the first ("doesn't exist") response arrives *after* the second
- * ("does exist").  In that case we ignore the second one.
- */
-static void obj_request_existence_set(struct rbd_obj_request *obj_request,
-				bool exists)
-{
-	if (exists)
-		set_bit(OBJ_REQ_EXISTS, &obj_request->flags);
-	set_bit(OBJ_REQ_KNOWN, &obj_request->flags);
-	smp_mb();
-}
-
-static bool obj_request_known_test(struct rbd_obj_request *obj_request)
-{
-	smp_mb();
-	return test_bit(OBJ_REQ_KNOWN, &obj_request->flags) != 0;
-}
-
-static bool obj_request_exists_test(struct rbd_obj_request *obj_request)
-{
-	smp_mb();
-	return test_bit(OBJ_REQ_EXISTS, &obj_request->flags) != 0;
-}
-
-static bool obj_request_overlaps_parent(struct rbd_obj_request *obj_request)
-{
-	struct rbd_device *rbd_dev = obj_request->img_request->rbd_dev;
-
-	return obj_request->img_offset <
-	    round_up(rbd_dev->parent_overlap, rbd_obj_bytes(&rbd_dev->header));
-}
-
-static void rbd_obj_request_get(struct rbd_obj_request *obj_request)
-{
-	dout("%s: obj %p (was %d)\n", __func__, obj_request,
-		kref_read(&obj_request->kref));
-	kref_get(&obj_request->kref);
-}
-
 static void rbd_obj_request_destroy(struct kref *kref);
 static void rbd_obj_request_put(struct rbd_obj_request *obj_request)
 {
@@ -1505,18 +1296,13 @@ static void rbd_img_request_get(struct rbd_img_request *img_request)
 	kref_get(&img_request->kref);
 }
 
-static bool img_request_child_test(struct rbd_img_request *img_request);
-static void rbd_parent_request_destroy(struct kref *kref);
 static void rbd_img_request_destroy(struct kref *kref);
 static void rbd_img_request_put(struct rbd_img_request *img_request)
 {
 	rbd_assert(img_request != NULL);
 	dout("%s: img %p (was %d)\n", __func__, img_request,
 		kref_read(&img_request->kref));
-	if (img_request_child_test(img_request))
-		kref_put(&img_request->kref, rbd_parent_request_destroy);
-	else
-		kref_put(&img_request->kref, rbd_img_request_destroy);
+	kref_put(&img_request->kref, rbd_img_request_destroy);
 }
 
 static inline void rbd_img_obj_request_add(struct rbd_img_request *img_request,
@@ -1526,139 +1312,37 @@ static inline void rbd_img_obj_request_add(struct rbd_img_request *img_request,
 
 	/* Image request now owns object's original reference */
 	obj_request->img_request = img_request;
-	obj_request->which = img_request->obj_request_count;
-	rbd_assert(!obj_request_img_data_test(obj_request));
-	obj_request_img_data_set(obj_request);
-	rbd_assert(obj_request->which != BAD_WHICH);
 	img_request->obj_request_count++;
-	list_add_tail(&obj_request->links, &img_request->obj_requests);
-	dout("%s: img %p obj %p w=%u\n", __func__, img_request, obj_request,
-		obj_request->which);
+	img_request->pending_count++;
+	dout("%s: img %p obj %p\n", __func__, img_request, obj_request);
 }
 
 static inline void rbd_img_obj_request_del(struct rbd_img_request *img_request,
 					struct rbd_obj_request *obj_request)
 {
-	rbd_assert(obj_request->which != BAD_WHICH);
-
-	dout("%s: img %p obj %p w=%u\n", __func__, img_request, obj_request,
-		obj_request->which);
-	list_del(&obj_request->links);
+	dout("%s: img %p obj %p\n", __func__, img_request, obj_request);
+	list_del(&obj_request->ex.oe_item);
 	rbd_assert(img_request->obj_request_count > 0);
 	img_request->obj_request_count--;
-	rbd_assert(obj_request->which == img_request->obj_request_count);
-	obj_request->which = BAD_WHICH;
-	rbd_assert(obj_request_img_data_test(obj_request));
 	rbd_assert(obj_request->img_request == img_request);
-	obj_request->img_request = NULL;
-	obj_request->callback = NULL;
 	rbd_obj_request_put(obj_request);
 }
 
-static bool obj_request_type_valid(enum obj_request_type type)
-{
-	switch (type) {
-	case OBJ_REQUEST_NODATA:
-	case OBJ_REQUEST_BIO:
-	case OBJ_REQUEST_PAGES:
-		return true;
-	default:
-		return false;
-	}
-}
-
-static void rbd_img_obj_callback(struct rbd_obj_request *obj_request);
-
 static void rbd_obj_request_submit(struct rbd_obj_request *obj_request)
 {
 	struct ceph_osd_request *osd_req = obj_request->osd_req;
 
 	dout("%s %p object_no %016llx %llu~%llu osd_req %p\n", __func__,
-	     obj_request, obj_request->object_no, obj_request->offset,
-	     obj_request->length, osd_req);
-	if (obj_request_img_data_test(obj_request)) {
-		WARN_ON(obj_request->callback != rbd_img_obj_callback);
-		rbd_img_request_get(obj_request->img_request);
-	}
+	     obj_request, obj_request->ex.oe_objno, obj_request->ex.oe_off,
+	     obj_request->ex.oe_len, osd_req);
 	ceph_osdc_start_request(osd_req->r_osdc, osd_req, false);
 }
 
-static void rbd_img_request_complete(struct rbd_img_request *img_request)
-{
-
-	dout("%s: img %p\n", __func__, img_request);
-
-	/*
-	 * If no error occurred, compute the aggregate transfer
-	 * count for the image request.  We could instead use
-	 * atomic64_cmpxchg() to update it as each object request
-	 * completes; not clear which way is better off hand.
-	 */
-	if (!img_request->result) {
-		struct rbd_obj_request *obj_request;
-		u64 xferred = 0;
-
-		for_each_obj_request(img_request, obj_request)
-			xferred += obj_request->xferred;
-		img_request->xferred = xferred;
-	}
-
-	if (img_request->callback)
-		img_request->callback(img_request);
-	else
-		rbd_img_request_put(img_request);
-}
-
 /*
  * The default/initial value for all image request flags is 0.  Each
  * is conditionally set to 1 at image request initialization time
  * and currently never change thereafter.
  */
-static void img_request_write_set(struct rbd_img_request *img_request)
-{
-	set_bit(IMG_REQ_WRITE, &img_request->flags);
-	smp_mb();
-}
-
-static bool img_request_write_test(struct rbd_img_request *img_request)
-{
-	smp_mb();
-	return test_bit(IMG_REQ_WRITE, &img_request->flags) != 0;
-}
-
-/*
- * Set the discard flag when the img_request is an discard request
- */
-static void img_request_discard_set(struct rbd_img_request *img_request)
-{
-	set_bit(IMG_REQ_DISCARD, &img_request->flags);
-	smp_mb();
-}
-
-static bool img_request_discard_test(struct rbd_img_request *img_request)
-{
-	smp_mb();
-	return test_bit(IMG_REQ_DISCARD, &img_request->flags) != 0;
-}
-
-static void img_request_child_set(struct rbd_img_request *img_request)
-{
-	set_bit(IMG_REQ_CHILD, &img_request->flags);
-	smp_mb();
-}
-
-static void img_request_child_clear(struct rbd_img_request *img_request)
-{
-	clear_bit(IMG_REQ_CHILD, &img_request->flags);
-	smp_mb();
-}
-
-static bool img_request_child_test(struct rbd_img_request *img_request)
-{
-	smp_mb();
-	return test_bit(IMG_REQ_CHILD, &img_request->flags) != 0;
-}
-
 static void img_request_layered_set(struct rbd_img_request *img_request)
 {
 	set_bit(IMG_REQ_LAYERED, &img_request->flags);
@@ -1677,209 +1361,70 @@ static bool img_request_layered_test(struct rbd_img_request *img_request)
 	return test_bit(IMG_REQ_LAYERED, &img_request->flags) != 0;
 }
 
-static enum obj_operation_type
-rbd_img_request_op_type(struct rbd_img_request *img_request)
-{
-	if (img_request_write_test(img_request))
-		return OBJ_OP_WRITE;
-	else if (img_request_discard_test(img_request))
-		return OBJ_OP_DISCARD;
-	else
-		return OBJ_OP_READ;
-}
-
-static void
-rbd_img_obj_request_read_callback(struct rbd_obj_request *obj_request)
-{
-	u64 xferred = obj_request->xferred;
-	u64 length = obj_request->length;
-
-	dout("%s: obj %p img %p result %d %llu/%llu\n", __func__,
-		obj_request, obj_request->img_request, obj_request->result,
-		xferred, length);
-	/*
-	 * ENOENT means a hole in the image.  We zero-fill the entire
-	 * length of the request.  A short read also implies zero-fill
-	 * to the end of the request.  An error requires the whole
-	 * length of the request to be reported finished with an error
-	 * to the block layer.  In each case we update the xferred
-	 * count to indicate the whole request was satisfied.
-	 */
-	rbd_assert(obj_request->type != OBJ_REQUEST_NODATA);
-	if (obj_request->result == -ENOENT) {
-		if (obj_request->type == OBJ_REQUEST_BIO)
-			zero_bio_chain(obj_request->bio_list, 0);
-		else
-			zero_pages(obj_request->pages, 0, length);
-		obj_request->result = 0;
-	} else if (xferred < length && !obj_request->result) {
-		if (obj_request->type == OBJ_REQUEST_BIO)
-			zero_bio_chain(obj_request->bio_list, xferred);
-		else
-			zero_pages(obj_request->pages, xferred, length);
-	}
-	obj_request->xferred = length;
-	obj_request_done_set(obj_request);
-}
-
-static void rbd_obj_request_complete(struct rbd_obj_request *obj_request)
+static bool rbd_obj_is_entire(struct rbd_obj_request *obj_req)
 {
-	dout("%s: obj %p cb %p\n", __func__, obj_request,
-		obj_request->callback);
-	obj_request->callback(obj_request);
-}
+	struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
 
-static void rbd_obj_request_error(struct rbd_obj_request *obj_request, int err)
-{
-	obj_request->result = err;
-	obj_request->xferred = 0;
-	/*
-	 * kludge - mirror rbd_obj_request_submit() to match a put in
-	 * rbd_img_obj_callback()
-	 */
-	if (obj_request_img_data_test(obj_request)) {
-		WARN_ON(obj_request->callback != rbd_img_obj_callback);
-		rbd_img_request_get(obj_request->img_request);
-	}
-	obj_request_done_set(obj_request);
-	rbd_obj_request_complete(obj_request);
+	return !obj_req->ex.oe_off &&
+	       obj_req->ex.oe_len == rbd_dev->layout.object_size;
 }
 
-static void rbd_osd_read_callback(struct rbd_obj_request *obj_request)
+static bool rbd_obj_is_tail(struct rbd_obj_request *obj_req)
 {
-	struct rbd_img_request *img_request = NULL;
-	struct rbd_device *rbd_dev = NULL;
-	bool layered = false;
-
-	if (obj_request_img_data_test(obj_request)) {
-		img_request = obj_request->img_request;
-		layered = img_request && img_request_layered_test(img_request);
-		rbd_dev = img_request->rbd_dev;
-	}
-
-	dout("%s: obj %p img %p result %d %llu/%llu\n", __func__,
-		obj_request, img_request, obj_request->result,
-		obj_request->xferred, obj_request->length);
-	if (layered && obj_request->result == -ENOENT &&
-			obj_request->img_offset < rbd_dev->parent_overlap)
-		rbd_img_parent_read(obj_request);
-	else if (img_request)
-		rbd_img_obj_request_read_callback(obj_request);
-	else
-		obj_request_done_set(obj_request);
-}
+	struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
 
-static void rbd_osd_write_callback(struct rbd_obj_request *obj_request)
-{
-	dout("%s: obj %p result %d %llu\n", __func__, obj_request,
-		obj_request->result, obj_request->length);
-	/*
-	 * There is no such thing as a successful short write.  Set
-	 * it to our originally-requested length.
-	 */
-	obj_request->xferred = obj_request->length;
-	obj_request_done_set(obj_request);
+	return obj_req->ex.oe_off + obj_req->ex.oe_len ==
+					rbd_dev->layout.object_size;
 }
 
-static void rbd_osd_discard_callback(struct rbd_obj_request *obj_request)
+static u64 rbd_obj_img_extents_bytes(struct rbd_obj_request *obj_req)
 {
-	dout("%s: obj %p result %d %llu\n", __func__, obj_request,
-		obj_request->result, obj_request->length);
-	/*
-	 * There is no such thing as a successful short discard.  Set
-	 * it to our originally-requested length.
-	 */
-	obj_request->xferred = obj_request->length;
-	/* discarding a non-existent object is not a problem */
-	if (obj_request->result == -ENOENT)
-		obj_request->result = 0;
-	obj_request_done_set(obj_request);
+	return ceph_file_extents_bytes(obj_req->img_extents,
+				       obj_req->num_img_extents);
 }
 
-/*
- * For a simple stat call there's nothing to do.  We'll do more if
- * this is part of a write sequence for a layered image.
- */
-static void rbd_osd_stat_callback(struct rbd_obj_request *obj_request)
+static bool rbd_img_is_write(struct rbd_img_request *img_req)
 {
-	dout("%s: obj %p\n", __func__, obj_request);
-	obj_request_done_set(obj_request);
+	switch (img_req->op_type) {
+	case OBJ_OP_READ:
+		return false;
+	case OBJ_OP_WRITE:
+	case OBJ_OP_DISCARD:
+		return true;
+	default:
+		rbd_assert(0);
+	}
 }
 
-static void rbd_osd_call_callback(struct rbd_obj_request *obj_request)
-{
-	dout("%s: obj %p\n", __func__, obj_request);
-
-	if (obj_request_img_data_test(obj_request))
-		rbd_osd_copyup_callback(obj_request);
-	else
-		obj_request_done_set(obj_request);
-}
+static void rbd_obj_handle_request(struct rbd_obj_request *obj_req);
 
 static void rbd_osd_req_callback(struct ceph_osd_request *osd_req)
 {
-	struct rbd_obj_request *obj_request = osd_req->r_priv;
-	u16 opcode;
+	struct rbd_obj_request *obj_req = osd_req->r_priv;
 
-	dout("%s: osd_req %p\n", __func__, osd_req);
-	rbd_assert(osd_req == obj_request->osd_req);
-	if (obj_request_img_data_test(obj_request)) {
-		rbd_assert(obj_request->img_request);
-		rbd_assert(obj_request->which != BAD_WHICH);
-	} else {
-		rbd_assert(obj_request->which == BAD_WHICH);
-	}
-
-	if (osd_req->r_result < 0)
-		obj_request->result = osd_req->r_result;
-
-	/*
-	 * We support a 64-bit length, but ultimately it has to be
-	 * passed to the block layer, which just supports a 32-bit
-	 * length field.
-	 */
-	obj_request->xferred = osd_req->r_ops[0].outdata_len;
-	rbd_assert(obj_request->xferred < (u64)UINT_MAX);
+	dout("%s osd_req %p result %d for obj_req %p\n", __func__, osd_req,
+	     osd_req->r_result, obj_req);
+	rbd_assert(osd_req == obj_req->osd_req);
 
-	opcode = osd_req->r_ops[0].op;
-	switch (opcode) {
-	case CEPH_OSD_OP_READ:
-		rbd_osd_read_callback(obj_request);
-		break;
-	case CEPH_OSD_OP_SETALLOCHINT:
-		rbd_assert(osd_req->r_ops[1].op == CEPH_OSD_OP_WRITE ||
-			   osd_req->r_ops[1].op == CEPH_OSD_OP_WRITEFULL);
-		/* fall through */
-	case CEPH_OSD_OP_WRITE:
-	case CEPH_OSD_OP_WRITEFULL:
-		rbd_osd_write_callback(obj_request);
-		break;
-	case CEPH_OSD_OP_STAT:
-		rbd_osd_stat_callback(obj_request);
-		break;
-	case CEPH_OSD_OP_DELETE:
-	case CEPH_OSD_OP_TRUNCATE:
-	case CEPH_OSD_OP_ZERO:
-		rbd_osd_discard_callback(obj_request);
-		break;
-	case CEPH_OSD_OP_CALL:
-		rbd_osd_call_callback(obj_request);
-		break;
-	default:
-		rbd_warn(NULL, "unexpected OSD op: object_no %016llx opcode %d",
-			 obj_request->object_no, opcode);
-		break;
-	}
+	obj_req->result = osd_req->r_result < 0 ? osd_req->r_result : 0;
+	if (!obj_req->result && !rbd_img_is_write(obj_req->img_request))
+		obj_req->xferred = osd_req->r_result;
+	else
+		/*
+		 * Writes aren't allowed to return a data payload.  In some
+		 * guarded write cases (e.g. stat + zero on an empty object)
+		 * a stat response makes it through, but we don't care.
+		 */
+		obj_req->xferred = 0;
 
-	if (obj_request_done_test(obj_request))
-		rbd_obj_request_complete(obj_request);
+	rbd_obj_handle_request(obj_req);
 }
 
 static void rbd_osd_req_format_read(struct rbd_obj_request *obj_request)
 {
 	struct ceph_osd_request *osd_req = obj_request->osd_req;
 
-	rbd_assert(obj_request_img_data_test(obj_request));
+	osd_req->r_flags = CEPH_OSD_FLAG_READ;
 	osd_req->r_snapid = obj_request->img_request->snap_id;
 }
 
@@ -1887,32 +1432,33 @@ static void rbd_osd_req_format_write(struct rbd_obj_request *obj_request)
 {
 	struct ceph_osd_request *osd_req = obj_request->osd_req;
 
+	osd_req->r_flags = CEPH_OSD_FLAG_WRITE;
 	ktime_get_real_ts(&osd_req->r_mtime);
-	osd_req->r_data_offset = obj_request->offset;
+	osd_req->r_data_offset = obj_request->ex.oe_off;
 }
 
 static struct ceph_osd_request *
-__rbd_osd_req_create(struct rbd_device *rbd_dev,
-		     struct ceph_snap_context *snapc,
-		     int num_ops, unsigned int flags,
-		     struct rbd_obj_request *obj_request)
+rbd_osd_req_create(struct rbd_obj_request *obj_req, unsigned int num_ops)
 {
+	struct rbd_img_request *img_req = obj_req->img_request;
+	struct rbd_device *rbd_dev = img_req->rbd_dev;
 	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
 	struct ceph_osd_request *req;
 	const char *name_format = rbd_dev->image_format == 1 ?
 				      RBD_V1_DATA_FORMAT : RBD_V2_DATA_FORMAT;
 
-	req = ceph_osdc_alloc_request(osdc, snapc, num_ops, false, GFP_NOIO);
+	req = ceph_osdc_alloc_request(osdc,
+			(rbd_img_is_write(img_req) ? img_req->snapc : NULL),
+			num_ops, false, GFP_NOIO);
 	if (!req)
 		return NULL;
 
-	req->r_flags = flags;
 	req->r_callback = rbd_osd_req_callback;
-	req->r_priv = obj_request;
+	req->r_priv = obj_req;
 
 	req->r_base_oloc.pool = rbd_dev->layout.pool_id;
 	if (ceph_oid_aprintf(&req->r_base_oid, GFP_NOIO, name_format,
-			rbd_dev->header.object_prefix, obj_request->object_no))
+			rbd_dev->header.object_prefix, obj_req->ex.oe_objno))
 		goto err_req;
 
 	if (ceph_osdc_alloc_messages(req, GFP_NOIO))
@@ -1925,83 +1471,20 @@ err_req:
 	return NULL;
 }
 
-/*
- * Create an osd request.  A read request has one osd op (read).
- * A write request has either one (watch) or two (hint+write) osd ops.
- * (All rbd data writes are prefixed with an allocation hint op, but
- * technically osd watch is a write request, hence this distinction.)
- */
-static struct ceph_osd_request *rbd_osd_req_create(
-					struct rbd_device *rbd_dev,
-					enum obj_operation_type op_type,
-					unsigned int num_ops,
-					struct rbd_obj_request *obj_request)
-{
-	struct ceph_snap_context *snapc = NULL;
-
-	if (obj_request_img_data_test(obj_request) &&
-		(op_type == OBJ_OP_DISCARD || op_type == OBJ_OP_WRITE)) {
-		struct rbd_img_request *img_request = obj_request->img_request;
-		if (op_type == OBJ_OP_WRITE) {
-			rbd_assert(img_request_write_test(img_request));
-		} else {
-			rbd_assert(img_request_discard_test(img_request));
-		}
-		snapc = img_request->snapc;
-	}
-
-	rbd_assert(num_ops == 1 || ((op_type == OBJ_OP_WRITE) && num_ops == 2));
-
-	return __rbd_osd_req_create(rbd_dev, snapc, num_ops,
-	    (op_type == OBJ_OP_WRITE || op_type == OBJ_OP_DISCARD) ?
-	    CEPH_OSD_FLAG_WRITE : CEPH_OSD_FLAG_READ, obj_request);
-}
-
-/*
- * Create a copyup osd request based on the information in the object
- * request supplied.  A copyup request has two or three osd ops, a
- * copyup method call, potentially a hint op, and a write or truncate
- * or zero op.
- */
-static struct ceph_osd_request *
-rbd_osd_req_create_copyup(struct rbd_obj_request *obj_request)
-{
-	struct rbd_img_request *img_request;
-	int num_osd_ops = 3;
-
-	rbd_assert(obj_request_img_data_test(obj_request));
-	img_request = obj_request->img_request;
-	rbd_assert(img_request);
-	rbd_assert(img_request_write_test(img_request) ||
-			img_request_discard_test(img_request));
-
-	if (img_request_discard_test(img_request))
-		num_osd_ops = 2;
-
-	return __rbd_osd_req_create(img_request->rbd_dev,
-				    img_request->snapc, num_osd_ops,
-				    CEPH_OSD_FLAG_WRITE, obj_request);
-}
-
 static void rbd_osd_req_destroy(struct ceph_osd_request *osd_req)
 {
 	ceph_osdc_put_request(osd_req);
 }
 
-static struct rbd_obj_request *
-rbd_obj_request_create(enum obj_request_type type)
+static struct rbd_obj_request *rbd_obj_request_create(void)
 {
 	struct rbd_obj_request *obj_request;
 
-	rbd_assert(obj_request_type_valid(type));
-
 	obj_request = kmem_cache_zalloc(rbd_obj_request_cache, GFP_NOIO);
 	if (!obj_request)
 		return NULL;
 
-	obj_request->which = BAD_WHICH;
-	obj_request->type = type;
-	INIT_LIST_HEAD(&obj_request->links);
+	ceph_object_extent_init(&obj_request->ex);
 	kref_init(&obj_request->kref);
 
 	dout("%s %p\n", __func__, obj_request);
@@ -2011,32 +1494,34 @@ rbd_obj_request_create(enum obj_request_type type)
 static void rbd_obj_request_destroy(struct kref *kref)
 {
 	struct rbd_obj_request *obj_request;
+	u32 i;
 
 	obj_request = container_of(kref, struct rbd_obj_request, kref);
 
 	dout("%s: obj %p\n", __func__, obj_request);
 
-	rbd_assert(obj_request->img_request == NULL);
-	rbd_assert(obj_request->which == BAD_WHICH);
-
 	if (obj_request->osd_req)
 		rbd_osd_req_destroy(obj_request->osd_req);
 
-	rbd_assert(obj_request_type_valid(obj_request->type));
-	switch (obj_request->type) {
+	switch (obj_request->img_request->data_type) {
 	case OBJ_REQUEST_NODATA:
-		break;		/* Nothing to do */
 	case OBJ_REQUEST_BIO:
-		if (obj_request->bio_list)
-			bio_chain_put(obj_request->bio_list);
-		break;
-	case OBJ_REQUEST_PAGES:
-		/* img_data requests don't own their page array */
-		if (obj_request->pages &&
-		    !obj_request_img_data_test(obj_request))
-			ceph_release_page_vector(obj_request->pages,
-						obj_request->page_count);
+	case OBJ_REQUEST_BVECS:
+		break;		/* Nothing to do */
+	case OBJ_REQUEST_OWN_BVECS:
+		kfree(obj_request->bvec_pos.bvecs);
 		break;
+	default:
+		rbd_assert(0);
+	}
+
+	kfree(obj_request->img_extents);
+	if (obj_request->copyup_bvecs) {
+		for (i = 0; i < obj_request->copyup_bvec_count; i++) {
+			if (obj_request->copyup_bvecs[i].bv_page)
+				__free_page(obj_request->copyup_bvecs[i].bv_page);
+		}
+		kfree(obj_request->copyup_bvecs);
 	}
 
 	kmem_cache_free(rbd_obj_request_cache, obj_request);
@@ -2111,7 +1596,6 @@ static bool rbd_dev_parent_get(struct rbd_device *rbd_dev)
  */
 static struct rbd_img_request *rbd_img_request_create(
 					struct rbd_device *rbd_dev,
-					u64 offset, u64 length,
 					enum obj_operation_type op_type,
 					struct ceph_snap_context *snapc)
 {
@@ -2122,27 +1606,21 @@ static struct rbd_img_request *rbd_img_request_create(
 		return NULL;
 
 	img_request->rbd_dev = rbd_dev;
-	img_request->offset = offset;
-	img_request->length = length;
-	if (op_type == OBJ_OP_DISCARD) {
-		img_request_discard_set(img_request);
-		img_request->snapc = snapc;
-	} else if (op_type == OBJ_OP_WRITE) {
-		img_request_write_set(img_request);
-		img_request->snapc = snapc;
-	} else {
+	img_request->op_type = op_type;
+	if (!rbd_img_is_write(img_request))
 		img_request->snap_id = rbd_dev->spec->snap_id;
-	}
+	else
+		img_request->snapc = snapc;
+
 	if (rbd_dev_parent_get(rbd_dev))
 		img_request_layered_set(img_request);
 
 	spin_lock_init(&img_request->completion_lock);
-	INIT_LIST_HEAD(&img_request->obj_requests);
+	INIT_LIST_HEAD(&img_request->object_extents);
 	kref_init(&img_request->kref);
 
-	dout("%s: rbd_dev %p %s %llu/%llu -> img %p\n", __func__, rbd_dev,
-		obj_op_name(op_type), offset, length, img_request);
-
+	dout("%s: rbd_dev %p %s -> img %p\n", __func__, rbd_dev,
+	     obj_op_name(op_type), img_request);
 	return img_request;
 }
 
@@ -2165,829 +1643,934 @@ static void rbd_img_request_destroy(struct kref *kref)
 		rbd_dev_parent_put(img_request->rbd_dev);
 	}
 
-	if (img_request_write_test(img_request) ||
-		img_request_discard_test(img_request))
+	if (rbd_img_is_write(img_request))
 		ceph_put_snap_context(img_request->snapc);
 
 	kmem_cache_free(rbd_img_request_cache, img_request);
 }
 
-static struct rbd_img_request *rbd_parent_request_create(
-					struct rbd_obj_request *obj_request,
-					u64 img_offset, u64 length)
+static void prune_extents(struct ceph_file_extent *img_extents,
+			  u32 *num_img_extents, u64 overlap)
 {
-	struct rbd_img_request *parent_request;
-	struct rbd_device *rbd_dev;
+	u32 cnt = *num_img_extents;
 
-	rbd_assert(obj_request->img_request);
-	rbd_dev = obj_request->img_request->rbd_dev;
+	/* drop extents completely beyond the overlap */
+	while (cnt && img_extents[cnt - 1].fe_off >= overlap)
+		cnt--;
 
-	parent_request = rbd_img_request_create(rbd_dev->parent, img_offset,
-						length, OBJ_OP_READ, NULL);
-	if (!parent_request)
-		return NULL;
+	if (cnt) {
+		struct ceph_file_extent *ex = &img_extents[cnt - 1];
 
-	img_request_child_set(parent_request);
-	rbd_obj_request_get(obj_request);
-	parent_request->obj_request = obj_request;
+		/* trim final overlapping extent */
+		if (ex->fe_off + ex->fe_len > overlap)
+			ex->fe_len = overlap - ex->fe_off;
+	}
 
-	return parent_request;
+	*num_img_extents = cnt;
 }
 
-static void rbd_parent_request_destroy(struct kref *kref)
+/*
+ * Determine the byte range(s) covered by either just the object extent
+ * or the entire object in the parent image.
+ */
+static int rbd_obj_calc_img_extents(struct rbd_obj_request *obj_req,
+				    bool entire)
 {
-	struct rbd_img_request *parent_request;
-	struct rbd_obj_request *orig_request;
+	struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
+	int ret;
 
-	parent_request = container_of(kref, struct rbd_img_request, kref);
-	orig_request = parent_request->obj_request;
+	if (!rbd_dev->parent_overlap)
+		return 0;
 
-	parent_request->obj_request = NULL;
-	rbd_obj_request_put(orig_request);
-	img_request_child_clear(parent_request);
+	ret = ceph_extent_to_file(&rbd_dev->layout, obj_req->ex.oe_objno,
+				  entire ? 0 : obj_req->ex.oe_off,
+				  entire ? rbd_dev->layout.object_size :
+							obj_req->ex.oe_len,
+				  &obj_req->img_extents,
+				  &obj_req->num_img_extents);
+	if (ret)
+		return ret;
 
-	rbd_img_request_destroy(kref);
+	prune_extents(obj_req->img_extents, &obj_req->num_img_extents,
+		      rbd_dev->parent_overlap);
+	return 0;
 }
 
-static bool rbd_img_obj_end_request(struct rbd_obj_request *obj_request)
+static void rbd_osd_req_setup_data(struct rbd_obj_request *obj_req, u32 which)
 {
-	struct rbd_img_request *img_request;
-	unsigned int xferred;
-	int result;
-	bool more;
-
-	rbd_assert(obj_request_img_data_test(obj_request));
-	img_request = obj_request->img_request;
-
-	rbd_assert(obj_request->xferred <= (u64)UINT_MAX);
-	xferred = (unsigned int)obj_request->xferred;
-	result = obj_request->result;
-	if (result) {
-		struct rbd_device *rbd_dev = img_request->rbd_dev;
-		enum obj_operation_type op_type;
-
-		if (img_request_discard_test(img_request))
-			op_type = OBJ_OP_DISCARD;
-		else if (img_request_write_test(img_request))
-			op_type = OBJ_OP_WRITE;
-		else
-			op_type = OBJ_OP_READ;
-
-		rbd_warn(rbd_dev, "%s %llx at %llx (%llx)",
-			obj_op_name(op_type), obj_request->length,
-			obj_request->img_offset, obj_request->offset);
-		rbd_warn(rbd_dev, "  result %d xferred %x",
-			result, xferred);
-		if (!img_request->result)
-			img_request->result = result;
-		/*
-		 * Need to end I/O on the entire obj_request worth of
-		 * bytes in case of error.
-		 */
-		xferred = obj_request->length;
+	switch (obj_req->img_request->data_type) {
+	case OBJ_REQUEST_BIO:
+		osd_req_op_extent_osd_data_bio(obj_req->osd_req, which,
+					       &obj_req->bio_pos,
+					       obj_req->ex.oe_len);
+		break;
+	case OBJ_REQUEST_BVECS:
+	case OBJ_REQUEST_OWN_BVECS:
+		rbd_assert(obj_req->bvec_pos.iter.bi_size ==
+							obj_req->ex.oe_len);
+		rbd_assert(obj_req->bvec_idx == obj_req->bvec_count);
+		osd_req_op_extent_osd_data_bvec_pos(obj_req->osd_req, which,
+						    &obj_req->bvec_pos);
+		break;
+	default:
+		rbd_assert(0);
 	}
+}
 
-	if (img_request_child_test(img_request)) {
-		rbd_assert(img_request->obj_request != NULL);
-		more = obj_request->which < img_request->obj_request_count - 1;
-	} else {
-		blk_status_t status = errno_to_blk_status(result);
+static int rbd_obj_setup_read(struct rbd_obj_request *obj_req)
+{
+	obj_req->osd_req = rbd_osd_req_create(obj_req, 1);
+	if (!obj_req->osd_req)
+		return -ENOMEM;
 
-		rbd_assert(img_request->rq != NULL);
+	osd_req_op_extent_init(obj_req->osd_req, 0, CEPH_OSD_OP_READ,
+			       obj_req->ex.oe_off, obj_req->ex.oe_len, 0, 0);
+	rbd_osd_req_setup_data(obj_req, 0);
 
-		more = blk_update_request(img_request->rq, status, xferred);
-		if (!more)
-			__blk_mq_end_request(img_request->rq, status);
-	}
+	rbd_osd_req_format_read(obj_req);
+	return 0;
+}
+
+static int __rbd_obj_setup_stat(struct rbd_obj_request *obj_req,
+				unsigned int which)
+{
+	struct page **pages;
 
-	return more;
+	/*
+	 * The response data for a STAT call consists of:
+	 *     le64 length;
+	 *     struct {
+	 *         le32 tv_sec;
+	 *         le32 tv_nsec;
+	 *     } mtime;
+	 */
+	pages = ceph_alloc_page_vector(1, GFP_NOIO);
+	if (IS_ERR(pages))
+		return PTR_ERR(pages);
+
+	osd_req_op_init(obj_req->osd_req, which, CEPH_OSD_OP_STAT, 0);
+	osd_req_op_raw_data_in_pages(obj_req->osd_req, which, pages,
+				     8 + sizeof(struct ceph_timespec),
+				     0, false, true);
+	return 0;
 }
 
-static void rbd_img_obj_callback(struct rbd_obj_request *obj_request)
+static void __rbd_obj_setup_write(struct rbd_obj_request *obj_req,
+				  unsigned int which)
 {
-	struct rbd_img_request *img_request;
-	u32 which = obj_request->which;
-	bool more = true;
+	struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
+	u16 opcode;
 
-	rbd_assert(obj_request_img_data_test(obj_request));
-	img_request = obj_request->img_request;
+	osd_req_op_alloc_hint_init(obj_req->osd_req, which++,
+				   rbd_dev->layout.object_size,
+				   rbd_dev->layout.object_size);
 
-	dout("%s: img %p obj %p\n", __func__, img_request, obj_request);
-	rbd_assert(img_request != NULL);
-	rbd_assert(img_request->obj_request_count > 0);
-	rbd_assert(which != BAD_WHICH);
-	rbd_assert(which < img_request->obj_request_count);
+	if (rbd_obj_is_entire(obj_req))
+		opcode = CEPH_OSD_OP_WRITEFULL;
+	else
+		opcode = CEPH_OSD_OP_WRITE;
 
-	spin_lock_irq(&img_request->completion_lock);
-	if (which != img_request->next_completion)
-		goto out;
+	osd_req_op_extent_init(obj_req->osd_req, which, opcode,
+			       obj_req->ex.oe_off, obj_req->ex.oe_len, 0, 0);
+	rbd_osd_req_setup_data(obj_req, which++);
+
+	rbd_assert(which == obj_req->osd_req->r_num_ops);
+	rbd_osd_req_format_write(obj_req);
+}
 
-	for_each_obj_request_from(img_request, obj_request) {
-		rbd_assert(more);
-		rbd_assert(which < img_request->obj_request_count);
+static int rbd_obj_setup_write(struct rbd_obj_request *obj_req)
+{
+	unsigned int num_osd_ops, which = 0;
+	int ret;
 
-		if (!obj_request_done_test(obj_request))
-			break;
-		more = rbd_img_obj_end_request(obj_request);
-		which++;
+	/* reverse map the entire object onto the parent */
+	ret = rbd_obj_calc_img_extents(obj_req, true);
+	if (ret)
+		return ret;
+
+	if (obj_req->num_img_extents) {
+		obj_req->write_state = RBD_OBJ_WRITE_GUARD;
+		num_osd_ops = 3; /* stat + setallochint + write/writefull */
+	} else {
+		obj_req->write_state = RBD_OBJ_WRITE_FLAT;
+		num_osd_ops = 2; /* setallochint + write/writefull */
 	}
 
-	rbd_assert(more ^ (which == img_request->obj_request_count));
-	img_request->next_completion = which;
-out:
-	spin_unlock_irq(&img_request->completion_lock);
-	rbd_img_request_put(img_request);
+	obj_req->osd_req = rbd_osd_req_create(obj_req, num_osd_ops);
+	if (!obj_req->osd_req)
+		return -ENOMEM;
 
-	if (!more)
-		rbd_img_request_complete(img_request);
+	if (obj_req->num_img_extents) {
+		ret = __rbd_obj_setup_stat(obj_req, which++);
+		if (ret)
+			return ret;
+	}
+
+	__rbd_obj_setup_write(obj_req, which);
+	return 0;
 }
 
-/*
- * Add individual osd ops to the given ceph_osd_request and prepare
- * them for submission. num_ops is the current number of
- * osd operations already to the object request.
- */
-static void rbd_img_obj_request_fill(struct rbd_obj_request *obj_request,
-				struct ceph_osd_request *osd_request,
-				enum obj_operation_type op_type,
-				unsigned int num_ops)
-{
-	struct rbd_img_request *img_request = obj_request->img_request;
-	struct rbd_device *rbd_dev = img_request->rbd_dev;
-	u64 object_size = rbd_obj_bytes(&rbd_dev->header);
-	u64 offset = obj_request->offset;
-	u64 length = obj_request->length;
-	u64 img_end;
+static void __rbd_obj_setup_discard(struct rbd_obj_request *obj_req,
+				    unsigned int which)
+{
 	u16 opcode;
 
-	if (op_type == OBJ_OP_DISCARD) {
-		if (!offset && length == object_size &&
-		    (!img_request_layered_test(img_request) ||
-		     !obj_request_overlaps_parent(obj_request))) {
-			opcode = CEPH_OSD_OP_DELETE;
-		} else if ((offset + length == object_size)) {
+	if (rbd_obj_is_entire(obj_req)) {
+		if (obj_req->num_img_extents) {
+			osd_req_op_init(obj_req->osd_req, which++,
+					CEPH_OSD_OP_CREATE, 0);
 			opcode = CEPH_OSD_OP_TRUNCATE;
 		} else {
-			down_read(&rbd_dev->header_rwsem);
-			img_end = rbd_dev->header.image_size;
-			up_read(&rbd_dev->header_rwsem);
-
-			if (obj_request->img_offset + length == img_end)
-				opcode = CEPH_OSD_OP_TRUNCATE;
-			else
-				opcode = CEPH_OSD_OP_ZERO;
+			osd_req_op_init(obj_req->osd_req, which++,
+					CEPH_OSD_OP_DELETE, 0);
+			opcode = 0;
 		}
-	} else if (op_type == OBJ_OP_WRITE) {
-		if (!offset && length == object_size)
-			opcode = CEPH_OSD_OP_WRITEFULL;
-		else
-			opcode = CEPH_OSD_OP_WRITE;
-		osd_req_op_alloc_hint_init(osd_request, num_ops,
-					object_size, object_size);
-		num_ops++;
+	} else if (rbd_obj_is_tail(obj_req)) {
+		opcode = CEPH_OSD_OP_TRUNCATE;
 	} else {
-		opcode = CEPH_OSD_OP_READ;
+		opcode = CEPH_OSD_OP_ZERO;
 	}
 
-	if (opcode == CEPH_OSD_OP_DELETE)
-		osd_req_op_init(osd_request, num_ops, opcode, 0);
-	else
-		osd_req_op_extent_init(osd_request, num_ops, opcode,
-				       offset, length, 0, 0);
-
-	if (obj_request->type == OBJ_REQUEST_BIO)
-		osd_req_op_extent_osd_data_bio(osd_request, num_ops,
-					obj_request->bio_list, length);
-	else if (obj_request->type == OBJ_REQUEST_PAGES)
-		osd_req_op_extent_osd_data_pages(osd_request, num_ops,
-					obj_request->pages, length,
-					offset & ~PAGE_MASK, false, false);
-
-	/* Discards are also writes */
-	if (op_type == OBJ_OP_WRITE || op_type == OBJ_OP_DISCARD)
-		rbd_osd_req_format_write(obj_request);
-	else
-		rbd_osd_req_format_read(obj_request);
+	if (opcode)
+		osd_req_op_extent_init(obj_req->osd_req, which++, opcode,
+				       obj_req->ex.oe_off, obj_req->ex.oe_len,
+				       0, 0);
+
+	rbd_assert(which == obj_req->osd_req->r_num_ops);
+	rbd_osd_req_format_write(obj_req);
 }
 
-/*
- * Split up an image request into one or more object requests, each
- * to a different object.  The "type" parameter indicates whether
- * "data_desc" is the pointer to the head of a list of bio
- * structures, or the base of a page array.  In either case this
- * function assumes data_desc describes memory sufficient to hold
- * all data described by the image request.
- */
-static int rbd_img_request_fill(struct rbd_img_request *img_request,
-					enum obj_request_type type,
-					void *data_desc)
+static int rbd_obj_setup_discard(struct rbd_obj_request *obj_req)
 {
-	struct rbd_device *rbd_dev = img_request->rbd_dev;
-	struct rbd_obj_request *obj_request = NULL;
-	struct rbd_obj_request *next_obj_request;
-	struct bio *bio_list = NULL;
-	unsigned int bio_offset = 0;
-	struct page **pages = NULL;
-	enum obj_operation_type op_type;
-	u64 img_offset;
-	u64 resid;
-
-	dout("%s: img %p type %d data_desc %p\n", __func__, img_request,
-		(int)type, data_desc);
+	unsigned int num_osd_ops, which = 0;
+	int ret;
 
-	img_offset = img_request->offset;
-	resid = img_request->length;
-	rbd_assert(resid > 0);
-	op_type = rbd_img_request_op_type(img_request);
+	/* reverse map the entire object onto the parent */
+	ret = rbd_obj_calc_img_extents(obj_req, true);
+	if (ret)
+		return ret;
 
-	if (type == OBJ_REQUEST_BIO) {
-		bio_list = data_desc;
-		rbd_assert(img_offset ==
-			   bio_list->bi_iter.bi_sector << SECTOR_SHIFT);
-	} else if (type == OBJ_REQUEST_PAGES) {
-		pages = data_desc;
+	if (rbd_obj_is_entire(obj_req)) {
+		obj_req->write_state = RBD_OBJ_WRITE_FLAT;
+		if (obj_req->num_img_extents)
+			num_osd_ops = 2; /* create + truncate */
+		else
+			num_osd_ops = 1; /* delete */
+	} else {
+		if (obj_req->num_img_extents) {
+			obj_req->write_state = RBD_OBJ_WRITE_GUARD;
+			num_osd_ops = 2; /* stat + truncate/zero */
+		} else {
+			obj_req->write_state = RBD_OBJ_WRITE_FLAT;
+			num_osd_ops = 1; /* truncate/zero */
+		}
 	}
 
-	while (resid) {
-		struct ceph_osd_request *osd_req;
-		u64 object_no = img_offset >> rbd_dev->header.obj_order;
-		u64 offset = rbd_segment_offset(rbd_dev, img_offset);
-		u64 length = rbd_segment_length(rbd_dev, img_offset, resid);
-
-		obj_request = rbd_obj_request_create(type);
-		if (!obj_request)
-			goto out_unwind;
-
-		obj_request->object_no = object_no;
-		obj_request->offset = offset;
-		obj_request->length = length;
-
-		/*
-		 * set obj_request->img_request before creating the
-		 * osd_request so that it gets the right snapc
-		 */
-		rbd_img_obj_request_add(img_request, obj_request);
-
-		if (type == OBJ_REQUEST_BIO) {
-			unsigned int clone_size;
-
-			rbd_assert(length <= (u64)UINT_MAX);
-			clone_size = (unsigned int)length;
-			obj_request->bio_list =
-					bio_chain_clone_range(&bio_list,
-								&bio_offset,
-								clone_size,
-								GFP_NOIO);
-			if (!obj_request->bio_list)
-				goto out_unwind;
-		} else if (type == OBJ_REQUEST_PAGES) {
-			unsigned int page_count;
-
-			obj_request->pages = pages;
-			page_count = (u32)calc_pages_for(offset, length);
-			obj_request->page_count = page_count;
-			if ((offset + length) & ~PAGE_MASK)
-				page_count--;	/* more on last page */
-			pages += page_count;
-		}
+	obj_req->osd_req = rbd_osd_req_create(obj_req, num_osd_ops);
+	if (!obj_req->osd_req)
+		return -ENOMEM;
 
-		osd_req = rbd_osd_req_create(rbd_dev, op_type,
-					(op_type == OBJ_OP_WRITE) ? 2 : 1,
-					obj_request);
-		if (!osd_req)
-			goto out_unwind;
+	if (!rbd_obj_is_entire(obj_req) && obj_req->num_img_extents) {
+		ret = __rbd_obj_setup_stat(obj_req, which++);
+		if (ret)
+			return ret;
+	}
 
-		obj_request->osd_req = osd_req;
-		obj_request->callback = rbd_img_obj_callback;
-		obj_request->img_offset = img_offset;
+	__rbd_obj_setup_discard(obj_req, which);
+	return 0;
+}
 
-		rbd_img_obj_request_fill(obj_request, osd_req, op_type, 0);
+/*
+ * For each object request in @img_req, allocate an OSD request, add
+ * individual OSD ops and prepare them for submission.  The number of
+ * OSD ops depends on op_type and the overlap point (if any).
+ */
+static int __rbd_img_fill_request(struct rbd_img_request *img_req)
+{
+	struct rbd_obj_request *obj_req;
+	int ret;
 
-		img_offset += length;
-		resid -= length;
+	for_each_obj_request(img_req, obj_req) {
+		switch (img_req->op_type) {
+		case OBJ_OP_READ:
+			ret = rbd_obj_setup_read(obj_req);
+			break;
+		case OBJ_OP_WRITE:
+			ret = rbd_obj_setup_write(obj_req);
+			break;
+		case OBJ_OP_DISCARD:
+			ret = rbd_obj_setup_discard(obj_req);
+			break;
+		default:
+			rbd_assert(0);
+		}
+		if (ret)
+			return ret;
 	}
 
 	return 0;
+}
 
-out_unwind:
-	for_each_obj_request_safe(img_request, obj_request, next_obj_request)
-		rbd_img_obj_request_del(img_request, obj_request);
+union rbd_img_fill_iter {
+	struct ceph_bio_iter	bio_iter;
+	struct ceph_bvec_iter	bvec_iter;
+};
 
-	return -ENOMEM;
-}
+struct rbd_img_fill_ctx {
+	enum obj_request_type	pos_type;
+	union rbd_img_fill_iter	*pos;
+	union rbd_img_fill_iter	iter;
+	ceph_object_extent_fn_t	set_pos_fn;
+	ceph_object_extent_fn_t	count_fn;
+	ceph_object_extent_fn_t	copy_fn;
+};
 
-static void
-rbd_osd_copyup_callback(struct rbd_obj_request *obj_request)
+static struct ceph_object_extent *alloc_object_extent(void *arg)
 {
-	struct rbd_img_request *img_request;
-	struct rbd_device *rbd_dev;
-	struct page **pages;
-	u32 page_count;
+	struct rbd_img_request *img_req = arg;
+	struct rbd_obj_request *obj_req;
 
-	dout("%s: obj %p\n", __func__, obj_request);
+	obj_req = rbd_obj_request_create();
+	if (!obj_req)
+		return NULL;
 
-	rbd_assert(obj_request->type == OBJ_REQUEST_BIO ||
-		obj_request->type == OBJ_REQUEST_NODATA);
-	rbd_assert(obj_request_img_data_test(obj_request));
-	img_request = obj_request->img_request;
-	rbd_assert(img_request);
+	rbd_img_obj_request_add(img_req, obj_req);
+	return &obj_req->ex;
+}
 
-	rbd_dev = img_request->rbd_dev;
-	rbd_assert(rbd_dev);
+/*
+ * While su != os && sc == 1 is technically not fancy (it's the same
+ * layout as su == os && sc == 1), we can't use the nocopy path for it
+ * because ->set_pos_fn() should be called only once per object.
+ * ceph_file_to_extents() invokes action_fn once per stripe unit, so
+ * treat su != os && sc == 1 as fancy.
+ */
+static bool rbd_layout_is_fancy(struct ceph_file_layout *l)
+{
+	return l->stripe_unit != l->object_size;
+}
 
-	pages = obj_request->copyup_pages;
-	rbd_assert(pages != NULL);
-	obj_request->copyup_pages = NULL;
-	page_count = obj_request->copyup_page_count;
-	rbd_assert(page_count);
-	obj_request->copyup_page_count = 0;
-	ceph_release_page_vector(pages, page_count);
+static int rbd_img_fill_request_nocopy(struct rbd_img_request *img_req,
+				       struct ceph_file_extent *img_extents,
+				       u32 num_img_extents,
+				       struct rbd_img_fill_ctx *fctx)
+{
+	u32 i;
+	int ret;
+
+	img_req->data_type = fctx->pos_type;
 
 	/*
-	 * We want the transfer count to reflect the size of the
-	 * original write request.  There is no such thing as a
-	 * successful short write, so if the request was successful
-	 * we can just set it to the originally-requested length.
+	 * Create object requests and set each object request's starting
+	 * position in the provided bio (list) or bio_vec array.
 	 */
-	if (!obj_request->result)
-		obj_request->xferred = obj_request->length;
+	fctx->iter = *fctx->pos;
+	for (i = 0; i < num_img_extents; i++) {
+		ret = ceph_file_to_extents(&img_req->rbd_dev->layout,
+					   img_extents[i].fe_off,
+					   img_extents[i].fe_len,
+					   &img_req->object_extents,
+					   alloc_object_extent, img_req,
+					   fctx->set_pos_fn, &fctx->iter);
+		if (ret)
+			return ret;
+	}
 
-	obj_request_done_set(obj_request);
+	return __rbd_img_fill_request(img_req);
 }
 
-static void
-rbd_img_obj_parent_read_full_callback(struct rbd_img_request *img_request)
+/*
+ * Map a list of image extents to a list of object extents, create the
+ * corresponding object requests (normally each to a different object,
+ * but not always) and add them to @img_req.  For each object request,
+ * set up its data descriptor to point to the corresponding chunk(s) of
+ * @fctx->pos data buffer.
+ *
+ * Because ceph_file_to_extents() will merge adjacent object extents
+ * together, each object request's data descriptor may point to multiple
+ * different chunks of @fctx->pos data buffer.
+ *
+ * @fctx->pos data buffer is assumed to be large enough.
+ */
+static int rbd_img_fill_request(struct rbd_img_request *img_req,
+				struct ceph_file_extent *img_extents,
+				u32 num_img_extents,
+				struct rbd_img_fill_ctx *fctx)
 {
-	struct rbd_obj_request *orig_request;
-	struct ceph_osd_request *osd_req;
-	struct rbd_device *rbd_dev;
-	struct page **pages;
-	enum obj_operation_type op_type;
-	u32 page_count;
-	int img_result;
-	u64 parent_length;
-
-	rbd_assert(img_request_child_test(img_request));
-
-	/* First get what we need from the image request */
-
-	pages = img_request->copyup_pages;
-	rbd_assert(pages != NULL);
-	img_request->copyup_pages = NULL;
-	page_count = img_request->copyup_page_count;
-	rbd_assert(page_count);
-	img_request->copyup_page_count = 0;
-
-	orig_request = img_request->obj_request;
-	rbd_assert(orig_request != NULL);
-	rbd_assert(obj_request_type_valid(orig_request->type));
-	img_result = img_request->result;
-	parent_length = img_request->length;
-	rbd_assert(img_result || parent_length == img_request->xferred);
-	rbd_img_request_put(img_request);
+	struct rbd_device *rbd_dev = img_req->rbd_dev;
+	struct rbd_obj_request *obj_req;
+	u32 i;
+	int ret;
 
-	rbd_assert(orig_request->img_request);
-	rbd_dev = orig_request->img_request->rbd_dev;
-	rbd_assert(rbd_dev);
+	if (fctx->pos_type == OBJ_REQUEST_NODATA ||
+	    !rbd_layout_is_fancy(&rbd_dev->layout))
+		return rbd_img_fill_request_nocopy(img_req, img_extents,
+						   num_img_extents, fctx);
+
+	img_req->data_type = OBJ_REQUEST_OWN_BVECS;
 
 	/*
-	 * If the overlap has become 0 (most likely because the
-	 * image has been flattened) we need to free the pages
-	 * and re-submit the original write request.
+	 * Create object requests and determine ->bvec_count for each object
+	 * request.  Note that ->bvec_count sum over all object requests may
+	 * be greater than the number of bio_vecs in the provided bio (list)
+	 * or bio_vec array because when mapped, those bio_vecs can straddle
+	 * stripe unit boundaries.
 	 */
-	if (!rbd_dev->parent_overlap) {
-		ceph_release_page_vector(pages, page_count);
-		rbd_obj_request_submit(orig_request);
-		return;
+	fctx->iter = *fctx->pos;
+	for (i = 0; i < num_img_extents; i++) {
+		ret = ceph_file_to_extents(&rbd_dev->layout,
+					   img_extents[i].fe_off,
+					   img_extents[i].fe_len,
+					   &img_req->object_extents,
+					   alloc_object_extent, img_req,
+					   fctx->count_fn, &fctx->iter);
+		if (ret)
+			return ret;
 	}
 
-	if (img_result)
-		goto out_err;
+	for_each_obj_request(img_req, obj_req) {
+		obj_req->bvec_pos.bvecs = kmalloc_array(obj_req->bvec_count,
+					      sizeof(*obj_req->bvec_pos.bvecs),
+					      GFP_NOIO);
+		if (!obj_req->bvec_pos.bvecs)
+			return -ENOMEM;
+	}
 
 	/*
-	 * The original osd request is of no use to use any more.
-	 * We need a new one that can hold the three ops in a copyup
-	 * request.  Allocate the new copyup osd request for the
-	 * original request, and release the old one.
+	 * Fill in each object request's private bio_vec array, splitting and
+	 * rearranging the provided bio_vecs in stripe unit chunks as needed.
 	 */
-	img_result = -ENOMEM;
-	osd_req = rbd_osd_req_create_copyup(orig_request);
-	if (!osd_req)
-		goto out_err;
-	rbd_osd_req_destroy(orig_request->osd_req);
-	orig_request->osd_req = osd_req;
-	orig_request->copyup_pages = pages;
-	orig_request->copyup_page_count = page_count;
+	fctx->iter = *fctx->pos;
+	for (i = 0; i < num_img_extents; i++) {
+		ret = ceph_iterate_extents(&rbd_dev->layout,
+					   img_extents[i].fe_off,
+					   img_extents[i].fe_len,
+					   &img_req->object_extents,
+					   fctx->copy_fn, &fctx->iter);
+		if (ret)
+			return ret;
+	}
 
-	/* Initialize the copyup op */
+	return __rbd_img_fill_request(img_req);
+}
 
-	osd_req_op_cls_init(osd_req, 0, CEPH_OSD_OP_CALL, "rbd", "copyup");
-	osd_req_op_cls_request_data_pages(osd_req, 0, pages, parent_length, 0,
-						false, false);
+static int rbd_img_fill_nodata(struct rbd_img_request *img_req,
+			       u64 off, u64 len)
+{
+	struct ceph_file_extent ex = { off, len };
+	union rbd_img_fill_iter dummy;
+	struct rbd_img_fill_ctx fctx = {
+		.pos_type = OBJ_REQUEST_NODATA,
+		.pos = &dummy,
+	};
 
-	/* Add the other op(s) */
+	return rbd_img_fill_request(img_req, &ex, 1, &fctx);
+}
 
-	op_type = rbd_img_request_op_type(orig_request->img_request);
-	rbd_img_obj_request_fill(orig_request, osd_req, op_type, 1);
+static void set_bio_pos(struct ceph_object_extent *ex, u32 bytes, void *arg)
+{
+	struct rbd_obj_request *obj_req =
+	    container_of(ex, struct rbd_obj_request, ex);
+	struct ceph_bio_iter *it = arg;
 
-	/* All set, send it off. */
+	dout("%s objno %llu bytes %u\n", __func__, ex->oe_objno, bytes);
+	obj_req->bio_pos = *it;
+	ceph_bio_iter_advance(it, bytes);
+}
 
-	rbd_obj_request_submit(orig_request);
-	return;
+static void count_bio_bvecs(struct ceph_object_extent *ex, u32 bytes, void *arg)
+{
+	struct rbd_obj_request *obj_req =
+	    container_of(ex, struct rbd_obj_request, ex);
+	struct ceph_bio_iter *it = arg;
+
+	dout("%s objno %llu bytes %u\n", __func__, ex->oe_objno, bytes);
+	ceph_bio_iter_advance_step(it, bytes, ({
+		obj_req->bvec_count++;
+	}));
 
-out_err:
-	ceph_release_page_vector(pages, page_count);
-	rbd_obj_request_error(orig_request, img_result);
 }
 
-/*
- * Read from the parent image the range of data that covers the
- * entire target of the given object request.  This is used for
- * satisfying a layered image write request when the target of an
- * object request from the image request does not exist.
- *
- * A page array big enough to hold the returned data is allocated
- * and supplied to rbd_img_request_fill() as the "data descriptor."
- * When the read completes, this page array will be transferred to
- * the original object request for the copyup operation.
- *
- * If an error occurs, it is recorded as the result of the original
- * object request in rbd_img_obj_exists_callback().
- */
-static int rbd_img_obj_parent_read_full(struct rbd_obj_request *obj_request)
-{
-	struct rbd_device *rbd_dev = obj_request->img_request->rbd_dev;
-	struct rbd_img_request *parent_request = NULL;
-	u64 img_offset;
-	u64 length;
-	struct page **pages = NULL;
-	u32 page_count;
-	int result;
+static void copy_bio_bvecs(struct ceph_object_extent *ex, u32 bytes, void *arg)
+{
+	struct rbd_obj_request *obj_req =
+	    container_of(ex, struct rbd_obj_request, ex);
+	struct ceph_bio_iter *it = arg;
 
-	rbd_assert(rbd_dev->parent != NULL);
+	dout("%s objno %llu bytes %u\n", __func__, ex->oe_objno, bytes);
+	ceph_bio_iter_advance_step(it, bytes, ({
+		obj_req->bvec_pos.bvecs[obj_req->bvec_idx++] = bv;
+		obj_req->bvec_pos.iter.bi_size += bv.bv_len;
+	}));
+}
 
-	/*
-	 * Determine the byte range covered by the object in the
-	 * child image to which the original request was to be sent.
-	 */
-	img_offset = obj_request->img_offset - obj_request->offset;
-	length = rbd_obj_bytes(&rbd_dev->header);
+static int __rbd_img_fill_from_bio(struct rbd_img_request *img_req,
+				   struct ceph_file_extent *img_extents,
+				   u32 num_img_extents,
+				   struct ceph_bio_iter *bio_pos)
+{
+	struct rbd_img_fill_ctx fctx = {
+		.pos_type = OBJ_REQUEST_BIO,
+		.pos = (union rbd_img_fill_iter *)bio_pos,
+		.set_pos_fn = set_bio_pos,
+		.count_fn = count_bio_bvecs,
+		.copy_fn = copy_bio_bvecs,
+	};
 
-	/*
-	 * There is no defined parent data beyond the parent
-	 * overlap, so limit what we read at that boundary if
-	 * necessary.
-	 */
-	if (img_offset + length > rbd_dev->parent_overlap) {
-		rbd_assert(img_offset < rbd_dev->parent_overlap);
-		length = rbd_dev->parent_overlap - img_offset;
-	}
+	return rbd_img_fill_request(img_req, img_extents, num_img_extents,
+				    &fctx);
+}
 
-	/*
-	 * Allocate a page array big enough to receive the data read
-	 * from the parent.
-	 */
-	page_count = (u32)calc_pages_for(0, length);
-	pages = ceph_alloc_page_vector(page_count, GFP_NOIO);
-	if (IS_ERR(pages)) {
-		result = PTR_ERR(pages);
-		pages = NULL;
-		goto out_err;
-	}
+static int rbd_img_fill_from_bio(struct rbd_img_request *img_req,
+				 u64 off, u64 len, struct bio *bio)
+{
+	struct ceph_file_extent ex = { off, len };
+	struct ceph_bio_iter it = { .bio = bio, .iter = bio->bi_iter };
 
-	result = -ENOMEM;
-	parent_request = rbd_parent_request_create(obj_request,
-						img_offset, length);
-	if (!parent_request)
-		goto out_err;
+	return __rbd_img_fill_from_bio(img_req, &ex, 1, &it);
+}
 
-	result = rbd_img_request_fill(parent_request, OBJ_REQUEST_PAGES, pages);
-	if (result)
-		goto out_err;
+static void set_bvec_pos(struct ceph_object_extent *ex, u32 bytes, void *arg)
+{
+	struct rbd_obj_request *obj_req =
+	    container_of(ex, struct rbd_obj_request, ex);
+	struct ceph_bvec_iter *it = arg;
 
-	parent_request->copyup_pages = pages;
-	parent_request->copyup_page_count = page_count;
-	parent_request->callback = rbd_img_obj_parent_read_full_callback;
+	obj_req->bvec_pos = *it;
+	ceph_bvec_iter_shorten(&obj_req->bvec_pos, bytes);
+	ceph_bvec_iter_advance(it, bytes);
+}
 
-	result = rbd_img_request_submit(parent_request);
-	if (!result)
-		return 0;
+static void count_bvecs(struct ceph_object_extent *ex, u32 bytes, void *arg)
+{
+	struct rbd_obj_request *obj_req =
+	    container_of(ex, struct rbd_obj_request, ex);
+	struct ceph_bvec_iter *it = arg;
 
-	parent_request->copyup_pages = NULL;
-	parent_request->copyup_page_count = 0;
-out_err:
-	if (pages)
-		ceph_release_page_vector(pages, page_count);
-	if (parent_request)
-		rbd_img_request_put(parent_request);
-	return result;
+	ceph_bvec_iter_advance_step(it, bytes, ({
+		obj_req->bvec_count++;
+	}));
 }
 
-static void rbd_img_obj_exists_callback(struct rbd_obj_request *obj_request)
+static void copy_bvecs(struct ceph_object_extent *ex, u32 bytes, void *arg)
 {
-	struct rbd_obj_request *orig_request;
-	struct rbd_device *rbd_dev;
-	int result;
+	struct rbd_obj_request *obj_req =
+	    container_of(ex, struct rbd_obj_request, ex);
+	struct ceph_bvec_iter *it = arg;
 
-	rbd_assert(!obj_request_img_data_test(obj_request));
+	ceph_bvec_iter_advance_step(it, bytes, ({
+		obj_req->bvec_pos.bvecs[obj_req->bvec_idx++] = bv;
+		obj_req->bvec_pos.iter.bi_size += bv.bv_len;
+	}));
+}
 
-	/*
-	 * All we need from the object request is the original
-	 * request and the result of the STAT op.  Grab those, then
-	 * we're done with the request.
-	 */
-	orig_request = obj_request->obj_request;
-	obj_request->obj_request = NULL;
-	rbd_obj_request_put(orig_request);
-	rbd_assert(orig_request);
-	rbd_assert(orig_request->img_request);
-
-	result = obj_request->result;
-	obj_request->result = 0;
-
-	dout("%s: obj %p for obj %p result %d %llu/%llu\n", __func__,
-		obj_request, orig_request, result,
-		obj_request->xferred, obj_request->length);
-	rbd_obj_request_put(obj_request);
+static int __rbd_img_fill_from_bvecs(struct rbd_img_request *img_req,
+				     struct ceph_file_extent *img_extents,
+				     u32 num_img_extents,
+				     struct ceph_bvec_iter *bvec_pos)
+{
+	struct rbd_img_fill_ctx fctx = {
+		.pos_type = OBJ_REQUEST_BVECS,
+		.pos = (union rbd_img_fill_iter *)bvec_pos,
+		.set_pos_fn = set_bvec_pos,
+		.count_fn = count_bvecs,
+		.copy_fn = copy_bvecs,
+	};
 
-	/*
-	 * If the overlap has become 0 (most likely because the
-	 * image has been flattened) we need to re-submit the
-	 * original request.
-	 */
-	rbd_dev = orig_request->img_request->rbd_dev;
-	if (!rbd_dev->parent_overlap) {
-		rbd_obj_request_submit(orig_request);
-		return;
-	}
+	return rbd_img_fill_request(img_req, img_extents, num_img_extents,
+				    &fctx);
+}
 
-	/*
-	 * Our only purpose here is to determine whether the object
-	 * exists, and we don't want to treat the non-existence as
-	 * an error.  If something else comes back, transfer the
-	 * error to the original request and complete it now.
-	 */
-	if (!result) {
-		obj_request_existence_set(orig_request, true);
-	} else if (result == -ENOENT) {
-		obj_request_existence_set(orig_request, false);
-	} else {
-		goto fail_orig_request;
-	}
+static int rbd_img_fill_from_bvecs(struct rbd_img_request *img_req,
+				   struct ceph_file_extent *img_extents,
+				   u32 num_img_extents,
+				   struct bio_vec *bvecs)
+{
+	struct ceph_bvec_iter it = {
+		.bvecs = bvecs,
+		.iter = { .bi_size = ceph_file_extents_bytes(img_extents,
+							     num_img_extents) },
+	};
 
-	/*
-	 * Resubmit the original request now that we have recorded
-	 * whether the target object exists.
-	 */
-	result = rbd_img_obj_request_submit(orig_request);
-	if (result)
-		goto fail_orig_request;
+	return __rbd_img_fill_from_bvecs(img_req, img_extents, num_img_extents,
+					 &it);
+}
 
-	return;
+static void rbd_img_request_submit(struct rbd_img_request *img_request)
+{
+	struct rbd_obj_request *obj_request;
+
+	dout("%s: img %p\n", __func__, img_request);
+
+	rbd_img_request_get(img_request);
+	for_each_obj_request(img_request, obj_request)
+		rbd_obj_request_submit(obj_request);
 
-fail_orig_request:
-	rbd_obj_request_error(orig_request, result);
+	rbd_img_request_put(img_request);
 }
 
-static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request)
+static int rbd_obj_read_from_parent(struct rbd_obj_request *obj_req)
 {
-	struct rbd_device *rbd_dev = obj_request->img_request->rbd_dev;
-	struct rbd_obj_request *stat_request;
-	struct page **pages;
-	u32 page_count;
-	size_t size;
+	struct rbd_img_request *img_req = obj_req->img_request;
+	struct rbd_img_request *child_img_req;
 	int ret;
 
-	stat_request = rbd_obj_request_create(OBJ_REQUEST_PAGES);
-	if (!stat_request)
+	child_img_req = rbd_img_request_create(img_req->rbd_dev->parent,
+					       OBJ_OP_READ, NULL);
+	if (!child_img_req)
 		return -ENOMEM;
 
-	stat_request->object_no = obj_request->object_no;
+	__set_bit(IMG_REQ_CHILD, &child_img_req->flags);
+	child_img_req->obj_request = obj_req;
 
-	stat_request->osd_req = rbd_osd_req_create(rbd_dev, OBJ_OP_READ, 1,
-						   stat_request);
-	if (!stat_request->osd_req) {
-		ret = -ENOMEM;
-		goto fail_stat_request;
+	if (!rbd_img_is_write(img_req)) {
+		switch (img_req->data_type) {
+		case OBJ_REQUEST_BIO:
+			ret = __rbd_img_fill_from_bio(child_img_req,
+						      obj_req->img_extents,
+						      obj_req->num_img_extents,
+						      &obj_req->bio_pos);
+			break;
+		case OBJ_REQUEST_BVECS:
+		case OBJ_REQUEST_OWN_BVECS:
+			ret = __rbd_img_fill_from_bvecs(child_img_req,
+						      obj_req->img_extents,
+						      obj_req->num_img_extents,
+						      &obj_req->bvec_pos);
+			break;
+		default:
+			rbd_assert(0);
+		}
+	} else {
+		ret = rbd_img_fill_from_bvecs(child_img_req,
+					      obj_req->img_extents,
+					      obj_req->num_img_extents,
+					      obj_req->copyup_bvecs);
+	}
+	if (ret) {
+		rbd_img_request_put(child_img_req);
+		return ret;
+	}
+
+	rbd_img_request_submit(child_img_req);
+	return 0;
+}
+
+static bool rbd_obj_handle_read(struct rbd_obj_request *obj_req)
+{
+	struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
+	int ret;
+
+	if (obj_req->result == -ENOENT &&
+	    rbd_dev->parent_overlap && !obj_req->tried_parent) {
+		/* reverse map this object extent onto the parent */
+		ret = rbd_obj_calc_img_extents(obj_req, false);
+		if (ret) {
+			obj_req->result = ret;
+			return true;
+		}
+
+		if (obj_req->num_img_extents) {
+			obj_req->tried_parent = true;
+			ret = rbd_obj_read_from_parent(obj_req);
+			if (ret) {
+				obj_req->result = ret;
+				return true;
+			}
+			return false;
+		}
 	}
 
 	/*
-	 * The response data for a STAT call consists of:
-	 *     le64 length;
-	 *     struct {
-	 *         le32 tv_sec;
-	 *         le32 tv_nsec;
-	 *     } mtime;
+	 * -ENOENT means a hole in the image -- zero-fill the entire
+	 * length of the request.  A short read also implies zero-fill
+	 * to the end of the request.  In both cases we update xferred
+	 * count to indicate the whole request was satisfied.
 	 */
-	size = sizeof (__le64) + sizeof (__le32) + sizeof (__le32);
-	page_count = (u32)calc_pages_for(0, size);
-	pages = ceph_alloc_page_vector(page_count, GFP_NOIO);
-	if (IS_ERR(pages)) {
-		ret = PTR_ERR(pages);
-		goto fail_stat_request;
+	if (obj_req->result == -ENOENT ||
+	    (!obj_req->result && obj_req->xferred < obj_req->ex.oe_len)) {
+		rbd_assert(!obj_req->xferred || !obj_req->result);
+		rbd_obj_zero_range(obj_req, obj_req->xferred,
+				   obj_req->ex.oe_len - obj_req->xferred);
+		obj_req->result = 0;
+		obj_req->xferred = obj_req->ex.oe_len;
 	}
 
-	osd_req_op_init(stat_request->osd_req, 0, CEPH_OSD_OP_STAT, 0);
-	osd_req_op_raw_data_in_pages(stat_request->osd_req, 0, pages, size, 0,
-				     false, false);
-
-	rbd_obj_request_get(obj_request);
-	stat_request->obj_request = obj_request;
-	stat_request->pages = pages;
-	stat_request->page_count = page_count;
-	stat_request->callback = rbd_img_obj_exists_callback;
+	return true;
+}
 
-	rbd_obj_request_submit(stat_request);
-	return 0;
+/*
+ * copyup_bvecs pages are never highmem pages
+ */
+static bool is_zero_bvecs(struct bio_vec *bvecs, u32 bytes)
+{
+	struct ceph_bvec_iter it = {
+		.bvecs = bvecs,
+		.iter = { .bi_size = bytes },
+	};
 
-fail_stat_request:
-	rbd_obj_request_put(stat_request);
-	return ret;
+	ceph_bvec_iter_advance_step(&it, bytes, ({
+		if (memchr_inv(page_address(bv.bv_page) + bv.bv_offset, 0,
+			       bv.bv_len))
+			return false;
+	}));
+	return true;
 }
 
-static bool img_obj_request_simple(struct rbd_obj_request *obj_request)
+static int rbd_obj_issue_copyup(struct rbd_obj_request *obj_req, u32 bytes)
 {
-	struct rbd_img_request *img_request = obj_request->img_request;
-	struct rbd_device *rbd_dev = img_request->rbd_dev;
+	unsigned int num_osd_ops = obj_req->osd_req->r_num_ops;
 
-	/* Reads */
-	if (!img_request_write_test(img_request) &&
-	    !img_request_discard_test(img_request))
-		return true;
-
-	/* Non-layered writes */
-	if (!img_request_layered_test(img_request))
-		return true;
+	dout("%s obj_req %p bytes %u\n", __func__, obj_req, bytes);
+	rbd_assert(obj_req->osd_req->r_ops[0].op == CEPH_OSD_OP_STAT);
+	rbd_osd_req_destroy(obj_req->osd_req);
 
 	/*
-	 * Layered writes outside of the parent overlap range don't
-	 * share any data with the parent.
+	 * Create a copyup request with the same number of OSD ops as
+	 * the original request.  The original request was stat + op(s),
+	 * the new copyup request will be copyup + the same op(s).
 	 */
-	if (!obj_request_overlaps_parent(obj_request))
-		return true;
+	obj_req->osd_req = rbd_osd_req_create(obj_req, num_osd_ops);
+	if (!obj_req->osd_req)
+		return -ENOMEM;
 
 	/*
-	 * Entire-object layered writes - we will overwrite whatever
-	 * parent data there is anyway.
+	 * Only send non-zero copyup data to save some I/O and network
+	 * bandwidth -- zero copyup data is equivalent to the object not
+	 * existing.
 	 */
-	if (!obj_request->offset &&
-	    obj_request->length == rbd_obj_bytes(&rbd_dev->header))
-		return true;
+	if (is_zero_bvecs(obj_req->copyup_bvecs, bytes)) {
+		dout("%s obj_req %p detected zeroes\n", __func__, obj_req);
+		bytes = 0;
+	}
 
-	/*
-	 * If the object is known to already exist, its parent data has
-	 * already been copied.
-	 */
-	if (obj_request_known_test(obj_request) &&
-	    obj_request_exists_test(obj_request))
-		return true;
+	osd_req_op_cls_init(obj_req->osd_req, 0, CEPH_OSD_OP_CALL, "rbd",
+			    "copyup");
+	osd_req_op_cls_request_data_bvecs(obj_req->osd_req, 0,
+					  obj_req->copyup_bvecs, bytes);
+
+	switch (obj_req->img_request->op_type) {
+	case OBJ_OP_WRITE:
+		__rbd_obj_setup_write(obj_req, 1);
+		break;
+	case OBJ_OP_DISCARD:
+		rbd_assert(!rbd_obj_is_entire(obj_req));
+		__rbd_obj_setup_discard(obj_req, 1);
+		break;
+	default:
+		rbd_assert(0);
+	}
 
-	return false;
+	rbd_obj_request_submit(obj_req);
+	return 0;
 }
 
-static int rbd_img_obj_request_submit(struct rbd_obj_request *obj_request)
+static int setup_copyup_bvecs(struct rbd_obj_request *obj_req, u64 obj_overlap)
 {
-	rbd_assert(obj_request_img_data_test(obj_request));
-	rbd_assert(obj_request_type_valid(obj_request->type));
-	rbd_assert(obj_request->img_request);
+	u32 i;
 
-	if (img_obj_request_simple(obj_request)) {
-		rbd_obj_request_submit(obj_request);
-		return 0;
-	}
+	rbd_assert(!obj_req->copyup_bvecs);
+	obj_req->copyup_bvec_count = calc_pages_for(0, obj_overlap);
+	obj_req->copyup_bvecs = kcalloc(obj_req->copyup_bvec_count,
+					sizeof(*obj_req->copyup_bvecs),
+					GFP_NOIO);
+	if (!obj_req->copyup_bvecs)
+		return -ENOMEM;
 
-	/*
-	 * It's a layered write.  The target object might exist but
-	 * we may not know that yet.  If we know it doesn't exist,
-	 * start by reading the data for the full target object from
-	 * the parent so we can use it for a copyup to the target.
-	 */
-	if (obj_request_known_test(obj_request))
-		return rbd_img_obj_parent_read_full(obj_request);
+	for (i = 0; i < obj_req->copyup_bvec_count; i++) {
+		unsigned int len = min(obj_overlap, (u64)PAGE_SIZE);
 
-	/* We don't know whether the target exists.  Go find out. */
+		obj_req->copyup_bvecs[i].bv_page = alloc_page(GFP_NOIO);
+		if (!obj_req->copyup_bvecs[i].bv_page)
+			return -ENOMEM;
+
+		obj_req->copyup_bvecs[i].bv_offset = 0;
+		obj_req->copyup_bvecs[i].bv_len = len;
+		obj_overlap -= len;
+	}
 
-	return rbd_img_obj_exists_submit(obj_request);
+	rbd_assert(!obj_overlap);
+	return 0;
 }
 
-static int rbd_img_request_submit(struct rbd_img_request *img_request)
+static int rbd_obj_handle_write_guard(struct rbd_obj_request *obj_req)
 {
-	struct rbd_obj_request *obj_request;
-	struct rbd_obj_request *next_obj_request;
-	int ret = 0;
-
-	dout("%s: img %p\n", __func__, img_request);
+	struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
+	int ret;
 
-	rbd_img_request_get(img_request);
-	for_each_obj_request_safe(img_request, obj_request, next_obj_request) {
-		ret = rbd_img_obj_request_submit(obj_request);
-		if (ret)
-			goto out_put_ireq;
+	rbd_assert(obj_req->num_img_extents);
+	prune_extents(obj_req->img_extents, &obj_req->num_img_extents,
+		      rbd_dev->parent_overlap);
+	if (!obj_req->num_img_extents) {
+		/*
+		 * The overlap has become 0 (most likely because the
+		 * image has been flattened).  Use rbd_obj_issue_copyup()
+		 * to re-submit the original write request -- the copyup
+		 * operation itself will be a no-op, since someone must
+		 * have populated the child object while we weren't
+		 * looking.  Move to WRITE_FLAT state as we'll be done
+		 * with the operation once the null copyup completes.
+		 */
+		obj_req->write_state = RBD_OBJ_WRITE_FLAT;
+		return rbd_obj_issue_copyup(obj_req, 0);
 	}
 
-out_put_ireq:
-	rbd_img_request_put(img_request);
-	return ret;
+	ret = setup_copyup_bvecs(obj_req, rbd_obj_img_extents_bytes(obj_req));
+	if (ret)
+		return ret;
+
+	obj_req->write_state = RBD_OBJ_WRITE_COPYUP;
+	return rbd_obj_read_from_parent(obj_req);
 }
 
-static void rbd_img_parent_read_callback(struct rbd_img_request *img_request)
+static bool rbd_obj_handle_write(struct rbd_obj_request *obj_req)
 {
-	struct rbd_obj_request *obj_request;
-	struct rbd_device *rbd_dev;
-	u64 obj_end;
-	u64 img_xferred;
-	int img_result;
+	int ret;
 
-	rbd_assert(img_request_child_test(img_request));
+again:
+	switch (obj_req->write_state) {
+	case RBD_OBJ_WRITE_GUARD:
+		rbd_assert(!obj_req->xferred);
+		if (obj_req->result == -ENOENT) {
+			/*
+			 * The target object doesn't exist.  Read the data for
+			 * the entire target object up to the overlap point (if
+			 * any) from the parent, so we can use it for a copyup.
+			 */
+			ret = rbd_obj_handle_write_guard(obj_req);
+			if (ret) {
+				obj_req->result = ret;
+				return true;
+			}
+			return false;
+		}
+		/* fall through */
+	case RBD_OBJ_WRITE_FLAT:
+		if (!obj_req->result)
+			/*
+			 * There is no such thing as a successful short
+			 * write -- indicate the whole request was satisfied.
+			 */
+			obj_req->xferred = obj_req->ex.oe_len;
+		return true;
+	case RBD_OBJ_WRITE_COPYUP:
+		obj_req->write_state = RBD_OBJ_WRITE_GUARD;
+		if (obj_req->result)
+			goto again;
 
-	/* First get what we need from the image request and release it */
+		rbd_assert(obj_req->xferred);
+		ret = rbd_obj_issue_copyup(obj_req, obj_req->xferred);
+		if (ret) {
+			obj_req->result = ret;
+			return true;
+		}
+		return false;
+	default:
+		rbd_assert(0);
+	}
+}
 
-	obj_request = img_request->obj_request;
-	img_xferred = img_request->xferred;
-	img_result = img_request->result;
-	rbd_img_request_put(img_request);
+/*
+ * Returns true if @obj_req is completed, or false otherwise.
+ */
+static bool __rbd_obj_handle_request(struct rbd_obj_request *obj_req)
+{
+	switch (obj_req->img_request->op_type) {
+	case OBJ_OP_READ:
+		return rbd_obj_handle_read(obj_req);
+	case OBJ_OP_WRITE:
+		return rbd_obj_handle_write(obj_req);
+	case OBJ_OP_DISCARD:
+		if (rbd_obj_handle_write(obj_req)) {
+			/*
+			 * Hide -ENOENT from delete/truncate/zero -- discarding
+			 * a non-existent object is not a problem.
+			 */
+			if (obj_req->result == -ENOENT) {
+				obj_req->result = 0;
+				obj_req->xferred = obj_req->ex.oe_len;
+			}
+			return true;
+		}
+		return false;
+	default:
+		rbd_assert(0);
+	}
+}
 
-	/*
-	 * If the overlap has become 0 (most likely because the
-	 * image has been flattened) we need to re-submit the
-	 * original request.
-	 */
-	rbd_assert(obj_request);
-	rbd_assert(obj_request->img_request);
-	rbd_dev = obj_request->img_request->rbd_dev;
-	if (!rbd_dev->parent_overlap) {
-		rbd_obj_request_submit(obj_request);
+static void rbd_obj_end_request(struct rbd_obj_request *obj_req)
+{
+	struct rbd_img_request *img_req = obj_req->img_request;
+
+	rbd_assert((!obj_req->result &&
+		    obj_req->xferred == obj_req->ex.oe_len) ||
+		   (obj_req->result < 0 && !obj_req->xferred));
+	if (!obj_req->result) {
+		img_req->xferred += obj_req->xferred;
 		return;
 	}
 
-	obj_request->result = img_result;
-	if (obj_request->result)
-		goto out;
+	rbd_warn(img_req->rbd_dev,
+		 "%s at objno %llu %llu~%llu result %d xferred %llu",
+		 obj_op_name(img_req->op_type), obj_req->ex.oe_objno,
+		 obj_req->ex.oe_off, obj_req->ex.oe_len, obj_req->result,
+		 obj_req->xferred);
+	if (!img_req->result) {
+		img_req->result = obj_req->result;
+		img_req->xferred = 0;
+	}
+}
 
-	/*
-	 * We need to zero anything beyond the parent overlap
-	 * boundary.  Since rbd_img_obj_request_read_callback()
-	 * will zero anything beyond the end of a short read, an
-	 * easy way to do this is to pretend the data from the
-	 * parent came up short--ending at the overlap boundary.
-	 */
-	rbd_assert(obj_request->img_offset < U64_MAX - obj_request->length);
-	obj_end = obj_request->img_offset + obj_request->length;
-	if (obj_end > rbd_dev->parent_overlap) {
-		u64 xferred = 0;
+static void rbd_img_end_child_request(struct rbd_img_request *img_req)
+{
+	struct rbd_obj_request *obj_req = img_req->obj_request;
 
-		if (obj_request->img_offset < rbd_dev->parent_overlap)
-			xferred = rbd_dev->parent_overlap -
-					obj_request->img_offset;
+	rbd_assert(test_bit(IMG_REQ_CHILD, &img_req->flags));
+	rbd_assert((!img_req->result &&
+		    img_req->xferred == rbd_obj_img_extents_bytes(obj_req)) ||
+		   (img_req->result < 0 && !img_req->xferred));
 
-		obj_request->xferred = min(img_xferred, xferred);
-	} else {
-		obj_request->xferred = img_xferred;
-	}
-out:
-	rbd_img_obj_request_read_callback(obj_request);
-	rbd_obj_request_complete(obj_request);
+	obj_req->result = img_req->result;
+	obj_req->xferred = img_req->xferred;
+	rbd_img_request_put(img_req);
 }
 
-static void rbd_img_parent_read(struct rbd_obj_request *obj_request)
+static void rbd_img_end_request(struct rbd_img_request *img_req)
 {
-	struct rbd_img_request *img_request;
-	int result;
+	rbd_assert(!test_bit(IMG_REQ_CHILD, &img_req->flags));
+	rbd_assert((!img_req->result &&
+		    img_req->xferred == blk_rq_bytes(img_req->rq)) ||
+		   (img_req->result < 0 && !img_req->xferred));
 
-	rbd_assert(obj_request_img_data_test(obj_request));
-	rbd_assert(obj_request->img_request != NULL);
-	rbd_assert(obj_request->result == (s32) -ENOENT);
-	rbd_assert(obj_request_type_valid(obj_request->type));
+	blk_mq_end_request(img_req->rq,
+			   errno_to_blk_status(img_req->result));
+	rbd_img_request_put(img_req);
+}
 
-	/* rbd_read_finish(obj_request, obj_request->length); */
-	img_request = rbd_parent_request_create(obj_request,
-						obj_request->img_offset,
-						obj_request->length);
-	result = -ENOMEM;
-	if (!img_request)
-		goto out_err;
+static void rbd_obj_handle_request(struct rbd_obj_request *obj_req)
+{
+	struct rbd_img_request *img_req;
 
-	if (obj_request->type == OBJ_REQUEST_BIO)
-		result = rbd_img_request_fill(img_request, OBJ_REQUEST_BIO,
-						obj_request->bio_list);
-	else
-		result = rbd_img_request_fill(img_request, OBJ_REQUEST_PAGES,
-						obj_request->pages);
-	if (result)
-		goto out_err;
+again:
+	if (!__rbd_obj_handle_request(obj_req))
+		return;
 
-	img_request->callback = rbd_img_parent_read_callback;
-	result = rbd_img_request_submit(img_request);
-	if (result)
-		goto out_err;
+	img_req = obj_req->img_request;
+	spin_lock(&img_req->completion_lock);
+	rbd_obj_end_request(obj_req);
+	rbd_assert(img_req->pending_count);
+	if (--img_req->pending_count) {
+		spin_unlock(&img_req->completion_lock);
+		return;
+	}
 
-	return;
-out_err:
-	if (img_request)
-		rbd_img_request_put(img_request);
-	obj_request->result = result;
-	obj_request->xferred = 0;
-	obj_request_done_set(obj_request);
+	spin_unlock(&img_req->completion_lock);
+	if (test_bit(IMG_REQ_CHILD, &img_req->flags)) {
+		obj_req = img_req->obj_request;
+		rbd_img_end_child_request(img_req);
+		goto again;
+	}
+	rbd_img_end_request(img_req);
 }
 
 static const struct rbd_client_id rbd_empty_cid;
@@ -3091,8 +2674,8 @@ static int __rbd_notify_op_lock(struct rbd_device *rbd_dev,
 {
 	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
 	struct rbd_client_id cid = rbd_get_cid(rbd_dev);
-	int buf_size = 4 + 8 + 8 + CEPH_ENCODING_START_BLK_LEN;
-	char buf[buf_size];
+	char buf[4 + 8 + 8 + CEPH_ENCODING_START_BLK_LEN];
+	int buf_size = sizeof(buf);
 	void *p = buf;
 
 	dout("%s rbd_dev %p notify_op %d\n", __func__, rbd_dev, notify_op);
@@ -3610,8 +3193,8 @@ static void __rbd_acknowledge_notify(struct rbd_device *rbd_dev,
 				     u64 notify_id, u64 cookie, s32 *result)
 {
 	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
-	int buf_size = 4 + CEPH_ENCODING_START_BLK_LEN;
-	char buf[buf_size];
+	char buf[4 + CEPH_ENCODING_START_BLK_LEN];
+	int buf_size = sizeof(buf);
 	int ret;
 
 	if (result) {
@@ -3887,7 +3470,7 @@ static void rbd_reregister_watch(struct work_struct *work)
 
 	ret = rbd_dev_refresh(rbd_dev);
 	if (ret)
-		rbd_warn(rbd_dev, "reregisteration refresh failed: %d", ret);
+		rbd_warn(rbd_dev, "reregistration refresh failed: %d", ret);
 }
 
 /*
@@ -4070,8 +3653,7 @@ static void rbd_queue_workfn(struct work_struct *work)
 		}
 	}
 
-	img_request = rbd_img_request_create(rbd_dev, offset, length, op_type,
-					     snapc);
+	img_request = rbd_img_request_create(rbd_dev, op_type, snapc);
 	if (!img_request) {
 		result = -ENOMEM;
 		goto err_unlock;
@@ -4080,18 +3662,14 @@ static void rbd_queue_workfn(struct work_struct *work)
 	snapc = NULL; /* img_request consumes a ref */
 
 	if (op_type == OBJ_OP_DISCARD)
-		result = rbd_img_request_fill(img_request, OBJ_REQUEST_NODATA,
-					      NULL);
+		result = rbd_img_fill_nodata(img_request, offset, length);
 	else
-		result = rbd_img_request_fill(img_request, OBJ_REQUEST_BIO,
-					      rq->bio);
-	if (result)
-		goto err_img_request;
-
-	result = rbd_img_request_submit(img_request);
+		result = rbd_img_fill_from_bio(img_request, offset, length,
+					       rq->bio);
 	if (result)
 		goto err_img_request;
 
+	rbd_img_request_submit(img_request);
 	if (must_be_locked)
 		up_read(&rbd_dev->lock_rwsem);
 	return;
@@ -4369,7 +3947,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
 	blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE);
 	q->limits.max_sectors = queue_max_hw_sectors(q);
 	blk_queue_max_segments(q, USHRT_MAX);
-	blk_queue_max_segment_size(q, segment_size);
+	blk_queue_max_segment_size(q, UINT_MAX);
 	blk_queue_io_min(q, segment_size);
 	blk_queue_io_opt(q, segment_size);
 
@@ -5057,9 +4635,6 @@ static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev)
 	} __attribute__ ((packed)) striping_info_buf = { 0 };
 	size_t size = sizeof (striping_info_buf);
 	void *p;
-	u64 obj_size;
-	u64 stripe_unit;
-	u64 stripe_count;
 	int ret;
 
 	ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
@@ -5071,31 +4646,9 @@ static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev)
 	if (ret < size)
 		return -ERANGE;
 
-	/*
-	 * We don't actually support the "fancy striping" feature
-	 * (STRIPINGV2) yet, but if the striping sizes are the
-	 * defaults the behavior is the same as before.  So find
-	 * out, and only fail if the image has non-default values.
-	 */
-	ret = -EINVAL;
-	obj_size = rbd_obj_bytes(&rbd_dev->header);
 	p = &striping_info_buf;
-	stripe_unit = ceph_decode_64(&p);
-	if (stripe_unit != obj_size) {
-		rbd_warn(rbd_dev, "unsupported stripe unit "
-				"(got %llu want %llu)",
-				stripe_unit, obj_size);
-		return -EINVAL;
-	}
-	stripe_count = ceph_decode_64(&p);
-	if (stripe_count != 1) {
-		rbd_warn(rbd_dev, "unsupported stripe count "
-				"(got %llu want 1)", stripe_count);
-		return -EINVAL;
-	}
-	rbd_dev->header.stripe_unit = stripe_unit;
-	rbd_dev->header.stripe_count = stripe_count;
-
+	rbd_dev->header.stripe_unit = ceph_decode_64(&p);
+	rbd_dev->header.stripe_count = ceph_decode_64(&p);
 	return 0;
 }
 
@@ -5653,39 +5206,6 @@ out_err:
 	return ret;
 }
 
-/*
- * Return pool id (>= 0) or a negative error code.
- */
-static int rbd_add_get_pool_id(struct rbd_client *rbdc, const char *pool_name)
-{
-	struct ceph_options *opts = rbdc->client->options;
-	u64 newest_epoch;
-	int tries = 0;
-	int ret;
-
-again:
-	ret = ceph_pg_poolid_by_name(rbdc->client->osdc.osdmap, pool_name);
-	if (ret == -ENOENT && tries++ < 1) {
-		ret = ceph_monc_get_version(&rbdc->client->monc, "osdmap",
-					    &newest_epoch);
-		if (ret < 0)
-			return ret;
-
-		if (rbdc->client->osdc.osdmap->epoch < newest_epoch) {
-			ceph_osdc_maybe_request_map(&rbdc->client->osdc);
-			(void) ceph_monc_wait_osdmap(&rbdc->client->monc,
-						     newest_epoch,
-						     opts->mount_timeout);
-			goto again;
-		} else {
-			/* the osdmap we have is new enough */
-			return -ENOENT;
-		}
-	}
-
-	return ret;
-}
-
 static void rbd_dev_image_unlock(struct rbd_device *rbd_dev)
 {
 	down_write(&rbd_dev->lock_rwsem);
@@ -6114,7 +5634,7 @@ static ssize_t do_rbd_add(struct bus_type *bus,
 	}
 
 	/* pick the pool */
-	rc = rbd_add_get_pool_id(rbdc, spec->pool_name);
+	rc = ceph_pg_poolid_by_name(rbdc->client->osdc.osdmap, spec->pool_name);
 	if (rc < 0) {
 		if (rc == -ENOENT)
 			pr_info("pool %s does not exist\n", spec->pool_name);
@@ -6366,16 +5886,8 @@ static int rbd_slab_init(void)
 	if (!rbd_obj_request_cache)
 		goto out_err;
 
-	rbd_assert(!rbd_bio_clone);
-	rbd_bio_clone = bioset_create(BIO_POOL_SIZE, 0, 0);
-	if (!rbd_bio_clone)
-		goto out_err_clone;
-
 	return 0;
 
-out_err_clone:
-	kmem_cache_destroy(rbd_obj_request_cache);
-	rbd_obj_request_cache = NULL;
 out_err:
 	kmem_cache_destroy(rbd_img_request_cache);
 	rbd_img_request_cache = NULL;
@@ -6391,10 +5903,6 @@ static void rbd_slab_exit(void)
 	rbd_assert(rbd_img_request_cache);
 	kmem_cache_destroy(rbd_img_request_cache);
 	rbd_img_request_cache = NULL;
-
-	rbd_assert(rbd_bio_clone);
-	bioset_free(rbd_bio_clone);
-	rbd_bio_clone = NULL;
 }
 
 static int __init rbd_init(void)
diff --git a/drivers/char/rtc.c b/drivers/char/rtc.c
index 0c858d027bf3..57dc546628b5 100644
--- a/drivers/char/rtc.c
+++ b/drivers/char/rtc.c
@@ -809,89 +809,6 @@ static __poll_t rtc_poll(struct file *file, poll_table *wait)
 }
 #endif
 
-int rtc_register(rtc_task_t *task)
-{
-#ifndef RTC_IRQ
-	return -EIO;
-#else
-	if (task == NULL || task->func == NULL)
-		return -EINVAL;
-	spin_lock_irq(&rtc_lock);
-	if (rtc_status & RTC_IS_OPEN) {
-		spin_unlock_irq(&rtc_lock);
-		return -EBUSY;
-	}
-	spin_lock(&rtc_task_lock);
-	if (rtc_callback) {
-		spin_unlock(&rtc_task_lock);
-		spin_unlock_irq(&rtc_lock);
-		return -EBUSY;
-	}
-	rtc_status |= RTC_IS_OPEN;
-	rtc_callback = task;
-	spin_unlock(&rtc_task_lock);
-	spin_unlock_irq(&rtc_lock);
-	return 0;
-#endif
-}
-EXPORT_SYMBOL(rtc_register);
-
-int rtc_unregister(rtc_task_t *task)
-{
-#ifndef RTC_IRQ
-	return -EIO;
-#else
-	unsigned char tmp;
-
-	spin_lock_irq(&rtc_lock);
-	spin_lock(&rtc_task_lock);
-	if (rtc_callback != task) {
-		spin_unlock(&rtc_task_lock);
-		spin_unlock_irq(&rtc_lock);
-		return -ENXIO;
-	}
-	rtc_callback = NULL;
-
-	/* disable controls */
-	if (!hpet_mask_rtc_irq_bit(RTC_PIE | RTC_AIE | RTC_UIE)) {
-		tmp = CMOS_READ(RTC_CONTROL);
-		tmp &= ~RTC_PIE;
-		tmp &= ~RTC_AIE;
-		tmp &= ~RTC_UIE;
-		CMOS_WRITE(tmp, RTC_CONTROL);
-		CMOS_READ(RTC_INTR_FLAGS);
-	}
-	if (rtc_status & RTC_TIMER_ON) {
-		rtc_status &= ~RTC_TIMER_ON;
-		del_timer(&rtc_irq_timer);
-	}
-	rtc_status &= ~RTC_IS_OPEN;
-	spin_unlock(&rtc_task_lock);
-	spin_unlock_irq(&rtc_lock);
-	return 0;
-#endif
-}
-EXPORT_SYMBOL(rtc_unregister);
-
-int rtc_control(rtc_task_t *task, unsigned int cmd, unsigned long arg)
-{
-#ifndef RTC_IRQ
-	return -EIO;
-#else
-	unsigned long flags;
-	if (cmd != RTC_PIE_ON && cmd != RTC_PIE_OFF && cmd != RTC_IRQP_SET)
-		return -EINVAL;
-	spin_lock_irqsave(&rtc_task_lock, flags);
-	if (rtc_callback != task) {
-		spin_unlock_irqrestore(&rtc_task_lock, flags);
-		return -ENXIO;
-	}
-	spin_unlock_irqrestore(&rtc_task_lock, flags);
-	return rtc_do_ioctl(cmd, arg, 1);
-#endif
-}
-EXPORT_SYMBOL(rtc_control);
-
 /*
  *	The various file operations we support.
  */
diff --git a/drivers/cpufreq/armada-37xx-cpufreq.c b/drivers/cpufreq/armada-37xx-cpufreq.c
index c6ebc88a7d8d..72a2975499db 100644
--- a/drivers/cpufreq/armada-37xx-cpufreq.c
+++ b/drivers/cpufreq/armada-37xx-cpufreq.c
@@ -202,6 +202,7 @@ static int __init armada37xx_cpufreq_driver_init(void)
 	cur_frequency = clk_get_rate(clk);
 	if (!cur_frequency) {
 		dev_err(cpu_dev, "Failed to get clock rate for CPU\n");
+		clk_put(clk);
 		return -EINVAL;
 	}
 
@@ -210,6 +211,7 @@ static int __init armada37xx_cpufreq_driver_init(void)
 		return -EINVAL;
 
 	armada37xx_cpufreq_dvfs_setup(nb_pm_base, clk, dvfs->divider);
+	clk_put(clk);
 
 	for (load_lvl = ARMADA_37XX_DVFS_LOAD_0; load_lvl < LOAD_LEVEL_NR;
 	     load_lvl++) {
diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index 8300a9fcb80c..bc5fc1630876 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -162,14 +162,23 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
 		cpu->perf_caps.highest_perf;
 	policy->cpuinfo.max_freq = cppc_dmi_max_khz;
 
-	policy->cpuinfo.transition_latency = cppc_get_transition_latency(cpu_num);
 	policy->transition_delay_us = cppc_get_transition_latency(cpu_num) /
 		NSEC_PER_USEC;
 	policy->shared_type = cpu->shared_type;
 
-	if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
+	if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
+		int i;
+
 		cpumask_copy(policy->cpus, cpu->shared_cpu_map);
-	else if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL) {
+
+		for_each_cpu(i, policy->cpus) {
+			if (unlikely(i == policy->cpu))
+				continue;
+
+			memcpy(&all_cpu_data[i]->perf_caps, &cpu->perf_caps,
+			       sizeof(cpu->perf_caps));
+		}
+	} else if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL) {
 		/* Support only SW_ANY for now. */
 		pr_debug("Unsupported CPU co-ord type\n");
 		return -EFAULT;
diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c
index 10e119ae66dd..3a8cc99e6815 100644
--- a/drivers/cpufreq/freq_table.c
+++ b/drivers/cpufreq/freq_table.c
@@ -352,20 +352,6 @@ static int set_freq_table_sorted(struct cpufreq_policy *policy)
 	return 0;
 }
 
-int cpufreq_table_validate_and_show(struct cpufreq_policy *policy,
-				      struct cpufreq_frequency_table *table)
-{
-	int ret;
-
-	ret = cpufreq_frequency_table_cpuinfo(policy, table);
-	if (ret)
-		return ret;
-
-	policy->freq_table = table;
-	return 0;
-}
-EXPORT_SYMBOL_GPL(cpufreq_table_validate_and_show);
-
 int cpufreq_table_validate_and_sort(struct cpufreq_policy *policy)
 {
 	int ret;
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 6d084c61ee25..17e566afbb41 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -26,7 +26,6 @@
 #include <linux/sysfs.h>
 #include <linux/types.h>
 #include <linux/fs.h>
-#include <linux/debugfs.h>
 #include <linux/acpi.h>
 #include <linux/vmalloc.h>
 #include <trace/events/power.h>
diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c
index 959a1dbe3835..b4dbc77459b6 100644
--- a/drivers/cpufreq/scmi-cpufreq.c
+++ b/drivers/cpufreq/scmi-cpufreq.c
@@ -159,13 +159,7 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy)
 	priv->domain_id = handle->perf_ops->device_domain_id(cpu_dev);
 
 	policy->driver_data = priv;
-
-	ret = cpufreq_table_validate_and_show(policy, freq_table);
-	if (ret) {
-		dev_err(cpu_dev, "%s: invalid frequency table: %d\n", __func__,
-			ret);
-		goto out_free_cpufreq_table;
-	}
+	policy->freq_table = freq_table;
 
 	/* SCMI allows DVFS request for any domain from any CPU */
 	policy->dvfs_possible_from_any_cpu = true;
@@ -179,8 +173,6 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy)
 	policy->fast_switch_possible = true;
 	return 0;
 
-out_free_cpufreq_table:
-	dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table);
 out_free_priv:
 	kfree(priv);
 out_free_opp:
diff --git a/drivers/cpufreq/ti-cpufreq.c b/drivers/cpufreq/ti-cpufreq.c
index a099b7bf74cd..6ba709b6f095 100644
--- a/drivers/cpufreq/ti-cpufreq.c
+++ b/drivers/cpufreq/ti-cpufreq.c
@@ -304,7 +304,7 @@ static struct platform_driver ti_cpufreq_driver = {
 		.name = "ti-cpufreq",
 	},
 };
-module_platform_driver(ti_cpufreq_driver);
+builtin_platform_driver(ti_cpufreq_driver);
 
 MODULE_DESCRIPTION("TI CPUFreq/OPP hw-supported driver");
 MODULE_AUTHOR("Dave Gerlach <d-gerlach@ti.com>");
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 0003e9a02637..6df894d65d9e 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -272,12 +272,18 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
  *
  * @drv: the cpuidle driver
  * @dev: the cpuidle device
+ * @stop_tick: indication on whether or not to stop the tick
  *
  * Returns the index of the idle state.  The return value must not be negative.
+ *
+ * The memory location pointed to by @stop_tick is expected to be written the
+ * 'false' boolean value if the scheduler tick should not be stopped before
+ * entering the returned state.
  */
-int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
+int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
+		   bool *stop_tick)
 {
-	return cpuidle_curr_governor->select(drv, dev);
+	return cpuidle_curr_governor->select(drv, dev, stop_tick);
 }
 
 /**
diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c
index 1ad8745fd6d6..b24883f85c99 100644
--- a/drivers/cpuidle/governors/ladder.c
+++ b/drivers/cpuidle/governors/ladder.c
@@ -63,9 +63,10 @@ static inline void ladder_do_selection(struct ladder_device *ldev,
  * ladder_select_state - selects the next state to enter
  * @drv: cpuidle driver
  * @dev: the CPU
+ * @dummy: not used
  */
 static int ladder_select_state(struct cpuidle_driver *drv,
-				struct cpuidle_device *dev)
+			       struct cpuidle_device *dev, bool *dummy)
 {
 	struct ladder_device *ldev = this_cpu_ptr(&ladder_devices);
 	struct device *device = get_cpu_device(dev->cpu);
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index aa390404e85f..1bfe03ceb236 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -123,6 +123,7 @@
 struct menu_device {
 	int		last_state_idx;
 	int             needs_update;
+	int             tick_wakeup;
 
 	unsigned int	next_timer_us;
 	unsigned int	predicted_us;
@@ -279,8 +280,10 @@ again:
  * menu_select - selects the next idle state to enter
  * @drv: cpuidle driver containing state data
  * @dev: the CPU
+ * @stop_tick: indication on whether or not to stop the tick
  */
-static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
+static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
+		       bool *stop_tick)
 {
 	struct menu_device *data = this_cpu_ptr(&menu_devices);
 	struct device *device = get_cpu_device(dev->cpu);
@@ -292,6 +295,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	unsigned int expected_interval;
 	unsigned long nr_iowaiters, cpu_load;
 	int resume_latency = dev_pm_qos_raw_read_value(device);
+	ktime_t delta_next;
 
 	if (data->needs_update) {
 		menu_update(drv, dev);
@@ -303,11 +307,13 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 		latency_req = resume_latency;
 
 	/* Special case when user has set very strict latency requirement */
-	if (unlikely(latency_req == 0))
+	if (unlikely(latency_req == 0)) {
+		*stop_tick = false;
 		return 0;
+	}
 
 	/* determine the expected residency time, round up */
-	data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length());
+	data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length(&delta_next));
 
 	get_iowait_load(&nr_iowaiters, &cpu_load);
 	data->bucket = which_bucket(data->next_timer_us, nr_iowaiters);
@@ -346,14 +352,30 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	 */
 	data->predicted_us = min(data->predicted_us, expected_interval);
 
-	/*
-	 * Use the performance multiplier and the user-configurable
-	 * latency_req to determine the maximum exit latency.
-	 */
-	interactivity_req = data->predicted_us / performance_multiplier(nr_iowaiters, cpu_load);
-	if (latency_req > interactivity_req)
-		latency_req = interactivity_req;
+	if (tick_nohz_tick_stopped()) {
+		/*
+		 * If the tick is already stopped, the cost of possible short
+		 * idle duration misprediction is much higher, because the CPU
+		 * may be stuck in a shallow idle state for a long time as a
+		 * result of it.  In that case say we might mispredict and try
+		 * to force the CPU into a state for which we would have stopped
+		 * the tick, unless a timer is going to expire really soon
+		 * anyway.
+		 */
+		if (data->predicted_us < TICK_USEC)
+			data->predicted_us = min_t(unsigned int, TICK_USEC,
+						   ktime_to_us(delta_next));
+	} else {
+		/*
+		 * Use the performance multiplier and the user-configurable
+		 * latency_req to determine the maximum exit latency.
+		 */
+		interactivity_req = data->predicted_us / performance_multiplier(nr_iowaiters, cpu_load);
+		if (latency_req > interactivity_req)
+			latency_req = interactivity_req;
+	}
 
+	expected_interval = data->predicted_us;
 	/*
 	 * Find the idle state with the lowest power while satisfying
 	 * our constraints.
@@ -369,15 +391,52 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 			idx = i; /* first enabled state */
 		if (s->target_residency > data->predicted_us)
 			break;
-		if (s->exit_latency > latency_req)
+		if (s->exit_latency > latency_req) {
+			/*
+			 * If we break out of the loop for latency reasons, use
+			 * the target residency of the selected state as the
+			 * expected idle duration so that the tick is retained
+			 * as long as that target residency is low enough.
+			 */
+			expected_interval = drv->states[idx].target_residency;
 			break;
-
+		}
 		idx = i;
 	}
 
 	if (idx == -1)
 		idx = 0; /* No states enabled. Must use 0. */
 
+	/*
+	 * Don't stop the tick if the selected state is a polling one or if the
+	 * expected idle duration is shorter than the tick period length.
+	 */
+	if ((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) ||
+	    expected_interval < TICK_USEC) {
+		unsigned int delta_next_us = ktime_to_us(delta_next);
+
+		*stop_tick = false;
+
+		if (!tick_nohz_tick_stopped() && idx > 0 &&
+		    drv->states[idx].target_residency > delta_next_us) {
+			/*
+			 * The tick is not going to be stopped and the target
+			 * residency of the state to be returned is not within
+			 * the time until the next timer event including the
+			 * tick, so try to correct that.
+			 */
+			for (i = idx - 1; i >= 0; i--) {
+			    if (drv->states[i].disabled ||
+			        dev->states_usage[i].disable)
+					continue;
+
+				idx = i;
+				if (drv->states[i].target_residency <= delta_next_us)
+					break;
+			}
+		}
+	}
+
 	data->last_state_idx = idx;
 
 	return data->last_state_idx;
@@ -397,6 +456,7 @@ static void menu_reflect(struct cpuidle_device *dev, int index)
 
 	data->last_state_idx = index;
 	data->needs_update = 1;
+	data->tick_wakeup = tick_nohz_idle_got_tick();
 }
 
 /**
@@ -427,14 +487,27 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	 * assume the state was never reached and the exit latency is 0.
 	 */
 
-	/* measured value */
-	measured_us = cpuidle_get_last_residency(dev);
-
-	/* Deduct exit latency */
-	if (measured_us > 2 * target->exit_latency)
-		measured_us -= target->exit_latency;
-	else
-		measured_us /= 2;
+	if (data->tick_wakeup && data->next_timer_us > TICK_USEC) {
+		/*
+		 * The nohz code said that there wouldn't be any events within
+		 * the tick boundary (if the tick was stopped), but the idle
+		 * duration predictor had a differing opinion.  Since the CPU
+		 * was woken up by a tick (that wasn't stopped after all), the
+		 * predictor was not quite right, so assume that the CPU could
+		 * have been idle long (but not forever) to help the idle
+		 * duration predictor do a better job next time.
+		 */
+		measured_us = 9 * MAX_INTERESTING / 10;
+	} else {
+		/* measured value */
+		measured_us = cpuidle_get_last_residency(dev);
+
+		/* Deduct exit latency */
+		if (measured_us > 2 * target->exit_latency)
+			measured_us -= target->exit_latency;
+		else
+			measured_us /= 2;
+	}
 
 	/* Make sure our coefficients do not exceed unity */
 	if (measured_us > data->next_timer_us)
diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig
index b79aa8f7a497..e0700bf4893a 100644
--- a/drivers/dax/Kconfig
+++ b/drivers/dax/Kconfig
@@ -1,3 +1,7 @@
+config DAX_DRIVER
+	select DAX
+	bool
+
 menuconfig DAX
 	tristate "DAX: direct access to differentiated memory"
 	select SRCU
@@ -16,7 +20,6 @@ config DEV_DAX
 	  baseline memory pool.  Mappings of a /dev/daxX.Y device impose
 	  restrictions that make the mapping behavior deterministic.
 
-
 config DEV_DAX_PMEM
 	tristate "PMEM DAX: direct access to persistent memory"
 	depends on LIBNVDIMM && NVDIMM_DAX && DEV_DAX
diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index 0b61f48f21a6..be8606457f27 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -257,8 +257,8 @@ static int __dev_dax_pte_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
 
 	dax_region = dev_dax->region;
 	if (dax_region->align > PAGE_SIZE) {
-		dev_dbg(dev, "%s: alignment (%#x) > fault size (%#x)\n",
-			__func__, dax_region->align, fault_size);
+		dev_dbg(dev, "alignment (%#x) > fault size (%#x)\n",
+			dax_region->align, fault_size);
 		return VM_FAULT_SIGBUS;
 	}
 
@@ -267,8 +267,7 @@ static int __dev_dax_pte_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
 
 	phys = dax_pgoff_to_phys(dev_dax, vmf->pgoff, PAGE_SIZE);
 	if (phys == -1) {
-		dev_dbg(dev, "%s: pgoff_to_phys(%#lx) failed\n", __func__,
-				vmf->pgoff);
+		dev_dbg(dev, "pgoff_to_phys(%#lx) failed\n", vmf->pgoff);
 		return VM_FAULT_SIGBUS;
 	}
 
@@ -299,14 +298,14 @@ static int __dev_dax_pmd_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
 
 	dax_region = dev_dax->region;
 	if (dax_region->align > PMD_SIZE) {
-		dev_dbg(dev, "%s: alignment (%#x) > fault size (%#x)\n",
-			__func__, dax_region->align, fault_size);
+		dev_dbg(dev, "alignment (%#x) > fault size (%#x)\n",
+			dax_region->align, fault_size);
 		return VM_FAULT_SIGBUS;
 	}
 
 	/* dax pmd mappings require pfn_t_devmap() */
 	if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) != (PFN_DEV|PFN_MAP)) {
-		dev_dbg(dev, "%s: region lacks devmap flags\n", __func__);
+		dev_dbg(dev, "region lacks devmap flags\n");
 		return VM_FAULT_SIGBUS;
 	}
 
@@ -323,8 +322,7 @@ static int __dev_dax_pmd_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
 	pgoff = linear_page_index(vmf->vma, pmd_addr);
 	phys = dax_pgoff_to_phys(dev_dax, pgoff, PMD_SIZE);
 	if (phys == -1) {
-		dev_dbg(dev, "%s: pgoff_to_phys(%#lx) failed\n", __func__,
-				pgoff);
+		dev_dbg(dev, "pgoff_to_phys(%#lx) failed\n", pgoff);
 		return VM_FAULT_SIGBUS;
 	}
 
@@ -351,14 +349,14 @@ static int __dev_dax_pud_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
 
 	dax_region = dev_dax->region;
 	if (dax_region->align > PUD_SIZE) {
-		dev_dbg(dev, "%s: alignment (%#x) > fault size (%#x)\n",
-			__func__, dax_region->align, fault_size);
+		dev_dbg(dev, "alignment (%#x) > fault size (%#x)\n",
+			dax_region->align, fault_size);
 		return VM_FAULT_SIGBUS;
 	}
 
 	/* dax pud mappings require pfn_t_devmap() */
 	if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) != (PFN_DEV|PFN_MAP)) {
-		dev_dbg(dev, "%s: region lacks devmap flags\n", __func__);
+		dev_dbg(dev, "region lacks devmap flags\n");
 		return VM_FAULT_SIGBUS;
 	}
 
@@ -375,8 +373,7 @@ static int __dev_dax_pud_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
 	pgoff = linear_page_index(vmf->vma, pud_addr);
 	phys = dax_pgoff_to_phys(dev_dax, pgoff, PUD_SIZE);
 	if (phys == -1) {
-		dev_dbg(dev, "%s: pgoff_to_phys(%#lx) failed\n", __func__,
-				pgoff);
+		dev_dbg(dev, "pgoff_to_phys(%#lx) failed\n", pgoff);
 		return VM_FAULT_SIGBUS;
 	}
 
@@ -399,9 +396,8 @@ static int dev_dax_huge_fault(struct vm_fault *vmf,
 	struct file *filp = vmf->vma->vm_file;
 	struct dev_dax *dev_dax = filp->private_data;
 
-	dev_dbg(&dev_dax->dev, "%s: %s: %s (%#lx - %#lx) size = %d\n", __func__,
-			current->comm, (vmf->flags & FAULT_FLAG_WRITE)
-			? "write" : "read",
+	dev_dbg(&dev_dax->dev, "%s: %s (%#lx - %#lx) size = %d\n", current->comm,
+			(vmf->flags & FAULT_FLAG_WRITE) ? "write" : "read",
 			vmf->vma->vm_start, vmf->vma->vm_end, pe_size);
 
 	id = dax_read_lock();
@@ -460,7 +456,7 @@ static int dax_mmap(struct file *filp, struct vm_area_struct *vma)
 	struct dev_dax *dev_dax = filp->private_data;
 	int rc, id;
 
-	dev_dbg(&dev_dax->dev, "%s\n", __func__);
+	dev_dbg(&dev_dax->dev, "trace\n");
 
 	/*
 	 * We lock to check dax_dev liveness and will re-check at
@@ -518,7 +514,7 @@ static int dax_open(struct inode *inode, struct file *filp)
 	struct inode *__dax_inode = dax_inode(dax_dev);
 	struct dev_dax *dev_dax = dax_get_private(dax_dev);
 
-	dev_dbg(&dev_dax->dev, "%s\n", __func__);
+	dev_dbg(&dev_dax->dev, "trace\n");
 	inode->i_mapping = __dax_inode->i_mapping;
 	inode->i_mapping->host = __dax_inode;
 	filp->f_mapping = inode->i_mapping;
@@ -533,7 +529,7 @@ static int dax_release(struct inode *inode, struct file *filp)
 {
 	struct dev_dax *dev_dax = filp->private_data;
 
-	dev_dbg(&dev_dax->dev, "%s\n", __func__);
+	dev_dbg(&dev_dax->dev, "trace\n");
 	return 0;
 }
 
@@ -575,7 +571,7 @@ static void unregister_dev_dax(void *dev)
 	struct inode *inode = dax_inode(dax_dev);
 	struct cdev *cdev = inode->i_cdev;
 
-	dev_dbg(dev, "%s\n", __func__);
+	dev_dbg(dev, "trace\n");
 
 	kill_dev_dax(dev_dax);
 	cdev_device_del(cdev, dev);
diff --git a/drivers/dax/pmem.c b/drivers/dax/pmem.c
index 31b6ecce4c64..fd49b24fd6af 100644
--- a/drivers/dax/pmem.c
+++ b/drivers/dax/pmem.c
@@ -34,7 +34,7 @@ static void dax_pmem_percpu_release(struct percpu_ref *ref)
 {
 	struct dax_pmem *dax_pmem = to_dax_pmem(ref);
 
-	dev_dbg(dax_pmem->dev, "%s\n", __func__);
+	dev_dbg(dax_pmem->dev, "trace\n");
 	complete(&dax_pmem->cmp);
 }
 
@@ -43,7 +43,7 @@ static void dax_pmem_percpu_exit(void *data)
 	struct percpu_ref *ref = data;
 	struct dax_pmem *dax_pmem = to_dax_pmem(ref);
 
-	dev_dbg(dax_pmem->dev, "%s\n", __func__);
+	dev_dbg(dax_pmem->dev, "trace\n");
 	wait_for_completion(&dax_pmem->cmp);
 	percpu_ref_exit(ref);
 }
@@ -53,7 +53,7 @@ static void dax_pmem_percpu_kill(void *data)
 	struct percpu_ref *ref = data;
 	struct dax_pmem *dax_pmem = to_dax_pmem(ref);
 
-	dev_dbg(dax_pmem->dev, "%s\n", __func__);
+	dev_dbg(dax_pmem->dev, "trace\n");
 	percpu_ref_kill(ref);
 }
 
@@ -150,17 +150,7 @@ static struct nd_device_driver dax_pmem_driver = {
 	.type = ND_DRIVER_DAX_PMEM,
 };
 
-static int __init dax_pmem_init(void)
-{
-	return nd_driver_register(&dax_pmem_driver);
-}
-module_init(dax_pmem_init);
-
-static void __exit dax_pmem_exit(void)
-{
-	driver_unregister(&dax_pmem_driver.drv);
-}
-module_exit(dax_pmem_exit);
+module_nd_driver(dax_pmem_driver);
 
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Intel Corporation");
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index ecdc292aa4e4..2b2332b605e4 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -124,10 +124,19 @@ int __bdev_dax_supported(struct super_block *sb, int blocksize)
 		return len < 0 ? len : -EIO;
 	}
 
-	if ((IS_ENABLED(CONFIG_FS_DAX_LIMITED) && pfn_t_special(pfn))
-			|| pfn_t_devmap(pfn))
+	if (IS_ENABLED(CONFIG_FS_DAX_LIMITED) && pfn_t_special(pfn)) {
+		/*
+		 * An arch that has enabled the pmem api should also
+		 * have its drivers support pfn_t_devmap()
+		 *
+		 * This is a developer warning and should not trigger in
+		 * production. dax_flush() will crash since it depends
+		 * on being able to do (page_address(pfn_to_page())).
+		 */
+		WARN_ON(IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API));
+	} else if (pfn_t_devmap(pfn)) {
 		/* pass */;
-	else {
+	} else {
 		pr_debug("VFS (%s): error: dax support not enabled\n",
 				sb->s_id);
 		return -EOPNOTSUPP;
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 27df3e2837fd..6d61cd023633 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -187,6 +187,16 @@ config DMA_SUN6I
 	help
 	  Support for the DMA engine first found in Allwinner A31 SoCs.
 
+config DW_AXI_DMAC
+	tristate "Synopsys DesignWare AXI DMA support"
+	depends on OF || COMPILE_TEST
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	help
+	  Enable support for Synopsys DesignWare AXI DMA controller.
+	  NOTE: This driver wasn't tested on 64 bit platform because
+	  of lack 64 bit platform with Synopsys DW AXI DMAC.
+
 config EP93XX_DMA
 	bool "Cirrus Logic EP93xx DMA support"
 	depends on ARCH_EP93XX || COMPILE_TEST
@@ -633,6 +643,8 @@ config ZX_DMA
 # driver files
 source "drivers/dma/bestcomm/Kconfig"
 
+source "drivers/dma/mediatek/Kconfig"
+
 source "drivers/dma/qcom/Kconfig"
 
 source "drivers/dma/dw/Kconfig"
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index b9dca8a0e142..0f62a4d49aab 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -28,6 +28,7 @@ obj-$(CONFIG_DMA_OMAP) += omap-dma.o
 obj-$(CONFIG_DMA_SA11X0) += sa11x0-dma.o
 obj-$(CONFIG_DMA_SUN4I) += sun4i-dma.o
 obj-$(CONFIG_DMA_SUN6I) += sun6i-dma.o
+obj-$(CONFIG_DW_AXI_DMAC) += dw-axi-dmac/
 obj-$(CONFIG_DW_DMAC_CORE) += dw/
 obj-$(CONFIG_EP93XX_DMA) += ep93xx_dma.o
 obj-$(CONFIG_FSL_DMA) += fsldma.o
@@ -75,5 +76,6 @@ obj-$(CONFIG_XGENE_DMA) += xgene-dma.o
 obj-$(CONFIG_ZX_DMA) += zx_dma.o
 obj-$(CONFIG_ST_FDMA) += st_fdma.o
 
+obj-y += mediatek/
 obj-y += qcom/
 obj-y += xilinx/
diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c
index c00e3923d7d8..94236ec9d410 100644
--- a/drivers/dma/at_xdmac.c
+++ b/drivers/dma/at_xdmac.c
@@ -1471,10 +1471,10 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
 	for (retry = 0; retry < AT_XDMAC_RESIDUE_MAX_RETRIES; retry++) {
 		check_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc;
 		rmb();
-		initd = !!(at_xdmac_chan_read(atchan, AT_XDMAC_CC) & AT_XDMAC_CC_INITD);
-		rmb();
 		cur_ubc = at_xdmac_chan_read(atchan, AT_XDMAC_CUBC);
 		rmb();
+		initd = !!(at_xdmac_chan_read(atchan, AT_XDMAC_CC) & AT_XDMAC_CC_INITD);
+		rmb();
 		cur_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc;
 		rmb();
 
diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
index 80cc2be6483c..b9339524d5bd 100644
--- a/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@ -74,7 +74,11 @@ MODULE_PARM_DESC(timeout, "Transfer Timeout in msec (default: 3000), "
 
 static bool noverify;
 module_param(noverify, bool, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(noverify, "Disable random data setup and verification");
+MODULE_PARM_DESC(noverify, "Disable data verification (default: verify)");
+
+static bool norandom;
+module_param(norandom, bool, 0644);
+MODULE_PARM_DESC(norandom, "Disable random offset setup (default: random)");
 
 static bool verbose;
 module_param(verbose, bool, S_IRUGO | S_IWUSR);
@@ -103,6 +107,7 @@ struct dmatest_params {
 	unsigned int	pq_sources;
 	int		timeout;
 	bool		noverify;
+	bool		norandom;
 };
 
 /**
@@ -575,7 +580,7 @@ static int dmatest_func(void *data)
 			break;
 		}
 
-		if (params->noverify)
+		if (params->norandom)
 			len = params->buf_size;
 		else
 			len = dmatest_random() % params->buf_size + 1;
@@ -586,17 +591,19 @@ static int dmatest_func(void *data)
 
 		total_len += len;
 
-		if (params->noverify) {
+		if (params->norandom) {
 			src_off = 0;
 			dst_off = 0;
 		} else {
-			start = ktime_get();
 			src_off = dmatest_random() % (params->buf_size - len + 1);
 			dst_off = dmatest_random() % (params->buf_size - len + 1);
 
 			src_off = (src_off >> align) << align;
 			dst_off = (dst_off >> align) << align;
+		}
 
+		if (!params->noverify) {
+			start = ktime_get();
 			dmatest_init_srcs(thread->srcs, src_off, len,
 					  params->buf_size, is_memset);
 			dmatest_init_dsts(thread->dsts, dst_off, len,
@@ -975,6 +982,7 @@ static void run_threaded_test(struct dmatest_info *info)
 	params->pq_sources = pq_sources;
 	params->timeout = timeout;
 	params->noverify = noverify;
+	params->norandom = norandom;
 
 	request_channels(info, DMA_MEMCPY);
 	request_channels(info, DMA_MEMSET);
diff --git a/drivers/dma/dw-axi-dmac/Makefile b/drivers/dma/dw-axi-dmac/Makefile
new file mode 100644
index 000000000000..4bfa462005be
--- /dev/null
+++ b/drivers/dma/dw-axi-dmac/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_DW_AXI_DMAC) += dw-axi-dmac-platform.o
diff --git a/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c b/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c
new file mode 100644
index 000000000000..c4eb55e3011c
--- /dev/null
+++ b/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c
@@ -0,0 +1,1008 @@
+// SPDX-License-Identifier:  GPL-2.0
+// (C) 2017-2018 Synopsys, Inc. (www.synopsys.com)
+
+/*
+ * Synopsys DesignWare AXI DMA Controller driver.
+ *
+ * Author: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
+ */
+
+#include <linux/bitops.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/dmaengine.h>
+#include <linux/dmapool.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/property.h>
+#include <linux/types.h>
+
+#include "dw-axi-dmac.h"
+#include "../dmaengine.h"
+#include "../virt-dma.h"
+
+/*
+ * The set of bus widths supported by the DMA controller. DW AXI DMAC supports
+ * master data bus width up to 512 bits (for both AXI master interfaces), but
+ * it depends on IP block configurarion.
+ */
+#define AXI_DMA_BUSWIDTHS		  \
+	(DMA_SLAVE_BUSWIDTH_1_BYTE	| \
+	DMA_SLAVE_BUSWIDTH_2_BYTES	| \
+	DMA_SLAVE_BUSWIDTH_4_BYTES	| \
+	DMA_SLAVE_BUSWIDTH_8_BYTES	| \
+	DMA_SLAVE_BUSWIDTH_16_BYTES	| \
+	DMA_SLAVE_BUSWIDTH_32_BYTES	| \
+	DMA_SLAVE_BUSWIDTH_64_BYTES)
+
+static inline void
+axi_dma_iowrite32(struct axi_dma_chip *chip, u32 reg, u32 val)
+{
+	iowrite32(val, chip->regs + reg);
+}
+
+static inline u32 axi_dma_ioread32(struct axi_dma_chip *chip, u32 reg)
+{
+	return ioread32(chip->regs + reg);
+}
+
+static inline void
+axi_chan_iowrite32(struct axi_dma_chan *chan, u32 reg, u32 val)
+{
+	iowrite32(val, chan->chan_regs + reg);
+}
+
+static inline u32 axi_chan_ioread32(struct axi_dma_chan *chan, u32 reg)
+{
+	return ioread32(chan->chan_regs + reg);
+}
+
+static inline void
+axi_chan_iowrite64(struct axi_dma_chan *chan, u32 reg, u64 val)
+{
+	/*
+	 * We split one 64 bit write for two 32 bit write as some HW doesn't
+	 * support 64 bit access.
+	 */
+	iowrite32(lower_32_bits(val), chan->chan_regs + reg);
+	iowrite32(upper_32_bits(val), chan->chan_regs + reg + 4);
+}
+
+static inline void axi_dma_disable(struct axi_dma_chip *chip)
+{
+	u32 val;
+
+	val = axi_dma_ioread32(chip, DMAC_CFG);
+	val &= ~DMAC_EN_MASK;
+	axi_dma_iowrite32(chip, DMAC_CFG, val);
+}
+
+static inline void axi_dma_enable(struct axi_dma_chip *chip)
+{
+	u32 val;
+
+	val = axi_dma_ioread32(chip, DMAC_CFG);
+	val |= DMAC_EN_MASK;
+	axi_dma_iowrite32(chip, DMAC_CFG, val);
+}
+
+static inline void axi_dma_irq_disable(struct axi_dma_chip *chip)
+{
+	u32 val;
+
+	val = axi_dma_ioread32(chip, DMAC_CFG);
+	val &= ~INT_EN_MASK;
+	axi_dma_iowrite32(chip, DMAC_CFG, val);
+}
+
+static inline void axi_dma_irq_enable(struct axi_dma_chip *chip)
+{
+	u32 val;
+
+	val = axi_dma_ioread32(chip, DMAC_CFG);
+	val |= INT_EN_MASK;
+	axi_dma_iowrite32(chip, DMAC_CFG, val);
+}
+
+static inline void axi_chan_irq_disable(struct axi_dma_chan *chan, u32 irq_mask)
+{
+	u32 val;
+
+	if (likely(irq_mask == DWAXIDMAC_IRQ_ALL)) {
+		axi_chan_iowrite32(chan, CH_INTSTATUS_ENA, DWAXIDMAC_IRQ_NONE);
+	} else {
+		val = axi_chan_ioread32(chan, CH_INTSTATUS_ENA);
+		val &= ~irq_mask;
+		axi_chan_iowrite32(chan, CH_INTSTATUS_ENA, val);
+	}
+}
+
+static inline void axi_chan_irq_set(struct axi_dma_chan *chan, u32 irq_mask)
+{
+	axi_chan_iowrite32(chan, CH_INTSTATUS_ENA, irq_mask);
+}
+
+static inline void axi_chan_irq_sig_set(struct axi_dma_chan *chan, u32 irq_mask)
+{
+	axi_chan_iowrite32(chan, CH_INTSIGNAL_ENA, irq_mask);
+}
+
+static inline void axi_chan_irq_clear(struct axi_dma_chan *chan, u32 irq_mask)
+{
+	axi_chan_iowrite32(chan, CH_INTCLEAR, irq_mask);
+}
+
+static inline u32 axi_chan_irq_read(struct axi_dma_chan *chan)
+{
+	return axi_chan_ioread32(chan, CH_INTSTATUS);
+}
+
+static inline void axi_chan_disable(struct axi_dma_chan *chan)
+{
+	u32 val;
+
+	val = axi_dma_ioread32(chan->chip, DMAC_CHEN);
+	val &= ~(BIT(chan->id) << DMAC_CHAN_EN_SHIFT);
+	val |=   BIT(chan->id) << DMAC_CHAN_EN_WE_SHIFT;
+	axi_dma_iowrite32(chan->chip, DMAC_CHEN, val);
+}
+
+static inline void axi_chan_enable(struct axi_dma_chan *chan)
+{
+	u32 val;
+
+	val = axi_dma_ioread32(chan->chip, DMAC_CHEN);
+	val |= BIT(chan->id) << DMAC_CHAN_EN_SHIFT |
+	       BIT(chan->id) << DMAC_CHAN_EN_WE_SHIFT;
+	axi_dma_iowrite32(chan->chip, DMAC_CHEN, val);
+}
+
+static inline bool axi_chan_is_hw_enable(struct axi_dma_chan *chan)
+{
+	u32 val;
+
+	val = axi_dma_ioread32(chan->chip, DMAC_CHEN);
+
+	return !!(val & (BIT(chan->id) << DMAC_CHAN_EN_SHIFT));
+}
+
+static void axi_dma_hw_init(struct axi_dma_chip *chip)
+{
+	u32 i;
+
+	for (i = 0; i < chip->dw->hdata->nr_channels; i++) {
+		axi_chan_irq_disable(&chip->dw->chan[i], DWAXIDMAC_IRQ_ALL);
+		axi_chan_disable(&chip->dw->chan[i]);
+	}
+}
+
+static u32 axi_chan_get_xfer_width(struct axi_dma_chan *chan, dma_addr_t src,
+				   dma_addr_t dst, size_t len)
+{
+	u32 max_width = chan->chip->dw->hdata->m_data_width;
+
+	return __ffs(src | dst | len | BIT(max_width));
+}
+
+static inline const char *axi_chan_name(struct axi_dma_chan *chan)
+{
+	return dma_chan_name(&chan->vc.chan);
+}
+
+static struct axi_dma_desc *axi_desc_get(struct axi_dma_chan *chan)
+{
+	struct dw_axi_dma *dw = chan->chip->dw;
+	struct axi_dma_desc *desc;
+	dma_addr_t phys;
+
+	desc = dma_pool_zalloc(dw->desc_pool, GFP_NOWAIT, &phys);
+	if (unlikely(!desc)) {
+		dev_err(chan2dev(chan), "%s: not enough descriptors available\n",
+			axi_chan_name(chan));
+		return NULL;
+	}
+
+	atomic_inc(&chan->descs_allocated);
+	INIT_LIST_HEAD(&desc->xfer_list);
+	desc->vd.tx.phys = phys;
+	desc->chan = chan;
+
+	return desc;
+}
+
+static void axi_desc_put(struct axi_dma_desc *desc)
+{
+	struct axi_dma_chan *chan = desc->chan;
+	struct dw_axi_dma *dw = chan->chip->dw;
+	struct axi_dma_desc *child, *_next;
+	unsigned int descs_put = 0;
+
+	list_for_each_entry_safe(child, _next, &desc->xfer_list, xfer_list) {
+		list_del(&child->xfer_list);
+		dma_pool_free(dw->desc_pool, child, child->vd.tx.phys);
+		descs_put++;
+	}
+
+	dma_pool_free(dw->desc_pool, desc, desc->vd.tx.phys);
+	descs_put++;
+
+	atomic_sub(descs_put, &chan->descs_allocated);
+	dev_vdbg(chan2dev(chan), "%s: %d descs put, %d still allocated\n",
+		axi_chan_name(chan), descs_put,
+		atomic_read(&chan->descs_allocated));
+}
+
+static void vchan_desc_put(struct virt_dma_desc *vdesc)
+{
+	axi_desc_put(vd_to_axi_desc(vdesc));
+}
+
+static enum dma_status
+dma_chan_tx_status(struct dma_chan *dchan, dma_cookie_t cookie,
+		  struct dma_tx_state *txstate)
+{
+	struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+	enum dma_status ret;
+
+	ret = dma_cookie_status(dchan, cookie, txstate);
+
+	if (chan->is_paused && ret == DMA_IN_PROGRESS)
+		ret = DMA_PAUSED;
+
+	return ret;
+}
+
+static void write_desc_llp(struct axi_dma_desc *desc, dma_addr_t adr)
+{
+	desc->lli.llp = cpu_to_le64(adr);
+}
+
+static void write_chan_llp(struct axi_dma_chan *chan, dma_addr_t adr)
+{
+	axi_chan_iowrite64(chan, CH_LLP, adr);
+}
+
+/* Called in chan locked context */
+static void axi_chan_block_xfer_start(struct axi_dma_chan *chan,
+				      struct axi_dma_desc *first)
+{
+	u32 priority = chan->chip->dw->hdata->priority[chan->id];
+	u32 reg, irq_mask;
+	u8 lms = 0; /* Select AXI0 master for LLI fetching */
+
+	if (unlikely(axi_chan_is_hw_enable(chan))) {
+		dev_err(chan2dev(chan), "%s is non-idle!\n",
+			axi_chan_name(chan));
+
+		return;
+	}
+
+	axi_dma_enable(chan->chip);
+
+	reg = (DWAXIDMAC_MBLK_TYPE_LL << CH_CFG_L_DST_MULTBLK_TYPE_POS |
+	       DWAXIDMAC_MBLK_TYPE_LL << CH_CFG_L_SRC_MULTBLK_TYPE_POS);
+	axi_chan_iowrite32(chan, CH_CFG_L, reg);
+
+	reg = (DWAXIDMAC_TT_FC_MEM_TO_MEM_DMAC << CH_CFG_H_TT_FC_POS |
+	       priority << CH_CFG_H_PRIORITY_POS |
+	       DWAXIDMAC_HS_SEL_HW << CH_CFG_H_HS_SEL_DST_POS |
+	       DWAXIDMAC_HS_SEL_HW << CH_CFG_H_HS_SEL_SRC_POS);
+	axi_chan_iowrite32(chan, CH_CFG_H, reg);
+
+	write_chan_llp(chan, first->vd.tx.phys | lms);
+
+	irq_mask = DWAXIDMAC_IRQ_DMA_TRF | DWAXIDMAC_IRQ_ALL_ERR;
+	axi_chan_irq_sig_set(chan, irq_mask);
+
+	/* Generate 'suspend' status but don't generate interrupt */
+	irq_mask |= DWAXIDMAC_IRQ_SUSPENDED;
+	axi_chan_irq_set(chan, irq_mask);
+
+	axi_chan_enable(chan);
+}
+
+static void axi_chan_start_first_queued(struct axi_dma_chan *chan)
+{
+	struct axi_dma_desc *desc;
+	struct virt_dma_desc *vd;
+
+	vd = vchan_next_desc(&chan->vc);
+	if (!vd)
+		return;
+
+	desc = vd_to_axi_desc(vd);
+	dev_vdbg(chan2dev(chan), "%s: started %u\n", axi_chan_name(chan),
+		vd->tx.cookie);
+	axi_chan_block_xfer_start(chan, desc);
+}
+
+static void dma_chan_issue_pending(struct dma_chan *dchan)
+{
+	struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->vc.lock, flags);
+	if (vchan_issue_pending(&chan->vc))
+		axi_chan_start_first_queued(chan);
+	spin_unlock_irqrestore(&chan->vc.lock, flags);
+}
+
+static int dma_chan_alloc_chan_resources(struct dma_chan *dchan)
+{
+	struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+
+	/* ASSERT: channel is idle */
+	if (axi_chan_is_hw_enable(chan)) {
+		dev_err(chan2dev(chan), "%s is non-idle!\n",
+			axi_chan_name(chan));
+		return -EBUSY;
+	}
+
+	dev_vdbg(dchan2dev(dchan), "%s: allocating\n", axi_chan_name(chan));
+
+	pm_runtime_get(chan->chip->dev);
+
+	return 0;
+}
+
+static void dma_chan_free_chan_resources(struct dma_chan *dchan)
+{
+	struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+
+	/* ASSERT: channel is idle */
+	if (axi_chan_is_hw_enable(chan))
+		dev_err(dchan2dev(dchan), "%s is non-idle!\n",
+			axi_chan_name(chan));
+
+	axi_chan_disable(chan);
+	axi_chan_irq_disable(chan, DWAXIDMAC_IRQ_ALL);
+
+	vchan_free_chan_resources(&chan->vc);
+
+	dev_vdbg(dchan2dev(dchan),
+		 "%s: free resources, descriptor still allocated: %u\n",
+		 axi_chan_name(chan), atomic_read(&chan->descs_allocated));
+
+	pm_runtime_put(chan->chip->dev);
+}
+
+/*
+ * If DW_axi_dmac sees CHx_CTL.ShadowReg_Or_LLI_Last bit of the fetched LLI
+ * as 1, it understands that the current block is the final block in the
+ * transfer and completes the DMA transfer operation at the end of current
+ * block transfer.
+ */
+static void set_desc_last(struct axi_dma_desc *desc)
+{
+	u32 val;
+
+	val = le32_to_cpu(desc->lli.ctl_hi);
+	val |= CH_CTL_H_LLI_LAST;
+	desc->lli.ctl_hi = cpu_to_le32(val);
+}
+
+static void write_desc_sar(struct axi_dma_desc *desc, dma_addr_t adr)
+{
+	desc->lli.sar = cpu_to_le64(adr);
+}
+
+static void write_desc_dar(struct axi_dma_desc *desc, dma_addr_t adr)
+{
+	desc->lli.dar = cpu_to_le64(adr);
+}
+
+static void set_desc_src_master(struct axi_dma_desc *desc)
+{
+	u32 val;
+
+	/* Select AXI0 for source master */
+	val = le32_to_cpu(desc->lli.ctl_lo);
+	val &= ~CH_CTL_L_SRC_MAST;
+	desc->lli.ctl_lo = cpu_to_le32(val);
+}
+
+static void set_desc_dest_master(struct axi_dma_desc *desc)
+{
+	u32 val;
+
+	/* Select AXI1 for source master if available */
+	val = le32_to_cpu(desc->lli.ctl_lo);
+	if (desc->chan->chip->dw->hdata->nr_masters > 1)
+		val |= CH_CTL_L_DST_MAST;
+	else
+		val &= ~CH_CTL_L_DST_MAST;
+
+	desc->lli.ctl_lo = cpu_to_le32(val);
+}
+
+static struct dma_async_tx_descriptor *
+dma_chan_prep_dma_memcpy(struct dma_chan *dchan, dma_addr_t dst_adr,
+			 dma_addr_t src_adr, size_t len, unsigned long flags)
+{
+	struct axi_dma_desc *first = NULL, *desc = NULL, *prev = NULL;
+	struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+	size_t block_ts, max_block_ts, xfer_len;
+	u32 xfer_width, reg;
+	u8 lms = 0; /* Select AXI0 master for LLI fetching */
+
+	dev_dbg(chan2dev(chan), "%s: memcpy: src: %pad dst: %pad length: %zd flags: %#lx",
+		axi_chan_name(chan), &src_adr, &dst_adr, len, flags);
+
+	max_block_ts = chan->chip->dw->hdata->block_size[chan->id];
+
+	while (len) {
+		xfer_len = len;
+
+		/*
+		 * Take care for the alignment.
+		 * Actually source and destination widths can be different, but
+		 * make them same to be simpler.
+		 */
+		xfer_width = axi_chan_get_xfer_width(chan, src_adr, dst_adr, xfer_len);
+
+		/*
+		 * block_ts indicates the total number of data of width
+		 * to be transferred in a DMA block transfer.
+		 * BLOCK_TS register should be set to block_ts - 1
+		 */
+		block_ts = xfer_len >> xfer_width;
+		if (block_ts > max_block_ts) {
+			block_ts = max_block_ts;
+			xfer_len = max_block_ts << xfer_width;
+		}
+
+		desc = axi_desc_get(chan);
+		if (unlikely(!desc))
+			goto err_desc_get;
+
+		write_desc_sar(desc, src_adr);
+		write_desc_dar(desc, dst_adr);
+		desc->lli.block_ts_lo = cpu_to_le32(block_ts - 1);
+
+		reg = CH_CTL_H_LLI_VALID;
+		if (chan->chip->dw->hdata->restrict_axi_burst_len) {
+			u32 burst_len = chan->chip->dw->hdata->axi_rw_burst_len;
+
+			reg |= (CH_CTL_H_ARLEN_EN |
+				burst_len << CH_CTL_H_ARLEN_POS |
+				CH_CTL_H_AWLEN_EN |
+				burst_len << CH_CTL_H_AWLEN_POS);
+		}
+		desc->lli.ctl_hi = cpu_to_le32(reg);
+
+		reg = (DWAXIDMAC_BURST_TRANS_LEN_4 << CH_CTL_L_DST_MSIZE_POS |
+		       DWAXIDMAC_BURST_TRANS_LEN_4 << CH_CTL_L_SRC_MSIZE_POS |
+		       xfer_width << CH_CTL_L_DST_WIDTH_POS |
+		       xfer_width << CH_CTL_L_SRC_WIDTH_POS |
+		       DWAXIDMAC_CH_CTL_L_INC << CH_CTL_L_DST_INC_POS |
+		       DWAXIDMAC_CH_CTL_L_INC << CH_CTL_L_SRC_INC_POS);
+		desc->lli.ctl_lo = cpu_to_le32(reg);
+
+		set_desc_src_master(desc);
+		set_desc_dest_master(desc);
+
+		/* Manage transfer list (xfer_list) */
+		if (!first) {
+			first = desc;
+		} else {
+			list_add_tail(&desc->xfer_list, &first->xfer_list);
+			write_desc_llp(prev, desc->vd.tx.phys | lms);
+		}
+		prev = desc;
+
+		/* update the length and addresses for the next loop cycle */
+		len -= xfer_len;
+		dst_adr += xfer_len;
+		src_adr += xfer_len;
+	}
+
+	/* Total len of src/dest sg == 0, so no descriptor were allocated */
+	if (unlikely(!first))
+		return NULL;
+
+	/* Set end-of-link to the last link descriptor of list */
+	set_desc_last(desc);
+
+	return vchan_tx_prep(&chan->vc, &first->vd, flags);
+
+err_desc_get:
+	axi_desc_put(first);
+	return NULL;
+}
+
+static void axi_chan_dump_lli(struct axi_dma_chan *chan,
+			      struct axi_dma_desc *desc)
+{
+	dev_err(dchan2dev(&chan->vc.chan),
+		"SAR: 0x%llx DAR: 0x%llx LLP: 0x%llx BTS 0x%x CTL: 0x%x:%08x",
+		le64_to_cpu(desc->lli.sar),
+		le64_to_cpu(desc->lli.dar),
+		le64_to_cpu(desc->lli.llp),
+		le32_to_cpu(desc->lli.block_ts_lo),
+		le32_to_cpu(desc->lli.ctl_hi),
+		le32_to_cpu(desc->lli.ctl_lo));
+}
+
+static void axi_chan_list_dump_lli(struct axi_dma_chan *chan,
+				   struct axi_dma_desc *desc_head)
+{
+	struct axi_dma_desc *desc;
+
+	axi_chan_dump_lli(chan, desc_head);
+	list_for_each_entry(desc, &desc_head->xfer_list, xfer_list)
+		axi_chan_dump_lli(chan, desc);
+}
+
+static noinline void axi_chan_handle_err(struct axi_dma_chan *chan, u32 status)
+{
+	struct virt_dma_desc *vd;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->vc.lock, flags);
+
+	axi_chan_disable(chan);
+
+	/* The bad descriptor currently is in the head of vc list */
+	vd = vchan_next_desc(&chan->vc);
+	/* Remove the completed descriptor from issued list */
+	list_del(&vd->node);
+
+	/* WARN about bad descriptor */
+	dev_err(chan2dev(chan),
+		"Bad descriptor submitted for %s, cookie: %d, irq: 0x%08x\n",
+		axi_chan_name(chan), vd->tx.cookie, status);
+	axi_chan_list_dump_lli(chan, vd_to_axi_desc(vd));
+
+	vchan_cookie_complete(vd);
+
+	/* Try to restart the controller */
+	axi_chan_start_first_queued(chan);
+
+	spin_unlock_irqrestore(&chan->vc.lock, flags);
+}
+
+static void axi_chan_block_xfer_complete(struct axi_dma_chan *chan)
+{
+	struct virt_dma_desc *vd;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->vc.lock, flags);
+	if (unlikely(axi_chan_is_hw_enable(chan))) {
+		dev_err(chan2dev(chan), "BUG: %s caught DWAXIDMAC_IRQ_DMA_TRF, but channel not idle!\n",
+			axi_chan_name(chan));
+		axi_chan_disable(chan);
+	}
+
+	/* The completed descriptor currently is in the head of vc list */
+	vd = vchan_next_desc(&chan->vc);
+	/* Remove the completed descriptor from issued list before completing */
+	list_del(&vd->node);
+	vchan_cookie_complete(vd);
+
+	/* Submit queued descriptors after processing the completed ones */
+	axi_chan_start_first_queued(chan);
+
+	spin_unlock_irqrestore(&chan->vc.lock, flags);
+}
+
+static irqreturn_t dw_axi_dma_interrupt(int irq, void *dev_id)
+{
+	struct axi_dma_chip *chip = dev_id;
+	struct dw_axi_dma *dw = chip->dw;
+	struct axi_dma_chan *chan;
+
+	u32 status, i;
+
+	/* Disable DMAC inerrupts. We'll enable them after processing chanels */
+	axi_dma_irq_disable(chip);
+
+	/* Poll, clear and process every chanel interrupt status */
+	for (i = 0; i < dw->hdata->nr_channels; i++) {
+		chan = &dw->chan[i];
+		status = axi_chan_irq_read(chan);
+		axi_chan_irq_clear(chan, status);
+
+		dev_vdbg(chip->dev, "%s %u IRQ status: 0x%08x\n",
+			axi_chan_name(chan), i, status);
+
+		if (status & DWAXIDMAC_IRQ_ALL_ERR)
+			axi_chan_handle_err(chan, status);
+		else if (status & DWAXIDMAC_IRQ_DMA_TRF)
+			axi_chan_block_xfer_complete(chan);
+	}
+
+	/* Re-enable interrupts */
+	axi_dma_irq_enable(chip);
+
+	return IRQ_HANDLED;
+}
+
+static int dma_chan_terminate_all(struct dma_chan *dchan)
+{
+	struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+	unsigned long flags;
+	LIST_HEAD(head);
+
+	spin_lock_irqsave(&chan->vc.lock, flags);
+
+	axi_chan_disable(chan);
+
+	vchan_get_all_descriptors(&chan->vc, &head);
+
+	/*
+	 * As vchan_dma_desc_free_list can access to desc_allocated list
+	 * we need to call it in vc.lock context.
+	 */
+	vchan_dma_desc_free_list(&chan->vc, &head);
+
+	spin_unlock_irqrestore(&chan->vc.lock, flags);
+
+	dev_vdbg(dchan2dev(dchan), "terminated: %s\n", axi_chan_name(chan));
+
+	return 0;
+}
+
+static int dma_chan_pause(struct dma_chan *dchan)
+{
+	struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+	unsigned long flags;
+	unsigned int timeout = 20; /* timeout iterations */
+	u32 val;
+
+	spin_lock_irqsave(&chan->vc.lock, flags);
+
+	val = axi_dma_ioread32(chan->chip, DMAC_CHEN);
+	val |= BIT(chan->id) << DMAC_CHAN_SUSP_SHIFT |
+	       BIT(chan->id) << DMAC_CHAN_SUSP_WE_SHIFT;
+	axi_dma_iowrite32(chan->chip, DMAC_CHEN, val);
+
+	do  {
+		if (axi_chan_irq_read(chan) & DWAXIDMAC_IRQ_SUSPENDED)
+			break;
+
+		udelay(2);
+	} while (--timeout);
+
+	axi_chan_irq_clear(chan, DWAXIDMAC_IRQ_SUSPENDED);
+
+	chan->is_paused = true;
+
+	spin_unlock_irqrestore(&chan->vc.lock, flags);
+
+	return timeout ? 0 : -EAGAIN;
+}
+
+/* Called in chan locked context */
+static inline void axi_chan_resume(struct axi_dma_chan *chan)
+{
+	u32 val;
+
+	val = axi_dma_ioread32(chan->chip, DMAC_CHEN);
+	val &= ~(BIT(chan->id) << DMAC_CHAN_SUSP_SHIFT);
+	val |=  (BIT(chan->id) << DMAC_CHAN_SUSP_WE_SHIFT);
+	axi_dma_iowrite32(chan->chip, DMAC_CHEN, val);
+
+	chan->is_paused = false;
+}
+
+static int dma_chan_resume(struct dma_chan *dchan)
+{
+	struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->vc.lock, flags);
+
+	if (chan->is_paused)
+		axi_chan_resume(chan);
+
+	spin_unlock_irqrestore(&chan->vc.lock, flags);
+
+	return 0;
+}
+
+static int axi_dma_suspend(struct axi_dma_chip *chip)
+{
+	axi_dma_irq_disable(chip);
+	axi_dma_disable(chip);
+
+	clk_disable_unprepare(chip->core_clk);
+	clk_disable_unprepare(chip->cfgr_clk);
+
+	return 0;
+}
+
+static int axi_dma_resume(struct axi_dma_chip *chip)
+{
+	int ret;
+
+	ret = clk_prepare_enable(chip->cfgr_clk);
+	if (ret < 0)
+		return ret;
+
+	ret = clk_prepare_enable(chip->core_clk);
+	if (ret < 0)
+		return ret;
+
+	axi_dma_enable(chip);
+	axi_dma_irq_enable(chip);
+
+	return 0;
+}
+
+static int __maybe_unused axi_dma_runtime_suspend(struct device *dev)
+{
+	struct axi_dma_chip *chip = dev_get_drvdata(dev);
+
+	return axi_dma_suspend(chip);
+}
+
+static int __maybe_unused axi_dma_runtime_resume(struct device *dev)
+{
+	struct axi_dma_chip *chip = dev_get_drvdata(dev);
+
+	return axi_dma_resume(chip);
+}
+
+static int parse_device_properties(struct axi_dma_chip *chip)
+{
+	struct device *dev = chip->dev;
+	u32 tmp, carr[DMAC_MAX_CHANNELS];
+	int ret;
+
+	ret = device_property_read_u32(dev, "dma-channels", &tmp);
+	if (ret)
+		return ret;
+	if (tmp == 0 || tmp > DMAC_MAX_CHANNELS)
+		return -EINVAL;
+
+	chip->dw->hdata->nr_channels = tmp;
+
+	ret = device_property_read_u32(dev, "snps,dma-masters", &tmp);
+	if (ret)
+		return ret;
+	if (tmp == 0 || tmp > DMAC_MAX_MASTERS)
+		return -EINVAL;
+
+	chip->dw->hdata->nr_masters = tmp;
+
+	ret = device_property_read_u32(dev, "snps,data-width", &tmp);
+	if (ret)
+		return ret;
+	if (tmp > DWAXIDMAC_TRANS_WIDTH_MAX)
+		return -EINVAL;
+
+	chip->dw->hdata->m_data_width = tmp;
+
+	ret = device_property_read_u32_array(dev, "snps,block-size", carr,
+					     chip->dw->hdata->nr_channels);
+	if (ret)
+		return ret;
+	for (tmp = 0; tmp < chip->dw->hdata->nr_channels; tmp++) {
+		if (carr[tmp] == 0 || carr[tmp] > DMAC_MAX_BLK_SIZE)
+			return -EINVAL;
+
+		chip->dw->hdata->block_size[tmp] = carr[tmp];
+	}
+
+	ret = device_property_read_u32_array(dev, "snps,priority", carr,
+					     chip->dw->hdata->nr_channels);
+	if (ret)
+		return ret;
+	/* Priority value must be programmed within [0:nr_channels-1] range */
+	for (tmp = 0; tmp < chip->dw->hdata->nr_channels; tmp++) {
+		if (carr[tmp] >= chip->dw->hdata->nr_channels)
+			return -EINVAL;
+
+		chip->dw->hdata->priority[tmp] = carr[tmp];
+	}
+
+	/* axi-max-burst-len is optional property */
+	ret = device_property_read_u32(dev, "snps,axi-max-burst-len", &tmp);
+	if (!ret) {
+		if (tmp > DWAXIDMAC_ARWLEN_MAX + 1)
+			return -EINVAL;
+		if (tmp < DWAXIDMAC_ARWLEN_MIN + 1)
+			return -EINVAL;
+
+		chip->dw->hdata->restrict_axi_burst_len = true;
+		chip->dw->hdata->axi_rw_burst_len = tmp - 1;
+	}
+
+	return 0;
+}
+
+static int dw_probe(struct platform_device *pdev)
+{
+	struct axi_dma_chip *chip;
+	struct resource *mem;
+	struct dw_axi_dma *dw;
+	struct dw_axi_dma_hcfg *hdata;
+	u32 i;
+	int ret;
+
+	chip = devm_kzalloc(&pdev->dev, sizeof(*chip), GFP_KERNEL);
+	if (!chip)
+		return -ENOMEM;
+
+	dw = devm_kzalloc(&pdev->dev, sizeof(*dw), GFP_KERNEL);
+	if (!dw)
+		return -ENOMEM;
+
+	hdata = devm_kzalloc(&pdev->dev, sizeof(*hdata), GFP_KERNEL);
+	if (!hdata)
+		return -ENOMEM;
+
+	chip->dw = dw;
+	chip->dev = &pdev->dev;
+	chip->dw->hdata = hdata;
+
+	chip->irq = platform_get_irq(pdev, 0);
+	if (chip->irq < 0)
+		return chip->irq;
+
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	chip->regs = devm_ioremap_resource(chip->dev, mem);
+	if (IS_ERR(chip->regs))
+		return PTR_ERR(chip->regs);
+
+	chip->core_clk = devm_clk_get(chip->dev, "core-clk");
+	if (IS_ERR(chip->core_clk))
+		return PTR_ERR(chip->core_clk);
+
+	chip->cfgr_clk = devm_clk_get(chip->dev, "cfgr-clk");
+	if (IS_ERR(chip->cfgr_clk))
+		return PTR_ERR(chip->cfgr_clk);
+
+	ret = parse_device_properties(chip);
+	if (ret)
+		return ret;
+
+	dw->chan = devm_kcalloc(chip->dev, hdata->nr_channels,
+				sizeof(*dw->chan), GFP_KERNEL);
+	if (!dw->chan)
+		return -ENOMEM;
+
+	ret = devm_request_irq(chip->dev, chip->irq, dw_axi_dma_interrupt,
+			       IRQF_SHARED, KBUILD_MODNAME, chip);
+	if (ret)
+		return ret;
+
+	/* Lli address must be aligned to a 64-byte boundary */
+	dw->desc_pool = dmam_pool_create(KBUILD_MODNAME, chip->dev,
+					 sizeof(struct axi_dma_desc), 64, 0);
+	if (!dw->desc_pool) {
+		dev_err(chip->dev, "No memory for descriptors dma pool\n");
+		return -ENOMEM;
+	}
+
+	INIT_LIST_HEAD(&dw->dma.channels);
+	for (i = 0; i < hdata->nr_channels; i++) {
+		struct axi_dma_chan *chan = &dw->chan[i];
+
+		chan->chip = chip;
+		chan->id = i;
+		chan->chan_regs = chip->regs + COMMON_REG_LEN + i * CHAN_REG_LEN;
+		atomic_set(&chan->descs_allocated, 0);
+
+		chan->vc.desc_free = vchan_desc_put;
+		vchan_init(&chan->vc, &dw->dma);
+	}
+
+	/* Set capabilities */
+	dma_cap_set(DMA_MEMCPY, dw->dma.cap_mask);
+
+	/* DMA capabilities */
+	dw->dma.chancnt = hdata->nr_channels;
+	dw->dma.src_addr_widths = AXI_DMA_BUSWIDTHS;
+	dw->dma.dst_addr_widths = AXI_DMA_BUSWIDTHS;
+	dw->dma.directions = BIT(DMA_MEM_TO_MEM);
+	dw->dma.residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
+
+	dw->dma.dev = chip->dev;
+	dw->dma.device_tx_status = dma_chan_tx_status;
+	dw->dma.device_issue_pending = dma_chan_issue_pending;
+	dw->dma.device_terminate_all = dma_chan_terminate_all;
+	dw->dma.device_pause = dma_chan_pause;
+	dw->dma.device_resume = dma_chan_resume;
+
+	dw->dma.device_alloc_chan_resources = dma_chan_alloc_chan_resources;
+	dw->dma.device_free_chan_resources = dma_chan_free_chan_resources;
+
+	dw->dma.device_prep_dma_memcpy = dma_chan_prep_dma_memcpy;
+
+	platform_set_drvdata(pdev, chip);
+
+	pm_runtime_enable(chip->dev);
+
+	/*
+	 * We can't just call pm_runtime_get here instead of
+	 * pm_runtime_get_noresume + axi_dma_resume because we need
+	 * driver to work also without Runtime PM.
+	 */
+	pm_runtime_get_noresume(chip->dev);
+	ret = axi_dma_resume(chip);
+	if (ret < 0)
+		goto err_pm_disable;
+
+	axi_dma_hw_init(chip);
+
+	pm_runtime_put(chip->dev);
+
+	ret = dma_async_device_register(&dw->dma);
+	if (ret)
+		goto err_pm_disable;
+
+	dev_info(chip->dev, "DesignWare AXI DMA Controller, %d channels\n",
+		 dw->hdata->nr_channels);
+
+	return 0;
+
+err_pm_disable:
+	pm_runtime_disable(chip->dev);
+
+	return ret;
+}
+
+static int dw_remove(struct platform_device *pdev)
+{
+	struct axi_dma_chip *chip = platform_get_drvdata(pdev);
+	struct dw_axi_dma *dw = chip->dw;
+	struct axi_dma_chan *chan, *_chan;
+	u32 i;
+
+	/* Enable clk before accessing to registers */
+	clk_prepare_enable(chip->cfgr_clk);
+	clk_prepare_enable(chip->core_clk);
+	axi_dma_irq_disable(chip);
+	for (i = 0; i < dw->hdata->nr_channels; i++) {
+		axi_chan_disable(&chip->dw->chan[i]);
+		axi_chan_irq_disable(&chip->dw->chan[i], DWAXIDMAC_IRQ_ALL);
+	}
+	axi_dma_disable(chip);
+
+	pm_runtime_disable(chip->dev);
+	axi_dma_suspend(chip);
+
+	devm_free_irq(chip->dev, chip->irq, chip);
+
+	list_for_each_entry_safe(chan, _chan, &dw->dma.channels,
+			vc.chan.device_node) {
+		list_del(&chan->vc.chan.device_node);
+		tasklet_kill(&chan->vc.task);
+	}
+
+	dma_async_device_unregister(&dw->dma);
+
+	return 0;
+}
+
+static const struct dev_pm_ops dw_axi_dma_pm_ops = {
+	SET_RUNTIME_PM_OPS(axi_dma_runtime_suspend, axi_dma_runtime_resume, NULL)
+};
+
+static const struct of_device_id dw_dma_of_id_table[] = {
+	{ .compatible = "snps,axi-dma-1.01a" },
+	{}
+};
+MODULE_DEVICE_TABLE(of, dw_dma_of_id_table);
+
+static struct platform_driver dw_driver = {
+	.probe		= dw_probe,
+	.remove		= dw_remove,
+	.driver = {
+		.name	= KBUILD_MODNAME,
+		.of_match_table = of_match_ptr(dw_dma_of_id_table),
+		.pm = &dw_axi_dma_pm_ops,
+	},
+};
+module_platform_driver(dw_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Synopsys DesignWare AXI DMA Controller platform driver");
+MODULE_AUTHOR("Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>");
diff --git a/drivers/dma/dw-axi-dmac/dw-axi-dmac.h b/drivers/dma/dw-axi-dmac/dw-axi-dmac.h
new file mode 100644
index 000000000000..f8888dc0b8dc
--- /dev/null
+++ b/drivers/dma/dw-axi-dmac/dw-axi-dmac.h
@@ -0,0 +1,334 @@
+// SPDX-License-Identifier:  GPL-2.0
+// (C) 2017-2018 Synopsys, Inc. (www.synopsys.com)
+
+/*
+ * Synopsys DesignWare AXI DMA Controller driver.
+ *
+ * Author: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
+ */
+
+#ifndef _AXI_DMA_PLATFORM_H
+#define _AXI_DMA_PLATFORM_H
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/dmaengine.h>
+#include <linux/types.h>
+
+#include "../virt-dma.h"
+
+#define DMAC_MAX_CHANNELS	8
+#define DMAC_MAX_MASTERS	2
+#define DMAC_MAX_BLK_SIZE	0x200000
+
+struct dw_axi_dma_hcfg {
+	u32	nr_channels;
+	u32	nr_masters;
+	u32	m_data_width;
+	u32	block_size[DMAC_MAX_CHANNELS];
+	u32	priority[DMAC_MAX_CHANNELS];
+	/* maximum supported axi burst length */
+	u32	axi_rw_burst_len;
+	bool	restrict_axi_burst_len;
+};
+
+struct axi_dma_chan {
+	struct axi_dma_chip		*chip;
+	void __iomem			*chan_regs;
+	u8				id;
+	atomic_t			descs_allocated;
+
+	struct virt_dma_chan		vc;
+
+	/* these other elements are all protected by vc.lock */
+	bool				is_paused;
+};
+
+struct dw_axi_dma {
+	struct dma_device	dma;
+	struct dw_axi_dma_hcfg	*hdata;
+	struct dma_pool		*desc_pool;
+
+	/* channels */
+	struct axi_dma_chan	*chan;
+};
+
+struct axi_dma_chip {
+	struct device		*dev;
+	int			irq;
+	void __iomem		*regs;
+	struct clk		*core_clk;
+	struct clk		*cfgr_clk;
+	struct dw_axi_dma	*dw;
+};
+
+/* LLI == Linked List Item */
+struct __packed axi_dma_lli {
+	__le64		sar;
+	__le64		dar;
+	__le32		block_ts_lo;
+	__le32		block_ts_hi;
+	__le64		llp;
+	__le32		ctl_lo;
+	__le32		ctl_hi;
+	__le32		sstat;
+	__le32		dstat;
+	__le32		status_lo;
+	__le32		ststus_hi;
+	__le32		reserved_lo;
+	__le32		reserved_hi;
+};
+
+struct axi_dma_desc {
+	struct axi_dma_lli		lli;
+
+	struct virt_dma_desc		vd;
+	struct axi_dma_chan		*chan;
+	struct list_head		xfer_list;
+};
+
+static inline struct device *dchan2dev(struct dma_chan *dchan)
+{
+	return &dchan->dev->device;
+}
+
+static inline struct device *chan2dev(struct axi_dma_chan *chan)
+{
+	return &chan->vc.chan.dev->device;
+}
+
+static inline struct axi_dma_desc *vd_to_axi_desc(struct virt_dma_desc *vd)
+{
+	return container_of(vd, struct axi_dma_desc, vd);
+}
+
+static inline struct axi_dma_chan *vc_to_axi_dma_chan(struct virt_dma_chan *vc)
+{
+	return container_of(vc, struct axi_dma_chan, vc);
+}
+
+static inline struct axi_dma_chan *dchan_to_axi_dma_chan(struct dma_chan *dchan)
+{
+	return vc_to_axi_dma_chan(to_virt_chan(dchan));
+}
+
+
+#define COMMON_REG_LEN		0x100
+#define CHAN_REG_LEN		0x100
+
+/* Common registers offset */
+#define DMAC_ID			0x000 /* R DMAC ID */
+#define DMAC_COMPVER		0x008 /* R DMAC Component Version */
+#define DMAC_CFG		0x010 /* R/W DMAC Configuration */
+#define DMAC_CHEN		0x018 /* R/W DMAC Channel Enable */
+#define DMAC_CHEN_L		0x018 /* R/W DMAC Channel Enable 00-31 */
+#define DMAC_CHEN_H		0x01C /* R/W DMAC Channel Enable 32-63 */
+#define DMAC_INTSTATUS		0x030 /* R DMAC Interrupt Status */
+#define DMAC_COMMON_INTCLEAR	0x038 /* W DMAC Interrupt Clear */
+#define DMAC_COMMON_INTSTATUS_ENA 0x040 /* R DMAC Interrupt Status Enable */
+#define DMAC_COMMON_INTSIGNAL_ENA 0x048 /* R/W DMAC Interrupt Signal Enable */
+#define DMAC_COMMON_INTSTATUS	0x050 /* R DMAC Interrupt Status */
+#define DMAC_RESET		0x058 /* R DMAC Reset Register1 */
+
+/* DMA channel registers offset */
+#define CH_SAR			0x000 /* R/W Chan Source Address */
+#define CH_DAR			0x008 /* R/W Chan Destination Address */
+#define CH_BLOCK_TS		0x010 /* R/W Chan Block Transfer Size */
+#define CH_CTL			0x018 /* R/W Chan Control */
+#define CH_CTL_L		0x018 /* R/W Chan Control 00-31 */
+#define CH_CTL_H		0x01C /* R/W Chan Control 32-63 */
+#define CH_CFG			0x020 /* R/W Chan Configuration */
+#define CH_CFG_L		0x020 /* R/W Chan Configuration 00-31 */
+#define CH_CFG_H		0x024 /* R/W Chan Configuration 32-63 */
+#define CH_LLP			0x028 /* R/W Chan Linked List Pointer */
+#define CH_STATUS		0x030 /* R Chan Status */
+#define CH_SWHSSRC		0x038 /* R/W Chan SW Handshake Source */
+#define CH_SWHSDST		0x040 /* R/W Chan SW Handshake Destination */
+#define CH_BLK_TFR_RESUMEREQ	0x048 /* W Chan Block Transfer Resume Req */
+#define CH_AXI_ID		0x050 /* R/W Chan AXI ID */
+#define CH_AXI_QOS		0x058 /* R/W Chan AXI QOS */
+#define CH_SSTAT		0x060 /* R Chan Source Status */
+#define CH_DSTAT		0x068 /* R Chan Destination Status */
+#define CH_SSTATAR		0x070 /* R/W Chan Source Status Fetch Addr */
+#define CH_DSTATAR		0x078 /* R/W Chan Destination Status Fetch Addr */
+#define CH_INTSTATUS_ENA	0x080 /* R/W Chan Interrupt Status Enable */
+#define CH_INTSTATUS		0x088 /* R/W Chan Interrupt Status */
+#define CH_INTSIGNAL_ENA	0x090 /* R/W Chan Interrupt Signal Enable */
+#define CH_INTCLEAR		0x098 /* W Chan Interrupt Clear */
+
+
+/* DMAC_CFG */
+#define DMAC_EN_POS			0
+#define DMAC_EN_MASK			BIT(DMAC_EN_POS)
+
+#define INT_EN_POS			1
+#define INT_EN_MASK			BIT(INT_EN_POS)
+
+#define DMAC_CHAN_EN_SHIFT		0
+#define DMAC_CHAN_EN_WE_SHIFT		8
+
+#define DMAC_CHAN_SUSP_SHIFT		16
+#define DMAC_CHAN_SUSP_WE_SHIFT		24
+
+/* CH_CTL_H */
+#define CH_CTL_H_ARLEN_EN		BIT(6)
+#define CH_CTL_H_ARLEN_POS		7
+#define CH_CTL_H_AWLEN_EN		BIT(15)
+#define CH_CTL_H_AWLEN_POS		16
+
+enum {
+	DWAXIDMAC_ARWLEN_1		= 0,
+	DWAXIDMAC_ARWLEN_2		= 1,
+	DWAXIDMAC_ARWLEN_4		= 3,
+	DWAXIDMAC_ARWLEN_8		= 7,
+	DWAXIDMAC_ARWLEN_16		= 15,
+	DWAXIDMAC_ARWLEN_32		= 31,
+	DWAXIDMAC_ARWLEN_64		= 63,
+	DWAXIDMAC_ARWLEN_128		= 127,
+	DWAXIDMAC_ARWLEN_256		= 255,
+	DWAXIDMAC_ARWLEN_MIN		= DWAXIDMAC_ARWLEN_1,
+	DWAXIDMAC_ARWLEN_MAX		= DWAXIDMAC_ARWLEN_256
+};
+
+#define CH_CTL_H_LLI_LAST		BIT(30)
+#define CH_CTL_H_LLI_VALID		BIT(31)
+
+/* CH_CTL_L */
+#define CH_CTL_L_LAST_WRITE_EN		BIT(30)
+
+#define CH_CTL_L_DST_MSIZE_POS		18
+#define CH_CTL_L_SRC_MSIZE_POS		14
+
+enum {
+	DWAXIDMAC_BURST_TRANS_LEN_1	= 0,
+	DWAXIDMAC_BURST_TRANS_LEN_4,
+	DWAXIDMAC_BURST_TRANS_LEN_8,
+	DWAXIDMAC_BURST_TRANS_LEN_16,
+	DWAXIDMAC_BURST_TRANS_LEN_32,
+	DWAXIDMAC_BURST_TRANS_LEN_64,
+	DWAXIDMAC_BURST_TRANS_LEN_128,
+	DWAXIDMAC_BURST_TRANS_LEN_256,
+	DWAXIDMAC_BURST_TRANS_LEN_512,
+	DWAXIDMAC_BURST_TRANS_LEN_1024
+};
+
+#define CH_CTL_L_DST_WIDTH_POS		11
+#define CH_CTL_L_SRC_WIDTH_POS		8
+
+#define CH_CTL_L_DST_INC_POS		6
+#define CH_CTL_L_SRC_INC_POS		4
+enum {
+	DWAXIDMAC_CH_CTL_L_INC		= 0,
+	DWAXIDMAC_CH_CTL_L_NOINC
+};
+
+#define CH_CTL_L_DST_MAST		BIT(2)
+#define CH_CTL_L_SRC_MAST		BIT(0)
+
+/* CH_CFG_H */
+#define CH_CFG_H_PRIORITY_POS		17
+#define CH_CFG_H_HS_SEL_DST_POS		4
+#define CH_CFG_H_HS_SEL_SRC_POS		3
+enum {
+	DWAXIDMAC_HS_SEL_HW		= 0,
+	DWAXIDMAC_HS_SEL_SW
+};
+
+#define CH_CFG_H_TT_FC_POS		0
+enum {
+	DWAXIDMAC_TT_FC_MEM_TO_MEM_DMAC	= 0,
+	DWAXIDMAC_TT_FC_MEM_TO_PER_DMAC,
+	DWAXIDMAC_TT_FC_PER_TO_MEM_DMAC,
+	DWAXIDMAC_TT_FC_PER_TO_PER_DMAC,
+	DWAXIDMAC_TT_FC_PER_TO_MEM_SRC,
+	DWAXIDMAC_TT_FC_PER_TO_PER_SRC,
+	DWAXIDMAC_TT_FC_MEM_TO_PER_DST,
+	DWAXIDMAC_TT_FC_PER_TO_PER_DST
+};
+
+/* CH_CFG_L */
+#define CH_CFG_L_DST_MULTBLK_TYPE_POS	2
+#define CH_CFG_L_SRC_MULTBLK_TYPE_POS	0
+enum {
+	DWAXIDMAC_MBLK_TYPE_CONTIGUOUS	= 0,
+	DWAXIDMAC_MBLK_TYPE_RELOAD,
+	DWAXIDMAC_MBLK_TYPE_SHADOW_REG,
+	DWAXIDMAC_MBLK_TYPE_LL
+};
+
+/**
+ * DW AXI DMA channel interrupts
+ *
+ * @DWAXIDMAC_IRQ_NONE: Bitmask of no one interrupt
+ * @DWAXIDMAC_IRQ_BLOCK_TRF: Block transfer complete
+ * @DWAXIDMAC_IRQ_DMA_TRF: Dma transfer complete
+ * @DWAXIDMAC_IRQ_SRC_TRAN: Source transaction complete
+ * @DWAXIDMAC_IRQ_DST_TRAN: Destination transaction complete
+ * @DWAXIDMAC_IRQ_SRC_DEC_ERR: Source decode error
+ * @DWAXIDMAC_IRQ_DST_DEC_ERR: Destination decode error
+ * @DWAXIDMAC_IRQ_SRC_SLV_ERR: Source slave error
+ * @DWAXIDMAC_IRQ_DST_SLV_ERR: Destination slave error
+ * @DWAXIDMAC_IRQ_LLI_RD_DEC_ERR: LLI read decode error
+ * @DWAXIDMAC_IRQ_LLI_WR_DEC_ERR: LLI write decode error
+ * @DWAXIDMAC_IRQ_LLI_RD_SLV_ERR: LLI read slave error
+ * @DWAXIDMAC_IRQ_LLI_WR_SLV_ERR: LLI write slave error
+ * @DWAXIDMAC_IRQ_INVALID_ERR: LLI invalid error or Shadow register error
+ * @DWAXIDMAC_IRQ_MULTIBLKTYPE_ERR: Slave Interface Multiblock type error
+ * @DWAXIDMAC_IRQ_DEC_ERR: Slave Interface decode error
+ * @DWAXIDMAC_IRQ_WR2RO_ERR: Slave Interface write to read only error
+ * @DWAXIDMAC_IRQ_RD2RWO_ERR: Slave Interface read to write only error
+ * @DWAXIDMAC_IRQ_WRONCHEN_ERR: Slave Interface write to channel error
+ * @DWAXIDMAC_IRQ_SHADOWREG_ERR: Slave Interface shadow reg error
+ * @DWAXIDMAC_IRQ_WRONHOLD_ERR: Slave Interface hold error
+ * @DWAXIDMAC_IRQ_LOCK_CLEARED: Lock Cleared Status
+ * @DWAXIDMAC_IRQ_SRC_SUSPENDED: Source Suspended Status
+ * @DWAXIDMAC_IRQ_SUSPENDED: Channel Suspended Status
+ * @DWAXIDMAC_IRQ_DISABLED: Channel Disabled Status
+ * @DWAXIDMAC_IRQ_ABORTED: Channel Aborted Status
+ * @DWAXIDMAC_IRQ_ALL_ERR: Bitmask of all error interrupts
+ * @DWAXIDMAC_IRQ_ALL: Bitmask of all interrupts
+ */
+enum {
+	DWAXIDMAC_IRQ_NONE		= 0,
+	DWAXIDMAC_IRQ_BLOCK_TRF		= BIT(0),
+	DWAXIDMAC_IRQ_DMA_TRF		= BIT(1),
+	DWAXIDMAC_IRQ_SRC_TRAN		= BIT(3),
+	DWAXIDMAC_IRQ_DST_TRAN		= BIT(4),
+	DWAXIDMAC_IRQ_SRC_DEC_ERR	= BIT(5),
+	DWAXIDMAC_IRQ_DST_DEC_ERR	= BIT(6),
+	DWAXIDMAC_IRQ_SRC_SLV_ERR	= BIT(7),
+	DWAXIDMAC_IRQ_DST_SLV_ERR	= BIT(8),
+	DWAXIDMAC_IRQ_LLI_RD_DEC_ERR	= BIT(9),
+	DWAXIDMAC_IRQ_LLI_WR_DEC_ERR	= BIT(10),
+	DWAXIDMAC_IRQ_LLI_RD_SLV_ERR	= BIT(11),
+	DWAXIDMAC_IRQ_LLI_WR_SLV_ERR	= BIT(12),
+	DWAXIDMAC_IRQ_INVALID_ERR	= BIT(13),
+	DWAXIDMAC_IRQ_MULTIBLKTYPE_ERR	= BIT(14),
+	DWAXIDMAC_IRQ_DEC_ERR		= BIT(16),
+	DWAXIDMAC_IRQ_WR2RO_ERR		= BIT(17),
+	DWAXIDMAC_IRQ_RD2RWO_ERR	= BIT(18),
+	DWAXIDMAC_IRQ_WRONCHEN_ERR	= BIT(19),
+	DWAXIDMAC_IRQ_SHADOWREG_ERR	= BIT(20),
+	DWAXIDMAC_IRQ_WRONHOLD_ERR	= BIT(21),
+	DWAXIDMAC_IRQ_LOCK_CLEARED	= BIT(27),
+	DWAXIDMAC_IRQ_SRC_SUSPENDED	= BIT(28),
+	DWAXIDMAC_IRQ_SUSPENDED		= BIT(29),
+	DWAXIDMAC_IRQ_DISABLED		= BIT(30),
+	DWAXIDMAC_IRQ_ABORTED		= BIT(31),
+	DWAXIDMAC_IRQ_ALL_ERR		= (GENMASK(21, 16) | GENMASK(14, 5)),
+	DWAXIDMAC_IRQ_ALL		= GENMASK(31, 0)
+};
+
+enum {
+	DWAXIDMAC_TRANS_WIDTH_8		= 0,
+	DWAXIDMAC_TRANS_WIDTH_16,
+	DWAXIDMAC_TRANS_WIDTH_32,
+	DWAXIDMAC_TRANS_WIDTH_64,
+	DWAXIDMAC_TRANS_WIDTH_128,
+	DWAXIDMAC_TRANS_WIDTH_256,
+	DWAXIDMAC_TRANS_WIDTH_512,
+	DWAXIDMAC_TRANS_WIDTH_MAX	= DWAXIDMAC_TRANS_WIDTH_512
+};
+
+#endif /* _AXI_DMA_PLATFORM_H */
diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index 948df1ab5f1a..85ea92fcea54 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -1876,6 +1876,11 @@ static void edma_dma_init(struct edma_cc *ecc, bool legacy_mode)
 
 	if (memcpy_channels) {
 		m_ddev = devm_kzalloc(ecc->dev, sizeof(*m_ddev), GFP_KERNEL);
+		if (!m_ddev) {
+			dev_warn(ecc->dev, "memcpy is disabled due to OoM\n");
+			memcpy_channels = NULL;
+			goto ch_setup;
+		}
 		ecc->dma_memcpy = m_ddev;
 
 		dma_cap_zero(m_ddev->cap_mask);
@@ -1903,6 +1908,7 @@ static void edma_dma_init(struct edma_cc *ecc, bool legacy_mode)
 		dev_info(ecc->dev, "memcpy is disabled\n");
 	}
 
+ch_setup:
 	for (i = 0; i < ecc->num_channels; i++) {
 		struct edma_chan *echan = &ecc->slave_chans[i];
 		echan->ch_num = EDMA_CTLR_CHAN(ecc->id, i);
diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index e7db24c67030..ccd03c3cedfe 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -338,6 +338,7 @@ struct sdma_channel {
 	unsigned int			chn_real_count;
 	struct tasklet_struct		tasklet;
 	struct imx_dma_data		data;
+	bool				enabled;
 };
 
 #define IMX_DMA_SG_LOOP		BIT(0)
@@ -596,7 +597,14 @@ static int sdma_config_ownership(struct sdma_channel *sdmac,
 
 static void sdma_enable_channel(struct sdma_engine *sdma, int channel)
 {
+	unsigned long flags;
+	struct sdma_channel *sdmac = &sdma->channel[channel];
+
 	writel(BIT(channel), sdma->regs + SDMA_H_START);
+
+	spin_lock_irqsave(&sdmac->lock, flags);
+	sdmac->enabled = true;
+	spin_unlock_irqrestore(&sdmac->lock, flags);
 }
 
 /*
@@ -685,6 +693,14 @@ static void sdma_update_channel_loop(struct sdma_channel *sdmac)
 	struct sdma_buffer_descriptor *bd;
 	int error = 0;
 	enum dma_status	old_status = sdmac->status;
+	unsigned long flags;
+
+	spin_lock_irqsave(&sdmac->lock, flags);
+	if (!sdmac->enabled) {
+		spin_unlock_irqrestore(&sdmac->lock, flags);
+		return;
+	}
+	spin_unlock_irqrestore(&sdmac->lock, flags);
 
 	/*
 	 * loop mode. Iterate over descriptors, re-setup them and
@@ -938,10 +954,15 @@ static int sdma_disable_channel(struct dma_chan *chan)
 	struct sdma_channel *sdmac = to_sdma_chan(chan);
 	struct sdma_engine *sdma = sdmac->sdma;
 	int channel = sdmac->channel;
+	unsigned long flags;
 
 	writel_relaxed(BIT(channel), sdma->regs + SDMA_H_STATSTOP);
 	sdmac->status = DMA_ERROR;
 
+	spin_lock_irqsave(&sdmac->lock, flags);
+	sdmac->enabled = false;
+	spin_unlock_irqrestore(&sdmac->lock, flags);
+
 	return 0;
 }
 
diff --git a/drivers/dma/mediatek/Kconfig b/drivers/dma/mediatek/Kconfig
new file mode 100644
index 000000000000..27bac0bba09e
--- /dev/null
+++ b/drivers/dma/mediatek/Kconfig
@@ -0,0 +1,13 @@
+
+config MTK_HSDMA
+	tristate "MediaTek High-Speed DMA controller support"
+	depends on ARCH_MEDIATEK || COMPILE_TEST
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	---help---
+	  Enable support for High-Speed DMA controller on MediaTek
+	  SoCs.
+
+	  This controller provides the channels which is dedicated to
+	  memory-to-memory transfer to offload from CPU through ring-
+	  based descriptor management.
diff --git a/drivers/dma/mediatek/Makefile b/drivers/dma/mediatek/Makefile
new file mode 100644
index 000000000000..6e778f842f01
--- /dev/null
+++ b/drivers/dma/mediatek/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_MTK_HSDMA) += mtk-hsdma.o
diff --git a/drivers/dma/mediatek/mtk-hsdma.c b/drivers/dma/mediatek/mtk-hsdma.c
new file mode 100644
index 000000000000..b7ec56ae02a6
--- /dev/null
+++ b/drivers/dma/mediatek/mtk-hsdma.c
@@ -0,0 +1,1056 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017-2018 MediaTek Inc.
+
+/*
+ * Driver for MediaTek High-Speed DMA Controller
+ *
+ * Author: Sean Wang <sean.wang@mediatek.com>
+ *
+ */
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/iopoll.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/refcount.h>
+#include <linux/slab.h>
+
+#include "../virt-dma.h"
+
+#define MTK_HSDMA_USEC_POLL		20
+#define MTK_HSDMA_TIMEOUT_POLL		200000
+#define MTK_HSDMA_DMA_BUSWIDTHS		BIT(DMA_SLAVE_BUSWIDTH_4_BYTES)
+
+/* The default number of virtual channel */
+#define MTK_HSDMA_NR_VCHANS		3
+
+/* Only one physical channel supported */
+#define MTK_HSDMA_NR_MAX_PCHANS		1
+
+/* Macro for physical descriptor (PD) manipulation */
+/* The number of PD which must be 2 of power */
+#define MTK_DMA_SIZE			64
+#define MTK_HSDMA_NEXT_DESP_IDX(x, y)	(((x) + 1) & ((y) - 1))
+#define MTK_HSDMA_LAST_DESP_IDX(x, y)	(((x) - 1) & ((y) - 1))
+#define MTK_HSDMA_MAX_LEN		0x3f80
+#define MTK_HSDMA_ALIGN_SIZE		4
+#define MTK_HSDMA_PLEN_MASK		0x3fff
+#define MTK_HSDMA_DESC_PLEN(x)		(((x) & MTK_HSDMA_PLEN_MASK) << 16)
+#define MTK_HSDMA_DESC_PLEN_GET(x)	(((x) >> 16) & MTK_HSDMA_PLEN_MASK)
+
+/* Registers for underlying ring manipulation */
+#define MTK_HSDMA_TX_BASE		0x0
+#define MTK_HSDMA_TX_CNT		0x4
+#define MTK_HSDMA_TX_CPU		0x8
+#define MTK_HSDMA_TX_DMA		0xc
+#define MTK_HSDMA_RX_BASE		0x100
+#define MTK_HSDMA_RX_CNT		0x104
+#define MTK_HSDMA_RX_CPU		0x108
+#define MTK_HSDMA_RX_DMA		0x10c
+
+/* Registers for global setup */
+#define MTK_HSDMA_GLO			0x204
+#define MTK_HSDMA_GLO_MULTI_DMA		BIT(10)
+#define MTK_HSDMA_TX_WB_DDONE		BIT(6)
+#define MTK_HSDMA_BURST_64BYTES		(0x2 << 4)
+#define MTK_HSDMA_GLO_RX_BUSY		BIT(3)
+#define MTK_HSDMA_GLO_RX_DMA		BIT(2)
+#define MTK_HSDMA_GLO_TX_BUSY		BIT(1)
+#define MTK_HSDMA_GLO_TX_DMA		BIT(0)
+#define MTK_HSDMA_GLO_DMA		(MTK_HSDMA_GLO_TX_DMA |	\
+					 MTK_HSDMA_GLO_RX_DMA)
+#define MTK_HSDMA_GLO_BUSY		(MTK_HSDMA_GLO_RX_BUSY | \
+					 MTK_HSDMA_GLO_TX_BUSY)
+#define MTK_HSDMA_GLO_DEFAULT		(MTK_HSDMA_GLO_TX_DMA | \
+					 MTK_HSDMA_GLO_RX_DMA | \
+					 MTK_HSDMA_TX_WB_DDONE | \
+					 MTK_HSDMA_BURST_64BYTES | \
+					 MTK_HSDMA_GLO_MULTI_DMA)
+
+/* Registers for reset */
+#define MTK_HSDMA_RESET			0x208
+#define MTK_HSDMA_RST_TX		BIT(0)
+#define MTK_HSDMA_RST_RX		BIT(16)
+
+/* Registers for interrupt control */
+#define MTK_HSDMA_DLYINT		0x20c
+#define MTK_HSDMA_RXDLY_INT_EN		BIT(15)
+
+/* Interrupt fires when the pending number's more than the specified */
+#define MTK_HSDMA_RXMAX_PINT(x)		(((x) & 0x7f) << 8)
+
+/* Interrupt fires when the pending time's more than the specified in 20 us */
+#define MTK_HSDMA_RXMAX_PTIME(x)	((x) & 0x7f)
+#define MTK_HSDMA_DLYINT_DEFAULT	(MTK_HSDMA_RXDLY_INT_EN | \
+					 MTK_HSDMA_RXMAX_PINT(20) | \
+					 MTK_HSDMA_RXMAX_PTIME(20))
+#define MTK_HSDMA_INT_STATUS		0x220
+#define MTK_HSDMA_INT_ENABLE		0x228
+#define MTK_HSDMA_INT_RXDONE		BIT(16)
+
+enum mtk_hsdma_vdesc_flag {
+	MTK_HSDMA_VDESC_FINISHED	= 0x01,
+};
+
+#define IS_MTK_HSDMA_VDESC_FINISHED(x) ((x) == MTK_HSDMA_VDESC_FINISHED)
+
+/**
+ * struct mtk_hsdma_pdesc - This is the struct holding info describing physical
+ *			    descriptor (PD) and its placement must be kept at
+ *			    4-bytes alignment in little endian order.
+ * @desc[1-4]:		    The control pad used to indicate hardware how to
+ *			    deal with the descriptor such as source and
+ *			    destination address and data length. The maximum
+ *			    data length each pdesc can handle is 0x3f80 bytes
+ */
+struct mtk_hsdma_pdesc {
+	__le32 desc1;
+	__le32 desc2;
+	__le32 desc3;
+	__le32 desc4;
+} __packed __aligned(4);
+
+/**
+ * struct mtk_hsdma_vdesc - This is the struct holding info describing virtual
+ *			    descriptor (VD)
+ * @vd:			    An instance for struct virt_dma_desc
+ * @len:		    The total data size device wants to move
+ * @residue:		    The remaining data size device will move
+ * @dest:		    The destination address device wants to move to
+ * @src:		    The source address device wants to move from
+ */
+struct mtk_hsdma_vdesc {
+	struct virt_dma_desc vd;
+	size_t len;
+	size_t residue;
+	dma_addr_t dest;
+	dma_addr_t src;
+};
+
+/**
+ * struct mtk_hsdma_cb - This is the struct holding extra info required for RX
+ *			 ring to know what relevant VD the the PD is being
+ *			 mapped to.
+ * @vd:			 Pointer to the relevant VD.
+ * @flag:		 Flag indicating what action should be taken when VD
+ *			 is completed.
+ */
+struct mtk_hsdma_cb {
+	struct virt_dma_desc *vd;
+	enum mtk_hsdma_vdesc_flag flag;
+};
+
+/**
+ * struct mtk_hsdma_ring - This struct holds info describing underlying ring
+ *			   space
+ * @txd:		   The descriptor TX ring which describes DMA source
+ *			   information
+ * @rxd:		   The descriptor RX ring which describes DMA
+ *			   destination information
+ * @cb:			   The extra information pointed at by RX ring
+ * @tphys:		   The physical addr of TX ring
+ * @rphys:		   The physical addr of RX ring
+ * @cur_tptr:		   Pointer to the next free descriptor used by the host
+ * @cur_rptr:		   Pointer to the last done descriptor by the device
+ */
+struct mtk_hsdma_ring {
+	struct mtk_hsdma_pdesc *txd;
+	struct mtk_hsdma_pdesc *rxd;
+	struct mtk_hsdma_cb *cb;
+	dma_addr_t tphys;
+	dma_addr_t rphys;
+	u16 cur_tptr;
+	u16 cur_rptr;
+};
+
+/**
+ * struct mtk_hsdma_pchan - This is the struct holding info describing physical
+ *			   channel (PC)
+ * @ring:		   An instance for the underlying ring
+ * @sz_ring:		   Total size allocated for the ring
+ * @nr_free:		   Total number of free rooms in the ring. It would
+ *			   be accessed and updated frequently between IRQ
+ *			   context and user context to reflect whether ring
+ *			   can accept requests from VD.
+ */
+struct mtk_hsdma_pchan {
+	struct mtk_hsdma_ring ring;
+	size_t sz_ring;
+	atomic_t nr_free;
+};
+
+/**
+ * struct mtk_hsdma_vchan - This is the struct holding info describing virtual
+ *			   channel (VC)
+ * @vc:			   An instance for struct virt_dma_chan
+ * @issue_completion:	   The wait for all issued descriptors completited
+ * @issue_synchronize:	   Bool indicating channel synchronization starts
+ * @desc_hw_processing:	   List those descriptors the hardware is processing,
+ *			   which is protected by vc.lock
+ */
+struct mtk_hsdma_vchan {
+	struct virt_dma_chan vc;
+	struct completion issue_completion;
+	bool issue_synchronize;
+	struct list_head desc_hw_processing;
+};
+
+/**
+ * struct mtk_hsdma_soc - This is the struct holding differences among SoCs
+ * @ddone:		  Bit mask for DDONE
+ * @ls0:		  Bit mask for LS0
+ */
+struct mtk_hsdma_soc {
+	__le32 ddone;
+	__le32 ls0;
+};
+
+/**
+ * struct mtk_hsdma_device - This is the struct holding info describing HSDMA
+ *			     device
+ * @ddev:		     An instance for struct dma_device
+ * @base:		     The mapped register I/O base
+ * @clk:		     The clock that device internal is using
+ * @irq:		     The IRQ that device are using
+ * @dma_requests:	     The number of VCs the device supports to
+ * @vc:			     The pointer to all available VCs
+ * @pc:			     The pointer to the underlying PC
+ * @pc_refcnt:		     Track how many VCs are using the PC
+ * @lock:		     Lock protect agaisting multiple VCs access PC
+ * @soc:		     The pointer to area holding differences among
+ *			     vaious platform
+ */
+struct mtk_hsdma_device {
+	struct dma_device ddev;
+	void __iomem *base;
+	struct clk *clk;
+	u32 irq;
+
+	u32 dma_requests;
+	struct mtk_hsdma_vchan *vc;
+	struct mtk_hsdma_pchan *pc;
+	refcount_t pc_refcnt;
+
+	/* Lock used to protect against multiple VCs access PC */
+	spinlock_t lock;
+
+	const struct mtk_hsdma_soc *soc;
+};
+
+static struct mtk_hsdma_device *to_hsdma_dev(struct dma_chan *chan)
+{
+	return container_of(chan->device, struct mtk_hsdma_device, ddev);
+}
+
+static inline struct mtk_hsdma_vchan *to_hsdma_vchan(struct dma_chan *chan)
+{
+	return container_of(chan, struct mtk_hsdma_vchan, vc.chan);
+}
+
+static struct mtk_hsdma_vdesc *to_hsdma_vdesc(struct virt_dma_desc *vd)
+{
+	return container_of(vd, struct mtk_hsdma_vdesc, vd);
+}
+
+static struct device *hsdma2dev(struct mtk_hsdma_device *hsdma)
+{
+	return hsdma->ddev.dev;
+}
+
+static u32 mtk_dma_read(struct mtk_hsdma_device *hsdma, u32 reg)
+{
+	return readl(hsdma->base + reg);
+}
+
+static void mtk_dma_write(struct mtk_hsdma_device *hsdma, u32 reg, u32 val)
+{
+	writel(val, hsdma->base + reg);
+}
+
+static void mtk_dma_rmw(struct mtk_hsdma_device *hsdma, u32 reg,
+			u32 mask, u32 set)
+{
+	u32 val;
+
+	val = mtk_dma_read(hsdma, reg);
+	val &= ~mask;
+	val |= set;
+	mtk_dma_write(hsdma, reg, val);
+}
+
+static void mtk_dma_set(struct mtk_hsdma_device *hsdma, u32 reg, u32 val)
+{
+	mtk_dma_rmw(hsdma, reg, 0, val);
+}
+
+static void mtk_dma_clr(struct mtk_hsdma_device *hsdma, u32 reg, u32 val)
+{
+	mtk_dma_rmw(hsdma, reg, val, 0);
+}
+
+static void mtk_hsdma_vdesc_free(struct virt_dma_desc *vd)
+{
+	kfree(container_of(vd, struct mtk_hsdma_vdesc, vd));
+}
+
+static int mtk_hsdma_busy_wait(struct mtk_hsdma_device *hsdma)
+{
+	u32 status = 0;
+
+	return readl_poll_timeout(hsdma->base + MTK_HSDMA_GLO, status,
+				  !(status & MTK_HSDMA_GLO_BUSY),
+				  MTK_HSDMA_USEC_POLL,
+				  MTK_HSDMA_TIMEOUT_POLL);
+}
+
+static int mtk_hsdma_alloc_pchan(struct mtk_hsdma_device *hsdma,
+				 struct mtk_hsdma_pchan *pc)
+{
+	struct mtk_hsdma_ring *ring = &pc->ring;
+	int err;
+
+	memset(pc, 0, sizeof(*pc));
+
+	/*
+	 * Allocate ring space where [0 ... MTK_DMA_SIZE - 1] is for TX ring
+	 * and [MTK_DMA_SIZE ... 2 * MTK_DMA_SIZE - 1] is for RX ring.
+	 */
+	pc->sz_ring = 2 * MTK_DMA_SIZE * sizeof(*ring->txd);
+	ring->txd = dma_zalloc_coherent(hsdma2dev(hsdma), pc->sz_ring,
+					&ring->tphys, GFP_NOWAIT);
+	if (!ring->txd)
+		return -ENOMEM;
+
+	ring->rxd = &ring->txd[MTK_DMA_SIZE];
+	ring->rphys = ring->tphys + MTK_DMA_SIZE * sizeof(*ring->txd);
+	ring->cur_tptr = 0;
+	ring->cur_rptr = MTK_DMA_SIZE - 1;
+
+	ring->cb = kcalloc(MTK_DMA_SIZE, sizeof(*ring->cb), GFP_NOWAIT);
+	if (!ring->cb) {
+		err = -ENOMEM;
+		goto err_free_dma;
+	}
+
+	atomic_set(&pc->nr_free, MTK_DMA_SIZE - 1);
+
+	/* Disable HSDMA and wait for the completion */
+	mtk_dma_clr(hsdma, MTK_HSDMA_GLO, MTK_HSDMA_GLO_DMA);
+	err = mtk_hsdma_busy_wait(hsdma);
+	if (err)
+		goto err_free_cb;
+
+	/* Reset */
+	mtk_dma_set(hsdma, MTK_HSDMA_RESET,
+		    MTK_HSDMA_RST_TX | MTK_HSDMA_RST_RX);
+	mtk_dma_clr(hsdma, MTK_HSDMA_RESET,
+		    MTK_HSDMA_RST_TX | MTK_HSDMA_RST_RX);
+
+	/* Setup HSDMA initial pointer in the ring */
+	mtk_dma_write(hsdma, MTK_HSDMA_TX_BASE, ring->tphys);
+	mtk_dma_write(hsdma, MTK_HSDMA_TX_CNT, MTK_DMA_SIZE);
+	mtk_dma_write(hsdma, MTK_HSDMA_TX_CPU, ring->cur_tptr);
+	mtk_dma_write(hsdma, MTK_HSDMA_TX_DMA, 0);
+	mtk_dma_write(hsdma, MTK_HSDMA_RX_BASE, ring->rphys);
+	mtk_dma_write(hsdma, MTK_HSDMA_RX_CNT, MTK_DMA_SIZE);
+	mtk_dma_write(hsdma, MTK_HSDMA_RX_CPU, ring->cur_rptr);
+	mtk_dma_write(hsdma, MTK_HSDMA_RX_DMA, 0);
+
+	/* Enable HSDMA */
+	mtk_dma_set(hsdma, MTK_HSDMA_GLO, MTK_HSDMA_GLO_DMA);
+
+	/* Setup delayed interrupt */
+	mtk_dma_write(hsdma, MTK_HSDMA_DLYINT, MTK_HSDMA_DLYINT_DEFAULT);
+
+	/* Enable interrupt */
+	mtk_dma_set(hsdma, MTK_HSDMA_INT_ENABLE, MTK_HSDMA_INT_RXDONE);
+
+	return 0;
+
+err_free_cb:
+	kfree(ring->cb);
+
+err_free_dma:
+	dma_free_coherent(hsdma2dev(hsdma),
+			  pc->sz_ring, ring->txd, ring->tphys);
+	return err;
+}
+
+static void mtk_hsdma_free_pchan(struct mtk_hsdma_device *hsdma,
+				 struct mtk_hsdma_pchan *pc)
+{
+	struct mtk_hsdma_ring *ring = &pc->ring;
+
+	/* Disable HSDMA and then wait for the completion */
+	mtk_dma_clr(hsdma, MTK_HSDMA_GLO, MTK_HSDMA_GLO_DMA);
+	mtk_hsdma_busy_wait(hsdma);
+
+	/* Reset pointer in the ring */
+	mtk_dma_clr(hsdma, MTK_HSDMA_INT_ENABLE, MTK_HSDMA_INT_RXDONE);
+	mtk_dma_write(hsdma, MTK_HSDMA_TX_BASE, 0);
+	mtk_dma_write(hsdma, MTK_HSDMA_TX_CNT, 0);
+	mtk_dma_write(hsdma, MTK_HSDMA_TX_CPU, 0);
+	mtk_dma_write(hsdma, MTK_HSDMA_RX_BASE, 0);
+	mtk_dma_write(hsdma, MTK_HSDMA_RX_CNT, 0);
+	mtk_dma_write(hsdma, MTK_HSDMA_RX_CPU, MTK_DMA_SIZE - 1);
+
+	kfree(ring->cb);
+
+	dma_free_coherent(hsdma2dev(hsdma),
+			  pc->sz_ring, ring->txd, ring->tphys);
+}
+
+static int mtk_hsdma_issue_pending_vdesc(struct mtk_hsdma_device *hsdma,
+					 struct mtk_hsdma_pchan *pc,
+					 struct mtk_hsdma_vdesc *hvd)
+{
+	struct mtk_hsdma_ring *ring = &pc->ring;
+	struct mtk_hsdma_pdesc *txd, *rxd;
+	u16 reserved, prev, tlen, num_sgs;
+	unsigned long flags;
+
+	/* Protect against PC is accessed by multiple VCs simultaneously */
+	spin_lock_irqsave(&hsdma->lock, flags);
+
+	/*
+	 * Reserve rooms, where pc->nr_free is used to track how many free
+	 * rooms in the ring being updated in user and IRQ context.
+	 */
+	num_sgs = DIV_ROUND_UP(hvd->len, MTK_HSDMA_MAX_LEN);
+	reserved = min_t(u16, num_sgs, atomic_read(&pc->nr_free));
+
+	if (!reserved) {
+		spin_unlock_irqrestore(&hsdma->lock, flags);
+		return -ENOSPC;
+	}
+
+	atomic_sub(reserved, &pc->nr_free);
+
+	while (reserved--) {
+		/* Limit size by PD capability for valid data moving */
+		tlen = (hvd->len > MTK_HSDMA_MAX_LEN) ?
+		       MTK_HSDMA_MAX_LEN : hvd->len;
+
+		/*
+		 * Setup PDs using the remaining VD info mapped on those
+		 * reserved rooms. And since RXD is shared memory between the
+		 * host and the device allocated by dma_alloc_coherent call,
+		 * the helper macro WRITE_ONCE can ensure the data written to
+		 * RAM would really happens.
+		 */
+		txd = &ring->txd[ring->cur_tptr];
+		WRITE_ONCE(txd->desc1, hvd->src);
+		WRITE_ONCE(txd->desc2,
+			   hsdma->soc->ls0 | MTK_HSDMA_DESC_PLEN(tlen));
+
+		rxd = &ring->rxd[ring->cur_tptr];
+		WRITE_ONCE(rxd->desc1, hvd->dest);
+		WRITE_ONCE(rxd->desc2, MTK_HSDMA_DESC_PLEN(tlen));
+
+		/* Associate VD, the PD belonged to */
+		ring->cb[ring->cur_tptr].vd = &hvd->vd;
+
+		/* Move forward the pointer of TX ring */
+		ring->cur_tptr = MTK_HSDMA_NEXT_DESP_IDX(ring->cur_tptr,
+							 MTK_DMA_SIZE);
+
+		/* Update VD with remaining data */
+		hvd->src  += tlen;
+		hvd->dest += tlen;
+		hvd->len  -= tlen;
+	}
+
+	/*
+	 * Tagging flag for the last PD for VD will be responsible for
+	 * completing VD.
+	 */
+	if (!hvd->len) {
+		prev = MTK_HSDMA_LAST_DESP_IDX(ring->cur_tptr, MTK_DMA_SIZE);
+		ring->cb[prev].flag = MTK_HSDMA_VDESC_FINISHED;
+	}
+
+	/* Ensure all changes indeed done before we're going on */
+	wmb();
+
+	/*
+	 * Updating into hardware the pointer of TX ring lets HSDMA to take
+	 * action for those pending PDs.
+	 */
+	mtk_dma_write(hsdma, MTK_HSDMA_TX_CPU, ring->cur_tptr);
+
+	spin_unlock_irqrestore(&hsdma->lock, flags);
+
+	return 0;
+}
+
+static void mtk_hsdma_issue_vchan_pending(struct mtk_hsdma_device *hsdma,
+					  struct mtk_hsdma_vchan *hvc)
+{
+	struct virt_dma_desc *vd, *vd2;
+	int err;
+
+	lockdep_assert_held(&hvc->vc.lock);
+
+	list_for_each_entry_safe(vd, vd2, &hvc->vc.desc_issued, node) {
+		struct mtk_hsdma_vdesc *hvd;
+
+		hvd = to_hsdma_vdesc(vd);
+
+		/* Map VD into PC and all VCs shares a single PC */
+		err = mtk_hsdma_issue_pending_vdesc(hsdma, hsdma->pc, hvd);
+
+		/*
+		 * Move VD from desc_issued to desc_hw_processing when entire
+		 * VD is fit into available PDs. Otherwise, the uncompleted
+		 * VDs would stay in list desc_issued and then restart the
+		 * processing as soon as possible once underlying ring space
+		 * got freed.
+		 */
+		if (err == -ENOSPC || hvd->len > 0)
+			break;
+
+		/*
+		 * The extra list desc_hw_processing is used because
+		 * hardware can't provide sufficient information allowing us
+		 * to know what VDs are still working on the underlying ring.
+		 * Through the additional list, it can help us to implement
+		 * terminate_all, residue calculation and such thing needed
+		 * to know detail descriptor status on the hardware.
+		 */
+		list_move_tail(&vd->node, &hvc->desc_hw_processing);
+	}
+}
+
+static void mtk_hsdma_free_rooms_in_ring(struct mtk_hsdma_device *hsdma)
+{
+	struct mtk_hsdma_vchan *hvc;
+	struct mtk_hsdma_pdesc *rxd;
+	struct mtk_hsdma_vdesc *hvd;
+	struct mtk_hsdma_pchan *pc;
+	struct mtk_hsdma_cb *cb;
+	int i = MTK_DMA_SIZE;
+	__le32 desc2;
+	u32 status;
+	u16 next;
+
+	/* Read IRQ status */
+	status = mtk_dma_read(hsdma, MTK_HSDMA_INT_STATUS);
+	if (unlikely(!(status & MTK_HSDMA_INT_RXDONE)))
+		goto rx_done;
+
+	pc = hsdma->pc;
+
+	/*
+	 * Using a fail-safe loop with iterations of up to MTK_DMA_SIZE to
+	 * reclaim these finished descriptors: The most number of PDs the ISR
+	 * can handle at one time shouldn't be more than MTK_DMA_SIZE so we
+	 * take it as limited count instead of just using a dangerous infinite
+	 * poll.
+	 */
+	while (i--) {
+		next = MTK_HSDMA_NEXT_DESP_IDX(pc->ring.cur_rptr,
+					       MTK_DMA_SIZE);
+		rxd = &pc->ring.rxd[next];
+
+		/*
+		 * If MTK_HSDMA_DESC_DDONE is no specified, that means data
+		 * moving for the PD is still under going.
+		 */
+		desc2 = READ_ONCE(rxd->desc2);
+		if (!(desc2 & hsdma->soc->ddone))
+			break;
+
+		cb = &pc->ring.cb[next];
+		if (unlikely(!cb->vd)) {
+			dev_err(hsdma2dev(hsdma), "cb->vd cannot be null\n");
+			break;
+		}
+
+		/* Update residue of VD the associated PD belonged to */
+		hvd = to_hsdma_vdesc(cb->vd);
+		hvd->residue -= MTK_HSDMA_DESC_PLEN_GET(rxd->desc2);
+
+		/* Complete VD until the relevant last PD is finished */
+		if (IS_MTK_HSDMA_VDESC_FINISHED(cb->flag)) {
+			hvc = to_hsdma_vchan(cb->vd->tx.chan);
+
+			spin_lock(&hvc->vc.lock);
+
+			/* Remove VD from list desc_hw_processing */
+			list_del(&cb->vd->node);
+
+			/* Add VD into list desc_completed */
+			vchan_cookie_complete(cb->vd);
+
+			if (hvc->issue_synchronize &&
+			    list_empty(&hvc->desc_hw_processing)) {
+				complete(&hvc->issue_completion);
+				hvc->issue_synchronize = false;
+			}
+			spin_unlock(&hvc->vc.lock);
+
+			cb->flag = 0;
+		}
+
+		cb->vd = 0;
+
+		/*
+		 * Recycle the RXD with the helper WRITE_ONCE that can ensure
+		 * data written into RAM would really happens.
+		 */
+		WRITE_ONCE(rxd->desc1, 0);
+		WRITE_ONCE(rxd->desc2, 0);
+		pc->ring.cur_rptr = next;
+
+		/* Release rooms */
+		atomic_inc(&pc->nr_free);
+	}
+
+	/* Ensure all changes indeed done before we're going on */
+	wmb();
+
+	/* Update CPU pointer for those completed PDs */
+	mtk_dma_write(hsdma, MTK_HSDMA_RX_CPU, pc->ring.cur_rptr);
+
+	/*
+	 * Acking the pending IRQ allows hardware no longer to keep the used
+	 * IRQ line in certain trigger state when software has completed all
+	 * the finished physical descriptors.
+	 */
+	if (atomic_read(&pc->nr_free) >= MTK_DMA_SIZE - 1)
+		mtk_dma_write(hsdma, MTK_HSDMA_INT_STATUS, status);
+
+	/* ASAP handles pending VDs in all VCs after freeing some rooms */
+	for (i = 0; i < hsdma->dma_requests; i++) {
+		hvc = &hsdma->vc[i];
+		spin_lock(&hvc->vc.lock);
+		mtk_hsdma_issue_vchan_pending(hsdma, hvc);
+		spin_unlock(&hvc->vc.lock);
+	}
+
+rx_done:
+	/* All completed PDs are cleaned up, so enable interrupt again */
+	mtk_dma_set(hsdma, MTK_HSDMA_INT_ENABLE, MTK_HSDMA_INT_RXDONE);
+}
+
+static irqreturn_t mtk_hsdma_irq(int irq, void *devid)
+{
+	struct mtk_hsdma_device *hsdma = devid;
+
+	/*
+	 * Disable interrupt until all completed PDs are cleaned up in
+	 * mtk_hsdma_free_rooms call.
+	 */
+	mtk_dma_clr(hsdma, MTK_HSDMA_INT_ENABLE, MTK_HSDMA_INT_RXDONE);
+
+	mtk_hsdma_free_rooms_in_ring(hsdma);
+
+	return IRQ_HANDLED;
+}
+
+static struct virt_dma_desc *mtk_hsdma_find_active_desc(struct dma_chan *c,
+							dma_cookie_t cookie)
+{
+	struct mtk_hsdma_vchan *hvc = to_hsdma_vchan(c);
+	struct virt_dma_desc *vd;
+
+	list_for_each_entry(vd, &hvc->desc_hw_processing, node)
+		if (vd->tx.cookie == cookie)
+			return vd;
+
+	list_for_each_entry(vd, &hvc->vc.desc_issued, node)
+		if (vd->tx.cookie == cookie)
+			return vd;
+
+	return NULL;
+}
+
+static enum dma_status mtk_hsdma_tx_status(struct dma_chan *c,
+					   dma_cookie_t cookie,
+					   struct dma_tx_state *txstate)
+{
+	struct mtk_hsdma_vchan *hvc = to_hsdma_vchan(c);
+	struct mtk_hsdma_vdesc *hvd;
+	struct virt_dma_desc *vd;
+	enum dma_status ret;
+	unsigned long flags;
+	size_t bytes = 0;
+
+	ret = dma_cookie_status(c, cookie, txstate);
+	if (ret == DMA_COMPLETE || !txstate)
+		return ret;
+
+	spin_lock_irqsave(&hvc->vc.lock, flags);
+	vd = mtk_hsdma_find_active_desc(c, cookie);
+	spin_unlock_irqrestore(&hvc->vc.lock, flags);
+
+	if (vd) {
+		hvd = to_hsdma_vdesc(vd);
+		bytes = hvd->residue;
+	}
+
+	dma_set_residue(txstate, bytes);
+
+	return ret;
+}
+
+static void mtk_hsdma_issue_pending(struct dma_chan *c)
+{
+	struct mtk_hsdma_device *hsdma = to_hsdma_dev(c);
+	struct mtk_hsdma_vchan *hvc = to_hsdma_vchan(c);
+	unsigned long flags;
+
+	spin_lock_irqsave(&hvc->vc.lock, flags);
+
+	if (vchan_issue_pending(&hvc->vc))
+		mtk_hsdma_issue_vchan_pending(hsdma, hvc);
+
+	spin_unlock_irqrestore(&hvc->vc.lock, flags);
+}
+
+static struct dma_async_tx_descriptor *
+mtk_hsdma_prep_dma_memcpy(struct dma_chan *c, dma_addr_t dest,
+			  dma_addr_t src, size_t len, unsigned long flags)
+{
+	struct mtk_hsdma_vdesc *hvd;
+
+	hvd = kzalloc(sizeof(*hvd), GFP_NOWAIT);
+	if (!hvd)
+		return NULL;
+
+	hvd->len = len;
+	hvd->residue = len;
+	hvd->src = src;
+	hvd->dest = dest;
+
+	return vchan_tx_prep(to_virt_chan(c), &hvd->vd, flags);
+}
+
+static int mtk_hsdma_free_inactive_desc(struct dma_chan *c)
+{
+	struct virt_dma_chan *vc = to_virt_chan(c);
+	unsigned long flags;
+	LIST_HEAD(head);
+
+	spin_lock_irqsave(&vc->lock, flags);
+	list_splice_tail_init(&vc->desc_allocated, &head);
+	list_splice_tail_init(&vc->desc_submitted, &head);
+	list_splice_tail_init(&vc->desc_issued, &head);
+	spin_unlock_irqrestore(&vc->lock, flags);
+
+	/* At the point, we don't expect users put descriptor into VC again */
+	vchan_dma_desc_free_list(vc, &head);
+
+	return 0;
+}
+
+static void mtk_hsdma_free_active_desc(struct dma_chan *c)
+{
+	struct mtk_hsdma_vchan *hvc = to_hsdma_vchan(c);
+	bool sync_needed = false;
+
+	/*
+	 * Once issue_synchronize is being set, which means once the hardware
+	 * consumes all descriptors for the channel in the ring, the
+	 * synchronization must be be notified immediately it is completed.
+	 */
+	spin_lock(&hvc->vc.lock);
+	if (!list_empty(&hvc->desc_hw_processing)) {
+		hvc->issue_synchronize = true;
+		sync_needed = true;
+	}
+	spin_unlock(&hvc->vc.lock);
+
+	if (sync_needed)
+		wait_for_completion(&hvc->issue_completion);
+	/*
+	 * At the point, we expect that all remaining descriptors in the ring
+	 * for the channel should be all processing done.
+	 */
+	WARN_ONCE(!list_empty(&hvc->desc_hw_processing),
+		  "Desc pending still in list desc_hw_processing\n");
+
+	/* Free all descriptors in list desc_completed */
+	vchan_synchronize(&hvc->vc);
+
+	WARN_ONCE(!list_empty(&hvc->vc.desc_completed),
+		  "Desc pending still in list desc_completed\n");
+}
+
+static int mtk_hsdma_terminate_all(struct dma_chan *c)
+{
+	/*
+	 * Free pending descriptors not processed yet by hardware that have
+	 * previously been submitted to the channel.
+	 */
+	mtk_hsdma_free_inactive_desc(c);
+
+	/*
+	 * However, the DMA engine doesn't provide any way to stop these
+	 * descriptors being processed currently by hardware. The only way is
+	 * to just waiting until these descriptors are all processed completely
+	 * through mtk_hsdma_free_active_desc call.
+	 */
+	mtk_hsdma_free_active_desc(c);
+
+	return 0;
+}
+
+static int mtk_hsdma_alloc_chan_resources(struct dma_chan *c)
+{
+	struct mtk_hsdma_device *hsdma = to_hsdma_dev(c);
+	int err;
+
+	/*
+	 * Since HSDMA has only one PC, the resource for PC is being allocated
+	 * when the first VC is being created and the other VCs would run on
+	 * the same PC.
+	 */
+	if (!refcount_read(&hsdma->pc_refcnt)) {
+		err = mtk_hsdma_alloc_pchan(hsdma, hsdma->pc);
+		if (err)
+			return err;
+		/*
+		 * refcount_inc would complain increment on 0; use-after-free.
+		 * Thus, we need to explicitly set it as 1 initially.
+		 */
+		refcount_set(&hsdma->pc_refcnt, 1);
+	} else {
+		refcount_inc(&hsdma->pc_refcnt);
+	}
+
+	return 0;
+}
+
+static void mtk_hsdma_free_chan_resources(struct dma_chan *c)
+{
+	struct mtk_hsdma_device *hsdma = to_hsdma_dev(c);
+
+	/* Free all descriptors in all lists on the VC */
+	mtk_hsdma_terminate_all(c);
+
+	/* The resource for PC is not freed until all the VCs are destroyed */
+	if (!refcount_dec_and_test(&hsdma->pc_refcnt))
+		return;
+
+	mtk_hsdma_free_pchan(hsdma, hsdma->pc);
+}
+
+static int mtk_hsdma_hw_init(struct mtk_hsdma_device *hsdma)
+{
+	int err;
+
+	pm_runtime_enable(hsdma2dev(hsdma));
+	pm_runtime_get_sync(hsdma2dev(hsdma));
+
+	err = clk_prepare_enable(hsdma->clk);
+	if (err)
+		return err;
+
+	mtk_dma_write(hsdma, MTK_HSDMA_INT_ENABLE, 0);
+	mtk_dma_write(hsdma, MTK_HSDMA_GLO, MTK_HSDMA_GLO_DEFAULT);
+
+	return 0;
+}
+
+static int mtk_hsdma_hw_deinit(struct mtk_hsdma_device *hsdma)
+{
+	mtk_dma_write(hsdma, MTK_HSDMA_GLO, 0);
+
+	clk_disable_unprepare(hsdma->clk);
+
+	pm_runtime_put_sync(hsdma2dev(hsdma));
+	pm_runtime_disable(hsdma2dev(hsdma));
+
+	return 0;
+}
+
+static const struct mtk_hsdma_soc mt7623_soc = {
+	.ddone = BIT(31),
+	.ls0 = BIT(30),
+};
+
+static const struct mtk_hsdma_soc mt7622_soc = {
+	.ddone = BIT(15),
+	.ls0 = BIT(14),
+};
+
+static const struct of_device_id mtk_hsdma_match[] = {
+	{ .compatible = "mediatek,mt7623-hsdma", .data = &mt7623_soc},
+	{ .compatible = "mediatek,mt7622-hsdma", .data = &mt7622_soc},
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, mtk_hsdma_match);
+
+static int mtk_hsdma_probe(struct platform_device *pdev)
+{
+	struct mtk_hsdma_device *hsdma;
+	struct mtk_hsdma_vchan *vc;
+	struct dma_device *dd;
+	struct resource *res;
+	int i, err;
+
+	hsdma = devm_kzalloc(&pdev->dev, sizeof(*hsdma), GFP_KERNEL);
+	if (!hsdma)
+		return -ENOMEM;
+
+	dd = &hsdma->ddev;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	hsdma->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(hsdma->base))
+		return PTR_ERR(hsdma->base);
+
+	hsdma->soc = of_device_get_match_data(&pdev->dev);
+	if (!hsdma->soc) {
+		dev_err(&pdev->dev, "No device match found\n");
+		return -ENODEV;
+	}
+
+	hsdma->clk = devm_clk_get(&pdev->dev, "hsdma");
+	if (IS_ERR(hsdma->clk)) {
+		dev_err(&pdev->dev, "No clock for %s\n",
+			dev_name(&pdev->dev));
+		return PTR_ERR(hsdma->clk);
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (!res) {
+		dev_err(&pdev->dev, "No irq resource for %s\n",
+			dev_name(&pdev->dev));
+		return -EINVAL;
+	}
+	hsdma->irq = res->start;
+
+	refcount_set(&hsdma->pc_refcnt, 0);
+	spin_lock_init(&hsdma->lock);
+
+	dma_cap_set(DMA_MEMCPY, dd->cap_mask);
+
+	dd->copy_align = MTK_HSDMA_ALIGN_SIZE;
+	dd->device_alloc_chan_resources = mtk_hsdma_alloc_chan_resources;
+	dd->device_free_chan_resources = mtk_hsdma_free_chan_resources;
+	dd->device_tx_status = mtk_hsdma_tx_status;
+	dd->device_issue_pending = mtk_hsdma_issue_pending;
+	dd->device_prep_dma_memcpy = mtk_hsdma_prep_dma_memcpy;
+	dd->device_terminate_all = mtk_hsdma_terminate_all;
+	dd->src_addr_widths = MTK_HSDMA_DMA_BUSWIDTHS;
+	dd->dst_addr_widths = MTK_HSDMA_DMA_BUSWIDTHS;
+	dd->directions = BIT(DMA_MEM_TO_MEM);
+	dd->residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
+	dd->dev = &pdev->dev;
+	INIT_LIST_HEAD(&dd->channels);
+
+	hsdma->dma_requests = MTK_HSDMA_NR_VCHANS;
+	if (pdev->dev.of_node && of_property_read_u32(pdev->dev.of_node,
+						      "dma-requests",
+						      &hsdma->dma_requests)) {
+		dev_info(&pdev->dev,
+			 "Using %u as missing dma-requests property\n",
+			 MTK_HSDMA_NR_VCHANS);
+	}
+
+	hsdma->pc = devm_kcalloc(&pdev->dev, MTK_HSDMA_NR_MAX_PCHANS,
+				 sizeof(*hsdma->pc), GFP_KERNEL);
+	if (!hsdma->pc)
+		return -ENOMEM;
+
+	hsdma->vc = devm_kcalloc(&pdev->dev, hsdma->dma_requests,
+				 sizeof(*hsdma->vc), GFP_KERNEL);
+	if (!hsdma->vc)
+		return -ENOMEM;
+
+	for (i = 0; i < hsdma->dma_requests; i++) {
+		vc = &hsdma->vc[i];
+		vc->vc.desc_free = mtk_hsdma_vdesc_free;
+		vchan_init(&vc->vc, dd);
+		init_completion(&vc->issue_completion);
+		INIT_LIST_HEAD(&vc->desc_hw_processing);
+	}
+
+	err = dma_async_device_register(dd);
+	if (err)
+		return err;
+
+	err = of_dma_controller_register(pdev->dev.of_node,
+					 of_dma_xlate_by_chan_id, hsdma);
+	if (err) {
+		dev_err(&pdev->dev,
+			"MediaTek HSDMA OF registration failed %d\n", err);
+		goto err_unregister;
+	}
+
+	mtk_hsdma_hw_init(hsdma);
+
+	err = devm_request_irq(&pdev->dev, hsdma->irq,
+			       mtk_hsdma_irq, 0,
+			       dev_name(&pdev->dev), hsdma);
+	if (err) {
+		dev_err(&pdev->dev,
+			"request_irq failed with err %d\n", err);
+		goto err_unregister;
+	}
+
+	platform_set_drvdata(pdev, hsdma);
+
+	dev_info(&pdev->dev, "MediaTek HSDMA driver registered\n");
+
+	return 0;
+
+err_unregister:
+	dma_async_device_unregister(dd);
+
+	return err;
+}
+
+static int mtk_hsdma_remove(struct platform_device *pdev)
+{
+	struct mtk_hsdma_device *hsdma = platform_get_drvdata(pdev);
+	struct mtk_hsdma_vchan *vc;
+	int i;
+
+	/* Kill VC task */
+	for (i = 0; i < hsdma->dma_requests; i++) {
+		vc = &hsdma->vc[i];
+
+		list_del(&vc->vc.chan.device_node);
+		tasklet_kill(&vc->vc.task);
+	}
+
+	/* Disable DMA interrupt */
+	mtk_dma_write(hsdma, MTK_HSDMA_INT_ENABLE, 0);
+
+	/* Waits for any pending IRQ handlers to complete */
+	synchronize_irq(hsdma->irq);
+
+	/* Disable hardware */
+	mtk_hsdma_hw_deinit(hsdma);
+
+	dma_async_device_unregister(&hsdma->ddev);
+	of_dma_controller_free(pdev->dev.of_node);
+
+	return 0;
+}
+
+static struct platform_driver mtk_hsdma_driver = {
+	.probe		= mtk_hsdma_probe,
+	.remove		= mtk_hsdma_remove,
+	.driver = {
+		.name		= KBUILD_MODNAME,
+		.of_match_table	= mtk_hsdma_match,
+	},
+};
+module_platform_driver(mtk_hsdma_driver);
+
+MODULE_DESCRIPTION("MediaTek High-Speed DMA Controller Driver");
+MODULE_AUTHOR("Sean Wang <sean.wang@mediatek.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index d7327fd5f445..de1fd59fe136 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -1510,7 +1510,7 @@ static void pl330_dotask(unsigned long data)
 /* Returns 1 if state was updated, 0 otherwise */
 static int pl330_update(struct pl330_dmac *pl330)
 {
-	struct dma_pl330_desc *descdone, *tmp;
+	struct dma_pl330_desc *descdone;
 	unsigned long flags;
 	void __iomem *regs;
 	u32 val;
@@ -1588,7 +1588,9 @@ static int pl330_update(struct pl330_dmac *pl330)
 	}
 
 	/* Now that we are in no hurry, do the callbacks */
-	list_for_each_entry_safe(descdone, tmp, &pl330->req_done, rqd) {
+	while (!list_empty(&pl330->req_done)) {
+		descdone = list_first_entry(&pl330->req_done,
+					    struct dma_pl330_desc, rqd);
 		list_del(&descdone->rqd);
 		spin_unlock_irqrestore(&pl330->lock, flags);
 		dma_pl330_rqcb(descdone, PL330_ERR_NONE);
diff --git a/drivers/dma/qcom/bam_dma.c b/drivers/dma/qcom/bam_dma.c
index d076940e0c69..d29275b97e84 100644
--- a/drivers/dma/qcom/bam_dma.c
+++ b/drivers/dma/qcom/bam_dma.c
@@ -393,6 +393,7 @@ struct bam_device {
 	struct device_dma_parameters dma_parms;
 	struct bam_chan *channels;
 	u32 num_channels;
+	u32 num_ees;
 
 	/* execution environment ID, from DT */
 	u32 ee;
@@ -934,12 +935,15 @@ static void bam_apply_new_config(struct bam_chan *bchan,
 	struct bam_device *bdev = bchan->bdev;
 	u32 maxburst;
 
-	if (dir == DMA_DEV_TO_MEM)
-		maxburst = bchan->slave.src_maxburst;
-	else
-		maxburst = bchan->slave.dst_maxburst;
+	if (!bdev->controlled_remotely) {
+		if (dir == DMA_DEV_TO_MEM)
+			maxburst = bchan->slave.src_maxburst;
+		else
+			maxburst = bchan->slave.dst_maxburst;
 
-	writel_relaxed(maxburst, bam_addr(bdev, 0, BAM_DESC_CNT_TRSHLD));
+		writel_relaxed(maxburst,
+			       bam_addr(bdev, 0, BAM_DESC_CNT_TRSHLD));
+	}
 
 	bchan->reconfigure = 0;
 }
@@ -1128,15 +1132,19 @@ static int bam_init(struct bam_device *bdev)
 	u32 val;
 
 	/* read revision and configuration information */
-	val = readl_relaxed(bam_addr(bdev, 0, BAM_REVISION)) >> NUM_EES_SHIFT;
-	val &= NUM_EES_MASK;
+	if (!bdev->num_ees) {
+		val = readl_relaxed(bam_addr(bdev, 0, BAM_REVISION));
+		bdev->num_ees = (val >> NUM_EES_SHIFT) & NUM_EES_MASK;
+	}
 
 	/* check that configured EE is within range */
-	if (bdev->ee >= val)
+	if (bdev->ee >= bdev->num_ees)
 		return -EINVAL;
 
-	val = readl_relaxed(bam_addr(bdev, 0, BAM_NUM_PIPES));
-	bdev->num_channels = val & BAM_NUM_PIPES_MASK;
+	if (!bdev->num_channels) {
+		val = readl_relaxed(bam_addr(bdev, 0, BAM_NUM_PIPES));
+		bdev->num_channels = val & BAM_NUM_PIPES_MASK;
+	}
 
 	if (bdev->controlled_remotely)
 		return 0;
@@ -1232,9 +1240,25 @@ static int bam_dma_probe(struct platform_device *pdev)
 	bdev->controlled_remotely = of_property_read_bool(pdev->dev.of_node,
 						"qcom,controlled-remotely");
 
+	if (bdev->controlled_remotely) {
+		ret = of_property_read_u32(pdev->dev.of_node, "num-channels",
+					   &bdev->num_channels);
+		if (ret)
+			dev_err(bdev->dev, "num-channels unspecified in dt\n");
+
+		ret = of_property_read_u32(pdev->dev.of_node, "qcom,num-ees",
+					   &bdev->num_ees);
+		if (ret)
+			dev_err(bdev->dev, "num-ees unspecified in dt\n");
+	}
+
 	bdev->bamclk = devm_clk_get(bdev->dev, "bam_clk");
-	if (IS_ERR(bdev->bamclk))
-		return PTR_ERR(bdev->bamclk);
+	if (IS_ERR(bdev->bamclk)) {
+		if (!bdev->controlled_remotely)
+			return PTR_ERR(bdev->bamclk);
+
+		bdev->bamclk = NULL;
+	}
 
 	ret = clk_prepare_enable(bdev->bamclk);
 	if (ret) {
@@ -1309,6 +1333,11 @@ static int bam_dma_probe(struct platform_device *pdev)
 	if (ret)
 		goto err_unregister_dma;
 
+	if (bdev->controlled_remotely) {
+		pm_runtime_disable(&pdev->dev);
+		return 0;
+	}
+
 	pm_runtime_irq_safe(&pdev->dev);
 	pm_runtime_set_autosuspend_delay(&pdev->dev, BAM_DMA_AUTOSUSPEND_DELAY);
 	pm_runtime_use_autosuspend(&pdev->dev);
@@ -1392,7 +1421,8 @@ static int __maybe_unused bam_dma_suspend(struct device *dev)
 {
 	struct bam_device *bdev = dev_get_drvdata(dev);
 
-	pm_runtime_force_suspend(dev);
+	if (!bdev->controlled_remotely)
+		pm_runtime_force_suspend(dev);
 
 	clk_unprepare(bdev->bamclk);
 
@@ -1408,7 +1438,8 @@ static int __maybe_unused bam_dma_resume(struct device *dev)
 	if (ret)
 		return ret;
 
-	pm_runtime_force_resume(dev);
+	if (!bdev->controlled_remotely)
+		pm_runtime_force_resume(dev);
 
 	return 0;
 }
diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
index d0cacdb0713e..2a2ccd9c78e4 100644
--- a/drivers/dma/sh/rcar-dmac.c
+++ b/drivers/dma/sh/rcar-dmac.c
@@ -1301,8 +1301,17 @@ static unsigned int rcar_dmac_chan_get_residue(struct rcar_dmac_chan *chan,
 	 * If the cookie doesn't correspond to the currently running transfer
 	 * then the descriptor hasn't been processed yet, and the residue is
 	 * equal to the full descriptor size.
+	 * Also, a client driver is possible to call this function before
+	 * rcar_dmac_isr_channel_thread() runs. In this case, the "desc.running"
+	 * will be the next descriptor, and the done list will appear. So, if
+	 * the argument cookie matches the done list's cookie, we can assume
+	 * the residue is zero.
 	 */
 	if (cookie != desc->async_tx.cookie) {
+		list_for_each_entry(desc, &chan->desc.done, node) {
+			if (cookie == desc->async_tx.cookie)
+				return 0;
+		}
 		list_for_each_entry(desc, &chan->desc.pending, node) {
 			if (cookie == desc->async_tx.cookie)
 				return desc->size;
@@ -1677,8 +1686,8 @@ static const struct dev_pm_ops rcar_dmac_pm = {
 	 *   - Wait for the current transfer to complete and stop the device,
 	 *   - Resume transfers, if any.
 	 */
-	SET_LATE_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
-				     pm_runtime_force_resume)
+	SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+				      pm_runtime_force_resume)
 	SET_RUNTIME_PM_OPS(rcar_dmac_runtime_suspend, rcar_dmac_runtime_resume,
 			   NULL)
 };
diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c
index 786fc8fcc38e..8c5807362a25 100644
--- a/drivers/dma/stm32-dma.c
+++ b/drivers/dma/stm32-dma.c
@@ -5,6 +5,7 @@
  *
  * Copyright (C) M'boumba Cedric Madianga 2015
  * Author: M'boumba Cedric Madianga <cedric.madianga@gmail.com>
+ *         Pierre-Yves Mordret <pierre-yves.mordret@st.com>
  *
  * License terms:  GNU General Public License (GPL), version 2
  */
@@ -33,9 +34,14 @@
 #define STM32_DMA_LIFCR			0x0008 /* DMA Low Int Flag Clear Reg */
 #define STM32_DMA_HIFCR			0x000c /* DMA High Int Flag Clear Reg */
 #define STM32_DMA_TCI			BIT(5) /* Transfer Complete Interrupt */
+#define STM32_DMA_HTI			BIT(4) /* Half Transfer Interrupt */
 #define STM32_DMA_TEI			BIT(3) /* Transfer Error Interrupt */
 #define STM32_DMA_DMEI			BIT(2) /* Direct Mode Error Interrupt */
 #define STM32_DMA_FEI			BIT(0) /* FIFO Error Interrupt */
+#define STM32_DMA_MASKI			(STM32_DMA_TCI \
+					 | STM32_DMA_TEI \
+					 | STM32_DMA_DMEI \
+					 | STM32_DMA_FEI)
 
 /* DMA Stream x Configuration Register */
 #define STM32_DMA_SCR(x)		(0x0010 + 0x18 * (x)) /* x = 0..7 */
@@ -60,7 +66,8 @@
 #define STM32_DMA_SCR_PINC		BIT(9) /* Peripheral increment mode */
 #define STM32_DMA_SCR_CIRC		BIT(8) /* Circular mode */
 #define STM32_DMA_SCR_PFCTRL		BIT(5) /* Peripheral Flow Controller */
-#define STM32_DMA_SCR_TCIE		BIT(4) /* Transfer Cplete Int Enable*/
+#define STM32_DMA_SCR_TCIE		BIT(4) /* Transfer Complete Int Enable
+						*/
 #define STM32_DMA_SCR_TEIE		BIT(2) /* Transfer Error Int Enable */
 #define STM32_DMA_SCR_DMEIE		BIT(1) /* Direct Mode Err Int Enable */
 #define STM32_DMA_SCR_EN		BIT(0) /* Stream Enable */
@@ -111,11 +118,24 @@
 #define STM32_DMA_FIFO_THRESHOLD_FULL			0x03
 
 #define STM32_DMA_MAX_DATA_ITEMS	0xffff
+/*
+ * Valid transfer starts from @0 to @0xFFFE leading to unaligned scatter
+ * gather at boundary. Thus it's safer to round down this value on FIFO
+ * size (16 Bytes)
+ */
+#define STM32_DMA_ALIGNED_MAX_DATA_ITEMS	\
+	ALIGN_DOWN(STM32_DMA_MAX_DATA_ITEMS, 16)
 #define STM32_DMA_MAX_CHANNELS		0x08
 #define STM32_DMA_MAX_REQUEST_ID	0x08
 #define STM32_DMA_MAX_DATA_PARAM	0x03
+#define STM32_DMA_FIFO_SIZE		16	/* FIFO is 16 bytes */
+#define STM32_DMA_MIN_BURST		4
 #define STM32_DMA_MAX_BURST		16
 
+/* DMA Features */
+#define STM32_DMA_THRESHOLD_FTR_MASK	GENMASK(1, 0)
+#define STM32_DMA_THRESHOLD_FTR_GET(n)	((n) & STM32_DMA_THRESHOLD_FTR_MASK)
+
 enum stm32_dma_width {
 	STM32_DMA_BYTE,
 	STM32_DMA_HALF_WORD,
@@ -129,11 +149,18 @@ enum stm32_dma_burst_size {
 	STM32_DMA_BURST_INCR16,
 };
 
+/**
+ * struct stm32_dma_cfg - STM32 DMA custom configuration
+ * @channel_id: channel ID
+ * @request_line: DMA request
+ * @stream_config: 32bit mask specifying the DMA channel configuration
+ * @features: 32bit mask specifying the DMA Feature list
+ */
 struct stm32_dma_cfg {
 	u32 channel_id;
 	u32 request_line;
 	u32 stream_config;
-	u32 threshold;
+	u32 features;
 };
 
 struct stm32_dma_chan_reg {
@@ -171,6 +198,9 @@ struct stm32_dma_chan {
 	u32 next_sg;
 	struct dma_slave_config	dma_sconfig;
 	struct stm32_dma_chan_reg chan_reg;
+	u32 threshold;
+	u32 mem_burst;
+	u32 mem_width;
 };
 
 struct stm32_dma_device {
@@ -235,6 +265,85 @@ static int stm32_dma_get_width(struct stm32_dma_chan *chan,
 	}
 }
 
+static enum dma_slave_buswidth stm32_dma_get_max_width(u32 buf_len,
+						       u32 threshold)
+{
+	enum dma_slave_buswidth max_width;
+
+	if (threshold == STM32_DMA_FIFO_THRESHOLD_FULL)
+		max_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+	else
+		max_width = DMA_SLAVE_BUSWIDTH_2_BYTES;
+
+	while ((buf_len < max_width  || buf_len % max_width) &&
+	       max_width > DMA_SLAVE_BUSWIDTH_1_BYTE)
+		max_width = max_width >> 1;
+
+	return max_width;
+}
+
+static bool stm32_dma_fifo_threshold_is_allowed(u32 burst, u32 threshold,
+						enum dma_slave_buswidth width)
+{
+	u32 remaining;
+
+	if (width != DMA_SLAVE_BUSWIDTH_UNDEFINED) {
+		if (burst != 0) {
+			/*
+			 * If number of beats fit in several whole bursts
+			 * this configuration is allowed.
+			 */
+			remaining = ((STM32_DMA_FIFO_SIZE / width) *
+				     (threshold + 1) / 4) % burst;
+
+			if (remaining == 0)
+				return true;
+		} else {
+			return true;
+		}
+	}
+
+	return false;
+}
+
+static bool stm32_dma_is_burst_possible(u32 buf_len, u32 threshold)
+{
+	switch (threshold) {
+	case STM32_DMA_FIFO_THRESHOLD_FULL:
+		if (buf_len >= STM32_DMA_MAX_BURST)
+			return true;
+		else
+			return false;
+	case STM32_DMA_FIFO_THRESHOLD_HALFFULL:
+		if (buf_len >= STM32_DMA_MAX_BURST / 2)
+			return true;
+		else
+			return false;
+	default:
+		return false;
+	}
+}
+
+static u32 stm32_dma_get_best_burst(u32 buf_len, u32 max_burst, u32 threshold,
+				    enum dma_slave_buswidth width)
+{
+	u32 best_burst = max_burst;
+
+	if (best_burst == 1 || !stm32_dma_is_burst_possible(buf_len, threshold))
+		return 0;
+
+	while ((buf_len < best_burst * width && best_burst > 1) ||
+	       !stm32_dma_fifo_threshold_is_allowed(best_burst, threshold,
+						    width)) {
+		if (best_burst > STM32_DMA_MIN_BURST)
+			best_burst = best_burst >> 1;
+		else
+			best_burst = 0;
+	}
+
+	return best_burst;
+}
+
 static int stm32_dma_get_burst(struct stm32_dma_chan *chan, u32 maxburst)
 {
 	switch (maxburst) {
@@ -254,12 +363,12 @@ static int stm32_dma_get_burst(struct stm32_dma_chan *chan, u32 maxburst)
 }
 
 static void stm32_dma_set_fifo_config(struct stm32_dma_chan *chan,
-				      u32 src_maxburst, u32 dst_maxburst)
+				      u32 src_burst, u32 dst_burst)
 {
 	chan->chan_reg.dma_sfcr &= ~STM32_DMA_SFCR_MASK;
 	chan->chan_reg.dma_scr &= ~STM32_DMA_SCR_DMEIE;
 
-	if ((!src_maxburst) && (!dst_maxburst)) {
+	if (!src_burst && !dst_burst) {
 		/* Using direct mode */
 		chan->chan_reg.dma_scr |= STM32_DMA_SCR_DMEIE;
 	} else {
@@ -300,7 +409,7 @@ static u32 stm32_dma_irq_status(struct stm32_dma_chan *chan)
 
 	flags = dma_isr >> (((chan->id & 2) << 3) | ((chan->id & 1) * 6));
 
-	return flags;
+	return flags & STM32_DMA_MASKI;
 }
 
 static void stm32_dma_irq_clear(struct stm32_dma_chan *chan, u32 flags)
@@ -315,6 +424,7 @@ static void stm32_dma_irq_clear(struct stm32_dma_chan *chan, u32 flags)
 	 * If (ch % 4) is 2 or 3, left shift the mask by 16 bits.
 	 * If (ch % 4) is 1 or 3, additionally left shift the mask by 6 bits.
 	 */
+	flags &= STM32_DMA_MASKI;
 	dma_ifcr = flags << (((chan->id & 2) << 3) | ((chan->id & 1) * 6));
 
 	if (chan->id & 4)
@@ -429,6 +539,8 @@ static void stm32_dma_dump_reg(struct stm32_dma_chan *chan)
 	dev_dbg(chan2dev(chan), "SFCR:  0x%08x\n", sfcr);
 }
 
+static void stm32_dma_configure_next_sg(struct stm32_dma_chan *chan);
+
 static void stm32_dma_start_transfer(struct stm32_dma_chan *chan)
 {
 	struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
@@ -471,6 +583,9 @@ static void stm32_dma_start_transfer(struct stm32_dma_chan *chan)
 	if (status)
 		stm32_dma_irq_clear(chan, status);
 
+	if (chan->desc->cyclic)
+		stm32_dma_configure_next_sg(chan);
+
 	stm32_dma_dump_reg(chan);
 
 	/* Start DMA */
@@ -541,13 +656,29 @@ static irqreturn_t stm32_dma_chan_irq(int irq, void *devid)
 	status = stm32_dma_irq_status(chan);
 	scr = stm32_dma_read(dmadev, STM32_DMA_SCR(chan->id));
 
-	if ((status & STM32_DMA_TCI) && (scr & STM32_DMA_SCR_TCIE)) {
+	if (status & STM32_DMA_TCI) {
 		stm32_dma_irq_clear(chan, STM32_DMA_TCI);
-		stm32_dma_handle_chan_done(chan);
-
-	} else {
+		if (scr & STM32_DMA_SCR_TCIE)
+			stm32_dma_handle_chan_done(chan);
+		status &= ~STM32_DMA_TCI;
+	}
+	if (status & STM32_DMA_HTI) {
+		stm32_dma_irq_clear(chan, STM32_DMA_HTI);
+		status &= ~STM32_DMA_HTI;
+	}
+	if (status & STM32_DMA_FEI) {
+		stm32_dma_irq_clear(chan, STM32_DMA_FEI);
+		status &= ~STM32_DMA_FEI;
+		if (!(scr & STM32_DMA_SCR_EN))
+			dev_err(chan2dev(chan), "FIFO Error\n");
+		else
+			dev_dbg(chan2dev(chan), "FIFO over/underrun\n");
+	}
+	if (status) {
 		stm32_dma_irq_clear(chan, status);
 		dev_err(chan2dev(chan), "DMA error: status=0x%08x\n", status);
+		if (!(scr & STM32_DMA_SCR_EN))
+			dev_err(chan2dev(chan), "chan disabled by HW\n");
 	}
 
 	spin_unlock(&chan->vchan.lock);
@@ -564,45 +695,59 @@ static void stm32_dma_issue_pending(struct dma_chan *c)
 	if (vchan_issue_pending(&chan->vchan) && !chan->desc && !chan->busy) {
 		dev_dbg(chan2dev(chan), "vchan %p: issued\n", &chan->vchan);
 		stm32_dma_start_transfer(chan);
-		if (chan->desc->cyclic)
-			stm32_dma_configure_next_sg(chan);
+
 	}
 	spin_unlock_irqrestore(&chan->vchan.lock, flags);
 }
 
 static int stm32_dma_set_xfer_param(struct stm32_dma_chan *chan,
 				    enum dma_transfer_direction direction,
-				    enum dma_slave_buswidth *buswidth)
+				    enum dma_slave_buswidth *buswidth,
+				    u32 buf_len)
 {
 	enum dma_slave_buswidth src_addr_width, dst_addr_width;
 	int src_bus_width, dst_bus_width;
 	int src_burst_size, dst_burst_size;
-	u32 src_maxburst, dst_maxburst;
-	u32 dma_scr = 0;
+	u32 src_maxburst, dst_maxburst, src_best_burst, dst_best_burst;
+	u32 dma_scr, threshold;
 
 	src_addr_width = chan->dma_sconfig.src_addr_width;
 	dst_addr_width = chan->dma_sconfig.dst_addr_width;
 	src_maxburst = chan->dma_sconfig.src_maxburst;
 	dst_maxburst = chan->dma_sconfig.dst_maxburst;
+	threshold = chan->threshold;
 
 	switch (direction) {
 	case DMA_MEM_TO_DEV:
+		/* Set device data size */
 		dst_bus_width = stm32_dma_get_width(chan, dst_addr_width);
 		if (dst_bus_width < 0)
 			return dst_bus_width;
 
-		dst_burst_size = stm32_dma_get_burst(chan, dst_maxburst);
+		/* Set device burst size */
+		dst_best_burst = stm32_dma_get_best_burst(buf_len,
+							  dst_maxburst,
+							  threshold,
+							  dst_addr_width);
+
+		dst_burst_size = stm32_dma_get_burst(chan, dst_best_burst);
 		if (dst_burst_size < 0)
 			return dst_burst_size;
 
-		if (!src_addr_width)
-			src_addr_width = dst_addr_width;
-
+		/* Set memory data size */
+		src_addr_width = stm32_dma_get_max_width(buf_len, threshold);
+		chan->mem_width = src_addr_width;
 		src_bus_width = stm32_dma_get_width(chan, src_addr_width);
 		if (src_bus_width < 0)
 			return src_bus_width;
 
-		src_burst_size = stm32_dma_get_burst(chan, src_maxburst);
+		/* Set memory burst size */
+		src_maxburst = STM32_DMA_MAX_BURST;
+		src_best_burst = stm32_dma_get_best_burst(buf_len,
+							  src_maxburst,
+							  threshold,
+							  src_addr_width);
+		src_burst_size = stm32_dma_get_burst(chan, src_best_burst);
 		if (src_burst_size < 0)
 			return src_burst_size;
 
@@ -612,27 +757,46 @@ static int stm32_dma_set_xfer_param(struct stm32_dma_chan *chan,
 			STM32_DMA_SCR_PBURST(dst_burst_size) |
 			STM32_DMA_SCR_MBURST(src_burst_size);
 
+		/* Set FIFO threshold */
+		chan->chan_reg.dma_sfcr &= ~STM32_DMA_SFCR_FTH_MASK;
+		chan->chan_reg.dma_sfcr |= STM32_DMA_SFCR_FTH(threshold);
+
+		/* Set peripheral address */
 		chan->chan_reg.dma_spar = chan->dma_sconfig.dst_addr;
 		*buswidth = dst_addr_width;
 		break;
 
 	case DMA_DEV_TO_MEM:
+		/* Set device data size */
 		src_bus_width = stm32_dma_get_width(chan, src_addr_width);
 		if (src_bus_width < 0)
 			return src_bus_width;
 
-		src_burst_size = stm32_dma_get_burst(chan, src_maxburst);
+		/* Set device burst size */
+		src_best_burst = stm32_dma_get_best_burst(buf_len,
+							  src_maxburst,
+							  threshold,
+							  src_addr_width);
+		chan->mem_burst = src_best_burst;
+		src_burst_size = stm32_dma_get_burst(chan, src_best_burst);
 		if (src_burst_size < 0)
 			return src_burst_size;
 
-		if (!dst_addr_width)
-			dst_addr_width = src_addr_width;
-
+		/* Set memory data size */
+		dst_addr_width = stm32_dma_get_max_width(buf_len, threshold);
+		chan->mem_width = dst_addr_width;
 		dst_bus_width = stm32_dma_get_width(chan, dst_addr_width);
 		if (dst_bus_width < 0)
 			return dst_bus_width;
 
-		dst_burst_size = stm32_dma_get_burst(chan, dst_maxburst);
+		/* Set memory burst size */
+		dst_maxburst = STM32_DMA_MAX_BURST;
+		dst_best_burst = stm32_dma_get_best_burst(buf_len,
+							  dst_maxburst,
+							  threshold,
+							  dst_addr_width);
+		chan->mem_burst = dst_best_burst;
+		dst_burst_size = stm32_dma_get_burst(chan, dst_best_burst);
 		if (dst_burst_size < 0)
 			return dst_burst_size;
 
@@ -642,6 +806,11 @@ static int stm32_dma_set_xfer_param(struct stm32_dma_chan *chan,
 			STM32_DMA_SCR_PBURST(src_burst_size) |
 			STM32_DMA_SCR_MBURST(dst_burst_size);
 
+		/* Set FIFO threshold */
+		chan->chan_reg.dma_sfcr &= ~STM32_DMA_SFCR_FTH_MASK;
+		chan->chan_reg.dma_sfcr |= STM32_DMA_SFCR_FTH(threshold);
+
+		/* Set peripheral address */
 		chan->chan_reg.dma_spar = chan->dma_sconfig.src_addr;
 		*buswidth = chan->dma_sconfig.src_addr_width;
 		break;
@@ -651,8 +820,9 @@ static int stm32_dma_set_xfer_param(struct stm32_dma_chan *chan,
 		return -EINVAL;
 	}
 
-	stm32_dma_set_fifo_config(chan, src_maxburst, dst_maxburst);
+	stm32_dma_set_fifo_config(chan, src_best_burst, dst_best_burst);
 
+	/* Set DMA control register */
 	chan->chan_reg.dma_scr &= ~(STM32_DMA_SCR_DIR_MASK |
 			STM32_DMA_SCR_PSIZE_MASK | STM32_DMA_SCR_MSIZE_MASK |
 			STM32_DMA_SCR_PBURST_MASK | STM32_DMA_SCR_MBURST_MASK);
@@ -692,10 +862,6 @@ static struct dma_async_tx_descriptor *stm32_dma_prep_slave_sg(
 	if (!desc)
 		return NULL;
 
-	ret = stm32_dma_set_xfer_param(chan, direction, &buswidth);
-	if (ret < 0)
-		goto err;
-
 	/* Set peripheral flow controller */
 	if (chan->dma_sconfig.device_fc)
 		chan->chan_reg.dma_scr |= STM32_DMA_SCR_PFCTRL;
@@ -703,10 +869,15 @@ static struct dma_async_tx_descriptor *stm32_dma_prep_slave_sg(
 		chan->chan_reg.dma_scr &= ~STM32_DMA_SCR_PFCTRL;
 
 	for_each_sg(sgl, sg, sg_len, i) {
+		ret = stm32_dma_set_xfer_param(chan, direction, &buswidth,
+					       sg_dma_len(sg));
+		if (ret < 0)
+			goto err;
+
 		desc->sg_req[i].len = sg_dma_len(sg);
 
 		nb_data_items = desc->sg_req[i].len / buswidth;
-		if (nb_data_items > STM32_DMA_MAX_DATA_ITEMS) {
+		if (nb_data_items > STM32_DMA_ALIGNED_MAX_DATA_ITEMS) {
 			dev_err(chan2dev(chan), "nb items not supported\n");
 			goto err;
 		}
@@ -767,12 +938,12 @@ static struct dma_async_tx_descriptor *stm32_dma_prep_dma_cyclic(
 		return NULL;
 	}
 
-	ret = stm32_dma_set_xfer_param(chan, direction, &buswidth);
+	ret = stm32_dma_set_xfer_param(chan, direction, &buswidth, period_len);
 	if (ret < 0)
 		return NULL;
 
 	nb_data_items = period_len / buswidth;
-	if (nb_data_items > STM32_DMA_MAX_DATA_ITEMS) {
+	if (nb_data_items > STM32_DMA_ALIGNED_MAX_DATA_ITEMS) {
 		dev_err(chan2dev(chan), "number of items not supported\n");
 		return NULL;
 	}
@@ -816,35 +987,45 @@ static struct dma_async_tx_descriptor *stm32_dma_prep_dma_memcpy(
 	dma_addr_t src, size_t len, unsigned long flags)
 {
 	struct stm32_dma_chan *chan = to_stm32_dma_chan(c);
-	u32 num_sgs;
+	enum dma_slave_buswidth max_width;
 	struct stm32_dma_desc *desc;
 	size_t xfer_count, offset;
+	u32 num_sgs, best_burst, dma_burst, threshold;
 	int i;
 
-	num_sgs = DIV_ROUND_UP(len, STM32_DMA_MAX_DATA_ITEMS);
+	num_sgs = DIV_ROUND_UP(len, STM32_DMA_ALIGNED_MAX_DATA_ITEMS);
 	desc = stm32_dma_alloc_desc(num_sgs);
 	if (!desc)
 		return NULL;
 
+	threshold = chan->threshold;
+
 	for (offset = 0, i = 0; offset < len; offset += xfer_count, i++) {
 		xfer_count = min_t(size_t, len - offset,
-				   STM32_DMA_MAX_DATA_ITEMS);
+				   STM32_DMA_ALIGNED_MAX_DATA_ITEMS);
 
-		desc->sg_req[i].len = xfer_count;
+		/* Compute best burst size */
+		max_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
+		best_burst = stm32_dma_get_best_burst(len, STM32_DMA_MAX_BURST,
+						      threshold, max_width);
+		dma_burst = stm32_dma_get_burst(chan, best_burst);
 
 		stm32_dma_clear_reg(&desc->sg_req[i].chan_reg);
 		desc->sg_req[i].chan_reg.dma_scr =
 			STM32_DMA_SCR_DIR(STM32_DMA_MEM_TO_MEM) |
+			STM32_DMA_SCR_PBURST(dma_burst) |
+			STM32_DMA_SCR_MBURST(dma_burst) |
 			STM32_DMA_SCR_MINC |
 			STM32_DMA_SCR_PINC |
 			STM32_DMA_SCR_TCIE |
 			STM32_DMA_SCR_TEIE;
-		desc->sg_req[i].chan_reg.dma_sfcr = STM32_DMA_SFCR_DMDIS |
-			STM32_DMA_SFCR_FTH(STM32_DMA_FIFO_THRESHOLD_FULL) |
-			STM32_DMA_SFCR_FEIE;
+		desc->sg_req[i].chan_reg.dma_sfcr |= STM32_DMA_SFCR_MASK;
+		desc->sg_req[i].chan_reg.dma_sfcr |=
+			STM32_DMA_SFCR_FTH(threshold);
 		desc->sg_req[i].chan_reg.dma_spar = src + offset;
 		desc->sg_req[i].chan_reg.dma_sm0ar = dest + offset;
 		desc->sg_req[i].chan_reg.dma_sndtr = xfer_count;
+		desc->sg_req[i].len = xfer_count;
 	}
 
 	desc->num_sgs = num_sgs;
@@ -869,6 +1050,7 @@ static size_t stm32_dma_desc_residue(struct stm32_dma_chan *chan,
 				     struct stm32_dma_desc *desc,
 				     u32 next_sg)
 {
+	u32 modulo, burst_size;
 	u32 residue = 0;
 	int i;
 
@@ -876,8 +1058,10 @@ static size_t stm32_dma_desc_residue(struct stm32_dma_chan *chan,
 	 * In cyclic mode, for the last period, residue = remaining bytes from
 	 * NDTR
 	 */
-	if (chan->desc->cyclic && next_sg == 0)
-		return stm32_dma_get_remaining_bytes(chan);
+	if (chan->desc->cyclic && next_sg == 0) {
+		residue = stm32_dma_get_remaining_bytes(chan);
+		goto end;
+	}
 
 	/*
 	 * For all other periods in cyclic mode, and in sg mode,
@@ -888,6 +1072,15 @@ static size_t stm32_dma_desc_residue(struct stm32_dma_chan *chan,
 		residue += desc->sg_req[i].len;
 	residue += stm32_dma_get_remaining_bytes(chan);
 
+end:
+	if (!chan->mem_burst)
+		return residue;
+
+	burst_size = chan->mem_burst * chan->mem_width;
+	modulo = residue % burst_size;
+	if (modulo)
+		residue = residue - modulo + burst_size;
+
 	return residue;
 }
 
@@ -902,7 +1095,7 @@ static enum dma_status stm32_dma_tx_status(struct dma_chan *c,
 	u32 residue = 0;
 
 	status = dma_cookie_status(c, cookie, state);
-	if ((status == DMA_COMPLETE) || (!state))
+	if (status == DMA_COMPLETE || !state)
 		return status;
 
 	spin_lock_irqsave(&chan->vchan.lock, flags);
@@ -966,7 +1159,7 @@ static void stm32_dma_desc_free(struct virt_dma_desc *vdesc)
 }
 
 static void stm32_dma_set_config(struct stm32_dma_chan *chan,
-			  struct stm32_dma_cfg *cfg)
+				 struct stm32_dma_cfg *cfg)
 {
 	stm32_dma_clear_reg(&chan->chan_reg);
 
@@ -976,7 +1169,7 @@ static void stm32_dma_set_config(struct stm32_dma_chan *chan,
 	/* Enable Interrupts  */
 	chan->chan_reg.dma_scr |= STM32_DMA_SCR_TEIE | STM32_DMA_SCR_TCIE;
 
-	chan->chan_reg.dma_sfcr = cfg->threshold & STM32_DMA_SFCR_FTH_MASK;
+	chan->threshold = STM32_DMA_THRESHOLD_FTR_GET(cfg->features);
 }
 
 static struct dma_chan *stm32_dma_of_xlate(struct of_phandle_args *dma_spec,
@@ -996,10 +1189,10 @@ static struct dma_chan *stm32_dma_of_xlate(struct of_phandle_args *dma_spec,
 	cfg.channel_id = dma_spec->args[0];
 	cfg.request_line = dma_spec->args[1];
 	cfg.stream_config = dma_spec->args[2];
-	cfg.threshold = dma_spec->args[3];
+	cfg.features = dma_spec->args[3];
 
-	if ((cfg.channel_id >= STM32_DMA_MAX_CHANNELS) ||
-	    (cfg.request_line >= STM32_DMA_MAX_REQUEST_ID)) {
+	if (cfg.channel_id >= STM32_DMA_MAX_CHANNELS ||
+	    cfg.request_line >= STM32_DMA_MAX_REQUEST_ID) {
 		dev_err(dev, "Bad channel and/or request id\n");
 		return NULL;
 	}
diff --git a/drivers/firmware/broadcom/Kconfig b/drivers/firmware/broadcom/Kconfig
index 3c7e5b741e37..f77cdb3a041f 100644
--- a/drivers/firmware/broadcom/Kconfig
+++ b/drivers/firmware/broadcom/Kconfig
@@ -13,6 +13,7 @@ config BCM47XX_NVRAM
 config BCM47XX_SPROM
 	bool "Broadcom SPROM driver"
 	depends on BCM47XX_NVRAM
+	select GENERIC_NET_UTILS
 	help
 	  Broadcom devices store configuration data in SPROM. Accessing it is
 	  specific to the bus host type, e.g. PCI(e) devices have it mapped in
diff --git a/drivers/firmware/broadcom/bcm47xx_sprom.c b/drivers/firmware/broadcom/bcm47xx_sprom.c
index 62aa3cf09b4d..4787f86c8ac1 100644
--- a/drivers/firmware/broadcom/bcm47xx_sprom.c
+++ b/drivers/firmware/broadcom/bcm47xx_sprom.c
@@ -137,20 +137,6 @@ static void nvram_read_leddc(const char *prefix, const char *name,
 	*leddc_off_time = (val >> 16) & 0xff;
 }
 
-static void bcm47xx_nvram_parse_macaddr(char *buf, u8 macaddr[6])
-{
-	if (strchr(buf, ':'))
-		sscanf(buf, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", &macaddr[0],
-			&macaddr[1], &macaddr[2], &macaddr[3], &macaddr[4],
-			&macaddr[5]);
-	else if (strchr(buf, '-'))
-		sscanf(buf, "%hhx-%hhx-%hhx-%hhx-%hhx-%hhx", &macaddr[0],
-			&macaddr[1], &macaddr[2], &macaddr[3], &macaddr[4],
-			&macaddr[5]);
-	else
-		pr_warn("Can not parse mac address: %s\n", buf);
-}
-
 static void nvram_read_macaddr(const char *prefix, const char *name,
 			       u8 val[6], bool fallback)
 {
@@ -161,7 +147,9 @@ static void nvram_read_macaddr(const char *prefix, const char *name,
 	if (err < 0)
 		return;
 
-	bcm47xx_nvram_parse_macaddr(buf, val);
+	strreplace(buf, '-', ':');
+	if (!mac_pton(buf, val))
+		pr_warn("Can not parse mac address: %s\n", buf);
 }
 
 static void nvram_read_alpha2(const char *prefix, const char *name,
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index a4f68affc13b..d39400e5bc42 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -89,14 +89,14 @@ static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname)
 	 */
 	if (to_adreno_gpu(gpu)->fwloc == FW_LOCATION_LEGACY) {
 		ret = qcom_mdt_load(dev, fw, fwname, GPU_PAS_ID,
-				mem_region, mem_phys, mem_size);
+				mem_region, mem_phys, mem_size, NULL);
 	} else {
 		char newname[strlen("qcom/") + strlen(fwname) + 1];
 
 		sprintf(newname, "qcom/%s", fwname);
 
 		ret = qcom_mdt_load(dev, fw, newname, GPU_PAS_ID,
-				mem_region, mem_phys, mem_size);
+				mem_region, mem_phys, mem_size, NULL);
 	}
 	if (ret)
 		goto out;
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 033e57366d56..f249a4428458 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -1231,8 +1231,9 @@ config SENSORS_NCT6775
 	help
 	  If you say yes here you get support for the hardware monitoring
 	  functionality of the Nuvoton NCT6106D, NCT6775F, NCT6776F, NCT6779D,
-	  NCT6791D, NCT6792D, NCT6793D, and compatible Super-I/O chips. This
-	  driver replaces the w83627ehf driver for NCT6775F and NCT6776F.
+	  NCT6791D, NCT6792D, NCT6793D, NCT6795D, NCT6796D, and compatible
+	  Super-I/O chips. This driver replaces the w83627ehf driver for
+	  NCT6775F and NCT6776F.
 
 	  This driver can also be built as a module.  If so, the module
 	  will be called nct6775.
diff --git a/drivers/hwmon/g762.c b/drivers/hwmon/g762.c
index 6d1208b2b6d2..6c83c385a7ca 100644
--- a/drivers/hwmon/g762.c
+++ b/drivers/hwmon/g762.c
@@ -128,7 +128,6 @@ enum g762_regs {
 			 G762_REG_FAN_CMD2_GEAR_MODE_1)) >> 2))
 
 struct g762_data {
-	struct device *hwmon_dev;
 	struct i2c_client *client;
 	struct clk *clk;
 
@@ -594,6 +593,14 @@ MODULE_DEVICE_TABLE(of, g762_dt_match);
  * call to g762_of_clock_disable(). Note that a reference to clock is kept
  * in our private data structure to be used in this function.
  */
+static void g762_of_clock_disable(void *data)
+{
+	struct g762_data *g762 = data;
+
+	clk_disable_unprepare(g762->clk);
+	clk_put(g762->clk);
+}
+
 static int g762_of_clock_enable(struct i2c_client *client)
 {
 	struct g762_data *data;
@@ -626,6 +633,7 @@ static int g762_of_clock_enable(struct i2c_client *client)
 	data = i2c_get_clientdata(client);
 	data->clk = clk;
 
+	devm_add_action(&client->dev, g762_of_clock_disable, data);
 	return 0;
 
  clk_unprep:
@@ -637,17 +645,6 @@ static int g762_of_clock_enable(struct i2c_client *client)
 	return ret;
 }
 
-static void g762_of_clock_disable(struct i2c_client *client)
-{
-	struct g762_data *data = i2c_get_clientdata(client);
-
-	if (!data->clk)
-		return;
-
-	clk_disable_unprepare(data->clk);
-	clk_put(data->clk);
-}
-
 static int g762_of_prop_import_one(struct i2c_client *client,
 				   const char *pname,
 				   int (*psetter)(struct device *dev,
@@ -698,8 +695,6 @@ static int g762_of_clock_enable(struct i2c_client *client)
 {
 	return 0;
 }
-
-static void g762_of_clock_disable(struct i2c_client *client) { }
 #endif
 
 /*
@@ -1054,6 +1049,7 @@ static inline int g762_fan_init(struct device *dev)
 static int g762_probe(struct i2c_client *client, const struct i2c_device_id *id)
 {
 	struct device *dev = &client->dev;
+	struct device *hwmon_dev;
 	struct g762_data *data;
 	int ret;
 
@@ -1080,35 +1076,15 @@ static int g762_probe(struct i2c_client *client, const struct i2c_device_id *id)
 		return ret;
 	ret = g762_of_prop_import(client);
 	if (ret)
-		goto clock_dis;
+		return ret;
 	/* ... or platform_data */
 	ret = g762_pdata_prop_import(client);
 	if (ret)
-		goto clock_dis;
+		return ret;
 
-	data->hwmon_dev = hwmon_device_register_with_groups(dev, client->name,
+	hwmon_dev = devm_hwmon_device_register_with_groups(dev, client->name,
 							    data, g762_groups);
-	if (IS_ERR(data->hwmon_dev)) {
-		ret = PTR_ERR(data->hwmon_dev);
-		goto clock_dis;
-	}
-
-	return 0;
-
- clock_dis:
-	g762_of_clock_disable(client);
-
-	return ret;
-}
-
-static int g762_remove(struct i2c_client *client)
-{
-	struct g762_data *data = i2c_get_clientdata(client);
-
-	hwmon_device_unregister(data->hwmon_dev);
-	g762_of_clock_disable(client);
-
-	return 0;
+	return PTR_ERR_OR_ZERO(hwmon_dev);
 }
 
 static struct i2c_driver g762_driver = {
@@ -1117,7 +1093,6 @@ static struct i2c_driver g762_driver = {
 		.of_match_table = of_match_ptr(g762_dt_match),
 	},
 	.probe	  = g762_probe,
-	.remove	  = g762_remove,
 	.id_table = g762_id,
 };
 
diff --git a/drivers/hwmon/lm92.c b/drivers/hwmon/lm92.c
index 2a91974a10bb..d40fe5122e94 100644
--- a/drivers/hwmon/lm92.c
+++ b/drivers/hwmon/lm92.c
@@ -52,6 +52,7 @@
  */
 static const unsigned short normal_i2c[] = { 0x48, 0x49, 0x4a, 0x4b,
 						I2C_CLIENT_END };
+enum chips { lm92, max6635 };
 
 /* The LM92 registers */
 #define LM92_REG_CONFIG			0x01 /* 8-bit, RW */
@@ -259,62 +260,6 @@ static void lm92_init_client(struct i2c_client *client)
 					  config & 0xFE);
 }
 
-/*
- * The MAX6635 has no identification register, so we have to use tricks
- * to identify it reliably. This is somewhat slow.
- * Note that we do NOT rely on the 2 MSB of the configuration register
- * always reading 0, as suggested by the datasheet, because it was once
- * reported not to be true.
- */
-static int max6635_check(struct i2c_client *client)
-{
-	u16 temp_low, temp_high, temp_hyst, temp_crit;
-	u8 conf;
-	int i;
-
-	/*
-	 * No manufacturer ID register, so a read from this address will
-	 * always return the last read value.
-	 */
-	temp_low = i2c_smbus_read_word_data(client, LM92_REG_TEMP_LOW);
-	if (i2c_smbus_read_word_data(client, LM92_REG_MAN_ID) != temp_low)
-		return 0;
-	temp_high = i2c_smbus_read_word_data(client, LM92_REG_TEMP_HIGH);
-	if (i2c_smbus_read_word_data(client, LM92_REG_MAN_ID) != temp_high)
-		return 0;
-
-	/* Limits are stored as integer values (signed, 9-bit). */
-	if ((temp_low & 0x7f00) || (temp_high & 0x7f00))
-		return 0;
-	temp_hyst = i2c_smbus_read_word_data(client, LM92_REG_TEMP_HYST);
-	temp_crit = i2c_smbus_read_word_data(client, LM92_REG_TEMP_CRIT);
-	if ((temp_hyst & 0x7f00) || (temp_crit & 0x7f00))
-		return 0;
-
-	/*
-	 * Registers addresses were found to cycle over 16-byte boundaries.
-	 * We don't test all registers with all offsets so as to save some
-	 * reads and time, but this should still be sufficient to dismiss
-	 * non-MAX6635 chips.
-	 */
-	conf = i2c_smbus_read_byte_data(client, LM92_REG_CONFIG);
-	for (i = 16; i < 96; i *= 2) {
-		if (temp_hyst != i2c_smbus_read_word_data(client,
-				 LM92_REG_TEMP_HYST + i - 16)
-		 || temp_crit != i2c_smbus_read_word_data(client,
-				 LM92_REG_TEMP_CRIT + i)
-		 || temp_low != i2c_smbus_read_word_data(client,
-				LM92_REG_TEMP_LOW + i + 16)
-		 || temp_high != i2c_smbus_read_word_data(client,
-				 LM92_REG_TEMP_HIGH + i + 32)
-		 || conf != i2c_smbus_read_byte_data(client,
-			    LM92_REG_CONFIG + i))
-			return 0;
-	}
-
-	return 1;
-}
-
 static struct attribute *lm92_attrs[] = {
 	&sensor_dev_attr_temp1_input.dev_attr.attr,
 	&sensor_dev_attr_temp1_crit.dev_attr.attr,
@@ -348,8 +293,6 @@ static int lm92_detect(struct i2c_client *new_client,
 
 	if ((config & 0xe0) == 0x00 && man_id == 0x0180)
 		pr_info("lm92: Found National Semiconductor LM92 chip\n");
-	else if (max6635_check(new_client))
-		pr_info("lm92: Found Maxim MAX6635 chip\n");
 	else
 		return -ENODEV;
 
@@ -387,8 +330,8 @@ static int lm92_probe(struct i2c_client *new_client,
  */
 
 static const struct i2c_device_id lm92_id[] = {
-	{ "lm92", 0 },
-	/* max6635 could be added here */
+	{ "lm92", lm92 },
+	{ "max6635", max6635 },
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, lm92_id);
diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c
index c219e43b8f02..aebce560bfaf 100644
--- a/drivers/hwmon/nct6775.c
+++ b/drivers/hwmon/nct6775.c
@@ -41,7 +41,7 @@
  * nct6792d    15      6       6       2+6    0xc910 0xc1    0x5ca3
  * nct6793d    15      6       6       2+6    0xd120 0xc1    0x5ca3
  * nct6795d    14      6       6       2+6    0xd350 0xc1    0x5ca3
- *
+ * nct6796d    14      7       7       2+6    0xd420 0xc1    0x5ca3
  *
  * #temp lists the number of monitored temperature sources (first value) plus
  * the number of directly connectable temperature sensors (second value).
@@ -68,7 +68,7 @@
 #define USE_ALTERNATE
 
 enum kinds { nct6106, nct6775, nct6776, nct6779, nct6791, nct6792, nct6793,
-	     nct6795 };
+	     nct6795, nct6796 };
 
 /* used to set data->name = nct6775_device_names[data->sio_kind] */
 static const char * const nct6775_device_names[] = {
@@ -80,6 +80,7 @@ static const char * const nct6775_device_names[] = {
 	"nct6792",
 	"nct6793",
 	"nct6795",
+	"nct6796",
 };
 
 static const char * const nct6775_sio_names[] __initconst = {
@@ -91,6 +92,7 @@ static const char * const nct6775_sio_names[] __initconst = {
 	"NCT6792D",
 	"NCT6793D",
 	"NCT6795D",
+	"NCT6796D",
 };
 
 static unsigned short force_id;
@@ -125,6 +127,7 @@ MODULE_PARM_DESC(fan_debounce, "Enable debouncing for fan RPM signal");
 #define SIO_NCT6792_ID		0xc910
 #define SIO_NCT6793_ID		0xd120
 #define SIO_NCT6795_ID		0xd350
+#define SIO_NCT6796_ID		0xd420
 #define SIO_ID_MASK		0xFFF0
 
 enum pwm_enable { off, manual, thermal_cruise, speed_cruise, sf3, sf4 };
@@ -201,7 +204,7 @@ superio_exit(int ioreg)
 #define NUM_REG_ALARM	7	/* Max number of alarm registers */
 #define NUM_REG_BEEP	5	/* Max number of beep registers */
 
-#define NUM_FAN		6
+#define NUM_FAN		7
 
 #define TEMP_SOURCE_VIRTUAL	0x1f
 
@@ -272,26 +275,26 @@ static const u8 NCT6775_PWM_MODE_MASK[] = { 0x01, 0x02, 0x01 };
 /* Advanced Fan control, some values are common for all fans */
 
 static const u16 NCT6775_REG_TARGET[] = {
-	0x101, 0x201, 0x301, 0x801, 0x901, 0xa01 };
+	0x101, 0x201, 0x301, 0x801, 0x901, 0xa01, 0xb01 };
 static const u16 NCT6775_REG_FAN_MODE[] = {
-	0x102, 0x202, 0x302, 0x802, 0x902, 0xa02 };
+	0x102, 0x202, 0x302, 0x802, 0x902, 0xa02, 0xb02 };
 static const u16 NCT6775_REG_FAN_STEP_DOWN_TIME[] = {
-	0x103, 0x203, 0x303, 0x803, 0x903, 0xa03 };
+	0x103, 0x203, 0x303, 0x803, 0x903, 0xa03, 0xb03 };
 static const u16 NCT6775_REG_FAN_STEP_UP_TIME[] = {
-	0x104, 0x204, 0x304, 0x804, 0x904, 0xa04 };
+	0x104, 0x204, 0x304, 0x804, 0x904, 0xa04, 0xb04 };
 static const u16 NCT6775_REG_FAN_STOP_OUTPUT[] = {
-	0x105, 0x205, 0x305, 0x805, 0x905, 0xa05 };
+	0x105, 0x205, 0x305, 0x805, 0x905, 0xa05, 0xb05 };
 static const u16 NCT6775_REG_FAN_START_OUTPUT[] = {
-	0x106, 0x206, 0x306, 0x806, 0x906, 0xa06 };
+	0x106, 0x206, 0x306, 0x806, 0x906, 0xa06, 0xb06 };
 static const u16 NCT6775_REG_FAN_MAX_OUTPUT[] = { 0x10a, 0x20a, 0x30a };
 static const u16 NCT6775_REG_FAN_STEP_OUTPUT[] = { 0x10b, 0x20b, 0x30b };
 
 static const u16 NCT6775_REG_FAN_STOP_TIME[] = {
-	0x107, 0x207, 0x307, 0x807, 0x907, 0xa07 };
+	0x107, 0x207, 0x307, 0x807, 0x907, 0xa07, 0xb07 };
 static const u16 NCT6775_REG_PWM[] = {
-	0x109, 0x209, 0x309, 0x809, 0x909, 0xa09 };
+	0x109, 0x209, 0x309, 0x809, 0x909, 0xa09, 0xb09 };
 static const u16 NCT6775_REG_PWM_READ[] = {
-	0x01, 0x03, 0x11, 0x13, 0x15, 0xa09 };
+	0x01, 0x03, 0x11, 0x13, 0x15, 0xa09, 0xb09 };
 
 static const u16 NCT6775_REG_FAN[] = { 0x630, 0x632, 0x634, 0x636, 0x638 };
 static const u16 NCT6775_REG_FAN_MIN[] = { 0x3b, 0x3c, 0x3d };
@@ -314,7 +317,7 @@ static const u16 NCT6775_REG_TEMP_SOURCE[ARRAY_SIZE(NCT6775_REG_TEMP)] = {
 	0x621, 0x622, 0x623, 0x624, 0x625, 0x626 };
 
 static const u16 NCT6775_REG_TEMP_SEL[] = {
-	0x100, 0x200, 0x300, 0x800, 0x900, 0xa00 };
+	0x100, 0x200, 0x300, 0x800, 0x900, 0xa00, 0xb00 };
 
 static const u16 NCT6775_REG_WEIGHT_TEMP_SEL[] = {
 	0x139, 0x239, 0x339, 0x839, 0x939, 0xa39 };
@@ -330,9 +333,9 @@ static const u16 NCT6775_REG_WEIGHT_TEMP_BASE[] = {
 static const u16 NCT6775_REG_TEMP_OFFSET[] = { 0x454, 0x455, 0x456 };
 
 static const u16 NCT6775_REG_AUTO_TEMP[] = {
-	0x121, 0x221, 0x321, 0x821, 0x921, 0xa21 };
+	0x121, 0x221, 0x321, 0x821, 0x921, 0xa21, 0xb21 };
 static const u16 NCT6775_REG_AUTO_PWM[] = {
-	0x127, 0x227, 0x327, 0x827, 0x927, 0xa27 };
+	0x127, 0x227, 0x327, 0x827, 0x927, 0xa27, 0xb27 };
 
 #define NCT6775_AUTO_TEMP(data, nr, p)	((data)->REG_AUTO_TEMP[nr] + (p))
 #define NCT6775_AUTO_PWM(data, nr, p)	((data)->REG_AUTO_PWM[nr] + (p))
@@ -340,9 +343,9 @@ static const u16 NCT6775_REG_AUTO_PWM[] = {
 static const u16 NCT6775_REG_CRITICAL_ENAB[] = { 0x134, 0x234, 0x334 };
 
 static const u16 NCT6775_REG_CRITICAL_TEMP[] = {
-	0x135, 0x235, 0x335, 0x835, 0x935, 0xa35 };
+	0x135, 0x235, 0x335, 0x835, 0x935, 0xa35, 0xb35 };
 static const u16 NCT6775_REG_CRITICAL_TEMP_TOLERANCE[] = {
-	0x138, 0x238, 0x338, 0x838, 0x938, 0xa38 };
+	0x138, 0x238, 0x338, 0x838, 0x938, 0xa38, 0xb38 };
 
 static const char *const nct6775_temp_label[] = {
 	"",
@@ -414,13 +417,15 @@ static const s8 NCT6776_BEEP_BITS[] = {
 	30, 31 };			/* intrusion0, intrusion1 */
 
 static const u16 NCT6776_REG_TOLERANCE_H[] = {
-	0x10c, 0x20c, 0x30c, 0x80c, 0x90c, 0xa0c };
+	0x10c, 0x20c, 0x30c, 0x80c, 0x90c, 0xa0c, 0xb0c };
 
 static const u8 NCT6776_REG_PWM_MODE[] = { 0x04, 0, 0, 0, 0, 0 };
 static const u8 NCT6776_PWM_MODE_MASK[] = { 0x01, 0, 0, 0, 0, 0 };
 
-static const u16 NCT6776_REG_FAN_MIN[] = { 0x63a, 0x63c, 0x63e, 0x640, 0x642 };
-static const u16 NCT6776_REG_FAN_PULSES[] = { 0x644, 0x645, 0x646, 0, 0 };
+static const u16 NCT6776_REG_FAN_MIN[] = {
+	0x63a, 0x63c, 0x63e, 0x640, 0x642, 0x64a, 0x64c };
+static const u16 NCT6776_REG_FAN_PULSES[] = {
+	0x644, 0x645, 0x646, 0x647, 0x648, 0x649, 0 };
 
 static const u16 NCT6776_REG_WEIGHT_DUTY_BASE[] = {
 	0x13e, 0x23e, 0x33e, 0x83e, 0x93e, 0xa3e };
@@ -495,15 +500,15 @@ static const s8 NCT6779_BEEP_BITS[] = {
 	30, 31 };			/* intrusion0, intrusion1 */
 
 static const u16 NCT6779_REG_FAN[] = {
-	0x4b0, 0x4b2, 0x4b4, 0x4b6, 0x4b8, 0x4ba };
+	0x4b0, 0x4b2, 0x4b4, 0x4b6, 0x4b8, 0x4ba, 0x660 };
 static const u16 NCT6779_REG_FAN_PULSES[] = {
-	0x644, 0x645, 0x646, 0x647, 0x648, 0x649 };
+	0x644, 0x645, 0x646, 0x647, 0x648, 0x649, 0 };
 
 static const u16 NCT6779_REG_CRITICAL_PWM_ENABLE[] = {
-	0x136, 0x236, 0x336, 0x836, 0x936, 0xa36 };
+	0x136, 0x236, 0x336, 0x836, 0x936, 0xa36, 0xb36 };
 #define NCT6779_CRITICAL_PWM_ENABLE_MASK	0x01
 static const u16 NCT6779_REG_CRITICAL_PWM[] = {
-	0x137, 0x237, 0x337, 0x837, 0x937, 0xa37 };
+	0x137, 0x237, 0x337, 0x837, 0x937, 0xa37, 0xb37 };
 
 static const u16 NCT6779_REG_TEMP[] = { 0x27, 0x150 };
 static const u16 NCT6779_REG_TEMP_MON[] = { 0x73, 0x75, 0x77, 0x79, 0x7b };
@@ -570,12 +575,12 @@ static const u16 NCT6779_REG_TEMP_CRIT[32] = {
 
 #define NCT6791_REG_HM_IO_SPACE_LOCK_ENABLE	0x28
 
-static const u16 NCT6791_REG_WEIGHT_TEMP_SEL[6] = { 0, 0x239 };
-static const u16 NCT6791_REG_WEIGHT_TEMP_STEP[6] = { 0, 0x23a };
-static const u16 NCT6791_REG_WEIGHT_TEMP_STEP_TOL[6] = { 0, 0x23b };
-static const u16 NCT6791_REG_WEIGHT_DUTY_STEP[6] = { 0, 0x23c };
-static const u16 NCT6791_REG_WEIGHT_TEMP_BASE[6] = { 0, 0x23d };
-static const u16 NCT6791_REG_WEIGHT_DUTY_BASE[6] = { 0, 0x23e };
+static const u16 NCT6791_REG_WEIGHT_TEMP_SEL[NUM_FAN] = { 0, 0x239 };
+static const u16 NCT6791_REG_WEIGHT_TEMP_STEP[NUM_FAN] = { 0, 0x23a };
+static const u16 NCT6791_REG_WEIGHT_TEMP_STEP_TOL[NUM_FAN] = { 0, 0x23b };
+static const u16 NCT6791_REG_WEIGHT_DUTY_STEP[NUM_FAN] = { 0, 0x23c };
+static const u16 NCT6791_REG_WEIGHT_TEMP_BASE[NUM_FAN] = { 0, 0x23d };
+static const u16 NCT6791_REG_WEIGHT_DUTY_BASE[NUM_FAN] = { 0, 0x23e };
 
 static const u16 NCT6791_REG_ALARM[NUM_REG_ALARM] = {
 	0x459, 0x45A, 0x45B, 0x568, 0x45D };
@@ -707,6 +712,43 @@ static const char *const nct6795_temp_label[] = {
 
 #define NCT6795_TEMP_MASK	0xbfffff7e
 
+static const char *const nct6796_temp_label[] = {
+	"",
+	"SYSTIN",
+	"CPUTIN",
+	"AUXTIN0",
+	"AUXTIN1",
+	"AUXTIN2",
+	"AUXTIN3",
+	"AUXTIN4",
+	"SMBUSMASTER 0",
+	"SMBUSMASTER 1",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"",
+	"PECI Agent 0",
+	"PECI Agent 1",
+	"PCH_CHIP_CPU_MAX_TEMP",
+	"PCH_CHIP_TEMP",
+	"PCH_CPU_TEMP",
+	"PCH_MCH_TEMP",
+	"PCH_DIM0_TEMP",
+	"PCH_DIM1_TEMP",
+	"PCH_DIM2_TEMP",
+	"PCH_DIM3_TEMP",
+	"BYTE_TEMP0",
+	"BYTE_TEMP1",
+	"PECI Agent 0 Calibration",
+	"PECI Agent 1 Calibration",
+	"",
+	"Virtual_TEMP"
+};
+
+#define NCT6796_TEMP_MASK	0xbfff03fe
+
 /* NCT6102D/NCT6106D specific data */
 
 #define NCT6106_REG_VBAT	0x318
@@ -1231,11 +1273,13 @@ static bool is_word_sized(struct nct6775_data *data, u16 reg)
 	case nct6792:
 	case nct6793:
 	case nct6795:
+	case nct6796:
 		return reg == 0x150 || reg == 0x153 || reg == 0x155 ||
 		  ((reg & 0xfff0) == 0x4b0 && (reg & 0x000f) < 0x0b) ||
 		  reg == 0x402 ||
 		  reg == 0x63a || reg == 0x63c || reg == 0x63e ||
-		  reg == 0x640 || reg == 0x642 ||
+		  reg == 0x640 || reg == 0x642 || reg == 0x64a ||
+		  reg == 0x64c || reg == 0x660 ||
 		  reg == 0x73 || reg == 0x75 || reg == 0x77 || reg == 0x79 ||
 		  reg == 0x7b || reg == 0x7d;
 	}
@@ -1469,7 +1513,7 @@ static void nct6775_update_pwm(struct device *dev)
 		duty_is_dc = data->REG_PWM_MODE[i] &&
 		  (nct6775_read_value(data, data->REG_PWM_MODE[i])
 		   & data->PWM_MODE_MASK[i]);
-		data->pwm_mode[i] = duty_is_dc;
+		data->pwm_mode[i] = !duty_is_dc;
 
 		fanmodecfg = nct6775_read_value(data, data->REG_FAN_MODE[i]);
 		for (j = 0; j < ARRAY_SIZE(data->REG_PWM); j++) {
@@ -1584,6 +1628,7 @@ static void nct6775_update_pwm_limits(struct device *dev)
 		case nct6792:
 		case nct6793:
 		case nct6795:
+		case nct6796:
 			reg = nct6775_read_value(data,
 					data->REG_CRITICAL_PWM_ENABLE[i]);
 			if (reg & data->CRITICAL_PWM_ENABLE_MASK)
@@ -2092,6 +2137,8 @@ static umode_t nct6775_fan_is_visible(struct kobject *kobj,
 		return 0;
 	if (nr == 2 && data->BEEP_BITS[FAN_ALARM_BASE + fan] == -1)
 		return 0;
+	if (nr == 3 && !data->REG_FAN_PULSES[fan])
+		return 0;
 	if (nr == 4 && !(data->has_fan_min & BIT(fan)))
 		return 0;
 	if (nr == 5 && data->kind != nct6775)
@@ -2350,7 +2397,7 @@ show_pwm_mode(struct device *dev, struct device_attribute *attr, char *buf)
 	struct nct6775_data *data = nct6775_update_device(dev);
 	struct sensor_device_attribute *sattr = to_sensor_dev_attr(attr);
 
-	return sprintf(buf, "%d\n", !data->pwm_mode[sattr->index]);
+	return sprintf(buf, "%d\n", data->pwm_mode[sattr->index]);
 }
 
 static ssize_t
@@ -2371,9 +2418,9 @@ store_pwm_mode(struct device *dev, struct device_attribute *attr,
 	if (val > 1)
 		return -EINVAL;
 
-	/* Setting DC mode is not supported for all chips/channels */
+	/* Setting DC mode (0) is not supported for all chips/channels */
 	if (data->REG_PWM_MODE[nr] == 0) {
-		if (val)
+		if (!val)
 			return -EINVAL;
 		return count;
 	}
@@ -2382,7 +2429,7 @@ store_pwm_mode(struct device *dev, struct device_attribute *attr,
 	data->pwm_mode[nr] = val;
 	reg = nct6775_read_value(data, data->REG_PWM_MODE[nr]);
 	reg &= ~data->PWM_MODE_MASK[nr];
-	if (val)
+	if (!val)
 		reg |= data->PWM_MODE_MASK[nr];
 	nct6775_write_value(data, data->REG_PWM_MODE[nr], reg);
 	mutex_unlock(&data->update_lock);
@@ -3004,6 +3051,7 @@ store_auto_pwm(struct device *dev, struct device_attribute *attr,
 		case nct6792:
 		case nct6793:
 		case nct6795:
+		case nct6796:
 			nct6775_write_value(data, data->REG_CRITICAL_PWM[nr],
 					    val);
 			reg = nct6775_read_value(data,
@@ -3358,8 +3406,10 @@ static inline void nct6775_init_device(struct nct6775_data *data)
 static void
 nct6775_check_fan_inputs(struct nct6775_data *data)
 {
-	bool fan3pin, fan4pin, fan4min, fan5pin, fan6pin;
-	bool pwm3pin, pwm4pin, pwm5pin, pwm6pin;
+	bool fan3pin = false, fan4pin = false, fan4min = false;
+	bool fan5pin = false, fan6pin = false, fan7pin = false;
+	bool pwm3pin = false, pwm4pin = false, pwm5pin = false;
+	bool pwm6pin = false, pwm7pin = false;
 	int sioreg = data->sioreg;
 	int regval;
 
@@ -3376,12 +3426,6 @@ nct6775_check_fan_inputs(struct nct6775_data *data)
 
 		/* On NCT6775, fan4 shares pins with the fdc interface */
 		fan4pin = !(superio_inb(sioreg, 0x2A) & 0x80);
-		fan4min = false;
-		fan5pin = false;
-		fan6pin = false;
-		pwm4pin = false;
-		pwm5pin = false;
-		pwm6pin = false;
 	} else if (data->kind == nct6776) {
 		bool gpok = superio_inb(sioreg, 0x27) & 0x80;
 		const char *board_vendor, *board_name;
@@ -3421,25 +3465,15 @@ nct6775_check_fan_inputs(struct nct6775_data *data)
 			fan5pin = superio_inb(sioreg, 0x1C) & 0x02;
 
 		fan4min = fan4pin;
-		fan6pin = false;
 		pwm3pin = fan3pin;
-		pwm4pin = false;
-		pwm5pin = false;
-		pwm6pin = false;
 	} else if (data->kind == nct6106) {
 		regval = superio_inb(sioreg, 0x24);
 		fan3pin = !(regval & 0x80);
 		pwm3pin = regval & 0x08;
-
-		fan4pin = false;
-		fan4min = false;
-		fan5pin = false;
-		fan6pin = false;
-		pwm4pin = false;
-		pwm5pin = false;
-		pwm6pin = false;
-	} else { /* NCT6779D, NCT6791D, NCT6792D, NCT6793D, or NCT6795D */
-		int regval_1b, regval_2a, regval_eb;
+	} else {
+		/* NCT6779D, NCT6791D, NCT6792D, NCT6793D, NCT6795D, NCT6796D */
+		int regval_1b, regval_2a, regval_2f;
+		bool dsw_en;
 
 		regval = superio_inb(sioreg, 0x1c);
 
@@ -3460,31 +3494,60 @@ nct6775_check_fan_inputs(struct nct6775_data *data)
 			break;
 		case nct6793:
 		case nct6795:
+		case nct6796:
 			regval_1b = superio_inb(sioreg, 0x1b);
 			regval_2a = superio_inb(sioreg, 0x2a);
+			regval_2f = superio_inb(sioreg, 0x2f);
+			dsw_en = regval_2f & BIT(3);
 
 			if (!pwm5pin)
 				pwm5pin = regval & BIT(7);
-			fan6pin = regval & BIT(1);
-			pwm6pin = regval & BIT(0);
+
 			if (!fan5pin)
 				fan5pin = regval_1b & BIT(5);
 
 			superio_select(sioreg, NCT6775_LD_12);
-			regval_eb = superio_inb(sioreg, 0xeb);
-			if (!fan5pin)
-				fan5pin = regval_eb & BIT(5);
-			if (!pwm5pin)
-				pwm5pin = (regval_eb & BIT(4)) &&
-					   !(regval_2a & BIT(0));
-			if (!fan6pin)
-				fan6pin = regval_eb & BIT(3);
-			if (!pwm6pin)
-				pwm6pin = regval_eb & BIT(2);
+			if (data->kind != nct6796) {
+				int regval_eb = superio_inb(sioreg, 0xeb);
+
+				if (!dsw_en) {
+					fan6pin = regval & BIT(1);
+					pwm6pin = regval & BIT(0);
+				}
+
+				if (!fan5pin)
+					fan5pin = regval_eb & BIT(5);
+				if (!pwm5pin)
+					pwm5pin = (regval_eb & BIT(4)) &&
+						!(regval_2a & BIT(0));
+				if (!fan6pin)
+					fan6pin = regval_eb & BIT(3);
+				if (!pwm6pin)
+					pwm6pin = regval_eb & BIT(2);
+			}
+
+			if (data->kind == nct6795 || data->kind == nct6796) {
+				int regval_ed = superio_inb(sioreg, 0xed);
+
+				if (!fan6pin)
+					fan6pin = (regval_2a & BIT(4)) &&
+					  (!dsw_en ||
+					   (dsw_en && (regval_ed & BIT(4))));
+				if (!pwm6pin)
+					pwm6pin = (regval_2a & BIT(3)) &&
+					  (regval_ed & BIT(2));
+			}
+
+			if (data->kind == nct6796) {
+				int regval_1d = superio_inb(sioreg, 0x1d);
+				int regval_2b = superio_inb(sioreg, 0x2b);
+
+				fan7pin = !(regval_2b & BIT(2));
+				pwm7pin = !(regval_1d & (BIT(2) | BIT(3)));
+			}
+
 			break;
 		default:	/* NCT6779D */
-			fan6pin = false;
-			pwm6pin = false;
 			break;
 		}
 
@@ -3493,11 +3556,11 @@ nct6775_check_fan_inputs(struct nct6775_data *data)
 
 	/* fan 1 and 2 (0x03) are always present */
 	data->has_fan = 0x03 | (fan3pin << 2) | (fan4pin << 3) |
-		(fan5pin << 4) | (fan6pin << 5);
+		(fan5pin << 4) | (fan6pin << 5) | (fan7pin << 6);
 	data->has_fan_min = 0x03 | (fan3pin << 2) | (fan4min << 3) |
-		(fan5pin << 4);
+		(fan5pin << 4) | (fan6pin << 5) | (fan7pin << 6);
 	data->has_pwm = 0x03 | (pwm3pin << 2) | (pwm4pin << 3) |
-		(pwm5pin << 4) | (pwm6pin << 5);
+		(pwm5pin << 4) | (pwm6pin << 5) | (pwm7pin << 6);
 }
 
 static void add_temp_sensors(struct nct6775_data *data, const u16 *regp,
@@ -3856,8 +3919,9 @@ static int nct6775_probe(struct platform_device *pdev)
 	case nct6792:
 	case nct6793:
 	case nct6795:
+	case nct6796:
 		data->in_num = 15;
-		data->pwm_num = 6;
+		data->pwm_num = (data->kind == nct6796) ? 7 : 6;
 		data->auto_pwm_num = 4;
 		data->has_fan_div = false;
 		data->temp_fixed_num = 6;
@@ -3891,6 +3955,10 @@ static int nct6775_probe(struct platform_device *pdev)
 			data->temp_label = nct6795_temp_label;
 			data->temp_mask = NCT6795_TEMP_MASK;
 			break;
+		case nct6796:
+			data->temp_label = nct6796_temp_label;
+			data->temp_mask = NCT6796_TEMP_MASK;
+			break;
 		}
 
 		data->REG_CONFIG = NCT6775_REG_CONFIG;
@@ -4159,6 +4227,7 @@ static int nct6775_probe(struct platform_device *pdev)
 	case nct6792:
 	case nct6793:
 	case nct6795:
+	case nct6796:
 		break;
 	}
 
@@ -4193,6 +4262,7 @@ static int nct6775_probe(struct platform_device *pdev)
 		case nct6792:
 		case nct6793:
 		case nct6795:
+		case nct6796:
 			tmp |= 0x7e;
 			break;
 		}
@@ -4291,7 +4361,8 @@ static int __maybe_unused nct6775_resume(struct device *dev)
 		superio_outb(sioreg, SIO_REG_ENABLE, data->sio_reg_enable);
 
 	if (data->kind == nct6791 || data->kind == nct6792 ||
-	    data->kind == nct6793 || data->kind == nct6795)
+	    data->kind == nct6793 || data->kind == nct6795 ||
+	    data->kind == nct6796)
 		nct6791_enable_io_mapping(sioreg);
 
 	superio_exit(sioreg);
@@ -4391,6 +4462,9 @@ static int __init nct6775_find(int sioaddr, struct nct6775_sio_data *sio_data)
 	case SIO_NCT6795_ID:
 		sio_data->kind = nct6795;
 		break;
+	case SIO_NCT6796_ID:
+		sio_data->kind = nct6796;
+		break;
 	default:
 		if (val != 0xffff)
 			pr_debug("unsupported chip ID: 0x%04x\n", val);
@@ -4417,7 +4491,8 @@ static int __init nct6775_find(int sioaddr, struct nct6775_sio_data *sio_data)
 	}
 
 	if (sio_data->kind == nct6791 || sio_data->kind == nct6792 ||
-	    sio_data->kind == nct6793 || sio_data->kind == nct6795)
+	    sio_data->kind == nct6793 || sio_data->kind == nct6795 ||
+	    sio_data->kind == nct6796)
 		nct6791_enable_io_mapping(sioaddr);
 
 	superio_exit(sioaddr);
diff --git a/drivers/hwmon/pmbus/Kconfig b/drivers/hwmon/pmbus/Kconfig
index 6e4298e99222..e71aec69e76e 100644
--- a/drivers/hwmon/pmbus/Kconfig
+++ b/drivers/hwmon/pmbus/Kconfig
@@ -31,8 +31,8 @@ config SENSORS_ADM1275
 	default n
 	help
 	  If you say yes here you get hardware monitoring support for Analog
-	  Devices ADM1075, ADM1275, ADM1276, ADM1278, ADM1293, and ADM1294
-	  Hot-Swap Controller and Digital Power Monitors.
+	  Devices ADM1075, ADM1272, ADM1275, ADM1276, ADM1278, ADM1293,
+	  and ADM1294 Hot-Swap Controller and Digital Power Monitors.
 
 	  This driver can also be built as a module. If so, the module will
 	  be called adm1275.
diff --git a/drivers/hwmon/pmbus/adm1275.c b/drivers/hwmon/pmbus/adm1275.c
index 00d6995af4c2..13600fa79e7f 100644
--- a/drivers/hwmon/pmbus/adm1275.c
+++ b/drivers/hwmon/pmbus/adm1275.c
@@ -3,6 +3,7 @@
  * and Digital Power Monitor
  *
  * Copyright (c) 2011 Ericsson AB.
+ * Copyright (c) 2018 Guenter Roeck
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -24,7 +25,7 @@
 #include <linux/bitops.h>
 #include "pmbus.h"
 
-enum chips { adm1075, adm1275, adm1276, adm1278, adm1293, adm1294 };
+enum chips { adm1075, adm1272, adm1275, adm1276, adm1278, adm1293, adm1294 };
 
 #define ADM1275_MFR_STATUS_IOUT_WARN2	BIT(0)
 #define ADM1293_MFR_STATUS_VAUX_UV_WARN	BIT(5)
@@ -41,6 +42,8 @@ enum chips { adm1075, adm1275, adm1276, adm1278, adm1293, adm1294 };
 #define ADM1075_IRANGE_25		BIT(3)
 #define ADM1075_IRANGE_MASK		(BIT(3) | BIT(4))
 
+#define ADM1272_IRANGE			BIT(0)
+
 #define ADM1278_TEMP1_EN		BIT(3)
 #define ADM1278_VIN_EN			BIT(2)
 #define ADM1278_VOUT_EN			BIT(1)
@@ -105,6 +108,19 @@ static const struct coefficients adm1075_coefficients[] = {
 	[4] = { 4279, 0, -1 },		/* power, irange50 */
 };
 
+static const struct coefficients adm1272_coefficients[] = {
+	[0] = { 6770, 0, -2 },		/* voltage, vrange 60V */
+	[1] = { 4062, 0, -2 },		/* voltage, vrange 100V */
+	[2] = { 1326, 20480, -1 },	/* current, vsense range 15mV */
+	[3] = { 663, 20480, -1 },	/* current, vsense range 30mV */
+	[4] = { 3512, 0, -2 },		/* power, vrange 60V, irange 15mV */
+	[5] = { 21071, 0, -3 },		/* power, vrange 100V, irange 15mV */
+	[6] = { 17561, 0, -3 },		/* power, vrange 60V, irange 30mV */
+	[7] = { 10535, 0, -3 },		/* power, vrange 100V, irange 30mV */
+	[8] = { 42, 31871, -1 },	/* temperature */
+
+};
+
 static const struct coefficients adm1275_coefficients[] = {
 	[0] = { 19199, 0, -2 },		/* voltage, vrange set */
 	[1] = { 6720, 0, -1 },		/* voltage, vrange not set */
@@ -154,7 +170,7 @@ static int adm1275_read_word_data(struct i2c_client *client, int page, int reg)
 	const struct adm1275_data *data = to_adm1275_data(info);
 	int ret = 0;
 
-	if (page)
+	if (page > 0)
 		return -ENXIO;
 
 	switch (reg) {
@@ -240,7 +256,7 @@ static int adm1275_write_word_data(struct i2c_client *client, int page, int reg,
 	const struct adm1275_data *data = to_adm1275_data(info);
 	int ret;
 
-	if (page)
+	if (page > 0)
 		return -ENXIO;
 
 	switch (reg) {
@@ -335,6 +351,7 @@ static int adm1275_read_byte_data(struct i2c_client *client, int page, int reg)
 
 static const struct i2c_device_id adm1275_id[] = {
 	{ "adm1075", adm1075 },
+	{ "adm1272", adm1272 },
 	{ "adm1275", adm1275 },
 	{ "adm1276", adm1276 },
 	{ "adm1278", adm1278 },
@@ -451,6 +468,54 @@ static int adm1275_probe(struct i2c_client *client,
 			info->func[0] |=
 			  PMBUS_HAVE_VOUT | PMBUS_HAVE_STATUS_VOUT;
 		break;
+	case adm1272:
+		data->have_vout = true;
+		data->have_pin_max = true;
+		data->have_temp_max = true;
+
+		coefficients = adm1272_coefficients;
+		vindex = (config & ADM1275_VRANGE) ? 1 : 0;
+		cindex = (config & ADM1272_IRANGE) ? 3 : 2;
+		/* pindex depends on the combination of the above */
+		switch (config & (ADM1275_VRANGE | ADM1272_IRANGE)) {
+		case 0:
+		default:
+			pindex = 4;
+			break;
+		case ADM1275_VRANGE:
+			pindex = 5;
+			break;
+		case ADM1272_IRANGE:
+			pindex = 6;
+			break;
+		case ADM1275_VRANGE | ADM1272_IRANGE:
+			pindex = 7;
+			break;
+		}
+		tindex = 8;
+
+		info->func[0] |= PMBUS_HAVE_PIN | PMBUS_HAVE_STATUS_INPUT |
+			PMBUS_HAVE_VOUT | PMBUS_HAVE_STATUS_VOUT;
+
+		/* Enable VOUT if not enabled (it is disabled by default) */
+		if (!(config & ADM1278_VOUT_EN)) {
+			config |= ADM1278_VOUT_EN;
+			ret = i2c_smbus_write_byte_data(client,
+							ADM1275_PMON_CONFIG,
+							config);
+			if (ret < 0) {
+				dev_err(&client->dev,
+					"Failed to enable VOUT monitoring\n");
+				return -ENODEV;
+			}
+		}
+
+		if (config & ADM1278_TEMP1_EN)
+			info->func[0] |=
+				PMBUS_HAVE_TEMP | PMBUS_HAVE_STATUS_TEMP;
+		if (config & ADM1278_VIN_EN)
+			info->func[0] |= PMBUS_HAVE_VIN;
+		break;
 	case adm1275:
 		if (device_config & ADM1275_IOUT_WARN2_SELECT)
 			data->have_oc_fault = true;
diff --git a/drivers/hwmon/pmbus/max8688.c b/drivers/hwmon/pmbus/max8688.c
index dd4883a19045..e951f9b87abb 100644
--- a/drivers/hwmon/pmbus/max8688.c
+++ b/drivers/hwmon/pmbus/max8688.c
@@ -45,7 +45,7 @@ static int max8688_read_word_data(struct i2c_client *client, int page, int reg)
 {
 	int ret;
 
-	if (page)
+	if (page > 0)
 		return -ENXIO;
 
 	switch (reg) {
diff --git a/drivers/hwmon/pmbus/ucd9000.c b/drivers/hwmon/pmbus/ucd9000.c
index b74dbeca2e8d..70cecb06f93c 100644
--- a/drivers/hwmon/pmbus/ucd9000.c
+++ b/drivers/hwmon/pmbus/ucd9000.c
@@ -19,6 +19,7 @@
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
+#include <linux/debugfs.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of_device.h>
@@ -27,6 +28,8 @@
 #include <linux/slab.h>
 #include <linux/i2c.h>
 #include <linux/pmbus.h>
+#include <linux/gpio.h>
+#include <linux/gpio/driver.h>
 #include "pmbus.h"
 
 enum chips { ucd9000, ucd90120, ucd90124, ucd90160, ucd9090, ucd90910 };
@@ -35,8 +38,19 @@ enum chips { ucd9000, ucd90120, ucd90124, ucd90160, ucd9090, ucd90910 };
 #define UCD9000_NUM_PAGES		0xd6
 #define UCD9000_FAN_CONFIG_INDEX	0xe7
 #define UCD9000_FAN_CONFIG		0xe8
+#define UCD9000_MFR_STATUS		0xf3
+#define UCD9000_GPIO_SELECT		0xfa
+#define UCD9000_GPIO_CONFIG		0xfb
 #define UCD9000_DEVICE_ID		0xfd
 
+/* GPIO CONFIG bits */
+#define UCD9000_GPIO_CONFIG_ENABLE	BIT(0)
+#define UCD9000_GPIO_CONFIG_OUT_ENABLE	BIT(1)
+#define UCD9000_GPIO_CONFIG_OUT_VALUE	BIT(2)
+#define UCD9000_GPIO_CONFIG_STATUS	BIT(3)
+#define UCD9000_GPIO_INPUT		0
+#define UCD9000_GPIO_OUTPUT		1
+
 #define UCD9000_MON_TYPE(x)	(((x) >> 5) & 0x07)
 #define UCD9000_MON_PAGE(x)	((x) & 0x0f)
 
@@ -47,12 +61,29 @@ enum chips { ucd9000, ucd90120, ucd90124, ucd90160, ucd9090, ucd90910 };
 
 #define UCD9000_NUM_FAN		4
 
+#define UCD9000_GPIO_NAME_LEN	16
+#define UCD9090_NUM_GPIOS	23
+#define UCD901XX_NUM_GPIOS	26
+#define UCD90910_NUM_GPIOS	26
+
+#define UCD9000_DEBUGFS_NAME_LEN	24
+#define UCD9000_GPI_COUNT		8
+
 struct ucd9000_data {
 	u8 fan_data[UCD9000_NUM_FAN][I2C_SMBUS_BLOCK_MAX];
 	struct pmbus_driver_info info;
+#ifdef CONFIG_GPIOLIB
+	struct gpio_chip gpio;
+#endif
+	struct dentry *debugfs;
 };
 #define to_ucd9000_data(_info) container_of(_info, struct ucd9000_data, info)
 
+struct ucd9000_debugfs_entry {
+	struct i2c_client *client;
+	u8 index;
+};
+
 static int ucd9000_get_fan_config(struct i2c_client *client, int fan)
 {
 	int fan_config = 0;
@@ -149,6 +180,312 @@ static const struct of_device_id ucd9000_of_match[] = {
 };
 MODULE_DEVICE_TABLE(of, ucd9000_of_match);
 
+#ifdef CONFIG_GPIOLIB
+static int ucd9000_gpio_read_config(struct i2c_client *client,
+				    unsigned int offset)
+{
+	int ret;
+
+	/* No page set required */
+	ret = i2c_smbus_write_byte_data(client, UCD9000_GPIO_SELECT, offset);
+	if (ret < 0)
+		return ret;
+
+	return i2c_smbus_read_byte_data(client, UCD9000_GPIO_CONFIG);
+}
+
+static int ucd9000_gpio_get(struct gpio_chip *gc, unsigned int offset)
+{
+	struct i2c_client *client  = gpiochip_get_data(gc);
+	int ret;
+
+	ret = ucd9000_gpio_read_config(client, offset);
+	if (ret < 0)
+		return ret;
+
+	return !!(ret & UCD9000_GPIO_CONFIG_STATUS);
+}
+
+static void ucd9000_gpio_set(struct gpio_chip *gc, unsigned int offset,
+			     int value)
+{
+	struct i2c_client *client = gpiochip_get_data(gc);
+	int ret;
+
+	ret = ucd9000_gpio_read_config(client, offset);
+	if (ret < 0) {
+		dev_dbg(&client->dev, "failed to read GPIO %d config: %d\n",
+			offset, ret);
+		return;
+	}
+
+	if (value) {
+		if (ret & UCD9000_GPIO_CONFIG_STATUS)
+			return;
+
+		ret |= UCD9000_GPIO_CONFIG_STATUS;
+	} else {
+		if (!(ret & UCD9000_GPIO_CONFIG_STATUS))
+			return;
+
+		ret &= ~UCD9000_GPIO_CONFIG_STATUS;
+	}
+
+	ret |= UCD9000_GPIO_CONFIG_ENABLE;
+
+	/* Page set not required */
+	ret = i2c_smbus_write_byte_data(client, UCD9000_GPIO_CONFIG, ret);
+	if (ret < 0) {
+		dev_dbg(&client->dev, "Failed to write GPIO %d config: %d\n",
+			offset, ret);
+		return;
+	}
+
+	ret &= ~UCD9000_GPIO_CONFIG_ENABLE;
+
+	ret = i2c_smbus_write_byte_data(client, UCD9000_GPIO_CONFIG, ret);
+	if (ret < 0)
+		dev_dbg(&client->dev, "Failed to write GPIO %d config: %d\n",
+			offset, ret);
+}
+
+static int ucd9000_gpio_get_direction(struct gpio_chip *gc,
+				      unsigned int offset)
+{
+	struct i2c_client *client = gpiochip_get_data(gc);
+	int ret;
+
+	ret = ucd9000_gpio_read_config(client, offset);
+	if (ret < 0)
+		return ret;
+
+	return !(ret & UCD9000_GPIO_CONFIG_OUT_ENABLE);
+}
+
+static int ucd9000_gpio_set_direction(struct gpio_chip *gc,
+				      unsigned int offset, bool direction_out,
+				      int requested_out)
+{
+	struct i2c_client *client = gpiochip_get_data(gc);
+	int ret, config, out_val;
+
+	ret = ucd9000_gpio_read_config(client, offset);
+	if (ret < 0)
+		return ret;
+
+	if (direction_out) {
+		out_val = requested_out ? UCD9000_GPIO_CONFIG_OUT_VALUE : 0;
+
+		if (ret & UCD9000_GPIO_CONFIG_OUT_ENABLE) {
+			if ((ret & UCD9000_GPIO_CONFIG_OUT_VALUE) == out_val)
+				return 0;
+		} else {
+			ret |= UCD9000_GPIO_CONFIG_OUT_ENABLE;
+		}
+
+		if (out_val)
+			ret |= UCD9000_GPIO_CONFIG_OUT_VALUE;
+		else
+			ret &= ~UCD9000_GPIO_CONFIG_OUT_VALUE;
+
+	} else {
+		if (!(ret & UCD9000_GPIO_CONFIG_OUT_ENABLE))
+			return 0;
+
+		ret &= ~UCD9000_GPIO_CONFIG_OUT_ENABLE;
+	}
+
+	ret |= UCD9000_GPIO_CONFIG_ENABLE;
+	config = ret;
+
+	/* Page set not required */
+	ret = i2c_smbus_write_byte_data(client, UCD9000_GPIO_CONFIG, config);
+	if (ret < 0)
+		return ret;
+
+	config &= ~UCD9000_GPIO_CONFIG_ENABLE;
+
+	return i2c_smbus_write_byte_data(client, UCD9000_GPIO_CONFIG, config);
+}
+
+static int ucd9000_gpio_direction_input(struct gpio_chip *gc,
+					unsigned int offset)
+{
+	return ucd9000_gpio_set_direction(gc, offset, UCD9000_GPIO_INPUT, 0);
+}
+
+static int ucd9000_gpio_direction_output(struct gpio_chip *gc,
+					 unsigned int offset, int val)
+{
+	return ucd9000_gpio_set_direction(gc, offset, UCD9000_GPIO_OUTPUT,
+					  val);
+}
+
+static void ucd9000_probe_gpio(struct i2c_client *client,
+			       const struct i2c_device_id *mid,
+			       struct ucd9000_data *data)
+{
+	int rc;
+
+	switch (mid->driver_data) {
+	case ucd9090:
+		data->gpio.ngpio = UCD9090_NUM_GPIOS;
+		break;
+	case ucd90120:
+	case ucd90124:
+	case ucd90160:
+		data->gpio.ngpio = UCD901XX_NUM_GPIOS;
+		break;
+	case ucd90910:
+		data->gpio.ngpio = UCD90910_NUM_GPIOS;
+		break;
+	default:
+		return; /* GPIO support is optional. */
+	}
+
+	/*
+	 * Pinmux support has not been added to the new gpio_chip.
+	 * This support should be added when possible given the mux
+	 * behavior of these IO devices.
+	 */
+	data->gpio.label = client->name;
+	data->gpio.get_direction = ucd9000_gpio_get_direction;
+	data->gpio.direction_input = ucd9000_gpio_direction_input;
+	data->gpio.direction_output = ucd9000_gpio_direction_output;
+	data->gpio.get = ucd9000_gpio_get;
+	data->gpio.set = ucd9000_gpio_set;
+	data->gpio.can_sleep = true;
+	data->gpio.base = -1;
+	data->gpio.parent = &client->dev;
+
+	rc = devm_gpiochip_add_data(&client->dev, &data->gpio, client);
+	if (rc)
+		dev_warn(&client->dev, "Could not add gpiochip: %d\n", rc);
+}
+#else
+static void ucd9000_probe_gpio(struct i2c_client *client,
+			       const struct i2c_device_id *mid,
+			       struct ucd9000_data *data)
+{
+}
+#endif /* CONFIG_GPIOLIB */
+
+#ifdef CONFIG_DEBUG_FS
+static int ucd9000_get_mfr_status(struct i2c_client *client, u8 *buffer)
+{
+	int ret = pmbus_set_page(client, 0);
+
+	if (ret < 0)
+		return ret;
+
+	return i2c_smbus_read_block_data(client, UCD9000_MFR_STATUS, buffer);
+}
+
+static int ucd9000_debugfs_show_mfr_status_bit(void *data, u64 *val)
+{
+	struct ucd9000_debugfs_entry *entry = data;
+	struct i2c_client *client = entry->client;
+	u8 buffer[I2C_SMBUS_BLOCK_MAX];
+	int ret;
+
+	ret = ucd9000_get_mfr_status(client, buffer);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * Attribute only created for devices with gpi fault bits at bits
+	 * 16-23, which is the second byte of the response.
+	 */
+	*val = !!(buffer[1] & BIT(entry->index));
+
+	return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(ucd9000_debugfs_mfr_status_bit,
+			 ucd9000_debugfs_show_mfr_status_bit, NULL, "%1lld\n");
+
+static ssize_t ucd9000_debugfs_read_mfr_status(struct file *file,
+					       char __user *buf, size_t count,
+					       loff_t *ppos)
+{
+	struct i2c_client *client = file->private_data;
+	u8 buffer[I2C_SMBUS_BLOCK_MAX];
+	char str[(I2C_SMBUS_BLOCK_MAX * 2) + 2];
+	char *res;
+	int rc;
+
+	rc = ucd9000_get_mfr_status(client, buffer);
+	if (rc < 0)
+		return rc;
+
+	res = bin2hex(str, buffer, min(rc, I2C_SMBUS_BLOCK_MAX));
+	*res++ = '\n';
+	*res = 0;
+
+	return simple_read_from_buffer(buf, count, ppos, str, res - str);
+}
+
+static const struct file_operations ucd9000_debugfs_show_mfr_status_fops = {
+	.llseek = noop_llseek,
+	.read = ucd9000_debugfs_read_mfr_status,
+	.open = simple_open,
+};
+
+static int ucd9000_init_debugfs(struct i2c_client *client,
+				const struct i2c_device_id *mid,
+				struct ucd9000_data *data)
+{
+	struct dentry *debugfs;
+	struct ucd9000_debugfs_entry *entries;
+	int i;
+	char name[UCD9000_DEBUGFS_NAME_LEN];
+
+	debugfs = pmbus_get_debugfs_dir(client);
+	if (!debugfs)
+		return -ENOENT;
+
+	data->debugfs = debugfs_create_dir(client->name, debugfs);
+	if (!data->debugfs)
+		return -ENOENT;
+
+	/*
+	 * Of the chips this driver supports, only the UCD9090, UCD90160,
+	 * and UCD90910 report GPI faults in their MFR_STATUS register, so only
+	 * create the GPI fault debugfs attributes for those chips.
+	 */
+	if (mid->driver_data == ucd9090 || mid->driver_data == ucd90160 ||
+	    mid->driver_data == ucd90910) {
+		entries = devm_kzalloc(&client->dev,
+				       sizeof(*entries) * UCD9000_GPI_COUNT,
+				       GFP_KERNEL);
+		if (!entries)
+			return -ENOMEM;
+
+		for (i = 0; i < UCD9000_GPI_COUNT; i++) {
+			entries[i].client = client;
+			entries[i].index = i;
+			scnprintf(name, UCD9000_DEBUGFS_NAME_LEN,
+				  "gpi%d_alarm", i + 1);
+			debugfs_create_file(name, 0444, data->debugfs,
+					    &entries[i],
+					    &ucd9000_debugfs_mfr_status_bit);
+		}
+	}
+
+	scnprintf(name, UCD9000_DEBUGFS_NAME_LEN, "mfr_status");
+	debugfs_create_file(name, 0444, data->debugfs, client,
+			    &ucd9000_debugfs_show_mfr_status_fops);
+
+	return 0;
+}
+#else
+static int ucd9000_init_debugfs(struct i2c_client *client,
+				const struct i2c_device_id *mid,
+				struct ucd9000_data *data)
+{
+	return 0;
+}
+#endif /* CONFIG_DEBUG_FS */
+
 static int ucd9000_probe(struct i2c_client *client,
 			 const struct i2c_device_id *id)
 {
@@ -263,7 +600,18 @@ static int ucd9000_probe(struct i2c_client *client,
 		  | PMBUS_HAVE_FAN34 | PMBUS_HAVE_STATUS_FAN34;
 	}
 
-	return pmbus_do_probe(client, mid, info);
+	ucd9000_probe_gpio(client, mid, data);
+
+	ret = pmbus_do_probe(client, mid, info);
+	if (ret)
+		return ret;
+
+	ret = ucd9000_init_debugfs(client, mid, data);
+	if (ret)
+		dev_warn(&client->dev, "Failed to register debugfs: %d\n",
+			 ret);
+
+	return 0;
 }
 
 /* This is the driver that will be inserted */
diff --git a/drivers/hwmon/sht21.c b/drivers/hwmon/sht21.c
index 190e7b39ce32..2c7ba70921f5 100644
--- a/drivers/hwmon/sht21.c
+++ b/drivers/hwmon/sht21.c
@@ -16,8 +16,7 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA
  *
- * Data sheet available (5/2010) at
- * http://www.sensirion.com/en/pdf/product_information/Datasheet-humidity-sensor-SHT21.pdf
+ * Data sheet available at http://www.sensirion.com/file/datasheet_sht21
  */
 
 #include <linux/module.h>
diff --git a/drivers/hwmon/via-cputemp.c b/drivers/hwmon/via-cputemp.c
index 07a0cb0a1f28..0e81f287d305 100644
--- a/drivers/hwmon/via-cputemp.c
+++ b/drivers/hwmon/via-cputemp.c
@@ -136,20 +136,24 @@ static int via_cputemp_probe(struct platform_device *pdev)
 	data->id = pdev->id;
 	data->name = "via_cputemp";
 
-	switch (c->x86_model) {
-	case 0xA:
-		/* C7 A */
-	case 0xD:
-		/* C7 D */
-		data->msr_temp = 0x1169;
-		data->msr_vid = 0x198;
-		break;
-	case 0xF:
-		/* Nano */
+	if (c->x86 == 7) {
 		data->msr_temp = 0x1423;
-		break;
-	default:
-		return -ENODEV;
+	} else {
+		switch (c->x86_model) {
+		case 0xA:
+			/* C7 A */
+		case 0xD:
+			/* C7 D */
+			data->msr_temp = 0x1169;
+			data->msr_vid = 0x198;
+			break;
+		case 0xF:
+			/* Nano */
+			data->msr_temp = 0x1423;
+			break;
+		default:
+			return -ENODEV;
+		}
 	}
 
 	/* test if we can access the TEMPERATURE MSR */
@@ -283,6 +287,7 @@ static const struct x86_cpu_id __initconst cputemp_ids[] = {
 	{ X86_VENDOR_CENTAUR, 6, 0xa, }, /* C7 A */
 	{ X86_VENDOR_CENTAUR, 6, 0xd, }, /* C7 D */
 	{ X86_VENDOR_CENTAUR, 6, 0xf, }, /* Nano */
+	{ X86_VENDOR_CENTAUR, 7, X86_MODEL_ANY, },
 	{}
 };
 MODULE_DEVICE_TABLE(x86cpu, cputemp_ids);
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 83819d0cbf90..2a99f0f14795 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -81,11 +81,12 @@
  */
 #define AMD_IOMMU_PGSIZES	((~0xFFFUL) & ~(2ULL << 38))
 
-static DEFINE_RWLOCK(amd_iommu_devtable_lock);
+static DEFINE_SPINLOCK(amd_iommu_devtable_lock);
+static DEFINE_SPINLOCK(pd_bitmap_lock);
+static DEFINE_SPINLOCK(iommu_table_lock);
 
 /* List of all available dev_data structures */
-static LIST_HEAD(dev_data_list);
-static DEFINE_SPINLOCK(dev_data_list_lock);
+static LLIST_HEAD(dev_data_list);
 
 LIST_HEAD(ioapic_map);
 LIST_HEAD(hpet_map);
@@ -204,40 +205,33 @@ static struct dma_ops_domain* to_dma_ops_domain(struct protection_domain *domain
 static struct iommu_dev_data *alloc_dev_data(u16 devid)
 {
 	struct iommu_dev_data *dev_data;
-	unsigned long flags;
 
 	dev_data = kzalloc(sizeof(*dev_data), GFP_KERNEL);
 	if (!dev_data)
 		return NULL;
 
 	dev_data->devid = devid;
-
-	spin_lock_irqsave(&dev_data_list_lock, flags);
-	list_add_tail(&dev_data->dev_data_list, &dev_data_list);
-	spin_unlock_irqrestore(&dev_data_list_lock, flags);
-
 	ratelimit_default_init(&dev_data->rs);
 
+	llist_add(&dev_data->dev_data_list, &dev_data_list);
 	return dev_data;
 }
 
 static struct iommu_dev_data *search_dev_data(u16 devid)
 {
 	struct iommu_dev_data *dev_data;
-	unsigned long flags;
+	struct llist_node *node;
 
-	spin_lock_irqsave(&dev_data_list_lock, flags);
-	list_for_each_entry(dev_data, &dev_data_list, dev_data_list) {
+	if (llist_empty(&dev_data_list))
+		return NULL;
+
+	node = dev_data_list.first;
+	llist_for_each_entry(dev_data, node, dev_data_list) {
 		if (dev_data->devid == devid)
-			goto out_unlock;
+			return dev_data;
 	}
 
-	dev_data = NULL;
-
-out_unlock:
-	spin_unlock_irqrestore(&dev_data_list_lock, flags);
-
-	return dev_data;
+	return NULL;
 }
 
 static int __last_alias(struct pci_dev *pdev, u16 alias, void *data)
@@ -311,6 +305,8 @@ static struct iommu_dev_data *find_dev_data(u16 devid)
 
 	if (dev_data == NULL) {
 		dev_data = alloc_dev_data(devid);
+		if (!dev_data)
+			return NULL;
 
 		if (translation_pre_enabled(iommu))
 			dev_data->defer_attach = true;
@@ -548,6 +544,7 @@ static void amd_iommu_report_page_fault(u16 devid, u16 domain_id,
 
 static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
 {
+	struct device *dev = iommu->iommu.dev;
 	int type, devid, domid, flags;
 	volatile u32 *event = __evt;
 	int count = 0;
@@ -574,53 +571,53 @@ retry:
 		amd_iommu_report_page_fault(devid, domid, address, flags);
 		return;
 	} else {
-		printk(KERN_ERR "AMD-Vi: Event logged [");
+		dev_err(dev, "AMD-Vi: Event logged [");
 	}
 
 	switch (type) {
 	case EVENT_TYPE_ILL_DEV:
-		printk("ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x "
-		       "address=0x%016llx flags=0x%04x]\n",
-		       PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
-		       address, flags);
+		dev_err(dev, "ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x "
+			"address=0x%016llx flags=0x%04x]\n",
+			PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
+			address, flags);
 		dump_dte_entry(devid);
 		break;
 	case EVENT_TYPE_DEV_TAB_ERR:
-		printk("DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
-		       "address=0x%016llx flags=0x%04x]\n",
-		       PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
-		       address, flags);
+		dev_err(dev, "DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
+			"address=0x%016llx flags=0x%04x]\n",
+			PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
+			address, flags);
 		break;
 	case EVENT_TYPE_PAGE_TAB_ERR:
-		printk("PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
-		       "domain=0x%04x address=0x%016llx flags=0x%04x]\n",
-		       PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
-		       domid, address, flags);
+		dev_err(dev, "PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
+			"domain=0x%04x address=0x%016llx flags=0x%04x]\n",
+			PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
+			domid, address, flags);
 		break;
 	case EVENT_TYPE_ILL_CMD:
-		printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address);
+		dev_err(dev, "ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address);
 		dump_command(address);
 		break;
 	case EVENT_TYPE_CMD_HARD_ERR:
-		printk("COMMAND_HARDWARE_ERROR address=0x%016llx "
-		       "flags=0x%04x]\n", address, flags);
+		dev_err(dev, "COMMAND_HARDWARE_ERROR address=0x%016llx "
+			"flags=0x%04x]\n", address, flags);
 		break;
 	case EVENT_TYPE_IOTLB_INV_TO:
-		printk("IOTLB_INV_TIMEOUT device=%02x:%02x.%x "
-		       "address=0x%016llx]\n",
-		       PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
-		       address);
+		dev_err(dev, "IOTLB_INV_TIMEOUT device=%02x:%02x.%x "
+			"address=0x%016llx]\n",
+			PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
+			address);
 		break;
 	case EVENT_TYPE_INV_DEV_REQ:
-		printk("INVALID_DEVICE_REQUEST device=%02x:%02x.%x "
-		       "address=0x%016llx flags=0x%04x]\n",
-		       PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
-		       address, flags);
+		dev_err(dev, "INVALID_DEVICE_REQUEST device=%02x:%02x.%x "
+			"address=0x%016llx flags=0x%04x]\n",
+			PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
+			address, flags);
 		break;
 	default:
-		printk(KERN_ERR "UNKNOWN type=0x%02x event[0]=0x%08x "
-		       "event[1]=0x%08x event[2]=0x%08x event[3]=0x%08x\n",
-		       type, event[0], event[1], event[2], event[3]);
+		dev_err(dev, KERN_ERR "UNKNOWN event[0]=0x%08x event[1]=0x%08x "
+			"event[2]=0x%08x event[3]=0x%08x\n",
+			event[0], event[1], event[2], event[3]);
 	}
 
 	memset(__evt, 0, 4 * sizeof(u32));
@@ -1057,9 +1054,9 @@ static int iommu_queue_command_sync(struct amd_iommu *iommu,
 	unsigned long flags;
 	int ret;
 
-	spin_lock_irqsave(&iommu->lock, flags);
+	raw_spin_lock_irqsave(&iommu->lock, flags);
 	ret = __iommu_queue_command_sync(iommu, cmd, sync);
-	spin_unlock_irqrestore(&iommu->lock, flags);
+	raw_spin_unlock_irqrestore(&iommu->lock, flags);
 
 	return ret;
 }
@@ -1085,7 +1082,7 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
 
 	build_completion_wait(&cmd, (u64)&iommu->cmd_sem);
 
-	spin_lock_irqsave(&iommu->lock, flags);
+	raw_spin_lock_irqsave(&iommu->lock, flags);
 
 	iommu->cmd_sem = 0;
 
@@ -1096,7 +1093,7 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
 	ret = wait_on_sem(&iommu->cmd_sem);
 
 out_unlock:
-	spin_unlock_irqrestore(&iommu->lock, flags);
+	raw_spin_unlock_irqrestore(&iommu->lock, flags);
 
 	return ret;
 }
@@ -1606,29 +1603,26 @@ static void del_domain_from_list(struct protection_domain *domain)
 
 static u16 domain_id_alloc(void)
 {
-	unsigned long flags;
 	int id;
 
-	write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+	spin_lock(&pd_bitmap_lock);
 	id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID);
 	BUG_ON(id == 0);
 	if (id > 0 && id < MAX_DOMAIN_ID)
 		__set_bit(id, amd_iommu_pd_alloc_bitmap);
 	else
 		id = 0;
-	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+	spin_unlock(&pd_bitmap_lock);
 
 	return id;
 }
 
 static void domain_id_free(int id)
 {
-	unsigned long flags;
-
-	write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+	spin_lock(&pd_bitmap_lock);
 	if (id > 0 && id < MAX_DOMAIN_ID)
 		__clear_bit(id, amd_iommu_pd_alloc_bitmap);
-	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+	spin_unlock(&pd_bitmap_lock);
 }
 
 #define DEFINE_FREE_PT_FN(LVL, FN)				\
@@ -2104,9 +2098,9 @@ static int attach_device(struct device *dev,
 	}
 
 skip_ats_check:
-	write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+	spin_lock_irqsave(&amd_iommu_devtable_lock, flags);
 	ret = __attach_device(dev_data, domain);
-	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+	spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
 
 	/*
 	 * We might boot into a crash-kernel here. The crashed kernel
@@ -2156,9 +2150,9 @@ static void detach_device(struct device *dev)
 	domain   = dev_data->domain;
 
 	/* lock device table */
-	write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+	spin_lock_irqsave(&amd_iommu_devtable_lock, flags);
 	__detach_device(dev_data);
-	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+	spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
 
 	if (!dev_is_pci(dev))
 		return;
@@ -2795,7 +2789,7 @@ static void cleanup_domain(struct protection_domain *domain)
 	struct iommu_dev_data *entry;
 	unsigned long flags;
 
-	write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+	spin_lock_irqsave(&amd_iommu_devtable_lock, flags);
 
 	while (!list_empty(&domain->dev_list)) {
 		entry = list_first_entry(&domain->dev_list,
@@ -2803,7 +2797,7 @@ static void cleanup_domain(struct protection_domain *domain)
 		__detach_device(entry);
 	}
 
-	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+	spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
 }
 
 static void protection_domain_free(struct protection_domain *domain)
@@ -3025,15 +3019,12 @@ static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
 	size_t unmap_size;
 
 	if (domain->mode == PAGE_MODE_NONE)
-		return -EINVAL;
+		return 0;
 
 	mutex_lock(&domain->api_lock);
 	unmap_size = iommu_unmap_page(domain, iova, page_size);
 	mutex_unlock(&domain->api_lock);
 
-	domain_flush_tlb_pde(domain);
-	domain_flush_complete(domain);
-
 	return unmap_size;
 }
 
@@ -3151,6 +3142,19 @@ static bool amd_iommu_is_attach_deferred(struct iommu_domain *domain,
 	return dev_data->defer_attach;
 }
 
+static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain)
+{
+	struct protection_domain *dom = to_pdomain(domain);
+
+	domain_flush_tlb_pde(dom);
+	domain_flush_complete(dom);
+}
+
+static void amd_iommu_iotlb_range_add(struct iommu_domain *domain,
+				      unsigned long iova, size_t size)
+{
+}
+
 const struct iommu_ops amd_iommu_ops = {
 	.capable = amd_iommu_capable,
 	.domain_alloc = amd_iommu_domain_alloc,
@@ -3169,6 +3173,9 @@ const struct iommu_ops amd_iommu_ops = {
 	.apply_resv_region = amd_iommu_apply_resv_region,
 	.is_attach_deferred = amd_iommu_is_attach_deferred,
 	.pgsize_bitmap	= AMD_IOMMU_PGSIZES,
+	.flush_iotlb_all = amd_iommu_flush_iotlb_all,
+	.iotlb_range_add = amd_iommu_iotlb_range_add,
+	.iotlb_sync = amd_iommu_flush_iotlb_all,
 };
 
 /*****************************************************************************
@@ -3570,14 +3577,62 @@ static void set_dte_irq_entry(u16 devid, struct irq_remap_table *table)
 	amd_iommu_dev_table[devid].data[2] = dte;
 }
 
-static struct irq_remap_table *get_irq_table(u16 devid, bool ioapic)
+static struct irq_remap_table *get_irq_table(u16 devid)
+{
+	struct irq_remap_table *table;
+
+	if (WARN_ONCE(!amd_iommu_rlookup_table[devid],
+		      "%s: no iommu for devid %x\n", __func__, devid))
+		return NULL;
+
+	table = irq_lookup_table[devid];
+	if (WARN_ONCE(!table, "%s: no table for devid %x\n", __func__, devid))
+		return NULL;
+
+	return table;
+}
+
+static struct irq_remap_table *__alloc_irq_table(void)
+{
+	struct irq_remap_table *table;
+
+	table = kzalloc(sizeof(*table), GFP_KERNEL);
+	if (!table)
+		return NULL;
+
+	table->table = kmem_cache_alloc(amd_iommu_irq_cache, GFP_KERNEL);
+	if (!table->table) {
+		kfree(table);
+		return NULL;
+	}
+	raw_spin_lock_init(&table->lock);
+
+	if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
+		memset(table->table, 0,
+		       MAX_IRQS_PER_TABLE * sizeof(u32));
+	else
+		memset(table->table, 0,
+		       (MAX_IRQS_PER_TABLE * (sizeof(u64) * 2)));
+	return table;
+}
+
+static void set_remap_table_entry(struct amd_iommu *iommu, u16 devid,
+				  struct irq_remap_table *table)
+{
+	irq_lookup_table[devid] = table;
+	set_dte_irq_entry(devid, table);
+	iommu_flush_dte(iommu, devid);
+}
+
+static struct irq_remap_table *alloc_irq_table(u16 devid)
 {
 	struct irq_remap_table *table = NULL;
+	struct irq_remap_table *new_table = NULL;
 	struct amd_iommu *iommu;
 	unsigned long flags;
 	u16 alias;
 
-	write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+	spin_lock_irqsave(&iommu_table_lock, flags);
 
 	iommu = amd_iommu_rlookup_table[devid];
 	if (!iommu)
@@ -3590,60 +3645,45 @@ static struct irq_remap_table *get_irq_table(u16 devid, bool ioapic)
 	alias = amd_iommu_alias_table[devid];
 	table = irq_lookup_table[alias];
 	if (table) {
-		irq_lookup_table[devid] = table;
-		set_dte_irq_entry(devid, table);
-		iommu_flush_dte(iommu, devid);
-		goto out;
+		set_remap_table_entry(iommu, devid, table);
+		goto out_wait;
 	}
+	spin_unlock_irqrestore(&iommu_table_lock, flags);
 
 	/* Nothing there yet, allocate new irq remapping table */
-	table = kzalloc(sizeof(*table), GFP_ATOMIC);
-	if (!table)
-		goto out_unlock;
-
-	/* Initialize table spin-lock */
-	spin_lock_init(&table->lock);
+	new_table = __alloc_irq_table();
+	if (!new_table)
+		return NULL;
 
-	if (ioapic)
-		/* Keep the first 32 indexes free for IOAPIC interrupts */
-		table->min_index = 32;
+	spin_lock_irqsave(&iommu_table_lock, flags);
 
-	table->table = kmem_cache_alloc(amd_iommu_irq_cache, GFP_ATOMIC);
-	if (!table->table) {
-		kfree(table);
-		table = NULL;
+	table = irq_lookup_table[devid];
+	if (table)
 		goto out_unlock;
-	}
 
-	if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
-		memset(table->table, 0,
-		       MAX_IRQS_PER_TABLE * sizeof(u32));
-	else
-		memset(table->table, 0,
-		       (MAX_IRQS_PER_TABLE * (sizeof(u64) * 2)));
-
-	if (ioapic) {
-		int i;
-
-		for (i = 0; i < 32; ++i)
-			iommu->irte_ops->set_allocated(table, i);
+	table = irq_lookup_table[alias];
+	if (table) {
+		set_remap_table_entry(iommu, devid, table);
+		goto out_wait;
 	}
 
-	irq_lookup_table[devid] = table;
-	set_dte_irq_entry(devid, table);
-	iommu_flush_dte(iommu, devid);
-	if (devid != alias) {
-		irq_lookup_table[alias] = table;
-		set_dte_irq_entry(alias, table);
-		iommu_flush_dte(iommu, alias);
-	}
+	table = new_table;
+	new_table = NULL;
 
-out:
+	set_remap_table_entry(iommu, devid, table);
+	if (devid != alias)
+		set_remap_table_entry(iommu, alias, table);
+
+out_wait:
 	iommu_completion_wait(iommu);
 
 out_unlock:
-	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+	spin_unlock_irqrestore(&iommu_table_lock, flags);
 
+	if (new_table) {
+		kmem_cache_free(amd_iommu_irq_cache, new_table->table);
+		kfree(new_table);
+	}
 	return table;
 }
 
@@ -3657,14 +3697,14 @@ static int alloc_irq_index(u16 devid, int count, bool align)
 	if (!iommu)
 		return -ENODEV;
 
-	table = get_irq_table(devid, false);
+	table = alloc_irq_table(devid);
 	if (!table)
 		return -ENODEV;
 
 	if (align)
 		alignment = roundup_pow_of_two(count);
 
-	spin_lock_irqsave(&table->lock, flags);
+	raw_spin_lock_irqsave(&table->lock, flags);
 
 	/* Scan table for free entries */
 	for (index = ALIGN(table->min_index, alignment), c = 0;
@@ -3691,7 +3731,7 @@ static int alloc_irq_index(u16 devid, int count, bool align)
 	index = -ENOSPC;
 
 out:
-	spin_unlock_irqrestore(&table->lock, flags);
+	raw_spin_unlock_irqrestore(&table->lock, flags);
 
 	return index;
 }
@@ -3708,11 +3748,11 @@ static int modify_irte_ga(u16 devid, int index, struct irte_ga *irte,
 	if (iommu == NULL)
 		return -EINVAL;
 
-	table = get_irq_table(devid, false);
+	table = get_irq_table(devid);
 	if (!table)
 		return -ENOMEM;
 
-	spin_lock_irqsave(&table->lock, flags);
+	raw_spin_lock_irqsave(&table->lock, flags);
 
 	entry = (struct irte_ga *)table->table;
 	entry = &entry[index];
@@ -3723,7 +3763,7 @@ static int modify_irte_ga(u16 devid, int index, struct irte_ga *irte,
 	if (data)
 		data->ref = entry;
 
-	spin_unlock_irqrestore(&table->lock, flags);
+	raw_spin_unlock_irqrestore(&table->lock, flags);
 
 	iommu_flush_irt(iommu, devid);
 	iommu_completion_wait(iommu);
@@ -3741,13 +3781,13 @@ static int modify_irte(u16 devid, int index, union irte *irte)
 	if (iommu == NULL)
 		return -EINVAL;
 
-	table = get_irq_table(devid, false);
+	table = get_irq_table(devid);
 	if (!table)
 		return -ENOMEM;
 
-	spin_lock_irqsave(&table->lock, flags);
+	raw_spin_lock_irqsave(&table->lock, flags);
 	table->table[index] = irte->val;
-	spin_unlock_irqrestore(&table->lock, flags);
+	raw_spin_unlock_irqrestore(&table->lock, flags);
 
 	iommu_flush_irt(iommu, devid);
 	iommu_completion_wait(iommu);
@@ -3765,13 +3805,13 @@ static void free_irte(u16 devid, int index)
 	if (iommu == NULL)
 		return;
 
-	table = get_irq_table(devid, false);
+	table = get_irq_table(devid);
 	if (!table)
 		return;
 
-	spin_lock_irqsave(&table->lock, flags);
+	raw_spin_lock_irqsave(&table->lock, flags);
 	iommu->irte_ops->clear_allocated(table, index);
-	spin_unlock_irqrestore(&table->lock, flags);
+	raw_spin_unlock_irqrestore(&table->lock, flags);
 
 	iommu_flush_irt(iommu, devid);
 	iommu_completion_wait(iommu);
@@ -3852,10 +3892,8 @@ static void irte_ga_set_affinity(void *entry, u16 devid, u16 index,
 				 u8 vector, u32 dest_apicid)
 {
 	struct irte_ga *irte = (struct irte_ga *) entry;
-	struct iommu_dev_data *dev_data = search_dev_data(devid);
 
-	if (!dev_data || !dev_data->use_vapic ||
-	    !irte->lo.fields_remap.guest_mode) {
+	if (!irte->lo.fields_remap.guest_mode) {
 		irte->hi.fields.vector = vector;
 		irte->lo.fields_remap.destination = dest_apicid;
 		modify_irte_ga(devid, index, irte, NULL);
@@ -4061,7 +4099,7 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq,
 	struct amd_ir_data *data = NULL;
 	struct irq_cfg *cfg;
 	int i, ret, devid;
-	int index = -1;
+	int index;
 
 	if (!info)
 		return -EINVAL;
@@ -4085,10 +4123,26 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq,
 		return ret;
 
 	if (info->type == X86_IRQ_ALLOC_TYPE_IOAPIC) {
-		if (get_irq_table(devid, true))
+		struct irq_remap_table *table;
+		struct amd_iommu *iommu;
+
+		table = alloc_irq_table(devid);
+		if (table) {
+			if (!table->min_index) {
+				/*
+				 * Keep the first 32 indexes free for IOAPIC
+				 * interrupts.
+				 */
+				table->min_index = 32;
+				iommu = amd_iommu_rlookup_table[devid];
+				for (i = 0; i < 32; ++i)
+					iommu->irte_ops->set_allocated(table, i);
+			}
+			WARN_ON(table->min_index != 32);
 			index = info->ioapic_pin;
-		else
-			ret = -ENOMEM;
+		} else {
+			index = -ENOMEM;
+		}
 	} else {
 		bool align = (info->type == X86_IRQ_ALLOC_TYPE_MSI);
 
@@ -4354,7 +4408,7 @@ int amd_iommu_update_ga(int cpu, bool is_run, void *data)
 {
 	unsigned long flags;
 	struct amd_iommu *iommu;
-	struct irq_remap_table *irt;
+	struct irq_remap_table *table;
 	struct amd_ir_data *ir_data = (struct amd_ir_data *)data;
 	int devid = ir_data->irq_2_irte.devid;
 	struct irte_ga *entry = (struct irte_ga *) ir_data->entry;
@@ -4368,11 +4422,11 @@ int amd_iommu_update_ga(int cpu, bool is_run, void *data)
 	if (!iommu)
 		return -ENODEV;
 
-	irt = get_irq_table(devid, false);
-	if (!irt)
+	table = get_irq_table(devid);
+	if (!table)
 		return -ENODEV;
 
-	spin_lock_irqsave(&irt->lock, flags);
+	raw_spin_lock_irqsave(&table->lock, flags);
 
 	if (ref->lo.fields_vapic.guest_mode) {
 		if (cpu >= 0)
@@ -4381,7 +4435,7 @@ int amd_iommu_update_ga(int cpu, bool is_run, void *data)
 		barrier();
 	}
 
-	spin_unlock_irqrestore(&irt->lock, flags);
+	raw_spin_unlock_irqrestore(&table->lock, flags);
 
 	iommu_flush_irt(iommu, devid);
 	iommu_completion_wait(iommu);
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 4e4a615bf13f..904c575d1677 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -1474,7 +1474,7 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
 {
 	int ret;
 
-	spin_lock_init(&iommu->lock);
+	raw_spin_lock_init(&iommu->lock);
 
 	/* Add IOMMU to internal data structures */
 	list_add_tail(&iommu->list, &amd_iommu_list);
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 6a877ebd058b..1c9b080276c9 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -408,7 +408,7 @@ extern bool amd_iommu_iotlb_sup;
 #define IRQ_TABLE_ALIGNMENT	128
 
 struct irq_remap_table {
-	spinlock_t lock;
+	raw_spinlock_t lock;
 	unsigned min_index;
 	u32 *table;
 };
@@ -490,7 +490,7 @@ struct amd_iommu {
 	int index;
 
 	/* locks the accesses to the hardware */
-	spinlock_t lock;
+	raw_spinlock_t lock;
 
 	/* Pointer to PCI device of this IOMMU */
 	struct pci_dev *dev;
@@ -627,7 +627,7 @@ struct devid_map {
  */
 struct iommu_dev_data {
 	struct list_head list;		  /* For domain->dev_list */
-	struct list_head dev_data_list;	  /* For global dev_data_list */
+	struct llist_node dev_data_list;  /* For global dev_data_list */
 	struct protection_domain *domain; /* Domain the device is bound to */
 	u16 devid;			  /* PCI Device ID */
 	u16 alias;			  /* Alias Device ID */
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 3f2f1fc68b52..1d647104bccc 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -22,6 +22,8 @@
 
 #include <linux/acpi.h>
 #include <linux/acpi_iort.h>
+#include <linux/bitfield.h>
+#include <linux/bitops.h>
 #include <linux/delay.h>
 #include <linux/dma-iommu.h>
 #include <linux/err.h>
@@ -43,18 +45,15 @@
 
 /* MMIO registers */
 #define ARM_SMMU_IDR0			0x0
-#define IDR0_ST_LVL_SHIFT		27
-#define IDR0_ST_LVL_MASK		0x3
-#define IDR0_ST_LVL_2LVL		(1 << IDR0_ST_LVL_SHIFT)
-#define IDR0_STALL_MODEL_SHIFT		24
-#define IDR0_STALL_MODEL_MASK		0x3
-#define IDR0_STALL_MODEL_STALL		(0 << IDR0_STALL_MODEL_SHIFT)
-#define IDR0_STALL_MODEL_FORCE		(2 << IDR0_STALL_MODEL_SHIFT)
-#define IDR0_TTENDIAN_SHIFT		21
-#define IDR0_TTENDIAN_MASK		0x3
-#define IDR0_TTENDIAN_LE		(2 << IDR0_TTENDIAN_SHIFT)
-#define IDR0_TTENDIAN_BE		(3 << IDR0_TTENDIAN_SHIFT)
-#define IDR0_TTENDIAN_MIXED		(0 << IDR0_TTENDIAN_SHIFT)
+#define IDR0_ST_LVL			GENMASK(28, 27)
+#define IDR0_ST_LVL_2LVL		1
+#define IDR0_STALL_MODEL		GENMASK(25, 24)
+#define IDR0_STALL_MODEL_STALL		0
+#define IDR0_STALL_MODEL_FORCE		2
+#define IDR0_TTENDIAN			GENMASK(22, 21)
+#define IDR0_TTENDIAN_MIXED		0
+#define IDR0_TTENDIAN_LE		2
+#define IDR0_TTENDIAN_BE		3
 #define IDR0_CD2L			(1 << 19)
 #define IDR0_VMID16			(1 << 18)
 #define IDR0_PRI			(1 << 16)
@@ -64,10 +63,9 @@
 #define IDR0_ATS			(1 << 10)
 #define IDR0_HYP			(1 << 9)
 #define IDR0_COHACC			(1 << 4)
-#define IDR0_TTF_SHIFT			2
-#define IDR0_TTF_MASK			0x3
-#define IDR0_TTF_AARCH64		(2 << IDR0_TTF_SHIFT)
-#define IDR0_TTF_AARCH32_64		(3 << IDR0_TTF_SHIFT)
+#define IDR0_TTF			GENMASK(3, 2)
+#define IDR0_TTF_AARCH64		2
+#define IDR0_TTF_AARCH32_64		3
 #define IDR0_S1P			(1 << 1)
 #define IDR0_S2P			(1 << 0)
 
@@ -75,31 +73,27 @@
 #define IDR1_TABLES_PRESET		(1 << 30)
 #define IDR1_QUEUES_PRESET		(1 << 29)
 #define IDR1_REL			(1 << 28)
-#define IDR1_CMDQ_SHIFT			21
-#define IDR1_CMDQ_MASK			0x1f
-#define IDR1_EVTQ_SHIFT			16
-#define IDR1_EVTQ_MASK			0x1f
-#define IDR1_PRIQ_SHIFT			11
-#define IDR1_PRIQ_MASK			0x1f
-#define IDR1_SSID_SHIFT			6
-#define IDR1_SSID_MASK			0x1f
-#define IDR1_SID_SHIFT			0
-#define IDR1_SID_MASK			0x3f
+#define IDR1_CMDQS			GENMASK(25, 21)
+#define IDR1_EVTQS			GENMASK(20, 16)
+#define IDR1_PRIQS			GENMASK(15, 11)
+#define IDR1_SSIDSIZE			GENMASK(10, 6)
+#define IDR1_SIDSIZE			GENMASK(5, 0)
 
 #define ARM_SMMU_IDR5			0x14
-#define IDR5_STALL_MAX_SHIFT		16
-#define IDR5_STALL_MAX_MASK		0xffff
+#define IDR5_STALL_MAX			GENMASK(31, 16)
 #define IDR5_GRAN64K			(1 << 6)
 #define IDR5_GRAN16K			(1 << 5)
 #define IDR5_GRAN4K			(1 << 4)
-#define IDR5_OAS_SHIFT			0
-#define IDR5_OAS_MASK			0x7
-#define IDR5_OAS_32_BIT			(0 << IDR5_OAS_SHIFT)
-#define IDR5_OAS_36_BIT			(1 << IDR5_OAS_SHIFT)
-#define IDR5_OAS_40_BIT			(2 << IDR5_OAS_SHIFT)
-#define IDR5_OAS_42_BIT			(3 << IDR5_OAS_SHIFT)
-#define IDR5_OAS_44_BIT			(4 << IDR5_OAS_SHIFT)
-#define IDR5_OAS_48_BIT			(5 << IDR5_OAS_SHIFT)
+#define IDR5_OAS			GENMASK(2, 0)
+#define IDR5_OAS_32_BIT			0
+#define IDR5_OAS_36_BIT			1
+#define IDR5_OAS_40_BIT			2
+#define IDR5_OAS_42_BIT			3
+#define IDR5_OAS_44_BIT			4
+#define IDR5_OAS_48_BIT			5
+#define IDR5_OAS_52_BIT			6
+#define IDR5_VAX			GENMASK(11, 10)
+#define IDR5_VAX_52_BIT			1
 
 #define ARM_SMMU_CR0			0x20
 #define CR0_CMDQEN			(1 << 3)
@@ -110,18 +104,16 @@
 #define ARM_SMMU_CR0ACK			0x24
 
 #define ARM_SMMU_CR1			0x28
-#define CR1_SH_NSH			0
-#define CR1_SH_OSH			2
-#define CR1_SH_ISH			3
+#define CR1_TABLE_SH			GENMASK(11, 10)
+#define CR1_TABLE_OC			GENMASK(9, 8)
+#define CR1_TABLE_IC			GENMASK(7, 6)
+#define CR1_QUEUE_SH			GENMASK(5, 4)
+#define CR1_QUEUE_OC			GENMASK(3, 2)
+#define CR1_QUEUE_IC			GENMASK(1, 0)
+/* CR1 cacheability fields don't quite follow the usual TCR-style encoding */
 #define CR1_CACHE_NC			0
 #define CR1_CACHE_WB			1
 #define CR1_CACHE_WT			2
-#define CR1_TABLE_SH_SHIFT		10
-#define CR1_TABLE_OC_SHIFT		8
-#define CR1_TABLE_IC_SHIFT		6
-#define CR1_QUEUE_SH_SHIFT		4
-#define CR1_QUEUE_OC_SHIFT		2
-#define CR1_QUEUE_IC_SHIFT		0
 
 #define ARM_SMMU_CR2			0x2c
 #define CR2_PTM				(1 << 2)
@@ -129,8 +121,8 @@
 #define CR2_E2H				(1 << 0)
 
 #define ARM_SMMU_GBPA			0x44
-#define GBPA_ABORT			(1 << 20)
 #define GBPA_UPDATE			(1 << 31)
+#define GBPA_ABORT			(1 << 20)
 
 #define ARM_SMMU_IRQ_CTRL		0x50
 #define IRQ_CTRL_EVTQ_IRQEN		(1 << 2)
@@ -158,18 +150,14 @@
 
 #define ARM_SMMU_STRTAB_BASE		0x80
 #define STRTAB_BASE_RA			(1UL << 62)
-#define STRTAB_BASE_ADDR_SHIFT		6
-#define STRTAB_BASE_ADDR_MASK		0x3ffffffffffUL
+#define STRTAB_BASE_ADDR_MASK		GENMASK_ULL(51, 6)
 
 #define ARM_SMMU_STRTAB_BASE_CFG	0x88
-#define STRTAB_BASE_CFG_LOG2SIZE_SHIFT	0
-#define STRTAB_BASE_CFG_LOG2SIZE_MASK	0x3f
-#define STRTAB_BASE_CFG_SPLIT_SHIFT	6
-#define STRTAB_BASE_CFG_SPLIT_MASK	0x1f
-#define STRTAB_BASE_CFG_FMT_SHIFT	16
-#define STRTAB_BASE_CFG_FMT_MASK	0x3
-#define STRTAB_BASE_CFG_FMT_LINEAR	(0 << STRTAB_BASE_CFG_FMT_SHIFT)
-#define STRTAB_BASE_CFG_FMT_2LVL	(1 << STRTAB_BASE_CFG_FMT_SHIFT)
+#define STRTAB_BASE_CFG_FMT		GENMASK(17, 16)
+#define STRTAB_BASE_CFG_FMT_LINEAR	0
+#define STRTAB_BASE_CFG_FMT_2LVL	1
+#define STRTAB_BASE_CFG_SPLIT		GENMASK(10, 6)
+#define STRTAB_BASE_CFG_LOG2SIZE	GENMASK(5, 0)
 
 #define ARM_SMMU_CMDQ_BASE		0x90
 #define ARM_SMMU_CMDQ_PROD		0x98
@@ -190,14 +178,16 @@
 #define ARM_SMMU_PRIQ_IRQ_CFG2		0xdc
 
 /* Common MSI config fields */
-#define MSI_CFG0_ADDR_SHIFT		2
-#define MSI_CFG0_ADDR_MASK		0x3fffffffffffUL
-#define MSI_CFG2_SH_SHIFT		4
-#define MSI_CFG2_SH_NSH			(0UL << MSI_CFG2_SH_SHIFT)
-#define MSI_CFG2_SH_OSH			(2UL << MSI_CFG2_SH_SHIFT)
-#define MSI_CFG2_SH_ISH			(3UL << MSI_CFG2_SH_SHIFT)
-#define MSI_CFG2_MEMATTR_SHIFT		0
-#define MSI_CFG2_MEMATTR_DEVICE_nGnRE	(0x1 << MSI_CFG2_MEMATTR_SHIFT)
+#define MSI_CFG0_ADDR_MASK		GENMASK_ULL(51, 2)
+#define MSI_CFG2_SH			GENMASK(5, 4)
+#define MSI_CFG2_MEMATTR		GENMASK(3, 0)
+
+/* Common memory attribute values */
+#define ARM_SMMU_SH_NSH			0
+#define ARM_SMMU_SH_OSH			2
+#define ARM_SMMU_SH_ISH			3
+#define ARM_SMMU_MEMATTR_DEVICE_nGnRE	0x1
+#define ARM_SMMU_MEMATTR_OIWB		0xf
 
 #define Q_IDX(q, p)			((p) & ((1 << (q)->max_n_shift) - 1))
 #define Q_WRP(q, p)			((p) & (1 << (q)->max_n_shift))
@@ -207,10 +197,8 @@
 					 Q_IDX(q, p) * (q)->ent_dwords)
 
 #define Q_BASE_RWA			(1UL << 62)
-#define Q_BASE_ADDR_SHIFT		5
-#define Q_BASE_ADDR_MASK		0xfffffffffffUL
-#define Q_BASE_LOG2SIZE_SHIFT		0
-#define Q_BASE_LOG2SIZE_MASK		0x1fUL
+#define Q_BASE_ADDR_MASK		GENMASK_ULL(51, 5)
+#define Q_BASE_LOG2SIZE			GENMASK(4, 0)
 
 /*
  * Stream table.
@@ -223,187 +211,143 @@
 #define STRTAB_SPLIT			8
 
 #define STRTAB_L1_DESC_DWORDS		1
-#define STRTAB_L1_DESC_SPAN_SHIFT	0
-#define STRTAB_L1_DESC_SPAN_MASK	0x1fUL
-#define STRTAB_L1_DESC_L2PTR_SHIFT	6
-#define STRTAB_L1_DESC_L2PTR_MASK	0x3ffffffffffUL
+#define STRTAB_L1_DESC_SPAN		GENMASK_ULL(4, 0)
+#define STRTAB_L1_DESC_L2PTR_MASK	GENMASK_ULL(51, 6)
 
 #define STRTAB_STE_DWORDS		8
 #define STRTAB_STE_0_V			(1UL << 0)
-#define STRTAB_STE_0_CFG_SHIFT		1
-#define STRTAB_STE_0_CFG_MASK		0x7UL
-#define STRTAB_STE_0_CFG_ABORT		(0UL << STRTAB_STE_0_CFG_SHIFT)
-#define STRTAB_STE_0_CFG_BYPASS		(4UL << STRTAB_STE_0_CFG_SHIFT)
-#define STRTAB_STE_0_CFG_S1_TRANS	(5UL << STRTAB_STE_0_CFG_SHIFT)
-#define STRTAB_STE_0_CFG_S2_TRANS	(6UL << STRTAB_STE_0_CFG_SHIFT)
-
-#define STRTAB_STE_0_S1FMT_SHIFT	4
-#define STRTAB_STE_0_S1FMT_LINEAR	(0UL << STRTAB_STE_0_S1FMT_SHIFT)
-#define STRTAB_STE_0_S1CTXPTR_SHIFT	6
-#define STRTAB_STE_0_S1CTXPTR_MASK	0x3ffffffffffUL
-#define STRTAB_STE_0_S1CDMAX_SHIFT	59
-#define STRTAB_STE_0_S1CDMAX_MASK	0x1fUL
+#define STRTAB_STE_0_CFG		GENMASK_ULL(3, 1)
+#define STRTAB_STE_0_CFG_ABORT		0
+#define STRTAB_STE_0_CFG_BYPASS		4
+#define STRTAB_STE_0_CFG_S1_TRANS	5
+#define STRTAB_STE_0_CFG_S2_TRANS	6
+
+#define STRTAB_STE_0_S1FMT		GENMASK_ULL(5, 4)
+#define STRTAB_STE_0_S1FMT_LINEAR	0
+#define STRTAB_STE_0_S1CTXPTR_MASK	GENMASK_ULL(51, 6)
+#define STRTAB_STE_0_S1CDMAX		GENMASK_ULL(63, 59)
 
 #define STRTAB_STE_1_S1C_CACHE_NC	0UL
 #define STRTAB_STE_1_S1C_CACHE_WBRA	1UL
 #define STRTAB_STE_1_S1C_CACHE_WT	2UL
 #define STRTAB_STE_1_S1C_CACHE_WB	3UL
-#define STRTAB_STE_1_S1C_SH_NSH		0UL
-#define STRTAB_STE_1_S1C_SH_OSH		2UL
-#define STRTAB_STE_1_S1C_SH_ISH		3UL
-#define STRTAB_STE_1_S1CIR_SHIFT	2
-#define STRTAB_STE_1_S1COR_SHIFT	4
-#define STRTAB_STE_1_S1CSH_SHIFT	6
+#define STRTAB_STE_1_S1CIR		GENMASK_ULL(3, 2)
+#define STRTAB_STE_1_S1COR		GENMASK_ULL(5, 4)
+#define STRTAB_STE_1_S1CSH		GENMASK_ULL(7, 6)
 
 #define STRTAB_STE_1_S1STALLD		(1UL << 27)
 
+#define STRTAB_STE_1_EATS		GENMASK_ULL(29, 28)
 #define STRTAB_STE_1_EATS_ABT		0UL
 #define STRTAB_STE_1_EATS_TRANS		1UL
 #define STRTAB_STE_1_EATS_S1CHK		2UL
-#define STRTAB_STE_1_EATS_SHIFT		28
 
+#define STRTAB_STE_1_STRW		GENMASK_ULL(31, 30)
 #define STRTAB_STE_1_STRW_NSEL1		0UL
 #define STRTAB_STE_1_STRW_EL2		2UL
-#define STRTAB_STE_1_STRW_SHIFT		30
 
+#define STRTAB_STE_1_SHCFG		GENMASK_ULL(45, 44)
 #define STRTAB_STE_1_SHCFG_INCOMING	1UL
-#define STRTAB_STE_1_SHCFG_SHIFT	44
 
-#define STRTAB_STE_2_S2VMID_SHIFT	0
-#define STRTAB_STE_2_S2VMID_MASK	0xffffUL
-#define STRTAB_STE_2_VTCR_SHIFT		32
-#define STRTAB_STE_2_VTCR_MASK		0x7ffffUL
+#define STRTAB_STE_2_S2VMID		GENMASK_ULL(15, 0)
+#define STRTAB_STE_2_VTCR		GENMASK_ULL(50, 32)
 #define STRTAB_STE_2_S2AA64		(1UL << 51)
 #define STRTAB_STE_2_S2ENDI		(1UL << 52)
 #define STRTAB_STE_2_S2PTW		(1UL << 54)
 #define STRTAB_STE_2_S2R		(1UL << 58)
 
-#define STRTAB_STE_3_S2TTB_SHIFT	4
-#define STRTAB_STE_3_S2TTB_MASK		0xfffffffffffUL
+#define STRTAB_STE_3_S2TTB_MASK		GENMASK_ULL(51, 4)
 
 /* Context descriptor (stage-1 only) */
 #define CTXDESC_CD_DWORDS		8
-#define CTXDESC_CD_0_TCR_T0SZ_SHIFT	0
-#define ARM64_TCR_T0SZ_SHIFT		0
-#define ARM64_TCR_T0SZ_MASK		0x1fUL
-#define CTXDESC_CD_0_TCR_TG0_SHIFT	6
-#define ARM64_TCR_TG0_SHIFT		14
-#define ARM64_TCR_TG0_MASK		0x3UL
-#define CTXDESC_CD_0_TCR_IRGN0_SHIFT	8
-#define ARM64_TCR_IRGN0_SHIFT		8
-#define ARM64_TCR_IRGN0_MASK		0x3UL
-#define CTXDESC_CD_0_TCR_ORGN0_SHIFT	10
-#define ARM64_TCR_ORGN0_SHIFT		10
-#define ARM64_TCR_ORGN0_MASK		0x3UL
-#define CTXDESC_CD_0_TCR_SH0_SHIFT	12
-#define ARM64_TCR_SH0_SHIFT		12
-#define ARM64_TCR_SH0_MASK		0x3UL
-#define CTXDESC_CD_0_TCR_EPD0_SHIFT	14
-#define ARM64_TCR_EPD0_SHIFT		7
-#define ARM64_TCR_EPD0_MASK		0x1UL
-#define CTXDESC_CD_0_TCR_EPD1_SHIFT	30
-#define ARM64_TCR_EPD1_SHIFT		23
-#define ARM64_TCR_EPD1_MASK		0x1UL
+#define CTXDESC_CD_0_TCR_T0SZ		GENMASK_ULL(5, 0)
+#define ARM64_TCR_T0SZ			GENMASK_ULL(5, 0)
+#define CTXDESC_CD_0_TCR_TG0		GENMASK_ULL(7, 6)
+#define ARM64_TCR_TG0			GENMASK_ULL(15, 14)
+#define CTXDESC_CD_0_TCR_IRGN0		GENMASK_ULL(9, 8)
+#define ARM64_TCR_IRGN0			GENMASK_ULL(9, 8)
+#define CTXDESC_CD_0_TCR_ORGN0		GENMASK_ULL(11, 10)
+#define ARM64_TCR_ORGN0			GENMASK_ULL(11, 10)
+#define CTXDESC_CD_0_TCR_SH0		GENMASK_ULL(13, 12)
+#define ARM64_TCR_SH0			GENMASK_ULL(13, 12)
+#define CTXDESC_CD_0_TCR_EPD0		(1ULL << 14)
+#define ARM64_TCR_EPD0			(1ULL << 7)
+#define CTXDESC_CD_0_TCR_EPD1		(1ULL << 30)
+#define ARM64_TCR_EPD1			(1ULL << 23)
 
 #define CTXDESC_CD_0_ENDI		(1UL << 15)
 #define CTXDESC_CD_0_V			(1UL << 31)
 
-#define CTXDESC_CD_0_TCR_IPS_SHIFT	32
-#define ARM64_TCR_IPS_SHIFT		32
-#define ARM64_TCR_IPS_MASK		0x7UL
-#define CTXDESC_CD_0_TCR_TBI0_SHIFT	38
-#define ARM64_TCR_TBI0_SHIFT		37
-#define ARM64_TCR_TBI0_MASK		0x1UL
+#define CTXDESC_CD_0_TCR_IPS		GENMASK_ULL(34, 32)
+#define ARM64_TCR_IPS			GENMASK_ULL(34, 32)
+#define CTXDESC_CD_0_TCR_TBI0		(1ULL << 38)
+#define ARM64_TCR_TBI0			(1ULL << 37)
 
 #define CTXDESC_CD_0_AA64		(1UL << 41)
 #define CTXDESC_CD_0_S			(1UL << 44)
 #define CTXDESC_CD_0_R			(1UL << 45)
 #define CTXDESC_CD_0_A			(1UL << 46)
-#define CTXDESC_CD_0_ASET_SHIFT		47
-#define CTXDESC_CD_0_ASET_SHARED	(0UL << CTXDESC_CD_0_ASET_SHIFT)
-#define CTXDESC_CD_0_ASET_PRIVATE	(1UL << CTXDESC_CD_0_ASET_SHIFT)
-#define CTXDESC_CD_0_ASID_SHIFT		48
-#define CTXDESC_CD_0_ASID_MASK		0xffffUL
-
-#define CTXDESC_CD_1_TTB0_SHIFT		4
-#define CTXDESC_CD_1_TTB0_MASK		0xfffffffffffUL
+#define CTXDESC_CD_0_ASET		(1UL << 47)
+#define CTXDESC_CD_0_ASID		GENMASK_ULL(63, 48)
 
-#define CTXDESC_CD_3_MAIR_SHIFT		0
+#define CTXDESC_CD_1_TTB0_MASK		GENMASK_ULL(51, 4)
 
 /* Convert between AArch64 (CPU) TCR format and SMMU CD format */
-#define ARM_SMMU_TCR2CD(tcr, fld)					\
-	(((tcr) >> ARM64_TCR_##fld##_SHIFT & ARM64_TCR_##fld##_MASK)	\
-	 << CTXDESC_CD_0_TCR_##fld##_SHIFT)
+#define ARM_SMMU_TCR2CD(tcr, fld)	FIELD_PREP(CTXDESC_CD_0_TCR_##fld, \
+					FIELD_GET(ARM64_TCR_##fld, tcr))
 
 /* Command queue */
 #define CMDQ_ENT_DWORDS			2
 #define CMDQ_MAX_SZ_SHIFT		8
 
-#define CMDQ_ERR_SHIFT			24
-#define CMDQ_ERR_MASK			0x7f
+#define CMDQ_CONS_ERR			GENMASK(30, 24)
 #define CMDQ_ERR_CERROR_NONE_IDX	0
 #define CMDQ_ERR_CERROR_ILL_IDX		1
 #define CMDQ_ERR_CERROR_ABT_IDX		2
 
-#define CMDQ_0_OP_SHIFT			0
-#define CMDQ_0_OP_MASK			0xffUL
+#define CMDQ_0_OP			GENMASK_ULL(7, 0)
 #define CMDQ_0_SSV			(1UL << 11)
 
-#define CMDQ_PREFETCH_0_SID_SHIFT	32
-#define CMDQ_PREFETCH_1_SIZE_SHIFT	0
-#define CMDQ_PREFETCH_1_ADDR_MASK	~0xfffUL
+#define CMDQ_PREFETCH_0_SID		GENMASK_ULL(63, 32)
+#define CMDQ_PREFETCH_1_SIZE		GENMASK_ULL(4, 0)
+#define CMDQ_PREFETCH_1_ADDR_MASK	GENMASK_ULL(63, 12)
 
-#define CMDQ_CFGI_0_SID_SHIFT		32
-#define CMDQ_CFGI_0_SID_MASK		0xffffffffUL
+#define CMDQ_CFGI_0_SID			GENMASK_ULL(63, 32)
 #define CMDQ_CFGI_1_LEAF		(1UL << 0)
-#define CMDQ_CFGI_1_RANGE_SHIFT		0
-#define CMDQ_CFGI_1_RANGE_MASK		0x1fUL
+#define CMDQ_CFGI_1_RANGE		GENMASK_ULL(4, 0)
 
-#define CMDQ_TLBI_0_VMID_SHIFT		32
-#define CMDQ_TLBI_0_ASID_SHIFT		48
+#define CMDQ_TLBI_0_VMID		GENMASK_ULL(47, 32)
+#define CMDQ_TLBI_0_ASID		GENMASK_ULL(63, 48)
 #define CMDQ_TLBI_1_LEAF		(1UL << 0)
-#define CMDQ_TLBI_1_VA_MASK		~0xfffUL
-#define CMDQ_TLBI_1_IPA_MASK		0xfffffffff000UL
-
-#define CMDQ_PRI_0_SSID_SHIFT		12
-#define CMDQ_PRI_0_SSID_MASK		0xfffffUL
-#define CMDQ_PRI_0_SID_SHIFT		32
-#define CMDQ_PRI_0_SID_MASK		0xffffffffUL
-#define CMDQ_PRI_1_GRPID_SHIFT		0
-#define CMDQ_PRI_1_GRPID_MASK		0x1ffUL
-#define CMDQ_PRI_1_RESP_SHIFT		12
-#define CMDQ_PRI_1_RESP_DENY		(0UL << CMDQ_PRI_1_RESP_SHIFT)
-#define CMDQ_PRI_1_RESP_FAIL		(1UL << CMDQ_PRI_1_RESP_SHIFT)
-#define CMDQ_PRI_1_RESP_SUCC		(2UL << CMDQ_PRI_1_RESP_SHIFT)
-
-#define CMDQ_SYNC_0_CS_SHIFT		12
-#define CMDQ_SYNC_0_CS_NONE		(0UL << CMDQ_SYNC_0_CS_SHIFT)
-#define CMDQ_SYNC_0_CS_IRQ		(1UL << CMDQ_SYNC_0_CS_SHIFT)
-#define CMDQ_SYNC_0_CS_SEV		(2UL << CMDQ_SYNC_0_CS_SHIFT)
-#define CMDQ_SYNC_0_MSH_SHIFT		22
-#define CMDQ_SYNC_0_MSH_ISH		(3UL << CMDQ_SYNC_0_MSH_SHIFT)
-#define CMDQ_SYNC_0_MSIATTR_SHIFT	24
-#define CMDQ_SYNC_0_MSIATTR_OIWB	(0xfUL << CMDQ_SYNC_0_MSIATTR_SHIFT)
-#define CMDQ_SYNC_0_MSIDATA_SHIFT	32
-#define CMDQ_SYNC_0_MSIDATA_MASK	0xffffffffUL
-#define CMDQ_SYNC_1_MSIADDR_SHIFT	0
-#define CMDQ_SYNC_1_MSIADDR_MASK	0xffffffffffffcUL
+#define CMDQ_TLBI_1_VA_MASK		GENMASK_ULL(63, 12)
+#define CMDQ_TLBI_1_IPA_MASK		GENMASK_ULL(51, 12)
+
+#define CMDQ_PRI_0_SSID			GENMASK_ULL(31, 12)
+#define CMDQ_PRI_0_SID			GENMASK_ULL(63, 32)
+#define CMDQ_PRI_1_GRPID		GENMASK_ULL(8, 0)
+#define CMDQ_PRI_1_RESP			GENMASK_ULL(13, 12)
+
+#define CMDQ_SYNC_0_CS			GENMASK_ULL(13, 12)
+#define CMDQ_SYNC_0_CS_NONE		0
+#define CMDQ_SYNC_0_CS_IRQ		1
+#define CMDQ_SYNC_0_CS_SEV		2
+#define CMDQ_SYNC_0_MSH			GENMASK_ULL(23, 22)
+#define CMDQ_SYNC_0_MSIATTR		GENMASK_ULL(27, 24)
+#define CMDQ_SYNC_0_MSIDATA		GENMASK_ULL(63, 32)
+#define CMDQ_SYNC_1_MSIADDR_MASK	GENMASK_ULL(51, 2)
 
 /* Event queue */
 #define EVTQ_ENT_DWORDS			4
 #define EVTQ_MAX_SZ_SHIFT		7
 
-#define EVTQ_0_ID_SHIFT			0
-#define EVTQ_0_ID_MASK			0xffUL
+#define EVTQ_0_ID			GENMASK_ULL(7, 0)
 
 /* PRI queue */
 #define PRIQ_ENT_DWORDS			2
 #define PRIQ_MAX_SZ_SHIFT		8
 
-#define PRIQ_0_SID_SHIFT		0
-#define PRIQ_0_SID_MASK			0xffffffffUL
-#define PRIQ_0_SSID_SHIFT		32
-#define PRIQ_0_SSID_MASK		0xfffffUL
+#define PRIQ_0_SID			GENMASK_ULL(31, 0)
+#define PRIQ_0_SSID			GENMASK_ULL(51, 32)
 #define PRIQ_0_PERM_PRIV		(1UL << 58)
 #define PRIQ_0_PERM_EXEC		(1UL << 59)
 #define PRIQ_0_PERM_READ		(1UL << 60)
@@ -411,10 +355,8 @@
 #define PRIQ_0_PRG_LAST			(1UL << 62)
 #define PRIQ_0_SSID_V			(1UL << 63)
 
-#define PRIQ_1_PRG_IDX_SHIFT		0
-#define PRIQ_1_PRG_IDX_MASK		0x1ffUL
-#define PRIQ_1_ADDR_SHIFT		12
-#define PRIQ_1_ADDR_MASK		0xfffffffffffffUL
+#define PRIQ_1_PRG_IDX			GENMASK_ULL(8, 0)
+#define PRIQ_1_ADDR_MASK		GENMASK_ULL(63, 12)
 
 /* High-level queue structures */
 #define ARM_SMMU_POLL_TIMEOUT_US	100
@@ -430,9 +372,9 @@ MODULE_PARM_DESC(disable_bypass,
 	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
 
 enum pri_resp {
-	PRI_RESP_DENY,
-	PRI_RESP_FAIL,
-	PRI_RESP_SUCC,
+	PRI_RESP_DENY = 0,
+	PRI_RESP_FAIL = 1,
+	PRI_RESP_SUCC = 2,
 };
 
 enum arm_smmu_msi_index {
@@ -611,6 +553,7 @@ struct arm_smmu_device {
 #define ARM_SMMU_FEAT_STALLS		(1 << 11)
 #define ARM_SMMU_FEAT_HYP		(1 << 12)
 #define ARM_SMMU_FEAT_STALL_FORCE	(1 << 13)
+#define ARM_SMMU_FEAT_VAX		(1 << 14)
 	u32				features;
 
 #define ARM_SMMU_OPT_SKIP_PREFETCH	(1 << 0)
@@ -836,67 +779,64 @@ static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
 {
 	memset(cmd, 0, CMDQ_ENT_DWORDS << 3);
-	cmd[0] |= (ent->opcode & CMDQ_0_OP_MASK) << CMDQ_0_OP_SHIFT;
+	cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
 
 	switch (ent->opcode) {
 	case CMDQ_OP_TLBI_EL2_ALL:
 	case CMDQ_OP_TLBI_NSNH_ALL:
 		break;
 	case CMDQ_OP_PREFETCH_CFG:
-		cmd[0] |= (u64)ent->prefetch.sid << CMDQ_PREFETCH_0_SID_SHIFT;
-		cmd[1] |= ent->prefetch.size << CMDQ_PREFETCH_1_SIZE_SHIFT;
+		cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
+		cmd[1] |= FIELD_PREP(CMDQ_PREFETCH_1_SIZE, ent->prefetch.size);
 		cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK;
 		break;
 	case CMDQ_OP_CFGI_STE:
-		cmd[0] |= (u64)ent->cfgi.sid << CMDQ_CFGI_0_SID_SHIFT;
-		cmd[1] |= ent->cfgi.leaf ? CMDQ_CFGI_1_LEAF : 0;
+		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
+		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
 		break;
 	case CMDQ_OP_CFGI_ALL:
 		/* Cover the entire SID range */
-		cmd[1] |= CMDQ_CFGI_1_RANGE_MASK << CMDQ_CFGI_1_RANGE_SHIFT;
+		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
 		break;
 	case CMDQ_OP_TLBI_NH_VA:
-		cmd[0] |= (u64)ent->tlbi.asid << CMDQ_TLBI_0_ASID_SHIFT;
-		cmd[1] |= ent->tlbi.leaf ? CMDQ_TLBI_1_LEAF : 0;
+		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
+		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
 		break;
 	case CMDQ_OP_TLBI_S2_IPA:
-		cmd[0] |= (u64)ent->tlbi.vmid << CMDQ_TLBI_0_VMID_SHIFT;
-		cmd[1] |= ent->tlbi.leaf ? CMDQ_TLBI_1_LEAF : 0;
+		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
+		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
 		break;
 	case CMDQ_OP_TLBI_NH_ASID:
-		cmd[0] |= (u64)ent->tlbi.asid << CMDQ_TLBI_0_ASID_SHIFT;
+		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
 		/* Fallthrough */
 	case CMDQ_OP_TLBI_S12_VMALL:
-		cmd[0] |= (u64)ent->tlbi.vmid << CMDQ_TLBI_0_VMID_SHIFT;
+		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
 		break;
 	case CMDQ_OP_PRI_RESP:
-		cmd[0] |= ent->substream_valid ? CMDQ_0_SSV : 0;
-		cmd[0] |= ent->pri.ssid << CMDQ_PRI_0_SSID_SHIFT;
-		cmd[0] |= (u64)ent->pri.sid << CMDQ_PRI_0_SID_SHIFT;
-		cmd[1] |= ent->pri.grpid << CMDQ_PRI_1_GRPID_SHIFT;
+		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
+		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
+		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
+		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
 		switch (ent->pri.resp) {
 		case PRI_RESP_DENY:
-			cmd[1] |= CMDQ_PRI_1_RESP_DENY;
-			break;
 		case PRI_RESP_FAIL:
-			cmd[1] |= CMDQ_PRI_1_RESP_FAIL;
-			break;
 		case PRI_RESP_SUCC:
-			cmd[1] |= CMDQ_PRI_1_RESP_SUCC;
 			break;
 		default:
 			return -EINVAL;
 		}
+		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
 		break;
 	case CMDQ_OP_CMD_SYNC:
 		if (ent->sync.msiaddr)
-			cmd[0] |= CMDQ_SYNC_0_CS_IRQ;
+			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
 		else
-			cmd[0] |= CMDQ_SYNC_0_CS_SEV;
-		cmd[0] |= CMDQ_SYNC_0_MSH_ISH | CMDQ_SYNC_0_MSIATTR_OIWB;
-		cmd[0] |= (u64)ent->sync.msidata << CMDQ_SYNC_0_MSIDATA_SHIFT;
+			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
+		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
+		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
+		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIDATA, ent->sync.msidata);
 		cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
 		break;
 	default:
@@ -918,7 +858,7 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
 	u64 cmd[CMDQ_ENT_DWORDS];
 	struct arm_smmu_queue *q = &smmu->cmdq.q;
 	u32 cons = readl_relaxed(q->cons_reg);
-	u32 idx = cons >> CMDQ_ERR_SHIFT & CMDQ_ERR_MASK;
+	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
 	struct arm_smmu_cmdq_ent cmd_sync = {
 		.opcode = CMDQ_OP_CMD_SYNC,
 	};
@@ -1083,8 +1023,8 @@ static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu,
 #ifdef __BIG_ENDIAN
 	      CTXDESC_CD_0_ENDI |
 #endif
-	      CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET_PRIVATE |
-	      CTXDESC_CD_0_AA64 | (u64)cfg->cd.asid << CTXDESC_CD_0_ASID_SHIFT |
+	      CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET |
+	      CTXDESC_CD_0_AA64 | FIELD_PREP(CTXDESC_CD_0_ASID, cfg->cd.asid) |
 	      CTXDESC_CD_0_V;
 
 	/* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
@@ -1093,10 +1033,10 @@ static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu,
 
 	cfg->cdptr[0] = cpu_to_le64(val);
 
-	val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK << CTXDESC_CD_1_TTB0_SHIFT;
+	val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK;
 	cfg->cdptr[1] = cpu_to_le64(val);
 
-	cfg->cdptr[3] = cpu_to_le64(cfg->cd.mair << CTXDESC_CD_3_MAIR_SHIFT);
+	cfg->cdptr[3] = cpu_to_le64(cfg->cd.mair);
 }
 
 /* Stream table manipulation functions */
@@ -1105,10 +1045,8 @@ arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
 {
 	u64 val = 0;
 
-	val |= (desc->span & STRTAB_L1_DESC_SPAN_MASK)
-		<< STRTAB_L1_DESC_SPAN_SHIFT;
-	val |= desc->l2ptr_dma &
-	       STRTAB_L1_DESC_L2PTR_MASK << STRTAB_L1_DESC_L2PTR_SHIFT;
+	val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
+	val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
 
 	*dst = cpu_to_le64(val);
 }
@@ -1156,10 +1094,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
 	};
 
 	if (val & STRTAB_STE_0_V) {
-		u64 cfg;
-
-		cfg = val & STRTAB_STE_0_CFG_MASK << STRTAB_STE_0_CFG_SHIFT;
-		switch (cfg) {
+		switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
 		case STRTAB_STE_0_CFG_BYPASS:
 			break;
 		case STRTAB_STE_0_CFG_S1_TRANS:
@@ -1180,13 +1115,13 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
 	/* Bypass/fault */
 	if (!ste->assigned || !(ste->s1_cfg || ste->s2_cfg)) {
 		if (!ste->assigned && disable_bypass)
-			val |= STRTAB_STE_0_CFG_ABORT;
+			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
 		else
-			val |= STRTAB_STE_0_CFG_BYPASS;
+			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
 
 		dst[0] = cpu_to_le64(val);
-		dst[1] = cpu_to_le64(STRTAB_STE_1_SHCFG_INCOMING
-			 << STRTAB_STE_1_SHCFG_SHIFT);
+		dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
+						STRTAB_STE_1_SHCFG_INCOMING));
 		dst[2] = 0; /* Nuke the VMID */
 		/*
 		 * The SMMU can perform negative caching, so we must sync
@@ -1200,41 +1135,36 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
 	if (ste->s1_cfg) {
 		BUG_ON(ste_live);
 		dst[1] = cpu_to_le64(
-			 STRTAB_STE_1_S1C_CACHE_WBRA
-			 << STRTAB_STE_1_S1CIR_SHIFT |
-			 STRTAB_STE_1_S1C_CACHE_WBRA
-			 << STRTAB_STE_1_S1COR_SHIFT |
-			 STRTAB_STE_1_S1C_SH_ISH << STRTAB_STE_1_S1CSH_SHIFT |
+			 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
+			 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
+			 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
 #ifdef CONFIG_PCI_ATS
-			 STRTAB_STE_1_EATS_TRANS << STRTAB_STE_1_EATS_SHIFT |
+			 FIELD_PREP(STRTAB_STE_1_EATS, STRTAB_STE_1_EATS_TRANS) |
 #endif
-			 STRTAB_STE_1_STRW_NSEL1 << STRTAB_STE_1_STRW_SHIFT);
+			 FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
 
 		if (smmu->features & ARM_SMMU_FEAT_STALLS &&
 		   !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
 			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
 
-		val |= (ste->s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK
-		        << STRTAB_STE_0_S1CTXPTR_SHIFT) |
-			STRTAB_STE_0_CFG_S1_TRANS;
+		val |= (ste->s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
+			FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS);
 	}
 
 	if (ste->s2_cfg) {
 		BUG_ON(ste_live);
 		dst[2] = cpu_to_le64(
-			 ste->s2_cfg->vmid << STRTAB_STE_2_S2VMID_SHIFT |
-			 (ste->s2_cfg->vtcr & STRTAB_STE_2_VTCR_MASK)
-			  << STRTAB_STE_2_VTCR_SHIFT |
+			 FIELD_PREP(STRTAB_STE_2_S2VMID, ste->s2_cfg->vmid) |
+			 FIELD_PREP(STRTAB_STE_2_VTCR, ste->s2_cfg->vtcr) |
 #ifdef __BIG_ENDIAN
 			 STRTAB_STE_2_S2ENDI |
 #endif
 			 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
 			 STRTAB_STE_2_S2R);
 
-		dst[3] = cpu_to_le64(ste->s2_cfg->vttbr &
-			 STRTAB_STE_3_S2TTB_MASK << STRTAB_STE_3_S2TTB_SHIFT);
+		dst[3] = cpu_to_le64(ste->s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
 
-		val |= STRTAB_STE_0_CFG_S2_TRANS;
+		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
 	}
 
 	arm_smmu_sync_ste_for_sid(smmu, sid);
@@ -1295,7 +1225,7 @@ static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
 
 	do {
 		while (!queue_remove_raw(q, evt)) {
-			u8 id = evt[0] >> EVTQ_0_ID_SHIFT & EVTQ_0_ID_MASK;
+			u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
 
 			dev_info(smmu->dev, "event 0x%02x received:\n", id);
 			for (i = 0; i < ARRAY_SIZE(evt); ++i)
@@ -1323,11 +1253,11 @@ static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
 	u16 grpid;
 	bool ssv, last;
 
-	sid = evt[0] >> PRIQ_0_SID_SHIFT & PRIQ_0_SID_MASK;
-	ssv = evt[0] & PRIQ_0_SSID_V;
-	ssid = ssv ? evt[0] >> PRIQ_0_SSID_SHIFT & PRIQ_0_SSID_MASK : 0;
-	last = evt[0] & PRIQ_0_PRG_LAST;
-	grpid = evt[1] >> PRIQ_1_PRG_IDX_SHIFT & PRIQ_1_PRG_IDX_MASK;
+	sid = FIELD_GET(PRIQ_0_SID, evt[0]);
+	ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
+	ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0;
+	last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
+	grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
 
 	dev_info(smmu->dev, "unexpected PRI request received:\n");
 	dev_info(smmu->dev,
@@ -1337,7 +1267,7 @@ static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
 		 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
 		 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
 		 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
-		 evt[1] & PRIQ_1_ADDR_MASK << PRIQ_1_ADDR_SHIFT);
+		 evt[1] & PRIQ_1_ADDR_MASK);
 
 	if (last) {
 		struct arm_smmu_cmdq_ent cmd = {
@@ -1664,7 +1594,8 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain)
 
 	switch (smmu_domain->stage) {
 	case ARM_SMMU_DOMAIN_S1:
-		ias = VA_BITS;
+		ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
+		ias = min_t(unsigned long, ias, VA_BITS);
 		oas = smmu->ias;
 		fmt = ARM_64_LPAE_S1;
 		finalise_stage_fn = arm_smmu_domain_finalise_s1;
@@ -1696,7 +1627,7 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain)
 		return -ENOMEM;
 
 	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
-	domain->geometry.aperture_end = (1UL << ias) - 1;
+	domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
 	domain->geometry.force_aperture = true;
 
 	ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
@@ -2102,9 +2033,8 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
 	q->ent_dwords	= dwords;
 
 	q->q_base  = Q_BASE_RWA;
-	q->q_base |= q->base_dma & Q_BASE_ADDR_MASK << Q_BASE_ADDR_SHIFT;
-	q->q_base |= (q->max_n_shift & Q_BASE_LOG2SIZE_MASK)
-		     << Q_BASE_LOG2SIZE_SHIFT;
+	q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
+	q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->max_n_shift);
 
 	q->prod = q->cons = 0;
 	return 0;
@@ -2186,11 +2116,9 @@ static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
 	cfg->strtab = strtab;
 
 	/* Configure strtab_base_cfg for 2 levels */
-	reg  = STRTAB_BASE_CFG_FMT_2LVL;
-	reg |= (size & STRTAB_BASE_CFG_LOG2SIZE_MASK)
-		<< STRTAB_BASE_CFG_LOG2SIZE_SHIFT;
-	reg |= (STRTAB_SPLIT & STRTAB_BASE_CFG_SPLIT_MASK)
-		<< STRTAB_BASE_CFG_SPLIT_SHIFT;
+	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
+	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
+	reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
 	cfg->strtab_base_cfg = reg;
 
 	return arm_smmu_init_l1_strtab(smmu);
@@ -2216,9 +2144,8 @@ static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
 	cfg->num_l1_ents = 1 << smmu->sid_bits;
 
 	/* Configure strtab_base_cfg for a linear table covering all SIDs */
-	reg  = STRTAB_BASE_CFG_FMT_LINEAR;
-	reg |= (smmu->sid_bits & STRTAB_BASE_CFG_LOG2SIZE_MASK)
-		<< STRTAB_BASE_CFG_LOG2SIZE_SHIFT;
+	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
+	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
 	cfg->strtab_base_cfg = reg;
 
 	arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents);
@@ -2239,8 +2166,7 @@ static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
 		return ret;
 
 	/* Set the strtab base address */
-	reg  = smmu->strtab_cfg.strtab_dma &
-	       STRTAB_BASE_ADDR_MASK << STRTAB_BASE_ADDR_SHIFT;
+	reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
 	reg |= STRTAB_BASE_RA;
 	smmu->strtab_cfg.strtab_base = reg;
 
@@ -2303,11 +2229,11 @@ static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
 	phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
 
 	doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
-	doorbell &= MSI_CFG0_ADDR_MASK << MSI_CFG0_ADDR_SHIFT;
+	doorbell &= MSI_CFG0_ADDR_MASK;
 
 	writeq_relaxed(doorbell, smmu->base + cfg[0]);
 	writel_relaxed(msg->data, smmu->base + cfg[1]);
-	writel_relaxed(MSI_CFG2_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
+	writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
 }
 
 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
@@ -2328,10 +2254,15 @@ static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
 	if (!(smmu->features & ARM_SMMU_FEAT_MSI))
 		return;
 
+	if (!dev->msi_domain) {
+		dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
+		return;
+	}
+
 	/* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
 	ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
 	if (ret) {
-		dev_warn(dev, "failed to allocate MSIs\n");
+		dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
 		return;
 	}
 
@@ -2370,6 +2301,8 @@ static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
 						"arm-smmu-v3-evtq", smmu);
 		if (ret < 0)
 			dev_warn(smmu->dev, "failed to enable evtq irq\n");
+	} else {
+		dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
 	}
 
 	irq = smmu->gerr_irq;
@@ -2378,6 +2311,8 @@ static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
 				       0, "arm-smmu-v3-gerror", smmu);
 		if (ret < 0)
 			dev_warn(smmu->dev, "failed to enable gerror irq\n");
+	} else {
+		dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
 	}
 
 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
@@ -2391,6 +2326,8 @@ static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
 			if (ret < 0)
 				dev_warn(smmu->dev,
 					 "failed to enable priq irq\n");
+		} else {
+			dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
 		}
 	}
 }
@@ -2463,12 +2400,12 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
 		return ret;
 
 	/* CR1 (table and queue memory attributes) */
-	reg = (CR1_SH_ISH << CR1_TABLE_SH_SHIFT) |
-	      (CR1_CACHE_WB << CR1_TABLE_OC_SHIFT) |
-	      (CR1_CACHE_WB << CR1_TABLE_IC_SHIFT) |
-	      (CR1_SH_ISH << CR1_QUEUE_SH_SHIFT) |
-	      (CR1_CACHE_WB << CR1_QUEUE_OC_SHIFT) |
-	      (CR1_CACHE_WB << CR1_QUEUE_IC_SHIFT);
+	reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
+	      FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
+	      FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
+	      FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
+	      FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
+	      FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
 
 	/* CR2 (random crap) */
@@ -2578,7 +2515,7 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
 
 	/* 2-level structures */
-	if ((reg & IDR0_ST_LVL_MASK << IDR0_ST_LVL_SHIFT) == IDR0_ST_LVL_2LVL)
+	if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
 		smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
 
 	if (reg & IDR0_CD2L)
@@ -2589,7 +2526,7 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
 	 * We currently require the same endianness as the CPU, but this
 	 * could be changed later by adding a new IO_PGTABLE_QUIRK.
 	 */
-	switch (reg & IDR0_TTENDIAN_MASK << IDR0_TTENDIAN_SHIFT) {
+	switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
 	case IDR0_TTENDIAN_MIXED:
 		smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
 		break;
@@ -2631,7 +2568,7 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
 		dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
 			 coherent ? "true" : "false");
 
-	switch (reg & IDR0_STALL_MODEL_MASK << IDR0_STALL_MODEL_SHIFT) {
+	switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
 	case IDR0_STALL_MODEL_FORCE:
 		smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
 		/* Fallthrough */
@@ -2651,7 +2588,7 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
 	}
 
 	/* We only support the AArch64 table format at present */
-	switch (reg & IDR0_TTF_MASK << IDR0_TTF_SHIFT) {
+	switch (FIELD_GET(IDR0_TTF, reg)) {
 	case IDR0_TTF_AARCH32_64:
 		smmu->ias = 40;
 		/* Fallthrough */
@@ -2674,22 +2611,22 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
 	}
 
 	/* Queue sizes, capped at 4k */
-	smmu->cmdq.q.max_n_shift = min((u32)CMDQ_MAX_SZ_SHIFT,
-				       reg >> IDR1_CMDQ_SHIFT & IDR1_CMDQ_MASK);
+	smmu->cmdq.q.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
+					 FIELD_GET(IDR1_CMDQS, reg));
 	if (!smmu->cmdq.q.max_n_shift) {
 		/* Odd alignment restrictions on the base, so ignore for now */
 		dev_err(smmu->dev, "unit-length command queue not supported\n");
 		return -ENXIO;
 	}
 
-	smmu->evtq.q.max_n_shift = min((u32)EVTQ_MAX_SZ_SHIFT,
-				       reg >> IDR1_EVTQ_SHIFT & IDR1_EVTQ_MASK);
-	smmu->priq.q.max_n_shift = min((u32)PRIQ_MAX_SZ_SHIFT,
-				       reg >> IDR1_PRIQ_SHIFT & IDR1_PRIQ_MASK);
+	smmu->evtq.q.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
+					 FIELD_GET(IDR1_EVTQS, reg));
+	smmu->priq.q.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
+					 FIELD_GET(IDR1_PRIQS, reg));
 
 	/* SID/SSID sizes */
-	smmu->ssid_bits = reg >> IDR1_SSID_SHIFT & IDR1_SSID_MASK;
-	smmu->sid_bits = reg >> IDR1_SID_SHIFT & IDR1_SID_MASK;
+	smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
+	smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
 
 	/*
 	 * If the SMMU supports fewer bits than would fill a single L2 stream
@@ -2702,8 +2639,7 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
 
 	/* Maximum number of outstanding stalls */
-	smmu->evtq.max_stalls = reg >> IDR5_STALL_MAX_SHIFT
-				& IDR5_STALL_MAX_MASK;
+	smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
 
 	/* Page sizes */
 	if (reg & IDR5_GRAN64K)
@@ -2713,13 +2649,12 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
 	if (reg & IDR5_GRAN4K)
 		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
 
-	if (arm_smmu_ops.pgsize_bitmap == -1UL)
-		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
-	else
-		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
+	/* Input address size */
+	if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
+		smmu->features |= ARM_SMMU_FEAT_VAX;
 
 	/* Output address size */
-	switch (reg & IDR5_OAS_MASK << IDR5_OAS_SHIFT) {
+	switch (FIELD_GET(IDR5_OAS, reg)) {
 	case IDR5_OAS_32_BIT:
 		smmu->oas = 32;
 		break;
@@ -2735,6 +2670,10 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
 	case IDR5_OAS_44_BIT:
 		smmu->oas = 44;
 		break;
+	case IDR5_OAS_52_BIT:
+		smmu->oas = 52;
+		smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
+		break;
 	default:
 		dev_info(smmu->dev,
 			"unknown output address size. Truncating to 48-bit\n");
@@ -2743,6 +2682,11 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
 		smmu->oas = 48;
 	}
 
+	if (arm_smmu_ops.pgsize_bitmap == -1UL)
+		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
+	else
+		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
+
 	/* Set the DMA mask for our table walker */
 	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
 		dev_warn(smmu->dev,
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 25914d36c5ac..f05f3cf90756 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -19,6 +19,7 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/acpi_iort.h>
 #include <linux/device.h>
 #include <linux/dma-iommu.h>
 #include <linux/gfp.h>
@@ -167,13 +168,18 @@ EXPORT_SYMBOL(iommu_put_dma_cookie);
  *
  * IOMMU drivers can use this to implement their .get_resv_regions callback
  * for general non-IOMMU-specific reservations. Currently, this covers host
- * bridge windows for PCI devices.
+ * bridge windows for PCI devices and GICv3 ITS region reservation on ACPI
+ * based ARM platforms that may require HW MSI reservation.
  */
 void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list)
 {
 	struct pci_host_bridge *bridge;
 	struct resource_entry *window;
 
+	if (!is_of_node(dev->iommu_fwspec->iommu_fwnode) &&
+		iort_iommu_msi_get_resv_regions(dev, list) < 0)
+		return;
+
 	if (!dev_is_pci(dev))
 		return;
 
diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index 9a7ffd13c7f0..accf58388bdb 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -806,7 +806,7 @@ int __init dmar_dev_scope_init(void)
 	return dmar_dev_scope_status;
 }
 
-void dmar_register_bus_notifier(void)
+void __init dmar_register_bus_notifier(void)
 {
 	bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb);
 }
diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index c5f4f7691b57..85879cfec52f 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -1239,17 +1239,6 @@ static phys_addr_t exynos_iommu_iova_to_phys(struct iommu_domain *iommu_domain,
 	return phys;
 }
 
-static struct iommu_group *get_device_iommu_group(struct device *dev)
-{
-	struct iommu_group *group;
-
-	group = iommu_group_get(dev);
-	if (!group)
-		group = iommu_group_alloc();
-
-	return group;
-}
-
 static int exynos_iommu_add_device(struct device *dev)
 {
 	struct exynos_iommu_owner *owner = dev->archdata.iommu;
@@ -1345,7 +1334,7 @@ static const struct iommu_ops exynos_iommu_ops = {
 	.unmap = exynos_iommu_unmap,
 	.map_sg = default_iommu_map_sg,
 	.iova_to_phys = exynos_iommu_iova_to_phys,
-	.device_group = get_device_iommu_group,
+	.device_group = generic_device_group,
 	.add_device = exynos_iommu_add_device,
 	.remove_device = exynos_iommu_remove_device,
 	.pgsize_bitmap = SECT_SIZE | LPAGE_SIZE | SPAGE_SIZE,
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 24d1b1b42013..749d8f235346 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -5043,7 +5043,6 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain,
 {
 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
 	struct page *freelist = NULL;
-	struct intel_iommu *iommu;
 	unsigned long start_pfn, last_pfn;
 	unsigned int npages;
 	int iommu_id, level = 0;
@@ -5062,12 +5061,9 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain,
 
 	npages = last_pfn - start_pfn + 1;
 
-	for_each_domain_iommu(iommu_id, dmar_domain) {
-		iommu = g_iommus[iommu_id];
-
+	for_each_domain_iommu(iommu_id, dmar_domain)
 		iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
 				      start_pfn, npages, !freelist, 0);
-	}
 
 	dma_free_pagelist(freelist);
 
diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index 99bc9bd64b9e..e8cd984cf9c8 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -396,6 +396,7 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
 				pasid_max - 1, GFP_KERNEL);
 		if (ret < 0) {
 			kfree(svm);
+			kfree(sdev);
 			goto out;
 		}
 		svm->pasid = ret;
@@ -422,17 +423,13 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
 		iommu->pasid_table[svm->pasid].val = pasid_entry_val;
 
 		wmb();
-		/* In caching mode, we still have to flush with PASID 0 when
-		 * a PASID table entry becomes present. Not entirely clear
-		 * *why* that would be the case — surely we could just issue
-		 * a flush with the PASID value that we've changed? The PASID
-		 * is the index into the table, after all. It's not like domain
-		 * IDs in the case of the equivalent context-entry change in
-		 * caching mode. And for that matter it's not entirely clear why
-		 * a VMM would be in the business of caching the PASID table
-		 * anyway. Surely that can be left entirely to the guest? */
+
+		/*
+		 * Flush PASID cache when a PASID table entry becomes
+		 * present.
+		 */
 		if (cap_caching_mode(iommu->cap))
-			intel_flush_pasid_dev(svm, sdev, 0);
+			intel_flush_pasid_dev(svm, sdev, svm->pasid);
 	}
 	list_add_rcu(&sdev->list, &svm->devs);
 
diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index 2ca08dc9331c..10e4a3d11c02 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -357,8 +357,8 @@ static bool arm_v7s_pte_is_cont(arm_v7s_iopte pte, int lvl)
 	return false;
 }
 
-static int __arm_v7s_unmap(struct arm_v7s_io_pgtable *, unsigned long,
-			   size_t, int, arm_v7s_iopte *);
+static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *, unsigned long,
+			      size_t, int, arm_v7s_iopte *);
 
 static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data,
 			    unsigned long iova, phys_addr_t paddr, int prot,
@@ -541,9 +541,10 @@ static arm_v7s_iopte arm_v7s_split_cont(struct arm_v7s_io_pgtable *data,
 	return pte;
 }
 
-static int arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data,
-				   unsigned long iova, size_t size,
-				   arm_v7s_iopte blk_pte, arm_v7s_iopte *ptep)
+static size_t arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data,
+				      unsigned long iova, size_t size,
+				      arm_v7s_iopte blk_pte,
+				      arm_v7s_iopte *ptep)
 {
 	struct io_pgtable_cfg *cfg = &data->iop.cfg;
 	arm_v7s_iopte pte, *tablep;
@@ -584,9 +585,9 @@ static int arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data,
 	return size;
 }
 
-static int __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
-			    unsigned long iova, size_t size, int lvl,
-			    arm_v7s_iopte *ptep)
+static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
+			      unsigned long iova, size_t size, int lvl,
+			      arm_v7s_iopte *ptep)
 {
 	arm_v7s_iopte pte[ARM_V7S_CONT_PAGES];
 	struct io_pgtable *iop = &data->iop;
@@ -656,8 +657,8 @@ static int __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
 	return __arm_v7s_unmap(data, iova, size, lvl + 1, ptep);
 }
 
-static int arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova,
-			 size_t size)
+static size_t arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova,
+			    size_t size)
 {
 	struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);
 
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 51e5c43caed1..39c2a056da21 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -21,6 +21,7 @@
 #define pr_fmt(fmt)	"arm-lpae io-pgtable: " fmt
 
 #include <linux/atomic.h>
+#include <linux/bitops.h>
 #include <linux/iommu.h>
 #include <linux/kernel.h>
 #include <linux/sizes.h>
@@ -32,7 +33,7 @@
 
 #include "io-pgtable.h"
 
-#define ARM_LPAE_MAX_ADDR_BITS		48
+#define ARM_LPAE_MAX_ADDR_BITS		52
 #define ARM_LPAE_S2_MAX_CONCAT_PAGES	16
 #define ARM_LPAE_MAX_LEVELS		4
 
@@ -86,6 +87,8 @@
 #define ARM_LPAE_PTE_TYPE_TABLE		3
 #define ARM_LPAE_PTE_TYPE_PAGE		3
 
+#define ARM_LPAE_PTE_ADDR_MASK		GENMASK_ULL(47,12)
+
 #define ARM_LPAE_PTE_NSTABLE		(((arm_lpae_iopte)1) << 63)
 #define ARM_LPAE_PTE_XN			(((arm_lpae_iopte)3) << 53)
 #define ARM_LPAE_PTE_AF			(((arm_lpae_iopte)1) << 10)
@@ -159,6 +162,7 @@
 #define ARM_LPAE_TCR_PS_42_BIT		0x3ULL
 #define ARM_LPAE_TCR_PS_44_BIT		0x4ULL
 #define ARM_LPAE_TCR_PS_48_BIT		0x5ULL
+#define ARM_LPAE_TCR_PS_52_BIT		0x6ULL
 
 #define ARM_LPAE_MAIR_ATTR_SHIFT(n)	((n) << 3)
 #define ARM_LPAE_MAIR_ATTR_MASK		0xff
@@ -170,9 +174,7 @@
 #define ARM_LPAE_MAIR_ATTR_IDX_DEV	2
 
 /* IOPTE accessors */
-#define iopte_deref(pte,d)					\
-	(__va((pte) & ((1ULL << ARM_LPAE_MAX_ADDR_BITS) - 1)	\
-	& ~(ARM_LPAE_GRANULE(d) - 1ULL)))
+#define iopte_deref(pte,d) __va(iopte_to_paddr(pte, d))
 
 #define iopte_type(pte,l)					\
 	(((pte) >> ARM_LPAE_PTE_TYPE_SHIFT) & ARM_LPAE_PTE_TYPE_MASK)
@@ -184,12 +186,6 @@
 		(iopte_type(pte,l) == ARM_LPAE_PTE_TYPE_PAGE) :	\
 		(iopte_type(pte,l) == ARM_LPAE_PTE_TYPE_BLOCK))
 
-#define iopte_to_pfn(pte,d)					\
-	(((pte) & ((1ULL << ARM_LPAE_MAX_ADDR_BITS) - 1)) >> (d)->pg_shift)
-
-#define pfn_to_iopte(pfn,d)					\
-	(((pfn) << (d)->pg_shift) & ((1ULL << ARM_LPAE_MAX_ADDR_BITS) - 1))
-
 struct arm_lpae_io_pgtable {
 	struct io_pgtable	iop;
 
@@ -203,6 +199,27 @@ struct arm_lpae_io_pgtable {
 
 typedef u64 arm_lpae_iopte;
 
+static arm_lpae_iopte paddr_to_iopte(phys_addr_t paddr,
+				     struct arm_lpae_io_pgtable *data)
+{
+	arm_lpae_iopte pte = paddr;
+
+	/* Of the bits which overlap, either 51:48 or 15:12 are always RES0 */
+	return (pte | (pte >> (48 - 12))) & ARM_LPAE_PTE_ADDR_MASK;
+}
+
+static phys_addr_t iopte_to_paddr(arm_lpae_iopte pte,
+				  struct arm_lpae_io_pgtable *data)
+{
+	u64 paddr = pte & ARM_LPAE_PTE_ADDR_MASK;
+
+	if (data->pg_shift < 16)
+		return paddr;
+
+	/* Rotate the packed high-order bits back to the top */
+	return (paddr | (paddr << (48 - 12))) & (ARM_LPAE_PTE_ADDR_MASK << 4);
+}
+
 static bool selftest_running = false;
 
 static dma_addr_t __arm_lpae_dma_addr(void *pages)
@@ -268,9 +285,9 @@ static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte,
 		__arm_lpae_sync_pte(ptep, cfg);
 }
 
-static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
-			    unsigned long iova, size_t size, int lvl,
-			    arm_lpae_iopte *ptep);
+static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
+			       unsigned long iova, size_t size, int lvl,
+			       arm_lpae_iopte *ptep);
 
 static void __arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
 				phys_addr_t paddr, arm_lpae_iopte prot,
@@ -287,7 +304,7 @@ static void __arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
 		pte |= ARM_LPAE_PTE_TYPE_BLOCK;
 
 	pte |= ARM_LPAE_PTE_AF | ARM_LPAE_PTE_SH_IS;
-	pte |= pfn_to_iopte(paddr >> data->pg_shift, data);
+	pte |= paddr_to_iopte(paddr, data);
 
 	__arm_lpae_set_pte(ptep, pte, &data->iop.cfg);
 }
@@ -506,10 +523,10 @@ static void arm_lpae_free_pgtable(struct io_pgtable *iop)
 	kfree(data);
 }
 
-static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
-				    unsigned long iova, size_t size,
-				    arm_lpae_iopte blk_pte, int lvl,
-				    arm_lpae_iopte *ptep)
+static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
+				       unsigned long iova, size_t size,
+				       arm_lpae_iopte blk_pte, int lvl,
+				       arm_lpae_iopte *ptep)
 {
 	struct io_pgtable_cfg *cfg = &data->iop.cfg;
 	arm_lpae_iopte pte, *tablep;
@@ -528,7 +545,7 @@ static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
 	if (size == split_sz)
 		unmap_idx = ARM_LPAE_LVL_IDX(iova, lvl, data);
 
-	blk_paddr = iopte_to_pfn(blk_pte, data) << data->pg_shift;
+	blk_paddr = iopte_to_paddr(blk_pte, data);
 	pte = iopte_prot(blk_pte);
 
 	for (i = 0; i < tablesz / sizeof(pte); i++, blk_paddr += split_sz) {
@@ -560,9 +577,9 @@ static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
 	return size;
 }
 
-static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
-			    unsigned long iova, size_t size, int lvl,
-			    arm_lpae_iopte *ptep)
+static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
+			       unsigned long iova, size_t size, int lvl,
+			       arm_lpae_iopte *ptep)
 {
 	arm_lpae_iopte pte;
 	struct io_pgtable *iop = &data->iop;
@@ -606,8 +623,8 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
 	return __arm_lpae_unmap(data, iova, size, lvl + 1, ptep);
 }
 
-static int arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
-			  size_t size)
+static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
+			     size_t size)
 {
 	struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
 	arm_lpae_iopte *ptep = data->pgd;
@@ -652,12 +669,13 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
 
 found_translation:
 	iova &= (ARM_LPAE_BLOCK_SIZE(lvl, data) - 1);
-	return ((phys_addr_t)iopte_to_pfn(pte,data) << data->pg_shift) | iova;
+	return iopte_to_paddr(pte, data) | iova;
 }
 
 static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg)
 {
-	unsigned long granule;
+	unsigned long granule, page_sizes;
+	unsigned int max_addr_bits = 48;
 
 	/*
 	 * We need to restrict the supported page sizes to match the
@@ -677,17 +695,24 @@ static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg)
 
 	switch (granule) {
 	case SZ_4K:
-		cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G);
+		page_sizes = (SZ_4K | SZ_2M | SZ_1G);
 		break;
 	case SZ_16K:
-		cfg->pgsize_bitmap &= (SZ_16K | SZ_32M);
+		page_sizes = (SZ_16K | SZ_32M);
 		break;
 	case SZ_64K:
-		cfg->pgsize_bitmap &= (SZ_64K | SZ_512M);
+		max_addr_bits = 52;
+		page_sizes = (SZ_64K | SZ_512M);
+		if (cfg->oas > 48)
+			page_sizes |= 1ULL << 42; /* 4TB */
 		break;
 	default:
-		cfg->pgsize_bitmap = 0;
+		page_sizes = 0;
 	}
+
+	cfg->pgsize_bitmap &= page_sizes;
+	cfg->ias = min(cfg->ias, max_addr_bits);
+	cfg->oas = min(cfg->oas, max_addr_bits);
 }
 
 static struct arm_lpae_io_pgtable *
@@ -784,6 +809,9 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
 	case 48:
 		reg |= (ARM_LPAE_TCR_PS_48_BIT << ARM_LPAE_TCR_IPS_SHIFT);
 		break;
+	case 52:
+		reg |= (ARM_LPAE_TCR_PS_52_BIT << ARM_LPAE_TCR_IPS_SHIFT);
+		break;
 	default:
 		goto out_free_data;
 	}
@@ -891,6 +919,9 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
 	case 48:
 		reg |= (ARM_LPAE_TCR_PS_48_BIT << ARM_LPAE_TCR_PS_SHIFT);
 		break;
+	case 52:
+		reg |= (ARM_LPAE_TCR_PS_52_BIT << ARM_LPAE_TCR_PS_SHIFT);
+		break;
 	default:
 		goto out_free_data;
 	}
diff --git a/drivers/iommu/io-pgtable.h b/drivers/iommu/io-pgtable.h
index cd2e1eafffe6..2df79093cad9 100644
--- a/drivers/iommu/io-pgtable.h
+++ b/drivers/iommu/io-pgtable.h
@@ -119,8 +119,8 @@ struct io_pgtable_cfg {
 struct io_pgtable_ops {
 	int (*map)(struct io_pgtable_ops *ops, unsigned long iova,
 		   phys_addr_t paddr, size_t size, int prot);
-	int (*unmap)(struct io_pgtable_ops *ops, unsigned long iova,
-		     size_t size);
+	size_t (*unmap)(struct io_pgtable_ops *ops, unsigned long iova,
+			size_t size);
 	phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops,
 				    unsigned long iova);
 };
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 69fef991c651..d2aa23202bb9 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1573,10 +1573,10 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
 
 	if (unlikely(ops->unmap == NULL ||
 		     domain->pgsize_bitmap == 0UL))
-		return -ENODEV;
+		return 0;
 
 	if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING)))
-		return -EINVAL;
+		return 0;
 
 	/* find out the minimum page size supported */
 	min_pagesz = 1 << __ffs(domain->pgsize_bitmap);
@@ -1589,7 +1589,7 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
 	if (!IS_ALIGNED(iova | size, min_pagesz)) {
 		pr_err("unaligned: iova 0x%lx size 0x%zx min_pagesz 0x%x\n",
 		       iova, size, min_pagesz);
-		return -EINVAL;
+		return 0;
 	}
 
 	pr_debug("unmap this: iova 0x%lx size 0x%zx\n", iova, size);
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index f227d73e7bf6..f2832a10fcea 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -60,7 +60,7 @@
 	(((prot) & 0x3) << F_MMU_TF_PROTECT_SEL_SHIFT(data))
 
 #define REG_MMU_IVRP_PADDR			0x114
-#define F_MMU_IVRP_PA_SET(pa, ext)		(((pa) >> 1) | ((!!(ext)) << 31))
+
 #define REG_MMU_VLD_PA_RNG			0x118
 #define F_MMU_VLD_PA_RNG(EA, SA)		(((EA) << 8) | (SA))
 
@@ -539,8 +539,13 @@ static int mtk_iommu_hw_init(const struct mtk_iommu_data *data)
 		F_INT_PRETETCH_TRANSATION_FIFO_FAULT;
 	writel_relaxed(regval, data->base + REG_MMU_INT_MAIN_CONTROL);
 
-	writel_relaxed(F_MMU_IVRP_PA_SET(data->protect_base, data->enable_4GB),
-		       data->base + REG_MMU_IVRP_PADDR);
+	if (data->m4u_plat == M4U_MT8173)
+		regval = (data->protect_base >> 1) | (data->enable_4GB << 31);
+	else
+		regval = lower_32_bits(data->protect_base) |
+			 upper_32_bits(data->protect_base);
+	writel_relaxed(regval, data->base + REG_MMU_IVRP_PADDR);
+
 	if (data->enable_4GB && data->m4u_plat != M4U_MT8173) {
 		/*
 		 * If 4GB mode is enabled, the validate PA range is from
@@ -695,6 +700,7 @@ static int __maybe_unused mtk_iommu_suspend(struct device *dev)
 	reg->ctrl_reg = readl_relaxed(base + REG_MMU_CTRL_REG);
 	reg->int_control0 = readl_relaxed(base + REG_MMU_INT_CONTROL0);
 	reg->int_main_control = readl_relaxed(base + REG_MMU_INT_MAIN_CONTROL);
+	reg->ivrp_paddr = readl_relaxed(base + REG_MMU_IVRP_PADDR);
 	clk_disable_unprepare(data->bclk);
 	return 0;
 }
@@ -717,8 +723,7 @@ static int __maybe_unused mtk_iommu_resume(struct device *dev)
 	writel_relaxed(reg->ctrl_reg, base + REG_MMU_CTRL_REG);
 	writel_relaxed(reg->int_control0, base + REG_MMU_INT_CONTROL0);
 	writel_relaxed(reg->int_main_control, base + REG_MMU_INT_MAIN_CONTROL);
-	writel_relaxed(F_MMU_IVRP_PA_SET(data->protect_base, data->enable_4GB),
-		       base + REG_MMU_IVRP_PADDR);
+	writel_relaxed(reg->ivrp_paddr, base + REG_MMU_IVRP_PADDR);
 	if (data->m4u_dom)
 		writel(data->m4u_dom->cfg.arm_v7s_cfg.ttbr[0],
 		       base + REG_MMU_PT_BASE_ADDR);
diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h
index b4451a1c7c2f..778498b8633f 100644
--- a/drivers/iommu/mtk_iommu.h
+++ b/drivers/iommu/mtk_iommu.h
@@ -32,6 +32,7 @@ struct mtk_iommu_suspend_reg {
 	u32				ctrl_reg;
 	u32				int_control0;
 	u32				int_main_control;
+	u32				ivrp_paddr;
 };
 
 enum mtk_iommu_plat {
diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c
index 5a96fd14ac22..a7c2a973784f 100644
--- a/drivers/iommu/mtk_iommu_v1.c
+++ b/drivers/iommu/mtk_iommu_v1.c
@@ -417,20 +417,12 @@ static int mtk_iommu_create_mapping(struct device *dev,
 		m4udev->archdata.iommu = mtk_mapping;
 	}
 
-	ret = arm_iommu_attach_device(dev, mtk_mapping);
-	if (ret)
-		goto err_release_mapping;
-
 	return 0;
-
-err_release_mapping:
-	arm_iommu_release_mapping(mtk_mapping);
-	m4udev->archdata.iommu = NULL;
-	return ret;
 }
 
 static int mtk_iommu_add_device(struct device *dev)
 {
+	struct dma_iommu_mapping *mtk_mapping;
 	struct of_phandle_args iommu_spec;
 	struct of_phandle_iterator it;
 	struct mtk_iommu_data *data;
@@ -451,15 +443,30 @@ static int mtk_iommu_add_device(struct device *dev)
 	if (!dev->iommu_fwspec || dev->iommu_fwspec->ops != &mtk_iommu_ops)
 		return -ENODEV; /* Not a iommu client device */
 
-	data = dev->iommu_fwspec->iommu_priv;
-	iommu_device_link(&data->iommu, dev);
-
-	group = iommu_group_get_for_dev(dev);
+	/*
+	 * This is a short-term bodge because the ARM DMA code doesn't
+	 * understand multi-device groups, but we have to call into it
+	 * successfully (and not just rely on a normal IOMMU API attach
+	 * here) in order to set the correct DMA API ops on @dev.
+	 */
+	group = iommu_group_alloc();
 	if (IS_ERR(group))
 		return PTR_ERR(group);
 
+	err = iommu_group_add_device(group, dev);
 	iommu_group_put(group);
-	return 0;
+	if (err)
+		return err;
+
+	data = dev->iommu_fwspec->iommu_priv;
+	mtk_mapping = data->dev->archdata.iommu;
+	err = arm_iommu_attach_device(dev, mtk_mapping);
+	if (err) {
+		iommu_group_remove_device(dev);
+		return err;
+	}
+
+	return iommu_device_link(&data->iommu, dev);;
 }
 
 static void mtk_iommu_remove_device(struct device *dev)
@@ -476,24 +483,6 @@ static void mtk_iommu_remove_device(struct device *dev)
 	iommu_fwspec_free(dev);
 }
 
-static struct iommu_group *mtk_iommu_device_group(struct device *dev)
-{
-	struct mtk_iommu_data *data = dev->iommu_fwspec->iommu_priv;
-
-	if (!data)
-		return ERR_PTR(-ENODEV);
-
-	/* All the client devices are in the same m4u iommu-group */
-	if (!data->m4u_group) {
-		data->m4u_group = iommu_group_alloc();
-		if (IS_ERR(data->m4u_group))
-			dev_err(dev, "Failed to allocate M4U IOMMU group\n");
-	} else {
-		iommu_group_ref_get(data->m4u_group);
-	}
-	return data->m4u_group;
-}
-
 static int mtk_iommu_hw_init(const struct mtk_iommu_data *data)
 {
 	u32 regval;
@@ -546,7 +535,6 @@ static struct iommu_ops mtk_iommu_ops = {
 	.iova_to_phys	= mtk_iommu_iova_to_phys,
 	.add_device	= mtk_iommu_add_device,
 	.remove_device	= mtk_iommu_remove_device,
-	.device_group	= mtk_iommu_device_group,
 	.pgsize_bitmap	= ~0UL << MT2701_IOMMU_PAGE_SHIFT,
 };
 
diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index e135ab830ebf..c33b7b104e72 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -1536,7 +1536,7 @@ static struct iommu_group *omap_iommu_device_group(struct device *dev)
 	struct iommu_group *group = ERR_PTR(-EINVAL);
 
 	if (arch_data->iommu_dev)
-		group = arch_data->iommu_dev->group;
+		group = iommu_group_ref_get(arch_data->iommu_dev->group);
 
 	return group;
 }
diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index 9d991c2d8767..5fc8656c60f9 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -4,6 +4,7 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/clk.h>
 #include <linux/compiler.h>
 #include <linux/delay.h>
 #include <linux/device.h>
@@ -13,13 +14,15 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/iommu.h>
-#include <linux/jiffies.h>
+#include <linux/iopoll.h>
 #include <linux/list.h>
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/of.h>
+#include <linux/of_iommu.h>
 #include <linux/of_platform.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 
@@ -36,7 +39,10 @@
 #define RK_MMU_AUTO_GATING	0x24
 
 #define DTE_ADDR_DUMMY		0xCAFEBABE
-#define FORCE_RESET_TIMEOUT	100	/* ms */
+
+#define RK_MMU_POLL_PERIOD_US		100
+#define RK_MMU_FORCE_RESET_TIMEOUT_US	100000
+#define RK_MMU_POLL_TIMEOUT_US		1000
 
 /* RK_MMU_STATUS fields */
 #define RK_MMU_STATUS_PAGING_ENABLED       BIT(0)
@@ -73,11 +79,8 @@
   */
 #define RK_IOMMU_PGSIZE_BITMAP 0x007ff000
 
-#define IOMMU_REG_POLL_COUNT_FAST 1000
-
 struct rk_iommu_domain {
 	struct list_head iommus;
-	struct platform_device *pdev;
 	u32 *dt; /* page directory table */
 	dma_addr_t dt_dma;
 	spinlock_t iommus_lock; /* lock for iommus list */
@@ -86,24 +89,37 @@ struct rk_iommu_domain {
 	struct iommu_domain domain;
 };
 
+/* list of clocks required by IOMMU */
+static const char * const rk_iommu_clocks[] = {
+	"aclk", "iface",
+};
+
 struct rk_iommu {
 	struct device *dev;
 	void __iomem **bases;
 	int num_mmu;
-	int *irq;
-	int num_irq;
+	struct clk_bulk_data *clocks;
+	int num_clocks;
 	bool reset_disabled;
 	struct iommu_device iommu;
 	struct list_head node; /* entry in rk_iommu_domain.iommus */
 	struct iommu_domain *domain; /* domain to which iommu is attached */
+	struct iommu_group *group;
+};
+
+struct rk_iommudata {
+	struct device_link *link; /* runtime PM link from IOMMU to master */
+	struct rk_iommu *iommu;
 };
 
+static struct device *dma_dev;
+
 static inline void rk_table_flush(struct rk_iommu_domain *dom, dma_addr_t dma,
 				  unsigned int count)
 {
 	size_t size = count * sizeof(u32); /* count of u32 entry */
 
-	dma_sync_single_for_device(&dom->pdev->dev, dma, size, DMA_TO_DEVICE);
+	dma_sync_single_for_device(dma_dev, dma, size, DMA_TO_DEVICE);
 }
 
 static struct rk_iommu_domain *to_rk_domain(struct iommu_domain *dom)
@@ -111,27 +127,6 @@ static struct rk_iommu_domain *to_rk_domain(struct iommu_domain *dom)
 	return container_of(dom, struct rk_iommu_domain, domain);
 }
 
-/**
- * Inspired by _wait_for in intel_drv.h
- * This is NOT safe for use in interrupt context.
- *
- * Note that it's important that we check the condition again after having
- * timed out, since the timeout could be due to preemption or similar and
- * we've never had a chance to check the condition before the timeout.
- */
-#define rk_wait_for(COND, MS) ({ \
-	unsigned long timeout__ = jiffies + msecs_to_jiffies(MS) + 1;	\
-	int ret__ = 0;							\
-	while (!(COND)) {						\
-		if (time_after(jiffies, timeout__)) {			\
-			ret__ = (COND) ? 0 : -ETIMEDOUT;		\
-			break;						\
-		}							\
-		usleep_range(50, 100);					\
-	}								\
-	ret__;								\
-})
-
 /*
  * The Rockchip rk3288 iommu uses a 2-level page table.
  * The first level is the "Directory Table" (DT).
@@ -296,19 +291,21 @@ static void rk_iommu_base_command(void __iomem *base, u32 command)
 {
 	writel(command, base + RK_MMU_COMMAND);
 }
-static void rk_iommu_zap_lines(struct rk_iommu *iommu, dma_addr_t iova,
+static void rk_iommu_zap_lines(struct rk_iommu *iommu, dma_addr_t iova_start,
 			       size_t size)
 {
 	int i;
-
-	dma_addr_t iova_end = iova + size;
+	dma_addr_t iova_end = iova_start + size;
 	/*
 	 * TODO(djkurtz): Figure out when it is more efficient to shootdown the
 	 * entire iotlb rather than iterate over individual iovas.
 	 */
-	for (i = 0; i < iommu->num_mmu; i++)
-		for (; iova < iova_end; iova += SPAGE_SIZE)
+	for (i = 0; i < iommu->num_mmu; i++) {
+		dma_addr_t iova;
+
+		for (iova = iova_start; iova < iova_end; iova += SPAGE_SIZE)
 			rk_iommu_write(iommu->bases[i], RK_MMU_ZAP_ONE_LINE, iova);
+	}
 }
 
 static bool rk_iommu_is_stall_active(struct rk_iommu *iommu)
@@ -335,9 +332,21 @@ static bool rk_iommu_is_paging_enabled(struct rk_iommu *iommu)
 	return enable;
 }
 
+static bool rk_iommu_is_reset_done(struct rk_iommu *iommu)
+{
+	bool done = true;
+	int i;
+
+	for (i = 0; i < iommu->num_mmu; i++)
+		done &= rk_iommu_read(iommu->bases[i], RK_MMU_DTE_ADDR) == 0;
+
+	return done;
+}
+
 static int rk_iommu_enable_stall(struct rk_iommu *iommu)
 {
 	int ret, i;
+	bool val;
 
 	if (rk_iommu_is_stall_active(iommu))
 		return 0;
@@ -348,7 +357,9 @@ static int rk_iommu_enable_stall(struct rk_iommu *iommu)
 
 	rk_iommu_command(iommu, RK_MMU_CMD_ENABLE_STALL);
 
-	ret = rk_wait_for(rk_iommu_is_stall_active(iommu), 1);
+	ret = readx_poll_timeout(rk_iommu_is_stall_active, iommu, val,
+				 val, RK_MMU_POLL_PERIOD_US,
+				 RK_MMU_POLL_TIMEOUT_US);
 	if (ret)
 		for (i = 0; i < iommu->num_mmu; i++)
 			dev_err(iommu->dev, "Enable stall request timed out, status: %#08x\n",
@@ -360,13 +371,16 @@ static int rk_iommu_enable_stall(struct rk_iommu *iommu)
 static int rk_iommu_disable_stall(struct rk_iommu *iommu)
 {
 	int ret, i;
+	bool val;
 
 	if (!rk_iommu_is_stall_active(iommu))
 		return 0;
 
 	rk_iommu_command(iommu, RK_MMU_CMD_DISABLE_STALL);
 
-	ret = rk_wait_for(!rk_iommu_is_stall_active(iommu), 1);
+	ret = readx_poll_timeout(rk_iommu_is_stall_active, iommu, val,
+				 !val, RK_MMU_POLL_PERIOD_US,
+				 RK_MMU_POLL_TIMEOUT_US);
 	if (ret)
 		for (i = 0; i < iommu->num_mmu; i++)
 			dev_err(iommu->dev, "Disable stall request timed out, status: %#08x\n",
@@ -378,13 +392,16 @@ static int rk_iommu_disable_stall(struct rk_iommu *iommu)
 static int rk_iommu_enable_paging(struct rk_iommu *iommu)
 {
 	int ret, i;
+	bool val;
 
 	if (rk_iommu_is_paging_enabled(iommu))
 		return 0;
 
 	rk_iommu_command(iommu, RK_MMU_CMD_ENABLE_PAGING);
 
-	ret = rk_wait_for(rk_iommu_is_paging_enabled(iommu), 1);
+	ret = readx_poll_timeout(rk_iommu_is_paging_enabled, iommu, val,
+				 val, RK_MMU_POLL_PERIOD_US,
+				 RK_MMU_POLL_TIMEOUT_US);
 	if (ret)
 		for (i = 0; i < iommu->num_mmu; i++)
 			dev_err(iommu->dev, "Enable paging request timed out, status: %#08x\n",
@@ -396,13 +413,16 @@ static int rk_iommu_enable_paging(struct rk_iommu *iommu)
 static int rk_iommu_disable_paging(struct rk_iommu *iommu)
 {
 	int ret, i;
+	bool val;
 
 	if (!rk_iommu_is_paging_enabled(iommu))
 		return 0;
 
 	rk_iommu_command(iommu, RK_MMU_CMD_DISABLE_PAGING);
 
-	ret = rk_wait_for(!rk_iommu_is_paging_enabled(iommu), 1);
+	ret = readx_poll_timeout(rk_iommu_is_paging_enabled, iommu, val,
+				 !val, RK_MMU_POLL_PERIOD_US,
+				 RK_MMU_POLL_TIMEOUT_US);
 	if (ret)
 		for (i = 0; i < iommu->num_mmu; i++)
 			dev_err(iommu->dev, "Disable paging request timed out, status: %#08x\n",
@@ -415,6 +435,7 @@ static int rk_iommu_force_reset(struct rk_iommu *iommu)
 {
 	int ret, i;
 	u32 dte_addr;
+	bool val;
 
 	if (iommu->reset_disabled)
 		return 0;
@@ -435,13 +456,12 @@ static int rk_iommu_force_reset(struct rk_iommu *iommu)
 
 	rk_iommu_command(iommu, RK_MMU_CMD_FORCE_RESET);
 
-	for (i = 0; i < iommu->num_mmu; i++) {
-		ret = rk_wait_for(rk_iommu_read(iommu->bases[i], RK_MMU_DTE_ADDR) == 0x00000000,
-				  FORCE_RESET_TIMEOUT);
-		if (ret) {
-			dev_err(iommu->dev, "FORCE_RESET command timed out\n");
-			return ret;
-		}
+	ret = readx_poll_timeout(rk_iommu_is_reset_done, iommu, val,
+				 val, RK_MMU_FORCE_RESET_TIMEOUT_US,
+				 RK_MMU_POLL_TIMEOUT_US);
+	if (ret) {
+		dev_err(iommu->dev, "FORCE_RESET command timed out\n");
+		return ret;
 	}
 
 	return 0;
@@ -503,6 +523,12 @@ static irqreturn_t rk_iommu_irq(int irq, void *dev_id)
 	irqreturn_t ret = IRQ_NONE;
 	int i;
 
+	if (WARN_ON(!pm_runtime_get_if_in_use(iommu->dev)))
+		return 0;
+
+	if (WARN_ON(clk_bulk_enable(iommu->num_clocks, iommu->clocks)))
+		goto out;
+
 	for (i = 0; i < iommu->num_mmu; i++) {
 		int_status = rk_iommu_read(iommu->bases[i], RK_MMU_INT_STATUS);
 		if (int_status == 0)
@@ -549,6 +575,10 @@ static irqreturn_t rk_iommu_irq(int irq, void *dev_id)
 		rk_iommu_write(iommu->bases[i], RK_MMU_INT_CLEAR, int_status);
 	}
 
+	clk_bulk_disable(iommu->num_clocks, iommu->clocks);
+
+out:
+	pm_runtime_put(iommu->dev);
 	return ret;
 }
 
@@ -590,8 +620,17 @@ static void rk_iommu_zap_iova(struct rk_iommu_domain *rk_domain,
 	spin_lock_irqsave(&rk_domain->iommus_lock, flags);
 	list_for_each(pos, &rk_domain->iommus) {
 		struct rk_iommu *iommu;
+
 		iommu = list_entry(pos, struct rk_iommu, node);
-		rk_iommu_zap_lines(iommu, iova, size);
+
+		/* Only zap TLBs of IOMMUs that are powered on. */
+		if (pm_runtime_get_if_in_use(iommu->dev)) {
+			WARN_ON(clk_bulk_enable(iommu->num_clocks,
+						iommu->clocks));
+			rk_iommu_zap_lines(iommu, iova, size);
+			clk_bulk_disable(iommu->num_clocks, iommu->clocks);
+			pm_runtime_put(iommu->dev);
+		}
 	}
 	spin_unlock_irqrestore(&rk_domain->iommus_lock, flags);
 }
@@ -608,7 +647,6 @@ static void rk_iommu_zap_iova_first_last(struct rk_iommu_domain *rk_domain,
 static u32 *rk_dte_get_page_table(struct rk_iommu_domain *rk_domain,
 				  dma_addr_t iova)
 {
-	struct device *dev = &rk_domain->pdev->dev;
 	u32 *page_table, *dte_addr;
 	u32 dte_index, dte;
 	phys_addr_t pt_phys;
@@ -626,9 +664,9 @@ static u32 *rk_dte_get_page_table(struct rk_iommu_domain *rk_domain,
 	if (!page_table)
 		return ERR_PTR(-ENOMEM);
 
-	pt_dma = dma_map_single(dev, page_table, SPAGE_SIZE, DMA_TO_DEVICE);
-	if (dma_mapping_error(dev, pt_dma)) {
-		dev_err(dev, "DMA mapping error while allocating page table\n");
+	pt_dma = dma_map_single(dma_dev, page_table, SPAGE_SIZE, DMA_TO_DEVICE);
+	if (dma_mapping_error(dma_dev, pt_dma)) {
+		dev_err(dma_dev, "DMA mapping error while allocating page table\n");
 		free_page((unsigned long)page_table);
 		return ERR_PTR(-ENOMEM);
 	}
@@ -790,52 +828,46 @@ static size_t rk_iommu_unmap(struct iommu_domain *domain, unsigned long _iova,
 
 static struct rk_iommu *rk_iommu_from_dev(struct device *dev)
 {
-	struct iommu_group *group;
-	struct device *iommu_dev;
-	struct rk_iommu *rk_iommu;
+	struct rk_iommudata *data = dev->archdata.iommu;
 
-	group = iommu_group_get(dev);
-	if (!group)
-		return NULL;
-	iommu_dev = iommu_group_get_iommudata(group);
-	rk_iommu = dev_get_drvdata(iommu_dev);
-	iommu_group_put(group);
+	return data ? data->iommu : NULL;
+}
+
+/* Must be called with iommu powered on and attached */
+static void rk_iommu_disable(struct rk_iommu *iommu)
+{
+	int i;
 
-	return rk_iommu;
+	/* Ignore error while disabling, just keep going */
+	WARN_ON(clk_bulk_enable(iommu->num_clocks, iommu->clocks));
+	rk_iommu_enable_stall(iommu);
+	rk_iommu_disable_paging(iommu);
+	for (i = 0; i < iommu->num_mmu; i++) {
+		rk_iommu_write(iommu->bases[i], RK_MMU_INT_MASK, 0);
+		rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR, 0);
+	}
+	rk_iommu_disable_stall(iommu);
+	clk_bulk_disable(iommu->num_clocks, iommu->clocks);
 }
 
-static int rk_iommu_attach_device(struct iommu_domain *domain,
-				  struct device *dev)
+/* Must be called with iommu powered on and attached */
+static int rk_iommu_enable(struct rk_iommu *iommu)
 {
-	struct rk_iommu *iommu;
+	struct iommu_domain *domain = iommu->domain;
 	struct rk_iommu_domain *rk_domain = to_rk_domain(domain);
-	unsigned long flags;
 	int ret, i;
 
-	/*
-	 * Allow 'virtual devices' (e.g., drm) to attach to domain.
-	 * Such a device does not belong to an iommu group.
-	 */
-	iommu = rk_iommu_from_dev(dev);
-	if (!iommu)
-		return 0;
+	ret = clk_bulk_enable(iommu->num_clocks, iommu->clocks);
+	if (ret)
+		return ret;
 
 	ret = rk_iommu_enable_stall(iommu);
 	if (ret)
-		return ret;
+		goto out_disable_clocks;
 
 	ret = rk_iommu_force_reset(iommu);
 	if (ret)
-		return ret;
-
-	iommu->domain = domain;
-
-	for (i = 0; i < iommu->num_irq; i++) {
-		ret = devm_request_irq(iommu->dev, iommu->irq[i], rk_iommu_irq,
-				       IRQF_SHARED, dev_name(dev), iommu);
-		if (ret)
-			return ret;
-	}
+		goto out_disable_stall;
 
 	for (i = 0; i < iommu->num_mmu; i++) {
 		rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR,
@@ -845,18 +877,12 @@ static int rk_iommu_attach_device(struct iommu_domain *domain,
 	}
 
 	ret = rk_iommu_enable_paging(iommu);
-	if (ret)
-		return ret;
-
-	spin_lock_irqsave(&rk_domain->iommus_lock, flags);
-	list_add_tail(&iommu->node, &rk_domain->iommus);
-	spin_unlock_irqrestore(&rk_domain->iommus_lock, flags);
-
-	dev_dbg(dev, "Attached to iommu domain\n");
 
+out_disable_stall:
 	rk_iommu_disable_stall(iommu);
-
-	return 0;
+out_disable_clocks:
+	clk_bulk_disable(iommu->num_clocks, iommu->clocks);
+	return ret;
 }
 
 static void rk_iommu_detach_device(struct iommu_domain *domain,
@@ -865,60 +891,90 @@ static void rk_iommu_detach_device(struct iommu_domain *domain,
 	struct rk_iommu *iommu;
 	struct rk_iommu_domain *rk_domain = to_rk_domain(domain);
 	unsigned long flags;
-	int i;
 
 	/* Allow 'virtual devices' (eg drm) to detach from domain */
 	iommu = rk_iommu_from_dev(dev);
 	if (!iommu)
 		return;
 
+	dev_dbg(dev, "Detaching from iommu domain\n");
+
+	/* iommu already detached */
+	if (iommu->domain != domain)
+		return;
+
+	iommu->domain = NULL;
+
 	spin_lock_irqsave(&rk_domain->iommus_lock, flags);
 	list_del_init(&iommu->node);
 	spin_unlock_irqrestore(&rk_domain->iommus_lock, flags);
 
-	/* Ignore error while disabling, just keep going */
-	rk_iommu_enable_stall(iommu);
-	rk_iommu_disable_paging(iommu);
-	for (i = 0; i < iommu->num_mmu; i++) {
-		rk_iommu_write(iommu->bases[i], RK_MMU_INT_MASK, 0);
-		rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR, 0);
+	if (pm_runtime_get_if_in_use(iommu->dev)) {
+		rk_iommu_disable(iommu);
+		pm_runtime_put(iommu->dev);
 	}
-	rk_iommu_disable_stall(iommu);
+}
 
-	for (i = 0; i < iommu->num_irq; i++)
-		devm_free_irq(iommu->dev, iommu->irq[i], iommu);
+static int rk_iommu_attach_device(struct iommu_domain *domain,
+		struct device *dev)
+{
+	struct rk_iommu *iommu;
+	struct rk_iommu_domain *rk_domain = to_rk_domain(domain);
+	unsigned long flags;
+	int ret;
 
-	iommu->domain = NULL;
+	/*
+	 * Allow 'virtual devices' (e.g., drm) to attach to domain.
+	 * Such a device does not belong to an iommu group.
+	 */
+	iommu = rk_iommu_from_dev(dev);
+	if (!iommu)
+		return 0;
+
+	dev_dbg(dev, "Attaching to iommu domain\n");
+
+	/* iommu already attached */
+	if (iommu->domain == domain)
+		return 0;
 
-	dev_dbg(dev, "Detached from iommu domain\n");
+	if (iommu->domain)
+		rk_iommu_detach_device(iommu->domain, dev);
+
+	iommu->domain = domain;
+
+	spin_lock_irqsave(&rk_domain->iommus_lock, flags);
+	list_add_tail(&iommu->node, &rk_domain->iommus);
+	spin_unlock_irqrestore(&rk_domain->iommus_lock, flags);
+
+	if (!pm_runtime_get_if_in_use(iommu->dev))
+		return 0;
+
+	ret = rk_iommu_enable(iommu);
+	if (ret)
+		rk_iommu_detach_device(iommu->domain, dev);
+
+	pm_runtime_put(iommu->dev);
+
+	return ret;
 }
 
 static struct iommu_domain *rk_iommu_domain_alloc(unsigned type)
 {
 	struct rk_iommu_domain *rk_domain;
-	struct platform_device *pdev;
-	struct device *iommu_dev;
 
 	if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
 		return NULL;
 
-	/* Register a pdev per domain, so DMA API can base on this *dev
-	 * even some virtual master doesn't have an iommu slave
-	 */
-	pdev = platform_device_register_simple("rk_iommu_domain",
-					       PLATFORM_DEVID_AUTO, NULL, 0);
-	if (IS_ERR(pdev))
+	if (!dma_dev)
 		return NULL;
 
-	rk_domain = devm_kzalloc(&pdev->dev, sizeof(*rk_domain), GFP_KERNEL);
+	rk_domain = devm_kzalloc(dma_dev, sizeof(*rk_domain), GFP_KERNEL);
 	if (!rk_domain)
-		goto err_unreg_pdev;
-
-	rk_domain->pdev = pdev;
+		return NULL;
 
 	if (type == IOMMU_DOMAIN_DMA &&
 	    iommu_get_dma_cookie(&rk_domain->domain))
-		goto err_unreg_pdev;
+		return NULL;
 
 	/*
 	 * rk32xx iommus use a 2 level pagetable.
@@ -929,11 +985,10 @@ static struct iommu_domain *rk_iommu_domain_alloc(unsigned type)
 	if (!rk_domain->dt)
 		goto err_put_cookie;
 
-	iommu_dev = &pdev->dev;
-	rk_domain->dt_dma = dma_map_single(iommu_dev, rk_domain->dt,
+	rk_domain->dt_dma = dma_map_single(dma_dev, rk_domain->dt,
 					   SPAGE_SIZE, DMA_TO_DEVICE);
-	if (dma_mapping_error(iommu_dev, rk_domain->dt_dma)) {
-		dev_err(iommu_dev, "DMA map error for DT\n");
+	if (dma_mapping_error(dma_dev, rk_domain->dt_dma)) {
+		dev_err(dma_dev, "DMA map error for DT\n");
 		goto err_free_dt;
 	}
 
@@ -954,8 +1009,6 @@ err_free_dt:
 err_put_cookie:
 	if (type == IOMMU_DOMAIN_DMA)
 		iommu_put_dma_cookie(&rk_domain->domain);
-err_unreg_pdev:
-	platform_device_unregister(pdev);
 
 	return NULL;
 }
@@ -972,126 +1025,82 @@ static void rk_iommu_domain_free(struct iommu_domain *domain)
 		if (rk_dte_is_pt_valid(dte)) {
 			phys_addr_t pt_phys = rk_dte_pt_address(dte);
 			u32 *page_table = phys_to_virt(pt_phys);
-			dma_unmap_single(&rk_domain->pdev->dev, pt_phys,
+			dma_unmap_single(dma_dev, pt_phys,
 					 SPAGE_SIZE, DMA_TO_DEVICE);
 			free_page((unsigned long)page_table);
 		}
 	}
 
-	dma_unmap_single(&rk_domain->pdev->dev, rk_domain->dt_dma,
+	dma_unmap_single(dma_dev, rk_domain->dt_dma,
 			 SPAGE_SIZE, DMA_TO_DEVICE);
 	free_page((unsigned long)rk_domain->dt);
 
 	if (domain->type == IOMMU_DOMAIN_DMA)
 		iommu_put_dma_cookie(&rk_domain->domain);
-
-	platform_device_unregister(rk_domain->pdev);
 }
 
-static bool rk_iommu_is_dev_iommu_master(struct device *dev)
+static int rk_iommu_add_device(struct device *dev)
 {
-	struct device_node *np = dev->of_node;
-	int ret;
-
-	/*
-	 * An iommu master has an iommus property containing a list of phandles
-	 * to iommu nodes, each with an #iommu-cells property with value 0.
-	 */
-	ret = of_count_phandle_with_args(np, "iommus", "#iommu-cells");
-	return (ret > 0);
-}
+	struct iommu_group *group;
+	struct rk_iommu *iommu;
+	struct rk_iommudata *data;
 
-static int rk_iommu_group_set_iommudata(struct iommu_group *group,
-					struct device *dev)
-{
-	struct device_node *np = dev->of_node;
-	struct platform_device *pd;
-	int ret;
-	struct of_phandle_args args;
+	data = dev->archdata.iommu;
+	if (!data)
+		return -ENODEV;
 
-	/*
-	 * An iommu master has an iommus property containing a list of phandles
-	 * to iommu nodes, each with an #iommu-cells property with value 0.
-	 */
-	ret = of_parse_phandle_with_args(np, "iommus", "#iommu-cells", 0,
-					 &args);
-	if (ret) {
-		dev_err(dev, "of_parse_phandle_with_args(%pOF) => %d\n",
-			np, ret);
-		return ret;
-	}
-	if (args.args_count != 0) {
-		dev_err(dev, "incorrect number of iommu params found for %pOF (found %d, expected 0)\n",
-			args.np, args.args_count);
-		return -EINVAL;
-	}
+	iommu = rk_iommu_from_dev(dev);
 
-	pd = of_find_device_by_node(args.np);
-	of_node_put(args.np);
-	if (!pd) {
-		dev_err(dev, "iommu %pOF not found\n", args.np);
-		return -EPROBE_DEFER;
-	}
+	group = iommu_group_get_for_dev(dev);
+	if (IS_ERR(group))
+		return PTR_ERR(group);
+	iommu_group_put(group);
 
-	/* TODO(djkurtz): handle multiple slave iommus for a single master */
-	iommu_group_set_iommudata(group, &pd->dev, NULL);
+	iommu_device_link(&iommu->iommu, dev);
+	data->link = device_link_add(dev, iommu->dev, DL_FLAG_PM_RUNTIME);
 
 	return 0;
 }
 
-static int rk_iommu_add_device(struct device *dev)
+static void rk_iommu_remove_device(struct device *dev)
 {
-	struct iommu_group *group;
 	struct rk_iommu *iommu;
-	int ret;
-
-	if (!rk_iommu_is_dev_iommu_master(dev))
-		return -ENODEV;
+	struct rk_iommudata *data = dev->archdata.iommu;
 
-	group = iommu_group_get(dev);
-	if (!group) {
-		group = iommu_group_alloc();
-		if (IS_ERR(group)) {
-			dev_err(dev, "Failed to allocate IOMMU group\n");
-			return PTR_ERR(group);
-		}
-	}
+	iommu = rk_iommu_from_dev(dev);
 
-	ret = iommu_group_add_device(group, dev);
-	if (ret)
-		goto err_put_group;
+	device_link_del(data->link);
+	iommu_device_unlink(&iommu->iommu, dev);
+	iommu_group_remove_device(dev);
+}
 
-	ret = rk_iommu_group_set_iommudata(group, dev);
-	if (ret)
-		goto err_remove_device;
+static struct iommu_group *rk_iommu_device_group(struct device *dev)
+{
+	struct rk_iommu *iommu;
 
 	iommu = rk_iommu_from_dev(dev);
-	if (iommu)
-		iommu_device_link(&iommu->iommu, dev);
-
-	iommu_group_put(group);
-
-	return 0;
 
-err_remove_device:
-	iommu_group_remove_device(dev);
-err_put_group:
-	iommu_group_put(group);
-	return ret;
+	return iommu_group_ref_get(iommu->group);
 }
 
-static void rk_iommu_remove_device(struct device *dev)
+static int rk_iommu_of_xlate(struct device *dev,
+			     struct of_phandle_args *args)
 {
-	struct rk_iommu *iommu;
+	struct platform_device *iommu_dev;
+	struct rk_iommudata *data;
 
-	if (!rk_iommu_is_dev_iommu_master(dev))
-		return;
+	data = devm_kzalloc(dma_dev, sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
 
-	iommu = rk_iommu_from_dev(dev);
-	if (iommu)
-		iommu_device_unlink(&iommu->iommu, dev);
+	iommu_dev = of_find_device_by_node(args->np);
 
-	iommu_group_remove_device(dev);
+	data->iommu = platform_get_drvdata(iommu_dev);
+	dev->archdata.iommu = data;
+
+	of_dev_put(iommu_dev);
+
+	return 0;
 }
 
 static const struct iommu_ops rk_iommu_ops = {
@@ -1105,31 +1114,9 @@ static const struct iommu_ops rk_iommu_ops = {
 	.add_device = rk_iommu_add_device,
 	.remove_device = rk_iommu_remove_device,
 	.iova_to_phys = rk_iommu_iova_to_phys,
+	.device_group = rk_iommu_device_group,
 	.pgsize_bitmap = RK_IOMMU_PGSIZE_BITMAP,
-};
-
-static int rk_iommu_domain_probe(struct platform_device *pdev)
-{
-	struct device *dev = &pdev->dev;
-
-	dev->dma_parms = devm_kzalloc(dev, sizeof(*dev->dma_parms), GFP_KERNEL);
-	if (!dev->dma_parms)
-		return -ENOMEM;
-
-	/* Set dma_ops for dev, otherwise it would be dummy_dma_ops */
-	arch_setup_dma_ops(dev, 0, DMA_BIT_MASK(32), NULL, false);
-
-	dma_set_max_seg_size(dev, DMA_BIT_MASK(32));
-	dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(32));
-
-	return 0;
-}
-
-static struct platform_driver rk_iommu_domain_driver = {
-	.probe = rk_iommu_domain_probe,
-	.driver = {
-		   .name = "rk_iommu_domain",
-	},
+	.of_xlate = rk_iommu_of_xlate,
 };
 
 static int rk_iommu_probe(struct platform_device *pdev)
@@ -1138,7 +1125,7 @@ static int rk_iommu_probe(struct platform_device *pdev)
 	struct rk_iommu *iommu;
 	struct resource *res;
 	int num_res = pdev->num_resources;
-	int err, i;
+	int err, i, irq;
 
 	iommu = devm_kzalloc(dev, sizeof(*iommu), GFP_KERNEL);
 	if (!iommu)
@@ -1165,50 +1152,108 @@ static int rk_iommu_probe(struct platform_device *pdev)
 	if (iommu->num_mmu == 0)
 		return PTR_ERR(iommu->bases[0]);
 
-	iommu->num_irq = platform_irq_count(pdev);
-	if (iommu->num_irq < 0)
-		return iommu->num_irq;
-	if (iommu->num_irq == 0)
-		return -ENXIO;
-
-	iommu->irq = devm_kcalloc(dev, iommu->num_irq, sizeof(*iommu->irq),
-				  GFP_KERNEL);
-	if (!iommu->irq)
-		return -ENOMEM;
+	i = 0;
+	while ((irq = platform_get_irq(pdev, i++)) != -ENXIO) {
+		if (irq < 0)
+			return irq;
 
-	for (i = 0; i < iommu->num_irq; i++) {
-		iommu->irq[i] = platform_get_irq(pdev, i);
-		if (iommu->irq[i] < 0) {
-			dev_err(dev, "Failed to get IRQ, %d\n", iommu->irq[i]);
-			return -ENXIO;
-		}
+		err = devm_request_irq(iommu->dev, irq, rk_iommu_irq,
+				       IRQF_SHARED, dev_name(dev), iommu);
+		if (err)
+			return err;
 	}
 
 	iommu->reset_disabled = device_property_read_bool(dev,
 					"rockchip,disable-mmu-reset");
 
-	err = iommu_device_sysfs_add(&iommu->iommu, dev, NULL, dev_name(dev));
+	iommu->num_clocks = ARRAY_SIZE(rk_iommu_clocks);
+	iommu->clocks = devm_kcalloc(iommu->dev, iommu->num_clocks,
+				     sizeof(*iommu->clocks), GFP_KERNEL);
+	if (!iommu->clocks)
+		return -ENOMEM;
+
+	for (i = 0; i < iommu->num_clocks; ++i)
+		iommu->clocks[i].id = rk_iommu_clocks[i];
+
+	err = devm_clk_bulk_get(iommu->dev, iommu->num_clocks, iommu->clocks);
+	if (err)
+		return err;
+
+	err = clk_bulk_prepare(iommu->num_clocks, iommu->clocks);
 	if (err)
 		return err;
 
+	iommu->group = iommu_group_alloc();
+	if (IS_ERR(iommu->group)) {
+		err = PTR_ERR(iommu->group);
+		goto err_unprepare_clocks;
+	}
+
+	err = iommu_device_sysfs_add(&iommu->iommu, dev, NULL, dev_name(dev));
+	if (err)
+		goto err_put_group;
+
 	iommu_device_set_ops(&iommu->iommu, &rk_iommu_ops);
+	iommu_device_set_fwnode(&iommu->iommu, &dev->of_node->fwnode);
+
 	err = iommu_device_register(&iommu->iommu);
+	if (err)
+		goto err_remove_sysfs;
+
+	/*
+	 * Use the first registered IOMMU device for domain to use with DMA
+	 * API, since a domain might not physically correspond to a single
+	 * IOMMU device..
+	 */
+	if (!dma_dev)
+		dma_dev = &pdev->dev;
+
+	bus_set_iommu(&platform_bus_type, &rk_iommu_ops);
 
+	pm_runtime_enable(dev);
+
+	return 0;
+err_remove_sysfs:
+	iommu_device_sysfs_remove(&iommu->iommu);
+err_put_group:
+	iommu_group_put(iommu->group);
+err_unprepare_clocks:
+	clk_bulk_unprepare(iommu->num_clocks, iommu->clocks);
 	return err;
 }
 
-static int rk_iommu_remove(struct platform_device *pdev)
+static void rk_iommu_shutdown(struct platform_device *pdev)
 {
-	struct rk_iommu *iommu = platform_get_drvdata(pdev);
+	pm_runtime_force_suspend(&pdev->dev);
+}
 
-	if (iommu) {
-		iommu_device_sysfs_remove(&iommu->iommu);
-		iommu_device_unregister(&iommu->iommu);
-	}
+static int __maybe_unused rk_iommu_suspend(struct device *dev)
+{
+	struct rk_iommu *iommu = dev_get_drvdata(dev);
 
+	if (!iommu->domain)
+		return 0;
+
+	rk_iommu_disable(iommu);
 	return 0;
 }
 
+static int __maybe_unused rk_iommu_resume(struct device *dev)
+{
+	struct rk_iommu *iommu = dev_get_drvdata(dev);
+
+	if (!iommu->domain)
+		return 0;
+
+	return rk_iommu_enable(iommu);
+}
+
+static const struct dev_pm_ops rk_iommu_pm_ops = {
+	SET_RUNTIME_PM_OPS(rk_iommu_suspend, rk_iommu_resume, NULL)
+	SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+				pm_runtime_force_resume)
+};
+
 static const struct of_device_id rk_iommu_dt_ids[] = {
 	{ .compatible = "rockchip,iommu" },
 	{ /* sentinel */ }
@@ -1217,45 +1262,22 @@ MODULE_DEVICE_TABLE(of, rk_iommu_dt_ids);
 
 static struct platform_driver rk_iommu_driver = {
 	.probe = rk_iommu_probe,
-	.remove = rk_iommu_remove,
+	.shutdown = rk_iommu_shutdown,
 	.driver = {
 		   .name = "rk_iommu",
 		   .of_match_table = rk_iommu_dt_ids,
+		   .pm = &rk_iommu_pm_ops,
+		   .suppress_bind_attrs = true,
 	},
 };
 
 static int __init rk_iommu_init(void)
 {
-	struct device_node *np;
-	int ret;
-
-	np = of_find_matching_node(NULL, rk_iommu_dt_ids);
-	if (!np)
-		return 0;
-
-	of_node_put(np);
-
-	ret = bus_set_iommu(&platform_bus_type, &rk_iommu_ops);
-	if (ret)
-		return ret;
-
-	ret = platform_driver_register(&rk_iommu_domain_driver);
-	if (ret)
-		return ret;
-
-	ret = platform_driver_register(&rk_iommu_driver);
-	if (ret)
-		platform_driver_unregister(&rk_iommu_domain_driver);
-	return ret;
+	return platform_driver_register(&rk_iommu_driver);
 }
-static void __exit rk_iommu_exit(void)
-{
-	platform_driver_unregister(&rk_iommu_driver);
-	platform_driver_unregister(&rk_iommu_domain_driver);
-}
-
 subsys_initcall(rk_iommu_init);
-module_exit(rk_iommu_exit);
+
+IOMMU_OF_DECLARE(rk_iommu_of, "rockchip,iommu");
 
 MODULE_DESCRIPTION("IOMMU API for Rockchip");
 MODULE_AUTHOR("Simon Xue <xxm@rock-chips.com> and Daniel Kurtz <djkurtz@chromium.org>");
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 2982e93d2369..5416f2b2ac21 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -3612,7 +3612,8 @@ static int __init gic_acpi_parse_madt_its(struct acpi_subtable_header *header,
 		return -ENOMEM;
 	}
 
-	err = iort_register_domain_token(its_entry->translation_id, dom_handle);
+	err = iort_register_domain_token(its_entry->translation_id, res.start,
+					 dom_handle);
 	if (err) {
 		pr_err("ITS@%pa: Unable to register GICv3 ITS domain token (ITS ID %d) to IORT\n",
 		       &res.start, its_entry->translation_id);
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 2c8ac3688815..edff083f7c4e 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -201,7 +201,7 @@ config BLK_DEV_DM_BUILTIN
 config BLK_DEV_DM
 	tristate "Device mapper support"
 	select BLK_DEV_DM_BUILTIN
-	select DAX
+	depends on DAX || DAX=n
 	---help---
 	  Device-mapper is a low level volume manager.  It works by allowing
 	  people to specify mappings for ranges of logical sectors.  Various
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 99297212eeec..775c06d953b7 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -154,6 +154,7 @@ static int linear_iterate_devices(struct dm_target *ti,
 	return fn(ti, lc->dev, lc->start, ti->len, data);
 }
 
+#if IS_ENABLED(CONFIG_DAX_DRIVER)
 static long linear_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
 		long nr_pages, void **kaddr, pfn_t *pfn)
 {
@@ -184,6 +185,11 @@ static size_t linear_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff,
 	return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i);
 }
 
+#else
+#define linear_dax_direct_access NULL
+#define linear_dax_copy_from_iter NULL
+#endif
+
 static struct target_type linear_target = {
 	.name   = "linear",
 	.version = {1, 4, 0},
diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index 9de072b7782a..c90c7c08a77f 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -611,51 +611,6 @@ static int log_mark(struct log_writes_c *lc, char *data)
 	return 0;
 }
 
-static int log_dax(struct log_writes_c *lc, sector_t sector, size_t bytes,
-		   struct iov_iter *i)
-{
-	struct pending_block *block;
-
-	if (!bytes)
-		return 0;
-
-	block = kzalloc(sizeof(struct pending_block), GFP_KERNEL);
-	if (!block) {
-		DMERR("Error allocating dax pending block");
-		return -ENOMEM;
-	}
-
-	block->data = kzalloc(bytes, GFP_KERNEL);
-	if (!block->data) {
-		DMERR("Error allocating dax data space");
-		kfree(block);
-		return -ENOMEM;
-	}
-
-	/* write data provided via the iterator */
-	if (!copy_from_iter(block->data, bytes, i)) {
-		DMERR("Error copying dax data");
-		kfree(block->data);
-		kfree(block);
-		return -EIO;
-	}
-
-	/* rewind the iterator so that the block driver can use it */
-	iov_iter_revert(i, bytes);
-
-	block->datalen = bytes;
-	block->sector = bio_to_dev_sectors(lc, sector);
-	block->nr_sectors = ALIGN(bytes, lc->sectorsize) >> lc->sectorshift;
-
-	atomic_inc(&lc->pending_blocks);
-	spin_lock_irq(&lc->blocks_lock);
-	list_add_tail(&block->list, &lc->unflushed_blocks);
-	spin_unlock_irq(&lc->blocks_lock);
-	wake_up_process(lc->log_kthread);
-
-	return 0;
-}
-
 static void log_writes_dtr(struct dm_target *ti)
 {
 	struct log_writes_c *lc = ti->private;
@@ -925,6 +880,52 @@ static void log_writes_io_hints(struct dm_target *ti, struct queue_limits *limit
 	limits->io_min = limits->physical_block_size;
 }
 
+#if IS_ENABLED(CONFIG_DAX_DRIVER)
+static int log_dax(struct log_writes_c *lc, sector_t sector, size_t bytes,
+		   struct iov_iter *i)
+{
+	struct pending_block *block;
+
+	if (!bytes)
+		return 0;
+
+	block = kzalloc(sizeof(struct pending_block), GFP_KERNEL);
+	if (!block) {
+		DMERR("Error allocating dax pending block");
+		return -ENOMEM;
+	}
+
+	block->data = kzalloc(bytes, GFP_KERNEL);
+	if (!block->data) {
+		DMERR("Error allocating dax data space");
+		kfree(block);
+		return -ENOMEM;
+	}
+
+	/* write data provided via the iterator */
+	if (!copy_from_iter(block->data, bytes, i)) {
+		DMERR("Error copying dax data");
+		kfree(block->data);
+		kfree(block);
+		return -EIO;
+	}
+
+	/* rewind the iterator so that the block driver can use it */
+	iov_iter_revert(i, bytes);
+
+	block->datalen = bytes;
+	block->sector = bio_to_dev_sectors(lc, sector);
+	block->nr_sectors = ALIGN(bytes, lc->sectorsize) >> lc->sectorshift;
+
+	atomic_inc(&lc->pending_blocks);
+	spin_lock_irq(&lc->blocks_lock);
+	list_add_tail(&block->list, &lc->unflushed_blocks);
+	spin_unlock_irq(&lc->blocks_lock);
+	wake_up_process(lc->log_kthread);
+
+	return 0;
+}
+
 static long log_writes_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
 					 long nr_pages, void **kaddr, pfn_t *pfn)
 {
@@ -961,6 +962,10 @@ static size_t log_writes_dax_copy_from_iter(struct dm_target *ti,
 dax_copy:
 	return dax_copy_from_iter(lc->dev->dax_dev, pgoff, addr, bytes, i);
 }
+#else
+#define log_writes_dax_direct_access NULL
+#define log_writes_dax_copy_from_iter NULL
+#endif
 
 static struct target_type log_writes_target = {
 	.name   = "log-writes",
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index bb907cb3e60d..fe7fb9b1aec3 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -313,6 +313,7 @@ static int stripe_map(struct dm_target *ti, struct bio *bio)
 	return DM_MAPIO_REMAPPED;
 }
 
+#if IS_ENABLED(CONFIG_DAX_DRIVER)
 static long stripe_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
 		long nr_pages, void **kaddr, pfn_t *pfn)
 {
@@ -353,6 +354,11 @@ static size_t stripe_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff,
 	return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i);
 }
 
+#else
+#define stripe_dax_direct_access NULL
+#define stripe_dax_copy_from_iter NULL
+#endif
+
 /*
  * Stripe status:
  *
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 5a81c47be4e4..4ea404dbcf0b 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1826,7 +1826,7 @@ static void cleanup_mapped_device(struct mapped_device *md)
 static struct mapped_device *alloc_dev(int minor)
 {
 	int r, numa_node_id = dm_get_numa_node();
-	struct dax_device *dax_dev;
+	struct dax_device *dax_dev = NULL;
 	struct mapped_device *md;
 	void *old_md;
 
@@ -1892,9 +1892,11 @@ static struct mapped_device *alloc_dev(int minor)
 	md->disk->private_data = md;
 	sprintf(md->disk->disk_name, "dm-%d", minor);
 
-	dax_dev = alloc_dax(md, md->disk->disk_name, &dm_dax_ops);
-	if (!dax_dev)
-		goto bad;
+	if (IS_ENABLED(CONFIG_DAX_DRIVER)) {
+		dax_dev = alloc_dax(md, md->disk->disk_name, &dm_dax_ops);
+		if (!dax_dev)
+			goto bad;
+	}
 	md->dax_dev = dax_dev;
 
 	add_disk_no_queue_reg(md->disk);
diff --git a/drivers/media/cec/cec-pin.c b/drivers/media/cec/cec-pin.c
index fafe1ebc8aff..2a5df99735fa 100644
--- a/drivers/media/cec/cec-pin.c
+++ b/drivers/media/cec/cec-pin.c
@@ -668,7 +668,7 @@ static void cec_pin_rx_states(struct cec_pin *pin, ktime_t ts)
 		/* Start bit low is too short, go back to idle */
 		if (delta < CEC_TIM_START_BIT_LOW_MIN - CEC_TIM_IDLE_SAMPLE) {
 			if (!pin->rx_start_bit_low_too_short_cnt++) {
-				pin->rx_start_bit_low_too_short_ts = pin->ts;
+				pin->rx_start_bit_low_too_short_ts = ktime_to_ns(pin->ts);
 				pin->rx_start_bit_low_too_short_delta = delta;
 			}
 			cec_pin_to_idle(pin);
@@ -700,7 +700,7 @@ static void cec_pin_rx_states(struct cec_pin *pin, ktime_t ts)
 		/* Start bit is too short, go back to idle */
 		if (delta < CEC_TIM_START_BIT_TOTAL_MIN - CEC_TIM_IDLE_SAMPLE) {
 			if (!pin->rx_start_bit_too_short_cnt++) {
-				pin->rx_start_bit_too_short_ts = pin->ts;
+				pin->rx_start_bit_too_short_ts = ktime_to_ns(pin->ts);
 				pin->rx_start_bit_too_short_delta = delta;
 			}
 			cec_pin_to_idle(pin);
@@ -770,7 +770,7 @@ static void cec_pin_rx_states(struct cec_pin *pin, ktime_t ts)
 		 */
 		if (delta < CEC_TIM_DATA_BIT_TOTAL_MIN) {
 			if (!pin->rx_data_bit_too_short_cnt++) {
-				pin->rx_data_bit_too_short_ts = pin->ts;
+				pin->rx_data_bit_too_short_ts = ktime_to_ns(pin->ts);
 				pin->rx_data_bit_too_short_delta = delta;
 			}
 			cec_pin_low(pin);
diff --git a/drivers/media/common/v4l2-tpg/v4l2-tpg-core.c b/drivers/media/common/v4l2-tpg/v4l2-tpg-core.c
index 37632bc524d4..9b64f4f354bf 100644
--- a/drivers/media/common/v4l2-tpg/v4l2-tpg-core.c
+++ b/drivers/media/common/v4l2-tpg/v4l2-tpg-core.c
@@ -1149,7 +1149,7 @@ static void gen_twopix(struct tpg_data *tpg,
 	case V4L2_PIX_FMT_NV42:
 		buf[0][offset] = r_y_h;
 		buf[1][2 * offset] = b_v;
-		buf[1][(2 * offset + 1) %8] = g_u_s;
+		buf[1][(2 * offset + 1) % 8] = g_u_s;
 		break;
 
 	case V4L2_PIX_FMT_YUYV:
diff --git a/drivers/media/dvb-core/dvb_frontend.c b/drivers/media/dvb-core/dvb_frontend.c
index 21a7d4b47e1a..e33414975065 100644
--- a/drivers/media/dvb-core/dvb_frontend.c
+++ b/drivers/media/dvb-core/dvb_frontend.c
@@ -2089,7 +2089,7 @@ static int dvb_frontend_handle_compat_ioctl(struct file *file, unsigned int cmd,
 		}
 		for (i = 0; i < tvps->num; i++) {
 			err = dtv_property_process_get(
-			    fe, &getp, (struct dtv_property *)tvp + i, file);
+			    fe, &getp, (struct dtv_property *)(tvp + i), file);
 			if (err < 0) {
 				kfree(tvp);
 				return err;
diff --git a/drivers/media/i2c/adv748x/adv748x-afe.c b/drivers/media/i2c/adv748x/adv748x-afe.c
index 5188178588c9..61514bae7e5c 100644
--- a/drivers/media/i2c/adv748x/adv748x-afe.c
+++ b/drivers/media/i2c/adv748x/adv748x-afe.c
@@ -275,7 +275,8 @@ static int adv748x_afe_s_stream(struct v4l2_subdev *sd, int enable)
 {
 	struct adv748x_afe *afe = adv748x_sd_to_afe(sd);
 	struct adv748x_state *state = adv748x_afe_to_state(afe);
-	int ret, signal = V4L2_IN_ST_NO_SIGNAL;
+	u32 signal = V4L2_IN_ST_NO_SIGNAL;
+	int ret;
 
 	mutex_lock(&state->mutex);
 
diff --git a/drivers/media/i2c/dw9714.c b/drivers/media/i2c/dw9714.c
index 8dbbf0f917df..91fae01d052b 100644
--- a/drivers/media/i2c/dw9714.c
+++ b/drivers/media/i2c/dw9714.c
@@ -1,15 +1,5 @@
-/*
- * Copyright (c) 2015--2017 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2015--2017 Intel Corporation.
 
 #include <linux/delay.h>
 #include <linux/i2c.h>
diff --git a/drivers/media/i2c/imx274.c b/drivers/media/i2c/imx274.c
index 664e8acdf2a0..daec33f4196a 100644
--- a/drivers/media/i2c/imx274.c
+++ b/drivers/media/i2c/imx274.c
@@ -1426,7 +1426,7 @@ static int imx274_set_vflip(struct stimx274 *priv, int val)
 
 	err = imx274_write_reg(priv, IMX274_VFLIP_REG, val);
 	if (err) {
-		dev_err(&priv->client->dev, "VFILP control error\n");
+		dev_err(&priv->client->dev, "VFLIP control error\n");
 		return err;
 	}
 
diff --git a/drivers/media/i2c/ov13858.c b/drivers/media/i2c/ov13858.c
index 30ee9f71bf0d..3dbcae257164 100644
--- a/drivers/media/i2c/ov13858.c
+++ b/drivers/media/i2c/ov13858.c
@@ -1,16 +1,5 @@
-/*
- * Copyright (c) 2017 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- */
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017 Intel Corporation.
 
 #include <linux/acpi.h>
 #include <linux/i2c.h>
@@ -1375,7 +1364,9 @@ ov13858_set_pad_format(struct v4l2_subdev *sd,
 	if (fmt->format.code != MEDIA_BUS_FMT_SGRBG10_1X10)
 		fmt->format.code = MEDIA_BUS_FMT_SGRBG10_1X10;
 
-	mode = v4l2_find_nearest_size(supported_modes, width, height,
+	mode = v4l2_find_nearest_size(supported_modes,
+				      ARRAY_SIZE(supported_modes),
+				      width, height,
 				      fmt->format.width, fmt->format.height);
 	ov13858_update_pad_format(mode, fmt);
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
diff --git a/drivers/media/i2c/ov2685.c b/drivers/media/i2c/ov2685.c
index 83c55e8288e7..385c1886a947 100644
--- a/drivers/media/i2c/ov2685.c
+++ b/drivers/media/i2c/ov2685.c
@@ -832,7 +832,6 @@ MODULE_DEVICE_TABLE(of, ov2685_of_match);
 static struct i2c_driver ov2685_i2c_driver = {
 	.driver = {
 		.name = "ov2685",
-		.owner = THIS_MODULE,
 		.pm = &ov2685_pm_ops,
 		.of_match_table = of_match_ptr(ov2685_of_match),
 	},
diff --git a/drivers/media/i2c/ov5640.c b/drivers/media/i2c/ov5640.c
index 03940f0cdfa6..852026baa2e7 100644
--- a/drivers/media/i2c/ov5640.c
+++ b/drivers/media/i2c/ov5640.c
@@ -1641,6 +1641,9 @@ static int ov5640_set_mode(struct ov5640_dev *sensor,
 	return 0;
 }
 
+static int ov5640_set_framefmt(struct ov5640_dev *sensor,
+			       struct v4l2_mbus_framefmt *format);
+
 /* restore the last set video mode after chip power-on */
 static int ov5640_restore_mode(struct ov5640_dev *sensor)
 {
@@ -1652,7 +1655,11 @@ static int ov5640_restore_mode(struct ov5640_dev *sensor)
 		return ret;
 
 	/* now restore the last capture mode */
-	return ov5640_set_mode(sensor, &ov5640_mode_init_data);
+	ret = ov5640_set_mode(sensor, &ov5640_mode_init_data);
+	if (ret < 0)
+		return ret;
+
+	return ov5640_set_framefmt(sensor, &sensor->fmt);
 }
 
 static void ov5640_power(struct ov5640_dev *sensor, bool enable)
@@ -1874,7 +1881,13 @@ static int ov5640_try_fmt_internal(struct v4l2_subdev *sd,
 		if (ov5640_formats[i].code == fmt->code)
 			break;
 	if (i >= ARRAY_SIZE(ov5640_formats))
-		fmt->code = ov5640_formats[0].code;
+		i = 0;
+
+	fmt->code = ov5640_formats[i].code;
+	fmt->colorspace = ov5640_formats[i].colorspace;
+	fmt->ycbcr_enc = V4L2_MAP_YCBCR_ENC_DEFAULT(fmt->colorspace);
+	fmt->quantization = V4L2_QUANTIZATION_FULL_RANGE;
+	fmt->xfer_func = V4L2_MAP_XFER_FUNC_DEFAULT(fmt->colorspace);
 
 	return 0;
 }
@@ -1885,6 +1898,7 @@ static int ov5640_set_fmt(struct v4l2_subdev *sd,
 {
 	struct ov5640_dev *sensor = to_ov5640_dev(sd);
 	const struct ov5640_mode_info *new_mode;
+	struct v4l2_mbus_framefmt *mbus_fmt = &format->format;
 	int ret;
 
 	if (format->pad != 0)
@@ -1897,7 +1911,7 @@ static int ov5640_set_fmt(struct v4l2_subdev *sd,
 		goto out;
 	}
 
-	ret = ov5640_try_fmt_internal(sd, &format->format,
+	ret = ov5640_try_fmt_internal(sd, mbus_fmt,
 				      sensor->current_fr, &new_mode);
 	if (ret)
 		goto out;
@@ -1906,12 +1920,12 @@ static int ov5640_set_fmt(struct v4l2_subdev *sd,
 		struct v4l2_mbus_framefmt *fmt =
 			v4l2_subdev_get_try_format(sd, cfg, 0);
 
-		*fmt = format->format;
+		*fmt = *mbus_fmt;
 		goto out;
 	}
 
 	sensor->current_mode = new_mode;
-	sensor->fmt = format->format;
+	sensor->fmt = *mbus_fmt;
 	sensor->pending_mode_change = true;
 out:
 	mutex_unlock(&sensor->lock);
@@ -2496,6 +2510,7 @@ static int ov5640_probe(struct i2c_client *client,
 	struct device *dev = &client->dev;
 	struct fwnode_handle *endpoint;
 	struct ov5640_dev *sensor;
+	struct v4l2_mbus_framefmt *fmt;
 	int ret;
 
 	sensor = devm_kzalloc(dev, sizeof(*sensor), GFP_KERNEL);
@@ -2503,10 +2518,15 @@ static int ov5640_probe(struct i2c_client *client,
 		return -ENOMEM;
 
 	sensor->i2c_client = client;
-	sensor->fmt.code = MEDIA_BUS_FMT_UYVY8_2X8;
-	sensor->fmt.width = 640;
-	sensor->fmt.height = 480;
-	sensor->fmt.field = V4L2_FIELD_NONE;
+	fmt = &sensor->fmt;
+	fmt->code = ov5640_formats[0].code;
+	fmt->colorspace = ov5640_formats[0].colorspace;
+	fmt->ycbcr_enc = V4L2_MAP_YCBCR_ENC_DEFAULT(fmt->colorspace);
+	fmt->quantization = V4L2_QUANTIZATION_FULL_RANGE;
+	fmt->xfer_func = V4L2_MAP_XFER_FUNC_DEFAULT(fmt->colorspace);
+	fmt->width = 640;
+	fmt->height = 480;
+	fmt->field = V4L2_FIELD_NONE;
 	sensor->frame_interval.numerator = 1;
 	sensor->frame_interval.denominator = ov5640_framerates[OV5640_30_FPS];
 	sensor->current_fr = OV5640_30_FPS;
diff --git a/drivers/media/i2c/ov5645.c b/drivers/media/i2c/ov5645.c
index d28845f7356f..4e3142a7e5a7 100644
--- a/drivers/media/i2c/ov5645.c
+++ b/drivers/media/i2c/ov5645.c
@@ -959,23 +959,6 @@ __ov5645_get_pad_crop(struct ov5645 *ov5645, struct v4l2_subdev_pad_config *cfg,
 	}
 }
 
-static const struct ov5645_mode_info *
-ov5645_find_nearest_mode(unsigned int width, unsigned int height)
-{
-	int i;
-
-	for (i = ARRAY_SIZE(ov5645_mode_info_data) - 1; i >= 0; i--) {
-		if (ov5645_mode_info_data[i].width <= width &&
-		    ov5645_mode_info_data[i].height <= height)
-			break;
-	}
-
-	if (i < 0)
-		i = 0;
-
-	return &ov5645_mode_info_data[i];
-}
-
 static int ov5645_set_format(struct v4l2_subdev *sd,
 			     struct v4l2_subdev_pad_config *cfg,
 			     struct v4l2_subdev_format *format)
@@ -989,8 +972,11 @@ static int ov5645_set_format(struct v4l2_subdev *sd,
 	__crop = __ov5645_get_pad_crop(ov5645, cfg, format->pad,
 			format->which);
 
-	new_mode = ov5645_find_nearest_mode(format->format.width,
-					    format->format.height);
+	new_mode = v4l2_find_nearest_size(ov5645_mode_info_data,
+			       ARRAY_SIZE(ov5645_mode_info_data),
+			       width, height,
+			       format->format.width, format->format.height);
+
 	__crop->width = new_mode->width;
 	__crop->height = new_mode->height;
 
@@ -1131,13 +1117,14 @@ static int ov5645_probe(struct i2c_client *client,
 
 	ret = v4l2_fwnode_endpoint_parse(of_fwnode_handle(endpoint),
 					 &ov5645->ep);
+
+	of_node_put(endpoint);
+
 	if (ret < 0) {
 		dev_err(dev, "parsing endpoint node failed\n");
 		return ret;
 	}
 
-	of_node_put(endpoint);
-
 	if (ov5645->ep.bus_type != V4L2_MBUS_CSI2) {
 		dev_err(dev, "invalid bus type, must be CSI2\n");
 		return -EINVAL;
diff --git a/drivers/media/i2c/ov5670.c b/drivers/media/i2c/ov5670.c
index d2db480da1b9..7b7c74d77370 100644
--- a/drivers/media/i2c/ov5670.c
+++ b/drivers/media/i2c/ov5670.c
@@ -1,16 +1,5 @@
-/*
- * Copyright (c) 2017 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- */
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017 Intel Corporation.
 
 #include <linux/acpi.h>
 #include <linux/i2c.h>
@@ -2230,7 +2219,9 @@ static int ov5670_set_pad_format(struct v4l2_subdev *sd,
 
 	fmt->format.code = MEDIA_BUS_FMT_SGRBG10_1X10;
 
-	mode = v4l2_find_nearest_size(supported_modes, width, height,
+	mode = v4l2_find_nearest_size(supported_modes,
+				      ARRAY_SIZE(supported_modes),
+				      width, height,
 				      fmt->format.width, fmt->format.height);
 	ov5670_update_pad_format(mode, fmt);
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
diff --git a/drivers/media/platform/mtk-jpeg/mtk_jpeg_core.c b/drivers/media/platform/mtk-jpeg/mtk_jpeg_core.c
index 226f90886484..af17aaa21f58 100644
--- a/drivers/media/platform/mtk-jpeg/mtk_jpeg_core.c
+++ b/drivers/media/platform/mtk-jpeg/mtk_jpeg_core.c
@@ -1081,11 +1081,11 @@ static int mtk_jpeg_clk_init(struct mtk_jpeg_dev *jpeg)
 
 	jpeg->clk_jdec = devm_clk_get(jpeg->dev, "jpgdec");
 	if (IS_ERR(jpeg->clk_jdec))
-		return -EINVAL;
+		return PTR_ERR(jpeg->clk_jdec);
 
 	jpeg->clk_jdec_smi = devm_clk_get(jpeg->dev, "jpgdec-smi");
 	if (IS_ERR(jpeg->clk_jdec_smi))
-		return -EINVAL;
+		return PTR_ERR(jpeg->clk_jdec_smi);
 
 	return 0;
 }
diff --git a/drivers/media/platform/qcom/venus/firmware.c b/drivers/media/platform/qcom/venus/firmware.c
index 521d4b36c090..c4a577848dd7 100644
--- a/drivers/media/platform/qcom/venus/firmware.c
+++ b/drivers/media/platform/qcom/venus/firmware.c
@@ -76,7 +76,7 @@ int venus_boot(struct device *dev, const char *fwname)
 	}
 
 	ret = qcom_mdt_load(dev, mdt, fwname, VENUS_PAS_ID, mem_va, mem_phys,
-			    mem_size);
+			    mem_size, NULL);
 
 	release_firmware(mdt);
 
diff --git a/drivers/media/platform/qcom/venus/vdec.c b/drivers/media/platform/qcom/venus/vdec.c
index c9e9576bb08a..49bbd1861d3a 100644
--- a/drivers/media/platform/qcom/venus/vdec.c
+++ b/drivers/media/platform/qcom/venus/vdec.c
@@ -135,20 +135,21 @@ find_format_by_index(struct venus_inst *inst, unsigned int index, u32 type)
 		return NULL;
 
 	for (i = 0; i < size; i++) {
+		bool valid;
+
 		if (fmt[i].type != type)
 			continue;
-		if (k == index)
+		valid = type != V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE ||
+			venus_helper_check_codec(inst, fmt[i].pixfmt);
+		if (k == index && valid)
 			break;
-		k++;
+		if (valid)
+			k++;
 	}
 
 	if (i == size)
 		return NULL;
 
-	if (type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE &&
-	    !venus_helper_check_codec(inst, fmt[i].pixfmt))
-		return NULL;
-
 	return &fmt[i];
 }
 
diff --git a/drivers/media/platform/qcom/venus/venc.c b/drivers/media/platform/qcom/venus/venc.c
index e3a10a852cad..6b2ce479584e 100644
--- a/drivers/media/platform/qcom/venus/venc.c
+++ b/drivers/media/platform/qcom/venus/venc.c
@@ -120,20 +120,21 @@ find_format_by_index(struct venus_inst *inst, unsigned int index, u32 type)
 		return NULL;
 
 	for (i = 0; i < size; i++) {
+		bool valid;
+
 		if (fmt[i].type != type)
 			continue;
-		if (k == index)
+		valid = type != V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE ||
+			venus_helper_check_codec(inst, fmt[i].pixfmt);
+		if (k == index && valid)
 			break;
-		k++;
+		if (valid)
+			k++;
 	}
 
 	if (i == size)
 		return NULL;
 
-	if (type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE &&
-	    !venus_helper_check_codec(inst, fmt[i].pixfmt))
-		return NULL;
-
 	return &fmt[i];
 }
 
diff --git a/drivers/media/platform/vivid/vivid-vid-cap.c b/drivers/media/platform/vivid/vivid-vid-cap.c
index 01c703683657..1599159f2574 100644
--- a/drivers/media/platform/vivid/vivid-vid-cap.c
+++ b/drivers/media/platform/vivid/vivid-vid-cap.c
@@ -561,8 +561,9 @@ int vivid_try_fmt_vid_cap(struct file *file, void *priv,
 	mp->field = vivid_field_cap(dev, mp->field);
 	if (vivid_is_webcam(dev)) {
 		const struct v4l2_frmsize_discrete *sz =
-			v4l2_find_nearest_size(webcam_sizes, width, height,
-					       mp->width, mp->height);
+			v4l2_find_nearest_size(webcam_sizes,
+					       VIVID_WEBCAM_SIZES, width,
+					       height, mp->width, mp->height);
 
 		w = sz->width;
 		h = sz->height;
diff --git a/drivers/media/platform/vsp1/vsp1_wpf.c b/drivers/media/platform/vsp1/vsp1_wpf.c
index f7f3b4b2c2de..8bd6b2f1af15 100644
--- a/drivers/media/platform/vsp1/vsp1_wpf.c
+++ b/drivers/media/platform/vsp1/vsp1_wpf.c
@@ -452,7 +452,7 @@ static void wpf_configure(struct vsp1_entity *entity,
 			: VI6_WPF_SRCRPF_RPF_ACT_SUB(input->entity.index);
 	}
 
-	if (pipe->bru || pipe->num_inputs > 1)
+	if (pipe->bru)
 		srcrpf |= pipe->bru->type == VSP1_ENTITY_BRU
 			? VI6_WPF_SRCRPF_VIRACT_MST
 			: VI6_WPF_SRCRPF_VIRACT2_MST;
diff --git a/drivers/media/tuners/r820t.c b/drivers/media/tuners/r820t.c
index bc9299059f48..3e14b9e2e763 100644
--- a/drivers/media/tuners/r820t.c
+++ b/drivers/media/tuners/r820t.c
@@ -20,6 +20,8 @@
 //
 //	RF Gain set/get is not implemented.
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/videodev2.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
@@ -2371,7 +2373,7 @@ err:
 err_no_gate:
 	mutex_unlock(&r820t_list_mutex);
 
-	tuner_info("%s: failed=%d\n", __func__, rc);
+	pr_info("%s: failed=%d\n", __func__, rc);
 	r820t_release(fe);
 	return NULL;
 }
diff --git a/drivers/media/usb/cx231xx/cx231xx-dvb.c b/drivers/media/usb/cx231xx/cx231xx-dvb.c
index 713029420fcf..67ed66712d05 100644
--- a/drivers/media/usb/cx231xx/cx231xx-dvb.c
+++ b/drivers/media/usb/cx231xx/cx231xx-dvb.c
@@ -276,7 +276,7 @@ static int start_streaming(struct cx231xx_dvb *dvb)
 
 	if (dev->USE_ISO) {
 		dev_dbg(dev->dev, "DVB transfer mode is ISO.\n");
-		cx231xx_set_alt_setting(dev, INDEX_TS1, 4);
+		cx231xx_set_alt_setting(dev, INDEX_TS1, 5);
 		rc = cx231xx_set_mode(dev, CX231XX_DIGITAL_MODE);
 		if (rc < 0)
 			return rc;
diff --git a/drivers/media/usb/gspca/Kconfig b/drivers/media/usb/gspca/Kconfig
index d214a21acff7..bc9a439745aa 100644
--- a/drivers/media/usb/gspca/Kconfig
+++ b/drivers/media/usb/gspca/Kconfig
@@ -7,7 +7,7 @@ menuconfig USB_GSPCA
 	  Say Y here if you want to enable selecting webcams based
 	  on the GSPCA framework.
 
-	  See <file:Documentation/video4linux/gspca.txt> for more info.
+	  See <file:Documentation/media/v4l-drivers/gspca-cardlist.rst> for more info.
 
 	  This driver uses the Video For Linux API. You must say Y or M to
 	  "Video For Linux" to use this driver.
diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c
index 5198c9eeb348..4312935f1dfc 100644
--- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c
+++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c
@@ -101,7 +101,7 @@ static int get_v4l2_window32(struct v4l2_window __user *kp,
 static int put_v4l2_window32(struct v4l2_window __user *kp,
 			     struct v4l2_window32 __user *up)
 {
-	struct v4l2_clip __user *kclips = kp->clips;
+	struct v4l2_clip __user *kclips;
 	struct v4l2_clip32 __user *uclips;
 	compat_caddr_t p;
 	u32 clipcount;
@@ -116,6 +116,8 @@ static int put_v4l2_window32(struct v4l2_window __user *kp,
 	if (!clipcount)
 		return 0;
 
+	if (get_user(kclips, &kp->clips))
+		return -EFAULT;
 	if (get_user(p, &up->clips))
 		return -EFAULT;
 	uclips = compat_ptr(p);
diff --git a/drivers/media/v4l2-core/v4l2-dev.c b/drivers/media/v4l2-core/v4l2-dev.c
index 0301fe426a43..1d0b2208e8fb 100644
--- a/drivers/media/v4l2-core/v4l2-dev.c
+++ b/drivers/media/v4l2-core/v4l2-dev.c
@@ -939,10 +939,14 @@ int __video_register_device(struct video_device *vdev,
 #endif
 	vdev->minor = i + minor_offset;
 	vdev->num = nr;
-	devnode_set(vdev);
 
 	/* Should not happen since we thought this minor was free */
-	WARN_ON(video_device[vdev->minor] != NULL);
+	if (WARN_ON(video_device[vdev->minor])) {
+		mutex_unlock(&videodev_lock);
+		printk(KERN_ERR "video_device not empty!\n");
+		return -ENFILE;
+	}
+	devnode_set(vdev);
 	vdev->index = get_index(vdev);
 	video_device[vdev->minor] = vdev;
 	mutex_unlock(&videodev_lock);
diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c
index 24108bfad889..6193270e7b3d 100644
--- a/drivers/misc/kgdbts.c
+++ b/drivers/misc/kgdbts.c
@@ -400,10 +400,14 @@ static void skip_back_repeat_test(char *arg)
 	int go_back = simple_strtol(arg, NULL, 10);
 
 	repeat_test--;
-	if (repeat_test <= 0)
+	if (repeat_test <= 0) {
 		ts.idx++;
-	else
+	} else {
+		if (repeat_test % 100 == 0)
+			v1printk("kgdbts:RUN ... %d remaining\n", repeat_test);
+
 		ts.idx -= go_back;
+	}
 	fill_get_buf(ts.tst[ts.idx].get);
 }
 
diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index 02485e310c81..9e923cd1d80e 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -3080,6 +3080,7 @@ static void __exit mmc_blk_exit(void)
 	mmc_unregister_driver(&mmc_driver);
 	unregister_blkdev(MMC_BLOCK_MAJOR, "mmc");
 	unregister_chrdev_region(mmc_rpmb_devt, MAX_DEVICES);
+	bus_unregister(&mmc_rpmb_bus_type);
 }
 
 module_init(mmc_blk_init);
diff --git a/drivers/mmc/host/jz4740_mmc.c b/drivers/mmc/host/jz4740_mmc.c
index 712e08d9a45e..a0168e9e4fce 100644
--- a/drivers/mmc/host/jz4740_mmc.c
+++ b/drivers/mmc/host/jz4740_mmc.c
@@ -362,9 +362,9 @@ static void jz4740_mmc_set_irq_enabled(struct jz4740_mmc_host *host,
 		host->irq_mask &= ~irq;
 	else
 		host->irq_mask |= irq;
-	spin_unlock_irqrestore(&host->lock, flags);
 
 	writew(host->irq_mask, host->base + JZ_REG_MMC_IMASK);
+	spin_unlock_irqrestore(&host->lock, flags);
 }
 
 static void jz4740_mmc_clock_enable(struct jz4740_mmc_host *host,
diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c
index e30df9ad8197..308029930304 100644
--- a/drivers/mmc/host/tmio_mmc_core.c
+++ b/drivers/mmc/host/tmio_mmc_core.c
@@ -913,7 +913,7 @@ static void tmio_mmc_finish_request(struct tmio_mmc_host *host)
 		host->check_scc_error(host);
 
 	/* If SET_BLOCK_COUNT, continue with main command */
-	if (host->mrq) {
+	if (host->mrq && !mrq->cmd->error) {
 		tmio_process_mrq(host, mrq);
 		return;
 	}
diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c
index b1fc28f63882..d0b63bbf46a7 100644
--- a/drivers/mtd/ubi/block.c
+++ b/drivers/mtd/ubi/block.c
@@ -244,7 +244,7 @@ static int ubiblock_open(struct block_device *bdev, fmode_t mode)
 	 * in any case.
 	 */
 	if (mode & FMODE_WRITE) {
-		ret = -EPERM;
+		ret = -EROFS;
 		goto out_unlock;
 	}
 
diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c
index e941395de3ae..753494e042d5 100644
--- a/drivers/mtd/ubi/build.c
+++ b/drivers/mtd/ubi/build.c
@@ -854,6 +854,17 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num,
 		return -EINVAL;
 	}
 
+	/*
+	 * Both UBI and UBIFS have been designed for SLC NAND and NOR flashes.
+	 * MLC NAND is different and needs special care, otherwise UBI or UBIFS
+	 * will die soon and you will lose all your data.
+	 */
+	if (mtd->type == MTD_MLCNANDFLASH) {
+		pr_err("ubi: refuse attaching mtd%d - MLC NAND is not supported\n",
+			mtd->index);
+		return -EINVAL;
+	}
+
 	if (ubi_num == UBI_DEV_NUM_AUTO) {
 		/* Search for an empty slot in the @ubi_devices array */
 		for (ubi_num = 0; ubi_num < UBI_MAX_DEVICES; ubi_num++)
diff --git a/drivers/mtd/ubi/fastmap-wl.c b/drivers/mtd/ubi/fastmap-wl.c
index 590d967011bb..98f7d6be8d1f 100644
--- a/drivers/mtd/ubi/fastmap-wl.c
+++ b/drivers/mtd/ubi/fastmap-wl.c
@@ -362,7 +362,6 @@ static void ubi_fastmap_close(struct ubi_device *ubi)
 {
 	int i;
 
-	flush_work(&ubi->fm_work);
 	return_unused_pool_pebs(ubi, &ubi->fm_pool);
 	return_unused_pool_pebs(ubi, &ubi->fm_wl_pool);
 
diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c
index 9c2567b0d93e..dfad93fca0a6 100644
--- a/drivers/net/ethernet/sfc/mcdi.c
+++ b/drivers/net/ethernet/sfc/mcdi.c
@@ -375,7 +375,7 @@ static int efx_mcdi_poll(struct efx_nic *efx)
 	 * because generally mcdi responses are fast. After that, back off
 	 * and poll once a jiffy (approximately)
 	 */
-	spins = TICK_USEC;
+	spins = USER_TICK_USEC;
 	finish = jiffies + MCDI_RPC_TIMEOUT;
 
 	while (1) {
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 6afe896e5cb8..96d26cfae90b 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -253,7 +253,7 @@ static inline void hwsim_clear_chanctx_magic(struct ieee80211_chanctx_conf *c)
 
 static unsigned int hwsim_net_id;
 
-static struct ida hwsim_netgroup_ida = IDA_INIT;
+static DEFINE_IDA(hwsim_netgroup_ida);
 
 struct hwsim_net {
 	int netgroup;
diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig
index a65f2e1d9f53..85997184e047 100644
--- a/drivers/nvdimm/Kconfig
+++ b/drivers/nvdimm/Kconfig
@@ -20,7 +20,7 @@ if LIBNVDIMM
 config BLK_DEV_PMEM
 	tristate "PMEM: Persistent memory block device support"
 	default LIBNVDIMM
-	select DAX
+	select DAX_DRIVER
 	select ND_BTT if BTT
 	select ND_PFN if NVDIMM_PFN
 	help
@@ -102,4 +102,15 @@ config NVDIMM_DAX
 
 	  Select Y if unsure
 
+config OF_PMEM
+	# FIXME: make tristate once OF_NUMA dependency removed
+	bool "Device-tree support for persistent memory regions"
+	depends on OF
+	default LIBNVDIMM
+	help
+	  Allows regions of persistent memory to be described in the
+	  device-tree.
+
+	  Select Y if unsure.
+
 endif
diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile
index 70d5f3ad9909..e8847045dac0 100644
--- a/drivers/nvdimm/Makefile
+++ b/drivers/nvdimm/Makefile
@@ -4,6 +4,7 @@ obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o
 obj-$(CONFIG_ND_BTT) += nd_btt.o
 obj-$(CONFIG_ND_BLK) += nd_blk.o
 obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o
+obj-$(CONFIG_OF_PMEM) += of_pmem.o
 
 nd_pmem-y := pmem.o
 
diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c
index d58925295aa7..795ad4ff35ca 100644
--- a/drivers/nvdimm/btt_devs.c
+++ b/drivers/nvdimm/btt_devs.c
@@ -26,7 +26,7 @@ static void nd_btt_release(struct device *dev)
 	struct nd_region *nd_region = to_nd_region(dev->parent);
 	struct nd_btt *nd_btt = to_nd_btt(dev);
 
-	dev_dbg(dev, "%s\n", __func__);
+	dev_dbg(dev, "trace\n");
 	nd_detach_ndns(&nd_btt->dev, &nd_btt->ndns);
 	ida_simple_remove(&nd_region->btt_ida, nd_btt->id);
 	kfree(nd_btt->uuid);
@@ -74,8 +74,8 @@ static ssize_t sector_size_store(struct device *dev,
 	nvdimm_bus_lock(dev);
 	rc = nd_size_select_store(dev, buf, &nd_btt->lbasize,
 			btt_lbasize_supported);
-	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
-			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
+			buf[len - 1] == '\n' ? "" : "\n");
 	nvdimm_bus_unlock(dev);
 	device_unlock(dev);
 
@@ -101,8 +101,8 @@ static ssize_t uuid_store(struct device *dev,
 
 	device_lock(dev);
 	rc = nd_uuid_store(dev, &nd_btt->uuid, buf, len);
-	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
-			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
+			buf[len - 1] == '\n' ? "" : "\n");
 	device_unlock(dev);
 
 	return rc ? rc : len;
@@ -131,8 +131,8 @@ static ssize_t namespace_store(struct device *dev,
 	device_lock(dev);
 	nvdimm_bus_lock(dev);
 	rc = nd_namespace_store(dev, &nd_btt->ndns, buf, len);
-	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
-			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
+			buf[len - 1] == '\n' ? "" : "\n");
 	nvdimm_bus_unlock(dev);
 	device_unlock(dev);
 
@@ -206,8 +206,8 @@ static struct device *__nd_btt_create(struct nd_region *nd_region,
 	dev->groups = nd_btt_attribute_groups;
 	device_initialize(&nd_btt->dev);
 	if (ndns && !__nd_attach_ndns(&nd_btt->dev, ndns, &nd_btt->ndns)) {
-		dev_dbg(&ndns->dev, "%s failed, already claimed by %s\n",
-				__func__, dev_name(ndns->claim));
+		dev_dbg(&ndns->dev, "failed, already claimed by %s\n",
+				dev_name(ndns->claim));
 		put_device(dev);
 		return NULL;
 	}
@@ -346,8 +346,7 @@ int nd_btt_probe(struct device *dev, struct nd_namespace_common *ndns)
 		return -ENOMEM;
 	btt_sb = devm_kzalloc(dev, sizeof(*btt_sb), GFP_KERNEL);
 	rc = __nd_btt_probe(to_nd_btt(btt_dev), ndns, btt_sb);
-	dev_dbg(dev, "%s: btt: %s\n", __func__,
-			rc == 0 ? dev_name(btt_dev) : "<none>");
+	dev_dbg(dev, "btt: %s\n", rc == 0 ? dev_name(btt_dev) : "<none>");
 	if (rc < 0) {
 		struct nd_btt *nd_btt = to_nd_btt(btt_dev);
 
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 78eabc3a1ab1..a64023690cad 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -358,6 +358,7 @@ struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
 	nvdimm_bus->dev.release = nvdimm_bus_release;
 	nvdimm_bus->dev.groups = nd_desc->attr_groups;
 	nvdimm_bus->dev.bus = &nvdimm_bus_type;
+	nvdimm_bus->dev.of_node = nd_desc->of_node;
 	dev_set_name(&nvdimm_bus->dev, "ndbus%d", nvdimm_bus->id);
 	rc = device_register(&nvdimm_bus->dev);
 	if (rc) {
@@ -984,8 +985,8 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
 
 	if (cmd == ND_CMD_CALL) {
 		func = pkg.nd_command;
-		dev_dbg(dev, "%s:%s, idx: %llu, in: %u, out: %u, len %llu\n",
-				__func__, dimm_name, pkg.nd_command,
+		dev_dbg(dev, "%s, idx: %llu, in: %u, out: %u, len %llu\n",
+				dimm_name, pkg.nd_command,
 				in_len, out_len, buf_len);
 	}
 
@@ -996,8 +997,8 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
 		u32 copy;
 
 		if (out_size == UINT_MAX) {
-			dev_dbg(dev, "%s:%s unknown output size cmd: %s field: %d\n",
-					__func__, dimm_name, cmd_name, i);
+			dev_dbg(dev, "%s unknown output size cmd: %s field: %d\n",
+					dimm_name, cmd_name, i);
 			return -EFAULT;
 		}
 		if (out_len < sizeof(out_env))
@@ -1012,9 +1013,8 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
 
 	buf_len = (u64) out_len + (u64) in_len;
 	if (buf_len > ND_IOCTL_MAX_BUFLEN) {
-		dev_dbg(dev, "%s:%s cmd: %s buf_len: %llu > %d\n", __func__,
-				dimm_name, cmd_name, buf_len,
-				ND_IOCTL_MAX_BUFLEN);
+		dev_dbg(dev, "%s cmd: %s buf_len: %llu > %d\n", dimm_name,
+				cmd_name, buf_len, ND_IOCTL_MAX_BUFLEN);
 		return -EINVAL;
 	}
 
diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
index b2fc29b8279b..30852270484f 100644
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -148,7 +148,7 @@ ssize_t nd_namespace_store(struct device *dev,
 	char *name;
 
 	if (dev->driver) {
-		dev_dbg(dev, "%s: -EBUSY\n", __func__);
+		dev_dbg(dev, "namespace already active\n");
 		return -EBUSY;
 	}
 
diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c
index 1dc527660637..acce050856a8 100644
--- a/drivers/nvdimm/core.c
+++ b/drivers/nvdimm/core.c
@@ -134,7 +134,7 @@ static void nvdimm_map_release(struct kref *kref)
 	nvdimm_map = container_of(kref, struct nvdimm_map, kref);
 	nvdimm_bus = nvdimm_map->nvdimm_bus;
 
-	dev_dbg(&nvdimm_bus->dev, "%s: %pa\n", __func__, &nvdimm_map->offset);
+	dev_dbg(&nvdimm_bus->dev, "%pa\n", &nvdimm_map->offset);
 	list_del(&nvdimm_map->list);
 	if (nvdimm_map->flags)
 		memunmap(nvdimm_map->mem);
@@ -230,8 +230,8 @@ static int nd_uuid_parse(struct device *dev, u8 *uuid_out, const char *buf,
 
 	for (i = 0; i < 16; i++) {
 		if (!isxdigit(str[0]) || !isxdigit(str[1])) {
-			dev_dbg(dev, "%s: pos: %d buf[%zd]: %c buf[%zd]: %c\n",
-					__func__, i, str - buf, str[0],
+			dev_dbg(dev, "pos: %d buf[%zd]: %c buf[%zd]: %c\n",
+					i, str - buf, str[0],
 					str + 1 - buf, str[1]);
 			return -EINVAL;
 		}
diff --git a/drivers/nvdimm/dax_devs.c b/drivers/nvdimm/dax_devs.c
index 1bf2bd318371..0453f49dc708 100644
--- a/drivers/nvdimm/dax_devs.c
+++ b/drivers/nvdimm/dax_devs.c
@@ -24,7 +24,7 @@ static void nd_dax_release(struct device *dev)
 	struct nd_dax *nd_dax = to_nd_dax(dev);
 	struct nd_pfn *nd_pfn = &nd_dax->nd_pfn;
 
-	dev_dbg(dev, "%s\n", __func__);
+	dev_dbg(dev, "trace\n");
 	nd_detach_ndns(dev, &nd_pfn->ndns);
 	ida_simple_remove(&nd_region->dax_ida, nd_pfn->id);
 	kfree(nd_pfn->uuid);
@@ -129,8 +129,7 @@ int nd_dax_probe(struct device *dev, struct nd_namespace_common *ndns)
 	pfn_sb = devm_kzalloc(dev, sizeof(*pfn_sb), GFP_KERNEL);
 	nd_pfn->pfn_sb = pfn_sb;
 	rc = nd_pfn_validate(nd_pfn, DAX_SIG);
-	dev_dbg(dev, "%s: dax: %s\n", __func__,
-			rc == 0 ? dev_name(dax_dev) : "<none>");
+	dev_dbg(dev, "dax: %s\n", rc == 0 ? dev_name(dax_dev) : "<none>");
 	if (rc < 0) {
 		nd_detach_ndns(dax_dev, &nd_pfn->ndns);
 		put_device(dax_dev);
diff --git a/drivers/nvdimm/dimm.c b/drivers/nvdimm/dimm.c
index f8913b8124b6..233907889f96 100644
--- a/drivers/nvdimm/dimm.c
+++ b/drivers/nvdimm/dimm.c
@@ -67,9 +67,11 @@ static int nvdimm_probe(struct device *dev)
 	ndd->ns_next = nd_label_next_nsindex(ndd->ns_current);
 	nd_label_copy(ndd, to_next_namespace_index(ndd),
 			to_current_namespace_index(ndd));
-	rc = nd_label_reserve_dpa(ndd);
-	if (ndd->ns_current >= 0)
-		nvdimm_set_aliasing(dev);
+	if (ndd->ns_current >= 0) {
+		rc = nd_label_reserve_dpa(ndd);
+		if (rc == 0)
+			nvdimm_set_aliasing(dev);
+	}
 	nvdimm_clear_locked(dev);
 	nvdimm_bus_unlock(dev);
 
diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c
index 097794d9f786..e00d45522b80 100644
--- a/drivers/nvdimm/dimm_devs.c
+++ b/drivers/nvdimm/dimm_devs.c
@@ -131,7 +131,7 @@ int nvdimm_init_config_data(struct nvdimm_drvdata *ndd)
 		}
 		memcpy(ndd->data + offset, cmd->out_buf, cmd->in_length);
 	}
-	dev_dbg(ndd->dev, "%s: len: %zu rc: %d\n", __func__, offset, rc);
+	dev_dbg(ndd->dev, "len: %zu rc: %d\n", offset, rc);
 	kfree(cmd);
 
 	return rc;
@@ -266,8 +266,7 @@ void nvdimm_drvdata_release(struct kref *kref)
 	struct device *dev = ndd->dev;
 	struct resource *res, *_r;
 
-	dev_dbg(dev, "%s\n", __func__);
-
+	dev_dbg(dev, "trace\n");
 	nvdimm_bus_lock(dev);
 	for_each_dpa_resource_safe(ndd, res, _r)
 		nvdimm_free_dpa(ndd, res);
@@ -660,7 +659,7 @@ int nvdimm_bus_check_dimm_count(struct nvdimm_bus *nvdimm_bus, int dimm_count)
 	nd_synchronize();
 
 	device_for_each_child(&nvdimm_bus->dev, &count, count_dimms);
-	dev_dbg(&nvdimm_bus->dev, "%s: count: %d\n", __func__, count);
+	dev_dbg(&nvdimm_bus->dev, "count: %d\n", count);
 	if (count != dimm_count)
 		return -ENXIO;
 	return 0;
diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c
index de66c02f6140..1d28cd656536 100644
--- a/drivers/nvdimm/label.c
+++ b/drivers/nvdimm/label.c
@@ -45,9 +45,27 @@ unsigned sizeof_namespace_label(struct nvdimm_drvdata *ndd)
 	return ndd->nslabel_size;
 }
 
+static size_t __sizeof_namespace_index(u32 nslot)
+{
+	return ALIGN(sizeof(struct nd_namespace_index) + DIV_ROUND_UP(nslot, 8),
+			NSINDEX_ALIGN);
+}
+
+static int __nvdimm_num_label_slots(struct nvdimm_drvdata *ndd,
+		size_t index_size)
+{
+	return (ndd->nsarea.config_size - index_size * 2) /
+			sizeof_namespace_label(ndd);
+}
+
 int nvdimm_num_label_slots(struct nvdimm_drvdata *ndd)
 {
-	return ndd->nsarea.config_size / (sizeof_namespace_label(ndd) + 1);
+	u32 tmp_nslot, n;
+
+	tmp_nslot = ndd->nsarea.config_size / sizeof_namespace_label(ndd);
+	n = __sizeof_namespace_index(tmp_nslot) / NSINDEX_ALIGN;
+
+	return __nvdimm_num_label_slots(ndd, NSINDEX_ALIGN * n);
 }
 
 size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd)
@@ -55,18 +73,14 @@ size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd)
 	u32 nslot, space, size;
 
 	/*
-	 * The minimum index space is 512 bytes, with that amount of
-	 * index we can describe ~1400 labels which is less than a byte
-	 * of overhead per label.  Round up to a byte of overhead per
-	 * label and determine the size of the index region.  Yes, this
-	 * starts to waste space at larger config_sizes, but it's
-	 * unlikely we'll ever see anything but 128K.
+	 * Per UEFI 2.7, the minimum size of the Label Storage Area is large
+	 * enough to hold 2 index blocks and 2 labels.  The minimum index
+	 * block size is 256 bytes, and the minimum label size is 256 bytes.
 	 */
 	nslot = nvdimm_num_label_slots(ndd);
 	space = ndd->nsarea.config_size - nslot * sizeof_namespace_label(ndd);
-	size = ALIGN(sizeof(struct nd_namespace_index) + DIV_ROUND_UP(nslot, 8),
-			NSINDEX_ALIGN) * 2;
-	if (size <= space)
+	size = __sizeof_namespace_index(nslot) * 2;
+	if (size <= space && nslot >= 2)
 		return size / 2;
 
 	dev_err(ndd->dev, "label area (%d) too small to host (%d byte) labels\n",
@@ -121,8 +135,7 @@ static int __nd_label_validate(struct nvdimm_drvdata *ndd)
 
 		memcpy(sig, nsindex[i]->sig, NSINDEX_SIG_LEN);
 		if (memcmp(sig, NSINDEX_SIGNATURE, NSINDEX_SIG_LEN) != 0) {
-			dev_dbg(dev, "%s: nsindex%d signature invalid\n",
-					__func__, i);
+			dev_dbg(dev, "nsindex%d signature invalid\n", i);
 			continue;
 		}
 
@@ -135,8 +148,8 @@ static int __nd_label_validate(struct nvdimm_drvdata *ndd)
 			labelsize = 128;
 
 		if (labelsize != sizeof_namespace_label(ndd)) {
-			dev_dbg(dev, "%s: nsindex%d labelsize %d invalid\n",
-					__func__, i, nsindex[i]->labelsize);
+			dev_dbg(dev, "nsindex%d labelsize %d invalid\n",
+					i, nsindex[i]->labelsize);
 			continue;
 		}
 
@@ -145,30 +158,28 @@ static int __nd_label_validate(struct nvdimm_drvdata *ndd)
 		sum = nd_fletcher64(nsindex[i], sizeof_namespace_index(ndd), 1);
 		nsindex[i]->checksum = __cpu_to_le64(sum_save);
 		if (sum != sum_save) {
-			dev_dbg(dev, "%s: nsindex%d checksum invalid\n",
-					__func__, i);
+			dev_dbg(dev, "nsindex%d checksum invalid\n", i);
 			continue;
 		}
 
 		seq = __le32_to_cpu(nsindex[i]->seq);
 		if ((seq & NSINDEX_SEQ_MASK) == 0) {
-			dev_dbg(dev, "%s: nsindex%d sequence: %#x invalid\n",
-					__func__, i, seq);
+			dev_dbg(dev, "nsindex%d sequence: %#x invalid\n", i, seq);
 			continue;
 		}
 
 		/* sanity check the index against expected values */
 		if (__le64_to_cpu(nsindex[i]->myoff)
 				!= i * sizeof_namespace_index(ndd)) {
-			dev_dbg(dev, "%s: nsindex%d myoff: %#llx invalid\n",
-					__func__, i, (unsigned long long)
+			dev_dbg(dev, "nsindex%d myoff: %#llx invalid\n",
+					i, (unsigned long long)
 					__le64_to_cpu(nsindex[i]->myoff));
 			continue;
 		}
 		if (__le64_to_cpu(nsindex[i]->otheroff)
 				!= (!i) * sizeof_namespace_index(ndd)) {
-			dev_dbg(dev, "%s: nsindex%d otheroff: %#llx invalid\n",
-					__func__, i, (unsigned long long)
+			dev_dbg(dev, "nsindex%d otheroff: %#llx invalid\n",
+					i, (unsigned long long)
 					__le64_to_cpu(nsindex[i]->otheroff));
 			continue;
 		}
@@ -176,8 +187,7 @@ static int __nd_label_validate(struct nvdimm_drvdata *ndd)
 		size = __le64_to_cpu(nsindex[i]->mysize);
 		if (size > sizeof_namespace_index(ndd)
 				|| size < sizeof(struct nd_namespace_index)) {
-			dev_dbg(dev, "%s: nsindex%d mysize: %#llx invalid\n",
-					__func__, i, size);
+			dev_dbg(dev, "nsindex%d mysize: %#llx invalid\n", i, size);
 			continue;
 		}
 
@@ -185,9 +195,8 @@ static int __nd_label_validate(struct nvdimm_drvdata *ndd)
 		if (nslot * sizeof_namespace_label(ndd)
 				+ 2 * sizeof_namespace_index(ndd)
 				> ndd->nsarea.config_size) {
-			dev_dbg(dev, "%s: nsindex%d nslot: %u invalid, config_size: %#x\n",
-					__func__, i, nslot,
-					ndd->nsarea.config_size);
+			dev_dbg(dev, "nsindex%d nslot: %u invalid, config_size: %#x\n",
+					i, nslot, ndd->nsarea.config_size);
 			continue;
 		}
 		valid[i] = true;
@@ -356,8 +365,8 @@ static bool slot_valid(struct nvdimm_drvdata *ndd,
 		sum = nd_fletcher64(nd_label, sizeof_namespace_label(ndd), 1);
 		nd_label->checksum = __cpu_to_le64(sum_save);
 		if (sum != sum_save) {
-			dev_dbg(ndd->dev, "%s fail checksum. slot: %d expect: %#llx\n",
-				__func__, slot, sum);
+			dev_dbg(ndd->dev, "fail checksum. slot: %d expect: %#llx\n",
+				slot, sum);
 			return false;
 		}
 	}
@@ -422,8 +431,8 @@ int nd_label_active_count(struct nvdimm_drvdata *ndd)
 			u64 dpa = __le64_to_cpu(nd_label->dpa);
 
 			dev_dbg(ndd->dev,
-				"%s: slot%d invalid slot: %d dpa: %llx size: %llx\n",
-					__func__, slot, label_slot, dpa, size);
+				"slot%d invalid slot: %d dpa: %llx size: %llx\n",
+					slot, label_slot, dpa, size);
 			continue;
 		}
 		count++;
@@ -650,7 +659,7 @@ static int __pmem_label_update(struct nd_region *nd_region,
 	slot = nd_label_alloc_slot(ndd);
 	if (slot == UINT_MAX)
 		return -ENXIO;
-	dev_dbg(ndd->dev, "%s: allocated: %d\n", __func__, slot);
+	dev_dbg(ndd->dev, "allocated: %d\n", slot);
 
 	nd_label = to_label(ndd, slot);
 	memset(nd_label, 0, sizeof_namespace_label(ndd));
@@ -678,7 +687,7 @@ static int __pmem_label_update(struct nd_region *nd_region,
 		sum = nd_fletcher64(nd_label, sizeof_namespace_label(ndd), 1);
 		nd_label->checksum = __cpu_to_le64(sum);
 	}
-	nd_dbg_dpa(nd_region, ndd, res, "%s\n", __func__);
+	nd_dbg_dpa(nd_region, ndd, res, "\n");
 
 	/* update label */
 	offset = nd_label_offset(ndd, nd_label);
@@ -700,7 +709,7 @@ static int __pmem_label_update(struct nd_region *nd_region,
 		break;
 	}
 	if (victim) {
-		dev_dbg(ndd->dev, "%s: free: %d\n", __func__, slot);
+		dev_dbg(ndd->dev, "free: %d\n", slot);
 		slot = to_slot(ndd, victim->label);
 		nd_label_free_slot(ndd, slot);
 		victim->label = NULL;
@@ -868,7 +877,7 @@ static int __blk_label_update(struct nd_region *nd_region,
 		slot = nd_label_alloc_slot(ndd);
 		if (slot == UINT_MAX)
 			goto abort;
-		dev_dbg(ndd->dev, "%s: allocated: %d\n", __func__, slot);
+		dev_dbg(ndd->dev, "allocated: %d\n", slot);
 
 		nd_label = to_label(ndd, slot);
 		memset(nd_label, 0, sizeof_namespace_label(ndd));
@@ -928,7 +937,7 @@ static int __blk_label_update(struct nd_region *nd_region,
 
 	/* free up now unused slots in the new index */
 	for_each_set_bit(slot, victim_map, victim_map ? nslot : 0) {
-		dev_dbg(ndd->dev, "%s: free: %d\n", __func__, slot);
+		dev_dbg(ndd->dev, "free: %d\n", slot);
 		nd_label_free_slot(ndd, slot);
 	}
 
@@ -1092,7 +1101,7 @@ static int del_labels(struct nd_mapping *nd_mapping, u8 *uuid)
 		active--;
 		slot = to_slot(ndd, nd_label);
 		nd_label_free_slot(ndd, slot);
-		dev_dbg(ndd->dev, "%s: free: %d\n", __func__, slot);
+		dev_dbg(ndd->dev, "free: %d\n", slot);
 		list_move_tail(&label_ent->list, &list);
 		label_ent->label = NULL;
 	}
@@ -1100,7 +1109,7 @@ static int del_labels(struct nd_mapping *nd_mapping, u8 *uuid)
 
 	if (active == 0) {
 		nd_mapping_free_labels(nd_mapping);
-		dev_dbg(ndd->dev, "%s: no more active labels\n", __func__);
+		dev_dbg(ndd->dev, "no more active labels\n");
 	}
 	mutex_unlock(&nd_mapping->lock);
 
diff --git a/drivers/nvdimm/label.h b/drivers/nvdimm/label.h
index 1ebf4d3d01ba..18bbe183b3a9 100644
--- a/drivers/nvdimm/label.h
+++ b/drivers/nvdimm/label.h
@@ -33,7 +33,7 @@ enum {
 	BTTINFO_UUID_LEN = 16,
 	BTTINFO_FLAG_ERROR = 0x1,    /* error state (read-only) */
 	BTTINFO_MAJOR_VERSION = 1,
-	ND_LABEL_MIN_SIZE = 512 * 129, /* see sizeof_namespace_index() */
+	ND_LABEL_MIN_SIZE = 256 * 4, /* see sizeof_namespace_index() */
 	ND_LABEL_ID_SIZE = 50,
 	ND_NSINDEX_INIT = 0x1,
 };
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index 658ada497be0..28afdd668905 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -421,7 +421,7 @@ static ssize_t alt_name_store(struct device *dev,
 	rc = __alt_name_store(dev, buf, len);
 	if (rc >= 0)
 		rc = nd_namespace_label_update(nd_region, dev);
-	dev_dbg(dev, "%s: %s(%zd)\n", __func__, rc < 0 ? "fail " : "", rc);
+	dev_dbg(dev, "%s(%zd)\n", rc < 0 ? "fail " : "", rc);
 	nvdimm_bus_unlock(dev);
 	device_unlock(dev);
 
@@ -1007,7 +1007,7 @@ static ssize_t __size_store(struct device *dev, unsigned long long val)
 	if (uuid_not_set(uuid, dev, __func__))
 		return -ENXIO;
 	if (nd_region->ndr_mappings == 0) {
-		dev_dbg(dev, "%s: not associated with dimm(s)\n", __func__);
+		dev_dbg(dev, "not associated with dimm(s)\n");
 		return -ENXIO;
 	}
 
@@ -1105,8 +1105,7 @@ static ssize_t size_store(struct device *dev,
 		*uuid = NULL;
 	}
 
-	dev_dbg(dev, "%s: %llx %s (%d)\n", __func__, val, rc < 0
-			? "fail" : "success", rc);
+	dev_dbg(dev, "%llx %s (%d)\n", val, rc < 0 ? "fail" : "success", rc);
 
 	nvdimm_bus_unlock(dev);
 	device_unlock(dev);
@@ -1270,8 +1269,8 @@ static ssize_t uuid_store(struct device *dev,
 		rc = nd_namespace_label_update(nd_region, dev);
 	else
 		kfree(uuid);
-	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
-			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
+			buf[len - 1] == '\n' ? "" : "\n");
 	nvdimm_bus_unlock(dev);
 	device_unlock(dev);
 
@@ -1355,9 +1354,8 @@ static ssize_t sector_size_store(struct device *dev,
 		rc = nd_size_select_store(dev, buf, lbasize, supported);
 	if (rc >= 0)
 		rc = nd_namespace_label_update(nd_region, dev);
-	dev_dbg(dev, "%s: result: %zd %s: %s%s", __func__,
-			rc, rc < 0 ? "tried" : "wrote", buf,
-			buf[len - 1] == '\n' ? "" : "\n");
+	dev_dbg(dev, "result: %zd %s: %s%s", rc, rc < 0 ? "tried" : "wrote",
+			buf, buf[len - 1] == '\n' ? "" : "\n");
 	nvdimm_bus_unlock(dev);
 	device_unlock(dev);
 
@@ -1519,7 +1517,7 @@ static ssize_t holder_class_store(struct device *dev,
 	rc = __holder_class_store(dev, buf);
 	if (rc >= 0)
 		rc = nd_namespace_label_update(nd_region, dev);
-	dev_dbg(dev, "%s: %s(%zd)\n", __func__, rc < 0 ? "fail " : "", rc);
+	dev_dbg(dev, "%s(%zd)\n", rc < 0 ? "fail " : "", rc);
 	nvdimm_bus_unlock(dev);
 	device_unlock(dev);
 
@@ -1717,8 +1715,7 @@ struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev)
 		if (uuid_not_set(nsblk->uuid, &ndns->dev, __func__))
 			return ERR_PTR(-ENODEV);
 		if (!nsblk->lbasize) {
-			dev_dbg(&ndns->dev, "%s: sector size not set\n",
-				__func__);
+			dev_dbg(&ndns->dev, "sector size not set\n");
 			return ERR_PTR(-ENODEV);
 		}
 		if (!nd_namespace_blk_validate(nsblk))
@@ -1798,9 +1795,7 @@ static bool has_uuid_at_pos(struct nd_region *nd_region, u8 *uuid,
 			}
 
 			if (found_uuid) {
-				dev_dbg(ndd->dev,
-						"%s duplicate entry for uuid\n",
-						__func__);
+				dev_dbg(ndd->dev, "duplicate entry for uuid\n");
 				return false;
 			}
 			found_uuid = true;
@@ -1926,7 +1921,7 @@ static struct device *create_namespace_pmem(struct nd_region *nd_region,
 	}
 
 	if (i < nd_region->ndr_mappings) {
-		struct nvdimm_drvdata *ndd = to_ndd(&nd_region->mapping[i]);
+		struct nvdimm *nvdimm = nd_region->mapping[i].nvdimm;
 
 		/*
 		 * Give up if we don't find an instance of a uuid at each
@@ -1934,7 +1929,7 @@ static struct device *create_namespace_pmem(struct nd_region *nd_region,
 		 * find a dimm with two instances of the same uuid.
 		 */
 		dev_err(&nd_region->dev, "%s missing label for %pUb\n",
-				dev_name(ndd->dev), nd_label->uuid);
+				nvdimm_name(nvdimm), nd_label->uuid);
 		rc = -EINVAL;
 		goto err;
 	}
@@ -1994,14 +1989,13 @@ static struct device *create_namespace_pmem(struct nd_region *nd_region,
 	namespace_pmem_release(dev);
 	switch (rc) {
 	case -EINVAL:
-		dev_dbg(&nd_region->dev, "%s: invalid label(s)\n", __func__);
+		dev_dbg(&nd_region->dev, "invalid label(s)\n");
 		break;
 	case -ENODEV:
-		dev_dbg(&nd_region->dev, "%s: label not found\n", __func__);
+		dev_dbg(&nd_region->dev, "label not found\n");
 		break;
 	default:
-		dev_dbg(&nd_region->dev, "%s: unexpected err: %d\n",
-				__func__, rc);
+		dev_dbg(&nd_region->dev, "unexpected err: %d\n", rc);
 		break;
 	}
 	return ERR_PTR(rc);
@@ -2334,8 +2328,8 @@ static struct device **scan_labels(struct nd_region *nd_region)
 
 	}
 
-	dev_dbg(&nd_region->dev, "%s: discovered %d %s namespace%s\n",
-			__func__, count, is_nd_blk(&nd_region->dev)
+	dev_dbg(&nd_region->dev, "discovered %d %s namespace%s\n",
+			count, is_nd_blk(&nd_region->dev)
 			? "blk" : "pmem", count == 1 ? "" : "s");
 
 	if (count == 0) {
@@ -2467,7 +2461,7 @@ static int init_active_labels(struct nd_region *nd_region)
 		get_ndd(ndd);
 
 		count = nd_label_active_count(ndd);
-		dev_dbg(ndd->dev, "%s: %d\n", __func__, count);
+		dev_dbg(ndd->dev, "count: %d\n", count);
 		if (!count)
 			continue;
 		for (j = 0; j < count; j++) {
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index 184e070d50a2..32e0364b48b9 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -340,7 +340,6 @@ static inline struct device *nd_dax_create(struct nd_region *nd_region)
 }
 #endif
 
-struct nd_region *to_nd_region(struct device *dev);
 int nd_region_to_nstype(struct nd_region *nd_region);
 int nd_region_register_namespaces(struct nd_region *nd_region, int *err);
 u64 nd_region_interleave_set_cookie(struct nd_region *nd_region,
diff --git a/drivers/nvdimm/of_pmem.c b/drivers/nvdimm/of_pmem.c
new file mode 100644
index 000000000000..85013bad35de
--- /dev/null
+++ b/drivers/nvdimm/of_pmem.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#define pr_fmt(fmt) "of_pmem: " fmt
+
+#include <linux/of_platform.h>
+#include <linux/of_address.h>
+#include <linux/libnvdimm.h>
+#include <linux/module.h>
+#include <linux/ioport.h>
+#include <linux/slab.h>
+
+static const struct attribute_group *region_attr_groups[] = {
+	&nd_region_attribute_group,
+	&nd_device_attribute_group,
+	NULL,
+};
+
+static const struct attribute_group *bus_attr_groups[] = {
+	&nvdimm_bus_attribute_group,
+	NULL,
+};
+
+struct of_pmem_private {
+	struct nvdimm_bus_descriptor bus_desc;
+	struct nvdimm_bus *bus;
+};
+
+static int of_pmem_region_probe(struct platform_device *pdev)
+{
+	struct of_pmem_private *priv;
+	struct device_node *np;
+	struct nvdimm_bus *bus;
+	bool is_volatile;
+	int i;
+
+	np = dev_of_node(&pdev->dev);
+	if (!np)
+		return -ENXIO;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->bus_desc.attr_groups = bus_attr_groups;
+	priv->bus_desc.provider_name = "of_pmem";
+	priv->bus_desc.module = THIS_MODULE;
+	priv->bus_desc.of_node = np;
+
+	priv->bus = bus = nvdimm_bus_register(&pdev->dev, &priv->bus_desc);
+	if (!bus) {
+		kfree(priv);
+		return -ENODEV;
+	}
+	platform_set_drvdata(pdev, priv);
+
+	is_volatile = !!of_find_property(np, "volatile", NULL);
+	dev_dbg(&pdev->dev, "Registering %s regions from %pOF\n",
+			is_volatile ? "volatile" : "non-volatile",  np);
+
+	for (i = 0; i < pdev->num_resources; i++) {
+		struct nd_region_desc ndr_desc;
+		struct nd_region *region;
+
+		/*
+		 * NB: libnvdimm copies the data from ndr_desc into it's own
+		 * structures so passing a stack pointer is fine.
+		 */
+		memset(&ndr_desc, 0, sizeof(ndr_desc));
+		ndr_desc.attr_groups = region_attr_groups;
+		ndr_desc.numa_node = of_node_to_nid(np);
+		ndr_desc.res = &pdev->resource[i];
+		ndr_desc.of_node = np;
+		set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
+
+		if (is_volatile)
+			region = nvdimm_volatile_region_create(bus, &ndr_desc);
+		else
+			region = nvdimm_pmem_region_create(bus, &ndr_desc);
+
+		if (!region)
+			dev_warn(&pdev->dev, "Unable to register region %pR from %pOF\n",
+					ndr_desc.res, np);
+		else
+			dev_dbg(&pdev->dev, "Registered region %pR from %pOF\n",
+					ndr_desc.res, np);
+	}
+
+	return 0;
+}
+
+static int of_pmem_region_remove(struct platform_device *pdev)
+{
+	struct of_pmem_private *priv = platform_get_drvdata(pdev);
+
+	nvdimm_bus_unregister(priv->bus);
+	kfree(priv);
+
+	return 0;
+}
+
+static const struct of_device_id of_pmem_region_match[] = {
+	{ .compatible = "pmem-region" },
+	{ },
+};
+
+static struct platform_driver of_pmem_region_driver = {
+	.probe = of_pmem_region_probe,
+	.remove = of_pmem_region_remove,
+	.driver = {
+		.name = "of_pmem",
+		.owner = THIS_MODULE,
+		.of_match_table = of_pmem_region_match,
+	},
+};
+
+module_platform_driver(of_pmem_region_driver);
+MODULE_DEVICE_TABLE(of, of_pmem_region_match);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("IBM Corporation");
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index 2f4d18752c97..30b08791597d 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -27,7 +27,7 @@ static void nd_pfn_release(struct device *dev)
 	struct nd_region *nd_region = to_nd_region(dev->parent);
 	struct nd_pfn *nd_pfn = to_nd_pfn(dev);
 
-	dev_dbg(dev, "%s\n", __func__);
+	dev_dbg(dev, "trace\n");
 	nd_detach_ndns(&nd_pfn->dev, &nd_pfn->ndns);
 	ida_simple_remove(&nd_region->pfn_ida, nd_pfn->id);
 	kfree(nd_pfn->uuid);
@@ -94,8 +94,8 @@ static ssize_t mode_store(struct device *dev,
 		else
 			rc = -EINVAL;
 	}
-	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
-			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
+			buf[len - 1] == '\n' ? "" : "\n");
 	nvdimm_bus_unlock(dev);
 	device_unlock(dev);
 
@@ -144,8 +144,8 @@ static ssize_t align_store(struct device *dev,
 	nvdimm_bus_lock(dev);
 	rc = nd_size_select_store(dev, buf, &nd_pfn->align,
 			nd_pfn_supported_alignments());
-	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
-			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
+			buf[len - 1] == '\n' ? "" : "\n");
 	nvdimm_bus_unlock(dev);
 	device_unlock(dev);
 
@@ -171,8 +171,8 @@ static ssize_t uuid_store(struct device *dev,
 
 	device_lock(dev);
 	rc = nd_uuid_store(dev, &nd_pfn->uuid, buf, len);
-	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
-			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
+			buf[len - 1] == '\n' ? "" : "\n");
 	device_unlock(dev);
 
 	return rc ? rc : len;
@@ -201,8 +201,8 @@ static ssize_t namespace_store(struct device *dev,
 	device_lock(dev);
 	nvdimm_bus_lock(dev);
 	rc = nd_namespace_store(dev, &nd_pfn->ndns, buf, len);
-	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
-			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
+			buf[len - 1] == '\n' ? "" : "\n");
 	nvdimm_bus_unlock(dev);
 	device_unlock(dev);
 
@@ -314,8 +314,8 @@ struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn,
 	dev = &nd_pfn->dev;
 	device_initialize(&nd_pfn->dev);
 	if (ndns && !__nd_attach_ndns(&nd_pfn->dev, ndns, &nd_pfn->ndns)) {
-		dev_dbg(&ndns->dev, "%s failed, already claimed by %s\n",
-				__func__, dev_name(ndns->claim));
+		dev_dbg(&ndns->dev, "failed, already claimed by %s\n",
+				dev_name(ndns->claim));
 		put_device(dev);
 		return NULL;
 	}
@@ -510,8 +510,7 @@ int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns)
 	nd_pfn = to_nd_pfn(pfn_dev);
 	nd_pfn->pfn_sb = pfn_sb;
 	rc = nd_pfn_validate(nd_pfn, PFN_SIG);
-	dev_dbg(dev, "%s: pfn: %s\n", __func__,
-			rc == 0 ? dev_name(pfn_dev) : "<none>");
+	dev_dbg(dev, "pfn: %s\n", rc == 0 ? dev_name(pfn_dev) : "<none>");
 	if (rc < 0) {
 		nd_detach_ndns(pfn_dev, &nd_pfn->ndns);
 		put_device(pfn_dev);
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 5a96d30c294a..9d714926ecf5 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -66,7 +66,7 @@ static blk_status_t pmem_clear_poison(struct pmem_device *pmem,
 		rc = BLK_STS_IOERR;
 	if (cleared > 0 && cleared / 512) {
 		cleared /= 512;
-		dev_dbg(dev, "%s: %#llx clear %ld sector%s\n", __func__,
+		dev_dbg(dev, "%#llx clear %ld sector%s\n",
 				(unsigned long long) sector, cleared,
 				cleared > 1 ? "s" : "");
 		badblocks_clear(&pmem->bb, sector, cleared);
@@ -547,17 +547,7 @@ static struct nd_device_driver nd_pmem_driver = {
 	.type = ND_DRIVER_NAMESPACE_IO | ND_DRIVER_NAMESPACE_PMEM,
 };
 
-static int __init pmem_init(void)
-{
-	return nd_driver_register(&nd_pmem_driver);
-}
-module_init(pmem_init);
-
-static void pmem_exit(void)
-{
-	driver_unregister(&nd_pmem_driver.drv);
-}
-module_exit(pmem_exit);
+module_nd_driver(nd_pmem_driver);
 
 MODULE_AUTHOR("Ross Zwisler <ross.zwisler@linux.intel.com>");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c
index 034f0a07d627..b9ca0033cc99 100644
--- a/drivers/nvdimm/region.c
+++ b/drivers/nvdimm/region.c
@@ -27,10 +27,10 @@ static int nd_region_probe(struct device *dev)
 	if (nd_region->num_lanes > num_online_cpus()
 			&& nd_region->num_lanes < num_possible_cpus()
 			&& !test_and_set_bit(0, &once)) {
-		dev_info(dev, "online cpus (%d) < concurrent i/o lanes (%d) < possible cpus (%d)\n",
+		dev_dbg(dev, "online cpus (%d) < concurrent i/o lanes (%d) < possible cpus (%d)\n",
 				num_online_cpus(), nd_region->num_lanes,
 				num_possible_cpus());
-		dev_info(dev, "setting nr_cpus=%d may yield better libnvdimm device performance\n",
+		dev_dbg(dev, "setting nr_cpus=%d may yield better libnvdimm device performance\n",
 				nd_region->num_lanes);
 	}
 
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index 1593e1806b16..a612be6f019d 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -182,6 +182,14 @@ struct nd_region *to_nd_region(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(to_nd_region);
 
+struct device *nd_region_dev(struct nd_region *nd_region)
+{
+	if (!nd_region)
+		return NULL;
+	return &nd_region->dev;
+}
+EXPORT_SYMBOL_GPL(nd_region_dev);
+
 struct nd_blk_region *to_nd_blk_region(struct device *dev)
 {
 	struct nd_region *nd_region = to_nd_region(dev);
@@ -1014,6 +1022,7 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
 	dev->parent = &nvdimm_bus->dev;
 	dev->type = dev_type;
 	dev->groups = ndr_desc->attr_groups;
+	dev->of_node = ndr_desc->of_node;
 	nd_region->ndr_size = resource_size(ndr_desc->res);
 	nd_region->ndr_start = ndr_desc->res->start;
 	nd_device_register(dev);
diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c
index 02c5984ab09b..6bb37c18292a 100644
--- a/drivers/of/unittest.c
+++ b/drivers/of/unittest.c
@@ -295,7 +295,7 @@ static void __init of_unittest_printf(void)
 		return;
 	}
 
-	num_to_str(phandle_str, sizeof(phandle_str), np->phandle);
+	num_to_str(phandle_str, sizeof(phandle_str), np->phandle, 0);
 
 	of_unittest_printf_one(np, "%pOF",  full_name);
 	of_unittest_printf_one(np, "%pOFf", full_name);
diff --git a/drivers/platform/mellanox/mlxreg-hotplug.c b/drivers/platform/mellanox/mlxreg-hotplug.c
index 313cf8ad77bf..ea9e7f4479ca 100644
--- a/drivers/platform/mellanox/mlxreg-hotplug.c
+++ b/drivers/platform/mellanox/mlxreg-hotplug.c
@@ -93,9 +93,11 @@ struct mlxreg_hotplug_priv_data {
 	bool after_probe;
 };
 
-static int mlxreg_hotplug_device_create(struct device *dev,
+static int mlxreg_hotplug_device_create(struct mlxreg_hotplug_priv_data *priv,
 					struct mlxreg_core_data *data)
 {
+	struct mlxreg_core_hotplug_platform_data *pdata;
+
 	/*
 	 * Return if adapter number is negative. It could be in case hotplug
 	 * event is not associated with hotplug device.
@@ -103,19 +105,21 @@ static int mlxreg_hotplug_device_create(struct device *dev,
 	if (data->hpdev.nr < 0)
 		return 0;
 
-	data->hpdev.adapter = i2c_get_adapter(data->hpdev.nr);
+	pdata = dev_get_platdata(&priv->pdev->dev);
+	data->hpdev.adapter = i2c_get_adapter(data->hpdev.nr +
+					      pdata->shift_nr);
 	if (!data->hpdev.adapter) {
-		dev_err(dev, "Failed to get adapter for bus %d\n",
-			data->hpdev.nr);
+		dev_err(priv->dev, "Failed to get adapter for bus %d\n",
+			data->hpdev.nr + pdata->shift_nr);
 		return -EFAULT;
 	}
 
 	data->hpdev.client = i2c_new_device(data->hpdev.adapter,
 					    data->hpdev.brdinfo);
 	if (!data->hpdev.client) {
-		dev_err(dev, "Failed to create client %s at bus %d at addr 0x%02x\n",
-			data->hpdev.brdinfo->type, data->hpdev.nr,
-			data->hpdev.brdinfo->addr);
+		dev_err(priv->dev, "Failed to create client %s at bus %d at addr 0x%02x\n",
+			data->hpdev.brdinfo->type, data->hpdev.nr +
+			pdata->shift_nr, data->hpdev.brdinfo->addr);
 
 		i2c_put_adapter(data->hpdev.adapter);
 		data->hpdev.adapter = NULL;
@@ -270,10 +274,10 @@ mlxreg_hotplug_work_helper(struct mlxreg_hotplug_priv_data *priv,
 			if (item->inversed)
 				mlxreg_hotplug_device_destroy(data);
 			else
-				mlxreg_hotplug_device_create(priv->dev, data);
+				mlxreg_hotplug_device_create(priv, data);
 		} else {
 			if (item->inversed)
-				mlxreg_hotplug_device_create(priv->dev, data);
+				mlxreg_hotplug_device_create(priv, data);
 			else
 				mlxreg_hotplug_device_destroy(data);
 		}
@@ -319,7 +323,7 @@ mlxreg_hotplug_health_work_helper(struct mlxreg_hotplug_priv_data *priv,
 		if (regval == MLXREG_HOTPLUG_HEALTH_MASK) {
 			if ((data->health_cntr++ == MLXREG_HOTPLUG_RST_CNTR) ||
 			    !priv->after_probe) {
-				mlxreg_hotplug_device_create(priv->dev, data);
+				mlxreg_hotplug_device_create(priv, data);
 				data->attached = true;
 			}
 		} else {
@@ -550,6 +554,7 @@ static int mlxreg_hotplug_probe(struct platform_device *pdev)
 {
 	struct mlxreg_core_hotplug_platform_data *pdata;
 	struct mlxreg_hotplug_priv_data *priv;
+	struct i2c_adapter *deferred_adap;
 	int err;
 
 	pdata = dev_get_platdata(&pdev->dev);
@@ -558,6 +563,12 @@ static int mlxreg_hotplug_probe(struct platform_device *pdev)
 		return -EINVAL;
 	}
 
+	/* Defer probing if the necessary adapter is not configured yet. */
+	deferred_adap = i2c_get_adapter(pdata->deferred_nr);
+	if (!deferred_adap)
+		return -EPROBE_DEFER;
+	i2c_put_adapter(deferred_adap);
+
 	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
 	if (!priv)
 		return -ENOMEM;
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index ef016e46544a..39d06dd1f63a 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -757,6 +757,8 @@ config TOPSTAR_LAPTOP
 	depends on ACPI
 	depends on INPUT
 	select INPUT_SPARSEKMAP
+	select LEDS_CLASS
+	select NEW_LEDS
 	---help---
 	  This driver adds support for hotkeys found on Topstar laptops.
 
@@ -1174,6 +1176,7 @@ config INTEL_TELEMETRY
 
 config MLX_PLATFORM
 	tristate "Mellanox Technologies platform support"
+	depends on I2C && REGMAP
 	---help---
 	  This option enables system support for the Mellanox Technologies
 	  platform. The Mellanox systems provide data center networking
diff --git a/drivers/platform/x86/dell-smbios-base.c b/drivers/platform/x86/dell-smbios-base.c
index 2485c80a9fdd..33fb2a20458a 100644
--- a/drivers/platform/x86/dell-smbios-base.c
+++ b/drivers/platform/x86/dell-smbios-base.c
@@ -514,7 +514,7 @@ static int build_tokens_sysfs(struct platform_device *dev)
 		continue;
 
 loop_fail_create_value:
-		kfree(value_name);
+		kfree(location_name);
 		goto out_unwind_strings;
 	}
 	smbios_attribute_group.attrs = token_attrs;
@@ -525,7 +525,7 @@ loop_fail_create_value:
 	return 0;
 
 out_unwind_strings:
-	for (i = i-1; i > 0; i--) {
+	while (i--) {
 		kfree(token_location_attrs[i].attr.name);
 		kfree(token_value_attrs[i].attr.name);
 	}
diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c
index 2cfbd3fa5136..cd95b6f3a064 100644
--- a/drivers/platform/x86/fujitsu-laptop.c
+++ b/drivers/platform/x86/fujitsu-laptop.c
@@ -53,6 +53,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/acpi.h>
+#include <linux/bitops.h>
 #include <linux/dmi.h>
 #include <linux/backlight.h>
 #include <linux/fb.h>
@@ -61,12 +62,11 @@
 #include <linux/kfifo.h>
 #include <linux/leds.h>
 #include <linux/platform_device.h>
-#include <linux/slab.h>
 #include <acpi/video.h>
 
-#define FUJITSU_DRIVER_VERSION "0.6.0"
+#define FUJITSU_DRIVER_VERSION		"0.6.0"
 
-#define FUJITSU_LCD_N_LEVELS 8
+#define FUJITSU_LCD_N_LEVELS		8
 
 #define ACPI_FUJITSU_CLASS		"fujitsu"
 #define ACPI_FUJITSU_BL_HID		"FUJ02B1"
@@ -76,41 +76,51 @@
 #define ACPI_FUJITSU_LAPTOP_DRIVER_NAME	"Fujitsu laptop FUJ02E3 ACPI hotkeys driver"
 #define ACPI_FUJITSU_LAPTOP_DEVICE_NAME	"Fujitsu FUJ02E3"
 
-#define ACPI_FUJITSU_NOTIFY_CODE1     0x80
+#define ACPI_FUJITSU_NOTIFY_CODE	0x80
 
 /* FUNC interface - command values */
-#define FUNC_FLAGS	0x1000
-#define FUNC_LEDS	0x1001
-#define FUNC_BUTTONS	0x1002
-#define FUNC_BACKLIGHT  0x1004
+#define FUNC_FLAGS			BIT(12)
+#define FUNC_LEDS			(BIT(12) | BIT(0))
+#define FUNC_BUTTONS			(BIT(12) | BIT(1))
+#define FUNC_BACKLIGHT			(BIT(12) | BIT(2))
 
 /* FUNC interface - responses */
-#define UNSUPPORTED_CMD 0x80000000
+#define UNSUPPORTED_CMD			0x80000000
 
 /* FUNC interface - status flags */
-#define FLAG_RFKILL	0x020
-#define FLAG_LID	0x100
-#define FLAG_DOCK	0x200
+#define FLAG_RFKILL			BIT(5)
+#define FLAG_LID			BIT(8)
+#define FLAG_DOCK			BIT(9)
 
 /* FUNC interface - LED control */
-#define FUNC_LED_OFF	0x1
-#define FUNC_LED_ON	0x30001
-#define KEYBOARD_LAMPS	0x100
-#define LOGOLAMP_POWERON 0x2000
-#define LOGOLAMP_ALWAYS  0x4000
-#define RADIO_LED_ON	0x20
-#define ECO_LED	0x10000
-#define ECO_LED_ON	0x80000
-
-/* Hotkey details */
-#define KEY1_CODE	0x410	/* codes for the keys in the GIRB register */
-#define KEY2_CODE	0x411
-#define KEY3_CODE	0x412
-#define KEY4_CODE	0x413
-#define KEY5_CODE	0x420
-
-#define MAX_HOTKEY_RINGBUFFER_SIZE 100
-#define RINGBUFFERSIZE 40
+#define FUNC_LED_OFF			BIT(0)
+#define FUNC_LED_ON			(BIT(0) | BIT(16) | BIT(17))
+#define LOGOLAMP_POWERON		BIT(13)
+#define LOGOLAMP_ALWAYS			BIT(14)
+#define KEYBOARD_LAMPS			BIT(8)
+#define RADIO_LED_ON			BIT(5)
+#define ECO_LED				BIT(16)
+#define ECO_LED_ON			BIT(19)
+
+/* FUNC interface - backlight power control */
+#define BACKLIGHT_PARAM_POWER		BIT(2)
+#define BACKLIGHT_OFF			(BIT(0) | BIT(1))
+#define BACKLIGHT_ON			0
+
+/* Scancodes read from the GIRB register */
+#define KEY1_CODE			0x410
+#define KEY2_CODE			0x411
+#define KEY3_CODE			0x412
+#define KEY4_CODE			0x413
+#define KEY5_CODE			0x420
+
+/* Hotkey ringbuffer limits */
+#define MAX_HOTKEY_RINGBUFFER_SIZE	100
+#define RINGBUFFERSIZE			40
+
+/* Module parameters */
+static int use_alt_lcd_levels = -1;
+static bool disable_brightness_adjust;
 
 /* Device controlling the backlight and associated keys */
 struct fujitsu_bl {
@@ -122,8 +132,6 @@ struct fujitsu_bl {
 };
 
 static struct fujitsu_bl *fujitsu_bl;
-static int use_alt_lcd_levels = -1;
-static bool disable_brightness_adjust;
 
 /* Device used to access hotkeys and other features on the laptop */
 struct fujitsu_laptop {
@@ -256,9 +264,11 @@ static int bl_update_status(struct backlight_device *b)
 
 	if (fext) {
 		if (b->props.power == FB_BLANK_POWERDOWN)
-			call_fext_func(fext, FUNC_BACKLIGHT, 0x1, 0x4, 0x3);
+			call_fext_func(fext, FUNC_BACKLIGHT, 0x1,
+				       BACKLIGHT_PARAM_POWER, BACKLIGHT_OFF);
 		else
-			call_fext_func(fext, FUNC_BACKLIGHT, 0x1, 0x4, 0x0);
+			call_fext_func(fext, FUNC_BACKLIGHT, 0x1,
+				       BACKLIGHT_PARAM_POWER, BACKLIGHT_ON);
 	}
 
 	return set_lcd_level(device, b->props.brightness);
@@ -385,7 +395,7 @@ static int fujitsu_backlight_register(struct acpi_device *device)
 static int acpi_fujitsu_bl_add(struct acpi_device *device)
 {
 	struct fujitsu_bl *priv;
-	int error;
+	int ret;
 
 	if (acpi_video_get_backlight_type() != acpi_backlight_vendor)
 		return -ENODEV;
@@ -399,10 +409,6 @@ static int acpi_fujitsu_bl_add(struct acpi_device *device)
 	strcpy(acpi_device_class(device), ACPI_FUJITSU_CLASS);
 	device->driver_data = priv;
 
-	error = acpi_fujitsu_bl_input_setup(device);
-	if (error)
-		return error;
-
 	pr_info("ACPI: %s [%s]\n",
 		acpi_device_name(device), acpi_device_bid(device));
 
@@ -410,11 +416,11 @@ static int acpi_fujitsu_bl_add(struct acpi_device *device)
 		priv->max_brightness = FUJITSU_LCD_N_LEVELS;
 	get_lcd_level(device);
 
-	error = fujitsu_backlight_register(device);
-	if (error)
-		return error;
+	ret = acpi_fujitsu_bl_input_setup(device);
+	if (ret)
+		return ret;
 
-	return 0;
+	return fujitsu_backlight_register(device);
 }
 
 /* Brightness notify */
@@ -424,7 +430,7 @@ static void acpi_fujitsu_bl_notify(struct acpi_device *device, u32 event)
 	struct fujitsu_bl *priv = acpi_driver_data(device);
 	int oldb, newb;
 
-	if (event != ACPI_FUJITSU_NOTIFY_CODE1) {
+	if (event != ACPI_FUJITSU_NOTIFY_CODE) {
 		acpi_handle_info(device->handle, "unsupported event [0x%x]\n",
 				 event);
 		sparse_keymap_report_event(priv->input, -1, 1, true);
@@ -455,7 +461,9 @@ static const struct key_entry keymap_default[] = {
 	{ KE_KEY, KEY3_CODE, { KEY_PROG3 } },
 	{ KE_KEY, KEY4_CODE, { KEY_PROG4 } },
 	{ KE_KEY, KEY5_CODE, { KEY_RFKILL } },
+	{ KE_KEY, BIT(5),    { KEY_RFKILL } },
 	{ KE_KEY, BIT(26),   { KEY_TOUCHPAD_TOGGLE } },
+	{ KE_KEY, BIT(29),   { KEY_MICMUTE } },
 	{ KE_END, 0 }
 };
 
@@ -693,7 +701,7 @@ static int acpi_fujitsu_laptop_leds_register(struct acpi_device *device)
 {
 	struct fujitsu_laptop *priv = acpi_driver_data(device);
 	struct led_classdev *led;
-	int result;
+	int ret;
 
 	if (call_fext_func(device,
 			   FUNC_LEDS, 0x0, 0x0, 0x0) & LOGOLAMP_POWERON) {
@@ -704,9 +712,9 @@ static int acpi_fujitsu_laptop_leds_register(struct acpi_device *device)
 		led->name = "fujitsu::logolamp";
 		led->brightness_set_blocking = logolamp_set;
 		led->brightness_get = logolamp_get;
-		result = devm_led_classdev_register(&device->dev, led);
-		if (result)
-			return result;
+		ret = devm_led_classdev_register(&device->dev, led);
+		if (ret)
+			return ret;
 	}
 
 	if ((call_fext_func(device,
@@ -719,9 +727,9 @@ static int acpi_fujitsu_laptop_leds_register(struct acpi_device *device)
 		led->name = "fujitsu::kblamps";
 		led->brightness_set_blocking = kblamps_set;
 		led->brightness_get = kblamps_get;
-		result = devm_led_classdev_register(&device->dev, led);
-		if (result)
-			return result;
+		ret = devm_led_classdev_register(&device->dev, led);
+		if (ret)
+			return ret;
 	}
 
 	/*
@@ -742,9 +750,9 @@ static int acpi_fujitsu_laptop_leds_register(struct acpi_device *device)
 		led->brightness_set_blocking = radio_led_set;
 		led->brightness_get = radio_led_get;
 		led->default_trigger = "rfkill-any";
-		result = devm_led_classdev_register(&device->dev, led);
-		if (result)
-			return result;
+		ret = devm_led_classdev_register(&device->dev, led);
+		if (ret)
+			return ret;
 	}
 
 	/* Support for eco led is not always signaled in bit corresponding
@@ -762,9 +770,9 @@ static int acpi_fujitsu_laptop_leds_register(struct acpi_device *device)
 		led->name = "fujitsu::eco_led";
 		led->brightness_set_blocking = eco_led_set;
 		led->brightness_get = eco_led_get;
-		result = devm_led_classdev_register(&device->dev, led);
-		if (result)
-			return result;
+		ret = devm_led_classdev_register(&device->dev, led);
+		if (ret)
+			return ret;
 	}
 
 	return 0;
@@ -773,8 +781,7 @@ static int acpi_fujitsu_laptop_leds_register(struct acpi_device *device)
 static int acpi_fujitsu_laptop_add(struct acpi_device *device)
 {
 	struct fujitsu_laptop *priv;
-	int error;
-	int i;
+	int ret, i = 0;
 
 	priv = devm_kzalloc(&device->dev, sizeof(*priv), GFP_KERNEL);
 	if (!priv)
@@ -789,23 +796,16 @@ static int acpi_fujitsu_laptop_add(struct acpi_device *device)
 
 	/* kfifo */
 	spin_lock_init(&priv->fifo_lock);
-	error = kfifo_alloc(&priv->fifo, RINGBUFFERSIZE * sizeof(int),
-			    GFP_KERNEL);
-	if (error) {
-		pr_err("kfifo_alloc failed\n");
-		goto err_stop;
-	}
-
-	error = acpi_fujitsu_laptop_input_setup(device);
-	if (error)
-		goto err_free_fifo;
+	ret = kfifo_alloc(&priv->fifo, RINGBUFFERSIZE * sizeof(int),
+			  GFP_KERNEL);
+	if (ret)
+		return ret;
 
 	pr_info("ACPI: %s [%s]\n",
 		acpi_device_name(device), acpi_device_bid(device));
 
-	i = 0;
-	while (call_fext_func(device, FUNC_BUTTONS, 0x1, 0x0, 0x0) != 0
-		&& (i++) < MAX_HOTKEY_RINGBUFFER_SIZE)
+	while (call_fext_func(device, FUNC_BUTTONS, 0x1, 0x0, 0x0) != 0 &&
+	       i++ < MAX_HOTKEY_RINGBUFFER_SIZE)
 		; /* No action, result is discarded */
 	acpi_handle_debug(device->handle, "Discarded %i ringbuffer entries\n",
 			  i);
@@ -829,26 +829,31 @@ static int acpi_fujitsu_laptop_add(struct acpi_device *device)
 	/* Sync backlight power status */
 	if (fujitsu_bl && fujitsu_bl->bl_device &&
 	    acpi_video_get_backlight_type() == acpi_backlight_vendor) {
-		if (call_fext_func(fext, FUNC_BACKLIGHT, 0x2, 0x4, 0x0) == 3)
+		if (call_fext_func(fext, FUNC_BACKLIGHT, 0x2,
+				   BACKLIGHT_PARAM_POWER, 0x0) == BACKLIGHT_OFF)
 			fujitsu_bl->bl_device->props.power = FB_BLANK_POWERDOWN;
 		else
 			fujitsu_bl->bl_device->props.power = FB_BLANK_UNBLANK;
 	}
 
-	error = acpi_fujitsu_laptop_leds_register(device);
-	if (error)
+	ret = acpi_fujitsu_laptop_input_setup(device);
+	if (ret)
+		goto err_free_fifo;
+
+	ret = acpi_fujitsu_laptop_leds_register(device);
+	if (ret)
 		goto err_free_fifo;
 
-	error = fujitsu_laptop_platform_add(device);
-	if (error)
+	ret = fujitsu_laptop_platform_add(device);
+	if (ret)
 		goto err_free_fifo;
 
 	return 0;
 
 err_free_fifo:
 	kfifo_free(&priv->fifo);
-err_stop:
-	return error;
+
+	return ret;
 }
 
 static int acpi_fujitsu_laptop_remove(struct acpi_device *device)
@@ -865,11 +870,11 @@ static int acpi_fujitsu_laptop_remove(struct acpi_device *device)
 static void acpi_fujitsu_laptop_press(struct acpi_device *device, int scancode)
 {
 	struct fujitsu_laptop *priv = acpi_driver_data(device);
-	int status;
+	int ret;
 
-	status = kfifo_in_locked(&priv->fifo, (unsigned char *)&scancode,
-				 sizeof(scancode), &priv->fifo_lock);
-	if (status != sizeof(scancode)) {
+	ret = kfifo_in_locked(&priv->fifo, (unsigned char *)&scancode,
+			      sizeof(scancode), &priv->fifo_lock);
+	if (ret != sizeof(scancode)) {
 		dev_info(&priv->input->dev, "Could not push scancode [0x%x]\n",
 			 scancode);
 		return;
@@ -882,13 +887,12 @@ static void acpi_fujitsu_laptop_press(struct acpi_device *device, int scancode)
 static void acpi_fujitsu_laptop_release(struct acpi_device *device)
 {
 	struct fujitsu_laptop *priv = acpi_driver_data(device);
-	int scancode, status;
+	int scancode, ret;
 
 	while (true) {
-		status = kfifo_out_locked(&priv->fifo,
-					  (unsigned char *)&scancode,
-					  sizeof(scancode), &priv->fifo_lock);
-		if (status != sizeof(scancode))
+		ret = kfifo_out_locked(&priv->fifo, (unsigned char *)&scancode,
+				       sizeof(scancode), &priv->fifo_lock);
+		if (ret != sizeof(scancode))
 			return;
 		sparse_keymap_report_event(priv->input, scancode, 0, false);
 		dev_dbg(&priv->input->dev,
@@ -899,10 +903,10 @@ static void acpi_fujitsu_laptop_release(struct acpi_device *device)
 static void acpi_fujitsu_laptop_notify(struct acpi_device *device, u32 event)
 {
 	struct fujitsu_laptop *priv = acpi_driver_data(device);
-	int scancode, i = 0;
+	int scancode, i = 0, ret;
 	unsigned int irb;
 
-	if (event != ACPI_FUJITSU_NOTIFY_CODE1) {
+	if (event != ACPI_FUJITSU_NOTIFY_CODE) {
 		acpi_handle_info(device->handle, "Unsupported event [0x%x]\n",
 				 event);
 		sparse_keymap_report_event(priv->input, -1, 1, true);
@@ -930,9 +934,18 @@ static void acpi_fujitsu_laptop_notify(struct acpi_device *device, u32 event)
 	 * E736/E746/E756), the touchpad toggle hotkey (Fn+F4) is
 	 * handled in software; its state is queried using FUNC_FLAGS
 	 */
-	if ((priv->flags_supported & BIT(26)) &&
-	    (call_fext_func(device, FUNC_FLAGS, 0x1, 0x0, 0x0) & BIT(26)))
-		sparse_keymap_report_event(priv->input, BIT(26), 1, true);
+	if (priv->flags_supported & (BIT(5) | BIT(26) | BIT(29))) {
+		ret = call_fext_func(device, FUNC_FLAGS, 0x1, 0x0, 0x0);
+		if (ret & BIT(5))
+			sparse_keymap_report_event(priv->input,
+						   BIT(5), 1, true);
+		if (ret & BIT(26))
+			sparse_keymap_report_event(priv->input,
+						   BIT(26), 1, true);
+		if (ret & BIT(29))
+			sparse_keymap_report_event(priv->input,
+						   BIT(29), 1, true);
+	}
 }
 
 /* Initialization */
diff --git a/drivers/platform/x86/gpd-pocket-fan.c b/drivers/platform/x86/gpd-pocket-fan.c
index 2d645c505f81..be85ed966bf3 100644
--- a/drivers/platform/x86/gpd-pocket-fan.c
+++ b/drivers/platform/x86/gpd-pocket-fan.c
@@ -19,12 +19,12 @@
 static int temp_limits[3] = { 55000, 60000, 65000 };
 module_param_array(temp_limits, int, NULL, 0444);
 MODULE_PARM_DESC(temp_limits,
-		 "Milli-celcius values above which the fan speed increases");
+		 "Millicelsius values above which the fan speed increases");
 
 static int hysteresis = 3000;
 module_param(hysteresis, int, 0444);
 MODULE_PARM_DESC(hysteresis,
-		 "Hysteresis in milli-celcius before lowering the fan speed");
+		 "Hysteresis in millicelsius before lowering the fan speed");
 
 static int speed_on_ac = 2;
 module_param(speed_on_ac, int, 0444);
diff --git a/drivers/platform/x86/intel-hid.c b/drivers/platform/x86/intel-hid.c
index 5e3df194723e..b5adba227783 100644
--- a/drivers/platform/x86/intel-hid.c
+++ b/drivers/platform/x86/intel-hid.c
@@ -16,16 +16,14 @@
  *
  */
 
+#include <linux/acpi.h>
+#include <linux/dmi.h>
+#include <linux/input.h>
+#include <linux/input/sparse-keymap.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/init.h>
-#include <linux/input.h>
 #include <linux/platform_device.h>
-#include <linux/input/sparse-keymap.h>
-#include <linux/acpi.h>
 #include <linux/suspend.h>
-#include <acpi/acpi_bus.h>
-#include <linux/dmi.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Alex Hung");
@@ -67,8 +65,8 @@ static const struct key_entry intel_array_keymap[] = {
 	{ KE_IGNORE, 0xC5, { KEY_VOLUMEUP } },                /* Release */
 	{ KE_KEY,    0xC6, { KEY_VOLUMEDOWN } },              /* Press */
 	{ KE_IGNORE, 0xC7, { KEY_VOLUMEDOWN } },              /* Release */
-	{ KE_SW,     0xC8, { .sw = { SW_ROTATE_LOCK, 1 } } }, /* Press */
-	{ KE_SW,     0xC9, { .sw = { SW_ROTATE_LOCK, 0 } } }, /* Release */
+	{ KE_KEY,    0xC8, { KEY_ROTATE_LOCK_TOGGLE } },      /* Press */
+	{ KE_IGNORE, 0xC9, { KEY_ROTATE_LOCK_TOGGLE } },      /* Release */
 	{ KE_KEY,    0xCE, { KEY_POWER } },                   /* Press */
 	{ KE_IGNORE, 0xCF, { KEY_POWER } },                   /* Release */
 	{ KE_END },
diff --git a/drivers/platform/x86/intel_turbo_max_3.c b/drivers/platform/x86/intel_turbo_max_3.c
index d4ea01805879..a6d5aa0c3c47 100644
--- a/drivers/platform/x86/intel_turbo_max_3.c
+++ b/drivers/platform/x86/intel_turbo_max_3.c
@@ -138,9 +138,6 @@ static int __init itmt_legacy_init(void)
 	if (!id)
 		return -ENODEV;
 
-	if (boot_cpu_has(X86_FEATURE_HWP))
-		return -ENODEV;
-
 	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
 				"platform/x86/turbo_max_3:online",
 				itmt_legacy_cpu_online,	NULL);
diff --git a/drivers/platform/x86/mlx-platform.c b/drivers/platform/x86/mlx-platform.c
index 454e14f02285..7a0bd24c1ae2 100644
--- a/drivers/platform/x86/mlx-platform.c
+++ b/drivers/platform/x86/mlx-platform.c
@@ -85,6 +85,15 @@
 #define MLXPLAT_CPLD_FAN_MASK		GENMASK(3, 0)
 #define MLXPLAT_CPLD_FAN_NG_MASK	GENMASK(5, 0)
 
+/* Default I2C parent bus number */
+#define MLXPLAT_CPLD_PHYS_ADAPTER_DEF_NR	1
+
+/* Maximum number of possible physical buses equipped on system */
+#define MLXPLAT_CPLD_MAX_PHYS_ADAPTER_NUM	16
+
+/* Number of channels in group */
+#define MLXPLAT_CPLD_GRP_CHNL_NUM		8
+
 /* Start channel numbers */
 #define MLXPLAT_CPLD_CH1			2
 #define MLXPLAT_CPLD_CH2			10
@@ -124,7 +133,7 @@ static const struct resource mlxplat_lpc_resources[] = {
 };
 
 /* Platform default channels */
-static const int mlxplat_default_channels[][8] = {
+static const int mlxplat_default_channels[][MLXPLAT_CPLD_GRP_CHNL_NUM] = {
 	{
 		MLXPLAT_CPLD_CH1, MLXPLAT_CPLD_CH1 + 1, MLXPLAT_CPLD_CH1 + 2,
 		MLXPLAT_CPLD_CH1 + 3, MLXPLAT_CPLD_CH1 + 4, MLXPLAT_CPLD_CH1 +
@@ -694,6 +703,8 @@ static int __init mlxplat_dmi_default_matched(const struct dmi_system_id *dmi)
 				ARRAY_SIZE(mlxplat_default_channels[i]);
 	}
 	mlxplat_hotplug = &mlxplat_mlxcpld_default_data;
+	mlxplat_hotplug->deferred_nr =
+		mlxplat_default_channels[i - 1][MLXPLAT_CPLD_GRP_CHNL_NUM - 1];
 
 	return 1;
 };
@@ -708,6 +719,8 @@ static int __init mlxplat_dmi_msn21xx_matched(const struct dmi_system_id *dmi)
 				ARRAY_SIZE(mlxplat_msn21xx_channels);
 	}
 	mlxplat_hotplug = &mlxplat_mlxcpld_msn21xx_data;
+	mlxplat_hotplug->deferred_nr =
+		mlxplat_msn21xx_channels[MLXPLAT_CPLD_GRP_CHNL_NUM - 1];
 
 	return 1;
 };
@@ -722,6 +735,8 @@ static int __init mlxplat_dmi_msn274x_matched(const struct dmi_system_id *dmi)
 				ARRAY_SIZE(mlxplat_msn21xx_channels);
 	}
 	mlxplat_hotplug = &mlxplat_mlxcpld_msn274x_data;
+	mlxplat_hotplug->deferred_nr =
+		mlxplat_msn21xx_channels[MLXPLAT_CPLD_GRP_CHNL_NUM - 1];
 
 	return 1;
 };
@@ -736,6 +751,8 @@ static int __init mlxplat_dmi_msn201x_matched(const struct dmi_system_id *dmi)
 				ARRAY_SIZE(mlxplat_msn21xx_channels);
 	}
 	mlxplat_hotplug = &mlxplat_mlxcpld_msn201x_data;
+	mlxplat_hotplug->deferred_nr =
+		mlxplat_default_channels[i - 1][MLXPLAT_CPLD_GRP_CHNL_NUM - 1];
 
 	return 1;
 };
@@ -750,6 +767,8 @@ static int __init mlxplat_dmi_qmb7xx_matched(const struct dmi_system_id *dmi)
 				ARRAY_SIZE(mlxplat_msn21xx_channels);
 	}
 	mlxplat_hotplug = &mlxplat_mlxcpld_default_ng_data;
+	mlxplat_hotplug->deferred_nr =
+		mlxplat_msn21xx_channels[MLXPLAT_CPLD_GRP_CHNL_NUM - 1];
 
 	return 1;
 };
@@ -830,10 +849,48 @@ static const struct dmi_system_id mlxplat_dmi_table[] __initconst = {
 
 MODULE_DEVICE_TABLE(dmi, mlxplat_dmi_table);
 
+static int mlxplat_mlxcpld_verify_bus_topology(int *nr)
+{
+	struct i2c_adapter *search_adap;
+	int shift, i;
+
+	/* Scan adapters from expected id to verify it is free. */
+	*nr = MLXPLAT_CPLD_PHYS_ADAPTER_DEF_NR;
+	for (i = MLXPLAT_CPLD_PHYS_ADAPTER_DEF_NR; i <
+	     MLXPLAT_CPLD_MAX_PHYS_ADAPTER_NUM; i++) {
+		search_adap = i2c_get_adapter(i);
+		if (search_adap) {
+			i2c_put_adapter(search_adap);
+			continue;
+		}
+
+		/* Return if expected parent adapter is free. */
+		if (i == MLXPLAT_CPLD_PHYS_ADAPTER_DEF_NR)
+			return 0;
+		break;
+	}
+
+	/* Return with error if free id for adapter is not found. */
+	if (i == MLXPLAT_CPLD_MAX_PHYS_ADAPTER_NUM)
+		return -ENODEV;
+
+	/* Shift adapter ids, since expected parent adapter is not free. */
+	*nr = i;
+	for (i = 0; i < ARRAY_SIZE(mlxplat_mux_data); i++) {
+		shift = *nr - mlxplat_mux_data[i].parent;
+		mlxplat_mux_data[i].parent = *nr;
+		mlxplat_mux_data[i].base_nr += shift;
+		if (shift > 0)
+			mlxplat_hotplug->shift_nr = shift;
+	}
+
+	return 0;
+}
+
 static int __init mlxplat_init(void)
 {
 	struct mlxplat_priv *priv;
-	int i, err;
+	int i, nr, err;
 
 	if (!dmi_check_system(mlxplat_dmi_table))
 		return -ENODEV;
@@ -853,7 +910,12 @@ static int __init mlxplat_init(void)
 	}
 	platform_set_drvdata(mlxplat_dev, priv);
 
-	priv->pdev_i2c = platform_device_register_simple("i2c_mlxcpld", -1,
+	err = mlxplat_mlxcpld_verify_bus_topology(&nr);
+	if (nr < 0)
+		goto fail_alloc;
+
+	nr = (nr == MLXPLAT_CPLD_MAX_PHYS_ADAPTER_NUM) ? -1 : nr;
+	priv->pdev_i2c = platform_device_register_simple("i2c_mlxcpld", nr,
 							 NULL, 0);
 	if (IS_ERR(priv->pdev_i2c)) {
 		err = PTR_ERR(priv->pdev_i2c);
diff --git a/drivers/platform/x86/silead_dmi.c b/drivers/platform/x86/silead_dmi.c
index 3a624090191d..452aacabaa8e 100644
--- a/drivers/platform/x86/silead_dmi.c
+++ b/drivers/platform/x86/silead_dmi.c
@@ -446,6 +446,23 @@ static const struct dmi_system_id silead_ts_dmi_table[] = {
 			DMI_MATCH(DMI_BOARD_NAME, "X3 Plus"),
 		},
 	},
+	{
+		/* I.T.Works TW701 */
+		.driver_data = (void *)&surftab_wintron70_st70416_6_data,
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Insyde"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "i71c"),
+			DMI_MATCH(DMI_BIOS_VERSION, "itWORKS.G.WI71C.JGBMRB"),
+		},
+	},
+	{
+		/* Yours Y8W81, same case and touchscreen as Chuwi Vi8 */
+		.driver_data = (void *)&chuwi_vi8_data,
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "YOURS"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Y8W81"),
+		},
+	},
 	{ },
 };
 
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index 1c57ee2b6d19..da1ca4856ea1 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -8703,16 +8703,24 @@ static const struct attribute_group fan_attr_group = {
 	  .ec = TPID(__id1, __id2),		\
 	  .quirks = __quirks }
 
+#define TPACPI_FAN_QB(__id1, __id2, __quirks)	\
+	{ .vendor = PCI_VENDOR_ID_LENOVO,	\
+	  .bios = TPID(__id1, __id2),		\
+	  .ec = TPACPI_MATCH_ANY,		\
+	  .quirks = __quirks }
+
 static const struct tpacpi_quirk fan_quirk_table[] __initconst = {
 	TPACPI_FAN_QI('1', 'Y', TPACPI_FAN_Q1),
 	TPACPI_FAN_QI('7', '8', TPACPI_FAN_Q1),
 	TPACPI_FAN_QI('7', '6', TPACPI_FAN_Q1),
 	TPACPI_FAN_QI('7', '0', TPACPI_FAN_Q1),
 	TPACPI_FAN_QL('7', 'M', TPACPI_FAN_2FAN),
+	TPACPI_FAN_QB('N', '1', TPACPI_FAN_2FAN),
 };
 
 #undef TPACPI_FAN_QL
 #undef TPACPI_FAN_QI
+#undef TPACPI_FAN_QB
 
 static int __init fan_init(struct ibm_init_struct *iibm)
 {
diff --git a/drivers/platform/x86/topstar-laptop.c b/drivers/platform/x86/topstar-laptop.c
index 1032c00b907b..f7761d98c0fd 100644
--- a/drivers/platform/x86/topstar-laptop.c
+++ b/drivers/platform/x86/topstar-laptop.c
@@ -1,14 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
- * ACPI driver for Topstar notebooks (hotkeys support only)
+ * Topstar Laptop ACPI Extras driver
  *
  * Copyright (c) 2009 Herton Ronaldo Krzesinski <herton@mandriva.com.br>
+ * Copyright (c) 2018 Guillaume Douézan-Grard
  *
  * Implementation inspired by existing x86 platform drivers, in special
- * asus/eepc/fujitsu-laptop, thanks to their authors
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
+ * asus/eepc/fujitsu-laptop, thanks to their authors.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -18,15 +16,93 @@
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/acpi.h>
+#include <linux/dmi.h>
 #include <linux/input.h>
 #include <linux/input/sparse-keymap.h>
+#include <linux/leds.h>
+#include <linux/platform_device.h>
 
-#define ACPI_TOPSTAR_CLASS "topstar"
+#define TOPSTAR_LAPTOP_CLASS "topstar"
 
-struct topstar_hkey {
-	struct input_dev *inputdev;
+struct topstar_laptop {
+	struct acpi_device *device;
+	struct platform_device *platform;
+	struct input_dev *input;
+	struct led_classdev led;
 };
 
+/*
+ * LED
+ */
+
+static enum led_brightness topstar_led_get(struct led_classdev *led)
+{
+	return led->brightness;
+}
+
+static int topstar_led_set(struct led_classdev *led,
+		enum led_brightness state)
+{
+	struct topstar_laptop *topstar = container_of(led,
+			struct topstar_laptop, led);
+
+	struct acpi_object_list params;
+	union acpi_object in_obj;
+	unsigned long long int ret;
+	acpi_status status;
+
+	params.count = 1;
+	params.pointer = &in_obj;
+	in_obj.type = ACPI_TYPE_INTEGER;
+	in_obj.integer.value = 0x83;
+
+	/*
+	 * Topstar ACPI returns 0x30001 when the LED is ON and 0x30000 when it
+	 * is OFF.
+	 */
+	status = acpi_evaluate_integer(topstar->device->handle,
+			"GETX", &params, &ret);
+	if (ACPI_FAILURE(status))
+		return -1;
+
+	/*
+	 * FNCX(0x83) toggles the LED (more precisely, it is supposed to
+	 * act as an hardware switch and disconnect the WLAN adapter but
+	 * it seems to be faulty on some models like the Topstar U931
+	 * Notebook).
+	 */
+	if ((ret == 0x30001 && state == LED_OFF)
+			|| (ret == 0x30000 && state != LED_OFF)) {
+		status = acpi_execute_simple_method(topstar->device->handle,
+				"FNCX", 0x83);
+		if (ACPI_FAILURE(status))
+			return -1;
+	}
+
+	return 0;
+}
+
+static int topstar_led_init(struct topstar_laptop *topstar)
+{
+	topstar->led = (struct led_classdev) {
+		.default_trigger = "rfkill0",
+		.brightness_get = topstar_led_get,
+		.brightness_set_blocking = topstar_led_set,
+		.name = TOPSTAR_LAPTOP_CLASS "::wlan",
+	};
+
+	return led_classdev_register(&topstar->platform->dev, &topstar->led);
+}
+
+static void topstar_led_exit(struct topstar_laptop *topstar)
+{
+	led_classdev_unregister(&topstar->led);
+}
+
+/*
+ * Input
+ */
+
 static const struct key_entry topstar_keymap[] = {
 	{ KE_KEY, 0x80, { KEY_BRIGHTNESSUP } },
 	{ KE_KEY, 0x81, { KEY_BRIGHTNESSDOWN } },
@@ -57,107 +133,217 @@ static const struct key_entry topstar_keymap[] = {
 	{ KE_END, 0 }
 };
 
-static void acpi_topstar_notify(struct acpi_device *device, u32 event)
+static void topstar_input_notify(struct topstar_laptop *topstar, int event)
 {
-	static bool dup_evnt[2];
-	bool *dup;
-	struct topstar_hkey *hkey = acpi_driver_data(device);
-
-	/* 0x83 and 0x84 key events comes duplicated... */
-	if (event == 0x83 || event == 0x84) {
-		dup = &dup_evnt[event - 0x83];
-		if (*dup) {
-			*dup = false;
-			return;
-		}
-		*dup = true;
-	}
-
-	if (!sparse_keymap_report_event(hkey->inputdev, event, 1, true))
+	if (!sparse_keymap_report_event(topstar->input, event, 1, true))
 		pr_info("unknown event = 0x%02x\n", event);
 }
 
-static int acpi_topstar_fncx_switch(struct acpi_device *device, bool state)
-{
-	acpi_status status;
-
-	status = acpi_execute_simple_method(device->handle, "FNCX",
-						state ? 0x86 : 0x87);
-	if (ACPI_FAILURE(status)) {
-		pr_err("Unable to switch FNCX notifications\n");
-		return -ENODEV;
-	}
-
-	return 0;
-}
-
-static int acpi_topstar_init_hkey(struct topstar_hkey *hkey)
+static int topstar_input_init(struct topstar_laptop *topstar)
 {
 	struct input_dev *input;
-	int error;
+	int err;
 
 	input = input_allocate_device();
 	if (!input)
 		return -ENOMEM;
 
 	input->name = "Topstar Laptop extra buttons";
-	input->phys = "topstar/input0";
+	input->phys = TOPSTAR_LAPTOP_CLASS "/input0";
 	input->id.bustype = BUS_HOST;
+	input->dev.parent = &topstar->platform->dev;
 
-	error = sparse_keymap_setup(input, topstar_keymap, NULL);
-	if (error) {
+	err = sparse_keymap_setup(input, topstar_keymap, NULL);
+	if (err) {
 		pr_err("Unable to setup input device keymap\n");
 		goto err_free_dev;
 	}
 
-	error = input_register_device(input);
-	if (error) {
+	err = input_register_device(input);
+	if (err) {
 		pr_err("Unable to register input device\n");
 		goto err_free_dev;
 	}
 
-	hkey->inputdev = input;
+	topstar->input = input;
 	return 0;
 
- err_free_dev:
+err_free_dev:
 	input_free_device(input);
-	return error;
+	return err;
 }
 
-static int acpi_topstar_add(struct acpi_device *device)
+static void topstar_input_exit(struct topstar_laptop *topstar)
 {
-	struct topstar_hkey *tps_hkey;
+	input_unregister_device(topstar->input);
+}
 
-	tps_hkey = kzalloc(sizeof(struct topstar_hkey), GFP_KERNEL);
-	if (!tps_hkey)
+/*
+ * Platform
+ */
+
+static struct platform_driver topstar_platform_driver = {
+	.driver = {
+		.name = TOPSTAR_LAPTOP_CLASS,
+	},
+};
+
+static int topstar_platform_init(struct topstar_laptop *topstar)
+{
+	int err;
+
+	topstar->platform = platform_device_alloc(TOPSTAR_LAPTOP_CLASS, -1);
+	if (!topstar->platform)
 		return -ENOMEM;
 
-	strcpy(acpi_device_name(device), "Topstar TPSACPI");
-	strcpy(acpi_device_class(device), ACPI_TOPSTAR_CLASS);
+	platform_set_drvdata(topstar->platform, topstar);
+
+	err = platform_device_add(topstar->platform);
+	if (err)
+		goto err_device_put;
+
+	return 0;
 
-	if (acpi_topstar_fncx_switch(device, true))
-		goto add_err;
+err_device_put:
+	platform_device_put(topstar->platform);
+	return err;
+}
+
+static void topstar_platform_exit(struct topstar_laptop *topstar)
+{
+	platform_device_unregister(topstar->platform);
+}
+
+/*
+ * ACPI
+ */
+
+static int topstar_acpi_fncx_switch(struct acpi_device *device, bool state)
+{
+	acpi_status status;
+	u64 arg = state ? 0x86 : 0x87;
 
-	if (acpi_topstar_init_hkey(tps_hkey))
-		goto add_err;
+	status = acpi_execute_simple_method(device->handle, "FNCX", arg);
+	if (ACPI_FAILURE(status)) {
+		pr_err("Unable to switch FNCX notifications\n");
+		return -ENODEV;
+	}
 
-	device->driver_data = tps_hkey;
 	return 0;
+}
 
-add_err:
-	kfree(tps_hkey);
-	return -ENODEV;
+static void topstar_acpi_notify(struct acpi_device *device, u32 event)
+{
+	struct topstar_laptop *topstar = acpi_driver_data(device);
+	static bool dup_evnt[2];
+	bool *dup;
+
+	/* 0x83 and 0x84 key events comes duplicated... */
+	if (event == 0x83 || event == 0x84) {
+		dup = &dup_evnt[event - 0x83];
+		if (*dup) {
+			*dup = false;
+			return;
+		}
+		*dup = true;
+	}
+
+	topstar_input_notify(topstar, event);
 }
 
-static int acpi_topstar_remove(struct acpi_device *device)
+static int topstar_acpi_init(struct topstar_laptop *topstar)
 {
-	struct topstar_hkey *tps_hkey = acpi_driver_data(device);
+	return topstar_acpi_fncx_switch(topstar->device, true);
+}
 
-	acpi_topstar_fncx_switch(device, false);
+static void topstar_acpi_exit(struct topstar_laptop *topstar)
+{
+	topstar_acpi_fncx_switch(topstar->device, false);
+}
 
-	input_unregister_device(tps_hkey->inputdev);
-	kfree(tps_hkey);
+/*
+ * Enable software-based WLAN LED control on systems with defective
+ * hardware switch.
+ */
+static bool led_workaround;
 
+static int dmi_led_workaround(const struct dmi_system_id *id)
+{
+	led_workaround = true;
+	return 0;
+}
+
+static const struct dmi_system_id topstar_dmi_ids[] = {
+	{
+		.callback = dmi_led_workaround,
+		.ident = "Topstar U931/RVP7",
+		.matches = {
+			DMI_MATCH(DMI_BOARD_NAME, "U931"),
+			DMI_MATCH(DMI_BOARD_VERSION, "RVP7"),
+		},
+	},
+	{}
+};
+
+static int topstar_acpi_add(struct acpi_device *device)
+{
+	struct topstar_laptop *topstar;
+	int err;
+
+	dmi_check_system(topstar_dmi_ids);
+
+	topstar = kzalloc(sizeof(struct topstar_laptop), GFP_KERNEL);
+	if (!topstar)
+		return -ENOMEM;
+
+	strcpy(acpi_device_name(device), "Topstar TPSACPI");
+	strcpy(acpi_device_class(device), TOPSTAR_LAPTOP_CLASS);
+	device->driver_data = topstar;
+	topstar->device = device;
+
+	err = topstar_acpi_init(topstar);
+	if (err)
+		goto err_free;
+
+	err = topstar_platform_init(topstar);
+	if (err)
+		goto err_acpi_exit;
+
+	err = topstar_input_init(topstar);
+	if (err)
+		goto err_platform_exit;
+
+	if (led_workaround) {
+		err = topstar_led_init(topstar);
+		if (err)
+			goto err_input_exit;
+	}
+
+	return 0;
+
+err_input_exit:
+	topstar_input_exit(topstar);
+err_platform_exit:
+	topstar_platform_exit(topstar);
+err_acpi_exit:
+	topstar_acpi_exit(topstar);
+err_free:
+	kfree(topstar);
+	return err;
+}
+
+static int topstar_acpi_remove(struct acpi_device *device)
+{
+	struct topstar_laptop *topstar = acpi_driver_data(device);
+
+	if (led_workaround)
+		topstar_led_exit(topstar);
+
+	topstar_input_exit(topstar);
+	topstar_platform_exit(topstar);
+	topstar_acpi_exit(topstar);
+
+	kfree(topstar);
 	return 0;
 }
 
@@ -168,18 +354,47 @@ static const struct acpi_device_id topstar_device_ids[] = {
 };
 MODULE_DEVICE_TABLE(acpi, topstar_device_ids);
 
-static struct acpi_driver acpi_topstar_driver = {
+static struct acpi_driver topstar_acpi_driver = {
 	.name = "Topstar laptop ACPI driver",
-	.class = ACPI_TOPSTAR_CLASS,
+	.class = TOPSTAR_LAPTOP_CLASS,
 	.ids = topstar_device_ids,
 	.ops = {
-		.add = acpi_topstar_add,
-		.remove = acpi_topstar_remove,
-		.notify = acpi_topstar_notify,
+		.add = topstar_acpi_add,
+		.remove = topstar_acpi_remove,
+		.notify = topstar_acpi_notify,
 	},
 };
-module_acpi_driver(acpi_topstar_driver);
+
+static int __init topstar_laptop_init(void)
+{
+	int ret;
+
+	ret = platform_driver_register(&topstar_platform_driver);
+	if (ret < 0)
+		return ret;
+
+	ret = acpi_bus_register_driver(&topstar_acpi_driver);
+	if (ret < 0)
+		goto err_driver_unreg;
+
+	pr_info("ACPI extras driver loaded\n");
+	return 0;
+
+err_driver_unreg:
+	platform_driver_unregister(&topstar_platform_driver);
+	return ret;
+}
+
+static void __exit topstar_laptop_exit(void)
+{
+	acpi_bus_unregister_driver(&topstar_acpi_driver);
+	platform_driver_unregister(&topstar_platform_driver);
+}
+
+module_init(topstar_laptop_init);
+module_exit(topstar_laptop_exit);
 
 MODULE_AUTHOR("Herton Ronaldo Krzesinski");
+MODULE_AUTHOR("Guillaume Douézan-Grard");
 MODULE_DESCRIPTION("Topstar Laptop ACPI Extras driver");
 MODULE_LICENSE("GPL");
diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c
index 8796211ef24a..8e3d0146ff8c 100644
--- a/drivers/platform/x86/wmi.c
+++ b/drivers/platform/x86/wmi.c
@@ -130,13 +130,11 @@ static bool find_guid(const char *guid_string, struct wmi_block **out)
 	uuid_le guid_input;
 	struct wmi_block *wblock;
 	struct guid_block *block;
-	struct list_head *p;
 
 	if (uuid_le_to_bin(guid_string, &guid_input))
 		return false;
 
-	list_for_each(p, &wmi_block_list) {
-		wblock = list_entry(p, struct wmi_block, list);
+	list_for_each_entry(wblock, &wmi_block_list, list) {
 		block = &wblock->gblock;
 
 		if (memcmp(block->guid, &guid_input, 16) == 0) {
@@ -519,7 +517,6 @@ wmi_notify_handler handler, void *data)
 	struct wmi_block *block;
 	acpi_status status = AE_NOT_EXIST;
 	uuid_le guid_input;
-	struct list_head *p;
 
 	if (!guid || !handler)
 		return AE_BAD_PARAMETER;
@@ -527,9 +524,8 @@ wmi_notify_handler handler, void *data)
 	if (uuid_le_to_bin(guid, &guid_input))
 		return AE_BAD_PARAMETER;
 
-	list_for_each(p, &wmi_block_list) {
+	list_for_each_entry(block, &wmi_block_list, list) {
 		acpi_status wmi_status;
-		block = list_entry(p, struct wmi_block, list);
 
 		if (memcmp(block->gblock.guid, &guid_input, 16) == 0) {
 			if (block->handler &&
@@ -560,7 +556,6 @@ acpi_status wmi_remove_notify_handler(const char *guid)
 	struct wmi_block *block;
 	acpi_status status = AE_NOT_EXIST;
 	uuid_le guid_input;
-	struct list_head *p;
 
 	if (!guid)
 		return AE_BAD_PARAMETER;
@@ -568,9 +563,8 @@ acpi_status wmi_remove_notify_handler(const char *guid)
 	if (uuid_le_to_bin(guid, &guid_input))
 		return AE_BAD_PARAMETER;
 
-	list_for_each(p, &wmi_block_list) {
+	list_for_each_entry(block, &wmi_block_list, list) {
 		acpi_status wmi_status;
-		block = list_entry(p, struct wmi_block, list);
 
 		if (memcmp(block->gblock.guid, &guid_input, 16) == 0) {
 			if (!block->handler ||
@@ -610,15 +604,13 @@ acpi_status wmi_get_event_data(u32 event, struct acpi_buffer *out)
 	union acpi_object params[1];
 	struct guid_block *gblock;
 	struct wmi_block *wblock;
-	struct list_head *p;
 
 	input.count = 1;
 	input.pointer = params;
 	params[0].type = ACPI_TYPE_INTEGER;
 	params[0].integer.value = event;
 
-	list_for_each(p, &wmi_block_list) {
-		wblock = list_entry(p, struct wmi_block, list);
+	list_for_each_entry(wblock, &wmi_block_list, list) {
 		gblock = &wblock->gblock;
 
 		if ((gblock->flags & ACPI_WMI_EVENT) &&
@@ -933,12 +925,11 @@ static int wmi_dev_probe(struct device *dev)
 			goto probe_failure;
 		}
 
-		buf = kmalloc(strlen(wdriver->driver.name) + 5, GFP_KERNEL);
+		buf = kasprintf(GFP_KERNEL, "wmi/%s", wdriver->driver.name);
 		if (!buf) {
 			ret = -ENOMEM;
 			goto probe_string_failure;
 		}
-		sprintf(buf, "wmi/%s", wdriver->driver.name);
 		wblock->char_dev.minor = MISC_DYNAMIC_MINOR;
 		wblock->char_dev.name = buf;
 		wblock->char_dev.fops = &wmi_fops;
@@ -1261,11 +1252,9 @@ static void acpi_wmi_notify_handler(acpi_handle handle, u32 event,
 {
 	struct guid_block *block;
 	struct wmi_block *wblock;
-	struct list_head *p;
 	bool found_it = false;
 
-	list_for_each(p, &wmi_block_list) {
-		wblock = list_entry(p, struct wmi_block, list);
+	list_for_each_entry(wblock, &wmi_block_list, list) {
 		block = &wblock->gblock;
 
 		if (wblock->acpi_device->handle == handle &&
diff --git a/drivers/rapidio/devices/rio_mport_cdev.c b/drivers/rapidio/devices/rio_mport_cdev.c
index cfb54e01d758..9d27016c899e 100644
--- a/drivers/rapidio/devices/rio_mport_cdev.c
+++ b/drivers/rapidio/devices/rio_mport_cdev.c
@@ -212,7 +212,6 @@ struct mport_cdev_priv {
 #ifdef CONFIG_RAPIDIO_DMA_ENGINE
 	struct dma_chan		*dmach;
 	struct list_head	async_list;
-	struct list_head	pend_list;
 	spinlock_t              req_lock;
 	struct mutex		dma_lock;
 	struct kref		dma_ref;
@@ -258,8 +257,6 @@ static DECLARE_WAIT_QUEUE_HEAD(mport_cdev_wait);
 static struct class *dev_class;
 static dev_t dev_number;
 
-static struct workqueue_struct *dma_wq;
-
 static void mport_release_mapping(struct kref *ref);
 
 static int rio_mport_maint_rd(struct mport_cdev_priv *priv, void __user *arg,
@@ -539,6 +536,7 @@ static int maint_comptag_set(struct mport_cdev_priv *priv, void __user *arg)
 #ifdef CONFIG_RAPIDIO_DMA_ENGINE
 
 struct mport_dma_req {
+	struct kref refcount;
 	struct list_head node;
 	struct file *filp;
 	struct mport_cdev_priv *priv;
@@ -554,11 +552,6 @@ struct mport_dma_req {
 	struct completion req_comp;
 };
 
-struct mport_faf_work {
-	struct work_struct work;
-	struct mport_dma_req *req;
-};
-
 static void mport_release_def_dma(struct kref *dma_ref)
 {
 	struct mport_dev *md =
@@ -578,8 +571,10 @@ static void mport_release_dma(struct kref *dma_ref)
 	complete(&priv->comp);
 }
 
-static void dma_req_free(struct mport_dma_req *req)
+static void dma_req_free(struct kref *ref)
 {
+	struct mport_dma_req *req = container_of(ref, struct mport_dma_req,
+			refcount);
 	struct mport_cdev_priv *priv = req->priv;
 	unsigned int i;
 
@@ -611,30 +606,7 @@ static void dma_xfer_callback(void *param)
 	req->status = dma_async_is_tx_complete(priv->dmach, req->cookie,
 					       NULL, NULL);
 	complete(&req->req_comp);
-}
-
-static void dma_faf_cleanup(struct work_struct *_work)
-{
-	struct mport_faf_work *work = container_of(_work,
-						struct mport_faf_work, work);
-	struct mport_dma_req *req = work->req;
-
-	dma_req_free(req);
-	kfree(work);
-}
-
-static void dma_faf_callback(void *param)
-{
-	struct mport_dma_req *req = (struct mport_dma_req *)param;
-	struct mport_faf_work *work;
-
-	work = kmalloc(sizeof(*work), GFP_ATOMIC);
-	if (!work)
-		return;
-
-	INIT_WORK(&work->work, dma_faf_cleanup);
-	work->req = req;
-	queue_work(dma_wq, &work->work);
+	kref_put(&req->refcount, dma_req_free);
 }
 
 /*
@@ -765,16 +737,14 @@ static int do_dma_request(struct mport_dma_req *req,
 		goto err_out;
 	}
 
-	if (sync == RIO_TRANSFER_FAF)
-		tx->callback = dma_faf_callback;
-	else
-		tx->callback = dma_xfer_callback;
+	tx->callback = dma_xfer_callback;
 	tx->callback_param = req;
 
 	req->dmach = chan;
 	req->sync = sync;
 	req->status = DMA_IN_PROGRESS;
 	init_completion(&req->req_comp);
+	kref_get(&req->refcount);
 
 	cookie = dmaengine_submit(tx);
 	req->cookie = cookie;
@@ -785,6 +755,7 @@ static int do_dma_request(struct mport_dma_req *req,
 	if (dma_submit_error(cookie)) {
 		rmcd_error("submit err=%d (addr:0x%llx len:0x%llx)",
 			   cookie, xfer->rio_addr, xfer->length);
+		kref_put(&req->refcount, dma_req_free);
 		ret = -EIO;
 		goto err_out;
 	}
@@ -860,6 +831,8 @@ rio_dma_transfer(struct file *filp, u32 transfer_mode,
 	if (!req)
 		return -ENOMEM;
 
+	kref_init(&req->refcount);
+
 	ret = get_dma_channel(priv);
 	if (ret) {
 		kfree(req);
@@ -968,42 +941,20 @@ rio_dma_transfer(struct file *filp, u32 transfer_mode,
 	ret = do_dma_request(req, xfer, sync, nents);
 
 	if (ret >= 0) {
-		if (sync == RIO_TRANSFER_SYNC)
-			goto sync_out;
-		return ret; /* return ASYNC cookie */
-	}
-
-	if (ret == -ETIMEDOUT || ret == -EINTR) {
-		/*
-		 * This can happen only in case of SYNC transfer.
-		 * Do not free unfinished request structure immediately.
-		 * Place it into pending list and deal with it later
-		 */
-		spin_lock(&priv->req_lock);
-		list_add_tail(&req->node, &priv->pend_list);
-		spin_unlock(&priv->req_lock);
-		return ret;
+		if (sync == RIO_TRANSFER_ASYNC)
+			return ret; /* return ASYNC cookie */
+	} else {
+		rmcd_debug(DMA, "do_dma_request failed with err=%d", ret);
 	}
 
-
-	rmcd_debug(DMA, "do_dma_request failed with err=%d", ret);
-sync_out:
-	dma_unmap_sg(chan->device->dev, req->sgt.sgl, req->sgt.nents, dir);
-	sg_free_table(&req->sgt);
 err_pg:
-	if (page_list) {
+	if (!req->page_list) {
 		for (i = 0; i < nr_pages; i++)
 			put_page(page_list[i]);
 		kfree(page_list);
 	}
 err_req:
-	if (req->map) {
-		mutex_lock(&md->buf_mutex);
-		kref_put(&req->map->ref, mport_release_mapping);
-		mutex_unlock(&md->buf_mutex);
-	}
-	put_dma_channel(priv);
-	kfree(req);
+	kref_put(&req->refcount, dma_req_free);
 	return ret;
 }
 
@@ -1121,7 +1072,7 @@ static int rio_mport_wait_for_async_dma(struct file *filp, void __user *arg)
 		ret = 0;
 
 	if (req->status != DMA_IN_PROGRESS && req->status != DMA_PAUSED)
-		dma_req_free(req);
+		kref_put(&req->refcount, dma_req_free);
 
 	return ret;
 
@@ -1966,7 +1917,6 @@ static int mport_cdev_open(struct inode *inode, struct file *filp)
 
 #ifdef CONFIG_RAPIDIO_DMA_ENGINE
 	INIT_LIST_HEAD(&priv->async_list);
-	INIT_LIST_HEAD(&priv->pend_list);
 	spin_lock_init(&priv->req_lock);
 	mutex_init(&priv->dma_lock);
 #endif
@@ -2006,8 +1956,6 @@ static void mport_cdev_release_dma(struct file *filp)
 
 	md = priv->md;
 
-	flush_workqueue(dma_wq);
-
 	spin_lock(&priv->req_lock);
 	if (!list_empty(&priv->async_list)) {
 		rmcd_debug(EXIT, "async list not empty filp=%p %s(%d)",
@@ -2023,20 +1971,7 @@ static void mport_cdev_release_dma(struct file *filp)
 				   req->filp, req->cookie,
 				   completion_done(&req->req_comp)?"yes":"no");
 			list_del(&req->node);
-			dma_req_free(req);
-		}
-	}
-
-	if (!list_empty(&priv->pend_list)) {
-		rmcd_debug(EXIT, "Free pending DMA requests for filp=%p %s(%d)",
-			   filp, current->comm, task_pid_nr(current));
-		list_for_each_entry_safe(req,
-					 req_next, &priv->pend_list, node) {
-			rmcd_debug(EXIT, "free req->filp=%p cookie=%d compl=%s",
-				   req->filp, req->cookie,
-				   completion_done(&req->req_comp)?"yes":"no");
-			list_del(&req->node);
-			dma_req_free(req);
+			kref_put(&req->refcount, dma_req_free);
 		}
 	}
 
@@ -2048,15 +1983,6 @@ static void mport_cdev_release_dma(struct file *filp)
 			current->comm, task_pid_nr(current), wret);
 	}
 
-	spin_lock(&priv->req_lock);
-
-	if (!list_empty(&priv->pend_list)) {
-		rmcd_debug(EXIT, "ATTN: pending DMA requests, filp=%p %s(%d)",
-			   filp, current->comm, task_pid_nr(current));
-	}
-
-	spin_unlock(&priv->req_lock);
-
 	if (priv->dmach != priv->md->dma_chan) {
 		rmcd_debug(EXIT, "Release DMA channel for filp=%p %s(%d)",
 			   filp, current->comm, task_pid_nr(current));
@@ -2573,8 +2499,6 @@ static void mport_cdev_remove(struct mport_dev *md)
 	cdev_device_del(&md->cdev, &md->dev);
 	mport_cdev_kill_fasync(md);
 
-	flush_workqueue(dma_wq);
-
 	/* TODO: do we need to give clients some time to close file
 	 * descriptors? Simple wait for XX, or kref?
 	 */
@@ -2691,17 +2615,8 @@ static int __init mport_init(void)
 		goto err_cli;
 	}
 
-	dma_wq = create_singlethread_workqueue("dma_wq");
-	if (!dma_wq) {
-		rmcd_error("failed to create DMA work queue");
-		ret = -ENOMEM;
-		goto err_wq;
-	}
-
 	return 0;
 
-err_wq:
-	class_interface_unregister(&rio_mport_interface);
 err_cli:
 	unregister_chrdev_region(dev_number, RIO_MAX_MPORTS);
 err_chr:
@@ -2717,7 +2632,6 @@ static void __exit mport_exit(void)
 	class_interface_unregister(&rio_mport_interface);
 	class_destroy(dev_class);
 	unregister_chrdev_region(dev_number, RIO_MAX_MPORTS);
-	destroy_workqueue(dma_wq);
 }
 
 module_init(mport_init);
diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c
index 23429bdaca84..161b927d9de1 100644
--- a/drivers/rapidio/rio-scan.c
+++ b/drivers/rapidio/rio-scan.c
@@ -76,7 +76,7 @@ static u16 rio_destid_alloc(struct rio_net *net)
 }
 
 /**
- * rio_destid_reserve - Reserve the specivied destID
+ * rio_destid_reserve - Reserve the specified destID
  * @net: RIO network
  * @destid: destID to reserve
  *
@@ -885,7 +885,7 @@ static struct rio_net *rio_scan_alloc_net(struct rio_mport *mport,
  *
  * For each enumerated device, ensure that each switch in a system
  * has correct routing entries. Add routes for devices that where
- * unknown dirung the first enumeration pass through the switch.
+ * unknown during the first enumeration pass through the switch.
  */
 static void rio_update_route_tables(struct rio_net *net)
 {
@@ -983,7 +983,7 @@ static int rio_enum_mport(struct rio_mport *mport, u32 flags)
 		/* reserve mport destID in new net */
 		rio_destid_reserve(net, mport->host_deviceid);
 
-		/* Enable Input Output Port (transmitter reviever) */
+		/* Enable Input Output Port (transmitter receiver) */
 		rio_enable_rx_tx_port(mport, 1, 0, 0, 0);
 
 		/* Set component tag for host */
diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig
index b609e1d3654b..027274008b08 100644
--- a/drivers/remoteproc/Kconfig
+++ b/drivers/remoteproc/Kconfig
@@ -6,6 +6,7 @@ config REMOTEPROC
 	select CRC32
 	select FW_LOADER
 	select VIRTIO
+	select WANT_DEV_COREDUMP
 	help
 	  Support for remote processors (such as DSP coprocessors). These
 	  are mainly used on embedded systems.
@@ -90,6 +91,7 @@ config QCOM_ADSP_PIL
 	depends on QCOM_SMEM
 	depends on RPMSG_QCOM_SMD || (COMPILE_TEST && RPMSG_QCOM_SMD=n)
 	depends on RPMSG_QCOM_GLINK_SMEM || RPMSG_QCOM_GLINK_SMEM=n
+	depends on QCOM_SYSMON || QCOM_SYSMON=n
 	select MFD_SYSCON
 	select QCOM_MDT_LOADER
 	select QCOM_RPROC_COMMON
@@ -107,6 +109,7 @@ config QCOM_Q6V5_PIL
 	depends on QCOM_SMEM
 	depends on RPMSG_QCOM_SMD || (COMPILE_TEST && RPMSG_QCOM_SMD=n)
 	depends on RPMSG_QCOM_GLINK_SMEM || RPMSG_QCOM_GLINK_SMEM=n
+	depends on QCOM_SYSMON || QCOM_SYSMON=n
 	select MFD_SYSCON
 	select QCOM_RPROC_COMMON
 	select QCOM_SCM
@@ -114,12 +117,28 @@ config QCOM_Q6V5_PIL
 	  Say y here to support the Qualcomm Peripherial Image Loader for the
 	  Hexagon V5 based remote processors.
 
+config QCOM_SYSMON
+	tristate "Qualcomm sysmon driver"
+	depends on RPMSG
+	depends on ARCH_QCOM
+	depends on NET
+	select QCOM_QMI_HELPERS
+	help
+	  The sysmon driver implements a sysmon QMI client and a handler for
+	  the sys_mon SMD and GLINK channel, which are used for graceful
+	  shutdown, retrieving failure information and propagating information
+	  about other subsystems being shut down.
+
+	  Say y here if your system runs firmware on any other subsystems, e.g.
+	  modem or DSP.
+
 config QCOM_WCNSS_PIL
 	tristate "Qualcomm WCNSS Peripheral Image Loader"
 	depends on OF && ARCH_QCOM
 	depends on RPMSG_QCOM_SMD || (COMPILE_TEST && RPMSG_QCOM_SMD=n)
 	depends on RPMSG_QCOM_GLINK_SMEM || RPMSG_QCOM_GLINK_SMEM=n
 	depends on QCOM_SMEM
+	depends on QCOM_SYSMON || QCOM_SYSMON=n
 	select QCOM_MDT_LOADER
 	select QCOM_RPROC_COMMON
 	select QCOM_SCM
diff --git a/drivers/remoteproc/Makefile b/drivers/remoteproc/Makefile
index 6e16450ce11f..02627ede8d4a 100644
--- a/drivers/remoteproc/Makefile
+++ b/drivers/remoteproc/Makefile
@@ -17,6 +17,7 @@ obj-$(CONFIG_KEYSTONE_REMOTEPROC)	+= keystone_remoteproc.o
 obj-$(CONFIG_QCOM_ADSP_PIL)		+= qcom_adsp_pil.o
 obj-$(CONFIG_QCOM_RPROC_COMMON)		+= qcom_common.o
 obj-$(CONFIG_QCOM_Q6V5_PIL)		+= qcom_q6v5_pil.o
+obj-$(CONFIG_QCOM_SYSMON)		+= qcom_sysmon.o
 obj-$(CONFIG_QCOM_WCNSS_PIL)		+= qcom_wcnss_pil.o
 qcom_wcnss_pil-y			+= qcom_wcnss.o
 qcom_wcnss_pil-y			+= qcom_wcnss_iris.o
diff --git a/drivers/remoteproc/imx_rproc.c b/drivers/remoteproc/imx_rproc.c
index 633268e9d550..54c07fd3f204 100644
--- a/drivers/remoteproc/imx_rproc.c
+++ b/drivers/remoteproc/imx_rproc.c
@@ -333,14 +333,14 @@ static int imx_rproc_probe(struct platform_device *pdev)
 	/* set some other name then imx */
 	rproc = rproc_alloc(dev, "imx-rproc", &imx_rproc_ops,
 			    NULL, sizeof(*priv));
-	if (!rproc) {
-		ret = -ENOMEM;
-		goto err;
-	}
+	if (!rproc)
+		return -ENOMEM;
 
 	dcfg = of_device_get_match_data(dev);
-	if (!dcfg)
-		return -EINVAL;
+	if (!dcfg) {
+		ret = -EINVAL;
+		goto err_put_rproc;
+	}
 
 	priv = rproc->priv;
 	priv->rproc = rproc;
@@ -359,8 +359,8 @@ static int imx_rproc_probe(struct platform_device *pdev)
 	priv->clk = devm_clk_get(dev, NULL);
 	if (IS_ERR(priv->clk)) {
 		dev_err(dev, "Failed to get clock\n");
-		rproc_free(rproc);
-		return PTR_ERR(priv->clk);
+		ret = PTR_ERR(priv->clk);
+		goto err_put_rproc;
 	}
 
 	/*
@@ -370,8 +370,7 @@ static int imx_rproc_probe(struct platform_device *pdev)
 	ret = clk_prepare_enable(priv->clk);
 	if (ret) {
 		dev_err(&rproc->dev, "Failed to enable clock\n");
-		rproc_free(rproc);
-		return ret;
+		goto err_put_rproc;
 	}
 
 	ret = rproc_add(rproc);
@@ -380,13 +379,13 @@ static int imx_rproc_probe(struct platform_device *pdev)
 		goto err_put_clk;
 	}
 
-	return ret;
+	return 0;
 
 err_put_clk:
 	clk_disable_unprepare(priv->clk);
 err_put_rproc:
 	rproc_free(rproc);
-err:
+
 	return ret;
 }
 
diff --git a/drivers/remoteproc/qcom_adsp_pil.c b/drivers/remoteproc/qcom_adsp_pil.c
index 373c167892d7..89a86ce07f99 100644
--- a/drivers/remoteproc/qcom_adsp_pil.c
+++ b/drivers/remoteproc/qcom_adsp_pil.c
@@ -38,7 +38,10 @@ struct adsp_data {
 	const char *firmware_name;
 	int pas_id;
 	bool has_aggre2_clk;
+
 	const char *ssr_name;
+	const char *sysmon_name;
+	int ssctl_id;
 };
 
 struct qcom_adsp {
@@ -75,6 +78,7 @@ struct qcom_adsp {
 	struct qcom_rproc_glink glink_subdev;
 	struct qcom_rproc_subdev smd_subdev;
 	struct qcom_rproc_ssr ssr_subdev;
+	struct qcom_sysmon *sysmon;
 };
 
 static int adsp_load(struct rproc *rproc, const struct firmware *fw)
@@ -82,7 +86,9 @@ static int adsp_load(struct rproc *rproc, const struct firmware *fw)
 	struct qcom_adsp *adsp = (struct qcom_adsp *)rproc->priv;
 
 	return qcom_mdt_load(adsp->dev, fw, rproc->firmware, adsp->pas_id,
-			     adsp->mem_region, adsp->mem_phys, adsp->mem_size);
+			     adsp->mem_region, adsp->mem_phys, adsp->mem_size,
+			     &adsp->mem_reloc);
+
 }
 
 static int adsp_start(struct rproc *rproc)
@@ -177,6 +183,7 @@ static const struct rproc_ops adsp_ops = {
 	.start = adsp_start,
 	.stop = adsp_stop,
 	.da_to_va = adsp_da_to_va,
+	.parse_fw = qcom_register_dump_segments,
 	.load = adsp_load,
 };
 
@@ -201,9 +208,6 @@ static irqreturn_t adsp_fatal_interrupt(int irq, void *dev)
 
 	rproc_report_crash(adsp->rproc, RPROC_FATAL_ERROR);
 
-	if (!IS_ERR(msg))
-		msg[0] = '\0';
-
 	return IRQ_HANDLED;
 }
 
@@ -398,6 +402,9 @@ static int adsp_probe(struct platform_device *pdev)
 	qcom_add_glink_subdev(rproc, &adsp->glink_subdev);
 	qcom_add_smd_subdev(rproc, &adsp->smd_subdev);
 	qcom_add_ssr_subdev(rproc, &adsp->ssr_subdev, desc->ssr_name);
+	adsp->sysmon = qcom_add_sysmon_subdev(rproc,
+					      desc->sysmon_name,
+					      desc->ssctl_id);
 
 	ret = rproc_add(rproc);
 	if (ret)
@@ -419,6 +426,7 @@ static int adsp_remove(struct platform_device *pdev)
 	rproc_del(adsp->rproc);
 
 	qcom_remove_glink_subdev(adsp->rproc, &adsp->glink_subdev);
+	qcom_remove_sysmon_subdev(adsp->sysmon);
 	qcom_remove_smd_subdev(adsp->rproc, &adsp->smd_subdev);
 	qcom_remove_ssr_subdev(adsp->rproc, &adsp->ssr_subdev);
 	rproc_free(adsp->rproc);
@@ -432,6 +440,8 @@ static const struct adsp_data adsp_resource_init = {
 		.pas_id = 1,
 		.has_aggre2_clk = false,
 		.ssr_name = "lpass",
+		.sysmon_name = "adsp",
+		.ssctl_id = 0x14,
 };
 
 static const struct adsp_data slpi_resource_init = {
@@ -440,6 +450,8 @@ static const struct adsp_data slpi_resource_init = {
 		.pas_id = 12,
 		.has_aggre2_clk = true,
 		.ssr_name = "dsps",
+		.sysmon_name = "slpi",
+		.ssctl_id = 0x16,
 };
 
 static const struct of_device_id adsp_of_match[] = {
diff --git a/drivers/remoteproc/qcom_common.c b/drivers/remoteproc/qcom_common.c
index 00602499713f..acfc99f82fb8 100644
--- a/drivers/remoteproc/qcom_common.c
+++ b/drivers/remoteproc/qcom_common.c
@@ -22,6 +22,7 @@
 #include <linux/remoteproc.h>
 #include <linux/rpmsg/qcom_glink.h>
 #include <linux/rpmsg/qcom_smd.h>
+#include <linux/soc/qcom/mdt_loader.h>
 
 #include "remoteproc_internal.h"
 #include "qcom_common.h"
@@ -41,7 +42,7 @@ static int glink_subdev_probe(struct rproc_subdev *subdev)
 	return PTR_ERR_OR_ZERO(glink->edge);
 }
 
-static void glink_subdev_remove(struct rproc_subdev *subdev)
+static void glink_subdev_remove(struct rproc_subdev *subdev, bool crashed)
 {
 	struct qcom_rproc_glink *glink = to_glink_subdev(subdev);
 
@@ -74,11 +75,57 @@ EXPORT_SYMBOL_GPL(qcom_add_glink_subdev);
  */
 void qcom_remove_glink_subdev(struct rproc *rproc, struct qcom_rproc_glink *glink)
 {
+	if (!glink->node)
+		return;
+
 	rproc_remove_subdev(rproc, &glink->subdev);
 	of_node_put(glink->node);
 }
 EXPORT_SYMBOL_GPL(qcom_remove_glink_subdev);
 
+/**
+ * qcom_register_dump_segments() - register segments for coredump
+ * @rproc:	remoteproc handle
+ * @fw:		firmware header
+ *
+ * Register all segments of the ELF in the remoteproc coredump segment list
+ *
+ * Return: 0 on success, negative errno on failure.
+ */
+int qcom_register_dump_segments(struct rproc *rproc,
+				const struct firmware *fw)
+{
+	const struct elf32_phdr *phdrs;
+	const struct elf32_phdr *phdr;
+	const struct elf32_hdr *ehdr;
+	int ret;
+	int i;
+
+	ehdr = (struct elf32_hdr *)fw->data;
+	phdrs = (struct elf32_phdr *)(ehdr + 1);
+
+	for (i = 0; i < ehdr->e_phnum; i++) {
+		phdr = &phdrs[i];
+
+		if (phdr->p_type != PT_LOAD)
+			continue;
+
+		if ((phdr->p_flags & QCOM_MDT_TYPE_MASK) == QCOM_MDT_TYPE_HASH)
+			continue;
+
+		if (!phdr->p_memsz)
+			continue;
+
+		ret = rproc_coredump_add_segment(rproc, phdr->p_paddr,
+						 phdr->p_memsz);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(qcom_register_dump_segments);
+
 static int smd_subdev_probe(struct rproc_subdev *subdev)
 {
 	struct qcom_rproc_subdev *smd = to_smd_subdev(subdev);
@@ -88,7 +135,7 @@ static int smd_subdev_probe(struct rproc_subdev *subdev)
 	return PTR_ERR_OR_ZERO(smd->edge);
 }
 
-static void smd_subdev_remove(struct rproc_subdev *subdev)
+static void smd_subdev_remove(struct rproc_subdev *subdev, bool crashed)
 {
 	struct qcom_rproc_subdev *smd = to_smd_subdev(subdev);
 
@@ -121,6 +168,9 @@ EXPORT_SYMBOL_GPL(qcom_add_smd_subdev);
  */
 void qcom_remove_smd_subdev(struct rproc *rproc, struct qcom_rproc_subdev *smd)
 {
+	if (!smd->node)
+		return;
+
 	rproc_remove_subdev(rproc, &smd->subdev);
 	of_node_put(smd->node);
 }
@@ -157,7 +207,7 @@ static int ssr_notify_start(struct rproc_subdev *subdev)
 	return  0;
 }
 
-static void ssr_notify_stop(struct rproc_subdev *subdev)
+static void ssr_notify_stop(struct rproc_subdev *subdev, bool crashed)
 {
 	struct qcom_rproc_ssr *ssr = to_ssr_subdev(subdev);
 
diff --git a/drivers/remoteproc/qcom_common.h b/drivers/remoteproc/qcom_common.h
index 728be9834d8b..58de71e4781c 100644
--- a/drivers/remoteproc/qcom_common.h
+++ b/drivers/remoteproc/qcom_common.h
@@ -4,6 +4,9 @@
 
 #include <linux/remoteproc.h>
 #include "remoteproc_internal.h"
+#include <linux/soc/qcom/qmi.h>
+
+struct qcom_sysmon;
 
 struct qcom_rproc_glink {
 	struct rproc_subdev subdev;
@@ -30,6 +33,8 @@ struct qcom_rproc_ssr {
 void qcom_add_glink_subdev(struct rproc *rproc, struct qcom_rproc_glink *glink);
 void qcom_remove_glink_subdev(struct rproc *rproc, struct qcom_rproc_glink *glink);
 
+int qcom_register_dump_segments(struct rproc *rproc, const struct firmware *fw);
+
 void qcom_add_smd_subdev(struct rproc *rproc, struct qcom_rproc_subdev *smd);
 void qcom_remove_smd_subdev(struct rproc *rproc, struct qcom_rproc_subdev *smd);
 
@@ -37,4 +42,22 @@ void qcom_add_ssr_subdev(struct rproc *rproc, struct qcom_rproc_ssr *ssr,
 			 const char *ssr_name);
 void qcom_remove_ssr_subdev(struct rproc *rproc, struct qcom_rproc_ssr *ssr);
 
+#if IS_ENABLED(CONFIG_QCOM_SYSMON)
+struct qcom_sysmon *qcom_add_sysmon_subdev(struct rproc *rproc,
+					   const char *name,
+					   int ssctl_instance);
+void qcom_remove_sysmon_subdev(struct qcom_sysmon *sysmon);
+#else
+static inline struct qcom_sysmon *qcom_add_sysmon_subdev(struct rproc *rproc,
+							 const char *name,
+							 int ssctl_instance)
+{
+	return NULL;
+}
+
+static inline void qcom_remove_sysmon_subdev(struct qcom_sysmon *sysmon)
+{
+}
+#endif
+
 #endif
diff --git a/drivers/remoteproc/qcom_q6v5_pil.c b/drivers/remoteproc/qcom_q6v5_pil.c
index b4e5e725848d..8e70a627e0bb 100644
--- a/drivers/remoteproc/qcom_q6v5_pil.c
+++ b/drivers/remoteproc/qcom_q6v5_pil.c
@@ -168,6 +168,7 @@ struct q6v5 {
 
 	struct qcom_rproc_subdev smd_subdev;
 	struct qcom_rproc_ssr ssr_subdev;
+	struct qcom_sysmon *sysmon;
 	bool need_mem_protection;
 	int mpss_perm;
 	int mba_perm;
@@ -939,9 +940,6 @@ static irqreturn_t q6v5_wdog_interrupt(int irq, void *dev)
 
 	rproc_report_crash(qproc->rproc, RPROC_WATCHDOG);
 
-	if (!IS_ERR(msg))
-		msg[0] = '\0';
-
 	return IRQ_HANDLED;
 }
 
@@ -959,9 +957,6 @@ static irqreturn_t q6v5_fatal_interrupt(int irq, void *dev)
 
 	rproc_report_crash(qproc->rproc, RPROC_FATAL_ERROR);
 
-	if (!IS_ERR(msg))
-		msg[0] = '\0';
-
 	return IRQ_HANDLED;
 }
 
@@ -1215,6 +1210,7 @@ static int q6v5_probe(struct platform_device *pdev)
 	qproc->mba_perm = BIT(QCOM_SCM_VMID_HLOS);
 	qcom_add_smd_subdev(rproc, &qproc->smd_subdev);
 	qcom_add_ssr_subdev(rproc, &qproc->ssr_subdev, "mpss");
+	qproc->sysmon = qcom_add_sysmon_subdev(rproc, "modem", 0x12);
 
 	ret = rproc_add(rproc);
 	if (ret)
@@ -1234,6 +1230,7 @@ static int q6v5_remove(struct platform_device *pdev)
 
 	rproc_del(qproc->rproc);
 
+	qcom_remove_sysmon_subdev(qproc->sysmon);
 	qcom_remove_smd_subdev(qproc->rproc, &qproc->smd_subdev);
 	qcom_remove_ssr_subdev(qproc->rproc, &qproc->ssr_subdev);
 	rproc_free(qproc->rproc);
diff --git a/drivers/remoteproc/qcom_sysmon.c b/drivers/remoteproc/qcom_sysmon.c
new file mode 100644
index 000000000000..f085545d7da5
--- /dev/null
+++ b/drivers/remoteproc/qcom_sysmon.c
@@ -0,0 +1,579 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2017, Linaro Ltd.
+ */
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/notifier.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/remoteproc/qcom_rproc.h>
+#include <linux/rpmsg.h>
+
+#include "qcom_common.h"
+
+static BLOCKING_NOTIFIER_HEAD(sysmon_notifiers);
+
+struct qcom_sysmon {
+	struct rproc_subdev subdev;
+	struct rproc *rproc;
+
+	struct list_head node;
+
+	const char *name;
+
+	int ssctl_version;
+	int ssctl_instance;
+
+	struct notifier_block nb;
+
+	struct device *dev;
+
+	struct rpmsg_endpoint *ept;
+	struct completion comp;
+	struct mutex lock;
+
+	bool ssr_ack;
+
+	struct qmi_handle qmi;
+	struct sockaddr_qrtr ssctl;
+};
+
+static DEFINE_MUTEX(sysmon_lock);
+static LIST_HEAD(sysmon_list);
+
+/**
+ * sysmon_send_event() - send notification of other remote's SSR event
+ * @sysmon:	sysmon context
+ * @name:	other remote's name
+ */
+static void sysmon_send_event(struct qcom_sysmon *sysmon, const char *name)
+{
+	char req[50];
+	int len;
+	int ret;
+
+	len = snprintf(req, sizeof(req), "ssr:%s:before_shutdown", name);
+	if (len >= sizeof(req))
+		return;
+
+	mutex_lock(&sysmon->lock);
+	reinit_completion(&sysmon->comp);
+	sysmon->ssr_ack = false;
+
+	ret = rpmsg_send(sysmon->ept, req, len);
+	if (ret < 0) {
+		dev_err(sysmon->dev, "failed to send sysmon event\n");
+		goto out_unlock;
+	}
+
+	ret = wait_for_completion_timeout(&sysmon->comp,
+					  msecs_to_jiffies(5000));
+	if (!ret) {
+		dev_err(sysmon->dev, "timeout waiting for sysmon ack\n");
+		goto out_unlock;
+	}
+
+	if (!sysmon->ssr_ack)
+		dev_err(sysmon->dev, "unexpected response to sysmon event\n");
+
+out_unlock:
+	mutex_unlock(&sysmon->lock);
+}
+
+/**
+ * sysmon_request_shutdown() - request graceful shutdown of remote
+ * @sysmon:	sysmon context
+ */
+static void sysmon_request_shutdown(struct qcom_sysmon *sysmon)
+{
+	char *req = "ssr:shutdown";
+	int ret;
+
+	mutex_lock(&sysmon->lock);
+	reinit_completion(&sysmon->comp);
+	sysmon->ssr_ack = false;
+
+	ret = rpmsg_send(sysmon->ept, req, strlen(req) + 1);
+	if (ret < 0) {
+		dev_err(sysmon->dev, "send sysmon shutdown request failed\n");
+		goto out_unlock;
+	}
+
+	ret = wait_for_completion_timeout(&sysmon->comp,
+					  msecs_to_jiffies(5000));
+	if (!ret) {
+		dev_err(sysmon->dev, "timeout waiting for sysmon ack\n");
+		goto out_unlock;
+	}
+
+	if (!sysmon->ssr_ack)
+		dev_err(sysmon->dev,
+			"unexpected response to sysmon shutdown request\n");
+
+out_unlock:
+	mutex_unlock(&sysmon->lock);
+}
+
+static int sysmon_callback(struct rpmsg_device *rpdev, void *data, int count,
+			   void *priv, u32 addr)
+{
+	struct qcom_sysmon *sysmon = priv;
+	const char *ssr_ack = "ssr:ack";
+	const int ssr_ack_len = strlen(ssr_ack) + 1;
+
+	if (!sysmon)
+		return -EINVAL;
+
+	if (count >= ssr_ack_len && !memcmp(data, ssr_ack, ssr_ack_len))
+		sysmon->ssr_ack = true;
+
+	complete(&sysmon->comp);
+
+	return 0;
+}
+
+#define SSCTL_SHUTDOWN_REQ		0x21
+#define SSCTL_SUBSYS_EVENT_REQ		0x23
+
+#define SSCTL_MAX_MSG_LEN		7
+
+#define SSCTL_SUBSYS_NAME_LENGTH	15
+
+enum {
+	SSCTL_SSR_EVENT_BEFORE_POWERUP,
+	SSCTL_SSR_EVENT_AFTER_POWERUP,
+	SSCTL_SSR_EVENT_BEFORE_SHUTDOWN,
+	SSCTL_SSR_EVENT_AFTER_SHUTDOWN,
+};
+
+enum {
+	SSCTL_SSR_EVENT_FORCED,
+	SSCTL_SSR_EVENT_GRACEFUL,
+};
+
+struct ssctl_shutdown_resp {
+	struct qmi_response_type_v01 resp;
+};
+
+static struct qmi_elem_info ssctl_shutdown_resp_ei[] = {
+	{
+		.data_type	= QMI_STRUCT,
+		.elem_len	= 1,
+		.elem_size	= sizeof(struct qmi_response_type_v01),
+		.array_type	= NO_ARRAY,
+		.tlv_type	= 0x02,
+		.offset		= offsetof(struct ssctl_shutdown_resp, resp),
+		.ei_array	= qmi_response_type_v01_ei,
+	},
+	{}
+};
+
+struct ssctl_subsys_event_req {
+	u8 subsys_name_len;
+	char subsys_name[SSCTL_SUBSYS_NAME_LENGTH];
+	u32 event;
+	u8 evt_driven_valid;
+	u32 evt_driven;
+};
+
+static struct qmi_elem_info ssctl_subsys_event_req_ei[] = {
+	{
+		.data_type	= QMI_DATA_LEN,
+		.elem_len	= 1,
+		.elem_size	= sizeof(uint8_t),
+		.array_type	= NO_ARRAY,
+		.tlv_type	= 0x01,
+		.offset		= offsetof(struct ssctl_subsys_event_req,
+					   subsys_name_len),
+		.ei_array	= NULL,
+	},
+	{
+		.data_type	= QMI_UNSIGNED_1_BYTE,
+		.elem_len	= SSCTL_SUBSYS_NAME_LENGTH,
+		.elem_size	= sizeof(char),
+		.array_type	= VAR_LEN_ARRAY,
+		.tlv_type	= 0x01,
+		.offset		= offsetof(struct ssctl_subsys_event_req,
+					   subsys_name),
+		.ei_array	= NULL,
+	},
+	{
+		.data_type	= QMI_SIGNED_4_BYTE_ENUM,
+		.elem_len	= 1,
+		.elem_size	= sizeof(uint32_t),
+		.array_type	= NO_ARRAY,
+		.tlv_type	= 0x02,
+		.offset		= offsetof(struct ssctl_subsys_event_req,
+					   event),
+		.ei_array	= NULL,
+	},
+	{
+		.data_type	= QMI_OPT_FLAG,
+		.elem_len	= 1,
+		.elem_size	= sizeof(uint8_t),
+		.array_type	= NO_ARRAY,
+		.tlv_type	= 0x10,
+		.offset		= offsetof(struct ssctl_subsys_event_req,
+					   evt_driven_valid),
+		.ei_array	= NULL,
+	},
+	{
+		.data_type	= QMI_SIGNED_4_BYTE_ENUM,
+		.elem_len	= 1,
+		.elem_size	= sizeof(uint32_t),
+		.array_type	= NO_ARRAY,
+		.tlv_type	= 0x10,
+		.offset		= offsetof(struct ssctl_subsys_event_req,
+					   evt_driven),
+		.ei_array	= NULL,
+	},
+	{}
+};
+
+struct ssctl_subsys_event_resp {
+	struct qmi_response_type_v01 resp;
+};
+
+static struct qmi_elem_info ssctl_subsys_event_resp_ei[] = {
+	{
+		.data_type	= QMI_STRUCT,
+		.elem_len	= 1,
+		.elem_size	= sizeof(struct qmi_response_type_v01),
+		.array_type	= NO_ARRAY,
+		.tlv_type	= 0x02,
+		.offset		= offsetof(struct ssctl_subsys_event_resp,
+					   resp),
+		.ei_array	= qmi_response_type_v01_ei,
+	},
+	{}
+};
+
+/**
+ * ssctl_request_shutdown() - request shutdown via SSCTL QMI service
+ * @sysmon:	sysmon context
+ */
+static void ssctl_request_shutdown(struct qcom_sysmon *sysmon)
+{
+	struct ssctl_shutdown_resp resp;
+	struct qmi_txn txn;
+	int ret;
+
+	ret = qmi_txn_init(&sysmon->qmi, &txn, ssctl_shutdown_resp_ei, &resp);
+	if (ret < 0) {
+		dev_err(sysmon->dev, "failed to allocate QMI txn\n");
+		return;
+	}
+
+	ret = qmi_send_request(&sysmon->qmi, &sysmon->ssctl, &txn,
+			       SSCTL_SHUTDOWN_REQ, 0, NULL, NULL);
+	if (ret < 0) {
+		dev_err(sysmon->dev, "failed to send shutdown request\n");
+		qmi_txn_cancel(&txn);
+		return;
+	}
+
+	ret = qmi_txn_wait(&txn, 5 * HZ);
+	if (ret < 0)
+		dev_err(sysmon->dev, "failed receiving QMI response\n");
+	else if (resp.resp.result)
+		dev_err(sysmon->dev, "shutdown request failed\n");
+	else
+		dev_dbg(sysmon->dev, "shutdown request completed\n");
+}
+
+/**
+ * ssctl_send_event() - send notification of other remote's SSR event
+ * @sysmon:	sysmon context
+ * @name:	other remote's name
+ */
+static void ssctl_send_event(struct qcom_sysmon *sysmon, const char *name)
+{
+	struct ssctl_subsys_event_resp resp;
+	struct ssctl_subsys_event_req req;
+	struct qmi_txn txn;
+	int ret;
+
+	memset(&resp, 0, sizeof(resp));
+	ret = qmi_txn_init(&sysmon->qmi, &txn, ssctl_subsys_event_resp_ei, &resp);
+	if (ret < 0) {
+		dev_err(sysmon->dev, "failed to allocate QMI txn\n");
+		return;
+	}
+
+	memset(&req, 0, sizeof(req));
+	strlcpy(req.subsys_name, name, sizeof(req.subsys_name));
+	req.subsys_name_len = strlen(req.subsys_name);
+	req.event = SSCTL_SSR_EVENT_BEFORE_SHUTDOWN;
+	req.evt_driven_valid = true;
+	req.evt_driven = SSCTL_SSR_EVENT_FORCED;
+
+	ret = qmi_send_request(&sysmon->qmi, &sysmon->ssctl, &txn,
+			       SSCTL_SUBSYS_EVENT_REQ, 40,
+			       ssctl_subsys_event_req_ei, &req);
+	if (ret < 0) {
+		dev_err(sysmon->dev, "failed to send shutdown request\n");
+		qmi_txn_cancel(&txn);
+		return;
+	}
+
+	ret = qmi_txn_wait(&txn, 5 * HZ);
+	if (ret < 0)
+		dev_err(sysmon->dev, "failed receiving QMI response\n");
+	else if (resp.resp.result)
+		dev_err(sysmon->dev, "ssr event send failed\n");
+	else
+		dev_dbg(sysmon->dev, "ssr event send completed\n");
+}
+
+/**
+ * ssctl_new_server() - QMI callback indicating a new service
+ * @qmi:	QMI handle
+ * @svc:	service information
+ *
+ * Return: 0 if we're interested in this service, -EINVAL otherwise.
+ */
+static int ssctl_new_server(struct qmi_handle *qmi, struct qmi_service *svc)
+{
+	struct qcom_sysmon *sysmon = container_of(qmi, struct qcom_sysmon, qmi);
+
+	switch (svc->version) {
+	case 1:
+		if (svc->instance != 0)
+			return -EINVAL;
+		if (strcmp(sysmon->name, "modem"))
+			return -EINVAL;
+		break;
+	case 2:
+		if (svc->instance != sysmon->ssctl_instance)
+			return -EINVAL;
+		break;
+	default:
+		return -EINVAL;
+	};
+
+	sysmon->ssctl_version = svc->version;
+
+	sysmon->ssctl.sq_family = AF_QIPCRTR;
+	sysmon->ssctl.sq_node = svc->node;
+	sysmon->ssctl.sq_port = svc->port;
+
+	svc->priv = sysmon;
+
+	return 0;
+}
+
+/**
+ * ssctl_del_server() - QMI callback indicating that @svc is removed
+ * @qmi:	QMI handle
+ * @svc:	service information
+ */
+static void ssctl_del_server(struct qmi_handle *qmi, struct qmi_service *svc)
+{
+	struct qcom_sysmon *sysmon = svc->priv;
+
+	sysmon->ssctl_version = 0;
+}
+
+static const struct qmi_ops ssctl_ops = {
+	.new_server = ssctl_new_server,
+	.del_server = ssctl_del_server,
+};
+
+static int sysmon_start(struct rproc_subdev *subdev)
+{
+	return 0;
+}
+
+static void sysmon_stop(struct rproc_subdev *subdev, bool crashed)
+{
+	struct qcom_sysmon *sysmon = container_of(subdev, struct qcom_sysmon, subdev);
+
+	blocking_notifier_call_chain(&sysmon_notifiers, 0, (void *)sysmon->name);
+
+	/* Don't request graceful shutdown if we've crashed */
+	if (crashed)
+		return;
+
+	if (sysmon->ssctl_version)
+		ssctl_request_shutdown(sysmon);
+	else if (sysmon->ept)
+		sysmon_request_shutdown(sysmon);
+}
+
+/**
+ * sysmon_notify() - notify sysmon target of another's SSR
+ * @nb:		notifier_block associated with sysmon instance
+ * @event:	unused
+ * @data:	SSR identifier of the remote that is going down
+ */
+static int sysmon_notify(struct notifier_block *nb, unsigned long event,
+			 void *data)
+{
+	struct qcom_sysmon *sysmon = container_of(nb, struct qcom_sysmon, nb);
+	struct rproc *rproc = sysmon->rproc;
+	const char *ssr_name = data;
+
+	/* Skip non-running rprocs and the originating instance */
+	if (rproc->state != RPROC_RUNNING || !strcmp(data, sysmon->name)) {
+		dev_dbg(sysmon->dev, "not notifying %s\n", sysmon->name);
+		return NOTIFY_DONE;
+	}
+
+	/* Only SSCTL version 2 supports SSR events */
+	if (sysmon->ssctl_version == 2)
+		ssctl_send_event(sysmon, ssr_name);
+	else if (sysmon->ept)
+		sysmon_send_event(sysmon, ssr_name);
+
+	return NOTIFY_DONE;
+}
+
+/**
+ * qcom_add_sysmon_subdev() - create a sysmon subdev for the given remoteproc
+ * @rproc:	rproc context to associate the subdev with
+ * @name:	name of this subdev, to use in SSR
+ * @ssctl_instance: instance id of the ssctl QMI service
+ *
+ * Return: A new qcom_sysmon object, or NULL on failure
+ */
+struct qcom_sysmon *qcom_add_sysmon_subdev(struct rproc *rproc,
+					   const char *name,
+					   int ssctl_instance)
+{
+	struct qcom_sysmon *sysmon;
+	int ret;
+
+	sysmon = kzalloc(sizeof(*sysmon), GFP_KERNEL);
+	if (!sysmon)
+		return NULL;
+
+	sysmon->dev = rproc->dev.parent;
+	sysmon->rproc = rproc;
+
+	sysmon->name = name;
+	sysmon->ssctl_instance = ssctl_instance;
+
+	init_completion(&sysmon->comp);
+	mutex_init(&sysmon->lock);
+
+	ret = qmi_handle_init(&sysmon->qmi, SSCTL_MAX_MSG_LEN, &ssctl_ops, NULL);
+	if (ret < 0) {
+		dev_err(sysmon->dev, "failed to initialize qmi handle\n");
+		kfree(sysmon);
+		return NULL;
+	}
+
+	qmi_add_lookup(&sysmon->qmi, 43, 0, 0);
+
+	rproc_add_subdev(rproc, &sysmon->subdev, sysmon_start, sysmon_stop);
+
+	sysmon->nb.notifier_call = sysmon_notify;
+	blocking_notifier_chain_register(&sysmon_notifiers, &sysmon->nb);
+
+	mutex_lock(&sysmon_lock);
+	list_add(&sysmon->node, &sysmon_list);
+	mutex_unlock(&sysmon_lock);
+
+	return sysmon;
+}
+EXPORT_SYMBOL_GPL(qcom_add_sysmon_subdev);
+
+/**
+ * qcom_remove_sysmon_subdev() - release a qcom_sysmon
+ * @sysmon:	sysmon context, as retrieved by qcom_add_sysmon_subdev()
+ */
+void qcom_remove_sysmon_subdev(struct qcom_sysmon *sysmon)
+{
+	if (!sysmon)
+		return;
+
+	mutex_lock(&sysmon_lock);
+	list_del(&sysmon->node);
+	mutex_unlock(&sysmon_lock);
+
+	blocking_notifier_chain_unregister(&sysmon_notifiers, &sysmon->nb);
+
+	rproc_remove_subdev(sysmon->rproc, &sysmon->subdev);
+
+	qmi_handle_release(&sysmon->qmi);
+
+	kfree(sysmon);
+}
+EXPORT_SYMBOL_GPL(qcom_remove_sysmon_subdev);
+
+/**
+ * sysmon_probe() - probe sys_mon channel
+ * @rpdev:	rpmsg device handle
+ *
+ * Find the sysmon context associated with the ancestor remoteproc and assign
+ * this rpmsg device with said sysmon context.
+ *
+ * Return: 0 on success, negative errno on failure.
+ */
+static int sysmon_probe(struct rpmsg_device *rpdev)
+{
+	struct qcom_sysmon *sysmon;
+	struct rproc *rproc;
+
+	rproc = rproc_get_by_child(&rpdev->dev);
+	if (!rproc) {
+		dev_err(&rpdev->dev, "sysmon device not child of rproc\n");
+		return -EINVAL;
+	}
+
+	mutex_lock(&sysmon_lock);
+	list_for_each_entry(sysmon, &sysmon_list, node) {
+		if (sysmon->rproc == rproc)
+			goto found;
+	}
+	mutex_unlock(&sysmon_lock);
+
+	dev_err(&rpdev->dev, "no sysmon associated with parent rproc\n");
+
+	return -EINVAL;
+
+found:
+	mutex_unlock(&sysmon_lock);
+
+	rpdev->ept->priv = sysmon;
+	sysmon->ept = rpdev->ept;
+
+	return 0;
+}
+
+/**
+ * sysmon_remove() - sys_mon channel remove handler
+ * @rpdev:	rpmsg device handle
+ *
+ * Disassociate the rpmsg device with the sysmon instance.
+ */
+static void sysmon_remove(struct rpmsg_device *rpdev)
+{
+	struct qcom_sysmon *sysmon = rpdev->ept->priv;
+
+	sysmon->ept = NULL;
+}
+
+static const struct rpmsg_device_id sysmon_match[] = {
+	{ "sys_mon" },
+	{}
+};
+
+static struct rpmsg_driver sysmon_driver = {
+	.probe = sysmon_probe,
+	.remove = sysmon_remove,
+	.callback = sysmon_callback,
+	.id_table = sysmon_match,
+	.drv = {
+		.name = "qcom_sysmon",
+	},
+};
+
+module_rpmsg_driver(sysmon_driver);
+
+MODULE_DESCRIPTION("Qualcomm sysmon driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/remoteproc/qcom_wcnss.c b/drivers/remoteproc/qcom_wcnss.c
index 3f0609236a76..b0e07e9f42d5 100644
--- a/drivers/remoteproc/qcom_wcnss.c
+++ b/drivers/remoteproc/qcom_wcnss.c
@@ -40,6 +40,7 @@
 #define WCNSS_CRASH_REASON_SMEM		422
 #define WCNSS_FIRMWARE_NAME		"wcnss.mdt"
 #define WCNSS_PAS_ID			6
+#define WCNSS_SSCTL_ID			0x13
 
 #define WCNSS_SPARE_NVBIN_DLND		BIT(25)
 
@@ -98,6 +99,7 @@ struct qcom_wcnss {
 	size_t mem_size;
 
 	struct qcom_rproc_subdev smd_subdev;
+	struct qcom_sysmon *sysmon;
 };
 
 static const struct wcnss_data riva_data = {
@@ -153,7 +155,8 @@ static int wcnss_load(struct rproc *rproc, const struct firmware *fw)
 	struct qcom_wcnss *wcnss = (struct qcom_wcnss *)rproc->priv;
 
 	return qcom_mdt_load(wcnss->dev, fw, rproc->firmware, WCNSS_PAS_ID,
-			     wcnss->mem_region, wcnss->mem_phys, wcnss->mem_size);
+			     wcnss->mem_region, wcnss->mem_phys,
+			     wcnss->mem_size, &wcnss->mem_reloc);
 }
 
 static void wcnss_indicate_nv_download(struct qcom_wcnss *wcnss)
@@ -308,6 +311,7 @@ static const struct rproc_ops wcnss_ops = {
 	.start = wcnss_start,
 	.stop = wcnss_stop,
 	.da_to_va = wcnss_da_to_va,
+	.parse_fw = qcom_register_dump_segments,
 	.load = wcnss_load,
 };
 
@@ -332,9 +336,6 @@ static irqreturn_t wcnss_fatal_interrupt(int irq, void *dev)
 
 	rproc_report_crash(wcnss->rproc, RPROC_FATAL_ERROR);
 
-	if (!IS_ERR(msg))
-		msg[0] = '\0';
-
 	return IRQ_HANDLED;
 }
 
@@ -551,6 +552,7 @@ static int wcnss_probe(struct platform_device *pdev)
 	}
 
 	qcom_add_smd_subdev(rproc, &wcnss->smd_subdev);
+	wcnss->sysmon = qcom_add_sysmon_subdev(rproc, "wcnss", WCNSS_SSCTL_ID);
 
 	ret = rproc_add(rproc);
 	if (ret)
@@ -573,6 +575,7 @@ static int wcnss_remove(struct platform_device *pdev)
 	qcom_smem_state_put(wcnss->state);
 	rproc_del(wcnss->rproc);
 
+	qcom_remove_sysmon_subdev(wcnss->sysmon);
 	qcom_remove_smd_subdev(wcnss->rproc, &wcnss->smd_subdev);
 	rproc_free(wcnss->rproc);
 
diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index 4170dfbd93bd..6d9c5832ce47 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -33,6 +33,7 @@
 #include <linux/firmware.h>
 #include <linux/string.h>
 #include <linux/debugfs.h>
+#include <linux/devcoredump.h>
 #include <linux/remoteproc.h>
 #include <linux/iommu.h>
 #include <linux/idr.h>
@@ -307,7 +308,7 @@ static int rproc_vdev_do_probe(struct rproc_subdev *subdev)
 	return rproc_add_virtio_dev(rvdev, rvdev->id);
 }
 
-static void rproc_vdev_do_remove(struct rproc_subdev *subdev)
+static void rproc_vdev_do_remove(struct rproc_subdev *subdev, bool crashed)
 {
 	struct rproc_vdev *rvdev = container_of(subdev, struct rproc_vdev, subdev);
 
@@ -788,17 +789,31 @@ static int rproc_probe_subdevices(struct rproc *rproc)
 
 unroll_registration:
 	list_for_each_entry_continue_reverse(subdev, &rproc->subdevs, node)
-		subdev->remove(subdev);
+		subdev->remove(subdev, true);
 
 	return ret;
 }
 
-static void rproc_remove_subdevices(struct rproc *rproc)
+static void rproc_remove_subdevices(struct rproc *rproc, bool crashed)
 {
 	struct rproc_subdev *subdev;
 
 	list_for_each_entry_reverse(subdev, &rproc->subdevs, node)
-		subdev->remove(subdev);
+		subdev->remove(subdev, crashed);
+}
+
+/**
+ * rproc_coredump_cleanup() - clean up dump_segments list
+ * @rproc: the remote processor handle
+ */
+static void rproc_coredump_cleanup(struct rproc *rproc)
+{
+	struct rproc_dump_segment *entry, *tmp;
+
+	list_for_each_entry_safe(entry, tmp, &rproc->dump_segments, node) {
+		list_del(&entry->node);
+		kfree(entry);
+	}
 }
 
 /**
@@ -848,6 +863,8 @@ static void rproc_resource_cleanup(struct rproc *rproc)
 	/* clean up remote vdev entries */
 	list_for_each_entry_safe(rvdev, rvtmp, &rproc->rvdevs, node)
 		kref_put(&rvdev->refcount, rproc_vdev_release);
+
+	rproc_coredump_cleanup(rproc);
 }
 
 static int rproc_start(struct rproc *rproc, const struct firmware *fw)
@@ -927,8 +944,8 @@ static int rproc_fw_boot(struct rproc *rproc, const struct firmware *fw)
 
 	rproc->bootaddr = rproc_get_boot_addr(rproc, fw);
 
-	/* load resource table */
-	ret = rproc_load_rsc_table(rproc, fw);
+	/* Load resource table, core dump segment list etc from the firmware */
+	ret = rproc_parse_fw(rproc, fw);
 	if (ret)
 		goto disable_iommu;
 
@@ -992,13 +1009,13 @@ static int rproc_trigger_auto_boot(struct rproc *rproc)
 	return ret;
 }
 
-static int rproc_stop(struct rproc *rproc)
+static int rproc_stop(struct rproc *rproc, bool crashed)
 {
 	struct device *dev = &rproc->dev;
 	int ret;
 
 	/* remove any subdevices for the remote processor */
-	rproc_remove_subdevices(rproc);
+	rproc_remove_subdevices(rproc, crashed);
 
 	/* the installed resource table is no longer accessible */
 	rproc->table_ptr = rproc->cached_table;
@@ -1018,6 +1035,113 @@ static int rproc_stop(struct rproc *rproc)
 }
 
 /**
+ * rproc_coredump_add_segment() - add segment of device memory to coredump
+ * @rproc:	handle of a remote processor
+ * @da:		device address
+ * @size:	size of segment
+ *
+ * Add device memory to the list of segments to be included in a coredump for
+ * the remoteproc.
+ *
+ * Return: 0 on success, negative errno on error.
+ */
+int rproc_coredump_add_segment(struct rproc *rproc, dma_addr_t da, size_t size)
+{
+	struct rproc_dump_segment *segment;
+
+	segment = kzalloc(sizeof(*segment), GFP_KERNEL);
+	if (!segment)
+		return -ENOMEM;
+
+	segment->da = da;
+	segment->size = size;
+
+	list_add_tail(&segment->node, &rproc->dump_segments);
+
+	return 0;
+}
+EXPORT_SYMBOL(rproc_coredump_add_segment);
+
+/**
+ * rproc_coredump() - perform coredump
+ * @rproc:	rproc handle
+ *
+ * This function will generate an ELF header for the registered segments
+ * and create a devcoredump device associated with rproc.
+ */
+static void rproc_coredump(struct rproc *rproc)
+{
+	struct rproc_dump_segment *segment;
+	struct elf32_phdr *phdr;
+	struct elf32_hdr *ehdr;
+	size_t data_size;
+	size_t offset;
+	void *data;
+	void *ptr;
+	int phnum = 0;
+
+	if (list_empty(&rproc->dump_segments))
+		return;
+
+	data_size = sizeof(*ehdr);
+	list_for_each_entry(segment, &rproc->dump_segments, node) {
+		data_size += sizeof(*phdr) + segment->size;
+
+		phnum++;
+	}
+
+	data = vmalloc(data_size);
+	if (!data)
+		return;
+
+	ehdr = data;
+
+	memset(ehdr, 0, sizeof(*ehdr));
+	memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
+	ehdr->e_ident[EI_CLASS] = ELFCLASS32;
+	ehdr->e_ident[EI_DATA] = ELFDATA2LSB;
+	ehdr->e_ident[EI_VERSION] = EV_CURRENT;
+	ehdr->e_ident[EI_OSABI] = ELFOSABI_NONE;
+	ehdr->e_type = ET_CORE;
+	ehdr->e_machine = EM_NONE;
+	ehdr->e_version = EV_CURRENT;
+	ehdr->e_entry = rproc->bootaddr;
+	ehdr->e_phoff = sizeof(*ehdr);
+	ehdr->e_ehsize = sizeof(*ehdr);
+	ehdr->e_phentsize = sizeof(*phdr);
+	ehdr->e_phnum = phnum;
+
+	phdr = data + ehdr->e_phoff;
+	offset = ehdr->e_phoff + sizeof(*phdr) * ehdr->e_phnum;
+	list_for_each_entry(segment, &rproc->dump_segments, node) {
+		memset(phdr, 0, sizeof(*phdr));
+		phdr->p_type = PT_LOAD;
+		phdr->p_offset = offset;
+		phdr->p_vaddr = segment->da;
+		phdr->p_paddr = segment->da;
+		phdr->p_filesz = segment->size;
+		phdr->p_memsz = segment->size;
+		phdr->p_flags = PF_R | PF_W | PF_X;
+		phdr->p_align = 0;
+
+		ptr = rproc_da_to_va(rproc, segment->da, segment->size);
+		if (!ptr) {
+			dev_err(&rproc->dev,
+				"invalid coredump segment (%pad, %zu)\n",
+				&segment->da, segment->size);
+			memset(data + offset, 0xff, segment->size);
+		} else {
+			memcpy(data + offset, ptr, segment->size);
+		}
+
+		offset += phdr->p_filesz;
+		phdr++;
+	}
+
+	dev_coredumpv(&rproc->dev, data, data_size, GFP_KERNEL);
+}
+
+/**
  * rproc_trigger_recovery() - recover a remoteproc
  * @rproc: the remote processor
  *
@@ -1039,10 +1163,13 @@ int rproc_trigger_recovery(struct rproc *rproc)
 	if (ret)
 		return ret;
 
-	ret = rproc_stop(rproc);
+	ret = rproc_stop(rproc, false);
 	if (ret)
 		goto unlock_mutex;
 
+	/* generate coredump */
+	rproc_coredump(rproc);
+
 	/* load firmware */
 	ret = request_firmware(&firmware_p, rproc->firmware, dev);
 	if (ret < 0) {
@@ -1189,7 +1316,7 @@ void rproc_shutdown(struct rproc *rproc)
 	if (!atomic_dec_and_test(&rproc->power))
 		goto out;
 
-	ret = rproc_stop(rproc);
+	ret = rproc_stop(rproc, true);
 	if (ret) {
 		atomic_inc(&rproc->power);
 		goto out;
@@ -1428,7 +1555,7 @@ struct rproc *rproc_alloc(struct device *dev, const char *name,
 	/* Default to ELF loader if no load function is specified */
 	if (!rproc->ops->load) {
 		rproc->ops->load = rproc_elf_load_segments;
-		rproc->ops->load_rsc_table = rproc_elf_load_rsc_table;
+		rproc->ops->parse_fw = rproc_elf_load_rsc_table;
 		rproc->ops->find_loaded_rsc_table = rproc_elf_find_loaded_rsc_table;
 		rproc->ops->sanity_check = rproc_elf_sanity_check;
 		rproc->ops->get_boot_addr = rproc_elf_get_boot_addr;
@@ -1443,6 +1570,7 @@ struct rproc *rproc_alloc(struct device *dev, const char *name,
 	INIT_LIST_HEAD(&rproc->traces);
 	INIT_LIST_HEAD(&rproc->rvdevs);
 	INIT_LIST_HEAD(&rproc->subdevs);
+	INIT_LIST_HEAD(&rproc->dump_segments);
 
 	INIT_WORK(&rproc->crash_handler, rproc_crash_handler_work);
 
@@ -1535,7 +1663,7 @@ EXPORT_SYMBOL(rproc_del);
 void rproc_add_subdev(struct rproc *rproc,
 		      struct rproc_subdev *subdev,
 		      int (*probe)(struct rproc_subdev *subdev),
-		      void (*remove)(struct rproc_subdev *subdev))
+		      void (*remove)(struct rproc_subdev *subdev, bool crashed))
 {
 	subdev->probe = probe;
 	subdev->remove = remove;
diff --git a/drivers/remoteproc/remoteproc_internal.h b/drivers/remoteproc/remoteproc_internal.h
index 55a2950c5cb7..7570beb035b5 100644
--- a/drivers/remoteproc/remoteproc_internal.h
+++ b/drivers/remoteproc/remoteproc_internal.h
@@ -88,11 +88,10 @@ int rproc_load_segments(struct rproc *rproc, const struct firmware *fw)
 	return -EINVAL;
 }
 
-static inline int rproc_load_rsc_table(struct rproc *rproc,
-				       const struct firmware *fw)
+static inline int rproc_parse_fw(struct rproc *rproc, const struct firmware *fw)
 {
-	if (rproc->ops->load_rsc_table)
-		return rproc->ops->load_rsc_table(rproc, fw);
+	if (rproc->ops->parse_fw)
+		return rproc->ops->parse_fw(rproc, fw);
 
 	return 0;
 }
diff --git a/drivers/rpmsg/qcom_glink_native.c b/drivers/rpmsg/qcom_glink_native.c
index e0f31ed096a5..768ef542a841 100644
--- a/drivers/rpmsg/qcom_glink_native.c
+++ b/drivers/rpmsg/qcom_glink_native.c
@@ -113,7 +113,7 @@ struct qcom_glink {
 	spinlock_t rx_lock;
 	struct list_head rx_queue;
 
-	struct mutex tx_lock;
+	spinlock_t tx_lock;
 
 	spinlock_t idr_lock;
 	struct idr lcids;
@@ -288,15 +288,14 @@ static int qcom_glink_tx(struct qcom_glink *glink,
 			 const void *data, size_t dlen, bool wait)
 {
 	unsigned int tlen = hlen + dlen;
-	int ret;
+	unsigned long flags;
+	int ret = 0;
 
 	/* Reject packets that are too big */
 	if (tlen >= glink->tx_pipe->length)
 		return -EINVAL;
 
-	ret = mutex_lock_interruptible(&glink->tx_lock);
-	if (ret)
-		return ret;
+	spin_lock_irqsave(&glink->tx_lock, flags);
 
 	while (qcom_glink_tx_avail(glink) < tlen) {
 		if (!wait) {
@@ -304,7 +303,12 @@ static int qcom_glink_tx(struct qcom_glink *glink,
 			goto out;
 		}
 
+		/* Wait without holding the tx_lock */
+		spin_unlock_irqrestore(&glink->tx_lock, flags);
+
 		usleep_range(10000, 15000);
+
+		spin_lock_irqsave(&glink->tx_lock, flags);
 	}
 
 	qcom_glink_tx_write(glink, hdr, hlen, data, dlen);
@@ -313,7 +317,7 @@ static int qcom_glink_tx(struct qcom_glink *glink,
 	mbox_client_txdone(glink->mbox_chan, 0);
 
 out:
-	mutex_unlock(&glink->tx_lock);
+	spin_unlock_irqrestore(&glink->tx_lock, flags);
 
 	return ret;
 }
@@ -1567,7 +1571,7 @@ struct qcom_glink *qcom_glink_native_probe(struct device *dev,
 	glink->features = features;
 	glink->intentless = intentless;
 
-	mutex_init(&glink->tx_lock);
+	spin_lock_init(&glink->tx_lock);
 	spin_lock_init(&glink->rx_lock);
 	INIT_LIST_HEAD(&glink->rx_queue);
 	INIT_WORK(&glink->rx_work, qcom_glink_work);
diff --git a/drivers/rpmsg/qcom_glink_smem.c b/drivers/rpmsg/qcom_glink_smem.c
index 892f2b92a4d8..3fa9d43e2c87 100644
--- a/drivers/rpmsg/qcom_glink_smem.c
+++ b/drivers/rpmsg/qcom_glink_smem.c
@@ -217,6 +217,7 @@ struct qcom_glink *qcom_glink_smem_register(struct device *parent,
 	ret = device_register(dev);
 	if (ret) {
 		pr_err("failed to register glink edge\n");
+		put_device(dev);
 		return ERR_PTR(ret);
 	}
 
@@ -299,7 +300,7 @@ struct qcom_glink *qcom_glink_smem_register(struct device *parent,
 	return glink;
 
 err_put_dev:
-	put_device(dev);
+	device_unregister(dev);
 
 	return ERR_PTR(ret);
 }
diff --git a/drivers/rpmsg/qcom_smd.c b/drivers/rpmsg/qcom_smd.c
index 92d0c6a7a837..5ce9bf7b897d 100644
--- a/drivers/rpmsg/qcom_smd.c
+++ b/drivers/rpmsg/qcom_smd.c
@@ -167,9 +167,9 @@ struct qcom_smd_endpoint {
 	struct qcom_smd_channel *qsch;
 };
 
-#define to_smd_device(_rpdev)	container_of(_rpdev, struct qcom_smd_device, rpdev)
+#define to_smd_device(r)	container_of(r, struct qcom_smd_device, rpdev)
 #define to_smd_edge(d)		container_of(d, struct qcom_smd_edge, dev)
-#define to_smd_endpoint(ept)	container_of(ept, struct qcom_smd_endpoint, ept)
+#define to_smd_endpoint(e)	container_of(e, struct qcom_smd_endpoint, ept)
 
 /**
  * struct qcom_smd_channel - smd channel struct
@@ -205,7 +205,7 @@ struct qcom_smd_channel {
 	struct smd_channel_info_pair *info;
 	struct smd_channel_info_word_pair *info_word;
 
-	struct mutex tx_lock;
+	spinlock_t tx_lock;
 	wait_queue_head_t fblockread_event;
 
 	void *tx_fifo;
@@ -729,6 +729,7 @@ static int __qcom_smd_send(struct qcom_smd_channel *channel, const void *data,
 {
 	__le32 hdr[5] = { cpu_to_le32(len), };
 	int tlen = sizeof(hdr) + len;
+	unsigned long flags;
 	int ret;
 
 	/* Word aligned channels only accept word size aligned data */
@@ -739,9 +740,11 @@ static int __qcom_smd_send(struct qcom_smd_channel *channel, const void *data,
 	if (tlen >= channel->fifo_size)
 		return -EINVAL;
 
-	ret = mutex_lock_interruptible(&channel->tx_lock);
-	if (ret)
-		return ret;
+	/* Highlight the fact that if we enter the loop below we might sleep */
+	if (wait)
+		might_sleep();
+
+	spin_lock_irqsave(&channel->tx_lock, flags);
 
 	while (qcom_smd_get_tx_avail(channel) < tlen &&
 	       channel->state == SMD_CHANNEL_OPENED) {
@@ -753,7 +756,7 @@ static int __qcom_smd_send(struct qcom_smd_channel *channel, const void *data,
 		SET_TX_CHANNEL_FLAG(channel, fBLOCKREADINTR, 0);
 
 		/* Wait without holding the tx_lock */
-		mutex_unlock(&channel->tx_lock);
+		spin_unlock_irqrestore(&channel->tx_lock, flags);
 
 		ret = wait_event_interruptible(channel->fblockread_event,
 				       qcom_smd_get_tx_avail(channel) >= tlen ||
@@ -761,9 +764,7 @@ static int __qcom_smd_send(struct qcom_smd_channel *channel, const void *data,
 		if (ret)
 			return ret;
 
-		ret = mutex_lock_interruptible(&channel->tx_lock);
-		if (ret)
-			return ret;
+		spin_lock_irqsave(&channel->tx_lock, flags);
 
 		SET_TX_CHANNEL_FLAG(channel, fBLOCKREADINTR, 1);
 	}
@@ -787,7 +788,7 @@ static int __qcom_smd_send(struct qcom_smd_channel *channel, const void *data,
 	qcom_smd_signal_channel(channel);
 
 out_unlock:
-	mutex_unlock(&channel->tx_lock);
+	spin_unlock_irqrestore(&channel->tx_lock, flags);
 
 	return ret;
 }
@@ -996,8 +997,26 @@ static struct device_node *qcom_smd_match_channel(struct device_node *edge_node,
 	return NULL;
 }
 
+static int qcom_smd_announce_create(struct rpmsg_device *rpdev)
+{
+	struct qcom_smd_endpoint *qept = to_smd_endpoint(rpdev->ept);
+	struct qcom_smd_channel *channel = qept->qsch;
+	unsigned long flags;
+	bool kick_state;
+
+	spin_lock_irqsave(&channel->recv_lock, flags);
+	kick_state = qcom_smd_channel_intr(channel);
+	spin_unlock_irqrestore(&channel->recv_lock, flags);
+
+	if (kick_state)
+		schedule_work(&channel->edge->state_work);
+
+	return 0;
+}
+
 static const struct rpmsg_device_ops qcom_smd_device_ops = {
 	.create_ept = qcom_smd_create_ept,
+	.announce_create = qcom_smd_announce_create,
 };
 
 static const struct rpmsg_endpoint_ops qcom_smd_endpoint_ops = {
@@ -1090,7 +1109,7 @@ static struct qcom_smd_channel *qcom_smd_create_channel(struct qcom_smd_edge *ed
 	if (!channel->name)
 		return ERR_PTR(-ENOMEM);
 
-	mutex_init(&channel->tx_lock);
+	spin_lock_init(&channel->tx_lock);
 	spin_lock_init(&channel->recv_lock);
 	init_waitqueue_head(&channel->fblockread_event);
 	init_waitqueue_head(&channel->state_change_event);
@@ -1234,6 +1253,11 @@ static void qcom_channel_state_worker(struct work_struct *work)
 		if (channel->state != SMD_CHANNEL_CLOSED)
 			continue;
 
+		remote_state = GET_RX_CHANNEL_INFO(channel, state);
+		if (remote_state != SMD_CHANNEL_OPENING &&
+		    remote_state != SMD_CHANNEL_OPENED)
+			continue;
+
 		if (channel->registered)
 			continue;
 
@@ -1408,6 +1432,7 @@ struct qcom_smd_edge *qcom_smd_register_edge(struct device *parent,
 	ret = device_register(&edge->dev);
 	if (ret) {
 		pr_err("failed to register smd edge\n");
+		put_device(&edge->dev);
 		return ERR_PTR(ret);
 	}
 
@@ -1428,7 +1453,7 @@ struct qcom_smd_edge *qcom_smd_register_edge(struct device *parent,
 	return edge;
 
 unregister_dev:
-	put_device(&edge->dev);
+	device_unregister(&edge->dev);
 	return ERR_PTR(ret);
 }
 EXPORT_SYMBOL(qcom_smd_register_edge);
diff --git a/drivers/rpmsg/rpmsg_core.c b/drivers/rpmsg/rpmsg_core.c
index 5a081762afcc..920a02f0462c 100644
--- a/drivers/rpmsg/rpmsg_core.c
+++ b/drivers/rpmsg/rpmsg_core.c
@@ -442,7 +442,7 @@ static int rpmsg_dev_probe(struct device *dev)
 		goto out;
 	}
 
-	if (rpdev->ops->announce_create)
+	if (ept && rpdev->ops->announce_create)
 		err = rpdev->ops->announce_create(rpdev);
 out:
 	return err;
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 319e3c8976d5..59e6dede3db3 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -407,6 +407,16 @@ config RTC_DRV_ISL12022
 	  This driver can also be built as a module. If so, the module
 	  will be called rtc-isl12022.
 
+config RTC_DRV_ISL12026
+	tristate "Intersil ISL12026"
+	depends on OF || COMPILE_TEST
+	help
+	  If you say yes here you get support for the
+	  Intersil ISL12026 RTC chip.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called rtc-isl12026.
+
 config RTC_DRV_X1205
 	tristate "Xicor/Intersil X1205"
 	help
@@ -1413,6 +1423,7 @@ config RTC_DRV_AT91RM9200
 config RTC_DRV_AT91SAM9
 	tristate "AT91SAM9 RTT as RTC"
 	depends on ARCH_AT91 || COMPILE_TEST
+	depends on HAS_IOMEM
 	select MFD_SYSCON
 	help
 	  Some AT91SAM9 SoCs provide an RTT (Real Time Timer) block which
@@ -1502,7 +1513,7 @@ config RTC_DRV_STARFIRE
 
 config RTC_DRV_TX4939
 	tristate "TX4939 SoC"
-	depends on SOC_TX4939
+	depends on SOC_TX4939 || COMPILE_TEST
 	help
 	  Driver for the internal RTC (Realtime Clock) module found on
 	  Toshiba TX4939 SoC.
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index ee0206becd9f..5ff2fc0c361a 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -75,6 +75,7 @@ obj-$(CONFIG_RTC_DRV_HID_SENSOR_TIME) += rtc-hid-sensor-time.o
 obj-$(CONFIG_RTC_DRV_HYM8563)	+= rtc-hym8563.o
 obj-$(CONFIG_RTC_DRV_IMXDI)	+= rtc-imxdi.o
 obj-$(CONFIG_RTC_DRV_ISL12022)	+= rtc-isl12022.o
+obj-$(CONFIG_RTC_DRV_ISL12026)	+= rtc-isl12026.o
 obj-$(CONFIG_RTC_DRV_ISL1208)	+= rtc-isl1208.o
 obj-$(CONFIG_RTC_DRV_JZ4740)	+= rtc-jz4740.o
 obj-$(CONFIG_RTC_DRV_LP8788)	+= rtc-lp8788.o
diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c
index 722d683e0b0f..d37588f08055 100644
--- a/drivers/rtc/class.c
+++ b/drivers/rtc/class.c
@@ -211,6 +211,73 @@ static int rtc_device_get_id(struct device *dev)
 	return id;
 }
 
+static void rtc_device_get_offset(struct rtc_device *rtc)
+{
+	time64_t range_secs;
+	u32 start_year;
+	int ret;
+
+	/*
+	 * If RTC driver did not implement the range of RTC hardware device,
+	 * then we can not expand the RTC range by adding or subtracting one
+	 * offset.
+	 */
+	if (rtc->range_min == rtc->range_max)
+		return;
+
+	ret = device_property_read_u32(rtc->dev.parent, "start-year",
+				       &start_year);
+	if (!ret) {
+		rtc->start_secs = mktime64(start_year, 1, 1, 0, 0, 0);
+		rtc->set_start_time = true;
+	}
+
+	/*
+	 * If user did not implement the start time for RTC driver, then no
+	 * need to expand the RTC range.
+	 */
+	if (!rtc->set_start_time)
+		return;
+
+	range_secs = rtc->range_max - rtc->range_min + 1;
+
+	/*
+	 * If the start_secs is larger than the maximum seconds (rtc->range_max)
+	 * supported by RTC hardware or the maximum seconds of new expanded
+	 * range (start_secs + rtc->range_max - rtc->range_min) is less than
+	 * rtc->range_min, which means the minimum seconds (rtc->range_min) of
+	 * RTC hardware will be mapped to start_secs by adding one offset, so
+	 * the offset seconds calculation formula should be:
+	 * rtc->offset_secs = rtc->start_secs - rtc->range_min;
+	 *
+	 * If the start_secs is larger than the minimum seconds (rtc->range_min)
+	 * supported by RTC hardware, then there is one region is overlapped
+	 * between the original RTC hardware range and the new expanded range,
+	 * and this overlapped region do not need to be mapped into the new
+	 * expanded range due to it is valid for RTC device. So the minimum
+	 * seconds of RTC hardware (rtc->range_min) should be mapped to
+	 * rtc->range_max + 1, then the offset seconds formula should be:
+	 * rtc->offset_secs = rtc->range_max - rtc->range_min + 1;
+	 *
+	 * If the start_secs is less than the minimum seconds (rtc->range_min),
+	 * which is similar to case 2. So the start_secs should be mapped to
+	 * start_secs + rtc->range_max - rtc->range_min + 1, then the
+	 * offset seconds formula should be:
+	 * rtc->offset_secs = -(rtc->range_max - rtc->range_min + 1);
+	 *
+	 * Otherwise the offset seconds should be 0.
+	 */
+	if (rtc->start_secs > rtc->range_max ||
+	    rtc->start_secs + range_secs - 1 < rtc->range_min)
+		rtc->offset_secs = rtc->start_secs - rtc->range_min;
+	else if (rtc->start_secs > rtc->range_min)
+		rtc->offset_secs = range_secs;
+	else if (rtc->start_secs < rtc->range_min)
+		rtc->offset_secs = -range_secs;
+	else
+		rtc->offset_secs = 0;
+}
+
 /**
  * rtc_device_register - register w/ RTC class
  * @dev: the device to register
@@ -247,6 +314,8 @@ struct rtc_device *rtc_device_register(const char *name, struct device *dev,
 
 	dev_set_name(&rtc->dev, "rtc%d", id);
 
+	rtc_device_get_offset(rtc);
+
 	/* Check to see if there is an ALARM already set in hw */
 	err = __rtc_read_alarm(rtc, &alrm);
 
@@ -293,8 +362,6 @@ EXPORT_SYMBOL_GPL(rtc_device_register);
  */
 void rtc_device_unregister(struct rtc_device *rtc)
 {
-	rtc_nvmem_unregister(rtc);
-
 	mutex_lock(&rtc->ops_lock);
 	/*
 	 * Remove innards of this RTC, then disable it, before
@@ -312,6 +379,7 @@ static void devm_rtc_device_release(struct device *dev, void *res)
 {
 	struct rtc_device *rtc = *(struct rtc_device **)res;
 
+	rtc_nvmem_unregister(rtc);
 	rtc_device_unregister(rtc);
 }
 
@@ -382,6 +450,8 @@ static void devm_rtc_release_device(struct device *dev, void *res)
 {
 	struct rtc_device *rtc = *(struct rtc_device **)res;
 
+	rtc_nvmem_unregister(rtc);
+
 	if (rtc->registered)
 		rtc_device_unregister(rtc);
 	else
@@ -435,6 +505,7 @@ int __rtc_register_device(struct module *owner, struct rtc_device *rtc)
 		return -EINVAL;
 
 	rtc->owner = owner;
+	rtc_device_get_offset(rtc);
 
 	/* Check to see if there is an ALARM already set in hw */
 	err = __rtc_read_alarm(rtc, &alrm);
@@ -453,8 +524,6 @@ int __rtc_register_device(struct module *owner, struct rtc_device *rtc)
 
 	rtc_proc_add_device(rtc);
 
-	rtc_nvmem_register(rtc);
-
 	rtc->registered = true;
 	dev_info(rtc->dev.parent, "registered as %s\n",
 		 dev_name(&rtc->dev));
diff --git a/drivers/rtc/hctosys.c b/drivers/rtc/hctosys.c
index e1cfa06810ef..e79f2a181ad2 100644
--- a/drivers/rtc/hctosys.c
+++ b/drivers/rtc/hctosys.c
@@ -49,6 +49,11 @@ static int __init rtc_hctosys(void)
 
 	tv64.tv_sec = rtc_tm_to_time64(&tm);
 
+#if BITS_PER_LONG == 32
+	if (tv64.tv_sec > INT_MAX)
+		goto err_read;
+#endif
+
 	err = do_settimeofday64(&tv64);
 
 	dev_info(rtc->dev.parent,
diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index 672b192f8153..7cbdc9228dd5 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -17,9 +17,73 @@
 #include <linux/log2.h>
 #include <linux/workqueue.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/rtc.h>
+
 static int rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer);
 static void rtc_timer_remove(struct rtc_device *rtc, struct rtc_timer *timer);
 
+static void rtc_add_offset(struct rtc_device *rtc, struct rtc_time *tm)
+{
+	time64_t secs;
+
+	if (!rtc->offset_secs)
+		return;
+
+	secs = rtc_tm_to_time64(tm);
+
+	/*
+	 * Since the reading time values from RTC device are always in the RTC
+	 * original valid range, but we need to skip the overlapped region
+	 * between expanded range and original range, which is no need to add
+	 * the offset.
+	 */
+	if ((rtc->start_secs > rtc->range_min && secs >= rtc->start_secs) ||
+	    (rtc->start_secs < rtc->range_min &&
+	     secs <= (rtc->start_secs + rtc->range_max - rtc->range_min)))
+		return;
+
+	rtc_time64_to_tm(secs + rtc->offset_secs, tm);
+}
+
+static void rtc_subtract_offset(struct rtc_device *rtc, struct rtc_time *tm)
+{
+	time64_t secs;
+
+	if (!rtc->offset_secs)
+		return;
+
+	secs = rtc_tm_to_time64(tm);
+
+	/*
+	 * If the setting time values are in the valid range of RTC hardware
+	 * device, then no need to subtract the offset when setting time to RTC
+	 * device. Otherwise we need to subtract the offset to make the time
+	 * values are valid for RTC hardware device.
+	 */
+	if (secs >= rtc->range_min && secs <= rtc->range_max)
+		return;
+
+	rtc_time64_to_tm(secs - rtc->offset_secs, tm);
+}
+
+static int rtc_valid_range(struct rtc_device *rtc, struct rtc_time *tm)
+{
+	if (rtc->range_min != rtc->range_max) {
+		time64_t time = rtc_tm_to_time64(tm);
+		time64_t range_min = rtc->set_start_time ? rtc->start_secs :
+			rtc->range_min;
+		time64_t range_max = rtc->set_start_time ?
+			(rtc->start_secs + rtc->range_max - rtc->range_min) :
+			rtc->range_max;
+
+		if (time < range_min || time > range_max)
+			return -ERANGE;
+	}
+
+	return 0;
+}
+
 static int __rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm)
 {
 	int err;
@@ -36,6 +100,8 @@ static int __rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm)
 			return err;
 		}
 
+		rtc_add_offset(rtc, tm);
+
 		err = rtc_valid_tm(tm);
 		if (err < 0)
 			dev_dbg(&rtc->dev, "read_time: rtc_time isn't valid\n");
@@ -53,6 +119,8 @@ int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm)
 
 	err = __rtc_read_time(rtc, tm);
 	mutex_unlock(&rtc->ops_lock);
+
+	trace_rtc_read_time(rtc_tm_to_time64(tm), err);
 	return err;
 }
 EXPORT_SYMBOL_GPL(rtc_read_time);
@@ -65,6 +133,12 @@ int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm)
 	if (err != 0)
 		return err;
 
+	err = rtc_valid_range(rtc, tm);
+	if (err)
+		return err;
+
+	rtc_subtract_offset(rtc, tm);
+
 	err = mutex_lock_interruptible(&rtc->ops_lock);
 	if (err)
 		return err;
@@ -87,6 +161,8 @@ int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm)
 	mutex_unlock(&rtc->ops_lock);
 	/* A timer might have just expired */
 	schedule_work(&rtc->irqwork);
+
+	trace_rtc_set_time(rtc_tm_to_time64(tm), err);
 	return err;
 }
 EXPORT_SYMBOL_GPL(rtc_set_time);
@@ -119,6 +195,8 @@ static int rtc_read_alarm_internal(struct rtc_device *rtc, struct rtc_wkalrm *al
 	}
 
 	mutex_unlock(&rtc->ops_lock);
+
+	trace_rtc_read_alarm(rtc_tm_to_time64(&alarm->time), err);
 	return err;
 }
 
@@ -316,6 +394,7 @@ int rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
 	}
 	mutex_unlock(&rtc->ops_lock);
 
+	trace_rtc_read_alarm(rtc_tm_to_time64(&alarm->time), err);
 	return err;
 }
 EXPORT_SYMBOL_GPL(rtc_read_alarm);
@@ -329,6 +408,8 @@ static int __rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
 	err = rtc_valid_tm(&alarm->time);
 	if (err)
 		return err;
+
+	rtc_subtract_offset(rtc, &alarm->time);
 	scheduled = rtc_tm_to_time64(&alarm->time);
 
 	/* Make sure we're not setting alarms in the past */
@@ -352,6 +433,7 @@ static int __rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
 	else
 		err = rtc->ops->set_alarm(rtc->dev.parent, alarm);
 
+	trace_rtc_set_alarm(rtc_tm_to_time64(&alarm->time), err);
 	return err;
 }
 
@@ -363,6 +445,10 @@ int rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
 	if (err != 0)
 		return err;
 
+	err = rtc_valid_range(rtc, &alarm->time);
+	if (err)
+		return err;
+
 	err = mutex_lock_interruptible(&rtc->ops_lock);
 	if (err)
 		return err;
@@ -375,6 +461,8 @@ int rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
 		err = rtc_timer_enqueue(rtc, &rtc->aie_timer);
 
 	mutex_unlock(&rtc->ops_lock);
+
+	rtc_add_offset(rtc, &alarm->time);
 	return err;
 }
 EXPORT_SYMBOL_GPL(rtc_set_alarm);
@@ -406,6 +494,7 @@ int rtc_initialize_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
 
 		rtc->aie_timer.enabled = 1;
 		timerqueue_add(&rtc->timerqueue, &rtc->aie_timer.node);
+		trace_rtc_timer_enqueue(&rtc->aie_timer);
 	}
 	mutex_unlock(&rtc->ops_lock);
 	return err;
@@ -435,6 +524,8 @@ int rtc_alarm_irq_enable(struct rtc_device *rtc, unsigned int enabled)
 		err = rtc->ops->alarm_irq_enable(rtc->dev.parent, enabled);
 
 	mutex_unlock(&rtc->ops_lock);
+
+	trace_rtc_alarm_irq_enable(enabled, err);
 	return err;
 }
 EXPORT_SYMBOL_GPL(rtc_alarm_irq_enable);
@@ -709,6 +800,8 @@ retry:
 		rtc->pie_enabled = enabled;
 	}
 	spin_unlock_irqrestore(&rtc->irq_task_lock, flags);
+
+	trace_rtc_irq_set_state(enabled, err);
 	return err;
 }
 EXPORT_SYMBOL_GPL(rtc_irq_set_state);
@@ -745,6 +838,8 @@ retry:
 		}
 	}
 	spin_unlock_irqrestore(&rtc->irq_task_lock, flags);
+
+	trace_rtc_irq_set_freq(freq, err);
 	return err;
 }
 EXPORT_SYMBOL_GPL(rtc_irq_set_freq);
@@ -779,6 +874,7 @@ static int rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer)
 	}
 
 	timerqueue_add(&rtc->timerqueue, &timer->node);
+	trace_rtc_timer_enqueue(timer);
 	if (!next || ktime_before(timer->node.expires, next->expires)) {
 		struct rtc_wkalrm alarm;
 		int err;
@@ -790,6 +886,7 @@ static int rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer)
 			schedule_work(&rtc->irqwork);
 		} else if (err) {
 			timerqueue_del(&rtc->timerqueue, &timer->node);
+			trace_rtc_timer_dequeue(timer);
 			timer->enabled = 0;
 			return err;
 		}
@@ -803,6 +900,7 @@ static void rtc_alarm_disable(struct rtc_device *rtc)
 		return;
 
 	rtc->ops->alarm_irq_enable(rtc->dev.parent, false);
+	trace_rtc_alarm_irq_enable(0, 0);
 }
 
 /**
@@ -821,6 +919,7 @@ static void rtc_timer_remove(struct rtc_device *rtc, struct rtc_timer *timer)
 {
 	struct timerqueue_node *next = timerqueue_getnext(&rtc->timerqueue);
 	timerqueue_del(&rtc->timerqueue, &timer->node);
+	trace_rtc_timer_dequeue(timer);
 	timer->enabled = 0;
 	if (next == &timer->node) {
 		struct rtc_wkalrm alarm;
@@ -871,16 +970,19 @@ again:
 		/* expire timer */
 		timer = container_of(next, struct rtc_timer, node);
 		timerqueue_del(&rtc->timerqueue, &timer->node);
+		trace_rtc_timer_dequeue(timer);
 		timer->enabled = 0;
 		if (timer->task.func)
 			timer->task.func(timer->task.private_data);
 
+		trace_rtc_timer_fired(timer);
 		/* Re-add/fwd periodic timers */
 		if (ktime_to_ns(timer->period)) {
 			timer->node.expires = ktime_add(timer->node.expires,
 							timer->period);
 			timer->enabled = 1;
 			timerqueue_add(&rtc->timerqueue, &timer->node);
+			trace_rtc_timer_enqueue(timer);
 		}
 	}
 
@@ -902,6 +1004,7 @@ reprogram:
 
 			timer = container_of(next, struct rtc_timer, node);
 			timerqueue_del(&rtc->timerqueue, &timer->node);
+			trace_rtc_timer_dequeue(timer);
 			timer->enabled = 0;
 			dev_err(&rtc->dev, "__rtc_set_alarm: err=%d\n", err);
 			goto again;
@@ -992,6 +1095,8 @@ int rtc_read_offset(struct rtc_device *rtc, long *offset)
 	mutex_lock(&rtc->ops_lock);
 	ret = rtc->ops->read_offset(rtc->dev.parent, offset);
 	mutex_unlock(&rtc->ops_lock);
+
+	trace_rtc_read_offset(*offset, ret);
 	return ret;
 }
 
@@ -1025,5 +1130,7 @@ int rtc_set_offset(struct rtc_device *rtc, long offset)
 	mutex_lock(&rtc->ops_lock);
 	ret = rtc->ops->set_offset(rtc->dev.parent, offset);
 	mutex_unlock(&rtc->ops_lock);
+
+	trace_rtc_set_offset(offset, ret);
 	return ret;
 }
diff --git a/drivers/rtc/nvmem.c b/drivers/rtc/nvmem.c
index 8567b4ed9ac6..17ec4c8d0fad 100644
--- a/drivers/rtc/nvmem.c
+++ b/drivers/rtc/nvmem.c
@@ -14,8 +14,6 @@
 #include <linux/rtc.h>
 #include <linux/sysfs.h>
 
-#include "rtc-core.h"
-
 /*
  * Deprecated ABI compatibility, this should be removed at some point
  */
@@ -46,7 +44,7 @@ rtc_nvram_write(struct file *filp, struct kobject *kobj,
 	return nvmem_device_write(rtc->nvmem, off, count, buf);
 }
 
-static int rtc_nvram_register(struct rtc_device *rtc)
+static int rtc_nvram_register(struct rtc_device *rtc, size_t size)
 {
 	int err;
 
@@ -64,7 +62,7 @@ static int rtc_nvram_register(struct rtc_device *rtc)
 
 	rtc->nvram->read = rtc_nvram_read;
 	rtc->nvram->write = rtc_nvram_write;
-	rtc->nvram->size = rtc->nvmem_config->size;
+	rtc->nvram->size = size;
 
 	err = sysfs_create_bin_file(&rtc->dev.parent->kobj,
 				    rtc->nvram);
@@ -84,21 +82,28 @@ static void rtc_nvram_unregister(struct rtc_device *rtc)
 /*
  * New ABI, uses nvmem
  */
-void rtc_nvmem_register(struct rtc_device *rtc)
+int rtc_nvmem_register(struct rtc_device *rtc,
+		       struct nvmem_config *nvmem_config)
 {
-	if (!rtc->nvmem_config)
-		return;
+	if (!IS_ERR_OR_NULL(rtc->nvmem))
+		return -EBUSY;
+
+	if (!nvmem_config)
+		return -ENODEV;
 
-	rtc->nvmem_config->dev = &rtc->dev;
-	rtc->nvmem_config->owner = rtc->owner;
-	rtc->nvmem = nvmem_register(rtc->nvmem_config);
+	nvmem_config->dev = rtc->dev.parent;
+	nvmem_config->owner = rtc->owner;
+	rtc->nvmem = nvmem_register(nvmem_config);
 	if (IS_ERR_OR_NULL(rtc->nvmem))
-		return;
+		return PTR_ERR(rtc->nvmem);
 
 	/* Register the old ABI */
 	if (rtc->nvram_old_abi)
-		rtc_nvram_register(rtc);
+		rtc_nvram_register(rtc, nvmem_config->size);
+
+	return 0;
 }
+EXPORT_SYMBOL_GPL(rtc_nvmem_register);
 
 void rtc_nvmem_unregister(struct rtc_device *rtc)
 {
diff --git a/drivers/rtc/rtc-88pm80x.c b/drivers/rtc/rtc-88pm80x.c
index 466bf7f9a285..6cbafefa80a2 100644
--- a/drivers/rtc/rtc-88pm80x.c
+++ b/drivers/rtc/rtc-88pm80x.c
@@ -134,9 +134,9 @@ static int pm80x_rtc_set_time(struct device *dev, struct rtc_time *tm)
 	struct pm80x_rtc_info *info = dev_get_drvdata(dev);
 	unsigned char buf[4];
 	unsigned long ticks, base, data;
-	if ((tm->tm_year < 70) || (tm->tm_year > 138)) {
+	if (tm->tm_year > 206) {
 		dev_dbg(info->dev,
-			"Set time %d out of range. Please set time between 1970 to 2038.\n",
+			"Set time %d out of range. Please set time between 1970 to 2106.\n",
 			1900 + tm->tm_year);
 		return -EINVAL;
 	}
diff --git a/drivers/rtc/rtc-88pm860x.c b/drivers/rtc/rtc-88pm860x.c
index 19e53b3b8e00..01ffc0ef8033 100644
--- a/drivers/rtc/rtc-88pm860x.c
+++ b/drivers/rtc/rtc-88pm860x.c
@@ -135,9 +135,9 @@ static int pm860x_rtc_set_time(struct device *dev, struct rtc_time *tm)
 	unsigned char buf[4];
 	unsigned long ticks, base, data;
 
-	if ((tm->tm_year < 70) || (tm->tm_year > 138)) {
+	if (tm->tm_year > 206) {
 		dev_dbg(info->dev, "Set time %d out of range. "
-			"Please set time between 1970 to 2038.\n",
+			"Please set time between 1970 to 2106.\n",
 			1900 + tm->tm_year);
 		return -EINVAL;
 	}
diff --git a/drivers/rtc/rtc-ab-b5ze-s3.c b/drivers/rtc/rtc-ab-b5ze-s3.c
index ef5c16dfabfa..8dc451932446 100644
--- a/drivers/rtc/rtc-ab-b5ze-s3.c
+++ b/drivers/rtc/rtc-ab-b5ze-s3.c
@@ -217,7 +217,7 @@ static int _abb5zes3_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
 	struct abb5zes3_rtc_data *data = dev_get_drvdata(dev);
 	u8 regs[ABB5ZES3_REG_RTC_SC + ABB5ZES3_RTC_SEC_LEN];
-	int ret;
+	int ret = 0;
 
 	/*
 	 * As we need to read CTRL1 register anyway to access 24/12h
@@ -255,8 +255,6 @@ static int _abb5zes3_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	tm->tm_mon  = bcd2bin(regs[ABB5ZES3_REG_RTC_MO]) - 1; /* starts at 1 */
 	tm->tm_year = bcd2bin(regs[ABB5ZES3_REG_RTC_YR]) + 100;
 
-	ret = rtc_valid_tm(tm);
-
 err:
 	return ret;
 }
diff --git a/drivers/rtc/rtc-ab3100.c b/drivers/rtc/rtc-ab3100.c
index 9b725c553058..821ff52a2222 100644
--- a/drivers/rtc/rtc-ab3100.c
+++ b/drivers/rtc/rtc-ab3100.c
@@ -106,7 +106,7 @@ static int ab3100_rtc_read_time(struct device *dev, struct rtc_time *tm)
 
 	rtc_time64_to_tm(time, tm);
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int ab3100_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm)
diff --git a/drivers/rtc/rtc-ab8500.c b/drivers/rtc/rtc-ab8500.c
index 24a0af650a1b..e28f4401fd35 100644
--- a/drivers/rtc/rtc-ab8500.c
+++ b/drivers/rtc/rtc-ab8500.c
@@ -36,10 +36,6 @@
 #define AB8500_RTC_FORCE_BKUP_REG	0x0D
 #define AB8500_RTC_CALIB_REG		0x0E
 #define AB8500_RTC_SWITCH_STAT_REG	0x0F
-#define AB8540_RTC_ALRM_SEC		0x22
-#define AB8540_RTC_ALRM_MIN_LOW_REG	0x23
-#define AB8540_RTC_ALRM_MIN_MID_REG	0x24
-#define AB8540_RTC_ALRM_MIN_HI_REG	0x25
 
 /* RtcReadRequest bits */
 #define RTC_READ_REQUEST		0x01
@@ -63,11 +59,6 @@ static const u8 ab8500_rtc_alarm_regs[] = {
 	AB8500_RTC_ALRM_MIN_LOW_REG
 };
 
-static const u8 ab8540_rtc_alarm_regs[] = {
-	AB8540_RTC_ALRM_MIN_HI_REG, AB8540_RTC_ALRM_MIN_MID_REG,
-	AB8540_RTC_ALRM_MIN_LOW_REG, AB8540_RTC_ALRM_SEC
-};
-
 /* Calculate the seconds from 1970 to 01-01-2000 00:00:00 */
 static unsigned long get_elapsed_seconds(int year)
 {
@@ -131,7 +122,7 @@ static int ab8500_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	secs += get_elapsed_seconds(AB8500_RTC_EPOCH);
 
 	rtc_time_to_tm(secs, tm);
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int ab8500_rtc_set_time(struct device *dev, struct rtc_time *tm)
@@ -277,43 +268,6 @@ static int ab8500_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm)
 	return ab8500_rtc_irq_enable(dev, alarm->enabled);
 }
 
-static int ab8540_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm)
-{
-	int retval, i;
-	unsigned char buf[ARRAY_SIZE(ab8540_rtc_alarm_regs)];
-	unsigned long mins, secs = 0;
-
-	if (alarm->time.tm_year < (AB8500_RTC_EPOCH - 1900)) {
-		dev_dbg(dev, "year should be equal to or greater than %d\n",
-				AB8500_RTC_EPOCH);
-		return -EINVAL;
-	}
-
-	/* Get the number of seconds since 1970 */
-	rtc_tm_to_time(&alarm->time, &secs);
-
-	/*
-	 * Convert it to the number of seconds since 01-01-2000 00:00:00
-	 */
-	secs -= get_elapsed_seconds(AB8500_RTC_EPOCH);
-	mins = secs / 60;
-
-	buf[3] = secs % 60;
-	buf[2] = mins & 0xFF;
-	buf[1] = (mins >> 8) & 0xFF;
-	buf[0] = (mins >> 16) & 0xFF;
-
-	/* Set the alarm time */
-	for (i = 0; i < ARRAY_SIZE(ab8540_rtc_alarm_regs); i++) {
-		retval = abx500_set_register_interruptible(dev, AB8500_RTC,
-			ab8540_rtc_alarm_regs[i], buf[i]);
-		if (retval < 0)
-			return retval;
-	}
-
-	return ab8500_rtc_irq_enable(dev, alarm->enabled);
-}
-
 static int ab8500_rtc_set_calibration(struct device *dev, int calibration)
 {
 	int retval;
@@ -435,17 +389,8 @@ static const struct rtc_class_ops ab8500_rtc_ops = {
 	.alarm_irq_enable	= ab8500_rtc_irq_enable,
 };
 
-static const struct rtc_class_ops ab8540_rtc_ops = {
-	.read_time		= ab8500_rtc_read_time,
-	.set_time		= ab8500_rtc_set_time,
-	.read_alarm		= ab8500_rtc_read_alarm,
-	.set_alarm		= ab8540_rtc_set_alarm,
-	.alarm_irq_enable	= ab8500_rtc_irq_enable,
-};
-
 static const struct platform_device_id ab85xx_rtc_ids[] = {
 	{ "ab8500-rtc", (kernel_ulong_t)&ab8500_rtc_ops, },
-	{ "ab8540-rtc", (kernel_ulong_t)&ab8540_rtc_ops, },
 	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(platform, ab85xx_rtc_ids);
diff --git a/drivers/rtc/rtc-abx80x.c b/drivers/rtc/rtc-abx80x.c
index b033bc556f5d..2cefa67a1132 100644
--- a/drivers/rtc/rtc-abx80x.c
+++ b/drivers/rtc/rtc-abx80x.c
@@ -172,11 +172,7 @@ static int abx80x_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	tm->tm_mon = bcd2bin(buf[ABX8XX_REG_MO] & 0x1F) - 1;
 	tm->tm_year = bcd2bin(buf[ABX8XX_REG_YR]) + 100;
 
-	err = rtc_valid_tm(tm);
-	if (err < 0)
-		dev_err(&client->dev, "retrieved date/time is not valid.\n");
-
-	return err;
+	return 0;
 }
 
 static int abx80x_rtc_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-ac100.c b/drivers/rtc/rtc-ac100.c
index 8ff9dc3fe5bf..3fe576fdd45e 100644
--- a/drivers/rtc/rtc-ac100.c
+++ b/drivers/rtc/rtc-ac100.c
@@ -183,7 +183,29 @@ static int ac100_clkout_determine_rate(struct clk_hw *hw,
 
 	for (i = 0; i < num_parents; i++) {
 		struct clk_hw *parent = clk_hw_get_parent_by_index(hw, i);
-		unsigned long tmp, prate = clk_hw_get_rate(parent);
+		unsigned long tmp, prate;
+
+		/*
+		 * The clock has two parents, one is a fixed clock which is
+		 * internally registered by the ac100 driver. The other parent
+		 * is a clock from the codec side of the chip, which we
+		 * properly declare and reference in the devicetree and is
+		 * not implemented in any driver right now.
+		 * If the clock core looks for the parent of that second
+		 * missing clock, it can't find one that is registered and
+		 * returns NULL.
+		 * So we end up in a situation where clk_hw_get_num_parents
+		 * returns the amount of clocks we can be parented to, but
+		 * clk_hw_get_parent_by_index will not return the orphan
+		 * clocks.
+		 * Thus we need to check if the parent exists before
+		 * we get the parent rate, so we could use the RTC
+		 * without waiting for the codec to be supported.
+		 */
+		if (!parent)
+			continue;
+
+		prate = clk_hw_get_rate(parent);
 
 		tmp = ac100_clkout_round_rate(hw, req->rate, prate);
 
@@ -387,7 +409,7 @@ static int ac100_rtc_get_time(struct device *dev, struct rtc_time *rtc_tm)
 	rtc_tm->tm_year = bcd2bin(reg[6] & AC100_RTC_YEA_MASK) +
 			  AC100_YEAR_OFF;
 
-	return rtc_valid_tm(rtc_tm);
+	return 0;
 }
 
 static int ac100_rtc_set_time(struct device *dev, struct rtc_time *rtc_tm)
diff --git a/drivers/rtc/rtc-at91sam9.c b/drivers/rtc/rtc-at91sam9.c
index 7418a763ce52..ee71e647fd43 100644
--- a/drivers/rtc/rtc-at91sam9.c
+++ b/drivers/rtc/rtc-at91sam9.c
@@ -349,6 +349,7 @@ static const struct rtc_class_ops at91_rtc_ops = {
 };
 
 static const struct regmap_config gpbr_regmap_config = {
+	.name = "gpbr",
 	.reg_bits = 32,
 	.val_bits = 32,
 	.reg_stride = 4,
diff --git a/drivers/rtc/rtc-au1xxx.c b/drivers/rtc/rtc-au1xxx.c
index 2ba44ccb9c3a..7c5530c71285 100644
--- a/drivers/rtc/rtc-au1xxx.c
+++ b/drivers/rtc/rtc-au1xxx.c
@@ -36,7 +36,7 @@ static int au1xtoy_rtc_read_time(struct device *dev, struct rtc_time *tm)
 
 	rtc_time_to_tm(t, tm);
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int au1xtoy_rtc_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-bq32k.c b/drivers/rtc/rtc-bq32k.c
index 98ac8d5c7901..ef52741000a8 100644
--- a/drivers/rtc/rtc-bq32k.c
+++ b/drivers/rtc/rtc-bq32k.c
@@ -36,6 +36,10 @@
 #define BQ32K_CFG2		0x09	/* Trickle charger control */
 #define BQ32K_TCFE		BIT(6)	/* Trickle charge FET bypass */
 
+#define MAX_LEN			10	/* Maximum number of consecutive
+					 * register for this particular RTC.
+					 */
+
 struct bq32k_regs {
 	uint8_t		seconds;
 	uint8_t		minutes;
@@ -74,7 +78,7 @@ static int bq32k_read(struct device *dev, void *data, uint8_t off, uint8_t len)
 static int bq32k_write(struct device *dev, void *data, uint8_t off, uint8_t len)
 {
 	struct i2c_client *client = to_i2c_client(dev);
-	uint8_t buffer[len + 1];
+	uint8_t buffer[MAX_LEN + 1];
 
 	buffer[0] = off;
 	memcpy(&buffer[1], data, len);
@@ -110,7 +114,7 @@ static int bq32k_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	tm->tm_year = bcd2bin(regs.years) +
 				((regs.cent_hours & BQ32K_CENT) ? 100 : 0);
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int bq32k_rtc_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-brcmstb-waketimer.c b/drivers/rtc/rtc-brcmstb-waketimer.c
index 6cee61201c30..bdd6674a1054 100644
--- a/drivers/rtc/rtc-brcmstb-waketimer.c
+++ b/drivers/rtc/rtc-brcmstb-waketimer.c
@@ -60,6 +60,9 @@ static void brcmstb_waketmr_set_alarm(struct brcmstb_waketmr *timer,
 {
 	brcmstb_waketmr_clear_alarm(timer);
 
+	/* Make sure we are actually counting in seconds */
+	writel_relaxed(timer->rate, timer->base + BRCMSTB_WKTMR_PRESCALER);
+
 	writel_relaxed(secs + 1, timer->base + BRCMSTB_WKTMR_ALARM);
 }
 
diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index f7c0f72abb56..1b3738a11702 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -541,11 +541,10 @@ static const struct rtc_class_ops cmos_rtc_ops = {
 
 #define NVRAM_OFFSET	(RTC_REG_D + 1)
 
-static ssize_t
-cmos_nvram_read(struct file *filp, struct kobject *kobj,
-		struct bin_attribute *attr,
-		char *buf, loff_t off, size_t count)
+static int cmos_nvram_read(void *priv, unsigned int off, void *val,
+			   size_t count)
 {
+	unsigned char *buf = val;
 	int	retval;
 
 	off += NVRAM_OFFSET;
@@ -563,16 +562,13 @@ cmos_nvram_read(struct file *filp, struct kobject *kobj,
 	return retval;
 }
 
-static ssize_t
-cmos_nvram_write(struct file *filp, struct kobject *kobj,
-		struct bin_attribute *attr,
-		char *buf, loff_t off, size_t count)
+static int cmos_nvram_write(void *priv, unsigned int off, void *val,
+			    size_t count)
 {
-	struct cmos_rtc	*cmos;
+	struct cmos_rtc	*cmos = priv;
+	unsigned char	*buf = val;
 	int		retval;
 
-	cmos = dev_get_drvdata(container_of(kobj, struct device, kobj));
-
 	/* NOTE:  on at least PCs and Ataris, the boot firmware uses a
 	 * checksum on part of the NVRAM data.  That's currently ignored
 	 * here.  If userspace is smart enough to know what fields of
@@ -598,17 +594,6 @@ cmos_nvram_write(struct file *filp, struct kobject *kobj,
 	return retval;
 }
 
-static struct bin_attribute nvram = {
-	.attr = {
-		.name	= "nvram",
-		.mode	= S_IRUGO | S_IWUSR,
-	},
-
-	.read	= cmos_nvram_read,
-	.write	= cmos_nvram_write,
-	/* size gets set up later */
-};
-
 /*----------------------------------------------------------------*/
 
 static struct cmos_rtc	cmos_rtc;
@@ -675,6 +660,14 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
 	unsigned char			rtc_control;
 	unsigned			address_space;
 	u32				flags = 0;
+	struct nvmem_config nvmem_cfg = {
+		.name = "cmos_nvram",
+		.word_size = 1,
+		.stride = 1,
+		.reg_read = cmos_nvram_read,
+		.reg_write = cmos_nvram_write,
+		.priv = &cmos_rtc,
+	};
 
 	/* there can be only one ... */
 	if (cmos_rtc.dev)
@@ -751,8 +744,7 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
 	cmos_rtc.dev = dev;
 	dev_set_drvdata(dev, &cmos_rtc);
 
-	cmos_rtc.rtc = rtc_device_register(driver_name, dev,
-				&cmos_rtc_ops, THIS_MODULE);
+	cmos_rtc.rtc = devm_rtc_allocate_device(dev);
 	if (IS_ERR(cmos_rtc.rtc)) {
 		retval = PTR_ERR(cmos_rtc.rtc);
 		goto cleanup0;
@@ -814,22 +806,25 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
 		}
 	}
 
-	/* export at least the first block of NVRAM */
-	nvram.size = address_space - NVRAM_OFFSET;
-	retval = sysfs_create_bin_file(&dev->kobj, &nvram);
-	if (retval < 0) {
-		dev_dbg(dev, "can't create nvram file? %d\n", retval);
+	cmos_rtc.rtc->ops = &cmos_rtc_ops;
+	cmos_rtc.rtc->nvram_old_abi = true;
+	retval = rtc_register_device(cmos_rtc.rtc);
+	if (retval)
 		goto cleanup2;
-	}
 
-	dev_info(dev, "%s%s, %zd bytes nvram%s\n",
-		!is_valid_irq(rtc_irq) ? "no alarms" :
-			cmos_rtc.mon_alrm ? "alarms up to one year" :
-			cmos_rtc.day_alrm ? "alarms up to one month" :
-			"alarms up to one day",
-		cmos_rtc.century ? ", y3k" : "",
-		nvram.size,
-		is_hpet_enabled() ? ", hpet irqs" : "");
+	/* export at least the first block of NVRAM */
+	nvmem_cfg.size = address_space - NVRAM_OFFSET;
+	if (rtc_nvmem_register(cmos_rtc.rtc, &nvmem_cfg))
+		dev_err(dev, "nvmem registration failed\n");
+
+	dev_info(dev, "%s%s, %d bytes nvram%s\n",
+		 !is_valid_irq(rtc_irq) ? "no alarms" :
+		 cmos_rtc.mon_alrm ? "alarms up to one year" :
+		 cmos_rtc.day_alrm ? "alarms up to one month" :
+		 "alarms up to one day",
+		 cmos_rtc.century ? ", y3k" : "",
+		 nvmem_cfg.size,
+		 is_hpet_enabled() ? ", hpet irqs" : "");
 
 	return 0;
 
@@ -838,7 +833,6 @@ cleanup2:
 		free_irq(rtc_irq, cmos_rtc.rtc);
 cleanup1:
 	cmos_rtc.dev = NULL;
-	rtc_device_unregister(cmos_rtc.rtc);
 cleanup0:
 	if (RTC_IOMAPPED)
 		release_region(ports->start, resource_size(ports));
@@ -862,14 +856,11 @@ static void cmos_do_remove(struct device *dev)
 
 	cmos_do_shutdown(cmos->irq);
 
-	sysfs_remove_bin_file(&dev->kobj, &nvram);
-
 	if (is_valid_irq(cmos->irq)) {
 		free_irq(cmos->irq, cmos->rtc);
 		hpet_unregister_irq_handler(cmos_interrupt);
 	}
 
-	rtc_device_unregister(cmos->rtc);
 	cmos->rtc = NULL;
 
 	ports = cmos->iomem;
@@ -1271,8 +1262,6 @@ MODULE_DEVICE_TABLE(of, of_cmos_match);
 static __init void cmos_of_init(struct platform_device *pdev)
 {
 	struct device_node *node = pdev->dev.of_node;
-	struct rtc_time time;
-	int ret;
 	const __be32 *val;
 
 	if (!node)
@@ -1285,16 +1274,6 @@ static __init void cmos_of_init(struct platform_device *pdev)
 	val = of_get_property(node, "freq-reg", NULL);
 	if (val)
 		CMOS_WRITE(be32_to_cpup(val), RTC_FREQ_SELECT);
-
-	cmos_read_time(&pdev->dev, &time);
-	ret = rtc_valid_tm(&time);
-	if (ret) {
-		struct rtc_time def_time = {
-			.tm_year = 1,
-			.tm_mday = 1,
-		};
-		cmos_set_time(&pdev->dev, &def_time);
-	}
 }
 #else
 static inline void cmos_of_init(struct platform_device *pdev) {}
diff --git a/drivers/rtc/rtc-coh901331.c b/drivers/rtc/rtc-coh901331.c
index cfc4141d99cd..2fc517498a5d 100644
--- a/drivers/rtc/rtc-coh901331.c
+++ b/drivers/rtc/rtc-coh901331.c
@@ -82,7 +82,7 @@ static int coh901331_read_time(struct device *dev, struct rtc_time *tm)
 	if (readl(rtap->virtbase + COH901331_VALID)) {
 		rtc_time_to_tm(readl(rtap->virtbase + COH901331_CUR_TIME), tm);
 		clk_disable(rtap->clk);
-		return rtc_valid_tm(tm);
+		return 0;
 	}
 	clk_disable(rtap->clk);
 	return -EINVAL;
diff --git a/drivers/rtc/rtc-core.h b/drivers/rtc/rtc-core.h
index 513b9bedd2c8..0abf98983e13 100644
--- a/drivers/rtc/rtc-core.h
+++ b/drivers/rtc/rtc-core.h
@@ -46,11 +46,3 @@ static inline const struct attribute_group **rtc_get_dev_attribute_groups(void)
 	return NULL;
 }
 #endif
-
-#ifdef CONFIG_RTC_NVMEM
-void rtc_nvmem_register(struct rtc_device *rtc);
-void rtc_nvmem_unregister(struct rtc_device *rtc);
-#else
-static inline void rtc_nvmem_register(struct rtc_device *rtc) {}
-static inline void rtc_nvmem_unregister(struct rtc_device *rtc) {}
-#endif
diff --git a/drivers/rtc/rtc-cpcap.c b/drivers/rtc/rtc-cpcap.c
index 3a0333e1f21a..a8856f2b9bc2 100644
--- a/drivers/rtc/rtc-cpcap.c
+++ b/drivers/rtc/rtc-cpcap.c
@@ -119,7 +119,7 @@ static int cpcap_rtc_read_time(struct device *dev, struct rtc_time *tm)
 
 	cpcap2rtc_time(tm, &cpcap_tm);
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int cpcap_rtc_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-cros-ec.c b/drivers/rtc/rtc-cros-ec.c
index f0ea6899c731..bf7ced095c94 100644
--- a/drivers/rtc/rtc-cros-ec.c
+++ b/drivers/rtc/rtc-cros-ec.c
@@ -197,10 +197,10 @@ static int cros_ec_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 		cros_ec_rtc->saved_alarm = (u32)alarm_time;
 	} else {
 		/* Don't set an alarm in the past. */
-		if ((u32)alarm_time < current_time)
-			alarm_offset = EC_RTC_ALARM_CLEAR;
-		else
-			alarm_offset = (u32)alarm_time - current_time;
+		if ((u32)alarm_time <= current_time)
+			return -ETIME;
+
+		alarm_offset = (u32)alarm_time - current_time;
 	}
 
 	ret = cros_ec_rtc_set(cros_ec, EC_CMD_RTC_SET_ALARM, alarm_offset);
diff --git a/drivers/rtc/rtc-da9052.c b/drivers/rtc/rtc-da9052.c
index 4273377562ec..03044e1bc497 100644
--- a/drivers/rtc/rtc-da9052.c
+++ b/drivers/rtc/rtc-da9052.c
@@ -187,8 +187,7 @@ static int da9052_rtc_read_time(struct device *dev, struct rtc_time *rtc_tm)
 			rtc_tm->tm_min  = v[0][1] & DA9052_RTC_MIN;
 			rtc_tm->tm_sec  = v[0][0] & DA9052_RTC_SEC;
 
-			ret = rtc_valid_tm(rtc_tm);
-			return ret;
+			return 0;
 		}
 
 		idx = (1-idx);
diff --git a/drivers/rtc/rtc-da9055.c b/drivers/rtc/rtc-da9055.c
index 678af8648c45..e08cd8130c23 100644
--- a/drivers/rtc/rtc-da9055.c
+++ b/drivers/rtc/rtc-da9055.c
@@ -158,7 +158,7 @@ static int da9055_rtc_read_time(struct device *dev, struct rtc_time *rtc_tm)
 	rtc_tm->tm_min  = v[1] & DA9055_RTC_MIN;
 	rtc_tm->tm_sec  = v[0] & DA9055_RTC_SEC;
 
-	return rtc_valid_tm(rtc_tm);
+	return 0;
 }
 
 static int da9055_rtc_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-da9063.c b/drivers/rtc/rtc-da9063.c
index f85cae240f12..b4e054c64bad 100644
--- a/drivers/rtc/rtc-da9063.c
+++ b/drivers/rtc/rtc-da9063.c
@@ -256,7 +256,7 @@ static int da9063_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	else
 		rtc->rtc_sync = false;
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int da9063_rtc_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-ds1216.c b/drivers/rtc/rtc-ds1216.c
index 9c82b1da2d45..5f158715fb4c 100644
--- a/drivers/rtc/rtc-ds1216.c
+++ b/drivers/rtc/rtc-ds1216.c
@@ -99,7 +99,7 @@ static int ds1216_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	if (tm->tm_year < 70)
 		tm->tm_year += 100;
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int ds1216_rtc_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-ds1286.c b/drivers/rtc/rtc-ds1286.c
index ef75c349dff9..0744916b79c5 100644
--- a/drivers/rtc/rtc-ds1286.c
+++ b/drivers/rtc/rtc-ds1286.c
@@ -211,7 +211,7 @@ static int ds1286_read_time(struct device *dev, struct rtc_time *tm)
 
 	tm->tm_mon--;
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int ds1286_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-ds1302.c b/drivers/rtc/rtc-ds1302.c
index 0ec4be62322b..2a881150d51c 100644
--- a/drivers/rtc/rtc-ds1302.c
+++ b/drivers/rtc/rtc-ds1302.c
@@ -43,7 +43,7 @@ static int ds1302_rtc_set_time(struct device *dev, struct rtc_time *time)
 {
 	struct spi_device	*spi = dev_get_drvdata(dev);
 	u8		buf[1 + RTC_CLCK_LEN];
-	u8		*bp = buf;
+	u8		*bp;
 	int		status;
 
 	/* Enable writing */
@@ -98,8 +98,7 @@ static int ds1302_rtc_get_time(struct device *dev, struct rtc_time *time)
 	time->tm_mon = bcd2bin(buf[RTC_ADDR_MON]) - 1;
 	time->tm_year = bcd2bin(buf[RTC_ADDR_YEAR]) + 100;
 
-	/* Time may not be set */
-	return rtc_valid_tm(time);
+	return 0;
 }
 
 static const struct rtc_class_ops ds1302_rtc_ops = {
@@ -112,7 +111,7 @@ static int ds1302_probe(struct spi_device *spi)
 	struct rtc_device	*rtc;
 	u8		addr;
 	u8		buf[4];
-	u8		*bp = buf;
+	u8		*bp;
 	int		status;
 
 	/* Sanity check board setup data.  This may be hooked up
diff --git a/drivers/rtc/rtc-ds1305.c b/drivers/rtc/rtc-ds1305.c
index d8df2e9e14ad..2d502fc85698 100644
--- a/drivers/rtc/rtc-ds1305.c
+++ b/drivers/rtc/rtc-ds1305.c
@@ -203,8 +203,7 @@ static int ds1305_get_time(struct device *dev, struct rtc_time *time)
 		time->tm_hour, time->tm_mday,
 		time->tm_mon, time->tm_year, time->tm_wday);
 
-	/* Time may not be set */
-	return rtc_valid_tm(time);
+	return 0;
 }
 
 static int ds1305_set_time(struct device *dev, struct rtc_time *time)
@@ -544,15 +543,6 @@ static int ds1305_nvram_write(void *priv, unsigned int off, void *buf,
 	return spi_sync(spi, &m);
 }
 
-static struct nvmem_config ds1305_nvmem_cfg = {
-	.name = "ds1305_nvram",
-	.word_size = 1,
-	.stride = 1,
-	.size = DS1305_NVRAM_LEN,
-	.reg_read = ds1305_nvram_read,
-	.reg_write = ds1305_nvram_write,
-};
-
 /*----------------------------------------------------------------------*/
 
 /*
@@ -566,6 +556,14 @@ static int ds1305_probe(struct spi_device *spi)
 	u8				addr, value;
 	struct ds1305_platform_data	*pdata = dev_get_platdata(&spi->dev);
 	bool				write_ctrl = false;
+	struct nvmem_config ds1305_nvmem_cfg = {
+		.name = "ds1305_nvram",
+		.word_size = 1,
+		.stride = 1,
+		.size = DS1305_NVRAM_LEN,
+		.reg_read = ds1305_nvram_read,
+		.reg_write = ds1305_nvram_write,
+	};
 
 	/* Sanity check board setup data.  This may be hooked up
 	 * in 3wire mode, but we don't care.  Note that unless
@@ -703,15 +701,15 @@ static int ds1305_probe(struct spi_device *spi)
 	ds1305->rtc->ops = &ds1305_ops;
 
 	ds1305_nvmem_cfg.priv = ds1305;
-	ds1305->rtc->nvmem_config = &ds1305_nvmem_cfg;
 	ds1305->rtc->nvram_old_abi = true;
-
 	status = rtc_register_device(ds1305->rtc);
 	if (status) {
 		dev_dbg(&spi->dev, "register rtc --> %d\n", status);
 		return status;
 	}
 
+	rtc_nvmem_register(ds1305->rtc, &ds1305_nvmem_cfg);
+
 	/* Maybe set up alarm IRQ; be ready to handle it triggering right
 	 * away.  NOTE that we don't share this.  The signal is active low,
 	 * and we can't ack it before a SPI message delay.  We temporarily
diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c
index 923dde912f60..a13e59edff53 100644
--- a/drivers/rtc/rtc-ds1307.c
+++ b/drivers/rtc/rtc-ds1307.c
@@ -114,7 +114,6 @@ enum ds_type {
 #	define RX8025_BIT_XST		0x20
 
 struct ds1307 {
-	struct nvmem_config	nvmem_cfg;
 	enum ds_type		type;
 	unsigned long		flags;
 #define HAS_NVRAM	0		/* bit 0 == sysfs file active */
@@ -438,8 +437,7 @@ static int ds1307_get_time(struct device *dev, struct rtc_time *t)
 		t->tm_hour, t->tm_mday,
 		t->tm_mon, t->tm_year, t->tm_wday);
 
-	/* initial clock setting can be undefined */
-	return rtc_valid_tm(t);
+	return 0;
 }
 
 static int ds1307_set_time(struct device *dev, struct rtc_time *t)
@@ -1696,24 +1694,26 @@ read_rtc:
 		}
 	}
 
-	if (chip->nvram_size) {
-		ds1307->nvmem_cfg.name = "ds1307_nvram";
-		ds1307->nvmem_cfg.word_size = 1;
-		ds1307->nvmem_cfg.stride = 1;
-		ds1307->nvmem_cfg.size = chip->nvram_size;
-		ds1307->nvmem_cfg.reg_read = ds1307_nvram_read;
-		ds1307->nvmem_cfg.reg_write = ds1307_nvram_write;
-		ds1307->nvmem_cfg.priv = ds1307;
-
-		ds1307->rtc->nvmem_config = &ds1307->nvmem_cfg;
-		ds1307->rtc->nvram_old_abi = true;
-	}
-
 	ds1307->rtc->ops = chip->rtc_ops ?: &ds13xx_rtc_ops;
 	err = rtc_register_device(ds1307->rtc);
 	if (err)
 		return err;
 
+	if (chip->nvram_size) {
+		struct nvmem_config nvmem_cfg = {
+			.name = "ds1307_nvram",
+			.word_size = 1,
+			.stride = 1,
+			.size = chip->nvram_size,
+			.reg_read = ds1307_nvram_read,
+			.reg_write = ds1307_nvram_write,
+			.priv = ds1307,
+		};
+
+		ds1307->rtc->nvram_old_abi = true;
+		rtc_nvmem_register(ds1307->rtc, &nvmem_cfg);
+	}
+
 	ds1307_hwmon_register(ds1307);
 	ds1307_clks_register(ds1307);
 
diff --git a/drivers/rtc/rtc-ds1343.c b/drivers/rtc/rtc-ds1343.c
index 895fbeeb47fe..5208da4cf94a 100644
--- a/drivers/rtc/rtc-ds1343.c
+++ b/drivers/rtc/rtc-ds1343.c
@@ -153,120 +153,22 @@ static ssize_t ds1343_store_glitchfilter(struct device *dev,
 static DEVICE_ATTR(glitch_filter, S_IRUGO | S_IWUSR, ds1343_show_glitchfilter,
 			ds1343_store_glitchfilter);
 
-static ssize_t ds1343_nvram_write(struct file *filp, struct kobject *kobj,
-			struct bin_attribute *attr,
-			char *buf, loff_t off, size_t count)
+static int ds1343_nvram_write(void *priv, unsigned int off, void *val,
+			      size_t bytes)
 {
-	int ret;
-	unsigned char address;
-	struct device *dev = kobj_to_dev(kobj);
-	struct ds1343_priv *priv = dev_get_drvdata(dev);
-
-	address = DS1343_NVRAM + off;
-
-	ret = regmap_bulk_write(priv->map, address, buf, count);
-	if (ret < 0)
-		dev_err(&priv->spi->dev, "Error in nvram write %d", ret);
+	struct ds1343_priv *ds1343 = priv;
 
-	return (ret < 0) ? ret : count;
+	return regmap_bulk_write(ds1343->map, DS1343_NVRAM + off, val, bytes);
 }
 
-
-static ssize_t ds1343_nvram_read(struct file *filp, struct kobject *kobj,
-				struct bin_attribute *attr,
-				char *buf, loff_t off, size_t count)
+static int ds1343_nvram_read(void *priv, unsigned int off, void *val,
+			     size_t bytes)
 {
-	int ret;
-	unsigned char address;
-	struct device *dev = kobj_to_dev(kobj);
-	struct ds1343_priv *priv = dev_get_drvdata(dev);
+	struct ds1343_priv *ds1343 = priv;
 
-	address = DS1343_NVRAM + off;
-
-	ret = regmap_bulk_read(priv->map, address, buf, count);
-	if (ret < 0)
-		dev_err(&priv->spi->dev, "Error in nvram read %d\n", ret);
-
-	return (ret < 0) ? ret : count;
+	return regmap_bulk_read(ds1343->map, DS1343_NVRAM + off, val, bytes);
 }
 
-
-static struct bin_attribute nvram_attr = {
-	.attr.name	= "nvram",
-	.attr.mode	= S_IRUGO | S_IWUSR,
-	.read		= ds1343_nvram_read,
-	.write		= ds1343_nvram_write,
-	.size		= DS1343_NVRAM_LEN,
-};
-
-static ssize_t ds1343_show_alarmstatus(struct device *dev,
-				struct device_attribute *attr, char *buf)
-{
-	struct ds1343_priv *priv = dev_get_drvdata(dev);
-	int alarmstatus, data;
-
-	regmap_read(priv->map, DS1343_CONTROL_REG, &data);
-
-	alarmstatus = !!(data & DS1343_A0IE);
-
-	if (alarmstatus)
-		return sprintf(buf, "enabled\n");
-	else
-		return sprintf(buf, "disabled\n");
-}
-
-static DEVICE_ATTR(alarm_status, S_IRUGO, ds1343_show_alarmstatus, NULL);
-
-static ssize_t ds1343_show_alarmmode(struct device *dev,
-				struct device_attribute *attr, char *buf)
-{
-	struct ds1343_priv *priv = dev_get_drvdata(dev);
-	int alarm_mode, data;
-	char *alarm_str;
-
-	regmap_read(priv->map, DS1343_ALM0_SEC_REG, &data);
-	alarm_mode = (data & 0x80) >> 4;
-
-	regmap_read(priv->map, DS1343_ALM0_MIN_REG, &data);
-	alarm_mode |= (data & 0x80) >> 5;
-
-	regmap_read(priv->map, DS1343_ALM0_HOUR_REG, &data);
-	alarm_mode |= (data & 0x80) >> 6;
-
-	regmap_read(priv->map, DS1343_ALM0_DAY_REG, &data);
-	alarm_mode |= (data & 0x80) >> 7;
-
-	switch (alarm_mode) {
-	case 15:
-		alarm_str = "each second";
-		break;
-
-	case 7:
-		alarm_str = "seconds match";
-		break;
-
-	case 3:
-		alarm_str = "minutes and seconds match";
-		break;
-
-	case 1:
-		alarm_str = "hours, minutes and seconds match";
-		break;
-
-	case 0:
-		alarm_str = "day, hours, minutes and seconds match";
-		break;
-
-	default:
-		alarm_str = "invalid";
-		break;
-	}
-
-	return sprintf(buf, "%s\n", alarm_str);
-}
-
-static DEVICE_ATTR(alarm_mode, S_IRUGO, ds1343_show_alarmmode, NULL);
-
 static ssize_t ds1343_show_tricklecharger(struct device *dev,
 				struct device_attribute *attr, char *buf)
 {
@@ -313,7 +215,6 @@ static DEVICE_ATTR(trickle_charger, S_IRUGO, ds1343_show_tricklecharger, NULL);
 
 static int ds1343_sysfs_register(struct device *dev)
 {
-	struct ds1343_priv *priv = dev_get_drvdata(dev);
 	int err;
 
 	err = device_create_file(dev, &dev_attr_glitch_filter);
@@ -321,33 +222,9 @@ static int ds1343_sysfs_register(struct device *dev)
 		return err;
 
 	err = device_create_file(dev, &dev_attr_trickle_charger);
-	if (err)
-		goto error1;
-
-	err = device_create_bin_file(dev, &nvram_attr);
-	if (err)
-		goto error2;
-
-	if (priv->irq <= 0)
-		return err;
-
-	err = device_create_file(dev, &dev_attr_alarm_mode);
-	if (err)
-		goto error3;
-
-	err = device_create_file(dev, &dev_attr_alarm_status);
 	if (!err)
-		return err;
+		return 0;
 
-	device_remove_file(dev, &dev_attr_alarm_mode);
-
-error3:
-	device_remove_bin_file(dev, &nvram_attr);
-
-error2:
-	device_remove_file(dev, &dev_attr_trickle_charger);
-
-error1:
 	device_remove_file(dev, &dev_attr_glitch_filter);
 
 	return err;
@@ -355,17 +232,8 @@ error1:
 
 static void ds1343_sysfs_unregister(struct device *dev)
 {
-	struct ds1343_priv *priv = dev_get_drvdata(dev);
-
 	device_remove_file(dev, &dev_attr_glitch_filter);
 	device_remove_file(dev, &dev_attr_trickle_charger);
-	device_remove_bin_file(dev, &nvram_attr);
-
-	if (priv->irq <= 0)
-		return;
-
-	device_remove_file(dev, &dev_attr_alarm_status);
-	device_remove_file(dev, &dev_attr_alarm_mode);
 }
 
 static int ds1343_read_time(struct device *dev, struct rtc_time *dt)
@@ -386,7 +254,7 @@ static int ds1343_read_time(struct device *dev, struct rtc_time *dt)
 	dt->tm_mon	= bcd2bin(buf[5] & 0x1F) - 1;
 	dt->tm_year	= bcd2bin(buf[6]) + 100; /* year offset from 1900 */
 
-	return rtc_valid_tm(dt);
+	return 0;
 }
 
 static int ds1343_set_time(struct device *dev, struct rtc_time *dt)
@@ -599,14 +467,18 @@ static const struct rtc_class_ops ds1343_rtc_ops = {
 static int ds1343_probe(struct spi_device *spi)
 {
 	struct ds1343_priv *priv;
-	struct regmap_config config;
+	struct regmap_config config = { .reg_bits = 8, .val_bits = 8,
+					.write_flag_mask = 0x80, };
 	unsigned int data;
 	int res;
-
-	memset(&config, 0, sizeof(config));
-	config.reg_bits = 8;
-	config.val_bits = 8;
-	config.write_flag_mask = 0x80;
+	struct nvmem_config nvmem_cfg = {
+		.name = "ds1343-",
+		.word_size = 1,
+		.stride = 1,
+		.size = DS1343_NVRAM_LEN,
+		.reg_read = ds1343_nvram_read,
+		.reg_write = ds1343_nvram_write,
+	};
 
 	priv = devm_kzalloc(&spi->dev, sizeof(struct ds1343_priv), GFP_KERNEL);
 	if (!priv)
@@ -646,12 +518,19 @@ static int ds1343_probe(struct spi_device *spi)
 	data &= ~(DS1343_OSF | DS1343_IRQF1 | DS1343_IRQF0);
 	regmap_write(priv->map, DS1343_STATUS_REG, data);
 
-	priv->rtc = devm_rtc_device_register(&spi->dev, "ds1343",
-					&ds1343_rtc_ops, THIS_MODULE);
-	if (IS_ERR(priv->rtc)) {
-		dev_err(&spi->dev, "unable to register rtc ds1343\n");
+	priv->rtc = devm_rtc_allocate_device(&spi->dev);
+	if (IS_ERR(priv->rtc))
 		return PTR_ERR(priv->rtc);
-	}
+
+	priv->rtc->nvram_old_abi = true;
+	priv->rtc->ops = &ds1343_rtc_ops;
+
+	res = rtc_register_device(priv->rtc);
+	if (res)
+		return res;
+
+	nvmem_cfg.priv = priv;
+	rtc_nvmem_register(priv->rtc, &nvmem_cfg);
 
 	priv->irq = spi->irq;
 
diff --git a/drivers/rtc/rtc-ds1347.c b/drivers/rtc/rtc-ds1347.c
index ccfc9d43eb1e..938512c676ee 100644
--- a/drivers/rtc/rtc-ds1347.c
+++ b/drivers/rtc/rtc-ds1347.c
@@ -66,7 +66,7 @@ static int ds1347_read_time(struct device *dev, struct rtc_time *dt)
 	dt->tm_wday = bcd2bin(buf[5]) - 1;
 	dt->tm_year = bcd2bin(buf[6]) + 100;
 
-	return rtc_valid_tm(dt);
+	return 0;
 }
 
 static int ds1347_set_time(struct device *dev, struct rtc_time *dt)
diff --git a/drivers/rtc/rtc-ds1390.c b/drivers/rtc/rtc-ds1390.c
index 4d5b007d7fc6..3b095401f848 100644
--- a/drivers/rtc/rtc-ds1390.c
+++ b/drivers/rtc/rtc-ds1390.c
@@ -153,7 +153,7 @@ static int ds1390_read_time(struct device *dev, struct rtc_time *dt)
 	/* adjust for century bit */
 	dt->tm_year = bcd2bin(chip->txrx_buf[6]) + ((chip->txrx_buf[5] & 0x80) ? 100 : 0);
 
-	return rtc_valid_tm(dt);
+	return 0;
 }
 
 static int ds1390_set_time(struct device *dev, struct rtc_time *dt)
diff --git a/drivers/rtc/rtc-ds1511.c b/drivers/rtc/rtc-ds1511.c
index 1e95312a6f2e..a7d5ca428d68 100644
--- a/drivers/rtc/rtc-ds1511.c
+++ b/drivers/rtc/rtc-ds1511.c
@@ -277,10 +277,6 @@ static int ds1511_rtc_read_time(struct device *dev, struct rtc_time *rtc_tm)
 
 	rtc_tm->tm_mon--;
 
-	if (rtc_valid_tm(rtc_tm) < 0) {
-		dev_err(dev, "retrieved date/time is not valid.\n");
-		rtc_time_to_tm(0, rtc_tm);
-	}
 	return 0;
 }
 
@@ -422,20 +418,20 @@ static int ds1511_nvram_write(void *priv, unsigned int pos, void *buf,
 	return 0;
 }
 
-static struct nvmem_config ds1511_nvmem_cfg = {
-	.name = "ds1511_nvram",
-	.word_size = 1,
-	.stride = 1,
-	.size = DS1511_RAM_MAX,
-	.reg_read = ds1511_nvram_read,
-	.reg_write = ds1511_nvram_write,
-};
-
 static int ds1511_rtc_probe(struct platform_device *pdev)
 {
 	struct resource *res;
 	struct rtc_plat_data *pdata;
 	int ret = 0;
+	struct nvmem_config ds1511_nvmem_cfg = {
+		.name = "ds1511_nvram",
+		.word_size = 1,
+		.stride = 1,
+		.size = DS1511_RAM_MAX,
+		.reg_read = ds1511_nvram_read,
+		.reg_write = ds1511_nvram_write,
+		.priv = &pdev->dev,
+	};
 
 	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
 	if (!pdata)
@@ -478,14 +474,14 @@ static int ds1511_rtc_probe(struct platform_device *pdev)
 
 	pdata->rtc->ops = &ds1511_rtc_ops;
 
-	ds1511_nvmem_cfg.priv = &pdev->dev;
-	pdata->rtc->nvmem_config = &ds1511_nvmem_cfg;
 	pdata->rtc->nvram_old_abi = true;
 
 	ret = rtc_register_device(pdata->rtc);
 	if (ret)
 		return ret;
 
+	rtc_nvmem_register(pdata->rtc, &ds1511_nvmem_cfg);
+
 	/*
 	 * if the platform has an interrupt in mind for this device,
 	 * then by all means, set it
diff --git a/drivers/rtc/rtc-ds1553.c b/drivers/rtc/rtc-ds1553.c
index 9961ec646fd2..2441b9a2b366 100644
--- a/drivers/rtc/rtc-ds1553.c
+++ b/drivers/rtc/rtc-ds1553.c
@@ -127,10 +127,6 @@ static int ds1553_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	/* year is 1900 + tm->tm_year */
 	tm->tm_year = bcd2bin(year) + bcd2bin(century) * 100 - 1900;
 
-	if (rtc_valid_tm(tm) < 0) {
-		dev_err(dev, "retrieved date/time is not valid.\n");
-		rtc_time_to_tm(0, tm);
-	}
 	return 0;
 }
 
@@ -233,46 +229,32 @@ static const struct rtc_class_ops ds1553_rtc_ops = {
 	.alarm_irq_enable	= ds1553_rtc_alarm_irq_enable,
 };
 
-static ssize_t ds1553_nvram_read(struct file *filp, struct kobject *kobj,
-				 struct bin_attribute *bin_attr,
-				 char *buf, loff_t pos, size_t size)
+static int ds1553_nvram_read(void *priv, unsigned int pos, void *val,
+			     size_t bytes)
 {
-	struct device *dev = container_of(kobj, struct device, kobj);
-	struct platform_device *pdev = to_platform_device(dev);
+	struct platform_device *pdev = priv;
 	struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
 	void __iomem *ioaddr = pdata->ioaddr;
-	ssize_t count;
+	u8 *buf = val;
 
-	for (count = 0; count < size; count++)
+	for (; bytes; bytes--)
 		*buf++ = readb(ioaddr + pos++);
-	return count;
+	return 0;
 }
 
-static ssize_t ds1553_nvram_write(struct file *filp, struct kobject *kobj,
-				  struct bin_attribute *bin_attr,
-				  char *buf, loff_t pos, size_t size)
+static int ds1553_nvram_write(void *priv, unsigned int pos, void *val,
+			      size_t bytes)
 {
-	struct device *dev = container_of(kobj, struct device, kobj);
-	struct platform_device *pdev = to_platform_device(dev);
+	struct platform_device *pdev = priv;
 	struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
 	void __iomem *ioaddr = pdata->ioaddr;
-	ssize_t count;
+	u8 *buf = val;
 
-	for (count = 0; count < size; count++)
+	for (; bytes; bytes--)
 		writeb(*buf++, ioaddr + pos++);
-	return count;
+	return 0;
 }
 
-static struct bin_attribute ds1553_nvram_attr = {
-	.attr = {
-		.name = "nvram",
-		.mode = S_IRUGO | S_IWUSR,
-	},
-	.size = RTC_OFFSET,
-	.read = ds1553_nvram_read,
-	.write = ds1553_nvram_write,
-};
-
 static int ds1553_rtc_probe(struct platform_device *pdev)
 {
 	struct resource *res;
@@ -280,6 +262,15 @@ static int ds1553_rtc_probe(struct platform_device *pdev)
 	struct rtc_plat_data *pdata;
 	void __iomem *ioaddr;
 	int ret = 0;
+	struct nvmem_config nvmem_cfg = {
+		.name = "ds1553_nvram",
+		.word_size = 1,
+		.stride = 1,
+		.size = RTC_OFFSET,
+		.reg_read = ds1553_nvram_read,
+		.reg_write = ds1553_nvram_write,
+		.priv = pdev,
+	};
 
 	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
 	if (!pdata)
@@ -308,11 +299,17 @@ static int ds1553_rtc_probe(struct platform_device *pdev)
 	pdata->last_jiffies = jiffies;
 	platform_set_drvdata(pdev, pdata);
 
-	pdata->rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
-				  &ds1553_rtc_ops, THIS_MODULE);
+	pdata->rtc = devm_rtc_allocate_device(&pdev->dev);
 	if (IS_ERR(pdata->rtc))
 		return PTR_ERR(pdata->rtc);
 
+	pdata->rtc->ops = &ds1553_rtc_ops;
+	pdata->rtc->nvram_old_abi = true;
+
+	ret = rtc_register_device(pdata->rtc);
+	if (ret)
+		return ret;
+
 	if (pdata->irq > 0) {
 		writeb(0, ioaddr + RTC_INTERRUPTS);
 		if (devm_request_irq(&pdev->dev, pdata->irq,
@@ -323,21 +320,9 @@ static int ds1553_rtc_probe(struct platform_device *pdev)
 		}
 	}
 
-	ret = sysfs_create_bin_file(&pdev->dev.kobj, &ds1553_nvram_attr);
-	if (ret)
-		dev_err(&pdev->dev, "unable to create sysfs file: %s\n",
-			ds1553_nvram_attr.attr.name);
-
-	return 0;
-}
-
-static int ds1553_rtc_remove(struct platform_device *pdev)
-{
-	struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
+	if (rtc_nvmem_register(pdata->rtc, &nvmem_cfg))
+		dev_err(&pdev->dev, "unable to register nvmem\n");
 
-	sysfs_remove_bin_file(&pdev->dev.kobj, &ds1553_nvram_attr);
-	if (pdata->irq > 0)
-		writeb(0, pdata->ioaddr + RTC_INTERRUPTS);
 	return 0;
 }
 
@@ -346,7 +331,6 @@ MODULE_ALIAS("platform:rtc-ds1553");
 
 static struct platform_driver ds1553_rtc_driver = {
 	.probe		= ds1553_rtc_probe,
-	.remove		= ds1553_rtc_remove,
 	.driver		= {
 		.name	= "rtc-ds1553",
 	},
diff --git a/drivers/rtc/rtc-ds1685.c b/drivers/rtc/rtc-ds1685.c
index ed43b4311660..1a39829d2b40 100644
--- a/drivers/rtc/rtc-ds1685.c
+++ b/drivers/rtc/rtc-ds1685.c
@@ -306,7 +306,7 @@ ds1685_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	tm->tm_yday  = rtc_year_days(tm->tm_mday, tm->tm_mon, tm->tm_year);
 	tm->tm_isdst = 0; /* RTC has hardcoded timezone, so don't use. */
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 /**
diff --git a/drivers/rtc/rtc-ds1742.c b/drivers/rtc/rtc-ds1742.c
index 3abf1cbfb8ce..2d781180e968 100644
--- a/drivers/rtc/rtc-ds1742.c
+++ b/drivers/rtc/rtc-ds1742.c
@@ -53,9 +53,7 @@
 struct rtc_plat_data {
 	void __iomem *ioaddr_nvram;
 	void __iomem *ioaddr_rtc;
-	size_t size_nvram;
 	unsigned long last_jiffies;
-	struct bin_attribute nvram_attr;
 };
 
 static int ds1742_rtc_set_time(struct device *dev, struct rtc_time *tm)
@@ -114,7 +112,7 @@ static int ds1742_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	/* year is 1900 + tm->tm_year */
 	tm->tm_year = bcd2bin(year) + bcd2bin(century) * 100 - 1900;
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static const struct rtc_class_ops ds1742_rtc_ops = {
@@ -122,34 +120,28 @@ static const struct rtc_class_ops ds1742_rtc_ops = {
 	.set_time	= ds1742_rtc_set_time,
 };
 
-static ssize_t ds1742_nvram_read(struct file *filp, struct kobject *kobj,
-				 struct bin_attribute *bin_attr,
-				 char *buf, loff_t pos, size_t size)
+static int ds1742_nvram_read(void *priv, unsigned int pos, void *val,
+			     size_t bytes)
 {
-	struct device *dev = container_of(kobj, struct device, kobj);
-	struct platform_device *pdev = to_platform_device(dev);
-	struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
+	struct rtc_plat_data *pdata = priv;
 	void __iomem *ioaddr = pdata->ioaddr_nvram;
-	ssize_t count;
+	u8 *buf = val;
 
-	for (count = 0; count < size; count++)
+	for (; bytes; bytes--)
 		*buf++ = readb(ioaddr + pos++);
-	return count;
+	return 0;
 }
 
-static ssize_t ds1742_nvram_write(struct file *filp, struct kobject *kobj,
-				  struct bin_attribute *bin_attr,
-				  char *buf, loff_t pos, size_t size)
+static int ds1742_nvram_write(void *priv, unsigned int pos, void *val,
+			      size_t bytes)
 {
-	struct device *dev = container_of(kobj, struct device, kobj);
-	struct platform_device *pdev = to_platform_device(dev);
-	struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
+	struct rtc_plat_data *pdata = priv;
 	void __iomem *ioaddr = pdata->ioaddr_nvram;
-	ssize_t count;
+	u8 *buf = val;
 
-	for (count = 0; count < size; count++)
+	for (; bytes; bytes--)
 		writeb(*buf++, ioaddr + pos++);
-	return count;
+	return 0;
 }
 
 static int ds1742_rtc_probe(struct platform_device *pdev)
@@ -160,6 +152,14 @@ static int ds1742_rtc_probe(struct platform_device *pdev)
 	struct rtc_plat_data *pdata;
 	void __iomem *ioaddr;
 	int ret = 0;
+	struct nvmem_config nvmem_cfg = {
+		.name = "ds1742_nvram",
+		.word_size = 1,
+		.stride = 1,
+		.reg_read = ds1742_nvram_read,
+		.reg_write = ds1742_nvram_write,
+	};
+
 
 	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
 	if (!pdata)
@@ -171,15 +171,10 @@ static int ds1742_rtc_probe(struct platform_device *pdev)
 		return PTR_ERR(ioaddr);
 
 	pdata->ioaddr_nvram = ioaddr;
-	pdata->size_nvram = resource_size(res) - RTC_SIZE;
-	pdata->ioaddr_rtc = ioaddr + pdata->size_nvram;
+	pdata->ioaddr_rtc = ioaddr + resource_size(res) - RTC_SIZE;
 
-	sysfs_bin_attr_init(&pdata->nvram_attr);
-	pdata->nvram_attr.attr.name = "nvram";
-	pdata->nvram_attr.attr.mode = S_IRUGO | S_IWUSR;
-	pdata->nvram_attr.read = ds1742_nvram_read;
-	pdata->nvram_attr.write = ds1742_nvram_write;
-	pdata->nvram_attr.size = pdata->size_nvram;
+	nvmem_cfg.size = resource_size(res) - RTC_SIZE;
+	nvmem_cfg.priv = pdata;
 
 	/* turn RTC on if it was not on */
 	ioaddr = pdata->ioaddr_rtc;
@@ -196,24 +191,21 @@ static int ds1742_rtc_probe(struct platform_device *pdev)
 
 	pdata->last_jiffies = jiffies;
 	platform_set_drvdata(pdev, pdata);
-	rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
-				  &ds1742_rtc_ops, THIS_MODULE);
+
+	rtc = devm_rtc_allocate_device(&pdev->dev);
 	if (IS_ERR(rtc))
 		return PTR_ERR(rtc);
 
-	ret = sysfs_create_bin_file(&pdev->dev.kobj, &pdata->nvram_attr);
-	if (ret)
-		dev_err(&pdev->dev, "Unable to create sysfs entry: %s\n",
-			pdata->nvram_attr.attr.name);
+	rtc->ops = &ds1742_rtc_ops;
+	rtc->nvram_old_abi = true;
 
-	return 0;
-}
+	ret = rtc_register_device(rtc);
+	if (ret)
+		return ret;
 
-static int ds1742_rtc_remove(struct platform_device *pdev)
-{
-	struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
+	if (rtc_nvmem_register(rtc, &nvmem_cfg))
+		dev_err(&pdev->dev, "Unable to register nvmem\n");
 
-	sysfs_remove_bin_file(&pdev->dev.kobj, &pdata->nvram_attr);
 	return 0;
 }
 
@@ -225,7 +217,6 @@ MODULE_DEVICE_TABLE(of, ds1742_rtc_of_match);
 
 static struct platform_driver ds1742_rtc_driver = {
 	.probe		= ds1742_rtc_probe,
-	.remove		= ds1742_rtc_remove,
 	.driver		= {
 		.name	= "rtc-ds1742",
 		.of_match_table = of_match_ptr(ds1742_rtc_of_match),
diff --git a/drivers/rtc/rtc-ds2404.c b/drivers/rtc/rtc-ds2404.c
index 9a1582ed7070..b886b6a5c178 100644
--- a/drivers/rtc/rtc-ds2404.c
+++ b/drivers/rtc/rtc-ds2404.c
@@ -207,7 +207,7 @@ static int ds2404_read_time(struct device *dev, struct rtc_time *dt)
 	time = le32_to_cpu(time);
 
 	rtc_time_to_tm(time, dt);
-	return rtc_valid_tm(dt);
+	return 0;
 }
 
 static int ds2404_set_mmss(struct device *dev, unsigned long secs)
diff --git a/drivers/rtc/rtc-ds3232.c b/drivers/rtc/rtc-ds3232.c
index 0550f7ba464f..7184e5145f12 100644
--- a/drivers/rtc/rtc-ds3232.c
+++ b/drivers/rtc/rtc-ds3232.c
@@ -145,7 +145,7 @@ static int ds3232_read_time(struct device *dev, struct rtc_time *time)
 
 	time->tm_year = bcd2bin(year) + add_century;
 
-	return rtc_valid_tm(time);
+	return 0;
 }
 
 static int ds3232_set_time(struct device *dev, struct rtc_time *time)
diff --git a/drivers/rtc/rtc-efi.c b/drivers/rtc/rtc-efi.c
index 0130afd7fe88..3454e7814524 100644
--- a/drivers/rtc/rtc-efi.c
+++ b/drivers/rtc/rtc-efi.c
@@ -176,7 +176,7 @@ static int efi_read_time(struct device *dev, struct rtc_time *tm)
 	if (!convert_from_efi_time(&eft, tm))
 		return -EIO;
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int efi_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-fm3130.c b/drivers/rtc/rtc-fm3130.c
index 576eadbba296..e1137670d4d2 100644
--- a/drivers/rtc/rtc-fm3130.c
+++ b/drivers/rtc/rtc-fm3130.c
@@ -136,8 +136,7 @@ static int fm3130_get_time(struct device *dev, struct rtc_time *t)
 		t->tm_hour, t->tm_mday,
 		t->tm_mon, t->tm_year, t->tm_wday);
 
-	/* initial clock setting can be undefined */
-	return rtc_valid_tm(t);
+	return 0;
 }
 
 
diff --git a/drivers/rtc/rtc-goldfish.c b/drivers/rtc/rtc-goldfish.c
index d67769265185..a1c44d0c8557 100644
--- a/drivers/rtc/rtc-goldfish.c
+++ b/drivers/rtc/rtc-goldfish.c
@@ -235,3 +235,5 @@ static struct platform_driver goldfish_rtc = {
 };
 
 module_platform_driver(goldfish_rtc);
+
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/rtc/rtc-isl12022.c b/drivers/rtc/rtc-isl12022.c
index 38586a024ee8..890ccfc9e5aa 100644
--- a/drivers/rtc/rtc-isl12022.c
+++ b/drivers/rtc/rtc-isl12022.c
@@ -104,8 +104,9 @@ static int isl12022_write_reg(struct i2c_client *client,
  * In the routines that deal directly with the isl12022 hardware, we use
  * rtc_time -- month 0-11, hour 0-23, yr = calendar year-epoch.
  */
-static int isl12022_get_datetime(struct i2c_client *client, struct rtc_time *tm)
+static int isl12022_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
+	struct i2c_client *client = to_i2c_client(dev);
 	uint8_t buf[ISL12022_REG_INT + 1];
 	int ret;
 
@@ -149,11 +150,12 @@ static int isl12022_get_datetime(struct i2c_client *client, struct rtc_time *tm)
 		tm->tm_sec, tm->tm_min, tm->tm_hour,
 		tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
-static int isl12022_set_datetime(struct i2c_client *client, struct rtc_time *tm)
+static int isl12022_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
+	struct i2c_client *client = to_i2c_client(dev);
 	struct isl12022 *isl12022 = i2c_get_clientdata(client);
 	size_t i;
 	int ret;
@@ -199,7 +201,7 @@ static int isl12022_set_datetime(struct i2c_client *client, struct rtc_time *tm)
 				return ret;
 		}
 
-		isl12022->write_enabled = 1;
+		isl12022->write_enabled = true;
 	}
 
 	/* hours, minutes and seconds */
@@ -228,16 +230,6 @@ static int isl12022_set_datetime(struct i2c_client *client, struct rtc_time *tm)
 	return 0;
 }
 
-static int isl12022_rtc_read_time(struct device *dev, struct rtc_time *tm)
-{
-	return isl12022_get_datetime(to_i2c_client(dev), tm);
-}
-
-static int isl12022_rtc_set_time(struct device *dev, struct rtc_time *tm)
-{
-	return isl12022_set_datetime(to_i2c_client(dev), tm);
-}
-
 static const struct rtc_class_ops isl12022_rtc_ops = {
 	.read_time	= isl12022_rtc_read_time,
 	.set_time	= isl12022_rtc_set_time,
diff --git a/drivers/rtc/rtc-isl12026.c b/drivers/rtc/rtc-isl12026.c
new file mode 100644
index 000000000000..97f594f9667c
--- /dev/null
+++ b/drivers/rtc/rtc-isl12026.c
@@ -0,0 +1,501 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * An I2C driver for the Intersil ISL 12026
+ *
+ * Copyright (c) 2018 Cavium, Inc.
+ */
+#include <linux/bcd.h>
+#include <linux/delay.h>
+#include <linux/i2c.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/nvmem-provider.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/rtc.h>
+#include <linux/slab.h>
+
+/* register offsets */
+#define ISL12026_REG_PWR	0x14
+# define ISL12026_REG_PWR_BSW	BIT(6)
+# define ISL12026_REG_PWR_SBIB	BIT(7)
+#define ISL12026_REG_SC		0x30
+#define ISL12026_REG_HR		0x32
+# define ISL12026_REG_HR_MIL	BIT(7)	/* military or 24 hour time */
+#define ISL12026_REG_SR		0x3f
+# define ISL12026_REG_SR_RTCF	BIT(0)
+# define ISL12026_REG_SR_WEL	BIT(1)
+# define ISL12026_REG_SR_RWEL	BIT(2)
+# define ISL12026_REG_SR_MBZ	BIT(3)
+# define ISL12026_REG_SR_OSCF	BIT(4)
+
+/* The EEPROM array responds at i2c address 0x57 */
+#define ISL12026_EEPROM_ADDR	0x57
+
+#define ISL12026_PAGESIZE 16
+#define ISL12026_NVMEM_WRITE_TIME 20
+
+struct isl12026 {
+	struct rtc_device *rtc;
+	struct i2c_client *nvm_client;
+};
+
+static int isl12026_read_reg(struct i2c_client *client, int reg)
+{
+	u8 addr[] = {0, reg};
+	u8 val;
+	int ret;
+
+	struct i2c_msg msgs[] = {
+		{
+			.addr	= client->addr,
+			.flags	= 0,
+			.len	= sizeof(addr),
+			.buf	= addr
+		}, {
+			.addr	= client->addr,
+			.flags	= I2C_M_RD,
+			.len	= 1,
+			.buf	= &val
+		}
+	};
+
+	ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs));
+	if (ret != ARRAY_SIZE(msgs)) {
+		dev_err(&client->dev, "read reg error, ret=%d\n", ret);
+		ret = ret < 0 ? ret : -EIO;
+	} else {
+		ret = val;
+	}
+
+	return ret;
+}
+
+static int isl12026_arm_write(struct i2c_client *client)
+{
+	int ret;
+	u8 op[3];
+	struct i2c_msg msg = {
+		.addr	= client->addr,
+		.flags	= 0,
+		.len	= 1,
+		.buf	= op
+	};
+
+	/* Set SR.WEL */
+	op[0] = 0;
+	op[1] = ISL12026_REG_SR;
+	op[2] = ISL12026_REG_SR_WEL;
+	msg.len = 3;
+	ret = i2c_transfer(client->adapter, &msg, 1);
+	if (ret != 1) {
+		dev_err(&client->dev, "write error SR.WEL, ret=%d\n", ret);
+		ret = ret < 0 ? ret : -EIO;
+		goto out;
+	}
+
+	/* Set SR.WEL and SR.RWEL */
+	op[2] = ISL12026_REG_SR_WEL | ISL12026_REG_SR_RWEL;
+	msg.len = 3;
+	ret = i2c_transfer(client->adapter, &msg, 1);
+	if (ret != 1) {
+		dev_err(&client->dev,
+			"write error SR.WEL|SR.RWEL, ret=%d\n", ret);
+		ret = ret < 0 ? ret : -EIO;
+		goto out;
+	} else {
+		ret = 0;
+	}
+out:
+	return ret;
+}
+
+static int isl12026_disarm_write(struct i2c_client *client)
+{
+	int ret;
+	u8 op[3] = {0, ISL12026_REG_SR, 0};
+	struct i2c_msg msg = {
+		.addr	= client->addr,
+		.flags	= 0,
+		.len	= sizeof(op),
+		.buf	= op
+	};
+
+	ret = i2c_transfer(client->adapter, &msg, 1);
+	if (ret != 1) {
+		dev_err(&client->dev,
+			"write error SR, ret=%d\n", ret);
+		ret = ret < 0 ? ret : -EIO;
+	} else {
+		ret = 0;
+	}
+
+	return ret;
+}
+
+static int isl12026_write_reg(struct i2c_client *client, int reg, u8 val)
+{
+	int ret;
+	u8 op[3] = {0, reg, val};
+	struct i2c_msg msg = {
+		.addr	= client->addr,
+		.flags	= 0,
+		.len	= sizeof(op),
+		.buf	= op
+	};
+
+	ret = isl12026_arm_write(client);
+	if (ret)
+		return ret;
+
+	ret = i2c_transfer(client->adapter, &msg, 1);
+	if (ret != 1) {
+		dev_err(&client->dev, "write error CCR, ret=%d\n", ret);
+		ret = ret < 0 ? ret : -EIO;
+		goto out;
+	}
+
+	msleep(ISL12026_NVMEM_WRITE_TIME);
+
+	ret = isl12026_disarm_write(client);
+out:
+	return ret;
+}
+
+static int isl12026_rtc_set_time(struct device *dev, struct rtc_time *tm)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	int ret;
+	u8 op[10];
+	struct i2c_msg msg = {
+		.addr	= client->addr,
+		.flags	= 0,
+		.len	= sizeof(op),
+		.buf	= op
+	};
+
+	ret = isl12026_arm_write(client);
+	if (ret)
+		return ret;
+
+	/* Set the CCR registers */
+	op[0] = 0;
+	op[1] = ISL12026_REG_SC;
+	op[2] = bin2bcd(tm->tm_sec); /* SC */
+	op[3] = bin2bcd(tm->tm_min); /* MN */
+	op[4] = bin2bcd(tm->tm_hour) | ISL12026_REG_HR_MIL; /* HR */
+	op[5] = bin2bcd(tm->tm_mday); /* DT */
+	op[6] = bin2bcd(tm->tm_mon + 1); /* MO */
+	op[7] = bin2bcd(tm->tm_year % 100); /* YR */
+	op[8] = bin2bcd(tm->tm_wday & 7); /* DW */
+	op[9] = bin2bcd(tm->tm_year >= 100 ? 20 : 19); /* Y2K */
+	ret = i2c_transfer(client->adapter, &msg, 1);
+	if (ret != 1) {
+		dev_err(&client->dev, "write error CCR, ret=%d\n", ret);
+		ret = ret < 0 ? ret : -EIO;
+		goto out;
+	}
+
+	ret = isl12026_disarm_write(client);
+out:
+	return ret;
+}
+
+static int isl12026_rtc_read_time(struct device *dev, struct rtc_time *tm)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	u8 ccr[8];
+	u8 addr[2];
+	u8 sr;
+	int ret;
+	struct i2c_msg msgs[] = {
+		{
+			.addr	= client->addr,
+			.flags	= 0,
+			.len	= sizeof(addr),
+			.buf	= addr
+		}, {
+			.addr	= client->addr,
+			.flags	= I2C_M_RD,
+		}
+	};
+
+	/* First, read SR */
+	addr[0] = 0;
+	addr[1] = ISL12026_REG_SR;
+	msgs[1].len = 1;
+	msgs[1].buf = &sr;
+
+	ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs));
+	if (ret != ARRAY_SIZE(msgs)) {
+		dev_err(&client->dev, "read error, ret=%d\n", ret);
+		ret = ret < 0 ? ret : -EIO;
+		goto out;
+	}
+
+	if (sr & ISL12026_REG_SR_RTCF)
+		dev_warn(&client->dev, "Real-Time Clock Failure on read\n");
+	if (sr & ISL12026_REG_SR_OSCF)
+		dev_warn(&client->dev, "Oscillator Failure on read\n");
+
+	/* Second, CCR regs */
+	addr[0] = 0;
+	addr[1] = ISL12026_REG_SC;
+	msgs[1].len = sizeof(ccr);
+	msgs[1].buf = ccr;
+
+	ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs));
+	if (ret != ARRAY_SIZE(msgs)) {
+		dev_err(&client->dev, "read error, ret=%d\n", ret);
+		ret = ret < 0 ? ret : -EIO;
+		goto out;
+	}
+
+	tm->tm_sec = bcd2bin(ccr[0] & 0x7F);
+	tm->tm_min = bcd2bin(ccr[1] & 0x7F);
+	if (ccr[2] & ISL12026_REG_HR_MIL)
+		tm->tm_hour = bcd2bin(ccr[2] & 0x3F);
+	else
+		tm->tm_hour = bcd2bin(ccr[2] & 0x1F) +
+			((ccr[2] & 0x20) ? 12 : 0);
+	tm->tm_mday = bcd2bin(ccr[3] & 0x3F);
+	tm->tm_mon = bcd2bin(ccr[4] & 0x1F) - 1;
+	tm->tm_year = bcd2bin(ccr[5]);
+	if (bcd2bin(ccr[7]) == 20)
+		tm->tm_year += 100;
+	tm->tm_wday = ccr[6] & 0x07;
+
+	ret = 0;
+out:
+	return ret;
+}
+
+static const struct rtc_class_ops isl12026_rtc_ops = {
+	.read_time	= isl12026_rtc_read_time,
+	.set_time	= isl12026_rtc_set_time,
+};
+
+static int isl12026_nvm_read(void *p, unsigned int offset,
+			     void *val, size_t bytes)
+{
+	struct isl12026 *priv = p;
+	int ret;
+	u8 addr[2];
+	struct i2c_msg msgs[] = {
+		{
+			.addr	= priv->nvm_client->addr,
+			.flags	= 0,
+			.len	= sizeof(addr),
+			.buf	= addr
+		}, {
+			.addr	= priv->nvm_client->addr,
+			.flags	= I2C_M_RD,
+			.buf	= val
+		}
+	};
+
+	/*
+	 * offset and bytes checked and limited by nvmem core, so
+	 * proceed without further checks.
+	 */
+	ret = mutex_lock_interruptible(&priv->rtc->ops_lock);
+	if (ret)
+		return ret;
+
+	/* 2 bytes of address, most significant first */
+	addr[0] = offset >> 8;
+	addr[1] = offset;
+	msgs[1].len = bytes;
+	ret = i2c_transfer(priv->nvm_client->adapter, msgs, ARRAY_SIZE(msgs));
+
+	mutex_unlock(&priv->rtc->ops_lock);
+
+	if (ret != ARRAY_SIZE(msgs)) {
+		dev_err(&priv->nvm_client->dev,
+			"nvmem read error, ret=%d\n", ret);
+		return ret < 0 ? ret : -EIO;
+	}
+
+	return 0;
+}
+
+static int isl12026_nvm_write(void *p, unsigned int offset,
+			      void *val, size_t bytes)
+{
+	struct isl12026 *priv = p;
+	int ret;
+	u8 *v = val;
+	size_t chunk_size, num_written;
+	u8 payload[ISL12026_PAGESIZE + 2]; /* page + 2 address bytes */
+	struct i2c_msg msgs[] = {
+		{
+			.addr	= priv->nvm_client->addr,
+			.flags	= 0,
+			.buf	= payload
+		}
+	};
+
+	/*
+	 * offset and bytes checked and limited by nvmem core, so
+	 * proceed without further checks.
+	 */
+	ret = mutex_lock_interruptible(&priv->rtc->ops_lock);
+	if (ret)
+		return ret;
+
+	num_written = 0;
+	while (bytes) {
+		chunk_size = round_down(offset, ISL12026_PAGESIZE) +
+			ISL12026_PAGESIZE - offset;
+		chunk_size = min(bytes, chunk_size);
+		/*
+		 * 2 bytes of address, most significant first, followed
+		 * by page data bytes
+		 */
+		memcpy(payload + 2, v + num_written, chunk_size);
+		payload[0] = offset >> 8;
+		payload[1] = offset;
+		msgs[0].len = chunk_size + 2;
+		ret = i2c_transfer(priv->nvm_client->adapter,
+				   msgs, ARRAY_SIZE(msgs));
+		if (ret != ARRAY_SIZE(msgs)) {
+			dev_err(&priv->nvm_client->dev,
+				"nvmem write error, ret=%d\n", ret);
+			ret = ret < 0 ? ret : -EIO;
+			break;
+		}
+		ret = 0;
+		bytes -= chunk_size;
+		offset += chunk_size;
+		num_written += chunk_size;
+		msleep(ISL12026_NVMEM_WRITE_TIME);
+	}
+
+	mutex_unlock(&priv->rtc->ops_lock);
+
+	return ret;
+}
+
+static void isl12026_force_power_modes(struct i2c_client *client)
+{
+	int ret;
+	int pwr, requested_pwr;
+	u32 bsw_val, sbib_val;
+	bool set_bsw, set_sbib;
+
+	/*
+	 * If we can read the of_property, set the specified value.
+	 * If there is an error reading the of_property (likely
+	 * because it does not exist), keep the current value.
+	 */
+	ret = of_property_read_u32(client->dev.of_node,
+				   "isil,pwr-bsw", &bsw_val);
+	set_bsw = (ret == 0);
+
+	ret = of_property_read_u32(client->dev.of_node,
+				   "isil,pwr-sbib", &sbib_val);
+	set_sbib = (ret == 0);
+
+	/* Check if PWR.BSW and/or PWR.SBIB need specified values */
+	if (!set_bsw && !set_sbib)
+		return;
+
+	pwr = isl12026_read_reg(client, ISL12026_REG_PWR);
+	if (pwr < 0) {
+		dev_warn(&client->dev, "Error: Failed to read PWR %d\n", pwr);
+		return;
+	}
+
+	requested_pwr = pwr;
+
+	if (set_bsw) {
+		if (bsw_val)
+			requested_pwr |= ISL12026_REG_PWR_BSW;
+		else
+			requested_pwr &= ~ISL12026_REG_PWR_BSW;
+	} /* else keep current BSW */
+
+	if (set_sbib) {
+		if (sbib_val)
+			requested_pwr |= ISL12026_REG_PWR_SBIB;
+		else
+			requested_pwr &= ~ISL12026_REG_PWR_SBIB;
+	} /* else keep current SBIB */
+
+	if (pwr >= 0 && pwr != requested_pwr) {
+		dev_dbg(&client->dev, "PWR: %02x\n", pwr);
+		dev_dbg(&client->dev, "Updating PWR to: %02x\n", requested_pwr);
+		isl12026_write_reg(client, ISL12026_REG_PWR, requested_pwr);
+	}
+}
+
+static int isl12026_probe_new(struct i2c_client *client)
+{
+	struct isl12026 *priv;
+	int ret;
+	struct nvmem_config nvm_cfg = {
+		.name = "isl12026-",
+		.base_dev = &client->dev,
+		.stride = 1,
+		.word_size = 1,
+		.size = 512,
+		.reg_read = isl12026_nvm_read,
+		.reg_write = isl12026_nvm_write,
+	};
+
+	if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C))
+		return -ENODEV;
+
+	priv = devm_kzalloc(&client->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	i2c_set_clientdata(client, priv);
+
+	isl12026_force_power_modes(client);
+
+	priv->nvm_client = i2c_new_dummy(client->adapter, ISL12026_EEPROM_ADDR);
+	if (!priv->nvm_client)
+		return -ENOMEM;
+
+	priv->rtc = devm_rtc_allocate_device(&client->dev);
+	ret = PTR_ERR_OR_ZERO(priv->rtc);
+	if (ret)
+		return ret;
+
+	priv->rtc->ops = &isl12026_rtc_ops;
+	nvm_cfg.priv = priv;
+	ret = rtc_nvmem_register(priv->rtc, &nvm_cfg);
+	if (ret)
+		return ret;
+
+	return rtc_register_device(priv->rtc);
+}
+
+static int isl12026_remove(struct i2c_client *client)
+{
+	struct isl12026 *priv = i2c_get_clientdata(client);
+
+	i2c_unregister_device(priv->nvm_client);
+	return 0;
+}
+
+static const struct of_device_id isl12026_dt_match[] = {
+	{ .compatible = "isil,isl12026" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, isl12026_dt_match);
+
+static struct i2c_driver isl12026_driver = {
+	.driver		= {
+		.name	= "rtc-isl12026",
+		.of_match_table = isl12026_dt_match,
+	},
+	.probe_new	= isl12026_probe_new,
+	.remove		= isl12026_remove,
+};
+
+module_i2c_driver(isl12026_driver);
+
+MODULE_DESCRIPTION("ISL 12026 RTC driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/rtc/rtc-isl1208.c b/drivers/rtc/rtc-isl1208.c
index 8dd299c6a1f3..1a2c38cc0178 100644
--- a/drivers/rtc/rtc-isl1208.c
+++ b/drivers/rtc/rtc-isl1208.c
@@ -459,6 +459,11 @@ isl1208_i2c_set_time(struct i2c_client *client, struct rtc_time const *tm)
 	}
 
 	/* clear WRTC again */
+	sr = isl1208_i2c_get_sr(client);
+	if (sr < 0) {
+		dev_err(&client->dev, "%s: reading SR failed\n", __func__);
+		return sr;
+	}
 	sr = i2c_smbus_write_byte_data(client, ISL1208_REG_SR,
 				       sr & ~ISL1208_REG_SR_WRTC);
 	if (sr < 0) {
@@ -630,29 +635,12 @@ isl1208_probe(struct i2c_client *client, const struct i2c_device_id *id)
 	if (isl1208_i2c_validate_client(client) < 0)
 		return -ENODEV;
 
-	if (client->irq > 0) {
-		rc = devm_request_threaded_irq(&client->dev, client->irq, NULL,
-					       isl1208_rtc_interrupt,
-					       IRQF_SHARED | IRQF_ONESHOT,
-					       isl1208_driver.driver.name,
-					       client);
-		if (!rc) {
-			device_init_wakeup(&client->dev, 1);
-			enable_irq_wake(client->irq);
-		} else {
-			dev_err(&client->dev,
-				"Unable to request irq %d, no alarm support\n",
-				client->irq);
-			client->irq = 0;
-		}
-	}
-
-	rtc = devm_rtc_device_register(&client->dev, isl1208_driver.driver.name,
-				  &isl1208_rtc_ops,
-				  THIS_MODULE);
+	rtc = devm_rtc_allocate_device(&client->dev);
 	if (IS_ERR(rtc))
 		return PTR_ERR(rtc);
 
+	rtc->ops = &isl1208_rtc_ops;
+
 	i2c_set_clientdata(client, rtc);
 
 	rc = isl1208_i2c_get_sr(client);
@@ -669,7 +657,24 @@ isl1208_probe(struct i2c_client *client, const struct i2c_device_id *id)
 	if (rc)
 		return rc;
 
-	return 0;
+	if (client->irq > 0) {
+		rc = devm_request_threaded_irq(&client->dev, client->irq, NULL,
+					       isl1208_rtc_interrupt,
+					       IRQF_SHARED | IRQF_ONESHOT,
+					       isl1208_driver.driver.name,
+					       client);
+		if (!rc) {
+			device_init_wakeup(&client->dev, 1);
+			enable_irq_wake(client->irq);
+		} else {
+			dev_err(&client->dev,
+				"Unable to request irq %d, no alarm support\n",
+				client->irq);
+			client->irq = 0;
+		}
+	}
+
+	return rtc_register_device(rtc);
 }
 
 static int
diff --git a/drivers/rtc/rtc-jz4740.c b/drivers/rtc/rtc-jz4740.c
index ff65a7d2b9c9..d0a891777f44 100644
--- a/drivers/rtc/rtc-jz4740.c
+++ b/drivers/rtc/rtc-jz4740.c
@@ -173,7 +173,7 @@ static int jz4740_rtc_read_time(struct device *dev, struct rtc_time *time)
 
 	rtc_time_to_tm(secs, time);
 
-	return rtc_valid_tm(time);
+	return 0;
 }
 
 static int jz4740_rtc_set_mmss(struct device *dev, unsigned long secs)
diff --git a/drivers/rtc/rtc-lib.c b/drivers/rtc/rtc-lib.c
index 1ae7da5cfc60..4a3c0f3aab14 100644
--- a/drivers/rtc/rtc-lib.c
+++ b/drivers/rtc/rtc-lib.c
@@ -52,13 +52,11 @@ EXPORT_SYMBOL(rtc_year_days);
  */
 void rtc_time64_to_tm(time64_t time, struct rtc_time *tm)
 {
-	unsigned int month, year;
-	unsigned long secs;
+	unsigned int month, year, secs;
 	int days;
 
 	/* time must be positive */
-	days = div_s64(time, 86400);
-	secs = time - (unsigned int) days * 86400;
+	days = div_s64_rem(time, 86400, &secs);
 
 	/* day of the week, 1970-01-01 was a Thursday */
 	tm->tm_wday = (days + 4) % 7;
@@ -67,7 +65,7 @@ void rtc_time64_to_tm(time64_t time, struct rtc_time *tm)
 	days -= (year - 1970) * 365
 		+ LEAPS_THRU_END_OF(year - 1)
 		- LEAPS_THRU_END_OF(1970 - 1);
-	if (days < 0) {
+	while (days < 0) {
 		year -= 1;
 		days += 365 + is_leap_year(year);
 	}
diff --git a/drivers/rtc/rtc-lpc24xx.c b/drivers/rtc/rtc-lpc24xx.c
index 59d99596fdeb..14dc7b04fae0 100644
--- a/drivers/rtc/rtc-lpc24xx.c
+++ b/drivers/rtc/rtc-lpc24xx.c
@@ -110,7 +110,7 @@ static int lpc24xx_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	tm->tm_year = CT1_YEAR(ct1);
 	tm->tm_yday = CT2_DOY(ct2);
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int lpc24xx_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *wkalrm)
diff --git a/drivers/rtc/rtc-lpc32xx.c b/drivers/rtc/rtc-lpc32xx.c
index 887871c3d526..3ba87239aacc 100644
--- a/drivers/rtc/rtc-lpc32xx.c
+++ b/drivers/rtc/rtc-lpc32xx.c
@@ -70,7 +70,7 @@ static int lpc32xx_rtc_read_time(struct device *dev, struct rtc_time *time)
 	elapsed_sec = rtc_readl(rtc, LPC32XX_RTC_UCOUNT);
 	rtc_time_to_tm(elapsed_sec, time);
 
-	return rtc_valid_tm(time);
+	return 0;
 }
 
 static int lpc32xx_rtc_set_mmss(struct device *dev, unsigned long secs)
diff --git a/drivers/rtc/rtc-ls1x.c b/drivers/rtc/rtc-ls1x.c
index e04ca54f21e2..045af1135e48 100644
--- a/drivers/rtc/rtc-ls1x.c
+++ b/drivers/rtc/rtc-ls1x.c
@@ -98,7 +98,7 @@ static int ls1x_rtc_read_time(struct device *dev, struct rtc_time *rtm)
 			ls1x_get_min(v), ls1x_get_sec(v));
 	rtc_time_to_tm(t, rtm);
 
-	return rtc_valid_tm(rtm);
+	return 0;
 }
 
 static int ls1x_rtc_set_time(struct device *dev, struct  rtc_time *rtm)
diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c
index c90fba3ed861..ad03e2f12f5d 100644
--- a/drivers/rtc/rtc-m41t80.c
+++ b/drivers/rtc/rtc-m41t80.c
@@ -73,7 +73,6 @@
 #define M41T80_FEATURE_WD	BIT(3)	/* Extra watchdog resolution */
 #define M41T80_FEATURE_SQ_ALT	BIT(4)	/* RSx bits are in reg 4 */
 
-static DEFINE_MUTEX(m41t80_rtc_mutex);
 static const struct i2c_device_id m41t80_id[] = {
 	{ "m41t62", M41T80_FEATURE_SQ | M41T80_FEATURE_SQ_ALT },
 	{ "m41t65", M41T80_FEATURE_HT | M41T80_FEATURE_WD },
@@ -199,9 +198,9 @@ static irqreturn_t m41t80_handle_irq(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-static int m41t80_get_datetime(struct i2c_client *client,
-			       struct rtc_time *tm)
+static int m41t80_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
+	struct i2c_client *client = to_i2c_client(dev);
 	unsigned char buf[8];
 	int err, flags;
 
@@ -230,12 +229,12 @@ static int m41t80_get_datetime(struct i2c_client *client,
 
 	/* assume 20YY not 19YY, and ignore the Century Bit */
 	tm->tm_year = bcd2bin(buf[M41T80_REG_YEAR]) + 100;
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
-/* Sets the given date and time to the real time clock. */
-static int m41t80_set_datetime(struct i2c_client *client, struct rtc_time *tm)
+static int m41t80_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
+	struct i2c_client *client = to_i2c_client(dev);
 	struct m41t80_data *clientdata = i2c_get_clientdata(client);
 	unsigned char buf[8];
 	int err, flags;
@@ -298,16 +297,6 @@ static int m41t80_rtc_proc(struct device *dev, struct seq_file *seq)
 	return 0;
 }
 
-static int m41t80_rtc_read_time(struct device *dev, struct rtc_time *tm)
-{
-	return m41t80_get_datetime(to_i2c_client(dev), tm);
-}
-
-static int m41t80_rtc_set_time(struct device *dev, struct rtc_time *tm)
-{
-	return m41t80_set_datetime(to_i2c_client(dev), tm);
-}
-
 static int m41t80_alarm_irq_enable(struct device *dev, unsigned int enabled)
 {
 	struct i2c_client *client = to_i2c_client(dev);
@@ -598,6 +587,7 @@ static struct clk *m41t80_sqw_register_clk(struct m41t80_data *m41t80)
  *
  *****************************************************************************
  */
+static DEFINE_MUTEX(m41t80_rtc_mutex);
 static struct i2c_client *save_client;
 
 /* Default margin */
@@ -885,7 +875,6 @@ static int m41t80_probe(struct i2c_client *client,
 {
 	struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
 	int rc = 0;
-	struct rtc_device *rtc = NULL;
 	struct rtc_time tm;
 	struct m41t80_data *m41t80_data = NULL;
 	bool wakeup_source = false;
@@ -909,6 +898,10 @@ static int m41t80_probe(struct i2c_client *client,
 		m41t80_data->features = id->driver_data;
 	i2c_set_clientdata(client, m41t80_data);
 
+	m41t80_data->rtc =  devm_rtc_allocate_device(&client->dev);
+	if (IS_ERR(m41t80_data->rtc))
+		return PTR_ERR(m41t80_data->rtc);
+
 #ifdef CONFIG_OF
 	wakeup_source = of_property_read_bool(client->dev.of_node,
 					      "wakeup-source");
@@ -932,15 +925,11 @@ static int m41t80_probe(struct i2c_client *client,
 		device_init_wakeup(&client->dev, true);
 	}
 
-	rtc = devm_rtc_device_register(&client->dev, client->name,
-				       &m41t80_rtc_ops, THIS_MODULE);
-	if (IS_ERR(rtc))
-		return PTR_ERR(rtc);
+	m41t80_data->rtc->ops = &m41t80_rtc_ops;
 
-	m41t80_data->rtc = rtc;
 	if (client->irq <= 0) {
 		/* We cannot support UIE mode if we do not have an IRQ line */
-		rtc->uie_unsupported = 1;
+		m41t80_data->rtc->uie_unsupported = 1;
 	}
 
 	/* Make sure HT (Halt Update) bit is cleared */
@@ -948,7 +937,7 @@ static int m41t80_probe(struct i2c_client *client,
 
 	if (rc >= 0 && rc & M41T80_ALHOUR_HT) {
 		if (m41t80_data->features & M41T80_FEATURE_HT) {
-			m41t80_get_datetime(client, &tm);
+			m41t80_rtc_read_time(&client->dev, &tm);
 			dev_info(&client->dev, "HT bit was set!\n");
 			dev_info(&client->dev,
 				 "Power Down at %04i-%02i-%02i %02i:%02i:%02i\n",
@@ -993,6 +982,11 @@ static int m41t80_probe(struct i2c_client *client,
 	if (m41t80_data->features & M41T80_FEATURE_SQ)
 		m41t80_sqw_register_clk(m41t80_data);
 #endif
+
+	rc = rtc_register_device(m41t80_data->rtc);
+	if (rc)
+		return rc;
+
 	return 0;
 }
 
diff --git a/drivers/rtc/rtc-m41t93.c b/drivers/rtc/rtc-m41t93.c
index 5ac45fc1a787..4a08a9dabc82 100644
--- a/drivers/rtc/rtc-m41t93.c
+++ b/drivers/rtc/rtc-m41t93.c
@@ -159,7 +159,7 @@ static int m41t93_get_time(struct device *dev, struct rtc_time *tm)
 		tm->tm_hour, tm->tm_mday,
 		tm->tm_mon, tm->tm_year, tm->tm_wday);
 
-	return ret < 0 ? ret : rtc_valid_tm(tm);
+	return ret;
 }
 
 
diff --git a/drivers/rtc/rtc-m41t94.c b/drivers/rtc/rtc-m41t94.c
index 1f0eb79e69f9..bab82b4be356 100644
--- a/drivers/rtc/rtc-m41t94.c
+++ b/drivers/rtc/rtc-m41t94.c
@@ -99,8 +99,7 @@ static int m41t94_read_time(struct device *dev, struct rtc_time *tm)
 		tm->tm_hour, tm->tm_mday,
 		tm->tm_mon, tm->tm_year, tm->tm_wday);
 
-	/* initial clock setting can be undefined */
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static const struct rtc_class_ops m41t94_rtc_ops = {
diff --git a/drivers/rtc/rtc-m48t35.c b/drivers/rtc/rtc-m48t35.c
index 810f4ea481e4..0cf6507de3c7 100644
--- a/drivers/rtc/rtc-m48t35.c
+++ b/drivers/rtc/rtc-m48t35.c
@@ -84,7 +84,7 @@ static int m48t35_read_time(struct device *dev, struct rtc_time *tm)
 		tm->tm_year += 100;
 
 	tm->tm_mon--;
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int m48t35_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-m48t59.c b/drivers/rtc/rtc-m48t59.c
index d99a705bec07..216fac62c888 100644
--- a/drivers/rtc/rtc-m48t59.c
+++ b/drivers/rtc/rtc-m48t59.c
@@ -105,7 +105,7 @@ static int m48t59_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	dev_dbg(dev, "RTC read time %04d-%02d-%02d %02d/%02d/%02d\n",
 		tm->tm_year + 1900, tm->tm_mon, tm->tm_mday,
 		tm->tm_hour, tm->tm_min, tm->tm_sec);
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int m48t59_rtc_set_time(struct device *dev, struct rtc_time *tm)
@@ -334,16 +334,16 @@ static const struct rtc_class_ops m48t02_rtc_ops = {
 	.set_time	= m48t59_rtc_set_time,
 };
 
-static ssize_t m48t59_nvram_read(struct file *filp, struct kobject *kobj,
-				struct bin_attribute *bin_attr,
-				char *buf, loff_t pos, size_t size)
+static int m48t59_nvram_read(void *priv, unsigned int offset, void *val,
+			     size_t size)
 {
-	struct device *dev = container_of(kobj, struct device, kobj);
-	struct platform_device *pdev = to_platform_device(dev);
+	struct platform_device *pdev = priv;
+	struct device *dev = &pdev->dev;
 	struct m48t59_plat_data *pdata = dev_get_platdata(&pdev->dev);
 	struct m48t59_private *m48t59 = platform_get_drvdata(pdev);
 	ssize_t cnt = 0;
 	unsigned long flags;
+	u8 *buf = val;
 
 	spin_lock_irqsave(&m48t59->lock, flags);
 
@@ -352,19 +352,19 @@ static ssize_t m48t59_nvram_read(struct file *filp, struct kobject *kobj,
 
 	spin_unlock_irqrestore(&m48t59->lock, flags);
 
-	return cnt;
+	return 0;
 }
 
-static ssize_t m48t59_nvram_write(struct file *filp, struct kobject *kobj,
-				struct bin_attribute *bin_attr,
-				char *buf, loff_t pos, size_t size)
+static int m48t59_nvram_write(void *priv, unsigned int offset, void *val,
+			      size_t size)
 {
-	struct device *dev = container_of(kobj, struct device, kobj);
-	struct platform_device *pdev = to_platform_device(dev);
+	struct platform_device *pdev = priv;
+	struct device *dev = &pdev->dev;
 	struct m48t59_plat_data *pdata = dev_get_platdata(&pdev->dev);
 	struct m48t59_private *m48t59 = platform_get_drvdata(pdev);
 	ssize_t cnt = 0;
 	unsigned long flags;
+	u8 *buf = val;
 
 	spin_lock_irqsave(&m48t59->lock, flags);
 
@@ -373,18 +373,9 @@ static ssize_t m48t59_nvram_write(struct file *filp, struct kobject *kobj,
 
 	spin_unlock_irqrestore(&m48t59->lock, flags);
 
-	return cnt;
+	return 0;
 }
 
-static struct bin_attribute m48t59_nvram_attr = {
-	.attr = {
-		.name = "nvram",
-		.mode = S_IRUGO | S_IWUSR,
-	},
-	.read = m48t59_nvram_read,
-	.write = m48t59_nvram_write,
-};
-
 static int m48t59_rtc_probe(struct platform_device *pdev)
 {
 	struct m48t59_plat_data *pdata = dev_get_platdata(&pdev->dev);
@@ -393,6 +384,14 @@ static int m48t59_rtc_probe(struct platform_device *pdev)
 	int ret = -ENOMEM;
 	char *name;
 	const struct rtc_class_ops *ops;
+	struct nvmem_config nvmem_cfg = {
+		.name = "m48t59-",
+		.word_size = 1,
+		.stride = 1,
+		.reg_read = m48t59_nvram_read,
+		.reg_write = m48t59_nvram_write,
+		.priv = pdev,
+	};
 
 	/* This chip could be memory-mapped or I/O-mapped */
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -480,23 +479,22 @@ static int m48t59_rtc_probe(struct platform_device *pdev)
 	spin_lock_init(&m48t59->lock);
 	platform_set_drvdata(pdev, m48t59);
 
-	m48t59->rtc = devm_rtc_device_register(&pdev->dev, name, ops,
-						THIS_MODULE);
+	m48t59->rtc = devm_rtc_allocate_device(&pdev->dev);
 	if (IS_ERR(m48t59->rtc))
 		return PTR_ERR(m48t59->rtc);
 
-	m48t59_nvram_attr.size = pdata->offset;
+	m48t59->rtc->nvram_old_abi = true;
+	m48t59->rtc->ops = ops;
 
-	ret = sysfs_create_bin_file(&pdev->dev.kobj, &m48t59_nvram_attr);
+	nvmem_cfg.size = pdata->offset;
+	ret = rtc_nvmem_register(m48t59->rtc, &nvmem_cfg);
 	if (ret)
 		return ret;
 
-	return 0;
-}
+	ret = rtc_register_device(m48t59->rtc);
+	if (ret)
+		return ret;
 
-static int m48t59_rtc_remove(struct platform_device *pdev)
-{
-	sysfs_remove_bin_file(&pdev->dev.kobj, &m48t59_nvram_attr);
 	return 0;
 }
 
@@ -508,7 +506,6 @@ static struct platform_driver m48t59_rtc_driver = {
 		.name	= "rtc-m48t59",
 	},
 	.probe		= m48t59_rtc_probe,
-	.remove		= m48t59_rtc_remove,
 };
 
 module_platform_driver(m48t59_rtc_driver);
diff --git a/drivers/rtc/rtc-m48t86.c b/drivers/rtc/rtc-m48t86.c
index d9aea9b6d9cd..a9533535c3b7 100644
--- a/drivers/rtc/rtc-m48t86.c
+++ b/drivers/rtc/rtc-m48t86.c
@@ -100,7 +100,7 @@ static int m48t86_rtc_read_time(struct device *dev, struct rtc_time *tm)
 		if (m48t86_readb(dev, M48T86_HOUR) & 0x80)
 			tm->tm_hour += 12;
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int m48t86_rtc_set_time(struct device *dev, struct rtc_time *tm)
@@ -218,21 +218,21 @@ static bool m48t86_verify_chip(struct platform_device *pdev)
 	return false;
 }
 
-static struct nvmem_config m48t86_nvmem_cfg = {
-	.name = "m48t86_nvram",
-	.word_size = 1,
-	.stride = 1,
-	.size = M48T86_NVRAM_LEN,
-	.reg_read = m48t86_nvram_read,
-	.reg_write = m48t86_nvram_write,
-};
-
 static int m48t86_rtc_probe(struct platform_device *pdev)
 {
 	struct m48t86_rtc_info *info;
 	struct resource *res;
 	unsigned char reg;
 	int err;
+	struct nvmem_config m48t86_nvmem_cfg = {
+		.name = "m48t86_nvram",
+		.word_size = 1,
+		.stride = 1,
+		.size = M48T86_NVRAM_LEN,
+		.reg_read = m48t86_nvram_read,
+		.reg_write = m48t86_nvram_write,
+		.priv = &pdev->dev,
+	};
 
 	info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL);
 	if (!info)
@@ -264,15 +264,14 @@ static int m48t86_rtc_probe(struct platform_device *pdev)
 		return PTR_ERR(info->rtc);
 
 	info->rtc->ops = &m48t86_rtc_ops;
-
-	m48t86_nvmem_cfg.priv = &pdev->dev;
-	info->rtc->nvmem_config = &m48t86_nvmem_cfg;
 	info->rtc->nvram_old_abi = true;
 
 	err = rtc_register_device(info->rtc);
 	if (err)
 		return err;
 
+	rtc_nvmem_register(info->rtc, &m48t86_nvmem_cfg);
+
 	/* read battery status */
 	reg = m48t86_readb(&pdev->dev, M48T86_D);
 	dev_info(&pdev->dev, "battery %s\n",
diff --git a/drivers/rtc/rtc-max6900.c b/drivers/rtc/rtc-max6900.c
index cbdc86a560ba..ab60f13fa3ef 100644
--- a/drivers/rtc/rtc-max6900.c
+++ b/drivers/rtc/rtc-max6900.c
@@ -139,8 +139,9 @@ static int max6900_i2c_write_regs(struct i2c_client *client, u8 const *buf)
 	return -EIO;
 }
 
-static int max6900_i2c_read_time(struct i2c_client *client, struct rtc_time *tm)
+static int max6900_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
+	struct i2c_client *client = to_i2c_client(dev);
 	int rc;
 	u8 regs[MAX6900_REG_LEN];
 
@@ -157,7 +158,7 @@ static int max6900_i2c_read_time(struct i2c_client *client, struct rtc_time *tm)
 		      bcd2bin(regs[MAX6900_REG_CENTURY]) * 100 - 1900;
 	tm->tm_wday = bcd2bin(regs[MAX6900_REG_DW]);
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int max6900_i2c_clear_write_protect(struct i2c_client *client)
@@ -165,9 +166,9 @@ static int max6900_i2c_clear_write_protect(struct i2c_client *client)
 	return i2c_smbus_write_byte_data(client, MAX6900_REG_CONTROL_WRITE, 0);
 }
 
-static int
-max6900_i2c_set_time(struct i2c_client *client, struct rtc_time const *tm)
+static int max6900_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
+	struct i2c_client *client = to_i2c_client(dev);
 	u8 regs[MAX6900_REG_LEN];
 	int rc;
 
@@ -193,16 +194,6 @@ max6900_i2c_set_time(struct i2c_client *client, struct rtc_time const *tm)
 	return 0;
 }
 
-static int max6900_rtc_read_time(struct device *dev, struct rtc_time *tm)
-{
-	return max6900_i2c_read_time(to_i2c_client(dev), tm);
-}
-
-static int max6900_rtc_set_time(struct device *dev, struct rtc_time *tm)
-{
-	return max6900_i2c_set_time(to_i2c_client(dev), tm);
-}
-
 static const struct rtc_class_ops max6900_rtc_ops = {
 	.read_time = max6900_rtc_read_time,
 	.set_time = max6900_rtc_set_time,
diff --git a/drivers/rtc/rtc-max6902.c b/drivers/rtc/rtc-max6902.c
index 315d09e0f2c1..745827463367 100644
--- a/drivers/rtc/rtc-max6902.c
+++ b/drivers/rtc/rtc-max6902.c
@@ -85,7 +85,7 @@ static int max6902_read_time(struct device *dev, struct rtc_time *dt)
 	dt->tm_year += century;
 	dt->tm_year -= 1900;
 
-	return rtc_valid_tm(dt);
+	return 0;
 }
 
 static int max6902_set_time(struct device *dev, struct rtc_time *dt)
diff --git a/drivers/rtc/rtc-max6916.c b/drivers/rtc/rtc-max6916.c
index 623ab27b2757..7e908a490cf6 100644
--- a/drivers/rtc/rtc-max6916.c
+++ b/drivers/rtc/rtc-max6916.c
@@ -75,7 +75,7 @@ static int max6916_read_time(struct device *dev, struct rtc_time *dt)
 	dt->tm_wday = bcd2bin(buf[5]) - 1;
 	dt->tm_year = bcd2bin(buf[6]) + 100;
 
-	return rtc_valid_tm(dt);
+	return 0;
 }
 
 static int max6916_set_time(struct device *dev, struct rtc_time *dt)
diff --git a/drivers/rtc/rtc-max77686.c b/drivers/rtc/rtc-max77686.c
index 182fdd00e290..cefde273fae6 100644
--- a/drivers/rtc/rtc-max77686.c
+++ b/drivers/rtc/rtc-max77686.c
@@ -364,11 +364,9 @@ static int max77686_rtc_read_time(struct device *dev, struct rtc_time *tm)
 
 	max77686_rtc_data_to_tm(data, tm, info);
 
-	ret = rtc_valid_tm(tm);
-
 out:
 	mutex_unlock(&info->lock);
-	return ret;
+	return 0;
 }
 
 static int max77686_rtc_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-max8997.c b/drivers/rtc/rtc-max8997.c
index db984d4bf952..e8cee123e8aa 100644
--- a/drivers/rtc/rtc-max8997.c
+++ b/drivers/rtc/rtc-max8997.c
@@ -153,7 +153,7 @@ static int max8997_rtc_read_time(struct device *dev, struct rtc_time *tm)
 
 	max8997_rtc_data_to_tm(data, tm, info->rtc_24hr_mode);
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int max8997_rtc_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-max8998.c b/drivers/rtc/rtc-max8998.c
index 30804b00985e..d8c0f9b3f87d 100644
--- a/drivers/rtc/rtc-max8998.c
+++ b/drivers/rtc/rtc-max8998.c
@@ -120,7 +120,7 @@ static int max8998_rtc_read_time(struct device *dev, struct rtc_time *tm)
 
 	max8998_data_to_tm(data, tm);
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int max8998_rtc_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-mc13xxx.c b/drivers/rtc/rtc-mc13xxx.c
index 30b8ef6a3676..1f892b238ddb 100644
--- a/drivers/rtc/rtc-mc13xxx.c
+++ b/drivers/rtc/rtc-mc13xxx.c
@@ -85,7 +85,7 @@ static int mc13xxx_rtc_read_time(struct device *dev, struct rtc_time *tm)
 
 	rtc_time64_to_tm((time64_t)days1 * SEC_PER_DAY + seconds, tm);
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int mc13xxx_rtc_set_mmss(struct device *dev, time64_t secs)
diff --git a/drivers/rtc/rtc-mcp795.c b/drivers/rtc/rtc-mcp795.c
index 77f21331ae21..00e11c1b2186 100644
--- a/drivers/rtc/rtc-mcp795.c
+++ b/drivers/rtc/rtc-mcp795.c
@@ -82,7 +82,7 @@ static int mcp795_rtcc_write(struct device *dev, u8 addr, u8 *data, u8 count)
 {
 	struct spi_device *spi = to_spi_device(dev);
 	int ret;
-	u8 tx[2 + count];
+	u8 tx[257];
 
 	tx[0] = MCP795_WRITE;
 	tx[1] = addr;
@@ -262,7 +262,7 @@ static int mcp795_read_time(struct device *dev, struct rtc_time *tim)
 			tim->tm_year + 1900, tim->tm_mon, tim->tm_mday,
 			tim->tm_wday, tim->tm_hour, tim->tm_min, tim->tm_sec);
 
-	return rtc_valid_tm(tim);
+	return 0;
 }
 
 static int mcp795_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
diff --git a/drivers/rtc/rtc-mpc5121.c b/drivers/rtc/rtc-mpc5121.c
index 4ca4daa0b8f3..dd0364293bc0 100644
--- a/drivers/rtc/rtc-mpc5121.c
+++ b/drivers/rtc/rtc-mpc5121.c
@@ -122,7 +122,7 @@ static int mpc5121_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	 */
 	mpc5121_rtc_update_smh(regs, tm);
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int mpc5121_rtc_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-mrst.c b/drivers/rtc/rtc-mrst.c
index 7334c44fa7c3..fcb9de5218b2 100644
--- a/drivers/rtc/rtc-mrst.c
+++ b/drivers/rtc/rtc-mrst.c
@@ -105,7 +105,7 @@ static int mrst_read_time(struct device *dev, struct rtc_time *time)
 	/* Adjust for the 1972/1900 */
 	time->tm_year += 72;
 	time->tm_mon--;
-	return rtc_valid_tm(time);
+	return 0;
 }
 
 static int mrst_set_time(struct device *dev, struct rtc_time *time)
@@ -122,7 +122,7 @@ static int mrst_set_time(struct device *dev, struct rtc_time *time)
 	min = time->tm_min;
 	sec = time->tm_sec;
 
-	if (yrs < 72 || yrs > 138)
+	if (yrs < 72 || yrs > 172)
 		return -EINVAL;
 	yrs -= 72;
 
diff --git a/drivers/rtc/rtc-msm6242.c b/drivers/rtc/rtc-msm6242.c
index c1c5c4e3b3b4..0c72a2e8ec67 100644
--- a/drivers/rtc/rtc-msm6242.c
+++ b/drivers/rtc/rtc-msm6242.c
@@ -155,7 +155,7 @@ static int msm6242_read_time(struct device *dev, struct rtc_time *tm)
 
 	msm6242_unlock(priv);
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int msm6242_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-mt7622.c b/drivers/rtc/rtc-mt7622.c
index d79b9ae4d237..fd0cea722286 100644
--- a/drivers/rtc/rtc-mt7622.c
+++ b/drivers/rtc/rtc-mt7622.c
@@ -232,7 +232,7 @@ static int mtk_rtc_gettime(struct device *dev, struct rtc_time *tm)
 
 	mtk_rtc_get_alarm_or_time(hw, tm, MTK_TC);
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int mtk_rtc_settime(struct device *dev, struct rtc_time *tm)
@@ -307,6 +307,7 @@ static const struct of_device_id mtk_rtc_match[] = {
 	{ .compatible = "mediatek,soc-rtc" },
 	{},
 };
+MODULE_DEVICE_TABLE(of, mtk_rtc_match);
 
 static int mtk_rtc_probe(struct platform_device *pdev)
 {
diff --git a/drivers/rtc/rtc-mv.c b/drivers/rtc/rtc-mv.c
index 79bb28617d45..bc52dbb0c0e2 100644
--- a/drivers/rtc/rtc-mv.c
+++ b/drivers/rtc/rtc-mv.c
@@ -94,7 +94,7 @@ static int mv_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	/* hw counts from year 2000, but tm_year is relative to 1900 */
 	tm->tm_year = bcd2bin(year) + 100;
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int mv_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alm)
@@ -223,7 +223,6 @@ static int __init mv_rtc_probe(struct platform_device *pdev)
 	struct resource *res;
 	struct rtc_plat_data *pdata;
 	u32 rtc_time;
-	u32 rtc_date;
 	int ret = 0;
 
 	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
@@ -259,17 +258,6 @@ static int __init mv_rtc_probe(struct platform_device *pdev)
 		}
 	}
 
-	/*
-	 * A date after January 19th, 2038 does not fit on 32 bits and
-	 * will confuse the kernel and userspace. Reset to a sane date
-	 * (January 1st, 2013) if we're after 2038.
-	 */
-	rtc_date = readl(pdata->ioaddr + RTC_DATE_REG_OFFS);
-	if (bcd2bin((rtc_date >> RTC_YEAR_OFFS) & 0xff) >= 38) {
-		dev_info(&pdev->dev, "invalid RTC date, resetting to January 1st, 2013\n");
-		writel(0x130101, pdata->ioaddr + RTC_DATE_REG_OFFS);
-	}
-
 	pdata->irq = platform_get_irq(pdev, 0);
 
 	platform_set_drvdata(pdev, pdata);
diff --git a/drivers/rtc/rtc-mxc_v2.c b/drivers/rtc/rtc-mxc_v2.c
index 784221dfc9c7..9e14efb990b2 100644
--- a/drivers/rtc/rtc-mxc_v2.c
+++ b/drivers/rtc/rtc-mxc_v2.c
@@ -273,7 +273,7 @@ static const struct rtc_class_ops mxc_rtc_ops = {
 	.alarm_irq_enable = mxc_rtc_alarm_irq_enable,
 };
 
-static int mxc_rtc_wait_for_flag(void *__iomem ioaddr, int flag)
+static int mxc_rtc_wait_for_flag(void __iomem *ioaddr, int flag)
 {
 	unsigned int timeout = REG_READ_TIMEOUT;
 
diff --git a/drivers/rtc/rtc-nuc900.c b/drivers/rtc/rtc-nuc900.c
index 4ed81117cf5f..7da664a77181 100644
--- a/drivers/rtc/rtc-nuc900.c
+++ b/drivers/rtc/rtc-nuc900.c
@@ -102,8 +102,8 @@ static int *check_rtc_access_enable(struct nuc900_rtc *nuc900_rtc)
 	return NULL;
 }
 
-static int nuc900_rtc_bcd2bin(unsigned int timereg,
-				unsigned int calreg, struct rtc_time *tm)
+static void nuc900_rtc_bcd2bin(unsigned int timereg,
+			       unsigned int calreg, struct rtc_time *tm)
 {
 	tm->tm_mday	= bcd2bin(calreg >> 0);
 	tm->tm_mon	= bcd2bin(calreg >> 8);
@@ -112,8 +112,6 @@ static int nuc900_rtc_bcd2bin(unsigned int timereg,
 	tm->tm_sec	= bcd2bin(timereg >> 0);
 	tm->tm_min	= bcd2bin(timereg >> 8);
 	tm->tm_hour	= bcd2bin(timereg >> 16);
-
-	return rtc_valid_tm(tm);
 }
 
 static void nuc900_rtc_bin2bcd(struct device *dev, struct rtc_time *settm,
@@ -156,7 +154,9 @@ static int nuc900_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	timeval = __raw_readl(rtc->rtc_reg + REG_RTC_TLR);
 	clrval	= __raw_readl(rtc->rtc_reg + REG_RTC_CLR);
 
-	return nuc900_rtc_bcd2bin(timeval, clrval, tm);
+	nuc900_rtc_bcd2bin(timeval, clrval, tm);
+
+	return 0;
 }
 
 static int nuc900_rtc_set_time(struct device *dev, struct rtc_time *tm)
@@ -189,7 +189,9 @@ static int nuc900_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 	timeval = __raw_readl(rtc->rtc_reg + REG_RTC_TAR);
 	carval	= __raw_readl(rtc->rtc_reg + REG_RTC_CAR);
 
-	return nuc900_rtc_bcd2bin(timeval, carval, &alrm->time);
+	nuc900_rtc_bcd2bin(timeval, carval, &alrm->time);
+
+	return rtc_valid_tm(&alrm->time);
 }
 
 static int nuc900_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c
index 09ef802d6e54..39086398833e 100644
--- a/drivers/rtc/rtc-omap.c
+++ b/drivers/rtc/rtc-omap.c
@@ -273,9 +273,6 @@ static int omap_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
 /* this hardware doesn't support "don't care" alarm fields */
 static int tm2bcd(struct rtc_time *tm)
 {
-	if (rtc_valid_tm(tm) != 0)
-		return -EINVAL;
-
 	tm->tm_sec = bin2bcd(tm->tm_sec);
 	tm->tm_min = bin2bcd(tm->tm_min);
 	tm->tm_hour = bin2bcd(tm->tm_hour);
@@ -850,7 +847,6 @@ static int omap_rtc_probe(struct platform_device *pdev)
 
 	rtc->rtc->ops = &omap_rtc_ops;
 	omap_rtc_nvmem_config.priv = rtc;
-	rtc->rtc->nvmem_config = &omap_rtc_nvmem_config;
 
 	/* handle periodic and alarm irqs */
 	ret = devm_request_irq(&pdev->dev, rtc->irq_timer, rtc_irq, 0,
@@ -886,6 +882,8 @@ static int omap_rtc_probe(struct platform_device *pdev)
 	if (ret)
 		goto err;
 
+	rtc_nvmem_register(rtc->rtc, &omap_rtc_nvmem_config);
+
 	return 0;
 
 err:
diff --git a/drivers/rtc/rtc-pcap.c b/drivers/rtc/rtc-pcap.c
index c4433240d8a9..c05f524ba9af 100644
--- a/drivers/rtc/rtc-pcap.c
+++ b/drivers/rtc/rtc-pcap.c
@@ -95,7 +95,7 @@ static int pcap_rtc_read_time(struct device *dev, struct rtc_time *tm)
 
 	rtc_time_to_tm(secs, tm);
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int pcap_rtc_set_mmss(struct device *dev, unsigned long secs)
diff --git a/drivers/rtc/rtc-pcf2123.c b/drivers/rtc/rtc-pcf2123.c
index 8895f77726e8..e5222c5d8223 100644
--- a/drivers/rtc/rtc-pcf2123.c
+++ b/drivers/rtc/rtc-pcf2123.c
@@ -289,7 +289,7 @@ static int pcf2123_rtc_read_time(struct device *dev, struct rtc_time *tm)
 			tm->tm_sec, tm->tm_min, tm->tm_hour,
 			tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int pcf2123_rtc_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-pcf2127.c b/drivers/rtc/rtc-pcf2127.c
index f33447c5db85..e83be1852c2f 100644
--- a/drivers/rtc/rtc-pcf2127.c
+++ b/drivers/rtc/rtc-pcf2127.c
@@ -111,7 +111,7 @@ static int pcf2127_rtc_read_time(struct device *dev, struct rtc_time *tm)
 		tm->tm_sec, tm->tm_min, tm->tm_hour,
 		tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int pcf2127_rtc_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-pcf50633.c b/drivers/rtc/rtc-pcf50633.c
index 00c31c91b245..ef72b0c389d7 100644
--- a/drivers/rtc/rtc-pcf50633.c
+++ b/drivers/rtc/rtc-pcf50633.c
@@ -135,7 +135,7 @@ static int pcf50633_rtc_read_time(struct device *dev, struct rtc_time *tm)
 		tm->tm_mday, tm->tm_mon, tm->tm_year,
 		tm->tm_hour, tm->tm_min, tm->tm_sec);
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int pcf50633_rtc_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-pcf85063.c b/drivers/rtc/rtc-pcf85063.c
index a06dff994c83..49bcbb3d4a69 100644
--- a/drivers/rtc/rtc-pcf85063.c
+++ b/drivers/rtc/rtc-pcf85063.c
@@ -70,7 +70,7 @@ static int pcf85063_start_clock(struct i2c_client *client, u8 ctrl1)
 	s32 ret;
 
 	/* start the clock */
-	ctrl1 &= PCF85063_REG_CTRL1_STOP;
+	ctrl1 &= ~PCF85063_REG_CTRL1_STOP;
 
 	ret = i2c_smbus_write_byte_data(client, PCF85063_REG_CTRL1, ctrl1);
 	if (ret < 0) {
@@ -81,8 +81,9 @@ static int pcf85063_start_clock(struct i2c_client *client, u8 ctrl1)
 	return 0;
 }
 
-static int pcf85063_get_datetime(struct i2c_client *client, struct rtc_time *tm)
+static int pcf85063_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
+	struct i2c_client *client = to_i2c_client(dev);
 	int rc;
 	u8 regs[7];
 
@@ -114,11 +115,12 @@ static int pcf85063_get_datetime(struct i2c_client *client, struct rtc_time *tm)
 	tm->tm_year = bcd2bin(regs[6]);
 	tm->tm_year += 100;
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
-static int pcf85063_set_datetime(struct i2c_client *client, struct rtc_time *tm)
+static int pcf85063_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
+	struct i2c_client *client = to_i2c_client(dev);
 	int rc;
 	u8 regs[7];
 	u8 ctrl1;
@@ -172,16 +174,6 @@ static int pcf85063_set_datetime(struct i2c_client *client, struct rtc_time *tm)
 	return 0;
 }
 
-static int pcf85063_rtc_read_time(struct device *dev, struct rtc_time *tm)
-{
-	return pcf85063_get_datetime(to_i2c_client(dev), tm);
-}
-
-static int pcf85063_rtc_set_time(struct device *dev, struct rtc_time *tm)
-{
-	return pcf85063_set_datetime(to_i2c_client(dev), tm);
-}
-
 static const struct rtc_class_ops pcf85063_rtc_ops = {
 	.read_time	= pcf85063_rtc_read_time,
 	.set_time	= pcf85063_rtc_set_time
diff --git a/drivers/rtc/rtc-pcf8523.c b/drivers/rtc/rtc-pcf8523.c
index c312af0db729..453615f8ac9a 100644
--- a/drivers/rtc/rtc-pcf8523.c
+++ b/drivers/rtc/rtc-pcf8523.c
@@ -192,7 +192,7 @@ static int pcf8523_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	tm->tm_mon = bcd2bin(regs[5] & 0x1f) - 1;
 	tm->tm_year = bcd2bin(regs[6]) + 100;
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int pcf8523_rtc_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-pcf85363.c b/drivers/rtc/rtc-pcf85363.c
index ea04e9f0930b..c04a1edcd571 100644
--- a/drivers/rtc/rtc-pcf85363.c
+++ b/drivers/rtc/rtc-pcf85363.c
@@ -73,6 +73,43 @@
 #define CTRL_RESETS	0x2f
 #define CTRL_RAM	0x40
 
+#define ALRM_SEC_A1E	BIT(0)
+#define ALRM_MIN_A1E	BIT(1)
+#define ALRM_HR_A1E	BIT(2)
+#define ALRM_DAY_A1E	BIT(3)
+#define ALRM_MON_A1E	BIT(4)
+#define ALRM_MIN_A2E	BIT(5)
+#define ALRM_HR_A2E	BIT(6)
+#define ALRM_DAY_A2E	BIT(7)
+
+#define INT_WDIE	BIT(0)
+#define INT_BSIE	BIT(1)
+#define INT_TSRIE	BIT(2)
+#define INT_A2IE	BIT(3)
+#define INT_A1IE	BIT(4)
+#define INT_OIE		BIT(5)
+#define INT_PIE		BIT(6)
+#define INT_ILP		BIT(7)
+
+#define FLAGS_TSR1F	BIT(0)
+#define FLAGS_TSR2F	BIT(1)
+#define FLAGS_TSR3F	BIT(2)
+#define FLAGS_BSF	BIT(3)
+#define FLAGS_WDF	BIT(4)
+#define FLAGS_A1F	BIT(5)
+#define FLAGS_A2F	BIT(6)
+#define FLAGS_PIF	BIT(7)
+
+#define PIN_IO_INTAPM	GENMASK(1, 0)
+#define PIN_IO_INTA_CLK	0
+#define PIN_IO_INTA_BAT	1
+#define PIN_IO_INTA_OUT	2
+#define PIN_IO_INTA_HIZ	3
+
+#define STOP_EN_STOP	BIT(0)
+
+#define RESET_CPR	0xa4
+
 #define NVRAM_SIZE	0x40
 
 static struct i2c_driver pcf85363_driver;
@@ -80,7 +117,6 @@ static struct i2c_driver pcf85363_driver;
 struct pcf85363 {
 	struct device		*dev;
 	struct rtc_device	*rtc;
-	struct nvmem_config	nvmem_cfg;
 	struct regmap		*regmap;
 };
 
@@ -116,8 +152,12 @@ static int pcf85363_rtc_read_time(struct device *dev, struct rtc_time *tm)
 static int pcf85363_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
 	struct pcf85363 *pcf85363 = dev_get_drvdata(dev);
-	unsigned char buf[DT_YEARS + 1];
-	int len = sizeof(buf);
+	unsigned char tmp[11];
+	unsigned char *buf = &tmp[2];
+	int ret;
+
+	tmp[0] = STOP_EN_STOP;
+	tmp[1] = RESET_CPR;
 
 	buf[DT_100THS] = 0;
 	buf[DT_SECS] = bin2bcd(tm->tm_sec);
@@ -128,8 +168,116 @@ static int pcf85363_rtc_set_time(struct device *dev, struct rtc_time *tm)
 	buf[DT_MONTHS] = bin2bcd(tm->tm_mon + 1);
 	buf[DT_YEARS] = bin2bcd(tm->tm_year % 100);
 
-	return regmap_bulk_write(pcf85363->regmap, DT_100THS,
-				 buf, len);
+	ret = regmap_bulk_write(pcf85363->regmap, CTRL_STOP_EN,
+				tmp, sizeof(tmp));
+	if (ret)
+		return ret;
+
+	return regmap_write(pcf85363->regmap, CTRL_STOP_EN, 0);
+}
+
+static int pcf85363_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+{
+	struct pcf85363 *pcf85363 = dev_get_drvdata(dev);
+	unsigned char buf[DT_MONTH_ALM1 - DT_SECOND_ALM1 + 1];
+	unsigned int val;
+	int ret;
+
+	ret = regmap_bulk_read(pcf85363->regmap, DT_SECOND_ALM1, buf,
+			       sizeof(buf));
+	if (ret)
+		return ret;
+
+	alrm->time.tm_sec = bcd2bin(buf[0]);
+	alrm->time.tm_min = bcd2bin(buf[1]);
+	alrm->time.tm_hour = bcd2bin(buf[2]);
+	alrm->time.tm_mday = bcd2bin(buf[3]);
+	alrm->time.tm_mon = bcd2bin(buf[4]) - 1;
+
+	ret = regmap_read(pcf85363->regmap, CTRL_INTA_EN, &val);
+	if (ret)
+		return ret;
+
+	alrm->enabled =  !!(val & INT_A1IE);
+
+	return 0;
+}
+
+static int _pcf85363_rtc_alarm_irq_enable(struct pcf85363 *pcf85363, unsigned
+					  int enabled)
+{
+	unsigned int alarm_flags = ALRM_SEC_A1E | ALRM_MIN_A1E | ALRM_HR_A1E |
+				   ALRM_DAY_A1E | ALRM_MON_A1E;
+	int ret;
+
+	ret = regmap_update_bits(pcf85363->regmap, DT_ALARM_EN, alarm_flags,
+				 enabled ? alarm_flags : 0);
+	if (ret)
+		return ret;
+
+	ret = regmap_update_bits(pcf85363->regmap, CTRL_INTA_EN,
+				 INT_A1IE, enabled ? INT_A1IE : 0);
+
+	if (ret || enabled)
+		return ret;
+
+	/* clear current flags */
+	return regmap_update_bits(pcf85363->regmap, CTRL_FLAGS, FLAGS_A1F, 0);
+}
+
+static int pcf85363_rtc_alarm_irq_enable(struct device *dev,
+					 unsigned int enabled)
+{
+	struct pcf85363 *pcf85363 = dev_get_drvdata(dev);
+
+	return _pcf85363_rtc_alarm_irq_enable(pcf85363, enabled);
+}
+
+static int pcf85363_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
+{
+	struct pcf85363 *pcf85363 = dev_get_drvdata(dev);
+	unsigned char buf[DT_MONTH_ALM1 - DT_SECOND_ALM1 + 1];
+	int ret;
+
+	buf[0] = bin2bcd(alrm->time.tm_sec);
+	buf[1] = bin2bcd(alrm->time.tm_min);
+	buf[2] = bin2bcd(alrm->time.tm_hour);
+	buf[3] = bin2bcd(alrm->time.tm_mday);
+	buf[4] = bin2bcd(alrm->time.tm_mon + 1);
+
+	/*
+	 * Disable the alarm interrupt before changing the value to avoid
+	 * spurious interrupts
+	 */
+	ret = _pcf85363_rtc_alarm_irq_enable(pcf85363, 0);
+	if (ret)
+		return ret;
+
+	ret = regmap_bulk_write(pcf85363->regmap, DT_SECOND_ALM1, buf,
+				sizeof(buf));
+	if (ret)
+		return ret;
+
+	return _pcf85363_rtc_alarm_irq_enable(pcf85363, alrm->enabled);
+}
+
+static irqreturn_t pcf85363_rtc_handle_irq(int irq, void *dev_id)
+{
+	struct pcf85363 *pcf85363 = i2c_get_clientdata(dev_id);
+	unsigned int flags;
+	int err;
+
+	err = regmap_read(pcf85363->regmap, CTRL_FLAGS, &flags);
+	if (err)
+		return IRQ_NONE;
+
+	if (flags & FLAGS_A1F) {
+		rtc_update_irq(pcf85363->rtc, 1, RTC_IRQF | RTC_AF);
+		regmap_update_bits(pcf85363->regmap, CTRL_FLAGS, FLAGS_A1F, 0);
+		return IRQ_HANDLED;
+	}
+
+	return IRQ_NONE;
 }
 
 static const struct rtc_class_ops rtc_ops = {
@@ -137,6 +285,14 @@ static const struct rtc_class_ops rtc_ops = {
 	.set_time	= pcf85363_rtc_set_time,
 };
 
+static const struct rtc_class_ops rtc_ops_alarm = {
+	.read_time	= pcf85363_rtc_read_time,
+	.set_time	= pcf85363_rtc_set_time,
+	.read_alarm	= pcf85363_rtc_read_alarm,
+	.set_alarm	= pcf85363_rtc_set_alarm,
+	.alarm_irq_enable = pcf85363_rtc_alarm_irq_enable,
+};
+
 static int pcf85363_nvram_read(void *priv, unsigned int offset, void *val,
 			       size_t bytes)
 {
@@ -158,12 +314,22 @@ static int pcf85363_nvram_write(void *priv, unsigned int offset, void *val,
 static const struct regmap_config regmap_config = {
 	.reg_bits = 8,
 	.val_bits = 8,
+	.max_register = 0x7f,
 };
 
 static int pcf85363_probe(struct i2c_client *client,
 			  const struct i2c_device_id *id)
 {
 	struct pcf85363 *pcf85363;
+	struct nvmem_config nvmem_cfg = {
+		.name = "pcf85363-",
+		.word_size = 1,
+		.stride = 1,
+		.size = NVRAM_SIZE,
+		.reg_read = pcf85363_nvram_read,
+		.reg_write = pcf85363_nvram_write,
+	};
+	int ret;
 
 	if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C))
 		return -ENODEV;
@@ -186,17 +352,28 @@ static int pcf85363_probe(struct i2c_client *client,
 	if (IS_ERR(pcf85363->rtc))
 		return PTR_ERR(pcf85363->rtc);
 
-	pcf85363->nvmem_cfg.name = "pcf85363-";
-	pcf85363->nvmem_cfg.word_size = 1;
-	pcf85363->nvmem_cfg.stride = 1;
-	pcf85363->nvmem_cfg.size = NVRAM_SIZE;
-	pcf85363->nvmem_cfg.reg_read = pcf85363_nvram_read;
-	pcf85363->nvmem_cfg.reg_write = pcf85363_nvram_write;
-	pcf85363->nvmem_cfg.priv = pcf85363;
-	pcf85363->rtc->nvmem_config = &pcf85363->nvmem_cfg;
 	pcf85363->rtc->ops = &rtc_ops;
 
-	return rtc_register_device(pcf85363->rtc);
+	if (client->irq > 0) {
+		regmap_write(pcf85363->regmap, CTRL_FLAGS, 0);
+		regmap_update_bits(pcf85363->regmap, CTRL_PIN_IO,
+				   PIN_IO_INTA_OUT, PIN_IO_INTAPM);
+		ret = devm_request_threaded_irq(pcf85363->dev, client->irq,
+						NULL, pcf85363_rtc_handle_irq,
+						IRQF_TRIGGER_LOW | IRQF_ONESHOT,
+						"pcf85363", client);
+		if (ret)
+			dev_warn(&client->dev, "unable to request IRQ, alarms disabled\n");
+		else
+			pcf85363->rtc->ops = &rtc_ops_alarm;
+	}
+
+	ret = rtc_register_device(pcf85363->rtc);
+
+	nvmem_cfg.priv = pcf85363;
+	rtc_nvmem_register(pcf85363->rtc, &nvmem_cfg);
+
+	return ret;
 }
 
 static const struct of_device_id dev_ids[] = {
diff --git a/drivers/rtc/rtc-pic32.c b/drivers/rtc/rtc-pic32.c
index 5cfb6df5c430..3c08eab4f1a8 100644
--- a/drivers/rtc/rtc-pic32.c
+++ b/drivers/rtc/rtc-pic32.c
@@ -175,7 +175,7 @@ static int pic32_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm)
 		rtc_tm->tm_hour, rtc_tm->tm_min, rtc_tm->tm_sec);
 
 	clk_disable(pdata->clk);
-	return rtc_valid_tm(rtc_tm);
+	return 0;
 }
 
 static int pic32_rtc_settime(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-pm8xxx.c b/drivers/rtc/rtc-pm8xxx.c
index fac835530671..29358a045925 100644
--- a/drivers/rtc/rtc-pm8xxx.c
+++ b/drivers/rtc/rtc-pm8xxx.c
@@ -74,16 +74,18 @@ struct pm8xxx_rtc {
 /*
  * Steps to write the RTC registers.
  * 1. Disable alarm if enabled.
- * 2. Write 0x00 to LSB.
- * 3. Write Byte[1], Byte[2], Byte[3] then Byte[0].
- * 4. Enable alarm if disabled in step 1.
+ * 2. Disable rtc if enabled.
+ * 3. Write 0x00 to LSB.
+ * 4. Write Byte[1], Byte[2], Byte[3] then Byte[0].
+ * 5. Enable rtc if disabled in step 2.
+ * 6. Enable alarm if disabled in step 1.
  */
 static int pm8xxx_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
 	int rc, i;
 	unsigned long secs, irq_flags;
-	u8 value[NUM_8_BIT_RTC_REGS], alarm_enabled = 0;
-	unsigned int ctrl_reg;
+	u8 value[NUM_8_BIT_RTC_REGS], alarm_enabled = 0, rtc_disabled = 0;
+	unsigned int ctrl_reg, rtc_ctrl_reg;
 	struct pm8xxx_rtc *rtc_dd = dev_get_drvdata(dev);
 	const struct pm8xxx_rtc_regs *regs = rtc_dd->regs;
 
@@ -92,23 +94,38 @@ static int pm8xxx_rtc_set_time(struct device *dev, struct rtc_time *tm)
 
 	rtc_tm_to_time(tm, &secs);
 
+	dev_dbg(dev, "Seconds value to be written to RTC = %lu\n", secs);
+
 	for (i = 0; i < NUM_8_BIT_RTC_REGS; i++) {
 		value[i] = secs & 0xFF;
 		secs >>= 8;
 	}
 
-	dev_dbg(dev, "Seconds value to be written to RTC = %lu\n", secs);
-
 	spin_lock_irqsave(&rtc_dd->ctrl_reg_lock, irq_flags);
 
-	rc = regmap_read(rtc_dd->regmap, regs->ctrl, &ctrl_reg);
+	rc = regmap_read(rtc_dd->regmap, regs->alarm_ctrl, &ctrl_reg);
 	if (rc)
 		goto rtc_rw_fail;
 
 	if (ctrl_reg & regs->alarm_en) {
 		alarm_enabled = 1;
 		ctrl_reg &= ~regs->alarm_en;
-		rc = regmap_write(rtc_dd->regmap, regs->ctrl, ctrl_reg);
+		rc = regmap_write(rtc_dd->regmap, regs->alarm_ctrl, ctrl_reg);
+		if (rc) {
+			dev_err(dev, "Write to RTC Alarm control register failed\n");
+			goto rtc_rw_fail;
+		}
+	}
+
+	/* Disable RTC H/w before writing on RTC register */
+	rc = regmap_read(rtc_dd->regmap, regs->ctrl, &rtc_ctrl_reg);
+	if (rc)
+		goto rtc_rw_fail;
+
+	if (rtc_ctrl_reg & PM8xxx_RTC_ENABLE) {
+		rtc_disabled = 1;
+		rtc_ctrl_reg &= ~PM8xxx_RTC_ENABLE;
+		rc = regmap_write(rtc_dd->regmap, regs->ctrl, rtc_ctrl_reg);
 		if (rc) {
 			dev_err(dev, "Write to RTC control register failed\n");
 			goto rtc_rw_fail;
@@ -137,11 +154,21 @@ static int pm8xxx_rtc_set_time(struct device *dev, struct rtc_time *tm)
 		goto rtc_rw_fail;
 	}
 
+	/* Enable RTC H/w after writing on RTC register */
+	if (rtc_disabled) {
+		rtc_ctrl_reg |= PM8xxx_RTC_ENABLE;
+		rc = regmap_write(rtc_dd->regmap, regs->ctrl, rtc_ctrl_reg);
+		if (rc) {
+			dev_err(dev, "Write to RTC control register failed\n");
+			goto rtc_rw_fail;
+		}
+	}
+
 	if (alarm_enabled) {
 		ctrl_reg |= regs->alarm_en;
-		rc = regmap_write(rtc_dd->regmap, regs->ctrl, ctrl_reg);
+		rc = regmap_write(rtc_dd->regmap, regs->alarm_ctrl, ctrl_reg);
 		if (rc) {
-			dev_err(dev, "Write to RTC control register failed\n");
+			dev_err(dev, "Write to RTC Alarm control register failed\n");
 			goto rtc_rw_fail;
 		}
 	}
@@ -190,12 +217,6 @@ static int pm8xxx_rtc_read_time(struct device *dev, struct rtc_time *tm)
 
 	rtc_time_to_tm(secs, tm);
 
-	rc = rtc_valid_tm(tm);
-	if (rc < 0) {
-		dev_err(dev, "Invalid time read from RTC\n");
-		return rc;
-	}
-
 	dev_dbg(dev, "secs = %lu, h:m:s == %d:%d:%d, d/m/y = %d/%d/%d\n",
 		secs, tm->tm_hour, tm->tm_min, tm->tm_sec,
 		tm->tm_mday, tm->tm_mon, tm->tm_year);
diff --git a/drivers/rtc/rtc-ps3.c b/drivers/rtc/rtc-ps3.c
index 6a8f5d758eac..347288bff438 100644
--- a/drivers/rtc/rtc-ps3.c
+++ b/drivers/rtc/rtc-ps3.c
@@ -41,7 +41,7 @@ static u64 read_rtc(void)
 static int ps3_get_time(struct device *dev, struct rtc_time *tm)
 {
 	rtc_time_to_tm(read_rtc() + ps3_os_area_get_rtc_diff(), tm);
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int ps3_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-r7301.c b/drivers/rtc/rtc-r7301.c
index 500e8c8a2605..169704b2ce13 100644
--- a/drivers/rtc/rtc-r7301.c
+++ b/drivers/rtc/rtc-r7301.c
@@ -224,7 +224,7 @@ static int rtc7301_read_time(struct device *dev, struct rtc_time *tm)
 
 	spin_unlock_irqrestore(&priv->lock, flags);
 
-	return err ? err : rtc_valid_tm(tm);
+	return err;
 }
 
 static int rtc7301_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-r9701.c b/drivers/rtc/rtc-r9701.c
index b6c5eb97051c..a39ccd1cf6e8 100644
--- a/drivers/rtc/rtc-r9701.c
+++ b/drivers/rtc/rtc-r9701.c
@@ -92,7 +92,7 @@ static int r9701_get_datetime(struct device *dev, struct rtc_time *dt)
 	 * according to the data sheet. make sure they are valid.
 	 */
 
-	return rtc_valid_tm(dt);
+	return 0;
 }
 
 static int r9701_set_datetime(struct device *dev, struct rtc_time *dt)
diff --git a/drivers/rtc/rtc-rk808.c b/drivers/rtc/rtc-rk808.c
index 35c9aada07c8..739c0d42e835 100644
--- a/drivers/rtc/rtc-rk808.c
+++ b/drivers/rtc/rtc-rk808.c
@@ -375,7 +375,6 @@ static int rk808_rtc_probe(struct platform_device *pdev)
 {
 	struct rk808 *rk808 = dev_get_drvdata(pdev->dev.parent);
 	struct rk808_rtc *rk808_rtc;
-	struct rtc_time tm;
 	int ret;
 
 	rk808_rtc = devm_kzalloc(&pdev->dev, sizeof(*rk808_rtc), GFP_KERNEL);
@@ -404,24 +403,13 @@ static int rk808_rtc_probe(struct platform_device *pdev)
 			return ret;
 	}
 
-	/* set init time */
-	ret = rk808_rtc_readtime(&pdev->dev, &tm);
-	if (ret) {
-		dev_err(&pdev->dev, "Failed to read RTC time\n");
-		return ret;
-	}
-	ret = rtc_valid_tm(&tm);
-	if (ret)
-		dev_warn(&pdev->dev, "invalid date/time\n");
-
 	device_init_wakeup(&pdev->dev, 1);
 
-	rk808_rtc->rtc = devm_rtc_device_register(&pdev->dev, "rk808-rtc",
-						  &rk808_rtc_ops, THIS_MODULE);
-	if (IS_ERR(rk808_rtc->rtc)) {
-		ret = PTR_ERR(rk808_rtc->rtc);
-		return ret;
-	}
+	rk808_rtc->rtc = devm_rtc_allocate_device(&pdev->dev);
+	if (IS_ERR(rk808_rtc->rtc))
+		return PTR_ERR(rk808_rtc->rtc);
+
+	rk808_rtc->rtc->ops = &rk808_rtc_ops;
 
 	rk808_rtc->irq = platform_get_irq(pdev, 0);
 	if (rk808_rtc->irq < 0) {
@@ -438,9 +426,10 @@ static int rk808_rtc_probe(struct platform_device *pdev)
 	if (ret) {
 		dev_err(&pdev->dev, "Failed to request alarm IRQ %d: %d\n",
 			rk808_rtc->irq, ret);
+		return ret;
 	}
 
-	return ret;
+	return rtc_register_device(rk808_rtc->rtc);
 }
 
 static struct platform_driver rk808_rtc_driver = {
diff --git a/drivers/rtc/rtc-rp5c01.c b/drivers/rtc/rtc-rp5c01.c
index 026035373ae6..f1c160fe7d37 100644
--- a/drivers/rtc/rtc-rp5c01.c
+++ b/drivers/rtc/rtc-rp5c01.c
@@ -64,7 +64,6 @@ struct rp5c01_priv {
 	u32 __iomem *regs;
 	struct rtc_device *rtc;
 	spinlock_t lock;	/* against concurrent RTC/NVRAM access */
-	struct bin_attribute nvram_attr;
 };
 
 static inline unsigned int rp5c01_read(struct rp5c01_priv *priv,
@@ -116,7 +115,7 @@ static int rp5c01_read_time(struct device *dev, struct rtc_time *tm)
 	rp5c01_unlock(priv);
 	spin_unlock_irq(&priv->lock);
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int rp5c01_set_time(struct device *dev, struct rtc_time *tm)
@@ -160,17 +159,15 @@ static const struct rtc_class_ops rp5c01_rtc_ops = {
  * byte is stored in BLOCK10, the low nibble in BLOCK11.
  */
 
-static ssize_t rp5c01_nvram_read(struct file *filp, struct kobject *kobj,
-				 struct bin_attribute *bin_attr,
-				 char *buf, loff_t pos, size_t size)
+static int rp5c01_nvram_read(void *_priv, unsigned int pos, void *val,
+			     size_t bytes)
 {
-	struct device *dev = container_of(kobj, struct device, kobj);
-	struct rp5c01_priv *priv = dev_get_drvdata(dev);
-	ssize_t count;
+	struct rp5c01_priv *priv = _priv;
+	u8 *buf = val;
 
 	spin_lock_irq(&priv->lock);
 
-	for (count = 0; count < size; count++) {
+	for (; bytes; bytes--) {
 		u8 data;
 
 		rp5c01_write(priv,
@@ -187,20 +184,18 @@ static ssize_t rp5c01_nvram_read(struct file *filp, struct kobject *kobj,
 	}
 
 	spin_unlock_irq(&priv->lock);
-	return count;
+	return 0;
 }
 
-static ssize_t rp5c01_nvram_write(struct file *filp, struct kobject *kobj,
-				  struct bin_attribute *bin_attr,
-				  char *buf, loff_t pos, size_t size)
+static int rp5c01_nvram_write(void *_priv, unsigned int pos, void *val,
+			      size_t bytes)
 {
-	struct device *dev = container_of(kobj, struct device, kobj);
-	struct rp5c01_priv *priv = dev_get_drvdata(dev);
-	ssize_t count;
+	struct rp5c01_priv *priv = _priv;
+	u8 *buf = val;
 
 	spin_lock_irq(&priv->lock);
 
-	for (count = 0; count < size; count++) {
+	for (; bytes; bytes--) {
 		u8 data = *buf++;
 
 		rp5c01_write(priv,
@@ -216,7 +211,7 @@ static ssize_t rp5c01_nvram_write(struct file *filp, struct kobject *kobj,
 	}
 
 	spin_unlock_irq(&priv->lock);
-	return count;
+	return 0;
 }
 
 static int __init rp5c01_rtc_probe(struct platform_device *dev)
@@ -225,6 +220,14 @@ static int __init rp5c01_rtc_probe(struct platform_device *dev)
 	struct rp5c01_priv *priv;
 	struct rtc_device *rtc;
 	int error;
+	struct nvmem_config nvmem_cfg = {
+		.name = "rp5c01_nvram",
+		.word_size = 1,
+		.stride = 1,
+		.size = RP5C01_MODE,
+		.reg_read = rp5c01_nvram_read,
+		.reg_write = rp5c01_nvram_write,
+	};
 
 	res = platform_get_resource(dev, IORESOURCE_MEM, 0);
 	if (!res)
@@ -238,43 +241,31 @@ static int __init rp5c01_rtc_probe(struct platform_device *dev)
 	if (!priv->regs)
 		return -ENOMEM;
 
-	sysfs_bin_attr_init(&priv->nvram_attr);
-	priv->nvram_attr.attr.name = "nvram";
-	priv->nvram_attr.attr.mode = S_IRUGO | S_IWUSR;
-	priv->nvram_attr.read = rp5c01_nvram_read;
-	priv->nvram_attr.write = rp5c01_nvram_write;
-	priv->nvram_attr.size = RP5C01_MODE;
-
 	spin_lock_init(&priv->lock);
 
 	platform_set_drvdata(dev, priv);
 
-	rtc = devm_rtc_device_register(&dev->dev, "rtc-rp5c01", &rp5c01_rtc_ops,
-				  THIS_MODULE);
+	rtc = devm_rtc_allocate_device(&dev->dev);
 	if (IS_ERR(rtc))
 		return PTR_ERR(rtc);
+
+	rtc->ops = &rp5c01_rtc_ops;
+	rtc->nvram_old_abi = true;
+
 	priv->rtc = rtc;
 
-	error = sysfs_create_bin_file(&dev->dev.kobj, &priv->nvram_attr);
+	nvmem_cfg.priv = priv;
+	error = rtc_nvmem_register(rtc, &nvmem_cfg);
 	if (error)
 		return error;
 
-	return 0;
-}
-
-static int __exit rp5c01_rtc_remove(struct platform_device *dev)
-{
-	struct rp5c01_priv *priv = platform_get_drvdata(dev);
-
-	sysfs_remove_bin_file(&dev->dev.kobj, &priv->nvram_attr);
-	return 0;
+	return rtc_register_device(rtc);
 }
 
 static struct platform_driver rp5c01_rtc_driver = {
 	.driver	= {
 		.name	= "rtc-rp5c01",
 	},
-	.remove	= __exit_p(rp5c01_rtc_remove),
 };
 
 module_platform_driver_probe(rp5c01_rtc_driver, rp5c01_rtc_probe);
diff --git a/drivers/rtc/rtc-rs5c348.c b/drivers/rtc/rtc-rs5c348.c
index 9a306983aaba..f2de8b17e7e3 100644
--- a/drivers/rtc/rtc-rs5c348.c
+++ b/drivers/rtc/rtc-rs5c348.c
@@ -135,11 +135,6 @@ rs5c348_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	tm->tm_year = bcd2bin(rxbuf[RS5C348_REG_YEAR]) +
 		((rxbuf[RS5C348_REG_MONTH] & RS5C348_BIT_Y2K) ? 100 : 0);
 
-	if (rtc_valid_tm(tm) < 0) {
-		dev_err(&spi->dev, "retrieved date/time is not valid.\n");
-		rtc_time_to_tm(0, tm);
-	}
-
 	return 0;
 }
 
diff --git a/drivers/rtc/rtc-rs5c372.c b/drivers/rtc/rtc-rs5c372.c
index d4eff8d7131f..c5038329058c 100644
--- a/drivers/rtc/rtc-rs5c372.c
+++ b/drivers/rtc/rtc-rs5c372.c
@@ -207,8 +207,9 @@ static unsigned rs5c_hr2reg(struct rs5c372 *rs5c, unsigned hour)
 	return bin2bcd(hour);
 }
 
-static int rs5c372_get_datetime(struct i2c_client *client, struct rtc_time *tm)
+static int rs5c372_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
+	struct i2c_client *client = to_i2c_client(dev);
 	struct rs5c372	*rs5c = i2c_get_clientdata(client);
 	int		status = rs5c_get_regs(rs5c);
 
@@ -234,12 +235,12 @@ static int rs5c372_get_datetime(struct i2c_client *client, struct rtc_time *tm)
 		tm->tm_sec, tm->tm_min, tm->tm_hour,
 		tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
 
-	/* rtc might need initialization */
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
-static int rs5c372_set_datetime(struct i2c_client *client, struct rtc_time *tm)
+static int rs5c372_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
+	struct i2c_client *client = to_i2c_client(dev);
 	struct rs5c372	*rs5c = i2c_get_clientdata(client);
 	unsigned char	buf[7];
 	int		addr;
@@ -305,17 +306,6 @@ static int rs5c372_get_trim(struct i2c_client *client, int *osc, int *trim)
 }
 #endif
 
-static int rs5c372_rtc_read_time(struct device *dev, struct rtc_time *tm)
-{
-	return rs5c372_get_datetime(to_i2c_client(dev), tm);
-}
-
-static int rs5c372_rtc_set_time(struct device *dev, struct rtc_time *tm)
-{
-	return rs5c372_set_datetime(to_i2c_client(dev), tm);
-}
-
-
 static int rs5c_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
 {
 	struct i2c_client	*client = to_i2c_client(dev);
@@ -581,7 +571,6 @@ static int rs5c372_probe(struct i2c_client *client,
 	int err = 0;
 	int smbus_mode = 0;
 	struct rs5c372 *rs5c372;
-	struct rtc_time tm;
 
 	dev_dbg(&client->dev, "%s\n", __func__);
 
@@ -662,9 +651,6 @@ static int rs5c372_probe(struct i2c_client *client,
 		goto exit;
 	}
 
-	if (rs5c372_get_datetime(client, &tm) < 0)
-		dev_warn(&client->dev, "clock needs to be set\n");
-
 	dev_info(&client->dev, "%s found, %s\n",
 			({ char *s; switch (rs5c372->type) {
 			case rtc_r2025sd:	s = "r2025sd"; break;
diff --git a/drivers/rtc/rtc-rv8803.c b/drivers/rtc/rtc-rv8803.c
index aae2576741a6..29fc3d210392 100644
--- a/drivers/rtc/rtc-rv8803.c
+++ b/drivers/rtc/rtc-rv8803.c
@@ -68,7 +68,6 @@ struct rv8803_data {
 	struct mutex flags_lock;
 	u8 ctrl;
 	enum rv8803_type type;
-	struct nvmem_config nvmem_cfg;
 };
 
 static int rv8803_read_reg(const struct i2c_client *client, u8 reg)
@@ -528,6 +527,15 @@ static int rv8803_probe(struct i2c_client *client,
 	struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
 	struct rv8803_data *rv8803;
 	int err, flags;
+	struct nvmem_config nvmem_cfg = {
+		.name = "rv8803_nvram",
+		.word_size = 1,
+		.stride = 1,
+		.size = 1,
+		.reg_read = rv8803_nvram_read,
+		.reg_write = rv8803_nvram_write,
+		.priv = client,
+	};
 
 	if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA |
 				     I2C_FUNC_SMBUS_I2C_BLOCK)) {
@@ -582,21 +590,6 @@ static int rv8803_probe(struct i2c_client *client,
 		}
 	}
 
-	rv8803->nvmem_cfg.name = "rv8803_nvram",
-	rv8803->nvmem_cfg.word_size = 1,
-	rv8803->nvmem_cfg.stride = 1,
-	rv8803->nvmem_cfg.size = 1,
-	rv8803->nvmem_cfg.reg_read = rv8803_nvram_read,
-	rv8803->nvmem_cfg.reg_write = rv8803_nvram_write,
-	rv8803->nvmem_cfg.priv = client;
-
-	rv8803->rtc->ops = &rv8803_rtc_ops;
-	rv8803->rtc->nvmem_config = &rv8803->nvmem_cfg;
-	rv8803->rtc->nvram_old_abi = true;
-	err = rtc_register_device(rv8803->rtc);
-	if (err)
-		return err;
-
 	err = rv8803_write_reg(rv8803->client, RV8803_EXT, RV8803_EXT_WADA);
 	if (err)
 		return err;
@@ -607,6 +600,14 @@ static int rv8803_probe(struct i2c_client *client,
 		return err;
 	}
 
+	rv8803->rtc->ops = &rv8803_rtc_ops;
+	rv8803->rtc->nvram_old_abi = true;
+	err = rtc_register_device(rv8803->rtc);
+	if (err)
+		return err;
+
+	rtc_nvmem_register(rv8803->rtc, &nvmem_cfg);
+
 	rv8803->rtc->max_user_freq = 1;
 
 	return 0;
diff --git a/drivers/rtc/rtc-rx4581.c b/drivers/rtc/rtc-rx4581.c
index de3fe4f8d133..c59a218bdd87 100644
--- a/drivers/rtc/rtc-rx4581.c
+++ b/drivers/rtc/rtc-rx4581.c
@@ -172,11 +172,7 @@ static int rx4581_get_datetime(struct device *dev, struct rtc_time *tm)
 		tm->tm_sec, tm->tm_min, tm->tm_hour,
 		tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
 
-	err = rtc_valid_tm(tm);
-	if (err < 0)
-		dev_err(dev, "retrieved date/time is not valid.\n");
-
-	return err;
+	return 0;
 }
 
 static int rx4581_set_datetime(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-rx6110.c b/drivers/rtc/rtc-rx6110.c
index 7c9c08eab5e5..8e322d884cc2 100644
--- a/drivers/rtc/rtc-rx6110.c
+++ b/drivers/rtc/rtc-rx6110.c
@@ -252,7 +252,7 @@ static int rx6110_get_time(struct device *dev, struct rtc_time *tm)
 		tm->tm_sec, tm->tm_min, tm->tm_hour,
 		tm->tm_mday, tm->tm_mon, tm->tm_year);
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static const struct reg_sequence rx6110_default_regs[] = {
diff --git a/drivers/rtc/rtc-rx8010.c b/drivers/rtc/rtc-rx8010.c
index 5c5938ab3d86..7ddc22eb5b0f 100644
--- a/drivers/rtc/rtc-rx8010.c
+++ b/drivers/rtc/rtc-rx8010.c
@@ -138,7 +138,7 @@ static int rx8010_get_time(struct device *dev, struct rtc_time *dt)
 	dt->tm_year = bcd2bin(date[RX8010_YEAR - RX8010_SEC]) + 100;
 	dt->tm_wday = ffs(date[RX8010_WDAY - RX8010_SEC] & 0x7f);
 
-	return rtc_valid_tm(dt);
+	return 0;
 }
 
 static int rx8010_set_time(struct device *dev, struct rtc_time *dt)
diff --git a/drivers/rtc/rtc-rx8025.c b/drivers/rtc/rtc-rx8025.c
index 91857d8d2df8..41127adf5765 100644
--- a/drivers/rtc/rtc-rx8025.c
+++ b/drivers/rtc/rtc-rx8025.c
@@ -214,7 +214,7 @@ static int rx8025_get_time(struct device *dev, struct rtc_time *dt)
 		dt->tm_sec, dt->tm_min, dt->tm_hour,
 		dt->tm_mday, dt->tm_mon, dt->tm_year);
 
-	return rtc_valid_tm(dt);
+	return 0;
 }
 
 static int rx8025_set_time(struct device *dev, struct rtc_time *dt)
diff --git a/drivers/rtc/rtc-rx8581.c b/drivers/rtc/rtc-rx8581.c
index 9998d7937688..32caadf912ca 100644
--- a/drivers/rtc/rtc-rx8581.c
+++ b/drivers/rtc/rtc-rx8581.c
@@ -164,11 +164,7 @@ static int rx8581_get_datetime(struct i2c_client *client, struct rtc_time *tm)
 		tm->tm_sec, tm->tm_min, tm->tm_hour,
 		tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
 
-	err = rtc_valid_tm(tm);
-	if (err < 0)
-		dev_err(&client->dev, "retrieved date/time is not valid.\n");
-
-	return err;
+	return 0;
 }
 
 static int rx8581_set_datetime(struct i2c_client *client, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-s35390a.c b/drivers/rtc/rtc-s35390a.c
index 7067bca5c20d..77feb603cd4c 100644
--- a/drivers/rtc/rtc-s35390a.c
+++ b/drivers/rtc/rtc-s35390a.c
@@ -210,8 +210,9 @@ static int s35390a_reg2hr(struct s35390a *s35390a, char reg)
 	return hour;
 }
 
-static int s35390a_set_datetime(struct i2c_client *client, struct rtc_time *tm)
+static int s35390a_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
+	struct i2c_client *client = to_i2c_client(dev);
 	struct s35390a	*s35390a = i2c_get_clientdata(client);
 	int i, err;
 	char buf[7], status;
@@ -241,8 +242,9 @@ static int s35390a_set_datetime(struct i2c_client *client, struct rtc_time *tm)
 	return err;
 }
 
-static int s35390a_get_datetime(struct i2c_client *client, struct rtc_time *tm)
+static int s35390a_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
+	struct i2c_client *client = to_i2c_client(dev);
 	struct s35390a *s35390a = i2c_get_clientdata(client);
 	char buf[7], status;
 	int i, err;
@@ -271,11 +273,12 @@ static int s35390a_get_datetime(struct i2c_client *client, struct rtc_time *tm)
 		tm->tm_min, tm->tm_hour, tm->tm_mday, tm->tm_mon, tm->tm_year,
 		tm->tm_wday);
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
-static int s35390a_set_alarm(struct i2c_client *client, struct rtc_wkalrm *alm)
+static int s35390a_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
 {
+	struct i2c_client *client = to_i2c_client(dev);
 	struct s35390a *s35390a = i2c_get_clientdata(client);
 	char buf[3], sts = 0;
 	int err, i;
@@ -329,8 +332,9 @@ static int s35390a_set_alarm(struct i2c_client *client, struct rtc_wkalrm *alm)
 	return err;
 }
 
-static int s35390a_read_alarm(struct i2c_client *client, struct rtc_wkalrm *alm)
+static int s35390a_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alm)
 {
+	struct i2c_client *client = to_i2c_client(dev);
 	struct s35390a *s35390a = i2c_get_clientdata(client);
 	char buf[3], sts;
 	int i, err;
@@ -384,26 +388,6 @@ static int s35390a_read_alarm(struct i2c_client *client, struct rtc_wkalrm *alm)
 	return 0;
 }
 
-static int s35390a_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alm)
-{
-	return s35390a_read_alarm(to_i2c_client(dev), alm);
-}
-
-static int s35390a_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
-{
-	return s35390a_set_alarm(to_i2c_client(dev), alm);
-}
-
-static int s35390a_rtc_read_time(struct device *dev, struct rtc_time *tm)
-{
-	return s35390a_get_datetime(to_i2c_client(dev), tm);
-}
-
-static int s35390a_rtc_set_time(struct device *dev, struct rtc_time *tm)
-{
-	return s35390a_set_datetime(to_i2c_client(dev), tm);
-}
-
 static int s35390a_rtc_ioctl(struct device *dev, unsigned int cmd,
 			     unsigned long arg)
 {
@@ -450,7 +434,6 @@ static int s35390a_probe(struct i2c_client *client,
 	int err, err_read;
 	unsigned int i;
 	struct s35390a *s35390a;
-	struct rtc_time tm;
 	char buf, status1;
 
 	if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
@@ -508,9 +491,6 @@ static int s35390a_probe(struct i2c_client *client,
 		}
 	}
 
-	if (err_read > 0 || s35390a_get_datetime(client, &tm) < 0)
-		dev_warn(&client->dev, "clock needs to be set\n");
-
 	device_set_wakeup_capable(&client->dev, 1);
 
 	s35390a->rtc = devm_rtc_device_register(&client->dev,
diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c
index a8992c227f61..75c8c5033e08 100644
--- a/drivers/rtc/rtc-s3c.c
+++ b/drivers/rtc/rtc-s3c.c
@@ -232,7 +232,7 @@ retry_get_time:
 
 	rtc_tm->tm_mon -= 1;
 
-	return rtc_valid_tm(rtc_tm);
+	return 0;
 }
 
 static int s3c_rtc_settime(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-s5m.c b/drivers/rtc/rtc-s5m.c
index 0477678d968f..8428455432ca 100644
--- a/drivers/rtc/rtc-s5m.c
+++ b/drivers/rtc/rtc-s5m.c
@@ -38,6 +38,19 @@
  */
 #define UDR_READ_RETRY_CNT	5
 
+enum {
+	RTC_SEC = 0,
+	RTC_MIN,
+	RTC_HOUR,
+	RTC_WEEKDAY,
+	RTC_DATE,
+	RTC_MONTH,
+	RTC_YEAR1,
+	RTC_YEAR2,
+	/* Make sure this is always the last enum name. */
+	RTC_MAX_NUM_TIME_REGS
+};
+
 /*
  * Registers used by the driver which are different between chipsets.
  *
@@ -367,7 +380,7 @@ static void s5m8763_tm_to_data(struct rtc_time *tm, u8 *data)
 static int s5m_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
 	struct s5m_rtc_info *info = dev_get_drvdata(dev);
-	u8 data[info->regs->regs_count];
+	u8 data[RTC_MAX_NUM_TIME_REGS];
 	int ret;
 
 	if (info->regs->read_time_udr_mask) {
@@ -407,13 +420,13 @@ static int s5m_rtc_read_time(struct device *dev, struct rtc_time *tm)
 		1900 + tm->tm_year, 1 + tm->tm_mon, tm->tm_mday,
 		tm->tm_hour, tm->tm_min, tm->tm_sec, tm->tm_wday);
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int s5m_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
 	struct s5m_rtc_info *info = dev_get_drvdata(dev);
-	u8 data[info->regs->regs_count];
+	u8 data[RTC_MAX_NUM_TIME_REGS];
 	int ret = 0;
 
 	switch (info->device_type) {
@@ -450,7 +463,7 @@ static int s5m_rtc_set_time(struct device *dev, struct rtc_time *tm)
 static int s5m_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 {
 	struct s5m_rtc_info *info = dev_get_drvdata(dev);
-	u8 data[info->regs->regs_count];
+	u8 data[RTC_MAX_NUM_TIME_REGS];
 	unsigned int val;
 	int ret, i;
 
@@ -500,7 +513,7 @@ static int s5m_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 
 static int s5m_rtc_stop_alarm(struct s5m_rtc_info *info)
 {
-	u8 data[info->regs->regs_count];
+	u8 data[RTC_MAX_NUM_TIME_REGS];
 	int ret, i;
 	struct rtc_time tm;
 
@@ -545,7 +558,7 @@ static int s5m_rtc_stop_alarm(struct s5m_rtc_info *info)
 static int s5m_rtc_start_alarm(struct s5m_rtc_info *info)
 {
 	int ret;
-	u8 data[info->regs->regs_count];
+	u8 data[RTC_MAX_NUM_TIME_REGS];
 	u8 alarm0_conf;
 	struct rtc_time tm;
 
@@ -598,7 +611,7 @@ static int s5m_rtc_start_alarm(struct s5m_rtc_info *info)
 static int s5m_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 {
 	struct s5m_rtc_info *info = dev_get_drvdata(dev);
-	u8 data[info->regs->regs_count];
+	u8 data[RTC_MAX_NUM_TIME_REGS];
 	int ret;
 
 	switch (info->device_type) {
diff --git a/drivers/rtc/rtc-sc27xx.c b/drivers/rtc/rtc-sc27xx.c
index d544d5268757..00d87d138984 100644
--- a/drivers/rtc/rtc-sc27xx.c
+++ b/drivers/rtc/rtc-sc27xx.c
@@ -376,7 +376,7 @@ static int sprd_rtc_read_time(struct device *dev, struct rtc_time *tm)
 		return ret;
 
 	rtc_time64_to_tm(secs, tm);
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int sprd_rtc_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-sh.c b/drivers/rtc/rtc-sh.c
index 6c2d3989f967..4e8ab370ce63 100644
--- a/drivers/rtc/rtc-sh.c
+++ b/drivers/rtc/rtc-sh.c
@@ -414,7 +414,7 @@ static int sh_rtc_read_time(struct device *dev, struct rtc_time *tm)
 		tm->tm_sec, tm->tm_min, tm->tm_hour,
 		tm->tm_mday, tm->tm_mon + 1, tm->tm_year, tm->tm_wday);
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int sh_rtc_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-sirfsoc.c b/drivers/rtc/rtc-sirfsoc.c
index 7367f617145c..2a9e151cae99 100644
--- a/drivers/rtc/rtc-sirfsoc.c
+++ b/drivers/rtc/rtc-sirfsoc.c
@@ -204,23 +204,6 @@ static int sirfsoc_rtc_set_time(struct device *dev,
 	return 0;
 }
 
-static int sirfsoc_rtc_ioctl(struct device *dev, unsigned int cmd,
-		unsigned long arg)
-{
-	switch (cmd) {
-	case RTC_PIE_ON:
-	case RTC_PIE_OFF:
-	case RTC_UIE_ON:
-	case RTC_UIE_OFF:
-	case RTC_AIE_ON:
-	case RTC_AIE_OFF:
-		return 0;
-
-	default:
-		return -ENOIOCTLCMD;
-	}
-}
-
 static int sirfsoc_rtc_alarm_irq_enable(struct device *dev,
 		unsigned int enabled)
 {
@@ -250,7 +233,6 @@ static const struct rtc_class_ops sirfsoc_rtc_ops = {
 	.set_time = sirfsoc_rtc_set_time,
 	.read_alarm = sirfsoc_rtc_read_alarm,
 	.set_alarm = sirfsoc_rtc_set_alarm,
-	.ioctl = sirfsoc_rtc_ioctl,
 	.alarm_irq_enable = sirfsoc_rtc_alarm_irq_enable
 };
 
diff --git a/drivers/rtc/rtc-snvs.c b/drivers/rtc/rtc-snvs.c
index d8ef9e052c4f..9af591d5223c 100644
--- a/drivers/rtc/rtc-snvs.c
+++ b/drivers/rtc/rtc-snvs.c
@@ -132,20 +132,23 @@ static int snvs_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
 	struct snvs_rtc_data *data = dev_get_drvdata(dev);
 	unsigned long time;
+	int ret;
 
 	rtc_tm_to_time(tm, &time);
 
 	/* Disable RTC first */
-	snvs_rtc_enable(data, false);
+	ret = snvs_rtc_enable(data, false);
+	if (ret)
+		return ret;
 
 	/* Write 32-bit time to 47-bit timer, leaving 15 LSBs blank */
 	regmap_write(data->regmap, data->offset + SNVS_LPSRTCLR, time << CNTR_TO_SECS_SH);
 	regmap_write(data->regmap, data->offset + SNVS_LPSRTCMR, time >> (32 - CNTR_TO_SECS_SH));
 
 	/* Enable RTC again */
-	snvs_rtc_enable(data, true);
+	ret = snvs_rtc_enable(data, true);
 
-	return 0;
+	return ret;
 }
 
 static int snvs_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
@@ -288,7 +291,11 @@ static int snvs_rtc_probe(struct platform_device *pdev)
 	regmap_write(data->regmap, data->offset + SNVS_LPSR, 0xffffffff);
 
 	/* Enable RTC */
-	snvs_rtc_enable(data, true);
+	ret = snvs_rtc_enable(data, true);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to enable rtc %d\n", ret);
+		goto error_rtc_device_register;
+	}
 
 	device_init_wakeup(&pdev->dev, true);
 
diff --git a/drivers/rtc/rtc-spear.c b/drivers/rtc/rtc-spear.c
index e377f42abae7..0567944fd4f8 100644
--- a/drivers/rtc/rtc-spear.c
+++ b/drivers/rtc/rtc-spear.c
@@ -170,18 +170,14 @@ static irqreturn_t spear_rtc_irq(int irq, void *dev_id)
 
 }
 
-static int tm2bcd(struct rtc_time *tm)
+static void tm2bcd(struct rtc_time *tm)
 {
-	if (rtc_valid_tm(tm) != 0)
-		return -EINVAL;
 	tm->tm_sec = bin2bcd(tm->tm_sec);
 	tm->tm_min = bin2bcd(tm->tm_min);
 	tm->tm_hour = bin2bcd(tm->tm_hour);
 	tm->tm_mday = bin2bcd(tm->tm_mday);
 	tm->tm_mon = bin2bcd(tm->tm_mon + 1);
 	tm->tm_year = bin2bcd(tm->tm_year);
-
-	return 0;
 }
 
 static void bcd2tm(struct rtc_time *tm)
@@ -237,8 +233,7 @@ static int spear_rtc_set_time(struct device *dev, struct rtc_time *tm)
 	struct spear_rtc_config *config = dev_get_drvdata(dev);
 	unsigned int time, date;
 
-	if (tm2bcd(tm) < 0)
-		return -EINVAL;
+	tm2bcd(tm);
 
 	rtc_wait_not_busy(config);
 	time = (tm->tm_sec << SECOND_SHIFT) | (tm->tm_min << MINUTE_SHIFT) |
@@ -295,8 +290,7 @@ static int spear_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
 	unsigned int time, date;
 	int err;
 
-	if (tm2bcd(&alm->time) < 0)
-		return -EINVAL;
+	tm2bcd(&alm->time);
 
 	rtc_wait_not_busy(config);
 
diff --git a/drivers/rtc/rtc-st-lpc.c b/drivers/rtc/rtc-st-lpc.c
index 82b0af159a28..d5222667f892 100644
--- a/drivers/rtc/rtc-st-lpc.c
+++ b/drivers/rtc/rtc-st-lpc.c
@@ -195,7 +195,6 @@ static int st_rtc_probe(struct platform_device *pdev)
 	struct device_node *np = pdev->dev.of_node;
 	struct st_rtc *rtc;
 	struct resource *res;
-	struct rtc_time tm_check;
 	uint32_t mode;
 	int ret = 0;
 
@@ -254,21 +253,6 @@ static int st_rtc_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, rtc);
 
-	/*
-	 * The RTC-LPC is able to manage date.year > 2038
-	 * but currently the kernel can not manage this date!
-	 * If the RTC-LPC has a date.year > 2038 then
-	 * it's set to the epoch "Jan 1st 2000"
-	 */
-	st_rtc_read_time(&pdev->dev, &tm_check);
-
-	if (tm_check.tm_year >=  (2038 - 1900)) {
-		memset(&tm_check, 0, sizeof(tm_check));
-		tm_check.tm_year = 100;
-		tm_check.tm_mday = 1;
-		st_rtc_set_time(&pdev->dev, &tm_check);
-	}
-
 	rtc->rtc_dev = rtc_device_register("st-lpc-rtc", &pdev->dev,
 					   &st_rtc_ops, THIS_MODULE);
 	if (IS_ERR(rtc->rtc_dev)) {
diff --git a/drivers/rtc/rtc-starfire.c b/drivers/rtc/rtc-starfire.c
index 7fc36973fa33..a7d49329d626 100644
--- a/drivers/rtc/rtc-starfire.c
+++ b/drivers/rtc/rtc-starfire.c
@@ -28,7 +28,7 @@ static u32 starfire_get_time(void)
 static int starfire_read_time(struct device *dev, struct rtc_time *tm)
 {
 	rtc_time_to_tm(starfire_get_time(), tm);
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static const struct rtc_class_ops starfire_rtc_ops = {
diff --git a/drivers/rtc/rtc-stk17ta8.c b/drivers/rtc/rtc-stk17ta8.c
index a456cb6177ea..e70b78d17a98 100644
--- a/drivers/rtc/rtc-stk17ta8.c
+++ b/drivers/rtc/rtc-stk17ta8.c
@@ -129,10 +129,6 @@ static int stk17ta8_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	/* year is 1900 + tm->tm_year */
 	tm->tm_year = bcd2bin(year) + bcd2bin(century) * 100 - 1900;
 
-	if (rtc_valid_tm(tm) < 0) {
-		dev_err(dev, "retrieved date/time is not valid.\n");
-		rtc_time_to_tm(0, tm);
-	}
 	return 0;
 }
 
@@ -242,46 +238,30 @@ static const struct rtc_class_ops stk17ta8_rtc_ops = {
 	.alarm_irq_enable	= stk17ta8_rtc_alarm_irq_enable,
 };
 
-static ssize_t stk17ta8_nvram_read(struct file *filp, struct kobject *kobj,
-				 struct bin_attribute *attr, char *buf,
-				 loff_t pos, size_t size)
+static int stk17ta8_nvram_read(void *priv, unsigned int pos, void *val,
+			       size_t bytes)
 {
-	struct device *dev = container_of(kobj, struct device, kobj);
-	struct platform_device *pdev = to_platform_device(dev);
-	struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
+	struct rtc_plat_data *pdata = priv;
 	void __iomem *ioaddr = pdata->ioaddr;
-	ssize_t count;
+	u8 *buf = val;
 
-	for (count = 0; count < size; count++)
+	for (; bytes; bytes--)
 		*buf++ = readb(ioaddr + pos++);
-	return count;
+	return 0;
 }
 
-static ssize_t stk17ta8_nvram_write(struct file *filp, struct kobject *kobj,
-				  struct bin_attribute *attr, char *buf,
-				  loff_t pos, size_t size)
+static int stk17ta8_nvram_write(void *priv, unsigned int pos, void *val,
+				size_t bytes)
 {
-	struct device *dev = container_of(kobj, struct device, kobj);
-	struct platform_device *pdev = to_platform_device(dev);
-	struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
+	struct rtc_plat_data *pdata = priv;
 	void __iomem *ioaddr = pdata->ioaddr;
-	ssize_t count;
+	u8 *buf = val;
 
-	for (count = 0; count < size; count++)
+	for (; bytes; bytes--)
 		writeb(*buf++, ioaddr + pos++);
-	return count;
+	return 0;
 }
 
-static struct bin_attribute stk17ta8_nvram_attr = {
-	.attr = {
-		.name = "nvram",
-		.mode = S_IRUGO | S_IWUSR,
-	},
-	.size = RTC_OFFSET,
-	.read = stk17ta8_nvram_read,
-	.write = stk17ta8_nvram_write,
-};
-
 static int stk17ta8_rtc_probe(struct platform_device *pdev)
 {
 	struct resource *res;
@@ -290,6 +270,14 @@ static int stk17ta8_rtc_probe(struct platform_device *pdev)
 	struct rtc_plat_data *pdata;
 	void __iomem *ioaddr;
 	int ret = 0;
+	struct nvmem_config nvmem_cfg = {
+		.name = "stk17ta8_nvram",
+		.word_size = 1,
+		.stride = 1,
+		.size = RTC_OFFSET,
+		.reg_read = stk17ta8_nvram_read,
+		.reg_write = stk17ta8_nvram_write,
+	};
 
 	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
 	if (!pdata)
@@ -328,24 +316,19 @@ static int stk17ta8_rtc_probe(struct platform_device *pdev)
 		}
 	}
 
-	pdata->rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
-				  &stk17ta8_rtc_ops, THIS_MODULE);
+	pdata->rtc = devm_rtc_allocate_device(&pdev->dev);
 	if (IS_ERR(pdata->rtc))
 		return PTR_ERR(pdata->rtc);
 
-	ret = sysfs_create_bin_file(&pdev->dev.kobj, &stk17ta8_nvram_attr);
+	pdata->rtc->ops = &stk17ta8_rtc_ops;
+	pdata->rtc->nvram_old_abi = true;
 
-	return ret;
-}
+	nvmem_cfg.priv = pdata;
+	ret = rtc_nvmem_register(pdata->rtc, &nvmem_cfg);
+	if (ret)
+		return ret;
 
-static int stk17ta8_rtc_remove(struct platform_device *pdev)
-{
-	struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
-
-	sysfs_remove_bin_file(&pdev->dev.kobj, &stk17ta8_nvram_attr);
-	if (pdata->irq > 0)
-		writeb(0, pdata->ioaddr + RTC_INTERRUPTS);
-	return 0;
+	return rtc_register_device(pdata->rtc);
 }
 
 /* work with hotplug and coldplug */
@@ -353,7 +336,6 @@ MODULE_ALIAS("platform:stk17ta8");
 
 static struct platform_driver stk17ta8_rtc_driver = {
 	.probe		= stk17ta8_rtc_probe,
-	.remove		= stk17ta8_rtc_remove,
 	.driver		= {
 		.name	= "stk17ta8",
 	},
diff --git a/drivers/rtc/rtc-sun6i.c b/drivers/rtc/rtc-sun6i.c
index 5bc28eed1adf..2e6fb275acc8 100644
--- a/drivers/rtc/rtc-sun6i.c
+++ b/drivers/rtc/rtc-sun6i.c
@@ -349,7 +349,7 @@ static int sun6i_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm)
 	 */
 	rtc_tm->tm_year += SUN6I_YEAR_OFF;
 
-	return rtc_valid_tm(rtc_tm);
+	return 0;
 }
 
 static int sun6i_rtc_getalarm(struct device *dev, struct rtc_wkalrm *wkalrm)
diff --git a/drivers/rtc/rtc-sunxi.c b/drivers/rtc/rtc-sunxi.c
index abada609ddc7..dadbf8b324ad 100644
--- a/drivers/rtc/rtc-sunxi.c
+++ b/drivers/rtc/rtc-sunxi.c
@@ -261,7 +261,7 @@ static int sunxi_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm)
 	 */
 	rtc_tm->tm_year += SUNXI_YEAR_OFF(chip->data_year);
 
-	return rtc_valid_tm(rtc_tm);
+	return 0;
 }
 
 static int sunxi_rtc_setalarm(struct device *dev, struct rtc_wkalrm *wkalrm)
diff --git a/drivers/rtc/rtc-sysfs.c b/drivers/rtc/rtc-sysfs.c
index 92ff2edb86a6..454da38c6012 100644
--- a/drivers/rtc/rtc-sysfs.c
+++ b/drivers/rtc/rtc-sysfs.c
@@ -248,6 +248,14 @@ offset_store(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR_RW(offset);
 
+static ssize_t
+range_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "[%lld,%llu]\n", to_rtc_device(dev)->range_min,
+		       to_rtc_device(dev)->range_max);
+}
+static DEVICE_ATTR_RO(range);
+
 static struct attribute *rtc_attrs[] = {
 	&dev_attr_name.attr,
 	&dev_attr_date.attr,
@@ -257,6 +265,7 @@ static struct attribute *rtc_attrs[] = {
 	&dev_attr_hctosys.attr,
 	&dev_attr_wakealarm.attr,
 	&dev_attr_offset.attr,
+	&dev_attr_range.attr,
 	NULL,
 };
 
@@ -286,6 +295,9 @@ static umode_t rtc_attr_is_visible(struct kobject *kobj,
 	} else if (attr == &dev_attr_offset.attr) {
 		if (!rtc->ops->set_offset)
 			mode = 0;
+	} else if (attr == &dev_attr_range.attr) {
+		if (!(rtc->range_max - rtc->range_min))
+			mode = 0;
 	}
 
 	return mode;
diff --git a/drivers/rtc/rtc-tegra.c b/drivers/rtc/rtc-tegra.c
index d30d57b048d3..66efff60c4d5 100644
--- a/drivers/rtc/rtc-tegra.c
+++ b/drivers/rtc/rtc-tegra.c
@@ -144,10 +144,6 @@ static int tegra_rtc_set_time(struct device *dev, struct rtc_time *tm)
 	int ret;
 
 	/* convert tm to seconds. */
-	ret = rtc_valid_tm(tm);
-	if (ret)
-		return ret;
-
 	rtc_tm_to_time(tm, &sec);
 
 	dev_vdbg(dev, "time set to %lu. %d/%d/%d %d:%02u:%02u\n",
diff --git a/drivers/rtc/rtc-tps6586x.c b/drivers/rtc/rtc-tps6586x.c
index a3418a8a3796..d7785ae0a2b4 100644
--- a/drivers/rtc/rtc-tps6586x.c
+++ b/drivers/rtc/rtc-tps6586x.c
@@ -90,7 +90,7 @@ static int tps6586x_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	seconds = ticks >> 10;
 	seconds += rtc->epoch_start;
 	rtc_time_to_tm(seconds, tm);
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int tps6586x_rtc_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/drivers/rtc/rtc-tx4939.c b/drivers/rtc/rtc-tx4939.c
index 560d9a5e0225..08dbefc79520 100644
--- a/drivers/rtc/rtc-tx4939.c
+++ b/drivers/rtc/rtc-tx4939.c
@@ -14,7 +14,30 @@
 #include <linux/module.h>
 #include <linux/io.h>
 #include <linux/gfp.h>
-#include <asm/txx9/tx4939.h>
+
+#define TX4939_RTCCTL_ALME	0x00000080
+#define TX4939_RTCCTL_ALMD	0x00000040
+#define TX4939_RTCCTL_BUSY	0x00000020
+
+#define TX4939_RTCCTL_COMMAND	0x00000007
+#define TX4939_RTCCTL_COMMAND_NOP	0x00000000
+#define TX4939_RTCCTL_COMMAND_GETTIME	0x00000001
+#define TX4939_RTCCTL_COMMAND_SETTIME	0x00000002
+#define TX4939_RTCCTL_COMMAND_GETALARM	0x00000003
+#define TX4939_RTCCTL_COMMAND_SETALARM	0x00000004
+
+#define TX4939_RTCTBC_PM	0x00000080
+#define TX4939_RTCTBC_COMP	0x0000007f
+
+#define TX4939_RTC_REG_RAMSIZE	0x00000100
+#define TX4939_RTC_REG_RWBSIZE	0x00000006
+
+struct tx4939_rtc_reg {
+	__u32 ctl;
+	__u32 adr;
+	__u32 dat;
+	__u32 tbc;
+};
 
 struct tx4939rtc_plat_data {
 	struct rtc_device *rtc;
@@ -86,9 +109,10 @@ static int tx4939_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	for (i = 2; i < 6; i++)
 		buf[i] = __raw_readl(&rtcreg->dat);
 	spin_unlock_irq(&pdata->lock);
-	sec = (buf[5] << 24) | (buf[4] << 16) | (buf[3] << 8) | buf[2];
+	sec = ((unsigned long)buf[5] << 24) | (buf[4] << 16) |
+		(buf[3] << 8) | buf[2];
 	rtc_time_to_tm(sec, tm);
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int tx4939_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
@@ -147,7 +171,8 @@ static int tx4939_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 	alrm->enabled = (ctl & TX4939_RTCCTL_ALME) ? 1 : 0;
 	alrm->pending = (ctl & TX4939_RTCCTL_ALMD) ? 1 : 0;
 	spin_unlock_irq(&pdata->lock);
-	sec = (buf[5] << 24) | (buf[4] << 16) | (buf[3] << 8) | buf[2];
+	sec = ((unsigned long)buf[5] << 24) | (buf[4] << 16) |
+		(buf[3] << 8) | buf[2];
 	rtc_time_to_tm(sec, &alrm->time);
 	return rtc_valid_tm(&alrm->time);
 }
@@ -189,58 +214,52 @@ static const struct rtc_class_ops tx4939_rtc_ops = {
 	.alarm_irq_enable	= tx4939_rtc_alarm_irq_enable,
 };
 
-static ssize_t tx4939_rtc_nvram_read(struct file *filp, struct kobject *kobj,
-				     struct bin_attribute *bin_attr,
-				     char *buf, loff_t pos, size_t size)
+static int tx4939_nvram_read(void *priv, unsigned int pos, void *val,
+			     size_t bytes)
 {
-	struct device *dev = container_of(kobj, struct device, kobj);
-	struct tx4939rtc_plat_data *pdata = get_tx4939rtc_plat_data(dev);
+	struct tx4939rtc_plat_data *pdata = priv;
 	struct tx4939_rtc_reg __iomem *rtcreg = pdata->rtcreg;
-	ssize_t count;
+	u8 *buf = val;
 
 	spin_lock_irq(&pdata->lock);
-	for (count = 0; count < size; count++) {
+	for (; bytes; bytes--) {
 		__raw_writel(pos++, &rtcreg->adr);
 		*buf++ = __raw_readl(&rtcreg->dat);
 	}
 	spin_unlock_irq(&pdata->lock);
-	return count;
+	return 0;
 }
 
-static ssize_t tx4939_rtc_nvram_write(struct file *filp, struct kobject *kobj,
-				      struct bin_attribute *bin_attr,
-				      char *buf, loff_t pos, size_t size)
+static int tx4939_nvram_write(void *priv, unsigned int pos, void *val,
+			      size_t bytes)
 {
-	struct device *dev = container_of(kobj, struct device, kobj);
-	struct tx4939rtc_plat_data *pdata = get_tx4939rtc_plat_data(dev);
+	struct tx4939rtc_plat_data *pdata = priv;
 	struct tx4939_rtc_reg __iomem *rtcreg = pdata->rtcreg;
-	ssize_t count;
+	u8 *buf = val;
 
 	spin_lock_irq(&pdata->lock);
-	for (count = 0; count < size; count++) {
+	for (; bytes; bytes--) {
 		__raw_writel(pos++, &rtcreg->adr);
 		__raw_writel(*buf++, &rtcreg->dat);
 	}
 	spin_unlock_irq(&pdata->lock);
-	return count;
+	return 0;
 }
 
-static struct bin_attribute tx4939_rtc_nvram_attr = {
-	.attr = {
-		.name = "nvram",
-		.mode = S_IRUGO | S_IWUSR,
-	},
-	.size = TX4939_RTC_REG_RAMSIZE,
-	.read = tx4939_rtc_nvram_read,
-	.write = tx4939_rtc_nvram_write,
-};
-
 static int __init tx4939_rtc_probe(struct platform_device *pdev)
 {
 	struct rtc_device *rtc;
 	struct tx4939rtc_plat_data *pdata;
 	struct resource *res;
 	int irq, ret;
+	struct nvmem_config nvmem_cfg = {
+		.name = "rv8803_nvram",
+		.word_size = 4,
+		.stride = 4,
+		.size = TX4939_RTC_REG_RAMSIZE,
+		.reg_read = tx4939_nvram_read,
+		.reg_write = tx4939_nvram_write,
+	};
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0)
@@ -260,21 +279,27 @@ static int __init tx4939_rtc_probe(struct platform_device *pdev)
 	if (devm_request_irq(&pdev->dev, irq, tx4939_rtc_interrupt,
 			     0, pdev->name, &pdev->dev) < 0)
 		return -EBUSY;
-	rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
-				  &tx4939_rtc_ops, THIS_MODULE);
+	rtc = devm_rtc_allocate_device(&pdev->dev);
 	if (IS_ERR(rtc))
 		return PTR_ERR(rtc);
+
+	rtc->ops = &tx4939_rtc_ops;
+	rtc->nvram_old_abi = true;
+
 	pdata->rtc = rtc;
-	ret = sysfs_create_bin_file(&pdev->dev.kobj, &tx4939_rtc_nvram_attr);
 
-	return ret;
+	nvmem_cfg.priv = pdata;
+	ret = rtc_nvmem_register(rtc, &nvmem_cfg);
+	if (ret)
+		return ret;
+
+	return rtc_register_device(rtc);
 }
 
 static int __exit tx4939_rtc_remove(struct platform_device *pdev)
 {
 	struct tx4939rtc_plat_data *pdata = platform_get_drvdata(pdev);
 
-	sysfs_remove_bin_file(&pdev->dev.kobj, &tx4939_rtc_nvram_attr);
 	spin_lock_irq(&pdata->lock);
 	tx4939_rtc_cmd(pdata->rtcreg, TX4939_RTCCTL_COMMAND_NOP);
 	spin_unlock_irq(&pdata->lock);
diff --git a/drivers/rtc/rtc-wm831x.c b/drivers/rtc/rtc-wm831x.c
index 75aea4c4d334..7b824dabf104 100644
--- a/drivers/rtc/rtc-wm831x.c
+++ b/drivers/rtc/rtc-wm831x.c
@@ -156,7 +156,7 @@ static int wm831x_rtc_readtime(struct device *dev, struct rtc_time *tm)
 			u32 time = (time1[0] << 16) | time1[1];
 
 			rtc_time_to_tm(time, tm);
-			return rtc_valid_tm(tm);
+			return 0;
 		}
 
 	} while (++count < WM831X_GET_TIME_RETRIES);
diff --git a/drivers/rtc/rtc-xgene.c b/drivers/rtc/rtc-xgene.c
index 0c34d3b81279..153820876a82 100644
--- a/drivers/rtc/rtc-xgene.c
+++ b/drivers/rtc/rtc-xgene.c
@@ -60,7 +60,7 @@ static int xgene_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	struct xgene_rtc_dev *pdata = dev_get_drvdata(dev);
 
 	rtc_time_to_tm(readl(pdata->csr_base + RTC_CCVR), tm);
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int xgene_rtc_set_mmss(struct device *dev, unsigned long secs)
diff --git a/drivers/rtc/rtc-zynqmp.c b/drivers/rtc/rtc-zynqmp.c
index da18a8ae3c1d..fba994dc31eb 100644
--- a/drivers/rtc/rtc-zynqmp.c
+++ b/drivers/rtc/rtc-zynqmp.c
@@ -122,7 +122,7 @@ static int xlnx_rtc_read_time(struct device *dev, struct rtc_time *tm)
 		rtc_time64_to_tm(read_time, tm);
 	}
 
-	return rtc_valid_tm(tm);
+	return 0;
 }
 
 static int xlnx_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
diff --git a/drivers/rtc/systohc.c b/drivers/rtc/systohc.c
index 0c177647ea6c..718293d72426 100644
--- a/drivers/rtc/systohc.c
+++ b/drivers/rtc/systohc.c
@@ -20,7 +20,7 @@
  * cases.
  *
  * -EPROTO is returned if now.tv_nsec is not close enough to *target_nsec.
- (
+ *
  * If temporary failure is indicated the caller should try again 'soon'
  */
 int rtc_set_ntp_time(struct timespec64 now, unsigned long *target_nsec)
diff --git a/drivers/s390/block/Kconfig b/drivers/s390/block/Kconfig
index 1444333210c7..9ac7574e3cfb 100644
--- a/drivers/s390/block/Kconfig
+++ b/drivers/s390/block/Kconfig
@@ -15,8 +15,8 @@ config BLK_DEV_XPRAM
 
 config DCSSBLK
 	def_tristate m
-	select DAX
 	select FS_DAX_LIMITED
+	select DAX_DRIVER
 	prompt "DCSSBLK support"
 	depends on S390 && BLOCK
 	help
diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig
index a993d19fa562..5c4535b545cc 100644
--- a/drivers/soc/qcom/Kconfig
+++ b/drivers/soc/qcom/Kconfig
@@ -37,7 +37,7 @@ config QCOM_PM
 
 config QCOM_QMI_HELPERS
 	tristate
-	depends on ARCH_QCOM
+	depends on ARCH_QCOM && NET
 	help
 	  Helper library for handling QMI encoded messages.  QMI encoded
 	  messages are used in communication between the majority of QRTR
diff --git a/drivers/soc/qcom/mdt_loader.c b/drivers/soc/qcom/mdt_loader.c
index 08bd8549242a..17b314d9a148 100644
--- a/drivers/soc/qcom/mdt_loader.c
+++ b/drivers/soc/qcom/mdt_loader.c
@@ -83,12 +83,14 @@ EXPORT_SYMBOL_GPL(qcom_mdt_get_size);
  * @mem_region:	allocated memory region to load firmware into
  * @mem_phys:	physical address of allocated memory region
  * @mem_size:	size of the allocated memory region
+ * @reloc_base:	adjusted physical address after relocation
  *
  * Returns 0 on success, negative errno otherwise.
  */
 int qcom_mdt_load(struct device *dev, const struct firmware *fw,
 		  const char *firmware, int pas_id, void *mem_region,
-		  phys_addr_t mem_phys, size_t mem_size)
+		  phys_addr_t mem_phys, size_t mem_size,
+		  phys_addr_t *reloc_base)
 {
 	const struct elf32_phdr *phdrs;
 	const struct elf32_phdr *phdr;
@@ -192,6 +194,9 @@ int qcom_mdt_load(struct device *dev, const struct firmware *fw,
 			memset(ptr + phdr->p_filesz, 0, phdr->p_memsz - phdr->p_filesz);
 	}
 
+	if (reloc_base)
+		*reloc_base = mem_reloc;
+
 out:
 	kfree(fw_name);
 
diff --git a/drivers/staging/lustre/lustre/llite/glimpse.c b/drivers/staging/lustre/lustre/llite/glimpse.c
index c43ac574274c..3075358f3f08 100644
--- a/drivers/staging/lustre/lustre/llite/glimpse.c
+++ b/drivers/staging/lustre/lustre/llite/glimpse.c
@@ -69,7 +69,7 @@ blkcnt_t dirty_cnt(struct inode *inode)
 	void	      *results[1];
 
 	if (inode->i_mapping)
-		cnt += radix_tree_gang_lookup_tag(&inode->i_mapping->page_tree,
+		cnt += radix_tree_gang_lookup_tag(&inode->i_mapping->i_pages,
 						  results, 0, 1,
 						  PAGECACHE_TAG_DIRTY);
 	if (cnt == 0 && atomic_read(&vob->vob_mmap_cnt) > 0)
diff --git a/drivers/staging/lustre/lustre/mdc/mdc_request.c b/drivers/staging/lustre/lustre/mdc/mdc_request.c
index 3b1c8e5a3053..8ee7b4d273b2 100644
--- a/drivers/staging/lustre/lustre/mdc/mdc_request.c
+++ b/drivers/staging/lustre/lustre/mdc/mdc_request.c
@@ -934,14 +934,14 @@ static struct page *mdc_page_locate(struct address_space *mapping, __u64 *hash,
 	struct page *page;
 	int found;
 
-	spin_lock_irq(&mapping->tree_lock);
-	found = radix_tree_gang_lookup(&mapping->page_tree,
+	xa_lock_irq(&mapping->i_pages);
+	found = radix_tree_gang_lookup(&mapping->i_pages,
 				       (void **)&page, offset, 1);
 	if (found > 0 && !radix_tree_exceptional_entry(page)) {
 		struct lu_dirpage *dp;
 
 		get_page(page);
-		spin_unlock_irq(&mapping->tree_lock);
+		xa_unlock_irq(&mapping->i_pages);
 		/*
 		 * In contrast to find_lock_page() we are sure that directory
 		 * page cannot be truncated (while DLM lock is held) and,
@@ -989,7 +989,7 @@ static struct page *mdc_page_locate(struct address_space *mapping, __u64 *hash,
 			page = ERR_PTR(-EIO);
 		}
 	} else {
-		spin_unlock_irq(&mapping->tree_lock);
+		xa_unlock_irq(&mapping->i_pages);
 		page = NULL;
 	}
 	return page;
diff --git a/drivers/staging/media/atomisp/i2c/atomisp-gc0310.c b/drivers/staging/media/atomisp/i2c/atomisp-gc0310.c
index 93753cb96180..512fa87fa11b 100644
--- a/drivers/staging/media/atomisp/i2c/atomisp-gc0310.c
+++ b/drivers/staging/media/atomisp/i2c/atomisp-gc0310.c
@@ -619,7 +619,7 @@ static const struct v4l2_ctrl_ops ctrl_ops = {
 	.g_volatile_ctrl = gc0310_g_volatile_ctrl
 };
 
-struct v4l2_ctrl_config gc0310_controls[] = {
+static const struct v4l2_ctrl_config gc0310_controls[] = {
 	{
 	 .ops = &ctrl_ops,
 	 .id = V4L2_CID_EXPOSURE_ABSOLUTE,
diff --git a/drivers/staging/media/atomisp/i2c/atomisp-mt9m114.c b/drivers/staging/media/atomisp/i2c/atomisp-mt9m114.c
index 834fba8c4fa0..44db9f9f1fc5 100644
--- a/drivers/staging/media/atomisp/i2c/atomisp-mt9m114.c
+++ b/drivers/staging/media/atomisp/i2c/atomisp-mt9m114.c
@@ -107,7 +107,7 @@ mt9m114_write_reg(struct i2c_client *client, u16 data_length, u16 reg, u32 val)
 	int num_msg;
 	struct i2c_msg msg;
 	unsigned char data[6] = {0};
-	u16 *wreg;
+	__be16 *wreg;
 	int retry = 0;
 
 	if (!client->adapter) {
@@ -130,18 +130,20 @@ again:
 	msg.buf = data;
 
 	/* high byte goes out first */
-	wreg = (u16 *)data;
+	wreg = (void *)data;
 	*wreg = cpu_to_be16(reg);
 
 	if (data_length == MISENSOR_8BIT) {
 		data[2] = (u8)(val);
 	} else if (data_length == MISENSOR_16BIT) {
-		u16 *wdata = (u16 *)&data[2];
-		*wdata = be16_to_cpu((u16)val);
+		u16 *wdata = (void *)&data[2];
+
+		*wdata = be16_to_cpu(*(__be16 *)&data[2]);
 	} else {
 		/* MISENSOR_32BIT */
-		u32 *wdata = (u32 *)&data[2];
-		*wdata = be32_to_cpu(val);
+		u32 *wdata = (void *)&data[2];
+
+		*wdata = be32_to_cpu(*(__be32 *)&data[2]);
 	}
 
 	num_msg = i2c_transfer(client->adapter, &msg, 1);
@@ -245,6 +247,7 @@ static int __mt9m114_flush_reg_array(struct i2c_client *client,
 	const int num_msg = 1;
 	int ret;
 	int retry = 0;
+	__be16 *data16 = (void *)&ctrl->buffer.addr;
 
 	if (ctrl->index == 0)
 		return 0;
@@ -253,7 +256,7 @@ again:
 	msg.addr = client->addr;
 	msg.flags = 0;
 	msg.len = 2 + ctrl->index;
-	ctrl->buffer.addr = cpu_to_be16(ctrl->buffer.addr);
+	*data16 = cpu_to_be16(ctrl->buffer.addr);
 	msg.buf = (u8 *)&ctrl->buffer;
 
 	ret = i2c_transfer(client->adapter, &msg, num_msg);
@@ -282,8 +285,8 @@ static int __mt9m114_buf_reg_array(struct i2c_client *client,
 				   struct mt9m114_write_ctrl *ctrl,
 				   const struct misensor_reg *next)
 {
-	u16 *data16;
-	u32 *data32;
+	__be16 *data16;
+	__be32 *data32;
 	int err;
 
 	/* Insufficient buffer? Let's flush and get more free space. */
@@ -298,11 +301,11 @@ static int __mt9m114_buf_reg_array(struct i2c_client *client,
 		ctrl->buffer.data[ctrl->index] = (u8)next->val;
 		break;
 	case MISENSOR_16BIT:
-		data16 = (u16 *)&ctrl->buffer.data[ctrl->index];
+		data16 = (__be16 *)&ctrl->buffer.data[ctrl->index];
 		*data16 = cpu_to_be16((u16)next->val);
 		break;
 	case MISENSOR_32BIT:
-		data32 = (u32 *)&ctrl->buffer.data[ctrl->index];
+		data32 = (__be32 *)&ctrl->buffer.data[ctrl->index];
 		*data32 = cpu_to_be32(next->val);
 		break;
 	default:
diff --git a/drivers/staging/media/atomisp/i2c/atomisp-ov2680.c b/drivers/staging/media/atomisp/i2c/atomisp-ov2680.c
index 11412061c40e..c0849299d592 100644
--- a/drivers/staging/media/atomisp/i2c/atomisp-ov2680.c
+++ b/drivers/staging/media/atomisp/i2c/atomisp-ov2680.c
@@ -94,9 +94,9 @@ static int ov2680_read_reg(struct i2c_client *client,
 	if (data_length == OV2680_8BIT)
 		*val = (u8)data[0];
 	else if (data_length == OV2680_16BIT)
-		*val = be16_to_cpu(*(u16 *)&data[0]);
+		*val = be16_to_cpu(*(__be16 *)&data[0]);
 	else
-		*val = be32_to_cpu(*(u32 *)&data[0]);
+		*val = be32_to_cpu(*(__be32 *)&data[0]);
 	//dev_dbg(&client->dev,  "++++i2c read adr%x = %x\n", reg,*val);
 	return 0;
 }
@@ -121,7 +121,7 @@ static int ov2680_write_reg(struct i2c_client *client, u16 data_length,
 {
 	int ret;
 	unsigned char data[4] = {0};
-	u16 *wreg = (u16 *)data;
+	__be16 *wreg = (void *)data;
 	const u16 len = data_length + sizeof(u16); /* 16-bit address + data */
 
 	if (data_length != OV2680_8BIT && data_length != OV2680_16BIT) {
@@ -137,7 +137,8 @@ static int ov2680_write_reg(struct i2c_client *client, u16 data_length,
 		data[2] = (u8)(val);
 	} else {
 		/* OV2680_16BIT */
-		u16 *wdata = (u16 *)&data[2];
+		__be16 *wdata = (void *)&data[2];
+
 		*wdata = cpu_to_be16(val);
 	}
 
@@ -169,12 +170,13 @@ static int __ov2680_flush_reg_array(struct i2c_client *client,
 				    struct ov2680_write_ctrl *ctrl)
 {
 	u16 size;
+	__be16 *data16 = (void *)&ctrl->buffer.addr;
 
 	if (ctrl->index == 0)
 		return 0;
 
 	size = sizeof(u16) + ctrl->index; /* 16-bit address + data */
-	ctrl->buffer.addr = cpu_to_be16(ctrl->buffer.addr);
+	*data16 = cpu_to_be16(ctrl->buffer.addr);
 	ctrl->index = 0;
 
 	return ov2680_i2c_write(client, size, (u8 *)&ctrl->buffer);
@@ -185,7 +187,7 @@ static int __ov2680_buf_reg_array(struct i2c_client *client,
 				  const struct ov2680_reg *next)
 {
 	int size;
-	u16 *data16;
+	__be16 *data16;
 
 	switch (next->type) {
 	case OV2680_8BIT:
@@ -194,7 +196,7 @@ static int __ov2680_buf_reg_array(struct i2c_client *client,
 		break;
 	case OV2680_16BIT:
 		size = 2;
-		data16 = (u16 *)&ctrl->buffer.data[ctrl->index];
+		data16 = (void *)&ctrl->buffer.data[ctrl->index];
 		*data16 = cpu_to_be16((u16)next->val);
 		break;
 	default:
@@ -722,7 +724,7 @@ static const struct v4l2_ctrl_ops ctrl_ops = {
 	.g_volatile_ctrl = ov2680_g_volatile_ctrl
 };
 
-struct v4l2_ctrl_config ov2680_controls[] = {
+static const struct v4l2_ctrl_config ov2680_controls[] = {
 	{
 	 .ops = &ctrl_ops,
 	 .id = V4L2_CID_EXPOSURE_ABSOLUTE,
diff --git a/drivers/staging/media/atomisp/i2c/atomisp-ov2722.c b/drivers/staging/media/atomisp/i2c/atomisp-ov2722.c
index e59358ac89ce..a362eebd882f 100644
--- a/drivers/staging/media/atomisp/i2c/atomisp-ov2722.c
+++ b/drivers/staging/media/atomisp/i2c/atomisp-ov2722.c
@@ -85,9 +85,9 @@ static int ov2722_read_reg(struct i2c_client *client,
 	if (data_length == OV2722_8BIT)
 		*val = (u8)data[0];
 	else if (data_length == OV2722_16BIT)
-		*val = be16_to_cpu(*(u16 *)&data[0]);
+		*val = be16_to_cpu(*(__be16 *)&data[0]);
 	else
-		*val = be32_to_cpu(*(u32 *)&data[0]);
+		*val = be32_to_cpu(*(__be32 *)&data[0]);
 
 	return 0;
 }
@@ -112,7 +112,7 @@ static int ov2722_write_reg(struct i2c_client *client, u16 data_length,
 {
 	int ret;
 	unsigned char data[4] = {0};
-	u16 *wreg = (u16 *)data;
+	__be16 *wreg = (__be16 *)data;
 	const u16 len = data_length + sizeof(u16); /* 16-bit address + data */
 
 	if (data_length != OV2722_8BIT && data_length != OV2722_16BIT) {
@@ -128,7 +128,8 @@ static int ov2722_write_reg(struct i2c_client *client, u16 data_length,
 		data[2] = (u8)(val);
 	} else {
 		/* OV2722_16BIT */
-		u16 *wdata = (u16 *)&data[2];
+		__be16 *wdata = (__be16 *)&data[2];
+
 		*wdata = cpu_to_be16(val);
 	}
 
@@ -160,12 +161,13 @@ static int __ov2722_flush_reg_array(struct i2c_client *client,
 				    struct ov2722_write_ctrl *ctrl)
 {
 	u16 size;
+	__be16 *data16 = (void *)&ctrl->buffer.addr;
 
 	if (ctrl->index == 0)
 		return 0;
 
 	size = sizeof(u16) + ctrl->index; /* 16-bit address + data */
-	ctrl->buffer.addr = cpu_to_be16(ctrl->buffer.addr);
+	*data16 = cpu_to_be16(ctrl->buffer.addr);
 	ctrl->index = 0;
 
 	return ov2722_i2c_write(client, size, (u8 *)&ctrl->buffer);
@@ -176,7 +178,7 @@ static int __ov2722_buf_reg_array(struct i2c_client *client,
 				  const struct ov2722_reg *next)
 {
 	int size;
-	u16 *data16;
+	__be16 *data16;
 
 	switch (next->type) {
 	case OV2722_8BIT:
@@ -185,7 +187,7 @@ static int __ov2722_buf_reg_array(struct i2c_client *client,
 		break;
 	case OV2722_16BIT:
 		size = 2;
-		data16 = (u16 *)&ctrl->buffer.data[ctrl->index];
+		data16 = (void *)&ctrl->buffer.data[ctrl->index];
 		*data16 = cpu_to_be16((u16)next->val);
 		break;
 	default:
@@ -569,7 +571,7 @@ static const struct v4l2_ctrl_ops ctrl_ops = {
 	.g_volatile_ctrl = ov2722_g_volatile_ctrl
 };
 
-struct v4l2_ctrl_config ov2722_controls[] = {
+static const struct v4l2_ctrl_config ov2722_controls[] = {
 	{
 	 .ops = &ctrl_ops,
 	 .id = V4L2_CID_EXPOSURE_ABSOLUTE,
diff --git a/drivers/staging/media/atomisp/i2c/gc0310.h b/drivers/staging/media/atomisp/i2c/gc0310.h
index af6b11f6e5e7..70c252c5163c 100644
--- a/drivers/staging/media/atomisp/i2c/gc0310.h
+++ b/drivers/staging/media/atomisp/i2c/gc0310.h
@@ -377,8 +377,7 @@ static struct gc0310_reg const gc0310_VGA_30fps[] = {
 	{GC0310_TOK_TERM, 0, 0},
 };
 
-
-struct gc0310_resolution gc0310_res_preview[] = {
+static struct gc0310_resolution gc0310_res_preview[] = {
 	{
 		.desc = "gc0310_VGA_30fps",
 		.width = 656, // 648,
diff --git a/drivers/staging/media/atomisp/i2c/ov2722.h b/drivers/staging/media/atomisp/i2c/ov2722.h
index 028b04aaaa8f..757b37613ccc 100644
--- a/drivers/staging/media/atomisp/i2c/ov2722.h
+++ b/drivers/staging/media/atomisp/i2c/ov2722.h
@@ -1096,7 +1096,7 @@ static struct ov2722_reg const ov2722_720p_30fps[] = {
 	{OV2722_TOK_TERM, 0, 0},
 };
 
-struct ov2722_resolution ov2722_res_preview[] = {
+static struct ov2722_resolution ov2722_res_preview[] = {
 	{
 		.desc = "ov2722_1632_1092_30fps",
 		.width = 1632,
diff --git a/drivers/staging/media/atomisp/i2c/ov5693/atomisp-ov5693.c b/drivers/staging/media/atomisp/i2c/ov5693/atomisp-ov5693.c
index 30a735e59e54..714297c36b3e 100644
--- a/drivers/staging/media/atomisp/i2c/ov5693/atomisp-ov5693.c
+++ b/drivers/staging/media/atomisp/i2c/ov5693/atomisp-ov5693.c
@@ -173,9 +173,9 @@ static int ov5693_read_reg(struct i2c_client *client,
 	if (data_length == OV5693_8BIT)
 		*val = (u8)data[0];
 	else if (data_length == OV5693_16BIT)
-		*val = be16_to_cpu(*(u16 *)&data[0]);
+		*val = be16_to_cpu(*(__be16 *)&data[0]);
 	else
-		*val = be32_to_cpu(*(u32 *)&data[0]);
+		*val = be32_to_cpu(*(__be32 *)&data[0]);
 
 	return 0;
 }
@@ -200,13 +200,13 @@ static int vcm_dw_i2c_write(struct i2c_client *client, u16 data)
 	struct i2c_msg msg;
 	const int num_msg = 1;
 	int ret;
-	u16 val;
+	__be16 val;
 
 	val = cpu_to_be16(data);
 	msg.addr = VCM_ADDR;
 	msg.flags = 0;
 	msg.len = OV5693_16BIT;
-	msg.buf = (u8 *)&val;
+	msg.buf = (void *)&val;
 
 	ret = i2c_transfer(client->adapter, &msg, 1);
 
@@ -263,7 +263,7 @@ static int ov5693_write_reg(struct i2c_client *client, u16 data_length,
 {
 	int ret;
 	unsigned char data[4] = {0};
-	u16 *wreg = (u16 *)data;
+	__be16 *wreg = (void *)data;
 	const u16 len = data_length + sizeof(u16); /* 16-bit address + data */
 
 	if (data_length != OV5693_8BIT && data_length != OV5693_16BIT) {
@@ -279,7 +279,8 @@ static int ov5693_write_reg(struct i2c_client *client, u16 data_length,
 		data[2] = (u8)(val);
 	} else {
 		/* OV5693_16BIT */
-		u16 *wdata = (u16 *)&data[2];
+		__be16 *wdata = (void *)&data[2];
+
 		*wdata = cpu_to_be16(val);
 	}
 
@@ -311,15 +312,17 @@ static int __ov5693_flush_reg_array(struct i2c_client *client,
 				    struct ov5693_write_ctrl *ctrl)
 {
 	u16 size;
+	__be16 *reg = (void *)&ctrl->buffer.addr;
 
 	if (ctrl->index == 0)
 		return 0;
 
 	size = sizeof(u16) + ctrl->index; /* 16-bit address + data */
-	ctrl->buffer.addr = cpu_to_be16(ctrl->buffer.addr);
+
+	*reg = cpu_to_be16(ctrl->buffer.addr);
 	ctrl->index = 0;
 
-	return ov5693_i2c_write(client, size, (u8 *)&ctrl->buffer);
+	return ov5693_i2c_write(client, size, (u8 *)reg);
 }
 
 static int __ov5693_buf_reg_array(struct i2c_client *client,
@@ -327,7 +330,7 @@ static int __ov5693_buf_reg_array(struct i2c_client *client,
 				  const struct ov5693_reg *next)
 {
 	int size;
-	u16 *data16;
+	__be16 *data16;
 
 	switch (next->type) {
 	case OV5693_8BIT:
@@ -336,7 +339,8 @@ static int __ov5693_buf_reg_array(struct i2c_client *client,
 		break;
 	case OV5693_16BIT:
 		size = 2;
-		data16 = (u16 *)&ctrl->buffer.data[ctrl->index];
+
+		data16 = (void *)&ctrl->buffer.data[ctrl->index];
 		*data16 = cpu_to_be16((u16)next->val);
 		break;
 	default:
@@ -951,7 +955,7 @@ static int ad5823_t_focus_vcm(struct v4l2_subdev *sd, u16 val)
 	return ret;
 }
 
-int ad5823_t_focus_abs(struct v4l2_subdev *sd, s32 value)
+static int ad5823_t_focus_abs(struct v4l2_subdev *sd, s32 value)
 {
 	value = min(value, AD5823_MAX_FOCUS_POS);
 	return ad5823_t_focus_vcm(sd, value);
@@ -1132,7 +1136,7 @@ static const struct v4l2_ctrl_ops ctrl_ops = {
 	.g_volatile_ctrl = ov5693_g_volatile_ctrl
 };
 
-struct v4l2_ctrl_config ov5693_controls[] = {
+static const struct v4l2_ctrl_config ov5693_controls[] = {
 	{
 	 .ops = &ctrl_ops,
 	 .id = V4L2_CID_EXPOSURE_ABSOLUTE,
diff --git a/drivers/staging/media/atomisp/i2c/ov5693/ov5693.h b/drivers/staging/media/atomisp/i2c/ov5693/ov5693.h
index 6d27dd849a62..9058a82455a6 100644
--- a/drivers/staging/media/atomisp/i2c/ov5693/ov5693.h
+++ b/drivers/staging/media/atomisp/i2c/ov5693/ov5693.h
@@ -1087,7 +1087,7 @@ static struct ov5693_reg const ov5693_2576x1936_30fps[] = {
 	{OV5693_TOK_TERM, 0, 0}
 };
 
-struct ov5693_resolution ov5693_res_preview[] = {
+static struct ov5693_resolution ov5693_res_preview[] = {
 	{
 		.desc = "ov5693_736x496_30fps",
 		.width = 736,
diff --git a/drivers/staging/media/atomisp/include/linux/atomisp_platform.h b/drivers/staging/media/atomisp/include/linux/atomisp_platform.h
index e0f0c379e7ce..aa5e294e7b7d 100644
--- a/drivers/staging/media/atomisp/include/linux/atomisp_platform.h
+++ b/drivers/staging/media/atomisp/include/linux/atomisp_platform.h
@@ -104,6 +104,10 @@ enum atomisp_input_format {
 	ATOMISP_INPUT_FORMAT_USER_DEF8,  /* User defined 8-bit data type 8 */
 };
 
+#define N_ATOMISP_INPUT_FORMAT (ATOMISP_INPUT_FORMAT_USER_DEF8 + 1)
+
+
+
 enum intel_v4l2_subdev_type {
 	RAW_CAMERA = 1,
 	SOC_CAMERA = 2,
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/Makefile b/drivers/staging/media/atomisp/pci/atomisp2/Makefile
index 83f816faba1b..7fead5fc9a7d 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/Makefile
+++ b/drivers/staging/media/atomisp/pci/atomisp2/Makefile
@@ -59,17 +59,14 @@ atomisp-objs += \
 	css2400/isp/kernels/bnr/bnr_1.0/ia_css_bnr.host.o \
 	css2400/isp/kernels/bnr/bnr2_2/ia_css_bnr2_2.host.o \
 	css2400/isp/kernels/dpc2/ia_css_dpc2.host.o \
-	css2400/isp/kernels/dpc2/ia_css_dpc2_default.host.o \
 	css2400/isp/kernels/fc/fc_1.0/ia_css_formats.host.o \
 	css2400/isp/kernels/ctc/ctc_1.0/ia_css_ctc.host.o \
 	css2400/isp/kernels/ctc/ctc_1.0/ia_css_ctc_table.host.o \
 	css2400/isp/kernels/ctc/ctc2/ia_css_ctc2.host.o \
 	css2400/isp/kernels/ctc/ctc1_5/ia_css_ctc1_5.host.o \
 	css2400/isp/kernels/bh/bh_2/ia_css_bh.host.o \
-	css2400/isp/kernels/bnlm/ia_css_bnlm_default.host.o \
 	css2400/isp/kernels/bnlm/ia_css_bnlm.host.o \
 	css2400/isp/kernels/tdf/tdf_1.0/ia_css_tdf.host.o \
-	css2400/isp/kernels/tdf/tdf_1.0/ia_css_tdf_default.host.o \
 	css2400/isp/kernels/dvs/dvs_1.0/ia_css_dvs.host.o \
 	css2400/isp/kernels/anr/anr_1.0/ia_css_anr.host.o \
 	css2400/isp/kernels/anr/anr_2/ia_css_anr2_table.host.o \
@@ -96,7 +93,6 @@ atomisp-objs += \
 	css2400/isp/kernels/ob/ob2/ia_css_ob2.host.o \
 	css2400/isp/kernels/iterator/iterator_1.0/ia_css_iterator.host.o \
 	css2400/isp/kernels/wb/wb_1.0/ia_css_wb.host.o \
-	css2400/isp/kernels/eed1_8/ia_css_eed1_8_default.host.o \
 	css2400/isp/kernels/eed1_8/ia_css_eed1_8.host.o \
 	css2400/isp/kernels/sc/sc_1.0/ia_css_sc.host.o \
 	css2400/isp/kernels/ipu2_io_ls/bayer_io_ls/ia_css_bayer_io.host.o \
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_cmd.c b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_cmd.c
index 22f2dbcecc15..fa6ea506f8b1 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_cmd.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_cmd.c
@@ -437,7 +437,7 @@ static void atomisp_reset_event(struct atomisp_sub_device *asd)
 }
 
 
-static void print_csi_rx_errors(enum ia_css_csi2_port port,
+static void print_csi_rx_errors(enum mipi_port_id port,
 				struct atomisp_device *isp)
 {
 	u32 infos = 0;
@@ -481,7 +481,7 @@ static void clear_irq_reg(struct atomisp_device *isp)
 }
 
 static struct atomisp_sub_device *
-__get_asd_from_port(struct atomisp_device *isp, mipi_port_ID_t port)
+__get_asd_from_port(struct atomisp_device *isp, enum mipi_port_id port)
 {
 	int i;
 
@@ -515,7 +515,7 @@ irqreturn_t atomisp_isr(int irq, void *dev)
 
 	spin_lock_irqsave(&isp->lock, flags);
 	if (isp->sw_contex.power_state != ATOM_ISP_POWER_UP ||
-	    isp->css_initialized == false) {
+	    !isp->css_initialized) {
 		spin_unlock_irqrestore(&isp->lock, flags);
 		return IRQ_HANDLED;
 	}
@@ -570,9 +570,9 @@ irqreturn_t atomisp_isr(int irq, void *dev)
 	    (irq_infos & CSS_IRQ_INFO_IF_ERROR)) {
 		/* handle mipi receiver error */
 		u32 rx_infos;
-		enum ia_css_csi2_port port;
+		enum mipi_port_id port;
 
-		for (port = IA_CSS_CSI2_PORT0; port <= IA_CSS_CSI2_PORT2;
+		for (port = MIPI_PORT0_ID; port <= MIPI_PORT2_ID;
 		     port++) {
 			print_csi_rx_errors(port, isp);
 			atomisp_css_rx_get_irq_info(port, &rx_infos);
@@ -4603,7 +4603,7 @@ int atomisp_fixed_pattern(struct atomisp_sub_device *asd, int flag,
 	}
 
 	if (*value == 0) {
-		asd->params.fpn_en = 0;
+		asd->params.fpn_en = false;
 		return 0;
 	}
 
@@ -5028,7 +5028,7 @@ atomisp_try_fmt_file(struct atomisp_device *isp, struct v4l2_format *f)
 	return 0;
 }
 
-mipi_port_ID_t __get_mipi_port(struct atomisp_device *isp,
+enum mipi_port_id __get_mipi_port(struct atomisp_device *isp,
 				enum atomisp_camera_port port)
 {
 	switch (port) {
@@ -5162,22 +5162,22 @@ static int __enable_continuous_mode(struct atomisp_sub_device *asd,
 	return atomisp_update_run_mode(asd);
 }
 
-int configure_pp_input_nop(struct atomisp_sub_device *asd,
-			   unsigned int width, unsigned int height)
+static int configure_pp_input_nop(struct atomisp_sub_device *asd,
+				  unsigned int width, unsigned int height)
 {
 	return 0;
 }
 
-int configure_output_nop(struct atomisp_sub_device *asd,
-			 unsigned int width, unsigned int height,
-			 unsigned int min_width,
-			 enum atomisp_css_frame_format sh_fmt)
+static int configure_output_nop(struct atomisp_sub_device *asd,
+				unsigned int width, unsigned int height,
+				unsigned int min_width,
+				enum atomisp_css_frame_format sh_fmt)
 {
 	return 0;
 }
 
-int get_frame_info_nop(struct atomisp_sub_device *asd,
-		       struct atomisp_css_frame_info *finfo)
+static int get_frame_info_nop(struct atomisp_sub_device *asd,
+			      struct atomisp_css_frame_info *finfo)
 {
 	return 0;
 }
@@ -5524,7 +5524,7 @@ static void atomisp_get_dis_envelop(struct atomisp_sub_device *asd,
 
 	/* if subdev type is SOC camera,we do not need to set DVS */
 	if (isp->inputs[asd->input_curr].type == SOC_CAMERA)
-		asd->params.video_dis_en = 0;
+		asd->params.video_dis_en = false;
 
 	if (asd->params.video_dis_en &&
 	    asd->run_mode->val == ATOMISP_RUN_MODE_VIDEO) {
@@ -5624,7 +5624,7 @@ static int atomisp_set_fmt_to_snr(struct video_device *vdev,
 			ffmt = req_ffmt;
 			dev_warn(isp->dev,
 			  "can not enable video dis due to sensor limitation.");
-			asd->params.video_dis_en = 0;
+			asd->params.video_dis_en = false;
 		}
 	}
 	dev_dbg(isp->dev, "sensor width: %d, height: %d\n",
@@ -5649,7 +5649,7 @@ static int atomisp_set_fmt_to_snr(struct video_device *vdev,
 	    (ffmt->width < req_ffmt->width || ffmt->height < req_ffmt->height)) {
 		dev_warn(isp->dev,
 			 "can not enable video dis due to sensor limitation.");
-		asd->params.video_dis_en = 0;
+		asd->params.video_dis_en = false;
 	}
 
 	atomisp_subdev_set_ffmt(&asd->subdev, fh.pad,
@@ -6152,7 +6152,7 @@ int atomisp_set_shading_table(struct atomisp_sub_device *asd,
 
 	if (!user_shading_table->enable) {
 		atomisp_css_set_shading_table(asd, NULL);
-		asd->params.sc_en = 0;
+		asd->params.sc_en = false;
 		return 0;
 	}
 
@@ -6190,7 +6190,7 @@ int atomisp_set_shading_table(struct atomisp_sub_device *asd,
 	free_table = asd->params.css_param.shading_table;
 	asd->params.css_param.shading_table = shading_table;
 	atomisp_css_set_shading_table(asd, shading_table);
-	asd->params.sc_en = 1;
+	asd->params.sc_en = true;
 
 out:
 	if (free_table != NULL)
@@ -6627,7 +6627,7 @@ int atomisp_inject_a_fake_event(struct atomisp_sub_device *asd, int *event)
 	return 0;
 }
 
-int atomisp_get_pipe_id(struct atomisp_video_pipe *pipe)
+static int atomisp_get_pipe_id(struct atomisp_video_pipe *pipe)
 {
 	struct atomisp_sub_device *asd = pipe->asd;
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_cmd.h b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_cmd.h
index bdc73862fb79..79d493dba403 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_cmd.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_cmd.h
@@ -389,7 +389,7 @@ int atomisp_source_pad_to_stream_id(struct atomisp_sub_device *asd,
  */
 void atomisp_eof_event(struct atomisp_sub_device *asd, uint8_t exp_id);
 
-mipi_port_ID_t __get_mipi_port(struct atomisp_device *isp,
+enum mipi_port_id __get_mipi_port(struct atomisp_device *isp,
 				enum atomisp_camera_port port);
 
 bool atomisp_is_vf_pipe(struct atomisp_video_pipe *pipe);
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat.h b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat.h
index 3ef850cd25bd..6c829d0a1e4c 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat.h
@@ -148,10 +148,10 @@ void atomisp_css_init_struct(struct atomisp_sub_device *asd);
 int atomisp_css_irq_translate(struct atomisp_device *isp,
 			      unsigned int *infos);
 
-void atomisp_css_rx_get_irq_info(enum ia_css_csi2_port port,
+void atomisp_css_rx_get_irq_info(enum mipi_port_id port,
 					unsigned int *infos);
 
-void atomisp_css_rx_clear_irq_info(enum ia_css_csi2_port port,
+void atomisp_css_rx_clear_irq_info(enum mipi_port_id port,
 					unsigned int infos);
 
 int atomisp_css_irq_enable(struct atomisp_device *isp,
@@ -182,8 +182,6 @@ void atomisp_css_mmu_invalidate_cache(void);
 
 void atomisp_css_mmu_invalidate_tlb(void);
 
-void atomisp_css_mmu_set_page_table_base_index(unsigned long base_index);
-
 int atomisp_css_start(struct atomisp_sub_device *asd,
 		      enum atomisp_css_pipe_id pipe_id, bool in_reset);
 
@@ -255,7 +253,7 @@ void atomisp_css_isys_set_valid(struct atomisp_sub_device *asd,
 
 void atomisp_css_isys_set_format(struct atomisp_sub_device *asd,
 				 enum atomisp_input_stream_id stream_id,
-				 enum atomisp_css_stream_format format,
+				 enum atomisp_input_format format,
 				 int isys_stream);
 
 int atomisp_css_set_default_isys_config(struct atomisp_sub_device *asd,
@@ -264,18 +262,18 @@ int atomisp_css_set_default_isys_config(struct atomisp_sub_device *asd,
 
 int atomisp_css_isys_two_stream_cfg(struct atomisp_sub_device *asd,
 				    enum atomisp_input_stream_id stream_id,
-				    enum atomisp_css_stream_format input_format);
+				    enum atomisp_input_format input_format);
 
 void atomisp_css_isys_two_stream_cfg_update_stream1(
 				    struct atomisp_sub_device *asd,
 				    enum atomisp_input_stream_id stream_id,
-				    enum atomisp_css_stream_format input_format,
+				    enum atomisp_input_format input_format,
 				    unsigned int width, unsigned int height);
 
 void atomisp_css_isys_two_stream_cfg_update_stream2(
 				    struct atomisp_sub_device *asd,
 				    enum atomisp_input_stream_id stream_id,
-				    enum atomisp_css_stream_format input_format,
+				    enum atomisp_input_format input_format,
 				    unsigned int width, unsigned int height);
 
 int atomisp_css_input_set_resolution(struct atomisp_sub_device *asd,
@@ -292,7 +290,7 @@ void atomisp_css_input_set_bayer_order(struct atomisp_sub_device *asd,
 
 void atomisp_css_input_set_format(struct atomisp_sub_device *asd,
 				enum atomisp_input_stream_id stream_id,
-				enum atomisp_css_stream_format format);
+				enum atomisp_input_format format);
 
 int atomisp_css_input_set_effective_resolution(
 					struct atomisp_sub_device *asd,
@@ -334,11 +332,11 @@ void atomisp_css_enable_cvf(struct atomisp_sub_device *asd,
 							bool enable);
 
 int atomisp_css_input_configure_port(struct atomisp_sub_device *asd,
-				mipi_port_ID_t port,
+				enum mipi_port_id port,
 				unsigned int num_lanes,
 				unsigned int timeout,
 				unsigned int mipi_freq,
-				enum atomisp_css_stream_format metadata_format,
+				enum atomisp_input_format metadata_format,
 				unsigned int metadata_width,
 				unsigned int metadata_height);
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_css20.c b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_css20.c
index 7621b4537147..f668c68dc33a 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_css20.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_css20.c
@@ -88,7 +88,7 @@ unsigned int atomisp_css_debug_get_dtrace_level(void)
 	return ia_css_debug_trace_level;
 }
 
-void atomisp_css2_hw_store_8(hrt_address addr, uint8_t data)
+static void atomisp_css2_hw_store_8(hrt_address addr, uint8_t data)
 {
 	unsigned long flags;
 
@@ -126,7 +126,7 @@ static uint8_t atomisp_css2_hw_load_8(hrt_address addr)
 	return ret;
 }
 
-uint16_t atomisp_css2_hw_load_16(hrt_address addr)
+static uint16_t atomisp_css2_hw_load_16(hrt_address addr)
 {
 	unsigned long flags;
 	uint16_t ret;
@@ -136,7 +136,8 @@ uint16_t atomisp_css2_hw_load_16(hrt_address addr)
 	spin_unlock_irqrestore(&mmio_lock, flags);
 	return ret;
 }
-uint32_t atomisp_css2_hw_load_32(hrt_address addr)
+
+static uint32_t atomisp_css2_hw_load_32(hrt_address addr)
 {
 	unsigned long flags;
 	uint32_t ret;
@@ -1019,7 +1020,7 @@ int atomisp_css_irq_translate(struct atomisp_device *isp,
 	return 0;
 }
 
-void atomisp_css_rx_get_irq_info(enum ia_css_csi2_port port,
+void atomisp_css_rx_get_irq_info(enum mipi_port_id port,
 					unsigned int *infos)
 {
 #ifndef ISP2401_NEW_INPUT_SYSTEM
@@ -1029,7 +1030,7 @@ void atomisp_css_rx_get_irq_info(enum ia_css_csi2_port port,
 #endif
 }
 
-void atomisp_css_rx_clear_irq_info(enum ia_css_csi2_port port,
+void atomisp_css_rx_clear_irq_info(enum mipi_port_id port,
 					unsigned int infos)
 {
 #ifndef ISP2401_NEW_INPUT_SYSTEM
@@ -1159,31 +1160,6 @@ void atomisp_css_mmu_invalidate_tlb(void)
 	ia_css_mmu_invalidate_cache();
 }
 
-void atomisp_css_mmu_set_page_table_base_index(unsigned long base_index)
-{
-}
-
-/*
- * Check whether currently running MIPI buffer size fulfill
- * the requirement of the stream to be run
- */
-bool __need_realloc_mipi_buffer(struct atomisp_device *isp)
-{
-	unsigned int i;
-
-	for (i = 0; i < isp->num_of_streams; i++) {
-		struct atomisp_sub_device *asd = &isp->asd[i];
-
-		if (asd->streaming !=
-				ATOMISP_DEVICE_STREAMING_ENABLED)
-			continue;
-		if (asd->mipi_frame_size < isp->mipi_frame_size)
-			return true;
-	}
-
-	return false;
-}
-
 int atomisp_css_start(struct atomisp_sub_device *asd,
 			enum atomisp_css_pipe_id pipe_id, bool in_reset)
 {
@@ -1808,7 +1784,7 @@ void atomisp_css_isys_set_valid(struct atomisp_sub_device *asd,
 
 void atomisp_css_isys_set_format(struct atomisp_sub_device *asd,
 				 enum atomisp_input_stream_id stream_id,
-				 enum atomisp_css_stream_format format,
+				 enum atomisp_input_format format,
 				 int isys_stream)
 {
 
@@ -1820,7 +1796,7 @@ void atomisp_css_isys_set_format(struct atomisp_sub_device *asd,
 
 void atomisp_css_input_set_format(struct atomisp_sub_device *asd,
 					enum atomisp_input_stream_id stream_id,
-					enum atomisp_css_stream_format format)
+					enum atomisp_input_format format)
 {
 
 	struct ia_css_stream_config *s_config =
@@ -1859,7 +1835,7 @@ int atomisp_css_set_default_isys_config(struct atomisp_sub_device *asd,
 
 int atomisp_css_isys_two_stream_cfg(struct atomisp_sub_device *asd,
 				    enum atomisp_input_stream_id stream_id,
-				    enum atomisp_css_stream_format input_format)
+				    enum atomisp_input_format input_format)
 {
 	struct ia_css_stream_config *s_config =
 		&asd->stream_env[stream_id].stream_config;
@@ -1873,9 +1849,9 @@ int atomisp_css_isys_two_stream_cfg(struct atomisp_sub_device *asd,
 	s_config->isys_config[IA_CSS_STREAM_ISYS_STREAM_1].linked_isys_stream_id
 		= IA_CSS_STREAM_ISYS_STREAM_0;
 	s_config->isys_config[IA_CSS_STREAM_ISYS_STREAM_0].format =
-		IA_CSS_STREAM_FORMAT_USER_DEF1;
+		ATOMISP_INPUT_FORMAT_USER_DEF1;
 	s_config->isys_config[IA_CSS_STREAM_ISYS_STREAM_1].format =
-		IA_CSS_STREAM_FORMAT_USER_DEF2;
+		ATOMISP_INPUT_FORMAT_USER_DEF2;
 	s_config->isys_config[IA_CSS_STREAM_ISYS_STREAM_1].valid = true;
 	return 0;
 }
@@ -1883,7 +1859,7 @@ int atomisp_css_isys_two_stream_cfg(struct atomisp_sub_device *asd,
 void atomisp_css_isys_two_stream_cfg_update_stream1(
 				    struct atomisp_sub_device *asd,
 				    enum atomisp_input_stream_id stream_id,
-				    enum atomisp_css_stream_format input_format,
+				    enum atomisp_input_format input_format,
 				    unsigned int width, unsigned int height)
 {
 	struct ia_css_stream_config *s_config =
@@ -1901,7 +1877,7 @@ void atomisp_css_isys_two_stream_cfg_update_stream1(
 void atomisp_css_isys_two_stream_cfg_update_stream2(
 				    struct atomisp_sub_device *asd,
 				    enum atomisp_input_stream_id stream_id,
-				    enum atomisp_css_stream_format input_format,
+				    enum atomisp_input_format input_format,
 				    unsigned int width, unsigned int height)
 {
 	struct ia_css_stream_config *s_config =
@@ -2142,11 +2118,11 @@ void atomisp_css_enable_cvf(struct atomisp_sub_device *asd,
 
 int atomisp_css_input_configure_port(
 		struct atomisp_sub_device *asd,
-		mipi_port_ID_t port,
+		enum mipi_port_id port,
 		unsigned int num_lanes,
 		unsigned int timeout,
 		unsigned int mipi_freq,
-		enum atomisp_css_stream_format metadata_format,
+		enum atomisp_input_format metadata_format,
 		unsigned int metadata_width,
 		unsigned int metadata_height)
 {
@@ -2890,8 +2866,8 @@ stream_err:
 	return -EINVAL;
 }
 
-unsigned int atomisp_get_pipe_index(struct atomisp_sub_device *asd,
-					uint16_t source_pad)
+static unsigned int atomisp_get_pipe_index(struct atomisp_sub_device *asd,
+					   uint16_t source_pad)
 {
 	struct atomisp_device *isp = asd->isp;
 	/*
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_css20.h b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_css20.h
index b03711668eda..a06c5b6e8027 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_css20.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_compat_css20.h
@@ -37,7 +37,6 @@
 #define atomisp_css_irq_info  ia_css_irq_info
 #define atomisp_css_isp_config ia_css_isp_config
 #define atomisp_css_bayer_order ia_css_bayer_order
-#define atomisp_css_stream_format ia_css_stream_format
 #define atomisp_css_capture_mode ia_css_capture_mode
 #define atomisp_css_input_mode ia_css_input_mode
 #define atomisp_css_frame ia_css_frame
@@ -117,7 +116,7 @@
  */
 #define CSS_ID(val)	(IA_ ## val)
 #define CSS_EVENT(val)	(IA_CSS_EVENT_TYPE_ ## val)
-#define CSS_FORMAT(val)	(IA_CSS_STREAM_FORMAT_ ## val)
+#define CSS_FORMAT(val)	(ATOMISP_INPUT_FORMAT_ ## val)
 
 #define CSS_EVENT_PORT_EOF	CSS_EVENT(PORT_EOF)
 #define CSS_EVENT_FRAME_TAGGED	CSS_EVENT(FRAME_TAGGED)
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_drvfs.c b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_drvfs.c
index ceedb82b6beb..a815c768bda9 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_drvfs.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_drvfs.c
@@ -22,6 +22,7 @@
 #include "atomisp_compat.h"
 #include "atomisp_internal.h"
 #include "atomisp_ioctl.h"
+#include "atomisp_drvfs.h"
 #include "hmm/hmm.h"
 
 /*
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_fops.c b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_fops.c
index 545ef024841d..709137f25700 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_fops.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_fops.c
@@ -689,7 +689,7 @@ static void atomisp_dev_init_struct(struct atomisp_device *isp)
 {
 	unsigned int i;
 
-	isp->sw_contex.file_input = 0;
+	isp->sw_contex.file_input = false;
 	isp->need_gfx_throttle = true;
 	isp->isp_fatal_error = false;
 	isp->mipi_frame_size = 0;
@@ -708,12 +708,12 @@ static void atomisp_subdev_init_struct(struct atomisp_sub_device *asd)
 	v4l2_ctrl_s_ctrl(asd->run_mode, ATOMISP_RUN_MODE_STILL_CAPTURE);
 	memset(&asd->params.css_param, 0, sizeof(asd->params.css_param));
 	asd->params.color_effect = V4L2_COLORFX_NONE;
-	asd->params.bad_pixel_en = 1;
-	asd->params.gdc_cac_en = 0;
-	asd->params.video_dis_en = 0;
-	asd->params.sc_en = 0;
-	asd->params.fpn_en = 0;
-	asd->params.xnr_en = 0;
+	asd->params.bad_pixel_en = true;
+	asd->params.gdc_cac_en = false;
+	asd->params.video_dis_en = false;
+	asd->params.sc_en = false;
+	asd->params.fpn_en = false;
+	asd->params.xnr_en = false;
 	asd->params.false_color = 0;
 	asd->params.online_process = 1;
 	asd->params.yuv_ds_en = 0;
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_ioctl.c b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_ioctl.c
index 5c84dd63778e..61bd550dafb9 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_ioctl.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_ioctl.c
@@ -1607,10 +1607,12 @@ int atomisp_stream_on_master_slave_sensor(struct atomisp_device *isp,
 
 /* FIXME! */
 #ifndef ISP2401
-void __wdt_on_master_slave_sensor(struct atomisp_device *isp, unsigned int wdt_duration)
+static void __wdt_on_master_slave_sensor(struct atomisp_device *isp,
+					 unsigned int wdt_duration)
 #else
-void __wdt_on_master_slave_sensor(struct atomisp_video_pipe *pipe,
-				unsigned int wdt_duration, bool enable)
+static void __wdt_on_master_slave_sensor(struct atomisp_video_pipe *pipe,
+					 unsigned int wdt_duration,
+					 bool enable)
 #endif
 {
 #ifndef ISP2401
@@ -2731,7 +2733,7 @@ static int atomisp_s_parm_file(struct file *file, void *fh,
 	}
 
 	rt_mutex_lock(&isp->mutex);
-	isp->sw_contex.file_input = 1;
+	isp->sw_contex.file_input = true;
 	rt_mutex_unlock(&isp->mutex);
 
 	return 0;
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_subdev.c b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_subdev.c
index b78276ac22da..49a9973b4289 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_subdev.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_subdev.c
@@ -42,17 +42,17 @@ const struct atomisp_in_fmt_conv atomisp_in_fmt_conv[] = {
 	{ MEDIA_BUS_FMT_SGBRG12_1X12, 12, 12, ATOMISP_INPUT_FORMAT_RAW_12, CSS_BAYER_ORDER_GBRG, CSS_FORMAT_RAW_12 },
 	{ MEDIA_BUS_FMT_SGRBG12_1X12, 12, 12, ATOMISP_INPUT_FORMAT_RAW_12, CSS_BAYER_ORDER_GRBG, CSS_FORMAT_RAW_12 },
 	{ MEDIA_BUS_FMT_SRGGB12_1X12, 12, 12, ATOMISP_INPUT_FORMAT_RAW_12, CSS_BAYER_ORDER_RGGB, CSS_FORMAT_RAW_12 },
-	{ MEDIA_BUS_FMT_UYVY8_1X16, 8, 8, ATOMISP_INPUT_FORMAT_YUV422_8, 0, IA_CSS_STREAM_FORMAT_YUV422_8 },
-	{ MEDIA_BUS_FMT_YUYV8_1X16, 8, 8, ATOMISP_INPUT_FORMAT_YUV422_8, 0, IA_CSS_STREAM_FORMAT_YUV422_8 },
-	{ MEDIA_BUS_FMT_JPEG_1X8, 8, 8, CSS_FRAME_FORMAT_BINARY_8, 0, IA_CSS_STREAM_FORMAT_BINARY_8 },
+	{ MEDIA_BUS_FMT_UYVY8_1X16, 8, 8, ATOMISP_INPUT_FORMAT_YUV422_8, 0, ATOMISP_INPUT_FORMAT_YUV422_8 },
+	{ MEDIA_BUS_FMT_YUYV8_1X16, 8, 8, ATOMISP_INPUT_FORMAT_YUV422_8, 0, ATOMISP_INPUT_FORMAT_YUV422_8 },
+	{ MEDIA_BUS_FMT_JPEG_1X8, 8, 8, CSS_FRAME_FORMAT_BINARY_8, 0, ATOMISP_INPUT_FORMAT_BINARY_8 },
 	{ V4L2_MBUS_FMT_CUSTOM_NV12, 12, 12, CSS_FRAME_FORMAT_NV12, 0, CSS_FRAME_FORMAT_NV12 },
 	{ V4L2_MBUS_FMT_CUSTOM_NV21, 12, 12, CSS_FRAME_FORMAT_NV21, 0, CSS_FRAME_FORMAT_NV21 },
-	{ V4L2_MBUS_FMT_CUSTOM_YUV420, 12, 12, ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY, 0, IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY },
+	{ V4L2_MBUS_FMT_CUSTOM_YUV420, 12, 12, ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY, 0, ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY },
 #if 0
-	{ V4L2_MBUS_FMT_CUSTOM_M10MO_RAW, 8, 8, CSS_FRAME_FORMAT_BINARY_8, 0, IA_CSS_STREAM_FORMAT_BINARY_8 },
+	{ V4L2_MBUS_FMT_CUSTOM_M10MO_RAW, 8, 8, CSS_FRAME_FORMAT_BINARY_8, 0, ATOMISP_INPUT_FORMAT_BINARY_8 },
 #endif
 	/* no valid V4L2 MBUS code for metadata format, so leave it 0. */
-	{ 0, 0, 0, ATOMISP_INPUT_FORMAT_EMBEDDED, 0, IA_CSS_STREAM_FORMAT_EMBEDDED },
+	{ 0, 0, 0, ATOMISP_INPUT_FORMAT_EMBEDDED, 0, ATOMISP_INPUT_FORMAT_EMBEDDED },
 	{}
 };
 
@@ -101,7 +101,7 @@ const struct atomisp_in_fmt_conv *atomisp_find_in_fmt_conv(u32 code)
 }
 
 const struct atomisp_in_fmt_conv *atomisp_find_in_fmt_conv_by_atomisp_in_fmt(
-	enum atomisp_css_stream_format atomisp_in_fmt)
+	enum atomisp_input_format atomisp_in_fmt)
 {
 	int i;
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_subdev.h b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_subdev.h
index c3eba675da06..59ff8723c182 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/atomisp_subdev.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/atomisp_subdev.h
@@ -58,9 +58,9 @@ struct atomisp_in_fmt_conv {
 	u32     code;
 	uint8_t bpp; /* bits per pixel */
 	uint8_t depth; /* uncompressed */
-	enum atomisp_css_stream_format atomisp_in_fmt;
+	enum atomisp_input_format atomisp_in_fmt;
 	enum atomisp_css_bayer_order bayer_order;
-	enum ia_css_stream_format css_stream_fmt;
+	enum atomisp_input_format css_stream_fmt;
 };
 
 struct atomisp_sub_device;
@@ -424,10 +424,10 @@ bool atomisp_subdev_is_compressed(u32 code);
 const struct atomisp_in_fmt_conv *atomisp_find_in_fmt_conv(u32 code);
 #ifndef ISP2401
 const struct atomisp_in_fmt_conv *atomisp_find_in_fmt_conv_by_atomisp_in_fmt(
-	enum atomisp_css_stream_format atomisp_in_fmt);
+	enum atomisp_input_format atomisp_in_fmt);
 #else
 const struct atomisp_in_fmt_conv
-    *atomisp_find_in_fmt_conv_by_atomisp_in_fmt(enum atomisp_css_stream_format
+    *atomisp_find_in_fmt_conv_by_atomisp_in_fmt(enum atomisp_input_format
 						atomisp_in_fmt);
 #endif
 const struct atomisp_in_fmt_conv *atomisp_find_in_fmt_conv_compressed(u32 code);
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/camera/util/interface/ia_css_util.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/camera/util/interface/ia_css_util.h
index a8c27676a38b..5ab48f346790 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/camera/util/interface/ia_css_util.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/camera/util/interface/ia_css_util.h
@@ -116,7 +116,7 @@ extern bool ia_css_util_resolution_is_even(
  *
  */
 extern unsigned int ia_css_util_input_format_bpp(
-	enum ia_css_stream_format stream_format,
+	enum atomisp_input_format stream_format,
 	bool two_ppc);
 
 /* @brief check if input format it raw
@@ -126,7 +126,7 @@ extern unsigned int ia_css_util_input_format_bpp(
  *
  */
 extern bool ia_css_util_is_input_format_raw(
-	enum ia_css_stream_format stream_format);
+	enum atomisp_input_format stream_format);
 
 /* @brief check if input format it yuv
  *
@@ -135,7 +135,7 @@ extern bool ia_css_util_is_input_format_raw(
  *
  */
 extern bool ia_css_util_is_input_format_yuv(
-	enum ia_css_stream_format stream_format);
+	enum atomisp_input_format stream_format);
 
 #endif /* __IA_CSS_UTIL_H__ */
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/camera/util/src/util.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/camera/util/src/util.c
index 54193789a809..91e586112332 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/camera/util/src/util.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/camera/util/src/util.c
@@ -52,55 +52,55 @@ enum ia_css_err ia_css_convert_errno(
 
 /* MW: Table look-up ??? */
 unsigned int ia_css_util_input_format_bpp(
-	enum ia_css_stream_format format,
+	enum atomisp_input_format format,
 	bool two_ppc)
 {
 	unsigned int rval = 0;
 	switch (format) {
-	case IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY:
-	case IA_CSS_STREAM_FORMAT_YUV420_8:
-	case IA_CSS_STREAM_FORMAT_YUV422_8:
-	case IA_CSS_STREAM_FORMAT_RGB_888:
-	case IA_CSS_STREAM_FORMAT_RAW_8:
-	case IA_CSS_STREAM_FORMAT_BINARY_8:
-	case IA_CSS_STREAM_FORMAT_EMBEDDED:
+	case ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY:
+	case ATOMISP_INPUT_FORMAT_YUV420_8:
+	case ATOMISP_INPUT_FORMAT_YUV422_8:
+	case ATOMISP_INPUT_FORMAT_RGB_888:
+	case ATOMISP_INPUT_FORMAT_RAW_8:
+	case ATOMISP_INPUT_FORMAT_BINARY_8:
+	case ATOMISP_INPUT_FORMAT_EMBEDDED:
 		rval = 8;
 		break;
-	case IA_CSS_STREAM_FORMAT_YUV420_10:
-	case IA_CSS_STREAM_FORMAT_YUV422_10:
-	case IA_CSS_STREAM_FORMAT_RAW_10:
+	case ATOMISP_INPUT_FORMAT_YUV420_10:
+	case ATOMISP_INPUT_FORMAT_YUV422_10:
+	case ATOMISP_INPUT_FORMAT_RAW_10:
 		rval = 10;
 		break;
-	case IA_CSS_STREAM_FORMAT_YUV420_16:
-	case IA_CSS_STREAM_FORMAT_YUV422_16:
+	case ATOMISP_INPUT_FORMAT_YUV420_16:
+	case ATOMISP_INPUT_FORMAT_YUV422_16:
 		rval = 16;
 		break;
-	case IA_CSS_STREAM_FORMAT_RGB_444:
+	case ATOMISP_INPUT_FORMAT_RGB_444:
 		rval = 4;
 		break;
-	case IA_CSS_STREAM_FORMAT_RGB_555:
+	case ATOMISP_INPUT_FORMAT_RGB_555:
 		rval = 5;
 		break;
-	case IA_CSS_STREAM_FORMAT_RGB_565:
+	case ATOMISP_INPUT_FORMAT_RGB_565:
 		rval = 65;
 		break;
-	case IA_CSS_STREAM_FORMAT_RGB_666:
-	case IA_CSS_STREAM_FORMAT_RAW_6:
+	case ATOMISP_INPUT_FORMAT_RGB_666:
+	case ATOMISP_INPUT_FORMAT_RAW_6:
 		rval = 6;
 		break;
-	case IA_CSS_STREAM_FORMAT_RAW_7:
+	case ATOMISP_INPUT_FORMAT_RAW_7:
 		rval = 7;
 		break;
-	case IA_CSS_STREAM_FORMAT_RAW_12:
+	case ATOMISP_INPUT_FORMAT_RAW_12:
 		rval = 12;
 		break;
-	case IA_CSS_STREAM_FORMAT_RAW_14:
+	case ATOMISP_INPUT_FORMAT_RAW_14:
 		if (two_ppc)
 			rval = 14;
 		else
 			rval = 12;
 		break;
-	case IA_CSS_STREAM_FORMAT_RAW_16:
+	case ATOMISP_INPUT_FORMAT_RAW_16:
 		if (two_ppc)
 			rval = 16;
 		else
@@ -175,28 +175,28 @@ bool ia_css_util_resolution_is_even(const struct ia_css_resolution resolution)
 }
 
 #endif
-bool ia_css_util_is_input_format_raw(enum ia_css_stream_format format)
+bool ia_css_util_is_input_format_raw(enum atomisp_input_format format)
 {
-	return ((format == IA_CSS_STREAM_FORMAT_RAW_6) ||
-		(format == IA_CSS_STREAM_FORMAT_RAW_7) ||
-		(format == IA_CSS_STREAM_FORMAT_RAW_8) ||
-		(format == IA_CSS_STREAM_FORMAT_RAW_10) ||
-		(format == IA_CSS_STREAM_FORMAT_RAW_12));
+	return ((format == ATOMISP_INPUT_FORMAT_RAW_6) ||
+		(format == ATOMISP_INPUT_FORMAT_RAW_7) ||
+		(format == ATOMISP_INPUT_FORMAT_RAW_8) ||
+		(format == ATOMISP_INPUT_FORMAT_RAW_10) ||
+		(format == ATOMISP_INPUT_FORMAT_RAW_12));
 	/* raw_14 and raw_16 are not supported as input formats to the ISP.
 	 * They can only be copied to a frame in memory using the
 	 * copy binary.
 	 */
 }
 
-bool ia_css_util_is_input_format_yuv(enum ia_css_stream_format format)
+bool ia_css_util_is_input_format_yuv(enum atomisp_input_format format)
 {
-	return format == IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY ||
-	    format == IA_CSS_STREAM_FORMAT_YUV420_8  ||
-	    format == IA_CSS_STREAM_FORMAT_YUV420_10 ||
-	    format == IA_CSS_STREAM_FORMAT_YUV420_16 ||
-	    format == IA_CSS_STREAM_FORMAT_YUV422_8  ||
-	    format == IA_CSS_STREAM_FORMAT_YUV422_10 ||
-	    format == IA_CSS_STREAM_FORMAT_YUV422_16;
+	return format == ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY ||
+	    format == ATOMISP_INPUT_FORMAT_YUV420_8  ||
+	    format == ATOMISP_INPUT_FORMAT_YUV420_10 ||
+	    format == ATOMISP_INPUT_FORMAT_YUV420_16 ||
+	    format == ATOMISP_INPUT_FORMAT_YUV422_8  ||
+	    format == ATOMISP_INPUT_FORMAT_YUV422_10 ||
+	    format == ATOMISP_INPUT_FORMAT_YUV422_16;
 }
 
 enum ia_css_err ia_css_util_check_input(
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/css_2401_csi2p_system/system_global.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/css_2401_csi2p_system/system_global.h
index d2e3a2deea2e..7907f0ff6d6c 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/css_2401_csi2p_system/system_global.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/css_2401_csi2p_system/system_global.h
@@ -284,12 +284,12 @@ typedef enum {
 	N_RX_ID
 } rx_ID_t;
 
-typedef enum {
+enum mipi_port_id {
 	MIPI_PORT0_ID = 0,
 	MIPI_PORT1_ID,
 	MIPI_PORT2_ID,
 	N_MIPI_PORT_ID
-} mipi_port_ID_t;
+};
 
 #define	N_RX_CHANNEL_ID		4
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/debug.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/debug.c
index c412810887b3..dcb9a3127cfe 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/debug.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/debug.c
@@ -29,7 +29,7 @@
 hrt_address	debug_buffer_address = (hrt_address)-1;
 hrt_vaddress	debug_buffer_ddr_address = (hrt_vaddress)-1;
 /* The local copy */
-debug_data_t		debug_data;
+static debug_data_t		debug_data;
 debug_data_t		*debug_data_ptr = &debug_data;
 
 void debug_buffer_init(const hrt_address addr)
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/gp_timer.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/gp_timer.c
index bcfd443f5202..b6b1344786b1 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/gp_timer.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/gp_timer.c
@@ -29,7 +29,7 @@ gp_timer_reg_load(uint32_t reg);
 static void
 gp_timer_reg_store(uint32_t reg, uint32_t value);
 
-uint32_t
+static uint32_t
 gp_timer_reg_load(uint32_t reg)
 {
 	return ia_css_device_load_uint32(
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/input_formatter.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/input_formatter.c
index a8997e45738e..0e1ca995fb06 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/input_formatter.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/input_formatter.c
@@ -45,8 +45,9 @@ const uint8_t HIVE_IF_SWITCH_CODE[N_INPUT_FORMATTER_ID] = {
 	HIVE_INPUT_SWITCH_SELECT_STR_TO_MEM};
 
 /* MW Should be part of system_global.h, where we have the main enumeration */
-const bool HIVE_IF_BIN_COPY[N_INPUT_FORMATTER_ID] = {
-	false, false, false, true};
+static const bool HIVE_IF_BIN_COPY[N_INPUT_FORMATTER_ID] = {
+	false, false, false, true
+};
 
 void input_formatter_rst(
 	const input_formatter_ID_t		ID)
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/input_system.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/input_system.c
index bd6821e436b2..2515e162828f 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/input_system.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/input_system.c
@@ -29,7 +29,7 @@
 #define ZERO (0x0)
 #define ONE  (1U)
 
-const ib_buffer_t   IB_BUFFER_NULL = {0 ,0, 0 };
+static const ib_buffer_t   IB_BUFFER_NULL = {0 ,0, 0 };
 
 static input_system_error_t input_system_configure_channel(
 	const channel_cfg_t		channel);
@@ -98,7 +98,7 @@ static inline void ctrl_unit_get_state(
 
 static inline void mipi_port_get_state(
 	const rx_ID_t					ID,
-	const mipi_port_ID_t			port_ID,
+	const enum mipi_port_id			port_ID,
 	mipi_port_state_t				*state);
 
 static inline void rx_channel_get_state(
@@ -180,7 +180,7 @@ void receiver_get_state(
 	const rx_ID_t				ID,
 	receiver_state_t			*state)
 {
-	mipi_port_ID_t	port_id;
+	enum mipi_port_id	port_id;
 	unsigned int	ch_id;
 
 	assert(ID < N_RX_ID);
@@ -209,7 +209,7 @@ void receiver_get_state(
 	state->raw16 = (uint16_t)receiver_reg_load(ID,
 		_HRT_CSS_RECEIVER_RAW16_REG_IDX);
 
-	for (port_id = (mipi_port_ID_t)0; port_id < N_MIPI_PORT_ID; port_id++) {
+	for (port_id = (enum mipi_port_id)0; port_id < N_MIPI_PORT_ID; port_id++) {
 		mipi_port_get_state(ID, port_id,
 			&(state->mipi_port_state[port_id]));
 	}
@@ -305,7 +305,7 @@ void receiver_set_compression(
 
 void receiver_port_enable(
 	const rx_ID_t			ID,
-	const mipi_port_ID_t		port_ID,
+	const enum mipi_port_id		port_ID,
 	const bool			cnd)
 {
 	hrt_data	reg = receiver_port_reg_load(ID, port_ID,
@@ -324,7 +324,7 @@ void receiver_port_enable(
 
 bool is_receiver_port_enabled(
 	const rx_ID_t			ID,
-	const mipi_port_ID_t		port_ID)
+	const enum mipi_port_id		port_ID)
 {
 	hrt_data	reg = receiver_port_reg_load(ID, port_ID,
 		_HRT_CSS_RECEIVER_DEVICE_READY_REG_IDX);
@@ -333,7 +333,7 @@ bool is_receiver_port_enabled(
 
 void receiver_irq_enable(
 	const rx_ID_t			ID,
-	const mipi_port_ID_t		port_ID,
+	const enum mipi_port_id		port_ID,
 	const rx_irq_info_t		irq_info)
 {
 	receiver_port_reg_store(ID,
@@ -343,7 +343,7 @@ void receiver_irq_enable(
 
 rx_irq_info_t receiver_get_irq_info(
 	const rx_ID_t			ID,
-	const mipi_port_ID_t		port_ID)
+	const enum mipi_port_id		port_ID)
 {
 	return receiver_port_reg_load(ID,
 	port_ID, _HRT_CSS_RECEIVER_IRQ_STATUS_REG_IDX);
@@ -351,7 +351,7 @@ rx_irq_info_t receiver_get_irq_info(
 
 void receiver_irq_clear(
 	const rx_ID_t			ID,
-	const mipi_port_ID_t		port_ID,
+	const enum mipi_port_id		port_ID,
 	const rx_irq_info_t		irq_info)
 {
 	receiver_port_reg_store(ID,
@@ -556,7 +556,7 @@ static inline void ctrl_unit_get_state(
 
 static inline void mipi_port_get_state(
 	const rx_ID_t				ID,
-	const mipi_port_ID_t			port_ID,
+	const enum mipi_port_id			port_ID,
 	mipi_port_state_t			*state)
 {
 	int	i;
@@ -644,12 +644,12 @@ static inline void rx_channel_get_state(
 }
 
 // MW: "2400" in the name is not good, but this is to avoid a naming conflict
-input_system_cfg2400_t config;
+static input_system_cfg2400_t config;
 
 static void receiver_rst(
 	const rx_ID_t				ID)
 {
-	mipi_port_ID_t		port_id;
+	enum mipi_port_id		port_id;
 
 	assert(ID < N_RX_ID);
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/input_system_local.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/input_system_local.h
index 3e8bd00082dc..bf9230fd08f2 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/input_system_local.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/input_system_local.h
@@ -353,7 +353,7 @@ typedef struct rx_cfg_s		rx_cfg_t;
  */
 struct rx_cfg_s {
 	rx_mode_t			mode;	/* The HW config */
-	mipi_port_ID_t		port;	/* The port ID to apply the control on */
+	enum mipi_port_id		port;	/* The port ID to apply the control on */
 	unsigned int		timeout;
 	unsigned int		initcount;
 	unsigned int		synccount;
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/input_system_private.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/input_system_private.h
index 118185eb86e9..48876bb08b70 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/input_system_private.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/host/input_system_private.h
@@ -63,7 +63,7 @@ STORAGE_CLASS_INPUT_SYSTEM_C hrt_data receiver_reg_load(
 
 STORAGE_CLASS_INPUT_SYSTEM_C void receiver_port_reg_store(
 	const rx_ID_t				ID,
-	const mipi_port_ID_t			port_ID,
+	const enum mipi_port_id			port_ID,
 	const hrt_address			reg,
 	const hrt_data				value)
 {
@@ -77,7 +77,7 @@ STORAGE_CLASS_INPUT_SYSTEM_C void receiver_port_reg_store(
 
 STORAGE_CLASS_INPUT_SYSTEM_C hrt_data receiver_port_reg_load(
 	const rx_ID_t				ID,
-	const mipi_port_ID_t			port_ID,
+	const enum mipi_port_id			port_ID,
 	const hrt_address			reg)
 {
 	assert(ID < N_RX_ID);
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/system_global.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/system_global.h
index d803efd7400a..6f63962a54e8 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/system_global.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_common/system_global.h
@@ -266,12 +266,12 @@ typedef enum {
 	N_RX_ID
 } rx_ID_t;
 
-typedef enum {
+enum mipi_port_id {
 	MIPI_PORT0_ID = 0,
 	MIPI_PORT1_ID,
 	MIPI_PORT2_ID,
 	N_MIPI_PORT_ID
-} mipi_port_ID_t;
+};
 
 #define	N_RX_CHANNEL_ID		4
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_include/host/input_system_public.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_include/host/input_system_public.h
index 1596757fe9ef..6e37ff0fe0f9 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_include/host/input_system_public.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/hive_isp_css_include/host/input_system_public.h
@@ -83,7 +83,7 @@ extern void receiver_set_compression(
  */
 extern void receiver_port_enable(
 	const rx_ID_t				ID,
-	const mipi_port_ID_t		port_ID,
+	const enum mipi_port_id		port_ID,
 	const bool					cnd);
 
 /*! Flag if PORT[port_ID] of RECEIVER[ID] is enabled
@@ -95,7 +95,7 @@ extern void receiver_port_enable(
  */
 extern bool is_receiver_port_enabled(
 	const rx_ID_t				ID,
-	const mipi_port_ID_t		port_ID);
+	const enum mipi_port_id		port_ID);
 
 /*! Enable the IRQ channels of PORT[port_ID] of RECEIVER[ID]
 
@@ -107,7 +107,7 @@ extern bool is_receiver_port_enabled(
  */
 extern void receiver_irq_enable(
 	const rx_ID_t				ID,
-	const mipi_port_ID_t		port_ID,
+	const enum mipi_port_id		port_ID,
 	const rx_irq_info_t			irq_info);
 
 /*! Return the IRQ status of PORT[port_ID] of RECEIVER[ID]
@@ -119,7 +119,7 @@ extern void receiver_irq_enable(
  */
 extern rx_irq_info_t receiver_get_irq_info(
 	const rx_ID_t				ID,
-	const mipi_port_ID_t		port_ID);
+	const enum mipi_port_id		port_ID);
 
 /*! Clear the IRQ status of PORT[port_ID] of RECEIVER[ID]
 
@@ -131,7 +131,7 @@ extern rx_irq_info_t receiver_get_irq_info(
  */
 extern void receiver_irq_clear(
 	const rx_ID_t				ID,
-	const mipi_port_ID_t			port_ID,
+	const enum mipi_port_id			port_ID,
 	const rx_irq_info_t			irq_info);
 
 /*! Write to a control register of INPUT_SYSTEM[ID]
@@ -195,7 +195,7 @@ STORAGE_CLASS_INPUT_SYSTEM_H hrt_data receiver_reg_load(
  */
 STORAGE_CLASS_INPUT_SYSTEM_H void receiver_port_reg_store(
 	const rx_ID_t				ID,
-	const mipi_port_ID_t			port_ID,
+	const enum mipi_port_id			port_ID,
 	const hrt_address			reg,
 	const hrt_data				value);
 
@@ -210,7 +210,7 @@ STORAGE_CLASS_INPUT_SYSTEM_H void receiver_port_reg_store(
  */
 STORAGE_CLASS_INPUT_SYSTEM_H hrt_data receiver_port_reg_load(
 	const rx_ID_t				ID,
-	const mipi_port_ID_t		port_ID,
+	const enum mipi_port_id		port_ID,
 	const hrt_address			reg);
 
 /*! Write to a control register of SUB_SYSTEM[sub_ID] of INPUT_SYSTEM[ID]
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_input_port.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_input_port.h
index f415570a3da9..ad9ca5449369 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_input_port.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_input_port.h
@@ -12,6 +12,9 @@
  * more details.
  */
 
+/* For MIPI_PORT0_ID to MIPI_PORT2_ID */
+#include "system_global.h"
+
 #ifndef __IA_CSS_INPUT_PORT_H
 #define __IA_CSS_INPUT_PORT_H
 
@@ -19,21 +22,12 @@
  * This file contains information about the possible input ports for CSS
  */
 
-/* Enumeration of the physical input ports on the CSS hardware.
- *  There are 3 MIPI CSI-2 ports.
- */
-enum ia_css_csi2_port {
-	IA_CSS_CSI2_PORT0, /* Implicitly map to MIPI_PORT0_ID */
-	IA_CSS_CSI2_PORT1, /* Implicitly map to MIPI_PORT1_ID */
-	IA_CSS_CSI2_PORT2  /* Implicitly map to MIPI_PORT2_ID */
-};
-
 /* Backward compatible for CSS API 2.0 only
  *  TO BE REMOVED when all drivers move to CSS	API 2.1
  */
-#define	IA_CSS_CSI2_PORT_4LANE IA_CSS_CSI2_PORT0
-#define	IA_CSS_CSI2_PORT_1LANE IA_CSS_CSI2_PORT1
-#define	IA_CSS_CSI2_PORT_2LANE IA_CSS_CSI2_PORT2
+#define	IA_CSS_CSI2_PORT_4LANE MIPI_PORT0_ID
+#define	IA_CSS_CSI2_PORT_1LANE MIPI_PORT1_ID
+#define	IA_CSS_CSI2_PORT_2LANE MIPI_PORT2_ID
 
 /* The CSI2 interface supports 2 types of compression or can
  *  be run without compression.
@@ -56,7 +50,7 @@ struct ia_css_csi2_compression {
 /* Input port structure.
  */
 struct ia_css_input_port {
-	enum ia_css_csi2_port port; /** Physical CSI-2 port */
+	enum mipi_port_id port; /** Physical CSI-2 port */
 	unsigned int num_lanes; /** Number of lanes used (4-lane port only) */
 	unsigned int timeout;   /** Timeout value */
 	unsigned int rxcount;   /** Register value, should include all lanes */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_irq.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_irq.h
index 10ef61178bb2..c8840138899a 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_irq.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_irq.h
@@ -186,7 +186,7 @@ ia_css_rx_get_irq_info(unsigned int *irq_bits);
  * that occurred.
  */
 void
-ia_css_rx_port_get_irq_info(enum ia_css_csi2_port port, unsigned int *irq_bits);
+ia_css_rx_port_get_irq_info(enum mipi_port_id port, unsigned int *irq_bits);
 
 /* @brief Clear CSI receiver error info.
  *
@@ -218,7 +218,7 @@ ia_css_rx_clear_irq_info(unsigned int irq_bits);
  * error bits get overwritten.
  */
 void
-ia_css_rx_port_clear_irq_info(enum ia_css_csi2_port port, unsigned int irq_bits);
+ia_css_rx_port_clear_irq_info(enum mipi_port_id port, unsigned int irq_bits);
 
 /* @brief Enable or disable specific interrupts.
  *
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_metadata.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_metadata.h
index 8b674c98224c..ed0b6ab371da 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_metadata.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_metadata.h
@@ -27,8 +27,8 @@
  *  to process sensor metadata.
  */
 struct ia_css_metadata_config {
-	enum ia_css_stream_format data_type; /** Data type of CSI-2 embedded
-			data. The default value is IA_CSS_STREAM_FORMAT_EMBEDDED. For
+	enum atomisp_input_format data_type; /** Data type of CSI-2 embedded
+			data. The default value is ATOMISP_INPUT_FORMAT_EMBEDDED. For
 			certain sensors, user can choose non-default data type for embedded
 			data. */
 	struct ia_css_resolution  resolution; /** Resolution */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_mipi.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_mipi.h
index f9c9cd76be97..367b2aafa5e8 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_mipi.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_mipi.h
@@ -55,7 +55,7 @@ ia_css_mipi_frame_specify(const unsigned int	size_mem_words,
  *
  */
 enum ia_css_err
-ia_css_mipi_frame_enable_check_on_size(const enum ia_css_csi2_port port,
+ia_css_mipi_frame_enable_check_on_size(const enum mipi_port_id port,
 				const unsigned int	size_mem_words);
 #endif
 
@@ -74,7 +74,7 @@ ia_css_mipi_frame_enable_check_on_size(const enum ia_css_csi2_port port,
 enum ia_css_err
 ia_css_mipi_frame_calculate_size(const unsigned int width,
 				const unsigned int height,
-				const enum ia_css_stream_format format,
+				const enum atomisp_input_format format,
 				const bool hasSOLandEOL,
 				const unsigned int embedded_data_size_words,
 				unsigned int *size_mem_words);
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_stream_format.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_stream_format.h
index f7e9020a86e1..f97b9eb2b19c 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_stream_format.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_stream_format.h
@@ -20,75 +20,10 @@
  */
 
 #include <type_support.h> /* bool */
-
-/* The ISP streaming input interface supports the following formats.
- *  These match the corresponding MIPI formats.
- */
-enum ia_css_stream_format {
-	IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY,    /** 8 bits per subpixel */
-	IA_CSS_STREAM_FORMAT_YUV420_8,  /** 8 bits per subpixel */
-	IA_CSS_STREAM_FORMAT_YUV420_10, /** 10 bits per subpixel */
-	IA_CSS_STREAM_FORMAT_YUV420_16, /** 16 bits per subpixel */
-	IA_CSS_STREAM_FORMAT_YUV422_8,  /** UYVY..UYVY, 8 bits per subpixel */
-	IA_CSS_STREAM_FORMAT_YUV422_10, /** UYVY..UYVY, 10 bits per subpixel */
-	IA_CSS_STREAM_FORMAT_YUV422_16, /** UYVY..UYVY, 16 bits per subpixel */
-	IA_CSS_STREAM_FORMAT_RGB_444,  /** BGR..BGR, 4 bits per subpixel */
-	IA_CSS_STREAM_FORMAT_RGB_555,  /** BGR..BGR, 5 bits per subpixel */
-	IA_CSS_STREAM_FORMAT_RGB_565,  /** BGR..BGR, 5 bits B and R, 6 bits G */
-	IA_CSS_STREAM_FORMAT_RGB_666,  /** BGR..BGR, 6 bits per subpixel */
-	IA_CSS_STREAM_FORMAT_RGB_888,  /** BGR..BGR, 8 bits per subpixel */
-	IA_CSS_STREAM_FORMAT_RAW_6,    /** RAW data, 6 bits per pixel */
-	IA_CSS_STREAM_FORMAT_RAW_7,    /** RAW data, 7 bits per pixel */
-	IA_CSS_STREAM_FORMAT_RAW_8,    /** RAW data, 8 bits per pixel */
-	IA_CSS_STREAM_FORMAT_RAW_10,   /** RAW data, 10 bits per pixel */
-	IA_CSS_STREAM_FORMAT_RAW_12,   /** RAW data, 12 bits per pixel */
-	IA_CSS_STREAM_FORMAT_RAW_14,   /** RAW data, 14 bits per pixel */
-	IA_CSS_STREAM_FORMAT_RAW_16,   /** RAW data, 16 bits per pixel, which is
-					    not specified in CSI-MIPI standard*/
-	IA_CSS_STREAM_FORMAT_BINARY_8, /** Binary byte stream, which is target at
-					    JPEG. */
-
-	/* CSI2-MIPI specific format: Generic short packet data. It is used to
-	 *  keep the timing information for the opening/closing of shutters,
-	 *  triggering of flashes and etc.
-	 */
-	IA_CSS_STREAM_FORMAT_GENERIC_SHORT1,  /** Generic Short Packet Code 1 */
-	IA_CSS_STREAM_FORMAT_GENERIC_SHORT2,  /** Generic Short Packet Code 2 */
-	IA_CSS_STREAM_FORMAT_GENERIC_SHORT3,  /** Generic Short Packet Code 3 */
-	IA_CSS_STREAM_FORMAT_GENERIC_SHORT4,  /** Generic Short Packet Code 4 */
-	IA_CSS_STREAM_FORMAT_GENERIC_SHORT5,  /** Generic Short Packet Code 5 */
-	IA_CSS_STREAM_FORMAT_GENERIC_SHORT6,  /** Generic Short Packet Code 6 */
-	IA_CSS_STREAM_FORMAT_GENERIC_SHORT7,  /** Generic Short Packet Code 7 */
-	IA_CSS_STREAM_FORMAT_GENERIC_SHORT8,  /** Generic Short Packet Code 8 */
-
-	/* CSI2-MIPI specific format: YUV data.
-	 */
-	IA_CSS_STREAM_FORMAT_YUV420_8_SHIFT,  /** YUV420 8-bit (Chroma Shifted Pixel Sampling) */
-	IA_CSS_STREAM_FORMAT_YUV420_10_SHIFT, /** YUV420 8-bit (Chroma Shifted Pixel Sampling) */
-
-	/* CSI2-MIPI specific format: Generic long packet data
-	 */
-	IA_CSS_STREAM_FORMAT_EMBEDDED, /** Embedded 8-bit non Image Data */
-
-	/* CSI2-MIPI specific format: User defined byte-based data. For example,
-	 *  the data transmitter (e.g. the SoC sensor) can keep the JPEG data as
-	 *  the User Defined Data Type 4 and the MPEG data as the
-	 *  User Defined Data Type 7.
-	 */
-	IA_CSS_STREAM_FORMAT_USER_DEF1,  /** User defined 8-bit data type 1 */
-	IA_CSS_STREAM_FORMAT_USER_DEF2,  /** User defined 8-bit data type 2 */
-	IA_CSS_STREAM_FORMAT_USER_DEF3,  /** User defined 8-bit data type 3 */
-	IA_CSS_STREAM_FORMAT_USER_DEF4,  /** User defined 8-bit data type 4 */
-	IA_CSS_STREAM_FORMAT_USER_DEF5,  /** User defined 8-bit data type 5 */
-	IA_CSS_STREAM_FORMAT_USER_DEF6,  /** User defined 8-bit data type 6 */
-	IA_CSS_STREAM_FORMAT_USER_DEF7,  /** User defined 8-bit data type 7 */
-	IA_CSS_STREAM_FORMAT_USER_DEF8,  /** User defined 8-bit data type 8 */
-};
-
-#define	IA_CSS_STREAM_FORMAT_NUM	IA_CSS_STREAM_FORMAT_USER_DEF8
+#include "../../../include/linux/atomisp_platform.h"
 
 unsigned int ia_css_util_input_format_bpp(
-	enum ia_css_stream_format format,
+	enum atomisp_input_format format,
 	bool two_ppc);
 
-#endif /* __IA_CSS_STREAM_FORMAT_H */
+#endif /* __ATOMISP_INPUT_FORMAT_H */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_stream_public.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_stream_public.h
index ca3203357ff5..ddefad330db7 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_stream_public.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/ia_css_stream_public.h
@@ -62,7 +62,7 @@ enum {
  */
 struct ia_css_stream_isys_stream_config {
 	struct ia_css_resolution  input_res; /** Resolution of input data */
-	enum ia_css_stream_format format; /** Format of input stream. This data
+	enum atomisp_input_format format; /** Format of input stream. This data
 					       format will be mapped to MIPI data
 					       type internally. */
 	int linked_isys_stream_id; /** default value is -1, other value means
@@ -77,7 +77,7 @@ struct ia_css_stream_input_config {
 							Used for CSS 2400/1 System and deprecated for other
 							systems (replaced by input_effective_res in
 							ia_css_pipe_config) */
-	enum ia_css_stream_format format; /** Format of input stream. This data
+	enum atomisp_input_format format; /** Format of input stream. This data
 					       format will be mapped to MIPI data
 					       type internally. */
 	enum ia_css_bayer_order bayer_order; /** Bayer order for RAW streams */
@@ -257,7 +257,7 @@ ia_css_stream_unload(struct ia_css_stream *stream);
  *
  * This function will return the stream format.
  */
-enum ia_css_stream_format
+enum atomisp_input_format
 ia_css_stream_get_format(const struct ia_css_stream *stream);
 
 /* @brief Check if the stream is configured for 2 pixels per clock
@@ -453,7 +453,7 @@ ia_css_stream_send_input_line(const struct ia_css_stream *stream,
  */
 void
 ia_css_stream_send_input_embedded_line(const struct ia_css_stream *stream,
-			      enum ia_css_stream_format format,
+			      enum atomisp_input_format format,
 			      const unsigned short *data,
 			      unsigned int width);
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/bnlm/ia_css_bnlm.host.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/bnlm/ia_css_bnlm.host.h
index b99c0644ab38..675f6e539b3f 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/bnlm/ia_css_bnlm.host.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/bnlm/ia_css_bnlm.host.h
@@ -17,7 +17,6 @@
 
 #include "ia_css_bnlm_types.h"
 #include "ia_css_bnlm_param.h"
-#include "ia_css_bnlm_default.host.h"
 
 void
 ia_css_bnlm_vmem_encode(
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/bnlm/ia_css_bnlm_default.host.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/bnlm/ia_css_bnlm_default.host.c
deleted file mode 100644
index e2eb88c0f123..000000000000
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/bnlm/ia_css_bnlm_default.host.c
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Support for Intel Camera Imaging ISP subsystem.
- * Copyright (c) 2015, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- */
-
-#include "ia_css_bnlm_types.h"
-
-const struct ia_css_bnlm_config default_bnlm_config = {
-
-	.rad_enable = true,
-	.rad_x_origin = 0,
-	.rad_y_origin = 0,
-	.avg_min_th = 127,
-	.max_min_th = 2047,
-
-	.exp_coeff_a = 6048,
-	.exp_coeff_b = 7828,
-	.exp_coeff_c = 0,
-	.exp_exponent = 3,
-
-	.nl_th = {2252, 2251, 2250},
-	.match_quality_max_idx = {2, 3, 3, 1},
-
-	.mu_root_lut_thr = {
-		26, 56, 128, 216, 462, 626, 932, 1108, 1480, 1564, 1824, 1896, 2368, 3428, 4560},
-	.mu_root_lut_val = {
-		384, 320, 320, 264, 248, 240, 224, 192, 192, 160, 160, 160, 136, 130, 96, 80},
-	.sad_norm_lut_thr = {
-		236, 328, 470, 774, 964, 1486, 2294, 3244, 4844, 6524, 6524, 6524, 6524, 6524, 6524},
-	.sad_norm_lut_val = {
-		8064, 7680, 7168, 6144, 5120, 3840, 2560, 2304, 1984, 1792, 1792, 1792, 1792, 1792, 1792, 1792},
-	.sig_detail_lut_thr = {
-		2936, 3354, 3943, 4896, 5230, 5682, 5996, 7299, 7299, 7299, 7299, 7299, 7299, 7299, 7299},
-	.sig_detail_lut_val = {
-		8191, 7680, 7168, 6144, 5120, 4608, 4224, 4032, 4032, 4032, 4032, 4032, 4032, 4032, 4032, 4032},
-	.sig_rad_lut_thr = {
-		18, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20},
-	.sig_rad_lut_val = {
-		2560, 7168, 8188, 8188, 8188, 8188, 8188, 8188, 8188, 8188, 8188, 8188, 8188, 8188, 8188, 8188},
-	.rad_pow_lut_thr = {
-		0, 7013, 7013, 7013, 7013, 7013, 7013, 7013, 7013, 7013, 7013, 7013, 7013, 7013, 7013},
-	.rad_pow_lut_val = {
-		8191, 8191, 8191, 8191, 8191, 8191, 8191, 8191, 8191, 8191, 8191, 8191, 8191, 8191, 8191, 8191},
-	.nl_0_lut_thr = {
-		1072, 7000, 8000, 8000, 8000, 8000, 8000, 8000, 8000, 8000, 8000, 8000, 8000, 8000, 8000},
-	.nl_0_lut_val = {
-		2560, 3072, 5120, 5120, 5120, 5120, 5120, 5120, 5120, 5120, 5120, 5120, 5120, 5120, 5120, 5120},
-	.nl_1_lut_thr = {
-		624, 3224, 3392, 7424, 7424, 7424, 7424, 7424, 7424, 7424, 7424, 7424, 7424, 7424, 7424},
-	.nl_1_lut_val = {
-		3584, 4608, 5120, 6144, 6144, 6144, 6144, 6144, 6144, 6144, 6144, 6144, 6144, 6144, 6144, 6144},
-	.nl_2_lut_thr = {
-		745, 2896, 3720, 6535, 7696, 8040, 8040, 8040, 8040, 8040, 8040, 8040, 8040, 8040, 8040},
-	.nl_2_lut_val = {
-		3584, 4608, 6144, 7168, 7936, 8191, 8191, 8191, 8191, 8191, 8191, 8191, 8191, 8191, 8191, 8191},
-	.nl_3_lut_thr = {
-		4848, 4984, 5872, 6000, 6517, 6960, 7944, 8088, 8161, 8161, 8161, 8161, 8161, 8161, 8161},
-	.nl_3_lut_val = {
-		3072, 4104, 4608, 5120, 6144, 7168, 7680, 8128, 8191, 8191, 8191, 8191, 8191, 8191, 8191, 8191},
-
-};
-
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/bnlm/ia_css_bnlm_default.host.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/bnlm/ia_css_bnlm_default.host.h
deleted file mode 100644
index f18c8070abba..000000000000
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/bnlm/ia_css_bnlm_default.host.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Support for Intel Camera Imaging ISP subsystem.
- * Copyright (c) 2015, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- */
-
-#ifndef __IA_CSS_BNLM_DEFAULT_HOST_H
-#define __IA_CSS_BNLM_DEFAULT_HOST_H
-
-#include "ia_css_bnlm_types.h"
-extern const struct ia_css_bnlm_config default_bnlm_config;
-
-#endif /* __IA_CSS_BNLM_DEFAULT_HOST_H */
-
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/dpc2/ia_css_dpc2.host.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/dpc2/ia_css_dpc2.host.h
index 641564b4af8e..38d10a5237c6 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/dpc2/ia_css_dpc2.host.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/dpc2/ia_css_dpc2.host.h
@@ -17,7 +17,6 @@
 
 #include "ia_css_dpc2_types.h"
 #include "ia_css_dpc2_param.h"
-#include "ia_css_dpc2_default.host.h"
 
 void
 ia_css_dpc2_encode(
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/dpc2/ia_css_dpc2_default.host.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/dpc2/ia_css_dpc2_default.host.c
deleted file mode 100644
index c102601cc635..000000000000
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/dpc2/ia_css_dpc2_default.host.c
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Support for Intel Camera Imaging ISP subsystem.
- * Copyright (c) 2015, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- */
-
-#include "ia_css_dpc2_types.h"
-
-const struct ia_css_dpc2_config default_dpc2_config = {
-	.metric1 = 1638,
-	.metric2 =  128,
-	.metric3 = 1638,
-	.wb_gain_gr = 512,
-	.wb_gain_r  = 512,
-	.wb_gain_b  = 512,
-	.wb_gain_gb = 512
-};
-
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/dpc2/ia_css_dpc2_default.host.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/dpc2/ia_css_dpc2_default.host.h
deleted file mode 100644
index a1527ce3eddc..000000000000
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/dpc2/ia_css_dpc2_default.host.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Support for Intel Camera Imaging ISP subsystem.
- * Copyright (c) 2015, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- */
-
-#ifndef __IA_CSS_DPC2_DEFAULT_HOST_H
-#define __IA_CSS_DPC2_DEFAULT_HOST_H
-
-#include "ia_css_dpc2_types.h"
-
-extern const struct ia_css_dpc2_config default_dpc2_config;
-
-#endif /* __IA_CSS_DPC2_DEFAULT_HOST_H */
-
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/eed1_8/ia_css_eed1_8.host.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/eed1_8/ia_css_eed1_8.host.h
index 355ff13273b0..fff932c1364e 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/eed1_8/ia_css_eed1_8.host.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/eed1_8/ia_css_eed1_8.host.h
@@ -17,7 +17,6 @@
 
 #include "ia_css_eed1_8_types.h"
 #include "ia_css_eed1_8_param.h"
-#include "ia_css_eed1_8_default.host.h"
 
 void
 ia_css_eed1_8_vmem_encode(
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/eed1_8/ia_css_eed1_8_default.host.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/eed1_8/ia_css_eed1_8_default.host.c
deleted file mode 100644
index 3622719dafa5..000000000000
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/eed1_8/ia_css_eed1_8_default.host.c
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Support for Intel Camera Imaging ISP subsystem.
- * Copyright (c) 2015, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- */
-
-#include "ia_css_eed1_8_types.h"
-
-/* The default values for the kernel parameters are based on
- * ISP261 CSS API public parameter list_all.xlsx from 12-09-2014
- * The parameter list is available on the ISP261 sharepoint
- */
-
-/* Default kernel parameters. */
-const struct ia_css_eed1_8_config default_eed1_8_config = {
-	.rbzp_strength = 5489,
-	.fcstrength = 6554,
-	.fcthres_0 = 0,
-	.fcthres_1 = 0,
-	.fc_sat_coef = 8191,
-	.fc_coring_prm = 128,
-	.aerel_thres0 = 0,
-	.aerel_gain0 = 8191,
-	.aerel_thres1 = 16,
-	.aerel_gain1 = 20,
-	.derel_thres0 = 1229,
-	.derel_gain0 = 1,
-	.derel_thres1 = 819,
-	.derel_gain1 = 1,
-	.coring_pos0 = 0,
-	.coring_pos1 = 0,
-	.coring_neg0 = 0,
-	.coring_neg1 = 0,
-	.gain_exp = 2,
-	.gain_pos0 = 6144,
-	.gain_pos1 = 2048,
-	.gain_neg0 = 2048,
-	.gain_neg1 = 6144,
-	.pos_margin0 = 1475,
-	.pos_margin1 = 1475,
-	.neg_margin0 = 1475,
-	.neg_margin1 = 1475,
-	.dew_enhance_seg_x = {
-		0,
-		64,
-		272,
-		688,
-		1376,
-		2400,
-		3840,
-		5744,
-		8191
-		},
-	.dew_enhance_seg_y = {
-		0,
-		144,
-		480,
-		1040,
-		1852,
-		2945,
-		4357,
-		6094,
-		8191
-		},
-	.dew_enhance_seg_slope = {
-		4608,
-		3308,
-		2757,
-		2417,
-		2186,
-		8033,
-		7473,
-		7020
-		},
-	.dew_enhance_seg_exp = {
-		2,
-		2,
-		2,
-		2,
-		2,
-		0,
-		0,
-		0
-		},
-	.dedgew_max = 6144
-};
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/eed1_8/ia_css_eed1_8_default.host.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/eed1_8/ia_css_eed1_8_default.host.h
deleted file mode 100644
index 782f739ca8b5..000000000000
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/eed1_8/ia_css_eed1_8_default.host.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Support for Intel Camera Imaging ISP subsystem.
- * Copyright (c) 2015, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- */
-
-#ifndef __IA_CSS_EED1_8_DEFAULT_HOST_H
-#define __IA_CSS_EED1_8_DEFAULT_HOST_H
-
-#include "ia_css_eed1_8_types.h"
-
-extern const struct ia_css_eed1_8_config default_eed1_8_config;
-
-#endif /* __IA_CSS_EED1_8_DEFAULT_HOST_H */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/output/output_1.0/ia_css_output.host.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/output/output_1.0/ia_css_output.host.c
index 8fdf47c9310c..9efe5e5e4e06 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/output/output_1.0/ia_css_output.host.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/output/output_1.0/ia_css_output.host.c
@@ -60,7 +60,7 @@ ia_css_output_config(
 	(void)size;
 	ia_css_dma_configure_from_info(&to->port_b, from->info);
 	to->width_a_over_b = elems_a / to->port_b.elems;
-	to->height = from->info->res.height;
+	to->height = from->info ? from->info->res.height : 0;
 	to->enable = from->info != NULL;
 	ia_css_frame_info_to_frame_sp_info(&to->info, from->info);
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/raw/raw_1.0/ia_css_raw.host.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/raw/raw_1.0/ia_css_raw.host.c
index 68a27f0cfba0..fa9ce0fedf23 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/raw/raw_1.0/ia_css_raw.host.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/raw/raw_1.0/ia_css_raw.host.c
@@ -37,34 +37,34 @@ sh_css_elems_bytes_from_info (unsigned raw_bit_depth)
 
 /* MW: These areMIPI / ISYS properties, not camera function properties */
 static enum sh_stream_format
-css2isp_stream_format(enum ia_css_stream_format from)
+css2isp_stream_format(enum atomisp_input_format from)
 {
 	switch (from) {
-	case IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY:
+	case ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY:
 		return sh_stream_format_yuv420_legacy;
-	case IA_CSS_STREAM_FORMAT_YUV420_8:
-	case IA_CSS_STREAM_FORMAT_YUV420_10:
-	case IA_CSS_STREAM_FORMAT_YUV420_16:
+	case ATOMISP_INPUT_FORMAT_YUV420_8:
+	case ATOMISP_INPUT_FORMAT_YUV420_10:
+	case ATOMISP_INPUT_FORMAT_YUV420_16:
 		return sh_stream_format_yuv420;
-	case IA_CSS_STREAM_FORMAT_YUV422_8:
-	case IA_CSS_STREAM_FORMAT_YUV422_10:
-	case IA_CSS_STREAM_FORMAT_YUV422_16:
+	case ATOMISP_INPUT_FORMAT_YUV422_8:
+	case ATOMISP_INPUT_FORMAT_YUV422_10:
+	case ATOMISP_INPUT_FORMAT_YUV422_16:
 		return sh_stream_format_yuv422;
-	case IA_CSS_STREAM_FORMAT_RGB_444:
-	case IA_CSS_STREAM_FORMAT_RGB_555:
-	case IA_CSS_STREAM_FORMAT_RGB_565:
-	case IA_CSS_STREAM_FORMAT_RGB_666:
-	case IA_CSS_STREAM_FORMAT_RGB_888:
+	case ATOMISP_INPUT_FORMAT_RGB_444:
+	case ATOMISP_INPUT_FORMAT_RGB_555:
+	case ATOMISP_INPUT_FORMAT_RGB_565:
+	case ATOMISP_INPUT_FORMAT_RGB_666:
+	case ATOMISP_INPUT_FORMAT_RGB_888:
 		return sh_stream_format_rgb;
-	case IA_CSS_STREAM_FORMAT_RAW_6:
-	case IA_CSS_STREAM_FORMAT_RAW_7:
-	case IA_CSS_STREAM_FORMAT_RAW_8:
-	case IA_CSS_STREAM_FORMAT_RAW_10:
-	case IA_CSS_STREAM_FORMAT_RAW_12:
-	case IA_CSS_STREAM_FORMAT_RAW_14:
-	case IA_CSS_STREAM_FORMAT_RAW_16:
+	case ATOMISP_INPUT_FORMAT_RAW_6:
+	case ATOMISP_INPUT_FORMAT_RAW_7:
+	case ATOMISP_INPUT_FORMAT_RAW_8:
+	case ATOMISP_INPUT_FORMAT_RAW_10:
+	case ATOMISP_INPUT_FORMAT_RAW_12:
+	case ATOMISP_INPUT_FORMAT_RAW_14:
+	case ATOMISP_INPUT_FORMAT_RAW_16:
 		return sh_stream_format_raw;
-	case IA_CSS_STREAM_FORMAT_BINARY_8:
+	case ATOMISP_INPUT_FORMAT_BINARY_8:
 	default:
 		return sh_stream_format_raw;
 	}
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/raw/raw_1.0/ia_css_raw_types.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/raw/raw_1.0/ia_css_raw_types.h
index 5c0b8febd79a..ae868eb5e10f 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/raw/raw_1.0/ia_css_raw_types.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/raw/raw_1.0/ia_css_raw_types.h
@@ -28,7 +28,7 @@ struct ia_css_raw_configuration {
 	const struct ia_css_frame_info  *in_info;
 	const struct ia_css_frame_info  *internal_info;
 	bool two_ppc;
-	enum ia_css_stream_format stream_format;
+	enum atomisp_input_format stream_format;
 	bool deinterleaved;
 	uint8_t enable_left_padding;
 };
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tdf/tdf_1.0/ia_css_tdf.host.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tdf/tdf_1.0/ia_css_tdf.host.c
index e775af51c0c0..78a113bfe8f1 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tdf/tdf_1.0/ia_css_tdf.host.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tdf/tdf_1.0/ia_css_tdf.host.c
@@ -15,7 +15,7 @@
 #include "ia_css_debug.h"
 #include "ia_css_tdf.host.h"
 
-const int16_t g_pyramid[8][8] = {
+static const int16_t g_pyramid[8][8] = {
 {128, 384, 640, 896, 896, 640, 384, 128},
 {384, 1152, 1920, 2688, 2688, 1920, 1152, 384},
 {640, 1920, 3200, 4480, 4480, 3200, 1920, 640},
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tdf/tdf_1.0/ia_css_tdf.host.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tdf/tdf_1.0/ia_css_tdf.host.h
index 1b3e759e41a3..bd628a18e839 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tdf/tdf_1.0/ia_css_tdf.host.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tdf/tdf_1.0/ia_css_tdf.host.h
@@ -17,7 +17,6 @@
 
 #include "ia_css_tdf_types.h"
 #include "ia_css_tdf_param.h"
-#include "ia_css_tdf_default.host.h"
 
 void
 ia_css_tdf_vmem_encode(
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tdf/tdf_1.0/ia_css_tdf_default.host.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tdf/tdf_1.0/ia_css_tdf_default.host.c
deleted file mode 100644
index 9bb42daf070d..000000000000
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tdf/tdf_1.0/ia_css_tdf_default.host.c
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Support for Intel Camera Imaging ISP subsystem.
- * Copyright (c) 2015, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- */
-
-#include "ia_css_tdf_types.h"
-
-const struct ia_css_tdf_config default_tdf_config = {
-	.thres_flat_table = {0},
-	.thres_detail_table = {0},
-	.epsilon_0 = 4095,
-	.epsilon_1 = 5733,
-	.eps_scale_text = 409,
-	.eps_scale_edge = 3686,
-	.sepa_flat = 1294,
-	.sepa_edge = 4095,
-	.blend_flat = 819,
-	.blend_text = 819,
-	.blend_edge = 8191,
-	.shading_gain = 1024,
-	.shading_base_gain = 8191,
-	.local_y_gain = 0,
-	.local_y_base_gain = 2047,
-	.rad_x_origin = 0,
-	.rad_y_origin = 0
-};
-
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tdf/tdf_1.0/ia_css_tdf_default.host.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tdf/tdf_1.0/ia_css_tdf_default.host.h
deleted file mode 100644
index cd8fb70e5a87..000000000000
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/tdf/tdf_1.0/ia_css_tdf_default.host.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Support for Intel Camera Imaging ISP subsystem.
- * Copyright (c) 2015, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- */
-
-#ifndef __IA_CSS_TDF_DEFAULT_HOST_H
-#define __IA_CSS_TDF_DEFAULT_HOST_H
-
-#include "ia_css_tdf_types.h"
-
-extern const struct ia_css_tdf_config default_tdf_config;
-
-#endif /* __IA_CSS_TDF_DEFAULT_HOST_H */
-
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/vf/vf_1.0/ia_css_vf.host.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/vf/vf_1.0/ia_css_vf.host.c
index 5610833ed595..c2076e412410 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/vf/vf_1.0/ia_css_vf.host.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/isp/kernels/vf/vf_1.0/ia_css_vf.host.c
@@ -130,11 +130,11 @@ ia_css_vf_configure(
 
 	err = configure_kernel(info, out_info, vf_info, downscale_log2, &config);
 	configure_dma(&config, vf_info);
-	if (binary) {
-		if (vf_info)
-			vf_info->raw_bit_depth = info->dma.vfdec_bits_per_pixel;
-		ia_css_configure_vf (binary, &config);
-	}
+
+	if (vf_info)
+		vf_info->raw_bit_depth = info->dma.vfdec_bits_per_pixel;
+	ia_css_configure_vf (binary, &config);
+
 	return IA_CSS_SUCCESS;
 }
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/binary/interface/ia_css_binary.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/binary/interface/ia_css_binary.h
index 732e49a241eb..b62c4d321a4e 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/binary/interface/ia_css_binary.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/binary/interface/ia_css_binary.h
@@ -113,7 +113,7 @@ struct ia_css_binary_descr {
 #endif
 	bool enable_capture_pp_bli;
 	struct ia_css_resolution dvs_env;
-	enum ia_css_stream_format stream_format;
+	enum atomisp_input_format stream_format;
 	struct ia_css_frame_info *in_info;		/* the info of the input-frame with the
 							   ISP required resolution. */
 	struct ia_css_frame_info *bds_out_info;
@@ -126,7 +126,7 @@ struct ia_css_binary_descr {
 
 struct ia_css_binary {
 	const struct ia_css_binary_xinfo *info;
-	enum ia_css_stream_format input_format;
+	enum atomisp_input_format input_format;
 	struct ia_css_frame_info in_frame_info;
 	struct ia_css_frame_info internal_frame_info;
 	struct ia_css_frame_info out_frame_info[IA_CSS_BINARY_MAX_OUTPUT_PORTS];
@@ -162,7 +162,7 @@ struct ia_css_binary {
 
 #define IA_CSS_BINARY_DEFAULT_SETTINGS \
 (struct ia_css_binary) { \
-	.input_format		= IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY, \
+	.input_format		= ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY, \
 	.in_frame_info		= IA_CSS_BINARY_DEFAULT_FRAME_INFO, \
 	.internal_frame_info	= IA_CSS_BINARY_DEFAULT_FRAME_INFO, \
 	.out_frame_info		= {IA_CSS_BINARY_DEFAULT_FRAME_INFO}, \
@@ -179,7 +179,7 @@ enum ia_css_err
 ia_css_binary_fill_info(const struct ia_css_binary_xinfo *xinfo,
 		 bool online,
 		 bool two_ppc,
-		 enum ia_css_stream_format stream_format,
+		 enum atomisp_input_format stream_format,
 		 const struct ia_css_frame_info *in_info,
 		 const struct ia_css_frame_info *bds_out_info,
 		 const struct ia_css_frame_info *out_info[],
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/binary/src/binary.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/binary/src/binary.c
index a0f0e9062c4c..0cd6e1da43cf 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/binary/src/binary.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/binary/src/binary.c
@@ -861,7 +861,7 @@ binary_supports_output_format(const struct ia_css_binary_xinfo *info,
 #ifdef ISP2401
 static bool
 binary_supports_input_format(const struct ia_css_binary_xinfo *info,
-			     enum ia_css_stream_format format)
+			     enum atomisp_input_format format)
 {
 
 	assert(info != NULL);
@@ -1088,7 +1088,7 @@ enum ia_css_err
 ia_css_binary_fill_info(const struct ia_css_binary_xinfo *xinfo,
 		 bool online,
 		 bool two_ppc,
-		 enum ia_css_stream_format stream_format,
+		 enum atomisp_input_format stream_format,
 		 const struct ia_css_frame_info *in_info, /* can be NULL */
 		 const struct ia_css_frame_info *bds_out_info, /* can be NULL */
 		 const struct ia_css_frame_info *out_info[], /* can be NULL */
@@ -1382,7 +1382,7 @@ ia_css_binary_find(struct ia_css_binary_descr *descr,
 	int mode;
 	bool online;
 	bool two_ppc;
-	enum ia_css_stream_format stream_format;
+	enum atomisp_input_format stream_format;
 	const struct ia_css_frame_info *req_in_info,
 				       *req_bds_out_info,
 				       *req_out_info[IA_CSS_BINARY_MAX_OUTPUT_PORTS],
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/bufq/src/bufq.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/bufq/src/bufq.c
index e50d9f2e2609..ffbcdd80d934 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/bufq/src/bufq.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/bufq/src/bufq.c
@@ -90,12 +90,11 @@ struct sh_css_queues {
 
 #endif
 
-struct sh_css_queues  css_queues;
-
-
 /*******************************************************
 *** Static variables
 ********************************************************/
+static struct sh_css_queues css_queues;
+
 static int buffer_type_to_queue_id_map[SH_CSS_MAX_SP_THREADS][IA_CSS_NUM_DYNAMIC_BUFFER_TYPE];
 static bool queue_availability[SH_CSS_MAX_SP_THREADS][SH_CSS_MAX_NUM_QUEUES];
 
@@ -207,7 +206,7 @@ static void map_buffer_type_to_queue_id(
 	}
 
 	for (i = SH_CSS_QUEUE_C_ID; i < SH_CSS_MAX_NUM_QUEUES; i++) {
-		if (queue_availability[thread_id][i] == true) {
+		if (queue_availability[thread_id][i]) {
 			queue_availability[thread_id][i] = false;
 			buffer_type_to_queue_id_map[thread_id][buf_type] = i;
 			break;
@@ -266,7 +265,7 @@ static ia_css_queue_t *bufq_get_qhandle(
 	case sh_css_sp2host_isys_event_queue:
 		q = &css_queues.sp2host_isys_event_queue_handle;
 		break;
-#endif		
+#endif
 	case sh_css_host2sp_tag_cmd_queue:
 		q = &css_queues.host2sp_tag_cmd_queue_handle;
 		break;
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/debug/src/ia_css_debug.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/debug/src/ia_css_debug.c
index 60395904f89a..4607a76dc78a 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/debug/src/ia_css_debug.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/debug/src/ia_css_debug.c
@@ -110,9 +110,6 @@
 /* Global variable to store the dtrace verbosity level */
 unsigned int ia_css_debug_trace_level = IA_CSS_DEBUG_WARNING;
 
-/* Assumes that IA_CSS_STREAM_FORMAT_BINARY_8 is last */
-#define N_IA_CSS_STREAM_FORMAT (IA_CSS_STREAM_FORMAT_BINARY_8+1)
-
 #define DPG_START "ia_css_debug_pipe_graph_dump_start "
 #define DPG_END   " ia_css_debug_pipe_graph_dump_end\n"
 
@@ -141,8 +138,8 @@ static struct pipe_graph_class {
 	int width;
 	int eff_height;
 	int eff_width;
-	enum ia_css_stream_format stream_format;
-} pg_inst = {true, 0, 0, 0, 0, N_IA_CSS_STREAM_FORMAT};
+	enum atomisp_input_format stream_format;
+} pg_inst = {true, 0, 0, 0, 0, N_ATOMISP_INPUT_FORMAT};
 
 static const char * const queue_id_to_str[] = {
 	/* [SH_CSS_QUEUE_A_ID]     =*/ "queue_A",
@@ -261,86 +258,86 @@ unsigned int ia_css_debug_get_dtrace_level(void)
 	return ia_css_debug_trace_level;
 }
 
-static const char *debug_stream_format2str(const enum ia_css_stream_format stream_format)
+static const char *debug_stream_format2str(const enum atomisp_input_format stream_format)
 {
 	switch (stream_format) {
-	case IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY:
+	case ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY:
 		return "yuv420-8-legacy";
-	case IA_CSS_STREAM_FORMAT_YUV420_8:
+	case ATOMISP_INPUT_FORMAT_YUV420_8:
 		return "yuv420-8";
-	case IA_CSS_STREAM_FORMAT_YUV420_10:
+	case ATOMISP_INPUT_FORMAT_YUV420_10:
 		return "yuv420-10";
-	case IA_CSS_STREAM_FORMAT_YUV420_16:
+	case ATOMISP_INPUT_FORMAT_YUV420_16:
 		return "yuv420-16";
-	case IA_CSS_STREAM_FORMAT_YUV422_8:
+	case ATOMISP_INPUT_FORMAT_YUV422_8:
 		return "yuv422-8";
-	case IA_CSS_STREAM_FORMAT_YUV422_10:
+	case ATOMISP_INPUT_FORMAT_YUV422_10:
 		return "yuv422-10";
-	case IA_CSS_STREAM_FORMAT_YUV422_16:
+	case ATOMISP_INPUT_FORMAT_YUV422_16:
 		return "yuv422-16";
-	case IA_CSS_STREAM_FORMAT_RGB_444:
+	case ATOMISP_INPUT_FORMAT_RGB_444:
 		return "rgb444";
-	case IA_CSS_STREAM_FORMAT_RGB_555:
+	case ATOMISP_INPUT_FORMAT_RGB_555:
 		return "rgb555";
-	case IA_CSS_STREAM_FORMAT_RGB_565:
+	case ATOMISP_INPUT_FORMAT_RGB_565:
 		return "rgb565";
-	case IA_CSS_STREAM_FORMAT_RGB_666:
+	case ATOMISP_INPUT_FORMAT_RGB_666:
 		return "rgb666";
-	case IA_CSS_STREAM_FORMAT_RGB_888:
+	case ATOMISP_INPUT_FORMAT_RGB_888:
 		return "rgb888";
-	case IA_CSS_STREAM_FORMAT_RAW_6:
+	case ATOMISP_INPUT_FORMAT_RAW_6:
 		return "raw6";
-	case IA_CSS_STREAM_FORMAT_RAW_7:
+	case ATOMISP_INPUT_FORMAT_RAW_7:
 		return "raw7";
-	case IA_CSS_STREAM_FORMAT_RAW_8:
+	case ATOMISP_INPUT_FORMAT_RAW_8:
 		return "raw8";
-	case IA_CSS_STREAM_FORMAT_RAW_10:
+	case ATOMISP_INPUT_FORMAT_RAW_10:
 		return "raw10";
-	case IA_CSS_STREAM_FORMAT_RAW_12:
+	case ATOMISP_INPUT_FORMAT_RAW_12:
 		return "raw12";
-	case IA_CSS_STREAM_FORMAT_RAW_14:
+	case ATOMISP_INPUT_FORMAT_RAW_14:
 		return "raw14";
-	case IA_CSS_STREAM_FORMAT_RAW_16:
+	case ATOMISP_INPUT_FORMAT_RAW_16:
 		return "raw16";
-	case IA_CSS_STREAM_FORMAT_BINARY_8:
+	case ATOMISP_INPUT_FORMAT_BINARY_8:
 		return "binary8";
-	case IA_CSS_STREAM_FORMAT_GENERIC_SHORT1:
+	case ATOMISP_INPUT_FORMAT_GENERIC_SHORT1:
 		return "generic-short1";
-	case IA_CSS_STREAM_FORMAT_GENERIC_SHORT2:
+	case ATOMISP_INPUT_FORMAT_GENERIC_SHORT2:
 		return "generic-short2";
-	case IA_CSS_STREAM_FORMAT_GENERIC_SHORT3:
+	case ATOMISP_INPUT_FORMAT_GENERIC_SHORT3:
 		return "generic-short3";
-	case IA_CSS_STREAM_FORMAT_GENERIC_SHORT4:
+	case ATOMISP_INPUT_FORMAT_GENERIC_SHORT4:
 		return "generic-short4";
-	case IA_CSS_STREAM_FORMAT_GENERIC_SHORT5:
+	case ATOMISP_INPUT_FORMAT_GENERIC_SHORT5:
 		return "generic-short5";
-	case IA_CSS_STREAM_FORMAT_GENERIC_SHORT6:
+	case ATOMISP_INPUT_FORMAT_GENERIC_SHORT6:
 		return "generic-short6";
-	case IA_CSS_STREAM_FORMAT_GENERIC_SHORT7:
+	case ATOMISP_INPUT_FORMAT_GENERIC_SHORT7:
 		return "generic-short7";
-	case IA_CSS_STREAM_FORMAT_GENERIC_SHORT8:
+	case ATOMISP_INPUT_FORMAT_GENERIC_SHORT8:
 		return "generic-short8";
-	case IA_CSS_STREAM_FORMAT_YUV420_8_SHIFT:
+	case ATOMISP_INPUT_FORMAT_YUV420_8_SHIFT:
 		return "yuv420-8-shift";
-	case IA_CSS_STREAM_FORMAT_YUV420_10_SHIFT:
+	case ATOMISP_INPUT_FORMAT_YUV420_10_SHIFT:
 		return "yuv420-10-shift";
-	case IA_CSS_STREAM_FORMAT_EMBEDDED:
+	case ATOMISP_INPUT_FORMAT_EMBEDDED:
 		return "embedded-8";
-	case IA_CSS_STREAM_FORMAT_USER_DEF1:
+	case ATOMISP_INPUT_FORMAT_USER_DEF1:
 		return "user-def-8-type-1";
-	case IA_CSS_STREAM_FORMAT_USER_DEF2:
+	case ATOMISP_INPUT_FORMAT_USER_DEF2:
 		return "user-def-8-type-2";
-	case IA_CSS_STREAM_FORMAT_USER_DEF3:
+	case ATOMISP_INPUT_FORMAT_USER_DEF3:
 		return "user-def-8-type-3";
-	case IA_CSS_STREAM_FORMAT_USER_DEF4:
+	case ATOMISP_INPUT_FORMAT_USER_DEF4:
 		return "user-def-8-type-4";
-	case IA_CSS_STREAM_FORMAT_USER_DEF5:
+	case ATOMISP_INPUT_FORMAT_USER_DEF5:
 		return "user-def-8-type-5";
-	case IA_CSS_STREAM_FORMAT_USER_DEF6:
+	case ATOMISP_INPUT_FORMAT_USER_DEF6:
 		return "user-def-8-type-6";
-	case IA_CSS_STREAM_FORMAT_USER_DEF7:
+	case ATOMISP_INPUT_FORMAT_USER_DEF7:
 		return "user-def-8-type-7";
-	case IA_CSS_STREAM_FORMAT_USER_DEF8:
+	case ATOMISP_INPUT_FORMAT_USER_DEF8:
 		return "user-def-8-type-8";
 
 	default:
@@ -2679,9 +2676,9 @@ ia_css_debug_pipe_graph_dump_frame(
 	}
 	dtrace_dot(
 		"node [shape = box, "
-		"fixedsize=true, width=2, height=0.7]; \"0x%08lx\" "
+		"fixedsize=true, width=2, height=0.7]; \"%p\" "
 		"[label = \"%s\\n%d(%d) x %d, %dbpp\\n%s\"];",
-		HOST_ADDRESS(frame),
+		frame,
 		debug_frame_format2str(frame->info.format),
 		frame->info.res.width,
 		frame->info.padded_width,
@@ -2691,16 +2688,16 @@ ia_css_debug_pipe_graph_dump_frame(
 
 	if (in_frame) {
 		dtrace_dot(
-			"\"0x%08lx\"->\"%s(pipe%d)\" "
+			"\"%p\"->\"%s(pipe%d)\" "
 			"[label = %s_frame];",
-			HOST_ADDRESS(frame),
+			frame,
 			blob_name, id, frame_name);
 	} else {
 		dtrace_dot(
-			"\"%s(pipe%d)\"->\"0x%08lx\" "
+			"\"%s(pipe%d)\"->\"%p\" "
 			"[label = %s_frame];",
 			blob_name, id,
-			HOST_ADDRESS(frame),
+			frame,
 			frame_name);
 	}
 }
@@ -2730,7 +2727,7 @@ void ia_css_debug_pipe_graph_dump_epilogue(void)
 	}
 
 
-	if (pg_inst.stream_format != N_IA_CSS_STREAM_FORMAT) {
+	if (pg_inst.stream_format != N_ATOMISP_INPUT_FORMAT) {
 		/* An input stream format has been set so assume we have
 		 * an input system and sensor
 		 */
@@ -2770,7 +2767,7 @@ void ia_css_debug_pipe_graph_dump_epilogue(void)
 	pg_inst.height = 0;
 	pg_inst.eff_width = 0;
 	pg_inst.eff_height = 0;
-	pg_inst.stream_format = N_IA_CSS_STREAM_FORMAT;
+	pg_inst.stream_format = N_ATOMISP_INPUT_FORMAT;
 }
 
 void
@@ -3011,9 +3008,9 @@ ia_css_debug_pipe_graph_dump_sp_raw_copy(
 
 	snprintf(ring_buffer, sizeof(ring_buffer),
 		"node [shape = box, "
-		"fixedsize=true, width=2, height=0.7]; \"0x%08lx\" "
+		"fixedsize=true, width=2, height=0.7]; \"%p\" "
 		"[label = \"%s\\n%d(%d) x %d\\nRingbuffer\"];",
-		HOST_ADDRESS(out_frame),
+		out_frame,
 		debug_frame_format2str(out_frame->info.format),
 		out_frame->info.res.width,
 		out_frame->info.padded_width,
@@ -3022,9 +3019,9 @@ ia_css_debug_pipe_graph_dump_sp_raw_copy(
 	dtrace_dot(ring_buffer);
 
 	dtrace_dot(
-		"\"%s(pipe%d)\"->\"0x%08lx\" "
+		"\"%s(pipe%d)\"->\"%p\" "
 		"[label = out_frame];",
-		"sp_raw_copy", 1, HOST_ADDRESS(out_frame));
+		"sp_raw_copy", 1, out_frame);
 
 	snprintf(dot_id_input_bin, sizeof(dot_id_input_bin), "%s(pipe%d)", "sp_raw_copy", 1);
 }
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/ifmtr/src/ifmtr.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/ifmtr/src/ifmtr.c
index adefa57820a4..1bed027435fd 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/ifmtr/src/ifmtr.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/ifmtr/src/ifmtr.c
@@ -112,13 +112,13 @@ enum ia_css_err ia_css_ifmtr_configure(struct ia_css_stream_config *config,
 	    width_b_factor = 1, start_column_b,
 	    left_padding = 0;
 	input_formatter_cfg_t if_a_config, if_b_config;
-	enum ia_css_stream_format input_format;
+	enum atomisp_input_format input_format;
 	enum ia_css_err err = IA_CSS_SUCCESS;
 	uint8_t if_config_index;
 
 	/* Determine which input formatter config set is targeted. */
 	/* Index is equal to the CSI-2 port used. */
-	enum ia_css_csi2_port port;
+	enum mipi_port_id port;
 
 	if (binary) {
 		cropped_height = binary->in_frame_info.res.height;
@@ -141,7 +141,7 @@ enum ia_css_err ia_css_ifmtr_configure(struct ia_css_stream_config *config,
 	if (config->mode == IA_CSS_INPUT_MODE_SENSOR
 	    || config->mode == IA_CSS_INPUT_MODE_BUFFERED_SENSOR) {
 		port = config->source.port.port;
-		if_config_index = (uint8_t) (port - IA_CSS_CSI2_PORT0);
+		if_config_index = (uint8_t) (port - MIPI_PORT0_ID);
 	} else if (config->mode == IA_CSS_INPUT_MODE_MEMORY) {
 		if_config_index = SH_CSS_IF_CONFIG_NOT_NEEDED;
 	} else {
@@ -189,7 +189,7 @@ enum ia_css_err ia_css_ifmtr_configure(struct ia_css_stream_config *config,
 	bits_per_pixel = input_formatter_get_alignment(INPUT_FORMATTER0_ID)
 	    * 8 / ISP_VEC_NELEMS;
 	switch (input_format) {
-	case IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY:
+	case ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY:
 		if (two_ppc) {
 			vmem_increment = 1;
 			deinterleaving = 1;
@@ -219,9 +219,9 @@ enum ia_css_err ia_css_ifmtr_configure(struct ia_css_stream_config *config,
 			start_column = start_column * deinterleaving / 2;
 		}
 		break;
-	case IA_CSS_STREAM_FORMAT_YUV420_8:
-	case IA_CSS_STREAM_FORMAT_YUV420_10:
-	case IA_CSS_STREAM_FORMAT_YUV420_16:
+	case ATOMISP_INPUT_FORMAT_YUV420_8:
+	case ATOMISP_INPUT_FORMAT_YUV420_10:
+	case ATOMISP_INPUT_FORMAT_YUV420_16:
 		if (two_ppc) {
 			vmem_increment = 1;
 			deinterleaving = 1;
@@ -246,9 +246,9 @@ enum ia_css_err ia_css_ifmtr_configure(struct ia_css_stream_config *config,
 			start_column *= deinterleaving;
 		}
 		break;
-	case IA_CSS_STREAM_FORMAT_YUV422_8:
-	case IA_CSS_STREAM_FORMAT_YUV422_10:
-	case IA_CSS_STREAM_FORMAT_YUV422_16:
+	case ATOMISP_INPUT_FORMAT_YUV422_8:
+	case ATOMISP_INPUT_FORMAT_YUV422_10:
+	case ATOMISP_INPUT_FORMAT_YUV422_16:
 		if (two_ppc) {
 			vmem_increment = 1;
 			deinterleaving = 1;
@@ -267,11 +267,11 @@ enum ia_css_err ia_css_ifmtr_configure(struct ia_css_stream_config *config,
 			start_column *= deinterleaving;
 		}
 		break;
-	case IA_CSS_STREAM_FORMAT_RGB_444:
-	case IA_CSS_STREAM_FORMAT_RGB_555:
-	case IA_CSS_STREAM_FORMAT_RGB_565:
-	case IA_CSS_STREAM_FORMAT_RGB_666:
-	case IA_CSS_STREAM_FORMAT_RGB_888:
+	case ATOMISP_INPUT_FORMAT_RGB_444:
+	case ATOMISP_INPUT_FORMAT_RGB_555:
+	case ATOMISP_INPUT_FORMAT_RGB_565:
+	case ATOMISP_INPUT_FORMAT_RGB_666:
+	case ATOMISP_INPUT_FORMAT_RGB_888:
 		num_vectors *= 2;
 		if (two_ppc) {
 			deinterleaving = 2;	/* BR in if_a, G in if_b */
@@ -293,11 +293,11 @@ enum ia_css_err ia_css_ifmtr_configure(struct ia_css_stream_config *config,
 		num_vectors = num_vectors / 2 * deinterleaving;
 		buf_offset_b = buffer_width / 2 / ISP_VEC_NELEMS;
 		break;
-	case IA_CSS_STREAM_FORMAT_RAW_6:
-	case IA_CSS_STREAM_FORMAT_RAW_7:
-	case IA_CSS_STREAM_FORMAT_RAW_8:
-	case IA_CSS_STREAM_FORMAT_RAW_10:
-	case IA_CSS_STREAM_FORMAT_RAW_12:
+	case ATOMISP_INPUT_FORMAT_RAW_6:
+	case ATOMISP_INPUT_FORMAT_RAW_7:
+	case ATOMISP_INPUT_FORMAT_RAW_8:
+	case ATOMISP_INPUT_FORMAT_RAW_10:
+	case ATOMISP_INPUT_FORMAT_RAW_12:
 		if (two_ppc) {
 			int crop_col = (start_column % 2) == 1;
 			vmem_increment = 2;
@@ -332,8 +332,8 @@ enum ia_css_err ia_css_ifmtr_configure(struct ia_css_stream_config *config,
 		vectors_per_line = CEIL_DIV(cropped_width, ISP_VEC_NELEMS);
 		vectors_per_line = CEIL_MUL(vectors_per_line, deinterleaving);
 		break;
-	case IA_CSS_STREAM_FORMAT_RAW_14:
-	case IA_CSS_STREAM_FORMAT_RAW_16:
+	case ATOMISP_INPUT_FORMAT_RAW_14:
+	case ATOMISP_INPUT_FORMAT_RAW_16:
 		if (two_ppc) {
 			num_vectors *= 2;
 			vmem_increment = 1;
@@ -350,26 +350,26 @@ enum ia_css_err ia_css_ifmtr_configure(struct ia_css_stream_config *config,
 		}
 		buffer_height *= 2;
 		break;
-	case IA_CSS_STREAM_FORMAT_BINARY_8:
-	case IA_CSS_STREAM_FORMAT_GENERIC_SHORT1:
-	case IA_CSS_STREAM_FORMAT_GENERIC_SHORT2:
-	case IA_CSS_STREAM_FORMAT_GENERIC_SHORT3:
-	case IA_CSS_STREAM_FORMAT_GENERIC_SHORT4:
-	case IA_CSS_STREAM_FORMAT_GENERIC_SHORT5:
-	case IA_CSS_STREAM_FORMAT_GENERIC_SHORT6:
-	case IA_CSS_STREAM_FORMAT_GENERIC_SHORT7:
-	case IA_CSS_STREAM_FORMAT_GENERIC_SHORT8:
-	case IA_CSS_STREAM_FORMAT_YUV420_8_SHIFT:
-	case IA_CSS_STREAM_FORMAT_YUV420_10_SHIFT:
-	case IA_CSS_STREAM_FORMAT_EMBEDDED:
-	case IA_CSS_STREAM_FORMAT_USER_DEF1:
-	case IA_CSS_STREAM_FORMAT_USER_DEF2:
-	case IA_CSS_STREAM_FORMAT_USER_DEF3:
-	case IA_CSS_STREAM_FORMAT_USER_DEF4:
-	case IA_CSS_STREAM_FORMAT_USER_DEF5:
-	case IA_CSS_STREAM_FORMAT_USER_DEF6:
-	case IA_CSS_STREAM_FORMAT_USER_DEF7:
-	case IA_CSS_STREAM_FORMAT_USER_DEF8:
+	case ATOMISP_INPUT_FORMAT_BINARY_8:
+	case ATOMISP_INPUT_FORMAT_GENERIC_SHORT1:
+	case ATOMISP_INPUT_FORMAT_GENERIC_SHORT2:
+	case ATOMISP_INPUT_FORMAT_GENERIC_SHORT3:
+	case ATOMISP_INPUT_FORMAT_GENERIC_SHORT4:
+	case ATOMISP_INPUT_FORMAT_GENERIC_SHORT5:
+	case ATOMISP_INPUT_FORMAT_GENERIC_SHORT6:
+	case ATOMISP_INPUT_FORMAT_GENERIC_SHORT7:
+	case ATOMISP_INPUT_FORMAT_GENERIC_SHORT8:
+	case ATOMISP_INPUT_FORMAT_YUV420_8_SHIFT:
+	case ATOMISP_INPUT_FORMAT_YUV420_10_SHIFT:
+	case ATOMISP_INPUT_FORMAT_EMBEDDED:
+	case ATOMISP_INPUT_FORMAT_USER_DEF1:
+	case ATOMISP_INPUT_FORMAT_USER_DEF2:
+	case ATOMISP_INPUT_FORMAT_USER_DEF3:
+	case ATOMISP_INPUT_FORMAT_USER_DEF4:
+	case ATOMISP_INPUT_FORMAT_USER_DEF5:
+	case ATOMISP_INPUT_FORMAT_USER_DEF6:
+	case ATOMISP_INPUT_FORMAT_USER_DEF7:
+	case ATOMISP_INPUT_FORMAT_USER_DEF8:
 		break;
 	}
 	if (width_a == 0)
@@ -420,9 +420,9 @@ enum ia_css_err ia_css_ifmtr_configure(struct ia_css_stream_config *config,
 	if_a_config.buf_eol_offset =
 	    buffer_width * bits_per_pixel / 8 - line_width;
 	if_a_config.is_yuv420_format =
-	    (input_format == IA_CSS_STREAM_FORMAT_YUV420_8)
-	    || (input_format == IA_CSS_STREAM_FORMAT_YUV420_10)
-	    || (input_format == IA_CSS_STREAM_FORMAT_YUV420_16);
+	    (input_format == ATOMISP_INPUT_FORMAT_YUV420_8)
+	    || (input_format == ATOMISP_INPUT_FORMAT_YUV420_10)
+	    || (input_format == ATOMISP_INPUT_FORMAT_YUV420_16);
 	if_a_config.block_no_reqs = (config->mode != IA_CSS_INPUT_MODE_SENSOR);
 
 	if (two_ppc) {
@@ -449,9 +449,9 @@ enum ia_css_err ia_css_ifmtr_configure(struct ia_css_stream_config *config,
 		if_b_config.buf_eol_offset =
 		    buffer_width * bits_per_pixel / 8 - line_width;
 		if_b_config.is_yuv420_format =
-		    input_format == IA_CSS_STREAM_FORMAT_YUV420_8
-		    || input_format == IA_CSS_STREAM_FORMAT_YUV420_10
-		    || input_format == IA_CSS_STREAM_FORMAT_YUV420_16;
+		    input_format == ATOMISP_INPUT_FORMAT_YUV420_8
+		    || input_format == ATOMISP_INPUT_FORMAT_YUV420_10
+		    || input_format == ATOMISP_INPUT_FORMAT_YUV420_16;
 		if_b_config.block_no_reqs =
 		    (config->mode != IA_CSS_INPUT_MODE_SENSOR);
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/inputfifo/interface/ia_css_inputfifo.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/inputfifo/interface/ia_css_inputfifo.h
index 47d0f7e53f47..545f9e2da59e 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/inputfifo/interface/ia_css_inputfifo.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/inputfifo/interface/ia_css_inputfifo.h
@@ -42,12 +42,12 @@ void ia_css_inputfifo_send_input_frame(
 	unsigned int	width,
 	unsigned int	height,
 	unsigned int	ch_id,
-	enum ia_css_stream_format	input_format,
+	enum atomisp_input_format	input_format,
 	bool			two_ppc);
 
 void ia_css_inputfifo_start_frame(
 	unsigned int	ch_id,
-	enum ia_css_stream_format	input_format,
+	enum atomisp_input_format	input_format,
 	bool			two_ppc);
 
 void ia_css_inputfifo_send_line(
@@ -59,7 +59,7 @@ void ia_css_inputfifo_send_line(
 
 void ia_css_inputfifo_send_embedded_line(
 	unsigned int	ch_id,
-	enum ia_css_stream_format	data_type,
+	enum atomisp_input_format	data_type,
 	const unsigned short	*data,
 	unsigned int	width);
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/inputfifo/src/inputfifo.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/inputfifo/src/inputfifo.c
index 8dc74927e9a2..24ca4aaf8df1 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/inputfifo/src/inputfifo.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/inputfifo/src/inputfifo.c
@@ -86,7 +86,7 @@ static unsigned int inputfifo_curr_ch_id, inputfifo_curr_fmt_type;
 #endif
 struct inputfifo_instance {
 	unsigned int				ch_id;
-	enum ia_css_stream_format	input_format;
+	enum atomisp_input_format	input_format;
 	bool						two_ppc;
 	bool						streaming;
 	unsigned int				hblank_cycles;
@@ -466,21 +466,21 @@ static void inputfifo_send_frame(
 
 
 static enum inputfifo_mipi_data_type inputfifo_determine_type(
-	enum ia_css_stream_format input_format)
+	enum atomisp_input_format input_format)
 {
 	enum inputfifo_mipi_data_type type;
 
 	type = inputfifo_mipi_data_type_regular;
-	if (input_format == IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY) {
+	if (input_format == ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY) {
 		type =
 			inputfifo_mipi_data_type_yuv420_legacy;
-	} else if (input_format == IA_CSS_STREAM_FORMAT_YUV420_8  ||
-		   input_format == IA_CSS_STREAM_FORMAT_YUV420_10 ||
-		   input_format == IA_CSS_STREAM_FORMAT_YUV420_16) {
+	} else if (input_format == ATOMISP_INPUT_FORMAT_YUV420_8  ||
+		   input_format == ATOMISP_INPUT_FORMAT_YUV420_10 ||
+		   input_format == ATOMISP_INPUT_FORMAT_YUV420_16) {
 		type =
 			inputfifo_mipi_data_type_yuv420;
-	} else if (input_format >= IA_CSS_STREAM_FORMAT_RGB_444 &&
-		   input_format <= IA_CSS_STREAM_FORMAT_RGB_888) {
+	} else if (input_format >= ATOMISP_INPUT_FORMAT_RGB_444 &&
+		   input_format <= ATOMISP_INPUT_FORMAT_RGB_888) {
 		type =
 			inputfifo_mipi_data_type_rgb;
 	}
@@ -500,7 +500,7 @@ void ia_css_inputfifo_send_input_frame(
 	unsigned int width,
 	unsigned int height,
 	unsigned int ch_id,
-	enum ia_css_stream_format input_format,
+	enum atomisp_input_format input_format,
 	bool two_ppc)
 {
 	unsigned int fmt_type, hblank_cycles, marker_cycles;
@@ -524,7 +524,7 @@ void ia_css_inputfifo_send_input_frame(
 
 void ia_css_inputfifo_start_frame(
 	unsigned int ch_id,
-	enum ia_css_stream_format input_format,
+	enum atomisp_input_format input_format,
 	bool two_ppc)
 {
 	struct inputfifo_instance *s2mi;
@@ -574,7 +574,7 @@ void ia_css_inputfifo_send_line(
 
 void ia_css_inputfifo_send_embedded_line(
 	unsigned int	ch_id,
-	enum ia_css_stream_format	data_type,
+	enum atomisp_input_format	data_type,
 	const unsigned short	*data,
 	unsigned int	width)
 {
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/interface/ia_css_isys.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/interface/ia_css_isys.h
index 4cf2defe9ef0..8c005db9766e 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/interface/ia_css_isys.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/interface/ia_css_isys.h
@@ -50,8 +50,8 @@ typedef input_system_cfg_t	ia_css_isys_descr_t;
 #if defined(USE_INPUT_SYSTEM_VERSION_2) || defined(USE_INPUT_SYSTEM_VERSION_2401)
 input_system_error_t ia_css_isys_init(void);
 void ia_css_isys_uninit(void);
-mipi_port_ID_t ia_css_isys_port_to_mipi_port(
-	enum ia_css_csi2_port api_port);
+enum mipi_port_id ia_css_isys_port_to_mipi_port(
+	enum mipi_port_id api_port);
 #endif
 
 #if defined(USE_INPUT_SYSTEM_VERSION_2401)
@@ -68,7 +68,7 @@ mipi_port_ID_t ia_css_isys_port_to_mipi_port(
  *				there is already a stream registered with the same handle
  */
 enum ia_css_err ia_css_isys_csi_rx_register_stream(
-	enum ia_css_csi2_port port,
+	enum mipi_port_id port,
 	uint32_t isys_stream_id);
 
 /**
@@ -83,14 +83,14 @@ enum ia_css_err ia_css_isys_csi_rx_register_stream(
  *				there is no stream registered with that handle
  */
 enum ia_css_err ia_css_isys_csi_rx_unregister_stream(
-	enum ia_css_csi2_port port,
+	enum mipi_port_id port,
 	uint32_t isys_stream_id);
 
 enum ia_css_err ia_css_isys_convert_compressed_format(
 		struct ia_css_csi2_compression *comp,
 		struct input_system_cfg_s *cfg);
 unsigned int ia_css_csi2_calculate_input_system_alignment(
-	enum ia_css_stream_format fmt_type);
+	enum atomisp_input_format fmt_type);
 #endif
 
 #if !defined(USE_INPUT_SYSTEM_VERSION_2401)
@@ -101,12 +101,12 @@ void ia_css_isys_rx_configure(
 
 void ia_css_isys_rx_disable(void);
 
-void ia_css_isys_rx_enable_all_interrupts(mipi_port_ID_t port);
+void ia_css_isys_rx_enable_all_interrupts(enum mipi_port_id port);
 
-unsigned int ia_css_isys_rx_get_interrupt_reg(mipi_port_ID_t port);
-void ia_css_isys_rx_get_irq_info(mipi_port_ID_t port,
+unsigned int ia_css_isys_rx_get_interrupt_reg(enum mipi_port_id port);
+void ia_css_isys_rx_get_irq_info(enum mipi_port_id port,
 				 unsigned int *irq_infos);
-void ia_css_isys_rx_clear_irq_info(mipi_port_ID_t port,
+void ia_css_isys_rx_clear_irq_info(enum mipi_port_id port,
 				   unsigned int irq_infos);
 unsigned int ia_css_isys_rx_translate_irq_infos(unsigned int bits);
 
@@ -124,7 +124,7 @@ unsigned int ia_css_isys_rx_translate_irq_infos(unsigned int bits);
  * format type must be sumitted correctly by the application.
  */
 enum ia_css_err ia_css_isys_convert_stream_format_to_mipi_format(
-		enum ia_css_stream_format input_format,
+		enum atomisp_input_format input_format,
 		mipi_predictor_t compression,
 		unsigned int *fmt_type);
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/csi_rx_rmgr.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/csi_rx_rmgr.c
index 3b04dc51335a..a914ce5532ec 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/csi_rx_rmgr.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/csi_rx_rmgr.c
@@ -141,7 +141,7 @@ void ia_css_isys_csi_rx_lut_rmgr_release(
 }
 
 enum ia_css_err ia_css_isys_csi_rx_register_stream(
-	enum ia_css_csi2_port port,
+	enum mipi_port_id port,
 	uint32_t isys_stream_id)
 {
 	enum ia_css_err retval = IA_CSS_ERR_INTERNAL_ERROR;
@@ -160,7 +160,7 @@ enum ia_css_err ia_css_isys_csi_rx_register_stream(
 }
 
 enum ia_css_err ia_css_isys_csi_rx_unregister_stream(
-	enum ia_css_csi2_port port,
+	enum mipi_port_id port,
 	uint32_t isys_stream_id)
 {
 	enum ia_css_err retval = IA_CSS_ERR_INTERNAL_ERROR;
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/isys_init.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/isys_init.c
index 4122084fd237..2ae5e59d5e31 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/isys_init.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/isys_init.c
@@ -105,8 +105,6 @@ input_system_error_t ia_css_isys_init(void)
 #elif defined(USE_INPUT_SYSTEM_VERSION_2401)
 input_system_error_t ia_css_isys_init(void)
 {
-	input_system_error_t error = INPUT_SYSTEM_ERR_NO_ERROR;
-
 	ia_css_isys_csi_rx_lut_rmgr_init();
 	ia_css_isys_ibuf_rmgr_init();
 	ia_css_isys_dma_channel_rmgr_init();
@@ -120,7 +118,7 @@ input_system_error_t ia_css_isys_init(void)
 	isys_irqc_status_enable(ISYS_IRQ1_ID);
 	isys_irqc_status_enable(ISYS_IRQ2_ID);
 
-	return error;
+	return INPUT_SYSTEM_ERR_NO_ERROR;
 }
 #endif
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/rx.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/rx.c
index 70f6cb5e5918..425bd3cc3f34 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/rx.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/rx.c
@@ -36,7 +36,7 @@ more details.
 #include "sh_css_internal.h"
 
 #if !defined(USE_INPUT_SYSTEM_VERSION_2401)
-void ia_css_isys_rx_enable_all_interrupts(mipi_port_ID_t port)
+void ia_css_isys_rx_enable_all_interrupts(enum mipi_port_id port)
 {
 	hrt_data bits = receiver_port_reg_load(RX0_ID,
 				port,
@@ -80,22 +80,22 @@ void ia_css_isys_rx_enable_all_interrupts(mipi_port_ID_t port)
  * initializers in Windows. Without that there is no easy way to guarantee
  * that the array values would be in the correct order.
  * */
-mipi_port_ID_t ia_css_isys_port_to_mipi_port(enum ia_css_csi2_port api_port)
+enum mipi_port_id ia_css_isys_port_to_mipi_port(enum mipi_port_id api_port)
 {
 	/* In this module the validity of the inptu variable should
 	 * have been checked already, so we do not check for erroneous
 	 * values. */
-	mipi_port_ID_t port = MIPI_PORT0_ID;
+	enum mipi_port_id port = MIPI_PORT0_ID;
 
-	if (api_port == IA_CSS_CSI2_PORT1)
+	if (api_port == MIPI_PORT1_ID)
 		port = MIPI_PORT1_ID;
-	else if (api_port == IA_CSS_CSI2_PORT2)
+	else if (api_port == MIPI_PORT2_ID)
 		port = MIPI_PORT2_ID;
 
 	return port;
 }
 
-unsigned int ia_css_isys_rx_get_interrupt_reg(mipi_port_ID_t port)
+unsigned int ia_css_isys_rx_get_interrupt_reg(enum mipi_port_id port)
 {
 	return receiver_port_reg_load(RX0_ID,
 				      port,
@@ -104,17 +104,17 @@ unsigned int ia_css_isys_rx_get_interrupt_reg(mipi_port_ID_t port)
 
 void ia_css_rx_get_irq_info(unsigned int *irq_infos)
 {
-	ia_css_rx_port_get_irq_info(IA_CSS_CSI2_PORT1, irq_infos);
+	ia_css_rx_port_get_irq_info(MIPI_PORT1_ID, irq_infos);
 }
 
-void ia_css_rx_port_get_irq_info(enum ia_css_csi2_port api_port,
+void ia_css_rx_port_get_irq_info(enum mipi_port_id api_port,
 				 unsigned int *irq_infos)
 {
-	mipi_port_ID_t port = ia_css_isys_port_to_mipi_port(api_port);
+	enum mipi_port_id port = ia_css_isys_port_to_mipi_port(api_port);
 	ia_css_isys_rx_get_irq_info(port, irq_infos);
 }
 
-void ia_css_isys_rx_get_irq_info(mipi_port_ID_t port,
+void ia_css_isys_rx_get_irq_info(enum mipi_port_id port,
 				 unsigned int *irq_infos)
 {
 	unsigned int bits;
@@ -169,16 +169,16 @@ unsigned int ia_css_isys_rx_translate_irq_infos(unsigned int bits)
 
 void ia_css_rx_clear_irq_info(unsigned int irq_infos)
 {
-	ia_css_rx_port_clear_irq_info(IA_CSS_CSI2_PORT1, irq_infos);
+	ia_css_rx_port_clear_irq_info(MIPI_PORT1_ID, irq_infos);
 }
 
-void ia_css_rx_port_clear_irq_info(enum ia_css_csi2_port api_port, unsigned int irq_infos)
+void ia_css_rx_port_clear_irq_info(enum mipi_port_id api_port, unsigned int irq_infos)
 {
-	mipi_port_ID_t port = ia_css_isys_port_to_mipi_port(api_port);
+	enum mipi_port_id port = ia_css_isys_port_to_mipi_port(api_port);
 	ia_css_isys_rx_clear_irq_info(port, irq_infos);
 }
 
-void ia_css_isys_rx_clear_irq_info(mipi_port_ID_t port, unsigned int irq_infos)
+void ia_css_isys_rx_clear_irq_info(enum mipi_port_id port, unsigned int irq_infos)
 {
 	hrt_data bits = receiver_port_reg_load(RX0_ID,
 				port,
@@ -229,7 +229,7 @@ void ia_css_isys_rx_clear_irq_info(mipi_port_ID_t port, unsigned int irq_infos)
 #endif /* #if !defined(USE_INPUT_SYSTEM_VERSION_2401) */
 
 enum ia_css_err ia_css_isys_convert_stream_format_to_mipi_format(
-		enum ia_css_stream_format input_format,
+		enum atomisp_input_format input_format,
 		mipi_predictor_t compression,
 		unsigned int *fmt_type)
 {
@@ -244,25 +244,25 @@ enum ia_css_err ia_css_isys_convert_stream_format_to_mipi_format(
 	 */
 	if (compression != MIPI_PREDICTOR_NONE) {
 		switch (input_format) {
-		case IA_CSS_STREAM_FORMAT_RAW_6:
+		case ATOMISP_INPUT_FORMAT_RAW_6:
 			*fmt_type = 6;
 			break;
-		case IA_CSS_STREAM_FORMAT_RAW_7:
+		case ATOMISP_INPUT_FORMAT_RAW_7:
 			*fmt_type = 7;
 			break;
-		case IA_CSS_STREAM_FORMAT_RAW_8:
+		case ATOMISP_INPUT_FORMAT_RAW_8:
 			*fmt_type = 8;
 			break;
-		case IA_CSS_STREAM_FORMAT_RAW_10:
+		case ATOMISP_INPUT_FORMAT_RAW_10:
 			*fmt_type = 10;
 			break;
-		case IA_CSS_STREAM_FORMAT_RAW_12:
+		case ATOMISP_INPUT_FORMAT_RAW_12:
 			*fmt_type = 12;
 			break;
-		case IA_CSS_STREAM_FORMAT_RAW_14:
+		case ATOMISP_INPUT_FORMAT_RAW_14:
 			*fmt_type = 14;
 			break;
-		case IA_CSS_STREAM_FORMAT_RAW_16:
+		case ATOMISP_INPUT_FORMAT_RAW_16:
 			*fmt_type = 16;
 			break;
 		default:
@@ -277,96 +277,96 @@ enum ia_css_err ia_css_isys_convert_stream_format_to_mipi_format(
 	 * MW: For some reason the mapping is not 1-to-1
 	 */
 	switch (input_format) {
-	case IA_CSS_STREAM_FORMAT_RGB_888:
+	case ATOMISP_INPUT_FORMAT_RGB_888:
 		*fmt_type = MIPI_FORMAT_RGB888;
 		break;
-	case IA_CSS_STREAM_FORMAT_RGB_555:
+	case ATOMISP_INPUT_FORMAT_RGB_555:
 		*fmt_type = MIPI_FORMAT_RGB555;
 		break;
-	case IA_CSS_STREAM_FORMAT_RGB_444:
+	case ATOMISP_INPUT_FORMAT_RGB_444:
 		*fmt_type = MIPI_FORMAT_RGB444;
 		break;
-	case IA_CSS_STREAM_FORMAT_RGB_565:
+	case ATOMISP_INPUT_FORMAT_RGB_565:
 		*fmt_type = MIPI_FORMAT_RGB565;
 		break;
-	case IA_CSS_STREAM_FORMAT_RGB_666:
+	case ATOMISP_INPUT_FORMAT_RGB_666:
 		*fmt_type = MIPI_FORMAT_RGB666;
 		break;
-	case IA_CSS_STREAM_FORMAT_RAW_8:
+	case ATOMISP_INPUT_FORMAT_RAW_8:
 		*fmt_type = MIPI_FORMAT_RAW8;
 		break;
-	case IA_CSS_STREAM_FORMAT_RAW_10:
+	case ATOMISP_INPUT_FORMAT_RAW_10:
 		*fmt_type = MIPI_FORMAT_RAW10;
 		break;
-	case IA_CSS_STREAM_FORMAT_RAW_6:
+	case ATOMISP_INPUT_FORMAT_RAW_6:
 		*fmt_type = MIPI_FORMAT_RAW6;
 		break;
-	case IA_CSS_STREAM_FORMAT_RAW_7:
+	case ATOMISP_INPUT_FORMAT_RAW_7:
 		*fmt_type = MIPI_FORMAT_RAW7;
 		break;
-	case IA_CSS_STREAM_FORMAT_RAW_12:
+	case ATOMISP_INPUT_FORMAT_RAW_12:
 		*fmt_type = MIPI_FORMAT_RAW12;
 		break;
-	case IA_CSS_STREAM_FORMAT_RAW_14:
+	case ATOMISP_INPUT_FORMAT_RAW_14:
 		*fmt_type = MIPI_FORMAT_RAW14;
 		break;
-	case IA_CSS_STREAM_FORMAT_YUV420_8:
+	case ATOMISP_INPUT_FORMAT_YUV420_8:
 		*fmt_type = MIPI_FORMAT_YUV420_8;
 		break;
-	case IA_CSS_STREAM_FORMAT_YUV420_10:
+	case ATOMISP_INPUT_FORMAT_YUV420_10:
 		*fmt_type = MIPI_FORMAT_YUV420_10;
 		break;
-	case IA_CSS_STREAM_FORMAT_YUV422_8:
+	case ATOMISP_INPUT_FORMAT_YUV422_8:
 		*fmt_type = MIPI_FORMAT_YUV422_8;
 		break;
-	case IA_CSS_STREAM_FORMAT_YUV422_10:
+	case ATOMISP_INPUT_FORMAT_YUV422_10:
 		*fmt_type = MIPI_FORMAT_YUV422_10;
 		break;
-	case IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY:
+	case ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY:
 		*fmt_type = MIPI_FORMAT_YUV420_8_LEGACY;
 		break;
-	case IA_CSS_STREAM_FORMAT_EMBEDDED:
+	case ATOMISP_INPUT_FORMAT_EMBEDDED:
 		*fmt_type = MIPI_FORMAT_EMBEDDED;
 		break;
 #ifndef USE_INPUT_SYSTEM_VERSION_2401
-	case IA_CSS_STREAM_FORMAT_RAW_16:
+	case ATOMISP_INPUT_FORMAT_RAW_16:
 		/* This is not specified by Arasan, so we use
 		 * 17 for now.
 		 */
 		*fmt_type = MIPI_FORMAT_RAW16;
 		break;
-	case IA_CSS_STREAM_FORMAT_BINARY_8:
+	case ATOMISP_INPUT_FORMAT_BINARY_8:
 		*fmt_type = MIPI_FORMAT_BINARY_8;
 		break;
 #else
-	case IA_CSS_STREAM_FORMAT_USER_DEF1:
+	case ATOMISP_INPUT_FORMAT_USER_DEF1:
 		*fmt_type = MIPI_FORMAT_CUSTOM0;
 		break;
-	case IA_CSS_STREAM_FORMAT_USER_DEF2:
+	case ATOMISP_INPUT_FORMAT_USER_DEF2:
 		*fmt_type = MIPI_FORMAT_CUSTOM1;
 		break;
-	case IA_CSS_STREAM_FORMAT_USER_DEF3:
+	case ATOMISP_INPUT_FORMAT_USER_DEF3:
 		*fmt_type = MIPI_FORMAT_CUSTOM2;
 		break;
-	case IA_CSS_STREAM_FORMAT_USER_DEF4:
+	case ATOMISP_INPUT_FORMAT_USER_DEF4:
 		*fmt_type = MIPI_FORMAT_CUSTOM3;
 		break;
-	case IA_CSS_STREAM_FORMAT_USER_DEF5:
+	case ATOMISP_INPUT_FORMAT_USER_DEF5:
 		*fmt_type = MIPI_FORMAT_CUSTOM4;
 		break;
-	case IA_CSS_STREAM_FORMAT_USER_DEF6:
+	case ATOMISP_INPUT_FORMAT_USER_DEF6:
 		*fmt_type = MIPI_FORMAT_CUSTOM5;
 		break;
-	case IA_CSS_STREAM_FORMAT_USER_DEF7:
+	case ATOMISP_INPUT_FORMAT_USER_DEF7:
 		*fmt_type = MIPI_FORMAT_CUSTOM6;
 		break;
-	case IA_CSS_STREAM_FORMAT_USER_DEF8:
+	case ATOMISP_INPUT_FORMAT_USER_DEF8:
 		*fmt_type = MIPI_FORMAT_CUSTOM7;
 		break;
 #endif
 
-	case IA_CSS_STREAM_FORMAT_YUV420_16:
-	case IA_CSS_STREAM_FORMAT_YUV422_16:
+	case ATOMISP_INPUT_FORMAT_YUV420_16:
+	case ATOMISP_INPUT_FORMAT_YUV422_16:
 	default:
 		return IA_CSS_ERR_INTERNAL_ERROR;
 	}
@@ -448,34 +448,34 @@ enum ia_css_err ia_css_isys_convert_compressed_format(
 }
 
 unsigned int ia_css_csi2_calculate_input_system_alignment(
-	enum ia_css_stream_format fmt_type)
+	enum atomisp_input_format fmt_type)
 {
 	unsigned int memory_alignment_in_bytes = HIVE_ISP_DDR_WORD_BYTES;
 
 	switch (fmt_type) {
-	case IA_CSS_STREAM_FORMAT_RAW_6:
-	case IA_CSS_STREAM_FORMAT_RAW_7:
-	case IA_CSS_STREAM_FORMAT_RAW_8:
-	case IA_CSS_STREAM_FORMAT_RAW_10:
-	case IA_CSS_STREAM_FORMAT_RAW_12:
-	case IA_CSS_STREAM_FORMAT_RAW_14:
+	case ATOMISP_INPUT_FORMAT_RAW_6:
+	case ATOMISP_INPUT_FORMAT_RAW_7:
+	case ATOMISP_INPUT_FORMAT_RAW_8:
+	case ATOMISP_INPUT_FORMAT_RAW_10:
+	case ATOMISP_INPUT_FORMAT_RAW_12:
+	case ATOMISP_INPUT_FORMAT_RAW_14:
 		memory_alignment_in_bytes = 2 * ISP_VEC_NELEMS;
 		break;
-	case IA_CSS_STREAM_FORMAT_YUV420_8:
-	case IA_CSS_STREAM_FORMAT_YUV422_8:
-	case IA_CSS_STREAM_FORMAT_USER_DEF1:
-	case IA_CSS_STREAM_FORMAT_USER_DEF2:
-	case IA_CSS_STREAM_FORMAT_USER_DEF3:
-	case IA_CSS_STREAM_FORMAT_USER_DEF4:
-	case IA_CSS_STREAM_FORMAT_USER_DEF5:
-	case IA_CSS_STREAM_FORMAT_USER_DEF6:
-	case IA_CSS_STREAM_FORMAT_USER_DEF7:
-	case IA_CSS_STREAM_FORMAT_USER_DEF8:
+	case ATOMISP_INPUT_FORMAT_YUV420_8:
+	case ATOMISP_INPUT_FORMAT_YUV422_8:
+	case ATOMISP_INPUT_FORMAT_USER_DEF1:
+	case ATOMISP_INPUT_FORMAT_USER_DEF2:
+	case ATOMISP_INPUT_FORMAT_USER_DEF3:
+	case ATOMISP_INPUT_FORMAT_USER_DEF4:
+	case ATOMISP_INPUT_FORMAT_USER_DEF5:
+	case ATOMISP_INPUT_FORMAT_USER_DEF6:
+	case ATOMISP_INPUT_FORMAT_USER_DEF7:
+	case ATOMISP_INPUT_FORMAT_USER_DEF8:
 		/* Planar YUV formats need to have all planes aligned, this means
 		 * double the alignment for the Y plane if the horizontal decimation is 2. */
 		memory_alignment_in_bytes = 2 * HIVE_ISP_DDR_WORD_BYTES;
 		break;
-	case IA_CSS_STREAM_FORMAT_EMBEDDED:
+	case ATOMISP_INPUT_FORMAT_EMBEDDED:
 	default:
 		memory_alignment_in_bytes = HIVE_ISP_DDR_WORD_BYTES;
 		break;
@@ -492,7 +492,7 @@ void ia_css_isys_rx_configure(const rx_cfg_t *config,
 #if defined(HAS_RX_VERSION_2)
 	bool port_enabled[N_MIPI_PORT_ID];
 	bool any_port_enabled = false;
-	mipi_port_ID_t port;
+	enum mipi_port_id port;
 
 	if ((config == NULL)
 		|| (config->mode >= N_RX_MODE)
@@ -500,7 +500,7 @@ void ia_css_isys_rx_configure(const rx_cfg_t *config,
 		assert(0);
 		return;
 	}
-	for (port = (mipi_port_ID_t) 0; port < N_MIPI_PORT_ID; port++) {
+	for (port = (enum mipi_port_id) 0; port < N_MIPI_PORT_ID; port++) {
 		if (is_receiver_port_enabled(RX0_ID, port))
 			any_port_enabled = true;
 	}
@@ -595,8 +595,8 @@ void ia_css_isys_rx_configure(const rx_cfg_t *config,
 
 void ia_css_isys_rx_disable(void)
 {
-	mipi_port_ID_t port;
-	for (port = (mipi_port_ID_t) 0; port < N_MIPI_PORT_ID; port++) {
+	enum mipi_port_id port;
+	for (port = (enum mipi_port_id) 0; port < N_MIPI_PORT_ID; port++) {
 		receiver_port_reg_store(RX0_ID, port,
 					_HRT_CSS_RECEIVER_DEVICE_READY_REG_IDX,
 					false);
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/virtual_isys.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/virtual_isys.c
index 90922a7acefd..2484949453b7 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/virtual_isys.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/isys/src/virtual_isys.c
@@ -331,7 +331,7 @@ static bool create_input_system_channel(
 		break;
 	}
 
-	if (rc == false)
+	if (!rc)
 		return false;
 
 	if (!acquire_sid(me->stream2mmio_id, &(me->stream2mmio_sid_id))) {
@@ -474,7 +474,7 @@ static bool calculate_input_system_channel_cfg(
 
 	rc = calculate_stream2mmio_cfg(isys_cfg, metadata,
 			&(channel_cfg->stream2mmio_cfg));
-	if (rc == false)
+	if (!rc)
 		return false;
 
 	rc = calculate_ibuf_ctrl_cfg(
@@ -482,7 +482,7 @@ static bool calculate_input_system_channel_cfg(
 			input_port,
 			isys_cfg,
 			&(channel_cfg->ibuf_ctrl_cfg));
-	if (rc == false)
+	if (!rc)
 		return false;
 	if (metadata)
 		channel_cfg->ibuf_ctrl_cfg.stores_per_frame = isys_cfg->metadata.lines_per_frame;
@@ -491,7 +491,7 @@ static bool calculate_input_system_channel_cfg(
 			channel,
 			isys_cfg,
 			&(channel_cfg->dma_cfg));
-	if (rc == false)
+	if (!rc)
 		return false;
 
 	rc = calculate_isys2401_dma_port_cfg(
@@ -499,7 +499,7 @@ static bool calculate_input_system_channel_cfg(
 			false,
 			metadata,
 			&(channel_cfg->dma_src_port_cfg));
-	if (rc == false)
+	if (!rc)
 		return false;
 
 	rc = calculate_isys2401_dma_port_cfg(
@@ -507,7 +507,7 @@ static bool calculate_input_system_channel_cfg(
 			isys_cfg->raw_packed,
 			metadata,
 			&(channel_cfg->dma_dest_port_cfg));
-	if (rc == false)
+	if (!rc)
 		return false;
 
 	return true;
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/pipeline/src/pipeline.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/pipeline/src/pipeline.c
index 81a50c73ad0b..4746620ca212 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/pipeline/src/pipeline.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/pipeline/src/pipeline.c
@@ -161,9 +161,9 @@ void ia_css_pipeline_start(enum ia_css_pipe_id pipe_id,
 #endif
 #if !defined(HAS_NO_INPUT_SYSTEM)
 #ifndef ISP2401
-				, (mipi_port_ID_t) 0
+				, (enum mipi_port_id) 0
 #else
-				(mipi_port_ID_t) 0,
+				(enum mipi_port_id) 0,
 #endif
 #endif
 #ifndef ISP2401
@@ -574,7 +574,7 @@ static void pipeline_map_num_to_sp_thread(unsigned int pipe_num)
 
 		But the below is more descriptive.
 	*/
-	assert(found_sp_thread != false);
+	assert(found_sp_thread);
 }
 
 static void pipeline_unmap_num_to_sp_thread(unsigned int pipe_num)
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/rmgr/src/rmgr_vbuf.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/rmgr/src/rmgr_vbuf.c
index 54239ac9d7c9..a4d8a48f95ba 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/rmgr/src/rmgr_vbuf.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/runtime/rmgr/src/rmgr_vbuf.c
@@ -24,12 +24,12 @@
  * @brief VBUF resource handles
  */
 #define NUM_HANDLES 1000
-struct ia_css_rmgr_vbuf_handle handle_table[NUM_HANDLES];
+static struct ia_css_rmgr_vbuf_handle handle_table[NUM_HANDLES];
 
 /*
  * @brief VBUF resource pool - refpool
  */
-struct ia_css_rmgr_vbuf_pool refpool = {
+static struct ia_css_rmgr_vbuf_pool refpool = {
 	false,			/* copy_on_write */
 	false,			/* recycle */
 	0,			/* size */
@@ -40,7 +40,7 @@ struct ia_css_rmgr_vbuf_pool refpool = {
 /*
  * @brief VBUF resource pool - writepool
  */
-struct ia_css_rmgr_vbuf_pool writepool = {
+static struct ia_css_rmgr_vbuf_pool writepool = {
 	true,			/* copy_on_write */
 	false,			/* recycle */
 	0,			/* size */
@@ -51,7 +51,7 @@ struct ia_css_rmgr_vbuf_pool writepool = {
 /*
  * @brief VBUF resource pool - hmmbufferpool
  */
-struct ia_css_rmgr_vbuf_pool hmmbufferpool = {
+static struct ia_css_rmgr_vbuf_pool hmmbufferpool = {
 	true,			/* copy_on_write */
 	true,			/* recycle */
 	32,			/* size */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css.c
index 37116faab631..c771e4b910f3 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css.c
@@ -462,46 +462,46 @@ verify_copy_out_frame_format(struct ia_css_pipe *pipe)
 	assert(pipe->stream != NULL);
 
 	switch (pipe->stream->config.input_config.format) {
-	case IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY:
-	case IA_CSS_STREAM_FORMAT_YUV420_8:
+	case ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY:
+	case ATOMISP_INPUT_FORMAT_YUV420_8:
 		for (i=0; i<ARRAY_SIZE(yuv420_copy_formats) && !found; i++)
 			found = (out_fmt == yuv420_copy_formats[i]);
 		break;
-	case IA_CSS_STREAM_FORMAT_YUV420_10:
-	case IA_CSS_STREAM_FORMAT_YUV420_16:
+	case ATOMISP_INPUT_FORMAT_YUV420_10:
+	case ATOMISP_INPUT_FORMAT_YUV420_16:
 		found = (out_fmt == IA_CSS_FRAME_FORMAT_YUV420_16);
 		break;
-	case IA_CSS_STREAM_FORMAT_YUV422_8:
+	case ATOMISP_INPUT_FORMAT_YUV422_8:
 		for (i=0; i<ARRAY_SIZE(yuv422_copy_formats) && !found; i++)
 			found = (out_fmt == yuv422_copy_formats[i]);
 		break;
-	case IA_CSS_STREAM_FORMAT_YUV422_10:
-	case IA_CSS_STREAM_FORMAT_YUV422_16:
+	case ATOMISP_INPUT_FORMAT_YUV422_10:
+	case ATOMISP_INPUT_FORMAT_YUV422_16:
 		found = (out_fmt == IA_CSS_FRAME_FORMAT_YUV422_16 ||
 			 out_fmt == IA_CSS_FRAME_FORMAT_YUV420_16);
 		break;
-	case IA_CSS_STREAM_FORMAT_RGB_444:
-	case IA_CSS_STREAM_FORMAT_RGB_555:
-	case IA_CSS_STREAM_FORMAT_RGB_565:
+	case ATOMISP_INPUT_FORMAT_RGB_444:
+	case ATOMISP_INPUT_FORMAT_RGB_555:
+	case ATOMISP_INPUT_FORMAT_RGB_565:
 		found = (out_fmt == IA_CSS_FRAME_FORMAT_RGBA888 ||
 			 out_fmt == IA_CSS_FRAME_FORMAT_RGB565);
 		break;
-	case IA_CSS_STREAM_FORMAT_RGB_666:
-	case IA_CSS_STREAM_FORMAT_RGB_888:
+	case ATOMISP_INPUT_FORMAT_RGB_666:
+	case ATOMISP_INPUT_FORMAT_RGB_888:
 		found = (out_fmt == IA_CSS_FRAME_FORMAT_RGBA888 ||
 			 out_fmt == IA_CSS_FRAME_FORMAT_YUV420);
 		break;
-	case IA_CSS_STREAM_FORMAT_RAW_6:
-	case IA_CSS_STREAM_FORMAT_RAW_7:
-	case IA_CSS_STREAM_FORMAT_RAW_8:
-	case IA_CSS_STREAM_FORMAT_RAW_10:
-	case IA_CSS_STREAM_FORMAT_RAW_12:
-	case IA_CSS_STREAM_FORMAT_RAW_14:
-	case IA_CSS_STREAM_FORMAT_RAW_16:
+	case ATOMISP_INPUT_FORMAT_RAW_6:
+	case ATOMISP_INPUT_FORMAT_RAW_7:
+	case ATOMISP_INPUT_FORMAT_RAW_8:
+	case ATOMISP_INPUT_FORMAT_RAW_10:
+	case ATOMISP_INPUT_FORMAT_RAW_12:
+	case ATOMISP_INPUT_FORMAT_RAW_14:
+	case ATOMISP_INPUT_FORMAT_RAW_16:
 		found = (out_fmt == IA_CSS_FRAME_FORMAT_RAW) ||
 			(out_fmt == IA_CSS_FRAME_FORMAT_RAW_PACKED);
 		break;
-	case IA_CSS_STREAM_FORMAT_BINARY_8:
+	case ATOMISP_INPUT_FORMAT_BINARY_8:
 		found = (out_fmt == IA_CSS_FRAME_FORMAT_BINARY_8);
 		break;
 	default:
@@ -586,13 +586,13 @@ sh_css_config_input_network(struct ia_css_stream *stream)
 }
 #elif !defined(HAS_NO_INPUT_SYSTEM) && defined(USE_INPUT_SYSTEM_VERSION_2401)
 static unsigned int csi2_protocol_calculate_max_subpixels_per_line(
-		enum ia_css_stream_format	format,
+		enum atomisp_input_format	format,
 		unsigned int			pixels_per_line)
 {
 	unsigned int rval;
 
 	switch (format) {
-	case IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY:
+	case ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY:
 		/*
 		 * The frame format layout is shown below.
 		 *
@@ -611,9 +611,9 @@ static unsigned int csi2_protocol_calculate_max_subpixels_per_line(
 		 */
 		rval = pixels_per_line * 2;
 		break;
-	case IA_CSS_STREAM_FORMAT_YUV420_8:
-	case IA_CSS_STREAM_FORMAT_YUV420_10:
-	case IA_CSS_STREAM_FORMAT_YUV420_16:
+	case ATOMISP_INPUT_FORMAT_YUV420_8:
+	case ATOMISP_INPUT_FORMAT_YUV420_10:
+	case ATOMISP_INPUT_FORMAT_YUV420_16:
 		/*
 		 * The frame format layout is shown below.
 		 *
@@ -630,9 +630,9 @@ static unsigned int csi2_protocol_calculate_max_subpixels_per_line(
 		 */
 		rval = pixels_per_line * 2;
 		break;
-	case IA_CSS_STREAM_FORMAT_YUV422_8:
-	case IA_CSS_STREAM_FORMAT_YUV422_10:
-	case IA_CSS_STREAM_FORMAT_YUV422_16:
+	case ATOMISP_INPUT_FORMAT_YUV422_8:
+	case ATOMISP_INPUT_FORMAT_YUV422_10:
+	case ATOMISP_INPUT_FORMAT_YUV422_16:
 		/*
 		 * The frame format layout is shown below.
 		 *
@@ -649,11 +649,11 @@ static unsigned int csi2_protocol_calculate_max_subpixels_per_line(
 		 */
 		rval = pixels_per_line * 2;
 		break;
-	case IA_CSS_STREAM_FORMAT_RGB_444:
-	case IA_CSS_STREAM_FORMAT_RGB_555:
-	case IA_CSS_STREAM_FORMAT_RGB_565:
-	case IA_CSS_STREAM_FORMAT_RGB_666:
-	case IA_CSS_STREAM_FORMAT_RGB_888:
+	case ATOMISP_INPUT_FORMAT_RGB_444:
+	case ATOMISP_INPUT_FORMAT_RGB_555:
+	case ATOMISP_INPUT_FORMAT_RGB_565:
+	case ATOMISP_INPUT_FORMAT_RGB_666:
+	case ATOMISP_INPUT_FORMAT_RGB_888:
 		/*
 		 * The frame format layout is shown below.
 		 *
@@ -670,22 +670,22 @@ static unsigned int csi2_protocol_calculate_max_subpixels_per_line(
 		 */
 		rval = pixels_per_line * 4;
 		break;
-	case IA_CSS_STREAM_FORMAT_RAW_6:
-	case IA_CSS_STREAM_FORMAT_RAW_7:
-	case IA_CSS_STREAM_FORMAT_RAW_8:
-	case IA_CSS_STREAM_FORMAT_RAW_10:
-	case IA_CSS_STREAM_FORMAT_RAW_12:
-	case IA_CSS_STREAM_FORMAT_RAW_14:
-	case IA_CSS_STREAM_FORMAT_RAW_16:
-	case IA_CSS_STREAM_FORMAT_BINARY_8:
-	case IA_CSS_STREAM_FORMAT_USER_DEF1:
-	case IA_CSS_STREAM_FORMAT_USER_DEF2:
-	case IA_CSS_STREAM_FORMAT_USER_DEF3:
-	case IA_CSS_STREAM_FORMAT_USER_DEF4:
-	case IA_CSS_STREAM_FORMAT_USER_DEF5:
-	case IA_CSS_STREAM_FORMAT_USER_DEF6:
-	case IA_CSS_STREAM_FORMAT_USER_DEF7:
-	case IA_CSS_STREAM_FORMAT_USER_DEF8:
+	case ATOMISP_INPUT_FORMAT_RAW_6:
+	case ATOMISP_INPUT_FORMAT_RAW_7:
+	case ATOMISP_INPUT_FORMAT_RAW_8:
+	case ATOMISP_INPUT_FORMAT_RAW_10:
+	case ATOMISP_INPUT_FORMAT_RAW_12:
+	case ATOMISP_INPUT_FORMAT_RAW_14:
+	case ATOMISP_INPUT_FORMAT_RAW_16:
+	case ATOMISP_INPUT_FORMAT_BINARY_8:
+	case ATOMISP_INPUT_FORMAT_USER_DEF1:
+	case ATOMISP_INPUT_FORMAT_USER_DEF2:
+	case ATOMISP_INPUT_FORMAT_USER_DEF3:
+	case ATOMISP_INPUT_FORMAT_USER_DEF4:
+	case ATOMISP_INPUT_FORMAT_USER_DEF5:
+	case ATOMISP_INPUT_FORMAT_USER_DEF6:
+	case ATOMISP_INPUT_FORMAT_USER_DEF7:
+	case ATOMISP_INPUT_FORMAT_USER_DEF8:
 		/*
 		 * The frame format layout is shown below.
 		 *
@@ -742,11 +742,11 @@ static bool sh_css_translate_stream_cfg_to_input_system_input_port_id(
 		break;
 	case IA_CSS_INPUT_MODE_BUFFERED_SENSOR:
 
-		if (stream_cfg->source.port.port == IA_CSS_CSI2_PORT0) {
+		if (stream_cfg->source.port.port == MIPI_PORT0_ID) {
 			isys_stream_descr->input_port_id = INPUT_SYSTEM_CSI_PORT0_ID;
-		} else if (stream_cfg->source.port.port == IA_CSS_CSI2_PORT1) {
+		} else if (stream_cfg->source.port.port == MIPI_PORT1_ID) {
 			isys_stream_descr->input_port_id = INPUT_SYSTEM_CSI_PORT1_ID;
-		} else if (stream_cfg->source.port.port == IA_CSS_CSI2_PORT2) {
+		} else if (stream_cfg->source.port.port == MIPI_PORT2_ID) {
 			isys_stream_descr->input_port_id = INPUT_SYSTEM_CSI_PORT2_ID;
 		}
 
@@ -927,7 +927,7 @@ static bool sh_css_translate_stream_cfg_to_input_system_input_port_resolution(
 	unsigned int max_subpixels_per_line;
 	unsigned int lines_per_frame;
 	unsigned int align_req_in_bytes;
-	enum ia_css_stream_format fmt_type;
+	enum atomisp_input_format fmt_type;
 
 	fmt_type = stream_cfg->isys_config[isys_stream_idx].format;
 	if ((stream_cfg->mode == IA_CSS_INPUT_MODE_SENSOR ||
@@ -936,11 +936,11 @@ static bool sh_css_translate_stream_cfg_to_input_system_input_port_resolution(
 
 		if (stream_cfg->source.port.compression.uncompressed_bits_per_pixel ==
 			UNCOMPRESSED_BITS_PER_PIXEL_10) {
-				fmt_type = IA_CSS_STREAM_FORMAT_RAW_10;
+				fmt_type = ATOMISP_INPUT_FORMAT_RAW_10;
 		}
 		else if (stream_cfg->source.port.compression.uncompressed_bits_per_pixel ==
 			UNCOMPRESSED_BITS_PER_PIXEL_12) {
-				fmt_type = IA_CSS_STREAM_FORMAT_RAW_12;
+				fmt_type = ATOMISP_INPUT_FORMAT_RAW_12;
 		}
 		else
 			return false;
@@ -1082,7 +1082,7 @@ sh_css_config_input_network(struct ia_css_stream *stream)
 
 	/* get the SP thread id */
 	rc = ia_css_pipeline_get_sp_thread_id(ia_css_pipe_get_pipe_num(pipe), &sp_thread_id);
-	if (rc != true)
+	if (!rc)
 		return IA_CSS_ERR_INTERNAL_ERROR;
 	/* get the target input terminal */
 	sp_pipeline_input_terminal = &(sh_css_sp_group.pipe_io[sp_thread_id].input);
@@ -1108,7 +1108,7 @@ sh_css_config_input_network(struct ia_css_stream *stream)
 					&(isys_stream_descr));
 		}
 
-		if (rc != true)
+		if (!rc)
 			return IA_CSS_ERR_INTERNAL_ERROR;
 
 		isys_stream_id = ia_css_isys_generate_stream_id(sp_thread_id, i);
@@ -1118,7 +1118,7 @@ sh_css_config_input_network(struct ia_css_stream *stream)
 				&(isys_stream_descr),
 				&(sp_pipeline_input_terminal->context.virtual_input_system_stream[i]),
 				isys_stream_id);
-		if (rc != true)
+		if (!rc)
 			return IA_CSS_ERR_INTERNAL_ERROR;
 
 		/* calculate the configuration of the virtual Input System (2401) */
@@ -1126,7 +1126,7 @@ sh_css_config_input_network(struct ia_css_stream *stream)
 				&(sp_pipeline_input_terminal->context.virtual_input_system_stream[i]),
 				&(isys_stream_descr),
 				&(sp_pipeline_input_terminal->ctrl.virtual_input_system_stream_cfg[i]));
-		if (rc != true) {
+		if (!rc) {
 			ia_css_isys_stream_destroy(&(sp_pipeline_input_terminal->context.virtual_input_system_stream[i]));
 			return IA_CSS_ERR_INTERNAL_ERROR;
 		}
@@ -1195,7 +1195,7 @@ static inline struct ia_css_pipe *stream_get_target_pipe(
 
 static enum ia_css_err stream_csi_rx_helper(
 	struct ia_css_stream *stream,
-	enum ia_css_err (*func)(enum ia_css_csi2_port, uint32_t))
+	enum ia_css_err (*func)(enum mipi_port_id, uint32_t))
 {
 	enum ia_css_err retval = IA_CSS_ERR_INTERNAL_ERROR;
 	uint32_t sp_thread_id, stream_id;
@@ -1391,7 +1391,7 @@ start_copy_on_sp(struct ia_css_pipe *pipe,
 		ia_css_isys_rx_disable();
 #endif
 
-	if (pipe->stream->config.input_config.format != IA_CSS_STREAM_FORMAT_BINARY_8)
+	if (pipe->stream->config.input_config.format != ATOMISP_INPUT_FORMAT_BINARY_8)
 		return IA_CSS_ERR_INTERNAL_ERROR;
 	sh_css_sp_start_binary_copy(ia_css_pipe_get_pipe_num(pipe), out_frame, pipe->stream->config.pixels_per_clock == 2);
 
@@ -1454,7 +1454,7 @@ static void start_pipe(
 				&me->stream->info.metadata_info
 #if !defined(HAS_NO_INPUT_SYSTEM)
 				,(input_mode==IA_CSS_INPUT_MODE_MEMORY) ?
-					(mipi_port_ID_t)0 :
+					(enum mipi_port_id)0 :
 					me->stream->config.source.port.port
 #endif
 #ifdef ISP2401
@@ -1497,7 +1497,7 @@ static void
 enable_interrupts(enum ia_css_irq_type irq_type)
 {
 #ifdef USE_INPUT_SYSTEM_VERSION_2
-	mipi_port_ID_t port;
+	enum mipi_port_id port;
 #endif
 	bool enable_pulse = irq_type != IA_CSS_IRQ_TYPE_EDGE;
 	IA_CSS_ENTER_PRIVATE("");
@@ -2562,7 +2562,7 @@ ia_css_uninit(void)
 	ifmtr_set_if_blocking_mode_reset = true;
 #endif
 
-	if (fw_explicitly_loaded == false) {
+	if (!fw_explicitly_loaded) {
 		ia_css_unload_firmware();
 	}
 	ia_css_spctrl_unload_fw(SP0_ID);
@@ -4074,9 +4074,9 @@ preview_start(struct ia_css_pipe *pipe)
 #endif
 #if !defined(HAS_NO_INPUT_SYSTEM)
 #ifndef ISP2401
-			, (mipi_port_ID_t)0
+			, (enum mipi_port_id)0
 #else
-			(mipi_port_ID_t)0,
+			(enum mipi_port_id)0,
 #endif
 #endif
 #ifndef ISP2401
@@ -4106,9 +4106,9 @@ preview_start(struct ia_css_pipe *pipe)
 #endif
 #if !defined(HAS_NO_INPUT_SYSTEM)
 #ifndef ISP2401
-			, (mipi_port_ID_t) 0
+			, (enum mipi_port_id) 0
 #else
-			(mipi_port_ID_t) 0,
+			(enum mipi_port_id) 0,
 #endif
 #endif
 #ifndef ISP2401
@@ -4673,7 +4673,7 @@ ia_css_dequeue_psys_event(struct ia_css_event *event)
 	event->type = convert_event_sp_to_host_domain[payload[0]];
 	/* Some sane default values since not all events use all fields. */
 	event->pipe = NULL;
-	event->port = IA_CSS_CSI2_PORT0;
+	event->port = MIPI_PORT0_ID;
 	event->exp_id = 0;
 	event->fw_warning = IA_CSS_FW_WARNING_NONE;
 	event->fw_handle = 0;
@@ -4719,7 +4719,7 @@ ia_css_dequeue_psys_event(struct ia_css_event *event)
 		}
 	}
 	if (event->type == IA_CSS_EVENT_TYPE_PORT_EOF) {
-		event->port = (enum ia_css_csi2_port)payload[1];
+		event->port = (enum mipi_port_id)payload[1];
 		event->exp_id = payload[3];
 	} else if (event->type == IA_CSS_EVENT_TYPE_FW_WARNING) {
 		event->fw_warning = (enum ia_css_fw_warning)payload[1];
@@ -5949,9 +5949,9 @@ static enum ia_css_err video_start(struct ia_css_pipe *pipe)
 #endif
 #if !defined(HAS_NO_INPUT_SYSTEM)
 #ifndef ISP2401
-			, (mipi_port_ID_t)0
+			, (enum mipi_port_id)0
 #else
-			(mipi_port_ID_t)0,
+			(enum mipi_port_id)0,
 #endif
 #endif
 #ifndef ISP2401
@@ -6784,7 +6784,7 @@ static bool copy_on_sp(struct ia_css_pipe *pipe)
 
 	rval &= (pipe->config.default_capture_config.mode == IA_CSS_CAPTURE_MODE_RAW);
 
-	rval &= ((pipe->stream->config.input_config.format == IA_CSS_STREAM_FORMAT_BINARY_8) ||
+	rval &= ((pipe->stream->config.input_config.format == ATOMISP_INPUT_FORMAT_BINARY_8) ||
 		(pipe->config.mode == IA_CSS_PIPE_MODE_COPY));
 
 	return rval;
@@ -6817,7 +6817,7 @@ static enum ia_css_err load_capture_binaries(
 		return err;
 	}
 	if (copy_on_sp(pipe) &&
-	    pipe->stream->config.input_config.format == IA_CSS_STREAM_FORMAT_BINARY_8) {
+	    pipe->stream->config.input_config.format == ATOMISP_INPUT_FORMAT_BINARY_8) {
 		ia_css_frame_info_init(
 			&pipe->output_info[0],
 			JPEG_BYTES,
@@ -6915,7 +6915,7 @@ need_yuv_scaler_stage(const struct ia_css_pipe *pipe)
 
 	/* TODO: make generic function */
 	need_format_conversion =
-		((pipe->stream->config.input_config.format == IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY) &&
+		((pipe->stream->config.input_config.format == ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY) &&
 		(pipe->output_info[0].format != IA_CSS_FRAME_FORMAT_CSI_MIPI_LEGACY_YUV420_8));
 
 	in_res = pipe->config.input_effective_res;
@@ -7304,7 +7304,7 @@ load_yuvpp_binaries(struct ia_css_pipe *pipe)
 	/*
 	 * NOTES
 	 * - Why does the "yuvpp" pipe needs "isp_copy_binary" (i.e. ISP Copy) when
-	 *   its input is "IA_CSS_STREAM_FORMAT_YUV422_8"?
+	 *   its input is "ATOMISP_INPUT_FORMAT_YUV422_8"?
 	 *
 	 *   In most use cases, the first stage in the "yuvpp" pipe is the "yuv_scale_
 	 *   binary". However, the "yuv_scale_binary" does NOT support the input-frame
@@ -7319,7 +7319,7 @@ load_yuvpp_binaries(struct ia_css_pipe *pipe)
 	 *   "yuv_scale_binary".
 	 */
 	need_isp_copy_binary =
-		(pipe->stream->config.input_config.format == IA_CSS_STREAM_FORMAT_YUV422_8);
+		(pipe->stream->config.input_config.format == ATOMISP_INPUT_FORMAT_YUV422_8);
 #else  /* !USE_INPUT_SYSTEM_VERSION_2401 */
 	need_isp_copy_binary = true;
 #endif /*  USE_INPUT_SYSTEM_VERSION_2401 */
@@ -7627,11 +7627,11 @@ create_host_yuvpp_pipeline(struct ia_css_pipe *pipe)
 		 * Bayer-Quad RAW.
 		 */
 		int in_frame_format;
-		if (pipe->stream->config.input_config.format == IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY) {
+		if (pipe->stream->config.input_config.format == ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY) {
 			in_frame_format = IA_CSS_FRAME_FORMAT_CSI_MIPI_LEGACY_YUV420_8;
-		} else if (pipe->stream->config.input_config.format == IA_CSS_STREAM_FORMAT_YUV422_8) {
+		} else if (pipe->stream->config.input_config.format == ATOMISP_INPUT_FORMAT_YUV422_8) {
 			/*
-			 * When the sensor output frame format is "IA_CSS_STREAM_FORMAT_YUV422_8",
+			 * When the sensor output frame format is "ATOMISP_INPUT_FORMAT_YUV422_8",
 			 * the "isp_copy_var" binary is selected as the first stage in the yuvpp
 			 * pipe.
 			 *
@@ -7739,7 +7739,7 @@ create_host_yuvpp_pipeline(struct ia_css_pipe *pipe)
 
 		for (i = 0, j = 0; i < num_stage; i++) {
 			assert(j < num_output_stage);
-			if (pipe->pipe_settings.yuvpp.is_output_stage[i] == true) {
+			if (pipe->pipe_settings.yuvpp.is_output_stage[i]) {
 				tmp_out_frame = out_frame[j];
 				tmp_vf_frame = vf_frame[j];
 			} else {
@@ -7758,7 +7758,7 @@ create_host_yuvpp_pipeline(struct ia_css_pipe *pipe)
 			}
 			/* we use output port 1 as internal output port */
 			tmp_in_frame = yuv_scaler_stage->args.out_frame[1];
-			if (pipe->pipe_settings.yuvpp.is_output_stage[i] == true) {
+			if (pipe->pipe_settings.yuvpp.is_output_stage[i]) {
 				if (tmp_vf_frame && (tmp_vf_frame->info.res.width != 0)) {
 					in_frame = yuv_scaler_stage->args.out_vf_frame;
 					err = add_vf_pp_stage(pipe, in_frame, tmp_vf_frame, &vf_pp_binary[j],
@@ -7812,7 +7812,7 @@ create_host_copy_pipeline(struct ia_css_pipe *pipe,
 	out_frame->flash_state = IA_CSS_FRAME_FLASH_STATE_NONE;
 
 	if (copy_on_sp(pipe) &&
-	    pipe->stream->config.input_config.format == IA_CSS_STREAM_FORMAT_BINARY_8) {
+	    pipe->stream->config.input_config.format == ATOMISP_INPUT_FORMAT_BINARY_8) {
 		ia_css_frame_info_init(
 			&out_frame->info,
 			JPEG_BYTES,
@@ -8044,7 +8044,6 @@ create_host_regular_capture_pipeline(struct ia_css_pipe *pipe)
 	}
 
 	if (mode == IA_CSS_CAPTURE_MODE_PRIMARY) {
-		unsigned int frm;
 		struct ia_css_frame *local_in_frame = NULL;
 		struct ia_css_frame *local_out_frame = NULL;
 
@@ -8082,7 +8081,6 @@ create_host_regular_capture_pipeline(struct ia_css_pipe *pipe)
 				return err;
 			}
 		}
-		(void)frm;
 		/* If we use copy iso primary,
 		   the input must be yuv iso raw */
 		current_stage->args.copy_vf =
@@ -8321,8 +8319,6 @@ sh_css_pipe_get_output_frame_info(struct ia_css_pipe *pipe,
 				  struct ia_css_frame_info *info,
 				  unsigned int idx)
 {
-	enum ia_css_err err = IA_CSS_SUCCESS;
-
 	assert(pipe != NULL);
 	assert(info != NULL);
 
@@ -8331,7 +8327,7 @@ sh_css_pipe_get_output_frame_info(struct ia_css_pipe *pipe,
 
 	*info = pipe->output_info[idx];
 	if (copy_on_sp(pipe) &&
-	    pipe->stream->config.input_config.format == IA_CSS_STREAM_FORMAT_BINARY_8) {
+	    pipe->stream->config.input_config.format == ATOMISP_INPUT_FORMAT_BINARY_8) {
 		ia_css_frame_info_init(
 			info,
 			JPEG_BYTES,
@@ -8347,7 +8343,7 @@ sh_css_pipe_get_output_frame_info(struct ia_css_pipe *pipe,
 
 	ia_css_debug_dtrace(IA_CSS_DEBUG_TRACE_PRIVATE,
 						"sh_css_pipe_get_output_frame_info() leave:\n");
-	return err;
+	return IA_CSS_SUCCESS;
 }
 
 #if !defined(HAS_NO_INPUT_SYSTEM)
@@ -8392,7 +8388,7 @@ ia_css_stream_send_input_line(const struct ia_css_stream *stream,
 
 void
 ia_css_stream_send_input_embedded_line(const struct ia_css_stream *stream,
-		enum ia_css_stream_format format,
+		enum atomisp_input_format format,
 		const unsigned short *data,
 		unsigned int width)
 {
@@ -9176,7 +9172,7 @@ ia_css_stream_configure_rx(struct ia_css_stream *stream)
 	else if (config->num_lanes != 0)
 		return IA_CSS_ERR_INVALID_ARGUMENTS;
 
-	if (config->port > IA_CSS_CSI2_PORT2)
+	if (config->port > MIPI_PORT2_ID)
 		return IA_CSS_ERR_INVALID_ARGUMENTS;
 	stream->csi_rx_config.port =
 		ia_css_isys_port_to_mipi_port(config->port);
@@ -9363,7 +9359,7 @@ ia_css_stream_create(const struct ia_css_stream_config *stream_config,
 
 #if defined(USE_INPUT_SYSTEM_VERSION_2)
 	/* We don't support metadata for JPEG stream, since they both use str2mem */
-	if (stream_config->input_config.format == IA_CSS_STREAM_FORMAT_BINARY_8 &&
+	if (stream_config->input_config.format == ATOMISP_INPUT_FORMAT_BINARY_8 &&
 	    stream_config->metadata_config.resolution.height > 0) {
 		err = IA_CSS_ERR_INVALID_ARGUMENTS;
 		IA_CSS_LEAVE_ERR(err);
@@ -10142,7 +10138,7 @@ ia_css_temp_pipe_to_pipe_id(const struct ia_css_pipe *pipe, enum ia_css_pipe_id
 	return IA_CSS_SUCCESS;
 }
 
-enum ia_css_stream_format
+enum atomisp_input_format
 ia_css_stream_get_format(const struct ia_css_stream *stream)
 {
 	return stream->config.input_config.format;
@@ -10218,8 +10214,6 @@ ia_css_stream_get_3a_binary(const struct ia_css_stream *stream)
 enum ia_css_err
 ia_css_stream_set_output_padded_width(struct ia_css_stream *stream, unsigned int output_padded_width)
 {
-	enum ia_css_err err = IA_CSS_SUCCESS;
-
 	struct ia_css_pipe *pipe;
 
 	assert(stream != NULL);
@@ -10232,7 +10226,7 @@ ia_css_stream_set_output_padded_width(struct ia_css_stream *stream, unsigned int
 	pipe->config.output_info[IA_CSS_PIPE_OUTPUT_STAGE_0].padded_width = output_padded_width;
 	pipe->output_info[IA_CSS_PIPE_OUTPUT_STAGE_0].padded_width = output_padded_width;
 
-	return err;
+	return IA_CSS_SUCCESS;
 }
 
 static struct ia_css_binary *
@@ -10734,7 +10728,7 @@ ia_css_pipe_set_qos_ext_state(struct ia_css_pipe *pipe, uint32_t fw_handle, bool
 				(uint8_t) IA_CSS_PSYS_SW_EVENT_STAGE_ENABLE_DISABLE,
 				(uint8_t) thread_id,
 				(uint8_t) stage->stage_num,
-				(enable == true) ? 1 : 0);
+				enable ? 1 : 0);
 			if (err == IA_CSS_SUCCESS) {
 				if(enable)
 					SH_CSS_QOS_STAGE_ENABLE(&(sh_css_sp_group.pipe[thread_id]),stage->stage_num);
@@ -11059,7 +11053,7 @@ static struct sh_css_hmm_buffer_record
 
 	buffer_record = &hmm_buffer_record[0];
 	for (i = 0; i < MAX_HMM_BUFFER_NUM; i++) {
-		if (buffer_record->in_use == false) {
+		if (!buffer_record->in_use) {
 			buffer_record->in_use = true;
 			buffer_record->type = type;
 			buffer_record->h_vbuf = h_vbuf;
@@ -11083,7 +11077,7 @@ static struct sh_css_hmm_buffer_record
 
 	buffer_record = &hmm_buffer_record[0];
 	for (i = 0; i < MAX_HMM_BUFFER_NUM; i++) {
-		if ((buffer_record->in_use == true) &&
+		if ((buffer_record->in_use) &&
 		    (buffer_record->type == type) &&
 		    (buffer_record->h_vbuf != NULL) &&
 		    (buffer_record->h_vbuf->vptr == ddr_buffer_addr)) {
@@ -11093,7 +11087,7 @@ static struct sh_css_hmm_buffer_record
 		buffer_record++;
 	}
 
-	if (found_record == true)
+	if (found_record)
 		return buffer_record;
 	else
 		return NULL;
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_mipi.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_mipi.c
index 883474e90c81..a6a00024bae8 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_mipi.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_mipi.c
@@ -104,7 +104,7 @@ static bool ia_css_mipi_is_source_port_valid(struct ia_css_pipe *pipe,
 enum ia_css_err
 ia_css_mipi_frame_calculate_size(const unsigned int width,
 				const unsigned int height,
-				const enum ia_css_stream_format format,
+				const enum atomisp_input_format format,
 				const bool hasSOLandEOL,
 				const unsigned int embedded_data_size_words,
 				unsigned int *size_mem_words)
@@ -136,16 +136,16 @@ ia_css_mipi_frame_calculate_size(const unsigned int width,
 		     width_padded, height, format, hasSOLandEOL, embedded_data_size_words);
 
 	switch (format) {
-	case IA_CSS_STREAM_FORMAT_RAW_6:		/* 4p, 3B, 24bits */
+	case ATOMISP_INPUT_FORMAT_RAW_6:		/* 4p, 3B, 24bits */
 		bits_per_pixel = 6;	break;
-	case IA_CSS_STREAM_FORMAT_RAW_7:		/* 8p, 7B, 56bits */
+	case ATOMISP_INPUT_FORMAT_RAW_7:		/* 8p, 7B, 56bits */
 		bits_per_pixel = 7;		break;
-	case IA_CSS_STREAM_FORMAT_RAW_8:		/* 1p, 1B, 8bits */
-	case IA_CSS_STREAM_FORMAT_BINARY_8:		/*  8bits, TODO: check. */
-	case IA_CSS_STREAM_FORMAT_YUV420_8:		/* odd 2p, 2B, 16bits, even 2p, 4B, 32bits */
+	case ATOMISP_INPUT_FORMAT_RAW_8:		/* 1p, 1B, 8bits */
+	case ATOMISP_INPUT_FORMAT_BINARY_8:		/*  8bits, TODO: check. */
+	case ATOMISP_INPUT_FORMAT_YUV420_8:		/* odd 2p, 2B, 16bits, even 2p, 4B, 32bits */
 		bits_per_pixel = 8;		break;
-	case IA_CSS_STREAM_FORMAT_YUV420_10:		/* odd 4p, 5B, 40bits, even 4p, 10B, 80bits */
-	case IA_CSS_STREAM_FORMAT_RAW_10:		/* 4p, 5B, 40bits */
+	case ATOMISP_INPUT_FORMAT_YUV420_10:		/* odd 4p, 5B, 40bits, even 4p, 10B, 80bits */
+	case ATOMISP_INPUT_FORMAT_RAW_10:		/* 4p, 5B, 40bits */
 #if !defined(HAS_NO_PACKED_RAW_PIXELS)
 		/* The changes will be reverted as soon as RAW
 		 * Buffers are deployed by the 2401 Input System
@@ -156,26 +156,26 @@ ia_css_mipi_frame_calculate_size(const unsigned int width,
 		bits_per_pixel = 16;
 #endif
 		break;
-	case IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY:	/* 2p, 3B, 24bits */
-	case IA_CSS_STREAM_FORMAT_RAW_12:		/* 2p, 3B, 24bits */
+	case ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY:	/* 2p, 3B, 24bits */
+	case ATOMISP_INPUT_FORMAT_RAW_12:		/* 2p, 3B, 24bits */
 		bits_per_pixel = 12;	break;
-	case IA_CSS_STREAM_FORMAT_RAW_14:		/* 4p, 7B, 56bits */
+	case ATOMISP_INPUT_FORMAT_RAW_14:		/* 4p, 7B, 56bits */
 		bits_per_pixel = 14;	break;
-	case IA_CSS_STREAM_FORMAT_RGB_444:		/* 1p, 2B, 16bits */
-	case IA_CSS_STREAM_FORMAT_RGB_555:		/* 1p, 2B, 16bits */
-	case IA_CSS_STREAM_FORMAT_RGB_565:		/* 1p, 2B, 16bits */
-	case IA_CSS_STREAM_FORMAT_YUV422_8:		/* 2p, 4B, 32bits */
+	case ATOMISP_INPUT_FORMAT_RGB_444:		/* 1p, 2B, 16bits */
+	case ATOMISP_INPUT_FORMAT_RGB_555:		/* 1p, 2B, 16bits */
+	case ATOMISP_INPUT_FORMAT_RGB_565:		/* 1p, 2B, 16bits */
+	case ATOMISP_INPUT_FORMAT_YUV422_8:		/* 2p, 4B, 32bits */
 		bits_per_pixel = 16;	break;
-	case IA_CSS_STREAM_FORMAT_RGB_666:		/* 4p, 9B, 72bits */
+	case ATOMISP_INPUT_FORMAT_RGB_666:		/* 4p, 9B, 72bits */
 		bits_per_pixel = 18;	break;
-	case IA_CSS_STREAM_FORMAT_YUV422_10:		/* 2p, 5B, 40bits */
+	case ATOMISP_INPUT_FORMAT_YUV422_10:		/* 2p, 5B, 40bits */
 		bits_per_pixel = 20;	break;
-	case IA_CSS_STREAM_FORMAT_RGB_888:		/* 1p, 3B, 24bits */
+	case ATOMISP_INPUT_FORMAT_RGB_888:		/* 1p, 3B, 24bits */
 		bits_per_pixel = 24;	break;
 
-	case IA_CSS_STREAM_FORMAT_YUV420_16:		/* Not supported */
-	case IA_CSS_STREAM_FORMAT_YUV422_16:		/* Not supported */
-	case IA_CSS_STREAM_FORMAT_RAW_16:		/* TODO: not specified in MIPI SPEC, check */
+	case ATOMISP_INPUT_FORMAT_YUV420_16:		/* Not supported */
+	case ATOMISP_INPUT_FORMAT_YUV422_16:		/* Not supported */
+	case ATOMISP_INPUT_FORMAT_RAW_16:		/* TODO: not specified in MIPI SPEC, check */
 	default:
 		return IA_CSS_ERR_INVALID_ARGUMENTS;
 	}
@@ -183,9 +183,9 @@ ia_css_mipi_frame_calculate_size(const unsigned int width,
 	odd_line_bytes = (width_padded * bits_per_pixel + 7) >> 3; /* ceil ( bits per line / 8) */
 
 	/* Even lines for YUV420 formats are double in bits_per_pixel. */
-	if (format == IA_CSS_STREAM_FORMAT_YUV420_8
-			|| format == IA_CSS_STREAM_FORMAT_YUV420_10
-			|| format == IA_CSS_STREAM_FORMAT_YUV420_16) {
+	if (format == ATOMISP_INPUT_FORMAT_YUV420_8
+			|| format == ATOMISP_INPUT_FORMAT_YUV420_10
+			|| format == ATOMISP_INPUT_FORMAT_YUV420_16) {
 		even_line_bytes = (width_padded * 2 * bits_per_pixel + 7) >> 3; /* ceil ( bits per line / 8) */
 	} else {
 		even_line_bytes = odd_line_bytes;
@@ -239,7 +239,7 @@ ia_css_mipi_frame_calculate_size(const unsigned int width,
 
 #if !defined(HAS_NO_INPUT_SYSTEM) && defined(USE_INPUT_SYSTEM_VERSION_2)
 enum ia_css_err
-ia_css_mipi_frame_enable_check_on_size(const enum ia_css_csi2_port port,
+ia_css_mipi_frame_enable_check_on_size(const enum mipi_port_id port,
 				const unsigned int	size_mem_words)
 {
 	uint32_t idx;
@@ -285,7 +285,7 @@ calculate_mipi_buff_size(
 #else
 	unsigned int width;
 	unsigned int height;
-	enum ia_css_stream_format format;
+	enum atomisp_input_format format;
 	bool pack_raw_pixels;
 
 	unsigned int width_padded;
@@ -348,15 +348,15 @@ calculate_mipi_buff_size(
 
 	bits_per_pixel = sh_css_stream_format_2_bits_per_subpixel(format);
 	bits_per_pixel =
-		(format == IA_CSS_STREAM_FORMAT_RAW_10 && pack_raw_pixels) ? bits_per_pixel : 16;
+		(format == ATOMISP_INPUT_FORMAT_RAW_10 && pack_raw_pixels) ? bits_per_pixel : 16;
 	if (bits_per_pixel == 0)
 		return IA_CSS_ERR_INTERNAL_ERROR;
 
 	odd_line_bytes = (width_padded * bits_per_pixel + 7) >> 3; /* ceil ( bits per line / 8) */
 
 	/* Even lines for YUV420 formats are double in bits_per_pixel. */
-	if (format == IA_CSS_STREAM_FORMAT_YUV420_8
-		|| format == IA_CSS_STREAM_FORMAT_YUV420_10) {
+	if (format == ATOMISP_INPUT_FORMAT_YUV420_8
+		|| format == ATOMISP_INPUT_FORMAT_YUV420_10) {
 		even_line_bytes = (width_padded * 2 * bits_per_pixel + 7) >> 3; /* ceil ( bits per line / 8) */
 	} else {
 		even_line_bytes = odd_line_bytes;
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_params.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_params.c
index fbb36112fe3c..43529b1605c3 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_params.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_params.c
@@ -110,7 +110,7 @@
 #define FPNTBL_BYTES(binary) \
 	(sizeof(char) * (binary)->in_frame_info.res.height * \
 	 (binary)->in_frame_info.padded_width)
-	 
+
 #ifndef ISP2401
 
 #define SCTBL_BYTES(binary) \
@@ -1741,7 +1741,7 @@ ia_css_process_zoom_and_motion(
 				out_infos[0] = &args->out_frame[0]->info;
 			info = &stage->firmware->info.isp;
 			ia_css_binary_fill_info(info, false, false,
-				IA_CSS_STREAM_FORMAT_RAW_10,
+				ATOMISP_INPUT_FORMAT_RAW_10,
 				args->in_frame  ? &args->in_frame->info  : NULL,
 				NULL,
 				out_infos,
@@ -2891,8 +2891,8 @@ ia_css_metadata_free_multiple(unsigned int num_bufs, struct ia_css_metadata **bu
 	}
 }
 
-unsigned g_param_buffer_dequeue_count = 0;
-unsigned g_param_buffer_enqueue_count = 0;
+static unsigned g_param_buffer_dequeue_count = 0;
+static unsigned g_param_buffer_enqueue_count = 0;
 
 enum ia_css_err
 ia_css_stream_isp_parameters_init(struct ia_css_stream *stream)
@@ -3805,7 +3805,6 @@ sh_css_param_update_isp_params(struct ia_css_pipe *curr_pipe,
 
 		enum sh_css_queue_id queue_id;
 
-		(void)stage;
 		pipe = curr_pipe->stream->pipes[i];
 		pipeline = ia_css_pipe_get_pipeline(pipe);
 		pipe_num = ia_css_pipe_get_pipe_num(pipe);
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_sp.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_sp.c
index 6fc00fc402b1..85263725540d 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_sp.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_sp.c
@@ -71,7 +71,7 @@
 struct sh_css_sp_group		sh_css_sp_group;
 struct sh_css_sp_stage		sh_css_sp_stage;
 struct sh_css_isp_stage		sh_css_isp_stage;
-struct sh_css_sp_output		sh_css_sp_output;
+static struct sh_css_sp_output		sh_css_sp_output;
 static struct sh_css_sp_per_frame_data per_frame_data;
 
 /* true if SP supports frame loop and host2sp_commands */
@@ -117,9 +117,9 @@ copy_isp_stage_to_sp_stage(void)
 	*/
 	sh_css_sp_stage.enable.sdis = sh_css_isp_stage.binary_info.enable.dis;
 	sh_css_sp_stage.enable.s3a = sh_css_isp_stage.binary_info.enable.s3a;
-#ifdef ISP2401	
+#ifdef ISP2401
 	sh_css_sp_stage.enable.lace_stats = sh_css_isp_stage.binary_info.enable.lace_stats;
-#endif	
+#endif
 }
 
 void
@@ -754,7 +754,7 @@ sh_css_sp_write_frame_pointers(const struct sh_css_binary_args *args)
 
 static void
 sh_css_sp_init_group(bool two_ppc,
-		     enum ia_css_stream_format input_format,
+		     enum atomisp_input_format input_format,
 		     bool no_isp_sync,
 		     uint8_t if_config_index)
 {
@@ -817,7 +817,6 @@ configure_isp_from_args(
 	bool two_ppc,
 	bool deinterleaved)
 {
-	enum ia_css_err err = IA_CSS_SUCCESS;
 #ifdef ISP2401
 	struct ia_css_pipe *pipe = find_pipe_by_num(pipeline->pipe_num);
 	const struct ia_css_resolution *res;
@@ -841,7 +840,7 @@ configure_isp_from_args(
 	ia_css_ref_configure(binary, (const struct ia_css_frame **)args->delay_frames, pipeline->dvs_frame_delay);
 	ia_css_tnr_configure(binary, (const struct ia_css_frame **)args->tnr_frames);
 	ia_css_bayer_io_config(binary, args);
-	return err;
+	return IA_CSS_SUCCESS;
 }
 
 static void
@@ -1118,7 +1117,7 @@ sp_init_stage(struct ia_css_pipeline_stage *stage,
 			out_infos[0] = &args->out_frame[0]->info;
 		info = &firmware->info.isp;
 		ia_css_binary_fill_info(info, false, false,
-			    IA_CSS_STREAM_FORMAT_RAW_10,
+			    ATOMISP_INPUT_FORMAT_RAW_10,
 			    args->in_frame  ? &args->in_frame->info  : NULL,
 			    NULL,
 				out_infos,
@@ -1197,7 +1196,7 @@ sh_css_sp_init_pipeline(struct ia_css_pipeline *me,
 			const struct ia_css_metadata_config *md_config,
 			const struct ia_css_metadata_info *md_info,
 #if !defined(HAS_NO_INPUT_SYSTEM)
-			const mipi_port_ID_t port_id
+			const enum mipi_port_id port_id
 #endif
 #ifdef ISP2401
 			,
@@ -1442,8 +1441,6 @@ sh_css_update_host2sp_offline_frame(
 	unsigned int HIVE_ADDR_host_sp_com;
 	unsigned int offset;
 
-	(void)HIVE_ADDR_host_sp_com; /* Suppres warnings in CRUN */
-
 	assert(frame_num < NUM_CONTINUOUS_FRAMES);
 
 	/* Write new frame data into SP DMEM */
@@ -1473,8 +1470,6 @@ sh_css_update_host2sp_mipi_frame(
 	unsigned int HIVE_ADDR_host_sp_com;
 	unsigned int offset;
 
-	(void)HIVE_ADDR_host_sp_com; /* Suppres warnings in CRUN */
-
 	/* MIPI buffers are dedicated to port, so now there are more of them. */
 	assert(frame_num < (N_CSI_PORTS * NUM_MIPI_FRAMES_PER_STREAM));
 
@@ -1500,8 +1495,6 @@ sh_css_update_host2sp_mipi_metadata(
 	unsigned int HIVE_ADDR_host_sp_com;
 	unsigned int o;
 
-	(void)HIVE_ADDR_host_sp_com; /* Suppres warnings in CRUN */
-
 	/* MIPI buffers are dedicated to port, so now there are more of them. */
 	assert(frame_num < (N_CSI_PORTS * NUM_MIPI_FRAMES_PER_STREAM));
 
@@ -1520,8 +1513,6 @@ sh_css_update_host2sp_num_mipi_frames(unsigned num_frames)
 	unsigned int HIVE_ADDR_host_sp_com;
 	unsigned int offset;
 
-	(void)HIVE_ADDR_host_sp_com; /* Suppres warnings in CRUN */
-
 	/* Write new frame data into SP DMEM */
 	HIVE_ADDR_host_sp_com = sh_css_sp_fw.info.sp.host_sp_com;
 	offset = (unsigned int)offsetof(struct host_sp_communication, host2sp_num_mipi_frames)
@@ -1539,8 +1530,6 @@ sh_css_update_host2sp_cont_num_raw_frames(unsigned num_frames, bool set_avail)
 	unsigned int extra_num_frames, avail_num_frames;
 	unsigned int offset, offset_extra;
 
-	(void)HIVE_ADDR_host_sp_com; /* Suppres warnings in CRUN */
-
 	/* Write new frame data into SP DMEM */
 	fw = &sh_css_sp_fw;
 	HIVE_ADDR_host_sp_com = fw->info.sp.host_sp_com;
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_sp.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_sp.h
index 98444a3cc3e4..3c41e997de79 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_sp.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_sp.h
@@ -64,7 +64,7 @@ sh_css_sp_init_pipeline(struct ia_css_pipeline *me,
 			const struct ia_css_metadata_config *md_config,
 			const struct ia_css_metadata_info *md_info,
 #if !defined(HAS_NO_INPUT_SYSTEM)
-			const mipi_port_ID_t port_id
+			const enum mipi_port_id port_id
 #endif
 #ifdef ISP2401
 			,
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_stream_format.c b/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_stream_format.c
index 52d0a6471597..77f135e7dc3c 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_stream_format.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_stream_format.c
@@ -16,55 +16,55 @@
 #include <ia_css_stream_format.h>
 
 unsigned int sh_css_stream_format_2_bits_per_subpixel(
-		enum ia_css_stream_format format)
+		enum atomisp_input_format format)
 {
 	unsigned int rval;
 
 	switch (format) {
-	case IA_CSS_STREAM_FORMAT_RGB_444:
+	case ATOMISP_INPUT_FORMAT_RGB_444:
 		rval = 4;
 		break;
-	case IA_CSS_STREAM_FORMAT_RGB_555:
+	case ATOMISP_INPUT_FORMAT_RGB_555:
 		rval = 5;
 		break;
-	case IA_CSS_STREAM_FORMAT_RGB_565:
-	case IA_CSS_STREAM_FORMAT_RGB_666:
-	case IA_CSS_STREAM_FORMAT_RAW_6:
+	case ATOMISP_INPUT_FORMAT_RGB_565:
+	case ATOMISP_INPUT_FORMAT_RGB_666:
+	case ATOMISP_INPUT_FORMAT_RAW_6:
 		rval = 6;
 		break;
-	case IA_CSS_STREAM_FORMAT_RAW_7:
+	case ATOMISP_INPUT_FORMAT_RAW_7:
 		rval = 7;
 		break;
-	case IA_CSS_STREAM_FORMAT_YUV420_8_LEGACY:
-	case IA_CSS_STREAM_FORMAT_YUV420_8:
-	case IA_CSS_STREAM_FORMAT_YUV422_8:
-	case IA_CSS_STREAM_FORMAT_RGB_888:
-	case IA_CSS_STREAM_FORMAT_RAW_8:
-	case IA_CSS_STREAM_FORMAT_BINARY_8:
-	case IA_CSS_STREAM_FORMAT_USER_DEF1:
-	case IA_CSS_STREAM_FORMAT_USER_DEF2:
-	case IA_CSS_STREAM_FORMAT_USER_DEF3:
-	case IA_CSS_STREAM_FORMAT_USER_DEF4:
-	case IA_CSS_STREAM_FORMAT_USER_DEF5:
-	case IA_CSS_STREAM_FORMAT_USER_DEF6:
-	case IA_CSS_STREAM_FORMAT_USER_DEF7:
-	case IA_CSS_STREAM_FORMAT_USER_DEF8:
+	case ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY:
+	case ATOMISP_INPUT_FORMAT_YUV420_8:
+	case ATOMISP_INPUT_FORMAT_YUV422_8:
+	case ATOMISP_INPUT_FORMAT_RGB_888:
+	case ATOMISP_INPUT_FORMAT_RAW_8:
+	case ATOMISP_INPUT_FORMAT_BINARY_8:
+	case ATOMISP_INPUT_FORMAT_USER_DEF1:
+	case ATOMISP_INPUT_FORMAT_USER_DEF2:
+	case ATOMISP_INPUT_FORMAT_USER_DEF3:
+	case ATOMISP_INPUT_FORMAT_USER_DEF4:
+	case ATOMISP_INPUT_FORMAT_USER_DEF5:
+	case ATOMISP_INPUT_FORMAT_USER_DEF6:
+	case ATOMISP_INPUT_FORMAT_USER_DEF7:
+	case ATOMISP_INPUT_FORMAT_USER_DEF8:
 		rval = 8;
 		break;
-	case IA_CSS_STREAM_FORMAT_YUV420_10:
-	case IA_CSS_STREAM_FORMAT_YUV422_10:
-	case IA_CSS_STREAM_FORMAT_RAW_10:
+	case ATOMISP_INPUT_FORMAT_YUV420_10:
+	case ATOMISP_INPUT_FORMAT_YUV422_10:
+	case ATOMISP_INPUT_FORMAT_RAW_10:
 		rval = 10;
 		break;
-	case IA_CSS_STREAM_FORMAT_RAW_12:
+	case ATOMISP_INPUT_FORMAT_RAW_12:
 		rval = 12;
 		break;
-	case IA_CSS_STREAM_FORMAT_RAW_14:
+	case ATOMISP_INPUT_FORMAT_RAW_14:
 		rval = 14;
 		break;
-	case IA_CSS_STREAM_FORMAT_RAW_16:
-	case IA_CSS_STREAM_FORMAT_YUV420_16:
-	case IA_CSS_STREAM_FORMAT_YUV422_16:
+	case ATOMISP_INPUT_FORMAT_RAW_16:
+	case ATOMISP_INPUT_FORMAT_YUV420_16:
+	case ATOMISP_INPUT_FORMAT_YUV422_16:
 		rval = 16;
 		break;
 	default:
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_stream_format.h b/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_stream_format.h
index aab2b6207051..b699f538e0dd 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_stream_format.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/css2400/sh_css_stream_format.h
@@ -18,6 +18,6 @@
 #include <ia_css_stream_format.h>
 
 unsigned int sh_css_stream_format_2_bits_per_subpixel(
-		enum ia_css_stream_format format);
+		enum atomisp_input_format format);
 
 #endif /* __SH_CSS_STREAM_FORMAT_H */
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/include/mmu/isp_mmu.h b/drivers/staging/media/atomisp/pci/atomisp2/include/mmu/isp_mmu.h
index 560014add005..4b2d94a37ea1 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/include/mmu/isp_mmu.h
+++ b/drivers/staging/media/atomisp/pci/atomisp2/include/mmu/isp_mmu.h
@@ -80,12 +80,10 @@ struct isp_mmu_client {
 	unsigned int null_pte;
 
 	/*
-	 * set/get page directory base address (physical address).
+	 * get page directory base address (physical address).
 	 *
 	 * must be provided.
 	 */
-	int (*set_pd_base) (struct isp_mmu *mmu,
-			phys_addr_t pd_base);
 	unsigned int (*get_pd_base) (struct isp_mmu *mmu, phys_addr_t pd_base);
 	/*
 	 * callback to flush tlb.
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/mmu/isp_mmu.c b/drivers/staging/media/atomisp/pci/atomisp2/mmu/isp_mmu.c
index f21075c1e503..198f29f4a324 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/mmu/isp_mmu.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/mmu/isp_mmu.c
@@ -344,13 +344,6 @@ static int mmu_map(struct isp_mmu *mmu, unsigned int isp_virt,
 		/*
 		 * setup L1 page table physical addr to MMU
 		 */
-		ret = mmu->driver->set_pd_base(mmu, l1_pt);
-		if (ret) {
-			dev_err(atomisp_dev,
-				 "set page directory base address fail.\n");
-			mutex_unlock(&mmu->pt_mutex);
-			return ret;
-		}
 		mmu->base_address = l1_pt;
 		mmu->l1_pte = isp_pgaddr_to_pte_valid(mmu, l1_pt);
 		memset(mmu->l2_pgt_refcount, 0, sizeof(int) * ISP_L1PT_PTES);
@@ -531,10 +524,8 @@ int isp_mmu_init(struct isp_mmu *mmu, struct isp_mmu_client *driver)
 
 	mmu->driver = driver;
 
-	if (!driver->set_pd_base || !driver->tlb_flush_all) {
-		dev_err(atomisp_dev,
-			    "set_pd_base or tlb_flush_all operation "
-			     "not provided.\n");
+	if (!driver->tlb_flush_all) {
+		dev_err(atomisp_dev, "tlb_flush_all operation not provided.\n");
 		return -EINVAL;
 	}
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/mmu/sh_mmu_mrfld.c b/drivers/staging/media/atomisp/pci/atomisp2/mmu/sh_mmu_mrfld.c
index c59bcc982966..c0212564b7c8 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/mmu/sh_mmu_mrfld.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/mmu/sh_mmu_mrfld.c
@@ -18,6 +18,7 @@
  */
 #include "type_support.h"
 #include "mmu/isp_mmu.h"
+#include "mmu/sh_mmu_mrfld.h"
 #include "memory_access/memory_access.h"
 #include "atomisp_compat.h"
 
@@ -40,20 +41,6 @@ static phys_addr_t sh_pte_to_phys(struct isp_mmu *mmu,
 	return (phys_addr_t)((pte & ~mask) << ISP_PAGE_OFFSET);
 }
 
-/*
- * set page directory base address (physical address).
- *
- * must be provided.
- */
-static int sh_set_pd_base(struct isp_mmu *mmu,
-			  phys_addr_t phys)
-{
-	unsigned int pte = sh_phys_to_pte(mmu, phys);
-	/*mmgr_set_base_address(HOST_ADDRESS(pte));*/
-	atomisp_css_mmu_set_page_table_base_index(HOST_ADDRESS(pte));
-	return 0;
-}
-
 static unsigned int sh_get_pd_base(struct isp_mmu *mmu,
 				   phys_addr_t phys)
 {
@@ -81,7 +68,6 @@ struct isp_mmu_client sh_mmu_mrfld = {
 	.name = "Silicon Hive ISP3000 MMU",
 	.pte_valid_mask = MERR_VALID_PTE_MASK,
 	.null_pte = ~MERR_VALID_PTE_MASK,
-	.set_pd_base = sh_set_pd_base,
 	.get_pd_base = sh_get_pd_base,
 	.tlb_flush_all = sh_tlb_flush,
 	.phys_to_pte = sh_phys_to_pte,
diff --git a/drivers/staging/media/atomisp/platform/intel-mid/atomisp_gmin_platform.c b/drivers/staging/media/atomisp/platform/intel-mid/atomisp_gmin_platform.c
index d8b7183db252..3283c1b05d6a 100644
--- a/drivers/staging/media/atomisp/platform/intel-mid/atomisp_gmin_platform.c
+++ b/drivers/staging/media/atomisp/platform/intel-mid/atomisp_gmin_platform.c
@@ -441,7 +441,7 @@ static int gmin_v1p2_ctrl(struct v4l2_subdev *subdev, int on)
 {
 	struct gmin_subdev *gs = find_gmin_subdev(subdev);
 
-	if (gs && gs->v1p2_on == on)
+	if (!gs || gs->v1p2_on == on)
 		return 0;
 	gs->v1p2_on = on;
 
@@ -475,7 +475,7 @@ static int gmin_v1p8_ctrl(struct v4l2_subdev *subdev, int on)
 		}
 	}
 
-	if (gs && gs->v1p8_on == on)
+	if (!gs || gs->v1p8_on == on)
 		return 0;
 	gs->v1p8_on = on;
 
@@ -511,7 +511,7 @@ static int gmin_v2p8_ctrl(struct v4l2_subdev *subdev, int on)
 		}
 	}
 
-	if (gs && gs->v2p8_on == on)
+	if (!gs || gs->v2p8_on == on)
 		return 0;
 	gs->v2p8_on = on;
 
@@ -693,9 +693,11 @@ static int gmin_get_config_var(struct device *dev, const char *var,
 	for (i = 0; i < sizeof(var8) && var8[i]; i++)
 		var16[i] = var8[i];
 
+#ifdef CONFIG_64BIT
 	/* To avoid owerflows when calling the efivar API */
 	if (*out_len > ULONG_MAX)
 		return -EINVAL;
+#endif
 
 	/* Not sure this API usage is kosher; efivar_entry_get()'s
 	 * implementation simply uses VariableName and VendorGuid from
diff --git a/drivers/staging/media/davinci_vpfe/dm365_resizer.c b/drivers/staging/media/davinci_vpfe/dm365_resizer.c
index 857b0e847c5e..1ee216d71d42 100644
--- a/drivers/staging/media/davinci_vpfe/dm365_resizer.c
+++ b/drivers/staging/media/davinci_vpfe/dm365_resizer.c
@@ -480,7 +480,7 @@ resizer_configure_common_in_params(struct vpfe_resizer_device *resizer)
 	return 0;
 }
 static int
-resizer_configure_in_continious_mode(struct vpfe_resizer_device *resizer)
+resizer_configure_in_continuous_mode(struct vpfe_resizer_device *resizer)
 {
 	struct device *dev = resizer->crop_resizer.subdev.v4l2_dev->dev;
 	struct resizer_params *param = &resizer->config;
@@ -1242,7 +1242,7 @@ static int resizer_do_hw_setup(struct vpfe_resizer_device *resizer)
 		    ipipeif_source == IPIPEIF_OUTPUT_RESIZER)
 			ret = resizer_configure_in_single_shot_mode(resizer);
 		else
-			ret =  resizer_configure_in_continious_mode(resizer);
+			ret =  resizer_configure_in_continuous_mode(resizer);
 		if (ret)
 			return ret;
 		ret = config_rsz_hw(resizer, param);
diff --git a/drivers/staging/media/imx/imx-media-csi.c b/drivers/staging/media/imx/imx-media-csi.c
index 1aa2be891704..16cab40156ca 100644
--- a/drivers/staging/media/imx/imx-media-csi.c
+++ b/drivers/staging/media/imx/imx-media-csi.c
@@ -1005,7 +1005,7 @@ static int csi_link_validate(struct v4l2_subdev *sd,
 			     struct v4l2_subdev_format *sink_fmt)
 {
 	struct csi_priv *priv = v4l2_get_subdevdata(sd);
-	struct v4l2_fwnode_endpoint upstream_ep;
+	struct v4l2_fwnode_endpoint upstream_ep = {};
 	const struct imx_media_pixfmt *incc;
 	bool is_csi2;
 	int ret;
@@ -1800,7 +1800,10 @@ static int imx_csi_probe(struct platform_device *pdev)
 	pinctrl = devm_pinctrl_get_select_default(priv->dev);
 	if (IS_ERR(pinctrl)) {
 		ret = PTR_ERR(priv->vdev);
-		goto free;
+		dev_dbg(priv->dev,
+			"devm_pinctrl_get_select_default() failed: %d\n", ret);
+		if (ret != -ENODEV)
+			goto free;
 	}
 
 	ret = v4l2_async_register_subdev(&priv->sd);
diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index 4f950c686055..83d3d271ca15 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -9,9 +9,6 @@ if HAS_IOMEM
 config HAVE_FB_ATMEL
 	bool
 
-config SH_LCD_MIPI_DSI
-	bool
-
 source "drivers/char/agp/Kconfig"
 
 source "drivers/gpu/vga/Kconfig"
diff --git a/drivers/video/console/sticore.c b/drivers/video/console/sticore.c
index d1d3796773aa..08b822656846 100644
--- a/drivers/video/console/sticore.c
+++ b/drivers/video/console/sticore.c
@@ -827,10 +827,8 @@ static struct sti_struct *sti_try_rom_generic(unsigned long address,
 	}
 	
 	sti = kzalloc(sizeof(*sti), GFP_KERNEL);
-	if (!sti) {
-		printk(KERN_ERR "Not enough memory !\n");
+	if (!sti)
 		return NULL;
-	}
 
 	spin_lock_init(&sti->lock);
 
diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig
index 399573742487..d94254263ea5 100644
--- a/drivers/video/fbdev/Kconfig
+++ b/drivers/video/fbdev/Kconfig
@@ -1053,6 +1053,11 @@ config FB_I810_I2C
 	bool "Enable DDC Support"
 	depends on FB_I810 && FB_I810_GTF
 	select FB_DDC
+	help
+	  Add DDC/I2C support for i810fb.  This will allow the driver to get
+	  display information, especially for monitors with fickle timings.
+
+	  If unsure, say Y.
 
 config FB_LE80578
 	tristate "Intel LE80578 (Vermilion) support"
@@ -1917,8 +1922,7 @@ config FB_TMIO_ACCELL
 
 config FB_S3C
 	tristate "Samsung S3C framebuffer support"
-	depends on FB && (CPU_S3C2416 || ARCH_S3C64XX || \
-		ARCH_S5PV210 || ARCH_EXYNOS)
+	depends on FB && (CPU_S3C2416 || ARCH_S3C64XX)
 	select FB_CFB_FILLRECT
 	select FB_CFB_COPYAREA
 	select FB_CFB_IMAGEBLIT
diff --git a/drivers/video/fbdev/amba-clcd.c b/drivers/video/fbdev/amba-clcd.c
index 36d25190b48c..38c1f324ce15 100644
--- a/drivers/video/fbdev/amba-clcd.c
+++ b/drivers/video/fbdev/amba-clcd.c
@@ -967,9 +967,8 @@ static int clcdfb_probe(struct amba_device *dev, const struct amba_id *id)
 		goto out;
 	}
 
-	fb = kzalloc(sizeof(struct clcd_fb), GFP_KERNEL);
+	fb = kzalloc(sizeof(*fb), GFP_KERNEL);
 	if (!fb) {
-		printk(KERN_INFO "CLCD: could not allocate new clcd_fb struct\n");
 		ret = -ENOMEM;
 		goto free_region;
 	}
diff --git a/drivers/video/fbdev/atmel_lcdfb.c b/drivers/video/fbdev/atmel_lcdfb.c
index 3dee267d7c75..076d24afbd72 100644
--- a/drivers/video/fbdev/atmel_lcdfb.c
+++ b/drivers/video/fbdev/atmel_lcdfb.c
@@ -18,10 +18,10 @@
 #include <linux/delay.h>
 #include <linux/backlight.h>
 #include <linux/gfp.h>
+#include <linux/gpio/consumer.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
-#include <linux/of_gpio.h>
 #include <video/of_display_timing.h>
 #include <linux/regulator/consumer.h>
 #include <video/videomode.h>
@@ -61,8 +61,7 @@ struct atmel_lcdfb_info {
 };
 
 struct atmel_lcdfb_power_ctrl_gpio {
-	int gpio;
-	int active_low;
+	struct gpio_desc *gpiod;
 
 	struct list_head list;
 };
@@ -1018,7 +1017,7 @@ static void atmel_lcdfb_power_control_gpio(struct atmel_lcdfb_pdata *pdata, int
 	struct atmel_lcdfb_power_ctrl_gpio *og;
 
 	list_for_each_entry(og, &pdata->pwr_gpios, list)
-		gpio_set_value(og->gpio, on);
+		gpiod_set_value(og->gpiod, on);
 }
 
 static int atmel_lcdfb_of_init(struct atmel_lcdfb_info *sinfo)
@@ -1031,11 +1030,11 @@ static int atmel_lcdfb_of_init(struct atmel_lcdfb_info *sinfo)
 	struct device_node *display_np;
 	struct device_node *timings_np;
 	struct display_timings *timings;
-	enum of_gpio_flags flags;
 	struct atmel_lcdfb_power_ctrl_gpio *og;
 	bool is_gpio_power = false;
+	struct gpio_desc *gpiod;
 	int ret = -ENOENT;
-	int i, gpio;
+	int i;
 
 	sinfo->config = (struct atmel_lcdfb_config*)
 		of_match_device(atmel_lcdfb_dt_ids, dev)->data;
@@ -1072,28 +1071,22 @@ static int atmel_lcdfb_of_init(struct atmel_lcdfb_info *sinfo)
 
 	INIT_LIST_HEAD(&pdata->pwr_gpios);
 	ret = -ENOMEM;
-	for (i = 0; i < of_gpio_named_count(display_np, "atmel,power-control-gpio"); i++) {
-		gpio = of_get_named_gpio_flags(display_np, "atmel,power-control-gpio",
-					       i, &flags);
-		if (gpio < 0)
+	for (i = 0; i < gpiod_count(dev, "atmel,power-control"); i++) {
+		gpiod = devm_gpiod_get_index(dev, "atmel,power-control",
+					     i, GPIOD_ASIS);
+		if (IS_ERR(gpiod))
 			continue;
 
 		og = devm_kzalloc(dev, sizeof(*og), GFP_KERNEL);
 		if (!og)
 			goto put_display_node;
 
-		og->gpio = gpio;
-		og->active_low = flags & OF_GPIO_ACTIVE_LOW;
+		og->gpiod = gpiod;
 		is_gpio_power = true;
-		ret = devm_gpio_request(dev, gpio, "lcd-power-control-gpio");
-		if (ret) {
-			dev_err(dev, "request gpio %d failed\n", gpio);
-			goto put_display_node;
-		}
 
-		ret = gpio_direction_output(gpio, og->active_low);
+		ret = gpiod_direction_output(gpiod, gpiod_is_active_low(gpiod));
 		if (ret) {
-			dev_err(dev, "set direction output gpio %d failed\n", gpio);
+			dev_err(dev, "set direction output gpio atmel,power-control[%d] failed\n", i);
 			goto put_display_node;
 		}
 		list_add(&og->list, &pdata->pwr_gpios);
diff --git a/drivers/video/fbdev/aty/aty128fb.c b/drivers/video/fbdev/aty/aty128fb.c
index db18474607c9..09b0e558dce8 100644
--- a/drivers/video/fbdev/aty/aty128fb.c
+++ b/drivers/video/fbdev/aty/aty128fb.c
@@ -1716,7 +1716,7 @@ static int aty128fb_setup(char *options)
 			continue;
 		}
 		if(!strncmp(this_opt, "nomtrr", 6)) {
-			mtrr = 0;
+			mtrr = false;
 			continue;
 		}
 #ifdef CONFIG_PPC_PMAC
diff --git a/drivers/video/fbdev/aty/mach64_ct.c b/drivers/video/fbdev/aty/mach64_ct.c
index 7d3bd723d3d5..74a62aa193c0 100644
--- a/drivers/video/fbdev/aty/mach64_ct.c
+++ b/drivers/video/fbdev/aty/mach64_ct.c
@@ -180,7 +180,7 @@ static int aty_dsp_gt(const struct fb_info *info, u32 bpp, struct pll_ct *pll)
 		dsp_on = ((multiplier << vshift) + divider) / divider;
 		tmp = ((ras_multiplier << xshift) + ras_divider) / ras_divider;
 		if (dsp_on < tmp)
-		dsp_on = tmp;
+			dsp_on = tmp;
 		dsp_on = dsp_on + (tmp * 2) + (pll->xclkpagefaultdelay << xshift);
 	}
 
diff --git a/drivers/video/fbdev/aty/radeon_base.c b/drivers/video/fbdev/aty/radeon_base.c
index 87608c0b2351..e8594bbaea60 100644
--- a/drivers/video/fbdev/aty/radeon_base.c
+++ b/drivers/video/fbdev/aty/radeon_base.c
@@ -2255,6 +2255,23 @@ static const struct bin_attribute edid2_attr = {
 	.read	= radeon_show_edid2,
 };
 
+static int radeon_kick_out_firmware_fb(struct pci_dev *pdev)
+{
+	struct apertures_struct *ap;
+
+	ap = alloc_apertures(1);
+	if (!ap)
+		return -ENOMEM;
+
+	ap->ranges[0].base = pci_resource_start(pdev, 0);
+	ap->ranges[0].size = pci_resource_len(pdev, 0);
+
+	remove_conflicting_framebuffers(ap, KBUILD_MODNAME, false);
+
+	kfree(ap);
+
+	return 0;
+}
 
 static int radeonfb_pci_register(struct pci_dev *pdev,
 				 const struct pci_device_id *ent)
@@ -2308,6 +2325,10 @@ static int radeonfb_pci_register(struct pci_dev *pdev,
 	rinfo->fb_base_phys = pci_resource_start (pdev, 0);
 	rinfo->mmio_base_phys = pci_resource_start (pdev, 2);
 
+	ret = radeon_kick_out_firmware_fb(pdev);
+	if (ret)
+		return ret;
+
 	/* request the mem regions */
 	ret = pci_request_region(pdev, 0, "radeonfb framebuffer");
 	if (ret < 0) {
diff --git a/drivers/video/fbdev/au1100fb.c b/drivers/video/fbdev/au1100fb.c
index 8de42f617d16..7c9a672e9811 100644
--- a/drivers/video/fbdev/au1100fb.c
+++ b/drivers/video/fbdev/au1100fb.c
@@ -410,18 +410,15 @@ static int au1100fb_setup(struct au1100fb_device *fbdev)
 
 static int au1100fb_drv_probe(struct platform_device *dev)
 {
-	struct au1100fb_device *fbdev = NULL;
+	struct au1100fb_device *fbdev;
 	struct resource *regs_res;
 	unsigned long page;
 	struct clk *c;
 
 	/* Allocate new device private */
-	fbdev = devm_kzalloc(&dev->dev, sizeof(struct au1100fb_device),
-			     GFP_KERNEL);
-	if (!fbdev) {
-		print_err("fail to allocate device private record");
+	fbdev = devm_kzalloc(&dev->dev, sizeof(*fbdev), GFP_KERNEL);
+	if (!fbdev)
 		return -ENOMEM;
-	}
 
 	if (au1100fb_setup(fbdev))
 		goto failed;
diff --git a/drivers/video/fbdev/fsl-diu-fb.c b/drivers/video/fbdev/fsl-diu-fb.c
index 25abbcf38913..1bfd13cbd4e3 100644
--- a/drivers/video/fbdev/fsl-diu-fb.c
+++ b/drivers/video/fbdev/fsl-diu-fb.c
@@ -1960,12 +1960,8 @@ static int __init fsl_diu_init(void)
 
 	of_node_put(np);
 	coherence_data = vmalloc(coherence_data_size);
-	if (!coherence_data) {
-		pr_err("fsl-diu-fb: could not allocate coherence data "
-		       "(size=%zu)\n", coherence_data_size);
+	if (!coherence_data)
 		return -ENOMEM;
-	}
-
 #endif
 
 	ret = platform_driver_register(&fsl_diu_driver);
diff --git a/drivers/video/fbdev/matrox/matroxfb_crtc2.c b/drivers/video/fbdev/matrox/matroxfb_crtc2.c
index 02796a4317a9..f64e1d55d7a1 100644
--- a/drivers/video/fbdev/matrox/matroxfb_crtc2.c
+++ b/drivers/video/fbdev/matrox/matroxfb_crtc2.c
@@ -696,10 +696,9 @@ static void* matroxfb_crtc2_probe(struct matrox_fb_info* minfo) {
 	if (!minfo->devflags.crtc2)
 		return NULL;
 	m2info = kzalloc(sizeof(*m2info), GFP_KERNEL);
-	if (!m2info) {
-		printk(KERN_ERR "matroxfb_crtc2: Not enough memory for CRTC2 control structs\n");
+	if (!m2info)
 		return NULL;
-	}
+
 	m2info->primary_dev = minfo;
 	if (matroxfb_dh_registerfb(m2info)) {
 		kfree(m2info);
diff --git a/drivers/video/fbdev/offb.c b/drivers/video/fbdev/offb.c
index 90d38de34479..77c0a2f45b3b 100644
--- a/drivers/video/fbdev/offb.c
+++ b/drivers/video/fbdev/offb.c
@@ -280,6 +280,7 @@ static void offb_destroy(struct fb_info *info)
 	if (info->screen_base)
 		iounmap(info->screen_base);
 	release_mem_region(info->apertures->ranges[0].base, info->apertures->ranges[0].size);
+	fb_dealloc_cmap(&info->cmap);
 	framebuffer_release(info);
 }
 
@@ -518,6 +519,7 @@ static void __init offb_init_fb(const char *name,
 	return;
 
 out_err:
+	fb_dealloc_cmap(&info->cmap);
 	iounmap(info->screen_base);
 out_aper:
 	iounmap(par->cmap_adr);
diff --git a/drivers/video/fbdev/s3c-fb.c b/drivers/video/fbdev/s3c-fb.c
index 5f4f696c2ecf..9ec85ccd0ce9 100644
--- a/drivers/video/fbdev/s3c-fb.c
+++ b/drivers/video/fbdev/s3c-fb.c
@@ -1383,11 +1383,9 @@ static int s3c_fb_probe(struct platform_device *pdev)
 		return -EINVAL;
 	}
 
-	sfb = devm_kzalloc(dev, sizeof(struct s3c_fb), GFP_KERNEL);
-	if (!sfb) {
-		dev_err(dev, "no memory for framebuffers\n");
+	sfb = devm_kzalloc(dev, sizeof(*sfb), GFP_KERNEL);
+	if (!sfb)
 		return -ENOMEM;
-	}
 
 	dev_dbg(dev, "allocate new framebuffer %p\n", sfb);
 
@@ -1716,63 +1714,6 @@ static struct s3c_fb_win_variant s3c_fb_data_64xx_wins[] = {
 	},
 };
 
-static struct s3c_fb_win_variant s3c_fb_data_s5p_wins[] = {
-	[0] = {
-		.has_osd_c	= 1,
-		.osd_size_off	= 0x8,
-		.palette_sz	= 256,
-		.valid_bpp	= (VALID_BPP1248 | VALID_BPP(13) |
-				   VALID_BPP(15) | VALID_BPP(16) |
-				   VALID_BPP(18) | VALID_BPP(19) |
-				   VALID_BPP(24) | VALID_BPP(25) |
-				   VALID_BPP(32)),
-	},
-	[1] = {
-		.has_osd_c	= 1,
-		.has_osd_d	= 1,
-		.osd_size_off	= 0xc,
-		.has_osd_alpha	= 1,
-		.palette_sz	= 256,
-		.valid_bpp	= (VALID_BPP1248 | VALID_BPP(13) |
-				   VALID_BPP(15) | VALID_BPP(16) |
-				   VALID_BPP(18) | VALID_BPP(19) |
-				   VALID_BPP(24) | VALID_BPP(25) |
-				   VALID_BPP(32)),
-	},
-	[2] = {
-		.has_osd_c	= 1,
-		.has_osd_d	= 1,
-		.osd_size_off	= 0xc,
-		.has_osd_alpha	= 1,
-		.palette_sz	= 256,
-		.valid_bpp	= (VALID_BPP1248 | VALID_BPP(13) |
-				   VALID_BPP(15) | VALID_BPP(16) |
-				   VALID_BPP(18) | VALID_BPP(19) |
-				   VALID_BPP(24) | VALID_BPP(25) |
-				   VALID_BPP(32)),
-	},
-	[3] = {
-		.has_osd_c	= 1,
-		.has_osd_alpha	= 1,
-		.palette_sz	= 256,
-		.valid_bpp	= (VALID_BPP1248 | VALID_BPP(13) |
-				   VALID_BPP(15) | VALID_BPP(16) |
-				   VALID_BPP(18) | VALID_BPP(19) |
-				   VALID_BPP(24) | VALID_BPP(25) |
-				   VALID_BPP(32)),
-	},
-	[4] = {
-		.has_osd_c	= 1,
-		.has_osd_alpha	= 1,
-		.palette_sz	= 256,
-		.valid_bpp	= (VALID_BPP1248 | VALID_BPP(13) |
-				   VALID_BPP(15) | VALID_BPP(16) |
-				   VALID_BPP(18) | VALID_BPP(19) |
-				   VALID_BPP(24) | VALID_BPP(25) |
-				   VALID_BPP(32)),
-	},
-};
-
 static struct s3c_fb_driverdata s3c_fb_data_64xx = {
 	.variant = {
 		.nr_windows	= 5,
@@ -1804,102 +1745,6 @@ static struct s3c_fb_driverdata s3c_fb_data_64xx = {
 	.win[4]	= &s3c_fb_data_64xx_wins[4],
 };
 
-static struct s3c_fb_driverdata s3c_fb_data_s5pv210 = {
-	.variant = {
-		.nr_windows	= 5,
-		.vidtcon	= VIDTCON0,
-		.wincon		= WINCON(0),
-		.winmap		= WINxMAP(0),
-		.keycon		= WKEYCON,
-		.osd		= VIDOSD_BASE,
-		.osd_stride	= 16,
-		.buf_start	= VIDW_BUF_START(0),
-		.buf_size	= VIDW_BUF_SIZE(0),
-		.buf_end	= VIDW_BUF_END(0),
-
-		.palette = {
-			[0] = 0x2400,
-			[1] = 0x2800,
-			[2] = 0x2c00,
-			[3] = 0x3000,
-			[4] = 0x3400,
-		},
-
-		.has_shadowcon	= 1,
-		.has_blendcon	= 1,
-		.has_clksel	= 1,
-		.has_fixvclk	= 1,
-	},
-	.win[0]	= &s3c_fb_data_s5p_wins[0],
-	.win[1]	= &s3c_fb_data_s5p_wins[1],
-	.win[2]	= &s3c_fb_data_s5p_wins[2],
-	.win[3]	= &s3c_fb_data_s5p_wins[3],
-	.win[4]	= &s3c_fb_data_s5p_wins[4],
-};
-
-static struct s3c_fb_driverdata s3c_fb_data_exynos4 = {
-	.variant = {
-		.nr_windows	= 5,
-		.vidtcon	= VIDTCON0,
-		.wincon		= WINCON(0),
-		.winmap		= WINxMAP(0),
-		.keycon		= WKEYCON,
-		.osd		= VIDOSD_BASE,
-		.osd_stride	= 16,
-		.buf_start	= VIDW_BUF_START(0),
-		.buf_size	= VIDW_BUF_SIZE(0),
-		.buf_end	= VIDW_BUF_END(0),
-
-		.palette = {
-			[0] = 0x2400,
-			[1] = 0x2800,
-			[2] = 0x2c00,
-			[3] = 0x3000,
-			[4] = 0x3400,
-		},
-
-		.has_shadowcon	= 1,
-		.has_blendcon	= 1,
-		.has_fixvclk	= 1,
-	},
-	.win[0]	= &s3c_fb_data_s5p_wins[0],
-	.win[1]	= &s3c_fb_data_s5p_wins[1],
-	.win[2]	= &s3c_fb_data_s5p_wins[2],
-	.win[3]	= &s3c_fb_data_s5p_wins[3],
-	.win[4]	= &s3c_fb_data_s5p_wins[4],
-};
-
-static struct s3c_fb_driverdata s3c_fb_data_exynos5 = {
-	.variant = {
-		.nr_windows	= 5,
-		.vidtcon	= FIMD_V8_VIDTCON0,
-		.wincon		= WINCON(0),
-		.winmap		= WINxMAP(0),
-		.keycon		= WKEYCON,
-		.osd		= VIDOSD_BASE,
-		.osd_stride	= 16,
-		.buf_start	= VIDW_BUF_START(0),
-		.buf_size	= VIDW_BUF_SIZE(0),
-		.buf_end	= VIDW_BUF_END(0),
-
-		.palette = {
-			[0] = 0x2400,
-			[1] = 0x2800,
-			[2] = 0x2c00,
-			[3] = 0x3000,
-			[4] = 0x3400,
-		},
-		.has_shadowcon	= 1,
-		.has_blendcon	= 1,
-		.has_fixvclk	= 1,
-	},
-	.win[0]	= &s3c_fb_data_s5p_wins[0],
-	.win[1]	= &s3c_fb_data_s5p_wins[1],
-	.win[2]	= &s3c_fb_data_s5p_wins[2],
-	.win[3]	= &s3c_fb_data_s5p_wins[3],
-	.win[4]	= &s3c_fb_data_s5p_wins[4],
-};
-
 /* S3C2443/S3C2416 style hardware */
 static struct s3c_fb_driverdata s3c_fb_data_s3c2443 = {
 	.variant = {
@@ -1942,15 +1787,6 @@ static const struct platform_device_id s3c_fb_driver_ids[] = {
 		.name		= "s3c-fb",
 		.driver_data	= (unsigned long)&s3c_fb_data_64xx,
 	}, {
-		.name		= "s5pv210-fb",
-		.driver_data	= (unsigned long)&s3c_fb_data_s5pv210,
-	}, {
-		.name		= "exynos4-fb",
-		.driver_data	= (unsigned long)&s3c_fb_data_exynos4,
-	}, {
-		.name		= "exynos5-fb",
-		.driver_data	= (unsigned long)&s3c_fb_data_exynos5,
-	}, {
 		.name		= "s3c2443-fb",
 		.driver_data	= (unsigned long)&s3c_fb_data_s3c2443,
 	},
diff --git a/drivers/video/fbdev/sis/init.h b/drivers/video/fbdev/sis/init.h
index 85d6738b6c64..400b0e5681b2 100644
--- a/drivers/video/fbdev/sis/init.h
+++ b/drivers/video/fbdev/sis/init.h
@@ -1461,81 +1461,5 @@ static const struct SiS_LVDSCRT1Data SiS_LVDSCRT1640x480_1_H[] =
    0x00}}
 };
 
-bool		SiSInitPtr(struct SiS_Private *SiS_Pr);
-unsigned short	SiS_GetModeID_LCD(int VGAEngine, unsigned int VBFlags, int HDisplay,
-				int VDisplay, int Depth, bool FSTN,
-				unsigned short CustomT, int LCDwith, int LCDheight,
-				unsigned int VBFlags2);
-unsigned short	SiS_GetModeID_TV(int VGAEngine, unsigned int VBFlags, int HDisplay,
-				int VDisplay, int Depth, unsigned int VBFlags2);
-unsigned short	SiS_GetModeID_VGA2(int VGAEngine, unsigned int VBFlags, int HDisplay,
-				int VDisplay, int Depth, unsigned int VBFlags2);
-
-void		SiS_DisplayOn(struct SiS_Private *SiS_Pr);
-void		SiS_DisplayOff(struct SiS_Private *SiS_Pr);
-void		SiSRegInit(struct SiS_Private *SiS_Pr, SISIOADDRESS BaseAddr);
-void		SiS_SetEnableDstn(struct SiS_Private *SiS_Pr, int enable);
-void		SiS_SetEnableFstn(struct SiS_Private *SiS_Pr, int enable);
-unsigned short	SiS_GetModeFlag(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
-				unsigned short ModeIdIndex);
-bool		SiSDetermineROMLayout661(struct SiS_Private *SiS_Pr);
-
-bool		SiS_SearchModeID(struct SiS_Private *SiS_Pr, unsigned short *ModeNo,
-				unsigned short *ModeIdIndex);
-unsigned short	SiS_GetModePtr(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
-				unsigned short ModeIdIndex);
-unsigned short  SiS_GetRefCRTVCLK(struct SiS_Private *SiS_Pr, unsigned short Index, int UseWide);
-unsigned short  SiS_GetRefCRT1CRTC(struct SiS_Private *SiS_Pr, unsigned short Index, int UseWide);
-unsigned short	SiS_GetColorDepth(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
-				unsigned short ModeIdIndex);
-unsigned short	SiS_GetOffset(struct SiS_Private *SiS_Pr,unsigned short ModeNo,
-				unsigned short ModeIdIndex, unsigned short RRTI);
-#ifdef CONFIG_FB_SIS_300
-void		SiS_GetFIFOThresholdIndex300(struct SiS_Private *SiS_Pr, unsigned short *idx1,
-				unsigned short *idx2);
-unsigned short	SiS_GetFIFOThresholdB300(unsigned short idx1, unsigned short idx2);
-unsigned short	SiS_GetLatencyFactor630(struct SiS_Private *SiS_Pr, unsigned short index);
-#endif
-void		SiS_LoadDAC(struct SiS_Private *SiS_Pr, unsigned short ModeNo, unsigned short ModeIdIndex);
-bool		SiSSetMode(struct SiS_Private *SiS_Pr, unsigned short ModeNo);
-void		SiS_CalcCRRegisters(struct SiS_Private *SiS_Pr, int depth);
-void		SiS_CalcLCDACRT1Timing(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
-				unsigned short ModeIdIndex);
-void		SiS_Generic_ConvertCRData(struct SiS_Private *SiS_Pr, unsigned char *crdata, int xres,
-				int yres, struct fb_var_screeninfo *var, bool writeres);
-
-/* From init301.c: */
-extern void		SiS_GetVBInfo(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
-				unsigned short ModeIdIndex, int chkcrt2mode);
-extern void		SiS_GetLCDResInfo(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
-				unsigned short ModeIdIndex);
-extern void		SiS_SetYPbPr(struct SiS_Private *SiS_Pr);
-extern void		SiS_SetTVMode(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
-				unsigned short ModeIdIndex);
-extern void		SiS_UnLockCRT2(struct SiS_Private *SiS_Pr);
-extern void		SiS_DisableBridge(struct SiS_Private *);
-extern bool		SiS_SetCRT2Group(struct SiS_Private *, unsigned short);
-extern unsigned short	SiS_GetRatePtr(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
-				unsigned short ModeIdIndex);
-extern void		SiS_WaitRetrace1(struct SiS_Private *SiS_Pr);
-extern unsigned short	SiS_GetResInfo(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
-				unsigned short ModeIdIndex);
-extern unsigned short	SiS_GetCH700x(struct SiS_Private *SiS_Pr, unsigned short tempax);
-extern unsigned short	SiS_GetVCLK2Ptr(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
-				unsigned short ModeIdIndex, unsigned short RRTI);
-extern bool		SiS_IsVAMode(struct SiS_Private *);
-extern bool		SiS_IsDualEdge(struct SiS_Private *);
-
-#ifdef CONFIG_FB_SIS_300
-extern unsigned int	sisfb_read_nbridge_pci_dword(struct SiS_Private *SiS_Pr, int reg);
-extern void		sisfb_write_nbridge_pci_dword(struct SiS_Private *SiS_Pr, int reg,
-				unsigned int val);
-#endif
-#ifdef CONFIG_FB_SIS_315
-extern void		sisfb_write_nbridge_pci_byte(struct SiS_Private *SiS_Pr, int reg,
-				unsigned char val);
-extern unsigned int	sisfb_read_mio_pci_word(struct SiS_Private *SiS_Pr, int reg);
-#endif
-
 #endif
 
diff --git a/drivers/video/fbdev/sis/init301.c b/drivers/video/fbdev/sis/init301.c
index 02ee752d5000..27a2b72e50e8 100644
--- a/drivers/video/fbdev/sis/init301.c
+++ b/drivers/video/fbdev/sis/init301.c
@@ -82,6 +82,332 @@
 #define SiS_I2CDELAY      1000
 #define SiS_I2CDELAYSHORT  150
 
+static const unsigned char SiS_YPbPrTable[3][64] = {
+  {
+    0x17,0x1d,0x03,0x09,0x05,0x06,0x0c,0x0c,
+    0x94,0x49,0x01,0x0a,0x06,0x0d,0x04,0x0a,
+    0x06,0x14,0x0d,0x04,0x0a,0x00,0x85,0x1b,
+    0x0c,0x50,0x00,0x97,0x00,0xda,0x4a,0x17,
+    0x7d,0x05,0x4b,0x00,0x00,0xe2,0x00,0x02,
+    0x03,0x0a,0x65,0x9d /*0x8d*/,0x08,0x92,0x8f,0x40,
+    0x60,0x80,0x14,0x90,0x8c,0x60,0x14,0x53 /*0x50*/,
+    0x00,0x40,0x44,0x00,0xdb,0x02,0x3b,0x00
+  },
+  {
+    0x33,0x06,0x06,0x09,0x0b,0x0c,0x0c,0x0c,
+    0x98,0x0a,0x01,0x0d,0x06,0x0d,0x04,0x0a,
+    0x06,0x14,0x0d,0x04,0x0a,0x00,0x85,0x3f,
+    0x0c,0x50,0xb2,0x9f,0x16,0x59,0x4f,0x13,
+    0xad,0x11,0xad,0x1d,0x40,0x8a,0x3d,0xb8,
+    0x51,0x5e,0x60,0x49,0x7d,0x92,0x0f,0x40,
+    0x60,0x80,0x14,0x90,0x8c,0x60,0x14,0x4e,
+    0x43,0x41,0x11,0x00,0xfc,0xff,0x32,0x00
+  },
+  {
+#if 0 /* OK, but sticks to left edge */
+    0x13,0x1d,0xe8,0x09,0x09,0xed,0x0c,0x0c,
+    0x98,0x0a,0x01,0x0c,0x06,0x0d,0x04,0x0a,
+    0x06,0x14,0x0d,0x04,0x0a,0x00,0x85,0x3f,
+    0xed,0x50,0x70,0x9f,0x16,0x59,0x21 /*0x2b*/,0x13,
+    0x27,0x0b,0x27,0xfc,0x30,0x27,0x1c,0xb0,
+    0x4b,0x4b,0x65 /*0x6f*/,0x2f,0x63,0x92,0x0f,0x40,
+    0x60,0x80,0x14,0x90,0x8c,0x60,0x14,0x27,
+    0x00,0x40,0x11,0x00,0xfc,0xff,0x32,0x00
+#endif
+#if 1 /* Perfect */
+    0x23,0x2d,0xe8,0x09,0x09,0xed,0x0c,0x0c,
+    0x98,0x0a,0x01,0x0c,0x06,0x0d,0x04,0x0a,
+    0x06,0x14,0x0d,0x04,0x0a,0x00,0x85,0x3f,
+    0xed,0x50,0x70,0x9f,0x16,0x59,0x60,0x13,
+    0x27,0x0b,0x27,0xfc,0x30,0x27,0x1c,0xb0,
+    0x4b,0x4b,0x6f,0x2f,0x63,0x92,0x0f,0x40,
+    0x60,0x80,0x14,0x90,0x8c,0x60,0x14,0x73,
+    0x00,0x40,0x11,0x00,0xfc,0xff,0x32,0x00
+#endif
+  }
+};
+
+static const unsigned char SiS_TVPhase[] =
+{
+	0x21,0xED,0xBA,0x08,	/* 0x00 SiS_NTSCPhase */
+	0x2A,0x05,0xE3,0x00,	/* 0x01 SiS_PALPhase */
+	0x21,0xE4,0x2E,0x9B,	/* 0x02 SiS_PALMPhase */
+	0x21,0xF4,0x3E,0xBA,	/* 0x03 SiS_PALNPhase */
+	0x1E,0x8B,0xA2,0xA7,
+	0x1E,0x83,0x0A,0xE0,	/* 0x05 SiS_SpecialPhaseM */
+	0x00,0x00,0x00,0x00,
+	0x00,0x00,0x00,0x00,
+	0x21,0xF0,0x7B,0xD6,	/* 0x08 SiS_NTSCPhase2 */
+	0x2A,0x09,0x86,0xE9,	/* 0x09 SiS_PALPhase2 */
+	0x21,0xE6,0xEF,0xA4,	/* 0x0a SiS_PALMPhase2 */
+	0x21,0xF6,0x94,0x46,	/* 0x0b SiS_PALNPhase2 */
+	0x1E,0x8B,0xA2,0xA7,
+	0x1E,0x83,0x0A,0xE0,	/* 0x0d SiS_SpecialPhaseM */
+	0x00,0x00,0x00,0x00,
+	0x00,0x00,0x00,0x00,
+	0x1e,0x8c,0x5c,0x7a,	/* 0x10 SiS_SpecialPhase */
+	0x25,0xd4,0xfd,0x5e	/* 0x11 SiS_SpecialPhaseJ */
+};
+
+static const unsigned char SiS_HiTVGroup3_1[] = {
+    0x00, 0x14, 0x15, 0x25, 0x55, 0x15, 0x0b, 0x13,
+    0xb1, 0x41, 0x62, 0x62, 0xff, 0xf4, 0x45, 0xa6,
+    0x25, 0x2f, 0x67, 0xf6, 0xbf, 0xff, 0x8e, 0x20,
+    0xac, 0xda, 0x60, 0xfe, 0x6a, 0x9a, 0x06, 0x10,
+    0xd1, 0x04, 0x18, 0x0a, 0xff, 0x80, 0x00, 0x80,
+    0x3b, 0x77, 0x00, 0xef, 0xe0, 0x10, 0xb0, 0xe0,
+    0x10, 0x4f, 0x0f, 0x0f, 0x05, 0x0f, 0x08, 0x6e,
+    0x1a, 0x1f, 0x25, 0x2a, 0x4c, 0xaa, 0x01
+};
+
+static const unsigned char SiS_HiTVGroup3_2[] = {
+    0x00, 0x14, 0x15, 0x25, 0x55, 0x15, 0x0b, 0x7a,
+    0x54, 0x41, 0xe7, 0xe7, 0xff, 0xf4, 0x45, 0xa6,
+    0x25, 0x2f, 0x67, 0xf6, 0xbf, 0xff, 0x8e, 0x20,
+    0xac, 0x6a, 0x60, 0x2b, 0x52, 0xcd, 0x61, 0x10,
+    0x51, 0x04, 0x18, 0x0a, 0x1f, 0x80, 0x00, 0x80,
+    0xff, 0xa4, 0x04, 0x2b, 0x94, 0x21, 0x72, 0x94,
+    0x26, 0x05, 0x01, 0x0f, 0xed, 0x0f, 0x0a, 0x64,
+    0x18, 0x1d, 0x23, 0x28, 0x4c, 0xaa, 0x01
+};
+
+/* 301C / 302ELV extended Part2 TV registers (4 tap scaler) */
+
+static const unsigned char SiS_Part2CLVX_1[] = {
+    0x00,0x00,
+    0x00,0x20,0x00,0x00,0x7F,0x20,0x02,0x7F,0x7D,0x20,0x04,0x7F,0x7D,0x1F,0x06,0x7E,
+    0x7C,0x1D,0x09,0x7E,0x7C,0x1B,0x0B,0x7E,0x7C,0x19,0x0E,0x7D,0x7C,0x17,0x11,0x7C,
+    0x7C,0x14,0x14,0x7C,0x7C,0x11,0x17,0x7C,0x7D,0x0E,0x19,0x7C,0x7E,0x0B,0x1B,0x7C,
+    0x7E,0x09,0x1D,0x7C,0x7F,0x06,0x1F,0x7C,0x7F,0x04,0x20,0x7D,0x00,0x02,0x20,0x7E
+};
+
+static const unsigned char SiS_Part2CLVX_2[] = {
+    0x00,0x00,
+    0x00,0x20,0x00,0x00,0x7F,0x20,0x02,0x7F,0x7D,0x20,0x04,0x7F,0x7D,0x1F,0x06,0x7E,
+    0x7C,0x1D,0x09,0x7E,0x7C,0x1B,0x0B,0x7E,0x7C,0x19,0x0E,0x7D,0x7C,0x17,0x11,0x7C,
+    0x7C,0x14,0x14,0x7C,0x7C,0x11,0x17,0x7C,0x7D,0x0E,0x19,0x7C,0x7E,0x0B,0x1B,0x7C,
+    0x7E,0x09,0x1D,0x7C,0x7F,0x06,0x1F,0x7C,0x7F,0x04,0x20,0x7D,0x00,0x02,0x20,0x7E
+};
+
+static const unsigned char SiS_Part2CLVX_3[] = {  /* NTSC, 525i, 525p */
+    0xE0,0x01,
+    0x04,0x1A,0x04,0x7E,0x03,0x1A,0x06,0x7D,0x01,0x1A,0x08,0x7D,0x00,0x19,0x0A,0x7D,
+    0x7F,0x19,0x0C,0x7C,0x7E,0x18,0x0E,0x7C,0x7E,0x17,0x10,0x7B,0x7D,0x15,0x12,0x7C,
+    0x7D,0x13,0x13,0x7D,0x7C,0x12,0x15,0x7D,0x7C,0x10,0x17,0x7D,0x7C,0x0E,0x18,0x7E,
+    0x7D,0x0C,0x19,0x7E,0x7D,0x0A,0x19,0x00,0x7D,0x08,0x1A,0x01,0x7E,0x06,0x1A,0x02,
+    0x58,0x02,
+    0x07,0x14,0x07,0x7E,0x06,0x14,0x09,0x7D,0x05,0x14,0x0A,0x7D,0x04,0x13,0x0B,0x7E,
+    0x03,0x13,0x0C,0x7E,0x02,0x12,0x0D,0x7F,0x01,0x12,0x0E,0x7F,0x01,0x11,0x0F,0x7F,
+    0x00,0x10,0x10,0x00,0x7F,0x0F,0x11,0x01,0x7F,0x0E,0x12,0x01,0x7E,0x0D,0x12,0x03,
+    0x7E,0x0C,0x13,0x03,0x7E,0x0B,0x13,0x04,0x7E,0x0A,0x14,0x04,0x7D,0x09,0x14,0x06,
+    0x00,0x03,
+    0x09,0x0F,0x09,0x7F,0x08,0x0F,0x09,0x00,0x07,0x0F,0x0A,0x00,0x06,0x0F,0x0A,0x01,
+    0x06,0x0E,0x0B,0x01,0x05,0x0E,0x0B,0x02,0x04,0x0E,0x0C,0x02,0x04,0x0D,0x0C,0x03,
+    0x03,0x0D,0x0D,0x03,0x02,0x0C,0x0D,0x05,0x02,0x0C,0x0E,0x04,0x01,0x0B,0x0E,0x06,
+    0x01,0x0B,0x0E,0x06,0x00,0x0A,0x0F,0x07,0x00,0x0A,0x0F,0x07,0x00,0x09,0x0F,0x08,
+    0xFF,0xFF
+};
+
+static const unsigned char SiS_Part2CLVX_4[] = {   /* PAL */
+    0x58,0x02,
+    0x05,0x19,0x05,0x7D,0x03,0x19,0x06,0x7E,0x02,0x19,0x08,0x7D,0x01,0x18,0x0A,0x7D,
+    0x00,0x18,0x0C,0x7C,0x7F,0x17,0x0E,0x7C,0x7E,0x16,0x0F,0x7D,0x7E,0x14,0x11,0x7D,
+    0x7D,0x13,0x13,0x7D,0x7D,0x11,0x14,0x7E,0x7D,0x0F,0x16,0x7E,0x7D,0x0E,0x17,0x7E,
+    0x7D,0x0C,0x18,0x7F,0x7D,0x0A,0x18,0x01,0x7D,0x08,0x19,0x02,0x7D,0x06,0x19,0x04,
+    0x00,0x03,
+    0x08,0x12,0x08,0x7E,0x07,0x12,0x09,0x7E,0x06,0x12,0x0A,0x7E,0x05,0x11,0x0B,0x7F,
+    0x04,0x11,0x0C,0x7F,0x03,0x11,0x0C,0x00,0x03,0x10,0x0D,0x00,0x02,0x0F,0x0E,0x01,
+    0x01,0x0F,0x0F,0x01,0x01,0x0E,0x0F,0x02,0x00,0x0D,0x10,0x03,0x7F,0x0C,0x11,0x04,
+    0x7F,0x0C,0x11,0x04,0x7F,0x0B,0x11,0x05,0x7E,0x0A,0x12,0x06,0x7E,0x09,0x12,0x07,
+    0x40,0x02,
+    0x04,0x1A,0x04,0x7E,0x02,0x1B,0x05,0x7E,0x01,0x1A,0x07,0x7E,0x00,0x1A,0x09,0x7D,
+    0x7F,0x19,0x0B,0x7D,0x7E,0x18,0x0D,0x7D,0x7D,0x17,0x10,0x7C,0x7D,0x15,0x12,0x7C,
+    0x7C,0x14,0x14,0x7C,0x7C,0x12,0x15,0x7D,0x7C,0x10,0x17,0x7D,0x7C,0x0D,0x18,0x7F,
+    0x7D,0x0B,0x19,0x7F,0x7D,0x09,0x1A,0x00,0x7D,0x07,0x1A,0x02,0x7E,0x05,0x1B,0x02,
+    0xFF,0xFF
+};
+
+static const unsigned char SiS_Part2CLVX_5[] = {   /* 750p */
+    0x00,0x03,
+    0x05,0x19,0x05,0x7D,0x03,0x19,0x06,0x7E,0x02,0x19,0x08,0x7D,0x01,0x18,0x0A,0x7D,
+    0x00,0x18,0x0C,0x7C,0x7F,0x17,0x0E,0x7C,0x7E,0x16,0x0F,0x7D,0x7E,0x14,0x11,0x7D,
+    0x7D,0x13,0x13,0x7D,0x7D,0x11,0x14,0x7E,0x7D,0x0F,0x16,0x7E,0x7D,0x0E,0x17,0x7E,
+    0x7D,0x0C,0x18,0x7F,0x7D,0x0A,0x18,0x01,0x7D,0x08,0x19,0x02,0x7D,0x06,0x19,0x04,
+    0xFF,0xFF
+};
+
+static const unsigned char SiS_Part2CLVX_6[] = {   /* 1080i */
+    0x00,0x04,
+    0x04,0x1A,0x04,0x7E,0x02,0x1B,0x05,0x7E,0x01,0x1A,0x07,0x7E,0x00,0x1A,0x09,0x7D,
+    0x7F,0x19,0x0B,0x7D,0x7E,0x18,0x0D,0x7D,0x7D,0x17,0x10,0x7C,0x7D,0x15,0x12,0x7C,
+    0x7C,0x14,0x14,0x7C,0x7C,0x12,0x15,0x7D,0x7C,0x10,0x17,0x7D,0x7C,0x0D,0x18,0x7F,
+    0x7D,0x0B,0x19,0x7F,0x7D,0x09,0x1A,0x00,0x7D,0x07,0x1A,0x02,0x7E,0x05,0x1B,0x02,
+    0xFF,0xFF,
+};
+
+#ifdef CONFIG_FB_SIS_315
+/* 661 et al LCD data structure (2.03.00) */
+static const unsigned char SiS_LCDStruct661[] = {
+    /* 1024x768 */
+/*  type|CR37|   HDE   |   VDE   |    HT   |    VT   |   hss    | hse   */
+    0x02,0xC0,0x00,0x04,0x00,0x03,0x40,0x05,0x26,0x03,0x10,0x00,0x88,
+    0x00,0x02,0x00,0x06,0x00,0x41,0x5A,0x64,0x00,0x00,0x00,0x00,0x04,
+    /*  | vss     |    vse  |clck|  clock  |CRT2DataP|CRT2DataP|idx     */
+    /*					      VESA    non-VESA  noscale */
+    /* 1280x1024 */
+    0x03,0xC0,0x00,0x05,0x00,0x04,0x98,0x06,0x2A,0x04,0x30,0x00,0x70,
+    0x00,0x01,0x00,0x03,0x00,0x6C,0xF8,0x2F,0x00,0x00,0x00,0x00,0x08,
+    /* 1400x1050 */
+    0x09,0x20,0x78,0x05,0x1A,0x04,0x98,0x06,0x2A,0x04,0x18,0x00,0x38,
+    0x00,0x01,0x00,0x03,0x00,0x6C,0xF8,0x2F,0x00,0x00,0x00,0x00,0x09,
+    /* 1600x1200 */
+    0x0B,0xE0,0x40,0x06,0xB0,0x04,0x70,0x08,0xE2,0x04,0x40,0x00,0xC0,
+    0x00,0x01,0x00,0x03,0x00,0xA2,0x70,0x24,0x00,0x00,0x00,0x00,0x0A,
+    /* 1280x768 (_2) */
+    0x0A,0xE0,0x00,0x05,0x00,0x03,0x7C,0x06,0x26,0x03,0x30,0x00,0x70,
+    0x00,0x03,0x00,0x06,0x00,0x4D,0xC8,0x48,0x00,0x00,0x00,0x00,0x06,
+    /* 1280x720 */
+    0x0E,0xE0,0x00,0x05,0xD0,0x02,0x80,0x05,0x26,0x03,0x10,0x00,0x20,
+    0x00,0x01,0x00,0x06,0x00,0x45,0x9C,0x62,0x00,0x00,0x00,0x00,0x05,
+    /* 1280x800 (_2) */
+    0x0C,0xE0,0x00,0x05,0x20,0x03,0x10,0x06,0x2C,0x03,0x30,0x00,0x70,
+    0x00,0x04,0x00,0x03,0x00,0x49,0xCE,0x1E,0x00,0x00,0x00,0x00,0x09,
+    /* 1680x1050 */
+    0x0D,0xE0,0x90,0x06,0x1A,0x04,0x6C,0x07,0x2A,0x04,0x1A,0x00,0x4C,
+    0x00,0x03,0x00,0x06,0x00,0x79,0xBE,0x44,0x00,0x00,0x00,0x00,0x06,
+    /* 1280x800_3 */
+    0x0C,0xE0,0x00,0x05,0x20,0x03,0xAA,0x05,0x2E,0x03,0x30,0x00,0x50,
+    0x00,0x04,0x00,0x03,0x00,0x47,0xA9,0x10,0x00,0x00,0x00,0x00,0x07,
+    /* 800x600 */
+    0x01,0xC0,0x20,0x03,0x58,0x02,0x20,0x04,0x74,0x02,0x2A,0x00,0x80,
+    0x00,0x06,0x00,0x04,0x00,0x28,0x63,0x4B,0x00,0x00,0x00,0x00,0x00,
+    /* 1280x854 */
+    0x08,0xE0,0x00,0x05,0x56,0x03,0x80,0x06,0x5d,0x03,0x10,0x00,0x70,
+    0x00,0x01,0x00,0x03,0x00,0x54,0x75,0x13,0x00,0x00,0x00,0x00,0x08
+};
+#endif
+
+#ifdef CONFIG_FB_SIS_300
+static unsigned char SiS300_TrumpionData[14][80] = {
+  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x0D,0x00,0x0D,0x10,0x7F,0x00,0x80,0x02,
+    0x20,0x03,0x0B,0x00,0x90,0x01,0xC1,0x01,0x60,0x0C,0x30,0x10,0x00,0x00,0x04,0x23,
+    0x00,0x00,0x03,0x28,0x03,0x10,0x05,0x08,0x40,0x10,0x00,0x10,0x04,0x23,0x00,0x23,
+    0x03,0x11,0x60,0xBC,0x01,0xFF,0x03,0xFF,0x19,0x01,0x00,0x05,0x09,0x04,0x04,0x05,
+    0x04,0x0C,0x09,0x05,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x5A,0x01,0xBE,0x01,0x00 },
+  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x0D,0x00,0x0D,0x10,0x27,0x00,0x80,0x02,
+    0x20,0x03,0x07,0x00,0x5E,0x01,0x0D,0x02,0x60,0x0C,0x30,0x11,0x00,0x00,0x04,0x23,
+    0x00,0x00,0x03,0x80,0x03,0x28,0x06,0x08,0x40,0x11,0x00,0x11,0x04,0x23,0x00,0x23,
+    0x03,0x11,0x60,0x90,0x01,0xFF,0x0F,0xF4,0x19,0x01,0x00,0x05,0x01,0x00,0x04,0x05,
+    0x04,0x0C,0x02,0x01,0x02,0xB0,0x00,0x00,0x02,0xBA,0xEC,0x57,0x01,0xBE,0x01,0x00 },
+  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x0D,0x00,0x0D,0x10,0x8A,0x00,0xD8,0x02,
+    0x84,0x03,0x16,0x00,0x90,0x01,0xC1,0x01,0x60,0x0C,0x30,0x1C,0x00,0x20,0x04,0x23,
+    0x00,0x01,0x03,0x53,0x03,0x28,0x06,0x08,0x40,0x1C,0x00,0x16,0x04,0x23,0x00,0x23,
+    0x03,0x11,0x60,0xD9,0x01,0xFF,0x0F,0xF4,0x18,0x07,0x05,0x05,0x13,0x04,0x04,0x05,
+    0x01,0x0B,0x13,0x0A,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x59,0x01,0xBE,0x01,0x00 },
+  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x0D,0x00,0x0D,0x10,0x72,0x00,0xD8,0x02,
+    0x84,0x03,0x16,0x00,0x90,0x01,0xC1,0x01,0x60,0x0C,0x30,0x1C,0x00,0x20,0x04,0x23,
+    0x00,0x01,0x03,0x53,0x03,0x28,0x06,0x08,0x40,0x1C,0x00,0x16,0x04,0x23,0x00,0x23,
+    0x03,0x11,0x60,0xDA,0x01,0xFF,0x0F,0xF4,0x18,0x07,0x05,0x05,0x13,0x04,0x04,0x05,
+    0x01,0x0B,0x13,0x0A,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x55,0x01,0xBE,0x01,0x00 },
+  { 0x02,0x0A,0x02,0x00,0x04,0x01,0x00,0x03,0x0D,0x00,0x0D,0x10,0x7F,0x00,0x80,0x02,
+    0x20,0x03,0x16,0x00,0xE0,0x01,0x0D,0x02,0x60,0x0C,0x30,0x98,0x00,0x00,0x04,0x23,
+    0x00,0x01,0x03,0x45,0x03,0x48,0x06,0x08,0x40,0x98,0x00,0x98,0x04,0x23,0x00,0x23,
+    0x03,0x11,0x60,0xF4,0x01,0xFF,0x0F,0xF4,0x18,0x01,0x00,0x05,0x01,0x00,0x05,0x05,
+    0x04,0x0C,0x08,0x05,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x5B,0x01,0xBE,0x01,0x00 },
+  { 0x02,0x0A,0x02,0x01,0x04,0x01,0x00,0x03,0x0D,0x00,0x0D,0x10,0xBF,0x00,0x20,0x03,
+    0x20,0x04,0x0D,0x00,0x58,0x02,0x71,0x02,0x80,0x0C,0x30,0x9A,0x00,0xFA,0x03,0x1D,
+    0x00,0x01,0x03,0x22,0x03,0x28,0x06,0x08,0x40,0x98,0x00,0x98,0x04,0x1D,0x00,0x1D,
+    0x03,0x11,0x60,0x39,0x03,0x40,0x05,0xF4,0x18,0x07,0x02,0x06,0x04,0x01,0x06,0x0B,
+    0x02,0x0A,0x20,0x19,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x5B,0x01,0xBE,0x01,0x00 },
+  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x0D,0x00,0x0D,0x10,0xEF,0x00,0x00,0x04,
+    0x40,0x05,0x13,0x00,0x00,0x03,0x26,0x03,0x88,0x0C,0x30,0x90,0x00,0x00,0x04,0x23,
+    0x00,0x01,0x03,0x24,0x03,0x28,0x06,0x08,0x40,0x90,0x00,0x90,0x04,0x23,0x00,0x23,
+    0x03,0x11,0x60,0x40,0x05,0xFF,0x0F,0xF4,0x18,0x01,0x00,0x08,0x01,0x00,0x08,0x01,
+    0x00,0x08,0x01,0x01,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x5B,0x01,0xBE,0x01,0x00 },
+  /* variant 2 */
+  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x11,0x00,0x0D,0x10,0x7F,0x00,0x80,0x02,
+    0x20,0x03,0x15,0x00,0x90,0x01,0xC1,0x01,0x60,0x0C,0x30,0x18,0x00,0x00,0x04,0x23,
+    0x00,0x01,0x03,0x44,0x03,0x28,0x06,0x08,0x40,0x18,0x00,0x18,0x04,0x23,0x00,0x23,
+    0x03,0x11,0x60,0xA6,0x01,0xFF,0x03,0xFF,0x19,0x01,0x00,0x05,0x13,0x04,0x04,0x05,
+    0x04,0x0C,0x13,0x0A,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x55,0x01,0xBE,0x01,0x00 },
+  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x11,0x00,0x0D,0x10,0x7F,0x00,0x80,0x02,
+    0x20,0x03,0x15,0x00,0x90,0x01,0xC1,0x01,0x60,0x0C,0x30,0x18,0x00,0x00,0x04,0x23,
+    0x00,0x01,0x03,0x44,0x03,0x28,0x06,0x08,0x40,0x18,0x00,0x18,0x04,0x23,0x00,0x23,
+    0x03,0x11,0x60,0xA6,0x01,0xFF,0x03,0xFF,0x19,0x01,0x00,0x05,0x13,0x04,0x04,0x05,
+    0x04,0x0C,0x13,0x0A,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x55,0x01,0xBE,0x01,0x00 },
+  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x11,0x00,0x0D,0x10,0x8A,0x00,0xD8,0x02,
+    0x84,0x03,0x16,0x00,0x90,0x01,0xC1,0x01,0x60,0x0C,0x30,0x1C,0x00,0x20,0x04,0x23,
+    0x00,0x01,0x03,0x53,0x03,0x28,0x06,0x08,0x40,0x1C,0x00,0x16,0x04,0x23,0x00,0x23,
+    0x03,0x11,0x60,0xDA,0x01,0xFF,0x0F,0xF4,0x18,0x07,0x05,0x05,0x13,0x04,0x04,0x05,
+    0x01,0x0B,0x13,0x0A,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x55,0x01,0xBE,0x01,0x00 },
+  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x11,0x00,0x0D,0x10,0x72,0x00,0xD8,0x02,
+    0x84,0x03,0x16,0x00,0x90,0x01,0xC1,0x01,0x60,0x0C,0x30,0x1C,0x00,0x20,0x04,0x23,
+    0x00,0x01,0x03,0x53,0x03,0x28,0x06,0x08,0x40,0x1C,0x00,0x16,0x04,0x23,0x00,0x23,
+    0x03,0x11,0x60,0xDA,0x01,0xFF,0x0F,0xF4,0x18,0x07,0x05,0x05,0x13,0x04,0x04,0x05,
+    0x01,0x0B,0x13,0x0A,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x55,0x01,0xBE,0x01,0x00 },
+  { 0x02,0x0A,0x02,0x00,0x04,0x01,0x00,0x03,0x11,0x00,0x0D,0x10,0x7F,0x00,0x80,0x02,
+    0x20,0x03,0x16,0x00,0xE0,0x01,0x0D,0x02,0x60,0x0C,0x30,0x98,0x00,0x00,0x04,0x23,
+    0x00,0x01,0x03,0x45,0x03,0x48,0x06,0x08,0x40,0x98,0x00,0x98,0x04,0x23,0x00,0x23,
+    0x03,0x11,0x60,0xF4,0x01,0xFF,0x0F,0xF4,0x18,0x01,0x00,0x05,0x01,0x00,0x05,0x05,
+    0x04,0x0C,0x08,0x05,0x02,0xB0,0x00,0x00,0x02,0xBA,0xEA,0x58,0x01,0xBE,0x01,0x00 },
+  { 0x02,0x0A,0x02,0x01,0x04,0x01,0x00,0x03,0x11,0x00,0x0D,0x10,0xBF,0x00,0x20,0x03,
+    0x20,0x04,0x0D,0x00,0x58,0x02,0x71,0x02,0x80,0x0C,0x30,0x9A,0x00,0xFA,0x03,0x1D,
+    0x00,0x01,0x03,0x22,0x03,0x28,0x06,0x08,0x40,0x98,0x00,0x98,0x04,0x1D,0x00,0x1D,
+    0x03,0x11,0x60,0x39,0x03,0x40,0x05,0xF4,0x18,0x07,0x02,0x06,0x04,0x01,0x06,0x0B,
+    0x02,0x0A,0x20,0x19,0x02,0xB0,0x00,0x00,0x02,0xBA,0xEA,0x58,0x01,0xBE,0x01,0x00 },
+  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x11,0x00,0x0D,0x10,0xEF,0x00,0x00,0x04,
+    0x40,0x05,0x13,0x00,0x00,0x03,0x26,0x03,0x88,0x0C,0x30,0x90,0x00,0x00,0x04,0x23,
+    0x00,0x01,0x03,0x24,0x03,0x28,0x06,0x08,0x40,0x90,0x00,0x90,0x04,0x23,0x00,0x23,
+    0x03,0x11,0x60,0x40,0x05,0xFF,0x0F,0xF4,0x18,0x01,0x00,0x08,0x01,0x00,0x08,0x01,
+    0x00,0x08,0x01,0x01,0x02,0xB0,0x00,0x00,0x02,0xBA,0xEA,0x58,0x01,0xBE,0x01,0x00 }
+};
+#endif
+
+#ifdef CONFIG_FB_SIS_315
+static void	SiS_Chrontel701xOn(struct SiS_Private *SiS_Pr);
+static void	SiS_Chrontel701xOff(struct SiS_Private *SiS_Pr);
+static void	SiS_ChrontelInitTVVSync(struct SiS_Private *SiS_Pr);
+static void	SiS_ChrontelDoSomething1(struct SiS_Private *SiS_Pr);
+#endif /* 315 */
+
+#ifdef CONFIG_FB_SIS_300
+static  bool	SiS_SetTrumpionBlock(struct SiS_Private *SiS_Pr, unsigned char *dataptr);
+#endif
+
+static unsigned short	SiS_InitDDCRegs(struct SiS_Private *SiS_Pr, unsigned int VBFlags,
+				int VGAEngine, unsigned short adaptnum, unsigned short DDCdatatype,
+				bool checkcr32, unsigned int VBFlags2);
+static unsigned short	SiS_ProbeDDC(struct SiS_Private *SiS_Pr);
+static unsigned short	SiS_ReadDDC(struct SiS_Private *SiS_Pr, unsigned short DDCdatatype,
+				unsigned char *buffer);
+static void		SiS_SetSwitchDDC2(struct SiS_Private *SiS_Pr);
+static unsigned short	SiS_SetStart(struct SiS_Private *SiS_Pr);
+static unsigned short	SiS_SetStop(struct SiS_Private *SiS_Pr);
+static unsigned short	SiS_SetSCLKLow(struct SiS_Private *SiS_Pr);
+static unsigned short	SiS_SetSCLKHigh(struct SiS_Private *SiS_Pr);
+static unsigned short	SiS_ReadDDC2Data(struct SiS_Private *SiS_Pr);
+static unsigned short	SiS_WriteDDC2Data(struct SiS_Private *SiS_Pr, unsigned short tempax);
+static unsigned short	SiS_CheckACK(struct SiS_Private *SiS_Pr);
+static unsigned short	SiS_WriteDABDDC(struct SiS_Private *SiS_Pr);
+static unsigned short	SiS_PrepareReadDDC(struct SiS_Private *SiS_Pr);
+static unsigned short	SiS_PrepareDDC(struct SiS_Private *SiS_Pr);
+static void		SiS_SendACK(struct SiS_Private *SiS_Pr, unsigned short yesno);
+static unsigned short	SiS_DoProbeDDC(struct SiS_Private *SiS_Pr);
+
+#ifdef CONFIG_FB_SIS_300
+static void		SiS_OEM300Setting(struct SiS_Private *SiS_Pr,
+				unsigned short ModeNo, unsigned short ModeIdIndex, unsigned short RefTabindex);
+static void		SetOEMLCDData2(struct SiS_Private *SiS_Pr,
+				unsigned short ModeNo, unsigned short ModeIdIndex,unsigned short RefTableIndex);
+#endif
+#ifdef CONFIG_FB_SIS_315
+static void		SiS_OEM310Setting(struct SiS_Private *SiS_Pr,
+				unsigned short ModeNo,unsigned short ModeIdIndex, unsigned short RRTI);
+static void		SiS_OEM661Setting(struct SiS_Private *SiS_Pr,
+				unsigned short ModeNo,unsigned short ModeIdIndex, unsigned short RRTI);
+static void		SiS_FinalizeLCD(struct SiS_Private *, unsigned short, unsigned short);
+#endif
+
 static unsigned short	SiS_GetBIOSLCDResInfo(struct SiS_Private *SiS_Pr);
 static void		SiS_SetCH70xx(struct SiS_Private *SiS_Pr, unsigned short reg, unsigned char val);
 
diff --git a/drivers/video/fbdev/sis/init301.h b/drivers/video/fbdev/sis/init301.h
index 2112d6d7feda..6e5cf14c4ce4 100644
--- a/drivers/video/fbdev/sis/init301.h
+++ b/drivers/video/fbdev/sis/init301.h
@@ -66,287 +66,6 @@
 #include "sis.h"
 #include <video/sisfb.h>
 
-static const unsigned char SiS_YPbPrTable[3][64] = {
-  {
-    0x17,0x1d,0x03,0x09,0x05,0x06,0x0c,0x0c,
-    0x94,0x49,0x01,0x0a,0x06,0x0d,0x04,0x0a,
-    0x06,0x14,0x0d,0x04,0x0a,0x00,0x85,0x1b,
-    0x0c,0x50,0x00,0x97,0x00,0xda,0x4a,0x17,
-    0x7d,0x05,0x4b,0x00,0x00,0xe2,0x00,0x02,
-    0x03,0x0a,0x65,0x9d /*0x8d*/,0x08,0x92,0x8f,0x40,
-    0x60,0x80,0x14,0x90,0x8c,0x60,0x14,0x53 /*0x50*/,
-    0x00,0x40,0x44,0x00,0xdb,0x02,0x3b,0x00
-  },
-  {
-    0x33,0x06,0x06,0x09,0x0b,0x0c,0x0c,0x0c,
-    0x98,0x0a,0x01,0x0d,0x06,0x0d,0x04,0x0a,
-    0x06,0x14,0x0d,0x04,0x0a,0x00,0x85,0x3f,
-    0x0c,0x50,0xb2,0x9f,0x16,0x59,0x4f,0x13,
-    0xad,0x11,0xad,0x1d,0x40,0x8a,0x3d,0xb8,
-    0x51,0x5e,0x60,0x49,0x7d,0x92,0x0f,0x40,
-    0x60,0x80,0x14,0x90,0x8c,0x60,0x14,0x4e,
-    0x43,0x41,0x11,0x00,0xfc,0xff,0x32,0x00
-  },
-  {
-#if 0 /* OK, but sticks to left edge */
-    0x13,0x1d,0xe8,0x09,0x09,0xed,0x0c,0x0c,
-    0x98,0x0a,0x01,0x0c,0x06,0x0d,0x04,0x0a,
-    0x06,0x14,0x0d,0x04,0x0a,0x00,0x85,0x3f,
-    0xed,0x50,0x70,0x9f,0x16,0x59,0x21 /*0x2b*/,0x13,
-    0x27,0x0b,0x27,0xfc,0x30,0x27,0x1c,0xb0,
-    0x4b,0x4b,0x65 /*0x6f*/,0x2f,0x63,0x92,0x0f,0x40,
-    0x60,0x80,0x14,0x90,0x8c,0x60,0x14,0x27,
-    0x00,0x40,0x11,0x00,0xfc,0xff,0x32,0x00
-#endif
-#if 1 /* Perfect */
-    0x23,0x2d,0xe8,0x09,0x09,0xed,0x0c,0x0c,
-    0x98,0x0a,0x01,0x0c,0x06,0x0d,0x04,0x0a,
-    0x06,0x14,0x0d,0x04,0x0a,0x00,0x85,0x3f,
-    0xed,0x50,0x70,0x9f,0x16,0x59,0x60,0x13,
-    0x27,0x0b,0x27,0xfc,0x30,0x27,0x1c,0xb0,
-    0x4b,0x4b,0x6f,0x2f,0x63,0x92,0x0f,0x40,
-    0x60,0x80,0x14,0x90,0x8c,0x60,0x14,0x73,
-    0x00,0x40,0x11,0x00,0xfc,0xff,0x32,0x00
-#endif
-  }
-};
-
-static const unsigned char SiS_TVPhase[] =
-{
-	0x21,0xED,0xBA,0x08,	/* 0x00 SiS_NTSCPhase */
-	0x2A,0x05,0xE3,0x00,	/* 0x01 SiS_PALPhase */
-	0x21,0xE4,0x2E,0x9B,	/* 0x02 SiS_PALMPhase */
-	0x21,0xF4,0x3E,0xBA,	/* 0x03 SiS_PALNPhase */
-	0x1E,0x8B,0xA2,0xA7,
-	0x1E,0x83,0x0A,0xE0,	/* 0x05 SiS_SpecialPhaseM */
-	0x00,0x00,0x00,0x00,
-	0x00,0x00,0x00,0x00,
-	0x21,0xF0,0x7B,0xD6,	/* 0x08 SiS_NTSCPhase2 */
-	0x2A,0x09,0x86,0xE9,	/* 0x09 SiS_PALPhase2 */
-	0x21,0xE6,0xEF,0xA4,	/* 0x0a SiS_PALMPhase2 */
-	0x21,0xF6,0x94,0x46,	/* 0x0b SiS_PALNPhase2 */
-	0x1E,0x8B,0xA2,0xA7,
-	0x1E,0x83,0x0A,0xE0,	/* 0x0d SiS_SpecialPhaseM */
-	0x00,0x00,0x00,0x00,
-	0x00,0x00,0x00,0x00,
-	0x1e,0x8c,0x5c,0x7a,	/* 0x10 SiS_SpecialPhase */
-	0x25,0xd4,0xfd,0x5e	/* 0x11 SiS_SpecialPhaseJ */
-};
-
-static const unsigned char SiS_HiTVGroup3_1[] = {
-    0x00, 0x14, 0x15, 0x25, 0x55, 0x15, 0x0b, 0x13,
-    0xb1, 0x41, 0x62, 0x62, 0xff, 0xf4, 0x45, 0xa6,
-    0x25, 0x2f, 0x67, 0xf6, 0xbf, 0xff, 0x8e, 0x20,
-    0xac, 0xda, 0x60, 0xfe, 0x6a, 0x9a, 0x06, 0x10,
-    0xd1, 0x04, 0x18, 0x0a, 0xff, 0x80, 0x00, 0x80,
-    0x3b, 0x77, 0x00, 0xef, 0xe0, 0x10, 0xb0, 0xe0,
-    0x10, 0x4f, 0x0f, 0x0f, 0x05, 0x0f, 0x08, 0x6e,
-    0x1a, 0x1f, 0x25, 0x2a, 0x4c, 0xaa, 0x01
-};
-
-static const unsigned char SiS_HiTVGroup3_2[] = {
-    0x00, 0x14, 0x15, 0x25, 0x55, 0x15, 0x0b, 0x7a,
-    0x54, 0x41, 0xe7, 0xe7, 0xff, 0xf4, 0x45, 0xa6,
-    0x25, 0x2f, 0x67, 0xf6, 0xbf, 0xff, 0x8e, 0x20,
-    0xac, 0x6a, 0x60, 0x2b, 0x52, 0xcd, 0x61, 0x10,
-    0x51, 0x04, 0x18, 0x0a, 0x1f, 0x80, 0x00, 0x80,
-    0xff, 0xa4, 0x04, 0x2b, 0x94, 0x21, 0x72, 0x94,
-    0x26, 0x05, 0x01, 0x0f, 0xed, 0x0f, 0x0a, 0x64,
-    0x18, 0x1d, 0x23, 0x28, 0x4c, 0xaa, 0x01
-};
-
-/* 301C / 302ELV extended Part2 TV registers (4 tap scaler) */
-
-static const unsigned char SiS_Part2CLVX_1[] = {
-    0x00,0x00,
-    0x00,0x20,0x00,0x00,0x7F,0x20,0x02,0x7F,0x7D,0x20,0x04,0x7F,0x7D,0x1F,0x06,0x7E,
-    0x7C,0x1D,0x09,0x7E,0x7C,0x1B,0x0B,0x7E,0x7C,0x19,0x0E,0x7D,0x7C,0x17,0x11,0x7C,
-    0x7C,0x14,0x14,0x7C,0x7C,0x11,0x17,0x7C,0x7D,0x0E,0x19,0x7C,0x7E,0x0B,0x1B,0x7C,
-    0x7E,0x09,0x1D,0x7C,0x7F,0x06,0x1F,0x7C,0x7F,0x04,0x20,0x7D,0x00,0x02,0x20,0x7E
-};
-
-static const unsigned char SiS_Part2CLVX_2[] = {
-    0x00,0x00,
-    0x00,0x20,0x00,0x00,0x7F,0x20,0x02,0x7F,0x7D,0x20,0x04,0x7F,0x7D,0x1F,0x06,0x7E,
-    0x7C,0x1D,0x09,0x7E,0x7C,0x1B,0x0B,0x7E,0x7C,0x19,0x0E,0x7D,0x7C,0x17,0x11,0x7C,
-    0x7C,0x14,0x14,0x7C,0x7C,0x11,0x17,0x7C,0x7D,0x0E,0x19,0x7C,0x7E,0x0B,0x1B,0x7C,
-    0x7E,0x09,0x1D,0x7C,0x7F,0x06,0x1F,0x7C,0x7F,0x04,0x20,0x7D,0x00,0x02,0x20,0x7E
-};
-
-static const unsigned char SiS_Part2CLVX_3[] = {  /* NTSC, 525i, 525p */
-    0xE0,0x01,
-    0x04,0x1A,0x04,0x7E,0x03,0x1A,0x06,0x7D,0x01,0x1A,0x08,0x7D,0x00,0x19,0x0A,0x7D,
-    0x7F,0x19,0x0C,0x7C,0x7E,0x18,0x0E,0x7C,0x7E,0x17,0x10,0x7B,0x7D,0x15,0x12,0x7C,
-    0x7D,0x13,0x13,0x7D,0x7C,0x12,0x15,0x7D,0x7C,0x10,0x17,0x7D,0x7C,0x0E,0x18,0x7E,
-    0x7D,0x0C,0x19,0x7E,0x7D,0x0A,0x19,0x00,0x7D,0x08,0x1A,0x01,0x7E,0x06,0x1A,0x02,
-    0x58,0x02,
-    0x07,0x14,0x07,0x7E,0x06,0x14,0x09,0x7D,0x05,0x14,0x0A,0x7D,0x04,0x13,0x0B,0x7E,
-    0x03,0x13,0x0C,0x7E,0x02,0x12,0x0D,0x7F,0x01,0x12,0x0E,0x7F,0x01,0x11,0x0F,0x7F,
-    0x00,0x10,0x10,0x00,0x7F,0x0F,0x11,0x01,0x7F,0x0E,0x12,0x01,0x7E,0x0D,0x12,0x03,
-    0x7E,0x0C,0x13,0x03,0x7E,0x0B,0x13,0x04,0x7E,0x0A,0x14,0x04,0x7D,0x09,0x14,0x06,
-    0x00,0x03,
-    0x09,0x0F,0x09,0x7F,0x08,0x0F,0x09,0x00,0x07,0x0F,0x0A,0x00,0x06,0x0F,0x0A,0x01,
-    0x06,0x0E,0x0B,0x01,0x05,0x0E,0x0B,0x02,0x04,0x0E,0x0C,0x02,0x04,0x0D,0x0C,0x03,
-    0x03,0x0D,0x0D,0x03,0x02,0x0C,0x0D,0x05,0x02,0x0C,0x0E,0x04,0x01,0x0B,0x0E,0x06,
-    0x01,0x0B,0x0E,0x06,0x00,0x0A,0x0F,0x07,0x00,0x0A,0x0F,0x07,0x00,0x09,0x0F,0x08,
-    0xFF,0xFF
-};
-
-static const unsigned char SiS_Part2CLVX_4[] = {   /* PAL */
-    0x58,0x02,
-    0x05,0x19,0x05,0x7D,0x03,0x19,0x06,0x7E,0x02,0x19,0x08,0x7D,0x01,0x18,0x0A,0x7D,
-    0x00,0x18,0x0C,0x7C,0x7F,0x17,0x0E,0x7C,0x7E,0x16,0x0F,0x7D,0x7E,0x14,0x11,0x7D,
-    0x7D,0x13,0x13,0x7D,0x7D,0x11,0x14,0x7E,0x7D,0x0F,0x16,0x7E,0x7D,0x0E,0x17,0x7E,
-    0x7D,0x0C,0x18,0x7F,0x7D,0x0A,0x18,0x01,0x7D,0x08,0x19,0x02,0x7D,0x06,0x19,0x04,
-    0x00,0x03,
-    0x08,0x12,0x08,0x7E,0x07,0x12,0x09,0x7E,0x06,0x12,0x0A,0x7E,0x05,0x11,0x0B,0x7F,
-    0x04,0x11,0x0C,0x7F,0x03,0x11,0x0C,0x00,0x03,0x10,0x0D,0x00,0x02,0x0F,0x0E,0x01,
-    0x01,0x0F,0x0F,0x01,0x01,0x0E,0x0F,0x02,0x00,0x0D,0x10,0x03,0x7F,0x0C,0x11,0x04,
-    0x7F,0x0C,0x11,0x04,0x7F,0x0B,0x11,0x05,0x7E,0x0A,0x12,0x06,0x7E,0x09,0x12,0x07,
-    0x40,0x02,
-    0x04,0x1A,0x04,0x7E,0x02,0x1B,0x05,0x7E,0x01,0x1A,0x07,0x7E,0x00,0x1A,0x09,0x7D,
-    0x7F,0x19,0x0B,0x7D,0x7E,0x18,0x0D,0x7D,0x7D,0x17,0x10,0x7C,0x7D,0x15,0x12,0x7C,
-    0x7C,0x14,0x14,0x7C,0x7C,0x12,0x15,0x7D,0x7C,0x10,0x17,0x7D,0x7C,0x0D,0x18,0x7F,
-    0x7D,0x0B,0x19,0x7F,0x7D,0x09,0x1A,0x00,0x7D,0x07,0x1A,0x02,0x7E,0x05,0x1B,0x02,
-    0xFF,0xFF
-};
-
-static const unsigned char SiS_Part2CLVX_5[] = {   /* 750p */
-    0x00,0x03,
-    0x05,0x19,0x05,0x7D,0x03,0x19,0x06,0x7E,0x02,0x19,0x08,0x7D,0x01,0x18,0x0A,0x7D,
-    0x00,0x18,0x0C,0x7C,0x7F,0x17,0x0E,0x7C,0x7E,0x16,0x0F,0x7D,0x7E,0x14,0x11,0x7D,
-    0x7D,0x13,0x13,0x7D,0x7D,0x11,0x14,0x7E,0x7D,0x0F,0x16,0x7E,0x7D,0x0E,0x17,0x7E,
-    0x7D,0x0C,0x18,0x7F,0x7D,0x0A,0x18,0x01,0x7D,0x08,0x19,0x02,0x7D,0x06,0x19,0x04,
-    0xFF,0xFF
-};
-
-static const unsigned char SiS_Part2CLVX_6[] = {   /* 1080i */
-    0x00,0x04,
-    0x04,0x1A,0x04,0x7E,0x02,0x1B,0x05,0x7E,0x01,0x1A,0x07,0x7E,0x00,0x1A,0x09,0x7D,
-    0x7F,0x19,0x0B,0x7D,0x7E,0x18,0x0D,0x7D,0x7D,0x17,0x10,0x7C,0x7D,0x15,0x12,0x7C,
-    0x7C,0x14,0x14,0x7C,0x7C,0x12,0x15,0x7D,0x7C,0x10,0x17,0x7D,0x7C,0x0D,0x18,0x7F,
-    0x7D,0x0B,0x19,0x7F,0x7D,0x09,0x1A,0x00,0x7D,0x07,0x1A,0x02,0x7E,0x05,0x1B,0x02,
-    0xFF,0xFF,
-};
-
-#ifdef CONFIG_FB_SIS_315
-/* 661 et al LCD data structure (2.03.00) */
-static const unsigned char SiS_LCDStruct661[] = {
-    /* 1024x768 */
-/*  type|CR37|   HDE   |   VDE   |    HT   |    VT   |   hss    | hse   */
-    0x02,0xC0,0x00,0x04,0x00,0x03,0x40,0x05,0x26,0x03,0x10,0x00,0x88,
-    0x00,0x02,0x00,0x06,0x00,0x41,0x5A,0x64,0x00,0x00,0x00,0x00,0x04,
-    /*  | vss     |    vse  |clck|  clock  |CRT2DataP|CRT2DataP|idx     */
-    /*					      VESA    non-VESA  noscale */
-    /* 1280x1024 */
-    0x03,0xC0,0x00,0x05,0x00,0x04,0x98,0x06,0x2A,0x04,0x30,0x00,0x70,
-    0x00,0x01,0x00,0x03,0x00,0x6C,0xF8,0x2F,0x00,0x00,0x00,0x00,0x08,
-    /* 1400x1050 */
-    0x09,0x20,0x78,0x05,0x1A,0x04,0x98,0x06,0x2A,0x04,0x18,0x00,0x38,
-    0x00,0x01,0x00,0x03,0x00,0x6C,0xF8,0x2F,0x00,0x00,0x00,0x00,0x09,
-    /* 1600x1200 */
-    0x0B,0xE0,0x40,0x06,0xB0,0x04,0x70,0x08,0xE2,0x04,0x40,0x00,0xC0,
-    0x00,0x01,0x00,0x03,0x00,0xA2,0x70,0x24,0x00,0x00,0x00,0x00,0x0A,
-    /* 1280x768 (_2) */
-    0x0A,0xE0,0x00,0x05,0x00,0x03,0x7C,0x06,0x26,0x03,0x30,0x00,0x70,
-    0x00,0x03,0x00,0x06,0x00,0x4D,0xC8,0x48,0x00,0x00,0x00,0x00,0x06,
-    /* 1280x720 */
-    0x0E,0xE0,0x00,0x05,0xD0,0x02,0x80,0x05,0x26,0x03,0x10,0x00,0x20,
-    0x00,0x01,0x00,0x06,0x00,0x45,0x9C,0x62,0x00,0x00,0x00,0x00,0x05,
-    /* 1280x800 (_2) */
-    0x0C,0xE0,0x00,0x05,0x20,0x03,0x10,0x06,0x2C,0x03,0x30,0x00,0x70,
-    0x00,0x04,0x00,0x03,0x00,0x49,0xCE,0x1E,0x00,0x00,0x00,0x00,0x09,
-    /* 1680x1050 */
-    0x0D,0xE0,0x90,0x06,0x1A,0x04,0x6C,0x07,0x2A,0x04,0x1A,0x00,0x4C,
-    0x00,0x03,0x00,0x06,0x00,0x79,0xBE,0x44,0x00,0x00,0x00,0x00,0x06,
-    /* 1280x800_3 */
-    0x0C,0xE0,0x00,0x05,0x20,0x03,0xAA,0x05,0x2E,0x03,0x30,0x00,0x50,
-    0x00,0x04,0x00,0x03,0x00,0x47,0xA9,0x10,0x00,0x00,0x00,0x00,0x07,
-    /* 800x600 */
-    0x01,0xC0,0x20,0x03,0x58,0x02,0x20,0x04,0x74,0x02,0x2A,0x00,0x80,
-    0x00,0x06,0x00,0x04,0x00,0x28,0x63,0x4B,0x00,0x00,0x00,0x00,0x00,
-    /* 1280x854 */
-    0x08,0xE0,0x00,0x05,0x56,0x03,0x80,0x06,0x5d,0x03,0x10,0x00,0x70,
-    0x00,0x01,0x00,0x03,0x00,0x54,0x75,0x13,0x00,0x00,0x00,0x00,0x08
-};
-#endif
-
-#ifdef CONFIG_FB_SIS_300
-static unsigned char SiS300_TrumpionData[14][80] = {
-  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x0D,0x00,0x0D,0x10,0x7F,0x00,0x80,0x02,
-    0x20,0x03,0x0B,0x00,0x90,0x01,0xC1,0x01,0x60,0x0C,0x30,0x10,0x00,0x00,0x04,0x23,
-    0x00,0x00,0x03,0x28,0x03,0x10,0x05,0x08,0x40,0x10,0x00,0x10,0x04,0x23,0x00,0x23,
-    0x03,0x11,0x60,0xBC,0x01,0xFF,0x03,0xFF,0x19,0x01,0x00,0x05,0x09,0x04,0x04,0x05,
-    0x04,0x0C,0x09,0x05,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x5A,0x01,0xBE,0x01,0x00 },
-  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x0D,0x00,0x0D,0x10,0x27,0x00,0x80,0x02,
-    0x20,0x03,0x07,0x00,0x5E,0x01,0x0D,0x02,0x60,0x0C,0x30,0x11,0x00,0x00,0x04,0x23,
-    0x00,0x00,0x03,0x80,0x03,0x28,0x06,0x08,0x40,0x11,0x00,0x11,0x04,0x23,0x00,0x23,
-    0x03,0x11,0x60,0x90,0x01,0xFF,0x0F,0xF4,0x19,0x01,0x00,0x05,0x01,0x00,0x04,0x05,
-    0x04,0x0C,0x02,0x01,0x02,0xB0,0x00,0x00,0x02,0xBA,0xEC,0x57,0x01,0xBE,0x01,0x00 },
-  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x0D,0x00,0x0D,0x10,0x8A,0x00,0xD8,0x02,
-    0x84,0x03,0x16,0x00,0x90,0x01,0xC1,0x01,0x60,0x0C,0x30,0x1C,0x00,0x20,0x04,0x23,
-    0x00,0x01,0x03,0x53,0x03,0x28,0x06,0x08,0x40,0x1C,0x00,0x16,0x04,0x23,0x00,0x23,
-    0x03,0x11,0x60,0xD9,0x01,0xFF,0x0F,0xF4,0x18,0x07,0x05,0x05,0x13,0x04,0x04,0x05,
-    0x01,0x0B,0x13,0x0A,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x59,0x01,0xBE,0x01,0x00 },
-  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x0D,0x00,0x0D,0x10,0x72,0x00,0xD8,0x02,
-    0x84,0x03,0x16,0x00,0x90,0x01,0xC1,0x01,0x60,0x0C,0x30,0x1C,0x00,0x20,0x04,0x23,
-    0x00,0x01,0x03,0x53,0x03,0x28,0x06,0x08,0x40,0x1C,0x00,0x16,0x04,0x23,0x00,0x23,
-    0x03,0x11,0x60,0xDA,0x01,0xFF,0x0F,0xF4,0x18,0x07,0x05,0x05,0x13,0x04,0x04,0x05,
-    0x01,0x0B,0x13,0x0A,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x55,0x01,0xBE,0x01,0x00 },
-  { 0x02,0x0A,0x02,0x00,0x04,0x01,0x00,0x03,0x0D,0x00,0x0D,0x10,0x7F,0x00,0x80,0x02,
-    0x20,0x03,0x16,0x00,0xE0,0x01,0x0D,0x02,0x60,0x0C,0x30,0x98,0x00,0x00,0x04,0x23,
-    0x00,0x01,0x03,0x45,0x03,0x48,0x06,0x08,0x40,0x98,0x00,0x98,0x04,0x23,0x00,0x23,
-    0x03,0x11,0x60,0xF4,0x01,0xFF,0x0F,0xF4,0x18,0x01,0x00,0x05,0x01,0x00,0x05,0x05,
-    0x04,0x0C,0x08,0x05,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x5B,0x01,0xBE,0x01,0x00 },
-  { 0x02,0x0A,0x02,0x01,0x04,0x01,0x00,0x03,0x0D,0x00,0x0D,0x10,0xBF,0x00,0x20,0x03,
-    0x20,0x04,0x0D,0x00,0x58,0x02,0x71,0x02,0x80,0x0C,0x30,0x9A,0x00,0xFA,0x03,0x1D,
-    0x00,0x01,0x03,0x22,0x03,0x28,0x06,0x08,0x40,0x98,0x00,0x98,0x04,0x1D,0x00,0x1D,
-    0x03,0x11,0x60,0x39,0x03,0x40,0x05,0xF4,0x18,0x07,0x02,0x06,0x04,0x01,0x06,0x0B,
-    0x02,0x0A,0x20,0x19,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x5B,0x01,0xBE,0x01,0x00 },
-  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x0D,0x00,0x0D,0x10,0xEF,0x00,0x00,0x04,
-    0x40,0x05,0x13,0x00,0x00,0x03,0x26,0x03,0x88,0x0C,0x30,0x90,0x00,0x00,0x04,0x23,
-    0x00,0x01,0x03,0x24,0x03,0x28,0x06,0x08,0x40,0x90,0x00,0x90,0x04,0x23,0x00,0x23,
-    0x03,0x11,0x60,0x40,0x05,0xFF,0x0F,0xF4,0x18,0x01,0x00,0x08,0x01,0x00,0x08,0x01,
-    0x00,0x08,0x01,0x01,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x5B,0x01,0xBE,0x01,0x00 },
-  /* variant 2 */
-  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x11,0x00,0x0D,0x10,0x7F,0x00,0x80,0x02,
-    0x20,0x03,0x15,0x00,0x90,0x01,0xC1,0x01,0x60,0x0C,0x30,0x18,0x00,0x00,0x04,0x23,
-    0x00,0x01,0x03,0x44,0x03,0x28,0x06,0x08,0x40,0x18,0x00,0x18,0x04,0x23,0x00,0x23,
-    0x03,0x11,0x60,0xA6,0x01,0xFF,0x03,0xFF,0x19,0x01,0x00,0x05,0x13,0x04,0x04,0x05,
-    0x04,0x0C,0x13,0x0A,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x55,0x01,0xBE,0x01,0x00 },
-  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x11,0x00,0x0D,0x10,0x7F,0x00,0x80,0x02,
-    0x20,0x03,0x15,0x00,0x90,0x01,0xC1,0x01,0x60,0x0C,0x30,0x18,0x00,0x00,0x04,0x23,
-    0x00,0x01,0x03,0x44,0x03,0x28,0x06,0x08,0x40,0x18,0x00,0x18,0x04,0x23,0x00,0x23,
-    0x03,0x11,0x60,0xA6,0x01,0xFF,0x03,0xFF,0x19,0x01,0x00,0x05,0x13,0x04,0x04,0x05,
-    0x04,0x0C,0x13,0x0A,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x55,0x01,0xBE,0x01,0x00 },
-  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x11,0x00,0x0D,0x10,0x8A,0x00,0xD8,0x02,
-    0x84,0x03,0x16,0x00,0x90,0x01,0xC1,0x01,0x60,0x0C,0x30,0x1C,0x00,0x20,0x04,0x23,
-    0x00,0x01,0x03,0x53,0x03,0x28,0x06,0x08,0x40,0x1C,0x00,0x16,0x04,0x23,0x00,0x23,
-    0x03,0x11,0x60,0xDA,0x01,0xFF,0x0F,0xF4,0x18,0x07,0x05,0x05,0x13,0x04,0x04,0x05,
-    0x01,0x0B,0x13,0x0A,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x55,0x01,0xBE,0x01,0x00 },
-  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x11,0x00,0x0D,0x10,0x72,0x00,0xD8,0x02,
-    0x84,0x03,0x16,0x00,0x90,0x01,0xC1,0x01,0x60,0x0C,0x30,0x1C,0x00,0x20,0x04,0x23,
-    0x00,0x01,0x03,0x53,0x03,0x28,0x06,0x08,0x40,0x1C,0x00,0x16,0x04,0x23,0x00,0x23,
-    0x03,0x11,0x60,0xDA,0x01,0xFF,0x0F,0xF4,0x18,0x07,0x05,0x05,0x13,0x04,0x04,0x05,
-    0x01,0x0B,0x13,0x0A,0x02,0xB0,0x00,0x00,0x02,0xBA,0xF0,0x55,0x01,0xBE,0x01,0x00 },
-  { 0x02,0x0A,0x02,0x00,0x04,0x01,0x00,0x03,0x11,0x00,0x0D,0x10,0x7F,0x00,0x80,0x02,
-    0x20,0x03,0x16,0x00,0xE0,0x01,0x0D,0x02,0x60,0x0C,0x30,0x98,0x00,0x00,0x04,0x23,
-    0x00,0x01,0x03,0x45,0x03,0x48,0x06,0x08,0x40,0x98,0x00,0x98,0x04,0x23,0x00,0x23,
-    0x03,0x11,0x60,0xF4,0x01,0xFF,0x0F,0xF4,0x18,0x01,0x00,0x05,0x01,0x00,0x05,0x05,
-    0x04,0x0C,0x08,0x05,0x02,0xB0,0x00,0x00,0x02,0xBA,0xEA,0x58,0x01,0xBE,0x01,0x00 },
-  { 0x02,0x0A,0x02,0x01,0x04,0x01,0x00,0x03,0x11,0x00,0x0D,0x10,0xBF,0x00,0x20,0x03,
-    0x20,0x04,0x0D,0x00,0x58,0x02,0x71,0x02,0x80,0x0C,0x30,0x9A,0x00,0xFA,0x03,0x1D,
-    0x00,0x01,0x03,0x22,0x03,0x28,0x06,0x08,0x40,0x98,0x00,0x98,0x04,0x1D,0x00,0x1D,
-    0x03,0x11,0x60,0x39,0x03,0x40,0x05,0xF4,0x18,0x07,0x02,0x06,0x04,0x01,0x06,0x0B,
-    0x02,0x0A,0x20,0x19,0x02,0xB0,0x00,0x00,0x02,0xBA,0xEA,0x58,0x01,0xBE,0x01,0x00 },
-  { 0x02,0x0A,0x0A,0x01,0x04,0x01,0x00,0x03,0x11,0x00,0x0D,0x10,0xEF,0x00,0x00,0x04,
-    0x40,0x05,0x13,0x00,0x00,0x03,0x26,0x03,0x88,0x0C,0x30,0x90,0x00,0x00,0x04,0x23,
-    0x00,0x01,0x03,0x24,0x03,0x28,0x06,0x08,0x40,0x90,0x00,0x90,0x04,0x23,0x00,0x23,
-    0x03,0x11,0x60,0x40,0x05,0xFF,0x0F,0xF4,0x18,0x01,0x00,0x08,0x01,0x00,0x08,0x01,
-    0x00,0x08,0x01,0x01,0x02,0xB0,0x00,0x00,0x02,0xBA,0xEA,0x58,0x01,0xBE,0x01,0x00 }
-};
-#endif
-
 void		SiS_UnLockCRT2(struct SiS_Private *SiS_Pr);
 void		SiS_EnableCRT2(struct SiS_Private *SiS_Pr);
 unsigned short	SiS_GetRatePtr(struct SiS_Private *SiS_Pr, unsigned short ModeNo, unsigned short ModeIdIndex);
@@ -375,16 +94,11 @@ unsigned short	SiS_GetCH701x(struct SiS_Private *SiS_Pr, unsigned short tempax);
 void		SiS_SetCH70xxANDOR(struct SiS_Private *SiS_Pr, unsigned short reg,
 			unsigned char orval,unsigned short andval);
 #ifdef CONFIG_FB_SIS_315
-static void	SiS_Chrontel701xOn(struct SiS_Private *SiS_Pr);
-static void	SiS_Chrontel701xOff(struct SiS_Private *SiS_Pr);
-static void	SiS_ChrontelInitTVVSync(struct SiS_Private *SiS_Pr);
-static void	SiS_ChrontelDoSomething1(struct SiS_Private *SiS_Pr);
 void		SiS_Chrontel701xBLOn(struct SiS_Private *SiS_Pr);
 void		SiS_Chrontel701xBLOff(struct SiS_Private *SiS_Pr);
 #endif /* 315 */
 
 #ifdef CONFIG_FB_SIS_300
-static  bool	SiS_SetTrumpionBlock(struct SiS_Private *SiS_Pr, unsigned char *dataptr);
 void		SiS_SetChrontelGPIO(struct SiS_Private *SiS_Pr, unsigned short myvbinfo);
 #endif
 
@@ -394,40 +108,6 @@ unsigned short	SiS_HandleDDC(struct SiS_Private *SiS_Pr, unsigned int VBFlags, i
 			unsigned short adaptnum, unsigned short DDCdatatype,
 			unsigned char *buffer, unsigned int VBFlags2);
 
-static unsigned short	SiS_InitDDCRegs(struct SiS_Private *SiS_Pr, unsigned int VBFlags,
-				int VGAEngine, unsigned short adaptnum, unsigned short DDCdatatype,
-				bool checkcr32, unsigned int VBFlags2);
-static unsigned short	SiS_ProbeDDC(struct SiS_Private *SiS_Pr);
-static unsigned short	SiS_ReadDDC(struct SiS_Private *SiS_Pr, unsigned short DDCdatatype,
-				unsigned char *buffer);
-static void		SiS_SetSwitchDDC2(struct SiS_Private *SiS_Pr);
-static unsigned short	SiS_SetStart(struct SiS_Private *SiS_Pr);
-static unsigned short	SiS_SetStop(struct SiS_Private *SiS_Pr);
-static unsigned short	SiS_SetSCLKLow(struct SiS_Private *SiS_Pr);
-static unsigned short	SiS_SetSCLKHigh(struct SiS_Private *SiS_Pr);
-static unsigned short	SiS_ReadDDC2Data(struct SiS_Private *SiS_Pr);
-static unsigned short	SiS_WriteDDC2Data(struct SiS_Private *SiS_Pr, unsigned short tempax);
-static unsigned short	SiS_CheckACK(struct SiS_Private *SiS_Pr);
-static unsigned short	SiS_WriteDABDDC(struct SiS_Private *SiS_Pr);
-static unsigned short	SiS_PrepareReadDDC(struct SiS_Private *SiS_Pr);
-static unsigned short	SiS_PrepareDDC(struct SiS_Private *SiS_Pr);
-static void		SiS_SendACK(struct SiS_Private *SiS_Pr, unsigned short yesno);
-static unsigned short	SiS_DoProbeDDC(struct SiS_Private *SiS_Pr);
-
-#ifdef CONFIG_FB_SIS_300
-static void		SiS_OEM300Setting(struct SiS_Private *SiS_Pr,
-				unsigned short ModeNo, unsigned short ModeIdIndex, unsigned short RefTabindex);
-static void		SetOEMLCDData2(struct SiS_Private *SiS_Pr,
-				unsigned short ModeNo, unsigned short ModeIdIndex,unsigned short RefTableIndex);
-#endif
-#ifdef CONFIG_FB_SIS_315
-static void		SiS_OEM310Setting(struct SiS_Private *SiS_Pr,
-				unsigned short ModeNo,unsigned short ModeIdIndex, unsigned short RRTI);
-static void		SiS_OEM661Setting(struct SiS_Private *SiS_Pr,
-				unsigned short ModeNo,unsigned short ModeIdIndex, unsigned short RRTI);
-static void		SiS_FinalizeLCD(struct SiS_Private *, unsigned short, unsigned short);
-#endif
-
 extern void		SiS_DisplayOff(struct SiS_Private *SiS_Pr);
 extern void		SiS_DisplayOn(struct SiS_Private *SiS_Pr);
 extern bool		SiS_SearchModeID(struct SiS_Private *, unsigned short *, unsigned short *);
diff --git a/drivers/video/fbdev/sis/sis.h b/drivers/video/fbdev/sis/sis.h
index ea1d1c9640bf..d04982b0cd6f 100644
--- a/drivers/video/fbdev/sis/sis.h
+++ b/drivers/video/fbdev/sis/sis.h
@@ -28,6 +28,7 @@
 
 #include "vgatypes.h"
 #include "vstruct.h"
+#include "init.h"
 
 #define VER_MAJOR		1
 #define VER_MINOR		8
@@ -321,6 +322,85 @@ u8 SiS_GetRegByte(SISIOADDRESS);
 u16 SiS_GetRegShort(SISIOADDRESS);
 u32 SiS_GetRegLong(SISIOADDRESS);
 
+/* Chrontel TV, DDC and DPMS functions */
+/* from init.c */
+bool		SiSInitPtr(struct SiS_Private *SiS_Pr);
+unsigned short	SiS_GetModeID_LCD(int VGAEngine, unsigned int VBFlags, int HDisplay,
+				int VDisplay, int Depth, bool FSTN,
+				unsigned short CustomT, int LCDwith, int LCDheight,
+				unsigned int VBFlags2);
+unsigned short	SiS_GetModeID_TV(int VGAEngine, unsigned int VBFlags, int HDisplay,
+				int VDisplay, int Depth, unsigned int VBFlags2);
+unsigned short	SiS_GetModeID_VGA2(int VGAEngine, unsigned int VBFlags, int HDisplay,
+				int VDisplay, int Depth, unsigned int VBFlags2);
+
+void		SiS_DisplayOn(struct SiS_Private *SiS_Pr);
+void		SiS_DisplayOff(struct SiS_Private *SiS_Pr);
+void		SiSRegInit(struct SiS_Private *SiS_Pr, SISIOADDRESS BaseAddr);
+void		SiS_SetEnableDstn(struct SiS_Private *SiS_Pr, int enable);
+void		SiS_SetEnableFstn(struct SiS_Private *SiS_Pr, int enable);
+unsigned short	SiS_GetModeFlag(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
+				unsigned short ModeIdIndex);
+bool		SiSDetermineROMLayout661(struct SiS_Private *SiS_Pr);
+
+bool		SiS_SearchModeID(struct SiS_Private *SiS_Pr, unsigned short *ModeNo,
+				unsigned short *ModeIdIndex);
+unsigned short	SiS_GetModePtr(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
+				unsigned short ModeIdIndex);
+unsigned short  SiS_GetRefCRTVCLK(struct SiS_Private *SiS_Pr, unsigned short Index, int UseWide);
+unsigned short  SiS_GetRefCRT1CRTC(struct SiS_Private *SiS_Pr, unsigned short Index, int UseWide);
+unsigned short	SiS_GetColorDepth(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
+				unsigned short ModeIdIndex);
+unsigned short	SiS_GetOffset(struct SiS_Private *SiS_Pr,unsigned short ModeNo,
+				unsigned short ModeIdIndex, unsigned short RRTI);
+#ifdef CONFIG_FB_SIS_300
+void		SiS_GetFIFOThresholdIndex300(struct SiS_Private *SiS_Pr, unsigned short *idx1,
+				unsigned short *idx2);
+unsigned short	SiS_GetFIFOThresholdB300(unsigned short idx1, unsigned short idx2);
+unsigned short	SiS_GetLatencyFactor630(struct SiS_Private *SiS_Pr, unsigned short index);
+#endif
+void		SiS_LoadDAC(struct SiS_Private *SiS_Pr, unsigned short ModeNo, unsigned short ModeIdIndex);
+bool		SiSSetMode(struct SiS_Private *SiS_Pr, unsigned short ModeNo);
+void		SiS_CalcCRRegisters(struct SiS_Private *SiS_Pr, int depth);
+void		SiS_CalcLCDACRT1Timing(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
+				unsigned short ModeIdIndex);
+void		SiS_Generic_ConvertCRData(struct SiS_Private *SiS_Pr, unsigned char *crdata, int xres,
+				int yres, struct fb_var_screeninfo *var, bool writeres);
+
+/* From init301.c: */
+extern void		SiS_GetVBInfo(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
+				unsigned short ModeIdIndex, int chkcrt2mode);
+extern void		SiS_GetLCDResInfo(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
+				unsigned short ModeIdIndex);
+extern void		SiS_SetYPbPr(struct SiS_Private *SiS_Pr);
+extern void		SiS_SetTVMode(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
+				unsigned short ModeIdIndex);
+extern void		SiS_UnLockCRT2(struct SiS_Private *SiS_Pr);
+extern void		SiS_DisableBridge(struct SiS_Private *);
+extern bool		SiS_SetCRT2Group(struct SiS_Private *, unsigned short);
+extern unsigned short	SiS_GetRatePtr(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
+				unsigned short ModeIdIndex);
+extern void		SiS_WaitRetrace1(struct SiS_Private *SiS_Pr);
+extern unsigned short	SiS_GetResInfo(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
+				unsigned short ModeIdIndex);
+extern unsigned short	SiS_GetCH700x(struct SiS_Private *SiS_Pr, unsigned short tempax);
+extern unsigned short	SiS_GetVCLK2Ptr(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
+				unsigned short ModeIdIndex, unsigned short RRTI);
+extern bool		SiS_IsVAMode(struct SiS_Private *);
+extern bool		SiS_IsDualEdge(struct SiS_Private *);
+
+#ifdef CONFIG_FB_SIS_300
+extern unsigned int	sisfb_read_nbridge_pci_dword(struct SiS_Private *SiS_Pr, int reg);
+extern void		sisfb_write_nbridge_pci_dword(struct SiS_Private *SiS_Pr, int reg,
+				unsigned int val);
+#endif
+#ifdef CONFIG_FB_SIS_315
+extern void		sisfb_write_nbridge_pci_byte(struct SiS_Private *SiS_Pr, int reg,
+				unsigned char val);
+extern unsigned int	sisfb_read_mio_pci_word(struct SiS_Private *SiS_Pr, int reg);
+#endif
+
+
 /* MMIO access macros */
 #define MMIO_IN8(base, offset)  readb((base+offset))
 #define MMIO_IN16(base, offset) readw((base+offset))
@@ -583,4 +663,55 @@ struct sis_video_info {
 	struct sis_video_info *next;
 };
 
+/* from sis_accel.c */
+extern void	fbcon_sis_fillrect(struct fb_info *info,
+				const struct fb_fillrect *rect);
+extern void	fbcon_sis_copyarea(struct fb_info *info,
+				const struct fb_copyarea *area);
+extern int	fbcon_sis_sync(struct fb_info *info);
+
+/* Internal 2D accelerator functions */
+extern int	sisfb_initaccel(struct sis_video_info *ivideo);
+extern void	sisfb_syncaccel(struct sis_video_info *ivideo);
+
+/* Internal general routines */
+#ifdef CONFIG_FB_SIS_300
+unsigned int	sisfb_read_nbridge_pci_dword(struct SiS_Private *SiS_Pr, int reg);
+void		sisfb_write_nbridge_pci_dword(struct SiS_Private *SiS_Pr, int reg, unsigned int val);
+unsigned int	sisfb_read_lpc_pci_dword(struct SiS_Private *SiS_Pr, int reg);
+#endif
+#ifdef CONFIG_FB_SIS_315
+void		sisfb_write_nbridge_pci_byte(struct SiS_Private *SiS_Pr, int reg, unsigned char val);
+unsigned int	sisfb_read_mio_pci_word(struct SiS_Private *SiS_Pr, int reg);
+#endif
+
+/* SiS-specific exported functions */
+void			sis_malloc(struct sis_memreq *req);
+void			sis_malloc_new(struct pci_dev *pdev, struct sis_memreq *req);
+void			sis_free(u32 base);
+void			sis_free_new(struct pci_dev *pdev, u32 base);
+
+/* Routines from init.c/init301.c */
+extern unsigned short	SiS_GetModeID_LCD(int VGAEngine, unsigned int VBFlags, int HDisplay,
+				int VDisplay, int Depth, bool FSTN, unsigned short CustomT,
+				int LCDwith, int LCDheight, unsigned int VBFlags2);
+extern unsigned short	SiS_GetModeID_TV(int VGAEngine, unsigned int VBFlags, int HDisplay,
+				int VDisplay, int Depth, unsigned int VBFlags2);
+extern unsigned short	SiS_GetModeID_VGA2(int VGAEngine, unsigned int VBFlags, int HDisplay,
+				int VDisplay, int Depth, unsigned int VBFlags2);
+extern void		SiSRegInit(struct SiS_Private *SiS_Pr, SISIOADDRESS BaseAddr);
+extern bool		SiSSetMode(struct SiS_Private *SiS_Pr, unsigned short ModeNo);
+extern void		SiS_SetEnableDstn(struct SiS_Private *SiS_Pr, int enable);
+extern void		SiS_SetEnableFstn(struct SiS_Private *SiS_Pr, int enable);
+
+extern bool		SiSDetermineROMLayout661(struct SiS_Private *SiS_Pr);
+
+extern bool		sisfb_gettotalfrommode(struct SiS_Private *SiS_Pr, unsigned char modeno,
+				int *htotal, int *vtotal, unsigned char rateindex);
+extern int		sisfb_mode_rate_to_dclock(struct SiS_Private *SiS_Pr,
+				unsigned char modeno, unsigned char rateindex);
+extern int		sisfb_mode_rate_to_ddata(struct SiS_Private *SiS_Pr, unsigned char modeno,
+				unsigned char rateindex, struct fb_var_screeninfo *var);
+
+
 #endif
diff --git a/drivers/video/fbdev/sis/sis_main.c b/drivers/video/fbdev/sis/sis_main.c
index ecdd054d8951..20aff9005978 100644
--- a/drivers/video/fbdev/sis/sis_main.c
+++ b/drivers/video/fbdev/sis/sis_main.c
@@ -56,15 +56,66 @@
 
 #include "sis.h"
 #include "sis_main.h"
+#include "init301.h"
 
 #if !defined(CONFIG_FB_SIS_300) && !defined(CONFIG_FB_SIS_315)
 #warning Neither CONFIG_FB_SIS_300 nor CONFIG_FB_SIS_315 is set
 #warning sisfb will not work!
 #endif
 
+/* ---------------------- Prototypes ------------------------- */
+
+/* Interface used by the world */
+#ifndef MODULE
+static int sisfb_setup(char *options);
+#endif
+
+/* Interface to the low level console driver */
+static int sisfb_init(void);
+
+/* fbdev routines */
+static int	sisfb_get_fix(struct fb_fix_screeninfo *fix, int con,
+				struct fb_info *info);
+
+static int	sisfb_ioctl(struct fb_info *info, unsigned int cmd,
+			    unsigned long arg);
+static int	sisfb_set_par(struct fb_info *info);
+static int	sisfb_blank(int blank,
+				struct fb_info *info);
+
 static void sisfb_handle_command(struct sis_video_info *ivideo,
 				 struct sisfb_cmd *sisfb_command);
 
+static void	sisfb_search_mode(char *name, bool quiet);
+static int	sisfb_validate_mode(struct sis_video_info *ivideo, int modeindex, u32 vbflags);
+static u8	sisfb_search_refresh_rate(struct sis_video_info *ivideo, unsigned int rate,
+				int index);
+static int	sisfb_setcolreg(unsigned regno, unsigned red, unsigned green,
+				unsigned blue, unsigned transp,
+				struct fb_info *fb_info);
+static int	sisfb_do_set_var(struct fb_var_screeninfo *var, int isactive,
+				struct fb_info *info);
+static void	sisfb_pre_setmode(struct sis_video_info *ivideo);
+static void	sisfb_post_setmode(struct sis_video_info *ivideo);
+static bool	sisfb_CheckVBRetrace(struct sis_video_info *ivideo);
+static bool	sisfbcheckvretracecrt2(struct sis_video_info *ivideo);
+static bool	sisfbcheckvretracecrt1(struct sis_video_info *ivideo);
+static bool	sisfb_bridgeisslave(struct sis_video_info *ivideo);
+static void	sisfb_detect_VB_connect(struct sis_video_info *ivideo);
+static void	sisfb_get_VB_type(struct sis_video_info *ivideo);
+static void	sisfb_set_TVxposoffset(struct sis_video_info *ivideo, int val);
+static void	sisfb_set_TVyposoffset(struct sis_video_info *ivideo, int val);
+
+/* Internal heap routines */
+static int		sisfb_heap_init(struct sis_video_info *ivideo);
+static struct SIS_OH *	sisfb_poh_new_node(struct SIS_HEAP *memheap);
+static struct SIS_OH *	sisfb_poh_allocate(struct SIS_HEAP *memheap, u32 size);
+static void		sisfb_delete_node(struct SIS_OH *poh);
+static void		sisfb_insert_node(struct SIS_OH *pohList, struct SIS_OH *poh);
+static struct SIS_OH *	sisfb_poh_free(struct SIS_HEAP *memheap, u32 base);
+static void		sisfb_free_node(struct SIS_HEAP *memheap, struct SIS_OH *poh);
+
+
 /* ------------------ Internal helper routines ----------------- */
 
 static void __init
diff --git a/drivers/video/fbdev/sis/sis_main.h b/drivers/video/fbdev/sis/sis_main.h
index 32e23c209430..d8ba07061f1e 100644
--- a/drivers/video/fbdev/sis/sis_main.h
+++ b/drivers/video/fbdev/sis/sis_main.h
@@ -661,121 +661,4 @@ static struct _customttable {
 	}
 };
 
-/* ---------------------- Prototypes ------------------------- */
-
-/* Interface used by the world */
-#ifndef MODULE
-static int sisfb_setup(char *options);
 #endif
-
-/* Interface to the low level console driver */
-static int sisfb_init(void);
-
-/* fbdev routines */
-static int	sisfb_get_fix(struct fb_fix_screeninfo *fix, int con,
-				struct fb_info *info);
-
-static int	sisfb_ioctl(struct fb_info *info, unsigned int cmd,
-			    unsigned long arg);
-static int	sisfb_set_par(struct fb_info *info);
-static int	sisfb_blank(int blank,
-				struct fb_info *info);
-extern void	fbcon_sis_fillrect(struct fb_info *info,
-				const struct fb_fillrect *rect);
-extern void	fbcon_sis_copyarea(struct fb_info *info,
-				const struct fb_copyarea *area);
-extern int	fbcon_sis_sync(struct fb_info *info);
-
-/* Internal 2D accelerator functions */
-extern int	sisfb_initaccel(struct sis_video_info *ivideo);
-extern void	sisfb_syncaccel(struct sis_video_info *ivideo);
-
-/* Internal general routines */
-static void	sisfb_search_mode(char *name, bool quiet);
-static int	sisfb_validate_mode(struct sis_video_info *ivideo, int modeindex, u32 vbflags);
-static u8	sisfb_search_refresh_rate(struct sis_video_info *ivideo, unsigned int rate,
-				int index);
-static int	sisfb_setcolreg(unsigned regno, unsigned red, unsigned green,
-				unsigned blue, unsigned transp,
-				struct fb_info *fb_info);
-static int	sisfb_do_set_var(struct fb_var_screeninfo *var, int isactive,
-				struct fb_info *info);
-static void	sisfb_pre_setmode(struct sis_video_info *ivideo);
-static void	sisfb_post_setmode(struct sis_video_info *ivideo);
-static bool	sisfb_CheckVBRetrace(struct sis_video_info *ivideo);
-static bool	sisfbcheckvretracecrt2(struct sis_video_info *ivideo);
-static bool	sisfbcheckvretracecrt1(struct sis_video_info *ivideo);
-static bool	sisfb_bridgeisslave(struct sis_video_info *ivideo);
-static void	sisfb_detect_VB_connect(struct sis_video_info *ivideo);
-static void	sisfb_get_VB_type(struct sis_video_info *ivideo);
-static void	sisfb_set_TVxposoffset(struct sis_video_info *ivideo, int val);
-static void	sisfb_set_TVyposoffset(struct sis_video_info *ivideo, int val);
-#ifdef CONFIG_FB_SIS_300
-unsigned int	sisfb_read_nbridge_pci_dword(struct SiS_Private *SiS_Pr, int reg);
-void		sisfb_write_nbridge_pci_dword(struct SiS_Private *SiS_Pr, int reg, unsigned int val);
-unsigned int	sisfb_read_lpc_pci_dword(struct SiS_Private *SiS_Pr, int reg);
-#endif
-#ifdef CONFIG_FB_SIS_315
-void		sisfb_write_nbridge_pci_byte(struct SiS_Private *SiS_Pr, int reg, unsigned char val);
-unsigned int	sisfb_read_mio_pci_word(struct SiS_Private *SiS_Pr, int reg);
-#endif
-
-/* SiS-specific exported functions */
-void			sis_malloc(struct sis_memreq *req);
-void			sis_malloc_new(struct pci_dev *pdev, struct sis_memreq *req);
-void			sis_free(u32 base);
-void			sis_free_new(struct pci_dev *pdev, u32 base);
-
-/* Internal heap routines */
-static int		sisfb_heap_init(struct sis_video_info *ivideo);
-static struct SIS_OH *	sisfb_poh_new_node(struct SIS_HEAP *memheap);
-static struct SIS_OH *	sisfb_poh_allocate(struct SIS_HEAP *memheap, u32 size);
-static void		sisfb_delete_node(struct SIS_OH *poh);
-static void		sisfb_insert_node(struct SIS_OH *pohList, struct SIS_OH *poh);
-static struct SIS_OH *	sisfb_poh_free(struct SIS_HEAP *memheap, u32 base);
-static void		sisfb_free_node(struct SIS_HEAP *memheap, struct SIS_OH *poh);
-
-/* Routines from init.c/init301.c */
-extern unsigned short	SiS_GetModeID_LCD(int VGAEngine, unsigned int VBFlags, int HDisplay,
-				int VDisplay, int Depth, bool FSTN, unsigned short CustomT,
-				int LCDwith, int LCDheight, unsigned int VBFlags2);
-extern unsigned short	SiS_GetModeID_TV(int VGAEngine, unsigned int VBFlags, int HDisplay,
-				int VDisplay, int Depth, unsigned int VBFlags2);
-extern unsigned short	SiS_GetModeID_VGA2(int VGAEngine, unsigned int VBFlags, int HDisplay,
-				int VDisplay, int Depth, unsigned int VBFlags2);
-extern void		SiSRegInit(struct SiS_Private *SiS_Pr, SISIOADDRESS BaseAddr);
-extern bool		SiSSetMode(struct SiS_Private *SiS_Pr, unsigned short ModeNo);
-extern void		SiS_SetEnableDstn(struct SiS_Private *SiS_Pr, int enable);
-extern void		SiS_SetEnableFstn(struct SiS_Private *SiS_Pr, int enable);
-
-extern bool		SiSDetermineROMLayout661(struct SiS_Private *SiS_Pr);
-
-extern bool		sisfb_gettotalfrommode(struct SiS_Private *SiS_Pr, unsigned char modeno,
-				int *htotal, int *vtotal, unsigned char rateindex);
-extern int		sisfb_mode_rate_to_dclock(struct SiS_Private *SiS_Pr,
-				unsigned char modeno, unsigned char rateindex);
-extern int		sisfb_mode_rate_to_ddata(struct SiS_Private *SiS_Pr, unsigned char modeno,
-				unsigned char rateindex, struct fb_var_screeninfo *var);
-
-/* Chrontel TV, DDC and DPMS functions */
-extern unsigned short	SiS_GetCH700x(struct SiS_Private *SiS_Pr, unsigned short reg);
-extern void		SiS_SetCH700x(struct SiS_Private *SiS_Pr, unsigned short reg, unsigned char val);
-extern unsigned short	SiS_GetCH701x(struct SiS_Private *SiS_Pr, unsigned short reg);
-extern void		SiS_SetCH701x(struct SiS_Private *SiS_Pr, unsigned short reg, unsigned char val);
-extern void		SiS_SetCH70xxANDOR(struct SiS_Private *SiS_Pr, unsigned short reg,
-				unsigned char myor, unsigned char myand);
-extern void		SiS_DDC2Delay(struct SiS_Private *SiS_Pr, unsigned int delaytime);
-extern void		SiS_SetChrontelGPIO(struct SiS_Private *SiS_Pr, unsigned short myvbinfo);
-extern unsigned short	SiS_HandleDDC(struct SiS_Private *SiS_Pr, unsigned int VBFlags, int VGAEngine,
-				unsigned short adaptnum, unsigned short DDCdatatype, unsigned char *buffer,
-				unsigned int VBFlags2);
-extern unsigned short	SiS_ReadDDC1Bit(struct SiS_Private *SiS_Pr);
-#ifdef CONFIG_FB_SIS_315
-extern void		SiS_Chrontel701xBLOn(struct SiS_Private *SiS_Pr);
-extern void		SiS_Chrontel701xBLOff(struct SiS_Private *SiS_Pr);
-#endif
-extern void		SiS_SiS30xBLOn(struct SiS_Private *SiS_Pr);
-extern void		SiS_SiS30xBLOff(struct SiS_Private *SiS_Pr);
-#endif
-
-
diff --git a/drivers/video/fbdev/smscufx.c b/drivers/video/fbdev/smscufx.c
index 8db7085e5d1a..22b606af0a87 100644
--- a/drivers/video/fbdev/smscufx.c
+++ b/drivers/video/fbdev/smscufx.c
@@ -1293,7 +1293,6 @@ static struct fb_ops ufx_ops = {
  * Assumes no active clients have framebuffer open */
 static int ufx_realloc_framebuffer(struct ufx_data *dev, struct fb_info *info)
 {
-	int retval = -ENOMEM;
 	int old_len = info->fix.smem_len;
 	int new_len;
 	unsigned char *old_fb = info->screen_base;
@@ -1308,10 +1307,8 @@ static int ufx_realloc_framebuffer(struct ufx_data *dev, struct fb_info *info)
 		 * Alloc system memory for virtual framebuffer
 		 */
 		new_fb = vmalloc(new_len);
-		if (!new_fb) {
-			pr_err("Virtual framebuffer alloc failed");
-			goto error;
-		}
+		if (!new_fb)
+			return -ENOMEM;
 
 		if (info->screen_base) {
 			memcpy(new_fb, old_fb, old_len);
@@ -1323,11 +1320,7 @@ static int ufx_realloc_framebuffer(struct ufx_data *dev, struct fb_info *info)
 		info->fix.smem_start = (unsigned long) new_fb;
 		info->flags = smscufx_info_flags;
 	}
-
-	retval = 0;
-
-error:
-	return retval;
+	return 0;
 }
 
 /* sets up I2C Controller for 100 Kbps, std. speed, 7-bit addr, master,
@@ -1620,8 +1613,8 @@ static int ufx_usb_probe(struct usb_interface *interface,
 {
 	struct usb_device *usbdev;
 	struct ufx_data *dev;
-	struct fb_info *info = NULL;
-	int retval = -ENOMEM;
+	struct fb_info *info;
+	int retval;
 	u32 id_rev, fpga_rev;
 
 	/* usb initialization */
@@ -1631,7 +1624,7 @@ static int ufx_usb_probe(struct usb_interface *interface,
 	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
 	if (dev == NULL) {
 		dev_err(&usbdev->dev, "ufx_usb_probe: failed alloc of dev struct\n");
-		goto error;
+		return -ENOMEM;
 	}
 
 	/* we need to wait for both usb and fbdev to spin down on disconnect */
@@ -1652,9 +1645,8 @@ static int ufx_usb_probe(struct usb_interface *interface,
 	dev_dbg(dev->gdev, "fb_defio enable=%d\n", fb_defio);
 
 	if (!ufx_alloc_urb_list(dev, WRITES_IN_FLIGHT, MAX_TRANSFER)) {
-		retval = -ENOMEM;
 		dev_err(dev->gdev, "ufx_alloc_urb_list failed\n");
-		goto error;
+		goto e_nomem;
 	}
 
 	/* We don't register a new USB class. Our client interface is fbdev */
@@ -1662,9 +1654,8 @@ static int ufx_usb_probe(struct usb_interface *interface,
 	/* allocates framebuffer driver structure, not framebuffer memory */
 	info = framebuffer_alloc(0, &usbdev->dev);
 	if (!info) {
-		retval = -ENOMEM;
 		dev_err(dev->gdev, "framebuffer_alloc failed\n");
-		goto error;
+		goto e_nomem;
 	}
 
 	dev->info = info;
@@ -1675,7 +1666,7 @@ static int ufx_usb_probe(struct usb_interface *interface,
 	retval = fb_alloc_cmap(&info->cmap, 256, 0);
 	if (retval < 0) {
 		dev_err(dev->gdev, "fb_alloc_cmap failed %x\n", retval);
-		goto error;
+		goto destroy_modedb;
 	}
 
 	INIT_DELAYED_WORK(&dev->free_framebuffer_work,
@@ -1736,26 +1727,20 @@ static int ufx_usb_probe(struct usb_interface *interface,
 	return 0;
 
 error:
-	if (dev) {
-		if (info) {
-			if (info->cmap.len != 0)
-				fb_dealloc_cmap(&info->cmap);
-			if (info->monspecs.modedb)
-				fb_destroy_modedb(info->monspecs.modedb);
-			vfree(info->screen_base);
-
-			fb_destroy_modelist(&info->modelist);
-
-			framebuffer_release(info);
-		}
-
-		kref_put(&dev->kref, ufx_free); /* ref for framebuffer */
-		kref_put(&dev->kref, ufx_free); /* last ref from kref_init */
-
-		/* dev has been deallocated. Do not dereference */
-	}
-
+	fb_dealloc_cmap(&info->cmap);
+destroy_modedb:
+	fb_destroy_modedb(info->monspecs.modedb);
+	vfree(info->screen_base);
+	fb_destroy_modelist(&info->modelist);
+	framebuffer_release(info);
+put_ref:
+	kref_put(&dev->kref, ufx_free); /* ref for framebuffer */
+	kref_put(&dev->kref, ufx_free); /* last ref from kref_init */
 	return retval;
+
+e_nomem:
+	retval = -ENOMEM;
+	goto put_ref;
 }
 
 static void ufx_usb_disconnect(struct usb_interface *interface)
diff --git a/drivers/video/fbdev/ssd1307fb.c b/drivers/video/fbdev/ssd1307fb.c
index f599520374dd..6439231f2db2 100644
--- a/drivers/video/fbdev/ssd1307fb.c
+++ b/drivers/video/fbdev/ssd1307fb.c
@@ -628,7 +628,8 @@ static int ssd1307fb_probe(struct i2c_client *client,
 		goto fb_alloc_error;
 	}
 
-	ssd1307fb_defio = devm_kzalloc(&client->dev, sizeof(struct fb_deferred_io), GFP_KERNEL);
+	ssd1307fb_defio = devm_kzalloc(&client->dev, sizeof(*ssd1307fb_defio),
+				       GFP_KERNEL);
 	if (!ssd1307fb_defio) {
 		dev_err(&client->dev, "Couldn't allocate deferred io.\n");
 		ret = -ENOMEM;
diff --git a/drivers/video/fbdev/stifb.c b/drivers/video/fbdev/stifb.c
index 3c2e4cabc08f..045e8afe398b 100644
--- a/drivers/video/fbdev/stifb.c
+++ b/drivers/video/fbdev/stifb.c
@@ -1126,10 +1126,8 @@ static int __init stifb_init_fb(struct sti_struct *sti, int bpp_pref)
 	int bpp, xres, yres;
 
 	fb = kzalloc(sizeof(*fb), GFP_ATOMIC);
-	if (!fb) {
-		printk(KERN_ERR "stifb: Could not allocate stifb structure\n");
-		return -ENODEV;
-	}
+	if (!fb)
+		return -ENOMEM;
 	
 	info = &fb->info;
 
diff --git a/drivers/video/fbdev/udlfb.c b/drivers/video/fbdev/udlfb.c
index 452a4207ac1b..f365d4862015 100644
--- a/drivers/video/fbdev/udlfb.c
+++ b/drivers/video/fbdev/udlfb.c
@@ -428,7 +428,6 @@ static void dlfb_compress_hline(
 	const uint16_t *pixel = *pixel_start_ptr;
 	uint32_t dev_addr  = *device_address_ptr;
 	uint8_t *cmd = *command_buffer_ptr;
-	const int bpp = 2;
 
 	while ((pixel_end > pixel) &&
 	       (cmd_buffer_end - MIN_RLX_CMD_BYTES > cmd)) {
@@ -441,9 +440,9 @@ static void dlfb_compress_hline(
 
 		*cmd++ = 0xAF;
 		*cmd++ = 0x6B;
-		*cmd++ = (uint8_t) ((dev_addr >> 16) & 0xFF);
-		*cmd++ = (uint8_t) ((dev_addr >> 8) & 0xFF);
-		*cmd++ = (uint8_t) ((dev_addr) & 0xFF);
+		*cmd++ = dev_addr >> 16;
+		*cmd++ = dev_addr >> 8;
+		*cmd++ = dev_addr;
 
 		cmd_pixels_count_byte = cmd++; /*  we'll know this later */
 		cmd_pixel_start = pixel;
@@ -453,15 +452,15 @@ static void dlfb_compress_hline(
 
 		cmd_pixel_end = pixel + min(MAX_CMD_PIXELS + 1,
 			min((int)(pixel_end - pixel),
-			    (int)(cmd_buffer_end - cmd) / bpp));
+			    (int)(cmd_buffer_end - cmd) / BPP));
 
-		prefetch_range((void *) pixel, (cmd_pixel_end - pixel) * bpp);
+		prefetch_range((void *) pixel, (cmd_pixel_end - pixel) * BPP);
 
 		while (pixel < cmd_pixel_end) {
 			const uint16_t * const repeating_pixel = pixel;
 
-			*(uint16_t *)cmd = cpu_to_be16p(pixel);
-			cmd += 2;
+			*cmd++ = *pixel >> 8;
+			*cmd++ = *pixel;
 			pixel++;
 
 			if (unlikely((pixel < cmd_pixel_end) &&
@@ -490,7 +489,7 @@ static void dlfb_compress_hline(
 		}
 
 		*cmd_pixels_count_byte = (pixel - cmd_pixel_start) & 0xFF;
-		dev_addr += (pixel - cmd_pixel_start) * bpp;
+		dev_addr += (pixel - cmd_pixel_start) * BPP;
 	}
 
 	if (cmd_buffer_end <= MIN_RLX_CMD_BYTES + cmd) {
@@ -1136,7 +1135,6 @@ static struct fb_ops dlfb_ops = {
  */
 static int dlfb_realloc_framebuffer(struct dlfb_data *dlfb, struct fb_info *info)
 {
-	int retval = -ENOMEM;
 	int old_len = info->fix.smem_len;
 	int new_len;
 	unsigned char *old_fb = info->screen_base;
@@ -1152,7 +1150,7 @@ static int dlfb_realloc_framebuffer(struct dlfb_data *dlfb, struct fb_info *info
 		new_fb = vmalloc(new_len);
 		if (!new_fb) {
 			dev_err(info->dev, "Virtual framebuffer alloc failed\n");
-			goto error;
+			return -ENOMEM;
 		}
 
 		if (info->screen_base) {
@@ -1181,11 +1179,7 @@ static int dlfb_realloc_framebuffer(struct dlfb_data *dlfb, struct fb_info *info
 			dlfb->backing_buffer = new_back;
 		}
 	}
-
-	retval = 0;
-
-error:
-	return retval;
+	return 0;
 }
 
 /*
@@ -1531,15 +1525,16 @@ static int dlfb_parse_vendor_descriptor(struct dlfb_data *dlfb,
 			u8 length;
 			u16 key;
 
-			key = le16_to_cpu(*((u16 *) desc));
-			desc += sizeof(u16);
-			length = *desc;
-			desc++;
+			key = *desc++;
+			key |= (u16)*desc++ << 8;
+			length = *desc++;
 
 			switch (key) {
 			case 0x0200: { /* max_area */
-				u32 max_area;
-				max_area = le32_to_cpu(*((u32 *)desc));
+				u32 max_area = *desc++;
+				max_area |= (u32)*desc++ << 8;
+				max_area |= (u32)*desc++ << 16;
+				max_area |= (u32)*desc++ << 24;
 				dev_warn(&intf->dev,
 					 "DL chip limited to %d pixel modes\n",
 					 max_area);
diff --git a/drivers/video/fbdev/vermilion/vermilion.c b/drivers/video/fbdev/vermilion/vermilion.c
index 6f8d444eb0e3..5172fa581147 100644
--- a/drivers/video/fbdev/vermilion/vermilion.c
+++ b/drivers/video/fbdev/vermilion/vermilion.c
@@ -651,7 +651,7 @@ static int vmlfb_check_var_locked(struct fb_var_screeninfo *var,
 	}
 
 	pitch = ALIGN((var->xres * var->bits_per_pixel) >> 3, 0x40);
-	mem = pitch * var->yres_virtual;
+	mem = (u64)pitch * var->yres_virtual;
 	if (mem > vinfo->vram_contig_size) {
 		return -ENOMEM;
 	}
diff --git a/drivers/video/fbdev/via/via_aux_sii164.c b/drivers/video/fbdev/via/via_aux_sii164.c
index ca1b35f033b1..c27f62c2c75a 100644
--- a/drivers/video/fbdev/via/via_aux_sii164.c
+++ b/drivers/video/fbdev/via/via_aux_sii164.c
@@ -36,7 +36,7 @@ static void probe(struct via_aux_bus *bus, u8 addr)
 		.name	=	name};
 	/* check vendor id and device id */
 	const u8 id[] = {0x01, 0x00, 0x06, 0x00}, len = ARRAY_SIZE(id);
-	u8 tmp[len];
+	u8 tmp[ARRAY_SIZE(id)];
 
 	if (!via_aux_read(&drv, 0x00, tmp, len) || memcmp(id, tmp, len))
 		return;
diff --git a/drivers/video/fbdev/via/via_aux_vt1631.c b/drivers/video/fbdev/via/via_aux_vt1631.c
index 06e742f1f723..32978a0ccfd7 100644
--- a/drivers/video/fbdev/via/via_aux_vt1631.c
+++ b/drivers/video/fbdev/via/via_aux_vt1631.c
@@ -36,7 +36,7 @@ void via_aux_vt1631_probe(struct via_aux_bus *bus)
 		.name	=	name};
 	/* check vendor id and device id */
 	const u8 id[] = {0x06, 0x11, 0x91, 0x31}, len = ARRAY_SIZE(id);
-	u8 tmp[len];
+	u8 tmp[ARRAY_SIZE(id)];
 
 	if (!via_aux_read(&drv, 0x00, tmp, len) || memcmp(id, tmp, len))
 		return;
diff --git a/drivers/video/fbdev/via/via_aux_vt1632.c b/drivers/video/fbdev/via/via_aux_vt1632.c
index d24f4cd97401..cec8cc43d524 100644
--- a/drivers/video/fbdev/via/via_aux_vt1632.c
+++ b/drivers/video/fbdev/via/via_aux_vt1632.c
@@ -36,7 +36,7 @@ static void probe(struct via_aux_bus *bus, u8 addr)
 		.name	=	name};
 	/* check vendor id and device id */
 	const u8 id[] = {0x06, 0x11, 0x92, 0x31}, len = ARRAY_SIZE(id);
-	u8 tmp[len];
+	u8 tmp[ARRAY_SIZE(id)];
 
 	if (!via_aux_read(&drv, 0x00, tmp, len) || memcmp(id, tmp, len))
 		return;
diff --git a/drivers/video/fbdev/via/via_aux_vt1636.c b/drivers/video/fbdev/via/via_aux_vt1636.c
index 9e015c101d4d..2b10bc21ab79 100644
--- a/drivers/video/fbdev/via/via_aux_vt1636.c
+++ b/drivers/video/fbdev/via/via_aux_vt1636.c
@@ -36,7 +36,7 @@ void via_aux_vt1636_probe(struct via_aux_bus *bus)
 		.name	=	name};
 	/* check vendor id and device id */
 	const u8 id[] = {0x06, 0x11, 0x45, 0x33}, len = ARRAY_SIZE(id);
-	u8 tmp[len];
+	u8 tmp[ARRAY_SIZE(id)];
 
 	if (!via_aux_read(&drv, 0x00, tmp, len) || memcmp(id, tmp, len))
 		return;
diff --git a/drivers/video/of_display_timing.c b/drivers/video/of_display_timing.c
index 8ce0a99bf17c..83b8963c9657 100644
--- a/drivers/video/of_display_timing.c
+++ b/drivers/video/of_display_timing.c
@@ -244,23 +244,3 @@ dispfail:
 	return NULL;
 }
 EXPORT_SYMBOL_GPL(of_get_display_timings);
-
-/**
- * of_display_timings_exist - check if a display-timings node is provided
- * @np: device_node with the timing
- **/
-int of_display_timings_exist(const struct device_node *np)
-{
-	struct device_node *timings_np;
-
-	if (!np)
-		return -EINVAL;
-
-	timings_np = of_parse_phandle(np, "display-timings", 0);
-	if (!timings_np)
-		return -EINVAL;
-
-	of_node_put(timings_np);
-	return 1;
-}
-EXPORT_SYMBOL_GPL(of_display_timings_exist);
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index dfe5684000be..6b237e3f4983 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -272,6 +272,12 @@ static unsigned int update_balloon_stats(struct virtio_balloon *vb)
 				pages_to_bytes(events[PSWPOUT]));
 	update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, events[PGMAJFAULT]);
 	update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]);
+#ifdef CONFIG_HUGETLB_PAGE
+	update_stat(vb, idx++, VIRTIO_BALLOON_S_HTLB_PGALLOC,
+		    events[HTLB_BUDDY_PGALLOC]);
+	update_stat(vb, idx++, VIRTIO_BALLOON_S_HTLB_PGFAIL,
+		    events[HTLB_BUDDY_PGALLOC_FAIL]);
+#endif
 #endif
 	update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMFREE,
 				pages_to_bytes(i.freeram));
diff --git a/drivers/xen/xen-acpi-processor.c b/drivers/xen/xen-acpi-processor.c
index 23e391d3ec01..b29f4e40851f 100644
--- a/drivers/xen/xen-acpi-processor.c
+++ b/drivers/xen/xen-acpi-processor.c
@@ -53,6 +53,8 @@ static unsigned long *acpi_ids_done;
 static unsigned long *acpi_id_present;
 /* And if there is an _CST definition (or a PBLK) for the ACPI IDs */
 static unsigned long *acpi_id_cst_present;
+/* Which ACPI P-State dependencies for a enumerated processor */
+static struct acpi_psd_package *acpi_psd;
 
 static int push_cxx_to_hypervisor(struct acpi_processor *_pr)
 {
@@ -362,9 +364,9 @@ read_acpi_id(acpi_handle handle, u32 lvl, void *context, void **rv)
 	}
 	/* There are more ACPI Processor objects than in x2APIC or MADT.
 	 * This can happen with incorrect ACPI SSDT declerations. */
-	if (acpi_id > nr_acpi_bits) {
-		pr_debug("We only have %u, trying to set %u\n",
-			 nr_acpi_bits, acpi_id);
+	if (acpi_id >= nr_acpi_bits) {
+		pr_debug("max acpi id %u, trying to set %u\n",
+			 nr_acpi_bits - 1, acpi_id);
 		return AE_OK;
 	}
 	/* OK, There is a ACPI Processor object */
@@ -372,6 +374,13 @@ read_acpi_id(acpi_handle handle, u32 lvl, void *context, void **rv)
 
 	pr_debug("ACPI CPU%u w/ PBLK:0x%lx\n", acpi_id, (unsigned long)pblk);
 
+	/* It has P-state dependencies */
+	if (!acpi_processor_get_psd(handle, &acpi_psd[acpi_id])) {
+		pr_debug("ACPI CPU%u w/ PST:coord_type = %llu domain = %llu\n",
+			 acpi_id, acpi_psd[acpi_id].coord_type,
+			 acpi_psd[acpi_id].domain);
+	}
+
 	status = acpi_evaluate_object(handle, "_CST", NULL, &buffer);
 	if (ACPI_FAILURE(status)) {
 		if (!pblk)
@@ -405,6 +414,14 @@ static int check_acpi_ids(struct acpi_processor *pr_backup)
 		return -ENOMEM;
 	}
 
+	acpi_psd = kcalloc(nr_acpi_bits, sizeof(struct acpi_psd_package),
+			   GFP_KERNEL);
+	if (!acpi_psd) {
+		kfree(acpi_id_present);
+		kfree(acpi_id_cst_present);
+		return -ENOMEM;
+	}
+
 	acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
 			    ACPI_UINT32_MAX,
 			    read_acpi_id, NULL, NULL, NULL);
@@ -417,6 +434,12 @@ upload:
 			pr_backup->acpi_id = i;
 			/* Mask out C-states if there are no _CST or PBLK */
 			pr_backup->flags.power = test_bit(i, acpi_id_cst_present);
+			/* num_entries is non-zero if we evaluated _PSD */
+			if (acpi_psd[i].num_entries) {
+				memcpy(&pr_backup->performance->domain_info,
+				       &acpi_psd[i],
+				       sizeof(struct acpi_psd_package));
+			}
 			(void)upload_pm_data(pr_backup);
 		}
 	}
@@ -566,6 +589,7 @@ static void __exit xen_acpi_processor_exit(void)
 	kfree(acpi_ids_done);
 	kfree(acpi_id_present);
 	kfree(acpi_id_cst_present);
+	kfree(acpi_psd);
 	for_each_possible_cpu(i)
 		acpi_processor_unregister_performance(i);
 
diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c
index a493e99bed21..0d6d9264d6a9 100644
--- a/drivers/xen/xenbus/xenbus_dev_frontend.c
+++ b/drivers/xen/xenbus/xenbus_dev_frontend.c
@@ -365,7 +365,7 @@ void xenbus_dev_queue_reply(struct xb_req_data *req)
 			if (WARN_ON(rc))
 				goto out;
 		}
-	} else if (req->msg.type == XS_TRANSACTION_END) {
+	} else if (req->type == XS_TRANSACTION_END) {
 		trans = xenbus_get_transaction(u, req->msg.tx_id);
 		if (WARN_ON(!trans))
 			goto out;
@@ -429,6 +429,10 @@ static int xenbus_write_transaction(unsigned msg_type,
 {
 	int rc;
 	struct xenbus_transaction_holder *trans = NULL;
+	struct {
+		struct xsd_sockmsg hdr;
+		char body[];
+	} *msg = (void *)u->u.buffer;
 
 	if (msg_type == XS_TRANSACTION_START) {
 		trans = kzalloc(sizeof(*trans), GFP_KERNEL);
@@ -437,11 +441,15 @@ static int xenbus_write_transaction(unsigned msg_type,
 			goto out;
 		}
 		list_add(&trans->list, &u->transactions);
-	} else if (u->u.msg.tx_id != 0 &&
-		   !xenbus_get_transaction(u, u->u.msg.tx_id))
+	} else if (msg->hdr.tx_id != 0 &&
+		   !xenbus_get_transaction(u, msg->hdr.tx_id))
 		return xenbus_command_reply(u, XS_ERROR, "ENOENT");
+	else if (msg_type == XS_TRANSACTION_END &&
+		 !(msg->hdr.len == 2 &&
+		   (!strcmp(msg->body, "T") || !strcmp(msg->body, "F"))))
+		return xenbus_command_reply(u, XS_ERROR, "EINVAL");
 
-	rc = xenbus_dev_request_and_reply(&u->u.msg, u);
+	rc = xenbus_dev_request_and_reply(&msg->hdr, u);
 	if (rc && trans) {
 		list_del(&trans->list);
 		kfree(trans);
diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c
index 3f3b29398ab8..49a3874ae6bb 100644
--- a/drivers/xen/xenbus/xenbus_xs.c
+++ b/drivers/xen/xenbus/xenbus_xs.c
@@ -140,7 +140,9 @@ void xs_request_exit(struct xb_req_data *req)
 	spin_lock(&xs_state_lock);
 	xs_state_users--;
 	if ((req->type == XS_TRANSACTION_START && req->msg.type == XS_ERROR) ||
-	    req->type == XS_TRANSACTION_END)
+	    (req->type == XS_TRANSACTION_END &&
+	     !WARN_ON_ONCE(req->msg.type == XS_ERROR &&
+			   !strcmp(req->body, "ENOENT"))))
 		xs_state_users--;
 	spin_unlock(&xs_state_lock);
 
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 9370e2feb999..dbc3c0b0142d 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -570,10 +570,11 @@ static int afs_writepages_region(struct address_space *mapping,
 
 		_debug("wback %lx", page->index);
 
-		/* at this point we hold neither mapping->tree_lock nor lock on
-		 * the page itself: the page may be truncated or invalidated
-		 * (changing page->mapping to NULL), or even swizzled back from
-		 * swapper_space to tmpfs file mapping
+		/*
+		 * at this point we hold neither the i_pages lock nor the
+		 * page lock: the page may be truncated or invalidated
+		 * (changing page->mapping to NULL), or even swizzled
+		 * back from swapper_space to tmpfs file mapping
 		 */
 		ret = lock_page_killable(page);
 		if (ret < 0) {
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index a0c57c37fa21..be9c3dc048ab 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -19,9 +19,6 @@
  */
 static autofs_wqt_t autofs4_next_wait_queue = 1;
 
-/* These are the signals we allow interrupting a pending mount */
-#define SHUTDOWN_SIGS	(sigmask(SIGKILL) | sigmask(SIGINT) | sigmask(SIGQUIT))
-
 void autofs4_catatonic_mode(struct autofs_sb_info *sbi)
 {
 	struct autofs_wait_queue *wq, *nwq;
@@ -486,29 +483,7 @@ int autofs4_wait(struct autofs_sb_info *sbi,
 	 * wq->name.name is NULL iff the lock is already released
 	 * or the mount has been made catatonic.
 	 */
-	if (wq->name.name) {
-		/* Block all but "shutdown" signals while waiting */
-		unsigned long shutdown_sigs_mask;
-		unsigned long irqflags;
-		sigset_t oldset;
-
-		spin_lock_irqsave(&current->sighand->siglock, irqflags);
-		oldset = current->blocked;
-		shutdown_sigs_mask = SHUTDOWN_SIGS & ~oldset.sig[0];
-		siginitsetinv(&current->blocked, shutdown_sigs_mask);
-		recalc_sigpending();
-		spin_unlock_irqrestore(&current->sighand->siglock, irqflags);
-
-		wait_event_interruptible(wq->queue, wq->name.name == NULL);
-
-		spin_lock_irqsave(&current->sighand->siglock, irqflags);
-		current->blocked = oldset;
-		recalc_sigpending();
-		spin_unlock_irqrestore(&current->sighand->siglock, irqflags);
-	} else {
-		pr_debug("skipped sleeping\n");
-	}
-
+	wait_event_killable(wq->queue, wq->name.name == NULL);
 	status = wq->status;
 
 	/*
@@ -574,7 +549,7 @@ int autofs4_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_tok
 	kfree(wq->name.name);
 	wq->name.name = NULL;	/* Do not wait on this queue */
 	wq->status = status;
-	wake_up_interruptible(&wq->queue);
+	wake_up(&wq->queue);
 	if (!--wq->wait_ctr)
 		kfree(wq);
 	mutex_unlock(&sbi->wq_mutex);
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index ce1824f47ba6..c3deb2e35f20 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -330,6 +330,7 @@ beyond_if:
 #ifdef __alpha__
 	regs->gp = ex.a_gpvalue;
 #endif
+	finalize_exec(bprm);
 	start_thread(regs, ex.a_entry, current->mm->start_stack);
 	return 0;
 }
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index bdb201230bae..41e04183e4ce 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -377,6 +377,11 @@ static unsigned long elf_map(struct file *filep, unsigned long addr,
 	} else
 		map_addr = vm_mmap(filep, addr, size, prot, type, off);
 
+	if ((type & MAP_FIXED_NOREPLACE) && BAD_ADDR(map_addr))
+		pr_info("%d (%s): Uhuuh, elf segment at %p requested but the memory is mapped already\n",
+				task_pid_nr(current), current->comm,
+				(void *)addr);
+
 	return(map_addr);
 }
 
@@ -575,7 +580,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
 				elf_prot |= PROT_EXEC;
 			vaddr = eppnt->p_vaddr;
 			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
-				elf_type |= MAP_FIXED;
+				elf_type |= MAP_FIXED_NOREPLACE;
 			else if (no_base && interp_elf_ex->e_type == ET_DYN)
 				load_addr = -vaddr;
 
@@ -890,7 +895,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
 	   the correct location in memory. */
 	for(i = 0, elf_ppnt = elf_phdata;
 	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
-		int elf_prot = 0, elf_flags;
+		int elf_prot = 0, elf_flags, elf_fixed = MAP_FIXED_NOREPLACE;
 		unsigned long k, vaddr;
 		unsigned long total_size = 0;
 
@@ -922,6 +927,13 @@ static int load_elf_binary(struct linux_binprm *bprm)
 					 */
 				}
 			}
+
+			/*
+			 * Some binaries have overlapping elf segments and then
+			 * we have to forcefully map over an existing mapping
+			 * e.g. over this newly established brk mapping.
+			 */
+			elf_fixed = MAP_FIXED;
 		}
 
 		if (elf_ppnt->p_flags & PF_R)
@@ -939,7 +951,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
 		 * the ET_DYN load_addr calculations, proceed normally.
 		 */
 		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
-			elf_flags |= MAP_FIXED;
+			elf_flags |= elf_fixed;
 		} else if (loc->elf_ex.e_type == ET_DYN) {
 			/*
 			 * This logic is run once for the first LOAD Program
@@ -975,7 +987,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
 				load_bias = ELF_ET_DYN_BASE;
 				if (current->flags & PF_RANDOMIZE)
 					load_bias += arch_mmap_rnd();
-				elf_flags |= MAP_FIXED;
+				elf_flags |= elf_fixed;
 			} else
 				load_bias = 0;
 
@@ -1155,6 +1167,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
 	ELF_PLAT_INIT(regs, reloc_func_desc);
 #endif
 
+	finalize_exec(bprm);
 	start_thread(regs, elf_entry, bprm->p);
 	retval = 0;
 out:
@@ -1234,7 +1247,7 @@ static int load_elf_library(struct file *file)
 			(eppnt->p_filesz +
 			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
 			PROT_READ | PROT_WRITE | PROT_EXEC,
-			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
+			MAP_FIXED_NOREPLACE | MAP_PRIVATE | MAP_DENYWRITE,
 			(eppnt->p_offset -
 			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
 	if (error != ELF_PAGESTART(eppnt->p_vaddr))
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 429326b6e2e7..d90993adeffa 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -463,6 +463,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm)
 			    dynaddr);
 #endif
 
+	finalize_exec(bprm);
 	/* everything is now ready... get the userspace context ready to roll */
 	entryaddr = interp_params.entry_addr ?: exec_params.entry_addr;
 	start_thread(regs, entryaddr, current->mm->start_stack);
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 5d6b94475f27..82a48e830018 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -994,6 +994,7 @@ static int load_flat_binary(struct linux_binprm *bprm)
 	FLAT_PLAT_INIT(regs);
 #endif
 
+	finalize_exec(bprm);
 	pr_debug("start_thread(regs=0x%p, entry=0x%lx, start_stack=0x%lx)\n",
 		 regs, start_addr, current->mm->start_stack);
 	start_thread(regs, start_addr, current->mm->start_stack);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 7a506c55a993..7ec920e27065 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1948,11 +1948,6 @@ static int blkdev_releasepage(struct page *page, gfp_t wait)
 static int blkdev_writepages(struct address_space *mapping,
 			     struct writeback_control *wbc)
 {
-	if (dax_mapping(mapping)) {
-		struct block_device *bdev = I_BDEV(mapping->host);
-
-		return dax_writeback_mapping_range(mapping, bdev, wbc);
-	}
 	return generic_writepages(mapping, wbc);
 }
 
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 562c3e633403..578181cd96b5 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -458,7 +458,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
 			break;
 
 		rcu_read_lock();
-		page = radix_tree_lookup(&mapping->page_tree, pg_index);
+		page = radix_tree_lookup(&mapping->i_pages, pg_index);
 		rcu_read_unlock();
 		if (page && !radix_tree_exceptional_entry(page)) {
 			misses++;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 47a8fe9d22e8..cf87976e389d 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3963,11 +3963,11 @@ retry:
 
 			done_index = page->index;
 			/*
-			 * At this point we hold neither mapping->tree_lock nor
-			 * lock on the page itself: the page may be truncated or
-			 * invalidated (changing page->mapping to NULL), or even
-			 * swizzled back from swapper_space to tmpfs file
-			 * mapping
+			 * At this point we hold neither the i_pages lock nor
+			 * the page lock: the page may be truncated or
+			 * invalidated (changing page->mapping to NULL),
+			 * or even swizzled back from swapper_space to
+			 * tmpfs file mapping
 			 */
 			if (!trylock_page(page)) {
 				flush_write_bio(epd);
@@ -5174,13 +5174,13 @@ void clear_extent_buffer_dirty(struct extent_buffer *eb)
 		WARN_ON(!PagePrivate(page));
 
 		clear_page_dirty_for_io(page);
-		spin_lock_irq(&page->mapping->tree_lock);
+		xa_lock_irq(&page->mapping->i_pages);
 		if (!PageDirty(page)) {
-			radix_tree_tag_clear(&page->mapping->page_tree,
+			radix_tree_tag_clear(&page->mapping->i_pages,
 						page_index(page),
 						PAGECACHE_TAG_DIRTY);
 		}
-		spin_unlock_irq(&page->mapping->tree_lock);
+		xa_unlock_irq(&page->mapping->i_pages);
 		ClearPageError(page);
 		unlock_page(page);
 	}
diff --git a/fs/buffer.c b/fs/buffer.c
index ec5dd39071e6..f3491074b035 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -185,10 +185,9 @@ EXPORT_SYMBOL(end_buffer_write_sync);
  * we get exclusion from try_to_free_buffers with the blockdev mapping's
  * private_lock.
  *
- * Hack idea: for the blockdev mapping, i_bufferlist_lock contention
+ * Hack idea: for the blockdev mapping, private_lock contention
  * may be quite high.  This code could TryLock the page, and if that
- * succeeds, there is no need to take private_lock. (But if
- * private_lock is contended then so is mapping->tree_lock).
+ * succeeds, there is no need to take private_lock.
  */
 static struct buffer_head *
 __find_get_block_slow(struct block_device *bdev, sector_t block)
@@ -594,20 +593,21 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode);
  *
  * The caller must hold lock_page_memcg().
  */
-static void __set_page_dirty(struct page *page, struct address_space *mapping,
+void __set_page_dirty(struct page *page, struct address_space *mapping,
 			     int warn)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&mapping->tree_lock, flags);
+	xa_lock_irqsave(&mapping->i_pages, flags);
 	if (page->mapping) {	/* Race with truncate? */
 		WARN_ON_ONCE(warn && !PageUptodate(page));
 		account_page_dirtied(page, mapping);
-		radix_tree_tag_set(&mapping->page_tree,
+		radix_tree_tag_set(&mapping->i_pages,
 				page_index(page), PAGECACHE_TAG_DIRTY);
 	}
-	spin_unlock_irqrestore(&mapping->tree_lock, flags);
+	xa_unlock_irqrestore(&mapping->i_pages, flags);
 }
+EXPORT_SYMBOL_GPL(__set_page_dirty);
 
 /*
  * Add a page to the dirty page list.
@@ -1095,7 +1095,7 @@ __getblk_slow(struct block_device *bdev, sector_t block,
  * inode list.
  *
  * mark_buffer_dirty() is atomic.  It takes bh->b_page->mapping->private_lock,
- * mapping->tree_lock and mapping->host->i_lock.
+ * i_pages lock and mapping->host->i_lock.
  */
 void mark_buffer_dirty(struct buffer_head *bh)
 {
diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile
index 174f5709e508..a699e320393f 100644
--- a/fs/ceph/Makefile
+++ b/fs/ceph/Makefile
@@ -6,7 +6,7 @@
 obj-$(CONFIG_CEPH_FS) += ceph.o
 
 ceph-y := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \
-	export.o caps.o snap.o xattr.o \
+	export.o caps.o snap.o xattr.o quota.o \
 	mds_client.o mdsmap.o strings.o ceph_frag.o \
 	debugfs.o
 
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index b4336b42ce3b..5f7ad3d0df2e 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -15,6 +15,7 @@
 #include "mds_client.h"
 #include "cache.h"
 #include <linux/ceph/osd_client.h>
+#include <linux/ceph/striper.h>
 
 /*
  * Ceph address space ops.
@@ -438,7 +439,7 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
 {
 	struct inode *inode = file_inode(file);
 	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
-	struct ceph_file_info *ci = file->private_data;
+	struct ceph_file_info *fi = file->private_data;
 	struct ceph_rw_context *rw_ctx;
 	int rc = 0;
 	int max = 0;
@@ -452,7 +453,7 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
 	if (rc == 0)
 		goto out;
 
-	rw_ctx = ceph_find_rw_context(ci);
+	rw_ctx = ceph_find_rw_context(fi);
 	max = fsc->mount_options->rsize >> PAGE_SHIFT;
 	dout("readpages %p file %p ctx %p nr_pages %d max %d\n",
 	     inode, file, rw_ctx, nr_pages, max);
@@ -800,7 +801,7 @@ static int ceph_writepages_start(struct address_space *mapping,
 	struct ceph_osd_request *req = NULL;
 	struct ceph_writeback_ctl ceph_wbc;
 	bool should_loop, range_whole = false;
-	bool stop, done = false;
+	bool done = false;
 
 	dout("writepages_start %p (mode=%s)\n", inode,
 	     wbc->sync_mode == WB_SYNC_NONE ? "NONE" :
@@ -856,7 +857,7 @@ retry:
 		 * in that range can be associated with newer snapc.
 		 * They are not writeable until we write all dirty pages
 		 * associated with 'snapc' get written */
-		if (index > 0 || wbc->sync_mode != WB_SYNC_NONE)
+		if (index > 0)
 			should_loop = true;
 		dout(" non-head snapc, range whole\n");
 	}
@@ -864,8 +865,7 @@ retry:
 	ceph_put_snap_context(last_snapc);
 	last_snapc = snapc;
 
-	stop = false;
-	while (!stop && index <= end) {
+	while (!done && index <= end) {
 		int num_ops = 0, op_idx;
 		unsigned i, pvec_pages, max_pages, locked_pages = 0;
 		struct page **pages = NULL, **data_pages;
@@ -898,16 +898,30 @@ get_more_pages:
 				unlock_page(page);
 				continue;
 			}
-			if (strip_unit_end && (page->index > strip_unit_end)) {
-				dout("end of strip unit %p\n", page);
+			/* only if matching snap context */
+			pgsnapc = page_snap_context(page);
+			if (pgsnapc != snapc) {
+				dout("page snapc %p %lld != oldest %p %lld\n",
+				     pgsnapc, pgsnapc->seq, snapc, snapc->seq);
+				if (!should_loop &&
+				    !ceph_wbc.head_snapc &&
+				    wbc->sync_mode != WB_SYNC_NONE)
+					should_loop = true;
 				unlock_page(page);
-				break;
+				continue;
 			}
 			if (page_offset(page) >= ceph_wbc.i_size) {
 				dout("%p page eof %llu\n",
 				     page, ceph_wbc.i_size);
-				/* not done if range_cyclic */
-				stop = true;
+				if (ceph_wbc.size_stable ||
+				    page_offset(page) >= i_size_read(inode))
+					mapping->a_ops->invalidatepage(page,
+								0, PAGE_SIZE);
+				unlock_page(page);
+				continue;
+			}
+			if (strip_unit_end && (page->index > strip_unit_end)) {
+				dout("end of strip unit %p\n", page);
 				unlock_page(page);
 				break;
 			}
@@ -921,15 +935,6 @@ get_more_pages:
 				wait_on_page_writeback(page);
 			}
 
-			/* only if matching snap context */
-			pgsnapc = page_snap_context(page);
-			if (pgsnapc != snapc) {
-				dout("page snapc %p %lld != oldest %p %lld\n",
-				     pgsnapc, pgsnapc->seq, snapc, snapc->seq);
-				unlock_page(page);
-				continue;
-			}
-
 			if (!clear_page_dirty_for_io(page)) {
 				dout("%p !clear_page_dirty_for_io\n", page);
 				unlock_page(page);
@@ -945,19 +950,15 @@ get_more_pages:
 			if (locked_pages == 0) {
 				u64 objnum;
 				u64 objoff;
+				u32 xlen;
 
 				/* prepare async write request */
 				offset = (u64)page_offset(page);
-				len = wsize;
-
-				rc = ceph_calc_file_object_mapping(&ci->i_layout,
-								offset, len,
-								&objnum, &objoff,
-								&len);
-				if (rc < 0) {
-					unlock_page(page);
-					break;
-				}
+				ceph_calc_file_object_mapping(&ci->i_layout,
+							      offset, wsize,
+							      &objnum, &objoff,
+							      &xlen);
+				len = xlen;
 
 				num_ops = 1;
 				strip_unit_end = page->index +
@@ -1146,7 +1147,7 @@ new_request:
 		 * we tagged for writeback prior to entering this loop.
 		 */
 		if (wbc->nr_to_write <= 0 && wbc->sync_mode == WB_SYNC_NONE)
-			done = stop = true;
+			done = true;
 
 release_pvec_pages:
 		dout("pagevec_release on %d pages (%p)\n", (int)pvec.nr,
diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c
index 33a211b364ed..bb524c880b1e 100644
--- a/fs/ceph/cache.c
+++ b/fs/ceph/cache.c
@@ -51,7 +51,7 @@ static const struct fscache_cookie_def ceph_fscache_fsid_object_def = {
 	.type		= FSCACHE_COOKIE_TYPE_INDEX,
 };
 
-int ceph_fscache_register(void)
+int __init ceph_fscache_register(void)
 {
 	return fscache_register_netfs(&ceph_cache_netfs);
 }
@@ -135,7 +135,7 @@ static enum fscache_checkaux ceph_fscache_inode_check_aux(
 	if (memcmp(data, &aux, sizeof(aux)) != 0)
 		return FSCACHE_CHECKAUX_OBSOLETE;
 
-	dout("ceph inode 0x%p cached okay", ci);
+	dout("ceph inode 0x%p cached okay\n", ci);
 	return FSCACHE_CHECKAUX_OKAY;
 }
 
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 0e5bd3e3344e..23dbfae16156 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -184,36 +184,54 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc,
 					 mdsc->caps_avail_count);
 	spin_unlock(&mdsc->caps_list_lock);
 
-	for (i = have; i < need; i++) {
-retry:
+	for (i = have; i < need; ) {
 		cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS);
-		if (!cap) {
-			if (!trimmed) {
-				for (j = 0; j < mdsc->max_sessions; j++) {
-					s = __ceph_lookup_mds_session(mdsc, j);
-					if (!s)
-						continue;
-					mutex_unlock(&mdsc->mutex);
+		if (cap) {
+			list_add(&cap->caps_item, &newcaps);
+			alloc++;
+			i++;
+			continue;
+		}
 
-					mutex_lock(&s->s_mutex);
-					max_caps = s->s_nr_caps - (need - i);
-					ceph_trim_caps(mdsc, s, max_caps);
-					mutex_unlock(&s->s_mutex);
+		if (!trimmed) {
+			for (j = 0; j < mdsc->max_sessions; j++) {
+				s = __ceph_lookup_mds_session(mdsc, j);
+				if (!s)
+					continue;
+				mutex_unlock(&mdsc->mutex);
 
-					ceph_put_mds_session(s);
-					mutex_lock(&mdsc->mutex);
-				}
-				trimmed = true;
-				goto retry;
-			} else {
-				pr_warn("reserve caps ctx=%p ENOMEM "
-					"need=%d got=%d\n",
-					ctx, need, have + alloc);
-				goto out_nomem;
+				mutex_lock(&s->s_mutex);
+				max_caps = s->s_nr_caps - (need - i);
+				ceph_trim_caps(mdsc, s, max_caps);
+				mutex_unlock(&s->s_mutex);
+
+				ceph_put_mds_session(s);
+				mutex_lock(&mdsc->mutex);
 			}
+			trimmed = true;
+
+			spin_lock(&mdsc->caps_list_lock);
+			if (mdsc->caps_avail_count) {
+				int more_have;
+				if (mdsc->caps_avail_count >= need - i)
+					more_have = need - i;
+				else
+					more_have = mdsc->caps_avail_count;
+
+				i += more_have;
+				have += more_have;
+				mdsc->caps_avail_count -= more_have;
+				mdsc->caps_reserve_count += more_have;
+
+			}
+			spin_unlock(&mdsc->caps_list_lock);
+
+			continue;
 		}
-		list_add(&cap->caps_item, &newcaps);
-		alloc++;
+
+		pr_warn("reserve caps ctx=%p ENOMEM need=%d got=%d\n",
+			ctx, need, have + alloc);
+		goto out_nomem;
 	}
 	BUG_ON(have + alloc != need);
 
@@ -234,16 +252,28 @@ retry:
 	return 0;
 
 out_nomem:
+
+	spin_lock(&mdsc->caps_list_lock);
+	mdsc->caps_avail_count += have;
+	mdsc->caps_reserve_count -= have;
+
 	while (!list_empty(&newcaps)) {
 		cap = list_first_entry(&newcaps,
 				struct ceph_cap, caps_item);
 		list_del(&cap->caps_item);
-		kmem_cache_free(ceph_cap_cachep, cap);
+
+		/* Keep some preallocated caps around (ceph_min_count), to
+		 * avoid lots of free/alloc churn. */
+		if (mdsc->caps_avail_count >=
+		    mdsc->caps_reserve_count + mdsc->caps_min_count) {
+			kmem_cache_free(ceph_cap_cachep, cap);
+		} else {
+			mdsc->caps_avail_count++;
+			mdsc->caps_total_count++;
+			list_add(&cap->caps_item, &mdsc->caps_list);
+		}
 	}
 
-	spin_lock(&mdsc->caps_list_lock);
-	mdsc->caps_avail_count += have;
-	mdsc->caps_reserve_count -= have;
 	BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
 					 mdsc->caps_reserve_count +
 					 mdsc->caps_avail_count);
@@ -254,12 +284,26 @@ out_nomem:
 int ceph_unreserve_caps(struct ceph_mds_client *mdsc,
 			struct ceph_cap_reservation *ctx)
 {
+	int i;
+	struct ceph_cap *cap;
+
 	dout("unreserve caps ctx=%p count=%d\n", ctx, ctx->count);
 	if (ctx->count) {
 		spin_lock(&mdsc->caps_list_lock);
 		BUG_ON(mdsc->caps_reserve_count < ctx->count);
 		mdsc->caps_reserve_count -= ctx->count;
-		mdsc->caps_avail_count += ctx->count;
+		if (mdsc->caps_avail_count >=
+		    mdsc->caps_reserve_count + mdsc->caps_min_count) {
+			mdsc->caps_total_count -= ctx->count;
+			for (i = 0; i < ctx->count; i++) {
+				cap = list_first_entry(&mdsc->caps_list,
+					struct ceph_cap, caps_item);
+				list_del(&cap->caps_item);
+				kmem_cache_free(ceph_cap_cachep, cap);
+			}
+		} else {
+			mdsc->caps_avail_count += ctx->count;
+		}
 		ctx->count = 0;
 		dout("unreserve caps %d = %d used + %d resv + %d avail\n",
 		     mdsc->caps_total_count, mdsc->caps_use_count,
@@ -285,7 +329,23 @@ struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc,
 			mdsc->caps_use_count++;
 			mdsc->caps_total_count++;
 			spin_unlock(&mdsc->caps_list_lock);
+		} else {
+			spin_lock(&mdsc->caps_list_lock);
+			if (mdsc->caps_avail_count) {
+				BUG_ON(list_empty(&mdsc->caps_list));
+
+				mdsc->caps_avail_count--;
+				mdsc->caps_use_count++;
+				cap = list_first_entry(&mdsc->caps_list,
+						struct ceph_cap, caps_item);
+				list_del(&cap->caps_item);
+
+				BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
+				       mdsc->caps_reserve_count + mdsc->caps_avail_count);
+			}
+			spin_unlock(&mdsc->caps_list_lock);
 		}
+
 		return cap;
 	}
 
@@ -341,6 +401,8 @@ void ceph_reservation_status(struct ceph_fs_client *fsc,
 {
 	struct ceph_mds_client *mdsc = fsc->mdsc;
 
+	spin_lock(&mdsc->caps_list_lock);
+
 	if (total)
 		*total = mdsc->caps_total_count;
 	if (avail)
@@ -351,6 +413,8 @@ void ceph_reservation_status(struct ceph_fs_client *fsc,
 		*reserved = mdsc->caps_reserve_count;
 	if (min)
 		*min = mdsc->caps_min_count;
+
+	spin_unlock(&mdsc->caps_list_lock);
 }
 
 /*
@@ -639,9 +703,11 @@ void ceph_add_cap(struct inode *inode,
 			}
 
 			spin_lock(&realm->inodes_with_caps_lock);
-			ci->i_snap_realm = realm;
 			list_add(&ci->i_snap_realm_item,
 				 &realm->inodes_with_caps);
+			ci->i_snap_realm = realm;
+			if (realm->ino == ci->i_vino.ino)
+				realm->inode = inode;
 			spin_unlock(&realm->inodes_with_caps_lock);
 
 			if (oldrealm)
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 644def813754..abdf98deeec4 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -260,7 +260,7 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
 		goto out;
 
 	fsc->debugfs_mdsmap = debugfs_create_file("mdsmap",
-					0600,
+					0400,
 					fsc->client->debugfs_dir,
 					fsc,
 					&mdsmap_show_fops);
@@ -268,7 +268,7 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
 		goto out;
 
 	fsc->debugfs_mds_sessions = debugfs_create_file("mds_sessions",
-					0600,
+					0400,
 					fsc->client->debugfs_dir,
 					fsc,
 					&mds_sessions_show_fops);
@@ -276,7 +276,7 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
 		goto out;
 
 	fsc->debugfs_mdsc = debugfs_create_file("mdsc",
-						0600,
+						0400,
 						fsc->client->debugfs_dir,
 						fsc,
 						&mdsc_show_fops);
@@ -292,7 +292,7 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
 		goto out;
 
 	fsc->debugfs_dentry_lru = debugfs_create_file("dentry_lru",
-					0600,
+					0400,
 					fsc->client->debugfs_dir,
 					fsc,
 					&dentry_lru_show_fops);
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 2bdd561c4c68..1a78dd6f8bf2 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -101,18 +101,18 @@ static int fpos_cmp(loff_t l, loff_t r)
  * regardless of what dir changes take place on the
  * server.
  */
-static int note_last_dentry(struct ceph_file_info *fi, const char *name,
+static int note_last_dentry(struct ceph_dir_file_info *dfi, const char *name,
 		            int len, unsigned next_offset)
 {
 	char *buf = kmalloc(len+1, GFP_KERNEL);
 	if (!buf)
 		return -ENOMEM;
-	kfree(fi->last_name);
-	fi->last_name = buf;
-	memcpy(fi->last_name, name, len);
-	fi->last_name[len] = 0;
-	fi->next_offset = next_offset;
-	dout("note_last_dentry '%s'\n", fi->last_name);
+	kfree(dfi->last_name);
+	dfi->last_name = buf;
+	memcpy(dfi->last_name, name, len);
+	dfi->last_name[len] = 0;
+	dfi->next_offset = next_offset;
+	dout("note_last_dentry '%s'\n", dfi->last_name);
 	return 0;
 }
 
@@ -174,7 +174,7 @@ __dcache_find_get_entry(struct dentry *parent, u64 idx,
 static int __dcache_readdir(struct file *file,  struct dir_context *ctx,
 			    int shared_gen)
 {
-	struct ceph_file_info *fi = file->private_data;
+	struct ceph_dir_file_info *dfi = file->private_data;
 	struct dentry *parent = file->f_path.dentry;
 	struct inode *dir = d_inode(parent);
 	struct dentry *dentry, *last = NULL;
@@ -221,7 +221,7 @@ static int __dcache_readdir(struct file *file,  struct dir_context *ctx,
 		bool emit_dentry = false;
 		dentry = __dcache_find_get_entry(parent, idx++, &cache_ctl);
 		if (!dentry) {
-			fi->flags |= CEPH_F_ATEND;
+			dfi->file_info.flags |= CEPH_F_ATEND;
 			err = 0;
 			break;
 		}
@@ -272,33 +272,33 @@ out:
 	if (last) {
 		int ret;
 		di = ceph_dentry(last);
-		ret = note_last_dentry(fi, last->d_name.name, last->d_name.len,
+		ret = note_last_dentry(dfi, last->d_name.name, last->d_name.len,
 				       fpos_off(di->offset) + 1);
 		if (ret < 0)
 			err = ret;
 		dput(last);
 		/* last_name no longer match cache index */
-		if (fi->readdir_cache_idx >= 0) {
-			fi->readdir_cache_idx = -1;
-			fi->dir_release_count = 0;
+		if (dfi->readdir_cache_idx >= 0) {
+			dfi->readdir_cache_idx = -1;
+			dfi->dir_release_count = 0;
 		}
 	}
 	return err;
 }
 
-static bool need_send_readdir(struct ceph_file_info *fi, loff_t pos)
+static bool need_send_readdir(struct ceph_dir_file_info *dfi, loff_t pos)
 {
-	if (!fi->last_readdir)
+	if (!dfi->last_readdir)
 		return true;
 	if (is_hash_order(pos))
-		return !ceph_frag_contains_value(fi->frag, fpos_hash(pos));
+		return !ceph_frag_contains_value(dfi->frag, fpos_hash(pos));
 	else
-		return fi->frag != fpos_frag(pos);
+		return dfi->frag != fpos_frag(pos);
 }
 
 static int ceph_readdir(struct file *file, struct dir_context *ctx)
 {
-	struct ceph_file_info *fi = file->private_data;
+	struct ceph_dir_file_info *dfi = file->private_data;
 	struct inode *inode = file_inode(file);
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
@@ -309,7 +309,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
 	struct ceph_mds_reply_info_parsed *rinfo;
 
 	dout("readdir %p file %p pos %llx\n", inode, file, ctx->pos);
-	if (fi->flags & CEPH_F_ATEND)
+	if (dfi->file_info.flags & CEPH_F_ATEND)
 		return 0;
 
 	/* always start with . and .. */
@@ -350,15 +350,15 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
 	/* proceed with a normal readdir */
 more:
 	/* do we have the correct frag content buffered? */
-	if (need_send_readdir(fi, ctx->pos)) {
+	if (need_send_readdir(dfi, ctx->pos)) {
 		struct ceph_mds_request *req;
 		int op = ceph_snap(inode) == CEPH_SNAPDIR ?
 			CEPH_MDS_OP_LSSNAP : CEPH_MDS_OP_READDIR;
 
 		/* discard old result, if any */
-		if (fi->last_readdir) {
-			ceph_mdsc_put_request(fi->last_readdir);
-			fi->last_readdir = NULL;
+		if (dfi->last_readdir) {
+			ceph_mdsc_put_request(dfi->last_readdir);
+			dfi->last_readdir = NULL;
 		}
 
 		if (is_hash_order(ctx->pos)) {
@@ -372,7 +372,7 @@ more:
 		}
 
 		dout("readdir fetching %llx.%llx frag %x offset '%s'\n",
-		     ceph_vinop(inode), frag, fi->last_name);
+		     ceph_vinop(inode), frag, dfi->last_name);
 		req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
 		if (IS_ERR(req))
 			return PTR_ERR(req);
@@ -388,8 +388,8 @@ more:
 			__set_bit(CEPH_MDS_R_DIRECT_IS_HASH, &req->r_req_flags);
 			req->r_inode_drop = CEPH_CAP_FILE_EXCL;
 		}
-		if (fi->last_name) {
-			req->r_path2 = kstrdup(fi->last_name, GFP_KERNEL);
+		if (dfi->last_name) {
+			req->r_path2 = kstrdup(dfi->last_name, GFP_KERNEL);
 			if (!req->r_path2) {
 				ceph_mdsc_put_request(req);
 				return -ENOMEM;
@@ -399,10 +399,10 @@ more:
 				cpu_to_le32(fpos_hash(ctx->pos));
 		}
 
-		req->r_dir_release_cnt = fi->dir_release_count;
-		req->r_dir_ordered_cnt = fi->dir_ordered_count;
-		req->r_readdir_cache_idx = fi->readdir_cache_idx;
-		req->r_readdir_offset = fi->next_offset;
+		req->r_dir_release_cnt = dfi->dir_release_count;
+		req->r_dir_ordered_cnt = dfi->dir_ordered_count;
+		req->r_readdir_cache_idx = dfi->readdir_cache_idx;
+		req->r_readdir_offset = dfi->next_offset;
 		req->r_args.readdir.frag = cpu_to_le32(frag);
 		req->r_args.readdir.flags =
 				cpu_to_le16(CEPH_READDIR_REPLY_BITFLAGS);
@@ -426,35 +426,35 @@ more:
 		if (le32_to_cpu(rinfo->dir_dir->frag) != frag) {
 			frag = le32_to_cpu(rinfo->dir_dir->frag);
 			if (!rinfo->hash_order) {
-				fi->next_offset = req->r_readdir_offset;
+				dfi->next_offset = req->r_readdir_offset;
 				/* adjust ctx->pos to beginning of frag */
 				ctx->pos = ceph_make_fpos(frag,
-							  fi->next_offset,
+							  dfi->next_offset,
 							  false);
 			}
 		}
 
-		fi->frag = frag;
-		fi->last_readdir = req;
+		dfi->frag = frag;
+		dfi->last_readdir = req;
 
 		if (test_bit(CEPH_MDS_R_DID_PREPOPULATE, &req->r_req_flags)) {
-			fi->readdir_cache_idx = req->r_readdir_cache_idx;
-			if (fi->readdir_cache_idx < 0) {
+			dfi->readdir_cache_idx = req->r_readdir_cache_idx;
+			if (dfi->readdir_cache_idx < 0) {
 				/* preclude from marking dir ordered */
-				fi->dir_ordered_count = 0;
+				dfi->dir_ordered_count = 0;
 			} else if (ceph_frag_is_leftmost(frag) &&
-				   fi->next_offset == 2) {
+				   dfi->next_offset == 2) {
 				/* note dir version at start of readdir so
 				 * we can tell if any dentries get dropped */
-				fi->dir_release_count = req->r_dir_release_cnt;
-				fi->dir_ordered_count = req->r_dir_ordered_cnt;
+				dfi->dir_release_count = req->r_dir_release_cnt;
+				dfi->dir_ordered_count = req->r_dir_ordered_cnt;
 			}
 		} else {
-			dout("readdir !did_prepopulate");
+			dout("readdir !did_prepopulate\n");
 			/* disable readdir cache */
-			fi->readdir_cache_idx = -1;
+			dfi->readdir_cache_idx = -1;
 			/* preclude from marking dir complete */
-			fi->dir_release_count = 0;
+			dfi->dir_release_count = 0;
 		}
 
 		/* note next offset and last dentry name */
@@ -463,19 +463,19 @@ more:
 					rinfo->dir_entries + (rinfo->dir_nr-1);
 			unsigned next_offset = req->r_reply_info.dir_end ?
 					2 : (fpos_off(rde->offset) + 1);
-			err = note_last_dentry(fi, rde->name, rde->name_len,
+			err = note_last_dentry(dfi, rde->name, rde->name_len,
 					       next_offset);
 			if (err)
 				return err;
 		} else if (req->r_reply_info.dir_end) {
-			fi->next_offset = 2;
+			dfi->next_offset = 2;
 			/* keep last name */
 		}
 	}
 
-	rinfo = &fi->last_readdir->r_reply_info;
+	rinfo = &dfi->last_readdir->r_reply_info;
 	dout("readdir frag %x num %d pos %llx chunk first %llx\n",
-	     fi->frag, rinfo->dir_nr, ctx->pos,
+	     dfi->frag, rinfo->dir_nr, ctx->pos,
 	     rinfo->dir_nr ? rinfo->dir_entries[0].offset : 0LL);
 
 	i = 0;
@@ -519,52 +519,55 @@ more:
 		ctx->pos++;
 	}
 
-	ceph_mdsc_put_request(fi->last_readdir);
-	fi->last_readdir = NULL;
+	ceph_mdsc_put_request(dfi->last_readdir);
+	dfi->last_readdir = NULL;
 
-	if (fi->next_offset > 2) {
-		frag = fi->frag;
+	if (dfi->next_offset > 2) {
+		frag = dfi->frag;
 		goto more;
 	}
 
 	/* more frags? */
-	if (!ceph_frag_is_rightmost(fi->frag)) {
-		frag = ceph_frag_next(fi->frag);
+	if (!ceph_frag_is_rightmost(dfi->frag)) {
+		frag = ceph_frag_next(dfi->frag);
 		if (is_hash_order(ctx->pos)) {
 			loff_t new_pos = ceph_make_fpos(ceph_frag_value(frag),
-							fi->next_offset, true);
+							dfi->next_offset, true);
 			if (new_pos > ctx->pos)
 				ctx->pos = new_pos;
 			/* keep last_name */
 		} else {
-			ctx->pos = ceph_make_fpos(frag, fi->next_offset, false);
-			kfree(fi->last_name);
-			fi->last_name = NULL;
+			ctx->pos = ceph_make_fpos(frag, dfi->next_offset,
+							false);
+			kfree(dfi->last_name);
+			dfi->last_name = NULL;
 		}
 		dout("readdir next frag is %x\n", frag);
 		goto more;
 	}
-	fi->flags |= CEPH_F_ATEND;
+	dfi->file_info.flags |= CEPH_F_ATEND;
 
 	/*
 	 * if dir_release_count still matches the dir, no dentries
 	 * were released during the whole readdir, and we should have
 	 * the complete dir contents in our cache.
 	 */
-	if (atomic64_read(&ci->i_release_count) == fi->dir_release_count) {
+	if (atomic64_read(&ci->i_release_count) ==
+					dfi->dir_release_count) {
 		spin_lock(&ci->i_ceph_lock);
-		if (fi->dir_ordered_count == atomic64_read(&ci->i_ordered_count)) {
+		if (dfi->dir_ordered_count ==
+				atomic64_read(&ci->i_ordered_count)) {
 			dout(" marking %p complete and ordered\n", inode);
 			/* use i_size to track number of entries in
 			 * readdir cache */
-			BUG_ON(fi->readdir_cache_idx < 0);
-			i_size_write(inode, fi->readdir_cache_idx *
+			BUG_ON(dfi->readdir_cache_idx < 0);
+			i_size_write(inode, dfi->readdir_cache_idx *
 				     sizeof(struct dentry*));
 		} else {
 			dout(" marking %p complete\n", inode);
 		}
-		__ceph_dir_set_complete(ci, fi->dir_release_count,
-					fi->dir_ordered_count);
+		__ceph_dir_set_complete(ci, dfi->dir_release_count,
+					dfi->dir_ordered_count);
 		spin_unlock(&ci->i_ceph_lock);
 	}
 
@@ -572,25 +575,25 @@ more:
 	return 0;
 }
 
-static void reset_readdir(struct ceph_file_info *fi)
+static void reset_readdir(struct ceph_dir_file_info *dfi)
 {
-	if (fi->last_readdir) {
-		ceph_mdsc_put_request(fi->last_readdir);
-		fi->last_readdir = NULL;
+	if (dfi->last_readdir) {
+		ceph_mdsc_put_request(dfi->last_readdir);
+		dfi->last_readdir = NULL;
 	}
-	kfree(fi->last_name);
-	fi->last_name = NULL;
-	fi->dir_release_count = 0;
-	fi->readdir_cache_idx = -1;
-	fi->next_offset = 2;  /* compensate for . and .. */
-	fi->flags &= ~CEPH_F_ATEND;
+	kfree(dfi->last_name);
+	dfi->last_name = NULL;
+	dfi->dir_release_count = 0;
+	dfi->readdir_cache_idx = -1;
+	dfi->next_offset = 2;  /* compensate for . and .. */
+	dfi->file_info.flags &= ~CEPH_F_ATEND;
 }
 
 /*
  * discard buffered readdir content on seekdir(0), or seek to new frag,
  * or seek prior to current chunk
  */
-static bool need_reset_readdir(struct ceph_file_info *fi, loff_t new_pos)
+static bool need_reset_readdir(struct ceph_dir_file_info *dfi, loff_t new_pos)
 {
 	struct ceph_mds_reply_info_parsed *rinfo;
 	loff_t chunk_offset;
@@ -599,10 +602,10 @@ static bool need_reset_readdir(struct ceph_file_info *fi, loff_t new_pos)
 	if (is_hash_order(new_pos)) {
 		/* no need to reset last_name for a forward seek when
 		 * dentries are sotred in hash order */
-	} else if (fi->frag != fpos_frag(new_pos)) {
+	} else if (dfi->frag != fpos_frag(new_pos)) {
 		return true;
 	}
-	rinfo = fi->last_readdir ? &fi->last_readdir->r_reply_info : NULL;
+	rinfo = dfi->last_readdir ? &dfi->last_readdir->r_reply_info : NULL;
 	if (!rinfo || !rinfo->dir_nr)
 		return true;
 	chunk_offset = rinfo->dir_entries[0].offset;
@@ -612,7 +615,7 @@ static bool need_reset_readdir(struct ceph_file_info *fi, loff_t new_pos)
 
 static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
 {
-	struct ceph_file_info *fi = file->private_data;
+	struct ceph_dir_file_info *dfi = file->private_data;
 	struct inode *inode = file->f_mapping->host;
 	loff_t retval;
 
@@ -630,20 +633,20 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
 	}
 
 	if (offset >= 0) {
-		if (need_reset_readdir(fi, offset)) {
+		if (need_reset_readdir(dfi, offset)) {
 			dout("dir_llseek dropping %p content\n", file);
-			reset_readdir(fi);
+			reset_readdir(dfi);
 		} else if (is_hash_order(offset) && offset > file->f_pos) {
 			/* for hash offset, we don't know if a forward seek
 			 * is within same frag */
-			fi->dir_release_count = 0;
-			fi->readdir_cache_idx = -1;
+			dfi->dir_release_count = 0;
+			dfi->readdir_cache_idx = -1;
 		}
 
 		if (offset != file->f_pos) {
 			file->f_pos = offset;
 			file->f_version = 0;
-			fi->flags &= ~CEPH_F_ATEND;
+			dfi->file_info.flags &= ~CEPH_F_ATEND;
 		}
 		retval = offset;
 	}
@@ -824,6 +827,9 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry,
 	if (ceph_snap(dir) != CEPH_NOSNAP)
 		return -EROFS;
 
+	if (ceph_quota_is_max_files_exceeded(dir))
+		return -EDQUOT;
+
 	err = ceph_pre_init_acls(dir, &mode, &acls);
 	if (err < 0)
 		return err;
@@ -877,6 +883,9 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry,
 	if (ceph_snap(dir) != CEPH_NOSNAP)
 		return -EROFS;
 
+	if (ceph_quota_is_max_files_exceeded(dir))
+		return -EDQUOT;
+
 	dout("symlink in dir %p dentry %p to '%s'\n", dir, dentry, dest);
 	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SYMLINK, USE_AUTH_MDS);
 	if (IS_ERR(req)) {
@@ -926,6 +935,12 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 		goto out;
 	}
 
+	if (op == CEPH_MDS_OP_MKDIR &&
+	    ceph_quota_is_max_files_exceeded(dir)) {
+		err = -EDQUOT;
+		goto out;
+	}
+
 	mode |= S_IFDIR;
 	err = ceph_pre_init_acls(dir, &mode, &acls);
 	if (err < 0)
@@ -1065,6 +1080,11 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
 		else
 			return -EROFS;
 	}
+	/* don't allow cross-quota renames */
+	if ((old_dir != new_dir) &&
+	    (!ceph_quota_is_same_realm(old_dir, new_dir)))
+		return -EXDEV;
+
 	dout("rename dir %p dentry %p to dir %p dentry %p\n",
 	     old_dir, old_dentry, new_dir, new_dentry);
 	req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
@@ -1351,7 +1371,7 @@ static void ceph_d_prune(struct dentry *dentry)
 static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
 			     loff_t *ppos)
 {
-	struct ceph_file_info *cf = file->private_data;
+	struct ceph_dir_file_info *dfi = file->private_data;
 	struct inode *inode = file_inode(file);
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	int left;
@@ -1360,12 +1380,12 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
 	if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT))
 		return -EISDIR;
 
-	if (!cf->dir_info) {
-		cf->dir_info = kmalloc(bufsize, GFP_KERNEL);
-		if (!cf->dir_info)
+	if (!dfi->dir_info) {
+		dfi->dir_info = kmalloc(bufsize, GFP_KERNEL);
+		if (!dfi->dir_info)
 			return -ENOMEM;
-		cf->dir_info_len =
-			snprintf(cf->dir_info, bufsize,
+		dfi->dir_info_len =
+			snprintf(dfi->dir_info, bufsize,
 				"entries:   %20lld\n"
 				" files:    %20lld\n"
 				" subdirs:  %20lld\n"
@@ -1385,10 +1405,10 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
 				(long)ci->i_rctime.tv_nsec);
 	}
 
-	if (*ppos >= cf->dir_info_len)
+	if (*ppos >= dfi->dir_info_len)
 		return 0;
-	size = min_t(unsigned, size, cf->dir_info_len-*ppos);
-	left = copy_to_user(buf, cf->dir_info + *ppos, size);
+	size = min_t(unsigned, size, dfi->dir_info_len-*ppos);
+	left = copy_to_user(buf, dfi->dir_info + *ppos, size);
 	if (left == size)
 		return -EFAULT;
 	*ppos += (size - left);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index b67eec3532a1..f85040d73e3d 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -30,6 +30,8 @@ static __le32 ceph_flags_sys2wire(u32 flags)
 		break;
 	}
 
+	flags &= ~O_ACCMODE;
+
 #define ceph_sys2wire(a) if (flags & a) { wire_flags |= CEPH_##a; flags &= ~a; }
 
 	ceph_sys2wire(O_CREAT);
@@ -41,7 +43,7 @@ static __le32 ceph_flags_sys2wire(u32 flags)
 #undef ceph_sys2wire
 
 	if (flags)
-		dout("unused open flags: %x", flags);
+		dout("unused open flags: %x\n", flags);
 
 	return cpu_to_le32(wire_flags);
 }
@@ -159,13 +161,50 @@ out:
 	return req;
 }
 
+static int ceph_init_file_info(struct inode *inode, struct file *file,
+					int fmode, bool isdir)
+{
+	struct ceph_file_info *fi;
+
+	dout("%s %p %p 0%o (%s)\n", __func__, inode, file,
+			inode->i_mode, isdir ? "dir" : "regular");
+	BUG_ON(inode->i_fop->release != ceph_release);
+
+	if (isdir) {
+		struct ceph_dir_file_info *dfi =
+			kmem_cache_zalloc(ceph_dir_file_cachep, GFP_KERNEL);
+		if (!dfi) {
+			ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */
+			return -ENOMEM;
+		}
+
+		file->private_data = dfi;
+		fi = &dfi->file_info;
+		dfi->next_offset = 2;
+		dfi->readdir_cache_idx = -1;
+	} else {
+		fi = kmem_cache_zalloc(ceph_file_cachep, GFP_KERNEL);
+		if (!fi) {
+			ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */
+			return -ENOMEM;
+		}
+
+		file->private_data = fi;
+	}
+
+	fi->fmode = fmode;
+	spin_lock_init(&fi->rw_contexts_lock);
+	INIT_LIST_HEAD(&fi->rw_contexts);
+
+	return 0;
+}
+
 /*
  * initialize private struct file data.
  * if we fail, clean up by dropping fmode reference on the ceph_inode
  */
 static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
 {
-	struct ceph_file_info *cf;
 	int ret = 0;
 
 	switch (inode->i_mode & S_IFMT) {
@@ -173,22 +212,10 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
 		ceph_fscache_register_inode_cookie(inode);
 		ceph_fscache_file_set_cookie(inode, file);
 	case S_IFDIR:
-		dout("init_file %p %p 0%o (regular)\n", inode, file,
-		     inode->i_mode);
-		cf = kmem_cache_zalloc(ceph_file_cachep, GFP_KERNEL);
-		if (!cf) {
-			ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */
-			return -ENOMEM;
-		}
-		cf->fmode = fmode;
-
-		spin_lock_init(&cf->rw_contexts_lock);
-		INIT_LIST_HEAD(&cf->rw_contexts);
-
-		cf->next_offset = 2;
-		cf->readdir_cache_idx = -1;
-		file->private_data = cf;
-		BUG_ON(inode->i_fop->release != ceph_release);
+		ret = ceph_init_file_info(inode, file, fmode,
+						S_ISDIR(inode->i_mode));
+		if (ret)
+			return ret;
 		break;
 
 	case S_IFLNK:
@@ -278,11 +305,11 @@ int ceph_open(struct inode *inode, struct file *file)
 	struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
 	struct ceph_mds_client *mdsc = fsc->mdsc;
 	struct ceph_mds_request *req;
-	struct ceph_file_info *cf = file->private_data;
+	struct ceph_file_info *fi = file->private_data;
 	int err;
 	int flags, fmode, wanted;
 
-	if (cf) {
+	if (fi) {
 		dout("open file %p is already opened\n", file);
 		return 0;
 	}
@@ -375,7 +402,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
 	struct ceph_mds_request *req;
 	struct dentry *dn;
 	struct ceph_acls_info acls = {};
-       int mask;
+	int mask;
 	int err;
 
 	dout("atomic_open %p dentry %p '%pd' %s flags %d mode 0%o\n",
@@ -386,6 +413,8 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
 		return -ENAMETOOLONG;
 
 	if (flags & O_CREAT) {
+		if (ceph_quota_is_max_files_exceeded(dir))
+			return -EDQUOT;
 		err = ceph_pre_init_acls(dir, &mode, &acls);
 		if (err < 0)
 			return err;
@@ -460,16 +489,27 @@ out_acl:
 int ceph_release(struct inode *inode, struct file *file)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_file_info *cf = file->private_data;
 
-	dout("release inode %p file %p\n", inode, file);
-	ceph_put_fmode(ci, cf->fmode);
-	if (cf->last_readdir)
-		ceph_mdsc_put_request(cf->last_readdir);
-	kfree(cf->last_name);
-	kfree(cf->dir_info);
-	WARN_ON(!list_empty(&cf->rw_contexts));
-	kmem_cache_free(ceph_file_cachep, cf);
+	if (S_ISDIR(inode->i_mode)) {
+		struct ceph_dir_file_info *dfi = file->private_data;
+		dout("release inode %p dir file %p\n", inode, file);
+		WARN_ON(!list_empty(&dfi->file_info.rw_contexts));
+
+		ceph_put_fmode(ci, dfi->file_info.fmode);
+
+		if (dfi->last_readdir)
+			ceph_mdsc_put_request(dfi->last_readdir);
+		kfree(dfi->last_name);
+		kfree(dfi->dir_info);
+		kmem_cache_free(ceph_dir_file_cachep, dfi);
+	} else {
+		struct ceph_file_info *fi = file->private_data;
+		dout("release inode %p regular file %p\n", inode, file);
+		WARN_ON(!list_empty(&fi->rw_contexts));
+
+		ceph_put_fmode(ci, fi->fmode);
+		kmem_cache_free(ceph_file_cachep, fi);
+	}
 
 	/* wake up anyone waiting for caps on this inode */
 	wake_up_all(&ci->i_cap_wq);
@@ -1338,6 +1378,11 @@ retry_snap:
 
 	pos = iocb->ki_pos;
 	count = iov_iter_count(from);
+	if (ceph_quota_is_max_bytes_exceeded(inode, pos + count)) {
+		err = -EDQUOT;
+		goto out;
+	}
+
 	err = file_remove_privs(file);
 	if (err)
 		goto out;
@@ -1419,6 +1464,7 @@ retry_snap:
 
 	if (written >= 0) {
 		int dirty;
+
 		spin_lock(&ci->i_ceph_lock);
 		ci->i_inline_version = CEPH_INLINE_NONE;
 		dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR,
@@ -1426,6 +1472,8 @@ retry_snap:
 		spin_unlock(&ci->i_ceph_lock);
 		if (dirty)
 			__mark_inode_dirty(inode, dirty);
+		if (ceph_quota_is_max_bytes_approaching(inode, iocb->ki_pos))
+			ceph_check_caps(ci, CHECK_CAPS_NODELAY, NULL);
 	}
 
 	dout("aio_write %p %llx.%llx %llu~%u  dropping cap refs on %s\n",
@@ -1668,6 +1716,12 @@ static long ceph_fallocate(struct file *file, int mode,
 		goto unlock;
 	}
 
+	if (!(mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE)) &&
+	    ceph_quota_is_max_bytes_exceeded(inode, offset + length)) {
+		ret = -EDQUOT;
+		goto unlock;
+	}
+
 	if (ceph_osdmap_flag(osdc, CEPH_OSDMAP_FULL) &&
 	    !(mode & FALLOC_FL_PUNCH_HOLE)) {
 		ret = -ENOSPC;
@@ -1716,6 +1770,9 @@ static long ceph_fallocate(struct file *file, int mode,
 		spin_unlock(&ci->i_ceph_lock);
 		if (dirty)
 			__mark_inode_dirty(inode, dirty);
+		if ((endoff > size) &&
+		    ceph_quota_is_max_bytes_approaching(inode, endoff))
+			ceph_check_caps(ci, CHECK_CAPS_NODELAY, NULL);
 	}
 
 	ceph_put_cap_refs(ci, got);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index c6ec5aa46100..8bf60250309e 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -441,6 +441,9 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
 	atomic64_set(&ci->i_complete_seq[1], 0);
 	ci->i_symlink = NULL;
 
+	ci->i_max_bytes = 0;
+	ci->i_max_files = 0;
+
 	memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout));
 	RCU_INIT_POINTER(ci->i_layout.pool_ns, NULL);
 
@@ -536,6 +539,9 @@ void ceph_destroy_inode(struct inode *inode)
 
 	ceph_queue_caps_release(inode);
 
+	if (__ceph_has_any_quota(ci))
+		ceph_adjust_quota_realms_count(inode, false);
+
 	/*
 	 * we may still have a snap_realm reference if there are stray
 	 * caps in i_snap_caps.
@@ -548,6 +554,9 @@ void ceph_destroy_inode(struct inode *inode)
 		dout(" dropping residual ref to snap realm %p\n", realm);
 		spin_lock(&realm->inodes_with_caps_lock);
 		list_del_init(&ci->i_snap_realm_item);
+		ci->i_snap_realm = NULL;
+		if (realm->ino == ci->i_vino.ino)
+			realm->inode = NULL;
 		spin_unlock(&realm->inodes_with_caps_lock);
 		ceph_put_snap_realm(mdsc, realm);
 	}
@@ -790,6 +799,8 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
 	inode->i_rdev = le32_to_cpu(info->rdev);
 	inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
 
+	__ceph_update_quota(ci, iinfo->max_bytes, iinfo->max_files);
+
 	if ((new_version || (new_issued & CEPH_CAP_AUTH_SHARED)) &&
 	    (issued & CEPH_CAP_AUTH_EXCL) == 0) {
 		inode->i_mode = le32_to_cpu(info->mode);
@@ -1867,20 +1878,9 @@ retry:
 	 * possibly truncate them.. so write AND block!
 	 */
 	if (ci->i_wrbuffer_ref_head < ci->i_wrbuffer_ref) {
-		struct ceph_cap_snap *capsnap;
-		to = ci->i_truncate_size;
-		list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
-			// MDS should have revoked Frw caps
-			WARN_ON_ONCE(capsnap->writing);
-			if (capsnap->dirty_pages && capsnap->size > to)
-				to = capsnap->size;
-		}
 		spin_unlock(&ci->i_ceph_lock);
 		dout("__do_pending_vmtruncate %p flushing snaps first\n",
 		     inode);
-
-		truncate_pagecache(inode, to);
-
 		filemap_write_and_wait_range(&inode->i_data, 0,
 					     inode->i_sb->s_maxbytes);
 		goto retry;
@@ -2152,6 +2152,10 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
 	if (err != 0)
 		return err;
 
+	if ((attr->ia_valid & ATTR_SIZE) &&
+	    ceph_quota_is_max_bytes_exceeded(inode, attr->ia_size))
+		return -EDQUOT;
+
 	err = __ceph_setattr(inode, attr);
 
 	if (err >= 0 && (attr->ia_valid & ATTR_MODE))
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index 851aa69ec8f0..c90f03beb15d 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -5,7 +5,7 @@
 #include "super.h"
 #include "mds_client.h"
 #include "ioctl.h"
-
+#include <linux/ceph/striper.h>
 
 /*
  * ioctls
@@ -185,7 +185,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
 		&ceph_sb_to_client(inode->i_sb)->client->osdc;
 	struct ceph_object_locator oloc;
 	CEPH_DEFINE_OID_ONSTACK(oid);
-	u64 len = 1, olen;
+	u32 xlen;
 	u64 tmp;
 	struct ceph_pg pgid;
 	int r;
@@ -195,13 +195,8 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
 		return -EFAULT;
 
 	down_read(&osdc->lock);
-	r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, len,
-					  &dl.object_no, &dl.object_offset,
-					  &olen);
-	if (r < 0) {
-		up_read(&osdc->lock);
-		return -EIO;
-	}
+	ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, 1,
+				      &dl.object_no, &dl.object_offset, &xlen);
 	dl.file_offset -= dl.object_offset;
 	dl.object_size = ci->i_layout.object_size;
 	dl.block_size = ci->i_layout.stripe_unit;
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index 9e66f69ee8a5..9dae2ec7e1fa 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -95,7 +95,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode,
 	owner = secure_addr(fl->fl_owner);
 
 	dout("ceph_lock_message: rule: %d, op: %d, owner: %llx, pid: %llu, "
-	     "start: %llu, length: %llu, wait: %d, type: %d", (int)lock_type,
+	     "start: %llu, length: %llu, wait: %d, type: %d\n", (int)lock_type,
 	     (int)operation, owner, (u64)fl->fl_pid, fl->fl_start, length,
 	     wait, fl->fl_type);
 
@@ -132,7 +132,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode,
 	}
 	ceph_mdsc_put_request(req);
 	dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, "
-	     "length: %llu, wait: %d, type: %d, err code %d", (int)lock_type,
+	     "length: %llu, wait: %d, type: %d, err code %d\n", (int)lock_type,
 	     (int)operation, (u64)fl->fl_pid, fl->fl_start,
 	     length, wait, fl->fl_type, err);
 	return err;
@@ -226,7 +226,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
 	if (__mandatory_lock(file->f_mapping->host) && fl->fl_type != F_UNLCK)
 		return -ENOLCK;
 
-	dout("ceph_lock, fl_owner: %p", fl->fl_owner);
+	dout("ceph_lock, fl_owner: %p\n", fl->fl_owner);
 
 	/* set wait bit as appropriate, then make command as Ceph expects it*/
 	if (IS_GETLK(cmd))
@@ -264,7 +264,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
 	err = ceph_lock_message(CEPH_LOCK_FCNTL, op, inode, lock_cmd, wait, fl);
 	if (!err) {
 		if (op == CEPH_MDS_OP_SETFILELOCK) {
-			dout("mds locked, locking locally");
+			dout("mds locked, locking locally\n");
 			err = posix_lock_file(file, fl, NULL);
 			if (err) {
 				/* undo! This should only happen if
@@ -272,7 +272,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
 				 * deadlock. */
 				ceph_lock_message(CEPH_LOCK_FCNTL, op, inode,
 						  CEPH_LOCK_UNLOCK, 0, fl);
-				dout("got %d on posix_lock_file, undid lock",
+				dout("got %d on posix_lock_file, undid lock\n",
 				     err);
 			}
 		}
@@ -294,7 +294,7 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
 	if (fl->fl_type & LOCK_MAND)
 		return -EOPNOTSUPP;
 
-	dout("ceph_flock, fl_file: %p", fl->fl_file);
+	dout("ceph_flock, fl_file: %p\n", fl->fl_file);
 
 	spin_lock(&ci->i_ceph_lock);
 	if (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) {
@@ -329,7 +329,7 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
 			ceph_lock_message(CEPH_LOCK_FLOCK,
 					  CEPH_MDS_OP_SETFILELOCK,
 					  inode, CEPH_LOCK_UNLOCK, 0, fl);
-			dout("got %d on locks_lock_file_wait, undid lock", err);
+			dout("got %d on locks_lock_file_wait, undid lock\n", err);
 		}
 	}
 	return err;
@@ -356,7 +356,7 @@ void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count)
 			++(*flock_count);
 		spin_unlock(&ctx->flc_lock);
 	}
-	dout("counted %d flock locks and %d fcntl locks",
+	dout("counted %d flock locks and %d fcntl locks\n",
 	     *flock_count, *fcntl_count);
 }
 
@@ -384,7 +384,7 @@ static int lock_to_ceph_filelock(struct file_lock *lock,
 		cephlock->type = CEPH_LOCK_UNLOCK;
 		break;
 	default:
-		dout("Have unknown lock type %d", lock->fl_type);
+		dout("Have unknown lock type %d\n", lock->fl_type);
 		err = -EINVAL;
 	}
 
@@ -407,7 +407,7 @@ int ceph_encode_locks_to_buffer(struct inode *inode,
 	int seen_flock = 0;
 	int l = 0;
 
-	dout("encoding %d flock and %d fcntl locks", num_flock_locks,
+	dout("encoding %d flock and %d fcntl locks\n", num_flock_locks,
 	     num_fcntl_locks);
 
 	if (!ctx)
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 2e8f90f96540..5ece2e6ad154 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -100,6 +100,26 @@ static int parse_reply_info_in(void **p, void *end,
 	} else
 		info->inline_version = CEPH_INLINE_NONE;
 
+	if (features & CEPH_FEATURE_MDS_QUOTA) {
+		u8 struct_v, struct_compat;
+		u32 struct_len;
+
+		/*
+		 * both struct_v and struct_compat are expected to be >= 1
+		 */
+		ceph_decode_8_safe(p, end, struct_v, bad);
+		ceph_decode_8_safe(p, end, struct_compat, bad);
+		if (!struct_v || !struct_compat)
+			goto bad;
+		ceph_decode_32_safe(p, end, struct_len, bad);
+		ceph_decode_need(p, end, struct_len, bad);
+		ceph_decode_64_safe(p, end, info->max_bytes, bad);
+		ceph_decode_64_safe(p, end, info->max_files, bad);
+	} else {
+		info->max_bytes = 0;
+		info->max_files = 0;
+	}
+
 	info->pool_ns_len = 0;
 	info->pool_ns_data = NULL;
 	if (features & CEPH_FEATURE_FS_FILE_LAYOUT_V2) {
@@ -384,7 +404,7 @@ static struct ceph_mds_session *get_session(struct ceph_mds_session *s)
 		     refcount_read(&s->s_ref)-1, refcount_read(&s->s_ref));
 		return s;
 	} else {
-		dout("mdsc get_session %p 0 -- FAIL", s);
+		dout("mdsc get_session %p 0 -- FAIL\n", s);
 		return NULL;
 	}
 }
@@ -419,9 +439,10 @@ struct ceph_mds_session *__ceph_lookup_mds_session(struct ceph_mds_client *mdsc,
 
 static bool __have_session(struct ceph_mds_client *mdsc, int mds)
 {
-	if (mds >= mdsc->max_sessions)
+	if (mds >= mdsc->max_sessions || !mdsc->sessions[mds])
 		return false;
-	return mdsc->sessions[mds];
+	else
+		return true;
 }
 
 static int __verify_registered_session(struct ceph_mds_client *mdsc,
@@ -448,6 +469,25 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
 	s = kzalloc(sizeof(*s), GFP_NOFS);
 	if (!s)
 		return ERR_PTR(-ENOMEM);
+
+	if (mds >= mdsc->max_sessions) {
+		int newmax = 1 << get_count_order(mds + 1);
+		struct ceph_mds_session **sa;
+
+		dout("%s: realloc to %d\n", __func__, newmax);
+		sa = kcalloc(newmax, sizeof(void *), GFP_NOFS);
+		if (!sa)
+			goto fail_realloc;
+		if (mdsc->sessions) {
+			memcpy(sa, mdsc->sessions,
+			       mdsc->max_sessions * sizeof(void *));
+			kfree(mdsc->sessions);
+		}
+		mdsc->sessions = sa;
+		mdsc->max_sessions = newmax;
+	}
+
+	dout("%s: mds%d\n", __func__, mds);
 	s->s_mdsc = mdsc;
 	s->s_mds = mds;
 	s->s_state = CEPH_MDS_SESSION_NEW;
@@ -476,23 +516,6 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
 	INIT_LIST_HEAD(&s->s_cap_releases);
 	INIT_LIST_HEAD(&s->s_cap_flushing);
 
-	dout("register_session mds%d\n", mds);
-	if (mds >= mdsc->max_sessions) {
-		int newmax = 1 << get_count_order(mds+1);
-		struct ceph_mds_session **sa;
-
-		dout("register_session realloc to %d\n", newmax);
-		sa = kcalloc(newmax, sizeof(void *), GFP_NOFS);
-		if (!sa)
-			goto fail_realloc;
-		if (mdsc->sessions) {
-			memcpy(sa, mdsc->sessions,
-			       mdsc->max_sessions * sizeof(void *));
-			kfree(mdsc->sessions);
-		}
-		mdsc->sessions = sa;
-		mdsc->max_sessions = newmax;
-	}
 	mdsc->sessions[mds] = s;
 	atomic_inc(&mdsc->num_sessions);
 	refcount_inc(&s->s_ref);  /* one ref to sessions[], one to caller */
@@ -2531,10 +2554,10 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
 	 * Otherwise we just have to return an ESTALE
 	 */
 	if (result == -ESTALE) {
-		dout("got ESTALE on request %llu", req->r_tid);
+		dout("got ESTALE on request %llu\n", req->r_tid);
 		req->r_resend_mds = -1;
 		if (req->r_direct_mode != USE_AUTH_MDS) {
-			dout("not using auth, setting for that now");
+			dout("not using auth, setting for that now\n");
 			req->r_direct_mode = USE_AUTH_MDS;
 			__do_request(mdsc, req);
 			mutex_unlock(&mdsc->mutex);
@@ -2542,13 +2565,13 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
 		} else  {
 			int mds = __choose_mds(mdsc, req);
 			if (mds >= 0 && mds != req->r_session->s_mds) {
-				dout("but auth changed, so resending");
+				dout("but auth changed, so resending\n");
 				__do_request(mdsc, req);
 				mutex_unlock(&mdsc->mutex);
 				goto out;
 			}
 		}
-		dout("have to return ESTALE on request %llu", req->r_tid);
+		dout("have to return ESTALE on request %llu\n", req->r_tid);
 	}
 
 
@@ -3470,13 +3493,12 @@ void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session,
 }
 
 /*
- * drop all leases (and dentry refs) in preparation for umount
+ * lock unlock sessions, to wait ongoing session activities
  */
-static void drop_leases(struct ceph_mds_client *mdsc)
+static void lock_unlock_sessions(struct ceph_mds_client *mdsc)
 {
 	int i;
 
-	dout("drop_leases\n");
 	mutex_lock(&mdsc->mutex);
 	for (i = 0; i < mdsc->max_sessions; i++) {
 		struct ceph_mds_session *s = __ceph_lookup_mds_session(mdsc, i);
@@ -3572,7 +3594,6 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
 	if (!mdsc)
 		return -ENOMEM;
 	mdsc->fsc = fsc;
-	fsc->mdsc = mdsc;
 	mutex_init(&mdsc->mutex);
 	mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS);
 	if (!mdsc->mdsmap) {
@@ -3580,6 +3601,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
 		return -ENOMEM;
 	}
 
+	fsc->mdsc = mdsc;
 	init_completion(&mdsc->safe_umount_waiters);
 	init_waitqueue_head(&mdsc->session_close_wq);
 	INIT_LIST_HEAD(&mdsc->waiting_for_map);
@@ -3587,6 +3609,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
 	atomic_set(&mdsc->num_sessions, 0);
 	mdsc->max_sessions = 0;
 	mdsc->stopping = 0;
+	atomic64_set(&mdsc->quotarealms_count, 0);
 	mdsc->last_snap_seq = 0;
 	init_rwsem(&mdsc->snap_rwsem);
 	mdsc->snap_realms = RB_ROOT;
@@ -3660,7 +3683,7 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
 	dout("pre_umount\n");
 	mdsc->stopping = 1;
 
-	drop_leases(mdsc);
+	lock_unlock_sessions(mdsc);
 	ceph_flush_dirty_caps(mdsc);
 	wait_requests(mdsc);
 
@@ -3858,6 +3881,9 @@ void ceph_mdsc_destroy(struct ceph_fs_client *fsc)
 	struct ceph_mds_client *mdsc = fsc->mdsc;
 	dout("mdsc_destroy %p\n", mdsc);
 
+	if (!mdsc)
+		return;
+
 	/* flush out any connection work with references to us */
 	ceph_msgr_flush();
 
@@ -4077,6 +4103,9 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
 	case CEPH_MSG_CLIENT_LEASE:
 		handle_lease(mdsc, s, msg);
 		break;
+	case CEPH_MSG_CLIENT_QUOTA:
+		ceph_handle_quota(mdsc, s, msg);
+		break;
 
 	default:
 		pr_err("received unknown message type %d %s\n", type,
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 71e3b783ee6f..2ec3b5b35067 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -49,6 +49,8 @@ struct ceph_mds_reply_info_in {
 	char *inline_data;
 	u32 pool_ns_len;
 	char *pool_ns_data;
+	u64 max_bytes;
+	u64 max_files;
 };
 
 struct ceph_mds_reply_dir_entry {
@@ -312,6 +314,8 @@ struct ceph_mds_client {
 	int                     max_sessions;  /* len of s_mds_sessions */
 	int                     stopping;      /* true if shutting down */
 
+	atomic64_t		quotarealms_count; /* # realms with quota */
+
 	/*
 	 * snap_rwsem will cover cap linkage into snaprealms, and
 	 * realm snap contexts.  (later, we can do per-realm snap
diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c
new file mode 100644
index 000000000000..242bfa5c0539
--- /dev/null
+++ b/fs/ceph/quota.c
@@ -0,0 +1,361 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * quota.c - CephFS quota
+ *
+ * Copyright (C) 2017-2018 SUSE
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/statfs.h>
+
+#include "super.h"
+#include "mds_client.h"
+
+void ceph_adjust_quota_realms_count(struct inode *inode, bool inc)
+{
+	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
+	if (inc)
+		atomic64_inc(&mdsc->quotarealms_count);
+	else
+		atomic64_dec(&mdsc->quotarealms_count);
+}
+
+static inline bool ceph_has_realms_with_quotas(struct inode *inode)
+{
+	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
+	return atomic64_read(&mdsc->quotarealms_count) > 0;
+}
+
+void ceph_handle_quota(struct ceph_mds_client *mdsc,
+		       struct ceph_mds_session *session,
+		       struct ceph_msg *msg)
+{
+	struct super_block *sb = mdsc->fsc->sb;
+	struct ceph_mds_quota *h = msg->front.iov_base;
+	struct ceph_vino vino;
+	struct inode *inode;
+	struct ceph_inode_info *ci;
+
+	if (msg->front.iov_len != sizeof(*h)) {
+		pr_err("%s corrupt message mds%d len %d\n", __func__,
+		       session->s_mds, (int)msg->front.iov_len);
+		ceph_msg_dump(msg);
+		return;
+	}
+
+	/* increment msg sequence number */
+	mutex_lock(&session->s_mutex);
+	session->s_seq++;
+	mutex_unlock(&session->s_mutex);
+
+	/* lookup inode */
+	vino.ino = le64_to_cpu(h->ino);
+	vino.snap = CEPH_NOSNAP;
+	inode = ceph_find_inode(sb, vino);
+	if (!inode) {
+		pr_warn("Failed to find inode %llu\n", vino.ino);
+		return;
+	}
+	ci = ceph_inode(inode);
+
+	spin_lock(&ci->i_ceph_lock);
+	ci->i_rbytes = le64_to_cpu(h->rbytes);
+	ci->i_rfiles = le64_to_cpu(h->rfiles);
+	ci->i_rsubdirs = le64_to_cpu(h->rsubdirs);
+	__ceph_update_quota(ci, le64_to_cpu(h->max_bytes),
+		            le64_to_cpu(h->max_files));
+	spin_unlock(&ci->i_ceph_lock);
+
+	iput(inode);
+}
+
+/*
+ * This function walks through the snaprealm for an inode and returns the
+ * ceph_snap_realm for the first snaprealm that has quotas set (either max_files
+ * or max_bytes).  If the root is reached, return the root ceph_snap_realm
+ * instead.
+ *
+ * Note that the caller is responsible for calling ceph_put_snap_realm() on the
+ * returned realm.
+ */
+static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc,
+					       struct inode *inode)
+{
+	struct ceph_inode_info *ci = NULL;
+	struct ceph_snap_realm *realm, *next;
+	struct inode *in;
+	bool has_quota;
+
+	if (ceph_snap(inode) != CEPH_NOSNAP)
+		return NULL;
+
+	realm = ceph_inode(inode)->i_snap_realm;
+	if (realm)
+		ceph_get_snap_realm(mdsc, realm);
+	else
+		pr_err_ratelimited("get_quota_realm: ino (%llx.%llx) "
+				   "null i_snap_realm\n", ceph_vinop(inode));
+	while (realm) {
+		spin_lock(&realm->inodes_with_caps_lock);
+		in = realm->inode ? igrab(realm->inode) : NULL;
+		spin_unlock(&realm->inodes_with_caps_lock);
+		if (!in)
+			break;
+
+		ci = ceph_inode(in);
+		has_quota = __ceph_has_any_quota(ci);
+		iput(in);
+
+		next = realm->parent;
+		if (has_quota || !next)
+		       return realm;
+
+		ceph_get_snap_realm(mdsc, next);
+		ceph_put_snap_realm(mdsc, realm);
+		realm = next;
+	}
+	if (realm)
+		ceph_put_snap_realm(mdsc, realm);
+
+	return NULL;
+}
+
+bool ceph_quota_is_same_realm(struct inode *old, struct inode *new)
+{
+	struct ceph_mds_client *mdsc = ceph_inode_to_client(old)->mdsc;
+	struct ceph_snap_realm *old_realm, *new_realm;
+	bool is_same;
+
+	down_read(&mdsc->snap_rwsem);
+	old_realm = get_quota_realm(mdsc, old);
+	new_realm = get_quota_realm(mdsc, new);
+	is_same = (old_realm == new_realm);
+	up_read(&mdsc->snap_rwsem);
+
+	if (old_realm)
+		ceph_put_snap_realm(mdsc, old_realm);
+	if (new_realm)
+		ceph_put_snap_realm(mdsc, new_realm);
+
+	return is_same;
+}
+
+enum quota_check_op {
+	QUOTA_CHECK_MAX_FILES_OP,	/* check quota max_files limit */
+	QUOTA_CHECK_MAX_BYTES_OP,	/* check quota max_files limit */
+	QUOTA_CHECK_MAX_BYTES_APPROACHING_OP	/* check if quota max_files
+						   limit is approaching */
+};
+
+/*
+ * check_quota_exceeded() will walk up the snaprealm hierarchy and, for each
+ * realm, it will execute quota check operation defined by the 'op' parameter.
+ * The snaprealm walk is interrupted if the quota check detects that the quota
+ * is exceeded or if the root inode is reached.
+ */
+static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op,
+				 loff_t delta)
+{
+	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
+	struct ceph_inode_info *ci;
+	struct ceph_snap_realm *realm, *next;
+	struct inode *in;
+	u64 max, rvalue;
+	bool exceeded = false;
+
+	if (ceph_snap(inode) != CEPH_NOSNAP)
+		return false;
+
+	down_read(&mdsc->snap_rwsem);
+	realm = ceph_inode(inode)->i_snap_realm;
+	if (realm)
+		ceph_get_snap_realm(mdsc, realm);
+	else
+		pr_err_ratelimited("check_quota_exceeded: ino (%llx.%llx) "
+				   "null i_snap_realm\n", ceph_vinop(inode));
+	while (realm) {
+		spin_lock(&realm->inodes_with_caps_lock);
+		in = realm->inode ? igrab(realm->inode) : NULL;
+		spin_unlock(&realm->inodes_with_caps_lock);
+		if (!in)
+			break;
+
+		ci = ceph_inode(in);
+		spin_lock(&ci->i_ceph_lock);
+		if (op == QUOTA_CHECK_MAX_FILES_OP) {
+			max = ci->i_max_files;
+			rvalue = ci->i_rfiles + ci->i_rsubdirs;
+		} else {
+			max = ci->i_max_bytes;
+			rvalue = ci->i_rbytes;
+		}
+		spin_unlock(&ci->i_ceph_lock);
+		switch (op) {
+		case QUOTA_CHECK_MAX_FILES_OP:
+			exceeded = (max && (rvalue >= max));
+			break;
+		case QUOTA_CHECK_MAX_BYTES_OP:
+			exceeded = (max && (rvalue + delta > max));
+			break;
+		case QUOTA_CHECK_MAX_BYTES_APPROACHING_OP:
+			if (max) {
+				if (rvalue >= max)
+					exceeded = true;
+				else {
+					/*
+					 * when we're writing more that 1/16th
+					 * of the available space
+					 */
+					exceeded =
+						(((max - rvalue) >> 4) < delta);
+				}
+			}
+			break;
+		default:
+			/* Shouldn't happen */
+			pr_warn("Invalid quota check op (%d)\n", op);
+			exceeded = true; /* Just break the loop */
+		}
+		iput(in);
+
+		next = realm->parent;
+		if (exceeded || !next)
+			break;
+		ceph_get_snap_realm(mdsc, next);
+		ceph_put_snap_realm(mdsc, realm);
+		realm = next;
+	}
+	ceph_put_snap_realm(mdsc, realm);
+	up_read(&mdsc->snap_rwsem);
+
+	return exceeded;
+}
+
+/*
+ * ceph_quota_is_max_files_exceeded - check if we can create a new file
+ * @inode:	directory where a new file is being created
+ *
+ * This functions returns true is max_files quota allows a new file to be
+ * created.  It is necessary to walk through the snaprealm hierarchy (until the
+ * FS root) to check all realms with quotas set.
+ */
+bool ceph_quota_is_max_files_exceeded(struct inode *inode)
+{
+	if (!ceph_has_realms_with_quotas(inode))
+		return false;
+
+	WARN_ON(!S_ISDIR(inode->i_mode));
+
+	return check_quota_exceeded(inode, QUOTA_CHECK_MAX_FILES_OP, 0);
+}
+
+/*
+ * ceph_quota_is_max_bytes_exceeded - check if we can write to a file
+ * @inode:	inode being written
+ * @newsize:	new size if write succeeds
+ *
+ * This functions returns true is max_bytes quota allows a file size to reach
+ * @newsize; it returns false otherwise.
+ */
+bool ceph_quota_is_max_bytes_exceeded(struct inode *inode, loff_t newsize)
+{
+	loff_t size = i_size_read(inode);
+
+	if (!ceph_has_realms_with_quotas(inode))
+		return false;
+
+	/* return immediately if we're decreasing file size */
+	if (newsize <= size)
+		return false;
+
+	return check_quota_exceeded(inode, QUOTA_CHECK_MAX_BYTES_OP, (newsize - size));
+}
+
+/*
+ * ceph_quota_is_max_bytes_approaching - check if we're reaching max_bytes
+ * @inode:	inode being written
+ * @newsize:	new size if write succeeds
+ *
+ * This function returns true if the new file size @newsize will be consuming
+ * more than 1/16th of the available quota space; it returns false otherwise.
+ */
+bool ceph_quota_is_max_bytes_approaching(struct inode *inode, loff_t newsize)
+{
+	loff_t size = ceph_inode(inode)->i_reported_size;
+
+	if (!ceph_has_realms_with_quotas(inode))
+		return false;
+
+	/* return immediately if we're decreasing file size */
+	if (newsize <= size)
+		return false;
+
+	return check_quota_exceeded(inode, QUOTA_CHECK_MAX_BYTES_APPROACHING_OP,
+				    (newsize - size));
+}
+
+/*
+ * ceph_quota_update_statfs - if root has quota update statfs with quota status
+ * @fsc:	filesystem client instance
+ * @buf:	statfs to update
+ *
+ * If the mounted filesystem root has max_bytes quota set, update the filesystem
+ * statistics with the quota status.
+ *
+ * This function returns true if the stats have been updated, false otherwise.
+ */
+bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf)
+{
+	struct ceph_mds_client *mdsc = fsc->mdsc;
+	struct ceph_inode_info *ci;
+	struct ceph_snap_realm *realm;
+	struct inode *in;
+	u64 total = 0, used, free;
+	bool is_updated = false;
+
+	down_read(&mdsc->snap_rwsem);
+	realm = get_quota_realm(mdsc, d_inode(fsc->sb->s_root));
+	up_read(&mdsc->snap_rwsem);
+	if (!realm)
+		return false;
+
+	spin_lock(&realm->inodes_with_caps_lock);
+	in = realm->inode ? igrab(realm->inode) : NULL;
+	spin_unlock(&realm->inodes_with_caps_lock);
+	if (in) {
+		ci = ceph_inode(in);
+		spin_lock(&ci->i_ceph_lock);
+		if (ci->i_max_bytes) {
+			total = ci->i_max_bytes >> CEPH_BLOCK_SHIFT;
+			used = ci->i_rbytes >> CEPH_BLOCK_SHIFT;
+			/* It is possible for a quota to be exceeded.
+			 * Report 'zero' in that case
+			 */
+			free = total > used ? total - used : 0;
+		}
+		spin_unlock(&ci->i_ceph_lock);
+		if (total) {
+			buf->f_blocks = total;
+			buf->f_bfree = free;
+			buf->f_bavail = free;
+			is_updated = true;
+		}
+		iput(in);
+	}
+	ceph_put_snap_realm(mdsc, realm);
+
+	return is_updated;
+}
+
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 07cf95e6413d..041c27ea8de1 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -931,6 +931,8 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
 			list_add(&ci->i_snap_realm_item,
 				 &realm->inodes_with_caps);
 			ci->i_snap_realm = realm;
+			if (realm->ino == ci->i_vino.ino)
+                                realm->inode = inode;
 			spin_unlock(&realm->inodes_with_caps_lock);
 
 			spin_unlock(&ci->i_ceph_lock);
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index fb2bc9c15a23..b33082e6878f 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -76,9 +76,18 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
 	 */
 	buf->f_bsize = 1 << CEPH_BLOCK_SHIFT;
 	buf->f_frsize = 1 << CEPH_BLOCK_SHIFT;
-	buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10);
-	buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
-	buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
+
+	/*
+	 * By default use root quota for stats; fallback to overall filesystem
+	 * usage if using 'noquotadf' mount option or if the root dir doesn't
+	 * have max_bytes quota set.
+	 */
+	if (ceph_test_mount_opt(fsc, NOQUOTADF) ||
+	    !ceph_quota_update_statfs(fsc, buf)) {
+		buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10);
+		buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
+		buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
+	}
 
 	buf->f_files = le64_to_cpu(st.num_objects);
 	buf->f_ffree = -1;
@@ -151,6 +160,8 @@ enum {
 	Opt_acl,
 #endif
 	Opt_noacl,
+	Opt_quotadf,
+	Opt_noquotadf,
 };
 
 static match_table_t fsopt_tokens = {
@@ -187,6 +198,8 @@ static match_table_t fsopt_tokens = {
 	{Opt_acl, "acl"},
 #endif
 	{Opt_noacl, "noacl"},
+	{Opt_quotadf, "quotadf"},
+	{Opt_noquotadf, "noquotadf"},
 	{-1, NULL}
 };
 
@@ -314,13 +327,16 @@ static int parse_fsopt_token(char *c, void *private)
 		break;
 	case Opt_fscache:
 		fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE;
+		kfree(fsopt->fscache_uniq);
+		fsopt->fscache_uniq = NULL;
 		break;
 	case Opt_nofscache:
 		fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE;
+		kfree(fsopt->fscache_uniq);
+		fsopt->fscache_uniq = NULL;
 		break;
 	case Opt_poolperm:
 		fsopt->flags &= ~CEPH_MOUNT_OPT_NOPOOLPERM;
-		printk ("pool perm");
 		break;
 	case Opt_nopoolperm:
 		fsopt->flags |= CEPH_MOUNT_OPT_NOPOOLPERM;
@@ -331,6 +347,12 @@ static int parse_fsopt_token(char *c, void *private)
 	case Opt_norequire_active_mds:
 		fsopt->flags |= CEPH_MOUNT_OPT_MOUNTWAIT;
 		break;
+	case Opt_quotadf:
+		fsopt->flags &= ~CEPH_MOUNT_OPT_NOQUOTADF;
+		break;
+	case Opt_noquotadf:
+		fsopt->flags |= CEPH_MOUNT_OPT_NOQUOTADF;
+		break;
 #ifdef CONFIG_CEPH_FS_POSIX_ACL
 	case Opt_acl:
 		fsopt->sb_flags |= SB_POSIXACL;
@@ -513,13 +535,12 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
 	if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0)
 		seq_puts(m, ",nodcache");
 	if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) {
-		if (fsopt->fscache_uniq)
-			seq_printf(m, ",fsc=%s", fsopt->fscache_uniq);
-		else
-			seq_puts(m, ",fsc");
+		seq_show_option(m, "fsc", fsopt->fscache_uniq);
 	}
 	if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM)
 		seq_puts(m, ",nopoolperm");
+	if (fsopt->flags & CEPH_MOUNT_OPT_NOQUOTADF)
+		seq_puts(m, ",noquotadf");
 
 #ifdef CONFIG_CEPH_FS_POSIX_ACL
 	if (fsopt->sb_flags & SB_POSIXACL)
@@ -529,7 +550,7 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
 #endif
 
 	if (fsopt->mds_namespace)
-		seq_printf(m, ",mds_namespace=%s", fsopt->mds_namespace);
+		seq_show_option(m, "mds_namespace", fsopt->mds_namespace);
 	if (fsopt->wsize)
 		seq_printf(m, ",wsize=%d", fsopt->wsize);
 	if (fsopt->rsize != CEPH_MAX_READ_SIZE)
@@ -679,6 +700,7 @@ struct kmem_cache *ceph_cap_cachep;
 struct kmem_cache *ceph_cap_flush_cachep;
 struct kmem_cache *ceph_dentry_cachep;
 struct kmem_cache *ceph_file_cachep;
+struct kmem_cache *ceph_dir_file_cachep;
 
 static void ceph_inode_init_once(void *foo)
 {
@@ -698,8 +720,7 @@ static int __init init_caches(void)
 	if (!ceph_inode_cachep)
 		return -ENOMEM;
 
-	ceph_cap_cachep = KMEM_CACHE(ceph_cap,
-				     SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
+	ceph_cap_cachep = KMEM_CACHE(ceph_cap, SLAB_MEM_SPREAD);
 	if (!ceph_cap_cachep)
 		goto bad_cap;
 	ceph_cap_flush_cachep = KMEM_CACHE(ceph_cap_flush,
@@ -716,6 +737,10 @@ static int __init init_caches(void)
 	if (!ceph_file_cachep)
 		goto bad_file;
 
+	ceph_dir_file_cachep = KMEM_CACHE(ceph_dir_file_info, SLAB_MEM_SPREAD);
+	if (!ceph_dir_file_cachep)
+		goto bad_dir_file;
+
 	error = ceph_fscache_register();
 	if (error)
 		goto bad_fscache;
@@ -723,6 +748,8 @@ static int __init init_caches(void)
 	return 0;
 
 bad_fscache:
+	kmem_cache_destroy(ceph_dir_file_cachep);
+bad_dir_file:
 	kmem_cache_destroy(ceph_file_cachep);
 bad_file:
 	kmem_cache_destroy(ceph_dentry_cachep);
@@ -748,6 +775,7 @@ static void destroy_caches(void)
 	kmem_cache_destroy(ceph_cap_flush_cachep);
 	kmem_cache_destroy(ceph_dentry_cachep);
 	kmem_cache_destroy(ceph_file_cachep);
+	kmem_cache_destroy(ceph_dir_file_cachep);
 
 	ceph_fscache_unregister();
 }
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 1c2086e0fec2..a7077a0c989f 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -39,6 +39,7 @@
 #define CEPH_MOUNT_OPT_FSCACHE         (1<<10) /* use fscache */
 #define CEPH_MOUNT_OPT_NOPOOLPERM      (1<<11) /* no pool permission check */
 #define CEPH_MOUNT_OPT_MOUNTWAIT       (1<<12) /* mount waits if no mds is up */
+#define CEPH_MOUNT_OPT_NOQUOTADF       (1<<13) /* no root dir quota in statfs */
 
 #define CEPH_MOUNT_OPT_DEFAULT    CEPH_MOUNT_OPT_DCACHE
 
@@ -310,6 +311,9 @@ struct ceph_inode_info {
 	u64 i_rbytes, i_rfiles, i_rsubdirs;
 	u64 i_files, i_subdirs;
 
+	/* quotas */
+	u64 i_max_bytes, i_max_files;
+
 	struct rb_root i_fragtree;
 	int i_fragtree_nsplits;
 	struct mutex i_fragtree_mutex;
@@ -671,6 +675,10 @@ struct ceph_file_info {
 
 	spinlock_t rw_contexts_lock;
 	struct list_head rw_contexts;
+};
+
+struct ceph_dir_file_info {
+	struct ceph_file_info file_info;
 
 	/* readdir: position within the dir */
 	u32 frag;
@@ -748,6 +756,7 @@ struct ceph_readdir_cache_control {
  */
 struct ceph_snap_realm {
 	u64 ino;
+	struct inode *inode;
 	atomic_t nref;
 	struct rb_node node;
 
@@ -1066,4 +1075,37 @@ extern int ceph_locks_to_pagelist(struct ceph_filelock *flocks,
 extern int ceph_fs_debugfs_init(struct ceph_fs_client *client);
 extern void ceph_fs_debugfs_cleanup(struct ceph_fs_client *client);
 
+/* quota.c */
+static inline bool __ceph_has_any_quota(struct ceph_inode_info *ci)
+{
+	return ci->i_max_files || ci->i_max_bytes;
+}
+
+extern void ceph_adjust_quota_realms_count(struct inode *inode, bool inc);
+
+static inline void __ceph_update_quota(struct ceph_inode_info *ci,
+				       u64 max_bytes, u64 max_files)
+{
+	bool had_quota, has_quota;
+	had_quota = __ceph_has_any_quota(ci);
+	ci->i_max_bytes = max_bytes;
+	ci->i_max_files = max_files;
+	has_quota = __ceph_has_any_quota(ci);
+
+	if (had_quota != has_quota)
+		ceph_adjust_quota_realms_count(&ci->vfs_inode, has_quota);
+}
+
+extern void ceph_handle_quota(struct ceph_mds_client *mdsc,
+			      struct ceph_mds_session *session,
+			      struct ceph_msg *msg);
+extern bool ceph_quota_is_max_files_exceeded(struct inode *inode);
+extern bool ceph_quota_is_same_realm(struct inode *old, struct inode *new);
+extern bool ceph_quota_is_max_bytes_exceeded(struct inode *inode,
+					     loff_t newlen);
+extern bool ceph_quota_is_max_bytes_approaching(struct inode *inode,
+						loff_t newlen);
+extern bool ceph_quota_update_statfs(struct ceph_fs_client *fsc,
+				     struct kstatfs *buf);
+
 #endif /* _FS_CEPH_SUPER_H */
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index e1c4e0b12b4c..7e72348639e4 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -224,6 +224,31 @@ static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val,
 			(long)ci->i_rctime.tv_nsec);
 }
 
+/* quotas */
+
+static bool ceph_vxattrcb_quota_exists(struct ceph_inode_info *ci)
+{
+	return (ci->i_max_files || ci->i_max_bytes);
+}
+
+static size_t ceph_vxattrcb_quota(struct ceph_inode_info *ci, char *val,
+				  size_t size)
+{
+	return snprintf(val, size, "max_bytes=%llu max_files=%llu",
+			ci->i_max_bytes, ci->i_max_files);
+}
+
+static size_t ceph_vxattrcb_quota_max_bytes(struct ceph_inode_info *ci,
+					    char *val, size_t size)
+{
+	return snprintf(val, size, "%llu", ci->i_max_bytes);
+}
+
+static size_t ceph_vxattrcb_quota_max_files(struct ceph_inode_info *ci,
+					    char *val, size_t size)
+{
+	return snprintf(val, size, "%llu", ci->i_max_files);
+}
 
 #define CEPH_XATTR_NAME(_type, _name)	XATTR_CEPH_PREFIX #_type "." #_name
 #define CEPH_XATTR_NAME2(_type, _name, _name2)	\
@@ -247,6 +272,15 @@ static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val,
 		.hidden = true,			\
 		.exists_cb = ceph_vxattrcb_layout_exists,	\
 	}
+#define XATTR_QUOTA_FIELD(_type, _name)					\
+	{								\
+		.name = CEPH_XATTR_NAME(_type, _name),			\
+		.name_size = sizeof(CEPH_XATTR_NAME(_type, _name)),	\
+		.getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name,	\
+		.readonly = false,					\
+		.hidden = true,						\
+		.exists_cb = ceph_vxattrcb_quota_exists,		\
+	}
 
 static struct ceph_vxattr ceph_dir_vxattrs[] = {
 	{
@@ -270,6 +304,16 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = {
 	XATTR_NAME_CEPH(dir, rsubdirs),
 	XATTR_NAME_CEPH(dir, rbytes),
 	XATTR_NAME_CEPH(dir, rctime),
+	{
+		.name = "ceph.quota",
+		.name_size = sizeof("ceph.quota"),
+		.getxattr_cb = ceph_vxattrcb_quota,
+		.readonly = false,
+		.hidden = true,
+		.exists_cb = ceph_vxattrcb_quota_exists,
+	},
+	XATTR_QUOTA_FIELD(quota, max_bytes),
+	XATTR_QUOTA_FIELD(quota, max_files),
 	{ .name = NULL, 0 }	/* Required table terminator */
 };
 static size_t ceph_dir_vxattrs_name_size;	/* total size of all names */
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 7cee97b93a61..4bcd4e838b47 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1987,11 +1987,10 @@ wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
 	for (i = 0; i < found_pages; i++) {
 		page = wdata->pages[i];
 		/*
-		 * At this point we hold neither mapping->tree_lock nor
-		 * lock on the page itself: the page may be truncated or
-		 * invalidated (changing page->mapping to NULL), or even
-		 * swizzled back from swapper_space to tmpfs file
-		 * mapping
+		 * At this point we hold neither the i_pages lock nor the
+		 * page lock: the page may be truncated or invalidated
+		 * (changing page->mapping to NULL), or even swizzled
+		 * back from swapper_space to tmpfs file mapping
 		 */
 
 		if (nr_pages == 0)
diff --git a/fs/dax.c b/fs/dax.c
index 0276df90e86c..aaec72ded1b6 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -73,16 +73,15 @@ fs_initcall(init_dax_wait_table);
 #define RADIX_DAX_ZERO_PAGE	(1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 2))
 #define RADIX_DAX_EMPTY		(1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 3))
 
-static unsigned long dax_radix_sector(void *entry)
+static unsigned long dax_radix_pfn(void *entry)
 {
 	return (unsigned long)entry >> RADIX_DAX_SHIFT;
 }
 
-static void *dax_radix_locked_entry(sector_t sector, unsigned long flags)
+static void *dax_radix_locked_entry(unsigned long pfn, unsigned long flags)
 {
 	return (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY | flags |
-			((unsigned long)sector << RADIX_DAX_SHIFT) |
-			RADIX_DAX_ENTRY_LOCK);
+			(pfn << RADIX_DAX_SHIFT) | RADIX_DAX_ENTRY_LOCK);
 }
 
 static unsigned int dax_radix_order(void *entry)
@@ -159,11 +158,9 @@ static int wake_exceptional_entry_func(wait_queue_entry_t *wait, unsigned int mo
 }
 
 /*
- * We do not necessarily hold the mapping->tree_lock when we call this
- * function so it is possible that 'entry' is no longer a valid item in the
- * radix tree.  This is okay because all we really need to do is to find the
- * correct waitqueue where tasks might be waiting for that old 'entry' and
- * wake them.
+ * @entry may no longer be the entry at the index in the mapping.
+ * The important information it's conveying is whether the entry at
+ * this index used to be a PMD entry.
  */
 static void dax_wake_mapping_entry_waiter(struct address_space *mapping,
 		pgoff_t index, void *entry, bool wake_all)
@@ -175,7 +172,7 @@ static void dax_wake_mapping_entry_waiter(struct address_space *mapping,
 
 	/*
 	 * Checking for locked entry and prepare_to_wait_exclusive() happens
-	 * under mapping->tree_lock, ditto for entry handling in our callers.
+	 * under the i_pages lock, ditto for entry handling in our callers.
 	 * So at this point all tasks that could have seen our entry locked
 	 * must be in the waitqueue and the following check will see them.
 	 */
@@ -184,41 +181,39 @@ static void dax_wake_mapping_entry_waiter(struct address_space *mapping,
 }
 
 /*
- * Check whether the given slot is locked. The function must be called with
- * mapping->tree_lock held
+ * Check whether the given slot is locked.  Must be called with the i_pages
+ * lock held.
  */
 static inline int slot_locked(struct address_space *mapping, void **slot)
 {
 	unsigned long entry = (unsigned long)
-		radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
+		radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock);
 	return entry & RADIX_DAX_ENTRY_LOCK;
 }
 
 /*
- * Mark the given slot is locked. The function must be called with
- * mapping->tree_lock held
+ * Mark the given slot as locked.  Must be called with the i_pages lock held.
  */
 static inline void *lock_slot(struct address_space *mapping, void **slot)
 {
 	unsigned long entry = (unsigned long)
-		radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
+		radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock);
 
 	entry |= RADIX_DAX_ENTRY_LOCK;
-	radix_tree_replace_slot(&mapping->page_tree, slot, (void *)entry);
+	radix_tree_replace_slot(&mapping->i_pages, slot, (void *)entry);
 	return (void *)entry;
 }
 
 /*
- * Mark the given slot is unlocked. The function must be called with
- * mapping->tree_lock held
+ * Mark the given slot as unlocked.  Must be called with the i_pages lock held.
  */
 static inline void *unlock_slot(struct address_space *mapping, void **slot)
 {
 	unsigned long entry = (unsigned long)
-		radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
+		radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock);
 
 	entry &= ~(unsigned long)RADIX_DAX_ENTRY_LOCK;
-	radix_tree_replace_slot(&mapping->page_tree, slot, (void *)entry);
+	radix_tree_replace_slot(&mapping->i_pages, slot, (void *)entry);
 	return (void *)entry;
 }
 
@@ -229,7 +224,7 @@ static inline void *unlock_slot(struct address_space *mapping, void **slot)
  * put_locked_mapping_entry() when he locked the entry and now wants to
  * unlock it.
  *
- * The function must be called with mapping->tree_lock held.
+ * Must be called with the i_pages lock held.
  */
 static void *get_unlocked_mapping_entry(struct address_space *mapping,
 					pgoff_t index, void ***slotp)
@@ -242,7 +237,7 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping,
 	ewait.wait.func = wake_exceptional_entry_func;
 
 	for (;;) {
-		entry = __radix_tree_lookup(&mapping->page_tree, index, NULL,
+		entry = __radix_tree_lookup(&mapping->i_pages, index, NULL,
 					  &slot);
 		if (!entry ||
 		    WARN_ON_ONCE(!radix_tree_exceptional_entry(entry)) ||
@@ -255,10 +250,10 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping,
 		wq = dax_entry_waitqueue(mapping, index, entry, &ewait.key);
 		prepare_to_wait_exclusive(wq, &ewait.wait,
 					  TASK_UNINTERRUPTIBLE);
-		spin_unlock_irq(&mapping->tree_lock);
+		xa_unlock_irq(&mapping->i_pages);
 		schedule();
 		finish_wait(wq, &ewait.wait);
-		spin_lock_irq(&mapping->tree_lock);
+		xa_lock_irq(&mapping->i_pages);
 	}
 }
 
@@ -267,15 +262,15 @@ static void dax_unlock_mapping_entry(struct address_space *mapping,
 {
 	void *entry, **slot;
 
-	spin_lock_irq(&mapping->tree_lock);
-	entry = __radix_tree_lookup(&mapping->page_tree, index, NULL, &slot);
+	xa_lock_irq(&mapping->i_pages);
+	entry = __radix_tree_lookup(&mapping->i_pages, index, NULL, &slot);
 	if (WARN_ON_ONCE(!entry || !radix_tree_exceptional_entry(entry) ||
 			 !slot_locked(mapping, slot))) {
-		spin_unlock_irq(&mapping->tree_lock);
+		xa_unlock_irq(&mapping->i_pages);
 		return;
 	}
 	unlock_slot(mapping, slot);
-	spin_unlock_irq(&mapping->tree_lock);
+	xa_unlock_irq(&mapping->i_pages);
 	dax_wake_mapping_entry_waiter(mapping, index, entry, false);
 }
 
@@ -299,6 +294,63 @@ static void put_unlocked_mapping_entry(struct address_space *mapping,
 	dax_wake_mapping_entry_waiter(mapping, index, entry, false);
 }
 
+static unsigned long dax_entry_size(void *entry)
+{
+	if (dax_is_zero_entry(entry))
+		return 0;
+	else if (dax_is_empty_entry(entry))
+		return 0;
+	else if (dax_is_pmd_entry(entry))
+		return PMD_SIZE;
+	else
+		return PAGE_SIZE;
+}
+
+static unsigned long dax_radix_end_pfn(void *entry)
+{
+	return dax_radix_pfn(entry) + dax_entry_size(entry) / PAGE_SIZE;
+}
+
+/*
+ * Iterate through all mapped pfns represented by an entry, i.e. skip
+ * 'empty' and 'zero' entries.
+ */
+#define for_each_mapped_pfn(entry, pfn) \
+	for (pfn = dax_radix_pfn(entry); \
+			pfn < dax_radix_end_pfn(entry); pfn++)
+
+static void dax_associate_entry(void *entry, struct address_space *mapping)
+{
+	unsigned long pfn;
+
+	if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
+		return;
+
+	for_each_mapped_pfn(entry, pfn) {
+		struct page *page = pfn_to_page(pfn);
+
+		WARN_ON_ONCE(page->mapping);
+		page->mapping = mapping;
+	}
+}
+
+static void dax_disassociate_entry(void *entry, struct address_space *mapping,
+		bool trunc)
+{
+	unsigned long pfn;
+
+	if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
+		return;
+
+	for_each_mapped_pfn(entry, pfn) {
+		struct page *page = pfn_to_page(pfn);
+
+		WARN_ON_ONCE(trunc && page_ref_count(page) > 1);
+		WARN_ON_ONCE(page->mapping && page->mapping != mapping);
+		page->mapping = NULL;
+	}
+}
+
 /*
  * Find radix tree entry at given index. If it points to an exceptional entry,
  * return it with the radix tree entry locked. If the radix tree doesn't
@@ -332,7 +384,7 @@ static void *grab_mapping_entry(struct address_space *mapping, pgoff_t index,
 	void *entry, **slot;
 
 restart:
-	spin_lock_irq(&mapping->tree_lock);
+	xa_lock_irq(&mapping->i_pages);
 	entry = get_unlocked_mapping_entry(mapping, index, &slot);
 
 	if (WARN_ON_ONCE(entry && !radix_tree_exceptional_entry(entry))) {
@@ -364,12 +416,12 @@ restart:
 		if (pmd_downgrade) {
 			/*
 			 * Make sure 'entry' remains valid while we drop
-			 * mapping->tree_lock.
+			 * the i_pages lock.
 			 */
 			entry = lock_slot(mapping, slot);
 		}
 
-		spin_unlock_irq(&mapping->tree_lock);
+		xa_unlock_irq(&mapping->i_pages);
 		/*
 		 * Besides huge zero pages the only other thing that gets
 		 * downgraded are empty entries which don't need to be
@@ -386,26 +438,27 @@ restart:
 				put_locked_mapping_entry(mapping, index);
 			return ERR_PTR(err);
 		}
-		spin_lock_irq(&mapping->tree_lock);
+		xa_lock_irq(&mapping->i_pages);
 
 		if (!entry) {
 			/*
-			 * We needed to drop the page_tree lock while calling
+			 * We needed to drop the i_pages lock while calling
 			 * radix_tree_preload() and we didn't have an entry to
 			 * lock.  See if another thread inserted an entry at
 			 * our index during this time.
 			 */
-			entry = __radix_tree_lookup(&mapping->page_tree, index,
+			entry = __radix_tree_lookup(&mapping->i_pages, index,
 					NULL, &slot);
 			if (entry) {
 				radix_tree_preload_end();
-				spin_unlock_irq(&mapping->tree_lock);
+				xa_unlock_irq(&mapping->i_pages);
 				goto restart;
 			}
 		}
 
 		if (pmd_downgrade) {
-			radix_tree_delete(&mapping->page_tree, index);
+			dax_disassociate_entry(entry, mapping, false);
+			radix_tree_delete(&mapping->i_pages, index);
 			mapping->nrexceptional--;
 			dax_wake_mapping_entry_waiter(mapping, index, entry,
 					true);
@@ -413,11 +466,11 @@ restart:
 
 		entry = dax_radix_locked_entry(0, size_flag | RADIX_DAX_EMPTY);
 
-		err = __radix_tree_insert(&mapping->page_tree, index,
+		err = __radix_tree_insert(&mapping->i_pages, index,
 				dax_radix_order(entry), entry);
 		radix_tree_preload_end();
 		if (err) {
-			spin_unlock_irq(&mapping->tree_lock);
+			xa_unlock_irq(&mapping->i_pages);
 			/*
 			 * Our insertion of a DAX entry failed, most likely
 			 * because we were inserting a PMD entry and it
@@ -430,12 +483,12 @@ restart:
 		}
 		/* Good, we have inserted empty locked entry into the tree. */
 		mapping->nrexceptional++;
-		spin_unlock_irq(&mapping->tree_lock);
+		xa_unlock_irq(&mapping->i_pages);
 		return entry;
 	}
 	entry = lock_slot(mapping, slot);
  out_unlock:
-	spin_unlock_irq(&mapping->tree_lock);
+	xa_unlock_irq(&mapping->i_pages);
 	return entry;
 }
 
@@ -444,22 +497,23 @@ static int __dax_invalidate_mapping_entry(struct address_space *mapping,
 {
 	int ret = 0;
 	void *entry;
-	struct radix_tree_root *page_tree = &mapping->page_tree;
+	struct radix_tree_root *pages = &mapping->i_pages;
 
-	spin_lock_irq(&mapping->tree_lock);
+	xa_lock_irq(pages);
 	entry = get_unlocked_mapping_entry(mapping, index, NULL);
 	if (!entry || WARN_ON_ONCE(!radix_tree_exceptional_entry(entry)))
 		goto out;
 	if (!trunc &&
-	    (radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_DIRTY) ||
-	     radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE)))
+	    (radix_tree_tag_get(pages, index, PAGECACHE_TAG_DIRTY) ||
+	     radix_tree_tag_get(pages, index, PAGECACHE_TAG_TOWRITE)))
 		goto out;
-	radix_tree_delete(page_tree, index);
+	dax_disassociate_entry(entry, mapping, trunc);
+	radix_tree_delete(pages, index);
 	mapping->nrexceptional--;
 	ret = 1;
 out:
 	put_unlocked_mapping_entry(mapping, index, entry);
-	spin_unlock_irq(&mapping->tree_lock);
+	xa_unlock_irq(pages);
 	return ret;
 }
 /*
@@ -526,12 +580,13 @@ static int copy_user_dax(struct block_device *bdev, struct dax_device *dax_dev,
  */
 static void *dax_insert_mapping_entry(struct address_space *mapping,
 				      struct vm_fault *vmf,
-				      void *entry, sector_t sector,
+				      void *entry, pfn_t pfn_t,
 				      unsigned long flags, bool dirty)
 {
-	struct radix_tree_root *page_tree = &mapping->page_tree;
-	void *new_entry;
+	struct radix_tree_root *pages = &mapping->i_pages;
+	unsigned long pfn = pfn_t_to_pfn(pfn_t);
 	pgoff_t index = vmf->pgoff;
+	void *new_entry;
 
 	if (dirty)
 		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
@@ -545,8 +600,12 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
 			unmap_mapping_pages(mapping, vmf->pgoff, 1, false);
 	}
 
-	spin_lock_irq(&mapping->tree_lock);
-	new_entry = dax_radix_locked_entry(sector, flags);
+	xa_lock_irq(pages);
+	new_entry = dax_radix_locked_entry(pfn, flags);
+	if (dax_entry_size(entry) != dax_entry_size(new_entry)) {
+		dax_disassociate_entry(entry, mapping, false);
+		dax_associate_entry(new_entry, mapping);
+	}
 
 	if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
 		/*
@@ -561,17 +620,17 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
 		void **slot;
 		void *ret;
 
-		ret = __radix_tree_lookup(page_tree, index, &node, &slot);
+		ret = __radix_tree_lookup(pages, index, &node, &slot);
 		WARN_ON_ONCE(ret != entry);
-		__radix_tree_replace(page_tree, node, slot,
+		__radix_tree_replace(pages, node, slot,
 				     new_entry, NULL);
 		entry = new_entry;
 	}
 
 	if (dirty)
-		radix_tree_tag_set(page_tree, index, PAGECACHE_TAG_DIRTY);
+		radix_tree_tag_set(pages, index, PAGECACHE_TAG_DIRTY);
 
-	spin_unlock_irq(&mapping->tree_lock);
+	xa_unlock_irq(pages);
 	return entry;
 }
 
@@ -657,17 +716,14 @@ unlock_pte:
 	i_mmap_unlock_read(mapping);
 }
 
-static int dax_writeback_one(struct block_device *bdev,
-		struct dax_device *dax_dev, struct address_space *mapping,
-		pgoff_t index, void *entry)
+static int dax_writeback_one(struct dax_device *dax_dev,
+		struct address_space *mapping, pgoff_t index, void *entry)
 {
-	struct radix_tree_root *page_tree = &mapping->page_tree;
-	void *entry2, **slot, *kaddr;
-	long ret = 0, id;
-	sector_t sector;
-	pgoff_t pgoff;
+	struct radix_tree_root *pages = &mapping->i_pages;
+	void *entry2, **slot;
+	unsigned long pfn;
+	long ret = 0;
 	size_t size;
-	pfn_t pfn;
 
 	/*
 	 * A page got tagged dirty in DAX mapping? Something is seriously
@@ -676,17 +732,17 @@ static int dax_writeback_one(struct block_device *bdev,
 	if (WARN_ON(!radix_tree_exceptional_entry(entry)))
 		return -EIO;
 
-	spin_lock_irq(&mapping->tree_lock);
+	xa_lock_irq(pages);
 	entry2 = get_unlocked_mapping_entry(mapping, index, &slot);
 	/* Entry got punched out / reallocated? */
 	if (!entry2 || WARN_ON_ONCE(!radix_tree_exceptional_entry(entry2)))
 		goto put_unlocked;
 	/*
 	 * Entry got reallocated elsewhere? No need to writeback. We have to
-	 * compare sectors as we must not bail out due to difference in lockbit
+	 * compare pfns as we must not bail out due to difference in lockbit
 	 * or entry type.
 	 */
-	if (dax_radix_sector(entry2) != dax_radix_sector(entry))
+	if (dax_radix_pfn(entry2) != dax_radix_pfn(entry))
 		goto put_unlocked;
 	if (WARN_ON_ONCE(dax_is_empty_entry(entry) ||
 				dax_is_zero_entry(entry))) {
@@ -695,7 +751,7 @@ static int dax_writeback_one(struct block_device *bdev,
 	}
 
 	/* Another fsync thread may have already written back this entry */
-	if (!radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))
+	if (!radix_tree_tag_get(pages, index, PAGECACHE_TAG_TOWRITE))
 		goto put_unlocked;
 	/* Lock the entry to serialize with page faults */
 	entry = lock_slot(mapping, slot);
@@ -703,60 +759,40 @@ static int dax_writeback_one(struct block_device *bdev,
 	 * We can clear the tag now but we have to be careful so that concurrent
 	 * dax_writeback_one() calls for the same index cannot finish before we
 	 * actually flush the caches. This is achieved as the calls will look
-	 * at the entry only under tree_lock and once they do that they will
-	 * see the entry locked and wait for it to unlock.
+	 * at the entry only under the i_pages lock and once they do that
+	 * they will see the entry locked and wait for it to unlock.
 	 */
-	radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_TOWRITE);
-	spin_unlock_irq(&mapping->tree_lock);
+	radix_tree_tag_clear(pages, index, PAGECACHE_TAG_TOWRITE);
+	xa_unlock_irq(pages);
 
 	/*
 	 * Even if dax_writeback_mapping_range() was given a wbc->range_start
 	 * in the middle of a PMD, the 'index' we are given will be aligned to
-	 * the start index of the PMD, as will the sector we pull from
-	 * 'entry'.  This allows us to flush for PMD_SIZE and not have to
-	 * worry about partial PMD writebacks.
+	 * the start index of the PMD, as will the pfn we pull from 'entry'.
+	 * This allows us to flush for PMD_SIZE and not have to worry about
+	 * partial PMD writebacks.
 	 */
-	sector = dax_radix_sector(entry);
+	pfn = dax_radix_pfn(entry);
 	size = PAGE_SIZE << dax_radix_order(entry);
 
-	id = dax_read_lock();
-	ret = bdev_dax_pgoff(bdev, sector, size, &pgoff);
-	if (ret)
-		goto dax_unlock;
-
-	/*
-	 * dax_direct_access() may sleep, so cannot hold tree_lock over
-	 * its invocation.
-	 */
-	ret = dax_direct_access(dax_dev, pgoff, size / PAGE_SIZE, &kaddr, &pfn);
-	if (ret < 0)
-		goto dax_unlock;
-
-	if (WARN_ON_ONCE(ret < size / PAGE_SIZE)) {
-		ret = -EIO;
-		goto dax_unlock;
-	}
-
-	dax_mapping_entry_mkclean(mapping, index, pfn_t_to_pfn(pfn));
-	dax_flush(dax_dev, kaddr, size);
+	dax_mapping_entry_mkclean(mapping, index, pfn);
+	dax_flush(dax_dev, page_address(pfn_to_page(pfn)), size);
 	/*
 	 * After we have flushed the cache, we can clear the dirty tag. There
 	 * cannot be new dirty data in the pfn after the flush has completed as
 	 * the pfn mappings are writeprotected and fault waits for mapping
 	 * entry lock.
 	 */
-	spin_lock_irq(&mapping->tree_lock);
-	radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_DIRTY);
-	spin_unlock_irq(&mapping->tree_lock);
+	xa_lock_irq(pages);
+	radix_tree_tag_clear(pages, index, PAGECACHE_TAG_DIRTY);
+	xa_unlock_irq(pages);
 	trace_dax_writeback_one(mapping->host, index, size >> PAGE_SHIFT);
- dax_unlock:
-	dax_read_unlock(id);
 	put_locked_mapping_entry(mapping, index);
 	return ret;
 
  put_unlocked:
 	put_unlocked_mapping_entry(mapping, index, entry2);
-	spin_unlock_irq(&mapping->tree_lock);
+	xa_unlock_irq(pages);
 	return ret;
 }
 
@@ -808,8 +844,8 @@ int dax_writeback_mapping_range(struct address_space *mapping,
 				break;
 			}
 
-			ret = dax_writeback_one(bdev, dax_dev, mapping,
-					indices[i], pvec.pages[i]);
+			ret = dax_writeback_one(dax_dev, mapping, indices[i],
+					pvec.pages[i]);
 			if (ret < 0) {
 				mapping_set_error(mapping, ret);
 				goto out;
@@ -877,6 +913,7 @@ static int dax_load_hole(struct address_space *mapping, void *entry,
 	int ret = VM_FAULT_NOPAGE;
 	struct page *zero_page;
 	void *entry2;
+	pfn_t pfn;
 
 	zero_page = ZERO_PAGE(0);
 	if (unlikely(!zero_page)) {
@@ -884,14 +921,15 @@ static int dax_load_hole(struct address_space *mapping, void *entry,
 		goto out;
 	}
 
-	entry2 = dax_insert_mapping_entry(mapping, vmf, entry, 0,
+	pfn = page_to_pfn_t(zero_page);
+	entry2 = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
 			RADIX_DAX_ZERO_PAGE, false);
 	if (IS_ERR(entry2)) {
 		ret = VM_FAULT_SIGBUS;
 		goto out;
 	}
 
-	vm_insert_mixed(vmf->vma, vaddr, page_to_pfn_t(zero_page));
+	vm_insert_mixed(vmf->vma, vaddr, pfn);
 out:
 	trace_dax_load_hole(inode, vmf, ret);
 	return ret;
@@ -1200,8 +1238,7 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
 		if (error < 0)
 			goto error_finish_iomap;
 
-		entry = dax_insert_mapping_entry(mapping, vmf, entry,
-						 dax_iomap_sector(&iomap, pos),
+		entry = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
 						 0, write && !sync);
 		if (IS_ERR(entry)) {
 			error = PTR_ERR(entry);
@@ -1280,13 +1317,15 @@ static int dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap,
 	void *ret = NULL;
 	spinlock_t *ptl;
 	pmd_t pmd_entry;
+	pfn_t pfn;
 
 	zero_page = mm_get_huge_zero_page(vmf->vma->vm_mm);
 
 	if (unlikely(!zero_page))
 		goto fallback;
 
-	ret = dax_insert_mapping_entry(mapping, vmf, entry, 0,
+	pfn = page_to_pfn_t(zero_page);
+	ret = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
 			RADIX_DAX_PMD | RADIX_DAX_ZERO_PAGE, false);
 	if (IS_ERR(ret))
 		goto fallback;
@@ -1409,8 +1448,7 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
 		if (error < 0)
 			goto finish_iomap;
 
-		entry = dax_insert_mapping_entry(mapping, vmf, entry,
-						dax_iomap_sector(&iomap, pos),
+		entry = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
 						RADIX_DAX_PMD, write && !sync);
 		if (IS_ERR(entry))
 			goto finish_iomap;
@@ -1524,21 +1562,21 @@ static int dax_insert_pfn_mkwrite(struct vm_fault *vmf,
 	pgoff_t index = vmf->pgoff;
 	int vmf_ret, error;
 
-	spin_lock_irq(&mapping->tree_lock);
+	xa_lock_irq(&mapping->i_pages);
 	entry = get_unlocked_mapping_entry(mapping, index, &slot);
 	/* Did we race with someone splitting entry or so? */
 	if (!entry ||
 	    (pe_size == PE_SIZE_PTE && !dax_is_pte_entry(entry)) ||
 	    (pe_size == PE_SIZE_PMD && !dax_is_pmd_entry(entry))) {
 		put_unlocked_mapping_entry(mapping, index, entry);
-		spin_unlock_irq(&mapping->tree_lock);
+		xa_unlock_irq(&mapping->i_pages);
 		trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf,
 						      VM_FAULT_NOPAGE);
 		return VM_FAULT_NOPAGE;
 	}
-	radix_tree_tag_set(&mapping->page_tree, index, PAGECACHE_TAG_DIRTY);
+	radix_tree_tag_set(&mapping->i_pages, index, PAGECACHE_TAG_DIRTY);
 	entry = lock_slot(mapping, slot);
-	spin_unlock_irq(&mapping->tree_lock);
+	xa_unlock_irq(&mapping->i_pages);
 	switch (pe_size) {
 	case PE_SIZE_PTE:
 		error = vm_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn);
diff --git a/fs/dcache.c b/fs/dcache.c
index 593079176123..86d2de63461e 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -257,11 +257,25 @@ static void __d_free(struct rcu_head *head)
 	kmem_cache_free(dentry_cache, dentry); 
 }
 
+static void __d_free_external_name(struct rcu_head *head)
+{
+	struct external_name *name = container_of(head, struct external_name,
+						  u.head);
+
+	mod_node_page_state(page_pgdat(virt_to_page(name)),
+			    NR_INDIRECTLY_RECLAIMABLE_BYTES,
+			    -ksize(name));
+
+	kfree(name);
+}
+
 static void __d_free_external(struct rcu_head *head)
 {
 	struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
-	kfree(external_name(dentry));
-	kmem_cache_free(dentry_cache, dentry); 
+
+	__d_free_external_name(&external_name(dentry)->u.head);
+
+	kmem_cache_free(dentry_cache, dentry);
 }
 
 static inline int dname_external(const struct dentry *dentry)
@@ -291,7 +305,7 @@ void release_dentry_name_snapshot(struct name_snapshot *name)
 		struct external_name *p;
 		p = container_of(name->name, struct external_name, name[0]);
 		if (unlikely(atomic_dec_and_test(&p->u.count)))
-			kfree_rcu(p, u.head);
+			call_rcu(&p->u.head, __d_free_external_name);
 	}
 }
 EXPORT_SYMBOL(release_dentry_name_snapshot);
@@ -1038,6 +1052,8 @@ static void shrink_dentry_list(struct list_head *list)
 	while (!list_empty(list)) {
 		struct dentry *dentry, *parent;
 
+		cond_resched();
+
 		dentry = list_entry(list->prev, struct dentry, d_lru);
 		spin_lock(&dentry->d_lock);
 		rcu_read_lock();
@@ -1191,7 +1207,6 @@ void shrink_dcache_sb(struct super_block *sb)
 
 		this_cpu_sub(nr_dentry_unused, freed);
 		shrink_dentry_list(&dispose);
-		cond_resched();
 	} while (list_lru_count(&sb->s_dentry_lru) > 0);
 }
 EXPORT_SYMBOL(shrink_dcache_sb);
@@ -1473,7 +1488,6 @@ void shrink_dcache_parent(struct dentry *parent)
 			break;
 
 		shrink_dentry_list(&data.dispose);
-		cond_resched();
 	}
 }
 EXPORT_SYMBOL(shrink_dcache_parent);
@@ -1600,7 +1614,6 @@ void d_invalidate(struct dentry *dentry)
 			detach_mounts(data.mountpoint);
 			dput(data.mountpoint);
 		}
-		cond_resched();
 	}
 }
 EXPORT_SYMBOL(d_invalidate);
@@ -1617,6 +1630,7 @@ EXPORT_SYMBOL(d_invalidate);
  
 struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
 {
+	struct external_name *ext = NULL;
 	struct dentry *dentry;
 	char *dname;
 	int err;
@@ -1637,14 +1651,14 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
 		dname = dentry->d_iname;
 	} else if (name->len > DNAME_INLINE_LEN-1) {
 		size_t size = offsetof(struct external_name, name[1]);
-		struct external_name *p = kmalloc(size + name->len,
-						  GFP_KERNEL_ACCOUNT);
-		if (!p) {
+
+		ext = kmalloc(size + name->len, GFP_KERNEL_ACCOUNT);
+		if (!ext) {
 			kmem_cache_free(dentry_cache, dentry); 
 			return NULL;
 		}
-		atomic_set(&p->u.count, 1);
-		dname = p->name;
+		atomic_set(&ext->u.count, 1);
+		dname = ext->name;
 	} else  {
 		dname = dentry->d_iname;
 	}	
@@ -1683,6 +1697,12 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
 		}
 	}
 
+	if (unlikely(ext)) {
+		pg_data_t *pgdat = page_pgdat(virt_to_page(ext));
+		mod_node_page_state(pgdat, NR_INDIRECTLY_RECLAIMABLE_BYTES,
+				    ksize(ext));
+	}
+
 	this_cpu_inc(nr_dentry);
 
 	return dentry;
@@ -2770,7 +2790,7 @@ static void copy_name(struct dentry *dentry, struct dentry *target)
 		dentry->d_name.hash_len = target->d_name.hash_len;
 	}
 	if (old_name && likely(atomic_dec_and_test(&old_name->u.count)))
-		kfree_rcu(old_name, u.head);
+		call_rcu(&old_name->u.head, __d_free_external_name);
 }
 
 /*
diff --git a/fs/exec.c b/fs/exec.c
index a919a827d181..183059c427b9 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -257,7 +257,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
 		 *    to work from.
 		 */
 		limit = _STK_LIM / 4 * 3;
-		limit = min(limit, rlimit(RLIMIT_STACK) / 4);
+		limit = min(limit, bprm->rlim_stack.rlim_cur / 4);
 		if (size > limit)
 			goto fail;
 	}
@@ -411,6 +411,11 @@ static int bprm_mm_init(struct linux_binprm *bprm)
 	if (!mm)
 		goto err;
 
+	/* Save current stack limit for all calculations made during exec. */
+	task_lock(current->group_leader);
+	bprm->rlim_stack = current->signal->rlim[RLIMIT_STACK];
+	task_unlock(current->group_leader);
+
 	err = __bprm_mm_init(bprm);
 	if (err)
 		goto err;
@@ -697,7 +702,7 @@ int setup_arg_pages(struct linux_binprm *bprm,
 
 #ifdef CONFIG_STACK_GROWSUP
 	/* Limit stack size */
-	stack_base = rlimit_max(RLIMIT_STACK);
+	stack_base = bprm->rlim_stack.rlim_max;
 	if (stack_base > STACK_SIZE_MAX)
 		stack_base = STACK_SIZE_MAX;
 
@@ -770,7 +775,7 @@ int setup_arg_pages(struct linux_binprm *bprm,
 	 * Align this down to a page boundary as expand_stack
 	 * will align it up.
 	 */
-	rlim_stack = rlimit(RLIMIT_STACK) & PAGE_MASK;
+	rlim_stack = bprm->rlim_stack.rlim_cur & PAGE_MASK;
 #ifdef CONFIG_STACK_GROWSUP
 	if (stack_size + stack_expand > rlim_stack)
 		stack_base = vma->vm_start + rlim_stack;
@@ -1341,11 +1346,11 @@ void setup_new_exec(struct linux_binprm * bprm)
 		 * RLIMIT_STACK, but after the point of no return to avoid
 		 * needing to clean up the change on failure.
 		 */
-		if (current->signal->rlim[RLIMIT_STACK].rlim_cur > _STK_LIM)
-			current->signal->rlim[RLIMIT_STACK].rlim_cur = _STK_LIM;
+		if (bprm->rlim_stack.rlim_cur > _STK_LIM)
+			bprm->rlim_stack.rlim_cur = _STK_LIM;
 	}
 
-	arch_pick_mmap_layout(current->mm);
+	arch_pick_mmap_layout(current->mm, &bprm->rlim_stack);
 
 	current->sas_ss_sp = current->sas_ss_size = 0;
 
@@ -1378,6 +1383,16 @@ void setup_new_exec(struct linux_binprm * bprm)
 }
 EXPORT_SYMBOL(setup_new_exec);
 
+/* Runs immediately before start_thread() takes over. */
+void finalize_exec(struct linux_binprm *bprm)
+{
+	/* Store any stack rlimit changes before starting thread. */
+	task_lock(current->group_leader);
+	current->signal->rlim[RLIMIT_STACK] = bprm->rlim_stack;
+	task_unlock(current->group_leader);
+}
+EXPORT_SYMBOL(finalize_exec);
+
 /*
  * Prepare credentials and lock ->cred_guard_mutex.
  * install_exec_creds() commits the new creds and drops the lock.
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 032295e1d386..cc40802ddfa8 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -814,6 +814,7 @@ extern const struct inode_operations ext2_file_inode_operations;
 extern const struct file_operations ext2_file_operations;
 
 /* inode.c */
+extern void ext2_set_file_ops(struct inode *inode);
 extern const struct address_space_operations ext2_aops;
 extern const struct address_space_operations ext2_nobh_aops;
 extern const struct iomap_ops ext2_iomap_ops;
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 9b2ac55ac34f..1e01fabef130 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -940,9 +940,6 @@ ext2_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 	loff_t offset = iocb->ki_pos;
 	ssize_t ret;
 
-	if (WARN_ON_ONCE(IS_DAX(inode)))
-		return -EIO;
-
 	ret = blockdev_direct_IO(iocb, inode, iter, ext2_get_block);
 	if (ret < 0 && iov_iter_rw(iter) == WRITE)
 		ext2_write_failed(mapping, offset + count);
@@ -952,17 +949,16 @@ ext2_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 static int
 ext2_writepages(struct address_space *mapping, struct writeback_control *wbc)
 {
-#ifdef CONFIG_FS_DAX
-	if (dax_mapping(mapping)) {
-		return dax_writeback_mapping_range(mapping,
-						   mapping->host->i_sb->s_bdev,
-						   wbc);
-	}
-#endif
-
 	return mpage_writepages(mapping, wbc, ext2_get_block);
 }
 
+static int
+ext2_dax_writepages(struct address_space *mapping, struct writeback_control *wbc)
+{
+	return dax_writeback_mapping_range(mapping,
+			mapping->host->i_sb->s_bdev, wbc);
+}
+
 const struct address_space_operations ext2_aops = {
 	.readpage		= ext2_readpage,
 	.readpages		= ext2_readpages,
@@ -990,6 +986,13 @@ const struct address_space_operations ext2_nobh_aops = {
 	.error_remove_page	= generic_error_remove_page,
 };
 
+static const struct address_space_operations ext2_dax_aops = {
+	.writepages		= ext2_dax_writepages,
+	.direct_IO		= noop_direct_IO,
+	.set_page_dirty		= noop_set_page_dirty,
+	.invalidatepage		= noop_invalidatepage,
+};
+
 /*
  * Probably it should be a library function... search for first non-zero word
  * or memcmp with zero_page, whatever is better for particular architecture.
@@ -1388,6 +1391,18 @@ void ext2_set_inode_flags(struct inode *inode)
 		inode->i_flags |= S_DAX;
 }
 
+void ext2_set_file_ops(struct inode *inode)
+{
+	inode->i_op = &ext2_file_inode_operations;
+	inode->i_fop = &ext2_file_operations;
+	if (IS_DAX(inode))
+		inode->i_mapping->a_ops = &ext2_dax_aops;
+	else if (test_opt(inode->i_sb, NOBH))
+		inode->i_mapping->a_ops = &ext2_nobh_aops;
+	else
+		inode->i_mapping->a_ops = &ext2_aops;
+}
+
 struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
 {
 	struct ext2_inode_info *ei;
@@ -1480,14 +1495,7 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
 		ei->i_data[n] = raw_inode->i_block[n];
 
 	if (S_ISREG(inode->i_mode)) {
-		inode->i_op = &ext2_file_inode_operations;
-		if (test_opt(inode->i_sb, NOBH)) {
-			inode->i_mapping->a_ops = &ext2_nobh_aops;
-			inode->i_fop = &ext2_file_operations;
-		} else {
-			inode->i_mapping->a_ops = &ext2_aops;
-			inode->i_fop = &ext2_file_operations;
-		}
+		ext2_set_file_ops(inode);
 	} else if (S_ISDIR(inode->i_mode)) {
 		inode->i_op = &ext2_dir_inode_operations;
 		inode->i_fop = &ext2_dir_operations;
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index e078075dc66f..55f7caadb093 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -107,14 +107,7 @@ static int ext2_create (struct inode * dir, struct dentry * dentry, umode_t mode
 	if (IS_ERR(inode))
 		return PTR_ERR(inode);
 
-	inode->i_op = &ext2_file_inode_operations;
-	if (test_opt(inode->i_sb, NOBH)) {
-		inode->i_mapping->a_ops = &ext2_nobh_aops;
-		inode->i_fop = &ext2_file_operations;
-	} else {
-		inode->i_mapping->a_ops = &ext2_aops;
-		inode->i_fop = &ext2_file_operations;
-	}
+	ext2_set_file_ops(inode);
 	mark_inode_dirty(inode);
 	return ext2_add_nondir(dentry, inode);
 }
@@ -125,14 +118,7 @@ static int ext2_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
 	if (IS_ERR(inode))
 		return PTR_ERR(inode);
 
-	inode->i_op = &ext2_file_inode_operations;
-	if (test_opt(inode->i_sb, NOBH)) {
-		inode->i_mapping->a_ops = &ext2_nobh_aops;
-		inode->i_fop = &ext2_file_operations;
-	} else {
-		inode->i_mapping->a_ops = &ext2_aops;
-		inode->i_fop = &ext2_file_operations;
-	}
+	ext2_set_file_ops(inode);
 	mark_inode_dirty(inode);
 	d_tmpfile(dentry, inode);
 	unlock_new_inode(inode);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 129205028300..1e50c5efae67 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2716,12 +2716,6 @@ static int ext4_writepages(struct address_space *mapping,
 	percpu_down_read(&sbi->s_journal_flag_rwsem);
 	trace_ext4_writepages(inode, wbc);
 
-	if (dax_mapping(mapping)) {
-		ret = dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev,
-						  wbc);
-		goto out_writepages;
-	}
-
 	/*
 	 * No pages to write? This is mainly a kludge to avoid starting
 	 * a transaction for special inodes like journal inode on last iput()
@@ -2942,6 +2936,27 @@ out_writepages:
 	return ret;
 }
 
+static int ext4_dax_writepages(struct address_space *mapping,
+			       struct writeback_control *wbc)
+{
+	int ret;
+	long nr_to_write = wbc->nr_to_write;
+	struct inode *inode = mapping->host;
+	struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
+
+	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+		return -EIO;
+
+	percpu_down_read(&sbi->s_journal_flag_rwsem);
+	trace_ext4_writepages(inode, wbc);
+
+	ret = dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev, wbc);
+	trace_ext4_writepages_result(inode, wbc, ret,
+				     nr_to_write - wbc->nr_to_write);
+	percpu_up_read(&sbi->s_journal_flag_rwsem);
+	return ret;
+}
+
 static int ext4_nonda_switch(struct super_block *sb)
 {
 	s64 free_clusters, dirty_clusters;
@@ -3845,10 +3860,6 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 	if (ext4_has_inline_data(inode))
 		return 0;
 
-	/* DAX uses iomap path now */
-	if (WARN_ON_ONCE(IS_DAX(inode)))
-		return 0;
-
 	trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
 	if (iov_iter_rw(iter) == READ)
 		ret = ext4_direct_IO_read(iocb, iter);
@@ -3934,6 +3945,13 @@ static const struct address_space_operations ext4_da_aops = {
 	.error_remove_page	= generic_error_remove_page,
 };
 
+static const struct address_space_operations ext4_dax_aops = {
+	.writepages		= ext4_dax_writepages,
+	.direct_IO		= noop_direct_IO,
+	.set_page_dirty		= noop_set_page_dirty,
+	.invalidatepage		= noop_invalidatepage,
+};
+
 void ext4_set_aops(struct inode *inode)
 {
 	switch (ext4_inode_journal_mode(inode)) {
@@ -3946,7 +3964,9 @@ void ext4_set_aops(struct inode *inode)
 	default:
 		BUG();
 	}
-	if (test_opt(inode->i_sb, DELALLOC))
+	if (IS_DAX(inode))
+		inode->i_mapping->a_ops = &ext4_dax_aops;
+	else if (test_opt(inode->i_sb, DELALLOC))
 		inode->i_mapping->a_ops = &ext4_da_aops;
 	else
 		inode->i_mapping->a_ops = &ext4_aops;
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index db50686f5096..02237d4d91f5 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2424,12 +2424,12 @@ void f2fs_set_page_dirty_nobuffers(struct page *page)
 	SetPageDirty(page);
 	spin_unlock(&mapping->private_lock);
 
-	spin_lock_irqsave(&mapping->tree_lock, flags);
+	xa_lock_irqsave(&mapping->i_pages, flags);
 	WARN_ON_ONCE(!PageUptodate(page));
 	account_page_dirtied(page, mapping);
-	radix_tree_tag_set(&mapping->page_tree,
+	radix_tree_tag_set(&mapping->i_pages,
 			page_index(page), PAGECACHE_TAG_DIRTY);
-	spin_unlock_irqrestore(&mapping->tree_lock, flags);
+	xa_unlock_irqrestore(&mapping->i_pages, flags);
 	unlock_page_memcg(page);
 
 	__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index fe661274ff10..8c9c2f31b253 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -732,10 +732,10 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
 
 	if (bit_pos == NR_DENTRY_IN_BLOCK &&
 			!truncate_hole(dir, page->index, page->index + 1)) {
-		spin_lock_irqsave(&mapping->tree_lock, flags);
-		radix_tree_tag_clear(&mapping->page_tree, page_index(page),
+		xa_lock_irqsave(&mapping->i_pages, flags);
+		radix_tree_tag_clear(&mapping->i_pages, page_index(page),
 				     PAGECACHE_TAG_DIRTY);
-		spin_unlock_irqrestore(&mapping->tree_lock, flags);
+		xa_unlock_irqrestore(&mapping->i_pages, flags);
 
 		clear_page_dirty_for_io(page);
 		ClearPagePrivate(page);
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index bfb7a4a3a929..9327411fd93b 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -1015,7 +1015,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
 	unsigned int init_segno = segno;
 	struct gc_inode_list gc_list = {
 		.ilist = LIST_HEAD_INIT(gc_list.ilist),
-		.iroot = RADIX_TREE_INIT(GFP_NOFS),
+		.iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
 	};
 
 	trace_f2fs_gc_begin(sbi->sb, sync, background,
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 3b77d6421218..265da200daa8 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -226,10 +226,10 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page)
 	kunmap_atomic(src_addr);
 	set_page_dirty(dn.inode_page);
 
-	spin_lock_irqsave(&mapping->tree_lock, flags);
-	radix_tree_tag_clear(&mapping->page_tree, page_index(page),
+	xa_lock_irqsave(&mapping->i_pages, flags);
+	radix_tree_tag_clear(&mapping->i_pages, page_index(page),
 			     PAGECACHE_TAG_DIRTY);
-	spin_unlock_irqrestore(&mapping->tree_lock, flags);
+	xa_unlock_irqrestore(&mapping->i_pages, flags);
 
 	set_inode_flag(inode, FI_APPEND_WRITE);
 	set_inode_flag(inode, FI_DATA_EXIST);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 9a99243054ba..f202398e20ea 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -91,11 +91,11 @@ static void clear_node_page_dirty(struct page *page)
 	unsigned int long flags;
 
 	if (PageDirty(page)) {
-		spin_lock_irqsave(&mapping->tree_lock, flags);
-		radix_tree_tag_clear(&mapping->page_tree,
+		xa_lock_irqsave(&mapping->i_pages, flags);
+		radix_tree_tag_clear(&mapping->i_pages,
 				page_index(page),
 				PAGECACHE_TAG_DIRTY);
-		spin_unlock_irqrestore(&mapping->tree_lock, flags);
+		xa_unlock_irqrestore(&mapping->i_pages, flags);
 
 		clear_page_dirty_for_io(page);
 		dec_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_NODES);
@@ -1161,7 +1161,7 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
 	f2fs_bug_on(sbi, check_nid_range(sbi, nid));
 
 	rcu_read_lock();
-	apage = radix_tree_lookup(&NODE_MAPPING(sbi)->page_tree, nid);
+	apage = radix_tree_lookup(&NODE_MAPPING(sbi)->i_pages, nid);
 	rcu_read_unlock();
 	if (apage)
 		return;
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 1280f915079b..4b12ba70a895 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -347,9 +347,9 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
 	 * By the time control reaches here, RCU grace period has passed
 	 * since I_WB_SWITCH assertion and all wb stat update transactions
 	 * between unlocked_inode_to_wb_begin/end() are guaranteed to be
-	 * synchronizing against mapping->tree_lock.
+	 * synchronizing against the i_pages lock.
 	 *
-	 * Grabbing old_wb->list_lock, inode->i_lock and mapping->tree_lock
+	 * Grabbing old_wb->list_lock, inode->i_lock and the i_pages lock
 	 * gives us exclusion against all wb related operations on @inode
 	 * including IO list manipulations and stat updates.
 	 */
@@ -361,7 +361,7 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
 		spin_lock_nested(&old_wb->list_lock, SINGLE_DEPTH_NESTING);
 	}
 	spin_lock(&inode->i_lock);
-	spin_lock_irq(&mapping->tree_lock);
+	xa_lock_irq(&mapping->i_pages);
 
 	/*
 	 * Once I_FREEING is visible under i_lock, the eviction path owns
@@ -373,22 +373,22 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
 	/*
 	 * Count and transfer stats.  Note that PAGECACHE_TAG_DIRTY points
 	 * to possibly dirty pages while PAGECACHE_TAG_WRITEBACK points to
-	 * pages actually under underwriteback.
+	 * pages actually under writeback.
 	 */
-	radix_tree_for_each_tagged(slot, &mapping->page_tree, &iter, 0,
+	radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, 0,
 				   PAGECACHE_TAG_DIRTY) {
 		struct page *page = radix_tree_deref_slot_protected(slot,
-							&mapping->tree_lock);
+						&mapping->i_pages.xa_lock);
 		if (likely(page) && PageDirty(page)) {
 			dec_wb_stat(old_wb, WB_RECLAIMABLE);
 			inc_wb_stat(new_wb, WB_RECLAIMABLE);
 		}
 	}
 
-	radix_tree_for_each_tagged(slot, &mapping->page_tree, &iter, 0,
+	radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, 0,
 				   PAGECACHE_TAG_WRITEBACK) {
 		struct page *page = radix_tree_deref_slot_protected(slot,
-							&mapping->tree_lock);
+						&mapping->i_pages.xa_lock);
 		if (likely(page)) {
 			WARN_ON_ONCE(!PageWriteback(page));
 			dec_wb_stat(old_wb, WB_WRITEBACK);
@@ -430,7 +430,7 @@ skip_switch:
 	 */
 	smp_store_release(&inode->i_state, inode->i_state & ~I_WB_SWITCH);
 
-	spin_unlock_irq(&mapping->tree_lock);
+	xa_unlock_irq(&mapping->i_pages);
 	spin_unlock(&inode->i_lock);
 	spin_unlock(&new_wb->list_lock);
 	spin_unlock(&old_wb->list_lock);
@@ -506,8 +506,8 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
 
 	/*
 	 * In addition to synchronizing among switchers, I_WB_SWITCH tells
-	 * the RCU protected stat update paths to grab the mapping's
-	 * tree_lock so that stat transfer can synchronize against them.
+	 * the RCU protected stat update paths to grab the i_page
+	 * lock so that stat transfer can synchronize against them.
 	 * Let's continue after I_WB_SWITCH is guaranteed to be visible.
 	 */
 	call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn);
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index 7dc55b93a830..97137d7ec5ee 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -832,7 +832,7 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie,
 	/* Clear pointers back to the netfs */
 	cookie->netfs_data	= NULL;
 	cookie->def		= NULL;
-	BUG_ON(cookie->stores.rnode);
+	BUG_ON(!radix_tree_empty(&cookie->stores));
 
 	if (cookie->parent) {
 		ASSERTCMP(atomic_read(&cookie->parent->usage), >, 0);
diff --git a/fs/fscache/object.c b/fs/fscache/object.c
index 1085ca12e25c..20e0d0a4dc8c 100644
--- a/fs/fscache/object.c
+++ b/fs/fscache/object.c
@@ -973,7 +973,7 @@ static const struct fscache_state *_fscache_invalidate_object(struct fscache_obj
 	 * retire the object instead.
 	 */
 	if (!fscache_use_cookie(object)) {
-		ASSERT(object->cookie->stores.rnode == NULL);
+		ASSERT(radix_tree_empty(&object->cookie->stores));
 		set_bit(FSCACHE_OBJECT_RETIRED, &object->flags);
 		_leave(" [no cookie]");
 		return transit_to(KILL_OBJECT);
diff --git a/fs/inode.c b/fs/inode.c
index b153aeaa61ea..13ceb98c3bd3 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -348,8 +348,7 @@ EXPORT_SYMBOL(inc_nlink);
 
 static void __address_space_init_once(struct address_space *mapping)
 {
-	INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC | __GFP_ACCOUNT);
-	spin_lock_init(&mapping->tree_lock);
+	INIT_RADIX_TREE(&mapping->i_pages, GFP_ATOMIC | __GFP_ACCOUNT);
 	init_rwsem(&mapping->i_mmap_rwsem);
 	INIT_LIST_HEAD(&mapping->private_list);
 	spin_lock_init(&mapping->private_lock);
@@ -504,14 +503,14 @@ EXPORT_SYMBOL(__remove_inode_hash);
 void clear_inode(struct inode *inode)
 {
 	/*
-	 * We have to cycle tree_lock here because reclaim can be still in the
+	 * We have to cycle the i_pages lock here because reclaim can be in the
 	 * process of removing the last page (in __delete_from_page_cache())
-	 * and we must not free mapping under it.
+	 * and we must not free the mapping under it.
 	 */
-	spin_lock_irq(&inode->i_data.tree_lock);
+	xa_lock_irq(&inode->i_data.i_pages);
 	BUG_ON(inode->i_data.nrpages);
 	BUG_ON(inode->i_data.nrexceptional);
-	spin_unlock_irq(&inode->i_data.tree_lock);
+	xa_unlock_irq(&inode->i_data.i_pages);
 	BUG_ON(!list_empty(&inode->i_data.private_list));
 	BUG_ON(!(inode->i_state & I_FREEING));
 	BUG_ON(inode->i_state & I_CLEAR);
diff --git a/fs/libfs.c b/fs/libfs.c
index 7ff3cb904acd..0fb590d79f30 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -1060,6 +1060,45 @@ int noop_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 }
 EXPORT_SYMBOL(noop_fsync);
 
+int noop_set_page_dirty(struct page *page)
+{
+	/*
+	 * Unlike __set_page_dirty_no_writeback that handles dirty page
+	 * tracking in the page object, dax does all dirty tracking in
+	 * the inode address_space in response to mkwrite faults. In the
+	 * dax case we only need to worry about potentially dirty CPU
+	 * caches, not dirty page cache pages to write back.
+	 *
+	 * This callback is defined to prevent fallback to
+	 * __set_page_dirty_buffers() in set_page_dirty().
+	 */
+	return 0;
+}
+EXPORT_SYMBOL_GPL(noop_set_page_dirty);
+
+void noop_invalidatepage(struct page *page, unsigned int offset,
+		unsigned int length)
+{
+	/*
+	 * There is no page cache to invalidate in the dax case, however
+	 * we need this callback defined to prevent falling back to
+	 * block_invalidatepage() in do_invalidatepage().
+	 */
+}
+EXPORT_SYMBOL_GPL(noop_invalidatepage);
+
+ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
+{
+	/*
+	 * iomap based filesystems support direct I/O without need for
+	 * this callback. However, it still needs to be set in
+	 * inode->a_ops so that open/fcntl know that direct I/O is
+	 * generally supported.
+	 */
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(noop_direct_IO);
+
 /* Because kfree isn't assignment-compatible with void(void*) ;-/ */
 void kfree_link(void *p)
 {
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index c21e0b4454a6..dec98cab729d 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -193,9 +193,9 @@ retry:
 				       (unsigned long long)oldkey,
 				       (unsigned long long)newkey);
 
-		spin_lock_irq(&btnc->tree_lock);
-		err = radix_tree_insert(&btnc->page_tree, newkey, obh->b_page);
-		spin_unlock_irq(&btnc->tree_lock);
+		xa_lock_irq(&btnc->i_pages);
+		err = radix_tree_insert(&btnc->i_pages, newkey, obh->b_page);
+		xa_unlock_irq(&btnc->i_pages);
 		/*
 		 * Note: page->index will not change to newkey until
 		 * nilfs_btnode_commit_change_key() will be called.
@@ -251,11 +251,11 @@ void nilfs_btnode_commit_change_key(struct address_space *btnc,
 				       (unsigned long long)newkey);
 		mark_buffer_dirty(obh);
 
-		spin_lock_irq(&btnc->tree_lock);
-		radix_tree_delete(&btnc->page_tree, oldkey);
-		radix_tree_tag_set(&btnc->page_tree, newkey,
+		xa_lock_irq(&btnc->i_pages);
+		radix_tree_delete(&btnc->i_pages, oldkey);
+		radix_tree_tag_set(&btnc->i_pages, newkey,
 				   PAGECACHE_TAG_DIRTY);
-		spin_unlock_irq(&btnc->tree_lock);
+		xa_unlock_irq(&btnc->i_pages);
 
 		opage->index = obh->b_blocknr = newkey;
 		unlock_page(opage);
@@ -283,9 +283,9 @@ void nilfs_btnode_abort_change_key(struct address_space *btnc,
 		return;
 
 	if (nbh == NULL) {	/* blocksize == pagesize */
-		spin_lock_irq(&btnc->tree_lock);
-		radix_tree_delete(&btnc->page_tree, newkey);
-		spin_unlock_irq(&btnc->tree_lock);
+		xa_lock_irq(&btnc->i_pages);
+		radix_tree_delete(&btnc->i_pages, newkey);
+		xa_unlock_irq(&btnc->i_pages);
 		unlock_page(ctxt->bh->b_page);
 	} else
 		brelse(nbh);
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 68241512d7c1..4cb850a6f1c2 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -331,15 +331,15 @@ repeat:
 			struct page *page2;
 
 			/* move the page to the destination cache */
-			spin_lock_irq(&smap->tree_lock);
-			page2 = radix_tree_delete(&smap->page_tree, offset);
+			xa_lock_irq(&smap->i_pages);
+			page2 = radix_tree_delete(&smap->i_pages, offset);
 			WARN_ON(page2 != page);
 
 			smap->nrpages--;
-			spin_unlock_irq(&smap->tree_lock);
+			xa_unlock_irq(&smap->i_pages);
 
-			spin_lock_irq(&dmap->tree_lock);
-			err = radix_tree_insert(&dmap->page_tree, offset, page);
+			xa_lock_irq(&dmap->i_pages);
+			err = radix_tree_insert(&dmap->i_pages, offset, page);
 			if (unlikely(err < 0)) {
 				WARN_ON(err == -EEXIST);
 				page->mapping = NULL;
@@ -348,11 +348,11 @@ repeat:
 				page->mapping = dmap;
 				dmap->nrpages++;
 				if (PageDirty(page))
-					radix_tree_tag_set(&dmap->page_tree,
+					radix_tree_tag_set(&dmap->i_pages,
 							   offset,
 							   PAGECACHE_TAG_DIRTY);
 			}
-			spin_unlock_irq(&dmap->tree_lock);
+			xa_unlock_irq(&dmap->i_pages);
 		}
 		unlock_page(page);
 	}
@@ -474,15 +474,15 @@ int __nilfs_clear_page_dirty(struct page *page)
 	struct address_space *mapping = page->mapping;
 
 	if (mapping) {
-		spin_lock_irq(&mapping->tree_lock);
+		xa_lock_irq(&mapping->i_pages);
 		if (test_bit(PG_dirty, &page->flags)) {
-			radix_tree_tag_clear(&mapping->page_tree,
+			radix_tree_tag_clear(&mapping->i_pages,
 					     page_index(page),
 					     PAGECACHE_TAG_DIRTY);
-			spin_unlock_irq(&mapping->tree_lock);
+			xa_unlock_irq(&mapping->i_pages);
 			return clear_page_dirty_for_io(page);
 		}
-		spin_unlock_irq(&mapping->tree_lock);
+		xa_unlock_irq(&mapping->i_pages);
 		return 0;
 	}
 	return TestClearPageDirty(page);
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 598803576e4c..ae2c807fd719 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -141,25 +141,12 @@ static inline const char *get_task_state(struct task_struct *tsk)
 	return task_state_array[task_state_index(tsk)];
 }
 
-static inline int get_task_umask(struct task_struct *tsk)
-{
-	struct fs_struct *fs;
-	int umask = -ENOENT;
-
-	task_lock(tsk);
-	fs = tsk->fs;
-	if (fs)
-		umask = fs->umask;
-	task_unlock(tsk);
-	return umask;
-}
-
 static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
 				struct pid *pid, struct task_struct *p)
 {
 	struct user_namespace *user_ns = seq_user_ns(m);
 	struct group_info *group_info;
-	int g, umask;
+	int g, umask = -1;
 	struct task_struct *tracer;
 	const struct cred *cred;
 	pid_t ppid, tpid = 0, tgid, ngid;
@@ -177,17 +164,18 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
 	ngid = task_numa_group_id(p);
 	cred = get_task_cred(p);
 
-	umask = get_task_umask(p);
-	if (umask >= 0)
-		seq_printf(m, "Umask:\t%#04o\n", umask);
-
 	task_lock(p);
+	if (p->fs)
+		umask = p->fs->umask;
 	if (p->files)
 		max_fds = files_fdtable(p->files)->max_fds;
 	task_unlock(p);
 	rcu_read_unlock();
 
-	seq_printf(m, "State:\t%s", get_task_state(p));
+	if (umask >= 0)
+		seq_printf(m, "Umask:\t%#04o\n", umask);
+	seq_puts(m, "State:\t");
+	seq_puts(m, get_task_state(p));
 
 	seq_put_decimal_ull(m, "\nTgid:\t", tgid);
 	seq_put_decimal_ull(m, "\nNgid:\t", ngid);
@@ -313,8 +301,8 @@ static void render_cap_t(struct seq_file *m, const char *header,
 
 	seq_puts(m, header);
 	CAP_FOR_EACH_U32(__capi) {
-		seq_printf(m, "%08x",
-			   a->cap[CAP_LAST_U32 - __capi]);
+		seq_put_hex_ll(m, NULL,
+			   a->cap[CAP_LAST_U32 - __capi], 8);
 	}
 	seq_putc(m, '\n');
 }
@@ -368,7 +356,8 @@ static void task_cpus_allowed(struct seq_file *m, struct task_struct *task)
 
 static inline void task_core_dumping(struct seq_file *m, struct mm_struct *mm)
 {
-	seq_printf(m, "CoreDumping:\t%d\n", !!mm->core_state);
+	seq_put_decimal_ull(m, "CoreDumping:\t", !!mm->core_state);
+	seq_putc(m, '\n');
 }
 
 int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
@@ -504,7 +493,11 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 	/* convert nsec -> ticks */
 	start_time = nsec_to_clock_t(task->real_start_time);
 
-	seq_printf(m, "%d (%s) %c", pid_nr_ns(pid, ns), tcomm, state);
+	seq_put_decimal_ull(m, "", pid_nr_ns(pid, ns));
+	seq_puts(m, " (");
+	seq_puts(m, tcomm);
+	seq_puts(m, ") ");
+	seq_putc(m, state);
 	seq_put_decimal_ll(m, " ", ppid);
 	seq_put_decimal_ll(m, " ", pgid);
 	seq_put_decimal_ll(m, " ", sid);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index d53246863cfb..eafa39a3a88c 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -388,14 +388,17 @@ static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns,
 	unsigned long wchan;
 	char symname[KSYM_NAME_LEN];
 
-	wchan = get_wchan(task);
+	if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
+		goto print0;
 
-	if (wchan && ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)
-			&& !lookup_symbol_name(wchan, symname))
-		seq_printf(m, "%s", symname);
-	else
-		seq_putc(m, '0');
+	wchan = get_wchan(task);
+	if (wchan && !lookup_symbol_name(wchan, symname)) {
+		seq_puts(m, symname);
+		return 0;
+	}
 
+print0:
+	seq_putc(m, '0');
 	return 0;
 }
 #endif /* CONFIG_KALLSYMS */
@@ -1910,6 +1913,8 @@ static int dname_to_vma_addr(struct dentry *dentry,
 	unsigned long long sval, eval;
 	unsigned int len;
 
+	if (str[0] == '0' && str[1] != '-')
+		return -EINVAL;
 	len = _parse_integer(str, 16, &sval);
 	if (len & KSTRTOX_OVERFLOW)
 		return -EINVAL;
@@ -1921,6 +1926,8 @@ static int dname_to_vma_addr(struct dentry *dentry,
 		return -EINVAL;
 	str++;
 
+	if (str[0] == '0' && str[1])
+		return -EINVAL;
 	len = _parse_integer(str, 16, &eval);
 	if (len & KSTRTOX_OVERFLOW)
 		return -EINVAL;
@@ -2204,6 +2211,7 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx)
 		}
 	}
 	up_read(&mm->mmap_sem);
+	mmput(mm);
 
 	for (i = 0; i < nr_files; i++) {
 		char buf[4 * sizeof(long) + 2];	/* max: %lx-%lx\0 */
@@ -2221,7 +2229,6 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx)
 	}
 	if (fa)
 		flex_array_free(fa);
-	mmput(mm);
 
 out_put_task:
 	put_task_struct(task);
diff --git a/fs/proc/cmdline.c b/fs/proc/cmdline.c
index 403cbb12a6e9..8233e7af9389 100644
--- a/fs/proc/cmdline.c
+++ b/fs/proc/cmdline.c
@@ -6,7 +6,8 @@
 
 static int cmdline_proc_show(struct seq_file *m, void *v)
 {
-	seq_printf(m, "%s\n", saved_command_line);
+	seq_puts(m, saved_command_line);
+	seq_putc(m, '\n');
 	return 0;
 }
 
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 5d709fa8f3a2..04c4804cbdef 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -8,6 +8,7 @@
  * Copyright (C) 1997 Theodore Ts'o
  */
 
+#include <linux/cache.h>
 #include <linux/errno.h>
 #include <linux/time.h>
 #include <linux/proc_fs.h>
@@ -28,6 +29,17 @@
 
 static DEFINE_RWLOCK(proc_subdir_lock);
 
+struct kmem_cache *proc_dir_entry_cache __ro_after_init;
+
+void pde_free(struct proc_dir_entry *pde)
+{
+	if (S_ISLNK(pde->mode))
+		kfree(pde->data);
+	if (pde->name != pde->inline_name)
+		kfree(pde->name);
+	kmem_cache_free(proc_dir_entry_cache, pde);
+}
+
 static int proc_match(const char *name, struct proc_dir_entry *de, unsigned int len)
 {
 	if (len < de->namelen)
@@ -40,8 +52,8 @@ static int proc_match(const char *name, struct proc_dir_entry *de, unsigned int
 
 static struct proc_dir_entry *pde_subdir_first(struct proc_dir_entry *dir)
 {
-	return rb_entry_safe(rb_first_cached(&dir->subdir),
-			     struct proc_dir_entry, subdir_node);
+	return rb_entry_safe(rb_first(&dir->subdir), struct proc_dir_entry,
+			     subdir_node);
 }
 
 static struct proc_dir_entry *pde_subdir_next(struct proc_dir_entry *dir)
@@ -54,7 +66,7 @@ static struct proc_dir_entry *pde_subdir_find(struct proc_dir_entry *dir,
 					      const char *name,
 					      unsigned int len)
 {
-	struct rb_node *node = dir->subdir.rb_root.rb_node;
+	struct rb_node *node = dir->subdir.rb_node;
 
 	while (node) {
 		struct proc_dir_entry *de = rb_entry(node,
@@ -75,9 +87,8 @@ static struct proc_dir_entry *pde_subdir_find(struct proc_dir_entry *dir,
 static bool pde_subdir_insert(struct proc_dir_entry *dir,
 			      struct proc_dir_entry *de)
 {
-	struct rb_root_cached *root = &dir->subdir;
-	struct rb_node **new = &root->rb_root.rb_node, *parent = NULL;
-	bool leftmost = true;
+	struct rb_root *root = &dir->subdir;
+	struct rb_node **new = &root->rb_node, *parent = NULL;
 
 	/* Figure out where to put new node */
 	while (*new) {
@@ -89,16 +100,15 @@ static bool pde_subdir_insert(struct proc_dir_entry *dir,
 		parent = *new;
 		if (result < 0)
 			new = &(*new)->rb_left;
-		else if (result > 0) {
+		else if (result > 0)
 			new = &(*new)->rb_right;
-			leftmost = false;
-		} else
+		else
 			return false;
 	}
 
 	/* Add new node and rebalance tree. */
 	rb_link_node(&de->subdir_node, parent, new);
-	rb_insert_color_cached(&de->subdir_node, root, leftmost);
+	rb_insert_color(&de->subdir_node, root);
 	return true;
 }
 
@@ -354,6 +364,14 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
 		WARN(1, "name len %u\n", qstr.len);
 		return NULL;
 	}
+	if (qstr.len == 1 && fn[0] == '.') {
+		WARN(1, "name '.'\n");
+		return NULL;
+	}
+	if (qstr.len == 2 && fn[0] == '.' && fn[1] == '.') {
+		WARN(1, "name '..'\n");
+		return NULL;
+	}
 	if (*parent == &proc_root && name_to_int(&qstr) != ~0U) {
 		WARN(1, "create '/proc/%s' by hand\n", qstr.name);
 		return NULL;
@@ -363,16 +381,26 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
 		return NULL;
 	}
 
-	ent = kzalloc(sizeof(struct proc_dir_entry) + qstr.len + 1, GFP_KERNEL);
+	ent = kmem_cache_zalloc(proc_dir_entry_cache, GFP_KERNEL);
 	if (!ent)
 		goto out;
 
+	if (qstr.len + 1 <= sizeof(ent->inline_name)) {
+		ent->name = ent->inline_name;
+	} else {
+		ent->name = kmalloc(qstr.len + 1, GFP_KERNEL);
+		if (!ent->name) {
+			pde_free(ent);
+			return NULL;
+		}
+	}
+
 	memcpy(ent->name, fn, qstr.len + 1);
 	ent->namelen = qstr.len;
 	ent->mode = mode;
 	ent->nlink = nlink;
-	ent->subdir = RB_ROOT_CACHED;
-	atomic_set(&ent->count, 1);
+	ent->subdir = RB_ROOT;
+	refcount_set(&ent->refcnt, 1);
 	spin_lock_init(&ent->pde_unload_lock);
 	INIT_LIST_HEAD(&ent->pde_openers);
 	proc_set_user(ent, (*parent)->uid, (*parent)->gid);
@@ -395,12 +423,11 @@ struct proc_dir_entry *proc_symlink(const char *name,
 			strcpy((char*)ent->data,dest);
 			ent->proc_iops = &proc_link_inode_operations;
 			if (proc_register(parent, ent) < 0) {
-				kfree(ent->data);
-				kfree(ent);
+				pde_free(ent);
 				ent = NULL;
 			}
 		} else {
-			kfree(ent);
+			pde_free(ent);
 			ent = NULL;
 		}
 	}
@@ -423,7 +450,7 @@ struct proc_dir_entry *proc_mkdir_data(const char *name, umode_t mode,
 		ent->proc_iops = &proc_dir_inode_operations;
 		parent->nlink++;
 		if (proc_register(parent, ent) < 0) {
-			kfree(ent);
+			pde_free(ent);
 			parent->nlink--;
 			ent = NULL;
 		}
@@ -458,7 +485,7 @@ struct proc_dir_entry *proc_create_mount_point(const char *name)
 		ent->proc_iops = NULL;
 		parent->nlink++;
 		if (proc_register(parent, ent) < 0) {
-			kfree(ent);
+			pde_free(ent);
 			parent->nlink--;
 			ent = NULL;
 		}
@@ -495,7 +522,7 @@ struct proc_dir_entry *proc_create_data(const char *name, umode_t mode,
 		goto out_free;
 	return pde;
 out_free:
-	kfree(pde);
+	pde_free(pde);
 out:
 	return NULL;
 }
@@ -522,19 +549,12 @@ void proc_set_user(struct proc_dir_entry *de, kuid_t uid, kgid_t gid)
 }
 EXPORT_SYMBOL(proc_set_user);
 
-static void free_proc_entry(struct proc_dir_entry *de)
-{
-	proc_free_inum(de->low_ino);
-
-	if (S_ISLNK(de->mode))
-		kfree(de->data);
-	kfree(de);
-}
-
 void pde_put(struct proc_dir_entry *pde)
 {
-	if (atomic_dec_and_test(&pde->count))
-		free_proc_entry(pde);
+	if (refcount_dec_and_test(&pde->refcnt)) {
+		proc_free_inum(pde->low_ino);
+		pde_free(pde);
+	}
 }
 
 /*
@@ -555,7 +575,7 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
 
 	de = pde_subdir_find(parent, fn, len);
 	if (de)
-		rb_erase_cached(&de->subdir_node, &parent->subdir);
+		rb_erase(&de->subdir_node, &parent->subdir);
 	write_unlock(&proc_subdir_lock);
 	if (!de) {
 		WARN(1, "name '%s'\n", name);
@@ -592,13 +612,13 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent)
 		write_unlock(&proc_subdir_lock);
 		return -ENOENT;
 	}
-	rb_erase_cached(&root->subdir_node, &parent->subdir);
+	rb_erase(&root->subdir_node, &parent->subdir);
 
 	de = root;
 	while (1) {
 		next = pde_subdir_first(de);
 		if (next) {
-			rb_erase_cached(&next->subdir_node, &de->subdir);
+			rb_erase(&next->subdir_node, &de->subdir);
 			de = next;
 			continue;
 		}
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 6e8724958116..2cf3b74391ca 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -54,6 +54,7 @@ static void proc_evict_inode(struct inode *inode)
 }
 
 static struct kmem_cache *proc_inode_cachep __ro_after_init;
+static struct kmem_cache *pde_opener_cache __ro_after_init;
 
 static struct inode *proc_alloc_inode(struct super_block *sb)
 {
@@ -92,7 +93,7 @@ static void init_once(void *foo)
 	inode_init_once(&ei->vfs_inode);
 }
 
-void __init proc_init_inodecache(void)
+void __init proc_init_kmemcache(void)
 {
 	proc_inode_cachep = kmem_cache_create("proc_inode_cache",
 					     sizeof(struct proc_inode),
@@ -100,6 +101,13 @@ void __init proc_init_inodecache(void)
 						SLAB_MEM_SPREAD|SLAB_ACCOUNT|
 						SLAB_PANIC),
 					     init_once);
+	pde_opener_cache =
+		kmem_cache_create("pde_opener", sizeof(struct pde_opener), 0,
+				  SLAB_ACCOUNT|SLAB_PANIC, NULL);
+	proc_dir_entry_cache = kmem_cache_create_usercopy(
+		"proc_dir_entry", sizeof(struct proc_dir_entry), 0, SLAB_PANIC,
+		offsetof(struct proc_dir_entry, inline_name),
+		sizeof_field(struct proc_dir_entry, inline_name), NULL);
 }
 
 static int proc_show_options(struct seq_file *seq, struct dentry *root)
@@ -138,7 +146,7 @@ static void unuse_pde(struct proc_dir_entry *pde)
 		complete(pde->pde_unload_completion);
 }
 
-/* pde is locked */
+/* pde is locked on entry, unlocked on exit */
 static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo)
 {
 	/*
@@ -157,9 +165,10 @@ static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo)
 		pdeo->c = &c;
 		spin_unlock(&pde->pde_unload_lock);
 		wait_for_completion(&c);
-		spin_lock(&pde->pde_unload_lock);
 	} else {
 		struct file *file;
+		struct completion *c;
+
 		pdeo->closing = true;
 		spin_unlock(&pde->pde_unload_lock);
 		file = pdeo->file;
@@ -167,9 +176,11 @@ static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo)
 		spin_lock(&pde->pde_unload_lock);
 		/* After ->release. */
 		list_del(&pdeo->lh);
-		if (unlikely(pdeo->c))
-			complete(pdeo->c);
-		kfree(pdeo);
+		c = pdeo->c;
+		spin_unlock(&pde->pde_unload_lock);
+		if (unlikely(c))
+			complete(c);
+		kmem_cache_free(pde_opener_cache, pdeo);
 	}
 }
 
@@ -188,6 +199,7 @@ void proc_entry_rundown(struct proc_dir_entry *de)
 		struct pde_opener *pdeo;
 		pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh);
 		close_pdeo(de, pdeo);
+		spin_lock(&de->pde_unload_lock);
 	}
 	spin_unlock(&de->pde_unload_lock);
 }
@@ -338,31 +350,36 @@ static int proc_reg_open(struct inode *inode, struct file *file)
 	 *
 	 * Save every "struct file" with custom ->release hook.
 	 */
-	pdeo = kmalloc(sizeof(struct pde_opener), GFP_KERNEL);
-	if (!pdeo)
-		return -ENOMEM;
-
-	if (!use_pde(pde)) {
-		kfree(pdeo);
+	if (!use_pde(pde))
 		return -ENOENT;
-	}
-	open = pde->proc_fops->open;
+
 	release = pde->proc_fops->release;
+	if (release) {
+		pdeo = kmem_cache_alloc(pde_opener_cache, GFP_KERNEL);
+		if (!pdeo) {
+			rv = -ENOMEM;
+			goto out_unuse;
+		}
+	}
 
+	open = pde->proc_fops->open;
 	if (open)
 		rv = open(inode, file);
 
-	if (rv == 0 && release) {
-		/* To know what to release. */
-		pdeo->file = file;
-		pdeo->closing = false;
-		pdeo->c = NULL;
-		spin_lock(&pde->pde_unload_lock);
-		list_add(&pdeo->lh, &pde->pde_openers);
-		spin_unlock(&pde->pde_unload_lock);
-	} else
-		kfree(pdeo);
+	if (release) {
+		if (rv == 0) {
+			/* To know what to release. */
+			pdeo->file = file;
+			pdeo->closing = false;
+			pdeo->c = NULL;
+			spin_lock(&pde->pde_unload_lock);
+			list_add(&pdeo->lh, &pde->pde_openers);
+			spin_unlock(&pde->pde_unload_lock);
+		} else
+			kmem_cache_free(pde_opener_cache, pdeo);
+	}
 
+out_unuse:
 	unuse_pde(pde);
 	return rv;
 }
@@ -375,7 +392,7 @@ static int proc_reg_release(struct inode *inode, struct file *file)
 	list_for_each_entry(pdeo, &pde->pde_openers, lh) {
 		if (pdeo->file == file) {
 			close_pdeo(pde, pdeo);
-			break;
+			return 0;
 		}
 	}
 	spin_unlock(&pde->pde_unload_lock);
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index d697c8ab0a14..0f1692e63cb6 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -11,6 +11,7 @@
 
 #include <linux/proc_fs.h>
 #include <linux/proc_ns.h>
+#include <linux/refcount.h>
 #include <linux/spinlock.h>
 #include <linux/atomic.h>
 #include <linux/binfmts.h>
@@ -36,7 +37,7 @@ struct proc_dir_entry {
 	 * negative -> it's going away RSN
 	 */
 	atomic_t in_use;
-	atomic_t count;		/* use count */
+	refcount_t refcnt;
 	struct list_head pde_openers;	/* who did ->open, but not ->release */
 	/* protects ->pde_openers and all struct pde_opener instances */
 	spinlock_t pde_unload_lock;
@@ -50,13 +51,22 @@ struct proc_dir_entry {
 	kgid_t gid;
 	loff_t size;
 	struct proc_dir_entry *parent;
-	struct rb_root_cached subdir;
+	struct rb_root subdir;
 	struct rb_node subdir_node;
+	char *name;
 	umode_t mode;
 	u8 namelen;
-	char name[];
+#ifdef CONFIG_64BIT
+#define SIZEOF_PDE_INLINE_NAME	(192-139)
+#else
+#define SIZEOF_PDE_INLINE_NAME	(128-87)
+#endif
+	char inline_name[SIZEOF_PDE_INLINE_NAME];
 } __randomize_layout;
 
+extern struct kmem_cache *proc_dir_entry_cache;
+void pde_free(struct proc_dir_entry *pde);
+
 union proc_op {
 	int (*proc_get_link)(struct dentry *, struct path *);
 	int (*proc_show)(struct seq_file *m,
@@ -159,7 +169,7 @@ int proc_readdir_de(struct file *, struct dir_context *, struct proc_dir_entry *
 
 static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde)
 {
-	atomic_inc(&pde->count);
+	refcount_inc(&pde->refcnt);
 	return pde;
 }
 extern void pde_put(struct proc_dir_entry *);
@@ -177,12 +187,12 @@ struct pde_opener {
 	struct list_head lh;
 	bool closing;
 	struct completion *c;
-};
+} __randomize_layout;
 extern const struct inode_operations proc_link_inode_operations;
 
 extern const struct inode_operations proc_pid_link_inode_operations;
 
-extern void proc_init_inodecache(void);
+void proc_init_kmemcache(void);
 void set_proc_pid_nlink(void);
 extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
 extern int proc_fill_super(struct super_block *, void *data, int flags);
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 6bb20f864259..65a72ab57471 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -26,20 +26,7 @@ void __attribute__((weak)) arch_report_meminfo(struct seq_file *m)
 
 static void show_val_kb(struct seq_file *m, const char *s, unsigned long num)
 {
-	char v[32];
-	static const char blanks[7] = {' ', ' ', ' ', ' ',' ', ' ', ' '};
-	int len;
-
-	len = num_to_str(v, sizeof(v), num << (PAGE_SHIFT - 10));
-
-	seq_write(m, s, 16);
-
-	if (len > 0) {
-		if (len < 8)
-			seq_write(m, blanks, 8 - len);
-
-		seq_write(m, v, len);
-	}
+	seq_put_decimal_ull_width(m, s, num << (PAGE_SHIFT - 10), 8);
 	seq_write(m, " kB\n", 4);
 }
 
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 68c06ae7888c..1763f370489d 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -192,15 +192,16 @@ static __net_init int proc_net_ns_init(struct net *net)
 	int err;
 
 	err = -ENOMEM;
-	netd = kzalloc(sizeof(*netd) + 4, GFP_KERNEL);
+	netd = kmem_cache_zalloc(proc_dir_entry_cache, GFP_KERNEL);
 	if (!netd)
 		goto out;
 
-	netd->subdir = RB_ROOT_CACHED;
+	netd->subdir = RB_ROOT;
 	netd->data = net;
 	netd->nlink = 2;
 	netd->namelen = 3;
 	netd->parent = &proc_root;
+	netd->name = netd->inline_name;
 	memcpy(netd->name, "net", 4);
 
 	uid = make_kuid(net->user_ns, 0);
@@ -223,7 +224,7 @@ static __net_init int proc_net_ns_init(struct net *net)
 	return 0;
 
 free_net:
-	kfree(netd);
+	pde_free(netd);
 out:
 	return err;
 }
@@ -231,7 +232,7 @@ out:
 static __net_exit void proc_net_ns_exit(struct net *net)
 {
 	remove_proc_entry("stat", net->proc_net);
-	kfree(net->proc_net);
+	pde_free(net->proc_net);
 }
 
 static struct pernet_operations __net_initdata proc_net_ns_ops = {
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index c41ab261397d..8989936f2995 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -707,14 +707,14 @@ static bool proc_sys_link_fill_cache(struct file *file,
 				    struct ctl_table *table)
 {
 	bool ret = true;
+
 	head = sysctl_head_grab(head);
+	if (IS_ERR(head))
+		return false;
 
-	if (S_ISLNK(table->mode)) {
-		/* It is not an error if we can not follow the link ignore it */
-		int err = sysctl_follow_link(&head, &table);
-		if (err)
-			goto out;
-	}
+	/* It is not an error if we can not follow the link ignore it */
+	if (sysctl_follow_link(&head, &table))
+		goto out;
 
 	ret = proc_sys_fill_cache(file, ctx, head, table);
 out:
@@ -1086,7 +1086,7 @@ static int sysctl_check_table_array(const char *path, struct ctl_table *table)
 	if ((table->proc_handler == proc_douintvec) ||
 	    (table->proc_handler == proc_douintvec_minmax)) {
 		if (table->maxlen != sizeof(unsigned int))
-			err |= sysctl_err(path, table, "array now allowed");
+			err |= sysctl_err(path, table, "array not allowed");
 	}
 
 	return err;
diff --git a/fs/proc/root.c b/fs/proc/root.c
index ede8e64974be..61b7340b357a 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -123,23 +123,13 @@ static struct file_system_type proc_fs_type = {
 
 void __init proc_root_init(void)
 {
-	int err;
-
-	proc_init_inodecache();
+	proc_init_kmemcache();
 	set_proc_pid_nlink();
-	err = register_filesystem(&proc_fs_type);
-	if (err)
-		return;
-
 	proc_self_init();
 	proc_thread_self_init();
 	proc_symlink("mounts", NULL, "self/mounts");
 
 	proc_net_init();
-
-#ifdef CONFIG_SYSVIPC
-	proc_mkdir("sysvipc", NULL);
-#endif
 	proc_mkdir("fs", NULL);
 	proc_mkdir("driver", NULL);
 	proc_create_mount_point("fs/nfsd"); /* somewhere for the nfsd filesystem to be mounted */
@@ -150,6 +140,8 @@ void __init proc_root_init(void)
 	proc_tty_init();
 	proc_mkdir("bus", NULL);
 	proc_sys_init();
+
+	register_filesystem(&proc_fs_type);
 }
 
 static int proc_root_getattr(const struct path *path, struct kstat *stat,
@@ -207,12 +199,13 @@ struct proc_dir_entry proc_root = {
 	.namelen	= 5, 
 	.mode		= S_IFDIR | S_IRUGO | S_IXUGO, 
 	.nlink		= 2, 
-	.count		= ATOMIC_INIT(1),
+	.refcnt		= REFCOUNT_INIT(1),
 	.proc_iops	= &proc_root_inode_operations, 
 	.proc_fops	= &proc_root_operations,
 	.parent		= &proc_root,
-	.subdir		= RB_ROOT_CACHED,
-	.name		= "/proc",
+	.subdir		= RB_ROOT,
+	.name		= proc_root.inline_name,
+	.inline_name	= "/proc",
 };
 
 int pid_ns_prepare_proc(struct pid_namespace *ns)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index ec6d2983a5cb..65ae54659833 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -24,6 +24,8 @@
 #include <asm/tlbflush.h>
 #include "internal.h"
 
+#define SEQ_PUT_DEC(str, val) \
+		seq_put_decimal_ull_width(m, str, (val) << (PAGE_SHIFT-10), 8)
 void task_mem(struct seq_file *m, struct mm_struct *mm)
 {
 	unsigned long text, lib, swap, anon, file, shmem;
@@ -53,39 +55,28 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
 	lib = (mm->exec_vm << PAGE_SHIFT) - text;
 
 	swap = get_mm_counter(mm, MM_SWAPENTS);
-	seq_printf(m,
-		"VmPeak:\t%8lu kB\n"
-		"VmSize:\t%8lu kB\n"
-		"VmLck:\t%8lu kB\n"
-		"VmPin:\t%8lu kB\n"
-		"VmHWM:\t%8lu kB\n"
-		"VmRSS:\t%8lu kB\n"
-		"RssAnon:\t%8lu kB\n"
-		"RssFile:\t%8lu kB\n"
-		"RssShmem:\t%8lu kB\n"
-		"VmData:\t%8lu kB\n"
-		"VmStk:\t%8lu kB\n"
-		"VmExe:\t%8lu kB\n"
-		"VmLib:\t%8lu kB\n"
-		"VmPTE:\t%8lu kB\n"
-		"VmSwap:\t%8lu kB\n",
-		hiwater_vm << (PAGE_SHIFT-10),
-		total_vm << (PAGE_SHIFT-10),
-		mm->locked_vm << (PAGE_SHIFT-10),
-		mm->pinned_vm << (PAGE_SHIFT-10),
-		hiwater_rss << (PAGE_SHIFT-10),
-		total_rss << (PAGE_SHIFT-10),
-		anon << (PAGE_SHIFT-10),
-		file << (PAGE_SHIFT-10),
-		shmem << (PAGE_SHIFT-10),
-		mm->data_vm << (PAGE_SHIFT-10),
-		mm->stack_vm << (PAGE_SHIFT-10),
-		text >> 10,
-		lib >> 10,
-		mm_pgtables_bytes(mm) >> 10,
-		swap << (PAGE_SHIFT-10));
+	SEQ_PUT_DEC("VmPeak:\t", hiwater_vm);
+	SEQ_PUT_DEC(" kB\nVmSize:\t", total_vm);
+	SEQ_PUT_DEC(" kB\nVmLck:\t", mm->locked_vm);
+	SEQ_PUT_DEC(" kB\nVmPin:\t", mm->pinned_vm);
+	SEQ_PUT_DEC(" kB\nVmHWM:\t", hiwater_rss);
+	SEQ_PUT_DEC(" kB\nVmRSS:\t", total_rss);
+	SEQ_PUT_DEC(" kB\nRssAnon:\t", anon);
+	SEQ_PUT_DEC(" kB\nRssFile:\t", file);
+	SEQ_PUT_DEC(" kB\nRssShmem:\t", shmem);
+	SEQ_PUT_DEC(" kB\nVmData:\t", mm->data_vm);
+	SEQ_PUT_DEC(" kB\nVmStk:\t", mm->stack_vm);
+	seq_put_decimal_ull_width(m,
+		    " kB\nVmExe:\t", text >> 10, 8);
+	seq_put_decimal_ull_width(m,
+		    " kB\nVmLib:\t", lib >> 10, 8);
+	seq_put_decimal_ull_width(m,
+		    " kB\nVmPTE:\t", mm_pgtables_bytes(mm) >> 10, 8);
+	SEQ_PUT_DEC(" kB\nVmSwap:\t", swap);
+	seq_puts(m, " kB\n");
 	hugetlb_report_usage(m, mm);
 }
+#undef SEQ_PUT_DEC
 
 unsigned long task_vsize(struct mm_struct *mm)
 {
@@ -287,15 +278,18 @@ static void show_vma_header_prefix(struct seq_file *m,
 				   dev_t dev, unsigned long ino)
 {
 	seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
-	seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ",
-		   start,
-		   end,
-		   flags & VM_READ ? 'r' : '-',
-		   flags & VM_WRITE ? 'w' : '-',
-		   flags & VM_EXEC ? 'x' : '-',
-		   flags & VM_MAYSHARE ? 's' : 'p',
-		   pgoff,
-		   MAJOR(dev), MINOR(dev), ino);
+	seq_put_hex_ll(m, NULL, start, 8);
+	seq_put_hex_ll(m, "-", end, 8);
+	seq_putc(m, ' ');
+	seq_putc(m, flags & VM_READ ? 'r' : '-');
+	seq_putc(m, flags & VM_WRITE ? 'w' : '-');
+	seq_putc(m, flags & VM_EXEC ? 'x' : '-');
+	seq_putc(m, flags & VM_MAYSHARE ? 's' : 'p');
+	seq_put_hex_ll(m, " ", pgoff, 8);
+	seq_put_hex_ll(m, " ", MAJOR(dev), 2);
+	seq_put_hex_ll(m, ":", MINOR(dev), 2);
+	seq_put_decimal_ull(m, " ", ino);
+	seq_putc(m, ' ');
 }
 
 static void
@@ -694,8 +688,9 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
 		if (!mnemonics[i][0])
 			continue;
 		if (vma->vm_flags & (1UL << i)) {
-			seq_printf(m, "%c%c ",
-				   mnemonics[i][0], mnemonics[i][1]);
+			seq_putc(m, mnemonics[i][0]);
+			seq_putc(m, mnemonics[i][1]);
+			seq_putc(m, ' ');
 		}
 	}
 	seq_putc(m, '\n');
@@ -736,6 +731,8 @@ void __weak arch_show_smap(struct seq_file *m, struct vm_area_struct *vma)
 {
 }
 
+#define SEQ_PUT_DEC(str, val) \
+		seq_put_decimal_ull_width(m, str, (val) >> 10, 8)
 static int show_smap(struct seq_file *m, void *v, int is_pid)
 {
 	struct proc_maps_private *priv = m->private;
@@ -809,51 +806,34 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
 		ret = SEQ_SKIP;
 	}
 
-	if (!rollup_mode)
-		seq_printf(m,
-			   "Size:           %8lu kB\n"
-			   "KernelPageSize: %8lu kB\n"
-			   "MMUPageSize:    %8lu kB\n",
-			   (vma->vm_end - vma->vm_start) >> 10,
-			   vma_kernel_pagesize(vma) >> 10,
-			   vma_mmu_pagesize(vma) >> 10);
-
-
-	if (!rollup_mode || last_vma)
-		seq_printf(m,
-			   "Rss:            %8lu kB\n"
-			   "Pss:            %8lu kB\n"
-			   "Shared_Clean:   %8lu kB\n"
-			   "Shared_Dirty:   %8lu kB\n"
-			   "Private_Clean:  %8lu kB\n"
-			   "Private_Dirty:  %8lu kB\n"
-			   "Referenced:     %8lu kB\n"
-			   "Anonymous:      %8lu kB\n"
-			   "LazyFree:       %8lu kB\n"
-			   "AnonHugePages:  %8lu kB\n"
-			   "ShmemPmdMapped: %8lu kB\n"
-			   "Shared_Hugetlb: %8lu kB\n"
-			   "Private_Hugetlb: %7lu kB\n"
-			   "Swap:           %8lu kB\n"
-			   "SwapPss:        %8lu kB\n"
-			   "Locked:         %8lu kB\n",
-			   mss->resident >> 10,
-			   (unsigned long)(mss->pss >> (10 + PSS_SHIFT)),
-			   mss->shared_clean  >> 10,
-			   mss->shared_dirty  >> 10,
-			   mss->private_clean >> 10,
-			   mss->private_dirty >> 10,
-			   mss->referenced >> 10,
-			   mss->anonymous >> 10,
-			   mss->lazyfree >> 10,
-			   mss->anonymous_thp >> 10,
-			   mss->shmem_thp >> 10,
-			   mss->shared_hugetlb >> 10,
-			   mss->private_hugetlb >> 10,
-			   mss->swap >> 10,
-			   (unsigned long)(mss->swap_pss >> (10 + PSS_SHIFT)),
-			   (unsigned long)(mss->pss >> (10 + PSS_SHIFT)));
+	if (!rollup_mode) {
+		SEQ_PUT_DEC("Size:           ", vma->vm_end - vma->vm_start);
+		SEQ_PUT_DEC(" kB\nKernelPageSize: ", vma_kernel_pagesize(vma));
+		SEQ_PUT_DEC(" kB\nMMUPageSize:    ", vma_mmu_pagesize(vma));
+		seq_puts(m, " kB\n");
+	}
 
+	if (!rollup_mode || last_vma) {
+		SEQ_PUT_DEC("Rss:            ", mss->resident);
+		SEQ_PUT_DEC(" kB\nPss:            ", mss->pss >> PSS_SHIFT);
+		SEQ_PUT_DEC(" kB\nShared_Clean:   ", mss->shared_clean);
+		SEQ_PUT_DEC(" kB\nShared_Dirty:   ", mss->shared_dirty);
+		SEQ_PUT_DEC(" kB\nPrivate_Clean:  ", mss->private_clean);
+		SEQ_PUT_DEC(" kB\nPrivate_Dirty:  ", mss->private_dirty);
+		SEQ_PUT_DEC(" kB\nReferenced:     ", mss->referenced);
+		SEQ_PUT_DEC(" kB\nAnonymous:      ", mss->anonymous);
+		SEQ_PUT_DEC(" kB\nLazyFree:       ", mss->lazyfree);
+		SEQ_PUT_DEC(" kB\nAnonHugePages:  ", mss->anonymous_thp);
+		SEQ_PUT_DEC(" kB\nShmemPmdMapped: ", mss->shmem_thp);
+		SEQ_PUT_DEC(" kB\nShared_Hugetlb: ", mss->shared_hugetlb);
+		seq_put_decimal_ull_width(m, " kB\nPrivate_Hugetlb: ",
+					  mss->private_hugetlb >> 10, 7);
+		SEQ_PUT_DEC(" kB\nSwap:           ", mss->swap);
+		SEQ_PUT_DEC(" kB\nSwapPss:        ",
+						mss->swap_pss >> PSS_SHIFT);
+		SEQ_PUT_DEC(" kB\nLocked:         ", mss->pss >> PSS_SHIFT);
+		seq_puts(m, " kB\n");
+	}
 	if (!rollup_mode) {
 		arch_show_smap(m, vma);
 		show_smap_vma_flags(m, vma);
@@ -861,6 +841,7 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
 	m_cache_vma(m, vma);
 	return ret;
 }
+#undef SEQ_PUT_DEC
 
 static int show_pid_smap(struct seq_file *m, void *v)
 {
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 70057359fbaf..23148c3ed675 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2643,7 +2643,7 @@ static int journal_init_dev(struct super_block *super,
 	if (IS_ERR(journal->j_dev_bd)) {
 		result = PTR_ERR(journal->j_dev_bd);
 		journal->j_dev_bd = NULL;
-		reiserfs_warning(super,
+		reiserfs_warning(super, "sh-457",
 				 "journal_init_dev: Cannot open '%s': %i",
 				 jdev_name, result);
 		return result;
diff --git a/fs/seq_file.c b/fs/seq_file.c
index eea09f6d8830..c6c27f1f9c98 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -6,6 +6,7 @@
  * initial implementation -- AV, Oct 2001.
  */
 
+#include <linux/cache.h>
 #include <linux/fs.h>
 #include <linux/export.h>
 #include <linux/seq_file.h>
@@ -19,6 +20,8 @@
 #include <linux/uaccess.h>
 #include <asm/page.h>
 
+static struct kmem_cache *seq_file_cache __ro_after_init;
+
 static void seq_set_overflow(struct seq_file *m)
 {
 	m->count = m->size;
@@ -26,7 +29,7 @@ static void seq_set_overflow(struct seq_file *m)
 
 static void *seq_buf_alloc(unsigned long size)
 {
-	return kvmalloc(size, GFP_KERNEL);
+	return kvmalloc(size, GFP_KERNEL_ACCOUNT);
 }
 
 /**
@@ -51,7 +54,7 @@ int seq_open(struct file *file, const struct seq_operations *op)
 
 	WARN_ON(file->private_data);
 
-	p = kzalloc(sizeof(*p), GFP_KERNEL);
+	p = kmem_cache_zalloc(seq_file_cache, GFP_KERNEL);
 	if (!p)
 		return -ENOMEM;
 
@@ -366,7 +369,7 @@ int seq_release(struct inode *inode, struct file *file)
 {
 	struct seq_file *m = file->private_data;
 	kvfree(m->buf);
-	kfree(m);
+	kmem_cache_free(seq_file_cache, m);
 	return 0;
 }
 EXPORT_SYMBOL(seq_release);
@@ -563,7 +566,7 @@ static void single_stop(struct seq_file *p, void *v)
 int single_open(struct file *file, int (*show)(struct seq_file *, void *),
 		void *data)
 {
-	struct seq_operations *op = kmalloc(sizeof(*op), GFP_KERNEL);
+	struct seq_operations *op = kmalloc(sizeof(*op), GFP_KERNEL_ACCOUNT);
 	int res = -ENOMEM;
 
 	if (op) {
@@ -625,7 +628,7 @@ void *__seq_open_private(struct file *f, const struct seq_operations *ops,
 	void *private;
 	struct seq_file *seq;
 
-	private = kzalloc(psize, GFP_KERNEL);
+	private = kzalloc(psize, GFP_KERNEL_ACCOUNT);
 	if (private == NULL)
 		goto out;
 
@@ -673,29 +676,37 @@ void seq_puts(struct seq_file *m, const char *s)
 }
 EXPORT_SYMBOL(seq_puts);
 
-/*
+/**
  * A helper routine for putting decimal numbers without rich format of printf().
  * only 'unsigned long long' is supported.
- * This routine will put strlen(delimiter) + number into seq_file.
+ * @m: seq_file identifying the buffer to which data should be written
+ * @delimiter: a string which is printed before the number
+ * @num: the number
+ * @width: a minimum field width
+ *
+ * This routine will put strlen(delimiter) + number into seq_filed.
  * This routine is very quick when you show lots of numbers.
  * In usual cases, it will be better to use seq_printf(). It's easier to read.
  */
-void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
-			 unsigned long long num)
+void seq_put_decimal_ull_width(struct seq_file *m, const char *delimiter,
+			 unsigned long long num, unsigned int width)
 {
 	int len;
 
 	if (m->count + 2 >= m->size) /* we'll write 2 bytes at least */
 		goto overflow;
 
-	len = strlen(delimiter);
-	if (m->count + len >= m->size)
-		goto overflow;
+	if (delimiter && delimiter[0]) {
+		if (delimiter[1] == 0)
+			seq_putc(m, delimiter[0]);
+		else
+			seq_puts(m, delimiter);
+	}
 
-	memcpy(m->buf + m->count, delimiter, len);
-	m->count += len;
+	if (!width)
+		width = 1;
 
-	if (m->count + 1 >= m->size)
+	if (m->count + width >= m->size)
 		goto overflow;
 
 	if (num < 10) {
@@ -703,7 +714,7 @@ void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
 		return;
 	}
 
-	len = num_to_str(m->buf + m->count, m->size - m->count, num);
+	len = num_to_str(m->buf + m->count, m->size - m->count, num, width);
 	if (!len)
 		goto overflow;
 
@@ -713,8 +724,60 @@ void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
 overflow:
 	seq_set_overflow(m);
 }
+
+void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
+			 unsigned long long num)
+{
+	return seq_put_decimal_ull_width(m, delimiter, num, 0);
+}
 EXPORT_SYMBOL(seq_put_decimal_ull);
 
+/**
+ * seq_put_hex_ll - put a number in hexadecimal notation
+ * @m: seq_file identifying the buffer to which data should be written
+ * @delimiter: a string which is printed before the number
+ * @v: the number
+ * @width: a minimum field width
+ *
+ * seq_put_hex_ll(m, "", v, 8) is equal to seq_printf(m, "%08llx", v)
+ *
+ * This routine is very quick when you show lots of numbers.
+ * In usual cases, it will be better to use seq_printf(). It's easier to read.
+ */
+void seq_put_hex_ll(struct seq_file *m, const char *delimiter,
+				unsigned long long v, unsigned int width)
+{
+	unsigned int len;
+	int i;
+
+	if (delimiter && delimiter[0]) {
+		if (delimiter[1] == 0)
+			seq_putc(m, delimiter[0]);
+		else
+			seq_puts(m, delimiter);
+	}
+
+	/* If x is 0, the result of __builtin_clzll is undefined */
+	if (v == 0)
+		len = 1;
+	else
+		len = (sizeof(v) * 8 - __builtin_clzll(v) + 3) / 4;
+
+	if (len < width)
+		len = width;
+
+	if (m->count + len > m->size) {
+		seq_set_overflow(m);
+		return;
+	}
+
+	for (i = len - 1; i >= 0; i--) {
+		m->buf[m->count + i] = hex_asc[0xf & v];
+		v = v >> 4;
+	}
+	m->count += len;
+}
+
 void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num)
 {
 	int len;
@@ -722,12 +785,12 @@ void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num
 	if (m->count + 3 >= m->size) /* we'll write 2 bytes at least */
 		goto overflow;
 
-	len = strlen(delimiter);
-	if (m->count + len >= m->size)
-		goto overflow;
-
-	memcpy(m->buf + m->count, delimiter, len);
-	m->count += len;
+	if (delimiter && delimiter[0]) {
+		if (delimiter[1] == 0)
+			seq_putc(m, delimiter[0]);
+		else
+			seq_puts(m, delimiter);
+	}
 
 	if (m->count + 2 >= m->size)
 		goto overflow;
@@ -742,7 +805,7 @@ void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num
 		return;
 	}
 
-	len = num_to_str(m->buf + m->count, m->size - m->count, num);
+	len = num_to_str(m->buf + m->count, m->size - m->count, num, 0);
 	if (!len)
 		goto overflow;
 
@@ -782,8 +845,14 @@ EXPORT_SYMBOL(seq_write);
 void seq_pad(struct seq_file *m, char c)
 {
 	int size = m->pad_until - m->count;
-	if (size > 0)
-		seq_printf(m, "%*s", size, "");
+	if (size > 0) {
+		if (size + m->count > m->size) {
+			seq_set_overflow(m);
+			return;
+		}
+		memset(m->buf + m->count, ' ', size);
+		m->count += size;
+	}
 	if (c)
 		seq_putc(m, c);
 }
@@ -1040,3 +1109,8 @@ seq_hlist_next_percpu(void *v, struct hlist_head __percpu *head,
 	return NULL;
 }
 EXPORT_SYMBOL(seq_hlist_next_percpu);
+
+void __init seq_file_init(void)
+{
+	seq_file_cache = KMEM_CACHE(seq_file, SLAB_ACCOUNT|SLAB_PANIC);
+}
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c
index 2dcf3d473fec..9571616b5dda 100644
--- a/fs/ubifs/find.c
+++ b/fs/ubifs/find.c
@@ -632,7 +632,7 @@ static int scan_for_idx_cb(struct ubifs_info *c,
  */
 static const struct ubifs_lprops *scan_for_leb_for_idx(struct ubifs_info *c)
 {
-	struct ubifs_lprops *lprops;
+	const struct ubifs_lprops *lprops;
 	struct scan_data data;
 	int err;
 
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index 6c3a1abd0e22..f5a46844340c 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -244,7 +244,6 @@ static void remove_from_lpt_heap(struct ubifs_info *c,
 /**
  * lpt_heap_replace - replace lprops in a category heap.
  * @c: UBIFS file-system description object
- * @old_lprops: LEB properties to replace
  * @new_lprops: LEB properties with which to replace
  * @cat: LEB category
  *
@@ -254,7 +253,6 @@ static void remove_from_lpt_heap(struct ubifs_info *c,
  * lprops.  This function does that.
  */
 static void lpt_heap_replace(struct ubifs_info *c,
-			     struct ubifs_lprops *old_lprops,
 			     struct ubifs_lprops *new_lprops, int cat)
 {
 	struct ubifs_lpt_heap *heap;
@@ -362,7 +360,7 @@ void ubifs_replace_cat(struct ubifs_info *c, struct ubifs_lprops *old_lprops,
 	case LPROPS_DIRTY:
 	case LPROPS_DIRTY_IDX:
 	case LPROPS_FREE:
-		lpt_heap_replace(c, old_lprops, new_lprops, cat);
+		lpt_heap_replace(c, new_lprops, cat);
 		break;
 	case LPROPS_UNCAT:
 	case LPROPS_EMPTY:
diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c
index aab87340d3de..16f03d9929e5 100644
--- a/fs/ubifs/scan.c
+++ b/fs/ubifs/scan.c
@@ -175,7 +175,6 @@ struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum,
 void ubifs_end_scan(const struct ubifs_info *c, struct ubifs_scan_leb *sleb,
 		    int lnum, int offs)
 {
-	lnum = lnum;
 	dbg_scan("stop scanning LEB %d at offset %d", lnum, offs);
 	ubifs_assert(offs % c->min_io_size == 0);
 
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index b16ef162344a..6c397a389105 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1737,8 +1737,11 @@ static void ubifs_remount_ro(struct ubifs_info *c)
 
 	dbg_save_space_info(c);
 
-	for (i = 0; i < c->jhead_cnt; i++)
-		ubifs_wbuf_sync(&c->jheads[i].wbuf);
+	for (i = 0; i < c->jhead_cnt; i++) {
+		err = ubifs_wbuf_sync(&c->jheads[i].wbuf);
+		if (err)
+			ubifs_ro_mode(c, err);
+	}
 
 	c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY);
 	c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS);
@@ -1804,8 +1807,11 @@ static void ubifs_put_super(struct super_block *sb)
 			int err;
 
 			/* Synchronize write-buffers */
-			for (i = 0; i < c->jhead_cnt; i++)
-				ubifs_wbuf_sync(&c->jheads[i].wbuf);
+			for (i = 0; i < c->jhead_cnt; i++) {
+				err = ubifs_wbuf_sync(&c->jheads[i].wbuf);
+				if (err)
+					ubifs_ro_mode(c, err);
+			}
 
 			/*
 			 * We are being cleanly unmounted which means the
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 31f1f10eecd1..0ab824f574ed 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1195,16 +1195,22 @@ xfs_vm_writepages(
 	int			ret;
 
 	xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
-	if (dax_mapping(mapping))
-		return dax_writeback_mapping_range(mapping,
-				xfs_find_bdev_for_inode(mapping->host), wbc);
-
 	ret = write_cache_pages(mapping, wbc, xfs_do_writepage, &wpc);
 	if (wpc.ioend)
 		ret = xfs_submit_ioend(wbc, wpc.ioend, ret);
 	return ret;
 }
 
+STATIC int
+xfs_dax_writepages(
+	struct address_space	*mapping,
+	struct writeback_control *wbc)
+{
+	xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
+	return dax_writeback_mapping_range(mapping,
+			xfs_find_bdev_for_inode(mapping->host), wbc);
+}
+
 /*
  * Called to move a page into cleanable state - and from there
  * to be released. The page should already be clean. We always
@@ -1367,17 +1373,6 @@ out_unlock:
 	return error;
 }
 
-STATIC ssize_t
-xfs_vm_direct_IO(
-	struct kiocb		*iocb,
-	struct iov_iter		*iter)
-{
-	/*
-	 * We just need the method present so that open/fcntl allow direct I/O.
-	 */
-	return -EINVAL;
-}
-
 STATIC sector_t
 xfs_vm_bmap(
 	struct address_space	*mapping,
@@ -1472,19 +1467,8 @@ xfs_vm_set_page_dirty(
 	newly_dirty = !TestSetPageDirty(page);
 	spin_unlock(&mapping->private_lock);
 
-	if (newly_dirty) {
-		/* sigh - __set_page_dirty() is static, so copy it here, too */
-		unsigned long flags;
-
-		spin_lock_irqsave(&mapping->tree_lock, flags);
-		if (page->mapping) {	/* Race with truncate? */
-			WARN_ON_ONCE(!PageUptodate(page));
-			account_page_dirtied(page, mapping);
-			radix_tree_tag_set(&mapping->page_tree,
-					page_index(page), PAGECACHE_TAG_DIRTY);
-		}
-		spin_unlock_irqrestore(&mapping->tree_lock, flags);
-	}
+	if (newly_dirty)
+		__set_page_dirty(page, mapping, 1);
 	unlock_page_memcg(page);
 	if (newly_dirty)
 		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
@@ -1500,8 +1484,15 @@ const struct address_space_operations xfs_address_space_operations = {
 	.releasepage		= xfs_vm_releasepage,
 	.invalidatepage		= xfs_vm_invalidatepage,
 	.bmap			= xfs_vm_bmap,
-	.direct_IO		= xfs_vm_direct_IO,
+	.direct_IO		= noop_direct_IO,
 	.migratepage		= buffer_migrate_page,
 	.is_partially_uptodate  = block_is_partially_uptodate,
 	.error_remove_page	= generic_error_remove_page,
 };
+
+const struct address_space_operations xfs_dax_aops = {
+	.writepages		= xfs_dax_writepages,
+	.direct_IO		= noop_direct_IO,
+	.set_page_dirty		= noop_set_page_dirty,
+	.invalidatepage		= noop_invalidatepage,
+};
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index 88c85ea63da0..69346d460dfa 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -54,6 +54,7 @@ struct xfs_ioend {
 };
 
 extern const struct address_space_operations xfs_address_space_operations;
+extern const struct address_space_operations xfs_dax_aops;
 
 int	xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size);
 
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index e0307fbff911..154725b1b813 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -1285,7 +1285,10 @@ xfs_setup_iops(
 	case S_IFREG:
 		inode->i_op = &xfs_inode_operations;
 		inode->i_fop = &xfs_file_operations;
-		inode->i_mapping->a_ops = &xfs_address_space_operations;
+		if (IS_DAX(inode))
+			inode->i_mapping->a_ops = &xfs_dax_aops;
+		else
+			inode->i_mapping->a_ops = &xfs_address_space_operations;
 		break;
 	case S_IFDIR:
 		if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
diff --git a/include/acpi/processor.h b/include/acpi/processor.h
index d591bb77f592..40a916efd7c0 100644
--- a/include/acpi/processor.h
+++ b/include/acpi/processor.h
@@ -254,6 +254,8 @@ int acpi_processor_pstate_control(void);
 /* note: this locks both the calling module and the processor module
          if a _PPC object exists, rmmod is disallowed then */
 int acpi_processor_notify_smm(struct module *calling_module);
+int acpi_processor_get_psd(acpi_handle handle,
+			   struct acpi_psd_package *pdomain);
 
 /* parsing the _P* objects. */
 extern int acpi_processor_get_performance_info(struct acpi_processor *pr);
diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h
index 04c4cc6fd820..66d1d45fa2e1 100644
--- a/include/asm-generic/io.h
+++ b/include/asm-generic/io.h
@@ -25,6 +25,50 @@
 #define mmiowb() do {} while (0)
 #endif
 
+#ifndef __io_br
+#define __io_br()      barrier()
+#endif
+
+/* prevent prefetching of coherent DMA data ahead of a dma-complete */
+#ifndef __io_ar
+#ifdef rmb
+#define __io_ar()      rmb()
+#else
+#define __io_ar()      barrier()
+#endif
+#endif
+
+/* flush writes to coherent DMA data before possibly triggering a DMA read */
+#ifndef __io_bw
+#ifdef wmb
+#define __io_bw()      wmb()
+#else
+#define __io_bw()      barrier()
+#endif
+#endif
+
+/* serialize device access against a spin_unlock, usually handled there. */
+#ifndef __io_aw
+#define __io_aw()      barrier()
+#endif
+
+#ifndef __io_pbw
+#define __io_pbw()     __io_bw()
+#endif
+
+#ifndef __io_paw
+#define __io_paw()     __io_aw()
+#endif
+
+#ifndef __io_pbr
+#define __io_pbr()     __io_br()
+#endif
+
+#ifndef __io_par
+#define __io_par()     __io_ar()
+#endif
+
+
 /*
  * __raw_{read,write}{b,w,l,q}() access memory in native endianness.
  *
@@ -110,7 +154,12 @@ static inline void __raw_writeq(u64 value, volatile void __iomem *addr)
 #define readb readb
 static inline u8 readb(const volatile void __iomem *addr)
 {
-	return __raw_readb(addr);
+	u8 val;
+
+	__io_br();
+	val = __raw_readb(addr);
+	__io_ar();
+	return val;
 }
 #endif
 
@@ -118,7 +167,12 @@ static inline u8 readb(const volatile void __iomem *addr)
 #define readw readw
 static inline u16 readw(const volatile void __iomem *addr)
 {
-	return __le16_to_cpu(__raw_readw(addr));
+	u16 val;
+
+	__io_br();
+	val = __le16_to_cpu(__raw_readw(addr));
+	__io_ar();
+	return val;
 }
 #endif
 
@@ -126,7 +180,12 @@ static inline u16 readw(const volatile void __iomem *addr)
 #define readl readl
 static inline u32 readl(const volatile void __iomem *addr)
 {
-	return __le32_to_cpu(__raw_readl(addr));
+	u32 val;
+
+	__io_br();
+	val = __le32_to_cpu(__raw_readl(addr));
+	__io_ar();
+	return val;
 }
 #endif
 
@@ -135,7 +194,12 @@ static inline u32 readl(const volatile void __iomem *addr)
 #define readq readq
 static inline u64 readq(const volatile void __iomem *addr)
 {
-	return __le64_to_cpu(__raw_readq(addr));
+	u64 val;
+
+	__io_br();
+	val = __le64_to_cpu(__raw_readq(addr));
+	__io_ar();
+	return val;
 }
 #endif
 #endif /* CONFIG_64BIT */
@@ -144,7 +208,9 @@ static inline u64 readq(const volatile void __iomem *addr)
 #define writeb writeb
 static inline void writeb(u8 value, volatile void __iomem *addr)
 {
+	__io_bw();
 	__raw_writeb(value, addr);
+	__io_aw();
 }
 #endif
 
@@ -152,7 +218,9 @@ static inline void writeb(u8 value, volatile void __iomem *addr)
 #define writew writew
 static inline void writew(u16 value, volatile void __iomem *addr)
 {
+	__io_bw();
 	__raw_writew(cpu_to_le16(value), addr);
+	__io_aw();
 }
 #endif
 
@@ -160,7 +228,9 @@ static inline void writew(u16 value, volatile void __iomem *addr)
 #define writel writel
 static inline void writel(u32 value, volatile void __iomem *addr)
 {
+	__io_bw();
 	__raw_writel(__cpu_to_le32(value), addr);
+	__io_aw();
 }
 #endif
 
@@ -169,7 +239,9 @@ static inline void writel(u32 value, volatile void __iomem *addr)
 #define writeq writeq
 static inline void writeq(u64 value, volatile void __iomem *addr)
 {
+	__io_bw();
 	__raw_writeq(__cpu_to_le64(value), addr);
+	__io_aw();
 }
 #endif
 #endif /* CONFIG_64BIT */
@@ -180,35 +252,67 @@ static inline void writeq(u64 value, volatile void __iomem *addr)
  * accesses.
  */
 #ifndef readb_relaxed
-#define readb_relaxed readb
+#define readb_relaxed readb_relaxed
+static inline u8 readb_relaxed(const volatile void __iomem *addr)
+{
+	return __raw_readb(addr);
+}
 #endif
 
 #ifndef readw_relaxed
-#define readw_relaxed readw
+#define readw_relaxed readw_relaxed
+static inline u16 readw_relaxed(const volatile void __iomem *addr)
+{
+	return __le16_to_cpu(__raw_readw(addr));
+}
 #endif
 
 #ifndef readl_relaxed
-#define readl_relaxed readl
+#define readl_relaxed readl_relaxed
+static inline u32 readl_relaxed(const volatile void __iomem *addr)
+{
+	return __le32_to_cpu(__raw_readl(addr));
+}
 #endif
 
 #if defined(readq) && !defined(readq_relaxed)
-#define readq_relaxed readq
+#define readq_relaxed readq_relaxed
+static inline u64 readq_relaxed(const volatile void __iomem *addr)
+{
+	return __le64_to_cpu(__raw_readq(addr));
+}
 #endif
 
 #ifndef writeb_relaxed
-#define writeb_relaxed writeb
+#define writeb_relaxed writeb_relaxed
+static inline void writeb_relaxed(u8 value, volatile void __iomem *addr)
+{
+	__raw_writeb(value, addr);
+}
 #endif
 
 #ifndef writew_relaxed
-#define writew_relaxed writew
+#define writew_relaxed writew_relaxed
+static inline void writew_relaxed(u16 value, volatile void __iomem *addr)
+{
+	__raw_writew(cpu_to_le16(value), addr);
+}
 #endif
 
 #ifndef writel_relaxed
-#define writel_relaxed writel
+#define writel_relaxed writel_relaxed
+static inline void writel_relaxed(u32 value, volatile void __iomem *addr)
+{
+	__raw_writel(__cpu_to_le32(value), addr);
+}
 #endif
 
 #if defined(writeq) && !defined(writeq_relaxed)
-#define writeq_relaxed writeq
+#define writeq_relaxed writeq_relaxed
+static inline void writeq_relaxed(u64 value, volatile void __iomem *addr)
+{
+	__raw_writeq(__cpu_to_le64(value), addr);
+}
 #endif
 
 /*
@@ -363,7 +467,12 @@ static inline void writesq(volatile void __iomem *addr, const void *buffer,
 #define inb inb
 static inline u8 inb(unsigned long addr)
 {
-	return readb(PCI_IOBASE + addr);
+	u8 val;
+
+	__io_pbr();
+	val = __raw_readb(PCI_IOBASE + addr);
+	__io_par();
+	return val;
 }
 #endif
 
@@ -371,7 +480,12 @@ static inline u8 inb(unsigned long addr)
 #define inw inw
 static inline u16 inw(unsigned long addr)
 {
-	return readw(PCI_IOBASE + addr);
+	u16 val;
+
+	__io_pbr();
+	val = __le16_to_cpu(__raw_readw(PCI_IOBASE + addr));
+	__io_par();
+	return val;
 }
 #endif
 
@@ -379,7 +493,12 @@ static inline u16 inw(unsigned long addr)
 #define inl inl
 static inline u32 inl(unsigned long addr)
 {
-	return readl(PCI_IOBASE + addr);
+	u32 val;
+
+	__io_pbr();
+	val = __le32_to_cpu(__raw_readl(PCI_IOBASE + addr));
+	__io_par();
+	return val;
 }
 #endif
 
@@ -387,7 +506,9 @@ static inline u32 inl(unsigned long addr)
 #define outb outb
 static inline void outb(u8 value, unsigned long addr)
 {
-	writeb(value, PCI_IOBASE + addr);
+	__io_pbw();
+	__raw_writeb(value, PCI_IOBASE + addr);
+	__io_paw();
 }
 #endif
 
@@ -395,7 +516,9 @@ static inline void outb(u8 value, unsigned long addr)
 #define outw outw
 static inline void outw(u16 value, unsigned long addr)
 {
-	writew(value, PCI_IOBASE + addr);
+	__io_pbw();
+	__raw_writew(cpu_to_le16(value), PCI_IOBASE + addr);
+	__io_paw();
 }
 #endif
 
@@ -403,7 +526,9 @@ static inline void outw(u16 value, unsigned long addr)
 #define outl outl
 static inline void outl(u32 value, unsigned long addr)
 {
-	writel(value, PCI_IOBASE + addr);
+	__io_pbw();
+	__raw_writel(cpu_to_le32(value), PCI_IOBASE + addr);
+	__io_paw();
 }
 #endif
 
diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h
index 2f7a29242b87..38cd77b39a64 100644
--- a/include/linux/acpi_iort.h
+++ b/include/linux/acpi_iort.h
@@ -26,7 +26,8 @@
 #define IORT_IRQ_MASK(irq)		(irq & 0xffffffffULL)
 #define IORT_IRQ_TRIGGER_MASK(irq)	((irq >> 32) & 0xffffffffULL)
 
-int iort_register_domain_token(int trans_id, struct fwnode_handle *fw_node);
+int iort_register_domain_token(int trans_id, phys_addr_t base,
+			       struct fwnode_handle *fw_node);
 void iort_deregister_domain_token(int trans_id);
 struct fwnode_handle *iort_find_domain_token(int trans_id);
 #ifdef CONFIG_ACPI_IORT
@@ -38,6 +39,7 @@ int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id);
 /* IOMMU interface */
 void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *size);
 const struct iommu_ops *iort_iommu_configure(struct device *dev);
+int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head);
 #else
 static inline void acpi_iort_init(void) { }
 static inline u32 iort_msi_map_rid(struct device *dev, u32 req_id)
@@ -52,6 +54,9 @@ static inline void iort_dma_setup(struct device *dev, u64 *dma_addr,
 static inline const struct iommu_ops *iort_iommu_configure(
 				      struct device *dev)
 { return NULL; }
+static inline
+int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head)
+{ return 0; }
 #endif
 
 #endif /* __ACPI_IORT_H__ */
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 3e4ce54d84ab..09da0f124699 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -175,7 +175,7 @@ static inline int wb_congested(struct bdi_writeback *wb, int cong_bits)
 }
 
 long congestion_wait(int sync, long timeout);
-long wait_iff_congested(struct pglist_data *pgdat, int sync, long timeout);
+long wait_iff_congested(int sync, long timeout);
 
 static inline bool bdi_cap_synchronous_io(struct backing_dev_info *bdi)
 {
@@ -329,7 +329,7 @@ static inline bool inode_to_wb_is_valid(struct inode *inode)
  * @inode: inode of interest
  *
  * Returns the wb @inode is currently associated with.  The caller must be
- * holding either @inode->i_lock, @inode->i_mapping->tree_lock, or the
+ * holding either @inode->i_lock, the i_pages lock, or the
  * associated wb's list_lock.
  */
 static inline struct bdi_writeback *inode_to_wb(const struct inode *inode)
@@ -337,7 +337,7 @@ static inline struct bdi_writeback *inode_to_wb(const struct inode *inode)
 #ifdef CONFIG_LOCKDEP
 	WARN_ON_ONCE(debug_locks &&
 		     (!lockdep_is_held(&inode->i_lock) &&
-		      !lockdep_is_held(&inode->i_mapping->tree_lock) &&
+		      !lockdep_is_held(&inode->i_mapping->i_pages.xa_lock) &&
 		      !lockdep_is_held(&inode->i_wb->list_lock)));
 #endif
 	return inode->i_wb;
@@ -349,7 +349,7 @@ static inline struct bdi_writeback *inode_to_wb(const struct inode *inode)
  * @lockedp: temp bool output param, to be passed to the end function
  *
  * The caller wants to access the wb associated with @inode but isn't
- * holding inode->i_lock, mapping->tree_lock or wb->list_lock.  This
+ * holding inode->i_lock, the i_pages lock or wb->list_lock.  This
  * function determines the wb associated with @inode and ensures that the
  * association doesn't change until the transaction is finished with
  * unlocked_inode_to_wb_end().
@@ -370,11 +370,11 @@ unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp)
 	*lockedp = smp_load_acquire(&inode->i_state) & I_WB_SWITCH;
 
 	if (unlikely(*lockedp))
-		spin_lock_irq(&inode->i_mapping->tree_lock);
+		xa_lock_irq(&inode->i_mapping->i_pages);
 
 	/*
-	 * Protected by either !I_WB_SWITCH + rcu_read_lock() or tree_lock.
-	 * inode_to_wb() will bark.  Deref directly.
+	 * Protected by either !I_WB_SWITCH + rcu_read_lock() or the i_pages
+	 * lock.  inode_to_wb() will bark.  Deref directly.
 	 */
 	return inode->i_wb;
 }
@@ -387,7 +387,7 @@ unlocked_inode_to_wb_begin(struct inode *inode, bool *lockedp)
 static inline void unlocked_inode_to_wb_end(struct inode *inode, bool locked)
 {
 	if (unlikely(locked))
-		spin_unlock_irq(&inode->i_mapping->tree_lock);
+		xa_unlock_irq(&inode->i_mapping->i_pages);
 
 	rcu_read_unlock();
 }
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index b0abe21d6cc9..4955e0863b83 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -61,6 +61,8 @@ struct linux_binprm {
 	unsigned interp_flags;
 	unsigned interp_data;
 	unsigned long loader, exec;
+
+	struct rlimit rlim_stack; /* Saved RLIMIT_STACK used during exec. */
 } __randomize_layout;
 
 #define BINPRM_FLAGS_ENFORCE_NONDUMP_BIT 0
@@ -118,6 +120,7 @@ extern int __must_check remove_arg_zero(struct linux_binprm *);
 extern int search_binary_handler(struct linux_binprm *);
 extern int flush_old_exec(struct linux_binprm * bprm);
 extern void setup_new_exec(struct linux_binprm * bprm);
+extern void finalize_exec(struct linux_binprm *bprm);
 extern void would_dump(struct linux_binprm *, struct file *);
 
 extern int suid_dumpable;
diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index 59042d5ac520..3901927cf6a0 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -204,6 +204,7 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin
 	 CEPH_FEATURE_OSD_PRIMARY_AFFINITY |	\
 	 CEPH_FEATURE_MSGR_KEEPALIVE2 |		\
 	 CEPH_FEATURE_OSD_POOLRESEND |		\
+	 CEPH_FEATURE_MDS_QUOTA |		\
 	 CEPH_FEATURE_CRUSH_V4 |		\
 	 CEPH_FEATURE_NEW_OSDOP_ENCODING |	\
 	 CEPH_FEATURE_SERVER_JEWEL |		\
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index 88dd51381aaf..7ecfc88314d8 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -134,6 +134,7 @@ struct ceph_dir_layout {
 #define CEPH_MSG_CLIENT_LEASE           0x311
 #define CEPH_MSG_CLIENT_SNAP            0x312
 #define CEPH_MSG_CLIENT_CAPRELEASE      0x313
+#define CEPH_MSG_CLIENT_QUOTA           0x314
 
 /* pool ops */
 #define CEPH_MSG_POOLOP_REPLY           48
@@ -807,4 +808,20 @@ struct ceph_mds_snap_realm {
 } __attribute__ ((packed));
 /* followed by my snap list, then prior parent snap list */
 
+/*
+ * quotas
+ */
+struct ceph_mds_quota {
+	__le64 ino;		/* ino */
+	struct ceph_timespec rctime;
+	__le64 rbytes;		/* dir stats */
+	__le64 rfiles;
+	__le64 rsubdirs;
+	__u8 struct_v;		/* compat */
+	__u8 struct_compat;
+	__le32 struct_len;
+	__le64 max_bytes;	/* quota max. bytes */
+	__le64 max_files;	/* quota max. files */
+} __attribute__ ((packed));
+
 #endif
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index c2ec44cf5098..49c93b9308d7 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -262,6 +262,7 @@ extern struct kmem_cache *ceph_cap_cachep;
 extern struct kmem_cache *ceph_cap_flush_cachep;
 extern struct kmem_cache *ceph_dentry_cachep;
 extern struct kmem_cache *ceph_file_cachep;
+extern struct kmem_cache *ceph_dir_file_cachep;
 
 /* ceph_common.c */
 extern bool libceph_compatible(void *data);
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index ead9d85f1c11..c7dfcb8a1fb2 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -76,6 +76,7 @@ enum ceph_msg_data_type {
 #ifdef CONFIG_BLOCK
 	CEPH_MSG_DATA_BIO,	/* data source/destination is a bio list */
 #endif /* CONFIG_BLOCK */
+	CEPH_MSG_DATA_BVECS,	/* data source/destination is a bio_vec array */
 };
 
 static __inline__ bool ceph_msg_data_type_valid(enum ceph_msg_data_type type)
@@ -87,22 +88,106 @@ static __inline__ bool ceph_msg_data_type_valid(enum ceph_msg_data_type type)
 #ifdef CONFIG_BLOCK
 	case CEPH_MSG_DATA_BIO:
 #endif /* CONFIG_BLOCK */
+	case CEPH_MSG_DATA_BVECS:
 		return true;
 	default:
 		return false;
 	}
 }
 
+#ifdef CONFIG_BLOCK
+
+struct ceph_bio_iter {
+	struct bio *bio;
+	struct bvec_iter iter;
+};
+
+#define __ceph_bio_iter_advance_step(it, n, STEP) do {			      \
+	unsigned int __n = (n), __cur_n;				      \
+									      \
+	while (__n) {							      \
+		BUG_ON(!(it)->iter.bi_size);				      \
+		__cur_n = min((it)->iter.bi_size, __n);			      \
+		(void)(STEP);						      \
+		bio_advance_iter((it)->bio, &(it)->iter, __cur_n);	      \
+		if (!(it)->iter.bi_size && (it)->bio->bi_next) {	      \
+			dout("__ceph_bio_iter_advance_step next bio\n");      \
+			(it)->bio = (it)->bio->bi_next;			      \
+			(it)->iter = (it)->bio->bi_iter;		      \
+		}							      \
+		__n -= __cur_n;						      \
+	}								      \
+} while (0)
+
+/*
+ * Advance @it by @n bytes.
+ */
+#define ceph_bio_iter_advance(it, n)					      \
+	__ceph_bio_iter_advance_step(it, n, 0)
+
+/*
+ * Advance @it by @n bytes, executing BVEC_STEP for each bio_vec.
+ */
+#define ceph_bio_iter_advance_step(it, n, BVEC_STEP)			      \
+	__ceph_bio_iter_advance_step(it, n, ({				      \
+		struct bio_vec bv;					      \
+		struct bvec_iter __cur_iter;				      \
+									      \
+		__cur_iter = (it)->iter;				      \
+		__cur_iter.bi_size = __cur_n;				      \
+		__bio_for_each_segment(bv, (it)->bio, __cur_iter, __cur_iter) \
+			(void)(BVEC_STEP);				      \
+	}))
+
+#endif /* CONFIG_BLOCK */
+
+struct ceph_bvec_iter {
+	struct bio_vec *bvecs;
+	struct bvec_iter iter;
+};
+
+#define __ceph_bvec_iter_advance_step(it, n, STEP) do {			      \
+	BUG_ON((n) > (it)->iter.bi_size);				      \
+	(void)(STEP);							      \
+	bvec_iter_advance((it)->bvecs, &(it)->iter, (n));		      \
+} while (0)
+
+/*
+ * Advance @it by @n bytes.
+ */
+#define ceph_bvec_iter_advance(it, n)					      \
+	__ceph_bvec_iter_advance_step(it, n, 0)
+
+/*
+ * Advance @it by @n bytes, executing BVEC_STEP for each bio_vec.
+ */
+#define ceph_bvec_iter_advance_step(it, n, BVEC_STEP)			      \
+	__ceph_bvec_iter_advance_step(it, n, ({				      \
+		struct bio_vec bv;					      \
+		struct bvec_iter __cur_iter;				      \
+									      \
+		__cur_iter = (it)->iter;				      \
+		__cur_iter.bi_size = (n);				      \
+		for_each_bvec(bv, (it)->bvecs, __cur_iter, __cur_iter)	      \
+			(void)(BVEC_STEP);				      \
+	}))
+
+#define ceph_bvec_iter_shorten(it, n) do {				      \
+	BUG_ON((n) > (it)->iter.bi_size);				      \
+	(it)->iter.bi_size = (n);					      \
+} while (0)
+
 struct ceph_msg_data {
 	struct list_head		links;	/* ceph_msg->data */
 	enum ceph_msg_data_type		type;
 	union {
 #ifdef CONFIG_BLOCK
 		struct {
-			struct bio	*bio;
-			size_t		bio_length;
+			struct ceph_bio_iter	bio_pos;
+			u32			bio_length;
 		};
 #endif /* CONFIG_BLOCK */
+		struct ceph_bvec_iter	bvec_pos;
 		struct {
 			struct page	**pages;	/* NOT OWNER. */
 			size_t		length;		/* total # bytes */
@@ -122,11 +207,9 @@ struct ceph_msg_data_cursor {
 	bool			need_crc;	/* crc update needed */
 	union {
 #ifdef CONFIG_BLOCK
-		struct {				/* bio */
-			struct bio	*bio;		/* bio from list */
-			struct bvec_iter bvec_iter;
-		};
+		struct ceph_bio_iter	bio_iter;
 #endif /* CONFIG_BLOCK */
+		struct bvec_iter	bvec_iter;
 		struct {				/* pages */
 			unsigned int	page_offset;	/* offset in page */
 			unsigned short	page_index;	/* index in array */
@@ -290,9 +373,11 @@ extern void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages,
 extern void ceph_msg_data_add_pagelist(struct ceph_msg *msg,
 				struct ceph_pagelist *pagelist);
 #ifdef CONFIG_BLOCK
-extern void ceph_msg_data_add_bio(struct ceph_msg *msg, struct bio *bio,
-				size_t length);
+void ceph_msg_data_add_bio(struct ceph_msg *msg, struct ceph_bio_iter *bio_pos,
+			   u32 length);
 #endif /* CONFIG_BLOCK */
+void ceph_msg_data_add_bvecs(struct ceph_msg *msg,
+			     struct ceph_bvec_iter *bvec_pos);
 
 extern struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
 				     bool can_fail);
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 52fb37d1c2a5..528ccc943cee 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -57,6 +57,7 @@ enum ceph_osd_data_type {
 #ifdef CONFIG_BLOCK
 	CEPH_OSD_DATA_TYPE_BIO,
 #endif /* CONFIG_BLOCK */
+	CEPH_OSD_DATA_TYPE_BVECS,
 };
 
 struct ceph_osd_data {
@@ -72,10 +73,11 @@ struct ceph_osd_data {
 		struct ceph_pagelist	*pagelist;
 #ifdef CONFIG_BLOCK
 		struct {
-			struct bio	*bio;		/* list of bios */
-			size_t		bio_length;	/* total in list */
+			struct ceph_bio_iter	bio_pos;
+			u32			bio_length;
 		};
 #endif /* CONFIG_BLOCK */
+		struct ceph_bvec_iter	bvec_pos;
 	};
 };
 
@@ -405,10 +407,14 @@ extern void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *,
 					unsigned int which,
 					struct ceph_pagelist *pagelist);
 #ifdef CONFIG_BLOCK
-extern void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *,
-					unsigned int which,
-					struct bio *bio, size_t bio_length);
+void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req,
+				    unsigned int which,
+				    struct ceph_bio_iter *bio_pos,
+				    u32 bio_length);
 #endif /* CONFIG_BLOCK */
+void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req,
+					 unsigned int which,
+					 struct ceph_bvec_iter *bvec_pos);
 
 extern void osd_req_op_cls_request_data_pagelist(struct ceph_osd_request *,
 					unsigned int which,
@@ -418,6 +424,9 @@ extern void osd_req_op_cls_request_data_pages(struct ceph_osd_request *,
 					struct page **pages, u64 length,
 					u32 alignment, bool pages_from_pool,
 					bool own_pages);
+void osd_req_op_cls_request_data_bvecs(struct ceph_osd_request *osd_req,
+				       unsigned int which,
+				       struct bio_vec *bvecs, u32 bytes);
 extern void osd_req_op_cls_response_data_pages(struct ceph_osd_request *,
 					unsigned int which,
 					struct page **pages, u64 length,
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index d41fad99c0fa..e71fb222c7c3 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -5,7 +5,6 @@
 #include <linux/rbtree.h>
 #include <linux/ceph/types.h>
 #include <linux/ceph/decode.h>
-#include <linux/ceph/ceph_fs.h>
 #include <linux/crush/crush.h>
 
 /*
@@ -280,11 +279,6 @@ bool ceph_osds_changed(const struct ceph_osds *old_acting,
 		       const struct ceph_osds *new_acting,
 		       bool any_change);
 
-/* calculate mapping of a file extent to an object */
-extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
-					 u64 off, u64 len,
-					 u64 *bno, u64 *oxoff, u64 *oxlen);
-
 int __ceph_object_locator_to_pg(struct ceph_pg_pool_info *pi,
 				const struct ceph_object_id *oid,
 				const struct ceph_object_locator *oloc,
diff --git a/include/linux/ceph/striper.h b/include/linux/ceph/striper.h
new file mode 100644
index 000000000000..cbd0d24b7148
--- /dev/null
+++ b/include/linux/ceph/striper.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_CEPH_STRIPER_H
+#define _LINUX_CEPH_STRIPER_H
+
+#include <linux/list.h>
+#include <linux/types.h>
+
+struct ceph_file_layout;
+
+void ceph_calc_file_object_mapping(struct ceph_file_layout *l,
+				   u64 off, u64 len,
+				   u64 *objno, u64 *objoff, u32 *xlen);
+
+struct ceph_object_extent {
+	struct list_head oe_item;
+	u64 oe_objno;
+	u64 oe_off;
+	u64 oe_len;
+};
+
+static inline void ceph_object_extent_init(struct ceph_object_extent *ex)
+{
+	INIT_LIST_HEAD(&ex->oe_item);
+}
+
+/*
+ * Called for each mapped stripe unit.
+ *
+ * @bytes: number of bytes mapped, i.e. the minimum of the full length
+ *         requested (file extent length) or the remainder of the stripe
+ *         unit within an object
+ */
+typedef void (*ceph_object_extent_fn_t)(struct ceph_object_extent *ex,
+					u32 bytes, void *arg);
+
+int ceph_file_to_extents(struct ceph_file_layout *l, u64 off, u64 len,
+			 struct list_head *object_extents,
+			 struct ceph_object_extent *alloc_fn(void *arg),
+			 void *alloc_arg,
+			 ceph_object_extent_fn_t action_fn,
+			 void *action_arg);
+int ceph_iterate_extents(struct ceph_file_layout *l, u64 off, u64 len,
+			 struct list_head *object_extents,
+			 ceph_object_extent_fn_t action_fn,
+			 void *action_arg);
+
+struct ceph_file_extent {
+	u64 fe_off;
+	u64 fe_len;
+};
+
+static inline u64 ceph_file_extents_bytes(struct ceph_file_extent *file_extents,
+					  u32 num_file_extents)
+{
+	u64 bytes = 0;
+	u32 i;
+
+	for (i = 0; i < num_file_extents; i++)
+		bytes += file_extents[i].fe_len;
+
+	return bytes;
+}
+
+int ceph_extent_to_file(struct ceph_file_layout *l,
+			u64 objno, u64 objoff, u64 objlen,
+			struct ceph_file_extent **file_extents,
+			u32 *num_file_extents);
+
+#endif
diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index d3f264a5b04d..ceb96ecab96e 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -17,9 +17,6 @@
  */
 #define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
 
-#define randomized_struct_fields_start	struct {
-#define randomized_struct_fields_end	};
-
 /* all clang versions usable with the kernel support KASAN ABI version 5 */
 #define KASAN_ABI_VERSION 5
 
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index e2c7f4369eff..b4bf73f5e38f 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -242,6 +242,9 @@
 #if defined(RANDSTRUCT_PLUGIN) && !defined(__CHECKER__)
 #define __randomize_layout __attribute__((randomize_layout))
 #define __no_randomize_layout __attribute__((no_randomize_layout))
+/* This anon struct can add padding, so only enable it under randstruct. */
+#define randomized_struct_fields_start	struct {
+#define randomized_struct_fields_end	} __randomize_layout;
 #endif
 
 #endif /* GCC_VERSION >= 40500 */
@@ -256,15 +259,6 @@
  */
 #define __visible	__attribute__((externally_visible))
 
-/*
- * RANDSTRUCT_PLUGIN wants to use an anonymous struct, but it is only
- * possible since GCC 4.6. To provide as much build testing coverage
- * as possible, this is used for all GCC 4.6+ builds, and not just on
- * RANDSTRUCT_PLUGIN builds.
- */
-#define randomized_struct_fields_start	struct {
-#define randomized_struct_fields_end	} __randomize_layout;
-
 #endif /* GCC_VERSION >= 40600 */
 
 
diff --git a/include/linux/const.h b/include/linux/const.h
new file mode 100644
index 000000000000..7b55a55f5911
--- /dev/null
+++ b/include/linux/const.h
@@ -0,0 +1,9 @@
+#ifndef _LINUX_CONST_H
+#define _LINUX_CONST_H
+
+#include <uapi/linux/const.h>
+
+#define UL(x)		(_UL(x))
+#define ULL(x)		(_ULL(x))
+
+#endif /* _LINUX_CONST_H */
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 1fe49724da9e..87f48dd932eb 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -960,8 +960,6 @@ extern void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq,
 extern struct freq_attr cpufreq_freq_attr_scaling_available_freqs;
 extern struct freq_attr cpufreq_freq_attr_scaling_boost_freqs;
 extern struct freq_attr *cpufreq_generic_attr[];
-int cpufreq_table_validate_and_show(struct cpufreq_policy *policy,
-				      struct cpufreq_frequency_table *table);
 int cpufreq_table_validate_and_sort(struct cpufreq_policy *policy);
 
 unsigned int cpufreq_generic_get(unsigned int cpu);
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index a806e94c482f..1eefabf1621f 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -135,7 +135,8 @@ extern bool cpuidle_not_available(struct cpuidle_driver *drv,
 				  struct cpuidle_device *dev);
 
 extern int cpuidle_select(struct cpuidle_driver *drv,
-			  struct cpuidle_device *dev);
+			  struct cpuidle_device *dev,
+			  bool *stop_tick);
 extern int cpuidle_enter(struct cpuidle_driver *drv,
 			 struct cpuidle_device *dev, int index);
 extern void cpuidle_reflect(struct cpuidle_device *dev, int index);
@@ -167,7 +168,7 @@ static inline bool cpuidle_not_available(struct cpuidle_driver *drv,
 					 struct cpuidle_device *dev)
 {return true; }
 static inline int cpuidle_select(struct cpuidle_driver *drv,
-				 struct cpuidle_device *dev)
+				 struct cpuidle_device *dev, bool *stop_tick)
 {return -ENODEV; }
 static inline int cpuidle_enter(struct cpuidle_driver *drv,
 				struct cpuidle_device *dev, int index)
@@ -250,7 +251,8 @@ struct cpuidle_governor {
 					struct cpuidle_device *dev);
 
 	int  (*select)		(struct cpuidle_driver *drv,
-					struct cpuidle_device *dev);
+					struct cpuidle_device *dev,
+					bool *stop_tick);
 	void (*reflect)		(struct cpuidle_device *dev, int index);
 };
 
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 0185ecdae135..f9eb22ad341e 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -26,18 +26,42 @@ extern struct attribute_group dax_attribute_group;
 
 #if IS_ENABLED(CONFIG_DAX)
 struct dax_device *dax_get_by_host(const char *host);
+struct dax_device *alloc_dax(void *private, const char *host,
+		const struct dax_operations *ops);
 void put_dax(struct dax_device *dax_dev);
+void kill_dax(struct dax_device *dax_dev);
+void dax_write_cache(struct dax_device *dax_dev, bool wc);
+bool dax_write_cache_enabled(struct dax_device *dax_dev);
 #else
 static inline struct dax_device *dax_get_by_host(const char *host)
 {
 	return NULL;
 }
-
+static inline struct dax_device *alloc_dax(void *private, const char *host,
+		const struct dax_operations *ops)
+{
+	/*
+	 * Callers should check IS_ENABLED(CONFIG_DAX) to know if this
+	 * NULL is an error or expected.
+	 */
+	return NULL;
+}
 static inline void put_dax(struct dax_device *dax_dev)
 {
 }
+static inline void kill_dax(struct dax_device *dax_dev)
+{
+}
+static inline void dax_write_cache(struct dax_device *dax_dev, bool wc)
+{
+}
+static inline bool dax_write_cache_enabled(struct dax_device *dax_dev)
+{
+	return false;
+}
 #endif
 
+struct writeback_control;
 int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff);
 #if IS_ENABLED(CONFIG_FS_DAX)
 int __bdev_dax_supported(struct super_block *sb, int blocksize);
@@ -57,6 +81,8 @@ static inline void fs_put_dax(struct dax_device *dax_dev)
 }
 
 struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev);
+int dax_writeback_mapping_range(struct address_space *mapping,
+		struct block_device *bdev, struct writeback_control *wbc);
 #else
 static inline int bdev_dax_supported(struct super_block *sb, int blocksize)
 {
@@ -76,22 +102,23 @@ static inline struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev)
 {
 	return NULL;
 }
+
+static inline int dax_writeback_mapping_range(struct address_space *mapping,
+		struct block_device *bdev, struct writeback_control *wbc)
+{
+	return -EOPNOTSUPP;
+}
 #endif
 
 int dax_read_lock(void);
 void dax_read_unlock(int id);
-struct dax_device *alloc_dax(void *private, const char *host,
-		const struct dax_operations *ops);
 bool dax_alive(struct dax_device *dax_dev);
-void kill_dax(struct dax_device *dax_dev);
 void *dax_get_private(struct dax_device *dax_dev);
 long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
 		void **kaddr, pfn_t *pfn);
 size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
 		size_t bytes, struct iov_iter *i);
 void dax_flush(struct dax_device *dax_dev, void *addr, size_t size);
-void dax_write_cache(struct dax_device *dax_dev, bool wc);
-bool dax_write_cache_enabled(struct dax_device *dax_dev);
 
 ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
 		const struct iomap_ops *ops);
@@ -121,7 +148,4 @@ static inline bool dax_mapping(struct address_space *mapping)
 	return mapping->host && IS_DAX(mapping->host);
 }
 
-struct writeback_control;
-int dax_writeback_mapping_range(struct address_space *mapping,
-		struct block_device *bdev, struct writeback_control *wbc);
 #endif
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index f838764993eb..861be5cab1df 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -470,7 +470,11 @@ typedef void (*dma_async_tx_callback_result)(void *dma_async_param,
 				const struct dmaengine_result *result);
 
 struct dmaengine_unmap_data {
+#if IS_ENABLED(CONFIG_DMA_ENGINE_RAID)
+	u16 map_cnt;
+#else
 	u8 map_cnt;
+#endif
 	u8 to_cnt;
 	u8 from_cnt;
 	u8 bidi_cnt;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1ee7f592e239..92efaf1f8977 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -13,6 +13,7 @@
 #include <linux/list_lru.h>
 #include <linux/llist.h>
 #include <linux/radix-tree.h>
+#include <linux/xarray.h>
 #include <linux/rbtree.h>
 #include <linux/init.h>
 #include <linux/pid.h>
@@ -390,12 +391,11 @@ int pagecache_write_end(struct file *, struct address_space *mapping,
 
 struct address_space {
 	struct inode		*host;		/* owner: inode, block_device */
-	struct radix_tree_root	page_tree;	/* radix tree of all pages */
-	spinlock_t		tree_lock;	/* and lock protecting it */
+	struct radix_tree_root	i_pages;	/* cached pages */
 	atomic_t		i_mmap_writable;/* count VM_SHARED mappings */
 	struct rb_root_cached	i_mmap;		/* tree of private and shared mappings */
 	struct rw_semaphore	i_mmap_rwsem;	/* protect tree, count, list */
-	/* Protected by tree_lock together with the radix tree */
+	/* Protected by the i_pages lock */
 	unsigned long		nrpages;	/* number of total pages */
 	/* number of shadow or DAX exceptional entries */
 	unsigned long		nrexceptional;
@@ -1989,7 +1989,7 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
  *
  * I_WB_SWITCH		Cgroup bdi_writeback switching in progress.  Used to
  *			synchronize competing switching instances and to tell
- *			wb stat updates to grab mapping->tree_lock.  See
+ *			wb stat updates to grab the i_pages lock.  See
  *			inode_switch_wb_work_fn() for details.
  *
  * I_OVL_INUSE		Used by overlayfs to get exclusive ownership on upper
@@ -3127,6 +3127,10 @@ extern int simple_rmdir(struct inode *, struct dentry *);
 extern int simple_rename(struct inode *, struct dentry *,
 			 struct inode *, struct dentry *, unsigned int);
 extern int noop_fsync(struct file *, loff_t, loff_t, int);
+extern int noop_set_page_dirty(struct page *page);
+extern void noop_invalidatepage(struct page *page, unsigned int offset,
+		unsigned int length);
+extern ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter);
 extern int simple_empty(struct dentry *);
 extern int simple_readpage(struct file *file, struct page *page);
 extern int simple_write_begin(struct file *file, struct address_space *mapping,
diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index 325017ad9311..39988924de3a 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -80,76 +80,145 @@
 struct hmm;
 
 /*
- * hmm_pfn_t - HMM uses its own pfn type to keep several flags per page
+ * hmm_pfn_flag_e - HMM flag enums
  *
  * Flags:
- * HMM_PFN_VALID: pfn is valid
- * HMM_PFN_READ:  CPU page table has read permission set
+ * HMM_PFN_VALID: pfn is valid. It has, at least, read permission.
  * HMM_PFN_WRITE: CPU page table has write permission set
+ * HMM_PFN_DEVICE_PRIVATE: private device memory (ZONE_DEVICE)
+ *
+ * The driver provide a flags array, if driver valid bit for an entry is bit
+ * 3 ie (entry & (1 << 3)) is true if entry is valid then driver must provide
+ * an array in hmm_range.flags with hmm_range.flags[HMM_PFN_VALID] == 1 << 3.
+ * Same logic apply to all flags. This is same idea as vm_page_prot in vma
+ * except that this is per device driver rather than per architecture.
+ */
+enum hmm_pfn_flag_e {
+	HMM_PFN_VALID = 0,
+	HMM_PFN_WRITE,
+	HMM_PFN_DEVICE_PRIVATE,
+	HMM_PFN_FLAG_MAX
+};
+
+/*
+ * hmm_pfn_value_e - HMM pfn special value
+ *
+ * Flags:
  * HMM_PFN_ERROR: corresponding CPU page table entry points to poisoned memory
- * HMM_PFN_EMPTY: corresponding CPU page table entry is pte_none()
+ * HMM_PFN_NONE: corresponding CPU page table entry is pte_none()
  * HMM_PFN_SPECIAL: corresponding CPU page table entry is special; i.e., the
  *      result of vm_insert_pfn() or vm_insert_page(). Therefore, it should not
  *      be mirrored by a device, because the entry will never have HMM_PFN_VALID
  *      set and the pfn value is undefined.
- * HMM_PFN_DEVICE_UNADDRESSABLE: unaddressable device memory (ZONE_DEVICE)
+ *
+ * Driver provide entry value for none entry, error entry and special entry,
+ * driver can alias (ie use same value for error and special for instance). It
+ * should not alias none and error or special.
+ *
+ * HMM pfn value returned by hmm_vma_get_pfns() or hmm_vma_fault() will be:
+ * hmm_range.values[HMM_PFN_ERROR] if CPU page table entry is poisonous,
+ * hmm_range.values[HMM_PFN_NONE] if there is no CPU page table
+ * hmm_range.values[HMM_PFN_SPECIAL] if CPU page table entry is a special one
  */
-typedef unsigned long hmm_pfn_t;
+enum hmm_pfn_value_e {
+	HMM_PFN_ERROR,
+	HMM_PFN_NONE,
+	HMM_PFN_SPECIAL,
+	HMM_PFN_VALUE_MAX
+};
 
-#define HMM_PFN_VALID (1 << 0)
-#define HMM_PFN_READ (1 << 1)
-#define HMM_PFN_WRITE (1 << 2)
-#define HMM_PFN_ERROR (1 << 3)
-#define HMM_PFN_EMPTY (1 << 4)
-#define HMM_PFN_SPECIAL (1 << 5)
-#define HMM_PFN_DEVICE_UNADDRESSABLE (1 << 6)
-#define HMM_PFN_SHIFT 7
+/*
+ * struct hmm_range - track invalidation lock on virtual address range
+ *
+ * @vma: the vm area struct for the range
+ * @list: all range lock are on a list
+ * @start: range virtual start address (inclusive)
+ * @end: range virtual end address (exclusive)
+ * @pfns: array of pfns (big enough for the range)
+ * @flags: pfn flags to match device driver page table
+ * @values: pfn value for some special case (none, special, error, ...)
+ * @pfn_shifts: pfn shift value (should be <= PAGE_SHIFT)
+ * @valid: pfns array did not change since it has been fill by an HMM function
+ */
+struct hmm_range {
+	struct vm_area_struct	*vma;
+	struct list_head	list;
+	unsigned long		start;
+	unsigned long		end;
+	uint64_t		*pfns;
+	const uint64_t		*flags;
+	const uint64_t		*values;
+	uint8_t			pfn_shift;
+	bool			valid;
+};
 
 /*
- * hmm_pfn_t_to_page() - return struct page pointed to by a valid hmm_pfn_t
- * @pfn: hmm_pfn_t to convert to struct page
- * Returns: struct page pointer if pfn is a valid hmm_pfn_t, NULL otherwise
+ * hmm_pfn_to_page() - return struct page pointed to by a valid HMM pfn
+ * @range: range use to decode HMM pfn value
+ * @pfn: HMM pfn value to get corresponding struct page from
+ * Returns: struct page pointer if pfn is a valid HMM pfn, NULL otherwise
  *
- * If the hmm_pfn_t is valid (ie valid flag set) then return the struct page
- * matching the pfn value stored in the hmm_pfn_t. Otherwise return NULL.
+ * If the HMM pfn is valid (ie valid flag set) then return the struct page
+ * matching the pfn value stored in the HMM pfn. Otherwise return NULL.
  */
-static inline struct page *hmm_pfn_t_to_page(hmm_pfn_t pfn)
+static inline struct page *hmm_pfn_to_page(const struct hmm_range *range,
+					   uint64_t pfn)
 {
-	if (!(pfn & HMM_PFN_VALID))
+	if (pfn == range->values[HMM_PFN_NONE])
+		return NULL;
+	if (pfn == range->values[HMM_PFN_ERROR])
 		return NULL;
-	return pfn_to_page(pfn >> HMM_PFN_SHIFT);
+	if (pfn == range->values[HMM_PFN_SPECIAL])
+		return NULL;
+	if (!(pfn & range->flags[HMM_PFN_VALID]))
+		return NULL;
+	return pfn_to_page(pfn >> range->pfn_shift);
 }
 
 /*
- * hmm_pfn_t_to_pfn() - return pfn value store in a hmm_pfn_t
- * @pfn: hmm_pfn_t to extract pfn from
- * Returns: pfn value if hmm_pfn_t is valid, -1UL otherwise
+ * hmm_pfn_to_pfn() - return pfn value store in a HMM pfn
+ * @range: range use to decode HMM pfn value
+ * @pfn: HMM pfn value to extract pfn from
+ * Returns: pfn value if HMM pfn is valid, -1UL otherwise
  */
-static inline unsigned long hmm_pfn_t_to_pfn(hmm_pfn_t pfn)
+static inline unsigned long hmm_pfn_to_pfn(const struct hmm_range *range,
+					   uint64_t pfn)
 {
-	if (!(pfn & HMM_PFN_VALID))
+	if (pfn == range->values[HMM_PFN_NONE])
+		return -1UL;
+	if (pfn == range->values[HMM_PFN_ERROR])
+		return -1UL;
+	if (pfn == range->values[HMM_PFN_SPECIAL])
+		return -1UL;
+	if (!(pfn & range->flags[HMM_PFN_VALID]))
 		return -1UL;
-	return (pfn >> HMM_PFN_SHIFT);
+	return (pfn >> range->pfn_shift);
 }
 
 /*
- * hmm_pfn_t_from_page() - create a valid hmm_pfn_t value from struct page
- * @page: struct page pointer for which to create the hmm_pfn_t
- * Returns: valid hmm_pfn_t for the page
+ * hmm_pfn_from_page() - create a valid HMM pfn value from struct page
+ * @range: range use to encode HMM pfn value
+ * @page: struct page pointer for which to create the HMM pfn
+ * Returns: valid HMM pfn for the page
  */
-static inline hmm_pfn_t hmm_pfn_t_from_page(struct page *page)
+static inline uint64_t hmm_pfn_from_page(const struct hmm_range *range,
+					 struct page *page)
 {
-	return (page_to_pfn(page) << HMM_PFN_SHIFT) | HMM_PFN_VALID;
+	return (page_to_pfn(page) << range->pfn_shift) |
+		range->flags[HMM_PFN_VALID];
 }
 
 /*
- * hmm_pfn_t_from_pfn() - create a valid hmm_pfn_t value from pfn
- * @pfn: pfn value for which to create the hmm_pfn_t
- * Returns: valid hmm_pfn_t for the pfn
+ * hmm_pfn_from_pfn() - create a valid HMM pfn value from pfn
+ * @range: range use to encode HMM pfn value
+ * @pfn: pfn value for which to create the HMM pfn
+ * Returns: valid HMM pfn for the pfn
  */
-static inline hmm_pfn_t hmm_pfn_t_from_pfn(unsigned long pfn)
+static inline uint64_t hmm_pfn_from_pfn(const struct hmm_range *range,
+					unsigned long pfn)
 {
-	return (pfn << HMM_PFN_SHIFT) | HMM_PFN_VALID;
+	return (pfn << range->pfn_shift) |
+		range->flags[HMM_PFN_VALID];
 }
 
 
@@ -218,6 +287,16 @@ enum hmm_update_type {
  * @update: callback to update range on a device
  */
 struct hmm_mirror_ops {
+	/* release() - release hmm_mirror
+	 *
+	 * @mirror: pointer to struct hmm_mirror
+	 *
+	 * This is called when the mm_struct is being released.
+	 * The callback should make sure no references to the mirror occur
+	 * after the callback returns.
+	 */
+	void (*release)(struct hmm_mirror *mirror);
+
 	/* sync_cpu_device_pagetables() - synchronize page tables
 	 *
 	 * @mirror: pointer to struct hmm_mirror
@@ -262,23 +341,6 @@ void hmm_mirror_unregister(struct hmm_mirror *mirror);
 
 
 /*
- * struct hmm_range - track invalidation lock on virtual address range
- *
- * @list: all range lock are on a list
- * @start: range virtual start address (inclusive)
- * @end: range virtual end address (exclusive)
- * @pfns: array of pfns (big enough for the range)
- * @valid: pfns array did not change since it has been fill by an HMM function
- */
-struct hmm_range {
-	struct list_head	list;
-	unsigned long		start;
-	unsigned long		end;
-	hmm_pfn_t		*pfns;
-	bool			valid;
-};
-
-/*
  * To snapshot the CPU page table, call hmm_vma_get_pfns(), then take a device
  * driver lock that serializes device page table updates, then call
  * hmm_vma_range_done(), to check if the snapshot is still valid. The same
@@ -291,17 +353,13 @@ struct hmm_range {
  *
  * IF YOU DO NOT FOLLOW THE ABOVE RULE THE SNAPSHOT CONTENT MIGHT BE INVALID !
  */
-int hmm_vma_get_pfns(struct vm_area_struct *vma,
-		     struct hmm_range *range,
-		     unsigned long start,
-		     unsigned long end,
-		     hmm_pfn_t *pfns);
-bool hmm_vma_range_done(struct vm_area_struct *vma, struct hmm_range *range);
+int hmm_vma_get_pfns(struct hmm_range *range);
+bool hmm_vma_range_done(struct hmm_range *range);
 
 
 /*
  * Fault memory on behalf of device driver. Unlike handle_mm_fault(), this will
- * not migrate any device memory back to system memory. The hmm_pfn_t array will
+ * not migrate any device memory back to system memory. The HMM pfn array will
  * be updated with the fault result and current snapshot of the CPU page table
  * for the range.
  *
@@ -310,22 +368,26 @@ bool hmm_vma_range_done(struct vm_area_struct *vma, struct hmm_range *range);
  * function returns -EAGAIN.
  *
  * Return value does not reflect if the fault was successful for every single
- * address or not. Therefore, the caller must to inspect the hmm_pfn_t array to
+ * address or not. Therefore, the caller must to inspect the HMM pfn array to
  * determine fault status for each address.
  *
  * Trying to fault inside an invalid vma will result in -EINVAL.
  *
  * See the function description in mm/hmm.c for further documentation.
  */
-int hmm_vma_fault(struct vm_area_struct *vma,
-		  struct hmm_range *range,
-		  unsigned long start,
-		  unsigned long end,
-		  hmm_pfn_t *pfns,
-		  bool write,
-		  bool block);
-#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */
+int hmm_vma_fault(struct hmm_range *range, bool block);
 
+/* Below are for HMM internal use only! Not to be used by device driver! */
+void hmm_mm_destroy(struct mm_struct *mm);
+
+static inline void hmm_mm_init(struct mm_struct *mm)
+{
+	mm->hmm = NULL;
+}
+#else /* IS_ENABLED(CONFIG_HMM_MIRROR) */
+static inline void hmm_mm_destroy(struct mm_struct *mm) {}
+static inline void hmm_mm_init(struct mm_struct *mm) {}
+#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */
 
 #if IS_ENABLED(CONFIG_DEVICE_PRIVATE) ||  IS_ENABLED(CONFIG_DEVICE_PUBLIC)
 struct hmm_devmem;
@@ -498,23 +560,9 @@ struct hmm_device {
 struct hmm_device *hmm_device_new(void *drvdata);
 void hmm_device_put(struct hmm_device *hmm_device);
 #endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
-#endif /* IS_ENABLED(CONFIG_HMM) */
-
-/* Below are for HMM internal use only! Not to be used by device driver! */
-#if IS_ENABLED(CONFIG_HMM_MIRROR)
-void hmm_mm_destroy(struct mm_struct *mm);
-
-static inline void hmm_mm_init(struct mm_struct *mm)
-{
-	mm->hmm = NULL;
-}
-#else /* IS_ENABLED(CONFIG_HMM_MIRROR) */
-static inline void hmm_mm_destroy(struct mm_struct *mm) {}
-static inline void hmm_mm_init(struct mm_struct *mm) {}
-#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */
-
-
 #else /* IS_ENABLED(CONFIG_HMM) */
 static inline void hmm_mm_destroy(struct mm_struct *mm) {}
 static inline void hmm_mm_init(struct mm_struct *mm) {}
+#endif /* IS_ENABLED(CONFIG_HMM) */
+
 #endif /* LINUX_HMM_H */
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 78f456fcd242..a2656c3ebe81 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -424,6 +424,7 @@ static inline ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
 }
 
 extern u64 hrtimer_get_next_event(void);
+extern u64 hrtimer_next_event_without(const struct hrtimer *exclude);
 
 extern bool hrtimer_active(const struct hrtimer *timer);
 
diff --git a/include/linux/idr.h b/include/linux/idr.h
index 7d6a6313f0ab..e856f4e0ab35 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -29,29 +29,31 @@ struct idr {
 #define IDR_FREE	0
 
 /* Set the IDR flag and the IDR_FREE tag */
-#define IDR_RT_MARKER		((__force gfp_t)(3 << __GFP_BITS_SHIFT))
+#define IDR_RT_MARKER	(ROOT_IS_IDR | (__force gfp_t)			\
+					(1 << (ROOT_TAG_SHIFT + IDR_FREE)))
 
-#define IDR_INIT_BASE(base) {						\
-	.idr_rt = RADIX_TREE_INIT(IDR_RT_MARKER),			\
+#define IDR_INIT_BASE(name, base) {					\
+	.idr_rt = RADIX_TREE_INIT(name, IDR_RT_MARKER),			\
 	.idr_base = (base),						\
 	.idr_next = 0,							\
 }
 
 /**
  * IDR_INIT() - Initialise an IDR.
+ * @name: Name of IDR.
  *
  * A freshly-initialised IDR contains no IDs.
  */
-#define IDR_INIT	IDR_INIT_BASE(0)
+#define IDR_INIT(name)	IDR_INIT_BASE(name, 0)
 
 /**
- * DEFINE_IDR() - Define a statically-allocated IDR
- * @name: Name of IDR
+ * DEFINE_IDR() - Define a statically-allocated IDR.
+ * @name: Name of IDR.
  *
  * An IDR defined using this macro is ready for use with no additional
  * initialisation required.  It contains no IDs.
  */
-#define DEFINE_IDR(name)	struct idr name = IDR_INIT
+#define DEFINE_IDR(name)	struct idr name = IDR_INIT(name)
 
 /**
  * idr_get_cursor - Return the current position of the cyclic allocator
@@ -218,10 +220,10 @@ struct ida {
 	struct radix_tree_root	ida_rt;
 };
 
-#define IDA_INIT	{						\
-	.ida_rt = RADIX_TREE_INIT(IDR_RT_MARKER | GFP_NOWAIT),		\
+#define IDA_INIT(name)	{						\
+	.ida_rt = RADIX_TREE_INIT(name, IDR_RT_MARKER | GFP_NOWAIT),	\
 }
-#define DEFINE_IDA(name)	struct ida name = IDA_INIT
+#define DEFINE_IDA(name)	struct ida name = IDA_INIT(name)
 
 int ida_pre_get(struct ida *ida, gfp_t gfp_mask);
 int ida_get_new_above(struct ida *ida, int starting_id, int *p_id);
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 8dad3dd26eae..ef169d67df92 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -209,12 +209,12 @@
 #define DMA_FECTL_IM (((u32)1) << 31)
 
 /* FSTS_REG */
-#define DMA_FSTS_PPF ((u32)2)
-#define DMA_FSTS_PFO ((u32)1)
-#define DMA_FSTS_IQE (1 << 4)
-#define DMA_FSTS_ICE (1 << 5)
-#define DMA_FSTS_ITE (1 << 6)
-#define DMA_FSTS_PRO (1 << 7)
+#define DMA_FSTS_PFO (1 << 0) /* Primary Fault Overflow */
+#define DMA_FSTS_PPF (1 << 1) /* Primary Pending Fault */
+#define DMA_FSTS_IQE (1 << 4) /* Invalidation Queue Error */
+#define DMA_FSTS_ICE (1 << 5) /* Invalidation Completion Error */
+#define DMA_FSTS_ITE (1 << 6) /* Invalidation Time-out Error */
+#define DMA_FSTS_PRO (1 << 7) /* Page Request Overflow */
 #define dma_fsts_fault_record_index(s) (((s) >> 8) & 0xff)
 
 /* FRCD_REG, 32 bits access */
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 41b8c5757859..19938ee6eb31 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -465,23 +465,23 @@ static inline int iommu_map(struct iommu_domain *domain, unsigned long iova,
 	return -ENODEV;
 }
 
-static inline int iommu_unmap(struct iommu_domain *domain, unsigned long iova,
-			      size_t size)
+static inline size_t iommu_unmap(struct iommu_domain *domain,
+				 unsigned long iova, size_t size)
 {
-	return -ENODEV;
+	return 0;
 }
 
-static inline int iommu_unmap_fast(struct iommu_domain *domain, unsigned long iova,
-				   int gfp_order)
+static inline size_t iommu_unmap_fast(struct iommu_domain *domain,
+				      unsigned long iova, int gfp_order)
 {
-	return -ENODEV;
+	return 0;
 }
 
 static inline size_t iommu_map_sg(struct iommu_domain *domain,
 				  unsigned long iova, struct scatterlist *sg,
 				  unsigned int nents, int prot)
 {
-	return -ENODEV;
+	return 0;
 }
 
 static inline void iommu_flush_tlb_all(struct iommu_domain *domain)
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index 9385aa57497b..a27cf6652327 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -62,8 +62,11 @@ extern int register_refined_jiffies(long clock_tick_rate);
 /* TICK_NSEC is the time between ticks in nsec assuming SHIFTED_HZ */
 #define TICK_NSEC ((NSEC_PER_SEC+HZ/2)/HZ)
 
-/* TICK_USEC is the time between ticks in usec assuming fake USER_HZ */
-#define TICK_USEC ((1000000UL + USER_HZ/2) / USER_HZ)
+/* TICK_USEC is the time between ticks in usec assuming SHIFTED_HZ */
+#define TICK_USEC ((USEC_PER_SEC + HZ/2) / HZ)
+
+/* USER_TICK_USEC is the time between ticks in usec assuming fake USER_HZ */
+#define USER_TICK_USEC ((1000000UL + USER_HZ/2) / USER_HZ)
 
 #ifndef __jiffy_arch_data
 #define __jiffy_arch_data
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 52b70894eaa5..6a1eb0b0aad9 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -439,7 +439,8 @@ extern long simple_strtol(const char *,char **,unsigned int);
 extern unsigned long long simple_strtoull(const char *,char **,unsigned int);
 extern long long simple_strtoll(const char *,char **,unsigned int);
 
-extern int num_to_str(char *buf, int size, unsigned long long num);
+extern int num_to_str(char *buf, int size,
+		      unsigned long long num, unsigned int width);
 
 /* lib/printf utilities */
 
@@ -543,6 +544,7 @@ extern enum system_states {
 	SYSTEM_RESTART,
 } system_state;
 
+/* This cannot be an enum because some may be used in assembly source. */
 #define TAINT_PROPRIETARY_MODULE	0
 #define TAINT_FORCED_MODULE		1
 #define TAINT_CPU_OUT_OF_SPEC		2
@@ -560,7 +562,8 @@ extern enum system_states {
 #define TAINT_SOFTLOCKUP		14
 #define TAINT_LIVEPATCH			15
 #define TAINT_AUX			16
-#define TAINT_FLAGS_COUNT		17
+#define TAINT_RANDSTRUCT		17
+#define TAINT_FLAGS_COUNT		18
 
 struct taint_flag {
 	char c_true;	/* character printed when tainted */
diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index e251533a5939..89fc8dc7bf38 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -41,11 +41,11 @@
  */
 
 /*
- * Note about locking : There is no locking required until only * one reader
- * and one writer is using the fifo and no kfifo_reset() will be * called
- *  kfifo_reset_out() can be safely used, until it will be only called
+ * Note about locking: There is no locking required until only one reader
+ * and one writer is using the fifo and no kfifo_reset() will be called.
+ * kfifo_reset_out() can be safely used, until it will be only called
  * in the reader thread.
- *  For multiple writer and one reader there is only a need to lock the writer.
+ * For multiple writer and one reader there is only a need to lock the writer.
  * And vice versa for only one writer and multiple reader there is only a need
  * to lock the reader.
  */
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index ff855ed965fb..097072c5a852 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -76,12 +76,14 @@ typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc,
 		struct nvdimm *nvdimm, unsigned int cmd, void *buf,
 		unsigned int buf_len, int *cmd_rc);
 
+struct device_node;
 struct nvdimm_bus_descriptor {
 	const struct attribute_group **attr_groups;
 	unsigned long bus_dsm_mask;
 	unsigned long cmd_mask;
 	struct module *module;
 	char *provider_name;
+	struct device_node *of_node;
 	ndctl_fn ndctl;
 	int (*flush_probe)(struct nvdimm_bus_descriptor *nd_desc);
 	int (*clear_to_send)(struct nvdimm_bus_descriptor *nd_desc,
@@ -123,6 +125,7 @@ struct nd_region_desc {
 	int num_lanes;
 	int numa_node;
 	unsigned long flags;
+	struct device_node *of_node;
 };
 
 struct device;
@@ -164,6 +167,7 @@ void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus);
 struct nvdimm_bus *to_nvdimm_bus(struct device *dev);
 struct nvdimm *to_nvdimm(struct device *dev);
 struct nd_region *to_nd_region(struct device *dev);
+struct device *nd_region_dev(struct nd_region *nd_region);
 struct nd_blk_region *to_nd_blk_region(struct device *dev);
 struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus);
 struct device *to_nvdimm_bus_dev(struct nvdimm_bus *nvdimm_bus);
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index c46016bb25eb..d99b71bc2c66 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -48,13 +48,12 @@ enum memcg_stat_item {
 	MEMCG_NR_STAT,
 };
 
-/* Cgroup-specific events, on top of universal VM events */
-enum memcg_event_item {
-	MEMCG_LOW = NR_VM_EVENT_ITEMS,
+enum memcg_memory_event {
+	MEMCG_LOW,
 	MEMCG_HIGH,
 	MEMCG_MAX,
 	MEMCG_OOM,
-	MEMCG_NR_EVENTS,
+	MEMCG_NR_MEMORY_EVENTS,
 };
 
 struct mem_cgroup_reclaim_cookie {
@@ -88,7 +87,7 @@ enum mem_cgroup_events_target {
 
 struct mem_cgroup_stat_cpu {
 	long count[MEMCG_NR_STAT];
-	unsigned long events[MEMCG_NR_EVENTS];
+	unsigned long events[NR_VM_EVENT_ITEMS];
 	unsigned long nr_page_events;
 	unsigned long targets[MEM_CGROUP_NTARGETS];
 };
@@ -120,6 +119,9 @@ struct mem_cgroup_per_node {
 	unsigned long		usage_in_excess;/* Set to the value by which */
 						/* the soft limit is exceeded*/
 	bool			on_tree;
+	bool			congested;	/* memcg has many dirty pages */
+						/* backed by a congested BDI */
+
 	struct mem_cgroup	*memcg;		/* Back pointer, we cannot */
 						/* use container_of	   */
 };
@@ -202,7 +204,8 @@ struct mem_cgroup {
 	/* OOM-Killer disable */
 	int		oom_kill_disable;
 
-	/* handle for "memory.events" */
+	/* memory.events */
+	atomic_long_t memory_events[MEMCG_NR_MEMORY_EVENTS];
 	struct cgroup_file events_file;
 
 	/* protect arrays of thresholds */
@@ -231,9 +234,10 @@ struct mem_cgroup {
 	struct task_struct	*move_lock_task;
 	unsigned long		move_lock_flags;
 
+	/* memory.stat */
 	struct mem_cgroup_stat_cpu __percpu *stat_cpu;
 	atomic_long_t		stat[MEMCG_NR_STAT];
-	atomic_long_t		events[MEMCG_NR_EVENTS];
+	atomic_long_t		events[NR_VM_EVENT_ITEMS];
 
 	unsigned long		socket_pressure;
 
@@ -645,9 +649,9 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
 						gfp_t gfp_mask,
 						unsigned long *total_scanned);
 
-/* idx can be of type enum memcg_event_item or vm_event_item */
 static inline void __count_memcg_events(struct mem_cgroup *memcg,
-					int idx, unsigned long count)
+					enum vm_event_item idx,
+					unsigned long count)
 {
 	unsigned long x;
 
@@ -663,7 +667,8 @@ static inline void __count_memcg_events(struct mem_cgroup *memcg,
 }
 
 static inline void count_memcg_events(struct mem_cgroup *memcg,
-				      int idx, unsigned long count)
+				      enum vm_event_item idx,
+				      unsigned long count)
 {
 	unsigned long flags;
 
@@ -672,9 +677,8 @@ static inline void count_memcg_events(struct mem_cgroup *memcg,
 	local_irq_restore(flags);
 }
 
-/* idx can be of type enum memcg_event_item or vm_event_item */
 static inline void count_memcg_page_event(struct page *page,
-					  int idx)
+					  enum vm_event_item idx)
 {
 	if (page->mem_cgroup)
 		count_memcg_events(page->mem_cgroup, idx, 1);
@@ -698,10 +702,10 @@ static inline void count_memcg_event_mm(struct mm_struct *mm,
 	rcu_read_unlock();
 }
 
-static inline void mem_cgroup_event(struct mem_cgroup *memcg,
-				    enum memcg_event_item event)
+static inline void memcg_memory_event(struct mem_cgroup *memcg,
+				      enum memcg_memory_event event)
 {
-	count_memcg_events(memcg, event, 1);
+	atomic_long_inc(&memcg->memory_events[event]);
 	cgroup_file_notify(&memcg->events_file);
 }
 
@@ -721,8 +725,8 @@ static inline bool mem_cgroup_disabled(void)
 	return true;
 }
 
-static inline void mem_cgroup_event(struct mem_cgroup *memcg,
-				    enum memcg_event_item event)
+static inline void memcg_memory_event(struct mem_cgroup *memcg,
+				      enum memcg_memory_event event)
 {
 }
 
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 2b0265265c28..e0e49b5b1ee1 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -216,9 +216,6 @@ void put_online_mems(void);
 void mem_hotplug_begin(void);
 void mem_hotplug_done(void);
 
-extern void set_zone_contiguous(struct zone *zone);
-extern void clear_zone_contiguous(struct zone *zone);
-
 #else /* ! CONFIG_MEMORY_HOTPLUG */
 #define pfn_to_online_page(pfn)			\
 ({						\
diff --git a/include/linux/mfd/samsung/rtc.h b/include/linux/mfd/samsung/rtc.h
index 48c3c5be7eb1..9ed2871ea335 100644
--- a/include/linux/mfd/samsung/rtc.h
+++ b/include/linux/mfd/samsung/rtc.h
@@ -141,15 +141,4 @@ enum s2mps_rtc_reg {
 #define WTSR_ENABLE_SHIFT	6
 #define WTSR_ENABLE_MASK	(1 << WTSR_ENABLE_SHIFT)
 
-enum {
-	RTC_SEC = 0,
-	RTC_MIN,
-	RTC_HOUR,
-	RTC_WEEKDAY,
-	RTC_DATE,
-	RTC_MONTH,
-	RTC_YEAR1,
-	RTC_YEAR2,
-};
-
 #endif /*  __LINUX_MFD_SEC_RTC_H */
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index ab45f8a0d288..f2b4abbca55e 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -7,8 +7,7 @@
 #include <linux/migrate_mode.h>
 #include <linux/hugetlb.h>
 
-typedef struct page *new_page_t(struct page *page, unsigned long private,
-				int **reason);
+typedef struct page *new_page_t(struct page *page, unsigned long private);
 typedef void free_page_t(struct page *page, unsigned long private);
 
 /*
@@ -43,9 +42,9 @@ static inline struct page *new_page_nodemask(struct page *page,
 		return alloc_huge_page_nodemask(page_hstate(compound_head(page)),
 				preferred_nid, nodemask);
 
-	if (thp_migration_supported() && PageTransHuge(page)) {
-		order = HPAGE_PMD_ORDER;
+	if (PageTransHuge(page)) {
 		gfp_mask |= GFP_TRANSHUGE;
+		order = HPAGE_PMD_ORDER;
 	}
 
 	if (PageHighMem(page) || (zone_idx(page_zone(page)) == ZONE_MOVABLE))
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 3ad632366973..1ac1f06a4be6 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -747,7 +747,7 @@ int finish_mkwrite_fault(struct vm_fault *vmf);
  * refcount. The each user mapping also has a reference to the page.
  *
  * The pagecache pages are stored in a per-mapping radix tree, which is
- * rooted at mapping->page_tree, and indexed by offset.
+ * rooted at mapping->i_pages, and indexed by offset.
  * Where 2.4 and early 2.6 kernels kept dirty/clean pages in per-address_space
  * lists, we instead now tag pages as dirty/writeback in the radix tree.
  *
@@ -1466,6 +1466,7 @@ extern int try_to_release_page(struct page * page, gfp_t gfp_mask);
 extern void do_invalidatepage(struct page *page, unsigned int offset,
 			      unsigned int length);
 
+void __set_page_dirty(struct page *, struct address_space *, int warn);
 int __set_page_dirty_nobuffers(struct page *page);
 int __set_page_dirty_no_writeback(struct page *page);
 int redirty_page_for_writepage(struct writeback_control *wbc,
@@ -2108,6 +2109,7 @@ extern void setup_per_cpu_pageset(void);
 
 extern void zone_pcp_update(struct zone *zone);
 extern void zone_pcp_reset(struct zone *zone);
+extern void setup_zone_pageset(struct zone *zone);
 
 /* page_alloc.c */
 extern int min_free_kbytes;
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index f11ae29005f1..32699b2dc52a 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -180,6 +180,7 @@ enum node_stat_item {
 	NR_VMSCAN_IMMEDIATE,	/* Prioritise for reclaim when writeback ends */
 	NR_DIRTIED,		/* page dirtyings since bootup */
 	NR_WRITTEN,		/* page writings since bootup */
+	NR_INDIRECTLY_RECLAIMABLE_BYTES, /* measured in bytes */
 	NR_VM_NODE_STAT_ITEMS
 };
 
@@ -884,7 +885,7 @@ int min_free_kbytes_sysctl_handler(struct ctl_table *, int,
 					void __user *, size_t *, loff_t *);
 int watermark_scale_factor_sysctl_handler(struct ctl_table *, int,
 					void __user *, size_t *, loff_t *);
-extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1];
+extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES];
 int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int,
 					void __user *, size_t *, loff_t *);
 int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int,
diff --git a/include/linux/nd.h b/include/linux/nd.h
index 5dc6b695437d..43c181a6add5 100644
--- a/include/linux/nd.h
+++ b/include/linux/nd.h
@@ -180,6 +180,12 @@ struct nd_region;
 void nvdimm_region_notify(struct nd_region *nd_region, enum nvdimm_event event);
 int __must_check __nd_driver_register(struct nd_device_driver *nd_drv,
 		struct module *module, const char *mod_name);
+static inline void nd_driver_unregister(struct nd_device_driver *drv)
+{
+	driver_unregister(&drv->drv);
+}
 #define nd_driver_register(driver) \
 	__nd_driver_register(driver, THIS_MODULE, KBUILD_MODNAME)
+#define module_nd_driver(driver) \
+	module_driver(driver, nd_driver_register, nd_driver_unregister)
 #endif /* __LINUX_ND_H__ */
diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
index cdad58bbfd8b..4ae347cbc36d 100644
--- a/include/linux/page-isolation.h
+++ b/include/linux/page-isolation.h
@@ -63,7 +63,6 @@ undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
 int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
 			bool skip_hwpoisoned_pages);
 
-struct page *alloc_migrate_target(struct page *page, unsigned long private,
-				int **resultp);
+struct page *alloc_migrate_target(struct page *page, unsigned long private);
 
 #endif
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 34ce3ebf97d5..b1bd2186e6d2 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -144,7 +144,7 @@ void release_pages(struct page **pages, int nr);
  * 3. check the page is still in pagecache (if no, goto 1)
  *
  * Remove-side that cares about stability of _refcount (eg. reclaim) has the
- * following (with tree_lock held for write):
+ * following (with the i_pages lock held):
  * A. atomically check refcount is correct and set it to 0 (atomic_cmpxchg)
  * B. remove page from pagecache
  * C. free the page
@@ -157,7 +157,7 @@ void release_pages(struct page **pages, int nr);
  *
  * It is possible that between 1 and 2, the page is removed then the exact same
  * page is inserted into the same position in pagecache. That's OK: the
- * old find_get_page using tree_lock could equally have run before or after
+ * old find_get_page using a lock could equally have run before or after
  * such a re-insertion, depending on order that locks are granted.
  *
  * Lookups racing against pagecache insertion isn't a big problem: either 1
diff --git a/include/linux/platform_data/mlxreg.h b/include/linux/platform_data/mlxreg.h
index fcdc707eab99..2744cff1b297 100644
--- a/include/linux/platform_data/mlxreg.h
+++ b/include/linux/platform_data/mlxreg.h
@@ -129,6 +129,8 @@ struct mlxreg_core_platform_data {
  * @mask: top aggregation interrupt common mask;
  * @cell_low: location of low aggregation interrupt register;
  * @mask_low: low aggregation interrupt common mask;
+ * @deferred_nr: I2C adapter number must be exist prior probing execution;
+ * @shift_nr: I2C adapter numbers must be incremented by this value;
  */
 struct mlxreg_core_hotplug_platform_data {
 	struct mlxreg_core_item *items;
@@ -139,6 +141,8 @@ struct mlxreg_core_hotplug_platform_data {
 	u32 mask;
 	u32 cell_low;
 	u32 mask_low;
+	int deferred_nr;
+	int shift_nr;
 };
 
 #endif /* __LINUX_PLATFORM_DATA_MLXREG_H */
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index fc55ff31eca7..34149e8b5f73 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -104,25 +104,29 @@ struct radix_tree_node {
 	unsigned long	tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
 };
 
-/* The top bits of gfp_mask are used to store the root tags and the IDR flag */
-#define ROOT_IS_IDR	((__force gfp_t)(1 << __GFP_BITS_SHIFT))
-#define ROOT_TAG_SHIFT	(__GFP_BITS_SHIFT + 1)
+/* The IDR tag is stored in the low bits of the GFP flags */
+#define ROOT_IS_IDR	((__force gfp_t)4)
+/* The top bits of gfp_mask are used to store the root tags */
+#define ROOT_TAG_SHIFT	(__GFP_BITS_SHIFT)
 
 struct radix_tree_root {
+	spinlock_t		xa_lock;
 	gfp_t			gfp_mask;
 	struct radix_tree_node	__rcu *rnode;
 };
 
-#define RADIX_TREE_INIT(mask)	{					\
+#define RADIX_TREE_INIT(name, mask)	{				\
+	.xa_lock = __SPIN_LOCK_UNLOCKED(name.xa_lock),			\
 	.gfp_mask = (mask),						\
 	.rnode = NULL,							\
 }
 
 #define RADIX_TREE(name, mask) \
-	struct radix_tree_root name = RADIX_TREE_INIT(mask)
+	struct radix_tree_root name = RADIX_TREE_INIT(name, mask)
 
 #define INIT_RADIX_TREE(root, mask)					\
 do {									\
+	spin_lock_init(&(root)->xa_lock);				\
 	(root)->gfp_mask = (mask);					\
 	(root)->rnode = NULL;						\
 } while (0)
diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index 728d421fffe9..d09a9c7af109 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -344,7 +344,7 @@ struct rproc_ops {
 	int (*stop)(struct rproc *rproc);
 	void (*kick)(struct rproc *rproc, int vqid);
 	void * (*da_to_va)(struct rproc *rproc, u64 da, int len);
-	int (*load_rsc_table)(struct rproc *rproc, const struct firmware *fw);
+	int (*parse_fw)(struct rproc *rproc, const struct firmware *fw);
 	struct resource_table *(*find_loaded_rsc_table)(
 				struct rproc *rproc, const struct firmware *fw);
 	int (*load)(struct rproc *rproc, const struct firmware *fw);
@@ -395,6 +395,21 @@ enum rproc_crash_type {
 };
 
 /**
+ * struct rproc_dump_segment - segment info from ELF header
+ * @node:	list node related to the rproc segment list
+ * @da:		device address of the segment
+ * @size:	size of the segment
+ */
+struct rproc_dump_segment {
+	struct list_head node;
+
+	dma_addr_t da;
+	size_t size;
+
+	loff_t offset;
+};
+
+/**
  * struct rproc - represents a physical remote processor device
  * @node: list node of this rproc object
  * @domain: iommu domain
@@ -424,6 +439,7 @@ enum rproc_crash_type {
  * @cached_table: copy of the resource table
  * @table_sz: size of @cached_table
  * @has_iommu: flag to indicate if remote processor is behind an MMU
+ * @dump_segments: list of segments in the firmware
  */
 struct rproc {
 	struct list_head node;
@@ -455,19 +471,21 @@ struct rproc {
 	size_t table_sz;
 	bool has_iommu;
 	bool auto_boot;
+	struct list_head dump_segments;
 };
 
 /**
  * struct rproc_subdev - subdevice tied to a remoteproc
  * @node: list node related to the rproc subdevs list
  * @probe: probe function, called as the rproc is started
- * @remove: remove function, called as the rproc is stopped
+ * @remove: remove function, called as the rproc is being stopped, the @crashed
+ *	    parameter indicates if this originates from the a recovery
  */
 struct rproc_subdev {
 	struct list_head node;
 
 	int (*probe)(struct rproc_subdev *subdev);
-	void (*remove)(struct rproc_subdev *subdev);
+	void (*remove)(struct rproc_subdev *subdev, bool crashed);
 };
 
 /* we currently support only two vrings per rvdev */
@@ -534,6 +552,7 @@ void rproc_free(struct rproc *rproc);
 int rproc_boot(struct rproc *rproc);
 void rproc_shutdown(struct rproc *rproc);
 void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type);
+int rproc_coredump_add_segment(struct rproc *rproc, dma_addr_t da, size_t size);
 
 static inline struct rproc_vdev *vdev_to_rvdev(struct virtio_device *vdev)
 {
@@ -550,7 +569,7 @@ static inline struct rproc *vdev_to_rproc(struct virtio_device *vdev)
 void rproc_add_subdev(struct rproc *rproc,
 		      struct rproc_subdev *subdev,
 		      int (*probe)(struct rproc_subdev *subdev),
-		      void (*remove)(struct rproc_subdev *subdev));
+		      void (*remove)(struct rproc_subdev *subdev, bool graceful));
 
 void rproc_remove_subdev(struct rproc *rproc, struct rproc_subdev *subdev);
 
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 7d9eb39fa76a..a0233edc0718 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -34,10 +34,12 @@ struct ring_buffer_event {
  *				 array[0] = time delta (28 .. 59)
  *				 size = 8 bytes
  *
- * @RINGBUF_TYPE_TIME_STAMP:	Sync time stamp with external clock
- *				 array[0]    = tv_nsec
- *				 array[1..2] = tv_sec
- *				 size = 16 bytes
+ * @RINGBUF_TYPE_TIME_STAMP:	Absolute timestamp
+ *				 Same format as TIME_EXTEND except that the
+ *				 value is an absolute timestamp, not a delta
+ *				 event.time_delta contains bottom 27 bits
+ *				 array[0] = top (28 .. 59) bits
+ *				 size = 8 bytes
  *
  * <= @RINGBUF_TYPE_DATA_TYPE_LEN_MAX:
  *				Data record
@@ -54,12 +56,12 @@ enum ring_buffer_type {
 	RINGBUF_TYPE_DATA_TYPE_LEN_MAX = 28,
 	RINGBUF_TYPE_PADDING,
 	RINGBUF_TYPE_TIME_EXTEND,
-	/* FIXME: RINGBUF_TYPE_TIME_STAMP not implemented */
 	RINGBUF_TYPE_TIME_STAMP,
 };
 
 unsigned ring_buffer_event_length(struct ring_buffer_event *event);
 void *ring_buffer_event_data(struct ring_buffer_event *event);
+u64 ring_buffer_event_time_stamp(struct ring_buffer_event *event);
 
 /*
  * ring_buffer_discard_commit will remove an event that has not
@@ -115,6 +117,9 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
 int ring_buffer_write(struct ring_buffer *buffer,
 		      unsigned long length, void *data);
 
+void ring_buffer_nest_start(struct ring_buffer *buffer);
+void ring_buffer_nest_end(struct ring_buffer *buffer);
+
 struct ring_buffer_event *
 ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts,
 		 unsigned long *lost_events);
@@ -178,6 +183,8 @@ void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer,
 				      int cpu, u64 *ts);
 void ring_buffer_set_clock(struct ring_buffer *buffer,
 			   u64 (*clock)(void));
+void ring_buffer_set_time_stamp_abs(struct ring_buffer *buffer, bool abs);
+bool ring_buffer_time_stamp_abs(struct ring_buffer *buffer);
 
 size_t ring_buffer_page_len(void *page);
 
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index fc6c90b57be0..4c007f69082f 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -145,12 +145,17 @@ struct rtc_device {
 
 	bool registered;
 
-	struct nvmem_config *nvmem_config;
 	struct nvmem_device *nvmem;
 	/* Old ABI support */
 	bool nvram_old_abi;
 	struct bin_attribute *nvram;
 
+	time64_t range_min;
+	timeu64_t range_max;
+	time64_t start_secs;
+	time64_t offset_secs;
+	bool set_start_time;
+
 #ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL
 	struct work_struct uie_task;
 	struct timer_list uie_timer;
@@ -164,6 +169,11 @@ struct rtc_device {
 };
 #define to_rtc_device(d) container_of(d, struct rtc_device, dev)
 
+/* useful timestamps */
+#define RTC_TIMESTAMP_BEGIN_1900	-2208989361LL /* 1900-01-01 00:00:00 */
+#define RTC_TIMESTAMP_BEGIN_2000	946684800LL /* 2000-01-01 00:00:00 */
+#define RTC_TIMESTAMP_END_2099		4102444799LL /* 2099-12-31 23:59:59 */
+
 extern struct rtc_device *rtc_device_register(const char *name,
 					struct device *dev,
 					const struct rtc_class_ops *ops,
@@ -212,10 +222,6 @@ void rtc_aie_update_irq(void *private);
 void rtc_uie_update_irq(void *private);
 enum hrtimer_restart rtc_pie_update_irq(struct hrtimer *timer);
 
-int rtc_register(rtc_task_t *task);
-int rtc_unregister(rtc_task_t *task);
-int rtc_control(rtc_task_t *t, unsigned int cmd, unsigned long arg);
-
 void rtc_timer_init(struct rtc_timer *timer, void (*f)(void *p), void *data);
 int rtc_timer_start(struct rtc_device *rtc, struct rtc_timer *timer,
 		    ktime_t expires, ktime_t period);
@@ -271,4 +277,17 @@ extern int rtc_hctosys_ret;
 #define rtc_hctosys_ret -ENODEV
 #endif
 
+#ifdef CONFIG_RTC_NVMEM
+int rtc_nvmem_register(struct rtc_device *rtc,
+		       struct nvmem_config *nvmem_config);
+void rtc_nvmem_unregister(struct rtc_device *rtc);
+#else
+static inline int rtc_nvmem_register(struct rtc_device *rtc,
+				     struct nvmem_config *nvmem_config)
+{
+	return -ENODEV;
+}
+static inline void rtc_nvmem_unregister(struct rtc_device *rtc) {}
+#endif
+
 #endif /* _LINUX_RTC_H_ */
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 9806184bb3d5..2c570cd934af 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -104,7 +104,8 @@ static inline void mm_update_next_owner(struct mm_struct *mm)
 #endif /* CONFIG_MEMCG */
 
 #ifdef CONFIG_MMU
-extern void arch_pick_mmap_layout(struct mm_struct *mm);
+extern void arch_pick_mmap_layout(struct mm_struct *mm,
+				  struct rlimit *rlim_stack);
 extern unsigned long
 arch_get_unmapped_area(struct file *, unsigned long, unsigned long,
 		       unsigned long, unsigned long);
@@ -113,7 +114,8 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
 			  unsigned long len, unsigned long pgoff,
 			  unsigned long flags);
 #else
-static inline void arch_pick_mmap_layout(struct mm_struct *mm) {}
+static inline void arch_pick_mmap_layout(struct mm_struct *mm,
+					 struct rlimit *rlim_stack) {}
 #endif
 
 static inline bool in_vfork(struct task_struct *tsk)
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index ab437dd2e3b9..a121982af0f5 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -118,9 +118,14 @@ __printf(2, 3)
 void seq_printf(struct seq_file *m, const char *fmt, ...);
 void seq_putc(struct seq_file *m, char c);
 void seq_puts(struct seq_file *m, const char *s);
+void seq_put_decimal_ull_width(struct seq_file *m, const char *delimiter,
+			       unsigned long long num, unsigned int width);
 void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
 			 unsigned long long num);
 void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num);
+void seq_put_hex_ll(struct seq_file *m, const char *delimiter,
+		    unsigned long long v, unsigned int width);
+
 void seq_escape(struct seq_file *m, const char *s, const char *esc);
 
 void seq_hex_dump(struct seq_file *m, const char *prefix_str, int prefix_type,
@@ -235,4 +240,5 @@ extern struct hlist_node *seq_hlist_start_percpu(struct hlist_head __percpu *hea
 
 extern struct hlist_node *seq_hlist_next_percpu(void *v, struct hlist_head __percpu *head, int *cpu, loff_t *pos);
 
+void seq_file_init(void);
 #endif
diff --git a/include/linux/soc/qcom/mdt_loader.h b/include/linux/soc/qcom/mdt_loader.h
index bd8e0864b059..5b98bbdabc25 100644
--- a/include/linux/soc/qcom/mdt_loader.h
+++ b/include/linux/soc/qcom/mdt_loader.h
@@ -14,6 +14,7 @@ struct firmware;
 ssize_t qcom_mdt_get_size(const struct firmware *fw);
 int qcom_mdt_load(struct device *dev, const struct firmware *fw,
 		  const char *fw_name, int pas_id, void *mem_region,
-		  phys_addr_t mem_phys, size_t mem_size);
+		  phys_addr_t mem_phys, size_t mem_size,
+		  phys_addr_t *reloc_base);
 
 #endif
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 7f8c9a127f5a..55388ab45fd4 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -115,27 +115,46 @@ enum tick_dep_bits {
 extern bool tick_nohz_enabled;
 extern bool tick_nohz_tick_stopped(void);
 extern bool tick_nohz_tick_stopped_cpu(int cpu);
+extern void tick_nohz_idle_stop_tick(void);
+extern void tick_nohz_idle_retain_tick(void);
+extern void tick_nohz_idle_restart_tick(void);
 extern void tick_nohz_idle_enter(void);
 extern void tick_nohz_idle_exit(void);
 extern void tick_nohz_irq_exit(void);
-extern ktime_t tick_nohz_get_sleep_length(void);
+extern bool tick_nohz_idle_got_tick(void);
+extern ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next);
 extern unsigned long tick_nohz_get_idle_calls(void);
 extern unsigned long tick_nohz_get_idle_calls_cpu(int cpu);
 extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
 extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
+
+static inline void tick_nohz_idle_stop_tick_protected(void)
+{
+	local_irq_disable();
+	tick_nohz_idle_stop_tick();
+	local_irq_enable();
+}
+
 #else /* !CONFIG_NO_HZ_COMMON */
 #define tick_nohz_enabled (0)
 static inline int tick_nohz_tick_stopped(void) { return 0; }
 static inline int tick_nohz_tick_stopped_cpu(int cpu) { return 0; }
+static inline void tick_nohz_idle_stop_tick(void) { }
+static inline void tick_nohz_idle_retain_tick(void) { }
+static inline void tick_nohz_idle_restart_tick(void) { }
 static inline void tick_nohz_idle_enter(void) { }
 static inline void tick_nohz_idle_exit(void) { }
+static inline bool tick_nohz_idle_got_tick(void) { return false; }
 
-static inline ktime_t tick_nohz_get_sleep_length(void)
+static inline ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next)
 {
-	return NSEC_PER_SEC / HZ;
+	*delta_next = TICK_NSEC;
+	return *delta_next;
 }
 static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; }
 static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; }
+
+static inline void tick_nohz_idle_stop_tick_protected(void) { }
 #endif /* !CONFIG_NO_HZ_COMMON */
 
 #ifdef CONFIG_NO_HZ_FULL
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 82c219dfd3bb..9737fbec7019 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -31,6 +31,7 @@ struct timespec64 get_monotonic_coarse64(void);
 extern void getrawmonotonic64(struct timespec64 *ts);
 extern void ktime_get_ts64(struct timespec64 *ts);
 extern time64_t ktime_get_seconds(void);
+extern time64_t __ktime_get_real_seconds(void);
 extern time64_t ktime_get_real_seconds(void);
 extern void ktime_get_active_ts64(struct timespec64 *ts);
 
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index e0e98000b665..2bde3eff564c 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -430,11 +430,13 @@ enum event_trigger_type {
 
 extern int filter_match_preds(struct event_filter *filter, void *rec);
 
-extern enum event_trigger_type event_triggers_call(struct trace_event_file *file,
-						   void *rec);
-extern void event_triggers_post_call(struct trace_event_file *file,
-				     enum event_trigger_type tt,
-				     void *rec);
+extern enum event_trigger_type
+event_triggers_call(struct trace_event_file *file, void *rec,
+		    struct ring_buffer_event *event);
+extern void
+event_triggers_post_call(struct trace_event_file *file,
+			 enum event_trigger_type tt,
+			 void *rec, struct ring_buffer_event *event);
 
 bool trace_event_ignore_this_pid(struct trace_event_file *trace_file);
 
@@ -454,7 +456,7 @@ trace_trigger_soft_disabled(struct trace_event_file *file)
 
 	if (!(eflags & EVENT_FILE_FL_TRIGGER_COND)) {
 		if (eflags & EVENT_FILE_FL_TRIGGER_MODE)
-			event_triggers_call(file, NULL);
+			event_triggers_call(file, NULL, NULL);
 		if (eflags & EVENT_FILE_FL_SOFT_DISABLED)
 			return true;
 		if (eflags & EVENT_FILE_FL_PID_FILTER)
diff --git a/include/linux/utsname.h b/include/linux/utsname.h
index c8060c2ecd04..44429d9142ca 100644
--- a/include/linux/utsname.h
+++ b/include/linux/utsname.h
@@ -44,6 +44,8 @@ static inline void put_uts_ns(struct uts_namespace *ns)
 {
 	kref_put(&ns->kref, free_uts_ns);
 }
+
+void uts_ns_init(void);
 #else
 static inline void get_uts_ns(struct uts_namespace *ns)
 {
@@ -61,6 +63,10 @@ static inline struct uts_namespace *copy_utsname(unsigned long flags,
 
 	return old_ns;
 }
+
+static inline void uts_ns_init(void)
+{
+}
 #endif
 
 #ifdef CONFIG_PROC_SYSCTL
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index a4c2317d8b9f..f25cef84b41d 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -20,6 +20,17 @@ extern int sysctl_vm_numa_stat_handler(struct ctl_table *table,
 		int write, void __user *buffer, size_t *length, loff_t *ppos);
 #endif
 
+struct reclaim_stat {
+	unsigned nr_dirty;
+	unsigned nr_unqueued_dirty;
+	unsigned nr_congested;
+	unsigned nr_writeback;
+	unsigned nr_immediate;
+	unsigned nr_activate;
+	unsigned nr_ref_keep;
+	unsigned nr_unmap_fail;
+};
+
 #ifdef CONFIG_VM_EVENT_COUNTERS
 /*
  * Light weight per cpu counter implementation.
diff --git a/include/linux/xarray.h b/include/linux/xarray.h
new file mode 100644
index 000000000000..2dfc8006fe64
--- /dev/null
+++ b/include/linux/xarray.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+#ifndef _LINUX_XARRAY_H
+#define _LINUX_XARRAY_H
+/*
+ * eXtensible Arrays
+ * Copyright (c) 2017 Microsoft Corporation
+ * Author: Matthew Wilcox <mawilcox@microsoft.com>
+ */
+
+#include <linux/spinlock.h>
+
+#define xa_trylock(xa)		spin_trylock(&(xa)->xa_lock)
+#define xa_lock(xa)		spin_lock(&(xa)->xa_lock)
+#define xa_unlock(xa)		spin_unlock(&(xa)->xa_lock)
+#define xa_lock_bh(xa)		spin_lock_bh(&(xa)->xa_lock)
+#define xa_unlock_bh(xa)	spin_unlock_bh(&(xa)->xa_lock)
+#define xa_lock_irq(xa)		spin_lock_irq(&(xa)->xa_lock)
+#define xa_unlock_irq(xa)	spin_unlock_irq(&(xa)->xa_lock)
+#define xa_lock_irqsave(xa, flags) \
+				spin_lock_irqsave(&(xa)->xa_lock, flags)
+#define xa_unlock_irqrestore(xa, flags) \
+				spin_unlock_irqrestore(&(xa)->xa_lock, flags)
+
+#endif /* _LINUX_XARRAY_H */
diff --git a/include/media/v4l2-common.h b/include/media/v4l2-common.h
index 54b689247937..160bca96d524 100644
--- a/include/media/v4l2-common.h
+++ b/include/media/v4l2-common.h
@@ -320,6 +320,7 @@ void v4l_bound_align_image(unsigned int *width, unsigned int wmin,
  *	set of resolutions contained in an array of a driver specific struct.
  *
  * @array: a driver specific array of image sizes
+ * @array_size: the length of the driver specific array of image sizes
  * @width_field: the name of the width field in the driver specific struct
  * @height_field: the name of the height field in the driver specific struct
  * @width: desired width.
@@ -332,13 +333,13 @@ void v4l_bound_align_image(unsigned int *width, unsigned int wmin,
  *
  * Returns the best match or NULL if the length of the array is zero.
  */
-#define v4l2_find_nearest_size(array, width_field, height_field, \
+#define v4l2_find_nearest_size(array, array_size, width_field, height_field, \
 			       width, height)				\
 	({								\
 		BUILD_BUG_ON(sizeof((array)->width_field) != sizeof(u32) || \
 			     sizeof((array)->height_field) != sizeof(u32)); \
 		(typeof(&(*(array))))__v4l2_find_nearest_size(		\
-			(array), ARRAY_SIZE(array), sizeof(*(array)),	\
+			(array), array_size, sizeof(*(array)),		\
 			offsetof(typeof(*(array)), width_field),	\
 			offsetof(typeof(*(array)), height_field),	\
 			width, height);					\
diff --git a/include/media/v4l2-dev.h b/include/media/v4l2-dev.h
index 27634e8d2585..f60cf9cf3b9c 100644
--- a/include/media/v4l2-dev.h
+++ b/include/media/v4l2-dev.h
@@ -33,13 +33,13 @@
  */
 enum vfl_devnode_type {
 	VFL_TYPE_GRABBER	= 0,
-	VFL_TYPE_VBI		= 1,
-	VFL_TYPE_RADIO		= 2,
-	VFL_TYPE_SUBDEV		= 3,
-	VFL_TYPE_SDR		= 4,
-	VFL_TYPE_TOUCH		= 5,
+	VFL_TYPE_VBI,
+	VFL_TYPE_RADIO,
+	VFL_TYPE_SUBDEV,
+	VFL_TYPE_SDR,
+	VFL_TYPE_TOUCH,
+	VFL_TYPE_MAX /* Shall be the last one */
 };
-#define VFL_TYPE_MAX VFL_TYPE_TOUCH
 
 /**
  * enum  vfl_direction - Identifies if a &struct video_device corresponds
diff --git a/include/trace/events/initcall.h b/include/trace/events/initcall.h
new file mode 100644
index 000000000000..8d6cf10d27c9
--- /dev/null
+++ b/include/trace/events/initcall.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM initcall
+
+#if !defined(_TRACE_INITCALL_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_INITCALL_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(initcall_level,
+
+	TP_PROTO(const char *level),
+
+	TP_ARGS(level),
+
+	TP_STRUCT__entry(
+		__string(level, level)
+	),
+
+	TP_fast_assign(
+		__assign_str(level, level);
+	),
+
+	TP_printk("level=%s", __get_str(level))
+);
+
+TRACE_EVENT(initcall_start,
+
+	TP_PROTO(initcall_t func),
+
+	TP_ARGS(func),
+
+	TP_STRUCT__entry(
+		__field(initcall_t, func)
+	),
+
+	TP_fast_assign(
+		__entry->func = func;
+	),
+
+	TP_printk("func=%pS", __entry->func)
+);
+
+TRACE_EVENT(initcall_finish,
+
+	TP_PROTO(initcall_t func, int ret),
+
+	TP_ARGS(func, ret),
+
+	TP_STRUCT__entry(
+		__field(initcall_t,	func)
+		__field(int,		ret)
+	),
+
+	TP_fast_assign(
+		__entry->func = func;
+		__entry->ret = ret;
+	),
+
+	TP_printk("func=%pS ret=%d", __entry->func, __entry->ret)
+);
+
+#endif /* if !defined(_TRACE_GPIO_H) || defined(TRACE_HEADER_MULTI_READ) */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/rtc.h b/include/trace/events/rtc.h
new file mode 100644
index 000000000000..621333f1c890
--- /dev/null
+++ b/include/trace/events/rtc.h
@@ -0,0 +1,206 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rtc
+
+#if !defined(_TRACE_RTC_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_RTC_H
+
+#include <linux/rtc.h>
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(rtc_time_alarm_class,
+
+	TP_PROTO(time64_t secs, int err),
+
+	TP_ARGS(secs, err),
+
+	TP_STRUCT__entry(
+		__field(time64_t, secs)
+		__field(int, err)
+	),
+
+	TP_fast_assign(
+		__entry->secs = secs;
+		__entry->err = err;
+	),
+
+	TP_printk("UTC (%lld) (%d)",
+		  __entry->secs, __entry->err
+	)
+);
+
+DEFINE_EVENT(rtc_time_alarm_class, rtc_set_time,
+
+	TP_PROTO(time64_t secs, int err),
+
+	TP_ARGS(secs, err)
+);
+
+DEFINE_EVENT(rtc_time_alarm_class, rtc_read_time,
+
+	TP_PROTO(time64_t secs, int err),
+
+	TP_ARGS(secs, err)
+);
+
+DEFINE_EVENT(rtc_time_alarm_class, rtc_set_alarm,
+
+	TP_PROTO(time64_t secs, int err),
+
+	TP_ARGS(secs, err)
+);
+
+DEFINE_EVENT(rtc_time_alarm_class, rtc_read_alarm,
+
+	TP_PROTO(time64_t secs, int err),
+
+	TP_ARGS(secs, err)
+);
+
+TRACE_EVENT(rtc_irq_set_freq,
+
+	TP_PROTO(int freq, int err),
+
+	TP_ARGS(freq, err),
+
+	TP_STRUCT__entry(
+		__field(int, freq)
+		__field(int, err)
+	),
+
+	TP_fast_assign(
+		__entry->freq = freq;
+		__entry->err = err;
+	),
+
+	TP_printk("set RTC periodic IRQ frequency:%u (%d)",
+		  __entry->freq, __entry->err
+	)
+);
+
+TRACE_EVENT(rtc_irq_set_state,
+
+	TP_PROTO(int enabled, int err),
+
+	TP_ARGS(enabled, err),
+
+	TP_STRUCT__entry(
+		__field(int, enabled)
+		__field(int, err)
+	),
+
+	TP_fast_assign(
+		__entry->enabled = enabled;
+		__entry->err = err;
+	),
+
+	TP_printk("%s RTC 2^N Hz periodic IRQs (%d)",
+		  __entry->enabled ? "enable" : "disable",
+		  __entry->err
+	)
+);
+
+TRACE_EVENT(rtc_alarm_irq_enable,
+
+	TP_PROTO(unsigned int enabled, int err),
+
+	TP_ARGS(enabled, err),
+
+	TP_STRUCT__entry(
+		__field(unsigned int, enabled)
+		__field(int, err)
+	),
+
+	TP_fast_assign(
+		__entry->enabled = enabled;
+		__entry->err = err;
+	),
+
+	TP_printk("%s RTC alarm IRQ (%d)",
+		  __entry->enabled ? "enable" : "disable",
+		  __entry->err
+	)
+);
+
+DECLARE_EVENT_CLASS(rtc_offset_class,
+
+	TP_PROTO(long offset, int err),
+
+	TP_ARGS(offset, err),
+
+	TP_STRUCT__entry(
+		__field(long, offset)
+		__field(int, err)
+	),
+
+	TP_fast_assign(
+		__entry->offset = offset;
+		__entry->err = err;
+	),
+
+	TP_printk("RTC offset: %ld (%d)",
+		  __entry->offset, __entry->err
+	)
+);
+
+DEFINE_EVENT(rtc_offset_class, rtc_set_offset,
+
+	TP_PROTO(long offset, int err),
+
+	TP_ARGS(offset, err)
+);
+
+DEFINE_EVENT(rtc_offset_class, rtc_read_offset,
+
+	TP_PROTO(long offset, int err),
+
+	TP_ARGS(offset, err)
+);
+
+DECLARE_EVENT_CLASS(rtc_timer_class,
+
+	TP_PROTO(struct rtc_timer *timer),
+
+	TP_ARGS(timer),
+
+	TP_STRUCT__entry(
+		__field(struct rtc_timer *, timer)
+		__field(ktime_t, expires)
+		__field(ktime_t, period)
+	),
+
+	TP_fast_assign(
+		__entry->timer = timer;
+		__entry->expires = timer->node.expires;
+		__entry->period = timer->period;
+	),
+
+	TP_printk("RTC timer:(%p) expires:%lld period:%lld",
+		  __entry->timer, __entry->expires, __entry->period
+	)
+);
+
+DEFINE_EVENT(rtc_timer_class, rtc_timer_enqueue,
+
+	TP_PROTO(struct rtc_timer *timer),
+
+	TP_ARGS(timer)
+);
+
+DEFINE_EVENT(rtc_timer_class, rtc_timer_dequeue,
+
+	TP_PROTO(struct rtc_timer *timer),
+
+	TP_ARGS(timer)
+);
+
+DEFINE_EVENT(rtc_timer_class, rtc_timer_fired,
+
+	TP_PROTO(struct rtc_timer *timer),
+
+	TP_ARGS(timer)
+);
+
+#endif /* _TRACE_RTC_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index 6570c5b45ba1..a1cb91342231 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -346,15 +346,9 @@ TRACE_EVENT(mm_vmscan_lru_shrink_inactive,
 
 	TP_PROTO(int nid,
 		unsigned long nr_scanned, unsigned long nr_reclaimed,
-		unsigned long nr_dirty, unsigned long nr_writeback,
-		unsigned long nr_congested, unsigned long nr_immediate,
-		unsigned long nr_activate, unsigned long nr_ref_keep,
-		unsigned long nr_unmap_fail,
-		int priority, int file),
+		struct reclaim_stat *stat, int priority, int file),
 
-	TP_ARGS(nid, nr_scanned, nr_reclaimed, nr_dirty, nr_writeback,
-		nr_congested, nr_immediate, nr_activate, nr_ref_keep,
-		nr_unmap_fail, priority, file),
+	TP_ARGS(nid, nr_scanned, nr_reclaimed, stat, priority, file),
 
 	TP_STRUCT__entry(
 		__field(int, nid)
@@ -375,13 +369,13 @@ TRACE_EVENT(mm_vmscan_lru_shrink_inactive,
 		__entry->nid = nid;
 		__entry->nr_scanned = nr_scanned;
 		__entry->nr_reclaimed = nr_reclaimed;
-		__entry->nr_dirty = nr_dirty;
-		__entry->nr_writeback = nr_writeback;
-		__entry->nr_congested = nr_congested;
-		__entry->nr_immediate = nr_immediate;
-		__entry->nr_activate = nr_activate;
-		__entry->nr_ref_keep = nr_ref_keep;
-		__entry->nr_unmap_fail = nr_unmap_fail;
+		__entry->nr_dirty = stat->nr_dirty;
+		__entry->nr_writeback = stat->nr_writeback;
+		__entry->nr_congested = stat->nr_congested;
+		__entry->nr_immediate = stat->nr_immediate;
+		__entry->nr_activate = stat->nr_activate;
+		__entry->nr_ref_keep = stat->nr_ref_keep;
+		__entry->nr_unmap_fail = stat->nr_unmap_fail;
 		__entry->priority = priority;
 		__entry->reclaim_flags = trace_shrink_flags(file);
 	),
diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h
index f8b134f5608f..e7ee32861d51 100644
--- a/include/uapi/asm-generic/mman-common.h
+++ b/include/uapi/asm-generic/mman-common.h
@@ -27,6 +27,9 @@
 # define MAP_UNINITIALIZED 0x0		/* Don't support this flag */
 #endif
 
+/* 0x0100 - 0x80000 flags are defined in asm-generic/mman.h */
+#define MAP_FIXED_NOREPLACE	0x100000	/* MAP_FIXED which doesn't unmap underlying mapping */
+
 /*
  * Flags for mlock
  */
diff --git a/include/uapi/linux/const.h b/include/uapi/linux/const.h
index 92537757590a..5ed721ad5b19 100644
--- a/include/uapi/linux/const.h
+++ b/include/uapi/linux/const.h
@@ -1,8 +1,8 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /* const.h: Macros for dealing with constants.  */
 
-#ifndef _LINUX_CONST_H
-#define _LINUX_CONST_H
+#ifndef _UAPI_LINUX_CONST_H
+#define _UAPI_LINUX_CONST_H
 
 /* Some constant macros are used in both assembler and
  * C code.  Therefore we cannot annotate them always with
@@ -22,7 +22,10 @@
 #define _AT(T,X)	((T)(X))
 #endif
 
-#define _BITUL(x)	(_AC(1,UL) << (x))
-#define _BITULL(x)	(_AC(1,ULL) << (x))
+#define _UL(x)		(_AC(x, UL))
+#define _ULL(x)		(_AC(x, ULL))
 
-#endif /* !(_LINUX_CONST_H) */
+#define _BITUL(x)	(_UL(1) << (x))
+#define _BITULL(x)	(_ULL(1) << (x))
+
+#endif /* _UAPI_LINUX_CONST_H */
diff --git a/include/uapi/linux/msg.h b/include/uapi/linux/msg.h
index 5d5ab81dc9be..e4a0d9a9a9e8 100644
--- a/include/uapi/linux/msg.h
+++ b/include/uapi/linux/msg.h
@@ -7,6 +7,7 @@
 /* ipcs ctl commands */
 #define MSG_STAT 11
 #define MSG_INFO 12
+#define MSG_STAT_ANY 13
 
 /* msgrcv options */
 #define MSG_NOERROR     010000  /* no error if message is too big */
diff --git a/include/uapi/linux/sem.h b/include/uapi/linux/sem.h
index 9c3e745b0656..39a1876f039e 100644
--- a/include/uapi/linux/sem.h
+++ b/include/uapi/linux/sem.h
@@ -19,6 +19,7 @@
 /* ipcs ctl cmds */
 #define SEM_STAT 18
 #define SEM_INFO 19
+#define SEM_STAT_ANY 20
 
 /* Obsolete, used only for backwards compatibility and libc5 compiles */
 struct semid_ds {
diff --git a/include/uapi/linux/shm.h b/include/uapi/linux/shm.h
index 4de12a39b075..dde1344f047c 100644
--- a/include/uapi/linux/shm.h
+++ b/include/uapi/linux/shm.h
@@ -83,8 +83,9 @@ struct shmid_ds {
 #define SHM_UNLOCK 	12
 
 /* ipcs ctl commands */
-#define SHM_STAT 	13
-#define SHM_INFO 	14
+#define SHM_STAT	13
+#define SHM_INFO	14
+#define SHM_STAT_ANY    15
 
 /* Obsolete, used only for backwards compatibility */
 struct	shminfo {
diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
index 4e8b8304b793..40297a3181ed 100644
--- a/include/uapi/linux/virtio_balloon.h
+++ b/include/uapi/linux/virtio_balloon.h
@@ -53,7 +53,9 @@ struct virtio_balloon_config {
 #define VIRTIO_BALLOON_S_MEMTOT   5   /* Total amount of memory */
 #define VIRTIO_BALLOON_S_AVAIL    6   /* Available memory as in /proc */
 #define VIRTIO_BALLOON_S_CACHES   7   /* Disk caches */
-#define VIRTIO_BALLOON_S_NR       8
+#define VIRTIO_BALLOON_S_HTLB_PGALLOC  8  /* Hugetlb page allocations */
+#define VIRTIO_BALLOON_S_HTLB_PGFAIL   9  /* Hugetlb page allocation failures */
+#define VIRTIO_BALLOON_S_NR       10
 
 /*
  * Memory statistics structure.
diff --git a/include/video/of_display_timing.h b/include/video/of_display_timing.h
index 956455fc9f9a..bb29e5954000 100644
--- a/include/video/of_display_timing.h
+++ b/include/video/of_display_timing.h
@@ -19,7 +19,6 @@ struct display_timings;
 int of_get_display_timing(const struct device_node *np, const char *name,
 		struct display_timing *dt);
 struct display_timings *of_get_display_timings(const struct device_node *np);
-int of_display_timings_exist(const struct device_node *np);
 #else
 static inline int of_get_display_timing(const struct device_node *np,
 		const char *name, struct display_timing *dt)
@@ -31,10 +30,6 @@ of_get_display_timings(const struct device_node *np)
 {
 	return NULL;
 }
-static inline int of_display_timings_exist(const struct device_node *np)
-{
-	return -ENOSYS;
-}
 #endif
 
 #endif
diff --git a/include/xen/interface/features.h b/include/xen/interface/features.h
index 9b0eb574f0d1..6d1384abfbdf 100644
--- a/include/xen/interface/features.h
+++ b/include/xen/interface/features.h
@@ -42,6 +42,9 @@
 /* x86: Does this Xen host support the MMU_PT_UPDATE_PRESERVE_AD hypercall? */
 #define XENFEAT_mmu_pt_update_preserve_ad  5
 
+/* x86: Does this Xen host support the MMU_{CLEAR,COPY}_PAGE hypercall? */
+#define XENFEAT_highmem_assist             6
+
 /*
  * If set, GNTTABOP_map_grant_ref honors flags to be placed into guest kernel
  * available pte bits.
@@ -60,6 +63,26 @@
 /* operation as Dom0 is supported */
 #define XENFEAT_dom0                      11
 
+/* Xen also maps grant references at pfn = mfn.
+ * This feature flag is deprecated and should not be used.
+#define XENFEAT_grant_map_identity        12
+ */
+
+/* Guest can use XENMEMF_vnode to specify virtual node for memory op. */
+#define XENFEAT_memory_op_vnode_supported 13
+
+/* arm: Hypervisor supports ARM SMC calling convention. */
+#define XENFEAT_ARM_SMCCC_supported       14
+
+/*
+ * x86/PVH: If set, ACPI RSDP can be placed at any address. Otherwise RSDP
+ * must be located in lower 1MB, as required by ACPI Specification for IA-PC
+ * systems.
+ * This feature flag is only consulted if XEN_ELFNOTE_GUEST_OS contains
+ * the "linux" string.
+ */
+#define XENFEAT_linux_rsdp_unrestricted   15
+
 #define XENFEAT_NR_SUBMAPS 1
 
 #endif /* __XEN_PUBLIC_FEATURES_H__ */
diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c
index 12c159824c7b..035a5f0ab26b 100644
--- a/init/do_mounts_rd.c
+++ b/init/do_mounts_rd.c
@@ -255,7 +255,7 @@ int __init rd_load_image(char *from)
 		nblocks, ((nblocks-1)/devblocks)+1, nblocks>devblocks ? "s" : "");
 	for (i = 0, disk = 1; i < nblocks; i++) {
 		if (i && (i % devblocks == 0)) {
-			printk("done disk #%d.\n", disk++);
+			pr_cont("done disk #%d.\n", disk++);
 			rotate = 0;
 			if (ksys_close(in_fd)) {
 				printk("Error closing the disk.\n");
@@ -278,7 +278,7 @@ int __init rd_load_image(char *from)
 		}
 #endif
 	}
-	printk("done.\n");
+	pr_cont("done.\n");
 
 successful_load:
 	res = 1;
diff --git a/init/main.c b/init/main.c
index e4a3160991ea..b795aa341a3a 100644
--- a/init/main.c
+++ b/init/main.c
@@ -51,6 +51,7 @@
 #include <linux/taskstats_kern.h>
 #include <linux/delayacct.h>
 #include <linux/unistd.h>
+#include <linux/utsname.h>
 #include <linux/rmap.h>
 #include <linux/mempolicy.h>
 #include <linux/key.h>
@@ -97,6 +98,9 @@
 #include <asm/sections.h>
 #include <asm/cacheflush.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/initcall.h>
+
 static int kernel_init(void *);
 
 extern void init_IRQ(void);
@@ -491,6 +495,17 @@ void __init __weak thread_stack_cache_init(void)
 
 void __init __weak mem_encrypt_init(void) { }
 
+bool initcall_debug;
+core_param(initcall_debug, initcall_debug, bool, 0644);
+
+#ifdef TRACEPOINTS_ENABLED
+static void __init initcall_debug_enable(void);
+#else
+static inline void initcall_debug_enable(void)
+{
+}
+#endif
+
 /*
  * Set up kernel memory allocators
  */
@@ -612,6 +627,9 @@ asmlinkage __visible void __init start_kernel(void)
 	/* Trace events are available after this */
 	trace_init();
 
+	if (initcall_debug)
+		initcall_debug_enable();
+
 	context_tracking_init();
 	/* init some links before init_ISA_irqs() */
 	early_irq_init();
@@ -689,6 +707,7 @@ asmlinkage __visible void __init start_kernel(void)
 	cred_init();
 	fork_init();
 	proc_caches_init();
+	uts_ns_init();
 	buffer_init();
 	key_init();
 	security_init();
@@ -696,6 +715,7 @@ asmlinkage __visible void __init start_kernel(void)
 	vfs_caches_init();
 	pagecache_init();
 	signals_init();
+	seq_file_init();
 	proc_root_init();
 	nsfs_init();
 	cpuset_init();
@@ -728,9 +748,6 @@ static void __init do_ctors(void)
 #endif
 }
 
-bool initcall_debug;
-core_param(initcall_debug, initcall_debug, bool, 0644);
-
 #ifdef CONFIG_KALLSYMS
 struct blacklist_entry {
 	struct list_head next;
@@ -800,37 +817,71 @@ static bool __init_or_module initcall_blacklisted(initcall_t fn)
 #endif
 __setup("initcall_blacklist=", initcall_blacklist);
 
-static int __init_or_module do_one_initcall_debug(initcall_t fn)
+static __init_or_module void
+trace_initcall_start_cb(void *data, initcall_t fn)
 {
-	ktime_t calltime, delta, rettime;
-	unsigned long long duration;
-	int ret;
+	ktime_t *calltime = (ktime_t *)data;
 
 	printk(KERN_DEBUG "calling  %pF @ %i\n", fn, task_pid_nr(current));
-	calltime = ktime_get();
-	ret = fn();
+	*calltime = ktime_get();
+}
+
+static __init_or_module void
+trace_initcall_finish_cb(void *data, initcall_t fn, int ret)
+{
+	ktime_t *calltime = (ktime_t *)data;
+	ktime_t delta, rettime;
+	unsigned long long duration;
+
 	rettime = ktime_get();
-	delta = ktime_sub(rettime, calltime);
+	delta = ktime_sub(rettime, *calltime);
 	duration = (unsigned long long) ktime_to_ns(delta) >> 10;
 	printk(KERN_DEBUG "initcall %pF returned %d after %lld usecs\n",
 		 fn, ret, duration);
+}
 
-	return ret;
+static ktime_t initcall_calltime;
+
+#ifdef TRACEPOINTS_ENABLED
+static void __init initcall_debug_enable(void)
+{
+	int ret;
+
+	ret = register_trace_initcall_start(trace_initcall_start_cb,
+					    &initcall_calltime);
+	ret |= register_trace_initcall_finish(trace_initcall_finish_cb,
+					      &initcall_calltime);
+	WARN(ret, "Failed to register initcall tracepoints\n");
 }
+# define do_trace_initcall_start	trace_initcall_start
+# define do_trace_initcall_finish	trace_initcall_finish
+#else
+static inline void do_trace_initcall_start(initcall_t fn)
+{
+	if (!initcall_debug)
+		return;
+	trace_initcall_start_cb(&initcall_calltime, fn);
+}
+static inline void do_trace_initcall_finish(initcall_t fn, int ret)
+{
+	if (!initcall_debug)
+		return;
+	trace_initcall_finish_cb(&initcall_calltime, fn, ret);
+}
+#endif /* !TRACEPOINTS_ENABLED */
 
 int __init_or_module do_one_initcall(initcall_t fn)
 {
 	int count = preempt_count();
-	int ret;
 	char msgbuf[64];
+	int ret;
 
 	if (initcall_blacklisted(fn))
 		return -EPERM;
 
-	if (initcall_debug)
-		ret = do_one_initcall_debug(fn);
-	else
-		ret = fn();
+	do_trace_initcall_start(fn);
+	ret = fn();
+	do_trace_initcall_finish(fn, ret);
 
 	msgbuf[0] = 0;
 
@@ -874,7 +925,7 @@ static initcall_t *initcall_levels[] __initdata = {
 
 /* Keep these in sync with initcalls in include/linux/init.h */
 static char *initcall_level_names[] __initdata = {
-	"early",
+	"pure",
 	"core",
 	"postcore",
 	"arch",
@@ -895,6 +946,7 @@ static void __init do_initcall_level(int level)
 		   level, level,
 		   NULL, &repair_env_string);
 
+	trace_initcall_level(initcall_level_names[level]);
 	for (fn = initcall_levels[level]; fn < initcall_levels[level+1]; fn++)
 		do_one_initcall(*fn);
 }
@@ -929,6 +981,7 @@ static void __init do_pre_smp_initcalls(void)
 {
 	initcall_t *fn;
 
+	trace_initcall_level("early");
 	for (fn = __initcall_start; fn < __initcall0_start; fn++)
 		do_one_initcall(*fn);
 }
diff --git a/ipc/msg.c b/ipc/msg.c
index 114a21189613..56fd1c73eedc 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -497,14 +497,14 @@ static int msgctl_stat(struct ipc_namespace *ns, int msqid,
 	memset(p, 0, sizeof(*p));
 
 	rcu_read_lock();
-	if (cmd == MSG_STAT) {
+	if (cmd == MSG_STAT || cmd == MSG_STAT_ANY) {
 		msq = msq_obtain_object(ns, msqid);
 		if (IS_ERR(msq)) {
 			err = PTR_ERR(msq);
 			goto out_unlock;
 		}
 		id = msq->q_perm.id;
-	} else {
+	} else { /* IPC_STAT */
 		msq = msq_obtain_object_check(ns, msqid);
 		if (IS_ERR(msq)) {
 			err = PTR_ERR(msq);
@@ -512,9 +512,14 @@ static int msgctl_stat(struct ipc_namespace *ns, int msqid,
 		}
 	}
 
-	err = -EACCES;
-	if (ipcperms(ns, &msq->q_perm, S_IRUGO))
-		goto out_unlock;
+	/* see comment for SHM_STAT_ANY */
+	if (cmd == MSG_STAT_ANY)
+		audit_ipc_obj(&msq->q_perm);
+	else {
+		err = -EACCES;
+		if (ipcperms(ns, &msq->q_perm, S_IRUGO))
+			goto out_unlock;
+	}
 
 	err = security_msg_queue_msgctl(&msq->q_perm, cmd);
 	if (err)
@@ -572,6 +577,7 @@ long ksys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
 		return err;
 	}
 	case MSG_STAT:	/* msqid is an index rather than a msg queue id */
+	case MSG_STAT_ANY:
 	case IPC_STAT:
 		err = msgctl_stat(ns, msqid, cmd, &msqid64);
 		if (err < 0)
@@ -690,6 +696,7 @@ long compat_ksys_msgctl(int msqid, int cmd, void __user *uptr)
 	}
 	case IPC_STAT:
 	case MSG_STAT:
+	case MSG_STAT_ANY:
 		err = msgctl_stat(ns, msqid, cmd, &msqid64);
 		if (err < 0)
 			return err;
diff --git a/ipc/sem.c b/ipc/sem.c
index 2994da8ccc7f..06be75d9217a 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -1220,14 +1220,14 @@ static int semctl_stat(struct ipc_namespace *ns, int semid,
 	memset(semid64, 0, sizeof(*semid64));
 
 	rcu_read_lock();
-	if (cmd == SEM_STAT) {
+	if (cmd == SEM_STAT || cmd == SEM_STAT_ANY) {
 		sma = sem_obtain_object(ns, semid);
 		if (IS_ERR(sma)) {
 			err = PTR_ERR(sma);
 			goto out_unlock;
 		}
 		id = sma->sem_perm.id;
-	} else {
+	} else { /* IPC_STAT */
 		sma = sem_obtain_object_check(ns, semid);
 		if (IS_ERR(sma)) {
 			err = PTR_ERR(sma);
@@ -1235,9 +1235,14 @@ static int semctl_stat(struct ipc_namespace *ns, int semid,
 		}
 	}
 
-	err = -EACCES;
-	if (ipcperms(ns, &sma->sem_perm, S_IRUGO))
-		goto out_unlock;
+	/* see comment for SHM_STAT_ANY */
+	if (cmd == SEM_STAT_ANY)
+		audit_ipc_obj(&sma->sem_perm);
+	else {
+		err = -EACCES;
+		if (ipcperms(ns, &sma->sem_perm, S_IRUGO))
+			goto out_unlock;
+	}
 
 	err = security_sem_semctl(&sma->sem_perm, cmd);
 	if (err)
@@ -1626,6 +1631,7 @@ long ksys_semctl(int semid, int semnum, int cmd, unsigned long arg)
 		return semctl_info(ns, semid, cmd, p);
 	case IPC_STAT:
 	case SEM_STAT:
+	case SEM_STAT_ANY:
 		err = semctl_stat(ns, semid, cmd, &semid64);
 		if (err < 0)
 			return err;
@@ -1732,6 +1738,7 @@ long compat_ksys_semctl(int semid, int semnum, int cmd, int arg)
 		return semctl_info(ns, semid, cmd, p);
 	case IPC_STAT:
 	case SEM_STAT:
+	case SEM_STAT_ANY:
 		err = semctl_stat(ns, semid, cmd, &semid64);
 		if (err < 0)
 			return err;
diff --git a/ipc/shm.c b/ipc/shm.c
index acefe44fefef..5639345dbec9 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -415,7 +415,7 @@ static int shm_split(struct vm_area_struct *vma, unsigned long addr)
 	struct file *file = vma->vm_file;
 	struct shm_file_data *sfd = shm_file_data(file);
 
-	if (sfd->vm_ops && sfd->vm_ops->split)
+	if (sfd->vm_ops->split)
 		return sfd->vm_ops->split(vma, addr);
 
 	return 0;
@@ -947,14 +947,14 @@ static int shmctl_stat(struct ipc_namespace *ns, int shmid,
 	memset(tbuf, 0, sizeof(*tbuf));
 
 	rcu_read_lock();
-	if (cmd == SHM_STAT) {
+	if (cmd == SHM_STAT || cmd == SHM_STAT_ANY) {
 		shp = shm_obtain_object(ns, shmid);
 		if (IS_ERR(shp)) {
 			err = PTR_ERR(shp);
 			goto out_unlock;
 		}
 		id = shp->shm_perm.id;
-	} else {
+	} else { /* IPC_STAT */
 		shp = shm_obtain_object_check(ns, shmid);
 		if (IS_ERR(shp)) {
 			err = PTR_ERR(shp);
@@ -962,9 +962,20 @@ static int shmctl_stat(struct ipc_namespace *ns, int shmid,
 		}
 	}
 
-	err = -EACCES;
-	if (ipcperms(ns, &shp->shm_perm, S_IRUGO))
-		goto out_unlock;
+	/*
+	 * Semantically SHM_STAT_ANY ought to be identical to
+	 * that functionality provided by the /proc/sysvipc/
+	 * interface. As such, only audit these calls and
+	 * do not do traditional S_IRUGO permission checks on
+	 * the ipc object.
+	 */
+	if (cmd == SHM_STAT_ANY)
+		audit_ipc_obj(&shp->shm_perm);
+	else {
+		err = -EACCES;
+		if (ipcperms(ns, &shp->shm_perm, S_IRUGO))
+			goto out_unlock;
+	}
 
 	err = security_shm_shmctl(&shp->shm_perm, cmd);
 	if (err)
@@ -1104,6 +1115,7 @@ long ksys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf)
 		return err;
 	}
 	case SHM_STAT:
+	case SHM_STAT_ANY:
 	case IPC_STAT: {
 		err = shmctl_stat(ns, shmid, cmd, &sem64);
 		if (err < 0)
@@ -1282,6 +1294,7 @@ long compat_ksys_shmctl(int shmid, int cmd, void __user *uptr)
 		return err;
 	}
 	case IPC_STAT:
+	case SHM_STAT_ANY:
 	case SHM_STAT:
 		err = shmctl_stat(ns, shmid, cmd, &sem64);
 		if (err < 0)
diff --git a/ipc/util.c b/ipc/util.c
index 3783b7991cc7..4e81182fa0ac 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -89,6 +89,7 @@ static int __init ipc_init(void)
 {
 	int err_sem, err_msg;
 
+	proc_mkdir("sysvipc", NULL);
 	err_sem = sem_init();
 	WARN(err_sem, "ipc: sysv sem_init failed: %d\n", err_sem);
 	err_msg = msg_init();
diff --git a/kernel/debug/kdb/kdb_bp.c b/kernel/debug/kdb/kdb_bp.c
index 90ff129c88a2..62c301ad0773 100644
--- a/kernel/debug/kdb/kdb_bp.c
+++ b/kernel/debug/kdb/kdb_bp.c
@@ -242,11 +242,11 @@ static void kdb_printbp(kdb_bp_t *bp, int i)
 	kdb_symbol_print(bp->bp_addr, NULL, KDB_SP_DEFAULT);
 
 	if (bp->bp_enabled)
-		kdb_printf("\n    is enabled");
+		kdb_printf("\n    is enabled ");
 	else
 		kdb_printf("\n    is disabled");
 
-	kdb_printf("\taddr at %016lx, hardtype=%d installed=%d\n",
+	kdb_printf("  addr at %016lx, hardtype=%d installed=%d\n",
 		   bp->bp_addr, bp->bp_type, bp->bp_installed);
 
 	kdb_printf("\n");
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index dbb0781a0533..e405677ee08d 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -1150,6 +1150,16 @@ void kdb_set_current_task(struct task_struct *p)
 	kdb_current_regs = NULL;
 }
 
+static void drop_newline(char *buf)
+{
+	size_t len = strlen(buf);
+
+	if (len == 0)
+		return;
+	if (*(buf + len - 1) == '\n')
+		*(buf + len - 1) = '\0';
+}
+
 /*
  * kdb_local - The main code for kdb.  This routine is invoked on a
  *	specific processor, it is not global.  The main kdb() routine
@@ -1327,6 +1337,7 @@ do_full_getstr:
 		cmdptr = cmd_head;
 		diag = kdb_parse(cmdbuf);
 		if (diag == KDB_NOTFOUND) {
+			drop_newline(cmdbuf);
 			kdb_printf("Unknown kdb command: '%s'\n", cmdbuf);
 			diag = 0;
 		}
@@ -1566,6 +1577,7 @@ static int kdb_md(int argc, const char **argv)
 	int symbolic = 0;
 	int valid = 0;
 	int phys = 0;
+	int raw = 0;
 
 	kdbgetintenv("MDCOUNT", &mdcount);
 	kdbgetintenv("RADIX", &radix);
@@ -1575,9 +1587,10 @@ static int kdb_md(int argc, const char **argv)
 	repeat = mdcount * 16 / bytesperword;
 
 	if (strcmp(argv[0], "mdr") == 0) {
-		if (argc != 2)
+		if (argc == 2 || (argc == 0 && last_addr != 0))
+			valid = raw = 1;
+		else
 			return KDB_ARGCOUNT;
-		valid = 1;
 	} else if (isdigit(argv[0][2])) {
 		bytesperword = (int)(argv[0][2] - '0');
 		if (bytesperword == 0) {
@@ -1613,7 +1626,10 @@ static int kdb_md(int argc, const char **argv)
 		radix = last_radix;
 		bytesperword = last_bytesperword;
 		repeat = last_repeat;
-		mdcount = ((repeat * bytesperword) + 15) / 16;
+		if (raw)
+			mdcount = repeat;
+		else
+			mdcount = ((repeat * bytesperword) + 15) / 16;
 	}
 
 	if (argc) {
@@ -1630,7 +1646,10 @@ static int kdb_md(int argc, const char **argv)
 			diag = kdbgetularg(argv[nextarg], &val);
 			if (!diag) {
 				mdcount = (int) val;
-				repeat = mdcount * 16 / bytesperword;
+				if (raw)
+					repeat = mdcount;
+				else
+					repeat = mdcount * 16 / bytesperword;
 			}
 		}
 		if (argc >= nextarg+1) {
@@ -1640,8 +1659,15 @@ static int kdb_md(int argc, const char **argv)
 		}
 	}
 
-	if (strcmp(argv[0], "mdr") == 0)
-		return kdb_mdr(addr, mdcount);
+	if (strcmp(argv[0], "mdr") == 0) {
+		int ret;
+		last_addr = addr;
+		ret = kdb_mdr(addr, mdcount);
+		last_addr += mdcount;
+		last_repeat = mdcount;
+		last_bytesperword = bytesperword; // to make REPEAT happy
+		return ret;
+	}
 
 	switch (radix) {
 	case 10:
@@ -2473,41 +2499,6 @@ static int kdb_kill(int argc, const char **argv)
 	return 0;
 }
 
-struct kdb_tm {
-	int tm_sec;	/* seconds */
-	int tm_min;	/* minutes */
-	int tm_hour;	/* hours */
-	int tm_mday;	/* day of the month */
-	int tm_mon;	/* month */
-	int tm_year;	/* year */
-};
-
-static void kdb_gmtime(struct timespec *tv, struct kdb_tm *tm)
-{
-	/* This will work from 1970-2099, 2100 is not a leap year */
-	static int mon_day[] = { 31, 29, 31, 30, 31, 30, 31,
-				 31, 30, 31, 30, 31 };
-	memset(tm, 0, sizeof(*tm));
-	tm->tm_sec  = tv->tv_sec % (24 * 60 * 60);
-	tm->tm_mday = tv->tv_sec / (24 * 60 * 60) +
-		(2 * 365 + 1); /* shift base from 1970 to 1968 */
-	tm->tm_min =  tm->tm_sec / 60 % 60;
-	tm->tm_hour = tm->tm_sec / 60 / 60;
-	tm->tm_sec =  tm->tm_sec % 60;
-	tm->tm_year = 68 + 4*(tm->tm_mday / (4*365+1));
-	tm->tm_mday %= (4*365+1);
-	mon_day[1] = 29;
-	while (tm->tm_mday >= mon_day[tm->tm_mon]) {
-		tm->tm_mday -= mon_day[tm->tm_mon];
-		if (++tm->tm_mon == 12) {
-			tm->tm_mon = 0;
-			++tm->tm_year;
-			mon_day[1] = 28;
-		}
-	}
-	++tm->tm_mday;
-}
-
 /*
  * Most of this code has been lifted from kernel/timer.c::sys_sysinfo().
  * I cannot call that code directly from kdb, it has an unconditional
@@ -2515,10 +2506,10 @@ static void kdb_gmtime(struct timespec *tv, struct kdb_tm *tm)
  */
 static void kdb_sysinfo(struct sysinfo *val)
 {
-	struct timespec uptime;
-	ktime_get_ts(&uptime);
+	u64 uptime = ktime_get_mono_fast_ns();
+
 	memset(val, 0, sizeof(*val));
-	val->uptime = uptime.tv_sec;
+	val->uptime = div_u64(uptime, NSEC_PER_SEC);
 	val->loads[0] = avenrun[0];
 	val->loads[1] = avenrun[1];
 	val->loads[2] = avenrun[2];
@@ -2533,8 +2524,8 @@ static void kdb_sysinfo(struct sysinfo *val)
  */
 static int kdb_summary(int argc, const char **argv)
 {
-	struct timespec now;
-	struct kdb_tm tm;
+	time64_t now;
+	struct tm tm;
 	struct sysinfo val;
 
 	if (argc)
@@ -2548,9 +2539,9 @@ static int kdb_summary(int argc, const char **argv)
 	kdb_printf("domainname %s\n", init_uts_ns.name.domainname);
 	kdb_printf("ccversion  %s\n", __stringify(CCVERSION));
 
-	now = __current_kernel_time();
-	kdb_gmtime(&now, &tm);
-	kdb_printf("date       %04d-%02d-%02d %02d:%02d:%02d "
+	now = __ktime_get_real_seconds();
+	time64_to_tm(now, 0, &tm);
+	kdb_printf("date       %04ld-%02d-%02d %02d:%02d:%02d "
 		   "tz_minuteswest %d\n",
 		1900+tm.tm_year, tm.tm_mon+1, tm.tm_mday,
 		tm.tm_hour, tm.tm_min, tm.tm_sec,
diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c
index d35cc2d3a4cc..990b3cc526c8 100644
--- a/kernel/debug/kdb/kdb_support.c
+++ b/kernel/debug/kdb/kdb_support.c
@@ -129,13 +129,13 @@ int kdbnearsym(unsigned long addr, kdb_symtab_t *symtab)
 		}
 		if (i >= ARRAY_SIZE(kdb_name_table)) {
 			debug_kfree(kdb_name_table[0]);
-			memcpy(kdb_name_table, kdb_name_table+1,
+			memmove(kdb_name_table, kdb_name_table+1,
 			       sizeof(kdb_name_table[0]) *
 			       (ARRAY_SIZE(kdb_name_table)-1));
 		} else {
 			debug_kfree(knt1);
 			knt1 = kdb_name_table[i];
-			memcpy(kdb_name_table+i, kdb_name_table+i+1,
+			memmove(kdb_name_table+i, kdb_name_table+i+1,
 			       sizeof(kdb_name_table[0]) *
 			       (ARRAY_SIZE(kdb_name_table)-i-1));
 		}
diff --git a/kernel/panic.c b/kernel/panic.c
index 9d833d913c84..42e487488554 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -34,7 +34,8 @@
 #define PANIC_BLINK_SPD 18
 
 int panic_on_oops = CONFIG_PANIC_ON_OOPS_VALUE;
-static unsigned long tainted_mask;
+static unsigned long tainted_mask =
+	IS_ENABLED(CONFIG_GCC_PLUGIN_RANDSTRUCT) ? (1 << TAINT_RANDSTRUCT) : 0;
 static int pause_on_oops;
 static int pause_on_oops_flag;
 static DEFINE_SPINLOCK(pause_on_oops_lock);
@@ -308,52 +309,40 @@ EXPORT_SYMBOL(panic);
  * is being removed anyway.
  */
 const struct taint_flag taint_flags[TAINT_FLAGS_COUNT] = {
-	{ 'P', 'G', true },	/* TAINT_PROPRIETARY_MODULE */
-	{ 'F', ' ', true },	/* TAINT_FORCED_MODULE */
-	{ 'S', ' ', false },	/* TAINT_CPU_OUT_OF_SPEC */
-	{ 'R', ' ', false },	/* TAINT_FORCED_RMMOD */
-	{ 'M', ' ', false },	/* TAINT_MACHINE_CHECK */
-	{ 'B', ' ', false },	/* TAINT_BAD_PAGE */
-	{ 'U', ' ', false },	/* TAINT_USER */
-	{ 'D', ' ', false },	/* TAINT_DIE */
-	{ 'A', ' ', false },	/* TAINT_OVERRIDDEN_ACPI_TABLE */
-	{ 'W', ' ', false },	/* TAINT_WARN */
-	{ 'C', ' ', true },	/* TAINT_CRAP */
-	{ 'I', ' ', false },	/* TAINT_FIRMWARE_WORKAROUND */
-	{ 'O', ' ', true },	/* TAINT_OOT_MODULE */
-	{ 'E', ' ', true },	/* TAINT_UNSIGNED_MODULE */
-	{ 'L', ' ', false },	/* TAINT_SOFTLOCKUP */
-	{ 'K', ' ', true },	/* TAINT_LIVEPATCH */
-	{ 'X', ' ', true },	/* TAINT_AUX */
+	[ TAINT_PROPRIETARY_MODULE ]	= { 'P', 'G', true },
+	[ TAINT_FORCED_MODULE ]		= { 'F', ' ', true },
+	[ TAINT_CPU_OUT_OF_SPEC ]	= { 'S', ' ', false },
+	[ TAINT_FORCED_RMMOD ]		= { 'R', ' ', false },
+	[ TAINT_MACHINE_CHECK ]		= { 'M', ' ', false },
+	[ TAINT_BAD_PAGE ]		= { 'B', ' ', false },
+	[ TAINT_USER ]			= { 'U', ' ', false },
+	[ TAINT_DIE ]			= { 'D', ' ', false },
+	[ TAINT_OVERRIDDEN_ACPI_TABLE ]	= { 'A', ' ', false },
+	[ TAINT_WARN ]			= { 'W', ' ', false },
+	[ TAINT_CRAP ]			= { 'C', ' ', true },
+	[ TAINT_FIRMWARE_WORKAROUND ]	= { 'I', ' ', false },
+	[ TAINT_OOT_MODULE ]		= { 'O', ' ', true },
+	[ TAINT_UNSIGNED_MODULE ]	= { 'E', ' ', true },
+	[ TAINT_SOFTLOCKUP ]		= { 'L', ' ', false },
+	[ TAINT_LIVEPATCH ]		= { 'K', ' ', true },
+	[ TAINT_AUX ]			= { 'X', ' ', true },
+	[ TAINT_RANDSTRUCT ]		= { 'T', ' ', true },
 };
 
 /**
- *	print_tainted - return a string to represent the kernel taint state.
+ * print_tainted - return a string to represent the kernel taint state.
  *
- *  'P' - Proprietary module has been loaded.
- *  'F' - Module has been forcibly loaded.
- *  'S' - SMP with CPUs not designed for SMP.
- *  'R' - User forced a module unload.
- *  'M' - System experienced a machine check exception.
- *  'B' - System has hit bad_page.
- *  'U' - Userspace-defined naughtiness.
- *  'D' - Kernel has oopsed before
- *  'A' - ACPI table overridden.
- *  'W' - Taint on warning.
- *  'C' - modules from drivers/staging are loaded.
- *  'I' - Working around severe firmware bug.
- *  'O' - Out-of-tree module has been loaded.
- *  'E' - Unsigned module has been loaded.
- *  'L' - A soft lockup has previously occurred.
- *  'K' - Kernel has been live patched.
- *  'X' - Auxiliary taint, for distros' use.
+ * For individual taint flag meanings, see Documentation/sysctl/kernel.txt
  *
- *	The string is overwritten by the next call to print_tainted().
+ * The string is overwritten by the next call to print_tainted(),
+ * but is always NULL terminated.
  */
 const char *print_tainted(void)
 {
 	static char buf[TAINT_FLAGS_COUNT + sizeof("Tainted: ")];
 
+	BUILD_BUG_ON(ARRAY_SIZE(taint_flags) != TAINT_FLAGS_COUNT);
+
 	if (tainted_mask) {
 		char *s;
 		int i;
@@ -554,6 +543,8 @@ void __warn(const char *file, int line, void *caller, unsigned taint,
 	else
 		dump_stack();
 
+	print_irqtrace_events(current);
+
 	print_oops_end_marker();
 
 	/* Just a warning, don't kill lockdep. */
diff --git a/kernel/params.c b/kernel/params.c
index cc9108c2a1fd..ce89f757e6da 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -111,8 +111,8 @@ bool parameq(const char *a, const char *b)
 static void param_check_unsafe(const struct kernel_param *kp)
 {
 	if (kp->flags & KERNEL_PARAM_FL_UNSAFE) {
-		pr_warn("Setting dangerous option %s - tainting kernel\n",
-			kp->name);
+		pr_notice("Setting dangerous option %s - tainting kernel\n",
+			  kp->name);
 		add_taint(TAINT_USER, LOCKDEP_STILL_OK);
 	}
 }
diff --git a/kernel/pid.c b/kernel/pid.c
index ed6c343fe50d..157fe4b19971 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -70,7 +70,7 @@ int pid_max_max = PID_MAX_LIMIT;
  */
 struct pid_namespace init_pid_ns = {
 	.kref = KREF_INIT(2),
-	.idr = IDR_INIT,
+	.idr = IDR_INIT(init_pid_ns.idr),
 	.pid_allocated = PIDNS_ADDING,
 	.level = 0,
 	.child_reaper = &init_task,
diff --git a/kernel/power/qos.c b/kernel/power/qos.c
index 9d7503910ce2..fa39092b7aea 100644
--- a/kernel/power/qos.c
+++ b/kernel/power/qos.c
@@ -295,6 +295,7 @@ int pm_qos_update_target(struct pm_qos_constraints *c, struct plist_node *node,
 		 * changed
 		 */
 		plist_del(node, &c->list);
+		/* fall through */
 	case PM_QOS_ADD_REQ:
 		plist_node_init(node, new_value);
 		plist_add(node, &c->list);
@@ -367,6 +368,7 @@ bool pm_qos_update_flags(struct pm_qos_flags *pqf,
 		break;
 	case PM_QOS_UPDATE_REQ:
 		pm_qos_flags_remove_req(pqf, req);
+		/* fall through */
 	case PM_QOS_ADD_REQ:
 		req->flags = val;
 		INIT_LIST_HEAD(&req->node);
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 704e55129c3a..2f4af216bd6e 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -51,6 +51,7 @@
 #include <linux/uaccess.h>
 #include <asm/sections.h>
 
+#include <trace/events/initcall.h>
 #define CREATE_TRACE_POINTS
 #include <trace/events/printk.h>
 
@@ -2780,6 +2781,7 @@ EXPORT_SYMBOL(unregister_console);
  */
 void __init console_init(void)
 {
+	int ret;
 	initcall_t *call;
 
 	/* Setup the default TTY line discipline. */
@@ -2790,8 +2792,11 @@ void __init console_init(void)
 	 * inform about problems etc..
 	 */
 	call = __con_initcall_start;
+	trace_initcall_level("console");
 	while (call < __con_initcall_end) {
-		(*call)();
+		trace_initcall_start((*call));
+		ret = (*call)();
+		trace_initcall_finish((*call), ret);
 		call++;
 	}
 }
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 2975f195e1c4..1a3e9bddd17b 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -141,13 +141,15 @@ static void cpuidle_idle_call(void)
 	}
 
 	/*
-	 * Tell the RCU framework we are entering an idle section,
-	 * so no more rcu read side critical sections and one more
+	 * The RCU framework needs to be told that we are entering an idle
+	 * section, so no more rcu read side critical sections and one more
 	 * step to the grace period
 	 */
-	rcu_idle_enter();
 
 	if (cpuidle_not_available(drv, dev)) {
+		tick_nohz_idle_stop_tick();
+		rcu_idle_enter();
+
 		default_idle_call();
 		goto exit_idle;
 	}
@@ -164,20 +166,37 @@ static void cpuidle_idle_call(void)
 
 	if (idle_should_enter_s2idle() || dev->use_deepest_state) {
 		if (idle_should_enter_s2idle()) {
+			rcu_idle_enter();
+
 			entered_state = cpuidle_enter_s2idle(drv, dev);
 			if (entered_state > 0) {
 				local_irq_enable();
 				goto exit_idle;
 			}
+
+			rcu_idle_exit();
 		}
 
+		tick_nohz_idle_stop_tick();
+		rcu_idle_enter();
+
 		next_state = cpuidle_find_deepest_state(drv, dev);
 		call_cpuidle(drv, dev, next_state);
 	} else {
+		bool stop_tick = true;
+
 		/*
 		 * Ask the cpuidle framework to choose a convenient idle state.
 		 */
-		next_state = cpuidle_select(drv, dev);
+		next_state = cpuidle_select(drv, dev, &stop_tick);
+
+		if (stop_tick)
+			tick_nohz_idle_stop_tick();
+		else
+			tick_nohz_idle_retain_tick();
+
+		rcu_idle_enter();
+
 		entered_state = call_cpuidle(drv, dev, next_state);
 		/*
 		 * Give the governor an opportunity to reflect on the outcome
@@ -222,6 +241,7 @@ static void do_idle(void)
 		rmb();
 
 		if (cpu_is_offline(cpu)) {
+			tick_nohz_idle_stop_tick_protected();
 			cpuhp_report_idle_dead();
 			arch_cpu_idle_dead();
 		}
@@ -235,10 +255,12 @@ static void do_idle(void)
 		 * broadcast device expired for us, we don't want to go deep
 		 * idle as we know that the IPI is going to arrive right away.
 		 */
-		if (cpu_idle_force_poll || tick_check_broadcast_expired())
+		if (cpu_idle_force_poll || tick_check_broadcast_expired()) {
+			tick_nohz_idle_restart_tick();
 			cpu_idle_poll();
-		else
+		} else {
 			cpuidle_idle_call();
+		}
 		arch_cpu_idle_exit();
 	}
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index bdf7090b106d..6a78cf70761d 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1340,7 +1340,7 @@ static struct ctl_table vm_table[] = {
 	{
 		.procname	= "dirtytime_expire_seconds",
 		.data		= &dirtytime_expire_interval,
-		.maxlen		= sizeof(dirty_expire_interval),
+		.maxlen		= sizeof(dirtytime_expire_interval),
 		.mode		= 0644,
 		.proc_handler	= dirtytime_interval_handler,
 		.extra1		= &zero,
@@ -2511,6 +2511,15 @@ static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
 }
 #endif
 
+/**
+ * struct do_proc_dointvec_minmax_conv_param - proc_dointvec_minmax() range checking structure
+ * @min: pointer to minimum allowable value
+ * @max: pointer to maximum allowable value
+ *
+ * The do_proc_dointvec_minmax_conv_param structure provides the
+ * minimum and maximum values for doing range checking for those sysctl
+ * parameters that use the proc_dointvec_minmax() handler.
+ */
 struct do_proc_dointvec_minmax_conv_param {
 	int *min;
 	int *max;
@@ -2554,7 +2563,7 @@ static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
  * This routine will ensure the values are within the range specified by
  * table->extra1 (min) and table->extra2 (max).
  *
- * Returns 0 on success.
+ * Returns 0 on success or -EINVAL on write when the range check fails.
  */
 int proc_dointvec_minmax(struct ctl_table *table, int write,
 		  void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -2567,6 +2576,15 @@ int proc_dointvec_minmax(struct ctl_table *table, int write,
 				do_proc_dointvec_minmax_conv, &param);
 }
 
+/**
+ * struct do_proc_douintvec_minmax_conv_param - proc_douintvec_minmax() range checking structure
+ * @min: pointer to minimum allowable value
+ * @max: pointer to maximum allowable value
+ *
+ * The do_proc_douintvec_minmax_conv_param structure provides the
+ * minimum and maximum values for doing range checking for those sysctl
+ * parameters that use the proc_douintvec_minmax() handler.
+ */
 struct do_proc_douintvec_minmax_conv_param {
 	unsigned int *min;
 	unsigned int *max;
@@ -2614,7 +2632,7 @@ static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
  * check for UINT_MAX to avoid having to support wrap around uses from
  * userspace.
  *
- * Returns 0 on success.
+ * Returns 0 on success or -ERANGE on write when the range check fails.
  */
 int proc_douintvec_minmax(struct ctl_table *table, int write,
 			  void __user *buffer, size_t *lenp, loff_t *ppos)
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 9b082ce86325..eda1210ce50f 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -480,6 +480,7 @@ __next_base(struct hrtimer_cpu_base *cpu_base, unsigned int *active)
 	while ((base = __next_base((cpu_base), &(active))))
 
 static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base,
+					 const struct hrtimer *exclude,
 					 unsigned int active,
 					 ktime_t expires_next)
 {
@@ -492,9 +493,22 @@ static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base,
 
 		next = timerqueue_getnext(&base->active);
 		timer = container_of(next, struct hrtimer, node);
+		if (timer == exclude) {
+			/* Get to the next timer in the queue. */
+			next = timerqueue_iterate_next(next);
+			if (!next)
+				continue;
+
+			timer = container_of(next, struct hrtimer, node);
+		}
 		expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
 		if (expires < expires_next) {
 			expires_next = expires;
+
+			/* Skip cpu_base update if a timer is being excluded. */
+			if (exclude)
+				continue;
+
 			if (timer->is_soft)
 				cpu_base->softirq_next_timer = timer;
 			else
@@ -538,7 +552,8 @@ __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_
 	if (!cpu_base->softirq_activated && (active_mask & HRTIMER_ACTIVE_SOFT)) {
 		active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT;
 		cpu_base->softirq_next_timer = NULL;
-		expires_next = __hrtimer_next_event_base(cpu_base, active, KTIME_MAX);
+		expires_next = __hrtimer_next_event_base(cpu_base, NULL,
+							 active, KTIME_MAX);
 
 		next_timer = cpu_base->softirq_next_timer;
 	}
@@ -546,7 +561,8 @@ __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_
 	if (active_mask & HRTIMER_ACTIVE_HARD) {
 		active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD;
 		cpu_base->next_timer = next_timer;
-		expires_next = __hrtimer_next_event_base(cpu_base, active, expires_next);
+		expires_next = __hrtimer_next_event_base(cpu_base, NULL, active,
+							 expires_next);
 	}
 
 	return expires_next;
@@ -1190,6 +1206,39 @@ u64 hrtimer_get_next_event(void)
 
 	return expires;
 }
+
+/**
+ * hrtimer_next_event_without - time until next expiry event w/o one timer
+ * @exclude:	timer to exclude
+ *
+ * Returns the next expiry time over all timers except for the @exclude one or
+ * KTIME_MAX if none of them is pending.
+ */
+u64 hrtimer_next_event_without(const struct hrtimer *exclude)
+{
+	struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
+	u64 expires = KTIME_MAX;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&cpu_base->lock, flags);
+
+	if (__hrtimer_hres_active(cpu_base)) {
+		unsigned int active;
+
+		if (!cpu_base->softirq_activated) {
+			active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT;
+			expires = __hrtimer_next_event_base(cpu_base, exclude,
+							    active, KTIME_MAX);
+		}
+		active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD;
+		expires = __hrtimer_next_event_base(cpu_base, exclude, active,
+						    expires);
+	}
+
+	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
+
+	return expires;
+}
 #endif
 
 static inline int hrtimer_clockid_to_base(clockid_t clock_id)
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 8d70da1b9a0d..a09ded765f6c 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -31,7 +31,7 @@
 
 
 /* USER_HZ period (usecs): */
-unsigned long			tick_usec = TICK_USEC;
+unsigned long			tick_usec = USER_TICK_USEC;
 
 /* SHIFTED_HZ period (nsecs): */
 unsigned long			tick_nsec;
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index f3ab08caa2c3..646645e981f9 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -122,8 +122,7 @@ static ktime_t tick_init_jiffy_update(void)
 	return period;
 }
 
-
-static void tick_sched_do_timer(ktime_t now)
+static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
 {
 	int cpu = smp_processor_id();
 
@@ -143,6 +142,9 @@ static void tick_sched_do_timer(ktime_t now)
 	/* Check, if the jiffies need an update */
 	if (tick_do_timer_cpu == cpu)
 		tick_do_update_jiffies64(now);
+
+	if (ts->inidle)
+		ts->got_idle_tick = 1;
 }
 
 static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
@@ -474,7 +476,9 @@ __setup("nohz=", setup_tick_nohz);
 
 bool tick_nohz_tick_stopped(void)
 {
-	return __this_cpu_read(tick_cpu_sched.tick_stopped);
+	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
+
+	return ts->tick_stopped;
 }
 
 bool tick_nohz_tick_stopped_cpu(int cpu)
@@ -537,14 +541,11 @@ static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now)
 	sched_clock_idle_wakeup_event();
 }
 
-static ktime_t tick_nohz_start_idle(struct tick_sched *ts)
+static void tick_nohz_start_idle(struct tick_sched *ts)
 {
-	ktime_t now = ktime_get();
-
-	ts->idle_entrytime = now;
+	ts->idle_entrytime = ktime_get();
 	ts->idle_active = 1;
 	sched_clock_idle_sleep_event();
-	return now;
 }
 
 /**
@@ -653,13 +654,10 @@ static inline bool local_timer_softirq_pending(void)
 	return local_softirq_pending() & TIMER_SOFTIRQ;
 }
 
-static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
-					 ktime_t now, int cpu)
+static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
 {
-	struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
 	u64 basemono, next_tick, next_tmr, next_rcu, delta, expires;
 	unsigned long seq, basejiff;
-	ktime_t	tick;
 
 	/* Read jiffies and the time when jiffies were updated last */
 	do {
@@ -668,6 +666,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 		basejiff = jiffies;
 	} while (read_seqretry(&jiffies_lock, seq));
 	ts->last_jiffies = basejiff;
+	ts->timer_expires_base = basemono;
 
 	/*
 	 * Keep the periodic tick, when RCU, architecture or irq_work
@@ -712,47 +711,63 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 		 * next period, so no point in stopping it either, bail.
 		 */
 		if (!ts->tick_stopped) {
-			tick = 0;
+			ts->timer_expires = 0;
 			goto out;
 		}
 	}
 
 	/*
+	 * If this CPU is the one which had the do_timer() duty last, we limit
+	 * the sleep time to the timekeeping max_deferment value.
+	 * Otherwise we can sleep as long as we want.
+	 */
+	delta = timekeeping_max_deferment();
+	if (cpu != tick_do_timer_cpu &&
+	    (tick_do_timer_cpu != TICK_DO_TIMER_NONE || !ts->do_timer_last))
+		delta = KTIME_MAX;
+
+	/* Calculate the next expiry time */
+	if (delta < (KTIME_MAX - basemono))
+		expires = basemono + delta;
+	else
+		expires = KTIME_MAX;
+
+	ts->timer_expires = min_t(u64, expires, next_tick);
+
+out:
+	return ts->timer_expires;
+}
+
+static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
+{
+	struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
+	u64 basemono = ts->timer_expires_base;
+	u64 expires = ts->timer_expires;
+	ktime_t tick = expires;
+
+	/* Make sure we won't be trying to stop it twice in a row. */
+	ts->timer_expires_base = 0;
+
+	/*
 	 * If this CPU is the one which updates jiffies, then give up
 	 * the assignment and let it be taken by the CPU which runs
 	 * the tick timer next, which might be this CPU as well. If we
 	 * don't drop this here the jiffies might be stale and
 	 * do_timer() never invoked. Keep track of the fact that it
-	 * was the one which had the do_timer() duty last. If this CPU
-	 * is the one which had the do_timer() duty last, we limit the
-	 * sleep time to the timekeeping max_deferment value.
-	 * Otherwise we can sleep as long as we want.
+	 * was the one which had the do_timer() duty last.
 	 */
-	delta = timekeeping_max_deferment();
 	if (cpu == tick_do_timer_cpu) {
 		tick_do_timer_cpu = TICK_DO_TIMER_NONE;
 		ts->do_timer_last = 1;
 	} else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
-		delta = KTIME_MAX;
 		ts->do_timer_last = 0;
-	} else if (!ts->do_timer_last) {
-		delta = KTIME_MAX;
 	}
 
-	/* Calculate the next expiry time */
-	if (delta < (KTIME_MAX - basemono))
-		expires = basemono + delta;
-	else
-		expires = KTIME_MAX;
-
-	expires = min_t(u64, expires, next_tick);
-	tick = expires;
-
 	/* Skip reprogram of event if its not changed */
 	if (ts->tick_stopped && (expires == ts->next_tick)) {
 		/* Sanity check: make sure clockevent is actually programmed */
 		if (tick == KTIME_MAX || ts->next_tick == hrtimer_get_expires(&ts->sched_timer))
-			goto out;
+			return;
 
 		WARN_ON_ONCE(1);
 		printk_once("basemono: %llu ts->next_tick: %llu dev->next_event: %llu timer->active: %d timer->expires: %llu\n",
@@ -786,7 +801,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 	if (unlikely(expires == KTIME_MAX)) {
 		if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
 			hrtimer_cancel(&ts->sched_timer);
-		goto out;
+		return;
 	}
 
 	hrtimer_set_expires(&ts->sched_timer, tick);
@@ -795,15 +810,23 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 		hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED);
 	else
 		tick_program_event(tick, 1);
-out:
-	/*
-	 * Update the estimated sleep length until the next timer
-	 * (not only the tick).
-	 */
-	ts->sleep_length = ktime_sub(dev->next_event, now);
-	return tick;
 }
 
+static void tick_nohz_retain_tick(struct tick_sched *ts)
+{
+	ts->timer_expires_base = 0;
+}
+
+#ifdef CONFIG_NO_HZ_FULL
+static void tick_nohz_stop_sched_tick(struct tick_sched *ts, int cpu)
+{
+	if (tick_nohz_next_event(ts, cpu))
+		tick_nohz_stop_tick(ts, cpu);
+	else
+		tick_nohz_retain_tick(ts);
+}
+#endif /* CONFIG_NO_HZ_FULL */
+
 static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
 {
 	/* Update jiffies first */
@@ -839,7 +862,7 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts)
 		return;
 
 	if (can_stop_full_tick(cpu, ts))
-		tick_nohz_stop_sched_tick(ts, ktime_get(), cpu);
+		tick_nohz_stop_sched_tick(ts, cpu);
 	else if (ts->tick_stopped)
 		tick_nohz_restart_sched_tick(ts, ktime_get());
 #endif
@@ -865,10 +888,8 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
 		return false;
 	}
 
-	if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) {
-		ts->sleep_length = NSEC_PER_SEC / HZ;
+	if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
 		return false;
-	}
 
 	if (need_resched())
 		return false;
@@ -903,42 +924,65 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
 	return true;
 }
 
-static void __tick_nohz_idle_enter(struct tick_sched *ts)
+static void __tick_nohz_idle_stop_tick(struct tick_sched *ts)
 {
-	ktime_t now, expires;
+	ktime_t expires;
 	int cpu = smp_processor_id();
 
-	now = tick_nohz_start_idle(ts);
+	/*
+	 * If tick_nohz_get_sleep_length() ran tick_nohz_next_event(), the
+	 * tick timer expiration time is known already.
+	 */
+	if (ts->timer_expires_base)
+		expires = ts->timer_expires;
+	else if (can_stop_idle_tick(cpu, ts))
+		expires = tick_nohz_next_event(ts, cpu);
+	else
+		return;
+
+	ts->idle_calls++;
 
-	if (can_stop_idle_tick(cpu, ts)) {
+	if (expires > 0LL) {
 		int was_stopped = ts->tick_stopped;
 
-		ts->idle_calls++;
+		tick_nohz_stop_tick(ts, cpu);
 
-		expires = tick_nohz_stop_sched_tick(ts, now, cpu);
-		if (expires > 0LL) {
-			ts->idle_sleeps++;
-			ts->idle_expires = expires;
-		}
+		ts->idle_sleeps++;
+		ts->idle_expires = expires;
 
 		if (!was_stopped && ts->tick_stopped) {
 			ts->idle_jiffies = ts->last_jiffies;
 			nohz_balance_enter_idle(cpu);
 		}
+	} else {
+		tick_nohz_retain_tick(ts);
 	}
 }
 
 /**
- * tick_nohz_idle_enter - stop the idle tick from the idle task
+ * tick_nohz_idle_stop_tick - stop the idle tick from the idle task
  *
  * When the next event is more than a tick into the future, stop the idle tick
- * Called when we start the idle loop.
- *
- * The arch is responsible of calling:
+ */
+void tick_nohz_idle_stop_tick(void)
+{
+	__tick_nohz_idle_stop_tick(this_cpu_ptr(&tick_cpu_sched));
+}
+
+void tick_nohz_idle_retain_tick(void)
+{
+	tick_nohz_retain_tick(this_cpu_ptr(&tick_cpu_sched));
+	/*
+	 * Undo the effect of get_next_timer_interrupt() called from
+	 * tick_nohz_next_event().
+	 */
+	timer_clear_idle();
+}
+
+/**
+ * tick_nohz_idle_enter - prepare for entering idle on the current CPU
  *
- * - rcu_idle_enter() after its last use of RCU before the CPU is put
- *  to sleep.
- * - rcu_idle_exit() before the first use of RCU after the CPU is woken up.
+ * Called when we start the idle loop.
  */
 void tick_nohz_idle_enter(void)
 {
@@ -949,8 +993,11 @@ void tick_nohz_idle_enter(void)
 	local_irq_disable();
 
 	ts = this_cpu_ptr(&tick_cpu_sched);
+
+	WARN_ON_ONCE(ts->timer_expires_base);
+
 	ts->inidle = 1;
-	__tick_nohz_idle_enter(ts);
+	tick_nohz_start_idle(ts);
 
 	local_irq_enable();
 }
@@ -968,21 +1015,62 @@ void tick_nohz_irq_exit(void)
 	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
 
 	if (ts->inidle)
-		__tick_nohz_idle_enter(ts);
+		tick_nohz_start_idle(ts);
 	else
 		tick_nohz_full_update_tick(ts);
 }
 
 /**
- * tick_nohz_get_sleep_length - return the length of the current sleep
+ * tick_nohz_idle_got_tick - Check whether or not the tick handler has run
+ */
+bool tick_nohz_idle_got_tick(void)
+{
+	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
+
+	if (ts->got_idle_tick) {
+		ts->got_idle_tick = 0;
+		return true;
+	}
+	return false;
+}
+
+/**
+ * tick_nohz_get_sleep_length - return the expected length of the current sleep
+ * @delta_next: duration until the next event if the tick cannot be stopped
  *
  * Called from power state control code with interrupts disabled
  */
-ktime_t tick_nohz_get_sleep_length(void)
+ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next)
 {
+	struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
 	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
+	int cpu = smp_processor_id();
+	/*
+	 * The idle entry time is expected to be a sufficient approximation of
+	 * the current time at this point.
+	 */
+	ktime_t now = ts->idle_entrytime;
+	ktime_t next_event;
+
+	WARN_ON_ONCE(!ts->inidle);
+
+	*delta_next = ktime_sub(dev->next_event, now);
+
+	if (!can_stop_idle_tick(cpu, ts))
+		return *delta_next;
+
+	next_event = tick_nohz_next_event(ts, cpu);
+	if (!next_event)
+		return *delta_next;
+
+	/*
+	 * If the next highres timer to expire is earlier than next_event, the
+	 * idle governor needs to know that.
+	 */
+	next_event = min_t(u64, next_event,
+			   hrtimer_next_event_without(&ts->sched_timer));
 
-	return ts->sleep_length;
+	return ktime_sub(next_event, now);
 }
 
 /**
@@ -1031,6 +1119,20 @@ static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
 #endif
 }
 
+static void __tick_nohz_idle_restart_tick(struct tick_sched *ts, ktime_t now)
+{
+	tick_nohz_restart_sched_tick(ts, now);
+	tick_nohz_account_idle_ticks(ts);
+}
+
+void tick_nohz_idle_restart_tick(void)
+{
+	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
+
+	if (ts->tick_stopped)
+		__tick_nohz_idle_restart_tick(ts, ktime_get());
+}
+
 /**
  * tick_nohz_idle_exit - restart the idle tick from the idle task
  *
@@ -1041,24 +1143,26 @@ static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
 void tick_nohz_idle_exit(void)
 {
 	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
+	bool idle_active, tick_stopped;
 	ktime_t now;
 
 	local_irq_disable();
 
 	WARN_ON_ONCE(!ts->inidle);
+	WARN_ON_ONCE(ts->timer_expires_base);
 
 	ts->inidle = 0;
+	idle_active = ts->idle_active;
+	tick_stopped = ts->tick_stopped;
 
-	if (ts->idle_active || ts->tick_stopped)
+	if (idle_active || tick_stopped)
 		now = ktime_get();
 
-	if (ts->idle_active)
+	if (idle_active)
 		tick_nohz_stop_idle(ts, now);
 
-	if (ts->tick_stopped) {
-		tick_nohz_restart_sched_tick(ts, now);
-		tick_nohz_account_idle_ticks(ts);
-	}
+	if (tick_stopped)
+		__tick_nohz_idle_restart_tick(ts, now);
 
 	local_irq_enable();
 }
@@ -1074,7 +1178,7 @@ static void tick_nohz_handler(struct clock_event_device *dev)
 
 	dev->next_event = KTIME_MAX;
 
-	tick_sched_do_timer(now);
+	tick_sched_do_timer(ts, now);
 	tick_sched_handle(ts, regs);
 
 	/* No need to reprogram if we are running tickless  */
@@ -1169,7 +1273,7 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
 	struct pt_regs *regs = get_irq_regs();
 	ktime_t now = ktime_get();
 
-	tick_sched_do_timer(now);
+	tick_sched_do_timer(ts, now);
 
 	/*
 	 * Do not call, when we are not in irq context and have
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index 954b43dbf21c..6de959a854b2 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -38,31 +38,37 @@ enum tick_nohz_mode {
  * @idle_exittime:	Time when the idle state was left
  * @idle_sleeptime:	Sum of the time slept in idle with sched tick stopped
  * @iowait_sleeptime:	Sum of the time slept in idle with sched tick stopped, with IO outstanding
- * @sleep_length:	Duration of the current idle sleep
+ * @timer_expires:	Anticipated timer expiration time (in case sched tick is stopped)
+ * @timer_expires_base:	Base time clock monotonic for @timer_expires
  * @do_timer_lst:	CPU was the last one doing do_timer before going idle
+ * @got_idle_tick:	Tick timer function has run with @inidle set
  */
 struct tick_sched {
 	struct hrtimer			sched_timer;
 	unsigned long			check_clocks;
 	enum tick_nohz_mode		nohz_mode;
+
+	unsigned int			inidle		: 1;
+	unsigned int			tick_stopped	: 1;
+	unsigned int			idle_active	: 1;
+	unsigned int			do_timer_last	: 1;
+	unsigned int			got_idle_tick	: 1;
+
 	ktime_t				last_tick;
 	ktime_t				next_tick;
-	int				inidle;
-	int				tick_stopped;
 	unsigned long			idle_jiffies;
 	unsigned long			idle_calls;
 	unsigned long			idle_sleeps;
-	int				idle_active;
 	ktime_t				idle_entrytime;
 	ktime_t				idle_waketime;
 	ktime_t				idle_exittime;
 	ktime_t				idle_sleeptime;
 	ktime_t				iowait_sleeptime;
-	ktime_t				sleep_length;
 	unsigned long			last_jiffies;
+	u64				timer_expires;
+	u64				timer_expires_base;
 	u64				next_timer;
 	ktime_t				idle_expires;
-	int				do_timer_last;
 	atomic_t			tick_dep_mask;
 };
 
diff --git a/kernel/time/timekeeping_internal.h b/kernel/time/timekeeping_internal.h
index fdbeeb02dde9..cf5c0828ee31 100644
--- a/kernel/time/timekeeping_internal.h
+++ b/kernel/time/timekeeping_internal.h
@@ -31,6 +31,4 @@ static inline u64 clocksource_delta(u64 now, u64 last, u64 mask)
 }
 #endif
 
-extern time64_t __ktime_get_real_seconds(void);
-
 #endif /* _TIMEKEEPING_INTERNAL_H */
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 0b249e2f0c3c..c4f0f2e4126e 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -606,7 +606,10 @@ config HIST_TRIGGERS
 	  event activity as an initial guide for further investigation
 	  using more advanced tools.
 
-	  See Documentation/trace/events.txt.
+	  Inter-event tracing of quantities such as latencies is also
+	  supported using hist triggers under this option.
+
+	  See Documentation/trace/histogram.txt.
 	  If in doubt, say N.
 
 config MMIOTRACE_TEST
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index eac9ce2c57a2..16bbf062018f 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3902,14 +3902,13 @@ static bool module_exists(const char *module)
 {
 	/* All modules have the symbol __this_module */
 	const char this_mod[] = "__this_module";
-	const int modname_size = MAX_PARAM_PREFIX_LEN + sizeof(this_mod) + 1;
-	char modname[modname_size + 1];
+	char modname[MAX_PARAM_PREFIX_LEN + sizeof(this_mod) + 2];
 	unsigned long val;
 	int n;
 
-	n = snprintf(modname, modname_size + 1, "%s:%s", module, this_mod);
+	n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod);
 
-	if (n > modname_size)
+	if (n > sizeof(modname) - 1)
 		return false;
 
 	val = module_kallsyms_lookup_name(modname);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index dcf1c4dd3efe..c9cb9767d49b 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -22,6 +22,7 @@
 #include <linux/hash.h>
 #include <linux/list.h>
 #include <linux/cpu.h>
+#include <linux/oom.h>
 
 #include <asm/local.h>
 
@@ -41,6 +42,8 @@ int ring_buffer_print_entry_header(struct trace_seq *s)
 			 RINGBUF_TYPE_PADDING);
 	trace_seq_printf(s, "\ttime_extend : type == %d\n",
 			 RINGBUF_TYPE_TIME_EXTEND);
+	trace_seq_printf(s, "\ttime_stamp : type == %d\n",
+			 RINGBUF_TYPE_TIME_STAMP);
 	trace_seq_printf(s, "\tdata max type_len  == %d\n",
 			 RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
 
@@ -140,12 +143,15 @@ int ring_buffer_print_entry_header(struct trace_seq *s)
 
 enum {
 	RB_LEN_TIME_EXTEND = 8,
-	RB_LEN_TIME_STAMP = 16,
+	RB_LEN_TIME_STAMP =  8,
 };
 
 #define skip_time_extend(event) \
 	((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND))
 
+#define extended_time(event) \
+	(event->type_len >= RINGBUF_TYPE_TIME_EXTEND)
+
 static inline int rb_null_event(struct ring_buffer_event *event)
 {
 	return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta;
@@ -209,7 +215,7 @@ rb_event_ts_length(struct ring_buffer_event *event)
 {
 	unsigned len = 0;
 
-	if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
+	if (extended_time(event)) {
 		/* time extends include the data event after it */
 		len = RB_LEN_TIME_EXTEND;
 		event = skip_time_extend(event);
@@ -231,7 +237,7 @@ unsigned ring_buffer_event_length(struct ring_buffer_event *event)
 {
 	unsigned length;
 
-	if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
+	if (extended_time(event))
 		event = skip_time_extend(event);
 
 	length = rb_event_length(event);
@@ -248,7 +254,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_length);
 static __always_inline void *
 rb_event_data(struct ring_buffer_event *event)
 {
-	if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
+	if (extended_time(event))
 		event = skip_time_extend(event);
 	BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
 	/* If length is in len field, then array[0] has the data */
@@ -275,6 +281,27 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data);
 #define TS_MASK		((1ULL << TS_SHIFT) - 1)
 #define TS_DELTA_TEST	(~TS_MASK)
 
+/**
+ * ring_buffer_event_time_stamp - return the event's extended timestamp
+ * @event: the event to get the timestamp of
+ *
+ * Returns the extended timestamp associated with a data event.
+ * An extended time_stamp is a 64-bit timestamp represented
+ * internally in a special way that makes the best use of space
+ * contained within a ring buffer event.  This function decodes
+ * it and maps it to a straight u64 value.
+ */
+u64 ring_buffer_event_time_stamp(struct ring_buffer_event *event)
+{
+	u64 ts;
+
+	ts = event->array[0];
+	ts <<= TS_SHIFT;
+	ts += event->time_delta;
+
+	return ts;
+}
+
 /* Flag when events were overwritten */
 #define RB_MISSED_EVENTS	(1 << 31)
 /* Missed count stored at end */
@@ -451,6 +478,7 @@ struct ring_buffer_per_cpu {
 	struct buffer_page		*reader_page;
 	unsigned long			lost_events;
 	unsigned long			last_overrun;
+	unsigned long			nest;
 	local_t				entries_bytes;
 	local_t				entries;
 	local_t				overrun;
@@ -488,6 +516,7 @@ struct ring_buffer {
 	u64				(*clock)(void);
 
 	struct rb_irq_work		irq_work;
+	bool				time_stamp_abs;
 };
 
 struct ring_buffer_iter {
@@ -1134,30 +1163,60 @@ static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
 static int __rb_allocate_pages(long nr_pages, struct list_head *pages, int cpu)
 {
 	struct buffer_page *bpage, *tmp;
+	bool user_thread = current->mm != NULL;
+	gfp_t mflags;
 	long i;
 
+	/*
+	 * Check if the available memory is there first.
+	 * Note, si_mem_available() only gives us a rough estimate of available
+	 * memory. It may not be accurate. But we don't care, we just want
+	 * to prevent doing any allocation when it is obvious that it is
+	 * not going to succeed.
+	 */
+	i = si_mem_available();
+	if (i < nr_pages)
+		return -ENOMEM;
+
+	/*
+	 * __GFP_RETRY_MAYFAIL flag makes sure that the allocation fails
+	 * gracefully without invoking oom-killer and the system is not
+	 * destabilized.
+	 */
+	mflags = GFP_KERNEL | __GFP_RETRY_MAYFAIL;
+
+	/*
+	 * If a user thread allocates too much, and si_mem_available()
+	 * reports there's enough memory, even though there is not.
+	 * Make sure the OOM killer kills this thread. This can happen
+	 * even with RETRY_MAYFAIL because another task may be doing
+	 * an allocation after this task has taken all memory.
+	 * This is the task the OOM killer needs to take out during this
+	 * loop, even if it was triggered by an allocation somewhere else.
+	 */
+	if (user_thread)
+		set_current_oom_origin();
 	for (i = 0; i < nr_pages; i++) {
 		struct page *page;
-		/*
-		 * __GFP_RETRY_MAYFAIL flag makes sure that the allocation fails
-		 * gracefully without invoking oom-killer and the system is not
-		 * destabilized.
-		 */
+
 		bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
-				    GFP_KERNEL | __GFP_RETRY_MAYFAIL,
-				    cpu_to_node(cpu));
+				    mflags, cpu_to_node(cpu));
 		if (!bpage)
 			goto free_pages;
 
 		list_add(&bpage->list, pages);
 
-		page = alloc_pages_node(cpu_to_node(cpu),
-					GFP_KERNEL | __GFP_RETRY_MAYFAIL, 0);
+		page = alloc_pages_node(cpu_to_node(cpu), mflags, 0);
 		if (!page)
 			goto free_pages;
 		bpage->page = page_address(page);
 		rb_init_page(bpage->page);
+
+		if (user_thread && fatal_signal_pending(current))
+			goto free_pages;
 	}
+	if (user_thread)
+		clear_current_oom_origin();
 
 	return 0;
 
@@ -1166,6 +1225,8 @@ free_pages:
 		list_del_init(&bpage->list);
 		free_buffer_page(bpage);
 	}
+	if (user_thread)
+		clear_current_oom_origin();
 
 	return -ENOMEM;
 }
@@ -1382,6 +1443,16 @@ void ring_buffer_set_clock(struct ring_buffer *buffer,
 	buffer->clock = clock;
 }
 
+void ring_buffer_set_time_stamp_abs(struct ring_buffer *buffer, bool abs)
+{
+	buffer->time_stamp_abs = abs;
+}
+
+bool ring_buffer_time_stamp_abs(struct ring_buffer *buffer)
+{
+	return buffer->time_stamp_abs;
+}
+
 static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
 
 static inline unsigned long rb_page_entries(struct buffer_page *bpage)
@@ -2206,12 +2277,15 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
 
 /* Slow path, do not inline */
 static noinline struct ring_buffer_event *
-rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
+rb_add_time_stamp(struct ring_buffer_event *event, u64 delta, bool abs)
 {
-	event->type_len = RINGBUF_TYPE_TIME_EXTEND;
+	if (abs)
+		event->type_len = RINGBUF_TYPE_TIME_STAMP;
+	else
+		event->type_len = RINGBUF_TYPE_TIME_EXTEND;
 
-	/* Not the first event on the page? */
-	if (rb_event_index(event)) {
+	/* Not the first event on the page, or not delta? */
+	if (abs || rb_event_index(event)) {
 		event->time_delta = delta & TS_MASK;
 		event->array[0] = delta >> TS_SHIFT;
 	} else {
@@ -2254,7 +2328,9 @@ rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
 	 * add it to the start of the resevered space.
 	 */
 	if (unlikely(info->add_timestamp)) {
-		event = rb_add_time_stamp(event, delta);
+		bool abs = ring_buffer_time_stamp_abs(cpu_buffer->buffer);
+
+		event = rb_add_time_stamp(event, info->delta, abs);
 		length -= RB_LEN_TIME_EXTEND;
 		delta = 0;
 	}
@@ -2442,7 +2518,7 @@ static __always_inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer
 
 static inline void rb_event_discard(struct ring_buffer_event *event)
 {
-	if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
+	if (extended_time(event))
 		event = skip_time_extend(event);
 
 	/* array[0] holds the actual length for the discarded event */
@@ -2486,10 +2562,11 @@ rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
 			cpu_buffer->write_stamp =
 				cpu_buffer->commit_page->page->time_stamp;
 		else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
-			delta = event->array[0];
-			delta <<= TS_SHIFT;
-			delta += event->time_delta;
+			delta = ring_buffer_event_time_stamp(event);
 			cpu_buffer->write_stamp += delta;
+		} else if (event->type_len == RINGBUF_TYPE_TIME_STAMP) {
+			delta = ring_buffer_event_time_stamp(event);
+			cpu_buffer->write_stamp = delta;
 		} else
 			cpu_buffer->write_stamp += event->time_delta;
 	}
@@ -2581,10 +2658,10 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
 		bit = pc & NMI_MASK ? RB_CTX_NMI :
 			pc & HARDIRQ_MASK ? RB_CTX_IRQ : RB_CTX_SOFTIRQ;
 
-	if (unlikely(val & (1 << bit)))
+	if (unlikely(val & (1 << (bit + cpu_buffer->nest))))
 		return 1;
 
-	val |= (1 << bit);
+	val |= (1 << (bit + cpu_buffer->nest));
 	cpu_buffer->current_context = val;
 
 	return 0;
@@ -2593,7 +2670,57 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
 static __always_inline void
 trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer)
 {
-	cpu_buffer->current_context &= cpu_buffer->current_context - 1;
+	cpu_buffer->current_context &=
+		cpu_buffer->current_context - (1 << cpu_buffer->nest);
+}
+
+/* The recursive locking above uses 4 bits */
+#define NESTED_BITS 4
+
+/**
+ * ring_buffer_nest_start - Allow to trace while nested
+ * @buffer: The ring buffer to modify
+ *
+ * The ring buffer has a safty mechanism to prevent recursion.
+ * But there may be a case where a trace needs to be done while
+ * tracing something else. In this case, calling this function
+ * will allow this function to nest within a currently active
+ * ring_buffer_lock_reserve().
+ *
+ * Call this function before calling another ring_buffer_lock_reserve() and
+ * call ring_buffer_nest_end() after the nested ring_buffer_unlock_commit().
+ */
+void ring_buffer_nest_start(struct ring_buffer *buffer)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	int cpu;
+
+	/* Enabled by ring_buffer_nest_end() */
+	preempt_disable_notrace();
+	cpu = raw_smp_processor_id();
+	cpu_buffer = buffer->buffers[cpu];
+	/* This is the shift value for the above recusive locking */
+	cpu_buffer->nest += NESTED_BITS;
+}
+
+/**
+ * ring_buffer_nest_end - Allow to trace while nested
+ * @buffer: The ring buffer to modify
+ *
+ * Must be called after ring_buffer_nest_start() and after the
+ * ring_buffer_unlock_commit().
+ */
+void ring_buffer_nest_end(struct ring_buffer *buffer)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	int cpu;
+
+	/* disabled by ring_buffer_nest_start() */
+	cpu = raw_smp_processor_id();
+	cpu_buffer = buffer->buffers[cpu];
+	/* This is the shift value for the above recusive locking */
+	cpu_buffer->nest -= NESTED_BITS;
+	preempt_enable_notrace();
 }
 
 /**
@@ -2637,7 +2764,8 @@ rb_handle_timestamp(struct ring_buffer_per_cpu *cpu_buffer,
 		  sched_clock_stable() ? "" :
 		  "If you just came from a suspend/resume,\n"
 		  "please switch to the trace global clock:\n"
-		  "  echo global > /sys/kernel/debug/tracing/trace_clock\n");
+		  "  echo global > /sys/kernel/debug/tracing/trace_clock\n"
+		  "or add trace_clock=global to the kernel command line\n");
 	info->add_timestamp = 1;
 }
 
@@ -2669,7 +2797,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 	 * If this is the first commit on the page, then it has the same
 	 * timestamp as the page itself.
 	 */
-	if (!tail)
+	if (!tail && !ring_buffer_time_stamp_abs(cpu_buffer->buffer))
 		info->delta = 0;
 
 	/* See if we shot pass the end of this buffer page */
@@ -2746,8 +2874,11 @@ rb_reserve_next_event(struct ring_buffer *buffer,
 	/* make sure this diff is calculated here */
 	barrier();
 
-	/* Did the write stamp get updated already? */
-	if (likely(info.ts >= cpu_buffer->write_stamp)) {
+	if (ring_buffer_time_stamp_abs(buffer)) {
+		info.delta = info.ts;
+		rb_handle_timestamp(cpu_buffer, &info);
+	} else /* Did the write stamp get updated already? */
+		if (likely(info.ts >= cpu_buffer->write_stamp)) {
 		info.delta = diff;
 		if (unlikely(test_time_stamp(info.delta)))
 			rb_handle_timestamp(cpu_buffer, &info);
@@ -3429,14 +3560,13 @@ rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
 		return;
 
 	case RINGBUF_TYPE_TIME_EXTEND:
-		delta = event->array[0];
-		delta <<= TS_SHIFT;
-		delta += event->time_delta;
+		delta = ring_buffer_event_time_stamp(event);
 		cpu_buffer->read_stamp += delta;
 		return;
 
 	case RINGBUF_TYPE_TIME_STAMP:
-		/* FIXME: not implemented */
+		delta = ring_buffer_event_time_stamp(event);
+		cpu_buffer->read_stamp = delta;
 		return;
 
 	case RINGBUF_TYPE_DATA:
@@ -3460,14 +3590,13 @@ rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
 		return;
 
 	case RINGBUF_TYPE_TIME_EXTEND:
-		delta = event->array[0];
-		delta <<= TS_SHIFT;
-		delta += event->time_delta;
+		delta = ring_buffer_event_time_stamp(event);
 		iter->read_stamp += delta;
 		return;
 
 	case RINGBUF_TYPE_TIME_STAMP:
-		/* FIXME: not implemented */
+		delta = ring_buffer_event_time_stamp(event);
+		iter->read_stamp = delta;
 		return;
 
 	case RINGBUF_TYPE_DATA:
@@ -3691,6 +3820,8 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
 	struct buffer_page *reader;
 	int nr_loops = 0;
 
+	if (ts)
+		*ts = 0;
  again:
 	/*
 	 * We repeat when a time extend is encountered.
@@ -3727,12 +3858,17 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
 		goto again;
 
 	case RINGBUF_TYPE_TIME_STAMP:
-		/* FIXME: not implemented */
+		if (ts) {
+			*ts = ring_buffer_event_time_stamp(event);
+			ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
+							 cpu_buffer->cpu, ts);
+		}
+		/* Internal data, OK to advance */
 		rb_advance_reader(cpu_buffer);
 		goto again;
 
 	case RINGBUF_TYPE_DATA:
-		if (ts) {
+		if (ts && !(*ts)) {
 			*ts = cpu_buffer->read_stamp + event->time_delta;
 			ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
 							 cpu_buffer->cpu, ts);
@@ -3757,6 +3893,9 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 	struct ring_buffer_event *event;
 	int nr_loops = 0;
 
+	if (ts)
+		*ts = 0;
+
 	cpu_buffer = iter->cpu_buffer;
 	buffer = cpu_buffer->buffer;
 
@@ -3809,12 +3948,17 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 		goto again;
 
 	case RINGBUF_TYPE_TIME_STAMP:
-		/* FIXME: not implemented */
+		if (ts) {
+			*ts = ring_buffer_event_time_stamp(event);
+			ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
+							 cpu_buffer->cpu, ts);
+		}
+		/* Internal data, OK to advance */
 		rb_advance_iter(iter);
 		goto again;
 
 	case RINGBUF_TYPE_DATA:
-		if (ts) {
+		if (ts && !(*ts)) {
 			*ts = iter->read_stamp + event->time_delta;
 			ring_buffer_normalize_time_stamp(buffer,
 							 cpu_buffer->cpu, ts);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 5071931eb943..dfbcf9ee1447 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -41,6 +41,7 @@
 #include <linux/nmi.h>
 #include <linux/fs.h>
 #include <linux/trace.h>
+#include <linux/sched/clock.h>
 #include <linux/sched/rt.h>
 
 #include "trace.h"
@@ -1168,6 +1169,14 @@ static struct {
 	ARCH_TRACE_CLOCKS
 };
 
+bool trace_clock_in_ns(struct trace_array *tr)
+{
+	if (trace_clocks[tr->clock_id].in_ns)
+		return true;
+
+	return false;
+}
+
 /*
  * trace_parser_get_init - gets the buffer for trace parser
  */
@@ -2269,7 +2278,7 @@ trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
 
 	*current_rb = trace_file->tr->trace_buffer.buffer;
 
-	if ((trace_file->flags &
+	if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
 	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
 	    (entry = this_cpu_read(trace_buffered_event))) {
 		/* Try to use the per cpu buffer first */
@@ -4515,6 +4524,9 @@ static const char readme_msg[] =
 #ifdef CONFIG_X86_64
 	"     x86-tsc:   TSC cycle counter\n"
 #endif
+	"\n  timestamp_mode\t-view the mode used to timestamp events\n"
+	"       delta:   Delta difference against a buffer-wide timestamp\n"
+	"    absolute:   Absolute (standalone) timestamp\n"
 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
 	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
@@ -4691,8 +4703,9 @@ static const char readme_msg[] =
 	"\t            .sym        display an address as a symbol\n"
 	"\t            .sym-offset display an address as a symbol and offset\n"
 	"\t            .execname   display a common_pid as a program name\n"
-	"\t            .syscall    display a syscall id as a syscall name\n\n"
-	"\t            .log2       display log2 value rather than raw number\n\n"
+	"\t            .syscall    display a syscall id as a syscall name\n"
+	"\t            .log2       display log2 value rather than raw number\n"
+	"\t            .usecs      display a common_timestamp in microseconds\n\n"
 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
 	"\t    trigger or to start a hist trigger but not log any events\n"
 	"\t    until told to do so.  'continue' can be used to start or\n"
@@ -6202,7 +6215,7 @@ static int tracing_clock_show(struct seq_file *m, void *v)
 	return 0;
 }
 
-static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
+int tracing_set_clock(struct trace_array *tr, const char *clockstr)
 {
 	int i;
 
@@ -6282,6 +6295,71 @@ static int tracing_clock_open(struct inode *inode, struct file *file)
 	return ret;
 }
 
+static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
+{
+	struct trace_array *tr = m->private;
+
+	mutex_lock(&trace_types_lock);
+
+	if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
+		seq_puts(m, "delta [absolute]\n");
+	else
+		seq_puts(m, "[delta] absolute\n");
+
+	mutex_unlock(&trace_types_lock);
+
+	return 0;
+}
+
+static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
+{
+	struct trace_array *tr = inode->i_private;
+	int ret;
+
+	if (tracing_disabled)
+		return -ENODEV;
+
+	if (trace_array_get(tr))
+		return -ENODEV;
+
+	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
+	if (ret < 0)
+		trace_array_put(tr);
+
+	return ret;
+}
+
+int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
+{
+	int ret = 0;
+
+	mutex_lock(&trace_types_lock);
+
+	if (abs && tr->time_stamp_abs_ref++)
+		goto out;
+
+	if (!abs) {
+		if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		if (--tr->time_stamp_abs_ref)
+			goto out;
+	}
+
+	ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
+
+#ifdef CONFIG_TRACER_MAX_TRACE
+	if (tr->max_buffer.buffer)
+		ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
+#endif
+ out:
+	mutex_unlock(&trace_types_lock);
+
+	return ret;
+}
+
 struct ftrace_buffer_info {
 	struct trace_iterator	iter;
 	void			*spare;
@@ -6529,6 +6607,13 @@ static const struct file_operations trace_clock_fops = {
 	.write		= tracing_clock_write,
 };
 
+static const struct file_operations trace_time_stamp_mode_fops = {
+	.open		= tracing_time_stamp_mode_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= tracing_single_release_tr,
+};
+
 #ifdef CONFIG_TRACER_SNAPSHOT
 static const struct file_operations snapshot_fops = {
 	.open		= tracing_snapshot_open,
@@ -7699,6 +7784,7 @@ static int instance_mkdir(const char *name)
 
 	INIT_LIST_HEAD(&tr->systems);
 	INIT_LIST_HEAD(&tr->events);
+	INIT_LIST_HEAD(&tr->hist_vars);
 
 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
 		goto out_free_tr;
@@ -7851,6 +7937,9 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
 	trace_create_file("tracing_on", 0644, d_tracer,
 			  tr, &rb_simple_fops);
 
+	trace_create_file("timestamp_mode", 0444, d_tracer, tr,
+			  &trace_time_stamp_mode_fops);
+
 	create_trace_options_dir(tr);
 
 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
@@ -8446,6 +8535,7 @@ __init static int tracer_alloc_buffers(void)
 
 	INIT_LIST_HEAD(&global_trace.systems);
 	INIT_LIST_HEAD(&global_trace.events);
+	INIT_LIST_HEAD(&global_trace.hist_vars);
 	list_add(&global_trace.list, &ftrace_trace_arrays);
 
 	apply_trace_boot_options();
@@ -8507,3 +8597,21 @@ __init static int clear_boot_tracer(void)
 
 fs_initcall(tracer_init_tracefs);
 late_initcall_sync(clear_boot_tracer);
+
+#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+__init static int tracing_set_default_clock(void)
+{
+	/* sched_clock_stable() is determined in late_initcall */
+	if (!trace_boot_clock && !sched_clock_stable()) {
+		printk(KERN_WARNING
+		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
+		       "If you want to keep using the local clock, then add:\n"
+		       "  \"trace_clock=local\"\n"
+		       "on the kernel command line\n");
+		tracing_set_clock(&global_trace, "global");
+	}
+
+	return 0;
+}
+late_initcall_sync(tracing_set_default_clock);
+#endif
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 2a6d0325a761..6fb46a06c9dc 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -273,6 +273,8 @@ struct trace_array {
 	/* function tracing enabled */
 	int			function_enabled;
 #endif
+	int			time_stamp_abs_ref;
+	struct list_head	hist_vars;
 };
 
 enum {
@@ -286,6 +288,11 @@ extern struct mutex trace_types_lock;
 extern int trace_array_get(struct trace_array *tr);
 extern void trace_array_put(struct trace_array *tr);
 
+extern int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs);
+extern int tracing_set_clock(struct trace_array *tr, const char *clockstr);
+
+extern bool trace_clock_in_ns(struct trace_array *tr);
+
 /*
  * The global tracer (top) should be the first trace array added,
  * but we check the flag anyway.
@@ -1209,12 +1216,11 @@ struct ftrace_event_field {
 	int			is_signed;
 };
 
+struct prog_entry;
+
 struct event_filter {
-	int			n_preds;	/* Number assigned */
-	int			a_preds;	/* allocated */
-	struct filter_pred __rcu	*preds;
-	struct filter_pred __rcu	*root;
-	char				*filter_string;
+	struct prog_entry __rcu	*prog;
+	char			*filter_string;
 };
 
 struct event_subsystem {
@@ -1291,7 +1297,7 @@ __event_trigger_test_discard(struct trace_event_file *file,
 	unsigned long eflags = file->flags;
 
 	if (eflags & EVENT_FILE_FL_TRIGGER_COND)
-		*tt = event_triggers_call(file, entry);
+		*tt = event_triggers_call(file, entry, event);
 
 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
@@ -1328,7 +1334,7 @@ event_trigger_unlock_commit(struct trace_event_file *file,
 		trace_buffer_unlock_commit(file->tr, buffer, event, irq_flags, pc);
 
 	if (tt)
-		event_triggers_post_call(file, tt, entry);
+		event_triggers_post_call(file, tt, entry, event);
 }
 
 /**
@@ -1361,7 +1367,7 @@ event_trigger_unlock_commit_regs(struct trace_event_file *file,
 						irq_flags, pc, regs);
 
 	if (tt)
-		event_triggers_post_call(file, tt, entry);
+		event_triggers_post_call(file, tt, entry, event);
 }
 
 #define FILTER_PRED_INVALID	((unsigned short)-1)
@@ -1406,12 +1412,8 @@ struct filter_pred {
 	unsigned short		*ops;
 	struct ftrace_event_field *field;
 	int 			offset;
-	int 			not;
+	int			not;
 	int 			op;
-	unsigned short		index;
-	unsigned short		parent;
-	unsigned short		left;
-	unsigned short		right;
 };
 
 static inline bool is_string_field(struct ftrace_event_field *field)
@@ -1543,6 +1545,8 @@ extern void pause_named_trigger(struct event_trigger_data *data);
 extern void unpause_named_trigger(struct event_trigger_data *data);
 extern void set_named_trigger_data(struct event_trigger_data *data,
 				   struct event_trigger_data *named_data);
+extern struct event_trigger_data *
+get_named_trigger_data(struct event_trigger_data *data);
 extern int register_event_command(struct event_command *cmd);
 extern int unregister_event_command(struct event_command *cmd);
 extern int register_trigger_hist_enable_disable_cmds(void);
@@ -1586,7 +1590,8 @@ extern int register_trigger_hist_enable_disable_cmds(void);
  */
 struct event_trigger_ops {
 	void			(*func)(struct event_trigger_data *data,
-					void *rec);
+					void *rec,
+					struct ring_buffer_event *rbe);
 	int			(*init)(struct event_trigger_ops *ops,
 					struct event_trigger_data *data);
 	void			(*free)(struct event_trigger_ops *ops,
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 5fdc779f411d..d8a188e0418a 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -96,7 +96,7 @@ u64 notrace trace_clock_global(void)
 	int this_cpu;
 	u64 now;
 
-	local_irq_save(flags);
+	raw_local_irq_save(flags);
 
 	this_cpu = raw_smp_processor_id();
 	now = sched_clock_cpu(this_cpu);
@@ -122,7 +122,7 @@ u64 notrace trace_clock_global(void)
 	arch_spin_unlock(&trace_clock_struct.lock);
 
  out:
-	local_irq_restore(flags);
+	raw_local_irq_restore(flags);
 
 	return now;
 }
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index a764aec3c9a1..1bda4ec95e18 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -33,163 +33,595 @@
 	"# Only events with the given fields will be affected.\n"	\
 	"# If no events are modified, an error message will be displayed here"
 
-enum filter_op_ids
-{
-	OP_OR,
-	OP_AND,
-	OP_GLOB,
-	OP_NE,
-	OP_EQ,
-	OP_LT,
-	OP_LE,
-	OP_GT,
-	OP_GE,
-	OP_BAND,
-	OP_NOT,
-	OP_NONE,
-	OP_OPEN_PAREN,
-};
+/* Due to token parsing '<=' must be before '<' and '>=' must be before '>' */
+#define OPS					\
+	C( OP_GLOB,	"~"  ),			\
+	C( OP_NE,	"!=" ),			\
+	C( OP_EQ,	"==" ),			\
+	C( OP_LE,	"<=" ),			\
+	C( OP_LT,	"<"  ),			\
+	C( OP_GE,	">=" ),			\
+	C( OP_GT,	">"  ),			\
+	C( OP_BAND,	"&"  ),			\
+	C( OP_MAX,	NULL )
 
-struct filter_op {
-	int id;
-	char *string;
-	int precedence;
-};
+#undef C
+#define C(a, b)	a
 
-/* Order must be the same as enum filter_op_ids above */
-static struct filter_op filter_ops[] = {
-	{ OP_OR,	"||",		1 },
-	{ OP_AND,	"&&",		2 },
-	{ OP_GLOB,	"~",		4 },
-	{ OP_NE,	"!=",		4 },
-	{ OP_EQ,	"==",		4 },
-	{ OP_LT,	"<",		5 },
-	{ OP_LE,	"<=",		5 },
-	{ OP_GT,	">",		5 },
-	{ OP_GE,	">=",		5 },
-	{ OP_BAND,	"&",		6 },
-	{ OP_NOT,	"!",		6 },
-	{ OP_NONE,	"OP_NONE",	0 },
-	{ OP_OPEN_PAREN, "(",		0 },
-};
+enum filter_op_ids { OPS };
 
-enum {
-	FILT_ERR_NONE,
-	FILT_ERR_INVALID_OP,
-	FILT_ERR_UNBALANCED_PAREN,
-	FILT_ERR_TOO_MANY_OPERANDS,
-	FILT_ERR_OPERAND_TOO_LONG,
-	FILT_ERR_FIELD_NOT_FOUND,
-	FILT_ERR_ILLEGAL_FIELD_OP,
-	FILT_ERR_ILLEGAL_INTVAL,
-	FILT_ERR_BAD_SUBSYS_FILTER,
-	FILT_ERR_TOO_MANY_PREDS,
-	FILT_ERR_MISSING_FIELD,
-	FILT_ERR_INVALID_FILTER,
-	FILT_ERR_IP_FIELD_ONLY,
-	FILT_ERR_ILLEGAL_NOT_OP,
-};
+#undef C
+#define C(a, b)	b
 
-static char *err_text[] = {
-	"No error",
-	"Invalid operator",
-	"Unbalanced parens",
-	"Too many operands",
-	"Operand too long",
-	"Field not found",
-	"Illegal operation for field type",
-	"Illegal integer value",
-	"Couldn't find or set field in one of a subsystem's events",
-	"Too many terms in predicate expression",
-	"Missing field name and/or value",
-	"Meaningless filter expression",
-	"Only 'ip' field is supported for function trace",
-	"Illegal use of '!'",
-};
+static const char * ops[] = { OPS };
 
-struct opstack_op {
-	enum filter_op_ids op;
-	struct list_head list;
-};
+/*
+ * pred functions are OP_LE, OP_LT, OP_GE, OP_GT, and OP_BAND
+ * pred_funcs_##type below must match the order of them above.
+ */
+#define PRED_FUNC_START			OP_LE
+#define PRED_FUNC_MAX			(OP_BAND - PRED_FUNC_START)
+
+#define ERRORS								\
+	C(NONE,			"No error"),				\
+	C(INVALID_OP,		"Invalid operator"),			\
+	C(TOO_MANY_OPEN,	"Too many '('"),			\
+	C(TOO_MANY_CLOSE,	"Too few '('"),				\
+	C(MISSING_QUOTE,	"Missing matching quote"),		\
+	C(OPERAND_TOO_LONG,	"Operand too long"),			\
+	C(EXPECT_STRING,	"Expecting string field"),		\
+	C(EXPECT_DIGIT,		"Expecting numeric field"),		\
+	C(ILLEGAL_FIELD_OP,	"Illegal operation for field type"),	\
+	C(FIELD_NOT_FOUND,	"Field not found"),			\
+	C(ILLEGAL_INTVAL,	"Illegal integer value"),		\
+	C(BAD_SUBSYS_FILTER,	"Couldn't find or set field in one of a subsystem's events"), \
+	C(TOO_MANY_PREDS,	"Too many terms in predicate expression"), \
+	C(INVALID_FILTER,	"Meaningless filter expression"),	\
+	C(IP_FIELD_ONLY,	"Only 'ip' field is supported for function trace"), \
+	C(INVALID_VALUE,	"Invalid value (did you forget quotes)?"),
+
+#undef C
+#define C(a, b)		FILT_ERR_##a
+
+enum { ERRORS };
+
+#undef C
+#define C(a, b)		b
+
+static char *err_text[] = { ERRORS };
+
+/* Called after a '!' character but "!=" and "!~" are not "not"s */
+static bool is_not(const char *str)
+{
+	switch (str[1]) {
+	case '=':
+	case '~':
+		return false;
+	}
+	return true;
+}
 
-struct postfix_elt {
-	enum filter_op_ids op;
-	char *operand;
-	struct list_head list;
+/**
+ * prog_entry - a singe entry in the filter program
+ * @target:	     Index to jump to on a branch (actually one minus the index)
+ * @when_to_branch:  The value of the result of the predicate to do a branch
+ * @pred:	     The predicate to execute.
+ */
+struct prog_entry {
+	int			target;
+	int			when_to_branch;
+	struct filter_pred	*pred;
 };
 
-struct filter_parse_state {
-	struct filter_op *ops;
-	struct list_head opstack;
-	struct list_head postfix;
+/**
+ * update_preds- assign a program entry a label target
+ * @prog: The program array
+ * @N: The index of the current entry in @prog
+ * @when_to_branch: What to assign a program entry for its branch condition
+ *
+ * The program entry at @N has a target that points to the index of a program
+ * entry that can have its target and when_to_branch fields updated.
+ * Update the current program entry denoted by index @N target field to be
+ * that of the updated entry. This will denote the entry to update if
+ * we are processing an "||" after an "&&"
+ */
+static void update_preds(struct prog_entry *prog, int N, int invert)
+{
+	int t, s;
+
+	t = prog[N].target;
+	s = prog[t].target;
+	prog[t].when_to_branch = invert;
+	prog[t].target = N;
+	prog[N].target = s;
+}
+
+struct filter_parse_error {
 	int lasterr;
 	int lasterr_pos;
-
-	struct {
-		char *string;
-		unsigned int cnt;
-		unsigned int tail;
-	} infix;
-
-	struct {
-		char string[MAX_FILTER_STR_VAL];
-		int pos;
-		unsigned int tail;
-	} operand;
 };
 
-struct pred_stack {
-	struct filter_pred	**preds;
-	int			index;
+static void parse_error(struct filter_parse_error *pe, int err, int pos)
+{
+	pe->lasterr = err;
+	pe->lasterr_pos = pos;
+}
+
+typedef int (*parse_pred_fn)(const char *str, void *data, int pos,
+			     struct filter_parse_error *pe,
+			     struct filter_pred **pred);
+
+enum {
+	INVERT		= 1,
+	PROCESS_AND	= 2,
+	PROCESS_OR	= 4,
 };
 
-/* If not of not match is equal to not of not, then it is a match */
+/*
+ * Without going into a formal proof, this explains the method that is used in
+ * parsing the logical expressions.
+ *
+ * For example, if we have: "a && !(!b || (c && g)) || d || e && !f"
+ * The first pass will convert it into the following program:
+ *
+ * n1: r=a;       l1: if (!r) goto l4;
+ * n2: r=b;       l2: if (!r) goto l4;
+ * n3: r=c; r=!r; l3: if (r) goto l4;
+ * n4: r=g; r=!r; l4: if (r) goto l5;
+ * n5: r=d;       l5: if (r) goto T
+ * n6: r=e;       l6: if (!r) goto l7;
+ * n7: r=f; r=!r; l7: if (!r) goto F
+ * T: return TRUE
+ * F: return FALSE
+ *
+ * To do this, we use a data structure to represent each of the above
+ * predicate and conditions that has:
+ *
+ *  predicate, when_to_branch, invert, target
+ *
+ * The "predicate" will hold the function to determine the result "r".
+ * The "when_to_branch" denotes what "r" should be if a branch is to be taken
+ * "&&" would contain "!r" or (0) and "||" would contain "r" or (1).
+ * The "invert" holds whether the value should be reversed before testing.
+ * The "target" contains the label "l#" to jump to.
+ *
+ * A stack is created to hold values when parentheses are used.
+ *
+ * To simplify the logic, the labels will start at 0 and not 1.
+ *
+ * The possible invert values are 1 and 0. The number of "!"s that are in scope
+ * before the predicate determines the invert value, if the number is odd then
+ * the invert value is 1 and 0 otherwise. This means the invert value only
+ * needs to be toggled when a new "!" is introduced compared to what is stored
+ * on the stack, where parentheses were used.
+ *
+ * The top of the stack and "invert" are initialized to zero.
+ *
+ * ** FIRST PASS **
+ *
+ * #1 A loop through all the tokens is done:
+ *
+ * #2 If the token is an "(", the stack is push, and the current stack value
+ *    gets the current invert value, and the loop continues to the next token.
+ *    The top of the stack saves the "invert" value to keep track of what
+ *    the current inversion is. As "!(a && !b || c)" would require all
+ *    predicates being affected separately by the "!" before the parentheses.
+ *    And that would end up being equivalent to "(!a || b) && !c"
+ *
+ * #3 If the token is an "!", the current "invert" value gets inverted, and
+ *    the loop continues. Note, if the next token is a predicate, then
+ *    this "invert" value is only valid for the current program entry,
+ *    and does not affect other predicates later on.
+ *
+ * The only other acceptable token is the predicate string.
+ *
+ * #4 A new entry into the program is added saving: the predicate and the
+ *    current value of "invert". The target is currently assigned to the
+ *    previous program index (this will not be its final value).
+ *
+ * #5 We now enter another loop and look at the next token. The only valid
+ *    tokens are ")", "&&", "||" or end of the input string "\0".
+ *
+ * #6 The invert variable is reset to the current value saved on the top of
+ *    the stack.
+ *
+ * #7 The top of the stack holds not only the current invert value, but also
+ *    if a "&&" or "||" needs to be processed. Note, the "&&" takes higher
+ *    precedence than "||". That is "a && b || c && d" is equivalent to
+ *    "(a && b) || (c && d)". Thus the first thing to do is to see if "&&" needs
+ *    to be processed. This is the case if an "&&" was the last token. If it was
+ *    then we call update_preds(). This takes the program, the current index in
+ *    the program, and the current value of "invert".  More will be described
+ *    below about this function.
+ *
+ * #8 If the next token is "&&" then we set a flag in the top of the stack
+ *    that denotes that "&&" needs to be processed, break out of this loop
+ *    and continue with the outer loop.
+ *
+ * #9 Otherwise, if a "||" needs to be processed then update_preds() is called.
+ *    This is called with the program, the current index in the program, but
+ *    this time with an inverted value of "invert" (that is !invert). This is
+ *    because the value taken will become the "when_to_branch" value of the
+ *    program.
+ *    Note, this is called when the next token is not an "&&". As stated before,
+ *    "&&" takes higher precedence, and "||" should not be processed yet if the
+ *    next logical operation is "&&".
+ *
+ * #10 If the next token is "||" then we set a flag in the top of the stack
+ *     that denotes that "||" needs to be processed, break out of this loop
+ *     and continue with the outer loop.
+ *
+ * #11 If this is the end of the input string "\0" then we break out of both
+ *     loops.
+ *
+ * #12 Otherwise, the next token is ")", where we pop the stack and continue
+ *     this inner loop.
+ *
+ * Now to discuss the update_pred() function, as that is key to the setting up
+ * of the program. Remember the "target" of the program is initialized to the
+ * previous index and not the "l" label. The target holds the index into the
+ * program that gets affected by the operand. Thus if we have something like
+ *  "a || b && c", when we process "a" the target will be "-1" (undefined).
+ * When we process "b", its target is "0", which is the index of "a", as that's
+ * the predicate that is affected by "||". But because the next token after "b"
+ * is "&&" we don't call update_preds(). Instead continue to "c". As the
+ * next token after "c" is not "&&" but the end of input, we first process the
+ * "&&" by calling update_preds() for the "&&" then we process the "||" by
+ * callin updates_preds() with the values for processing "||".
+ *
+ * What does that mean? What update_preds() does is to first save the "target"
+ * of the program entry indexed by the current program entry's "target"
+ * (remember the "target" is initialized to previous program entry), and then
+ * sets that "target" to the current index which represents the label "l#".
+ * That entry's "when_to_branch" is set to the value passed in (the "invert"
+ * or "!invert"). Then it sets the current program entry's target to the saved
+ * "target" value (the old value of the program that had its "target" updated
+ * to the label).
+ *
+ * Looking back at "a || b && c", we have the following steps:
+ *  "a"  - prog[0] = { "a", X, -1 } // pred, when_to_branch, target
+ *  "||" - flag that we need to process "||"; continue outer loop
+ *  "b"  - prog[1] = { "b", X, 0 }
+ *  "&&" - flag that we need to process "&&"; continue outer loop
+ * (Notice we did not process "||")
+ *  "c"  - prog[2] = { "c", X, 1 }
+ *  update_preds(prog, 2, 0); // invert = 0 as we are processing "&&"
+ *    t = prog[2].target; // t = 1
+ *    s = prog[t].target; // s = 0
+ *    prog[t].target = 2; // Set target to "l2"
+ *    prog[t].when_to_branch = 0;
+ *    prog[2].target = s;
+ * update_preds(prog, 2, 1); // invert = 1 as we are now processing "||"
+ *    t = prog[2].target; // t = 0
+ *    s = prog[t].target; // s = -1
+ *    prog[t].target = 2; // Set target to "l2"
+ *    prog[t].when_to_branch = 1;
+ *    prog[2].target = s;
+ *
+ * #13 Which brings us to the final step of the first pass, which is to set
+ *     the last program entry's when_to_branch and target, which will be
+ *     when_to_branch = 0; target = N; ( the label after the program entry after
+ *     the last program entry processed above).
+ *
+ * If we denote "TRUE" to be the entry after the last program entry processed,
+ * and "FALSE" the program entry after that, we are now done with the first
+ * pass.
+ *
+ * Making the above "a || b && c" have a progam of:
+ *  prog[0] = { "a", 1, 2 }
+ *  prog[1] = { "b", 0, 2 }
+ *  prog[2] = { "c", 0, 3 }
+ *
+ * Which translates into:
+ * n0: r = a; l0: if (r) goto l2;
+ * n1: r = b; l1: if (!r) goto l2;
+ * n2: r = c; l2: if (!r) goto l3;  // Which is the same as "goto F;"
+ * T: return TRUE; l3:
+ * F: return FALSE
+ *
+ * Although, after the first pass, the program is correct, it is
+ * inefficient. The simple sample of "a || b && c" could be easily been
+ * converted into:
+ * n0: r = a; if (r) goto T
+ * n1: r = b; if (!r) goto F
+ * n2: r = c; if (!r) goto F
+ * T: return TRUE;
+ * F: return FALSE;
+ *
+ * The First Pass is over the input string. The next too passes are over
+ * the program itself.
+ *
+ * ** SECOND PASS **
+ *
+ * Which brings us to the second pass. If a jump to a label has the
+ * same condition as that label, it can instead jump to its target.
+ * The original example of "a && !(!b || (c && g)) || d || e && !f"
+ * where the first pass gives us:
+ *
+ * n1: r=a;       l1: if (!r) goto l4;
+ * n2: r=b;       l2: if (!r) goto l4;
+ * n3: r=c; r=!r; l3: if (r) goto l4;
+ * n4: r=g; r=!r; l4: if (r) goto l5;
+ * n5: r=d;       l5: if (r) goto T
+ * n6: r=e;       l6: if (!r) goto l7;
+ * n7: r=f; r=!r; l7: if (!r) goto F:
+ * T: return TRUE;
+ * F: return FALSE
+ *
+ * We can see that "l3: if (r) goto l4;" and at l4, we have "if (r) goto l5;".
+ * And "l5: if (r) goto T", we could optimize this by converting l3 and l4
+ * to go directly to T. To accomplish this, we start from the last
+ * entry in the program and work our way back. If the target of the entry
+ * has the same "when_to_branch" then we could use that entry's target.
+ * Doing this, the above would end up as:
+ *
+ * n1: r=a;       l1: if (!r) goto l4;
+ * n2: r=b;       l2: if (!r) goto l4;
+ * n3: r=c; r=!r; l3: if (r) goto T;
+ * n4: r=g; r=!r; l4: if (r) goto T;
+ * n5: r=d;       l5: if (r) goto T;
+ * n6: r=e;       l6: if (!r) goto F;
+ * n7: r=f; r=!r; l7: if (!r) goto F;
+ * T: return TRUE
+ * F: return FALSE
+ *
+ * In that same pass, if the "when_to_branch" doesn't match, we can simply
+ * go to the program entry after the label. That is, "l2: if (!r) goto l4;"
+ * where "l4: if (r) goto T;", then we can convert l2 to be:
+ * "l2: if (!r) goto n5;".
+ *
+ * This will have the second pass give us:
+ * n1: r=a;       l1: if (!r) goto n5;
+ * n2: r=b;       l2: if (!r) goto n5;
+ * n3: r=c; r=!r; l3: if (r) goto T;
+ * n4: r=g; r=!r; l4: if (r) goto T;
+ * n5: r=d;       l5: if (r) goto T
+ * n6: r=e;       l6: if (!r) goto F;
+ * n7: r=f; r=!r; l7: if (!r) goto F
+ * T: return TRUE
+ * F: return FALSE
+ *
+ * Notice, all the "l#" labels are no longer used, and they can now
+ * be discarded.
+ *
+ * ** THIRD PASS **
+ *
+ * For the third pass we deal with the inverts. As they simply just
+ * make the "when_to_branch" get inverted, a simple loop over the
+ * program to that does: "when_to_branch ^= invert;" will do the
+ * job, leaving us with:
+ * n1: r=a; if (!r) goto n5;
+ * n2: r=b; if (!r) goto n5;
+ * n3: r=c: if (!r) goto T;
+ * n4: r=g; if (!r) goto T;
+ * n5: r=d; if (r) goto T
+ * n6: r=e; if (!r) goto F;
+ * n7: r=f; if (r) goto F
+ * T: return TRUE
+ * F: return FALSE
+ *
+ * As "r = a; if (!r) goto n5;" is obviously the same as
+ * "if (!a) goto n5;" without doing anything we can interperate the
+ * program as:
+ * n1: if (!a) goto n5;
+ * n2: if (!b) goto n5;
+ * n3: if (!c) goto T;
+ * n4: if (!g) goto T;
+ * n5: if (d) goto T
+ * n6: if (!e) goto F;
+ * n7: if (f) goto F
+ * T: return TRUE
+ * F: return FALSE
+ *
+ * Since the inverts are discarded at the end, there's no reason to store
+ * them in the program array (and waste memory). A separate array to hold
+ * the inverts is used and freed at the end.
+ */
+static struct prog_entry *
+predicate_parse(const char *str, int nr_parens, int nr_preds,
+		parse_pred_fn parse_pred, void *data,
+		struct filter_parse_error *pe)
+{
+	struct prog_entry *prog_stack;
+	struct prog_entry *prog;
+	const char *ptr = str;
+	char *inverts = NULL;
+	int *op_stack;
+	int *top;
+	int invert = 0;
+	int ret = -ENOMEM;
+	int len;
+	int N = 0;
+	int i;
+
+	nr_preds += 2; /* For TRUE and FALSE */
+
+	op_stack = kmalloc(sizeof(*op_stack) * nr_parens, GFP_KERNEL);
+	if (!op_stack)
+		return ERR_PTR(-ENOMEM);
+	prog_stack = kmalloc(sizeof(*prog_stack) * nr_preds, GFP_KERNEL);
+	if (!prog_stack) {
+		parse_error(pe, -ENOMEM, 0);
+		goto out_free;
+	}
+	inverts = kmalloc(sizeof(*inverts) * nr_preds, GFP_KERNEL);
+	if (!inverts) {
+		parse_error(pe, -ENOMEM, 0);
+		goto out_free;
+	}
+
+	top = op_stack;
+	prog = prog_stack;
+	*top = 0;
+
+	/* First pass */
+	while (*ptr) {						/* #1 */
+		const char *next = ptr++;
+
+		if (isspace(*next))
+			continue;
+
+		switch (*next) {
+		case '(':					/* #2 */
+			if (top - op_stack > nr_parens)
+				return ERR_PTR(-EINVAL);
+			*(++top) = invert;
+			continue;
+		case '!':					/* #3 */
+			if (!is_not(next))
+				break;
+			invert = !invert;
+			continue;
+		}
+
+		if (N >= nr_preds) {
+			parse_error(pe, FILT_ERR_TOO_MANY_PREDS, next - str);
+			goto out_free;
+		}
+
+		inverts[N] = invert;				/* #4 */
+		prog[N].target = N-1;
+
+		len = parse_pred(next, data, ptr - str, pe, &prog[N].pred);
+		if (len < 0) {
+			ret = len;
+			goto out_free;
+		}
+		ptr = next + len;
+
+		N++;
+
+		ret = -1;
+		while (1) {					/* #5 */
+			next = ptr++;
+			if (isspace(*next))
+				continue;
+
+			switch (*next) {
+			case ')':
+			case '\0':
+				break;
+			case '&':
+			case '|':
+				if (next[1] == next[0]) {
+					ptr++;
+					break;
+				}
+			default:
+				parse_error(pe, FILT_ERR_TOO_MANY_PREDS,
+					    next - str);
+				goto out_free;
+			}
+
+			invert = *top & INVERT;
+
+			if (*top & PROCESS_AND) {		/* #7 */
+				update_preds(prog, N - 1, invert);
+				*top &= ~PROCESS_AND;
+			}
+			if (*next == '&') {			/* #8 */
+				*top |= PROCESS_AND;
+				break;
+			}
+			if (*top & PROCESS_OR) {		/* #9 */
+				update_preds(prog, N - 1, !invert);
+				*top &= ~PROCESS_OR;
+			}
+			if (*next == '|') {			/* #10 */
+				*top |= PROCESS_OR;
+				break;
+			}
+			if (!*next)				/* #11 */
+				goto out;
+
+			if (top == op_stack) {
+				ret = -1;
+				/* Too few '(' */
+				parse_error(pe, FILT_ERR_TOO_MANY_CLOSE, ptr - str);
+				goto out_free;
+			}
+			top--;					/* #12 */
+		}
+	}
+ out:
+	if (top != op_stack) {
+		/* Too many '(' */
+		parse_error(pe, FILT_ERR_TOO_MANY_OPEN, ptr - str);
+		goto out_free;
+	}
+
+	prog[N].pred = NULL;					/* #13 */
+	prog[N].target = 1;		/* TRUE */
+	prog[N+1].pred = NULL;
+	prog[N+1].target = 0;		/* FALSE */
+	prog[N-1].target = N;
+	prog[N-1].when_to_branch = false;
+
+	/* Second Pass */
+	for (i = N-1 ; i--; ) {
+		int target = prog[i].target;
+		if (prog[i].when_to_branch == prog[target].when_to_branch)
+			prog[i].target = prog[target].target;
+	}
+
+	/* Third Pass */
+	for (i = 0; i < N; i++) {
+		invert = inverts[i] ^ prog[i].when_to_branch;
+		prog[i].when_to_branch = invert;
+		/* Make sure the program always moves forward */
+		if (WARN_ON(prog[i].target <= i)) {
+			ret = -EINVAL;
+			goto out_free;
+		}
+	}
+
+	return prog;
+out_free:
+	kfree(op_stack);
+	kfree(prog_stack);
+	kfree(inverts);
+	return ERR_PTR(ret);
+}
+
 #define DEFINE_COMPARISON_PRED(type)					\
 static int filter_pred_LT_##type(struct filter_pred *pred, void *event)	\
 {									\
 	type *addr = (type *)(event + pred->offset);			\
 	type val = (type)pred->val;					\
-	int match = (*addr < val);					\
-	return !!match == !pred->not;					\
+	return *addr < val;						\
 }									\
 static int filter_pred_LE_##type(struct filter_pred *pred, void *event)	\
 {									\
 	type *addr = (type *)(event + pred->offset);			\
 	type val = (type)pred->val;					\
-	int match = (*addr <= val);					\
-	return !!match == !pred->not;					\
+	return *addr <= val;						\
 }									\
 static int filter_pred_GT_##type(struct filter_pred *pred, void *event)	\
 {									\
 	type *addr = (type *)(event + pred->offset);			\
 	type val = (type)pred->val;					\
-	int match = (*addr > val);					\
-	return !!match == !pred->not;					\
+	return *addr > val;					\
 }									\
 static int filter_pred_GE_##type(struct filter_pred *pred, void *event)	\
 {									\
 	type *addr = (type *)(event + pred->offset);			\
 	type val = (type)pred->val;					\
-	int match = (*addr >= val);					\
-	return !!match == !pred->not;					\
+	return *addr >= val;						\
 }									\
 static int filter_pred_BAND_##type(struct filter_pred *pred, void *event) \
 {									\
 	type *addr = (type *)(event + pred->offset);			\
 	type val = (type)pred->val;					\
-	int match = !!(*addr & val);					\
-	return match == !pred->not;					\
+	return !!(*addr & val);						\
 }									\
 static const filter_pred_fn_t pred_funcs_##type[] = {			\
-	filter_pred_LT_##type,						\
 	filter_pred_LE_##type,						\
-	filter_pred_GT_##type,						\
+	filter_pred_LT_##type,						\
 	filter_pred_GE_##type,						\
+	filter_pred_GT_##type,						\
 	filter_pred_BAND_##type,					\
 };
 
-#define PRED_FUNC_START			OP_LT
-
 #define DEFINE_EQUALITY_PRED(size)					\
 static int filter_pred_##size(struct filter_pred *pred, void *event)	\
 {									\
@@ -272,44 +704,36 @@ static int filter_pred_strloc(struct filter_pred *pred, void *event)
 static int filter_pred_cpu(struct filter_pred *pred, void *event)
 {
 	int cpu, cmp;
-	int match = 0;
 
 	cpu = raw_smp_processor_id();
 	cmp = pred->val;
 
 	switch (pred->op) {
 	case OP_EQ:
-		match = cpu == cmp;
-		break;
+		return cpu == cmp;
+	case OP_NE:
+		return cpu != cmp;
 	case OP_LT:
-		match = cpu < cmp;
-		break;
+		return cpu < cmp;
 	case OP_LE:
-		match = cpu <= cmp;
-		break;
+		return cpu <= cmp;
 	case OP_GT:
-		match = cpu > cmp;
-		break;
+		return cpu > cmp;
 	case OP_GE:
-		match = cpu >= cmp;
-		break;
+		return cpu >= cmp;
 	default:
-		break;
+		return 0;
 	}
-
-	return !!match == !pred->not;
 }
 
 /* Filter predicate for COMM. */
 static int filter_pred_comm(struct filter_pred *pred, void *event)
 {
-	int cmp, match;
+	int cmp;
 
 	cmp = pred->regex.match(current->comm, &pred->regex,
-				pred->regex.field_len);
-	match = cmp ^ pred->not;
-
-	return match;
+				TASK_COMM_LEN);
+	return cmp ^ pred->not;
 }
 
 static int filter_pred_none(struct filter_pred *pred, void *event)
@@ -366,6 +790,7 @@ static int regex_match_glob(char *str, struct regex *r, int len __maybe_unused)
 		return 1;
 	return 0;
 }
+
 /**
  * filter_parse_regex - parse a basic regex
  * @buff:   the raw regex
@@ -426,10 +851,9 @@ static void filter_build_regex(struct filter_pred *pred)
 	struct regex *r = &pred->regex;
 	char *search;
 	enum regex_type type = MATCH_FULL;
-	int not = 0;
 
 	if (pred->op == OP_GLOB) {
-		type = filter_parse_regex(r->pattern, r->len, &search, &not);
+		type = filter_parse_regex(r->pattern, r->len, &search, &pred->not);
 		r->len = strlen(search);
 		memmove(r->pattern, search, r->len+1);
 	}
@@ -451,210 +875,32 @@ static void filter_build_regex(struct filter_pred *pred)
 		r->match = regex_match_glob;
 		break;
 	}
-
-	pred->not ^= not;
-}
-
-enum move_type {
-	MOVE_DOWN,
-	MOVE_UP_FROM_LEFT,
-	MOVE_UP_FROM_RIGHT
-};
-
-static struct filter_pred *
-get_pred_parent(struct filter_pred *pred, struct filter_pred *preds,
-		int index, enum move_type *move)
-{
-	if (pred->parent & FILTER_PRED_IS_RIGHT)
-		*move = MOVE_UP_FROM_RIGHT;
-	else
-		*move = MOVE_UP_FROM_LEFT;
-	pred = &preds[pred->parent & ~FILTER_PRED_IS_RIGHT];
-
-	return pred;
-}
-
-enum walk_return {
-	WALK_PRED_ABORT,
-	WALK_PRED_PARENT,
-	WALK_PRED_DEFAULT,
-};
-
-typedef int (*filter_pred_walkcb_t) (enum move_type move,
-				     struct filter_pred *pred,
-				     int *err, void *data);
-
-static int walk_pred_tree(struct filter_pred *preds,
-			  struct filter_pred *root,
-			  filter_pred_walkcb_t cb, void *data)
-{
-	struct filter_pred *pred = root;
-	enum move_type move = MOVE_DOWN;
-	int done = 0;
-
-	if  (!preds)
-		return -EINVAL;
-
-	do {
-		int err = 0, ret;
-
-		ret = cb(move, pred, &err, data);
-		if (ret == WALK_PRED_ABORT)
-			return err;
-		if (ret == WALK_PRED_PARENT)
-			goto get_parent;
-
-		switch (move) {
-		case MOVE_DOWN:
-			if (pred->left != FILTER_PRED_INVALID) {
-				pred = &preds[pred->left];
-				continue;
-			}
-			goto get_parent;
-		case MOVE_UP_FROM_LEFT:
-			pred = &preds[pred->right];
-			move = MOVE_DOWN;
-			continue;
-		case MOVE_UP_FROM_RIGHT:
- get_parent:
-			if (pred == root)
-				break;
-			pred = get_pred_parent(pred, preds,
-					       pred->parent,
-					       &move);
-			continue;
-		}
-		done = 1;
-	} while (!done);
-
-	/* We are fine. */
-	return 0;
-}
-
-/*
- * A series of AND or ORs where found together. Instead of
- * climbing up and down the tree branches, an array of the
- * ops were made in order of checks. We can just move across
- * the array and short circuit if needed.
- */
-static int process_ops(struct filter_pred *preds,
-		       struct filter_pred *op, void *rec)
-{
-	struct filter_pred *pred;
-	int match = 0;
-	int type;
-	int i;
-
-	/*
-	 * Micro-optimization: We set type to true if op
-	 * is an OR and false otherwise (AND). Then we
-	 * just need to test if the match is equal to
-	 * the type, and if it is, we can short circuit the
-	 * rest of the checks:
-	 *
-	 * if ((match && op->op == OP_OR) ||
-	 *     (!match && op->op == OP_AND))
-	 *	  return match;
-	 */
-	type = op->op == OP_OR;
-
-	for (i = 0; i < op->val; i++) {
-		pred = &preds[op->ops[i]];
-		if (!WARN_ON_ONCE(!pred->fn))
-			match = pred->fn(pred, rec);
-		if (!!match == type)
-			break;
-	}
-	/* If not of not match is equal to not of not, then it is a match */
-	return !!match == !op->not;
-}
-
-struct filter_match_preds_data {
-	struct filter_pred *preds;
-	int match;
-	void *rec;
-};
-
-static int filter_match_preds_cb(enum move_type move, struct filter_pred *pred,
-				 int *err, void *data)
-{
-	struct filter_match_preds_data *d = data;
-
-	*err = 0;
-	switch (move) {
-	case MOVE_DOWN:
-		/* only AND and OR have children */
-		if (pred->left != FILTER_PRED_INVALID) {
-			/* If ops is set, then it was folded. */
-			if (!pred->ops)
-				return WALK_PRED_DEFAULT;
-			/* We can treat folded ops as a leaf node */
-			d->match = process_ops(d->preds, pred, d->rec);
-		} else {
-			if (!WARN_ON_ONCE(!pred->fn))
-				d->match = pred->fn(pred, d->rec);
-		}
-
-		return WALK_PRED_PARENT;
-	case MOVE_UP_FROM_LEFT:
-		/*
-		 * Check for short circuits.
-		 *
-		 * Optimization: !!match == (pred->op == OP_OR)
-		 *   is the same as:
-		 * if ((match && pred->op == OP_OR) ||
-		 *     (!match && pred->op == OP_AND))
-		 */
-		if (!!d->match == (pred->op == OP_OR))
-			return WALK_PRED_PARENT;
-		break;
-	case MOVE_UP_FROM_RIGHT:
-		break;
-	}
-
-	return WALK_PRED_DEFAULT;
 }
 
 /* return 1 if event matches, 0 otherwise (discard) */
 int filter_match_preds(struct event_filter *filter, void *rec)
 {
-	struct filter_pred *preds;
-	struct filter_pred *root;
-	struct filter_match_preds_data data = {
-		/* match is currently meaningless */
-		.match = -1,
-		.rec   = rec,
-	};
-	int n_preds, ret;
+	struct prog_entry *prog;
+	int i;
 
 	/* no filter is considered a match */
 	if (!filter)
 		return 1;
 
-	n_preds = filter->n_preds;
-	if (!n_preds)
+	prog = rcu_dereference_sched(filter->prog);
+	if (!prog)
 		return 1;
 
-	/*
-	 * n_preds, root and filter->preds are protect with preemption disabled.
-	 */
-	root = rcu_dereference_sched(filter->root);
-	if (!root)
-		return 1;
-
-	data.preds = preds = rcu_dereference_sched(filter->preds);
-	ret = walk_pred_tree(preds, root, filter_match_preds_cb, &data);
-	WARN_ON(ret);
-	return data.match;
+	for (i = 0; prog[i].pred; i++) {
+		struct filter_pred *pred = prog[i].pred;
+		int match = pred->fn(pred, rec);
+		if (match == prog[i].when_to_branch)
+			i = prog[i].target;
+	}
+	return prog[i].target;
 }
 EXPORT_SYMBOL_GPL(filter_match_preds);
 
-static void parse_error(struct filter_parse_state *ps, int err, int pos)
-{
-	ps->lasterr = err;
-	ps->lasterr_pos = pos;
-}
-
 static void remove_filter_string(struct event_filter *filter)
 {
 	if (!filter)
@@ -664,57 +910,44 @@ static void remove_filter_string(struct event_filter *filter)
 	filter->filter_string = NULL;
 }
 
-static int replace_filter_string(struct event_filter *filter,
-				 char *filter_string)
-{
-	kfree(filter->filter_string);
-	filter->filter_string = kstrdup(filter_string, GFP_KERNEL);
-	if (!filter->filter_string)
-		return -ENOMEM;
-
-	return 0;
-}
-
-static int append_filter_string(struct event_filter *filter,
-				char *string)
-{
-	int newlen;
-	char *new_filter_string;
-
-	BUG_ON(!filter->filter_string);
-	newlen = strlen(filter->filter_string) + strlen(string) + 1;
-	new_filter_string = kmalloc(newlen, GFP_KERNEL);
-	if (!new_filter_string)
-		return -ENOMEM;
-
-	strcpy(new_filter_string, filter->filter_string);
-	strcat(new_filter_string, string);
-	kfree(filter->filter_string);
-	filter->filter_string = new_filter_string;
-
-	return 0;
-}
-
-static void append_filter_err(struct filter_parse_state *ps,
+static void append_filter_err(struct filter_parse_error *pe,
 			      struct event_filter *filter)
 {
-	int pos = ps->lasterr_pos;
-	char *buf, *pbuf;
+	struct trace_seq *s;
+	int pos = pe->lasterr_pos;
+	char *buf;
+	int len;
+
+	if (WARN_ON(!filter->filter_string))
+		return;
 
-	buf = (char *)__get_free_page(GFP_KERNEL);
-	if (!buf)
+	s = kmalloc(sizeof(*s), GFP_KERNEL);
+	if (!s)
 		return;
+	trace_seq_init(s);
 
-	append_filter_string(filter, "\n");
-	memset(buf, ' ', PAGE_SIZE);
-	if (pos > PAGE_SIZE - 128)
-		pos = 0;
-	buf[pos] = '^';
-	pbuf = &buf[pos] + 1;
+	len = strlen(filter->filter_string);
+	if (pos > len)
+		pos = len;
 
-	sprintf(pbuf, "\nparse_error: %s\n", err_text[ps->lasterr]);
-	append_filter_string(filter, buf);
-	free_page((unsigned long) buf);
+	/* indexing is off by one */
+	if (pos)
+		pos++;
+
+	trace_seq_puts(s, filter->filter_string);
+	if (pe->lasterr > 0) {
+		trace_seq_printf(s, "\n%*s", pos, "^");
+		trace_seq_printf(s, "\nparse_error: %s\n", err_text[pe->lasterr]);
+	} else {
+		trace_seq_printf(s, "\nError: (%d)\n", pe->lasterr);
+	}
+	trace_seq_putc(s, 0);
+	buf = kmemdup_nul(s->buffer, s->seq.len, GFP_KERNEL);
+	if (buf) {
+		kfree(filter->filter_string);
+		filter->filter_string = buf;
+	}
+	kfree(s);
 }
 
 static inline struct event_filter *event_filter(struct trace_event_file *file)
@@ -747,108 +980,18 @@ void print_subsystem_event_filter(struct event_subsystem *system,
 	mutex_unlock(&event_mutex);
 }
 
-static int __alloc_pred_stack(struct pred_stack *stack, int n_preds)
-{
-	stack->preds = kcalloc(n_preds + 1, sizeof(*stack->preds), GFP_KERNEL);
-	if (!stack->preds)
-		return -ENOMEM;
-	stack->index = n_preds;
-	return 0;
-}
-
-static void __free_pred_stack(struct pred_stack *stack)
-{
-	kfree(stack->preds);
-	stack->index = 0;
-}
-
-static int __push_pred_stack(struct pred_stack *stack,
-			     struct filter_pred *pred)
-{
-	int index = stack->index;
-
-	if (WARN_ON(index == 0))
-		return -ENOSPC;
-
-	stack->preds[--index] = pred;
-	stack->index = index;
-	return 0;
-}
-
-static struct filter_pred *
-__pop_pred_stack(struct pred_stack *stack)
-{
-	struct filter_pred *pred;
-	int index = stack->index;
-
-	pred = stack->preds[index++];
-	if (!pred)
-		return NULL;
-
-	stack->index = index;
-	return pred;
-}
-
-static int filter_set_pred(struct event_filter *filter,
-			   int idx,
-			   struct pred_stack *stack,
-			   struct filter_pred *src)
-{
-	struct filter_pred *dest = &filter->preds[idx];
-	struct filter_pred *left;
-	struct filter_pred *right;
-
-	*dest = *src;
-	dest->index = idx;
-
-	if (dest->op == OP_OR || dest->op == OP_AND) {
-		right = __pop_pred_stack(stack);
-		left = __pop_pred_stack(stack);
-		if (!left || !right)
-			return -EINVAL;
-		/*
-		 * If both children can be folded
-		 * and they are the same op as this op or a leaf,
-		 * then this op can be folded.
-		 */
-		if (left->index & FILTER_PRED_FOLD &&
-		    ((left->op == dest->op && !left->not) ||
-		     left->left == FILTER_PRED_INVALID) &&
-		    right->index & FILTER_PRED_FOLD &&
-		    ((right->op == dest->op && !right->not) ||
-		     right->left == FILTER_PRED_INVALID))
-			dest->index |= FILTER_PRED_FOLD;
-
-		dest->left = left->index & ~FILTER_PRED_FOLD;
-		dest->right = right->index & ~FILTER_PRED_FOLD;
-		left->parent = dest->index & ~FILTER_PRED_FOLD;
-		right->parent = dest->index | FILTER_PRED_IS_RIGHT;
-	} else {
-		/*
-		 * Make dest->left invalid to be used as a quick
-		 * way to know this is a leaf node.
-		 */
-		dest->left = FILTER_PRED_INVALID;
-
-		/* All leafs allow folding the parent ops. */
-		dest->index |= FILTER_PRED_FOLD;
-	}
-
-	return __push_pred_stack(stack, dest);
-}
-
-static void __free_preds(struct event_filter *filter)
+static void free_prog(struct event_filter *filter)
 {
+	struct prog_entry *prog;
 	int i;
 
-	if (filter->preds) {
-		for (i = 0; i < filter->n_preds; i++)
-			kfree(filter->preds[i].ops);
-		kfree(filter->preds);
-		filter->preds = NULL;
-	}
-	filter->a_preds = 0;
-	filter->n_preds = 0;
+	prog = rcu_access_pointer(filter->prog);
+	if (!prog)
+		return;
+
+	for (i = 0; prog[i].pred; i++)
+		kfree(prog[i].pred);
+	kfree(prog);
 }
 
 static void filter_disable(struct trace_event_file *file)
@@ -866,7 +1009,7 @@ static void __free_filter(struct event_filter *filter)
 	if (!filter)
 		return;
 
-	__free_preds(filter);
+	free_prog(filter);
 	kfree(filter->filter_string);
 	kfree(filter);
 }
@@ -876,38 +1019,6 @@ void free_event_filter(struct event_filter *filter)
 	__free_filter(filter);
 }
 
-static struct event_filter *__alloc_filter(void)
-{
-	struct event_filter *filter;
-
-	filter = kzalloc(sizeof(*filter), GFP_KERNEL);
-	return filter;
-}
-
-static int __alloc_preds(struct event_filter *filter, int n_preds)
-{
-	struct filter_pred *pred;
-	int i;
-
-	if (filter->preds)
-		__free_preds(filter);
-
-	filter->preds = kcalloc(n_preds, sizeof(*filter->preds), GFP_KERNEL);
-
-	if (!filter->preds)
-		return -ENOMEM;
-
-	filter->a_preds = n_preds;
-	filter->n_preds = 0;
-
-	for (i = 0; i < n_preds; i++) {
-		pred = &filter->preds[i];
-		pred->fn = filter_pred_none;
-	}
-
-	return 0;
-}
-
 static inline void __remove_filter(struct trace_event_file *file)
 {
 	filter_disable(file);
@@ -944,27 +1055,6 @@ static void filter_free_subsystem_filters(struct trace_subsystem_dir *dir,
 	}
 }
 
-static int filter_add_pred(struct filter_parse_state *ps,
-			   struct event_filter *filter,
-			   struct filter_pred *pred,
-			   struct pred_stack *stack)
-{
-	int err;
-
-	if (WARN_ON(filter->n_preds == filter->a_preds)) {
-		parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
-		return -ENOSPC;
-	}
-
-	err = filter_set_pred(filter, filter->n_preds, stack, pred);
-	if (err)
-		return err;
-
-	filter->n_preds++;
-
-	return 0;
-}
-
 int filter_assign_type(const char *type)
 {
 	if (strstr(type, "__data_loc") && strstr(type, "char"))
@@ -976,761 +1066,449 @@ int filter_assign_type(const char *type)
 	return FILTER_OTHER;
 }
 
-static bool is_legal_op(struct ftrace_event_field *field, enum filter_op_ids op)
-{
-	if (is_string_field(field) &&
-	    (op != OP_EQ && op != OP_NE && op != OP_GLOB))
-		return false;
-	if (!is_string_field(field) && op == OP_GLOB)
-		return false;
-
-	return true;
-}
-
 static filter_pred_fn_t select_comparison_fn(enum filter_op_ids op,
 					    int field_size, int field_is_signed)
 {
 	filter_pred_fn_t fn = NULL;
+	int pred_func_index = -1;
+
+	switch (op) {
+	case OP_EQ:
+	case OP_NE:
+		break;
+	default:
+		if (WARN_ON_ONCE(op < PRED_FUNC_START))
+			return NULL;
+		pred_func_index = op - PRED_FUNC_START;
+		if (WARN_ON_ONCE(pred_func_index > PRED_FUNC_MAX))
+			return NULL;
+	}
 
 	switch (field_size) {
 	case 8:
-		if (op == OP_EQ || op == OP_NE)
+		if (pred_func_index < 0)
 			fn = filter_pred_64;
 		else if (field_is_signed)
-			fn = pred_funcs_s64[op - PRED_FUNC_START];
+			fn = pred_funcs_s64[pred_func_index];
 		else
-			fn = pred_funcs_u64[op - PRED_FUNC_START];
+			fn = pred_funcs_u64[pred_func_index];
 		break;
 	case 4:
-		if (op == OP_EQ || op == OP_NE)
+		if (pred_func_index < 0)
 			fn = filter_pred_32;
 		else if (field_is_signed)
-			fn = pred_funcs_s32[op - PRED_FUNC_START];
+			fn = pred_funcs_s32[pred_func_index];
 		else
-			fn = pred_funcs_u32[op - PRED_FUNC_START];
+			fn = pred_funcs_u32[pred_func_index];
 		break;
 	case 2:
-		if (op == OP_EQ || op == OP_NE)
+		if (pred_func_index < 0)
 			fn = filter_pred_16;
 		else if (field_is_signed)
-			fn = pred_funcs_s16[op - PRED_FUNC_START];
+			fn = pred_funcs_s16[pred_func_index];
 		else
-			fn = pred_funcs_u16[op - PRED_FUNC_START];
+			fn = pred_funcs_u16[pred_func_index];
 		break;
 	case 1:
-		if (op == OP_EQ || op == OP_NE)
+		if (pred_func_index < 0)
 			fn = filter_pred_8;
 		else if (field_is_signed)
-			fn = pred_funcs_s8[op - PRED_FUNC_START];
+			fn = pred_funcs_s8[pred_func_index];
 		else
-			fn = pred_funcs_u8[op - PRED_FUNC_START];
+			fn = pred_funcs_u8[pred_func_index];
 		break;
 	}
 
 	return fn;
 }
 
-static int init_pred(struct filter_parse_state *ps,
-		     struct ftrace_event_field *field,
-		     struct filter_pred *pred)
-
+/* Called when a predicate is encountered by predicate_parse() */
+static int parse_pred(const char *str, void *data,
+		      int pos, struct filter_parse_error *pe,
+		      struct filter_pred **pred_ptr)
 {
-	filter_pred_fn_t fn = filter_pred_none;
-	unsigned long long val;
+	struct trace_event_call *call = data;
+	struct ftrace_event_field *field;
+	struct filter_pred *pred = NULL;
+	char num_buf[24];	/* Big enough to hold an address */
+	char *field_name;
+	char q;
+	u64 val;
+	int len;
 	int ret;
+	int op;
+	int s;
+	int i = 0;
 
-	pred->offset = field->offset;
-
-	if (!is_legal_op(field, pred->op)) {
-		parse_error(ps, FILT_ERR_ILLEGAL_FIELD_OP, 0);
-		return -EINVAL;
-	}
-
-	if (field->filter_type == FILTER_COMM) {
-		filter_build_regex(pred);
-		fn = filter_pred_comm;
-		pred->regex.field_len = TASK_COMM_LEN;
-	} else if (is_string_field(field)) {
-		filter_build_regex(pred);
-
-		if (field->filter_type == FILTER_STATIC_STRING) {
-			fn = filter_pred_string;
-			pred->regex.field_len = field->size;
-		} else if (field->filter_type == FILTER_DYN_STRING)
-			fn = filter_pred_strloc;
-		else
-			fn = filter_pred_pchar;
-	} else if (is_function_field(field)) {
-		if (strcmp(field->name, "ip")) {
-			parse_error(ps, FILT_ERR_IP_FIELD_ONLY, 0);
-			return -EINVAL;
-		}
-	} else {
-		if (field->is_signed)
-			ret = kstrtoll(pred->regex.pattern, 0, &val);
-		else
-			ret = kstrtoull(pred->regex.pattern, 0, &val);
-		if (ret) {
-			parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0);
-			return -EINVAL;
-		}
-		pred->val = val;
-
-		if (field->filter_type == FILTER_CPU)
-			fn = filter_pred_cpu;
-		else
-			fn = select_comparison_fn(pred->op, field->size,
-					  field->is_signed);
-		if (!fn) {
-			parse_error(ps, FILT_ERR_INVALID_OP, 0);
-			return -EINVAL;
-		}
-	}
-
-	if (pred->op == OP_NE)
-		pred->not ^= 1;
-
-	pred->fn = fn;
-	return 0;
-}
-
-static void parse_init(struct filter_parse_state *ps,
-		       struct filter_op *ops,
-		       char *infix_string)
-{
-	memset(ps, '\0', sizeof(*ps));
-
-	ps->infix.string = infix_string;
-	ps->infix.cnt = strlen(infix_string);
-	ps->ops = ops;
-
-	INIT_LIST_HEAD(&ps->opstack);
-	INIT_LIST_HEAD(&ps->postfix);
-}
-
-static char infix_next(struct filter_parse_state *ps)
-{
-	if (!ps->infix.cnt)
-		return 0;
-
-	ps->infix.cnt--;
-
-	return ps->infix.string[ps->infix.tail++];
-}
+	/* First find the field to associate to */
+	while (isspace(str[i]))
+		i++;
+	s = i;
 
-static char infix_peek(struct filter_parse_state *ps)
-{
-	if (ps->infix.tail == strlen(ps->infix.string))
-		return 0;
+	while (isalnum(str[i]) || str[i] == '_')
+		i++;
 
-	return ps->infix.string[ps->infix.tail];
-}
+	len = i - s;
 
-static void infix_advance(struct filter_parse_state *ps)
-{
-	if (!ps->infix.cnt)
-		return;
+	if (!len)
+		return -1;
 
-	ps->infix.cnt--;
-	ps->infix.tail++;
-}
+	field_name = kmemdup_nul(str + s, len, GFP_KERNEL);
+	if (!field_name)
+		return -ENOMEM;
 
-static inline int is_precedence_lower(struct filter_parse_state *ps,
-				      int a, int b)
-{
-	return ps->ops[a].precedence < ps->ops[b].precedence;
-}
+	/* Make sure that the field exists */
 
-static inline int is_op_char(struct filter_parse_state *ps, char c)
-{
-	int i;
-
-	for (i = 0; strcmp(ps->ops[i].string, "OP_NONE"); i++) {
-		if (ps->ops[i].string[0] == c)
-			return 1;
+	field = trace_find_event_field(call, field_name);
+	kfree(field_name);
+	if (!field) {
+		parse_error(pe, FILT_ERR_FIELD_NOT_FOUND, pos + i);
+		return -EINVAL;
 	}
 
-	return 0;
-}
+	while (isspace(str[i]))
+		i++;
 
-static int infix_get_op(struct filter_parse_state *ps, char firstc)
-{
-	char nextc = infix_peek(ps);
-	char opstr[3];
-	int i;
-
-	opstr[0] = firstc;
-	opstr[1] = nextc;
-	opstr[2] = '\0';
-
-	for (i = 0; strcmp(ps->ops[i].string, "OP_NONE"); i++) {
-		if (!strcmp(opstr, ps->ops[i].string)) {
-			infix_advance(ps);
-			return ps->ops[i].id;
-		}
+	/* Make sure this op is supported */
+	for (op = 0; ops[op]; op++) {
+		/* This is why '<=' must come before '<' in ops[] */
+		if (strncmp(str + i, ops[op], strlen(ops[op])) == 0)
+			break;
 	}
 
-	opstr[1] = '\0';
-
-	for (i = 0; strcmp(ps->ops[i].string, "OP_NONE"); i++) {
-		if (!strcmp(opstr, ps->ops[i].string))
-			return ps->ops[i].id;
+	if (!ops[op]) {
+		parse_error(pe, FILT_ERR_INVALID_OP, pos + i);
+		goto err_free;
 	}
 
-	return OP_NONE;
-}
-
-static inline void clear_operand_string(struct filter_parse_state *ps)
-{
-	memset(ps->operand.string, '\0', MAX_FILTER_STR_VAL);
-	ps->operand.tail = 0;
-}
-
-static inline int append_operand_char(struct filter_parse_state *ps, char c)
-{
-	if (ps->operand.tail == MAX_FILTER_STR_VAL - 1)
-		return -EINVAL;
-
-	ps->operand.string[ps->operand.tail++] = c;
+	i += strlen(ops[op]);
 
-	return 0;
-}
+	while (isspace(str[i]))
+		i++;
 
-static int filter_opstack_push(struct filter_parse_state *ps,
-			       enum filter_op_ids op)
-{
-	struct opstack_op *opstack_op;
+	s = i;
 
-	opstack_op = kmalloc(sizeof(*opstack_op), GFP_KERNEL);
-	if (!opstack_op)
+	pred = kzalloc(sizeof(*pred), GFP_KERNEL);
+	if (!pred)
 		return -ENOMEM;
 
-	opstack_op->op = op;
-	list_add(&opstack_op->list, &ps->opstack);
+	pred->field = field;
+	pred->offset = field->offset;
+	pred->op = op;
 
-	return 0;
-}
+	if (ftrace_event_is_function(call)) {
+		/*
+		 * Perf does things different with function events.
+		 * It only allows an "ip" field, and expects a string.
+		 * But the string does not need to be surrounded by quotes.
+		 * If it is a string, the assigned function as a nop,
+		 * (perf doesn't use it) and grab everything.
+		 */
+		if (strcmp(field->name, "ip") != 0) {
+			 parse_error(pe, FILT_ERR_IP_FIELD_ONLY, pos + i);
+			 goto err_free;
+		 }
+		 pred->fn = filter_pred_none;
+
+		 /*
+		  * Quotes are not required, but if they exist then we need
+		  * to read them till we hit a matching one.
+		  */
+		 if (str[i] == '\'' || str[i] == '"')
+			 q = str[i];
+		 else
+			 q = 0;
+
+		 for (i++; str[i]; i++) {
+			 if (q && str[i] == q)
+				 break;
+			 if (!q && (str[i] == ')' || str[i] == '&' ||
+				    str[i] == '|'))
+				 break;
+		 }
+		 /* Skip quotes */
+		 if (q)
+			 s++;
+		len = i - s;
+		if (len >= MAX_FILTER_STR_VAL) {
+			parse_error(pe, FILT_ERR_OPERAND_TOO_LONG, pos + i);
+			goto err_free;
+		}
 
-static int filter_opstack_empty(struct filter_parse_state *ps)
-{
-	return list_empty(&ps->opstack);
-}
+		pred->regex.len = len;
+		strncpy(pred->regex.pattern, str + s, len);
+		pred->regex.pattern[len] = 0;
+
+	/* This is either a string, or an integer */
+	} else if (str[i] == '\'' || str[i] == '"') {
+		char q = str[i];
+
+		/* Make sure the op is OK for strings */
+		switch (op) {
+		case OP_NE:
+			pred->not = 1;
+			/* Fall through */
+		case OP_GLOB:
+		case OP_EQ:
+			break;
+		default:
+			parse_error(pe, FILT_ERR_ILLEGAL_FIELD_OP, pos + i);
+			goto err_free;
+		}
 
-static int filter_opstack_top(struct filter_parse_state *ps)
-{
-	struct opstack_op *opstack_op;
+		/* Make sure the field is OK for strings */
+		if (!is_string_field(field)) {
+			parse_error(pe, FILT_ERR_EXPECT_DIGIT, pos + i);
+			goto err_free;
+		}
 
-	if (filter_opstack_empty(ps))
-		return OP_NONE;
+		for (i++; str[i]; i++) {
+			if (str[i] == q)
+				break;
+		}
+		if (!str[i]) {
+			parse_error(pe, FILT_ERR_MISSING_QUOTE, pos + i);
+			goto err_free;
+		}
 
-	opstack_op = list_first_entry(&ps->opstack, struct opstack_op, list);
+		/* Skip quotes */
+		s++;
+		len = i - s;
+		if (len >= MAX_FILTER_STR_VAL) {
+			parse_error(pe, FILT_ERR_OPERAND_TOO_LONG, pos + i);
+			goto err_free;
+		}
 
-	return opstack_op->op;
-}
+		pred->regex.len = len;
+		strncpy(pred->regex.pattern, str + s, len);
+		pred->regex.pattern[len] = 0;
 
-static int filter_opstack_pop(struct filter_parse_state *ps)
-{
-	struct opstack_op *opstack_op;
-	enum filter_op_ids op;
+		filter_build_regex(pred);
 
-	if (filter_opstack_empty(ps))
-		return OP_NONE;
+		if (field->filter_type == FILTER_COMM) {
+			pred->fn = filter_pred_comm;
 
-	opstack_op = list_first_entry(&ps->opstack, struct opstack_op, list);
-	op = opstack_op->op;
-	list_del(&opstack_op->list);
+		} else if (field->filter_type == FILTER_STATIC_STRING) {
+			pred->fn = filter_pred_string;
+			pred->regex.field_len = field->size;
 
-	kfree(opstack_op);
+		} else if (field->filter_type == FILTER_DYN_STRING)
+			pred->fn = filter_pred_strloc;
+		else
+			pred->fn = filter_pred_pchar;
+		/* go past the last quote */
+		i++;
 
-	return op;
-}
+	} else if (isdigit(str[i])) {
 
-static void filter_opstack_clear(struct filter_parse_state *ps)
-{
-	while (!filter_opstack_empty(ps))
-		filter_opstack_pop(ps);
-}
-
-static char *curr_operand(struct filter_parse_state *ps)
-{
-	return ps->operand.string;
-}
+		/* Make sure the field is not a string */
+		if (is_string_field(field)) {
+			parse_error(pe, FILT_ERR_EXPECT_STRING, pos + i);
+			goto err_free;
+		}
 
-static int postfix_append_operand(struct filter_parse_state *ps, char *operand)
-{
-	struct postfix_elt *elt;
+		if (op == OP_GLOB) {
+			parse_error(pe, FILT_ERR_ILLEGAL_FIELD_OP, pos + i);
+			goto err_free;
+		}
 
-	elt = kmalloc(sizeof(*elt), GFP_KERNEL);
-	if (!elt)
-		return -ENOMEM;
+		/* We allow 0xDEADBEEF */
+		while (isalnum(str[i]))
+			i++;
 
-	elt->op = OP_NONE;
-	elt->operand = kstrdup(operand, GFP_KERNEL);
-	if (!elt->operand) {
-		kfree(elt);
-		return -ENOMEM;
-	}
+		len = i - s;
+		/* 0xfeedfacedeadbeef is 18 chars max */
+		if (len >= sizeof(num_buf)) {
+			parse_error(pe, FILT_ERR_OPERAND_TOO_LONG, pos + i);
+			goto err_free;
+		}
 
-	list_add_tail(&elt->list, &ps->postfix);
+		strncpy(num_buf, str + s, len);
+		num_buf[len] = 0;
 
-	return 0;
-}
+		/* Make sure it is a value */
+		if (field->is_signed)
+			ret = kstrtoll(num_buf, 0, &val);
+		else
+			ret = kstrtoull(num_buf, 0, &val);
+		if (ret) {
+			parse_error(pe, FILT_ERR_ILLEGAL_INTVAL, pos + s);
+			goto err_free;
+		}
 
-static int postfix_append_op(struct filter_parse_state *ps, enum filter_op_ids op)
-{
-	struct postfix_elt *elt;
+		pred->val = val;
 
-	elt = kmalloc(sizeof(*elt), GFP_KERNEL);
-	if (!elt)
-		return -ENOMEM;
+		if (field->filter_type == FILTER_CPU)
+			pred->fn = filter_pred_cpu;
+		else {
+			pred->fn = select_comparison_fn(pred->op, field->size,
+							field->is_signed);
+			if (pred->op == OP_NE)
+				pred->not = 1;
+		}
 
-	elt->op = op;
-	elt->operand = NULL;
+	} else {
+		parse_error(pe, FILT_ERR_INVALID_VALUE, pos + i);
+		goto err_free;
+	}
 
-	list_add_tail(&elt->list, &ps->postfix);
+	*pred_ptr = pred;
+	return i;
 
-	return 0;
+err_free:
+	kfree(pred);
+	return -EINVAL;
 }
 
-static void postfix_clear(struct filter_parse_state *ps)
-{
-	struct postfix_elt *elt;
+enum {
+	TOO_MANY_CLOSE		= -1,
+	TOO_MANY_OPEN		= -2,
+	MISSING_QUOTE		= -3,
+};
 
-	while (!list_empty(&ps->postfix)) {
-		elt = list_first_entry(&ps->postfix, struct postfix_elt, list);
-		list_del(&elt->list);
-		kfree(elt->operand);
-		kfree(elt);
-	}
-}
+/*
+ * Read the filter string once to calculate the number of predicates
+ * as well as how deep the parentheses go.
+ *
+ * Returns:
+ *   0 - everything is fine (err is undefined)
+ *  -1 - too many ')'
+ *  -2 - too many '('
+ *  -3 - No matching quote
+ */
+static int calc_stack(const char *str, int *parens, int *preds, int *err)
+{
+	bool is_pred = false;
+	int nr_preds = 0;
+	int open = 1; /* Count the expression as "(E)" */
+	int last_quote = 0;
+	int max_open = 1;
+	int quote = 0;
+	int i;
 
-static int filter_parse(struct filter_parse_state *ps)
-{
-	enum filter_op_ids op, top_op;
-	int in_string = 0;
-	char ch;
+	*err = 0;
 
-	while ((ch = infix_next(ps))) {
-		if (ch == '"') {
-			in_string ^= 1;
+	for (i = 0; str[i]; i++) {
+		if (isspace(str[i]))
 			continue;
-		}
-
-		if (in_string)
-			goto parse_operand;
-
-		if (isspace(ch))
+		if (quote) {
+			if (str[i] == quote)
+			       quote = 0;
 			continue;
+		}
 
-		if (is_op_char(ps, ch)) {
-			op = infix_get_op(ps, ch);
-			if (op == OP_NONE) {
-				parse_error(ps, FILT_ERR_INVALID_OP, 0);
-				return -EINVAL;
-			}
-
-			if (strlen(curr_operand(ps))) {
-				postfix_append_operand(ps, curr_operand(ps));
-				clear_operand_string(ps);
-			}
-
-			while (!filter_opstack_empty(ps)) {
-				top_op = filter_opstack_top(ps);
-				if (!is_precedence_lower(ps, top_op, op)) {
-					top_op = filter_opstack_pop(ps);
-					postfix_append_op(ps, top_op);
-					continue;
-				}
+		switch (str[i]) {
+		case '\'':
+		case '"':
+			quote = str[i];
+			last_quote = i;
+			break;
+		case '|':
+		case '&':
+			if (str[i+1] != str[i])
 				break;
-			}
-
-			filter_opstack_push(ps, op);
+			is_pred = false;
 			continue;
-		}
-
-		if (ch == '(') {
-			filter_opstack_push(ps, OP_OPEN_PAREN);
+		case '(':
+			is_pred = false;
+			open++;
+			if (open > max_open)
+				max_open = open;
 			continue;
-		}
-
-		if (ch == ')') {
-			if (strlen(curr_operand(ps))) {
-				postfix_append_operand(ps, curr_operand(ps));
-				clear_operand_string(ps);
-			}
-
-			top_op = filter_opstack_pop(ps);
-			while (top_op != OP_NONE) {
-				if (top_op == OP_OPEN_PAREN)
-					break;
-				postfix_append_op(ps, top_op);
-				top_op = filter_opstack_pop(ps);
-			}
-			if (top_op == OP_NONE) {
-				parse_error(ps, FILT_ERR_UNBALANCED_PAREN, 0);
-				return -EINVAL;
+		case ')':
+			is_pred = false;
+			if (open == 1) {
+				*err = i;
+				return TOO_MANY_CLOSE;
 			}
+			open--;
 			continue;
 		}
-parse_operand:
-		if (append_operand_char(ps, ch)) {
-			parse_error(ps, FILT_ERR_OPERAND_TOO_LONG, 0);
-			return -EINVAL;
+		if (!is_pred) {
+			nr_preds++;
+			is_pred = true;
 		}
 	}
 
-	if (strlen(curr_operand(ps)))
-		postfix_append_operand(ps, curr_operand(ps));
-
-	while (!filter_opstack_empty(ps)) {
-		top_op = filter_opstack_pop(ps);
-		if (top_op == OP_NONE)
-			break;
-		if (top_op == OP_OPEN_PAREN) {
-			parse_error(ps, FILT_ERR_UNBALANCED_PAREN, 0);
-			return -EINVAL;
-		}
-		postfix_append_op(ps, top_op);
+	if (quote) {
+		*err = last_quote;
+		return MISSING_QUOTE;
 	}
 
-	return 0;
-}
-
-static struct filter_pred *create_pred(struct filter_parse_state *ps,
-				       struct trace_event_call *call,
-				       enum filter_op_ids op,
-				       char *operand1, char *operand2)
-{
-	struct ftrace_event_field *field;
-	static struct filter_pred pred;
-
-	memset(&pred, 0, sizeof(pred));
-	pred.op = op;
-
-	if (op == OP_AND || op == OP_OR)
-		return &pred;
-
-	if (!operand1 || !operand2) {
-		parse_error(ps, FILT_ERR_MISSING_FIELD, 0);
-		return NULL;
-	}
-
-	field = trace_find_event_field(call, operand1);
-	if (!field) {
-		parse_error(ps, FILT_ERR_FIELD_NOT_FOUND, 0);
-		return NULL;
-	}
-
-	strcpy(pred.regex.pattern, operand2);
-	pred.regex.len = strlen(pred.regex.pattern);
-	pred.field = field;
-	return init_pred(ps, field, &pred) ? NULL : &pred;
-}
-
-static int check_preds(struct filter_parse_state *ps)
-{
-	int n_normal_preds = 0, n_logical_preds = 0;
-	struct postfix_elt *elt;
-	int cnt = 0;
-
-	list_for_each_entry(elt, &ps->postfix, list) {
-		if (elt->op == OP_NONE) {
-			cnt++;
-			continue;
-		}
+	if (open != 1) {
+		int level = open;
 
-		if (elt->op == OP_AND || elt->op == OP_OR) {
-			n_logical_preds++;
-			cnt--;
-			continue;
+		/* find the bad open */
+		for (i--; i; i--) {
+			if (quote) {
+				if (str[i] == quote)
+					quote = 0;
+				continue;
+			}
+			switch (str[i]) {
+			case '(':
+				if (level == open) {
+					*err = i;
+					return TOO_MANY_OPEN;
+				}
+				level--;
+				break;
+			case ')':
+				level++;
+				break;
+			case '\'':
+			case '"':
+				quote = str[i];
+				break;
+			}
 		}
-		if (elt->op != OP_NOT)
-			cnt--;
-		n_normal_preds++;
-		/* all ops should have operands */
-		if (cnt < 0)
-			break;
-	}
-
-	if (cnt != 1 || !n_normal_preds || n_logical_preds >= n_normal_preds) {
-		parse_error(ps, FILT_ERR_INVALID_FILTER, 0);
-		return -EINVAL;
+		/* First character is the '(' with missing ')' */
+		*err = 0;
+		return TOO_MANY_OPEN;
 	}
 
+	/* Set the size of the required stacks */
+	*parens = max_open;
+	*preds = nr_preds;
 	return 0;
 }
 
-static int count_preds(struct filter_parse_state *ps)
-{
-	struct postfix_elt *elt;
-	int n_preds = 0;
-
-	list_for_each_entry(elt, &ps->postfix, list) {
-		if (elt->op == OP_NONE)
-			continue;
-		n_preds++;
-	}
-
-	return n_preds;
-}
-
-struct check_pred_data {
-	int count;
-	int max;
-};
-
-static int check_pred_tree_cb(enum move_type move, struct filter_pred *pred,
-			      int *err, void *data)
-{
-	struct check_pred_data *d = data;
-
-	if (WARN_ON(d->count++ > d->max)) {
-		*err = -EINVAL;
-		return WALK_PRED_ABORT;
-	}
-	return WALK_PRED_DEFAULT;
-}
-
-/*
- * The tree is walked at filtering of an event. If the tree is not correctly
- * built, it may cause an infinite loop. Check here that the tree does
- * indeed terminate.
- */
-static int check_pred_tree(struct event_filter *filter,
-			   struct filter_pred *root)
-{
-	struct check_pred_data data = {
-		/*
-		 * The max that we can hit a node is three times.
-		 * Once going down, once coming up from left, and
-		 * once coming up from right. This is more than enough
-		 * since leafs are only hit a single time.
-		 */
-		.max   = 3 * filter->n_preds,
-		.count = 0,
-	};
-
-	return walk_pred_tree(filter->preds, root,
-			      check_pred_tree_cb, &data);
-}
-
-static int count_leafs_cb(enum move_type move, struct filter_pred *pred,
-			  int *err, void *data)
-{
-	int *count = data;
-
-	if ((move == MOVE_DOWN) &&
-	    (pred->left == FILTER_PRED_INVALID))
-		(*count)++;
-
-	return WALK_PRED_DEFAULT;
-}
-
-static int count_leafs(struct filter_pred *preds, struct filter_pred *root)
-{
-	int count = 0, ret;
-
-	ret = walk_pred_tree(preds, root, count_leafs_cb, &count);
-	WARN_ON(ret);
-	return count;
-}
-
-struct fold_pred_data {
-	struct filter_pred *root;
-	int count;
-	int children;
-};
-
-static int fold_pred_cb(enum move_type move, struct filter_pred *pred,
-			int *err, void *data)
-{
-	struct fold_pred_data *d = data;
-	struct filter_pred *root = d->root;
-
-	if (move != MOVE_DOWN)
-		return WALK_PRED_DEFAULT;
-	if (pred->left != FILTER_PRED_INVALID)
-		return WALK_PRED_DEFAULT;
-
-	if (WARN_ON(d->count == d->children)) {
-		*err = -EINVAL;
-		return WALK_PRED_ABORT;
-	}
-
-	pred->index &= ~FILTER_PRED_FOLD;
-	root->ops[d->count++] = pred->index;
-	return WALK_PRED_DEFAULT;
-}
-
-static int fold_pred(struct filter_pred *preds, struct filter_pred *root)
-{
-	struct fold_pred_data data = {
-		.root  = root,
-		.count = 0,
-	};
-	int children;
-
-	/* No need to keep the fold flag */
-	root->index &= ~FILTER_PRED_FOLD;
-
-	/* If the root is a leaf then do nothing */
-	if (root->left == FILTER_PRED_INVALID)
-		return 0;
-
-	/* count the children */
-	children = count_leafs(preds, &preds[root->left]);
-	children += count_leafs(preds, &preds[root->right]);
-
-	root->ops = kcalloc(children, sizeof(*root->ops), GFP_KERNEL);
-	if (!root->ops)
-		return -ENOMEM;
-
-	root->val = children;
-	data.children = children;
-	return walk_pred_tree(preds, root, fold_pred_cb, &data);
-}
-
-static int fold_pred_tree_cb(enum move_type move, struct filter_pred *pred,
-			     int *err, void *data)
-{
-	struct filter_pred *preds = data;
-
-	if (move != MOVE_DOWN)
-		return WALK_PRED_DEFAULT;
-	if (!(pred->index & FILTER_PRED_FOLD))
-		return WALK_PRED_DEFAULT;
-
-	*err = fold_pred(preds, pred);
-	if (*err)
-		return WALK_PRED_ABORT;
-
-	/* eveyrhing below is folded, continue with parent */
-	return WALK_PRED_PARENT;
-}
-
-/*
- * To optimize the processing of the ops, if we have several "ors" or
- * "ands" together, we can put them in an array and process them all
- * together speeding up the filter logic.
- */
-static int fold_pred_tree(struct event_filter *filter,
-			   struct filter_pred *root)
-{
-	return walk_pred_tree(filter->preds, root, fold_pred_tree_cb,
-			      filter->preds);
-}
-
-static int replace_preds(struct trace_event_call *call,
+static int process_preds(struct trace_event_call *call,
+			 const char *filter_string,
 			 struct event_filter *filter,
-			 struct filter_parse_state *ps,
-			 bool dry_run)
+			 struct filter_parse_error *pe)
 {
-	char *operand1 = NULL, *operand2 = NULL;
-	struct filter_pred *pred;
-	struct filter_pred *root;
-	struct postfix_elt *elt;
-	struct pred_stack stack = { }; /* init to NULL */
-	int err;
-	int n_preds = 0;
-
-	n_preds = count_preds(ps);
-	if (n_preds >= MAX_FILTER_PRED) {
-		parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
-		return -ENOSPC;
-	}
-
-	err = check_preds(ps);
-	if (err)
-		return err;
-
-	if (!dry_run) {
-		err = __alloc_pred_stack(&stack, n_preds);
-		if (err)
-			return err;
-		err = __alloc_preds(filter, n_preds);
-		if (err)
-			goto fail;
-	}
-
-	n_preds = 0;
-	list_for_each_entry(elt, &ps->postfix, list) {
-		if (elt->op == OP_NONE) {
-			if (!operand1)
-				operand1 = elt->operand;
-			else if (!operand2)
-				operand2 = elt->operand;
-			else {
-				parse_error(ps, FILT_ERR_TOO_MANY_OPERANDS, 0);
-				err = -EINVAL;
-				goto fail;
-			}
-			continue;
-		}
-
-		if (elt->op == OP_NOT) {
-			if (!n_preds || operand1 || operand2) {
-				parse_error(ps, FILT_ERR_ILLEGAL_NOT_OP, 0);
-				err = -EINVAL;
-				goto fail;
-			}
-			if (!dry_run)
-				filter->preds[n_preds - 1].not ^= 1;
-			continue;
-		}
-
-		if (WARN_ON(n_preds++ == MAX_FILTER_PRED)) {
-			parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
-			err = -ENOSPC;
-			goto fail;
-		}
-
-		pred = create_pred(ps, call, elt->op, operand1, operand2);
-		if (!pred) {
-			err = -EINVAL;
-			goto fail;
-		}
+	struct prog_entry *prog;
+	int nr_parens;
+	int nr_preds;
+	int index;
+	int ret;
 
-		if (!dry_run) {
-			err = filter_add_pred(ps, filter, pred, &stack);
-			if (err)
-				goto fail;
+	ret = calc_stack(filter_string, &nr_parens, &nr_preds, &index);
+	if (ret < 0) {
+		switch (ret) {
+		case MISSING_QUOTE:
+			parse_error(pe, FILT_ERR_MISSING_QUOTE, index);
+			break;
+		case TOO_MANY_OPEN:
+			parse_error(pe, FILT_ERR_TOO_MANY_OPEN, index);
+			break;
+		default:
+			parse_error(pe, FILT_ERR_TOO_MANY_CLOSE, index);
 		}
-
-		operand1 = operand2 = NULL;
+		return ret;
 	}
 
-	if (!dry_run) {
-		/* We should have one item left on the stack */
-		pred = __pop_pred_stack(&stack);
-		if (!pred)
-			return -EINVAL;
-		/* This item is where we start from in matching */
-		root = pred;
-		/* Make sure the stack is empty */
-		pred = __pop_pred_stack(&stack);
-		if (WARN_ON(pred)) {
-			err = -EINVAL;
-			filter->root = NULL;
-			goto fail;
-		}
-		err = check_pred_tree(filter, root);
-		if (err)
-			goto fail;
-
-		/* Optimize the tree */
-		err = fold_pred_tree(filter, root);
-		if (err)
-			goto fail;
-
-		/* We don't set root until we know it works */
-		barrier();
-		filter->root = root;
+	if (!nr_preds) {
+		prog = NULL;
+	} else {
+		prog = predicate_parse(filter_string, nr_parens, nr_preds,
+			       parse_pred, call, pe);
+		if (IS_ERR(prog))
+			return PTR_ERR(prog);
 	}
-
-	err = 0;
-fail:
-	__free_pred_stack(&stack);
-	return err;
+	rcu_assign_pointer(filter->prog, prog);
+	return 0;
 }
 
 static inline void event_set_filtered_flag(struct trace_event_file *file)
@@ -1780,72 +1558,53 @@ struct filter_list {
 	struct event_filter	*filter;
 };
 
-static int replace_system_preds(struct trace_subsystem_dir *dir,
+static int process_system_preds(struct trace_subsystem_dir *dir,
 				struct trace_array *tr,
-				struct filter_parse_state *ps,
+				struct filter_parse_error *pe,
 				char *filter_string)
 {
 	struct trace_event_file *file;
 	struct filter_list *filter_item;
+	struct event_filter *filter = NULL;
 	struct filter_list *tmp;
 	LIST_HEAD(filter_list);
 	bool fail = true;
 	int err;
 
 	list_for_each_entry(file, &tr->events, list) {
-		if (file->system != dir)
-			continue;
-
-		/*
-		 * Try to see if the filter can be applied
-		 *  (filter arg is ignored on dry_run)
-		 */
-		err = replace_preds(file->event_call, NULL, ps, true);
-		if (err)
-			event_set_no_set_filter_flag(file);
-		else
-			event_clear_no_set_filter_flag(file);
-	}
-
-	list_for_each_entry(file, &tr->events, list) {
-		struct event_filter *filter;
 
 		if (file->system != dir)
 			continue;
 
-		if (event_no_set_filter_flag(file))
-			continue;
-
-		filter_item = kzalloc(sizeof(*filter_item), GFP_KERNEL);
-		if (!filter_item)
-			goto fail_mem;
-
-		list_add_tail(&filter_item->list, &filter_list);
-
-		filter_item->filter = __alloc_filter();
-		if (!filter_item->filter)
+		filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+		if (!filter)
 			goto fail_mem;
-		filter = filter_item->filter;
 
-		/* Can only fail on no memory */
-		err = replace_filter_string(filter, filter_string);
-		if (err)
+		filter->filter_string = kstrdup(filter_string, GFP_KERNEL);
+		if (!filter->filter_string)
 			goto fail_mem;
 
-		err = replace_preds(file->event_call, filter, ps, false);
+		err = process_preds(file->event_call, filter_string, filter, pe);
 		if (err) {
 			filter_disable(file);
-			parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
-			append_filter_err(ps, filter);
+			parse_error(pe, FILT_ERR_BAD_SUBSYS_FILTER, 0);
+			append_filter_err(pe, filter);
 		} else
 			event_set_filtered_flag(file);
+
+
+		filter_item = kzalloc(sizeof(*filter_item), GFP_KERNEL);
+		if (!filter_item)
+			goto fail_mem;
+
+		list_add_tail(&filter_item->list, &filter_list);
 		/*
 		 * Regardless of if this returned an error, we still
 		 * replace the filter for the call.
 		 */
-		filter = event_filter(file);
-		event_set_filter(file, filter_item->filter);
-		filter_item->filter = filter;
+		filter_item->filter = event_filter(file);
+		event_set_filter(file, filter);
+		filter = NULL;
 
 		fail = false;
 	}
@@ -1871,9 +1630,10 @@ static int replace_system_preds(struct trace_subsystem_dir *dir,
 		list_del(&filter_item->list);
 		kfree(filter_item);
 	}
-	parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
+	parse_error(pe, FILT_ERR_BAD_SUBSYS_FILTER, 0);
 	return -EINVAL;
  fail_mem:
+	kfree(filter);
 	/* If any call succeeded, we still need to sync */
 	if (!fail)
 		synchronize_sched();
@@ -1885,47 +1645,42 @@ static int replace_system_preds(struct trace_subsystem_dir *dir,
 	return -ENOMEM;
 }
 
-static int create_filter_start(char *filter_str, bool set_str,
-			       struct filter_parse_state **psp,
+static int create_filter_start(char *filter_string, bool set_str,
+			       struct filter_parse_error **pse,
 			       struct event_filter **filterp)
 {
 	struct event_filter *filter;
-	struct filter_parse_state *ps = NULL;
+	struct filter_parse_error *pe = NULL;
 	int err = 0;
 
-	WARN_ON_ONCE(*psp || *filterp);
+	if (WARN_ON_ONCE(*pse || *filterp))
+		return -EINVAL;
 
-	/* allocate everything, and if any fails, free all and fail */
-	filter = __alloc_filter();
-	if (filter && set_str)
-		err = replace_filter_string(filter, filter_str);
+	filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+	if (filter && set_str) {
+		filter->filter_string = kstrdup(filter_string, GFP_KERNEL);
+		if (!filter->filter_string)
+			err = -ENOMEM;
+	}
 
-	ps = kzalloc(sizeof(*ps), GFP_KERNEL);
+	pe = kzalloc(sizeof(*pe), GFP_KERNEL);
 
-	if (!filter || !ps || err) {
-		kfree(ps);
+	if (!filter || !pe || err) {
+		kfree(pe);
 		__free_filter(filter);
 		return -ENOMEM;
 	}
 
 	/* we're committed to creating a new filter */
 	*filterp = filter;
-	*psp = ps;
+	*pse = pe;
 
-	parse_init(ps, filter_ops, filter_str);
-	err = filter_parse(ps);
-	if (err && set_str)
-		append_filter_err(ps, filter);
-	return err;
+	return 0;
 }
 
-static void create_filter_finish(struct filter_parse_state *ps)
+static void create_filter_finish(struct filter_parse_error *pe)
 {
-	if (ps) {
-		filter_opstack_clear(ps);
-		postfix_clear(ps);
-		kfree(ps);
-	}
+	kfree(pe);
 }
 
 /**
@@ -1945,24 +1700,20 @@ static void create_filter_finish(struct filter_parse_state *ps)
  * freeing it.
  */
 static int create_filter(struct trace_event_call *call,
-			 char *filter_str, bool set_str,
+			 char *filter_string, bool set_str,
 			 struct event_filter **filterp)
 {
+	struct filter_parse_error *pe = NULL;
 	struct event_filter *filter = NULL;
-	struct filter_parse_state *ps = NULL;
 	int err;
 
-	err = create_filter_start(filter_str, set_str, &ps, &filter);
-	if (!err) {
-		err = replace_preds(call, filter, ps, false);
-		if (err && set_str)
-			append_filter_err(ps, filter);
-	}
-	if (err && !set_str) {
-		free_event_filter(filter);
-		filter = NULL;
-	}
-	create_filter_finish(ps);
+	err = create_filter_start(filter_string, set_str, &pe, &filter);
+	if (err)
+		return err;
+
+	err = process_preds(call, filter_string, filter, pe);
+	if (err && set_str)
+		append_filter_err(pe, filter);
 
 	*filterp = filter;
 	return err;
@@ -1989,21 +1740,21 @@ static int create_system_filter(struct trace_subsystem_dir *dir,
 				char *filter_str, struct event_filter **filterp)
 {
 	struct event_filter *filter = NULL;
-	struct filter_parse_state *ps = NULL;
+	struct filter_parse_error *pe = NULL;
 	int err;
 
-	err = create_filter_start(filter_str, true, &ps, &filter);
+	err = create_filter_start(filter_str, true, &pe, &filter);
 	if (!err) {
-		err = replace_system_preds(dir, tr, ps, filter_str);
+		err = process_system_preds(dir, tr, pe, filter_str);
 		if (!err) {
 			/* System filters just show a default message */
 			kfree(filter->filter_string);
 			filter->filter_string = NULL;
 		} else {
-			append_filter_err(ps, filter);
+			append_filter_err(pe, filter);
 		}
 	}
-	create_filter_finish(ps);
+	create_filter_finish(pe);
 
 	*filterp = filter;
 	return err;
@@ -2186,66 +1937,80 @@ static int __ftrace_function_set_filter(int filter, char *buf, int len,
 	return ret;
 }
 
-static int ftrace_function_check_pred(struct filter_pred *pred, int leaf)
+static int ftrace_function_check_pred(struct filter_pred *pred)
 {
 	struct ftrace_event_field *field = pred->field;
 
-	if (leaf) {
-		/*
-		 * Check the leaf predicate for function trace, verify:
-		 *  - only '==' and '!=' is used
-		 *  - the 'ip' field is used
-		 */
-		if ((pred->op != OP_EQ) && (pred->op != OP_NE))
-			return -EINVAL;
+	/*
+	 * Check the predicate for function trace, verify:
+	 *  - only '==' and '!=' is used
+	 *  - the 'ip' field is used
+	 */
+	if ((pred->op != OP_EQ) && (pred->op != OP_NE))
+		return -EINVAL;
 
-		if (strcmp(field->name, "ip"))
-			return -EINVAL;
-	} else {
-		/*
-		 * Check the non leaf predicate for function trace, verify:
-		 *  - only '||' is used
-		*/
-		if (pred->op != OP_OR)
-			return -EINVAL;
-	}
+	if (strcmp(field->name, "ip"))
+		return -EINVAL;
 
 	return 0;
 }
 
-static int ftrace_function_set_filter_cb(enum move_type move,
-					 struct filter_pred *pred,
-					 int *err, void *data)
+static int ftrace_function_set_filter_pred(struct filter_pred *pred,
+					   struct function_filter_data *data)
 {
+	int ret;
+
 	/* Checking the node is valid for function trace. */
-	if ((move != MOVE_DOWN) ||
-	    (pred->left != FILTER_PRED_INVALID)) {
-		*err = ftrace_function_check_pred(pred, 0);
-	} else {
-		*err = ftrace_function_check_pred(pred, 1);
-		if (*err)
-			return WALK_PRED_ABORT;
-
-		*err = __ftrace_function_set_filter(pred->op == OP_EQ,
-						    pred->regex.pattern,
-						    pred->regex.len,
-						    data);
-	}
+	ret = ftrace_function_check_pred(pred);
+	if (ret)
+		return ret;
 
-	return (*err) ? WALK_PRED_ABORT : WALK_PRED_DEFAULT;
+	return __ftrace_function_set_filter(pred->op == OP_EQ,
+					    pred->regex.pattern,
+					    pred->regex.len,
+					    data);
+}
+
+static bool is_or(struct prog_entry *prog, int i)
+{
+	int target;
+
+	/*
+	 * Only "||" is allowed for function events, thus,
+	 * all true branches should jump to true, and any
+	 * false branch should jump to false.
+	 */
+	target = prog[i].target + 1;
+	/* True and false have NULL preds (all prog entries should jump to one */
+	if (prog[target].pred)
+		return false;
+
+	/* prog[target].target is 1 for TRUE, 0 for FALSE */
+	return prog[i].when_to_branch == prog[target].target;
 }
 
 static int ftrace_function_set_filter(struct perf_event *event,
 				      struct event_filter *filter)
 {
+	struct prog_entry *prog = rcu_dereference_protected(filter->prog,
+						lockdep_is_held(&event_mutex));
 	struct function_filter_data data = {
 		.first_filter  = 1,
 		.first_notrace = 1,
 		.ops           = &event->ftrace_ops,
 	};
+	int i;
 
-	return walk_pred_tree(filter->preds, filter->root,
-			      ftrace_function_set_filter_cb, &data);
+	for (i = 0; prog[i].pred; i++) {
+		struct filter_pred *pred = prog[i].pred;
+
+		if (!is_or(prog, i))
+			return -EINVAL;
+
+		if (ftrace_function_set_filter_pred(pred, &data) < 0)
+			return -EINVAL;
+	}
+	return 0;
 }
 #else
 static int ftrace_function_set_filter(struct perf_event *event,
@@ -2388,26 +2153,28 @@ static int test_pred_visited_fn(struct filter_pred *pred, void *event)
 	return 1;
 }
 
-static int test_walk_pred_cb(enum move_type move, struct filter_pred *pred,
-			     int *err, void *data)
+static void update_pred_fn(struct event_filter *filter, char *fields)
 {
-	char *fields = data;
+	struct prog_entry *prog = rcu_dereference_protected(filter->prog,
+						lockdep_is_held(&event_mutex));
+	int i;
 
-	if ((move == MOVE_DOWN) &&
-	    (pred->left == FILTER_PRED_INVALID)) {
+	for (i = 0; prog[i].pred; i++) {
+		struct filter_pred *pred = prog[i].pred;
 		struct ftrace_event_field *field = pred->field;
 
+		WARN_ON_ONCE(!pred->fn);
+
 		if (!field) {
-			WARN(1, "all leafs should have field defined");
-			return WALK_PRED_DEFAULT;
+			WARN_ONCE(1, "all leafs should have field defined %d", i);
+			continue;
 		}
+
 		if (!strchr(fields, *field->name))
-			return WALK_PRED_DEFAULT;
+			continue;
 
-		WARN_ON(!pred->fn);
 		pred->fn = test_pred_visited_fn;
 	}
-	return WALK_PRED_DEFAULT;
 }
 
 static __init int ftrace_test_event_filter(void)
@@ -2431,20 +2198,22 @@ static __init int ftrace_test_event_filter(void)
 			break;
 		}
 
+		/* Needed to dereference filter->prog */
+		mutex_lock(&event_mutex);
 		/*
 		 * The preemption disabling is not really needed for self
 		 * tests, but the rcu dereference will complain without it.
 		 */
 		preempt_disable();
 		if (*d->not_visited)
-			walk_pred_tree(filter->preds, filter->root,
-				       test_walk_pred_cb,
-				       d->not_visited);
+			update_pred_fn(filter, d->not_visited);
 
 		test_pred_visited = 0;
 		err = filter_match_preds(filter, &d->rec);
 		preempt_enable();
 
+		mutex_unlock(&event_mutex);
+
 		__free_filter(filter);
 
 		if (test_pred_visited) {
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 1e1558c99d56..0d7b3ffbecc2 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -20,15 +20,39 @@
 #include <linux/slab.h>
 #include <linux/stacktrace.h>
 #include <linux/rculist.h>
+#include <linux/tracefs.h>
 
 #include "tracing_map.h"
 #include "trace.h"
 
+#define SYNTH_SYSTEM		"synthetic"
+#define SYNTH_FIELDS_MAX	16
+
+#define STR_VAR_LEN_MAX		32 /* must be multiple of sizeof(u64) */
+
 struct hist_field;
 
-typedef u64 (*hist_field_fn_t) (struct hist_field *field, void *event);
+typedef u64 (*hist_field_fn_t) (struct hist_field *field,
+				struct tracing_map_elt *elt,
+				struct ring_buffer_event *rbe,
+				void *event);
 
 #define HIST_FIELD_OPERANDS_MAX	2
+#define HIST_FIELDS_MAX		(TRACING_MAP_FIELDS_MAX + TRACING_MAP_VARS_MAX)
+#define HIST_ACTIONS_MAX	8
+
+enum field_op_id {
+	FIELD_OP_NONE,
+	FIELD_OP_PLUS,
+	FIELD_OP_MINUS,
+	FIELD_OP_UNARY_MINUS,
+};
+
+struct hist_var {
+	char				*name;
+	struct hist_trigger_data	*hist_data;
+	unsigned int			idx;
+};
 
 struct hist_field {
 	struct ftrace_event_field	*field;
@@ -37,27 +61,49 @@ struct hist_field {
 	unsigned int			size;
 	unsigned int			offset;
 	unsigned int                    is_signed;
+	const char			*type;
 	struct hist_field		*operands[HIST_FIELD_OPERANDS_MAX];
+	struct hist_trigger_data	*hist_data;
+	struct hist_var			var;
+	enum field_op_id		operator;
+	char				*system;
+	char				*event_name;
+	char				*name;
+	unsigned int			var_idx;
+	unsigned int			var_ref_idx;
+	bool                            read_once;
 };
 
-static u64 hist_field_none(struct hist_field *field, void *event)
+static u64 hist_field_none(struct hist_field *field,
+			   struct tracing_map_elt *elt,
+			   struct ring_buffer_event *rbe,
+			   void *event)
 {
 	return 0;
 }
 
-static u64 hist_field_counter(struct hist_field *field, void *event)
+static u64 hist_field_counter(struct hist_field *field,
+			      struct tracing_map_elt *elt,
+			      struct ring_buffer_event *rbe,
+			      void *event)
 {
 	return 1;
 }
 
-static u64 hist_field_string(struct hist_field *hist_field, void *event)
+static u64 hist_field_string(struct hist_field *hist_field,
+			     struct tracing_map_elt *elt,
+			     struct ring_buffer_event *rbe,
+			     void *event)
 {
 	char *addr = (char *)(event + hist_field->field->offset);
 
 	return (u64)(unsigned long)addr;
 }
 
-static u64 hist_field_dynstring(struct hist_field *hist_field, void *event)
+static u64 hist_field_dynstring(struct hist_field *hist_field,
+				struct tracing_map_elt *elt,
+				struct ring_buffer_event *rbe,
+				void *event)
 {
 	u32 str_item = *(u32 *)(event + hist_field->field->offset);
 	int str_loc = str_item & 0xffff;
@@ -66,24 +112,74 @@ static u64 hist_field_dynstring(struct hist_field *hist_field, void *event)
 	return (u64)(unsigned long)addr;
 }
 
-static u64 hist_field_pstring(struct hist_field *hist_field, void *event)
+static u64 hist_field_pstring(struct hist_field *hist_field,
+			      struct tracing_map_elt *elt,
+			      struct ring_buffer_event *rbe,
+			      void *event)
 {
 	char **addr = (char **)(event + hist_field->field->offset);
 
 	return (u64)(unsigned long)*addr;
 }
 
-static u64 hist_field_log2(struct hist_field *hist_field, void *event)
+static u64 hist_field_log2(struct hist_field *hist_field,
+			   struct tracing_map_elt *elt,
+			   struct ring_buffer_event *rbe,
+			   void *event)
 {
 	struct hist_field *operand = hist_field->operands[0];
 
-	u64 val = operand->fn(operand, event);
+	u64 val = operand->fn(operand, elt, rbe, event);
 
 	return (u64) ilog2(roundup_pow_of_two(val));
 }
 
+static u64 hist_field_plus(struct hist_field *hist_field,
+			   struct tracing_map_elt *elt,
+			   struct ring_buffer_event *rbe,
+			   void *event)
+{
+	struct hist_field *operand1 = hist_field->operands[0];
+	struct hist_field *operand2 = hist_field->operands[1];
+
+	u64 val1 = operand1->fn(operand1, elt, rbe, event);
+	u64 val2 = operand2->fn(operand2, elt, rbe, event);
+
+	return val1 + val2;
+}
+
+static u64 hist_field_minus(struct hist_field *hist_field,
+			    struct tracing_map_elt *elt,
+			    struct ring_buffer_event *rbe,
+			    void *event)
+{
+	struct hist_field *operand1 = hist_field->operands[0];
+	struct hist_field *operand2 = hist_field->operands[1];
+
+	u64 val1 = operand1->fn(operand1, elt, rbe, event);
+	u64 val2 = operand2->fn(operand2, elt, rbe, event);
+
+	return val1 - val2;
+}
+
+static u64 hist_field_unary_minus(struct hist_field *hist_field,
+				  struct tracing_map_elt *elt,
+				  struct ring_buffer_event *rbe,
+				  void *event)
+{
+	struct hist_field *operand = hist_field->operands[0];
+
+	s64 sval = (s64)operand->fn(operand, elt, rbe, event);
+	u64 val = (u64)-sval;
+
+	return val;
+}
+
 #define DEFINE_HIST_FIELD_FN(type)					\
-static u64 hist_field_##type(struct hist_field *hist_field, void *event)\
+	static u64 hist_field_##type(struct hist_field *hist_field,	\
+				     struct tracing_map_elt *elt,	\
+				     struct ring_buffer_event *rbe,	\
+				     void *event)			\
 {									\
 	type *addr = (type *)(event + hist_field->field->offset);	\
 									\
@@ -126,6 +222,19 @@ enum hist_field_flags {
 	HIST_FIELD_FL_SYSCALL		= 1 << 7,
 	HIST_FIELD_FL_STACKTRACE	= 1 << 8,
 	HIST_FIELD_FL_LOG2		= 1 << 9,
+	HIST_FIELD_FL_TIMESTAMP		= 1 << 10,
+	HIST_FIELD_FL_TIMESTAMP_USECS	= 1 << 11,
+	HIST_FIELD_FL_VAR		= 1 << 12,
+	HIST_FIELD_FL_EXPR		= 1 << 13,
+	HIST_FIELD_FL_VAR_REF		= 1 << 14,
+	HIST_FIELD_FL_CPU		= 1 << 15,
+	HIST_FIELD_FL_ALIAS		= 1 << 16,
+};
+
+struct var_defs {
+	unsigned int	n_vars;
+	char		*name[TRACING_MAP_VARS_MAX];
+	char		*expr[TRACING_MAP_VARS_MAX];
 };
 
 struct hist_trigger_attrs {
@@ -133,25 +242,1437 @@ struct hist_trigger_attrs {
 	char		*vals_str;
 	char		*sort_key_str;
 	char		*name;
+	char		*clock;
 	bool		pause;
 	bool		cont;
 	bool		clear;
+	bool		ts_in_usecs;
 	unsigned int	map_bits;
+
+	char		*assignment_str[TRACING_MAP_VARS_MAX];
+	unsigned int	n_assignments;
+
+	char		*action_str[HIST_ACTIONS_MAX];
+	unsigned int	n_actions;
+
+	struct var_defs	var_defs;
+};
+
+struct field_var {
+	struct hist_field	*var;
+	struct hist_field	*val;
+};
+
+struct field_var_hist {
+	struct hist_trigger_data	*hist_data;
+	char				*cmd;
 };
 
 struct hist_trigger_data {
-	struct hist_field               *fields[TRACING_MAP_FIELDS_MAX];
+	struct hist_field               *fields[HIST_FIELDS_MAX];
 	unsigned int			n_vals;
 	unsigned int			n_keys;
 	unsigned int			n_fields;
+	unsigned int			n_vars;
 	unsigned int			key_size;
 	struct tracing_map_sort_key	sort_keys[TRACING_MAP_SORT_KEYS_MAX];
 	unsigned int			n_sort_keys;
 	struct trace_event_file		*event_file;
 	struct hist_trigger_attrs	*attrs;
 	struct tracing_map		*map;
+	bool				enable_timestamps;
+	bool				remove;
+	struct hist_field               *var_refs[TRACING_MAP_VARS_MAX];
+	unsigned int			n_var_refs;
+
+	struct action_data		*actions[HIST_ACTIONS_MAX];
+	unsigned int			n_actions;
+
+	struct hist_field               *synth_var_refs[SYNTH_FIELDS_MAX];
+	unsigned int                    n_synth_var_refs;
+	struct field_var		*field_vars[SYNTH_FIELDS_MAX];
+	unsigned int			n_field_vars;
+	unsigned int			n_field_var_str;
+	struct field_var_hist		*field_var_hists[SYNTH_FIELDS_MAX];
+	unsigned int			n_field_var_hists;
+
+	struct field_var		*max_vars[SYNTH_FIELDS_MAX];
+	unsigned int			n_max_vars;
+	unsigned int			n_max_var_str;
+};
+
+struct synth_field {
+	char *type;
+	char *name;
+	size_t size;
+	bool is_signed;
+	bool is_string;
+};
+
+struct synth_event {
+	struct list_head			list;
+	int					ref;
+	char					*name;
+	struct synth_field			**fields;
+	unsigned int				n_fields;
+	unsigned int				n_u64;
+	struct trace_event_class		class;
+	struct trace_event_call			call;
+	struct tracepoint			*tp;
+};
+
+struct action_data;
+
+typedef void (*action_fn_t) (struct hist_trigger_data *hist_data,
+			     struct tracing_map_elt *elt, void *rec,
+			     struct ring_buffer_event *rbe,
+			     struct action_data *data, u64 *var_ref_vals);
+
+struct action_data {
+	action_fn_t		fn;
+	unsigned int		n_params;
+	char			*params[SYNTH_FIELDS_MAX];
+
+	union {
+		struct {
+			unsigned int		var_ref_idx;
+			char			*match_event;
+			char			*match_event_system;
+			char			*synth_event_name;
+			struct synth_event	*synth_event;
+		} onmatch;
+
+		struct {
+			char			*var_str;
+			char			*fn_name;
+			unsigned int		max_var_ref_idx;
+			struct hist_field	*max_var;
+			struct hist_field	*var;
+		} onmax;
+	};
+};
+
+
+static char last_hist_cmd[MAX_FILTER_STR_VAL];
+static char hist_err_str[MAX_FILTER_STR_VAL];
+
+static void last_cmd_set(char *str)
+{
+	if (!str)
+		return;
+
+	strncpy(last_hist_cmd, str, MAX_FILTER_STR_VAL - 1);
+}
+
+static void hist_err(char *str, char *var)
+{
+	int maxlen = MAX_FILTER_STR_VAL - 1;
+
+	if (!str)
+		return;
+
+	if (strlen(hist_err_str))
+		return;
+
+	if (!var)
+		var = "";
+
+	if (strlen(hist_err_str) + strlen(str) + strlen(var) > maxlen)
+		return;
+
+	strcat(hist_err_str, str);
+	strcat(hist_err_str, var);
+}
+
+static void hist_err_event(char *str, char *system, char *event, char *var)
+{
+	char err[MAX_FILTER_STR_VAL];
+
+	if (system && var)
+		snprintf(err, MAX_FILTER_STR_VAL, "%s.%s.%s", system, event, var);
+	else if (system)
+		snprintf(err, MAX_FILTER_STR_VAL, "%s.%s", system, event);
+	else
+		strncpy(err, var, MAX_FILTER_STR_VAL);
+
+	hist_err(str, err);
+}
+
+static void hist_err_clear(void)
+{
+	hist_err_str[0] = '\0';
+}
+
+static bool have_hist_err(void)
+{
+	if (strlen(hist_err_str))
+		return true;
+
+	return false;
+}
+
+static LIST_HEAD(synth_event_list);
+static DEFINE_MUTEX(synth_event_mutex);
+
+struct synth_trace_event {
+	struct trace_entry	ent;
+	u64			fields[];
+};
+
+static int synth_event_define_fields(struct trace_event_call *call)
+{
+	struct synth_trace_event trace;
+	int offset = offsetof(typeof(trace), fields);
+	struct synth_event *event = call->data;
+	unsigned int i, size, n_u64;
+	char *name, *type;
+	bool is_signed;
+	int ret = 0;
+
+	for (i = 0, n_u64 = 0; i < event->n_fields; i++) {
+		size = event->fields[i]->size;
+		is_signed = event->fields[i]->is_signed;
+		type = event->fields[i]->type;
+		name = event->fields[i]->name;
+		ret = trace_define_field(call, type, name, offset, size,
+					 is_signed, FILTER_OTHER);
+		if (ret)
+			break;
+
+		if (event->fields[i]->is_string) {
+			offset += STR_VAR_LEN_MAX;
+			n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
+		} else {
+			offset += sizeof(u64);
+			n_u64++;
+		}
+	}
+
+	event->n_u64 = n_u64;
+
+	return ret;
+}
+
+static bool synth_field_signed(char *type)
+{
+	if (strncmp(type, "u", 1) == 0)
+		return false;
+
+	return true;
+}
+
+static int synth_field_is_string(char *type)
+{
+	if (strstr(type, "char[") != NULL)
+		return true;
+
+	return false;
+}
+
+static int synth_field_string_size(char *type)
+{
+	char buf[4], *end, *start;
+	unsigned int len;
+	int size, err;
+
+	start = strstr(type, "char[");
+	if (start == NULL)
+		return -EINVAL;
+	start += strlen("char[");
+
+	end = strchr(type, ']');
+	if (!end || end < start)
+		return -EINVAL;
+
+	len = end - start;
+	if (len > 3)
+		return -EINVAL;
+
+	strncpy(buf, start, len);
+	buf[len] = '\0';
+
+	err = kstrtouint(buf, 0, &size);
+	if (err)
+		return err;
+
+	if (size > STR_VAR_LEN_MAX)
+		return -EINVAL;
+
+	return size;
+}
+
+static int synth_field_size(char *type)
+{
+	int size = 0;
+
+	if (strcmp(type, "s64") == 0)
+		size = sizeof(s64);
+	else if (strcmp(type, "u64") == 0)
+		size = sizeof(u64);
+	else if (strcmp(type, "s32") == 0)
+		size = sizeof(s32);
+	else if (strcmp(type, "u32") == 0)
+		size = sizeof(u32);
+	else if (strcmp(type, "s16") == 0)
+		size = sizeof(s16);
+	else if (strcmp(type, "u16") == 0)
+		size = sizeof(u16);
+	else if (strcmp(type, "s8") == 0)
+		size = sizeof(s8);
+	else if (strcmp(type, "u8") == 0)
+		size = sizeof(u8);
+	else if (strcmp(type, "char") == 0)
+		size = sizeof(char);
+	else if (strcmp(type, "unsigned char") == 0)
+		size = sizeof(unsigned char);
+	else if (strcmp(type, "int") == 0)
+		size = sizeof(int);
+	else if (strcmp(type, "unsigned int") == 0)
+		size = sizeof(unsigned int);
+	else if (strcmp(type, "long") == 0)
+		size = sizeof(long);
+	else if (strcmp(type, "unsigned long") == 0)
+		size = sizeof(unsigned long);
+	else if (strcmp(type, "pid_t") == 0)
+		size = sizeof(pid_t);
+	else if (synth_field_is_string(type))
+		size = synth_field_string_size(type);
+
+	return size;
+}
+
+static const char *synth_field_fmt(char *type)
+{
+	const char *fmt = "%llu";
+
+	if (strcmp(type, "s64") == 0)
+		fmt = "%lld";
+	else if (strcmp(type, "u64") == 0)
+		fmt = "%llu";
+	else if (strcmp(type, "s32") == 0)
+		fmt = "%d";
+	else if (strcmp(type, "u32") == 0)
+		fmt = "%u";
+	else if (strcmp(type, "s16") == 0)
+		fmt = "%d";
+	else if (strcmp(type, "u16") == 0)
+		fmt = "%u";
+	else if (strcmp(type, "s8") == 0)
+		fmt = "%d";
+	else if (strcmp(type, "u8") == 0)
+		fmt = "%u";
+	else if (strcmp(type, "char") == 0)
+		fmt = "%d";
+	else if (strcmp(type, "unsigned char") == 0)
+		fmt = "%u";
+	else if (strcmp(type, "int") == 0)
+		fmt = "%d";
+	else if (strcmp(type, "unsigned int") == 0)
+		fmt = "%u";
+	else if (strcmp(type, "long") == 0)
+		fmt = "%ld";
+	else if (strcmp(type, "unsigned long") == 0)
+		fmt = "%lu";
+	else if (strcmp(type, "pid_t") == 0)
+		fmt = "%d";
+	else if (synth_field_is_string(type))
+		fmt = "%s";
+
+	return fmt;
+}
+
+static enum print_line_t print_synth_event(struct trace_iterator *iter,
+					   int flags,
+					   struct trace_event *event)
+{
+	struct trace_array *tr = iter->tr;
+	struct trace_seq *s = &iter->seq;
+	struct synth_trace_event *entry;
+	struct synth_event *se;
+	unsigned int i, n_u64;
+	char print_fmt[32];
+	const char *fmt;
+
+	entry = (struct synth_trace_event *)iter->ent;
+	se = container_of(event, struct synth_event, call.event);
+
+	trace_seq_printf(s, "%s: ", se->name);
+
+	for (i = 0, n_u64 = 0; i < se->n_fields; i++) {
+		if (trace_seq_has_overflowed(s))
+			goto end;
+
+		fmt = synth_field_fmt(se->fields[i]->type);
+
+		/* parameter types */
+		if (tr->trace_flags & TRACE_ITER_VERBOSE)
+			trace_seq_printf(s, "%s ", fmt);
+
+		snprintf(print_fmt, sizeof(print_fmt), "%%s=%s%%s", fmt);
+
+		/* parameter values */
+		if (se->fields[i]->is_string) {
+			trace_seq_printf(s, print_fmt, se->fields[i]->name,
+					 (char *)&entry->fields[n_u64],
+					 i == se->n_fields - 1 ? "" : " ");
+			n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
+		} else {
+			trace_seq_printf(s, print_fmt, se->fields[i]->name,
+					 entry->fields[n_u64],
+					 i == se->n_fields - 1 ? "" : " ");
+			n_u64++;
+		}
+	}
+end:
+	trace_seq_putc(s, '\n');
+
+	return trace_handle_return(s);
+}
+
+static struct trace_event_functions synth_event_funcs = {
+	.trace		= print_synth_event
+};
+
+static notrace void trace_event_raw_event_synth(void *__data,
+						u64 *var_ref_vals,
+						unsigned int var_ref_idx)
+{
+	struct trace_event_file *trace_file = __data;
+	struct synth_trace_event *entry;
+	struct trace_event_buffer fbuffer;
+	struct ring_buffer *buffer;
+	struct synth_event *event;
+	unsigned int i, n_u64;
+	int fields_size = 0;
+
+	event = trace_file->event_call->data;
+
+	if (trace_trigger_soft_disabled(trace_file))
+		return;
+
+	fields_size = event->n_u64 * sizeof(u64);
+
+	/*
+	 * Avoid ring buffer recursion detection, as this event
+	 * is being performed within another event.
+	 */
+	buffer = trace_file->tr->trace_buffer.buffer;
+	ring_buffer_nest_start(buffer);
+
+	entry = trace_event_buffer_reserve(&fbuffer, trace_file,
+					   sizeof(*entry) + fields_size);
+	if (!entry)
+		goto out;
+
+	for (i = 0, n_u64 = 0; i < event->n_fields; i++) {
+		if (event->fields[i]->is_string) {
+			char *str_val = (char *)(long)var_ref_vals[var_ref_idx + i];
+			char *str_field = (char *)&entry->fields[n_u64];
+
+			strscpy(str_field, str_val, STR_VAR_LEN_MAX);
+			n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
+		} else {
+			entry->fields[n_u64] = var_ref_vals[var_ref_idx + i];
+			n_u64++;
+		}
+	}
+
+	trace_event_buffer_commit(&fbuffer);
+out:
+	ring_buffer_nest_end(buffer);
+}
+
+static void free_synth_event_print_fmt(struct trace_event_call *call)
+{
+	if (call) {
+		kfree(call->print_fmt);
+		call->print_fmt = NULL;
+	}
+}
+
+static int __set_synth_event_print_fmt(struct synth_event *event,
+				       char *buf, int len)
+{
+	const char *fmt;
+	int pos = 0;
+	int i;
+
+	/* When len=0, we just calculate the needed length */
+#define LEN_OR_ZERO (len ? len - pos : 0)
+
+	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
+	for (i = 0; i < event->n_fields; i++) {
+		fmt = synth_field_fmt(event->fields[i]->type);
+		pos += snprintf(buf + pos, LEN_OR_ZERO, "%s=%s%s",
+				event->fields[i]->name, fmt,
+				i == event->n_fields - 1 ? "" : ", ");
+	}
+	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
+
+	for (i = 0; i < event->n_fields; i++) {
+		pos += snprintf(buf + pos, LEN_OR_ZERO,
+				", REC->%s", event->fields[i]->name);
+	}
+
+#undef LEN_OR_ZERO
+
+	/* return the length of print_fmt */
+	return pos;
+}
+
+static int set_synth_event_print_fmt(struct trace_event_call *call)
+{
+	struct synth_event *event = call->data;
+	char *print_fmt;
+	int len;
+
+	/* First: called with 0 length to calculate the needed length */
+	len = __set_synth_event_print_fmt(event, NULL, 0);
+
+	print_fmt = kmalloc(len + 1, GFP_KERNEL);
+	if (!print_fmt)
+		return -ENOMEM;
+
+	/* Second: actually write the @print_fmt */
+	__set_synth_event_print_fmt(event, print_fmt, len + 1);
+	call->print_fmt = print_fmt;
+
+	return 0;
+}
+
+static void free_synth_field(struct synth_field *field)
+{
+	kfree(field->type);
+	kfree(field->name);
+	kfree(field);
+}
+
+static struct synth_field *parse_synth_field(char *field_type,
+					     char *field_name)
+{
+	struct synth_field *field;
+	int len, ret = 0;
+	char *array;
+
+	if (field_type[0] == ';')
+		field_type++;
+
+	len = strlen(field_name);
+	if (field_name[len - 1] == ';')
+		field_name[len - 1] = '\0';
+
+	field = kzalloc(sizeof(*field), GFP_KERNEL);
+	if (!field)
+		return ERR_PTR(-ENOMEM);
+
+	len = strlen(field_type) + 1;
+	array = strchr(field_name, '[');
+	if (array)
+		len += strlen(array);
+	field->type = kzalloc(len, GFP_KERNEL);
+	if (!field->type) {
+		ret = -ENOMEM;
+		goto free;
+	}
+	strcat(field->type, field_type);
+	if (array) {
+		strcat(field->type, array);
+		*array = '\0';
+	}
+
+	field->size = synth_field_size(field->type);
+	if (!field->size) {
+		ret = -EINVAL;
+		goto free;
+	}
+
+	if (synth_field_is_string(field->type))
+		field->is_string = true;
+
+	field->is_signed = synth_field_signed(field->type);
+
+	field->name = kstrdup(field_name, GFP_KERNEL);
+	if (!field->name) {
+		ret = -ENOMEM;
+		goto free;
+	}
+ out:
+	return field;
+ free:
+	free_synth_field(field);
+	field = ERR_PTR(ret);
+	goto out;
+}
+
+static void free_synth_tracepoint(struct tracepoint *tp)
+{
+	if (!tp)
+		return;
+
+	kfree(tp->name);
+	kfree(tp);
+}
+
+static struct tracepoint *alloc_synth_tracepoint(char *name)
+{
+	struct tracepoint *tp;
+
+	tp = kzalloc(sizeof(*tp), GFP_KERNEL);
+	if (!tp)
+		return ERR_PTR(-ENOMEM);
+
+	tp->name = kstrdup(name, GFP_KERNEL);
+	if (!tp->name) {
+		kfree(tp);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	return tp;
+}
+
+typedef void (*synth_probe_func_t) (void *__data, u64 *var_ref_vals,
+				    unsigned int var_ref_idx);
+
+static inline void trace_synth(struct synth_event *event, u64 *var_ref_vals,
+			       unsigned int var_ref_idx)
+{
+	struct tracepoint *tp = event->tp;
+
+	if (unlikely(atomic_read(&tp->key.enabled) > 0)) {
+		struct tracepoint_func *probe_func_ptr;
+		synth_probe_func_t probe_func;
+		void *__data;
+
+		if (!(cpu_online(raw_smp_processor_id())))
+			return;
+
+		probe_func_ptr = rcu_dereference_sched((tp)->funcs);
+		if (probe_func_ptr) {
+			do {
+				probe_func = probe_func_ptr->func;
+				__data = probe_func_ptr->data;
+				probe_func(__data, var_ref_vals, var_ref_idx);
+			} while ((++probe_func_ptr)->func);
+		}
+	}
+}
+
+static struct synth_event *find_synth_event(const char *name)
+{
+	struct synth_event *event;
+
+	list_for_each_entry(event, &synth_event_list, list) {
+		if (strcmp(event->name, name) == 0)
+			return event;
+	}
+
+	return NULL;
+}
+
+static int register_synth_event(struct synth_event *event)
+{
+	struct trace_event_call *call = &event->call;
+	int ret = 0;
+
+	event->call.class = &event->class;
+	event->class.system = kstrdup(SYNTH_SYSTEM, GFP_KERNEL);
+	if (!event->class.system) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	event->tp = alloc_synth_tracepoint(event->name);
+	if (IS_ERR(event->tp)) {
+		ret = PTR_ERR(event->tp);
+		event->tp = NULL;
+		goto out;
+	}
+
+	INIT_LIST_HEAD(&call->class->fields);
+	call->event.funcs = &synth_event_funcs;
+	call->class->define_fields = synth_event_define_fields;
+
+	ret = register_trace_event(&call->event);
+	if (!ret) {
+		ret = -ENODEV;
+		goto out;
+	}
+	call->flags = TRACE_EVENT_FL_TRACEPOINT;
+	call->class->reg = trace_event_reg;
+	call->class->probe = trace_event_raw_event_synth;
+	call->data = event;
+	call->tp = event->tp;
+
+	ret = trace_add_event_call(call);
+	if (ret) {
+		pr_warn("Failed to register synthetic event: %s\n",
+			trace_event_name(call));
+		goto err;
+	}
+
+	ret = set_synth_event_print_fmt(call);
+	if (ret < 0) {
+		trace_remove_event_call(call);
+		goto err;
+	}
+ out:
+	return ret;
+ err:
+	unregister_trace_event(&call->event);
+	goto out;
+}
+
+static int unregister_synth_event(struct synth_event *event)
+{
+	struct trace_event_call *call = &event->call;
+	int ret;
+
+	ret = trace_remove_event_call(call);
+
+	return ret;
+}
+
+static void free_synth_event(struct synth_event *event)
+{
+	unsigned int i;
+
+	if (!event)
+		return;
+
+	for (i = 0; i < event->n_fields; i++)
+		free_synth_field(event->fields[i]);
+
+	kfree(event->fields);
+	kfree(event->name);
+	kfree(event->class.system);
+	free_synth_tracepoint(event->tp);
+	free_synth_event_print_fmt(&event->call);
+	kfree(event);
+}
+
+static struct synth_event *alloc_synth_event(char *event_name, int n_fields,
+					     struct synth_field **fields)
+{
+	struct synth_event *event;
+	unsigned int i;
+
+	event = kzalloc(sizeof(*event), GFP_KERNEL);
+	if (!event) {
+		event = ERR_PTR(-ENOMEM);
+		goto out;
+	}
+
+	event->name = kstrdup(event_name, GFP_KERNEL);
+	if (!event->name) {
+		kfree(event);
+		event = ERR_PTR(-ENOMEM);
+		goto out;
+	}
+
+	event->fields = kcalloc(n_fields, sizeof(*event->fields), GFP_KERNEL);
+	if (!event->fields) {
+		free_synth_event(event);
+		event = ERR_PTR(-ENOMEM);
+		goto out;
+	}
+
+	for (i = 0; i < n_fields; i++)
+		event->fields[i] = fields[i];
+
+	event->n_fields = n_fields;
+ out:
+	return event;
+}
+
+static void action_trace(struct hist_trigger_data *hist_data,
+			 struct tracing_map_elt *elt, void *rec,
+			 struct ring_buffer_event *rbe,
+			 struct action_data *data, u64 *var_ref_vals)
+{
+	struct synth_event *event = data->onmatch.synth_event;
+
+	trace_synth(event, var_ref_vals, data->onmatch.var_ref_idx);
+}
+
+struct hist_var_data {
+	struct list_head list;
+	struct hist_trigger_data *hist_data;
+};
+
+static void add_or_delete_synth_event(struct synth_event *event, int delete)
+{
+	if (delete)
+		free_synth_event(event);
+	else {
+		mutex_lock(&synth_event_mutex);
+		if (!find_synth_event(event->name))
+			list_add(&event->list, &synth_event_list);
+		else
+			free_synth_event(event);
+		mutex_unlock(&synth_event_mutex);
+	}
+}
+
+static int create_synth_event(int argc, char **argv)
+{
+	struct synth_field *field, *fields[SYNTH_FIELDS_MAX];
+	struct synth_event *event = NULL;
+	bool delete_event = false;
+	int i, n_fields = 0, ret = 0;
+	char *name;
+
+	mutex_lock(&synth_event_mutex);
+
+	/*
+	 * Argument syntax:
+	 *  - Add synthetic event: <event_name> field[;field] ...
+	 *  - Remove synthetic event: !<event_name> field[;field] ...
+	 *      where 'field' = type field_name
+	 */
+	if (argc < 1) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	name = argv[0];
+	if (name[0] == '!') {
+		delete_event = true;
+		name++;
+	}
+
+	event = find_synth_event(name);
+	if (event) {
+		if (delete_event) {
+			if (event->ref) {
+				event = NULL;
+				ret = -EBUSY;
+				goto out;
+			}
+			list_del(&event->list);
+			goto out;
+		}
+		event = NULL;
+		ret = -EEXIST;
+		goto out;
+	} else if (delete_event)
+		goto out;
+
+	if (argc < 2) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	for (i = 1; i < argc - 1; i++) {
+		if (strcmp(argv[i], ";") == 0)
+			continue;
+		if (n_fields == SYNTH_FIELDS_MAX) {
+			ret = -EINVAL;
+			goto err;
+		}
+
+		field = parse_synth_field(argv[i], argv[i + 1]);
+		if (IS_ERR(field)) {
+			ret = PTR_ERR(field);
+			goto err;
+		}
+		fields[n_fields] = field;
+		i++; n_fields++;
+	}
+
+	if (i < argc) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	event = alloc_synth_event(name, n_fields, fields);
+	if (IS_ERR(event)) {
+		ret = PTR_ERR(event);
+		event = NULL;
+		goto err;
+	}
+ out:
+	mutex_unlock(&synth_event_mutex);
+
+	if (event) {
+		if (delete_event) {
+			ret = unregister_synth_event(event);
+			add_or_delete_synth_event(event, !ret);
+		} else {
+			ret = register_synth_event(event);
+			add_or_delete_synth_event(event, ret);
+		}
+	}
+
+	return ret;
+ err:
+	mutex_unlock(&synth_event_mutex);
+
+	for (i = 0; i < n_fields; i++)
+		free_synth_field(fields[i]);
+	free_synth_event(event);
+
+	return ret;
+}
+
+static int release_all_synth_events(void)
+{
+	struct list_head release_events;
+	struct synth_event *event, *e;
+	int ret = 0;
+
+	INIT_LIST_HEAD(&release_events);
+
+	mutex_lock(&synth_event_mutex);
+
+	list_for_each_entry(event, &synth_event_list, list) {
+		if (event->ref) {
+			mutex_unlock(&synth_event_mutex);
+			return -EBUSY;
+		}
+	}
+
+	list_splice_init(&event->list, &release_events);
+
+	mutex_unlock(&synth_event_mutex);
+
+	list_for_each_entry_safe(event, e, &release_events, list) {
+		list_del(&event->list);
+
+		ret = unregister_synth_event(event);
+		add_or_delete_synth_event(event, !ret);
+	}
+
+	return ret;
+}
+
+
+static void *synth_events_seq_start(struct seq_file *m, loff_t *pos)
+{
+	mutex_lock(&synth_event_mutex);
+
+	return seq_list_start(&synth_event_list, *pos);
+}
+
+static void *synth_events_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	return seq_list_next(v, &synth_event_list, pos);
+}
+
+static void synth_events_seq_stop(struct seq_file *m, void *v)
+{
+	mutex_unlock(&synth_event_mutex);
+}
+
+static int synth_events_seq_show(struct seq_file *m, void *v)
+{
+	struct synth_field *field;
+	struct synth_event *event = v;
+	unsigned int i;
+
+	seq_printf(m, "%s\t", event->name);
+
+	for (i = 0; i < event->n_fields; i++) {
+		field = event->fields[i];
+
+		/* parameter values */
+		seq_printf(m, "%s %s%s", field->type, field->name,
+			   i == event->n_fields - 1 ? "" : "; ");
+	}
+
+	seq_putc(m, '\n');
+
+	return 0;
+}
+
+static const struct seq_operations synth_events_seq_op = {
+	.start  = synth_events_seq_start,
+	.next   = synth_events_seq_next,
+	.stop   = synth_events_seq_stop,
+	.show   = synth_events_seq_show
+};
+
+static int synth_events_open(struct inode *inode, struct file *file)
+{
+	int ret;
+
+	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
+		ret = release_all_synth_events();
+		if (ret < 0)
+			return ret;
+	}
+
+	return seq_open(file, &synth_events_seq_op);
+}
+
+static ssize_t synth_events_write(struct file *file,
+				  const char __user *buffer,
+				  size_t count, loff_t *ppos)
+{
+	return trace_parse_run_command(file, buffer, count, ppos,
+				       create_synth_event);
+}
+
+static const struct file_operations synth_events_fops = {
+	.open           = synth_events_open,
+	.write		= synth_events_write,
+	.read           = seq_read,
+	.llseek         = seq_lseek,
+	.release        = seq_release,
+};
+
+static u64 hist_field_timestamp(struct hist_field *hist_field,
+				struct tracing_map_elt *elt,
+				struct ring_buffer_event *rbe,
+				void *event)
+{
+	struct hist_trigger_data *hist_data = hist_field->hist_data;
+	struct trace_array *tr = hist_data->event_file->tr;
+
+	u64 ts = ring_buffer_event_time_stamp(rbe);
+
+	if (hist_data->attrs->ts_in_usecs && trace_clock_in_ns(tr))
+		ts = ns2usecs(ts);
+
+	return ts;
+}
+
+static u64 hist_field_cpu(struct hist_field *hist_field,
+			  struct tracing_map_elt *elt,
+			  struct ring_buffer_event *rbe,
+			  void *event)
+{
+	int cpu = smp_processor_id();
+
+	return cpu;
+}
+
+static struct hist_field *
+check_field_for_var_ref(struct hist_field *hist_field,
+			struct hist_trigger_data *var_data,
+			unsigned int var_idx)
+{
+	struct hist_field *found = NULL;
+
+	if (hist_field && hist_field->flags & HIST_FIELD_FL_VAR_REF) {
+		if (hist_field->var.idx == var_idx &&
+		    hist_field->var.hist_data == var_data) {
+			found = hist_field;
+		}
+	}
+
+	return found;
+}
+
+static struct hist_field *
+check_field_for_var_refs(struct hist_trigger_data *hist_data,
+			 struct hist_field *hist_field,
+			 struct hist_trigger_data *var_data,
+			 unsigned int var_idx,
+			 unsigned int level)
+{
+	struct hist_field *found = NULL;
+	unsigned int i;
+
+	if (level > 3)
+		return found;
+
+	if (!hist_field)
+		return found;
+
+	found = check_field_for_var_ref(hist_field, var_data, var_idx);
+	if (found)
+		return found;
+
+	for (i = 0; i < HIST_FIELD_OPERANDS_MAX; i++) {
+		struct hist_field *operand;
+
+		operand = hist_field->operands[i];
+		found = check_field_for_var_refs(hist_data, operand, var_data,
+						 var_idx, level + 1);
+		if (found)
+			return found;
+	}
+
+	return found;
+}
+
+static struct hist_field *find_var_ref(struct hist_trigger_data *hist_data,
+				       struct hist_trigger_data *var_data,
+				       unsigned int var_idx)
+{
+	struct hist_field *hist_field, *found = NULL;
+	unsigned int i;
+
+	for_each_hist_field(i, hist_data) {
+		hist_field = hist_data->fields[i];
+		found = check_field_for_var_refs(hist_data, hist_field,
+						 var_data, var_idx, 0);
+		if (found)
+			return found;
+	}
+
+	for (i = 0; i < hist_data->n_synth_var_refs; i++) {
+		hist_field = hist_data->synth_var_refs[i];
+		found = check_field_for_var_refs(hist_data, hist_field,
+						 var_data, var_idx, 0);
+		if (found)
+			return found;
+	}
+
+	return found;
+}
+
+static struct hist_field *find_any_var_ref(struct hist_trigger_data *hist_data,
+					   unsigned int var_idx)
+{
+	struct trace_array *tr = hist_data->event_file->tr;
+	struct hist_field *found = NULL;
+	struct hist_var_data *var_data;
+
+	list_for_each_entry(var_data, &tr->hist_vars, list) {
+		if (var_data->hist_data == hist_data)
+			continue;
+		found = find_var_ref(var_data->hist_data, hist_data, var_idx);
+		if (found)
+			break;
+	}
+
+	return found;
+}
+
+static bool check_var_refs(struct hist_trigger_data *hist_data)
+{
+	struct hist_field *field;
+	bool found = false;
+	int i;
+
+	for_each_hist_field(i, hist_data) {
+		field = hist_data->fields[i];
+		if (field && field->flags & HIST_FIELD_FL_VAR) {
+			if (find_any_var_ref(hist_data, field->var.idx)) {
+				found = true;
+				break;
+			}
+		}
+	}
+
+	return found;
+}
+
+static struct hist_var_data *find_hist_vars(struct hist_trigger_data *hist_data)
+{
+	struct trace_array *tr = hist_data->event_file->tr;
+	struct hist_var_data *var_data, *found = NULL;
+
+	list_for_each_entry(var_data, &tr->hist_vars, list) {
+		if (var_data->hist_data == hist_data) {
+			found = var_data;
+			break;
+		}
+	}
+
+	return found;
+}
+
+static bool field_has_hist_vars(struct hist_field *hist_field,
+				unsigned int level)
+{
+	int i;
+
+	if (level > 3)
+		return false;
+
+	if (!hist_field)
+		return false;
+
+	if (hist_field->flags & HIST_FIELD_FL_VAR ||
+	    hist_field->flags & HIST_FIELD_FL_VAR_REF)
+		return true;
+
+	for (i = 0; i < HIST_FIELD_OPERANDS_MAX; i++) {
+		struct hist_field *operand;
+
+		operand = hist_field->operands[i];
+		if (field_has_hist_vars(operand, level + 1))
+			return true;
+	}
+
+	return false;
+}
+
+static bool has_hist_vars(struct hist_trigger_data *hist_data)
+{
+	struct hist_field *hist_field;
+	int i;
+
+	for_each_hist_field(i, hist_data) {
+		hist_field = hist_data->fields[i];
+		if (field_has_hist_vars(hist_field, 0))
+			return true;
+	}
+
+	return false;
+}
+
+static int save_hist_vars(struct hist_trigger_data *hist_data)
+{
+	struct trace_array *tr = hist_data->event_file->tr;
+	struct hist_var_data *var_data;
+
+	var_data = find_hist_vars(hist_data);
+	if (var_data)
+		return 0;
+
+	if (trace_array_get(tr) < 0)
+		return -ENODEV;
+
+	var_data = kzalloc(sizeof(*var_data), GFP_KERNEL);
+	if (!var_data) {
+		trace_array_put(tr);
+		return -ENOMEM;
+	}
+
+	var_data->hist_data = hist_data;
+	list_add(&var_data->list, &tr->hist_vars);
+
+	return 0;
+}
+
+static void remove_hist_vars(struct hist_trigger_data *hist_data)
+{
+	struct trace_array *tr = hist_data->event_file->tr;
+	struct hist_var_data *var_data;
+
+	var_data = find_hist_vars(hist_data);
+	if (!var_data)
+		return;
+
+	if (WARN_ON(check_var_refs(hist_data)))
+		return;
+
+	list_del(&var_data->list);
+
+	kfree(var_data);
+
+	trace_array_put(tr);
+}
+
+static struct hist_field *find_var_field(struct hist_trigger_data *hist_data,
+					 const char *var_name)
+{
+	struct hist_field *hist_field, *found = NULL;
+	int i;
+
+	for_each_hist_field(i, hist_data) {
+		hist_field = hist_data->fields[i];
+		if (hist_field && hist_field->flags & HIST_FIELD_FL_VAR &&
+		    strcmp(hist_field->var.name, var_name) == 0) {
+			found = hist_field;
+			break;
+		}
+	}
+
+	return found;
+}
+
+static struct hist_field *find_var(struct hist_trigger_data *hist_data,
+				   struct trace_event_file *file,
+				   const char *var_name)
+{
+	struct hist_trigger_data *test_data;
+	struct event_trigger_data *test;
+	struct hist_field *hist_field;
+
+	hist_field = find_var_field(hist_data, var_name);
+	if (hist_field)
+		return hist_field;
+
+	list_for_each_entry_rcu(test, &file->triggers, list) {
+		if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) {
+			test_data = test->private_data;
+			hist_field = find_var_field(test_data, var_name);
+			if (hist_field)
+				return hist_field;
+		}
+	}
+
+	return NULL;
+}
+
+static struct trace_event_file *find_var_file(struct trace_array *tr,
+					      char *system,
+					      char *event_name,
+					      char *var_name)
+{
+	struct hist_trigger_data *var_hist_data;
+	struct hist_var_data *var_data;
+	struct trace_event_file *file, *found = NULL;
+
+	if (system)
+		return find_event_file(tr, system, event_name);
+
+	list_for_each_entry(var_data, &tr->hist_vars, list) {
+		var_hist_data = var_data->hist_data;
+		file = var_hist_data->event_file;
+		if (file == found)
+			continue;
+
+		if (find_var_field(var_hist_data, var_name)) {
+			if (found) {
+				hist_err_event("Variable name not unique, need to use fully qualified name (subsys.event.var) for variable: ", system, event_name, var_name);
+				return NULL;
+			}
+
+			found = file;
+		}
+	}
+
+	return found;
+}
+
+static struct hist_field *find_file_var(struct trace_event_file *file,
+					const char *var_name)
+{
+	struct hist_trigger_data *test_data;
+	struct event_trigger_data *test;
+	struct hist_field *hist_field;
+
+	list_for_each_entry_rcu(test, &file->triggers, list) {
+		if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) {
+			test_data = test->private_data;
+			hist_field = find_var_field(test_data, var_name);
+			if (hist_field)
+				return hist_field;
+		}
+	}
+
+	return NULL;
+}
+
+static struct hist_field *
+find_match_var(struct hist_trigger_data *hist_data, char *var_name)
+{
+	struct trace_array *tr = hist_data->event_file->tr;
+	struct hist_field *hist_field, *found = NULL;
+	struct trace_event_file *file;
+	unsigned int i;
+
+	for (i = 0; i < hist_data->n_actions; i++) {
+		struct action_data *data = hist_data->actions[i];
+
+		if (data->fn == action_trace) {
+			char *system = data->onmatch.match_event_system;
+			char *event_name = data->onmatch.match_event;
+
+			file = find_var_file(tr, system, event_name, var_name);
+			if (!file)
+				continue;
+			hist_field = find_file_var(file, var_name);
+			if (hist_field) {
+				if (found) {
+					hist_err_event("Variable name not unique, need to use fully qualified name (subsys.event.var) for variable: ", system, event_name, var_name);
+					return ERR_PTR(-EINVAL);
+				}
+
+				found = hist_field;
+			}
+		}
+	}
+	return found;
+}
+
+static struct hist_field *find_event_var(struct hist_trigger_data *hist_data,
+					 char *system,
+					 char *event_name,
+					 char *var_name)
+{
+	struct trace_array *tr = hist_data->event_file->tr;
+	struct hist_field *hist_field = NULL;
+	struct trace_event_file *file;
+
+	if (!system || !event_name) {
+		hist_field = find_match_var(hist_data, var_name);
+		if (IS_ERR(hist_field))
+			return NULL;
+		if (hist_field)
+			return hist_field;
+	}
+
+	file = find_var_file(tr, system, event_name, var_name);
+	if (!file)
+		return NULL;
+
+	hist_field = find_file_var(file, var_name);
+
+	return hist_field;
+}
+
+struct hist_elt_data {
+	char *comm;
+	u64 *var_ref_vals;
+	char *field_var_str[SYNTH_FIELDS_MAX];
 };
 
+static u64 hist_field_var_ref(struct hist_field *hist_field,
+			      struct tracing_map_elt *elt,
+			      struct ring_buffer_event *rbe,
+			      void *event)
+{
+	struct hist_elt_data *elt_data;
+	u64 var_val = 0;
+
+	elt_data = elt->private_data;
+	var_val = elt_data->var_ref_vals[hist_field->var_ref_idx];
+
+	return var_val;
+}
+
+static bool resolve_var_refs(struct hist_trigger_data *hist_data, void *key,
+			     u64 *var_ref_vals, bool self)
+{
+	struct hist_trigger_data *var_data;
+	struct tracing_map_elt *var_elt;
+	struct hist_field *hist_field;
+	unsigned int i, var_idx;
+	bool resolved = true;
+	u64 var_val = 0;
+
+	for (i = 0; i < hist_data->n_var_refs; i++) {
+		hist_field = hist_data->var_refs[i];
+		var_idx = hist_field->var.idx;
+		var_data = hist_field->var.hist_data;
+
+		if (var_data == NULL) {
+			resolved = false;
+			break;
+		}
+
+		if ((self && var_data != hist_data) ||
+		    (!self && var_data == hist_data))
+			continue;
+
+		var_elt = tracing_map_lookup(var_data->map, key);
+		if (!var_elt) {
+			resolved = false;
+			break;
+		}
+
+		if (!tracing_map_var_set(var_elt, var_idx)) {
+			resolved = false;
+			break;
+		}
+
+		if (self || !hist_field->read_once)
+			var_val = tracing_map_read_var(var_elt, var_idx);
+		else
+			var_val = tracing_map_read_var_once(var_elt, var_idx);
+
+		var_ref_vals[i] = var_val;
+	}
+
+	return resolved;
+}
+
 static const char *hist_field_name(struct hist_field *field,
 				   unsigned int level)
 {
@@ -162,8 +1683,26 @@ static const char *hist_field_name(struct hist_field *field,
 
 	if (field->field)
 		field_name = field->field->name;
-	else if (field->flags & HIST_FIELD_FL_LOG2)
+	else if (field->flags & HIST_FIELD_FL_LOG2 ||
+		 field->flags & HIST_FIELD_FL_ALIAS)
 		field_name = hist_field_name(field->operands[0], ++level);
+	else if (field->flags & HIST_FIELD_FL_CPU)
+		field_name = "cpu";
+	else if (field->flags & HIST_FIELD_FL_EXPR ||
+		 field->flags & HIST_FIELD_FL_VAR_REF) {
+		if (field->system) {
+			static char full_name[MAX_FILTER_STR_VAL];
+
+			strcat(full_name, field->system);
+			strcat(full_name, ".");
+			strcat(full_name, field->event_name);
+			strcat(full_name, ".");
+			strcat(full_name, field->name);
+			field_name = full_name;
+		} else
+			field_name = field->name;
+	} else if (field->flags & HIST_FIELD_FL_TIMESTAMP)
+		field_name = "common_timestamp";
 
 	if (field_name == NULL)
 		field_name = "";
@@ -232,16 +1771,119 @@ static int parse_map_size(char *str)
 
 static void destroy_hist_trigger_attrs(struct hist_trigger_attrs *attrs)
 {
+	unsigned int i;
+
 	if (!attrs)
 		return;
 
+	for (i = 0; i < attrs->n_assignments; i++)
+		kfree(attrs->assignment_str[i]);
+
+	for (i = 0; i < attrs->n_actions; i++)
+		kfree(attrs->action_str[i]);
+
 	kfree(attrs->name);
 	kfree(attrs->sort_key_str);
 	kfree(attrs->keys_str);
 	kfree(attrs->vals_str);
+	kfree(attrs->clock);
 	kfree(attrs);
 }
 
+static int parse_action(char *str, struct hist_trigger_attrs *attrs)
+{
+	int ret = -EINVAL;
+
+	if (attrs->n_actions >= HIST_ACTIONS_MAX)
+		return ret;
+
+	if ((strncmp(str, "onmatch(", strlen("onmatch(")) == 0) ||
+	    (strncmp(str, "onmax(", strlen("onmax(")) == 0)) {
+		attrs->action_str[attrs->n_actions] = kstrdup(str, GFP_KERNEL);
+		if (!attrs->action_str[attrs->n_actions]) {
+			ret = -ENOMEM;
+			return ret;
+		}
+		attrs->n_actions++;
+		ret = 0;
+	}
+
+	return ret;
+}
+
+static int parse_assignment(char *str, struct hist_trigger_attrs *attrs)
+{
+	int ret = 0;
+
+	if ((strncmp(str, "key=", strlen("key=")) == 0) ||
+	    (strncmp(str, "keys=", strlen("keys=")) == 0)) {
+		attrs->keys_str = kstrdup(str, GFP_KERNEL);
+		if (!attrs->keys_str) {
+			ret = -ENOMEM;
+			goto out;
+		}
+	} else if ((strncmp(str, "val=", strlen("val=")) == 0) ||
+		 (strncmp(str, "vals=", strlen("vals=")) == 0) ||
+		 (strncmp(str, "values=", strlen("values=")) == 0)) {
+		attrs->vals_str = kstrdup(str, GFP_KERNEL);
+		if (!attrs->vals_str) {
+			ret = -ENOMEM;
+			goto out;
+		}
+	} else if (strncmp(str, "sort=", strlen("sort=")) == 0) {
+		attrs->sort_key_str = kstrdup(str, GFP_KERNEL);
+		if (!attrs->sort_key_str) {
+			ret = -ENOMEM;
+			goto out;
+		}
+	} else if (strncmp(str, "name=", strlen("name=")) == 0) {
+		attrs->name = kstrdup(str, GFP_KERNEL);
+		if (!attrs->name) {
+			ret = -ENOMEM;
+			goto out;
+		}
+	} else if (strncmp(str, "clock=", strlen("clock=")) == 0) {
+		strsep(&str, "=");
+		if (!str) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		str = strstrip(str);
+		attrs->clock = kstrdup(str, GFP_KERNEL);
+		if (!attrs->clock) {
+			ret = -ENOMEM;
+			goto out;
+		}
+	} else if (strncmp(str, "size=", strlen("size=")) == 0) {
+		int map_bits = parse_map_size(str);
+
+		if (map_bits < 0) {
+			ret = map_bits;
+			goto out;
+		}
+		attrs->map_bits = map_bits;
+	} else {
+		char *assignment;
+
+		if (attrs->n_assignments == TRACING_MAP_VARS_MAX) {
+			hist_err("Too many variables defined: ", str);
+			ret = -EINVAL;
+			goto out;
+		}
+
+		assignment = kstrdup(str, GFP_KERNEL);
+		if (!assignment) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		attrs->assignment_str[attrs->n_assignments++] = assignment;
+	}
+ out:
+	return ret;
+}
+
 static struct hist_trigger_attrs *parse_hist_trigger_attrs(char *trigger_str)
 {
 	struct hist_trigger_attrs *attrs;
@@ -254,35 +1896,21 @@ static struct hist_trigger_attrs *parse_hist_trigger_attrs(char *trigger_str)
 	while (trigger_str) {
 		char *str = strsep(&trigger_str, ":");
 
-		if ((strncmp(str, "key=", strlen("key=")) == 0) ||
-		    (strncmp(str, "keys=", strlen("keys=")) == 0))
-			attrs->keys_str = kstrdup(str, GFP_KERNEL);
-		else if ((strncmp(str, "val=", strlen("val=")) == 0) ||
-			 (strncmp(str, "vals=", strlen("vals=")) == 0) ||
-			 (strncmp(str, "values=", strlen("values=")) == 0))
-			attrs->vals_str = kstrdup(str, GFP_KERNEL);
-		else if (strncmp(str, "sort=", strlen("sort=")) == 0)
-			attrs->sort_key_str = kstrdup(str, GFP_KERNEL);
-		else if (strncmp(str, "name=", strlen("name=")) == 0)
-			attrs->name = kstrdup(str, GFP_KERNEL);
-		else if (strcmp(str, "pause") == 0)
+		if (strchr(str, '=')) {
+			ret = parse_assignment(str, attrs);
+			if (ret)
+				goto free;
+		} else if (strcmp(str, "pause") == 0)
 			attrs->pause = true;
 		else if ((strcmp(str, "cont") == 0) ||
 			 (strcmp(str, "continue") == 0))
 			attrs->cont = true;
 		else if (strcmp(str, "clear") == 0)
 			attrs->clear = true;
-		else if (strncmp(str, "size=", strlen("size=")) == 0) {
-			int map_bits = parse_map_size(str);
-
-			if (map_bits < 0) {
-				ret = map_bits;
+		else {
+			ret = parse_action(str, attrs);
+			if (ret)
 				goto free;
-			}
-			attrs->map_bits = map_bits;
-		} else {
-			ret = -EINVAL;
-			goto free;
 		}
 	}
 
@@ -291,6 +1919,14 @@ static struct hist_trigger_attrs *parse_hist_trigger_attrs(char *trigger_str)
 		goto free;
 	}
 
+	if (!attrs->clock) {
+		attrs->clock = kstrdup("global", GFP_KERNEL);
+		if (!attrs->clock) {
+			ret = -ENOMEM;
+			goto free;
+		}
+	}
+
 	return attrs;
  free:
 	destroy_hist_trigger_attrs(attrs);
@@ -313,64 +1949,203 @@ static inline void save_comm(char *comm, struct task_struct *task)
 	memcpy(comm, task->comm, TASK_COMM_LEN);
 }
 
-static void hist_trigger_elt_comm_free(struct tracing_map_elt *elt)
+static void hist_elt_data_free(struct hist_elt_data *elt_data)
 {
-	kfree((char *)elt->private_data);
+	unsigned int i;
+
+	for (i = 0; i < SYNTH_FIELDS_MAX; i++)
+		kfree(elt_data->field_var_str[i]);
+
+	kfree(elt_data->comm);
+	kfree(elt_data);
 }
 
-static int hist_trigger_elt_comm_alloc(struct tracing_map_elt *elt)
+static void hist_trigger_elt_data_free(struct tracing_map_elt *elt)
+{
+	struct hist_elt_data *elt_data = elt->private_data;
+
+	hist_elt_data_free(elt_data);
+}
+
+static int hist_trigger_elt_data_alloc(struct tracing_map_elt *elt)
 {
 	struct hist_trigger_data *hist_data = elt->map->private_data;
+	unsigned int size = TASK_COMM_LEN;
+	struct hist_elt_data *elt_data;
 	struct hist_field *key_field;
-	unsigned int i;
+	unsigned int i, n_str;
+
+	elt_data = kzalloc(sizeof(*elt_data), GFP_KERNEL);
+	if (!elt_data)
+		return -ENOMEM;
 
 	for_each_hist_key_field(i, hist_data) {
 		key_field = hist_data->fields[i];
 
 		if (key_field->flags & HIST_FIELD_FL_EXECNAME) {
-			unsigned int size = TASK_COMM_LEN + 1;
-
-			elt->private_data = kzalloc(size, GFP_KERNEL);
-			if (!elt->private_data)
+			elt_data->comm = kzalloc(size, GFP_KERNEL);
+			if (!elt_data->comm) {
+				kfree(elt_data);
 				return -ENOMEM;
+			}
 			break;
 		}
 	}
 
+	n_str = hist_data->n_field_var_str + hist_data->n_max_var_str;
+
+	size = STR_VAR_LEN_MAX;
+
+	for (i = 0; i < n_str; i++) {
+		elt_data->field_var_str[i] = kzalloc(size, GFP_KERNEL);
+		if (!elt_data->field_var_str[i]) {
+			hist_elt_data_free(elt_data);
+			return -ENOMEM;
+		}
+	}
+
+	elt->private_data = elt_data;
+
 	return 0;
 }
 
-static void hist_trigger_elt_comm_copy(struct tracing_map_elt *to,
-				       struct tracing_map_elt *from)
+static void hist_trigger_elt_data_init(struct tracing_map_elt *elt)
+{
+	struct hist_elt_data *elt_data = elt->private_data;
+
+	if (elt_data->comm)
+		save_comm(elt_data->comm, current);
+}
+
+static const struct tracing_map_ops hist_trigger_elt_data_ops = {
+	.elt_alloc	= hist_trigger_elt_data_alloc,
+	.elt_free	= hist_trigger_elt_data_free,
+	.elt_init	= hist_trigger_elt_data_init,
+};
+
+static const char *get_hist_field_flags(struct hist_field *hist_field)
+{
+	const char *flags_str = NULL;
+
+	if (hist_field->flags & HIST_FIELD_FL_HEX)
+		flags_str = "hex";
+	else if (hist_field->flags & HIST_FIELD_FL_SYM)
+		flags_str = "sym";
+	else if (hist_field->flags & HIST_FIELD_FL_SYM_OFFSET)
+		flags_str = "sym-offset";
+	else if (hist_field->flags & HIST_FIELD_FL_EXECNAME)
+		flags_str = "execname";
+	else if (hist_field->flags & HIST_FIELD_FL_SYSCALL)
+		flags_str = "syscall";
+	else if (hist_field->flags & HIST_FIELD_FL_LOG2)
+		flags_str = "log2";
+	else if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP_USECS)
+		flags_str = "usecs";
+
+	return flags_str;
+}
+
+static void expr_field_str(struct hist_field *field, char *expr)
 {
-	char *comm_from = from->private_data;
-	char *comm_to = to->private_data;
+	if (field->flags & HIST_FIELD_FL_VAR_REF)
+		strcat(expr, "$");
+
+	strcat(expr, hist_field_name(field, 0));
 
-	if (comm_from)
-		memcpy(comm_to, comm_from, TASK_COMM_LEN + 1);
+	if (field->flags && !(field->flags & HIST_FIELD_FL_VAR_REF)) {
+		const char *flags_str = get_hist_field_flags(field);
+
+		if (flags_str) {
+			strcat(expr, ".");
+			strcat(expr, flags_str);
+		}
+	}
 }
 
-static void hist_trigger_elt_comm_init(struct tracing_map_elt *elt)
+static char *expr_str(struct hist_field *field, unsigned int level)
 {
-	char *comm = elt->private_data;
+	char *expr;
+
+	if (level > 1)
+		return NULL;
+
+	expr = kzalloc(MAX_FILTER_STR_VAL, GFP_KERNEL);
+	if (!expr)
+		return NULL;
+
+	if (!field->operands[0]) {
+		expr_field_str(field, expr);
+		return expr;
+	}
+
+	if (field->operator == FIELD_OP_UNARY_MINUS) {
+		char *subexpr;
 
-	if (comm)
-		save_comm(comm, current);
+		strcat(expr, "-(");
+		subexpr = expr_str(field->operands[0], ++level);
+		if (!subexpr) {
+			kfree(expr);
+			return NULL;
+		}
+		strcat(expr, subexpr);
+		strcat(expr, ")");
+
+		kfree(subexpr);
+
+		return expr;
+	}
+
+	expr_field_str(field->operands[0], expr);
+
+	switch (field->operator) {
+	case FIELD_OP_MINUS:
+		strcat(expr, "-");
+		break;
+	case FIELD_OP_PLUS:
+		strcat(expr, "+");
+		break;
+	default:
+		kfree(expr);
+		return NULL;
+	}
+
+	expr_field_str(field->operands[1], expr);
+
+	return expr;
 }
 
-static const struct tracing_map_ops hist_trigger_elt_comm_ops = {
-	.elt_alloc	= hist_trigger_elt_comm_alloc,
-	.elt_copy	= hist_trigger_elt_comm_copy,
-	.elt_free	= hist_trigger_elt_comm_free,
-	.elt_init	= hist_trigger_elt_comm_init,
-};
+static int contains_operator(char *str)
+{
+	enum field_op_id field_op = FIELD_OP_NONE;
+	char *op;
+
+	op = strpbrk(str, "+-");
+	if (!op)
+		return FIELD_OP_NONE;
+
+	switch (*op) {
+	case '-':
+		if (*str == '-')
+			field_op = FIELD_OP_UNARY_MINUS;
+		else
+			field_op = FIELD_OP_MINUS;
+		break;
+	case '+':
+		field_op = FIELD_OP_PLUS;
+		break;
+	default:
+		break;
+	}
+
+	return field_op;
+}
 
 static void destroy_hist_field(struct hist_field *hist_field,
 			       unsigned int level)
 {
 	unsigned int i;
 
-	if (level > 2)
+	if (level > 3)
 		return;
 
 	if (!hist_field)
@@ -379,11 +2154,17 @@ static void destroy_hist_field(struct hist_field *hist_field,
 	for (i = 0; i < HIST_FIELD_OPERANDS_MAX; i++)
 		destroy_hist_field(hist_field->operands[i], level + 1);
 
+	kfree(hist_field->var.name);
+	kfree(hist_field->name);
+	kfree(hist_field->type);
+
 	kfree(hist_field);
 }
 
-static struct hist_field *create_hist_field(struct ftrace_event_field *field,
-					    unsigned long flags)
+static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data,
+					    struct ftrace_event_field *field,
+					    unsigned long flags,
+					    char *var_name)
 {
 	struct hist_field *hist_field;
 
@@ -394,8 +2175,22 @@ static struct hist_field *create_hist_field(struct ftrace_event_field *field,
 	if (!hist_field)
 		return NULL;
 
+	hist_field->hist_data = hist_data;
+
+	if (flags & HIST_FIELD_FL_EXPR || flags & HIST_FIELD_FL_ALIAS)
+		goto out; /* caller will populate */
+
+	if (flags & HIST_FIELD_FL_VAR_REF) {
+		hist_field->fn = hist_field_var_ref;
+		goto out;
+	}
+
 	if (flags & HIST_FIELD_FL_HITCOUNT) {
 		hist_field->fn = hist_field_counter;
+		hist_field->size = sizeof(u64);
+		hist_field->type = kstrdup("u64", GFP_KERNEL);
+		if (!hist_field->type)
+			goto free;
 		goto out;
 	}
 
@@ -407,8 +2202,29 @@ static struct hist_field *create_hist_field(struct ftrace_event_field *field,
 	if (flags & HIST_FIELD_FL_LOG2) {
 		unsigned long fl = flags & ~HIST_FIELD_FL_LOG2;
 		hist_field->fn = hist_field_log2;
-		hist_field->operands[0] = create_hist_field(field, fl);
+		hist_field->operands[0] = create_hist_field(hist_data, field, fl, NULL);
 		hist_field->size = hist_field->operands[0]->size;
+		hist_field->type = kstrdup(hist_field->operands[0]->type, GFP_KERNEL);
+		if (!hist_field->type)
+			goto free;
+		goto out;
+	}
+
+	if (flags & HIST_FIELD_FL_TIMESTAMP) {
+		hist_field->fn = hist_field_timestamp;
+		hist_field->size = sizeof(u64);
+		hist_field->type = kstrdup("u64", GFP_KERNEL);
+		if (!hist_field->type)
+			goto free;
+		goto out;
+	}
+
+	if (flags & HIST_FIELD_FL_CPU) {
+		hist_field->fn = hist_field_cpu;
+		hist_field->size = sizeof(int);
+		hist_field->type = kstrdup("unsigned int", GFP_KERNEL);
+		if (!hist_field->type)
+			goto free;
 		goto out;
 	}
 
@@ -418,6 +2234,11 @@ static struct hist_field *create_hist_field(struct ftrace_event_field *field,
 	if (is_string_field(field)) {
 		flags |= HIST_FIELD_FL_STRING;
 
+		hist_field->size = MAX_FILTER_STR_VAL;
+		hist_field->type = kstrdup(field->type, GFP_KERNEL);
+		if (!hist_field->type)
+			goto free;
+
 		if (field->filter_type == FILTER_STATIC_STRING)
 			hist_field->fn = hist_field_string;
 		else if (field->filter_type == FILTER_DYN_STRING)
@@ -425,6 +2246,12 @@ static struct hist_field *create_hist_field(struct ftrace_event_field *field,
 		else
 			hist_field->fn = hist_field_pstring;
 	} else {
+		hist_field->size = field->size;
+		hist_field->is_signed = field->is_signed;
+		hist_field->type = kstrdup(field->type, GFP_KERNEL);
+		if (!hist_field->type)
+			goto free;
+
 		hist_field->fn = select_value_fn(field->size,
 						 field->is_signed);
 		if (!hist_field->fn) {
@@ -436,14 +2263,23 @@ static struct hist_field *create_hist_field(struct ftrace_event_field *field,
 	hist_field->field = field;
 	hist_field->flags = flags;
 
+	if (var_name) {
+		hist_field->var.name = kstrdup(var_name, GFP_KERNEL);
+		if (!hist_field->var.name)
+			goto free;
+	}
+
 	return hist_field;
+ free:
+	destroy_hist_field(hist_field, 0);
+	return NULL;
 }
 
 static void destroy_hist_fields(struct hist_trigger_data *hist_data)
 {
 	unsigned int i;
 
-	for (i = 0; i < TRACING_MAP_FIELDS_MAX; i++) {
+	for (i = 0; i < HIST_FIELDS_MAX; i++) {
 		if (hist_data->fields[i]) {
 			destroy_hist_field(hist_data->fields[i], 0);
 			hist_data->fields[i] = NULL;
@@ -451,69 +2287,1610 @@ static void destroy_hist_fields(struct hist_trigger_data *hist_data)
 	}
 }
 
-static int create_hitcount_val(struct hist_trigger_data *hist_data)
+static int init_var_ref(struct hist_field *ref_field,
+			struct hist_field *var_field,
+			char *system, char *event_name)
 {
-	hist_data->fields[HITCOUNT_IDX] =
-		create_hist_field(NULL, HIST_FIELD_FL_HITCOUNT);
-	if (!hist_data->fields[HITCOUNT_IDX])
-		return -ENOMEM;
+	int err = 0;
+
+	ref_field->var.idx = var_field->var.idx;
+	ref_field->var.hist_data = var_field->hist_data;
+	ref_field->size = var_field->size;
+	ref_field->is_signed = var_field->is_signed;
+	ref_field->flags |= var_field->flags &
+		(HIST_FIELD_FL_TIMESTAMP | HIST_FIELD_FL_TIMESTAMP_USECS);
+
+	if (system) {
+		ref_field->system = kstrdup(system, GFP_KERNEL);
+		if (!ref_field->system)
+			return -ENOMEM;
+	}
 
-	hist_data->n_vals++;
+	if (event_name) {
+		ref_field->event_name = kstrdup(event_name, GFP_KERNEL);
+		if (!ref_field->event_name) {
+			err = -ENOMEM;
+			goto free;
+		}
+	}
 
-	if (WARN_ON(hist_data->n_vals > TRACING_MAP_VALS_MAX))
+	if (var_field->var.name) {
+		ref_field->name = kstrdup(var_field->var.name, GFP_KERNEL);
+		if (!ref_field->name) {
+			err = -ENOMEM;
+			goto free;
+		}
+	} else if (var_field->name) {
+		ref_field->name = kstrdup(var_field->name, GFP_KERNEL);
+		if (!ref_field->name) {
+			err = -ENOMEM;
+			goto free;
+		}
+	}
+
+	ref_field->type = kstrdup(var_field->type, GFP_KERNEL);
+	if (!ref_field->type) {
+		err = -ENOMEM;
+		goto free;
+	}
+ out:
+	return err;
+ free:
+	kfree(ref_field->system);
+	kfree(ref_field->event_name);
+	kfree(ref_field->name);
+
+	goto out;
+}
+
+static struct hist_field *create_var_ref(struct hist_field *var_field,
+					 char *system, char *event_name)
+{
+	unsigned long flags = HIST_FIELD_FL_VAR_REF;
+	struct hist_field *ref_field;
+
+	ref_field = create_hist_field(var_field->hist_data, NULL, flags, NULL);
+	if (ref_field) {
+		if (init_var_ref(ref_field, var_field, system, event_name)) {
+			destroy_hist_field(ref_field, 0);
+			return NULL;
+		}
+	}
+
+	return ref_field;
+}
+
+static bool is_var_ref(char *var_name)
+{
+	if (!var_name || strlen(var_name) < 2 || var_name[0] != '$')
+		return false;
+
+	return true;
+}
+
+static char *field_name_from_var(struct hist_trigger_data *hist_data,
+				 char *var_name)
+{
+	char *name, *field;
+	unsigned int i;
+
+	for (i = 0; i < hist_data->attrs->var_defs.n_vars; i++) {
+		name = hist_data->attrs->var_defs.name[i];
+
+		if (strcmp(var_name, name) == 0) {
+			field = hist_data->attrs->var_defs.expr[i];
+			if (contains_operator(field) || is_var_ref(field))
+				continue;
+			return field;
+		}
+	}
+
+	return NULL;
+}
+
+static char *local_field_var_ref(struct hist_trigger_data *hist_data,
+				 char *system, char *event_name,
+				 char *var_name)
+{
+	struct trace_event_call *call;
+
+	if (system && event_name) {
+		call = hist_data->event_file->event_call;
+
+		if (strcmp(system, call->class->system) != 0)
+			return NULL;
+
+		if (strcmp(event_name, trace_event_name(call)) != 0)
+			return NULL;
+	}
+
+	if (!!system != !!event_name)
+		return NULL;
+
+	if (!is_var_ref(var_name))
+		return NULL;
+
+	var_name++;
+
+	return field_name_from_var(hist_data, var_name);
+}
+
+static struct hist_field *parse_var_ref(struct hist_trigger_data *hist_data,
+					char *system, char *event_name,
+					char *var_name)
+{
+	struct hist_field *var_field = NULL, *ref_field = NULL;
+
+	if (!is_var_ref(var_name))
+		return NULL;
+
+	var_name++;
+
+	var_field = find_event_var(hist_data, system, event_name, var_name);
+	if (var_field)
+		ref_field = create_var_ref(var_field, system, event_name);
+
+	if (!ref_field)
+		hist_err_event("Couldn't find variable: $",
+			       system, event_name, var_name);
+
+	return ref_field;
+}
+
+static struct ftrace_event_field *
+parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file,
+	    char *field_str, unsigned long *flags)
+{
+	struct ftrace_event_field *field = NULL;
+	char *field_name, *modifier, *str;
+
+	modifier = str = kstrdup(field_str, GFP_KERNEL);
+	if (!modifier)
+		return ERR_PTR(-ENOMEM);
+
+	field_name = strsep(&modifier, ".");
+	if (modifier) {
+		if (strcmp(modifier, "hex") == 0)
+			*flags |= HIST_FIELD_FL_HEX;
+		else if (strcmp(modifier, "sym") == 0)
+			*flags |= HIST_FIELD_FL_SYM;
+		else if (strcmp(modifier, "sym-offset") == 0)
+			*flags |= HIST_FIELD_FL_SYM_OFFSET;
+		else if ((strcmp(modifier, "execname") == 0) &&
+			 (strcmp(field_name, "common_pid") == 0))
+			*flags |= HIST_FIELD_FL_EXECNAME;
+		else if (strcmp(modifier, "syscall") == 0)
+			*flags |= HIST_FIELD_FL_SYSCALL;
+		else if (strcmp(modifier, "log2") == 0)
+			*flags |= HIST_FIELD_FL_LOG2;
+		else if (strcmp(modifier, "usecs") == 0)
+			*flags |= HIST_FIELD_FL_TIMESTAMP_USECS;
+		else {
+			field = ERR_PTR(-EINVAL);
+			goto out;
+		}
+	}
+
+	if (strcmp(field_name, "common_timestamp") == 0) {
+		*flags |= HIST_FIELD_FL_TIMESTAMP;
+		hist_data->enable_timestamps = true;
+		if (*flags & HIST_FIELD_FL_TIMESTAMP_USECS)
+			hist_data->attrs->ts_in_usecs = true;
+	} else if (strcmp(field_name, "cpu") == 0)
+		*flags |= HIST_FIELD_FL_CPU;
+	else {
+		field = trace_find_event_field(file->event_call, field_name);
+		if (!field || !field->size) {
+			field = ERR_PTR(-EINVAL);
+			goto out;
+		}
+	}
+ out:
+	kfree(str);
+
+	return field;
+}
+
+static struct hist_field *create_alias(struct hist_trigger_data *hist_data,
+				       struct hist_field *var_ref,
+				       char *var_name)
+{
+	struct hist_field *alias = NULL;
+	unsigned long flags = HIST_FIELD_FL_ALIAS | HIST_FIELD_FL_VAR;
+
+	alias = create_hist_field(hist_data, NULL, flags, var_name);
+	if (!alias)
+		return NULL;
+
+	alias->fn = var_ref->fn;
+	alias->operands[0] = var_ref;
+
+	if (init_var_ref(alias, var_ref, var_ref->system, var_ref->event_name)) {
+		destroy_hist_field(alias, 0);
+		return NULL;
+	}
+
+	return alias;
+}
+
+static struct hist_field *parse_atom(struct hist_trigger_data *hist_data,
+				     struct trace_event_file *file, char *str,
+				     unsigned long *flags, char *var_name)
+{
+	char *s, *ref_system = NULL, *ref_event = NULL, *ref_var = str;
+	struct ftrace_event_field *field = NULL;
+	struct hist_field *hist_field = NULL;
+	int ret = 0;
+
+	s = strchr(str, '.');
+	if (s) {
+		s = strchr(++s, '.');
+		if (s) {
+			ref_system = strsep(&str, ".");
+			if (!str) {
+				ret = -EINVAL;
+				goto out;
+			}
+			ref_event = strsep(&str, ".");
+			if (!str) {
+				ret = -EINVAL;
+				goto out;
+			}
+			ref_var = str;
+		}
+	}
+
+	s = local_field_var_ref(hist_data, ref_system, ref_event, ref_var);
+	if (!s) {
+		hist_field = parse_var_ref(hist_data, ref_system, ref_event, ref_var);
+		if (hist_field) {
+			hist_data->var_refs[hist_data->n_var_refs] = hist_field;
+			hist_field->var_ref_idx = hist_data->n_var_refs++;
+			if (var_name) {
+				hist_field = create_alias(hist_data, hist_field, var_name);
+				if (!hist_field) {
+					ret = -ENOMEM;
+					goto out;
+				}
+			}
+			return hist_field;
+		}
+	} else
+		str = s;
+
+	field = parse_field(hist_data, file, str, flags);
+	if (IS_ERR(field)) {
+		ret = PTR_ERR(field);
+		goto out;
+	}
+
+	hist_field = create_hist_field(hist_data, field, *flags, var_name);
+	if (!hist_field) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	return hist_field;
+ out:
+	return ERR_PTR(ret);
+}
+
+static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,
+				     struct trace_event_file *file,
+				     char *str, unsigned long flags,
+				     char *var_name, unsigned int level);
+
+static struct hist_field *parse_unary(struct hist_trigger_data *hist_data,
+				      struct trace_event_file *file,
+				      char *str, unsigned long flags,
+				      char *var_name, unsigned int level)
+{
+	struct hist_field *operand1, *expr = NULL;
+	unsigned long operand_flags;
+	int ret = 0;
+	char *s;
+
+	/* we support only -(xxx) i.e. explicit parens required */
+
+	if (level > 3) {
+		hist_err("Too many subexpressions (3 max): ", str);
+		ret = -EINVAL;
+		goto free;
+	}
+
+	str++; /* skip leading '-' */
+
+	s = strchr(str, '(');
+	if (s)
+		str++;
+	else {
+		ret = -EINVAL;
+		goto free;
+	}
+
+	s = strrchr(str, ')');
+	if (s)
+		*s = '\0';
+	else {
+		ret = -EINVAL; /* no closing ')' */
+		goto free;
+	}
+
+	flags |= HIST_FIELD_FL_EXPR;
+	expr = create_hist_field(hist_data, NULL, flags, var_name);
+	if (!expr) {
+		ret = -ENOMEM;
+		goto free;
+	}
+
+	operand_flags = 0;
+	operand1 = parse_expr(hist_data, file, str, operand_flags, NULL, ++level);
+	if (IS_ERR(operand1)) {
+		ret = PTR_ERR(operand1);
+		goto free;
+	}
+
+	expr->flags |= operand1->flags &
+		(HIST_FIELD_FL_TIMESTAMP | HIST_FIELD_FL_TIMESTAMP_USECS);
+	expr->fn = hist_field_unary_minus;
+	expr->operands[0] = operand1;
+	expr->operator = FIELD_OP_UNARY_MINUS;
+	expr->name = expr_str(expr, 0);
+	expr->type = kstrdup(operand1->type, GFP_KERNEL);
+	if (!expr->type) {
+		ret = -ENOMEM;
+		goto free;
+	}
+
+	return expr;
+ free:
+	destroy_hist_field(expr, 0);
+	return ERR_PTR(ret);
+}
+
+static int check_expr_operands(struct hist_field *operand1,
+			       struct hist_field *operand2)
+{
+	unsigned long operand1_flags = operand1->flags;
+	unsigned long operand2_flags = operand2->flags;
+
+	if ((operand1_flags & HIST_FIELD_FL_VAR_REF) ||
+	    (operand1_flags & HIST_FIELD_FL_ALIAS)) {
+		struct hist_field *var;
+
+		var = find_var_field(operand1->var.hist_data, operand1->name);
+		if (!var)
+			return -EINVAL;
+		operand1_flags = var->flags;
+	}
+
+	if ((operand2_flags & HIST_FIELD_FL_VAR_REF) ||
+	    (operand2_flags & HIST_FIELD_FL_ALIAS)) {
+		struct hist_field *var;
+
+		var = find_var_field(operand2->var.hist_data, operand2->name);
+		if (!var)
+			return -EINVAL;
+		operand2_flags = var->flags;
+	}
+
+	if ((operand1_flags & HIST_FIELD_FL_TIMESTAMP_USECS) !=
+	    (operand2_flags & HIST_FIELD_FL_TIMESTAMP_USECS)) {
+		hist_err("Timestamp units in expression don't match", NULL);
 		return -EINVAL;
+	}
 
 	return 0;
 }
 
-static int create_val_field(struct hist_trigger_data *hist_data,
-			    unsigned int val_idx,
-			    struct trace_event_file *file,
-			    char *field_str)
+static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,
+				     struct trace_event_file *file,
+				     char *str, unsigned long flags,
+				     char *var_name, unsigned int level)
 {
-	struct ftrace_event_field *field = NULL;
-	unsigned long flags = 0;
-	char *field_name;
+	struct hist_field *operand1 = NULL, *operand2 = NULL, *expr = NULL;
+	unsigned long operand_flags;
+	int field_op, ret = -EINVAL;
+	char *sep, *operand1_str;
+
+	if (level > 3) {
+		hist_err("Too many subexpressions (3 max): ", str);
+		return ERR_PTR(-EINVAL);
+	}
+
+	field_op = contains_operator(str);
+
+	if (field_op == FIELD_OP_NONE)
+		return parse_atom(hist_data, file, str, &flags, var_name);
+
+	if (field_op == FIELD_OP_UNARY_MINUS)
+		return parse_unary(hist_data, file, str, flags, var_name, ++level);
+
+	switch (field_op) {
+	case FIELD_OP_MINUS:
+		sep = "-";
+		break;
+	case FIELD_OP_PLUS:
+		sep = "+";
+		break;
+	default:
+		goto free;
+	}
+
+	operand1_str = strsep(&str, sep);
+	if (!operand1_str || !str)
+		goto free;
+
+	operand_flags = 0;
+	operand1 = parse_atom(hist_data, file, operand1_str,
+			      &operand_flags, NULL);
+	if (IS_ERR(operand1)) {
+		ret = PTR_ERR(operand1);
+		operand1 = NULL;
+		goto free;
+	}
+
+	/* rest of string could be another expression e.g. b+c in a+b+c */
+	operand_flags = 0;
+	operand2 = parse_expr(hist_data, file, str, operand_flags, NULL, ++level);
+	if (IS_ERR(operand2)) {
+		ret = PTR_ERR(operand2);
+		operand2 = NULL;
+		goto free;
+	}
+
+	ret = check_expr_operands(operand1, operand2);
+	if (ret)
+		goto free;
+
+	flags |= HIST_FIELD_FL_EXPR;
+
+	flags |= operand1->flags &
+		(HIST_FIELD_FL_TIMESTAMP | HIST_FIELD_FL_TIMESTAMP_USECS);
+
+	expr = create_hist_field(hist_data, NULL, flags, var_name);
+	if (!expr) {
+		ret = -ENOMEM;
+		goto free;
+	}
+
+	operand1->read_once = true;
+	operand2->read_once = true;
+
+	expr->operands[0] = operand1;
+	expr->operands[1] = operand2;
+	expr->operator = field_op;
+	expr->name = expr_str(expr, 0);
+	expr->type = kstrdup(operand1->type, GFP_KERNEL);
+	if (!expr->type) {
+		ret = -ENOMEM;
+		goto free;
+	}
+
+	switch (field_op) {
+	case FIELD_OP_MINUS:
+		expr->fn = hist_field_minus;
+		break;
+	case FIELD_OP_PLUS:
+		expr->fn = hist_field_plus;
+		break;
+	default:
+		ret = -EINVAL;
+		goto free;
+	}
+
+	return expr;
+ free:
+	destroy_hist_field(operand1, 0);
+	destroy_hist_field(operand2, 0);
+	destroy_hist_field(expr, 0);
+
+	return ERR_PTR(ret);
+}
+
+static char *find_trigger_filter(struct hist_trigger_data *hist_data,
+				 struct trace_event_file *file)
+{
+	struct event_trigger_data *test;
+
+	list_for_each_entry_rcu(test, &file->triggers, list) {
+		if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) {
+			if (test->private_data == hist_data)
+				return test->filter_str;
+		}
+	}
+
+	return NULL;
+}
+
+static struct event_command trigger_hist_cmd;
+static int event_hist_trigger_func(struct event_command *cmd_ops,
+				   struct trace_event_file *file,
+				   char *glob, char *cmd, char *param);
+
+static bool compatible_keys(struct hist_trigger_data *target_hist_data,
+			    struct hist_trigger_data *hist_data,
+			    unsigned int n_keys)
+{
+	struct hist_field *target_hist_field, *hist_field;
+	unsigned int n, i, j;
+
+	if (hist_data->n_fields - hist_data->n_vals != n_keys)
+		return false;
+
+	i = hist_data->n_vals;
+	j = target_hist_data->n_vals;
+
+	for (n = 0; n < n_keys; n++) {
+		hist_field = hist_data->fields[i + n];
+		target_hist_field = target_hist_data->fields[j + n];
+
+		if (strcmp(hist_field->type, target_hist_field->type) != 0)
+			return false;
+		if (hist_field->size != target_hist_field->size)
+			return false;
+		if (hist_field->is_signed != target_hist_field->is_signed)
+			return false;
+	}
+
+	return true;
+}
+
+static struct hist_trigger_data *
+find_compatible_hist(struct hist_trigger_data *target_hist_data,
+		     struct trace_event_file *file)
+{
+	struct hist_trigger_data *hist_data;
+	struct event_trigger_data *test;
+	unsigned int n_keys;
+
+	n_keys = target_hist_data->n_fields - target_hist_data->n_vals;
+
+	list_for_each_entry_rcu(test, &file->triggers, list) {
+		if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) {
+			hist_data = test->private_data;
+
+			if (compatible_keys(target_hist_data, hist_data, n_keys))
+				return hist_data;
+		}
+	}
+
+	return NULL;
+}
+
+static struct trace_event_file *event_file(struct trace_array *tr,
+					   char *system, char *event_name)
+{
+	struct trace_event_file *file;
+
+	file = find_event_file(tr, system, event_name);
+	if (!file)
+		return ERR_PTR(-EINVAL);
+
+	return file;
+}
+
+static struct hist_field *
+find_synthetic_field_var(struct hist_trigger_data *target_hist_data,
+			 char *system, char *event_name, char *field_name)
+{
+	struct hist_field *event_var;
+	char *synthetic_name;
+
+	synthetic_name = kzalloc(MAX_FILTER_STR_VAL, GFP_KERNEL);
+	if (!synthetic_name)
+		return ERR_PTR(-ENOMEM);
+
+	strcpy(synthetic_name, "synthetic_");
+	strcat(synthetic_name, field_name);
+
+	event_var = find_event_var(target_hist_data, system, event_name, synthetic_name);
+
+	kfree(synthetic_name);
+
+	return event_var;
+}
+
+/**
+ * create_field_var_hist - Automatically create a histogram and var for a field
+ * @target_hist_data: The target hist trigger
+ * @subsys_name: Optional subsystem name
+ * @event_name: Optional event name
+ * @field_name: The name of the field (and the resulting variable)
+ *
+ * Hist trigger actions fetch data from variables, not directly from
+ * events.  However, for convenience, users are allowed to directly
+ * specify an event field in an action, which will be automatically
+ * converted into a variable on their behalf.
+
+ * If a user specifies a field on an event that isn't the event the
+ * histogram currently being defined (the target event histogram), the
+ * only way that can be accomplished is if a new hist trigger is
+ * created and the field variable defined on that.
+ *
+ * This function creates a new histogram compatible with the target
+ * event (meaning a histogram with the same key as the target
+ * histogram), and creates a variable for the specified field, but
+ * with 'synthetic_' prepended to the variable name in order to avoid
+ * collision with normal field variables.
+ *
+ * Return: The variable created for the field.
+ */
+static struct hist_field *
+create_field_var_hist(struct hist_trigger_data *target_hist_data,
+		      char *subsys_name, char *event_name, char *field_name)
+{
+	struct trace_array *tr = target_hist_data->event_file->tr;
+	struct hist_field *event_var = ERR_PTR(-EINVAL);
+	struct hist_trigger_data *hist_data;
+	unsigned int i, n, first = true;
+	struct field_var_hist *var_hist;
+	struct trace_event_file *file;
+	struct hist_field *key_field;
+	char *saved_filter;
+	char *cmd;
+	int ret;
+
+	if (target_hist_data->n_field_var_hists >= SYNTH_FIELDS_MAX) {
+		hist_err_event("onmatch: Too many field variables defined: ",
+			       subsys_name, event_name, field_name);
+		return ERR_PTR(-EINVAL);
+	}
+
+	file = event_file(tr, subsys_name, event_name);
+
+	if (IS_ERR(file)) {
+		hist_err_event("onmatch: Event file not found: ",
+			       subsys_name, event_name, field_name);
+		ret = PTR_ERR(file);
+		return ERR_PTR(ret);
+	}
+
+	/*
+	 * Look for a histogram compatible with target.  We'll use the
+	 * found histogram specification to create a new matching
+	 * histogram with our variable on it.  target_hist_data is not
+	 * yet a registered histogram so we can't use that.
+	 */
+	hist_data = find_compatible_hist(target_hist_data, file);
+	if (!hist_data) {
+		hist_err_event("onmatch: Matching event histogram not found: ",
+			       subsys_name, event_name, field_name);
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* See if a synthetic field variable has already been created */
+	event_var = find_synthetic_field_var(target_hist_data, subsys_name,
+					     event_name, field_name);
+	if (!IS_ERR_OR_NULL(event_var))
+		return event_var;
+
+	var_hist = kzalloc(sizeof(*var_hist), GFP_KERNEL);
+	if (!var_hist)
+		return ERR_PTR(-ENOMEM);
+
+	cmd = kzalloc(MAX_FILTER_STR_VAL, GFP_KERNEL);
+	if (!cmd) {
+		kfree(var_hist);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	/* Use the same keys as the compatible histogram */
+	strcat(cmd, "keys=");
+
+	for_each_hist_key_field(i, hist_data) {
+		key_field = hist_data->fields[i];
+		if (!first)
+			strcat(cmd, ",");
+		strcat(cmd, key_field->field->name);
+		first = false;
+	}
+
+	/* Create the synthetic field variable specification */
+	strcat(cmd, ":synthetic_");
+	strcat(cmd, field_name);
+	strcat(cmd, "=");
+	strcat(cmd, field_name);
+
+	/* Use the same filter as the compatible histogram */
+	saved_filter = find_trigger_filter(hist_data, file);
+	if (saved_filter) {
+		strcat(cmd, " if ");
+		strcat(cmd, saved_filter);
+	}
+
+	var_hist->cmd = kstrdup(cmd, GFP_KERNEL);
+	if (!var_hist->cmd) {
+		kfree(cmd);
+		kfree(var_hist);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	/* Save the compatible histogram information */
+	var_hist->hist_data = hist_data;
+
+	/* Create the new histogram with our variable */
+	ret = event_hist_trigger_func(&trigger_hist_cmd, file,
+				      "", "hist", cmd);
+	if (ret) {
+		kfree(cmd);
+		kfree(var_hist->cmd);
+		kfree(var_hist);
+		hist_err_event("onmatch: Couldn't create histogram for field: ",
+			       subsys_name, event_name, field_name);
+		return ERR_PTR(ret);
+	}
+
+	kfree(cmd);
+
+	/* If we can't find the variable, something went wrong */
+	event_var = find_synthetic_field_var(target_hist_data, subsys_name,
+					     event_name, field_name);
+	if (IS_ERR_OR_NULL(event_var)) {
+		kfree(var_hist->cmd);
+		kfree(var_hist);
+		hist_err_event("onmatch: Couldn't find synthetic variable: ",
+			       subsys_name, event_name, field_name);
+		return ERR_PTR(-EINVAL);
+	}
+
+	n = target_hist_data->n_field_var_hists;
+	target_hist_data->field_var_hists[n] = var_hist;
+	target_hist_data->n_field_var_hists++;
+
+	return event_var;
+}
+
+static struct hist_field *
+find_target_event_var(struct hist_trigger_data *hist_data,
+		      char *subsys_name, char *event_name, char *var_name)
+{
+	struct trace_event_file *file = hist_data->event_file;
+	struct hist_field *hist_field = NULL;
+
+	if (subsys_name) {
+		struct trace_event_call *call;
+
+		if (!event_name)
+			return NULL;
+
+		call = file->event_call;
+
+		if (strcmp(subsys_name, call->class->system) != 0)
+			return NULL;
+
+		if (strcmp(event_name, trace_event_name(call)) != 0)
+			return NULL;
+	}
+
+	hist_field = find_var_field(hist_data, var_name);
+
+	return hist_field;
+}
+
+static inline void __update_field_vars(struct tracing_map_elt *elt,
+				       struct ring_buffer_event *rbe,
+				       void *rec,
+				       struct field_var **field_vars,
+				       unsigned int n_field_vars,
+				       unsigned int field_var_str_start)
+{
+	struct hist_elt_data *elt_data = elt->private_data;
+	unsigned int i, j, var_idx;
+	u64 var_val;
+
+	for (i = 0, j = field_var_str_start; i < n_field_vars; i++) {
+		struct field_var *field_var = field_vars[i];
+		struct hist_field *var = field_var->var;
+		struct hist_field *val = field_var->val;
+
+		var_val = val->fn(val, elt, rbe, rec);
+		var_idx = var->var.idx;
+
+		if (val->flags & HIST_FIELD_FL_STRING) {
+			char *str = elt_data->field_var_str[j++];
+			char *val_str = (char *)(uintptr_t)var_val;
+
+			strscpy(str, val_str, STR_VAR_LEN_MAX);
+			var_val = (u64)(uintptr_t)str;
+		}
+		tracing_map_set_var(elt, var_idx, var_val);
+	}
+}
+
+static void update_field_vars(struct hist_trigger_data *hist_data,
+			      struct tracing_map_elt *elt,
+			      struct ring_buffer_event *rbe,
+			      void *rec)
+{
+	__update_field_vars(elt, rbe, rec, hist_data->field_vars,
+			    hist_data->n_field_vars, 0);
+}
+
+static void update_max_vars(struct hist_trigger_data *hist_data,
+			    struct tracing_map_elt *elt,
+			    struct ring_buffer_event *rbe,
+			    void *rec)
+{
+	__update_field_vars(elt, rbe, rec, hist_data->max_vars,
+			    hist_data->n_max_vars, hist_data->n_field_var_str);
+}
+
+static struct hist_field *create_var(struct hist_trigger_data *hist_data,
+				     struct trace_event_file *file,
+				     char *name, int size, const char *type)
+{
+	struct hist_field *var;
+	int idx;
+
+	if (find_var(hist_data, file, name) && !hist_data->remove) {
+		var = ERR_PTR(-EINVAL);
+		goto out;
+	}
+
+	var = kzalloc(sizeof(struct hist_field), GFP_KERNEL);
+	if (!var) {
+		var = ERR_PTR(-ENOMEM);
+		goto out;
+	}
+
+	idx = tracing_map_add_var(hist_data->map);
+	if (idx < 0) {
+		kfree(var);
+		var = ERR_PTR(-EINVAL);
+		goto out;
+	}
+
+	var->flags = HIST_FIELD_FL_VAR;
+	var->var.idx = idx;
+	var->var.hist_data = var->hist_data = hist_data;
+	var->size = size;
+	var->var.name = kstrdup(name, GFP_KERNEL);
+	var->type = kstrdup(type, GFP_KERNEL);
+	if (!var->var.name || !var->type) {
+		kfree(var->var.name);
+		kfree(var->type);
+		kfree(var);
+		var = ERR_PTR(-ENOMEM);
+	}
+ out:
+	return var;
+}
+
+static struct field_var *create_field_var(struct hist_trigger_data *hist_data,
+					  struct trace_event_file *file,
+					  char *field_name)
+{
+	struct hist_field *val = NULL, *var = NULL;
+	unsigned long flags = HIST_FIELD_FL_VAR;
+	struct field_var *field_var;
 	int ret = 0;
 
-	if (WARN_ON(val_idx >= TRACING_MAP_VALS_MAX))
+	if (hist_data->n_field_vars >= SYNTH_FIELDS_MAX) {
+		hist_err("Too many field variables defined: ", field_name);
+		ret = -EINVAL;
+		goto err;
+	}
+
+	val = parse_atom(hist_data, file, field_name, &flags, NULL);
+	if (IS_ERR(val)) {
+		hist_err("Couldn't parse field variable: ", field_name);
+		ret = PTR_ERR(val);
+		goto err;
+	}
+
+	var = create_var(hist_data, file, field_name, val->size, val->type);
+	if (IS_ERR(var)) {
+		hist_err("Couldn't create or find variable: ", field_name);
+		kfree(val);
+		ret = PTR_ERR(var);
+		goto err;
+	}
+
+	field_var = kzalloc(sizeof(struct field_var), GFP_KERNEL);
+	if (!field_var) {
+		kfree(val);
+		kfree(var);
+		ret =  -ENOMEM;
+		goto err;
+	}
+
+	field_var->var = var;
+	field_var->val = val;
+ out:
+	return field_var;
+ err:
+	field_var = ERR_PTR(ret);
+	goto out;
+}
+
+/**
+ * create_target_field_var - Automatically create a variable for a field
+ * @target_hist_data: The target hist trigger
+ * @subsys_name: Optional subsystem name
+ * @event_name: Optional event name
+ * @var_name: The name of the field (and the resulting variable)
+ *
+ * Hist trigger actions fetch data from variables, not directly from
+ * events.  However, for convenience, users are allowed to directly
+ * specify an event field in an action, which will be automatically
+ * converted into a variable on their behalf.
+
+ * This function creates a field variable with the name var_name on
+ * the hist trigger currently being defined on the target event.  If
+ * subsys_name and event_name are specified, this function simply
+ * verifies that they do in fact match the target event subsystem and
+ * event name.
+ *
+ * Return: The variable created for the field.
+ */
+static struct field_var *
+create_target_field_var(struct hist_trigger_data *target_hist_data,
+			char *subsys_name, char *event_name, char *var_name)
+{
+	struct trace_event_file *file = target_hist_data->event_file;
+
+	if (subsys_name) {
+		struct trace_event_call *call;
+
+		if (!event_name)
+			return NULL;
+
+		call = file->event_call;
+
+		if (strcmp(subsys_name, call->class->system) != 0)
+			return NULL;
+
+		if (strcmp(event_name, trace_event_name(call)) != 0)
+			return NULL;
+	}
+
+	return create_field_var(target_hist_data, file, var_name);
+}
+
+static void onmax_print(struct seq_file *m,
+			struct hist_trigger_data *hist_data,
+			struct tracing_map_elt *elt,
+			struct action_data *data)
+{
+	unsigned int i, save_var_idx, max_idx = data->onmax.max_var->var.idx;
+
+	seq_printf(m, "\n\tmax: %10llu", tracing_map_read_var(elt, max_idx));
+
+	for (i = 0; i < hist_data->n_max_vars; i++) {
+		struct hist_field *save_val = hist_data->max_vars[i]->val;
+		struct hist_field *save_var = hist_data->max_vars[i]->var;
+		u64 val;
+
+		save_var_idx = save_var->var.idx;
+
+		val = tracing_map_read_var(elt, save_var_idx);
+
+		if (save_val->flags & HIST_FIELD_FL_STRING) {
+			seq_printf(m, "  %s: %-32s", save_var->var.name,
+				   (char *)(uintptr_t)(val));
+		} else
+			seq_printf(m, "  %s: %10llu", save_var->var.name, val);
+	}
+}
+
+static void onmax_save(struct hist_trigger_data *hist_data,
+		       struct tracing_map_elt *elt, void *rec,
+		       struct ring_buffer_event *rbe,
+		       struct action_data *data, u64 *var_ref_vals)
+{
+	unsigned int max_idx = data->onmax.max_var->var.idx;
+	unsigned int max_var_ref_idx = data->onmax.max_var_ref_idx;
+
+	u64 var_val, max_val;
+
+	var_val = var_ref_vals[max_var_ref_idx];
+	max_val = tracing_map_read_var(elt, max_idx);
+
+	if (var_val <= max_val)
+		return;
+
+	tracing_map_set_var(elt, max_idx, var_val);
+
+	update_max_vars(hist_data, elt, rbe, rec);
+}
+
+static void onmax_destroy(struct action_data *data)
+{
+	unsigned int i;
+
+	destroy_hist_field(data->onmax.max_var, 0);
+	destroy_hist_field(data->onmax.var, 0);
+
+	kfree(data->onmax.var_str);
+	kfree(data->onmax.fn_name);
+
+	for (i = 0; i < data->n_params; i++)
+		kfree(data->params[i]);
+
+	kfree(data);
+}
+
+static int onmax_create(struct hist_trigger_data *hist_data,
+			struct action_data *data)
+{
+	struct trace_event_file *file = hist_data->event_file;
+	struct hist_field *var_field, *ref_field, *max_var;
+	unsigned int var_ref_idx = hist_data->n_var_refs;
+	struct field_var *field_var;
+	char *onmax_var_str, *param;
+	unsigned long flags;
+	unsigned int i;
+	int ret = 0;
+
+	onmax_var_str = data->onmax.var_str;
+	if (onmax_var_str[0] != '$') {
+		hist_err("onmax: For onmax(x), x must be a variable: ", onmax_var_str);
 		return -EINVAL;
+	}
+	onmax_var_str++;
 
-	field_name = strsep(&field_str, ".");
-	if (field_str) {
-		if (strcmp(field_str, "hex") == 0)
-			flags |= HIST_FIELD_FL_HEX;
-		else {
+	var_field = find_target_event_var(hist_data, NULL, NULL, onmax_var_str);
+	if (!var_field) {
+		hist_err("onmax: Couldn't find onmax variable: ", onmax_var_str);
+		return -EINVAL;
+	}
+
+	flags = HIST_FIELD_FL_VAR_REF;
+	ref_field = create_hist_field(hist_data, NULL, flags, NULL);
+	if (!ref_field)
+		return -ENOMEM;
+
+	if (init_var_ref(ref_field, var_field, NULL, NULL)) {
+		destroy_hist_field(ref_field, 0);
+		ret = -ENOMEM;
+		goto out;
+	}
+	hist_data->var_refs[hist_data->n_var_refs] = ref_field;
+	ref_field->var_ref_idx = hist_data->n_var_refs++;
+	data->onmax.var = ref_field;
+
+	data->fn = onmax_save;
+	data->onmax.max_var_ref_idx = var_ref_idx;
+	max_var = create_var(hist_data, file, "max", sizeof(u64), "u64");
+	if (IS_ERR(max_var)) {
+		hist_err("onmax: Couldn't create onmax variable: ", "max");
+		ret = PTR_ERR(max_var);
+		goto out;
+	}
+	data->onmax.max_var = max_var;
+
+	for (i = 0; i < data->n_params; i++) {
+		param = kstrdup(data->params[i], GFP_KERNEL);
+		if (!param) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		field_var = create_target_field_var(hist_data, NULL, NULL, param);
+		if (IS_ERR(field_var)) {
+			hist_err("onmax: Couldn't create field variable: ", param);
+			ret = PTR_ERR(field_var);
+			kfree(param);
+			goto out;
+		}
+
+		hist_data->max_vars[hist_data->n_max_vars++] = field_var;
+		if (field_var->val->flags & HIST_FIELD_FL_STRING)
+			hist_data->n_max_var_str++;
+
+		kfree(param);
+	}
+ out:
+	return ret;
+}
+
+static int parse_action_params(char *params, struct action_data *data)
+{
+	char *param, *saved_param;
+	int ret = 0;
+
+	while (params) {
+		if (data->n_params >= SYNTH_FIELDS_MAX)
+			goto out;
+
+		param = strsep(&params, ",");
+		if (!param) {
 			ret = -EINVAL;
 			goto out;
 		}
+
+		param = strstrip(param);
+		if (strlen(param) < 2) {
+			hist_err("Invalid action param: ", param);
+			ret = -EINVAL;
+			goto out;
+		}
+
+		saved_param = kstrdup(param, GFP_KERNEL);
+		if (!saved_param) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		data->params[data->n_params++] = saved_param;
 	}
+ out:
+	return ret;
+}
 
-	field = trace_find_event_field(file->event_call, field_name);
-	if (!field || !field->size) {
+static struct action_data *onmax_parse(char *str)
+{
+	char *onmax_fn_name, *onmax_var_str;
+	struct action_data *data;
+	int ret = -EINVAL;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return ERR_PTR(-ENOMEM);
+
+	onmax_var_str = strsep(&str, ")");
+	if (!onmax_var_str || !str) {
 		ret = -EINVAL;
-		goto out;
+		goto free;
+	}
+
+	data->onmax.var_str = kstrdup(onmax_var_str, GFP_KERNEL);
+	if (!data->onmax.var_str) {
+		ret = -ENOMEM;
+		goto free;
+	}
+
+	strsep(&str, ".");
+	if (!str)
+		goto free;
+
+	onmax_fn_name = strsep(&str, "(");
+	if (!onmax_fn_name || !str)
+		goto free;
+
+	if (strncmp(onmax_fn_name, "save", strlen("save")) == 0) {
+		char *params = strsep(&str, ")");
+
+		if (!params) {
+			ret = -EINVAL;
+			goto free;
+		}
+
+		ret = parse_action_params(params, data);
+		if (ret)
+			goto free;
+	} else
+		goto free;
+
+	data->onmax.fn_name = kstrdup(onmax_fn_name, GFP_KERNEL);
+	if (!data->onmax.fn_name) {
+		ret = -ENOMEM;
+		goto free;
+	}
+ out:
+	return data;
+ free:
+	onmax_destroy(data);
+	data = ERR_PTR(ret);
+	goto out;
+}
+
+static void onmatch_destroy(struct action_data *data)
+{
+	unsigned int i;
+
+	mutex_lock(&synth_event_mutex);
+
+	kfree(data->onmatch.match_event);
+	kfree(data->onmatch.match_event_system);
+	kfree(data->onmatch.synth_event_name);
+
+	for (i = 0; i < data->n_params; i++)
+		kfree(data->params[i]);
+
+	if (data->onmatch.synth_event)
+		data->onmatch.synth_event->ref--;
+
+	kfree(data);
+
+	mutex_unlock(&synth_event_mutex);
+}
+
+static void destroy_field_var(struct field_var *field_var)
+{
+	if (!field_var)
+		return;
+
+	destroy_hist_field(field_var->var, 0);
+	destroy_hist_field(field_var->val, 0);
+
+	kfree(field_var);
+}
+
+static void destroy_field_vars(struct hist_trigger_data *hist_data)
+{
+	unsigned int i;
+
+	for (i = 0; i < hist_data->n_field_vars; i++)
+		destroy_field_var(hist_data->field_vars[i]);
+}
+
+static void save_field_var(struct hist_trigger_data *hist_data,
+			   struct field_var *field_var)
+{
+	hist_data->field_vars[hist_data->n_field_vars++] = field_var;
+
+	if (field_var->val->flags & HIST_FIELD_FL_STRING)
+		hist_data->n_field_var_str++;
+}
+
+
+static void destroy_synth_var_refs(struct hist_trigger_data *hist_data)
+{
+	unsigned int i;
+
+	for (i = 0; i < hist_data->n_synth_var_refs; i++)
+		destroy_hist_field(hist_data->synth_var_refs[i], 0);
+}
+
+static void save_synth_var_ref(struct hist_trigger_data *hist_data,
+			 struct hist_field *var_ref)
+{
+	hist_data->synth_var_refs[hist_data->n_synth_var_refs++] = var_ref;
+
+	hist_data->var_refs[hist_data->n_var_refs] = var_ref;
+	var_ref->var_ref_idx = hist_data->n_var_refs++;
+}
+
+static int check_synth_field(struct synth_event *event,
+			     struct hist_field *hist_field,
+			     unsigned int field_pos)
+{
+	struct synth_field *field;
+
+	if (field_pos >= event->n_fields)
+		return -EINVAL;
+
+	field = event->fields[field_pos];
+
+	if (strcmp(field->type, hist_field->type) != 0)
+		return -EINVAL;
+
+	return 0;
+}
+
+static struct hist_field *
+onmatch_find_var(struct hist_trigger_data *hist_data, struct action_data *data,
+		 char *system, char *event, char *var)
+{
+	struct hist_field *hist_field;
+
+	var++; /* skip '$' */
+
+	hist_field = find_target_event_var(hist_data, system, event, var);
+	if (!hist_field) {
+		if (!system) {
+			system = data->onmatch.match_event_system;
+			event = data->onmatch.match_event;
+		}
+
+		hist_field = find_event_var(hist_data, system, event, var);
+	}
+
+	if (!hist_field)
+		hist_err_event("onmatch: Couldn't find onmatch param: $", system, event, var);
+
+	return hist_field;
+}
+
+static struct hist_field *
+onmatch_create_field_var(struct hist_trigger_data *hist_data,
+			 struct action_data *data, char *system,
+			 char *event, char *var)
+{
+	struct hist_field *hist_field = NULL;
+	struct field_var *field_var;
+
+	/*
+	 * First try to create a field var on the target event (the
+	 * currently being defined).  This will create a variable for
+	 * unqualified fields on the target event, or if qualified,
+	 * target fields that have qualified names matching the target.
+	 */
+	field_var = create_target_field_var(hist_data, system, event, var);
+
+	if (field_var && !IS_ERR(field_var)) {
+		save_field_var(hist_data, field_var);
+		hist_field = field_var->var;
+	} else {
+		field_var = NULL;
+		/*
+		 * If no explicit system.event is specfied, default to
+		 * looking for fields on the onmatch(system.event.xxx)
+		 * event.
+		 */
+		if (!system) {
+			system = data->onmatch.match_event_system;
+			event = data->onmatch.match_event;
+		}
+
+		/*
+		 * At this point, we're looking at a field on another
+		 * event.  Because we can't modify a hist trigger on
+		 * another event to add a variable for a field, we need
+		 * to create a new trigger on that event and create the
+		 * variable at the same time.
+		 */
+		hist_field = create_field_var_hist(hist_data, system, event, var);
+		if (IS_ERR(hist_field))
+			goto free;
+	}
+ out:
+	return hist_field;
+ free:
+	destroy_field_var(field_var);
+	hist_field = NULL;
+	goto out;
+}
+
+static int onmatch_create(struct hist_trigger_data *hist_data,
+			  struct trace_event_file *file,
+			  struct action_data *data)
+{
+	char *event_name, *param, *system = NULL;
+	struct hist_field *hist_field, *var_ref;
+	unsigned int i, var_ref_idx;
+	unsigned int field_pos = 0;
+	struct synth_event *event;
+	int ret = 0;
+
+	mutex_lock(&synth_event_mutex);
+	event = find_synth_event(data->onmatch.synth_event_name);
+	if (!event) {
+		hist_err("onmatch: Couldn't find synthetic event: ", data->onmatch.synth_event_name);
+		mutex_unlock(&synth_event_mutex);
+		return -EINVAL;
+	}
+	event->ref++;
+	mutex_unlock(&synth_event_mutex);
+
+	var_ref_idx = hist_data->n_var_refs;
+
+	for (i = 0; i < data->n_params; i++) {
+		char *p;
+
+		p = param = kstrdup(data->params[i], GFP_KERNEL);
+		if (!param) {
+			ret = -ENOMEM;
+			goto err;
+		}
+
+		system = strsep(&param, ".");
+		if (!param) {
+			param = (char *)system;
+			system = event_name = NULL;
+		} else {
+			event_name = strsep(&param, ".");
+			if (!param) {
+				kfree(p);
+				ret = -EINVAL;
+				goto err;
+			}
+		}
+
+		if (param[0] == '$')
+			hist_field = onmatch_find_var(hist_data, data, system,
+						      event_name, param);
+		else
+			hist_field = onmatch_create_field_var(hist_data, data,
+							      system,
+							      event_name,
+							      param);
+
+		if (!hist_field) {
+			kfree(p);
+			ret = -EINVAL;
+			goto err;
+		}
+
+		if (check_synth_field(event, hist_field, field_pos) == 0) {
+			var_ref = create_var_ref(hist_field, system, event_name);
+			if (!var_ref) {
+				kfree(p);
+				ret = -ENOMEM;
+				goto err;
+			}
+
+			save_synth_var_ref(hist_data, var_ref);
+			field_pos++;
+			kfree(p);
+			continue;
+		}
+
+		hist_err_event("onmatch: Param type doesn't match synthetic event field type: ",
+			       system, event_name, param);
+		kfree(p);
+		ret = -EINVAL;
+		goto err;
+	}
+
+	if (field_pos != event->n_fields) {
+		hist_err("onmatch: Param count doesn't match synthetic event field count: ", event->name);
+		ret = -EINVAL;
+		goto err;
+	}
+
+	data->fn = action_trace;
+	data->onmatch.synth_event = event;
+	data->onmatch.var_ref_idx = var_ref_idx;
+ out:
+	return ret;
+ err:
+	mutex_lock(&synth_event_mutex);
+	event->ref--;
+	mutex_unlock(&synth_event_mutex);
+
+	goto out;
+}
+
+static struct action_data *onmatch_parse(struct trace_array *tr, char *str)
+{
+	char *match_event, *match_event_system;
+	char *synth_event_name, *params;
+	struct action_data *data;
+	int ret = -EINVAL;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return ERR_PTR(-ENOMEM);
+
+	match_event = strsep(&str, ")");
+	if (!match_event || !str) {
+		hist_err("onmatch: Missing closing paren: ", match_event);
+		goto free;
+	}
+
+	match_event_system = strsep(&match_event, ".");
+	if (!match_event) {
+		hist_err("onmatch: Missing subsystem for match event: ", match_event_system);
+		goto free;
+	}
+
+	if (IS_ERR(event_file(tr, match_event_system, match_event))) {
+		hist_err_event("onmatch: Invalid subsystem or event name: ",
+			       match_event_system, match_event, NULL);
+		goto free;
+	}
+
+	data->onmatch.match_event = kstrdup(match_event, GFP_KERNEL);
+	if (!data->onmatch.match_event) {
+		ret = -ENOMEM;
+		goto free;
+	}
+
+	data->onmatch.match_event_system = kstrdup(match_event_system, GFP_KERNEL);
+	if (!data->onmatch.match_event_system) {
+		ret = -ENOMEM;
+		goto free;
+	}
+
+	strsep(&str, ".");
+	if (!str) {
+		hist_err("onmatch: Missing . after onmatch(): ", str);
+		goto free;
+	}
+
+	synth_event_name = strsep(&str, "(");
+	if (!synth_event_name || !str) {
+		hist_err("onmatch: Missing opening paramlist paren: ", synth_event_name);
+		goto free;
 	}
 
-	hist_data->fields[val_idx] = create_hist_field(field, flags);
-	if (!hist_data->fields[val_idx]) {
+	data->onmatch.synth_event_name = kstrdup(synth_event_name, GFP_KERNEL);
+	if (!data->onmatch.synth_event_name) {
 		ret = -ENOMEM;
+		goto free;
+	}
+
+	params = strsep(&str, ")");
+	if (!params || !str || (str && strlen(str))) {
+		hist_err("onmatch: Missing closing paramlist paren: ", params);
+		goto free;
+	}
+
+	ret = parse_action_params(params, data);
+	if (ret)
+		goto free;
+ out:
+	return data;
+ free:
+	onmatch_destroy(data);
+	data = ERR_PTR(ret);
+	goto out;
+}
+
+static int create_hitcount_val(struct hist_trigger_data *hist_data)
+{
+	hist_data->fields[HITCOUNT_IDX] =
+		create_hist_field(hist_data, NULL, HIST_FIELD_FL_HITCOUNT, NULL);
+	if (!hist_data->fields[HITCOUNT_IDX])
+		return -ENOMEM;
+
+	hist_data->n_vals++;
+	hist_data->n_fields++;
+
+	if (WARN_ON(hist_data->n_vals > TRACING_MAP_VALS_MAX))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int __create_val_field(struct hist_trigger_data *hist_data,
+			      unsigned int val_idx,
+			      struct trace_event_file *file,
+			      char *var_name, char *field_str,
+			      unsigned long flags)
+{
+	struct hist_field *hist_field;
+	int ret = 0;
+
+	hist_field = parse_expr(hist_data, file, field_str, flags, var_name, 0);
+	if (IS_ERR(hist_field)) {
+		ret = PTR_ERR(hist_field);
 		goto out;
 	}
 
+	hist_data->fields[val_idx] = hist_field;
+
 	++hist_data->n_vals;
+	++hist_data->n_fields;
 
-	if (WARN_ON(hist_data->n_vals > TRACING_MAP_VALS_MAX))
+	if (WARN_ON(hist_data->n_vals > TRACING_MAP_VALS_MAX + TRACING_MAP_VARS_MAX))
 		ret = -EINVAL;
  out:
 	return ret;
 }
 
+static int create_val_field(struct hist_trigger_data *hist_data,
+			    unsigned int val_idx,
+			    struct trace_event_file *file,
+			    char *field_str)
+{
+	if (WARN_ON(val_idx >= TRACING_MAP_VALS_MAX))
+		return -EINVAL;
+
+	return __create_val_field(hist_data, val_idx, file, NULL, field_str, 0);
+}
+
+static int create_var_field(struct hist_trigger_data *hist_data,
+			    unsigned int val_idx,
+			    struct trace_event_file *file,
+			    char *var_name, char *expr_str)
+{
+	unsigned long flags = 0;
+
+	if (WARN_ON(val_idx >= TRACING_MAP_VALS_MAX + TRACING_MAP_VARS_MAX))
+		return -EINVAL;
+
+	if (find_var(hist_data, file, var_name) && !hist_data->remove) {
+		hist_err("Variable already defined: ", var_name);
+		return -EINVAL;
+	}
+
+	flags |= HIST_FIELD_FL_VAR;
+	hist_data->n_vars++;
+	if (WARN_ON(hist_data->n_vars > TRACING_MAP_VARS_MAX))
+		return -EINVAL;
+
+	return __create_val_field(hist_data, val_idx, file, var_name, expr_str, flags);
+}
+
 static int create_val_fields(struct hist_trigger_data *hist_data,
 			     struct trace_event_file *file)
 {
 	char *fields_str, *field_str;
-	unsigned int i, j;
+	unsigned int i, j = 1;
 	int ret;
 
 	ret = create_hitcount_val(hist_data);
@@ -533,12 +3910,15 @@ static int create_val_fields(struct hist_trigger_data *hist_data,
 		field_str = strsep(&fields_str, ",");
 		if (!field_str)
 			break;
+
 		if (strcmp(field_str, "hitcount") == 0)
 			continue;
+
 		ret = create_val_field(hist_data, j++, file, field_str);
 		if (ret)
 			goto out;
 	}
+
 	if (fields_str && (strcmp(fields_str, "hitcount") != 0))
 		ret = -EINVAL;
  out:
@@ -551,12 +3931,13 @@ static int create_key_field(struct hist_trigger_data *hist_data,
 			    struct trace_event_file *file,
 			    char *field_str)
 {
-	struct ftrace_event_field *field = NULL;
+	struct hist_field *hist_field = NULL;
+
 	unsigned long flags = 0;
 	unsigned int key_size;
 	int ret = 0;
 
-	if (WARN_ON(key_idx >= TRACING_MAP_FIELDS_MAX))
+	if (WARN_ON(key_idx >= HIST_FIELDS_MAX))
 		return -EINVAL;
 
 	flags |= HIST_FIELD_FL_KEY;
@@ -564,57 +3945,40 @@ static int create_key_field(struct hist_trigger_data *hist_data,
 	if (strcmp(field_str, "stacktrace") == 0) {
 		flags |= HIST_FIELD_FL_STACKTRACE;
 		key_size = sizeof(unsigned long) * HIST_STACKTRACE_DEPTH;
+		hist_field = create_hist_field(hist_data, NULL, flags, NULL);
 	} else {
-		char *field_name = strsep(&field_str, ".");
-
-		if (field_str) {
-			if (strcmp(field_str, "hex") == 0)
-				flags |= HIST_FIELD_FL_HEX;
-			else if (strcmp(field_str, "sym") == 0)
-				flags |= HIST_FIELD_FL_SYM;
-			else if (strcmp(field_str, "sym-offset") == 0)
-				flags |= HIST_FIELD_FL_SYM_OFFSET;
-			else if ((strcmp(field_str, "execname") == 0) &&
-				 (strcmp(field_name, "common_pid") == 0))
-				flags |= HIST_FIELD_FL_EXECNAME;
-			else if (strcmp(field_str, "syscall") == 0)
-				flags |= HIST_FIELD_FL_SYSCALL;
-			else if (strcmp(field_str, "log2") == 0)
-				flags |= HIST_FIELD_FL_LOG2;
-			else {
-				ret = -EINVAL;
-				goto out;
-			}
+		hist_field = parse_expr(hist_data, file, field_str, flags,
+					NULL, 0);
+		if (IS_ERR(hist_field)) {
+			ret = PTR_ERR(hist_field);
+			goto out;
 		}
 
-		field = trace_find_event_field(file->event_call, field_name);
-		if (!field || !field->size) {
+		if (hist_field->flags & HIST_FIELD_FL_VAR_REF) {
+			hist_err("Using variable references as keys not supported: ", field_str);
+			destroy_hist_field(hist_field, 0);
 			ret = -EINVAL;
 			goto out;
 		}
 
-		if (is_string_field(field))
-			key_size = MAX_FILTER_STR_VAL;
-		else
-			key_size = field->size;
+		key_size = hist_field->size;
 	}
 
-	hist_data->fields[key_idx] = create_hist_field(field, flags);
-	if (!hist_data->fields[key_idx]) {
-		ret = -ENOMEM;
-		goto out;
-	}
+	hist_data->fields[key_idx] = hist_field;
 
 	key_size = ALIGN(key_size, sizeof(u64));
 	hist_data->fields[key_idx]->size = key_size;
 	hist_data->fields[key_idx]->offset = key_offset;
+
 	hist_data->key_size += key_size;
+
 	if (hist_data->key_size > HIST_KEY_SIZE_MAX) {
 		ret = -EINVAL;
 		goto out;
 	}
 
 	hist_data->n_keys++;
+	hist_data->n_fields++;
 
 	if (WARN_ON(hist_data->n_keys > TRACING_MAP_KEYS_MAX))
 		return -EINVAL;
@@ -658,21 +4022,113 @@ static int create_key_fields(struct hist_trigger_data *hist_data,
 	return ret;
 }
 
+static int create_var_fields(struct hist_trigger_data *hist_data,
+			     struct trace_event_file *file)
+{
+	unsigned int i, j = hist_data->n_vals;
+	int ret = 0;
+
+	unsigned int n_vars = hist_data->attrs->var_defs.n_vars;
+
+	for (i = 0; i < n_vars; i++) {
+		char *var_name = hist_data->attrs->var_defs.name[i];
+		char *expr = hist_data->attrs->var_defs.expr[i];
+
+		ret = create_var_field(hist_data, j++, file, var_name, expr);
+		if (ret)
+			goto out;
+	}
+ out:
+	return ret;
+}
+
+static void free_var_defs(struct hist_trigger_data *hist_data)
+{
+	unsigned int i;
+
+	for (i = 0; i < hist_data->attrs->var_defs.n_vars; i++) {
+		kfree(hist_data->attrs->var_defs.name[i]);
+		kfree(hist_data->attrs->var_defs.expr[i]);
+	}
+
+	hist_data->attrs->var_defs.n_vars = 0;
+}
+
+static int parse_var_defs(struct hist_trigger_data *hist_data)
+{
+	char *s, *str, *var_name, *field_str;
+	unsigned int i, j, n_vars = 0;
+	int ret = 0;
+
+	for (i = 0; i < hist_data->attrs->n_assignments; i++) {
+		str = hist_data->attrs->assignment_str[i];
+		for (j = 0; j < TRACING_MAP_VARS_MAX; j++) {
+			field_str = strsep(&str, ",");
+			if (!field_str)
+				break;
+
+			var_name = strsep(&field_str, "=");
+			if (!var_name || !field_str) {
+				hist_err("Malformed assignment: ", var_name);
+				ret = -EINVAL;
+				goto free;
+			}
+
+			if (n_vars == TRACING_MAP_VARS_MAX) {
+				hist_err("Too many variables defined: ", var_name);
+				ret = -EINVAL;
+				goto free;
+			}
+
+			s = kstrdup(var_name, GFP_KERNEL);
+			if (!s) {
+				ret = -ENOMEM;
+				goto free;
+			}
+			hist_data->attrs->var_defs.name[n_vars] = s;
+
+			s = kstrdup(field_str, GFP_KERNEL);
+			if (!s) {
+				kfree(hist_data->attrs->var_defs.name[n_vars]);
+				ret = -ENOMEM;
+				goto free;
+			}
+			hist_data->attrs->var_defs.expr[n_vars++] = s;
+
+			hist_data->attrs->var_defs.n_vars = n_vars;
+		}
+	}
+
+	return ret;
+ free:
+	free_var_defs(hist_data);
+
+	return ret;
+}
+
 static int create_hist_fields(struct hist_trigger_data *hist_data,
 			      struct trace_event_file *file)
 {
 	int ret;
 
+	ret = parse_var_defs(hist_data);
+	if (ret)
+		goto out;
+
 	ret = create_val_fields(hist_data, file);
 	if (ret)
 		goto out;
 
-	ret = create_key_fields(hist_data, file);
+	ret = create_var_fields(hist_data, file);
 	if (ret)
 		goto out;
 
-	hist_data->n_fields = hist_data->n_vals + hist_data->n_keys;
+	ret = create_key_fields(hist_data, file);
+	if (ret)
+		goto out;
  out:
+	free_var_defs(hist_data);
+
 	return ret;
 }
 
@@ -695,7 +4151,7 @@ static int create_sort_keys(struct hist_trigger_data *hist_data)
 	char *fields_str = hist_data->attrs->sort_key_str;
 	struct tracing_map_sort_key *sort_key;
 	int descending, ret = 0;
-	unsigned int i, j;
+	unsigned int i, j, k;
 
 	hist_data->n_sort_keys = 1; /* we always have at least one, hitcount */
 
@@ -743,12 +4199,19 @@ static int create_sort_keys(struct hist_trigger_data *hist_data)
 			continue;
 		}
 
-		for (j = 1; j < hist_data->n_fields; j++) {
+		for (j = 1, k = 1; j < hist_data->n_fields; j++) {
+			unsigned int idx;
+
 			hist_field = hist_data->fields[j];
+			if (hist_field->flags & HIST_FIELD_FL_VAR)
+				continue;
+
+			idx = k++;
+
 			test_name = hist_field_name(hist_field, 0);
 
 			if (strcmp(field_name, test_name) == 0) {
-				sort_key->field_idx = j;
+				sort_key->field_idx = idx;
 				descending = is_descending(field_str);
 				if (descending < 0) {
 					ret = descending;
@@ -763,16 +4226,230 @@ static int create_sort_keys(struct hist_trigger_data *hist_data)
 			break;
 		}
 	}
+
 	hist_data->n_sort_keys = i;
  out:
 	return ret;
 }
 
+static void destroy_actions(struct hist_trigger_data *hist_data)
+{
+	unsigned int i;
+
+	for (i = 0; i < hist_data->n_actions; i++) {
+		struct action_data *data = hist_data->actions[i];
+
+		if (data->fn == action_trace)
+			onmatch_destroy(data);
+		else if (data->fn == onmax_save)
+			onmax_destroy(data);
+		else
+			kfree(data);
+	}
+}
+
+static int parse_actions(struct hist_trigger_data *hist_data)
+{
+	struct trace_array *tr = hist_data->event_file->tr;
+	struct action_data *data;
+	unsigned int i;
+	int ret = 0;
+	char *str;
+
+	for (i = 0; i < hist_data->attrs->n_actions; i++) {
+		str = hist_data->attrs->action_str[i];
+
+		if (strncmp(str, "onmatch(", strlen("onmatch(")) == 0) {
+			char *action_str = str + strlen("onmatch(");
+
+			data = onmatch_parse(tr, action_str);
+			if (IS_ERR(data)) {
+				ret = PTR_ERR(data);
+				break;
+			}
+			data->fn = action_trace;
+		} else if (strncmp(str, "onmax(", strlen("onmax(")) == 0) {
+			char *action_str = str + strlen("onmax(");
+
+			data = onmax_parse(action_str);
+			if (IS_ERR(data)) {
+				ret = PTR_ERR(data);
+				break;
+			}
+			data->fn = onmax_save;
+		} else {
+			ret = -EINVAL;
+			break;
+		}
+
+		hist_data->actions[hist_data->n_actions++] = data;
+	}
+
+	return ret;
+}
+
+static int create_actions(struct hist_trigger_data *hist_data,
+			  struct trace_event_file *file)
+{
+	struct action_data *data;
+	unsigned int i;
+	int ret = 0;
+
+	for (i = 0; i < hist_data->attrs->n_actions; i++) {
+		data = hist_data->actions[i];
+
+		if (data->fn == action_trace) {
+			ret = onmatch_create(hist_data, file, data);
+			if (ret)
+				return ret;
+		} else if (data->fn == onmax_save) {
+			ret = onmax_create(hist_data, data);
+			if (ret)
+				return ret;
+		}
+	}
+
+	return ret;
+}
+
+static void print_actions(struct seq_file *m,
+			  struct hist_trigger_data *hist_data,
+			  struct tracing_map_elt *elt)
+{
+	unsigned int i;
+
+	for (i = 0; i < hist_data->n_actions; i++) {
+		struct action_data *data = hist_data->actions[i];
+
+		if (data->fn == onmax_save)
+			onmax_print(m, hist_data, elt, data);
+	}
+}
+
+static void print_onmax_spec(struct seq_file *m,
+			     struct hist_trigger_data *hist_data,
+			     struct action_data *data)
+{
+	unsigned int i;
+
+	seq_puts(m, ":onmax(");
+	seq_printf(m, "%s", data->onmax.var_str);
+	seq_printf(m, ").%s(", data->onmax.fn_name);
+
+	for (i = 0; i < hist_data->n_max_vars; i++) {
+		seq_printf(m, "%s", hist_data->max_vars[i]->var->var.name);
+		if (i < hist_data->n_max_vars - 1)
+			seq_puts(m, ",");
+	}
+	seq_puts(m, ")");
+}
+
+static void print_onmatch_spec(struct seq_file *m,
+			       struct hist_trigger_data *hist_data,
+			       struct action_data *data)
+{
+	unsigned int i;
+
+	seq_printf(m, ":onmatch(%s.%s).", data->onmatch.match_event_system,
+		   data->onmatch.match_event);
+
+	seq_printf(m, "%s(", data->onmatch.synth_event->name);
+
+	for (i = 0; i < data->n_params; i++) {
+		if (i)
+			seq_puts(m, ",");
+		seq_printf(m, "%s", data->params[i]);
+	}
+
+	seq_puts(m, ")");
+}
+
+static bool actions_match(struct hist_trigger_data *hist_data,
+			  struct hist_trigger_data *hist_data_test)
+{
+	unsigned int i, j;
+
+	if (hist_data->n_actions != hist_data_test->n_actions)
+		return false;
+
+	for (i = 0; i < hist_data->n_actions; i++) {
+		struct action_data *data = hist_data->actions[i];
+		struct action_data *data_test = hist_data_test->actions[i];
+
+		if (data->fn != data_test->fn)
+			return false;
+
+		if (data->n_params != data_test->n_params)
+			return false;
+
+		for (j = 0; j < data->n_params; j++) {
+			if (strcmp(data->params[j], data_test->params[j]) != 0)
+				return false;
+		}
+
+		if (data->fn == action_trace) {
+			if (strcmp(data->onmatch.synth_event_name,
+				   data_test->onmatch.synth_event_name) != 0)
+				return false;
+			if (strcmp(data->onmatch.match_event_system,
+				   data_test->onmatch.match_event_system) != 0)
+				return false;
+			if (strcmp(data->onmatch.match_event,
+				   data_test->onmatch.match_event) != 0)
+				return false;
+		} else if (data->fn == onmax_save) {
+			if (strcmp(data->onmax.var_str,
+				   data_test->onmax.var_str) != 0)
+				return false;
+			if (strcmp(data->onmax.fn_name,
+				   data_test->onmax.fn_name) != 0)
+				return false;
+		}
+	}
+
+	return true;
+}
+
+
+static void print_actions_spec(struct seq_file *m,
+			       struct hist_trigger_data *hist_data)
+{
+	unsigned int i;
+
+	for (i = 0; i < hist_data->n_actions; i++) {
+		struct action_data *data = hist_data->actions[i];
+
+		if (data->fn == action_trace)
+			print_onmatch_spec(m, hist_data, data);
+		else if (data->fn == onmax_save)
+			print_onmax_spec(m, hist_data, data);
+	}
+}
+
+static void destroy_field_var_hists(struct hist_trigger_data *hist_data)
+{
+	unsigned int i;
+
+	for (i = 0; i < hist_data->n_field_var_hists; i++) {
+		kfree(hist_data->field_var_hists[i]->cmd);
+		kfree(hist_data->field_var_hists[i]);
+	}
+}
+
 static void destroy_hist_data(struct hist_trigger_data *hist_data)
 {
+	if (!hist_data)
+		return;
+
 	destroy_hist_trigger_attrs(hist_data->attrs);
 	destroy_hist_fields(hist_data);
 	tracing_map_destroy(hist_data->map);
+
+	destroy_actions(hist_data);
+	destroy_field_vars(hist_data);
+	destroy_field_var_hists(hist_data);
+	destroy_synth_var_refs(hist_data);
+
 	kfree(hist_data);
 }
 
@@ -781,7 +4458,7 @@ static int create_tracing_map_fields(struct hist_trigger_data *hist_data)
 	struct tracing_map *map = hist_data->map;
 	struct ftrace_event_field *field;
 	struct hist_field *hist_field;
-	int i, idx;
+	int i, idx = 0;
 
 	for_each_hist_field(i, hist_data) {
 		hist_field = hist_data->fields[i];
@@ -792,6 +4469,9 @@ static int create_tracing_map_fields(struct hist_trigger_data *hist_data)
 
 			if (hist_field->flags & HIST_FIELD_FL_STACKTRACE)
 				cmp_fn = tracing_map_cmp_none;
+			else if (!field)
+				cmp_fn = tracing_map_cmp_num(hist_field->size,
+							     hist_field->is_signed);
 			else if (is_string_field(field))
 				cmp_fn = tracing_map_cmp_string;
 			else
@@ -800,36 +4480,29 @@ static int create_tracing_map_fields(struct hist_trigger_data *hist_data)
 			idx = tracing_map_add_key_field(map,
 							hist_field->offset,
 							cmp_fn);
-
-		} else
+		} else if (!(hist_field->flags & HIST_FIELD_FL_VAR))
 			idx = tracing_map_add_sum_field(map);
 
 		if (idx < 0)
 			return idx;
-	}
-
-	return 0;
-}
-
-static bool need_tracing_map_ops(struct hist_trigger_data *hist_data)
-{
-	struct hist_field *key_field;
-	unsigned int i;
-
-	for_each_hist_key_field(i, hist_data) {
-		key_field = hist_data->fields[i];
 
-		if (key_field->flags & HIST_FIELD_FL_EXECNAME)
-			return true;
+		if (hist_field->flags & HIST_FIELD_FL_VAR) {
+			idx = tracing_map_add_var(map);
+			if (idx < 0)
+				return idx;
+			hist_field->var.idx = idx;
+			hist_field->var.hist_data = hist_data;
+		}
 	}
 
-	return false;
+	return 0;
 }
 
 static struct hist_trigger_data *
 create_hist_data(unsigned int map_bits,
 		 struct hist_trigger_attrs *attrs,
-		 struct trace_event_file *file)
+		 struct trace_event_file *file,
+		 bool remove)
 {
 	const struct tracing_map_ops *map_ops = NULL;
 	struct hist_trigger_data *hist_data;
@@ -840,6 +4513,12 @@ create_hist_data(unsigned int map_bits,
 		return ERR_PTR(-ENOMEM);
 
 	hist_data->attrs = attrs;
+	hist_data->remove = remove;
+	hist_data->event_file = file;
+
+	ret = parse_actions(hist_data);
+	if (ret)
+		goto free;
 
 	ret = create_hist_fields(hist_data, file);
 	if (ret)
@@ -849,8 +4528,7 @@ create_hist_data(unsigned int map_bits,
 	if (ret)
 		goto free;
 
-	if (need_tracing_map_ops(hist_data))
-		map_ops = &hist_trigger_elt_comm_ops;
+	map_ops = &hist_trigger_elt_data_ops;
 
 	hist_data->map = tracing_map_create(map_bits, hist_data->key_size,
 					    map_ops, hist_data);
@@ -863,12 +4541,6 @@ create_hist_data(unsigned int map_bits,
 	ret = create_tracing_map_fields(hist_data);
 	if (ret)
 		goto free;
-
-	ret = tracing_map_init(hist_data->map);
-	if (ret)
-		goto free;
-
-	hist_data->event_file = file;
  out:
 	return hist_data;
  free:
@@ -882,18 +4554,39 @@ create_hist_data(unsigned int map_bits,
 }
 
 static void hist_trigger_elt_update(struct hist_trigger_data *hist_data,
-				    struct tracing_map_elt *elt,
-				    void *rec)
+				    struct tracing_map_elt *elt, void *rec,
+				    struct ring_buffer_event *rbe,
+				    u64 *var_ref_vals)
 {
+	struct hist_elt_data *elt_data;
 	struct hist_field *hist_field;
-	unsigned int i;
+	unsigned int i, var_idx;
 	u64 hist_val;
 
+	elt_data = elt->private_data;
+	elt_data->var_ref_vals = var_ref_vals;
+
 	for_each_hist_val_field(i, hist_data) {
 		hist_field = hist_data->fields[i];
-		hist_val = hist_field->fn(hist_field, rec);
+		hist_val = hist_field->fn(hist_field, elt, rbe, rec);
+		if (hist_field->flags & HIST_FIELD_FL_VAR) {
+			var_idx = hist_field->var.idx;
+			tracing_map_set_var(elt, var_idx, hist_val);
+			continue;
+		}
 		tracing_map_update_sum(elt, i, hist_val);
 	}
+
+	for_each_hist_key_field(i, hist_data) {
+		hist_field = hist_data->fields[i];
+		if (hist_field->flags & HIST_FIELD_FL_VAR) {
+			hist_val = hist_field->fn(hist_field, elt, rbe, rec);
+			var_idx = hist_field->var.idx;
+			tracing_map_set_var(elt, var_idx, hist_val);
+		}
+	}
+
+	update_field_vars(hist_data, elt, rbe, rec);
 }
 
 static inline void add_to_key(char *compound_key, void *key,
@@ -920,15 +4613,31 @@ static inline void add_to_key(char *compound_key, void *key,
 	memcpy(compound_key + key_field->offset, key, size);
 }
 
-static void event_hist_trigger(struct event_trigger_data *data, void *rec)
+static void
+hist_trigger_actions(struct hist_trigger_data *hist_data,
+		     struct tracing_map_elt *elt, void *rec,
+		     struct ring_buffer_event *rbe, u64 *var_ref_vals)
+{
+	struct action_data *data;
+	unsigned int i;
+
+	for (i = 0; i < hist_data->n_actions; i++) {
+		data = hist_data->actions[i];
+		data->fn(hist_data, elt, rec, rbe, data, var_ref_vals);
+	}
+}
+
+static void event_hist_trigger(struct event_trigger_data *data, void *rec,
+			       struct ring_buffer_event *rbe)
 {
 	struct hist_trigger_data *hist_data = data->private_data;
 	bool use_compound_key = (hist_data->n_keys > 1);
 	unsigned long entries[HIST_STACKTRACE_DEPTH];
+	u64 var_ref_vals[TRACING_MAP_VARS_MAX];
 	char compound_key[HIST_KEY_SIZE_MAX];
+	struct tracing_map_elt *elt = NULL;
 	struct stack_trace stacktrace;
 	struct hist_field *key_field;
-	struct tracing_map_elt *elt;
 	u64 field_contents;
 	void *key = NULL;
 	unsigned int i;
@@ -949,7 +4658,7 @@ static void event_hist_trigger(struct event_trigger_data *data, void *rec)
 
 			key = entries;
 		} else {
-			field_contents = key_field->fn(key_field, rec);
+			field_contents = key_field->fn(key_field, elt, rbe, rec);
 			if (key_field->flags & HIST_FIELD_FL_STRING) {
 				key = (void *)(unsigned long)field_contents;
 				use_compound_key = true;
@@ -964,9 +4673,18 @@ static void event_hist_trigger(struct event_trigger_data *data, void *rec)
 	if (use_compound_key)
 		key = compound_key;
 
+	if (hist_data->n_var_refs &&
+	    !resolve_var_refs(hist_data, key, var_ref_vals, false))
+		return;
+
 	elt = tracing_map_insert(hist_data->map, key);
-	if (elt)
-		hist_trigger_elt_update(hist_data, elt, rec);
+	if (!elt)
+		return;
+
+	hist_trigger_elt_update(hist_data, elt, rec, rbe, var_ref_vals);
+
+	if (resolve_var_refs(hist_data, key, var_ref_vals, true))
+		hist_trigger_actions(hist_data, elt, rec, rbe, var_ref_vals);
 }
 
 static void hist_trigger_stacktrace_print(struct seq_file *m,
@@ -1023,7 +4741,13 @@ hist_trigger_entry_print(struct seq_file *m,
 			seq_printf(m, "%s: [%llx] %-55s", field_name,
 				   uval, str);
 		} else if (key_field->flags & HIST_FIELD_FL_EXECNAME) {
-			char *comm = elt->private_data;
+			struct hist_elt_data *elt_data = elt->private_data;
+			char *comm;
+
+			if (WARN_ON_ONCE(!elt_data))
+				return;
+
+			comm = elt_data->comm;
 
 			uval = *(u64 *)(key + key_field->offset);
 			seq_printf(m, "%s: %-16s[%10llu]", field_name,
@@ -1067,6 +4791,10 @@ hist_trigger_entry_print(struct seq_file *m,
 	for (i = 1; i < hist_data->n_vals; i++) {
 		field_name = hist_field_name(hist_data->fields[i], 0);
 
+		if (hist_data->fields[i]->flags & HIST_FIELD_FL_VAR ||
+		    hist_data->fields[i]->flags & HIST_FIELD_FL_EXPR)
+			continue;
+
 		if (hist_data->fields[i]->flags & HIST_FIELD_FL_HEX) {
 			seq_printf(m, "  %s: %10llx", field_name,
 				   tracing_map_read_sum(elt, i));
@@ -1076,6 +4804,8 @@ hist_trigger_entry_print(struct seq_file *m,
 		}
 	}
 
+	print_actions(m, hist_data, elt);
+
 	seq_puts(m, "\n");
 }
 
@@ -1144,6 +4874,11 @@ static int hist_show(struct seq_file *m, void *v)
 			hist_trigger_show(m, data, n++);
 	}
 
+	if (have_hist_err()) {
+		seq_printf(m, "\nERROR: %s\n", hist_err_str);
+		seq_printf(m, "  Last command: %s\n", last_hist_cmd);
+	}
+
  out_unlock:
 	mutex_unlock(&event_mutex);
 
@@ -1162,37 +4897,22 @@ const struct file_operations event_hist_fops = {
 	.release = single_release,
 };
 
-static const char *get_hist_field_flags(struct hist_field *hist_field)
-{
-	const char *flags_str = NULL;
-
-	if (hist_field->flags & HIST_FIELD_FL_HEX)
-		flags_str = "hex";
-	else if (hist_field->flags & HIST_FIELD_FL_SYM)
-		flags_str = "sym";
-	else if (hist_field->flags & HIST_FIELD_FL_SYM_OFFSET)
-		flags_str = "sym-offset";
-	else if (hist_field->flags & HIST_FIELD_FL_EXECNAME)
-		flags_str = "execname";
-	else if (hist_field->flags & HIST_FIELD_FL_SYSCALL)
-		flags_str = "syscall";
-	else if (hist_field->flags & HIST_FIELD_FL_LOG2)
-		flags_str = "log2";
-
-	return flags_str;
-}
-
 static void hist_field_print(struct seq_file *m, struct hist_field *hist_field)
 {
 	const char *field_name = hist_field_name(hist_field, 0);
 
-	seq_printf(m, "%s", field_name);
-	if (hist_field->flags) {
-		const char *flags_str = get_hist_field_flags(hist_field);
-
-		if (flags_str)
-			seq_printf(m, ".%s", flags_str);
-	}
+	if (hist_field->var.name)
+		seq_printf(m, "%s=", hist_field->var.name);
+
+	if (hist_field->flags & HIST_FIELD_FL_CPU)
+		seq_puts(m, "cpu");
+	else if (field_name) {
+		if (hist_field->flags & HIST_FIELD_FL_VAR_REF ||
+		    hist_field->flags & HIST_FIELD_FL_ALIAS)
+			seq_putc(m, '$');
+		seq_printf(m, "%s", field_name);
+	} else if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP)
+		seq_puts(m, "common_timestamp");
 }
 
 static int event_hist_trigger_print(struct seq_file *m,
@@ -1200,7 +4920,8 @@ static int event_hist_trigger_print(struct seq_file *m,
 				    struct event_trigger_data *data)
 {
 	struct hist_trigger_data *hist_data = data->private_data;
-	struct hist_field *key_field;
+	struct hist_field *field;
+	bool have_var = false;
 	unsigned int i;
 
 	seq_puts(m, "hist:");
@@ -1211,25 +4932,47 @@ static int event_hist_trigger_print(struct seq_file *m,
 	seq_puts(m, "keys=");
 
 	for_each_hist_key_field(i, hist_data) {
-		key_field = hist_data->fields[i];
+		field = hist_data->fields[i];
 
 		if (i > hist_data->n_vals)
 			seq_puts(m, ",");
 
-		if (key_field->flags & HIST_FIELD_FL_STACKTRACE)
+		if (field->flags & HIST_FIELD_FL_STACKTRACE)
 			seq_puts(m, "stacktrace");
 		else
-			hist_field_print(m, key_field);
+			hist_field_print(m, field);
 	}
 
 	seq_puts(m, ":vals=");
 
 	for_each_hist_val_field(i, hist_data) {
+		field = hist_data->fields[i];
+		if (field->flags & HIST_FIELD_FL_VAR) {
+			have_var = true;
+			continue;
+		}
+
 		if (i == HITCOUNT_IDX)
 			seq_puts(m, "hitcount");
 		else {
 			seq_puts(m, ",");
-			hist_field_print(m, hist_data->fields[i]);
+			hist_field_print(m, field);
+		}
+	}
+
+	if (have_var) {
+		unsigned int n = 0;
+
+		seq_puts(m, ":");
+
+		for_each_hist_val_field(i, hist_data) {
+			field = hist_data->fields[i];
+
+			if (field->flags & HIST_FIELD_FL_VAR) {
+				if (n++)
+					seq_puts(m, ",");
+				hist_field_print(m, field);
+			}
 		}
 	}
 
@@ -1237,28 +4980,36 @@ static int event_hist_trigger_print(struct seq_file *m,
 
 	for (i = 0; i < hist_data->n_sort_keys; i++) {
 		struct tracing_map_sort_key *sort_key;
+		unsigned int idx, first_key_idx;
+
+		/* skip VAR vals */
+		first_key_idx = hist_data->n_vals - hist_data->n_vars;
 
 		sort_key = &hist_data->sort_keys[i];
+		idx = sort_key->field_idx;
+
+		if (WARN_ON(idx >= HIST_FIELDS_MAX))
+			return -EINVAL;
 
 		if (i > 0)
 			seq_puts(m, ",");
 
-		if (sort_key->field_idx == HITCOUNT_IDX)
+		if (idx == HITCOUNT_IDX)
 			seq_puts(m, "hitcount");
 		else {
-			unsigned int idx = sort_key->field_idx;
-
-			if (WARN_ON(idx >= TRACING_MAP_FIELDS_MAX))
-				return -EINVAL;
-
+			if (idx >= first_key_idx)
+				idx += hist_data->n_vars;
 			hist_field_print(m, hist_data->fields[idx]);
 		}
 
 		if (sort_key->descending)
 			seq_puts(m, ".descending");
 	}
-
 	seq_printf(m, ":size=%u", (1 << hist_data->map->map_bits));
+	if (hist_data->enable_timestamps)
+		seq_printf(m, ":clock=%s", hist_data->attrs->clock);
+
+	print_actions_spec(m, hist_data);
 
 	if (data->filter_str)
 		seq_printf(m, " if %s", data->filter_str);
@@ -1286,6 +5037,21 @@ static int event_hist_trigger_init(struct event_trigger_ops *ops,
 	return 0;
 }
 
+static void unregister_field_var_hists(struct hist_trigger_data *hist_data)
+{
+	struct trace_event_file *file;
+	unsigned int i;
+	char *cmd;
+	int ret;
+
+	for (i = 0; i < hist_data->n_field_var_hists; i++) {
+		file = hist_data->field_var_hists[i]->hist_data->event_file;
+		cmd = hist_data->field_var_hists[i]->cmd;
+		ret = event_hist_trigger_func(&trigger_hist_cmd, file,
+					      "!hist", "hist", cmd);
+	}
+}
+
 static void event_hist_trigger_free(struct event_trigger_ops *ops,
 				    struct event_trigger_data *data)
 {
@@ -1298,7 +5064,13 @@ static void event_hist_trigger_free(struct event_trigger_ops *ops,
 	if (!data->ref) {
 		if (data->name)
 			del_named_trigger(data);
+
 		trigger_data_free(data);
+
+		remove_hist_vars(hist_data);
+
+		unregister_field_var_hists(hist_data);
+
 		destroy_hist_data(hist_data);
 	}
 }
@@ -1425,6 +5197,15 @@ static bool hist_trigger_match(struct event_trigger_data *data,
 			return false;
 		if (key_field->offset != key_field_test->offset)
 			return false;
+		if (key_field->size != key_field_test->size)
+			return false;
+		if (key_field->is_signed != key_field_test->is_signed)
+			return false;
+		if (!!key_field->var.name != !!key_field_test->var.name)
+			return false;
+		if (key_field->var.name &&
+		    strcmp(key_field->var.name, key_field_test->var.name) != 0)
+			return false;
 	}
 
 	for (i = 0; i < hist_data->n_sort_keys; i++) {
@@ -1440,6 +5221,9 @@ static bool hist_trigger_match(struct event_trigger_data *data,
 	    (strcmp(data->filter_str, data_test->filter_str) != 0))
 		return false;
 
+	if (!actions_match(hist_data, hist_data_test))
+		return false;
+
 	return true;
 }
 
@@ -1456,6 +5240,7 @@ static int hist_register_trigger(char *glob, struct event_trigger_ops *ops,
 		if (named_data) {
 			if (!hist_trigger_match(data, named_data, named_data,
 						true)) {
+				hist_err("Named hist trigger doesn't match existing named trigger (includes variables): ", hist_data->attrs->name);
 				ret = -EINVAL;
 				goto out;
 			}
@@ -1475,13 +5260,16 @@ static int hist_register_trigger(char *glob, struct event_trigger_ops *ops,
 				test->paused = false;
 			else if (hist_data->attrs->clear)
 				hist_clear(test);
-			else
+			else {
+				hist_err("Hist trigger already exists", NULL);
 				ret = -EEXIST;
+			}
 			goto out;
 		}
 	}
  new:
 	if (hist_data->attrs->cont || hist_data->attrs->clear) {
+		hist_err("Can't clear or continue a nonexistent hist trigger", NULL);
 		ret = -ENOENT;
 		goto out;
 	}
@@ -1490,7 +5278,6 @@ static int hist_register_trigger(char *glob, struct event_trigger_ops *ops,
 		data->paused = true;
 
 	if (named_data) {
-		destroy_hist_data(data->private_data);
 		data->private_data = named_data->private_data;
 		set_named_trigger_data(data, named_data);
 		data->ops = &event_hist_trigger_named_ops;
@@ -1502,8 +5289,32 @@ static int hist_register_trigger(char *glob, struct event_trigger_ops *ops,
 			goto out;
 	}
 
-	list_add_rcu(&data->list, &file->triggers);
+	if (hist_data->enable_timestamps) {
+		char *clock = hist_data->attrs->clock;
+
+		ret = tracing_set_clock(file->tr, hist_data->attrs->clock);
+		if (ret) {
+			hist_err("Couldn't set trace_clock: ", clock);
+			goto out;
+		}
+
+		tracing_set_time_stamp_abs(file->tr, true);
+	}
+
+	if (named_data)
+		destroy_hist_data(hist_data);
+
 	ret++;
+ out:
+	return ret;
+}
+
+static int hist_trigger_enable(struct event_trigger_data *data,
+			       struct trace_event_file *file)
+{
+	int ret = 0;
+
+	list_add_tail_rcu(&data->list, &file->triggers);
 
 	update_cond_flag(file);
 
@@ -1512,10 +5323,55 @@ static int hist_register_trigger(char *glob, struct event_trigger_ops *ops,
 		update_cond_flag(file);
 		ret--;
 	}
- out:
+
 	return ret;
 }
 
+static bool have_hist_trigger_match(struct event_trigger_data *data,
+				    struct trace_event_file *file)
+{
+	struct hist_trigger_data *hist_data = data->private_data;
+	struct event_trigger_data *test, *named_data = NULL;
+	bool match = false;
+
+	if (hist_data->attrs->name)
+		named_data = find_named_trigger(hist_data->attrs->name);
+
+	list_for_each_entry_rcu(test, &file->triggers, list) {
+		if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) {
+			if (hist_trigger_match(data, test, named_data, false)) {
+				match = true;
+				break;
+			}
+		}
+	}
+
+	return match;
+}
+
+static bool hist_trigger_check_refs(struct event_trigger_data *data,
+				    struct trace_event_file *file)
+{
+	struct hist_trigger_data *hist_data = data->private_data;
+	struct event_trigger_data *test, *named_data = NULL;
+
+	if (hist_data->attrs->name)
+		named_data = find_named_trigger(hist_data->attrs->name);
+
+	list_for_each_entry_rcu(test, &file->triggers, list) {
+		if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) {
+			if (!hist_trigger_match(data, test, named_data, false))
+				continue;
+			hist_data = test->private_data;
+			if (check_var_refs(hist_data))
+				return true;
+			break;
+		}
+	}
+
+	return false;
+}
+
 static void hist_unregister_trigger(char *glob, struct event_trigger_ops *ops,
 				    struct event_trigger_data *data,
 				    struct trace_event_file *file)
@@ -1541,17 +5397,55 @@ static void hist_unregister_trigger(char *glob, struct event_trigger_ops *ops,
 
 	if (unregistered && test->ops->free)
 		test->ops->free(test->ops, test);
+
+	if (hist_data->enable_timestamps) {
+		if (!hist_data->remove || unregistered)
+			tracing_set_time_stamp_abs(file->tr, false);
+	}
+}
+
+static bool hist_file_check_refs(struct trace_event_file *file)
+{
+	struct hist_trigger_data *hist_data;
+	struct event_trigger_data *test;
+
+	list_for_each_entry_rcu(test, &file->triggers, list) {
+		if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) {
+			hist_data = test->private_data;
+			if (check_var_refs(hist_data))
+				return true;
+		}
+	}
+
+	return false;
 }
 
 static void hist_unreg_all(struct trace_event_file *file)
 {
 	struct event_trigger_data *test, *n;
+	struct hist_trigger_data *hist_data;
+	struct synth_event *se;
+	const char *se_name;
+
+	if (hist_file_check_refs(file))
+		return;
 
 	list_for_each_entry_safe(test, n, &file->triggers, list) {
 		if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) {
+			hist_data = test->private_data;
 			list_del_rcu(&test->list);
 			trace_event_trigger_enable_disable(file, 0);
+
+			mutex_lock(&synth_event_mutex);
+			se_name = trace_event_name(file->event_call);
+			se = find_synth_event(se_name);
+			if (se)
+				se->ref--;
+			mutex_unlock(&synth_event_mutex);
+
 			update_cond_flag(file);
+			if (hist_data->enable_timestamps)
+				tracing_set_time_stamp_abs(file->tr, false);
 			if (test->ops->free)
 				test->ops->free(test->ops, test);
 		}
@@ -1567,16 +5461,54 @@ static int event_hist_trigger_func(struct event_command *cmd_ops,
 	struct hist_trigger_attrs *attrs;
 	struct event_trigger_ops *trigger_ops;
 	struct hist_trigger_data *hist_data;
-	char *trigger;
+	struct synth_event *se;
+	const char *se_name;
+	bool remove = false;
+	char *trigger, *p;
 	int ret = 0;
 
+	if (glob && strlen(glob)) {
+		last_cmd_set(param);
+		hist_err_clear();
+	}
+
 	if (!param)
 		return -EINVAL;
 
-	/* separate the trigger from the filter (k:v [if filter]) */
-	trigger = strsep(&param, " \t");
-	if (!trigger)
-		return -EINVAL;
+	if (glob[0] == '!')
+		remove = true;
+
+	/*
+	 * separate the trigger from the filter (k:v [if filter])
+	 * allowing for whitespace in the trigger
+	 */
+	p = trigger = param;
+	do {
+		p = strstr(p, "if");
+		if (!p)
+			break;
+		if (p == param)
+			return -EINVAL;
+		if (*(p - 1) != ' ' && *(p - 1) != '\t') {
+			p++;
+			continue;
+		}
+		if (p >= param + strlen(param) - strlen("if") - 1)
+			return -EINVAL;
+		if (*(p + strlen("if")) != ' ' && *(p + strlen("if")) != '\t') {
+			p++;
+			continue;
+		}
+		break;
+	} while (p);
+
+	if (!p)
+		param = NULL;
+	else {
+		*(p - 1) = '\0';
+		param = strstrip(p);
+		trigger = strstrip(trigger);
+	}
 
 	attrs = parse_hist_trigger_attrs(trigger);
 	if (IS_ERR(attrs))
@@ -1585,7 +5517,7 @@ static int event_hist_trigger_func(struct event_command *cmd_ops,
 	if (attrs->map_bits)
 		hist_trigger_bits = attrs->map_bits;
 
-	hist_data = create_hist_data(hist_trigger_bits, attrs, file);
+	hist_data = create_hist_data(hist_trigger_bits, attrs, file, remove);
 	if (IS_ERR(hist_data)) {
 		destroy_hist_trigger_attrs(attrs);
 		return PTR_ERR(hist_data);
@@ -1593,10 +5525,11 @@ static int event_hist_trigger_func(struct event_command *cmd_ops,
 
 	trigger_ops = cmd_ops->get_trigger_ops(cmd, trigger);
 
-	ret = -ENOMEM;
 	trigger_data = kzalloc(sizeof(*trigger_data), GFP_KERNEL);
-	if (!trigger_data)
+	if (!trigger_data) {
+		ret = -ENOMEM;
 		goto out_free;
+	}
 
 	trigger_data->count = -1;
 	trigger_data->ops = trigger_ops;
@@ -1614,8 +5547,24 @@ static int event_hist_trigger_func(struct event_command *cmd_ops,
 			goto out_free;
 	}
 
-	if (glob[0] == '!') {
+	if (remove) {
+		if (!have_hist_trigger_match(trigger_data, file))
+			goto out_free;
+
+		if (hist_trigger_check_refs(trigger_data, file)) {
+			ret = -EBUSY;
+			goto out_free;
+		}
+
 		cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file);
+
+		mutex_lock(&synth_event_mutex);
+		se_name = trace_event_name(file->event_call);
+		se = find_synth_event(se_name);
+		if (se)
+			se->ref--;
+		mutex_unlock(&synth_event_mutex);
+
 		ret = 0;
 		goto out_free;
 	}
@@ -1632,14 +5581,47 @@ static int event_hist_trigger_func(struct event_command *cmd_ops,
 		goto out_free;
 	} else if (ret < 0)
 		goto out_free;
+
+	if (get_named_trigger_data(trigger_data))
+		goto enable;
+
+	if (has_hist_vars(hist_data))
+		save_hist_vars(hist_data);
+
+	ret = create_actions(hist_data, file);
+	if (ret)
+		goto out_unreg;
+
+	ret = tracing_map_init(hist_data->map);
+	if (ret)
+		goto out_unreg;
+enable:
+	ret = hist_trigger_enable(trigger_data, file);
+	if (ret)
+		goto out_unreg;
+
+	mutex_lock(&synth_event_mutex);
+	se_name = trace_event_name(file->event_call);
+	se = find_synth_event(se_name);
+	if (se)
+		se->ref++;
+	mutex_unlock(&synth_event_mutex);
+
 	/* Just return zero, not the number of registered triggers */
 	ret = 0;
  out:
+	if (ret == 0)
+		hist_err_clear();
+
 	return ret;
+ out_unreg:
+	cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file);
  out_free:
 	if (cmd_ops->set_filter)
 		cmd_ops->set_filter(NULL, trigger_data, NULL);
 
+	remove_hist_vars(hist_data);
+
 	kfree(trigger_data);
 
 	destroy_hist_data(hist_data);
@@ -1669,7 +5651,8 @@ __init int register_trigger_hist_cmd(void)
 }
 
 static void
-hist_enable_trigger(struct event_trigger_data *data, void *rec)
+hist_enable_trigger(struct event_trigger_data *data, void *rec,
+		    struct ring_buffer_event *event)
 {
 	struct enable_trigger_data *enable_data = data->private_data;
 	struct event_trigger_data *test;
@@ -1685,7 +5668,8 @@ hist_enable_trigger(struct event_trigger_data *data, void *rec)
 }
 
 static void
-hist_enable_count_trigger(struct event_trigger_data *data, void *rec)
+hist_enable_count_trigger(struct event_trigger_data *data, void *rec,
+			  struct ring_buffer_event *event)
 {
 	if (!data->count)
 		return;
@@ -1693,7 +5677,7 @@ hist_enable_count_trigger(struct event_trigger_data *data, void *rec)
 	if (data->count != -1)
 		(data->count)--;
 
-	hist_enable_trigger(data, rec);
+	hist_enable_trigger(data, rec, event);
 }
 
 static struct event_trigger_ops hist_enable_trigger_ops = {
@@ -1798,3 +5782,31 @@ __init int register_trigger_hist_enable_disable_cmds(void)
 
 	return ret;
 }
+
+static __init int trace_events_hist_init(void)
+{
+	struct dentry *entry = NULL;
+	struct dentry *d_tracer;
+	int err = 0;
+
+	d_tracer = tracing_init_dentry();
+	if (IS_ERR(d_tracer)) {
+		err = PTR_ERR(d_tracer);
+		goto err;
+	}
+
+	entry = tracefs_create_file("synthetic_events", 0644, d_tracer,
+				    NULL, &synth_events_fops);
+	if (!entry) {
+		err = -ENODEV;
+		goto err;
+	}
+
+	return err;
+ err:
+	pr_warn("Could not create tracefs 'synthetic_events' entry\n");
+
+	return err;
+}
+
+fs_initcall(trace_events_hist_init);
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index 87411482a46f..d251cabcf69a 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -63,7 +63,8 @@ void trigger_data_free(struct event_trigger_data *data)
  * any trigger that should be deferred, ETT_NONE if nothing to defer.
  */
 enum event_trigger_type
-event_triggers_call(struct trace_event_file *file, void *rec)
+event_triggers_call(struct trace_event_file *file, void *rec,
+		    struct ring_buffer_event *event)
 {
 	struct event_trigger_data *data;
 	enum event_trigger_type tt = ETT_NONE;
@@ -76,7 +77,7 @@ event_triggers_call(struct trace_event_file *file, void *rec)
 		if (data->paused)
 			continue;
 		if (!rec) {
-			data->ops->func(data, rec);
+			data->ops->func(data, rec, event);
 			continue;
 		}
 		filter = rcu_dereference_sched(data->filter);
@@ -86,7 +87,7 @@ event_triggers_call(struct trace_event_file *file, void *rec)
 			tt |= data->cmd_ops->trigger_type;
 			continue;
 		}
-		data->ops->func(data, rec);
+		data->ops->func(data, rec, event);
 	}
 	return tt;
 }
@@ -108,7 +109,7 @@ EXPORT_SYMBOL_GPL(event_triggers_call);
 void
 event_triggers_post_call(struct trace_event_file *file,
 			 enum event_trigger_type tt,
-			 void *rec)
+			 void *rec, struct ring_buffer_event *event)
 {
 	struct event_trigger_data *data;
 
@@ -116,7 +117,7 @@ event_triggers_post_call(struct trace_event_file *file,
 		if (data->paused)
 			continue;
 		if (data->cmd_ops->trigger_type & tt)
-			data->ops->func(data, rec);
+			data->ops->func(data, rec, event);
 	}
 }
 EXPORT_SYMBOL_GPL(event_triggers_post_call);
@@ -908,8 +909,15 @@ void set_named_trigger_data(struct event_trigger_data *data,
 	data->named_data = named_data;
 }
 
+struct event_trigger_data *
+get_named_trigger_data(struct event_trigger_data *data)
+{
+	return data->named_data;
+}
+
 static void
-traceon_trigger(struct event_trigger_data *data, void *rec)
+traceon_trigger(struct event_trigger_data *data, void *rec,
+		struct ring_buffer_event *event)
 {
 	if (tracing_is_on())
 		return;
@@ -918,7 +926,8 @@ traceon_trigger(struct event_trigger_data *data, void *rec)
 }
 
 static void
-traceon_count_trigger(struct event_trigger_data *data, void *rec)
+traceon_count_trigger(struct event_trigger_data *data, void *rec,
+		      struct ring_buffer_event *event)
 {
 	if (tracing_is_on())
 		return;
@@ -933,7 +942,8 @@ traceon_count_trigger(struct event_trigger_data *data, void *rec)
 }
 
 static void
-traceoff_trigger(struct event_trigger_data *data, void *rec)
+traceoff_trigger(struct event_trigger_data *data, void *rec,
+		 struct ring_buffer_event *event)
 {
 	if (!tracing_is_on())
 		return;
@@ -942,7 +952,8 @@ traceoff_trigger(struct event_trigger_data *data, void *rec)
 }
 
 static void
-traceoff_count_trigger(struct event_trigger_data *data, void *rec)
+traceoff_count_trigger(struct event_trigger_data *data, void *rec,
+		       struct ring_buffer_event *event)
 {
 	if (!tracing_is_on())
 		return;
@@ -1039,13 +1050,15 @@ static struct event_command trigger_traceoff_cmd = {
 
 #ifdef CONFIG_TRACER_SNAPSHOT
 static void
-snapshot_trigger(struct event_trigger_data *data, void *rec)
+snapshot_trigger(struct event_trigger_data *data, void *rec,
+		 struct ring_buffer_event *event)
 {
 	tracing_snapshot();
 }
 
 static void
-snapshot_count_trigger(struct event_trigger_data *data, void *rec)
+snapshot_count_trigger(struct event_trigger_data *data, void *rec,
+		       struct ring_buffer_event *event)
 {
 	if (!data->count)
 		return;
@@ -1053,7 +1066,7 @@ snapshot_count_trigger(struct event_trigger_data *data, void *rec)
 	if (data->count != -1)
 		(data->count)--;
 
-	snapshot_trigger(data, rec);
+	snapshot_trigger(data, rec, event);
 }
 
 static int
@@ -1141,13 +1154,15 @@ static __init int register_trigger_snapshot_cmd(void) { return 0; }
 #endif
 
 static void
-stacktrace_trigger(struct event_trigger_data *data, void *rec)
+stacktrace_trigger(struct event_trigger_data *data, void *rec,
+		   struct ring_buffer_event *event)
 {
 	trace_dump_stack(STACK_SKIP);
 }
 
 static void
-stacktrace_count_trigger(struct event_trigger_data *data, void *rec)
+stacktrace_count_trigger(struct event_trigger_data *data, void *rec,
+			 struct ring_buffer_event *event)
 {
 	if (!data->count)
 		return;
@@ -1155,7 +1170,7 @@ stacktrace_count_trigger(struct event_trigger_data *data, void *rec)
 	if (data->count != -1)
 		(data->count)--;
 
-	stacktrace_trigger(data, rec);
+	stacktrace_trigger(data, rec, event);
 }
 
 static int
@@ -1217,7 +1232,8 @@ static __init void unregister_trigger_traceon_traceoff_cmds(void)
 }
 
 static void
-event_enable_trigger(struct event_trigger_data *data, void *rec)
+event_enable_trigger(struct event_trigger_data *data, void *rec,
+		     struct ring_buffer_event *event)
 {
 	struct enable_trigger_data *enable_data = data->private_data;
 
@@ -1228,7 +1244,8 @@ event_enable_trigger(struct event_trigger_data *data, void *rec)
 }
 
 static void
-event_enable_count_trigger(struct event_trigger_data *data, void *rec)
+event_enable_count_trigger(struct event_trigger_data *data, void *rec,
+			   struct ring_buffer_event *event)
 {
 	struct enable_trigger_data *enable_data = data->private_data;
 
@@ -1242,7 +1259,7 @@ event_enable_count_trigger(struct event_trigger_data *data, void *rec)
 	if (data->count != -1)
 		(data->count)--;
 
-	event_enable_trigger(data, rec);
+	event_enable_trigger(data, rec, event);
 }
 
 int event_enable_trigger_print(struct seq_file *m,
diff --git a/kernel/trace/tracing_map.c b/kernel/trace/tracing_map.c
index 07e75344725b..5cadb1b8b5fe 100644
--- a/kernel/trace/tracing_map.c
+++ b/kernel/trace/tracing_map.c
@@ -66,6 +66,73 @@ u64 tracing_map_read_sum(struct tracing_map_elt *elt, unsigned int i)
 	return (u64)atomic64_read(&elt->fields[i].sum);
 }
 
+/**
+ * tracing_map_set_var - Assign a tracing_map_elt's variable field
+ * @elt: The tracing_map_elt
+ * @i: The index of the given variable associated with the tracing_map_elt
+ * @n: The value to assign
+ *
+ * Assign n to variable i associated with the specified tracing_map_elt
+ * instance.  The index i is the index returned by the call to
+ * tracing_map_add_var() when the tracing map was set up.
+ */
+void tracing_map_set_var(struct tracing_map_elt *elt, unsigned int i, u64 n)
+{
+	atomic64_set(&elt->vars[i], n);
+	elt->var_set[i] = true;
+}
+
+/**
+ * tracing_map_var_set - Return whether or not a variable has been set
+ * @elt: The tracing_map_elt
+ * @i: The index of the given variable associated with the tracing_map_elt
+ *
+ * Return true if the variable has been set, false otherwise.  The
+ * index i is the index returned by the call to tracing_map_add_var()
+ * when the tracing map was set up.
+ */
+bool tracing_map_var_set(struct tracing_map_elt *elt, unsigned int i)
+{
+	return elt->var_set[i];
+}
+
+/**
+ * tracing_map_read_var - Return the value of a tracing_map_elt's variable field
+ * @elt: The tracing_map_elt
+ * @i: The index of the given variable associated with the tracing_map_elt
+ *
+ * Retrieve the value of the variable i associated with the specified
+ * tracing_map_elt instance.  The index i is the index returned by the
+ * call to tracing_map_add_var() when the tracing map was set
+ * up.
+ *
+ * Return: The variable value associated with field i for elt.
+ */
+u64 tracing_map_read_var(struct tracing_map_elt *elt, unsigned int i)
+{
+	return (u64)atomic64_read(&elt->vars[i]);
+}
+
+/**
+ * tracing_map_read_var_once - Return and reset a tracing_map_elt's variable field
+ * @elt: The tracing_map_elt
+ * @i: The index of the given variable associated with the tracing_map_elt
+ *
+ * Retrieve the value of the variable i associated with the specified
+ * tracing_map_elt instance, and reset the variable to the 'not set'
+ * state.  The index i is the index returned by the call to
+ * tracing_map_add_var() when the tracing map was set up.  The reset
+ * essentially makes the variable a read-once variable if it's only
+ * accessed using this function.
+ *
+ * Return: The variable value associated with field i for elt.
+ */
+u64 tracing_map_read_var_once(struct tracing_map_elt *elt, unsigned int i)
+{
+	elt->var_set[i] = false;
+	return (u64)atomic64_read(&elt->vars[i]);
+}
+
 int tracing_map_cmp_string(void *val_a, void *val_b)
 {
 	char *a = val_a;
@@ -171,6 +238,28 @@ int tracing_map_add_sum_field(struct tracing_map *map)
 }
 
 /**
+ * tracing_map_add_var - Add a field describing a tracing_map var
+ * @map: The tracing_map
+ *
+ * Add a var to the map and return the index identifying it in the map
+ * and associated tracing_map_elts.  This is the index used for
+ * instance to update a var for a particular tracing_map_elt using
+ * tracing_map_update_var() or reading it via tracing_map_read_var().
+ *
+ * Return: The index identifying the var in the map and associated
+ * tracing_map_elts, or -EINVAL on error.
+ */
+int tracing_map_add_var(struct tracing_map *map)
+{
+	int ret = -EINVAL;
+
+	if (map->n_vars < TRACING_MAP_VARS_MAX)
+		ret = map->n_vars++;
+
+	return ret;
+}
+
+/**
  * tracing_map_add_key_field - Add a field describing a tracing_map key
  * @map: The tracing_map
  * @offset: The offset within the key
@@ -280,6 +369,11 @@ static void tracing_map_elt_clear(struct tracing_map_elt *elt)
 		if (elt->fields[i].cmp_fn == tracing_map_cmp_atomic64)
 			atomic64_set(&elt->fields[i].sum, 0);
 
+	for (i = 0; i < elt->map->n_vars; i++) {
+		atomic64_set(&elt->vars[i], 0);
+		elt->var_set[i] = false;
+	}
+
 	if (elt->map->ops && elt->map->ops->elt_clear)
 		elt->map->ops->elt_clear(elt);
 }
@@ -306,6 +400,8 @@ static void tracing_map_elt_free(struct tracing_map_elt *elt)
 	if (elt->map->ops && elt->map->ops->elt_free)
 		elt->map->ops->elt_free(elt);
 	kfree(elt->fields);
+	kfree(elt->vars);
+	kfree(elt->var_set);
 	kfree(elt->key);
 	kfree(elt);
 }
@@ -333,6 +429,18 @@ static struct tracing_map_elt *tracing_map_elt_alloc(struct tracing_map *map)
 		goto free;
 	}
 
+	elt->vars = kcalloc(map->n_vars, sizeof(*elt->vars), GFP_KERNEL);
+	if (!elt->vars) {
+		err = -ENOMEM;
+		goto free;
+	}
+
+	elt->var_set = kcalloc(map->n_vars, sizeof(*elt->var_set), GFP_KERNEL);
+	if (!elt->var_set) {
+		err = -ENOMEM;
+		goto free;
+	}
+
 	tracing_map_elt_init_fields(elt);
 
 	if (map->ops && map->ops->elt_alloc) {
@@ -414,7 +522,9 @@ static inline struct tracing_map_elt *
 __tracing_map_insert(struct tracing_map *map, void *key, bool lookup_only)
 {
 	u32 idx, key_hash, test_key;
+	int dup_try = 0;
 	struct tracing_map_entry *entry;
+	struct tracing_map_elt *val;
 
 	key_hash = jhash(key, map->key_size, 0);
 	if (key_hash == 0)
@@ -426,11 +536,33 @@ __tracing_map_insert(struct tracing_map *map, void *key, bool lookup_only)
 		entry = TRACING_MAP_ENTRY(map->map, idx);
 		test_key = entry->key;
 
-		if (test_key && test_key == key_hash && entry->val &&
-		    keys_match(key, entry->val->key, map->key_size)) {
-			if (!lookup_only)
-				atomic64_inc(&map->hits);
-			return entry->val;
+		if (test_key && test_key == key_hash) {
+			val = READ_ONCE(entry->val);
+			if (val &&
+			    keys_match(key, val->key, map->key_size)) {
+				if (!lookup_only)
+					atomic64_inc(&map->hits);
+				return val;
+			} else if (unlikely(!val)) {
+				/*
+				 * The key is present. But, val (pointer to elt
+				 * struct) is still NULL. which means some other
+				 * thread is in the process of inserting an
+				 * element.
+				 *
+				 * On top of that, it's key_hash is same as the
+				 * one being inserted right now. So, it's
+				 * possible that the element has the same
+				 * key as well.
+				 */
+
+				dup_try++;
+				if (dup_try > map->map_size) {
+					atomic64_inc(&map->drops);
+					break;
+				}
+				continue;
+			}
 		}
 
 		if (!test_key) {
@@ -452,6 +584,13 @@ __tracing_map_insert(struct tracing_map *map, void *key, bool lookup_only)
 				atomic64_inc(&map->hits);
 
 				return entry->val;
+			} else {
+				/*
+				 * cmpxchg() failed. Loop around once
+				 * more to check what key was inserted.
+				 */
+				dup_try++;
+				continue;
 			}
 		}
 
@@ -816,67 +955,15 @@ create_sort_entry(void *key, struct tracing_map_elt *elt)
 	return sort_entry;
 }
 
-static struct tracing_map_elt *copy_elt(struct tracing_map_elt *elt)
-{
-	struct tracing_map_elt *dup_elt;
-	unsigned int i;
-
-	dup_elt = tracing_map_elt_alloc(elt->map);
-	if (IS_ERR(dup_elt))
-		return NULL;
-
-	if (elt->map->ops && elt->map->ops->elt_copy)
-		elt->map->ops->elt_copy(dup_elt, elt);
-
-	dup_elt->private_data = elt->private_data;
-	memcpy(dup_elt->key, elt->key, elt->map->key_size);
-
-	for (i = 0; i < elt->map->n_fields; i++) {
-		atomic64_set(&dup_elt->fields[i].sum,
-			     atomic64_read(&elt->fields[i].sum));
-		dup_elt->fields[i].cmp_fn = elt->fields[i].cmp_fn;
-	}
-
-	return dup_elt;
-}
-
-static int merge_dup(struct tracing_map_sort_entry **sort_entries,
-		     unsigned int target, unsigned int dup)
-{
-	struct tracing_map_elt *target_elt, *elt;
-	bool first_dup = (target - dup) == 1;
-	int i;
-
-	if (first_dup) {
-		elt = sort_entries[target]->elt;
-		target_elt = copy_elt(elt);
-		if (!target_elt)
-			return -ENOMEM;
-		sort_entries[target]->elt = target_elt;
-		sort_entries[target]->elt_copied = true;
-	} else
-		target_elt = sort_entries[target]->elt;
-
-	elt = sort_entries[dup]->elt;
-
-	for (i = 0; i < elt->map->n_fields; i++)
-		atomic64_add(atomic64_read(&elt->fields[i].sum),
-			     &target_elt->fields[i].sum);
-
-	sort_entries[dup]->dup = true;
-
-	return 0;
-}
-
-static int merge_dups(struct tracing_map_sort_entry **sort_entries,
+static void detect_dups(struct tracing_map_sort_entry **sort_entries,
 		      int n_entries, unsigned int key_size)
 {
 	unsigned int dups = 0, total_dups = 0;
-	int err, i, j;
+	int i;
 	void *key;
 
 	if (n_entries < 2)
-		return total_dups;
+		return;
 
 	sort(sort_entries, n_entries, sizeof(struct tracing_map_sort_entry *),
 	     (int (*)(const void *, const void *))cmp_entries_dup, NULL);
@@ -885,30 +972,14 @@ static int merge_dups(struct tracing_map_sort_entry **sort_entries,
 	for (i = 1; i < n_entries; i++) {
 		if (!memcmp(sort_entries[i]->key, key, key_size)) {
 			dups++; total_dups++;
-			err = merge_dup(sort_entries, i - dups, i);
-			if (err)
-				return err;
 			continue;
 		}
 		key = sort_entries[i]->key;
 		dups = 0;
 	}
 
-	if (!total_dups)
-		return total_dups;
-
-	for (i = 0, j = 0; i < n_entries; i++) {
-		if (!sort_entries[i]->dup) {
-			sort_entries[j] = sort_entries[i];
-			if (j++ != i)
-				sort_entries[i] = NULL;
-		} else {
-			destroy_sort_entry(sort_entries[i]);
-			sort_entries[i] = NULL;
-		}
-	}
-
-	return total_dups;
+	WARN_ONCE(total_dups > 0,
+		  "Duplicates detected: %d\n", total_dups);
 }
 
 static bool is_key(struct tracing_map *map, unsigned int field_idx)
@@ -1034,10 +1105,7 @@ int tracing_map_sort_entries(struct tracing_map *map,
 		return 1;
 	}
 
-	ret = merge_dups(entries, n_entries, map->key_size);
-	if (ret < 0)
-		goto free;
-	n_entries -= ret;
+	detect_dups(entries, n_entries, map->key_size);
 
 	if (is_key(map, sort_keys[0].field_idx))
 		cmp_entries_fn = cmp_entries_key;
diff --git a/kernel/trace/tracing_map.h b/kernel/trace/tracing_map.h
index 5b5bbf8ae550..053eb92b2d31 100644
--- a/kernel/trace/tracing_map.h
+++ b/kernel/trace/tracing_map.h
@@ -10,6 +10,7 @@
 #define TRACING_MAP_VALS_MAX		3
 #define TRACING_MAP_FIELDS_MAX		(TRACING_MAP_KEYS_MAX + \
 					 TRACING_MAP_VALS_MAX)
+#define TRACING_MAP_VARS_MAX		16
 #define TRACING_MAP_SORT_KEYS_MAX	2
 
 typedef int (*tracing_map_cmp_fn_t) (void *val_a, void *val_b);
@@ -137,6 +138,8 @@ struct tracing_map_field {
 struct tracing_map_elt {
 	struct tracing_map		*map;
 	struct tracing_map_field	*fields;
+	atomic64_t			*vars;
+	bool				*var_set;
 	void				*key;
 	void				*private_data;
 };
@@ -192,6 +195,7 @@ struct tracing_map {
 	int				key_idx[TRACING_MAP_KEYS_MAX];
 	unsigned int			n_keys;
 	struct tracing_map_sort_key	sort_key;
+	unsigned int			n_vars;
 	atomic64_t			hits;
 	atomic64_t			drops;
 };
@@ -215,11 +219,6 @@ struct tracing_map {
  *	Element allocation occurs before tracing begins, when the
  *	tracing_map_init() call is made by client code.
  *
- * @elt_copy: At certain points in the lifetime of an element, it may
- *	need to be copied.  The copy should include a copy of the
- *	client-allocated data, which can be copied into the 'to'
- *	element from the 'from' element.
- *
  * @elt_free: When a tracing_map_elt is freed, this function is called
  *	and allows client-allocated per-element data to be freed.
  *
@@ -233,8 +232,6 @@ struct tracing_map {
  */
 struct tracing_map_ops {
 	int			(*elt_alloc)(struct tracing_map_elt *elt);
-	void			(*elt_copy)(struct tracing_map_elt *to,
-					    struct tracing_map_elt *from);
 	void			(*elt_free)(struct tracing_map_elt *elt);
 	void			(*elt_clear)(struct tracing_map_elt *elt);
 	void			(*elt_init)(struct tracing_map_elt *elt);
@@ -248,6 +245,7 @@ tracing_map_create(unsigned int map_bits,
 extern int tracing_map_init(struct tracing_map *map);
 
 extern int tracing_map_add_sum_field(struct tracing_map *map);
+extern int tracing_map_add_var(struct tracing_map *map);
 extern int tracing_map_add_key_field(struct tracing_map *map,
 				     unsigned int offset,
 				     tracing_map_cmp_fn_t cmp_fn);
@@ -267,7 +265,13 @@ extern int tracing_map_cmp_none(void *val_a, void *val_b);
 
 extern void tracing_map_update_sum(struct tracing_map_elt *elt,
 				   unsigned int i, u64 n);
+extern void tracing_map_set_var(struct tracing_map_elt *elt,
+				unsigned int i, u64 n);
+extern bool tracing_map_var_set(struct tracing_map_elt *elt, unsigned int i);
 extern u64 tracing_map_read_sum(struct tracing_map_elt *elt, unsigned int i);
+extern u64 tracing_map_read_var(struct tracing_map_elt *elt, unsigned int i);
+extern u64 tracing_map_read_var_once(struct tracing_map_elt *elt, unsigned int i);
+
 extern void tracing_map_set_field_descr(struct tracing_map *map,
 					unsigned int i,
 					unsigned int key_offset,
diff --git a/kernel/utsname.c b/kernel/utsname.c
index 913fe4336d2b..dcd6be1996fe 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -19,6 +19,8 @@
 #include <linux/proc_ns.h>
 #include <linux/sched/task.h>
 
+static struct kmem_cache *uts_ns_cache __ro_after_init;
+
 static struct ucounts *inc_uts_namespaces(struct user_namespace *ns)
 {
 	return inc_ucount(ns, current_euid(), UCOUNT_UTS_NAMESPACES);
@@ -33,7 +35,7 @@ static struct uts_namespace *create_uts_ns(void)
 {
 	struct uts_namespace *uts_ns;
 
-	uts_ns = kmalloc(sizeof(struct uts_namespace), GFP_KERNEL);
+	uts_ns = kmem_cache_alloc(uts_ns_cache, GFP_KERNEL);
 	if (uts_ns)
 		kref_init(&uts_ns->kref);
 	return uts_ns;
@@ -42,7 +44,7 @@ static struct uts_namespace *create_uts_ns(void)
 /*
  * Clone a new ns copying an original utsname, setting refcount to 1
  * @old_ns: namespace to clone
- * Return ERR_PTR(-ENOMEM) on error (failure to kmalloc), new ns otherwise
+ * Return ERR_PTR(-ENOMEM) on error (failure to allocate), new ns otherwise
  */
 static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns,
 					  struct uts_namespace *old_ns)
@@ -75,7 +77,7 @@ static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns,
 	return ns;
 
 fail_free:
-	kfree(ns);
+	kmem_cache_free(uts_ns_cache, ns);
 fail_dec:
 	dec_uts_namespaces(ucounts);
 fail:
@@ -113,7 +115,7 @@ void free_uts_ns(struct kref *kref)
 	dec_uts_namespaces(ns->ucounts);
 	put_user_ns(ns->user_ns);
 	ns_free_inum(&ns->ns);
-	kfree(ns);
+	kmem_cache_free(uts_ns_cache, ns);
 }
 
 static inline struct uts_namespace *to_uts_ns(struct ns_common *ns)
@@ -169,3 +171,13 @@ const struct proc_ns_operations utsns_operations = {
 	.install	= utsns_install,
 	.owner		= utsns_owner,
 };
+
+void __init uts_ns_init(void)
+{
+	uts_ns_cache = kmem_cache_create_usercopy(
+			"uts_namespace", sizeof(struct uts_namespace), 0,
+			SLAB_PANIC|SLAB_ACCOUNT,
+			offsetof(struct uts_namespace, name),
+			sizeof_field(struct uts_namespace, name),
+			NULL);
+}
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 51c6bf0d93c6..c40c7b734cd1 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -800,6 +800,30 @@ config SOFTLOCKUP_DETECTOR
 	  chance to run.  The current stack trace is displayed upon
 	  detection and the system will stay locked up.
 
+config BOOTPARAM_SOFTLOCKUP_PANIC
+	bool "Panic (Reboot) On Soft Lockups"
+	depends on SOFTLOCKUP_DETECTOR
+	help
+	  Say Y here to enable the kernel to panic on "soft lockups",
+	  which are bugs that cause the kernel to loop in kernel
+	  mode for more than 20 seconds (configurable using the watchdog_thresh
+	  sysctl), without giving other tasks a chance to run.
+
+	  The panic can be used in combination with panic_timeout,
+	  to cause the system to reboot automatically after a
+	  lockup has been detected. This feature is useful for
+	  high-availability systems that have uptime guarantees and
+	  where a lockup must be resolved ASAP.
+
+	  Say N if unsure.
+
+config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
+	int
+	depends on SOFTLOCKUP_DETECTOR
+	range 0 1
+	default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC
+	default 1 if BOOTPARAM_SOFTLOCKUP_PANIC
+
 config HARDLOCKUP_DETECTOR_PERF
 	bool
 	select SOFTLOCKUP_DETECTOR
@@ -849,30 +873,6 @@ config BOOTPARAM_HARDLOCKUP_PANIC_VALUE
 	default 0 if !BOOTPARAM_HARDLOCKUP_PANIC
 	default 1 if BOOTPARAM_HARDLOCKUP_PANIC
 
-config BOOTPARAM_SOFTLOCKUP_PANIC
-	bool "Panic (Reboot) On Soft Lockups"
-	depends on SOFTLOCKUP_DETECTOR
-	help
-	  Say Y here to enable the kernel to panic on "soft lockups",
-	  which are bugs that cause the kernel to loop in kernel
-	  mode for more than 20 seconds (configurable using the watchdog_thresh
-	  sysctl), without giving other tasks a chance to run.
-
-	  The panic can be used in combination with panic_timeout,
-	  to cause the system to reboot automatically after a
-	  lockup has been detected. This feature is useful for
-	  high-availability systems that have uptime guarantees and
-	  where a lockup must be resolved ASAP.
-
-	  Say N if unsure.
-
-config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
-	int
-	depends on SOFTLOCKUP_DETECTOR
-	range 0 1
-	default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC
-	default 1 if BOOTPARAM_SOFTLOCKUP_PANIC
-
 config DETECT_HUNG_TASK
 	bool "Detect Hung Tasks"
 	depends on DEBUG_KERNEL
diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan
index a669c193b878..19d42ea75ec2 100644
--- a/lib/Kconfig.ubsan
+++ b/lib/Kconfig.ubsan
@@ -46,3 +46,10 @@ config UBSAN_NULL
 	help
 	  This option enables detection of memory accesses via a
 	  null pointer.
+
+config TEST_UBSAN
+	tristate "Module for testing for undefined behavior detection"
+	depends on m && UBSAN
+	help
+	  This is a test module for UBSAN.
+	  It triggers various undefined behavior, and detect it.
diff --git a/lib/Makefile b/lib/Makefile
index 8fc0d3a9b34f..ce20696d5a92 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -53,6 +53,9 @@ obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o
 obj-$(CONFIG_TEST_SYSCTL) += test_sysctl.o
 obj-$(CONFIG_TEST_HASH) += test_hash.o test_siphash.o
 obj-$(CONFIG_TEST_KASAN) += test_kasan.o
+CFLAGS_test_kasan.o += -fno-builtin
+obj-$(CONFIG_TEST_UBSAN) += test_ubsan.o
+UBSAN_SANITIZE_test_ubsan.o := y
 obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o
 obj-$(CONFIG_TEST_LIST_SORT) += test_list_sort.o
 obj-$(CONFIG_TEST_LKM) += test_module.o
diff --git a/lib/list_debug.c b/lib/list_debug.c
index a34db8d27667..5d5424b51b74 100644
--- a/lib/list_debug.c
+++ b/lib/list_debug.c
@@ -21,13 +21,13 @@ bool __list_add_valid(struct list_head *new, struct list_head *prev,
 		      struct list_head *next)
 {
 	if (CHECK_DATA_CORRUPTION(next->prev != prev,
-			"list_add corruption. next->prev should be prev (%p), but was %p. (next=%p).\n",
+			"list_add corruption. next->prev should be prev (%px), but was %px. (next=%px).\n",
 			prev, next->prev, next) ||
 	    CHECK_DATA_CORRUPTION(prev->next != next,
-			"list_add corruption. prev->next should be next (%p), but was %p. (prev=%p).\n",
+			"list_add corruption. prev->next should be next (%px), but was %px. (prev=%px).\n",
 			next, prev->next, prev) ||
 	    CHECK_DATA_CORRUPTION(new == prev || new == next,
-			"list_add double add: new=%p, prev=%p, next=%p.\n",
+			"list_add double add: new=%px, prev=%px, next=%px.\n",
 			new, prev, next))
 		return false;
 
@@ -43,16 +43,16 @@ bool __list_del_entry_valid(struct list_head *entry)
 	next = entry->next;
 
 	if (CHECK_DATA_CORRUPTION(next == LIST_POISON1,
-			"list_del corruption, %p->next is LIST_POISON1 (%p)\n",
+			"list_del corruption, %px->next is LIST_POISON1 (%px)\n",
 			entry, LIST_POISON1) ||
 	    CHECK_DATA_CORRUPTION(prev == LIST_POISON2,
-			"list_del corruption, %p->prev is LIST_POISON2 (%p)\n",
+			"list_del corruption, %px->prev is LIST_POISON2 (%px)\n",
 			entry, LIST_POISON2) ||
 	    CHECK_DATA_CORRUPTION(prev->next != entry,
-			"list_del corruption. prev->next should be %p, but was %p\n",
+			"list_del corruption. prev->next should be %px, but was %px\n",
 			entry, prev->next) ||
 	    CHECK_DATA_CORRUPTION(next->prev != entry,
-			"list_del corruption. next->prev should be %p, but was %p\n",
+			"list_del corruption. next->prev should be %px, but was %px\n",
 			entry, next->prev))
 		return false;
 
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 8e00138d593f..da9e10c827df 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -146,7 +146,7 @@ static unsigned int radix_tree_descend(const struct radix_tree_node *parent,
 
 static inline gfp_t root_gfp_mask(const struct radix_tree_root *root)
 {
-	return root->gfp_mask & __GFP_BITS_MASK;
+	return root->gfp_mask & (__GFP_BITS_MASK & ~GFP_ZONEMASK);
 }
 
 static inline void tag_set(struct radix_tree_node *node, unsigned int tag,
@@ -2285,6 +2285,7 @@ void __init radix_tree_init(void)
 	int ret;
 
 	BUILD_BUG_ON(RADIX_TREE_MAX_TAGS + __GFP_BITS_SHIFT > 32);
+	BUILD_BUG_ON(ROOT_IS_IDR & ~GFP_ZONEMASK);
 	radix_tree_node_cachep = kmem_cache_create("radix_tree_node",
 			sizeof(struct radix_tree_node), 0,
 			SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 47aeb04c1997..de7cc540450f 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -719,7 +719,7 @@ swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle,
 		goto out_warn;
 
 	*dma_handle = __phys_to_dma(dev, phys_addr);
-	if (dma_coherent_ok(dev, *dma_handle, size))
+	if (!dma_coherent_ok(dev, *dma_handle, size))
 		goto out_unmap;
 
 	memset(phys_to_virt(phys_addr), 0, size);
diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c
index 413367cf569e..de16f7869fb1 100644
--- a/lib/test_bitmap.c
+++ b/lib/test_bitmap.c
@@ -296,15 +296,17 @@ static void __init test_bitmap_parselist(void)
 	}
 }
 
+#define EXP_BYTES	(sizeof(exp) * 8)
+
 static void __init test_bitmap_arr32(void)
 {
-	unsigned int nbits, next_bit, len = sizeof(exp) * 8;
+	unsigned int nbits, next_bit;
 	u32 arr[sizeof(exp) / 4];
-	DECLARE_BITMAP(bmap2, len);
+	DECLARE_BITMAP(bmap2, EXP_BYTES);
 
 	memset(arr, 0xa5, sizeof(arr));
 
-	for (nbits = 0; nbits < len; ++nbits) {
+	for (nbits = 0; nbits < EXP_BYTES; ++nbits) {
 		bitmap_to_arr32(arr, exp, nbits);
 		bitmap_from_arr32(bmap2, arr, nbits);
 		expect_eq_bitmap(bmap2, exp, nbits);
@@ -316,7 +318,7 @@ static void __init test_bitmap_arr32(void)
 				" tail is not safely cleared: %d\n",
 				nbits, next_bit);
 
-		if (nbits < len - 32)
+		if (nbits < EXP_BYTES - 32)
 			expect_eq_uint(arr[DIV_ROUND_UP(nbits, 32)],
 								0xa5a5a5a5);
 	}
diff --git a/lib/test_kasan.c b/lib/test_kasan.c
index 98854a64b014..ec657105edbf 100644
--- a/lib/test_kasan.c
+++ b/lib/test_kasan.c
@@ -567,7 +567,15 @@ static noinline void __init kmem_cache_invalid_free(void)
 		return;
 	}
 
+	/* Trigger invalid free, the object doesn't get freed */
 	kmem_cache_free(cache, p + 1);
+
+	/*
+	 * Properly free the object to prevent the "Objects remaining in
+	 * test_cache on __kmem_cache_shutdown" BUG failure.
+	 */
+	kmem_cache_free(cache, p);
+
 	kmem_cache_destroy(cache);
 }
 
diff --git a/lib/test_ubsan.c b/lib/test_ubsan.c
new file mode 100644
index 000000000000..280f4979d00e
--- /dev/null
+++ b/lib/test_ubsan.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+typedef void(*test_ubsan_fp)(void);
+
+static void test_ubsan_add_overflow(void)
+{
+	volatile int val = INT_MAX;
+
+	val += 2;
+}
+
+static void test_ubsan_sub_overflow(void)
+{
+	volatile int val = INT_MIN;
+	volatile int val2 = 2;
+
+	val -= val2;
+}
+
+static void test_ubsan_mul_overflow(void)
+{
+	volatile int val = INT_MAX / 2;
+
+	val *= 3;
+}
+
+static void test_ubsan_negate_overflow(void)
+{
+	volatile int val = INT_MIN;
+
+	val = -val;
+}
+
+static void test_ubsan_divrem_overflow(void)
+{
+	volatile int val = 16;
+	volatile int val2 = 0;
+
+	val /= val2;
+}
+
+static void test_ubsan_vla_bound_not_positive(void)
+{
+	volatile int size = -1;
+	char buf[size];
+
+	(void)buf;
+}
+
+static void test_ubsan_shift_out_of_bounds(void)
+{
+	volatile int val = -1;
+	int val2 = 10;
+
+	val2 <<= val;
+}
+
+static void test_ubsan_out_of_bounds(void)
+{
+	volatile int i = 4, j = 5;
+	volatile int arr[i];
+
+	arr[j] = i;
+}
+
+static void test_ubsan_load_invalid_value(void)
+{
+	volatile char *dst, *src;
+	bool val, val2, *ptr;
+	char c = 4;
+
+	dst = (char *)&val;
+	src = &c;
+	*dst = *src;
+
+	ptr = &val2;
+	val2 = val;
+}
+
+static void test_ubsan_null_ptr_deref(void)
+{
+	volatile int *ptr = NULL;
+	int val;
+
+	val = *ptr;
+}
+
+static void test_ubsan_misaligned_access(void)
+{
+	volatile char arr[5] __aligned(4) = {1, 2, 3, 4, 5};
+	volatile int *ptr, val = 6;
+
+	ptr = (int *)(arr + 1);
+	*ptr = val;
+}
+
+static void test_ubsan_object_size_mismatch(void)
+{
+	/* "((aligned(8)))" helps this not into be misaligned for ptr-access. */
+	volatile int val __aligned(8) = 4;
+	volatile long long *ptr, val2;
+
+	ptr = (long long *)&val;
+	val2 = *ptr;
+}
+
+static const test_ubsan_fp test_ubsan_array[] = {
+	test_ubsan_add_overflow,
+	test_ubsan_sub_overflow,
+	test_ubsan_mul_overflow,
+	test_ubsan_negate_overflow,
+	test_ubsan_divrem_overflow,
+	test_ubsan_vla_bound_not_positive,
+	test_ubsan_shift_out_of_bounds,
+	test_ubsan_out_of_bounds,
+	test_ubsan_load_invalid_value,
+	//test_ubsan_null_ptr_deref, /* exclude it because there is a crash */
+	test_ubsan_misaligned_access,
+	test_ubsan_object_size_mismatch,
+};
+
+static int __init test_ubsan_init(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(test_ubsan_array); i++)
+		test_ubsan_array[i]();
+
+	(void)test_ubsan_null_ptr_deref; /* to avoid unsed-function warning */
+	return 0;
+}
+module_init(test_ubsan_init);
+
+static void __exit test_ubsan_exit(void)
+{
+	/* do nothing */
+}
+module_exit(test_ubsan_exit);
+
+MODULE_AUTHOR("Jinbum Park <jinb.park7@gmail.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index d7a708f82559..30c0cb8cc9bc 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -336,7 +336,7 @@ char *put_dec(char *buf, unsigned long long n)
  *
  * If speed is not important, use snprintf(). It's easy to read the code.
  */
-int num_to_str(char *buf, int size, unsigned long long num)
+int num_to_str(char *buf, int size, unsigned long long num, unsigned int width)
 {
 	/* put_dec requires 2-byte alignment of the buffer. */
 	char tmp[sizeof(num) * 3] __aligned(2);
@@ -350,11 +350,21 @@ int num_to_str(char *buf, int size, unsigned long long num)
 		len = put_dec(tmp, num) - tmp;
 	}
 
-	if (len > size)
+	if (len > size || width > size)
 		return 0;
+
+	if (width > len) {
+		width = width - len;
+		for (idx = 0; idx < width; idx++)
+			buf[idx] = ' ';
+	} else {
+		width = 0;
+	}
+
 	for (idx = 0; idx < len; ++idx)
-		buf[idx] = tmp[len - idx - 1];
-	return len;
+		buf[idx + width] = tmp[len - idx - 1];
+
+	return len + width;
 }
 
 #define SIGN	1		/* unsigned/signed, must be 1 */
@@ -2591,6 +2601,8 @@ int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args)
 			case 's':
 			case 'F':
 			case 'f':
+			case 'x':
+			case 'K':
 				save_arg(void *);
 				break;
 			default:
@@ -2765,6 +2777,8 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf)
 			case 's':
 			case 'F':
 			case 'f':
+			case 'x':
+			case 'K':
 				process = true;
 				break;
 			default:
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 08b9aab631ab..023190c69dce 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -1020,23 +1020,18 @@ EXPORT_SYMBOL(congestion_wait);
 
 /**
  * wait_iff_congested - Conditionally wait for a backing_dev to become uncongested or a pgdat to complete writes
- * @pgdat: A pgdat to check if it is heavily congested
  * @sync: SYNC or ASYNC IO
  * @timeout: timeout in jiffies
  *
- * In the event of a congested backing_dev (any backing_dev) and the given
- * @pgdat has experienced recent congestion, this waits for up to @timeout
- * jiffies for either a BDI to exit congestion of the given @sync queue
- * or a write to complete.
- *
- * In the absence of pgdat congestion, cond_resched() is called to yield
- * the processor if necessary but otherwise does not sleep.
+ * In the event of a congested backing_dev (any backing_dev) this waits
+ * for up to @timeout jiffies for either a BDI to exit congestion of the
+ * given @sync queue or a write to complete.
  *
  * The return value is 0 if the sleep is for the full timeout. Otherwise,
  * it is the number of jiffies that were still remaining when the function
  * returned. return_value == timeout implies the function did not sleep.
  */
-long wait_iff_congested(struct pglist_data *pgdat, int sync, long timeout)
+long wait_iff_congested(int sync, long timeout)
 {
 	long ret;
 	unsigned long start = jiffies;
@@ -1044,12 +1039,10 @@ long wait_iff_congested(struct pglist_data *pgdat, int sync, long timeout)
 	wait_queue_head_t *wqh = &congestion_wqh[sync];
 
 	/*
-	 * If there is no congestion, or heavy congestion is not being
-	 * encountered in the current pgdat, yield if necessary instead
+	 * If there is no congestion, yield if necessary instead
 	 * of sleeping on the congestion queue
 	 */
-	if (atomic_read(&nr_wb_congested[sync]) == 0 ||
-	    !test_bit(PGDAT_CONGESTED, &pgdat->flags)) {
+	if (atomic_read(&nr_wb_congested[sync]) == 0) {
 		cond_resched();
 
 		/* In case we scheduled, work out time remaining */
diff --git a/mm/cma.c b/mm/cma.c
index 5809bbe360d7..aa40e6c7b042 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -39,6 +39,7 @@
 #include <trace/events/cma.h>
 
 #include "cma.h"
+#include "internal.h"
 
 struct cma cma_areas[MAX_CMA_AREAS];
 unsigned cma_area_count;
@@ -109,23 +110,25 @@ static int __init cma_activate_area(struct cma *cma)
 	if (!cma->bitmap)
 		return -ENOMEM;
 
-	WARN_ON_ONCE(!pfn_valid(pfn));
-	zone = page_zone(pfn_to_page(pfn));
-
 	do {
 		unsigned j;
 
 		base_pfn = pfn;
+		if (!pfn_valid(base_pfn))
+			goto err;
+
+		zone = page_zone(pfn_to_page(base_pfn));
 		for (j = pageblock_nr_pages; j; --j, pfn++) {
-			WARN_ON_ONCE(!pfn_valid(pfn));
+			if (!pfn_valid(pfn))
+				goto err;
+
 			/*
-			 * alloc_contig_range requires the pfn range
-			 * specified to be in the same zone. Make this
-			 * simple by forcing the entire CMA resv range
-			 * to be in the same zone.
+			 * In init_cma_reserved_pageblock(), present_pages
+			 * is adjusted with assumption that all pages in
+			 * the pageblock come from a single zone.
 			 */
 			if (page_zone(pfn_to_page(pfn)) != zone)
-				goto not_in_zone;
+				goto err;
 		}
 		init_cma_reserved_pageblock(pfn_to_page(base_pfn));
 	} while (--i);
@@ -139,7 +142,7 @@ static int __init cma_activate_area(struct cma *cma)
 
 	return 0;
 
-not_in_zone:
+err:
 	pr_err("CMA area %s could not be activated\n", cma->name);
 	kfree(cma->bitmap);
 	cma->count = 0;
@@ -149,6 +152,41 @@ not_in_zone:
 static int __init cma_init_reserved_areas(void)
 {
 	int i;
+	struct zone *zone;
+	pg_data_t *pgdat;
+
+	if (!cma_area_count)
+		return 0;
+
+	for_each_online_pgdat(pgdat) {
+		unsigned long start_pfn = UINT_MAX, end_pfn = 0;
+
+		zone = &pgdat->node_zones[ZONE_MOVABLE];
+
+		/*
+		 * In this case, we cannot adjust the zone range
+		 * since it is now maximum node span and we don't
+		 * know original zone range.
+		 */
+		if (populated_zone(zone))
+			continue;
+
+		for (i = 0; i < cma_area_count; i++) {
+			if (pfn_to_nid(cma_areas[i].base_pfn) !=
+				pgdat->node_id)
+				continue;
+
+			start_pfn = min(start_pfn, cma_areas[i].base_pfn);
+			end_pfn = max(end_pfn, cma_areas[i].base_pfn +
+						cma_areas[i].count);
+		}
+
+		if (!end_pfn)
+			continue;
+
+		zone->zone_start_pfn = start_pfn;
+		zone->spanned_pages = end_pfn - start_pfn;
+	}
 
 	for (i = 0; i < cma_area_count; i++) {
 		int ret = cma_activate_area(&cma_areas[i]);
@@ -157,9 +195,32 @@ static int __init cma_init_reserved_areas(void)
 			return ret;
 	}
 
+	/*
+	 * Reserved pages for ZONE_MOVABLE are now activated and
+	 * this would change ZONE_MOVABLE's managed page counter and
+	 * the other zones' present counter. We need to re-calculate
+	 * various zone information that depends on this initialization.
+	 */
+	build_all_zonelists(NULL);
+	for_each_populated_zone(zone) {
+		if (zone_idx(zone) == ZONE_MOVABLE) {
+			zone_pcp_reset(zone);
+			setup_zone_pageset(zone);
+		} else
+			zone_pcp_update(zone);
+
+		set_zone_contiguous(zone);
+	}
+
+	/*
+	 * We need to re-init per zone wmark by calling
+	 * init_per_zone_wmark_min() but doesn't call here because it is
+	 * registered on core_initcall and it will be called later than us.
+	 */
+
 	return 0;
 }
-core_initcall(cma_init_reserved_areas);
+pure_initcall(cma_init_reserved_areas);
 
 /**
  * cma_init_reserved_mem() - create custom contiguous area from reserved memory
diff --git a/mm/compaction.c b/mm/compaction.c
index 88d01a50a015..028b7210a669 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1166,8 +1166,7 @@ static void isolate_freepages(struct compact_control *cc)
  * from the isolated freelists in the block we are migrating to.
  */
 static struct page *compaction_alloc(struct page *migratepage,
-					unsigned long data,
-					int **result)
+					unsigned long data)
 {
 	struct compact_control *cc = (struct compact_control *)data;
 	struct page *freepage;
@@ -1451,14 +1450,12 @@ static enum compact_result __compaction_suitable(struct zone *zone, int order,
 	 * if compaction succeeds.
 	 * For costly orders, we require low watermark instead of min for
 	 * compaction to proceed to increase its chances.
-	 * ALLOC_CMA is used, as pages in CMA pageblocks are considered
-	 * suitable migration targets
 	 */
 	watermark = (order > PAGE_ALLOC_COSTLY_ORDER) ?
 				low_wmark_pages(zone) : min_wmark_pages(zone);
 	watermark += compact_gap(order);
 	if (!__zone_watermark_ok(zone, 0, watermark, classzone_idx,
-						ALLOC_CMA, wmark_target))
+						0, wmark_target))
 		return COMPACT_SKIPPED;
 
 	return COMPACT_CONTINUE;
diff --git a/mm/filemap.c b/mm/filemap.c
index 693f62212a59..ab77e19ab09c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -66,7 +66,7 @@
  *  ->i_mmap_rwsem		(truncate_pagecache)
  *    ->private_lock		(__free_pte->__set_page_dirty_buffers)
  *      ->swap_lock		(exclusive_swap_page, others)
- *        ->mapping->tree_lock
+ *        ->i_pages lock
  *
  *  ->i_mutex
  *    ->i_mmap_rwsem		(truncate->unmap_mapping_range)
@@ -74,7 +74,7 @@
  *  ->mmap_sem
  *    ->i_mmap_rwsem
  *      ->page_table_lock or pte_lock	(various, mainly in memory.c)
- *        ->mapping->tree_lock	(arch-dependent flush_dcache_mmap_lock)
+ *        ->i_pages lock	(arch-dependent flush_dcache_mmap_lock)
  *
  *  ->mmap_sem
  *    ->lock_page		(access_process_vm)
@@ -84,7 +84,7 @@
  *
  *  bdi->wb.list_lock
  *    sb_lock			(fs/fs-writeback.c)
- *    ->mapping->tree_lock	(__sync_single_inode)
+ *    ->i_pages lock		(__sync_single_inode)
  *
  *  ->i_mmap_rwsem
  *    ->anon_vma.lock		(vma_adjust)
@@ -95,11 +95,11 @@
  *  ->page_table_lock or pte_lock
  *    ->swap_lock		(try_to_unmap_one)
  *    ->private_lock		(try_to_unmap_one)
- *    ->tree_lock		(try_to_unmap_one)
+ *    ->i_pages lock		(try_to_unmap_one)
  *    ->zone_lru_lock(zone)	(follow_page->mark_page_accessed)
  *    ->zone_lru_lock(zone)	(check_pte_range->isolate_lru_page)
  *    ->private_lock		(page_remove_rmap->set_page_dirty)
- *    ->tree_lock		(page_remove_rmap->set_page_dirty)
+ *    ->i_pages lock		(page_remove_rmap->set_page_dirty)
  *    bdi.wb->list_lock		(page_remove_rmap->set_page_dirty)
  *    ->inode->i_lock		(page_remove_rmap->set_page_dirty)
  *    ->memcg->move_lock	(page_remove_rmap->lock_page_memcg)
@@ -118,14 +118,15 @@ static int page_cache_tree_insert(struct address_space *mapping,
 	void **slot;
 	int error;
 
-	error = __radix_tree_create(&mapping->page_tree, page->index, 0,
+	error = __radix_tree_create(&mapping->i_pages, page->index, 0,
 				    &node, &slot);
 	if (error)
 		return error;
 	if (*slot) {
 		void *p;
 
-		p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
+		p = radix_tree_deref_slot_protected(slot,
+						    &mapping->i_pages.xa_lock);
 		if (!radix_tree_exceptional_entry(p))
 			return -EEXIST;
 
@@ -133,7 +134,7 @@ static int page_cache_tree_insert(struct address_space *mapping,
 		if (shadowp)
 			*shadowp = p;
 	}
-	__radix_tree_replace(&mapping->page_tree, node, slot, page,
+	__radix_tree_replace(&mapping->i_pages, node, slot, page,
 			     workingset_lookup_update(mapping));
 	mapping->nrpages++;
 	return 0;
@@ -155,13 +156,13 @@ static void page_cache_tree_delete(struct address_space *mapping,
 		struct radix_tree_node *node;
 		void **slot;
 
-		__radix_tree_lookup(&mapping->page_tree, page->index + i,
+		__radix_tree_lookup(&mapping->i_pages, page->index + i,
 				    &node, &slot);
 
 		VM_BUG_ON_PAGE(!node && nr != 1, page);
 
-		radix_tree_clear_tags(&mapping->page_tree, node, slot);
-		__radix_tree_replace(&mapping->page_tree, node, slot, shadow,
+		radix_tree_clear_tags(&mapping->i_pages, node, slot);
+		__radix_tree_replace(&mapping->i_pages, node, slot, shadow,
 				workingset_lookup_update(mapping));
 	}
 
@@ -253,7 +254,7 @@ static void unaccount_page_cache_page(struct address_space *mapping,
 /*
  * Delete a page from the page cache and free it. Caller has to make
  * sure the page is locked and that nobody else uses it - or that usage
- * is safe.  The caller must hold the mapping's tree_lock.
+ * is safe.  The caller must hold the i_pages lock.
  */
 void __delete_from_page_cache(struct page *page, void *shadow)
 {
@@ -296,9 +297,9 @@ void delete_from_page_cache(struct page *page)
 	unsigned long flags;
 
 	BUG_ON(!PageLocked(page));
-	spin_lock_irqsave(&mapping->tree_lock, flags);
+	xa_lock_irqsave(&mapping->i_pages, flags);
 	__delete_from_page_cache(page, NULL);
-	spin_unlock_irqrestore(&mapping->tree_lock, flags);
+	xa_unlock_irqrestore(&mapping->i_pages, flags);
 
 	page_cache_free_page(mapping, page);
 }
@@ -309,14 +310,14 @@ EXPORT_SYMBOL(delete_from_page_cache);
  * @mapping: the mapping to which pages belong
  * @pvec: pagevec with pages to delete
  *
- * The function walks over mapping->page_tree and removes pages passed in @pvec
- * from the radix tree. The function expects @pvec to be sorted by page index.
- * It tolerates holes in @pvec (radix tree entries at those indices are not
+ * The function walks over mapping->i_pages and removes pages passed in @pvec
+ * from the mapping. The function expects @pvec to be sorted by page index.
+ * It tolerates holes in @pvec (mapping entries at those indices are not
  * modified). The function expects only THP head pages to be present in the
- * @pvec and takes care to delete all corresponding tail pages from the radix
- * tree as well.
+ * @pvec and takes care to delete all corresponding tail pages from the
+ * mapping as well.
  *
- * The function expects mapping->tree_lock to be held.
+ * The function expects the i_pages lock to be held.
  */
 static void
 page_cache_tree_delete_batch(struct address_space *mapping,
@@ -330,11 +331,11 @@ page_cache_tree_delete_batch(struct address_space *mapping,
 	pgoff_t start;
 
 	start = pvec->pages[0]->index;
-	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
+	radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
 		if (i >= pagevec_count(pvec) && !tail_pages)
 			break;
 		page = radix_tree_deref_slot_protected(slot,
-						       &mapping->tree_lock);
+						       &mapping->i_pages.xa_lock);
 		if (radix_tree_exceptional_entry(page))
 			continue;
 		if (!tail_pages) {
@@ -357,8 +358,8 @@ page_cache_tree_delete_batch(struct address_space *mapping,
 		} else {
 			tail_pages--;
 		}
-		radix_tree_clear_tags(&mapping->page_tree, iter.node, slot);
-		__radix_tree_replace(&mapping->page_tree, iter.node, slot, NULL,
+		radix_tree_clear_tags(&mapping->i_pages, iter.node, slot);
+		__radix_tree_replace(&mapping->i_pages, iter.node, slot, NULL,
 				workingset_lookup_update(mapping));
 		total_pages++;
 	}
@@ -374,14 +375,14 @@ void delete_from_page_cache_batch(struct address_space *mapping,
 	if (!pagevec_count(pvec))
 		return;
 
-	spin_lock_irqsave(&mapping->tree_lock, flags);
+	xa_lock_irqsave(&mapping->i_pages, flags);
 	for (i = 0; i < pagevec_count(pvec); i++) {
 		trace_mm_filemap_delete_from_page_cache(pvec->pages[i]);
 
 		unaccount_page_cache_page(mapping, pvec->pages[i]);
 	}
 	page_cache_tree_delete_batch(mapping, pvec);
-	spin_unlock_irqrestore(&mapping->tree_lock, flags);
+	xa_unlock_irqrestore(&mapping->i_pages, flags);
 
 	for (i = 0; i < pagevec_count(pvec); i++)
 		page_cache_free_page(mapping, pvec->pages[i]);
@@ -798,7 +799,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
 		new->mapping = mapping;
 		new->index = offset;
 
-		spin_lock_irqsave(&mapping->tree_lock, flags);
+		xa_lock_irqsave(&mapping->i_pages, flags);
 		__delete_from_page_cache(old, NULL);
 		error = page_cache_tree_insert(mapping, new, NULL);
 		BUG_ON(error);
@@ -810,7 +811,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
 			__inc_node_page_state(new, NR_FILE_PAGES);
 		if (PageSwapBacked(new))
 			__inc_node_page_state(new, NR_SHMEM);
-		spin_unlock_irqrestore(&mapping->tree_lock, flags);
+		xa_unlock_irqrestore(&mapping->i_pages, flags);
 		mem_cgroup_migrate(old, new);
 		radix_tree_preload_end();
 		if (freepage)
@@ -852,7 +853,7 @@ static int __add_to_page_cache_locked(struct page *page,
 	page->mapping = mapping;
 	page->index = offset;
 
-	spin_lock_irq(&mapping->tree_lock);
+	xa_lock_irq(&mapping->i_pages);
 	error = page_cache_tree_insert(mapping, page, shadowp);
 	radix_tree_preload_end();
 	if (unlikely(error))
@@ -861,7 +862,7 @@ static int __add_to_page_cache_locked(struct page *page,
 	/* hugetlb pages do not participate in page cache accounting. */
 	if (!huge)
 		__inc_node_page_state(page, NR_FILE_PAGES);
-	spin_unlock_irq(&mapping->tree_lock);
+	xa_unlock_irq(&mapping->i_pages);
 	if (!huge)
 		mem_cgroup_commit_charge(page, memcg, false, false);
 	trace_mm_filemap_add_to_page_cache(page);
@@ -869,7 +870,7 @@ static int __add_to_page_cache_locked(struct page *page,
 err_insert:
 	page->mapping = NULL;
 	/* Leave page->index set: truncation relies upon it */
-	spin_unlock_irq(&mapping->tree_lock);
+	xa_unlock_irq(&mapping->i_pages);
 	if (!huge)
 		mem_cgroup_cancel_charge(page, memcg, false);
 	put_page(page);
@@ -1353,7 +1354,7 @@ pgoff_t page_cache_next_hole(struct address_space *mapping,
 	for (i = 0; i < max_scan; i++) {
 		struct page *page;
 
-		page = radix_tree_lookup(&mapping->page_tree, index);
+		page = radix_tree_lookup(&mapping->i_pages, index);
 		if (!page || radix_tree_exceptional_entry(page))
 			break;
 		index++;
@@ -1394,7 +1395,7 @@ pgoff_t page_cache_prev_hole(struct address_space *mapping,
 	for (i = 0; i < max_scan; i++) {
 		struct page *page;
 
-		page = radix_tree_lookup(&mapping->page_tree, index);
+		page = radix_tree_lookup(&mapping->i_pages, index);
 		if (!page || radix_tree_exceptional_entry(page))
 			break;
 		index--;
@@ -1427,7 +1428,7 @@ struct page *find_get_entry(struct address_space *mapping, pgoff_t offset)
 	rcu_read_lock();
 repeat:
 	page = NULL;
-	pagep = radix_tree_lookup_slot(&mapping->page_tree, offset);
+	pagep = radix_tree_lookup_slot(&mapping->i_pages, offset);
 	if (pagep) {
 		page = radix_tree_deref_slot(pagep);
 		if (unlikely(!page))
@@ -1633,7 +1634,7 @@ unsigned find_get_entries(struct address_space *mapping,
 		return 0;
 
 	rcu_read_lock();
-	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
+	radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
 		struct page *head, *page;
 repeat:
 		page = radix_tree_deref_slot(slot);
@@ -1710,7 +1711,7 @@ unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start,
 		return 0;
 
 	rcu_read_lock();
-	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, *start) {
+	radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, *start) {
 		struct page *head, *page;
 
 		if (iter.index > end)
@@ -1795,7 +1796,7 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
 		return 0;
 
 	rcu_read_lock();
-	radix_tree_for_each_contig(slot, &mapping->page_tree, &iter, index) {
+	radix_tree_for_each_contig(slot, &mapping->i_pages, &iter, index) {
 		struct page *head, *page;
 repeat:
 		page = radix_tree_deref_slot(slot);
@@ -1875,8 +1876,7 @@ unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
 		return 0;
 
 	rcu_read_lock();
-	radix_tree_for_each_tagged(slot, &mapping->page_tree,
-				   &iter, *index, tag) {
+	radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, *index, tag) {
 		struct page *head, *page;
 
 		if (iter.index > end)
@@ -1969,8 +1969,7 @@ unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
 		return 0;
 
 	rcu_read_lock();
-	radix_tree_for_each_tagged(slot, &mapping->page_tree,
-				   &iter, start, tag) {
+	radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, start, tag) {
 		struct page *head, *page;
 repeat:
 		page = radix_tree_deref_slot(slot);
@@ -2624,8 +2623,7 @@ void filemap_map_pages(struct vm_fault *vmf,
 	struct page *head, *page;
 
 	rcu_read_lock();
-	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter,
-			start_pgoff) {
+	radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start_pgoff) {
 		if (iter.index > end_pgoff)
 			break;
 repeat:
diff --git a/mm/hmm.c b/mm/hmm.c
index 320545b98ff5..486dc394a5a3 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -160,6 +160,32 @@ static void hmm_invalidate_range(struct hmm *hmm,
 	up_read(&hmm->mirrors_sem);
 }
 
+static void hmm_release(struct mmu_notifier *mn, struct mm_struct *mm)
+{
+	struct hmm_mirror *mirror;
+	struct hmm *hmm = mm->hmm;
+
+	down_write(&hmm->mirrors_sem);
+	mirror = list_first_entry_or_null(&hmm->mirrors, struct hmm_mirror,
+					  list);
+	while (mirror) {
+		list_del_init(&mirror->list);
+		if (mirror->ops->release) {
+			/*
+			 * Drop mirrors_sem so callback can wait on any pending
+			 * work that might itself trigger mmu_notifier callback
+			 * and thus would deadlock with us.
+			 */
+			up_write(&hmm->mirrors_sem);
+			mirror->ops->release(mirror);
+			down_write(&hmm->mirrors_sem);
+		}
+		mirror = list_first_entry_or_null(&hmm->mirrors,
+						  struct hmm_mirror, list);
+	}
+	up_write(&hmm->mirrors_sem);
+}
+
 static void hmm_invalidate_range_start(struct mmu_notifier *mn,
 				       struct mm_struct *mm,
 				       unsigned long start,
@@ -185,6 +211,7 @@ static void hmm_invalidate_range_end(struct mmu_notifier *mn,
 }
 
 static const struct mmu_notifier_ops hmm_mmu_notifier_ops = {
+	.release		= hmm_release,
 	.invalidate_range_start	= hmm_invalidate_range_start,
 	.invalidate_range_end	= hmm_invalidate_range_end,
 };
@@ -206,13 +233,24 @@ int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm)
 	if (!mm || !mirror || !mirror->ops)
 		return -EINVAL;
 
+again:
 	mirror->hmm = hmm_register(mm);
 	if (!mirror->hmm)
 		return -ENOMEM;
 
 	down_write(&mirror->hmm->mirrors_sem);
-	list_add(&mirror->list, &mirror->hmm->mirrors);
-	up_write(&mirror->hmm->mirrors_sem);
+	if (mirror->hmm->mm == NULL) {
+		/*
+		 * A racing hmm_mirror_unregister() is about to destroy the hmm
+		 * struct. Try again to allocate a new one.
+		 */
+		up_write(&mirror->hmm->mirrors_sem);
+		mirror->hmm = NULL;
+		goto again;
+	} else {
+		list_add(&mirror->list, &mirror->hmm->mirrors);
+		up_write(&mirror->hmm->mirrors_sem);
+	}
 
 	return 0;
 }
@@ -227,11 +265,32 @@ EXPORT_SYMBOL(hmm_mirror_register);
  */
 void hmm_mirror_unregister(struct hmm_mirror *mirror)
 {
-	struct hmm *hmm = mirror->hmm;
+	bool should_unregister = false;
+	struct mm_struct *mm;
+	struct hmm *hmm;
 
+	if (mirror->hmm == NULL)
+		return;
+
+	hmm = mirror->hmm;
 	down_write(&hmm->mirrors_sem);
-	list_del(&mirror->list);
+	list_del_init(&mirror->list);
+	should_unregister = list_empty(&hmm->mirrors);
+	mirror->hmm = NULL;
+	mm = hmm->mm;
+	hmm->mm = NULL;
 	up_write(&hmm->mirrors_sem);
+
+	if (!should_unregister || mm == NULL)
+		return;
+
+	spin_lock(&mm->page_table_lock);
+	if (mm->hmm == hmm)
+		mm->hmm = NULL;
+	spin_unlock(&mm->page_table_lock);
+
+	mmu_notifier_unregister_no_release(&hmm->mmu_notifier, mm);
+	kfree(hmm);
 }
 EXPORT_SYMBOL(hmm_mirror_unregister);
 
@@ -240,110 +299,275 @@ struct hmm_vma_walk {
 	unsigned long		last;
 	bool			fault;
 	bool			block;
-	bool			write;
 };
 
-static int hmm_vma_do_fault(struct mm_walk *walk,
-			    unsigned long addr,
-			    hmm_pfn_t *pfn)
+static int hmm_vma_do_fault(struct mm_walk *walk, unsigned long addr,
+			    bool write_fault, uint64_t *pfn)
 {
 	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_REMOTE;
 	struct hmm_vma_walk *hmm_vma_walk = walk->private;
+	struct hmm_range *range = hmm_vma_walk->range;
 	struct vm_area_struct *vma = walk->vma;
 	int r;
 
 	flags |= hmm_vma_walk->block ? 0 : FAULT_FLAG_ALLOW_RETRY;
-	flags |= hmm_vma_walk->write ? FAULT_FLAG_WRITE : 0;
+	flags |= write_fault ? FAULT_FLAG_WRITE : 0;
 	r = handle_mm_fault(vma, addr, flags);
 	if (r & VM_FAULT_RETRY)
 		return -EBUSY;
 	if (r & VM_FAULT_ERROR) {
-		*pfn = HMM_PFN_ERROR;
+		*pfn = range->values[HMM_PFN_ERROR];
 		return -EFAULT;
 	}
 
 	return -EAGAIN;
 }
 
-static void hmm_pfns_special(hmm_pfn_t *pfns,
-			     unsigned long addr,
-			     unsigned long end)
-{
-	for (; addr < end; addr += PAGE_SIZE, pfns++)
-		*pfns = HMM_PFN_SPECIAL;
-}
-
 static int hmm_pfns_bad(unsigned long addr,
 			unsigned long end,
 			struct mm_walk *walk)
 {
-	struct hmm_range *range = walk->private;
-	hmm_pfn_t *pfns = range->pfns;
+	struct hmm_vma_walk *hmm_vma_walk = walk->private;
+	struct hmm_range *range = hmm_vma_walk->range;
+	uint64_t *pfns = range->pfns;
 	unsigned long i;
 
 	i = (addr - range->start) >> PAGE_SHIFT;
 	for (; addr < end; addr += PAGE_SIZE, i++)
-		pfns[i] = HMM_PFN_ERROR;
+		pfns[i] = range->values[HMM_PFN_ERROR];
 
 	return 0;
 }
 
-static void hmm_pfns_clear(hmm_pfn_t *pfns,
-			   unsigned long addr,
-			   unsigned long end)
-{
-	for (; addr < end; addr += PAGE_SIZE, pfns++)
-		*pfns = 0;
-}
-
-static int hmm_vma_walk_hole(unsigned long addr,
-			     unsigned long end,
-			     struct mm_walk *walk)
+/*
+ * hmm_vma_walk_hole() - handle a range lacking valid pmd or pte(s)
+ * @start: range virtual start address (inclusive)
+ * @end: range virtual end address (exclusive)
+ * @fault: should we fault or not ?
+ * @write_fault: write fault ?
+ * @walk: mm_walk structure
+ * Returns: 0 on success, -EAGAIN after page fault, or page fault error
+ *
+ * This function will be called whenever pmd_none() or pte_none() returns true,
+ * or whenever there is no page directory covering the virtual address range.
+ */
+static int hmm_vma_walk_hole_(unsigned long addr, unsigned long end,
+			      bool fault, bool write_fault,
+			      struct mm_walk *walk)
 {
 	struct hmm_vma_walk *hmm_vma_walk = walk->private;
 	struct hmm_range *range = hmm_vma_walk->range;
-	hmm_pfn_t *pfns = range->pfns;
+	uint64_t *pfns = range->pfns;
 	unsigned long i;
 
 	hmm_vma_walk->last = addr;
 	i = (addr - range->start) >> PAGE_SHIFT;
 	for (; addr < end; addr += PAGE_SIZE, i++) {
-		pfns[i] = HMM_PFN_EMPTY;
-		if (hmm_vma_walk->fault) {
+		pfns[i] = range->values[HMM_PFN_NONE];
+		if (fault || write_fault) {
 			int ret;
 
-			ret = hmm_vma_do_fault(walk, addr, &pfns[i]);
+			ret = hmm_vma_do_fault(walk, addr, write_fault,
+					       &pfns[i]);
 			if (ret != -EAGAIN)
 				return ret;
 		}
 	}
 
-	return hmm_vma_walk->fault ? -EAGAIN : 0;
+	return (fault || write_fault) ? -EAGAIN : 0;
 }
 
-static int hmm_vma_walk_clear(unsigned long addr,
-			      unsigned long end,
-			      struct mm_walk *walk)
+static inline void hmm_pte_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
+				      uint64_t pfns, uint64_t cpu_flags,
+				      bool *fault, bool *write_fault)
 {
-	struct hmm_vma_walk *hmm_vma_walk = walk->private;
 	struct hmm_range *range = hmm_vma_walk->range;
-	hmm_pfn_t *pfns = range->pfns;
+
+	*fault = *write_fault = false;
+	if (!hmm_vma_walk->fault)
+		return;
+
+	/* We aren't ask to do anything ... */
+	if (!(pfns & range->flags[HMM_PFN_VALID]))
+		return;
+	/* If this is device memory than only fault if explicitly requested */
+	if ((cpu_flags & range->flags[HMM_PFN_DEVICE_PRIVATE])) {
+		/* Do we fault on device memory ? */
+		if (pfns & range->flags[HMM_PFN_DEVICE_PRIVATE]) {
+			*write_fault = pfns & range->flags[HMM_PFN_WRITE];
+			*fault = true;
+		}
+		return;
+	}
+
+	/* If CPU page table is not valid then we need to fault */
+	*fault = !(cpu_flags & range->flags[HMM_PFN_VALID]);
+	/* Need to write fault ? */
+	if ((pfns & range->flags[HMM_PFN_WRITE]) &&
+	    !(cpu_flags & range->flags[HMM_PFN_WRITE])) {
+		*write_fault = true;
+		*fault = true;
+	}
+}
+
+static void hmm_range_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
+				 const uint64_t *pfns, unsigned long npages,
+				 uint64_t cpu_flags, bool *fault,
+				 bool *write_fault)
+{
 	unsigned long i;
 
-	hmm_vma_walk->last = addr;
+	if (!hmm_vma_walk->fault) {
+		*fault = *write_fault = false;
+		return;
+	}
+
+	for (i = 0; i < npages; ++i) {
+		hmm_pte_need_fault(hmm_vma_walk, pfns[i], cpu_flags,
+				   fault, write_fault);
+		if ((*fault) || (*write_fault))
+			return;
+	}
+}
+
+static int hmm_vma_walk_hole(unsigned long addr, unsigned long end,
+			     struct mm_walk *walk)
+{
+	struct hmm_vma_walk *hmm_vma_walk = walk->private;
+	struct hmm_range *range = hmm_vma_walk->range;
+	bool fault, write_fault;
+	unsigned long i, npages;
+	uint64_t *pfns;
+
 	i = (addr - range->start) >> PAGE_SHIFT;
-	for (; addr < end; addr += PAGE_SIZE, i++) {
-		pfns[i] = 0;
-		if (hmm_vma_walk->fault) {
-			int ret;
+	npages = (end - addr) >> PAGE_SHIFT;
+	pfns = &range->pfns[i];
+	hmm_range_need_fault(hmm_vma_walk, pfns, npages,
+			     0, &fault, &write_fault);
+	return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk);
+}
 
-			ret = hmm_vma_do_fault(walk, addr, &pfns[i]);
-			if (ret != -EAGAIN)
-				return ret;
+static inline uint64_t pmd_to_hmm_pfn_flags(struct hmm_range *range, pmd_t pmd)
+{
+	if (pmd_protnone(pmd))
+		return 0;
+	return pmd_write(pmd) ? range->flags[HMM_PFN_VALID] |
+				range->flags[HMM_PFN_WRITE] :
+				range->flags[HMM_PFN_VALID];
+}
+
+static int hmm_vma_handle_pmd(struct mm_walk *walk,
+			      unsigned long addr,
+			      unsigned long end,
+			      uint64_t *pfns,
+			      pmd_t pmd)
+{
+	struct hmm_vma_walk *hmm_vma_walk = walk->private;
+	struct hmm_range *range = hmm_vma_walk->range;
+	unsigned long pfn, npages, i;
+	bool fault, write_fault;
+	uint64_t cpu_flags;
+
+	npages = (end - addr) >> PAGE_SHIFT;
+	cpu_flags = pmd_to_hmm_pfn_flags(range, pmd);
+	hmm_range_need_fault(hmm_vma_walk, pfns, npages, cpu_flags,
+			     &fault, &write_fault);
+
+	if (pmd_protnone(pmd) || fault || write_fault)
+		return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk);
+
+	pfn = pmd_pfn(pmd) + pte_index(addr);
+	for (i = 0; addr < end; addr += PAGE_SIZE, i++, pfn++)
+		pfns[i] = hmm_pfn_from_pfn(range, pfn) | cpu_flags;
+	hmm_vma_walk->last = end;
+	return 0;
+}
+
+static inline uint64_t pte_to_hmm_pfn_flags(struct hmm_range *range, pte_t pte)
+{
+	if (pte_none(pte) || !pte_present(pte))
+		return 0;
+	return pte_write(pte) ? range->flags[HMM_PFN_VALID] |
+				range->flags[HMM_PFN_WRITE] :
+				range->flags[HMM_PFN_VALID];
+}
+
+static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
+			      unsigned long end, pmd_t *pmdp, pte_t *ptep,
+			      uint64_t *pfn)
+{
+	struct hmm_vma_walk *hmm_vma_walk = walk->private;
+	struct hmm_range *range = hmm_vma_walk->range;
+	struct vm_area_struct *vma = walk->vma;
+	bool fault, write_fault;
+	uint64_t cpu_flags;
+	pte_t pte = *ptep;
+	uint64_t orig_pfn = *pfn;
+
+	*pfn = range->values[HMM_PFN_NONE];
+	cpu_flags = pte_to_hmm_pfn_flags(range, pte);
+	hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags,
+			   &fault, &write_fault);
+
+	if (pte_none(pte)) {
+		if (fault || write_fault)
+			goto fault;
+		return 0;
+	}
+
+	if (!pte_present(pte)) {
+		swp_entry_t entry = pte_to_swp_entry(pte);
+
+		if (!non_swap_entry(entry)) {
+			if (fault || write_fault)
+				goto fault;
+			return 0;
 		}
+
+		/*
+		 * This is a special swap entry, ignore migration, use
+		 * device and report anything else as error.
+		 */
+		if (is_device_private_entry(entry)) {
+			cpu_flags = range->flags[HMM_PFN_VALID] |
+				range->flags[HMM_PFN_DEVICE_PRIVATE];
+			cpu_flags |= is_write_device_private_entry(entry) ?
+				range->flags[HMM_PFN_WRITE] : 0;
+			hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags,
+					   &fault, &write_fault);
+			if (fault || write_fault)
+				goto fault;
+			*pfn = hmm_pfn_from_pfn(range, swp_offset(entry));
+			*pfn |= cpu_flags;
+			return 0;
+		}
+
+		if (is_migration_entry(entry)) {
+			if (fault || write_fault) {
+				pte_unmap(ptep);
+				hmm_vma_walk->last = addr;
+				migration_entry_wait(vma->vm_mm,
+						     pmdp, addr);
+				return -EAGAIN;
+			}
+			return 0;
+		}
+
+		/* Report error for everything else */
+		*pfn = range->values[HMM_PFN_ERROR];
+		return -EFAULT;
 	}
 
-	return hmm_vma_walk->fault ? -EAGAIN : 0;
+	if (fault || write_fault)
+		goto fault;
+
+	*pfn = hmm_pfn_from_pfn(range, pte_pfn(pte)) | cpu_flags;
+	return 0;
+
+fault:
+	pte_unmap(ptep);
+	/* Fault any virtual address we were asked to fault */
+	return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk);
 }
 
 static int hmm_vma_walk_pmd(pmd_t *pmdp,
@@ -353,26 +577,20 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
 {
 	struct hmm_vma_walk *hmm_vma_walk = walk->private;
 	struct hmm_range *range = hmm_vma_walk->range;
-	struct vm_area_struct *vma = walk->vma;
-	hmm_pfn_t *pfns = range->pfns;
+	uint64_t *pfns = range->pfns;
 	unsigned long addr = start, i;
-	bool write_fault;
-	hmm_pfn_t flag;
 	pte_t *ptep;
 
 	i = (addr - range->start) >> PAGE_SHIFT;
-	flag = vma->vm_flags & VM_READ ? HMM_PFN_READ : 0;
-	write_fault = hmm_vma_walk->fault & hmm_vma_walk->write;
 
 again:
 	if (pmd_none(*pmdp))
 		return hmm_vma_walk_hole(start, end, walk);
 
-	if (pmd_huge(*pmdp) && vma->vm_flags & VM_HUGETLB)
+	if (pmd_huge(*pmdp) && (range->vma->vm_flags & VM_HUGETLB))
 		return hmm_pfns_bad(start, end, walk);
 
 	if (pmd_devmap(*pmdp) || pmd_trans_huge(*pmdp)) {
-		unsigned long pfn;
 		pmd_t pmd;
 
 		/*
@@ -388,17 +606,8 @@ again:
 		barrier();
 		if (!pmd_devmap(pmd) && !pmd_trans_huge(pmd))
 			goto again;
-		if (pmd_protnone(pmd))
-			return hmm_vma_walk_clear(start, end, walk);
 
-		if (write_fault && !pmd_write(pmd))
-			return hmm_vma_walk_clear(start, end, walk);
-
-		pfn = pmd_pfn(pmd) + pte_index(addr);
-		flag |= pmd_write(pmd) ? HMM_PFN_WRITE : 0;
-		for (; addr < end; addr += PAGE_SIZE, i++, pfn++)
-			pfns[i] = hmm_pfn_t_from_pfn(pfn) | flag;
-		return 0;
+		return hmm_vma_handle_pmd(walk, addr, end, &pfns[i], pmd);
 	}
 
 	if (pmd_bad(*pmdp))
@@ -406,79 +615,43 @@ again:
 
 	ptep = pte_offset_map(pmdp, addr);
 	for (; addr < end; addr += PAGE_SIZE, ptep++, i++) {
-		pte_t pte = *ptep;
-
-		pfns[i] = 0;
+		int r;
 
-		if (pte_none(pte)) {
-			pfns[i] = HMM_PFN_EMPTY;
-			if (hmm_vma_walk->fault)
-				goto fault;
-			continue;
+		r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, &pfns[i]);
+		if (r) {
+			/* hmm_vma_handle_pte() did unmap pte directory */
+			hmm_vma_walk->last = addr;
+			return r;
 		}
-
-		if (!pte_present(pte)) {
-			swp_entry_t entry = pte_to_swp_entry(pte);
-
-			if (!non_swap_entry(entry)) {
-				if (hmm_vma_walk->fault)
-					goto fault;
-				continue;
-			}
-
-			/*
-			 * This is a special swap entry, ignore migration, use
-			 * device and report anything else as error.
-			 */
-			if (is_device_private_entry(entry)) {
-				pfns[i] = hmm_pfn_t_from_pfn(swp_offset(entry));
-				if (is_write_device_private_entry(entry)) {
-					pfns[i] |= HMM_PFN_WRITE;
-				} else if (write_fault)
-					goto fault;
-				pfns[i] |= HMM_PFN_DEVICE_UNADDRESSABLE;
-				pfns[i] |= flag;
-			} else if (is_migration_entry(entry)) {
-				if (hmm_vma_walk->fault) {
-					pte_unmap(ptep);
-					hmm_vma_walk->last = addr;
-					migration_entry_wait(vma->vm_mm,
-							     pmdp, addr);
-					return -EAGAIN;
-				}
-				continue;
-			} else {
-				/* Report error for everything else */
-				pfns[i] = HMM_PFN_ERROR;
-			}
-			continue;
-		}
-
-		if (write_fault && !pte_write(pte))
-			goto fault;
-
-		pfns[i] = hmm_pfn_t_from_pfn(pte_pfn(pte)) | flag;
-		pfns[i] |= pte_write(pte) ? HMM_PFN_WRITE : 0;
-		continue;
-
-fault:
-		pte_unmap(ptep);
-		/* Fault all pages in range */
-		return hmm_vma_walk_clear(start, end, walk);
 	}
 	pte_unmap(ptep - 1);
 
+	hmm_vma_walk->last = addr;
 	return 0;
 }
 
+static void hmm_pfns_clear(struct hmm_range *range,
+			   uint64_t *pfns,
+			   unsigned long addr,
+			   unsigned long end)
+{
+	for (; addr < end; addr += PAGE_SIZE, pfns++)
+		*pfns = range->values[HMM_PFN_NONE];
+}
+
+static void hmm_pfns_special(struct hmm_range *range)
+{
+	unsigned long addr = range->start, i = 0;
+
+	for (; addr < range->end; addr += PAGE_SIZE, i++)
+		range->pfns[i] = range->values[HMM_PFN_SPECIAL];
+}
+
 /*
  * hmm_vma_get_pfns() - snapshot CPU page table for a range of virtual addresses
- * @vma: virtual memory area containing the virtual address range
- * @range: used to track snapshot validity
- * @start: range virtual start address (inclusive)
- * @end: range virtual end address (exclusive)
- * @entries: array of hmm_pfn_t: provided by the caller, filled in by function
- * Returns: -EINVAL if invalid argument, -ENOMEM out of memory, 0 success
+ * @range: range being snapshotted
+ * Returns: -EINVAL if invalid argument, -ENOMEM out of memory, -EPERM invalid
+ *          vma permission, 0 success
  *
  * This snapshots the CPU page table for a range of virtual addresses. Snapshot
  * validity is tracked by range struct. See hmm_vma_range_done() for further
@@ -491,26 +664,17 @@ fault:
  * NOT CALLING hmm_vma_range_done() IF FUNCTION RETURNS 0 WILL LEAD TO SERIOUS
  * MEMORY CORRUPTION ! YOU HAVE BEEN WARNED !
  */
-int hmm_vma_get_pfns(struct vm_area_struct *vma,
-		     struct hmm_range *range,
-		     unsigned long start,
-		     unsigned long end,
-		     hmm_pfn_t *pfns)
+int hmm_vma_get_pfns(struct hmm_range *range)
 {
+	struct vm_area_struct *vma = range->vma;
 	struct hmm_vma_walk hmm_vma_walk;
 	struct mm_walk mm_walk;
 	struct hmm *hmm;
 
-	/* FIXME support hugetlb fs */
-	if (is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL)) {
-		hmm_pfns_special(pfns, start, end);
-		return -EINVAL;
-	}
-
 	/* Sanity check, this really should not happen ! */
-	if (start < vma->vm_start || start >= vma->vm_end)
+	if (range->start < vma->vm_start || range->start >= vma->vm_end)
 		return -EINVAL;
-	if (end < vma->vm_start || end > vma->vm_end)
+	if (range->end < vma->vm_start || range->end > vma->vm_end)
 		return -EINVAL;
 
 	hmm = hmm_register(vma->vm_mm);
@@ -520,10 +684,24 @@ int hmm_vma_get_pfns(struct vm_area_struct *vma,
 	if (!hmm->mmu_notifier.ops)
 		return -EINVAL;
 
+	/* FIXME support hugetlb fs */
+	if (is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL)) {
+		hmm_pfns_special(range);
+		return -EINVAL;
+	}
+
+	if (!(vma->vm_flags & VM_READ)) {
+		/*
+		 * If vma do not allow read access, then assume that it does
+		 * not allow write access, either. Architecture that allow
+		 * write without read access are not supported by HMM, because
+		 * operations such has atomic access would not work.
+		 */
+		hmm_pfns_clear(range, range->pfns, range->start, range->end);
+		return -EPERM;
+	}
+
 	/* Initialize range to track CPU page table update */
-	range->start = start;
-	range->pfns = pfns;
-	range->end = end;
 	spin_lock(&hmm->lock);
 	range->valid = true;
 	list_add_rcu(&range->list, &hmm->ranges);
@@ -541,14 +719,13 @@ int hmm_vma_get_pfns(struct vm_area_struct *vma,
 	mm_walk.pmd_entry = hmm_vma_walk_pmd;
 	mm_walk.pte_hole = hmm_vma_walk_hole;
 
-	walk_page_range(start, end, &mm_walk);
+	walk_page_range(range->start, range->end, &mm_walk);
 	return 0;
 }
 EXPORT_SYMBOL(hmm_vma_get_pfns);
 
 /*
  * hmm_vma_range_done() - stop tracking change to CPU page table over a range
- * @vma: virtual memory area containing the virtual address range
  * @range: range being tracked
  * Returns: false if range data has been invalidated, true otherwise
  *
@@ -568,10 +745,10 @@ EXPORT_SYMBOL(hmm_vma_get_pfns);
  *
  * There are two ways to use this :
  * again:
- *   hmm_vma_get_pfns(vma, range, start, end, pfns); or hmm_vma_fault(...);
+ *   hmm_vma_get_pfns(range); or hmm_vma_fault(...);
  *   trans = device_build_page_table_update_transaction(pfns);
  *   device_page_table_lock();
- *   if (!hmm_vma_range_done(vma, range)) {
+ *   if (!hmm_vma_range_done(range)) {
  *     device_page_table_unlock();
  *     goto again;
  *   }
@@ -579,13 +756,13 @@ EXPORT_SYMBOL(hmm_vma_get_pfns);
  *   device_page_table_unlock();
  *
  * Or:
- *   hmm_vma_get_pfns(vma, range, start, end, pfns); or hmm_vma_fault(...);
+ *   hmm_vma_get_pfns(range); or hmm_vma_fault(...);
  *   device_page_table_lock();
- *   hmm_vma_range_done(vma, range);
- *   device_update_page_table(pfns);
+ *   hmm_vma_range_done(range);
+ *   device_update_page_table(range->pfns);
  *   device_page_table_unlock();
  */
-bool hmm_vma_range_done(struct vm_area_struct *vma, struct hmm_range *range)
+bool hmm_vma_range_done(struct hmm_range *range)
 {
 	unsigned long npages = (range->end - range->start) >> PAGE_SHIFT;
 	struct hmm *hmm;
@@ -595,7 +772,7 @@ bool hmm_vma_range_done(struct vm_area_struct *vma, struct hmm_range *range)
 		return false;
 	}
 
-	hmm = hmm_register(vma->vm_mm);
+	hmm = hmm_register(range->vma->vm_mm);
 	if (!hmm) {
 		memset(range->pfns, 0, sizeof(*range->pfns) * npages);
 		return false;
@@ -611,36 +788,34 @@ EXPORT_SYMBOL(hmm_vma_range_done);
 
 /*
  * hmm_vma_fault() - try to fault some address in a virtual address range
- * @vma: virtual memory area containing the virtual address range
- * @range: use to track pfns array content validity
- * @start: fault range virtual start address (inclusive)
- * @end: fault range virtual end address (exclusive)
- * @pfns: array of hmm_pfn_t, only entry with fault flag set will be faulted
- * @write: is it a write fault
+ * @range: range being faulted
  * @block: allow blocking on fault (if true it sleeps and do not drop mmap_sem)
  * Returns: 0 success, error otherwise (-EAGAIN means mmap_sem have been drop)
  *
  * This is similar to a regular CPU page fault except that it will not trigger
  * any memory migration if the memory being faulted is not accessible by CPUs.
  *
- * On error, for one virtual address in the range, the function will set the
- * hmm_pfn_t error flag for the corresponding pfn entry.
+ * On error, for one virtual address in the range, the function will mark the
+ * corresponding HMM pfn entry with an error flag.
  *
  * Expected use pattern:
  * retry:
  *   down_read(&mm->mmap_sem);
  *   // Find vma and address device wants to fault, initialize hmm_pfn_t
  *   // array accordingly
- *   ret = hmm_vma_fault(vma, start, end, pfns, allow_retry);
+ *   ret = hmm_vma_fault(range, write, block);
  *   switch (ret) {
  *   case -EAGAIN:
- *     hmm_vma_range_done(vma, range);
+ *     hmm_vma_range_done(range);
  *     // You might want to rate limit or yield to play nicely, you may
  *     // also commit any valid pfn in the array assuming that you are
  *     // getting true from hmm_vma_range_monitor_end()
  *     goto retry;
  *   case 0:
  *     break;
+ *   case -ENOMEM:
+ *   case -EINVAL:
+ *   case -EPERM:
  *   default:
  *     // Handle error !
  *     up_read(&mm->mmap_sem)
@@ -648,7 +823,7 @@ EXPORT_SYMBOL(hmm_vma_range_done);
  *   }
  *   // Take device driver lock that serialize device page table update
  *   driver_lock_device_page_table_update();
- *   hmm_vma_range_done(vma, range);
+ *   hmm_vma_range_done(range);
  *   // Commit pfns we got from hmm_vma_fault()
  *   driver_unlock_device_page_table_update();
  *   up_read(&mm->mmap_sem)
@@ -658,51 +833,54 @@ EXPORT_SYMBOL(hmm_vma_range_done);
  *
  * YOU HAVE BEEN WARNED !
  */
-int hmm_vma_fault(struct vm_area_struct *vma,
-		  struct hmm_range *range,
-		  unsigned long start,
-		  unsigned long end,
-		  hmm_pfn_t *pfns,
-		  bool write,
-		  bool block)
+int hmm_vma_fault(struct hmm_range *range, bool block)
 {
+	struct vm_area_struct *vma = range->vma;
+	unsigned long start = range->start;
 	struct hmm_vma_walk hmm_vma_walk;
 	struct mm_walk mm_walk;
 	struct hmm *hmm;
 	int ret;
 
 	/* Sanity check, this really should not happen ! */
-	if (start < vma->vm_start || start >= vma->vm_end)
+	if (range->start < vma->vm_start || range->start >= vma->vm_end)
 		return -EINVAL;
-	if (end < vma->vm_start || end > vma->vm_end)
+	if (range->end < vma->vm_start || range->end > vma->vm_end)
 		return -EINVAL;
 
 	hmm = hmm_register(vma->vm_mm);
 	if (!hmm) {
-		hmm_pfns_clear(pfns, start, end);
+		hmm_pfns_clear(range, range->pfns, range->start, range->end);
 		return -ENOMEM;
 	}
 	/* Caller must have registered a mirror using hmm_mirror_register() */
 	if (!hmm->mmu_notifier.ops)
 		return -EINVAL;
 
+	/* FIXME support hugetlb fs */
+	if (is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL)) {
+		hmm_pfns_special(range);
+		return -EINVAL;
+	}
+
+	if (!(vma->vm_flags & VM_READ)) {
+		/*
+		 * If vma do not allow read access, then assume that it does
+		 * not allow write access, either. Architecture that allow
+		 * write without read access are not supported by HMM, because
+		 * operations such has atomic access would not work.
+		 */
+		hmm_pfns_clear(range, range->pfns, range->start, range->end);
+		return -EPERM;
+	}
+
 	/* Initialize range to track CPU page table update */
-	range->start = start;
-	range->pfns = pfns;
-	range->end = end;
 	spin_lock(&hmm->lock);
 	range->valid = true;
 	list_add_rcu(&range->list, &hmm->ranges);
 	spin_unlock(&hmm->lock);
 
-	/* FIXME support hugetlb fs */
-	if (is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL)) {
-		hmm_pfns_special(pfns, start, end);
-		return 0;
-	}
-
 	hmm_vma_walk.fault = true;
-	hmm_vma_walk.write = write;
 	hmm_vma_walk.block = block;
 	hmm_vma_walk.range = range;
 	mm_walk.private = &hmm_vma_walk;
@@ -717,7 +895,7 @@ int hmm_vma_fault(struct vm_area_struct *vma,
 	mm_walk.pte_hole = hmm_vma_walk_hole;
 
 	do {
-		ret = walk_page_range(start, end, &mm_walk);
+		ret = walk_page_range(start, range->end, &mm_walk);
 		start = hmm_vma_walk.last;
 	} while (ret == -EAGAIN);
 
@@ -725,8 +903,9 @@ int hmm_vma_fault(struct vm_area_struct *vma,
 		unsigned long i;
 
 		i = (hmm_vma_walk.last - range->start) >> PAGE_SHIFT;
-		hmm_pfns_clear(&pfns[i], hmm_vma_walk.last, end);
-		hmm_vma_range_done(vma, range);
+		hmm_pfns_clear(range, &range->pfns[i], hmm_vma_walk.last,
+			       range->end);
+		hmm_vma_range_done(range);
 	}
 	return ret;
 }
@@ -845,13 +1024,6 @@ static void hmm_devmem_release(struct device *dev, void *data)
 	hmm_devmem_radix_release(resource);
 }
 
-static struct hmm_devmem *hmm_devmem_find(resource_size_t phys)
-{
-	WARN_ON_ONCE(!rcu_read_lock_held());
-
-	return radix_tree_lookup(&hmm_devmem_radix, phys >> PA_SECTION_SHIFT);
-}
-
 static int hmm_devmem_pages_create(struct hmm_devmem *devmem)
 {
 	resource_size_t key, align_start, align_size, align_end;
@@ -892,9 +1064,8 @@ static int hmm_devmem_pages_create(struct hmm_devmem *devmem)
 	for (key = align_start; key <= align_end; key += PA_SECTION_SIZE) {
 		struct hmm_devmem *dup;
 
-		rcu_read_lock();
-		dup = hmm_devmem_find(key);
-		rcu_read_unlock();
+		dup = radix_tree_lookup(&hmm_devmem_radix,
+					key >> PA_SECTION_SHIFT);
 		if (dup) {
 			dev_err(device, "%s: collides with mapping for %s\n",
 				__func__, dev_name(dup->device));
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f0ae8d1d4329..14ed6ee5e02f 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -555,8 +555,7 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
 
 	VM_BUG_ON_PAGE(!PageCompound(page), page);
 
-	if (mem_cgroup_try_charge(page, vma->vm_mm, gfp | __GFP_NORETRY, &memcg,
-				  true)) {
+	if (mem_cgroup_try_charge(page, vma->vm_mm, gfp, &memcg, true)) {
 		put_page(page);
 		count_vm_event(THP_FAULT_FALLBACK);
 		return VM_FAULT_FALLBACK;
@@ -1317,7 +1316,7 @@ alloc:
 	}
 
 	if (unlikely(mem_cgroup_try_charge(new_page, vma->vm_mm,
-				huge_gfp | __GFP_NORETRY, &memcg, true))) {
+					huge_gfp, &memcg, true))) {
 		put_page(new_page);
 		split_huge_pmd(vma, vmf->pmd, vmf->address);
 		if (page)
@@ -2402,6 +2401,12 @@ static void __split_huge_page_tail(struct page *head, int tail,
 
 	page_tail->index = head->index + tail;
 	page_cpupid_xchg_last(page_tail, page_cpupid_last(head));
+
+	/*
+	 * always add to the tail because some iterators expect new
+	 * pages to show after the currently processed elements - e.g.
+	 * migrate_pages
+	 */
 	lru_add_page_tail(head, page_tail, lruvec, list);
 }
 
@@ -2445,7 +2450,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
 	} else {
 		/* Additional pin to radix tree */
 		page_ref_add(head, 2);
-		spin_unlock(&head->mapping->tree_lock);
+		xa_unlock(&head->mapping->i_pages);
 	}
 
 	spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);
@@ -2653,15 +2658,15 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 	if (mapping) {
 		void **pslot;
 
-		spin_lock(&mapping->tree_lock);
-		pslot = radix_tree_lookup_slot(&mapping->page_tree,
+		xa_lock(&mapping->i_pages);
+		pslot = radix_tree_lookup_slot(&mapping->i_pages,
 				page_index(head));
 		/*
 		 * Check if the head page is present in radix tree.
 		 * We assume all tail are present too, if head is there.
 		 */
 		if (radix_tree_deref_slot_protected(pslot,
-					&mapping->tree_lock) != head)
+					&mapping->i_pages.xa_lock) != head)
 			goto fail;
 	}
 
@@ -2695,7 +2700,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 		}
 		spin_unlock(&pgdata->split_queue_lock);
 fail:		if (mapping)
-			spin_unlock(&mapping->tree_lock);
+			xa_unlock(&mapping->i_pages);
 		spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);
 		unfreeze_page(head);
 		ret = -EBUSY;
diff --git a/mm/internal.h b/mm/internal.h
index e6bd35182dae..62d8c34e63d5 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -168,6 +168,9 @@ extern void post_alloc_hook(struct page *page, unsigned int order,
 					gfp_t gfp_flags);
 extern int user_min_free_kbytes;
 
+extern void set_zone_contiguous(struct zone *zone);
+extern void clear_zone_contiguous(struct zone *zone);
+
 #if defined CONFIG_COMPACTION || defined CONFIG_CMA
 
 /*
@@ -495,7 +498,6 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
 #define ALLOC_HARDER		0x10 /* try to alloc harder */
 #define ALLOC_HIGH		0x20 /* __GFP_HIGH set */
 #define ALLOC_CPUSET		0x40 /* check for correct cpuset */
-#define ALLOC_CMA		0x80 /* allow allocations from CMA areas */
 
 enum ttu_flags;
 struct tlbflush_unmap_batch;
@@ -538,4 +540,5 @@ static inline bool is_migrate_highatomic_page(struct page *page)
 }
 
 void setup_zone_pageset(struct zone *zone);
+extern struct page *alloc_new_node_page(struct page *page, unsigned long node);
 #endif	/* __MM_INTERNAL_H */
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index e42568284e06..d7b2a4bf8671 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -965,9 +965,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 		goto out_nolock;
 	}
 
-	/* Do not oom kill for khugepaged charges */
-	if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp | __GFP_NORETRY,
-					   &memcg, true))) {
+	if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg, true))) {
 		result = SCAN_CGROUP_CHARGE_FAIL;
 		goto out_nolock;
 	}
@@ -1326,9 +1324,7 @@ static void collapse_shmem(struct mm_struct *mm,
 		goto out;
 	}
 
-	/* Do not oom kill for khugepaged charges */
-	if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp | __GFP_NORETRY,
-					   &memcg, true))) {
+	if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg, true))) {
 		result = SCAN_CGROUP_CHARGE_FAIL;
 		goto out;
 	}
@@ -1348,8 +1344,8 @@ static void collapse_shmem(struct mm_struct *mm,
 	 */
 
 	index = start;
-	spin_lock_irq(&mapping->tree_lock);
-	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
+	xa_lock_irq(&mapping->i_pages);
+	radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
 		int n = min(iter.index, end) - index;
 
 		/*
@@ -1362,7 +1358,7 @@ static void collapse_shmem(struct mm_struct *mm,
 		}
 		nr_none += n;
 		for (; index < min(iter.index, end); index++) {
-			radix_tree_insert(&mapping->page_tree, index,
+			radix_tree_insert(&mapping->i_pages, index,
 					new_page + (index % HPAGE_PMD_NR));
 		}
 
@@ -1371,16 +1367,16 @@ static void collapse_shmem(struct mm_struct *mm,
 			break;
 
 		page = radix_tree_deref_slot_protected(slot,
-				&mapping->tree_lock);
+				&mapping->i_pages.xa_lock);
 		if (radix_tree_exceptional_entry(page) || !PageUptodate(page)) {
-			spin_unlock_irq(&mapping->tree_lock);
+			xa_unlock_irq(&mapping->i_pages);
 			/* swap in or instantiate fallocated page */
 			if (shmem_getpage(mapping->host, index, &page,
 						SGP_NOHUGE)) {
 				result = SCAN_FAIL;
 				goto tree_unlocked;
 			}
-			spin_lock_irq(&mapping->tree_lock);
+			xa_lock_irq(&mapping->i_pages);
 		} else if (trylock_page(page)) {
 			get_page(page);
 		} else {
@@ -1389,7 +1385,7 @@ static void collapse_shmem(struct mm_struct *mm,
 		}
 
 		/*
-		 * The page must be locked, so we can drop the tree_lock
+		 * The page must be locked, so we can drop the i_pages lock
 		 * without racing with truncate.
 		 */
 		VM_BUG_ON_PAGE(!PageLocked(page), page);
@@ -1400,7 +1396,7 @@ static void collapse_shmem(struct mm_struct *mm,
 			result = SCAN_TRUNCATED;
 			goto out_unlock;
 		}
-		spin_unlock_irq(&mapping->tree_lock);
+		xa_unlock_irq(&mapping->i_pages);
 
 		if (isolate_lru_page(page)) {
 			result = SCAN_DEL_PAGE_LRU;
@@ -1410,11 +1406,11 @@ static void collapse_shmem(struct mm_struct *mm,
 		if (page_mapped(page))
 			unmap_mapping_pages(mapping, index, 1, false);
 
-		spin_lock_irq(&mapping->tree_lock);
+		xa_lock_irq(&mapping->i_pages);
 
-		slot = radix_tree_lookup_slot(&mapping->page_tree, index);
+		slot = radix_tree_lookup_slot(&mapping->i_pages, index);
 		VM_BUG_ON_PAGE(page != radix_tree_deref_slot_protected(slot,
-					&mapping->tree_lock), page);
+					&mapping->i_pages.xa_lock), page);
 		VM_BUG_ON_PAGE(page_mapped(page), page);
 
 		/*
@@ -1435,14 +1431,14 @@ static void collapse_shmem(struct mm_struct *mm,
 		list_add_tail(&page->lru, &pagelist);
 
 		/* Finally, replace with the new page. */
-		radix_tree_replace_slot(&mapping->page_tree, slot,
+		radix_tree_replace_slot(&mapping->i_pages, slot,
 				new_page + (index % HPAGE_PMD_NR));
 
 		slot = radix_tree_iter_resume(slot, &iter);
 		index++;
 		continue;
 out_lru:
-		spin_unlock_irq(&mapping->tree_lock);
+		xa_unlock_irq(&mapping->i_pages);
 		putback_lru_page(page);
 out_isolate_failed:
 		unlock_page(page);
@@ -1468,14 +1464,14 @@ out_unlock:
 		}
 
 		for (; index < end; index++) {
-			radix_tree_insert(&mapping->page_tree, index,
+			radix_tree_insert(&mapping->i_pages, index,
 					new_page + (index % HPAGE_PMD_NR));
 		}
 		nr_none += n;
 	}
 
 tree_locked:
-	spin_unlock_irq(&mapping->tree_lock);
+	xa_unlock_irq(&mapping->i_pages);
 tree_unlocked:
 
 	if (result == SCAN_SUCCEED) {
@@ -1524,9 +1520,8 @@ tree_unlocked:
 	} else {
 		/* Something went wrong: rollback changes to the radix-tree */
 		shmem_uncharge(mapping->host, nr_none);
-		spin_lock_irq(&mapping->tree_lock);
-		radix_tree_for_each_slot(slot, &mapping->page_tree, &iter,
-				start) {
+		xa_lock_irq(&mapping->i_pages);
+		radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
 			if (iter.index >= end)
 				break;
 			page = list_first_entry_or_null(&pagelist,
@@ -1536,8 +1531,7 @@ tree_unlocked:
 					break;
 				nr_none--;
 				/* Put holes back where they were */
-				radix_tree_delete(&mapping->page_tree,
-						  iter.index);
+				radix_tree_delete(&mapping->i_pages, iter.index);
 				continue;
 			}
 
@@ -1546,16 +1540,15 @@ tree_unlocked:
 			/* Unfreeze the page. */
 			list_del(&page->lru);
 			page_ref_unfreeze(page, 2);
-			radix_tree_replace_slot(&mapping->page_tree,
-						slot, page);
+			radix_tree_replace_slot(&mapping->i_pages, slot, page);
 			slot = radix_tree_iter_resume(slot, &iter);
-			spin_unlock_irq(&mapping->tree_lock);
+			xa_unlock_irq(&mapping->i_pages);
 			putback_lru_page(page);
 			unlock_page(page);
-			spin_lock_irq(&mapping->tree_lock);
+			xa_lock_irq(&mapping->i_pages);
 		}
 		VM_BUG_ON(nr_none);
-		spin_unlock_irq(&mapping->tree_lock);
+		xa_unlock_irq(&mapping->i_pages);
 
 		/* Unfreeze new_page, caller would take care about freeing it */
 		page_ref_unfreeze(new_page, 1);
@@ -1583,7 +1576,7 @@ static void khugepaged_scan_shmem(struct mm_struct *mm,
 	swap = 0;
 	memset(khugepaged_node_load, 0, sizeof(khugepaged_node_load));
 	rcu_read_lock();
-	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
+	radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
 		if (iter.index >= start + HPAGE_PMD_NR)
 			break;
 
@@ -1883,8 +1876,16 @@ static void set_recommended_min_free_kbytes(void)
 	int nr_zones = 0;
 	unsigned long recommended_min;
 
-	for_each_populated_zone(zone)
+	for_each_populated_zone(zone) {
+		/*
+		 * We don't need to worry about fragmentation of
+		 * ZONE_MOVABLE since it only has movable pages.
+		 */
+		if (zone_idx(zone) > gfp_zone(GFP_USER))
+			continue;
+
 		nr_zones++;
+	}
 
 	/* Ensure 2 pageblocks are free to assist fragmentation avoidance */
 	recommended_min = pageblock_nr_pages * nr_zones * 2;
diff --git a/mm/ksm.c b/mm/ksm.c
index e8d6c6210b80..e3cbf9a92f3c 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1131,6 +1131,13 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
 	} else {
 		newpte = pte_mkspecial(pfn_pte(page_to_pfn(kpage),
 					       vma->vm_page_prot));
+		/*
+		 * We're replacing an anonymous page with a zero page, which is
+		 * not anonymous. We need to do proper accounting otherwise we
+		 * will get wrong values in /proc, and a BUG message in dmesg
+		 * when tearing down the mm.
+		 */
+		dec_mm_counter(mm, MM_ANONPAGES);
 	}
 
 	flush_cache_page(vma, addr, pte_pfn(*ptep));
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9ec024b862ac..e074f7c637aa 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1485,7 +1485,7 @@ static void memcg_oom_recover(struct mem_cgroup *memcg)
 
 static void mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order)
 {
-	if (!current->memcg_may_oom)
+	if (!current->memcg_may_oom || order > PAGE_ALLOC_COSTLY_ORDER)
 		return;
 	/*
 	 * We are in the middle of the charge context here, so we
@@ -1839,7 +1839,7 @@ static int memcg_hotplug_cpu_dead(unsigned int cpu)
 			}
 		}
 
-		for (i = 0; i < MEMCG_NR_EVENTS; i++) {
+		for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
 			long x;
 
 			x = this_cpu_xchg(memcg->stat_cpu->events[i], 0);
@@ -1858,7 +1858,7 @@ static void reclaim_high(struct mem_cgroup *memcg,
 	do {
 		if (page_counter_read(&memcg->memory) <= memcg->high)
 			continue;
-		mem_cgroup_event(memcg, MEMCG_HIGH);
+		memcg_memory_event(memcg, MEMCG_HIGH);
 		try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, true);
 	} while ((memcg = parent_mem_cgroup(memcg)));
 }
@@ -1949,7 +1949,7 @@ retry:
 	if (!gfpflags_allow_blocking(gfp_mask))
 		goto nomem;
 
-	mem_cgroup_event(mem_over_limit, MEMCG_MAX);
+	memcg_memory_event(mem_over_limit, MEMCG_MAX);
 
 	nr_reclaimed = try_to_free_mem_cgroup_pages(mem_over_limit, nr_pages,
 						    gfp_mask, may_swap);
@@ -1992,7 +1992,7 @@ retry:
 	if (fatal_signal_pending(current))
 		goto force;
 
-	mem_cgroup_event(mem_over_limit, MEMCG_OOM);
+	memcg_memory_event(mem_over_limit, MEMCG_OOM);
 
 	mem_cgroup_oom(mem_over_limit, gfp_mask,
 		       get_order(nr_pages * PAGE_SIZE));
@@ -2688,10 +2688,10 @@ static void tree_events(struct mem_cgroup *memcg, unsigned long *events)
 	struct mem_cgroup *iter;
 	int i;
 
-	memset(events, 0, sizeof(*events) * MEMCG_NR_EVENTS);
+	memset(events, 0, sizeof(*events) * NR_VM_EVENT_ITEMS);
 
 	for_each_mem_cgroup_tree(iter, memcg) {
-		for (i = 0; i < MEMCG_NR_EVENTS; i++)
+		for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
 			events[i] += memcg_sum_events(iter, i);
 	}
 }
@@ -4108,6 +4108,9 @@ static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
 {
 	struct mem_cgroup_per_node *pn = memcg->nodeinfo[node];
 
+	if (!pn)
+		return;
+
 	free_percpu(pn->lruvec_stat_cpu);
 	kfree(pn);
 }
@@ -5178,7 +5181,7 @@ static ssize_t memory_max_write(struct kernfs_open_file *of,
 			continue;
 		}
 
-		mem_cgroup_event(memcg, MEMCG_OOM);
+		memcg_memory_event(memcg, MEMCG_OOM);
 		if (!mem_cgroup_out_of_memory(memcg, GFP_KERNEL, 0))
 			break;
 	}
@@ -5191,10 +5194,14 @@ static int memory_events_show(struct seq_file *m, void *v)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
 
-	seq_printf(m, "low %lu\n", memcg_sum_events(memcg, MEMCG_LOW));
-	seq_printf(m, "high %lu\n", memcg_sum_events(memcg, MEMCG_HIGH));
-	seq_printf(m, "max %lu\n", memcg_sum_events(memcg, MEMCG_MAX));
-	seq_printf(m, "oom %lu\n", memcg_sum_events(memcg, MEMCG_OOM));
+	seq_printf(m, "low %lu\n",
+		   atomic_long_read(&memcg->memory_events[MEMCG_LOW]));
+	seq_printf(m, "high %lu\n",
+		   atomic_long_read(&memcg->memory_events[MEMCG_HIGH]));
+	seq_printf(m, "max %lu\n",
+		   atomic_long_read(&memcg->memory_events[MEMCG_MAX]));
+	seq_printf(m, "oom %lu\n",
+		   atomic_long_read(&memcg->memory_events[MEMCG_OOM]));
 	seq_printf(m, "oom_kill %lu\n", memcg_sum_events(memcg, OOM_KILL));
 
 	return 0;
@@ -5204,7 +5211,7 @@ static int memory_stat_show(struct seq_file *m, void *v)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
 	unsigned long stat[MEMCG_NR_STAT];
-	unsigned long events[MEMCG_NR_EVENTS];
+	unsigned long events[NR_VM_EVENT_ITEMS];
 	int i;
 
 	/*
@@ -5967,9 +5974,9 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
 
 	/*
 	 * Interrupts should be disabled here because the caller holds the
-	 * mapping->tree_lock lock which is taken with interrupts-off. It is
+	 * i_pages lock which is taken with interrupts-off. It is
 	 * important here to have the interrupts disabled because it is the
-	 * only synchronisation we have for udpating the per-CPU variables.
+	 * only synchronisation we have for updating the per-CPU variables.
 	 */
 	VM_BUG_ON(!irqs_disabled());
 	mem_cgroup_charge_statistics(memcg, page, PageTransHuge(page),
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 2d4bf647cf01..9d142b9b86dc 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1487,7 +1487,7 @@ int unpoison_memory(unsigned long pfn)
 }
 EXPORT_SYMBOL(unpoison_memory);
 
-static struct page *new_page(struct page *p, unsigned long private, int **x)
+static struct page *new_page(struct page *p, unsigned long private)
 {
 	int nid = page_to_nid(p);
 
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index cc6dfa5832ca..f74826cdceea 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1329,8 +1329,7 @@ static unsigned long scan_movable_pages(unsigned long start, unsigned long end)
 	return 0;
 }
 
-static struct page *new_node_page(struct page *page, unsigned long private,
-		int **result)
+static struct page *new_node_page(struct page *page, unsigned long private)
 {
 	int nid = page_to_nid(page);
 	nodemask_t nmask = node_states[N_MEMORY];
@@ -1373,7 +1372,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 			if (isolate_huge_page(page, &source))
 				move_pages -= 1 << compound_order(head);
 			continue;
-		} else if (thp_migration_supported() && PageTransHuge(page))
+		} else if (PageTransHuge(page))
 			pfn = page_to_pfn(compound_head(page))
 				+ hpage_nr_pages(page) - 1;
 
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 01cbb7078d6c..9ac49ef17b4e 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -446,15 +446,6 @@ static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
 		__split_huge_pmd(walk->vma, pmd, addr, false, NULL);
 		goto out;
 	}
-	if (!thp_migration_supported()) {
-		get_page(page);
-		spin_unlock(ptl);
-		lock_page(page);
-		ret = split_huge_page(page);
-		unlock_page(page);
-		put_page(page);
-		goto out;
-	}
 	if (!queue_pages_required(page, qp)) {
 		ret = 1;
 		goto unlock;
@@ -495,7 +486,7 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
 
 	if (pmd_trans_unstable(pmd))
 		return 0;
-retry:
+
 	pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
 	for (; addr != end; pte++, addr += PAGE_SIZE) {
 		if (!pte_present(*pte))
@@ -511,22 +502,6 @@ retry:
 			continue;
 		if (!queue_pages_required(page, qp))
 			continue;
-		if (PageTransCompound(page) && !thp_migration_supported()) {
-			get_page(page);
-			pte_unmap_unlock(pte, ptl);
-			lock_page(page);
-			ret = split_huge_page(page);
-			unlock_page(page);
-			put_page(page);
-			/* Failed to split -- skip. */
-			if (ret) {
-				pte = pte_offset_map_lock(walk->mm, pmd,
-						addr, &ptl);
-				continue;
-			}
-			goto retry;
-		}
-
 		migrate_page_add(page, qp->pagelist, flags);
 	}
 	pte_unmap_unlock(pte - 1, ptl);
@@ -942,12 +917,13 @@ static void migrate_page_add(struct page *page, struct list_head *pagelist,
 	}
 }
 
-static struct page *new_node_page(struct page *page, unsigned long node, int **x)
+/* page allocation callback for NUMA node migration */
+struct page *alloc_new_node_page(struct page *page, unsigned long node)
 {
 	if (PageHuge(page))
 		return alloc_huge_page_node(page_hstate(compound_head(page)),
 					node);
-	else if (thp_migration_supported() && PageTransHuge(page)) {
+	else if (PageTransHuge(page)) {
 		struct page *thp;
 
 		thp = alloc_pages_node(node,
@@ -986,7 +962,7 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,
 			flags | MPOL_MF_DISCONTIG_OK, &pagelist);
 
 	if (!list_empty(&pagelist)) {
-		err = migrate_pages(&pagelist, new_node_page, NULL, dest,
+		err = migrate_pages(&pagelist, alloc_new_node_page, NULL, dest,
 					MIGRATE_SYNC, MR_SYSCALL);
 		if (err)
 			putback_movable_pages(&pagelist);
@@ -1107,7 +1083,7 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
  * list of pages handed to migrate_pages()--which is how we get here--
  * is in virtual address order.
  */
-static struct page *new_page(struct page *page, unsigned long start, int **x)
+static struct page *new_page(struct page *page, unsigned long start)
 {
 	struct vm_area_struct *vma;
 	unsigned long uninitialized_var(address);
@@ -1123,7 +1099,7 @@ static struct page *new_page(struct page *page, unsigned long start, int **x)
 	if (PageHuge(page)) {
 		return alloc_huge_page_vma(page_hstate(compound_head(page)),
 				vma, address);
-	} else if (thp_migration_supported() && PageTransHuge(page)) {
+	} else if (PageTransHuge(page)) {
 		struct page *thp;
 
 		thp = alloc_hugepage_vma(GFP_TRANSHUGE, vma, address,
@@ -1152,7 +1128,7 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
 	return -ENOSYS;
 }
 
-static struct page *new_page(struct page *page, unsigned long start, int **x)
+static struct page *new_page(struct page *page, unsigned long start)
 {
 	return NULL;
 }
diff --git a/mm/migrate.c b/mm/migrate.c
index 003886606a22..f65dd69e1fd1 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -467,20 +467,21 @@ int migrate_page_move_mapping(struct address_space *mapping,
 	oldzone = page_zone(page);
 	newzone = page_zone(newpage);
 
-	spin_lock_irq(&mapping->tree_lock);
+	xa_lock_irq(&mapping->i_pages);
 
-	pslot = radix_tree_lookup_slot(&mapping->page_tree,
+	pslot = radix_tree_lookup_slot(&mapping->i_pages,
  					page_index(page));
 
 	expected_count += 1 + page_has_private(page);
 	if (page_count(page) != expected_count ||
-		radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) {
-		spin_unlock_irq(&mapping->tree_lock);
+		radix_tree_deref_slot_protected(pslot,
+					&mapping->i_pages.xa_lock) != page) {
+		xa_unlock_irq(&mapping->i_pages);
 		return -EAGAIN;
 	}
 
 	if (!page_ref_freeze(page, expected_count)) {
-		spin_unlock_irq(&mapping->tree_lock);
+		xa_unlock_irq(&mapping->i_pages);
 		return -EAGAIN;
 	}
 
@@ -494,7 +495,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
 	if (mode == MIGRATE_ASYNC && head &&
 			!buffer_migrate_lock_buffers(head, mode)) {
 		page_ref_unfreeze(page, expected_count);
-		spin_unlock_irq(&mapping->tree_lock);
+		xa_unlock_irq(&mapping->i_pages);
 		return -EAGAIN;
 	}
 
@@ -522,7 +523,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
 		SetPageDirty(newpage);
 	}
 
-	radix_tree_replace_slot(&mapping->page_tree, pslot, newpage);
+	radix_tree_replace_slot(&mapping->i_pages, pslot, newpage);
 
 	/*
 	 * Drop cache reference from old page by unfreezing
@@ -531,7 +532,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
 	 */
 	page_ref_unfreeze(page, expected_count - 1);
 
-	spin_unlock(&mapping->tree_lock);
+	xa_unlock(&mapping->i_pages);
 	/* Leave irq disabled to prevent preemption while updating stats */
 
 	/*
@@ -574,20 +575,19 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
 	int expected_count;
 	void **pslot;
 
-	spin_lock_irq(&mapping->tree_lock);
+	xa_lock_irq(&mapping->i_pages);
 
-	pslot = radix_tree_lookup_slot(&mapping->page_tree,
-					page_index(page));
+	pslot = radix_tree_lookup_slot(&mapping->i_pages, page_index(page));
 
 	expected_count = 2 + page_has_private(page);
 	if (page_count(page) != expected_count ||
-		radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) {
-		spin_unlock_irq(&mapping->tree_lock);
+		radix_tree_deref_slot_protected(pslot, &mapping->i_pages.xa_lock) != page) {
+		xa_unlock_irq(&mapping->i_pages);
 		return -EAGAIN;
 	}
 
 	if (!page_ref_freeze(page, expected_count)) {
-		spin_unlock_irq(&mapping->tree_lock);
+		xa_unlock_irq(&mapping->i_pages);
 		return -EAGAIN;
 	}
 
@@ -596,11 +596,11 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
 
 	get_page(newpage);
 
-	radix_tree_replace_slot(&mapping->page_tree, pslot, newpage);
+	radix_tree_replace_slot(&mapping->i_pages, pslot, newpage);
 
 	page_ref_unfreeze(page, expected_count - 1);
 
-	spin_unlock_irq(&mapping->tree_lock);
+	xa_unlock_irq(&mapping->i_pages);
 
 	return MIGRATEPAGE_SUCCESS;
 }
@@ -1137,10 +1137,12 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page,
 				   enum migrate_reason reason)
 {
 	int rc = MIGRATEPAGE_SUCCESS;
-	int *result = NULL;
 	struct page *newpage;
 
-	newpage = get_new_page(page, private, &result);
+	if (!thp_migration_supported() && PageTransHuge(page))
+		return -ENOMEM;
+
+	newpage = get_new_page(page, private);
 	if (!newpage)
 		return -ENOMEM;
 
@@ -1161,14 +1163,6 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page,
 		goto out;
 	}
 
-	if (unlikely(PageTransHuge(page) && !PageTransHuge(newpage))) {
-		lock_page(page);
-		rc = split_huge_page(page);
-		unlock_page(page);
-		if (rc)
-			goto out;
-	}
-
 	rc = __unmap_and_move(page, newpage, force, mode);
 	if (rc == MIGRATEPAGE_SUCCESS)
 		set_page_owner_migrate_reason(newpage, reason);
@@ -1231,12 +1225,6 @@ put_new:
 			put_page(newpage);
 	}
 
-	if (result) {
-		if (rc)
-			*result = rc;
-		else
-			*result = page_to_nid(newpage);
-	}
 	return rc;
 }
 
@@ -1264,7 +1252,6 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
 				enum migrate_mode mode, int reason)
 {
 	int rc = -EAGAIN;
-	int *result = NULL;
 	int page_was_mapped = 0;
 	struct page *new_hpage;
 	struct anon_vma *anon_vma = NULL;
@@ -1281,7 +1268,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
 		return -ENOSYS;
 	}
 
-	new_hpage = get_new_page(hpage, private, &result);
+	new_hpage = get_new_page(hpage, private);
 	if (!new_hpage)
 		return -ENOMEM;
 
@@ -1345,12 +1332,6 @@ out:
 	else
 		putback_active_hugepage(new_hpage);
 
-	if (result) {
-		if (rc)
-			*result = rc;
-		else
-			*result = page_to_nid(new_hpage);
-	}
 	return rc;
 }
 
@@ -1395,6 +1376,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
 		retry = 0;
 
 		list_for_each_entry_safe(page, page2, from, lru) {
+retry:
 			cond_resched();
 
 			if (PageHuge(page))
@@ -1408,6 +1390,26 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
 
 			switch(rc) {
 			case -ENOMEM:
+				/*
+				 * THP migration might be unsupported or the
+				 * allocation could've failed so we should
+				 * retry on the same page with the THP split
+				 * to base pages.
+				 *
+				 * Head page is retried immediately and tail
+				 * pages are added to the tail of the list so
+				 * we encounter them after the rest of the list
+				 * is processed.
+				 */
+				if (PageTransHuge(page)) {
+					lock_page(page);
+					rc = split_huge_page_to_list(page, from);
+					unlock_page(page);
+					if (!rc) {
+						list_safe_reset_next(page, page2, lru);
+						goto retry;
+					}
+				}
 				nr_failed++;
 				goto out;
 			case -EAGAIN:
@@ -1444,141 +1446,101 @@ out:
 }
 
 #ifdef CONFIG_NUMA
-/*
- * Move a list of individual pages
- */
-struct page_to_node {
-	unsigned long addr;
-	struct page *page;
-	int node;
-	int status;
-};
 
-static struct page *new_page_node(struct page *p, unsigned long private,
-		int **result)
+static int store_status(int __user *status, int start, int value, int nr)
 {
-	struct page_to_node *pm = (struct page_to_node *)private;
-
-	while (pm->node != MAX_NUMNODES && pm->page != p)
-		pm++;
+	while (nr-- > 0) {
+		if (put_user(value, status + start))
+			return -EFAULT;
+		start++;
+	}
 
-	if (pm->node == MAX_NUMNODES)
-		return NULL;
+	return 0;
+}
 
-	*result = &pm->status;
+static int do_move_pages_to_node(struct mm_struct *mm,
+		struct list_head *pagelist, int node)
+{
+	int err;
 
-	if (PageHuge(p))
-		return alloc_huge_page_node(page_hstate(compound_head(p)),
-					pm->node);
-	else if (thp_migration_supported() && PageTransHuge(p)) {
-		struct page *thp;
+	if (list_empty(pagelist))
+		return 0;
 
-		thp = alloc_pages_node(pm->node,
-			(GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_RECLAIM,
-			HPAGE_PMD_ORDER);
-		if (!thp)
-			return NULL;
-		prep_transhuge_page(thp);
-		return thp;
-	} else
-		return __alloc_pages_node(pm->node,
-				GFP_HIGHUSER_MOVABLE | __GFP_THISNODE, 0);
+	err = migrate_pages(pagelist, alloc_new_node_page, NULL, node,
+			MIGRATE_SYNC, MR_SYSCALL);
+	if (err)
+		putback_movable_pages(pagelist);
+	return err;
 }
 
 /*
- * Move a set of pages as indicated in the pm array. The addr
- * field must be set to the virtual address of the page to be moved
- * and the node number must contain a valid target node.
- * The pm array ends with node = MAX_NUMNODES.
+ * Resolves the given address to a struct page, isolates it from the LRU and
+ * puts it to the given pagelist.
+ * Returns -errno if the page cannot be found/isolated or 0 when it has been
+ * queued or the page doesn't need to be migrated because it is already on
+ * the target node
  */
-static int do_move_page_to_node_array(struct mm_struct *mm,
-				      struct page_to_node *pm,
-				      int migrate_all)
+static int add_page_for_migration(struct mm_struct *mm, unsigned long addr,
+		int node, struct list_head *pagelist, bool migrate_all)
 {
+	struct vm_area_struct *vma;
+	struct page *page;
+	unsigned int follflags;
 	int err;
-	struct page_to_node *pp;
-	LIST_HEAD(pagelist);
 
 	down_read(&mm->mmap_sem);
+	err = -EFAULT;
+	vma = find_vma(mm, addr);
+	if (!vma || addr < vma->vm_start || !vma_migratable(vma))
+		goto out;
 
-	/*
-	 * Build a list of pages to migrate
-	 */
-	for (pp = pm; pp->node != MAX_NUMNODES; pp++) {
-		struct vm_area_struct *vma;
-		struct page *page;
-		struct page *head;
-		unsigned int follflags;
-
-		err = -EFAULT;
-		vma = find_vma(mm, pp->addr);
-		if (!vma || pp->addr < vma->vm_start || !vma_migratable(vma))
-			goto set_status;
-
-		/* FOLL_DUMP to ignore special (like zero) pages */
-		follflags = FOLL_GET | FOLL_DUMP;
-		if (!thp_migration_supported())
-			follflags |= FOLL_SPLIT;
-		page = follow_page(vma, pp->addr, follflags);
+	/* FOLL_DUMP to ignore special (like zero) pages */
+	follflags = FOLL_GET | FOLL_DUMP;
+	page = follow_page(vma, addr, follflags);
 
-		err = PTR_ERR(page);
-		if (IS_ERR(page))
-			goto set_status;
+	err = PTR_ERR(page);
+	if (IS_ERR(page))
+		goto out;
 
-		err = -ENOENT;
-		if (!page)
-			goto set_status;
+	err = -ENOENT;
+	if (!page)
+		goto out;
 
-		err = page_to_nid(page);
+	err = 0;
+	if (page_to_nid(page) == node)
+		goto out_putpage;
 
-		if (err == pp->node)
-			/*
-			 * Node already in the right place
-			 */
-			goto put_and_set;
+	err = -EACCES;
+	if (page_mapcount(page) > 1 && !migrate_all)
+		goto out_putpage;
 
-		err = -EACCES;
-		if (page_mapcount(page) > 1 &&
-				!migrate_all)
-			goto put_and_set;
-
-		if (PageHuge(page)) {
-			if (PageHead(page)) {
-				isolate_huge_page(page, &pagelist);
-				err = 0;
-				pp->page = page;
-			}
-			goto put_and_set;
+	if (PageHuge(page)) {
+		if (PageHead(page)) {
+			isolate_huge_page(page, pagelist);
+			err = 0;
 		}
+	} else {
+		struct page *head;
 
-		pp->page = compound_head(page);
 		head = compound_head(page);
 		err = isolate_lru_page(head);
-		if (!err) {
-			list_add_tail(&head->lru, &pagelist);
-			mod_node_page_state(page_pgdat(head),
-				NR_ISOLATED_ANON + page_is_file_cache(head),
-				hpage_nr_pages(head));
-		}
-put_and_set:
-		/*
-		 * Either remove the duplicate refcount from
-		 * isolate_lru_page() or drop the page ref if it was
-		 * not isolated.
-		 */
-		put_page(page);
-set_status:
-		pp->status = err;
-	}
-
-	err = 0;
-	if (!list_empty(&pagelist)) {
-		err = migrate_pages(&pagelist, new_page_node, NULL,
-				(unsigned long)pm, MIGRATE_SYNC, MR_SYSCALL);
 		if (err)
-			putback_movable_pages(&pagelist);
-	}
+			goto out_putpage;
 
+		err = 0;
+		list_add_tail(&head->lru, pagelist);
+		mod_node_page_state(page_pgdat(head),
+			NR_ISOLATED_ANON + page_is_file_cache(head),
+			hpage_nr_pages(head));
+	}
+out_putpage:
+	/*
+	 * Either remove the duplicate refcount from
+	 * isolate_lru_page() or drop the page ref if it was
+	 * not isolated.
+	 */
+	put_page(page);
+out:
 	up_read(&mm->mmap_sem);
 	return err;
 }
@@ -1593,79 +1555,79 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
 			 const int __user *nodes,
 			 int __user *status, int flags)
 {
-	struct page_to_node *pm;
-	unsigned long chunk_nr_pages;
-	unsigned long chunk_start;
-	int err;
-
-	err = -ENOMEM;
-	pm = (struct page_to_node *)__get_free_page(GFP_KERNEL);
-	if (!pm)
-		goto out;
+	int current_node = NUMA_NO_NODE;
+	LIST_HEAD(pagelist);
+	int start, i;
+	int err = 0, err1;
 
 	migrate_prep();
 
-	/*
-	 * Store a chunk of page_to_node array in a page,
-	 * but keep the last one as a marker
-	 */
-	chunk_nr_pages = (PAGE_SIZE / sizeof(struct page_to_node)) - 1;
-
-	for (chunk_start = 0;
-	     chunk_start < nr_pages;
-	     chunk_start += chunk_nr_pages) {
-		int j;
+	for (i = start = 0; i < nr_pages; i++) {
+		const void __user *p;
+		unsigned long addr;
+		int node;
 
-		if (chunk_start + chunk_nr_pages > nr_pages)
-			chunk_nr_pages = nr_pages - chunk_start;
-
-		/* fill the chunk pm with addrs and nodes from user-space */
-		for (j = 0; j < chunk_nr_pages; j++) {
-			const void __user *p;
-			int node;
-
-			err = -EFAULT;
-			if (get_user(p, pages + j + chunk_start))
-				goto out_pm;
-			pm[j].addr = (unsigned long) p;
-
-			if (get_user(node, nodes + j + chunk_start))
-				goto out_pm;
-
-			err = -ENODEV;
-			if (node < 0 || node >= MAX_NUMNODES)
-				goto out_pm;
-
-			if (!node_state(node, N_MEMORY))
-				goto out_pm;
-
-			err = -EACCES;
-			if (!node_isset(node, task_nodes))
-				goto out_pm;
+		err = -EFAULT;
+		if (get_user(p, pages + i))
+			goto out_flush;
+		if (get_user(node, nodes + i))
+			goto out_flush;
+		addr = (unsigned long)p;
+
+		err = -ENODEV;
+		if (node < 0 || node >= MAX_NUMNODES)
+			goto out_flush;
+		if (!node_state(node, N_MEMORY))
+			goto out_flush;
 
-			pm[j].node = node;
+		err = -EACCES;
+		if (!node_isset(node, task_nodes))
+			goto out_flush;
+
+		if (current_node == NUMA_NO_NODE) {
+			current_node = node;
+			start = i;
+		} else if (node != current_node) {
+			err = do_move_pages_to_node(mm, &pagelist, current_node);
+			if (err)
+				goto out;
+			err = store_status(status, start, current_node, i - start);
+			if (err)
+				goto out;
+			start = i;
+			current_node = node;
 		}
 
-		/* End marker for this chunk */
-		pm[chunk_nr_pages].node = MAX_NUMNODES;
-
-		/* Migrate this chunk */
-		err = do_move_page_to_node_array(mm, pm,
-						 flags & MPOL_MF_MOVE_ALL);
-		if (err < 0)
-			goto out_pm;
+		/*
+		 * Errors in the page lookup or isolation are not fatal and we simply
+		 * report them via status
+		 */
+		err = add_page_for_migration(mm, addr, current_node,
+				&pagelist, flags & MPOL_MF_MOVE_ALL);
+		if (!err)
+			continue;
 
-		/* Return status information */
-		for (j = 0; j < chunk_nr_pages; j++)
-			if (put_user(pm[j].status, status + j + chunk_start)) {
-				err = -EFAULT;
-				goto out_pm;
-			}
-	}
-	err = 0;
+		err = store_status(status, i, err, 1);
+		if (err)
+			goto out_flush;
 
-out_pm:
-	free_page((unsigned long)pm);
+		err = do_move_pages_to_node(mm, &pagelist, current_node);
+		if (err)
+			goto out;
+		if (i > start) {
+			err = store_status(status, start, current_node, i - start);
+			if (err)
+				goto out;
+		}
+		current_node = NUMA_NO_NODE;
+	}
+out_flush:
+	/* Make sure we do not overwrite the existing error */
+	err1 = do_move_pages_to_node(mm, &pagelist, current_node);
+	if (!err1)
+		err1 = store_status(status, start, current_node, i - start);
+	if (!err)
+		err = err1;
 out:
 	return err;
 }
@@ -1866,8 +1828,7 @@ static bool migrate_balanced_pgdat(struct pglist_data *pgdat,
 }
 
 static struct page *alloc_misplaced_dst_page(struct page *page,
-					   unsigned long data,
-					   int **result)
+					   unsigned long data)
 {
 	int nid = (int) data;
 	struct page *newpage;
@@ -1987,6 +1948,13 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
 		goto out;
 
 	/*
+	 * Also do not migrate dirty pages as not all filesystems can move
+	 * dirty pages in MIGRATE_ASYNC mode which is a waste of cycles.
+	 */
+	if (page_is_file_cache(page) && PageDirty(page))
+		goto out;
+
+	/*
 	 * Rate-limit the amount of data that is being migrated to a node.
 	 * Optimal placement is no good if the memory bus is saturated and
 	 * all the time is being spent migrating!
@@ -2339,7 +2307,8 @@ again:
 			ptep_get_and_clear(mm, addr, ptep);
 
 			/* Setup special migration page table entry */
-			entry = make_migration_entry(page, pte_write(pte));
+			entry = make_migration_entry(page, mpfn &
+						     MIGRATE_PFN_WRITE);
 			swp_pte = swp_entry_to_pte(entry);
 			if (pte_soft_dirty(pte))
 				swp_pte = pte_swp_mksoft_dirty(swp_pte);
diff --git a/mm/mmap.c b/mm/mmap.c
index f2154fc2548b..188f195883b9 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1342,6 +1342,10 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
 		if (!(file && path_noexec(&file->f_path)))
 			prot |= PROT_EXEC;
 
+	/* force arch specific MAP_FIXED handling in get_unmapped_area */
+	if (flags & MAP_FIXED_NOREPLACE)
+		flags |= MAP_FIXED;
+
 	if (!(flags & MAP_FIXED))
 		addr = round_hint_to_min(addr);
 
@@ -1365,6 +1369,13 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
 	if (offset_in_page(addr))
 		return addr;
 
+	if (flags & MAP_FIXED_NOREPLACE) {
+		struct vm_area_struct *vma = find_vma(mm, addr);
+
+		if (vma && vma->vm_start <= addr)
+			return -EEXIST;
+	}
+
 	if (prot == PROT_EXEC) {
 		pkey = execute_only_pkey(mm);
 		if (pkey < 0)
diff --git a/mm/mprotect.c b/mm/mprotect.c
index c1d6af7455da..625608bc8962 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -27,6 +27,7 @@
 #include <linux/pkeys.h>
 #include <linux/ksm.h>
 #include <linux/uaccess.h>
+#include <linux/mm_inline.h>
 #include <asm/pgtable.h>
 #include <asm/cacheflush.h>
 #include <asm/mmu_context.h>
@@ -89,6 +90,14 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 				    page_mapcount(page) != 1)
 					continue;
 
+				/*
+				 * While migration can move some dirty pages,
+				 * it cannot move them all from MIGRATE_ASYNC
+				 * context.
+				 */
+				if (page_is_file_cache(page) && PageDirty(page))
+					continue;
+
 				/* Avoid TLB flush if possible */
 				if (pte_protnone(oldpte))
 					continue;
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 586f31261c83..5c1a3279e63f 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2099,7 +2099,8 @@ void __init page_writeback_init(void)
  * so that it can tag pages faster than a dirtying process can create them).
  */
 /*
- * We tag pages in batches of WRITEBACK_TAG_BATCH to reduce tree_lock latency.
+ * We tag pages in batches of WRITEBACK_TAG_BATCH to reduce the i_pages lock
+ * latency.
  */
 void tag_pages_for_writeback(struct address_space *mapping,
 			     pgoff_t start, pgoff_t end)
@@ -2109,22 +2110,22 @@ void tag_pages_for_writeback(struct address_space *mapping,
 	struct radix_tree_iter iter;
 	void **slot;
 
-	spin_lock_irq(&mapping->tree_lock);
-	radix_tree_for_each_tagged(slot, &mapping->page_tree, &iter, start,
+	xa_lock_irq(&mapping->i_pages);
+	radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, start,
 							PAGECACHE_TAG_DIRTY) {
 		if (iter.index > end)
 			break;
-		radix_tree_iter_tag_set(&mapping->page_tree, &iter,
+		radix_tree_iter_tag_set(&mapping->i_pages, &iter,
 							PAGECACHE_TAG_TOWRITE);
 		tagged++;
 		if ((tagged % WRITEBACK_TAG_BATCH) != 0)
 			continue;
 		slot = radix_tree_iter_resume(slot, &iter);
-		spin_unlock_irq(&mapping->tree_lock);
+		xa_unlock_irq(&mapping->i_pages);
 		cond_resched();
-		spin_lock_irq(&mapping->tree_lock);
+		xa_lock_irq(&mapping->i_pages);
 	}
-	spin_unlock_irq(&mapping->tree_lock);
+	xa_unlock_irq(&mapping->i_pages);
 }
 EXPORT_SYMBOL(tag_pages_for_writeback);
 
@@ -2467,13 +2468,13 @@ int __set_page_dirty_nobuffers(struct page *page)
 			return 1;
 		}
 
-		spin_lock_irqsave(&mapping->tree_lock, flags);
+		xa_lock_irqsave(&mapping->i_pages, flags);
 		BUG_ON(page_mapping(page) != mapping);
 		WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page));
 		account_page_dirtied(page, mapping);
-		radix_tree_tag_set(&mapping->page_tree, page_index(page),
+		radix_tree_tag_set(&mapping->i_pages, page_index(page),
 				   PAGECACHE_TAG_DIRTY);
-		spin_unlock_irqrestore(&mapping->tree_lock, flags);
+		xa_unlock_irqrestore(&mapping->i_pages, flags);
 		unlock_page_memcg(page);
 
 		if (mapping->host) {
@@ -2718,11 +2719,10 @@ int test_clear_page_writeback(struct page *page)
 		struct backing_dev_info *bdi = inode_to_bdi(inode);
 		unsigned long flags;
 
-		spin_lock_irqsave(&mapping->tree_lock, flags);
+		xa_lock_irqsave(&mapping->i_pages, flags);
 		ret = TestClearPageWriteback(page);
 		if (ret) {
-			radix_tree_tag_clear(&mapping->page_tree,
-						page_index(page),
+			radix_tree_tag_clear(&mapping->i_pages, page_index(page),
 						PAGECACHE_TAG_WRITEBACK);
 			if (bdi_cap_account_writeback(bdi)) {
 				struct bdi_writeback *wb = inode_to_wb(inode);
@@ -2736,7 +2736,7 @@ int test_clear_page_writeback(struct page *page)
 						     PAGECACHE_TAG_WRITEBACK))
 			sb_clear_inode_writeback(mapping->host);
 
-		spin_unlock_irqrestore(&mapping->tree_lock, flags);
+		xa_unlock_irqrestore(&mapping->i_pages, flags);
 	} else {
 		ret = TestClearPageWriteback(page);
 	}
@@ -2766,7 +2766,7 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
 		struct backing_dev_info *bdi = inode_to_bdi(inode);
 		unsigned long flags;
 
-		spin_lock_irqsave(&mapping->tree_lock, flags);
+		xa_lock_irqsave(&mapping->i_pages, flags);
 		ret = TestSetPageWriteback(page);
 		if (!ret) {
 			bool on_wblist;
@@ -2774,8 +2774,7 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
 			on_wblist = mapping_tagged(mapping,
 						   PAGECACHE_TAG_WRITEBACK);
 
-			radix_tree_tag_set(&mapping->page_tree,
-						page_index(page),
+			radix_tree_tag_set(&mapping->i_pages, page_index(page),
 						PAGECACHE_TAG_WRITEBACK);
 			if (bdi_cap_account_writeback(bdi))
 				inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK);
@@ -2789,14 +2788,12 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
 				sb_mark_inode_writeback(mapping->host);
 		}
 		if (!PageDirty(page))
-			radix_tree_tag_clear(&mapping->page_tree,
-						page_index(page),
+			radix_tree_tag_clear(&mapping->i_pages, page_index(page),
 						PAGECACHE_TAG_DIRTY);
 		if (!keep_write)
-			radix_tree_tag_clear(&mapping->page_tree,
-						page_index(page),
+			radix_tree_tag_clear(&mapping->i_pages, page_index(page),
 						PAGECACHE_TAG_TOWRITE);
-		spin_unlock_irqrestore(&mapping->tree_lock, flags);
+		xa_unlock_irqrestore(&mapping->i_pages, flags);
 	} else {
 		ret = TestSetPageWriteback(page);
 	}
@@ -2816,7 +2813,7 @@ EXPORT_SYMBOL(__test_set_page_writeback);
  */
 int mapping_tagged(struct address_space *mapping, int tag)
 {
-	return radix_tree_tagged(&mapping->page_tree, tag);
+	return radix_tree_tagged(&mapping->i_pages, tag);
 }
 EXPORT_SYMBOL(mapping_tagged);
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0b97b8ece4a9..905db9d7962f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -46,7 +46,6 @@
 #include <linux/stop_machine.h>
 #include <linux/sort.h>
 #include <linux/pfn.h>
-#include <xen/xen.h>
 #include <linux/backing-dev.h>
 #include <linux/fault-inject.h>
 #include <linux/page-isolation.h>
@@ -205,17 +204,18 @@ static void __free_pages_ok(struct page *page, unsigned int order);
  * TBD: should special case ZONE_DMA32 machines here - in those we normally
  * don't need any ZONE_NORMAL reservation
  */
-int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = {
+int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES] = {
 #ifdef CONFIG_ZONE_DMA
-	 256,
+	[ZONE_DMA] = 256,
 #endif
 #ifdef CONFIG_ZONE_DMA32
-	 256,
+	[ZONE_DMA32] = 256,
 #endif
+	[ZONE_NORMAL] = 32,
 #ifdef CONFIG_HIGHMEM
-	 32,
+	[ZONE_HIGHMEM] = 0,
 #endif
-	 32,
+	[ZONE_MOVABLE] = 0,
 };
 
 EXPORT_SYMBOL(totalram_pages);
@@ -316,9 +316,6 @@ static inline bool update_defer_init(pg_data_t *pgdat,
 	/* Always populate low zones for address-constrained allocations */
 	if (zone_end < pgdat_end_pfn(pgdat))
 		return true;
-	/* Xen PV domains need page structures early */
-	if (xen_pv_domain())
-		return true;
 	(*nr_initialised)++;
 	if ((*nr_initialised > pgdat->static_init_pgcnt) &&
 	    (pfn & (PAGES_PER_SECTION - 1)) == 0) {
@@ -1746,16 +1743,38 @@ void __init page_alloc_init_late(void)
 }
 
 #ifdef CONFIG_CMA
+static void __init adjust_present_page_count(struct page *page, long count)
+{
+	struct zone *zone = page_zone(page);
+
+	/* We don't need to hold a lock since it is boot-up process */
+	zone->present_pages += count;
+}
+
 /* Free whole pageblock and set its migration type to MIGRATE_CMA. */
 void __init init_cma_reserved_pageblock(struct page *page)
 {
 	unsigned i = pageblock_nr_pages;
+	unsigned long pfn = page_to_pfn(page);
 	struct page *p = page;
+	int nid = page_to_nid(page);
+
+	/*
+	 * ZONE_MOVABLE will steal present pages from other zones by
+	 * changing page links so page_zone() is changed. Before that,
+	 * we need to adjust previous zone's page count first.
+	 */
+	adjust_present_page_count(page, -pageblock_nr_pages);
 
 	do {
 		__ClearPageReserved(p);
 		set_page_count(p, 0);
-	} while (++p, --i);
+
+		/* Steal pages from other zones */
+		set_page_links(p, ZONE_MOVABLE, nid, pfn);
+	} while (++p, ++pfn, --i);
+
+	adjust_present_page_count(page, pageblock_nr_pages);
 
 	set_pageblock_migratetype(page, MIGRATE_CMA);
 
@@ -2870,7 +2889,7 @@ int __isolate_free_page(struct page *page, unsigned int order)
 		 * exists.
 		 */
 		watermark = min_wmark_pages(zone) + (1UL << order);
-		if (!zone_watermark_ok(zone, 0, watermark, 0, ALLOC_CMA))
+		if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
 			return 0;
 
 		__mod_zone_freepage_state(zone, -(1UL << order), mt);
@@ -3146,12 +3165,6 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
 	}
 
 
-#ifdef CONFIG_CMA
-	/* If allocation can't use CMA areas don't use free CMA pages */
-	if (!(alloc_flags & ALLOC_CMA))
-		free_pages -= zone_page_state(z, NR_FREE_CMA_PAGES);
-#endif
-
 	/*
 	 * Check watermarks for an order-0 allocation request. If these
 	 * are not met, then a high-order request also cannot go ahead
@@ -3178,10 +3191,8 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
 		}
 
 #ifdef CONFIG_CMA
-		if ((alloc_flags & ALLOC_CMA) &&
-		    !list_empty(&area->free_list[MIGRATE_CMA])) {
+		if (!list_empty(&area->free_list[MIGRATE_CMA]))
 			return true;
-		}
 #endif
 		if (alloc_harder &&
 			!list_empty(&area->free_list[MIGRATE_HIGHATOMIC]))
@@ -3201,13 +3212,6 @@ static inline bool zone_watermark_fast(struct zone *z, unsigned int order,
 		unsigned long mark, int classzone_idx, unsigned int alloc_flags)
 {
 	long free_pages = zone_page_state(z, NR_FREE_PAGES);
-	long cma_pages = 0;
-
-#ifdef CONFIG_CMA
-	/* If allocation can't use CMA areas don't use free CMA pages */
-	if (!(alloc_flags & ALLOC_CMA))
-		cma_pages = zone_page_state(z, NR_FREE_CMA_PAGES);
-#endif
 
 	/*
 	 * Fast check for order-0 only. If this fails then the reserves
@@ -3216,7 +3220,7 @@ static inline bool zone_watermark_fast(struct zone *z, unsigned int order,
 	 * the caller is !atomic then it'll uselessly search the free
 	 * list. That corner case is then slower but it is harmless.
 	 */
-	if (!order && (free_pages - cma_pages) > mark + z->lowmem_reserve[classzone_idx])
+	if (!order && free_pages > mark + z->lowmem_reserve[classzone_idx])
 		return true;
 
 	return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
@@ -3852,10 +3856,6 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
 	} else if (unlikely(rt_task(current)) && !in_interrupt())
 		alloc_flags |= ALLOC_HARDER;
 
-#ifdef CONFIG_CMA
-	if (gfpflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
-		alloc_flags |= ALLOC_CMA;
-#endif
 	return alloc_flags;
 }
 
@@ -4322,9 +4322,6 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
 	if (should_fail_alloc_page(gfp_mask, order))
 		return false;
 
-	if (IS_ENABLED(CONFIG_CMA) && ac->migratetype == MIGRATE_MOVABLE)
-		*alloc_flags |= ALLOC_CMA;
-
 	return true;
 }
 
@@ -4734,6 +4731,13 @@ long si_mem_available(void)
 		     min(global_node_page_state(NR_SLAB_RECLAIMABLE) / 2,
 			 wmark_low);
 
+	/*
+	 * Part of the kernel memory, which can be released under memory
+	 * pressure.
+	 */
+	available += global_node_page_state(NR_INDIRECTLY_RECLAIMABLE_BYTES) >>
+		PAGE_SHIFT;
+
 	if (available < 0)
 		available = 0;
 	return available;
@@ -6200,6 +6204,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
 {
 	enum zone_type j;
 	int nid = pgdat->node_id;
+	unsigned long node_end_pfn = 0;
 
 	pgdat_resize_init(pgdat);
 #ifdef CONFIG_NUMA_BALANCING
@@ -6227,9 +6232,13 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
 		struct zone *zone = pgdat->node_zones + j;
 		unsigned long size, realsize, freesize, memmap_pages;
 		unsigned long zone_start_pfn = zone->zone_start_pfn;
+		unsigned long movable_size = 0;
 
 		size = zone->spanned_pages;
 		realsize = freesize = zone->present_pages;
+		if (zone_end_pfn(zone) > node_end_pfn)
+			node_end_pfn = zone_end_pfn(zone);
+
 
 		/*
 		 * Adjust freesize so that it accounts for how much memory
@@ -6278,12 +6287,30 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
 		zone_seqlock_init(zone);
 		zone_pcp_init(zone);
 
-		if (!size)
+		/*
+		 * The size of the CMA area is unknown now so we need to
+		 * prepare the memory for the usemap at maximum.
+		 */
+		if (IS_ENABLED(CONFIG_CMA) && j == ZONE_MOVABLE &&
+			pgdat->node_spanned_pages) {
+			movable_size = node_end_pfn - pgdat->node_start_pfn;
+		}
+
+		if (!size && !movable_size)
 			continue;
 
 		set_pageblock_order();
-		setup_usemap(pgdat, zone, zone_start_pfn, size);
-		init_currently_empty_zone(zone, zone_start_pfn, size);
+		if (movable_size) {
+			zone->zone_start_pfn = pgdat->node_start_pfn;
+			zone->spanned_pages = movable_size;
+			setup_usemap(pgdat, zone,
+				pgdat->node_start_pfn, movable_size);
+			init_currently_empty_zone(zone,
+				pgdat->node_start_pfn, movable_size);
+		} else {
+			setup_usemap(pgdat, zone, zone_start_pfn, size);
+			init_currently_empty_zone(zone, zone_start_pfn, size);
+		}
 		memmap_init(size, nid, j, zone_start_pfn);
 	}
 }
@@ -7125,13 +7152,15 @@ static void setup_per_zone_lowmem_reserve(void)
 				struct zone *lower_zone;
 
 				idx--;
-
-				if (sysctl_lowmem_reserve_ratio[idx] < 1)
-					sysctl_lowmem_reserve_ratio[idx] = 1;
-
 				lower_zone = pgdat->node_zones + idx;
-				lower_zone->lowmem_reserve[j] = managed_pages /
-					sysctl_lowmem_reserve_ratio[idx];
+
+				if (sysctl_lowmem_reserve_ratio[idx] < 1) {
+					sysctl_lowmem_reserve_ratio[idx] = 0;
+					lower_zone->lowmem_reserve[j] = 0;
+				} else {
+					lower_zone->lowmem_reserve[j] =
+						managed_pages / sysctl_lowmem_reserve_ratio[idx];
+				}
 				managed_pages += lower_zone->managed_pages;
 			}
 		}
@@ -7922,7 +7951,7 @@ void free_contig_range(unsigned long pfn, unsigned nr_pages)
 }
 #endif
 
-#ifdef CONFIG_MEMORY_HOTPLUG
+#if defined CONFIG_MEMORY_HOTPLUG || defined CONFIG_CMA
 /*
  * The zone indicated has a new number of managed_pages; batch sizes and percpu
  * page high values need to be recalulated.
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 61dee77bb211..43e085608846 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -309,8 +309,7 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
 	return pfn < end_pfn ? -EBUSY : 0;
 }
 
-struct page *alloc_migrate_target(struct page *page, unsigned long private,
-				  int **resultp)
+struct page *alloc_migrate_target(struct page *page, unsigned long private)
 {
 	return new_page_nodemask(page, numa_node_id(), &node_states[N_MEMORY]);
 }
diff --git a/mm/readahead.c b/mm/readahead.c
index 4d57b4644f98..539bbb6c1fad 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -175,7 +175,7 @@ int __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
 			break;
 
 		rcu_read_lock();
-		page = radix_tree_lookup(&mapping->page_tree, page_offset);
+		page = radix_tree_lookup(&mapping->i_pages, page_offset);
 		rcu_read_unlock();
 		if (page && !radix_tree_exceptional_entry(page))
 			continue;
diff --git a/mm/rmap.c b/mm/rmap.c
index 9122787c4947..f0dd4e4565bc 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -32,11 +32,11 @@
  *                 mmlist_lock (in mmput, drain_mmlist and others)
  *                 mapping->private_lock (in __set_page_dirty_buffers)
  *                   mem_cgroup_{begin,end}_page_stat (memcg->move_lock)
- *                     mapping->tree_lock (widely used)
+ *                     i_pages lock (widely used)
  *                 inode->i_lock (in set_page_dirty's __mark_inode_dirty)
  *                 bdi.wb->list_lock (in set_page_dirty's __mark_inode_dirty)
  *                   sb_lock (within inode_lock in fs/fs-writeback.c)
- *                   mapping->tree_lock (widely used, in set_page_dirty,
+ *                   i_pages lock (widely used, in set_page_dirty,
  *                             in arch-dependent flush_dcache_mmap_lock,
  *                             within bdi.wb->list_lock in __sync_single_inode)
  *
diff --git a/mm/shmem.c b/mm/shmem.c
index 4424fc0c33aa..9d6c7e595415 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -332,12 +332,12 @@ static int shmem_radix_tree_replace(struct address_space *mapping,
 
 	VM_BUG_ON(!expected);
 	VM_BUG_ON(!replacement);
-	item = __radix_tree_lookup(&mapping->page_tree, index, &node, &pslot);
+	item = __radix_tree_lookup(&mapping->i_pages, index, &node, &pslot);
 	if (!item)
 		return -ENOENT;
 	if (item != expected)
 		return -ENOENT;
-	__radix_tree_replace(&mapping->page_tree, node, pslot,
+	__radix_tree_replace(&mapping->i_pages, node, pslot,
 			     replacement, NULL);
 	return 0;
 }
@@ -355,7 +355,7 @@ static bool shmem_confirm_swap(struct address_space *mapping,
 	void *item;
 
 	rcu_read_lock();
-	item = radix_tree_lookup(&mapping->page_tree, index);
+	item = radix_tree_lookup(&mapping->i_pages, index);
 	rcu_read_unlock();
 	return item == swp_to_radix_entry(swap);
 }
@@ -590,14 +590,14 @@ static int shmem_add_to_page_cache(struct page *page,
 	page->mapping = mapping;
 	page->index = index;
 
-	spin_lock_irq(&mapping->tree_lock);
+	xa_lock_irq(&mapping->i_pages);
 	if (PageTransHuge(page)) {
 		void __rcu **results;
 		pgoff_t idx;
 		int i;
 
 		error = 0;
-		if (radix_tree_gang_lookup_slot(&mapping->page_tree,
+		if (radix_tree_gang_lookup_slot(&mapping->i_pages,
 					&results, &idx, index, 1) &&
 				idx < index + HPAGE_PMD_NR) {
 			error = -EEXIST;
@@ -605,14 +605,14 @@ static int shmem_add_to_page_cache(struct page *page,
 
 		if (!error) {
 			for (i = 0; i < HPAGE_PMD_NR; i++) {
-				error = radix_tree_insert(&mapping->page_tree,
+				error = radix_tree_insert(&mapping->i_pages,
 						index + i, page + i);
 				VM_BUG_ON(error);
 			}
 			count_vm_event(THP_FILE_ALLOC);
 		}
 	} else if (!expected) {
-		error = radix_tree_insert(&mapping->page_tree, index, page);
+		error = radix_tree_insert(&mapping->i_pages, index, page);
 	} else {
 		error = shmem_radix_tree_replace(mapping, index, expected,
 								 page);
@@ -624,10 +624,10 @@ static int shmem_add_to_page_cache(struct page *page,
 			__inc_node_page_state(page, NR_SHMEM_THPS);
 		__mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, nr);
 		__mod_node_page_state(page_pgdat(page), NR_SHMEM, nr);
-		spin_unlock_irq(&mapping->tree_lock);
+		xa_unlock_irq(&mapping->i_pages);
 	} else {
 		page->mapping = NULL;
-		spin_unlock_irq(&mapping->tree_lock);
+		xa_unlock_irq(&mapping->i_pages);
 		page_ref_sub(page, nr);
 	}
 	return error;
@@ -643,13 +643,13 @@ static void shmem_delete_from_page_cache(struct page *page, void *radswap)
 
 	VM_BUG_ON_PAGE(PageCompound(page), page);
 
-	spin_lock_irq(&mapping->tree_lock);
+	xa_lock_irq(&mapping->i_pages);
 	error = shmem_radix_tree_replace(mapping, page->index, page, radswap);
 	page->mapping = NULL;
 	mapping->nrpages--;
 	__dec_node_page_state(page, NR_FILE_PAGES);
 	__dec_node_page_state(page, NR_SHMEM);
-	spin_unlock_irq(&mapping->tree_lock);
+	xa_unlock_irq(&mapping->i_pages);
 	put_page(page);
 	BUG_ON(error);
 }
@@ -662,9 +662,9 @@ static int shmem_free_swap(struct address_space *mapping,
 {
 	void *old;
 
-	spin_lock_irq(&mapping->tree_lock);
-	old = radix_tree_delete_item(&mapping->page_tree, index, radswap);
-	spin_unlock_irq(&mapping->tree_lock);
+	xa_lock_irq(&mapping->i_pages);
+	old = radix_tree_delete_item(&mapping->i_pages, index, radswap);
+	xa_unlock_irq(&mapping->i_pages);
 	if (old != radswap)
 		return -ENOENT;
 	free_swap_and_cache(radix_to_swp_entry(radswap));
@@ -675,7 +675,7 @@ static int shmem_free_swap(struct address_space *mapping,
  * Determine (in bytes) how many of the shmem object's pages mapped by the
  * given offsets are swapped out.
  *
- * This is safe to call without i_mutex or mapping->tree_lock thanks to RCU,
+ * This is safe to call without i_mutex or the i_pages lock thanks to RCU,
  * as long as the inode doesn't go away and racy results are not a problem.
  */
 unsigned long shmem_partial_swap_usage(struct address_space *mapping,
@@ -688,7 +688,7 @@ unsigned long shmem_partial_swap_usage(struct address_space *mapping,
 
 	rcu_read_lock();
 
-	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
+	radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
 		if (iter.index >= end)
 			break;
 
@@ -717,7 +717,7 @@ unsigned long shmem_partial_swap_usage(struct address_space *mapping,
  * Determine (in bytes) how many of the shmem object's pages mapped by the
  * given vma is swapped out.
  *
- * This is safe to call without i_mutex or mapping->tree_lock thanks to RCU,
+ * This is safe to call without i_mutex or the i_pages lock thanks to RCU,
  * as long as the inode doesn't go away and racy results are not a problem.
  */
 unsigned long shmem_swap_usage(struct vm_area_struct *vma)
@@ -1132,7 +1132,7 @@ static int shmem_unuse_inode(struct shmem_inode_info *info,
 	int error = 0;
 
 	radswap = swp_to_radix_entry(swap);
-	index = find_swap_entry(&mapping->page_tree, radswap);
+	index = find_swap_entry(&mapping->i_pages, radswap);
 	if (index == -1)
 		return -EAGAIN;	/* tell shmem_unuse we found nothing */
 
@@ -1448,7 +1448,7 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp,
 
 	hindex = round_down(index, HPAGE_PMD_NR);
 	rcu_read_lock();
-	if (radix_tree_gang_lookup_slot(&mapping->page_tree, &results, &idx,
+	if (radix_tree_gang_lookup_slot(&mapping->i_pages, &results, &idx,
 				hindex, 1) && idx < hindex + HPAGE_PMD_NR) {
 		rcu_read_unlock();
 		return NULL;
@@ -1561,14 +1561,14 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
 	 * Our caller will very soon move newpage out of swapcache, but it's
 	 * a nice clean interface for us to replace oldpage by newpage there.
 	 */
-	spin_lock_irq(&swap_mapping->tree_lock);
+	xa_lock_irq(&swap_mapping->i_pages);
 	error = shmem_radix_tree_replace(swap_mapping, swap_index, oldpage,
 								   newpage);
 	if (!error) {
 		__inc_node_page_state(newpage, NR_FILE_PAGES);
 		__dec_node_page_state(oldpage, NR_FILE_PAGES);
 	}
-	spin_unlock_irq(&swap_mapping->tree_lock);
+	xa_unlock_irq(&swap_mapping->i_pages);
 
 	if (unlikely(error)) {
 		/*
@@ -2634,7 +2634,7 @@ static void shmem_tag_pins(struct address_space *mapping)
 	start = 0;
 	rcu_read_lock();
 
-	radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
+	radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
 		page = radix_tree_deref_slot(slot);
 		if (!page || radix_tree_exception(page)) {
 			if (radix_tree_deref_retry(page)) {
@@ -2642,10 +2642,10 @@ static void shmem_tag_pins(struct address_space *mapping)
 				continue;
 			}
 		} else if (page_count(page) - page_mapcount(page) > 1) {
-			spin_lock_irq(&mapping->tree_lock);
-			radix_tree_tag_set(&mapping->page_tree, iter.index,
+			xa_lock_irq(&mapping->i_pages);
+			radix_tree_tag_set(&mapping->i_pages, iter.index,
 					   SHMEM_TAG_PINNED);
-			spin_unlock_irq(&mapping->tree_lock);
+			xa_unlock_irq(&mapping->i_pages);
 		}
 
 		if (need_resched()) {
@@ -2677,7 +2677,7 @@ static int shmem_wait_for_pins(struct address_space *mapping)
 
 	error = 0;
 	for (scan = 0; scan <= LAST_SCAN; scan++) {
-		if (!radix_tree_tagged(&mapping->page_tree, SHMEM_TAG_PINNED))
+		if (!radix_tree_tagged(&mapping->i_pages, SHMEM_TAG_PINNED))
 			break;
 
 		if (!scan)
@@ -2687,7 +2687,7 @@ static int shmem_wait_for_pins(struct address_space *mapping)
 
 		start = 0;
 		rcu_read_lock();
-		radix_tree_for_each_tagged(slot, &mapping->page_tree, &iter,
+		radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter,
 					   start, SHMEM_TAG_PINNED) {
 
 			page = radix_tree_deref_slot(slot);
@@ -2713,10 +2713,10 @@ static int shmem_wait_for_pins(struct address_space *mapping)
 				error = -EBUSY;
 			}
 
-			spin_lock_irq(&mapping->tree_lock);
-			radix_tree_tag_clear(&mapping->page_tree,
+			xa_lock_irq(&mapping->i_pages);
+			radix_tree_tag_clear(&mapping->i_pages,
 					     iter.index, SHMEM_TAG_PINNED);
-			spin_unlock_irq(&mapping->tree_lock);
+			xa_unlock_irq(&mapping->i_pages);
 continue_resched:
 			if (need_resched()) {
 				slot = radix_tree_iter_resume(slot, &iter);
diff --git a/mm/slub.c b/mm/slub.c
index 4fb037c98782..44aa7847324a 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1363,10 +1363,8 @@ static __always_inline void kfree_hook(void *x)
 	kasan_kfree_large(x, _RET_IP_);
 }
 
-static __always_inline void *slab_free_hook(struct kmem_cache *s, void *x)
+static __always_inline bool slab_free_hook(struct kmem_cache *s, void *x)
 {
-	void *freeptr;
-
 	kmemleak_free_recursive(x, s->flags);
 
 	/*
@@ -1386,17 +1384,12 @@ static __always_inline void *slab_free_hook(struct kmem_cache *s, void *x)
 	if (!(s->flags & SLAB_DEBUG_OBJECTS))
 		debug_check_no_obj_freed(x, s->object_size);
 
-	freeptr = get_freepointer(s, x);
-	/*
-	 * kasan_slab_free() may put x into memory quarantine, delaying its
-	 * reuse. In this case the object's freelist pointer is changed.
-	 */
-	kasan_slab_free(s, x, _RET_IP_);
-	return freeptr;
+	/* KASAN might put x into memory quarantine, delaying its reuse */
+	return kasan_slab_free(s, x, _RET_IP_);
 }
 
-static inline void slab_free_freelist_hook(struct kmem_cache *s,
-					   void *head, void *tail)
+static inline bool slab_free_freelist_hook(struct kmem_cache *s,
+					   void **head, void **tail)
 {
 /*
  * Compiler cannot detect this function can be removed if slab_free_hook()
@@ -1407,13 +1400,33 @@ static inline void slab_free_freelist_hook(struct kmem_cache *s,
 	defined(CONFIG_DEBUG_OBJECTS_FREE) ||	\
 	defined(CONFIG_KASAN)
 
-	void *object = head;
-	void *tail_obj = tail ? : head;
-	void *freeptr;
+	void *object;
+	void *next = *head;
+	void *old_tail = *tail ? *tail : *head;
+
+	/* Head and tail of the reconstructed freelist */
+	*head = NULL;
+	*tail = NULL;
 
 	do {
-		freeptr = slab_free_hook(s, object);
-	} while ((object != tail_obj) && (object = freeptr));
+		object = next;
+		next = get_freepointer(s, object);
+		/* If object's reuse doesn't have to be delayed */
+		if (!slab_free_hook(s, object)) {
+			/* Move object to the new freelist */
+			set_freepointer(s, object, *head);
+			*head = object;
+			if (!*tail)
+				*tail = object;
+		}
+	} while (object != old_tail);
+
+	if (*head == *tail)
+		*tail = NULL;
+
+	return *head != NULL;
+#else
+	return true;
 #endif
 }
 
@@ -2968,14 +2981,12 @@ static __always_inline void slab_free(struct kmem_cache *s, struct page *page,
 				      void *head, void *tail, int cnt,
 				      unsigned long addr)
 {
-	slab_free_freelist_hook(s, head, tail);
 	/*
-	 * slab_free_freelist_hook() could have put the items into quarantine.
-	 * If so, no need to free them.
+	 * With KASAN enabled slab_free_freelist_hook modifies the freelist
+	 * to remove objects, whose reuse must be delayed.
 	 */
-	if (s->flags & SLAB_KASAN && !(s->flags & SLAB_TYPESAFE_BY_RCU))
-		return;
-	do_slab_free(s, page, head, tail, cnt, addr);
+	if (slab_free_freelist_hook(s, &head, &tail))
+		do_slab_free(s, page, head, tail, cnt, addr);
 }
 
 #ifdef CONFIG_KASAN
diff --git a/mm/swap_state.c b/mm/swap_state.c
index f233dccd3b1b..07f9aa2340c3 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -124,10 +124,10 @@ int __add_to_swap_cache(struct page *page, swp_entry_t entry)
 	SetPageSwapCache(page);
 
 	address_space = swap_address_space(entry);
-	spin_lock_irq(&address_space->tree_lock);
+	xa_lock_irq(&address_space->i_pages);
 	for (i = 0; i < nr; i++) {
 		set_page_private(page + i, entry.val + i);
-		error = radix_tree_insert(&address_space->page_tree,
+		error = radix_tree_insert(&address_space->i_pages,
 					  idx + i, page + i);
 		if (unlikely(error))
 			break;
@@ -145,13 +145,13 @@ int __add_to_swap_cache(struct page *page, swp_entry_t entry)
 		VM_BUG_ON(error == -EEXIST);
 		set_page_private(page + i, 0UL);
 		while (i--) {
-			radix_tree_delete(&address_space->page_tree, idx + i);
+			radix_tree_delete(&address_space->i_pages, idx + i);
 			set_page_private(page + i, 0UL);
 		}
 		ClearPageSwapCache(page);
 		page_ref_sub(page, nr);
 	}
-	spin_unlock_irq(&address_space->tree_lock);
+	xa_unlock_irq(&address_space->i_pages);
 
 	return error;
 }
@@ -188,7 +188,7 @@ void __delete_from_swap_cache(struct page *page)
 	address_space = swap_address_space(entry);
 	idx = swp_offset(entry);
 	for (i = 0; i < nr; i++) {
-		radix_tree_delete(&address_space->page_tree, idx + i);
+		radix_tree_delete(&address_space->i_pages, idx + i);
 		set_page_private(page + i, 0);
 	}
 	ClearPageSwapCache(page);
@@ -272,9 +272,9 @@ void delete_from_swap_cache(struct page *page)
 	entry.val = page_private(page);
 
 	address_space = swap_address_space(entry);
-	spin_lock_irq(&address_space->tree_lock);
+	xa_lock_irq(&address_space->i_pages);
 	__delete_from_swap_cache(page);
-	spin_unlock_irq(&address_space->tree_lock);
+	xa_unlock_irq(&address_space->i_pages);
 
 	put_swap_page(page, entry);
 	page_ref_sub(page, hpage_nr_pages(page));
@@ -628,12 +628,11 @@ int init_swap_address_space(unsigned int type, unsigned long nr_pages)
 		return -ENOMEM;
 	for (i = 0; i < nr; i++) {
 		space = spaces + i;
-		INIT_RADIX_TREE(&space->page_tree, GFP_ATOMIC|__GFP_NOWARN);
+		INIT_RADIX_TREE(&space->i_pages, GFP_ATOMIC|__GFP_NOWARN);
 		atomic_set(&space->i_mmap_writable, 0);
 		space->a_ops = &swap_aops;
 		/* swap cache doesn't use writeback related tags */
 		mapping_set_no_writeback_tags(space);
-		spin_lock_init(&space->tree_lock);
 	}
 	nr_swapper_spaces[type] = nr;
 	rcu_assign_pointer(swapper_spaces[type], spaces);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index c7a33717d079..cc2cf04d9018 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -85,7 +85,7 @@ PLIST_HEAD(swap_active_head);
  * is held and the locking order requires swap_lock to be taken
  * before any swap_info_struct->lock.
  */
-struct plist_head *swap_avail_heads;
+static struct plist_head *swap_avail_heads;
 static DEFINE_SPINLOCK(swap_avail_lock);
 
 struct swap_info_struct *swap_info[MAX_SWAPFILES];
@@ -2961,6 +2961,10 @@ static unsigned long read_swap_header(struct swap_info_struct *p,
 	maxpages = swp_offset(pte_to_swp_entry(
 			swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1;
 	last_page = swap_header->info.last_page;
+	if (!last_page) {
+		pr_warn("Empty swap-file\n");
+		return 0;
+	}
 	if (last_page > maxpages) {
 		pr_warn("Truncating oversized swap area, only using %luk out of %luk\n",
 			maxpages << (PAGE_SHIFT - 10),
diff --git a/mm/truncate.c b/mm/truncate.c
index c34e2fd4f583..1d2fb2dca96f 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -36,11 +36,11 @@ static inline void __clear_shadow_entry(struct address_space *mapping,
 	struct radix_tree_node *node;
 	void **slot;
 
-	if (!__radix_tree_lookup(&mapping->page_tree, index, &node, &slot))
+	if (!__radix_tree_lookup(&mapping->i_pages, index, &node, &slot))
 		return;
 	if (*slot != entry)
 		return;
-	__radix_tree_replace(&mapping->page_tree, node, slot, NULL,
+	__radix_tree_replace(&mapping->i_pages, node, slot, NULL,
 			     workingset_update_node);
 	mapping->nrexceptional--;
 }
@@ -48,9 +48,9 @@ static inline void __clear_shadow_entry(struct address_space *mapping,
 static void clear_shadow_entry(struct address_space *mapping, pgoff_t index,
 			       void *entry)
 {
-	spin_lock_irq(&mapping->tree_lock);
+	xa_lock_irq(&mapping->i_pages);
 	__clear_shadow_entry(mapping, index, entry);
-	spin_unlock_irq(&mapping->tree_lock);
+	xa_unlock_irq(&mapping->i_pages);
 }
 
 /*
@@ -79,7 +79,7 @@ static void truncate_exceptional_pvec_entries(struct address_space *mapping,
 	dax = dax_mapping(mapping);
 	lock = !dax && indices[j] < end;
 	if (lock)
-		spin_lock_irq(&mapping->tree_lock);
+		xa_lock_irq(&mapping->i_pages);
 
 	for (i = j; i < pagevec_count(pvec); i++) {
 		struct page *page = pvec->pages[i];
@@ -102,7 +102,7 @@ static void truncate_exceptional_pvec_entries(struct address_space *mapping,
 	}
 
 	if (lock)
-		spin_unlock_irq(&mapping->tree_lock);
+		xa_unlock_irq(&mapping->i_pages);
 	pvec->nr = j;
 }
 
@@ -518,8 +518,8 @@ void truncate_inode_pages_final(struct address_space *mapping)
 		 * modification that does not see AS_EXITING is
 		 * completed before starting the final truncate.
 		 */
-		spin_lock_irq(&mapping->tree_lock);
-		spin_unlock_irq(&mapping->tree_lock);
+		xa_lock_irq(&mapping->i_pages);
+		xa_unlock_irq(&mapping->i_pages);
 
 		truncate_inode_pages(mapping, 0);
 	}
@@ -627,13 +627,13 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page)
 	if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL))
 		return 0;
 
-	spin_lock_irqsave(&mapping->tree_lock, flags);
+	xa_lock_irqsave(&mapping->i_pages, flags);
 	if (PageDirty(page))
 		goto failed;
 
 	BUG_ON(page_has_private(page));
 	__delete_from_page_cache(page, NULL);
-	spin_unlock_irqrestore(&mapping->tree_lock, flags);
+	xa_unlock_irqrestore(&mapping->i_pages, flags);
 
 	if (mapping->a_ops->freepage)
 		mapping->a_ops->freepage(page);
@@ -641,7 +641,7 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page)
 	put_page(page);	/* pagecache ref */
 	return 1;
 failed:
-	spin_unlock_irqrestore(&mapping->tree_lock, flags);
+	xa_unlock_irqrestore(&mapping->i_pages, flags);
 	return 0;
 }
 
diff --git a/mm/util.c b/mm/util.c
index 029fc2f3b395..1fc4fa7576f7 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -287,7 +287,7 @@ int vma_is_stack_for_current(struct vm_area_struct *vma)
 }
 
 #if defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT)
-void arch_pick_mmap_layout(struct mm_struct *mm)
+void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
 {
 	mm->mmap_base = TASK_UNMAPPED_BASE;
 	mm->get_unmapped_area = arch_get_unmapped_area;
@@ -668,6 +668,13 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
 		free += global_node_page_state(NR_SLAB_RECLAIMABLE);
 
 		/*
+		 * Part of the kernel memory, which can be released
+		 * under memory pressure.
+		 */
+		free += global_node_page_state(
+			NR_INDIRECTLY_RECLAIMABLE_BYTES) >> PAGE_SHIFT;
+
+		/*
 		 * Leave reserved pages. The pages are not for anonymous pages.
 		 */
 		if (free <= totalreserve_pages)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 4390a8d5be41..8b920ce3ae02 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -116,6 +116,16 @@ struct scan_control {
 
 	/* Number of pages freed so far during a call to shrink_zones() */
 	unsigned long nr_reclaimed;
+
+	struct {
+		unsigned int dirty;
+		unsigned int unqueued_dirty;
+		unsigned int congested;
+		unsigned int writeback;
+		unsigned int immediate;
+		unsigned int file_taken;
+		unsigned int taken;
+	} nr;
 };
 
 #ifdef ARCH_HAS_PREFETCH
@@ -190,6 +200,29 @@ static bool sane_reclaim(struct scan_control *sc)
 #endif
 	return false;
 }
+
+static void set_memcg_congestion(pg_data_t *pgdat,
+				struct mem_cgroup *memcg,
+				bool congested)
+{
+	struct mem_cgroup_per_node *mn;
+
+	if (!memcg)
+		return;
+
+	mn = mem_cgroup_nodeinfo(memcg, pgdat->node_id);
+	WRITE_ONCE(mn->congested, congested);
+}
+
+static bool memcg_congested(pg_data_t *pgdat,
+			struct mem_cgroup *memcg)
+{
+	struct mem_cgroup_per_node *mn;
+
+	mn = mem_cgroup_nodeinfo(memcg, pgdat->node_id);
+	return READ_ONCE(mn->congested);
+
+}
 #else
 static bool global_reclaim(struct scan_control *sc)
 {
@@ -200,6 +233,18 @@ static bool sane_reclaim(struct scan_control *sc)
 {
 	return true;
 }
+
+static inline void set_memcg_congestion(struct pglist_data *pgdat,
+				struct mem_cgroup *memcg, bool congested)
+{
+}
+
+static inline bool memcg_congested(struct pglist_data *pgdat,
+			struct mem_cgroup *memcg)
+{
+	return false;
+
+}
 #endif
 
 /*
@@ -648,7 +693,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
 	BUG_ON(!PageLocked(page));
 	BUG_ON(mapping != page_mapping(page));
 
-	spin_lock_irqsave(&mapping->tree_lock, flags);
+	xa_lock_irqsave(&mapping->i_pages, flags);
 	/*
 	 * The non racy check for a busy page.
 	 *
@@ -672,7 +717,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
 	 * load is not satisfied before that of page->_refcount.
 	 *
 	 * Note that if SetPageDirty is always performed via set_page_dirty,
-	 * and thus under tree_lock, then this ordering is not required.
+	 * and thus under the i_pages lock, then this ordering is not required.
 	 */
 	if (unlikely(PageTransHuge(page)) && PageSwapCache(page))
 		refcount = 1 + HPAGE_PMD_NR;
@@ -690,7 +735,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
 		swp_entry_t swap = { .val = page_private(page) };
 		mem_cgroup_swapout(page, swap);
 		__delete_from_swap_cache(page);
-		spin_unlock_irqrestore(&mapping->tree_lock, flags);
+		xa_unlock_irqrestore(&mapping->i_pages, flags);
 		put_swap_page(page, swap);
 	} else {
 		void (*freepage)(struct page *);
@@ -711,13 +756,13 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
 		 * only page cache pages found in these are zero pages
 		 * covering holes, and because we don't want to mix DAX
 		 * exceptional entries and shadow exceptional entries in the
-		 * same page_tree.
+		 * same address_space.
 		 */
 		if (reclaimed && page_is_file_cache(page) &&
 		    !mapping_exiting(mapping) && !dax_mapping(mapping))
 			shadow = workingset_eviction(mapping, page);
 		__delete_from_page_cache(page, shadow);
-		spin_unlock_irqrestore(&mapping->tree_lock, flags);
+		xa_unlock_irqrestore(&mapping->i_pages, flags);
 
 		if (freepage != NULL)
 			freepage(page);
@@ -726,7 +771,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
 	return 1;
 
 cannot_free:
-	spin_unlock_irqrestore(&mapping->tree_lock, flags);
+	xa_unlock_irqrestore(&mapping->i_pages, flags);
 	return 0;
 }
 
@@ -857,17 +902,6 @@ static void page_check_dirty_writeback(struct page *page,
 		mapping->a_ops->is_dirty_writeback(page, dirty, writeback);
 }
 
-struct reclaim_stat {
-	unsigned nr_dirty;
-	unsigned nr_unqueued_dirty;
-	unsigned nr_congested;
-	unsigned nr_writeback;
-	unsigned nr_immediate;
-	unsigned nr_activate;
-	unsigned nr_ref_keep;
-	unsigned nr_unmap_fail;
-};
-
 /*
  * shrink_page_list() returns the number of reclaimed pages
  */
@@ -926,7 +960,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 			(PageSwapCache(page) && (sc->gfp_mask & __GFP_IO));
 
 		/*
-		 * The number of dirty pages determines if a zone is marked
+		 * The number of dirty pages determines if a node is marked
 		 * reclaim_congested which affects wait_iff_congested. kswapd
 		 * will stall and start writing pages if the tail of the LRU
 		 * is all dirty unqueued pages.
@@ -1755,23 +1789,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 	free_unref_page_list(&page_list);
 
 	/*
-	 * If reclaim is isolating dirty pages under writeback, it implies
-	 * that the long-lived page allocation rate is exceeding the page
-	 * laundering rate. Either the global limits are not being effective
-	 * at throttling processes due to the page distribution throughout
-	 * zones or there is heavy usage of a slow backing device. The
-	 * only option is to throttle from reclaim context which is not ideal
-	 * as there is no guarantee the dirtying process is throttled in the
-	 * same way balance_dirty_pages() manages.
-	 *
-	 * Once a zone is flagged ZONE_WRITEBACK, kswapd will count the number
-	 * of pages under pages flagged for immediate reclaim and stall if any
-	 * are encountered in the nr_immediate check below.
-	 */
-	if (stat.nr_writeback && stat.nr_writeback == nr_taken)
-		set_bit(PGDAT_WRITEBACK, &pgdat->flags);
-
-	/*
 	 * If dirty pages are scanned that are not queued for IO, it
 	 * implies that flushers are not doing their job. This can
 	 * happen when memory pressure pushes dirty pages to the end of
@@ -1785,48 +1802,17 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 	if (stat.nr_unqueued_dirty == nr_taken)
 		wakeup_flusher_threads(WB_REASON_VMSCAN);
 
-	/*
-	 * Legacy memcg will stall in page writeback so avoid forcibly
-	 * stalling here.
-	 */
-	if (sane_reclaim(sc)) {
-		/*
-		 * Tag a zone as congested if all the dirty pages scanned were
-		 * backed by a congested BDI and wait_iff_congested will stall.
-		 */
-		if (stat.nr_dirty && stat.nr_dirty == stat.nr_congested)
-			set_bit(PGDAT_CONGESTED, &pgdat->flags);
-
-		/* Allow kswapd to start writing pages during reclaim. */
-		if (stat.nr_unqueued_dirty == nr_taken)
-			set_bit(PGDAT_DIRTY, &pgdat->flags);
-
-		/*
-		 * If kswapd scans pages marked marked for immediate
-		 * reclaim and under writeback (nr_immediate), it implies
-		 * that pages are cycling through the LRU faster than
-		 * they are written so also forcibly stall.
-		 */
-		if (stat.nr_immediate && current_may_throttle())
-			congestion_wait(BLK_RW_ASYNC, HZ/10);
-	}
-
-	/*
-	 * Stall direct reclaim for IO completions if underlying BDIs or zone
-	 * is congested. Allow kswapd to continue until it starts encountering
-	 * unqueued dirty pages or cycling through the LRU too quickly.
-	 */
-	if (!sc->hibernation_mode && !current_is_kswapd() &&
-	    current_may_throttle())
-		wait_iff_congested(pgdat, BLK_RW_ASYNC, HZ/10);
+	sc->nr.dirty += stat.nr_dirty;
+	sc->nr.congested += stat.nr_congested;
+	sc->nr.unqueued_dirty += stat.nr_unqueued_dirty;
+	sc->nr.writeback += stat.nr_writeback;
+	sc->nr.immediate += stat.nr_immediate;
+	sc->nr.taken += nr_taken;
+	if (file)
+		sc->nr.file_taken += nr_taken;
 
 	trace_mm_vmscan_lru_shrink_inactive(pgdat->node_id,
-			nr_scanned, nr_reclaimed,
-			stat.nr_dirty,  stat.nr_writeback,
-			stat.nr_congested, stat.nr_immediate,
-			stat.nr_activate, stat.nr_ref_keep,
-			stat.nr_unmap_fail,
-			sc->priority, file);
+			nr_scanned, nr_reclaimed, &stat, sc->priority, file);
 	return nr_reclaimed;
 }
 
@@ -2507,6 +2493,12 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat,
 	return true;
 }
 
+static bool pgdat_memcg_congested(pg_data_t *pgdat, struct mem_cgroup *memcg)
+{
+	return test_bit(PGDAT_CONGESTED, &pgdat->flags) ||
+		(memcg && memcg_congested(pgdat, memcg));
+}
+
 static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
 {
 	struct reclaim_state *reclaim_state = current->reclaim_state;
@@ -2522,6 +2514,8 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
 		unsigned long node_lru_pages = 0;
 		struct mem_cgroup *memcg;
 
+		memset(&sc->nr, 0, sizeof(sc->nr));
+
 		nr_reclaimed = sc->nr_reclaimed;
 		nr_scanned = sc->nr_scanned;
 
@@ -2536,7 +2530,7 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
 					sc->memcg_low_skipped = 1;
 					continue;
 				}
-				mem_cgroup_event(memcg, MEMCG_LOW);
+				memcg_memory_event(memcg, MEMCG_LOW);
 			}
 
 			reclaimed = sc->nr_reclaimed;
@@ -2587,6 +2581,67 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
 		if (sc->nr_reclaimed - nr_reclaimed)
 			reclaimable = true;
 
+		if (current_is_kswapd()) {
+			/*
+			 * If reclaim is isolating dirty pages under writeback,
+			 * it implies that the long-lived page allocation rate
+			 * is exceeding the page laundering rate. Either the
+			 * global limits are not being effective at throttling
+			 * processes due to the page distribution throughout
+			 * zones or there is heavy usage of a slow backing
+			 * device. The only option is to throttle from reclaim
+			 * context which is not ideal as there is no guarantee
+			 * the dirtying process is throttled in the same way
+			 * balance_dirty_pages() manages.
+			 *
+			 * Once a node is flagged PGDAT_WRITEBACK, kswapd will
+			 * count the number of pages under pages flagged for
+			 * immediate reclaim and stall if any are encountered
+			 * in the nr_immediate check below.
+			 */
+			if (sc->nr.writeback && sc->nr.writeback == sc->nr.taken)
+				set_bit(PGDAT_WRITEBACK, &pgdat->flags);
+
+			/*
+			 * Tag a node as congested if all the dirty pages
+			 * scanned were backed by a congested BDI and
+			 * wait_iff_congested will stall.
+			 */
+			if (sc->nr.dirty && sc->nr.dirty == sc->nr.congested)
+				set_bit(PGDAT_CONGESTED, &pgdat->flags);
+
+			/* Allow kswapd to start writing pages during reclaim.*/
+			if (sc->nr.unqueued_dirty == sc->nr.file_taken)
+				set_bit(PGDAT_DIRTY, &pgdat->flags);
+
+			/*
+			 * If kswapd scans pages marked marked for immediate
+			 * reclaim and under writeback (nr_immediate), it
+			 * implies that pages are cycling through the LRU
+			 * faster than they are written so also forcibly stall.
+			 */
+			if (sc->nr.immediate)
+				congestion_wait(BLK_RW_ASYNC, HZ/10);
+		}
+
+		/*
+		 * Legacy memcg will stall in page writeback so avoid forcibly
+		 * stalling in wait_iff_congested().
+		 */
+		if (!global_reclaim(sc) && sane_reclaim(sc) &&
+		    sc->nr.dirty && sc->nr.dirty == sc->nr.congested)
+			set_memcg_congestion(pgdat, root, true);
+
+		/*
+		 * Stall direct reclaim for IO completions if underlying BDIs
+		 * and node is congested. Allow kswapd to continue until it
+		 * starts encountering unqueued dirty pages or cycling through
+		 * the LRU too quickly.
+		 */
+		if (!sc->hibernation_mode && !current_is_kswapd() &&
+		   current_may_throttle() && pgdat_memcg_congested(pgdat, root))
+			wait_iff_congested(BLK_RW_ASYNC, HZ/10);
+
 	} while (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed,
 					 sc->nr_scanned - nr_scanned, sc));
 
@@ -2802,6 +2857,7 @@ retry:
 			continue;
 		last_pgdat = zone->zone_pgdat;
 		snapshot_refaults(sc->target_mem_cgroup, zone->zone_pgdat);
+		set_memcg_congestion(last_pgdat, sc->target_mem_cgroup, false);
 	}
 
 	delayacct_freepages_end();
@@ -3808,7 +3864,7 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
 
 	if (node_pagecache_reclaimable(pgdat) > pgdat->min_unmapped_pages) {
 		/*
-		 * Free memory by calling shrink zone with increasing
+		 * Free memory by calling shrink node with increasing
 		 * priorities until we have enough memory freed.
 		 */
 		do {
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 33581be705f0..536332e988b8 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1161,6 +1161,7 @@ const char * const vmstat_text[] = {
 	"nr_vmscan_immediate_reclaim",
 	"nr_dirtied",
 	"nr_written",
+	"nr_indirectly_reclaimable",
 
 	/* enum writeback_stat_item counters */
 	"nr_dirty_threshold",
diff --git a/mm/workingset.c b/mm/workingset.c
index b7d616a3bbbe..40ee02c83978 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -202,7 +202,7 @@ static void unpack_shadow(void *shadow, int *memcgidp, pg_data_t **pgdat,
  * @mapping: address space the page was backing
  * @page: the page being evicted
  *
- * Returns a shadow entry to be stored in @mapping->page_tree in place
+ * Returns a shadow entry to be stored in @mapping->i_pages in place
  * of the evicted @page so that a later refault can be detected.
  */
 void *workingset_eviction(struct address_space *mapping, struct page *page)
@@ -348,7 +348,7 @@ void workingset_update_node(struct radix_tree_node *node)
 	 *
 	 * Avoid acquiring the list_lru lock when the nodes are
 	 * already where they should be. The list_empty() test is safe
-	 * as node->private_list is protected by &mapping->tree_lock.
+	 * as node->private_list is protected by the i_pages lock.
 	 */
 	if (node->count && node->count == node->exceptional) {
 		if (list_empty(&node->private_list))
@@ -366,7 +366,7 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker,
 	unsigned long nodes;
 	unsigned long cache;
 
-	/* list_lru lock nests inside IRQ-safe mapping->tree_lock */
+	/* list_lru lock nests inside the IRQ-safe i_pages lock */
 	local_irq_disable();
 	nodes = list_lru_shrink_count(&shadow_nodes, sc);
 	local_irq_enable();
@@ -419,21 +419,21 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
 
 	/*
 	 * Page cache insertions and deletions synchroneously maintain
-	 * the shadow node LRU under the mapping->tree_lock and the
+	 * the shadow node LRU under the i_pages lock and the
 	 * lru_lock.  Because the page cache tree is emptied before
 	 * the inode can be destroyed, holding the lru_lock pins any
 	 * address_space that has radix tree nodes on the LRU.
 	 *
-	 * We can then safely transition to the mapping->tree_lock to
+	 * We can then safely transition to the i_pages lock to
 	 * pin only the address_space of the particular node we want
 	 * to reclaim, take the node off-LRU, and drop the lru_lock.
 	 */
 
 	node = container_of(item, struct radix_tree_node, private_list);
-	mapping = container_of(node->root, struct address_space, page_tree);
+	mapping = container_of(node->root, struct address_space, i_pages);
 
 	/* Coming from the list, invert the lock order */
-	if (!spin_trylock(&mapping->tree_lock)) {
+	if (!xa_trylock(&mapping->i_pages)) {
 		spin_unlock(lru_lock);
 		ret = LRU_RETRY;
 		goto out;
@@ -468,11 +468,11 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
 	if (WARN_ON_ONCE(node->exceptional))
 		goto out_invalid;
 	inc_lruvec_page_state(virt_to_page(node), WORKINGSET_NODERECLAIM);
-	__radix_tree_delete_node(&mapping->page_tree, node,
+	__radix_tree_delete_node(&mapping->i_pages, node,
 				 workingset_lookup_update(mapping));
 
 out_invalid:
-	spin_unlock(&mapping->tree_lock);
+	xa_unlock(&mapping->i_pages);
 	ret = LRU_REMOVED_RETRY;
 out:
 	local_irq_enable();
@@ -487,7 +487,7 @@ static unsigned long scan_shadow_nodes(struct shrinker *shrinker,
 {
 	unsigned long ret;
 
-	/* list_lru lock nests inside IRQ-safe mapping->tree_lock */
+	/* list_lru lock nests inside the IRQ-safe i_pages lock */
 	local_irq_disable();
 	ret = list_lru_shrink_walk(&shadow_nodes, sc, shadow_lru_isolate, NULL);
 	local_irq_enable();
@@ -503,7 +503,7 @@ static struct shrinker workingset_shadow_shrinker = {
 
 /*
  * Our list_lru->lock is IRQ-safe as it nests inside the IRQ-safe
- * mapping->tree_lock.
+ * i_pages lock.
  */
 static struct lock_class_key shadow_nodes_key;
 
diff --git a/mm/z3fold.c b/mm/z3fold.c
index f579ad4a8100..c0bca6153b95 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -467,6 +467,8 @@ static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp,
 	spin_lock_init(&pool->lock);
 	spin_lock_init(&pool->stale_lock);
 	pool->unbuddied = __alloc_percpu(sizeof(struct list_head)*NCHUNKS, 2);
+	if (!pool->unbuddied)
+		goto out_pool;
 	for_each_possible_cpu(cpu) {
 		struct list_head *unbuddied =
 				per_cpu_ptr(pool->unbuddied, cpu);
@@ -479,7 +481,7 @@ static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp,
 	pool->name = name;
 	pool->compact_wq = create_singlethread_workqueue(pool->name);
 	if (!pool->compact_wq)
-		goto out;
+		goto out_unbuddied;
 	pool->release_wq = create_singlethread_workqueue(pool->name);
 	if (!pool->release_wq)
 		goto out_wq;
@@ -489,8 +491,11 @@ static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp,
 
 out_wq:
 	destroy_workqueue(pool->compact_wq);
-out:
+out_unbuddied:
+	free_percpu(pool->unbuddied);
+out_pool:
 	kfree(pool);
+out:
 	return NULL;
 }
 
@@ -533,7 +538,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
 	struct z3fold_header *zhdr = NULL;
 	struct page *page = NULL;
 	enum buddy bud;
-	bool can_sleep = (gfp & __GFP_RECLAIM) == __GFP_RECLAIM;
+	bool can_sleep = gfpflags_allow_blocking(gfp);
 
 	if (!size || (gfp & __GFP_HIGHMEM))
 		return -EINVAL;
diff --git a/net/ceph/Makefile b/net/ceph/Makefile
index b4bded4b5396..12bf49772d24 100644
--- a/net/ceph/Makefile
+++ b/net/ceph/Makefile
@@ -8,6 +8,7 @@ libceph-y := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
 	mon_client.o \
 	cls_lock_client.o \
 	osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \
+	striper.o \
 	debugfs.o \
 	auth.o auth_none.o \
 	crypto.o armor.o \
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 4adf07826f4a..584fdbef2088 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -72,6 +72,7 @@ const char *ceph_msg_type_name(int type)
 	case CEPH_MSG_MON_GET_VERSION: return "mon_get_version";
 	case CEPH_MSG_MON_GET_VERSION_REPLY: return "mon_get_version_reply";
 	case CEPH_MSG_MDS_MAP: return "mds_map";
+	case CEPH_MSG_FS_MAP_USER: return "fs_map_user";
 	case CEPH_MSG_CLIENT_SESSION: return "client_session";
 	case CEPH_MSG_CLIENT_RECONNECT: return "client_reconnect";
 	case CEPH_MSG_CLIENT_REQUEST: return "client_request";
@@ -79,8 +80,13 @@ const char *ceph_msg_type_name(int type)
 	case CEPH_MSG_CLIENT_REPLY: return "client_reply";
 	case CEPH_MSG_CLIENT_CAPS: return "client_caps";
 	case CEPH_MSG_CLIENT_CAPRELEASE: return "client_cap_release";
+	case CEPH_MSG_CLIENT_QUOTA: return "client_quota";
 	case CEPH_MSG_CLIENT_SNAP: return "client_snap";
 	case CEPH_MSG_CLIENT_LEASE: return "client_lease";
+	case CEPH_MSG_POOLOP_REPLY: return "poolop_reply";
+	case CEPH_MSG_POOLOP: return "poolop";
+	case CEPH_MSG_MON_COMMAND: return "mon_command";
+	case CEPH_MSG_MON_COMMAND_ACK: return "mon_command_ack";
 	case CEPH_MSG_OSD_MAP: return "osd_map";
 	case CEPH_MSG_OSD_OP: return "osd_op";
 	case CEPH_MSG_OSD_OPREPLY: return "osd_opreply";
@@ -217,7 +223,7 @@ static int parse_fsid(const char *str, struct ceph_fsid *fsid)
 
 	if (i == 16)
 		err = 0;
-	dout("parse_fsid ret %d got fsid %pU", err, fsid);
+	dout("parse_fsid ret %d got fsid %pU\n", err, fsid);
 	return err;
 }
 
diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
index bf9d079cbafd..02172c408ff2 100644
--- a/net/ceph/crypto.c
+++ b/net/ceph/crypto.c
@@ -347,10 +347,12 @@ struct key_type key_type_ceph = {
 	.destroy	= ceph_key_destroy,
 };
 
-int ceph_crypto_init(void) {
+int __init ceph_crypto_init(void)
+{
 	return register_key_type(&key_type_ceph);
 }
 
-void ceph_crypto_shutdown(void) {
+void ceph_crypto_shutdown(void)
+{
 	unregister_key_type(&key_type_ceph);
 }
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index 1eef6806aa1a..02952605d121 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -389,7 +389,7 @@ CEPH_DEFINE_SHOW_FUNC(monc_show)
 CEPH_DEFINE_SHOW_FUNC(osdc_show)
 CEPH_DEFINE_SHOW_FUNC(client_options_show)
 
-int ceph_debugfs_init(void)
+int __init ceph_debugfs_init(void)
 {
 	ceph_debugfs_dir = debugfs_create_dir("ceph", NULL);
 	if (!ceph_debugfs_dir)
@@ -418,7 +418,7 @@ int ceph_debugfs_client_init(struct ceph_client *client)
 		goto out;
 
 	client->monc.debugfs_file = debugfs_create_file("monc",
-						      0600,
+						      0400,
 						      client->debugfs_dir,
 						      client,
 						      &monc_show_fops);
@@ -426,7 +426,7 @@ int ceph_debugfs_client_init(struct ceph_client *client)
 		goto out;
 
 	client->osdc.debugfs_file = debugfs_create_file("osdc",
-						      0600,
+						      0400,
 						      client->debugfs_dir,
 						      client,
 						      &osdc_show_fops);
@@ -434,7 +434,7 @@ int ceph_debugfs_client_init(struct ceph_client *client)
 		goto out;
 
 	client->debugfs_monmap = debugfs_create_file("monmap",
-					0600,
+					0400,
 					client->debugfs_dir,
 					client,
 					&monmap_show_fops);
@@ -442,7 +442,7 @@ int ceph_debugfs_client_init(struct ceph_client *client)
 		goto out;
 
 	client->debugfs_osdmap = debugfs_create_file("osdmap",
-					0600,
+					0400,
 					client->debugfs_dir,
 					client,
 					&osdmap_show_fops);
@@ -450,7 +450,7 @@ int ceph_debugfs_client_init(struct ceph_client *client)
 		goto out;
 
 	client->debugfs_options = debugfs_create_file("client_options",
-					0600,
+					0400,
 					client->debugfs_dir,
 					client,
 					&client_options_show_fops);
@@ -477,7 +477,7 @@ void ceph_debugfs_client_cleanup(struct ceph_client *client)
 
 #else  /* CONFIG_DEBUG_FS */
 
-int ceph_debugfs_init(void)
+int __init ceph_debugfs_init(void)
 {
 	return 0;
 }
@@ -496,6 +496,3 @@ void ceph_debugfs_client_cleanup(struct ceph_client *client)
 }
 
 #endif  /* CONFIG_DEBUG_FS */
-
-EXPORT_SYMBOL(ceph_debugfs_init);
-EXPORT_SYMBOL(ceph_debugfs_cleanup);
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 8a4d3758030b..fcb40c12b1f8 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -277,7 +277,7 @@ static void _ceph_msgr_exit(void)
 	ceph_msgr_slab_exit();
 }
 
-int ceph_msgr_init(void)
+int __init ceph_msgr_init(void)
 {
 	if (ceph_msgr_slab_init())
 		return -ENOMEM;
@@ -299,7 +299,6 @@ int ceph_msgr_init(void)
 
 	return -ENOMEM;
 }
-EXPORT_SYMBOL(ceph_msgr_init);
 
 void ceph_msgr_exit(void)
 {
@@ -307,7 +306,6 @@ void ceph_msgr_exit(void)
 
 	_ceph_msgr_exit();
 }
-EXPORT_SYMBOL(ceph_msgr_exit);
 
 void ceph_msgr_flush(void)
 {
@@ -839,93 +837,112 @@ static void ceph_msg_data_bio_cursor_init(struct ceph_msg_data_cursor *cursor,
 					size_t length)
 {
 	struct ceph_msg_data *data = cursor->data;
-	struct bio *bio;
+	struct ceph_bio_iter *it = &cursor->bio_iter;
 
-	BUG_ON(data->type != CEPH_MSG_DATA_BIO);
+	cursor->resid = min_t(size_t, length, data->bio_length);
+	*it = data->bio_pos;
+	if (cursor->resid < it->iter.bi_size)
+		it->iter.bi_size = cursor->resid;
 
-	bio = data->bio;
-	BUG_ON(!bio);
-
-	cursor->resid = min(length, data->bio_length);
-	cursor->bio = bio;
-	cursor->bvec_iter = bio->bi_iter;
-	cursor->last_piece =
-		cursor->resid <= bio_iter_len(bio, cursor->bvec_iter);
+	BUG_ON(cursor->resid < bio_iter_len(it->bio, it->iter));
+	cursor->last_piece = cursor->resid == bio_iter_len(it->bio, it->iter);
 }
 
 static struct page *ceph_msg_data_bio_next(struct ceph_msg_data_cursor *cursor,
 						size_t *page_offset,
 						size_t *length)
 {
-	struct ceph_msg_data *data = cursor->data;
-	struct bio *bio;
-	struct bio_vec bio_vec;
-
-	BUG_ON(data->type != CEPH_MSG_DATA_BIO);
-
-	bio = cursor->bio;
-	BUG_ON(!bio);
-
-	bio_vec = bio_iter_iovec(bio, cursor->bvec_iter);
-
-	*page_offset = (size_t) bio_vec.bv_offset;
-	BUG_ON(*page_offset >= PAGE_SIZE);
-	if (cursor->last_piece) /* pagelist offset is always 0 */
-		*length = cursor->resid;
-	else
-		*length = (size_t) bio_vec.bv_len;
-	BUG_ON(*length > cursor->resid);
-	BUG_ON(*page_offset + *length > PAGE_SIZE);
+	struct bio_vec bv = bio_iter_iovec(cursor->bio_iter.bio,
+					   cursor->bio_iter.iter);
 
-	return bio_vec.bv_page;
+	*page_offset = bv.bv_offset;
+	*length = bv.bv_len;
+	return bv.bv_page;
 }
 
 static bool ceph_msg_data_bio_advance(struct ceph_msg_data_cursor *cursor,
 					size_t bytes)
 {
-	struct bio *bio;
-	struct bio_vec bio_vec;
+	struct ceph_bio_iter *it = &cursor->bio_iter;
 
-	BUG_ON(cursor->data->type != CEPH_MSG_DATA_BIO);
+	BUG_ON(bytes > cursor->resid);
+	BUG_ON(bytes > bio_iter_len(it->bio, it->iter));
+	cursor->resid -= bytes;
+	bio_advance_iter(it->bio, &it->iter, bytes);
 
-	bio = cursor->bio;
-	BUG_ON(!bio);
+	if (!cursor->resid) {
+		BUG_ON(!cursor->last_piece);
+		return false;   /* no more data */
+	}
 
-	bio_vec = bio_iter_iovec(bio, cursor->bvec_iter);
+	if (!bytes || (it->iter.bi_size && it->iter.bi_bvec_done))
+		return false;	/* more bytes to process in this segment */
 
-	/* Advance the cursor offset */
+	if (!it->iter.bi_size) {
+		it->bio = it->bio->bi_next;
+		it->iter = it->bio->bi_iter;
+		if (cursor->resid < it->iter.bi_size)
+			it->iter.bi_size = cursor->resid;
+	}
 
-	BUG_ON(cursor->resid < bytes);
-	cursor->resid -= bytes;
+	BUG_ON(cursor->last_piece);
+	BUG_ON(cursor->resid < bio_iter_len(it->bio, it->iter));
+	cursor->last_piece = cursor->resid == bio_iter_len(it->bio, it->iter);
+	return true;
+}
+#endif /* CONFIG_BLOCK */
 
-	bio_advance_iter(bio, &cursor->bvec_iter, bytes);
+static void ceph_msg_data_bvecs_cursor_init(struct ceph_msg_data_cursor *cursor,
+					size_t length)
+{
+	struct ceph_msg_data *data = cursor->data;
+	struct bio_vec *bvecs = data->bvec_pos.bvecs;
 
-	if (bytes < bio_vec.bv_len)
-		return false;	/* more bytes to process in this segment */
+	cursor->resid = min_t(size_t, length, data->bvec_pos.iter.bi_size);
+	cursor->bvec_iter = data->bvec_pos.iter;
+	cursor->bvec_iter.bi_size = cursor->resid;
 
-	/* Move on to the next segment, and possibly the next bio */
+	BUG_ON(cursor->resid < bvec_iter_len(bvecs, cursor->bvec_iter));
+	cursor->last_piece =
+	    cursor->resid == bvec_iter_len(bvecs, cursor->bvec_iter);
+}
 
-	if (!cursor->bvec_iter.bi_size) {
-		bio = bio->bi_next;
-		cursor->bio = bio;
-		if (bio)
-			cursor->bvec_iter = bio->bi_iter;
-		else
-			memset(&cursor->bvec_iter, 0,
-			       sizeof(cursor->bvec_iter));
-	}
+static struct page *ceph_msg_data_bvecs_next(struct ceph_msg_data_cursor *cursor,
+						size_t *page_offset,
+						size_t *length)
+{
+	struct bio_vec bv = bvec_iter_bvec(cursor->data->bvec_pos.bvecs,
+					   cursor->bvec_iter);
+
+	*page_offset = bv.bv_offset;
+	*length = bv.bv_len;
+	return bv.bv_page;
+}
+
+static bool ceph_msg_data_bvecs_advance(struct ceph_msg_data_cursor *cursor,
+					size_t bytes)
+{
+	struct bio_vec *bvecs = cursor->data->bvec_pos.bvecs;
+
+	BUG_ON(bytes > cursor->resid);
+	BUG_ON(bytes > bvec_iter_len(bvecs, cursor->bvec_iter));
+	cursor->resid -= bytes;
+	bvec_iter_advance(bvecs, &cursor->bvec_iter, bytes);
 
-	if (!cursor->last_piece) {
-		BUG_ON(!cursor->resid);
-		BUG_ON(!bio);
-		/* A short read is OK, so use <= rather than == */
-		if (cursor->resid <= bio_iter_len(bio, cursor->bvec_iter))
-			cursor->last_piece = true;
+	if (!cursor->resid) {
+		BUG_ON(!cursor->last_piece);
+		return false;   /* no more data */
 	}
 
+	if (!bytes || cursor->bvec_iter.bi_bvec_done)
+		return false;	/* more bytes to process in this segment */
+
+	BUG_ON(cursor->last_piece);
+	BUG_ON(cursor->resid < bvec_iter_len(bvecs, cursor->bvec_iter));
+	cursor->last_piece =
+	    cursor->resid == bvec_iter_len(bvecs, cursor->bvec_iter);
 	return true;
 }
-#endif /* CONFIG_BLOCK */
 
 /*
  * For a page array, a piece comes from the first page in the array
@@ -1110,6 +1127,9 @@ static void __ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor)
 		ceph_msg_data_bio_cursor_init(cursor, length);
 		break;
 #endif /* CONFIG_BLOCK */
+	case CEPH_MSG_DATA_BVECS:
+		ceph_msg_data_bvecs_cursor_init(cursor, length);
+		break;
 	case CEPH_MSG_DATA_NONE:
 	default:
 		/* BUG(); */
@@ -1158,14 +1178,19 @@ static struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor,
 		page = ceph_msg_data_bio_next(cursor, page_offset, length);
 		break;
 #endif /* CONFIG_BLOCK */
+	case CEPH_MSG_DATA_BVECS:
+		page = ceph_msg_data_bvecs_next(cursor, page_offset, length);
+		break;
 	case CEPH_MSG_DATA_NONE:
 	default:
 		page = NULL;
 		break;
 	}
+
 	BUG_ON(!page);
 	BUG_ON(*page_offset + *length > PAGE_SIZE);
 	BUG_ON(!*length);
+	BUG_ON(*length > cursor->resid);
 	if (last_piece)
 		*last_piece = cursor->last_piece;
 
@@ -1194,6 +1219,9 @@ static void ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor,
 		new_piece = ceph_msg_data_bio_advance(cursor, bytes);
 		break;
 #endif /* CONFIG_BLOCK */
+	case CEPH_MSG_DATA_BVECS:
+		new_piece = ceph_msg_data_bvecs_advance(cursor, bytes);
+		break;
 	case CEPH_MSG_DATA_NONE:
 	default:
 		BUG();
@@ -1575,13 +1603,18 @@ static int write_partial_message_data(struct ceph_connection *con)
 	 * been revoked, so use the zero page.
 	 */
 	crc = do_datacrc ? le32_to_cpu(msg->footer.data_crc) : 0;
-	while (cursor->resid) {
+	while (cursor->total_resid) {
 		struct page *page;
 		size_t page_offset;
 		size_t length;
 		bool last_piece;
 		int ret;
 
+		if (!cursor->resid) {
+			ceph_msg_data_advance(cursor, 0);
+			continue;
+		}
+
 		page = ceph_msg_data_next(cursor, &page_offset, &length,
 					  &last_piece);
 		ret = ceph_tcp_sendpage(con->sock, page, page_offset,
@@ -2297,7 +2330,12 @@ static int read_partial_msg_data(struct ceph_connection *con)
 
 	if (do_datacrc)
 		crc = con->in_data_crc;
-	while (cursor->resid) {
+	while (cursor->total_resid) {
+		if (!cursor->resid) {
+			ceph_msg_data_advance(cursor, 0);
+			continue;
+		}
+
 		page = ceph_msg_data_next(cursor, &page_offset, &length, NULL);
 		ret = ceph_tcp_recvpage(con->sock, page, page_offset, length);
 		if (ret <= 0) {
@@ -3262,16 +3300,14 @@ void ceph_msg_data_add_pagelist(struct ceph_msg *msg,
 EXPORT_SYMBOL(ceph_msg_data_add_pagelist);
 
 #ifdef	CONFIG_BLOCK
-void ceph_msg_data_add_bio(struct ceph_msg *msg, struct bio *bio,
-		size_t length)
+void ceph_msg_data_add_bio(struct ceph_msg *msg, struct ceph_bio_iter *bio_pos,
+			   u32 length)
 {
 	struct ceph_msg_data *data;
 
-	BUG_ON(!bio);
-
 	data = ceph_msg_data_create(CEPH_MSG_DATA_BIO);
 	BUG_ON(!data);
-	data->bio = bio;
+	data->bio_pos = *bio_pos;
 	data->bio_length = length;
 
 	list_add_tail(&data->links, &msg->data);
@@ -3280,6 +3316,20 @@ void ceph_msg_data_add_bio(struct ceph_msg *msg, struct bio *bio,
 EXPORT_SYMBOL(ceph_msg_data_add_bio);
 #endif	/* CONFIG_BLOCK */
 
+void ceph_msg_data_add_bvecs(struct ceph_msg *msg,
+			     struct ceph_bvec_iter *bvec_pos)
+{
+	struct ceph_msg_data *data;
+
+	data = ceph_msg_data_create(CEPH_MSG_DATA_BVECS);
+	BUG_ON(!data);
+	data->bvec_pos = *bvec_pos;
+
+	list_add_tail(&data->links, &msg->data);
+	msg->data_length += bvec_pos->iter.bi_size;
+}
+EXPORT_SYMBOL(ceph_msg_data_add_bvecs);
+
 /*
  * construct a new message with given type, size
  * the new msg has a ref count of 1.
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 1547107f4854..b3dac24412d3 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -60,7 +60,7 @@ struct ceph_monmap *ceph_monmap_decode(void *p, void *end)
 	num_mon = ceph_decode_32(&p);
 	ceph_decode_need(&p, end, num_mon*sizeof(m->mon_inst[0]), bad);
 
-	if (num_mon >= CEPH_MAX_MON)
+	if (num_mon > CEPH_MAX_MON)
 		goto bad;
 	m = kmalloc(sizeof(*m) + sizeof(m->mon_inst[0])*num_mon, GFP_NOFS);
 	if (m == NULL)
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 2814dba5902d..ea2a6c9fb7ce 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -20,6 +20,7 @@
 #include <linux/ceph/decode.h>
 #include <linux/ceph/auth.h>
 #include <linux/ceph/pagelist.h>
+#include <linux/ceph/striper.h>
 
 #define OSD_OPREPLY_FRONT_LEN	512
 
@@ -103,13 +104,12 @@ static int calc_layout(struct ceph_file_layout *layout, u64 off, u64 *plen,
 			u64 *objnum, u64 *objoff, u64 *objlen)
 {
 	u64 orig_len = *plen;
-	int r;
+	u32 xlen;
 
 	/* object extent? */
-	r = ceph_calc_file_object_mapping(layout, off, orig_len, objnum,
-					  objoff, objlen);
-	if (r < 0)
-		return r;
+	ceph_calc_file_object_mapping(layout, off, orig_len, objnum,
+					  objoff, &xlen);
+	*objlen = xlen;
 	if (*objlen < orig_len) {
 		*plen = *objlen;
 		dout(" skipping last %llu, final file extent %llu~%llu\n",
@@ -117,7 +117,6 @@ static int calc_layout(struct ceph_file_layout *layout, u64 off, u64 *plen,
 	}
 
 	dout("calc_layout objnum=%llx %llu~%llu\n", *objnum, *objoff, *objlen);
-
 	return 0;
 }
 
@@ -148,14 +147,22 @@ static void ceph_osd_data_pagelist_init(struct ceph_osd_data *osd_data,
 
 #ifdef CONFIG_BLOCK
 static void ceph_osd_data_bio_init(struct ceph_osd_data *osd_data,
-			struct bio *bio, size_t bio_length)
+				   struct ceph_bio_iter *bio_pos,
+				   u32 bio_length)
 {
 	osd_data->type = CEPH_OSD_DATA_TYPE_BIO;
-	osd_data->bio = bio;
+	osd_data->bio_pos = *bio_pos;
 	osd_data->bio_length = bio_length;
 }
 #endif /* CONFIG_BLOCK */
 
+static void ceph_osd_data_bvecs_init(struct ceph_osd_data *osd_data,
+				     struct ceph_bvec_iter *bvec_pos)
+{
+	osd_data->type = CEPH_OSD_DATA_TYPE_BVECS;
+	osd_data->bvec_pos = *bvec_pos;
+}
+
 #define osd_req_op_data(oreq, whch, typ, fld)				\
 ({									\
 	struct ceph_osd_request *__oreq = (oreq);			\
@@ -218,16 +225,29 @@ EXPORT_SYMBOL(osd_req_op_extent_osd_data_pagelist);
 
 #ifdef CONFIG_BLOCK
 void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req,
-			unsigned int which, struct bio *bio, size_t bio_length)
+				    unsigned int which,
+				    struct ceph_bio_iter *bio_pos,
+				    u32 bio_length)
 {
 	struct ceph_osd_data *osd_data;
 
 	osd_data = osd_req_op_data(osd_req, which, extent, osd_data);
-	ceph_osd_data_bio_init(osd_data, bio, bio_length);
+	ceph_osd_data_bio_init(osd_data, bio_pos, bio_length);
 }
 EXPORT_SYMBOL(osd_req_op_extent_osd_data_bio);
 #endif /* CONFIG_BLOCK */
 
+void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req,
+					 unsigned int which,
+					 struct ceph_bvec_iter *bvec_pos)
+{
+	struct ceph_osd_data *osd_data;
+
+	osd_data = osd_req_op_data(osd_req, which, extent, osd_data);
+	ceph_osd_data_bvecs_init(osd_data, bvec_pos);
+}
+EXPORT_SYMBOL(osd_req_op_extent_osd_data_bvec_pos);
+
 static void osd_req_op_cls_request_info_pagelist(
 			struct ceph_osd_request *osd_req,
 			unsigned int which, struct ceph_pagelist *pagelist)
@@ -265,6 +285,23 @@ void osd_req_op_cls_request_data_pages(struct ceph_osd_request *osd_req,
 }
 EXPORT_SYMBOL(osd_req_op_cls_request_data_pages);
 
+void osd_req_op_cls_request_data_bvecs(struct ceph_osd_request *osd_req,
+				       unsigned int which,
+				       struct bio_vec *bvecs, u32 bytes)
+{
+	struct ceph_osd_data *osd_data;
+	struct ceph_bvec_iter it = {
+		.bvecs = bvecs,
+		.iter = { .bi_size = bytes },
+	};
+
+	osd_data = osd_req_op_data(osd_req, which, cls, request_data);
+	ceph_osd_data_bvecs_init(osd_data, &it);
+	osd_req->r_ops[which].cls.indata_len += bytes;
+	osd_req->r_ops[which].indata_len += bytes;
+}
+EXPORT_SYMBOL(osd_req_op_cls_request_data_bvecs);
+
 void osd_req_op_cls_response_data_pages(struct ceph_osd_request *osd_req,
 			unsigned int which, struct page **pages, u64 length,
 			u32 alignment, bool pages_from_pool, bool own_pages)
@@ -290,6 +327,8 @@ static u64 ceph_osd_data_length(struct ceph_osd_data *osd_data)
 	case CEPH_OSD_DATA_TYPE_BIO:
 		return (u64)osd_data->bio_length;
 #endif /* CONFIG_BLOCK */
+	case CEPH_OSD_DATA_TYPE_BVECS:
+		return osd_data->bvec_pos.iter.bi_size;
 	default:
 		WARN(true, "unrecognized data type %d\n", (int)osd_data->type);
 		return 0;
@@ -828,8 +867,10 @@ static void ceph_osdc_msg_data_add(struct ceph_msg *msg,
 		ceph_msg_data_add_pagelist(msg, osd_data->pagelist);
 #ifdef CONFIG_BLOCK
 	} else if (osd_data->type == CEPH_OSD_DATA_TYPE_BIO) {
-		ceph_msg_data_add_bio(msg, osd_data->bio, length);
+		ceph_msg_data_add_bio(msg, &osd_data->bio_pos, length);
 #endif
+	} else if (osd_data->type == CEPH_OSD_DATA_TYPE_BVECS) {
+		ceph_msg_data_add_bvecs(msg, &osd_data->bvec_pos);
 	} else {
 		BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_NONE);
 	}
@@ -5065,7 +5106,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
 }
 EXPORT_SYMBOL(ceph_osdc_writepages);
 
-int ceph_osdc_setup(void)
+int __init ceph_osdc_setup(void)
 {
 	size_t size = sizeof(struct ceph_osd_request) +
 	    CEPH_OSD_SLAB_OPS * sizeof(struct ceph_osd_req_op);
@@ -5076,7 +5117,6 @@ int ceph_osdc_setup(void)
 
 	return ceph_osd_request_cache ? 0 : -ENOMEM;
 }
-EXPORT_SYMBOL(ceph_osdc_setup);
 
 void ceph_osdc_cleanup(void)
 {
@@ -5084,7 +5124,6 @@ void ceph_osdc_cleanup(void)
 	kmem_cache_destroy(ceph_osd_request_cache);
 	ceph_osd_request_cache = NULL;
 }
-EXPORT_SYMBOL(ceph_osdc_cleanup);
 
 /*
  * handle incoming message
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 0da27c66349a..9645ffd6acfb 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -4,7 +4,6 @@
 
 #include <linux/module.h>
 #include <linux/slab.h>
-#include <asm/div64.h>
 
 #include <linux/ceph/libceph.h>
 #include <linux/ceph/osdmap.h>
@@ -2141,76 +2140,6 @@ bool ceph_osds_changed(const struct ceph_osds *old_acting,
 }
 
 /*
- * calculate file layout from given offset, length.
- * fill in correct oid, logical length, and object extent
- * offset, length.
- *
- * for now, we write only a single su, until we can
- * pass a stride back to the caller.
- */
-int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
-				   u64 off, u64 len,
-				   u64 *ono,
-				   u64 *oxoff, u64 *oxlen)
-{
-	u32 osize = layout->object_size;
-	u32 su = layout->stripe_unit;
-	u32 sc = layout->stripe_count;
-	u32 bl, stripeno, stripepos, objsetno;
-	u32 su_per_object;
-	u64 t, su_offset;
-
-	dout("mapping %llu~%llu  osize %u fl_su %u\n", off, len,
-	     osize, su);
-	if (su == 0 || sc == 0)
-		goto invalid;
-	su_per_object = osize / su;
-	if (su_per_object == 0)
-		goto invalid;
-	dout("osize %u / su %u = su_per_object %u\n", osize, su,
-	     su_per_object);
-
-	if ((su & ~PAGE_MASK) != 0)
-		goto invalid;
-
-	/* bl = *off / su; */
-	t = off;
-	do_div(t, su);
-	bl = t;
-	dout("off %llu / su %u = bl %u\n", off, su, bl);
-
-	stripeno = bl / sc;
-	stripepos = bl % sc;
-	objsetno = stripeno / su_per_object;
-
-	*ono = objsetno * sc + stripepos;
-	dout("objset %u * sc %u = ono %u\n", objsetno, sc, (unsigned int)*ono);
-
-	/* *oxoff = *off % layout->fl_stripe_unit;  # offset in su */
-	t = off;
-	su_offset = do_div(t, su);
-	*oxoff = su_offset + (stripeno % su_per_object) * su;
-
-	/*
-	 * Calculate the length of the extent being written to the selected
-	 * object. This is the minimum of the full length requested (len) or
-	 * the remainder of the current stripe being written to.
-	 */
-	*oxlen = min_t(u64, len, su - su_offset);
-
-	dout(" obj extent %llu~%llu\n", *oxoff, *oxlen);
-	return 0;
-
-invalid:
-	dout(" invalid layout\n");
-	*ono = 0;
-	*oxoff = 0;
-	*oxlen = 0;
-	return -EINVAL;
-}
-EXPORT_SYMBOL(ceph_calc_file_object_mapping);
-
-/*
  * Map an object into a PG.
  *
  * Should only be called with target_oid and target_oloc (as opposed to
diff --git a/net/ceph/striper.c b/net/ceph/striper.c
new file mode 100644
index 000000000000..c36462dc86b7
--- /dev/null
+++ b/net/ceph/striper.c
@@ -0,0 +1,261 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/math64.h>
+#include <linux/slab.h>
+
+#include <linux/ceph/striper.h>
+#include <linux/ceph/types.h>
+
+/*
+ * Map a file extent to a stripe unit within an object.
+ * Fill in objno, offset into object, and object extent length (i.e. the
+ * number of bytes mapped, less than or equal to @l->stripe_unit).
+ *
+ * Example for stripe_count = 3, stripes_per_object = 4:
+ *
+ * blockno   |  0  3  6  9 |  1  4  7 10 |  2  5  8 11 | 12 15 18 21 | 13 16 19
+ * stripeno  |  0  1  2  3 |  0  1  2  3 |  0  1  2  3 |  4  5  6  7 |  4  5  6
+ * stripepos |      0      |      1      |      2      |      0      |      1
+ * objno     |      0      |      1      |      2      |      3      |      4
+ * objsetno  |                    0                    |                    1
+ */
+void ceph_calc_file_object_mapping(struct ceph_file_layout *l,
+				   u64 off, u64 len,
+				   u64 *objno, u64 *objoff, u32 *xlen)
+{
+	u32 stripes_per_object = l->object_size / l->stripe_unit;
+	u64 blockno;	/* which su in the file (i.e. globally) */
+	u32 blockoff;	/* offset into su */
+	u64 stripeno;	/* which stripe */
+	u32 stripepos;	/* which su in the stripe,
+			   which object in the object set */
+	u64 objsetno;	/* which object set */
+	u32 objsetpos;	/* which stripe in the object set */
+
+	blockno = div_u64_rem(off, l->stripe_unit, &blockoff);
+	stripeno = div_u64_rem(blockno, l->stripe_count, &stripepos);
+	objsetno = div_u64_rem(stripeno, stripes_per_object, &objsetpos);
+
+	*objno = objsetno * l->stripe_count + stripepos;
+	*objoff = objsetpos * l->stripe_unit + blockoff;
+	*xlen = min_t(u64, len, l->stripe_unit - blockoff);
+}
+EXPORT_SYMBOL(ceph_calc_file_object_mapping);
+
+/*
+ * Return the last extent with given objno (@object_extents is sorted
+ * by objno).  If not found, return NULL and set @add_pos so that the
+ * new extent can be added with list_add(add_pos, new_ex).
+ */
+static struct ceph_object_extent *
+lookup_last(struct list_head *object_extents, u64 objno,
+	    struct list_head **add_pos)
+{
+	struct list_head *pos;
+
+	list_for_each_prev(pos, object_extents) {
+		struct ceph_object_extent *ex =
+		    list_entry(pos, typeof(*ex), oe_item);
+
+		if (ex->oe_objno == objno)
+			return ex;
+
+		if (ex->oe_objno < objno)
+			break;
+	}
+
+	*add_pos = pos;
+	return NULL;
+}
+
+static struct ceph_object_extent *
+lookup_containing(struct list_head *object_extents, u64 objno,
+		  u64 objoff, u32 xlen)
+{
+	struct ceph_object_extent *ex;
+
+	list_for_each_entry(ex, object_extents, oe_item) {
+		if (ex->oe_objno == objno &&
+		    ex->oe_off <= objoff &&
+		    ex->oe_off + ex->oe_len >= objoff + xlen) /* paranoia */
+			return ex;
+
+		if (ex->oe_objno > objno)
+			break;
+	}
+
+	return NULL;
+}
+
+/*
+ * Map a file extent to a sorted list of object extents.
+ *
+ * We want only one (or as few as possible) object extents per object.
+ * Adjacent object extents will be merged together, each returned object
+ * extent may reverse map to multiple different file extents.
+ *
+ * Call @alloc_fn for each new object extent and @action_fn for each
+ * mapped stripe unit, whether it was merged into an already allocated
+ * object extent or started a new object extent.
+ *
+ * Newly allocated object extents are added to @object_extents.
+ * To keep @object_extents sorted, successive calls to this function
+ * must map successive file extents (i.e. the list of file extents that
+ * are mapped using the same @object_extents must be sorted).
+ *
+ * The caller is responsible for @object_extents.
+ */
+int ceph_file_to_extents(struct ceph_file_layout *l, u64 off, u64 len,
+			 struct list_head *object_extents,
+			 struct ceph_object_extent *alloc_fn(void *arg),
+			 void *alloc_arg,
+			 ceph_object_extent_fn_t action_fn,
+			 void *action_arg)
+{
+	struct ceph_object_extent *last_ex, *ex;
+
+	while (len) {
+		struct list_head *add_pos = NULL;
+		u64 objno, objoff;
+		u32 xlen;
+
+		ceph_calc_file_object_mapping(l, off, len, &objno, &objoff,
+					      &xlen);
+
+		last_ex = lookup_last(object_extents, objno, &add_pos);
+		if (!last_ex || last_ex->oe_off + last_ex->oe_len != objoff) {
+			ex = alloc_fn(alloc_arg);
+			if (!ex)
+				return -ENOMEM;
+
+			ex->oe_objno = objno;
+			ex->oe_off = objoff;
+			ex->oe_len = xlen;
+			if (action_fn)
+				action_fn(ex, xlen, action_arg);
+
+			if (!last_ex)
+				list_add(&ex->oe_item, add_pos);
+			else
+				list_add(&ex->oe_item, &last_ex->oe_item);
+		} else {
+			last_ex->oe_len += xlen;
+			if (action_fn)
+				action_fn(last_ex, xlen, action_arg);
+		}
+
+		off += xlen;
+		len -= xlen;
+	}
+
+	for (last_ex = list_first_entry(object_extents, typeof(*ex), oe_item),
+	     ex = list_next_entry(last_ex, oe_item);
+	     &ex->oe_item != object_extents;
+	     last_ex = ex, ex = list_next_entry(ex, oe_item)) {
+		if (last_ex->oe_objno > ex->oe_objno ||
+		    (last_ex->oe_objno == ex->oe_objno &&
+		     last_ex->oe_off + last_ex->oe_len >= ex->oe_off)) {
+			WARN(1, "%s: object_extents list not sorted!\n",
+			     __func__);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(ceph_file_to_extents);
+
+/*
+ * A stripped down, non-allocating version of ceph_file_to_extents(),
+ * for when @object_extents is already populated.
+ */
+int ceph_iterate_extents(struct ceph_file_layout *l, u64 off, u64 len,
+			 struct list_head *object_extents,
+			 ceph_object_extent_fn_t action_fn,
+			 void *action_arg)
+{
+	while (len) {
+		struct ceph_object_extent *ex;
+		u64 objno, objoff;
+		u32 xlen;
+
+		ceph_calc_file_object_mapping(l, off, len, &objno, &objoff,
+					      &xlen);
+
+		ex = lookup_containing(object_extents, objno, objoff, xlen);
+		if (!ex) {
+			WARN(1, "%s: objno %llu %llu~%u not found!\n",
+			     __func__, objno, objoff, xlen);
+			return -EINVAL;
+		}
+
+		action_fn(ex, xlen, action_arg);
+
+		off += xlen;
+		len -= xlen;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(ceph_iterate_extents);
+
+/*
+ * Reverse map an object extent to a sorted list of file extents.
+ *
+ * On success, the caller is responsible for:
+ *
+ *     kfree(file_extents)
+ */
+int ceph_extent_to_file(struct ceph_file_layout *l,
+			u64 objno, u64 objoff, u64 objlen,
+			struct ceph_file_extent **file_extents,
+			u32 *num_file_extents)
+{
+	u32 stripes_per_object = l->object_size / l->stripe_unit;
+	u64 blockno;	/* which su */
+	u32 blockoff;	/* offset into su */
+	u64 stripeno;	/* which stripe */
+	u32 stripepos;	/* which su in the stripe,
+			   which object in the object set */
+	u64 objsetno;	/* which object set */
+	u32 i = 0;
+
+	if (!objlen) {
+		*file_extents = NULL;
+		*num_file_extents = 0;
+		return 0;
+	}
+
+	*num_file_extents = DIV_ROUND_UP_ULL(objoff + objlen, l->stripe_unit) -
+				     DIV_ROUND_DOWN_ULL(objoff, l->stripe_unit);
+	*file_extents = kmalloc_array(*num_file_extents, sizeof(**file_extents),
+				      GFP_NOIO);
+	if (!*file_extents)
+		return -ENOMEM;
+
+	div_u64_rem(objoff, l->stripe_unit, &blockoff);
+	while (objlen) {
+		u64 off, len;
+
+		objsetno = div_u64_rem(objno, l->stripe_count, &stripepos);
+		stripeno = div_u64(objoff, l->stripe_unit) +
+						objsetno * stripes_per_object;
+		blockno = stripeno * l->stripe_count + stripepos;
+		off = blockno * l->stripe_unit + blockoff;
+		len = min_t(u64, objlen, l->stripe_unit - blockoff);
+
+		(*file_extents)[i].fe_off = off;
+		(*file_extents)[i].fe_len = len;
+
+		blockoff = 0;
+		objoff += len;
+		objlen -= len;
+		i++;
+	}
+
+	BUG_ON(i != *num_file_extents);
+	return 0;
+}
+EXPORT_SYMBOL(ceph_extent_to_file);
diff --git a/samples/Kconfig b/samples/Kconfig
index f524f551718e..3db002b9e1d3 100644
--- a/samples/Kconfig
+++ b/samples/Kconfig
@@ -62,6 +62,16 @@ config SAMPLE_KDB
 	  Build an example of how to dynamically add the hello
 	  command to the kdb shell.
 
+config SAMPLE_QMI_CLIENT
+	tristate "Build qmi client sample -- loadable modules only"
+	depends on m
+	depends on ARCH_QCOM
+	depends on NET
+	select QCOM_QMI_HELPERS
+	help
+	  Build an QMI client sample driver, which demonstrates how to
+	  communicate with a remote QRTR service, using QMI encoded messages.
+
 config SAMPLE_RPMSG_CLIENT
 	tristate "Build rpmsg client sample -- loadable modules only"
 	depends on RPMSG && m
diff --git a/samples/Makefile b/samples/Makefile
index 70cf3758dcf2..bd601c038b86 100644
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -3,4 +3,4 @@
 obj-$(CONFIG_SAMPLES)	+= kobject/ kprobes/ trace_events/ livepatch/ \
 			   hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/ \
 			   configfs/ connector/ v4l/ trace_printk/ \
-			   vfio-mdev/ statx/
+			   vfio-mdev/ statx/ qmi/
diff --git a/samples/qmi/Makefile b/samples/qmi/Makefile
new file mode 100644
index 000000000000..2b111d2769df
--- /dev/null
+++ b/samples/qmi/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_SAMPLE_QMI_CLIENT) += qmi_sample_client.o
diff --git a/samples/qmi/qmi_sample_client.c b/samples/qmi/qmi_sample_client.c
new file mode 100644
index 000000000000..c9e7276c3d83
--- /dev/null
+++ b/samples/qmi/qmi_sample_client.c
@@ -0,0 +1,622 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Sample in-kernel QMI client driver
+ *
+ * Copyright (c) 2013-2014, The Linux Foundation. All rights reserved.
+ * Copyright (C) 2017 Linaro Ltd.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/debugfs.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/qrtr.h>
+#include <linux/net.h>
+#include <linux/completion.h>
+#include <linux/idr.h>
+#include <linux/string.h>
+#include <net/sock.h>
+#include <linux/soc/qcom/qmi.h>
+
+#define PING_REQ1_TLV_TYPE		0x1
+#define PING_RESP1_TLV_TYPE		0x2
+#define PING_OPT1_TLV_TYPE		0x10
+#define PING_OPT2_TLV_TYPE		0x11
+
+#define DATA_REQ1_TLV_TYPE		0x1
+#define DATA_RESP1_TLV_TYPE		0x2
+#define DATA_OPT1_TLV_TYPE		0x10
+#define DATA_OPT2_TLV_TYPE		0x11
+
+#define TEST_MED_DATA_SIZE_V01		8192
+#define TEST_MAX_NAME_SIZE_V01		255
+
+#define TEST_PING_REQ_MSG_ID_V01	0x20
+#define TEST_DATA_REQ_MSG_ID_V01	0x21
+
+#define TEST_PING_REQ_MAX_MSG_LEN_V01	266
+#define TEST_DATA_REQ_MAX_MSG_LEN_V01	8456
+
+struct test_name_type_v01 {
+	u32 name_len;
+	char name[TEST_MAX_NAME_SIZE_V01];
+};
+
+static struct qmi_elem_info test_name_type_v01_ei[] = {
+	{
+		.data_type	= QMI_DATA_LEN,
+		.elem_len	= 1,
+		.elem_size	= sizeof(u8),
+		.array_type	= NO_ARRAY,
+		.tlv_type	= QMI_COMMON_TLV_TYPE,
+		.offset		= offsetof(struct test_name_type_v01,
+					   name_len),
+	},
+	{
+		.data_type	= QMI_UNSIGNED_1_BYTE,
+		.elem_len	= TEST_MAX_NAME_SIZE_V01,
+		.elem_size	= sizeof(char),
+		.array_type	= VAR_LEN_ARRAY,
+		.tlv_type	= QMI_COMMON_TLV_TYPE,
+		.offset		= offsetof(struct test_name_type_v01,
+					   name),
+	},
+	{}
+};
+
+struct test_ping_req_msg_v01 {
+	char ping[4];
+
+	u8 client_name_valid;
+	struct test_name_type_v01 client_name;
+};
+
+static struct qmi_elem_info test_ping_req_msg_v01_ei[] = {
+	{
+		.data_type	= QMI_UNSIGNED_1_BYTE,
+		.elem_len	= 4,
+		.elem_size	= sizeof(char),
+		.array_type	= STATIC_ARRAY,
+		.tlv_type	= PING_REQ1_TLV_TYPE,
+		.offset		= offsetof(struct test_ping_req_msg_v01,
+					   ping),
+	},
+	{
+		.data_type	= QMI_OPT_FLAG,
+		.elem_len	= 1,
+		.elem_size	= sizeof(u8),
+		.array_type	= NO_ARRAY,
+		.tlv_type	= PING_OPT1_TLV_TYPE,
+		.offset		= offsetof(struct test_ping_req_msg_v01,
+					   client_name_valid),
+	},
+	{
+		.data_type	= QMI_STRUCT,
+		.elem_len	= 1,
+		.elem_size	= sizeof(struct test_name_type_v01),
+		.array_type	= NO_ARRAY,
+		.tlv_type	= PING_OPT1_TLV_TYPE,
+		.offset		= offsetof(struct test_ping_req_msg_v01,
+					   client_name),
+		.ei_array	= test_name_type_v01_ei,
+	},
+	{}
+};
+
+struct test_ping_resp_msg_v01 {
+	struct qmi_response_type_v01 resp;
+
+	u8 pong_valid;
+	char pong[4];
+
+	u8 service_name_valid;
+	struct test_name_type_v01 service_name;
+};
+
+static struct qmi_elem_info test_ping_resp_msg_v01_ei[] = {
+	{
+		.data_type	= QMI_STRUCT,
+		.elem_len	= 1,
+		.elem_size	= sizeof(struct qmi_response_type_v01),
+		.array_type	= NO_ARRAY,
+		.tlv_type	= PING_RESP1_TLV_TYPE,
+		.offset		= offsetof(struct test_ping_resp_msg_v01,
+					   resp),
+		.ei_array	= qmi_response_type_v01_ei,
+	},
+	{
+		.data_type	= QMI_OPT_FLAG,
+		.elem_len	= 1,
+		.elem_size	= sizeof(u8),
+		.array_type	= NO_ARRAY,
+		.tlv_type	= PING_OPT1_TLV_TYPE,
+		.offset		= offsetof(struct test_ping_resp_msg_v01,
+					   pong_valid),
+	},
+	{
+		.data_type	= QMI_UNSIGNED_1_BYTE,
+		.elem_len	= 4,
+		.elem_size	= sizeof(char),
+		.array_type	= STATIC_ARRAY,
+		.tlv_type	= PING_OPT1_TLV_TYPE,
+		.offset		= offsetof(struct test_ping_resp_msg_v01,
+					   pong),
+	},
+	{
+		.data_type	= QMI_OPT_FLAG,
+		.elem_len	= 1,
+		.elem_size	= sizeof(u8),
+		.array_type	= NO_ARRAY,
+		.tlv_type	= PING_OPT2_TLV_TYPE,
+		.offset		= offsetof(struct test_ping_resp_msg_v01,
+					   service_name_valid),
+	},
+	{
+		.data_type	= QMI_STRUCT,
+		.elem_len	= 1,
+		.elem_size	= sizeof(struct test_name_type_v01),
+		.array_type	= NO_ARRAY,
+		.tlv_type	= PING_OPT2_TLV_TYPE,
+		.offset		= offsetof(struct test_ping_resp_msg_v01,
+					   service_name),
+		.ei_array	= test_name_type_v01_ei,
+	},
+	{}
+};
+
+struct test_data_req_msg_v01 {
+	u32 data_len;
+	u8 data[TEST_MED_DATA_SIZE_V01];
+
+	u8 client_name_valid;
+	struct test_name_type_v01 client_name;
+};
+
+static struct qmi_elem_info test_data_req_msg_v01_ei[] = {
+	{
+		.data_type	= QMI_DATA_LEN,
+		.elem_len	= 1,
+		.elem_size	= sizeof(u32),
+		.array_type	= NO_ARRAY,
+		.tlv_type	= DATA_REQ1_TLV_TYPE,
+		.offset		= offsetof(struct test_data_req_msg_v01,
+					   data_len),
+	},
+	{
+		.data_type	= QMI_UNSIGNED_1_BYTE,
+		.elem_len	= TEST_MED_DATA_SIZE_V01,
+		.elem_size	= sizeof(u8),
+		.array_type	= VAR_LEN_ARRAY,
+		.tlv_type	= DATA_REQ1_TLV_TYPE,
+		.offset		= offsetof(struct test_data_req_msg_v01,
+					   data),
+	},
+	{
+		.data_type	= QMI_OPT_FLAG,
+		.elem_len	= 1,
+		.elem_size	= sizeof(u8),
+		.array_type	= NO_ARRAY,
+		.tlv_type	= DATA_OPT1_TLV_TYPE,
+		.offset		= offsetof(struct test_data_req_msg_v01,
+					   client_name_valid),
+	},
+	{
+		.data_type	= QMI_STRUCT,
+		.elem_len	= 1,
+		.elem_size	= sizeof(struct test_name_type_v01),
+		.array_type	= NO_ARRAY,
+		.tlv_type	= DATA_OPT1_TLV_TYPE,
+		.offset		= offsetof(struct test_data_req_msg_v01,
+					   client_name),
+		.ei_array	= test_name_type_v01_ei,
+	},
+	{}
+};
+
+struct test_data_resp_msg_v01 {
+	struct qmi_response_type_v01 resp;
+
+	u8 data_valid;
+	u32 data_len;
+	u8 data[TEST_MED_DATA_SIZE_V01];
+
+	u8 service_name_valid;
+	struct test_name_type_v01 service_name;
+};
+
+static struct qmi_elem_info test_data_resp_msg_v01_ei[] = {
+	{
+		.data_type	= QMI_STRUCT,
+		.elem_len	= 1,
+		.elem_size	= sizeof(struct qmi_response_type_v01),
+		.array_type	= NO_ARRAY,
+		.tlv_type	= DATA_RESP1_TLV_TYPE,
+		.offset		= offsetof(struct test_data_resp_msg_v01,
+					   resp),
+		.ei_array	= qmi_response_type_v01_ei,
+	},
+	{
+		.data_type	= QMI_OPT_FLAG,
+		.elem_len	= 1,
+		.elem_size	= sizeof(u8),
+		.array_type	= NO_ARRAY,
+		.tlv_type	= DATA_OPT1_TLV_TYPE,
+		.offset		= offsetof(struct test_data_resp_msg_v01,
+					   data_valid),
+	},
+	{
+		.data_type	= QMI_DATA_LEN,
+		.elem_len	= 1,
+		.elem_size	= sizeof(u32),
+		.array_type	= NO_ARRAY,
+		.tlv_type	= DATA_OPT1_TLV_TYPE,
+		.offset		= offsetof(struct test_data_resp_msg_v01,
+					   data_len),
+	},
+	{
+		.data_type	= QMI_UNSIGNED_1_BYTE,
+		.elem_len	= TEST_MED_DATA_SIZE_V01,
+		.elem_size	= sizeof(u8),
+		.array_type	= VAR_LEN_ARRAY,
+		.tlv_type	= DATA_OPT1_TLV_TYPE,
+		.offset		= offsetof(struct test_data_resp_msg_v01,
+					   data),
+	},
+	{
+		.data_type	= QMI_OPT_FLAG,
+		.elem_len	= 1,
+		.elem_size	= sizeof(u8),
+		.array_type	= NO_ARRAY,
+		.tlv_type	= DATA_OPT2_TLV_TYPE,
+		.offset		= offsetof(struct test_data_resp_msg_v01,
+					   service_name_valid),
+	},
+	{
+		.data_type	= QMI_STRUCT,
+		.elem_len	= 1,
+		.elem_size	= sizeof(struct test_name_type_v01),
+		.array_type	= NO_ARRAY,
+		.tlv_type	= DATA_OPT2_TLV_TYPE,
+		.offset		= offsetof(struct test_data_resp_msg_v01,
+					   service_name),
+		.ei_array	= test_name_type_v01_ei,
+	},
+	{}
+};
+
+/*
+ * ping_write() - ping_pong debugfs file write handler
+ * @file:	debugfs file context
+ * @user_buf:	reference to the user data (ignored)
+ * @count:	number of bytes in @user_buf
+ * @ppos:	offset in @file to write
+ *
+ * This function allows user space to send out a ping_pong QMI encoded message
+ * to the associated remote test service and will return with the result of the
+ * transaction. It serves as an example of how to provide a custom response
+ * handler.
+ *
+ * Return: @count, or negative errno on failure.
+ */
+static ssize_t ping_write(struct file *file, const char __user *user_buf,
+			  size_t count, loff_t *ppos)
+{
+	struct qmi_handle *qmi = file->private_data;
+	struct test_ping_req_msg_v01 req = {};
+	struct qmi_txn txn;
+	int ret;
+
+	memcpy(req.ping, "ping", sizeof(req.ping));
+
+	ret = qmi_txn_init(qmi, &txn, NULL, NULL);
+	if (ret < 0)
+		return ret;
+
+	ret = qmi_send_request(qmi, NULL, &txn,
+			       TEST_PING_REQ_MSG_ID_V01,
+			       TEST_PING_REQ_MAX_MSG_LEN_V01,
+			       test_ping_req_msg_v01_ei, &req);
+	if (ret < 0) {
+		qmi_txn_cancel(&txn);
+		return ret;
+	}
+
+	ret = qmi_txn_wait(&txn, 5 * HZ);
+	if (ret < 0)
+		count = ret;
+
+	return count;
+}
+
+static const struct file_operations ping_fops = {
+	.open = simple_open,
+	.write = ping_write,
+};
+
+static void ping_pong_cb(struct qmi_handle *qmi, struct sockaddr_qrtr *sq,
+			 struct qmi_txn *txn, const void *data)
+{
+	const struct test_ping_resp_msg_v01 *resp = data;
+
+	if (!txn) {
+		pr_err("spurious ping response\n");
+		return;
+	}
+
+	if (resp->resp.result == QMI_RESULT_FAILURE_V01)
+		txn->result = -ENXIO;
+	else if (!resp->pong_valid || memcmp(resp->pong, "pong", 4))
+		txn->result = -EINVAL;
+
+	complete(&txn->completion);
+}
+
+/*
+ * data_write() - data debugfs file write handler
+ * @file:	debugfs file context
+ * @user_buf:	reference to the user data
+ * @count:	number of bytes in @user_buf
+ * @ppos:	offset in @file to write
+ *
+ * This function allows user space to send out a data QMI encoded message to
+ * the associated remote test service and will return with the result of the
+ * transaction. It serves as an example of how to have the QMI helpers decode a
+ * transaction response into a provided object automatically.
+ *
+ * Return: @count, or negative errno on failure.
+ */
+static ssize_t data_write(struct file *file, const char __user *user_buf,
+			  size_t count, loff_t *ppos)
+
+{
+	struct qmi_handle *qmi = file->private_data;
+	struct test_data_resp_msg_v01 *resp;
+	struct test_data_req_msg_v01 *req;
+	struct qmi_txn txn;
+	int ret;
+
+	req = kzalloc(sizeof(*req), GFP_KERNEL);
+	if (!req)
+		return -ENOMEM;
+
+	resp = kzalloc(sizeof(*resp), GFP_KERNEL);
+	if (!resp) {
+		kfree(req);
+		return -ENOMEM;
+	}
+
+	req->data_len = min_t(size_t, sizeof(req->data), count);
+	if (copy_from_user(req->data, user_buf, req->data_len)) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	ret = qmi_txn_init(qmi, &txn, test_data_resp_msg_v01_ei, resp);
+	if (ret < 0)
+		goto out;
+
+	ret = qmi_send_request(qmi, NULL, &txn,
+			       TEST_DATA_REQ_MSG_ID_V01,
+			       TEST_DATA_REQ_MAX_MSG_LEN_V01,
+			       test_data_req_msg_v01_ei, req);
+	if (ret < 0) {
+		qmi_txn_cancel(&txn);
+		goto out;
+	}
+
+	ret = qmi_txn_wait(&txn, 5 * HZ);
+	if (ret < 0) {
+		goto out;
+	} else if (!resp->data_valid ||
+		   resp->data_len != req->data_len ||
+		   memcmp(resp->data, req->data, req->data_len)) {
+		pr_err("response data doesn't match expectation\n");
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ret = count;
+
+out:
+	kfree(resp);
+	kfree(req);
+
+	return ret;
+}
+
+static const struct file_operations data_fops = {
+	.open = simple_open,
+	.write = data_write,
+};
+
+static struct qmi_msg_handler qmi_sample_handlers[] = {
+	{
+		.type = QMI_RESPONSE,
+		.msg_id = TEST_PING_REQ_MSG_ID_V01,
+		.ei = test_ping_resp_msg_v01_ei,
+		.decoded_size = sizeof(struct test_ping_req_msg_v01),
+		.fn = ping_pong_cb
+	},
+	{}
+};
+
+struct qmi_sample {
+	struct qmi_handle qmi;
+
+	struct dentry *de_dir;
+	struct dentry *de_data;
+	struct dentry *de_ping;
+};
+
+static struct dentry *qmi_debug_dir;
+
+static int qmi_sample_probe(struct platform_device *pdev)
+{
+	struct sockaddr_qrtr *sq;
+	struct qmi_sample *sample;
+	char path[20];
+	int ret;
+
+	sample = devm_kzalloc(&pdev->dev, sizeof(*sample), GFP_KERNEL);
+	if (!sample)
+		return -ENOMEM;
+
+	ret = qmi_handle_init(&sample->qmi, TEST_DATA_REQ_MAX_MSG_LEN_V01,
+			      NULL,
+			      qmi_sample_handlers);
+	if (ret < 0)
+		return ret;
+
+	sq = dev_get_platdata(&pdev->dev);
+	ret = kernel_connect(sample->qmi.sock, (struct sockaddr *)sq,
+			     sizeof(*sq), 0);
+	if (ret < 0) {
+		pr_err("failed to connect to remote service port\n");
+		goto err_release_qmi_handle;
+	}
+
+	snprintf(path, sizeof(path), "%d:%d", sq->sq_node, sq->sq_port);
+
+	sample->de_dir = debugfs_create_dir(path, qmi_debug_dir);
+	if (IS_ERR(sample->de_dir)) {
+		ret = PTR_ERR(sample->de_dir);
+		goto err_release_qmi_handle;
+	}
+
+	sample->de_data = debugfs_create_file("data", 0600, sample->de_dir,
+					      sample, &data_fops);
+	if (IS_ERR(sample->de_data)) {
+		ret = PTR_ERR(sample->de_data);
+		goto err_remove_de_dir;
+	}
+
+	sample->de_ping = debugfs_create_file("ping", 0600, sample->de_dir,
+					      sample, &ping_fops);
+	if (IS_ERR(sample->de_ping)) {
+		ret = PTR_ERR(sample->de_ping);
+		goto err_remove_de_data;
+	}
+
+	platform_set_drvdata(pdev, sample);
+
+	return 0;
+
+err_remove_de_data:
+	debugfs_remove(sample->de_data);
+err_remove_de_dir:
+	debugfs_remove(sample->de_dir);
+err_release_qmi_handle:
+	qmi_handle_release(&sample->qmi);
+
+	return ret;
+}
+
+static int qmi_sample_remove(struct platform_device *pdev)
+{
+	struct qmi_sample *sample = platform_get_drvdata(pdev);
+
+	debugfs_remove(sample->de_ping);
+	debugfs_remove(sample->de_data);
+	debugfs_remove(sample->de_dir);
+
+	qmi_handle_release(&sample->qmi);
+
+	return 0;
+}
+
+static struct platform_driver qmi_sample_driver = {
+	.probe = qmi_sample_probe,
+	.remove = qmi_sample_remove,
+	.driver = {
+		.name = "qmi_sample_client",
+	},
+};
+
+static int qmi_sample_new_server(struct qmi_handle *qmi,
+				 struct qmi_service *service)
+{
+	struct platform_device *pdev;
+	struct sockaddr_qrtr sq = { AF_QIPCRTR, service->node, service->port };
+	int ret;
+
+	pdev = platform_device_alloc("qmi_sample_client", PLATFORM_DEVID_AUTO);
+	if (!pdev)
+		return -ENOMEM;
+
+	ret = platform_device_add_data(pdev, &sq, sizeof(sq));
+	if (ret)
+		goto err_put_device;
+
+	ret = platform_device_add(pdev);
+	if (ret)
+		goto err_put_device;
+
+	service->priv = pdev;
+
+	return 0;
+
+err_put_device:
+	platform_device_put(pdev);
+
+	return ret;
+}
+
+static void qmi_sample_del_server(struct qmi_handle *qmi,
+				  struct qmi_service *service)
+{
+	struct platform_device *pdev = service->priv;
+
+	platform_device_unregister(pdev);
+}
+
+static struct qmi_handle lookup_client;
+
+static struct qmi_ops lookup_ops = {
+	.new_server = qmi_sample_new_server,
+	.del_server = qmi_sample_del_server,
+};
+
+static int qmi_sample_init(void)
+{
+	int ret;
+
+	qmi_debug_dir = debugfs_create_dir("qmi_sample", NULL);
+	if (IS_ERR(qmi_debug_dir)) {
+		pr_err("failed to create qmi_sample dir\n");
+		return PTR_ERR(qmi_debug_dir);
+	}
+
+	ret = platform_driver_register(&qmi_sample_driver);
+	if (ret)
+		goto err_remove_debug_dir;
+
+	ret = qmi_handle_init(&lookup_client, 0, &lookup_ops, NULL);
+	if (ret < 0)
+		goto err_unregister_driver;
+
+	qmi_add_lookup(&lookup_client, 15, 0, 0);
+
+	return 0;
+
+err_unregister_driver:
+	platform_driver_unregister(&qmi_sample_driver);
+err_remove_debug_dir:
+	debugfs_remove(qmi_debug_dir);
+
+	return ret;
+}
+
+static void qmi_sample_exit(void)
+{
+	qmi_handle_release(&lookup_client);
+
+	platform_driver_unregister(&qmi_sample_driver);
+
+	debugfs_remove(qmi_debug_dir);
+}
+
+module_init(qmi_sample_init);
+module_exit(qmi_sample_exit);
+
+MODULE_DESCRIPTION("Sample QMI client driver");
+MODULE_LICENSE("GPL v2");
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 764ffd1bb1c5..e16d6713f236 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -791,7 +791,8 @@ our $FuncArg = qr{$Typecast{0,1}($LvalOrFunc|$Constant|$String)};
 our $declaration_macros = qr{(?x:
 	(?:$Storage\s+)?(?:[A-Z_][A-Z0-9]*_){0,2}(?:DEFINE|DECLARE)(?:_[A-Z0-9]+){1,6}\s*\(|
 	(?:$Storage\s+)?[HLP]?LIST_HEAD\s*\(|
-	(?:$Storage\s+)?${Type}\s+uninitialized_var\s*\(
+	(?:$Storage\s+)?${Type}\s+uninitialized_var\s*\(|
+	(?:SKCIPHER_REQUEST|SHASH_DESC|AHASH_REQUEST)_ON_STACK\s*\(
 )};
 
 sub deparenthesize {
@@ -1075,7 +1076,7 @@ sub parse_email {
 	} elsif ($formatted_email =~ /(\S+\@\S+)(.*)$/) {
 		$address = $1;
 		$comment = $2 if defined $2;
-		$formatted_email =~ s/$address.*$//;
+		$formatted_email =~ s/\Q$address\E.*$//;
 		$name = $formatted_email;
 		$name = trim($name);
 		$name =~ s/^\"|\"$//g;
@@ -1217,7 +1218,7 @@ sub sanitise_line {
 	for ($off = 1; $off < length($line); $off++) {
 		$c = substr($line, $off, 1);
 
-		# Comments we are wacking completly including the begin
+		# Comments we are whacking completely including the begin
 		# and end, all to $;.
 		if ($sanitise_quote eq '' && substr($line, $off, 2) eq '/*') {
 			$sanitise_quote = '*/';
@@ -1297,6 +1298,7 @@ sub sanitise_line {
 sub get_quoted_string {
 	my ($line, $rawline) = @_;
 
+	return "" if (!defined($line) || !defined($rawline));
 	return "" if ($line !~ m/($String)/g);
 	return substr($rawline, $-[0], $+[0] - $-[0]);
 }
@@ -1644,6 +1646,28 @@ sub raw_line {
 	return $line;
 }
 
+sub get_stat_real {
+	my ($linenr, $lc) = @_;
+
+	my $stat_real = raw_line($linenr, 0);
+	for (my $count = $linenr + 1; $count <= $lc; $count++) {
+		$stat_real = $stat_real . "\n" . raw_line($count, 0);
+	}
+
+	return $stat_real;
+}
+
+sub get_stat_here {
+	my ($linenr, $cnt, $here) = @_;
+
+	my $herectx = $here . "\n";
+	for (my $n = 0; $n < $cnt; $n++) {
+		$herectx .= raw_line($linenr, $n) . "\n";
+	}
+
+	return $herectx;
+}
+
 sub cat_vet {
 	my ($vet) = @_;
 	my ($res, $coded);
@@ -2257,6 +2281,8 @@ sub process {
 
 	my $camelcase_file_seeded = 0;
 
+	my $checklicenseline = 1;
+
 	sanitise_line_reset();
 	my $line;
 	foreach my $rawline (@rawlines) {
@@ -2448,6 +2474,7 @@ sub process {
 			} else {
 				$check = $check_orig;
 			}
+			$checklicenseline = 1;
 			next;
 		}
 
@@ -2911,6 +2938,30 @@ sub process {
 			}
 		}
 
+# check for using SPDX license tag at beginning of files
+		if ($realline == $checklicenseline) {
+			if ($rawline =~ /^[ \+]\s*\#\!\s*\//) {
+				$checklicenseline = 2;
+			} elsif ($rawline =~ /^\+/) {
+				my $comment = "";
+				if ($realfile =~ /\.(h|s|S)$/) {
+					$comment = '/*';
+				} elsif ($realfile =~ /\.(c|dts|dtsi)$/) {
+					$comment = '//';
+				} elsif (($checklicenseline == 2) || $realfile =~ /\.(sh|pl|py|awk|tc)$/) {
+					$comment = '#';
+				} elsif ($realfile =~ /\.rst$/) {
+					$comment = '..';
+				}
+
+				if ($comment !~ /^$/ &&
+				    $rawline !~ /^\+\Q$comment\E SPDX-License-Identifier: /) {
+					WARN("SPDX_LICENSE_TAG",
+					     "Missing or malformed SPDX-License-Identifier tag in line $checklicenseline\n" . $herecurr);
+				}
+			}
+		}
+
 # check we are in a valid source file if not then ignore this hunk
 		next if ($realfile !~ /\.(h|c|s|S|sh|dtsi|dts)$/);
 
@@ -3011,6 +3062,12 @@ sub process {
 			}
 		}
 
+# check for assignments on the start of a line
+		if ($sline =~ /^\+\s+($Assignment)[^=]/) {
+			CHK("ASSIGNMENT_CONTINUATIONS",
+			    "Assignment operator '$1' should be on the previous line\n" . $hereprev);
+		}
+
 # check for && or || at the start of a line
 		if ($rawline =~ /^\+\s*(&&|\|\|)/) {
 			CHK("LOGICAL_CONTINUATIONS",
@@ -4032,7 +4089,7 @@ sub process {
 			my ($where, $prefix) = ($-[1], $1);
 			if ($prefix !~ /$Type\s+$/ &&
 			    ($where != 0 || $prefix !~ /^.\s+$/) &&
-			    $prefix !~ /[{,]\s+$/) {
+			    $prefix !~ /[{,:]\s+$/) {
 				if (ERROR("BRACKET_SPACE",
 					  "space prohibited before open square bracket '['\n" . $herecurr) &&
 				    $fix) {
@@ -4928,12 +4985,8 @@ sub process {
 			#print "REST<$rest> dstat<$dstat> ctx<$ctx>\n";
 
 			$ctx =~ s/\n*$//;
-			my $herectx = $here . "\n";
 			my $stmt_cnt = statement_rawlines($ctx);
-
-			for (my $n = 0; $n < $stmt_cnt; $n++) {
-				$herectx .= raw_line($linenr, $n) . "\n";
-			}
+			my $herectx = get_stat_here($linenr, $stmt_cnt, $here);
 
 			if ($dstat ne '' &&
 			    $dstat !~ /^(?:$Ident|-?$Constant),$/ &&			# 10, // foo(),
@@ -5005,12 +5058,9 @@ sub process {
 # check for macros with flow control, but without ## concatenation
 # ## concatenation is commonly a macro that defines a function so ignore those
 			if ($has_flow_statement && !$has_arg_concat) {
-				my $herectx = $here . "\n";
 				my $cnt = statement_rawlines($ctx);
+				my $herectx = get_stat_here($linenr, $cnt, $here);
 
-				for (my $n = 0; $n < $cnt; $n++) {
-					$herectx .= raw_line($linenr, $n) . "\n";
-				}
 				WARN("MACRO_WITH_FLOW_CONTROL",
 				     "Macros with flow control statements should be avoided\n" . "$herectx");
 			}
@@ -5050,11 +5100,7 @@ sub process {
 
 				$ctx =~ s/\n*$//;
 				my $cnt = statement_rawlines($ctx);
-				my $herectx = $here . "\n";
-
-				for (my $n = 0; $n < $cnt; $n++) {
-					$herectx .= raw_line($linenr, $n) . "\n";
-				}
+				my $herectx = get_stat_here($linenr, $cnt, $here);
 
 				if (($stmts =~ tr/;/;/) == 1 &&
 				    $stmts !~ /^\s*(if|while|for|switch)\b/) {
@@ -5068,11 +5114,7 @@ sub process {
 			} elsif ($dstat =~ /^\+\s*#\s*define\s+$Ident.*;\s*$/) {
 				$ctx =~ s/\n*$//;
 				my $cnt = statement_rawlines($ctx);
-				my $herectx = $here . "\n";
-
-				for (my $n = 0; $n < $cnt; $n++) {
-					$herectx .= raw_line($linenr, $n) . "\n";
-				}
+				my $herectx = get_stat_here($linenr, $cnt, $here);
 
 				WARN("TRAILING_SEMICOLON",
 				     "macros should not use a trailing semicolon\n" . "$herectx");
@@ -5195,12 +5237,8 @@ sub process {
 				}
 			}
 			if ($level == 0 && $block =~ /^\s*\{/ && !$allowed) {
-				my $herectx = $here . "\n";
 				my $cnt = statement_rawlines($block);
-
-				for (my $n = 0; $n < $cnt; $n++) {
-					$herectx .= raw_line($linenr, $n) . "\n";
-				}
+				my $herectx = get_stat_here($linenr, $cnt, $here);
 
 				WARN("BRACES",
 				     "braces {} are not necessary for single statement blocks\n" . $herectx);
@@ -5776,36 +5814,50 @@ sub process {
 			}
 		}
 
-		# check for vsprintf extension %p<foo> misuses
+# check for vsprintf extension %p<foo> misuses
 		if ($^V && $^V ge 5.10.0 &&
 		    defined $stat &&
 		    $stat =~ /^\+(?![^\{]*\{\s*).*\b(\w+)\s*\(.*$String\s*,/s &&
 		    $1 !~ /^_*volatile_*$/) {
-			my $bad_extension = "";
+			my $specifier;
+			my $extension;
+			my $bad_specifier = "";
+			my $stat_real;
+
 			my $lc = $stat =~ tr@\n@@;
 			$lc = $lc + $linenr;
 		        for (my $count = $linenr; $count <= $lc; $count++) {
 				my $fmt = get_quoted_string($lines[$count - 1], raw_line($count, 0));
 				$fmt =~ s/%%//g;
-				if ($fmt =~ /(\%[\*\d\.]*p(?![\WSsBKRraEhMmIiUDdgVCbGNOx]).)/) {
-					$bad_extension = $1;
-					last;
-				}
-			}
-			if ($bad_extension ne "") {
-				my $stat_real = raw_line($linenr, 0);
-				my $ext_type = "Invalid";
-				my $use = "";
-				for (my $count = $linenr + 1; $count <= $lc; $count++) {
-					$stat_real = $stat_real . "\n" . raw_line($count, 0);
+
+				while ($fmt =~ /(\%[\*\d\.]*p(\w))/g) {
+					$specifier = $1;
+					$extension = $2;
+					if ($extension !~ /[SsBKRraEhMmIiUDdgVCbGNOx]/) {
+						$bad_specifier = $specifier;
+						last;
+					}
+					if ($extension eq "x" && !defined($stat_real)) {
+						if (!defined($stat_real)) {
+							$stat_real = get_stat_real($linenr, $lc);
+						}
+						WARN("VSPRINTF_SPECIFIER_PX",
+						     "Using vsprintf specifier '\%px' potentially exposes the kernel memory layout, if you don't really need the address please consider using '\%p'.\n" . "$here\n$stat_real\n");
+					}
 				}
-				if ($bad_extension =~ /p[Ff]/) {
-					$ext_type = "Deprecated";
-					$use = " - use %pS instead";
-					$use =~ s/pS/ps/ if ($bad_extension =~ /pf/);
+				if ($bad_specifier ne "") {
+					my $stat_real = get_stat_real($linenr, $lc);
+					my $ext_type = "Invalid";
+					my $use = "";
+					if ($bad_specifier =~ /p[Ff]/) {
+						$ext_type = "Deprecated";
+						$use = " - use %pS instead";
+						$use =~ s/pS/ps/ if ($bad_specifier =~ /pf/);
+					}
+
+					WARN("VSPRINTF_POINTER_EXTENSION",
+					     "$ext_type vsprintf pointer extension '$bad_specifier'$use\n" . "$here\n$stat_real\n");
 				}
-				WARN("VSPRINTF_POINTER_EXTENSION",
-				     "$ext_type vsprintf pointer extension '$bad_extension'$use\n" . "$here\n$stat_real\n");
 			}
 		}
 
@@ -5918,10 +5970,7 @@ sub process {
 		     $stat !~ /(?:$Compare)\s*\bsscanf\s*$balanced_parens/)) {
 			my $lc = $stat =~ tr@\n@@;
 			$lc = $lc + $linenr;
-			my $stat_real = raw_line($linenr, 0);
-		        for (my $count = $linenr + 1; $count <= $lc; $count++) {
-				$stat_real = $stat_real . "\n" . raw_line($count, 0);
-			}
+			my $stat_real = get_stat_real($linenr, $lc);
 			WARN("NAKED_SSCANF",
 			     "unchecked sscanf return value\n" . "$here\n$stat_real\n");
 		}
@@ -5932,10 +5981,7 @@ sub process {
 		    $line =~ /\bsscanf\b/) {
 			my $lc = $stat =~ tr@\n@@;
 			$lc = $lc + $linenr;
-			my $stat_real = raw_line($linenr, 0);
-		        for (my $count = $linenr + 1; $count <= $lc; $count++) {
-				$stat_real = $stat_real . "\n" . raw_line($count, 0);
-			}
+			my $stat_real = get_stat_real($linenr, $lc);
 			if ($stat_real =~ /\bsscanf\b\s*\(\s*$FuncArg\s*,\s*("[^"]+")/) {
 				my $format = $6;
 				my $count = $format =~ tr@%@%@;
@@ -6065,12 +6111,9 @@ sub process {
 			}
 			if ($r1 !~ /^sizeof\b/ && $r2 =~ /^sizeof\s*\S/ &&
 			    !($r1 =~ /^$Constant$/ || $r1 =~ /^[A-Z_][A-Z0-9_]*$/)) {
-				my $ctx = '';
-				my $herectx = $here . "\n";
 				my $cnt = statement_rawlines($stat);
-				for (my $n = 0; $n < $cnt; $n++) {
-					$herectx .= raw_line($linenr, $n) . "\n";
-				}
+				my $herectx = get_stat_here($linenr, $cnt, $here);
+
 				if (WARN("ALLOC_WITH_MULTIPLY",
 					 "Prefer $newfunc over $oldfunc with multiply\n" . $herectx) &&
 				    $cnt == 1 &&
@@ -6153,12 +6196,9 @@ sub process {
 		if ($^V && $^V ge 5.10.0 &&
 		    defined $stat &&
 		    $stat =~ /^\+[$;\s]*(?:case[$;\s]+\w+[$;\s]*:[$;\s]*|)*[$;\s]*\bdefault[$;\s]*:[$;\s]*;/g) {
-			my $ctx = '';
-			my $herectx = $here . "\n";
 			my $cnt = statement_rawlines($stat);
-			for (my $n = 0; $n < $cnt; $n++) {
-				$herectx .= raw_line($linenr, $n) . "\n";
-			}
+			my $herectx = get_stat_here($linenr, $cnt, $here);
+
 			WARN("DEFAULT_NO_BREAK",
 			     "switch default: should use break\n" . $herectx);
 		}
@@ -6211,6 +6251,12 @@ sub process {
 			}
 		}
 
+# check for bool bitfields
+		if ($sline =~ /^.\s+bool\s*$Ident\s*:\s*\d+\s*;/) {
+			WARN("BOOL_BITFIELD",
+			     "Avoid using bool as bitfield.  Prefer bool bitfields as unsigned int or u<8|16|32>\n" . $herecurr);
+		}
+
 # check for semaphores initialized locked
 		if ($line =~ /^.\s*sema_init.+,\W?0\W?\)/) {
 			WARN("CONSIDER_COMPLETION",
@@ -6369,10 +6415,7 @@ sub process {
 
 				my $lc = $stat =~ tr@\n@@;
 				$lc = $lc + $linenr;
-				my $stat_real = raw_line($linenr, 0);
-				for (my $count = $linenr + 1; $count <= $lc; $count++) {
-					$stat_real = $stat_real . "\n" . raw_line($count, 0);
-				}
+				my $stat_real = get_stat_real($linenr, $lc);
 
 				my $skip_args = "";
 				if ($arg_pos > 1) {
@@ -6398,7 +6441,7 @@ sub process {
 		}
 
 # check for uses of S_<PERMS> that could be octal for readability
-		if ($line =~ /\b($multi_mode_perms_string_search)\b/) {
+		while ($line =~ m{\b($multi_mode_perms_string_search)\b}g) {
 			my $oval = $1;
 			my $octal = perms_to_octal($oval);
 			if (WARN("SYMBOLIC_PERMS",
diff --git a/scripts/dtc/include-prefixes/cris b/scripts/dtc/include-prefixes/cris
deleted file mode 120000
index 736d998ba506..000000000000
--- a/scripts/dtc/include-prefixes/cris
+++ /dev/null
@@ -1 +0,0 @@
-../../../arch/cris/boot/dts
-\ No newline at end of file
diff --git a/scripts/dtc/include-prefixes/metag b/scripts/dtc/include-prefixes/metag
deleted file mode 120000
index 87a3c847db8f..000000000000
--- a/scripts/dtc/include-prefixes/metag
+++ /dev/null
@@ -1 +0,0 @@
-../../../arch/metag/boot/dts
-\ No newline at end of file
diff --git a/security/security.c b/security/security.c
index d2a84cda7e8d..7bc2fde023a7 100644
--- a/security/security.c
+++ b/security/security.c
@@ -30,6 +30,8 @@
 #include <linux/string.h>
 #include <net/flow.h>
 
+#include <trace/events/initcall.h>
+
 #define MAX_LSM_EVM_XATTR	2
 
 /* Maximum number of letters for an LSM name string */
@@ -45,10 +47,14 @@ static __initdata char chosen_lsm[SECURITY_NAME_MAX + 1] =
 
 static void __init do_security_initcalls(void)
 {
+	int ret;
 	initcall_t *call;
 	call = __security_initcall_start;
+	trace_initcall_level("security");
 	while (call < __security_initcall_end) {
-		(*call) ();
+		trace_initcall_start((*call));
+		ret = (*call) ();
+		trace_initcall_finish((*call), ret);
 		call++;
 	}
 }
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 1eeb70e439d7..4cafe6a19167 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -6006,6 +6006,7 @@ static int selinux_msg_queue_msgctl(struct kern_ipc_perm *msq, int cmd)
 				    SECCLASS_SYSTEM, SYSTEM__IPC_INFO, NULL);
 	case IPC_STAT:
 	case MSG_STAT:
+	case MSG_STAT_ANY:
 		perms = MSGQ__GETATTR | MSGQ__ASSOCIATE;
 		break;
 	case IPC_SET:
@@ -6157,6 +6158,7 @@ static int selinux_shm_shmctl(struct kern_ipc_perm *shp, int cmd)
 				    SECCLASS_SYSTEM, SYSTEM__IPC_INFO, NULL);
 	case IPC_STAT:
 	case SHM_STAT:
+	case SHM_STAT_ANY:
 		perms = SHM__GETATTR | SHM__ASSOCIATE;
 		break;
 	case IPC_SET:
@@ -6272,6 +6274,7 @@ static int selinux_sem_semctl(struct kern_ipc_perm *sma, int cmd)
 		break;
 	case IPC_STAT:
 	case SEM_STAT:
+	case SEM_STAT_ANY:
 		perms = SEM__GETATTR | SEM__ASSOCIATE;
 		break;
 	default:
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 73549007bf9e..0b414836bebd 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -3046,6 +3046,7 @@ static int smack_shm_shmctl(struct kern_ipc_perm *isp, int cmd)
 	switch (cmd) {
 	case IPC_STAT:
 	case SHM_STAT:
+	case SHM_STAT_ANY:
 		may = MAY_READ;
 		break;
 	case IPC_SET:
@@ -3139,6 +3140,7 @@ static int smack_sem_semctl(struct kern_ipc_perm *isp, int cmd)
 	case GETALL:
 	case IPC_STAT:
 	case SEM_STAT:
+	case SEM_STAT_ANY:
 		may = MAY_READ;
 		break;
 	case SETVAL:
@@ -3228,6 +3230,7 @@ static int smack_msg_queue_msgctl(struct kern_ipc_perm *isp, int cmd)
 	switch (cmd) {
 	case IPC_STAT:
 	case MSG_STAT:
+	case MSG_STAT_ANY:
 		may = MAY_READ;
 		break;
 	case IPC_SET:
diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c
index 481ab0e94ffa..1980f68246cb 100644
--- a/sound/core/oss/pcm_oss.c
+++ b/sound/core/oss/pcm_oss.c
@@ -1128,13 +1128,14 @@ static int snd_pcm_oss_get_active_substream(struct snd_pcm_oss_file *pcm_oss_fil
 }
 
 /* call with params_lock held */
+/* NOTE: this always call PREPARE unconditionally no matter whether
+ * runtime->oss.prepare is set or not
+ */
 static int snd_pcm_oss_prepare(struct snd_pcm_substream *substream)
 {
 	int err;
 	struct snd_pcm_runtime *runtime = substream->runtime;
 
-	if (!runtime->oss.prepare)
-		return 0;
 	err = snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_PREPARE, NULL);
 	if (err < 0) {
 		pcm_dbg(substream->pcm,
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index b84554893fab..35ffccea94c3 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -617,7 +617,7 @@ static int snd_pcm_hw_params_choose(struct snd_pcm_substream *pcm,
 			changed = snd_pcm_hw_param_first(pcm, params, *v, NULL);
 		else
 			changed = snd_pcm_hw_param_last(pcm, params, *v, NULL);
-		if (snd_BUG_ON(changed < 0))
+		if (changed < 0)
 			return changed;
 		if (changed == 0)
 			continue;
diff --git a/sound/usb/clock.c b/sound/usb/clock.c
index ab39ccb974c6..0b030d8fe3fa 100644
--- a/sound/usb/clock.c
+++ b/sound/usb/clock.c
@@ -35,105 +35,85 @@
 #include "clock.h"
 #include "quirks.h"
 
-static struct uac_clock_source_descriptor *
-	snd_usb_find_clock_source(struct usb_host_interface *ctrl_iface,
-				  int clock_id)
+static void *find_uac_clock_desc(struct usb_host_interface *iface, int id,
+				 bool (*validator)(void *, int), u8 type)
 {
-	struct uac_clock_source_descriptor *cs = NULL;
+	void *cs = NULL;
 
-	while ((cs = snd_usb_find_csint_desc(ctrl_iface->extra,
-					     ctrl_iface->extralen,
-					     cs, UAC2_CLOCK_SOURCE))) {
-		if (cs->bLength >= sizeof(*cs) && cs->bClockID == clock_id)
+	while ((cs = snd_usb_find_csint_desc(iface->extra, iface->extralen,
+					     cs, type))) {
+		if (validator(cs, id))
 			return cs;
 	}
 
 	return NULL;
 }
 
-static struct uac3_clock_source_descriptor *
-	snd_usb_find_clock_source_v3(struct usb_host_interface *ctrl_iface,
-				  int clock_id)
+static bool validate_clock_source_v2(void *p, int id)
 {
-	struct uac3_clock_source_descriptor *cs = NULL;
-
-	while ((cs = snd_usb_find_csint_desc(ctrl_iface->extra,
-					     ctrl_iface->extralen,
-					     cs, UAC3_CLOCK_SOURCE))) {
-		if (cs->bClockID == clock_id)
-			return cs;
-	}
-
-	return NULL;
+	struct uac_clock_source_descriptor *cs = p;
+	return cs->bLength == sizeof(*cs) && cs->bClockID == id;
 }
 
-static struct uac_clock_selector_descriptor *
-	snd_usb_find_clock_selector(struct usb_host_interface *ctrl_iface,
-				    int clock_id)
+static bool validate_clock_source_v3(void *p, int id)
 {
-	struct uac_clock_selector_descriptor *cs = NULL;
-
-	while ((cs = snd_usb_find_csint_desc(ctrl_iface->extra,
-					     ctrl_iface->extralen,
-					     cs, UAC2_CLOCK_SELECTOR))) {
-		if (cs->bLength >= sizeof(*cs) && cs->bClockID == clock_id) {
-			if (cs->bLength < 5 + cs->bNrInPins)
-				return NULL;
-			return cs;
-		}
-	}
-
-	return NULL;
+	struct uac3_clock_source_descriptor *cs = p;
+	return cs->bLength == sizeof(*cs) && cs->bClockID == id;
 }
 
-static struct uac3_clock_selector_descriptor *
-	snd_usb_find_clock_selector_v3(struct usb_host_interface *ctrl_iface,
-				    int clock_id)
+static bool validate_clock_selector_v2(void *p, int id)
 {
-	struct uac3_clock_selector_descriptor *cs = NULL;
-
-	while ((cs = snd_usb_find_csint_desc(ctrl_iface->extra,
-					     ctrl_iface->extralen,
-					     cs, UAC3_CLOCK_SELECTOR))) {
-		if (cs->bClockID == clock_id)
-			return cs;
-	}
-
-	return NULL;
+	struct uac_clock_selector_descriptor *cs = p;
+	return cs->bLength >= sizeof(*cs) && cs->bClockID == id &&
+		cs->bLength == 7 + cs->bNrInPins;
 }
 
-static struct uac_clock_multiplier_descriptor *
-	snd_usb_find_clock_multiplier(struct usb_host_interface *ctrl_iface,
-				      int clock_id)
+static bool validate_clock_selector_v3(void *p, int id)
 {
-	struct uac_clock_multiplier_descriptor *cs = NULL;
-
-	while ((cs = snd_usb_find_csint_desc(ctrl_iface->extra,
-					     ctrl_iface->extralen,
-					     cs, UAC2_CLOCK_MULTIPLIER))) {
-		if (cs->bLength >= sizeof(*cs) && cs->bClockID == clock_id)
-			return cs;
-	}
-
-	return NULL;
+	struct uac3_clock_selector_descriptor *cs = p;
+	return cs->bLength >= sizeof(*cs) && cs->bClockID == id &&
+		cs->bLength == 11 + cs->bNrInPins;
 }
 
-static struct uac3_clock_multiplier_descriptor *
-	snd_usb_find_clock_multiplier_v3(struct usb_host_interface *ctrl_iface,
-				      int clock_id)
+static bool validate_clock_multiplier_v2(void *p, int id)
 {
-	struct uac3_clock_multiplier_descriptor *cs = NULL;
+	struct uac_clock_multiplier_descriptor *cs = p;
+	return cs->bLength == sizeof(*cs) && cs->bClockID == id;
+}
 
-	while ((cs = snd_usb_find_csint_desc(ctrl_iface->extra,
-					     ctrl_iface->extralen,
-					     cs, UAC3_CLOCK_MULTIPLIER))) {
-		if (cs->bClockID == clock_id)
-			return cs;
-	}
+static bool validate_clock_multiplier_v3(void *p, int id)
+{
+	struct uac3_clock_multiplier_descriptor *cs = p;
+	return cs->bLength == sizeof(*cs) && cs->bClockID == id;
+}
 
-	return NULL;
+#define DEFINE_FIND_HELPER(name, obj, validator, type)		\
+static obj *name(struct usb_host_interface *iface, int id)	\
+{								\
+	return find_uac_clock_desc(iface, id, validator, type);	\
 }
 
+DEFINE_FIND_HELPER(snd_usb_find_clock_source,
+		   struct uac_clock_source_descriptor,
+		   validate_clock_source_v2, UAC2_CLOCK_SOURCE);
+DEFINE_FIND_HELPER(snd_usb_find_clock_source_v3,
+		   struct uac3_clock_source_descriptor,
+		   validate_clock_source_v3, UAC3_CLOCK_SOURCE);
+
+DEFINE_FIND_HELPER(snd_usb_find_clock_selector,
+		   struct uac_clock_selector_descriptor,
+		   validate_clock_selector_v2, UAC2_CLOCK_SELECTOR);
+DEFINE_FIND_HELPER(snd_usb_find_clock_selector_v3,
+		   struct uac3_clock_selector_descriptor,
+		   validate_clock_selector_v3, UAC3_CLOCK_SELECTOR);
+
+DEFINE_FIND_HELPER(snd_usb_find_clock_multiplier,
+		   struct uac_clock_multiplier_descriptor,
+		   validate_clock_multiplier_v2, UAC2_CLOCK_MULTIPLIER);
+DEFINE_FIND_HELPER(snd_usb_find_clock_multiplier_v3,
+		   struct uac3_clock_multiplier_descriptor,
+		   validate_clock_multiplier_v3, UAC3_CLOCK_MULTIPLIER);
+
 static int uac_clock_selector_get_val(struct snd_usb_audio *chip, int selector_id)
 {
 	unsigned char buf;
diff --git a/tools/include/linux/spinlock.h b/tools/include/linux/spinlock.h
index 4ed569fcb139..b21b586b9854 100644
--- a/tools/include/linux/spinlock.h
+++ b/tools/include/linux/spinlock.h
@@ -7,6 +7,7 @@
 
 #define spinlock_t		pthread_mutex_t
 #define DEFINE_SPINLOCK(x)	pthread_mutex_t x = PTHREAD_MUTEX_INITIALIZER;
+#define __SPIN_LOCK_UNLOCKED(x)	(pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER
 
 #define spin_lock_irqsave(x, f)		(void)f, pthread_mutex_lock(x)
 #define spin_unlock_irqrestore(x, f)	(void)f, pthread_mutex_unlock(x)
diff --git a/tools/testing/ktest/config-bisect.pl b/tools/testing/ktest/config-bisect.pl
new file mode 100755
index 000000000000..b28feea7c363
--- /dev/null
+++ b/tools/testing/ktest/config-bisect.pl
@@ -0,0 +1,770 @@
+#!/usr/bin/perl -w
+#
+# Copyright 2015 - Steven Rostedt, Red Hat Inc.
+# Copyright 2017 - Steven Rostedt, VMware, Inc.
+#
+# Licensed under the terms of the GNU GPL License version 2
+#
+
+# usage:
+#  config-bisect.pl [options] good-config bad-config [good|bad]
+#
+
+# Compares a good config to a bad config, then takes half of the diffs
+# and produces a config that is somewhere between the good config and
+# the bad config. That is, the resulting config will start with the
+# good config and will try to make half of the differences of between
+# the good and bad configs match the bad config. It tries because of
+# dependencies between the two configs it may not be able to change
+# exactly half of the configs that are different between the two config
+# files.
+
+# Here's a normal way to use it:
+#
+#  $ cd /path/to/linux/kernel
+#  $ config-bisect.pl /path/to/good/config /path/to/bad/config
+
+# This will now pull in good config (blowing away .config in that directory
+# so do not make that be one of the good or bad configs), and then
+# build the config with "make oldconfig" to make sure it matches the
+# current kernel. It will then store the configs in that result for
+# the good config. It does the same for the bad config as well.
+# The algorithm will run, merging half of the differences between
+# the two configs and building them with "make oldconfig" to make sure
+# the result changes (dependencies may reset changes the tool had made).
+# It then copies the result of its good config to /path/to/good/config.tmp
+# and the bad config to /path/to/bad/config.tmp (just appends ".tmp" to the
+# files passed in). And the ".config" that you should test will be in
+# directory
+
+# After the first run, determine if the result is good or bad then
+# run the same command appending the result
+
+# For good results:
+#  $ config-bisect.pl /path/to/good/config /path/to/bad/config good
+
+# For bad results:
+#  $ config-bisect.pl /path/to/good/config /path/to/bad/config bad
+
+# Do not change the good-config or bad-config, config-bisect.pl will
+# copy the good-config to a temp file with the same name as good-config
+# but with a ".tmp" after it. It will do the same with the bad-config.
+
+# If "good" or "bad" is not stated at the end, it will copy the good and
+# bad configs to the .tmp versions. If a .tmp version already exists, it will
+# warn before writing over them (-r will not warn, and just write over them).
+# If the last config is labeled "good", then it will copy it to the good .tmp
+# version. If the last config is labeled "bad", it will copy it to the bad
+# .tmp version. It will continue this until it can not merge the two any more
+# without the result being equal to either the good or bad .tmp configs.
+
+my $start = 0;
+my $val = "";
+
+my $pwd = `pwd`;
+chomp $pwd;
+my $tree = $pwd;
+my $build;
+
+my $output_config;
+my $reset_bisect;
+
+sub usage {
+    print << "EOF"
+
+usage: config-bisect.pl [-l linux-tree][-b build-dir] good-config bad-config [good|bad]
+  -l [optional] define location of linux-tree (default is current directory)
+  -b [optional] define location to build (O=build-dir) (default is linux-tree)
+  good-config the config that is considered good
+  bad-config the config that does not work
+  "good" add this if the last run produced a good config
+  "bad" add this if the last run produced a bad config
+  If "good" or "bad" is not specified, then it is the start of a new bisect
+
+  Note, each run will create copy of good and bad configs with ".tmp" appended.
+
+EOF
+;
+
+    exit(-1);
+}
+
+sub doprint {
+    print @_;
+}
+
+sub dodie {
+    doprint "CRITICAL FAILURE... ", @_, "\n";
+
+    die @_, "\n";
+}
+
+sub expand_path {
+    my ($file) = @_;
+
+    if ($file =~ m,^/,) {
+	return $file;
+    }
+    return "$pwd/$file";
+}
+
+sub read_prompt {
+    my ($cancel, $prompt) = @_;
+
+    my $ans;
+
+    for (;;) {
+	if ($cancel) {
+	    print "$prompt [y/n/C] ";
+	} else {
+	    print "$prompt [y/N] ";
+	}
+	$ans = <STDIN>;
+	chomp $ans;
+	if ($ans =~ /^\s*$/) {
+	    if ($cancel) {
+		$ans = "c";
+	    } else {
+		$ans = "n";
+	    }
+	}
+	last if ($ans =~ /^y$/i || $ans =~ /^n$/i);
+	if ($cancel) {
+	    last if ($ans =~ /^c$/i);
+	    print "Please answer either 'y', 'n' or 'c'.\n";
+	} else {
+	    print "Please answer either 'y' or 'n'.\n";
+	}
+    }
+    if ($ans =~ /^c/i) {
+	exit;
+    }
+    if ($ans !~ /^y$/i) {
+	return 0;
+    }
+    return 1;
+}
+
+sub read_yn {
+    my ($prompt) = @_;
+
+    return read_prompt 0, $prompt;
+}
+
+sub read_ync {
+    my ($prompt) = @_;
+
+    return read_prompt 1, $prompt;
+}
+
+sub run_command {
+    my ($command, $redirect) = @_;
+    my $start_time;
+    my $end_time;
+    my $dord = 0;
+    my $pid;
+
+    $start_time = time;
+
+    doprint("$command ... ");
+
+    $pid = open(CMD, "$command 2>&1 |") or
+	dodie "unable to exec $command";
+
+    if (defined($redirect)) {
+	open (RD, ">$redirect") or
+	    dodie "failed to write to redirect $redirect";
+	$dord = 1;
+    }
+
+    while (<CMD>) {
+	print RD  if ($dord);
+    }
+
+    waitpid($pid, 0);
+    my $failed = $?;
+
+    close(CMD);
+    close(RD)  if ($dord);
+
+    $end_time = time;
+    my $delta = $end_time - $start_time;
+
+    if ($delta == 1) {
+	doprint "[1 second] ";
+    } else {
+	doprint "[$delta seconds] ";
+    }
+
+    if ($failed) {
+	doprint "FAILED!\n";
+    } else {
+	doprint "SUCCESS\n";
+    }
+
+    return !$failed;
+}
+
+###### CONFIG BISECT ######
+
+# config_ignore holds the configs that were set (or unset) for
+# a good config and we will ignore these configs for the rest
+# of a config bisect. These configs stay as they were.
+my %config_ignore;
+
+# config_set holds what all configs were set as.
+my %config_set;
+
+# config_off holds the set of configs that the bad config had disabled.
+# We need to record them and set them in the .config when running
+# olddefconfig, because olddefconfig keeps the defaults.
+my %config_off;
+
+# config_off_tmp holds a set of configs to turn off for now
+my @config_off_tmp;
+
+# config_list is the set of configs that are being tested
+my %config_list;
+my %null_config;
+
+my %dependency;
+
+my $make;
+
+sub make_oldconfig {
+
+    if (!run_command "$make olddefconfig") {
+	# Perhaps olddefconfig doesn't exist in this version of the kernel
+	# try oldnoconfig
+	doprint "olddefconfig failed, trying make oldnoconfig\n";
+	if (!run_command "$make oldnoconfig") {
+	    doprint "oldnoconfig failed, trying yes '' | make oldconfig\n";
+	    # try a yes '' | oldconfig
+	    run_command "yes '' | $make oldconfig" or
+		dodie "failed make config oldconfig";
+	}
+    }
+}
+
+sub assign_configs {
+    my ($hash, $config) = @_;
+
+    doprint "Reading configs from $config\n";
+
+    open (IN, $config)
+	or dodie "Failed to read $config";
+
+    while (<IN>) {
+	chomp;
+	if (/^((CONFIG\S*)=.*)/) {
+	    ${$hash}{$2} = $1;
+	} elsif (/^(# (CONFIG\S*) is not set)/) {
+	    ${$hash}{$2} = $1;
+	}
+    }
+
+    close(IN);
+}
+
+sub process_config_ignore {
+    my ($config) = @_;
+
+    assign_configs \%config_ignore, $config;
+}
+
+sub get_dependencies {
+    my ($config) = @_;
+
+    my $arr = $dependency{$config};
+    if (!defined($arr)) {
+	return ();
+    }
+
+    my @deps = @{$arr};
+
+    foreach my $dep (@{$arr}) {
+	print "ADD DEP $dep\n";
+	@deps = (@deps, get_dependencies $dep);
+    }
+
+    return @deps;
+}
+
+sub save_config {
+    my ($pc, $file) = @_;
+
+    my %configs = %{$pc};
+
+    doprint "Saving configs into $file\n";
+
+    open(OUT, ">$file") or dodie "Can not write to $file";
+
+    foreach my $config (keys %configs) {
+	print OUT "$configs{$config}\n";
+    }
+    close(OUT);
+}
+
+sub create_config {
+    my ($name, $pc) = @_;
+
+    doprint "Creating old config from $name configs\n";
+
+    save_config $pc, $output_config;
+
+    make_oldconfig;
+}
+
+# compare two config hashes, and return configs with different vals.
+# It returns B's config values, but you can use A to see what A was.
+sub diff_config_vals {
+    my ($pa, $pb) = @_;
+
+    # crappy Perl way to pass in hashes.
+    my %a = %{$pa};
+    my %b = %{$pb};
+
+    my %ret;
+
+    foreach my $item (keys %a) {
+	if (defined($b{$item}) && $b{$item} ne $a{$item}) {
+	    $ret{$item} = $b{$item};
+	}
+    }
+
+    return %ret;
+}
+
+# compare two config hashes and return the configs in B but not A
+sub diff_configs {
+    my ($pa, $pb) = @_;
+
+    my %ret;
+
+    # crappy Perl way to pass in hashes.
+    my %a = %{$pa};
+    my %b = %{$pb};
+
+    foreach my $item (keys %b) {
+	if (!defined($a{$item})) {
+	    $ret{$item} = $b{$item};
+	}
+    }
+
+    return %ret;
+}
+
+# return if two configs are equal or not
+# 0 is equal +1 b has something a does not
+# +1 if a and b have a different item.
+# -1 if a has something b does not
+sub compare_configs {
+    my ($pa, $pb) = @_;
+
+    my %ret;
+
+    # crappy Perl way to pass in hashes.
+    my %a = %{$pa};
+    my %b = %{$pb};
+
+    foreach my $item (keys %b) {
+	if (!defined($a{$item})) {
+	    return 1;
+	}
+	if ($a{$item} ne $b{$item}) {
+	    return 1;
+	}
+    }
+
+    foreach my $item (keys %a) {
+	if (!defined($b{$item})) {
+	    return -1;
+	}
+    }
+
+    return 0;
+}
+
+sub process_failed {
+    my ($config) = @_;
+
+    doprint "\n\n***************************************\n";
+    doprint "Found bad config: $config\n";
+    doprint "***************************************\n\n";
+}
+
+sub process_new_config {
+    my ($tc, $nc, $gc, $bc) = @_;
+
+    my %tmp_config = %{$tc};
+    my %good_configs = %{$gc};
+    my %bad_configs = %{$bc};
+
+    my %new_configs;
+
+    my $runtest = 1;
+    my $ret;
+
+    create_config "tmp_configs", \%tmp_config;
+    assign_configs \%new_configs, $output_config;
+
+    $ret = compare_configs \%new_configs, \%bad_configs;
+    if (!$ret) {
+	doprint "New config equals bad config, try next test\n";
+	$runtest = 0;
+    }
+
+    if ($runtest) {
+	$ret = compare_configs \%new_configs, \%good_configs;
+	if (!$ret) {
+	    doprint "New config equals good config, try next test\n";
+	    $runtest = 0;
+	}
+    }
+
+    %{$nc} = %new_configs;
+
+    return $runtest;
+}
+
+sub convert_config {
+    my ($config) = @_;
+
+    if ($config =~ /^# (.*) is not set/) {
+	$config = "$1=n";
+    }
+
+    $config =~ s/^CONFIG_//;
+    return $config;
+}
+
+sub print_config {
+    my ($sym, $config) = @_;
+
+    $config = convert_config $config;
+    doprint "$sym$config\n";
+}
+
+sub print_config_compare {
+    my ($good_config, $bad_config) = @_;
+
+    $good_config = convert_config $good_config;
+    $bad_config = convert_config $bad_config;
+
+    my $good_value = $good_config;
+    my $bad_value = $bad_config;
+    $good_value =~ s/(.*)=//;
+    my $config = $1;
+
+    $bad_value =~ s/.*=//;
+
+    doprint " $config $good_value -> $bad_value\n";
+}
+
+# Pass in:
+# $phalf: half of the configs names you want to add
+# $oconfigs: The orginial configs to start with
+# $sconfigs: The source to update $oconfigs with (from $phalf)
+# $which: The name of which half that is updating (top / bottom)
+# $type: The name of the source type (good / bad)
+sub make_half {
+    my ($phalf, $oconfigs, $sconfigs, $which, $type) = @_;
+
+    my @half = @{$phalf};
+    my %orig_configs = %{$oconfigs};
+    my %source_configs = %{$sconfigs};
+
+    my %tmp_config = %orig_configs;
+
+    doprint "Settings bisect with $which half of $type configs:\n";
+    foreach my $item (@half) {
+	doprint "Updating $item to $source_configs{$item}\n";
+	$tmp_config{$item} = $source_configs{$item};
+    }
+
+    return %tmp_config;
+}
+
+sub run_config_bisect {
+    my ($pgood, $pbad) = @_;
+
+    my %good_configs = %{$pgood};
+    my %bad_configs = %{$pbad};
+
+    my %diff_configs = diff_config_vals \%good_configs, \%bad_configs;
+    my %b_configs = diff_configs \%good_configs, \%bad_configs;
+    my %g_configs = diff_configs \%bad_configs, \%good_configs;
+
+    # diff_arr is what is in both good and bad but are different (y->n)
+    my @diff_arr = keys %diff_configs;
+    my $len_diff = $#diff_arr + 1;
+
+    # b_arr is what is in bad but not in good (has depends)
+    my @b_arr = keys %b_configs;
+    my $len_b = $#b_arr + 1;
+
+    # g_arr is what is in good but not in bad
+    my @g_arr = keys %g_configs;
+    my $len_g = $#g_arr + 1;
+
+    my $runtest = 0;
+    my %new_configs;
+    my $ret;
+
+    # Look at the configs that are different between good and bad.
+    # This does not include those that depend on other configs
+    #  (configs depending on other configs that are not set would
+    #   not show up even as a "# CONFIG_FOO is not set"
+
+
+    doprint "# of configs to check:             $len_diff\n";
+    doprint "# of configs showing only in good: $len_g\n";
+    doprint "# of configs showing only in bad:  $len_b\n";
+
+    if ($len_diff > 0) {
+	# Now test for different values
+
+	doprint "Configs left to check:\n";
+	doprint "  Good Config\t\t\tBad Config\n";
+	doprint "  -----------\t\t\t----------\n";
+	foreach my $item (@diff_arr) {
+	    doprint "  $good_configs{$item}\t$bad_configs{$item}\n";
+	}
+
+	my $half = int($#diff_arr / 2);
+	my @tophalf = @diff_arr[0 .. $half];
+
+	doprint "Set tmp config to be good config with some bad config values\n";
+
+	my %tmp_config = make_half \@tophalf, \%good_configs,
+	    \%bad_configs, "top", "bad";
+
+	$runtest = process_new_config \%tmp_config, \%new_configs,
+			    \%good_configs, \%bad_configs;
+
+	if (!$runtest) {
+	    doprint "Set tmp config to be bad config with some good config values\n";
+
+	    my %tmp_config = make_half \@tophalf, \%bad_configs,
+		\%good_configs, "top", "good";
+
+	    $runtest = process_new_config \%tmp_config, \%new_configs,
+		\%good_configs, \%bad_configs;
+	}
+    }
+
+    if (!$runtest && $len_diff > 0) {
+	# do the same thing, but this time with bottom half
+
+	my $half = int($#diff_arr / 2);
+	my @bottomhalf = @diff_arr[$half+1 .. $#diff_arr];
+
+	doprint "Set tmp config to be good config with some bad config values\n";
+
+	my %tmp_config = make_half \@bottomhalf, \%good_configs,
+	    \%bad_configs, "bottom", "bad";
+
+	$runtest = process_new_config \%tmp_config, \%new_configs,
+			    \%good_configs, \%bad_configs;
+
+	if (!$runtest) {
+	    doprint "Set tmp config to be bad config with some good config values\n";
+
+	    my %tmp_config = make_half \@bottomhalf, \%bad_configs,
+		\%good_configs, "bottom", "good";
+
+	    $runtest = process_new_config \%tmp_config, \%new_configs,
+		\%good_configs, \%bad_configs;
+	}
+    }
+
+    if ($runtest) {
+	make_oldconfig;
+	doprint "READY TO TEST .config IN $build\n";
+	return 0;
+    }
+
+    doprint "\n%%%%%%%% FAILED TO FIND SINGLE BAD CONFIG %%%%%%%%\n";
+    doprint "Hmm, can't make any more changes without making good == bad?\n";
+    doprint "Difference between good (+) and bad (-)\n";
+
+    foreach my $item (keys %bad_configs) {
+	if (!defined($good_configs{$item})) {
+	    print_config "-", $bad_configs{$item};
+	}
+    }
+
+    foreach my $item (keys %good_configs) {
+	next if (!defined($bad_configs{$item}));
+	if ($good_configs{$item} ne $bad_configs{$item}) {
+	    print_config_compare $good_configs{$item}, $bad_configs{$item};
+	}
+    }
+
+    foreach my $item (keys %good_configs) {
+	if (!defined($bad_configs{$item})) {
+	    print_config "+", $good_configs{$item};
+	}
+    }
+    return -1;
+}
+
+sub config_bisect {
+    my ($good_config, $bad_config) = @_;
+    my $ret;
+
+    my %good_configs;
+    my %bad_configs;
+    my %tmp_configs;
+
+    doprint "Run good configs through make oldconfig\n";
+    assign_configs \%tmp_configs, $good_config;
+    create_config "$good_config", \%tmp_configs;
+    assign_configs \%good_configs, $output_config;
+
+    doprint "Run bad configs through make oldconfig\n";
+    assign_configs \%tmp_configs, $bad_config;
+    create_config "$bad_config", \%tmp_configs;
+    assign_configs \%bad_configs, $output_config;
+
+    save_config \%good_configs, $good_config;
+    save_config \%bad_configs, $bad_config;
+
+    return run_config_bisect \%good_configs, \%bad_configs;
+}
+
+while ($#ARGV >= 0) {
+    if ($ARGV[0] !~ m/^-/) {
+	last;
+    }
+    my $opt = shift @ARGV;
+
+    if ($opt eq "-b") {
+	$val = shift @ARGV;
+	if (!defined($val)) {
+	    die "-b requires value\n";
+	}
+	$build = $val;
+    }
+
+    elsif ($opt eq "-l") {
+	$val = shift @ARGV;
+	if (!defined($val)) {
+	    die "-l requires value\n";
+	}
+	$tree = $val;
+    }
+
+    elsif ($opt eq "-r") {
+	$reset_bisect = 1;
+    }
+
+    elsif ($opt eq "-h") {
+	usage;
+    }
+
+    else {
+	die "Unknow option $opt\n";
+    }
+}
+
+$build = $tree if (!defined($build));
+
+$tree = expand_path $tree;
+$build = expand_path $build;
+
+if ( ! -d $tree ) {
+    die "$tree not a directory\n";
+}
+
+if ( ! -d $build ) {
+    die "$build not a directory\n";
+}
+
+usage if $#ARGV < 1;
+
+if ($#ARGV == 1) {
+    $start = 1;
+} elsif ($#ARGV == 2) {
+    $val = $ARGV[2];
+    if ($val ne "good" && $val ne "bad") {
+	die "Unknown command '$val', bust be either \"good\" or \"bad\"\n";
+    }
+} else {
+    usage;
+}
+
+my $good_start = expand_path $ARGV[0];
+my $bad_start = expand_path $ARGV[1];
+
+my $good = "$good_start.tmp";
+my $bad = "$bad_start.tmp";
+
+$make = "make";
+
+if ($build ne $tree) {
+    $make = "make O=$build"
+}
+
+$output_config = "$build/.config";
+
+if ($start) {
+    if ( ! -f $good_start ) {
+	die "$good_start not found\n";
+    }
+    if ( ! -f $bad_start ) {
+	die "$bad_start not found\n";
+    }
+    if ( -f $good || -f $bad ) {
+	my $p = "";
+
+	if ( -f $good ) {
+	    $p = "$good exists\n";
+	}
+
+	if ( -f $bad ) {
+	    $p = "$p$bad exists\n";
+	}
+
+	if (!defined($reset_bisect)) {
+	    if (!read_yn "${p}Overwrite and start new bisect anyway?") {
+		exit (-1);
+	    }
+	}
+    }
+    run_command "cp $good_start $good" or die "failed to copy to $good\n";
+    run_command "cp $bad_start $bad" or die "faield to copy to $bad\n";
+} else {
+    if ( ! -f $good ) {
+	die "Can not find file $good\n";
+    }
+    if ( ! -f $bad ) {
+	die "Can not find file $bad\n";
+    }
+    if ($val eq "good") {
+	run_command "cp $output_config $good" or die "failed to copy $config to $good\n";
+    } elsif ($val eq "bad") {
+	run_command "cp $output_config $bad" or die "failed to copy $config to $bad\n";
+    }
+}
+
+chdir $tree || die "can't change directory to $tree";
+
+my $ret = config_bisect $good, $bad;
+
+if (!$ret) {
+    exit(0);
+}
+
+if ($ret > 0) {
+    doprint "Cleaning temp files\n";
+    run_command "rm $good";
+    run_command "rm $bad";
+    exit(1);
+} else {
+    doprint "See good and bad configs for details:\n";
+    doprint "good: $good\n";
+    doprint "bad:  $bad\n";
+    doprint "%%%%%%%% FAILED TO FIND SINGLE BAD CONFIG %%%%%%%%\n";
+}
+exit(2);
diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
index 8809f244bb7c..87af8a68ab25 100755
--- a/tools/testing/ktest/ktest.pl
+++ b/tools/testing/ktest/ktest.pl
@@ -10,6 +10,7 @@ use Fcntl qw(F_GETFL F_SETFL O_NONBLOCK);
 use File::Path qw(mkpath);
 use File::Copy qw(cp);
 use FileHandle;
+use FindBin;
 
 my $VERSION = "0.2";
 
@@ -22,6 +23,11 @@ my %evals;
 
 #default opts
 my %default = (
+    "MAILER"			=> "sendmail",  # default mailer
+    "EMAIL_ON_ERROR"		=> 1,
+    "EMAIL_WHEN_FINISHED"	=> 1,
+    "EMAIL_WHEN_CANCELED"	=> 0,
+    "EMAIL_WHEN_STARTED"	=> 0,
     "NUM_TESTS"			=> 1,
     "TEST_TYPE"			=> "build",
     "BUILD_TYPE"		=> "randconfig",
@@ -59,6 +65,7 @@ my %default = (
     "GRUB_REBOOT"		=> "grub2-reboot",
     "SYSLINUX"			=> "extlinux",
     "SYSLINUX_PATH"		=> "/boot/extlinux",
+    "CONNECT_TIMEOUT"		=> 25,
 
 # required, and we will ask users if they don't have them but we keep the default
 # value something that is common.
@@ -163,6 +170,8 @@ my $store_failures;
 my $store_successes;
 my $test_name;
 my $timeout;
+my $connect_timeout;
+my $config_bisect_exec;
 my $booted_timeout;
 my $detect_triplefault;
 my $console;
@@ -204,6 +213,20 @@ my $install_time;
 my $reboot_time;
 my $test_time;
 
+my $pwd;
+my $dirname = $FindBin::Bin;
+
+my $mailto;
+my $mailer;
+my $mail_path;
+my $mail_command;
+my $email_on_error;
+my $email_when_finished;
+my $email_when_started;
+my $email_when_canceled;
+
+my $script_start_time = localtime();
+
 # set when a test is something other that just building or install
 # which would require more options.
 my $buildonly = 1;
@@ -229,6 +252,14 @@ my $no_reboot = 1;
 my $reboot_success = 0;
 
 my %option_map = (
+    "MAILTO"			=> \$mailto,
+    "MAILER"			=> \$mailer,
+    "MAIL_PATH"			=> \$mail_path,
+    "MAIL_COMMAND"		=> \$mail_command,
+    "EMAIL_ON_ERROR"		=> \$email_on_error,
+    "EMAIL_WHEN_FINISHED"	=> \$email_when_finished,
+    "EMAIL_WHEN_STARTED"	=> \$email_when_started,
+    "EMAIL_WHEN_CANCELED"	=> \$email_when_canceled,
     "MACHINE"			=> \$machine,
     "SSH_USER"			=> \$ssh_user,
     "TMP_DIR"			=> \$tmpdir,
@@ -296,6 +327,8 @@ my %option_map = (
     "STORE_SUCCESSES"		=> \$store_successes,
     "TEST_NAME"			=> \$test_name,
     "TIMEOUT"			=> \$timeout,
+    "CONNECT_TIMEOUT"		=> \$connect_timeout,
+    "CONFIG_BISECT_EXEC"	=> \$config_bisect_exec,
     "BOOTED_TIMEOUT"		=> \$booted_timeout,
     "CONSOLE"			=> \$console,
     "CLOSE_CONSOLE_SIGNAL"	=> \$close_console_signal,
@@ -337,6 +370,7 @@ my %used_options;
 
 # default variables that can be used
 chomp ($variable{"PWD"} = `pwd`);
+$pwd = $variable{"PWD"};
 
 $config_help{"MACHINE"} = << "EOF"
  The machine hostname that you will test.
@@ -718,22 +752,14 @@ sub set_value {
 
     my $prvalue = process_variables($rvalue);
 
-    if ($buildonly && $lvalue =~ /^TEST_TYPE(\[.*\])?$/ && $prvalue ne "build") {
+    if ($lvalue =~ /^(TEST|BISECT|CONFIG_BISECT)_TYPE(\[.*\])?$/ &&
+	$prvalue !~ /^(config_|)bisect$/ &&
+	$prvalue !~ /^build$/ &&
+	$buildonly) {
+
 	# Note if a test is something other than build, then we
 	# will need other mandatory options.
 	if ($prvalue ne "install") {
-	    # for bisect, we need to check BISECT_TYPE
-	    if ($prvalue ne "bisect") {
-		$buildonly = 0;
-	    }
-	} else {
-	    # install still limits some mandatory options.
-	    $buildonly = 2;
-	}
-    }
-
-    if ($buildonly && $lvalue =~ /^BISECT_TYPE(\[.*\])?$/ && $prvalue ne "build") {
-	if ($prvalue ne "install") {
 	    $buildonly = 0;
 	} else {
 	    # install still limits some mandatory options.
@@ -1140,7 +1166,8 @@ sub __read_config {
 sub get_test_case {
 	print "What test case would you like to run?\n";
 	print " (build, install or boot)\n";
-	print " Other tests are available but require editing the config file\n";
+	print " Other tests are available but require editing ktest.conf\n";
+	print " (see tools/testing/ktest/sample.conf)\n";
 	my $ans = <STDIN>;
 	chomp $ans;
 	$default{"TEST_TYPE"} = $ans;
@@ -1328,8 +1355,8 @@ sub reboot {
     my ($time) = @_;
     my $powercycle = 0;
 
-    # test if the machine can be connected to within 5 seconds
-    my $stat = run_ssh("echo check machine status", 5);
+    # test if the machine can be connected to within a few seconds
+    my $stat = run_ssh("echo check machine status", $connect_timeout);
     if (!$stat) {
 	doprint("power cycle\n");
 	$powercycle = 1;
@@ -1404,10 +1431,18 @@ sub do_not_reboot {
 
     return $test_type eq "build" || $no_reboot ||
 	($test_type eq "patchcheck" && $opt{"PATCHCHECK_TYPE[$i]"} eq "build") ||
-	($test_type eq "bisect" && $opt{"BISECT_TYPE[$i]"} eq "build");
+	($test_type eq "bisect" && $opt{"BISECT_TYPE[$i]"} eq "build") ||
+	($test_type eq "config_bisect" && $opt{"CONFIG_BISECT_TYPE[$i]"} eq "build");
 }
 
+my $in_die = 0;
+
 sub dodie {
+
+    # avoid recusion
+    return if ($in_die);
+    $in_die = 1;
+
     doprint "CRITICAL FAILURE... ", @_, "\n";
 
     my $i = $iteration;
@@ -1426,6 +1461,11 @@ sub dodie {
 	print " See $opt{LOG_FILE} for more info.\n";
     }
 
+    if ($email_on_error) {
+        send_email("KTEST: critical failure for your [$test_type] test",
+                "Your test started at $script_start_time has failed with:\n@_\n");
+    }
+
     if ($monitor_cnt) {
 	    # restore terminal settings
 	    system("stty $stty_orig");
@@ -1477,7 +1517,7 @@ sub exec_console {
     close($pts);
 
     exec $console or
-	die "Can't open console $console";
+	dodie "Can't open console $console";
 }
 
 sub open_console {
@@ -1515,6 +1555,9 @@ sub close_console {
     doprint "kill child process $pid\n";
     kill $close_console_signal, $pid;
 
+    doprint "wait for child process $pid to exit\n";
+    waitpid($pid, 0);
+
     print "closing!\n";
     close($fp);
 
@@ -1625,7 +1668,7 @@ sub save_logs {
 
 	if (!-d $dir) {
 	    mkpath($dir) or
-		die "can't create $dir";
+		dodie "can't create $dir";
 	}
 
 	my %files = (
@@ -1638,7 +1681,7 @@ sub save_logs {
 	while (my ($name, $source) = each(%files)) {
 		if (-f "$source") {
 			cp "$source", "$dir/$name" or
-				die "failed to copy $source";
+				dodie "failed to copy $source";
 		}
 	}
 
@@ -1692,6 +1735,7 @@ sub run_command {
     my $end_time;
     my $dolog = 0;
     my $dord = 0;
+    my $dostdout = 0;
     my $pid;
 
     $command =~ s/\$SSH_USER/$ssh_user/g;
@@ -1710,9 +1754,15 @@ sub run_command {
     }
 
     if (defined($redirect)) {
-	open (RD, ">$redirect") or
-	    dodie "failed to write to redirect $redirect";
-	$dord = 1;
+	if ($redirect eq 1) {
+	    $dostdout = 1;
+	    # Have the output of the command on its own line
+	    doprint "\n";
+	} else {
+	    open (RD, ">$redirect") or
+		dodie "failed to write to redirect $redirect";
+	    $dord = 1;
+	}
     }
 
     my $hit_timeout = 0;
@@ -1734,6 +1784,7 @@ sub run_command {
 	}
 	print LOG $line if ($dolog);
 	print RD $line if ($dord);
+	print $line if ($dostdout);
     }
 
     waitpid($pid, 0);
@@ -1812,7 +1863,7 @@ sub get_grub2_index {
     $ssh_grub =~ s,\$SSH_COMMAND,cat $grub_file,g;
 
     open(IN, "$ssh_grub |")
-	or die "unable to get $grub_file";
+	or dodie "unable to get $grub_file";
 
     my $found = 0;
 
@@ -1821,13 +1872,13 @@ sub get_grub2_index {
 	    $grub_number++;
 	    $found = 1;
 	    last;
-	} elsif (/^menuentry\s/) {
+	} elsif (/^menuentry\s|^submenu\s/) {
 	    $grub_number++;
 	}
     }
     close(IN);
 
-    die "Could not find '$grub_menu' in $grub_file on $machine"
+    dodie "Could not find '$grub_menu' in $grub_file on $machine"
 	if (!$found);
     doprint "$grub_number\n";
     $last_grub_menu = $grub_menu;
@@ -1855,7 +1906,7 @@ sub get_grub_index {
     $ssh_grub =~ s,\$SSH_COMMAND,cat /boot/grub/menu.lst,g;
 
     open(IN, "$ssh_grub |")
-	or die "unable to get menu.lst";
+	or dodie "unable to get menu.lst";
 
     my $found = 0;
 
@@ -1870,7 +1921,7 @@ sub get_grub_index {
     }
     close(IN);
 
-    die "Could not find '$grub_menu' in /boot/grub/menu on $machine"
+    dodie "Could not find '$grub_menu' in /boot/grub/menu on $machine"
 	if (!$found);
     doprint "$grub_number\n";
     $last_grub_menu = $grub_menu;
@@ -1983,7 +2034,7 @@ sub monitor {
     my $full_line = "";
 
     open(DMESG, "> $dmesg") or
-	die "unable to write to $dmesg";
+	dodie "unable to write to $dmesg";
 
     reboot_to;
 
@@ -2862,7 +2913,7 @@ sub run_bisect {
 sub update_bisect_replay {
     my $tmp_log = "$tmpdir/ktest_bisect_log";
     run_command "git bisect log > $tmp_log" or
-	die "can't create bisect log";
+	dodie "can't create bisect log";
     return $tmp_log;
 }
 
@@ -2871,9 +2922,9 @@ sub bisect {
 
     my $result;
 
-    die "BISECT_GOOD[$i] not defined\n"	if (!defined($bisect_good));
-    die "BISECT_BAD[$i] not defined\n"	if (!defined($bisect_bad));
-    die "BISECT_TYPE[$i] not defined\n"	if (!defined($bisect_type));
+    dodie "BISECT_GOOD[$i] not defined\n"	if (!defined($bisect_good));
+    dodie "BISECT_BAD[$i] not defined\n"	if (!defined($bisect_bad));
+    dodie "BISECT_TYPE[$i] not defined\n"	if (!defined($bisect_type));
 
     my $good = $bisect_good;
     my $bad = $bisect_bad;
@@ -2936,7 +2987,7 @@ sub bisect {
 	if ($check ne "good") {
 	    doprint "TESTING BISECT BAD [$bad]\n";
 	    run_command "git checkout $bad" or
-		die "Failed to checkout $bad";
+		dodie "Failed to checkout $bad";
 
 	    $result = run_bisect $type;
 
@@ -2948,7 +2999,7 @@ sub bisect {
 	if ($check ne "bad") {
 	    doprint "TESTING BISECT GOOD [$good]\n";
 	    run_command "git checkout $good" or
-		die "Failed to checkout $good";
+		dodie "Failed to checkout $good";
 
 	    $result = run_bisect $type;
 
@@ -2959,7 +3010,7 @@ sub bisect {
 
 	# checkout where we started
 	run_command "git checkout $head" or
-	    die "Failed to checkout $head";
+	    dodie "Failed to checkout $head";
     }
 
     run_command "git bisect start$start_files" or
@@ -3092,76 +3143,6 @@ sub create_config {
     make_oldconfig;
 }
 
-# compare two config hashes, and return configs with different vals.
-# It returns B's config values, but you can use A to see what A was.
-sub diff_config_vals {
-    my ($pa, $pb) = @_;
-
-    # crappy Perl way to pass in hashes.
-    my %a = %{$pa};
-    my %b = %{$pb};
-
-    my %ret;
-
-    foreach my $item (keys %a) {
-	if (defined($b{$item}) && $b{$item} ne $a{$item}) {
-	    $ret{$item} = $b{$item};
-	}
-    }
-
-    return %ret;
-}
-
-# compare two config hashes and return the configs in B but not A
-sub diff_configs {
-    my ($pa, $pb) = @_;
-
-    my %ret;
-
-    # crappy Perl way to pass in hashes.
-    my %a = %{$pa};
-    my %b = %{$pb};
-
-    foreach my $item (keys %b) {
-	if (!defined($a{$item})) {
-	    $ret{$item} = $b{$item};
-	}
-    }
-
-    return %ret;
-}
-
-# return if two configs are equal or not
-# 0 is equal +1 b has something a does not
-# +1 if a and b have a different item.
-# -1 if a has something b does not
-sub compare_configs {
-    my ($pa, $pb) = @_;
-
-    my %ret;
-
-    # crappy Perl way to pass in hashes.
-    my %a = %{$pa};
-    my %b = %{$pb};
-
-    foreach my $item (keys %b) {
-	if (!defined($a{$item})) {
-	    return 1;
-	}
-	if ($a{$item} ne $b{$item}) {
-	    return 1;
-	}
-    }
-
-    foreach my $item (keys %a) {
-	if (!defined($b{$item})) {
-	    return -1;
-	}
-    }
-
-    return 0;
-}
-
 sub run_config_bisect_test {
     my ($type) = @_;
 
@@ -3174,166 +3155,57 @@ sub run_config_bisect_test {
     return $ret;
 }
 
-sub process_failed {
-    my ($config) = @_;
+sub config_bisect_end {
+    my ($good, $bad) = @_;
+    my $diffexec = "diff -u";
 
+    if (-f "$builddir/scripts/diffconfig") {
+	$diffexec = "$builddir/scripts/diffconfig";
+    }
     doprint "\n\n***************************************\n";
-    doprint "Found bad config: $config\n";
+    doprint "No more config bisecting possible.\n";
+    run_command "$diffexec $good $bad", 1;
     doprint "***************************************\n\n";
 }
 
-# used for config bisecting
-my $good_config;
-my $bad_config;
-
-sub process_new_config {
-    my ($tc, $nc, $gc, $bc) = @_;
-
-    my %tmp_config = %{$tc};
-    my %good_configs = %{$gc};
-    my %bad_configs = %{$bc};
-
-    my %new_configs;
-
-    my $runtest = 1;
-    my $ret;
-
-    create_config "tmp_configs", \%tmp_config;
-    assign_configs \%new_configs, $output_config;
-
-    $ret = compare_configs \%new_configs, \%bad_configs;
-    if (!$ret) {
-	doprint "New config equals bad config, try next test\n";
-	$runtest = 0;
-    }
-
-    if ($runtest) {
-	$ret = compare_configs \%new_configs, \%good_configs;
-	if (!$ret) {
-	    doprint "New config equals good config, try next test\n";
-	    $runtest = 0;
-	}
-    }
-
-    %{$nc} = %new_configs;
-
-    return $runtest;
-}
-
 sub run_config_bisect {
-    my ($pgood, $pbad) = @_;
-
-    my $type = $config_bisect_type;
-
-    my %good_configs = %{$pgood};
-    my %bad_configs = %{$pbad};
-
-    my %diff_configs = diff_config_vals \%good_configs, \%bad_configs;
-    my %b_configs = diff_configs \%good_configs, \%bad_configs;
-    my %g_configs = diff_configs \%bad_configs, \%good_configs;
-
-    my @diff_arr = keys %diff_configs;
-    my $len_diff = $#diff_arr + 1;
-
-    my @b_arr = keys %b_configs;
-    my $len_b = $#b_arr + 1;
-
-    my @g_arr = keys %g_configs;
-    my $len_g = $#g_arr + 1;
-
-    my $runtest = 1;
-    my %new_configs;
+    my ($good, $bad, $last_result) = @_;
+    my $reset = "";
+    my $cmd;
     my $ret;
 
-    # First, lets get it down to a single subset.
-    # Is the problem with a difference in values?
-    # Is the problem with a missing config?
-    # Is the problem with a config that breaks things?
-
-    # Enable all of one set and see if we get a new bad
-    # or good config.
-
-    # first set the good config to the bad values.
-
-    doprint "d=$len_diff g=$len_g b=$len_b\n";
-
-    # first lets enable things in bad config that are enabled in good config
-
-    if ($len_diff > 0) {
-	if ($len_b > 0 || $len_g > 0) {
-	    my %tmp_config = %bad_configs;
-
-	    doprint "Set tmp config to be bad config with good config values\n";
-	    foreach my $item (@diff_arr) {
-		$tmp_config{$item} = $good_configs{$item};
-	    }
-
-	    $runtest = process_new_config \%tmp_config, \%new_configs,
-			    \%good_configs, \%bad_configs;
-	}
+    if (!length($last_result)) {
+	$reset = "-r";
     }
+    run_command "$config_bisect_exec $reset -b $outputdir $good $bad $last_result", 1;
 
-    if (!$runtest && $len_diff > 0) {
-
-	if ($len_diff == 1) {
-	    process_failed $diff_arr[0];
-	    return 1;
-	}
-	my %tmp_config = %bad_configs;
-
-	my $half = int($#diff_arr / 2);
-	my @tophalf = @diff_arr[0 .. $half];
-
-	doprint "Settings bisect with top half:\n";
-	doprint "Set tmp config to be bad config with some good config values\n";
-	foreach my $item (@tophalf) {
-	    $tmp_config{$item} = $good_configs{$item};
-	}
-
-	$runtest = process_new_config \%tmp_config, \%new_configs,
-			    \%good_configs, \%bad_configs;
-
-	if (!$runtest) {
-	    my %tmp_config = %bad_configs;
-
-	    doprint "Try bottom half\n";
-
-	    my @bottomhalf = @diff_arr[$half+1 .. $#diff_arr];
-
-	    foreach my $item (@bottomhalf) {
-		$tmp_config{$item} = $good_configs{$item};
-	    }
-
-	    $runtest = process_new_config \%tmp_config, \%new_configs,
-			    \%good_configs, \%bad_configs;
-	}
+    # config-bisect returns:
+    #   0 if there is more to bisect
+    #   1 for finding a good config
+    #   2 if it can not find any more configs
+    #  -1 (255) on error
+    if ($run_command_status) {
+	return $run_command_status;
     }
 
-    if ($runtest) {
-	$ret = run_config_bisect_test $type;
-	if ($ret) {
-	    doprint "NEW GOOD CONFIG\n";
-	    %good_configs = %new_configs;
-	    run_command "mv $good_config ${good_config}.last";
-	    save_config \%good_configs, $good_config;
-	    %{$pgood} = %good_configs;
-	} else {
-	    doprint "NEW BAD CONFIG\n";
-	    %bad_configs = %new_configs;
-	    run_command "mv $bad_config ${bad_config}.last";
-	    save_config \%bad_configs, $bad_config;
-	    %{$pbad} = %bad_configs;
-	}
-	return 0;
+    $ret = run_config_bisect_test $config_bisect_type;
+    if ($ret) {
+        doprint "NEW GOOD CONFIG\n";
+	# Return 3 for good config
+	return 3;
+    } else {
+        doprint "NEW BAD CONFIG\n";
+	# Return 4 for bad config
+	return 4;
     }
-
-    fail "Hmm, need to do a mix match?\n";
-    return -1;
 }
 
 sub config_bisect {
     my ($i) = @_;
 
+    my $good_config;
+    my $bad_config;
+
     my $type = $config_bisect_type;
     my $ret;
 
@@ -3353,6 +3225,24 @@ sub config_bisect {
 	$good_config = $output_config;
     }
 
+    if (!defined($config_bisect_exec)) {
+	# First check the location that ktest.pl ran
+	my @locations = ( "$pwd/config-bisect.pl",
+			  "$dirname/config-bisect.pl",
+			  "$builddir/tools/testing/ktest/config-bisect.pl",
+			  undef );
+	foreach my $loc (@locations) {
+	    doprint "loc = $loc\n";
+	    $config_bisect_exec = $loc;
+	    last if (defined($config_bisect_exec && -x $config_bisect_exec));
+	}
+	if (!defined($config_bisect_exec)) {
+	    fail "Could not find an executable config-bisect.pl\n",
+		"  Set CONFIG_BISECT_EXEC to point to config-bisect.pl";
+	    return 1;
+	}
+    }
+
     # we don't want min configs to cause issues here.
     doprint "Disabling 'MIN_CONFIG' for this test\n";
     undef $minconfig;
@@ -3361,21 +3251,31 @@ sub config_bisect {
     my %bad_configs;
     my %tmp_configs;
 
+    if (-f "$tmpdir/good_config.tmp" || -f "$tmpdir/bad_config.tmp") {
+	if (read_yn "Interrupted config-bisect. Continue (n - will start new)?") {
+	    if (-f "$tmpdir/good_config.tmp") {
+		$good_config = "$tmpdir/good_config.tmp";
+	    } else {
+		$good_config = "$tmpdir/good_config";
+	    }
+	    if (-f "$tmpdir/bad_config.tmp") {
+		$bad_config = "$tmpdir/bad_config.tmp";
+	    } else {
+		$bad_config = "$tmpdir/bad_config";
+	    }
+	}
+    }
     doprint "Run good configs through make oldconfig\n";
     assign_configs \%tmp_configs, $good_config;
     create_config "$good_config", \%tmp_configs;
-    assign_configs \%good_configs, $output_config;
+    $good_config = "$tmpdir/good_config";
+    system("cp $output_config $good_config") == 0 or dodie "cp good config";
 
     doprint "Run bad configs through make oldconfig\n";
     assign_configs \%tmp_configs, $bad_config;
     create_config "$bad_config", \%tmp_configs;
-    assign_configs \%bad_configs, $output_config;
-
-    $good_config = "$tmpdir/good_config";
     $bad_config = "$tmpdir/bad_config";
-
-    save_config \%good_configs, $good_config;
-    save_config \%bad_configs, $bad_config;
+    system("cp $output_config $bad_config") == 0 or dodie "cp bad config";
 
     if (defined($config_bisect_check) && $config_bisect_check ne "0") {
 	if ($config_bisect_check ne "good") {
@@ -3398,10 +3298,21 @@ sub config_bisect {
 	}
     }
 
+    my $last_run = "";
+
     do {
-	$ret = run_config_bisect \%good_configs, \%bad_configs;
+	$ret = run_config_bisect $good_config, $bad_config, $last_run;
+	if ($ret == 3) {
+	    $last_run = "good";
+	} elsif ($ret == 4) {
+	    $last_run = "bad";
+	}
 	print_times;
-    } while (!$ret);
+    } while ($ret == 3 || $ret == 4);
+
+    if ($ret == 2) {
+        config_bisect_end "$good_config.tmp", "$bad_config.tmp";
+    }
 
     return $ret if ($ret < 0);
 
@@ -3416,9 +3327,9 @@ sub patchcheck_reboot {
 sub patchcheck {
     my ($i) = @_;
 
-    die "PATCHCHECK_START[$i] not defined\n"
+    dodie "PATCHCHECK_START[$i] not defined\n"
 	if (!defined($patchcheck_start));
-    die "PATCHCHECK_TYPE[$i] not defined\n"
+    dodie "PATCHCHECK_TYPE[$i] not defined\n"
 	if (!defined($patchcheck_type));
 
     my $start = $patchcheck_start;
@@ -3432,7 +3343,7 @@ sub patchcheck {
     if (defined($patchcheck_end)) {
 	$end = $patchcheck_end;
     } elsif ($cherry) {
-	die "PATCHCHECK_END must be defined with PATCHCHECK_CHERRY\n";
+	dodie "PATCHCHECK_END must be defined with PATCHCHECK_CHERRY\n";
     }
 
     # Get the true sha1's since we can use things like HEAD~3
@@ -3496,7 +3407,7 @@ sub patchcheck {
 	doprint "\nProcessing commit \"$item\"\n\n";
 
 	run_command "git checkout $sha1" or
-	    die "Failed to checkout $sha1";
+	    dodie "Failed to checkout $sha1";
 
 	# only clean on the first and last patch
 	if ($item eq $list[0] ||
@@ -3587,7 +3498,7 @@ sub read_kconfig {
     }
 
     open(KIN, "$kconfig")
-	or die "Can't open $kconfig";
+	or dodie "Can't open $kconfig";
     while (<KIN>) {
 	chomp;
 
@@ -3746,7 +3657,7 @@ sub get_depends {
 
 	    $dep =~ s/^[^$valid]*[$valid]+//;
 	} else {
-	    die "this should never happen";
+	    dodie "this should never happen";
 	}
     }
 
@@ -4007,7 +3918,7 @@ sub make_min_config {
 	    # update new ignore configs
 	    if (defined($ignore_config)) {
 		open (OUT, ">$temp_config")
-		    or die "Can't write to $temp_config";
+		    or dodie "Can't write to $temp_config";
 		foreach my $config (keys %save_configs) {
 		    print OUT "$save_configs{$config}\n";
 		}
@@ -4035,7 +3946,7 @@ sub make_min_config {
 
 	    # Save off all the current mandatory configs
 	    open (OUT, ">$temp_config")
-		or die "Can't write to $temp_config";
+		or dodie "Can't write to $temp_config";
 	    foreach my $config (keys %keep_configs) {
 		print OUT "$keep_configs{$config}\n";
 	    }
@@ -4222,6 +4133,74 @@ sub set_test_option {
     return eval_option($name, $option, $i);
 }
 
+sub find_mailer {
+    my ($mailer) = @_;
+
+    my @paths = split /:/, $ENV{PATH};
+
+    # sendmail is usually in /usr/sbin
+    $paths[$#paths + 1] = "/usr/sbin";
+
+    foreach my $path (@paths) {
+	if (-x "$path/$mailer") {
+	    return $path;
+	}
+    }
+
+    return undef;
+}
+
+sub do_send_mail {
+    my ($subject, $message) = @_;
+
+    if (!defined($mail_path)) {
+	# find the mailer
+	$mail_path = find_mailer $mailer;
+	if (!defined($mail_path)) {
+	    die "\nCan not find $mailer in PATH\n";
+	}
+    }
+
+    if (!defined($mail_command)) {
+	if ($mailer eq "mail" || $mailer eq "mailx") {
+	    $mail_command = "\$MAIL_PATH/\$MAILER -s \'\$SUBJECT\' \$MAILTO <<< \'\$MESSAGE\'";
+	} elsif ($mailer eq "sendmail" ) {
+	    $mail_command =  "echo \'Subject: \$SUBJECT\n\n\$MESSAGE\' | \$MAIL_PATH/\$MAILER -t \$MAILTO";
+	} else {
+	    die "\nYour mailer: $mailer is not supported.\n";
+	}
+    }
+
+    $mail_command =~ s/\$MAILER/$mailer/g;
+    $mail_command =~ s/\$MAIL_PATH/$mail_path/g;
+    $mail_command =~ s/\$MAILTO/$mailto/g;
+    $mail_command =~ s/\$SUBJECT/$subject/g;
+    $mail_command =~ s/\$MESSAGE/$message/g;
+
+    run_command $mail_command;
+}
+
+sub send_email {
+
+    if (defined($mailto)) {
+	if (!defined($mailer)) {
+	    doprint "No email sent: email or mailer not specified in config.\n";
+	    return;
+	}
+	do_send_mail @_;
+    }
+}
+
+sub cancel_test {
+    if ($email_when_canceled) {
+        send_email("KTEST: Your [$test_type] test was cancelled",
+                "Your test started at $script_start_time was cancelled: sig int");
+    }
+    die "\nCaught Sig Int, test interrupted: $!\n"
+}
+
+$SIG{INT} = qw(cancel_test);
+
 # First we need to do is the builds
 for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
 
@@ -4245,11 +4224,11 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
     $outputdir = set_test_option("OUTPUT_DIR", $i);
     $builddir = set_test_option("BUILD_DIR", $i);
 
-    chdir $builddir || die "can't change directory to $builddir";
+    chdir $builddir || dodie "can't change directory to $builddir";
 
     if (!-d $outputdir) {
 	mkpath($outputdir) or
-	    die "can't create $outputdir";
+	    dodie "can't create $outputdir";
     }
 
     $make = "$makecmd O=$outputdir";
@@ -4262,9 +4241,15 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
     $start_minconfig_defined = 1;
 
     # The first test may override the PRE_KTEST option
-    if (defined($pre_ktest) && $i == 1) {
-	doprint "\n";
-	run_command $pre_ktest;
+    if ($i == 1) {
+        if (defined($pre_ktest)) {
+            doprint "\n";
+            run_command $pre_ktest;
+        }
+        if ($email_when_started) {
+            send_email("KTEST: Your [$test_type] test was started",
+                "Your test was started on $script_start_time");
+        }
     }
 
     # Any test can override the POST_KTEST option
@@ -4280,7 +4265,7 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
 
     if (!-d $tmpdir) {
 	mkpath($tmpdir) or
-	    die "can't create $tmpdir";
+	    dodie "can't create $tmpdir";
     }
 
     $ENV{"SSH_USER"} = $ssh_user;
@@ -4353,7 +4338,7 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
 
     if (defined($checkout)) {
 	run_command "git checkout $checkout" or
-	    die "failed to checkout $checkout";
+	    dodie "failed to checkout $checkout";
     }
 
     $no_reboot = 0;
@@ -4428,4 +4413,8 @@ if ($opt{"POWEROFF_ON_SUCCESS"}) {
 
 doprint "\n    $successes of $opt{NUM_TESTS} tests were successful\n\n";
 
+if ($email_when_finished) {
+    send_email("KTEST: Your [$test_type] test has finished!",
+            "$successes of $opt{NUM_TESTS} tests started at $script_start_time were successful!");
+}
 exit 0;
diff --git a/tools/testing/ktest/sample.conf b/tools/testing/ktest/sample.conf
index 6c58cd8bbbae..6ca6ca0ce695 100644
--- a/tools/testing/ktest/sample.conf
+++ b/tools/testing/ktest/sample.conf
@@ -1,6 +1,11 @@
 #
 # Config file for ktest.pl
 #
+# Place your customized version of this, in the working directory that
+# ktest.pl is run from. By default, ktest.pl will look for a file
+# called "ktest.conf", but you can name it anything you like and specify
+# the name of your config file as the first argument of ktest.pl.
+#
 # Note, all paths must be absolute
 #
 
@@ -396,6 +401,44 @@
 
 #### Optional Config Options (all have defaults) ####
 
+# Email options for receiving notifications. Users must setup
+# the specified mailer prior to using this feature.
+#
+# (default undefined)
+#MAILTO =
+#
+# Supported mailers: sendmail, mail, mailx
+# (default sendmail)
+#MAILER = sendmail
+#
+# The executable to run
+# (default: for sendmail "/usr/sbin/sendmail", otherwise equals ${MAILER})
+#MAIL_EXEC = /usr/sbin/sendmail
+#
+# The command used to send mail, which uses the above options
+# can be modified. By default if the mailer is "sendmail" then
+#  MAIL_COMMAND = echo \'Subject: $SUBJECT\n\n$MESSAGE\' | $MAIL_PATH/$MAILER -t $MAILTO
+# For mail or mailx:
+#  MAIL_COMMAND = "$MAIL_PATH/$MAILER -s \'$SUBJECT\' $MAILTO <<< \'$MESSAGE\'
+# ktest.pl will do the substitution for MAIL_PATH, MAILER, MAILTO at the time
+#    it sends the mail if "$FOO" format is used. If "${FOO}" format is used,
+#    then the substitutions will occur at the time the config file is read.
+#    But note, MAIL_PATH and MAILER require being set by the config file if
+#     ${MAIL_PATH} or ${MAILER} are used, but not if $MAIL_PATH or $MAILER are.
+#MAIL_COMMAND = echo \'Subject: $SUBJECT\n\n$MESSAGE\' | $MAIL_PATH/$MAILER -t $MAILTO
+#
+# Errors are defined as those would terminate the script
+# (default 1)
+#EMAIL_ON_ERROR = 1
+# (default 1)
+#EMAIL_WHEN_FINISHED = 1
+# (default 0)
+#EMAIL_WHEN_STARTED = 1
+#
+# Users can cancel the test by Ctrl^C
+# (default 0)
+#EMAIL_WHEN_CANCELED = 1
+
 # Start a test setup. If you leave this off, all options
 # will be default and the test will run once.
 # This is a label and not really an option (it takes no value).
@@ -725,6 +768,13 @@
 # (default 120)
 #TIMEOUT = 120
 
+# The timeout in seconds when to test if the box can be rebooted
+# or not. Before issuing the reboot command, a ssh connection
+# is attempted to see if the target machine is still active.
+# If the target does not connect within this timeout, a power cycle
+# is issued instead of a reboot.
+# CONNECT_TIMEOUT = 25
+
 # In between tests, a reboot of the box may occur, and this
 # is the time to wait for the console after it stops producing
 # output. Some machines may not produce a large lag on reboot
@@ -1167,6 +1217,16 @@
 #  Set it to "good" to test only the good config and set it
 #  to "bad" to only test the bad config.
 #
+# CONFIG_BISECT_EXEC (optional)
+#  The config bisect is a separate program that comes with ktest.pl.
+#  By befault, it will look for:
+#    `pwd`/config-bisect.pl # the location ktest.pl was executed from.
+#  If it does not find it there, it will look for:
+#    `dirname <ktest.pl>`/config-bisect.pl # The directory that holds ktest.pl
+#  If it does not find it there, it will look for:
+#    ${BUILD_DIR}/tools/testing/ktest/config-bisect.pl
+#  Setting CONFIG_BISECT_EXEC will override where it looks.
+#
 # Example:
 #   TEST_START
 #   TEST_TYPE = config_bisect
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index 620fa78b3b1b..cb166be4918d 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -104,7 +104,8 @@ enum {
 	NUM_HINTS = 8,
 	NUM_BDW = NUM_DCR,
 	NUM_SPA = NUM_PM + NUM_DCR + NUM_BDW,
-	NUM_MEM = NUM_DCR + NUM_BDW + 2 /* spa0 iset */ + 4 /* spa1 iset */,
+	NUM_MEM = NUM_DCR + NUM_BDW + 2 /* spa0 iset */
+		+ 4 /* spa1 iset */ + 1 /* spa11 iset */,
 	DIMM_SIZE = SZ_32M,
 	LABEL_SIZE = SZ_128K,
 	SPA_VCD_SIZE = SZ_4M,
@@ -153,6 +154,7 @@ struct nfit_test {
 	void *nfit_buf;
 	dma_addr_t nfit_dma;
 	size_t nfit_size;
+	size_t nfit_filled;
 	int dcr_idx;
 	int num_dcr;
 	int num_pm;
@@ -709,7 +711,9 @@ static void smart_notify(struct device *bus_dev,
 				>= thresh->media_temperature)
 			|| ((thresh->alarm_control & ND_INTEL_SMART_CTEMP_TRIP)
 				&& smart->ctrl_temperature
-				>= thresh->ctrl_temperature)) {
+				>= thresh->ctrl_temperature)
+			|| (smart->health != ND_INTEL_SMART_NON_CRITICAL_HEALTH)
+			|| (smart->shutdown_state != 0)) {
 		device_lock(bus_dev);
 		__acpi_nvdimm_notify(dimm_dev, 0x81);
 		device_unlock(bus_dev);
@@ -735,6 +739,32 @@ static int nfit_test_cmd_smart_set_threshold(
 	return 0;
 }
 
+static int nfit_test_cmd_smart_inject(
+		struct nd_intel_smart_inject *inj,
+		unsigned int buf_len,
+		struct nd_intel_smart_threshold *thresh,
+		struct nd_intel_smart *smart,
+		struct device *bus_dev, struct device *dimm_dev)
+{
+	if (buf_len != sizeof(*inj))
+		return -EINVAL;
+
+	if (inj->mtemp_enable)
+		smart->media_temperature = inj->media_temperature;
+	if (inj->spare_enable)
+		smart->spares = inj->spares;
+	if (inj->fatal_enable)
+		smart->health = ND_INTEL_SMART_FATAL_HEALTH;
+	if (inj->unsafe_shutdown_enable) {
+		smart->shutdown_state = 1;
+		smart->shutdown_count++;
+	}
+	inj->status = 0;
+	smart_notify(bus_dev, dimm_dev, smart, thresh);
+
+	return 0;
+}
+
 static void uc_error_notify(struct work_struct *work)
 {
 	struct nfit_test *t = container_of(work, typeof(*t), work);
@@ -935,6 +965,13 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
 							t->dcr_idx],
 						&t->smart[i - t->dcr_idx],
 						&t->pdev.dev, t->dimm_dev[i]);
+			case ND_INTEL_SMART_INJECT:
+				return nfit_test_cmd_smart_inject(buf,
+						buf_len,
+						&t->smart_threshold[i -
+							t->dcr_idx],
+						&t->smart[i - t->dcr_idx],
+						&t->pdev.dev, t->dimm_dev[i]);
 			default:
 				return -ENOTTY;
 			}
@@ -1222,7 +1259,7 @@ static void smart_init(struct nfit_test *t)
 			| ND_INTEL_SMART_MTEMP_VALID,
 		.health = ND_INTEL_SMART_NON_CRITICAL_HEALTH,
 		.media_temperature = 23 * 16,
-		.ctrl_temperature = 30 * 16,
+		.ctrl_temperature = 25 * 16,
 		.pmic_temperature = 40 * 16,
 		.spares = 75,
 		.alarm_flags = ND_INTEL_SMART_SPARE_TRIP
@@ -1366,7 +1403,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	struct acpi_nfit_data_region *bdw;
 	struct acpi_nfit_flush_address *flush;
 	struct acpi_nfit_capabilities *pcap;
-	unsigned int offset, i;
+	unsigned int offset = 0, i;
 
 	/*
 	 * spa0 (interleave first half of dimm0 and dimm1, note storage
@@ -1380,93 +1417,102 @@ static void nfit_test0_setup(struct nfit_test *t)
 	spa->range_index = 0+1;
 	spa->address = t->spa_set_dma[0];
 	spa->length = SPA0_SIZE;
+	offset += spa->header.length;
 
 	/*
 	 * spa1 (interleave last half of the 4 DIMMS, note storage
 	 * does not actually alias the related block-data-window
 	 * regions)
 	 */
-	spa = nfit_buf + sizeof(*spa);
+	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
 	spa->header.length = sizeof(*spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
 	spa->range_index = 1+1;
 	spa->address = t->spa_set_dma[1];
 	spa->length = SPA1_SIZE;
+	offset += spa->header.length;
 
 	/* spa2 (dcr0) dimm0 */
-	spa = nfit_buf + sizeof(*spa) * 2;
+	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
 	spa->header.length = sizeof(*spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
 	spa->range_index = 2+1;
 	spa->address = t->dcr_dma[0];
 	spa->length = DCR_SIZE;
+	offset += spa->header.length;
 
 	/* spa3 (dcr1) dimm1 */
-	spa = nfit_buf + sizeof(*spa) * 3;
+	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
 	spa->header.length = sizeof(*spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
 	spa->range_index = 3+1;
 	spa->address = t->dcr_dma[1];
 	spa->length = DCR_SIZE;
+	offset += spa->header.length;
 
 	/* spa4 (dcr2) dimm2 */
-	spa = nfit_buf + sizeof(*spa) * 4;
+	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
 	spa->header.length = sizeof(*spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
 	spa->range_index = 4+1;
 	spa->address = t->dcr_dma[2];
 	spa->length = DCR_SIZE;
+	offset += spa->header.length;
 
 	/* spa5 (dcr3) dimm3 */
-	spa = nfit_buf + sizeof(*spa) * 5;
+	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
 	spa->header.length = sizeof(*spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
 	spa->range_index = 5+1;
 	spa->address = t->dcr_dma[3];
 	spa->length = DCR_SIZE;
+	offset += spa->header.length;
 
 	/* spa6 (bdw for dcr0) dimm0 */
-	spa = nfit_buf + sizeof(*spa) * 6;
+	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
 	spa->header.length = sizeof(*spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
 	spa->range_index = 6+1;
 	spa->address = t->dimm_dma[0];
 	spa->length = DIMM_SIZE;
+	offset += spa->header.length;
 
 	/* spa7 (bdw for dcr1) dimm1 */
-	spa = nfit_buf + sizeof(*spa) * 7;
+	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
 	spa->header.length = sizeof(*spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
 	spa->range_index = 7+1;
 	spa->address = t->dimm_dma[1];
 	spa->length = DIMM_SIZE;
+	offset += spa->header.length;
 
 	/* spa8 (bdw for dcr2) dimm2 */
-	spa = nfit_buf + sizeof(*spa) * 8;
+	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
 	spa->header.length = sizeof(*spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
 	spa->range_index = 8+1;
 	spa->address = t->dimm_dma[2];
 	spa->length = DIMM_SIZE;
+	offset += spa->header.length;
 
 	/* spa9 (bdw for dcr3) dimm3 */
-	spa = nfit_buf + sizeof(*spa) * 9;
+	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
 	spa->header.length = sizeof(*spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
 	spa->range_index = 9+1;
 	spa->address = t->dimm_dma[3];
 	spa->length = DIMM_SIZE;
+	offset += spa->header.length;
 
-	offset = sizeof(*spa) * 10;
 	/* mem-region0 (spa0, dimm0) */
 	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
@@ -1481,9 +1527,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->address = 0;
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 2;
+	offset += memdev->header.length;
 
 	/* mem-region1 (spa0, dimm1) */
-	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map);
+	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
 	memdev->header.length = sizeof(*memdev);
 	memdev->device_handle = handle[1];
@@ -1497,9 +1544,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 2;
 	memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;
+	offset += memdev->header.length;
 
 	/* mem-region2 (spa1, dimm0) */
-	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 2;
+	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
 	memdev->header.length = sizeof(*memdev);
 	memdev->device_handle = handle[0];
@@ -1513,9 +1561,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 4;
 	memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;
+	offset += memdev->header.length;
 
 	/* mem-region3 (spa1, dimm1) */
-	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 3;
+	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
 	memdev->header.length = sizeof(*memdev);
 	memdev->device_handle = handle[1];
@@ -1528,9 +1577,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->address = SPA0_SIZE/2;
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 4;
+	offset += memdev->header.length;
 
 	/* mem-region4 (spa1, dimm2) */
-	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 4;
+	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
 	memdev->header.length = sizeof(*memdev);
 	memdev->device_handle = handle[2];
@@ -1544,9 +1594,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 4;
 	memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;
+	offset += memdev->header.length;
 
 	/* mem-region5 (spa1, dimm3) */
-	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 5;
+	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
 	memdev->header.length = sizeof(*memdev);
 	memdev->device_handle = handle[3];
@@ -1559,9 +1610,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->address = SPA0_SIZE/2;
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 4;
+	offset += memdev->header.length;
 
 	/* mem-region6 (spa/dcr0, dimm0) */
-	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 6;
+	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
 	memdev->header.length = sizeof(*memdev);
 	memdev->device_handle = handle[0];
@@ -1574,9 +1626,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->address = 0;
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 1;
+	offset += memdev->header.length;
 
 	/* mem-region7 (spa/dcr1, dimm1) */
-	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 7;
+	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
 	memdev->header.length = sizeof(*memdev);
 	memdev->device_handle = handle[1];
@@ -1589,9 +1642,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->address = 0;
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 1;
+	offset += memdev->header.length;
 
 	/* mem-region8 (spa/dcr2, dimm2) */
-	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 8;
+	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
 	memdev->header.length = sizeof(*memdev);
 	memdev->device_handle = handle[2];
@@ -1604,9 +1658,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->address = 0;
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 1;
+	offset += memdev->header.length;
 
 	/* mem-region9 (spa/dcr3, dimm3) */
-	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 9;
+	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
 	memdev->header.length = sizeof(*memdev);
 	memdev->device_handle = handle[3];
@@ -1619,9 +1674,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->address = 0;
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 1;
+	offset += memdev->header.length;
 
 	/* mem-region10 (spa/bdw0, dimm0) */
-	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 10;
+	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
 	memdev->header.length = sizeof(*memdev);
 	memdev->device_handle = handle[0];
@@ -1634,9 +1690,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->address = 0;
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 1;
+	offset += memdev->header.length;
 
 	/* mem-region11 (spa/bdw1, dimm1) */
-	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 11;
+	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
 	memdev->header.length = sizeof(*memdev);
 	memdev->device_handle = handle[1];
@@ -1649,9 +1706,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->address = 0;
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 1;
+	offset += memdev->header.length;
 
 	/* mem-region12 (spa/bdw2, dimm2) */
-	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 12;
+	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
 	memdev->header.length = sizeof(*memdev);
 	memdev->device_handle = handle[2];
@@ -1664,9 +1722,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->address = 0;
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 1;
+	offset += memdev->header.length;
 
 	/* mem-region13 (spa/dcr3, dimm3) */
-	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 13;
+	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
 	memdev->header.length = sizeof(*memdev);
 	memdev->device_handle = handle[3];
@@ -1680,12 +1739,12 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 1;
 	memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;
+	offset += memdev->header.length;
 
-	offset = offset + sizeof(struct acpi_nfit_memory_map) * 14;
 	/* dcr-descriptor0: blk */
 	dcr = nfit_buf + offset;
 	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
-	dcr->header.length = sizeof(struct acpi_nfit_control_region);
+	dcr->header.length = sizeof(*dcr);
 	dcr->region_index = 0+1;
 	dcr_common_init(dcr);
 	dcr->serial_number = ~handle[0];
@@ -1696,11 +1755,12 @@ static void nfit_test0_setup(struct nfit_test *t)
 	dcr->command_size = 8;
 	dcr->status_offset = 8;
 	dcr->status_size = 4;
+	offset += dcr->header.length;
 
 	/* dcr-descriptor1: blk */
-	dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region);
+	dcr = nfit_buf + offset;
 	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
-	dcr->header.length = sizeof(struct acpi_nfit_control_region);
+	dcr->header.length = sizeof(*dcr);
 	dcr->region_index = 1+1;
 	dcr_common_init(dcr);
 	dcr->serial_number = ~handle[1];
@@ -1711,11 +1771,12 @@ static void nfit_test0_setup(struct nfit_test *t)
 	dcr->command_size = 8;
 	dcr->status_offset = 8;
 	dcr->status_size = 4;
+	offset += dcr->header.length;
 
 	/* dcr-descriptor2: blk */
-	dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region) * 2;
+	dcr = nfit_buf + offset;
 	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
-	dcr->header.length = sizeof(struct acpi_nfit_control_region);
+	dcr->header.length = sizeof(*dcr);
 	dcr->region_index = 2+1;
 	dcr_common_init(dcr);
 	dcr->serial_number = ~handle[2];
@@ -1726,11 +1787,12 @@ static void nfit_test0_setup(struct nfit_test *t)
 	dcr->command_size = 8;
 	dcr->status_offset = 8;
 	dcr->status_size = 4;
+	offset += dcr->header.length;
 
 	/* dcr-descriptor3: blk */
-	dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region) * 3;
+	dcr = nfit_buf + offset;
 	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
-	dcr->header.length = sizeof(struct acpi_nfit_control_region);
+	dcr->header.length = sizeof(*dcr);
 	dcr->region_index = 3+1;
 	dcr_common_init(dcr);
 	dcr->serial_number = ~handle[3];
@@ -1741,8 +1803,8 @@ static void nfit_test0_setup(struct nfit_test *t)
 	dcr->command_size = 8;
 	dcr->status_offset = 8;
 	dcr->status_size = 4;
+	offset += dcr->header.length;
 
-	offset = offset + sizeof(struct acpi_nfit_control_region) * 4;
 	/* dcr-descriptor0: pmem */
 	dcr = nfit_buf + offset;
 	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
@@ -1753,10 +1815,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	dcr->serial_number = ~handle[0];
 	dcr->code = NFIT_FIC_BYTEN;
 	dcr->windows = 0;
+	offset += dcr->header.length;
 
 	/* dcr-descriptor1: pmem */
-	dcr = nfit_buf + offset + offsetof(struct acpi_nfit_control_region,
-			window_size);
+	dcr = nfit_buf + offset;
 	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
 	dcr->header.length = offsetof(struct acpi_nfit_control_region,
 			window_size);
@@ -1765,10 +1827,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	dcr->serial_number = ~handle[1];
 	dcr->code = NFIT_FIC_BYTEN;
 	dcr->windows = 0;
+	offset += dcr->header.length;
 
 	/* dcr-descriptor2: pmem */
-	dcr = nfit_buf + offset + offsetof(struct acpi_nfit_control_region,
-			window_size) * 2;
+	dcr = nfit_buf + offset;
 	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
 	dcr->header.length = offsetof(struct acpi_nfit_control_region,
 			window_size);
@@ -1777,10 +1839,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	dcr->serial_number = ~handle[2];
 	dcr->code = NFIT_FIC_BYTEN;
 	dcr->windows = 0;
+	offset += dcr->header.length;
 
 	/* dcr-descriptor3: pmem */
-	dcr = nfit_buf + offset + offsetof(struct acpi_nfit_control_region,
-			window_size) * 3;
+	dcr = nfit_buf + offset;
 	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
 	dcr->header.length = offsetof(struct acpi_nfit_control_region,
 			window_size);
@@ -1789,54 +1851,56 @@ static void nfit_test0_setup(struct nfit_test *t)
 	dcr->serial_number = ~handle[3];
 	dcr->code = NFIT_FIC_BYTEN;
 	dcr->windows = 0;
+	offset += dcr->header.length;
 
-	offset = offset + offsetof(struct acpi_nfit_control_region,
-			window_size) * 4;
 	/* bdw0 (spa/dcr0, dimm0) */
 	bdw = nfit_buf + offset;
 	bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
-	bdw->header.length = sizeof(struct acpi_nfit_data_region);
+	bdw->header.length = sizeof(*bdw);
 	bdw->region_index = 0+1;
 	bdw->windows = 1;
 	bdw->offset = 0;
 	bdw->size = BDW_SIZE;
 	bdw->capacity = DIMM_SIZE;
 	bdw->start_address = 0;
+	offset += bdw->header.length;
 
 	/* bdw1 (spa/dcr1, dimm1) */
-	bdw = nfit_buf + offset + sizeof(struct acpi_nfit_data_region);
+	bdw = nfit_buf + offset;
 	bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
-	bdw->header.length = sizeof(struct acpi_nfit_data_region);
+	bdw->header.length = sizeof(*bdw);
 	bdw->region_index = 1+1;
 	bdw->windows = 1;
 	bdw->offset = 0;
 	bdw->size = BDW_SIZE;
 	bdw->capacity = DIMM_SIZE;
 	bdw->start_address = 0;
+	offset += bdw->header.length;
 
 	/* bdw2 (spa/dcr2, dimm2) */
-	bdw = nfit_buf + offset + sizeof(struct acpi_nfit_data_region) * 2;
+	bdw = nfit_buf + offset;
 	bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
-	bdw->header.length = sizeof(struct acpi_nfit_data_region);
+	bdw->header.length = sizeof(*bdw);
 	bdw->region_index = 2+1;
 	bdw->windows = 1;
 	bdw->offset = 0;
 	bdw->size = BDW_SIZE;
 	bdw->capacity = DIMM_SIZE;
 	bdw->start_address = 0;
+	offset += bdw->header.length;
 
 	/* bdw3 (spa/dcr3, dimm3) */
-	bdw = nfit_buf + offset + sizeof(struct acpi_nfit_data_region) * 3;
+	bdw = nfit_buf + offset;
 	bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
-	bdw->header.length = sizeof(struct acpi_nfit_data_region);
+	bdw->header.length = sizeof(*bdw);
 	bdw->region_index = 3+1;
 	bdw->windows = 1;
 	bdw->offset = 0;
 	bdw->size = BDW_SIZE;
 	bdw->capacity = DIMM_SIZE;
 	bdw->start_address = 0;
+	offset += bdw->header.length;
 
-	offset = offset + sizeof(struct acpi_nfit_data_region) * 4;
 	/* flush0 (dimm0) */
 	flush = nfit_buf + offset;
 	flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
@@ -1845,48 +1909,52 @@ static void nfit_test0_setup(struct nfit_test *t)
 	flush->hint_count = NUM_HINTS;
 	for (i = 0; i < NUM_HINTS; i++)
 		flush->hint_address[i] = t->flush_dma[0] + i * sizeof(u64);
+	offset += flush->header.length;
 
 	/* flush1 (dimm1) */
-	flush = nfit_buf + offset + flush_hint_size * 1;
+	flush = nfit_buf + offset;
 	flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
 	flush->header.length = flush_hint_size;
 	flush->device_handle = handle[1];
 	flush->hint_count = NUM_HINTS;
 	for (i = 0; i < NUM_HINTS; i++)
 		flush->hint_address[i] = t->flush_dma[1] + i * sizeof(u64);
+	offset += flush->header.length;
 
 	/* flush2 (dimm2) */
-	flush = nfit_buf + offset + flush_hint_size  * 2;
+	flush = nfit_buf + offset;
 	flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
 	flush->header.length = flush_hint_size;
 	flush->device_handle = handle[2];
 	flush->hint_count = NUM_HINTS;
 	for (i = 0; i < NUM_HINTS; i++)
 		flush->hint_address[i] = t->flush_dma[2] + i * sizeof(u64);
+	offset += flush->header.length;
 
 	/* flush3 (dimm3) */
-	flush = nfit_buf + offset + flush_hint_size * 3;
+	flush = nfit_buf + offset;
 	flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
 	flush->header.length = flush_hint_size;
 	flush->device_handle = handle[3];
 	flush->hint_count = NUM_HINTS;
 	for (i = 0; i < NUM_HINTS; i++)
 		flush->hint_address[i] = t->flush_dma[3] + i * sizeof(u64);
+	offset += flush->header.length;
 
 	/* platform capabilities */
-	pcap = nfit_buf + offset + flush_hint_size * 4;
+	pcap = nfit_buf + offset;
 	pcap->header.type = ACPI_NFIT_TYPE_CAPABILITIES;
 	pcap->header.length = sizeof(*pcap);
 	pcap->highest_capability = 1;
 	pcap->capabilities = ACPI_NFIT_CAPABILITY_CACHE_FLUSH |
 		ACPI_NFIT_CAPABILITY_MEM_FLUSH;
+	offset += pcap->header.length;
 
 	if (t->setup_hotplug) {
-		offset = offset + flush_hint_size * 4 + sizeof(*pcap);
 		/* dcr-descriptor4: blk */
 		dcr = nfit_buf + offset;
 		dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
-		dcr->header.length = sizeof(struct acpi_nfit_control_region);
+		dcr->header.length = sizeof(*dcr);
 		dcr->region_index = 8+1;
 		dcr_common_init(dcr);
 		dcr->serial_number = ~handle[4];
@@ -1897,8 +1965,8 @@ static void nfit_test0_setup(struct nfit_test *t)
 		dcr->command_size = 8;
 		dcr->status_offset = 8;
 		dcr->status_size = 4;
+		offset += dcr->header.length;
 
-		offset = offset + sizeof(struct acpi_nfit_control_region);
 		/* dcr-descriptor4: pmem */
 		dcr = nfit_buf + offset;
 		dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
@@ -1909,21 +1977,20 @@ static void nfit_test0_setup(struct nfit_test *t)
 		dcr->serial_number = ~handle[4];
 		dcr->code = NFIT_FIC_BYTEN;
 		dcr->windows = 0;
+		offset += dcr->header.length;
 
-		offset = offset + offsetof(struct acpi_nfit_control_region,
-				window_size);
 		/* bdw4 (spa/dcr4, dimm4) */
 		bdw = nfit_buf + offset;
 		bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
-		bdw->header.length = sizeof(struct acpi_nfit_data_region);
+		bdw->header.length = sizeof(*bdw);
 		bdw->region_index = 8+1;
 		bdw->windows = 1;
 		bdw->offset = 0;
 		bdw->size = BDW_SIZE;
 		bdw->capacity = DIMM_SIZE;
 		bdw->start_address = 0;
+		offset += bdw->header.length;
 
-		offset = offset + sizeof(struct acpi_nfit_data_region);
 		/* spa10 (dcr4) dimm4 */
 		spa = nfit_buf + offset;
 		spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
@@ -1932,30 +1999,32 @@ static void nfit_test0_setup(struct nfit_test *t)
 		spa->range_index = 10+1;
 		spa->address = t->dcr_dma[4];
 		spa->length = DCR_SIZE;
+		offset += spa->header.length;
 
 		/*
 		 * spa11 (single-dimm interleave for hotplug, note storage
 		 * does not actually alias the related block-data-window
 		 * regions)
 		 */
-		spa = nfit_buf + offset + sizeof(*spa);
+		spa = nfit_buf + offset;
 		spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
 		spa->header.length = sizeof(*spa);
 		memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
 		spa->range_index = 11+1;
 		spa->address = t->spa_set_dma[2];
 		spa->length = SPA0_SIZE;
+		offset += spa->header.length;
 
 		/* spa12 (bdw for dcr4) dimm4 */
-		spa = nfit_buf + offset + sizeof(*spa) * 2;
+		spa = nfit_buf + offset;
 		spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
 		spa->header.length = sizeof(*spa);
 		memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
 		spa->range_index = 12+1;
 		spa->address = t->dimm_dma[4];
 		spa->length = DIMM_SIZE;
+		offset += spa->header.length;
 
-		offset = offset + sizeof(*spa) * 3;
 		/* mem-region14 (spa/dcr4, dimm4) */
 		memdev = nfit_buf + offset;
 		memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
@@ -1970,10 +2039,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 		memdev->address = 0;
 		memdev->interleave_index = 0;
 		memdev->interleave_ways = 1;
+		offset += memdev->header.length;
 
-		/* mem-region15 (spa0, dimm4) */
-		memdev = nfit_buf + offset +
-				sizeof(struct acpi_nfit_memory_map);
+		/* mem-region15 (spa11, dimm4) */
+		memdev = nfit_buf + offset;
 		memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
 		memdev->header.length = sizeof(*memdev);
 		memdev->device_handle = handle[4];
@@ -1987,10 +2056,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 		memdev->interleave_index = 0;
 		memdev->interleave_ways = 1;
 		memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;
+		offset += memdev->header.length;
 
 		/* mem-region16 (spa/bdw4, dimm4) */
-		memdev = nfit_buf + offset +
-				sizeof(struct acpi_nfit_memory_map) * 2;
+		memdev = nfit_buf + offset;
 		memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
 		memdev->header.length = sizeof(*memdev);
 		memdev->device_handle = handle[4];
@@ -2003,8 +2072,8 @@ static void nfit_test0_setup(struct nfit_test *t)
 		memdev->address = 0;
 		memdev->interleave_index = 0;
 		memdev->interleave_ways = 1;
+		offset += memdev->header.length;
 
-		offset = offset + sizeof(struct acpi_nfit_memory_map) * 3;
 		/* flush3 (dimm4) */
 		flush = nfit_buf + offset;
 		flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
@@ -2014,8 +2083,14 @@ static void nfit_test0_setup(struct nfit_test *t)
 		for (i = 0; i < NUM_HINTS; i++)
 			flush->hint_address[i] = t->flush_dma[4]
 				+ i * sizeof(u64);
+		offset += flush->header.length;
+
+		/* sanity check to make sure we've filled the buffer */
+		WARN_ON(offset != t->nfit_size);
 	}
 
+	t->nfit_filled = offset;
+
 	post_ars_status(&t->ars_state, &t->badrange, t->spa_set_dma[0],
 			SPA0_SIZE);
 
@@ -2026,6 +2101,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	set_bit(ND_INTEL_SMART, &acpi_desc->dimm_cmd_force_en);
 	set_bit(ND_INTEL_SMART_THRESHOLD, &acpi_desc->dimm_cmd_force_en);
 	set_bit(ND_INTEL_SMART_SET_THRESHOLD, &acpi_desc->dimm_cmd_force_en);
+	set_bit(ND_INTEL_SMART_INJECT, &acpi_desc->dimm_cmd_force_en);
 	set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_cmd_force_en);
 	set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en);
 	set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en);
@@ -2061,17 +2137,18 @@ static void nfit_test1_setup(struct nfit_test *t)
 	spa->range_index = 0+1;
 	spa->address = t->spa_set_dma[0];
 	spa->length = SPA2_SIZE;
+	offset += spa->header.length;
 
 	/* virtual cd region */
-	spa = nfit_buf + sizeof(*spa);
+	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
 	spa->header.length = sizeof(*spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_VCD), 16);
 	spa->range_index = 0;
 	spa->address = t->spa_set_dma[1];
 	spa->length = SPA_VCD_SIZE;
+	offset += spa->header.length;
 
-	offset += sizeof(*spa) * 2;
 	/* mem-region0 (spa0, dimm0) */
 	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
@@ -2089,8 +2166,8 @@ static void nfit_test1_setup(struct nfit_test *t)
 	memdev->flags = ACPI_NFIT_MEM_SAVE_FAILED | ACPI_NFIT_MEM_RESTORE_FAILED
 		| ACPI_NFIT_MEM_FLUSH_FAILED | ACPI_NFIT_MEM_HEALTH_OBSERVED
 		| ACPI_NFIT_MEM_NOT_ARMED;
+	offset += memdev->header.length;
 
-	offset += sizeof(*memdev);
 	/* dcr-descriptor0 */
 	dcr = nfit_buf + offset;
 	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
@@ -2101,8 +2178,8 @@ static void nfit_test1_setup(struct nfit_test *t)
 	dcr->serial_number = ~handle[5];
 	dcr->code = NFIT_FIC_BYTE;
 	dcr->windows = 0;
-
 	offset += dcr->header.length;
+
 	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
 	memdev->header.length = sizeof(*memdev);
@@ -2117,9 +2194,9 @@ static void nfit_test1_setup(struct nfit_test *t)
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 1;
 	memdev->flags = ACPI_NFIT_MEM_MAP_FAILED;
+	offset += memdev->header.length;
 
 	/* dcr-descriptor1 */
-	offset += sizeof(*memdev);
 	dcr = nfit_buf + offset;
 	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
 	dcr->header.length = offsetof(struct acpi_nfit_control_region,
@@ -2129,6 +2206,12 @@ static void nfit_test1_setup(struct nfit_test *t)
 	dcr->serial_number = ~handle[6];
 	dcr->code = NFIT_FIC_BYTE;
 	dcr->windows = 0;
+	offset += dcr->header.length;
+
+	/* sanity check to make sure we've filled the buffer */
+	WARN_ON(offset != t->nfit_size);
+
+	t->nfit_filled = offset;
 
 	post_ars_status(&t->ars_state, &t->badrange, t->spa_set_dma[0],
 			SPA2_SIZE);
@@ -2487,7 +2570,7 @@ static int nfit_test_probe(struct platform_device *pdev)
 	nd_desc->ndctl = nfit_test_ctl;
 
 	rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_buf,
-			nfit_test->nfit_size);
+			nfit_test->nfit_filled);
 	if (rc)
 		return rc;
 
diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h
index 428344519cdf..33752e06ff8d 100644
--- a/tools/testing/nvdimm/test/nfit_test.h
+++ b/tools/testing/nvdimm/test/nfit_test.h
@@ -93,6 +93,7 @@ struct nd_cmd_ars_err_inj_stat {
 #define ND_INTEL_FW_FINISH_UPDATE	15
 #define ND_INTEL_FW_FINISH_QUERY	16
 #define ND_INTEL_SMART_SET_THRESHOLD	17
+#define ND_INTEL_SMART_INJECT		18
 
 #define ND_INTEL_SMART_HEALTH_VALID             (1 << 0)
 #define ND_INTEL_SMART_SPARES_VALID             (1 << 1)
@@ -111,6 +112,10 @@ struct nd_cmd_ars_err_inj_stat {
 #define ND_INTEL_SMART_NON_CRITICAL_HEALTH      (1 << 0)
 #define ND_INTEL_SMART_CRITICAL_HEALTH          (1 << 1)
 #define ND_INTEL_SMART_FATAL_HEALTH             (1 << 2)
+#define ND_INTEL_SMART_INJECT_MTEMP		(1 << 0)
+#define ND_INTEL_SMART_INJECT_SPARE		(1 << 1)
+#define ND_INTEL_SMART_INJECT_FATAL		(1 << 2)
+#define ND_INTEL_SMART_INJECT_SHUTDOWN		(1 << 3)
 
 struct nd_intel_smart {
 	__u32 status;
@@ -158,6 +163,17 @@ struct nd_intel_smart_set_threshold {
 	__u32 status;
 } __packed;
 
+struct nd_intel_smart_inject {
+	__u64 flags;
+	__u8 mtemp_enable;
+	__u16 media_temperature;
+	__u8 spare_enable;
+	__u8 spares;
+	__u8 fatal_enable;
+	__u8 unsafe_shutdown_enable;
+	__u32 status;
+} __packed;
+
 #define INTEL_FW_STORAGE_SIZE		0x100000
 #define INTEL_FW_MAX_SEND_LEN		0xFFEC
 #define INTEL_FW_QUERY_INTERVAL		250000
diff --git a/tools/testing/radix-tree/linux/gfp.h b/tools/testing/radix-tree/linux/gfp.h
index e3201ccf54c3..32159c08a52e 100644
--- a/tools/testing/radix-tree/linux/gfp.h
+++ b/tools/testing/radix-tree/linux/gfp.h
@@ -19,6 +19,7 @@
 
 #define __GFP_RECLAIM	(__GFP_DIRECT_RECLAIM|__GFP_KSWAPD_RECLAIM)
 
+#define GFP_ZONEMASK	0x0fu
 #define GFP_ATOMIC	(__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM)
 #define GFP_KERNEL	(__GFP_RECLAIM | __GFP_IO | __GFP_FS)
 #define GFP_NOWAIT	(__GFP_KSWAPD_RECLAIM)
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 2fc410bc4f33..32aafa92074c 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -25,6 +25,7 @@ TARGETS += mqueue
 TARGETS += net
 TARGETS += nsfs
 TARGETS += powerpc
+TARGETS += proc
 TARGETS += pstore
 TARGETS += ptrace
 TARGETS += seccomp
diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions
index df3dd7fe5f9b..2a4f16fc9819 100644
--- a/tools/testing/selftests/ftrace/test.d/functions
+++ b/tools/testing/selftests/ftrace/test.d/functions
@@ -59,6 +59,13 @@ disable_events() {
     echo 0 > events/enable
 }
 
+clear_synthetic_events() { # reset all current synthetic events
+    grep -v ^# synthetic_events |
+    while read line; do
+        echo "!$line" >> synthetic_events
+    done
+}
+
 initialize_ftrace() { # Reset ftrace to initial-state
 # As the initial state, ftrace will be set to nop tracer,
 # no events, no triggers, no filters, no function filters,
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc
new file mode 100644
index 000000000000..786dce7e48be
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc
@@ -0,0 +1,39 @@
+#!/bin/sh
+# description: event trigger - test extended error support
+
+
+do_reset() {
+    reset_trigger
+    echo > set_event
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+    echo "synthetic event is not supported"
+    exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+echo "Test extended error support"
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger &>/dev/null
+if ! grep -q "ERROR:" events/sched/sched_wakeup/hist; then
+    fail "Failed to generate extended error in histogram"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc
new file mode 100644
index 000000000000..7fd5b4a8f060
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc
@@ -0,0 +1,54 @@
+#!/bin/sh
+# description: event trigger - test field variable support
+
+do_reset() {
+    reset_trigger
+    echo > set_event
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+    echo "synthetic event is not supported"
+    exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test field variable support"
+
+echo 'wakeup_latency u64 lat; pid_t pid; int prio; char comm[16]' > synthetic_events
+echo 'hist:keys=comm:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_waking/trigger
+echo 'hist:keys=next_comm:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid,sched.sched_waking.prio,next_comm) if next_comm=="ping"' > events/sched/sched_switch/trigger
+echo 'hist:keys=pid,prio,comm:vals=lat:sort=pid,prio' > events/synthetic/wakeup_latency/trigger
+
+ping localhost -c 3
+if ! grep -q "ping" events/synthetic/wakeup_latency/hist; then
+    fail "Failed to create inter-event histogram"
+fi
+
+if ! grep -q "synthetic_prio=prio" events/sched/sched_waking/hist; then
+    fail "Failed to create histogram with field variable"
+fi
+
+echo '!hist:keys=next_comm:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid,sched.sched_waking.prio,next_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger
+
+if grep -q "synthetic_prio=prio" events/sched/sched_waking/hist; then
+    fail "Failed to remove histogram with field variable"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc
new file mode 100644
index 000000000000..c93dbe38b5df
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc
@@ -0,0 +1,58 @@
+#!/bin/sh
+# description: event trigger - test inter-event combined histogram trigger
+
+do_reset() {
+    reset_trigger
+    echo > set_event
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+    echo "synthetic event is not supported"
+    exit_unsupported
+fi
+
+reset_tracer
+do_reset
+clear_synthetic_events
+
+echo "Test create synthetic event"
+
+echo 'waking_latency  u64 lat pid_t pid' > synthetic_events
+if [ ! -d events/synthetic/waking_latency ]; then
+    fail "Failed to create waking_latency synthetic event"
+fi
+
+echo "Test combined histogram"
+
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_waking/trigger
+echo 'hist:keys=pid:waking_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).waking_latency($waking_lat,pid) if comm=="ping"' > events/sched/sched_wakeup/trigger
+echo 'hist:keys=pid,lat:sort=pid,lat' > events/synthetic/waking_latency/trigger
+
+echo 'wakeup_latency u64 lat pid_t pid' >> synthetic_events
+echo 'hist:keys=pid:ts1=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger
+echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts1:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid) if next_comm=="ping"' > events/sched/sched_switch/trigger
+
+echo 'waking+wakeup_latency u64 lat; pid_t pid' >> synthetic_events
+echo 'hist:keys=pid,lat:sort=pid,lat:ww_lat=$waking_lat+$wakeup_lat:onmatch(synthetic.wakeup_latency).waking+wakeup_latency($ww_lat,pid)' >> events/synthetic/wakeup_latency/trigger
+echo 'hist:keys=pid,lat:sort=pid,lat' >> events/synthetic/waking+wakeup_latency/trigger
+
+ping localhost -c 3
+if ! grep -q "pid:" events/synthetic/waking+wakeup_latency/hist; then
+    fail "Failed to create combined histogram"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc
new file mode 100644
index 000000000000..e84e7d048566
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc
@@ -0,0 +1,50 @@
+#!/bin/sh
+# description: event trigger - test inter-event histogram trigger onmatch action
+
+do_reset() {
+    reset_trigger
+    echo > set_event
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+    echo "synthetic event is not supported"
+    exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test create synthetic event"
+
+echo 'wakeup_latency  u64 lat pid_t pid char comm[16]' > synthetic_events
+if [ ! -d events/synthetic/wakeup_latency ]; then
+    fail "Failed to create wakeup_latency synthetic event"
+fi
+
+echo "Test create histogram for synthetic event"
+echo "Test histogram variables,simple expression support and onmatch action"
+
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger
+echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid,next_comm) if next_comm=="ping"' > events/sched/sched_switch/trigger
+echo 'hist:keys=comm,pid,lat:wakeup_lat=lat:sort=lat' > events/synthetic/wakeup_latency/trigger
+ping localhost -c 5
+if ! grep -q "ping" events/synthetic/wakeup_latency/hist; then
+    fail "Failed to create onmatch action inter-event histogram"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc
new file mode 100644
index 000000000000..7907d8aacde3
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc
@@ -0,0 +1,50 @@
+#!/bin/sh
+# description: event trigger - test inter-event histogram trigger onmatch-onmax action
+
+do_reset() {
+    reset_trigger
+    echo > set_event
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+    echo "synthetic event is not supported"
+    exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test create synthetic event"
+
+echo 'wakeup_latency  u64 lat pid_t pid char comm[16]' > synthetic_events
+if [ ! -d events/synthetic/wakeup_latency ]; then
+    fail "Failed to create wakeup_latency synthetic event"
+fi
+
+echo "Test create histogram for synthetic event"
+echo "Test histogram variables,simple expression support and onmatch-onmax action"
+
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger
+echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid,next_comm):onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger
+echo 'hist:keys=comm,pid,lat:wakeup_lat=lat:sort=lat' > events/synthetic/wakeup_latency/trigger
+ping localhost -c 5
+if [ ! grep -q "ping" events/synthetic/wakeup_latency/hist -o ! grep -q "max:" events/sched/sched_switch/hist]; then
+    fail "Failed to create onmatch-onmax action inter-event histogram"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc
new file mode 100644
index 000000000000..38b7ed6242b2
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc
@@ -0,0 +1,48 @@
+#!/bin/sh
+# description: event trigger - test inter-event histogram trigger onmax action
+
+do_reset() {
+    reset_trigger
+    echo > set_event
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+    echo "synthetic event is not supported"
+    exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test create synthetic event"
+
+echo 'wakeup_latency  u64 lat pid_t pid char comm[16]' > synthetic_events
+if [ ! -d events/synthetic/wakeup_latency ]; then
+    fail "Failed to create wakeup_latency synthetic event"
+fi
+
+echo "Test onmax action"
+
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_waking/trigger
+echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger
+ping localhost -c 3
+if ! grep -q "max:" events/sched/sched_switch/hist; then
+    fail "Failed to create onmax action inter-event histogram"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc
new file mode 100644
index 000000000000..cef11377dcbd
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc
@@ -0,0 +1,54 @@
+#!/bin/sh
+# description: event trigger - test synthetic event create remove
+do_reset() {
+    reset_trigger
+    echo > set_event
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+    echo "synthetic event is not supported"
+    exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test create synthetic event"
+
+echo 'wakeup_latency  u64 lat pid_t pid char comm[16]' > synthetic_events
+if [ ! -d events/synthetic/wakeup_latency ]; then
+    fail "Failed to create wakeup_latency synthetic event"
+fi
+
+reset_trigger
+
+echo "Test create synthetic event with an error"
+echo 'wakeup_latency  u64 lat pid_t pid char' > synthetic_events > /dev/null
+if [ -d events/synthetic/wakeup_latency ]; then
+    fail "Created wakeup_latency synthetic event with an invalid format"
+fi
+
+reset_trigger
+
+echo "Test remove synthetic event"
+echo '!wakeup_latency  u64 lat pid_t pid char comm[16]' > synthetic_events
+if [ -d events/synthetic/wakeup_latency ]; then
+    fail "Failed to delete wakeup_latency synthetic event"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore
new file mode 100644
index 000000000000..6c16f77c722c
--- /dev/null
+++ b/tools/testing/selftests/proc/.gitignore
@@ -0,0 +1,8 @@
+/proc-loadavg-001
+/proc-self-map-files-001
+/proc-self-map-files-002
+/proc-self-syscall
+/proc-self-wchan
+/proc-uptime-001
+/proc-uptime-002
+/read
diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile
new file mode 100644
index 000000000000..dbb87e56264c
--- /dev/null
+++ b/tools/testing/selftests/proc/Makefile
@@ -0,0 +1,13 @@
+CFLAGS += -Wall -O2
+
+TEST_GEN_PROGS :=
+TEST_GEN_PROGS += proc-loadavg-001
+TEST_GEN_PROGS += proc-self-map-files-001
+TEST_GEN_PROGS += proc-self-map-files-002
+TEST_GEN_PROGS += proc-self-syscall
+TEST_GEN_PROGS += proc-self-wchan
+TEST_GEN_PROGS += proc-uptime-001
+TEST_GEN_PROGS += proc-uptime-002
+TEST_GEN_PROGS += read
+
+include ../lib.mk
diff --git a/tools/testing/selftests/proc/config b/tools/testing/selftests/proc/config
new file mode 100644
index 000000000000..68fbd2b35884
--- /dev/null
+++ b/tools/testing/selftests/proc/config
@@ -0,0 +1 @@
+CONFIG_PROC_FS=y
diff --git a/tools/testing/selftests/proc/proc-loadavg-001.c b/tools/testing/selftests/proc/proc-loadavg-001.c
new file mode 100644
index 000000000000..e38ad6d94d4b
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-loadavg-001.c
@@ -0,0 +1,83 @@
+/*
+ * Copyright _ 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/* Test that /proc/loadavg correctly reports last pid in pid namespace. */
+#define _GNU_SOURCE
+#include <errno.h>
+#include <sched.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/wait.h>
+
+int main(void)
+{
+	pid_t pid;
+	int wstatus;
+
+	if (unshare(CLONE_NEWPID) == -1) {
+		if (errno == ENOSYS || errno == EPERM)
+			return 2;
+		return 1;
+	}
+
+	pid = fork();
+	if (pid == -1)
+		return 1;
+	if (pid == 0) {
+		char buf[128], *p;
+		int fd;
+		ssize_t rv;
+
+		fd = open("/proc/loadavg" , O_RDONLY);
+		if (fd == -1)
+			return 1;
+		rv = read(fd, buf, sizeof(buf));
+		if (rv < 3)
+			return 1;
+		p = buf + rv;
+
+		/* pid 1 */
+		if (!(p[-3] == ' ' && p[-2] == '1' && p[-1] == '\n'))
+			return 1;
+
+		pid = fork();
+		if (pid == -1)
+			return 1;
+		if (pid == 0)
+			return 0;
+		if (waitpid(pid, NULL, 0) == -1)
+			return 1;
+
+		lseek(fd, 0, SEEK_SET);
+		rv = read(fd, buf, sizeof(buf));
+		if (rv < 3)
+			return 1;
+		p = buf + rv;
+
+		/* pid 2 */
+		if (!(p[-3] == ' ' && p[-2] == '2' && p[-1] == '\n'))
+			return 1;
+
+		return 0;
+	}
+
+	if (waitpid(pid, &wstatus, 0) == -1)
+		return 1;
+	if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) == 0)
+		return 0;
+	return 1;
+}
diff --git a/tools/testing/selftests/proc/proc-self-map-files-001.c b/tools/testing/selftests/proc/proc-self-map-files-001.c
new file mode 100644
index 000000000000..af1d0a6af810
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-self-map-files-001.c
@@ -0,0 +1,82 @@
+/*
+ * Copyright _ 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/* Test readlink /proc/self/map_files/... */
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <stdlib.h>
+
+static void pass(const char *fmt, unsigned long a, unsigned long b)
+{
+	char name[64];
+	char buf[64];
+
+	snprintf(name, sizeof(name), fmt, a, b);
+	if (readlink(name, buf, sizeof(buf)) == -1)
+		exit(1);
+}
+
+static void fail(const char *fmt, unsigned long a, unsigned long b)
+{
+	char name[64];
+	char buf[64];
+
+	snprintf(name, sizeof(name), fmt, a, b);
+	if (readlink(name, buf, sizeof(buf)) == -1 && errno == ENOENT)
+		return;
+	exit(1);
+}
+
+int main(void)
+{
+	const unsigned int PAGE_SIZE = sysconf(_SC_PAGESIZE);
+	void *p;
+	int fd;
+	unsigned long a, b;
+
+	fd = open("/dev/zero", O_RDONLY);
+	if (fd == -1)
+		return 1;
+
+	p = mmap(NULL, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_FILE, fd, 0);
+	if (p == MAP_FAILED)
+		return 1;
+
+	a = (unsigned long)p;
+	b = (unsigned long)p + PAGE_SIZE;
+
+	pass("/proc/self/map_files/%lx-%lx", a, b);
+	fail("/proc/self/map_files/ %lx-%lx", a, b);
+	fail("/proc/self/map_files/%lx -%lx", a, b);
+	fail("/proc/self/map_files/%lx- %lx", a, b);
+	fail("/proc/self/map_files/%lx-%lx ", a, b);
+	fail("/proc/self/map_files/0%lx-%lx", a, b);
+	fail("/proc/self/map_files/%lx-0%lx", a, b);
+	if (sizeof(long) == 4) {
+		fail("/proc/self/map_files/100000000%lx-%lx", a, b);
+		fail("/proc/self/map_files/%lx-100000000%lx", a, b);
+	} else if (sizeof(long) == 8) {
+		fail("/proc/self/map_files/10000000000000000%lx-%lx", a, b);
+		fail("/proc/self/map_files/%lx-10000000000000000%lx", a, b);
+	} else
+		return 1;
+
+	return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-self-map-files-002.c b/tools/testing/selftests/proc/proc-self-map-files-002.c
new file mode 100644
index 000000000000..aebf4be56111
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-self-map-files-002.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright _ 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/* Test readlink /proc/self/map_files/... with address 0. */
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <stdlib.h>
+
+static void pass(const char *fmt, unsigned long a, unsigned long b)
+{
+	char name[64];
+	char buf[64];
+
+	snprintf(name, sizeof(name), fmt, a, b);
+	if (readlink(name, buf, sizeof(buf)) == -1)
+		exit(1);
+}
+
+static void fail(const char *fmt, unsigned long a, unsigned long b)
+{
+	char name[64];
+	char buf[64];
+
+	snprintf(name, sizeof(name), fmt, a, b);
+	if (readlink(name, buf, sizeof(buf)) == -1 && errno == ENOENT)
+		return;
+	exit(1);
+}
+
+int main(void)
+{
+	const unsigned int PAGE_SIZE = sysconf(_SC_PAGESIZE);
+	void *p;
+	int fd;
+	unsigned long a, b;
+
+	fd = open("/dev/zero", O_RDONLY);
+	if (fd == -1)
+		return 1;
+
+	p = mmap(NULL, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_FILE|MAP_FIXED, fd, 0);
+	if (p == MAP_FAILED) {
+		if (errno == EPERM)
+			return 2;
+		return 1;
+	}
+
+	a = (unsigned long)p;
+	b = (unsigned long)p + PAGE_SIZE;
+
+	pass("/proc/self/map_files/%lx-%lx", a, b);
+	fail("/proc/self/map_files/ %lx-%lx", a, b);
+	fail("/proc/self/map_files/%lx -%lx", a, b);
+	fail("/proc/self/map_files/%lx- %lx", a, b);
+	fail("/proc/self/map_files/%lx-%lx ", a, b);
+	fail("/proc/self/map_files/0%lx-%lx", a, b);
+	fail("/proc/self/map_files/%lx-0%lx", a, b);
+	if (sizeof(long) == 4) {
+		fail("/proc/self/map_files/100000000%lx-%lx", a, b);
+		fail("/proc/self/map_files/%lx-100000000%lx", a, b);
+	} else if (sizeof(long) == 8) {
+		fail("/proc/self/map_files/10000000000000000%lx-%lx", a, b);
+		fail("/proc/self/map_files/%lx-10000000000000000%lx", a, b);
+	} else
+		return 1;
+
+	return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-self-syscall.c b/tools/testing/selftests/proc/proc-self-syscall.c
new file mode 100644
index 000000000000..05eb6f91f1e9
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-self-syscall.c
@@ -0,0 +1,45 @@
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdio.h>
+
+static inline ssize_t sys_read(int fd, void *buf, size_t len)
+{
+	return syscall(SYS_read, fd, buf, len);
+}
+
+int main(void)
+{
+	char buf1[64];
+	char buf2[64];
+	int fd;
+	ssize_t rv;
+
+	fd = open("/proc/self/syscall", O_RDONLY);
+	if (fd == -1) {
+		if (errno == ENOENT)
+			return 2;
+		return 1;
+	}
+
+	/* Do direct system call as libc can wrap anything. */
+	snprintf(buf1, sizeof(buf1), "%ld 0x%lx 0x%lx 0x%lx",
+		 (long)SYS_read, (long)fd, (long)buf2, (long)sizeof(buf2));
+
+	memset(buf2, 0, sizeof(buf2));
+	rv = sys_read(fd, buf2, sizeof(buf2));
+	if (rv < 0)
+		return 1;
+	if (rv < strlen(buf1))
+		return 1;
+	if (strncmp(buf1, buf2, strlen(buf1)) != 0)
+		return 1;
+
+	return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-self-wchan.c b/tools/testing/selftests/proc/proc-self-wchan.c
new file mode 100644
index 000000000000..b8d8728a6869
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-self-wchan.c
@@ -0,0 +1,25 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <unistd.h>
+
+int main(void)
+{
+	char buf[64];
+	int fd;
+
+	fd = open("/proc/self/wchan", O_RDONLY);
+	if (fd == -1) {
+		if (errno == ENOENT)
+			return 2;
+		return 1;
+	}
+
+	buf[0] = '\0';
+	if (read(fd, buf, sizeof(buf)) != 1)
+		return 1;
+	if (buf[0] != '0')
+		return 1;
+	return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-uptime-001.c b/tools/testing/selftests/proc/proc-uptime-001.c
new file mode 100644
index 000000000000..303f26092306
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-uptime-001.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright _ 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test that values in /proc/uptime increment monotonically.
+#undef NDEBUG
+#include <assert.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include "proc-uptime.h"
+
+int main(void)
+{
+	uint64_t start, u0, u1, i0, i1;
+	int fd;
+
+	fd = open("/proc/uptime", O_RDONLY);
+	assert(fd >= 0);
+
+	proc_uptime(fd, &u0, &i0);
+	start = u0;
+	do {
+		proc_uptime(fd, &u1, &i1);
+		assert(u1 >= u0);
+		assert(i1 >= i0);
+		u0 = u1;
+		i0 = i1;
+	} while (u1 - start < 100);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-uptime-002.c b/tools/testing/selftests/proc/proc-uptime-002.c
new file mode 100644
index 000000000000..0cb79e1f1674
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-uptime-002.c
@@ -0,0 +1,79 @@
+/*
+ * Copyright _ 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test that values in /proc/uptime increment monotonically
+// while shifting across CPUs.
+#define _GNU_SOURCE
+#undef NDEBUG
+#include <assert.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include "proc-uptime.h"
+
+static inline int sys_sched_getaffinity(pid_t pid, unsigned int len, unsigned long *m)
+{
+	return syscall(SYS_sched_getaffinity, pid, len, m);
+}
+
+static inline int sys_sched_setaffinity(pid_t pid, unsigned int len, unsigned long *m)
+{
+	return syscall(SYS_sched_setaffinity, pid, len, m);
+}
+
+int main(void)
+{
+	unsigned int len;
+	unsigned long *m;
+	unsigned int cpu;
+	uint64_t u0, u1, i0, i1;
+	int fd;
+
+	/* find out "nr_cpu_ids" */
+	m = NULL;
+	len = 0;
+	do {
+		len += sizeof(unsigned long);
+		free(m);
+		m = malloc(len);
+	} while (sys_sched_getaffinity(0, len, m) == -EINVAL);
+
+	fd = open("/proc/uptime", O_RDONLY);
+	assert(fd >= 0);
+
+	proc_uptime(fd, &u0, &i0);
+	for (cpu = 0; cpu < len * 8; cpu++) {
+		memset(m, 0, len);
+		m[cpu / (8 * sizeof(unsigned long))] |= 1UL << (cpu % (8 * sizeof(unsigned long)));
+
+		/* CPU might not exist, ignore error */
+		sys_sched_setaffinity(0, len, m);
+
+		proc_uptime(fd, &u1, &i1);
+		assert(u1 >= u0);
+		assert(i1 >= i0);
+		u0 = u1;
+		i0 = i1;
+	}
+
+	return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-uptime.h b/tools/testing/selftests/proc/proc-uptime.h
new file mode 100644
index 000000000000..d584419f50a7
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-uptime.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright _ 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#undef NDEBUG
+#include <assert.h>
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+static unsigned long long xstrtoull(const char *p, char **end)
+{
+	if (*p == '0') {
+		*end = (char *)p + 1;
+		return 0;
+	} else if ('1' <= *p && *p <= '9') {
+		unsigned long long val;
+
+		errno = 0;
+		val = strtoull(p, end, 10);
+		assert(errno == 0);
+		return val;
+	} else
+		assert(0);
+}
+
+static void proc_uptime(int fd, uint64_t *uptime, uint64_t *idle)
+{
+	uint64_t val1, val2;
+	char buf[64], *p;
+	ssize_t rv;
+
+	/* save "p < end" checks */
+	memset(buf, 0, sizeof(buf));
+	rv = pread(fd, buf, sizeof(buf), 0);
+	assert(0 <= rv && rv <= sizeof(buf));
+	buf[sizeof(buf) - 1] = '\0';
+
+	p = buf;
+
+	val1 = xstrtoull(p, &p);
+	assert(p[0] == '.');
+	assert('0' <= p[1] && p[1] <= '9');
+	assert('0' <= p[2] && p[2] <= '9');
+	assert(p[3] == ' ');
+
+	val2 = (p[1] - '0') * 10 + p[2] - '0';
+	*uptime = val1 * 100 + val2;
+
+	p += 4;
+
+	val1 = xstrtoull(p, &p);
+	assert(p[0] == '.');
+	assert('0' <= p[1] && p[1] <= '9');
+	assert('0' <= p[2] && p[2] <= '9');
+	assert(p[3] == '\n');
+
+	val2 = (p[1] - '0') * 10 + p[2] - '0';
+	*idle = val1 * 100 + val2;
+
+	assert(p + 4 == buf + rv);
+}
diff --git a/tools/testing/selftests/proc/read.c b/tools/testing/selftests/proc/read.c
new file mode 100644
index 000000000000..12e397f78592
--- /dev/null
+++ b/tools/testing/selftests/proc/read.c
@@ -0,0 +1,147 @@
+/*
+ * Copyright _ 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test
+// 1) read of every file in /proc
+// 2) readlink of every symlink in /proc
+// 3) recursively (1) + (2) for every directory in /proc
+// 4) write to /proc/*/clear_refs and /proc/*/task/*/clear_refs
+// 5) write to /proc/sysrq-trigger
+#undef NDEBUG
+#include <assert.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+static inline bool streq(const char *s1, const char *s2)
+{
+	return strcmp(s1, s2) == 0;
+}
+
+static struct dirent *xreaddir(DIR *d)
+{
+	struct dirent *de;
+
+	errno = 0;
+	de = readdir(d);
+	if (!de && errno != 0) {
+		exit(1);
+	}
+	return de;
+}
+
+static void f_reg(DIR *d, const char *filename)
+{
+	char buf[4096];
+	int fd;
+	ssize_t rv;
+
+	/* read from /proc/kmsg can block */
+	fd = openat(dirfd(d), filename, O_RDONLY|O_NONBLOCK);
+	if (fd == -1)
+		return;
+	rv = read(fd, buf, sizeof(buf));
+	assert((0 <= rv && rv <= sizeof(buf)) || rv == -1);
+	close(fd);
+}
+
+static void f_reg_write(DIR *d, const char *filename, const char *buf, size_t len)
+{
+	int fd;
+	ssize_t rv;
+
+	fd = openat(dirfd(d), filename, O_WRONLY);
+	if (fd == -1)
+		return;
+	rv = write(fd, buf, len);
+	assert((0 <= rv && rv <= len) || rv == -1);
+	close(fd);
+}
+
+static void f_lnk(DIR *d, const char *filename)
+{
+	char buf[4096];
+	ssize_t rv;
+
+	rv = readlinkat(dirfd(d), filename, buf, sizeof(buf));
+	assert((0 <= rv && rv <= sizeof(buf)) || rv == -1);
+}
+
+static void f(DIR *d, unsigned int level)
+{
+	struct dirent *de;
+
+	de = xreaddir(d);
+	assert(de->d_type == DT_DIR);
+	assert(streq(de->d_name, "."));
+
+	de = xreaddir(d);
+	assert(de->d_type == DT_DIR);
+	assert(streq(de->d_name, ".."));
+
+	while ((de = xreaddir(d))) {
+		assert(!streq(de->d_name, "."));
+		assert(!streq(de->d_name, ".."));
+
+		switch (de->d_type) {
+			DIR *dd;
+			int fd;
+
+		case DT_REG:
+			if (level == 0 && streq(de->d_name, "sysrq-trigger")) {
+				f_reg_write(d, de->d_name, "h", 1);
+			} else if (level == 1 && streq(de->d_name, "clear_refs")) {
+				f_reg_write(d, de->d_name, "1", 1);
+			} else if (level == 3 && streq(de->d_name, "clear_refs")) {
+				f_reg_write(d, de->d_name, "1", 1);
+			} else {
+				f_reg(d, de->d_name);
+			}
+			break;
+		case DT_DIR:
+			fd = openat(dirfd(d), de->d_name, O_DIRECTORY|O_RDONLY);
+			if (fd == -1)
+				continue;
+			dd = fdopendir(fd);
+			if (!dd)
+				continue;
+			f(dd, level + 1);
+			closedir(dd);
+			break;
+		case DT_LNK:
+			f_lnk(d, de->d_name);
+			break;
+		default:
+			assert(0);
+		}
+	}
+}
+
+int main(void)
+{
+	DIR *d;
+
+	d = opendir("/proc");
+	if (!d)
+		return 2;
+	f(d, 0);
+	return 0;
+}